{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 19458, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.139274334463974e-05, "grad_norm": 18.983932495117188, "learning_rate": 1.7123287671232876e-08, "loss": 1.5915, "step": 1 }, { "epoch": 0.00010278548668927948, "grad_norm": 4.5546159744262695, "learning_rate": 3.424657534246575e-08, "loss": 1.0546, "step": 2 }, { "epoch": 0.0001541782300339192, "grad_norm": 16.91658592224121, "learning_rate": 5.136986301369863e-08, "loss": 1.6223, "step": 3 }, { "epoch": 0.00020557097337855896, "grad_norm": 17.328807830810547, "learning_rate": 6.84931506849315e-08, "loss": 1.5198, "step": 4 }, { "epoch": 0.00025696371672319867, "grad_norm": 18.038570404052734, "learning_rate": 8.561643835616439e-08, "loss": 1.6088, "step": 5 }, { "epoch": 0.0003083564600678384, "grad_norm": 17.907302856445312, "learning_rate": 1.0273972602739726e-07, "loss": 1.5136, "step": 6 }, { "epoch": 0.00035974920341247813, "grad_norm": 18.671998977661133, "learning_rate": 1.1986301369863014e-07, "loss": 1.5237, "step": 7 }, { "epoch": 0.0004111419467571179, "grad_norm": 15.614900588989258, "learning_rate": 1.36986301369863e-07, "loss": 1.5378, "step": 8 }, { "epoch": 0.00046253469010175765, "grad_norm": 19.567293167114258, "learning_rate": 1.541095890410959e-07, "loss": 1.5818, "step": 9 }, { "epoch": 0.0005139274334463973, "grad_norm": 15.693636894226074, "learning_rate": 1.7123287671232878e-07, "loss": 1.5181, "step": 10 }, { "epoch": 0.0005653201767910371, "grad_norm": 16.947132110595703, "learning_rate": 1.8835616438356165e-07, "loss": 1.5241, "step": 11 }, { "epoch": 0.0006167129201356768, "grad_norm": 19.202796936035156, "learning_rate": 2.0547945205479452e-07, "loss": 1.5925, "step": 12 }, { "epoch": 0.0006681056634803166, "grad_norm": 20.05133628845215, "learning_rate": 2.226027397260274e-07, "loss": 1.5909, "step": 13 }, { "epoch": 0.0007194984068249563, "grad_norm": 18.02488899230957, "learning_rate": 2.397260273972603e-07, "loss": 1.5972, "step": 14 }, { "epoch": 0.000770891150169596, "grad_norm": 19.896997451782227, "learning_rate": 2.568493150684932e-07, "loss": 1.6129, "step": 15 }, { "epoch": 0.0008222838935142358, "grad_norm": 4.352841377258301, "learning_rate": 2.73972602739726e-07, "loss": 1.0081, "step": 16 }, { "epoch": 0.0008736766368588755, "grad_norm": 14.912474632263184, "learning_rate": 2.910958904109589e-07, "loss": 1.4785, "step": 17 }, { "epoch": 0.0009250693802035153, "grad_norm": 15.636301040649414, "learning_rate": 3.082191780821918e-07, "loss": 1.566, "step": 18 }, { "epoch": 0.000976462123548155, "grad_norm": 13.412144660949707, "learning_rate": 3.2534246575342466e-07, "loss": 1.5415, "step": 19 }, { "epoch": 0.0010278548668927947, "grad_norm": 14.40302562713623, "learning_rate": 3.4246575342465755e-07, "loss": 1.5296, "step": 20 }, { "epoch": 0.0010792476102374346, "grad_norm": 3.5788261890411377, "learning_rate": 3.595890410958904e-07, "loss": 0.9648, "step": 21 }, { "epoch": 0.0011306403535820742, "grad_norm": 4.096730709075928, "learning_rate": 3.767123287671233e-07, "loss": 0.9836, "step": 22 }, { "epoch": 0.001182033096926714, "grad_norm": 15.651545524597168, "learning_rate": 3.938356164383562e-07, "loss": 1.4893, "step": 23 }, { "epoch": 0.0012334258402713536, "grad_norm": 11.431192398071289, "learning_rate": 4.1095890410958903e-07, "loss": 1.4979, "step": 24 }, { "epoch": 0.0012848185836159935, "grad_norm": 8.430842399597168, "learning_rate": 4.2808219178082193e-07, "loss": 1.4355, "step": 25 }, { "epoch": 0.0013362113269606332, "grad_norm": 8.259258270263672, "learning_rate": 4.452054794520548e-07, "loss": 1.4671, "step": 26 }, { "epoch": 0.0013876040703052729, "grad_norm": 8.58527946472168, "learning_rate": 4.6232876712328767e-07, "loss": 1.4885, "step": 27 }, { "epoch": 0.0014389968136499125, "grad_norm": 8.001357078552246, "learning_rate": 4.794520547945206e-07, "loss": 1.4421, "step": 28 }, { "epoch": 0.0014903895569945524, "grad_norm": 6.4084343910217285, "learning_rate": 4.965753424657534e-07, "loss": 1.3488, "step": 29 }, { "epoch": 0.001541782300339192, "grad_norm": 7.5306291580200195, "learning_rate": 5.136986301369864e-07, "loss": 1.4144, "step": 30 }, { "epoch": 0.0015931750436838318, "grad_norm": 3.1045236587524414, "learning_rate": 5.308219178082192e-07, "loss": 0.9378, "step": 31 }, { "epoch": 0.0016445677870284717, "grad_norm": 6.288414001464844, "learning_rate": 5.47945205479452e-07, "loss": 1.4156, "step": 32 }, { "epoch": 0.0016959605303731114, "grad_norm": 2.602916717529297, "learning_rate": 5.65068493150685e-07, "loss": 0.9508, "step": 33 }, { "epoch": 0.001747353273717751, "grad_norm": 3.513256549835205, "learning_rate": 5.821917808219178e-07, "loss": 1.3227, "step": 34 }, { "epoch": 0.0017987460170623907, "grad_norm": 2.450483560562134, "learning_rate": 5.993150684931507e-07, "loss": 0.9727, "step": 35 }, { "epoch": 0.0018501387604070306, "grad_norm": 2.507052183151245, "learning_rate": 6.164383561643836e-07, "loss": 0.9407, "step": 36 }, { "epoch": 0.0019015315037516703, "grad_norm": 3.8305482864379883, "learning_rate": 6.335616438356165e-07, "loss": 1.2981, "step": 37 }, { "epoch": 0.00195292424709631, "grad_norm": 3.7786405086517334, "learning_rate": 6.506849315068493e-07, "loss": 1.3007, "step": 38 }, { "epoch": 0.0020043169904409497, "grad_norm": 3.929419994354248, "learning_rate": 6.678082191780823e-07, "loss": 1.2698, "step": 39 }, { "epoch": 0.0020557097337855893, "grad_norm": 2.5162405967712402, "learning_rate": 6.849315068493151e-07, "loss": 0.9581, "step": 40 }, { "epoch": 0.002107102477130229, "grad_norm": 3.632524013519287, "learning_rate": 7.020547945205481e-07, "loss": 1.2698, "step": 41 }, { "epoch": 0.002158495220474869, "grad_norm": 3.509355068206787, "learning_rate": 7.191780821917808e-07, "loss": 1.2689, "step": 42 }, { "epoch": 0.002209887963819509, "grad_norm": 3.567049741744995, "learning_rate": 7.363013698630137e-07, "loss": 1.3343, "step": 43 }, { "epoch": 0.0022612807071641485, "grad_norm": 2.3726067543029785, "learning_rate": 7.534246575342466e-07, "loss": 0.9122, "step": 44 }, { "epoch": 0.002312673450508788, "grad_norm": 3.2103631496429443, "learning_rate": 7.705479452054795e-07, "loss": 1.2341, "step": 45 }, { "epoch": 0.002364066193853428, "grad_norm": 3.190967321395874, "learning_rate": 7.876712328767124e-07, "loss": 1.306, "step": 46 }, { "epoch": 0.0024154589371980675, "grad_norm": 2.694685459136963, "learning_rate": 8.047945205479453e-07, "loss": 1.2526, "step": 47 }, { "epoch": 0.002466851680542707, "grad_norm": 2.587993860244751, "learning_rate": 8.219178082191781e-07, "loss": 1.2065, "step": 48 }, { "epoch": 0.0025182444238873473, "grad_norm": 1.8106046915054321, "learning_rate": 8.39041095890411e-07, "loss": 0.8855, "step": 49 }, { "epoch": 0.002569637167231987, "grad_norm": 2.268317937850952, "learning_rate": 8.561643835616439e-07, "loss": 1.2454, "step": 50 }, { "epoch": 0.0026210299105766267, "grad_norm": 1.9886833429336548, "learning_rate": 8.732876712328768e-07, "loss": 1.2747, "step": 51 }, { "epoch": 0.0026724226539212663, "grad_norm": 1.6132230758666992, "learning_rate": 8.904109589041097e-07, "loss": 0.9159, "step": 52 }, { "epoch": 0.002723815397265906, "grad_norm": 2.5420403480529785, "learning_rate": 9.075342465753426e-07, "loss": 1.2213, "step": 53 }, { "epoch": 0.0027752081406105457, "grad_norm": 2.235058069229126, "learning_rate": 9.246575342465753e-07, "loss": 1.2533, "step": 54 }, { "epoch": 0.0028266008839551854, "grad_norm": 2.079479932785034, "learning_rate": 9.417808219178083e-07, "loss": 1.1482, "step": 55 }, { "epoch": 0.002877993627299825, "grad_norm": 2.2093863487243652, "learning_rate": 9.589041095890411e-07, "loss": 1.2019, "step": 56 }, { "epoch": 0.002929386370644465, "grad_norm": 2.105095148086548, "learning_rate": 9.76027397260274e-07, "loss": 1.1674, "step": 57 }, { "epoch": 0.002980779113989105, "grad_norm": 1.8603713512420654, "learning_rate": 9.931506849315068e-07, "loss": 0.9176, "step": 58 }, { "epoch": 0.0030321718573337445, "grad_norm": 1.8567322492599487, "learning_rate": 1.0102739726027399e-06, "loss": 0.9331, "step": 59 }, { "epoch": 0.003083564600678384, "grad_norm": 1.9976909160614014, "learning_rate": 1.0273972602739727e-06, "loss": 1.1061, "step": 60 }, { "epoch": 0.003134957344023024, "grad_norm": 1.964990496635437, "learning_rate": 1.0445205479452056e-06, "loss": 1.2088, "step": 61 }, { "epoch": 0.0031863500873676636, "grad_norm": 1.89922297000885, "learning_rate": 1.0616438356164384e-06, "loss": 1.2079, "step": 62 }, { "epoch": 0.0032377428307123032, "grad_norm": 1.9209692478179932, "learning_rate": 1.0787671232876712e-06, "loss": 1.1916, "step": 63 }, { "epoch": 0.0032891355740569434, "grad_norm": 1.6601163148880005, "learning_rate": 1.095890410958904e-06, "loss": 1.1852, "step": 64 }, { "epoch": 0.003340528317401583, "grad_norm": 1.662931203842163, "learning_rate": 1.1130136986301371e-06, "loss": 0.8447, "step": 65 }, { "epoch": 0.0033919210607462227, "grad_norm": 1.6068320274353027, "learning_rate": 1.13013698630137e-06, "loss": 1.1932, "step": 66 }, { "epoch": 0.0034433138040908624, "grad_norm": 1.7915514707565308, "learning_rate": 1.1472602739726028e-06, "loss": 1.1881, "step": 67 }, { "epoch": 0.003494706547435502, "grad_norm": 1.7644329071044922, "learning_rate": 1.1643835616438357e-06, "loss": 1.1785, "step": 68 }, { "epoch": 0.0035460992907801418, "grad_norm": 1.5297054052352905, "learning_rate": 1.1815068493150685e-06, "loss": 1.0823, "step": 69 }, { "epoch": 0.0035974920341247814, "grad_norm": 1.6290435791015625, "learning_rate": 1.1986301369863014e-06, "loss": 1.1546, "step": 70 }, { "epoch": 0.003648884777469421, "grad_norm": 1.503154993057251, "learning_rate": 1.2157534246575344e-06, "loss": 0.8808, "step": 71 }, { "epoch": 0.0037002775208140612, "grad_norm": 1.669655680656433, "learning_rate": 1.2328767123287673e-06, "loss": 1.2138, "step": 72 }, { "epoch": 0.003751670264158701, "grad_norm": 1.6407079696655273, "learning_rate": 1.25e-06, "loss": 1.1743, "step": 73 }, { "epoch": 0.0038030630075033406, "grad_norm": 1.433539867401123, "learning_rate": 1.267123287671233e-06, "loss": 0.9028, "step": 74 }, { "epoch": 0.0038544557508479803, "grad_norm": 1.5575098991394043, "learning_rate": 1.284246575342466e-06, "loss": 1.1462, "step": 75 }, { "epoch": 0.00390584849419262, "grad_norm": 1.3791413307189941, "learning_rate": 1.3013698630136986e-06, "loss": 0.9162, "step": 76 }, { "epoch": 0.00395724123753726, "grad_norm": 1.2825815677642822, "learning_rate": 1.3184931506849317e-06, "loss": 0.8641, "step": 77 }, { "epoch": 0.004008633980881899, "grad_norm": 1.521395206451416, "learning_rate": 1.3356164383561645e-06, "loss": 1.1824, "step": 78 }, { "epoch": 0.004060026724226539, "grad_norm": 1.635690689086914, "learning_rate": 1.3527397260273976e-06, "loss": 1.2042, "step": 79 }, { "epoch": 0.004111419467571179, "grad_norm": 1.5896871089935303, "learning_rate": 1.3698630136986302e-06, "loss": 1.1835, "step": 80 }, { "epoch": 0.004162812210915819, "grad_norm": 1.5003917217254639, "learning_rate": 1.386986301369863e-06, "loss": 1.2263, "step": 81 }, { "epoch": 0.004214204954260458, "grad_norm": 1.2084776163101196, "learning_rate": 1.4041095890410961e-06, "loss": 0.8795, "step": 82 }, { "epoch": 0.004265597697605098, "grad_norm": 1.4843071699142456, "learning_rate": 1.421232876712329e-06, "loss": 1.2149, "step": 83 }, { "epoch": 0.004316990440949738, "grad_norm": 1.675994873046875, "learning_rate": 1.4383561643835616e-06, "loss": 1.1447, "step": 84 }, { "epoch": 0.0043683831842943775, "grad_norm": 1.1407861709594727, "learning_rate": 1.4554794520547946e-06, "loss": 0.868, "step": 85 }, { "epoch": 0.004419775927639018, "grad_norm": 1.8884973526000977, "learning_rate": 1.4726027397260275e-06, "loss": 1.202, "step": 86 }, { "epoch": 0.004471168670983657, "grad_norm": 1.434266448020935, "learning_rate": 1.4897260273972605e-06, "loss": 1.1098, "step": 87 }, { "epoch": 0.004522561414328297, "grad_norm": 1.4622822999954224, "learning_rate": 1.5068493150684932e-06, "loss": 1.0677, "step": 88 }, { "epoch": 0.004573954157672936, "grad_norm": 1.4630745649337769, "learning_rate": 1.5239726027397262e-06, "loss": 1.0797, "step": 89 }, { "epoch": 0.004625346901017576, "grad_norm": 1.4478847980499268, "learning_rate": 1.541095890410959e-06, "loss": 1.1446, "step": 90 }, { "epoch": 0.004676739644362216, "grad_norm": 1.4457114934921265, "learning_rate": 1.5582191780821921e-06, "loss": 1.0545, "step": 91 }, { "epoch": 0.004728132387706856, "grad_norm": 1.4008527994155884, "learning_rate": 1.5753424657534248e-06, "loss": 1.11, "step": 92 }, { "epoch": 0.004779525131051496, "grad_norm": 1.3423473834991455, "learning_rate": 1.5924657534246576e-06, "loss": 1.1122, "step": 93 }, { "epoch": 0.004830917874396135, "grad_norm": 1.3709416389465332, "learning_rate": 1.6095890410958907e-06, "loss": 1.0117, "step": 94 }, { "epoch": 0.004882310617740775, "grad_norm": 1.381400465965271, "learning_rate": 1.6267123287671235e-06, "loss": 0.9166, "step": 95 }, { "epoch": 0.004933703361085414, "grad_norm": 1.9969120025634766, "learning_rate": 1.6438356164383561e-06, "loss": 1.1557, "step": 96 }, { "epoch": 0.0049850961044300545, "grad_norm": 1.4465864896774292, "learning_rate": 1.6609589041095892e-06, "loss": 1.1365, "step": 97 }, { "epoch": 0.005036488847774695, "grad_norm": 2.0803210735321045, "learning_rate": 1.678082191780822e-06, "loss": 1.1198, "step": 98 }, { "epoch": 0.005087881591119334, "grad_norm": 1.6235849857330322, "learning_rate": 1.695205479452055e-06, "loss": 1.1256, "step": 99 }, { "epoch": 0.005139274334463974, "grad_norm": 1.590221881866455, "learning_rate": 1.7123287671232877e-06, "loss": 1.0791, "step": 100 }, { "epoch": 0.005190667077808613, "grad_norm": 2.3823463916778564, "learning_rate": 1.7294520547945206e-06, "loss": 0.9377, "step": 101 }, { "epoch": 0.005242059821153253, "grad_norm": 1.3421955108642578, "learning_rate": 1.7465753424657536e-06, "loss": 1.054, "step": 102 }, { "epoch": 0.005293452564497893, "grad_norm": 1.121549129486084, "learning_rate": 1.7636986301369865e-06, "loss": 0.8505, "step": 103 }, { "epoch": 0.005344845307842533, "grad_norm": 1.8572452068328857, "learning_rate": 1.7808219178082193e-06, "loss": 1.0778, "step": 104 }, { "epoch": 0.005396238051187172, "grad_norm": 1.1325087547302246, "learning_rate": 1.7979452054794521e-06, "loss": 0.7966, "step": 105 }, { "epoch": 0.005447630794531812, "grad_norm": 1.4741758108139038, "learning_rate": 1.8150684931506852e-06, "loss": 1.0987, "step": 106 }, { "epoch": 0.005499023537876452, "grad_norm": 1.4123742580413818, "learning_rate": 1.832191780821918e-06, "loss": 1.1147, "step": 107 }, { "epoch": 0.005550416281221091, "grad_norm": 1.3640860319137573, "learning_rate": 1.8493150684931507e-06, "loss": 1.1028, "step": 108 }, { "epoch": 0.0056018090245657315, "grad_norm": 1.4527561664581299, "learning_rate": 1.8664383561643837e-06, "loss": 1.0789, "step": 109 }, { "epoch": 0.005653201767910371, "grad_norm": 1.0783603191375732, "learning_rate": 1.8835616438356166e-06, "loss": 0.9282, "step": 110 }, { "epoch": 0.005704594511255011, "grad_norm": 1.396845817565918, "learning_rate": 1.9006849315068496e-06, "loss": 1.0918, "step": 111 }, { "epoch": 0.00575598725459965, "grad_norm": 0.992274820804596, "learning_rate": 1.9178082191780823e-06, "loss": 0.776, "step": 112 }, { "epoch": 0.00580737999794429, "grad_norm": 1.4029620885849, "learning_rate": 1.9349315068493153e-06, "loss": 1.0743, "step": 113 }, { "epoch": 0.00585877274128893, "grad_norm": 1.3273193836212158, "learning_rate": 1.952054794520548e-06, "loss": 1.0451, "step": 114 }, { "epoch": 0.00591016548463357, "grad_norm": 1.0597772598266602, "learning_rate": 1.969178082191781e-06, "loss": 0.8557, "step": 115 }, { "epoch": 0.00596155822797821, "grad_norm": 1.3844999074935913, "learning_rate": 1.9863013698630136e-06, "loss": 1.05, "step": 116 }, { "epoch": 0.006012950971322849, "grad_norm": 1.449572205543518, "learning_rate": 2.0034246575342467e-06, "loss": 1.0657, "step": 117 }, { "epoch": 0.006064343714667489, "grad_norm": 1.3290841579437256, "learning_rate": 2.0205479452054797e-06, "loss": 0.9605, "step": 118 }, { "epoch": 0.006115736458012128, "grad_norm": 1.4795900583267212, "learning_rate": 2.037671232876713e-06, "loss": 1.1017, "step": 119 }, { "epoch": 0.006167129201356768, "grad_norm": 1.4142014980316162, "learning_rate": 2.0547945205479454e-06, "loss": 1.1145, "step": 120 }, { "epoch": 0.0062185219447014085, "grad_norm": 1.7041820287704468, "learning_rate": 2.071917808219178e-06, "loss": 1.0972, "step": 121 }, { "epoch": 0.006269914688046048, "grad_norm": 1.4172296524047852, "learning_rate": 2.089041095890411e-06, "loss": 1.0592, "step": 122 }, { "epoch": 0.006321307431390688, "grad_norm": 1.5352330207824707, "learning_rate": 2.106164383561644e-06, "loss": 1.088, "step": 123 }, { "epoch": 0.006372700174735327, "grad_norm": 1.5836726427078247, "learning_rate": 2.123287671232877e-06, "loss": 1.0782, "step": 124 }, { "epoch": 0.006424092918079967, "grad_norm": 1.33640718460083, "learning_rate": 2.14041095890411e-06, "loss": 1.0236, "step": 125 }, { "epoch": 0.0064754856614246065, "grad_norm": 1.4273734092712402, "learning_rate": 2.1575342465753425e-06, "loss": 1.054, "step": 126 }, { "epoch": 0.006526878404769247, "grad_norm": 1.447994351387024, "learning_rate": 2.1746575342465755e-06, "loss": 1.0304, "step": 127 }, { "epoch": 0.006578271148113887, "grad_norm": 1.3829752206802368, "learning_rate": 2.191780821917808e-06, "loss": 1.0589, "step": 128 }, { "epoch": 0.006629663891458526, "grad_norm": 0.9735933542251587, "learning_rate": 2.2089041095890412e-06, "loss": 0.8343, "step": 129 }, { "epoch": 0.006681056634803166, "grad_norm": 1.5326259136199951, "learning_rate": 2.2260273972602743e-06, "loss": 1.013, "step": 130 }, { "epoch": 0.006732449378147805, "grad_norm": 1.454871654510498, "learning_rate": 2.243150684931507e-06, "loss": 1.0101, "step": 131 }, { "epoch": 0.0067838421214924454, "grad_norm": 1.1300477981567383, "learning_rate": 2.26027397260274e-06, "loss": 0.8717, "step": 132 }, { "epoch": 0.006835234864837085, "grad_norm": 1.4412310123443604, "learning_rate": 2.2773972602739726e-06, "loss": 1.0502, "step": 133 }, { "epoch": 0.006886627608181725, "grad_norm": 1.4205049276351929, "learning_rate": 2.2945205479452057e-06, "loss": 1.0533, "step": 134 }, { "epoch": 0.006938020351526364, "grad_norm": 1.7551305294036865, "learning_rate": 2.3116438356164387e-06, "loss": 1.0119, "step": 135 }, { "epoch": 0.006989413094871004, "grad_norm": 1.3921761512756348, "learning_rate": 2.3287671232876713e-06, "loss": 1.0708, "step": 136 }, { "epoch": 0.007040805838215644, "grad_norm": 1.415696382522583, "learning_rate": 2.3458904109589044e-06, "loss": 1.0456, "step": 137 }, { "epoch": 0.0070921985815602835, "grad_norm": 1.12046217918396, "learning_rate": 2.363013698630137e-06, "loss": 0.8092, "step": 138 }, { "epoch": 0.007143591324904924, "grad_norm": 1.3467131853103638, "learning_rate": 2.38013698630137e-06, "loss": 1.0546, "step": 139 }, { "epoch": 0.007194984068249563, "grad_norm": 0.8997917771339417, "learning_rate": 2.3972602739726027e-06, "loss": 0.822, "step": 140 }, { "epoch": 0.007246376811594203, "grad_norm": 1.397667646408081, "learning_rate": 2.4143835616438358e-06, "loss": 1.0567, "step": 141 }, { "epoch": 0.007297769554938842, "grad_norm": 1.604783296585083, "learning_rate": 2.431506849315069e-06, "loss": 1.0272, "step": 142 }, { "epoch": 0.007349162298283482, "grad_norm": 1.4480196237564087, "learning_rate": 2.4486301369863015e-06, "loss": 0.8425, "step": 143 }, { "epoch": 0.0074005550416281225, "grad_norm": 1.4750083684921265, "learning_rate": 2.4657534246575345e-06, "loss": 0.9614, "step": 144 }, { "epoch": 0.007451947784972762, "grad_norm": 1.3589426279067993, "learning_rate": 2.482876712328767e-06, "loss": 0.9821, "step": 145 }, { "epoch": 0.007503340528317402, "grad_norm": 1.4352126121520996, "learning_rate": 2.5e-06, "loss": 1.0243, "step": 146 }, { "epoch": 0.007554733271662041, "grad_norm": 1.4147143363952637, "learning_rate": 2.5171232876712333e-06, "loss": 0.9968, "step": 147 }, { "epoch": 0.007606126015006681, "grad_norm": 1.359907627105713, "learning_rate": 2.534246575342466e-06, "loss": 1.0272, "step": 148 }, { "epoch": 0.00765751875835132, "grad_norm": 1.3496214151382446, "learning_rate": 2.551369863013699e-06, "loss": 0.9633, "step": 149 }, { "epoch": 0.0077089115016959605, "grad_norm": 1.859374761581421, "learning_rate": 2.568493150684932e-06, "loss": 1.0322, "step": 150 }, { "epoch": 0.007760304245040601, "grad_norm": 1.4341025352478027, "learning_rate": 2.585616438356164e-06, "loss": 1.0475, "step": 151 }, { "epoch": 0.00781169698838524, "grad_norm": 1.4478822946548462, "learning_rate": 2.6027397260273973e-06, "loss": 1.0347, "step": 152 }, { "epoch": 0.00786308973172988, "grad_norm": 1.4878865480422974, "learning_rate": 2.6198630136986303e-06, "loss": 1.0536, "step": 153 }, { "epoch": 0.00791448247507452, "grad_norm": 1.0657761096954346, "learning_rate": 2.6369863013698634e-06, "loss": 0.8161, "step": 154 }, { "epoch": 0.007965875218419158, "grad_norm": 1.97149658203125, "learning_rate": 2.654109589041096e-06, "loss": 1.0389, "step": 155 }, { "epoch": 0.008017267961763799, "grad_norm": 1.1665364503860474, "learning_rate": 2.671232876712329e-06, "loss": 0.8622, "step": 156 }, { "epoch": 0.008068660705108439, "grad_norm": 1.52482008934021, "learning_rate": 2.688356164383562e-06, "loss": 0.9671, "step": 157 }, { "epoch": 0.008120053448453079, "grad_norm": 1.8569509983062744, "learning_rate": 2.705479452054795e-06, "loss": 1.009, "step": 158 }, { "epoch": 0.008171446191797719, "grad_norm": 1.0221922397613525, "learning_rate": 2.7226027397260274e-06, "loss": 0.8084, "step": 159 }, { "epoch": 0.008222838935142357, "grad_norm": 1.399552822113037, "learning_rate": 2.7397260273972604e-06, "loss": 1.0087, "step": 160 }, { "epoch": 0.008274231678486997, "grad_norm": 1.404884934425354, "learning_rate": 2.7568493150684935e-06, "loss": 0.9931, "step": 161 }, { "epoch": 0.008325624421831638, "grad_norm": 1.3287042379379272, "learning_rate": 2.773972602739726e-06, "loss": 0.9632, "step": 162 }, { "epoch": 0.008377017165176278, "grad_norm": 1.4677836894989014, "learning_rate": 2.791095890410959e-06, "loss": 0.9791, "step": 163 }, { "epoch": 0.008428409908520916, "grad_norm": 1.1199952363967896, "learning_rate": 2.8082191780821922e-06, "loss": 0.8202, "step": 164 }, { "epoch": 0.008479802651865556, "grad_norm": 1.3308522701263428, "learning_rate": 2.8253424657534253e-06, "loss": 0.9557, "step": 165 }, { "epoch": 0.008531195395210196, "grad_norm": 1.4022819995880127, "learning_rate": 2.842465753424658e-06, "loss": 0.9773, "step": 166 }, { "epoch": 0.008582588138554836, "grad_norm": 1.3995765447616577, "learning_rate": 2.8595890410958905e-06, "loss": 1.0606, "step": 167 }, { "epoch": 0.008633980881899476, "grad_norm": 1.7663745880126953, "learning_rate": 2.876712328767123e-06, "loss": 1.0134, "step": 168 }, { "epoch": 0.008685373625244115, "grad_norm": 1.3537906408309937, "learning_rate": 2.8938356164383562e-06, "loss": 0.9966, "step": 169 }, { "epoch": 0.008736766368588755, "grad_norm": 1.5769232511520386, "learning_rate": 2.9109589041095893e-06, "loss": 0.9464, "step": 170 }, { "epoch": 0.008788159111933395, "grad_norm": 1.6474496126174927, "learning_rate": 2.9280821917808223e-06, "loss": 0.9946, "step": 171 }, { "epoch": 0.008839551855278035, "grad_norm": 1.3077877759933472, "learning_rate": 2.945205479452055e-06, "loss": 0.9827, "step": 172 }, { "epoch": 0.008890944598622675, "grad_norm": 1.4552854299545288, "learning_rate": 2.962328767123288e-06, "loss": 1.0465, "step": 173 }, { "epoch": 0.008942337341967314, "grad_norm": 1.3246502876281738, "learning_rate": 2.979452054794521e-06, "loss": 1.0203, "step": 174 }, { "epoch": 0.008993730085311954, "grad_norm": 1.292330026626587, "learning_rate": 2.9965753424657533e-06, "loss": 1.0213, "step": 175 }, { "epoch": 0.009045122828656594, "grad_norm": 1.3146753311157227, "learning_rate": 3.0136986301369864e-06, "loss": 0.9799, "step": 176 }, { "epoch": 0.009096515572001234, "grad_norm": 0.9643856883049011, "learning_rate": 3.0308219178082194e-06, "loss": 0.814, "step": 177 }, { "epoch": 0.009147908315345872, "grad_norm": 1.344741940498352, "learning_rate": 3.0479452054794525e-06, "loss": 1.0283, "step": 178 }, { "epoch": 0.009199301058690513, "grad_norm": 1.3446404933929443, "learning_rate": 3.065068493150685e-06, "loss": 1.0285, "step": 179 }, { "epoch": 0.009250693802035153, "grad_norm": 1.33372962474823, "learning_rate": 3.082191780821918e-06, "loss": 1.0179, "step": 180 }, { "epoch": 0.009302086545379793, "grad_norm": 1.2783104181289673, "learning_rate": 3.099315068493151e-06, "loss": 1.0117, "step": 181 }, { "epoch": 0.009353479288724433, "grad_norm": 1.0819905996322632, "learning_rate": 3.1164383561643843e-06, "loss": 0.7637, "step": 182 }, { "epoch": 0.009404872032069071, "grad_norm": 1.5114994049072266, "learning_rate": 3.1335616438356165e-06, "loss": 0.9671, "step": 183 }, { "epoch": 0.009456264775413711, "grad_norm": 0.8833361268043518, "learning_rate": 3.1506849315068495e-06, "loss": 0.7806, "step": 184 }, { "epoch": 0.009507657518758351, "grad_norm": 1.5377718210220337, "learning_rate": 3.167808219178082e-06, "loss": 1.0183, "step": 185 }, { "epoch": 0.009559050262102992, "grad_norm": 1.4501428604125977, "learning_rate": 3.184931506849315e-06, "loss": 1.049, "step": 186 }, { "epoch": 0.00961044300544763, "grad_norm": 1.1223877668380737, "learning_rate": 3.2020547945205483e-06, "loss": 0.8187, "step": 187 }, { "epoch": 0.00966183574879227, "grad_norm": 1.33689546585083, "learning_rate": 3.2191780821917813e-06, "loss": 0.8652, "step": 188 }, { "epoch": 0.00971322849213691, "grad_norm": 1.5084279775619507, "learning_rate": 3.236301369863014e-06, "loss": 0.997, "step": 189 }, { "epoch": 0.00976462123548155, "grad_norm": 1.3489594459533691, "learning_rate": 3.253424657534247e-06, "loss": 1.0134, "step": 190 }, { "epoch": 0.00981601397882619, "grad_norm": 0.8779999017715454, "learning_rate": 3.2705479452054796e-06, "loss": 0.8041, "step": 191 }, { "epoch": 0.009867406722170829, "grad_norm": 1.529397964477539, "learning_rate": 3.2876712328767123e-06, "loss": 1.0341, "step": 192 }, { "epoch": 0.009918799465515469, "grad_norm": 1.4480550289154053, "learning_rate": 3.3047945205479453e-06, "loss": 0.9562, "step": 193 }, { "epoch": 0.009970192208860109, "grad_norm": 1.3672980070114136, "learning_rate": 3.3219178082191784e-06, "loss": 0.9988, "step": 194 }, { "epoch": 0.010021584952204749, "grad_norm": 0.8857347965240479, "learning_rate": 3.3390410958904114e-06, "loss": 0.7854, "step": 195 }, { "epoch": 0.01007297769554939, "grad_norm": 0.9103947877883911, "learning_rate": 3.356164383561644e-06, "loss": 0.7833, "step": 196 }, { "epoch": 0.010124370438894028, "grad_norm": 1.821307897567749, "learning_rate": 3.373287671232877e-06, "loss": 0.9508, "step": 197 }, { "epoch": 0.010175763182238668, "grad_norm": 1.4973331689834595, "learning_rate": 3.39041095890411e-06, "loss": 1.0175, "step": 198 }, { "epoch": 0.010227155925583308, "grad_norm": 1.3801848888397217, "learning_rate": 3.4075342465753424e-06, "loss": 1.046, "step": 199 }, { "epoch": 0.010278548668927948, "grad_norm": 1.4681309461593628, "learning_rate": 3.4246575342465754e-06, "loss": 0.9831, "step": 200 }, { "epoch": 0.010329941412272586, "grad_norm": 1.5392931699752808, "learning_rate": 3.4417808219178085e-06, "loss": 0.9838, "step": 201 }, { "epoch": 0.010381334155617226, "grad_norm": 1.4939289093017578, "learning_rate": 3.458904109589041e-06, "loss": 1.0093, "step": 202 }, { "epoch": 0.010432726898961867, "grad_norm": 1.4377801418304443, "learning_rate": 3.476027397260274e-06, "loss": 0.9876, "step": 203 }, { "epoch": 0.010484119642306507, "grad_norm": 1.546335220336914, "learning_rate": 3.4931506849315072e-06, "loss": 0.9303, "step": 204 }, { "epoch": 0.010535512385651147, "grad_norm": 1.359356164932251, "learning_rate": 3.5102739726027403e-06, "loss": 0.929, "step": 205 }, { "epoch": 0.010586905128995785, "grad_norm": 0.9822274446487427, "learning_rate": 3.527397260273973e-06, "loss": 0.8617, "step": 206 }, { "epoch": 0.010638297872340425, "grad_norm": 0.9313762784004211, "learning_rate": 3.5445205479452056e-06, "loss": 0.8215, "step": 207 }, { "epoch": 0.010689690615685065, "grad_norm": 1.4884226322174072, "learning_rate": 3.5616438356164386e-06, "loss": 0.9872, "step": 208 }, { "epoch": 0.010741083359029705, "grad_norm": 1.4232925176620483, "learning_rate": 3.5787671232876712e-06, "loss": 0.9577, "step": 209 }, { "epoch": 0.010792476102374344, "grad_norm": 1.4278874397277832, "learning_rate": 3.5958904109589043e-06, "loss": 0.9844, "step": 210 }, { "epoch": 0.010843868845718984, "grad_norm": 0.8510668873786926, "learning_rate": 3.6130136986301373e-06, "loss": 0.8278, "step": 211 }, { "epoch": 0.010895261589063624, "grad_norm": 0.9740152359008789, "learning_rate": 3.6301369863013704e-06, "loss": 0.798, "step": 212 }, { "epoch": 0.010946654332408264, "grad_norm": 1.3978337049484253, "learning_rate": 3.647260273972603e-06, "loss": 1.0321, "step": 213 }, { "epoch": 0.010998047075752904, "grad_norm": 0.9594858884811401, "learning_rate": 3.664383561643836e-06, "loss": 0.8215, "step": 214 }, { "epoch": 0.011049439819097543, "grad_norm": 1.3392057418823242, "learning_rate": 3.6815068493150687e-06, "loss": 0.9828, "step": 215 }, { "epoch": 0.011100832562442183, "grad_norm": 0.940901517868042, "learning_rate": 3.6986301369863014e-06, "loss": 0.7873, "step": 216 }, { "epoch": 0.011152225305786823, "grad_norm": 1.3940353393554688, "learning_rate": 3.7157534246575344e-06, "loss": 0.9713, "step": 217 }, { "epoch": 0.011203618049131463, "grad_norm": 1.3776757717132568, "learning_rate": 3.7328767123287675e-06, "loss": 0.9929, "step": 218 }, { "epoch": 0.011255010792476103, "grad_norm": 1.338098406791687, "learning_rate": 3.7500000000000005e-06, "loss": 1.0037, "step": 219 }, { "epoch": 0.011306403535820742, "grad_norm": 1.431422233581543, "learning_rate": 3.767123287671233e-06, "loss": 0.9562, "step": 220 }, { "epoch": 0.011357796279165382, "grad_norm": 1.3269257545471191, "learning_rate": 3.784246575342466e-06, "loss": 1.0014, "step": 221 }, { "epoch": 0.011409189022510022, "grad_norm": 1.2928478717803955, "learning_rate": 3.8013698630136993e-06, "loss": 0.9367, "step": 222 }, { "epoch": 0.011460581765854662, "grad_norm": 0.9002137780189514, "learning_rate": 3.818493150684932e-06, "loss": 0.8075, "step": 223 }, { "epoch": 0.0115119745091993, "grad_norm": 1.3738528490066528, "learning_rate": 3.8356164383561645e-06, "loss": 0.9403, "step": 224 }, { "epoch": 0.01156336725254394, "grad_norm": 1.4318796396255493, "learning_rate": 3.852739726027397e-06, "loss": 0.9459, "step": 225 }, { "epoch": 0.01161475999588858, "grad_norm": 1.1008661985397339, "learning_rate": 3.869863013698631e-06, "loss": 0.8388, "step": 226 }, { "epoch": 0.01166615273923322, "grad_norm": 1.5036805868148804, "learning_rate": 3.886986301369863e-06, "loss": 0.9753, "step": 227 }, { "epoch": 0.01171754548257786, "grad_norm": 1.336403250694275, "learning_rate": 3.904109589041096e-06, "loss": 1.0148, "step": 228 }, { "epoch": 0.011768938225922499, "grad_norm": 1.4252920150756836, "learning_rate": 3.921232876712329e-06, "loss": 0.9182, "step": 229 }, { "epoch": 0.01182033096926714, "grad_norm": 1.3516569137573242, "learning_rate": 3.938356164383562e-06, "loss": 0.989, "step": 230 }, { "epoch": 0.01187172371261178, "grad_norm": 1.0649734735488892, "learning_rate": 3.9554794520547955e-06, "loss": 0.7999, "step": 231 }, { "epoch": 0.01192311645595642, "grad_norm": 1.3591573238372803, "learning_rate": 3.972602739726027e-06, "loss": 0.9598, "step": 232 }, { "epoch": 0.01197450919930106, "grad_norm": 1.8600594997406006, "learning_rate": 3.989726027397261e-06, "loss": 0.933, "step": 233 }, { "epoch": 0.012025901942645698, "grad_norm": 1.3544315099716187, "learning_rate": 4.006849315068493e-06, "loss": 0.9473, "step": 234 }, { "epoch": 0.012077294685990338, "grad_norm": 1.410604476928711, "learning_rate": 4.023972602739726e-06, "loss": 0.9927, "step": 235 }, { "epoch": 0.012128687429334978, "grad_norm": 1.3216272592544556, "learning_rate": 4.0410958904109595e-06, "loss": 0.9414, "step": 236 }, { "epoch": 0.012180080172679618, "grad_norm": 1.4487719535827637, "learning_rate": 4.058219178082192e-06, "loss": 0.9896, "step": 237 }, { "epoch": 0.012231472916024257, "grad_norm": 1.48616361618042, "learning_rate": 4.075342465753426e-06, "loss": 1.0043, "step": 238 }, { "epoch": 0.012282865659368897, "grad_norm": 1.428780436515808, "learning_rate": 4.092465753424658e-06, "loss": 0.9794, "step": 239 }, { "epoch": 0.012334258402713537, "grad_norm": 1.4327056407928467, "learning_rate": 4.109589041095891e-06, "loss": 0.9936, "step": 240 }, { "epoch": 0.012385651146058177, "grad_norm": 1.3653701543807983, "learning_rate": 4.1267123287671235e-06, "loss": 0.9727, "step": 241 }, { "epoch": 0.012437043889402817, "grad_norm": 1.2695425748825073, "learning_rate": 4.143835616438356e-06, "loss": 1.0017, "step": 242 }, { "epoch": 0.012488436632747455, "grad_norm": 1.2896161079406738, "learning_rate": 4.16095890410959e-06, "loss": 0.9541, "step": 243 }, { "epoch": 0.012539829376092096, "grad_norm": 1.2927240133285522, "learning_rate": 4.178082191780822e-06, "loss": 0.9417, "step": 244 }, { "epoch": 0.012591222119436736, "grad_norm": 0.9451867938041687, "learning_rate": 4.195205479452055e-06, "loss": 0.8099, "step": 245 }, { "epoch": 0.012642614862781376, "grad_norm": 1.327837347984314, "learning_rate": 4.212328767123288e-06, "loss": 1.0247, "step": 246 }, { "epoch": 0.012694007606126014, "grad_norm": 1.7542698383331299, "learning_rate": 4.229452054794521e-06, "loss": 0.9664, "step": 247 }, { "epoch": 0.012745400349470654, "grad_norm": 1.5239324569702148, "learning_rate": 4.246575342465754e-06, "loss": 0.9556, "step": 248 }, { "epoch": 0.012796793092815294, "grad_norm": 1.2914499044418335, "learning_rate": 4.263698630136986e-06, "loss": 0.8311, "step": 249 }, { "epoch": 0.012848185836159935, "grad_norm": 0.9499532580375671, "learning_rate": 4.28082191780822e-06, "loss": 0.7882, "step": 250 }, { "epoch": 0.012899578579504575, "grad_norm": 1.4977997541427612, "learning_rate": 4.297945205479452e-06, "loss": 0.9427, "step": 251 }, { "epoch": 0.012950971322849213, "grad_norm": 1.5635406970977783, "learning_rate": 4.315068493150685e-06, "loss": 0.9925, "step": 252 }, { "epoch": 0.013002364066193853, "grad_norm": 1.3498114347457886, "learning_rate": 4.3321917808219185e-06, "loss": 0.9673, "step": 253 }, { "epoch": 0.013053756809538493, "grad_norm": 1.3927500247955322, "learning_rate": 4.349315068493151e-06, "loss": 0.9581, "step": 254 }, { "epoch": 0.013105149552883133, "grad_norm": 1.3908201456069946, "learning_rate": 4.3664383561643846e-06, "loss": 0.9689, "step": 255 }, { "epoch": 0.013156542296227773, "grad_norm": 1.3558933734893799, "learning_rate": 4.383561643835616e-06, "loss": 0.9759, "step": 256 }, { "epoch": 0.013207935039572412, "grad_norm": 1.5187759399414062, "learning_rate": 4.40068493150685e-06, "loss": 0.9272, "step": 257 }, { "epoch": 0.013259327782917052, "grad_norm": 1.4035674333572388, "learning_rate": 4.4178082191780825e-06, "loss": 0.9814, "step": 258 }, { "epoch": 0.013310720526261692, "grad_norm": 1.3661476373672485, "learning_rate": 4.434931506849315e-06, "loss": 0.9428, "step": 259 }, { "epoch": 0.013362113269606332, "grad_norm": 1.3462390899658203, "learning_rate": 4.4520547945205486e-06, "loss": 0.9524, "step": 260 }, { "epoch": 0.01341350601295097, "grad_norm": 1.2937759160995483, "learning_rate": 4.469178082191781e-06, "loss": 0.9337, "step": 261 }, { "epoch": 0.01346489875629561, "grad_norm": 1.3532419204711914, "learning_rate": 4.486301369863014e-06, "loss": 1.0307, "step": 262 }, { "epoch": 0.01351629149964025, "grad_norm": 0.8749688267707825, "learning_rate": 4.503424657534247e-06, "loss": 0.7494, "step": 263 }, { "epoch": 0.013567684242984891, "grad_norm": 0.9858847260475159, "learning_rate": 4.52054794520548e-06, "loss": 0.8273, "step": 264 }, { "epoch": 0.013619076986329531, "grad_norm": 1.7489287853240967, "learning_rate": 4.537671232876713e-06, "loss": 0.9371, "step": 265 }, { "epoch": 0.01367046972967417, "grad_norm": 1.5460171699523926, "learning_rate": 4.554794520547945e-06, "loss": 0.8878, "step": 266 }, { "epoch": 0.01372186247301881, "grad_norm": 1.434395432472229, "learning_rate": 4.571917808219179e-06, "loss": 0.9427, "step": 267 }, { "epoch": 0.01377325521636345, "grad_norm": 1.308001160621643, "learning_rate": 4.589041095890411e-06, "loss": 0.9972, "step": 268 }, { "epoch": 0.01382464795970809, "grad_norm": 1.2756896018981934, "learning_rate": 4.606164383561644e-06, "loss": 0.9386, "step": 269 }, { "epoch": 0.013876040703052728, "grad_norm": 1.363455057144165, "learning_rate": 4.6232876712328774e-06, "loss": 0.981, "step": 270 }, { "epoch": 0.013927433446397368, "grad_norm": 0.8435012102127075, "learning_rate": 4.64041095890411e-06, "loss": 0.7865, "step": 271 }, { "epoch": 0.013978826189742008, "grad_norm": 1.3855482339859009, "learning_rate": 4.657534246575343e-06, "loss": 0.7666, "step": 272 }, { "epoch": 0.014030218933086648, "grad_norm": 1.3576053380966187, "learning_rate": 4.674657534246575e-06, "loss": 0.9482, "step": 273 }, { "epoch": 0.014081611676431289, "grad_norm": 1.412172555923462, "learning_rate": 4.691780821917809e-06, "loss": 0.9507, "step": 274 }, { "epoch": 0.014133004419775927, "grad_norm": 1.417295217514038, "learning_rate": 4.7089041095890414e-06, "loss": 0.9619, "step": 275 }, { "epoch": 0.014184397163120567, "grad_norm": 1.2438583374023438, "learning_rate": 4.726027397260274e-06, "loss": 0.9134, "step": 276 }, { "epoch": 0.014235789906465207, "grad_norm": 1.4614331722259521, "learning_rate": 4.7431506849315075e-06, "loss": 0.9364, "step": 277 }, { "epoch": 0.014287182649809847, "grad_norm": 1.36215078830719, "learning_rate": 4.76027397260274e-06, "loss": 0.9903, "step": 278 }, { "epoch": 0.014338575393154487, "grad_norm": 1.529297113418579, "learning_rate": 4.777397260273973e-06, "loss": 0.9316, "step": 279 }, { "epoch": 0.014389968136499126, "grad_norm": 0.9258805513381958, "learning_rate": 4.7945205479452054e-06, "loss": 0.8018, "step": 280 }, { "epoch": 0.014441360879843766, "grad_norm": 1.5578103065490723, "learning_rate": 4.811643835616439e-06, "loss": 0.9279, "step": 281 }, { "epoch": 0.014492753623188406, "grad_norm": 1.422542691230774, "learning_rate": 4.8287671232876716e-06, "loss": 0.9283, "step": 282 }, { "epoch": 0.014544146366533046, "grad_norm": 1.4068387746810913, "learning_rate": 4.845890410958904e-06, "loss": 0.9731, "step": 283 }, { "epoch": 0.014595539109877684, "grad_norm": 0.9610908031463623, "learning_rate": 4.863013698630138e-06, "loss": 0.7616, "step": 284 }, { "epoch": 0.014646931853222325, "grad_norm": 1.8511143922805786, "learning_rate": 4.88013698630137e-06, "loss": 0.91, "step": 285 }, { "epoch": 0.014698324596566965, "grad_norm": 1.3814449310302734, "learning_rate": 4.897260273972603e-06, "loss": 0.9226, "step": 286 }, { "epoch": 0.014749717339911605, "grad_norm": 1.5602917671203613, "learning_rate": 4.914383561643836e-06, "loss": 0.9199, "step": 287 }, { "epoch": 0.014801110083256245, "grad_norm": 1.2778757810592651, "learning_rate": 4.931506849315069e-06, "loss": 0.9865, "step": 288 }, { "epoch": 0.014852502826600883, "grad_norm": 1.3599287271499634, "learning_rate": 4.948630136986302e-06, "loss": 0.9854, "step": 289 }, { "epoch": 0.014903895569945523, "grad_norm": 1.4347745180130005, "learning_rate": 4.965753424657534e-06, "loss": 0.9572, "step": 290 }, { "epoch": 0.014955288313290164, "grad_norm": 1.7798638343811035, "learning_rate": 4.982876712328768e-06, "loss": 0.9056, "step": 291 }, { "epoch": 0.015006681056634804, "grad_norm": 1.6638036966323853, "learning_rate": 5e-06, "loss": 0.9379, "step": 292 }, { "epoch": 0.015058073799979444, "grad_norm": 1.4086577892303467, "learning_rate": 5.017123287671233e-06, "loss": 0.9601, "step": 293 }, { "epoch": 0.015109466543324082, "grad_norm": 1.3726524114608765, "learning_rate": 5.0342465753424665e-06, "loss": 0.8875, "step": 294 }, { "epoch": 0.015160859286668722, "grad_norm": 1.4501359462738037, "learning_rate": 5.051369863013698e-06, "loss": 0.9678, "step": 295 }, { "epoch": 0.015212252030013362, "grad_norm": 1.3798588514328003, "learning_rate": 5.068493150684932e-06, "loss": 0.982, "step": 296 }, { "epoch": 0.015263644773358002, "grad_norm": 1.5755680799484253, "learning_rate": 5.085616438356164e-06, "loss": 0.9253, "step": 297 }, { "epoch": 0.01531503751670264, "grad_norm": 0.8342780470848083, "learning_rate": 5.102739726027398e-06, "loss": 0.7714, "step": 298 }, { "epoch": 0.015366430260047281, "grad_norm": 2.09627628326416, "learning_rate": 5.1198630136986305e-06, "loss": 0.9229, "step": 299 }, { "epoch": 0.015417823003391921, "grad_norm": 1.3578782081604004, "learning_rate": 5.136986301369864e-06, "loss": 0.9518, "step": 300 }, { "epoch": 0.015469215746736561, "grad_norm": 1.680617094039917, "learning_rate": 5.154109589041097e-06, "loss": 0.9778, "step": 301 }, { "epoch": 0.015520608490081201, "grad_norm": 1.3613545894622803, "learning_rate": 5.171232876712328e-06, "loss": 0.9288, "step": 302 }, { "epoch": 0.01557200123342584, "grad_norm": 1.403126835823059, "learning_rate": 5.188356164383562e-06, "loss": 0.8962, "step": 303 }, { "epoch": 0.01562339397677048, "grad_norm": 1.5144846439361572, "learning_rate": 5.2054794520547945e-06, "loss": 0.9268, "step": 304 }, { "epoch": 0.01567478672011512, "grad_norm": 1.4168750047683716, "learning_rate": 5.222602739726028e-06, "loss": 0.9976, "step": 305 }, { "epoch": 0.01572617946345976, "grad_norm": 1.2738122940063477, "learning_rate": 5.239726027397261e-06, "loss": 0.9101, "step": 306 }, { "epoch": 0.0157775722068044, "grad_norm": 1.5010753870010376, "learning_rate": 5.256849315068494e-06, "loss": 0.9762, "step": 307 }, { "epoch": 0.01582896495014904, "grad_norm": 1.3929429054260254, "learning_rate": 5.273972602739727e-06, "loss": 0.9544, "step": 308 }, { "epoch": 0.01588035769349368, "grad_norm": 1.0012789964675903, "learning_rate": 5.2910958904109585e-06, "loss": 0.7541, "step": 309 }, { "epoch": 0.015931750436838317, "grad_norm": 1.007140874862671, "learning_rate": 5.308219178082192e-06, "loss": 0.7853, "step": 310 }, { "epoch": 0.015983143180182957, "grad_norm": 0.9868108034133911, "learning_rate": 5.325342465753425e-06, "loss": 0.7855, "step": 311 }, { "epoch": 0.016034535923527597, "grad_norm": 1.3924250602722168, "learning_rate": 5.342465753424658e-06, "loss": 0.9485, "step": 312 }, { "epoch": 0.016085928666872237, "grad_norm": 1.3775475025177002, "learning_rate": 5.359589041095891e-06, "loss": 0.957, "step": 313 }, { "epoch": 0.016137321410216877, "grad_norm": 1.4916207790374756, "learning_rate": 5.376712328767124e-06, "loss": 0.9483, "step": 314 }, { "epoch": 0.016188714153561518, "grad_norm": 0.9480423331260681, "learning_rate": 5.393835616438357e-06, "loss": 0.7487, "step": 315 }, { "epoch": 0.016240106896906158, "grad_norm": 1.4612795114517212, "learning_rate": 5.41095890410959e-06, "loss": 0.9515, "step": 316 }, { "epoch": 0.016291499640250798, "grad_norm": 1.4398325681686401, "learning_rate": 5.428082191780822e-06, "loss": 0.9681, "step": 317 }, { "epoch": 0.016342892383595438, "grad_norm": 1.3018696308135986, "learning_rate": 5.445205479452055e-06, "loss": 0.9708, "step": 318 }, { "epoch": 0.016394285126940075, "grad_norm": 1.6679677963256836, "learning_rate": 5.462328767123288e-06, "loss": 0.9553, "step": 319 }, { "epoch": 0.016445677870284715, "grad_norm": 1.6321253776550293, "learning_rate": 5.479452054794521e-06, "loss": 0.9212, "step": 320 }, { "epoch": 0.016497070613629355, "grad_norm": 1.4244147539138794, "learning_rate": 5.496575342465754e-06, "loss": 0.9129, "step": 321 }, { "epoch": 0.016548463356973995, "grad_norm": 1.3892810344696045, "learning_rate": 5.513698630136987e-06, "loss": 0.9415, "step": 322 }, { "epoch": 0.016599856100318635, "grad_norm": 1.5489944219589233, "learning_rate": 5.5308219178082205e-06, "loss": 0.9071, "step": 323 }, { "epoch": 0.016651248843663275, "grad_norm": 1.471104383468628, "learning_rate": 5.547945205479452e-06, "loss": 0.9639, "step": 324 }, { "epoch": 0.016702641587007915, "grad_norm": 1.2654674053192139, "learning_rate": 5.565068493150685e-06, "loss": 0.9327, "step": 325 }, { "epoch": 0.016754034330352555, "grad_norm": 1.489034652709961, "learning_rate": 5.582191780821918e-06, "loss": 0.9138, "step": 326 }, { "epoch": 0.016805427073697195, "grad_norm": 1.2891885042190552, "learning_rate": 5.599315068493151e-06, "loss": 0.9494, "step": 327 }, { "epoch": 0.016856819817041832, "grad_norm": 1.5262436866760254, "learning_rate": 5.6164383561643845e-06, "loss": 0.9181, "step": 328 }, { "epoch": 0.016908212560386472, "grad_norm": 1.3686035871505737, "learning_rate": 5.633561643835616e-06, "loss": 0.9706, "step": 329 }, { "epoch": 0.016959605303731112, "grad_norm": 1.0391219854354858, "learning_rate": 5.6506849315068506e-06, "loss": 0.7452, "step": 330 }, { "epoch": 0.017010998047075752, "grad_norm": 1.3939989805221558, "learning_rate": 5.667808219178082e-06, "loss": 0.9516, "step": 331 }, { "epoch": 0.017062390790420393, "grad_norm": 1.5979833602905273, "learning_rate": 5.684931506849316e-06, "loss": 0.9319, "step": 332 }, { "epoch": 0.017113783533765033, "grad_norm": 1.290928602218628, "learning_rate": 5.7020547945205485e-06, "loss": 0.9187, "step": 333 }, { "epoch": 0.017165176277109673, "grad_norm": 1.3770060539245605, "learning_rate": 5.719178082191781e-06, "loss": 0.8851, "step": 334 }, { "epoch": 0.017216569020454313, "grad_norm": 1.4167877435684204, "learning_rate": 5.736301369863015e-06, "loss": 0.8445, "step": 335 }, { "epoch": 0.017267961763798953, "grad_norm": 1.4726027250289917, "learning_rate": 5.753424657534246e-06, "loss": 0.9331, "step": 336 }, { "epoch": 0.01731935450714359, "grad_norm": 1.5692436695098877, "learning_rate": 5.77054794520548e-06, "loss": 0.9434, "step": 337 }, { "epoch": 0.01737074725048823, "grad_norm": 1.3294492959976196, "learning_rate": 5.7876712328767125e-06, "loss": 0.8812, "step": 338 }, { "epoch": 0.01742213999383287, "grad_norm": 1.3776077032089233, "learning_rate": 5.804794520547946e-06, "loss": 0.9406, "step": 339 }, { "epoch": 0.01747353273717751, "grad_norm": 1.1556237936019897, "learning_rate": 5.821917808219179e-06, "loss": 0.7863, "step": 340 }, { "epoch": 0.01752492548052215, "grad_norm": 1.296207308769226, "learning_rate": 5.839041095890411e-06, "loss": 0.8985, "step": 341 }, { "epoch": 0.01757631822386679, "grad_norm": 0.9795817732810974, "learning_rate": 5.856164383561645e-06, "loss": 0.8046, "step": 342 }, { "epoch": 0.01762771096721143, "grad_norm": 1.393006443977356, "learning_rate": 5.8732876712328765e-06, "loss": 0.9877, "step": 343 }, { "epoch": 0.01767910371055607, "grad_norm": 1.3946648836135864, "learning_rate": 5.89041095890411e-06, "loss": 0.8758, "step": 344 }, { "epoch": 0.01773049645390071, "grad_norm": 1.7366056442260742, "learning_rate": 5.907534246575343e-06, "loss": 0.8851, "step": 345 }, { "epoch": 0.01778188919724535, "grad_norm": 1.3458216190338135, "learning_rate": 5.924657534246576e-06, "loss": 0.9078, "step": 346 }, { "epoch": 0.017833281940589987, "grad_norm": 1.3426355123519897, "learning_rate": 5.941780821917809e-06, "loss": 0.9457, "step": 347 }, { "epoch": 0.017884674683934627, "grad_norm": 1.40995454788208, "learning_rate": 5.958904109589042e-06, "loss": 0.8889, "step": 348 }, { "epoch": 0.017936067427279268, "grad_norm": 1.346014142036438, "learning_rate": 5.976027397260275e-06, "loss": 0.8812, "step": 349 }, { "epoch": 0.017987460170623908, "grad_norm": 1.356227993965149, "learning_rate": 5.993150684931507e-06, "loss": 0.9212, "step": 350 }, { "epoch": 0.018038852913968548, "grad_norm": 1.462105631828308, "learning_rate": 6.01027397260274e-06, "loss": 0.8681, "step": 351 }, { "epoch": 0.018090245657313188, "grad_norm": 1.391425609588623, "learning_rate": 6.027397260273973e-06, "loss": 0.9499, "step": 352 }, { "epoch": 0.018141638400657828, "grad_norm": 1.4402172565460205, "learning_rate": 6.044520547945206e-06, "loss": 0.9208, "step": 353 }, { "epoch": 0.018193031144002468, "grad_norm": 1.3497949838638306, "learning_rate": 6.061643835616439e-06, "loss": 0.9442, "step": 354 }, { "epoch": 0.018244423887347108, "grad_norm": 2.0256710052490234, "learning_rate": 6.078767123287672e-06, "loss": 0.933, "step": 355 }, { "epoch": 0.018295816630691745, "grad_norm": 1.3752402067184448, "learning_rate": 6.095890410958905e-06, "loss": 0.989, "step": 356 }, { "epoch": 0.018347209374036385, "grad_norm": 1.2419018745422363, "learning_rate": 6.113013698630137e-06, "loss": 0.932, "step": 357 }, { "epoch": 0.018398602117381025, "grad_norm": 1.3438737392425537, "learning_rate": 6.13013698630137e-06, "loss": 0.902, "step": 358 }, { "epoch": 0.018449994860725665, "grad_norm": 1.2710260152816772, "learning_rate": 6.147260273972603e-06, "loss": 0.9042, "step": 359 }, { "epoch": 0.018501387604070305, "grad_norm": 1.0422375202178955, "learning_rate": 6.164383561643836e-06, "loss": 0.7595, "step": 360 }, { "epoch": 0.018552780347414945, "grad_norm": 1.309249997138977, "learning_rate": 6.181506849315069e-06, "loss": 0.8895, "step": 361 }, { "epoch": 0.018604173090759586, "grad_norm": 1.3443186283111572, "learning_rate": 6.198630136986302e-06, "loss": 0.9764, "step": 362 }, { "epoch": 0.018655565834104226, "grad_norm": 1.0971837043762207, "learning_rate": 6.215753424657535e-06, "loss": 0.7716, "step": 363 }, { "epoch": 0.018706958577448866, "grad_norm": 1.3132166862487793, "learning_rate": 6.2328767123287685e-06, "loss": 0.9934, "step": 364 }, { "epoch": 0.018758351320793502, "grad_norm": 0.8206494450569153, "learning_rate": 6.25e-06, "loss": 0.8413, "step": 365 }, { "epoch": 0.018809744064138142, "grad_norm": 1.3536328077316284, "learning_rate": 6.267123287671233e-06, "loss": 0.9598, "step": 366 }, { "epoch": 0.018861136807482783, "grad_norm": 1.3234273195266724, "learning_rate": 6.284246575342466e-06, "loss": 0.9691, "step": 367 }, { "epoch": 0.018912529550827423, "grad_norm": 1.3051198720932007, "learning_rate": 6.301369863013699e-06, "loss": 1.0015, "step": 368 }, { "epoch": 0.018963922294172063, "grad_norm": 2.0231826305389404, "learning_rate": 6.3184931506849325e-06, "loss": 1.0115, "step": 369 }, { "epoch": 0.019015315037516703, "grad_norm": 1.624998688697815, "learning_rate": 6.335616438356164e-06, "loss": 0.9672, "step": 370 }, { "epoch": 0.019066707780861343, "grad_norm": 1.3017244338989258, "learning_rate": 6.352739726027398e-06, "loss": 0.8957, "step": 371 }, { "epoch": 0.019118100524205983, "grad_norm": 1.3124035596847534, "learning_rate": 6.36986301369863e-06, "loss": 0.886, "step": 372 }, { "epoch": 0.019169493267550623, "grad_norm": 1.5079002380371094, "learning_rate": 6.386986301369864e-06, "loss": 0.8884, "step": 373 }, { "epoch": 0.01922088601089526, "grad_norm": 0.9560659527778625, "learning_rate": 6.4041095890410965e-06, "loss": 0.7692, "step": 374 }, { "epoch": 0.0192722787542399, "grad_norm": 1.3236057758331299, "learning_rate": 6.421232876712329e-06, "loss": 0.8956, "step": 375 }, { "epoch": 0.01932367149758454, "grad_norm": 1.3703919649124146, "learning_rate": 6.438356164383563e-06, "loss": 0.9326, "step": 376 }, { "epoch": 0.01937506424092918, "grad_norm": 1.3508483171463013, "learning_rate": 6.455479452054794e-06, "loss": 0.915, "step": 377 }, { "epoch": 0.01942645698427382, "grad_norm": 0.864358127117157, "learning_rate": 6.472602739726028e-06, "loss": 0.775, "step": 378 }, { "epoch": 0.01947784972761846, "grad_norm": 1.316990613937378, "learning_rate": 6.4897260273972605e-06, "loss": 0.8833, "step": 379 }, { "epoch": 0.0195292424709631, "grad_norm": 1.3886998891830444, "learning_rate": 6.506849315068494e-06, "loss": 0.9576, "step": 380 }, { "epoch": 0.01958063521430774, "grad_norm": 0.8977193236351013, "learning_rate": 6.523972602739727e-06, "loss": 0.7429, "step": 381 }, { "epoch": 0.01963202795765238, "grad_norm": 1.2974063158035278, "learning_rate": 6.541095890410959e-06, "loss": 0.9836, "step": 382 }, { "epoch": 0.01968342070099702, "grad_norm": 1.2336817979812622, "learning_rate": 6.558219178082193e-06, "loss": 0.8745, "step": 383 }, { "epoch": 0.019734813444341658, "grad_norm": 1.3238983154296875, "learning_rate": 6.5753424657534245e-06, "loss": 0.9416, "step": 384 }, { "epoch": 0.019786206187686298, "grad_norm": 1.4125125408172607, "learning_rate": 6.592465753424658e-06, "loss": 0.9572, "step": 385 }, { "epoch": 0.019837598931030938, "grad_norm": 1.3972201347351074, "learning_rate": 6.609589041095891e-06, "loss": 0.9207, "step": 386 }, { "epoch": 0.019888991674375578, "grad_norm": 0.9599208831787109, "learning_rate": 6.626712328767124e-06, "loss": 0.819, "step": 387 }, { "epoch": 0.019940384417720218, "grad_norm": 1.2984942197799683, "learning_rate": 6.643835616438357e-06, "loss": 0.9084, "step": 388 }, { "epoch": 0.019991777161064858, "grad_norm": 1.420408010482788, "learning_rate": 6.66095890410959e-06, "loss": 0.9027, "step": 389 }, { "epoch": 0.020043169904409498, "grad_norm": 1.3168015480041504, "learning_rate": 6.678082191780823e-06, "loss": 0.9178, "step": 390 }, { "epoch": 0.02009456264775414, "grad_norm": 1.3716025352478027, "learning_rate": 6.695205479452055e-06, "loss": 0.9449, "step": 391 }, { "epoch": 0.02014595539109878, "grad_norm": 1.3068482875823975, "learning_rate": 6.712328767123288e-06, "loss": 0.8779, "step": 392 }, { "epoch": 0.020197348134443415, "grad_norm": 1.261688470840454, "learning_rate": 6.729452054794521e-06, "loss": 0.9063, "step": 393 }, { "epoch": 0.020248740877788055, "grad_norm": 1.3852274417877197, "learning_rate": 6.746575342465754e-06, "loss": 0.9187, "step": 394 }, { "epoch": 0.020300133621132695, "grad_norm": 1.3238940238952637, "learning_rate": 6.763698630136987e-06, "loss": 0.8916, "step": 395 }, { "epoch": 0.020351526364477335, "grad_norm": 1.2999320030212402, "learning_rate": 6.78082191780822e-06, "loss": 0.9293, "step": 396 }, { "epoch": 0.020402919107821976, "grad_norm": 1.3329417705535889, "learning_rate": 6.797945205479453e-06, "loss": 0.9258, "step": 397 }, { "epoch": 0.020454311851166616, "grad_norm": 1.3602274656295776, "learning_rate": 6.815068493150685e-06, "loss": 0.9048, "step": 398 }, { "epoch": 0.020505704594511256, "grad_norm": 1.3849248886108398, "learning_rate": 6.832191780821918e-06, "loss": 0.9595, "step": 399 }, { "epoch": 0.020557097337855896, "grad_norm": 1.168339490890503, "learning_rate": 6.849315068493151e-06, "loss": 0.7628, "step": 400 }, { "epoch": 0.020608490081200536, "grad_norm": 1.285650610923767, "learning_rate": 6.866438356164384e-06, "loss": 0.9392, "step": 401 }, { "epoch": 0.020659882824545173, "grad_norm": 1.033577561378479, "learning_rate": 6.883561643835617e-06, "loss": 0.7979, "step": 402 }, { "epoch": 0.020711275567889813, "grad_norm": 1.316782832145691, "learning_rate": 6.9006849315068505e-06, "loss": 0.9075, "step": 403 }, { "epoch": 0.020762668311234453, "grad_norm": 1.3603746891021729, "learning_rate": 6.917808219178082e-06, "loss": 0.9979, "step": 404 }, { "epoch": 0.020814061054579093, "grad_norm": 0.8924427032470703, "learning_rate": 6.9349315068493166e-06, "loss": 0.7678, "step": 405 }, { "epoch": 0.020865453797923733, "grad_norm": 1.3670562505722046, "learning_rate": 6.952054794520548e-06, "loss": 0.9005, "step": 406 }, { "epoch": 0.020916846541268373, "grad_norm": 1.270111083984375, "learning_rate": 6.969178082191781e-06, "loss": 0.9192, "step": 407 }, { "epoch": 0.020968239284613013, "grad_norm": 1.3472708463668823, "learning_rate": 6.9863013698630145e-06, "loss": 0.9591, "step": 408 }, { "epoch": 0.021019632027957653, "grad_norm": 1.2749837636947632, "learning_rate": 7.003424657534247e-06, "loss": 0.9058, "step": 409 }, { "epoch": 0.021071024771302294, "grad_norm": 1.2710347175598145, "learning_rate": 7.020547945205481e-06, "loss": 0.9332, "step": 410 }, { "epoch": 0.02112241751464693, "grad_norm": 1.3473694324493408, "learning_rate": 7.037671232876712e-06, "loss": 0.8746, "step": 411 }, { "epoch": 0.02117381025799157, "grad_norm": 1.4851934909820557, "learning_rate": 7.054794520547946e-06, "loss": 0.9648, "step": 412 }, { "epoch": 0.02122520300133621, "grad_norm": 0.9628872275352478, "learning_rate": 7.0719178082191785e-06, "loss": 0.7489, "step": 413 }, { "epoch": 0.02127659574468085, "grad_norm": 1.3260000944137573, "learning_rate": 7.089041095890411e-06, "loss": 0.9467, "step": 414 }, { "epoch": 0.02132798848802549, "grad_norm": 0.8726317286491394, "learning_rate": 7.106164383561645e-06, "loss": 0.7638, "step": 415 }, { "epoch": 0.02137938123137013, "grad_norm": 0.8331014513969421, "learning_rate": 7.123287671232877e-06, "loss": 0.7773, "step": 416 }, { "epoch": 0.02143077397471477, "grad_norm": 1.250875473022461, "learning_rate": 7.140410958904111e-06, "loss": 0.8514, "step": 417 }, { "epoch": 0.02148216671805941, "grad_norm": 1.4555552005767822, "learning_rate": 7.1575342465753425e-06, "loss": 0.9066, "step": 418 }, { "epoch": 0.02153355946140405, "grad_norm": 1.4497421979904175, "learning_rate": 7.174657534246576e-06, "loss": 0.9651, "step": 419 }, { "epoch": 0.021584952204748688, "grad_norm": 1.360961675643921, "learning_rate": 7.191780821917809e-06, "loss": 0.9091, "step": 420 }, { "epoch": 0.021636344948093328, "grad_norm": 0.9046820402145386, "learning_rate": 7.208904109589042e-06, "loss": 0.7895, "step": 421 }, { "epoch": 0.021687737691437968, "grad_norm": 0.9685566425323486, "learning_rate": 7.226027397260275e-06, "loss": 0.8109, "step": 422 }, { "epoch": 0.021739130434782608, "grad_norm": 1.4642139673233032, "learning_rate": 7.243150684931507e-06, "loss": 0.8898, "step": 423 }, { "epoch": 0.021790523178127248, "grad_norm": 0.941369891166687, "learning_rate": 7.260273972602741e-06, "loss": 0.7847, "step": 424 }, { "epoch": 0.02184191592147189, "grad_norm": 1.502695918083191, "learning_rate": 7.277397260273973e-06, "loss": 0.9316, "step": 425 }, { "epoch": 0.02189330866481653, "grad_norm": 1.435257911682129, "learning_rate": 7.294520547945206e-06, "loss": 0.86, "step": 426 }, { "epoch": 0.02194470140816117, "grad_norm": 0.8365346789360046, "learning_rate": 7.311643835616439e-06, "loss": 0.7686, "step": 427 }, { "epoch": 0.02199609415150581, "grad_norm": 1.302642583847046, "learning_rate": 7.328767123287672e-06, "loss": 0.9244, "step": 428 }, { "epoch": 0.02204748689485045, "grad_norm": 1.379760980606079, "learning_rate": 7.345890410958905e-06, "loss": 0.923, "step": 429 }, { "epoch": 0.022098879638195085, "grad_norm": 0.8242074847221375, "learning_rate": 7.3630136986301374e-06, "loss": 0.7484, "step": 430 }, { "epoch": 0.022150272381539726, "grad_norm": 1.5371723175048828, "learning_rate": 7.380136986301371e-06, "loss": 0.8992, "step": 431 }, { "epoch": 0.022201665124884366, "grad_norm": 1.2551140785217285, "learning_rate": 7.397260273972603e-06, "loss": 0.9111, "step": 432 }, { "epoch": 0.022253057868229006, "grad_norm": 1.2591822147369385, "learning_rate": 7.414383561643836e-06, "loss": 0.9252, "step": 433 }, { "epoch": 0.022304450611573646, "grad_norm": 1.4434462785720825, "learning_rate": 7.431506849315069e-06, "loss": 0.8905, "step": 434 }, { "epoch": 0.022355843354918286, "grad_norm": 1.3594499826431274, "learning_rate": 7.448630136986302e-06, "loss": 0.9449, "step": 435 }, { "epoch": 0.022407236098262926, "grad_norm": 1.3536311388015747, "learning_rate": 7.465753424657535e-06, "loss": 0.8685, "step": 436 }, { "epoch": 0.022458628841607566, "grad_norm": 1.2333133220672607, "learning_rate": 7.482876712328768e-06, "loss": 0.8983, "step": 437 }, { "epoch": 0.022510021584952206, "grad_norm": 1.5317007303237915, "learning_rate": 7.500000000000001e-06, "loss": 0.9548, "step": 438 }, { "epoch": 0.022561414328296843, "grad_norm": 1.0413998365402222, "learning_rate": 7.517123287671233e-06, "loss": 0.8, "step": 439 }, { "epoch": 0.022612807071641483, "grad_norm": 1.3448002338409424, "learning_rate": 7.534246575342466e-06, "loss": 0.952, "step": 440 }, { "epoch": 0.022664199814986123, "grad_norm": 1.3198351860046387, "learning_rate": 7.551369863013699e-06, "loss": 0.9142, "step": 441 }, { "epoch": 0.022715592558330763, "grad_norm": 0.9853560328483582, "learning_rate": 7.568493150684932e-06, "loss": 0.7682, "step": 442 }, { "epoch": 0.022766985301675403, "grad_norm": 1.3470577001571655, "learning_rate": 7.585616438356165e-06, "loss": 0.8641, "step": 443 }, { "epoch": 0.022818378045020044, "grad_norm": 1.5047709941864014, "learning_rate": 7.6027397260273985e-06, "loss": 0.8773, "step": 444 }, { "epoch": 0.022869770788364684, "grad_norm": 1.2822844982147217, "learning_rate": 7.61986301369863e-06, "loss": 0.9194, "step": 445 }, { "epoch": 0.022921163531709324, "grad_norm": 1.2968412637710571, "learning_rate": 7.636986301369864e-06, "loss": 0.877, "step": 446 }, { "epoch": 0.022972556275053964, "grad_norm": 1.3752939701080322, "learning_rate": 7.654109589041097e-06, "loss": 0.927, "step": 447 }, { "epoch": 0.0230239490183986, "grad_norm": 1.317786455154419, "learning_rate": 7.671232876712329e-06, "loss": 0.9084, "step": 448 }, { "epoch": 0.02307534176174324, "grad_norm": 1.365004539489746, "learning_rate": 7.688356164383563e-06, "loss": 0.8406, "step": 449 }, { "epoch": 0.02312673450508788, "grad_norm": 1.2234840393066406, "learning_rate": 7.705479452054794e-06, "loss": 0.8853, "step": 450 }, { "epoch": 0.02317812724843252, "grad_norm": 1.3845592737197876, "learning_rate": 7.722602739726028e-06, "loss": 0.8775, "step": 451 }, { "epoch": 0.02322951999177716, "grad_norm": 1.2836191654205322, "learning_rate": 7.739726027397261e-06, "loss": 0.8983, "step": 452 }, { "epoch": 0.0232809127351218, "grad_norm": 1.307465672492981, "learning_rate": 7.756849315068495e-06, "loss": 0.9377, "step": 453 }, { "epoch": 0.02333230547846644, "grad_norm": 1.323665976524353, "learning_rate": 7.773972602739727e-06, "loss": 0.9041, "step": 454 }, { "epoch": 0.02338369822181108, "grad_norm": 0.9895176887512207, "learning_rate": 7.791095890410958e-06, "loss": 0.781, "step": 455 }, { "epoch": 0.02343509096515572, "grad_norm": 1.4118356704711914, "learning_rate": 7.808219178082192e-06, "loss": 0.9338, "step": 456 }, { "epoch": 0.023486483708500358, "grad_norm": 1.2677648067474365, "learning_rate": 7.825342465753425e-06, "loss": 0.8752, "step": 457 }, { "epoch": 0.023537876451844998, "grad_norm": 0.8953911662101746, "learning_rate": 7.842465753424659e-06, "loss": 0.7926, "step": 458 }, { "epoch": 0.02358926919518964, "grad_norm": 0.9235230684280396, "learning_rate": 7.85958904109589e-06, "loss": 0.779, "step": 459 }, { "epoch": 0.02364066193853428, "grad_norm": 1.4078550338745117, "learning_rate": 7.876712328767124e-06, "loss": 0.8697, "step": 460 }, { "epoch": 0.02369205468187892, "grad_norm": 1.4137729406356812, "learning_rate": 7.893835616438357e-06, "loss": 0.9036, "step": 461 }, { "epoch": 0.02374344742522356, "grad_norm": 1.3060131072998047, "learning_rate": 7.910958904109591e-06, "loss": 0.889, "step": 462 }, { "epoch": 0.0237948401685682, "grad_norm": 1.2938730716705322, "learning_rate": 7.928082191780823e-06, "loss": 0.8787, "step": 463 }, { "epoch": 0.02384623291191284, "grad_norm": 1.2133954763412476, "learning_rate": 7.945205479452055e-06, "loss": 0.8951, "step": 464 }, { "epoch": 0.02389762565525748, "grad_norm": 1.0642343759536743, "learning_rate": 7.962328767123288e-06, "loss": 0.7783, "step": 465 }, { "epoch": 0.02394901839860212, "grad_norm": 1.457010269165039, "learning_rate": 7.979452054794521e-06, "loss": 0.9638, "step": 466 }, { "epoch": 0.024000411141946756, "grad_norm": 1.1099587678909302, "learning_rate": 7.996575342465755e-06, "loss": 0.7329, "step": 467 }, { "epoch": 0.024051803885291396, "grad_norm": 1.3938628435134888, "learning_rate": 8.013698630136987e-06, "loss": 0.9033, "step": 468 }, { "epoch": 0.024103196628636036, "grad_norm": 0.8189371824264526, "learning_rate": 8.03082191780822e-06, "loss": 0.7044, "step": 469 }, { "epoch": 0.024154589371980676, "grad_norm": 1.444404125213623, "learning_rate": 8.047945205479452e-06, "loss": 0.9838, "step": 470 }, { "epoch": 0.024205982115325316, "grad_norm": 1.3004415035247803, "learning_rate": 8.065068493150686e-06, "loss": 0.9098, "step": 471 }, { "epoch": 0.024257374858669956, "grad_norm": 1.2714608907699585, "learning_rate": 8.082191780821919e-06, "loss": 0.8452, "step": 472 }, { "epoch": 0.024308767602014596, "grad_norm": 0.916312575340271, "learning_rate": 8.09931506849315e-06, "loss": 0.808, "step": 473 }, { "epoch": 0.024360160345359236, "grad_norm": 1.299035906791687, "learning_rate": 8.116438356164384e-06, "loss": 0.9937, "step": 474 }, { "epoch": 0.024411553088703877, "grad_norm": 1.332391381263733, "learning_rate": 8.133561643835616e-06, "loss": 0.9253, "step": 475 }, { "epoch": 0.024462945832048513, "grad_norm": 1.3227243423461914, "learning_rate": 8.150684931506851e-06, "loss": 0.9039, "step": 476 }, { "epoch": 0.024514338575393153, "grad_norm": 0.9378124475479126, "learning_rate": 8.167808219178083e-06, "loss": 0.7843, "step": 477 }, { "epoch": 0.024565731318737793, "grad_norm": 1.27048659324646, "learning_rate": 8.184931506849316e-06, "loss": 0.8905, "step": 478 }, { "epoch": 0.024617124062082434, "grad_norm": 1.4072848558425903, "learning_rate": 8.202054794520548e-06, "loss": 0.9027, "step": 479 }, { "epoch": 0.024668516805427074, "grad_norm": 1.315452218055725, "learning_rate": 8.219178082191782e-06, "loss": 0.8958, "step": 480 }, { "epoch": 0.024719909548771714, "grad_norm": 1.2800546884536743, "learning_rate": 8.236301369863015e-06, "loss": 0.9197, "step": 481 }, { "epoch": 0.024771302292116354, "grad_norm": 0.9843120574951172, "learning_rate": 8.253424657534247e-06, "loss": 0.722, "step": 482 }, { "epoch": 0.024822695035460994, "grad_norm": 1.2540639638900757, "learning_rate": 8.27054794520548e-06, "loss": 0.8471, "step": 483 }, { "epoch": 0.024874087778805634, "grad_norm": 1.417625069618225, "learning_rate": 8.287671232876712e-06, "loss": 0.938, "step": 484 }, { "epoch": 0.02492548052215027, "grad_norm": 1.3447060585021973, "learning_rate": 8.304794520547946e-06, "loss": 0.9048, "step": 485 }, { "epoch": 0.02497687326549491, "grad_norm": 1.3468190431594849, "learning_rate": 8.32191780821918e-06, "loss": 0.95, "step": 486 }, { "epoch": 0.02502826600883955, "grad_norm": 1.3093456029891968, "learning_rate": 8.339041095890411e-06, "loss": 0.8961, "step": 487 }, { "epoch": 0.02507965875218419, "grad_norm": 1.320758581161499, "learning_rate": 8.356164383561644e-06, "loss": 0.8272, "step": 488 }, { "epoch": 0.02513105149552883, "grad_norm": 1.4054956436157227, "learning_rate": 8.373287671232876e-06, "loss": 0.9157, "step": 489 }, { "epoch": 0.02518244423887347, "grad_norm": 1.3265403509140015, "learning_rate": 8.39041095890411e-06, "loss": 0.8638, "step": 490 }, { "epoch": 0.02523383698221811, "grad_norm": 1.2834113836288452, "learning_rate": 8.407534246575343e-06, "loss": 0.8892, "step": 491 }, { "epoch": 0.02528522972556275, "grad_norm": 1.4444869756698608, "learning_rate": 8.424657534246577e-06, "loss": 0.9675, "step": 492 }, { "epoch": 0.02533662246890739, "grad_norm": 1.3552523851394653, "learning_rate": 8.441780821917808e-06, "loss": 0.8755, "step": 493 }, { "epoch": 0.02538801521225203, "grad_norm": 1.2685366868972778, "learning_rate": 8.458904109589042e-06, "loss": 0.8719, "step": 494 }, { "epoch": 0.02543940795559667, "grad_norm": 1.3015787601470947, "learning_rate": 8.476027397260275e-06, "loss": 0.8681, "step": 495 }, { "epoch": 0.02549080069894131, "grad_norm": 0.9308233261108398, "learning_rate": 8.493150684931507e-06, "loss": 0.7643, "step": 496 }, { "epoch": 0.02554219344228595, "grad_norm": 1.3988115787506104, "learning_rate": 8.51027397260274e-06, "loss": 0.9531, "step": 497 }, { "epoch": 0.02559358618563059, "grad_norm": 1.3057786226272583, "learning_rate": 8.527397260273972e-06, "loss": 0.94, "step": 498 }, { "epoch": 0.02564497892897523, "grad_norm": 1.245055079460144, "learning_rate": 8.544520547945206e-06, "loss": 0.9073, "step": 499 }, { "epoch": 0.02569637167231987, "grad_norm": 1.2349739074707031, "learning_rate": 8.56164383561644e-06, "loss": 0.8856, "step": 500 }, { "epoch": 0.02574776441566451, "grad_norm": 1.3133636713027954, "learning_rate": 8.578767123287673e-06, "loss": 0.9515, "step": 501 }, { "epoch": 0.02579915715900915, "grad_norm": 1.3320437669754028, "learning_rate": 8.595890410958905e-06, "loss": 0.9599, "step": 502 }, { "epoch": 0.02585054990235379, "grad_norm": 1.3448456525802612, "learning_rate": 8.613013698630136e-06, "loss": 0.919, "step": 503 }, { "epoch": 0.025901942645698426, "grad_norm": 1.631572961807251, "learning_rate": 8.63013698630137e-06, "loss": 0.8978, "step": 504 }, { "epoch": 0.025953335389043066, "grad_norm": 1.336680293083191, "learning_rate": 8.647260273972603e-06, "loss": 0.9386, "step": 505 }, { "epoch": 0.026004728132387706, "grad_norm": 1.5338842868804932, "learning_rate": 8.664383561643837e-06, "loss": 0.9319, "step": 506 }, { "epoch": 0.026056120875732346, "grad_norm": 1.3168104887008667, "learning_rate": 8.681506849315069e-06, "loss": 0.9353, "step": 507 }, { "epoch": 0.026107513619076986, "grad_norm": 1.422582983970642, "learning_rate": 8.698630136986302e-06, "loss": 0.8878, "step": 508 }, { "epoch": 0.026158906362421627, "grad_norm": 1.3373196125030518, "learning_rate": 8.715753424657536e-06, "loss": 0.8858, "step": 509 }, { "epoch": 0.026210299105766267, "grad_norm": 1.4007619619369507, "learning_rate": 8.732876712328769e-06, "loss": 0.9865, "step": 510 }, { "epoch": 0.026261691849110907, "grad_norm": 1.2220408916473389, "learning_rate": 8.750000000000001e-06, "loss": 0.843, "step": 511 }, { "epoch": 0.026313084592455547, "grad_norm": 1.3026251792907715, "learning_rate": 8.767123287671233e-06, "loss": 0.886, "step": 512 }, { "epoch": 0.026364477335800184, "grad_norm": 1.3168457746505737, "learning_rate": 8.784246575342466e-06, "loss": 0.8811, "step": 513 }, { "epoch": 0.026415870079144824, "grad_norm": 0.8794660568237305, "learning_rate": 8.8013698630137e-06, "loss": 0.745, "step": 514 }, { "epoch": 0.026467262822489464, "grad_norm": 1.1745655536651611, "learning_rate": 8.818493150684933e-06, "loss": 0.8788, "step": 515 }, { "epoch": 0.026518655565834104, "grad_norm": 1.6665301322937012, "learning_rate": 8.835616438356165e-06, "loss": 0.9026, "step": 516 }, { "epoch": 0.026570048309178744, "grad_norm": 0.81757652759552, "learning_rate": 8.852739726027398e-06, "loss": 0.7922, "step": 517 }, { "epoch": 0.026621441052523384, "grad_norm": 1.3101197481155396, "learning_rate": 8.86986301369863e-06, "loss": 0.9025, "step": 518 }, { "epoch": 0.026672833795868024, "grad_norm": 1.31574285030365, "learning_rate": 8.886986301369864e-06, "loss": 0.8853, "step": 519 }, { "epoch": 0.026724226539212664, "grad_norm": 1.3258775472640991, "learning_rate": 8.904109589041097e-06, "loss": 0.9031, "step": 520 }, { "epoch": 0.026775619282557304, "grad_norm": 1.3346577882766724, "learning_rate": 8.921232876712329e-06, "loss": 0.8933, "step": 521 }, { "epoch": 0.02682701202590194, "grad_norm": 1.2473400831222534, "learning_rate": 8.938356164383562e-06, "loss": 0.895, "step": 522 }, { "epoch": 0.02687840476924658, "grad_norm": 1.393160104751587, "learning_rate": 8.955479452054794e-06, "loss": 0.9309, "step": 523 }, { "epoch": 0.02692979751259122, "grad_norm": 1.2653206586837769, "learning_rate": 8.972602739726028e-06, "loss": 0.8854, "step": 524 }, { "epoch": 0.02698119025593586, "grad_norm": 1.2554978132247925, "learning_rate": 8.989726027397261e-06, "loss": 0.8798, "step": 525 }, { "epoch": 0.0270325829992805, "grad_norm": 1.053855061531067, "learning_rate": 9.006849315068495e-06, "loss": 0.7405, "step": 526 }, { "epoch": 0.02708397574262514, "grad_norm": 1.3175573348999023, "learning_rate": 9.023972602739726e-06, "loss": 0.9455, "step": 527 }, { "epoch": 0.027135368485969782, "grad_norm": 1.4462525844573975, "learning_rate": 9.04109589041096e-06, "loss": 0.8866, "step": 528 }, { "epoch": 0.027186761229314422, "grad_norm": 1.3384977579116821, "learning_rate": 9.058219178082193e-06, "loss": 0.928, "step": 529 }, { "epoch": 0.027238153972659062, "grad_norm": 1.3296003341674805, "learning_rate": 9.075342465753425e-06, "loss": 0.9485, "step": 530 }, { "epoch": 0.0272895467160037, "grad_norm": 1.2948641777038574, "learning_rate": 9.092465753424659e-06, "loss": 0.9269, "step": 531 }, { "epoch": 0.02734093945934834, "grad_norm": 1.2651878595352173, "learning_rate": 9.10958904109589e-06, "loss": 0.8796, "step": 532 }, { "epoch": 0.02739233220269298, "grad_norm": 1.3631980419158936, "learning_rate": 9.126712328767124e-06, "loss": 0.875, "step": 533 }, { "epoch": 0.02744372494603762, "grad_norm": 1.3452153205871582, "learning_rate": 9.143835616438357e-06, "loss": 0.9005, "step": 534 }, { "epoch": 0.02749511768938226, "grad_norm": 1.597040057182312, "learning_rate": 9.160958904109591e-06, "loss": 0.936, "step": 535 }, { "epoch": 0.0275465104327269, "grad_norm": 0.9114028811454773, "learning_rate": 9.178082191780823e-06, "loss": 0.7766, "step": 536 }, { "epoch": 0.02759790317607154, "grad_norm": 1.4212180376052856, "learning_rate": 9.195205479452054e-06, "loss": 0.8899, "step": 537 }, { "epoch": 0.02764929591941618, "grad_norm": 0.9036937355995178, "learning_rate": 9.212328767123288e-06, "loss": 0.7256, "step": 538 }, { "epoch": 0.02770068866276082, "grad_norm": 1.7179898023605347, "learning_rate": 9.229452054794521e-06, "loss": 0.8575, "step": 539 }, { "epoch": 0.027752081406105456, "grad_norm": 1.0034892559051514, "learning_rate": 9.246575342465755e-06, "loss": 0.7628, "step": 540 }, { "epoch": 0.027803474149450096, "grad_norm": 1.3620257377624512, "learning_rate": 9.263698630136987e-06, "loss": 0.8688, "step": 541 }, { "epoch": 0.027854866892794736, "grad_norm": 1.2762998342514038, "learning_rate": 9.28082191780822e-06, "loss": 0.8426, "step": 542 }, { "epoch": 0.027906259636139377, "grad_norm": 1.327167272567749, "learning_rate": 9.297945205479454e-06, "loss": 0.9141, "step": 543 }, { "epoch": 0.027957652379484017, "grad_norm": 1.724090337753296, "learning_rate": 9.315068493150685e-06, "loss": 0.9725, "step": 544 }, { "epoch": 0.028009045122828657, "grad_norm": 1.2443183660507202, "learning_rate": 9.332191780821919e-06, "loss": 0.9013, "step": 545 }, { "epoch": 0.028060437866173297, "grad_norm": 1.4725433588027954, "learning_rate": 9.34931506849315e-06, "loss": 0.9119, "step": 546 }, { "epoch": 0.028111830609517937, "grad_norm": 1.154873013496399, "learning_rate": 9.366438356164384e-06, "loss": 0.9116, "step": 547 }, { "epoch": 0.028163223352862577, "grad_norm": 1.2435468435287476, "learning_rate": 9.383561643835618e-06, "loss": 0.9111, "step": 548 }, { "epoch": 0.028214616096207217, "grad_norm": 1.277804970741272, "learning_rate": 9.400684931506851e-06, "loss": 0.816, "step": 549 }, { "epoch": 0.028266008839551854, "grad_norm": 1.4964183568954468, "learning_rate": 9.417808219178083e-06, "loss": 0.9148, "step": 550 }, { "epoch": 0.028317401582896494, "grad_norm": 0.9280702471733093, "learning_rate": 9.434931506849316e-06, "loss": 0.7587, "step": 551 }, { "epoch": 0.028368794326241134, "grad_norm": 1.3291866779327393, "learning_rate": 9.452054794520548e-06, "loss": 0.9138, "step": 552 }, { "epoch": 0.028420187069585774, "grad_norm": 1.3056268692016602, "learning_rate": 9.469178082191782e-06, "loss": 0.9014, "step": 553 }, { "epoch": 0.028471579812930414, "grad_norm": 1.3049315214157104, "learning_rate": 9.486301369863015e-06, "loss": 0.9186, "step": 554 }, { "epoch": 0.028522972556275054, "grad_norm": 0.9101473689079285, "learning_rate": 9.503424657534247e-06, "loss": 0.7749, "step": 555 }, { "epoch": 0.028574365299619695, "grad_norm": 1.24942946434021, "learning_rate": 9.52054794520548e-06, "loss": 0.9414, "step": 556 }, { "epoch": 0.028625758042964335, "grad_norm": 0.7815028429031372, "learning_rate": 9.537671232876712e-06, "loss": 0.7474, "step": 557 }, { "epoch": 0.028677150786308975, "grad_norm": 1.3021060228347778, "learning_rate": 9.554794520547946e-06, "loss": 0.904, "step": 558 }, { "epoch": 0.02872854352965361, "grad_norm": 1.4569242000579834, "learning_rate": 9.571917808219179e-06, "loss": 0.9555, "step": 559 }, { "epoch": 0.02877993627299825, "grad_norm": 1.3382290601730347, "learning_rate": 9.589041095890411e-06, "loss": 0.9057, "step": 560 }, { "epoch": 0.02883132901634289, "grad_norm": 1.2955886125564575, "learning_rate": 9.606164383561644e-06, "loss": 0.9103, "step": 561 }, { "epoch": 0.02888272175968753, "grad_norm": 0.9059459567070007, "learning_rate": 9.623287671232878e-06, "loss": 0.7646, "step": 562 }, { "epoch": 0.028934114503032172, "grad_norm": 1.2799419164657593, "learning_rate": 9.640410958904111e-06, "loss": 0.9653, "step": 563 }, { "epoch": 0.028985507246376812, "grad_norm": 1.4008582830429077, "learning_rate": 9.657534246575343e-06, "loss": 0.9203, "step": 564 }, { "epoch": 0.029036899989721452, "grad_norm": 1.3185688257217407, "learning_rate": 9.674657534246577e-06, "loss": 0.9308, "step": 565 }, { "epoch": 0.029088292733066092, "grad_norm": 1.2655415534973145, "learning_rate": 9.691780821917808e-06, "loss": 0.9055, "step": 566 }, { "epoch": 0.029139685476410732, "grad_norm": 1.2992969751358032, "learning_rate": 9.708904109589042e-06, "loss": 0.9107, "step": 567 }, { "epoch": 0.02919107821975537, "grad_norm": 0.9581859707832336, "learning_rate": 9.726027397260275e-06, "loss": 0.8054, "step": 568 }, { "epoch": 0.02924247096310001, "grad_norm": 1.3011211156845093, "learning_rate": 9.743150684931507e-06, "loss": 0.9577, "step": 569 }, { "epoch": 0.02929386370644465, "grad_norm": 0.9357772469520569, "learning_rate": 9.76027397260274e-06, "loss": 0.7503, "step": 570 }, { "epoch": 0.02934525644978929, "grad_norm": 0.9294967651367188, "learning_rate": 9.777397260273972e-06, "loss": 0.7447, "step": 571 }, { "epoch": 0.02939664919313393, "grad_norm": 1.2695695161819458, "learning_rate": 9.794520547945206e-06, "loss": 0.8848, "step": 572 }, { "epoch": 0.02944804193647857, "grad_norm": 0.9333198070526123, "learning_rate": 9.81164383561644e-06, "loss": 0.7458, "step": 573 }, { "epoch": 0.02949943467982321, "grad_norm": 1.3401907682418823, "learning_rate": 9.828767123287673e-06, "loss": 0.8545, "step": 574 }, { "epoch": 0.02955082742316785, "grad_norm": 1.2322946786880493, "learning_rate": 9.845890410958905e-06, "loss": 0.899, "step": 575 }, { "epoch": 0.02960222016651249, "grad_norm": 1.2715824842453003, "learning_rate": 9.863013698630138e-06, "loss": 0.8651, "step": 576 }, { "epoch": 0.029653612909857126, "grad_norm": 1.333211064338684, "learning_rate": 9.880136986301372e-06, "loss": 0.9101, "step": 577 }, { "epoch": 0.029705005653201767, "grad_norm": 1.2988038063049316, "learning_rate": 9.897260273972603e-06, "loss": 0.88, "step": 578 }, { "epoch": 0.029756398396546407, "grad_norm": 1.460774302482605, "learning_rate": 9.914383561643837e-06, "loss": 0.9268, "step": 579 }, { "epoch": 0.029807791139891047, "grad_norm": 1.265076756477356, "learning_rate": 9.931506849315069e-06, "loss": 0.8335, "step": 580 }, { "epoch": 0.029859183883235687, "grad_norm": 0.943951427936554, "learning_rate": 9.948630136986302e-06, "loss": 0.7368, "step": 581 }, { "epoch": 0.029910576626580327, "grad_norm": 1.2957152128219604, "learning_rate": 9.965753424657536e-06, "loss": 0.9397, "step": 582 }, { "epoch": 0.029961969369924967, "grad_norm": 1.28718101978302, "learning_rate": 9.982876712328769e-06, "loss": 0.9139, "step": 583 }, { "epoch": 0.030013362113269607, "grad_norm": 1.2743099927902222, "learning_rate": 1e-05, "loss": 0.9163, "step": 584 }, { "epoch": 0.030064754856614247, "grad_norm": 1.2341121435165405, "learning_rate": 9.999999930735318e-06, "loss": 0.9249, "step": 585 }, { "epoch": 0.030116147599958887, "grad_norm": 0.8864800333976746, "learning_rate": 9.99999972294127e-06, "loss": 0.7115, "step": 586 }, { "epoch": 0.030167540343303524, "grad_norm": 1.2989847660064697, "learning_rate": 9.999999376617863e-06, "loss": 0.8635, "step": 587 }, { "epoch": 0.030218933086648164, "grad_norm": 1.0578948259353638, "learning_rate": 9.99999889176511e-06, "loss": 0.7805, "step": 588 }, { "epoch": 0.030270325829992804, "grad_norm": 1.2896292209625244, "learning_rate": 9.999998268383018e-06, "loss": 0.9167, "step": 589 }, { "epoch": 0.030321718573337444, "grad_norm": 1.466029167175293, "learning_rate": 9.99999750647161e-06, "loss": 0.9525, "step": 590 }, { "epoch": 0.030373111316682085, "grad_norm": 1.366807222366333, "learning_rate": 9.999996606030905e-06, "loss": 0.875, "step": 591 }, { "epoch": 0.030424504060026725, "grad_norm": 1.3337703943252563, "learning_rate": 9.999995567060927e-06, "loss": 0.9312, "step": 592 }, { "epoch": 0.030475896803371365, "grad_norm": 1.2864301204681396, "learning_rate": 9.999994389561704e-06, "loss": 0.906, "step": 593 }, { "epoch": 0.030527289546716005, "grad_norm": 1.3428853750228882, "learning_rate": 9.999993073533273e-06, "loss": 0.8645, "step": 594 }, { "epoch": 0.030578682290060645, "grad_norm": 1.2185192108154297, "learning_rate": 9.999991618975667e-06, "loss": 0.9117, "step": 595 }, { "epoch": 0.03063007503340528, "grad_norm": 1.3805981874465942, "learning_rate": 9.999990025888925e-06, "loss": 0.9444, "step": 596 }, { "epoch": 0.030681467776749922, "grad_norm": 1.3172290325164795, "learning_rate": 9.999988294273095e-06, "loss": 0.9011, "step": 597 }, { "epoch": 0.030732860520094562, "grad_norm": 1.5256799459457397, "learning_rate": 9.999986424128224e-06, "loss": 0.9358, "step": 598 }, { "epoch": 0.030784253263439202, "grad_norm": 1.2593270540237427, "learning_rate": 9.999984415454362e-06, "loss": 0.9033, "step": 599 }, { "epoch": 0.030835646006783842, "grad_norm": 1.292752742767334, "learning_rate": 9.999982268251565e-06, "loss": 0.8908, "step": 600 }, { "epoch": 0.030887038750128482, "grad_norm": 1.512018084526062, "learning_rate": 9.999979982519892e-06, "loss": 0.9191, "step": 601 }, { "epoch": 0.030938431493473122, "grad_norm": 1.3083957433700562, "learning_rate": 9.99997755825941e-06, "loss": 0.9545, "step": 602 }, { "epoch": 0.030989824236817762, "grad_norm": 1.592584490776062, "learning_rate": 9.99997499547018e-06, "loss": 0.9306, "step": 603 }, { "epoch": 0.031041216980162403, "grad_norm": 1.25839102268219, "learning_rate": 9.99997229415228e-06, "loss": 0.8516, "step": 604 }, { "epoch": 0.03109260972350704, "grad_norm": 1.3436771631240845, "learning_rate": 9.99996945430578e-06, "loss": 0.9107, "step": 605 }, { "epoch": 0.03114400246685168, "grad_norm": 1.5542758703231812, "learning_rate": 9.99996647593076e-06, "loss": 0.9661, "step": 606 }, { "epoch": 0.03119539521019632, "grad_norm": 1.3693493604660034, "learning_rate": 9.999963359027303e-06, "loss": 0.8683, "step": 607 }, { "epoch": 0.03124678795354096, "grad_norm": 1.2629454135894775, "learning_rate": 9.999960103595495e-06, "loss": 0.8985, "step": 608 }, { "epoch": 0.0312981806968856, "grad_norm": 1.2224509716033936, "learning_rate": 9.999956709635427e-06, "loss": 0.8551, "step": 609 }, { "epoch": 0.03134957344023024, "grad_norm": 1.2754219770431519, "learning_rate": 9.99995317714719e-06, "loss": 0.9594, "step": 610 }, { "epoch": 0.03140096618357488, "grad_norm": 1.2681502103805542, "learning_rate": 9.999949506130886e-06, "loss": 0.8778, "step": 611 }, { "epoch": 0.03145235892691952, "grad_norm": 1.3756245374679565, "learning_rate": 9.999945696586613e-06, "loss": 0.8867, "step": 612 }, { "epoch": 0.03150375167026416, "grad_norm": 1.2620686292648315, "learning_rate": 9.99994174851448e-06, "loss": 0.9525, "step": 613 }, { "epoch": 0.0315551444136088, "grad_norm": 1.2894796133041382, "learning_rate": 9.999937661914593e-06, "loss": 0.8999, "step": 614 }, { "epoch": 0.03160653715695344, "grad_norm": 1.348724365234375, "learning_rate": 9.999933436787068e-06, "loss": 0.9606, "step": 615 }, { "epoch": 0.03165792990029808, "grad_norm": 1.2647407054901123, "learning_rate": 9.999929073132022e-06, "loss": 0.846, "step": 616 }, { "epoch": 0.03170932264364272, "grad_norm": 1.4494421482086182, "learning_rate": 9.999924570949573e-06, "loss": 0.8995, "step": 617 }, { "epoch": 0.03176071538698736, "grad_norm": 1.3663712739944458, "learning_rate": 9.999919930239847e-06, "loss": 0.9398, "step": 618 }, { "epoch": 0.031812108130331994, "grad_norm": 1.2619757652282715, "learning_rate": 9.999915151002976e-06, "loss": 0.9251, "step": 619 }, { "epoch": 0.031863500873676634, "grad_norm": 1.265712857246399, "learning_rate": 9.999910233239087e-06, "loss": 0.925, "step": 620 }, { "epoch": 0.031914893617021274, "grad_norm": 1.3284708261489868, "learning_rate": 9.999905176948321e-06, "loss": 0.9401, "step": 621 }, { "epoch": 0.031966286360365914, "grad_norm": 1.212500810623169, "learning_rate": 9.999899982130814e-06, "loss": 0.9478, "step": 622 }, { "epoch": 0.032017679103710554, "grad_norm": 1.14016592502594, "learning_rate": 9.999894648786713e-06, "loss": 0.7327, "step": 623 }, { "epoch": 0.032069071847055194, "grad_norm": 1.397247314453125, "learning_rate": 9.999889176916164e-06, "loss": 0.9271, "step": 624 }, { "epoch": 0.032120464590399835, "grad_norm": 1.258782982826233, "learning_rate": 9.99988356651932e-06, "loss": 0.8644, "step": 625 }, { "epoch": 0.032171857333744475, "grad_norm": 1.7937285900115967, "learning_rate": 9.999877817596336e-06, "loss": 0.9062, "step": 626 }, { "epoch": 0.032223250077089115, "grad_norm": 1.538447618484497, "learning_rate": 9.999871930147369e-06, "loss": 0.8829, "step": 627 }, { "epoch": 0.032274642820433755, "grad_norm": 1.2555336952209473, "learning_rate": 9.999865904172585e-06, "loss": 0.8701, "step": 628 }, { "epoch": 0.032326035563778395, "grad_norm": 1.2921088933944702, "learning_rate": 9.999859739672151e-06, "loss": 0.9332, "step": 629 }, { "epoch": 0.032377428307123035, "grad_norm": 1.2067265510559082, "learning_rate": 9.999853436646237e-06, "loss": 0.8662, "step": 630 }, { "epoch": 0.032428821050467675, "grad_norm": 1.271276831626892, "learning_rate": 9.999846995095016e-06, "loss": 0.8084, "step": 631 }, { "epoch": 0.032480213793812315, "grad_norm": 1.3928368091583252, "learning_rate": 9.99984041501867e-06, "loss": 0.868, "step": 632 }, { "epoch": 0.032531606537156955, "grad_norm": 1.2404413223266602, "learning_rate": 9.999833696417376e-06, "loss": 0.9162, "step": 633 }, { "epoch": 0.032582999280501596, "grad_norm": 1.3089007139205933, "learning_rate": 9.999826839291325e-06, "loss": 0.8752, "step": 634 }, { "epoch": 0.032634392023846236, "grad_norm": 1.3305209875106812, "learning_rate": 9.999819843640706e-06, "loss": 0.8634, "step": 635 }, { "epoch": 0.032685784767190876, "grad_norm": 1.265342116355896, "learning_rate": 9.999812709465711e-06, "loss": 0.8658, "step": 636 }, { "epoch": 0.03273717751053551, "grad_norm": 1.3872308731079102, "learning_rate": 9.99980543676654e-06, "loss": 0.8684, "step": 637 }, { "epoch": 0.03278857025388015, "grad_norm": 1.2806648015975952, "learning_rate": 9.999798025543393e-06, "loss": 0.9003, "step": 638 }, { "epoch": 0.03283996299722479, "grad_norm": 1.2658412456512451, "learning_rate": 9.999790475796475e-06, "loss": 0.8383, "step": 639 }, { "epoch": 0.03289135574056943, "grad_norm": 1.6125788688659668, "learning_rate": 9.999782787525995e-06, "loss": 0.8849, "step": 640 }, { "epoch": 0.03294274848391407, "grad_norm": 1.1932075023651123, "learning_rate": 9.99977496073217e-06, "loss": 0.8996, "step": 641 }, { "epoch": 0.03299414122725871, "grad_norm": 1.2844421863555908, "learning_rate": 9.99976699541521e-06, "loss": 0.8828, "step": 642 }, { "epoch": 0.03304553397060335, "grad_norm": 1.053963541984558, "learning_rate": 9.999758891575342e-06, "loss": 0.6973, "step": 643 }, { "epoch": 0.03309692671394799, "grad_norm": 1.3341604471206665, "learning_rate": 9.999750649212787e-06, "loss": 0.898, "step": 644 }, { "epoch": 0.03314831945729263, "grad_norm": 1.2578569650650024, "learning_rate": 9.999742268327774e-06, "loss": 0.7745, "step": 645 }, { "epoch": 0.03319971220063727, "grad_norm": 1.382563591003418, "learning_rate": 9.999733748920537e-06, "loss": 0.9263, "step": 646 }, { "epoch": 0.03325110494398191, "grad_norm": 1.3626760244369507, "learning_rate": 9.999725090991308e-06, "loss": 0.9254, "step": 647 }, { "epoch": 0.03330249768732655, "grad_norm": 0.9993988871574402, "learning_rate": 9.999716294540331e-06, "loss": 0.7546, "step": 648 }, { "epoch": 0.03335389043067119, "grad_norm": 0.9365474581718445, "learning_rate": 9.999707359567847e-06, "loss": 0.7375, "step": 649 }, { "epoch": 0.03340528317401583, "grad_norm": 1.3606112003326416, "learning_rate": 9.999698286074107e-06, "loss": 0.8897, "step": 650 }, { "epoch": 0.03345667591736047, "grad_norm": 1.243239164352417, "learning_rate": 9.999689074059358e-06, "loss": 0.8646, "step": 651 }, { "epoch": 0.03350806866070511, "grad_norm": 1.31145179271698, "learning_rate": 9.999679723523857e-06, "loss": 0.889, "step": 652 }, { "epoch": 0.03355946140404975, "grad_norm": 1.3311896324157715, "learning_rate": 9.999670234467864e-06, "loss": 0.9205, "step": 653 }, { "epoch": 0.03361085414739439, "grad_norm": 1.4149863719940186, "learning_rate": 9.999660606891642e-06, "loss": 0.8216, "step": 654 }, { "epoch": 0.03366224689073903, "grad_norm": 1.2291474342346191, "learning_rate": 9.999650840795456e-06, "loss": 0.8304, "step": 655 }, { "epoch": 0.033713639634083664, "grad_norm": 1.907629370689392, "learning_rate": 9.999640936179578e-06, "loss": 0.9434, "step": 656 }, { "epoch": 0.033765032377428304, "grad_norm": 1.2146872282028198, "learning_rate": 9.99963089304428e-06, "loss": 0.8204, "step": 657 }, { "epoch": 0.033816425120772944, "grad_norm": 1.2001131772994995, "learning_rate": 9.999620711389846e-06, "loss": 0.9003, "step": 658 }, { "epoch": 0.033867817864117584, "grad_norm": 1.2128374576568604, "learning_rate": 9.999610391216552e-06, "loss": 0.8561, "step": 659 }, { "epoch": 0.033919210607462225, "grad_norm": 1.2235902547836304, "learning_rate": 9.999599932524686e-06, "loss": 0.8785, "step": 660 }, { "epoch": 0.033970603350806865, "grad_norm": 1.276451587677002, "learning_rate": 9.99958933531454e-06, "loss": 0.9213, "step": 661 }, { "epoch": 0.034021996094151505, "grad_norm": 1.3629931211471558, "learning_rate": 9.999578599586403e-06, "loss": 0.845, "step": 662 }, { "epoch": 0.034073388837496145, "grad_norm": 1.4644831418991089, "learning_rate": 9.999567725340576e-06, "loss": 0.9308, "step": 663 }, { "epoch": 0.034124781580840785, "grad_norm": 1.230907917022705, "learning_rate": 9.99955671257736e-06, "loss": 0.8848, "step": 664 }, { "epoch": 0.034176174324185425, "grad_norm": 1.3106567859649658, "learning_rate": 9.999545561297056e-06, "loss": 0.8804, "step": 665 }, { "epoch": 0.034227567067530065, "grad_norm": 1.1782751083374023, "learning_rate": 9.999534271499982e-06, "loss": 0.7919, "step": 666 }, { "epoch": 0.034278959810874705, "grad_norm": 1.2492984533309937, "learning_rate": 9.999522843186442e-06, "loss": 0.8357, "step": 667 }, { "epoch": 0.034330352554219346, "grad_norm": 1.1943573951721191, "learning_rate": 9.999511276356755e-06, "loss": 0.9432, "step": 668 }, { "epoch": 0.034381745297563986, "grad_norm": 1.321894645690918, "learning_rate": 9.999499571011244e-06, "loss": 0.9181, "step": 669 }, { "epoch": 0.034433138040908626, "grad_norm": 1.2689727544784546, "learning_rate": 9.999487727150232e-06, "loss": 0.9105, "step": 670 }, { "epoch": 0.034484530784253266, "grad_norm": 1.1956273317337036, "learning_rate": 9.999475744774046e-06, "loss": 0.8256, "step": 671 }, { "epoch": 0.034535923527597906, "grad_norm": 1.2164257764816284, "learning_rate": 9.999463623883017e-06, "loss": 0.8054, "step": 672 }, { "epoch": 0.034587316270942546, "grad_norm": 0.8890982270240784, "learning_rate": 9.999451364477487e-06, "loss": 0.735, "step": 673 }, { "epoch": 0.03463870901428718, "grad_norm": 1.3526277542114258, "learning_rate": 9.999438966557787e-06, "loss": 0.9192, "step": 674 }, { "epoch": 0.03469010175763182, "grad_norm": 1.432132363319397, "learning_rate": 9.999426430124266e-06, "loss": 0.8689, "step": 675 }, { "epoch": 0.03474149450097646, "grad_norm": 0.9870145320892334, "learning_rate": 9.999413755177269e-06, "loss": 0.7423, "step": 676 }, { "epoch": 0.0347928872443211, "grad_norm": 0.8942204117774963, "learning_rate": 9.999400941717151e-06, "loss": 0.7597, "step": 677 }, { "epoch": 0.03484427998766574, "grad_norm": 1.3862026929855347, "learning_rate": 9.999387989744262e-06, "loss": 0.8977, "step": 678 }, { "epoch": 0.03489567273101038, "grad_norm": 1.256982684135437, "learning_rate": 9.999374899258964e-06, "loss": 0.8729, "step": 679 }, { "epoch": 0.03494706547435502, "grad_norm": 1.264243721961975, "learning_rate": 9.999361670261618e-06, "loss": 0.8814, "step": 680 }, { "epoch": 0.03499845821769966, "grad_norm": 0.9732556343078613, "learning_rate": 9.999348302752592e-06, "loss": 0.8128, "step": 681 }, { "epoch": 0.0350498509610443, "grad_norm": 1.3299076557159424, "learning_rate": 9.999334796732255e-06, "loss": 0.9228, "step": 682 }, { "epoch": 0.03510124370438894, "grad_norm": 0.9515122175216675, "learning_rate": 9.999321152200982e-06, "loss": 0.7468, "step": 683 }, { "epoch": 0.03515263644773358, "grad_norm": 1.2781256437301636, "learning_rate": 9.99930736915915e-06, "loss": 0.9191, "step": 684 }, { "epoch": 0.03520402919107822, "grad_norm": 1.3453145027160645, "learning_rate": 9.999293447607143e-06, "loss": 0.8895, "step": 685 }, { "epoch": 0.03525542193442286, "grad_norm": 1.3089325428009033, "learning_rate": 9.999279387545346e-06, "loss": 0.919, "step": 686 }, { "epoch": 0.0353068146777675, "grad_norm": 1.3162271976470947, "learning_rate": 9.999265188974149e-06, "loss": 0.9113, "step": 687 }, { "epoch": 0.03535820742111214, "grad_norm": 1.1579227447509766, "learning_rate": 9.99925085189394e-06, "loss": 0.8468, "step": 688 }, { "epoch": 0.03540960016445678, "grad_norm": 1.318410873413086, "learning_rate": 9.999236376305123e-06, "loss": 0.8899, "step": 689 }, { "epoch": 0.03546099290780142, "grad_norm": 1.266695499420166, "learning_rate": 9.999221762208098e-06, "loss": 0.8224, "step": 690 }, { "epoch": 0.03551238565114606, "grad_norm": 1.184025764465332, "learning_rate": 9.999207009603266e-06, "loss": 0.8368, "step": 691 }, { "epoch": 0.0355637783944907, "grad_norm": 1.245678186416626, "learning_rate": 9.99919211849104e-06, "loss": 0.8565, "step": 692 }, { "epoch": 0.035615171137835334, "grad_norm": 1.2107875347137451, "learning_rate": 9.99917708887183e-06, "loss": 0.8637, "step": 693 }, { "epoch": 0.035666563881179975, "grad_norm": 1.2797000408172607, "learning_rate": 9.999161920746055e-06, "loss": 0.7954, "step": 694 }, { "epoch": 0.035717956624524615, "grad_norm": 0.9214054346084595, "learning_rate": 9.99914661411413e-06, "loss": 0.7751, "step": 695 }, { "epoch": 0.035769349367869255, "grad_norm": 1.3945088386535645, "learning_rate": 9.999131168976485e-06, "loss": 0.9364, "step": 696 }, { "epoch": 0.035820742111213895, "grad_norm": 1.3009486198425293, "learning_rate": 9.999115585333546e-06, "loss": 0.8712, "step": 697 }, { "epoch": 0.035872134854558535, "grad_norm": 1.2121648788452148, "learning_rate": 9.999099863185743e-06, "loss": 0.8373, "step": 698 }, { "epoch": 0.035923527597903175, "grad_norm": 1.3146964311599731, "learning_rate": 9.999084002533513e-06, "loss": 0.9365, "step": 699 }, { "epoch": 0.035974920341247815, "grad_norm": 1.4433462619781494, "learning_rate": 9.999068003377296e-06, "loss": 0.8918, "step": 700 }, { "epoch": 0.036026313084592455, "grad_norm": 1.297878623008728, "learning_rate": 9.999051865717535e-06, "loss": 0.8625, "step": 701 }, { "epoch": 0.036077705827937095, "grad_norm": 1.3019086122512817, "learning_rate": 9.999035589554675e-06, "loss": 0.9551, "step": 702 }, { "epoch": 0.036129098571281736, "grad_norm": 1.201297402381897, "learning_rate": 9.99901917488917e-06, "loss": 0.8528, "step": 703 }, { "epoch": 0.036180491314626376, "grad_norm": 1.2464736700057983, "learning_rate": 9.999002621721473e-06, "loss": 0.901, "step": 704 }, { "epoch": 0.036231884057971016, "grad_norm": 1.7273004055023193, "learning_rate": 9.998985930052042e-06, "loss": 0.7966, "step": 705 }, { "epoch": 0.036283276801315656, "grad_norm": 1.2638431787490845, "learning_rate": 9.998969099881341e-06, "loss": 0.967, "step": 706 }, { "epoch": 0.036334669544660296, "grad_norm": 1.2900207042694092, "learning_rate": 9.998952131209836e-06, "loss": 0.8496, "step": 707 }, { "epoch": 0.036386062288004936, "grad_norm": 0.8974463939666748, "learning_rate": 9.998935024037998e-06, "loss": 0.748, "step": 708 }, { "epoch": 0.036437455031349576, "grad_norm": 1.2598536014556885, "learning_rate": 9.998917778366299e-06, "loss": 0.8948, "step": 709 }, { "epoch": 0.036488847774694216, "grad_norm": 1.433349370956421, "learning_rate": 9.998900394195217e-06, "loss": 0.8779, "step": 710 }, { "epoch": 0.03654024051803885, "grad_norm": 1.191897988319397, "learning_rate": 9.998882871525234e-06, "loss": 0.8856, "step": 711 }, { "epoch": 0.03659163326138349, "grad_norm": 1.0730814933776855, "learning_rate": 9.998865210356839e-06, "loss": 0.7432, "step": 712 }, { "epoch": 0.03664302600472813, "grad_norm": 1.4379756450653076, "learning_rate": 9.998847410690515e-06, "loss": 0.9208, "step": 713 }, { "epoch": 0.03669441874807277, "grad_norm": 1.2651309967041016, "learning_rate": 9.998829472526758e-06, "loss": 0.8665, "step": 714 }, { "epoch": 0.03674581149141741, "grad_norm": 1.178600549697876, "learning_rate": 9.998811395866067e-06, "loss": 0.8208, "step": 715 }, { "epoch": 0.03679720423476205, "grad_norm": 1.254520297050476, "learning_rate": 9.99879318070894e-06, "loss": 0.8775, "step": 716 }, { "epoch": 0.03684859697810669, "grad_norm": 1.3362984657287598, "learning_rate": 9.998774827055884e-06, "loss": 0.916, "step": 717 }, { "epoch": 0.03689998972145133, "grad_norm": 1.2543765306472778, "learning_rate": 9.998756334907404e-06, "loss": 0.8354, "step": 718 }, { "epoch": 0.03695138246479597, "grad_norm": 1.393196702003479, "learning_rate": 9.998737704264017e-06, "loss": 0.8723, "step": 719 }, { "epoch": 0.03700277520814061, "grad_norm": 1.2626738548278809, "learning_rate": 9.998718935126236e-06, "loss": 0.8213, "step": 720 }, { "epoch": 0.03705416795148525, "grad_norm": 1.3131617307662964, "learning_rate": 9.99870002749458e-06, "loss": 0.8805, "step": 721 }, { "epoch": 0.03710556069482989, "grad_norm": 1.271390438079834, "learning_rate": 9.998680981369577e-06, "loss": 0.8748, "step": 722 }, { "epoch": 0.03715695343817453, "grad_norm": 1.1442046165466309, "learning_rate": 9.998661796751751e-06, "loss": 0.8429, "step": 723 }, { "epoch": 0.03720834618151917, "grad_norm": 0.9757936596870422, "learning_rate": 9.998642473641638e-06, "loss": 0.7885, "step": 724 }, { "epoch": 0.03725973892486381, "grad_norm": 1.2529871463775635, "learning_rate": 9.998623012039768e-06, "loss": 0.9013, "step": 725 }, { "epoch": 0.03731113166820845, "grad_norm": 1.3068562746047974, "learning_rate": 9.99860341194668e-06, "loss": 0.9318, "step": 726 }, { "epoch": 0.03736252441155309, "grad_norm": 0.9531645774841309, "learning_rate": 9.998583673362922e-06, "loss": 0.7402, "step": 727 }, { "epoch": 0.03741391715489773, "grad_norm": 1.2776199579238892, "learning_rate": 9.998563796289038e-06, "loss": 0.9033, "step": 728 }, { "epoch": 0.03746530989824237, "grad_norm": 0.8968835473060608, "learning_rate": 9.99854378072558e-06, "loss": 0.7731, "step": 729 }, { "epoch": 0.037516702641587005, "grad_norm": 1.2996851205825806, "learning_rate": 9.9985236266731e-06, "loss": 0.8855, "step": 730 }, { "epoch": 0.037568095384931645, "grad_norm": 1.360831618309021, "learning_rate": 9.998503334132159e-06, "loss": 0.859, "step": 731 }, { "epoch": 0.037619488128276285, "grad_norm": 1.271178126335144, "learning_rate": 9.998482903103318e-06, "loss": 0.8943, "step": 732 }, { "epoch": 0.037670880871620925, "grad_norm": 1.3013023138046265, "learning_rate": 9.998462333587143e-06, "loss": 0.9497, "step": 733 }, { "epoch": 0.037722273614965565, "grad_norm": 1.2942689657211304, "learning_rate": 9.998441625584206e-06, "loss": 0.9516, "step": 734 }, { "epoch": 0.037773666358310205, "grad_norm": 1.2302870750427246, "learning_rate": 9.998420779095076e-06, "loss": 0.9122, "step": 735 }, { "epoch": 0.037825059101654845, "grad_norm": 0.7748158574104309, "learning_rate": 9.998399794120335e-06, "loss": 0.7811, "step": 736 }, { "epoch": 0.037876451844999486, "grad_norm": 1.2053734064102173, "learning_rate": 9.998378670660563e-06, "loss": 0.8755, "step": 737 }, { "epoch": 0.037927844588344126, "grad_norm": 1.5758365392684937, "learning_rate": 9.998357408716345e-06, "loss": 0.95, "step": 738 }, { "epoch": 0.037979237331688766, "grad_norm": 1.298250436782837, "learning_rate": 9.998336008288269e-06, "loss": 0.7766, "step": 739 }, { "epoch": 0.038030630075033406, "grad_norm": 1.288628101348877, "learning_rate": 9.99831446937693e-06, "loss": 0.8274, "step": 740 }, { "epoch": 0.038082022818378046, "grad_norm": 1.2925089597702026, "learning_rate": 9.998292791982924e-06, "loss": 0.9455, "step": 741 }, { "epoch": 0.038133415561722686, "grad_norm": 1.236112117767334, "learning_rate": 9.998270976106852e-06, "loss": 0.937, "step": 742 }, { "epoch": 0.038184808305067326, "grad_norm": 0.9334130883216858, "learning_rate": 9.998249021749317e-06, "loss": 0.7603, "step": 743 }, { "epoch": 0.038236201048411966, "grad_norm": 0.9988341927528381, "learning_rate": 9.998226928910928e-06, "loss": 0.7624, "step": 744 }, { "epoch": 0.038287593791756606, "grad_norm": 1.3378779888153076, "learning_rate": 9.998204697592298e-06, "loss": 0.8701, "step": 745 }, { "epoch": 0.03833898653510125, "grad_norm": 1.3172721862792969, "learning_rate": 9.998182327794042e-06, "loss": 0.8697, "step": 746 }, { "epoch": 0.03839037927844589, "grad_norm": 1.5073113441467285, "learning_rate": 9.99815981951678e-06, "loss": 0.8773, "step": 747 }, { "epoch": 0.03844177202179052, "grad_norm": 1.2601313591003418, "learning_rate": 9.998137172761136e-06, "loss": 0.8611, "step": 748 }, { "epoch": 0.03849316476513516, "grad_norm": 1.2902367115020752, "learning_rate": 9.998114387527736e-06, "loss": 0.8753, "step": 749 }, { "epoch": 0.0385445575084798, "grad_norm": 1.341047763824463, "learning_rate": 9.998091463817214e-06, "loss": 0.8848, "step": 750 }, { "epoch": 0.03859595025182444, "grad_norm": 1.2246860265731812, "learning_rate": 9.9980684016302e-06, "loss": 0.8842, "step": 751 }, { "epoch": 0.03864734299516908, "grad_norm": 1.3612843751907349, "learning_rate": 9.99804520096734e-06, "loss": 0.9267, "step": 752 }, { "epoch": 0.03869873573851372, "grad_norm": 1.2748205661773682, "learning_rate": 9.998021861829272e-06, "loss": 0.9499, "step": 753 }, { "epoch": 0.03875012848185836, "grad_norm": 1.2747546434402466, "learning_rate": 9.997998384216645e-06, "loss": 0.9278, "step": 754 }, { "epoch": 0.038801521225203, "grad_norm": 1.2689528465270996, "learning_rate": 9.997974768130106e-06, "loss": 0.8811, "step": 755 }, { "epoch": 0.03885291396854764, "grad_norm": 1.2552950382232666, "learning_rate": 9.997951013570312e-06, "loss": 0.925, "step": 756 }, { "epoch": 0.03890430671189228, "grad_norm": 1.4981372356414795, "learning_rate": 9.997927120537923e-06, "loss": 0.8707, "step": 757 }, { "epoch": 0.03895569945523692, "grad_norm": 1.0191279649734497, "learning_rate": 9.997903089033596e-06, "loss": 0.7708, "step": 758 }, { "epoch": 0.03900709219858156, "grad_norm": 1.2214558124542236, "learning_rate": 9.997878919058001e-06, "loss": 0.8456, "step": 759 }, { "epoch": 0.0390584849419262, "grad_norm": 0.8248938918113708, "learning_rate": 9.997854610611805e-06, "loss": 0.7085, "step": 760 }, { "epoch": 0.03910987768527084, "grad_norm": 1.341586709022522, "learning_rate": 9.997830163695685e-06, "loss": 0.8779, "step": 761 }, { "epoch": 0.03916127042861548, "grad_norm": 1.238930344581604, "learning_rate": 9.997805578310313e-06, "loss": 0.8587, "step": 762 }, { "epoch": 0.03921266317196012, "grad_norm": 1.192710280418396, "learning_rate": 9.997780854456376e-06, "loss": 0.7974, "step": 763 }, { "epoch": 0.03926405591530476, "grad_norm": 1.2870088815689087, "learning_rate": 9.997755992134554e-06, "loss": 0.8906, "step": 764 }, { "epoch": 0.0393154486586494, "grad_norm": 0.9952512979507446, "learning_rate": 9.99773099134554e-06, "loss": 0.7892, "step": 765 }, { "epoch": 0.03936684140199404, "grad_norm": 1.2427223920822144, "learning_rate": 9.997705852090023e-06, "loss": 0.8255, "step": 766 }, { "epoch": 0.039418234145338675, "grad_norm": 0.998755931854248, "learning_rate": 9.9976805743687e-06, "loss": 0.7362, "step": 767 }, { "epoch": 0.039469626888683315, "grad_norm": 1.1804059743881226, "learning_rate": 9.997655158182274e-06, "loss": 0.8316, "step": 768 }, { "epoch": 0.039521019632027955, "grad_norm": 1.2887158393859863, "learning_rate": 9.99762960353145e-06, "loss": 0.9099, "step": 769 }, { "epoch": 0.039572412375372595, "grad_norm": 1.2008178234100342, "learning_rate": 9.99760391041693e-06, "loss": 0.8307, "step": 770 }, { "epoch": 0.039623805118717235, "grad_norm": 1.2601547241210938, "learning_rate": 9.99757807883943e-06, "loss": 0.9175, "step": 771 }, { "epoch": 0.039675197862061876, "grad_norm": 1.3057823181152344, "learning_rate": 9.997552108799667e-06, "loss": 0.8564, "step": 772 }, { "epoch": 0.039726590605406516, "grad_norm": 1.3248363733291626, "learning_rate": 9.997526000298357e-06, "loss": 0.9223, "step": 773 }, { "epoch": 0.039777983348751156, "grad_norm": 1.6168568134307861, "learning_rate": 9.997499753336225e-06, "loss": 0.9364, "step": 774 }, { "epoch": 0.039829376092095796, "grad_norm": 0.9902132153511047, "learning_rate": 9.997473367914002e-06, "loss": 0.769, "step": 775 }, { "epoch": 0.039880768835440436, "grad_norm": 1.4149781465530396, "learning_rate": 9.997446844032412e-06, "loss": 0.8625, "step": 776 }, { "epoch": 0.039932161578785076, "grad_norm": 1.4048004150390625, "learning_rate": 9.997420181692194e-06, "loss": 0.8149, "step": 777 }, { "epoch": 0.039983554322129716, "grad_norm": 1.2902519702911377, "learning_rate": 9.997393380894085e-06, "loss": 0.8474, "step": 778 }, { "epoch": 0.040034947065474356, "grad_norm": 1.215725302696228, "learning_rate": 9.997366441638829e-06, "loss": 0.8152, "step": 779 }, { "epoch": 0.040086339808818996, "grad_norm": 1.2510340213775635, "learning_rate": 9.997339363927172e-06, "loss": 0.8811, "step": 780 }, { "epoch": 0.04013773255216364, "grad_norm": 1.3313243389129639, "learning_rate": 9.997312147759864e-06, "loss": 0.8737, "step": 781 }, { "epoch": 0.04018912529550828, "grad_norm": 1.2366669178009033, "learning_rate": 9.99728479313766e-06, "loss": 0.8386, "step": 782 }, { "epoch": 0.04024051803885292, "grad_norm": 1.2371160984039307, "learning_rate": 9.997257300061316e-06, "loss": 0.874, "step": 783 }, { "epoch": 0.04029191078219756, "grad_norm": 1.276847243309021, "learning_rate": 9.997229668531595e-06, "loss": 0.8282, "step": 784 }, { "epoch": 0.04034330352554219, "grad_norm": 1.2422157526016235, "learning_rate": 9.997201898549262e-06, "loss": 0.8679, "step": 785 }, { "epoch": 0.04039469626888683, "grad_norm": 1.412040114402771, "learning_rate": 9.997173990115085e-06, "loss": 0.8773, "step": 786 }, { "epoch": 0.04044608901223147, "grad_norm": 1.271793007850647, "learning_rate": 9.997145943229843e-06, "loss": 0.9256, "step": 787 }, { "epoch": 0.04049748175557611, "grad_norm": 1.2193374633789062, "learning_rate": 9.997117757894306e-06, "loss": 0.8797, "step": 788 }, { "epoch": 0.04054887449892075, "grad_norm": 1.1020599603652954, "learning_rate": 9.997089434109257e-06, "loss": 0.7367, "step": 789 }, { "epoch": 0.04060026724226539, "grad_norm": 1.3232066631317139, "learning_rate": 9.997060971875483e-06, "loss": 0.8628, "step": 790 }, { "epoch": 0.04065165998561003, "grad_norm": 1.250009298324585, "learning_rate": 9.997032371193771e-06, "loss": 0.892, "step": 791 }, { "epoch": 0.04070305272895467, "grad_norm": 1.2399975061416626, "learning_rate": 9.997003632064914e-06, "loss": 0.8727, "step": 792 }, { "epoch": 0.04075444547229931, "grad_norm": 1.1934688091278076, "learning_rate": 9.996974754489707e-06, "loss": 0.7545, "step": 793 }, { "epoch": 0.04080583821564395, "grad_norm": 0.8413489460945129, "learning_rate": 9.99694573846895e-06, "loss": 0.7173, "step": 794 }, { "epoch": 0.04085723095898859, "grad_norm": 1.244056224822998, "learning_rate": 9.996916584003448e-06, "loss": 0.8666, "step": 795 }, { "epoch": 0.04090862370233323, "grad_norm": 1.2638477087020874, "learning_rate": 9.996887291094011e-06, "loss": 0.8823, "step": 796 }, { "epoch": 0.04096001644567787, "grad_norm": 1.3062065839767456, "learning_rate": 9.996857859741447e-06, "loss": 0.8987, "step": 797 }, { "epoch": 0.04101140918902251, "grad_norm": 1.2934308052062988, "learning_rate": 9.996828289946571e-06, "loss": 0.9168, "step": 798 }, { "epoch": 0.04106280193236715, "grad_norm": 3.6413021087646484, "learning_rate": 9.996798581710205e-06, "loss": 0.8263, "step": 799 }, { "epoch": 0.04111419467571179, "grad_norm": 1.3083852529525757, "learning_rate": 9.99676873503317e-06, "loss": 0.8921, "step": 800 }, { "epoch": 0.04116558741905643, "grad_norm": 1.2100480794906616, "learning_rate": 9.996738749916294e-06, "loss": 0.9232, "step": 801 }, { "epoch": 0.04121698016240107, "grad_norm": 1.220504879951477, "learning_rate": 9.99670862636041e-06, "loss": 0.8708, "step": 802 }, { "epoch": 0.04126837290574571, "grad_norm": 1.1817091703414917, "learning_rate": 9.996678364366347e-06, "loss": 0.7598, "step": 803 }, { "epoch": 0.041319765649090345, "grad_norm": 1.2372924089431763, "learning_rate": 9.996647963934946e-06, "loss": 0.8688, "step": 804 }, { "epoch": 0.041371158392434985, "grad_norm": 1.2500534057617188, "learning_rate": 9.996617425067052e-06, "loss": 0.8714, "step": 805 }, { "epoch": 0.041422551135779626, "grad_norm": 0.849418580532074, "learning_rate": 9.99658674776351e-06, "loss": 0.7227, "step": 806 }, { "epoch": 0.041473943879124266, "grad_norm": 1.2983394861221313, "learning_rate": 9.996555932025167e-06, "loss": 0.8657, "step": 807 }, { "epoch": 0.041525336622468906, "grad_norm": 1.3199512958526611, "learning_rate": 9.99652497785288e-06, "loss": 0.8479, "step": 808 }, { "epoch": 0.041576729365813546, "grad_norm": 1.2835640907287598, "learning_rate": 9.996493885247504e-06, "loss": 0.9049, "step": 809 }, { "epoch": 0.041628122109158186, "grad_norm": 1.361087441444397, "learning_rate": 9.996462654209903e-06, "loss": 0.8891, "step": 810 }, { "epoch": 0.041679514852502826, "grad_norm": 1.259198546409607, "learning_rate": 9.99643128474094e-06, "loss": 0.8741, "step": 811 }, { "epoch": 0.041730907595847466, "grad_norm": 1.2737828493118286, "learning_rate": 9.996399776841484e-06, "loss": 0.8768, "step": 812 }, { "epoch": 0.041782300339192106, "grad_norm": 1.2765504121780396, "learning_rate": 9.99636813051241e-06, "loss": 0.922, "step": 813 }, { "epoch": 0.041833693082536746, "grad_norm": 1.311645269393921, "learning_rate": 9.996336345754597e-06, "loss": 0.9281, "step": 814 }, { "epoch": 0.04188508582588139, "grad_norm": 1.3025455474853516, "learning_rate": 9.996304422568919e-06, "loss": 0.8609, "step": 815 }, { "epoch": 0.04193647856922603, "grad_norm": 0.9871379137039185, "learning_rate": 9.996272360956265e-06, "loss": 0.7395, "step": 816 }, { "epoch": 0.04198787131257067, "grad_norm": 1.3791704177856445, "learning_rate": 9.99624016091752e-06, "loss": 0.8602, "step": 817 }, { "epoch": 0.04203926405591531, "grad_norm": 1.3354383707046509, "learning_rate": 9.996207822453583e-06, "loss": 0.9623, "step": 818 }, { "epoch": 0.04209065679925995, "grad_norm": 1.2503334283828735, "learning_rate": 9.996175345565342e-06, "loss": 0.8866, "step": 819 }, { "epoch": 0.04214204954260459, "grad_norm": 1.2029733657836914, "learning_rate": 9.996142730253701e-06, "loss": 0.8049, "step": 820 }, { "epoch": 0.04219344228594923, "grad_norm": 0.9518265724182129, "learning_rate": 9.996109976519564e-06, "loss": 0.7469, "step": 821 }, { "epoch": 0.04224483502929386, "grad_norm": 0.8854423761367798, "learning_rate": 9.996077084363836e-06, "loss": 0.7245, "step": 822 }, { "epoch": 0.0422962277726385, "grad_norm": 0.7791023850440979, "learning_rate": 9.996044053787428e-06, "loss": 0.7654, "step": 823 }, { "epoch": 0.04234762051598314, "grad_norm": 1.2723650932312012, "learning_rate": 9.996010884791258e-06, "loss": 0.8454, "step": 824 }, { "epoch": 0.04239901325932778, "grad_norm": 1.2480024099349976, "learning_rate": 9.995977577376245e-06, "loss": 0.8405, "step": 825 }, { "epoch": 0.04245040600267242, "grad_norm": 1.2302253246307373, "learning_rate": 9.995944131543311e-06, "loss": 0.8855, "step": 826 }, { "epoch": 0.04250179874601706, "grad_norm": 4.765591144561768, "learning_rate": 9.99591054729338e-06, "loss": 0.7343, "step": 827 }, { "epoch": 0.0425531914893617, "grad_norm": 0.977520227432251, "learning_rate": 9.995876824627386e-06, "loss": 0.7399, "step": 828 }, { "epoch": 0.04260458423270634, "grad_norm": 1.3106623888015747, "learning_rate": 9.995842963546261e-06, "loss": 0.9051, "step": 829 }, { "epoch": 0.04265597697605098, "grad_norm": 1.2262039184570312, "learning_rate": 9.995808964050946e-06, "loss": 0.9119, "step": 830 }, { "epoch": 0.04270736971939562, "grad_norm": 1.2501107454299927, "learning_rate": 9.99577482614238e-06, "loss": 0.9084, "step": 831 }, { "epoch": 0.04275876246274026, "grad_norm": 1.253807544708252, "learning_rate": 9.99574054982151e-06, "loss": 0.9051, "step": 832 }, { "epoch": 0.0428101552060849, "grad_norm": 1.3432157039642334, "learning_rate": 9.995706135089283e-06, "loss": 0.8671, "step": 833 }, { "epoch": 0.04286154794942954, "grad_norm": 1.2076985836029053, "learning_rate": 9.995671581946658e-06, "loss": 0.8541, "step": 834 }, { "epoch": 0.04291294069277418, "grad_norm": 1.402858018875122, "learning_rate": 9.995636890394588e-06, "loss": 0.826, "step": 835 }, { "epoch": 0.04296433343611882, "grad_norm": 1.1987099647521973, "learning_rate": 9.995602060434036e-06, "loss": 0.883, "step": 836 }, { "epoch": 0.04301572617946346, "grad_norm": 1.6601828336715698, "learning_rate": 9.995567092065967e-06, "loss": 0.8607, "step": 837 }, { "epoch": 0.0430671189228081, "grad_norm": 1.201905369758606, "learning_rate": 9.99553198529135e-06, "loss": 0.8441, "step": 838 }, { "epoch": 0.04311851166615274, "grad_norm": 1.1625796556472778, "learning_rate": 9.995496740111155e-06, "loss": 0.8303, "step": 839 }, { "epoch": 0.043169904409497376, "grad_norm": 1.228757381439209, "learning_rate": 9.995461356526362e-06, "loss": 0.8511, "step": 840 }, { "epoch": 0.043221297152842016, "grad_norm": 1.1575579643249512, "learning_rate": 9.99542583453795e-06, "loss": 0.9641, "step": 841 }, { "epoch": 0.043272689896186656, "grad_norm": 1.2157249450683594, "learning_rate": 9.995390174146901e-06, "loss": 0.8376, "step": 842 }, { "epoch": 0.043324082639531296, "grad_norm": 1.3628737926483154, "learning_rate": 9.995354375354207e-06, "loss": 0.7744, "step": 843 }, { "epoch": 0.043375475382875936, "grad_norm": 1.309718132019043, "learning_rate": 9.995318438160858e-06, "loss": 0.9427, "step": 844 }, { "epoch": 0.043426868126220576, "grad_norm": 1.284839153289795, "learning_rate": 9.995282362567848e-06, "loss": 0.8414, "step": 845 }, { "epoch": 0.043478260869565216, "grad_norm": 1.5498034954071045, "learning_rate": 9.99524614857618e-06, "loss": 0.847, "step": 846 }, { "epoch": 0.043529653612909856, "grad_norm": 0.9326204061508179, "learning_rate": 9.995209796186854e-06, "loss": 0.7553, "step": 847 }, { "epoch": 0.043581046356254496, "grad_norm": 0.8366790413856506, "learning_rate": 9.99517330540088e-06, "loss": 0.7461, "step": 848 }, { "epoch": 0.043632439099599137, "grad_norm": 1.3054695129394531, "learning_rate": 9.995136676219265e-06, "loss": 0.8842, "step": 849 }, { "epoch": 0.04368383184294378, "grad_norm": 0.9366014003753662, "learning_rate": 9.99509990864303e-06, "loss": 0.7269, "step": 850 }, { "epoch": 0.04373522458628842, "grad_norm": 1.3300738334655762, "learning_rate": 9.995063002673186e-06, "loss": 0.9163, "step": 851 }, { "epoch": 0.04378661732963306, "grad_norm": 0.9600208401679993, "learning_rate": 9.995025958310762e-06, "loss": 0.8206, "step": 852 }, { "epoch": 0.0438380100729777, "grad_norm": 1.2376947402954102, "learning_rate": 9.994988775556782e-06, "loss": 0.91, "step": 853 }, { "epoch": 0.04388940281632234, "grad_norm": 1.197518229484558, "learning_rate": 9.994951454412276e-06, "loss": 0.8496, "step": 854 }, { "epoch": 0.04394079555966698, "grad_norm": 1.2506026029586792, "learning_rate": 9.994913994878276e-06, "loss": 0.8787, "step": 855 }, { "epoch": 0.04399218830301162, "grad_norm": 1.1781127452850342, "learning_rate": 9.994876396955827e-06, "loss": 0.878, "step": 856 }, { "epoch": 0.04404358104635626, "grad_norm": 1.1351370811462402, "learning_rate": 9.994838660645961e-06, "loss": 0.8494, "step": 857 }, { "epoch": 0.0440949737897009, "grad_norm": 1.331215262413025, "learning_rate": 9.99480078594973e-06, "loss": 0.8764, "step": 858 }, { "epoch": 0.04414636653304553, "grad_norm": 1.2526382207870483, "learning_rate": 9.994762772868181e-06, "loss": 0.8266, "step": 859 }, { "epoch": 0.04419775927639017, "grad_norm": 0.9349003434181213, "learning_rate": 9.994724621402367e-06, "loss": 0.7088, "step": 860 }, { "epoch": 0.04424915201973481, "grad_norm": 1.250608205795288, "learning_rate": 9.994686331553347e-06, "loss": 0.9288, "step": 861 }, { "epoch": 0.04430054476307945, "grad_norm": 1.1939023733139038, "learning_rate": 9.99464790332218e-06, "loss": 0.9197, "step": 862 }, { "epoch": 0.04435193750642409, "grad_norm": 0.9250991344451904, "learning_rate": 9.994609336709932e-06, "loss": 0.7363, "step": 863 }, { "epoch": 0.04440333024976873, "grad_norm": 1.230297565460205, "learning_rate": 9.994570631717672e-06, "loss": 0.8224, "step": 864 }, { "epoch": 0.04445472299311337, "grad_norm": 1.2914111614227295, "learning_rate": 9.994531788346468e-06, "loss": 0.8489, "step": 865 }, { "epoch": 0.04450611573645801, "grad_norm": 1.2369592189788818, "learning_rate": 9.994492806597402e-06, "loss": 0.857, "step": 866 }, { "epoch": 0.04455750847980265, "grad_norm": 1.362882375717163, "learning_rate": 9.99445368647155e-06, "loss": 0.8636, "step": 867 }, { "epoch": 0.04460890122314729, "grad_norm": 1.0259376764297485, "learning_rate": 9.994414427969999e-06, "loss": 0.7544, "step": 868 }, { "epoch": 0.04466029396649193, "grad_norm": 1.288793683052063, "learning_rate": 9.994375031093833e-06, "loss": 0.9061, "step": 869 }, { "epoch": 0.04471168670983657, "grad_norm": 0.7983279228210449, "learning_rate": 9.994335495844145e-06, "loss": 0.765, "step": 870 }, { "epoch": 0.04476307945318121, "grad_norm": 1.2740875482559204, "learning_rate": 9.994295822222032e-06, "loss": 0.8894, "step": 871 }, { "epoch": 0.04481447219652585, "grad_norm": 0.8956534266471863, "learning_rate": 9.994256010228592e-06, "loss": 0.7289, "step": 872 }, { "epoch": 0.04486586493987049, "grad_norm": 0.9120482802391052, "learning_rate": 9.994216059864928e-06, "loss": 0.7659, "step": 873 }, { "epoch": 0.04491725768321513, "grad_norm": 1.2172009944915771, "learning_rate": 9.994175971132147e-06, "loss": 0.814, "step": 874 }, { "epoch": 0.04496865042655977, "grad_norm": 1.3630836009979248, "learning_rate": 9.99413574403136e-06, "loss": 0.9081, "step": 875 }, { "epoch": 0.04502004316990441, "grad_norm": 1.2624825239181519, "learning_rate": 9.994095378563679e-06, "loss": 0.8703, "step": 876 }, { "epoch": 0.045071435913249046, "grad_norm": 1.2351304292678833, "learning_rate": 9.994054874730227e-06, "loss": 0.865, "step": 877 }, { "epoch": 0.045122828656593686, "grad_norm": 1.1374231576919556, "learning_rate": 9.994014232532123e-06, "loss": 0.8449, "step": 878 }, { "epoch": 0.045174221399938326, "grad_norm": 1.2344890832901, "learning_rate": 9.993973451970493e-06, "loss": 0.9139, "step": 879 }, { "epoch": 0.045225614143282966, "grad_norm": 1.2068499326705933, "learning_rate": 9.993932533046469e-06, "loss": 0.858, "step": 880 }, { "epoch": 0.045277006886627606, "grad_norm": 1.2650738954544067, "learning_rate": 9.993891475761181e-06, "loss": 0.8465, "step": 881 }, { "epoch": 0.045328399629972246, "grad_norm": 1.2620521783828735, "learning_rate": 9.99385028011577e-06, "loss": 0.8735, "step": 882 }, { "epoch": 0.045379792373316886, "grad_norm": 1.1206152439117432, "learning_rate": 9.993808946111376e-06, "loss": 0.8119, "step": 883 }, { "epoch": 0.04543118511666153, "grad_norm": 1.4009485244750977, "learning_rate": 9.993767473749145e-06, "loss": 0.861, "step": 884 }, { "epoch": 0.04548257786000617, "grad_norm": 1.3079158067703247, "learning_rate": 9.993725863030224e-06, "loss": 0.8456, "step": 885 }, { "epoch": 0.04553397060335081, "grad_norm": 1.1648167371749878, "learning_rate": 9.993684113955769e-06, "loss": 0.935, "step": 886 }, { "epoch": 0.04558536334669545, "grad_norm": 1.1732007265090942, "learning_rate": 9.993642226526934e-06, "loss": 0.923, "step": 887 }, { "epoch": 0.04563675609004009, "grad_norm": 1.198224425315857, "learning_rate": 9.99360020074488e-06, "loss": 0.8903, "step": 888 }, { "epoch": 0.04568814883338473, "grad_norm": 1.4259309768676758, "learning_rate": 9.99355803661077e-06, "loss": 0.9653, "step": 889 }, { "epoch": 0.04573954157672937, "grad_norm": 1.2178943157196045, "learning_rate": 9.993515734125777e-06, "loss": 0.9011, "step": 890 }, { "epoch": 0.04579093432007401, "grad_norm": 1.1884068250656128, "learning_rate": 9.99347329329107e-06, "loss": 0.8365, "step": 891 }, { "epoch": 0.04584232706341865, "grad_norm": 1.3186900615692139, "learning_rate": 9.993430714107823e-06, "loss": 0.8671, "step": 892 }, { "epoch": 0.04589371980676329, "grad_norm": 1.2285939455032349, "learning_rate": 9.99338799657722e-06, "loss": 0.8665, "step": 893 }, { "epoch": 0.04594511255010793, "grad_norm": 1.2262039184570312, "learning_rate": 9.99334514070044e-06, "loss": 0.8828, "step": 894 }, { "epoch": 0.04599650529345257, "grad_norm": 1.2611392736434937, "learning_rate": 9.993302146478673e-06, "loss": 0.8595, "step": 895 }, { "epoch": 0.0460478980367972, "grad_norm": 1.150963544845581, "learning_rate": 9.99325901391311e-06, "loss": 0.7621, "step": 896 }, { "epoch": 0.04609929078014184, "grad_norm": 1.3334376811981201, "learning_rate": 9.993215743004947e-06, "loss": 0.9106, "step": 897 }, { "epoch": 0.04615068352348648, "grad_norm": 1.216707706451416, "learning_rate": 9.993172333755379e-06, "loss": 0.8735, "step": 898 }, { "epoch": 0.04620207626683112, "grad_norm": 1.1890424489974976, "learning_rate": 9.993128786165613e-06, "loss": 0.8786, "step": 899 }, { "epoch": 0.04625346901017576, "grad_norm": 1.1397758722305298, "learning_rate": 9.993085100236852e-06, "loss": 0.8319, "step": 900 }, { "epoch": 0.0463048617535204, "grad_norm": 1.268941044807434, "learning_rate": 9.99304127597031e-06, "loss": 0.8706, "step": 901 }, { "epoch": 0.04635625449686504, "grad_norm": 1.1660728454589844, "learning_rate": 9.992997313367199e-06, "loss": 0.8663, "step": 902 }, { "epoch": 0.04640764724020968, "grad_norm": 1.2808856964111328, "learning_rate": 9.992953212428738e-06, "loss": 0.8469, "step": 903 }, { "epoch": 0.04645903998355432, "grad_norm": 1.2366071939468384, "learning_rate": 9.992908973156145e-06, "loss": 0.8916, "step": 904 }, { "epoch": 0.04651043272689896, "grad_norm": 1.21945321559906, "learning_rate": 9.992864595550653e-06, "loss": 0.8588, "step": 905 }, { "epoch": 0.0465618254702436, "grad_norm": 0.7959684729576111, "learning_rate": 9.992820079613484e-06, "loss": 0.7348, "step": 906 }, { "epoch": 0.04661321821358824, "grad_norm": 1.2446109056472778, "learning_rate": 9.992775425345877e-06, "loss": 0.8705, "step": 907 }, { "epoch": 0.04666461095693288, "grad_norm": 1.2366207838058472, "learning_rate": 9.992730632749065e-06, "loss": 0.8227, "step": 908 }, { "epoch": 0.04671600370027752, "grad_norm": 1.2123759984970093, "learning_rate": 9.992685701824292e-06, "loss": 0.8488, "step": 909 }, { "epoch": 0.04676739644362216, "grad_norm": 0.9096382260322571, "learning_rate": 9.992640632572802e-06, "loss": 0.7521, "step": 910 }, { "epoch": 0.0468187891869668, "grad_norm": 0.8697991967201233, "learning_rate": 9.992595424995843e-06, "loss": 0.7857, "step": 911 }, { "epoch": 0.04687018193031144, "grad_norm": 1.2709344625473022, "learning_rate": 9.99255007909467e-06, "loss": 0.8836, "step": 912 }, { "epoch": 0.04692157467365608, "grad_norm": 1.2813383340835571, "learning_rate": 9.992504594870535e-06, "loss": 0.8368, "step": 913 }, { "epoch": 0.046972967417000716, "grad_norm": 1.4158188104629517, "learning_rate": 9.992458972324702e-06, "loss": 0.9485, "step": 914 }, { "epoch": 0.047024360160345356, "grad_norm": 1.161807656288147, "learning_rate": 9.992413211458431e-06, "loss": 0.8275, "step": 915 }, { "epoch": 0.047075752903689996, "grad_norm": 1.1344105005264282, "learning_rate": 9.992367312272995e-06, "loss": 0.854, "step": 916 }, { "epoch": 0.047127145647034636, "grad_norm": 1.260993480682373, "learning_rate": 9.992321274769661e-06, "loss": 0.869, "step": 917 }, { "epoch": 0.04717853839037928, "grad_norm": 1.3351908922195435, "learning_rate": 9.992275098949709e-06, "loss": 0.8803, "step": 918 }, { "epoch": 0.04722993113372392, "grad_norm": 1.2215328216552734, "learning_rate": 9.992228784814414e-06, "loss": 0.8758, "step": 919 }, { "epoch": 0.04728132387706856, "grad_norm": 1.2368913888931274, "learning_rate": 9.99218233236506e-06, "loss": 0.8615, "step": 920 }, { "epoch": 0.0473327166204132, "grad_norm": 0.9433385133743286, "learning_rate": 9.992135741602937e-06, "loss": 0.7385, "step": 921 }, { "epoch": 0.04738410936375784, "grad_norm": 1.1929795742034912, "learning_rate": 9.992089012529335e-06, "loss": 0.8962, "step": 922 }, { "epoch": 0.04743550210710248, "grad_norm": 1.2160569429397583, "learning_rate": 9.992042145145547e-06, "loss": 0.9045, "step": 923 }, { "epoch": 0.04748689485044712, "grad_norm": 1.5480883121490479, "learning_rate": 9.99199513945287e-06, "loss": 0.8098, "step": 924 }, { "epoch": 0.04753828759379176, "grad_norm": 1.2664825916290283, "learning_rate": 9.991947995452612e-06, "loss": 0.9039, "step": 925 }, { "epoch": 0.0475896803371364, "grad_norm": 1.2778738737106323, "learning_rate": 9.991900713146073e-06, "loss": 0.8572, "step": 926 }, { "epoch": 0.04764107308048104, "grad_norm": 1.5822649002075195, "learning_rate": 9.991853292534565e-06, "loss": 0.8533, "step": 927 }, { "epoch": 0.04769246582382568, "grad_norm": 1.2170166969299316, "learning_rate": 9.991805733619405e-06, "loss": 0.9285, "step": 928 }, { "epoch": 0.04774385856717032, "grad_norm": 1.2741129398345947, "learning_rate": 9.991758036401905e-06, "loss": 0.8095, "step": 929 }, { "epoch": 0.04779525131051496, "grad_norm": 0.8206681609153748, "learning_rate": 9.991710200883391e-06, "loss": 0.7744, "step": 930 }, { "epoch": 0.0478466440538596, "grad_norm": 1.1823898553848267, "learning_rate": 9.991662227065187e-06, "loss": 0.8293, "step": 931 }, { "epoch": 0.04789803679720424, "grad_norm": 1.1964441537857056, "learning_rate": 9.991614114948623e-06, "loss": 0.8378, "step": 932 }, { "epoch": 0.04794942954054887, "grad_norm": 1.189880609512329, "learning_rate": 9.991565864535028e-06, "loss": 0.8139, "step": 933 }, { "epoch": 0.04800082228389351, "grad_norm": 1.2207995653152466, "learning_rate": 9.991517475825744e-06, "loss": 0.9251, "step": 934 }, { "epoch": 0.04805221502723815, "grad_norm": 1.1745632886886597, "learning_rate": 9.991468948822111e-06, "loss": 0.857, "step": 935 }, { "epoch": 0.04810360777058279, "grad_norm": 1.2542591094970703, "learning_rate": 9.99142028352547e-06, "loss": 0.9277, "step": 936 }, { "epoch": 0.04815500051392743, "grad_norm": 1.1780142784118652, "learning_rate": 9.991371479937174e-06, "loss": 0.8924, "step": 937 }, { "epoch": 0.04820639325727207, "grad_norm": 1.2028493881225586, "learning_rate": 9.99132253805857e-06, "loss": 0.8033, "step": 938 }, { "epoch": 0.04825778600061671, "grad_norm": 1.1642537117004395, "learning_rate": 9.991273457891015e-06, "loss": 0.8291, "step": 939 }, { "epoch": 0.04830917874396135, "grad_norm": 1.2911299467086792, "learning_rate": 9.991224239435873e-06, "loss": 0.8627, "step": 940 }, { "epoch": 0.04836057148730599, "grad_norm": 1.182978630065918, "learning_rate": 9.991174882694504e-06, "loss": 0.9141, "step": 941 }, { "epoch": 0.04841196423065063, "grad_norm": 1.2126566171646118, "learning_rate": 9.991125387668276e-06, "loss": 0.896, "step": 942 }, { "epoch": 0.04846335697399527, "grad_norm": 1.1788886785507202, "learning_rate": 9.99107575435856e-06, "loss": 0.9205, "step": 943 }, { "epoch": 0.04851474971733991, "grad_norm": 0.8734241127967834, "learning_rate": 9.991025982766733e-06, "loss": 0.7539, "step": 944 }, { "epoch": 0.04856614246068455, "grad_norm": 1.3815383911132812, "learning_rate": 9.990976072894172e-06, "loss": 0.8866, "step": 945 }, { "epoch": 0.04861753520402919, "grad_norm": 1.3776768445968628, "learning_rate": 9.990926024742262e-06, "loss": 0.8968, "step": 946 }, { "epoch": 0.04866892794737383, "grad_norm": 1.209430456161499, "learning_rate": 9.990875838312387e-06, "loss": 0.896, "step": 947 }, { "epoch": 0.04872032069071847, "grad_norm": 0.752593994140625, "learning_rate": 9.99082551360594e-06, "loss": 0.7264, "step": 948 }, { "epoch": 0.04877171343406311, "grad_norm": 1.249581217765808, "learning_rate": 9.990775050624312e-06, "loss": 0.8631, "step": 949 }, { "epoch": 0.04882310617740775, "grad_norm": 1.3245007991790771, "learning_rate": 9.990724449368903e-06, "loss": 0.8771, "step": 950 }, { "epoch": 0.048874498920752386, "grad_norm": 1.2435704469680786, "learning_rate": 9.990673709841117e-06, "loss": 0.888, "step": 951 }, { "epoch": 0.048925891664097027, "grad_norm": 1.1820589303970337, "learning_rate": 9.990622832042355e-06, "loss": 0.8308, "step": 952 }, { "epoch": 0.04897728440744167, "grad_norm": 0.8945339918136597, "learning_rate": 9.990571815974032e-06, "loss": 0.673, "step": 953 }, { "epoch": 0.04902867715078631, "grad_norm": 1.3911305665969849, "learning_rate": 9.990520661637559e-06, "loss": 0.8685, "step": 954 }, { "epoch": 0.04908006989413095, "grad_norm": 1.5091290473937988, "learning_rate": 9.990469369034353e-06, "loss": 0.8097, "step": 955 }, { "epoch": 0.04913146263747559, "grad_norm": 1.2043997049331665, "learning_rate": 9.990417938165834e-06, "loss": 0.8348, "step": 956 }, { "epoch": 0.04918285538082023, "grad_norm": 1.2417445182800293, "learning_rate": 9.990366369033428e-06, "loss": 0.8868, "step": 957 }, { "epoch": 0.04923424812416487, "grad_norm": 1.203045129776001, "learning_rate": 9.990314661638563e-06, "loss": 0.8806, "step": 958 }, { "epoch": 0.04928564086750951, "grad_norm": 1.1912086009979248, "learning_rate": 9.990262815982674e-06, "loss": 0.8932, "step": 959 }, { "epoch": 0.04933703361085415, "grad_norm": 1.2181487083435059, "learning_rate": 9.990210832067197e-06, "loss": 0.8746, "step": 960 }, { "epoch": 0.04938842635419879, "grad_norm": 1.251497507095337, "learning_rate": 9.99015870989357e-06, "loss": 0.8681, "step": 961 }, { "epoch": 0.04943981909754343, "grad_norm": 1.2161120176315308, "learning_rate": 9.99010644946324e-06, "loss": 0.866, "step": 962 }, { "epoch": 0.04949121184088807, "grad_norm": 1.2148690223693848, "learning_rate": 9.990054050777652e-06, "loss": 0.8245, "step": 963 }, { "epoch": 0.04954260458423271, "grad_norm": 1.3033994436264038, "learning_rate": 9.990001513838257e-06, "loss": 0.8874, "step": 964 }, { "epoch": 0.04959399732757735, "grad_norm": 1.1783607006072998, "learning_rate": 9.989948838646515e-06, "loss": 0.8553, "step": 965 }, { "epoch": 0.04964539007092199, "grad_norm": 1.2339223623275757, "learning_rate": 9.989896025203882e-06, "loss": 0.8185, "step": 966 }, { "epoch": 0.04969678281426663, "grad_norm": 1.2227520942687988, "learning_rate": 9.989843073511823e-06, "loss": 0.8142, "step": 967 }, { "epoch": 0.04974817555761127, "grad_norm": 0.8825535774230957, "learning_rate": 9.989789983571803e-06, "loss": 0.7025, "step": 968 }, { "epoch": 0.04979956830095591, "grad_norm": 0.8393763899803162, "learning_rate": 9.989736755385296e-06, "loss": 0.7113, "step": 969 }, { "epoch": 0.04985096104430054, "grad_norm": 1.344678521156311, "learning_rate": 9.989683388953772e-06, "loss": 0.8574, "step": 970 }, { "epoch": 0.04990235378764518, "grad_norm": 1.5798983573913574, "learning_rate": 9.989629884278715e-06, "loss": 0.9022, "step": 971 }, { "epoch": 0.04995374653098982, "grad_norm": 1.1685906648635864, "learning_rate": 9.989576241361606e-06, "loss": 0.8992, "step": 972 }, { "epoch": 0.05000513927433446, "grad_norm": 1.1721597909927368, "learning_rate": 9.989522460203927e-06, "loss": 0.8778, "step": 973 }, { "epoch": 0.0500565320176791, "grad_norm": 1.5442276000976562, "learning_rate": 9.989468540807173e-06, "loss": 0.7839, "step": 974 }, { "epoch": 0.05010792476102374, "grad_norm": 1.2165648937225342, "learning_rate": 9.989414483172836e-06, "loss": 0.897, "step": 975 }, { "epoch": 0.05015931750436838, "grad_norm": 1.1980541944503784, "learning_rate": 9.989360287302414e-06, "loss": 0.8156, "step": 976 }, { "epoch": 0.05021071024771302, "grad_norm": 1.239953637123108, "learning_rate": 9.989305953197407e-06, "loss": 0.8944, "step": 977 }, { "epoch": 0.05026210299105766, "grad_norm": 1.1867806911468506, "learning_rate": 9.989251480859322e-06, "loss": 0.9201, "step": 978 }, { "epoch": 0.0503134957344023, "grad_norm": 1.2487953901290894, "learning_rate": 9.989196870289668e-06, "loss": 0.8963, "step": 979 }, { "epoch": 0.05036488847774694, "grad_norm": 1.2734178304672241, "learning_rate": 9.989142121489958e-06, "loss": 0.8605, "step": 980 }, { "epoch": 0.05041628122109158, "grad_norm": 0.9181957244873047, "learning_rate": 9.98908723446171e-06, "loss": 0.7451, "step": 981 }, { "epoch": 0.05046767396443622, "grad_norm": 1.2341207265853882, "learning_rate": 9.989032209206441e-06, "loss": 0.8784, "step": 982 }, { "epoch": 0.05051906670778086, "grad_norm": 1.2605217695236206, "learning_rate": 9.98897704572568e-06, "loss": 0.8552, "step": 983 }, { "epoch": 0.0505704594511255, "grad_norm": 0.7708296179771423, "learning_rate": 9.988921744020953e-06, "loss": 0.7355, "step": 984 }, { "epoch": 0.05062185219447014, "grad_norm": 0.8412183523178101, "learning_rate": 9.988866304093794e-06, "loss": 0.7563, "step": 985 }, { "epoch": 0.05067324493781478, "grad_norm": 1.17894446849823, "learning_rate": 9.988810725945736e-06, "loss": 0.7985, "step": 986 }, { "epoch": 0.050724637681159424, "grad_norm": 1.2365716695785522, "learning_rate": 9.98875500957832e-06, "loss": 0.8291, "step": 987 }, { "epoch": 0.05077603042450406, "grad_norm": 0.9223111867904663, "learning_rate": 9.98869915499309e-06, "loss": 0.812, "step": 988 }, { "epoch": 0.0508274231678487, "grad_norm": 1.2350711822509766, "learning_rate": 9.988643162191594e-06, "loss": 0.928, "step": 989 }, { "epoch": 0.05087881591119334, "grad_norm": 1.1816688776016235, "learning_rate": 9.988587031175384e-06, "loss": 0.8543, "step": 990 }, { "epoch": 0.05093020865453798, "grad_norm": 1.224225640296936, "learning_rate": 9.98853076194601e-06, "loss": 0.8251, "step": 991 }, { "epoch": 0.05098160139788262, "grad_norm": 0.9485036134719849, "learning_rate": 9.98847435450504e-06, "loss": 0.7785, "step": 992 }, { "epoch": 0.05103299414122726, "grad_norm": 1.2254879474639893, "learning_rate": 9.988417808854029e-06, "loss": 0.8685, "step": 993 }, { "epoch": 0.0510843868845719, "grad_norm": 1.220816731452942, "learning_rate": 9.988361124994547e-06, "loss": 0.8996, "step": 994 }, { "epoch": 0.05113577962791654, "grad_norm": 1.2276394367218018, "learning_rate": 9.988304302928165e-06, "loss": 0.8858, "step": 995 }, { "epoch": 0.05118717237126118, "grad_norm": 1.2470687627792358, "learning_rate": 9.988247342656456e-06, "loss": 0.871, "step": 996 }, { "epoch": 0.05123856511460582, "grad_norm": 1.3044482469558716, "learning_rate": 9.988190244180998e-06, "loss": 0.8866, "step": 997 }, { "epoch": 0.05128995785795046, "grad_norm": 0.8551509976387024, "learning_rate": 9.988133007503374e-06, "loss": 0.7724, "step": 998 }, { "epoch": 0.0513413506012951, "grad_norm": 1.4605845212936401, "learning_rate": 9.988075632625168e-06, "loss": 0.9155, "step": 999 }, { "epoch": 0.05139274334463974, "grad_norm": 1.2902426719665527, "learning_rate": 9.988018119547971e-06, "loss": 0.7996, "step": 1000 }, { "epoch": 0.05144413608798438, "grad_norm": 1.2236751317977905, "learning_rate": 9.987960468273377e-06, "loss": 0.9907, "step": 1001 }, { "epoch": 0.05149552883132902, "grad_norm": 1.1850382089614868, "learning_rate": 9.987902678802983e-06, "loss": 0.8435, "step": 1002 }, { "epoch": 0.05154692157467366, "grad_norm": 1.1683807373046875, "learning_rate": 9.987844751138389e-06, "loss": 0.7919, "step": 1003 }, { "epoch": 0.0515983143180183, "grad_norm": 1.381160020828247, "learning_rate": 9.9877866852812e-06, "loss": 0.8719, "step": 1004 }, { "epoch": 0.05164970706136294, "grad_norm": 1.1939815282821655, "learning_rate": 9.987728481233025e-06, "loss": 0.869, "step": 1005 }, { "epoch": 0.05170109980470758, "grad_norm": 1.1784522533416748, "learning_rate": 9.987670138995478e-06, "loss": 0.8336, "step": 1006 }, { "epoch": 0.05175249254805221, "grad_norm": 1.1805102825164795, "learning_rate": 9.987611658570174e-06, "loss": 0.9248, "step": 1007 }, { "epoch": 0.05180388529139685, "grad_norm": 0.8179561495780945, "learning_rate": 9.987553039958732e-06, "loss": 0.7525, "step": 1008 }, { "epoch": 0.05185527803474149, "grad_norm": 1.22408926486969, "learning_rate": 9.98749428316278e-06, "loss": 0.8827, "step": 1009 }, { "epoch": 0.05190667077808613, "grad_norm": 0.7488641142845154, "learning_rate": 9.987435388183944e-06, "loss": 0.7476, "step": 1010 }, { "epoch": 0.05195806352143077, "grad_norm": 1.181216835975647, "learning_rate": 9.987376355023853e-06, "loss": 0.9104, "step": 1011 }, { "epoch": 0.05200945626477541, "grad_norm": 1.1705787181854248, "learning_rate": 9.987317183684146e-06, "loss": 0.8537, "step": 1012 }, { "epoch": 0.05206084900812005, "grad_norm": 1.3003146648406982, "learning_rate": 9.987257874166461e-06, "loss": 0.8231, "step": 1013 }, { "epoch": 0.05211224175146469, "grad_norm": 1.1806389093399048, "learning_rate": 9.987198426472442e-06, "loss": 0.8397, "step": 1014 }, { "epoch": 0.05216363449480933, "grad_norm": 1.2405699491500854, "learning_rate": 9.987138840603735e-06, "loss": 0.8915, "step": 1015 }, { "epoch": 0.05221502723815397, "grad_norm": 1.2919827699661255, "learning_rate": 9.987079116561993e-06, "loss": 0.9294, "step": 1016 }, { "epoch": 0.05226641998149861, "grad_norm": 1.185698390007019, "learning_rate": 9.987019254348867e-06, "loss": 0.8878, "step": 1017 }, { "epoch": 0.05231781272484325, "grad_norm": 1.253759503364563, "learning_rate": 9.986959253966018e-06, "loss": 0.8949, "step": 1018 }, { "epoch": 0.05236920546818789, "grad_norm": 1.3481354713439941, "learning_rate": 9.98689911541511e-06, "loss": 0.8776, "step": 1019 }, { "epoch": 0.05242059821153253, "grad_norm": 1.232424020767212, "learning_rate": 9.986838838697806e-06, "loss": 0.8595, "step": 1020 }, { "epoch": 0.05247199095487717, "grad_norm": 0.9810240864753723, "learning_rate": 9.986778423815777e-06, "loss": 0.7827, "step": 1021 }, { "epoch": 0.052523383698221814, "grad_norm": 1.2821382284164429, "learning_rate": 9.986717870770697e-06, "loss": 0.9027, "step": 1022 }, { "epoch": 0.052574776441566454, "grad_norm": 1.1697237491607666, "learning_rate": 9.986657179564244e-06, "loss": 0.8659, "step": 1023 }, { "epoch": 0.052626169184911094, "grad_norm": 1.1487267017364502, "learning_rate": 9.986596350198099e-06, "loss": 0.8716, "step": 1024 }, { "epoch": 0.05267756192825573, "grad_norm": 1.1695270538330078, "learning_rate": 9.986535382673947e-06, "loss": 0.9102, "step": 1025 }, { "epoch": 0.05272895467160037, "grad_norm": 1.4008209705352783, "learning_rate": 9.98647427699348e-06, "loss": 0.902, "step": 1026 }, { "epoch": 0.05278034741494501, "grad_norm": 1.5199717283248901, "learning_rate": 9.986413033158386e-06, "loss": 0.9022, "step": 1027 }, { "epoch": 0.05283174015828965, "grad_norm": 0.9533934593200684, "learning_rate": 9.986351651170367e-06, "loss": 0.7045, "step": 1028 }, { "epoch": 0.05288313290163429, "grad_norm": 1.3180456161499023, "learning_rate": 9.98629013103112e-06, "loss": 0.8411, "step": 1029 }, { "epoch": 0.05293452564497893, "grad_norm": 1.2371271848678589, "learning_rate": 9.986228472742352e-06, "loss": 0.8687, "step": 1030 }, { "epoch": 0.05298591838832357, "grad_norm": 1.2746150493621826, "learning_rate": 9.986166676305767e-06, "loss": 0.881, "step": 1031 }, { "epoch": 0.05303731113166821, "grad_norm": 1.211668610572815, "learning_rate": 9.98610474172308e-06, "loss": 0.8549, "step": 1032 }, { "epoch": 0.05308870387501285, "grad_norm": 1.3090100288391113, "learning_rate": 9.986042668996012e-06, "loss": 0.8788, "step": 1033 }, { "epoch": 0.05314009661835749, "grad_norm": 1.2651063203811646, "learning_rate": 9.985980458126275e-06, "loss": 0.8467, "step": 1034 }, { "epoch": 0.05319148936170213, "grad_norm": 1.2291208505630493, "learning_rate": 9.985918109115594e-06, "loss": 0.8443, "step": 1035 }, { "epoch": 0.05324288210504677, "grad_norm": 1.2718762159347534, "learning_rate": 9.985855621965699e-06, "loss": 0.8285, "step": 1036 }, { "epoch": 0.05329427484839141, "grad_norm": 1.2881598472595215, "learning_rate": 9.98579299667832e-06, "loss": 0.9003, "step": 1037 }, { "epoch": 0.05334566759173605, "grad_norm": 1.1758185625076294, "learning_rate": 9.985730233255193e-06, "loss": 0.8947, "step": 1038 }, { "epoch": 0.05339706033508069, "grad_norm": 1.0350744724273682, "learning_rate": 9.985667331698056e-06, "loss": 0.7741, "step": 1039 }, { "epoch": 0.05344845307842533, "grad_norm": 1.23697829246521, "learning_rate": 9.985604292008651e-06, "loss": 0.8552, "step": 1040 }, { "epoch": 0.05349984582176997, "grad_norm": 1.2260633707046509, "learning_rate": 9.985541114188727e-06, "loss": 0.907, "step": 1041 }, { "epoch": 0.05355123856511461, "grad_norm": 0.8537032008171082, "learning_rate": 9.985477798240031e-06, "loss": 0.7334, "step": 1042 }, { "epoch": 0.05360263130845925, "grad_norm": 0.8612209558486938, "learning_rate": 9.985414344164319e-06, "loss": 0.7084, "step": 1043 }, { "epoch": 0.05365402405180388, "grad_norm": 1.2466439008712769, "learning_rate": 9.985350751963349e-06, "loss": 0.8141, "step": 1044 }, { "epoch": 0.05370541679514852, "grad_norm": 1.2100951671600342, "learning_rate": 9.985287021638885e-06, "loss": 0.8426, "step": 1045 }, { "epoch": 0.05375680953849316, "grad_norm": 1.2189053297042847, "learning_rate": 9.985223153192688e-06, "loss": 0.8117, "step": 1046 }, { "epoch": 0.0538082022818378, "grad_norm": 1.2657212018966675, "learning_rate": 9.985159146626533e-06, "loss": 0.8353, "step": 1047 }, { "epoch": 0.05385959502518244, "grad_norm": 1.265740156173706, "learning_rate": 9.985095001942189e-06, "loss": 0.9153, "step": 1048 }, { "epoch": 0.05391098776852708, "grad_norm": 1.2464641332626343, "learning_rate": 9.985030719141435e-06, "loss": 0.8382, "step": 1049 }, { "epoch": 0.05396238051187172, "grad_norm": 1.208024501800537, "learning_rate": 9.984966298226052e-06, "loss": 0.89, "step": 1050 }, { "epoch": 0.05401377325521636, "grad_norm": 1.2079896926879883, "learning_rate": 9.984901739197826e-06, "loss": 0.9109, "step": 1051 }, { "epoch": 0.054065165998561, "grad_norm": 1.1374354362487793, "learning_rate": 9.984837042058541e-06, "loss": 0.8636, "step": 1052 }, { "epoch": 0.05411655874190564, "grad_norm": 1.2355918884277344, "learning_rate": 9.984772206809995e-06, "loss": 0.8409, "step": 1053 }, { "epoch": 0.05416795148525028, "grad_norm": 0.7978951930999756, "learning_rate": 9.984707233453981e-06, "loss": 0.7453, "step": 1054 }, { "epoch": 0.05421934422859492, "grad_norm": 1.1659529209136963, "learning_rate": 9.984642121992302e-06, "loss": 0.8486, "step": 1055 }, { "epoch": 0.054270736971939564, "grad_norm": 1.1831881999969482, "learning_rate": 9.984576872426758e-06, "loss": 0.8362, "step": 1056 }, { "epoch": 0.054322129715284204, "grad_norm": 1.197871446609497, "learning_rate": 9.98451148475916e-06, "loss": 0.8395, "step": 1057 }, { "epoch": 0.054373522458628844, "grad_norm": 1.2231751680374146, "learning_rate": 9.98444595899132e-06, "loss": 0.8576, "step": 1058 }, { "epoch": 0.054424915201973484, "grad_norm": 1.2782758474349976, "learning_rate": 9.984380295125052e-06, "loss": 0.9037, "step": 1059 }, { "epoch": 0.054476307945318124, "grad_norm": 1.1367336511611938, "learning_rate": 9.984314493162172e-06, "loss": 0.8509, "step": 1060 }, { "epoch": 0.054527700688662764, "grad_norm": 0.8485698103904724, "learning_rate": 9.98424855310451e-06, "loss": 0.7943, "step": 1061 }, { "epoch": 0.0545790934320074, "grad_norm": 0.8685941696166992, "learning_rate": 9.984182474953887e-06, "loss": 0.7202, "step": 1062 }, { "epoch": 0.05463048617535204, "grad_norm": 1.2617424726486206, "learning_rate": 9.984116258712138e-06, "loss": 0.8514, "step": 1063 }, { "epoch": 0.05468187891869668, "grad_norm": 0.7770624756813049, "learning_rate": 9.984049904381095e-06, "loss": 0.7428, "step": 1064 }, { "epoch": 0.05473327166204132, "grad_norm": 0.9104249477386475, "learning_rate": 9.983983411962597e-06, "loss": 0.7272, "step": 1065 }, { "epoch": 0.05478466440538596, "grad_norm": 1.2631593942642212, "learning_rate": 9.983916781458485e-06, "loss": 0.8651, "step": 1066 }, { "epoch": 0.0548360571487306, "grad_norm": 1.2397582530975342, "learning_rate": 9.983850012870609e-06, "loss": 0.8765, "step": 1067 }, { "epoch": 0.05488744989207524, "grad_norm": 1.2812387943267822, "learning_rate": 9.983783106200814e-06, "loss": 0.8428, "step": 1068 }, { "epoch": 0.05493884263541988, "grad_norm": 1.2409775257110596, "learning_rate": 9.983716061450957e-06, "loss": 0.8867, "step": 1069 }, { "epoch": 0.05499023537876452, "grad_norm": 0.9984674453735352, "learning_rate": 9.983648878622894e-06, "loss": 0.7219, "step": 1070 }, { "epoch": 0.05504162812210916, "grad_norm": 1.2058324813842773, "learning_rate": 9.983581557718487e-06, "loss": 0.8227, "step": 1071 }, { "epoch": 0.0550930208654538, "grad_norm": 1.1356886625289917, "learning_rate": 9.983514098739602e-06, "loss": 0.8367, "step": 1072 }, { "epoch": 0.05514441360879844, "grad_norm": 0.8446734547615051, "learning_rate": 9.983446501688105e-06, "loss": 0.758, "step": 1073 }, { "epoch": 0.05519580635214308, "grad_norm": 1.3006490468978882, "learning_rate": 9.983378766565874e-06, "loss": 0.9235, "step": 1074 }, { "epoch": 0.05524719909548772, "grad_norm": 1.3172494173049927, "learning_rate": 9.983310893374781e-06, "loss": 0.8969, "step": 1075 }, { "epoch": 0.05529859183883236, "grad_norm": 1.1803165674209595, "learning_rate": 9.983242882116707e-06, "loss": 0.848, "step": 1076 }, { "epoch": 0.055349984582177, "grad_norm": 1.2701787948608398, "learning_rate": 9.98317473279354e-06, "loss": 0.884, "step": 1077 }, { "epoch": 0.05540137732552164, "grad_norm": 0.791339635848999, "learning_rate": 9.983106445407162e-06, "loss": 0.7379, "step": 1078 }, { "epoch": 0.05545277006886628, "grad_norm": 1.2090282440185547, "learning_rate": 9.98303801995947e-06, "loss": 0.8823, "step": 1079 }, { "epoch": 0.05550416281221091, "grad_norm": 1.262412190437317, "learning_rate": 9.98296945645236e-06, "loss": 0.8604, "step": 1080 }, { "epoch": 0.05555555555555555, "grad_norm": 1.3175773620605469, "learning_rate": 9.982900754887728e-06, "loss": 0.9153, "step": 1081 }, { "epoch": 0.05560694829890019, "grad_norm": 1.375179648399353, "learning_rate": 9.982831915267479e-06, "loss": 0.8324, "step": 1082 }, { "epoch": 0.05565834104224483, "grad_norm": 1.2052053213119507, "learning_rate": 9.982762937593522e-06, "loss": 0.8503, "step": 1083 }, { "epoch": 0.05570973378558947, "grad_norm": 0.7859538793563843, "learning_rate": 9.982693821867765e-06, "loss": 0.7321, "step": 1084 }, { "epoch": 0.05576112652893411, "grad_norm": 1.315988540649414, "learning_rate": 9.982624568092127e-06, "loss": 0.9078, "step": 1085 }, { "epoch": 0.05581251927227875, "grad_norm": 1.3059765100479126, "learning_rate": 9.982555176268522e-06, "loss": 0.8708, "step": 1086 }, { "epoch": 0.05586391201562339, "grad_norm": 0.8186153769493103, "learning_rate": 9.982485646398876e-06, "loss": 0.6849, "step": 1087 }, { "epoch": 0.05591530475896803, "grad_norm": 1.2100918292999268, "learning_rate": 9.982415978485114e-06, "loss": 0.8035, "step": 1088 }, { "epoch": 0.05596669750231267, "grad_norm": 1.2082200050354004, "learning_rate": 9.982346172529166e-06, "loss": 0.8517, "step": 1089 }, { "epoch": 0.056018090245657313, "grad_norm": 0.8391363024711609, "learning_rate": 9.982276228532967e-06, "loss": 0.7698, "step": 1090 }, { "epoch": 0.056069482989001954, "grad_norm": 1.2486977577209473, "learning_rate": 9.982206146498455e-06, "loss": 0.8893, "step": 1091 }, { "epoch": 0.056120875732346594, "grad_norm": 1.217722773551941, "learning_rate": 9.98213592642757e-06, "loss": 0.8639, "step": 1092 }, { "epoch": 0.056172268475691234, "grad_norm": 1.2651174068450928, "learning_rate": 9.982065568322258e-06, "loss": 0.8814, "step": 1093 }, { "epoch": 0.056223661219035874, "grad_norm": 1.2954434156417847, "learning_rate": 9.98199507218447e-06, "loss": 0.855, "step": 1094 }, { "epoch": 0.056275053962380514, "grad_norm": 1.2195677757263184, "learning_rate": 9.981924438016157e-06, "loss": 0.8541, "step": 1095 }, { "epoch": 0.056326446705725154, "grad_norm": 1.1401110887527466, "learning_rate": 9.981853665819277e-06, "loss": 0.7586, "step": 1096 }, { "epoch": 0.056377839449069794, "grad_norm": 1.1970553398132324, "learning_rate": 9.981782755595792e-06, "loss": 0.8537, "step": 1097 }, { "epoch": 0.056429232192414434, "grad_norm": 1.17902410030365, "learning_rate": 9.981711707347666e-06, "loss": 0.8833, "step": 1098 }, { "epoch": 0.05648062493575907, "grad_norm": 0.8351032137870789, "learning_rate": 9.981640521076865e-06, "loss": 0.7286, "step": 1099 }, { "epoch": 0.05653201767910371, "grad_norm": 1.3155903816223145, "learning_rate": 9.981569196785364e-06, "loss": 0.8347, "step": 1100 }, { "epoch": 0.05658341042244835, "grad_norm": 0.9494535326957703, "learning_rate": 9.981497734475138e-06, "loss": 0.7345, "step": 1101 }, { "epoch": 0.05663480316579299, "grad_norm": 1.194558024406433, "learning_rate": 9.981426134148167e-06, "loss": 0.904, "step": 1102 }, { "epoch": 0.05668619590913763, "grad_norm": 1.1780834197998047, "learning_rate": 9.981354395806434e-06, "loss": 0.8438, "step": 1103 }, { "epoch": 0.05673758865248227, "grad_norm": 1.2865723371505737, "learning_rate": 9.981282519451931e-06, "loss": 0.8595, "step": 1104 }, { "epoch": 0.05678898139582691, "grad_norm": 0.8499127626419067, "learning_rate": 9.981210505086644e-06, "loss": 0.6971, "step": 1105 }, { "epoch": 0.05684037413917155, "grad_norm": 1.257904052734375, "learning_rate": 9.98113835271257e-06, "loss": 0.8507, "step": 1106 }, { "epoch": 0.05689176688251619, "grad_norm": 1.2113239765167236, "learning_rate": 9.981066062331708e-06, "loss": 0.8779, "step": 1107 }, { "epoch": 0.05694315962586083, "grad_norm": 1.3840869665145874, "learning_rate": 9.98099363394606e-06, "loss": 0.9012, "step": 1108 }, { "epoch": 0.05699455236920547, "grad_norm": 1.2059961557388306, "learning_rate": 9.980921067557636e-06, "loss": 0.9265, "step": 1109 }, { "epoch": 0.05704594511255011, "grad_norm": 1.1659185886383057, "learning_rate": 9.980848363168443e-06, "loss": 0.8556, "step": 1110 }, { "epoch": 0.05709733785589475, "grad_norm": 1.2384744882583618, "learning_rate": 9.980775520780497e-06, "loss": 0.8517, "step": 1111 }, { "epoch": 0.05714873059923939, "grad_norm": 1.1806620359420776, "learning_rate": 9.980702540395815e-06, "loss": 0.8619, "step": 1112 }, { "epoch": 0.05720012334258403, "grad_norm": 1.2017359733581543, "learning_rate": 9.98062942201642e-06, "loss": 0.8324, "step": 1113 }, { "epoch": 0.05725151608592867, "grad_norm": 1.1942718029022217, "learning_rate": 9.980556165644339e-06, "loss": 0.8244, "step": 1114 }, { "epoch": 0.05730290882927331, "grad_norm": 1.211240530014038, "learning_rate": 9.9804827712816e-06, "loss": 0.8238, "step": 1115 }, { "epoch": 0.05735430157261795, "grad_norm": 1.1809043884277344, "learning_rate": 9.980409238930235e-06, "loss": 0.8169, "step": 1116 }, { "epoch": 0.05740569431596258, "grad_norm": 1.2746447324752808, "learning_rate": 9.980335568592285e-06, "loss": 0.8177, "step": 1117 }, { "epoch": 0.05745708705930722, "grad_norm": 1.225766658782959, "learning_rate": 9.980261760269787e-06, "loss": 0.8119, "step": 1118 }, { "epoch": 0.05750847980265186, "grad_norm": 0.8702387809753418, "learning_rate": 9.980187813964789e-06, "loss": 0.7458, "step": 1119 }, { "epoch": 0.0575598725459965, "grad_norm": 1.1936593055725098, "learning_rate": 9.980113729679338e-06, "loss": 0.8993, "step": 1120 }, { "epoch": 0.05761126528934114, "grad_norm": 0.8612618446350098, "learning_rate": 9.980039507415488e-06, "loss": 0.7113, "step": 1121 }, { "epoch": 0.05766265803268578, "grad_norm": 1.2471098899841309, "learning_rate": 9.979965147175295e-06, "loss": 0.8518, "step": 1122 }, { "epoch": 0.05771405077603042, "grad_norm": 1.209656834602356, "learning_rate": 9.979890648960816e-06, "loss": 0.8756, "step": 1123 }, { "epoch": 0.05776544351937506, "grad_norm": 1.309747576713562, "learning_rate": 9.979816012774122e-06, "loss": 0.8305, "step": 1124 }, { "epoch": 0.057816836262719704, "grad_norm": 0.8830257654190063, "learning_rate": 9.979741238617275e-06, "loss": 0.7598, "step": 1125 }, { "epoch": 0.057868229006064344, "grad_norm": 1.198681116104126, "learning_rate": 9.979666326492348e-06, "loss": 0.8055, "step": 1126 }, { "epoch": 0.057919621749408984, "grad_norm": 0.9580893516540527, "learning_rate": 9.979591276401417e-06, "loss": 0.7818, "step": 1127 }, { "epoch": 0.057971014492753624, "grad_norm": 1.212040901184082, "learning_rate": 9.979516088346562e-06, "loss": 0.8292, "step": 1128 }, { "epoch": 0.058022407236098264, "grad_norm": 1.2667771577835083, "learning_rate": 9.979440762329863e-06, "loss": 0.8984, "step": 1129 }, { "epoch": 0.058073799979442904, "grad_norm": 1.2194665670394897, "learning_rate": 9.979365298353412e-06, "loss": 0.8714, "step": 1130 }, { "epoch": 0.058125192722787544, "grad_norm": 1.157158374786377, "learning_rate": 9.979289696419298e-06, "loss": 0.8365, "step": 1131 }, { "epoch": 0.058176585466132184, "grad_norm": 1.224460244178772, "learning_rate": 9.979213956529613e-06, "loss": 0.8659, "step": 1132 }, { "epoch": 0.058227978209476824, "grad_norm": 1.1096312999725342, "learning_rate": 9.979138078686459e-06, "loss": 0.7563, "step": 1133 }, { "epoch": 0.058279370952821465, "grad_norm": 1.2722305059432983, "learning_rate": 9.979062062891934e-06, "loss": 0.8694, "step": 1134 }, { "epoch": 0.058330763696166105, "grad_norm": 1.2927436828613281, "learning_rate": 9.978985909148148e-06, "loss": 0.8451, "step": 1135 }, { "epoch": 0.05838215643951074, "grad_norm": 1.1044056415557861, "learning_rate": 9.978909617457208e-06, "loss": 0.8249, "step": 1136 }, { "epoch": 0.05843354918285538, "grad_norm": 1.2341526746749878, "learning_rate": 9.978833187821232e-06, "loss": 0.9132, "step": 1137 }, { "epoch": 0.05848494192620002, "grad_norm": 1.2127639055252075, "learning_rate": 9.978756620242333e-06, "loss": 0.864, "step": 1138 }, { "epoch": 0.05853633466954466, "grad_norm": 1.2146912813186646, "learning_rate": 9.978679914722636e-06, "loss": 0.8296, "step": 1139 }, { "epoch": 0.0585877274128893, "grad_norm": 1.2359148263931274, "learning_rate": 9.978603071264263e-06, "loss": 0.8225, "step": 1140 }, { "epoch": 0.05863912015623394, "grad_norm": 1.1174594163894653, "learning_rate": 9.978526089869344e-06, "loss": 0.7333, "step": 1141 }, { "epoch": 0.05869051289957858, "grad_norm": 0.9986005425453186, "learning_rate": 9.978448970540013e-06, "loss": 0.7699, "step": 1142 }, { "epoch": 0.05874190564292322, "grad_norm": 1.2127052545547485, "learning_rate": 9.978371713278405e-06, "loss": 0.8283, "step": 1143 }, { "epoch": 0.05879329838626786, "grad_norm": 1.301411509513855, "learning_rate": 9.978294318086661e-06, "loss": 0.9018, "step": 1144 }, { "epoch": 0.0588446911296125, "grad_norm": 1.2672181129455566, "learning_rate": 9.978216784966927e-06, "loss": 0.863, "step": 1145 }, { "epoch": 0.05889608387295714, "grad_norm": 2.0091192722320557, "learning_rate": 9.97813911392135e-06, "loss": 0.9362, "step": 1146 }, { "epoch": 0.05894747661630178, "grad_norm": 1.2876921892166138, "learning_rate": 9.97806130495208e-06, "loss": 0.8502, "step": 1147 }, { "epoch": 0.05899886935964642, "grad_norm": 1.1820954084396362, "learning_rate": 9.977983358061276e-06, "loss": 0.885, "step": 1148 }, { "epoch": 0.05905026210299106, "grad_norm": 1.2279303073883057, "learning_rate": 9.977905273251094e-06, "loss": 0.8557, "step": 1149 }, { "epoch": 0.0591016548463357, "grad_norm": 1.0473719835281372, "learning_rate": 9.977827050523703e-06, "loss": 0.7275, "step": 1150 }, { "epoch": 0.05915304758968034, "grad_norm": 1.1875505447387695, "learning_rate": 9.977748689881263e-06, "loss": 0.8613, "step": 1151 }, { "epoch": 0.05920444033302498, "grad_norm": 1.2748993635177612, "learning_rate": 9.977670191325951e-06, "loss": 0.8957, "step": 1152 }, { "epoch": 0.05925583307636962, "grad_norm": 1.1879079341888428, "learning_rate": 9.97759155485994e-06, "loss": 0.8217, "step": 1153 }, { "epoch": 0.05930722581971425, "grad_norm": 1.2057673931121826, "learning_rate": 9.977512780485407e-06, "loss": 0.8568, "step": 1154 }, { "epoch": 0.05935861856305889, "grad_norm": 1.3518967628479004, "learning_rate": 9.977433868204536e-06, "loss": 0.8646, "step": 1155 }, { "epoch": 0.05941001130640353, "grad_norm": 1.3617931604385376, "learning_rate": 9.977354818019514e-06, "loss": 0.8135, "step": 1156 }, { "epoch": 0.05946140404974817, "grad_norm": 1.2480753660202026, "learning_rate": 9.97727562993253e-06, "loss": 0.8801, "step": 1157 }, { "epoch": 0.05951279679309281, "grad_norm": 1.1186765432357788, "learning_rate": 9.977196303945778e-06, "loss": 0.7491, "step": 1158 }, { "epoch": 0.059564189536437454, "grad_norm": 1.4840799570083618, "learning_rate": 9.977116840061456e-06, "loss": 0.8154, "step": 1159 }, { "epoch": 0.059615582279782094, "grad_norm": 1.2440422773361206, "learning_rate": 9.977037238281766e-06, "loss": 0.8596, "step": 1160 }, { "epoch": 0.059666975023126734, "grad_norm": 1.3036857843399048, "learning_rate": 9.976957498608913e-06, "loss": 0.7861, "step": 1161 }, { "epoch": 0.059718367766471374, "grad_norm": 1.229193925857544, "learning_rate": 9.976877621045106e-06, "loss": 0.8558, "step": 1162 }, { "epoch": 0.059769760509816014, "grad_norm": 1.1989786624908447, "learning_rate": 9.97679760559256e-06, "loss": 0.8448, "step": 1163 }, { "epoch": 0.059821153253160654, "grad_norm": 1.264654278755188, "learning_rate": 9.976717452253488e-06, "loss": 0.8425, "step": 1164 }, { "epoch": 0.059872545996505294, "grad_norm": 1.127537488937378, "learning_rate": 9.976637161030114e-06, "loss": 0.8465, "step": 1165 }, { "epoch": 0.059923938739849934, "grad_norm": 1.2033878564834595, "learning_rate": 9.976556731924663e-06, "loss": 0.8519, "step": 1166 }, { "epoch": 0.059975331483194574, "grad_norm": 0.886642575263977, "learning_rate": 9.976476164939361e-06, "loss": 0.6987, "step": 1167 }, { "epoch": 0.060026724226539215, "grad_norm": 1.2378264665603638, "learning_rate": 9.97639546007644e-06, "loss": 0.8623, "step": 1168 }, { "epoch": 0.060078116969883855, "grad_norm": 1.2321252822875977, "learning_rate": 9.976314617338139e-06, "loss": 0.8544, "step": 1169 }, { "epoch": 0.060129509713228495, "grad_norm": 1.206283450126648, "learning_rate": 9.976233636726696e-06, "loss": 0.8491, "step": 1170 }, { "epoch": 0.060180902456573135, "grad_norm": 1.1792248487472534, "learning_rate": 9.976152518244352e-06, "loss": 0.8634, "step": 1171 }, { "epoch": 0.060232295199917775, "grad_norm": 1.2804940938949585, "learning_rate": 9.976071261893361e-06, "loss": 0.9686, "step": 1172 }, { "epoch": 0.06028368794326241, "grad_norm": 1.231398344039917, "learning_rate": 9.975989867675968e-06, "loss": 0.8894, "step": 1173 }, { "epoch": 0.06033508068660705, "grad_norm": 1.161859154701233, "learning_rate": 9.97590833559443e-06, "loss": 0.836, "step": 1174 }, { "epoch": 0.06038647342995169, "grad_norm": 1.2021398544311523, "learning_rate": 9.975826665651007e-06, "loss": 0.8787, "step": 1175 }, { "epoch": 0.06043786617329633, "grad_norm": 1.1408778429031372, "learning_rate": 9.975744857847963e-06, "loss": 0.8159, "step": 1176 }, { "epoch": 0.06048925891664097, "grad_norm": 1.0622419118881226, "learning_rate": 9.97566291218756e-06, "loss": 0.7685, "step": 1177 }, { "epoch": 0.06054065165998561, "grad_norm": 1.140544056892395, "learning_rate": 9.975580828672072e-06, "loss": 0.836, "step": 1178 }, { "epoch": 0.06059204440333025, "grad_norm": 1.323164701461792, "learning_rate": 9.975498607303772e-06, "loss": 0.8441, "step": 1179 }, { "epoch": 0.06064343714667489, "grad_norm": 0.9059852957725525, "learning_rate": 9.975416248084939e-06, "loss": 0.7244, "step": 1180 }, { "epoch": 0.06069482989001953, "grad_norm": 1.1558243036270142, "learning_rate": 9.975333751017856e-06, "loss": 0.8689, "step": 1181 }, { "epoch": 0.06074622263336417, "grad_norm": 1.23139488697052, "learning_rate": 9.975251116104802e-06, "loss": 0.8863, "step": 1182 }, { "epoch": 0.06079761537670881, "grad_norm": 1.1719777584075928, "learning_rate": 9.975168343348074e-06, "loss": 0.8845, "step": 1183 }, { "epoch": 0.06084900812005345, "grad_norm": 1.1954281330108643, "learning_rate": 9.975085432749962e-06, "loss": 0.8835, "step": 1184 }, { "epoch": 0.06090040086339809, "grad_norm": 1.0291945934295654, "learning_rate": 9.975002384312764e-06, "loss": 0.758, "step": 1185 }, { "epoch": 0.06095179360674273, "grad_norm": 1.1825456619262695, "learning_rate": 9.974919198038782e-06, "loss": 0.8585, "step": 1186 }, { "epoch": 0.06100318635008737, "grad_norm": 1.1498017311096191, "learning_rate": 9.974835873930316e-06, "loss": 0.8437, "step": 1187 }, { "epoch": 0.06105457909343201, "grad_norm": 1.1359291076660156, "learning_rate": 9.97475241198968e-06, "loss": 0.8942, "step": 1188 }, { "epoch": 0.06110597183677665, "grad_norm": 1.1776137351989746, "learning_rate": 9.974668812219184e-06, "loss": 0.8171, "step": 1189 }, { "epoch": 0.06115736458012129, "grad_norm": 1.1480019092559814, "learning_rate": 9.974585074621143e-06, "loss": 0.8753, "step": 1190 }, { "epoch": 0.06120875732346592, "grad_norm": 0.8512030839920044, "learning_rate": 9.974501199197882e-06, "loss": 0.7772, "step": 1191 }, { "epoch": 0.06126015006681056, "grad_norm": 0.8866260647773743, "learning_rate": 9.974417185951718e-06, "loss": 0.6972, "step": 1192 }, { "epoch": 0.0613115428101552, "grad_norm": 1.0050370693206787, "learning_rate": 9.974333034884983e-06, "loss": 0.7133, "step": 1193 }, { "epoch": 0.061362935553499844, "grad_norm": 1.2891030311584473, "learning_rate": 9.974248746000007e-06, "loss": 0.899, "step": 1194 }, { "epoch": 0.061414328296844484, "grad_norm": 1.2006300687789917, "learning_rate": 9.974164319299127e-06, "loss": 0.8579, "step": 1195 }, { "epoch": 0.061465721040189124, "grad_norm": 0.8573015928268433, "learning_rate": 9.97407975478468e-06, "loss": 0.759, "step": 1196 }, { "epoch": 0.061517113783533764, "grad_norm": 0.7824249267578125, "learning_rate": 9.97399505245901e-06, "loss": 0.7519, "step": 1197 }, { "epoch": 0.061568506526878404, "grad_norm": 1.1752259731292725, "learning_rate": 9.973910212324463e-06, "loss": 0.8405, "step": 1198 }, { "epoch": 0.061619899270223044, "grad_norm": 0.8630449771881104, "learning_rate": 9.973825234383392e-06, "loss": 0.7313, "step": 1199 }, { "epoch": 0.061671292013567684, "grad_norm": 1.2532098293304443, "learning_rate": 9.973740118638147e-06, "loss": 0.8543, "step": 1200 }, { "epoch": 0.061722684756912324, "grad_norm": 1.2244421243667603, "learning_rate": 9.97365486509109e-06, "loss": 0.8644, "step": 1201 }, { "epoch": 0.061774077500256964, "grad_norm": 1.1914186477661133, "learning_rate": 9.973569473744583e-06, "loss": 0.8245, "step": 1202 }, { "epoch": 0.061825470243601605, "grad_norm": 1.2026646137237549, "learning_rate": 9.973483944600987e-06, "loss": 0.8265, "step": 1203 }, { "epoch": 0.061876862986946245, "grad_norm": 1.1772708892822266, "learning_rate": 9.973398277662678e-06, "loss": 0.8637, "step": 1204 }, { "epoch": 0.061928255730290885, "grad_norm": 1.220398187637329, "learning_rate": 9.973312472932026e-06, "loss": 0.8277, "step": 1205 }, { "epoch": 0.061979648473635525, "grad_norm": 1.199230670928955, "learning_rate": 9.97322653041141e-06, "loss": 0.8668, "step": 1206 }, { "epoch": 0.062031041216980165, "grad_norm": 1.1803576946258545, "learning_rate": 9.973140450103209e-06, "loss": 0.8534, "step": 1207 }, { "epoch": 0.062082433960324805, "grad_norm": 1.1516332626342773, "learning_rate": 9.97305423200981e-06, "loss": 0.8641, "step": 1208 }, { "epoch": 0.062133826703669445, "grad_norm": 1.1796621084213257, "learning_rate": 9.972967876133602e-06, "loss": 0.8675, "step": 1209 }, { "epoch": 0.06218521944701408, "grad_norm": 1.1729705333709717, "learning_rate": 9.972881382476974e-06, "loss": 0.8338, "step": 1210 }, { "epoch": 0.06223661219035872, "grad_norm": 1.1264246702194214, "learning_rate": 9.972794751042324e-06, "loss": 0.8247, "step": 1211 }, { "epoch": 0.06228800493370336, "grad_norm": 1.191347360610962, "learning_rate": 9.972707981832055e-06, "loss": 0.8228, "step": 1212 }, { "epoch": 0.062339397677048, "grad_norm": 0.9049070477485657, "learning_rate": 9.972621074848569e-06, "loss": 0.7365, "step": 1213 }, { "epoch": 0.06239079042039264, "grad_norm": 1.1573753356933594, "learning_rate": 9.972534030094272e-06, "loss": 0.8444, "step": 1214 }, { "epoch": 0.06244218316373728, "grad_norm": 1.2291463613510132, "learning_rate": 9.97244684757158e-06, "loss": 0.7692, "step": 1215 }, { "epoch": 0.06249357590708192, "grad_norm": 1.3106968402862549, "learning_rate": 9.972359527282905e-06, "loss": 0.8194, "step": 1216 }, { "epoch": 0.06254496865042657, "grad_norm": 1.1492574214935303, "learning_rate": 9.972272069230667e-06, "loss": 0.7946, "step": 1217 }, { "epoch": 0.0625963613937712, "grad_norm": 1.2317800521850586, "learning_rate": 9.972184473417287e-06, "loss": 0.9376, "step": 1218 }, { "epoch": 0.06264775413711583, "grad_norm": 1.166545033454895, "learning_rate": 9.972096739845196e-06, "loss": 0.8414, "step": 1219 }, { "epoch": 0.06269914688046048, "grad_norm": 1.1994495391845703, "learning_rate": 9.972008868516826e-06, "loss": 0.8601, "step": 1220 }, { "epoch": 0.06275053962380511, "grad_norm": 1.2833749055862427, "learning_rate": 9.971920859434604e-06, "loss": 0.8464, "step": 1221 }, { "epoch": 0.06280193236714976, "grad_norm": 1.2128567695617676, "learning_rate": 9.971832712600976e-06, "loss": 0.9019, "step": 1222 }, { "epoch": 0.06285332511049439, "grad_norm": 1.1879664659500122, "learning_rate": 9.971744428018378e-06, "loss": 0.8314, "step": 1223 }, { "epoch": 0.06290471785383904, "grad_norm": 1.148087978363037, "learning_rate": 9.97165600568926e-06, "loss": 0.7985, "step": 1224 }, { "epoch": 0.06295611059718367, "grad_norm": 1.1824296712875366, "learning_rate": 9.971567445616072e-06, "loss": 0.8649, "step": 1225 }, { "epoch": 0.06300750334052832, "grad_norm": 0.8248127102851868, "learning_rate": 9.971478747801268e-06, "loss": 0.7213, "step": 1226 }, { "epoch": 0.06305889608387295, "grad_norm": 1.2181551456451416, "learning_rate": 9.971389912247302e-06, "loss": 0.8074, "step": 1227 }, { "epoch": 0.0631102888272176, "grad_norm": 0.7653465270996094, "learning_rate": 9.971300938956638e-06, "loss": 0.723, "step": 1228 }, { "epoch": 0.06316168157056223, "grad_norm": 1.2489670515060425, "learning_rate": 9.97121182793174e-06, "loss": 0.8389, "step": 1229 }, { "epoch": 0.06321307431390688, "grad_norm": 1.2638297080993652, "learning_rate": 9.971122579175077e-06, "loss": 0.8699, "step": 1230 }, { "epoch": 0.06326446705725151, "grad_norm": 1.1709225177764893, "learning_rate": 9.971033192689121e-06, "loss": 0.8145, "step": 1231 }, { "epoch": 0.06331585980059616, "grad_norm": 1.1956020593643188, "learning_rate": 9.970943668476351e-06, "loss": 0.8733, "step": 1232 }, { "epoch": 0.0633672525439408, "grad_norm": 1.2909343242645264, "learning_rate": 9.970854006539246e-06, "loss": 0.8841, "step": 1233 }, { "epoch": 0.06341864528728544, "grad_norm": 0.8098524212837219, "learning_rate": 9.970764206880289e-06, "loss": 0.7543, "step": 1234 }, { "epoch": 0.06347003803063007, "grad_norm": 1.2524017095565796, "learning_rate": 9.970674269501968e-06, "loss": 0.8381, "step": 1235 }, { "epoch": 0.06352143077397472, "grad_norm": 1.1908453702926636, "learning_rate": 9.970584194406779e-06, "loss": 0.8495, "step": 1236 }, { "epoch": 0.06357282351731935, "grad_norm": 1.1576836109161377, "learning_rate": 9.97049398159721e-06, "loss": 0.8073, "step": 1237 }, { "epoch": 0.06362421626066399, "grad_norm": 1.2045015096664429, "learning_rate": 9.970403631075768e-06, "loss": 0.8404, "step": 1238 }, { "epoch": 0.06367560900400863, "grad_norm": 1.183829665184021, "learning_rate": 9.970313142844951e-06, "loss": 0.8123, "step": 1239 }, { "epoch": 0.06372700174735327, "grad_norm": 1.152247428894043, "learning_rate": 9.97022251690727e-06, "loss": 0.8696, "step": 1240 }, { "epoch": 0.06377839449069791, "grad_norm": 1.2260886430740356, "learning_rate": 9.970131753265234e-06, "loss": 0.8567, "step": 1241 }, { "epoch": 0.06382978723404255, "grad_norm": 1.2068758010864258, "learning_rate": 9.970040851921356e-06, "loss": 0.8128, "step": 1242 }, { "epoch": 0.0638811799773872, "grad_norm": 1.2278228998184204, "learning_rate": 9.969949812878158e-06, "loss": 0.7683, "step": 1243 }, { "epoch": 0.06393257272073183, "grad_norm": 1.1791683435440063, "learning_rate": 9.969858636138158e-06, "loss": 0.8496, "step": 1244 }, { "epoch": 0.06398396546407648, "grad_norm": 1.3437433242797852, "learning_rate": 9.969767321703886e-06, "loss": 0.8125, "step": 1245 }, { "epoch": 0.06403535820742111, "grad_norm": 1.2271130084991455, "learning_rate": 9.969675869577873e-06, "loss": 0.8836, "step": 1246 }, { "epoch": 0.06408675095076576, "grad_norm": 1.219010353088379, "learning_rate": 9.969584279762648e-06, "loss": 0.8354, "step": 1247 }, { "epoch": 0.06413814369411039, "grad_norm": 0.8295942544937134, "learning_rate": 9.96949255226075e-06, "loss": 0.7467, "step": 1248 }, { "epoch": 0.06418953643745504, "grad_norm": 1.1781601905822754, "learning_rate": 9.969400687074722e-06, "loss": 0.8542, "step": 1249 }, { "epoch": 0.06424092918079967, "grad_norm": 1.2492057085037231, "learning_rate": 9.969308684207109e-06, "loss": 0.8038, "step": 1250 }, { "epoch": 0.06429232192414432, "grad_norm": 1.21192467212677, "learning_rate": 9.969216543660458e-06, "loss": 0.8889, "step": 1251 }, { "epoch": 0.06434371466748895, "grad_norm": 1.3369771242141724, "learning_rate": 9.969124265437325e-06, "loss": 0.8664, "step": 1252 }, { "epoch": 0.0643951074108336, "grad_norm": 1.2625089883804321, "learning_rate": 9.969031849540263e-06, "loss": 0.8652, "step": 1253 }, { "epoch": 0.06444650015417823, "grad_norm": 1.2198420763015747, "learning_rate": 9.968939295971837e-06, "loss": 0.8287, "step": 1254 }, { "epoch": 0.06449789289752288, "grad_norm": 0.9190962910652161, "learning_rate": 9.968846604734608e-06, "loss": 0.7637, "step": 1255 }, { "epoch": 0.06454928564086751, "grad_norm": 1.1250752210617065, "learning_rate": 9.968753775831144e-06, "loss": 0.8618, "step": 1256 }, { "epoch": 0.06460067838421214, "grad_norm": 1.2384765148162842, "learning_rate": 9.968660809264016e-06, "loss": 0.8549, "step": 1257 }, { "epoch": 0.06465207112755679, "grad_norm": 1.2538996934890747, "learning_rate": 9.968567705035805e-06, "loss": 0.8815, "step": 1258 }, { "epoch": 0.06470346387090142, "grad_norm": 1.16378653049469, "learning_rate": 9.968474463149084e-06, "loss": 0.8147, "step": 1259 }, { "epoch": 0.06475485661424607, "grad_norm": 1.2450807094573975, "learning_rate": 9.968381083606442e-06, "loss": 0.888, "step": 1260 }, { "epoch": 0.0648062493575907, "grad_norm": 1.2954580783843994, "learning_rate": 9.968287566410461e-06, "loss": 0.8838, "step": 1261 }, { "epoch": 0.06485764210093535, "grad_norm": 1.27425217628479, "learning_rate": 9.968193911563735e-06, "loss": 0.8145, "step": 1262 }, { "epoch": 0.06490903484427998, "grad_norm": 1.2476718425750732, "learning_rate": 9.96810011906886e-06, "loss": 0.9031, "step": 1263 }, { "epoch": 0.06496042758762463, "grad_norm": 0.7890223860740662, "learning_rate": 9.968006188928432e-06, "loss": 0.7331, "step": 1264 }, { "epoch": 0.06501182033096926, "grad_norm": 0.8047551512718201, "learning_rate": 9.967912121145054e-06, "loss": 0.7191, "step": 1265 }, { "epoch": 0.06506321307431391, "grad_norm": 0.8476386070251465, "learning_rate": 9.967817915721332e-06, "loss": 0.7867, "step": 1266 }, { "epoch": 0.06511460581765854, "grad_norm": 1.2589093446731567, "learning_rate": 9.967723572659876e-06, "loss": 0.9136, "step": 1267 }, { "epoch": 0.06516599856100319, "grad_norm": 1.2061235904693604, "learning_rate": 9.967629091963302e-06, "loss": 0.8366, "step": 1268 }, { "epoch": 0.06521739130434782, "grad_norm": 1.2397112846374512, "learning_rate": 9.967534473634224e-06, "loss": 0.9452, "step": 1269 }, { "epoch": 0.06526878404769247, "grad_norm": 1.2598036527633667, "learning_rate": 9.967439717675269e-06, "loss": 0.8465, "step": 1270 }, { "epoch": 0.0653201767910371, "grad_norm": 1.2256470918655396, "learning_rate": 9.967344824089056e-06, "loss": 0.7987, "step": 1271 }, { "epoch": 0.06537156953438175, "grad_norm": 1.2414723634719849, "learning_rate": 9.967249792878217e-06, "loss": 0.8307, "step": 1272 }, { "epoch": 0.06542296227772638, "grad_norm": 1.166878581047058, "learning_rate": 9.967154624045385e-06, "loss": 0.8482, "step": 1273 }, { "epoch": 0.06547435502107102, "grad_norm": 1.2057427167892456, "learning_rate": 9.967059317593197e-06, "loss": 0.9112, "step": 1274 }, { "epoch": 0.06552574776441566, "grad_norm": 0.9161434173583984, "learning_rate": 9.966963873524294e-06, "loss": 0.7217, "step": 1275 }, { "epoch": 0.0655771405077603, "grad_norm": 1.271059274673462, "learning_rate": 9.966868291841318e-06, "loss": 0.8414, "step": 1276 }, { "epoch": 0.06562853325110495, "grad_norm": 1.275327205657959, "learning_rate": 9.96677257254692e-06, "loss": 0.8452, "step": 1277 }, { "epoch": 0.06567992599444958, "grad_norm": 1.2687252759933472, "learning_rate": 9.966676715643751e-06, "loss": 0.8467, "step": 1278 }, { "epoch": 0.06573131873779423, "grad_norm": 1.3421111106872559, "learning_rate": 9.966580721134465e-06, "loss": 0.793, "step": 1279 }, { "epoch": 0.06578271148113886, "grad_norm": 1.2187787294387817, "learning_rate": 9.966484589021724e-06, "loss": 0.8333, "step": 1280 }, { "epoch": 0.0658341042244835, "grad_norm": 1.2390424013137817, "learning_rate": 9.966388319308191e-06, "loss": 0.8797, "step": 1281 }, { "epoch": 0.06588549696782814, "grad_norm": 1.1580936908721924, "learning_rate": 9.96629191199653e-06, "loss": 0.8206, "step": 1282 }, { "epoch": 0.06593688971117279, "grad_norm": 1.034580945968628, "learning_rate": 9.966195367089418e-06, "loss": 0.7395, "step": 1283 }, { "epoch": 0.06598828245451742, "grad_norm": 1.2569457292556763, "learning_rate": 9.966098684589527e-06, "loss": 0.8635, "step": 1284 }, { "epoch": 0.06603967519786207, "grad_norm": 0.8791694641113281, "learning_rate": 9.966001864499533e-06, "loss": 0.7715, "step": 1285 }, { "epoch": 0.0660910679412067, "grad_norm": 1.3085097074508667, "learning_rate": 9.965904906822121e-06, "loss": 0.9, "step": 1286 }, { "epoch": 0.06614246068455135, "grad_norm": 0.828238308429718, "learning_rate": 9.965807811559978e-06, "loss": 0.7455, "step": 1287 }, { "epoch": 0.06619385342789598, "grad_norm": 1.1314805746078491, "learning_rate": 9.965710578715792e-06, "loss": 0.8649, "step": 1288 }, { "epoch": 0.06624524617124063, "grad_norm": 1.1361806392669678, "learning_rate": 9.96561320829226e-06, "loss": 0.806, "step": 1289 }, { "epoch": 0.06629663891458526, "grad_norm": 1.2397502660751343, "learning_rate": 9.965515700292076e-06, "loss": 0.8575, "step": 1290 }, { "epoch": 0.0663480316579299, "grad_norm": 1.1956923007965088, "learning_rate": 9.965418054717944e-06, "loss": 0.8262, "step": 1291 }, { "epoch": 0.06639942440127454, "grad_norm": 1.2116965055465698, "learning_rate": 9.96532027157257e-06, "loss": 0.8852, "step": 1292 }, { "epoch": 0.06645081714461917, "grad_norm": 1.07035231590271, "learning_rate": 9.965222350858661e-06, "loss": 0.7711, "step": 1293 }, { "epoch": 0.06650220988796382, "grad_norm": 1.1506820917129517, "learning_rate": 9.965124292578932e-06, "loss": 0.8336, "step": 1294 }, { "epoch": 0.06655360263130845, "grad_norm": 0.8365129828453064, "learning_rate": 9.965026096736097e-06, "loss": 0.7489, "step": 1295 }, { "epoch": 0.0666049953746531, "grad_norm": 1.1129027605056763, "learning_rate": 9.964927763332879e-06, "loss": 0.8311, "step": 1296 }, { "epoch": 0.06665638811799773, "grad_norm": 1.1906336545944214, "learning_rate": 9.964829292372001e-06, "loss": 0.8124, "step": 1297 }, { "epoch": 0.06670778086134238, "grad_norm": 1.1677844524383545, "learning_rate": 9.964730683856194e-06, "loss": 0.8292, "step": 1298 }, { "epoch": 0.06675917360468701, "grad_norm": 1.1898362636566162, "learning_rate": 9.964631937788185e-06, "loss": 0.8468, "step": 1299 }, { "epoch": 0.06681056634803166, "grad_norm": 1.079712152481079, "learning_rate": 9.964533054170716e-06, "loss": 0.7879, "step": 1300 }, { "epoch": 0.0668619590913763, "grad_norm": 0.9606438875198364, "learning_rate": 9.96443403300652e-06, "loss": 0.7172, "step": 1301 }, { "epoch": 0.06691335183472094, "grad_norm": 1.185306429862976, "learning_rate": 9.964334874298347e-06, "loss": 0.8675, "step": 1302 }, { "epoch": 0.06696474457806557, "grad_norm": 1.1964657306671143, "learning_rate": 9.96423557804894e-06, "loss": 0.8073, "step": 1303 }, { "epoch": 0.06701613732141022, "grad_norm": 1.2034879922866821, "learning_rate": 9.964136144261051e-06, "loss": 0.8471, "step": 1304 }, { "epoch": 0.06706753006475485, "grad_norm": 1.229433536529541, "learning_rate": 9.964036572937436e-06, "loss": 0.783, "step": 1305 }, { "epoch": 0.0671189228080995, "grad_norm": 1.227113962173462, "learning_rate": 9.963936864080854e-06, "loss": 0.8878, "step": 1306 }, { "epoch": 0.06717031555144413, "grad_norm": 1.1944153308868408, "learning_rate": 9.963837017694065e-06, "loss": 0.7389, "step": 1307 }, { "epoch": 0.06722170829478878, "grad_norm": 1.3319650888442993, "learning_rate": 9.963737033779837e-06, "loss": 0.804, "step": 1308 }, { "epoch": 0.06727310103813341, "grad_norm": 1.2119909524917603, "learning_rate": 9.96363691234094e-06, "loss": 0.8445, "step": 1309 }, { "epoch": 0.06732449378147806, "grad_norm": 1.1178308725357056, "learning_rate": 9.963536653380145e-06, "loss": 0.8293, "step": 1310 }, { "epoch": 0.0673758865248227, "grad_norm": 1.2156850099563599, "learning_rate": 9.963436256900236e-06, "loss": 0.8444, "step": 1311 }, { "epoch": 0.06742727926816733, "grad_norm": 1.308240294456482, "learning_rate": 9.963335722903991e-06, "loss": 0.8515, "step": 1312 }, { "epoch": 0.06747867201151198, "grad_norm": 1.3126437664031982, "learning_rate": 9.963235051394196e-06, "loss": 0.882, "step": 1313 }, { "epoch": 0.06753006475485661, "grad_norm": 1.2073324918746948, "learning_rate": 9.963134242373639e-06, "loss": 0.7931, "step": 1314 }, { "epoch": 0.06758145749820126, "grad_norm": 1.2467983961105347, "learning_rate": 9.963033295845113e-06, "loss": 0.8773, "step": 1315 }, { "epoch": 0.06763285024154589, "grad_norm": 1.2667630910873413, "learning_rate": 9.962932211811415e-06, "loss": 0.8744, "step": 1316 }, { "epoch": 0.06768424298489054, "grad_norm": 1.1797868013381958, "learning_rate": 9.962830990275349e-06, "loss": 0.8587, "step": 1317 }, { "epoch": 0.06773563572823517, "grad_norm": 1.239017367362976, "learning_rate": 9.962729631239716e-06, "loss": 0.798, "step": 1318 }, { "epoch": 0.06778702847157982, "grad_norm": 1.222000002861023, "learning_rate": 9.962628134707324e-06, "loss": 0.8645, "step": 1319 }, { "epoch": 0.06783842121492445, "grad_norm": 1.1918072700500488, "learning_rate": 9.962526500680986e-06, "loss": 0.8859, "step": 1320 }, { "epoch": 0.0678898139582691, "grad_norm": 1.2195672988891602, "learning_rate": 9.962424729163517e-06, "loss": 0.8535, "step": 1321 }, { "epoch": 0.06794120670161373, "grad_norm": 1.247909665107727, "learning_rate": 9.96232282015774e-06, "loss": 0.8565, "step": 1322 }, { "epoch": 0.06799259944495838, "grad_norm": 1.1036990880966187, "learning_rate": 9.962220773666473e-06, "loss": 0.8684, "step": 1323 }, { "epoch": 0.06804399218830301, "grad_norm": 0.811055064201355, "learning_rate": 9.962118589692547e-06, "loss": 0.7471, "step": 1324 }, { "epoch": 0.06809538493164766, "grad_norm": 1.156511902809143, "learning_rate": 9.962016268238793e-06, "loss": 0.8306, "step": 1325 }, { "epoch": 0.06814677767499229, "grad_norm": 1.2628639936447144, "learning_rate": 9.961913809308045e-06, "loss": 0.9013, "step": 1326 }, { "epoch": 0.06819817041833694, "grad_norm": 1.2208572626113892, "learning_rate": 9.961811212903143e-06, "loss": 0.8463, "step": 1327 }, { "epoch": 0.06824956316168157, "grad_norm": 1.2117400169372559, "learning_rate": 9.961708479026927e-06, "loss": 0.8498, "step": 1328 }, { "epoch": 0.06830095590502622, "grad_norm": 1.1851780414581299, "learning_rate": 9.961605607682245e-06, "loss": 0.8617, "step": 1329 }, { "epoch": 0.06835234864837085, "grad_norm": 1.1723960638046265, "learning_rate": 9.961502598871946e-06, "loss": 0.8261, "step": 1330 }, { "epoch": 0.06840374139171548, "grad_norm": 1.4972400665283203, "learning_rate": 9.961399452598887e-06, "loss": 0.8019, "step": 1331 }, { "epoch": 0.06845513413506013, "grad_norm": 1.3451943397521973, "learning_rate": 9.961296168865923e-06, "loss": 0.8512, "step": 1332 }, { "epoch": 0.06850652687840476, "grad_norm": 1.1305317878723145, "learning_rate": 9.961192747675917e-06, "loss": 0.8135, "step": 1333 }, { "epoch": 0.06855791962174941, "grad_norm": 1.2738243341445923, "learning_rate": 9.961089189031731e-06, "loss": 0.8886, "step": 1334 }, { "epoch": 0.06860931236509404, "grad_norm": 1.1738700866699219, "learning_rate": 9.960985492936239e-06, "loss": 0.8486, "step": 1335 }, { "epoch": 0.06866070510843869, "grad_norm": 1.1840111017227173, "learning_rate": 9.960881659392311e-06, "loss": 0.8418, "step": 1336 }, { "epoch": 0.06871209785178332, "grad_norm": 1.1579750776290894, "learning_rate": 9.960777688402826e-06, "loss": 0.8227, "step": 1337 }, { "epoch": 0.06876349059512797, "grad_norm": 11.913105964660645, "learning_rate": 9.960673579970661e-06, "loss": 0.8345, "step": 1338 }, { "epoch": 0.0688148833384726, "grad_norm": 1.1559432744979858, "learning_rate": 9.960569334098705e-06, "loss": 0.8506, "step": 1339 }, { "epoch": 0.06886627608181725, "grad_norm": 1.2218172550201416, "learning_rate": 9.96046495078984e-06, "loss": 0.8251, "step": 1340 }, { "epoch": 0.06891766882516188, "grad_norm": 1.2240543365478516, "learning_rate": 9.960360430046965e-06, "loss": 0.8498, "step": 1341 }, { "epoch": 0.06896906156850653, "grad_norm": 1.1750335693359375, "learning_rate": 9.960255771872972e-06, "loss": 0.8708, "step": 1342 }, { "epoch": 0.06902045431185116, "grad_norm": 1.1911288499832153, "learning_rate": 9.96015097627076e-06, "loss": 0.8281, "step": 1343 }, { "epoch": 0.06907184705519581, "grad_norm": 1.249687671661377, "learning_rate": 9.960046043243236e-06, "loss": 0.845, "step": 1344 }, { "epoch": 0.06912323979854045, "grad_norm": 1.124484658241272, "learning_rate": 9.959940972793305e-06, "loss": 0.8181, "step": 1345 }, { "epoch": 0.06917463254188509, "grad_norm": 1.2790043354034424, "learning_rate": 9.959835764923877e-06, "loss": 0.8349, "step": 1346 }, { "epoch": 0.06922602528522973, "grad_norm": 1.1683560609817505, "learning_rate": 9.959730419637867e-06, "loss": 0.8272, "step": 1347 }, { "epoch": 0.06927741802857436, "grad_norm": 0.9273435473442078, "learning_rate": 9.959624936938198e-06, "loss": 0.766, "step": 1348 }, { "epoch": 0.069328810771919, "grad_norm": 1.2485847473144531, "learning_rate": 9.959519316827785e-06, "loss": 0.9199, "step": 1349 }, { "epoch": 0.06938020351526364, "grad_norm": 1.2011022567749023, "learning_rate": 9.95941355930956e-06, "loss": 0.8807, "step": 1350 }, { "epoch": 0.06943159625860829, "grad_norm": 1.2683961391448975, "learning_rate": 9.959307664386452e-06, "loss": 0.8585, "step": 1351 }, { "epoch": 0.06948298900195292, "grad_norm": 1.188191294670105, "learning_rate": 9.959201632061395e-06, "loss": 0.8632, "step": 1352 }, { "epoch": 0.06953438174529757, "grad_norm": 1.1577982902526855, "learning_rate": 9.959095462337326e-06, "loss": 0.8558, "step": 1353 }, { "epoch": 0.0695857744886422, "grad_norm": 1.255170464515686, "learning_rate": 9.958989155217187e-06, "loss": 0.7719, "step": 1354 }, { "epoch": 0.06963716723198685, "grad_norm": 1.1004319190979004, "learning_rate": 9.95888271070392e-06, "loss": 0.703, "step": 1355 }, { "epoch": 0.06968855997533148, "grad_norm": 1.1970584392547607, "learning_rate": 9.95877612880048e-06, "loss": 0.8844, "step": 1356 }, { "epoch": 0.06973995271867613, "grad_norm": 1.2254855632781982, "learning_rate": 9.958669409509818e-06, "loss": 0.7915, "step": 1357 }, { "epoch": 0.06979134546202076, "grad_norm": 1.1627140045166016, "learning_rate": 9.958562552834889e-06, "loss": 0.8252, "step": 1358 }, { "epoch": 0.0698427382053654, "grad_norm": 0.7959070801734924, "learning_rate": 9.958455558778653e-06, "loss": 0.7079, "step": 1359 }, { "epoch": 0.06989413094871004, "grad_norm": 1.1797618865966797, "learning_rate": 9.958348427344077e-06, "loss": 0.8108, "step": 1360 }, { "epoch": 0.06994552369205469, "grad_norm": 1.248896837234497, "learning_rate": 9.958241158534128e-06, "loss": 0.8556, "step": 1361 }, { "epoch": 0.06999691643539932, "grad_norm": 1.2889940738677979, "learning_rate": 9.958133752351778e-06, "loss": 0.8482, "step": 1362 }, { "epoch": 0.07004830917874397, "grad_norm": 0.8384664058685303, "learning_rate": 9.958026208800003e-06, "loss": 0.7828, "step": 1363 }, { "epoch": 0.0700997019220886, "grad_norm": 1.2159619331359863, "learning_rate": 9.95791852788178e-06, "loss": 0.8833, "step": 1364 }, { "epoch": 0.07015109466543325, "grad_norm": 1.1174484491348267, "learning_rate": 9.957810709600097e-06, "loss": 0.7938, "step": 1365 }, { "epoch": 0.07020248740877788, "grad_norm": 1.2247254848480225, "learning_rate": 9.957702753957938e-06, "loss": 0.8095, "step": 1366 }, { "epoch": 0.07025388015212251, "grad_norm": 1.2365052700042725, "learning_rate": 9.957594660958296e-06, "loss": 0.8266, "step": 1367 }, { "epoch": 0.07030527289546716, "grad_norm": 1.1571627855300903, "learning_rate": 9.957486430604163e-06, "loss": 0.8344, "step": 1368 }, { "epoch": 0.0703566656388118, "grad_norm": 0.9697171449661255, "learning_rate": 9.957378062898541e-06, "loss": 0.7369, "step": 1369 }, { "epoch": 0.07040805838215644, "grad_norm": 1.4132081270217896, "learning_rate": 9.95726955784443e-06, "loss": 0.8857, "step": 1370 }, { "epoch": 0.07045945112550107, "grad_norm": 1.1720525026321411, "learning_rate": 9.95716091544484e-06, "loss": 0.8819, "step": 1371 }, { "epoch": 0.07051084386884572, "grad_norm": 1.38770592212677, "learning_rate": 9.957052135702774e-06, "loss": 0.7999, "step": 1372 }, { "epoch": 0.07056223661219035, "grad_norm": 1.1321516036987305, "learning_rate": 9.95694321862125e-06, "loss": 0.8554, "step": 1373 }, { "epoch": 0.070613629355535, "grad_norm": 1.1667180061340332, "learning_rate": 9.956834164203287e-06, "loss": 0.7785, "step": 1374 }, { "epoch": 0.07066502209887963, "grad_norm": 1.2644528150558472, "learning_rate": 9.956724972451906e-06, "loss": 0.8474, "step": 1375 }, { "epoch": 0.07071641484222428, "grad_norm": 1.300689458847046, "learning_rate": 9.95661564337013e-06, "loss": 0.8833, "step": 1376 }, { "epoch": 0.07076780758556891, "grad_norm": 1.3651669025421143, "learning_rate": 9.956506176960987e-06, "loss": 0.8129, "step": 1377 }, { "epoch": 0.07081920032891356, "grad_norm": 1.2010197639465332, "learning_rate": 9.956396573227517e-06, "loss": 0.834, "step": 1378 }, { "epoch": 0.0708705930722582, "grad_norm": 1.2028528451919556, "learning_rate": 9.956286832172747e-06, "loss": 0.8359, "step": 1379 }, { "epoch": 0.07092198581560284, "grad_norm": 1.178982138633728, "learning_rate": 9.956176953799725e-06, "loss": 0.8358, "step": 1380 }, { "epoch": 0.07097337855894748, "grad_norm": 1.200798511505127, "learning_rate": 9.956066938111491e-06, "loss": 0.8423, "step": 1381 }, { "epoch": 0.07102477130229212, "grad_norm": 1.1842976808547974, "learning_rate": 9.955956785111095e-06, "loss": 0.783, "step": 1382 }, { "epoch": 0.07107616404563676, "grad_norm": 1.1839478015899658, "learning_rate": 9.955846494801589e-06, "loss": 0.7479, "step": 1383 }, { "epoch": 0.0711275567889814, "grad_norm": 0.8355082273483276, "learning_rate": 9.955736067186029e-06, "loss": 0.7837, "step": 1384 }, { "epoch": 0.07117894953232604, "grad_norm": 1.1897263526916504, "learning_rate": 9.955625502267471e-06, "loss": 0.8656, "step": 1385 }, { "epoch": 0.07123034227567067, "grad_norm": 1.1653515100479126, "learning_rate": 9.955514800048985e-06, "loss": 0.8606, "step": 1386 }, { "epoch": 0.07128173501901532, "grad_norm": 0.7644600868225098, "learning_rate": 9.955403960533632e-06, "loss": 0.739, "step": 1387 }, { "epoch": 0.07133312776235995, "grad_norm": 1.317298412322998, "learning_rate": 9.955292983724484e-06, "loss": 0.8196, "step": 1388 }, { "epoch": 0.0713845205057046, "grad_norm": 1.2663854360580444, "learning_rate": 9.955181869624618e-06, "loss": 0.8669, "step": 1389 }, { "epoch": 0.07143591324904923, "grad_norm": 0.7830958366394043, "learning_rate": 9.95507061823711e-06, "loss": 0.7528, "step": 1390 }, { "epoch": 0.07148730599239388, "grad_norm": 0.7879834771156311, "learning_rate": 9.954959229565044e-06, "loss": 0.7409, "step": 1391 }, { "epoch": 0.07153869873573851, "grad_norm": 1.1435881853103638, "learning_rate": 9.954847703611506e-06, "loss": 0.9095, "step": 1392 }, { "epoch": 0.07159009147908316, "grad_norm": 1.270622968673706, "learning_rate": 9.954736040379587e-06, "loss": 0.8461, "step": 1393 }, { "epoch": 0.07164148422242779, "grad_norm": 1.233609676361084, "learning_rate": 9.954624239872377e-06, "loss": 0.8715, "step": 1394 }, { "epoch": 0.07169287696577244, "grad_norm": 1.1390472650527954, "learning_rate": 9.954512302092976e-06, "loss": 0.8315, "step": 1395 }, { "epoch": 0.07174426970911707, "grad_norm": 0.8489704132080078, "learning_rate": 9.954400227044487e-06, "loss": 0.6961, "step": 1396 }, { "epoch": 0.07179566245246172, "grad_norm": 1.161421775817871, "learning_rate": 9.954288014730012e-06, "loss": 0.8712, "step": 1397 }, { "epoch": 0.07184705519580635, "grad_norm": 1.1996312141418457, "learning_rate": 9.954175665152661e-06, "loss": 0.8478, "step": 1398 }, { "epoch": 0.071898447939151, "grad_norm": 0.852490246295929, "learning_rate": 9.954063178315549e-06, "loss": 0.7643, "step": 1399 }, { "epoch": 0.07194984068249563, "grad_norm": 1.2015862464904785, "learning_rate": 9.953950554221789e-06, "loss": 0.8842, "step": 1400 }, { "epoch": 0.07200123342584028, "grad_norm": 1.1348485946655273, "learning_rate": 9.953837792874502e-06, "loss": 0.7997, "step": 1401 }, { "epoch": 0.07205262616918491, "grad_norm": 1.1355761289596558, "learning_rate": 9.953724894276815e-06, "loss": 0.8445, "step": 1402 }, { "epoch": 0.07210401891252956, "grad_norm": 1.175842523574829, "learning_rate": 9.953611858431852e-06, "loss": 0.8716, "step": 1403 }, { "epoch": 0.07215541165587419, "grad_norm": 1.194063663482666, "learning_rate": 9.953498685342748e-06, "loss": 0.8049, "step": 1404 }, { "epoch": 0.07220680439921882, "grad_norm": 0.9623196125030518, "learning_rate": 9.953385375012637e-06, "loss": 0.7444, "step": 1405 }, { "epoch": 0.07225819714256347, "grad_norm": 0.9269728660583496, "learning_rate": 9.953271927444659e-06, "loss": 0.7305, "step": 1406 }, { "epoch": 0.0723095898859081, "grad_norm": 1.1395899057388306, "learning_rate": 9.953158342641956e-06, "loss": 0.7863, "step": 1407 }, { "epoch": 0.07236098262925275, "grad_norm": 1.2366182804107666, "learning_rate": 9.953044620607677e-06, "loss": 0.8513, "step": 1408 }, { "epoch": 0.07241237537259738, "grad_norm": 1.21642005443573, "learning_rate": 9.95293076134497e-06, "loss": 0.8399, "step": 1409 }, { "epoch": 0.07246376811594203, "grad_norm": 1.2443125247955322, "learning_rate": 9.952816764856992e-06, "loss": 0.9065, "step": 1410 }, { "epoch": 0.07251516085928666, "grad_norm": 1.131524682044983, "learning_rate": 9.952702631146901e-06, "loss": 0.7952, "step": 1411 }, { "epoch": 0.07256655360263131, "grad_norm": 0.9441289901733398, "learning_rate": 9.952588360217858e-06, "loss": 0.7774, "step": 1412 }, { "epoch": 0.07261794634597594, "grad_norm": 1.1668057441711426, "learning_rate": 9.95247395207303e-06, "loss": 0.8996, "step": 1413 }, { "epoch": 0.07266933908932059, "grad_norm": 1.1729233264923096, "learning_rate": 9.952359406715586e-06, "loss": 0.791, "step": 1414 }, { "epoch": 0.07272073183266523, "grad_norm": 1.1911845207214355, "learning_rate": 9.9522447241487e-06, "loss": 0.7958, "step": 1415 }, { "epoch": 0.07277212457600987, "grad_norm": 1.1784542798995972, "learning_rate": 9.95212990437555e-06, "loss": 0.8568, "step": 1416 }, { "epoch": 0.0728235173193545, "grad_norm": 1.310339093208313, "learning_rate": 9.952014947399318e-06, "loss": 0.9307, "step": 1417 }, { "epoch": 0.07287491006269915, "grad_norm": 1.253079891204834, "learning_rate": 9.951899853223185e-06, "loss": 0.8781, "step": 1418 }, { "epoch": 0.07292630280604379, "grad_norm": 1.1128742694854736, "learning_rate": 9.951784621850344e-06, "loss": 0.8193, "step": 1419 }, { "epoch": 0.07297769554938843, "grad_norm": 0.8029535412788391, "learning_rate": 9.951669253283985e-06, "loss": 0.7202, "step": 1420 }, { "epoch": 0.07302908829273307, "grad_norm": 1.2505106925964355, "learning_rate": 9.951553747527306e-06, "loss": 0.8311, "step": 1421 }, { "epoch": 0.0730804810360777, "grad_norm": 1.2667174339294434, "learning_rate": 9.951438104583505e-06, "loss": 0.8703, "step": 1422 }, { "epoch": 0.07313187377942235, "grad_norm": 0.7906657457351685, "learning_rate": 9.951322324455788e-06, "loss": 0.7101, "step": 1423 }, { "epoch": 0.07318326652276698, "grad_norm": 1.15032160282135, "learning_rate": 9.951206407147364e-06, "loss": 0.8473, "step": 1424 }, { "epoch": 0.07323465926611163, "grad_norm": 1.255661964416504, "learning_rate": 9.95109035266144e-06, "loss": 0.8276, "step": 1425 }, { "epoch": 0.07328605200945626, "grad_norm": 1.1481667757034302, "learning_rate": 9.950974161001237e-06, "loss": 0.8088, "step": 1426 }, { "epoch": 0.0733374447528009, "grad_norm": 1.1616981029510498, "learning_rate": 9.950857832169971e-06, "loss": 0.8489, "step": 1427 }, { "epoch": 0.07338883749614554, "grad_norm": 1.1490039825439453, "learning_rate": 9.950741366170864e-06, "loss": 0.831, "step": 1428 }, { "epoch": 0.07344023023949019, "grad_norm": 1.151681661605835, "learning_rate": 9.950624763007147e-06, "loss": 0.8816, "step": 1429 }, { "epoch": 0.07349162298283482, "grad_norm": 1.1748698949813843, "learning_rate": 9.950508022682044e-06, "loss": 0.8589, "step": 1430 }, { "epoch": 0.07354301572617947, "grad_norm": 1.2848342657089233, "learning_rate": 9.950391145198795e-06, "loss": 0.8749, "step": 1431 }, { "epoch": 0.0735944084695241, "grad_norm": 1.223555088043213, "learning_rate": 9.950274130560638e-06, "loss": 0.8585, "step": 1432 }, { "epoch": 0.07364580121286875, "grad_norm": 1.177095651626587, "learning_rate": 9.95015697877081e-06, "loss": 0.745, "step": 1433 }, { "epoch": 0.07369719395621338, "grad_norm": 0.9290037155151367, "learning_rate": 9.950039689832565e-06, "loss": 0.7747, "step": 1434 }, { "epoch": 0.07374858669955803, "grad_norm": 1.2345010042190552, "learning_rate": 9.949922263749144e-06, "loss": 0.8246, "step": 1435 }, { "epoch": 0.07379997944290266, "grad_norm": 1.2172366380691528, "learning_rate": 9.949804700523805e-06, "loss": 0.8949, "step": 1436 }, { "epoch": 0.07385137218624731, "grad_norm": 1.1880131959915161, "learning_rate": 9.949687000159805e-06, "loss": 0.8409, "step": 1437 }, { "epoch": 0.07390276492959194, "grad_norm": 1.6032335758209229, "learning_rate": 9.949569162660405e-06, "loss": 0.8448, "step": 1438 }, { "epoch": 0.07395415767293659, "grad_norm": 1.176414966583252, "learning_rate": 9.949451188028867e-06, "loss": 0.8391, "step": 1439 }, { "epoch": 0.07400555041628122, "grad_norm": 1.1669632196426392, "learning_rate": 9.949333076268464e-06, "loss": 0.8002, "step": 1440 }, { "epoch": 0.07405694315962585, "grad_norm": 1.3067156076431274, "learning_rate": 9.949214827382463e-06, "loss": 0.7976, "step": 1441 }, { "epoch": 0.0741083359029705, "grad_norm": 0.8184141516685486, "learning_rate": 9.949096441374146e-06, "loss": 0.6819, "step": 1442 }, { "epoch": 0.07415972864631513, "grad_norm": 1.1940512657165527, "learning_rate": 9.948977918246791e-06, "loss": 0.8145, "step": 1443 }, { "epoch": 0.07421112138965978, "grad_norm": 0.8063547015190125, "learning_rate": 9.94885925800368e-06, "loss": 0.7396, "step": 1444 }, { "epoch": 0.07426251413300441, "grad_norm": 1.213698387145996, "learning_rate": 9.9487404606481e-06, "loss": 0.8319, "step": 1445 }, { "epoch": 0.07431390687634906, "grad_norm": 0.8747170567512512, "learning_rate": 9.948621526183346e-06, "loss": 0.7597, "step": 1446 }, { "epoch": 0.0743652996196937, "grad_norm": 1.2030444145202637, "learning_rate": 9.948502454612712e-06, "loss": 0.8668, "step": 1447 }, { "epoch": 0.07441669236303834, "grad_norm": 0.9899352788925171, "learning_rate": 9.948383245939495e-06, "loss": 0.7368, "step": 1448 }, { "epoch": 0.07446808510638298, "grad_norm": 0.8577443957328796, "learning_rate": 9.948263900166998e-06, "loss": 0.7243, "step": 1449 }, { "epoch": 0.07451947784972762, "grad_norm": 0.786955714225769, "learning_rate": 9.948144417298531e-06, "loss": 0.7095, "step": 1450 }, { "epoch": 0.07457087059307226, "grad_norm": 1.2283196449279785, "learning_rate": 9.948024797337402e-06, "loss": 0.8284, "step": 1451 }, { "epoch": 0.0746222633364169, "grad_norm": 1.1920987367630005, "learning_rate": 9.947905040286922e-06, "loss": 0.8245, "step": 1452 }, { "epoch": 0.07467365607976154, "grad_norm": 1.3656609058380127, "learning_rate": 9.947785146150414e-06, "loss": 0.8164, "step": 1453 }, { "epoch": 0.07472504882310618, "grad_norm": 1.1480259895324707, "learning_rate": 9.9476651149312e-06, "loss": 0.9128, "step": 1454 }, { "epoch": 0.07477644156645082, "grad_norm": 0.9765171408653259, "learning_rate": 9.9475449466326e-06, "loss": 0.7796, "step": 1455 }, { "epoch": 0.07482783430979546, "grad_norm": 1.1816232204437256, "learning_rate": 9.947424641257948e-06, "loss": 0.7744, "step": 1456 }, { "epoch": 0.0748792270531401, "grad_norm": 0.8721120953559875, "learning_rate": 9.947304198810577e-06, "loss": 0.7367, "step": 1457 }, { "epoch": 0.07493061979648474, "grad_norm": 1.2565195560455322, "learning_rate": 9.947183619293822e-06, "loss": 0.901, "step": 1458 }, { "epoch": 0.07498201253982938, "grad_norm": 1.2259550094604492, "learning_rate": 9.947062902711024e-06, "loss": 0.8315, "step": 1459 }, { "epoch": 0.07503340528317401, "grad_norm": 0.9153401255607605, "learning_rate": 9.946942049065532e-06, "loss": 0.7573, "step": 1460 }, { "epoch": 0.07508479802651866, "grad_norm": 1.2016843557357788, "learning_rate": 9.946821058360685e-06, "loss": 0.8429, "step": 1461 }, { "epoch": 0.07513619076986329, "grad_norm": 0.8889265656471252, "learning_rate": 9.946699930599845e-06, "loss": 0.6882, "step": 1462 }, { "epoch": 0.07518758351320794, "grad_norm": 1.3222475051879883, "learning_rate": 9.946578665786362e-06, "loss": 0.8192, "step": 1463 }, { "epoch": 0.07523897625655257, "grad_norm": 1.1569023132324219, "learning_rate": 9.946457263923599e-06, "loss": 0.8718, "step": 1464 }, { "epoch": 0.07529036899989722, "grad_norm": 1.1867568492889404, "learning_rate": 9.946335725014918e-06, "loss": 0.8506, "step": 1465 }, { "epoch": 0.07534176174324185, "grad_norm": 0.9825249910354614, "learning_rate": 9.946214049063684e-06, "loss": 0.724, "step": 1466 }, { "epoch": 0.0753931544865865, "grad_norm": 0.8492066860198975, "learning_rate": 9.946092236073272e-06, "loss": 0.7231, "step": 1467 }, { "epoch": 0.07544454722993113, "grad_norm": 1.2318816184997559, "learning_rate": 9.945970286047055e-06, "loss": 0.8728, "step": 1468 }, { "epoch": 0.07549593997327578, "grad_norm": 1.2679754495620728, "learning_rate": 9.945848198988412e-06, "loss": 0.8697, "step": 1469 }, { "epoch": 0.07554733271662041, "grad_norm": 1.0174459218978882, "learning_rate": 9.945725974900728e-06, "loss": 0.7369, "step": 1470 }, { "epoch": 0.07559872545996506, "grad_norm": 1.165276050567627, "learning_rate": 9.945603613787385e-06, "loss": 0.7709, "step": 1471 }, { "epoch": 0.07565011820330969, "grad_norm": 1.2141611576080322, "learning_rate": 9.945481115651774e-06, "loss": 0.883, "step": 1472 }, { "epoch": 0.07570151094665434, "grad_norm": 1.182141900062561, "learning_rate": 9.945358480497293e-06, "loss": 0.88, "step": 1473 }, { "epoch": 0.07575290368999897, "grad_norm": 1.215726613998413, "learning_rate": 9.945235708327335e-06, "loss": 0.8595, "step": 1474 }, { "epoch": 0.07580429643334362, "grad_norm": 1.23809814453125, "learning_rate": 9.945112799145305e-06, "loss": 0.8469, "step": 1475 }, { "epoch": 0.07585568917668825, "grad_norm": 1.179121494293213, "learning_rate": 9.944989752954604e-06, "loss": 0.8458, "step": 1476 }, { "epoch": 0.07590708192003288, "grad_norm": 0.9221994876861572, "learning_rate": 9.944866569758644e-06, "loss": 0.7097, "step": 1477 }, { "epoch": 0.07595847466337753, "grad_norm": 1.167385220527649, "learning_rate": 9.94474324956084e-06, "loss": 0.9073, "step": 1478 }, { "epoch": 0.07600986740672216, "grad_norm": 0.7991660833358765, "learning_rate": 9.944619792364606e-06, "loss": 0.736, "step": 1479 }, { "epoch": 0.07606126015006681, "grad_norm": 1.156245231628418, "learning_rate": 9.94449619817336e-06, "loss": 0.8384, "step": 1480 }, { "epoch": 0.07611265289341144, "grad_norm": 0.8453945517539978, "learning_rate": 9.94437246699053e-06, "loss": 0.6775, "step": 1481 }, { "epoch": 0.07616404563675609, "grad_norm": 1.1651897430419922, "learning_rate": 9.944248598819546e-06, "loss": 0.8419, "step": 1482 }, { "epoch": 0.07621543838010073, "grad_norm": 1.2072960138320923, "learning_rate": 9.944124593663835e-06, "loss": 0.7924, "step": 1483 }, { "epoch": 0.07626683112344537, "grad_norm": 1.234621524810791, "learning_rate": 9.944000451526833e-06, "loss": 0.8932, "step": 1484 }, { "epoch": 0.07631822386679, "grad_norm": 1.410512089729309, "learning_rate": 9.943876172411983e-06, "loss": 0.8674, "step": 1485 }, { "epoch": 0.07636961661013465, "grad_norm": 1.156535029411316, "learning_rate": 9.943751756322727e-06, "loss": 0.8117, "step": 1486 }, { "epoch": 0.07642100935347929, "grad_norm": 1.265147089958191, "learning_rate": 9.943627203262511e-06, "loss": 0.8851, "step": 1487 }, { "epoch": 0.07647240209682393, "grad_norm": 1.42330002784729, "learning_rate": 9.943502513234786e-06, "loss": 0.872, "step": 1488 }, { "epoch": 0.07652379484016857, "grad_norm": 1.1698558330535889, "learning_rate": 9.943377686243008e-06, "loss": 0.8876, "step": 1489 }, { "epoch": 0.07657518758351321, "grad_norm": 1.261244535446167, "learning_rate": 9.943252722290633e-06, "loss": 0.8417, "step": 1490 }, { "epoch": 0.07662658032685785, "grad_norm": 1.14500093460083, "learning_rate": 9.943127621381126e-06, "loss": 0.8316, "step": 1491 }, { "epoch": 0.0766779730702025, "grad_norm": 1.1792700290679932, "learning_rate": 9.94300238351795e-06, "loss": 0.814, "step": 1492 }, { "epoch": 0.07672936581354713, "grad_norm": 1.2186336517333984, "learning_rate": 9.942877008704576e-06, "loss": 0.7863, "step": 1493 }, { "epoch": 0.07678075855689177, "grad_norm": 1.179352045059204, "learning_rate": 9.94275149694448e-06, "loss": 0.839, "step": 1494 }, { "epoch": 0.0768321513002364, "grad_norm": 1.1480205059051514, "learning_rate": 9.942625848241138e-06, "loss": 0.8481, "step": 1495 }, { "epoch": 0.07688354404358104, "grad_norm": 1.1068614721298218, "learning_rate": 9.94250006259803e-06, "loss": 0.8276, "step": 1496 }, { "epoch": 0.07693493678692569, "grad_norm": 1.2442585229873657, "learning_rate": 9.942374140018641e-06, "loss": 0.8391, "step": 1497 }, { "epoch": 0.07698632953027032, "grad_norm": 1.207851529121399, "learning_rate": 9.942248080506462e-06, "loss": 0.824, "step": 1498 }, { "epoch": 0.07703772227361497, "grad_norm": 1.221541166305542, "learning_rate": 9.942121884064984e-06, "loss": 0.8529, "step": 1499 }, { "epoch": 0.0770891150169596, "grad_norm": 1.1786326169967651, "learning_rate": 9.941995550697702e-06, "loss": 0.8263, "step": 1500 }, { "epoch": 0.07714050776030425, "grad_norm": 1.1345199346542358, "learning_rate": 9.941869080408118e-06, "loss": 0.8055, "step": 1501 }, { "epoch": 0.07719190050364888, "grad_norm": 1.1585556268692017, "learning_rate": 9.941742473199737e-06, "loss": 0.8309, "step": 1502 }, { "epoch": 0.07724329324699353, "grad_norm": 1.142100214958191, "learning_rate": 9.941615729076063e-06, "loss": 0.866, "step": 1503 }, { "epoch": 0.07729468599033816, "grad_norm": 1.2723445892333984, "learning_rate": 9.941488848040612e-06, "loss": 0.8779, "step": 1504 }, { "epoch": 0.07734607873368281, "grad_norm": 1.1520577669143677, "learning_rate": 9.941361830096899e-06, "loss": 0.7641, "step": 1505 }, { "epoch": 0.07739747147702744, "grad_norm": 1.180328607559204, "learning_rate": 9.941234675248438e-06, "loss": 0.8326, "step": 1506 }, { "epoch": 0.07744886422037209, "grad_norm": 1.1629438400268555, "learning_rate": 9.941107383498757e-06, "loss": 0.83, "step": 1507 }, { "epoch": 0.07750025696371672, "grad_norm": 1.227476954460144, "learning_rate": 9.940979954851383e-06, "loss": 0.8818, "step": 1508 }, { "epoch": 0.07755164970706137, "grad_norm": 1.2439676523208618, "learning_rate": 9.940852389309841e-06, "loss": 0.8443, "step": 1509 }, { "epoch": 0.077603042450406, "grad_norm": 1.1380934715270996, "learning_rate": 9.940724686877672e-06, "loss": 0.851, "step": 1510 }, { "epoch": 0.07765443519375065, "grad_norm": 1.1234251260757446, "learning_rate": 9.94059684755841e-06, "loss": 0.8157, "step": 1511 }, { "epoch": 0.07770582793709528, "grad_norm": 1.1356112957000732, "learning_rate": 9.940468871355598e-06, "loss": 0.859, "step": 1512 }, { "epoch": 0.07775722068043993, "grad_norm": 0.8068326115608215, "learning_rate": 9.94034075827278e-06, "loss": 0.7219, "step": 1513 }, { "epoch": 0.07780861342378456, "grad_norm": 1.1665666103363037, "learning_rate": 9.940212508313509e-06, "loss": 0.8223, "step": 1514 }, { "epoch": 0.0778600061671292, "grad_norm": 0.8754160404205322, "learning_rate": 9.940084121481336e-06, "loss": 0.7071, "step": 1515 }, { "epoch": 0.07791139891047384, "grad_norm": 1.2813318967819214, "learning_rate": 9.93995559777982e-06, "loss": 0.8728, "step": 1516 }, { "epoch": 0.07796279165381848, "grad_norm": 1.2071044445037842, "learning_rate": 9.939826937212517e-06, "loss": 0.832, "step": 1517 }, { "epoch": 0.07801418439716312, "grad_norm": 1.2397701740264893, "learning_rate": 9.939698139782998e-06, "loss": 0.8633, "step": 1518 }, { "epoch": 0.07806557714050776, "grad_norm": 1.1649987697601318, "learning_rate": 9.939569205494829e-06, "loss": 0.831, "step": 1519 }, { "epoch": 0.0781169698838524, "grad_norm": 1.0392868518829346, "learning_rate": 9.939440134351578e-06, "loss": 0.7676, "step": 1520 }, { "epoch": 0.07816836262719704, "grad_norm": 1.2819868326187134, "learning_rate": 9.939310926356828e-06, "loss": 0.8081, "step": 1521 }, { "epoch": 0.07821975537054168, "grad_norm": 1.20846426486969, "learning_rate": 9.939181581514155e-06, "loss": 0.8264, "step": 1522 }, { "epoch": 0.07827114811388632, "grad_norm": 0.8298874497413635, "learning_rate": 9.939052099827142e-06, "loss": 0.7511, "step": 1523 }, { "epoch": 0.07832254085723096, "grad_norm": 1.191690444946289, "learning_rate": 9.938922481299378e-06, "loss": 0.8948, "step": 1524 }, { "epoch": 0.0783739336005756, "grad_norm": 1.1529649496078491, "learning_rate": 9.938792725934455e-06, "loss": 0.7952, "step": 1525 }, { "epoch": 0.07842532634392024, "grad_norm": 1.1194936037063599, "learning_rate": 9.938662833735966e-06, "loss": 0.843, "step": 1526 }, { "epoch": 0.07847671908726488, "grad_norm": 1.1901437044143677, "learning_rate": 9.93853280470751e-06, "loss": 0.8535, "step": 1527 }, { "epoch": 0.07852811183060952, "grad_norm": 1.533829689025879, "learning_rate": 9.93840263885269e-06, "loss": 0.8294, "step": 1528 }, { "epoch": 0.07857950457395416, "grad_norm": 0.9010263085365295, "learning_rate": 9.938272336175113e-06, "loss": 0.7387, "step": 1529 }, { "epoch": 0.0786308973172988, "grad_norm": 0.8380365371704102, "learning_rate": 9.938141896678388e-06, "loss": 0.6974, "step": 1530 }, { "epoch": 0.07868229006064344, "grad_norm": 1.2068496942520142, "learning_rate": 9.93801132036613e-06, "loss": 0.8268, "step": 1531 }, { "epoch": 0.07873368280398808, "grad_norm": 1.2153056859970093, "learning_rate": 9.937880607241956e-06, "loss": 0.8109, "step": 1532 }, { "epoch": 0.07878507554733272, "grad_norm": 1.1627013683319092, "learning_rate": 9.937749757309489e-06, "loss": 0.8535, "step": 1533 }, { "epoch": 0.07883646829067735, "grad_norm": 1.1460394859313965, "learning_rate": 9.937618770572352e-06, "loss": 0.8462, "step": 1534 }, { "epoch": 0.078887861034022, "grad_norm": 1.1696171760559082, "learning_rate": 9.937487647034176e-06, "loss": 0.8693, "step": 1535 }, { "epoch": 0.07893925377736663, "grad_norm": 0.8339906930923462, "learning_rate": 9.937356386698593e-06, "loss": 0.7087, "step": 1536 }, { "epoch": 0.07899064652071128, "grad_norm": 1.1579349040985107, "learning_rate": 9.937224989569239e-06, "loss": 0.822, "step": 1537 }, { "epoch": 0.07904203926405591, "grad_norm": 1.2062667608261108, "learning_rate": 9.937093455649755e-06, "loss": 0.888, "step": 1538 }, { "epoch": 0.07909343200740056, "grad_norm": 1.3188236951828003, "learning_rate": 9.936961784943785e-06, "loss": 0.856, "step": 1539 }, { "epoch": 0.07914482475074519, "grad_norm": 1.4719436168670654, "learning_rate": 9.93682997745498e-06, "loss": 0.8293, "step": 1540 }, { "epoch": 0.07919621749408984, "grad_norm": 1.2409130334854126, "learning_rate": 9.936698033186988e-06, "loss": 0.8802, "step": 1541 }, { "epoch": 0.07924761023743447, "grad_norm": 1.1431233882904053, "learning_rate": 9.936565952143466e-06, "loss": 0.8779, "step": 1542 }, { "epoch": 0.07929900298077912, "grad_norm": 1.2298637628555298, "learning_rate": 9.936433734328075e-06, "loss": 0.8227, "step": 1543 }, { "epoch": 0.07935039572412375, "grad_norm": 1.142795205116272, "learning_rate": 9.936301379744475e-06, "loss": 0.8651, "step": 1544 }, { "epoch": 0.0794017884674684, "grad_norm": 1.1544638872146606, "learning_rate": 9.936168888396337e-06, "loss": 0.7847, "step": 1545 }, { "epoch": 0.07945318121081303, "grad_norm": 1.1904575824737549, "learning_rate": 9.936036260287325e-06, "loss": 0.8296, "step": 1546 }, { "epoch": 0.07950457395415768, "grad_norm": 1.2933287620544434, "learning_rate": 9.935903495421122e-06, "loss": 0.8829, "step": 1547 }, { "epoch": 0.07955596669750231, "grad_norm": 1.2042230367660522, "learning_rate": 9.935770593801401e-06, "loss": 0.8435, "step": 1548 }, { "epoch": 0.07960735944084696, "grad_norm": 1.0343199968338013, "learning_rate": 9.935637555431845e-06, "loss": 0.7205, "step": 1549 }, { "epoch": 0.07965875218419159, "grad_norm": 1.2809213399887085, "learning_rate": 9.935504380316141e-06, "loss": 0.8765, "step": 1550 }, { "epoch": 0.07971014492753623, "grad_norm": 1.2070891857147217, "learning_rate": 9.935371068457979e-06, "loss": 0.8762, "step": 1551 }, { "epoch": 0.07976153767088087, "grad_norm": 1.2169498205184937, "learning_rate": 9.935237619861051e-06, "loss": 0.7749, "step": 1552 }, { "epoch": 0.0798129304142255, "grad_norm": 1.2012264728546143, "learning_rate": 9.935104034529054e-06, "loss": 0.8357, "step": 1553 }, { "epoch": 0.07986432315757015, "grad_norm": 1.1824877262115479, "learning_rate": 9.934970312465692e-06, "loss": 0.8241, "step": 1554 }, { "epoch": 0.07991571590091479, "grad_norm": 1.1611387729644775, "learning_rate": 9.934836453674667e-06, "loss": 0.8175, "step": 1555 }, { "epoch": 0.07996710864425943, "grad_norm": 1.2355875968933105, "learning_rate": 9.93470245815969e-06, "loss": 0.8149, "step": 1556 }, { "epoch": 0.08001850138760407, "grad_norm": 0.7743059396743774, "learning_rate": 9.934568325924472e-06, "loss": 0.7496, "step": 1557 }, { "epoch": 0.08006989413094871, "grad_norm": 1.317503809928894, "learning_rate": 9.934434056972728e-06, "loss": 0.8467, "step": 1558 }, { "epoch": 0.08012128687429335, "grad_norm": 0.771838366985321, "learning_rate": 9.93429965130818e-06, "loss": 0.731, "step": 1559 }, { "epoch": 0.08017267961763799, "grad_norm": 1.1630659103393555, "learning_rate": 9.934165108934552e-06, "loss": 0.8292, "step": 1560 }, { "epoch": 0.08022407236098263, "grad_norm": 1.182859182357788, "learning_rate": 9.93403042985557e-06, "loss": 0.8685, "step": 1561 }, { "epoch": 0.08027546510432727, "grad_norm": 1.1724762916564941, "learning_rate": 9.933895614074966e-06, "loss": 0.7261, "step": 1562 }, { "epoch": 0.0803268578476719, "grad_norm": 0.739230751991272, "learning_rate": 9.933760661596478e-06, "loss": 0.7088, "step": 1563 }, { "epoch": 0.08037825059101655, "grad_norm": 1.2899434566497803, "learning_rate": 9.93362557242384e-06, "loss": 0.8419, "step": 1564 }, { "epoch": 0.08042964333436119, "grad_norm": 1.2055768966674805, "learning_rate": 9.9334903465608e-06, "loss": 0.8959, "step": 1565 }, { "epoch": 0.08048103607770583, "grad_norm": 1.1167572736740112, "learning_rate": 9.933354984011098e-06, "loss": 0.8142, "step": 1566 }, { "epoch": 0.08053242882105047, "grad_norm": 1.0762931108474731, "learning_rate": 9.933219484778491e-06, "loss": 0.8056, "step": 1567 }, { "epoch": 0.08058382156439511, "grad_norm": 1.1156624555587769, "learning_rate": 9.93308384886673e-06, "loss": 0.8121, "step": 1568 }, { "epoch": 0.08063521430773975, "grad_norm": 0.8716696500778198, "learning_rate": 9.932948076279571e-06, "loss": 0.7291, "step": 1569 }, { "epoch": 0.08068660705108438, "grad_norm": 1.1160387992858887, "learning_rate": 9.93281216702078e-06, "loss": 0.8296, "step": 1570 }, { "epoch": 0.08073799979442903, "grad_norm": 1.1406720876693726, "learning_rate": 9.93267612109412e-06, "loss": 0.8575, "step": 1571 }, { "epoch": 0.08078939253777366, "grad_norm": 1.2666432857513428, "learning_rate": 9.932539938503361e-06, "loss": 0.892, "step": 1572 }, { "epoch": 0.08084078528111831, "grad_norm": 1.1326005458831787, "learning_rate": 9.932403619252274e-06, "loss": 0.7921, "step": 1573 }, { "epoch": 0.08089217802446294, "grad_norm": 0.9215224385261536, "learning_rate": 9.93226716334464e-06, "loss": 0.7327, "step": 1574 }, { "epoch": 0.08094357076780759, "grad_norm": 1.1820156574249268, "learning_rate": 9.932130570784236e-06, "loss": 0.9139, "step": 1575 }, { "epoch": 0.08099496351115222, "grad_norm": 1.2150022983551025, "learning_rate": 9.931993841574846e-06, "loss": 0.8315, "step": 1576 }, { "epoch": 0.08104635625449687, "grad_norm": 1.1447468996047974, "learning_rate": 9.931856975720263e-06, "loss": 0.782, "step": 1577 }, { "epoch": 0.0810977489978415, "grad_norm": 1.1437690258026123, "learning_rate": 9.931719973224272e-06, "loss": 0.8255, "step": 1578 }, { "epoch": 0.08114914174118615, "grad_norm": 1.1846719980239868, "learning_rate": 9.931582834090676e-06, "loss": 0.8004, "step": 1579 }, { "epoch": 0.08120053448453078, "grad_norm": 0.7766371369361877, "learning_rate": 9.931445558323269e-06, "loss": 0.6986, "step": 1580 }, { "epoch": 0.08125192722787543, "grad_norm": 1.184993863105774, "learning_rate": 9.931308145925858e-06, "loss": 0.8207, "step": 1581 }, { "epoch": 0.08130331997122006, "grad_norm": 1.2104731798171997, "learning_rate": 9.931170596902246e-06, "loss": 0.8031, "step": 1582 }, { "epoch": 0.08135471271456471, "grad_norm": 1.093277096748352, "learning_rate": 9.931032911256249e-06, "loss": 0.9028, "step": 1583 }, { "epoch": 0.08140610545790934, "grad_norm": 1.2365262508392334, "learning_rate": 9.930895088991678e-06, "loss": 0.8832, "step": 1584 }, { "epoch": 0.08145749820125399, "grad_norm": 1.2132134437561035, "learning_rate": 9.930757130112354e-06, "loss": 0.8222, "step": 1585 }, { "epoch": 0.08150889094459862, "grad_norm": 1.1220144033432007, "learning_rate": 9.930619034622095e-06, "loss": 0.8661, "step": 1586 }, { "epoch": 0.08156028368794327, "grad_norm": 1.2653635740280151, "learning_rate": 9.930480802524733e-06, "loss": 0.8175, "step": 1587 }, { "epoch": 0.0816116764312879, "grad_norm": 1.1704076528549194, "learning_rate": 9.930342433824094e-06, "loss": 0.8977, "step": 1588 }, { "epoch": 0.08166306917463254, "grad_norm": 1.1403098106384277, "learning_rate": 9.930203928524012e-06, "loss": 0.8181, "step": 1589 }, { "epoch": 0.08171446191797718, "grad_norm": 1.1767399311065674, "learning_rate": 9.930065286628325e-06, "loss": 0.8518, "step": 1590 }, { "epoch": 0.08176585466132182, "grad_norm": 0.8117778301239014, "learning_rate": 9.929926508140875e-06, "loss": 0.745, "step": 1591 }, { "epoch": 0.08181724740466646, "grad_norm": 0.7984206080436707, "learning_rate": 9.929787593065506e-06, "loss": 0.6961, "step": 1592 }, { "epoch": 0.0818686401480111, "grad_norm": 1.2913763523101807, "learning_rate": 9.929648541406065e-06, "loss": 0.8533, "step": 1593 }, { "epoch": 0.08192003289135574, "grad_norm": 1.2602064609527588, "learning_rate": 9.929509353166408e-06, "loss": 0.8624, "step": 1594 }, { "epoch": 0.08197142563470038, "grad_norm": 1.1494656801223755, "learning_rate": 9.929370028350389e-06, "loss": 0.8225, "step": 1595 }, { "epoch": 0.08202281837804502, "grad_norm": 1.1617251634597778, "learning_rate": 9.92923056696187e-06, "loss": 0.8628, "step": 1596 }, { "epoch": 0.08207421112138966, "grad_norm": 1.2061105966567993, "learning_rate": 9.929090969004713e-06, "loss": 0.8629, "step": 1597 }, { "epoch": 0.0821256038647343, "grad_norm": 1.1750879287719727, "learning_rate": 9.928951234482786e-06, "loss": 0.8912, "step": 1598 }, { "epoch": 0.08217699660807894, "grad_norm": 1.14617121219635, "learning_rate": 9.928811363399961e-06, "loss": 0.8423, "step": 1599 }, { "epoch": 0.08222838935142358, "grad_norm": 1.1267099380493164, "learning_rate": 9.928671355760114e-06, "loss": 0.8629, "step": 1600 }, { "epoch": 0.08227978209476822, "grad_norm": 1.2203190326690674, "learning_rate": 9.928531211567122e-06, "loss": 0.8128, "step": 1601 }, { "epoch": 0.08233117483811286, "grad_norm": 1.1688432693481445, "learning_rate": 9.928390930824869e-06, "loss": 0.8511, "step": 1602 }, { "epoch": 0.0823825675814575, "grad_norm": 1.2543197870254517, "learning_rate": 9.928250513537242e-06, "loss": 0.8191, "step": 1603 }, { "epoch": 0.08243396032480214, "grad_norm": 0.9396727085113525, "learning_rate": 9.928109959708131e-06, "loss": 0.7219, "step": 1604 }, { "epoch": 0.08248535306814678, "grad_norm": 1.206816554069519, "learning_rate": 9.927969269341431e-06, "loss": 0.8721, "step": 1605 }, { "epoch": 0.08253674581149142, "grad_norm": 1.2839499711990356, "learning_rate": 9.927828442441038e-06, "loss": 0.8268, "step": 1606 }, { "epoch": 0.08258813855483606, "grad_norm": 1.5705254077911377, "learning_rate": 9.927687479010855e-06, "loss": 0.8246, "step": 1607 }, { "epoch": 0.08263953129818069, "grad_norm": 1.1614164113998413, "learning_rate": 9.927546379054786e-06, "loss": 0.8608, "step": 1608 }, { "epoch": 0.08269092404152534, "grad_norm": 1.188598394393921, "learning_rate": 9.927405142576744e-06, "loss": 0.8613, "step": 1609 }, { "epoch": 0.08274231678486997, "grad_norm": 1.1738187074661255, "learning_rate": 9.927263769580639e-06, "loss": 0.8375, "step": 1610 }, { "epoch": 0.08279370952821462, "grad_norm": 0.9251877665519714, "learning_rate": 9.927122260070388e-06, "loss": 0.7336, "step": 1611 }, { "epoch": 0.08284510227155925, "grad_norm": 1.1685748100280762, "learning_rate": 9.926980614049913e-06, "loss": 0.7769, "step": 1612 }, { "epoch": 0.0828964950149039, "grad_norm": 1.2056996822357178, "learning_rate": 9.926838831523136e-06, "loss": 0.8557, "step": 1613 }, { "epoch": 0.08294788775824853, "grad_norm": 1.1196720600128174, "learning_rate": 9.926696912493988e-06, "loss": 0.8099, "step": 1614 }, { "epoch": 0.08299928050159318, "grad_norm": 1.269811987876892, "learning_rate": 9.926554856966399e-06, "loss": 0.8729, "step": 1615 }, { "epoch": 0.08305067324493781, "grad_norm": 1.1423460245132446, "learning_rate": 9.926412664944308e-06, "loss": 0.7912, "step": 1616 }, { "epoch": 0.08310206598828246, "grad_norm": 1.2107304334640503, "learning_rate": 9.926270336431649e-06, "loss": 0.8389, "step": 1617 }, { "epoch": 0.08315345873162709, "grad_norm": 1.3321670293807983, "learning_rate": 9.92612787143237e-06, "loss": 0.8208, "step": 1618 }, { "epoch": 0.08320485147497174, "grad_norm": 1.2418450117111206, "learning_rate": 9.925985269950417e-06, "loss": 0.8776, "step": 1619 }, { "epoch": 0.08325624421831637, "grad_norm": 1.39900541305542, "learning_rate": 9.92584253198974e-06, "loss": 0.8069, "step": 1620 }, { "epoch": 0.08330763696166102, "grad_norm": 1.2392438650131226, "learning_rate": 9.925699657554294e-06, "loss": 0.8143, "step": 1621 }, { "epoch": 0.08335902970500565, "grad_norm": 1.18338942527771, "learning_rate": 9.925556646648039e-06, "loss": 0.8686, "step": 1622 }, { "epoch": 0.0834104224483503, "grad_norm": 0.9409334063529968, "learning_rate": 9.925413499274935e-06, "loss": 0.6862, "step": 1623 }, { "epoch": 0.08346181519169493, "grad_norm": 1.302871823310852, "learning_rate": 9.925270215438947e-06, "loss": 0.8698, "step": 1624 }, { "epoch": 0.08351320793503957, "grad_norm": 1.3412436246871948, "learning_rate": 9.925126795144048e-06, "loss": 0.8131, "step": 1625 }, { "epoch": 0.08356460067838421, "grad_norm": 1.3268060684204102, "learning_rate": 9.924983238394212e-06, "loss": 0.7826, "step": 1626 }, { "epoch": 0.08361599342172885, "grad_norm": 1.24637770652771, "learning_rate": 9.924839545193412e-06, "loss": 0.8274, "step": 1627 }, { "epoch": 0.08366738616507349, "grad_norm": 1.3227349519729614, "learning_rate": 9.924695715545634e-06, "loss": 0.7981, "step": 1628 }, { "epoch": 0.08371877890841813, "grad_norm": 1.1521252393722534, "learning_rate": 9.924551749454861e-06, "loss": 0.7988, "step": 1629 }, { "epoch": 0.08377017165176277, "grad_norm": 1.182499885559082, "learning_rate": 9.92440764692508e-06, "loss": 0.84, "step": 1630 }, { "epoch": 0.0838215643951074, "grad_norm": 1.210735559463501, "learning_rate": 9.924263407960285e-06, "loss": 0.8569, "step": 1631 }, { "epoch": 0.08387295713845205, "grad_norm": 1.194484829902649, "learning_rate": 9.924119032564472e-06, "loss": 0.8735, "step": 1632 }, { "epoch": 0.08392434988179669, "grad_norm": 1.1902440786361694, "learning_rate": 9.923974520741642e-06, "loss": 0.8325, "step": 1633 }, { "epoch": 0.08397574262514133, "grad_norm": 0.9878230690956116, "learning_rate": 9.923829872495799e-06, "loss": 0.7547, "step": 1634 }, { "epoch": 0.08402713536848597, "grad_norm": 1.0768442153930664, "learning_rate": 9.923685087830947e-06, "loss": 0.7315, "step": 1635 }, { "epoch": 0.08407852811183061, "grad_norm": 1.1201190948486328, "learning_rate": 9.923540166751102e-06, "loss": 0.8132, "step": 1636 }, { "epoch": 0.08412992085517525, "grad_norm": 1.207828402519226, "learning_rate": 9.923395109260276e-06, "loss": 0.8567, "step": 1637 }, { "epoch": 0.0841813135985199, "grad_norm": 1.174572229385376, "learning_rate": 9.923249915362491e-06, "loss": 0.8372, "step": 1638 }, { "epoch": 0.08423270634186453, "grad_norm": 1.1701222658157349, "learning_rate": 9.923104585061766e-06, "loss": 0.8142, "step": 1639 }, { "epoch": 0.08428409908520917, "grad_norm": 1.159571647644043, "learning_rate": 9.922959118362132e-06, "loss": 0.796, "step": 1640 }, { "epoch": 0.08433549182855381, "grad_norm": 1.1724703311920166, "learning_rate": 9.922813515267614e-06, "loss": 0.9127, "step": 1641 }, { "epoch": 0.08438688457189845, "grad_norm": 1.1685751676559448, "learning_rate": 9.92266777578225e-06, "loss": 0.8545, "step": 1642 }, { "epoch": 0.08443827731524309, "grad_norm": 1.173614740371704, "learning_rate": 9.922521899910076e-06, "loss": 0.8735, "step": 1643 }, { "epoch": 0.08448967005858772, "grad_norm": 1.1921416521072388, "learning_rate": 9.922375887655136e-06, "loss": 0.8827, "step": 1644 }, { "epoch": 0.08454106280193237, "grad_norm": 1.0121287107467651, "learning_rate": 9.922229739021471e-06, "loss": 0.7882, "step": 1645 }, { "epoch": 0.084592455545277, "grad_norm": 1.1683837175369263, "learning_rate": 9.922083454013134e-06, "loss": 0.8126, "step": 1646 }, { "epoch": 0.08464384828862165, "grad_norm": 1.148938536643982, "learning_rate": 9.921937032634177e-06, "loss": 0.8313, "step": 1647 }, { "epoch": 0.08469524103196628, "grad_norm": 0.9767423868179321, "learning_rate": 9.921790474888656e-06, "loss": 0.715, "step": 1648 }, { "epoch": 0.08474663377531093, "grad_norm": 1.1744794845581055, "learning_rate": 9.921643780780631e-06, "loss": 0.81, "step": 1649 }, { "epoch": 0.08479802651865556, "grad_norm": 1.0476856231689453, "learning_rate": 9.921496950314169e-06, "loss": 0.7134, "step": 1650 }, { "epoch": 0.08484941926200021, "grad_norm": 1.0916850566864014, "learning_rate": 9.921349983493336e-06, "loss": 0.7874, "step": 1651 }, { "epoch": 0.08490081200534484, "grad_norm": 1.165315866470337, "learning_rate": 9.921202880322204e-06, "loss": 0.82, "step": 1652 }, { "epoch": 0.08495220474868949, "grad_norm": 1.2108068466186523, "learning_rate": 9.921055640804849e-06, "loss": 0.8478, "step": 1653 }, { "epoch": 0.08500359749203412, "grad_norm": 1.081020712852478, "learning_rate": 9.920908264945349e-06, "loss": 0.8261, "step": 1654 }, { "epoch": 0.08505499023537877, "grad_norm": 1.456040859222412, "learning_rate": 9.92076075274779e-06, "loss": 0.7596, "step": 1655 }, { "epoch": 0.0851063829787234, "grad_norm": 1.2016627788543701, "learning_rate": 9.920613104216256e-06, "loss": 0.8583, "step": 1656 }, { "epoch": 0.08515777572206805, "grad_norm": 1.2699116468429565, "learning_rate": 9.92046531935484e-06, "loss": 0.8366, "step": 1657 }, { "epoch": 0.08520916846541268, "grad_norm": 1.1259067058563232, "learning_rate": 9.920317398167634e-06, "loss": 0.8305, "step": 1658 }, { "epoch": 0.08526056120875733, "grad_norm": 1.1446977853775024, "learning_rate": 9.920169340658739e-06, "loss": 0.8855, "step": 1659 }, { "epoch": 0.08531195395210196, "grad_norm": 1.245345115661621, "learning_rate": 9.920021146832256e-06, "loss": 0.8594, "step": 1660 }, { "epoch": 0.08536334669544661, "grad_norm": 1.22573721408844, "learning_rate": 9.919872816692291e-06, "loss": 0.8671, "step": 1661 }, { "epoch": 0.08541473943879124, "grad_norm": 1.1051931381225586, "learning_rate": 9.919724350242953e-06, "loss": 0.7381, "step": 1662 }, { "epoch": 0.08546613218213588, "grad_norm": 1.1745229959487915, "learning_rate": 9.919575747488355e-06, "loss": 0.8987, "step": 1663 }, { "epoch": 0.08551752492548052, "grad_norm": 1.2365570068359375, "learning_rate": 9.919427008432615e-06, "loss": 0.9219, "step": 1664 }, { "epoch": 0.08556891766882516, "grad_norm": 0.9980310201644897, "learning_rate": 9.919278133079855e-06, "loss": 0.7436, "step": 1665 }, { "epoch": 0.0856203104121698, "grad_norm": 1.1453474760055542, "learning_rate": 9.919129121434198e-06, "loss": 0.7722, "step": 1666 }, { "epoch": 0.08567170315551444, "grad_norm": 1.1426036357879639, "learning_rate": 9.918979973499774e-06, "loss": 0.8587, "step": 1667 }, { "epoch": 0.08572309589885908, "grad_norm": 1.136110782623291, "learning_rate": 9.918830689280714e-06, "loss": 0.827, "step": 1668 }, { "epoch": 0.08577448864220372, "grad_norm": 1.167225956916809, "learning_rate": 9.918681268781154e-06, "loss": 0.8335, "step": 1669 }, { "epoch": 0.08582588138554836, "grad_norm": 1.209023356437683, "learning_rate": 9.918531712005234e-06, "loss": 0.8774, "step": 1670 }, { "epoch": 0.085877274128893, "grad_norm": 1.1121746301651, "learning_rate": 9.918382018957098e-06, "loss": 0.8603, "step": 1671 }, { "epoch": 0.08592866687223764, "grad_norm": 1.30025053024292, "learning_rate": 9.918232189640892e-06, "loss": 0.8565, "step": 1672 }, { "epoch": 0.08598005961558228, "grad_norm": 1.1765111684799194, "learning_rate": 9.91808222406077e-06, "loss": 0.8444, "step": 1673 }, { "epoch": 0.08603145235892692, "grad_norm": 1.1734223365783691, "learning_rate": 9.917932122220886e-06, "loss": 0.8015, "step": 1674 }, { "epoch": 0.08608284510227156, "grad_norm": 1.1578689813613892, "learning_rate": 9.917781884125397e-06, "loss": 0.8256, "step": 1675 }, { "epoch": 0.0861342378456162, "grad_norm": 0.8297823071479797, "learning_rate": 9.917631509778468e-06, "loss": 0.751, "step": 1676 }, { "epoch": 0.08618563058896084, "grad_norm": 1.2077951431274414, "learning_rate": 9.91748099918426e-06, "loss": 0.8859, "step": 1677 }, { "epoch": 0.08623702333230548, "grad_norm": 1.190809965133667, "learning_rate": 9.91733035234695e-06, "loss": 0.8647, "step": 1678 }, { "epoch": 0.08628841607565012, "grad_norm": 1.2281314134597778, "learning_rate": 9.917179569270708e-06, "loss": 0.804, "step": 1679 }, { "epoch": 0.08633980881899475, "grad_norm": 1.2048341035842896, "learning_rate": 9.91702864995971e-06, "loss": 0.9118, "step": 1680 }, { "epoch": 0.0863912015623394, "grad_norm": 1.1584261655807495, "learning_rate": 9.916877594418141e-06, "loss": 0.8589, "step": 1681 }, { "epoch": 0.08644259430568403, "grad_norm": 1.0403865575790405, "learning_rate": 9.916726402650185e-06, "loss": 0.77, "step": 1682 }, { "epoch": 0.08649398704902868, "grad_norm": 1.1586493253707886, "learning_rate": 9.916575074660031e-06, "loss": 0.8347, "step": 1683 }, { "epoch": 0.08654537979237331, "grad_norm": 1.1266425848007202, "learning_rate": 9.916423610451871e-06, "loss": 0.8409, "step": 1684 }, { "epoch": 0.08659677253571796, "grad_norm": 1.1396063566207886, "learning_rate": 9.9162720100299e-06, "loss": 0.7928, "step": 1685 }, { "epoch": 0.08664816527906259, "grad_norm": 0.7864115834236145, "learning_rate": 9.916120273398321e-06, "loss": 0.7434, "step": 1686 }, { "epoch": 0.08669955802240724, "grad_norm": 1.137732982635498, "learning_rate": 9.915968400561337e-06, "loss": 0.8163, "step": 1687 }, { "epoch": 0.08675095076575187, "grad_norm": 1.2246334552764893, "learning_rate": 9.915816391523156e-06, "loss": 0.8392, "step": 1688 }, { "epoch": 0.08680234350909652, "grad_norm": 1.1159659624099731, "learning_rate": 9.915664246287988e-06, "loss": 0.8277, "step": 1689 }, { "epoch": 0.08685373625244115, "grad_norm": 1.1441612243652344, "learning_rate": 9.91551196486005e-06, "loss": 0.8295, "step": 1690 }, { "epoch": 0.0869051289957858, "grad_norm": 1.1027507781982422, "learning_rate": 9.915359547243562e-06, "loss": 0.8074, "step": 1691 }, { "epoch": 0.08695652173913043, "grad_norm": 1.2078737020492554, "learning_rate": 9.915206993442742e-06, "loss": 0.8981, "step": 1692 }, { "epoch": 0.08700791448247508, "grad_norm": 1.1267225742340088, "learning_rate": 9.915054303461824e-06, "loss": 0.8265, "step": 1693 }, { "epoch": 0.08705930722581971, "grad_norm": 1.1323634386062622, "learning_rate": 9.914901477305033e-06, "loss": 0.8489, "step": 1694 }, { "epoch": 0.08711069996916436, "grad_norm": 1.1748361587524414, "learning_rate": 9.914748514976602e-06, "loss": 0.8685, "step": 1695 }, { "epoch": 0.08716209271250899, "grad_norm": 1.2105607986450195, "learning_rate": 9.914595416480775e-06, "loss": 0.8914, "step": 1696 }, { "epoch": 0.08721348545585364, "grad_norm": 0.9802582263946533, "learning_rate": 9.91444218182179e-06, "loss": 0.7275, "step": 1697 }, { "epoch": 0.08726487819919827, "grad_norm": 1.2489691972732544, "learning_rate": 9.91428881100389e-06, "loss": 0.8424, "step": 1698 }, { "epoch": 0.0873162709425429, "grad_norm": 1.191060185432434, "learning_rate": 9.91413530403133e-06, "loss": 0.8359, "step": 1699 }, { "epoch": 0.08736766368588755, "grad_norm": 0.8627701997756958, "learning_rate": 9.91398166090836e-06, "loss": 0.7629, "step": 1700 }, { "epoch": 0.08741905642923219, "grad_norm": 1.1759506464004517, "learning_rate": 9.913827881639238e-06, "loss": 0.8113, "step": 1701 }, { "epoch": 0.08747044917257683, "grad_norm": 1.2642565965652466, "learning_rate": 9.91367396622822e-06, "loss": 0.8866, "step": 1702 }, { "epoch": 0.08752184191592147, "grad_norm": 1.1577627658843994, "learning_rate": 9.913519914679576e-06, "loss": 0.8489, "step": 1703 }, { "epoch": 0.08757323465926611, "grad_norm": 1.1268728971481323, "learning_rate": 9.913365726997572e-06, "loss": 0.8065, "step": 1704 }, { "epoch": 0.08762462740261075, "grad_norm": 1.2316921949386597, "learning_rate": 9.91321140318648e-06, "loss": 0.8748, "step": 1705 }, { "epoch": 0.0876760201459554, "grad_norm": 0.920242428779602, "learning_rate": 9.913056943250577e-06, "loss": 0.7523, "step": 1706 }, { "epoch": 0.08772741288930003, "grad_norm": 0.8819411993026733, "learning_rate": 9.912902347194138e-06, "loss": 0.7034, "step": 1707 }, { "epoch": 0.08777880563264467, "grad_norm": 1.2137534618377686, "learning_rate": 9.912747615021452e-06, "loss": 0.8567, "step": 1708 }, { "epoch": 0.08783019837598931, "grad_norm": 1.1915690898895264, "learning_rate": 9.912592746736803e-06, "loss": 0.819, "step": 1709 }, { "epoch": 0.08788159111933395, "grad_norm": 1.2644426822662354, "learning_rate": 9.91243774234448e-06, "loss": 0.8375, "step": 1710 }, { "epoch": 0.08793298386267859, "grad_norm": 1.1931955814361572, "learning_rate": 9.912282601848782e-06, "loss": 0.8204, "step": 1711 }, { "epoch": 0.08798437660602323, "grad_norm": 0.8790128231048584, "learning_rate": 9.912127325254003e-06, "loss": 0.6827, "step": 1712 }, { "epoch": 0.08803576934936787, "grad_norm": 1.1609959602355957, "learning_rate": 9.911971912564449e-06, "loss": 0.8488, "step": 1713 }, { "epoch": 0.08808716209271251, "grad_norm": 0.8543535470962524, "learning_rate": 9.911816363784421e-06, "loss": 0.7388, "step": 1714 }, { "epoch": 0.08813855483605715, "grad_norm": 1.1180012226104736, "learning_rate": 9.911660678918233e-06, "loss": 0.8656, "step": 1715 }, { "epoch": 0.0881899475794018, "grad_norm": 1.1848636865615845, "learning_rate": 9.911504857970198e-06, "loss": 0.8759, "step": 1716 }, { "epoch": 0.08824134032274643, "grad_norm": 1.1929327249526978, "learning_rate": 9.91134890094463e-06, "loss": 0.8569, "step": 1717 }, { "epoch": 0.08829273306609106, "grad_norm": 1.4534296989440918, "learning_rate": 9.911192807845853e-06, "loss": 0.8385, "step": 1718 }, { "epoch": 0.08834412580943571, "grad_norm": 1.1895525455474854, "learning_rate": 9.91103657867819e-06, "loss": 0.8231, "step": 1719 }, { "epoch": 0.08839551855278034, "grad_norm": 1.6340380907058716, "learning_rate": 9.910880213445971e-06, "loss": 0.8758, "step": 1720 }, { "epoch": 0.08844691129612499, "grad_norm": 1.146881341934204, "learning_rate": 9.910723712153526e-06, "loss": 0.8813, "step": 1721 }, { "epoch": 0.08849830403946962, "grad_norm": 1.2568217515945435, "learning_rate": 9.910567074805192e-06, "loss": 0.8494, "step": 1722 }, { "epoch": 0.08854969678281427, "grad_norm": 1.1822679042816162, "learning_rate": 9.91041030140531e-06, "loss": 0.8024, "step": 1723 }, { "epoch": 0.0886010895261589, "grad_norm": 1.5603710412979126, "learning_rate": 9.910253391958224e-06, "loss": 0.7578, "step": 1724 }, { "epoch": 0.08865248226950355, "grad_norm": 1.0265038013458252, "learning_rate": 9.910096346468279e-06, "loss": 0.7669, "step": 1725 }, { "epoch": 0.08870387501284818, "grad_norm": 1.3067086935043335, "learning_rate": 9.909939164939825e-06, "loss": 0.8291, "step": 1726 }, { "epoch": 0.08875526775619283, "grad_norm": 0.7920119166374207, "learning_rate": 9.909781847377223e-06, "loss": 0.7212, "step": 1727 }, { "epoch": 0.08880666049953746, "grad_norm": 1.2579916715621948, "learning_rate": 9.909624393784824e-06, "loss": 0.8196, "step": 1728 }, { "epoch": 0.08885805324288211, "grad_norm": 1.1902517080307007, "learning_rate": 9.909466804166994e-06, "loss": 0.8208, "step": 1729 }, { "epoch": 0.08890944598622674, "grad_norm": 1.1936190128326416, "learning_rate": 9.909309078528099e-06, "loss": 0.883, "step": 1730 }, { "epoch": 0.08896083872957139, "grad_norm": 1.1592642068862915, "learning_rate": 9.90915121687251e-06, "loss": 0.8637, "step": 1731 }, { "epoch": 0.08901223147291602, "grad_norm": 1.1445428133010864, "learning_rate": 9.9089932192046e-06, "loss": 0.8371, "step": 1732 }, { "epoch": 0.08906362421626067, "grad_norm": 1.2663488388061523, "learning_rate": 9.908835085528745e-06, "loss": 0.7851, "step": 1733 }, { "epoch": 0.0891150169596053, "grad_norm": 1.2600806951522827, "learning_rate": 9.90867681584933e-06, "loss": 0.8628, "step": 1734 }, { "epoch": 0.08916640970294995, "grad_norm": 1.2068052291870117, "learning_rate": 9.908518410170734e-06, "loss": 0.8098, "step": 1735 }, { "epoch": 0.08921780244629458, "grad_norm": 1.3630139827728271, "learning_rate": 9.908359868497351e-06, "loss": 0.8433, "step": 1736 }, { "epoch": 0.08926919518963922, "grad_norm": 1.229305386543274, "learning_rate": 9.90820119083357e-06, "loss": 0.8874, "step": 1737 }, { "epoch": 0.08932058793298386, "grad_norm": 0.8338720202445984, "learning_rate": 9.90804237718379e-06, "loss": 0.7063, "step": 1738 }, { "epoch": 0.0893719806763285, "grad_norm": 1.1197295188903809, "learning_rate": 9.90788342755241e-06, "loss": 0.8625, "step": 1739 }, { "epoch": 0.08942337341967314, "grad_norm": 1.1867824792861938, "learning_rate": 9.907724341943834e-06, "loss": 0.9239, "step": 1740 }, { "epoch": 0.08947476616301778, "grad_norm": 1.0972654819488525, "learning_rate": 9.90756512036247e-06, "loss": 0.827, "step": 1741 }, { "epoch": 0.08952615890636242, "grad_norm": 1.1490797996520996, "learning_rate": 9.907405762812727e-06, "loss": 0.7919, "step": 1742 }, { "epoch": 0.08957755164970706, "grad_norm": 1.1660102605819702, "learning_rate": 9.907246269299024e-06, "loss": 0.8564, "step": 1743 }, { "epoch": 0.0896289443930517, "grad_norm": 1.116220235824585, "learning_rate": 9.907086639825777e-06, "loss": 0.782, "step": 1744 }, { "epoch": 0.08968033713639634, "grad_norm": 1.1477996110916138, "learning_rate": 9.906926874397408e-06, "loss": 0.8486, "step": 1745 }, { "epoch": 0.08973172987974098, "grad_norm": 1.1711969375610352, "learning_rate": 9.906766973018348e-06, "loss": 0.7998, "step": 1746 }, { "epoch": 0.08978312262308562, "grad_norm": 1.1594072580337524, "learning_rate": 9.906606935693023e-06, "loss": 0.8742, "step": 1747 }, { "epoch": 0.08983451536643026, "grad_norm": 1.085246205329895, "learning_rate": 9.906446762425867e-06, "loss": 0.8875, "step": 1748 }, { "epoch": 0.0898859081097749, "grad_norm": 1.1919771432876587, "learning_rate": 9.906286453221321e-06, "loss": 0.8565, "step": 1749 }, { "epoch": 0.08993730085311955, "grad_norm": 1.129128336906433, "learning_rate": 9.906126008083823e-06, "loss": 0.8062, "step": 1750 }, { "epoch": 0.08998869359646418, "grad_norm": 1.1392971277236938, "learning_rate": 9.90596542701782e-06, "loss": 0.8189, "step": 1751 }, { "epoch": 0.09004008633980883, "grad_norm": 1.1751765012741089, "learning_rate": 9.90580471002776e-06, "loss": 0.7489, "step": 1752 }, { "epoch": 0.09009147908315346, "grad_norm": 1.247114896774292, "learning_rate": 9.905643857118097e-06, "loss": 0.8858, "step": 1753 }, { "epoch": 0.09014287182649809, "grad_norm": 1.1875725984573364, "learning_rate": 9.905482868293287e-06, "loss": 0.7805, "step": 1754 }, { "epoch": 0.09019426456984274, "grad_norm": 1.1310409307479858, "learning_rate": 9.905321743557792e-06, "loss": 0.8283, "step": 1755 }, { "epoch": 0.09024565731318737, "grad_norm": 1.1902302503585815, "learning_rate": 9.905160482916074e-06, "loss": 0.7898, "step": 1756 }, { "epoch": 0.09029705005653202, "grad_norm": 1.1485869884490967, "learning_rate": 9.904999086372602e-06, "loss": 0.7992, "step": 1757 }, { "epoch": 0.09034844279987665, "grad_norm": 1.1292792558670044, "learning_rate": 9.904837553931846e-06, "loss": 0.7609, "step": 1758 }, { "epoch": 0.0903998355432213, "grad_norm": 1.2959582805633545, "learning_rate": 9.904675885598281e-06, "loss": 0.7961, "step": 1759 }, { "epoch": 0.09045122828656593, "grad_norm": 1.1334614753723145, "learning_rate": 9.904514081376388e-06, "loss": 0.8255, "step": 1760 }, { "epoch": 0.09050262102991058, "grad_norm": 1.2229472398757935, "learning_rate": 9.904352141270652e-06, "loss": 0.8224, "step": 1761 }, { "epoch": 0.09055401377325521, "grad_norm": 1.148348331451416, "learning_rate": 9.904190065285554e-06, "loss": 0.8531, "step": 1762 }, { "epoch": 0.09060540651659986, "grad_norm": 1.174596905708313, "learning_rate": 9.90402785342559e-06, "loss": 0.8217, "step": 1763 }, { "epoch": 0.09065679925994449, "grad_norm": 1.14903724193573, "learning_rate": 9.903865505695252e-06, "loss": 0.81, "step": 1764 }, { "epoch": 0.09070819200328914, "grad_norm": 1.234180212020874, "learning_rate": 9.903703022099037e-06, "loss": 0.8435, "step": 1765 }, { "epoch": 0.09075958474663377, "grad_norm": 1.1840254068374634, "learning_rate": 9.903540402641449e-06, "loss": 0.8851, "step": 1766 }, { "epoch": 0.09081097748997842, "grad_norm": 1.1155592203140259, "learning_rate": 9.903377647326991e-06, "loss": 0.8184, "step": 1767 }, { "epoch": 0.09086237023332305, "grad_norm": 1.1460785865783691, "learning_rate": 9.903214756160173e-06, "loss": 0.8425, "step": 1768 }, { "epoch": 0.0909137629766677, "grad_norm": 1.1041440963745117, "learning_rate": 9.903051729145508e-06, "loss": 0.8766, "step": 1769 }, { "epoch": 0.09096515572001233, "grad_norm": 1.1446219682693481, "learning_rate": 9.902888566287516e-06, "loss": 0.8132, "step": 1770 }, { "epoch": 0.09101654846335698, "grad_norm": 1.140639305114746, "learning_rate": 9.902725267590711e-06, "loss": 0.8347, "step": 1771 }, { "epoch": 0.09106794120670161, "grad_norm": 1.3522577285766602, "learning_rate": 9.902561833059625e-06, "loss": 0.9177, "step": 1772 }, { "epoch": 0.09111933395004625, "grad_norm": 1.1369584798812866, "learning_rate": 9.90239826269878e-06, "loss": 0.8455, "step": 1773 }, { "epoch": 0.0911707266933909, "grad_norm": 1.206945776939392, "learning_rate": 9.902234556512711e-06, "loss": 0.8595, "step": 1774 }, { "epoch": 0.09122211943673553, "grad_norm": 1.1400874853134155, "learning_rate": 9.902070714505951e-06, "loss": 0.8575, "step": 1775 }, { "epoch": 0.09127351218008017, "grad_norm": 1.124389410018921, "learning_rate": 9.901906736683044e-06, "loss": 0.8419, "step": 1776 }, { "epoch": 0.09132490492342481, "grad_norm": 1.1665382385253906, "learning_rate": 9.901742623048529e-06, "loss": 0.8846, "step": 1777 }, { "epoch": 0.09137629766676945, "grad_norm": 0.8407975435256958, "learning_rate": 9.901578373606953e-06, "loss": 0.7765, "step": 1778 }, { "epoch": 0.09142769041011409, "grad_norm": 1.2934582233428955, "learning_rate": 9.90141398836287e-06, "loss": 0.8469, "step": 1779 }, { "epoch": 0.09147908315345873, "grad_norm": 1.211266279220581, "learning_rate": 9.901249467320832e-06, "loss": 0.8657, "step": 1780 }, { "epoch": 0.09153047589680337, "grad_norm": 1.145798921585083, "learning_rate": 9.901084810485397e-06, "loss": 0.8086, "step": 1781 }, { "epoch": 0.09158186864014801, "grad_norm": 1.1913820505142212, "learning_rate": 9.900920017861126e-06, "loss": 0.8779, "step": 1782 }, { "epoch": 0.09163326138349265, "grad_norm": 1.1218396425247192, "learning_rate": 9.900755089452589e-06, "loss": 0.8289, "step": 1783 }, { "epoch": 0.0916846541268373, "grad_norm": 1.14694082736969, "learning_rate": 9.90059002526435e-06, "loss": 0.8032, "step": 1784 }, { "epoch": 0.09173604687018193, "grad_norm": 1.1361756324768066, "learning_rate": 9.900424825300987e-06, "loss": 0.8574, "step": 1785 }, { "epoch": 0.09178743961352658, "grad_norm": 1.3079322576522827, "learning_rate": 9.900259489567075e-06, "loss": 0.882, "step": 1786 }, { "epoch": 0.09183883235687121, "grad_norm": 1.211051344871521, "learning_rate": 9.900094018067193e-06, "loss": 0.8617, "step": 1787 }, { "epoch": 0.09189022510021586, "grad_norm": 1.2048643827438354, "learning_rate": 9.899928410805928e-06, "loss": 0.8829, "step": 1788 }, { "epoch": 0.09194161784356049, "grad_norm": 1.3317652940750122, "learning_rate": 9.899762667787868e-06, "loss": 0.8882, "step": 1789 }, { "epoch": 0.09199301058690514, "grad_norm": 0.8494691252708435, "learning_rate": 9.899596789017604e-06, "loss": 0.6927, "step": 1790 }, { "epoch": 0.09204440333024977, "grad_norm": 1.14299476146698, "learning_rate": 9.899430774499731e-06, "loss": 0.832, "step": 1791 }, { "epoch": 0.0920957960735944, "grad_norm": 1.1811003684997559, "learning_rate": 9.899264624238854e-06, "loss": 0.7885, "step": 1792 }, { "epoch": 0.09214718881693905, "grad_norm": 1.1288859844207764, "learning_rate": 9.89909833823957e-06, "loss": 0.8618, "step": 1793 }, { "epoch": 0.09219858156028368, "grad_norm": 1.1633673906326294, "learning_rate": 9.898931916506487e-06, "loss": 0.8386, "step": 1794 }, { "epoch": 0.09224997430362833, "grad_norm": 1.2165002822875977, "learning_rate": 9.89876535904422e-06, "loss": 0.8657, "step": 1795 }, { "epoch": 0.09230136704697296, "grad_norm": 0.8270774483680725, "learning_rate": 9.89859866585738e-06, "loss": 0.75, "step": 1796 }, { "epoch": 0.09235275979031761, "grad_norm": 1.130789875984192, "learning_rate": 9.898431836950585e-06, "loss": 0.8534, "step": 1797 }, { "epoch": 0.09240415253366224, "grad_norm": 1.1629869937896729, "learning_rate": 9.898264872328461e-06, "loss": 0.8008, "step": 1798 }, { "epoch": 0.09245554527700689, "grad_norm": 1.1575783491134644, "learning_rate": 9.898097771995628e-06, "loss": 0.7962, "step": 1799 }, { "epoch": 0.09250693802035152, "grad_norm": 1.1942816972732544, "learning_rate": 9.897930535956722e-06, "loss": 0.7994, "step": 1800 }, { "epoch": 0.09255833076369617, "grad_norm": 1.1875114440917969, "learning_rate": 9.897763164216372e-06, "loss": 0.7908, "step": 1801 }, { "epoch": 0.0926097235070408, "grad_norm": 1.0235319137573242, "learning_rate": 9.897595656779215e-06, "loss": 0.7046, "step": 1802 }, { "epoch": 0.09266111625038545, "grad_norm": 1.1574156284332275, "learning_rate": 9.897428013649896e-06, "loss": 0.8396, "step": 1803 }, { "epoch": 0.09271250899373008, "grad_norm": 0.8634977340698242, "learning_rate": 9.897260234833057e-06, "loss": 0.6808, "step": 1804 }, { "epoch": 0.09276390173707473, "grad_norm": 1.2188562154769897, "learning_rate": 9.897092320333346e-06, "loss": 0.8992, "step": 1805 }, { "epoch": 0.09281529448041936, "grad_norm": 1.130176067352295, "learning_rate": 9.896924270155416e-06, "loss": 0.842, "step": 1806 }, { "epoch": 0.09286668722376401, "grad_norm": 1.2023639678955078, "learning_rate": 9.896756084303922e-06, "loss": 0.8234, "step": 1807 }, { "epoch": 0.09291807996710864, "grad_norm": 1.1483135223388672, "learning_rate": 9.896587762783527e-06, "loss": 0.7868, "step": 1808 }, { "epoch": 0.09296947271045329, "grad_norm": 1.1314959526062012, "learning_rate": 9.89641930559889e-06, "loss": 0.8395, "step": 1809 }, { "epoch": 0.09302086545379792, "grad_norm": 1.1751201152801514, "learning_rate": 9.896250712754681e-06, "loss": 0.8244, "step": 1810 }, { "epoch": 0.09307225819714256, "grad_norm": 1.2030655145645142, "learning_rate": 9.896081984255571e-06, "loss": 0.832, "step": 1811 }, { "epoch": 0.0931236509404872, "grad_norm": 1.1651982069015503, "learning_rate": 9.895913120106233e-06, "loss": 0.7897, "step": 1812 }, { "epoch": 0.09317504368383184, "grad_norm": 0.9788298606872559, "learning_rate": 9.895744120311348e-06, "loss": 0.7899, "step": 1813 }, { "epoch": 0.09322643642717648, "grad_norm": 0.8545430302619934, "learning_rate": 9.895574984875595e-06, "loss": 0.7375, "step": 1814 }, { "epoch": 0.09327782917052112, "grad_norm": 1.247603416442871, "learning_rate": 9.895405713803666e-06, "loss": 0.9005, "step": 1815 }, { "epoch": 0.09332922191386576, "grad_norm": 1.1607825756072998, "learning_rate": 9.895236307100242e-06, "loss": 0.8445, "step": 1816 }, { "epoch": 0.0933806146572104, "grad_norm": 1.216247320175171, "learning_rate": 9.895066764770025e-06, "loss": 0.8295, "step": 1817 }, { "epoch": 0.09343200740055504, "grad_norm": 1.1355317831039429, "learning_rate": 9.894897086817707e-06, "loss": 0.816, "step": 1818 }, { "epoch": 0.09348340014389968, "grad_norm": 1.2093919515609741, "learning_rate": 9.89472727324799e-06, "loss": 0.824, "step": 1819 }, { "epoch": 0.09353479288724433, "grad_norm": 1.2408522367477417, "learning_rate": 9.89455732406558e-06, "loss": 0.8274, "step": 1820 }, { "epoch": 0.09358618563058896, "grad_norm": 1.2961534261703491, "learning_rate": 9.894387239275187e-06, "loss": 0.7489, "step": 1821 }, { "epoch": 0.0936375783739336, "grad_norm": 1.069570779800415, "learning_rate": 9.89421701888152e-06, "loss": 0.8274, "step": 1822 }, { "epoch": 0.09368897111727824, "grad_norm": 1.0915688276290894, "learning_rate": 9.894046662889297e-06, "loss": 0.8225, "step": 1823 }, { "epoch": 0.09374036386062289, "grad_norm": 1.4259387254714966, "learning_rate": 9.893876171303238e-06, "loss": 0.8274, "step": 1824 }, { "epoch": 0.09379175660396752, "grad_norm": 1.2512353658676147, "learning_rate": 9.893705544128065e-06, "loss": 0.8534, "step": 1825 }, { "epoch": 0.09384314934731217, "grad_norm": 1.18647038936615, "learning_rate": 9.893534781368508e-06, "loss": 0.8277, "step": 1826 }, { "epoch": 0.0938945420906568, "grad_norm": 1.1751434803009033, "learning_rate": 9.893363883029294e-06, "loss": 0.8766, "step": 1827 }, { "epoch": 0.09394593483400143, "grad_norm": 1.0925143957138062, "learning_rate": 9.893192849115163e-06, "loss": 0.7637, "step": 1828 }, { "epoch": 0.09399732757734608, "grad_norm": 1.1270004510879517, "learning_rate": 9.89302167963085e-06, "loss": 0.8157, "step": 1829 }, { "epoch": 0.09404872032069071, "grad_norm": 1.2134119272232056, "learning_rate": 9.8928503745811e-06, "loss": 0.7248, "step": 1830 }, { "epoch": 0.09410011306403536, "grad_norm": 1.255647897720337, "learning_rate": 9.892678933970656e-06, "loss": 0.874, "step": 1831 }, { "epoch": 0.09415150580737999, "grad_norm": 1.166581153869629, "learning_rate": 9.89250735780427e-06, "loss": 0.8473, "step": 1832 }, { "epoch": 0.09420289855072464, "grad_norm": 0.7590866684913635, "learning_rate": 9.892335646086697e-06, "loss": 0.6881, "step": 1833 }, { "epoch": 0.09425429129406927, "grad_norm": 1.1782327890396118, "learning_rate": 9.892163798822692e-06, "loss": 0.8666, "step": 1834 }, { "epoch": 0.09430568403741392, "grad_norm": 1.4020658731460571, "learning_rate": 9.891991816017015e-06, "loss": 0.7937, "step": 1835 }, { "epoch": 0.09435707678075855, "grad_norm": 1.110379695892334, "learning_rate": 9.891819697674434e-06, "loss": 0.7729, "step": 1836 }, { "epoch": 0.0944084695241032, "grad_norm": 0.9698451161384583, "learning_rate": 9.891647443799717e-06, "loss": 0.7335, "step": 1837 }, { "epoch": 0.09445986226744783, "grad_norm": 1.130768060684204, "learning_rate": 9.891475054397635e-06, "loss": 0.8299, "step": 1838 }, { "epoch": 0.09451125501079248, "grad_norm": 1.1199626922607422, "learning_rate": 9.891302529472965e-06, "loss": 0.8047, "step": 1839 }, { "epoch": 0.09456264775413711, "grad_norm": 1.087475299835205, "learning_rate": 9.89112986903049e-06, "loss": 0.8214, "step": 1840 }, { "epoch": 0.09461404049748176, "grad_norm": 1.3037495613098145, "learning_rate": 9.890957073074989e-06, "loss": 0.8659, "step": 1841 }, { "epoch": 0.0946654332408264, "grad_norm": 1.2137969732284546, "learning_rate": 9.890784141611249e-06, "loss": 0.8113, "step": 1842 }, { "epoch": 0.09471682598417104, "grad_norm": 1.1723822355270386, "learning_rate": 9.890611074644067e-06, "loss": 0.7764, "step": 1843 }, { "epoch": 0.09476821872751567, "grad_norm": 1.0850350856781006, "learning_rate": 9.890437872178232e-06, "loss": 0.7877, "step": 1844 }, { "epoch": 0.09481961147086032, "grad_norm": 1.0865315198898315, "learning_rate": 9.890264534218546e-06, "loss": 0.7854, "step": 1845 }, { "epoch": 0.09487100421420495, "grad_norm": 0.8876744508743286, "learning_rate": 9.890091060769812e-06, "loss": 0.7321, "step": 1846 }, { "epoch": 0.09492239695754959, "grad_norm": 1.307947039604187, "learning_rate": 9.889917451836834e-06, "loss": 0.8251, "step": 1847 }, { "epoch": 0.09497378970089423, "grad_norm": 0.837812066078186, "learning_rate": 9.889743707424422e-06, "loss": 0.7255, "step": 1848 }, { "epoch": 0.09502518244423887, "grad_norm": 0.83307945728302, "learning_rate": 9.889569827537392e-06, "loss": 0.7435, "step": 1849 }, { "epoch": 0.09507657518758351, "grad_norm": 0.8142296075820923, "learning_rate": 9.88939581218056e-06, "loss": 0.7143, "step": 1850 }, { "epoch": 0.09512796793092815, "grad_norm": 1.2148126363754272, "learning_rate": 9.889221661358745e-06, "loss": 0.8502, "step": 1851 }, { "epoch": 0.0951793606742728, "grad_norm": 1.2017245292663574, "learning_rate": 9.889047375076777e-06, "loss": 0.8508, "step": 1852 }, { "epoch": 0.09523075341761743, "grad_norm": 1.1075420379638672, "learning_rate": 9.888872953339481e-06, "loss": 0.7604, "step": 1853 }, { "epoch": 0.09528214616096208, "grad_norm": 1.1481331586837769, "learning_rate": 9.888698396151692e-06, "loss": 0.8114, "step": 1854 }, { "epoch": 0.09533353890430671, "grad_norm": 1.1724259853363037, "learning_rate": 9.888523703518244e-06, "loss": 0.8095, "step": 1855 }, { "epoch": 0.09538493164765136, "grad_norm": 1.4335479736328125, "learning_rate": 9.888348875443978e-06, "loss": 0.7941, "step": 1856 }, { "epoch": 0.09543632439099599, "grad_norm": 1.1842377185821533, "learning_rate": 9.888173911933739e-06, "loss": 0.9032, "step": 1857 }, { "epoch": 0.09548771713434064, "grad_norm": 1.1997178792953491, "learning_rate": 9.88799881299237e-06, "loss": 0.8193, "step": 1858 }, { "epoch": 0.09553910987768527, "grad_norm": 1.2129074335098267, "learning_rate": 9.887823578624729e-06, "loss": 0.8391, "step": 1859 }, { "epoch": 0.09559050262102992, "grad_norm": 1.2300792932510376, "learning_rate": 9.887648208835664e-06, "loss": 0.8749, "step": 1860 }, { "epoch": 0.09564189536437455, "grad_norm": 1.2000662088394165, "learning_rate": 9.887472703630039e-06, "loss": 0.8414, "step": 1861 }, { "epoch": 0.0956932881077192, "grad_norm": 1.178249716758728, "learning_rate": 9.887297063012715e-06, "loss": 0.894, "step": 1862 }, { "epoch": 0.09574468085106383, "grad_norm": 1.5113506317138672, "learning_rate": 9.887121286988559e-06, "loss": 0.8001, "step": 1863 }, { "epoch": 0.09579607359440848, "grad_norm": 1.1608010530471802, "learning_rate": 9.886945375562438e-06, "loss": 0.817, "step": 1864 }, { "epoch": 0.09584746633775311, "grad_norm": 1.1100857257843018, "learning_rate": 9.88676932873923e-06, "loss": 0.8306, "step": 1865 }, { "epoch": 0.09589885908109774, "grad_norm": 1.210470199584961, "learning_rate": 9.886593146523808e-06, "loss": 0.8615, "step": 1866 }, { "epoch": 0.09595025182444239, "grad_norm": 1.1187340021133423, "learning_rate": 9.886416828921056e-06, "loss": 0.7577, "step": 1867 }, { "epoch": 0.09600164456778702, "grad_norm": 1.1965348720550537, "learning_rate": 9.886240375935861e-06, "loss": 0.8123, "step": 1868 }, { "epoch": 0.09605303731113167, "grad_norm": 1.2064812183380127, "learning_rate": 9.886063787573109e-06, "loss": 0.8437, "step": 1869 }, { "epoch": 0.0961044300544763, "grad_norm": 1.1664938926696777, "learning_rate": 9.885887063837691e-06, "loss": 0.8504, "step": 1870 }, { "epoch": 0.09615582279782095, "grad_norm": 1.1598814725875854, "learning_rate": 9.885710204734507e-06, "loss": 0.8659, "step": 1871 }, { "epoch": 0.09620721554116558, "grad_norm": 1.1873193979263306, "learning_rate": 9.885533210268456e-06, "loss": 0.8842, "step": 1872 }, { "epoch": 0.09625860828451023, "grad_norm": 1.0674279928207397, "learning_rate": 9.88535608044444e-06, "loss": 0.8393, "step": 1873 }, { "epoch": 0.09631000102785486, "grad_norm": 1.0970643758773804, "learning_rate": 9.885178815267367e-06, "loss": 0.84, "step": 1874 }, { "epoch": 0.09636139377119951, "grad_norm": 1.1926662921905518, "learning_rate": 9.88500141474215e-06, "loss": 0.8434, "step": 1875 }, { "epoch": 0.09641278651454414, "grad_norm": 1.189504623413086, "learning_rate": 9.884823878873702e-06, "loss": 0.8493, "step": 1876 }, { "epoch": 0.09646417925788879, "grad_norm": 1.3385924100875854, "learning_rate": 9.884646207666943e-06, "loss": 0.8196, "step": 1877 }, { "epoch": 0.09651557200123342, "grad_norm": 1.108610987663269, "learning_rate": 9.884468401126797e-06, "loss": 0.8258, "step": 1878 }, { "epoch": 0.09656696474457807, "grad_norm": 1.1384812593460083, "learning_rate": 9.884290459258188e-06, "loss": 0.8437, "step": 1879 }, { "epoch": 0.0966183574879227, "grad_norm": 1.2351970672607422, "learning_rate": 9.884112382066048e-06, "loss": 0.8752, "step": 1880 }, { "epoch": 0.09666975023126735, "grad_norm": 1.0431514978408813, "learning_rate": 9.883934169555305e-06, "loss": 0.8127, "step": 1881 }, { "epoch": 0.09672114297461198, "grad_norm": 1.2366681098937988, "learning_rate": 9.883755821730905e-06, "loss": 0.8233, "step": 1882 }, { "epoch": 0.09677253571795663, "grad_norm": 1.3336654901504517, "learning_rate": 9.883577338597784e-06, "loss": 0.7904, "step": 1883 }, { "epoch": 0.09682392846130126, "grad_norm": 1.1166678667068481, "learning_rate": 9.883398720160887e-06, "loss": 0.7452, "step": 1884 }, { "epoch": 0.0968753212046459, "grad_norm": 1.1957266330718994, "learning_rate": 9.883219966425164e-06, "loss": 0.8569, "step": 1885 }, { "epoch": 0.09692671394799054, "grad_norm": 1.2624551057815552, "learning_rate": 9.88304107739557e-06, "loss": 0.8346, "step": 1886 }, { "epoch": 0.09697810669133518, "grad_norm": 1.1623643636703491, "learning_rate": 9.882862053077057e-06, "loss": 0.8348, "step": 1887 }, { "epoch": 0.09702949943467983, "grad_norm": 1.1269875764846802, "learning_rate": 9.882682893474588e-06, "loss": 0.8452, "step": 1888 }, { "epoch": 0.09708089217802446, "grad_norm": 1.1543737649917603, "learning_rate": 9.882503598593124e-06, "loss": 0.8776, "step": 1889 }, { "epoch": 0.0971322849213691, "grad_norm": 0.9897733926773071, "learning_rate": 9.882324168437635e-06, "loss": 0.7387, "step": 1890 }, { "epoch": 0.09718367766471374, "grad_norm": 0.9477784037590027, "learning_rate": 9.882144603013093e-06, "loss": 0.7292, "step": 1891 }, { "epoch": 0.09723507040805839, "grad_norm": 1.1817985773086548, "learning_rate": 9.88196490232447e-06, "loss": 0.8236, "step": 1892 }, { "epoch": 0.09728646315140302, "grad_norm": 1.196481466293335, "learning_rate": 9.881785066376747e-06, "loss": 0.8813, "step": 1893 }, { "epoch": 0.09733785589474767, "grad_norm": 0.7876906394958496, "learning_rate": 9.881605095174905e-06, "loss": 0.7382, "step": 1894 }, { "epoch": 0.0973892486380923, "grad_norm": 1.1125801801681519, "learning_rate": 9.881424988723931e-06, "loss": 0.755, "step": 1895 }, { "epoch": 0.09744064138143695, "grad_norm": 0.7514511346817017, "learning_rate": 9.881244747028815e-06, "loss": 0.6809, "step": 1896 }, { "epoch": 0.09749203412478158, "grad_norm": 1.162706971168518, "learning_rate": 9.881064370094552e-06, "loss": 0.7938, "step": 1897 }, { "epoch": 0.09754342686812623, "grad_norm": 1.2461947202682495, "learning_rate": 9.880883857926137e-06, "loss": 0.7983, "step": 1898 }, { "epoch": 0.09759481961147086, "grad_norm": 1.1399445533752441, "learning_rate": 9.880703210528572e-06, "loss": 0.8359, "step": 1899 }, { "epoch": 0.0976462123548155, "grad_norm": 1.1532084941864014, "learning_rate": 9.880522427906864e-06, "loss": 0.9289, "step": 1900 }, { "epoch": 0.09769760509816014, "grad_norm": 0.8654432892799377, "learning_rate": 9.88034151006602e-06, "loss": 0.7471, "step": 1901 }, { "epoch": 0.09774899784150477, "grad_norm": 1.268162727355957, "learning_rate": 9.880160457011053e-06, "loss": 0.8543, "step": 1902 }, { "epoch": 0.09780039058484942, "grad_norm": 1.1969913244247437, "learning_rate": 9.879979268746977e-06, "loss": 0.8709, "step": 1903 }, { "epoch": 0.09785178332819405, "grad_norm": 1.166963815689087, "learning_rate": 9.879797945278816e-06, "loss": 0.7749, "step": 1904 }, { "epoch": 0.0979031760715387, "grad_norm": 1.1033501625061035, "learning_rate": 9.87961648661159e-06, "loss": 0.836, "step": 1905 }, { "epoch": 0.09795456881488333, "grad_norm": 0.8506897687911987, "learning_rate": 9.87943489275033e-06, "loss": 0.7521, "step": 1906 }, { "epoch": 0.09800596155822798, "grad_norm": 1.1612355709075928, "learning_rate": 9.879253163700064e-06, "loss": 0.919, "step": 1907 }, { "epoch": 0.09805735430157261, "grad_norm": 1.1377818584442139, "learning_rate": 9.87907129946583e-06, "loss": 0.8544, "step": 1908 }, { "epoch": 0.09810874704491726, "grad_norm": 1.1257753372192383, "learning_rate": 9.878889300052663e-06, "loss": 0.7727, "step": 1909 }, { "epoch": 0.0981601397882619, "grad_norm": 1.11044442653656, "learning_rate": 9.87870716546561e-06, "loss": 0.7919, "step": 1910 }, { "epoch": 0.09821153253160654, "grad_norm": 1.1394280195236206, "learning_rate": 9.878524895709711e-06, "loss": 0.7915, "step": 1911 }, { "epoch": 0.09826292527495117, "grad_norm": 1.3458809852600098, "learning_rate": 9.878342490790022e-06, "loss": 0.904, "step": 1912 }, { "epoch": 0.09831431801829582, "grad_norm": 1.203850269317627, "learning_rate": 9.878159950711594e-06, "loss": 0.8187, "step": 1913 }, { "epoch": 0.09836571076164045, "grad_norm": 1.1976758241653442, "learning_rate": 9.877977275479485e-06, "loss": 0.8431, "step": 1914 }, { "epoch": 0.0984171035049851, "grad_norm": 1.2261943817138672, "learning_rate": 9.877794465098755e-06, "loss": 0.8407, "step": 1915 }, { "epoch": 0.09846849624832973, "grad_norm": 1.1451129913330078, "learning_rate": 9.87761151957447e-06, "loss": 0.8086, "step": 1916 }, { "epoch": 0.09851988899167438, "grad_norm": 1.1906285285949707, "learning_rate": 9.877428438911699e-06, "loss": 0.8376, "step": 1917 }, { "epoch": 0.09857128173501901, "grad_norm": 1.135897159576416, "learning_rate": 9.877245223115514e-06, "loss": 0.8919, "step": 1918 }, { "epoch": 0.09862267447836366, "grad_norm": 1.0655922889709473, "learning_rate": 9.87706187219099e-06, "loss": 0.7791, "step": 1919 }, { "epoch": 0.0986740672217083, "grad_norm": 1.0949722528457642, "learning_rate": 9.87687838614321e-06, "loss": 0.8655, "step": 1920 }, { "epoch": 0.09872545996505293, "grad_norm": 1.1466466188430786, "learning_rate": 9.876694764977256e-06, "loss": 0.7745, "step": 1921 }, { "epoch": 0.09877685270839758, "grad_norm": 1.4071240425109863, "learning_rate": 9.876511008698211e-06, "loss": 0.8552, "step": 1922 }, { "epoch": 0.09882824545174221, "grad_norm": 1.2041629552841187, "learning_rate": 9.876327117311173e-06, "loss": 0.7922, "step": 1923 }, { "epoch": 0.09887963819508686, "grad_norm": 1.1415711641311646, "learning_rate": 9.876143090821234e-06, "loss": 0.7956, "step": 1924 }, { "epoch": 0.09893103093843149, "grad_norm": 1.1186227798461914, "learning_rate": 9.875958929233492e-06, "loss": 0.8175, "step": 1925 }, { "epoch": 0.09898242368177614, "grad_norm": 1.3107589483261108, "learning_rate": 9.87577463255305e-06, "loss": 0.831, "step": 1926 }, { "epoch": 0.09903381642512077, "grad_norm": 0.7583712339401245, "learning_rate": 9.875590200785015e-06, "loss": 0.7013, "step": 1927 }, { "epoch": 0.09908520916846542, "grad_norm": 0.8107669353485107, "learning_rate": 9.875405633934493e-06, "loss": 0.7445, "step": 1928 }, { "epoch": 0.09913660191181005, "grad_norm": 1.2696977853775024, "learning_rate": 9.875220932006604e-06, "loss": 0.8216, "step": 1929 }, { "epoch": 0.0991879946551547, "grad_norm": 1.1522916555404663, "learning_rate": 9.87503609500646e-06, "loss": 0.9029, "step": 1930 }, { "epoch": 0.09923938739849933, "grad_norm": 1.2198549509048462, "learning_rate": 9.874851122939184e-06, "loss": 0.8317, "step": 1931 }, { "epoch": 0.09929078014184398, "grad_norm": 1.1317845582962036, "learning_rate": 9.874666015809901e-06, "loss": 0.8284, "step": 1932 }, { "epoch": 0.09934217288518861, "grad_norm": 1.1639400720596313, "learning_rate": 9.87448077362374e-06, "loss": 0.8315, "step": 1933 }, { "epoch": 0.09939356562853326, "grad_norm": 1.1315388679504395, "learning_rate": 9.874295396385831e-06, "loss": 0.8096, "step": 1934 }, { "epoch": 0.09944495837187789, "grad_norm": 1.3996027708053589, "learning_rate": 9.874109884101314e-06, "loss": 0.7659, "step": 1935 }, { "epoch": 0.09949635111522254, "grad_norm": 1.1562831401824951, "learning_rate": 9.873924236775324e-06, "loss": 0.8235, "step": 1936 }, { "epoch": 0.09954774385856717, "grad_norm": 1.2070807218551636, "learning_rate": 9.873738454413007e-06, "loss": 0.7747, "step": 1937 }, { "epoch": 0.09959913660191182, "grad_norm": 1.109803318977356, "learning_rate": 9.873552537019512e-06, "loss": 0.8314, "step": 1938 }, { "epoch": 0.09965052934525645, "grad_norm": 0.9041234254837036, "learning_rate": 9.873366484599987e-06, "loss": 0.7461, "step": 1939 }, { "epoch": 0.09970192208860108, "grad_norm": 1.2265452146530151, "learning_rate": 9.873180297159588e-06, "loss": 0.8244, "step": 1940 }, { "epoch": 0.09975331483194573, "grad_norm": 0.9118494987487793, "learning_rate": 9.872993974703473e-06, "loss": 0.7349, "step": 1941 }, { "epoch": 0.09980470757529036, "grad_norm": 1.1241427659988403, "learning_rate": 9.872807517236804e-06, "loss": 0.8263, "step": 1942 }, { "epoch": 0.09985610031863501, "grad_norm": 1.2760899066925049, "learning_rate": 9.87262092476475e-06, "loss": 0.7973, "step": 1943 }, { "epoch": 0.09990749306197964, "grad_norm": 1.1521601676940918, "learning_rate": 9.872434197292476e-06, "loss": 0.7692, "step": 1944 }, { "epoch": 0.09995888580532429, "grad_norm": 1.2208954095840454, "learning_rate": 9.872247334825158e-06, "loss": 0.7998, "step": 1945 }, { "epoch": 0.10001027854866892, "grad_norm": 1.2851568460464478, "learning_rate": 9.872060337367975e-06, "loss": 0.8112, "step": 1946 }, { "epoch": 0.10006167129201357, "grad_norm": 1.1856228113174438, "learning_rate": 9.871873204926104e-06, "loss": 0.7819, "step": 1947 }, { "epoch": 0.1001130640353582, "grad_norm": 1.1499347686767578, "learning_rate": 9.87168593750473e-06, "loss": 0.84, "step": 1948 }, { "epoch": 0.10016445677870285, "grad_norm": 1.152206540107727, "learning_rate": 9.871498535109046e-06, "loss": 0.8534, "step": 1949 }, { "epoch": 0.10021584952204748, "grad_norm": 1.1630617380142212, "learning_rate": 9.871310997744241e-06, "loss": 0.8407, "step": 1950 }, { "epoch": 0.10026724226539213, "grad_norm": 1.1810673475265503, "learning_rate": 9.871123325415509e-06, "loss": 0.8423, "step": 1951 }, { "epoch": 0.10031863500873676, "grad_norm": 1.157597303390503, "learning_rate": 9.870935518128053e-06, "loss": 0.8525, "step": 1952 }, { "epoch": 0.10037002775208141, "grad_norm": 1.0883376598358154, "learning_rate": 9.870747575887074e-06, "loss": 0.8238, "step": 1953 }, { "epoch": 0.10042142049542604, "grad_norm": 0.8618469834327698, "learning_rate": 9.870559498697781e-06, "loss": 0.7389, "step": 1954 }, { "epoch": 0.10047281323877069, "grad_norm": 1.4844752550125122, "learning_rate": 9.870371286565383e-06, "loss": 0.7735, "step": 1955 }, { "epoch": 0.10052420598211532, "grad_norm": 0.7890344262123108, "learning_rate": 9.870182939495096e-06, "loss": 0.7169, "step": 1956 }, { "epoch": 0.10057559872545996, "grad_norm": 1.1457399129867554, "learning_rate": 9.869994457492138e-06, "loss": 0.8318, "step": 1957 }, { "epoch": 0.1006269914688046, "grad_norm": 1.0957731008529663, "learning_rate": 9.869805840561731e-06, "loss": 0.7761, "step": 1958 }, { "epoch": 0.10067838421214924, "grad_norm": 0.7526283264160156, "learning_rate": 9.869617088709101e-06, "loss": 0.7239, "step": 1959 }, { "epoch": 0.10072977695549389, "grad_norm": 0.9006901979446411, "learning_rate": 9.869428201939476e-06, "loss": 0.6991, "step": 1960 }, { "epoch": 0.10078116969883852, "grad_norm": 1.2227797508239746, "learning_rate": 9.86923918025809e-06, "loss": 0.8145, "step": 1961 }, { "epoch": 0.10083256244218317, "grad_norm": 1.2286690473556519, "learning_rate": 9.869050023670182e-06, "loss": 0.8341, "step": 1962 }, { "epoch": 0.1008839551855278, "grad_norm": 0.7617394328117371, "learning_rate": 9.868860732180989e-06, "loss": 0.7317, "step": 1963 }, { "epoch": 0.10093534792887245, "grad_norm": 1.212906837463379, "learning_rate": 9.86867130579576e-06, "loss": 0.844, "step": 1964 }, { "epoch": 0.10098674067221708, "grad_norm": 1.2200617790222168, "learning_rate": 9.86848174451974e-06, "loss": 0.8441, "step": 1965 }, { "epoch": 0.10103813341556173, "grad_norm": 1.1495684385299683, "learning_rate": 9.868292048358183e-06, "loss": 0.8589, "step": 1966 }, { "epoch": 0.10108952615890636, "grad_norm": 1.1524317264556885, "learning_rate": 9.868102217316342e-06, "loss": 0.8106, "step": 1967 }, { "epoch": 0.101140918902251, "grad_norm": 1.1042249202728271, "learning_rate": 9.867912251399479e-06, "loss": 0.8159, "step": 1968 }, { "epoch": 0.10119231164559564, "grad_norm": 1.0767053365707397, "learning_rate": 9.867722150612855e-06, "loss": 0.7984, "step": 1969 }, { "epoch": 0.10124370438894029, "grad_norm": 0.9872498512268066, "learning_rate": 9.86753191496174e-06, "loss": 0.734, "step": 1970 }, { "epoch": 0.10129509713228492, "grad_norm": 1.1424567699432373, "learning_rate": 9.867341544451401e-06, "loss": 0.8342, "step": 1971 }, { "epoch": 0.10134648987562957, "grad_norm": 0.9847910404205322, "learning_rate": 9.867151039087115e-06, "loss": 0.7457, "step": 1972 }, { "epoch": 0.1013978826189742, "grad_norm": 1.1396901607513428, "learning_rate": 9.866960398874159e-06, "loss": 0.8373, "step": 1973 }, { "epoch": 0.10144927536231885, "grad_norm": 1.1302088499069214, "learning_rate": 9.866769623817816e-06, "loss": 0.9205, "step": 1974 }, { "epoch": 0.10150066810566348, "grad_norm": 1.1684679985046387, "learning_rate": 9.866578713923369e-06, "loss": 0.8282, "step": 1975 }, { "epoch": 0.10155206084900811, "grad_norm": 1.1139990091323853, "learning_rate": 9.866387669196112e-06, "loss": 0.8158, "step": 1976 }, { "epoch": 0.10160345359235276, "grad_norm": 1.0582804679870605, "learning_rate": 9.866196489641332e-06, "loss": 0.7916, "step": 1977 }, { "epoch": 0.1016548463356974, "grad_norm": 1.1517270803451538, "learning_rate": 9.866005175264331e-06, "loss": 0.8431, "step": 1978 }, { "epoch": 0.10170623907904204, "grad_norm": 1.0896328687667847, "learning_rate": 9.865813726070405e-06, "loss": 0.805, "step": 1979 }, { "epoch": 0.10175763182238667, "grad_norm": 1.091223955154419, "learning_rate": 9.865622142064863e-06, "loss": 0.819, "step": 1980 }, { "epoch": 0.10180902456573132, "grad_norm": 1.0777031183242798, "learning_rate": 9.86543042325301e-06, "loss": 0.8607, "step": 1981 }, { "epoch": 0.10186041730907595, "grad_norm": 1.3352848291397095, "learning_rate": 9.865238569640157e-06, "loss": 0.8373, "step": 1982 }, { "epoch": 0.1019118100524206, "grad_norm": 1.1215145587921143, "learning_rate": 9.865046581231624e-06, "loss": 0.8096, "step": 1983 }, { "epoch": 0.10196320279576523, "grad_norm": 1.3076443672180176, "learning_rate": 9.864854458032724e-06, "loss": 0.8428, "step": 1984 }, { "epoch": 0.10201459553910988, "grad_norm": 1.0836284160614014, "learning_rate": 9.864662200048784e-06, "loss": 0.8413, "step": 1985 }, { "epoch": 0.10206598828245451, "grad_norm": 1.2135984897613525, "learning_rate": 9.86446980728513e-06, "loss": 0.8324, "step": 1986 }, { "epoch": 0.10211738102579916, "grad_norm": 1.1524406671524048, "learning_rate": 9.864277279747092e-06, "loss": 0.8309, "step": 1987 }, { "epoch": 0.1021687737691438, "grad_norm": 1.0766704082489014, "learning_rate": 9.864084617440004e-06, "loss": 0.829, "step": 1988 }, { "epoch": 0.10222016651248844, "grad_norm": 0.995010495185852, "learning_rate": 9.863891820369205e-06, "loss": 0.7795, "step": 1989 }, { "epoch": 0.10227155925583307, "grad_norm": 1.2125451564788818, "learning_rate": 9.863698888540035e-06, "loss": 0.8997, "step": 1990 }, { "epoch": 0.10232295199917772, "grad_norm": 0.8472604751586914, "learning_rate": 9.86350582195784e-06, "loss": 0.7164, "step": 1991 }, { "epoch": 0.10237434474252236, "grad_norm": 1.2747083902359009, "learning_rate": 9.86331262062797e-06, "loss": 0.8774, "step": 1992 }, { "epoch": 0.102425737485867, "grad_norm": 1.1559460163116455, "learning_rate": 9.863119284555776e-06, "loss": 0.8064, "step": 1993 }, { "epoch": 0.10247713022921164, "grad_norm": 1.1248106956481934, "learning_rate": 9.862925813746616e-06, "loss": 0.8005, "step": 1994 }, { "epoch": 0.10252852297255627, "grad_norm": 1.1622852087020874, "learning_rate": 9.862732208205849e-06, "loss": 0.8304, "step": 1995 }, { "epoch": 0.10257991571590092, "grad_norm": 1.2017699480056763, "learning_rate": 9.862538467938842e-06, "loss": 0.8492, "step": 1996 }, { "epoch": 0.10263130845924555, "grad_norm": 1.2489099502563477, "learning_rate": 9.862344592950958e-06, "loss": 0.8329, "step": 1997 }, { "epoch": 0.1026827012025902, "grad_norm": 1.201389193534851, "learning_rate": 9.862150583247574e-06, "loss": 0.8639, "step": 1998 }, { "epoch": 0.10273409394593483, "grad_norm": 1.2007253170013428, "learning_rate": 9.86195643883406e-06, "loss": 0.8994, "step": 1999 }, { "epoch": 0.10278548668927948, "grad_norm": 1.121754765510559, "learning_rate": 9.861762159715798e-06, "loss": 0.8269, "step": 2000 }, { "epoch": 0.10283687943262411, "grad_norm": 1.2858860492706299, "learning_rate": 9.861567745898169e-06, "loss": 0.8733, "step": 2001 }, { "epoch": 0.10288827217596876, "grad_norm": 1.1056945323944092, "learning_rate": 9.86137319738656e-06, "loss": 0.8241, "step": 2002 }, { "epoch": 0.10293966491931339, "grad_norm": 0.9627084732055664, "learning_rate": 9.861178514186363e-06, "loss": 0.7263, "step": 2003 }, { "epoch": 0.10299105766265804, "grad_norm": 1.185849666595459, "learning_rate": 9.86098369630297e-06, "loss": 0.803, "step": 2004 }, { "epoch": 0.10304245040600267, "grad_norm": 1.2022292613983154, "learning_rate": 9.860788743741778e-06, "loss": 0.8392, "step": 2005 }, { "epoch": 0.10309384314934732, "grad_norm": 1.1016179323196411, "learning_rate": 9.860593656508188e-06, "loss": 0.8508, "step": 2006 }, { "epoch": 0.10314523589269195, "grad_norm": 1.3060848712921143, "learning_rate": 9.860398434607609e-06, "loss": 0.8402, "step": 2007 }, { "epoch": 0.1031966286360366, "grad_norm": 0.9066895842552185, "learning_rate": 9.860203078045445e-06, "loss": 0.7333, "step": 2008 }, { "epoch": 0.10324802137938123, "grad_norm": 1.6079376935958862, "learning_rate": 9.860007586827112e-06, "loss": 0.8228, "step": 2009 }, { "epoch": 0.10329941412272588, "grad_norm": 1.2383357286453247, "learning_rate": 9.859811960958025e-06, "loss": 0.8628, "step": 2010 }, { "epoch": 0.10335080686607051, "grad_norm": 1.1892626285552979, "learning_rate": 9.859616200443603e-06, "loss": 0.8122, "step": 2011 }, { "epoch": 0.10340219960941516, "grad_norm": 1.1847467422485352, "learning_rate": 9.85942030528927e-06, "loss": 0.8447, "step": 2012 }, { "epoch": 0.10345359235275979, "grad_norm": 1.1552761793136597, "learning_rate": 9.859224275500454e-06, "loss": 0.785, "step": 2013 }, { "epoch": 0.10350498509610442, "grad_norm": 1.2984205484390259, "learning_rate": 9.859028111082587e-06, "loss": 0.8166, "step": 2014 }, { "epoch": 0.10355637783944907, "grad_norm": 1.0715218782424927, "learning_rate": 9.858831812041102e-06, "loss": 0.7129, "step": 2015 }, { "epoch": 0.1036077705827937, "grad_norm": 0.9487645030021667, "learning_rate": 9.85863537838144e-06, "loss": 0.6786, "step": 2016 }, { "epoch": 0.10365916332613835, "grad_norm": 1.3574559688568115, "learning_rate": 9.858438810109044e-06, "loss": 0.8436, "step": 2017 }, { "epoch": 0.10371055606948298, "grad_norm": 1.1608316898345947, "learning_rate": 9.858242107229355e-06, "loss": 0.8035, "step": 2018 }, { "epoch": 0.10376194881282763, "grad_norm": 1.112083077430725, "learning_rate": 9.858045269747826e-06, "loss": 0.8425, "step": 2019 }, { "epoch": 0.10381334155617226, "grad_norm": 1.3320881128311157, "learning_rate": 9.857848297669912e-06, "loss": 0.8981, "step": 2020 }, { "epoch": 0.10386473429951691, "grad_norm": 1.1864501237869263, "learning_rate": 9.857651191001067e-06, "loss": 0.8221, "step": 2021 }, { "epoch": 0.10391612704286154, "grad_norm": 1.2660017013549805, "learning_rate": 9.857453949746756e-06, "loss": 0.8604, "step": 2022 }, { "epoch": 0.10396751978620619, "grad_norm": 1.1579972505569458, "learning_rate": 9.857256573912441e-06, "loss": 0.8257, "step": 2023 }, { "epoch": 0.10401891252955082, "grad_norm": 1.1935635805130005, "learning_rate": 9.85705906350359e-06, "loss": 0.8171, "step": 2024 }, { "epoch": 0.10407030527289547, "grad_norm": 1.2708181142807007, "learning_rate": 9.856861418525678e-06, "loss": 0.8117, "step": 2025 }, { "epoch": 0.1041216980162401, "grad_norm": 0.8358376622200012, "learning_rate": 9.856663638984177e-06, "loss": 0.6917, "step": 2026 }, { "epoch": 0.10417309075958475, "grad_norm": 1.15700364112854, "learning_rate": 9.85646572488457e-06, "loss": 0.8215, "step": 2027 }, { "epoch": 0.10422448350292939, "grad_norm": 1.273124098777771, "learning_rate": 9.856267676232339e-06, "loss": 0.8325, "step": 2028 }, { "epoch": 0.10427587624627403, "grad_norm": 1.1909046173095703, "learning_rate": 9.856069493032971e-06, "loss": 0.822, "step": 2029 }, { "epoch": 0.10432726898961867, "grad_norm": 1.1918940544128418, "learning_rate": 9.855871175291958e-06, "loss": 0.87, "step": 2030 }, { "epoch": 0.1043786617329633, "grad_norm": 1.2000300884246826, "learning_rate": 9.855672723014792e-06, "loss": 0.8185, "step": 2031 }, { "epoch": 0.10443005447630795, "grad_norm": 1.0824081897735596, "learning_rate": 9.855474136206975e-06, "loss": 0.8173, "step": 2032 }, { "epoch": 0.10448144721965258, "grad_norm": 1.2731975317001343, "learning_rate": 9.855275414874007e-06, "loss": 0.8249, "step": 2033 }, { "epoch": 0.10453283996299723, "grad_norm": 1.1852995157241821, "learning_rate": 9.855076559021392e-06, "loss": 0.809, "step": 2034 }, { "epoch": 0.10458423270634186, "grad_norm": 0.9092479348182678, "learning_rate": 9.854877568654644e-06, "loss": 0.7449, "step": 2035 }, { "epoch": 0.1046356254496865, "grad_norm": 1.1946287155151367, "learning_rate": 9.854678443779273e-06, "loss": 0.8475, "step": 2036 }, { "epoch": 0.10468701819303114, "grad_norm": 1.1364498138427734, "learning_rate": 9.854479184400793e-06, "loss": 0.8348, "step": 2037 }, { "epoch": 0.10473841093637579, "grad_norm": 1.2741285562515259, "learning_rate": 9.85427979052473e-06, "loss": 0.8109, "step": 2038 }, { "epoch": 0.10478980367972042, "grad_norm": 1.1620416641235352, "learning_rate": 9.854080262156609e-06, "loss": 0.7829, "step": 2039 }, { "epoch": 0.10484119642306507, "grad_norm": 1.1623344421386719, "learning_rate": 9.853880599301952e-06, "loss": 0.7821, "step": 2040 }, { "epoch": 0.1048925891664097, "grad_norm": 0.7803221940994263, "learning_rate": 9.853680801966297e-06, "loss": 0.7291, "step": 2041 }, { "epoch": 0.10494398190975435, "grad_norm": 1.1708322763442993, "learning_rate": 9.853480870155175e-06, "loss": 0.8614, "step": 2042 }, { "epoch": 0.10499537465309898, "grad_norm": 1.155012607574463, "learning_rate": 9.853280803874128e-06, "loss": 0.8111, "step": 2043 }, { "epoch": 0.10504676739644363, "grad_norm": 1.2395075559616089, "learning_rate": 9.853080603128698e-06, "loss": 0.8636, "step": 2044 }, { "epoch": 0.10509816013978826, "grad_norm": 1.3967252969741821, "learning_rate": 9.852880267924431e-06, "loss": 0.8555, "step": 2045 }, { "epoch": 0.10514955288313291, "grad_norm": 1.384650468826294, "learning_rate": 9.85267979826688e-06, "loss": 0.8256, "step": 2046 }, { "epoch": 0.10520094562647754, "grad_norm": 1.2941588163375854, "learning_rate": 9.852479194161597e-06, "loss": 0.8777, "step": 2047 }, { "epoch": 0.10525233836982219, "grad_norm": 0.86928391456604, "learning_rate": 9.852278455614142e-06, "loss": 0.7763, "step": 2048 }, { "epoch": 0.10530373111316682, "grad_norm": 1.1695020198822021, "learning_rate": 9.852077582630073e-06, "loss": 0.8226, "step": 2049 }, { "epoch": 0.10535512385651145, "grad_norm": 1.0998610258102417, "learning_rate": 9.851876575214957e-06, "loss": 0.7926, "step": 2050 }, { "epoch": 0.1054065165998561, "grad_norm": 0.8325785994529724, "learning_rate": 9.851675433374366e-06, "loss": 0.7115, "step": 2051 }, { "epoch": 0.10545790934320073, "grad_norm": 1.0745104551315308, "learning_rate": 9.851474157113869e-06, "loss": 0.8372, "step": 2052 }, { "epoch": 0.10550930208654538, "grad_norm": 1.3948298692703247, "learning_rate": 9.851272746439045e-06, "loss": 0.8342, "step": 2053 }, { "epoch": 0.10556069482989001, "grad_norm": 0.7847484350204468, "learning_rate": 9.851071201355473e-06, "loss": 0.7027, "step": 2054 }, { "epoch": 0.10561208757323466, "grad_norm": 0.8258352279663086, "learning_rate": 9.850869521868736e-06, "loss": 0.6856, "step": 2055 }, { "epoch": 0.1056634803165793, "grad_norm": 1.1569632291793823, "learning_rate": 9.850667707984425e-06, "loss": 0.7977, "step": 2056 }, { "epoch": 0.10571487305992394, "grad_norm": 0.8674617409706116, "learning_rate": 9.850465759708127e-06, "loss": 0.7323, "step": 2057 }, { "epoch": 0.10576626580326857, "grad_norm": 1.0897079706192017, "learning_rate": 9.85026367704544e-06, "loss": 0.8463, "step": 2058 }, { "epoch": 0.10581765854661322, "grad_norm": 1.0594137907028198, "learning_rate": 9.850061460001963e-06, "loss": 0.7503, "step": 2059 }, { "epoch": 0.10586905128995786, "grad_norm": 1.1581857204437256, "learning_rate": 9.849859108583298e-06, "loss": 0.8204, "step": 2060 }, { "epoch": 0.1059204440333025, "grad_norm": 0.85575270652771, "learning_rate": 9.849656622795052e-06, "loss": 0.7616, "step": 2061 }, { "epoch": 0.10597183677664714, "grad_norm": 1.1002634763717651, "learning_rate": 9.849454002642833e-06, "loss": 0.8263, "step": 2062 }, { "epoch": 0.10602322951999178, "grad_norm": 1.199106216430664, "learning_rate": 9.849251248132257e-06, "loss": 0.8019, "step": 2063 }, { "epoch": 0.10607462226333642, "grad_norm": 1.1747106313705444, "learning_rate": 9.84904835926894e-06, "loss": 0.8138, "step": 2064 }, { "epoch": 0.10612601500668106, "grad_norm": 0.9194386601448059, "learning_rate": 9.848845336058503e-06, "loss": 0.7092, "step": 2065 }, { "epoch": 0.1061774077500257, "grad_norm": 0.8934338688850403, "learning_rate": 9.848642178506573e-06, "loss": 0.7293, "step": 2066 }, { "epoch": 0.10622880049337034, "grad_norm": 1.2011280059814453, "learning_rate": 9.848438886618777e-06, "loss": 0.8039, "step": 2067 }, { "epoch": 0.10628019323671498, "grad_norm": 1.1254558563232422, "learning_rate": 9.848235460400748e-06, "loss": 0.86, "step": 2068 }, { "epoch": 0.10633158598005961, "grad_norm": 1.160234808921814, "learning_rate": 9.84803189985812e-06, "loss": 0.8012, "step": 2069 }, { "epoch": 0.10638297872340426, "grad_norm": 1.2093859910964966, "learning_rate": 9.84782820499654e-06, "loss": 0.8424, "step": 2070 }, { "epoch": 0.10643437146674889, "grad_norm": 1.153065800666809, "learning_rate": 9.84762437582164e-06, "loss": 0.8216, "step": 2071 }, { "epoch": 0.10648576421009354, "grad_norm": 1.0969511270523071, "learning_rate": 9.847420412339077e-06, "loss": 0.843, "step": 2072 }, { "epoch": 0.10653715695343817, "grad_norm": 1.0607304573059082, "learning_rate": 9.847216314554497e-06, "loss": 0.7702, "step": 2073 }, { "epoch": 0.10658854969678282, "grad_norm": 1.1696754693984985, "learning_rate": 9.847012082473559e-06, "loss": 0.877, "step": 2074 }, { "epoch": 0.10663994244012745, "grad_norm": 1.0082323551177979, "learning_rate": 9.846807716101916e-06, "loss": 0.7426, "step": 2075 }, { "epoch": 0.1066913351834721, "grad_norm": 1.187567114830017, "learning_rate": 9.846603215445232e-06, "loss": 0.8129, "step": 2076 }, { "epoch": 0.10674272792681673, "grad_norm": 1.1597225666046143, "learning_rate": 9.846398580509176e-06, "loss": 0.8249, "step": 2077 }, { "epoch": 0.10679412067016138, "grad_norm": 1.1469331979751587, "learning_rate": 9.846193811299414e-06, "loss": 0.7991, "step": 2078 }, { "epoch": 0.10684551341350601, "grad_norm": 1.0819371938705444, "learning_rate": 9.845988907821621e-06, "loss": 0.8229, "step": 2079 }, { "epoch": 0.10689690615685066, "grad_norm": 1.178691029548645, "learning_rate": 9.845783870081473e-06, "loss": 0.8177, "step": 2080 }, { "epoch": 0.10694829890019529, "grad_norm": 1.094375729560852, "learning_rate": 9.845578698084652e-06, "loss": 0.8094, "step": 2081 }, { "epoch": 0.10699969164353994, "grad_norm": 1.1425831317901611, "learning_rate": 9.845373391836842e-06, "loss": 0.8226, "step": 2082 }, { "epoch": 0.10705108438688457, "grad_norm": 0.844132661819458, "learning_rate": 9.84516795134373e-06, "loss": 0.7712, "step": 2083 }, { "epoch": 0.10710247713022922, "grad_norm": 1.2742677927017212, "learning_rate": 9.844962376611009e-06, "loss": 0.862, "step": 2084 }, { "epoch": 0.10715386987357385, "grad_norm": 1.2135865688323975, "learning_rate": 9.844756667644375e-06, "loss": 0.8326, "step": 2085 }, { "epoch": 0.1072052626169185, "grad_norm": 1.1784766912460327, "learning_rate": 9.844550824449526e-06, "loss": 0.8758, "step": 2086 }, { "epoch": 0.10725665536026313, "grad_norm": 1.184257984161377, "learning_rate": 9.844344847032168e-06, "loss": 0.8139, "step": 2087 }, { "epoch": 0.10730804810360776, "grad_norm": 0.8778985142707825, "learning_rate": 9.844138735398003e-06, "loss": 0.7399, "step": 2088 }, { "epoch": 0.10735944084695241, "grad_norm": 1.1916102170944214, "learning_rate": 9.843932489552746e-06, "loss": 0.8393, "step": 2089 }, { "epoch": 0.10741083359029704, "grad_norm": 1.190514326095581, "learning_rate": 9.84372610950211e-06, "loss": 0.7854, "step": 2090 }, { "epoch": 0.10746222633364169, "grad_norm": 1.189003825187683, "learning_rate": 9.843519595251811e-06, "loss": 0.852, "step": 2091 }, { "epoch": 0.10751361907698632, "grad_norm": 1.1399040222167969, "learning_rate": 9.843312946807573e-06, "loss": 0.8597, "step": 2092 }, { "epoch": 0.10756501182033097, "grad_norm": 1.2766233682632446, "learning_rate": 9.84310616417512e-06, "loss": 0.8448, "step": 2093 }, { "epoch": 0.1076164045636756, "grad_norm": 1.0877881050109863, "learning_rate": 9.842899247360181e-06, "loss": 0.766, "step": 2094 }, { "epoch": 0.10766779730702025, "grad_norm": 1.15092933177948, "learning_rate": 9.84269219636849e-06, "loss": 0.8475, "step": 2095 }, { "epoch": 0.10771919005036489, "grad_norm": 1.1373077630996704, "learning_rate": 9.842485011205782e-06, "loss": 0.8456, "step": 2096 }, { "epoch": 0.10777058279370953, "grad_norm": 1.1603868007659912, "learning_rate": 9.8422776918778e-06, "loss": 0.8172, "step": 2097 }, { "epoch": 0.10782197553705417, "grad_norm": 1.1840358972549438, "learning_rate": 9.842070238390284e-06, "loss": 0.8801, "step": 2098 }, { "epoch": 0.10787336828039881, "grad_norm": 1.1693062782287598, "learning_rate": 9.841862650748983e-06, "loss": 0.8315, "step": 2099 }, { "epoch": 0.10792476102374345, "grad_norm": 1.1618939638137817, "learning_rate": 9.841654928959651e-06, "loss": 0.8554, "step": 2100 }, { "epoch": 0.10797615376708809, "grad_norm": 1.1679095029830933, "learning_rate": 9.84144707302804e-06, "loss": 0.8003, "step": 2101 }, { "epoch": 0.10802754651043273, "grad_norm": 1.2202725410461426, "learning_rate": 9.841239082959913e-06, "loss": 0.8577, "step": 2102 }, { "epoch": 0.10807893925377737, "grad_norm": 1.1412358283996582, "learning_rate": 9.841030958761026e-06, "loss": 0.8348, "step": 2103 }, { "epoch": 0.108130331997122, "grad_norm": 1.2195676565170288, "learning_rate": 9.84082270043715e-06, "loss": 0.7994, "step": 2104 }, { "epoch": 0.10818172474046664, "grad_norm": 1.3252432346343994, "learning_rate": 9.840614307994056e-06, "loss": 0.8217, "step": 2105 }, { "epoch": 0.10823311748381129, "grad_norm": 1.162333607673645, "learning_rate": 9.840405781437515e-06, "loss": 0.7966, "step": 2106 }, { "epoch": 0.10828451022715592, "grad_norm": 1.1192104816436768, "learning_rate": 9.840197120773303e-06, "loss": 0.8182, "step": 2107 }, { "epoch": 0.10833590297050057, "grad_norm": 1.1943600177764893, "learning_rate": 9.839988326007204e-06, "loss": 0.8539, "step": 2108 }, { "epoch": 0.1083872957138452, "grad_norm": 1.1747912168502808, "learning_rate": 9.839779397145002e-06, "loss": 0.8416, "step": 2109 }, { "epoch": 0.10843868845718985, "grad_norm": 1.2193492650985718, "learning_rate": 9.839570334192485e-06, "loss": 0.8389, "step": 2110 }, { "epoch": 0.10849008120053448, "grad_norm": 1.1557869911193848, "learning_rate": 9.839361137155445e-06, "loss": 0.8047, "step": 2111 }, { "epoch": 0.10854147394387913, "grad_norm": 1.0687378644943237, "learning_rate": 9.839151806039681e-06, "loss": 0.8312, "step": 2112 }, { "epoch": 0.10859286668722376, "grad_norm": 0.7824373841285706, "learning_rate": 9.83894234085099e-06, "loss": 0.7271, "step": 2113 }, { "epoch": 0.10864425943056841, "grad_norm": 1.1830910444259644, "learning_rate": 9.838732741595174e-06, "loss": 0.7912, "step": 2114 }, { "epoch": 0.10869565217391304, "grad_norm": 1.0808295011520386, "learning_rate": 9.838523008278043e-06, "loss": 0.8084, "step": 2115 }, { "epoch": 0.10874704491725769, "grad_norm": 1.2928946018218994, "learning_rate": 9.838313140905407e-06, "loss": 0.7243, "step": 2116 }, { "epoch": 0.10879843766060232, "grad_norm": 1.1614266633987427, "learning_rate": 9.838103139483082e-06, "loss": 0.7778, "step": 2117 }, { "epoch": 0.10884983040394697, "grad_norm": 1.1963202953338623, "learning_rate": 9.837893004016883e-06, "loss": 0.8136, "step": 2118 }, { "epoch": 0.1089012231472916, "grad_norm": 1.7935479879379272, "learning_rate": 9.837682734512633e-06, "loss": 0.7559, "step": 2119 }, { "epoch": 0.10895261589063625, "grad_norm": 0.8835573196411133, "learning_rate": 9.83747233097616e-06, "loss": 0.7271, "step": 2120 }, { "epoch": 0.10900400863398088, "grad_norm": 1.2589309215545654, "learning_rate": 9.837261793413292e-06, "loss": 0.8552, "step": 2121 }, { "epoch": 0.10905540137732553, "grad_norm": 1.185909390449524, "learning_rate": 9.837051121829859e-06, "loss": 0.8122, "step": 2122 }, { "epoch": 0.10910679412067016, "grad_norm": 1.11640202999115, "learning_rate": 9.836840316231704e-06, "loss": 0.8339, "step": 2123 }, { "epoch": 0.1091581868640148, "grad_norm": 1.1984152793884277, "learning_rate": 9.836629376624663e-06, "loss": 0.8202, "step": 2124 }, { "epoch": 0.10920957960735944, "grad_norm": 1.2150204181671143, "learning_rate": 9.83641830301458e-06, "loss": 0.8028, "step": 2125 }, { "epoch": 0.10926097235070407, "grad_norm": 1.1551257371902466, "learning_rate": 9.836207095407306e-06, "loss": 0.7802, "step": 2126 }, { "epoch": 0.10931236509404872, "grad_norm": 1.1546604633331299, "learning_rate": 9.83599575380869e-06, "loss": 0.823, "step": 2127 }, { "epoch": 0.10936375783739335, "grad_norm": 0.8745397329330444, "learning_rate": 9.835784278224591e-06, "loss": 0.7263, "step": 2128 }, { "epoch": 0.109415150580738, "grad_norm": 1.2139627933502197, "learning_rate": 9.835572668660866e-06, "loss": 0.8662, "step": 2129 }, { "epoch": 0.10946654332408264, "grad_norm": 1.140381097793579, "learning_rate": 9.835360925123376e-06, "loss": 0.8385, "step": 2130 }, { "epoch": 0.10951793606742728, "grad_norm": 0.7668219208717346, "learning_rate": 9.835149047617989e-06, "loss": 0.6953, "step": 2131 }, { "epoch": 0.10956932881077192, "grad_norm": 0.8458096981048584, "learning_rate": 9.834937036150576e-06, "loss": 0.6732, "step": 2132 }, { "epoch": 0.10962072155411656, "grad_norm": 0.8098699450492859, "learning_rate": 9.83472489072701e-06, "loss": 0.7035, "step": 2133 }, { "epoch": 0.1096721142974612, "grad_norm": 0.9325520992279053, "learning_rate": 9.834512611353168e-06, "loss": 0.6944, "step": 2134 }, { "epoch": 0.10972350704080584, "grad_norm": 1.1876887083053589, "learning_rate": 9.834300198034934e-06, "loss": 0.8423, "step": 2135 }, { "epoch": 0.10977489978415048, "grad_norm": 0.8546350002288818, "learning_rate": 9.83408765077819e-06, "loss": 0.7267, "step": 2136 }, { "epoch": 0.10982629252749512, "grad_norm": 1.1998388767242432, "learning_rate": 9.833874969588828e-06, "loss": 0.792, "step": 2137 }, { "epoch": 0.10987768527083976, "grad_norm": 1.2354494333267212, "learning_rate": 9.833662154472738e-06, "loss": 0.7892, "step": 2138 }, { "epoch": 0.1099290780141844, "grad_norm": 1.1702477931976318, "learning_rate": 9.833449205435817e-06, "loss": 0.8115, "step": 2139 }, { "epoch": 0.10998047075752904, "grad_norm": 1.1714764833450317, "learning_rate": 9.833236122483967e-06, "loss": 0.8342, "step": 2140 }, { "epoch": 0.11003186350087368, "grad_norm": 1.1763198375701904, "learning_rate": 9.833022905623086e-06, "loss": 0.8534, "step": 2141 }, { "epoch": 0.11008325624421832, "grad_norm": 1.2342931032180786, "learning_rate": 9.832809554859088e-06, "loss": 0.923, "step": 2142 }, { "epoch": 0.11013464898756295, "grad_norm": 1.1907038688659668, "learning_rate": 9.83259607019788e-06, "loss": 0.7705, "step": 2143 }, { "epoch": 0.1101860417309076, "grad_norm": 1.1392539739608765, "learning_rate": 9.832382451645377e-06, "loss": 0.8351, "step": 2144 }, { "epoch": 0.11023743447425223, "grad_norm": 1.198627233505249, "learning_rate": 9.832168699207498e-06, "loss": 0.8773, "step": 2145 }, { "epoch": 0.11028882721759688, "grad_norm": 1.2492464780807495, "learning_rate": 9.831954812890168e-06, "loss": 0.8655, "step": 2146 }, { "epoch": 0.11034021996094151, "grad_norm": 1.2627941370010376, "learning_rate": 9.83174079269931e-06, "loss": 0.7434, "step": 2147 }, { "epoch": 0.11039161270428616, "grad_norm": 1.1658904552459717, "learning_rate": 9.831526638640852e-06, "loss": 0.8412, "step": 2148 }, { "epoch": 0.11044300544763079, "grad_norm": 1.7002475261688232, "learning_rate": 9.83131235072073e-06, "loss": 0.8496, "step": 2149 }, { "epoch": 0.11049439819097544, "grad_norm": 1.151473879814148, "learning_rate": 9.83109792894488e-06, "loss": 0.7836, "step": 2150 }, { "epoch": 0.11054579093432007, "grad_norm": 1.1377246379852295, "learning_rate": 9.830883373319244e-06, "loss": 0.8052, "step": 2151 }, { "epoch": 0.11059718367766472, "grad_norm": 1.029439926147461, "learning_rate": 9.830668683849766e-06, "loss": 0.7213, "step": 2152 }, { "epoch": 0.11064857642100935, "grad_norm": 1.1953693628311157, "learning_rate": 9.830453860542393e-06, "loss": 0.8036, "step": 2153 }, { "epoch": 0.110699969164354, "grad_norm": 1.1742392778396606, "learning_rate": 9.830238903403078e-06, "loss": 0.7974, "step": 2154 }, { "epoch": 0.11075136190769863, "grad_norm": 1.1791337728500366, "learning_rate": 9.830023812437777e-06, "loss": 0.8304, "step": 2155 }, { "epoch": 0.11080275465104328, "grad_norm": 1.1293309926986694, "learning_rate": 9.829808587652447e-06, "loss": 0.8188, "step": 2156 }, { "epoch": 0.11085414739438791, "grad_norm": 1.1872262954711914, "learning_rate": 9.829593229053055e-06, "loss": 0.832, "step": 2157 }, { "epoch": 0.11090554013773256, "grad_norm": 1.202713966369629, "learning_rate": 9.829377736645562e-06, "loss": 0.8391, "step": 2158 }, { "epoch": 0.11095693288107719, "grad_norm": 1.1206715106964111, "learning_rate": 9.82916211043594e-06, "loss": 0.7979, "step": 2159 }, { "epoch": 0.11100832562442182, "grad_norm": 1.1883658170700073, "learning_rate": 9.828946350430169e-06, "loss": 0.8654, "step": 2160 }, { "epoch": 0.11105971836776647, "grad_norm": 1.1700845956802368, "learning_rate": 9.828730456634222e-06, "loss": 0.8531, "step": 2161 }, { "epoch": 0.1111111111111111, "grad_norm": 0.9313642978668213, "learning_rate": 9.82851442905408e-06, "loss": 0.75, "step": 2162 }, { "epoch": 0.11116250385445575, "grad_norm": 1.217764973640442, "learning_rate": 9.82829826769573e-06, "loss": 0.8306, "step": 2163 }, { "epoch": 0.11121389659780039, "grad_norm": 1.1495599746704102, "learning_rate": 9.82808197256516e-06, "loss": 0.7775, "step": 2164 }, { "epoch": 0.11126528934114503, "grad_norm": 1.1596499681472778, "learning_rate": 9.827865543668362e-06, "loss": 0.8938, "step": 2165 }, { "epoch": 0.11131668208448967, "grad_norm": 1.2563353776931763, "learning_rate": 9.827648981011334e-06, "loss": 0.8334, "step": 2166 }, { "epoch": 0.11136807482783431, "grad_norm": 0.8503729701042175, "learning_rate": 9.827432284600073e-06, "loss": 0.7069, "step": 2167 }, { "epoch": 0.11141946757117895, "grad_norm": 1.1692421436309814, "learning_rate": 9.827215454440588e-06, "loss": 0.7936, "step": 2168 }, { "epoch": 0.11147086031452359, "grad_norm": 0.8492381572723389, "learning_rate": 9.826998490538883e-06, "loss": 0.7473, "step": 2169 }, { "epoch": 0.11152225305786823, "grad_norm": 1.0849483013153076, "learning_rate": 9.826781392900968e-06, "loss": 0.7947, "step": 2170 }, { "epoch": 0.11157364580121287, "grad_norm": 0.8289567232131958, "learning_rate": 9.82656416153286e-06, "loss": 0.7413, "step": 2171 }, { "epoch": 0.1116250385445575, "grad_norm": 1.1089200973510742, "learning_rate": 9.82634679644058e-06, "loss": 0.8241, "step": 2172 }, { "epoch": 0.11167643128790215, "grad_norm": 1.1167725324630737, "learning_rate": 9.826129297630146e-06, "loss": 0.7698, "step": 2173 }, { "epoch": 0.11172782403124679, "grad_norm": 1.1596695184707642, "learning_rate": 9.825911665107584e-06, "loss": 0.8354, "step": 2174 }, { "epoch": 0.11177921677459143, "grad_norm": 1.1653627157211304, "learning_rate": 9.825693898878925e-06, "loss": 0.8043, "step": 2175 }, { "epoch": 0.11183060951793607, "grad_norm": 1.160455584526062, "learning_rate": 9.825475998950203e-06, "loss": 0.8086, "step": 2176 }, { "epoch": 0.11188200226128071, "grad_norm": 1.2420039176940918, "learning_rate": 9.825257965327454e-06, "loss": 0.8526, "step": 2177 }, { "epoch": 0.11193339500462535, "grad_norm": 0.8005742430686951, "learning_rate": 9.825039798016723e-06, "loss": 0.7035, "step": 2178 }, { "epoch": 0.11198478774796998, "grad_norm": 0.8854387402534485, "learning_rate": 9.824821497024046e-06, "loss": 0.7325, "step": 2179 }, { "epoch": 0.11203618049131463, "grad_norm": 0.7694113850593567, "learning_rate": 9.82460306235548e-06, "loss": 0.6782, "step": 2180 }, { "epoch": 0.11208757323465926, "grad_norm": 0.7485276460647583, "learning_rate": 9.824384494017072e-06, "loss": 0.7378, "step": 2181 }, { "epoch": 0.11213896597800391, "grad_norm": 0.7948840260505676, "learning_rate": 9.824165792014877e-06, "loss": 0.7209, "step": 2182 }, { "epoch": 0.11219035872134854, "grad_norm": 1.305915117263794, "learning_rate": 9.823946956354958e-06, "loss": 0.8635, "step": 2183 }, { "epoch": 0.11224175146469319, "grad_norm": 0.7420186996459961, "learning_rate": 9.823727987043376e-06, "loss": 0.6824, "step": 2184 }, { "epoch": 0.11229314420803782, "grad_norm": 0.7992856502532959, "learning_rate": 9.823508884086197e-06, "loss": 0.7047, "step": 2185 }, { "epoch": 0.11234453695138247, "grad_norm": 1.1752492189407349, "learning_rate": 9.823289647489493e-06, "loss": 0.8568, "step": 2186 }, { "epoch": 0.1123959296947271, "grad_norm": 1.4357744455337524, "learning_rate": 9.823070277259337e-06, "loss": 0.8649, "step": 2187 }, { "epoch": 0.11244732243807175, "grad_norm": 1.105179786682129, "learning_rate": 9.822850773401807e-06, "loss": 0.798, "step": 2188 }, { "epoch": 0.11249871518141638, "grad_norm": 1.189935564994812, "learning_rate": 9.822631135922984e-06, "loss": 0.8346, "step": 2189 }, { "epoch": 0.11255010792476103, "grad_norm": 1.2357518672943115, "learning_rate": 9.822411364828957e-06, "loss": 0.8323, "step": 2190 }, { "epoch": 0.11260150066810566, "grad_norm": 1.1284202337265015, "learning_rate": 9.82219146012581e-06, "loss": 0.825, "step": 2191 }, { "epoch": 0.11265289341145031, "grad_norm": 1.198906421661377, "learning_rate": 9.821971421819637e-06, "loss": 0.8671, "step": 2192 }, { "epoch": 0.11270428615479494, "grad_norm": 1.0011494159698486, "learning_rate": 9.821751249916536e-06, "loss": 0.7101, "step": 2193 }, { "epoch": 0.11275567889813959, "grad_norm": 1.1242879629135132, "learning_rate": 9.821530944422606e-06, "loss": 0.824, "step": 2194 }, { "epoch": 0.11280707164148422, "grad_norm": 1.192854404449463, "learning_rate": 9.82131050534395e-06, "loss": 0.8145, "step": 2195 }, { "epoch": 0.11285846438482887, "grad_norm": 0.8611165285110474, "learning_rate": 9.821089932686677e-06, "loss": 0.7311, "step": 2196 }, { "epoch": 0.1129098571281735, "grad_norm": 1.0399948358535767, "learning_rate": 9.820869226456897e-06, "loss": 0.8317, "step": 2197 }, { "epoch": 0.11296124987151814, "grad_norm": 1.1548027992248535, "learning_rate": 9.820648386660727e-06, "loss": 0.8093, "step": 2198 }, { "epoch": 0.11301264261486278, "grad_norm": 1.1734575033187866, "learning_rate": 9.820427413304282e-06, "loss": 0.7881, "step": 2199 }, { "epoch": 0.11306403535820742, "grad_norm": 1.1255524158477783, "learning_rate": 9.820206306393687e-06, "loss": 0.8569, "step": 2200 }, { "epoch": 0.11311542810155206, "grad_norm": 1.189300775527954, "learning_rate": 9.819985065935065e-06, "loss": 0.865, "step": 2201 }, { "epoch": 0.1131668208448967, "grad_norm": 0.842091977596283, "learning_rate": 9.819763691934551e-06, "loss": 0.6655, "step": 2202 }, { "epoch": 0.11321821358824134, "grad_norm": 1.22659432888031, "learning_rate": 9.819542184398273e-06, "loss": 0.8303, "step": 2203 }, { "epoch": 0.11326960633158598, "grad_norm": 1.1401138305664062, "learning_rate": 9.819320543332371e-06, "loss": 0.8408, "step": 2204 }, { "epoch": 0.11332099907493062, "grad_norm": 0.8609177470207214, "learning_rate": 9.819098768742985e-06, "loss": 0.7204, "step": 2205 }, { "epoch": 0.11337239181827526, "grad_norm": 1.1957910060882568, "learning_rate": 9.818876860636259e-06, "loss": 0.8133, "step": 2206 }, { "epoch": 0.1134237845616199, "grad_norm": 1.171829342842102, "learning_rate": 9.818654819018341e-06, "loss": 0.7967, "step": 2207 }, { "epoch": 0.11347517730496454, "grad_norm": 0.8800224661827087, "learning_rate": 9.818432643895383e-06, "loss": 0.6815, "step": 2208 }, { "epoch": 0.11352657004830918, "grad_norm": 1.1448153257369995, "learning_rate": 9.818210335273541e-06, "loss": 0.8125, "step": 2209 }, { "epoch": 0.11357796279165382, "grad_norm": 1.3789615631103516, "learning_rate": 9.817987893158976e-06, "loss": 0.8499, "step": 2210 }, { "epoch": 0.11362935553499846, "grad_norm": 0.8859899044036865, "learning_rate": 9.817765317557848e-06, "loss": 0.7424, "step": 2211 }, { "epoch": 0.1136807482783431, "grad_norm": 1.135510802268982, "learning_rate": 9.817542608476325e-06, "loss": 0.8468, "step": 2212 }, { "epoch": 0.11373214102168774, "grad_norm": 1.1696711778640747, "learning_rate": 9.817319765920576e-06, "loss": 0.8337, "step": 2213 }, { "epoch": 0.11378353376503238, "grad_norm": 1.391337275505066, "learning_rate": 9.817096789896779e-06, "loss": 0.8101, "step": 2214 }, { "epoch": 0.11383492650837702, "grad_norm": 0.8232397437095642, "learning_rate": 9.816873680411106e-06, "loss": 0.7946, "step": 2215 }, { "epoch": 0.11388631925172166, "grad_norm": 1.0893553495407104, "learning_rate": 9.816650437469744e-06, "loss": 0.8166, "step": 2216 }, { "epoch": 0.11393771199506629, "grad_norm": 1.0865901708602905, "learning_rate": 9.816427061078872e-06, "loss": 0.7737, "step": 2217 }, { "epoch": 0.11398910473841094, "grad_norm": 1.1675657033920288, "learning_rate": 9.816203551244686e-06, "loss": 0.8598, "step": 2218 }, { "epoch": 0.11404049748175557, "grad_norm": 1.155751347541809, "learning_rate": 9.815979907973373e-06, "loss": 0.8249, "step": 2219 }, { "epoch": 0.11409189022510022, "grad_norm": 1.0168051719665527, "learning_rate": 9.81575613127113e-06, "loss": 0.7335, "step": 2220 }, { "epoch": 0.11414328296844485, "grad_norm": 1.156654715538025, "learning_rate": 9.81553222114416e-06, "loss": 0.8244, "step": 2221 }, { "epoch": 0.1141946757117895, "grad_norm": 1.1916625499725342, "learning_rate": 9.815308177598664e-06, "loss": 0.8108, "step": 2222 }, { "epoch": 0.11424606845513413, "grad_norm": 1.0490589141845703, "learning_rate": 9.815084000640851e-06, "loss": 0.8074, "step": 2223 }, { "epoch": 0.11429746119847878, "grad_norm": 1.127611517906189, "learning_rate": 9.81485969027693e-06, "loss": 0.7945, "step": 2224 }, { "epoch": 0.11434885394182341, "grad_norm": 1.0956743955612183, "learning_rate": 9.814635246513117e-06, "loss": 0.7805, "step": 2225 }, { "epoch": 0.11440024668516806, "grad_norm": 0.8723738789558411, "learning_rate": 9.81441066935563e-06, "loss": 0.7058, "step": 2226 }, { "epoch": 0.11445163942851269, "grad_norm": 1.197574257850647, "learning_rate": 9.814185958810692e-06, "loss": 0.804, "step": 2227 }, { "epoch": 0.11450303217185734, "grad_norm": 1.2121386528015137, "learning_rate": 9.813961114884527e-06, "loss": 0.8049, "step": 2228 }, { "epoch": 0.11455442491520197, "grad_norm": 1.231401801109314, "learning_rate": 9.813736137583366e-06, "loss": 0.848, "step": 2229 }, { "epoch": 0.11460581765854662, "grad_norm": 0.7662510871887207, "learning_rate": 9.813511026913442e-06, "loss": 0.7384, "step": 2230 }, { "epoch": 0.11465721040189125, "grad_norm": 0.8181769251823425, "learning_rate": 9.81328578288099e-06, "loss": 0.7442, "step": 2231 }, { "epoch": 0.1147086031452359, "grad_norm": 1.1366883516311646, "learning_rate": 9.813060405492254e-06, "loss": 0.8171, "step": 2232 }, { "epoch": 0.11475999588858053, "grad_norm": 1.1009812355041504, "learning_rate": 9.812834894753476e-06, "loss": 0.7749, "step": 2233 }, { "epoch": 0.11481138863192517, "grad_norm": 1.388122320175171, "learning_rate": 9.812609250670904e-06, "loss": 0.7959, "step": 2234 }, { "epoch": 0.11486278137526981, "grad_norm": 1.1714955568313599, "learning_rate": 9.81238347325079e-06, "loss": 0.855, "step": 2235 }, { "epoch": 0.11491417411861445, "grad_norm": 1.2079038619995117, "learning_rate": 9.812157562499391e-06, "loss": 0.8214, "step": 2236 }, { "epoch": 0.11496556686195909, "grad_norm": 1.1966966390609741, "learning_rate": 9.811931518422963e-06, "loss": 0.8083, "step": 2237 }, { "epoch": 0.11501695960530373, "grad_norm": 1.0508400201797485, "learning_rate": 9.811705341027772e-06, "loss": 0.7889, "step": 2238 }, { "epoch": 0.11506835234864837, "grad_norm": 1.2941479682922363, "learning_rate": 9.811479030320081e-06, "loss": 0.7083, "step": 2239 }, { "epoch": 0.115119745091993, "grad_norm": 0.8416644334793091, "learning_rate": 9.811252586306164e-06, "loss": 0.7267, "step": 2240 }, { "epoch": 0.11517113783533765, "grad_norm": 1.208182692527771, "learning_rate": 9.81102600899229e-06, "loss": 0.8419, "step": 2241 }, { "epoch": 0.11522253057868229, "grad_norm": 0.7682667374610901, "learning_rate": 9.810799298384742e-06, "loss": 0.7507, "step": 2242 }, { "epoch": 0.11527392332202693, "grad_norm": 1.1417720317840576, "learning_rate": 9.810572454489796e-06, "loss": 0.7738, "step": 2243 }, { "epoch": 0.11532531606537157, "grad_norm": 1.1823337078094482, "learning_rate": 9.81034547731374e-06, "loss": 0.8585, "step": 2244 }, { "epoch": 0.11537670880871621, "grad_norm": 0.9936612844467163, "learning_rate": 9.810118366862862e-06, "loss": 0.7301, "step": 2245 }, { "epoch": 0.11542810155206085, "grad_norm": 1.2431813478469849, "learning_rate": 9.809891123143455e-06, "loss": 0.8515, "step": 2246 }, { "epoch": 0.1154794942954055, "grad_norm": 1.1989020109176636, "learning_rate": 9.809663746161812e-06, "loss": 0.8486, "step": 2247 }, { "epoch": 0.11553088703875013, "grad_norm": 1.1301652193069458, "learning_rate": 9.809436235924237e-06, "loss": 0.8482, "step": 2248 }, { "epoch": 0.11558227978209477, "grad_norm": 1.1326934099197388, "learning_rate": 9.809208592437032e-06, "loss": 0.8063, "step": 2249 }, { "epoch": 0.11563367252543941, "grad_norm": 0.7685805559158325, "learning_rate": 9.808980815706502e-06, "loss": 0.7051, "step": 2250 }, { "epoch": 0.11568506526878405, "grad_norm": 1.1498092412948608, "learning_rate": 9.808752905738958e-06, "loss": 0.7975, "step": 2251 }, { "epoch": 0.11573645801212869, "grad_norm": 1.1571431159973145, "learning_rate": 9.808524862540718e-06, "loss": 0.8442, "step": 2252 }, { "epoch": 0.11578785075547332, "grad_norm": 1.2044833898544312, "learning_rate": 9.808296686118097e-06, "loss": 0.8168, "step": 2253 }, { "epoch": 0.11583924349881797, "grad_norm": 1.1793773174285889, "learning_rate": 9.808068376477415e-06, "loss": 0.8131, "step": 2254 }, { "epoch": 0.1158906362421626, "grad_norm": 1.0460063219070435, "learning_rate": 9.807839933625003e-06, "loss": 0.7302, "step": 2255 }, { "epoch": 0.11594202898550725, "grad_norm": 1.1322252750396729, "learning_rate": 9.807611357567185e-06, "loss": 0.8225, "step": 2256 }, { "epoch": 0.11599342172885188, "grad_norm": 1.0627758502960205, "learning_rate": 9.807382648310298e-06, "loss": 0.7657, "step": 2257 }, { "epoch": 0.11604481447219653, "grad_norm": 1.160506248474121, "learning_rate": 9.807153805860676e-06, "loss": 0.7815, "step": 2258 }, { "epoch": 0.11609620721554116, "grad_norm": 1.1657752990722656, "learning_rate": 9.80692483022466e-06, "loss": 0.8051, "step": 2259 }, { "epoch": 0.11614759995888581, "grad_norm": 1.2565639019012451, "learning_rate": 9.806695721408595e-06, "loss": 0.8148, "step": 2260 }, { "epoch": 0.11619899270223044, "grad_norm": 1.1335959434509277, "learning_rate": 9.806466479418826e-06, "loss": 0.8249, "step": 2261 }, { "epoch": 0.11625038544557509, "grad_norm": 1.3181891441345215, "learning_rate": 9.806237104261706e-06, "loss": 0.877, "step": 2262 }, { "epoch": 0.11630177818891972, "grad_norm": 1.1931564807891846, "learning_rate": 9.806007595943593e-06, "loss": 0.8316, "step": 2263 }, { "epoch": 0.11635317093226437, "grad_norm": 0.9158135652542114, "learning_rate": 9.805777954470839e-06, "loss": 0.7474, "step": 2264 }, { "epoch": 0.116404563675609, "grad_norm": 1.092392921447754, "learning_rate": 9.80554817984981e-06, "loss": 0.7904, "step": 2265 }, { "epoch": 0.11645595641895365, "grad_norm": 1.1873093843460083, "learning_rate": 9.805318272086874e-06, "loss": 0.8432, "step": 2266 }, { "epoch": 0.11650734916229828, "grad_norm": 1.1701726913452148, "learning_rate": 9.8050882311884e-06, "loss": 0.8385, "step": 2267 }, { "epoch": 0.11655874190564293, "grad_norm": 1.1609998941421509, "learning_rate": 9.804858057160758e-06, "loss": 0.7997, "step": 2268 }, { "epoch": 0.11661013464898756, "grad_norm": 1.1385737657546997, "learning_rate": 9.804627750010329e-06, "loss": 0.7694, "step": 2269 }, { "epoch": 0.11666152739233221, "grad_norm": 1.19200599193573, "learning_rate": 9.804397309743493e-06, "loss": 0.8275, "step": 2270 }, { "epoch": 0.11671292013567684, "grad_norm": 1.3303487300872803, "learning_rate": 9.804166736366635e-06, "loss": 0.8326, "step": 2271 }, { "epoch": 0.11676431287902148, "grad_norm": 0.9244875311851501, "learning_rate": 9.803936029886141e-06, "loss": 0.7688, "step": 2272 }, { "epoch": 0.11681570562236612, "grad_norm": 1.2007381916046143, "learning_rate": 9.803705190308404e-06, "loss": 0.8164, "step": 2273 }, { "epoch": 0.11686709836571076, "grad_norm": 1.1963053941726685, "learning_rate": 9.803474217639821e-06, "loss": 0.8646, "step": 2274 }, { "epoch": 0.1169184911090554, "grad_norm": 1.2735464572906494, "learning_rate": 9.803243111886788e-06, "loss": 0.8183, "step": 2275 }, { "epoch": 0.11696988385240004, "grad_norm": 1.1631420850753784, "learning_rate": 9.803011873055713e-06, "loss": 0.8074, "step": 2276 }, { "epoch": 0.11702127659574468, "grad_norm": 1.1905298233032227, "learning_rate": 9.802780501152997e-06, "loss": 0.8607, "step": 2277 }, { "epoch": 0.11707266933908932, "grad_norm": 0.8752067685127258, "learning_rate": 9.802548996185056e-06, "loss": 0.7716, "step": 2278 }, { "epoch": 0.11712406208243396, "grad_norm": 1.2684789896011353, "learning_rate": 9.8023173581583e-06, "loss": 0.8314, "step": 2279 }, { "epoch": 0.1171754548257786, "grad_norm": 1.1342294216156006, "learning_rate": 9.802085587079147e-06, "loss": 0.749, "step": 2280 }, { "epoch": 0.11722684756912324, "grad_norm": 1.1198534965515137, "learning_rate": 9.80185368295402e-06, "loss": 0.7954, "step": 2281 }, { "epoch": 0.11727824031246788, "grad_norm": 1.0954395532608032, "learning_rate": 9.801621645789346e-06, "loss": 0.7939, "step": 2282 }, { "epoch": 0.11732963305581252, "grad_norm": 1.151352882385254, "learning_rate": 9.801389475591548e-06, "loss": 0.8916, "step": 2283 }, { "epoch": 0.11738102579915716, "grad_norm": 1.222489356994629, "learning_rate": 9.801157172367064e-06, "loss": 0.811, "step": 2284 }, { "epoch": 0.1174324185425018, "grad_norm": 1.1428749561309814, "learning_rate": 9.800924736122326e-06, "loss": 0.8499, "step": 2285 }, { "epoch": 0.11748381128584644, "grad_norm": 1.2226756811141968, "learning_rate": 9.800692166863777e-06, "loss": 0.8123, "step": 2286 }, { "epoch": 0.11753520402919108, "grad_norm": 0.7957601547241211, "learning_rate": 9.80045946459786e-06, "loss": 0.7308, "step": 2287 }, { "epoch": 0.11758659677253572, "grad_norm": 1.1141963005065918, "learning_rate": 9.80022662933102e-06, "loss": 0.7744, "step": 2288 }, { "epoch": 0.11763798951588036, "grad_norm": 1.1813395023345947, "learning_rate": 9.799993661069712e-06, "loss": 0.7754, "step": 2289 }, { "epoch": 0.117689382259225, "grad_norm": 1.1548686027526855, "learning_rate": 9.799760559820385e-06, "loss": 0.8755, "step": 2290 }, { "epoch": 0.11774077500256963, "grad_norm": 0.8712412118911743, "learning_rate": 9.799527325589503e-06, "loss": 0.7138, "step": 2291 }, { "epoch": 0.11779216774591428, "grad_norm": 1.1727826595306396, "learning_rate": 9.799293958383523e-06, "loss": 0.8311, "step": 2292 }, { "epoch": 0.11784356048925891, "grad_norm": 1.1654831171035767, "learning_rate": 9.799060458208913e-06, "loss": 0.8075, "step": 2293 }, { "epoch": 0.11789495323260356, "grad_norm": 1.1513378620147705, "learning_rate": 9.798826825072144e-06, "loss": 0.8108, "step": 2294 }, { "epoch": 0.11794634597594819, "grad_norm": 1.213681936264038, "learning_rate": 9.798593058979686e-06, "loss": 0.9134, "step": 2295 }, { "epoch": 0.11799773871929284, "grad_norm": 1.1846833229064941, "learning_rate": 9.798359159938017e-06, "loss": 0.7895, "step": 2296 }, { "epoch": 0.11804913146263747, "grad_norm": 1.0933934450149536, "learning_rate": 9.798125127953617e-06, "loss": 0.836, "step": 2297 }, { "epoch": 0.11810052420598212, "grad_norm": 1.159136414527893, "learning_rate": 9.797890963032972e-06, "loss": 0.8006, "step": 2298 }, { "epoch": 0.11815191694932675, "grad_norm": 0.8138665556907654, "learning_rate": 9.797656665182567e-06, "loss": 0.7267, "step": 2299 }, { "epoch": 0.1182033096926714, "grad_norm": 1.2907710075378418, "learning_rate": 9.797422234408896e-06, "loss": 0.7974, "step": 2300 }, { "epoch": 0.11825470243601603, "grad_norm": 1.0957804918289185, "learning_rate": 9.79718767071845e-06, "loss": 0.8965, "step": 2301 }, { "epoch": 0.11830609517936068, "grad_norm": 1.2075046300888062, "learning_rate": 9.796952974117732e-06, "loss": 0.8354, "step": 2302 }, { "epoch": 0.11835748792270531, "grad_norm": 1.2988404035568237, "learning_rate": 9.796718144613242e-06, "loss": 0.8473, "step": 2303 }, { "epoch": 0.11840888066604996, "grad_norm": 1.2331697940826416, "learning_rate": 9.796483182211488e-06, "loss": 0.8433, "step": 2304 }, { "epoch": 0.11846027340939459, "grad_norm": 0.8854478001594543, "learning_rate": 9.79624808691898e-06, "loss": 0.7321, "step": 2305 }, { "epoch": 0.11851166615273924, "grad_norm": 1.151158332824707, "learning_rate": 9.79601285874223e-06, "loss": 0.791, "step": 2306 }, { "epoch": 0.11856305889608387, "grad_norm": 1.1608421802520752, "learning_rate": 9.795777497687755e-06, "loss": 0.8245, "step": 2307 }, { "epoch": 0.1186144516394285, "grad_norm": 1.1874397993087769, "learning_rate": 9.795542003762076e-06, "loss": 0.7834, "step": 2308 }, { "epoch": 0.11866584438277315, "grad_norm": 0.7717701196670532, "learning_rate": 9.795306376971719e-06, "loss": 0.7241, "step": 2309 }, { "epoch": 0.11871723712611779, "grad_norm": 1.1602261066436768, "learning_rate": 9.795070617323211e-06, "loss": 0.8437, "step": 2310 }, { "epoch": 0.11876862986946243, "grad_norm": 1.1960617303848267, "learning_rate": 9.794834724823084e-06, "loss": 0.8495, "step": 2311 }, { "epoch": 0.11882002261280707, "grad_norm": 1.1137713193893433, "learning_rate": 9.794598699477874e-06, "loss": 0.7788, "step": 2312 }, { "epoch": 0.11887141535615171, "grad_norm": 0.7985857129096985, "learning_rate": 9.79436254129412e-06, "loss": 0.6837, "step": 2313 }, { "epoch": 0.11892280809949635, "grad_norm": 1.001046061515808, "learning_rate": 9.794126250278366e-06, "loss": 0.7176, "step": 2314 }, { "epoch": 0.118974200842841, "grad_norm": 0.8436415791511536, "learning_rate": 9.793889826437158e-06, "loss": 0.7105, "step": 2315 }, { "epoch": 0.11902559358618563, "grad_norm": 1.1825486421585083, "learning_rate": 9.793653269777043e-06, "loss": 0.8236, "step": 2316 }, { "epoch": 0.11907698632953027, "grad_norm": 1.1977462768554688, "learning_rate": 9.793416580304582e-06, "loss": 0.8774, "step": 2317 }, { "epoch": 0.11912837907287491, "grad_norm": 0.8693056702613831, "learning_rate": 9.793179758026328e-06, "loss": 0.6691, "step": 2318 }, { "epoch": 0.11917977181621955, "grad_norm": 0.8027037978172302, "learning_rate": 9.792942802948842e-06, "loss": 0.6605, "step": 2319 }, { "epoch": 0.11923116455956419, "grad_norm": 1.1929094791412354, "learning_rate": 9.792705715078691e-06, "loss": 0.7963, "step": 2320 }, { "epoch": 0.11928255730290883, "grad_norm": 1.2470293045043945, "learning_rate": 9.792468494422443e-06, "loss": 0.8188, "step": 2321 }, { "epoch": 0.11933395004625347, "grad_norm": 1.083120584487915, "learning_rate": 9.79223114098667e-06, "loss": 0.8888, "step": 2322 }, { "epoch": 0.11938534278959811, "grad_norm": 0.7924959659576416, "learning_rate": 9.791993654777949e-06, "loss": 0.7239, "step": 2323 }, { "epoch": 0.11943673553294275, "grad_norm": 1.1215131282806396, "learning_rate": 9.791756035802858e-06, "loss": 0.786, "step": 2324 }, { "epoch": 0.1194881282762874, "grad_norm": 1.283737063407898, "learning_rate": 9.791518284067983e-06, "loss": 0.79, "step": 2325 }, { "epoch": 0.11953952101963203, "grad_norm": 1.1455661058425903, "learning_rate": 9.79128039957991e-06, "loss": 0.8132, "step": 2326 }, { "epoch": 0.11959091376297666, "grad_norm": 1.1607595682144165, "learning_rate": 9.791042382345227e-06, "loss": 0.7933, "step": 2327 }, { "epoch": 0.11964230650632131, "grad_norm": 1.1484507322311401, "learning_rate": 9.790804232370533e-06, "loss": 0.7774, "step": 2328 }, { "epoch": 0.11969369924966594, "grad_norm": 1.0853508710861206, "learning_rate": 9.790565949662425e-06, "loss": 0.7915, "step": 2329 }, { "epoch": 0.11974509199301059, "grad_norm": 0.9143819212913513, "learning_rate": 9.790327534227502e-06, "loss": 0.7331, "step": 2330 }, { "epoch": 0.11979648473635522, "grad_norm": 1.2360515594482422, "learning_rate": 9.790088986072372e-06, "loss": 0.7699, "step": 2331 }, { "epoch": 0.11984787747969987, "grad_norm": 1.1892566680908203, "learning_rate": 9.789850305203646e-06, "loss": 0.8275, "step": 2332 }, { "epoch": 0.1198992702230445, "grad_norm": 1.2155404090881348, "learning_rate": 9.789611491627933e-06, "loss": 0.7942, "step": 2333 }, { "epoch": 0.11995066296638915, "grad_norm": 1.1557867527008057, "learning_rate": 9.789372545351851e-06, "loss": 0.8055, "step": 2334 }, { "epoch": 0.12000205570973378, "grad_norm": 1.1441162824630737, "learning_rate": 9.78913346638202e-06, "loss": 0.8614, "step": 2335 }, { "epoch": 0.12005344845307843, "grad_norm": 1.1550605297088623, "learning_rate": 9.788894254725065e-06, "loss": 0.799, "step": 2336 }, { "epoch": 0.12010484119642306, "grad_norm": 1.201387643814087, "learning_rate": 9.788654910387611e-06, "loss": 0.8131, "step": 2337 }, { "epoch": 0.12015623393976771, "grad_norm": 1.1856136322021484, "learning_rate": 9.788415433376293e-06, "loss": 0.8414, "step": 2338 }, { "epoch": 0.12020762668311234, "grad_norm": 1.193785309791565, "learning_rate": 9.788175823697744e-06, "loss": 0.8332, "step": 2339 }, { "epoch": 0.12025901942645699, "grad_norm": 0.8861750960350037, "learning_rate": 9.787936081358602e-06, "loss": 0.7092, "step": 2340 }, { "epoch": 0.12031041216980162, "grad_norm": 0.7914197444915771, "learning_rate": 9.78769620636551e-06, "loss": 0.6724, "step": 2341 }, { "epoch": 0.12036180491314627, "grad_norm": 1.2197389602661133, "learning_rate": 9.787456198725114e-06, "loss": 0.8405, "step": 2342 }, { "epoch": 0.1204131976564909, "grad_norm": 1.2777796983718872, "learning_rate": 9.787216058444063e-06, "loss": 0.8338, "step": 2343 }, { "epoch": 0.12046459039983555, "grad_norm": 1.1930240392684937, "learning_rate": 9.78697578552901e-06, "loss": 0.8247, "step": 2344 }, { "epoch": 0.12051598314318018, "grad_norm": 1.2151107788085938, "learning_rate": 9.786735379986613e-06, "loss": 0.8665, "step": 2345 }, { "epoch": 0.12056737588652482, "grad_norm": 0.7986428141593933, "learning_rate": 9.786494841823534e-06, "loss": 0.7414, "step": 2346 }, { "epoch": 0.12061876862986946, "grad_norm": 1.2018343210220337, "learning_rate": 9.786254171046434e-06, "loss": 0.829, "step": 2347 }, { "epoch": 0.1206701613732141, "grad_norm": 1.1624906063079834, "learning_rate": 9.786013367661982e-06, "loss": 0.8097, "step": 2348 }, { "epoch": 0.12072155411655874, "grad_norm": 1.1820101737976074, "learning_rate": 9.785772431676852e-06, "loss": 0.8393, "step": 2349 }, { "epoch": 0.12077294685990338, "grad_norm": 1.219596028327942, "learning_rate": 9.785531363097718e-06, "loss": 0.8454, "step": 2350 }, { "epoch": 0.12082433960324802, "grad_norm": 1.1194829940795898, "learning_rate": 9.785290161931256e-06, "loss": 0.8882, "step": 2351 }, { "epoch": 0.12087573234659266, "grad_norm": 1.1620408296585083, "learning_rate": 9.785048828184153e-06, "loss": 0.849, "step": 2352 }, { "epoch": 0.1209271250899373, "grad_norm": 1.1587157249450684, "learning_rate": 9.784807361863094e-06, "loss": 0.8306, "step": 2353 }, { "epoch": 0.12097851783328194, "grad_norm": 1.1986109018325806, "learning_rate": 9.784565762974768e-06, "loss": 0.7667, "step": 2354 }, { "epoch": 0.12102991057662658, "grad_norm": 1.1149792671203613, "learning_rate": 9.78432403152587e-06, "loss": 0.7846, "step": 2355 }, { "epoch": 0.12108130331997122, "grad_norm": 1.1920340061187744, "learning_rate": 9.784082167523097e-06, "loss": 0.8115, "step": 2356 }, { "epoch": 0.12113269606331586, "grad_norm": 1.2083313465118408, "learning_rate": 9.783840170973149e-06, "loss": 0.8229, "step": 2357 }, { "epoch": 0.1211840888066605, "grad_norm": 0.8737711310386658, "learning_rate": 9.783598041882732e-06, "loss": 0.7323, "step": 2358 }, { "epoch": 0.12123548155000514, "grad_norm": 1.077492594718933, "learning_rate": 9.783355780258554e-06, "loss": 0.779, "step": 2359 }, { "epoch": 0.12128687429334978, "grad_norm": 0.8473538756370544, "learning_rate": 9.783113386107328e-06, "loss": 0.7365, "step": 2360 }, { "epoch": 0.12133826703669442, "grad_norm": 1.2189054489135742, "learning_rate": 9.782870859435768e-06, "loss": 0.8042, "step": 2361 }, { "epoch": 0.12138965978003906, "grad_norm": 1.0938615798950195, "learning_rate": 9.782628200250595e-06, "loss": 0.8019, "step": 2362 }, { "epoch": 0.1214410525233837, "grad_norm": 1.0917021036148071, "learning_rate": 9.78238540855853e-06, "loss": 0.773, "step": 2363 }, { "epoch": 0.12149244526672834, "grad_norm": 1.108105182647705, "learning_rate": 9.782142484366301e-06, "loss": 0.8194, "step": 2364 }, { "epoch": 0.12154383801007297, "grad_norm": 1.1767961978912354, "learning_rate": 9.78189942768064e-06, "loss": 0.8348, "step": 2365 }, { "epoch": 0.12159523075341762, "grad_norm": 1.179188847541809, "learning_rate": 9.781656238508279e-06, "loss": 0.8172, "step": 2366 }, { "epoch": 0.12164662349676225, "grad_norm": 1.1754181385040283, "learning_rate": 9.781412916855954e-06, "loss": 0.8896, "step": 2367 }, { "epoch": 0.1216980162401069, "grad_norm": 1.1560802459716797, "learning_rate": 9.781169462730412e-06, "loss": 0.7877, "step": 2368 }, { "epoch": 0.12174940898345153, "grad_norm": 1.171472191810608, "learning_rate": 9.780925876138393e-06, "loss": 0.8424, "step": 2369 }, { "epoch": 0.12180080172679618, "grad_norm": 1.1786916255950928, "learning_rate": 9.78068215708665e-06, "loss": 0.8786, "step": 2370 }, { "epoch": 0.12185219447014081, "grad_norm": 0.7980872392654419, "learning_rate": 9.780438305581931e-06, "loss": 0.7392, "step": 2371 }, { "epoch": 0.12190358721348546, "grad_norm": 1.3483103513717651, "learning_rate": 9.780194321630996e-06, "loss": 0.7793, "step": 2372 }, { "epoch": 0.12195497995683009, "grad_norm": 0.9097517132759094, "learning_rate": 9.7799502052406e-06, "loss": 0.7196, "step": 2373 }, { "epoch": 0.12200637270017474, "grad_norm": 0.7508184313774109, "learning_rate": 9.77970595641751e-06, "loss": 0.7666, "step": 2374 }, { "epoch": 0.12205776544351937, "grad_norm": 1.1788885593414307, "learning_rate": 9.779461575168497e-06, "loss": 0.7951, "step": 2375 }, { "epoch": 0.12210915818686402, "grad_norm": 1.1702628135681152, "learning_rate": 9.779217061500324e-06, "loss": 0.831, "step": 2376 }, { "epoch": 0.12216055093020865, "grad_norm": 0.8389960527420044, "learning_rate": 9.778972415419768e-06, "loss": 0.7138, "step": 2377 }, { "epoch": 0.1222119436735533, "grad_norm": 1.1207269430160522, "learning_rate": 9.77872763693361e-06, "loss": 0.7974, "step": 2378 }, { "epoch": 0.12226333641689793, "grad_norm": 0.7351743578910828, "learning_rate": 9.77848272604863e-06, "loss": 0.7194, "step": 2379 }, { "epoch": 0.12231472916024258, "grad_norm": 1.4855936765670776, "learning_rate": 9.778237682771612e-06, "loss": 0.7872, "step": 2380 }, { "epoch": 0.12236612190358721, "grad_norm": 1.1282931566238403, "learning_rate": 9.777992507109345e-06, "loss": 0.8238, "step": 2381 }, { "epoch": 0.12241751464693185, "grad_norm": 1.102705717086792, "learning_rate": 9.777747199068626e-06, "loss": 0.8485, "step": 2382 }, { "epoch": 0.1224689073902765, "grad_norm": 1.090747594833374, "learning_rate": 9.777501758656249e-06, "loss": 0.781, "step": 2383 }, { "epoch": 0.12252030013362113, "grad_norm": 1.1472095251083374, "learning_rate": 9.777256185879012e-06, "loss": 0.7966, "step": 2384 }, { "epoch": 0.12257169287696577, "grad_norm": 1.0857065916061401, "learning_rate": 9.77701048074372e-06, "loss": 0.7938, "step": 2385 }, { "epoch": 0.1226230856203104, "grad_norm": 1.1367560625076294, "learning_rate": 9.776764643257184e-06, "loss": 0.802, "step": 2386 }, { "epoch": 0.12267447836365505, "grad_norm": 1.0472407341003418, "learning_rate": 9.77651867342621e-06, "loss": 0.825, "step": 2387 }, { "epoch": 0.12272587110699969, "grad_norm": 1.086228609085083, "learning_rate": 9.776272571257617e-06, "loss": 0.8402, "step": 2388 }, { "epoch": 0.12277726385034433, "grad_norm": 1.2209358215332031, "learning_rate": 9.77602633675822e-06, "loss": 0.8282, "step": 2389 }, { "epoch": 0.12282865659368897, "grad_norm": 1.1555324792861938, "learning_rate": 9.775779969934842e-06, "loss": 0.8823, "step": 2390 }, { "epoch": 0.12288004933703361, "grad_norm": 1.0583807229995728, "learning_rate": 9.775533470794312e-06, "loss": 0.6895, "step": 2391 }, { "epoch": 0.12293144208037825, "grad_norm": 1.1415427923202515, "learning_rate": 9.775286839343456e-06, "loss": 0.7884, "step": 2392 }, { "epoch": 0.1229828348237229, "grad_norm": 1.2491494417190552, "learning_rate": 9.775040075589107e-06, "loss": 0.8348, "step": 2393 }, { "epoch": 0.12303422756706753, "grad_norm": 1.210504174232483, "learning_rate": 9.774793179538104e-06, "loss": 0.9094, "step": 2394 }, { "epoch": 0.12308562031041217, "grad_norm": 1.1285442113876343, "learning_rate": 9.774546151197285e-06, "loss": 0.7987, "step": 2395 }, { "epoch": 0.12313701305375681, "grad_norm": 1.1042815446853638, "learning_rate": 9.774298990573499e-06, "loss": 0.7701, "step": 2396 }, { "epoch": 0.12318840579710146, "grad_norm": 1.1584590673446655, "learning_rate": 9.774051697673587e-06, "loss": 0.8272, "step": 2397 }, { "epoch": 0.12323979854044609, "grad_norm": 1.1461942195892334, "learning_rate": 9.773804272504404e-06, "loss": 0.8311, "step": 2398 }, { "epoch": 0.12329119128379074, "grad_norm": 1.1121602058410645, "learning_rate": 9.773556715072806e-06, "loss": 0.8718, "step": 2399 }, { "epoch": 0.12334258402713537, "grad_norm": 0.8707857728004456, "learning_rate": 9.773309025385652e-06, "loss": 0.7157, "step": 2400 }, { "epoch": 0.12339397677048, "grad_norm": 1.0973265171051025, "learning_rate": 9.7730612034498e-06, "loss": 0.8048, "step": 2401 }, { "epoch": 0.12344536951382465, "grad_norm": 1.1713505983352661, "learning_rate": 9.772813249272121e-06, "loss": 0.8723, "step": 2402 }, { "epoch": 0.12349676225716928, "grad_norm": 1.1715407371520996, "learning_rate": 9.772565162859483e-06, "loss": 0.8259, "step": 2403 }, { "epoch": 0.12354815500051393, "grad_norm": 1.0965425968170166, "learning_rate": 9.77231694421876e-06, "loss": 0.8943, "step": 2404 }, { "epoch": 0.12359954774385856, "grad_norm": 1.1724035739898682, "learning_rate": 9.772068593356829e-06, "loss": 0.8144, "step": 2405 }, { "epoch": 0.12365094048720321, "grad_norm": 1.1090623140335083, "learning_rate": 9.77182011028057e-06, "loss": 0.8161, "step": 2406 }, { "epoch": 0.12370233323054784, "grad_norm": 1.2380086183547974, "learning_rate": 9.771571494996866e-06, "loss": 0.9177, "step": 2407 }, { "epoch": 0.12375372597389249, "grad_norm": 1.1686373949050903, "learning_rate": 9.77132274751261e-06, "loss": 0.8012, "step": 2408 }, { "epoch": 0.12380511871723712, "grad_norm": 1.174485206604004, "learning_rate": 9.771073867834689e-06, "loss": 0.7706, "step": 2409 }, { "epoch": 0.12385651146058177, "grad_norm": 1.1513538360595703, "learning_rate": 9.77082485597e-06, "loss": 0.8249, "step": 2410 }, { "epoch": 0.1239079042039264, "grad_norm": 1.2050405740737915, "learning_rate": 9.770575711925443e-06, "loss": 0.8024, "step": 2411 }, { "epoch": 0.12395929694727105, "grad_norm": 1.0723958015441895, "learning_rate": 9.77032643570792e-06, "loss": 0.821, "step": 2412 }, { "epoch": 0.12401068969061568, "grad_norm": 1.1408740282058716, "learning_rate": 9.770077027324338e-06, "loss": 0.8116, "step": 2413 }, { "epoch": 0.12406208243396033, "grad_norm": 1.134290099143982, "learning_rate": 9.769827486781605e-06, "loss": 0.7669, "step": 2414 }, { "epoch": 0.12411347517730496, "grad_norm": 1.1239155530929565, "learning_rate": 9.769577814086638e-06, "loss": 0.8213, "step": 2415 }, { "epoch": 0.12416486792064961, "grad_norm": 1.100677728652954, "learning_rate": 9.76932800924635e-06, "loss": 0.8276, "step": 2416 }, { "epoch": 0.12421626066399424, "grad_norm": 0.9586683511734009, "learning_rate": 9.769078072267668e-06, "loss": 0.6942, "step": 2417 }, { "epoch": 0.12426765340733889, "grad_norm": 1.1630403995513916, "learning_rate": 9.76882800315751e-06, "loss": 0.8387, "step": 2418 }, { "epoch": 0.12431904615068352, "grad_norm": 1.1736809015274048, "learning_rate": 9.76857780192281e-06, "loss": 0.849, "step": 2419 }, { "epoch": 0.12437043889402816, "grad_norm": 1.1257340908050537, "learning_rate": 9.768327468570498e-06, "loss": 0.8389, "step": 2420 }, { "epoch": 0.1244218316373728, "grad_norm": 0.8188894391059875, "learning_rate": 9.76807700310751e-06, "loss": 0.6977, "step": 2421 }, { "epoch": 0.12447322438071744, "grad_norm": 1.1124860048294067, "learning_rate": 9.767826405540784e-06, "loss": 0.8248, "step": 2422 }, { "epoch": 0.12452461712406208, "grad_norm": 1.0898975133895874, "learning_rate": 9.767575675877263e-06, "loss": 0.7911, "step": 2423 }, { "epoch": 0.12457600986740672, "grad_norm": 1.0893003940582275, "learning_rate": 9.767324814123896e-06, "loss": 0.8442, "step": 2424 }, { "epoch": 0.12462740261075136, "grad_norm": 1.3539092540740967, "learning_rate": 9.76707382028763e-06, "loss": 0.8346, "step": 2425 }, { "epoch": 0.124678795354096, "grad_norm": 1.19283926486969, "learning_rate": 9.766822694375424e-06, "loss": 0.8191, "step": 2426 }, { "epoch": 0.12473018809744064, "grad_norm": 1.1377184391021729, "learning_rate": 9.766571436394232e-06, "loss": 0.8725, "step": 2427 }, { "epoch": 0.12478158084078528, "grad_norm": 1.1395691633224487, "learning_rate": 9.766320046351015e-06, "loss": 0.8504, "step": 2428 }, { "epoch": 0.12483297358412992, "grad_norm": 1.0948843955993652, "learning_rate": 9.766068524252739e-06, "loss": 0.8183, "step": 2429 }, { "epoch": 0.12488436632747456, "grad_norm": 1.196698546409607, "learning_rate": 9.765816870106373e-06, "loss": 0.8508, "step": 2430 }, { "epoch": 0.1249357590708192, "grad_norm": 1.1633023023605347, "learning_rate": 9.765565083918889e-06, "loss": 0.8811, "step": 2431 }, { "epoch": 0.12498715181416384, "grad_norm": 0.9639090299606323, "learning_rate": 9.765313165697263e-06, "loss": 0.6867, "step": 2432 }, { "epoch": 0.12503854455750849, "grad_norm": 1.1229802370071411, "learning_rate": 9.765061115448474e-06, "loss": 0.8122, "step": 2433 }, { "epoch": 0.12508993730085313, "grad_norm": 0.7699605226516724, "learning_rate": 9.764808933179505e-06, "loss": 0.7129, "step": 2434 }, { "epoch": 0.12514133004419775, "grad_norm": 1.1316336393356323, "learning_rate": 9.764556618897345e-06, "loss": 0.8039, "step": 2435 }, { "epoch": 0.1251927227875424, "grad_norm": 0.7841988205909729, "learning_rate": 9.764304172608982e-06, "loss": 0.7109, "step": 2436 }, { "epoch": 0.12524411553088705, "grad_norm": 1.1598159074783325, "learning_rate": 9.764051594321412e-06, "loss": 0.8732, "step": 2437 }, { "epoch": 0.12529550827423167, "grad_norm": 1.162996768951416, "learning_rate": 9.763798884041634e-06, "loss": 0.7986, "step": 2438 }, { "epoch": 0.1253469010175763, "grad_norm": 1.1298846006393433, "learning_rate": 9.763546041776646e-06, "loss": 0.7717, "step": 2439 }, { "epoch": 0.12539829376092096, "grad_norm": 1.1655163764953613, "learning_rate": 9.763293067533455e-06, "loss": 0.7986, "step": 2440 }, { "epoch": 0.1254496865042656, "grad_norm": 1.1782069206237793, "learning_rate": 9.763039961319072e-06, "loss": 0.7931, "step": 2441 }, { "epoch": 0.12550107924761023, "grad_norm": 1.1473352909088135, "learning_rate": 9.762786723140505e-06, "loss": 0.7973, "step": 2442 }, { "epoch": 0.12555247199095487, "grad_norm": 0.9067972898483276, "learning_rate": 9.762533353004774e-06, "loss": 0.6754, "step": 2443 }, { "epoch": 0.12560386473429952, "grad_norm": 0.966131329536438, "learning_rate": 9.7622798509189e-06, "loss": 0.7291, "step": 2444 }, { "epoch": 0.12565525747764417, "grad_norm": 1.2590121030807495, "learning_rate": 9.7620262168899e-06, "loss": 0.8821, "step": 2445 }, { "epoch": 0.12570665022098879, "grad_norm": 1.1836737394332886, "learning_rate": 9.761772450924807e-06, "loss": 0.7902, "step": 2446 }, { "epoch": 0.12575804296433343, "grad_norm": 1.1875131130218506, "learning_rate": 9.761518553030651e-06, "loss": 0.7986, "step": 2447 }, { "epoch": 0.12580943570767808, "grad_norm": 0.8529171943664551, "learning_rate": 9.761264523214465e-06, "loss": 0.6867, "step": 2448 }, { "epoch": 0.12586082845102273, "grad_norm": 1.3119860887527466, "learning_rate": 9.761010361483287e-06, "loss": 0.8208, "step": 2449 }, { "epoch": 0.12591222119436735, "grad_norm": 1.1417913436889648, "learning_rate": 9.760756067844159e-06, "loss": 0.8314, "step": 2450 }, { "epoch": 0.125963613937712, "grad_norm": 1.2178484201431274, "learning_rate": 9.760501642304127e-06, "loss": 0.8432, "step": 2451 }, { "epoch": 0.12601500668105664, "grad_norm": 1.1822431087493896, "learning_rate": 9.760247084870242e-06, "loss": 0.8576, "step": 2452 }, { "epoch": 0.1260663994244013, "grad_norm": 1.1628656387329102, "learning_rate": 9.759992395549553e-06, "loss": 0.8169, "step": 2453 }, { "epoch": 0.1261177921677459, "grad_norm": 1.1693353652954102, "learning_rate": 9.759737574349118e-06, "loss": 0.8416, "step": 2454 }, { "epoch": 0.12616918491109055, "grad_norm": 0.8510347008705139, "learning_rate": 9.759482621275998e-06, "loss": 0.7307, "step": 2455 }, { "epoch": 0.1262205776544352, "grad_norm": 1.1376551389694214, "learning_rate": 9.759227536337254e-06, "loss": 0.8301, "step": 2456 }, { "epoch": 0.12627197039777982, "grad_norm": 1.2361568212509155, "learning_rate": 9.758972319539957e-06, "loss": 0.7843, "step": 2457 }, { "epoch": 0.12632336314112447, "grad_norm": 0.7573777437210083, "learning_rate": 9.758716970891174e-06, "loss": 0.7186, "step": 2458 }, { "epoch": 0.12637475588446911, "grad_norm": 0.7261207103729248, "learning_rate": 9.758461490397983e-06, "loss": 0.7094, "step": 2459 }, { "epoch": 0.12642614862781376, "grad_norm": 1.0854101181030273, "learning_rate": 9.758205878067461e-06, "loss": 0.8166, "step": 2460 }, { "epoch": 0.12647754137115838, "grad_norm": 1.09835684299469, "learning_rate": 9.75795013390669e-06, "loss": 0.8159, "step": 2461 }, { "epoch": 0.12652893411450303, "grad_norm": 0.9098944067955017, "learning_rate": 9.757694257922757e-06, "loss": 0.7334, "step": 2462 }, { "epoch": 0.12658032685784767, "grad_norm": 0.9366123080253601, "learning_rate": 9.757438250122749e-06, "loss": 0.7116, "step": 2463 }, { "epoch": 0.12663171960119232, "grad_norm": 1.224448561668396, "learning_rate": 9.75718211051376e-06, "loss": 0.8677, "step": 2464 }, { "epoch": 0.12668311234453694, "grad_norm": 1.0930780172348022, "learning_rate": 9.756925839102886e-06, "loss": 0.7819, "step": 2465 }, { "epoch": 0.1267345050878816, "grad_norm": 1.092489242553711, "learning_rate": 9.756669435897228e-06, "loss": 0.7784, "step": 2466 }, { "epoch": 0.12678589783122624, "grad_norm": 1.129995584487915, "learning_rate": 9.75641290090389e-06, "loss": 0.8004, "step": 2467 }, { "epoch": 0.12683729057457088, "grad_norm": 1.194913387298584, "learning_rate": 9.756156234129979e-06, "loss": 0.8121, "step": 2468 }, { "epoch": 0.1268886833179155, "grad_norm": 1.1954747438430786, "learning_rate": 9.755899435582605e-06, "loss": 0.8519, "step": 2469 }, { "epoch": 0.12694007606126015, "grad_norm": 1.1264718770980835, "learning_rate": 9.755642505268885e-06, "loss": 0.8498, "step": 2470 }, { "epoch": 0.1269914688046048, "grad_norm": 1.1738253831863403, "learning_rate": 9.755385443195938e-06, "loss": 0.8364, "step": 2471 }, { "epoch": 0.12704286154794944, "grad_norm": 1.0835410356521606, "learning_rate": 9.755128249370881e-06, "loss": 0.7762, "step": 2472 }, { "epoch": 0.12709425429129406, "grad_norm": 1.0523349046707153, "learning_rate": 9.754870923800846e-06, "loss": 0.7709, "step": 2473 }, { "epoch": 0.1271456470346387, "grad_norm": 1.088438630104065, "learning_rate": 9.75461346649296e-06, "loss": 0.7829, "step": 2474 }, { "epoch": 0.12719703977798336, "grad_norm": 1.112038493156433, "learning_rate": 9.754355877454355e-06, "loss": 0.7755, "step": 2475 }, { "epoch": 0.12724843252132798, "grad_norm": 1.238457441329956, "learning_rate": 9.75409815669217e-06, "loss": 0.8486, "step": 2476 }, { "epoch": 0.12729982526467262, "grad_norm": 1.1355949640274048, "learning_rate": 9.753840304213544e-06, "loss": 0.8167, "step": 2477 }, { "epoch": 0.12735121800801727, "grad_norm": 1.1541801691055298, "learning_rate": 9.753582320025619e-06, "loss": 0.786, "step": 2478 }, { "epoch": 0.12740261075136192, "grad_norm": 1.1330422163009644, "learning_rate": 9.753324204135548e-06, "loss": 0.8133, "step": 2479 }, { "epoch": 0.12745400349470654, "grad_norm": 1.1421079635620117, "learning_rate": 9.753065956550476e-06, "loss": 0.8388, "step": 2480 }, { "epoch": 0.12750539623805118, "grad_norm": 1.1337547302246094, "learning_rate": 9.752807577277563e-06, "loss": 0.8134, "step": 2481 }, { "epoch": 0.12755678898139583, "grad_norm": 1.1110343933105469, "learning_rate": 9.752549066323965e-06, "loss": 0.8118, "step": 2482 }, { "epoch": 0.12760818172474048, "grad_norm": 1.16692316532135, "learning_rate": 9.752290423696843e-06, "loss": 0.8378, "step": 2483 }, { "epoch": 0.1276595744680851, "grad_norm": 1.1433643102645874, "learning_rate": 9.752031649403368e-06, "loss": 0.8659, "step": 2484 }, { "epoch": 0.12771096721142974, "grad_norm": 1.1853506565093994, "learning_rate": 9.751772743450707e-06, "loss": 0.8415, "step": 2485 }, { "epoch": 0.1277623599547744, "grad_norm": 1.1135592460632324, "learning_rate": 9.751513705846032e-06, "loss": 0.8629, "step": 2486 }, { "epoch": 0.12781375269811904, "grad_norm": 1.139089822769165, "learning_rate": 9.751254536596519e-06, "loss": 0.8387, "step": 2487 }, { "epoch": 0.12786514544146366, "grad_norm": 1.2364814281463623, "learning_rate": 9.75099523570935e-06, "loss": 0.8424, "step": 2488 }, { "epoch": 0.1279165381848083, "grad_norm": 1.1059569120407104, "learning_rate": 9.75073580319171e-06, "loss": 0.8593, "step": 2489 }, { "epoch": 0.12796793092815295, "grad_norm": 0.875663697719574, "learning_rate": 9.750476239050786e-06, "loss": 0.6782, "step": 2490 }, { "epoch": 0.1280193236714976, "grad_norm": 1.1016995906829834, "learning_rate": 9.750216543293769e-06, "loss": 0.8821, "step": 2491 }, { "epoch": 0.12807071641484222, "grad_norm": 1.1005722284317017, "learning_rate": 9.749956715927856e-06, "loss": 0.8115, "step": 2492 }, { "epoch": 0.12812210915818686, "grad_norm": 1.2815628051757812, "learning_rate": 9.749696756960243e-06, "loss": 0.8868, "step": 2493 }, { "epoch": 0.1281735019015315, "grad_norm": 1.095913290977478, "learning_rate": 9.749436666398135e-06, "loss": 0.8262, "step": 2494 }, { "epoch": 0.12822489464487613, "grad_norm": 1.1588845252990723, "learning_rate": 9.749176444248734e-06, "loss": 0.8031, "step": 2495 }, { "epoch": 0.12827628738822078, "grad_norm": 1.123256802558899, "learning_rate": 9.748916090519256e-06, "loss": 0.7933, "step": 2496 }, { "epoch": 0.12832768013156542, "grad_norm": 1.1677677631378174, "learning_rate": 9.748655605216908e-06, "loss": 0.8397, "step": 2497 }, { "epoch": 0.12837907287491007, "grad_norm": 1.1567836999893188, "learning_rate": 9.74839498834891e-06, "loss": 0.8054, "step": 2498 }, { "epoch": 0.1284304656182547, "grad_norm": 1.0221271514892578, "learning_rate": 9.748134239922484e-06, "loss": 0.801, "step": 2499 }, { "epoch": 0.12848185836159934, "grad_norm": 1.0962082147598267, "learning_rate": 9.747873359944852e-06, "loss": 0.786, "step": 2500 }, { "epoch": 0.12853325110494399, "grad_norm": 1.1158311367034912, "learning_rate": 9.747612348423241e-06, "loss": 0.8168, "step": 2501 }, { "epoch": 0.12858464384828863, "grad_norm": 1.304885983467102, "learning_rate": 9.747351205364885e-06, "loss": 0.7765, "step": 2502 }, { "epoch": 0.12863603659163325, "grad_norm": 1.1309278011322021, "learning_rate": 9.74708993077702e-06, "loss": 0.8041, "step": 2503 }, { "epoch": 0.1286874293349779, "grad_norm": 0.8903225064277649, "learning_rate": 9.746828524666881e-06, "loss": 0.7539, "step": 2504 }, { "epoch": 0.12873882207832255, "grad_norm": 1.125013828277588, "learning_rate": 9.746566987041713e-06, "loss": 0.8249, "step": 2505 }, { "epoch": 0.1287902148216672, "grad_norm": 0.802054762840271, "learning_rate": 9.746305317908762e-06, "loss": 0.6945, "step": 2506 }, { "epoch": 0.1288416075650118, "grad_norm": 1.1441078186035156, "learning_rate": 9.746043517275278e-06, "loss": 0.8347, "step": 2507 }, { "epoch": 0.12889300030835646, "grad_norm": 1.0917900800704956, "learning_rate": 9.745781585148512e-06, "loss": 0.8145, "step": 2508 }, { "epoch": 0.1289443930517011, "grad_norm": 1.1471260786056519, "learning_rate": 9.745519521535725e-06, "loss": 0.8185, "step": 2509 }, { "epoch": 0.12899578579504575, "grad_norm": 1.1653029918670654, "learning_rate": 9.745257326444176e-06, "loss": 0.8946, "step": 2510 }, { "epoch": 0.12904717853839037, "grad_norm": 1.110302209854126, "learning_rate": 9.744994999881128e-06, "loss": 0.8134, "step": 2511 }, { "epoch": 0.12909857128173502, "grad_norm": 8.557238578796387, "learning_rate": 9.74473254185385e-06, "loss": 0.8255, "step": 2512 }, { "epoch": 0.12914996402507967, "grad_norm": 1.25217866897583, "learning_rate": 9.744469952369614e-06, "loss": 0.8471, "step": 2513 }, { "epoch": 0.12920135676842429, "grad_norm": 1.1042636632919312, "learning_rate": 9.744207231435695e-06, "loss": 0.7905, "step": 2514 }, { "epoch": 0.12925274951176893, "grad_norm": 0.9154908061027527, "learning_rate": 9.743944379059369e-06, "loss": 0.728, "step": 2515 }, { "epoch": 0.12930414225511358, "grad_norm": 1.1178827285766602, "learning_rate": 9.743681395247924e-06, "loss": 0.8358, "step": 2516 }, { "epoch": 0.12935553499845823, "grad_norm": 1.1692816019058228, "learning_rate": 9.743418280008643e-06, "loss": 0.894, "step": 2517 }, { "epoch": 0.12940692774180285, "grad_norm": 1.2138783931732178, "learning_rate": 9.743155033348817e-06, "loss": 0.8147, "step": 2518 }, { "epoch": 0.1294583204851475, "grad_norm": 0.7936961650848389, "learning_rate": 9.742891655275738e-06, "loss": 0.7215, "step": 2519 }, { "epoch": 0.12950971322849214, "grad_norm": 1.1221630573272705, "learning_rate": 9.742628145796704e-06, "loss": 0.8303, "step": 2520 }, { "epoch": 0.1295611059718368, "grad_norm": 1.1911793947219849, "learning_rate": 9.742364504919018e-06, "loss": 0.8082, "step": 2521 }, { "epoch": 0.1296124987151814, "grad_norm": 1.143926739692688, "learning_rate": 9.74210073264998e-06, "loss": 0.7922, "step": 2522 }, { "epoch": 0.12966389145852605, "grad_norm": 1.122007131576538, "learning_rate": 9.7418368289969e-06, "loss": 0.8126, "step": 2523 }, { "epoch": 0.1297152842018707, "grad_norm": 10.368847846984863, "learning_rate": 9.74157279396709e-06, "loss": 1.077, "step": 2524 }, { "epoch": 0.12976667694521535, "grad_norm": 1.2167574167251587, "learning_rate": 9.741308627567866e-06, "loss": 0.8361, "step": 2525 }, { "epoch": 0.12981806968855997, "grad_norm": 1.0986183881759644, "learning_rate": 9.741044329806546e-06, "loss": 0.7848, "step": 2526 }, { "epoch": 0.12986946243190461, "grad_norm": 1.1548548936843872, "learning_rate": 9.74077990069045e-06, "loss": 0.8285, "step": 2527 }, { "epoch": 0.12992085517524926, "grad_norm": 1.1838788986206055, "learning_rate": 9.740515340226911e-06, "loss": 0.8044, "step": 2528 }, { "epoch": 0.1299722479185939, "grad_norm": 1.1177500486373901, "learning_rate": 9.740250648423252e-06, "loss": 0.7872, "step": 2529 }, { "epoch": 0.13002364066193853, "grad_norm": 1.1367467641830444, "learning_rate": 9.739985825286813e-06, "loss": 0.6893, "step": 2530 }, { "epoch": 0.13007503340528317, "grad_norm": 1.612687587738037, "learning_rate": 9.739720870824924e-06, "loss": 0.8172, "step": 2531 }, { "epoch": 0.13012642614862782, "grad_norm": 1.122273564338684, "learning_rate": 9.73945578504493e-06, "loss": 0.783, "step": 2532 }, { "epoch": 0.13017781889197244, "grad_norm": 0.9774260520935059, "learning_rate": 9.739190567954176e-06, "loss": 0.701, "step": 2533 }, { "epoch": 0.1302292116353171, "grad_norm": 1.2303053140640259, "learning_rate": 9.73892521956001e-06, "loss": 0.8618, "step": 2534 }, { "epoch": 0.13028060437866174, "grad_norm": 0.7637819051742554, "learning_rate": 9.73865973986978e-06, "loss": 0.6732, "step": 2535 }, { "epoch": 0.13033199712200638, "grad_norm": 1.1514431238174438, "learning_rate": 9.738394128890845e-06, "loss": 0.7937, "step": 2536 }, { "epoch": 0.130383389865351, "grad_norm": 1.1339715719223022, "learning_rate": 9.738128386630561e-06, "loss": 0.8662, "step": 2537 }, { "epoch": 0.13043478260869565, "grad_norm": 1.2402563095092773, "learning_rate": 9.737862513096294e-06, "loss": 0.7785, "step": 2538 }, { "epoch": 0.1304861753520403, "grad_norm": 1.165157437324524, "learning_rate": 9.73759650829541e-06, "loss": 0.832, "step": 2539 }, { "epoch": 0.13053756809538494, "grad_norm": 1.2463504076004028, "learning_rate": 9.737330372235276e-06, "loss": 0.861, "step": 2540 }, { "epoch": 0.13058896083872956, "grad_norm": 1.1279863119125366, "learning_rate": 9.737064104923269e-06, "loss": 0.8407, "step": 2541 }, { "epoch": 0.1306403535820742, "grad_norm": 1.2176536321640015, "learning_rate": 9.736797706366763e-06, "loss": 0.79, "step": 2542 }, { "epoch": 0.13069174632541886, "grad_norm": 1.3712241649627686, "learning_rate": 9.73653117657314e-06, "loss": 0.8002, "step": 2543 }, { "epoch": 0.1307431390687635, "grad_norm": 1.1009495258331299, "learning_rate": 9.736264515549785e-06, "loss": 0.8095, "step": 2544 }, { "epoch": 0.13079453181210812, "grad_norm": 1.1206634044647217, "learning_rate": 9.735997723304085e-06, "loss": 0.7942, "step": 2545 }, { "epoch": 0.13084592455545277, "grad_norm": 1.1654514074325562, "learning_rate": 9.735730799843433e-06, "loss": 0.7751, "step": 2546 }, { "epoch": 0.13089731729879742, "grad_norm": 1.1726800203323364, "learning_rate": 9.735463745175223e-06, "loss": 0.8371, "step": 2547 }, { "epoch": 0.13094871004214204, "grad_norm": 1.1220122575759888, "learning_rate": 9.735196559306857e-06, "loss": 0.7997, "step": 2548 }, { "epoch": 0.13100010278548668, "grad_norm": 1.14466392993927, "learning_rate": 9.734929242245732e-06, "loss": 0.8208, "step": 2549 }, { "epoch": 0.13105149552883133, "grad_norm": 1.3103368282318115, "learning_rate": 9.73466179399926e-06, "loss": 0.7657, "step": 2550 }, { "epoch": 0.13110288827217598, "grad_norm": 0.8653972744941711, "learning_rate": 9.734394214574848e-06, "loss": 0.694, "step": 2551 }, { "epoch": 0.1311542810155206, "grad_norm": 0.9475825428962708, "learning_rate": 9.734126503979911e-06, "loss": 0.7256, "step": 2552 }, { "epoch": 0.13120567375886524, "grad_norm": 1.0775402784347534, "learning_rate": 9.733858662221863e-06, "loss": 0.7769, "step": 2553 }, { "epoch": 0.1312570665022099, "grad_norm": 1.1888377666473389, "learning_rate": 9.733590689308128e-06, "loss": 0.8184, "step": 2554 }, { "epoch": 0.13130845924555454, "grad_norm": 1.19166100025177, "learning_rate": 9.73332258524613e-06, "loss": 0.8029, "step": 2555 }, { "epoch": 0.13135985198889916, "grad_norm": 1.056988000869751, "learning_rate": 9.733054350043295e-06, "loss": 0.8581, "step": 2556 }, { "epoch": 0.1314112447322438, "grad_norm": 0.7992630004882812, "learning_rate": 9.732785983707056e-06, "loss": 0.6592, "step": 2557 }, { "epoch": 0.13146263747558845, "grad_norm": 1.1102385520935059, "learning_rate": 9.73251748624485e-06, "loss": 0.7871, "step": 2558 }, { "epoch": 0.1315140302189331, "grad_norm": 1.1597158908843994, "learning_rate": 9.732248857664115e-06, "loss": 0.8326, "step": 2559 }, { "epoch": 0.13156542296227772, "grad_norm": 1.1040688753128052, "learning_rate": 9.73198009797229e-06, "loss": 0.798, "step": 2560 }, { "epoch": 0.13161681570562236, "grad_norm": 1.1124264001846313, "learning_rate": 9.731711207176826e-06, "loss": 0.7974, "step": 2561 }, { "epoch": 0.131668208448967, "grad_norm": 1.1696454286575317, "learning_rate": 9.731442185285172e-06, "loss": 0.8378, "step": 2562 }, { "epoch": 0.13171960119231166, "grad_norm": 1.261734127998352, "learning_rate": 9.73117303230478e-06, "loss": 0.8198, "step": 2563 }, { "epoch": 0.13177099393565628, "grad_norm": 1.1441841125488281, "learning_rate": 9.730903748243107e-06, "loss": 0.8282, "step": 2564 }, { "epoch": 0.13182238667900092, "grad_norm": 1.1437608003616333, "learning_rate": 9.730634333107613e-06, "loss": 0.8515, "step": 2565 }, { "epoch": 0.13187377942234557, "grad_norm": 1.264147400856018, "learning_rate": 9.730364786905765e-06, "loss": 0.8741, "step": 2566 }, { "epoch": 0.1319251721656902, "grad_norm": 1.188440203666687, "learning_rate": 9.730095109645032e-06, "loss": 0.8423, "step": 2567 }, { "epoch": 0.13197656490903484, "grad_norm": 1.1565133333206177, "learning_rate": 9.729825301332882e-06, "loss": 0.7745, "step": 2568 }, { "epoch": 0.13202795765237949, "grad_norm": 1.1575068235397339, "learning_rate": 9.729555361976792e-06, "loss": 0.8232, "step": 2569 }, { "epoch": 0.13207935039572413, "grad_norm": 1.13129460811615, "learning_rate": 9.729285291584239e-06, "loss": 0.8257, "step": 2570 }, { "epoch": 0.13213074313906875, "grad_norm": 1.1367108821868896, "learning_rate": 9.729015090162709e-06, "loss": 0.7524, "step": 2571 }, { "epoch": 0.1321821358824134, "grad_norm": 1.1301205158233643, "learning_rate": 9.728744757719685e-06, "loss": 0.7987, "step": 2572 }, { "epoch": 0.13223352862575805, "grad_norm": 1.1188976764678955, "learning_rate": 9.72847429426266e-06, "loss": 0.784, "step": 2573 }, { "epoch": 0.1322849213691027, "grad_norm": 1.3200082778930664, "learning_rate": 9.728203699799123e-06, "loss": 0.7682, "step": 2574 }, { "epoch": 0.1323363141124473, "grad_norm": 0.8083642721176147, "learning_rate": 9.727932974336576e-06, "loss": 0.7082, "step": 2575 }, { "epoch": 0.13238770685579196, "grad_norm": 1.0618683099746704, "learning_rate": 9.727662117882517e-06, "loss": 0.7546, "step": 2576 }, { "epoch": 0.1324390995991366, "grad_norm": 1.1620287895202637, "learning_rate": 9.72739113044445e-06, "loss": 0.8559, "step": 2577 }, { "epoch": 0.13249049234248125, "grad_norm": 1.1489086151123047, "learning_rate": 9.727120012029882e-06, "loss": 0.8371, "step": 2578 }, { "epoch": 0.13254188508582587, "grad_norm": 1.2074640989303589, "learning_rate": 9.726848762646329e-06, "loss": 0.8346, "step": 2579 }, { "epoch": 0.13259327782917052, "grad_norm": 1.392053484916687, "learning_rate": 9.7265773823013e-06, "loss": 0.8141, "step": 2580 }, { "epoch": 0.13264467057251517, "grad_norm": 1.184875249862671, "learning_rate": 9.72630587100232e-06, "loss": 0.7666, "step": 2581 }, { "epoch": 0.1326960633158598, "grad_norm": 1.1245810985565186, "learning_rate": 9.726034228756908e-06, "loss": 0.817, "step": 2582 }, { "epoch": 0.13274745605920443, "grad_norm": 1.1560232639312744, "learning_rate": 9.72576245557259e-06, "loss": 0.8116, "step": 2583 }, { "epoch": 0.13279884880254908, "grad_norm": 1.1937674283981323, "learning_rate": 9.725490551456897e-06, "loss": 0.8234, "step": 2584 }, { "epoch": 0.13285024154589373, "grad_norm": 0.839627206325531, "learning_rate": 9.725218516417362e-06, "loss": 0.7255, "step": 2585 }, { "epoch": 0.13290163428923835, "grad_norm": 1.1455011367797852, "learning_rate": 9.724946350461522e-06, "loss": 0.7953, "step": 2586 }, { "epoch": 0.132953027032583, "grad_norm": 1.1295292377471924, "learning_rate": 9.724674053596917e-06, "loss": 0.7461, "step": 2587 }, { "epoch": 0.13300441977592764, "grad_norm": 1.1307677030563354, "learning_rate": 9.72440162583109e-06, "loss": 0.7985, "step": 2588 }, { "epoch": 0.1330558125192723, "grad_norm": 1.2196446657180786, "learning_rate": 9.724129067171593e-06, "loss": 0.8421, "step": 2589 }, { "epoch": 0.1331072052626169, "grad_norm": 0.965071439743042, "learning_rate": 9.723856377625973e-06, "loss": 0.7361, "step": 2590 }, { "epoch": 0.13315859800596155, "grad_norm": 1.1619025468826294, "learning_rate": 9.723583557201788e-06, "loss": 0.8407, "step": 2591 }, { "epoch": 0.1332099907493062, "grad_norm": 1.1729711294174194, "learning_rate": 9.723310605906595e-06, "loss": 0.819, "step": 2592 }, { "epoch": 0.13326138349265085, "grad_norm": 0.7274436950683594, "learning_rate": 9.723037523747957e-06, "loss": 0.6828, "step": 2593 }, { "epoch": 0.13331277623599547, "grad_norm": 0.8518123030662537, "learning_rate": 9.72276431073344e-06, "loss": 0.706, "step": 2594 }, { "epoch": 0.13336416897934011, "grad_norm": 1.1291074752807617, "learning_rate": 9.722490966870614e-06, "loss": 0.7829, "step": 2595 }, { "epoch": 0.13341556172268476, "grad_norm": 1.0963987112045288, "learning_rate": 9.722217492167052e-06, "loss": 0.8332, "step": 2596 }, { "epoch": 0.1334669544660294, "grad_norm": 1.0919013023376465, "learning_rate": 9.72194388663033e-06, "loss": 0.7639, "step": 2597 }, { "epoch": 0.13351834720937403, "grad_norm": 1.046149492263794, "learning_rate": 9.721670150268029e-06, "loss": 0.7833, "step": 2598 }, { "epoch": 0.13356973995271867, "grad_norm": 1.1089986562728882, "learning_rate": 9.721396283087736e-06, "loss": 0.8237, "step": 2599 }, { "epoch": 0.13362113269606332, "grad_norm": 1.1304103136062622, "learning_rate": 9.721122285097034e-06, "loss": 0.7458, "step": 2600 }, { "epoch": 0.13367252543940797, "grad_norm": 1.2912743091583252, "learning_rate": 9.720848156303518e-06, "loss": 0.8359, "step": 2601 }, { "epoch": 0.1337239181827526, "grad_norm": 0.8447766304016113, "learning_rate": 9.720573896714778e-06, "loss": 0.708, "step": 2602 }, { "epoch": 0.13377531092609724, "grad_norm": 0.9680198431015015, "learning_rate": 9.720299506338418e-06, "loss": 0.7942, "step": 2603 }, { "epoch": 0.13382670366944188, "grad_norm": 1.139566421508789, "learning_rate": 9.72002498518204e-06, "loss": 0.8131, "step": 2604 }, { "epoch": 0.1338780964127865, "grad_norm": 1.2419695854187012, "learning_rate": 9.719750333253246e-06, "loss": 0.8127, "step": 2605 }, { "epoch": 0.13392948915613115, "grad_norm": 1.2263543605804443, "learning_rate": 9.719475550559648e-06, "loss": 0.8517, "step": 2606 }, { "epoch": 0.1339808818994758, "grad_norm": 1.1253172159194946, "learning_rate": 9.719200637108857e-06, "loss": 0.7928, "step": 2607 }, { "epoch": 0.13403227464282044, "grad_norm": 1.1045161485671997, "learning_rate": 9.718925592908492e-06, "loss": 0.8264, "step": 2608 }, { "epoch": 0.13408366738616506, "grad_norm": 1.313133716583252, "learning_rate": 9.718650417966174e-06, "loss": 0.8472, "step": 2609 }, { "epoch": 0.1341350601295097, "grad_norm": 4.287288665771484, "learning_rate": 9.718375112289525e-06, "loss": 0.8429, "step": 2610 }, { "epoch": 0.13418645287285436, "grad_norm": 1.1689093112945557, "learning_rate": 9.718099675886173e-06, "loss": 0.8386, "step": 2611 }, { "epoch": 0.134237845616199, "grad_norm": 1.1137447357177734, "learning_rate": 9.71782410876375e-06, "loss": 0.8598, "step": 2612 }, { "epoch": 0.13428923835954362, "grad_norm": 0.8552294373512268, "learning_rate": 9.71754841092989e-06, "loss": 0.7216, "step": 2613 }, { "epoch": 0.13434063110288827, "grad_norm": 0.9169567823410034, "learning_rate": 9.717272582392232e-06, "loss": 0.7178, "step": 2614 }, { "epoch": 0.13439202384623292, "grad_norm": 1.226951003074646, "learning_rate": 9.716996623158417e-06, "loss": 0.8317, "step": 2615 }, { "epoch": 0.13444341658957756, "grad_norm": 1.2023383378982544, "learning_rate": 9.716720533236093e-06, "loss": 0.8035, "step": 2616 }, { "epoch": 0.13449480933292218, "grad_norm": 1.1587753295898438, "learning_rate": 9.716444312632906e-06, "loss": 0.8164, "step": 2617 }, { "epoch": 0.13454620207626683, "grad_norm": 1.2885563373565674, "learning_rate": 9.716167961356511e-06, "loss": 0.8863, "step": 2618 }, { "epoch": 0.13459759481961148, "grad_norm": 1.1696535348892212, "learning_rate": 9.715891479414565e-06, "loss": 0.7684, "step": 2619 }, { "epoch": 0.13464898756295612, "grad_norm": 1.0802528858184814, "learning_rate": 9.715614866814726e-06, "loss": 0.7732, "step": 2620 }, { "epoch": 0.13470038030630074, "grad_norm": 0.9993689060211182, "learning_rate": 9.715338123564662e-06, "loss": 0.73, "step": 2621 }, { "epoch": 0.1347517730496454, "grad_norm": 1.3452314138412476, "learning_rate": 9.715061249672036e-06, "loss": 0.8114, "step": 2622 }, { "epoch": 0.13480316579299004, "grad_norm": 1.230893850326538, "learning_rate": 9.71478424514452e-06, "loss": 0.8117, "step": 2623 }, { "epoch": 0.13485455853633466, "grad_norm": 1.2168797254562378, "learning_rate": 9.71450710998979e-06, "loss": 0.8102, "step": 2624 }, { "epoch": 0.1349059512796793, "grad_norm": 1.2299240827560425, "learning_rate": 9.714229844215523e-06, "loss": 0.8305, "step": 2625 }, { "epoch": 0.13495734402302395, "grad_norm": 1.1167078018188477, "learning_rate": 9.713952447829402e-06, "loss": 0.7424, "step": 2626 }, { "epoch": 0.1350087367663686, "grad_norm": 1.1739639043807983, "learning_rate": 9.713674920839113e-06, "loss": 0.7812, "step": 2627 }, { "epoch": 0.13506012950971322, "grad_norm": 1.1659791469573975, "learning_rate": 9.713397263252342e-06, "loss": 0.7917, "step": 2628 }, { "epoch": 0.13511152225305786, "grad_norm": 0.8265565633773804, "learning_rate": 9.713119475076786e-06, "loss": 0.7352, "step": 2629 }, { "epoch": 0.1351629149964025, "grad_norm": 0.8502550721168518, "learning_rate": 9.712841556320139e-06, "loss": 0.7484, "step": 2630 }, { "epoch": 0.13521430773974716, "grad_norm": 1.1613914966583252, "learning_rate": 9.7125635069901e-06, "loss": 0.7909, "step": 2631 }, { "epoch": 0.13526570048309178, "grad_norm": 1.1277912855148315, "learning_rate": 9.712285327094374e-06, "loss": 0.8384, "step": 2632 }, { "epoch": 0.13531709322643642, "grad_norm": 1.0719398260116577, "learning_rate": 9.712007016640667e-06, "loss": 0.7837, "step": 2633 }, { "epoch": 0.13536848596978107, "grad_norm": 1.222659707069397, "learning_rate": 9.711728575636695e-06, "loss": 0.8223, "step": 2634 }, { "epoch": 0.13541987871312572, "grad_norm": 1.183280348777771, "learning_rate": 9.711450004090165e-06, "loss": 0.819, "step": 2635 }, { "epoch": 0.13547127145647034, "grad_norm": 1.1620908975601196, "learning_rate": 9.711171302008798e-06, "loss": 0.8713, "step": 2636 }, { "epoch": 0.13552266419981499, "grad_norm": 1.1609777212142944, "learning_rate": 9.710892469400316e-06, "loss": 0.8787, "step": 2637 }, { "epoch": 0.13557405694315963, "grad_norm": 1.138584017753601, "learning_rate": 9.710613506272447e-06, "loss": 0.8551, "step": 2638 }, { "epoch": 0.13562544968650428, "grad_norm": 1.2348012924194336, "learning_rate": 9.710334412632916e-06, "loss": 0.8032, "step": 2639 }, { "epoch": 0.1356768424298489, "grad_norm": 1.1296855211257935, "learning_rate": 9.710055188489456e-06, "loss": 0.7954, "step": 2640 }, { "epoch": 0.13572823517319355, "grad_norm": 0.8864043951034546, "learning_rate": 9.709775833849804e-06, "loss": 0.7179, "step": 2641 }, { "epoch": 0.1357796279165382, "grad_norm": 1.190123200416565, "learning_rate": 9.7094963487217e-06, "loss": 0.7965, "step": 2642 }, { "epoch": 0.1358310206598828, "grad_norm": 1.1864268779754639, "learning_rate": 9.709216733112888e-06, "loss": 0.8471, "step": 2643 }, { "epoch": 0.13588241340322746, "grad_norm": 0.8380422592163086, "learning_rate": 9.708936987031115e-06, "loss": 0.6969, "step": 2644 }, { "epoch": 0.1359338061465721, "grad_norm": 1.3133583068847656, "learning_rate": 9.708657110484128e-06, "loss": 0.813, "step": 2645 }, { "epoch": 0.13598519888991675, "grad_norm": 1.2705795764923096, "learning_rate": 9.708377103479685e-06, "loss": 0.876, "step": 2646 }, { "epoch": 0.13603659163326137, "grad_norm": 1.1146432161331177, "learning_rate": 9.708096966025544e-06, "loss": 0.8495, "step": 2647 }, { "epoch": 0.13608798437660602, "grad_norm": 0.8538182973861694, "learning_rate": 9.707816698129464e-06, "loss": 0.7761, "step": 2648 }, { "epoch": 0.13613937711995067, "grad_norm": 1.1765365600585938, "learning_rate": 9.707536299799212e-06, "loss": 0.8806, "step": 2649 }, { "epoch": 0.1361907698632953, "grad_norm": 1.085740327835083, "learning_rate": 9.707255771042555e-06, "loss": 0.8055, "step": 2650 }, { "epoch": 0.13624216260663993, "grad_norm": 1.1011942625045776, "learning_rate": 9.706975111867267e-06, "loss": 0.7637, "step": 2651 }, { "epoch": 0.13629355534998458, "grad_norm": 1.204447627067566, "learning_rate": 9.706694322281124e-06, "loss": 0.8038, "step": 2652 }, { "epoch": 0.13634494809332923, "grad_norm": 0.7580429315567017, "learning_rate": 9.706413402291903e-06, "loss": 0.7305, "step": 2653 }, { "epoch": 0.13639634083667387, "grad_norm": 1.1226884126663208, "learning_rate": 9.70613235190739e-06, "loss": 0.8551, "step": 2654 }, { "epoch": 0.1364477335800185, "grad_norm": 1.0983165502548218, "learning_rate": 9.705851171135369e-06, "loss": 0.8058, "step": 2655 }, { "epoch": 0.13649912632336314, "grad_norm": 0.8131149411201477, "learning_rate": 9.705569859983633e-06, "loss": 0.7156, "step": 2656 }, { "epoch": 0.1365505190667078, "grad_norm": 1.0995423793792725, "learning_rate": 9.705288418459976e-06, "loss": 0.7811, "step": 2657 }, { "epoch": 0.13660191181005243, "grad_norm": 1.2127082347869873, "learning_rate": 9.705006846572194e-06, "loss": 0.8417, "step": 2658 }, { "epoch": 0.13665330455339705, "grad_norm": 0.8489097952842712, "learning_rate": 9.704725144328087e-06, "loss": 0.7049, "step": 2659 }, { "epoch": 0.1367046972967417, "grad_norm": 1.19580078125, "learning_rate": 9.704443311735462e-06, "loss": 0.8299, "step": 2660 }, { "epoch": 0.13675609004008635, "grad_norm": 1.1455217599868774, "learning_rate": 9.704161348802128e-06, "loss": 0.8167, "step": 2661 }, { "epoch": 0.13680748278343097, "grad_norm": 1.10663902759552, "learning_rate": 9.703879255535896e-06, "loss": 0.7779, "step": 2662 }, { "epoch": 0.13685887552677561, "grad_norm": 1.3961073160171509, "learning_rate": 9.703597031944582e-06, "loss": 0.7755, "step": 2663 }, { "epoch": 0.13691026827012026, "grad_norm": 1.087077260017395, "learning_rate": 9.703314678036004e-06, "loss": 0.8376, "step": 2664 }, { "epoch": 0.1369616610134649, "grad_norm": 1.17799711227417, "learning_rate": 9.703032193817985e-06, "loss": 0.827, "step": 2665 }, { "epoch": 0.13701305375680953, "grad_norm": 1.8682605028152466, "learning_rate": 9.702749579298354e-06, "loss": 0.6878, "step": 2666 }, { "epoch": 0.13706444650015417, "grad_norm": 1.2004543542861938, "learning_rate": 9.702466834484938e-06, "loss": 0.8131, "step": 2667 }, { "epoch": 0.13711583924349882, "grad_norm": 1.1433489322662354, "learning_rate": 9.702183959385571e-06, "loss": 0.8016, "step": 2668 }, { "epoch": 0.13716723198684347, "grad_norm": 1.1702873706817627, "learning_rate": 9.701900954008092e-06, "loss": 0.8072, "step": 2669 }, { "epoch": 0.1372186247301881, "grad_norm": 1.1314847469329834, "learning_rate": 9.701617818360342e-06, "loss": 0.7812, "step": 2670 }, { "epoch": 0.13727001747353273, "grad_norm": 1.1351122856140137, "learning_rate": 9.701334552450165e-06, "loss": 0.8049, "step": 2671 }, { "epoch": 0.13732141021687738, "grad_norm": 1.1694854497909546, "learning_rate": 9.701051156285407e-06, "loss": 0.8187, "step": 2672 }, { "epoch": 0.13737280296022203, "grad_norm": 0.8477898240089417, "learning_rate": 9.700767629873923e-06, "loss": 0.7354, "step": 2673 }, { "epoch": 0.13742419570356665, "grad_norm": 1.1977671384811401, "learning_rate": 9.700483973223567e-06, "loss": 0.8588, "step": 2674 }, { "epoch": 0.1374755884469113, "grad_norm": 1.2428123950958252, "learning_rate": 9.700200186342197e-06, "loss": 0.7964, "step": 2675 }, { "epoch": 0.13752698119025594, "grad_norm": 0.7044335007667542, "learning_rate": 9.699916269237676e-06, "loss": 0.6562, "step": 2676 }, { "epoch": 0.13757837393360056, "grad_norm": 1.1356942653656006, "learning_rate": 9.699632221917872e-06, "loss": 0.8361, "step": 2677 }, { "epoch": 0.1376297666769452, "grad_norm": 1.1374279260635376, "learning_rate": 9.699348044390652e-06, "loss": 0.7868, "step": 2678 }, { "epoch": 0.13768115942028986, "grad_norm": 1.1640079021453857, "learning_rate": 9.699063736663892e-06, "loss": 0.8378, "step": 2679 }, { "epoch": 0.1377325521636345, "grad_norm": 1.0794360637664795, "learning_rate": 9.698779298745468e-06, "loss": 0.7853, "step": 2680 }, { "epoch": 0.13778394490697912, "grad_norm": 0.941981315612793, "learning_rate": 9.69849473064326e-06, "loss": 0.7393, "step": 2681 }, { "epoch": 0.13783533765032377, "grad_norm": 1.1368045806884766, "learning_rate": 9.698210032365151e-06, "loss": 0.7606, "step": 2682 }, { "epoch": 0.13788673039366842, "grad_norm": 1.1727396249771118, "learning_rate": 9.697925203919032e-06, "loss": 0.79, "step": 2683 }, { "epoch": 0.13793812313701306, "grad_norm": 1.1475847959518433, "learning_rate": 9.697640245312793e-06, "loss": 0.7926, "step": 2684 }, { "epoch": 0.13798951588035768, "grad_norm": 1.1517375707626343, "learning_rate": 9.697355156554328e-06, "loss": 0.7962, "step": 2685 }, { "epoch": 0.13804090862370233, "grad_norm": 1.2805520296096802, "learning_rate": 9.697069937651535e-06, "loss": 0.8869, "step": 2686 }, { "epoch": 0.13809230136704698, "grad_norm": 0.978940486907959, "learning_rate": 9.69678458861232e-06, "loss": 0.7031, "step": 2687 }, { "epoch": 0.13814369411039162, "grad_norm": 1.1227670907974243, "learning_rate": 9.696499109444587e-06, "loss": 0.8085, "step": 2688 }, { "epoch": 0.13819508685373624, "grad_norm": 1.090261459350586, "learning_rate": 9.696213500156243e-06, "loss": 0.8353, "step": 2689 }, { "epoch": 0.1382464795970809, "grad_norm": 1.196114182472229, "learning_rate": 9.695927760755203e-06, "loss": 0.7922, "step": 2690 }, { "epoch": 0.13829787234042554, "grad_norm": 1.1394280195236206, "learning_rate": 9.695641891249385e-06, "loss": 0.8398, "step": 2691 }, { "epoch": 0.13834926508377018, "grad_norm": 1.1485434770584106, "learning_rate": 9.695355891646707e-06, "loss": 0.7382, "step": 2692 }, { "epoch": 0.1384006578271148, "grad_norm": 1.180327296257019, "learning_rate": 9.695069761955096e-06, "loss": 0.8092, "step": 2693 }, { "epoch": 0.13845205057045945, "grad_norm": 1.0974851846694946, "learning_rate": 9.694783502182474e-06, "loss": 0.8088, "step": 2694 }, { "epoch": 0.1385034433138041, "grad_norm": 1.173720359802246, "learning_rate": 9.694497112336778e-06, "loss": 0.8444, "step": 2695 }, { "epoch": 0.13855483605714872, "grad_norm": 1.2134408950805664, "learning_rate": 9.69421059242594e-06, "loss": 0.7904, "step": 2696 }, { "epoch": 0.13860622880049336, "grad_norm": 1.0976135730743408, "learning_rate": 9.693923942457897e-06, "loss": 0.7804, "step": 2697 }, { "epoch": 0.138657621543838, "grad_norm": 1.1164802312850952, "learning_rate": 9.693637162440592e-06, "loss": 0.7936, "step": 2698 }, { "epoch": 0.13870901428718266, "grad_norm": 0.8723376393318176, "learning_rate": 9.693350252381972e-06, "loss": 0.7768, "step": 2699 }, { "epoch": 0.13876040703052728, "grad_norm": 1.2291243076324463, "learning_rate": 9.693063212289983e-06, "loss": 0.8261, "step": 2700 }, { "epoch": 0.13881179977387192, "grad_norm": 1.1501260995864868, "learning_rate": 9.692776042172582e-06, "loss": 0.8239, "step": 2701 }, { "epoch": 0.13886319251721657, "grad_norm": 1.1100144386291504, "learning_rate": 9.692488742037721e-06, "loss": 0.8396, "step": 2702 }, { "epoch": 0.13891458526056122, "grad_norm": 0.7832348346710205, "learning_rate": 9.692201311893362e-06, "loss": 0.7374, "step": 2703 }, { "epoch": 0.13896597800390584, "grad_norm": 1.0825269222259521, "learning_rate": 9.691913751747468e-06, "loss": 0.8237, "step": 2704 }, { "epoch": 0.13901737074725048, "grad_norm": 1.1884174346923828, "learning_rate": 9.691626061608008e-06, "loss": 0.8466, "step": 2705 }, { "epoch": 0.13906876349059513, "grad_norm": 1.1264417171478271, "learning_rate": 9.69133824148295e-06, "loss": 0.8076, "step": 2706 }, { "epoch": 0.13912015623393978, "grad_norm": 1.14151930809021, "learning_rate": 9.691050291380268e-06, "loss": 0.8075, "step": 2707 }, { "epoch": 0.1391715489772844, "grad_norm": 1.1592979431152344, "learning_rate": 9.690762211307942e-06, "loss": 0.8255, "step": 2708 }, { "epoch": 0.13922294172062905, "grad_norm": 1.1559135913848877, "learning_rate": 9.690474001273953e-06, "loss": 0.8356, "step": 2709 }, { "epoch": 0.1392743344639737, "grad_norm": 1.0876551866531372, "learning_rate": 9.690185661286287e-06, "loss": 0.7496, "step": 2710 }, { "epoch": 0.13932572720731834, "grad_norm": 1.1137487888336182, "learning_rate": 9.68989719135293e-06, "loss": 0.8275, "step": 2711 }, { "epoch": 0.13937711995066296, "grad_norm": 1.1706751585006714, "learning_rate": 9.689608591481877e-06, "loss": 0.7768, "step": 2712 }, { "epoch": 0.1394285126940076, "grad_norm": 1.1375924348831177, "learning_rate": 9.689319861681123e-06, "loss": 0.7991, "step": 2713 }, { "epoch": 0.13947990543735225, "grad_norm": 1.1523921489715576, "learning_rate": 9.689031001958667e-06, "loss": 0.8537, "step": 2714 }, { "epoch": 0.13953129818069687, "grad_norm": 1.2321090698242188, "learning_rate": 9.688742012322512e-06, "loss": 0.8445, "step": 2715 }, { "epoch": 0.13958269092404152, "grad_norm": 1.1780710220336914, "learning_rate": 9.688452892780664e-06, "loss": 0.8086, "step": 2716 }, { "epoch": 0.13963408366738617, "grad_norm": 1.0607858896255493, "learning_rate": 9.688163643341136e-06, "loss": 0.8115, "step": 2717 }, { "epoch": 0.1396854764107308, "grad_norm": 1.1239253282546997, "learning_rate": 9.687874264011941e-06, "loss": 0.8133, "step": 2718 }, { "epoch": 0.13973686915407543, "grad_norm": 1.1768728494644165, "learning_rate": 9.687584754801093e-06, "loss": 0.869, "step": 2719 }, { "epoch": 0.13978826189742008, "grad_norm": 1.099822759628296, "learning_rate": 9.68729511571662e-06, "loss": 0.8125, "step": 2720 }, { "epoch": 0.13983965464076473, "grad_norm": 1.0882219076156616, "learning_rate": 9.68700534676654e-06, "loss": 0.8603, "step": 2721 }, { "epoch": 0.13989104738410937, "grad_norm": 0.8649500012397766, "learning_rate": 9.686715447958883e-06, "loss": 0.6943, "step": 2722 }, { "epoch": 0.139942440127454, "grad_norm": 1.1535301208496094, "learning_rate": 9.686425419301684e-06, "loss": 0.815, "step": 2723 }, { "epoch": 0.13999383287079864, "grad_norm": 1.101647973060608, "learning_rate": 9.686135260802975e-06, "loss": 0.8147, "step": 2724 }, { "epoch": 0.1400452256141433, "grad_norm": 1.1324268579483032, "learning_rate": 9.685844972470797e-06, "loss": 0.8328, "step": 2725 }, { "epoch": 0.14009661835748793, "grad_norm": 2.122354745864868, "learning_rate": 9.685554554313192e-06, "loss": 0.7859, "step": 2726 }, { "epoch": 0.14014801110083255, "grad_norm": 1.1310182809829712, "learning_rate": 9.685264006338207e-06, "loss": 0.8524, "step": 2727 }, { "epoch": 0.1401994038441772, "grad_norm": 1.206246256828308, "learning_rate": 9.684973328553893e-06, "loss": 0.7713, "step": 2728 }, { "epoch": 0.14025079658752185, "grad_norm": 1.043457269668579, "learning_rate": 9.684682520968298e-06, "loss": 0.7698, "step": 2729 }, { "epoch": 0.1403021893308665, "grad_norm": 1.1174529790878296, "learning_rate": 9.684391583589485e-06, "loss": 0.8197, "step": 2730 }, { "epoch": 0.14035358207421111, "grad_norm": 1.1224461793899536, "learning_rate": 9.684100516425513e-06, "loss": 0.8176, "step": 2731 }, { "epoch": 0.14040497481755576, "grad_norm": 1.1051719188690186, "learning_rate": 9.683809319484444e-06, "loss": 0.7758, "step": 2732 }, { "epoch": 0.1404563675609004, "grad_norm": 1.0629916191101074, "learning_rate": 9.683517992774349e-06, "loss": 0.7951, "step": 2733 }, { "epoch": 0.14050776030424503, "grad_norm": 4.309171676635742, "learning_rate": 9.683226536303298e-06, "loss": 0.7924, "step": 2734 }, { "epoch": 0.14055915304758967, "grad_norm": 1.1143549680709839, "learning_rate": 9.682934950079367e-06, "loss": 0.8384, "step": 2735 }, { "epoch": 0.14061054579093432, "grad_norm": 1.2448540925979614, "learning_rate": 9.682643234110634e-06, "loss": 0.8823, "step": 2736 }, { "epoch": 0.14066193853427897, "grad_norm": 1.675135612487793, "learning_rate": 9.68235138840518e-06, "loss": 0.7369, "step": 2737 }, { "epoch": 0.1407133312776236, "grad_norm": 0.7427315711975098, "learning_rate": 9.682059412971093e-06, "loss": 0.6501, "step": 2738 }, { "epoch": 0.14076472402096823, "grad_norm": 1.2052559852600098, "learning_rate": 9.681767307816458e-06, "loss": 0.8602, "step": 2739 }, { "epoch": 0.14081611676431288, "grad_norm": 1.0685110092163086, "learning_rate": 9.681475072949376e-06, "loss": 0.787, "step": 2740 }, { "epoch": 0.14086750950765753, "grad_norm": 1.1438913345336914, "learning_rate": 9.681182708377937e-06, "loss": 0.8531, "step": 2741 }, { "epoch": 0.14091890225100215, "grad_norm": 1.123216986656189, "learning_rate": 9.680890214110242e-06, "loss": 0.8001, "step": 2742 }, { "epoch": 0.1409702949943468, "grad_norm": 1.1277879476547241, "learning_rate": 9.680597590154397e-06, "loss": 0.8331, "step": 2743 }, { "epoch": 0.14102168773769144, "grad_norm": 1.1109282970428467, "learning_rate": 9.68030483651851e-06, "loss": 0.8317, "step": 2744 }, { "epoch": 0.1410730804810361, "grad_norm": 1.1681088209152222, "learning_rate": 9.680011953210688e-06, "loss": 0.8023, "step": 2745 }, { "epoch": 0.1411244732243807, "grad_norm": 1.1539644002914429, "learning_rate": 9.679718940239049e-06, "loss": 0.8713, "step": 2746 }, { "epoch": 0.14117586596772536, "grad_norm": 1.1707487106323242, "learning_rate": 9.679425797611711e-06, "loss": 0.7797, "step": 2747 }, { "epoch": 0.14122725871107, "grad_norm": 1.1454941034317017, "learning_rate": 9.679132525336794e-06, "loss": 0.7339, "step": 2748 }, { "epoch": 0.14127865145441465, "grad_norm": 1.1407099962234497, "learning_rate": 9.678839123422426e-06, "loss": 0.7414, "step": 2749 }, { "epoch": 0.14133004419775927, "grad_norm": 1.17057204246521, "learning_rate": 9.678545591876734e-06, "loss": 0.8506, "step": 2750 }, { "epoch": 0.14138143694110392, "grad_norm": 1.0912845134735107, "learning_rate": 9.678251930707849e-06, "loss": 0.8705, "step": 2751 }, { "epoch": 0.14143282968444856, "grad_norm": 0.8528375625610352, "learning_rate": 9.67795813992391e-06, "loss": 0.7135, "step": 2752 }, { "epoch": 0.14148422242779318, "grad_norm": 1.000312089920044, "learning_rate": 9.677664219533056e-06, "loss": 0.7643, "step": 2753 }, { "epoch": 0.14153561517113783, "grad_norm": 1.283801555633545, "learning_rate": 9.67737016954343e-06, "loss": 0.7829, "step": 2754 }, { "epoch": 0.14158700791448248, "grad_norm": 1.1663647890090942, "learning_rate": 9.677075989963178e-06, "loss": 0.8347, "step": 2755 }, { "epoch": 0.14163840065782712, "grad_norm": 1.2120170593261719, "learning_rate": 9.676781680800453e-06, "loss": 0.7752, "step": 2756 }, { "epoch": 0.14168979340117174, "grad_norm": 1.063825011253357, "learning_rate": 9.676487242063406e-06, "loss": 0.8276, "step": 2757 }, { "epoch": 0.1417411861445164, "grad_norm": 1.1821837425231934, "learning_rate": 9.676192673760197e-06, "loss": 0.8856, "step": 2758 }, { "epoch": 0.14179257888786104, "grad_norm": 1.1495966911315918, "learning_rate": 9.675897975898986e-06, "loss": 0.7856, "step": 2759 }, { "epoch": 0.14184397163120568, "grad_norm": 1.194322109222412, "learning_rate": 9.67560314848794e-06, "loss": 0.8319, "step": 2760 }, { "epoch": 0.1418953643745503, "grad_norm": 1.0979180335998535, "learning_rate": 9.675308191535224e-06, "loss": 0.7608, "step": 2761 }, { "epoch": 0.14194675711789495, "grad_norm": 1.1436878442764282, "learning_rate": 9.675013105049015e-06, "loss": 0.7815, "step": 2762 }, { "epoch": 0.1419981498612396, "grad_norm": 1.2227396965026855, "learning_rate": 9.674717889037481e-06, "loss": 0.8219, "step": 2763 }, { "epoch": 0.14204954260458424, "grad_norm": 1.1543980836868286, "learning_rate": 9.674422543508808e-06, "loss": 0.7778, "step": 2764 }, { "epoch": 0.14210093534792886, "grad_norm": 1.187705159187317, "learning_rate": 9.674127068471177e-06, "loss": 0.8468, "step": 2765 }, { "epoch": 0.1421523280912735, "grad_norm": 1.1667296886444092, "learning_rate": 9.673831463932773e-06, "loss": 0.7672, "step": 2766 }, { "epoch": 0.14220372083461816, "grad_norm": 1.107059121131897, "learning_rate": 9.673535729901788e-06, "loss": 0.7684, "step": 2767 }, { "epoch": 0.1422551135779628, "grad_norm": 1.1269954442977905, "learning_rate": 9.673239866386414e-06, "loss": 0.8404, "step": 2768 }, { "epoch": 0.14230650632130742, "grad_norm": 1.0767295360565186, "learning_rate": 9.672943873394848e-06, "loss": 0.8103, "step": 2769 }, { "epoch": 0.14235789906465207, "grad_norm": 1.2439650297164917, "learning_rate": 9.67264775093529e-06, "loss": 0.7964, "step": 2770 }, { "epoch": 0.14240929180799672, "grad_norm": 1.1509159803390503, "learning_rate": 9.672351499015949e-06, "loss": 0.7949, "step": 2771 }, { "epoch": 0.14246068455134134, "grad_norm": 1.0878055095672607, "learning_rate": 9.672055117645026e-06, "loss": 0.8597, "step": 2772 }, { "epoch": 0.14251207729468598, "grad_norm": 1.0522780418395996, "learning_rate": 9.67175860683074e-06, "loss": 0.7897, "step": 2773 }, { "epoch": 0.14256347003803063, "grad_norm": 1.168533444404602, "learning_rate": 9.671461966581301e-06, "loss": 0.8491, "step": 2774 }, { "epoch": 0.14261486278137528, "grad_norm": 1.242834210395813, "learning_rate": 9.671165196904929e-06, "loss": 0.8567, "step": 2775 }, { "epoch": 0.1426662555247199, "grad_norm": 1.1433464288711548, "learning_rate": 9.670868297809844e-06, "loss": 0.8429, "step": 2776 }, { "epoch": 0.14271764826806455, "grad_norm": 1.2627933025360107, "learning_rate": 9.670571269304275e-06, "loss": 0.8065, "step": 2777 }, { "epoch": 0.1427690410114092, "grad_norm": 0.7583116888999939, "learning_rate": 9.670274111396452e-06, "loss": 0.7, "step": 2778 }, { "epoch": 0.14282043375475384, "grad_norm": 1.1929150819778442, "learning_rate": 9.669976824094604e-06, "loss": 0.8413, "step": 2779 }, { "epoch": 0.14287182649809846, "grad_norm": 1.1292308568954468, "learning_rate": 9.669679407406972e-06, "loss": 0.7978, "step": 2780 }, { "epoch": 0.1429232192414431, "grad_norm": 1.1096967458724976, "learning_rate": 9.669381861341794e-06, "loss": 0.792, "step": 2781 }, { "epoch": 0.14297461198478775, "grad_norm": 1.1445257663726807, "learning_rate": 9.669084185907313e-06, "loss": 0.811, "step": 2782 }, { "epoch": 0.1430260047281324, "grad_norm": 1.0826023817062378, "learning_rate": 9.668786381111778e-06, "loss": 0.8169, "step": 2783 }, { "epoch": 0.14307739747147702, "grad_norm": 1.127459168434143, "learning_rate": 9.66848844696344e-06, "loss": 0.8182, "step": 2784 }, { "epoch": 0.14312879021482167, "grad_norm": 1.1308761835098267, "learning_rate": 9.668190383470551e-06, "loss": 0.8125, "step": 2785 }, { "epoch": 0.1431801829581663, "grad_norm": 1.1512395143508911, "learning_rate": 9.667892190641373e-06, "loss": 0.7987, "step": 2786 }, { "epoch": 0.14323157570151096, "grad_norm": 1.1589457988739014, "learning_rate": 9.667593868484165e-06, "loss": 0.8709, "step": 2787 }, { "epoch": 0.14328296844485558, "grad_norm": 1.1947205066680908, "learning_rate": 9.667295417007193e-06, "loss": 0.8611, "step": 2788 }, { "epoch": 0.14333436118820023, "grad_norm": 0.894476592540741, "learning_rate": 9.666996836218725e-06, "loss": 0.702, "step": 2789 }, { "epoch": 0.14338575393154487, "grad_norm": 1.144202709197998, "learning_rate": 9.666698126127034e-06, "loss": 0.821, "step": 2790 }, { "epoch": 0.1434371466748895, "grad_norm": 1.075032114982605, "learning_rate": 9.666399286740397e-06, "loss": 0.8255, "step": 2791 }, { "epoch": 0.14348853941823414, "grad_norm": 0.8268353939056396, "learning_rate": 9.666100318067093e-06, "loss": 0.7171, "step": 2792 }, { "epoch": 0.1435399321615788, "grad_norm": 1.151853084564209, "learning_rate": 9.665801220115405e-06, "loss": 0.8292, "step": 2793 }, { "epoch": 0.14359132490492343, "grad_norm": 1.117978811264038, "learning_rate": 9.66550199289362e-06, "loss": 0.8696, "step": 2794 }, { "epoch": 0.14364271764826805, "grad_norm": 1.0856496095657349, "learning_rate": 9.665202636410028e-06, "loss": 0.7751, "step": 2795 }, { "epoch": 0.1436941103916127, "grad_norm": 1.2467879056930542, "learning_rate": 9.664903150672922e-06, "loss": 0.7809, "step": 2796 }, { "epoch": 0.14374550313495735, "grad_norm": 0.7887930274009705, "learning_rate": 9.664603535690602e-06, "loss": 0.7006, "step": 2797 }, { "epoch": 0.143796895878302, "grad_norm": 1.1251088380813599, "learning_rate": 9.664303791471369e-06, "loss": 0.7756, "step": 2798 }, { "epoch": 0.14384828862164661, "grad_norm": 1.1136263608932495, "learning_rate": 9.664003918023523e-06, "loss": 0.8232, "step": 2799 }, { "epoch": 0.14389968136499126, "grad_norm": 1.254871129989624, "learning_rate": 9.663703915355379e-06, "loss": 0.7936, "step": 2800 }, { "epoch": 0.1439510741083359, "grad_norm": 1.244874119758606, "learning_rate": 9.663403783475242e-06, "loss": 0.8016, "step": 2801 }, { "epoch": 0.14400246685168056, "grad_norm": 1.1680164337158203, "learning_rate": 9.663103522391433e-06, "loss": 0.7817, "step": 2802 }, { "epoch": 0.14405385959502517, "grad_norm": 0.869336724281311, "learning_rate": 9.662803132112269e-06, "loss": 0.7005, "step": 2803 }, { "epoch": 0.14410525233836982, "grad_norm": 0.7678632736206055, "learning_rate": 9.662502612646073e-06, "loss": 0.7529, "step": 2804 }, { "epoch": 0.14415664508171447, "grad_norm": 1.0485777854919434, "learning_rate": 9.662201964001168e-06, "loss": 0.7963, "step": 2805 }, { "epoch": 0.14420803782505912, "grad_norm": 1.1833264827728271, "learning_rate": 9.66190118618589e-06, "loss": 0.8831, "step": 2806 }, { "epoch": 0.14425943056840373, "grad_norm": 1.1319341659545898, "learning_rate": 9.661600279208566e-06, "loss": 0.8706, "step": 2807 }, { "epoch": 0.14431082331174838, "grad_norm": 0.7123062610626221, "learning_rate": 9.661299243077535e-06, "loss": 0.679, "step": 2808 }, { "epoch": 0.14436221605509303, "grad_norm": 1.1030851602554321, "learning_rate": 9.66099807780114e-06, "loss": 0.7907, "step": 2809 }, { "epoch": 0.14441360879843765, "grad_norm": 1.1934466361999512, "learning_rate": 9.660696783387721e-06, "loss": 0.8396, "step": 2810 }, { "epoch": 0.1444650015417823, "grad_norm": 1.2162672281265259, "learning_rate": 9.66039535984563e-06, "loss": 0.7848, "step": 2811 }, { "epoch": 0.14451639428512694, "grad_norm": 1.2180185317993164, "learning_rate": 9.660093807183215e-06, "loss": 0.8073, "step": 2812 }, { "epoch": 0.1445677870284716, "grad_norm": 1.6599971055984497, "learning_rate": 9.659792125408832e-06, "loss": 0.8632, "step": 2813 }, { "epoch": 0.1446191797718162, "grad_norm": 1.1207119226455688, "learning_rate": 9.659490314530837e-06, "loss": 0.8681, "step": 2814 }, { "epoch": 0.14467057251516086, "grad_norm": 1.0741400718688965, "learning_rate": 9.659188374557596e-06, "loss": 0.8764, "step": 2815 }, { "epoch": 0.1447219652585055, "grad_norm": 1.111618995666504, "learning_rate": 9.658886305497472e-06, "loss": 0.7852, "step": 2816 }, { "epoch": 0.14477335800185015, "grad_norm": 0.8580141067504883, "learning_rate": 9.658584107358835e-06, "loss": 0.7013, "step": 2817 }, { "epoch": 0.14482475074519477, "grad_norm": 1.1124556064605713, "learning_rate": 9.658281780150057e-06, "loss": 0.7501, "step": 2818 }, { "epoch": 0.14487614348853942, "grad_norm": 0.8735184073448181, "learning_rate": 9.657979323879514e-06, "loss": 0.743, "step": 2819 }, { "epoch": 0.14492753623188406, "grad_norm": 0.7720156908035278, "learning_rate": 9.657676738555588e-06, "loss": 0.7585, "step": 2820 }, { "epoch": 0.1449789289752287, "grad_norm": 1.124157428741455, "learning_rate": 9.657374024186659e-06, "loss": 0.797, "step": 2821 }, { "epoch": 0.14503032171857333, "grad_norm": 1.1096609830856323, "learning_rate": 9.657071180781114e-06, "loss": 0.827, "step": 2822 }, { "epoch": 0.14508171446191798, "grad_norm": 0.9002783894538879, "learning_rate": 9.656768208347348e-06, "loss": 0.7156, "step": 2823 }, { "epoch": 0.14513310720526262, "grad_norm": 0.8159273266792297, "learning_rate": 9.656465106893751e-06, "loss": 0.7424, "step": 2824 }, { "epoch": 0.14518449994860724, "grad_norm": 0.9551854729652405, "learning_rate": 9.656161876428722e-06, "loss": 0.7377, "step": 2825 }, { "epoch": 0.1452358926919519, "grad_norm": 1.2128301858901978, "learning_rate": 9.655858516960664e-06, "loss": 0.8025, "step": 2826 }, { "epoch": 0.14528728543529654, "grad_norm": 1.1419256925582886, "learning_rate": 9.655555028497978e-06, "loss": 0.7618, "step": 2827 }, { "epoch": 0.14533867817864118, "grad_norm": 1.1777626276016235, "learning_rate": 9.655251411049075e-06, "loss": 0.8568, "step": 2828 }, { "epoch": 0.1453900709219858, "grad_norm": 0.789156973361969, "learning_rate": 9.654947664622367e-06, "loss": 0.7059, "step": 2829 }, { "epoch": 0.14544146366533045, "grad_norm": 0.7554160952568054, "learning_rate": 9.654643789226267e-06, "loss": 0.752, "step": 2830 }, { "epoch": 0.1454928564086751, "grad_norm": 0.7973955273628235, "learning_rate": 9.6543397848692e-06, "loss": 0.7005, "step": 2831 }, { "epoch": 0.14554424915201974, "grad_norm": 1.152632236480713, "learning_rate": 9.654035651559583e-06, "loss": 0.8085, "step": 2832 }, { "epoch": 0.14559564189536436, "grad_norm": 1.182613730430603, "learning_rate": 9.653731389305843e-06, "loss": 0.8355, "step": 2833 }, { "epoch": 0.145647034638709, "grad_norm": 1.088126540184021, "learning_rate": 9.653426998116412e-06, "loss": 0.7957, "step": 2834 }, { "epoch": 0.14569842738205366, "grad_norm": 1.1100579500198364, "learning_rate": 9.653122477999724e-06, "loss": 0.8552, "step": 2835 }, { "epoch": 0.1457498201253983, "grad_norm": 1.121160626411438, "learning_rate": 9.652817828964212e-06, "loss": 0.7724, "step": 2836 }, { "epoch": 0.14580121286874292, "grad_norm": 1.111227035522461, "learning_rate": 9.652513051018319e-06, "loss": 0.7403, "step": 2837 }, { "epoch": 0.14585260561208757, "grad_norm": 1.0959700345993042, "learning_rate": 9.65220814417049e-06, "loss": 0.8049, "step": 2838 }, { "epoch": 0.14590399835543222, "grad_norm": 1.1258735656738281, "learning_rate": 9.651903108429172e-06, "loss": 0.7874, "step": 2839 }, { "epoch": 0.14595539109877687, "grad_norm": 1.0866092443466187, "learning_rate": 9.651597943802817e-06, "loss": 0.8416, "step": 2840 }, { "epoch": 0.14600678384212148, "grad_norm": 0.8451864719390869, "learning_rate": 9.651292650299877e-06, "loss": 0.7084, "step": 2841 }, { "epoch": 0.14605817658546613, "grad_norm": 1.16310453414917, "learning_rate": 9.650987227928815e-06, "loss": 0.8453, "step": 2842 }, { "epoch": 0.14610956932881078, "grad_norm": 1.3342370986938477, "learning_rate": 9.650681676698088e-06, "loss": 0.7605, "step": 2843 }, { "epoch": 0.1461609620721554, "grad_norm": 1.4599759578704834, "learning_rate": 9.650375996616164e-06, "loss": 0.8379, "step": 2844 }, { "epoch": 0.14621235481550005, "grad_norm": 1.0545005798339844, "learning_rate": 9.650070187691514e-06, "loss": 0.8314, "step": 2845 }, { "epoch": 0.1462637475588447, "grad_norm": 1.0731251239776611, "learning_rate": 9.649764249932608e-06, "loss": 0.839, "step": 2846 }, { "epoch": 0.14631514030218934, "grad_norm": 1.3592246770858765, "learning_rate": 9.649458183347921e-06, "loss": 0.7921, "step": 2847 }, { "epoch": 0.14636653304553396, "grad_norm": 1.1174054145812988, "learning_rate": 9.649151987945938e-06, "loss": 0.8272, "step": 2848 }, { "epoch": 0.1464179257888786, "grad_norm": 1.0916825532913208, "learning_rate": 9.648845663735137e-06, "loss": 0.752, "step": 2849 }, { "epoch": 0.14646931853222325, "grad_norm": 1.0872632265090942, "learning_rate": 9.648539210724009e-06, "loss": 0.8718, "step": 2850 }, { "epoch": 0.1465207112755679, "grad_norm": 1.1224303245544434, "learning_rate": 9.648232628921041e-06, "loss": 0.7933, "step": 2851 }, { "epoch": 0.14657210401891252, "grad_norm": 1.1784203052520752, "learning_rate": 9.64792591833473e-06, "loss": 0.8249, "step": 2852 }, { "epoch": 0.14662349676225717, "grad_norm": 1.1208337545394897, "learning_rate": 9.647619078973573e-06, "loss": 0.8253, "step": 2853 }, { "epoch": 0.1466748895056018, "grad_norm": 1.0951341390609741, "learning_rate": 9.647312110846072e-06, "loss": 0.8206, "step": 2854 }, { "epoch": 0.14672628224894646, "grad_norm": 0.9326683282852173, "learning_rate": 9.647005013960729e-06, "loss": 0.7517, "step": 2855 }, { "epoch": 0.14677767499229108, "grad_norm": 1.1209354400634766, "learning_rate": 9.646697788326053e-06, "loss": 0.7804, "step": 2856 }, { "epoch": 0.14682906773563573, "grad_norm": 1.1009749174118042, "learning_rate": 9.64639043395056e-06, "loss": 0.7645, "step": 2857 }, { "epoch": 0.14688046047898037, "grad_norm": 0.6894964575767517, "learning_rate": 9.64608295084276e-06, "loss": 0.6925, "step": 2858 }, { "epoch": 0.14693185322232502, "grad_norm": 1.1445415019989014, "learning_rate": 9.645775339011178e-06, "loss": 0.8574, "step": 2859 }, { "epoch": 0.14698324596566964, "grad_norm": 1.1482607126235962, "learning_rate": 9.645467598464331e-06, "loss": 0.8881, "step": 2860 }, { "epoch": 0.1470346387090143, "grad_norm": 0.7924277782440186, "learning_rate": 9.645159729210747e-06, "loss": 0.7235, "step": 2861 }, { "epoch": 0.14708603145235893, "grad_norm": 1.0638656616210938, "learning_rate": 9.644851731258957e-06, "loss": 0.7991, "step": 2862 }, { "epoch": 0.14713742419570355, "grad_norm": 1.1228902339935303, "learning_rate": 9.644543604617495e-06, "loss": 0.8015, "step": 2863 }, { "epoch": 0.1471888169390482, "grad_norm": 1.2089815139770508, "learning_rate": 9.644235349294896e-06, "loss": 0.8241, "step": 2864 }, { "epoch": 0.14724020968239285, "grad_norm": 1.0557582378387451, "learning_rate": 9.6439269652997e-06, "loss": 0.8076, "step": 2865 }, { "epoch": 0.1472916024257375, "grad_norm": 1.1455074548721313, "learning_rate": 9.643618452640453e-06, "loss": 0.8003, "step": 2866 }, { "epoch": 0.1473429951690821, "grad_norm": 1.1890294551849365, "learning_rate": 9.6433098113257e-06, "loss": 0.8004, "step": 2867 }, { "epoch": 0.14739438791242676, "grad_norm": 1.0783731937408447, "learning_rate": 9.643001041363997e-06, "loss": 0.7807, "step": 2868 }, { "epoch": 0.1474457806557714, "grad_norm": 1.0693098306655884, "learning_rate": 9.642692142763893e-06, "loss": 0.7815, "step": 2869 }, { "epoch": 0.14749717339911605, "grad_norm": 2.2871763706207275, "learning_rate": 9.64238311553395e-06, "loss": 0.8495, "step": 2870 }, { "epoch": 0.14754856614246067, "grad_norm": 1.0887948274612427, "learning_rate": 9.64207395968273e-06, "loss": 0.7897, "step": 2871 }, { "epoch": 0.14759995888580532, "grad_norm": 1.0784099102020264, "learning_rate": 9.641764675218795e-06, "loss": 0.8009, "step": 2872 }, { "epoch": 0.14765135162914997, "grad_norm": 1.1485944986343384, "learning_rate": 9.641455262150718e-06, "loss": 0.8332, "step": 2873 }, { "epoch": 0.14770274437249462, "grad_norm": 1.1570560932159424, "learning_rate": 9.64114572048707e-06, "loss": 0.8024, "step": 2874 }, { "epoch": 0.14775413711583923, "grad_norm": 1.1131924390792847, "learning_rate": 9.640836050236426e-06, "loss": 0.7483, "step": 2875 }, { "epoch": 0.14780552985918388, "grad_norm": 1.2897237539291382, "learning_rate": 9.640526251407368e-06, "loss": 0.7805, "step": 2876 }, { "epoch": 0.14785692260252853, "grad_norm": 1.109590768814087, "learning_rate": 9.640216324008476e-06, "loss": 0.8036, "step": 2877 }, { "epoch": 0.14790831534587318, "grad_norm": 1.0632981061935425, "learning_rate": 9.639906268048339e-06, "loss": 0.7816, "step": 2878 }, { "epoch": 0.1479597080892178, "grad_norm": 0.9479115009307861, "learning_rate": 9.639596083535547e-06, "loss": 0.7536, "step": 2879 }, { "epoch": 0.14801110083256244, "grad_norm": 1.1946831941604614, "learning_rate": 9.639285770478696e-06, "loss": 0.8872, "step": 2880 }, { "epoch": 0.1480624935759071, "grad_norm": 1.0937085151672363, "learning_rate": 9.63897532888638e-06, "loss": 0.7513, "step": 2881 }, { "epoch": 0.1481138863192517, "grad_norm": 1.1250693798065186, "learning_rate": 9.638664758767203e-06, "loss": 0.8877, "step": 2882 }, { "epoch": 0.14816527906259636, "grad_norm": 1.1539740562438965, "learning_rate": 9.638354060129766e-06, "loss": 0.8129, "step": 2883 }, { "epoch": 0.148216671805941, "grad_norm": 0.8243728280067444, "learning_rate": 9.63804323298268e-06, "loss": 0.7646, "step": 2884 }, { "epoch": 0.14826806454928565, "grad_norm": 0.8005802035331726, "learning_rate": 9.637732277334555e-06, "loss": 0.6997, "step": 2885 }, { "epoch": 0.14831945729263027, "grad_norm": 1.1559743881225586, "learning_rate": 9.63742119319401e-06, "loss": 0.8377, "step": 2886 }, { "epoch": 0.14837085003597492, "grad_norm": 1.1165852546691895, "learning_rate": 9.637109980569659e-06, "loss": 0.8116, "step": 2887 }, { "epoch": 0.14842224277931956, "grad_norm": 2.090949296951294, "learning_rate": 9.63679863947013e-06, "loss": 0.7226, "step": 2888 }, { "epoch": 0.1484736355226642, "grad_norm": 1.0865733623504639, "learning_rate": 9.636487169904041e-06, "loss": 0.8042, "step": 2889 }, { "epoch": 0.14852502826600883, "grad_norm": 1.357225775718689, "learning_rate": 9.636175571880028e-06, "loss": 0.8223, "step": 2890 }, { "epoch": 0.14857642100935348, "grad_norm": 1.160138726234436, "learning_rate": 9.635863845406723e-06, "loss": 0.8265, "step": 2891 }, { "epoch": 0.14862781375269812, "grad_norm": 1.0842944383621216, "learning_rate": 9.635551990492762e-06, "loss": 0.8019, "step": 2892 }, { "epoch": 0.14867920649604277, "grad_norm": 1.135393738746643, "learning_rate": 9.635240007146785e-06, "loss": 0.7571, "step": 2893 }, { "epoch": 0.1487305992393874, "grad_norm": 1.138669729232788, "learning_rate": 9.634927895377434e-06, "loss": 0.8391, "step": 2894 }, { "epoch": 0.14878199198273204, "grad_norm": 1.0314048528671265, "learning_rate": 9.634615655193363e-06, "loss": 0.7413, "step": 2895 }, { "epoch": 0.14883338472607668, "grad_norm": 1.0841628313064575, "learning_rate": 9.634303286603213e-06, "loss": 0.8175, "step": 2896 }, { "epoch": 0.14888477746942133, "grad_norm": 1.4417554140090942, "learning_rate": 9.633990789615646e-06, "loss": 0.786, "step": 2897 }, { "epoch": 0.14893617021276595, "grad_norm": 1.5736850500106812, "learning_rate": 9.633678164239318e-06, "loss": 0.8303, "step": 2898 }, { "epoch": 0.1489875629561106, "grad_norm": 1.1179144382476807, "learning_rate": 9.633365410482889e-06, "loss": 0.8605, "step": 2899 }, { "epoch": 0.14903895569945524, "grad_norm": 1.1698509454727173, "learning_rate": 9.633052528355025e-06, "loss": 0.7788, "step": 2900 }, { "epoch": 0.14909034844279986, "grad_norm": 1.2293646335601807, "learning_rate": 9.632739517864396e-06, "loss": 0.7936, "step": 2901 }, { "epoch": 0.1491417411861445, "grad_norm": 1.1333012580871582, "learning_rate": 9.632426379019672e-06, "loss": 0.824, "step": 2902 }, { "epoch": 0.14919313392948916, "grad_norm": 1.195748209953308, "learning_rate": 9.632113111829531e-06, "loss": 0.8131, "step": 2903 }, { "epoch": 0.1492445266728338, "grad_norm": 1.111189365386963, "learning_rate": 9.63179971630265e-06, "loss": 0.7834, "step": 2904 }, { "epoch": 0.14929591941617842, "grad_norm": 1.1388803720474243, "learning_rate": 9.631486192447714e-06, "loss": 0.8012, "step": 2905 }, { "epoch": 0.14934731215952307, "grad_norm": 1.2618441581726074, "learning_rate": 9.63117254027341e-06, "loss": 0.7955, "step": 2906 }, { "epoch": 0.14939870490286772, "grad_norm": 1.4290024042129517, "learning_rate": 9.630858759788425e-06, "loss": 0.8325, "step": 2907 }, { "epoch": 0.14945009764621237, "grad_norm": 1.197076678276062, "learning_rate": 9.630544851001453e-06, "loss": 0.8305, "step": 2908 }, { "epoch": 0.14950149038955698, "grad_norm": 1.087079644203186, "learning_rate": 9.630230813921194e-06, "loss": 0.7274, "step": 2909 }, { "epoch": 0.14955288313290163, "grad_norm": 1.129672646522522, "learning_rate": 9.629916648556347e-06, "loss": 0.7828, "step": 2910 }, { "epoch": 0.14960427587624628, "grad_norm": 1.167987585067749, "learning_rate": 9.629602354915616e-06, "loss": 0.8351, "step": 2911 }, { "epoch": 0.14965566861959093, "grad_norm": 0.9685371518135071, "learning_rate": 9.62928793300771e-06, "loss": 0.7573, "step": 2912 }, { "epoch": 0.14970706136293555, "grad_norm": 0.8648548126220703, "learning_rate": 9.628973382841338e-06, "loss": 0.72, "step": 2913 }, { "epoch": 0.1497584541062802, "grad_norm": 1.0924067497253418, "learning_rate": 9.628658704425215e-06, "loss": 0.8289, "step": 2914 }, { "epoch": 0.14980984684962484, "grad_norm": 1.190320372581482, "learning_rate": 9.628343897768062e-06, "loss": 0.8264, "step": 2915 }, { "epoch": 0.1498612395929695, "grad_norm": 1.1206905841827393, "learning_rate": 9.6280289628786e-06, "loss": 0.8305, "step": 2916 }, { "epoch": 0.1499126323363141, "grad_norm": 1.1106091737747192, "learning_rate": 9.627713899765554e-06, "loss": 0.8469, "step": 2917 }, { "epoch": 0.14996402507965875, "grad_norm": 1.0479516983032227, "learning_rate": 9.627398708437654e-06, "loss": 0.7908, "step": 2918 }, { "epoch": 0.1500154178230034, "grad_norm": 1.098765254020691, "learning_rate": 9.62708338890363e-06, "loss": 0.7931, "step": 2919 }, { "epoch": 0.15006681056634802, "grad_norm": 1.0796483755111694, "learning_rate": 9.626767941172222e-06, "loss": 0.8246, "step": 2920 }, { "epoch": 0.15011820330969267, "grad_norm": 1.1554334163665771, "learning_rate": 9.626452365252168e-06, "loss": 0.8163, "step": 2921 }, { "epoch": 0.1501695960530373, "grad_norm": 1.1507536172866821, "learning_rate": 9.62613666115221e-06, "loss": 0.8234, "step": 2922 }, { "epoch": 0.15022098879638196, "grad_norm": 1.1683820486068726, "learning_rate": 9.625820828881096e-06, "loss": 0.7735, "step": 2923 }, { "epoch": 0.15027238153972658, "grad_norm": 1.120169997215271, "learning_rate": 9.625504868447577e-06, "loss": 0.834, "step": 2924 }, { "epoch": 0.15032377428307123, "grad_norm": 1.137853980064392, "learning_rate": 9.625188779860407e-06, "loss": 0.7956, "step": 2925 }, { "epoch": 0.15037516702641587, "grad_norm": 1.113329291343689, "learning_rate": 9.624872563128342e-06, "loss": 0.7993, "step": 2926 }, { "epoch": 0.15042655976976052, "grad_norm": 0.8806728720664978, "learning_rate": 9.624556218260144e-06, "loss": 0.6828, "step": 2927 }, { "epoch": 0.15047795251310514, "grad_norm": 1.130362868309021, "learning_rate": 9.624239745264578e-06, "loss": 0.7646, "step": 2928 }, { "epoch": 0.1505293452564498, "grad_norm": 0.9131379723548889, "learning_rate": 9.62392314415041e-06, "loss": 0.7036, "step": 2929 }, { "epoch": 0.15058073799979443, "grad_norm": 1.1984407901763916, "learning_rate": 9.623606414926416e-06, "loss": 0.8113, "step": 2930 }, { "epoch": 0.15063213074313908, "grad_norm": 1.1617956161499023, "learning_rate": 9.623289557601369e-06, "loss": 0.7972, "step": 2931 }, { "epoch": 0.1506835234864837, "grad_norm": 1.1492894887924194, "learning_rate": 9.622972572184047e-06, "loss": 0.8038, "step": 2932 }, { "epoch": 0.15073491622982835, "grad_norm": 1.0828661918640137, "learning_rate": 9.62265545868323e-06, "loss": 0.8241, "step": 2933 }, { "epoch": 0.150786308973173, "grad_norm": 0.8112282752990723, "learning_rate": 9.62233821710771e-06, "loss": 0.6988, "step": 2934 }, { "epoch": 0.15083770171651764, "grad_norm": 1.1404075622558594, "learning_rate": 9.622020847466274e-06, "loss": 0.8171, "step": 2935 }, { "epoch": 0.15088909445986226, "grad_norm": 0.8881917595863342, "learning_rate": 9.621703349767712e-06, "loss": 0.7569, "step": 2936 }, { "epoch": 0.1509404872032069, "grad_norm": 1.2118170261383057, "learning_rate": 9.621385724020824e-06, "loss": 0.9396, "step": 2937 }, { "epoch": 0.15099187994655155, "grad_norm": 1.148276448249817, "learning_rate": 9.621067970234408e-06, "loss": 0.835, "step": 2938 }, { "epoch": 0.15104327268989617, "grad_norm": 1.1282833814620972, "learning_rate": 9.620750088417268e-06, "loss": 0.8175, "step": 2939 }, { "epoch": 0.15109466543324082, "grad_norm": 1.109713077545166, "learning_rate": 9.620432078578213e-06, "loss": 0.7927, "step": 2940 }, { "epoch": 0.15114605817658547, "grad_norm": 0.7900930643081665, "learning_rate": 9.620113940726052e-06, "loss": 0.7464, "step": 2941 }, { "epoch": 0.15119745091993012, "grad_norm": 1.152547001838684, "learning_rate": 9.619795674869601e-06, "loss": 0.7936, "step": 2942 }, { "epoch": 0.15124884366327473, "grad_norm": 1.09788978099823, "learning_rate": 9.619477281017675e-06, "loss": 0.8573, "step": 2943 }, { "epoch": 0.15130023640661938, "grad_norm": 1.0922348499298096, "learning_rate": 9.619158759179098e-06, "loss": 0.8245, "step": 2944 }, { "epoch": 0.15135162914996403, "grad_norm": 1.1092678308486938, "learning_rate": 9.618840109362693e-06, "loss": 0.7579, "step": 2945 }, { "epoch": 0.15140302189330868, "grad_norm": 1.141142725944519, "learning_rate": 9.61852133157729e-06, "loss": 0.8134, "step": 2946 }, { "epoch": 0.1514544146366533, "grad_norm": 0.8310251235961914, "learning_rate": 9.61820242583172e-06, "loss": 0.7005, "step": 2947 }, { "epoch": 0.15150580737999794, "grad_norm": 0.7588998675346375, "learning_rate": 9.61788339213482e-06, "loss": 0.7357, "step": 2948 }, { "epoch": 0.1515572001233426, "grad_norm": 1.4933528900146484, "learning_rate": 9.617564230495428e-06, "loss": 0.8218, "step": 2949 }, { "epoch": 0.15160859286668724, "grad_norm": 0.803726315498352, "learning_rate": 9.617244940922386e-06, "loss": 0.6845, "step": 2950 }, { "epoch": 0.15165998561003186, "grad_norm": 1.274340271949768, "learning_rate": 9.616925523424541e-06, "loss": 0.9059, "step": 2951 }, { "epoch": 0.1517113783533765, "grad_norm": 1.2259368896484375, "learning_rate": 9.616605978010742e-06, "loss": 0.7815, "step": 2952 }, { "epoch": 0.15176277109672115, "grad_norm": 0.7884451746940613, "learning_rate": 9.616286304689843e-06, "loss": 0.7158, "step": 2953 }, { "epoch": 0.15181416384006577, "grad_norm": 1.1206575632095337, "learning_rate": 9.615966503470703e-06, "loss": 0.8044, "step": 2954 }, { "epoch": 0.15186555658341042, "grad_norm": 1.2555440664291382, "learning_rate": 9.615646574362178e-06, "loss": 0.8495, "step": 2955 }, { "epoch": 0.15191694932675506, "grad_norm": 1.056719422340393, "learning_rate": 9.615326517373135e-06, "loss": 0.8072, "step": 2956 }, { "epoch": 0.1519683420700997, "grad_norm": 0.8035044074058533, "learning_rate": 9.61500633251244e-06, "loss": 0.7283, "step": 2957 }, { "epoch": 0.15201973481344433, "grad_norm": 1.2515276670455933, "learning_rate": 9.614686019788966e-06, "loss": 0.8739, "step": 2958 }, { "epoch": 0.15207112755678898, "grad_norm": 1.1463450193405151, "learning_rate": 9.614365579211585e-06, "loss": 0.8513, "step": 2959 }, { "epoch": 0.15212252030013362, "grad_norm": 1.116761565208435, "learning_rate": 9.614045010789175e-06, "loss": 0.8273, "step": 2960 }, { "epoch": 0.15217391304347827, "grad_norm": 1.1044739484786987, "learning_rate": 9.613724314530621e-06, "loss": 0.7587, "step": 2961 }, { "epoch": 0.1522253057868229, "grad_norm": 1.0965312719345093, "learning_rate": 9.613403490444804e-06, "loss": 0.7869, "step": 2962 }, { "epoch": 0.15227669853016754, "grad_norm": 1.139046549797058, "learning_rate": 9.613082538540614e-06, "loss": 0.7942, "step": 2963 }, { "epoch": 0.15232809127351218, "grad_norm": 0.8312227129936218, "learning_rate": 9.612761458826947e-06, "loss": 0.7246, "step": 2964 }, { "epoch": 0.15237948401685683, "grad_norm": 1.1537731885910034, "learning_rate": 9.612440251312694e-06, "loss": 0.807, "step": 2965 }, { "epoch": 0.15243087676020145, "grad_norm": 1.0862990617752075, "learning_rate": 9.612118916006757e-06, "loss": 0.7842, "step": 2966 }, { "epoch": 0.1524822695035461, "grad_norm": 1.2529340982437134, "learning_rate": 9.611797452918038e-06, "loss": 0.7649, "step": 2967 }, { "epoch": 0.15253366224689074, "grad_norm": 0.8279370069503784, "learning_rate": 9.61147586205544e-06, "loss": 0.7283, "step": 2968 }, { "epoch": 0.1525850549902354, "grad_norm": 0.8124724626541138, "learning_rate": 9.611154143427878e-06, "loss": 0.7181, "step": 2969 }, { "epoch": 0.15263644773358, "grad_norm": 1.1039282083511353, "learning_rate": 9.610832297044265e-06, "loss": 0.8086, "step": 2970 }, { "epoch": 0.15268784047692466, "grad_norm": 1.0751821994781494, "learning_rate": 9.610510322913518e-06, "loss": 0.7618, "step": 2971 }, { "epoch": 0.1527392332202693, "grad_norm": 1.1291072368621826, "learning_rate": 9.610188221044554e-06, "loss": 0.8286, "step": 2972 }, { "epoch": 0.15279062596361392, "grad_norm": 1.1156890392303467, "learning_rate": 9.609865991446302e-06, "loss": 0.7895, "step": 2973 }, { "epoch": 0.15284201870695857, "grad_norm": 1.1015522480010986, "learning_rate": 9.609543634127683e-06, "loss": 0.8147, "step": 2974 }, { "epoch": 0.15289341145030322, "grad_norm": 1.1526223421096802, "learning_rate": 9.609221149097636e-06, "loss": 0.8246, "step": 2975 }, { "epoch": 0.15294480419364787, "grad_norm": 1.1723006963729858, "learning_rate": 9.60889853636509e-06, "loss": 0.7787, "step": 2976 }, { "epoch": 0.15299619693699248, "grad_norm": 1.1209646463394165, "learning_rate": 9.608575795938986e-06, "loss": 0.7952, "step": 2977 }, { "epoch": 0.15304758968033713, "grad_norm": 1.1622693538665771, "learning_rate": 9.608252927828267e-06, "loss": 0.8188, "step": 2978 }, { "epoch": 0.15309898242368178, "grad_norm": 1.3454382419586182, "learning_rate": 9.607929932041874e-06, "loss": 0.8023, "step": 2979 }, { "epoch": 0.15315037516702643, "grad_norm": 1.1904152631759644, "learning_rate": 9.60760680858876e-06, "loss": 0.7886, "step": 2980 }, { "epoch": 0.15320176791037105, "grad_norm": 1.2229959964752197, "learning_rate": 9.607283557477876e-06, "loss": 0.8834, "step": 2981 }, { "epoch": 0.1532531606537157, "grad_norm": 0.9498704671859741, "learning_rate": 9.606960178718175e-06, "loss": 0.7274, "step": 2982 }, { "epoch": 0.15330455339706034, "grad_norm": 0.8235172629356384, "learning_rate": 9.606636672318623e-06, "loss": 0.756, "step": 2983 }, { "epoch": 0.153355946140405, "grad_norm": 1.0444530248641968, "learning_rate": 9.606313038288177e-06, "loss": 0.7972, "step": 2984 }, { "epoch": 0.1534073388837496, "grad_norm": 1.1400405168533325, "learning_rate": 9.605989276635805e-06, "loss": 0.8118, "step": 2985 }, { "epoch": 0.15345873162709425, "grad_norm": 1.0790746212005615, "learning_rate": 9.60566538737048e-06, "loss": 0.7795, "step": 2986 }, { "epoch": 0.1535101243704389, "grad_norm": 0.9387630224227905, "learning_rate": 9.605341370501173e-06, "loss": 0.7594, "step": 2987 }, { "epoch": 0.15356151711378355, "grad_norm": 1.158873438835144, "learning_rate": 9.605017226036861e-06, "loss": 0.81, "step": 2988 }, { "epoch": 0.15361290985712817, "grad_norm": 1.1313420534133911, "learning_rate": 9.604692953986526e-06, "loss": 0.8594, "step": 2989 }, { "epoch": 0.1536643026004728, "grad_norm": 1.0720127820968628, "learning_rate": 9.604368554359152e-06, "loss": 0.791, "step": 2990 }, { "epoch": 0.15371569534381746, "grad_norm": 1.1312565803527832, "learning_rate": 9.604044027163725e-06, "loss": 0.795, "step": 2991 }, { "epoch": 0.15376708808716208, "grad_norm": 1.133973479270935, "learning_rate": 9.60371937240924e-06, "loss": 0.8124, "step": 2992 }, { "epoch": 0.15381848083050673, "grad_norm": 0.7848932147026062, "learning_rate": 9.603394590104689e-06, "loss": 0.7201, "step": 2993 }, { "epoch": 0.15386987357385137, "grad_norm": 0.9597364068031311, "learning_rate": 9.60306968025907e-06, "loss": 0.7226, "step": 2994 }, { "epoch": 0.15392126631719602, "grad_norm": 1.1648770570755005, "learning_rate": 9.602744642881387e-06, "loss": 0.8061, "step": 2995 }, { "epoch": 0.15397265906054064, "grad_norm": 0.7717452049255371, "learning_rate": 9.602419477980643e-06, "loss": 0.7146, "step": 2996 }, { "epoch": 0.1540240518038853, "grad_norm": 1.1483856439590454, "learning_rate": 9.60209418556585e-06, "loss": 0.8003, "step": 2997 }, { "epoch": 0.15407544454722993, "grad_norm": 0.9341374635696411, "learning_rate": 9.601768765646018e-06, "loss": 0.6776, "step": 2998 }, { "epoch": 0.15412683729057458, "grad_norm": 0.8843017816543579, "learning_rate": 9.601443218230163e-06, "loss": 0.746, "step": 2999 }, { "epoch": 0.1541782300339192, "grad_norm": 1.2591841220855713, "learning_rate": 9.601117543327305e-06, "loss": 0.7891, "step": 3000 }, { "epoch": 0.15422962277726385, "grad_norm": 1.1563690900802612, "learning_rate": 9.60079174094647e-06, "loss": 0.814, "step": 3001 }, { "epoch": 0.1542810155206085, "grad_norm": 1.1140031814575195, "learning_rate": 9.600465811096682e-06, "loss": 0.8519, "step": 3002 }, { "epoch": 0.15433240826395314, "grad_norm": 1.2970277070999146, "learning_rate": 9.60013975378697e-06, "loss": 0.7649, "step": 3003 }, { "epoch": 0.15438380100729776, "grad_norm": 1.7794508934020996, "learning_rate": 9.59981356902637e-06, "loss": 0.8601, "step": 3004 }, { "epoch": 0.1544351937506424, "grad_norm": 0.9201565980911255, "learning_rate": 9.599487256823918e-06, "loss": 0.726, "step": 3005 }, { "epoch": 0.15448658649398705, "grad_norm": 1.1056443452835083, "learning_rate": 9.599160817188655e-06, "loss": 0.7554, "step": 3006 }, { "epoch": 0.1545379792373317, "grad_norm": 1.1170471906661987, "learning_rate": 9.598834250129626e-06, "loss": 0.8006, "step": 3007 }, { "epoch": 0.15458937198067632, "grad_norm": 1.1487152576446533, "learning_rate": 9.598507555655875e-06, "loss": 0.8502, "step": 3008 }, { "epoch": 0.15464076472402097, "grad_norm": 1.1027685403823853, "learning_rate": 9.59818073377646e-06, "loss": 0.861, "step": 3009 }, { "epoch": 0.15469215746736562, "grad_norm": 1.0589098930358887, "learning_rate": 9.59785378450043e-06, "loss": 0.791, "step": 3010 }, { "epoch": 0.15474355021071023, "grad_norm": 1.0861899852752686, "learning_rate": 9.597526707836849e-06, "loss": 0.822, "step": 3011 }, { "epoch": 0.15479494295405488, "grad_norm": 1.5645182132720947, "learning_rate": 9.597199503794774e-06, "loss": 0.7781, "step": 3012 }, { "epoch": 0.15484633569739953, "grad_norm": 1.1648106575012207, "learning_rate": 9.596872172383273e-06, "loss": 0.817, "step": 3013 }, { "epoch": 0.15489772844074418, "grad_norm": 1.131691575050354, "learning_rate": 9.596544713611413e-06, "loss": 0.8319, "step": 3014 }, { "epoch": 0.1549491211840888, "grad_norm": 1.2071539163589478, "learning_rate": 9.596217127488268e-06, "loss": 0.8823, "step": 3015 }, { "epoch": 0.15500051392743344, "grad_norm": 0.9304954409599304, "learning_rate": 9.595889414022914e-06, "loss": 0.7584, "step": 3016 }, { "epoch": 0.1550519066707781, "grad_norm": 1.0601961612701416, "learning_rate": 9.595561573224433e-06, "loss": 0.7422, "step": 3017 }, { "epoch": 0.15510329941412274, "grad_norm": 1.0696712732315063, "learning_rate": 9.5952336051019e-06, "loss": 0.8429, "step": 3018 }, { "epoch": 0.15515469215746736, "grad_norm": 0.7522971630096436, "learning_rate": 9.594905509664412e-06, "loss": 0.7537, "step": 3019 }, { "epoch": 0.155206084900812, "grad_norm": 0.8931046724319458, "learning_rate": 9.594577286921054e-06, "loss": 0.7511, "step": 3020 }, { "epoch": 0.15525747764415665, "grad_norm": 1.1393920183181763, "learning_rate": 9.594248936880919e-06, "loss": 0.7462, "step": 3021 }, { "epoch": 0.1553088703875013, "grad_norm": 1.0803031921386719, "learning_rate": 9.593920459553105e-06, "loss": 0.8448, "step": 3022 }, { "epoch": 0.15536026313084592, "grad_norm": 1.1737204790115356, "learning_rate": 9.593591854946713e-06, "loss": 0.7248, "step": 3023 }, { "epoch": 0.15541165587419056, "grad_norm": 1.0927925109863281, "learning_rate": 9.593263123070849e-06, "loss": 0.7274, "step": 3024 }, { "epoch": 0.1554630486175352, "grad_norm": 1.1205379962921143, "learning_rate": 9.592934263934617e-06, "loss": 0.7834, "step": 3025 }, { "epoch": 0.15551444136087986, "grad_norm": 1.0936018228530884, "learning_rate": 9.59260527754713e-06, "loss": 0.8014, "step": 3026 }, { "epoch": 0.15556583410422448, "grad_norm": 1.1625615358352661, "learning_rate": 9.592276163917507e-06, "loss": 0.7853, "step": 3027 }, { "epoch": 0.15561722684756912, "grad_norm": 1.1285988092422485, "learning_rate": 9.59194692305486e-06, "loss": 0.8173, "step": 3028 }, { "epoch": 0.15566861959091377, "grad_norm": 1.2074480056762695, "learning_rate": 9.591617554968313e-06, "loss": 0.8081, "step": 3029 }, { "epoch": 0.1557200123342584, "grad_norm": 1.0644116401672363, "learning_rate": 9.591288059666993e-06, "loss": 0.7958, "step": 3030 }, { "epoch": 0.15577140507760304, "grad_norm": 1.1561336517333984, "learning_rate": 9.590958437160028e-06, "loss": 0.8117, "step": 3031 }, { "epoch": 0.15582279782094768, "grad_norm": 1.2603542804718018, "learning_rate": 9.59062868745655e-06, "loss": 0.7798, "step": 3032 }, { "epoch": 0.15587419056429233, "grad_norm": 1.1251877546310425, "learning_rate": 9.590298810565697e-06, "loss": 0.8199, "step": 3033 }, { "epoch": 0.15592558330763695, "grad_norm": 1.1156567335128784, "learning_rate": 9.589968806496605e-06, "loss": 0.7859, "step": 3034 }, { "epoch": 0.1559769760509816, "grad_norm": 1.1767079830169678, "learning_rate": 9.58963867525842e-06, "loss": 0.8298, "step": 3035 }, { "epoch": 0.15602836879432624, "grad_norm": 1.0802053213119507, "learning_rate": 9.589308416860287e-06, "loss": 0.7945, "step": 3036 }, { "epoch": 0.1560797615376709, "grad_norm": 1.1257559061050415, "learning_rate": 9.58897803131136e-06, "loss": 0.8153, "step": 3037 }, { "epoch": 0.1561311542810155, "grad_norm": 0.9175286889076233, "learning_rate": 9.588647518620786e-06, "loss": 0.771, "step": 3038 }, { "epoch": 0.15618254702436016, "grad_norm": 1.1217080354690552, "learning_rate": 9.588316878797726e-06, "loss": 0.8253, "step": 3039 }, { "epoch": 0.1562339397677048, "grad_norm": 0.7711915969848633, "learning_rate": 9.58798611185134e-06, "loss": 0.7206, "step": 3040 }, { "epoch": 0.15628533251104945, "grad_norm": 1.1445866823196411, "learning_rate": 9.587655217790792e-06, "loss": 0.8226, "step": 3041 }, { "epoch": 0.15633672525439407, "grad_norm": 1.1377453804016113, "learning_rate": 9.587324196625252e-06, "loss": 0.8379, "step": 3042 }, { "epoch": 0.15638811799773872, "grad_norm": 0.8878036141395569, "learning_rate": 9.586993048363888e-06, "loss": 0.6898, "step": 3043 }, { "epoch": 0.15643951074108337, "grad_norm": 1.3285459280014038, "learning_rate": 9.586661773015875e-06, "loss": 0.8733, "step": 3044 }, { "epoch": 0.156490903484428, "grad_norm": 0.8551929593086243, "learning_rate": 9.586330370590393e-06, "loss": 0.7343, "step": 3045 }, { "epoch": 0.15654229622777263, "grad_norm": 1.0831973552703857, "learning_rate": 9.585998841096624e-06, "loss": 0.7861, "step": 3046 }, { "epoch": 0.15659368897111728, "grad_norm": 1.199660301208496, "learning_rate": 9.585667184543753e-06, "loss": 0.8218, "step": 3047 }, { "epoch": 0.15664508171446193, "grad_norm": 1.1207102537155151, "learning_rate": 9.585335400940967e-06, "loss": 0.7858, "step": 3048 }, { "epoch": 0.15669647445780654, "grad_norm": 0.8962370753288269, "learning_rate": 9.58500349029746e-06, "loss": 0.7223, "step": 3049 }, { "epoch": 0.1567478672011512, "grad_norm": 1.1775535345077515, "learning_rate": 9.584671452622427e-06, "loss": 0.8017, "step": 3050 }, { "epoch": 0.15679925994449584, "grad_norm": 1.1942967176437378, "learning_rate": 9.584339287925068e-06, "loss": 0.8163, "step": 3051 }, { "epoch": 0.15685065268784049, "grad_norm": 1.151305913925171, "learning_rate": 9.584006996214585e-06, "loss": 0.7986, "step": 3052 }, { "epoch": 0.1569020454311851, "grad_norm": 1.054308295249939, "learning_rate": 9.583674577500185e-06, "loss": 0.824, "step": 3053 }, { "epoch": 0.15695343817452975, "grad_norm": 1.142619013786316, "learning_rate": 9.583342031791081e-06, "loss": 0.8351, "step": 3054 }, { "epoch": 0.1570048309178744, "grad_norm": 1.0959057807922363, "learning_rate": 9.583009359096483e-06, "loss": 0.7646, "step": 3055 }, { "epoch": 0.15705622366121905, "grad_norm": 1.116235375404358, "learning_rate": 9.582676559425605e-06, "loss": 0.8211, "step": 3056 }, { "epoch": 0.15710761640456367, "grad_norm": 0.9597142338752747, "learning_rate": 9.582343632787675e-06, "loss": 0.7209, "step": 3057 }, { "epoch": 0.1571590091479083, "grad_norm": 1.1602106094360352, "learning_rate": 9.582010579191912e-06, "loss": 0.8306, "step": 3058 }, { "epoch": 0.15721040189125296, "grad_norm": 1.341246247291565, "learning_rate": 9.581677398647544e-06, "loss": 0.819, "step": 3059 }, { "epoch": 0.1572617946345976, "grad_norm": 1.0703506469726562, "learning_rate": 9.581344091163803e-06, "loss": 0.8175, "step": 3060 }, { "epoch": 0.15731318737794223, "grad_norm": 1.060909390449524, "learning_rate": 9.581010656749924e-06, "loss": 0.8182, "step": 3061 }, { "epoch": 0.15736458012128687, "grad_norm": 1.1364223957061768, "learning_rate": 9.580677095415144e-06, "loss": 0.8128, "step": 3062 }, { "epoch": 0.15741597286463152, "grad_norm": 0.7858501672744751, "learning_rate": 9.580343407168704e-06, "loss": 0.7177, "step": 3063 }, { "epoch": 0.15746736560797617, "grad_norm": 0.7859862446784973, "learning_rate": 9.580009592019853e-06, "loss": 0.7055, "step": 3064 }, { "epoch": 0.1575187583513208, "grad_norm": 1.477375864982605, "learning_rate": 9.579675649977833e-06, "loss": 0.8005, "step": 3065 }, { "epoch": 0.15757015109466543, "grad_norm": 1.226243495941162, "learning_rate": 9.579341581051902e-06, "loss": 0.7751, "step": 3066 }, { "epoch": 0.15762154383801008, "grad_norm": 7.041565418243408, "learning_rate": 9.579007385251315e-06, "loss": 0.8248, "step": 3067 }, { "epoch": 0.1576729365813547, "grad_norm": 1.043668508529663, "learning_rate": 9.578673062585328e-06, "loss": 0.7887, "step": 3068 }, { "epoch": 0.15772432932469935, "grad_norm": 1.154679298400879, "learning_rate": 9.578338613063204e-06, "loss": 0.8372, "step": 3069 }, { "epoch": 0.157775722068044, "grad_norm": 2.3733458518981934, "learning_rate": 9.578004036694214e-06, "loss": 0.8061, "step": 3070 }, { "epoch": 0.15782711481138864, "grad_norm": 1.2154031991958618, "learning_rate": 9.577669333487622e-06, "loss": 0.8013, "step": 3071 }, { "epoch": 0.15787850755473326, "grad_norm": 1.1823316812515259, "learning_rate": 9.577334503452706e-06, "loss": 0.8473, "step": 3072 }, { "epoch": 0.1579299002980779, "grad_norm": 1.139605164527893, "learning_rate": 9.576999546598739e-06, "loss": 0.8185, "step": 3073 }, { "epoch": 0.15798129304142255, "grad_norm": 1.136753797531128, "learning_rate": 9.576664462935003e-06, "loss": 0.8018, "step": 3074 }, { "epoch": 0.1580326857847672, "grad_norm": 1.198256015777588, "learning_rate": 9.57632925247078e-06, "loss": 0.7323, "step": 3075 }, { "epoch": 0.15808407852811182, "grad_norm": 1.1671271324157715, "learning_rate": 9.575993915215361e-06, "loss": 0.8329, "step": 3076 }, { "epoch": 0.15813547127145647, "grad_norm": 1.124439001083374, "learning_rate": 9.575658451178034e-06, "loss": 0.8341, "step": 3077 }, { "epoch": 0.15818686401480112, "grad_norm": 1.091123104095459, "learning_rate": 9.575322860368093e-06, "loss": 0.815, "step": 3078 }, { "epoch": 0.15823825675814576, "grad_norm": 1.131137728691101, "learning_rate": 9.574987142794839e-06, "loss": 0.8112, "step": 3079 }, { "epoch": 0.15828964950149038, "grad_norm": 0.956962525844574, "learning_rate": 9.574651298467569e-06, "loss": 0.7368, "step": 3080 }, { "epoch": 0.15834104224483503, "grad_norm": 1.2453482151031494, "learning_rate": 9.57431532739559e-06, "loss": 0.7677, "step": 3081 }, { "epoch": 0.15839243498817968, "grad_norm": 1.1722524166107178, "learning_rate": 9.57397922958821e-06, "loss": 0.804, "step": 3082 }, { "epoch": 0.15844382773152432, "grad_norm": 1.2492121458053589, "learning_rate": 9.573643005054744e-06, "loss": 0.8303, "step": 3083 }, { "epoch": 0.15849522047486894, "grad_norm": 1.1757866144180298, "learning_rate": 9.573306653804503e-06, "loss": 0.8092, "step": 3084 }, { "epoch": 0.1585466132182136, "grad_norm": 1.0953439474105835, "learning_rate": 9.572970175846807e-06, "loss": 0.8396, "step": 3085 }, { "epoch": 0.15859800596155824, "grad_norm": 1.181564450263977, "learning_rate": 9.572633571190978e-06, "loss": 0.8354, "step": 3086 }, { "epoch": 0.15864939870490286, "grad_norm": 1.1329485177993774, "learning_rate": 9.572296839846346e-06, "loss": 0.7932, "step": 3087 }, { "epoch": 0.1587007914482475, "grad_norm": 1.109900712966919, "learning_rate": 9.571959981822236e-06, "loss": 0.8035, "step": 3088 }, { "epoch": 0.15875218419159215, "grad_norm": 1.0511541366577148, "learning_rate": 9.57162299712798e-06, "loss": 0.7743, "step": 3089 }, { "epoch": 0.1588035769349368, "grad_norm": 1.1241798400878906, "learning_rate": 9.571285885772919e-06, "loss": 0.7923, "step": 3090 }, { "epoch": 0.15885496967828142, "grad_norm": 0.9507374167442322, "learning_rate": 9.570948647766391e-06, "loss": 0.7045, "step": 3091 }, { "epoch": 0.15890636242162606, "grad_norm": 1.132149577140808, "learning_rate": 9.570611283117738e-06, "loss": 0.8289, "step": 3092 }, { "epoch": 0.1589577551649707, "grad_norm": 1.1935265064239502, "learning_rate": 9.57027379183631e-06, "loss": 0.8208, "step": 3093 }, { "epoch": 0.15900914790831536, "grad_norm": 1.1184380054473877, "learning_rate": 9.569936173931453e-06, "loss": 0.8161, "step": 3094 }, { "epoch": 0.15906054065165998, "grad_norm": 0.7748022079467773, "learning_rate": 9.569598429412526e-06, "loss": 0.7227, "step": 3095 }, { "epoch": 0.15911193339500462, "grad_norm": 0.9522273540496826, "learning_rate": 9.569260558288883e-06, "loss": 0.7548, "step": 3096 }, { "epoch": 0.15916332613834927, "grad_norm": 1.267147421836853, "learning_rate": 9.568922560569885e-06, "loss": 0.8296, "step": 3097 }, { "epoch": 0.15921471888169392, "grad_norm": 1.300022006034851, "learning_rate": 9.568584436264899e-06, "loss": 0.8262, "step": 3098 }, { "epoch": 0.15926611162503854, "grad_norm": 1.0244982242584229, "learning_rate": 9.568246185383291e-06, "loss": 0.749, "step": 3099 }, { "epoch": 0.15931750436838318, "grad_norm": 1.1157578229904175, "learning_rate": 9.567907807934434e-06, "loss": 0.7567, "step": 3100 }, { "epoch": 0.15936889711172783, "grad_norm": 1.0519708395004272, "learning_rate": 9.567569303927702e-06, "loss": 0.7892, "step": 3101 }, { "epoch": 0.15942028985507245, "grad_norm": 1.1085740327835083, "learning_rate": 9.567230673372475e-06, "loss": 0.832, "step": 3102 }, { "epoch": 0.1594716825984171, "grad_norm": 1.0715935230255127, "learning_rate": 9.566891916278131e-06, "loss": 0.8272, "step": 3103 }, { "epoch": 0.15952307534176174, "grad_norm": 0.8947829008102417, "learning_rate": 9.566553032654061e-06, "loss": 0.6814, "step": 3104 }, { "epoch": 0.1595744680851064, "grad_norm": 0.8069021701812744, "learning_rate": 9.566214022509651e-06, "loss": 0.7176, "step": 3105 }, { "epoch": 0.159625860828451, "grad_norm": 1.1611638069152832, "learning_rate": 9.565874885854293e-06, "loss": 0.8048, "step": 3106 }, { "epoch": 0.15967725357179566, "grad_norm": 1.128555417060852, "learning_rate": 9.565535622697387e-06, "loss": 0.8009, "step": 3107 }, { "epoch": 0.1597286463151403, "grad_norm": 1.1363729238510132, "learning_rate": 9.565196233048325e-06, "loss": 0.8326, "step": 3108 }, { "epoch": 0.15978003905848495, "grad_norm": 0.7629484534263611, "learning_rate": 9.56485671691652e-06, "loss": 0.7318, "step": 3109 }, { "epoch": 0.15983143180182957, "grad_norm": 1.218059778213501, "learning_rate": 9.56451707431137e-06, "loss": 0.8601, "step": 3110 }, { "epoch": 0.15988282454517422, "grad_norm": 1.1709080934524536, "learning_rate": 9.56417730524229e-06, "loss": 0.7581, "step": 3111 }, { "epoch": 0.15993421728851887, "grad_norm": 1.1050457954406738, "learning_rate": 9.56383740971869e-06, "loss": 0.8224, "step": 3112 }, { "epoch": 0.1599856100318635, "grad_norm": 0.9180664420127869, "learning_rate": 9.563497387749993e-06, "loss": 0.7258, "step": 3113 }, { "epoch": 0.16003700277520813, "grad_norm": 1.1267971992492676, "learning_rate": 9.563157239345613e-06, "loss": 0.8106, "step": 3114 }, { "epoch": 0.16008839551855278, "grad_norm": 1.1436210870742798, "learning_rate": 9.562816964514979e-06, "loss": 0.8076, "step": 3115 }, { "epoch": 0.16013978826189743, "grad_norm": 1.1593447923660278, "learning_rate": 9.562476563267514e-06, "loss": 0.8037, "step": 3116 }, { "epoch": 0.16019118100524207, "grad_norm": 1.0998963117599487, "learning_rate": 9.562136035612653e-06, "loss": 0.7659, "step": 3117 }, { "epoch": 0.1602425737485867, "grad_norm": 0.7985364198684692, "learning_rate": 9.561795381559828e-06, "loss": 0.7135, "step": 3118 }, { "epoch": 0.16029396649193134, "grad_norm": 1.115679383277893, "learning_rate": 9.56145460111848e-06, "loss": 0.8586, "step": 3119 }, { "epoch": 0.16034535923527599, "grad_norm": 1.0436532497406006, "learning_rate": 9.56111369429805e-06, "loss": 0.7556, "step": 3120 }, { "epoch": 0.1603967519786206, "grad_norm": 1.1787986755371094, "learning_rate": 9.56077266110798e-06, "loss": 0.8416, "step": 3121 }, { "epoch": 0.16044814472196525, "grad_norm": 1.0705763101577759, "learning_rate": 9.560431501557721e-06, "loss": 0.7912, "step": 3122 }, { "epoch": 0.1604995374653099, "grad_norm": 0.7579458951950073, "learning_rate": 9.560090215656726e-06, "loss": 0.7193, "step": 3123 }, { "epoch": 0.16055093020865455, "grad_norm": 1.0716207027435303, "learning_rate": 9.559748803414448e-06, "loss": 0.8354, "step": 3124 }, { "epoch": 0.16060232295199917, "grad_norm": 1.0800081491470337, "learning_rate": 9.559407264840348e-06, "loss": 0.8406, "step": 3125 }, { "epoch": 0.1606537156953438, "grad_norm": 1.1658076047897339, "learning_rate": 9.55906559994389e-06, "loss": 0.8153, "step": 3126 }, { "epoch": 0.16070510843868846, "grad_norm": 1.1271469593048096, "learning_rate": 9.558723808734536e-06, "loss": 0.7817, "step": 3127 }, { "epoch": 0.1607565011820331, "grad_norm": 1.1793339252471924, "learning_rate": 9.55838189122176e-06, "loss": 0.8295, "step": 3128 }, { "epoch": 0.16080789392537773, "grad_norm": 1.1448228359222412, "learning_rate": 9.558039847415033e-06, "loss": 0.761, "step": 3129 }, { "epoch": 0.16085928666872237, "grad_norm": 1.167729377746582, "learning_rate": 9.55769767732383e-06, "loss": 0.7989, "step": 3130 }, { "epoch": 0.16091067941206702, "grad_norm": 1.1033068895339966, "learning_rate": 9.557355380957633e-06, "loss": 0.8118, "step": 3131 }, { "epoch": 0.16096207215541167, "grad_norm": 1.0857632160186768, "learning_rate": 9.557012958325926e-06, "loss": 0.8288, "step": 3132 }, { "epoch": 0.1610134648987563, "grad_norm": 1.127107858657837, "learning_rate": 9.556670409438197e-06, "loss": 0.8054, "step": 3133 }, { "epoch": 0.16106485764210093, "grad_norm": 1.1581300497055054, "learning_rate": 9.556327734303934e-06, "loss": 0.8236, "step": 3134 }, { "epoch": 0.16111625038544558, "grad_norm": 1.167819619178772, "learning_rate": 9.555984932932631e-06, "loss": 0.7537, "step": 3135 }, { "epoch": 0.16116764312879023, "grad_norm": 1.205673098564148, "learning_rate": 9.555642005333789e-06, "loss": 0.8111, "step": 3136 }, { "epoch": 0.16121903587213485, "grad_norm": 1.1420509815216064, "learning_rate": 9.555298951516907e-06, "loss": 0.8138, "step": 3137 }, { "epoch": 0.1612704286154795, "grad_norm": 1.1440784931182861, "learning_rate": 9.55495577149149e-06, "loss": 0.7964, "step": 3138 }, { "epoch": 0.16132182135882414, "grad_norm": 1.1752656698226929, "learning_rate": 9.554612465267044e-06, "loss": 0.8007, "step": 3139 }, { "epoch": 0.16137321410216876, "grad_norm": 1.0747060775756836, "learning_rate": 9.554269032853084e-06, "loss": 0.8033, "step": 3140 }, { "epoch": 0.1614246068455134, "grad_norm": 1.1762104034423828, "learning_rate": 9.553925474259123e-06, "loss": 0.839, "step": 3141 }, { "epoch": 0.16147599958885805, "grad_norm": 1.0837814807891846, "learning_rate": 9.55358178949468e-06, "loss": 0.7826, "step": 3142 }, { "epoch": 0.1615273923322027, "grad_norm": 1.1304336786270142, "learning_rate": 9.553237978569276e-06, "loss": 0.8474, "step": 3143 }, { "epoch": 0.16157878507554732, "grad_norm": 1.1496949195861816, "learning_rate": 9.552894041492439e-06, "loss": 0.8433, "step": 3144 }, { "epoch": 0.16163017781889197, "grad_norm": 1.1746634244918823, "learning_rate": 9.552549978273697e-06, "loss": 0.8133, "step": 3145 }, { "epoch": 0.16168157056223662, "grad_norm": 1.1522823572158813, "learning_rate": 9.552205788922582e-06, "loss": 0.7971, "step": 3146 }, { "epoch": 0.16173296330558126, "grad_norm": 1.119305968284607, "learning_rate": 9.55186147344863e-06, "loss": 0.773, "step": 3147 }, { "epoch": 0.16178435604892588, "grad_norm": 0.8213911056518555, "learning_rate": 9.55151703186138e-06, "loss": 0.7447, "step": 3148 }, { "epoch": 0.16183574879227053, "grad_norm": 1.1103883981704712, "learning_rate": 9.551172464170377e-06, "loss": 0.8008, "step": 3149 }, { "epoch": 0.16188714153561518, "grad_norm": 1.1040072441101074, "learning_rate": 9.550827770385167e-06, "loss": 0.7636, "step": 3150 }, { "epoch": 0.16193853427895982, "grad_norm": 1.128901481628418, "learning_rate": 9.550482950515301e-06, "loss": 0.794, "step": 3151 }, { "epoch": 0.16198992702230444, "grad_norm": 1.1219249963760376, "learning_rate": 9.550138004570328e-06, "loss": 0.7992, "step": 3152 }, { "epoch": 0.1620413197656491, "grad_norm": 1.0926544666290283, "learning_rate": 9.549792932559811e-06, "loss": 0.778, "step": 3153 }, { "epoch": 0.16209271250899374, "grad_norm": 0.995165228843689, "learning_rate": 9.549447734493306e-06, "loss": 0.7005, "step": 3154 }, { "epoch": 0.16214410525233838, "grad_norm": 1.1856826543807983, "learning_rate": 9.54910241038038e-06, "loss": 0.8078, "step": 3155 }, { "epoch": 0.162195497995683, "grad_norm": 1.161812424659729, "learning_rate": 9.548756960230598e-06, "loss": 0.8354, "step": 3156 }, { "epoch": 0.16224689073902765, "grad_norm": 1.1402584314346313, "learning_rate": 9.548411384053534e-06, "loss": 0.736, "step": 3157 }, { "epoch": 0.1622982834823723, "grad_norm": 1.092926263809204, "learning_rate": 9.548065681858758e-06, "loss": 0.8042, "step": 3158 }, { "epoch": 0.16234967622571692, "grad_norm": 1.1787275075912476, "learning_rate": 9.547719853655853e-06, "loss": 0.8125, "step": 3159 }, { "epoch": 0.16240106896906156, "grad_norm": 1.1544336080551147, "learning_rate": 9.547373899454397e-06, "loss": 0.8873, "step": 3160 }, { "epoch": 0.1624524617124062, "grad_norm": 1.1269683837890625, "learning_rate": 9.547027819263976e-06, "loss": 0.7755, "step": 3161 }, { "epoch": 0.16250385445575086, "grad_norm": 1.5253983736038208, "learning_rate": 9.54668161309418e-06, "loss": 0.7777, "step": 3162 }, { "epoch": 0.16255524719909548, "grad_norm": 1.1081044673919678, "learning_rate": 9.546335280954599e-06, "loss": 0.7656, "step": 3163 }, { "epoch": 0.16260663994244012, "grad_norm": 0.8480460047721863, "learning_rate": 9.545988822854829e-06, "loss": 0.71, "step": 3164 }, { "epoch": 0.16265803268578477, "grad_norm": 0.8821783661842346, "learning_rate": 9.545642238804469e-06, "loss": 0.7521, "step": 3165 }, { "epoch": 0.16270942542912942, "grad_norm": 1.2296350002288818, "learning_rate": 9.545295528813121e-06, "loss": 0.7608, "step": 3166 }, { "epoch": 0.16276081817247404, "grad_norm": 1.169236421585083, "learning_rate": 9.544948692890392e-06, "loss": 0.7668, "step": 3167 }, { "epoch": 0.16281221091581868, "grad_norm": 1.0998739004135132, "learning_rate": 9.544601731045888e-06, "loss": 0.7418, "step": 3168 }, { "epoch": 0.16286360365916333, "grad_norm": 1.2018539905548096, "learning_rate": 9.544254643289226e-06, "loss": 0.8165, "step": 3169 }, { "epoch": 0.16291499640250798, "grad_norm": 1.1397939920425415, "learning_rate": 9.543907429630021e-06, "loss": 0.9075, "step": 3170 }, { "epoch": 0.1629663891458526, "grad_norm": 1.1398556232452393, "learning_rate": 9.543560090077895e-06, "loss": 0.8091, "step": 3171 }, { "epoch": 0.16301778188919724, "grad_norm": 0.8102909922599792, "learning_rate": 9.543212624642466e-06, "loss": 0.7159, "step": 3172 }, { "epoch": 0.1630691746325419, "grad_norm": 1.1493078470230103, "learning_rate": 9.542865033333364e-06, "loss": 0.8216, "step": 3173 }, { "epoch": 0.16312056737588654, "grad_norm": 1.2763092517852783, "learning_rate": 9.542517316160222e-06, "loss": 0.833, "step": 3174 }, { "epoch": 0.16317196011923116, "grad_norm": 0.9804087281227112, "learning_rate": 9.542169473132669e-06, "loss": 0.6838, "step": 3175 }, { "epoch": 0.1632233528625758, "grad_norm": 1.14390230178833, "learning_rate": 9.541821504260345e-06, "loss": 0.7921, "step": 3176 }, { "epoch": 0.16327474560592045, "grad_norm": 0.901193380355835, "learning_rate": 9.541473409552888e-06, "loss": 0.7216, "step": 3177 }, { "epoch": 0.16332613834926507, "grad_norm": 1.3020175695419312, "learning_rate": 9.541125189019947e-06, "loss": 0.8005, "step": 3178 }, { "epoch": 0.16337753109260972, "grad_norm": 1.1251487731933594, "learning_rate": 9.540776842671166e-06, "loss": 0.807, "step": 3179 }, { "epoch": 0.16342892383595437, "grad_norm": 1.1327316761016846, "learning_rate": 9.540428370516197e-06, "loss": 0.7693, "step": 3180 }, { "epoch": 0.163480316579299, "grad_norm": 1.1738780736923218, "learning_rate": 9.540079772564695e-06, "loss": 0.8116, "step": 3181 }, { "epoch": 0.16353170932264363, "grad_norm": 1.2064341306686401, "learning_rate": 9.539731048826319e-06, "loss": 0.7979, "step": 3182 }, { "epoch": 0.16358310206598828, "grad_norm": 1.1245938539505005, "learning_rate": 9.53938219931073e-06, "loss": 0.8012, "step": 3183 }, { "epoch": 0.16363449480933293, "grad_norm": 1.338637113571167, "learning_rate": 9.539033224027594e-06, "loss": 0.8266, "step": 3184 }, { "epoch": 0.16368588755267757, "grad_norm": 1.0943418741226196, "learning_rate": 9.538684122986577e-06, "loss": 0.7632, "step": 3185 }, { "epoch": 0.1637372802960222, "grad_norm": 1.1340044736862183, "learning_rate": 9.538334896197355e-06, "loss": 0.8112, "step": 3186 }, { "epoch": 0.16378867303936684, "grad_norm": 1.062220811843872, "learning_rate": 9.5379855436696e-06, "loss": 0.7951, "step": 3187 }, { "epoch": 0.16384006578271149, "grad_norm": 1.0626157522201538, "learning_rate": 9.537636065412994e-06, "loss": 0.766, "step": 3188 }, { "epoch": 0.16389145852605613, "grad_norm": 0.9828280210494995, "learning_rate": 9.537286461437216e-06, "loss": 0.6822, "step": 3189 }, { "epoch": 0.16394285126940075, "grad_norm": 1.122507929801941, "learning_rate": 9.536936731751957e-06, "loss": 0.8461, "step": 3190 }, { "epoch": 0.1639942440127454, "grad_norm": 1.0993677377700806, "learning_rate": 9.536586876366902e-06, "loss": 0.8689, "step": 3191 }, { "epoch": 0.16404563675609005, "grad_norm": 1.0578441619873047, "learning_rate": 9.536236895291749e-06, "loss": 0.821, "step": 3192 }, { "epoch": 0.1640970294994347, "grad_norm": 1.0542157888412476, "learning_rate": 9.535886788536188e-06, "loss": 0.7938, "step": 3193 }, { "epoch": 0.1641484222427793, "grad_norm": 1.1256091594696045, "learning_rate": 9.535536556109924e-06, "loss": 0.7914, "step": 3194 }, { "epoch": 0.16419981498612396, "grad_norm": 0.7573138475418091, "learning_rate": 9.53518619802266e-06, "loss": 0.7009, "step": 3195 }, { "epoch": 0.1642512077294686, "grad_norm": 1.1330567598342896, "learning_rate": 9.5348357142841e-06, "loss": 0.821, "step": 3196 }, { "epoch": 0.16430260047281323, "grad_norm": 1.2477898597717285, "learning_rate": 9.534485104903959e-06, "loss": 0.8216, "step": 3197 }, { "epoch": 0.16435399321615787, "grad_norm": 1.0125752687454224, "learning_rate": 9.534134369891948e-06, "loss": 0.7528, "step": 3198 }, { "epoch": 0.16440538595950252, "grad_norm": 0.8818283081054688, "learning_rate": 9.533783509257784e-06, "loss": 0.7054, "step": 3199 }, { "epoch": 0.16445677870284717, "grad_norm": 1.1226658821105957, "learning_rate": 9.53343252301119e-06, "loss": 0.7999, "step": 3200 }, { "epoch": 0.1645081714461918, "grad_norm": 0.9891276955604553, "learning_rate": 9.533081411161886e-06, "loss": 0.7114, "step": 3201 }, { "epoch": 0.16455956418953643, "grad_norm": 1.0801430940628052, "learning_rate": 9.532730173719606e-06, "loss": 0.7552, "step": 3202 }, { "epoch": 0.16461095693288108, "grad_norm": 1.203872561454773, "learning_rate": 9.532378810694079e-06, "loss": 0.8366, "step": 3203 }, { "epoch": 0.16466234967622573, "grad_norm": 1.1817381381988525, "learning_rate": 9.532027322095037e-06, "loss": 0.754, "step": 3204 }, { "epoch": 0.16471374241957035, "grad_norm": 0.8346467018127441, "learning_rate": 9.531675707932221e-06, "loss": 0.6748, "step": 3205 }, { "epoch": 0.164765135162915, "grad_norm": 1.122004508972168, "learning_rate": 9.531323968215372e-06, "loss": 0.8271, "step": 3206 }, { "epoch": 0.16481652790625964, "grad_norm": 0.833706259727478, "learning_rate": 9.530972102954237e-06, "loss": 0.6826, "step": 3207 }, { "epoch": 0.1648679206496043, "grad_norm": 1.1040239334106445, "learning_rate": 9.530620112158561e-06, "loss": 0.8063, "step": 3208 }, { "epoch": 0.1649193133929489, "grad_norm": 0.7802413702011108, "learning_rate": 9.5302679958381e-06, "loss": 0.7054, "step": 3209 }, { "epoch": 0.16497070613629355, "grad_norm": 1.139785647392273, "learning_rate": 9.529915754002608e-06, "loss": 0.8067, "step": 3210 }, { "epoch": 0.1650220988796382, "grad_norm": 1.2267383337020874, "learning_rate": 9.529563386661845e-06, "loss": 0.8928, "step": 3211 }, { "epoch": 0.16507349162298285, "grad_norm": 1.18010675907135, "learning_rate": 9.52921089382557e-06, "loss": 0.7962, "step": 3212 }, { "epoch": 0.16512488436632747, "grad_norm": 1.0691472291946411, "learning_rate": 9.528858275503556e-06, "loss": 0.7564, "step": 3213 }, { "epoch": 0.16517627710967211, "grad_norm": 1.112184762954712, "learning_rate": 9.528505531705567e-06, "loss": 0.8289, "step": 3214 }, { "epoch": 0.16522766985301676, "grad_norm": 1.1967002153396606, "learning_rate": 9.528152662441376e-06, "loss": 0.8129, "step": 3215 }, { "epoch": 0.16527906259636138, "grad_norm": 1.06976318359375, "learning_rate": 9.527799667720764e-06, "loss": 0.7514, "step": 3216 }, { "epoch": 0.16533045533970603, "grad_norm": 1.1949013471603394, "learning_rate": 9.527446547553507e-06, "loss": 0.8407, "step": 3217 }, { "epoch": 0.16538184808305068, "grad_norm": 1.1443004608154297, "learning_rate": 9.52709330194939e-06, "loss": 0.8709, "step": 3218 }, { "epoch": 0.16543324082639532, "grad_norm": 1.0867016315460205, "learning_rate": 9.526739930918201e-06, "loss": 0.8091, "step": 3219 }, { "epoch": 0.16548463356973994, "grad_norm": 1.1380915641784668, "learning_rate": 9.526386434469727e-06, "loss": 0.7791, "step": 3220 }, { "epoch": 0.1655360263130846, "grad_norm": 1.0839924812316895, "learning_rate": 9.526032812613766e-06, "loss": 0.7819, "step": 3221 }, { "epoch": 0.16558741905642924, "grad_norm": 1.1421802043914795, "learning_rate": 9.525679065360113e-06, "loss": 0.8201, "step": 3222 }, { "epoch": 0.16563881179977388, "grad_norm": 1.0658698081970215, "learning_rate": 9.52532519271857e-06, "loss": 0.7183, "step": 3223 }, { "epoch": 0.1656902045431185, "grad_norm": 1.111944556236267, "learning_rate": 9.52497119469894e-06, "loss": 0.8406, "step": 3224 }, { "epoch": 0.16574159728646315, "grad_norm": 0.7980111837387085, "learning_rate": 9.524617071311031e-06, "loss": 0.7062, "step": 3225 }, { "epoch": 0.1657929900298078, "grad_norm": 1.1120167970657349, "learning_rate": 9.524262822564656e-06, "loss": 0.811, "step": 3226 }, { "epoch": 0.16584438277315244, "grad_norm": 1.0894891023635864, "learning_rate": 9.52390844846963e-06, "loss": 0.8326, "step": 3227 }, { "epoch": 0.16589577551649706, "grad_norm": 1.1481879949569702, "learning_rate": 9.523553949035768e-06, "loss": 0.8294, "step": 3228 }, { "epoch": 0.1659471682598417, "grad_norm": 1.0807256698608398, "learning_rate": 9.523199324272894e-06, "loss": 0.8193, "step": 3229 }, { "epoch": 0.16599856100318636, "grad_norm": 1.214396595954895, "learning_rate": 9.522844574190833e-06, "loss": 0.7622, "step": 3230 }, { "epoch": 0.16604995374653098, "grad_norm": 1.240247368812561, "learning_rate": 9.522489698799412e-06, "loss": 0.7571, "step": 3231 }, { "epoch": 0.16610134648987562, "grad_norm": 1.0789235830307007, "learning_rate": 9.522134698108468e-06, "loss": 0.8095, "step": 3232 }, { "epoch": 0.16615273923322027, "grad_norm": 1.0555205345153809, "learning_rate": 9.521779572127832e-06, "loss": 0.7576, "step": 3233 }, { "epoch": 0.16620413197656492, "grad_norm": 1.1000267267227173, "learning_rate": 9.521424320867343e-06, "loss": 0.8142, "step": 3234 }, { "epoch": 0.16625552471990954, "grad_norm": 0.7666199803352356, "learning_rate": 9.521068944336847e-06, "loss": 0.7112, "step": 3235 }, { "epoch": 0.16630691746325418, "grad_norm": 1.1695549488067627, "learning_rate": 9.520713442546186e-06, "loss": 0.8387, "step": 3236 }, { "epoch": 0.16635831020659883, "grad_norm": 0.7128877639770508, "learning_rate": 9.520357815505212e-06, "loss": 0.6888, "step": 3237 }, { "epoch": 0.16640970294994348, "grad_norm": 1.0921233892440796, "learning_rate": 9.520002063223777e-06, "loss": 0.7871, "step": 3238 }, { "epoch": 0.1664610956932881, "grad_norm": 1.150153636932373, "learning_rate": 9.519646185711739e-06, "loss": 0.8343, "step": 3239 }, { "epoch": 0.16651248843663274, "grad_norm": 1.1637077331542969, "learning_rate": 9.519290182978956e-06, "loss": 0.8023, "step": 3240 }, { "epoch": 0.1665638811799774, "grad_norm": 1.136567234992981, "learning_rate": 9.51893405503529e-06, "loss": 0.8462, "step": 3241 }, { "epoch": 0.16661527392332204, "grad_norm": 0.8910467624664307, "learning_rate": 9.518577801890612e-06, "loss": 0.7085, "step": 3242 }, { "epoch": 0.16666666666666666, "grad_norm": 1.3249033689498901, "learning_rate": 9.51822142355479e-06, "loss": 0.8509, "step": 3243 }, { "epoch": 0.1667180594100113, "grad_norm": 1.1374561786651611, "learning_rate": 9.517864920037698e-06, "loss": 0.7423, "step": 3244 }, { "epoch": 0.16676945215335595, "grad_norm": 1.2098907232284546, "learning_rate": 9.517508291349214e-06, "loss": 0.8424, "step": 3245 }, { "epoch": 0.1668208448967006, "grad_norm": 1.332830548286438, "learning_rate": 9.517151537499216e-06, "loss": 0.7662, "step": 3246 }, { "epoch": 0.16687223764004522, "grad_norm": 1.1399847269058228, "learning_rate": 9.516794658497593e-06, "loss": 0.8068, "step": 3247 }, { "epoch": 0.16692363038338986, "grad_norm": 1.1291741132736206, "learning_rate": 9.516437654354226e-06, "loss": 0.8556, "step": 3248 }, { "epoch": 0.1669750231267345, "grad_norm": 1.1566137075424194, "learning_rate": 9.516080525079013e-06, "loss": 0.8185, "step": 3249 }, { "epoch": 0.16702641587007913, "grad_norm": 0.9099522233009338, "learning_rate": 9.515723270681842e-06, "loss": 0.7148, "step": 3250 }, { "epoch": 0.16707780861342378, "grad_norm": 0.729370653629303, "learning_rate": 9.515365891172618e-06, "loss": 0.7262, "step": 3251 }, { "epoch": 0.16712920135676843, "grad_norm": 1.177507996559143, "learning_rate": 9.515008386561237e-06, "loss": 0.7557, "step": 3252 }, { "epoch": 0.16718059410011307, "grad_norm": 1.061783790588379, "learning_rate": 9.514650756857607e-06, "loss": 0.784, "step": 3253 }, { "epoch": 0.1672319868434577, "grad_norm": 1.0806080102920532, "learning_rate": 9.514293002071635e-06, "loss": 0.7376, "step": 3254 }, { "epoch": 0.16728337958680234, "grad_norm": 1.1734668016433716, "learning_rate": 9.513935122213232e-06, "loss": 0.76, "step": 3255 }, { "epoch": 0.16733477233014699, "grad_norm": 1.2129284143447876, "learning_rate": 9.513577117292317e-06, "loss": 0.814, "step": 3256 }, { "epoch": 0.16738616507349163, "grad_norm": 1.1298599243164062, "learning_rate": 9.513218987318805e-06, "loss": 0.7892, "step": 3257 }, { "epoch": 0.16743755781683625, "grad_norm": 1.1670111417770386, "learning_rate": 9.51286073230262e-06, "loss": 0.8502, "step": 3258 }, { "epoch": 0.1674889505601809, "grad_norm": 1.088180422782898, "learning_rate": 9.512502352253687e-06, "loss": 0.7749, "step": 3259 }, { "epoch": 0.16754034330352555, "grad_norm": 1.066748857498169, "learning_rate": 9.512143847181938e-06, "loss": 0.7887, "step": 3260 }, { "epoch": 0.1675917360468702, "grad_norm": 1.1440898180007935, "learning_rate": 9.5117852170973e-06, "loss": 0.7662, "step": 3261 }, { "epoch": 0.1676431287902148, "grad_norm": 1.184906244277954, "learning_rate": 9.511426462009716e-06, "loss": 0.8306, "step": 3262 }, { "epoch": 0.16769452153355946, "grad_norm": 1.097861886024475, "learning_rate": 9.511067581929122e-06, "loss": 0.7857, "step": 3263 }, { "epoch": 0.1677459142769041, "grad_norm": 7.879432678222656, "learning_rate": 9.51070857686546e-06, "loss": 0.8673, "step": 3264 }, { "epoch": 0.16779730702024875, "grad_norm": 1.1445090770721436, "learning_rate": 9.510349446828677e-06, "loss": 0.7599, "step": 3265 }, { "epoch": 0.16784869976359337, "grad_norm": 1.1551319360733032, "learning_rate": 9.509990191828727e-06, "loss": 0.7779, "step": 3266 }, { "epoch": 0.16790009250693802, "grad_norm": 1.249333381652832, "learning_rate": 9.509630811875557e-06, "loss": 0.7678, "step": 3267 }, { "epoch": 0.16795148525028267, "grad_norm": 1.315457820892334, "learning_rate": 9.509271306979129e-06, "loss": 0.8145, "step": 3268 }, { "epoch": 0.1680028779936273, "grad_norm": 1.1426721811294556, "learning_rate": 9.508911677149402e-06, "loss": 0.7596, "step": 3269 }, { "epoch": 0.16805427073697193, "grad_norm": 1.002423644065857, "learning_rate": 9.50855192239634e-06, "loss": 0.6855, "step": 3270 }, { "epoch": 0.16810566348031658, "grad_norm": 1.2738754749298096, "learning_rate": 9.508192042729908e-06, "loss": 0.8331, "step": 3271 }, { "epoch": 0.16815705622366123, "grad_norm": 1.1588183641433716, "learning_rate": 9.507832038160081e-06, "loss": 0.817, "step": 3272 }, { "epoch": 0.16820844896700585, "grad_norm": 1.1641695499420166, "learning_rate": 9.50747190869683e-06, "loss": 0.8409, "step": 3273 }, { "epoch": 0.1682598417103505, "grad_norm": 1.1562764644622803, "learning_rate": 9.507111654350134e-06, "loss": 0.8083, "step": 3274 }, { "epoch": 0.16831123445369514, "grad_norm": 1.1415313482284546, "learning_rate": 9.506751275129974e-06, "loss": 0.7253, "step": 3275 }, { "epoch": 0.1683626271970398, "grad_norm": 1.1189708709716797, "learning_rate": 9.506390771046332e-06, "loss": 0.7325, "step": 3276 }, { "epoch": 0.1684140199403844, "grad_norm": 1.1310651302337646, "learning_rate": 9.506030142109202e-06, "loss": 0.8092, "step": 3277 }, { "epoch": 0.16846541268372905, "grad_norm": 2.6251814365386963, "learning_rate": 9.505669388328568e-06, "loss": 0.8332, "step": 3278 }, { "epoch": 0.1685168054270737, "grad_norm": 1.1220414638519287, "learning_rate": 9.505308509714433e-06, "loss": 0.7718, "step": 3279 }, { "epoch": 0.16856819817041835, "grad_norm": 0.7799605131149292, "learning_rate": 9.50494750627679e-06, "loss": 0.6964, "step": 3280 }, { "epoch": 0.16861959091376297, "grad_norm": 0.8771004676818848, "learning_rate": 9.504586378025641e-06, "loss": 0.7629, "step": 3281 }, { "epoch": 0.16867098365710761, "grad_norm": 1.1117522716522217, "learning_rate": 9.504225124970997e-06, "loss": 0.7906, "step": 3282 }, { "epoch": 0.16872237640045226, "grad_norm": 1.0804903507232666, "learning_rate": 9.50386374712286e-06, "loss": 0.7739, "step": 3283 }, { "epoch": 0.1687737691437969, "grad_norm": 1.1400279998779297, "learning_rate": 9.503502244491243e-06, "loss": 0.7996, "step": 3284 }, { "epoch": 0.16882516188714153, "grad_norm": 1.1324580907821655, "learning_rate": 9.503140617086165e-06, "loss": 0.823, "step": 3285 }, { "epoch": 0.16887655463048618, "grad_norm": 1.1264513731002808, "learning_rate": 9.502778864917644e-06, "loss": 0.7816, "step": 3286 }, { "epoch": 0.16892794737383082, "grad_norm": 1.1614048480987549, "learning_rate": 9.502416987995704e-06, "loss": 0.8072, "step": 3287 }, { "epoch": 0.16897934011717544, "grad_norm": 1.0859893560409546, "learning_rate": 9.502054986330367e-06, "loss": 0.7857, "step": 3288 }, { "epoch": 0.1690307328605201, "grad_norm": 1.1314196586608887, "learning_rate": 9.501692859931669e-06, "loss": 0.7767, "step": 3289 }, { "epoch": 0.16908212560386474, "grad_norm": 1.1476948261260986, "learning_rate": 9.501330608809636e-06, "loss": 0.8269, "step": 3290 }, { "epoch": 0.16913351834720938, "grad_norm": 1.1194007396697998, "learning_rate": 9.500968232974308e-06, "loss": 0.7872, "step": 3291 }, { "epoch": 0.169184911090554, "grad_norm": 0.8801155090332031, "learning_rate": 9.500605732435726e-06, "loss": 0.6508, "step": 3292 }, { "epoch": 0.16923630383389865, "grad_norm": 1.1869466304779053, "learning_rate": 9.50024310720393e-06, "loss": 0.8529, "step": 3293 }, { "epoch": 0.1692876965772433, "grad_norm": 1.105828881263733, "learning_rate": 9.49988035728897e-06, "loss": 0.7789, "step": 3294 }, { "epoch": 0.16933908932058794, "grad_norm": 1.0911446809768677, "learning_rate": 9.499517482700896e-06, "loss": 0.784, "step": 3295 }, { "epoch": 0.16939048206393256, "grad_norm": 1.089745283126831, "learning_rate": 9.49915448344976e-06, "loss": 0.7817, "step": 3296 }, { "epoch": 0.1694418748072772, "grad_norm": 0.8322347402572632, "learning_rate": 9.49879135954562e-06, "loss": 0.7325, "step": 3297 }, { "epoch": 0.16949326755062186, "grad_norm": 0.7731889486312866, "learning_rate": 9.498428110998538e-06, "loss": 0.7376, "step": 3298 }, { "epoch": 0.1695446602939665, "grad_norm": 0.6958489418029785, "learning_rate": 9.498064737818577e-06, "loss": 0.6839, "step": 3299 }, { "epoch": 0.16959605303731112, "grad_norm": 1.264474868774414, "learning_rate": 9.497701240015805e-06, "loss": 0.7524, "step": 3300 }, { "epoch": 0.16964744578065577, "grad_norm": 0.7674562335014343, "learning_rate": 9.497337617600291e-06, "loss": 0.716, "step": 3301 }, { "epoch": 0.16969883852400042, "grad_norm": 1.1512178182601929, "learning_rate": 9.496973870582112e-06, "loss": 0.7777, "step": 3302 }, { "epoch": 0.16975023126734506, "grad_norm": 1.2055432796478271, "learning_rate": 9.496609998971343e-06, "loss": 0.8092, "step": 3303 }, { "epoch": 0.16980162401068968, "grad_norm": 1.0939481258392334, "learning_rate": 9.49624600277807e-06, "loss": 0.793, "step": 3304 }, { "epoch": 0.16985301675403433, "grad_norm": 1.134118914604187, "learning_rate": 9.495881882012374e-06, "loss": 0.8288, "step": 3305 }, { "epoch": 0.16990440949737898, "grad_norm": 1.0895812511444092, "learning_rate": 9.495517636684343e-06, "loss": 0.8047, "step": 3306 }, { "epoch": 0.1699558022407236, "grad_norm": 1.0916963815689087, "learning_rate": 9.495153266804072e-06, "loss": 0.7736, "step": 3307 }, { "epoch": 0.17000719498406824, "grad_norm": 1.1814486980438232, "learning_rate": 9.494788772381653e-06, "loss": 0.8169, "step": 3308 }, { "epoch": 0.1700585877274129, "grad_norm": 1.1182068586349487, "learning_rate": 9.494424153427188e-06, "loss": 0.8167, "step": 3309 }, { "epoch": 0.17010998047075754, "grad_norm": 1.1320017576217651, "learning_rate": 9.494059409950776e-06, "loss": 0.8143, "step": 3310 }, { "epoch": 0.17016137321410216, "grad_norm": 1.2176772356033325, "learning_rate": 9.493694541962524e-06, "loss": 0.8074, "step": 3311 }, { "epoch": 0.1702127659574468, "grad_norm": 1.2402642965316772, "learning_rate": 9.493329549472542e-06, "loss": 0.8194, "step": 3312 }, { "epoch": 0.17026415870079145, "grad_norm": 1.244359016418457, "learning_rate": 9.492964432490939e-06, "loss": 0.7585, "step": 3313 }, { "epoch": 0.1703155514441361, "grad_norm": 1.1706732511520386, "learning_rate": 9.492599191027832e-06, "loss": 0.8453, "step": 3314 }, { "epoch": 0.17036694418748072, "grad_norm": 1.1012581586837769, "learning_rate": 9.492233825093343e-06, "loss": 0.78, "step": 3315 }, { "epoch": 0.17041833693082536, "grad_norm": 1.0937896966934204, "learning_rate": 9.491868334697592e-06, "loss": 0.7669, "step": 3316 }, { "epoch": 0.17046972967417, "grad_norm": 1.195407748222351, "learning_rate": 9.491502719850707e-06, "loss": 0.8287, "step": 3317 }, { "epoch": 0.17052112241751466, "grad_norm": 1.0681138038635254, "learning_rate": 9.491136980562819e-06, "loss": 0.7145, "step": 3318 }, { "epoch": 0.17057251516085928, "grad_norm": 1.1032580137252808, "learning_rate": 9.490771116844054e-06, "loss": 0.7816, "step": 3319 }, { "epoch": 0.17062390790420393, "grad_norm": 1.0793702602386475, "learning_rate": 9.49040512870456e-06, "loss": 0.788, "step": 3320 }, { "epoch": 0.17067530064754857, "grad_norm": 0.9614205956459045, "learning_rate": 9.490039016154466e-06, "loss": 0.7102, "step": 3321 }, { "epoch": 0.17072669339089322, "grad_norm": 1.2258070707321167, "learning_rate": 9.489672779203923e-06, "loss": 0.8231, "step": 3322 }, { "epoch": 0.17077808613423784, "grad_norm": 0.868203341960907, "learning_rate": 9.489306417863073e-06, "loss": 0.7297, "step": 3323 }, { "epoch": 0.17082947887758249, "grad_norm": 1.1281613111495972, "learning_rate": 9.488939932142069e-06, "loss": 0.7735, "step": 3324 }, { "epoch": 0.17088087162092713, "grad_norm": 1.1692947149276733, "learning_rate": 9.488573322051065e-06, "loss": 0.8042, "step": 3325 }, { "epoch": 0.17093226436427175, "grad_norm": 1.132978081703186, "learning_rate": 9.488206587600216e-06, "loss": 0.8566, "step": 3326 }, { "epoch": 0.1709836571076164, "grad_norm": 1.1298434734344482, "learning_rate": 9.487839728799685e-06, "loss": 0.7854, "step": 3327 }, { "epoch": 0.17103504985096105, "grad_norm": 1.1052907705307007, "learning_rate": 9.487472745659636e-06, "loss": 0.7851, "step": 3328 }, { "epoch": 0.1710864425943057, "grad_norm": 1.2407386302947998, "learning_rate": 9.487105638190235e-06, "loss": 0.8049, "step": 3329 }, { "epoch": 0.1711378353376503, "grad_norm": 1.0993572473526, "learning_rate": 9.486738406401654e-06, "loss": 0.7695, "step": 3330 }, { "epoch": 0.17118922808099496, "grad_norm": 1.0431360006332397, "learning_rate": 9.486371050304069e-06, "loss": 0.8087, "step": 3331 }, { "epoch": 0.1712406208243396, "grad_norm": 1.106719732284546, "learning_rate": 9.486003569907653e-06, "loss": 0.7928, "step": 3332 }, { "epoch": 0.17129201356768425, "grad_norm": 1.226104497909546, "learning_rate": 9.485635965222592e-06, "loss": 0.7764, "step": 3333 }, { "epoch": 0.17134340631102887, "grad_norm": 1.1749354600906372, "learning_rate": 9.485268236259073e-06, "loss": 0.8315, "step": 3334 }, { "epoch": 0.17139479905437352, "grad_norm": 1.1435697078704834, "learning_rate": 9.484900383027277e-06, "loss": 0.8452, "step": 3335 }, { "epoch": 0.17144619179771817, "grad_norm": 1.1379668712615967, "learning_rate": 9.484532405537401e-06, "loss": 0.8488, "step": 3336 }, { "epoch": 0.17149758454106281, "grad_norm": 1.1501673460006714, "learning_rate": 9.484164303799638e-06, "loss": 0.7658, "step": 3337 }, { "epoch": 0.17154897728440743, "grad_norm": 1.1365940570831299, "learning_rate": 9.483796077824187e-06, "loss": 0.7984, "step": 3338 }, { "epoch": 0.17160037002775208, "grad_norm": 1.1052764654159546, "learning_rate": 9.483427727621251e-06, "loss": 0.7292, "step": 3339 }, { "epoch": 0.17165176277109673, "grad_norm": 0.9644396305084229, "learning_rate": 9.483059253201035e-06, "loss": 0.7018, "step": 3340 }, { "epoch": 0.17170315551444137, "grad_norm": 1.1723569631576538, "learning_rate": 9.482690654573745e-06, "loss": 0.7905, "step": 3341 }, { "epoch": 0.171754548257786, "grad_norm": 1.127434253692627, "learning_rate": 9.482321931749598e-06, "loss": 0.8272, "step": 3342 }, { "epoch": 0.17180594100113064, "grad_norm": 1.1210561990737915, "learning_rate": 9.481953084738809e-06, "loss": 0.8372, "step": 3343 }, { "epoch": 0.1718573337444753, "grad_norm": 1.122659683227539, "learning_rate": 9.481584113551594e-06, "loss": 0.8206, "step": 3344 }, { "epoch": 0.1719087264878199, "grad_norm": 0.8356234431266785, "learning_rate": 9.481215018198177e-06, "loss": 0.6954, "step": 3345 }, { "epoch": 0.17196011923116455, "grad_norm": 0.9994503259658813, "learning_rate": 9.480845798688786e-06, "loss": 0.7696, "step": 3346 }, { "epoch": 0.1720115119745092, "grad_norm": 1.1517689228057861, "learning_rate": 9.48047645503365e-06, "loss": 0.8747, "step": 3347 }, { "epoch": 0.17206290471785385, "grad_norm": 1.082833170890808, "learning_rate": 9.480106987243001e-06, "loss": 0.7575, "step": 3348 }, { "epoch": 0.17211429746119847, "grad_norm": 1.090004801750183, "learning_rate": 9.479737395327074e-06, "loss": 0.8026, "step": 3349 }, { "epoch": 0.17216569020454311, "grad_norm": 0.8001365661621094, "learning_rate": 9.479367679296111e-06, "loss": 0.7162, "step": 3350 }, { "epoch": 0.17221708294788776, "grad_norm": 1.2041667699813843, "learning_rate": 9.478997839160356e-06, "loss": 0.8279, "step": 3351 }, { "epoch": 0.1722684756912324, "grad_norm": 1.1921515464782715, "learning_rate": 9.478627874930053e-06, "loss": 0.8779, "step": 3352 }, { "epoch": 0.17231986843457703, "grad_norm": 1.1196902990341187, "learning_rate": 9.478257786615457e-06, "loss": 0.7982, "step": 3353 }, { "epoch": 0.17237126117792168, "grad_norm": 1.0839660167694092, "learning_rate": 9.477887574226815e-06, "loss": 0.7863, "step": 3354 }, { "epoch": 0.17242265392126632, "grad_norm": 1.1060118675231934, "learning_rate": 9.47751723777439e-06, "loss": 0.8044, "step": 3355 }, { "epoch": 0.17247404666461097, "grad_norm": 0.7706414461135864, "learning_rate": 9.477146777268437e-06, "loss": 0.7154, "step": 3356 }, { "epoch": 0.1725254394079556, "grad_norm": 1.1496667861938477, "learning_rate": 9.476776192719226e-06, "loss": 0.8046, "step": 3357 }, { "epoch": 0.17257683215130024, "grad_norm": 1.1055561304092407, "learning_rate": 9.476405484137019e-06, "loss": 0.8151, "step": 3358 }, { "epoch": 0.17262822489464488, "grad_norm": 0.777931809425354, "learning_rate": 9.476034651532092e-06, "loss": 0.7334, "step": 3359 }, { "epoch": 0.1726796176379895, "grad_norm": 1.083731770515442, "learning_rate": 9.475663694914712e-06, "loss": 0.7969, "step": 3360 }, { "epoch": 0.17273101038133415, "grad_norm": 1.0626894235610962, "learning_rate": 9.475292614295163e-06, "loss": 0.7869, "step": 3361 }, { "epoch": 0.1727824031246788, "grad_norm": 1.1252862215042114, "learning_rate": 9.474921409683726e-06, "loss": 0.7781, "step": 3362 }, { "epoch": 0.17283379586802344, "grad_norm": 1.1365172863006592, "learning_rate": 9.474550081090681e-06, "loss": 0.811, "step": 3363 }, { "epoch": 0.17288518861136806, "grad_norm": 1.1246477365493774, "learning_rate": 9.47417862852632e-06, "loss": 0.7581, "step": 3364 }, { "epoch": 0.1729365813547127, "grad_norm": 1.1258962154388428, "learning_rate": 9.473807052000933e-06, "loss": 0.8032, "step": 3365 }, { "epoch": 0.17298797409805736, "grad_norm": 1.127597689628601, "learning_rate": 9.473435351524815e-06, "loss": 0.8295, "step": 3366 }, { "epoch": 0.173039366841402, "grad_norm": 1.3430150747299194, "learning_rate": 9.473063527108264e-06, "loss": 0.7765, "step": 3367 }, { "epoch": 0.17309075958474662, "grad_norm": 1.072946548461914, "learning_rate": 9.472691578761582e-06, "loss": 0.7691, "step": 3368 }, { "epoch": 0.17314215232809127, "grad_norm": 1.13961660861969, "learning_rate": 9.472319506495073e-06, "loss": 0.806, "step": 3369 }, { "epoch": 0.17319354507143592, "grad_norm": 1.1598742008209229, "learning_rate": 9.471947310319047e-06, "loss": 0.8787, "step": 3370 }, { "epoch": 0.17324493781478056, "grad_norm": 1.017896294593811, "learning_rate": 9.471574990243818e-06, "loss": 0.7732, "step": 3371 }, { "epoch": 0.17329633055812518, "grad_norm": 1.1058242321014404, "learning_rate": 9.471202546279695e-06, "loss": 0.7502, "step": 3372 }, { "epoch": 0.17334772330146983, "grad_norm": 1.0317755937576294, "learning_rate": 9.470829978437004e-06, "loss": 0.7443, "step": 3373 }, { "epoch": 0.17339911604481448, "grad_norm": 1.1426212787628174, "learning_rate": 9.470457286726063e-06, "loss": 0.827, "step": 3374 }, { "epoch": 0.17345050878815912, "grad_norm": 0.8564515709877014, "learning_rate": 9.470084471157199e-06, "loss": 0.6842, "step": 3375 }, { "epoch": 0.17350190153150374, "grad_norm": 1.166164755821228, "learning_rate": 9.469711531740744e-06, "loss": 0.7888, "step": 3376 }, { "epoch": 0.1735532942748484, "grad_norm": 0.7758494019508362, "learning_rate": 9.469338468487023e-06, "loss": 0.6676, "step": 3377 }, { "epoch": 0.17360468701819304, "grad_norm": 0.8199867606163025, "learning_rate": 9.468965281406381e-06, "loss": 0.6914, "step": 3378 }, { "epoch": 0.17365607976153766, "grad_norm": 1.135945200920105, "learning_rate": 9.468591970509153e-06, "loss": 0.841, "step": 3379 }, { "epoch": 0.1737074725048823, "grad_norm": 1.3191595077514648, "learning_rate": 9.46821853580568e-06, "loss": 0.844, "step": 3380 }, { "epoch": 0.17375886524822695, "grad_norm": 1.1246620416641235, "learning_rate": 9.467844977306313e-06, "loss": 0.7744, "step": 3381 }, { "epoch": 0.1738102579915716, "grad_norm": 1.1327685117721558, "learning_rate": 9.467471295021397e-06, "loss": 0.751, "step": 3382 }, { "epoch": 0.17386165073491622, "grad_norm": 1.1279501914978027, "learning_rate": 9.46709748896129e-06, "loss": 0.8304, "step": 3383 }, { "epoch": 0.17391304347826086, "grad_norm": 1.1337774991989136, "learning_rate": 9.466723559136343e-06, "loss": 0.8378, "step": 3384 }, { "epoch": 0.1739644362216055, "grad_norm": 1.100205421447754, "learning_rate": 9.466349505556922e-06, "loss": 0.7181, "step": 3385 }, { "epoch": 0.17401582896495016, "grad_norm": 1.154875636100769, "learning_rate": 9.465975328233387e-06, "loss": 0.8112, "step": 3386 }, { "epoch": 0.17406722170829478, "grad_norm": 0.9268988370895386, "learning_rate": 9.465601027176108e-06, "loss": 0.7446, "step": 3387 }, { "epoch": 0.17411861445163943, "grad_norm": 1.108013391494751, "learning_rate": 9.46522660239545e-06, "loss": 0.7554, "step": 3388 }, { "epoch": 0.17417000719498407, "grad_norm": 1.0779836177825928, "learning_rate": 9.464852053901789e-06, "loss": 0.7719, "step": 3389 }, { "epoch": 0.17422139993832872, "grad_norm": 1.0938935279846191, "learning_rate": 9.464477381705505e-06, "loss": 0.7854, "step": 3390 }, { "epoch": 0.17427279268167334, "grad_norm": 1.1851407289505005, "learning_rate": 9.464102585816977e-06, "loss": 0.7709, "step": 3391 }, { "epoch": 0.17432418542501799, "grad_norm": 1.1759357452392578, "learning_rate": 9.463727666246586e-06, "loss": 0.8114, "step": 3392 }, { "epoch": 0.17437557816836263, "grad_norm": 1.09968900680542, "learning_rate": 9.463352623004725e-06, "loss": 0.7889, "step": 3393 }, { "epoch": 0.17442697091170728, "grad_norm": 1.1203958988189697, "learning_rate": 9.462977456101781e-06, "loss": 0.7214, "step": 3394 }, { "epoch": 0.1744783636550519, "grad_norm": 0.8718360662460327, "learning_rate": 9.462602165548148e-06, "loss": 0.7201, "step": 3395 }, { "epoch": 0.17452975639839655, "grad_norm": 1.0940511226654053, "learning_rate": 9.462226751354227e-06, "loss": 0.8226, "step": 3396 }, { "epoch": 0.1745811491417412, "grad_norm": 1.1910855770111084, "learning_rate": 9.461851213530415e-06, "loss": 0.8348, "step": 3397 }, { "epoch": 0.1746325418850858, "grad_norm": 1.0758998394012451, "learning_rate": 9.461475552087121e-06, "loss": 0.7697, "step": 3398 }, { "epoch": 0.17468393462843046, "grad_norm": 1.1878212690353394, "learning_rate": 9.46109976703475e-06, "loss": 0.7933, "step": 3399 }, { "epoch": 0.1747353273717751, "grad_norm": 1.2305246591567993, "learning_rate": 9.460723858383714e-06, "loss": 0.7961, "step": 3400 }, { "epoch": 0.17478672011511975, "grad_norm": 1.0558608770370483, "learning_rate": 9.460347826144429e-06, "loss": 0.7906, "step": 3401 }, { "epoch": 0.17483811285846437, "grad_norm": 1.1486040353775024, "learning_rate": 9.45997167032731e-06, "loss": 0.7635, "step": 3402 }, { "epoch": 0.17488950560180902, "grad_norm": 1.0449331998825073, "learning_rate": 9.459595390942785e-06, "loss": 0.7943, "step": 3403 }, { "epoch": 0.17494089834515367, "grad_norm": 1.1241176128387451, "learning_rate": 9.459218988001273e-06, "loss": 0.8407, "step": 3404 }, { "epoch": 0.17499229108849831, "grad_norm": 1.0613652467727661, "learning_rate": 9.458842461513206e-06, "loss": 0.7654, "step": 3405 }, { "epoch": 0.17504368383184293, "grad_norm": 1.140722393989563, "learning_rate": 9.458465811489014e-06, "loss": 0.8602, "step": 3406 }, { "epoch": 0.17509507657518758, "grad_norm": 1.141890048980713, "learning_rate": 9.458089037939134e-06, "loss": 0.8262, "step": 3407 }, { "epoch": 0.17514646931853223, "grad_norm": 1.3865612745285034, "learning_rate": 9.457712140874006e-06, "loss": 0.8142, "step": 3408 }, { "epoch": 0.17519786206187687, "grad_norm": 1.126225233078003, "learning_rate": 9.457335120304068e-06, "loss": 0.8215, "step": 3409 }, { "epoch": 0.1752492548052215, "grad_norm": 0.87749844789505, "learning_rate": 9.456957976239767e-06, "loss": 0.702, "step": 3410 }, { "epoch": 0.17530064754856614, "grad_norm": 1.1537779569625854, "learning_rate": 9.456580708691556e-06, "loss": 0.8224, "step": 3411 }, { "epoch": 0.1753520402919108, "grad_norm": 0.8072025179862976, "learning_rate": 9.456203317669884e-06, "loss": 0.711, "step": 3412 }, { "epoch": 0.17540343303525543, "grad_norm": 1.1483186483383179, "learning_rate": 9.455825803185206e-06, "loss": 0.8369, "step": 3413 }, { "epoch": 0.17545482577860005, "grad_norm": 1.0160456895828247, "learning_rate": 9.455448165247985e-06, "loss": 0.7476, "step": 3414 }, { "epoch": 0.1755062185219447, "grad_norm": 1.089198112487793, "learning_rate": 9.455070403868682e-06, "loss": 0.7481, "step": 3415 }, { "epoch": 0.17555761126528935, "grad_norm": 1.1497104167938232, "learning_rate": 9.454692519057763e-06, "loss": 0.7798, "step": 3416 }, { "epoch": 0.17560900400863397, "grad_norm": 0.7521306872367859, "learning_rate": 9.454314510825698e-06, "loss": 0.7211, "step": 3417 }, { "epoch": 0.17566039675197861, "grad_norm": 1.1692216396331787, "learning_rate": 9.453936379182957e-06, "loss": 0.8858, "step": 3418 }, { "epoch": 0.17571178949532326, "grad_norm": 1.2222484350204468, "learning_rate": 9.453558124140023e-06, "loss": 0.7688, "step": 3419 }, { "epoch": 0.1757631822386679, "grad_norm": 1.0425777435302734, "learning_rate": 9.45317974570737e-06, "loss": 0.8038, "step": 3420 }, { "epoch": 0.17581457498201253, "grad_norm": 1.1201952695846558, "learning_rate": 9.452801243895485e-06, "loss": 0.7852, "step": 3421 }, { "epoch": 0.17586596772535718, "grad_norm": 1.2040051221847534, "learning_rate": 9.452422618714852e-06, "loss": 0.7955, "step": 3422 }, { "epoch": 0.17591736046870182, "grad_norm": 1.370451807975769, "learning_rate": 9.452043870175962e-06, "loss": 0.7553, "step": 3423 }, { "epoch": 0.17596875321204647, "grad_norm": 1.1475248336791992, "learning_rate": 9.451664998289307e-06, "loss": 0.7789, "step": 3424 }, { "epoch": 0.1760201459553911, "grad_norm": 1.0839588642120361, "learning_rate": 9.451286003065389e-06, "loss": 0.8171, "step": 3425 }, { "epoch": 0.17607153869873574, "grad_norm": 1.2139391899108887, "learning_rate": 9.450906884514705e-06, "loss": 0.8433, "step": 3426 }, { "epoch": 0.17612293144208038, "grad_norm": 1.146639347076416, "learning_rate": 9.450527642647757e-06, "loss": 0.789, "step": 3427 }, { "epoch": 0.17617432418542503, "grad_norm": 1.11262845993042, "learning_rate": 9.450148277475057e-06, "loss": 0.7421, "step": 3428 }, { "epoch": 0.17622571692876965, "grad_norm": 0.7766870856285095, "learning_rate": 9.44976878900711e-06, "loss": 0.7161, "step": 3429 }, { "epoch": 0.1762771096721143, "grad_norm": 1.1464704275131226, "learning_rate": 9.449389177254436e-06, "loss": 0.7977, "step": 3430 }, { "epoch": 0.17632850241545894, "grad_norm": 1.0623836517333984, "learning_rate": 9.449009442227548e-06, "loss": 0.8414, "step": 3431 }, { "epoch": 0.1763798951588036, "grad_norm": 1.2111117839813232, "learning_rate": 9.448629583936967e-06, "loss": 0.8343, "step": 3432 }, { "epoch": 0.1764312879021482, "grad_norm": 0.7276771068572998, "learning_rate": 9.44824960239322e-06, "loss": 0.6972, "step": 3433 }, { "epoch": 0.17648268064549286, "grad_norm": 1.1208516359329224, "learning_rate": 9.447869497606833e-06, "loss": 0.8134, "step": 3434 }, { "epoch": 0.1765340733888375, "grad_norm": 1.140098214149475, "learning_rate": 9.44748926958834e-06, "loss": 0.8114, "step": 3435 }, { "epoch": 0.17658546613218212, "grad_norm": 1.0979682207107544, "learning_rate": 9.44710891834827e-06, "loss": 0.7932, "step": 3436 }, { "epoch": 0.17663685887552677, "grad_norm": 1.090012788772583, "learning_rate": 9.446728443897164e-06, "loss": 0.8027, "step": 3437 }, { "epoch": 0.17668825161887142, "grad_norm": 1.0815987586975098, "learning_rate": 9.446347846245566e-06, "loss": 0.7867, "step": 3438 }, { "epoch": 0.17673964436221606, "grad_norm": 1.1917003393173218, "learning_rate": 9.445967125404014e-06, "loss": 0.8368, "step": 3439 }, { "epoch": 0.17679103710556068, "grad_norm": 1.1139849424362183, "learning_rate": 9.445586281383064e-06, "loss": 0.7661, "step": 3440 }, { "epoch": 0.17684242984890533, "grad_norm": 0.7517603039741516, "learning_rate": 9.445205314193263e-06, "loss": 0.6879, "step": 3441 }, { "epoch": 0.17689382259224998, "grad_norm": 1.1422585248947144, "learning_rate": 9.444824223845165e-06, "loss": 0.7711, "step": 3442 }, { "epoch": 0.17694521533559462, "grad_norm": 1.1635183095932007, "learning_rate": 9.444443010349333e-06, "loss": 0.8472, "step": 3443 }, { "epoch": 0.17699660807893924, "grad_norm": 1.060729742050171, "learning_rate": 9.444061673716327e-06, "loss": 0.8276, "step": 3444 }, { "epoch": 0.1770480008222839, "grad_norm": 1.1513441801071167, "learning_rate": 9.44368021395671e-06, "loss": 0.7628, "step": 3445 }, { "epoch": 0.17709939356562854, "grad_norm": 1.1990025043487549, "learning_rate": 9.44329863108105e-06, "loss": 0.8096, "step": 3446 }, { "epoch": 0.17715078630897318, "grad_norm": 1.1629341840744019, "learning_rate": 9.442916925099925e-06, "loss": 0.8382, "step": 3447 }, { "epoch": 0.1772021790523178, "grad_norm": 0.7434694766998291, "learning_rate": 9.442535096023906e-06, "loss": 0.73, "step": 3448 }, { "epoch": 0.17725357179566245, "grad_norm": 1.0744454860687256, "learning_rate": 9.442153143863571e-06, "loss": 0.8029, "step": 3449 }, { "epoch": 0.1773049645390071, "grad_norm": 1.1132200956344604, "learning_rate": 9.441771068629507e-06, "loss": 0.7834, "step": 3450 }, { "epoch": 0.17735635728235175, "grad_norm": 1.1523597240447998, "learning_rate": 9.441388870332294e-06, "loss": 0.8391, "step": 3451 }, { "epoch": 0.17740775002569636, "grad_norm": 1.2505656480789185, "learning_rate": 9.441006548982526e-06, "loss": 0.777, "step": 3452 }, { "epoch": 0.177459142769041, "grad_norm": 1.1059337854385376, "learning_rate": 9.440624104590793e-06, "loss": 0.8205, "step": 3453 }, { "epoch": 0.17751053551238566, "grad_norm": 3.040278196334839, "learning_rate": 9.440241537167689e-06, "loss": 0.8545, "step": 3454 }, { "epoch": 0.17756192825573028, "grad_norm": 1.165428638458252, "learning_rate": 9.43985884672382e-06, "loss": 0.8363, "step": 3455 }, { "epoch": 0.17761332099907493, "grad_norm": 0.7192800641059875, "learning_rate": 9.439476033269781e-06, "loss": 0.7291, "step": 3456 }, { "epoch": 0.17766471374241957, "grad_norm": 1.2035566568374634, "learning_rate": 9.439093096816183e-06, "loss": 0.8853, "step": 3457 }, { "epoch": 0.17771610648576422, "grad_norm": 1.1474984884262085, "learning_rate": 9.438710037373635e-06, "loss": 0.7916, "step": 3458 }, { "epoch": 0.17776749922910884, "grad_norm": 1.1859370470046997, "learning_rate": 9.438326854952748e-06, "loss": 0.8186, "step": 3459 }, { "epoch": 0.17781889197245349, "grad_norm": 1.067668080329895, "learning_rate": 9.437943549564142e-06, "loss": 0.7901, "step": 3460 }, { "epoch": 0.17787028471579813, "grad_norm": 1.1691845655441284, "learning_rate": 9.437560121218433e-06, "loss": 0.7809, "step": 3461 }, { "epoch": 0.17792167745914278, "grad_norm": 1.1305781602859497, "learning_rate": 9.437176569926245e-06, "loss": 0.7603, "step": 3462 }, { "epoch": 0.1779730702024874, "grad_norm": 1.0872985124588013, "learning_rate": 9.436792895698206e-06, "loss": 0.7415, "step": 3463 }, { "epoch": 0.17802446294583205, "grad_norm": 1.0424630641937256, "learning_rate": 9.436409098544946e-06, "loss": 0.8028, "step": 3464 }, { "epoch": 0.1780758556891767, "grad_norm": 1.1164084672927856, "learning_rate": 9.436025178477097e-06, "loss": 0.8058, "step": 3465 }, { "epoch": 0.17812724843252134, "grad_norm": 1.1132771968841553, "learning_rate": 9.435641135505297e-06, "loss": 0.81, "step": 3466 }, { "epoch": 0.17817864117586596, "grad_norm": 1.0557054281234741, "learning_rate": 9.435256969640188e-06, "loss": 0.7627, "step": 3467 }, { "epoch": 0.1782300339192106, "grad_norm": 1.2178592681884766, "learning_rate": 9.434872680892409e-06, "loss": 0.8437, "step": 3468 }, { "epoch": 0.17828142666255525, "grad_norm": 1.0634652376174927, "learning_rate": 9.43448826927261e-06, "loss": 0.7834, "step": 3469 }, { "epoch": 0.1783328194058999, "grad_norm": 1.1347999572753906, "learning_rate": 9.434103734791442e-06, "loss": 0.7642, "step": 3470 }, { "epoch": 0.17838421214924452, "grad_norm": 0.773730993270874, "learning_rate": 9.433719077459558e-06, "loss": 0.6943, "step": 3471 }, { "epoch": 0.17843560489258917, "grad_norm": 1.1731210947036743, "learning_rate": 9.433334297287615e-06, "loss": 0.8022, "step": 3472 }, { "epoch": 0.17848699763593381, "grad_norm": 0.9087056517601013, "learning_rate": 9.432949394286273e-06, "loss": 0.7188, "step": 3473 }, { "epoch": 0.17853839037927843, "grad_norm": 0.7269626259803772, "learning_rate": 9.432564368466196e-06, "loss": 0.7253, "step": 3474 }, { "epoch": 0.17858978312262308, "grad_norm": 1.0776389837265015, "learning_rate": 9.432179219838055e-06, "loss": 0.8147, "step": 3475 }, { "epoch": 0.17864117586596773, "grad_norm": 1.095043659210205, "learning_rate": 9.431793948412519e-06, "loss": 0.7807, "step": 3476 }, { "epoch": 0.17869256860931237, "grad_norm": 1.1292461156845093, "learning_rate": 9.43140855420026e-06, "loss": 0.8136, "step": 3477 }, { "epoch": 0.178743961352657, "grad_norm": 1.0339425802230835, "learning_rate": 9.431023037211958e-06, "loss": 0.807, "step": 3478 }, { "epoch": 0.17879535409600164, "grad_norm": 1.131508708000183, "learning_rate": 9.430637397458291e-06, "loss": 0.8032, "step": 3479 }, { "epoch": 0.1788467468393463, "grad_norm": 0.7799059748649597, "learning_rate": 9.430251634949949e-06, "loss": 0.7202, "step": 3480 }, { "epoch": 0.17889813958269093, "grad_norm": 1.1265454292297363, "learning_rate": 9.429865749697615e-06, "loss": 0.7919, "step": 3481 }, { "epoch": 0.17894953232603555, "grad_norm": 1.0092310905456543, "learning_rate": 9.429479741711982e-06, "loss": 0.7619, "step": 3482 }, { "epoch": 0.1790009250693802, "grad_norm": 1.1282694339752197, "learning_rate": 9.429093611003745e-06, "loss": 0.7745, "step": 3483 }, { "epoch": 0.17905231781272485, "grad_norm": 1.0650157928466797, "learning_rate": 9.428707357583603e-06, "loss": 0.7518, "step": 3484 }, { "epoch": 0.1791037105560695, "grad_norm": 1.1747040748596191, "learning_rate": 9.428320981462255e-06, "loss": 0.7938, "step": 3485 }, { "epoch": 0.17915510329941411, "grad_norm": 1.0909932851791382, "learning_rate": 9.427934482650406e-06, "loss": 0.7886, "step": 3486 }, { "epoch": 0.17920649604275876, "grad_norm": 0.8447384238243103, "learning_rate": 9.42754786115877e-06, "loss": 0.7523, "step": 3487 }, { "epoch": 0.1792578887861034, "grad_norm": 0.9629266262054443, "learning_rate": 9.42716111699805e-06, "loss": 0.7438, "step": 3488 }, { "epoch": 0.17930928152944806, "grad_norm": 1.0727958679199219, "learning_rate": 9.426774250178967e-06, "loss": 0.7618, "step": 3489 }, { "epoch": 0.17936067427279268, "grad_norm": 0.7674674391746521, "learning_rate": 9.426387260712238e-06, "loss": 0.6983, "step": 3490 }, { "epoch": 0.17941206701613732, "grad_norm": 1.1505568027496338, "learning_rate": 9.426000148608582e-06, "loss": 0.8316, "step": 3491 }, { "epoch": 0.17946345975948197, "grad_norm": 1.088375210762024, "learning_rate": 9.42561291387873e-06, "loss": 0.7697, "step": 3492 }, { "epoch": 0.1795148525028266, "grad_norm": 1.1327625513076782, "learning_rate": 9.425225556533405e-06, "loss": 0.7699, "step": 3493 }, { "epoch": 0.17956624524617124, "grad_norm": 1.1232110261917114, "learning_rate": 9.424838076583344e-06, "loss": 0.7602, "step": 3494 }, { "epoch": 0.17961763798951588, "grad_norm": 1.0624712705612183, "learning_rate": 9.42445047403928e-06, "loss": 0.779, "step": 3495 }, { "epoch": 0.17966903073286053, "grad_norm": 1.1274298429489136, "learning_rate": 9.42406274891195e-06, "loss": 0.8037, "step": 3496 }, { "epoch": 0.17972042347620515, "grad_norm": 1.1554615497589111, "learning_rate": 9.4236749012121e-06, "loss": 0.8454, "step": 3497 }, { "epoch": 0.1797718162195498, "grad_norm": 1.05156409740448, "learning_rate": 9.423286930950473e-06, "loss": 0.8262, "step": 3498 }, { "epoch": 0.17982320896289444, "grad_norm": 1.159987211227417, "learning_rate": 9.42289883813782e-06, "loss": 0.8042, "step": 3499 }, { "epoch": 0.1798746017062391, "grad_norm": 1.0476018190383911, "learning_rate": 9.42251062278489e-06, "loss": 0.7554, "step": 3500 }, { "epoch": 0.1799259944495837, "grad_norm": 1.1192255020141602, "learning_rate": 9.422122284902445e-06, "loss": 0.7678, "step": 3501 }, { "epoch": 0.17997738719292836, "grad_norm": 1.1830130815505981, "learning_rate": 9.421733824501237e-06, "loss": 0.8074, "step": 3502 }, { "epoch": 0.180028779936273, "grad_norm": 1.068735957145691, "learning_rate": 9.421345241592035e-06, "loss": 0.8225, "step": 3503 }, { "epoch": 0.18008017267961765, "grad_norm": 1.1787382364273071, "learning_rate": 9.420956536185601e-06, "loss": 0.8618, "step": 3504 }, { "epoch": 0.18013156542296227, "grad_norm": 0.8685758709907532, "learning_rate": 9.420567708292705e-06, "loss": 0.6954, "step": 3505 }, { "epoch": 0.18018295816630692, "grad_norm": 1.1619783639907837, "learning_rate": 9.42017875792412e-06, "loss": 0.7866, "step": 3506 }, { "epoch": 0.18023435090965156, "grad_norm": 1.1650017499923706, "learning_rate": 9.419789685090623e-06, "loss": 0.8078, "step": 3507 }, { "epoch": 0.18028574365299618, "grad_norm": 1.1158781051635742, "learning_rate": 9.419400489802995e-06, "loss": 0.7912, "step": 3508 }, { "epoch": 0.18033713639634083, "grad_norm": 1.2970175743103027, "learning_rate": 9.419011172072015e-06, "loss": 0.7491, "step": 3509 }, { "epoch": 0.18038852913968548, "grad_norm": 0.9577391147613525, "learning_rate": 9.418621731908473e-06, "loss": 0.7011, "step": 3510 }, { "epoch": 0.18043992188303012, "grad_norm": 1.0487060546875, "learning_rate": 9.418232169323157e-06, "loss": 0.7431, "step": 3511 }, { "epoch": 0.18049131462637474, "grad_norm": 1.08879816532135, "learning_rate": 9.417842484326861e-06, "loss": 0.8081, "step": 3512 }, { "epoch": 0.1805427073697194, "grad_norm": 1.1513770818710327, "learning_rate": 9.41745267693038e-06, "loss": 0.777, "step": 3513 }, { "epoch": 0.18059410011306404, "grad_norm": 0.8504171371459961, "learning_rate": 9.417062747144514e-06, "loss": 0.7494, "step": 3514 }, { "epoch": 0.18064549285640868, "grad_norm": 0.9390472769737244, "learning_rate": 9.41667269498007e-06, "loss": 0.7141, "step": 3515 }, { "epoch": 0.1806968855997533, "grad_norm": 1.1276520490646362, "learning_rate": 9.416282520447852e-06, "loss": 0.8015, "step": 3516 }, { "epoch": 0.18074827834309795, "grad_norm": 1.1711316108703613, "learning_rate": 9.415892223558668e-06, "loss": 0.8564, "step": 3517 }, { "epoch": 0.1807996710864426, "grad_norm": 1.1999324560165405, "learning_rate": 9.415501804323336e-06, "loss": 0.7266, "step": 3518 }, { "epoch": 0.18085106382978725, "grad_norm": 1.1760706901550293, "learning_rate": 9.41511126275267e-06, "loss": 0.8169, "step": 3519 }, { "epoch": 0.18090245657313186, "grad_norm": 1.1594830751419067, "learning_rate": 9.414720598857492e-06, "loss": 0.7801, "step": 3520 }, { "epoch": 0.1809538493164765, "grad_norm": 0.8029430508613586, "learning_rate": 9.414329812648624e-06, "loss": 0.6878, "step": 3521 }, { "epoch": 0.18100524205982116, "grad_norm": 1.1957621574401855, "learning_rate": 9.413938904136892e-06, "loss": 0.7954, "step": 3522 }, { "epoch": 0.1810566348031658, "grad_norm": 1.1458935737609863, "learning_rate": 9.41354787333313e-06, "loss": 0.8104, "step": 3523 }, { "epoch": 0.18110802754651043, "grad_norm": 1.078250765800476, "learning_rate": 9.41315672024817e-06, "loss": 0.8068, "step": 3524 }, { "epoch": 0.18115942028985507, "grad_norm": 1.159317970275879, "learning_rate": 9.41276544489285e-06, "loss": 0.7939, "step": 3525 }, { "epoch": 0.18121081303319972, "grad_norm": 1.1015045642852783, "learning_rate": 9.412374047278008e-06, "loss": 0.7674, "step": 3526 }, { "epoch": 0.18126220577654434, "grad_norm": 0.7945823073387146, "learning_rate": 9.41198252741449e-06, "loss": 0.7177, "step": 3527 }, { "epoch": 0.18131359851988899, "grad_norm": 1.0673738718032837, "learning_rate": 9.411590885313144e-06, "loss": 0.7913, "step": 3528 }, { "epoch": 0.18136499126323363, "grad_norm": 1.1799509525299072, "learning_rate": 9.411199120984822e-06, "loss": 0.8195, "step": 3529 }, { "epoch": 0.18141638400657828, "grad_norm": 1.1215784549713135, "learning_rate": 9.410807234440374e-06, "loss": 0.7387, "step": 3530 }, { "epoch": 0.1814677767499229, "grad_norm": 1.1578919887542725, "learning_rate": 9.41041522569066e-06, "loss": 0.7251, "step": 3531 }, { "epoch": 0.18151916949326755, "grad_norm": 1.0562947988510132, "learning_rate": 9.410023094746542e-06, "loss": 0.8127, "step": 3532 }, { "epoch": 0.1815705622366122, "grad_norm": 1.0952017307281494, "learning_rate": 9.409630841618881e-06, "loss": 0.7925, "step": 3533 }, { "epoch": 0.18162195497995684, "grad_norm": 1.051434874534607, "learning_rate": 9.409238466318548e-06, "loss": 0.7862, "step": 3534 }, { "epoch": 0.18167334772330146, "grad_norm": 1.0959357023239136, "learning_rate": 9.408845968856414e-06, "loss": 0.8215, "step": 3535 }, { "epoch": 0.1817247404666461, "grad_norm": 1.105502963066101, "learning_rate": 9.408453349243352e-06, "loss": 0.7818, "step": 3536 }, { "epoch": 0.18177613320999075, "grad_norm": 1.1461687088012695, "learning_rate": 9.40806060749024e-06, "loss": 0.7866, "step": 3537 }, { "epoch": 0.1818275259533354, "grad_norm": 1.1649479866027832, "learning_rate": 9.40766774360796e-06, "loss": 0.8435, "step": 3538 }, { "epoch": 0.18187891869668002, "grad_norm": 1.2510865926742554, "learning_rate": 9.407274757607396e-06, "loss": 0.7768, "step": 3539 }, { "epoch": 0.18193031144002467, "grad_norm": 1.0944989919662476, "learning_rate": 9.406881649499436e-06, "loss": 0.7948, "step": 3540 }, { "epoch": 0.1819817041833693, "grad_norm": 1.1343542337417603, "learning_rate": 9.40648841929497e-06, "loss": 0.8797, "step": 3541 }, { "epoch": 0.18203309692671396, "grad_norm": 0.9086080193519592, "learning_rate": 9.406095067004896e-06, "loss": 0.7037, "step": 3542 }, { "epoch": 0.18208448967005858, "grad_norm": 0.7800127267837524, "learning_rate": 9.405701592640112e-06, "loss": 0.6671, "step": 3543 }, { "epoch": 0.18213588241340323, "grad_norm": 0.8836771249771118, "learning_rate": 9.405307996211516e-06, "loss": 0.695, "step": 3544 }, { "epoch": 0.18218727515674787, "grad_norm": 1.1251201629638672, "learning_rate": 9.404914277730017e-06, "loss": 0.7604, "step": 3545 }, { "epoch": 0.1822386679000925, "grad_norm": 1.1130990982055664, "learning_rate": 9.40452043720652e-06, "loss": 0.7965, "step": 3546 }, { "epoch": 0.18229006064343714, "grad_norm": 1.138291358947754, "learning_rate": 9.404126474651937e-06, "loss": 0.8411, "step": 3547 }, { "epoch": 0.1823414533867818, "grad_norm": 1.1458921432495117, "learning_rate": 9.403732390077185e-06, "loss": 0.7395, "step": 3548 }, { "epoch": 0.18239284613012643, "grad_norm": 1.2307687997817993, "learning_rate": 9.403338183493182e-06, "loss": 0.8341, "step": 3549 }, { "epoch": 0.18244423887347105, "grad_norm": 1.120360255241394, "learning_rate": 9.40294385491085e-06, "loss": 0.7678, "step": 3550 }, { "epoch": 0.1824956316168157, "grad_norm": 1.165789246559143, "learning_rate": 9.402549404341112e-06, "loss": 0.8282, "step": 3551 }, { "epoch": 0.18254702436016035, "grad_norm": 1.0280251502990723, "learning_rate": 9.402154831794901e-06, "loss": 0.7691, "step": 3552 }, { "epoch": 0.182598417103505, "grad_norm": 1.1076387166976929, "learning_rate": 9.401760137283144e-06, "loss": 0.7589, "step": 3553 }, { "epoch": 0.18264980984684961, "grad_norm": 0.9806888103485107, "learning_rate": 9.401365320816778e-06, "loss": 0.7162, "step": 3554 }, { "epoch": 0.18270120259019426, "grad_norm": 1.3336249589920044, "learning_rate": 9.400970382406744e-06, "loss": 0.7692, "step": 3555 }, { "epoch": 0.1827525953335389, "grad_norm": 1.1388095617294312, "learning_rate": 9.400575322063983e-06, "loss": 0.7885, "step": 3556 }, { "epoch": 0.18280398807688356, "grad_norm": 1.084636926651001, "learning_rate": 9.400180139799438e-06, "loss": 0.8226, "step": 3557 }, { "epoch": 0.18285538082022817, "grad_norm": 1.1302292346954346, "learning_rate": 9.399784835624061e-06, "loss": 0.8005, "step": 3558 }, { "epoch": 0.18290677356357282, "grad_norm": 1.147099256515503, "learning_rate": 9.399389409548802e-06, "loss": 0.9052, "step": 3559 }, { "epoch": 0.18295816630691747, "grad_norm": 0.7813428044319153, "learning_rate": 9.398993861584618e-06, "loss": 0.6863, "step": 3560 }, { "epoch": 0.18300955905026212, "grad_norm": 1.2046120166778564, "learning_rate": 9.398598191742468e-06, "loss": 0.7811, "step": 3561 }, { "epoch": 0.18306095179360674, "grad_norm": 1.1721488237380981, "learning_rate": 9.398202400033313e-06, "loss": 0.8694, "step": 3562 }, { "epoch": 0.18311234453695138, "grad_norm": 1.408753752708435, "learning_rate": 9.397806486468121e-06, "loss": 0.7794, "step": 3563 }, { "epoch": 0.18316373728029603, "grad_norm": 1.167740821838379, "learning_rate": 9.39741045105786e-06, "loss": 0.809, "step": 3564 }, { "epoch": 0.18321513002364065, "grad_norm": 1.0459738969802856, "learning_rate": 9.397014293813502e-06, "loss": 0.8327, "step": 3565 }, { "epoch": 0.1832665227669853, "grad_norm": 0.8933892846107483, "learning_rate": 9.396618014746024e-06, "loss": 0.7085, "step": 3566 }, { "epoch": 0.18331791551032994, "grad_norm": 1.1517727375030518, "learning_rate": 9.396221613866406e-06, "loss": 0.8066, "step": 3567 }, { "epoch": 0.1833693082536746, "grad_norm": 1.1041796207427979, "learning_rate": 9.395825091185627e-06, "loss": 0.759, "step": 3568 }, { "epoch": 0.1834207009970192, "grad_norm": 1.3052785396575928, "learning_rate": 9.395428446714675e-06, "loss": 0.7988, "step": 3569 }, { "epoch": 0.18347209374036386, "grad_norm": 1.0522657632827759, "learning_rate": 9.395031680464539e-06, "loss": 0.7666, "step": 3570 }, { "epoch": 0.1835234864837085, "grad_norm": 0.8035390377044678, "learning_rate": 9.394634792446213e-06, "loss": 0.7012, "step": 3571 }, { "epoch": 0.18357487922705315, "grad_norm": 0.788076639175415, "learning_rate": 9.394237782670695e-06, "loss": 0.6688, "step": 3572 }, { "epoch": 0.18362627197039777, "grad_norm": 0.7577769756317139, "learning_rate": 9.39384065114898e-06, "loss": 0.6624, "step": 3573 }, { "epoch": 0.18367766471374242, "grad_norm": 0.995138943195343, "learning_rate": 9.393443397892072e-06, "loss": 0.7197, "step": 3574 }, { "epoch": 0.18372905745708706, "grad_norm": 1.1804691553115845, "learning_rate": 9.393046022910978e-06, "loss": 0.8282, "step": 3575 }, { "epoch": 0.1837804502004317, "grad_norm": 1.2751673460006714, "learning_rate": 9.39264852621671e-06, "loss": 0.7813, "step": 3576 }, { "epoch": 0.18383184294377633, "grad_norm": 3.0481412410736084, "learning_rate": 9.392250907820277e-06, "loss": 0.7616, "step": 3577 }, { "epoch": 0.18388323568712098, "grad_norm": 1.0465646982192993, "learning_rate": 9.391853167732697e-06, "loss": 0.7533, "step": 3578 }, { "epoch": 0.18393462843046562, "grad_norm": 1.1468281745910645, "learning_rate": 9.391455305964992e-06, "loss": 0.7882, "step": 3579 }, { "epoch": 0.18398602117381027, "grad_norm": 1.1356210708618164, "learning_rate": 9.39105732252818e-06, "loss": 0.8443, "step": 3580 }, { "epoch": 0.1840374139171549, "grad_norm": 1.2170732021331787, "learning_rate": 9.39065921743329e-06, "loss": 0.8289, "step": 3581 }, { "epoch": 0.18408880666049954, "grad_norm": 1.11255943775177, "learning_rate": 9.390260990691356e-06, "loss": 0.8042, "step": 3582 }, { "epoch": 0.18414019940384418, "grad_norm": 1.1234798431396484, "learning_rate": 9.389862642313406e-06, "loss": 0.768, "step": 3583 }, { "epoch": 0.1841915921471888, "grad_norm": 1.1570765972137451, "learning_rate": 9.389464172310476e-06, "loss": 0.8409, "step": 3584 }, { "epoch": 0.18424298489053345, "grad_norm": 1.113181710243225, "learning_rate": 9.38906558069361e-06, "loss": 0.7855, "step": 3585 }, { "epoch": 0.1842943776338781, "grad_norm": 1.1660375595092773, "learning_rate": 9.38866686747385e-06, "loss": 0.7317, "step": 3586 }, { "epoch": 0.18434577037722275, "grad_norm": 0.9136704206466675, "learning_rate": 9.38826803266224e-06, "loss": 0.68, "step": 3587 }, { "epoch": 0.18439716312056736, "grad_norm": 0.8244784474372864, "learning_rate": 9.387869076269834e-06, "loss": 0.6963, "step": 3588 }, { "epoch": 0.184448555863912, "grad_norm": 1.3582353591918945, "learning_rate": 9.387469998307681e-06, "loss": 0.824, "step": 3589 }, { "epoch": 0.18449994860725666, "grad_norm": 1.1117995977401733, "learning_rate": 9.387070798786843e-06, "loss": 0.8112, "step": 3590 }, { "epoch": 0.1845513413506013, "grad_norm": 1.2154436111450195, "learning_rate": 9.386671477718376e-06, "loss": 0.7846, "step": 3591 }, { "epoch": 0.18460273409394592, "grad_norm": 1.125707745552063, "learning_rate": 9.386272035113346e-06, "loss": 0.8186, "step": 3592 }, { "epoch": 0.18465412683729057, "grad_norm": 1.095779538154602, "learning_rate": 9.38587247098282e-06, "loss": 0.7937, "step": 3593 }, { "epoch": 0.18470551958063522, "grad_norm": 1.139014482498169, "learning_rate": 9.385472785337866e-06, "loss": 0.6794, "step": 3594 }, { "epoch": 0.18475691232397987, "grad_norm": 1.0869598388671875, "learning_rate": 9.385072978189558e-06, "loss": 0.8252, "step": 3595 }, { "epoch": 0.18480830506732449, "grad_norm": 1.1353763341903687, "learning_rate": 9.384673049548974e-06, "loss": 0.7882, "step": 3596 }, { "epoch": 0.18485969781066913, "grad_norm": 1.1481024026870728, "learning_rate": 9.384272999427196e-06, "loss": 0.7628, "step": 3597 }, { "epoch": 0.18491109055401378, "grad_norm": 1.1034921407699585, "learning_rate": 9.383872827835305e-06, "loss": 0.7872, "step": 3598 }, { "epoch": 0.18496248329735843, "grad_norm": 1.0840333700180054, "learning_rate": 9.383472534784388e-06, "loss": 0.7987, "step": 3599 }, { "epoch": 0.18501387604070305, "grad_norm": 1.1127790212631226, "learning_rate": 9.38307212028554e-06, "loss": 0.7985, "step": 3600 }, { "epoch": 0.1850652687840477, "grad_norm": 1.1410014629364014, "learning_rate": 9.382671584349848e-06, "loss": 0.7292, "step": 3601 }, { "epoch": 0.18511666152739234, "grad_norm": 1.5273505449295044, "learning_rate": 9.382270926988413e-06, "loss": 0.7942, "step": 3602 }, { "epoch": 0.18516805427073696, "grad_norm": 1.2378844022750854, "learning_rate": 9.381870148212335e-06, "loss": 0.7689, "step": 3603 }, { "epoch": 0.1852194470140816, "grad_norm": 1.1396708488464355, "learning_rate": 9.38146924803272e-06, "loss": 0.8114, "step": 3604 }, { "epoch": 0.18527083975742625, "grad_norm": 1.0924010276794434, "learning_rate": 9.381068226460672e-06, "loss": 0.727, "step": 3605 }, { "epoch": 0.1853222325007709, "grad_norm": 0.9004477262496948, "learning_rate": 9.380667083507304e-06, "loss": 0.6756, "step": 3606 }, { "epoch": 0.18537362524411552, "grad_norm": 1.1782633066177368, "learning_rate": 9.380265819183729e-06, "loss": 0.7889, "step": 3607 }, { "epoch": 0.18542501798746017, "grad_norm": 0.773456871509552, "learning_rate": 9.379864433501064e-06, "loss": 0.7265, "step": 3608 }, { "epoch": 0.1854764107308048, "grad_norm": 1.1103293895721436, "learning_rate": 9.37946292647043e-06, "loss": 0.8059, "step": 3609 }, { "epoch": 0.18552780347414946, "grad_norm": 1.1295528411865234, "learning_rate": 9.379061298102952e-06, "loss": 0.776, "step": 3610 }, { "epoch": 0.18557919621749408, "grad_norm": 0.8431046009063721, "learning_rate": 9.378659548409755e-06, "loss": 0.7427, "step": 3611 }, { "epoch": 0.18563058896083873, "grad_norm": 1.2043536901474, "learning_rate": 9.378257677401972e-06, "loss": 0.9075, "step": 3612 }, { "epoch": 0.18568198170418337, "grad_norm": 1.0653971433639526, "learning_rate": 9.377855685090738e-06, "loss": 0.7766, "step": 3613 }, { "epoch": 0.18573337444752802, "grad_norm": 1.089861512184143, "learning_rate": 9.37745357148719e-06, "loss": 0.7795, "step": 3614 }, { "epoch": 0.18578476719087264, "grad_norm": 1.1111794710159302, "learning_rate": 9.377051336602467e-06, "loss": 0.7771, "step": 3615 }, { "epoch": 0.1858361599342173, "grad_norm": 1.1197954416275024, "learning_rate": 9.376648980447713e-06, "loss": 0.7751, "step": 3616 }, { "epoch": 0.18588755267756193, "grad_norm": 0.8791465759277344, "learning_rate": 9.37624650303408e-06, "loss": 0.7822, "step": 3617 }, { "epoch": 0.18593894542090658, "grad_norm": 1.0727379322052002, "learning_rate": 9.375843904372714e-06, "loss": 0.7831, "step": 3618 }, { "epoch": 0.1859903381642512, "grad_norm": 1.0167869329452515, "learning_rate": 9.375441184474773e-06, "loss": 0.7406, "step": 3619 }, { "epoch": 0.18604173090759585, "grad_norm": 1.213617205619812, "learning_rate": 9.375038343351412e-06, "loss": 0.809, "step": 3620 }, { "epoch": 0.1860931236509405, "grad_norm": 1.1311434507369995, "learning_rate": 9.374635381013793e-06, "loss": 0.7872, "step": 3621 }, { "epoch": 0.18614451639428511, "grad_norm": 1.034319281578064, "learning_rate": 9.374232297473082e-06, "loss": 0.7447, "step": 3622 }, { "epoch": 0.18619590913762976, "grad_norm": 1.1859114170074463, "learning_rate": 9.373829092740444e-06, "loss": 0.8156, "step": 3623 }, { "epoch": 0.1862473018809744, "grad_norm": 1.1164816617965698, "learning_rate": 9.373425766827053e-06, "loss": 0.7714, "step": 3624 }, { "epoch": 0.18629869462431906, "grad_norm": 1.1615970134735107, "learning_rate": 9.37302231974408e-06, "loss": 0.86, "step": 3625 }, { "epoch": 0.18635008736766367, "grad_norm": 1.1209359169006348, "learning_rate": 9.372618751502706e-06, "loss": 0.7202, "step": 3626 }, { "epoch": 0.18640148011100832, "grad_norm": 1.1102511882781982, "learning_rate": 9.372215062114111e-06, "loss": 0.8177, "step": 3627 }, { "epoch": 0.18645287285435297, "grad_norm": 1.2142083644866943, "learning_rate": 9.37181125158948e-06, "loss": 0.8385, "step": 3628 }, { "epoch": 0.18650426559769762, "grad_norm": 0.8942787647247314, "learning_rate": 9.371407319940003e-06, "loss": 0.7243, "step": 3629 }, { "epoch": 0.18655565834104224, "grad_norm": 1.1200281381607056, "learning_rate": 9.371003267176865e-06, "loss": 0.8222, "step": 3630 }, { "epoch": 0.18660705108438688, "grad_norm": 1.1421840190887451, "learning_rate": 9.370599093311266e-06, "loss": 0.8077, "step": 3631 }, { "epoch": 0.18665844382773153, "grad_norm": 1.088209629058838, "learning_rate": 9.370194798354403e-06, "loss": 0.8145, "step": 3632 }, { "epoch": 0.18670983657107618, "grad_norm": 1.0613124370574951, "learning_rate": 9.369790382317476e-06, "loss": 0.7607, "step": 3633 }, { "epoch": 0.1867612293144208, "grad_norm": 1.1450103521347046, "learning_rate": 9.369385845211692e-06, "loss": 0.769, "step": 3634 }, { "epoch": 0.18681262205776544, "grad_norm": 0.7586758136749268, "learning_rate": 9.368981187048256e-06, "loss": 0.7054, "step": 3635 }, { "epoch": 0.1868640148011101, "grad_norm": 1.105120301246643, "learning_rate": 9.368576407838381e-06, "loss": 0.7947, "step": 3636 }, { "epoch": 0.1869154075444547, "grad_norm": 1.1226897239685059, "learning_rate": 9.368171507593283e-06, "loss": 0.8463, "step": 3637 }, { "epoch": 0.18696680028779936, "grad_norm": 0.7046691179275513, "learning_rate": 9.367766486324179e-06, "loss": 0.6549, "step": 3638 }, { "epoch": 0.187018193031144, "grad_norm": 1.075786828994751, "learning_rate": 9.367361344042289e-06, "loss": 0.8164, "step": 3639 }, { "epoch": 0.18706958577448865, "grad_norm": 1.1229912042617798, "learning_rate": 9.36695608075884e-06, "loss": 0.782, "step": 3640 }, { "epoch": 0.18712097851783327, "grad_norm": 1.0883831977844238, "learning_rate": 9.36655069648506e-06, "loss": 0.7906, "step": 3641 }, { "epoch": 0.18717237126117792, "grad_norm": 1.0604411363601685, "learning_rate": 9.366145191232179e-06, "loss": 0.7722, "step": 3642 }, { "epoch": 0.18722376400452256, "grad_norm": 1.1309033632278442, "learning_rate": 9.365739565011433e-06, "loss": 0.8232, "step": 3643 }, { "epoch": 0.1872751567478672, "grad_norm": 1.1275619268417358, "learning_rate": 9.36533381783406e-06, "loss": 0.8299, "step": 3644 }, { "epoch": 0.18732654949121183, "grad_norm": 0.8733184933662415, "learning_rate": 9.3649279497113e-06, "loss": 0.6955, "step": 3645 }, { "epoch": 0.18737794223455648, "grad_norm": 0.7558917999267578, "learning_rate": 9.364521960654403e-06, "loss": 0.7217, "step": 3646 }, { "epoch": 0.18742933497790112, "grad_norm": 1.1200082302093506, "learning_rate": 9.364115850674611e-06, "loss": 0.8107, "step": 3647 }, { "epoch": 0.18748072772124577, "grad_norm": 1.1277496814727783, "learning_rate": 9.363709619783179e-06, "loss": 0.7788, "step": 3648 }, { "epoch": 0.1875321204645904, "grad_norm": 0.880764901638031, "learning_rate": 9.363303267991362e-06, "loss": 0.7323, "step": 3649 }, { "epoch": 0.18758351320793504, "grad_norm": 1.126083493232727, "learning_rate": 9.362896795310417e-06, "loss": 0.8227, "step": 3650 }, { "epoch": 0.18763490595127968, "grad_norm": 1.1959525346755981, "learning_rate": 9.362490201751606e-06, "loss": 0.8221, "step": 3651 }, { "epoch": 0.18768629869462433, "grad_norm": 1.0710294246673584, "learning_rate": 9.362083487326196e-06, "loss": 0.775, "step": 3652 }, { "epoch": 0.18773769143796895, "grad_norm": 1.0628138780593872, "learning_rate": 9.361676652045453e-06, "loss": 0.7714, "step": 3653 }, { "epoch": 0.1877890841813136, "grad_norm": 1.0532938241958618, "learning_rate": 9.36126969592065e-06, "loss": 0.7778, "step": 3654 }, { "epoch": 0.18784047692465825, "grad_norm": 1.0857855081558228, "learning_rate": 9.36086261896306e-06, "loss": 0.8193, "step": 3655 }, { "epoch": 0.18789186966800286, "grad_norm": 0.904384434223175, "learning_rate": 9.360455421183965e-06, "loss": 0.7433, "step": 3656 }, { "epoch": 0.1879432624113475, "grad_norm": 1.0233732461929321, "learning_rate": 9.360048102594645e-06, "loss": 0.8354, "step": 3657 }, { "epoch": 0.18799465515469216, "grad_norm": 1.072852373123169, "learning_rate": 9.359640663206385e-06, "loss": 0.7963, "step": 3658 }, { "epoch": 0.1880460478980368, "grad_norm": 1.0768646001815796, "learning_rate": 9.35923310303047e-06, "loss": 0.7461, "step": 3659 }, { "epoch": 0.18809744064138142, "grad_norm": 0.8939974308013916, "learning_rate": 9.358825422078197e-06, "loss": 0.7216, "step": 3660 }, { "epoch": 0.18814883338472607, "grad_norm": 1.1154544353485107, "learning_rate": 9.358417620360862e-06, "loss": 0.8072, "step": 3661 }, { "epoch": 0.18820022612807072, "grad_norm": 1.0925756692886353, "learning_rate": 9.358009697889758e-06, "loss": 0.827, "step": 3662 }, { "epoch": 0.18825161887141537, "grad_norm": 1.1930550336837769, "learning_rate": 9.357601654676191e-06, "loss": 0.8483, "step": 3663 }, { "epoch": 0.18830301161475999, "grad_norm": 1.1426748037338257, "learning_rate": 9.357193490731464e-06, "loss": 0.7981, "step": 3664 }, { "epoch": 0.18835440435810463, "grad_norm": 5.05674934387207, "learning_rate": 9.356785206066887e-06, "loss": 0.7186, "step": 3665 }, { "epoch": 0.18840579710144928, "grad_norm": 1.112631916999817, "learning_rate": 9.356376800693772e-06, "loss": 0.7854, "step": 3666 }, { "epoch": 0.18845718984479393, "grad_norm": 1.1439162492752075, "learning_rate": 9.355968274623432e-06, "loss": 0.8133, "step": 3667 }, { "epoch": 0.18850858258813855, "grad_norm": 1.0858850479125977, "learning_rate": 9.355559627867187e-06, "loss": 0.8333, "step": 3668 }, { "epoch": 0.1885599753314832, "grad_norm": 1.0909470319747925, "learning_rate": 9.355150860436362e-06, "loss": 0.7845, "step": 3669 }, { "epoch": 0.18861136807482784, "grad_norm": 1.1100512742996216, "learning_rate": 9.354741972342276e-06, "loss": 0.8011, "step": 3670 }, { "epoch": 0.1886627608181725, "grad_norm": 1.103771448135376, "learning_rate": 9.354332963596262e-06, "loss": 0.7495, "step": 3671 }, { "epoch": 0.1887141535615171, "grad_norm": 1.2236180305480957, "learning_rate": 9.353923834209651e-06, "loss": 0.7595, "step": 3672 }, { "epoch": 0.18876554630486175, "grad_norm": 1.1341357231140137, "learning_rate": 9.35351458419378e-06, "loss": 0.7438, "step": 3673 }, { "epoch": 0.1888169390482064, "grad_norm": 0.872042179107666, "learning_rate": 9.353105213559983e-06, "loss": 0.6686, "step": 3674 }, { "epoch": 0.18886833179155102, "grad_norm": 1.5196270942687988, "learning_rate": 9.352695722319606e-06, "loss": 0.7337, "step": 3675 }, { "epoch": 0.18891972453489567, "grad_norm": 1.1158409118652344, "learning_rate": 9.352286110483993e-06, "loss": 0.799, "step": 3676 }, { "epoch": 0.1889711172782403, "grad_norm": 1.0876052379608154, "learning_rate": 9.351876378064493e-06, "loss": 0.7819, "step": 3677 }, { "epoch": 0.18902251002158496, "grad_norm": 0.6968023777008057, "learning_rate": 9.351466525072457e-06, "loss": 0.7145, "step": 3678 }, { "epoch": 0.18907390276492958, "grad_norm": 1.1194862127304077, "learning_rate": 9.35105655151924e-06, "loss": 0.8109, "step": 3679 }, { "epoch": 0.18912529550827423, "grad_norm": 1.2120953798294067, "learning_rate": 9.350646457416203e-06, "loss": 0.774, "step": 3680 }, { "epoch": 0.18917668825161887, "grad_norm": 1.2555986642837524, "learning_rate": 9.350236242774705e-06, "loss": 0.7784, "step": 3681 }, { "epoch": 0.18922808099496352, "grad_norm": 1.0862518548965454, "learning_rate": 9.349825907606116e-06, "loss": 0.7815, "step": 3682 }, { "epoch": 0.18927947373830814, "grad_norm": 1.1779824495315552, "learning_rate": 9.3494154519218e-06, "loss": 0.7668, "step": 3683 }, { "epoch": 0.1893308664816528, "grad_norm": 1.0437113046646118, "learning_rate": 9.349004875733131e-06, "loss": 0.7929, "step": 3684 }, { "epoch": 0.18938225922499743, "grad_norm": 1.253122091293335, "learning_rate": 9.348594179051485e-06, "loss": 0.7733, "step": 3685 }, { "epoch": 0.18943365196834208, "grad_norm": 1.129672884941101, "learning_rate": 9.34818336188824e-06, "loss": 0.8384, "step": 3686 }, { "epoch": 0.1894850447116867, "grad_norm": 1.2041178941726685, "learning_rate": 9.347772424254777e-06, "loss": 0.8055, "step": 3687 }, { "epoch": 0.18953643745503135, "grad_norm": 1.0828802585601807, "learning_rate": 9.347361366162483e-06, "loss": 0.8032, "step": 3688 }, { "epoch": 0.189587830198376, "grad_norm": 1.1776783466339111, "learning_rate": 9.346950187622745e-06, "loss": 0.8204, "step": 3689 }, { "epoch": 0.18963922294172064, "grad_norm": 0.8706642985343933, "learning_rate": 9.346538888646956e-06, "loss": 0.7198, "step": 3690 }, { "epoch": 0.18969061568506526, "grad_norm": 1.130881905555725, "learning_rate": 9.346127469246513e-06, "loss": 0.8392, "step": 3691 }, { "epoch": 0.1897420084284099, "grad_norm": 1.147912859916687, "learning_rate": 9.345715929432812e-06, "loss": 0.8317, "step": 3692 }, { "epoch": 0.18979340117175456, "grad_norm": 1.0743414163589478, "learning_rate": 9.345304269217258e-06, "loss": 0.789, "step": 3693 }, { "epoch": 0.18984479391509917, "grad_norm": 1.1149293184280396, "learning_rate": 9.344892488611253e-06, "loss": 0.7819, "step": 3694 }, { "epoch": 0.18989618665844382, "grad_norm": 1.116666316986084, "learning_rate": 9.344480587626207e-06, "loss": 0.7696, "step": 3695 }, { "epoch": 0.18994757940178847, "grad_norm": 1.1542799472808838, "learning_rate": 9.344068566273535e-06, "loss": 0.7725, "step": 3696 }, { "epoch": 0.18999897214513312, "grad_norm": 1.2475882768630981, "learning_rate": 9.34365642456465e-06, "loss": 0.748, "step": 3697 }, { "epoch": 0.19005036488847774, "grad_norm": 1.0761419534683228, "learning_rate": 9.343244162510966e-06, "loss": 0.8178, "step": 3698 }, { "epoch": 0.19010175763182238, "grad_norm": 1.0996686220169067, "learning_rate": 9.342831780123914e-06, "loss": 0.7922, "step": 3699 }, { "epoch": 0.19015315037516703, "grad_norm": 1.0922256708145142, "learning_rate": 9.342419277414914e-06, "loss": 0.8074, "step": 3700 }, { "epoch": 0.19020454311851168, "grad_norm": 0.8283430337905884, "learning_rate": 9.342006654395396e-06, "loss": 0.716, "step": 3701 }, { "epoch": 0.1902559358618563, "grad_norm": 0.8511554002761841, "learning_rate": 9.341593911076791e-06, "loss": 0.7315, "step": 3702 }, { "epoch": 0.19030732860520094, "grad_norm": 1.11526620388031, "learning_rate": 9.341181047470538e-06, "loss": 0.8215, "step": 3703 }, { "epoch": 0.1903587213485456, "grad_norm": 1.1742713451385498, "learning_rate": 9.34076806358807e-06, "loss": 0.8056, "step": 3704 }, { "epoch": 0.19041011409189024, "grad_norm": 1.173837423324585, "learning_rate": 9.340354959440835e-06, "loss": 0.8499, "step": 3705 }, { "epoch": 0.19046150683523486, "grad_norm": 1.06858229637146, "learning_rate": 9.339941735040274e-06, "loss": 0.7365, "step": 3706 }, { "epoch": 0.1905128995785795, "grad_norm": 1.158582329750061, "learning_rate": 9.339528390397839e-06, "loss": 0.876, "step": 3707 }, { "epoch": 0.19056429232192415, "grad_norm": 1.0987800359725952, "learning_rate": 9.33911492552498e-06, "loss": 0.7901, "step": 3708 }, { "epoch": 0.1906156850652688, "grad_norm": 1.1143357753753662, "learning_rate": 9.338701340433152e-06, "loss": 0.8236, "step": 3709 }, { "epoch": 0.19066707780861342, "grad_norm": 1.2264057397842407, "learning_rate": 9.338287635133814e-06, "loss": 0.8332, "step": 3710 }, { "epoch": 0.19071847055195806, "grad_norm": 3.8975017070770264, "learning_rate": 9.337873809638428e-06, "loss": 0.7913, "step": 3711 }, { "epoch": 0.1907698632953027, "grad_norm": 1.0601764917373657, "learning_rate": 9.337459863958462e-06, "loss": 0.8054, "step": 3712 }, { "epoch": 0.19082125603864733, "grad_norm": 0.8995238542556763, "learning_rate": 9.337045798105384e-06, "loss": 0.7266, "step": 3713 }, { "epoch": 0.19087264878199198, "grad_norm": 1.1024494171142578, "learning_rate": 9.336631612090663e-06, "loss": 0.7341, "step": 3714 }, { "epoch": 0.19092404152533662, "grad_norm": 0.8019346594810486, "learning_rate": 9.336217305925777e-06, "loss": 0.675, "step": 3715 }, { "epoch": 0.19097543426868127, "grad_norm": 1.1403672695159912, "learning_rate": 9.335802879622203e-06, "loss": 0.8044, "step": 3716 }, { "epoch": 0.1910268270120259, "grad_norm": 1.1501086950302124, "learning_rate": 9.335388333191424e-06, "loss": 0.8401, "step": 3717 }, { "epoch": 0.19107821975537054, "grad_norm": 1.1162502765655518, "learning_rate": 9.334973666644927e-06, "loss": 0.7691, "step": 3718 }, { "epoch": 0.19112961249871518, "grad_norm": 1.174485445022583, "learning_rate": 9.334558879994198e-06, "loss": 0.8362, "step": 3719 }, { "epoch": 0.19118100524205983, "grad_norm": 0.8484004139900208, "learning_rate": 9.33414397325073e-06, "loss": 0.7093, "step": 3720 }, { "epoch": 0.19123239798540445, "grad_norm": 1.0226993560791016, "learning_rate": 9.333728946426018e-06, "loss": 0.7975, "step": 3721 }, { "epoch": 0.1912837907287491, "grad_norm": 1.1397813558578491, "learning_rate": 9.333313799531563e-06, "loss": 0.8299, "step": 3722 }, { "epoch": 0.19133518347209374, "grad_norm": 0.7835520505905151, "learning_rate": 9.332898532578862e-06, "loss": 0.7185, "step": 3723 }, { "epoch": 0.1913865762154384, "grad_norm": 1.1082810163497925, "learning_rate": 9.332483145579427e-06, "loss": 0.7298, "step": 3724 }, { "epoch": 0.191437968958783, "grad_norm": 1.471495509147644, "learning_rate": 9.33206763854476e-06, "loss": 0.8584, "step": 3725 }, { "epoch": 0.19148936170212766, "grad_norm": 1.175403356552124, "learning_rate": 9.331652011486378e-06, "loss": 0.8538, "step": 3726 }, { "epoch": 0.1915407544454723, "grad_norm": 0.8146130442619324, "learning_rate": 9.331236264415795e-06, "loss": 0.7222, "step": 3727 }, { "epoch": 0.19159214718881695, "grad_norm": 1.1014879941940308, "learning_rate": 9.330820397344528e-06, "loss": 0.8092, "step": 3728 }, { "epoch": 0.19164353993216157, "grad_norm": 1.1478755474090576, "learning_rate": 9.3304044102841e-06, "loss": 0.7852, "step": 3729 }, { "epoch": 0.19169493267550622, "grad_norm": 0.7476874589920044, "learning_rate": 9.329988303246036e-06, "loss": 0.7202, "step": 3730 }, { "epoch": 0.19174632541885087, "grad_norm": 1.1256016492843628, "learning_rate": 9.329572076241866e-06, "loss": 0.7997, "step": 3731 }, { "epoch": 0.19179771816219549, "grad_norm": 0.703258216381073, "learning_rate": 9.329155729283118e-06, "loss": 0.6871, "step": 3732 }, { "epoch": 0.19184911090554013, "grad_norm": 0.8316715359687805, "learning_rate": 9.328739262381335e-06, "loss": 0.7266, "step": 3733 }, { "epoch": 0.19190050364888478, "grad_norm": 1.133061170578003, "learning_rate": 9.328322675548048e-06, "loss": 0.8332, "step": 3734 }, { "epoch": 0.19195189639222943, "grad_norm": 0.7546510696411133, "learning_rate": 9.3279059687948e-06, "loss": 0.679, "step": 3735 }, { "epoch": 0.19200328913557405, "grad_norm": 1.5760674476623535, "learning_rate": 9.32748914213314e-06, "loss": 0.7254, "step": 3736 }, { "epoch": 0.1920546818789187, "grad_norm": 0.7752396464347839, "learning_rate": 9.327072195574613e-06, "loss": 0.6932, "step": 3737 }, { "epoch": 0.19210607462226334, "grad_norm": 0.7861957550048828, "learning_rate": 9.326655129130774e-06, "loss": 0.7448, "step": 3738 }, { "epoch": 0.192157467365608, "grad_norm": 1.0868723392486572, "learning_rate": 9.326237942813175e-06, "loss": 0.8358, "step": 3739 }, { "epoch": 0.1922088601089526, "grad_norm": 1.277090311050415, "learning_rate": 9.325820636633376e-06, "loss": 0.8165, "step": 3740 }, { "epoch": 0.19226025285229725, "grad_norm": 1.1212096214294434, "learning_rate": 9.325403210602938e-06, "loss": 0.8236, "step": 3741 }, { "epoch": 0.1923116455956419, "grad_norm": 1.1101020574569702, "learning_rate": 9.32498566473343e-06, "loss": 0.7992, "step": 3742 }, { "epoch": 0.19236303833898655, "grad_norm": 1.0129382610321045, "learning_rate": 9.324567999036415e-06, "loss": 0.7883, "step": 3743 }, { "epoch": 0.19241443108233117, "grad_norm": 1.1038954257965088, "learning_rate": 9.324150213523468e-06, "loss": 0.7249, "step": 3744 }, { "epoch": 0.1924658238256758, "grad_norm": 1.139496922492981, "learning_rate": 9.323732308206165e-06, "loss": 0.842, "step": 3745 }, { "epoch": 0.19251721656902046, "grad_norm": 1.0169386863708496, "learning_rate": 9.32331428309608e-06, "loss": 0.7364, "step": 3746 }, { "epoch": 0.1925686093123651, "grad_norm": 1.2437777519226074, "learning_rate": 9.322896138204798e-06, "loss": 0.8004, "step": 3747 }, { "epoch": 0.19262000205570973, "grad_norm": 1.134329915046692, "learning_rate": 9.322477873543903e-06, "loss": 0.7754, "step": 3748 }, { "epoch": 0.19267139479905437, "grad_norm": 1.0473402738571167, "learning_rate": 9.322059489124984e-06, "loss": 0.8044, "step": 3749 }, { "epoch": 0.19272278754239902, "grad_norm": 1.1545026302337646, "learning_rate": 9.321640984959635e-06, "loss": 0.7697, "step": 3750 }, { "epoch": 0.19277418028574364, "grad_norm": 1.0880850553512573, "learning_rate": 9.321222361059446e-06, "loss": 0.786, "step": 3751 }, { "epoch": 0.1928255730290883, "grad_norm": 1.2690527439117432, "learning_rate": 9.32080361743602e-06, "loss": 0.837, "step": 3752 }, { "epoch": 0.19287696577243293, "grad_norm": 0.9260185956954956, "learning_rate": 9.320384754100955e-06, "loss": 0.6938, "step": 3753 }, { "epoch": 0.19292835851577758, "grad_norm": 1.1241703033447266, "learning_rate": 9.319965771065857e-06, "loss": 0.7841, "step": 3754 }, { "epoch": 0.1929797512591222, "grad_norm": 0.7416635155677795, "learning_rate": 9.319546668342337e-06, "loss": 0.6806, "step": 3755 }, { "epoch": 0.19303114400246685, "grad_norm": 1.105777621269226, "learning_rate": 9.319127445942004e-06, "loss": 0.8363, "step": 3756 }, { "epoch": 0.1930825367458115, "grad_norm": 1.1783965826034546, "learning_rate": 9.318708103876473e-06, "loss": 0.7916, "step": 3757 }, { "epoch": 0.19313392948915614, "grad_norm": 1.2658344507217407, "learning_rate": 9.318288642157362e-06, "loss": 0.8243, "step": 3758 }, { "epoch": 0.19318532223250076, "grad_norm": 1.0369431972503662, "learning_rate": 9.317869060796296e-06, "loss": 0.7732, "step": 3759 }, { "epoch": 0.1932367149758454, "grad_norm": 1.2240486145019531, "learning_rate": 9.317449359804894e-06, "loss": 0.8045, "step": 3760 }, { "epoch": 0.19328810771919006, "grad_norm": 1.1623338460922241, "learning_rate": 9.31702953919479e-06, "loss": 0.794, "step": 3761 }, { "epoch": 0.1933395004625347, "grad_norm": 0.9955929517745972, "learning_rate": 9.31660959897761e-06, "loss": 0.733, "step": 3762 }, { "epoch": 0.19339089320587932, "grad_norm": 1.1080474853515625, "learning_rate": 9.316189539164993e-06, "loss": 0.8142, "step": 3763 }, { "epoch": 0.19344228594922397, "grad_norm": 1.0698845386505127, "learning_rate": 9.315769359768576e-06, "loss": 0.805, "step": 3764 }, { "epoch": 0.19349367869256862, "grad_norm": 1.1078908443450928, "learning_rate": 9.315349060799999e-06, "loss": 0.797, "step": 3765 }, { "epoch": 0.19354507143591326, "grad_norm": 0.9161761403083801, "learning_rate": 9.314928642270909e-06, "loss": 0.6327, "step": 3766 }, { "epoch": 0.19359646417925788, "grad_norm": 1.1263906955718994, "learning_rate": 9.314508104192953e-06, "loss": 0.8345, "step": 3767 }, { "epoch": 0.19364785692260253, "grad_norm": 1.1812494993209839, "learning_rate": 9.314087446577781e-06, "loss": 0.8077, "step": 3768 }, { "epoch": 0.19369924966594718, "grad_norm": 0.8404086232185364, "learning_rate": 9.31366666943705e-06, "loss": 0.7044, "step": 3769 }, { "epoch": 0.1937506424092918, "grad_norm": 0.7887621521949768, "learning_rate": 9.31324577278242e-06, "loss": 0.7329, "step": 3770 }, { "epoch": 0.19380203515263644, "grad_norm": 1.035783052444458, "learning_rate": 9.312824756625545e-06, "loss": 0.7801, "step": 3771 }, { "epoch": 0.1938534278959811, "grad_norm": 1.0649718046188354, "learning_rate": 9.312403620978096e-06, "loss": 0.7574, "step": 3772 }, { "epoch": 0.19390482063932574, "grad_norm": 1.160948395729065, "learning_rate": 9.311982365851738e-06, "loss": 0.8266, "step": 3773 }, { "epoch": 0.19395621338267036, "grad_norm": 0.7871479392051697, "learning_rate": 9.311560991258145e-06, "loss": 0.742, "step": 3774 }, { "epoch": 0.194007606126015, "grad_norm": 1.09604811668396, "learning_rate": 9.311139497208987e-06, "loss": 0.8446, "step": 3775 }, { "epoch": 0.19405899886935965, "grad_norm": 1.123671054840088, "learning_rate": 9.310717883715946e-06, "loss": 0.8413, "step": 3776 }, { "epoch": 0.1941103916127043, "grad_norm": 0.842566728591919, "learning_rate": 9.310296150790701e-06, "loss": 0.6526, "step": 3777 }, { "epoch": 0.19416178435604892, "grad_norm": 1.2030017375946045, "learning_rate": 9.309874298444938e-06, "loss": 0.7857, "step": 3778 }, { "epoch": 0.19421317709939356, "grad_norm": 1.1333600282669067, "learning_rate": 9.309452326690345e-06, "loss": 0.7948, "step": 3779 }, { "epoch": 0.1942645698427382, "grad_norm": 1.1983345746994019, "learning_rate": 9.30903023553861e-06, "loss": 0.8443, "step": 3780 }, { "epoch": 0.19431596258608286, "grad_norm": 1.1283385753631592, "learning_rate": 9.30860802500143e-06, "loss": 0.7728, "step": 3781 }, { "epoch": 0.19436735532942748, "grad_norm": 1.0331902503967285, "learning_rate": 9.308185695090504e-06, "loss": 0.769, "step": 3782 }, { "epoch": 0.19441874807277212, "grad_norm": 1.0716427564620972, "learning_rate": 9.30776324581753e-06, "loss": 0.7599, "step": 3783 }, { "epoch": 0.19447014081611677, "grad_norm": 1.1300439834594727, "learning_rate": 9.307340677194213e-06, "loss": 0.7798, "step": 3784 }, { "epoch": 0.1945215335594614, "grad_norm": 1.1070586442947388, "learning_rate": 9.30691798923226e-06, "loss": 0.8404, "step": 3785 }, { "epoch": 0.19457292630280604, "grad_norm": 1.079648733139038, "learning_rate": 9.306495181943385e-06, "loss": 0.7789, "step": 3786 }, { "epoch": 0.19462431904615068, "grad_norm": 1.6093806028366089, "learning_rate": 9.3060722553393e-06, "loss": 0.7776, "step": 3787 }, { "epoch": 0.19467571178949533, "grad_norm": 1.092764139175415, "learning_rate": 9.305649209431724e-06, "loss": 0.7642, "step": 3788 }, { "epoch": 0.19472710453283995, "grad_norm": 1.1992213726043701, "learning_rate": 9.305226044232375e-06, "loss": 0.8456, "step": 3789 }, { "epoch": 0.1947784972761846, "grad_norm": 1.08976411819458, "learning_rate": 9.30480275975298e-06, "loss": 0.7466, "step": 3790 }, { "epoch": 0.19482989001952924, "grad_norm": 1.0628770589828491, "learning_rate": 9.304379356005264e-06, "loss": 0.8171, "step": 3791 }, { "epoch": 0.1948812827628739, "grad_norm": 1.1960781812667847, "learning_rate": 9.30395583300096e-06, "loss": 0.8564, "step": 3792 }, { "epoch": 0.1949326755062185, "grad_norm": 1.0505499839782715, "learning_rate": 9.303532190751802e-06, "loss": 0.784, "step": 3793 }, { "epoch": 0.19498406824956316, "grad_norm": 1.1564574241638184, "learning_rate": 9.303108429269526e-06, "loss": 0.781, "step": 3794 }, { "epoch": 0.1950354609929078, "grad_norm": 0.9236723184585571, "learning_rate": 9.302684548565873e-06, "loss": 0.7245, "step": 3795 }, { "epoch": 0.19508685373625245, "grad_norm": 0.8070688843727112, "learning_rate": 9.30226054865259e-06, "loss": 0.6946, "step": 3796 }, { "epoch": 0.19513824647959707, "grad_norm": 1.1324723958969116, "learning_rate": 9.301836429541417e-06, "loss": 0.774, "step": 3797 }, { "epoch": 0.19518963922294172, "grad_norm": 1.1159038543701172, "learning_rate": 9.301412191244112e-06, "loss": 0.7959, "step": 3798 }, { "epoch": 0.19524103196628637, "grad_norm": 1.1220502853393555, "learning_rate": 9.300987833772425e-06, "loss": 0.8143, "step": 3799 }, { "epoch": 0.195292424709631, "grad_norm": 0.801337480545044, "learning_rate": 9.300563357138114e-06, "loss": 0.7252, "step": 3800 }, { "epoch": 0.19534381745297563, "grad_norm": 1.302274465560913, "learning_rate": 9.300138761352942e-06, "loss": 0.7697, "step": 3801 }, { "epoch": 0.19539521019632028, "grad_norm": 1.1282092332839966, "learning_rate": 9.299714046428668e-06, "loss": 0.8612, "step": 3802 }, { "epoch": 0.19544660293966493, "grad_norm": 0.8231169581413269, "learning_rate": 9.299289212377063e-06, "loss": 0.6918, "step": 3803 }, { "epoch": 0.19549799568300955, "grad_norm": 0.7592513561248779, "learning_rate": 9.298864259209895e-06, "loss": 0.719, "step": 3804 }, { "epoch": 0.1955493884263542, "grad_norm": 0.8732669353485107, "learning_rate": 9.29843918693894e-06, "loss": 0.6627, "step": 3805 }, { "epoch": 0.19560078116969884, "grad_norm": 1.1332786083221436, "learning_rate": 9.298013995575974e-06, "loss": 0.7734, "step": 3806 }, { "epoch": 0.1956521739130435, "grad_norm": 1.1731846332550049, "learning_rate": 9.297588685132775e-06, "loss": 0.7988, "step": 3807 }, { "epoch": 0.1957035666563881, "grad_norm": 1.1542022228240967, "learning_rate": 9.29716325562113e-06, "loss": 0.8031, "step": 3808 }, { "epoch": 0.19575495939973275, "grad_norm": 0.8504632711410522, "learning_rate": 9.296737707052824e-06, "loss": 0.7005, "step": 3809 }, { "epoch": 0.1958063521430774, "grad_norm": 0.8363313674926758, "learning_rate": 9.296312039439649e-06, "loss": 0.7286, "step": 3810 }, { "epoch": 0.19585774488642205, "grad_norm": 0.8486849069595337, "learning_rate": 9.295886252793395e-06, "loss": 0.6998, "step": 3811 }, { "epoch": 0.19590913762976667, "grad_norm": 1.174891471862793, "learning_rate": 9.295460347125864e-06, "loss": 0.7426, "step": 3812 }, { "epoch": 0.1959605303731113, "grad_norm": 1.2724941968917847, "learning_rate": 9.295034322448851e-06, "loss": 0.7262, "step": 3813 }, { "epoch": 0.19601192311645596, "grad_norm": 0.8891855478286743, "learning_rate": 9.294608178774162e-06, "loss": 0.7025, "step": 3814 }, { "epoch": 0.1960633158598006, "grad_norm": 1.1561217308044434, "learning_rate": 9.294181916113603e-06, "loss": 0.7814, "step": 3815 }, { "epoch": 0.19611470860314523, "grad_norm": 1.3036850690841675, "learning_rate": 9.293755534478985e-06, "loss": 0.7772, "step": 3816 }, { "epoch": 0.19616610134648987, "grad_norm": 1.215003252029419, "learning_rate": 9.29332903388212e-06, "loss": 0.8149, "step": 3817 }, { "epoch": 0.19621749408983452, "grad_norm": 1.151149868965149, "learning_rate": 9.292902414334824e-06, "loss": 0.7753, "step": 3818 }, { "epoch": 0.19626888683317917, "grad_norm": 1.1120131015777588, "learning_rate": 9.292475675848918e-06, "loss": 0.7712, "step": 3819 }, { "epoch": 0.1963202795765238, "grad_norm": 1.0402251482009888, "learning_rate": 9.292048818436225e-06, "loss": 0.6883, "step": 3820 }, { "epoch": 0.19637167231986843, "grad_norm": 3.532841920852661, "learning_rate": 9.291621842108572e-06, "loss": 0.8344, "step": 3821 }, { "epoch": 0.19642306506321308, "grad_norm": 1.1286754608154297, "learning_rate": 9.291194746877788e-06, "loss": 0.7671, "step": 3822 }, { "epoch": 0.1964744578065577, "grad_norm": 1.1132481098175049, "learning_rate": 9.290767532755706e-06, "loss": 0.7829, "step": 3823 }, { "epoch": 0.19652585054990235, "grad_norm": 1.101176381111145, "learning_rate": 9.290340199754165e-06, "loss": 0.7998, "step": 3824 }, { "epoch": 0.196577243293247, "grad_norm": 1.1110650300979614, "learning_rate": 9.289912747885e-06, "loss": 0.776, "step": 3825 }, { "epoch": 0.19662863603659164, "grad_norm": 1.0882169008255005, "learning_rate": 9.289485177160055e-06, "loss": 0.7483, "step": 3826 }, { "epoch": 0.19668002877993626, "grad_norm": 0.8537433743476868, "learning_rate": 9.28905748759118e-06, "loss": 0.7084, "step": 3827 }, { "epoch": 0.1967314215232809, "grad_norm": 0.8792944550514221, "learning_rate": 9.288629679190222e-06, "loss": 0.6742, "step": 3828 }, { "epoch": 0.19678281426662556, "grad_norm": 0.7601463198661804, "learning_rate": 9.288201751969031e-06, "loss": 0.7048, "step": 3829 }, { "epoch": 0.1968342070099702, "grad_norm": 1.265582799911499, "learning_rate": 9.28777370593947e-06, "loss": 0.8184, "step": 3830 }, { "epoch": 0.19688559975331482, "grad_norm": 1.0593284368515015, "learning_rate": 9.287345541113391e-06, "loss": 0.8006, "step": 3831 }, { "epoch": 0.19693699249665947, "grad_norm": 1.373486876487732, "learning_rate": 9.286917257502658e-06, "loss": 0.7975, "step": 3832 }, { "epoch": 0.19698838524000412, "grad_norm": 1.134986162185669, "learning_rate": 9.286488855119143e-06, "loss": 0.7929, "step": 3833 }, { "epoch": 0.19703977798334876, "grad_norm": 1.20670485496521, "learning_rate": 9.28606033397471e-06, "loss": 0.7903, "step": 3834 }, { "epoch": 0.19709117072669338, "grad_norm": 1.1070822477340698, "learning_rate": 9.285631694081233e-06, "loss": 0.809, "step": 3835 }, { "epoch": 0.19714256347003803, "grad_norm": 1.030637264251709, "learning_rate": 9.285202935450586e-06, "loss": 0.7232, "step": 3836 }, { "epoch": 0.19719395621338268, "grad_norm": 1.0732930898666382, "learning_rate": 9.28477405809465e-06, "loss": 0.7694, "step": 3837 }, { "epoch": 0.19724534895672732, "grad_norm": 1.1373001337051392, "learning_rate": 9.284345062025306e-06, "loss": 0.8151, "step": 3838 }, { "epoch": 0.19729674170007194, "grad_norm": 1.0920013189315796, "learning_rate": 9.283915947254443e-06, "loss": 0.7599, "step": 3839 }, { "epoch": 0.1973481344434166, "grad_norm": 1.09823477268219, "learning_rate": 9.283486713793948e-06, "loss": 0.7515, "step": 3840 }, { "epoch": 0.19739952718676124, "grad_norm": 1.1406538486480713, "learning_rate": 9.283057361655711e-06, "loss": 0.8202, "step": 3841 }, { "epoch": 0.19745091993010586, "grad_norm": 1.1148929595947266, "learning_rate": 9.28262789085163e-06, "loss": 0.774, "step": 3842 }, { "epoch": 0.1975023126734505, "grad_norm": 1.0480315685272217, "learning_rate": 9.282198301393606e-06, "loss": 0.8107, "step": 3843 }, { "epoch": 0.19755370541679515, "grad_norm": 1.1431549787521362, "learning_rate": 9.281768593293536e-06, "loss": 0.812, "step": 3844 }, { "epoch": 0.1976050981601398, "grad_norm": 1.216841220855713, "learning_rate": 9.281338766563328e-06, "loss": 0.7588, "step": 3845 }, { "epoch": 0.19765649090348442, "grad_norm": 1.1709885597229004, "learning_rate": 9.280908821214893e-06, "loss": 0.8153, "step": 3846 }, { "epoch": 0.19770788364682906, "grad_norm": 1.1572009325027466, "learning_rate": 9.280478757260138e-06, "loss": 0.8147, "step": 3847 }, { "epoch": 0.1977592763901737, "grad_norm": 1.1200300455093384, "learning_rate": 9.280048574710983e-06, "loss": 0.8242, "step": 3848 }, { "epoch": 0.19781066913351836, "grad_norm": 1.059646487236023, "learning_rate": 9.279618273579346e-06, "loss": 0.7937, "step": 3849 }, { "epoch": 0.19786206187686298, "grad_norm": 1.1451395750045776, "learning_rate": 9.279187853877145e-06, "loss": 0.8064, "step": 3850 }, { "epoch": 0.19791345462020762, "grad_norm": 0.9026865363121033, "learning_rate": 9.27875731561631e-06, "loss": 0.7081, "step": 3851 }, { "epoch": 0.19796484736355227, "grad_norm": 1.1173293590545654, "learning_rate": 9.278326658808765e-06, "loss": 0.7849, "step": 3852 }, { "epoch": 0.19801624010689692, "grad_norm": 1.1208003759384155, "learning_rate": 9.277895883466444e-06, "loss": 0.8032, "step": 3853 }, { "epoch": 0.19806763285024154, "grad_norm": 1.1404167413711548, "learning_rate": 9.277464989601283e-06, "loss": 0.7905, "step": 3854 }, { "epoch": 0.19811902559358618, "grad_norm": 1.1774544715881348, "learning_rate": 9.27703397722522e-06, "loss": 0.7506, "step": 3855 }, { "epoch": 0.19817041833693083, "grad_norm": 1.0444045066833496, "learning_rate": 9.276602846350194e-06, "loss": 0.8306, "step": 3856 }, { "epoch": 0.19822181108027548, "grad_norm": 1.3302297592163086, "learning_rate": 9.276171596988152e-06, "loss": 0.7824, "step": 3857 }, { "epoch": 0.1982732038236201, "grad_norm": 1.0858607292175293, "learning_rate": 9.275740229151043e-06, "loss": 0.7531, "step": 3858 }, { "epoch": 0.19832459656696474, "grad_norm": 1.112226963043213, "learning_rate": 9.275308742850815e-06, "loss": 0.7737, "step": 3859 }, { "epoch": 0.1983759893103094, "grad_norm": 1.1191438436508179, "learning_rate": 9.274877138099427e-06, "loss": 0.8537, "step": 3860 }, { "epoch": 0.198427382053654, "grad_norm": 0.9639666676521301, "learning_rate": 9.274445414908834e-06, "loss": 0.6851, "step": 3861 }, { "epoch": 0.19847877479699866, "grad_norm": 1.0692031383514404, "learning_rate": 9.274013573290998e-06, "loss": 0.7793, "step": 3862 }, { "epoch": 0.1985301675403433, "grad_norm": 1.1660304069519043, "learning_rate": 9.273581613257883e-06, "loss": 0.8227, "step": 3863 }, { "epoch": 0.19858156028368795, "grad_norm": 1.0875794887542725, "learning_rate": 9.27314953482146e-06, "loss": 0.8002, "step": 3864 }, { "epoch": 0.19863295302703257, "grad_norm": 1.1471729278564453, "learning_rate": 9.272717337993695e-06, "loss": 0.8135, "step": 3865 }, { "epoch": 0.19868434577037722, "grad_norm": 1.1451597213745117, "learning_rate": 9.272285022786567e-06, "loss": 0.798, "step": 3866 }, { "epoch": 0.19873573851372187, "grad_norm": 1.1351687908172607, "learning_rate": 9.27185258921205e-06, "loss": 0.8556, "step": 3867 }, { "epoch": 0.1987871312570665, "grad_norm": 1.0907889604568481, "learning_rate": 9.271420037282127e-06, "loss": 0.7618, "step": 3868 }, { "epoch": 0.19883852400041113, "grad_norm": 1.0901378393173218, "learning_rate": 9.270987367008784e-06, "loss": 0.8302, "step": 3869 }, { "epoch": 0.19888991674375578, "grad_norm": 1.127983570098877, "learning_rate": 9.270554578404003e-06, "loss": 0.8115, "step": 3870 }, { "epoch": 0.19894130948710043, "grad_norm": 1.0593156814575195, "learning_rate": 9.27012167147978e-06, "loss": 0.8203, "step": 3871 }, { "epoch": 0.19899270223044507, "grad_norm": 1.1964728832244873, "learning_rate": 9.269688646248108e-06, "loss": 0.7258, "step": 3872 }, { "epoch": 0.1990440949737897, "grad_norm": 0.8178982138633728, "learning_rate": 9.269255502720983e-06, "loss": 0.7661, "step": 3873 }, { "epoch": 0.19909548771713434, "grad_norm": 1.09017813205719, "learning_rate": 9.268822240910404e-06, "loss": 0.8371, "step": 3874 }, { "epoch": 0.199146880460479, "grad_norm": 1.0892798900604248, "learning_rate": 9.26838886082838e-06, "loss": 0.7843, "step": 3875 }, { "epoch": 0.19919827320382363, "grad_norm": 1.1496427059173584, "learning_rate": 9.267955362486915e-06, "loss": 0.8626, "step": 3876 }, { "epoch": 0.19924966594716825, "grad_norm": 1.0521094799041748, "learning_rate": 9.267521745898018e-06, "loss": 0.745, "step": 3877 }, { "epoch": 0.1993010586905129, "grad_norm": 1.0460869073867798, "learning_rate": 9.267088011073706e-06, "loss": 0.719, "step": 3878 }, { "epoch": 0.19935245143385755, "grad_norm": 1.1293199062347412, "learning_rate": 9.266654158025993e-06, "loss": 0.8377, "step": 3879 }, { "epoch": 0.19940384417720217, "grad_norm": 1.0580253601074219, "learning_rate": 9.266220186766902e-06, "loss": 0.7712, "step": 3880 }, { "epoch": 0.1994552369205468, "grad_norm": 1.1796948909759521, "learning_rate": 9.265786097308455e-06, "loss": 0.7998, "step": 3881 }, { "epoch": 0.19950662966389146, "grad_norm": 0.7304195165634155, "learning_rate": 9.26535188966268e-06, "loss": 0.7088, "step": 3882 }, { "epoch": 0.1995580224072361, "grad_norm": 1.1592259407043457, "learning_rate": 9.264917563841605e-06, "loss": 0.8006, "step": 3883 }, { "epoch": 0.19960941515058073, "grad_norm": 0.8051683902740479, "learning_rate": 9.264483119857264e-06, "loss": 0.6894, "step": 3884 }, { "epoch": 0.19966080789392537, "grad_norm": 0.8977078795433044, "learning_rate": 9.264048557721695e-06, "loss": 0.7532, "step": 3885 }, { "epoch": 0.19971220063727002, "grad_norm": 1.12592351436615, "learning_rate": 9.263613877446937e-06, "loss": 0.8084, "step": 3886 }, { "epoch": 0.19976359338061467, "grad_norm": 1.1313230991363525, "learning_rate": 9.263179079045032e-06, "loss": 0.7487, "step": 3887 }, { "epoch": 0.1998149861239593, "grad_norm": 1.094099998474121, "learning_rate": 9.26274416252803e-06, "loss": 0.7922, "step": 3888 }, { "epoch": 0.19986637886730393, "grad_norm": 0.8224782943725586, "learning_rate": 9.262309127907978e-06, "loss": 0.6581, "step": 3889 }, { "epoch": 0.19991777161064858, "grad_norm": 1.1467647552490234, "learning_rate": 9.26187397519693e-06, "loss": 0.7635, "step": 3890 }, { "epoch": 0.19996916435399323, "grad_norm": 1.1063134670257568, "learning_rate": 9.26143870440694e-06, "loss": 0.7974, "step": 3891 }, { "epoch": 0.20002055709733785, "grad_norm": 1.2155370712280273, "learning_rate": 9.261003315550073e-06, "loss": 0.7945, "step": 3892 }, { "epoch": 0.2000719498406825, "grad_norm": 1.0735578536987305, "learning_rate": 9.260567808638384e-06, "loss": 0.7508, "step": 3893 }, { "epoch": 0.20012334258402714, "grad_norm": 1.122339129447937, "learning_rate": 9.260132183683945e-06, "loss": 0.8278, "step": 3894 }, { "epoch": 0.2001747353273718, "grad_norm": 1.0974421501159668, "learning_rate": 9.259696440698824e-06, "loss": 0.7616, "step": 3895 }, { "epoch": 0.2002261280707164, "grad_norm": 1.0067311525344849, "learning_rate": 9.259260579695094e-06, "loss": 0.7508, "step": 3896 }, { "epoch": 0.20027752081406106, "grad_norm": 1.1260629892349243, "learning_rate": 9.25882460068483e-06, "loss": 0.7416, "step": 3897 }, { "epoch": 0.2003289135574057, "grad_norm": 1.3214530944824219, "learning_rate": 9.258388503680112e-06, "loss": 0.8449, "step": 3898 }, { "epoch": 0.20038030630075032, "grad_norm": 1.1357483863830566, "learning_rate": 9.25795228869302e-06, "loss": 0.7538, "step": 3899 }, { "epoch": 0.20043169904409497, "grad_norm": 1.0284645557403564, "learning_rate": 9.257515955735643e-06, "loss": 0.749, "step": 3900 }, { "epoch": 0.20048309178743962, "grad_norm": 0.8301587104797363, "learning_rate": 9.257079504820069e-06, "loss": 0.7187, "step": 3901 }, { "epoch": 0.20053448453078426, "grad_norm": 1.0816036462783813, "learning_rate": 9.256642935958388e-06, "loss": 0.7648, "step": 3902 }, { "epoch": 0.20058587727412888, "grad_norm": 1.159629464149475, "learning_rate": 9.256206249162698e-06, "loss": 0.7748, "step": 3903 }, { "epoch": 0.20063727001747353, "grad_norm": 1.116652011871338, "learning_rate": 9.255769444445098e-06, "loss": 0.7793, "step": 3904 }, { "epoch": 0.20068866276081818, "grad_norm": 1.0868444442749023, "learning_rate": 9.255332521817688e-06, "loss": 0.8123, "step": 3905 }, { "epoch": 0.20074005550416282, "grad_norm": 1.0706380605697632, "learning_rate": 9.254895481292575e-06, "loss": 0.8061, "step": 3906 }, { "epoch": 0.20079144824750744, "grad_norm": 1.05228590965271, "learning_rate": 9.25445832288187e-06, "loss": 0.7678, "step": 3907 }, { "epoch": 0.2008428409908521, "grad_norm": 1.0935534238815308, "learning_rate": 9.254021046597678e-06, "loss": 0.7984, "step": 3908 }, { "epoch": 0.20089423373419674, "grad_norm": 0.7832483053207397, "learning_rate": 9.253583652452118e-06, "loss": 0.7512, "step": 3909 }, { "epoch": 0.20094562647754138, "grad_norm": 0.8347471356391907, "learning_rate": 9.253146140457313e-06, "loss": 0.7254, "step": 3910 }, { "epoch": 0.200997019220886, "grad_norm": 1.2558609247207642, "learning_rate": 9.252708510625376e-06, "loss": 0.7968, "step": 3911 }, { "epoch": 0.20104841196423065, "grad_norm": 1.1686102151870728, "learning_rate": 9.252270762968436e-06, "loss": 0.8048, "step": 3912 }, { "epoch": 0.2010998047075753, "grad_norm": 1.2918058633804321, "learning_rate": 9.251832897498622e-06, "loss": 0.8168, "step": 3913 }, { "epoch": 0.20115119745091992, "grad_norm": 1.0661139488220215, "learning_rate": 9.251394914228067e-06, "loss": 0.7818, "step": 3914 }, { "epoch": 0.20120259019426456, "grad_norm": 1.0685780048370361, "learning_rate": 9.250956813168902e-06, "loss": 0.783, "step": 3915 }, { "epoch": 0.2012539829376092, "grad_norm": 0.7564540505409241, "learning_rate": 9.250518594333264e-06, "loss": 0.734, "step": 3916 }, { "epoch": 0.20130537568095386, "grad_norm": 1.1519596576690674, "learning_rate": 9.2500802577333e-06, "loss": 0.7922, "step": 3917 }, { "epoch": 0.20135676842429848, "grad_norm": 1.1389437913894653, "learning_rate": 9.24964180338115e-06, "loss": 0.8106, "step": 3918 }, { "epoch": 0.20140816116764312, "grad_norm": 1.0936566591262817, "learning_rate": 9.249203231288961e-06, "loss": 0.7585, "step": 3919 }, { "epoch": 0.20145955391098777, "grad_norm": 1.0326403379440308, "learning_rate": 9.248764541468888e-06, "loss": 0.7386, "step": 3920 }, { "epoch": 0.20151094665433242, "grad_norm": 0.890773355960846, "learning_rate": 9.248325733933084e-06, "loss": 0.6995, "step": 3921 }, { "epoch": 0.20156233939767704, "grad_norm": 1.1256107091903687, "learning_rate": 9.247886808693702e-06, "loss": 0.7557, "step": 3922 }, { "epoch": 0.20161373214102168, "grad_norm": 1.1653518676757812, "learning_rate": 9.24744776576291e-06, "loss": 0.7848, "step": 3923 }, { "epoch": 0.20166512488436633, "grad_norm": 1.076413869857788, "learning_rate": 9.247008605152867e-06, "loss": 0.8185, "step": 3924 }, { "epoch": 0.20171651762771098, "grad_norm": 1.1787540912628174, "learning_rate": 9.246569326875743e-06, "loss": 0.844, "step": 3925 }, { "epoch": 0.2017679103710556, "grad_norm": 1.1158901453018188, "learning_rate": 9.246129930943706e-06, "loss": 0.7596, "step": 3926 }, { "epoch": 0.20181930311440024, "grad_norm": 1.0653053522109985, "learning_rate": 9.245690417368933e-06, "loss": 0.802, "step": 3927 }, { "epoch": 0.2018706958577449, "grad_norm": 1.0104701519012451, "learning_rate": 9.245250786163599e-06, "loss": 0.7136, "step": 3928 }, { "epoch": 0.20192208860108954, "grad_norm": 1.085080862045288, "learning_rate": 9.244811037339884e-06, "loss": 0.8028, "step": 3929 }, { "epoch": 0.20197348134443416, "grad_norm": 1.0771104097366333, "learning_rate": 9.244371170909973e-06, "loss": 0.7695, "step": 3930 }, { "epoch": 0.2020248740877788, "grad_norm": 1.0662623643875122, "learning_rate": 9.243931186886052e-06, "loss": 0.721, "step": 3931 }, { "epoch": 0.20207626683112345, "grad_norm": 1.168370246887207, "learning_rate": 9.243491085280311e-06, "loss": 0.8244, "step": 3932 }, { "epoch": 0.20212765957446807, "grad_norm": 0.7746744751930237, "learning_rate": 9.243050866104946e-06, "loss": 0.7088, "step": 3933 }, { "epoch": 0.20217905231781272, "grad_norm": 1.1639010906219482, "learning_rate": 9.24261052937215e-06, "loss": 0.8214, "step": 3934 }, { "epoch": 0.20223044506115737, "grad_norm": 1.0887398719787598, "learning_rate": 9.242170075094125e-06, "loss": 0.8387, "step": 3935 }, { "epoch": 0.202281837804502, "grad_norm": 1.2270092964172363, "learning_rate": 9.241729503283072e-06, "loss": 0.8324, "step": 3936 }, { "epoch": 0.20233323054784663, "grad_norm": 1.1386643648147583, "learning_rate": 9.241288813951201e-06, "loss": 0.8353, "step": 3937 }, { "epoch": 0.20238462329119128, "grad_norm": 1.062848448753357, "learning_rate": 9.240848007110719e-06, "loss": 0.7861, "step": 3938 }, { "epoch": 0.20243601603453593, "grad_norm": 0.7572140097618103, "learning_rate": 9.24040708277384e-06, "loss": 0.6962, "step": 3939 }, { "epoch": 0.20248740877788057, "grad_norm": 1.1148643493652344, "learning_rate": 9.23996604095278e-06, "loss": 0.8414, "step": 3940 }, { "epoch": 0.2025388015212252, "grad_norm": 1.0984480381011963, "learning_rate": 9.239524881659758e-06, "loss": 0.7667, "step": 3941 }, { "epoch": 0.20259019426456984, "grad_norm": 1.2138004302978516, "learning_rate": 9.239083604906997e-06, "loss": 0.8037, "step": 3942 }, { "epoch": 0.2026415870079145, "grad_norm": 1.1093533039093018, "learning_rate": 9.238642210706723e-06, "loss": 0.7777, "step": 3943 }, { "epoch": 0.20269297975125913, "grad_norm": 1.08134126663208, "learning_rate": 9.238200699071164e-06, "loss": 0.7663, "step": 3944 }, { "epoch": 0.20274437249460375, "grad_norm": 1.1083340644836426, "learning_rate": 9.237759070012555e-06, "loss": 0.827, "step": 3945 }, { "epoch": 0.2027957652379484, "grad_norm": 1.0571959018707275, "learning_rate": 9.23731732354313e-06, "loss": 0.7139, "step": 3946 }, { "epoch": 0.20284715798129305, "grad_norm": 1.0644627809524536, "learning_rate": 9.236875459675129e-06, "loss": 0.7247, "step": 3947 }, { "epoch": 0.2028985507246377, "grad_norm": 0.9224267601966858, "learning_rate": 9.236433478420792e-06, "loss": 0.6676, "step": 3948 }, { "epoch": 0.2029499434679823, "grad_norm": 1.1146587133407593, "learning_rate": 9.235991379792368e-06, "loss": 0.8352, "step": 3949 }, { "epoch": 0.20300133621132696, "grad_norm": 1.0715752840042114, "learning_rate": 9.235549163802102e-06, "loss": 0.7266, "step": 3950 }, { "epoch": 0.2030527289546716, "grad_norm": 0.8882959485054016, "learning_rate": 9.235106830462249e-06, "loss": 0.7155, "step": 3951 }, { "epoch": 0.20310412169801623, "grad_norm": 1.1432756185531616, "learning_rate": 9.234664379785064e-06, "loss": 0.7905, "step": 3952 }, { "epoch": 0.20315551444136087, "grad_norm": 1.0763963460922241, "learning_rate": 9.234221811782802e-06, "loss": 0.7916, "step": 3953 }, { "epoch": 0.20320690718470552, "grad_norm": 0.881188690662384, "learning_rate": 9.233779126467729e-06, "loss": 0.7326, "step": 3954 }, { "epoch": 0.20325829992805017, "grad_norm": 0.7955000996589661, "learning_rate": 9.233336323852107e-06, "loss": 0.742, "step": 3955 }, { "epoch": 0.2033096926713948, "grad_norm": 1.071510910987854, "learning_rate": 9.232893403948205e-06, "loss": 0.7736, "step": 3956 }, { "epoch": 0.20336108541473943, "grad_norm": 0.7522737979888916, "learning_rate": 9.232450366768295e-06, "loss": 0.6856, "step": 3957 }, { "epoch": 0.20341247815808408, "grad_norm": 1.387927770614624, "learning_rate": 9.232007212324654e-06, "loss": 0.8402, "step": 3958 }, { "epoch": 0.20346387090142873, "grad_norm": 0.8359056711196899, "learning_rate": 9.231563940629555e-06, "loss": 0.7266, "step": 3959 }, { "epoch": 0.20351526364477335, "grad_norm": 0.7284241318702698, "learning_rate": 9.231120551695283e-06, "loss": 0.684, "step": 3960 }, { "epoch": 0.203566656388118, "grad_norm": 1.181214451789856, "learning_rate": 9.230677045534121e-06, "loss": 0.8123, "step": 3961 }, { "epoch": 0.20361804913146264, "grad_norm": 1.0820766687393188, "learning_rate": 9.230233422158357e-06, "loss": 0.7692, "step": 3962 }, { "epoch": 0.2036694418748073, "grad_norm": 0.8252794742584229, "learning_rate": 9.229789681580283e-06, "loss": 0.7306, "step": 3963 }, { "epoch": 0.2037208346181519, "grad_norm": 1.0831453800201416, "learning_rate": 9.229345823812191e-06, "loss": 0.7277, "step": 3964 }, { "epoch": 0.20377222736149656, "grad_norm": 1.0945754051208496, "learning_rate": 9.228901848866379e-06, "loss": 0.8239, "step": 3965 }, { "epoch": 0.2038236201048412, "grad_norm": 0.6960675716400146, "learning_rate": 9.22845775675515e-06, "loss": 0.6866, "step": 3966 }, { "epoch": 0.20387501284818585, "grad_norm": 1.365492820739746, "learning_rate": 9.228013547490805e-06, "loss": 0.7776, "step": 3967 }, { "epoch": 0.20392640559153047, "grad_norm": 0.7762057781219482, "learning_rate": 9.227569221085655e-06, "loss": 0.6648, "step": 3968 }, { "epoch": 0.20397779833487512, "grad_norm": 1.1423217058181763, "learning_rate": 9.227124777552006e-06, "loss": 0.8792, "step": 3969 }, { "epoch": 0.20402919107821976, "grad_norm": 1.0567117929458618, "learning_rate": 9.226680216902176e-06, "loss": 0.7905, "step": 3970 }, { "epoch": 0.20408058382156438, "grad_norm": 1.1712135076522827, "learning_rate": 9.226235539148476e-06, "loss": 0.7913, "step": 3971 }, { "epoch": 0.20413197656490903, "grad_norm": 1.0792492628097534, "learning_rate": 9.225790744303231e-06, "loss": 0.8193, "step": 3972 }, { "epoch": 0.20418336930825368, "grad_norm": 1.0488131046295166, "learning_rate": 9.225345832378765e-06, "loss": 0.7826, "step": 3973 }, { "epoch": 0.20423476205159832, "grad_norm": 1.082502007484436, "learning_rate": 9.224900803387402e-06, "loss": 0.8158, "step": 3974 }, { "epoch": 0.20428615479494294, "grad_norm": 1.048316240310669, "learning_rate": 9.224455657341474e-06, "loss": 0.7843, "step": 3975 }, { "epoch": 0.2043375475382876, "grad_norm": 0.8091312646865845, "learning_rate": 9.22401039425331e-06, "loss": 0.7491, "step": 3976 }, { "epoch": 0.20438894028163224, "grad_norm": 1.1756280660629272, "learning_rate": 9.223565014135252e-06, "loss": 0.8003, "step": 3977 }, { "epoch": 0.20444033302497688, "grad_norm": 1.1426849365234375, "learning_rate": 9.223119516999636e-06, "loss": 0.8063, "step": 3978 }, { "epoch": 0.2044917257683215, "grad_norm": 1.0891655683517456, "learning_rate": 9.222673902858807e-06, "loss": 0.7901, "step": 3979 }, { "epoch": 0.20454311851166615, "grad_norm": 0.9885302782058716, "learning_rate": 9.22222817172511e-06, "loss": 0.7391, "step": 3980 }, { "epoch": 0.2045945112550108, "grad_norm": 1.2324937582015991, "learning_rate": 9.221782323610894e-06, "loss": 0.7814, "step": 3981 }, { "epoch": 0.20464590399835544, "grad_norm": 1.2639241218566895, "learning_rate": 9.221336358528512e-06, "loss": 0.7528, "step": 3982 }, { "epoch": 0.20469729674170006, "grad_norm": 1.1169867515563965, "learning_rate": 9.22089027649032e-06, "loss": 0.8051, "step": 3983 }, { "epoch": 0.2047486894850447, "grad_norm": 1.1546446084976196, "learning_rate": 9.220444077508678e-06, "loss": 0.7803, "step": 3984 }, { "epoch": 0.20480008222838936, "grad_norm": 1.2693123817443848, "learning_rate": 9.219997761595944e-06, "loss": 0.7983, "step": 3985 }, { "epoch": 0.204851474971734, "grad_norm": 1.1667239665985107, "learning_rate": 9.21955132876449e-06, "loss": 0.8, "step": 3986 }, { "epoch": 0.20490286771507862, "grad_norm": 1.0777825117111206, "learning_rate": 9.219104779026682e-06, "loss": 0.7892, "step": 3987 }, { "epoch": 0.20495426045842327, "grad_norm": 1.0768225193023682, "learning_rate": 9.21865811239489e-06, "loss": 0.7799, "step": 3988 }, { "epoch": 0.20500565320176792, "grad_norm": 1.1802806854248047, "learning_rate": 9.218211328881492e-06, "loss": 0.8588, "step": 3989 }, { "epoch": 0.20505704594511254, "grad_norm": 1.0987366437911987, "learning_rate": 9.217764428498865e-06, "loss": 0.7738, "step": 3990 }, { "epoch": 0.20510843868845718, "grad_norm": 0.7711794972419739, "learning_rate": 9.217317411259392e-06, "loss": 0.7196, "step": 3991 }, { "epoch": 0.20515983143180183, "grad_norm": 1.0974634885787964, "learning_rate": 9.216870277175458e-06, "loss": 0.7549, "step": 3992 }, { "epoch": 0.20521122417514648, "grad_norm": 1.1164509057998657, "learning_rate": 9.21642302625945e-06, "loss": 0.794, "step": 3993 }, { "epoch": 0.2052626169184911, "grad_norm": 1.1832889318466187, "learning_rate": 9.215975658523759e-06, "loss": 0.8077, "step": 3994 }, { "epoch": 0.20531400966183574, "grad_norm": 1.1268428564071655, "learning_rate": 9.215528173980781e-06, "loss": 0.7876, "step": 3995 }, { "epoch": 0.2053654024051804, "grad_norm": 1.2040382623672485, "learning_rate": 9.215080572642915e-06, "loss": 0.7963, "step": 3996 }, { "epoch": 0.20541679514852504, "grad_norm": 1.177782654762268, "learning_rate": 9.21463285452256e-06, "loss": 0.864, "step": 3997 }, { "epoch": 0.20546818789186966, "grad_norm": 0.7775552868843079, "learning_rate": 9.214185019632123e-06, "loss": 0.6736, "step": 3998 }, { "epoch": 0.2055195806352143, "grad_norm": 1.0257055759429932, "learning_rate": 9.213737067984007e-06, "loss": 0.8217, "step": 3999 }, { "epoch": 0.20557097337855895, "grad_norm": 1.1647230386734009, "learning_rate": 9.21328899959063e-06, "loss": 0.829, "step": 4000 }, { "epoch": 0.2056223661219036, "grad_norm": 0.7926165461540222, "learning_rate": 9.212840814464399e-06, "loss": 0.7381, "step": 4001 }, { "epoch": 0.20567375886524822, "grad_norm": 0.770859956741333, "learning_rate": 9.212392512617734e-06, "loss": 0.7243, "step": 4002 }, { "epoch": 0.20572515160859287, "grad_norm": 0.7493070363998413, "learning_rate": 9.211944094063059e-06, "loss": 0.7077, "step": 4003 }, { "epoch": 0.2057765443519375, "grad_norm": 1.1485666036605835, "learning_rate": 9.211495558812793e-06, "loss": 0.7505, "step": 4004 }, { "epoch": 0.20582793709528216, "grad_norm": 0.775187611579895, "learning_rate": 9.211046906879363e-06, "loss": 0.7456, "step": 4005 }, { "epoch": 0.20587932983862678, "grad_norm": 1.141463041305542, "learning_rate": 9.210598138275204e-06, "loss": 0.7971, "step": 4006 }, { "epoch": 0.20593072258197143, "grad_norm": 1.1968775987625122, "learning_rate": 9.210149253012745e-06, "loss": 0.8239, "step": 4007 }, { "epoch": 0.20598211532531607, "grad_norm": 1.034183382987976, "learning_rate": 9.209700251104426e-06, "loss": 0.7546, "step": 4008 }, { "epoch": 0.2060335080686607, "grad_norm": 1.1379923820495605, "learning_rate": 9.209251132562685e-06, "loss": 0.7989, "step": 4009 }, { "epoch": 0.20608490081200534, "grad_norm": 1.0693378448486328, "learning_rate": 9.208801897399966e-06, "loss": 0.8053, "step": 4010 }, { "epoch": 0.20613629355535, "grad_norm": 1.0320727825164795, "learning_rate": 9.208352545628714e-06, "loss": 0.7635, "step": 4011 }, { "epoch": 0.20618768629869463, "grad_norm": 1.0974820852279663, "learning_rate": 9.20790307726138e-06, "loss": 0.7951, "step": 4012 }, { "epoch": 0.20623907904203925, "grad_norm": 1.20841646194458, "learning_rate": 9.207453492310417e-06, "loss": 0.799, "step": 4013 }, { "epoch": 0.2062904717853839, "grad_norm": 1.1445960998535156, "learning_rate": 9.20700379078828e-06, "loss": 0.8037, "step": 4014 }, { "epoch": 0.20634186452872855, "grad_norm": 1.0224921703338623, "learning_rate": 9.20655397270743e-06, "loss": 0.812, "step": 4015 }, { "epoch": 0.2063932572720732, "grad_norm": 0.8716250061988831, "learning_rate": 9.20610403808033e-06, "loss": 0.7298, "step": 4016 }, { "epoch": 0.2064446500154178, "grad_norm": 1.1250152587890625, "learning_rate": 9.205653986919443e-06, "loss": 0.8304, "step": 4017 }, { "epoch": 0.20649604275876246, "grad_norm": 1.1214884519577026, "learning_rate": 9.20520381923724e-06, "loss": 0.79, "step": 4018 }, { "epoch": 0.2065474355021071, "grad_norm": 0.6935552358627319, "learning_rate": 9.204753535046193e-06, "loss": 0.7154, "step": 4019 }, { "epoch": 0.20659882824545175, "grad_norm": 1.122473955154419, "learning_rate": 9.20430313435878e-06, "loss": 0.7568, "step": 4020 }, { "epoch": 0.20665022098879637, "grad_norm": 1.156865119934082, "learning_rate": 9.203852617187474e-06, "loss": 0.7597, "step": 4021 }, { "epoch": 0.20670161373214102, "grad_norm": 1.1034526824951172, "learning_rate": 9.203401983544762e-06, "loss": 0.7569, "step": 4022 }, { "epoch": 0.20675300647548567, "grad_norm": 1.0662312507629395, "learning_rate": 9.202951233443126e-06, "loss": 0.8416, "step": 4023 }, { "epoch": 0.20680439921883031, "grad_norm": 1.1231504678726196, "learning_rate": 9.202500366895057e-06, "loss": 0.7714, "step": 4024 }, { "epoch": 0.20685579196217493, "grad_norm": 1.2251371145248413, "learning_rate": 9.202049383913045e-06, "loss": 0.7818, "step": 4025 }, { "epoch": 0.20690718470551958, "grad_norm": 1.0744401216506958, "learning_rate": 9.201598284509586e-06, "loss": 0.7699, "step": 4026 }, { "epoch": 0.20695857744886423, "grad_norm": 1.1441630125045776, "learning_rate": 9.201147068697178e-06, "loss": 0.7751, "step": 4027 }, { "epoch": 0.20700997019220885, "grad_norm": 1.113736629486084, "learning_rate": 9.200695736488322e-06, "loss": 0.7919, "step": 4028 }, { "epoch": 0.2070613629355535, "grad_norm": 1.2082083225250244, "learning_rate": 9.20024428789552e-06, "loss": 0.8091, "step": 4029 }, { "epoch": 0.20711275567889814, "grad_norm": 1.1719342470169067, "learning_rate": 9.199792722931285e-06, "loss": 0.8161, "step": 4030 }, { "epoch": 0.2071641484222428, "grad_norm": 1.0741727352142334, "learning_rate": 9.199341041608123e-06, "loss": 0.8123, "step": 4031 }, { "epoch": 0.2072155411655874, "grad_norm": 1.1328620910644531, "learning_rate": 9.19888924393855e-06, "loss": 0.8236, "step": 4032 }, { "epoch": 0.20726693390893206, "grad_norm": 1.1496222019195557, "learning_rate": 9.198437329935086e-06, "loss": 0.791, "step": 4033 }, { "epoch": 0.2073183266522767, "grad_norm": 1.107384204864502, "learning_rate": 9.197985299610247e-06, "loss": 0.782, "step": 4034 }, { "epoch": 0.20736971939562135, "grad_norm": 0.808521568775177, "learning_rate": 9.197533152976563e-06, "loss": 0.7207, "step": 4035 }, { "epoch": 0.20742111213896597, "grad_norm": 1.0780826807022095, "learning_rate": 9.197080890046555e-06, "loss": 0.758, "step": 4036 }, { "epoch": 0.20747250488231062, "grad_norm": 1.120969533920288, "learning_rate": 9.196628510832756e-06, "loss": 0.7838, "step": 4037 }, { "epoch": 0.20752389762565526, "grad_norm": 0.7947130799293518, "learning_rate": 9.1961760153477e-06, "loss": 0.6869, "step": 4038 }, { "epoch": 0.2075752903689999, "grad_norm": 1.1954935789108276, "learning_rate": 9.195723403603922e-06, "loss": 0.7912, "step": 4039 }, { "epoch": 0.20762668311234453, "grad_norm": 0.7600930333137512, "learning_rate": 9.195270675613965e-06, "loss": 0.6849, "step": 4040 }, { "epoch": 0.20767807585568918, "grad_norm": 1.0679527521133423, "learning_rate": 9.19481783139037e-06, "loss": 0.7422, "step": 4041 }, { "epoch": 0.20772946859903382, "grad_norm": 0.8906779885292053, "learning_rate": 9.194364870945683e-06, "loss": 0.6891, "step": 4042 }, { "epoch": 0.20778086134237844, "grad_norm": 1.1099522113800049, "learning_rate": 9.193911794292455e-06, "loss": 0.8177, "step": 4043 }, { "epoch": 0.2078322540857231, "grad_norm": 0.8021684288978577, "learning_rate": 9.193458601443238e-06, "loss": 0.6635, "step": 4044 }, { "epoch": 0.20788364682906774, "grad_norm": 1.0883532762527466, "learning_rate": 9.19300529241059e-06, "loss": 0.7316, "step": 4045 }, { "epoch": 0.20793503957241238, "grad_norm": 1.0455607175827026, "learning_rate": 9.192551867207066e-06, "loss": 0.7946, "step": 4046 }, { "epoch": 0.207986432315757, "grad_norm": 0.7443743348121643, "learning_rate": 9.192098325845234e-06, "loss": 0.7043, "step": 4047 }, { "epoch": 0.20803782505910165, "grad_norm": 1.102075219154358, "learning_rate": 9.191644668337656e-06, "loss": 0.7619, "step": 4048 }, { "epoch": 0.2080892178024463, "grad_norm": 1.138498306274414, "learning_rate": 9.191190894696904e-06, "loss": 0.8153, "step": 4049 }, { "epoch": 0.20814061054579094, "grad_norm": 1.1311196088790894, "learning_rate": 9.190737004935545e-06, "loss": 0.8111, "step": 4050 }, { "epoch": 0.20819200328913556, "grad_norm": 1.0865874290466309, "learning_rate": 9.19028299906616e-06, "loss": 0.7774, "step": 4051 }, { "epoch": 0.2082433960324802, "grad_norm": 0.9141707420349121, "learning_rate": 9.189828877101326e-06, "loss": 0.7166, "step": 4052 }, { "epoch": 0.20829478877582486, "grad_norm": 1.1001232862472534, "learning_rate": 9.189374639053624e-06, "loss": 0.7647, "step": 4053 }, { "epoch": 0.2083461815191695, "grad_norm": 1.10018789768219, "learning_rate": 9.188920284935638e-06, "loss": 0.7696, "step": 4054 }, { "epoch": 0.20839757426251412, "grad_norm": 1.09285569190979, "learning_rate": 9.188465814759956e-06, "loss": 0.8418, "step": 4055 }, { "epoch": 0.20844896700585877, "grad_norm": 0.7205379009246826, "learning_rate": 9.188011228539175e-06, "loss": 0.688, "step": 4056 }, { "epoch": 0.20850035974920342, "grad_norm": 1.1213802099227905, "learning_rate": 9.187556526285883e-06, "loss": 0.7996, "step": 4057 }, { "epoch": 0.20855175249254806, "grad_norm": 1.144457221031189, "learning_rate": 9.187101708012682e-06, "loss": 0.8256, "step": 4058 }, { "epoch": 0.20860314523589268, "grad_norm": 1.125562310218811, "learning_rate": 9.186646773732171e-06, "loss": 0.8484, "step": 4059 }, { "epoch": 0.20865453797923733, "grad_norm": 0.7997061014175415, "learning_rate": 9.186191723456956e-06, "loss": 0.6995, "step": 4060 }, { "epoch": 0.20870593072258198, "grad_norm": 1.1060621738433838, "learning_rate": 9.185736557199644e-06, "loss": 0.8127, "step": 4061 }, { "epoch": 0.2087573234659266, "grad_norm": 1.1874028444290161, "learning_rate": 9.185281274972844e-06, "loss": 0.8244, "step": 4062 }, { "epoch": 0.20880871620927124, "grad_norm": 1.0982171297073364, "learning_rate": 9.184825876789171e-06, "loss": 0.7792, "step": 4063 }, { "epoch": 0.2088601089526159, "grad_norm": 1.0794787406921387, "learning_rate": 9.184370362661245e-06, "loss": 0.7406, "step": 4064 }, { "epoch": 0.20891150169596054, "grad_norm": 1.1303212642669678, "learning_rate": 9.183914732601685e-06, "loss": 0.7916, "step": 4065 }, { "epoch": 0.20896289443930516, "grad_norm": 1.093451976776123, "learning_rate": 9.18345898662311e-06, "loss": 0.768, "step": 4066 }, { "epoch": 0.2090142871826498, "grad_norm": 1.076591968536377, "learning_rate": 9.183003124738153e-06, "loss": 0.867, "step": 4067 }, { "epoch": 0.20906567992599445, "grad_norm": 0.8431175351142883, "learning_rate": 9.182547146959441e-06, "loss": 0.7073, "step": 4068 }, { "epoch": 0.2091170726693391, "grad_norm": 1.1227257251739502, "learning_rate": 9.18209105329961e-06, "loss": 0.7652, "step": 4069 }, { "epoch": 0.20916846541268372, "grad_norm": 1.097144603729248, "learning_rate": 9.181634843771291e-06, "loss": 0.7395, "step": 4070 }, { "epoch": 0.20921985815602837, "grad_norm": 1.123072624206543, "learning_rate": 9.181178518387128e-06, "loss": 0.7657, "step": 4071 }, { "epoch": 0.209271250899373, "grad_norm": 1.036139726638794, "learning_rate": 9.180722077159765e-06, "loss": 0.7664, "step": 4072 }, { "epoch": 0.20932264364271766, "grad_norm": 1.1210362911224365, "learning_rate": 9.180265520101843e-06, "loss": 0.8445, "step": 4073 }, { "epoch": 0.20937403638606228, "grad_norm": 1.1159181594848633, "learning_rate": 9.179808847226017e-06, "loss": 0.8035, "step": 4074 }, { "epoch": 0.20942542912940693, "grad_norm": 1.122412085533142, "learning_rate": 9.179352058544935e-06, "loss": 0.8214, "step": 4075 }, { "epoch": 0.20947682187275157, "grad_norm": 0.8825933933258057, "learning_rate": 9.178895154071255e-06, "loss": 0.7118, "step": 4076 }, { "epoch": 0.20952821461609622, "grad_norm": 1.1096134185791016, "learning_rate": 9.178438133817637e-06, "loss": 0.8037, "step": 4077 }, { "epoch": 0.20957960735944084, "grad_norm": 1.1554313898086548, "learning_rate": 9.17798099779674e-06, "loss": 0.777, "step": 4078 }, { "epoch": 0.2096310001027855, "grad_norm": 0.8162883520126343, "learning_rate": 9.177523746021232e-06, "loss": 0.7389, "step": 4079 }, { "epoch": 0.20968239284613013, "grad_norm": 1.0952943563461304, "learning_rate": 9.17706637850378e-06, "loss": 0.8214, "step": 4080 }, { "epoch": 0.20973378558947475, "grad_norm": 1.326655387878418, "learning_rate": 9.176608895257057e-06, "loss": 0.8016, "step": 4081 }, { "epoch": 0.2097851783328194, "grad_norm": 0.8916594386100769, "learning_rate": 9.176151296293739e-06, "loss": 0.706, "step": 4082 }, { "epoch": 0.20983657107616405, "grad_norm": 1.212406039237976, "learning_rate": 9.1756935816265e-06, "loss": 0.7847, "step": 4083 }, { "epoch": 0.2098879638195087, "grad_norm": 2.6856772899627686, "learning_rate": 9.175235751268023e-06, "loss": 0.7286, "step": 4084 }, { "epoch": 0.2099393565628533, "grad_norm": 1.0466660261154175, "learning_rate": 9.174777805230997e-06, "loss": 0.7383, "step": 4085 }, { "epoch": 0.20999074930619796, "grad_norm": 1.1743662357330322, "learning_rate": 9.174319743528104e-06, "loss": 0.818, "step": 4086 }, { "epoch": 0.2100421420495426, "grad_norm": 1.1300913095474243, "learning_rate": 9.173861566172038e-06, "loss": 0.7937, "step": 4087 }, { "epoch": 0.21009353479288725, "grad_norm": 0.8407766222953796, "learning_rate": 9.173403273175492e-06, "loss": 0.6763, "step": 4088 }, { "epoch": 0.21014492753623187, "grad_norm": 1.072435736656189, "learning_rate": 9.172944864551163e-06, "loss": 0.7882, "step": 4089 }, { "epoch": 0.21019632027957652, "grad_norm": 1.0915374755859375, "learning_rate": 9.172486340311755e-06, "loss": 0.7844, "step": 4090 }, { "epoch": 0.21024771302292117, "grad_norm": 1.3030527830123901, "learning_rate": 9.172027700469967e-06, "loss": 0.8454, "step": 4091 }, { "epoch": 0.21029910576626581, "grad_norm": 1.1855648756027222, "learning_rate": 9.17156894503851e-06, "loss": 0.821, "step": 4092 }, { "epoch": 0.21035049850961043, "grad_norm": 0.7978924512863159, "learning_rate": 9.171110074030092e-06, "loss": 0.6707, "step": 4093 }, { "epoch": 0.21040189125295508, "grad_norm": 1.1909937858581543, "learning_rate": 9.170651087457427e-06, "loss": 0.7577, "step": 4094 }, { "epoch": 0.21045328399629973, "grad_norm": 1.1295768022537231, "learning_rate": 9.17019198533323e-06, "loss": 0.7734, "step": 4095 }, { "epoch": 0.21050467673964438, "grad_norm": 1.3759740591049194, "learning_rate": 9.169732767670225e-06, "loss": 0.8333, "step": 4096 }, { "epoch": 0.210556069482989, "grad_norm": 1.1282240152359009, "learning_rate": 9.169273434481132e-06, "loss": 0.7976, "step": 4097 }, { "epoch": 0.21060746222633364, "grad_norm": 1.5105525255203247, "learning_rate": 9.168813985778677e-06, "loss": 0.7399, "step": 4098 }, { "epoch": 0.2106588549696783, "grad_norm": 0.7949177622795105, "learning_rate": 9.16835442157559e-06, "loss": 0.7409, "step": 4099 }, { "epoch": 0.2107102477130229, "grad_norm": 1.0517692565917969, "learning_rate": 9.167894741884604e-06, "loss": 0.7258, "step": 4100 }, { "epoch": 0.21076164045636755, "grad_norm": 1.0692452192306519, "learning_rate": 9.167434946718455e-06, "loss": 0.8521, "step": 4101 }, { "epoch": 0.2108130331997122, "grad_norm": 0.7752538919448853, "learning_rate": 9.16697503608988e-06, "loss": 0.69, "step": 4102 }, { "epoch": 0.21086442594305685, "grad_norm": 1.1508145332336426, "learning_rate": 9.166515010011625e-06, "loss": 0.8239, "step": 4103 }, { "epoch": 0.21091581868640147, "grad_norm": 1.1212024688720703, "learning_rate": 9.166054868496433e-06, "loss": 0.7824, "step": 4104 }, { "epoch": 0.21096721142974612, "grad_norm": 0.7913763523101807, "learning_rate": 9.165594611557052e-06, "loss": 0.6895, "step": 4105 }, { "epoch": 0.21101860417309076, "grad_norm": 1.0686748027801514, "learning_rate": 9.165134239206236e-06, "loss": 0.8244, "step": 4106 }, { "epoch": 0.2110699969164354, "grad_norm": 1.0987708568572998, "learning_rate": 9.164673751456738e-06, "loss": 0.8037, "step": 4107 }, { "epoch": 0.21112138965978003, "grad_norm": 1.0956056118011475, "learning_rate": 9.164213148321315e-06, "loss": 0.8406, "step": 4108 }, { "epoch": 0.21117278240312468, "grad_norm": 1.1297279596328735, "learning_rate": 9.163752429812733e-06, "loss": 0.8626, "step": 4109 }, { "epoch": 0.21122417514646932, "grad_norm": 0.9671109914779663, "learning_rate": 9.163291595943753e-06, "loss": 0.6765, "step": 4110 }, { "epoch": 0.21127556788981397, "grad_norm": 1.112369418144226, "learning_rate": 9.162830646727143e-06, "loss": 0.8295, "step": 4111 }, { "epoch": 0.2113269606331586, "grad_norm": 1.121092677116394, "learning_rate": 9.162369582175676e-06, "loss": 0.7927, "step": 4112 }, { "epoch": 0.21137835337650324, "grad_norm": 1.1575745344161987, "learning_rate": 9.161908402302125e-06, "loss": 0.7862, "step": 4113 }, { "epoch": 0.21142974611984788, "grad_norm": 1.0813905000686646, "learning_rate": 9.161447107119266e-06, "loss": 0.7926, "step": 4114 }, { "epoch": 0.21148113886319253, "grad_norm": 1.1230854988098145, "learning_rate": 9.160985696639882e-06, "loss": 0.785, "step": 4115 }, { "epoch": 0.21153253160653715, "grad_norm": 1.1330019235610962, "learning_rate": 9.160524170876756e-06, "loss": 0.8373, "step": 4116 }, { "epoch": 0.2115839243498818, "grad_norm": 0.7802382707595825, "learning_rate": 9.160062529842675e-06, "loss": 0.6708, "step": 4117 }, { "epoch": 0.21163531709322644, "grad_norm": 1.1753462553024292, "learning_rate": 9.159600773550428e-06, "loss": 0.7757, "step": 4118 }, { "epoch": 0.21168670983657106, "grad_norm": 1.136126160621643, "learning_rate": 9.15913890201281e-06, "loss": 0.8242, "step": 4119 }, { "epoch": 0.2117381025799157, "grad_norm": 1.087391972541809, "learning_rate": 9.158676915242615e-06, "loss": 0.7408, "step": 4120 }, { "epoch": 0.21178949532326036, "grad_norm": 1.2799772024154663, "learning_rate": 9.158214813252646e-06, "loss": 0.8115, "step": 4121 }, { "epoch": 0.211840888066605, "grad_norm": 1.114206314086914, "learning_rate": 9.157752596055704e-06, "loss": 0.823, "step": 4122 }, { "epoch": 0.21189228080994962, "grad_norm": 1.092413306236267, "learning_rate": 9.157290263664596e-06, "loss": 0.8126, "step": 4123 }, { "epoch": 0.21194367355329427, "grad_norm": 0.7647567391395569, "learning_rate": 9.15682781609213e-06, "loss": 0.6799, "step": 4124 }, { "epoch": 0.21199506629663892, "grad_norm": 0.7781330943107605, "learning_rate": 9.15636525335112e-06, "loss": 0.718, "step": 4125 }, { "epoch": 0.21204645903998356, "grad_norm": 1.1484543085098267, "learning_rate": 9.155902575454381e-06, "loss": 0.7885, "step": 4126 }, { "epoch": 0.21209785178332818, "grad_norm": 1.244184136390686, "learning_rate": 9.155439782414732e-06, "loss": 0.8081, "step": 4127 }, { "epoch": 0.21214924452667283, "grad_norm": 1.12799870967865, "learning_rate": 9.154976874244996e-06, "loss": 0.7673, "step": 4128 }, { "epoch": 0.21220063727001748, "grad_norm": 1.1538182497024536, "learning_rate": 9.154513850957995e-06, "loss": 0.7868, "step": 4129 }, { "epoch": 0.21225203001336213, "grad_norm": 1.1459710597991943, "learning_rate": 9.154050712566563e-06, "loss": 0.7947, "step": 4130 }, { "epoch": 0.21230342275670674, "grad_norm": 1.1236449480056763, "learning_rate": 9.153587459083525e-06, "loss": 0.8077, "step": 4131 }, { "epoch": 0.2123548155000514, "grad_norm": 1.090453863143921, "learning_rate": 9.153124090521723e-06, "loss": 0.7571, "step": 4132 }, { "epoch": 0.21240620824339604, "grad_norm": 1.104998230934143, "learning_rate": 9.152660606893991e-06, "loss": 0.809, "step": 4133 }, { "epoch": 0.21245760098674069, "grad_norm": 1.0853921175003052, "learning_rate": 9.152197008213169e-06, "loss": 0.7708, "step": 4134 }, { "epoch": 0.2125089937300853, "grad_norm": 1.1155692338943481, "learning_rate": 9.151733294492101e-06, "loss": 0.8059, "step": 4135 }, { "epoch": 0.21256038647342995, "grad_norm": 1.091387152671814, "learning_rate": 9.151269465743641e-06, "loss": 0.7658, "step": 4136 }, { "epoch": 0.2126117792167746, "grad_norm": 1.074527621269226, "learning_rate": 9.150805521980634e-06, "loss": 0.7429, "step": 4137 }, { "epoch": 0.21266317196011922, "grad_norm": 1.1839638948440552, "learning_rate": 9.150341463215935e-06, "loss": 0.8066, "step": 4138 }, { "epoch": 0.21271456470346387, "grad_norm": 1.147925853729248, "learning_rate": 9.1498772894624e-06, "loss": 0.7642, "step": 4139 }, { "epoch": 0.2127659574468085, "grad_norm": 1.1146399974822998, "learning_rate": 9.149413000732892e-06, "loss": 0.8282, "step": 4140 }, { "epoch": 0.21281735019015316, "grad_norm": 1.0158265829086304, "learning_rate": 9.148948597040274e-06, "loss": 0.7226, "step": 4141 }, { "epoch": 0.21286874293349778, "grad_norm": 0.8555070161819458, "learning_rate": 9.148484078397412e-06, "loss": 0.6998, "step": 4142 }, { "epoch": 0.21292013567684243, "grad_norm": 1.2350852489471436, "learning_rate": 9.148019444817175e-06, "loss": 0.7903, "step": 4143 }, { "epoch": 0.21297152842018707, "grad_norm": 1.1090493202209473, "learning_rate": 9.147554696312438e-06, "loss": 0.8118, "step": 4144 }, { "epoch": 0.21302292116353172, "grad_norm": 1.0957188606262207, "learning_rate": 9.147089832896075e-06, "loss": 0.7279, "step": 4145 }, { "epoch": 0.21307431390687634, "grad_norm": 1.1010417938232422, "learning_rate": 9.146624854580968e-06, "loss": 0.8248, "step": 4146 }, { "epoch": 0.213125706650221, "grad_norm": 1.0710384845733643, "learning_rate": 9.146159761379998e-06, "loss": 0.7796, "step": 4147 }, { "epoch": 0.21317709939356563, "grad_norm": 1.104913353919983, "learning_rate": 9.145694553306051e-06, "loss": 0.7783, "step": 4148 }, { "epoch": 0.21322849213691028, "grad_norm": 1.1737264394760132, "learning_rate": 9.145229230372017e-06, "loss": 0.8813, "step": 4149 }, { "epoch": 0.2132798848802549, "grad_norm": 1.098204493522644, "learning_rate": 9.144763792590787e-06, "loss": 0.8129, "step": 4150 }, { "epoch": 0.21333127762359955, "grad_norm": 1.3907911777496338, "learning_rate": 9.144298239975255e-06, "loss": 0.8014, "step": 4151 }, { "epoch": 0.2133826703669442, "grad_norm": 1.1620992422103882, "learning_rate": 9.143832572538324e-06, "loss": 0.7728, "step": 4152 }, { "epoch": 0.21343406311028884, "grad_norm": 0.8103654384613037, "learning_rate": 9.143366790292892e-06, "loss": 0.7174, "step": 4153 }, { "epoch": 0.21348545585363346, "grad_norm": 1.1073077917099, "learning_rate": 9.142900893251864e-06, "loss": 0.7936, "step": 4154 }, { "epoch": 0.2135368485969781, "grad_norm": 1.1065016984939575, "learning_rate": 9.14243488142815e-06, "loss": 0.8063, "step": 4155 }, { "epoch": 0.21358824134032275, "grad_norm": 1.1206870079040527, "learning_rate": 9.14196875483466e-06, "loss": 0.8022, "step": 4156 }, { "epoch": 0.21363963408366737, "grad_norm": 1.1078795194625854, "learning_rate": 9.141502513484308e-06, "loss": 0.7639, "step": 4157 }, { "epoch": 0.21369102682701202, "grad_norm": 0.7858216762542725, "learning_rate": 9.141036157390014e-06, "loss": 0.757, "step": 4158 }, { "epoch": 0.21374241957035667, "grad_norm": 1.0773473978042603, "learning_rate": 9.140569686564695e-06, "loss": 0.7673, "step": 4159 }, { "epoch": 0.21379381231370131, "grad_norm": 1.1611648797988892, "learning_rate": 9.14010310102128e-06, "loss": 0.7841, "step": 4160 }, { "epoch": 0.21384520505704593, "grad_norm": 1.0764635801315308, "learning_rate": 9.139636400772692e-06, "loss": 0.7509, "step": 4161 }, { "epoch": 0.21389659780039058, "grad_norm": 0.8107448816299438, "learning_rate": 9.139169585831862e-06, "loss": 0.6771, "step": 4162 }, { "epoch": 0.21394799054373523, "grad_norm": 0.7882181406021118, "learning_rate": 9.138702656211723e-06, "loss": 0.6587, "step": 4163 }, { "epoch": 0.21399938328707988, "grad_norm": 1.0768563747406006, "learning_rate": 9.138235611925215e-06, "loss": 0.8408, "step": 4164 }, { "epoch": 0.2140507760304245, "grad_norm": 1.3791990280151367, "learning_rate": 9.137768452985273e-06, "loss": 0.8195, "step": 4165 }, { "epoch": 0.21410216877376914, "grad_norm": 1.171093463897705, "learning_rate": 9.137301179404845e-06, "loss": 0.8214, "step": 4166 }, { "epoch": 0.2141535615171138, "grad_norm": 1.111439824104309, "learning_rate": 9.136833791196875e-06, "loss": 0.7838, "step": 4167 }, { "epoch": 0.21420495426045844, "grad_norm": 1.1361054182052612, "learning_rate": 9.136366288374311e-06, "loss": 0.8215, "step": 4168 }, { "epoch": 0.21425634700380305, "grad_norm": 1.1506834030151367, "learning_rate": 9.135898670950108e-06, "loss": 0.8051, "step": 4169 }, { "epoch": 0.2143077397471477, "grad_norm": 1.0800647735595703, "learning_rate": 9.135430938937219e-06, "loss": 0.7629, "step": 4170 }, { "epoch": 0.21435913249049235, "grad_norm": 1.371882677078247, "learning_rate": 9.134963092348608e-06, "loss": 0.8322, "step": 4171 }, { "epoch": 0.214410525233837, "grad_norm": 1.0859326124191284, "learning_rate": 9.134495131197231e-06, "loss": 0.7789, "step": 4172 }, { "epoch": 0.21446191797718162, "grad_norm": 1.1921128034591675, "learning_rate": 9.134027055496057e-06, "loss": 0.7917, "step": 4173 }, { "epoch": 0.21451331072052626, "grad_norm": 1.2561819553375244, "learning_rate": 9.133558865258052e-06, "loss": 0.8345, "step": 4174 }, { "epoch": 0.2145647034638709, "grad_norm": 1.0786267518997192, "learning_rate": 9.13309056049619e-06, "loss": 0.7668, "step": 4175 }, { "epoch": 0.21461609620721553, "grad_norm": 1.0573195219039917, "learning_rate": 9.132622141223447e-06, "loss": 0.7805, "step": 4176 }, { "epoch": 0.21466748895056018, "grad_norm": 1.0086778402328491, "learning_rate": 9.132153607452795e-06, "loss": 0.7415, "step": 4177 }, { "epoch": 0.21471888169390482, "grad_norm": 1.3335920572280884, "learning_rate": 9.131684959197222e-06, "loss": 0.7906, "step": 4178 }, { "epoch": 0.21477027443724947, "grad_norm": 1.1119905710220337, "learning_rate": 9.131216196469706e-06, "loss": 0.7694, "step": 4179 }, { "epoch": 0.2148216671805941, "grad_norm": 1.1683192253112793, "learning_rate": 9.13074731928324e-06, "loss": 0.7492, "step": 4180 }, { "epoch": 0.21487305992393874, "grad_norm": 1.136493444442749, "learning_rate": 9.130278327650812e-06, "loss": 0.7294, "step": 4181 }, { "epoch": 0.21492445266728338, "grad_norm": 1.1425938606262207, "learning_rate": 9.129809221585416e-06, "loss": 0.7924, "step": 4182 }, { "epoch": 0.21497584541062803, "grad_norm": 1.1476768255233765, "learning_rate": 9.12934000110005e-06, "loss": 0.7814, "step": 4183 }, { "epoch": 0.21502723815397265, "grad_norm": 1.1310791969299316, "learning_rate": 9.128870666207711e-06, "loss": 0.8608, "step": 4184 }, { "epoch": 0.2150786308973173, "grad_norm": 1.1162221431732178, "learning_rate": 9.128401216921407e-06, "loss": 0.7906, "step": 4185 }, { "epoch": 0.21513002364066194, "grad_norm": 1.05939781665802, "learning_rate": 9.127931653254143e-06, "loss": 0.794, "step": 4186 }, { "epoch": 0.2151814163840066, "grad_norm": 1.1121755838394165, "learning_rate": 9.127461975218926e-06, "loss": 0.8339, "step": 4187 }, { "epoch": 0.2152328091273512, "grad_norm": 1.129523515701294, "learning_rate": 9.12699218282877e-06, "loss": 0.7701, "step": 4188 }, { "epoch": 0.21528420187069586, "grad_norm": 1.0820891857147217, "learning_rate": 9.126522276096694e-06, "loss": 0.7914, "step": 4189 }, { "epoch": 0.2153355946140405, "grad_norm": 1.0078492164611816, "learning_rate": 9.126052255035714e-06, "loss": 0.7366, "step": 4190 }, { "epoch": 0.21538698735738512, "grad_norm": 0.871012806892395, "learning_rate": 9.12558211965885e-06, "loss": 0.7274, "step": 4191 }, { "epoch": 0.21543838010072977, "grad_norm": 0.8761668801307678, "learning_rate": 9.125111869979136e-06, "loss": 0.7172, "step": 4192 }, { "epoch": 0.21548977284407442, "grad_norm": 1.0423604249954224, "learning_rate": 9.124641506009593e-06, "loss": 0.786, "step": 4193 }, { "epoch": 0.21554116558741906, "grad_norm": 1.108942985534668, "learning_rate": 9.124171027763255e-06, "loss": 0.7609, "step": 4194 }, { "epoch": 0.21559255833076368, "grad_norm": 1.1549575328826904, "learning_rate": 9.123700435253157e-06, "loss": 0.8103, "step": 4195 }, { "epoch": 0.21564395107410833, "grad_norm": 1.0554914474487305, "learning_rate": 9.123229728492338e-06, "loss": 0.8096, "step": 4196 }, { "epoch": 0.21569534381745298, "grad_norm": 1.1119860410690308, "learning_rate": 9.122758907493839e-06, "loss": 0.7533, "step": 4197 }, { "epoch": 0.21574673656079763, "grad_norm": 1.1737781763076782, "learning_rate": 9.122287972270704e-06, "loss": 0.7745, "step": 4198 }, { "epoch": 0.21579812930414224, "grad_norm": 1.1175786256790161, "learning_rate": 9.121816922835982e-06, "loss": 0.7173, "step": 4199 }, { "epoch": 0.2158495220474869, "grad_norm": 1.1554908752441406, "learning_rate": 9.121345759202722e-06, "loss": 0.8963, "step": 4200 }, { "epoch": 0.21590091479083154, "grad_norm": 1.1126316785812378, "learning_rate": 9.120874481383979e-06, "loss": 0.7997, "step": 4201 }, { "epoch": 0.21595230753417619, "grad_norm": 1.1244614124298096, "learning_rate": 9.120403089392808e-06, "loss": 0.8033, "step": 4202 }, { "epoch": 0.2160037002775208, "grad_norm": 1.0426387786865234, "learning_rate": 9.119931583242275e-06, "loss": 0.749, "step": 4203 }, { "epoch": 0.21605509302086545, "grad_norm": 1.1404590606689453, "learning_rate": 9.119459962945436e-06, "loss": 0.7906, "step": 4204 }, { "epoch": 0.2161064857642101, "grad_norm": 1.107214331626892, "learning_rate": 9.118988228515365e-06, "loss": 0.7866, "step": 4205 }, { "epoch": 0.21615787850755475, "grad_norm": 0.9314901828765869, "learning_rate": 9.118516379965126e-06, "loss": 0.6897, "step": 4206 }, { "epoch": 0.21620927125089937, "grad_norm": 1.1537880897521973, "learning_rate": 9.118044417307793e-06, "loss": 0.7745, "step": 4207 }, { "epoch": 0.216260663994244, "grad_norm": 1.1069575548171997, "learning_rate": 9.117572340556446e-06, "loss": 0.762, "step": 4208 }, { "epoch": 0.21631205673758866, "grad_norm": 1.1378830671310425, "learning_rate": 9.11710014972416e-06, "loss": 0.8789, "step": 4209 }, { "epoch": 0.21636344948093328, "grad_norm": 1.1269419193267822, "learning_rate": 9.11662784482402e-06, "loss": 0.7967, "step": 4210 }, { "epoch": 0.21641484222427793, "grad_norm": 0.777703046798706, "learning_rate": 9.11615542586911e-06, "loss": 0.704, "step": 4211 }, { "epoch": 0.21646623496762257, "grad_norm": 1.108641266822815, "learning_rate": 9.115682892872521e-06, "loss": 0.7728, "step": 4212 }, { "epoch": 0.21651762771096722, "grad_norm": 1.1214264631271362, "learning_rate": 9.115210245847343e-06, "loss": 0.7236, "step": 4213 }, { "epoch": 0.21656902045431184, "grad_norm": 1.136697769165039, "learning_rate": 9.11473748480667e-06, "loss": 0.8146, "step": 4214 }, { "epoch": 0.2166204131976565, "grad_norm": 1.1365002393722534, "learning_rate": 9.114264609763603e-06, "loss": 0.809, "step": 4215 }, { "epoch": 0.21667180594100113, "grad_norm": 1.1066017150878906, "learning_rate": 9.113791620731242e-06, "loss": 0.7794, "step": 4216 }, { "epoch": 0.21672319868434578, "grad_norm": 1.1277374029159546, "learning_rate": 9.113318517722693e-06, "loss": 0.7895, "step": 4217 }, { "epoch": 0.2167745914276904, "grad_norm": 1.0917003154754639, "learning_rate": 9.11284530075106e-06, "loss": 0.7415, "step": 4218 }, { "epoch": 0.21682598417103505, "grad_norm": 1.0714293718338013, "learning_rate": 9.112371969829458e-06, "loss": 0.8347, "step": 4219 }, { "epoch": 0.2168773769143797, "grad_norm": 1.143161654472351, "learning_rate": 9.111898524971e-06, "loss": 0.7608, "step": 4220 }, { "epoch": 0.21692876965772434, "grad_norm": 1.1832191944122314, "learning_rate": 9.111424966188802e-06, "loss": 0.8163, "step": 4221 }, { "epoch": 0.21698016240106896, "grad_norm": 0.7797741293907166, "learning_rate": 9.110951293495983e-06, "loss": 0.7226, "step": 4222 }, { "epoch": 0.2170315551444136, "grad_norm": 0.8132724165916443, "learning_rate": 9.110477506905672e-06, "loss": 0.7269, "step": 4223 }, { "epoch": 0.21708294788775825, "grad_norm": 1.154966950416565, "learning_rate": 9.110003606430991e-06, "loss": 0.7881, "step": 4224 }, { "epoch": 0.2171343406311029, "grad_norm": 1.0446710586547852, "learning_rate": 9.10952959208507e-06, "loss": 0.762, "step": 4225 }, { "epoch": 0.21718573337444752, "grad_norm": 0.7129215598106384, "learning_rate": 9.109055463881045e-06, "loss": 0.7073, "step": 4226 }, { "epoch": 0.21723712611779217, "grad_norm": 1.0493582487106323, "learning_rate": 9.10858122183205e-06, "loss": 0.809, "step": 4227 }, { "epoch": 0.21728851886113681, "grad_norm": 1.0873157978057861, "learning_rate": 9.108106865951223e-06, "loss": 0.7873, "step": 4228 }, { "epoch": 0.21733991160448143, "grad_norm": 0.8385129570960999, "learning_rate": 9.10763239625171e-06, "loss": 0.7598, "step": 4229 }, { "epoch": 0.21739130434782608, "grad_norm": 0.9693784713745117, "learning_rate": 9.107157812746652e-06, "loss": 0.6354, "step": 4230 }, { "epoch": 0.21744269709117073, "grad_norm": 1.0374523401260376, "learning_rate": 9.1066831154492e-06, "loss": 0.7641, "step": 4231 }, { "epoch": 0.21749408983451538, "grad_norm": 1.118335485458374, "learning_rate": 9.106208304372509e-06, "loss": 0.6814, "step": 4232 }, { "epoch": 0.21754548257786, "grad_norm": 1.0439949035644531, "learning_rate": 9.10573337952973e-06, "loss": 0.74, "step": 4233 }, { "epoch": 0.21759687532120464, "grad_norm": 1.1221650838851929, "learning_rate": 9.10525834093402e-06, "loss": 0.8212, "step": 4234 }, { "epoch": 0.2176482680645493, "grad_norm": 0.786357045173645, "learning_rate": 9.104783188598545e-06, "loss": 0.6903, "step": 4235 }, { "epoch": 0.21769966080789394, "grad_norm": 1.0469022989273071, "learning_rate": 9.104307922536466e-06, "loss": 0.7557, "step": 4236 }, { "epoch": 0.21775105355123855, "grad_norm": 0.753881573677063, "learning_rate": 9.103832542760954e-06, "loss": 0.7059, "step": 4237 }, { "epoch": 0.2178024462945832, "grad_norm": 1.14176607131958, "learning_rate": 9.103357049285177e-06, "loss": 0.7253, "step": 4238 }, { "epoch": 0.21785383903792785, "grad_norm": 0.8362029194831848, "learning_rate": 9.102881442122308e-06, "loss": 0.7107, "step": 4239 }, { "epoch": 0.2179052317812725, "grad_norm": 0.7867474555969238, "learning_rate": 9.102405721285527e-06, "loss": 0.6787, "step": 4240 }, { "epoch": 0.21795662452461712, "grad_norm": 0.7196457386016846, "learning_rate": 9.101929886788014e-06, "loss": 0.653, "step": 4241 }, { "epoch": 0.21800801726796176, "grad_norm": 1.1716468334197998, "learning_rate": 9.101453938642951e-06, "loss": 0.772, "step": 4242 }, { "epoch": 0.2180594100113064, "grad_norm": 1.053269386291504, "learning_rate": 9.100977876863527e-06, "loss": 0.7364, "step": 4243 }, { "epoch": 0.21811080275465106, "grad_norm": 1.0741207599639893, "learning_rate": 9.100501701462925e-06, "loss": 0.7623, "step": 4244 }, { "epoch": 0.21816219549799568, "grad_norm": 1.0592821836471558, "learning_rate": 9.100025412454346e-06, "loss": 0.7707, "step": 4245 }, { "epoch": 0.21821358824134032, "grad_norm": 1.1328836679458618, "learning_rate": 9.099549009850984e-06, "loss": 0.7818, "step": 4246 }, { "epoch": 0.21826498098468497, "grad_norm": 1.1115812063217163, "learning_rate": 9.099072493666032e-06, "loss": 0.7955, "step": 4247 }, { "epoch": 0.2183163737280296, "grad_norm": 1.1512393951416016, "learning_rate": 9.0985958639127e-06, "loss": 0.8269, "step": 4248 }, { "epoch": 0.21836776647137424, "grad_norm": 1.1867098808288574, "learning_rate": 9.09811912060419e-06, "loss": 0.8134, "step": 4249 }, { "epoch": 0.21841915921471888, "grad_norm": 1.0500290393829346, "learning_rate": 9.097642263753712e-06, "loss": 0.7623, "step": 4250 }, { "epoch": 0.21847055195806353, "grad_norm": 1.0363597869873047, "learning_rate": 9.097165293374477e-06, "loss": 0.7873, "step": 4251 }, { "epoch": 0.21852194470140815, "grad_norm": 0.9595499038696289, "learning_rate": 9.096688209479699e-06, "loss": 0.6616, "step": 4252 }, { "epoch": 0.2185733374447528, "grad_norm": 1.1250566244125366, "learning_rate": 9.096211012082596e-06, "loss": 0.7687, "step": 4253 }, { "epoch": 0.21862473018809744, "grad_norm": 1.0896481275558472, "learning_rate": 9.095733701196392e-06, "loss": 0.7973, "step": 4254 }, { "epoch": 0.2186761229314421, "grad_norm": 1.0918326377868652, "learning_rate": 9.095256276834308e-06, "loss": 0.7783, "step": 4255 }, { "epoch": 0.2187275156747867, "grad_norm": 1.1954625844955444, "learning_rate": 9.094778739009571e-06, "loss": 0.7601, "step": 4256 }, { "epoch": 0.21877890841813136, "grad_norm": 0.8080825209617615, "learning_rate": 9.094301087735415e-06, "loss": 0.7092, "step": 4257 }, { "epoch": 0.218830301161476, "grad_norm": 1.0741758346557617, "learning_rate": 9.093823323025073e-06, "loss": 0.7616, "step": 4258 }, { "epoch": 0.21888169390482065, "grad_norm": 1.0875614881515503, "learning_rate": 9.093345444891778e-06, "loss": 0.8056, "step": 4259 }, { "epoch": 0.21893308664816527, "grad_norm": 1.09056556224823, "learning_rate": 9.092867453348776e-06, "loss": 0.7879, "step": 4260 }, { "epoch": 0.21898447939150992, "grad_norm": 1.122534155845642, "learning_rate": 9.092389348409306e-06, "loss": 0.7981, "step": 4261 }, { "epoch": 0.21903587213485456, "grad_norm": 1.0954110622406006, "learning_rate": 9.091911130086616e-06, "loss": 0.758, "step": 4262 }, { "epoch": 0.2190872648781992, "grad_norm": 1.1522572040557861, "learning_rate": 9.091432798393954e-06, "loss": 0.8237, "step": 4263 }, { "epoch": 0.21913865762154383, "grad_norm": 0.9982960820198059, "learning_rate": 9.090954353344574e-06, "loss": 0.7266, "step": 4264 }, { "epoch": 0.21919005036488848, "grad_norm": 1.0826950073242188, "learning_rate": 9.09047579495173e-06, "loss": 0.7736, "step": 4265 }, { "epoch": 0.21924144310823312, "grad_norm": 1.2022103071212769, "learning_rate": 9.089997123228684e-06, "loss": 0.8082, "step": 4266 }, { "epoch": 0.21929283585157774, "grad_norm": 1.1171106100082397, "learning_rate": 9.089518338188695e-06, "loss": 0.8277, "step": 4267 }, { "epoch": 0.2193442285949224, "grad_norm": 1.0945594310760498, "learning_rate": 9.08903943984503e-06, "loss": 0.7808, "step": 4268 }, { "epoch": 0.21939562133826704, "grad_norm": 1.1168155670166016, "learning_rate": 9.088560428210956e-06, "loss": 0.8519, "step": 4269 }, { "epoch": 0.21944701408161169, "grad_norm": 1.0966004133224487, "learning_rate": 9.088081303299745e-06, "loss": 0.7984, "step": 4270 }, { "epoch": 0.2194984068249563, "grad_norm": 0.8071814179420471, "learning_rate": 9.087602065124672e-06, "loss": 0.7084, "step": 4271 }, { "epoch": 0.21954979956830095, "grad_norm": 1.1144964694976807, "learning_rate": 9.087122713699014e-06, "loss": 0.7709, "step": 4272 }, { "epoch": 0.2196011923116456, "grad_norm": 1.0576223134994507, "learning_rate": 9.086643249036053e-06, "loss": 0.8007, "step": 4273 }, { "epoch": 0.21965258505499025, "grad_norm": 1.1493570804595947, "learning_rate": 9.086163671149071e-06, "loss": 0.7256, "step": 4274 }, { "epoch": 0.21970397779833487, "grad_norm": 0.7365999221801758, "learning_rate": 9.085683980051356e-06, "loss": 0.7046, "step": 4275 }, { "epoch": 0.2197553705416795, "grad_norm": 0.8011993169784546, "learning_rate": 9.085204175756199e-06, "loss": 0.7055, "step": 4276 }, { "epoch": 0.21980676328502416, "grad_norm": 1.090439796447754, "learning_rate": 9.084724258276894e-06, "loss": 0.7941, "step": 4277 }, { "epoch": 0.2198581560283688, "grad_norm": 1.2138172388076782, "learning_rate": 9.084244227626736e-06, "loss": 0.8068, "step": 4278 }, { "epoch": 0.21990954877171343, "grad_norm": 1.1071089506149292, "learning_rate": 9.083764083819024e-06, "loss": 0.8307, "step": 4279 }, { "epoch": 0.21996094151505807, "grad_norm": 1.1578168869018555, "learning_rate": 9.083283826867063e-06, "loss": 0.824, "step": 4280 }, { "epoch": 0.22001233425840272, "grad_norm": 1.1298171281814575, "learning_rate": 9.082803456784157e-06, "loss": 0.8725, "step": 4281 }, { "epoch": 0.22006372700174737, "grad_norm": 1.1123601198196411, "learning_rate": 9.082322973583616e-06, "loss": 0.7709, "step": 4282 }, { "epoch": 0.22011511974509199, "grad_norm": 1.1035863161087036, "learning_rate": 9.081842377278754e-06, "loss": 0.7655, "step": 4283 }, { "epoch": 0.22016651248843663, "grad_norm": 1.102044939994812, "learning_rate": 9.081361667882883e-06, "loss": 0.7485, "step": 4284 }, { "epoch": 0.22021790523178128, "grad_norm": 0.9537819623947144, "learning_rate": 9.080880845409324e-06, "loss": 0.717, "step": 4285 }, { "epoch": 0.2202692979751259, "grad_norm": 1.2634668350219727, "learning_rate": 9.080399909871395e-06, "loss": 0.7778, "step": 4286 }, { "epoch": 0.22032069071847055, "grad_norm": 1.194694995880127, "learning_rate": 9.079918861282427e-06, "loss": 0.781, "step": 4287 }, { "epoch": 0.2203720834618152, "grad_norm": 1.0634434223175049, "learning_rate": 9.079437699655743e-06, "loss": 0.7688, "step": 4288 }, { "epoch": 0.22042347620515984, "grad_norm": 0.8530346751213074, "learning_rate": 9.078956425004672e-06, "loss": 0.6735, "step": 4289 }, { "epoch": 0.22047486894850446, "grad_norm": 1.0793602466583252, "learning_rate": 9.078475037342555e-06, "loss": 0.7694, "step": 4290 }, { "epoch": 0.2205262616918491, "grad_norm": 1.0319033861160278, "learning_rate": 9.077993536682723e-06, "loss": 0.7633, "step": 4291 }, { "epoch": 0.22057765443519375, "grad_norm": 0.7619673609733582, "learning_rate": 9.07751192303852e-06, "loss": 0.6922, "step": 4292 }, { "epoch": 0.2206290471785384, "grad_norm": 1.058688759803772, "learning_rate": 9.07703019642329e-06, "loss": 0.7637, "step": 4293 }, { "epoch": 0.22068043992188302, "grad_norm": 1.0879544019699097, "learning_rate": 9.076548356850376e-06, "loss": 0.7912, "step": 4294 }, { "epoch": 0.22073183266522767, "grad_norm": 1.1209956407546997, "learning_rate": 9.076066404333132e-06, "loss": 0.7812, "step": 4295 }, { "epoch": 0.22078322540857231, "grad_norm": 1.112785816192627, "learning_rate": 9.075584338884907e-06, "loss": 0.814, "step": 4296 }, { "epoch": 0.22083461815191696, "grad_norm": 1.204616665840149, "learning_rate": 9.07510216051906e-06, "loss": 0.8328, "step": 4297 }, { "epoch": 0.22088601089526158, "grad_norm": 1.1497029066085815, "learning_rate": 9.07461986924895e-06, "loss": 0.7882, "step": 4298 }, { "epoch": 0.22093740363860623, "grad_norm": 1.06961190700531, "learning_rate": 9.074137465087935e-06, "loss": 0.8282, "step": 4299 }, { "epoch": 0.22098879638195087, "grad_norm": 1.0907753705978394, "learning_rate": 9.073654948049387e-06, "loss": 0.7823, "step": 4300 }, { "epoch": 0.22104018912529552, "grad_norm": 0.8082263469696045, "learning_rate": 9.073172318146671e-06, "loss": 0.7314, "step": 4301 }, { "epoch": 0.22109158186864014, "grad_norm": 1.1509058475494385, "learning_rate": 9.07268957539316e-06, "loss": 0.8147, "step": 4302 }, { "epoch": 0.2211429746119848, "grad_norm": 1.0601377487182617, "learning_rate": 9.072206719802225e-06, "loss": 0.7164, "step": 4303 }, { "epoch": 0.22119436735532944, "grad_norm": 1.0618751049041748, "learning_rate": 9.071723751387247e-06, "loss": 0.7634, "step": 4304 }, { "epoch": 0.22124576009867405, "grad_norm": 1.1580489873886108, "learning_rate": 9.071240670161609e-06, "loss": 0.7804, "step": 4305 }, { "epoch": 0.2212971528420187, "grad_norm": 1.1457750797271729, "learning_rate": 9.070757476138692e-06, "loss": 0.808, "step": 4306 }, { "epoch": 0.22134854558536335, "grad_norm": 0.748276948928833, "learning_rate": 9.070274169331884e-06, "loss": 0.7648, "step": 4307 }, { "epoch": 0.221399938328708, "grad_norm": 1.173012137413025, "learning_rate": 9.069790749754576e-06, "loss": 0.8053, "step": 4308 }, { "epoch": 0.22145133107205262, "grad_norm": 1.1354033946990967, "learning_rate": 9.069307217420163e-06, "loss": 0.79, "step": 4309 }, { "epoch": 0.22150272381539726, "grad_norm": 1.0960289239883423, "learning_rate": 9.068823572342038e-06, "loss": 0.7786, "step": 4310 }, { "epoch": 0.2215541165587419, "grad_norm": 1.0697039365768433, "learning_rate": 9.068339814533602e-06, "loss": 0.8123, "step": 4311 }, { "epoch": 0.22160550930208656, "grad_norm": 1.0992567539215088, "learning_rate": 9.06785594400826e-06, "loss": 0.7684, "step": 4312 }, { "epoch": 0.22165690204543118, "grad_norm": 1.0429848432540894, "learning_rate": 9.067371960779419e-06, "loss": 0.7642, "step": 4313 }, { "epoch": 0.22170829478877582, "grad_norm": 1.074609398841858, "learning_rate": 9.066887864860483e-06, "loss": 0.7397, "step": 4314 }, { "epoch": 0.22175968753212047, "grad_norm": 1.0766611099243164, "learning_rate": 9.066403656264868e-06, "loss": 0.7811, "step": 4315 }, { "epoch": 0.22181108027546512, "grad_norm": 1.1003326177597046, "learning_rate": 9.065919335005989e-06, "loss": 0.8023, "step": 4316 }, { "epoch": 0.22186247301880974, "grad_norm": 1.0612140893936157, "learning_rate": 9.065434901097266e-06, "loss": 0.7807, "step": 4317 }, { "epoch": 0.22191386576215438, "grad_norm": 1.0972939729690552, "learning_rate": 9.064950354552115e-06, "loss": 0.7792, "step": 4318 }, { "epoch": 0.22196525850549903, "grad_norm": 1.1753911972045898, "learning_rate": 9.064465695383968e-06, "loss": 0.8264, "step": 4319 }, { "epoch": 0.22201665124884365, "grad_norm": 1.1278507709503174, "learning_rate": 9.063980923606249e-06, "loss": 0.7913, "step": 4320 }, { "epoch": 0.2220680439921883, "grad_norm": 1.1230570077896118, "learning_rate": 9.063496039232389e-06, "loss": 0.8273, "step": 4321 }, { "epoch": 0.22211943673553294, "grad_norm": 1.1253111362457275, "learning_rate": 9.063011042275824e-06, "loss": 0.7372, "step": 4322 }, { "epoch": 0.2221708294788776, "grad_norm": 0.8204745054244995, "learning_rate": 9.06252593274999e-06, "loss": 0.6768, "step": 4323 }, { "epoch": 0.2222222222222222, "grad_norm": 1.1202855110168457, "learning_rate": 9.062040710668325e-06, "loss": 0.7912, "step": 4324 }, { "epoch": 0.22227361496556686, "grad_norm": 1.126147985458374, "learning_rate": 9.061555376044278e-06, "loss": 0.7511, "step": 4325 }, { "epoch": 0.2223250077089115, "grad_norm": 0.9461855292320251, "learning_rate": 9.061069928891291e-06, "loss": 0.7137, "step": 4326 }, { "epoch": 0.22237640045225615, "grad_norm": 1.1177200078964233, "learning_rate": 9.060584369222816e-06, "loss": 0.7502, "step": 4327 }, { "epoch": 0.22242779319560077, "grad_norm": 1.214698076248169, "learning_rate": 9.060098697052304e-06, "loss": 0.7886, "step": 4328 }, { "epoch": 0.22247918593894542, "grad_norm": 1.0965791940689087, "learning_rate": 9.059612912393213e-06, "loss": 0.7515, "step": 4329 }, { "epoch": 0.22253057868229006, "grad_norm": 1.1039177179336548, "learning_rate": 9.059127015259002e-06, "loss": 0.8275, "step": 4330 }, { "epoch": 0.2225819714256347, "grad_norm": 0.741967499256134, "learning_rate": 9.05864100566313e-06, "loss": 0.6929, "step": 4331 }, { "epoch": 0.22263336416897933, "grad_norm": 1.0916329622268677, "learning_rate": 9.058154883619068e-06, "loss": 0.8796, "step": 4332 }, { "epoch": 0.22268475691232398, "grad_norm": 1.04004967212677, "learning_rate": 9.05766864914028e-06, "loss": 0.7688, "step": 4333 }, { "epoch": 0.22273614965566862, "grad_norm": 1.1143995523452759, "learning_rate": 9.057182302240237e-06, "loss": 0.7307, "step": 4334 }, { "epoch": 0.22278754239901327, "grad_norm": 1.0955370664596558, "learning_rate": 9.056695842932417e-06, "loss": 0.7393, "step": 4335 }, { "epoch": 0.2228389351423579, "grad_norm": 0.7515177726745605, "learning_rate": 9.056209271230296e-06, "loss": 0.7327, "step": 4336 }, { "epoch": 0.22289032788570254, "grad_norm": 1.0424476861953735, "learning_rate": 9.055722587147356e-06, "loss": 0.8171, "step": 4337 }, { "epoch": 0.22294172062904719, "grad_norm": 1.1556648015975952, "learning_rate": 9.055235790697077e-06, "loss": 0.8175, "step": 4338 }, { "epoch": 0.2229931133723918, "grad_norm": 1.1507762670516968, "learning_rate": 9.054748881892952e-06, "loss": 0.7655, "step": 4339 }, { "epoch": 0.22304450611573645, "grad_norm": 1.1124593019485474, "learning_rate": 9.054261860748468e-06, "loss": 0.7441, "step": 4340 }, { "epoch": 0.2230958988590811, "grad_norm": 1.1042824983596802, "learning_rate": 9.053774727277119e-06, "loss": 0.7626, "step": 4341 }, { "epoch": 0.22314729160242575, "grad_norm": 1.2667195796966553, "learning_rate": 9.0532874814924e-06, "loss": 0.821, "step": 4342 }, { "epoch": 0.22319868434577037, "grad_norm": 1.2013894319534302, "learning_rate": 9.052800123407812e-06, "loss": 0.8227, "step": 4343 }, { "epoch": 0.223250077089115, "grad_norm": 1.1458237171173096, "learning_rate": 9.052312653036858e-06, "loss": 0.7942, "step": 4344 }, { "epoch": 0.22330146983245966, "grad_norm": 1.0957010984420776, "learning_rate": 9.051825070393043e-06, "loss": 0.8045, "step": 4345 }, { "epoch": 0.2233528625758043, "grad_norm": 1.116281270980835, "learning_rate": 9.051337375489877e-06, "loss": 0.7745, "step": 4346 }, { "epoch": 0.22340425531914893, "grad_norm": 0.82821124792099, "learning_rate": 9.050849568340872e-06, "loss": 0.6804, "step": 4347 }, { "epoch": 0.22345564806249357, "grad_norm": 1.11138916015625, "learning_rate": 9.05036164895954e-06, "loss": 0.7884, "step": 4348 }, { "epoch": 0.22350704080583822, "grad_norm": 1.0637050867080688, "learning_rate": 9.049873617359404e-06, "loss": 0.7817, "step": 4349 }, { "epoch": 0.22355843354918287, "grad_norm": 1.1221212148666382, "learning_rate": 9.04938547355398e-06, "loss": 0.84, "step": 4350 }, { "epoch": 0.22360982629252749, "grad_norm": 0.7269451022148132, "learning_rate": 9.048897217556798e-06, "loss": 0.73, "step": 4351 }, { "epoch": 0.22366121903587213, "grad_norm": 1.0613042116165161, "learning_rate": 9.04840884938138e-06, "loss": 0.8104, "step": 4352 }, { "epoch": 0.22371261177921678, "grad_norm": 1.072580099105835, "learning_rate": 9.047920369041264e-06, "loss": 0.8032, "step": 4353 }, { "epoch": 0.22376400452256143, "grad_norm": 1.1029006242752075, "learning_rate": 9.047431776549976e-06, "loss": 0.7955, "step": 4354 }, { "epoch": 0.22381539726590605, "grad_norm": 4.790678024291992, "learning_rate": 9.046943071921057e-06, "loss": 0.854, "step": 4355 }, { "epoch": 0.2238667900092507, "grad_norm": 0.8257871270179749, "learning_rate": 9.046454255168045e-06, "loss": 0.6975, "step": 4356 }, { "epoch": 0.22391818275259534, "grad_norm": 1.0733985900878906, "learning_rate": 9.045965326304486e-06, "loss": 0.806, "step": 4357 }, { "epoch": 0.22396957549593996, "grad_norm": 1.1073120832443237, "learning_rate": 9.045476285343924e-06, "loss": 0.7573, "step": 4358 }, { "epoch": 0.2240209682392846, "grad_norm": 1.1115367412567139, "learning_rate": 9.044987132299909e-06, "loss": 0.7826, "step": 4359 }, { "epoch": 0.22407236098262925, "grad_norm": 1.1194162368774414, "learning_rate": 9.044497867185994e-06, "loss": 0.7762, "step": 4360 }, { "epoch": 0.2241237537259739, "grad_norm": 1.3852365016937256, "learning_rate": 9.044008490015731e-06, "loss": 0.8142, "step": 4361 }, { "epoch": 0.22417514646931852, "grad_norm": 0.9488205909729004, "learning_rate": 9.043519000802683e-06, "loss": 0.7508, "step": 4362 }, { "epoch": 0.22422653921266317, "grad_norm": 1.0546590089797974, "learning_rate": 9.043029399560412e-06, "loss": 0.7599, "step": 4363 }, { "epoch": 0.22427793195600781, "grad_norm": 1.0679047107696533, "learning_rate": 9.04253968630248e-06, "loss": 0.7923, "step": 4364 }, { "epoch": 0.22432932469935246, "grad_norm": 1.0072203874588013, "learning_rate": 9.042049861042455e-06, "loss": 0.797, "step": 4365 }, { "epoch": 0.22438071744269708, "grad_norm": 1.1805046796798706, "learning_rate": 9.041559923793908e-06, "loss": 0.828, "step": 4366 }, { "epoch": 0.22443211018604173, "grad_norm": 0.8413265347480774, "learning_rate": 9.041069874570416e-06, "loss": 0.673, "step": 4367 }, { "epoch": 0.22448350292938637, "grad_norm": 1.0767192840576172, "learning_rate": 9.040579713385553e-06, "loss": 0.8117, "step": 4368 }, { "epoch": 0.22453489567273102, "grad_norm": 1.0955297946929932, "learning_rate": 9.040089440252901e-06, "loss": 0.7536, "step": 4369 }, { "epoch": 0.22458628841607564, "grad_norm": 1.06802237033844, "learning_rate": 9.039599055186044e-06, "loss": 0.8012, "step": 4370 }, { "epoch": 0.2246376811594203, "grad_norm": 1.1725267171859741, "learning_rate": 9.039108558198566e-06, "loss": 0.8195, "step": 4371 }, { "epoch": 0.22468907390276494, "grad_norm": 1.1977851390838623, "learning_rate": 9.038617949304058e-06, "loss": 0.8276, "step": 4372 }, { "epoch": 0.22474046664610958, "grad_norm": 1.0465898513793945, "learning_rate": 9.038127228516114e-06, "loss": 0.7673, "step": 4373 }, { "epoch": 0.2247918593894542, "grad_norm": 1.0858691930770874, "learning_rate": 9.03763639584833e-06, "loss": 0.7972, "step": 4374 }, { "epoch": 0.22484325213279885, "grad_norm": 1.0539582967758179, "learning_rate": 9.037145451314303e-06, "loss": 0.7816, "step": 4375 }, { "epoch": 0.2248946448761435, "grad_norm": 1.0489228963851929, "learning_rate": 9.036654394927635e-06, "loss": 0.7472, "step": 4376 }, { "epoch": 0.22494603761948812, "grad_norm": 1.2524515390396118, "learning_rate": 9.036163226701933e-06, "loss": 0.8101, "step": 4377 }, { "epoch": 0.22499743036283276, "grad_norm": 1.1080033779144287, "learning_rate": 9.035671946650803e-06, "loss": 0.8068, "step": 4378 }, { "epoch": 0.2250488231061774, "grad_norm": 0.7447730898857117, "learning_rate": 9.035180554787859e-06, "loss": 0.7079, "step": 4379 }, { "epoch": 0.22510021584952206, "grad_norm": 0.7839935421943665, "learning_rate": 9.034689051126712e-06, "loss": 0.7158, "step": 4380 }, { "epoch": 0.22515160859286668, "grad_norm": 1.1289641857147217, "learning_rate": 9.034197435680984e-06, "loss": 0.7953, "step": 4381 }, { "epoch": 0.22520300133621132, "grad_norm": 1.193946361541748, "learning_rate": 9.033705708464291e-06, "loss": 0.8185, "step": 4382 }, { "epoch": 0.22525439407955597, "grad_norm": 1.0938438177108765, "learning_rate": 9.033213869490261e-06, "loss": 0.7964, "step": 4383 }, { "epoch": 0.22530578682290062, "grad_norm": 1.1532719135284424, "learning_rate": 9.032721918772518e-06, "loss": 0.8084, "step": 4384 }, { "epoch": 0.22535717956624524, "grad_norm": 1.2949411869049072, "learning_rate": 9.032229856324693e-06, "loss": 0.74, "step": 4385 }, { "epoch": 0.22540857230958988, "grad_norm": 1.1437602043151855, "learning_rate": 9.031737682160418e-06, "loss": 0.7805, "step": 4386 }, { "epoch": 0.22545996505293453, "grad_norm": 1.0841890573501587, "learning_rate": 9.031245396293332e-06, "loss": 0.7745, "step": 4387 }, { "epoch": 0.22551135779627918, "grad_norm": 0.8282328844070435, "learning_rate": 9.030752998737069e-06, "loss": 0.7187, "step": 4388 }, { "epoch": 0.2255627505396238, "grad_norm": 1.1629488468170166, "learning_rate": 9.030260489505276e-06, "loss": 0.7829, "step": 4389 }, { "epoch": 0.22561414328296844, "grad_norm": 1.0477409362792969, "learning_rate": 9.029767868611597e-06, "loss": 0.7615, "step": 4390 }, { "epoch": 0.2256655360263131, "grad_norm": 1.159829020500183, "learning_rate": 9.029275136069678e-06, "loss": 0.7608, "step": 4391 }, { "epoch": 0.22571692876965774, "grad_norm": 1.1267942190170288, "learning_rate": 9.028782291893174e-06, "loss": 0.8103, "step": 4392 }, { "epoch": 0.22576832151300236, "grad_norm": 1.0907851457595825, "learning_rate": 9.02828933609574e-06, "loss": 0.7784, "step": 4393 }, { "epoch": 0.225819714256347, "grad_norm": 1.163543939590454, "learning_rate": 9.02779626869103e-06, "loss": 0.798, "step": 4394 }, { "epoch": 0.22587110699969165, "grad_norm": 1.0861319303512573, "learning_rate": 9.027303089692709e-06, "loss": 0.8274, "step": 4395 }, { "epoch": 0.22592249974303627, "grad_norm": 1.1938151121139526, "learning_rate": 9.026809799114438e-06, "loss": 0.7648, "step": 4396 }, { "epoch": 0.22597389248638092, "grad_norm": 1.1090257167816162, "learning_rate": 9.026316396969886e-06, "loss": 0.7908, "step": 4397 }, { "epoch": 0.22602528522972556, "grad_norm": 1.1303318738937378, "learning_rate": 9.025822883272721e-06, "loss": 0.8011, "step": 4398 }, { "epoch": 0.2260766779730702, "grad_norm": 1.145537257194519, "learning_rate": 9.02532925803662e-06, "loss": 0.7724, "step": 4399 }, { "epoch": 0.22612807071641483, "grad_norm": 1.1233175992965698, "learning_rate": 9.024835521275254e-06, "loss": 0.8114, "step": 4400 }, { "epoch": 0.22617946345975948, "grad_norm": 1.113986611366272, "learning_rate": 9.024341673002306e-06, "loss": 0.821, "step": 4401 }, { "epoch": 0.22623085620310412, "grad_norm": 1.1142867803573608, "learning_rate": 9.023847713231457e-06, "loss": 0.8513, "step": 4402 }, { "epoch": 0.22628224894644877, "grad_norm": 1.0345150232315063, "learning_rate": 9.023353641976395e-06, "loss": 0.7772, "step": 4403 }, { "epoch": 0.2263336416897934, "grad_norm": 1.0581997632980347, "learning_rate": 9.022859459250806e-06, "loss": 0.7977, "step": 4404 }, { "epoch": 0.22638503443313804, "grad_norm": 1.0816503763198853, "learning_rate": 9.022365165068383e-06, "loss": 0.7146, "step": 4405 }, { "epoch": 0.22643642717648269, "grad_norm": 1.1069536209106445, "learning_rate": 9.02187075944282e-06, "loss": 0.7615, "step": 4406 }, { "epoch": 0.22648781991982733, "grad_norm": 1.1262763738632202, "learning_rate": 9.021376242387816e-06, "loss": 0.8341, "step": 4407 }, { "epoch": 0.22653921266317195, "grad_norm": 1.0377700328826904, "learning_rate": 9.020881613917071e-06, "loss": 0.7573, "step": 4408 }, { "epoch": 0.2265906054065166, "grad_norm": 1.2070157527923584, "learning_rate": 9.020386874044292e-06, "loss": 0.7885, "step": 4409 }, { "epoch": 0.22664199814986125, "grad_norm": 1.1245477199554443, "learning_rate": 9.019892022783182e-06, "loss": 0.7898, "step": 4410 }, { "epoch": 0.2266933908932059, "grad_norm": 1.2579352855682373, "learning_rate": 9.019397060147453e-06, "loss": 0.7415, "step": 4411 }, { "epoch": 0.2267447836365505, "grad_norm": 1.1080354452133179, "learning_rate": 9.018901986150818e-06, "loss": 0.7613, "step": 4412 }, { "epoch": 0.22679617637989516, "grad_norm": 1.0786489248275757, "learning_rate": 9.018406800806996e-06, "loss": 0.7787, "step": 4413 }, { "epoch": 0.2268475691232398, "grad_norm": 1.088999629020691, "learning_rate": 9.017911504129704e-06, "loss": 0.7566, "step": 4414 }, { "epoch": 0.22689896186658443, "grad_norm": 1.1700572967529297, "learning_rate": 9.017416096132666e-06, "loss": 0.8157, "step": 4415 }, { "epoch": 0.22695035460992907, "grad_norm": 1.074612021446228, "learning_rate": 9.016920576829605e-06, "loss": 0.7906, "step": 4416 }, { "epoch": 0.22700174735327372, "grad_norm": 1.0596516132354736, "learning_rate": 9.01642494623425e-06, "loss": 0.8096, "step": 4417 }, { "epoch": 0.22705314009661837, "grad_norm": 1.140804648399353, "learning_rate": 9.015929204360338e-06, "loss": 0.7971, "step": 4418 }, { "epoch": 0.22710453283996299, "grad_norm": 0.7986681461334229, "learning_rate": 9.015433351221599e-06, "loss": 0.7308, "step": 4419 }, { "epoch": 0.22715592558330763, "grad_norm": 1.0625759363174438, "learning_rate": 9.014937386831772e-06, "loss": 0.7755, "step": 4420 }, { "epoch": 0.22720731832665228, "grad_norm": 1.0955561399459839, "learning_rate": 9.014441311204601e-06, "loss": 0.8003, "step": 4421 }, { "epoch": 0.22725871106999693, "grad_norm": 0.7873517870903015, "learning_rate": 9.013945124353825e-06, "loss": 0.6508, "step": 4422 }, { "epoch": 0.22731010381334155, "grad_norm": 1.1384801864624023, "learning_rate": 9.013448826293197e-06, "loss": 0.7577, "step": 4423 }, { "epoch": 0.2273614965566862, "grad_norm": 1.138342022895813, "learning_rate": 9.012952417036462e-06, "loss": 0.811, "step": 4424 }, { "epoch": 0.22741288930003084, "grad_norm": 1.2481677532196045, "learning_rate": 9.012455896597376e-06, "loss": 0.8544, "step": 4425 }, { "epoch": 0.2274642820433755, "grad_norm": 1.1288131475448608, "learning_rate": 9.011959264989698e-06, "loss": 0.7707, "step": 4426 }, { "epoch": 0.2275156747867201, "grad_norm": 1.0423487424850464, "learning_rate": 9.011462522227184e-06, "loss": 0.7593, "step": 4427 }, { "epoch": 0.22756706753006475, "grad_norm": 1.0508102178573608, "learning_rate": 9.010965668323596e-06, "loss": 0.7695, "step": 4428 }, { "epoch": 0.2276184602734094, "grad_norm": 1.0805801153182983, "learning_rate": 9.010468703292703e-06, "loss": 0.7554, "step": 4429 }, { "epoch": 0.22766985301675405, "grad_norm": 0.8440739512443542, "learning_rate": 9.009971627148272e-06, "loss": 0.6645, "step": 4430 }, { "epoch": 0.22772124576009867, "grad_norm": 1.0897002220153809, "learning_rate": 9.009474439904077e-06, "loss": 0.8681, "step": 4431 }, { "epoch": 0.22777263850344331, "grad_norm": 1.0401653051376343, "learning_rate": 9.008977141573892e-06, "loss": 0.7388, "step": 4432 }, { "epoch": 0.22782403124678796, "grad_norm": 1.361769676208496, "learning_rate": 9.008479732171491e-06, "loss": 0.8219, "step": 4433 }, { "epoch": 0.22787542399013258, "grad_norm": 1.1984179019927979, "learning_rate": 9.007982211710662e-06, "loss": 0.8121, "step": 4434 }, { "epoch": 0.22792681673347723, "grad_norm": 0.7784889936447144, "learning_rate": 9.007484580205183e-06, "loss": 0.7179, "step": 4435 }, { "epoch": 0.22797820947682187, "grad_norm": 1.1230177879333496, "learning_rate": 9.006986837668848e-06, "loss": 0.7252, "step": 4436 }, { "epoch": 0.22802960222016652, "grad_norm": 0.8413630723953247, "learning_rate": 9.00648898411544e-06, "loss": 0.7143, "step": 4437 }, { "epoch": 0.22808099496351114, "grad_norm": 1.2831002473831177, "learning_rate": 9.005991019558757e-06, "loss": 0.7109, "step": 4438 }, { "epoch": 0.2281323877068558, "grad_norm": 1.1145620346069336, "learning_rate": 9.005492944012596e-06, "loss": 0.7894, "step": 4439 }, { "epoch": 0.22818378045020044, "grad_norm": 1.246866226196289, "learning_rate": 9.004994757490753e-06, "loss": 0.7372, "step": 4440 }, { "epoch": 0.22823517319354508, "grad_norm": 1.0651546716690063, "learning_rate": 9.004496460007033e-06, "loss": 0.7823, "step": 4441 }, { "epoch": 0.2282865659368897, "grad_norm": 1.1191308498382568, "learning_rate": 9.003998051575245e-06, "loss": 0.8379, "step": 4442 }, { "epoch": 0.22833795868023435, "grad_norm": 1.1205394268035889, "learning_rate": 9.003499532209192e-06, "loss": 0.8078, "step": 4443 }, { "epoch": 0.228389351423579, "grad_norm": 1.1528823375701904, "learning_rate": 9.003000901922686e-06, "loss": 0.7854, "step": 4444 }, { "epoch": 0.22844074416692364, "grad_norm": 0.8207101225852966, "learning_rate": 9.00250216072955e-06, "loss": 0.7128, "step": 4445 }, { "epoch": 0.22849213691026826, "grad_norm": 1.1122715473175049, "learning_rate": 9.002003308643593e-06, "loss": 0.794, "step": 4446 }, { "epoch": 0.2285435296536129, "grad_norm": 1.116278052330017, "learning_rate": 9.00150434567864e-06, "loss": 0.7944, "step": 4447 }, { "epoch": 0.22859492239695756, "grad_norm": 0.7807151675224304, "learning_rate": 9.001005271848514e-06, "loss": 0.6728, "step": 4448 }, { "epoch": 0.2286463151403022, "grad_norm": 1.1525764465332031, "learning_rate": 9.000506087167045e-06, "loss": 0.8539, "step": 4449 }, { "epoch": 0.22869770788364682, "grad_norm": 1.152514100074768, "learning_rate": 9.00000679164806e-06, "loss": 0.7782, "step": 4450 }, { "epoch": 0.22874910062699147, "grad_norm": 1.0513501167297363, "learning_rate": 8.999507385305394e-06, "loss": 0.8015, "step": 4451 }, { "epoch": 0.22880049337033612, "grad_norm": 1.1203625202178955, "learning_rate": 8.999007868152884e-06, "loss": 0.8346, "step": 4452 }, { "epoch": 0.22885188611368074, "grad_norm": 1.0725682973861694, "learning_rate": 8.998508240204368e-06, "loss": 0.8004, "step": 4453 }, { "epoch": 0.22890327885702538, "grad_norm": 1.120689034461975, "learning_rate": 8.998008501473689e-06, "loss": 0.7415, "step": 4454 }, { "epoch": 0.22895467160037003, "grad_norm": 0.9727587699890137, "learning_rate": 8.997508651974695e-06, "loss": 0.6759, "step": 4455 }, { "epoch": 0.22900606434371468, "grad_norm": 1.18680739402771, "learning_rate": 8.997008691721232e-06, "loss": 0.7564, "step": 4456 }, { "epoch": 0.2290574570870593, "grad_norm": 1.1619582176208496, "learning_rate": 8.996508620727153e-06, "loss": 0.7865, "step": 4457 }, { "epoch": 0.22910884983040394, "grad_norm": 1.1755377054214478, "learning_rate": 8.996008439006314e-06, "loss": 0.7574, "step": 4458 }, { "epoch": 0.2291602425737486, "grad_norm": 1.1475725173950195, "learning_rate": 8.99550814657257e-06, "loss": 0.8064, "step": 4459 }, { "epoch": 0.22921163531709324, "grad_norm": 1.1178922653198242, "learning_rate": 8.995007743439785e-06, "loss": 0.758, "step": 4460 }, { "epoch": 0.22926302806043786, "grad_norm": 1.1709433794021606, "learning_rate": 8.99450722962182e-06, "loss": 0.8076, "step": 4461 }, { "epoch": 0.2293144208037825, "grad_norm": 0.8454722166061401, "learning_rate": 8.994006605132545e-06, "loss": 0.6948, "step": 4462 }, { "epoch": 0.22936581354712715, "grad_norm": 1.1098442077636719, "learning_rate": 8.99350586998583e-06, "loss": 0.7606, "step": 4463 }, { "epoch": 0.2294172062904718, "grad_norm": 1.0443400144577026, "learning_rate": 8.993005024195547e-06, "loss": 0.8005, "step": 4464 }, { "epoch": 0.22946859903381642, "grad_norm": 1.1996192932128906, "learning_rate": 8.992504067775573e-06, "loss": 0.8142, "step": 4465 }, { "epoch": 0.22951999177716106, "grad_norm": 0.8399350643157959, "learning_rate": 8.992003000739788e-06, "loss": 0.7124, "step": 4466 }, { "epoch": 0.2295713845205057, "grad_norm": 1.1041971445083618, "learning_rate": 8.991501823102074e-06, "loss": 0.768, "step": 4467 }, { "epoch": 0.22962277726385033, "grad_norm": 1.094053030014038, "learning_rate": 8.991000534876314e-06, "loss": 0.8055, "step": 4468 }, { "epoch": 0.22967417000719498, "grad_norm": 1.2026352882385254, "learning_rate": 8.990499136076402e-06, "loss": 0.8133, "step": 4469 }, { "epoch": 0.22972556275053962, "grad_norm": 0.7800304293632507, "learning_rate": 8.989997626716226e-06, "loss": 0.6957, "step": 4470 }, { "epoch": 0.22977695549388427, "grad_norm": 1.1374176740646362, "learning_rate": 8.98949600680968e-06, "loss": 0.858, "step": 4471 }, { "epoch": 0.2298283482372289, "grad_norm": 0.7385038137435913, "learning_rate": 8.988994276370666e-06, "loss": 0.6652, "step": 4472 }, { "epoch": 0.22987974098057354, "grad_norm": 1.0653998851776123, "learning_rate": 8.988492435413079e-06, "loss": 0.7535, "step": 4473 }, { "epoch": 0.22993113372391819, "grad_norm": 1.1988612413406372, "learning_rate": 8.987990483950828e-06, "loss": 0.7988, "step": 4474 }, { "epoch": 0.22998252646726283, "grad_norm": 1.2839815616607666, "learning_rate": 8.987488421997817e-06, "loss": 0.8316, "step": 4475 }, { "epoch": 0.23003391921060745, "grad_norm": 1.1141865253448486, "learning_rate": 8.98698624956796e-06, "loss": 0.8094, "step": 4476 }, { "epoch": 0.2300853119539521, "grad_norm": 1.1077133417129517, "learning_rate": 8.986483966675165e-06, "loss": 0.7718, "step": 4477 }, { "epoch": 0.23013670469729675, "grad_norm": 0.812037467956543, "learning_rate": 8.98598157333335e-06, "loss": 0.704, "step": 4478 }, { "epoch": 0.2301880974406414, "grad_norm": 1.0690003633499146, "learning_rate": 8.985479069556435e-06, "loss": 0.8058, "step": 4479 }, { "epoch": 0.230239490183986, "grad_norm": 1.1047130823135376, "learning_rate": 8.984976455358343e-06, "loss": 0.8027, "step": 4480 }, { "epoch": 0.23029088292733066, "grad_norm": 1.0936604738235474, "learning_rate": 8.984473730752999e-06, "loss": 0.7636, "step": 4481 }, { "epoch": 0.2303422756706753, "grad_norm": 1.1142051219940186, "learning_rate": 8.98397089575433e-06, "loss": 0.7486, "step": 4482 }, { "epoch": 0.23039366841401995, "grad_norm": 1.1740727424621582, "learning_rate": 8.983467950376269e-06, "loss": 0.779, "step": 4483 }, { "epoch": 0.23044506115736457, "grad_norm": 1.217950463294983, "learning_rate": 8.982964894632748e-06, "loss": 0.8035, "step": 4484 }, { "epoch": 0.23049645390070922, "grad_norm": 1.1258676052093506, "learning_rate": 8.98246172853771e-06, "loss": 0.7557, "step": 4485 }, { "epoch": 0.23054784664405387, "grad_norm": 1.2208675146102905, "learning_rate": 8.981958452105089e-06, "loss": 0.8168, "step": 4486 }, { "epoch": 0.23059923938739849, "grad_norm": 1.1240817308425903, "learning_rate": 8.981455065348833e-06, "loss": 0.8311, "step": 4487 }, { "epoch": 0.23065063213074313, "grad_norm": 1.0656596422195435, "learning_rate": 8.980951568282887e-06, "loss": 0.8146, "step": 4488 }, { "epoch": 0.23070202487408778, "grad_norm": 1.082695722579956, "learning_rate": 8.980447960921202e-06, "loss": 0.7956, "step": 4489 }, { "epoch": 0.23075341761743243, "grad_norm": 1.1599644422531128, "learning_rate": 8.979944243277731e-06, "loss": 0.7998, "step": 4490 }, { "epoch": 0.23080481036077705, "grad_norm": 0.8711803555488586, "learning_rate": 8.979440415366428e-06, "loss": 0.7813, "step": 4491 }, { "epoch": 0.2308562031041217, "grad_norm": 1.0243113040924072, "learning_rate": 8.978936477201254e-06, "loss": 0.7542, "step": 4492 }, { "epoch": 0.23090759584746634, "grad_norm": 1.1325196027755737, "learning_rate": 8.978432428796172e-06, "loss": 0.835, "step": 4493 }, { "epoch": 0.230958988590811, "grad_norm": 1.199767827987671, "learning_rate": 8.977928270165142e-06, "loss": 0.8232, "step": 4494 }, { "epoch": 0.2310103813341556, "grad_norm": 1.0398298501968384, "learning_rate": 8.977424001322138e-06, "loss": 0.8073, "step": 4495 }, { "epoch": 0.23106177407750025, "grad_norm": 0.7911036610603333, "learning_rate": 8.97691962228113e-06, "loss": 0.669, "step": 4496 }, { "epoch": 0.2311131668208449, "grad_norm": 1.0952215194702148, "learning_rate": 8.976415133056086e-06, "loss": 0.7722, "step": 4497 }, { "epoch": 0.23116455956418955, "grad_norm": 1.1566587686538696, "learning_rate": 8.975910533660991e-06, "loss": 0.7534, "step": 4498 }, { "epoch": 0.23121595230753417, "grad_norm": 1.1169732809066772, "learning_rate": 8.975405824109824e-06, "loss": 0.7574, "step": 4499 }, { "epoch": 0.23126734505087881, "grad_norm": 1.0392191410064697, "learning_rate": 8.974901004416566e-06, "loss": 0.7814, "step": 4500 }, { "epoch": 0.23131873779422346, "grad_norm": 1.1220823526382446, "learning_rate": 8.974396074595206e-06, "loss": 0.8103, "step": 4501 }, { "epoch": 0.2313701305375681, "grad_norm": 1.0670078992843628, "learning_rate": 8.97389103465973e-06, "loss": 0.7642, "step": 4502 }, { "epoch": 0.23142152328091273, "grad_norm": 1.1362812519073486, "learning_rate": 8.973385884624134e-06, "loss": 0.7947, "step": 4503 }, { "epoch": 0.23147291602425737, "grad_norm": 1.1523749828338623, "learning_rate": 8.972880624502412e-06, "loss": 0.7975, "step": 4504 }, { "epoch": 0.23152430876760202, "grad_norm": 1.1217130422592163, "learning_rate": 8.972375254308563e-06, "loss": 0.749, "step": 4505 }, { "epoch": 0.23157570151094664, "grad_norm": 1.173633337020874, "learning_rate": 8.971869774056588e-06, "loss": 0.8364, "step": 4506 }, { "epoch": 0.2316270942542913, "grad_norm": 0.8056478500366211, "learning_rate": 8.971364183760493e-06, "loss": 0.6664, "step": 4507 }, { "epoch": 0.23167848699763594, "grad_norm": 0.7606384754180908, "learning_rate": 8.970858483434288e-06, "loss": 0.6788, "step": 4508 }, { "epoch": 0.23172987974098058, "grad_norm": 0.7088168859481812, "learning_rate": 8.970352673091977e-06, "loss": 0.716, "step": 4509 }, { "epoch": 0.2317812724843252, "grad_norm": 0.8459866642951965, "learning_rate": 8.96984675274758e-06, "loss": 0.7091, "step": 4510 }, { "epoch": 0.23183266522766985, "grad_norm": 1.1425189971923828, "learning_rate": 8.969340722415112e-06, "loss": 0.7561, "step": 4511 }, { "epoch": 0.2318840579710145, "grad_norm": 0.7219640612602234, "learning_rate": 8.968834582108595e-06, "loss": 0.7114, "step": 4512 }, { "epoch": 0.23193545071435914, "grad_norm": 1.202033519744873, "learning_rate": 8.968328331842047e-06, "loss": 0.8163, "step": 4513 }, { "epoch": 0.23198684345770376, "grad_norm": 1.132049798965454, "learning_rate": 8.967821971629498e-06, "loss": 0.7245, "step": 4514 }, { "epoch": 0.2320382362010484, "grad_norm": 1.0010114908218384, "learning_rate": 8.967315501484976e-06, "loss": 0.7391, "step": 4515 }, { "epoch": 0.23208962894439306, "grad_norm": 1.1907685995101929, "learning_rate": 8.966808921422517e-06, "loss": 0.7508, "step": 4516 }, { "epoch": 0.2321410216877377, "grad_norm": 1.1338914632797241, "learning_rate": 8.966302231456149e-06, "loss": 0.8153, "step": 4517 }, { "epoch": 0.23219241443108232, "grad_norm": 0.885730504989624, "learning_rate": 8.965795431599915e-06, "loss": 0.6724, "step": 4518 }, { "epoch": 0.23224380717442697, "grad_norm": 1.1674913167953491, "learning_rate": 8.965288521867857e-06, "loss": 0.8093, "step": 4519 }, { "epoch": 0.23229519991777162, "grad_norm": 1.1272701025009155, "learning_rate": 8.964781502274016e-06, "loss": 0.7623, "step": 4520 }, { "epoch": 0.23234659266111626, "grad_norm": 1.206316590309143, "learning_rate": 8.964274372832442e-06, "loss": 0.7989, "step": 4521 }, { "epoch": 0.23239798540446088, "grad_norm": 1.1185319423675537, "learning_rate": 8.963767133557184e-06, "loss": 0.7863, "step": 4522 }, { "epoch": 0.23244937814780553, "grad_norm": 1.223645567893982, "learning_rate": 8.963259784462297e-06, "loss": 0.8357, "step": 4523 }, { "epoch": 0.23250077089115018, "grad_norm": 1.0890369415283203, "learning_rate": 8.962752325561838e-06, "loss": 0.7882, "step": 4524 }, { "epoch": 0.2325521636344948, "grad_norm": 1.0306440591812134, "learning_rate": 8.962244756869864e-06, "loss": 0.775, "step": 4525 }, { "epoch": 0.23260355637783944, "grad_norm": 1.1407660245895386, "learning_rate": 8.961737078400438e-06, "loss": 0.771, "step": 4526 }, { "epoch": 0.2326549491211841, "grad_norm": 1.143796682357788, "learning_rate": 8.961229290167629e-06, "loss": 0.8029, "step": 4527 }, { "epoch": 0.23270634186452874, "grad_norm": 1.1385587453842163, "learning_rate": 8.960721392185502e-06, "loss": 0.7906, "step": 4528 }, { "epoch": 0.23275773460787336, "grad_norm": 1.0587302446365356, "learning_rate": 8.960213384468131e-06, "loss": 0.8018, "step": 4529 }, { "epoch": 0.232809127351218, "grad_norm": 1.0513598918914795, "learning_rate": 8.959705267029588e-06, "loss": 0.7932, "step": 4530 }, { "epoch": 0.23286052009456265, "grad_norm": 1.1498783826828003, "learning_rate": 8.959197039883953e-06, "loss": 0.8374, "step": 4531 }, { "epoch": 0.2329119128379073, "grad_norm": 1.338956356048584, "learning_rate": 8.958688703045308e-06, "loss": 0.8174, "step": 4532 }, { "epoch": 0.23296330558125192, "grad_norm": 1.1401498317718506, "learning_rate": 8.958180256527737e-06, "loss": 0.7903, "step": 4533 }, { "epoch": 0.23301469832459656, "grad_norm": 1.056542158126831, "learning_rate": 8.957671700345323e-06, "loss": 0.7848, "step": 4534 }, { "epoch": 0.2330660910679412, "grad_norm": 1.1784013509750366, "learning_rate": 8.95716303451216e-06, "loss": 0.7592, "step": 4535 }, { "epoch": 0.23311748381128586, "grad_norm": 1.1189968585968018, "learning_rate": 8.95665425904234e-06, "loss": 0.8134, "step": 4536 }, { "epoch": 0.23316887655463048, "grad_norm": 1.0967286825180054, "learning_rate": 8.956145373949956e-06, "loss": 0.7691, "step": 4537 }, { "epoch": 0.23322026929797512, "grad_norm": 1.098055362701416, "learning_rate": 8.95563637924911e-06, "loss": 0.8455, "step": 4538 }, { "epoch": 0.23327166204131977, "grad_norm": 1.1101347208023071, "learning_rate": 8.955127274953907e-06, "loss": 0.79, "step": 4539 }, { "epoch": 0.23332305478466442, "grad_norm": 1.1038672924041748, "learning_rate": 8.954618061078446e-06, "loss": 0.8121, "step": 4540 }, { "epoch": 0.23337444752800904, "grad_norm": 1.1377947330474854, "learning_rate": 8.95410873763684e-06, "loss": 0.7491, "step": 4541 }, { "epoch": 0.23342584027135369, "grad_norm": 1.0690524578094482, "learning_rate": 8.9535993046432e-06, "loss": 0.8045, "step": 4542 }, { "epoch": 0.23347723301469833, "grad_norm": 1.1139917373657227, "learning_rate": 8.953089762111635e-06, "loss": 0.7898, "step": 4543 }, { "epoch": 0.23352862575804295, "grad_norm": 0.8483744859695435, "learning_rate": 8.952580110056268e-06, "loss": 0.7127, "step": 4544 }, { "epoch": 0.2335800185013876, "grad_norm": 1.1292800903320312, "learning_rate": 8.952070348491218e-06, "loss": 0.7718, "step": 4545 }, { "epoch": 0.23363141124473225, "grad_norm": 1.0786324739456177, "learning_rate": 8.951560477430608e-06, "loss": 0.7644, "step": 4546 }, { "epoch": 0.2336828039880769, "grad_norm": 1.1720752716064453, "learning_rate": 8.951050496888563e-06, "loss": 0.7792, "step": 4547 }, { "epoch": 0.2337341967314215, "grad_norm": 1.1188567876815796, "learning_rate": 8.950540406879213e-06, "loss": 0.7516, "step": 4548 }, { "epoch": 0.23378558947476616, "grad_norm": 1.1418123245239258, "learning_rate": 8.950030207416693e-06, "loss": 0.8257, "step": 4549 }, { "epoch": 0.2338369822181108, "grad_norm": 1.0946413278579712, "learning_rate": 8.949519898515137e-06, "loss": 0.8446, "step": 4550 }, { "epoch": 0.23388837496145545, "grad_norm": 0.9861295223236084, "learning_rate": 8.949009480188683e-06, "loss": 0.703, "step": 4551 }, { "epoch": 0.23393976770480007, "grad_norm": 1.0266975164413452, "learning_rate": 8.948498952451471e-06, "loss": 0.7289, "step": 4552 }, { "epoch": 0.23399116044814472, "grad_norm": 0.8530096411705017, "learning_rate": 8.94798831531765e-06, "loss": 0.6927, "step": 4553 }, { "epoch": 0.23404255319148937, "grad_norm": 1.1061002016067505, "learning_rate": 8.947477568801365e-06, "loss": 0.8045, "step": 4554 }, { "epoch": 0.234093945934834, "grad_norm": 1.099226951599121, "learning_rate": 8.946966712916767e-06, "loss": 0.7296, "step": 4555 }, { "epoch": 0.23414533867817863, "grad_norm": 0.7294313311576843, "learning_rate": 8.946455747678007e-06, "loss": 0.6967, "step": 4556 }, { "epoch": 0.23419673142152328, "grad_norm": 0.7947079539299011, "learning_rate": 8.945944673099247e-06, "loss": 0.6819, "step": 4557 }, { "epoch": 0.23424812416486793, "grad_norm": 1.1666070222854614, "learning_rate": 8.945433489194644e-06, "loss": 0.8125, "step": 4558 }, { "epoch": 0.23429951690821257, "grad_norm": 1.1196647882461548, "learning_rate": 8.944922195978358e-06, "loss": 0.7334, "step": 4559 }, { "epoch": 0.2343509096515572, "grad_norm": 1.1116751432418823, "learning_rate": 8.94441079346456e-06, "loss": 0.8016, "step": 4560 }, { "epoch": 0.23440230239490184, "grad_norm": 0.8251853585243225, "learning_rate": 8.943899281667417e-06, "loss": 0.691, "step": 4561 }, { "epoch": 0.2344536951382465, "grad_norm": 1.170778751373291, "learning_rate": 8.9433876606011e-06, "loss": 0.7783, "step": 4562 }, { "epoch": 0.2345050878815911, "grad_norm": 1.0678445100784302, "learning_rate": 8.942875930279783e-06, "loss": 0.8069, "step": 4563 }, { "epoch": 0.23455648062493575, "grad_norm": 0.7048591375350952, "learning_rate": 8.942364090717646e-06, "loss": 0.6724, "step": 4564 }, { "epoch": 0.2346078733682804, "grad_norm": 1.0690581798553467, "learning_rate": 8.941852141928871e-06, "loss": 0.8012, "step": 4565 }, { "epoch": 0.23465926611162505, "grad_norm": 1.0831481218338013, "learning_rate": 8.941340083927636e-06, "loss": 0.7832, "step": 4566 }, { "epoch": 0.23471065885496967, "grad_norm": 1.034258484840393, "learning_rate": 8.940827916728136e-06, "loss": 0.7502, "step": 4567 }, { "epoch": 0.23476205159831431, "grad_norm": 1.0713729858398438, "learning_rate": 8.940315640344558e-06, "loss": 0.7836, "step": 4568 }, { "epoch": 0.23481344434165896, "grad_norm": 0.7750464677810669, "learning_rate": 8.93980325479109e-06, "loss": 0.7455, "step": 4569 }, { "epoch": 0.2348648370850036, "grad_norm": 1.0907200574874878, "learning_rate": 8.939290760081936e-06, "loss": 0.7765, "step": 4570 }, { "epoch": 0.23491622982834823, "grad_norm": 0.7505485415458679, "learning_rate": 8.93877815623129e-06, "loss": 0.7139, "step": 4571 }, { "epoch": 0.23496762257169287, "grad_norm": 1.0978574752807617, "learning_rate": 8.938265443253356e-06, "loss": 0.789, "step": 4572 }, { "epoch": 0.23501901531503752, "grad_norm": 1.0731624364852905, "learning_rate": 8.937752621162338e-06, "loss": 0.7851, "step": 4573 }, { "epoch": 0.23507040805838217, "grad_norm": 1.0606452226638794, "learning_rate": 8.937239689972447e-06, "loss": 0.8246, "step": 4574 }, { "epoch": 0.2351218008017268, "grad_norm": 1.1022939682006836, "learning_rate": 8.93672664969789e-06, "loss": 0.7821, "step": 4575 }, { "epoch": 0.23517319354507144, "grad_norm": 1.070448875427246, "learning_rate": 8.936213500352885e-06, "loss": 0.7312, "step": 4576 }, { "epoch": 0.23522458628841608, "grad_norm": 1.2047014236450195, "learning_rate": 8.935700241951648e-06, "loss": 0.7973, "step": 4577 }, { "epoch": 0.23527597903176073, "grad_norm": 1.0923635959625244, "learning_rate": 8.935186874508398e-06, "loss": 0.7559, "step": 4578 }, { "epoch": 0.23532737177510535, "grad_norm": 1.3156719207763672, "learning_rate": 8.934673398037359e-06, "loss": 0.695, "step": 4579 }, { "epoch": 0.23537876451845, "grad_norm": 1.1310994625091553, "learning_rate": 8.934159812552758e-06, "loss": 0.7762, "step": 4580 }, { "epoch": 0.23543015726179464, "grad_norm": 1.1260188817977905, "learning_rate": 8.933646118068823e-06, "loss": 0.758, "step": 4581 }, { "epoch": 0.23548155000513926, "grad_norm": 1.0956807136535645, "learning_rate": 8.93313231459979e-06, "loss": 0.747, "step": 4582 }, { "epoch": 0.2355329427484839, "grad_norm": 1.0462346076965332, "learning_rate": 8.932618402159889e-06, "loss": 0.8148, "step": 4583 }, { "epoch": 0.23558433549182856, "grad_norm": 1.121314525604248, "learning_rate": 8.932104380763363e-06, "loss": 0.7542, "step": 4584 }, { "epoch": 0.2356357282351732, "grad_norm": 0.8486557602882385, "learning_rate": 8.93159025042445e-06, "loss": 0.7125, "step": 4585 }, { "epoch": 0.23568712097851782, "grad_norm": 1.0845333337783813, "learning_rate": 8.931076011157395e-06, "loss": 0.7961, "step": 4586 }, { "epoch": 0.23573851372186247, "grad_norm": 1.1341445446014404, "learning_rate": 8.930561662976447e-06, "loss": 0.8298, "step": 4587 }, { "epoch": 0.23578990646520712, "grad_norm": 1.028184413909912, "learning_rate": 8.930047205895857e-06, "loss": 0.7072, "step": 4588 }, { "epoch": 0.23584129920855176, "grad_norm": 0.8409712314605713, "learning_rate": 8.929532639929877e-06, "loss": 0.7093, "step": 4589 }, { "epoch": 0.23589269195189638, "grad_norm": 1.1844570636749268, "learning_rate": 8.929017965092764e-06, "loss": 0.7725, "step": 4590 }, { "epoch": 0.23594408469524103, "grad_norm": 1.053727149963379, "learning_rate": 8.928503181398776e-06, "loss": 0.7503, "step": 4591 }, { "epoch": 0.23599547743858568, "grad_norm": 1.0847516059875488, "learning_rate": 8.927988288862178e-06, "loss": 0.8394, "step": 4592 }, { "epoch": 0.23604687018193032, "grad_norm": 1.1723809242248535, "learning_rate": 8.927473287497234e-06, "loss": 0.7766, "step": 4593 }, { "epoch": 0.23609826292527494, "grad_norm": 1.328163743019104, "learning_rate": 8.926958177318212e-06, "loss": 0.7778, "step": 4594 }, { "epoch": 0.2361496556686196, "grad_norm": 1.1360726356506348, "learning_rate": 8.926442958339385e-06, "loss": 0.8347, "step": 4595 }, { "epoch": 0.23620104841196424, "grad_norm": 1.1715898513793945, "learning_rate": 8.925927630575029e-06, "loss": 0.8164, "step": 4596 }, { "epoch": 0.23625244115530886, "grad_norm": 1.2126623392105103, "learning_rate": 8.925412194039418e-06, "loss": 0.8685, "step": 4597 }, { "epoch": 0.2363038338986535, "grad_norm": 0.8496134281158447, "learning_rate": 8.924896648746833e-06, "loss": 0.6945, "step": 4598 }, { "epoch": 0.23635522664199815, "grad_norm": 0.813201904296875, "learning_rate": 8.924380994711562e-06, "loss": 0.696, "step": 4599 }, { "epoch": 0.2364066193853428, "grad_norm": 1.106196641921997, "learning_rate": 8.923865231947886e-06, "loss": 0.8128, "step": 4600 }, { "epoch": 0.23645801212868742, "grad_norm": 1.0879048109054565, "learning_rate": 8.923349360470097e-06, "loss": 0.8075, "step": 4601 }, { "epoch": 0.23650940487203206, "grad_norm": 1.0690749883651733, "learning_rate": 8.92283338029249e-06, "loss": 0.7977, "step": 4602 }, { "epoch": 0.2365607976153767, "grad_norm": 1.0636183023452759, "learning_rate": 8.922317291429359e-06, "loss": 0.7493, "step": 4603 }, { "epoch": 0.23661219035872136, "grad_norm": 1.1910406351089478, "learning_rate": 8.921801093894999e-06, "loss": 0.7833, "step": 4604 }, { "epoch": 0.23666358310206598, "grad_norm": 1.0821884870529175, "learning_rate": 8.921284787703719e-06, "loss": 0.7591, "step": 4605 }, { "epoch": 0.23671497584541062, "grad_norm": 1.050493597984314, "learning_rate": 8.920768372869818e-06, "loss": 0.7265, "step": 4606 }, { "epoch": 0.23676636858875527, "grad_norm": 1.0911407470703125, "learning_rate": 8.920251849407603e-06, "loss": 0.7685, "step": 4607 }, { "epoch": 0.23681776133209992, "grad_norm": 1.0285933017730713, "learning_rate": 8.919735217331391e-06, "loss": 0.7892, "step": 4608 }, { "epoch": 0.23686915407544454, "grad_norm": 1.2337404489517212, "learning_rate": 8.91921847665549e-06, "loss": 0.7928, "step": 4609 }, { "epoch": 0.23692054681878918, "grad_norm": 1.114739179611206, "learning_rate": 8.91870162739422e-06, "loss": 0.8135, "step": 4610 }, { "epoch": 0.23697193956213383, "grad_norm": 1.0755085945129395, "learning_rate": 8.918184669561898e-06, "loss": 0.7435, "step": 4611 }, { "epoch": 0.23702333230547848, "grad_norm": 0.9738936424255371, "learning_rate": 8.917667603172849e-06, "loss": 0.7545, "step": 4612 }, { "epoch": 0.2370747250488231, "grad_norm": 1.1980764865875244, "learning_rate": 8.917150428241396e-06, "loss": 0.8395, "step": 4613 }, { "epoch": 0.23712611779216775, "grad_norm": 1.0453612804412842, "learning_rate": 8.916633144781872e-06, "loss": 0.7529, "step": 4614 }, { "epoch": 0.2371775105355124, "grad_norm": 1.217781662940979, "learning_rate": 8.916115752808606e-06, "loss": 0.7697, "step": 4615 }, { "epoch": 0.237228903278857, "grad_norm": 1.076009750366211, "learning_rate": 8.915598252335932e-06, "loss": 0.7917, "step": 4616 }, { "epoch": 0.23728029602220166, "grad_norm": 0.8455723524093628, "learning_rate": 8.91508064337819e-06, "loss": 0.6762, "step": 4617 }, { "epoch": 0.2373316887655463, "grad_norm": 1.0471481084823608, "learning_rate": 8.914562925949722e-06, "loss": 0.7684, "step": 4618 }, { "epoch": 0.23738308150889095, "grad_norm": 1.0268428325653076, "learning_rate": 8.914045100064867e-06, "loss": 0.7982, "step": 4619 }, { "epoch": 0.23743447425223557, "grad_norm": 1.1538622379302979, "learning_rate": 8.913527165737977e-06, "loss": 0.8277, "step": 4620 }, { "epoch": 0.23748586699558022, "grad_norm": 0.8173934817314148, "learning_rate": 8.913009122983398e-06, "loss": 0.6387, "step": 4621 }, { "epoch": 0.23753725973892487, "grad_norm": 1.1196017265319824, "learning_rate": 8.912490971815484e-06, "loss": 0.8006, "step": 4622 }, { "epoch": 0.2375886524822695, "grad_norm": 0.7456162571907043, "learning_rate": 8.911972712248591e-06, "loss": 0.7491, "step": 4623 }, { "epoch": 0.23764004522561413, "grad_norm": 1.1029239892959595, "learning_rate": 8.911454344297079e-06, "loss": 0.7641, "step": 4624 }, { "epoch": 0.23769143796895878, "grad_norm": 1.128382921218872, "learning_rate": 8.910935867975309e-06, "loss": 0.7897, "step": 4625 }, { "epoch": 0.23774283071230343, "grad_norm": 1.164041519165039, "learning_rate": 8.910417283297644e-06, "loss": 0.8348, "step": 4626 }, { "epoch": 0.23779422345564807, "grad_norm": 1.1257448196411133, "learning_rate": 8.909898590278454e-06, "loss": 0.8082, "step": 4627 }, { "epoch": 0.2378456161989927, "grad_norm": 1.1597113609313965, "learning_rate": 8.909379788932109e-06, "loss": 0.7304, "step": 4628 }, { "epoch": 0.23789700894233734, "grad_norm": 1.0364222526550293, "learning_rate": 8.908860879272984e-06, "loss": 0.7434, "step": 4629 }, { "epoch": 0.237948401685682, "grad_norm": 1.1011673212051392, "learning_rate": 8.908341861315455e-06, "loss": 0.7693, "step": 4630 }, { "epoch": 0.23799979442902663, "grad_norm": 1.011147379875183, "learning_rate": 8.9078227350739e-06, "loss": 0.6551, "step": 4631 }, { "epoch": 0.23805118717237125, "grad_norm": 1.146329641342163, "learning_rate": 8.907303500562706e-06, "loss": 0.8192, "step": 4632 }, { "epoch": 0.2381025799157159, "grad_norm": 1.1481460332870483, "learning_rate": 8.906784157796255e-06, "loss": 0.8728, "step": 4633 }, { "epoch": 0.23815397265906055, "grad_norm": 1.1370223760604858, "learning_rate": 8.906264706788938e-06, "loss": 0.8327, "step": 4634 }, { "epoch": 0.23820536540240517, "grad_norm": 1.2424817085266113, "learning_rate": 8.905745147555145e-06, "loss": 0.8169, "step": 4635 }, { "epoch": 0.23825675814574981, "grad_norm": 1.0799906253814697, "learning_rate": 8.905225480109273e-06, "loss": 0.7915, "step": 4636 }, { "epoch": 0.23830815088909446, "grad_norm": 0.9839283227920532, "learning_rate": 8.90470570446572e-06, "loss": 0.6992, "step": 4637 }, { "epoch": 0.2383595436324391, "grad_norm": 1.1425650119781494, "learning_rate": 8.904185820638883e-06, "loss": 0.805, "step": 4638 }, { "epoch": 0.23841093637578373, "grad_norm": 1.0785012245178223, "learning_rate": 8.903665828643171e-06, "loss": 0.7964, "step": 4639 }, { "epoch": 0.23846232911912837, "grad_norm": 1.0946489572525024, "learning_rate": 8.903145728492986e-06, "loss": 0.7881, "step": 4640 }, { "epoch": 0.23851372186247302, "grad_norm": 1.0840612649917603, "learning_rate": 8.902625520202742e-06, "loss": 0.6918, "step": 4641 }, { "epoch": 0.23856511460581767, "grad_norm": 1.1396162509918213, "learning_rate": 8.90210520378685e-06, "loss": 0.7199, "step": 4642 }, { "epoch": 0.2386165073491623, "grad_norm": 1.1419869661331177, "learning_rate": 8.901584779259724e-06, "loss": 0.7596, "step": 4643 }, { "epoch": 0.23866790009250693, "grad_norm": 1.1918631792068481, "learning_rate": 8.901064246635786e-06, "loss": 0.7865, "step": 4644 }, { "epoch": 0.23871929283585158, "grad_norm": 1.1053193807601929, "learning_rate": 8.900543605929457e-06, "loss": 0.8104, "step": 4645 }, { "epoch": 0.23877068557919623, "grad_norm": 1.0856996774673462, "learning_rate": 8.900022857155162e-06, "loss": 0.7859, "step": 4646 }, { "epoch": 0.23882207832254085, "grad_norm": 1.0968729257583618, "learning_rate": 8.899502000327326e-06, "loss": 0.8558, "step": 4647 }, { "epoch": 0.2388734710658855, "grad_norm": 1.1180557012557983, "learning_rate": 8.898981035460384e-06, "loss": 0.8288, "step": 4648 }, { "epoch": 0.23892486380923014, "grad_norm": 1.1450949907302856, "learning_rate": 8.898459962568766e-06, "loss": 0.816, "step": 4649 }, { "epoch": 0.2389762565525748, "grad_norm": 1.1149415969848633, "learning_rate": 8.89793878166691e-06, "loss": 0.8358, "step": 4650 }, { "epoch": 0.2390276492959194, "grad_norm": 1.0573062896728516, "learning_rate": 8.897417492769258e-06, "loss": 0.8233, "step": 4651 }, { "epoch": 0.23907904203926406, "grad_norm": 1.0443198680877686, "learning_rate": 8.896896095890249e-06, "loss": 0.7348, "step": 4652 }, { "epoch": 0.2391304347826087, "grad_norm": 0.9232312440872192, "learning_rate": 8.896374591044333e-06, "loss": 0.7323, "step": 4653 }, { "epoch": 0.23918182752595332, "grad_norm": 1.1453986167907715, "learning_rate": 8.895852978245953e-06, "loss": 0.7608, "step": 4654 }, { "epoch": 0.23923322026929797, "grad_norm": 1.0893425941467285, "learning_rate": 8.895331257509567e-06, "loss": 0.7375, "step": 4655 }, { "epoch": 0.23928461301264262, "grad_norm": 0.7401126027107239, "learning_rate": 8.894809428849626e-06, "loss": 0.6899, "step": 4656 }, { "epoch": 0.23933600575598726, "grad_norm": 0.7751044034957886, "learning_rate": 8.89428749228059e-06, "loss": 0.6604, "step": 4657 }, { "epoch": 0.23938739849933188, "grad_norm": 1.0986485481262207, "learning_rate": 8.893765447816916e-06, "loss": 0.7595, "step": 4658 }, { "epoch": 0.23943879124267653, "grad_norm": 0.9008376598358154, "learning_rate": 8.893243295473074e-06, "loss": 0.7473, "step": 4659 }, { "epoch": 0.23949018398602118, "grad_norm": 1.1359559297561646, "learning_rate": 8.892721035263523e-06, "loss": 0.7984, "step": 4660 }, { "epoch": 0.23954157672936582, "grad_norm": 1.1289680004119873, "learning_rate": 8.892198667202737e-06, "loss": 0.7515, "step": 4661 }, { "epoch": 0.23959296947271044, "grad_norm": 1.2171883583068848, "learning_rate": 8.891676191305189e-06, "loss": 0.832, "step": 4662 }, { "epoch": 0.2396443622160551, "grad_norm": 0.7863712906837463, "learning_rate": 8.891153607585353e-06, "loss": 0.6769, "step": 4663 }, { "epoch": 0.23969575495939974, "grad_norm": 0.7765997648239136, "learning_rate": 8.89063091605771e-06, "loss": 0.7329, "step": 4664 }, { "epoch": 0.23974714770274438, "grad_norm": 1.1068962812423706, "learning_rate": 8.890108116736737e-06, "loss": 0.8031, "step": 4665 }, { "epoch": 0.239798540446089, "grad_norm": 0.8460896015167236, "learning_rate": 8.889585209636923e-06, "loss": 0.6946, "step": 4666 }, { "epoch": 0.23984993318943365, "grad_norm": 1.1296321153640747, "learning_rate": 8.889062194772755e-06, "loss": 0.8177, "step": 4667 }, { "epoch": 0.2399013259327783, "grad_norm": 0.9098497033119202, "learning_rate": 8.888539072158725e-06, "loss": 0.7137, "step": 4668 }, { "epoch": 0.23995271867612294, "grad_norm": 0.872864305973053, "learning_rate": 8.88801584180932e-06, "loss": 0.72, "step": 4669 }, { "epoch": 0.24000411141946756, "grad_norm": 1.0843358039855957, "learning_rate": 8.887492503739043e-06, "loss": 0.799, "step": 4670 }, { "epoch": 0.2400555041628122, "grad_norm": 1.0153932571411133, "learning_rate": 8.886969057962392e-06, "loss": 0.7932, "step": 4671 }, { "epoch": 0.24010689690615686, "grad_norm": 1.439363718032837, "learning_rate": 8.886445504493867e-06, "loss": 0.7693, "step": 4672 }, { "epoch": 0.24015828964950148, "grad_norm": 0.7604982852935791, "learning_rate": 8.885921843347976e-06, "loss": 0.6512, "step": 4673 }, { "epoch": 0.24020968239284612, "grad_norm": 1.1090489625930786, "learning_rate": 8.885398074539229e-06, "loss": 0.7521, "step": 4674 }, { "epoch": 0.24026107513619077, "grad_norm": 1.0481171607971191, "learning_rate": 8.884874198082133e-06, "loss": 0.7501, "step": 4675 }, { "epoch": 0.24031246787953542, "grad_norm": 1.0286756753921509, "learning_rate": 8.884350213991206e-06, "loss": 0.7358, "step": 4676 }, { "epoch": 0.24036386062288004, "grad_norm": 1.0527909994125366, "learning_rate": 8.883826122280963e-06, "loss": 0.7942, "step": 4677 }, { "epoch": 0.24041525336622468, "grad_norm": 1.1009913682937622, "learning_rate": 8.883301922965929e-06, "loss": 0.8168, "step": 4678 }, { "epoch": 0.24046664610956933, "grad_norm": 1.1306523084640503, "learning_rate": 8.882777616060621e-06, "loss": 0.8462, "step": 4679 }, { "epoch": 0.24051803885291398, "grad_norm": 1.6010468006134033, "learning_rate": 8.88225320157957e-06, "loss": 0.7841, "step": 4680 }, { "epoch": 0.2405694315962586, "grad_norm": 0.7657957673072815, "learning_rate": 8.881728679537303e-06, "loss": 0.7034, "step": 4681 }, { "epoch": 0.24062082433960325, "grad_norm": 1.0416008234024048, "learning_rate": 8.881204049948355e-06, "loss": 0.7756, "step": 4682 }, { "epoch": 0.2406722170829479, "grad_norm": 1.0818085670471191, "learning_rate": 8.880679312827259e-06, "loss": 0.7797, "step": 4683 }, { "epoch": 0.24072360982629254, "grad_norm": 1.025872826576233, "learning_rate": 8.880154468188552e-06, "loss": 0.7524, "step": 4684 }, { "epoch": 0.24077500256963716, "grad_norm": 1.0807186365127563, "learning_rate": 8.879629516046778e-06, "loss": 0.816, "step": 4685 }, { "epoch": 0.2408263953129818, "grad_norm": 1.1045392751693726, "learning_rate": 8.87910445641648e-06, "loss": 0.7695, "step": 4686 }, { "epoch": 0.24087778805632645, "grad_norm": 1.1010226011276245, "learning_rate": 8.878579289312208e-06, "loss": 0.7887, "step": 4687 }, { "epoch": 0.2409291807996711, "grad_norm": 1.0953706502914429, "learning_rate": 8.878054014748507e-06, "loss": 0.7692, "step": 4688 }, { "epoch": 0.24098057354301572, "grad_norm": 1.2257829904556274, "learning_rate": 8.877528632739936e-06, "loss": 0.7809, "step": 4689 }, { "epoch": 0.24103196628636037, "grad_norm": 0.9221262335777283, "learning_rate": 8.877003143301046e-06, "loss": 0.7037, "step": 4690 }, { "epoch": 0.241083359029705, "grad_norm": 0.824110746383667, "learning_rate": 8.876477546446398e-06, "loss": 0.7257, "step": 4691 }, { "epoch": 0.24113475177304963, "grad_norm": 1.0780715942382812, "learning_rate": 8.875951842190555e-06, "loss": 0.7974, "step": 4692 }, { "epoch": 0.24118614451639428, "grad_norm": 1.1381309032440186, "learning_rate": 8.875426030548082e-06, "loss": 0.8029, "step": 4693 }, { "epoch": 0.24123753725973893, "grad_norm": 1.121325135231018, "learning_rate": 8.874900111533548e-06, "loss": 0.7681, "step": 4694 }, { "epoch": 0.24128893000308357, "grad_norm": 1.0619456768035889, "learning_rate": 8.87437408516152e-06, "loss": 0.727, "step": 4695 }, { "epoch": 0.2413403227464282, "grad_norm": 1.1301515102386475, "learning_rate": 8.873847951446577e-06, "loss": 0.806, "step": 4696 }, { "epoch": 0.24139171548977284, "grad_norm": 1.1236293315887451, "learning_rate": 8.873321710403291e-06, "loss": 0.8116, "step": 4697 }, { "epoch": 0.2414431082331175, "grad_norm": 1.0581533908843994, "learning_rate": 8.872795362046246e-06, "loss": 0.7769, "step": 4698 }, { "epoch": 0.24149450097646213, "grad_norm": 1.2560044527053833, "learning_rate": 8.872268906390025e-06, "loss": 0.8299, "step": 4699 }, { "epoch": 0.24154589371980675, "grad_norm": 0.7762824892997742, "learning_rate": 8.87174234344921e-06, "loss": 0.6805, "step": 4700 }, { "epoch": 0.2415972864631514, "grad_norm": 1.0945128202438354, "learning_rate": 8.871215673238395e-06, "loss": 0.7721, "step": 4701 }, { "epoch": 0.24164867920649605, "grad_norm": 1.2013400793075562, "learning_rate": 8.870688895772168e-06, "loss": 0.754, "step": 4702 }, { "epoch": 0.2417000719498407, "grad_norm": 0.7914587259292603, "learning_rate": 8.870162011065125e-06, "loss": 0.7172, "step": 4703 }, { "epoch": 0.24175146469318531, "grad_norm": 1.1099352836608887, "learning_rate": 8.869635019131863e-06, "loss": 0.7252, "step": 4704 }, { "epoch": 0.24180285743652996, "grad_norm": 1.04390549659729, "learning_rate": 8.869107919986986e-06, "loss": 0.7271, "step": 4705 }, { "epoch": 0.2418542501798746, "grad_norm": 1.2175709009170532, "learning_rate": 8.868580713645094e-06, "loss": 0.7416, "step": 4706 }, { "epoch": 0.24190564292321926, "grad_norm": 1.137791395187378, "learning_rate": 8.868053400120796e-06, "loss": 0.7982, "step": 4707 }, { "epoch": 0.24195703566656387, "grad_norm": 1.1600502729415894, "learning_rate": 8.8675259794287e-06, "loss": 0.8103, "step": 4708 }, { "epoch": 0.24200842840990852, "grad_norm": 1.1336969137191772, "learning_rate": 8.866998451583418e-06, "loss": 0.7705, "step": 4709 }, { "epoch": 0.24205982115325317, "grad_norm": 1.0881843566894531, "learning_rate": 8.866470816599569e-06, "loss": 0.7607, "step": 4710 }, { "epoch": 0.2421112138965978, "grad_norm": 1.1276473999023438, "learning_rate": 8.865943074491769e-06, "loss": 0.8072, "step": 4711 }, { "epoch": 0.24216260663994243, "grad_norm": 1.1565089225769043, "learning_rate": 8.86541522527464e-06, "loss": 0.773, "step": 4712 }, { "epoch": 0.24221399938328708, "grad_norm": 1.0687453746795654, "learning_rate": 8.864887268962807e-06, "loss": 0.7429, "step": 4713 }, { "epoch": 0.24226539212663173, "grad_norm": 0.9930742383003235, "learning_rate": 8.864359205570899e-06, "loss": 0.7583, "step": 4714 }, { "epoch": 0.24231678486997635, "grad_norm": 0.9466663599014282, "learning_rate": 8.863831035113542e-06, "loss": 0.7272, "step": 4715 }, { "epoch": 0.242368177613321, "grad_norm": 1.1421409845352173, "learning_rate": 8.863302757605373e-06, "loss": 0.7373, "step": 4716 }, { "epoch": 0.24241957035666564, "grad_norm": 1.1920064687728882, "learning_rate": 8.862774373061026e-06, "loss": 0.7543, "step": 4717 }, { "epoch": 0.2424709631000103, "grad_norm": 1.0750113725662231, "learning_rate": 8.862245881495144e-06, "loss": 0.8017, "step": 4718 }, { "epoch": 0.2425223558433549, "grad_norm": 0.865716814994812, "learning_rate": 8.861717282922369e-06, "loss": 0.7103, "step": 4719 }, { "epoch": 0.24257374858669956, "grad_norm": 1.1474857330322266, "learning_rate": 8.86118857735734e-06, "loss": 0.7498, "step": 4720 }, { "epoch": 0.2426251413300442, "grad_norm": 1.1631758213043213, "learning_rate": 8.860659764814713e-06, "loss": 0.7864, "step": 4721 }, { "epoch": 0.24267653407338885, "grad_norm": 1.0661959648132324, "learning_rate": 8.860130845309134e-06, "loss": 0.7952, "step": 4722 }, { "epoch": 0.24272792681673347, "grad_norm": 1.179243803024292, "learning_rate": 8.85960181885526e-06, "loss": 0.7598, "step": 4723 }, { "epoch": 0.24277931956007812, "grad_norm": 1.128421664237976, "learning_rate": 8.859072685467746e-06, "loss": 0.7671, "step": 4724 }, { "epoch": 0.24283071230342276, "grad_norm": 1.0811216831207275, "learning_rate": 8.858543445161256e-06, "loss": 0.7783, "step": 4725 }, { "epoch": 0.2428821050467674, "grad_norm": 1.0029337406158447, "learning_rate": 8.858014097950448e-06, "loss": 0.7492, "step": 4726 }, { "epoch": 0.24293349779011203, "grad_norm": 1.0179736614227295, "learning_rate": 8.857484643849991e-06, "loss": 0.7444, "step": 4727 }, { "epoch": 0.24298489053345668, "grad_norm": 0.7943317890167236, "learning_rate": 8.856955082874554e-06, "loss": 0.7022, "step": 4728 }, { "epoch": 0.24303628327680132, "grad_norm": 1.0310739278793335, "learning_rate": 8.856425415038808e-06, "loss": 0.7725, "step": 4729 }, { "epoch": 0.24308767602014594, "grad_norm": 0.7097966074943542, "learning_rate": 8.855895640357429e-06, "loss": 0.6781, "step": 4730 }, { "epoch": 0.2431390687634906, "grad_norm": 0.7207580208778381, "learning_rate": 8.855365758845092e-06, "loss": 0.7311, "step": 4731 }, { "epoch": 0.24319046150683524, "grad_norm": 1.0366616249084473, "learning_rate": 8.854835770516483e-06, "loss": 0.7837, "step": 4732 }, { "epoch": 0.24324185425017988, "grad_norm": 0.7645068764686584, "learning_rate": 8.85430567538628e-06, "loss": 0.7056, "step": 4733 }, { "epoch": 0.2432932469935245, "grad_norm": 1.1128915548324585, "learning_rate": 8.853775473469174e-06, "loss": 0.8017, "step": 4734 }, { "epoch": 0.24334463973686915, "grad_norm": 1.1075836420059204, "learning_rate": 8.853245164779853e-06, "loss": 0.8206, "step": 4735 }, { "epoch": 0.2433960324802138, "grad_norm": 1.0938459634780884, "learning_rate": 8.852714749333008e-06, "loss": 0.7321, "step": 4736 }, { "epoch": 0.24344742522355844, "grad_norm": 1.0838030576705933, "learning_rate": 8.85218422714334e-06, "loss": 0.8294, "step": 4737 }, { "epoch": 0.24349881796690306, "grad_norm": 1.0463584661483765, "learning_rate": 8.851653598225542e-06, "loss": 0.7946, "step": 4738 }, { "epoch": 0.2435502107102477, "grad_norm": 1.056262493133545, "learning_rate": 8.851122862594319e-06, "loss": 0.7492, "step": 4739 }, { "epoch": 0.24360160345359236, "grad_norm": 1.1224285364151, "learning_rate": 8.850592020264373e-06, "loss": 0.8422, "step": 4740 }, { "epoch": 0.243652996196937, "grad_norm": 1.0783976316452026, "learning_rate": 8.850061071250413e-06, "loss": 0.7924, "step": 4741 }, { "epoch": 0.24370438894028162, "grad_norm": 1.1357896327972412, "learning_rate": 8.849530015567149e-06, "loss": 0.7784, "step": 4742 }, { "epoch": 0.24375578168362627, "grad_norm": 1.089284062385559, "learning_rate": 8.848998853229294e-06, "loss": 0.7429, "step": 4743 }, { "epoch": 0.24380717442697092, "grad_norm": 1.0761910676956177, "learning_rate": 8.848467584251563e-06, "loss": 0.7396, "step": 4744 }, { "epoch": 0.24385856717031554, "grad_norm": 0.8542054295539856, "learning_rate": 8.84793620864868e-06, "loss": 0.6922, "step": 4745 }, { "epoch": 0.24390995991366018, "grad_norm": 1.1290571689605713, "learning_rate": 8.847404726435363e-06, "loss": 0.6948, "step": 4746 }, { "epoch": 0.24396135265700483, "grad_norm": 1.0668021440505981, "learning_rate": 8.846873137626338e-06, "loss": 0.7665, "step": 4747 }, { "epoch": 0.24401274540034948, "grad_norm": 1.0984876155853271, "learning_rate": 8.846341442236333e-06, "loss": 0.8261, "step": 4748 }, { "epoch": 0.2440641381436941, "grad_norm": 1.0564554929733276, "learning_rate": 8.84580964028008e-06, "loss": 0.7989, "step": 4749 }, { "epoch": 0.24411553088703875, "grad_norm": 1.2301905155181885, "learning_rate": 8.845277731772312e-06, "loss": 0.7858, "step": 4750 }, { "epoch": 0.2441669236303834, "grad_norm": 0.7716259956359863, "learning_rate": 8.844745716727767e-06, "loss": 0.6789, "step": 4751 }, { "epoch": 0.24421831637372804, "grad_norm": 1.4047126770019531, "learning_rate": 8.844213595161186e-06, "loss": 0.7874, "step": 4752 }, { "epoch": 0.24426970911707266, "grad_norm": 1.187303900718689, "learning_rate": 8.84368136708731e-06, "loss": 0.7958, "step": 4753 }, { "epoch": 0.2443211018604173, "grad_norm": 1.1583155393600464, "learning_rate": 8.843149032520884e-06, "loss": 0.8104, "step": 4754 }, { "epoch": 0.24437249460376195, "grad_norm": 0.7943881750106812, "learning_rate": 8.842616591476659e-06, "loss": 0.6914, "step": 4755 }, { "epoch": 0.2444238873471066, "grad_norm": 1.0763680934906006, "learning_rate": 8.842084043969383e-06, "loss": 0.7844, "step": 4756 }, { "epoch": 0.24447528009045122, "grad_norm": 1.150385856628418, "learning_rate": 8.841551390013816e-06, "loss": 0.78, "step": 4757 }, { "epoch": 0.24452667283379587, "grad_norm": 1.1021569967269897, "learning_rate": 8.841018629624714e-06, "loss": 0.7522, "step": 4758 }, { "epoch": 0.2445780655771405, "grad_norm": 1.145723581314087, "learning_rate": 8.840485762816835e-06, "loss": 0.8086, "step": 4759 }, { "epoch": 0.24462945832048516, "grad_norm": 0.7842158079147339, "learning_rate": 8.839952789604946e-06, "loss": 0.7585, "step": 4760 }, { "epoch": 0.24468085106382978, "grad_norm": 1.1114294528961182, "learning_rate": 8.83941971000381e-06, "loss": 0.7974, "step": 4761 }, { "epoch": 0.24473224380717443, "grad_norm": 0.7318762540817261, "learning_rate": 8.838886524028198e-06, "loss": 0.7292, "step": 4762 }, { "epoch": 0.24478363655051907, "grad_norm": 0.7385783791542053, "learning_rate": 8.838353231692884e-06, "loss": 0.6978, "step": 4763 }, { "epoch": 0.2448350292938637, "grad_norm": 1.1245821714401245, "learning_rate": 8.837819833012642e-06, "loss": 0.7756, "step": 4764 }, { "epoch": 0.24488642203720834, "grad_norm": 1.1304298639297485, "learning_rate": 8.83728632800225e-06, "loss": 0.7272, "step": 4765 }, { "epoch": 0.244937814780553, "grad_norm": 0.9192416071891785, "learning_rate": 8.83675271667649e-06, "loss": 0.7087, "step": 4766 }, { "epoch": 0.24498920752389763, "grad_norm": 0.8372642397880554, "learning_rate": 8.836218999050143e-06, "loss": 0.7281, "step": 4767 }, { "epoch": 0.24504060026724225, "grad_norm": 1.4684903621673584, "learning_rate": 8.835685175138e-06, "loss": 0.8055, "step": 4768 }, { "epoch": 0.2450919930105869, "grad_norm": 1.075905442237854, "learning_rate": 8.83515124495485e-06, "loss": 0.7624, "step": 4769 }, { "epoch": 0.24514338575393155, "grad_norm": 0.7970883846282959, "learning_rate": 8.834617208515486e-06, "loss": 0.7161, "step": 4770 }, { "epoch": 0.2451947784972762, "grad_norm": 1.1217435598373413, "learning_rate": 8.834083065834702e-06, "loss": 0.8332, "step": 4771 }, { "epoch": 0.2452461712406208, "grad_norm": 0.905123233795166, "learning_rate": 8.8335488169273e-06, "loss": 0.6922, "step": 4772 }, { "epoch": 0.24529756398396546, "grad_norm": 1.1003978252410889, "learning_rate": 8.83301446180808e-06, "loss": 0.7984, "step": 4773 }, { "epoch": 0.2453489567273101, "grad_norm": 1.2487705945968628, "learning_rate": 8.832480000491847e-06, "loss": 0.7346, "step": 4774 }, { "epoch": 0.24540034947065476, "grad_norm": 1.1054446697235107, "learning_rate": 8.831945432993411e-06, "loss": 0.8237, "step": 4775 }, { "epoch": 0.24545174221399937, "grad_norm": 1.1106668710708618, "learning_rate": 8.831410759327579e-06, "loss": 0.7999, "step": 4776 }, { "epoch": 0.24550313495734402, "grad_norm": 0.7692917585372925, "learning_rate": 8.830875979509165e-06, "loss": 0.6842, "step": 4777 }, { "epoch": 0.24555452770068867, "grad_norm": 1.0115216970443726, "learning_rate": 8.830341093552988e-06, "loss": 0.7095, "step": 4778 }, { "epoch": 0.24560592044403332, "grad_norm": 0.8051899075508118, "learning_rate": 8.829806101473866e-06, "loss": 0.7366, "step": 4779 }, { "epoch": 0.24565731318737793, "grad_norm": 0.8092594742774963, "learning_rate": 8.829271003286621e-06, "loss": 0.7001, "step": 4780 }, { "epoch": 0.24570870593072258, "grad_norm": 1.1987711191177368, "learning_rate": 8.82873579900608e-06, "loss": 0.8187, "step": 4781 }, { "epoch": 0.24576009867406723, "grad_norm": 1.0493359565734863, "learning_rate": 8.82820048864707e-06, "loss": 0.7593, "step": 4782 }, { "epoch": 0.24581149141741185, "grad_norm": 0.8839566707611084, "learning_rate": 8.827665072224422e-06, "loss": 0.7042, "step": 4783 }, { "epoch": 0.2458628841607565, "grad_norm": 1.1349273920059204, "learning_rate": 8.82712954975297e-06, "loss": 0.8006, "step": 4784 }, { "epoch": 0.24591427690410114, "grad_norm": 1.0763131380081177, "learning_rate": 8.826593921247554e-06, "loss": 0.817, "step": 4785 }, { "epoch": 0.2459656696474458, "grad_norm": 1.0929253101348877, "learning_rate": 8.82605818672301e-06, "loss": 0.8018, "step": 4786 }, { "epoch": 0.2460170623907904, "grad_norm": 0.9399827122688293, "learning_rate": 8.825522346194184e-06, "loss": 0.7326, "step": 4787 }, { "epoch": 0.24606845513413506, "grad_norm": 1.0634305477142334, "learning_rate": 8.82498639967592e-06, "loss": 0.7901, "step": 4788 }, { "epoch": 0.2461198478774797, "grad_norm": 1.214478850364685, "learning_rate": 8.824450347183067e-06, "loss": 0.7627, "step": 4789 }, { "epoch": 0.24617124062082435, "grad_norm": 0.9317747950553894, "learning_rate": 8.82391418873048e-06, "loss": 0.7237, "step": 4790 }, { "epoch": 0.24622263336416897, "grad_norm": 1.0761404037475586, "learning_rate": 8.82337792433301e-06, "loss": 0.7809, "step": 4791 }, { "epoch": 0.24627402610751362, "grad_norm": 1.033531665802002, "learning_rate": 8.822841554005514e-06, "loss": 0.7695, "step": 4792 }, { "epoch": 0.24632541885085826, "grad_norm": 1.172295093536377, "learning_rate": 8.822305077762856e-06, "loss": 0.8267, "step": 4793 }, { "epoch": 0.2463768115942029, "grad_norm": 1.1045151948928833, "learning_rate": 8.821768495619897e-06, "loss": 0.775, "step": 4794 }, { "epoch": 0.24642820433754753, "grad_norm": 0.7222758531570435, "learning_rate": 8.821231807591508e-06, "loss": 0.7355, "step": 4795 }, { "epoch": 0.24647959708089218, "grad_norm": 1.0813877582550049, "learning_rate": 8.82069501369255e-06, "loss": 0.7448, "step": 4796 }, { "epoch": 0.24653098982423682, "grad_norm": 1.094832181930542, "learning_rate": 8.820158113937904e-06, "loss": 0.7615, "step": 4797 }, { "epoch": 0.24658238256758147, "grad_norm": 1.0937809944152832, "learning_rate": 8.81962110834244e-06, "loss": 0.7703, "step": 4798 }, { "epoch": 0.2466337753109261, "grad_norm": 1.0613828897476196, "learning_rate": 8.819083996921038e-06, "loss": 0.7822, "step": 4799 }, { "epoch": 0.24668516805427074, "grad_norm": 1.0581883192062378, "learning_rate": 8.81854677968858e-06, "loss": 0.7135, "step": 4800 }, { "epoch": 0.24673656079761538, "grad_norm": 1.125534176826477, "learning_rate": 8.818009456659947e-06, "loss": 0.7827, "step": 4801 }, { "epoch": 0.24678795354096, "grad_norm": 1.1007810831069946, "learning_rate": 8.817472027850026e-06, "loss": 0.7705, "step": 4802 }, { "epoch": 0.24683934628430465, "grad_norm": 0.7310556769371033, "learning_rate": 8.816934493273713e-06, "loss": 0.7016, "step": 4803 }, { "epoch": 0.2468907390276493, "grad_norm": 1.0512195825576782, "learning_rate": 8.816396852945896e-06, "loss": 0.7838, "step": 4804 }, { "epoch": 0.24694213177099394, "grad_norm": 1.0588195323944092, "learning_rate": 8.815859106881471e-06, "loss": 0.7194, "step": 4805 }, { "epoch": 0.24699352451433856, "grad_norm": 1.0114433765411377, "learning_rate": 8.815321255095337e-06, "loss": 0.7229, "step": 4806 }, { "epoch": 0.2470449172576832, "grad_norm": 0.7256984710693359, "learning_rate": 8.814783297602394e-06, "loss": 0.7246, "step": 4807 }, { "epoch": 0.24709631000102786, "grad_norm": 1.1471654176712036, "learning_rate": 8.814245234417551e-06, "loss": 0.8076, "step": 4808 }, { "epoch": 0.2471477027443725, "grad_norm": 0.7653288245201111, "learning_rate": 8.813707065555713e-06, "loss": 0.7048, "step": 4809 }, { "epoch": 0.24719909548771712, "grad_norm": 1.0900944471359253, "learning_rate": 8.81316879103179e-06, "loss": 0.7806, "step": 4810 }, { "epoch": 0.24725048823106177, "grad_norm": 0.85138338804245, "learning_rate": 8.812630410860697e-06, "loss": 0.7298, "step": 4811 }, { "epoch": 0.24730188097440642, "grad_norm": 0.7847518920898438, "learning_rate": 8.812091925057347e-06, "loss": 0.6982, "step": 4812 }, { "epoch": 0.24735327371775107, "grad_norm": 1.2386887073516846, "learning_rate": 8.811553333636663e-06, "loss": 0.8292, "step": 4813 }, { "epoch": 0.24740466646109568, "grad_norm": 1.2640650272369385, "learning_rate": 8.811014636613564e-06, "loss": 0.768, "step": 4814 }, { "epoch": 0.24745605920444033, "grad_norm": 1.6772304773330688, "learning_rate": 8.810475834002976e-06, "loss": 0.7881, "step": 4815 }, { "epoch": 0.24750745194778498, "grad_norm": 1.0697636604309082, "learning_rate": 8.80993692581983e-06, "loss": 0.7781, "step": 4816 }, { "epoch": 0.24755884469112963, "grad_norm": 1.0476969480514526, "learning_rate": 8.809397912079054e-06, "loss": 0.7716, "step": 4817 }, { "epoch": 0.24761023743447425, "grad_norm": 0.9056881666183472, "learning_rate": 8.808858792795581e-06, "loss": 0.7223, "step": 4818 }, { "epoch": 0.2476616301778189, "grad_norm": 0.8230631351470947, "learning_rate": 8.808319567984348e-06, "loss": 0.6915, "step": 4819 }, { "epoch": 0.24771302292116354, "grad_norm": 1.1733490228652954, "learning_rate": 8.8077802376603e-06, "loss": 0.7341, "step": 4820 }, { "epoch": 0.24776441566450816, "grad_norm": 1.1157865524291992, "learning_rate": 8.807240801838373e-06, "loss": 0.7713, "step": 4821 }, { "epoch": 0.2478158084078528, "grad_norm": 1.090049386024475, "learning_rate": 8.806701260533514e-06, "loss": 0.7891, "step": 4822 }, { "epoch": 0.24786720115119745, "grad_norm": 1.123653531074524, "learning_rate": 8.806161613760674e-06, "loss": 0.753, "step": 4823 }, { "epoch": 0.2479185938945421, "grad_norm": 1.1349486112594604, "learning_rate": 8.805621861534802e-06, "loss": 0.7383, "step": 4824 }, { "epoch": 0.24796998663788672, "grad_norm": 1.1102007627487183, "learning_rate": 8.805082003870851e-06, "loss": 0.7909, "step": 4825 }, { "epoch": 0.24802137938123137, "grad_norm": 1.1651266813278198, "learning_rate": 8.804542040783783e-06, "loss": 0.8268, "step": 4826 }, { "epoch": 0.248072772124576, "grad_norm": 1.107848048210144, "learning_rate": 8.804001972288554e-06, "loss": 0.7421, "step": 4827 }, { "epoch": 0.24812416486792066, "grad_norm": 1.1189789772033691, "learning_rate": 8.80346179840013e-06, "loss": 0.7914, "step": 4828 }, { "epoch": 0.24817555761126528, "grad_norm": 1.0694493055343628, "learning_rate": 8.802921519133475e-06, "loss": 0.8046, "step": 4829 }, { "epoch": 0.24822695035460993, "grad_norm": 1.2083418369293213, "learning_rate": 8.802381134503559e-06, "loss": 0.769, "step": 4830 }, { "epoch": 0.24827834309795457, "grad_norm": 1.0982270240783691, "learning_rate": 8.801840644525352e-06, "loss": 0.7426, "step": 4831 }, { "epoch": 0.24832973584129922, "grad_norm": 0.8952498435974121, "learning_rate": 8.80130004921383e-06, "loss": 0.7044, "step": 4832 }, { "epoch": 0.24838112858464384, "grad_norm": 1.2216222286224365, "learning_rate": 8.800759348583972e-06, "loss": 0.7719, "step": 4833 }, { "epoch": 0.2484325213279885, "grad_norm": 1.1027307510375977, "learning_rate": 8.800218542650757e-06, "loss": 0.8374, "step": 4834 }, { "epoch": 0.24848391407133313, "grad_norm": 1.131588101387024, "learning_rate": 8.79967763142917e-06, "loss": 0.8336, "step": 4835 }, { "epoch": 0.24853530681467778, "grad_norm": 1.0985100269317627, "learning_rate": 8.799136614934192e-06, "loss": 0.7432, "step": 4836 }, { "epoch": 0.2485866995580224, "grad_norm": 1.1121591329574585, "learning_rate": 8.798595493180819e-06, "loss": 0.8282, "step": 4837 }, { "epoch": 0.24863809230136705, "grad_norm": 1.09903085231781, "learning_rate": 8.798054266184041e-06, "loss": 0.7997, "step": 4838 }, { "epoch": 0.2486894850447117, "grad_norm": 1.0357295274734497, "learning_rate": 8.797512933958853e-06, "loss": 0.7785, "step": 4839 }, { "epoch": 0.2487408777880563, "grad_norm": 1.0244630575180054, "learning_rate": 8.796971496520252e-06, "loss": 0.7668, "step": 4840 }, { "epoch": 0.24879227053140096, "grad_norm": 1.065058946609497, "learning_rate": 8.79642995388324e-06, "loss": 0.7434, "step": 4841 }, { "epoch": 0.2488436632747456, "grad_norm": 1.043064832687378, "learning_rate": 8.795888306062823e-06, "loss": 0.7633, "step": 4842 }, { "epoch": 0.24889505601809025, "grad_norm": 1.1005511283874512, "learning_rate": 8.795346553074005e-06, "loss": 0.8141, "step": 4843 }, { "epoch": 0.24894644876143487, "grad_norm": 1.0140762329101562, "learning_rate": 8.794804694931795e-06, "loss": 0.7549, "step": 4844 }, { "epoch": 0.24899784150477952, "grad_norm": 1.1355570554733276, "learning_rate": 8.79426273165121e-06, "loss": 0.8015, "step": 4845 }, { "epoch": 0.24904923424812417, "grad_norm": 1.103007435798645, "learning_rate": 8.793720663247259e-06, "loss": 0.8372, "step": 4846 }, { "epoch": 0.24910062699146882, "grad_norm": 1.1142619848251343, "learning_rate": 8.793178489734966e-06, "loss": 0.8508, "step": 4847 }, { "epoch": 0.24915201973481343, "grad_norm": 0.8448128700256348, "learning_rate": 8.79263621112935e-06, "loss": 0.6659, "step": 4848 }, { "epoch": 0.24920341247815808, "grad_norm": 0.9091752767562866, "learning_rate": 8.792093827445437e-06, "loss": 0.7079, "step": 4849 }, { "epoch": 0.24925480522150273, "grad_norm": 1.2166436910629272, "learning_rate": 8.791551338698252e-06, "loss": 0.8276, "step": 4850 }, { "epoch": 0.24930619796484738, "grad_norm": 1.0822728872299194, "learning_rate": 8.791008744902827e-06, "loss": 0.7539, "step": 4851 }, { "epoch": 0.249357590708192, "grad_norm": 1.1478793621063232, "learning_rate": 8.790466046074193e-06, "loss": 0.8044, "step": 4852 }, { "epoch": 0.24940898345153664, "grad_norm": 1.1383388042449951, "learning_rate": 8.789923242227389e-06, "loss": 0.7851, "step": 4853 }, { "epoch": 0.2494603761948813, "grad_norm": 1.0272635221481323, "learning_rate": 8.789380333377451e-06, "loss": 0.797, "step": 4854 }, { "epoch": 0.24951176893822594, "grad_norm": 1.1973938941955566, "learning_rate": 8.788837319539421e-06, "loss": 0.8078, "step": 4855 }, { "epoch": 0.24956316168157056, "grad_norm": 1.0882850885391235, "learning_rate": 8.788294200728345e-06, "loss": 0.768, "step": 4856 }, { "epoch": 0.2496145544249152, "grad_norm": 1.124497652053833, "learning_rate": 8.78775097695927e-06, "loss": 0.7577, "step": 4857 }, { "epoch": 0.24966594716825985, "grad_norm": 1.048090934753418, "learning_rate": 8.787207648247249e-06, "loss": 0.7636, "step": 4858 }, { "epoch": 0.24971733991160447, "grad_norm": 1.0921497344970703, "learning_rate": 8.78666421460733e-06, "loss": 0.85, "step": 4859 }, { "epoch": 0.24976873265494912, "grad_norm": 1.1429721117019653, "learning_rate": 8.786120676054573e-06, "loss": 0.7988, "step": 4860 }, { "epoch": 0.24982012539829376, "grad_norm": 1.210495114326477, "learning_rate": 8.785577032604036e-06, "loss": 0.7913, "step": 4861 }, { "epoch": 0.2498715181416384, "grad_norm": 1.0442157983779907, "learning_rate": 8.785033284270783e-06, "loss": 0.8144, "step": 4862 }, { "epoch": 0.24992291088498303, "grad_norm": 1.0624934434890747, "learning_rate": 8.784489431069878e-06, "loss": 0.8, "step": 4863 }, { "epoch": 0.24997430362832768, "grad_norm": 1.1329983472824097, "learning_rate": 8.783945473016387e-06, "loss": 0.7648, "step": 4864 }, { "epoch": 0.2500256963716723, "grad_norm": 1.2176034450531006, "learning_rate": 8.783401410125383e-06, "loss": 0.7952, "step": 4865 }, { "epoch": 0.25007708911501697, "grad_norm": 0.9558403491973877, "learning_rate": 8.782857242411937e-06, "loss": 0.7442, "step": 4866 }, { "epoch": 0.2501284818583616, "grad_norm": 0.9302815794944763, "learning_rate": 8.782312969891131e-06, "loss": 0.7091, "step": 4867 }, { "epoch": 0.25017987460170626, "grad_norm": 1.12680184841156, "learning_rate": 8.78176859257804e-06, "loss": 0.7044, "step": 4868 }, { "epoch": 0.25023126734505086, "grad_norm": 1.1850327253341675, "learning_rate": 8.781224110487747e-06, "loss": 0.7586, "step": 4869 }, { "epoch": 0.2502826600883955, "grad_norm": 1.1618678569793701, "learning_rate": 8.780679523635339e-06, "loss": 0.7799, "step": 4870 }, { "epoch": 0.25033405283174015, "grad_norm": 1.1340609788894653, "learning_rate": 8.780134832035904e-06, "loss": 0.7964, "step": 4871 }, { "epoch": 0.2503854455750848, "grad_norm": 1.1002004146575928, "learning_rate": 8.77959003570453e-06, "loss": 0.8099, "step": 4872 }, { "epoch": 0.25043683831842944, "grad_norm": 1.0841517448425293, "learning_rate": 8.779045134656317e-06, "loss": 0.7599, "step": 4873 }, { "epoch": 0.2504882310617741, "grad_norm": 1.073439598083496, "learning_rate": 8.778500128906355e-06, "loss": 0.7929, "step": 4874 }, { "epoch": 0.25053962380511874, "grad_norm": 1.0940415859222412, "learning_rate": 8.77795501846975e-06, "loss": 0.8059, "step": 4875 }, { "epoch": 0.25059101654846333, "grad_norm": 1.0908809900283813, "learning_rate": 8.7774098033616e-06, "loss": 0.7435, "step": 4876 }, { "epoch": 0.250642409291808, "grad_norm": 1.0483298301696777, "learning_rate": 8.776864483597014e-06, "loss": 0.7276, "step": 4877 }, { "epoch": 0.2506938020351526, "grad_norm": 1.0982178449630737, "learning_rate": 8.7763190591911e-06, "loss": 0.7697, "step": 4878 }, { "epoch": 0.25074519477849727, "grad_norm": 1.3911322355270386, "learning_rate": 8.775773530158968e-06, "loss": 0.6958, "step": 4879 }, { "epoch": 0.2507965875218419, "grad_norm": 0.7102989554405212, "learning_rate": 8.775227896515734e-06, "loss": 0.673, "step": 4880 }, { "epoch": 0.25084798026518657, "grad_norm": 1.1448079347610474, "learning_rate": 8.774682158276512e-06, "loss": 0.8116, "step": 4881 }, { "epoch": 0.2508993730085312, "grad_norm": 1.0189813375473022, "learning_rate": 8.774136315456428e-06, "loss": 0.7397, "step": 4882 }, { "epoch": 0.25095076575187586, "grad_norm": 1.1340067386627197, "learning_rate": 8.7735903680706e-06, "loss": 0.8496, "step": 4883 }, { "epoch": 0.25100215849522045, "grad_norm": 1.1618138551712036, "learning_rate": 8.773044316134156e-06, "loss": 0.751, "step": 4884 }, { "epoch": 0.2510535512385651, "grad_norm": 1.035233974456787, "learning_rate": 8.772498159662223e-06, "loss": 0.705, "step": 4885 }, { "epoch": 0.25110494398190975, "grad_norm": 1.0860227346420288, "learning_rate": 8.771951898669935e-06, "loss": 0.7664, "step": 4886 }, { "epoch": 0.2511563367252544, "grad_norm": 1.0964267253875732, "learning_rate": 8.771405533172426e-06, "loss": 0.7827, "step": 4887 }, { "epoch": 0.25120772946859904, "grad_norm": 1.1427735090255737, "learning_rate": 8.770859063184833e-06, "loss": 0.7852, "step": 4888 }, { "epoch": 0.2512591222119437, "grad_norm": 1.029692530632019, "learning_rate": 8.770312488722297e-06, "loss": 0.7579, "step": 4889 }, { "epoch": 0.25131051495528833, "grad_norm": 1.0979682207107544, "learning_rate": 8.769765809799962e-06, "loss": 0.7934, "step": 4890 }, { "epoch": 0.251361907698633, "grad_norm": 1.119998574256897, "learning_rate": 8.769219026432974e-06, "loss": 0.8121, "step": 4891 }, { "epoch": 0.25141330044197757, "grad_norm": 1.0282078981399536, "learning_rate": 8.768672138636477e-06, "loss": 0.7592, "step": 4892 }, { "epoch": 0.2514646931853222, "grad_norm": 1.1124991178512573, "learning_rate": 8.768125146425632e-06, "loss": 0.8187, "step": 4893 }, { "epoch": 0.25151608592866687, "grad_norm": 1.0637891292572021, "learning_rate": 8.767578049815589e-06, "loss": 0.7697, "step": 4894 }, { "epoch": 0.2515674786720115, "grad_norm": 1.0996404886245728, "learning_rate": 8.767030848821503e-06, "loss": 0.7549, "step": 4895 }, { "epoch": 0.25161887141535616, "grad_norm": 1.0600802898406982, "learning_rate": 8.76648354345854e-06, "loss": 0.7675, "step": 4896 }, { "epoch": 0.2516702641587008, "grad_norm": 1.13198983669281, "learning_rate": 8.765936133741861e-06, "loss": 0.805, "step": 4897 }, { "epoch": 0.25172165690204545, "grad_norm": 1.0183762311935425, "learning_rate": 8.765388619686632e-06, "loss": 0.7344, "step": 4898 }, { "epoch": 0.25177304964539005, "grad_norm": 1.1054775714874268, "learning_rate": 8.764841001308024e-06, "loss": 0.8151, "step": 4899 }, { "epoch": 0.2518244423887347, "grad_norm": 1.2429449558258057, "learning_rate": 8.76429327862121e-06, "loss": 0.7952, "step": 4900 }, { "epoch": 0.25187583513207934, "grad_norm": 1.1846113204956055, "learning_rate": 8.763745451641361e-06, "loss": 0.818, "step": 4901 }, { "epoch": 0.251927227875424, "grad_norm": 0.9064440727233887, "learning_rate": 8.763197520383659e-06, "loss": 0.7484, "step": 4902 }, { "epoch": 0.25197862061876863, "grad_norm": 0.8786363005638123, "learning_rate": 8.762649484863284e-06, "loss": 0.6768, "step": 4903 }, { "epoch": 0.2520300133621133, "grad_norm": 1.1978445053100586, "learning_rate": 8.762101345095417e-06, "loss": 0.7347, "step": 4904 }, { "epoch": 0.25208140610545793, "grad_norm": 1.0382858514785767, "learning_rate": 8.76155310109525e-06, "loss": 0.7723, "step": 4905 }, { "epoch": 0.2521327988488026, "grad_norm": 1.1391074657440186, "learning_rate": 8.761004752877967e-06, "loss": 0.8323, "step": 4906 }, { "epoch": 0.25218419159214717, "grad_norm": 1.0844844579696655, "learning_rate": 8.760456300458765e-06, "loss": 0.8273, "step": 4907 }, { "epoch": 0.2522355843354918, "grad_norm": 1.124345064163208, "learning_rate": 8.759907743852836e-06, "loss": 0.8255, "step": 4908 }, { "epoch": 0.25228697707883646, "grad_norm": 1.1418821811676025, "learning_rate": 8.759359083075381e-06, "loss": 0.7624, "step": 4909 }, { "epoch": 0.2523383698221811, "grad_norm": 1.0390063524246216, "learning_rate": 8.758810318141598e-06, "loss": 0.8392, "step": 4910 }, { "epoch": 0.25238976256552575, "grad_norm": 1.0535207986831665, "learning_rate": 8.758261449066694e-06, "loss": 0.785, "step": 4911 }, { "epoch": 0.2524411553088704, "grad_norm": 1.0583621263504028, "learning_rate": 8.757712475865873e-06, "loss": 0.7098, "step": 4912 }, { "epoch": 0.25249254805221505, "grad_norm": 1.0995343923568726, "learning_rate": 8.757163398554349e-06, "loss": 0.7531, "step": 4913 }, { "epoch": 0.25254394079555964, "grad_norm": 1.0213854312896729, "learning_rate": 8.75661421714733e-06, "loss": 0.7664, "step": 4914 }, { "epoch": 0.2525953335389043, "grad_norm": 1.1030197143554688, "learning_rate": 8.756064931660035e-06, "loss": 0.838, "step": 4915 }, { "epoch": 0.25264672628224893, "grad_norm": 1.116595983505249, "learning_rate": 8.755515542107682e-06, "loss": 0.7127, "step": 4916 }, { "epoch": 0.2526981190255936, "grad_norm": 1.089133858680725, "learning_rate": 8.75496604850549e-06, "loss": 0.769, "step": 4917 }, { "epoch": 0.25274951176893823, "grad_norm": 1.0371425151824951, "learning_rate": 8.754416450868683e-06, "loss": 0.7073, "step": 4918 }, { "epoch": 0.2528009045122829, "grad_norm": 1.168327808380127, "learning_rate": 8.753866749212491e-06, "loss": 0.8087, "step": 4919 }, { "epoch": 0.2528522972556275, "grad_norm": 1.1146843433380127, "learning_rate": 8.753316943552143e-06, "loss": 0.739, "step": 4920 }, { "epoch": 0.25290368999897217, "grad_norm": 0.7569361329078674, "learning_rate": 8.752767033902873e-06, "loss": 0.7263, "step": 4921 }, { "epoch": 0.25295508274231676, "grad_norm": 1.0661402940750122, "learning_rate": 8.752217020279914e-06, "loss": 0.7507, "step": 4922 }, { "epoch": 0.2530064754856614, "grad_norm": 1.0629280805587769, "learning_rate": 8.751666902698506e-06, "loss": 0.7684, "step": 4923 }, { "epoch": 0.25305786822900606, "grad_norm": 1.0731984376907349, "learning_rate": 8.75111668117389e-06, "loss": 0.7975, "step": 4924 }, { "epoch": 0.2531092609723507, "grad_norm": 1.0546234846115112, "learning_rate": 8.750566355721311e-06, "loss": 0.8127, "step": 4925 }, { "epoch": 0.25316065371569535, "grad_norm": 1.1837005615234375, "learning_rate": 8.750015926356014e-06, "loss": 0.8075, "step": 4926 }, { "epoch": 0.25321204645904, "grad_norm": 1.0947976112365723, "learning_rate": 8.749465393093253e-06, "loss": 0.8067, "step": 4927 }, { "epoch": 0.25326343920238464, "grad_norm": 1.197335958480835, "learning_rate": 8.74891475594828e-06, "loss": 0.8037, "step": 4928 }, { "epoch": 0.2533148319457293, "grad_norm": 1.0454286336898804, "learning_rate": 8.74836401493635e-06, "loss": 0.7186, "step": 4929 }, { "epoch": 0.2533662246890739, "grad_norm": 0.9589943885803223, "learning_rate": 8.74781317007272e-06, "loss": 0.7115, "step": 4930 }, { "epoch": 0.25341761743241853, "grad_norm": 1.107591986656189, "learning_rate": 8.747262221372653e-06, "loss": 0.7425, "step": 4931 }, { "epoch": 0.2534690101757632, "grad_norm": 1.0696320533752441, "learning_rate": 8.746711168851416e-06, "loss": 0.768, "step": 4932 }, { "epoch": 0.2535204029191078, "grad_norm": 1.2163894176483154, "learning_rate": 8.746160012524273e-06, "loss": 0.7873, "step": 4933 }, { "epoch": 0.25357179566245247, "grad_norm": 1.149474024772644, "learning_rate": 8.745608752406496e-06, "loss": 0.8166, "step": 4934 }, { "epoch": 0.2536231884057971, "grad_norm": 0.8342404365539551, "learning_rate": 8.745057388513357e-06, "loss": 0.658, "step": 4935 }, { "epoch": 0.25367458114914176, "grad_norm": 1.1002564430236816, "learning_rate": 8.744505920860133e-06, "loss": 0.7489, "step": 4936 }, { "epoch": 0.25372597389248636, "grad_norm": 0.7716323137283325, "learning_rate": 8.743954349462103e-06, "loss": 0.7094, "step": 4937 }, { "epoch": 0.253777366635831, "grad_norm": 1.1159147024154663, "learning_rate": 8.743402674334548e-06, "loss": 0.8203, "step": 4938 }, { "epoch": 0.25382875937917565, "grad_norm": 1.0739567279815674, "learning_rate": 8.742850895492753e-06, "loss": 0.7985, "step": 4939 }, { "epoch": 0.2538801521225203, "grad_norm": 0.8378993272781372, "learning_rate": 8.742299012952006e-06, "loss": 0.7256, "step": 4940 }, { "epoch": 0.25393154486586494, "grad_norm": 1.1590425968170166, "learning_rate": 8.741747026727596e-06, "loss": 0.8107, "step": 4941 }, { "epoch": 0.2539829376092096, "grad_norm": 1.1324280500411987, "learning_rate": 8.741194936834818e-06, "loss": 0.7977, "step": 4942 }, { "epoch": 0.25403433035255424, "grad_norm": 1.1204556226730347, "learning_rate": 8.740642743288966e-06, "loss": 0.78, "step": 4943 }, { "epoch": 0.2540857230958989, "grad_norm": 1.0953364372253418, "learning_rate": 8.740090446105342e-06, "loss": 0.7656, "step": 4944 }, { "epoch": 0.2541371158392435, "grad_norm": 1.0907549858093262, "learning_rate": 8.739538045299245e-06, "loss": 0.7689, "step": 4945 }, { "epoch": 0.2541885085825881, "grad_norm": 1.0318487882614136, "learning_rate": 8.738985540885981e-06, "loss": 0.7512, "step": 4946 }, { "epoch": 0.25423990132593277, "grad_norm": 1.1583892107009888, "learning_rate": 8.738432932880858e-06, "loss": 0.8172, "step": 4947 }, { "epoch": 0.2542912940692774, "grad_norm": 1.1423659324645996, "learning_rate": 8.737880221299187e-06, "loss": 0.7866, "step": 4948 }, { "epoch": 0.25434268681262207, "grad_norm": 1.1041771173477173, "learning_rate": 8.73732740615628e-06, "loss": 0.8195, "step": 4949 }, { "epoch": 0.2543940795559667, "grad_norm": 1.1157997846603394, "learning_rate": 8.736774487467452e-06, "loss": 0.8101, "step": 4950 }, { "epoch": 0.25444547229931136, "grad_norm": 1.0511146783828735, "learning_rate": 8.736221465248025e-06, "loss": 0.7203, "step": 4951 }, { "epoch": 0.25449686504265595, "grad_norm": 1.0837618112564087, "learning_rate": 8.735668339513319e-06, "loss": 0.8002, "step": 4952 }, { "epoch": 0.2545482577860006, "grad_norm": 1.0301893949508667, "learning_rate": 8.73511511027866e-06, "loss": 0.7427, "step": 4953 }, { "epoch": 0.25459965052934524, "grad_norm": 1.1394309997558594, "learning_rate": 8.734561777559375e-06, "loss": 0.76, "step": 4954 }, { "epoch": 0.2546510432726899, "grad_norm": 0.8104747533798218, "learning_rate": 8.734008341370795e-06, "loss": 0.6917, "step": 4955 }, { "epoch": 0.25470243601603454, "grad_norm": 1.1251262426376343, "learning_rate": 8.733454801728253e-06, "loss": 0.7651, "step": 4956 }, { "epoch": 0.2547538287593792, "grad_norm": 1.0606492757797241, "learning_rate": 8.732901158647084e-06, "loss": 0.8331, "step": 4957 }, { "epoch": 0.25480522150272383, "grad_norm": 1.0583362579345703, "learning_rate": 8.732347412142632e-06, "loss": 0.7876, "step": 4958 }, { "epoch": 0.2548566142460685, "grad_norm": 0.989799439907074, "learning_rate": 8.731793562230232e-06, "loss": 0.747, "step": 4959 }, { "epoch": 0.25490800698941307, "grad_norm": 0.7415286898612976, "learning_rate": 8.731239608925235e-06, "loss": 0.6395, "step": 4960 }, { "epoch": 0.2549593997327577, "grad_norm": 1.0102417469024658, "learning_rate": 8.730685552242986e-06, "loss": 0.8068, "step": 4961 }, { "epoch": 0.25501079247610237, "grad_norm": 1.1203761100769043, "learning_rate": 8.730131392198836e-06, "loss": 0.8247, "step": 4962 }, { "epoch": 0.255062185219447, "grad_norm": 1.1466277837753296, "learning_rate": 8.729577128808138e-06, "loss": 0.7664, "step": 4963 }, { "epoch": 0.25511357796279166, "grad_norm": 0.718056321144104, "learning_rate": 8.72902276208625e-06, "loss": 0.7195, "step": 4964 }, { "epoch": 0.2551649707061363, "grad_norm": 0.7646077275276184, "learning_rate": 8.72846829204853e-06, "loss": 0.7061, "step": 4965 }, { "epoch": 0.25521636344948095, "grad_norm": 1.080056071281433, "learning_rate": 8.727913718710338e-06, "loss": 0.7759, "step": 4966 }, { "epoch": 0.25526775619282555, "grad_norm": 1.0603599548339844, "learning_rate": 8.727359042087043e-06, "loss": 0.752, "step": 4967 }, { "epoch": 0.2553191489361702, "grad_norm": 1.0147136449813843, "learning_rate": 8.726804262194009e-06, "loss": 0.7794, "step": 4968 }, { "epoch": 0.25537054167951484, "grad_norm": 1.2840293645858765, "learning_rate": 8.726249379046609e-06, "loss": 0.7425, "step": 4969 }, { "epoch": 0.2554219344228595, "grad_norm": 1.090493083000183, "learning_rate": 8.725694392660217e-06, "loss": 0.7547, "step": 4970 }, { "epoch": 0.25547332716620413, "grad_norm": 1.0992645025253296, "learning_rate": 8.72513930305021e-06, "loss": 0.8339, "step": 4971 }, { "epoch": 0.2555247199095488, "grad_norm": 1.0836302042007446, "learning_rate": 8.724584110231962e-06, "loss": 0.7487, "step": 4972 }, { "epoch": 0.25557611265289343, "grad_norm": 0.8223841786384583, "learning_rate": 8.724028814220863e-06, "loss": 0.7373, "step": 4973 }, { "epoch": 0.2556275053962381, "grad_norm": 1.6133450269699097, "learning_rate": 8.723473415032288e-06, "loss": 0.7086, "step": 4974 }, { "epoch": 0.25567889813958267, "grad_norm": 1.1400054693222046, "learning_rate": 8.722917912681635e-06, "loss": 0.7844, "step": 4975 }, { "epoch": 0.2557302908829273, "grad_norm": 1.12342369556427, "learning_rate": 8.722362307184288e-06, "loss": 0.7324, "step": 4976 }, { "epoch": 0.25578168362627196, "grad_norm": 1.0867410898208618, "learning_rate": 8.721806598555644e-06, "loss": 0.7731, "step": 4977 }, { "epoch": 0.2558330763696166, "grad_norm": 0.8389273881912231, "learning_rate": 8.721250786811099e-06, "loss": 0.6945, "step": 4978 }, { "epoch": 0.25588446911296125, "grad_norm": 1.1234270334243774, "learning_rate": 8.72069487196605e-06, "loss": 0.7571, "step": 4979 }, { "epoch": 0.2559358618563059, "grad_norm": 1.1643598079681396, "learning_rate": 8.7201388540359e-06, "loss": 0.7864, "step": 4980 }, { "epoch": 0.25598725459965055, "grad_norm": 0.7769237756729126, "learning_rate": 8.719582733036056e-06, "loss": 0.6761, "step": 4981 }, { "epoch": 0.2560386473429952, "grad_norm": 1.110906720161438, "learning_rate": 8.719026508981922e-06, "loss": 0.7742, "step": 4982 }, { "epoch": 0.2560900400863398, "grad_norm": 1.195372462272644, "learning_rate": 8.718470181888913e-06, "loss": 0.855, "step": 4983 }, { "epoch": 0.25614143282968443, "grad_norm": 1.1559065580368042, "learning_rate": 8.717913751772441e-06, "loss": 0.8368, "step": 4984 }, { "epoch": 0.2561928255730291, "grad_norm": 1.0620718002319336, "learning_rate": 8.717357218647921e-06, "loss": 0.7777, "step": 4985 }, { "epoch": 0.25624421831637373, "grad_norm": 1.0265811681747437, "learning_rate": 8.716800582530773e-06, "loss": 0.779, "step": 4986 }, { "epoch": 0.2562956110597184, "grad_norm": 1.0878773927688599, "learning_rate": 8.716243843436419e-06, "loss": 0.786, "step": 4987 }, { "epoch": 0.256347003803063, "grad_norm": 1.1171302795410156, "learning_rate": 8.715687001380284e-06, "loss": 0.7597, "step": 4988 }, { "epoch": 0.25639839654640767, "grad_norm": 1.1406511068344116, "learning_rate": 8.715130056377796e-06, "loss": 0.738, "step": 4989 }, { "epoch": 0.25644978928975226, "grad_norm": 1.052254557609558, "learning_rate": 8.714573008444384e-06, "loss": 0.8281, "step": 4990 }, { "epoch": 0.2565011820330969, "grad_norm": 1.089924931526184, "learning_rate": 8.714015857595486e-06, "loss": 0.7863, "step": 4991 }, { "epoch": 0.25655257477644156, "grad_norm": 1.038926124572754, "learning_rate": 8.713458603846534e-06, "loss": 0.7337, "step": 4992 }, { "epoch": 0.2566039675197862, "grad_norm": 1.222162127494812, "learning_rate": 8.71290124721297e-06, "loss": 0.7727, "step": 4993 }, { "epoch": 0.25665536026313085, "grad_norm": 0.7234727144241333, "learning_rate": 8.712343787710233e-06, "loss": 0.7213, "step": 4994 }, { "epoch": 0.2567067530064755, "grad_norm": 1.1149122714996338, "learning_rate": 8.71178622535377e-06, "loss": 0.7795, "step": 4995 }, { "epoch": 0.25675814574982014, "grad_norm": 0.762754499912262, "learning_rate": 8.711228560159028e-06, "loss": 0.7041, "step": 4996 }, { "epoch": 0.2568095384931648, "grad_norm": 1.1941490173339844, "learning_rate": 8.710670792141457e-06, "loss": 0.781, "step": 4997 }, { "epoch": 0.2568609312365094, "grad_norm": 1.1050397157669067, "learning_rate": 8.710112921316513e-06, "loss": 0.7821, "step": 4998 }, { "epoch": 0.25691232397985403, "grad_norm": 1.0956331491470337, "learning_rate": 8.709554947699651e-06, "loss": 0.7783, "step": 4999 }, { "epoch": 0.2569637167231987, "grad_norm": 1.0385510921478271, "learning_rate": 8.70899687130633e-06, "loss": 0.7627, "step": 5000 }, { "epoch": 0.2570151094665433, "grad_norm": 1.1730506420135498, "learning_rate": 8.70843869215201e-06, "loss": 0.7567, "step": 5001 }, { "epoch": 0.25706650220988797, "grad_norm": 0.813791036605835, "learning_rate": 8.70788041025216e-06, "loss": 0.6647, "step": 5002 }, { "epoch": 0.2571178949532326, "grad_norm": 1.2626097202301025, "learning_rate": 8.707322025622244e-06, "loss": 0.6425, "step": 5003 }, { "epoch": 0.25716928769657726, "grad_norm": 1.4615684747695923, "learning_rate": 8.706763538277734e-06, "loss": 0.8106, "step": 5004 }, { "epoch": 0.25722068043992186, "grad_norm": 1.0810116529464722, "learning_rate": 8.706204948234103e-06, "loss": 0.8044, "step": 5005 }, { "epoch": 0.2572720731832665, "grad_norm": 1.1453149318695068, "learning_rate": 8.705646255506827e-06, "loss": 0.8038, "step": 5006 }, { "epoch": 0.25732346592661115, "grad_norm": 1.0877200365066528, "learning_rate": 8.705087460111387e-06, "loss": 0.7832, "step": 5007 }, { "epoch": 0.2573748586699558, "grad_norm": 1.072830319404602, "learning_rate": 8.704528562063264e-06, "loss": 0.7436, "step": 5008 }, { "epoch": 0.25742625141330044, "grad_norm": 1.3051509857177734, "learning_rate": 8.70396956137794e-06, "loss": 0.7607, "step": 5009 }, { "epoch": 0.2574776441566451, "grad_norm": 1.1476794481277466, "learning_rate": 8.703410458070906e-06, "loss": 0.777, "step": 5010 }, { "epoch": 0.25752903689998974, "grad_norm": 0.7591148018836975, "learning_rate": 8.702851252157651e-06, "loss": 0.6938, "step": 5011 }, { "epoch": 0.2575804296433344, "grad_norm": 1.0850828886032104, "learning_rate": 8.702291943653669e-06, "loss": 0.7332, "step": 5012 }, { "epoch": 0.257631822386679, "grad_norm": 1.0773557424545288, "learning_rate": 8.701732532574453e-06, "loss": 0.8087, "step": 5013 }, { "epoch": 0.2576832151300236, "grad_norm": 1.158715844154358, "learning_rate": 8.70117301893551e-06, "loss": 0.8394, "step": 5014 }, { "epoch": 0.25773460787336827, "grad_norm": 1.02519953250885, "learning_rate": 8.700613402752332e-06, "loss": 0.8025, "step": 5015 }, { "epoch": 0.2577860006167129, "grad_norm": 1.1967909336090088, "learning_rate": 8.70005368404043e-06, "loss": 0.7651, "step": 5016 }, { "epoch": 0.25783739336005757, "grad_norm": 1.0611220598220825, "learning_rate": 8.699493862815308e-06, "loss": 0.801, "step": 5017 }, { "epoch": 0.2578887861034022, "grad_norm": 1.1413936614990234, "learning_rate": 8.69893393909248e-06, "loss": 0.8026, "step": 5018 }, { "epoch": 0.25794017884674686, "grad_norm": 1.3112047910690308, "learning_rate": 8.698373912887457e-06, "loss": 0.7915, "step": 5019 }, { "epoch": 0.2579915715900915, "grad_norm": 1.1287792921066284, "learning_rate": 8.697813784215755e-06, "loss": 0.7579, "step": 5020 }, { "epoch": 0.2580429643334361, "grad_norm": 1.0979455709457397, "learning_rate": 8.697253553092893e-06, "loss": 0.7549, "step": 5021 }, { "epoch": 0.25809435707678074, "grad_norm": 1.2231143712997437, "learning_rate": 8.69669321953439e-06, "loss": 0.8331, "step": 5022 }, { "epoch": 0.2581457498201254, "grad_norm": 1.136443018913269, "learning_rate": 8.696132783555777e-06, "loss": 0.7266, "step": 5023 }, { "epoch": 0.25819714256347004, "grad_norm": 1.1581809520721436, "learning_rate": 8.695572245172577e-06, "loss": 0.8048, "step": 5024 }, { "epoch": 0.2582485353068147, "grad_norm": 1.0799040794372559, "learning_rate": 8.695011604400318e-06, "loss": 0.6908, "step": 5025 }, { "epoch": 0.25829992805015933, "grad_norm": 1.0844402313232422, "learning_rate": 8.69445086125454e-06, "loss": 0.813, "step": 5026 }, { "epoch": 0.258351320793504, "grad_norm": 1.1042795181274414, "learning_rate": 8.693890015750772e-06, "loss": 0.827, "step": 5027 }, { "epoch": 0.25840271353684857, "grad_norm": 1.0872057676315308, "learning_rate": 8.693329067904556e-06, "loss": 0.7743, "step": 5028 }, { "epoch": 0.2584541062801932, "grad_norm": 0.788754940032959, "learning_rate": 8.692768017731432e-06, "loss": 0.7389, "step": 5029 }, { "epoch": 0.25850549902353787, "grad_norm": 1.173190951347351, "learning_rate": 8.692206865246947e-06, "loss": 0.8211, "step": 5030 }, { "epoch": 0.2585568917668825, "grad_norm": 1.1367353200912476, "learning_rate": 8.691645610466647e-06, "loss": 0.7815, "step": 5031 }, { "epoch": 0.25860828451022716, "grad_norm": 1.2713085412979126, "learning_rate": 8.69108425340608e-06, "loss": 0.7401, "step": 5032 }, { "epoch": 0.2586596772535718, "grad_norm": 1.0603816509246826, "learning_rate": 8.6905227940808e-06, "loss": 0.7759, "step": 5033 }, { "epoch": 0.25871106999691645, "grad_norm": 1.0356656312942505, "learning_rate": 8.689961232506365e-06, "loss": 0.6597, "step": 5034 }, { "epoch": 0.2587624627402611, "grad_norm": 0.810200572013855, "learning_rate": 8.689399568698332e-06, "loss": 0.6643, "step": 5035 }, { "epoch": 0.2588138554836057, "grad_norm": 1.0437504053115845, "learning_rate": 8.688837802672262e-06, "loss": 0.7397, "step": 5036 }, { "epoch": 0.25886524822695034, "grad_norm": 1.1028997898101807, "learning_rate": 8.688275934443718e-06, "loss": 0.8189, "step": 5037 }, { "epoch": 0.258916640970295, "grad_norm": 1.0486176013946533, "learning_rate": 8.687713964028269e-06, "loss": 0.7322, "step": 5038 }, { "epoch": 0.25896803371363963, "grad_norm": 0.8827479481697083, "learning_rate": 8.687151891441485e-06, "loss": 0.732, "step": 5039 }, { "epoch": 0.2590194264569843, "grad_norm": 1.0800729990005493, "learning_rate": 8.686589716698938e-06, "loss": 0.8007, "step": 5040 }, { "epoch": 0.25907081920032893, "grad_norm": 1.0989967584609985, "learning_rate": 8.686027439816203e-06, "loss": 0.7402, "step": 5041 }, { "epoch": 0.2591222119436736, "grad_norm": 1.1183170080184937, "learning_rate": 8.68546506080886e-06, "loss": 0.8842, "step": 5042 }, { "epoch": 0.25917360468701817, "grad_norm": 0.7797960638999939, "learning_rate": 8.684902579692487e-06, "loss": 0.727, "step": 5043 }, { "epoch": 0.2592249974303628, "grad_norm": 1.0695255994796753, "learning_rate": 8.684339996482672e-06, "loss": 0.7888, "step": 5044 }, { "epoch": 0.25927639017370746, "grad_norm": 1.0753449201583862, "learning_rate": 8.683777311195e-06, "loss": 0.8243, "step": 5045 }, { "epoch": 0.2593277829170521, "grad_norm": 1.0878175497055054, "learning_rate": 8.68321452384506e-06, "loss": 0.7643, "step": 5046 }, { "epoch": 0.25937917566039675, "grad_norm": 1.0748659372329712, "learning_rate": 8.682651634448447e-06, "loss": 0.7614, "step": 5047 }, { "epoch": 0.2594305684037414, "grad_norm": 1.0884603261947632, "learning_rate": 8.682088643020754e-06, "loss": 0.7311, "step": 5048 }, { "epoch": 0.25948196114708605, "grad_norm": 0.8031647205352783, "learning_rate": 8.681525549577578e-06, "loss": 0.6888, "step": 5049 }, { "epoch": 0.2595333538904307, "grad_norm": 1.0524901151657104, "learning_rate": 8.680962354134524e-06, "loss": 0.7752, "step": 5050 }, { "epoch": 0.2595847466337753, "grad_norm": 0.6999702453613281, "learning_rate": 8.680399056707192e-06, "loss": 0.6595, "step": 5051 }, { "epoch": 0.25963613937711993, "grad_norm": 1.1733062267303467, "learning_rate": 8.679835657311191e-06, "loss": 0.7802, "step": 5052 }, { "epoch": 0.2596875321204646, "grad_norm": 0.9867770671844482, "learning_rate": 8.67927215596213e-06, "loss": 0.7245, "step": 5053 }, { "epoch": 0.25973892486380923, "grad_norm": 1.10702383518219, "learning_rate": 8.67870855267562e-06, "loss": 0.7867, "step": 5054 }, { "epoch": 0.2597903176071539, "grad_norm": 0.7846687436103821, "learning_rate": 8.678144847467278e-06, "loss": 0.7008, "step": 5055 }, { "epoch": 0.2598417103504985, "grad_norm": 1.0470688343048096, "learning_rate": 8.67758104035272e-06, "loss": 0.7566, "step": 5056 }, { "epoch": 0.25989310309384317, "grad_norm": 1.096498966217041, "learning_rate": 8.677017131347571e-06, "loss": 0.7461, "step": 5057 }, { "epoch": 0.2599444958371878, "grad_norm": 1.0553582906723022, "learning_rate": 8.676453120467449e-06, "loss": 0.7382, "step": 5058 }, { "epoch": 0.2599958885805324, "grad_norm": 1.1455203294754028, "learning_rate": 8.675889007727982e-06, "loss": 0.8325, "step": 5059 }, { "epoch": 0.26004728132387706, "grad_norm": 1.11444091796875, "learning_rate": 8.675324793144801e-06, "loss": 0.7885, "step": 5060 }, { "epoch": 0.2600986740672217, "grad_norm": 0.8875847458839417, "learning_rate": 8.674760476733537e-06, "loss": 0.699, "step": 5061 }, { "epoch": 0.26015006681056635, "grad_norm": 0.7678377628326416, "learning_rate": 8.674196058509826e-06, "loss": 0.7154, "step": 5062 }, { "epoch": 0.260201459553911, "grad_norm": 0.9734421968460083, "learning_rate": 8.673631538489305e-06, "loss": 0.7798, "step": 5063 }, { "epoch": 0.26025285229725564, "grad_norm": 1.1335232257843018, "learning_rate": 8.673066916687613e-06, "loss": 0.7203, "step": 5064 }, { "epoch": 0.2603042450406003, "grad_norm": 1.07967209815979, "learning_rate": 8.672502193120394e-06, "loss": 0.8198, "step": 5065 }, { "epoch": 0.2603556377839449, "grad_norm": 1.126824975013733, "learning_rate": 8.671937367803295e-06, "loss": 0.8024, "step": 5066 }, { "epoch": 0.26040703052728953, "grad_norm": 0.7854825258255005, "learning_rate": 8.671372440751966e-06, "loss": 0.6872, "step": 5067 }, { "epoch": 0.2604584232706342, "grad_norm": 0.7319926023483276, "learning_rate": 8.670807411982056e-06, "loss": 0.6964, "step": 5068 }, { "epoch": 0.2605098160139788, "grad_norm": 1.024675965309143, "learning_rate": 8.670242281509222e-06, "loss": 0.8014, "step": 5069 }, { "epoch": 0.26056120875732347, "grad_norm": 0.9250428080558777, "learning_rate": 8.669677049349122e-06, "loss": 0.6704, "step": 5070 }, { "epoch": 0.2606126015006681, "grad_norm": 1.0948251485824585, "learning_rate": 8.669111715517413e-06, "loss": 0.8217, "step": 5071 }, { "epoch": 0.26066399424401276, "grad_norm": 0.777854323387146, "learning_rate": 8.668546280029759e-06, "loss": 0.6905, "step": 5072 }, { "epoch": 0.2607153869873574, "grad_norm": 1.0761494636535645, "learning_rate": 8.66798074290183e-06, "loss": 0.8133, "step": 5073 }, { "epoch": 0.260766779730702, "grad_norm": 1.0734977722167969, "learning_rate": 8.667415104149288e-06, "loss": 0.755, "step": 5074 }, { "epoch": 0.26081817247404665, "grad_norm": 1.1395795345306396, "learning_rate": 8.66684936378781e-06, "loss": 0.84, "step": 5075 }, { "epoch": 0.2608695652173913, "grad_norm": 0.794562816619873, "learning_rate": 8.666283521833067e-06, "loss": 0.6791, "step": 5076 }, { "epoch": 0.26092095796073594, "grad_norm": 1.1529595851898193, "learning_rate": 8.665717578300739e-06, "loss": 0.7896, "step": 5077 }, { "epoch": 0.2609723507040806, "grad_norm": 0.7578186392784119, "learning_rate": 8.665151533206504e-06, "loss": 0.7024, "step": 5078 }, { "epoch": 0.26102374344742524, "grad_norm": 1.1673104763031006, "learning_rate": 8.664585386566046e-06, "loss": 0.8382, "step": 5079 }, { "epoch": 0.2610751361907699, "grad_norm": 1.203487515449524, "learning_rate": 8.664019138395047e-06, "loss": 0.8381, "step": 5080 }, { "epoch": 0.2611265289341145, "grad_norm": 1.1061145067214966, "learning_rate": 8.6634527887092e-06, "loss": 0.7673, "step": 5081 }, { "epoch": 0.2611779216774591, "grad_norm": 0.8297333717346191, "learning_rate": 8.662886337524196e-06, "loss": 0.6849, "step": 5082 }, { "epoch": 0.26122931442080377, "grad_norm": 1.1618329286575317, "learning_rate": 8.662319784855727e-06, "loss": 0.8178, "step": 5083 }, { "epoch": 0.2612807071641484, "grad_norm": 0.8649482131004333, "learning_rate": 8.661753130719488e-06, "loss": 0.6944, "step": 5084 }, { "epoch": 0.26133209990749307, "grad_norm": 1.0497634410858154, "learning_rate": 8.661186375131181e-06, "loss": 0.7729, "step": 5085 }, { "epoch": 0.2613834926508377, "grad_norm": 0.761749804019928, "learning_rate": 8.66061951810651e-06, "loss": 0.7245, "step": 5086 }, { "epoch": 0.26143488539418236, "grad_norm": 1.1066533327102661, "learning_rate": 8.660052559661178e-06, "loss": 0.798, "step": 5087 }, { "epoch": 0.261486278137527, "grad_norm": 1.1950860023498535, "learning_rate": 8.659485499810894e-06, "loss": 0.7683, "step": 5088 }, { "epoch": 0.2615376708808716, "grad_norm": 1.0164082050323486, "learning_rate": 8.658918338571367e-06, "loss": 0.7085, "step": 5089 }, { "epoch": 0.26158906362421624, "grad_norm": 1.0267949104309082, "learning_rate": 8.658351075958314e-06, "loss": 0.7453, "step": 5090 }, { "epoch": 0.2616404563675609, "grad_norm": 1.1727787256240845, "learning_rate": 8.65778371198745e-06, "loss": 0.7814, "step": 5091 }, { "epoch": 0.26169184911090554, "grad_norm": 1.1793876886367798, "learning_rate": 8.657216246674493e-06, "loss": 0.8194, "step": 5092 }, { "epoch": 0.2617432418542502, "grad_norm": 1.0998691320419312, "learning_rate": 8.656648680035166e-06, "loss": 0.7992, "step": 5093 }, { "epoch": 0.26179463459759483, "grad_norm": 0.8477131724357605, "learning_rate": 8.656081012085196e-06, "loss": 0.751, "step": 5094 }, { "epoch": 0.2618460273409395, "grad_norm": 1.113799810409546, "learning_rate": 8.655513242840308e-06, "loss": 0.7602, "step": 5095 }, { "epoch": 0.26189742008428407, "grad_norm": 1.1180912256240845, "learning_rate": 8.654945372316233e-06, "loss": 0.7779, "step": 5096 }, { "epoch": 0.2619488128276287, "grad_norm": 1.2480909824371338, "learning_rate": 8.654377400528706e-06, "loss": 0.8325, "step": 5097 }, { "epoch": 0.26200020557097337, "grad_norm": 1.0494873523712158, "learning_rate": 8.65380932749346e-06, "loss": 0.7941, "step": 5098 }, { "epoch": 0.262051598314318, "grad_norm": 0.7988775372505188, "learning_rate": 8.653241153226237e-06, "loss": 0.7066, "step": 5099 }, { "epoch": 0.26210299105766266, "grad_norm": 0.7774263024330139, "learning_rate": 8.652672877742778e-06, "loss": 0.6591, "step": 5100 }, { "epoch": 0.2621543838010073, "grad_norm": 0.8363752961158752, "learning_rate": 8.652104501058827e-06, "loss": 0.7104, "step": 5101 }, { "epoch": 0.26220577654435195, "grad_norm": 1.057090163230896, "learning_rate": 8.651536023190131e-06, "loss": 0.8097, "step": 5102 }, { "epoch": 0.2622571692876966, "grad_norm": 1.4543486833572388, "learning_rate": 8.650967444152441e-06, "loss": 0.7698, "step": 5103 }, { "epoch": 0.2623085620310412, "grad_norm": 1.0873017311096191, "learning_rate": 8.650398763961511e-06, "loss": 0.7985, "step": 5104 }, { "epoch": 0.26235995477438584, "grad_norm": 1.0244823694229126, "learning_rate": 8.649829982633094e-06, "loss": 0.737, "step": 5105 }, { "epoch": 0.2624113475177305, "grad_norm": 1.105635643005371, "learning_rate": 8.649261100182951e-06, "loss": 0.7347, "step": 5106 }, { "epoch": 0.26246274026107513, "grad_norm": 1.021653652191162, "learning_rate": 8.648692116626842e-06, "loss": 0.7414, "step": 5107 }, { "epoch": 0.2625141330044198, "grad_norm": 1.1400269269943237, "learning_rate": 8.648123031980533e-06, "loss": 0.7245, "step": 5108 }, { "epoch": 0.26256552574776443, "grad_norm": 1.0821542739868164, "learning_rate": 8.647553846259789e-06, "loss": 0.7688, "step": 5109 }, { "epoch": 0.2626169184911091, "grad_norm": 0.8694726824760437, "learning_rate": 8.64698455948038e-06, "loss": 0.7083, "step": 5110 }, { "epoch": 0.2626683112344537, "grad_norm": 1.0345886945724487, "learning_rate": 8.646415171658078e-06, "loss": 0.7441, "step": 5111 }, { "epoch": 0.2627197039777983, "grad_norm": 1.1652065515518188, "learning_rate": 8.64584568280866e-06, "loss": 0.7594, "step": 5112 }, { "epoch": 0.26277109672114296, "grad_norm": 1.1456356048583984, "learning_rate": 8.645276092947906e-06, "loss": 0.7828, "step": 5113 }, { "epoch": 0.2628224894644876, "grad_norm": 1.012272596359253, "learning_rate": 8.644706402091594e-06, "loss": 0.7396, "step": 5114 }, { "epoch": 0.26287388220783225, "grad_norm": 1.0662199258804321, "learning_rate": 8.644136610255506e-06, "loss": 0.7493, "step": 5115 }, { "epoch": 0.2629252749511769, "grad_norm": 1.0539804697036743, "learning_rate": 8.643566717455433e-06, "loss": 0.7198, "step": 5116 }, { "epoch": 0.26297666769452155, "grad_norm": 1.1356357336044312, "learning_rate": 8.642996723707162e-06, "loss": 0.8406, "step": 5117 }, { "epoch": 0.2630280604378662, "grad_norm": 1.1133993864059448, "learning_rate": 8.642426629026485e-06, "loss": 0.7375, "step": 5118 }, { "epoch": 0.2630794531812108, "grad_norm": 1.1521869897842407, "learning_rate": 8.6418564334292e-06, "loss": 0.8206, "step": 5119 }, { "epoch": 0.26313084592455543, "grad_norm": 1.0873502492904663, "learning_rate": 8.641286136931097e-06, "loss": 0.8057, "step": 5120 }, { "epoch": 0.2631822386679001, "grad_norm": 1.2929216623306274, "learning_rate": 8.640715739547986e-06, "loss": 0.8423, "step": 5121 }, { "epoch": 0.26323363141124473, "grad_norm": 0.7494547367095947, "learning_rate": 8.640145241295665e-06, "loss": 0.6868, "step": 5122 }, { "epoch": 0.2632850241545894, "grad_norm": 1.157834768295288, "learning_rate": 8.639574642189942e-06, "loss": 0.7931, "step": 5123 }, { "epoch": 0.263336416897934, "grad_norm": 1.177663803100586, "learning_rate": 8.639003942246625e-06, "loss": 0.8395, "step": 5124 }, { "epoch": 0.26338780964127867, "grad_norm": 1.0290026664733887, "learning_rate": 8.638433141481526e-06, "loss": 0.6914, "step": 5125 }, { "epoch": 0.2634392023846233, "grad_norm": 1.0879486799240112, "learning_rate": 8.63786223991046e-06, "loss": 0.7715, "step": 5126 }, { "epoch": 0.2634905951279679, "grad_norm": 1.1543580293655396, "learning_rate": 8.637291237549243e-06, "loss": 0.7774, "step": 5127 }, { "epoch": 0.26354198787131256, "grad_norm": 1.1650959253311157, "learning_rate": 8.636720134413696e-06, "loss": 0.7929, "step": 5128 }, { "epoch": 0.2635933806146572, "grad_norm": 1.182424545288086, "learning_rate": 8.636148930519642e-06, "loss": 0.7495, "step": 5129 }, { "epoch": 0.26364477335800185, "grad_norm": 0.9837983250617981, "learning_rate": 8.635577625882907e-06, "loss": 0.7398, "step": 5130 }, { "epoch": 0.2636961661013465, "grad_norm": 1.0453872680664062, "learning_rate": 8.63500622051932e-06, "loss": 0.755, "step": 5131 }, { "epoch": 0.26374755884469114, "grad_norm": 1.0032366514205933, "learning_rate": 8.634434714444711e-06, "loss": 0.7382, "step": 5132 }, { "epoch": 0.2637989515880358, "grad_norm": 1.1037267446517944, "learning_rate": 8.633863107674914e-06, "loss": 0.7449, "step": 5133 }, { "epoch": 0.2638503443313804, "grad_norm": 1.072238564491272, "learning_rate": 8.633291400225766e-06, "loss": 0.7802, "step": 5134 }, { "epoch": 0.26390173707472503, "grad_norm": 1.1269937753677368, "learning_rate": 8.632719592113106e-06, "loss": 0.7634, "step": 5135 }, { "epoch": 0.2639531298180697, "grad_norm": 0.7631784677505493, "learning_rate": 8.63214768335278e-06, "loss": 0.7382, "step": 5136 }, { "epoch": 0.2640045225614143, "grad_norm": 1.1025582551956177, "learning_rate": 8.631575673960628e-06, "loss": 0.8277, "step": 5137 }, { "epoch": 0.26405591530475897, "grad_norm": 0.7933250069618225, "learning_rate": 8.631003563952504e-06, "loss": 0.6967, "step": 5138 }, { "epoch": 0.2641073080481036, "grad_norm": 1.1932233572006226, "learning_rate": 8.630431353344254e-06, "loss": 0.813, "step": 5139 }, { "epoch": 0.26415870079144826, "grad_norm": 1.1918531656265259, "learning_rate": 8.629859042151732e-06, "loss": 0.7922, "step": 5140 }, { "epoch": 0.2642100935347929, "grad_norm": 1.0909969806671143, "learning_rate": 8.629286630390797e-06, "loss": 0.7623, "step": 5141 }, { "epoch": 0.2642614862781375, "grad_norm": 1.0843340158462524, "learning_rate": 8.628714118077305e-06, "loss": 0.7488, "step": 5142 }, { "epoch": 0.26431287902148215, "grad_norm": 1.1961742639541626, "learning_rate": 8.628141505227121e-06, "loss": 0.7505, "step": 5143 }, { "epoch": 0.2643642717648268, "grad_norm": 1.1419634819030762, "learning_rate": 8.627568791856107e-06, "loss": 0.8349, "step": 5144 }, { "epoch": 0.26441566450817144, "grad_norm": 1.1175090074539185, "learning_rate": 8.626995977980133e-06, "loss": 0.7927, "step": 5145 }, { "epoch": 0.2644670572515161, "grad_norm": 1.0712730884552002, "learning_rate": 8.626423063615068e-06, "loss": 0.7784, "step": 5146 }, { "epoch": 0.26451844999486074, "grad_norm": 1.1190199851989746, "learning_rate": 8.625850048776785e-06, "loss": 0.8551, "step": 5147 }, { "epoch": 0.2645698427382054, "grad_norm": 1.1434930562973022, "learning_rate": 8.625276933481162e-06, "loss": 0.815, "step": 5148 }, { "epoch": 0.26462123548155003, "grad_norm": 1.1858470439910889, "learning_rate": 8.624703717744073e-06, "loss": 0.7797, "step": 5149 }, { "epoch": 0.2646726282248946, "grad_norm": 0.7807711362838745, "learning_rate": 8.624130401581403e-06, "loss": 0.6667, "step": 5150 }, { "epoch": 0.26472402096823927, "grad_norm": 1.0951123237609863, "learning_rate": 8.623556985009035e-06, "loss": 0.7942, "step": 5151 }, { "epoch": 0.2647754137115839, "grad_norm": 0.747201681137085, "learning_rate": 8.622983468042858e-06, "loss": 0.7058, "step": 5152 }, { "epoch": 0.26482680645492856, "grad_norm": 1.1744269132614136, "learning_rate": 8.62240985069876e-06, "loss": 0.7728, "step": 5153 }, { "epoch": 0.2648781991982732, "grad_norm": 1.1512211561203003, "learning_rate": 8.621836132992634e-06, "loss": 0.8197, "step": 5154 }, { "epoch": 0.26492959194161786, "grad_norm": 1.1541978120803833, "learning_rate": 8.621262314940375e-06, "loss": 0.7131, "step": 5155 }, { "epoch": 0.2649809846849625, "grad_norm": 0.7653104662895203, "learning_rate": 8.62068839655788e-06, "loss": 0.7284, "step": 5156 }, { "epoch": 0.2650323774283071, "grad_norm": 1.1041626930236816, "learning_rate": 8.620114377861053e-06, "loss": 0.8126, "step": 5157 }, { "epoch": 0.26508377017165174, "grad_norm": 1.0981786251068115, "learning_rate": 8.619540258865795e-06, "loss": 0.8056, "step": 5158 }, { "epoch": 0.2651351629149964, "grad_norm": 1.1062973737716675, "learning_rate": 8.618966039588015e-06, "loss": 0.8203, "step": 5159 }, { "epoch": 0.26518655565834104, "grad_norm": 1.0797981023788452, "learning_rate": 8.618391720043619e-06, "loss": 0.7644, "step": 5160 }, { "epoch": 0.2652379484016857, "grad_norm": 0.9855642914772034, "learning_rate": 8.61781730024852e-06, "loss": 0.7819, "step": 5161 }, { "epoch": 0.26528934114503033, "grad_norm": 1.036364197731018, "learning_rate": 8.617242780218634e-06, "loss": 0.7873, "step": 5162 }, { "epoch": 0.265340733888375, "grad_norm": 1.1648101806640625, "learning_rate": 8.61666815996988e-06, "loss": 0.8164, "step": 5163 }, { "epoch": 0.2653921266317196, "grad_norm": 0.7500884532928467, "learning_rate": 8.616093439518174e-06, "loss": 0.6845, "step": 5164 }, { "epoch": 0.2654435193750642, "grad_norm": 0.8555760383605957, "learning_rate": 8.61551861887944e-06, "loss": 0.6706, "step": 5165 }, { "epoch": 0.26549491211840887, "grad_norm": 1.122339129447937, "learning_rate": 8.61494369806961e-06, "loss": 0.7486, "step": 5166 }, { "epoch": 0.2655463048617535, "grad_norm": 1.2896727323532104, "learning_rate": 8.614368677104605e-06, "loss": 0.7495, "step": 5167 }, { "epoch": 0.26559769760509816, "grad_norm": 0.7060507535934448, "learning_rate": 8.613793556000361e-06, "loss": 0.7243, "step": 5168 }, { "epoch": 0.2656490903484428, "grad_norm": 1.0945807695388794, "learning_rate": 8.61321833477281e-06, "loss": 0.7809, "step": 5169 }, { "epoch": 0.26570048309178745, "grad_norm": 1.1773170232772827, "learning_rate": 8.61264301343789e-06, "loss": 0.7957, "step": 5170 }, { "epoch": 0.2657518758351321, "grad_norm": 1.1937668323516846, "learning_rate": 8.612067592011542e-06, "loss": 0.8343, "step": 5171 }, { "epoch": 0.2658032685784767, "grad_norm": 1.1287274360656738, "learning_rate": 8.611492070509704e-06, "loss": 0.8351, "step": 5172 }, { "epoch": 0.26585466132182134, "grad_norm": 1.0422691106796265, "learning_rate": 8.610916448948326e-06, "loss": 0.7549, "step": 5173 }, { "epoch": 0.265906054065166, "grad_norm": 1.053804636001587, "learning_rate": 8.610340727343355e-06, "loss": 0.7362, "step": 5174 }, { "epoch": 0.26595744680851063, "grad_norm": 1.0331788063049316, "learning_rate": 8.609764905710743e-06, "loss": 0.7673, "step": 5175 }, { "epoch": 0.2660088395518553, "grad_norm": 1.1716344356536865, "learning_rate": 8.609188984066438e-06, "loss": 0.7855, "step": 5176 }, { "epoch": 0.2660602322951999, "grad_norm": 1.093544363975525, "learning_rate": 8.608612962426404e-06, "loss": 0.7802, "step": 5177 }, { "epoch": 0.2661116250385446, "grad_norm": 1.080235242843628, "learning_rate": 8.608036840806596e-06, "loss": 0.7464, "step": 5178 }, { "epoch": 0.2661630177818892, "grad_norm": 1.0788439512252808, "learning_rate": 8.607460619222976e-06, "loss": 0.7617, "step": 5179 }, { "epoch": 0.2662144105252338, "grad_norm": 1.0717482566833496, "learning_rate": 8.606884297691508e-06, "loss": 0.811, "step": 5180 }, { "epoch": 0.26626580326857846, "grad_norm": 0.7974849939346313, "learning_rate": 8.606307876228164e-06, "loss": 0.7467, "step": 5181 }, { "epoch": 0.2663171960119231, "grad_norm": 1.110707402229309, "learning_rate": 8.60573135484891e-06, "loss": 0.7343, "step": 5182 }, { "epoch": 0.26636858875526775, "grad_norm": 1.0326586961746216, "learning_rate": 8.605154733569719e-06, "loss": 0.6968, "step": 5183 }, { "epoch": 0.2664199814986124, "grad_norm": 1.1575734615325928, "learning_rate": 8.604578012406568e-06, "loss": 0.8542, "step": 5184 }, { "epoch": 0.26647137424195705, "grad_norm": 0.8235086798667908, "learning_rate": 8.604001191375436e-06, "loss": 0.6815, "step": 5185 }, { "epoch": 0.2665227669853017, "grad_norm": 0.7503124475479126, "learning_rate": 8.603424270492305e-06, "loss": 0.701, "step": 5186 }, { "epoch": 0.26657415972864634, "grad_norm": 1.07439124584198, "learning_rate": 8.602847249773157e-06, "loss": 0.7795, "step": 5187 }, { "epoch": 0.26662555247199093, "grad_norm": 1.0742294788360596, "learning_rate": 8.602270129233979e-06, "loss": 0.7283, "step": 5188 }, { "epoch": 0.2666769452153356, "grad_norm": 1.0845779180526733, "learning_rate": 8.601692908890761e-06, "loss": 0.8082, "step": 5189 }, { "epoch": 0.26672833795868023, "grad_norm": 1.169121265411377, "learning_rate": 8.601115588759497e-06, "loss": 0.7409, "step": 5190 }, { "epoch": 0.2667797307020249, "grad_norm": 1.1016429662704468, "learning_rate": 8.600538168856181e-06, "loss": 0.6637, "step": 5191 }, { "epoch": 0.2668311234453695, "grad_norm": 1.0152069330215454, "learning_rate": 8.59996064919681e-06, "loss": 0.7655, "step": 5192 }, { "epoch": 0.26688251618871417, "grad_norm": 1.1455893516540527, "learning_rate": 8.599383029797385e-06, "loss": 0.7382, "step": 5193 }, { "epoch": 0.2669339089320588, "grad_norm": 1.1317106485366821, "learning_rate": 8.598805310673913e-06, "loss": 0.8099, "step": 5194 }, { "epoch": 0.2669853016754034, "grad_norm": 1.1316012144088745, "learning_rate": 8.598227491842395e-06, "loss": 0.7898, "step": 5195 }, { "epoch": 0.26703669441874806, "grad_norm": 1.0742870569229126, "learning_rate": 8.597649573318842e-06, "loss": 0.7666, "step": 5196 }, { "epoch": 0.2670880871620927, "grad_norm": 1.0907400846481323, "learning_rate": 8.597071555119268e-06, "loss": 0.8118, "step": 5197 }, { "epoch": 0.26713947990543735, "grad_norm": 1.0511053800582886, "learning_rate": 8.596493437259684e-06, "loss": 0.7121, "step": 5198 }, { "epoch": 0.267190872648782, "grad_norm": 1.049952507019043, "learning_rate": 8.595915219756108e-06, "loss": 0.7571, "step": 5199 }, { "epoch": 0.26724226539212664, "grad_norm": 1.2749274969100952, "learning_rate": 8.595336902624562e-06, "loss": 0.7701, "step": 5200 }, { "epoch": 0.2672936581354713, "grad_norm": 0.8104772567749023, "learning_rate": 8.594758485881066e-06, "loss": 0.7115, "step": 5201 }, { "epoch": 0.26734505087881594, "grad_norm": 1.1337356567382812, "learning_rate": 8.594179969541649e-06, "loss": 0.7462, "step": 5202 }, { "epoch": 0.26739644362216053, "grad_norm": 0.8976438641548157, "learning_rate": 8.593601353622337e-06, "loss": 0.7139, "step": 5203 }, { "epoch": 0.2674478363655052, "grad_norm": 1.150750756263733, "learning_rate": 8.59302263813916e-06, "loss": 0.8114, "step": 5204 }, { "epoch": 0.2674992291088498, "grad_norm": 1.183227777481079, "learning_rate": 8.592443823108155e-06, "loss": 0.8059, "step": 5205 }, { "epoch": 0.26755062185219447, "grad_norm": 0.6917653679847717, "learning_rate": 8.591864908545355e-06, "loss": 0.6633, "step": 5206 }, { "epoch": 0.2676020145955391, "grad_norm": 1.0715919733047485, "learning_rate": 8.591285894466802e-06, "loss": 0.7681, "step": 5207 }, { "epoch": 0.26765340733888376, "grad_norm": 1.0410206317901611, "learning_rate": 8.590706780888536e-06, "loss": 0.7709, "step": 5208 }, { "epoch": 0.2677048000822284, "grad_norm": 1.1177297830581665, "learning_rate": 8.590127567826605e-06, "loss": 0.7949, "step": 5209 }, { "epoch": 0.267756192825573, "grad_norm": 1.0460163354873657, "learning_rate": 8.589548255297053e-06, "loss": 0.7188, "step": 5210 }, { "epoch": 0.26780758556891765, "grad_norm": 1.1022695302963257, "learning_rate": 8.588968843315934e-06, "loss": 0.7839, "step": 5211 }, { "epoch": 0.2678589783122623, "grad_norm": 0.85365229845047, "learning_rate": 8.588389331899298e-06, "loss": 0.695, "step": 5212 }, { "epoch": 0.26791037105560694, "grad_norm": 0.7649239301681519, "learning_rate": 8.587809721063202e-06, "loss": 0.6645, "step": 5213 }, { "epoch": 0.2679617637989516, "grad_norm": 1.0407639741897583, "learning_rate": 8.587230010823704e-06, "loss": 0.7223, "step": 5214 }, { "epoch": 0.26801315654229624, "grad_norm": 1.111070990562439, "learning_rate": 8.586650201196866e-06, "loss": 0.8047, "step": 5215 }, { "epoch": 0.2680645492856409, "grad_norm": 1.070738434791565, "learning_rate": 8.586070292198754e-06, "loss": 0.7869, "step": 5216 }, { "epoch": 0.26811594202898553, "grad_norm": 1.0527263879776, "learning_rate": 8.585490283845432e-06, "loss": 0.7646, "step": 5217 }, { "epoch": 0.2681673347723301, "grad_norm": 1.1252719163894653, "learning_rate": 8.58491017615297e-06, "loss": 0.8296, "step": 5218 }, { "epoch": 0.26821872751567477, "grad_norm": 1.064440369606018, "learning_rate": 8.584329969137442e-06, "loss": 0.7831, "step": 5219 }, { "epoch": 0.2682701202590194, "grad_norm": 1.135534644126892, "learning_rate": 8.583749662814922e-06, "loss": 0.7966, "step": 5220 }, { "epoch": 0.26832151300236406, "grad_norm": 1.0979188680648804, "learning_rate": 8.58316925720149e-06, "loss": 0.7799, "step": 5221 }, { "epoch": 0.2683729057457087, "grad_norm": 0.8376423716545105, "learning_rate": 8.58258875231322e-06, "loss": 0.6975, "step": 5222 }, { "epoch": 0.26842429848905336, "grad_norm": 1.1467187404632568, "learning_rate": 8.582008148166204e-06, "loss": 0.7253, "step": 5223 }, { "epoch": 0.268475691232398, "grad_norm": 1.1565247774124146, "learning_rate": 8.581427444776524e-06, "loss": 0.7909, "step": 5224 }, { "epoch": 0.2685270839757426, "grad_norm": 1.0919477939605713, "learning_rate": 8.580846642160268e-06, "loss": 0.7911, "step": 5225 }, { "epoch": 0.26857847671908724, "grad_norm": 0.7637118101119995, "learning_rate": 8.58026574033353e-06, "loss": 0.6743, "step": 5226 }, { "epoch": 0.2686298694624319, "grad_norm": 0.6978450417518616, "learning_rate": 8.579684739312401e-06, "loss": 0.7074, "step": 5227 }, { "epoch": 0.26868126220577654, "grad_norm": 0.7102873921394348, "learning_rate": 8.579103639112983e-06, "loss": 0.6827, "step": 5228 }, { "epoch": 0.2687326549491212, "grad_norm": 1.1391888856887817, "learning_rate": 8.57852243975137e-06, "loss": 0.7582, "step": 5229 }, { "epoch": 0.26878404769246583, "grad_norm": 1.0433030128479004, "learning_rate": 8.577941141243672e-06, "loss": 0.7798, "step": 5230 }, { "epoch": 0.2688354404358105, "grad_norm": 1.1109044551849365, "learning_rate": 8.577359743605989e-06, "loss": 0.8187, "step": 5231 }, { "epoch": 0.2688868331791551, "grad_norm": 0.7943516969680786, "learning_rate": 8.576778246854429e-06, "loss": 0.6777, "step": 5232 }, { "epoch": 0.2689382259224997, "grad_norm": 1.088222861289978, "learning_rate": 8.576196651005105e-06, "loss": 0.7823, "step": 5233 }, { "epoch": 0.26898961866584437, "grad_norm": 1.1497323513031006, "learning_rate": 8.575614956074129e-06, "loss": 0.7857, "step": 5234 }, { "epoch": 0.269041011409189, "grad_norm": 1.1060123443603516, "learning_rate": 8.57503316207762e-06, "loss": 0.7232, "step": 5235 }, { "epoch": 0.26909240415253366, "grad_norm": 1.0698469877243042, "learning_rate": 8.574451269031694e-06, "loss": 0.7686, "step": 5236 }, { "epoch": 0.2691437968958783, "grad_norm": 1.1172964572906494, "learning_rate": 8.573869276952475e-06, "loss": 0.7963, "step": 5237 }, { "epoch": 0.26919518963922295, "grad_norm": 1.1459288597106934, "learning_rate": 8.573287185856085e-06, "loss": 0.7425, "step": 5238 }, { "epoch": 0.2692465823825676, "grad_norm": 1.1439753770828247, "learning_rate": 8.572704995758656e-06, "loss": 0.8356, "step": 5239 }, { "epoch": 0.26929797512591225, "grad_norm": 1.1226779222488403, "learning_rate": 8.572122706676314e-06, "loss": 0.859, "step": 5240 }, { "epoch": 0.26934936786925684, "grad_norm": 1.0330973863601685, "learning_rate": 8.571540318625192e-06, "loss": 0.7626, "step": 5241 }, { "epoch": 0.2694007606126015, "grad_norm": 1.102895736694336, "learning_rate": 8.570957831621429e-06, "loss": 0.8574, "step": 5242 }, { "epoch": 0.26945215335594613, "grad_norm": 1.0460255146026611, "learning_rate": 8.57037524568116e-06, "loss": 0.7875, "step": 5243 }, { "epoch": 0.2695035460992908, "grad_norm": 1.0452920198440552, "learning_rate": 8.569792560820525e-06, "loss": 0.7903, "step": 5244 }, { "epoch": 0.2695549388426354, "grad_norm": 1.0433146953582764, "learning_rate": 8.569209777055671e-06, "loss": 0.7232, "step": 5245 }, { "epoch": 0.2696063315859801, "grad_norm": 1.107867956161499, "learning_rate": 8.568626894402744e-06, "loss": 0.8027, "step": 5246 }, { "epoch": 0.2696577243293247, "grad_norm": 1.1847078800201416, "learning_rate": 8.568043912877893e-06, "loss": 0.8256, "step": 5247 }, { "epoch": 0.2697091170726693, "grad_norm": 0.8863545060157776, "learning_rate": 8.567460832497268e-06, "loss": 0.7405, "step": 5248 }, { "epoch": 0.26976050981601396, "grad_norm": 1.001041293144226, "learning_rate": 8.566877653277027e-06, "loss": 0.7763, "step": 5249 }, { "epoch": 0.2698119025593586, "grad_norm": 1.0426985025405884, "learning_rate": 8.566294375233325e-06, "loss": 0.8406, "step": 5250 }, { "epoch": 0.26986329530270325, "grad_norm": 1.037388563156128, "learning_rate": 8.565710998382325e-06, "loss": 0.7776, "step": 5251 }, { "epoch": 0.2699146880460479, "grad_norm": 1.0881850719451904, "learning_rate": 8.565127522740184e-06, "loss": 0.805, "step": 5252 }, { "epoch": 0.26996608078939255, "grad_norm": 1.0541712045669556, "learning_rate": 8.564543948323073e-06, "loss": 0.79, "step": 5253 }, { "epoch": 0.2700174735327372, "grad_norm": 1.0583751201629639, "learning_rate": 8.563960275147162e-06, "loss": 0.7638, "step": 5254 }, { "epoch": 0.27006886627608184, "grad_norm": 1.1146904230117798, "learning_rate": 8.563376503228618e-06, "loss": 0.7554, "step": 5255 }, { "epoch": 0.27012025901942643, "grad_norm": 1.114192247390747, "learning_rate": 8.562792632583616e-06, "loss": 0.7395, "step": 5256 }, { "epoch": 0.2701716517627711, "grad_norm": 0.8435994982719421, "learning_rate": 8.562208663228334e-06, "loss": 0.7073, "step": 5257 }, { "epoch": 0.27022304450611573, "grad_norm": 1.1012240648269653, "learning_rate": 8.561624595178947e-06, "loss": 0.8356, "step": 5258 }, { "epoch": 0.2702744372494604, "grad_norm": 1.1703585386276245, "learning_rate": 8.561040428451644e-06, "loss": 0.8053, "step": 5259 }, { "epoch": 0.270325829992805, "grad_norm": 1.1016672849655151, "learning_rate": 8.560456163062604e-06, "loss": 0.8081, "step": 5260 }, { "epoch": 0.27037722273614967, "grad_norm": 1.152122139930725, "learning_rate": 8.559871799028017e-06, "loss": 0.7346, "step": 5261 }, { "epoch": 0.2704286154794943, "grad_norm": 1.1270984411239624, "learning_rate": 8.559287336364075e-06, "loss": 0.8063, "step": 5262 }, { "epoch": 0.2704800082228389, "grad_norm": 0.803991436958313, "learning_rate": 8.558702775086969e-06, "loss": 0.7281, "step": 5263 }, { "epoch": 0.27053140096618356, "grad_norm": 0.7978885173797607, "learning_rate": 8.558118115212892e-06, "loss": 0.6921, "step": 5264 }, { "epoch": 0.2705827937095282, "grad_norm": 1.1450138092041016, "learning_rate": 8.557533356758049e-06, "loss": 0.7398, "step": 5265 }, { "epoch": 0.27063418645287285, "grad_norm": 1.167701005935669, "learning_rate": 8.556948499738635e-06, "loss": 0.769, "step": 5266 }, { "epoch": 0.2706855791962175, "grad_norm": 1.3588557243347168, "learning_rate": 8.556363544170859e-06, "loss": 0.6969, "step": 5267 }, { "epoch": 0.27073697193956214, "grad_norm": 1.1220605373382568, "learning_rate": 8.555778490070925e-06, "loss": 0.7478, "step": 5268 }, { "epoch": 0.2707883646829068, "grad_norm": 1.1549495458602905, "learning_rate": 8.555193337455043e-06, "loss": 0.7526, "step": 5269 }, { "epoch": 0.27083975742625144, "grad_norm": 0.8924218416213989, "learning_rate": 8.554608086339425e-06, "loss": 0.6692, "step": 5270 }, { "epoch": 0.27089115016959603, "grad_norm": 1.0983549356460571, "learning_rate": 8.554022736740284e-06, "loss": 0.7464, "step": 5271 }, { "epoch": 0.2709425429129407, "grad_norm": 1.115360975265503, "learning_rate": 8.55343728867384e-06, "loss": 0.752, "step": 5272 }, { "epoch": 0.2709939356562853, "grad_norm": 0.7294501662254333, "learning_rate": 8.552851742156314e-06, "loss": 0.6485, "step": 5273 }, { "epoch": 0.27104532839962997, "grad_norm": 1.089875340461731, "learning_rate": 8.55226609720393e-06, "loss": 0.7347, "step": 5274 }, { "epoch": 0.2710967211429746, "grad_norm": 1.076592206954956, "learning_rate": 8.551680353832908e-06, "loss": 0.789, "step": 5275 }, { "epoch": 0.27114811388631926, "grad_norm": 1.1637295484542847, "learning_rate": 8.551094512059483e-06, "loss": 0.8085, "step": 5276 }, { "epoch": 0.2711995066296639, "grad_norm": 1.0136363506317139, "learning_rate": 8.550508571899882e-06, "loss": 0.744, "step": 5277 }, { "epoch": 0.27125089937300856, "grad_norm": 1.1101983785629272, "learning_rate": 8.549922533370341e-06, "loss": 0.7885, "step": 5278 }, { "epoch": 0.27130229211635315, "grad_norm": 1.0831009149551392, "learning_rate": 8.549336396487096e-06, "loss": 0.8321, "step": 5279 }, { "epoch": 0.2713536848596978, "grad_norm": 1.0593751668930054, "learning_rate": 8.548750161266388e-06, "loss": 0.7146, "step": 5280 }, { "epoch": 0.27140507760304244, "grad_norm": 1.0701123476028442, "learning_rate": 8.548163827724456e-06, "loss": 0.7725, "step": 5281 }, { "epoch": 0.2714564703463871, "grad_norm": 1.0847185850143433, "learning_rate": 8.547577395877548e-06, "loss": 0.8096, "step": 5282 }, { "epoch": 0.27150786308973174, "grad_norm": 1.0586906671524048, "learning_rate": 8.54699086574191e-06, "loss": 0.7954, "step": 5283 }, { "epoch": 0.2715592558330764, "grad_norm": 0.9394407868385315, "learning_rate": 8.546404237333793e-06, "loss": 0.7246, "step": 5284 }, { "epoch": 0.27161064857642103, "grad_norm": 1.0791479349136353, "learning_rate": 8.545817510669449e-06, "loss": 0.7418, "step": 5285 }, { "epoch": 0.2716620413197656, "grad_norm": 1.1013513803482056, "learning_rate": 8.545230685765134e-06, "loss": 0.8055, "step": 5286 }, { "epoch": 0.27171343406311027, "grad_norm": 0.7479370832443237, "learning_rate": 8.544643762637109e-06, "loss": 0.6781, "step": 5287 }, { "epoch": 0.2717648268064549, "grad_norm": 1.0581074953079224, "learning_rate": 8.544056741301632e-06, "loss": 0.8666, "step": 5288 }, { "epoch": 0.27181621954979956, "grad_norm": 1.062928318977356, "learning_rate": 8.543469621774968e-06, "loss": 0.7564, "step": 5289 }, { "epoch": 0.2718676122931442, "grad_norm": 1.145674705505371, "learning_rate": 8.542882404073384e-06, "loss": 0.7747, "step": 5290 }, { "epoch": 0.27191900503648886, "grad_norm": 1.059910535812378, "learning_rate": 8.54229508821315e-06, "loss": 0.8131, "step": 5291 }, { "epoch": 0.2719703977798335, "grad_norm": 1.0476080179214478, "learning_rate": 8.541707674210536e-06, "loss": 0.7652, "step": 5292 }, { "epoch": 0.27202179052317815, "grad_norm": 1.0923998355865479, "learning_rate": 8.541120162081818e-06, "loss": 0.8069, "step": 5293 }, { "epoch": 0.27207318326652274, "grad_norm": 0.9357771277427673, "learning_rate": 8.540532551843274e-06, "loss": 0.6962, "step": 5294 }, { "epoch": 0.2721245760098674, "grad_norm": 1.0638911724090576, "learning_rate": 8.539944843511186e-06, "loss": 0.7443, "step": 5295 }, { "epoch": 0.27217596875321204, "grad_norm": 1.110261082649231, "learning_rate": 8.539357037101833e-06, "loss": 0.775, "step": 5296 }, { "epoch": 0.2722273614965567, "grad_norm": 1.137700080871582, "learning_rate": 8.538769132631501e-06, "loss": 0.7799, "step": 5297 }, { "epoch": 0.27227875423990133, "grad_norm": 1.0177677869796753, "learning_rate": 8.538181130116484e-06, "loss": 0.7313, "step": 5298 }, { "epoch": 0.272330146983246, "grad_norm": 1.0921170711517334, "learning_rate": 8.537593029573066e-06, "loss": 0.8394, "step": 5299 }, { "epoch": 0.2723815397265906, "grad_norm": 1.4290047883987427, "learning_rate": 8.537004831017544e-06, "loss": 0.7282, "step": 5300 }, { "epoch": 0.2724329324699352, "grad_norm": 1.0766286849975586, "learning_rate": 8.536416534466215e-06, "loss": 0.7806, "step": 5301 }, { "epoch": 0.27248432521327987, "grad_norm": 1.1013396978378296, "learning_rate": 8.535828139935378e-06, "loss": 0.7552, "step": 5302 }, { "epoch": 0.2725357179566245, "grad_norm": 0.8312982320785522, "learning_rate": 8.535239647441335e-06, "loss": 0.6766, "step": 5303 }, { "epoch": 0.27258711069996916, "grad_norm": 1.0789040327072144, "learning_rate": 8.53465105700039e-06, "loss": 0.7712, "step": 5304 }, { "epoch": 0.2726385034433138, "grad_norm": 1.102656602859497, "learning_rate": 8.53406236862885e-06, "loss": 0.7403, "step": 5305 }, { "epoch": 0.27268989618665845, "grad_norm": 0.7600048780441284, "learning_rate": 8.533473582343029e-06, "loss": 0.6674, "step": 5306 }, { "epoch": 0.2727412889300031, "grad_norm": 1.037812352180481, "learning_rate": 8.532884698159233e-06, "loss": 0.7506, "step": 5307 }, { "epoch": 0.27279268167334775, "grad_norm": 1.1774437427520752, "learning_rate": 8.532295716093784e-06, "loss": 0.7656, "step": 5308 }, { "epoch": 0.27284407441669234, "grad_norm": 0.7507709860801697, "learning_rate": 8.531706636162997e-06, "loss": 0.7033, "step": 5309 }, { "epoch": 0.272895467160037, "grad_norm": 1.014439344406128, "learning_rate": 8.531117458383194e-06, "loss": 0.7521, "step": 5310 }, { "epoch": 0.27294685990338163, "grad_norm": 0.7373960614204407, "learning_rate": 8.530528182770696e-06, "loss": 0.701, "step": 5311 }, { "epoch": 0.2729982526467263, "grad_norm": 0.798416793346405, "learning_rate": 8.529938809341833e-06, "loss": 0.6688, "step": 5312 }, { "epoch": 0.2730496453900709, "grad_norm": 0.8056088089942932, "learning_rate": 8.529349338112934e-06, "loss": 0.7222, "step": 5313 }, { "epoch": 0.2731010381334156, "grad_norm": 0.8066539168357849, "learning_rate": 8.528759769100329e-06, "loss": 0.6882, "step": 5314 }, { "epoch": 0.2731524308767602, "grad_norm": 0.8207864165306091, "learning_rate": 8.528170102320352e-06, "loss": 0.7567, "step": 5315 }, { "epoch": 0.27320382362010487, "grad_norm": 1.0950533151626587, "learning_rate": 8.527580337789343e-06, "loss": 0.7921, "step": 5316 }, { "epoch": 0.27325521636344946, "grad_norm": 0.7688065767288208, "learning_rate": 8.52699047552364e-06, "loss": 0.6609, "step": 5317 }, { "epoch": 0.2733066091067941, "grad_norm": 1.0789706707000732, "learning_rate": 8.526400515539585e-06, "loss": 0.7268, "step": 5318 }, { "epoch": 0.27335800185013875, "grad_norm": 1.0623565912246704, "learning_rate": 8.525810457853525e-06, "loss": 0.8116, "step": 5319 }, { "epoch": 0.2734093945934834, "grad_norm": 0.8834909796714783, "learning_rate": 8.525220302481807e-06, "loss": 0.6576, "step": 5320 }, { "epoch": 0.27346078733682805, "grad_norm": 1.0651026964187622, "learning_rate": 8.524630049440783e-06, "loss": 0.704, "step": 5321 }, { "epoch": 0.2735121800801727, "grad_norm": 1.0169562101364136, "learning_rate": 8.524039698746804e-06, "loss": 0.6895, "step": 5322 }, { "epoch": 0.27356357282351734, "grad_norm": 1.0979901552200317, "learning_rate": 8.523449250416229e-06, "loss": 0.8008, "step": 5323 }, { "epoch": 0.27361496556686193, "grad_norm": 1.0751806497573853, "learning_rate": 8.522858704465416e-06, "loss": 0.7647, "step": 5324 }, { "epoch": 0.2736663583102066, "grad_norm": 0.8188237547874451, "learning_rate": 8.522268060910726e-06, "loss": 0.7143, "step": 5325 }, { "epoch": 0.27371775105355123, "grad_norm": 1.1386761665344238, "learning_rate": 8.521677319768523e-06, "loss": 0.7909, "step": 5326 }, { "epoch": 0.2737691437968959, "grad_norm": 0.829815149307251, "learning_rate": 8.521086481055175e-06, "loss": 0.7065, "step": 5327 }, { "epoch": 0.2738205365402405, "grad_norm": 1.119255781173706, "learning_rate": 8.52049554478705e-06, "loss": 0.7297, "step": 5328 }, { "epoch": 0.27387192928358517, "grad_norm": 1.049949288368225, "learning_rate": 8.519904510980524e-06, "loss": 0.789, "step": 5329 }, { "epoch": 0.2739233220269298, "grad_norm": 0.7242515683174133, "learning_rate": 8.519313379651968e-06, "loss": 0.6972, "step": 5330 }, { "epoch": 0.27397471477027446, "grad_norm": 1.0899670124053955, "learning_rate": 8.518722150817762e-06, "loss": 0.7836, "step": 5331 }, { "epoch": 0.27402610751361905, "grad_norm": 1.2884798049926758, "learning_rate": 8.518130824494286e-06, "loss": 0.7318, "step": 5332 }, { "epoch": 0.2740775002569637, "grad_norm": 0.7555671334266663, "learning_rate": 8.517539400697924e-06, "loss": 0.7017, "step": 5333 }, { "epoch": 0.27412889300030835, "grad_norm": 1.0647447109222412, "learning_rate": 8.516947879445061e-06, "loss": 0.7661, "step": 5334 }, { "epoch": 0.274180285743653, "grad_norm": 1.06218421459198, "learning_rate": 8.516356260752086e-06, "loss": 0.7841, "step": 5335 }, { "epoch": 0.27423167848699764, "grad_norm": 0.7147724032402039, "learning_rate": 8.515764544635389e-06, "loss": 0.7299, "step": 5336 }, { "epoch": 0.2742830712303423, "grad_norm": 0.7570116519927979, "learning_rate": 8.515172731111367e-06, "loss": 0.7034, "step": 5337 }, { "epoch": 0.27433446397368694, "grad_norm": 1.0586297512054443, "learning_rate": 8.514580820196414e-06, "loss": 0.7234, "step": 5338 }, { "epoch": 0.27438585671703153, "grad_norm": 0.7304074764251709, "learning_rate": 8.51398881190693e-06, "loss": 0.6942, "step": 5339 }, { "epoch": 0.2744372494603762, "grad_norm": 0.9326439499855042, "learning_rate": 8.513396706259319e-06, "loss": 0.7073, "step": 5340 }, { "epoch": 0.2744886422037208, "grad_norm": 1.1090595722198486, "learning_rate": 8.51280450326998e-06, "loss": 0.7647, "step": 5341 }, { "epoch": 0.27454003494706547, "grad_norm": 1.241669774055481, "learning_rate": 8.512212202955329e-06, "loss": 0.827, "step": 5342 }, { "epoch": 0.2745914276904101, "grad_norm": 1.0170283317565918, "learning_rate": 8.51161980533177e-06, "loss": 0.8398, "step": 5343 }, { "epoch": 0.27464282043375476, "grad_norm": 1.3315719366073608, "learning_rate": 8.511027310415718e-06, "loss": 0.8119, "step": 5344 }, { "epoch": 0.2746942131770994, "grad_norm": 0.9917698502540588, "learning_rate": 8.51043471822359e-06, "loss": 0.7266, "step": 5345 }, { "epoch": 0.27474560592044406, "grad_norm": 0.7620207071304321, "learning_rate": 8.5098420287718e-06, "loss": 0.6947, "step": 5346 }, { "epoch": 0.27479699866378865, "grad_norm": 1.072868824005127, "learning_rate": 8.509249242076774e-06, "loss": 0.7461, "step": 5347 }, { "epoch": 0.2748483914071333, "grad_norm": 1.074194073677063, "learning_rate": 8.508656358154932e-06, "loss": 0.7864, "step": 5348 }, { "epoch": 0.27489978415047794, "grad_norm": 1.122197151184082, "learning_rate": 8.5080633770227e-06, "loss": 0.8152, "step": 5349 }, { "epoch": 0.2749511768938226, "grad_norm": 1.094781517982483, "learning_rate": 8.507470298696512e-06, "loss": 0.849, "step": 5350 }, { "epoch": 0.27500256963716724, "grad_norm": 1.1370644569396973, "learning_rate": 8.506877123192796e-06, "loss": 0.7829, "step": 5351 }, { "epoch": 0.2750539623805119, "grad_norm": 1.0682247877120972, "learning_rate": 8.506283850527985e-06, "loss": 0.7511, "step": 5352 }, { "epoch": 0.27510535512385653, "grad_norm": 0.916619598865509, "learning_rate": 8.505690480718521e-06, "loss": 0.6929, "step": 5353 }, { "epoch": 0.2751567478672011, "grad_norm": 0.8062255382537842, "learning_rate": 8.505097013780837e-06, "loss": 0.6804, "step": 5354 }, { "epoch": 0.27520814061054577, "grad_norm": 1.1527599096298218, "learning_rate": 8.504503449731382e-06, "loss": 0.8282, "step": 5355 }, { "epoch": 0.2752595333538904, "grad_norm": 1.1587036848068237, "learning_rate": 8.503909788586598e-06, "loss": 0.7733, "step": 5356 }, { "epoch": 0.27531092609723506, "grad_norm": 1.1360194683074951, "learning_rate": 8.503316030362934e-06, "loss": 0.789, "step": 5357 }, { "epoch": 0.2753623188405797, "grad_norm": 1.1783379316329956, "learning_rate": 8.50272217507684e-06, "loss": 0.7551, "step": 5358 }, { "epoch": 0.27541371158392436, "grad_norm": 1.0723165273666382, "learning_rate": 8.50212822274477e-06, "loss": 0.7856, "step": 5359 }, { "epoch": 0.275465104327269, "grad_norm": 1.0904268026351929, "learning_rate": 8.501534173383178e-06, "loss": 0.7922, "step": 5360 }, { "epoch": 0.27551649707061365, "grad_norm": 1.19148588180542, "learning_rate": 8.500940027008524e-06, "loss": 0.7012, "step": 5361 }, { "epoch": 0.27556788981395824, "grad_norm": 1.1049919128417969, "learning_rate": 8.50034578363727e-06, "loss": 0.7397, "step": 5362 }, { "epoch": 0.2756192825573029, "grad_norm": 0.9565017819404602, "learning_rate": 8.49975144328588e-06, "loss": 0.6937, "step": 5363 }, { "epoch": 0.27567067530064754, "grad_norm": 0.8546234965324402, "learning_rate": 8.499157005970819e-06, "loss": 0.7046, "step": 5364 }, { "epoch": 0.2757220680439922, "grad_norm": 0.7992849349975586, "learning_rate": 8.498562471708558e-06, "loss": 0.7107, "step": 5365 }, { "epoch": 0.27577346078733683, "grad_norm": 1.1289910078048706, "learning_rate": 8.49796784051557e-06, "loss": 0.7702, "step": 5366 }, { "epoch": 0.2758248535306815, "grad_norm": 1.22629714012146, "learning_rate": 8.497373112408327e-06, "loss": 0.8207, "step": 5367 }, { "epoch": 0.2758762462740261, "grad_norm": 0.8818389177322388, "learning_rate": 8.496778287403308e-06, "loss": 0.7082, "step": 5368 }, { "epoch": 0.2759276390173708, "grad_norm": 1.2009376287460327, "learning_rate": 8.496183365516992e-06, "loss": 0.8156, "step": 5369 }, { "epoch": 0.27597903176071537, "grad_norm": 1.110411524772644, "learning_rate": 8.495588346765864e-06, "loss": 0.7846, "step": 5370 }, { "epoch": 0.27603042450406, "grad_norm": 1.100922703742981, "learning_rate": 8.494993231166408e-06, "loss": 0.8622, "step": 5371 }, { "epoch": 0.27608181724740466, "grad_norm": 1.1176868677139282, "learning_rate": 8.494398018735113e-06, "loss": 0.7837, "step": 5372 }, { "epoch": 0.2761332099907493, "grad_norm": 1.2481036186218262, "learning_rate": 8.49380270948847e-06, "loss": 0.7691, "step": 5373 }, { "epoch": 0.27618460273409395, "grad_norm": 1.0315886735916138, "learning_rate": 8.493207303442971e-06, "loss": 0.7736, "step": 5374 }, { "epoch": 0.2762359954774386, "grad_norm": 0.9144653081893921, "learning_rate": 8.492611800615114e-06, "loss": 0.733, "step": 5375 }, { "epoch": 0.27628738822078325, "grad_norm": 0.8169595003128052, "learning_rate": 8.492016201021396e-06, "loss": 0.6648, "step": 5376 }, { "epoch": 0.27633878096412784, "grad_norm": 1.0692890882492065, "learning_rate": 8.49142050467832e-06, "loss": 0.7419, "step": 5377 }, { "epoch": 0.2763901737074725, "grad_norm": 1.1379425525665283, "learning_rate": 8.49082471160239e-06, "loss": 0.7409, "step": 5378 }, { "epoch": 0.27644156645081713, "grad_norm": 0.9054638147354126, "learning_rate": 8.490228821810114e-06, "loss": 0.707, "step": 5379 }, { "epoch": 0.2764929591941618, "grad_norm": 1.0645204782485962, "learning_rate": 8.489632835318e-06, "loss": 0.7533, "step": 5380 }, { "epoch": 0.2765443519375064, "grad_norm": 0.7317367792129517, "learning_rate": 8.489036752142561e-06, "loss": 0.7131, "step": 5381 }, { "epoch": 0.2765957446808511, "grad_norm": 0.967462956905365, "learning_rate": 8.488440572300312e-06, "loss": 0.6899, "step": 5382 }, { "epoch": 0.2766471374241957, "grad_norm": 1.082800030708313, "learning_rate": 8.48784429580777e-06, "loss": 0.7569, "step": 5383 }, { "epoch": 0.27669853016754037, "grad_norm": 0.7163935899734497, "learning_rate": 8.487247922681459e-06, "loss": 0.6803, "step": 5384 }, { "epoch": 0.27674992291088496, "grad_norm": 1.061950445175171, "learning_rate": 8.486651452937896e-06, "loss": 0.7694, "step": 5385 }, { "epoch": 0.2768013156542296, "grad_norm": 1.1027023792266846, "learning_rate": 8.486054886593612e-06, "loss": 0.7393, "step": 5386 }, { "epoch": 0.27685270839757425, "grad_norm": 1.1072064638137817, "learning_rate": 8.48545822366513e-06, "loss": 0.788, "step": 5387 }, { "epoch": 0.2769041011409189, "grad_norm": 1.1185179948806763, "learning_rate": 8.484861464168987e-06, "loss": 0.8333, "step": 5388 }, { "epoch": 0.27695549388426355, "grad_norm": 1.0440915822982788, "learning_rate": 8.484264608121713e-06, "loss": 0.7648, "step": 5389 }, { "epoch": 0.2770068866276082, "grad_norm": 1.0296357870101929, "learning_rate": 8.483667655539846e-06, "loss": 0.7531, "step": 5390 }, { "epoch": 0.27705827937095284, "grad_norm": 1.048190951347351, "learning_rate": 8.483070606439923e-06, "loss": 0.8223, "step": 5391 }, { "epoch": 0.27710967211429743, "grad_norm": 0.9317312836647034, "learning_rate": 8.48247346083849e-06, "loss": 0.6566, "step": 5392 }, { "epoch": 0.2771610648576421, "grad_norm": 0.7316219210624695, "learning_rate": 8.481876218752085e-06, "loss": 0.6595, "step": 5393 }, { "epoch": 0.27721245760098673, "grad_norm": 1.1095342636108398, "learning_rate": 8.481278880197261e-06, "loss": 0.771, "step": 5394 }, { "epoch": 0.2772638503443314, "grad_norm": 1.0935989618301392, "learning_rate": 8.480681445190566e-06, "loss": 0.7435, "step": 5395 }, { "epoch": 0.277315243087676, "grad_norm": 0.7224438190460205, "learning_rate": 8.480083913748551e-06, "loss": 0.708, "step": 5396 }, { "epoch": 0.27736663583102067, "grad_norm": 0.7135837078094482, "learning_rate": 8.479486285887774e-06, "loss": 0.6797, "step": 5397 }, { "epoch": 0.2774180285743653, "grad_norm": 1.2090249061584473, "learning_rate": 8.478888561624789e-06, "loss": 0.8155, "step": 5398 }, { "epoch": 0.27746942131770996, "grad_norm": 1.0543020963668823, "learning_rate": 8.478290740976161e-06, "loss": 0.7858, "step": 5399 }, { "epoch": 0.27752081406105455, "grad_norm": 1.0511952638626099, "learning_rate": 8.477692823958448e-06, "loss": 0.7264, "step": 5400 }, { "epoch": 0.2775722068043992, "grad_norm": 1.1163196563720703, "learning_rate": 8.477094810588219e-06, "loss": 0.7905, "step": 5401 }, { "epoch": 0.27762359954774385, "grad_norm": 1.0773780345916748, "learning_rate": 8.476496700882042e-06, "loss": 0.8054, "step": 5402 }, { "epoch": 0.2776749922910885, "grad_norm": 1.1471275091171265, "learning_rate": 8.47589849485649e-06, "loss": 0.798, "step": 5403 }, { "epoch": 0.27772638503443314, "grad_norm": 1.1065164804458618, "learning_rate": 8.475300192528132e-06, "loss": 0.8049, "step": 5404 }, { "epoch": 0.2777777777777778, "grad_norm": 0.7866690754890442, "learning_rate": 8.47470179391355e-06, "loss": 0.6966, "step": 5405 }, { "epoch": 0.27782917052112244, "grad_norm": 1.0428229570388794, "learning_rate": 8.47410329902932e-06, "loss": 0.7944, "step": 5406 }, { "epoch": 0.2778805632644671, "grad_norm": 1.0897939205169678, "learning_rate": 8.473504707892021e-06, "loss": 0.8426, "step": 5407 }, { "epoch": 0.2779319560078117, "grad_norm": 1.0835639238357544, "learning_rate": 8.472906020518243e-06, "loss": 0.7974, "step": 5408 }, { "epoch": 0.2779833487511563, "grad_norm": 1.0952891111373901, "learning_rate": 8.472307236924573e-06, "loss": 0.7558, "step": 5409 }, { "epoch": 0.27803474149450097, "grad_norm": 1.1048285961151123, "learning_rate": 8.471708357127597e-06, "loss": 0.7846, "step": 5410 }, { "epoch": 0.2780861342378456, "grad_norm": 0.75752192735672, "learning_rate": 8.47110938114391e-06, "loss": 0.7368, "step": 5411 }, { "epoch": 0.27813752698119026, "grad_norm": 1.2553702592849731, "learning_rate": 8.470510308990105e-06, "loss": 0.7968, "step": 5412 }, { "epoch": 0.2781889197245349, "grad_norm": 1.0701875686645508, "learning_rate": 8.469911140682782e-06, "loss": 0.8207, "step": 5413 }, { "epoch": 0.27824031246787956, "grad_norm": 1.1648060083389282, "learning_rate": 8.469311876238542e-06, "loss": 0.8424, "step": 5414 }, { "epoch": 0.27829170521122415, "grad_norm": 1.2138426303863525, "learning_rate": 8.468712515673985e-06, "loss": 0.7975, "step": 5415 }, { "epoch": 0.2783430979545688, "grad_norm": 1.0524147748947144, "learning_rate": 8.46811305900572e-06, "loss": 0.7646, "step": 5416 }, { "epoch": 0.27839449069791344, "grad_norm": 0.7821248769760132, "learning_rate": 8.467513506250354e-06, "loss": 0.7003, "step": 5417 }, { "epoch": 0.2784458834412581, "grad_norm": 1.1876329183578491, "learning_rate": 8.466913857424499e-06, "loss": 0.8045, "step": 5418 }, { "epoch": 0.27849727618460274, "grad_norm": 1.0438202619552612, "learning_rate": 8.466314112544767e-06, "loss": 0.7365, "step": 5419 }, { "epoch": 0.2785486689279474, "grad_norm": 1.0742578506469727, "learning_rate": 8.465714271627777e-06, "loss": 0.7974, "step": 5420 }, { "epoch": 0.27860006167129203, "grad_norm": 0.7598520517349243, "learning_rate": 8.465114334690146e-06, "loss": 0.6677, "step": 5421 }, { "epoch": 0.2786514544146367, "grad_norm": 1.092868447303772, "learning_rate": 8.464514301748496e-06, "loss": 0.7995, "step": 5422 }, { "epoch": 0.27870284715798127, "grad_norm": 1.0680253505706787, "learning_rate": 8.463914172819452e-06, "loss": 0.7613, "step": 5423 }, { "epoch": 0.2787542399013259, "grad_norm": 1.1098958253860474, "learning_rate": 8.46331394791964e-06, "loss": 0.7286, "step": 5424 }, { "epoch": 0.27880563264467056, "grad_norm": 1.123618483543396, "learning_rate": 8.46271362706569e-06, "loss": 0.7662, "step": 5425 }, { "epoch": 0.2788570253880152, "grad_norm": 1.1878520250320435, "learning_rate": 8.462113210274239e-06, "loss": 0.8002, "step": 5426 }, { "epoch": 0.27890841813135986, "grad_norm": 1.0876141786575317, "learning_rate": 8.461512697561915e-06, "loss": 0.7332, "step": 5427 }, { "epoch": 0.2789598108747045, "grad_norm": 0.7556460499763489, "learning_rate": 8.460912088945361e-06, "loss": 0.7154, "step": 5428 }, { "epoch": 0.27901120361804915, "grad_norm": 0.780170738697052, "learning_rate": 8.460311384441215e-06, "loss": 0.7298, "step": 5429 }, { "epoch": 0.27906259636139374, "grad_norm": 1.0849303007125854, "learning_rate": 8.45971058406612e-06, "loss": 0.7705, "step": 5430 }, { "epoch": 0.2791139891047384, "grad_norm": 0.7247140407562256, "learning_rate": 8.459109687836721e-06, "loss": 0.6882, "step": 5431 }, { "epoch": 0.27916538184808304, "grad_norm": 0.7308477163314819, "learning_rate": 8.458508695769669e-06, "loss": 0.7294, "step": 5432 }, { "epoch": 0.2792167745914277, "grad_norm": 0.800254225730896, "learning_rate": 8.457907607881612e-06, "loss": 0.6964, "step": 5433 }, { "epoch": 0.27926816733477233, "grad_norm": 1.088930606842041, "learning_rate": 8.457306424189207e-06, "loss": 0.6773, "step": 5434 }, { "epoch": 0.279319560078117, "grad_norm": 1.1602481603622437, "learning_rate": 8.456705144709108e-06, "loss": 0.7528, "step": 5435 }, { "epoch": 0.2793709528214616, "grad_norm": 1.5446277856826782, "learning_rate": 8.456103769457974e-06, "loss": 0.7796, "step": 5436 }, { "epoch": 0.2794223455648063, "grad_norm": 1.1176252365112305, "learning_rate": 8.455502298452467e-06, "loss": 0.7225, "step": 5437 }, { "epoch": 0.27947373830815087, "grad_norm": 1.0958378314971924, "learning_rate": 8.45490073170925e-06, "loss": 0.803, "step": 5438 }, { "epoch": 0.2795251310514955, "grad_norm": 0.7505422234535217, "learning_rate": 8.454299069244993e-06, "loss": 0.7058, "step": 5439 }, { "epoch": 0.27957652379484016, "grad_norm": 0.8660080432891846, "learning_rate": 8.453697311076364e-06, "loss": 0.6874, "step": 5440 }, { "epoch": 0.2796279165381848, "grad_norm": 1.0632009506225586, "learning_rate": 8.453095457220033e-06, "loss": 0.7674, "step": 5441 }, { "epoch": 0.27967930928152945, "grad_norm": 1.084378719329834, "learning_rate": 8.45249350769268e-06, "loss": 0.7459, "step": 5442 }, { "epoch": 0.2797307020248741, "grad_norm": 0.7546572685241699, "learning_rate": 8.451891462510977e-06, "loss": 0.7398, "step": 5443 }, { "epoch": 0.27978209476821875, "grad_norm": 1.0199382305145264, "learning_rate": 8.451289321691609e-06, "loss": 0.801, "step": 5444 }, { "epoch": 0.2798334875115634, "grad_norm": 1.1750742197036743, "learning_rate": 8.450687085251255e-06, "loss": 0.8125, "step": 5445 }, { "epoch": 0.279884880254908, "grad_norm": 0.7520389556884766, "learning_rate": 8.450084753206601e-06, "loss": 0.7157, "step": 5446 }, { "epoch": 0.27993627299825263, "grad_norm": 0.850633442401886, "learning_rate": 8.449482325574339e-06, "loss": 0.6856, "step": 5447 }, { "epoch": 0.2799876657415973, "grad_norm": 1.1524409055709839, "learning_rate": 8.448879802371155e-06, "loss": 0.7676, "step": 5448 }, { "epoch": 0.2800390584849419, "grad_norm": 1.1859307289123535, "learning_rate": 8.448277183613743e-06, "loss": 0.7898, "step": 5449 }, { "epoch": 0.2800904512282866, "grad_norm": 0.8038631677627563, "learning_rate": 8.447674469318802e-06, "loss": 0.7195, "step": 5450 }, { "epoch": 0.2801418439716312, "grad_norm": 1.1111232042312622, "learning_rate": 8.44707165950303e-06, "loss": 0.7946, "step": 5451 }, { "epoch": 0.28019323671497587, "grad_norm": 0.7855945229530334, "learning_rate": 8.446468754183125e-06, "loss": 0.6936, "step": 5452 }, { "epoch": 0.28024462945832046, "grad_norm": 1.0613653659820557, "learning_rate": 8.445865753375797e-06, "loss": 0.7517, "step": 5453 }, { "epoch": 0.2802960222016651, "grad_norm": 1.114047646522522, "learning_rate": 8.445262657097748e-06, "loss": 0.7919, "step": 5454 }, { "epoch": 0.28034741494500975, "grad_norm": 1.1228047609329224, "learning_rate": 8.444659465365688e-06, "loss": 0.8, "step": 5455 }, { "epoch": 0.2803988076883544, "grad_norm": 1.032792568206787, "learning_rate": 8.444056178196329e-06, "loss": 0.7221, "step": 5456 }, { "epoch": 0.28045020043169905, "grad_norm": 1.1268510818481445, "learning_rate": 8.443452795606385e-06, "loss": 0.7749, "step": 5457 }, { "epoch": 0.2805015931750437, "grad_norm": 1.1430174112319946, "learning_rate": 8.442849317612578e-06, "loss": 0.7949, "step": 5458 }, { "epoch": 0.28055298591838834, "grad_norm": 1.0804526805877686, "learning_rate": 8.442245744231621e-06, "loss": 0.8009, "step": 5459 }, { "epoch": 0.280604378661733, "grad_norm": 0.8016070127487183, "learning_rate": 8.441642075480241e-06, "loss": 0.7135, "step": 5460 }, { "epoch": 0.2806557714050776, "grad_norm": 1.0725575685501099, "learning_rate": 8.441038311375163e-06, "loss": 0.7327, "step": 5461 }, { "epoch": 0.28070716414842223, "grad_norm": 1.0554770231246948, "learning_rate": 8.440434451933112e-06, "loss": 0.804, "step": 5462 }, { "epoch": 0.2807585568917669, "grad_norm": 1.995858907699585, "learning_rate": 8.43983049717082e-06, "loss": 0.7411, "step": 5463 }, { "epoch": 0.2808099496351115, "grad_norm": 1.1054331064224243, "learning_rate": 8.439226447105021e-06, "loss": 0.8082, "step": 5464 }, { "epoch": 0.28086134237845617, "grad_norm": 1.0908995866775513, "learning_rate": 8.438622301752451e-06, "loss": 0.774, "step": 5465 }, { "epoch": 0.2809127351218008, "grad_norm": 1.0509769916534424, "learning_rate": 8.438018061129846e-06, "loss": 0.7433, "step": 5466 }, { "epoch": 0.28096412786514546, "grad_norm": 0.8322144150733948, "learning_rate": 8.437413725253949e-06, "loss": 0.6506, "step": 5467 }, { "epoch": 0.28101552060849005, "grad_norm": 1.1251063346862793, "learning_rate": 8.436809294141503e-06, "loss": 0.7111, "step": 5468 }, { "epoch": 0.2810669133518347, "grad_norm": 1.106492519378662, "learning_rate": 8.436204767809254e-06, "loss": 0.8089, "step": 5469 }, { "epoch": 0.28111830609517935, "grad_norm": 0.7839671969413757, "learning_rate": 8.435600146273953e-06, "loss": 0.6875, "step": 5470 }, { "epoch": 0.281169698838524, "grad_norm": 1.10338294506073, "learning_rate": 8.434995429552347e-06, "loss": 0.7929, "step": 5471 }, { "epoch": 0.28122109158186864, "grad_norm": 0.8638939261436462, "learning_rate": 8.434390617661195e-06, "loss": 0.6829, "step": 5472 }, { "epoch": 0.2812724843252133, "grad_norm": 0.6777721643447876, "learning_rate": 8.433785710617249e-06, "loss": 0.6698, "step": 5473 }, { "epoch": 0.28132387706855794, "grad_norm": 1.1163380146026611, "learning_rate": 8.433180708437274e-06, "loss": 0.7898, "step": 5474 }, { "epoch": 0.2813752698119026, "grad_norm": 0.8966031670570374, "learning_rate": 8.43257561113803e-06, "loss": 0.6901, "step": 5475 }, { "epoch": 0.2814266625552472, "grad_norm": 1.3003183603286743, "learning_rate": 8.43197041873628e-06, "loss": 0.751, "step": 5476 }, { "epoch": 0.2814780552985918, "grad_norm": 1.0738472938537598, "learning_rate": 8.431365131248791e-06, "loss": 0.7405, "step": 5477 }, { "epoch": 0.28152944804193647, "grad_norm": 1.0920639038085938, "learning_rate": 8.430759748692336e-06, "loss": 0.773, "step": 5478 }, { "epoch": 0.2815808407852811, "grad_norm": 1.0732194185256958, "learning_rate": 8.430154271083688e-06, "loss": 0.8153, "step": 5479 }, { "epoch": 0.28163223352862576, "grad_norm": 1.2458409070968628, "learning_rate": 8.42954869843962e-06, "loss": 0.7318, "step": 5480 }, { "epoch": 0.2816836262719704, "grad_norm": 1.157037377357483, "learning_rate": 8.428943030776907e-06, "loss": 0.8012, "step": 5481 }, { "epoch": 0.28173501901531506, "grad_norm": 1.1143583059310913, "learning_rate": 8.428337268112338e-06, "loss": 0.7583, "step": 5482 }, { "epoch": 0.28178641175865965, "grad_norm": 1.1703667640686035, "learning_rate": 8.42773141046269e-06, "loss": 0.7513, "step": 5483 }, { "epoch": 0.2818378045020043, "grad_norm": 1.0678144693374634, "learning_rate": 8.427125457844746e-06, "loss": 0.7577, "step": 5484 }, { "epoch": 0.28188919724534894, "grad_norm": 0.7678773999214172, "learning_rate": 8.426519410275304e-06, "loss": 0.6921, "step": 5485 }, { "epoch": 0.2819405899886936, "grad_norm": 1.1912566423416138, "learning_rate": 8.425913267771146e-06, "loss": 0.8636, "step": 5486 }, { "epoch": 0.28199198273203824, "grad_norm": 1.189429759979248, "learning_rate": 8.42530703034907e-06, "loss": 0.7913, "step": 5487 }, { "epoch": 0.2820433754753829, "grad_norm": 1.1005582809448242, "learning_rate": 8.424700698025873e-06, "loss": 0.8107, "step": 5488 }, { "epoch": 0.28209476821872753, "grad_norm": 1.049407720565796, "learning_rate": 8.42409427081835e-06, "loss": 0.7916, "step": 5489 }, { "epoch": 0.2821461609620722, "grad_norm": 1.205552577972412, "learning_rate": 8.423487748743306e-06, "loss": 0.7698, "step": 5490 }, { "epoch": 0.28219755370541677, "grad_norm": 1.2440963983535767, "learning_rate": 8.422881131817546e-06, "loss": 0.7554, "step": 5491 }, { "epoch": 0.2822489464487614, "grad_norm": 0.7267575860023499, "learning_rate": 8.422274420057875e-06, "loss": 0.7106, "step": 5492 }, { "epoch": 0.28230033919210606, "grad_norm": 1.0136288404464722, "learning_rate": 8.421667613481102e-06, "loss": 0.7958, "step": 5493 }, { "epoch": 0.2823517319354507, "grad_norm": 1.067198395729065, "learning_rate": 8.421060712104038e-06, "loss": 0.7648, "step": 5494 }, { "epoch": 0.28240312467879536, "grad_norm": 1.1421473026275635, "learning_rate": 8.420453715943502e-06, "loss": 0.7602, "step": 5495 }, { "epoch": 0.28245451742214, "grad_norm": 1.1569751501083374, "learning_rate": 8.419846625016307e-06, "loss": 0.7703, "step": 5496 }, { "epoch": 0.28250591016548465, "grad_norm": 0.794883668422699, "learning_rate": 8.419239439339277e-06, "loss": 0.7213, "step": 5497 }, { "epoch": 0.2825573029088293, "grad_norm": 1.2750657796859741, "learning_rate": 8.418632158929233e-06, "loss": 0.7643, "step": 5498 }, { "epoch": 0.2826086956521739, "grad_norm": 1.0822252035140991, "learning_rate": 8.418024783802999e-06, "loss": 0.7712, "step": 5499 }, { "epoch": 0.28266008839551854, "grad_norm": 1.223413109779358, "learning_rate": 8.417417313977402e-06, "loss": 0.7898, "step": 5500 }, { "epoch": 0.2827114811388632, "grad_norm": 1.0950260162353516, "learning_rate": 8.416809749469275e-06, "loss": 0.7906, "step": 5501 }, { "epoch": 0.28276287388220783, "grad_norm": 1.1743396520614624, "learning_rate": 8.416202090295448e-06, "loss": 0.8077, "step": 5502 }, { "epoch": 0.2828142666255525, "grad_norm": 1.1662472486495972, "learning_rate": 8.415594336472764e-06, "loss": 0.8005, "step": 5503 }, { "epoch": 0.2828656593688971, "grad_norm": 0.7794203162193298, "learning_rate": 8.414986488018053e-06, "loss": 0.6802, "step": 5504 }, { "epoch": 0.2829170521122418, "grad_norm": 1.1030915975570679, "learning_rate": 8.414378544948159e-06, "loss": 0.7047, "step": 5505 }, { "epoch": 0.28296844485558637, "grad_norm": 1.120401382446289, "learning_rate": 8.413770507279926e-06, "loss": 0.7845, "step": 5506 }, { "epoch": 0.283019837598931, "grad_norm": 1.0885928869247437, "learning_rate": 8.413162375030202e-06, "loss": 0.7565, "step": 5507 }, { "epoch": 0.28307123034227566, "grad_norm": 1.0733895301818848, "learning_rate": 8.41255414821583e-06, "loss": 0.726, "step": 5508 }, { "epoch": 0.2831226230856203, "grad_norm": 1.1310372352600098, "learning_rate": 8.41194582685367e-06, "loss": 0.7094, "step": 5509 }, { "epoch": 0.28317401582896495, "grad_norm": 1.1021548509597778, "learning_rate": 8.411337410960567e-06, "loss": 0.7621, "step": 5510 }, { "epoch": 0.2832254085723096, "grad_norm": 0.7907571792602539, "learning_rate": 8.410728900553384e-06, "loss": 0.7, "step": 5511 }, { "epoch": 0.28327680131565425, "grad_norm": 0.8956877589225769, "learning_rate": 8.41012029564898e-06, "loss": 0.6343, "step": 5512 }, { "epoch": 0.2833281940589989, "grad_norm": 0.7475018501281738, "learning_rate": 8.409511596264213e-06, "loss": 0.6756, "step": 5513 }, { "epoch": 0.2833795868023435, "grad_norm": 1.1505030393600464, "learning_rate": 8.408902802415951e-06, "loss": 0.7693, "step": 5514 }, { "epoch": 0.28343097954568813, "grad_norm": 1.1221455335617065, "learning_rate": 8.40829391412106e-06, "loss": 0.7891, "step": 5515 }, { "epoch": 0.2834823722890328, "grad_norm": 1.0632977485656738, "learning_rate": 8.40768493139641e-06, "loss": 0.746, "step": 5516 }, { "epoch": 0.2835337650323774, "grad_norm": 1.0695126056671143, "learning_rate": 8.407075854258873e-06, "loss": 0.7757, "step": 5517 }, { "epoch": 0.2835851577757221, "grad_norm": 0.7279176115989685, "learning_rate": 8.406466682725324e-06, "loss": 0.6869, "step": 5518 }, { "epoch": 0.2836365505190667, "grad_norm": 1.1311094760894775, "learning_rate": 8.40585741681264e-06, "loss": 0.8046, "step": 5519 }, { "epoch": 0.28368794326241137, "grad_norm": 1.1183536052703857, "learning_rate": 8.405248056537704e-06, "loss": 0.7964, "step": 5520 }, { "epoch": 0.28373933600575596, "grad_norm": 1.1579886674880981, "learning_rate": 8.404638601917396e-06, "loss": 0.7759, "step": 5521 }, { "epoch": 0.2837907287491006, "grad_norm": 1.131960153579712, "learning_rate": 8.404029052968603e-06, "loss": 0.7978, "step": 5522 }, { "epoch": 0.28384212149244525, "grad_norm": 0.9828609824180603, "learning_rate": 8.403419409708214e-06, "loss": 0.6789, "step": 5523 }, { "epoch": 0.2838935142357899, "grad_norm": 1.1015490293502808, "learning_rate": 8.402809672153115e-06, "loss": 0.8088, "step": 5524 }, { "epoch": 0.28394490697913455, "grad_norm": 1.073251485824585, "learning_rate": 8.402199840320204e-06, "loss": 0.7815, "step": 5525 }, { "epoch": 0.2839962997224792, "grad_norm": 0.7691653966903687, "learning_rate": 8.401589914226376e-06, "loss": 0.6764, "step": 5526 }, { "epoch": 0.28404769246582384, "grad_norm": 6.130173206329346, "learning_rate": 8.400979893888529e-06, "loss": 0.8631, "step": 5527 }, { "epoch": 0.2840990852091685, "grad_norm": 0.7439635992050171, "learning_rate": 8.400369779323563e-06, "loss": 0.6851, "step": 5528 }, { "epoch": 0.2841504779525131, "grad_norm": 1.007220983505249, "learning_rate": 8.399759570548383e-06, "loss": 0.7609, "step": 5529 }, { "epoch": 0.28420187069585773, "grad_norm": 1.109399437904358, "learning_rate": 8.399149267579896e-06, "loss": 0.8075, "step": 5530 }, { "epoch": 0.2842532634392024, "grad_norm": 1.1456400156021118, "learning_rate": 8.39853887043501e-06, "loss": 0.7849, "step": 5531 }, { "epoch": 0.284304656182547, "grad_norm": 0.851325511932373, "learning_rate": 8.397928379130637e-06, "loss": 0.6856, "step": 5532 }, { "epoch": 0.28435604892589167, "grad_norm": 1.0858800411224365, "learning_rate": 8.39731779368369e-06, "loss": 0.7338, "step": 5533 }, { "epoch": 0.2844074416692363, "grad_norm": 1.124840497970581, "learning_rate": 8.396707114111089e-06, "loss": 0.8116, "step": 5534 }, { "epoch": 0.28445883441258096, "grad_norm": 1.015531301498413, "learning_rate": 8.39609634042975e-06, "loss": 0.6974, "step": 5535 }, { "epoch": 0.2845102271559256, "grad_norm": 1.022371530532837, "learning_rate": 8.395485472656596e-06, "loss": 0.7132, "step": 5536 }, { "epoch": 0.2845616198992702, "grad_norm": 1.0348109006881714, "learning_rate": 8.394874510808552e-06, "loss": 0.76, "step": 5537 }, { "epoch": 0.28461301264261485, "grad_norm": 0.8034092783927917, "learning_rate": 8.394263454902545e-06, "loss": 0.7276, "step": 5538 }, { "epoch": 0.2846644053859595, "grad_norm": 1.0559484958648682, "learning_rate": 8.393652304955506e-06, "loss": 0.815, "step": 5539 }, { "epoch": 0.28471579812930414, "grad_norm": 1.0767971277236938, "learning_rate": 8.393041060984366e-06, "loss": 0.7517, "step": 5540 }, { "epoch": 0.2847671908726488, "grad_norm": 1.0510344505310059, "learning_rate": 8.392429723006059e-06, "loss": 0.7939, "step": 5541 }, { "epoch": 0.28481858361599344, "grad_norm": 1.095042109489441, "learning_rate": 8.391818291037526e-06, "loss": 0.7789, "step": 5542 }, { "epoch": 0.2848699763593381, "grad_norm": 1.271406650543213, "learning_rate": 8.391206765095705e-06, "loss": 0.7416, "step": 5543 }, { "epoch": 0.2849213691026827, "grad_norm": 1.087684988975525, "learning_rate": 8.39059514519754e-06, "loss": 0.7577, "step": 5544 }, { "epoch": 0.2849727618460273, "grad_norm": 0.8637363910675049, "learning_rate": 8.389983431359973e-06, "loss": 0.6961, "step": 5545 }, { "epoch": 0.28502415458937197, "grad_norm": 0.8796000480651855, "learning_rate": 8.389371623599956e-06, "loss": 0.6626, "step": 5546 }, { "epoch": 0.2850755473327166, "grad_norm": 1.0837634801864624, "learning_rate": 8.388759721934439e-06, "loss": 0.7923, "step": 5547 }, { "epoch": 0.28512694007606126, "grad_norm": 1.1532115936279297, "learning_rate": 8.388147726380374e-06, "loss": 0.8316, "step": 5548 }, { "epoch": 0.2851783328194059, "grad_norm": 0.8441208004951477, "learning_rate": 8.387535636954719e-06, "loss": 0.712, "step": 5549 }, { "epoch": 0.28522972556275056, "grad_norm": 1.100807785987854, "learning_rate": 8.38692345367443e-06, "loss": 0.7576, "step": 5550 }, { "epoch": 0.2852811183060952, "grad_norm": 1.1495287418365479, "learning_rate": 8.386311176556467e-06, "loss": 0.7829, "step": 5551 }, { "epoch": 0.2853325110494398, "grad_norm": 0.7996395826339722, "learning_rate": 8.3856988056178e-06, "loss": 0.6936, "step": 5552 }, { "epoch": 0.28538390379278444, "grad_norm": 1.0803638696670532, "learning_rate": 8.385086340875388e-06, "loss": 0.8106, "step": 5553 }, { "epoch": 0.2854352965361291, "grad_norm": 1.097886085510254, "learning_rate": 8.384473782346203e-06, "loss": 0.7908, "step": 5554 }, { "epoch": 0.28548668927947374, "grad_norm": 1.0587661266326904, "learning_rate": 8.383861130047218e-06, "loss": 0.7735, "step": 5555 }, { "epoch": 0.2855380820228184, "grad_norm": 1.1672956943511963, "learning_rate": 8.383248383995405e-06, "loss": 0.7644, "step": 5556 }, { "epoch": 0.28558947476616303, "grad_norm": 1.1685290336608887, "learning_rate": 8.38263554420774e-06, "loss": 0.7508, "step": 5557 }, { "epoch": 0.2856408675095077, "grad_norm": 1.0458850860595703, "learning_rate": 8.382022610701204e-06, "loss": 0.74, "step": 5558 }, { "epoch": 0.28569226025285227, "grad_norm": 1.0270891189575195, "learning_rate": 8.38140958349278e-06, "loss": 0.7676, "step": 5559 }, { "epoch": 0.2857436529961969, "grad_norm": 1.0821629762649536, "learning_rate": 8.380796462599448e-06, "loss": 0.796, "step": 5560 }, { "epoch": 0.28579504573954156, "grad_norm": 1.111987829208374, "learning_rate": 8.380183248038198e-06, "loss": 0.7464, "step": 5561 }, { "epoch": 0.2858464384828862, "grad_norm": 1.0526288747787476, "learning_rate": 8.379569939826022e-06, "loss": 0.7472, "step": 5562 }, { "epoch": 0.28589783122623086, "grad_norm": 1.2051901817321777, "learning_rate": 8.378956537979907e-06, "loss": 0.7756, "step": 5563 }, { "epoch": 0.2859492239695755, "grad_norm": 1.0770734548568726, "learning_rate": 8.378343042516853e-06, "loss": 0.7754, "step": 5564 }, { "epoch": 0.28600061671292015, "grad_norm": 1.1310136318206787, "learning_rate": 8.377729453453852e-06, "loss": 0.7881, "step": 5565 }, { "epoch": 0.2860520094562648, "grad_norm": 1.1231906414031982, "learning_rate": 8.37711577080791e-06, "loss": 0.8021, "step": 5566 }, { "epoch": 0.2861034021996094, "grad_norm": 1.0788301229476929, "learning_rate": 8.376501994596022e-06, "loss": 0.7912, "step": 5567 }, { "epoch": 0.28615479494295404, "grad_norm": 1.1053603887557983, "learning_rate": 8.3758881248352e-06, "loss": 0.7447, "step": 5568 }, { "epoch": 0.2862061876862987, "grad_norm": 1.2205684185028076, "learning_rate": 8.37527416154245e-06, "loss": 0.8371, "step": 5569 }, { "epoch": 0.28625758042964333, "grad_norm": 0.9917560815811157, "learning_rate": 8.374660104734784e-06, "loss": 0.7161, "step": 5570 }, { "epoch": 0.286308973172988, "grad_norm": 1.0688197612762451, "learning_rate": 8.374045954429211e-06, "loss": 0.822, "step": 5571 }, { "epoch": 0.2863603659163326, "grad_norm": 1.2498170137405396, "learning_rate": 8.373431710642748e-06, "loss": 0.7807, "step": 5572 }, { "epoch": 0.2864117586596773, "grad_norm": 1.055712342262268, "learning_rate": 8.372817373392412e-06, "loss": 0.7837, "step": 5573 }, { "epoch": 0.2864631514030219, "grad_norm": 0.8229730129241943, "learning_rate": 8.372202942695228e-06, "loss": 0.684, "step": 5574 }, { "epoch": 0.2865145441463665, "grad_norm": 0.7589721083641052, "learning_rate": 8.371588418568216e-06, "loss": 0.6801, "step": 5575 }, { "epoch": 0.28656593688971116, "grad_norm": 1.1124353408813477, "learning_rate": 8.370973801028404e-06, "loss": 0.8072, "step": 5576 }, { "epoch": 0.2866173296330558, "grad_norm": 1.1101818084716797, "learning_rate": 8.370359090092816e-06, "loss": 0.8049, "step": 5577 }, { "epoch": 0.28666872237640045, "grad_norm": 0.929706871509552, "learning_rate": 8.369744285778489e-06, "loss": 0.6492, "step": 5578 }, { "epoch": 0.2867201151197451, "grad_norm": 1.1461102962493896, "learning_rate": 8.369129388102453e-06, "loss": 0.7328, "step": 5579 }, { "epoch": 0.28677150786308975, "grad_norm": 1.1354528665542603, "learning_rate": 8.368514397081744e-06, "loss": 0.8171, "step": 5580 }, { "epoch": 0.2868229006064344, "grad_norm": 1.067921757698059, "learning_rate": 8.367899312733404e-06, "loss": 0.7386, "step": 5581 }, { "epoch": 0.286874293349779, "grad_norm": 1.1526296138763428, "learning_rate": 8.367284135074472e-06, "loss": 0.7819, "step": 5582 }, { "epoch": 0.28692568609312363, "grad_norm": 1.0225430727005005, "learning_rate": 8.366668864121991e-06, "loss": 0.8147, "step": 5583 }, { "epoch": 0.2869770788364683, "grad_norm": 1.0895295143127441, "learning_rate": 8.366053499893012e-06, "loss": 0.7885, "step": 5584 }, { "epoch": 0.2870284715798129, "grad_norm": 1.1318414211273193, "learning_rate": 8.36543804240458e-06, "loss": 0.7876, "step": 5585 }, { "epoch": 0.2870798643231576, "grad_norm": 0.7978352904319763, "learning_rate": 8.364822491673749e-06, "loss": 0.7149, "step": 5586 }, { "epoch": 0.2871312570665022, "grad_norm": 1.0664278268814087, "learning_rate": 8.36420684771757e-06, "loss": 0.6915, "step": 5587 }, { "epoch": 0.28718264980984687, "grad_norm": 1.0693354606628418, "learning_rate": 8.363591110553105e-06, "loss": 0.7747, "step": 5588 }, { "epoch": 0.2872340425531915, "grad_norm": 1.101609468460083, "learning_rate": 8.36297528019741e-06, "loss": 0.7749, "step": 5589 }, { "epoch": 0.2872854352965361, "grad_norm": 0.784743070602417, "learning_rate": 8.362359356667548e-06, "loss": 0.7076, "step": 5590 }, { "epoch": 0.28733682803988075, "grad_norm": 1.1941373348236084, "learning_rate": 8.361743339980586e-06, "loss": 0.7885, "step": 5591 }, { "epoch": 0.2873882207832254, "grad_norm": 1.1541401147842407, "learning_rate": 8.361127230153588e-06, "loss": 0.8146, "step": 5592 }, { "epoch": 0.28743961352657005, "grad_norm": 1.043007731437683, "learning_rate": 8.360511027203624e-06, "loss": 0.7392, "step": 5593 }, { "epoch": 0.2874910062699147, "grad_norm": 1.027769923210144, "learning_rate": 8.359894731147767e-06, "loss": 0.7828, "step": 5594 }, { "epoch": 0.28754239901325934, "grad_norm": 1.0957672595977783, "learning_rate": 8.359278342003094e-06, "loss": 0.7927, "step": 5595 }, { "epoch": 0.287593791756604, "grad_norm": 1.103031039237976, "learning_rate": 8.35866185978668e-06, "loss": 0.8367, "step": 5596 }, { "epoch": 0.2876451844999486, "grad_norm": 1.0995343923568726, "learning_rate": 8.358045284515607e-06, "loss": 0.7902, "step": 5597 }, { "epoch": 0.28769657724329323, "grad_norm": 1.044679045677185, "learning_rate": 8.357428616206958e-06, "loss": 0.7699, "step": 5598 }, { "epoch": 0.2877479699866379, "grad_norm": 1.236358642578125, "learning_rate": 8.356811854877815e-06, "loss": 0.8296, "step": 5599 }, { "epoch": 0.2877993627299825, "grad_norm": 1.036624550819397, "learning_rate": 8.35619500054527e-06, "loss": 0.7063, "step": 5600 }, { "epoch": 0.28785075547332717, "grad_norm": 1.1533541679382324, "learning_rate": 8.35557805322641e-06, "loss": 0.7591, "step": 5601 }, { "epoch": 0.2879021482166718, "grad_norm": 1.0949684381484985, "learning_rate": 8.354961012938332e-06, "loss": 0.7883, "step": 5602 }, { "epoch": 0.28795354096001646, "grad_norm": 1.1274027824401855, "learning_rate": 8.354343879698127e-06, "loss": 0.8172, "step": 5603 }, { "epoch": 0.2880049337033611, "grad_norm": 1.0572094917297363, "learning_rate": 8.353726653522897e-06, "loss": 0.7866, "step": 5604 }, { "epoch": 0.2880563264467057, "grad_norm": 1.0444684028625488, "learning_rate": 8.353109334429742e-06, "loss": 0.7383, "step": 5605 }, { "epoch": 0.28810771919005035, "grad_norm": 0.824914813041687, "learning_rate": 8.352491922435763e-06, "loss": 0.6817, "step": 5606 }, { "epoch": 0.288159111933395, "grad_norm": 1.0018022060394287, "learning_rate": 8.35187441755807e-06, "loss": 0.7131, "step": 5607 }, { "epoch": 0.28821050467673964, "grad_norm": 1.1074885129928589, "learning_rate": 8.35125681981377e-06, "loss": 0.7719, "step": 5608 }, { "epoch": 0.2882618974200843, "grad_norm": 1.0992136001586914, "learning_rate": 8.35063912921997e-06, "loss": 0.8299, "step": 5609 }, { "epoch": 0.28831329016342894, "grad_norm": 1.117929220199585, "learning_rate": 8.350021345793788e-06, "loss": 0.7939, "step": 5610 }, { "epoch": 0.2883646829067736, "grad_norm": 1.1543155908584595, "learning_rate": 8.34940346955234e-06, "loss": 0.8063, "step": 5611 }, { "epoch": 0.28841607565011823, "grad_norm": 1.1072852611541748, "learning_rate": 8.348785500512744e-06, "loss": 0.7475, "step": 5612 }, { "epoch": 0.2884674683934628, "grad_norm": 0.8498141169548035, "learning_rate": 8.348167438692121e-06, "loss": 0.681, "step": 5613 }, { "epoch": 0.28851886113680747, "grad_norm": 1.1013530492782593, "learning_rate": 8.347549284107595e-06, "loss": 0.7977, "step": 5614 }, { "epoch": 0.2885702538801521, "grad_norm": 1.0776612758636475, "learning_rate": 8.346931036776293e-06, "loss": 0.7545, "step": 5615 }, { "epoch": 0.28862164662349676, "grad_norm": 1.1438453197479248, "learning_rate": 8.346312696715346e-06, "loss": 0.7399, "step": 5616 }, { "epoch": 0.2886730393668414, "grad_norm": 1.0392910242080688, "learning_rate": 8.34569426394188e-06, "loss": 0.7671, "step": 5617 }, { "epoch": 0.28872443211018606, "grad_norm": 1.5211209058761597, "learning_rate": 8.345075738473036e-06, "loss": 0.8003, "step": 5618 }, { "epoch": 0.2887758248535307, "grad_norm": 1.0443077087402344, "learning_rate": 8.344457120325947e-06, "loss": 0.746, "step": 5619 }, { "epoch": 0.2888272175968753, "grad_norm": 1.0974020957946777, "learning_rate": 8.343838409517752e-06, "loss": 0.7881, "step": 5620 }, { "epoch": 0.28887861034021994, "grad_norm": 1.1144496202468872, "learning_rate": 8.343219606065594e-06, "loss": 0.7889, "step": 5621 }, { "epoch": 0.2889300030835646, "grad_norm": 1.0476267337799072, "learning_rate": 8.342600709986617e-06, "loss": 0.7922, "step": 5622 }, { "epoch": 0.28898139582690924, "grad_norm": 1.1764726638793945, "learning_rate": 8.34198172129797e-06, "loss": 0.7736, "step": 5623 }, { "epoch": 0.2890327885702539, "grad_norm": 1.1430237293243408, "learning_rate": 8.3413626400168e-06, "loss": 0.7675, "step": 5624 }, { "epoch": 0.28908418131359853, "grad_norm": 1.0761466026306152, "learning_rate": 8.34074346616026e-06, "loss": 0.7455, "step": 5625 }, { "epoch": 0.2891355740569432, "grad_norm": 1.0496340990066528, "learning_rate": 8.340124199745504e-06, "loss": 0.7798, "step": 5626 }, { "epoch": 0.2891869668002878, "grad_norm": 1.0536208152770996, "learning_rate": 8.339504840789692e-06, "loss": 0.7647, "step": 5627 }, { "epoch": 0.2892383595436324, "grad_norm": 1.0972557067871094, "learning_rate": 8.338885389309983e-06, "loss": 0.7946, "step": 5628 }, { "epoch": 0.28928975228697706, "grad_norm": 1.084070086479187, "learning_rate": 8.338265845323537e-06, "loss": 0.8029, "step": 5629 }, { "epoch": 0.2893411450303217, "grad_norm": 1.0093731880187988, "learning_rate": 8.33764620884752e-06, "loss": 0.7366, "step": 5630 }, { "epoch": 0.28939253777366636, "grad_norm": 0.7974849343299866, "learning_rate": 8.337026479899103e-06, "loss": 0.7087, "step": 5631 }, { "epoch": 0.289443930517011, "grad_norm": 1.091705083847046, "learning_rate": 8.336406658495451e-06, "loss": 0.7712, "step": 5632 }, { "epoch": 0.28949532326035565, "grad_norm": 1.121004343032837, "learning_rate": 8.33578674465374e-06, "loss": 0.7659, "step": 5633 }, { "epoch": 0.2895467160037003, "grad_norm": 1.0774955749511719, "learning_rate": 8.335166738391143e-06, "loss": 0.8366, "step": 5634 }, { "epoch": 0.2895981087470449, "grad_norm": 1.1118353605270386, "learning_rate": 8.334546639724839e-06, "loss": 0.7641, "step": 5635 }, { "epoch": 0.28964950149038954, "grad_norm": 1.0571403503417969, "learning_rate": 8.33392644867201e-06, "loss": 0.7592, "step": 5636 }, { "epoch": 0.2897008942337342, "grad_norm": 1.1711331605911255, "learning_rate": 8.333306165249836e-06, "loss": 0.7847, "step": 5637 }, { "epoch": 0.28975228697707883, "grad_norm": 1.0693656206130981, "learning_rate": 8.332685789475505e-06, "loss": 0.7621, "step": 5638 }, { "epoch": 0.2898036797204235, "grad_norm": 1.0910197496414185, "learning_rate": 8.332065321366205e-06, "loss": 0.7364, "step": 5639 }, { "epoch": 0.2898550724637681, "grad_norm": 1.2406820058822632, "learning_rate": 8.331444760939124e-06, "loss": 0.8178, "step": 5640 }, { "epoch": 0.2899064652071128, "grad_norm": 1.08094322681427, "learning_rate": 8.330824108211456e-06, "loss": 0.7003, "step": 5641 }, { "epoch": 0.2899578579504574, "grad_norm": 1.0541576147079468, "learning_rate": 8.3302033632004e-06, "loss": 0.7594, "step": 5642 }, { "epoch": 0.290009250693802, "grad_norm": 1.1146278381347656, "learning_rate": 8.32958252592315e-06, "loss": 0.7401, "step": 5643 }, { "epoch": 0.29006064343714666, "grad_norm": 1.0300171375274658, "learning_rate": 8.328961596396909e-06, "loss": 0.7563, "step": 5644 }, { "epoch": 0.2901120361804913, "grad_norm": 1.0906977653503418, "learning_rate": 8.32834057463888e-06, "loss": 0.7982, "step": 5645 }, { "epoch": 0.29016342892383595, "grad_norm": 0.7406985759735107, "learning_rate": 8.327719460666268e-06, "loss": 0.7098, "step": 5646 }, { "epoch": 0.2902148216671806, "grad_norm": 1.0802584886550903, "learning_rate": 8.327098254496283e-06, "loss": 0.8108, "step": 5647 }, { "epoch": 0.29026621441052525, "grad_norm": 1.0948731899261475, "learning_rate": 8.326476956146137e-06, "loss": 0.8159, "step": 5648 }, { "epoch": 0.2903176071538699, "grad_norm": 1.1170322895050049, "learning_rate": 8.32585556563304e-06, "loss": 0.7929, "step": 5649 }, { "epoch": 0.2903689998972145, "grad_norm": 1.10662043094635, "learning_rate": 8.32523408297421e-06, "loss": 0.7909, "step": 5650 }, { "epoch": 0.29042039264055913, "grad_norm": 1.0914491415023804, "learning_rate": 8.324612508186867e-06, "loss": 0.8208, "step": 5651 }, { "epoch": 0.2904717853839038, "grad_norm": 1.0864827632904053, "learning_rate": 8.323990841288232e-06, "loss": 0.7454, "step": 5652 }, { "epoch": 0.2905231781272484, "grad_norm": 1.0954060554504395, "learning_rate": 8.323369082295526e-06, "loss": 0.8062, "step": 5653 }, { "epoch": 0.2905745708705931, "grad_norm": 1.1018965244293213, "learning_rate": 8.32274723122598e-06, "loss": 0.8131, "step": 5654 }, { "epoch": 0.2906259636139377, "grad_norm": 1.1031262874603271, "learning_rate": 8.322125288096818e-06, "loss": 0.7304, "step": 5655 }, { "epoch": 0.29067735635728237, "grad_norm": 1.0805996656417847, "learning_rate": 8.321503252925276e-06, "loss": 0.7531, "step": 5656 }, { "epoch": 0.290728749100627, "grad_norm": 1.2027499675750732, "learning_rate": 8.320881125728585e-06, "loss": 0.771, "step": 5657 }, { "epoch": 0.2907801418439716, "grad_norm": 0.7643687129020691, "learning_rate": 8.320258906523983e-06, "loss": 0.6457, "step": 5658 }, { "epoch": 0.29083153458731625, "grad_norm": 1.1028028726577759, "learning_rate": 8.319636595328709e-06, "loss": 0.819, "step": 5659 }, { "epoch": 0.2908829273306609, "grad_norm": 1.1677026748657227, "learning_rate": 8.319014192160001e-06, "loss": 0.7573, "step": 5660 }, { "epoch": 0.29093432007400555, "grad_norm": 1.1305981874465942, "learning_rate": 8.31839169703511e-06, "loss": 0.7485, "step": 5661 }, { "epoch": 0.2909857128173502, "grad_norm": 1.188927412033081, "learning_rate": 8.317769109971277e-06, "loss": 0.8072, "step": 5662 }, { "epoch": 0.29103710556069484, "grad_norm": 0.8874804973602295, "learning_rate": 8.317146430985757e-06, "loss": 0.7183, "step": 5663 }, { "epoch": 0.2910884983040395, "grad_norm": 1.0651243925094604, "learning_rate": 8.316523660095795e-06, "loss": 0.7513, "step": 5664 }, { "epoch": 0.29113989104738414, "grad_norm": 1.0614503622055054, "learning_rate": 8.31590079731865e-06, "loss": 0.7364, "step": 5665 }, { "epoch": 0.2911912837907287, "grad_norm": 0.7790494561195374, "learning_rate": 8.315277842671578e-06, "loss": 0.7104, "step": 5666 }, { "epoch": 0.2912426765340734, "grad_norm": 1.0102440118789673, "learning_rate": 8.314654796171837e-06, "loss": 0.7188, "step": 5667 }, { "epoch": 0.291294069277418, "grad_norm": 1.0722849369049072, "learning_rate": 8.314031657836692e-06, "loss": 0.765, "step": 5668 }, { "epoch": 0.29134546202076267, "grad_norm": 1.060153841972351, "learning_rate": 8.313408427683406e-06, "loss": 0.7128, "step": 5669 }, { "epoch": 0.2913968547641073, "grad_norm": 0.6985536217689514, "learning_rate": 8.312785105729244e-06, "loss": 0.7427, "step": 5670 }, { "epoch": 0.29144824750745196, "grad_norm": 1.0739047527313232, "learning_rate": 8.31216169199148e-06, "loss": 0.7983, "step": 5671 }, { "epoch": 0.2914996402507966, "grad_norm": 1.0389541387557983, "learning_rate": 8.311538186487384e-06, "loss": 0.751, "step": 5672 }, { "epoch": 0.2915510329941412, "grad_norm": 1.0273922681808472, "learning_rate": 8.31091458923423e-06, "loss": 0.7358, "step": 5673 }, { "epoch": 0.29160242573748585, "grad_norm": 1.0913567543029785, "learning_rate": 8.310290900249297e-06, "loss": 0.7897, "step": 5674 }, { "epoch": 0.2916538184808305, "grad_norm": 1.0414783954620361, "learning_rate": 8.309667119549862e-06, "loss": 0.7703, "step": 5675 }, { "epoch": 0.29170521122417514, "grad_norm": 1.1161731481552124, "learning_rate": 8.30904324715321e-06, "loss": 0.7682, "step": 5676 }, { "epoch": 0.2917566039675198, "grad_norm": 1.082031011581421, "learning_rate": 8.308419283076626e-06, "loss": 0.7834, "step": 5677 }, { "epoch": 0.29180799671086444, "grad_norm": 1.0049506425857544, "learning_rate": 8.307795227337397e-06, "loss": 0.7429, "step": 5678 }, { "epoch": 0.2918593894542091, "grad_norm": 1.1536693572998047, "learning_rate": 8.307171079952812e-06, "loss": 0.797, "step": 5679 }, { "epoch": 0.29191078219755373, "grad_norm": 1.1051832437515259, "learning_rate": 8.306546840940165e-06, "loss": 0.7571, "step": 5680 }, { "epoch": 0.2919621749408983, "grad_norm": 1.1587854623794556, "learning_rate": 8.305922510316749e-06, "loss": 0.7965, "step": 5681 }, { "epoch": 0.29201356768424297, "grad_norm": 1.0823333263397217, "learning_rate": 8.305298088099864e-06, "loss": 0.7235, "step": 5682 }, { "epoch": 0.2920649604275876, "grad_norm": 1.1792079210281372, "learning_rate": 8.304673574306807e-06, "loss": 0.8006, "step": 5683 }, { "epoch": 0.29211635317093226, "grad_norm": 1.0556005239486694, "learning_rate": 8.304048968954885e-06, "loss": 0.7532, "step": 5684 }, { "epoch": 0.2921677459142769, "grad_norm": 0.747546911239624, "learning_rate": 8.303424272061402e-06, "loss": 0.6977, "step": 5685 }, { "epoch": 0.29221913865762156, "grad_norm": 1.049843668937683, "learning_rate": 8.302799483643661e-06, "loss": 0.7504, "step": 5686 }, { "epoch": 0.2922705314009662, "grad_norm": 1.18901789188385, "learning_rate": 8.302174603718981e-06, "loss": 0.8433, "step": 5687 }, { "epoch": 0.2923219241443108, "grad_norm": 1.1371957063674927, "learning_rate": 8.301549632304668e-06, "loss": 0.8193, "step": 5688 }, { "epoch": 0.29237331688765544, "grad_norm": 0.9709553122520447, "learning_rate": 8.300924569418039e-06, "loss": 0.7664, "step": 5689 }, { "epoch": 0.2924247096310001, "grad_norm": 1.0759268999099731, "learning_rate": 8.300299415076412e-06, "loss": 0.8017, "step": 5690 }, { "epoch": 0.29247610237434474, "grad_norm": 1.1185113191604614, "learning_rate": 8.299674169297107e-06, "loss": 0.8092, "step": 5691 }, { "epoch": 0.2925274951176894, "grad_norm": 1.0796475410461426, "learning_rate": 8.29904883209745e-06, "loss": 0.7633, "step": 5692 }, { "epoch": 0.29257888786103403, "grad_norm": 1.038589596748352, "learning_rate": 8.298423403494764e-06, "loss": 0.7614, "step": 5693 }, { "epoch": 0.2926302806043787, "grad_norm": 1.0312401056289673, "learning_rate": 8.297797883506376e-06, "loss": 0.7376, "step": 5694 }, { "epoch": 0.2926816733477233, "grad_norm": 1.0788819789886475, "learning_rate": 8.297172272149618e-06, "loss": 0.8343, "step": 5695 }, { "epoch": 0.2927330660910679, "grad_norm": 0.7674497365951538, "learning_rate": 8.296546569441824e-06, "loss": 0.6587, "step": 5696 }, { "epoch": 0.29278445883441256, "grad_norm": 1.0895041227340698, "learning_rate": 8.295920775400326e-06, "loss": 0.76, "step": 5697 }, { "epoch": 0.2928358515777572, "grad_norm": 1.0605055093765259, "learning_rate": 8.295294890042467e-06, "loss": 0.7833, "step": 5698 }, { "epoch": 0.29288724432110186, "grad_norm": 1.0856248140335083, "learning_rate": 8.294668913385584e-06, "loss": 0.7653, "step": 5699 }, { "epoch": 0.2929386370644465, "grad_norm": 1.0853033065795898, "learning_rate": 8.294042845447024e-06, "loss": 0.7933, "step": 5700 }, { "epoch": 0.29299002980779115, "grad_norm": 1.080798864364624, "learning_rate": 8.293416686244128e-06, "loss": 0.7273, "step": 5701 }, { "epoch": 0.2930414225511358, "grad_norm": 1.074588656425476, "learning_rate": 8.292790435794249e-06, "loss": 0.8113, "step": 5702 }, { "epoch": 0.29309281529448045, "grad_norm": 0.8278897404670715, "learning_rate": 8.292164094114734e-06, "loss": 0.6833, "step": 5703 }, { "epoch": 0.29314420803782504, "grad_norm": 1.0970269441604614, "learning_rate": 8.29153766122294e-06, "loss": 0.7886, "step": 5704 }, { "epoch": 0.2931956007811697, "grad_norm": 1.2030894756317139, "learning_rate": 8.290911137136219e-06, "loss": 0.7804, "step": 5705 }, { "epoch": 0.29324699352451433, "grad_norm": 1.0756796598434448, "learning_rate": 8.29028452187193e-06, "loss": 0.7666, "step": 5706 }, { "epoch": 0.293298386267859, "grad_norm": 0.8275964856147766, "learning_rate": 8.289657815447436e-06, "loss": 0.679, "step": 5707 }, { "epoch": 0.2933497790112036, "grad_norm": 0.7493463158607483, "learning_rate": 8.289031017880101e-06, "loss": 0.685, "step": 5708 }, { "epoch": 0.2934011717545483, "grad_norm": 1.1303842067718506, "learning_rate": 8.28840412918729e-06, "loss": 0.777, "step": 5709 }, { "epoch": 0.2934525644978929, "grad_norm": 0.7323131561279297, "learning_rate": 8.287777149386369e-06, "loss": 0.6752, "step": 5710 }, { "epoch": 0.2935039572412375, "grad_norm": 1.0850731134414673, "learning_rate": 8.28715007849471e-06, "loss": 0.7356, "step": 5711 }, { "epoch": 0.29355534998458216, "grad_norm": 1.1178058385849, "learning_rate": 8.28652291652969e-06, "loss": 0.7879, "step": 5712 }, { "epoch": 0.2936067427279268, "grad_norm": 1.0428707599639893, "learning_rate": 8.285895663508681e-06, "loss": 0.7542, "step": 5713 }, { "epoch": 0.29365813547127145, "grad_norm": 1.1091639995574951, "learning_rate": 8.285268319449066e-06, "loss": 0.7433, "step": 5714 }, { "epoch": 0.2937095282146161, "grad_norm": 1.0691372156143188, "learning_rate": 8.28464088436822e-06, "loss": 0.7508, "step": 5715 }, { "epoch": 0.29376092095796075, "grad_norm": 1.0697696208953857, "learning_rate": 8.284013358283532e-06, "loss": 0.7236, "step": 5716 }, { "epoch": 0.2938123137013054, "grad_norm": 0.7605612874031067, "learning_rate": 8.283385741212386e-06, "loss": 0.6525, "step": 5717 }, { "epoch": 0.29386370644465004, "grad_norm": 0.9314214587211609, "learning_rate": 8.28275803317217e-06, "loss": 0.6916, "step": 5718 }, { "epoch": 0.29391509918799463, "grad_norm": 1.143584132194519, "learning_rate": 8.282130234180277e-06, "loss": 0.8481, "step": 5719 }, { "epoch": 0.2939664919313393, "grad_norm": 1.0635656118392944, "learning_rate": 8.281502344254099e-06, "loss": 0.7283, "step": 5720 }, { "epoch": 0.2940178846746839, "grad_norm": 0.78325355052948, "learning_rate": 8.280874363411034e-06, "loss": 0.6878, "step": 5721 }, { "epoch": 0.2940692774180286, "grad_norm": 1.1425780057907104, "learning_rate": 8.280246291668479e-06, "loss": 0.7958, "step": 5722 }, { "epoch": 0.2941206701613732, "grad_norm": 0.8000555634498596, "learning_rate": 8.279618129043837e-06, "loss": 0.6833, "step": 5723 }, { "epoch": 0.29417206290471787, "grad_norm": 1.1261862516403198, "learning_rate": 8.278989875554508e-06, "loss": 0.7936, "step": 5724 }, { "epoch": 0.2942234556480625, "grad_norm": 1.1115299463272095, "learning_rate": 8.278361531217904e-06, "loss": 0.7379, "step": 5725 }, { "epoch": 0.2942748483914071, "grad_norm": 0.8913065791130066, "learning_rate": 8.27773309605143e-06, "loss": 0.6833, "step": 5726 }, { "epoch": 0.29432624113475175, "grad_norm": 0.9982072710990906, "learning_rate": 8.277104570072498e-06, "loss": 0.8078, "step": 5727 }, { "epoch": 0.2943776338780964, "grad_norm": 0.6892127394676208, "learning_rate": 8.276475953298522e-06, "loss": 0.6543, "step": 5728 }, { "epoch": 0.29442902662144105, "grad_norm": 1.0854095220565796, "learning_rate": 8.27584724574692e-06, "loss": 0.7948, "step": 5729 }, { "epoch": 0.2944804193647857, "grad_norm": 1.1071066856384277, "learning_rate": 8.27521844743511e-06, "loss": 0.7527, "step": 5730 }, { "epoch": 0.29453181210813034, "grad_norm": 1.0253219604492188, "learning_rate": 8.274589558380511e-06, "loss": 0.6996, "step": 5731 }, { "epoch": 0.294583204851475, "grad_norm": 1.0593576431274414, "learning_rate": 8.273960578600549e-06, "loss": 0.7827, "step": 5732 }, { "epoch": 0.29463459759481964, "grad_norm": 0.7113011479377747, "learning_rate": 8.27333150811265e-06, "loss": 0.6749, "step": 5733 }, { "epoch": 0.2946859903381642, "grad_norm": 1.2593320608139038, "learning_rate": 8.272702346934243e-06, "loss": 0.7625, "step": 5734 }, { "epoch": 0.2947373830815089, "grad_norm": 0.9264829754829407, "learning_rate": 8.272073095082761e-06, "loss": 0.7198, "step": 5735 }, { "epoch": 0.2947887758248535, "grad_norm": 1.0380034446716309, "learning_rate": 8.271443752575636e-06, "loss": 0.7223, "step": 5736 }, { "epoch": 0.29484016856819817, "grad_norm": 1.0233192443847656, "learning_rate": 8.270814319430304e-06, "loss": 0.7809, "step": 5737 }, { "epoch": 0.2948915613115428, "grad_norm": 1.0644688606262207, "learning_rate": 8.270184795664206e-06, "loss": 0.7714, "step": 5738 }, { "epoch": 0.29494295405488746, "grad_norm": 1.343535304069519, "learning_rate": 8.269555181294781e-06, "loss": 0.7498, "step": 5739 }, { "epoch": 0.2949943467982321, "grad_norm": 1.086153507232666, "learning_rate": 8.268925476339477e-06, "loss": 0.7688, "step": 5740 }, { "epoch": 0.29504573954157676, "grad_norm": 1.0465667247772217, "learning_rate": 8.268295680815738e-06, "loss": 0.7777, "step": 5741 }, { "epoch": 0.29509713228492135, "grad_norm": 1.045179843902588, "learning_rate": 8.26766579474101e-06, "loss": 0.7345, "step": 5742 }, { "epoch": 0.295148525028266, "grad_norm": 1.2030725479125977, "learning_rate": 8.267035818132752e-06, "loss": 0.8281, "step": 5743 }, { "epoch": 0.29519991777161064, "grad_norm": 1.1048808097839355, "learning_rate": 8.266405751008411e-06, "loss": 0.7141, "step": 5744 }, { "epoch": 0.2952513105149553, "grad_norm": 1.0474354028701782, "learning_rate": 8.265775593385448e-06, "loss": 0.7936, "step": 5745 }, { "epoch": 0.29530270325829994, "grad_norm": 1.1040276288986206, "learning_rate": 8.265145345281319e-06, "loss": 0.8437, "step": 5746 }, { "epoch": 0.2953540960016446, "grad_norm": 1.2018976211547852, "learning_rate": 8.264515006713488e-06, "loss": 0.7376, "step": 5747 }, { "epoch": 0.29540548874498923, "grad_norm": 1.1449804306030273, "learning_rate": 8.263884577699416e-06, "loss": 0.7612, "step": 5748 }, { "epoch": 0.2954568814883338, "grad_norm": 1.0426193475723267, "learning_rate": 8.263254058256573e-06, "loss": 0.7609, "step": 5749 }, { "epoch": 0.29550827423167847, "grad_norm": 1.060174822807312, "learning_rate": 8.262623448402428e-06, "loss": 0.7692, "step": 5750 }, { "epoch": 0.2955596669750231, "grad_norm": 1.0850762128829956, "learning_rate": 8.261992748154449e-06, "loss": 0.7679, "step": 5751 }, { "epoch": 0.29561105971836776, "grad_norm": 1.2262543439865112, "learning_rate": 8.261361957530113e-06, "loss": 0.7612, "step": 5752 }, { "epoch": 0.2956624524617124, "grad_norm": 1.140245795249939, "learning_rate": 8.260731076546898e-06, "loss": 0.7625, "step": 5753 }, { "epoch": 0.29571384520505706, "grad_norm": 1.0653430223464966, "learning_rate": 8.260100105222279e-06, "loss": 0.7685, "step": 5754 }, { "epoch": 0.2957652379484017, "grad_norm": 1.0963032245635986, "learning_rate": 8.25946904357374e-06, "loss": 0.7677, "step": 5755 }, { "epoch": 0.29581663069174635, "grad_norm": 1.1433473825454712, "learning_rate": 8.258837891618765e-06, "loss": 0.7283, "step": 5756 }, { "epoch": 0.29586802343509094, "grad_norm": 0.863707959651947, "learning_rate": 8.25820664937484e-06, "loss": 0.7059, "step": 5757 }, { "epoch": 0.2959194161784356, "grad_norm": 1.0429195165634155, "learning_rate": 8.257575316859454e-06, "loss": 0.7783, "step": 5758 }, { "epoch": 0.29597080892178024, "grad_norm": 1.2195403575897217, "learning_rate": 8.256943894090101e-06, "loss": 0.8092, "step": 5759 }, { "epoch": 0.2960222016651249, "grad_norm": 1.0325510501861572, "learning_rate": 8.256312381084272e-06, "loss": 0.7635, "step": 5760 }, { "epoch": 0.29607359440846953, "grad_norm": 1.2305238246917725, "learning_rate": 8.255680777859466e-06, "loss": 0.7523, "step": 5761 }, { "epoch": 0.2961249871518142, "grad_norm": 0.6854696869850159, "learning_rate": 8.25504908443318e-06, "loss": 0.7004, "step": 5762 }, { "epoch": 0.2961763798951588, "grad_norm": 1.1869467496871948, "learning_rate": 8.254417300822916e-06, "loss": 0.7778, "step": 5763 }, { "epoch": 0.2962277726385034, "grad_norm": 1.0577789545059204, "learning_rate": 8.253785427046181e-06, "loss": 0.7938, "step": 5764 }, { "epoch": 0.29627916538184806, "grad_norm": 0.6905539631843567, "learning_rate": 8.25315346312048e-06, "loss": 0.6804, "step": 5765 }, { "epoch": 0.2963305581251927, "grad_norm": 1.1074823141098022, "learning_rate": 8.252521409063318e-06, "loss": 0.7471, "step": 5766 }, { "epoch": 0.29638195086853736, "grad_norm": 1.092867136001587, "learning_rate": 8.251889264892212e-06, "loss": 0.778, "step": 5767 }, { "epoch": 0.296433343611882, "grad_norm": 1.1158491373062134, "learning_rate": 8.251257030624675e-06, "loss": 0.7623, "step": 5768 }, { "epoch": 0.29648473635522665, "grad_norm": 1.2056506872177124, "learning_rate": 8.250624706278222e-06, "loss": 0.7727, "step": 5769 }, { "epoch": 0.2965361290985713, "grad_norm": 0.7627761960029602, "learning_rate": 8.249992291870373e-06, "loss": 0.7172, "step": 5770 }, { "epoch": 0.29658752184191595, "grad_norm": 1.0663686990737915, "learning_rate": 8.249359787418649e-06, "loss": 0.7612, "step": 5771 }, { "epoch": 0.29663891458526054, "grad_norm": 1.1841156482696533, "learning_rate": 8.248727192940575e-06, "loss": 0.7992, "step": 5772 }, { "epoch": 0.2966903073286052, "grad_norm": 0.788904070854187, "learning_rate": 8.248094508453678e-06, "loss": 0.6657, "step": 5773 }, { "epoch": 0.29674170007194983, "grad_norm": 1.2766172885894775, "learning_rate": 8.247461733975486e-06, "loss": 0.7917, "step": 5774 }, { "epoch": 0.2967930928152945, "grad_norm": 1.116125464439392, "learning_rate": 8.24682886952353e-06, "loss": 0.7292, "step": 5775 }, { "epoch": 0.2968444855586391, "grad_norm": 1.2301216125488281, "learning_rate": 8.246195915115345e-06, "loss": 0.8084, "step": 5776 }, { "epoch": 0.2968958783019838, "grad_norm": 1.4403924942016602, "learning_rate": 8.24556287076847e-06, "loss": 0.7705, "step": 5777 }, { "epoch": 0.2969472710453284, "grad_norm": 1.063284993171692, "learning_rate": 8.244929736500437e-06, "loss": 0.7522, "step": 5778 }, { "epoch": 0.296998663788673, "grad_norm": 1.1716357469558716, "learning_rate": 8.244296512328794e-06, "loss": 0.856, "step": 5779 }, { "epoch": 0.29705005653201766, "grad_norm": 1.045871376991272, "learning_rate": 8.243663198271083e-06, "loss": 0.7752, "step": 5780 }, { "epoch": 0.2971014492753623, "grad_norm": 1.0875658988952637, "learning_rate": 8.243029794344852e-06, "loss": 0.7783, "step": 5781 }, { "epoch": 0.29715284201870695, "grad_norm": 1.1296182870864868, "learning_rate": 8.242396300567647e-06, "loss": 0.7557, "step": 5782 }, { "epoch": 0.2972042347620516, "grad_norm": 1.066383957862854, "learning_rate": 8.241762716957022e-06, "loss": 0.8204, "step": 5783 }, { "epoch": 0.29725562750539625, "grad_norm": 1.0765166282653809, "learning_rate": 8.24112904353053e-06, "loss": 0.7812, "step": 5784 }, { "epoch": 0.2973070202487409, "grad_norm": 1.1533211469650269, "learning_rate": 8.240495280305726e-06, "loss": 0.7504, "step": 5785 }, { "epoch": 0.29735841299208554, "grad_norm": 1.0487736463546753, "learning_rate": 8.239861427300174e-06, "loss": 0.7948, "step": 5786 }, { "epoch": 0.29740980573543013, "grad_norm": 1.1606242656707764, "learning_rate": 8.239227484531428e-06, "loss": 0.7742, "step": 5787 }, { "epoch": 0.2974611984787748, "grad_norm": 1.0776646137237549, "learning_rate": 8.238593452017057e-06, "loss": 0.7647, "step": 5788 }, { "epoch": 0.2975125912221194, "grad_norm": 1.055606484413147, "learning_rate": 8.237959329774627e-06, "loss": 0.7366, "step": 5789 }, { "epoch": 0.2975639839654641, "grad_norm": 1.1487818956375122, "learning_rate": 8.237325117821707e-06, "loss": 0.7463, "step": 5790 }, { "epoch": 0.2976153767088087, "grad_norm": 1.061753511428833, "learning_rate": 8.236690816175867e-06, "loss": 0.7447, "step": 5791 }, { "epoch": 0.29766676945215337, "grad_norm": 1.1628844738006592, "learning_rate": 8.236056424854681e-06, "loss": 0.8055, "step": 5792 }, { "epoch": 0.297718162195498, "grad_norm": 1.045771837234497, "learning_rate": 8.235421943875725e-06, "loss": 0.7178, "step": 5793 }, { "epoch": 0.29776955493884266, "grad_norm": 1.1070959568023682, "learning_rate": 8.23478737325658e-06, "loss": 0.7857, "step": 5794 }, { "epoch": 0.29782094768218725, "grad_norm": 1.1098008155822754, "learning_rate": 8.234152713014827e-06, "loss": 0.7615, "step": 5795 }, { "epoch": 0.2978723404255319, "grad_norm": 0.9443713426589966, "learning_rate": 8.233517963168047e-06, "loss": 0.7576, "step": 5796 }, { "epoch": 0.29792373316887655, "grad_norm": 1.1075993776321411, "learning_rate": 8.23288312373383e-06, "loss": 0.815, "step": 5797 }, { "epoch": 0.2979751259122212, "grad_norm": 1.0640676021575928, "learning_rate": 8.232248194729761e-06, "loss": 0.7556, "step": 5798 }, { "epoch": 0.29802651865556584, "grad_norm": 1.086224913597107, "learning_rate": 8.231613176173436e-06, "loss": 0.7857, "step": 5799 }, { "epoch": 0.2980779113989105, "grad_norm": 1.1312247514724731, "learning_rate": 8.230978068082443e-06, "loss": 0.8036, "step": 5800 }, { "epoch": 0.29812930414225514, "grad_norm": 1.1406044960021973, "learning_rate": 8.230342870474384e-06, "loss": 0.8042, "step": 5801 }, { "epoch": 0.2981806968855997, "grad_norm": 1.0455833673477173, "learning_rate": 8.229707583366852e-06, "loss": 0.7252, "step": 5802 }, { "epoch": 0.2982320896289444, "grad_norm": 1.103022575378418, "learning_rate": 8.229072206777453e-06, "loss": 0.7537, "step": 5803 }, { "epoch": 0.298283482372289, "grad_norm": 1.0311105251312256, "learning_rate": 8.22843674072379e-06, "loss": 0.6781, "step": 5804 }, { "epoch": 0.29833487511563367, "grad_norm": 1.0067039728164673, "learning_rate": 8.227801185223465e-06, "loss": 0.7053, "step": 5805 }, { "epoch": 0.2983862678589783, "grad_norm": 1.1207455396652222, "learning_rate": 8.227165540294088e-06, "loss": 0.8304, "step": 5806 }, { "epoch": 0.29843766060232296, "grad_norm": 0.739044725894928, "learning_rate": 8.226529805953277e-06, "loss": 0.7051, "step": 5807 }, { "epoch": 0.2984890533456676, "grad_norm": 1.0492382049560547, "learning_rate": 8.225893982218636e-06, "loss": 0.7924, "step": 5808 }, { "epoch": 0.29854044608901226, "grad_norm": 1.1446664333343506, "learning_rate": 8.225258069107786e-06, "loss": 0.732, "step": 5809 }, { "epoch": 0.29859183883235685, "grad_norm": 1.0315366983413696, "learning_rate": 8.224622066638346e-06, "loss": 0.7694, "step": 5810 }, { "epoch": 0.2986432315757015, "grad_norm": 1.0689424276351929, "learning_rate": 8.223985974827935e-06, "loss": 0.7548, "step": 5811 }, { "epoch": 0.29869462431904614, "grad_norm": 1.1001231670379639, "learning_rate": 8.223349793694177e-06, "loss": 0.8299, "step": 5812 }, { "epoch": 0.2987460170623908, "grad_norm": 1.1299155950546265, "learning_rate": 8.222713523254699e-06, "loss": 0.7607, "step": 5813 }, { "epoch": 0.29879740980573544, "grad_norm": 1.1189833879470825, "learning_rate": 8.222077163527128e-06, "loss": 0.7758, "step": 5814 }, { "epoch": 0.2988488025490801, "grad_norm": 0.7642161250114441, "learning_rate": 8.221440714529096e-06, "loss": 0.6578, "step": 5815 }, { "epoch": 0.29890019529242473, "grad_norm": 1.0617568492889404, "learning_rate": 8.220804176278234e-06, "loss": 0.737, "step": 5816 }, { "epoch": 0.2989515880357693, "grad_norm": 1.0783023834228516, "learning_rate": 8.220167548792183e-06, "loss": 0.7509, "step": 5817 }, { "epoch": 0.29900298077911397, "grad_norm": 1.1129320859909058, "learning_rate": 8.219530832088576e-06, "loss": 0.7894, "step": 5818 }, { "epoch": 0.2990543735224586, "grad_norm": 0.7372670769691467, "learning_rate": 8.218894026185055e-06, "loss": 0.7061, "step": 5819 }, { "epoch": 0.29910576626580326, "grad_norm": 1.0834494829177856, "learning_rate": 8.218257131099267e-06, "loss": 0.7377, "step": 5820 }, { "epoch": 0.2991571590091479, "grad_norm": 1.0054423809051514, "learning_rate": 8.217620146848852e-06, "loss": 0.7521, "step": 5821 }, { "epoch": 0.29920855175249256, "grad_norm": 0.9185977578163147, "learning_rate": 8.216983073451463e-06, "loss": 0.6563, "step": 5822 }, { "epoch": 0.2992599444958372, "grad_norm": 0.7151365876197815, "learning_rate": 8.216345910924749e-06, "loss": 0.7017, "step": 5823 }, { "epoch": 0.29931133723918185, "grad_norm": 0.7121673822402954, "learning_rate": 8.215708659286362e-06, "loss": 0.6917, "step": 5824 }, { "epoch": 0.29936272998252644, "grad_norm": 1.1187169551849365, "learning_rate": 8.215071318553958e-06, "loss": 0.7897, "step": 5825 }, { "epoch": 0.2994141227258711, "grad_norm": 1.115398645401001, "learning_rate": 8.214433888745196e-06, "loss": 0.8009, "step": 5826 }, { "epoch": 0.29946551546921574, "grad_norm": 1.0566033124923706, "learning_rate": 8.213796369877736e-06, "loss": 0.7621, "step": 5827 }, { "epoch": 0.2995169082125604, "grad_norm": 1.3706203699111938, "learning_rate": 8.213158761969241e-06, "loss": 0.8194, "step": 5828 }, { "epoch": 0.29956830095590503, "grad_norm": 0.796194851398468, "learning_rate": 8.212521065037377e-06, "loss": 0.7286, "step": 5829 }, { "epoch": 0.2996196936992497, "grad_norm": 1.0713847875595093, "learning_rate": 8.211883279099811e-06, "loss": 0.7256, "step": 5830 }, { "epoch": 0.2996710864425943, "grad_norm": 1.009265661239624, "learning_rate": 8.211245404174215e-06, "loss": 0.6832, "step": 5831 }, { "epoch": 0.299722479185939, "grad_norm": 1.0439300537109375, "learning_rate": 8.21060744027826e-06, "loss": 0.7978, "step": 5832 }, { "epoch": 0.29977387192928356, "grad_norm": 1.142016887664795, "learning_rate": 8.209969387429624e-06, "loss": 0.7922, "step": 5833 }, { "epoch": 0.2998252646726282, "grad_norm": 1.0432748794555664, "learning_rate": 8.209331245645981e-06, "loss": 0.7843, "step": 5834 }, { "epoch": 0.29987665741597286, "grad_norm": 1.1137516498565674, "learning_rate": 8.208693014945013e-06, "loss": 0.8004, "step": 5835 }, { "epoch": 0.2999280501593175, "grad_norm": 1.073835015296936, "learning_rate": 8.208054695344404e-06, "loss": 0.7787, "step": 5836 }, { "epoch": 0.29997944290266215, "grad_norm": 1.143459439277649, "learning_rate": 8.207416286861839e-06, "loss": 0.7483, "step": 5837 }, { "epoch": 0.3000308356460068, "grad_norm": 1.1468329429626465, "learning_rate": 8.206777789515003e-06, "loss": 0.8289, "step": 5838 }, { "epoch": 0.30008222838935145, "grad_norm": 1.0970426797866821, "learning_rate": 8.20613920332159e-06, "loss": 0.7422, "step": 5839 }, { "epoch": 0.30013362113269604, "grad_norm": 1.094024419784546, "learning_rate": 8.205500528299289e-06, "loss": 0.7995, "step": 5840 }, { "epoch": 0.3001850138760407, "grad_norm": 1.0331776142120361, "learning_rate": 8.204861764465798e-06, "loss": 0.7193, "step": 5841 }, { "epoch": 0.30023640661938533, "grad_norm": 1.0264288187026978, "learning_rate": 8.204222911838813e-06, "loss": 0.7479, "step": 5842 }, { "epoch": 0.30028779936273, "grad_norm": 1.0783313512802124, "learning_rate": 8.203583970436033e-06, "loss": 0.7224, "step": 5843 }, { "epoch": 0.3003391921060746, "grad_norm": 1.0672835111618042, "learning_rate": 8.202944940275164e-06, "loss": 0.7589, "step": 5844 }, { "epoch": 0.3003905848494193, "grad_norm": 0.9001355171203613, "learning_rate": 8.202305821373907e-06, "loss": 0.7032, "step": 5845 }, { "epoch": 0.3004419775927639, "grad_norm": 0.9846897721290588, "learning_rate": 8.20166661374997e-06, "loss": 0.752, "step": 5846 }, { "epoch": 0.30049337033610857, "grad_norm": 0.7710789442062378, "learning_rate": 8.201027317421064e-06, "loss": 0.7183, "step": 5847 }, { "epoch": 0.30054476307945316, "grad_norm": 1.2258596420288086, "learning_rate": 8.200387932404901e-06, "loss": 0.7818, "step": 5848 }, { "epoch": 0.3005961558227978, "grad_norm": 1.0297565460205078, "learning_rate": 8.199748458719197e-06, "loss": 0.784, "step": 5849 }, { "epoch": 0.30064754856614245, "grad_norm": 1.1144261360168457, "learning_rate": 8.199108896381667e-06, "loss": 0.7241, "step": 5850 }, { "epoch": 0.3006989413094871, "grad_norm": 1.1186150312423706, "learning_rate": 8.19846924541003e-06, "loss": 0.796, "step": 5851 }, { "epoch": 0.30075033405283175, "grad_norm": 1.088149905204773, "learning_rate": 8.197829505822012e-06, "loss": 0.7912, "step": 5852 }, { "epoch": 0.3008017267961764, "grad_norm": 1.044201374053955, "learning_rate": 8.197189677635333e-06, "loss": 0.7433, "step": 5853 }, { "epoch": 0.30085311953952104, "grad_norm": 0.8247054219245911, "learning_rate": 8.196549760867722e-06, "loss": 0.6375, "step": 5854 }, { "epoch": 0.30090451228286563, "grad_norm": 1.0789122581481934, "learning_rate": 8.195909755536911e-06, "loss": 0.7611, "step": 5855 }, { "epoch": 0.3009559050262103, "grad_norm": 1.0973631143569946, "learning_rate": 8.195269661660625e-06, "loss": 0.714, "step": 5856 }, { "epoch": 0.3010072977695549, "grad_norm": 1.0778968334197998, "learning_rate": 8.194629479256605e-06, "loss": 0.7458, "step": 5857 }, { "epoch": 0.3010586905128996, "grad_norm": 0.7376991510391235, "learning_rate": 8.193989208342588e-06, "loss": 0.6779, "step": 5858 }, { "epoch": 0.3011100832562442, "grad_norm": 1.1209033727645874, "learning_rate": 8.19334884893631e-06, "loss": 0.8002, "step": 5859 }, { "epoch": 0.30116147599958887, "grad_norm": 1.1218087673187256, "learning_rate": 8.192708401055512e-06, "loss": 0.7158, "step": 5860 }, { "epoch": 0.3012128687429335, "grad_norm": 0.9160116910934448, "learning_rate": 8.192067864717939e-06, "loss": 0.7167, "step": 5861 }, { "epoch": 0.30126426148627816, "grad_norm": 1.0480130910873413, "learning_rate": 8.19142723994134e-06, "loss": 0.7643, "step": 5862 }, { "epoch": 0.30131565422962275, "grad_norm": 1.0397722721099854, "learning_rate": 8.190786526743462e-06, "loss": 0.7577, "step": 5863 }, { "epoch": 0.3013670469729674, "grad_norm": 1.0381280183792114, "learning_rate": 8.190145725142056e-06, "loss": 0.7628, "step": 5864 }, { "epoch": 0.30141843971631205, "grad_norm": 0.7517771124839783, "learning_rate": 8.189504835154878e-06, "loss": 0.6787, "step": 5865 }, { "epoch": 0.3014698324596567, "grad_norm": 0.7399656772613525, "learning_rate": 8.188863856799685e-06, "loss": 0.6964, "step": 5866 }, { "epoch": 0.30152122520300134, "grad_norm": 0.8144989609718323, "learning_rate": 8.188222790094231e-06, "loss": 0.7233, "step": 5867 }, { "epoch": 0.301572617946346, "grad_norm": 1.0153788328170776, "learning_rate": 8.187581635056282e-06, "loss": 0.7209, "step": 5868 }, { "epoch": 0.30162401068969064, "grad_norm": 1.0634764432907104, "learning_rate": 8.186940391703599e-06, "loss": 0.7435, "step": 5869 }, { "epoch": 0.3016754034330353, "grad_norm": 1.3272348642349243, "learning_rate": 8.186299060053952e-06, "loss": 0.7764, "step": 5870 }, { "epoch": 0.3017267961763799, "grad_norm": 1.098400592803955, "learning_rate": 8.185657640125105e-06, "loss": 0.8083, "step": 5871 }, { "epoch": 0.3017781889197245, "grad_norm": 1.0837249755859375, "learning_rate": 8.185016131934832e-06, "loss": 0.8055, "step": 5872 }, { "epoch": 0.30182958166306917, "grad_norm": 1.107259750366211, "learning_rate": 8.184374535500907e-06, "loss": 0.7647, "step": 5873 }, { "epoch": 0.3018809744064138, "grad_norm": 0.7389606833457947, "learning_rate": 8.183732850841102e-06, "loss": 0.6647, "step": 5874 }, { "epoch": 0.30193236714975846, "grad_norm": 1.1562050580978394, "learning_rate": 8.1830910779732e-06, "loss": 0.7618, "step": 5875 }, { "epoch": 0.3019837598931031, "grad_norm": 0.8052880167961121, "learning_rate": 8.18244921691498e-06, "loss": 0.6818, "step": 5876 }, { "epoch": 0.30203515263644776, "grad_norm": 1.1951496601104736, "learning_rate": 8.181807267684225e-06, "loss": 0.7318, "step": 5877 }, { "epoch": 0.30208654537979235, "grad_norm": 1.139418601989746, "learning_rate": 8.18116523029872e-06, "loss": 0.7292, "step": 5878 }, { "epoch": 0.302137938123137, "grad_norm": 1.0954374074935913, "learning_rate": 8.180523104776256e-06, "loss": 0.7899, "step": 5879 }, { "epoch": 0.30218933086648164, "grad_norm": 1.0914121866226196, "learning_rate": 8.179880891134622e-06, "loss": 0.8042, "step": 5880 }, { "epoch": 0.3022407236098263, "grad_norm": 1.0850141048431396, "learning_rate": 8.179238589391612e-06, "loss": 0.7275, "step": 5881 }, { "epoch": 0.30229211635317094, "grad_norm": 0.7990648746490479, "learning_rate": 8.178596199565019e-06, "loss": 0.6706, "step": 5882 }, { "epoch": 0.3023435090965156, "grad_norm": 1.0986249446868896, "learning_rate": 8.177953721672642e-06, "loss": 0.7477, "step": 5883 }, { "epoch": 0.30239490183986023, "grad_norm": 1.0733144283294678, "learning_rate": 8.177311155732283e-06, "loss": 0.7647, "step": 5884 }, { "epoch": 0.3024462945832049, "grad_norm": 1.0199863910675049, "learning_rate": 8.176668501761743e-06, "loss": 0.7784, "step": 5885 }, { "epoch": 0.30249768732654947, "grad_norm": 1.0245037078857422, "learning_rate": 8.176025759778829e-06, "loss": 0.7605, "step": 5886 }, { "epoch": 0.3025490800698941, "grad_norm": 1.0109847784042358, "learning_rate": 8.17538292980135e-06, "loss": 0.715, "step": 5887 }, { "epoch": 0.30260047281323876, "grad_norm": 0.8150759935379028, "learning_rate": 8.17474001184711e-06, "loss": 0.6899, "step": 5888 }, { "epoch": 0.3026518655565834, "grad_norm": 0.7515466809272766, "learning_rate": 8.17409700593393e-06, "loss": 0.6788, "step": 5889 }, { "epoch": 0.30270325829992806, "grad_norm": 1.0849751234054565, "learning_rate": 8.173453912079618e-06, "loss": 0.749, "step": 5890 }, { "epoch": 0.3027546510432727, "grad_norm": 1.090773582458496, "learning_rate": 8.172810730301993e-06, "loss": 0.7742, "step": 5891 }, { "epoch": 0.30280604378661735, "grad_norm": 1.112127661705017, "learning_rate": 8.172167460618879e-06, "loss": 0.6885, "step": 5892 }, { "epoch": 0.30285743652996194, "grad_norm": 0.7266148924827576, "learning_rate": 8.171524103048092e-06, "loss": 0.7432, "step": 5893 }, { "epoch": 0.3029088292733066, "grad_norm": 1.216835379600525, "learning_rate": 8.170880657607463e-06, "loss": 0.7806, "step": 5894 }, { "epoch": 0.30296022201665124, "grad_norm": 1.0476655960083008, "learning_rate": 8.170237124314816e-06, "loss": 0.7107, "step": 5895 }, { "epoch": 0.3030116147599959, "grad_norm": 1.1506412029266357, "learning_rate": 8.16959350318798e-06, "loss": 0.7893, "step": 5896 }, { "epoch": 0.30306300750334053, "grad_norm": 1.1130180358886719, "learning_rate": 8.16894979424479e-06, "loss": 0.7546, "step": 5897 }, { "epoch": 0.3031144002466852, "grad_norm": 1.1210126876831055, "learning_rate": 8.168305997503076e-06, "loss": 0.7673, "step": 5898 }, { "epoch": 0.3031657929900298, "grad_norm": 1.1421161890029907, "learning_rate": 8.16766211298068e-06, "loss": 0.7993, "step": 5899 }, { "epoch": 0.3032171857333745, "grad_norm": 1.119368314743042, "learning_rate": 8.167018140695438e-06, "loss": 0.7861, "step": 5900 }, { "epoch": 0.30326857847671906, "grad_norm": 1.0474625825881958, "learning_rate": 8.166374080665193e-06, "loss": 0.7166, "step": 5901 }, { "epoch": 0.3033199712200637, "grad_norm": 1.1310187578201294, "learning_rate": 8.165729932907788e-06, "loss": 0.7694, "step": 5902 }, { "epoch": 0.30337136396340836, "grad_norm": 1.2724446058273315, "learning_rate": 8.165085697441074e-06, "loss": 0.7849, "step": 5903 }, { "epoch": 0.303422756706753, "grad_norm": 1.1535512208938599, "learning_rate": 8.164441374282895e-06, "loss": 0.7356, "step": 5904 }, { "epoch": 0.30347414945009765, "grad_norm": 1.0342637300491333, "learning_rate": 8.163796963451102e-06, "loss": 0.7604, "step": 5905 }, { "epoch": 0.3035255421934423, "grad_norm": 0.7419325709342957, "learning_rate": 8.163152464963555e-06, "loss": 0.679, "step": 5906 }, { "epoch": 0.30357693493678695, "grad_norm": 0.7999743819236755, "learning_rate": 8.162507878838104e-06, "loss": 0.7291, "step": 5907 }, { "epoch": 0.30362832768013154, "grad_norm": 1.0854853391647339, "learning_rate": 8.161863205092612e-06, "loss": 0.7354, "step": 5908 }, { "epoch": 0.3036797204234762, "grad_norm": 0.726539134979248, "learning_rate": 8.161218443744938e-06, "loss": 0.6625, "step": 5909 }, { "epoch": 0.30373111316682083, "grad_norm": 1.0884379148483276, "learning_rate": 8.160573594812948e-06, "loss": 0.7671, "step": 5910 }, { "epoch": 0.3037825059101655, "grad_norm": 1.0739367008209229, "learning_rate": 8.159928658314506e-06, "loss": 0.7471, "step": 5911 }, { "epoch": 0.3038338986535101, "grad_norm": 1.1041159629821777, "learning_rate": 8.159283634267482e-06, "loss": 0.8212, "step": 5912 }, { "epoch": 0.3038852913968548, "grad_norm": 0.7195490598678589, "learning_rate": 8.158638522689743e-06, "loss": 0.6969, "step": 5913 }, { "epoch": 0.3039366841401994, "grad_norm": 1.0414634943008423, "learning_rate": 8.15799332359917e-06, "loss": 0.799, "step": 5914 }, { "epoch": 0.30398807688354407, "grad_norm": 1.1665323972702026, "learning_rate": 8.15734803701363e-06, "loss": 0.7462, "step": 5915 }, { "epoch": 0.30403946962688866, "grad_norm": 1.1425188779830933, "learning_rate": 8.156702662951007e-06, "loss": 0.7346, "step": 5916 }, { "epoch": 0.3040908623702333, "grad_norm": 1.0286660194396973, "learning_rate": 8.156057201429179e-06, "loss": 0.779, "step": 5917 }, { "epoch": 0.30414225511357795, "grad_norm": 1.0954350233078003, "learning_rate": 8.155411652466032e-06, "loss": 0.7662, "step": 5918 }, { "epoch": 0.3041936478569226, "grad_norm": 1.0761384963989258, "learning_rate": 8.15476601607945e-06, "loss": 0.7564, "step": 5919 }, { "epoch": 0.30424504060026725, "grad_norm": 1.0697237253189087, "learning_rate": 8.15412029228732e-06, "loss": 0.7193, "step": 5920 }, { "epoch": 0.3042964333436119, "grad_norm": 1.0740307569503784, "learning_rate": 8.153474481107533e-06, "loss": 0.7818, "step": 5921 }, { "epoch": 0.30434782608695654, "grad_norm": 1.149929165840149, "learning_rate": 8.152828582557983e-06, "loss": 0.7952, "step": 5922 }, { "epoch": 0.3043992188303012, "grad_norm": 1.093737006187439, "learning_rate": 8.152182596656561e-06, "loss": 0.8202, "step": 5923 }, { "epoch": 0.3044506115736458, "grad_norm": 1.11074697971344, "learning_rate": 8.15153652342117e-06, "loss": 0.7485, "step": 5924 }, { "epoch": 0.3045020043169904, "grad_norm": 0.7818731665611267, "learning_rate": 8.150890362869705e-06, "loss": 0.6728, "step": 5925 }, { "epoch": 0.3045533970603351, "grad_norm": 1.2714394330978394, "learning_rate": 8.150244115020073e-06, "loss": 0.8073, "step": 5926 }, { "epoch": 0.3046047898036797, "grad_norm": 1.0588757991790771, "learning_rate": 8.149597779890178e-06, "loss": 0.7474, "step": 5927 }, { "epoch": 0.30465618254702437, "grad_norm": 1.0225762128829956, "learning_rate": 8.148951357497925e-06, "loss": 0.7189, "step": 5928 }, { "epoch": 0.304707575290369, "grad_norm": 0.8176491260528564, "learning_rate": 8.148304847861224e-06, "loss": 0.7421, "step": 5929 }, { "epoch": 0.30475896803371366, "grad_norm": 0.7753500938415527, "learning_rate": 8.14765825099799e-06, "loss": 0.6843, "step": 5930 }, { "epoch": 0.30481036077705825, "grad_norm": 1.0372161865234375, "learning_rate": 8.147011566926133e-06, "loss": 0.6853, "step": 5931 }, { "epoch": 0.3048617535204029, "grad_norm": 1.0844743251800537, "learning_rate": 8.146364795663572e-06, "loss": 0.7505, "step": 5932 }, { "epoch": 0.30491314626374755, "grad_norm": 0.845554530620575, "learning_rate": 8.145717937228228e-06, "loss": 0.6814, "step": 5933 }, { "epoch": 0.3049645390070922, "grad_norm": 1.1065099239349365, "learning_rate": 8.145070991638023e-06, "loss": 0.7903, "step": 5934 }, { "epoch": 0.30501593175043684, "grad_norm": 1.0979385375976562, "learning_rate": 8.144423958910879e-06, "loss": 0.7481, "step": 5935 }, { "epoch": 0.3050673244937815, "grad_norm": 1.0313923358917236, "learning_rate": 8.143776839064723e-06, "loss": 0.7358, "step": 5936 }, { "epoch": 0.30511871723712614, "grad_norm": 1.1388959884643555, "learning_rate": 8.143129632117484e-06, "loss": 0.8216, "step": 5937 }, { "epoch": 0.3051701099804708, "grad_norm": 1.0782533884048462, "learning_rate": 8.142482338087095e-06, "loss": 0.7833, "step": 5938 }, { "epoch": 0.3052215027238154, "grad_norm": 0.9653657078742981, "learning_rate": 8.141834956991485e-06, "loss": 0.6611, "step": 5939 }, { "epoch": 0.30527289546716, "grad_norm": 1.1193424463272095, "learning_rate": 8.141187488848598e-06, "loss": 0.8046, "step": 5940 }, { "epoch": 0.30532428821050467, "grad_norm": 1.147467851638794, "learning_rate": 8.140539933676369e-06, "loss": 0.7573, "step": 5941 }, { "epoch": 0.3053756809538493, "grad_norm": 0.7805129885673523, "learning_rate": 8.139892291492734e-06, "loss": 0.716, "step": 5942 }, { "epoch": 0.30542707369719396, "grad_norm": 1.0494109392166138, "learning_rate": 8.139244562315646e-06, "loss": 0.7045, "step": 5943 }, { "epoch": 0.3054784664405386, "grad_norm": 1.2248508930206299, "learning_rate": 8.138596746163044e-06, "loss": 0.7926, "step": 5944 }, { "epoch": 0.30552985918388326, "grad_norm": 0.7430875897407532, "learning_rate": 8.137948843052878e-06, "loss": 0.6913, "step": 5945 }, { "epoch": 0.30558125192722785, "grad_norm": 1.1389179229736328, "learning_rate": 8.1373008530031e-06, "loss": 0.7451, "step": 5946 }, { "epoch": 0.3056326446705725, "grad_norm": 1.1152384281158447, "learning_rate": 8.136652776031662e-06, "loss": 0.7845, "step": 5947 }, { "epoch": 0.30568403741391714, "grad_norm": 1.0958131551742554, "learning_rate": 8.13600461215652e-06, "loss": 0.7963, "step": 5948 }, { "epoch": 0.3057354301572618, "grad_norm": 1.1137341260910034, "learning_rate": 8.135356361395633e-06, "loss": 0.7385, "step": 5949 }, { "epoch": 0.30578682290060644, "grad_norm": 1.1153043508529663, "learning_rate": 8.134708023766957e-06, "loss": 0.7761, "step": 5950 }, { "epoch": 0.3058382156439511, "grad_norm": 0.7776283025741577, "learning_rate": 8.13405959928846e-06, "loss": 0.6913, "step": 5951 }, { "epoch": 0.30588960838729573, "grad_norm": 1.1174592971801758, "learning_rate": 8.133411087978105e-06, "loss": 0.7452, "step": 5952 }, { "epoch": 0.3059410011306404, "grad_norm": 0.8260684013366699, "learning_rate": 8.132762489853858e-06, "loss": 0.654, "step": 5953 }, { "epoch": 0.30599239387398497, "grad_norm": 0.9808185696601868, "learning_rate": 8.13211380493369e-06, "loss": 0.746, "step": 5954 }, { "epoch": 0.3060437866173296, "grad_norm": 1.0762827396392822, "learning_rate": 8.131465033235576e-06, "loss": 0.7517, "step": 5955 }, { "epoch": 0.30609517936067426, "grad_norm": 1.0400757789611816, "learning_rate": 8.130816174777489e-06, "loss": 0.7216, "step": 5956 }, { "epoch": 0.3061465721040189, "grad_norm": 1.1839991807937622, "learning_rate": 8.130167229577406e-06, "loss": 0.8049, "step": 5957 }, { "epoch": 0.30619796484736356, "grad_norm": 1.0235670804977417, "learning_rate": 8.129518197653305e-06, "loss": 0.7577, "step": 5958 }, { "epoch": 0.3062493575907082, "grad_norm": 1.0928524732589722, "learning_rate": 8.128869079023171e-06, "loss": 0.7483, "step": 5959 }, { "epoch": 0.30630075033405285, "grad_norm": 1.0802463293075562, "learning_rate": 8.128219873704986e-06, "loss": 0.7401, "step": 5960 }, { "epoch": 0.3063521430773975, "grad_norm": 1.1580710411071777, "learning_rate": 8.127570581716737e-06, "loss": 0.8207, "step": 5961 }, { "epoch": 0.3064035358207421, "grad_norm": 0.7668749690055847, "learning_rate": 8.126921203076414e-06, "loss": 0.6601, "step": 5962 }, { "epoch": 0.30645492856408674, "grad_norm": 1.0720173120498657, "learning_rate": 8.126271737802009e-06, "loss": 0.7512, "step": 5963 }, { "epoch": 0.3065063213074314, "grad_norm": 1.0890666246414185, "learning_rate": 8.125622185911517e-06, "loss": 0.7771, "step": 5964 }, { "epoch": 0.30655771405077603, "grad_norm": 1.1244418621063232, "learning_rate": 8.124972547422931e-06, "loss": 0.796, "step": 5965 }, { "epoch": 0.3066091067941207, "grad_norm": 1.1394412517547607, "learning_rate": 8.124322822354252e-06, "loss": 0.7174, "step": 5966 }, { "epoch": 0.3066604995374653, "grad_norm": 1.168843150138855, "learning_rate": 8.123673010723482e-06, "loss": 0.7377, "step": 5967 }, { "epoch": 0.30671189228081, "grad_norm": 1.0686968564987183, "learning_rate": 8.123023112548623e-06, "loss": 0.7667, "step": 5968 }, { "epoch": 0.30676328502415456, "grad_norm": 0.8895867466926575, "learning_rate": 8.12237312784768e-06, "loss": 0.7133, "step": 5969 }, { "epoch": 0.3068146777674992, "grad_norm": 8.128183364868164, "learning_rate": 8.121723056638664e-06, "loss": 0.7229, "step": 5970 }, { "epoch": 0.30686607051084386, "grad_norm": 1.076133131980896, "learning_rate": 8.121072898939586e-06, "loss": 0.7478, "step": 5971 }, { "epoch": 0.3069174632541885, "grad_norm": 0.7858735918998718, "learning_rate": 8.120422654768457e-06, "loss": 0.6872, "step": 5972 }, { "epoch": 0.30696885599753315, "grad_norm": 1.1371923685073853, "learning_rate": 8.119772324143293e-06, "loss": 0.7145, "step": 5973 }, { "epoch": 0.3070202487408778, "grad_norm": 1.0432298183441162, "learning_rate": 8.119121907082112e-06, "loss": 0.7632, "step": 5974 }, { "epoch": 0.30707164148422245, "grad_norm": 0.7751417756080627, "learning_rate": 8.118471403602935e-06, "loss": 0.6984, "step": 5975 }, { "epoch": 0.3071230342275671, "grad_norm": 1.1347055435180664, "learning_rate": 8.117820813723785e-06, "loss": 0.7753, "step": 5976 }, { "epoch": 0.3071744269709117, "grad_norm": 1.1113218069076538, "learning_rate": 8.117170137462687e-06, "loss": 0.8097, "step": 5977 }, { "epoch": 0.30722581971425633, "grad_norm": 1.1859724521636963, "learning_rate": 8.116519374837668e-06, "loss": 0.7525, "step": 5978 }, { "epoch": 0.307277212457601, "grad_norm": 1.1094385385513306, "learning_rate": 8.115868525866758e-06, "loss": 0.6916, "step": 5979 }, { "epoch": 0.3073286052009456, "grad_norm": 1.0491571426391602, "learning_rate": 8.11521759056799e-06, "loss": 0.7682, "step": 5980 }, { "epoch": 0.3073799979442903, "grad_norm": 1.083126187324524, "learning_rate": 8.114566568959397e-06, "loss": 0.7599, "step": 5981 }, { "epoch": 0.3074313906876349, "grad_norm": 1.207197904586792, "learning_rate": 8.11391546105902e-06, "loss": 0.7854, "step": 5982 }, { "epoch": 0.30748278343097957, "grad_norm": 1.1165351867675781, "learning_rate": 8.113264266884895e-06, "loss": 0.7414, "step": 5983 }, { "epoch": 0.30753417617432416, "grad_norm": 1.1018052101135254, "learning_rate": 8.112612986455064e-06, "loss": 0.7336, "step": 5984 }, { "epoch": 0.3075855689176688, "grad_norm": 1.0903675556182861, "learning_rate": 8.111961619787571e-06, "loss": 0.807, "step": 5985 }, { "epoch": 0.30763696166101345, "grad_norm": 2.0057973861694336, "learning_rate": 8.111310166900465e-06, "loss": 0.793, "step": 5986 }, { "epoch": 0.3076883544043581, "grad_norm": 1.1542638540267944, "learning_rate": 8.110658627811796e-06, "loss": 0.7872, "step": 5987 }, { "epoch": 0.30773974714770275, "grad_norm": 1.0234479904174805, "learning_rate": 8.110007002539611e-06, "loss": 0.7329, "step": 5988 }, { "epoch": 0.3077911398910474, "grad_norm": 1.092002034187317, "learning_rate": 8.109355291101965e-06, "loss": 0.763, "step": 5989 }, { "epoch": 0.30784253263439204, "grad_norm": 1.1079283952713013, "learning_rate": 8.108703493516919e-06, "loss": 0.7866, "step": 5990 }, { "epoch": 0.3078939253777367, "grad_norm": 1.048746109008789, "learning_rate": 8.108051609802526e-06, "loss": 0.791, "step": 5991 }, { "epoch": 0.3079453181210813, "grad_norm": 1.0536657571792603, "learning_rate": 8.10739963997685e-06, "loss": 0.8016, "step": 5992 }, { "epoch": 0.3079967108644259, "grad_norm": 1.053540825843811, "learning_rate": 8.106747584057952e-06, "loss": 0.7657, "step": 5993 }, { "epoch": 0.3080481036077706, "grad_norm": 1.0464996099472046, "learning_rate": 8.1060954420639e-06, "loss": 0.733, "step": 5994 }, { "epoch": 0.3080994963511152, "grad_norm": 1.1036494970321655, "learning_rate": 8.105443214012762e-06, "loss": 0.7752, "step": 5995 }, { "epoch": 0.30815088909445987, "grad_norm": 1.1091340780258179, "learning_rate": 8.104790899922608e-06, "loss": 0.805, "step": 5996 }, { "epoch": 0.3082022818378045, "grad_norm": 0.9466354250907898, "learning_rate": 8.10413849981151e-06, "loss": 0.7175, "step": 5997 }, { "epoch": 0.30825367458114916, "grad_norm": 1.1139405965805054, "learning_rate": 8.103486013697544e-06, "loss": 0.7918, "step": 5998 }, { "epoch": 0.3083050673244938, "grad_norm": 1.1200132369995117, "learning_rate": 8.102833441598787e-06, "loss": 0.8174, "step": 5999 }, { "epoch": 0.3083564600678384, "grad_norm": 1.2123644351959229, "learning_rate": 8.102180783533322e-06, "loss": 0.792, "step": 6000 }, { "epoch": 0.30840785281118305, "grad_norm": 1.0958315134048462, "learning_rate": 8.101528039519229e-06, "loss": 0.7897, "step": 6001 }, { "epoch": 0.3084592455545277, "grad_norm": 1.1540733575820923, "learning_rate": 8.100875209574592e-06, "loss": 0.8121, "step": 6002 }, { "epoch": 0.30851063829787234, "grad_norm": 0.8598061203956604, "learning_rate": 8.100222293717502e-06, "loss": 0.7041, "step": 6003 }, { "epoch": 0.308562031041217, "grad_norm": 1.0622655153274536, "learning_rate": 8.099569291966044e-06, "loss": 0.8124, "step": 6004 }, { "epoch": 0.30861342378456164, "grad_norm": 1.0091347694396973, "learning_rate": 8.098916204338311e-06, "loss": 0.7461, "step": 6005 }, { "epoch": 0.3086648165279063, "grad_norm": 1.0971156358718872, "learning_rate": 8.098263030852403e-06, "loss": 0.7217, "step": 6006 }, { "epoch": 0.3087162092712509, "grad_norm": 1.2319941520690918, "learning_rate": 8.097609771526409e-06, "loss": 0.7748, "step": 6007 }, { "epoch": 0.3087676020145955, "grad_norm": 1.0875362157821655, "learning_rate": 8.09695642637843e-06, "loss": 0.7745, "step": 6008 }, { "epoch": 0.30881899475794017, "grad_norm": 1.0700194835662842, "learning_rate": 8.09630299542657e-06, "loss": 0.7432, "step": 6009 }, { "epoch": 0.3088703875012848, "grad_norm": 1.0887404680252075, "learning_rate": 8.095649478688934e-06, "loss": 0.7575, "step": 6010 }, { "epoch": 0.30892178024462946, "grad_norm": 1.0383274555206299, "learning_rate": 8.094995876183623e-06, "loss": 0.7441, "step": 6011 }, { "epoch": 0.3089731729879741, "grad_norm": 1.1790608167648315, "learning_rate": 8.09434218792875e-06, "loss": 0.8092, "step": 6012 }, { "epoch": 0.30902456573131876, "grad_norm": 0.8077702522277832, "learning_rate": 8.093688413942424e-06, "loss": 0.6484, "step": 6013 }, { "epoch": 0.3090759584746634, "grad_norm": 1.0213290452957153, "learning_rate": 8.09303455424276e-06, "loss": 0.8183, "step": 6014 }, { "epoch": 0.309127351218008, "grad_norm": 0.6674919128417969, "learning_rate": 8.092380608847871e-06, "loss": 0.7314, "step": 6015 }, { "epoch": 0.30917874396135264, "grad_norm": 1.076224684715271, "learning_rate": 8.091726577775878e-06, "loss": 0.7477, "step": 6016 }, { "epoch": 0.3092301367046973, "grad_norm": 0.8392216563224792, "learning_rate": 8.0910724610449e-06, "loss": 0.7284, "step": 6017 }, { "epoch": 0.30928152944804194, "grad_norm": 1.079715609550476, "learning_rate": 8.090418258673061e-06, "loss": 0.836, "step": 6018 }, { "epoch": 0.3093329221913866, "grad_norm": 1.1012918949127197, "learning_rate": 8.089763970678484e-06, "loss": 0.7506, "step": 6019 }, { "epoch": 0.30938431493473123, "grad_norm": 1.0374441146850586, "learning_rate": 8.0891095970793e-06, "loss": 0.7295, "step": 6020 }, { "epoch": 0.3094357076780759, "grad_norm": 1.0798447132110596, "learning_rate": 8.088455137893634e-06, "loss": 0.7435, "step": 6021 }, { "epoch": 0.30948710042142047, "grad_norm": 0.7154061794281006, "learning_rate": 8.087800593139623e-06, "loss": 0.7008, "step": 6022 }, { "epoch": 0.3095384931647651, "grad_norm": 1.0409945249557495, "learning_rate": 8.087145962835401e-06, "loss": 0.7346, "step": 6023 }, { "epoch": 0.30958988590810976, "grad_norm": 1.182163953781128, "learning_rate": 8.086491246999104e-06, "loss": 0.7545, "step": 6024 }, { "epoch": 0.3096412786514544, "grad_norm": 1.0903769731521606, "learning_rate": 8.085836445648872e-06, "loss": 0.7564, "step": 6025 }, { "epoch": 0.30969267139479906, "grad_norm": 1.1197534799575806, "learning_rate": 8.085181558802844e-06, "loss": 0.7752, "step": 6026 }, { "epoch": 0.3097440641381437, "grad_norm": 1.044586420059204, "learning_rate": 8.08452658647917e-06, "loss": 0.7841, "step": 6027 }, { "epoch": 0.30979545688148835, "grad_norm": 0.7822510004043579, "learning_rate": 8.083871528695992e-06, "loss": 0.6583, "step": 6028 }, { "epoch": 0.309846849624833, "grad_norm": 1.0788698196411133, "learning_rate": 8.083216385471462e-06, "loss": 0.7984, "step": 6029 }, { "epoch": 0.3098982423681776, "grad_norm": 1.1136059761047363, "learning_rate": 8.082561156823726e-06, "loss": 0.7849, "step": 6030 }, { "epoch": 0.30994963511152224, "grad_norm": 1.1260932683944702, "learning_rate": 8.081905842770945e-06, "loss": 0.758, "step": 6031 }, { "epoch": 0.3100010278548669, "grad_norm": 1.029131293296814, "learning_rate": 8.08125044333127e-06, "loss": 0.724, "step": 6032 }, { "epoch": 0.31005242059821153, "grad_norm": 1.1441344022750854, "learning_rate": 8.08059495852286e-06, "loss": 0.76, "step": 6033 }, { "epoch": 0.3101038133415562, "grad_norm": 1.0803885459899902, "learning_rate": 8.079939388363878e-06, "loss": 0.7489, "step": 6034 }, { "epoch": 0.3101552060849008, "grad_norm": 1.1191673278808594, "learning_rate": 8.079283732872483e-06, "loss": 0.7948, "step": 6035 }, { "epoch": 0.3102065988282455, "grad_norm": 1.0153367519378662, "learning_rate": 8.078627992066846e-06, "loss": 0.7649, "step": 6036 }, { "epoch": 0.31025799157159006, "grad_norm": 1.0923380851745605, "learning_rate": 8.077972165965131e-06, "loss": 0.748, "step": 6037 }, { "epoch": 0.3103093843149347, "grad_norm": 1.0747849941253662, "learning_rate": 8.07731625458551e-06, "loss": 0.7497, "step": 6038 }, { "epoch": 0.31036077705827936, "grad_norm": 1.110371708869934, "learning_rate": 8.076660257946155e-06, "loss": 0.7853, "step": 6039 }, { "epoch": 0.310412169801624, "grad_norm": 1.027322769165039, "learning_rate": 8.07600417606524e-06, "loss": 0.6806, "step": 6040 }, { "epoch": 0.31046356254496865, "grad_norm": 1.0725265741348267, "learning_rate": 8.075348008960945e-06, "loss": 0.7653, "step": 6041 }, { "epoch": 0.3105149552883133, "grad_norm": 1.0926538705825806, "learning_rate": 8.074691756651445e-06, "loss": 0.7472, "step": 6042 }, { "epoch": 0.31056634803165795, "grad_norm": 1.1099355220794678, "learning_rate": 8.074035419154927e-06, "loss": 0.7637, "step": 6043 }, { "epoch": 0.3106177407750026, "grad_norm": 1.0330449342727661, "learning_rate": 8.07337899648957e-06, "loss": 0.7512, "step": 6044 }, { "epoch": 0.3106691335183472, "grad_norm": 1.14008367061615, "learning_rate": 8.072722488673568e-06, "loss": 0.7696, "step": 6045 }, { "epoch": 0.31072052626169183, "grad_norm": 1.211603045463562, "learning_rate": 8.072065895725105e-06, "loss": 0.8355, "step": 6046 }, { "epoch": 0.3107719190050365, "grad_norm": 1.139074683189392, "learning_rate": 8.071409217662373e-06, "loss": 0.75, "step": 6047 }, { "epoch": 0.3108233117483811, "grad_norm": 1.1317499876022339, "learning_rate": 8.070752454503567e-06, "loss": 0.773, "step": 6048 }, { "epoch": 0.3108747044917258, "grad_norm": 0.700289249420166, "learning_rate": 8.070095606266885e-06, "loss": 0.6899, "step": 6049 }, { "epoch": 0.3109260972350704, "grad_norm": 1.1059043407440186, "learning_rate": 8.06943867297052e-06, "loss": 0.7965, "step": 6050 }, { "epoch": 0.31097748997841507, "grad_norm": 1.0818907022476196, "learning_rate": 8.068781654632679e-06, "loss": 0.7901, "step": 6051 }, { "epoch": 0.3110288827217597, "grad_norm": 1.1344012022018433, "learning_rate": 8.06812455127156e-06, "loss": 0.7626, "step": 6052 }, { "epoch": 0.3110802754651043, "grad_norm": 0.7358098030090332, "learning_rate": 8.067467362905373e-06, "loss": 0.6474, "step": 6053 }, { "epoch": 0.31113166820844895, "grad_norm": 1.1475454568862915, "learning_rate": 8.066810089552322e-06, "loss": 0.7716, "step": 6054 }, { "epoch": 0.3111830609517936, "grad_norm": 0.840649425983429, "learning_rate": 8.06615273123062e-06, "loss": 0.7182, "step": 6055 }, { "epoch": 0.31123445369513825, "grad_norm": 1.0517313480377197, "learning_rate": 8.06549528795848e-06, "loss": 0.7631, "step": 6056 }, { "epoch": 0.3112858464384829, "grad_norm": 1.101258397102356, "learning_rate": 8.064837759754116e-06, "loss": 0.7621, "step": 6057 }, { "epoch": 0.31133723918182754, "grad_norm": 1.0718002319335938, "learning_rate": 8.064180146635745e-06, "loss": 0.7571, "step": 6058 }, { "epoch": 0.3113886319251722, "grad_norm": 1.0681408643722534, "learning_rate": 8.063522448621588e-06, "loss": 0.7742, "step": 6059 }, { "epoch": 0.3114400246685168, "grad_norm": 1.1885234117507935, "learning_rate": 8.062864665729865e-06, "loss": 0.8138, "step": 6060 }, { "epoch": 0.3114914174118614, "grad_norm": 1.0517560243606567, "learning_rate": 8.062206797978803e-06, "loss": 0.7319, "step": 6061 }, { "epoch": 0.3115428101552061, "grad_norm": 1.0204495191574097, "learning_rate": 8.061548845386627e-06, "loss": 0.7673, "step": 6062 }, { "epoch": 0.3115942028985507, "grad_norm": 1.0379875898361206, "learning_rate": 8.060890807971566e-06, "loss": 0.7486, "step": 6063 }, { "epoch": 0.31164559564189537, "grad_norm": 1.0938879251480103, "learning_rate": 8.060232685751854e-06, "loss": 0.8022, "step": 6064 }, { "epoch": 0.31169698838524, "grad_norm": 1.041452169418335, "learning_rate": 8.059574478745723e-06, "loss": 0.8404, "step": 6065 }, { "epoch": 0.31174838112858466, "grad_norm": 1.1018478870391846, "learning_rate": 8.058916186971407e-06, "loss": 0.7744, "step": 6066 }, { "epoch": 0.3117997738719293, "grad_norm": 1.0761829614639282, "learning_rate": 8.05825781044715e-06, "loss": 0.8018, "step": 6067 }, { "epoch": 0.3118511666152739, "grad_norm": 1.131446361541748, "learning_rate": 8.057599349191187e-06, "loss": 0.7954, "step": 6068 }, { "epoch": 0.31190255935861855, "grad_norm": 1.0285425186157227, "learning_rate": 8.056940803221767e-06, "loss": 0.7106, "step": 6069 }, { "epoch": 0.3119539521019632, "grad_norm": 0.9851360321044922, "learning_rate": 8.05628217255713e-06, "loss": 0.7396, "step": 6070 }, { "epoch": 0.31200534484530784, "grad_norm": 1.158008098602295, "learning_rate": 8.055623457215526e-06, "loss": 0.8469, "step": 6071 }, { "epoch": 0.3120567375886525, "grad_norm": 1.0554207563400269, "learning_rate": 8.054964657215208e-06, "loss": 0.7781, "step": 6072 }, { "epoch": 0.31210813033199714, "grad_norm": 1.1226186752319336, "learning_rate": 8.054305772574427e-06, "loss": 0.7817, "step": 6073 }, { "epoch": 0.3121595230753418, "grad_norm": 0.7736796736717224, "learning_rate": 8.053646803311435e-06, "loss": 0.708, "step": 6074 }, { "epoch": 0.3122109158186864, "grad_norm": 0.7992336750030518, "learning_rate": 8.052987749444493e-06, "loss": 0.6865, "step": 6075 }, { "epoch": 0.312262308562031, "grad_norm": 1.025526523590088, "learning_rate": 8.05232861099186e-06, "loss": 0.7436, "step": 6076 }, { "epoch": 0.31231370130537567, "grad_norm": 1.123650312423706, "learning_rate": 8.051669387971796e-06, "loss": 0.7597, "step": 6077 }, { "epoch": 0.3123650940487203, "grad_norm": 0.835012674331665, "learning_rate": 8.051010080402568e-06, "loss": 0.6625, "step": 6078 }, { "epoch": 0.31241648679206496, "grad_norm": 1.0923599004745483, "learning_rate": 8.050350688302439e-06, "loss": 0.7649, "step": 6079 }, { "epoch": 0.3124678795354096, "grad_norm": 1.101789951324463, "learning_rate": 8.049691211689684e-06, "loss": 0.8024, "step": 6080 }, { "epoch": 0.31251927227875426, "grad_norm": 1.0039594173431396, "learning_rate": 8.049031650582568e-06, "loss": 0.7408, "step": 6081 }, { "epoch": 0.3125706650220989, "grad_norm": 1.1349210739135742, "learning_rate": 8.048372004999369e-06, "loss": 0.8075, "step": 6082 }, { "epoch": 0.3126220577654435, "grad_norm": 1.2421382665634155, "learning_rate": 8.047712274958362e-06, "loss": 0.7652, "step": 6083 }, { "epoch": 0.31267345050878814, "grad_norm": 1.1897873878479004, "learning_rate": 8.047052460477825e-06, "loss": 0.853, "step": 6084 }, { "epoch": 0.3127248432521328, "grad_norm": 1.069273829460144, "learning_rate": 8.046392561576037e-06, "loss": 0.7878, "step": 6085 }, { "epoch": 0.31277623599547744, "grad_norm": 1.1286972761154175, "learning_rate": 8.045732578271284e-06, "loss": 0.7312, "step": 6086 }, { "epoch": 0.3128276287388221, "grad_norm": 1.0797669887542725, "learning_rate": 8.04507251058185e-06, "loss": 0.7904, "step": 6087 }, { "epoch": 0.31287902148216673, "grad_norm": 0.9267917275428772, "learning_rate": 8.044412358526024e-06, "loss": 0.7048, "step": 6088 }, { "epoch": 0.3129304142255114, "grad_norm": 1.1157392263412476, "learning_rate": 8.043752122122094e-06, "loss": 0.75, "step": 6089 }, { "epoch": 0.312981806968856, "grad_norm": 1.1277902126312256, "learning_rate": 8.043091801388354e-06, "loss": 0.7483, "step": 6090 }, { "epoch": 0.3130331997122006, "grad_norm": 1.1129240989685059, "learning_rate": 8.042431396343099e-06, "loss": 0.7391, "step": 6091 }, { "epoch": 0.31308459245554526, "grad_norm": 1.1318491697311401, "learning_rate": 8.041770907004624e-06, "loss": 0.7111, "step": 6092 }, { "epoch": 0.3131359851988899, "grad_norm": 1.0705219507217407, "learning_rate": 8.041110333391229e-06, "loss": 0.8057, "step": 6093 }, { "epoch": 0.31318737794223456, "grad_norm": 0.8475239276885986, "learning_rate": 8.04044967552122e-06, "loss": 0.6906, "step": 6094 }, { "epoch": 0.3132387706855792, "grad_norm": 0.8920000195503235, "learning_rate": 8.039788933412894e-06, "loss": 0.6755, "step": 6095 }, { "epoch": 0.31329016342892385, "grad_norm": 0.8327194452285767, "learning_rate": 8.039128107084562e-06, "loss": 0.6801, "step": 6096 }, { "epoch": 0.3133415561722685, "grad_norm": 1.0118904113769531, "learning_rate": 8.038467196554533e-06, "loss": 0.7339, "step": 6097 }, { "epoch": 0.3133929489156131, "grad_norm": 1.0632312297821045, "learning_rate": 8.037806201841117e-06, "loss": 0.7574, "step": 6098 }, { "epoch": 0.31344434165895774, "grad_norm": 1.0491844415664673, "learning_rate": 8.037145122962627e-06, "loss": 0.7582, "step": 6099 }, { "epoch": 0.3134957344023024, "grad_norm": 1.0903195142745972, "learning_rate": 8.036483959937381e-06, "loss": 0.7408, "step": 6100 }, { "epoch": 0.31354712714564703, "grad_norm": 0.8597413897514343, "learning_rate": 8.035822712783692e-06, "loss": 0.6869, "step": 6101 }, { "epoch": 0.3135985198889917, "grad_norm": 1.0731096267700195, "learning_rate": 8.035161381519884e-06, "loss": 0.7635, "step": 6102 }, { "epoch": 0.3136499126323363, "grad_norm": 1.0981554985046387, "learning_rate": 8.034499966164282e-06, "loss": 0.7607, "step": 6103 }, { "epoch": 0.31370130537568097, "grad_norm": 1.0279661417007446, "learning_rate": 8.033838466735208e-06, "loss": 0.7496, "step": 6104 }, { "epoch": 0.3137526981190256, "grad_norm": 1.0758126974105835, "learning_rate": 8.033176883250989e-06, "loss": 0.7595, "step": 6105 }, { "epoch": 0.3138040908623702, "grad_norm": 1.069772481918335, "learning_rate": 8.032515215729956e-06, "loss": 0.7998, "step": 6106 }, { "epoch": 0.31385548360571486, "grad_norm": 1.0834616422653198, "learning_rate": 8.031853464190442e-06, "loss": 0.7995, "step": 6107 }, { "epoch": 0.3139068763490595, "grad_norm": 1.0511776208877563, "learning_rate": 8.031191628650778e-06, "loss": 0.8146, "step": 6108 }, { "epoch": 0.31395826909240415, "grad_norm": 1.0922143459320068, "learning_rate": 8.030529709129305e-06, "loss": 0.8326, "step": 6109 }, { "epoch": 0.3140096618357488, "grad_norm": 0.8175050020217896, "learning_rate": 8.029867705644358e-06, "loss": 0.7226, "step": 6110 }, { "epoch": 0.31406105457909345, "grad_norm": 0.8077910542488098, "learning_rate": 8.029205618214282e-06, "loss": 0.6643, "step": 6111 }, { "epoch": 0.3141124473224381, "grad_norm": 1.1243703365325928, "learning_rate": 8.028543446857419e-06, "loss": 0.7477, "step": 6112 }, { "epoch": 0.3141638400657827, "grad_norm": 1.075564980506897, "learning_rate": 8.027881191592115e-06, "loss": 0.7632, "step": 6113 }, { "epoch": 0.31421523280912733, "grad_norm": 1.0410335063934326, "learning_rate": 8.027218852436719e-06, "loss": 0.8005, "step": 6114 }, { "epoch": 0.314266625552472, "grad_norm": 0.9974504709243774, "learning_rate": 8.02655642940958e-06, "loss": 0.727, "step": 6115 }, { "epoch": 0.3143180182958166, "grad_norm": 1.1410577297210693, "learning_rate": 8.025893922529053e-06, "loss": 0.7229, "step": 6116 }, { "epoch": 0.3143694110391613, "grad_norm": 1.0655590295791626, "learning_rate": 8.02523133181349e-06, "loss": 0.7395, "step": 6117 }, { "epoch": 0.3144208037825059, "grad_norm": 1.0955928564071655, "learning_rate": 8.024568657281255e-06, "loss": 0.71, "step": 6118 }, { "epoch": 0.31447219652585057, "grad_norm": 1.0123881101608276, "learning_rate": 8.023905898950702e-06, "loss": 0.7566, "step": 6119 }, { "epoch": 0.3145235892691952, "grad_norm": 1.0509437322616577, "learning_rate": 8.023243056840196e-06, "loss": 0.7698, "step": 6120 }, { "epoch": 0.3145749820125398, "grad_norm": 1.032643437385559, "learning_rate": 8.0225801309681e-06, "loss": 0.7659, "step": 6121 }, { "epoch": 0.31462637475588445, "grad_norm": 1.0893285274505615, "learning_rate": 8.021917121352784e-06, "loss": 0.8137, "step": 6122 }, { "epoch": 0.3146777674992291, "grad_norm": 1.0889554023742676, "learning_rate": 8.021254028012614e-06, "loss": 0.7452, "step": 6123 }, { "epoch": 0.31472916024257375, "grad_norm": 1.229623794555664, "learning_rate": 8.020590850965965e-06, "loss": 0.7653, "step": 6124 }, { "epoch": 0.3147805529859184, "grad_norm": 1.0645302534103394, "learning_rate": 8.019927590231207e-06, "loss": 0.7546, "step": 6125 }, { "epoch": 0.31483194572926304, "grad_norm": 0.9680023789405823, "learning_rate": 8.019264245826718e-06, "loss": 0.6873, "step": 6126 }, { "epoch": 0.3148833384726077, "grad_norm": 1.074756383895874, "learning_rate": 8.018600817770876e-06, "loss": 0.7568, "step": 6127 }, { "epoch": 0.31493473121595233, "grad_norm": 1.1985946893692017, "learning_rate": 8.017937306082062e-06, "loss": 0.7334, "step": 6128 }, { "epoch": 0.3149861239592969, "grad_norm": 1.1500622034072876, "learning_rate": 8.017273710778661e-06, "loss": 0.7592, "step": 6129 }, { "epoch": 0.3150375167026416, "grad_norm": 1.0453654527664185, "learning_rate": 8.016610031879057e-06, "loss": 0.7347, "step": 6130 }, { "epoch": 0.3150889094459862, "grad_norm": 1.0796889066696167, "learning_rate": 8.015946269401637e-06, "loss": 0.7883, "step": 6131 }, { "epoch": 0.31514030218933087, "grad_norm": 1.0624734163284302, "learning_rate": 8.015282423364792e-06, "loss": 0.791, "step": 6132 }, { "epoch": 0.3151916949326755, "grad_norm": 1.0314422845840454, "learning_rate": 8.014618493786913e-06, "loss": 0.7209, "step": 6133 }, { "epoch": 0.31524308767602016, "grad_norm": 1.0661484003067017, "learning_rate": 8.0139544806864e-06, "loss": 0.7803, "step": 6134 }, { "epoch": 0.3152944804193648, "grad_norm": 0.9232274889945984, "learning_rate": 8.013290384081643e-06, "loss": 0.664, "step": 6135 }, { "epoch": 0.3153458731627094, "grad_norm": 0.8029962182044983, "learning_rate": 8.012626203991046e-06, "loss": 0.7226, "step": 6136 }, { "epoch": 0.31539726590605405, "grad_norm": 1.1550087928771973, "learning_rate": 8.01196194043301e-06, "loss": 0.8251, "step": 6137 }, { "epoch": 0.3154486586493987, "grad_norm": 1.0771244764328003, "learning_rate": 8.011297593425936e-06, "loss": 0.7586, "step": 6138 }, { "epoch": 0.31550005139274334, "grad_norm": 1.117776870727539, "learning_rate": 8.010633162988234e-06, "loss": 0.8063, "step": 6139 }, { "epoch": 0.315551444136088, "grad_norm": 1.0716899633407593, "learning_rate": 8.00996864913831e-06, "loss": 0.7758, "step": 6140 }, { "epoch": 0.31560283687943264, "grad_norm": 0.8726239204406738, "learning_rate": 8.009304051894577e-06, "loss": 0.6857, "step": 6141 }, { "epoch": 0.3156542296227773, "grad_norm": 1.0879548788070679, "learning_rate": 8.008639371275449e-06, "loss": 0.8012, "step": 6142 }, { "epoch": 0.31570562236612193, "grad_norm": 3.052990436553955, "learning_rate": 8.007974607299337e-06, "loss": 0.7581, "step": 6143 }, { "epoch": 0.3157570151094665, "grad_norm": 1.054518461227417, "learning_rate": 8.007309759984664e-06, "loss": 0.7348, "step": 6144 }, { "epoch": 0.31580840785281117, "grad_norm": 1.1291894912719727, "learning_rate": 8.006644829349848e-06, "loss": 0.8026, "step": 6145 }, { "epoch": 0.3158598005961558, "grad_norm": 1.045858383178711, "learning_rate": 8.005979815413309e-06, "loss": 0.7565, "step": 6146 }, { "epoch": 0.31591119333950046, "grad_norm": 1.1505354642868042, "learning_rate": 8.005314718193476e-06, "loss": 0.813, "step": 6147 }, { "epoch": 0.3159625860828451, "grad_norm": 1.0502619743347168, "learning_rate": 8.004649537708775e-06, "loss": 0.7511, "step": 6148 }, { "epoch": 0.31601397882618976, "grad_norm": 1.0704747438430786, "learning_rate": 8.003984273977634e-06, "loss": 0.7931, "step": 6149 }, { "epoch": 0.3160653715695344, "grad_norm": 1.0849039554595947, "learning_rate": 8.003318927018485e-06, "loss": 0.7664, "step": 6150 }, { "epoch": 0.316116764312879, "grad_norm": 1.0384345054626465, "learning_rate": 8.002653496849762e-06, "loss": 0.7167, "step": 6151 }, { "epoch": 0.31616815705622364, "grad_norm": 0.9973111152648926, "learning_rate": 8.001987983489904e-06, "loss": 0.7296, "step": 6152 }, { "epoch": 0.3162195497995683, "grad_norm": 0.8917011618614197, "learning_rate": 8.001322386957347e-06, "loss": 0.7105, "step": 6153 }, { "epoch": 0.31627094254291294, "grad_norm": 1.1230117082595825, "learning_rate": 8.000656707270531e-06, "loss": 0.7281, "step": 6154 }, { "epoch": 0.3163223352862576, "grad_norm": 1.0961412191390991, "learning_rate": 7.999990944447902e-06, "loss": 0.7737, "step": 6155 }, { "epoch": 0.31637372802960223, "grad_norm": 0.789203941822052, "learning_rate": 7.999325098507902e-06, "loss": 0.6927, "step": 6156 }, { "epoch": 0.3164251207729469, "grad_norm": 1.0528838634490967, "learning_rate": 7.998659169468983e-06, "loss": 0.7696, "step": 6157 }, { "epoch": 0.3164765135162915, "grad_norm": 1.1201075315475464, "learning_rate": 7.997993157349593e-06, "loss": 0.7573, "step": 6158 }, { "epoch": 0.3165279062596361, "grad_norm": 0.7588522434234619, "learning_rate": 7.997327062168182e-06, "loss": 0.6951, "step": 6159 }, { "epoch": 0.31657929900298076, "grad_norm": 1.0413274765014648, "learning_rate": 7.99666088394321e-06, "loss": 0.7421, "step": 6160 }, { "epoch": 0.3166306917463254, "grad_norm": 0.7666056156158447, "learning_rate": 7.995994622693133e-06, "loss": 0.7008, "step": 6161 }, { "epoch": 0.31668208448967006, "grad_norm": 1.1226341724395752, "learning_rate": 7.995328278436406e-06, "loss": 0.7148, "step": 6162 }, { "epoch": 0.3167334772330147, "grad_norm": 1.0278602838516235, "learning_rate": 7.994661851191494e-06, "loss": 0.7526, "step": 6163 }, { "epoch": 0.31678486997635935, "grad_norm": 1.1021512746810913, "learning_rate": 7.99399534097686e-06, "loss": 0.751, "step": 6164 }, { "epoch": 0.316836262719704, "grad_norm": 1.0913143157958984, "learning_rate": 7.993328747810971e-06, "loss": 0.7255, "step": 6165 }, { "epoch": 0.31688765546304865, "grad_norm": 1.0669944286346436, "learning_rate": 7.992662071712296e-06, "loss": 0.7594, "step": 6166 }, { "epoch": 0.31693904820639324, "grad_norm": 1.1349221467971802, "learning_rate": 7.991995312699303e-06, "loss": 0.766, "step": 6167 }, { "epoch": 0.3169904409497379, "grad_norm": 1.0363006591796875, "learning_rate": 7.991328470790467e-06, "loss": 0.7405, "step": 6168 }, { "epoch": 0.31704183369308253, "grad_norm": 1.0679699182510376, "learning_rate": 7.990661546004266e-06, "loss": 0.738, "step": 6169 }, { "epoch": 0.3170932264364272, "grad_norm": 1.1096373796463013, "learning_rate": 7.989994538359174e-06, "loss": 0.7304, "step": 6170 }, { "epoch": 0.3171446191797718, "grad_norm": 1.102096676826477, "learning_rate": 7.989327447873672e-06, "loss": 0.7528, "step": 6171 }, { "epoch": 0.31719601192311647, "grad_norm": 1.0745854377746582, "learning_rate": 7.988660274566244e-06, "loss": 0.7517, "step": 6172 }, { "epoch": 0.3172474046664611, "grad_norm": 1.0453765392303467, "learning_rate": 7.98799301845537e-06, "loss": 0.7523, "step": 6173 }, { "epoch": 0.3172987974098057, "grad_norm": 1.0133247375488281, "learning_rate": 7.987325679559545e-06, "loss": 0.7375, "step": 6174 }, { "epoch": 0.31735019015315036, "grad_norm": 1.053646445274353, "learning_rate": 7.986658257897249e-06, "loss": 0.7727, "step": 6175 }, { "epoch": 0.317401582896495, "grad_norm": 1.1851238012313843, "learning_rate": 7.985990753486981e-06, "loss": 0.8222, "step": 6176 }, { "epoch": 0.31745297563983965, "grad_norm": 1.016196846961975, "learning_rate": 7.985323166347231e-06, "loss": 0.7326, "step": 6177 }, { "epoch": 0.3175043683831843, "grad_norm": 1.0063202381134033, "learning_rate": 7.984655496496494e-06, "loss": 0.7664, "step": 6178 }, { "epoch": 0.31755576112652895, "grad_norm": 1.0691715478897095, "learning_rate": 7.983987743953272e-06, "loss": 0.834, "step": 6179 }, { "epoch": 0.3176071538698736, "grad_norm": 1.1008692979812622, "learning_rate": 7.983319908736062e-06, "loss": 0.7434, "step": 6180 }, { "epoch": 0.31765854661321824, "grad_norm": 1.0929378271102905, "learning_rate": 7.98265199086337e-06, "loss": 0.7449, "step": 6181 }, { "epoch": 0.31770993935656283, "grad_norm": 1.1153590679168701, "learning_rate": 7.981983990353701e-06, "loss": 0.8249, "step": 6182 }, { "epoch": 0.3177613320999075, "grad_norm": 1.0066204071044922, "learning_rate": 7.98131590722556e-06, "loss": 0.7507, "step": 6183 }, { "epoch": 0.3178127248432521, "grad_norm": 1.1370506286621094, "learning_rate": 7.980647741497458e-06, "loss": 0.8231, "step": 6184 }, { "epoch": 0.3178641175865968, "grad_norm": 0.791109561920166, "learning_rate": 7.979979493187909e-06, "loss": 0.6683, "step": 6185 }, { "epoch": 0.3179155103299414, "grad_norm": 1.0686885118484497, "learning_rate": 7.979311162315425e-06, "loss": 0.764, "step": 6186 }, { "epoch": 0.31796690307328607, "grad_norm": 1.1198740005493164, "learning_rate": 7.978642748898524e-06, "loss": 0.7418, "step": 6187 }, { "epoch": 0.3180182958166307, "grad_norm": 1.1276917457580566, "learning_rate": 7.977974252955725e-06, "loss": 0.7943, "step": 6188 }, { "epoch": 0.3180696885599753, "grad_norm": 1.0539357662200928, "learning_rate": 7.977305674505547e-06, "loss": 0.7692, "step": 6189 }, { "epoch": 0.31812108130331995, "grad_norm": 1.0832836627960205, "learning_rate": 7.976637013566517e-06, "loss": 0.7834, "step": 6190 }, { "epoch": 0.3181724740466646, "grad_norm": 1.096444845199585, "learning_rate": 7.97596827015716e-06, "loss": 0.7756, "step": 6191 }, { "epoch": 0.31822386679000925, "grad_norm": 1.1156165599822998, "learning_rate": 7.975299444296001e-06, "loss": 0.8029, "step": 6192 }, { "epoch": 0.3182752595333539, "grad_norm": 1.0446009635925293, "learning_rate": 7.974630536001573e-06, "loss": 0.7969, "step": 6193 }, { "epoch": 0.31832665227669854, "grad_norm": 1.0631943941116333, "learning_rate": 7.97396154529241e-06, "loss": 0.7212, "step": 6194 }, { "epoch": 0.3183780450200432, "grad_norm": 1.1738412380218506, "learning_rate": 7.973292472187045e-06, "loss": 0.7881, "step": 6195 }, { "epoch": 0.31842943776338783, "grad_norm": 1.0756568908691406, "learning_rate": 7.972623316704015e-06, "loss": 0.7769, "step": 6196 }, { "epoch": 0.3184808305067324, "grad_norm": 1.0694456100463867, "learning_rate": 7.97195407886186e-06, "loss": 0.7027, "step": 6197 }, { "epoch": 0.3185322232500771, "grad_norm": 1.0254652500152588, "learning_rate": 7.971284758679122e-06, "loss": 0.6879, "step": 6198 }, { "epoch": 0.3185836159934217, "grad_norm": 1.163947343826294, "learning_rate": 7.970615356174348e-06, "loss": 0.7698, "step": 6199 }, { "epoch": 0.31863500873676637, "grad_norm": 1.053846001625061, "learning_rate": 7.969945871366079e-06, "loss": 0.8041, "step": 6200 }, { "epoch": 0.318686401480111, "grad_norm": 1.0574476718902588, "learning_rate": 7.969276304272865e-06, "loss": 0.7585, "step": 6201 }, { "epoch": 0.31873779422345566, "grad_norm": 0.7400321960449219, "learning_rate": 7.968606654913259e-06, "loss": 0.693, "step": 6202 }, { "epoch": 0.3187891869668003, "grad_norm": 1.1234692335128784, "learning_rate": 7.967936923305815e-06, "loss": 0.7862, "step": 6203 }, { "epoch": 0.3188405797101449, "grad_norm": 0.8033209443092346, "learning_rate": 7.967267109469085e-06, "loss": 0.7019, "step": 6204 }, { "epoch": 0.31889197245348955, "grad_norm": 0.7713354825973511, "learning_rate": 7.96659721342163e-06, "loss": 0.6811, "step": 6205 }, { "epoch": 0.3189433651968342, "grad_norm": 0.7624207735061646, "learning_rate": 7.965927235182008e-06, "loss": 0.6901, "step": 6206 }, { "epoch": 0.31899475794017884, "grad_norm": 0.7091054320335388, "learning_rate": 7.96525717476878e-06, "loss": 0.6948, "step": 6207 }, { "epoch": 0.3190461506835235, "grad_norm": 1.1390208005905151, "learning_rate": 7.964587032200514e-06, "loss": 0.7632, "step": 6208 }, { "epoch": 0.31909754342686814, "grad_norm": 1.0724012851715088, "learning_rate": 7.963916807495776e-06, "loss": 0.7864, "step": 6209 }, { "epoch": 0.3191489361702128, "grad_norm": 1.094946265220642, "learning_rate": 7.963246500673134e-06, "loss": 0.7352, "step": 6210 }, { "epoch": 0.31920032891355743, "grad_norm": 1.1584030389785767, "learning_rate": 7.96257611175116e-06, "loss": 0.7632, "step": 6211 }, { "epoch": 0.319251721656902, "grad_norm": 1.1413359642028809, "learning_rate": 7.961905640748428e-06, "loss": 0.8289, "step": 6212 }, { "epoch": 0.31930311440024667, "grad_norm": 0.7515755295753479, "learning_rate": 7.961235087683514e-06, "loss": 0.6572, "step": 6213 }, { "epoch": 0.3193545071435913, "grad_norm": 1.4042295217514038, "learning_rate": 7.960564452574997e-06, "loss": 0.7716, "step": 6214 }, { "epoch": 0.31940589988693596, "grad_norm": 0.7963113188743591, "learning_rate": 7.959893735441455e-06, "loss": 0.6629, "step": 6215 }, { "epoch": 0.3194572926302806, "grad_norm": 0.8841925859451294, "learning_rate": 7.959222936301472e-06, "loss": 0.7294, "step": 6216 }, { "epoch": 0.31950868537362526, "grad_norm": 1.0700139999389648, "learning_rate": 7.958552055173635e-06, "loss": 0.8318, "step": 6217 }, { "epoch": 0.3195600781169699, "grad_norm": 0.7045684456825256, "learning_rate": 7.957881092076529e-06, "loss": 0.7228, "step": 6218 }, { "epoch": 0.31961147086031455, "grad_norm": 1.0172303915023804, "learning_rate": 7.957210047028744e-06, "loss": 0.7973, "step": 6219 }, { "epoch": 0.31966286360365914, "grad_norm": 1.0464023351669312, "learning_rate": 7.956538920048873e-06, "loss": 0.7503, "step": 6220 }, { "epoch": 0.3197142563470038, "grad_norm": 1.0726381540298462, "learning_rate": 7.955867711155509e-06, "loss": 0.7176, "step": 6221 }, { "epoch": 0.31976564909034844, "grad_norm": 1.0692014694213867, "learning_rate": 7.95519642036725e-06, "loss": 0.791, "step": 6222 }, { "epoch": 0.3198170418336931, "grad_norm": 1.1003714799880981, "learning_rate": 7.954525047702691e-06, "loss": 0.7874, "step": 6223 }, { "epoch": 0.31986843457703773, "grad_norm": 1.0203440189361572, "learning_rate": 7.953853593180439e-06, "loss": 0.7148, "step": 6224 }, { "epoch": 0.3199198273203824, "grad_norm": 0.7499158978462219, "learning_rate": 7.953182056819092e-06, "loss": 0.6843, "step": 6225 }, { "epoch": 0.319971220063727, "grad_norm": 1.0867787599563599, "learning_rate": 7.952510438637255e-06, "loss": 0.7566, "step": 6226 }, { "epoch": 0.3200226128070716, "grad_norm": 1.1037946939468384, "learning_rate": 7.951838738653541e-06, "loss": 0.7288, "step": 6227 }, { "epoch": 0.32007400555041626, "grad_norm": 1.0909851789474487, "learning_rate": 7.951166956886556e-06, "loss": 0.804, "step": 6228 }, { "epoch": 0.3201253982937609, "grad_norm": 1.1042238473892212, "learning_rate": 7.950495093354914e-06, "loss": 0.8448, "step": 6229 }, { "epoch": 0.32017679103710556, "grad_norm": 1.1097403764724731, "learning_rate": 7.949823148077228e-06, "loss": 0.7715, "step": 6230 }, { "epoch": 0.3202281837804502, "grad_norm": 1.0580353736877441, "learning_rate": 7.949151121072116e-06, "loss": 0.7293, "step": 6231 }, { "epoch": 0.32027957652379485, "grad_norm": 0.7942004203796387, "learning_rate": 7.948479012358198e-06, "loss": 0.6698, "step": 6232 }, { "epoch": 0.3203309692671395, "grad_norm": 1.2243528366088867, "learning_rate": 7.947806821954094e-06, "loss": 0.7246, "step": 6233 }, { "epoch": 0.32038236201048415, "grad_norm": 1.0543429851531982, "learning_rate": 7.947134549878426e-06, "loss": 0.8022, "step": 6234 }, { "epoch": 0.32043375475382874, "grad_norm": 0.6779053807258606, "learning_rate": 7.946462196149824e-06, "loss": 0.6862, "step": 6235 }, { "epoch": 0.3204851474971734, "grad_norm": 0.8913925886154175, "learning_rate": 7.945789760786912e-06, "loss": 0.7275, "step": 6236 }, { "epoch": 0.32053654024051803, "grad_norm": 1.1080243587493896, "learning_rate": 7.945117243808323e-06, "loss": 0.7687, "step": 6237 }, { "epoch": 0.3205879329838627, "grad_norm": 1.0549356937408447, "learning_rate": 7.944444645232689e-06, "loss": 0.7302, "step": 6238 }, { "epoch": 0.3206393257272073, "grad_norm": 1.1131651401519775, "learning_rate": 7.943771965078646e-06, "loss": 0.7619, "step": 6239 }, { "epoch": 0.32069071847055197, "grad_norm": 1.1166050434112549, "learning_rate": 7.943099203364827e-06, "loss": 0.7842, "step": 6240 }, { "epoch": 0.3207421112138966, "grad_norm": 1.0342165231704712, "learning_rate": 7.942426360109879e-06, "loss": 0.7508, "step": 6241 }, { "epoch": 0.3207935039572412, "grad_norm": 1.0975227355957031, "learning_rate": 7.941753435332435e-06, "loss": 0.7799, "step": 6242 }, { "epoch": 0.32084489670058586, "grad_norm": 0.7095525860786438, "learning_rate": 7.941080429051147e-06, "loss": 0.6434, "step": 6243 }, { "epoch": 0.3208962894439305, "grad_norm": 0.8280468583106995, "learning_rate": 7.940407341284654e-06, "loss": 0.7301, "step": 6244 }, { "epoch": 0.32094768218727515, "grad_norm": 1.074492335319519, "learning_rate": 7.93973417205161e-06, "loss": 0.7429, "step": 6245 }, { "epoch": 0.3209990749306198, "grad_norm": 0.7331387400627136, "learning_rate": 7.939060921370663e-06, "loss": 0.6729, "step": 6246 }, { "epoch": 0.32105046767396445, "grad_norm": 1.237428903579712, "learning_rate": 7.938387589260465e-06, "loss": 0.8511, "step": 6247 }, { "epoch": 0.3211018604173091, "grad_norm": 1.053725004196167, "learning_rate": 7.937714175739677e-06, "loss": 0.7609, "step": 6248 }, { "epoch": 0.32115325316065374, "grad_norm": 1.0612396001815796, "learning_rate": 7.937040680826948e-06, "loss": 0.7878, "step": 6249 }, { "epoch": 0.32120464590399833, "grad_norm": 1.149339199066162, "learning_rate": 7.936367104540946e-06, "loss": 0.8547, "step": 6250 }, { "epoch": 0.321256038647343, "grad_norm": 1.0942442417144775, "learning_rate": 7.935693446900326e-06, "loss": 0.8101, "step": 6251 }, { "epoch": 0.3213074313906876, "grad_norm": 1.146078109741211, "learning_rate": 7.935019707923757e-06, "loss": 0.8229, "step": 6252 }, { "epoch": 0.3213588241340323, "grad_norm": 0.7818619608879089, "learning_rate": 7.934345887629904e-06, "loss": 0.6865, "step": 6253 }, { "epoch": 0.3214102168773769, "grad_norm": 1.0790176391601562, "learning_rate": 7.933671986037436e-06, "loss": 0.7889, "step": 6254 }, { "epoch": 0.32146160962072157, "grad_norm": 1.0117785930633545, "learning_rate": 7.932998003165025e-06, "loss": 0.7698, "step": 6255 }, { "epoch": 0.3215130023640662, "grad_norm": 1.024689793586731, "learning_rate": 7.93232393903134e-06, "loss": 0.7457, "step": 6256 }, { "epoch": 0.32156439510741086, "grad_norm": 1.006550908088684, "learning_rate": 7.931649793655062e-06, "loss": 0.6864, "step": 6257 }, { "epoch": 0.32161578785075545, "grad_norm": 1.0390712022781372, "learning_rate": 7.930975567054866e-06, "loss": 0.72, "step": 6258 }, { "epoch": 0.3216671805941001, "grad_norm": 1.0208220481872559, "learning_rate": 7.930301259249431e-06, "loss": 0.736, "step": 6259 }, { "epoch": 0.32171857333744475, "grad_norm": 0.8802581429481506, "learning_rate": 7.929626870257443e-06, "loss": 0.6492, "step": 6260 }, { "epoch": 0.3217699660807894, "grad_norm": 1.1177890300750732, "learning_rate": 7.928952400097583e-06, "loss": 0.7245, "step": 6261 }, { "epoch": 0.32182135882413404, "grad_norm": 1.0833348035812378, "learning_rate": 7.928277848788539e-06, "loss": 0.7925, "step": 6262 }, { "epoch": 0.3218727515674787, "grad_norm": 1.086914300918579, "learning_rate": 7.927603216349e-06, "loss": 0.8045, "step": 6263 }, { "epoch": 0.32192414431082333, "grad_norm": 1.0966742038726807, "learning_rate": 7.926928502797657e-06, "loss": 0.7519, "step": 6264 }, { "epoch": 0.3219755370541679, "grad_norm": 1.0158556699752808, "learning_rate": 7.926253708153205e-06, "loss": 0.7391, "step": 6265 }, { "epoch": 0.3220269297975126, "grad_norm": 1.2897666692733765, "learning_rate": 7.925578832434337e-06, "loss": 0.771, "step": 6266 }, { "epoch": 0.3220783225408572, "grad_norm": 1.0462756156921387, "learning_rate": 7.924903875659755e-06, "loss": 0.7503, "step": 6267 }, { "epoch": 0.32212971528420187, "grad_norm": 1.1187732219696045, "learning_rate": 7.924228837848155e-06, "loss": 0.7585, "step": 6268 }, { "epoch": 0.3221811080275465, "grad_norm": 1.010280966758728, "learning_rate": 7.923553719018244e-06, "loss": 0.7341, "step": 6269 }, { "epoch": 0.32223250077089116, "grad_norm": 1.013115406036377, "learning_rate": 7.922878519188723e-06, "loss": 0.6999, "step": 6270 }, { "epoch": 0.3222838935142358, "grad_norm": 1.1518288850784302, "learning_rate": 7.922203238378299e-06, "loss": 0.761, "step": 6271 }, { "epoch": 0.32233528625758046, "grad_norm": 1.0540426969528198, "learning_rate": 7.921527876605684e-06, "loss": 0.7776, "step": 6272 }, { "epoch": 0.32238667900092505, "grad_norm": 0.8120995163917542, "learning_rate": 7.920852433889587e-06, "loss": 0.6461, "step": 6273 }, { "epoch": 0.3224380717442697, "grad_norm": 0.8062677383422852, "learning_rate": 7.920176910248722e-06, "loss": 0.6663, "step": 6274 }, { "epoch": 0.32248946448761434, "grad_norm": 1.1233443021774292, "learning_rate": 7.919501305701808e-06, "loss": 0.7915, "step": 6275 }, { "epoch": 0.322540857230959, "grad_norm": 1.1094499826431274, "learning_rate": 7.91882562026756e-06, "loss": 0.8091, "step": 6276 }, { "epoch": 0.32259224997430364, "grad_norm": 1.0231685638427734, "learning_rate": 7.918149853964698e-06, "loss": 0.8293, "step": 6277 }, { "epoch": 0.3226436427176483, "grad_norm": 1.0143455266952515, "learning_rate": 7.917474006811948e-06, "loss": 0.7527, "step": 6278 }, { "epoch": 0.32269503546099293, "grad_norm": 1.097053050994873, "learning_rate": 7.916798078828032e-06, "loss": 0.7312, "step": 6279 }, { "epoch": 0.3227464282043375, "grad_norm": 1.010754942893982, "learning_rate": 7.916122070031678e-06, "loss": 0.7178, "step": 6280 }, { "epoch": 0.32279782094768217, "grad_norm": 1.1419605016708374, "learning_rate": 7.915445980441616e-06, "loss": 0.7353, "step": 6281 }, { "epoch": 0.3228492136910268, "grad_norm": 1.4826608896255493, "learning_rate": 7.914769810076577e-06, "loss": 0.7681, "step": 6282 }, { "epoch": 0.32290060643437146, "grad_norm": 1.065619707107544, "learning_rate": 7.914093558955296e-06, "loss": 0.7373, "step": 6283 }, { "epoch": 0.3229519991777161, "grad_norm": 0.7846266627311707, "learning_rate": 7.913417227096505e-06, "loss": 0.6418, "step": 6284 }, { "epoch": 0.32300339192106076, "grad_norm": 1.079285740852356, "learning_rate": 7.91274081451895e-06, "loss": 0.797, "step": 6285 }, { "epoch": 0.3230547846644054, "grad_norm": 0.7065392136573792, "learning_rate": 7.912064321241363e-06, "loss": 0.6448, "step": 6286 }, { "epoch": 0.32310617740775005, "grad_norm": 1.0687423944473267, "learning_rate": 7.911387747282493e-06, "loss": 0.7902, "step": 6287 }, { "epoch": 0.32315757015109464, "grad_norm": 1.0653470754623413, "learning_rate": 7.910711092661085e-06, "loss": 0.7601, "step": 6288 }, { "epoch": 0.3232089628944393, "grad_norm": 0.7815491557121277, "learning_rate": 7.910034357395883e-06, "loss": 0.6921, "step": 6289 }, { "epoch": 0.32326035563778394, "grad_norm": 1.1885305643081665, "learning_rate": 7.909357541505637e-06, "loss": 0.775, "step": 6290 }, { "epoch": 0.3233117483811286, "grad_norm": 1.0751159191131592, "learning_rate": 7.908680645009099e-06, "loss": 0.7729, "step": 6291 }, { "epoch": 0.32336314112447323, "grad_norm": 1.161222219467163, "learning_rate": 7.908003667925026e-06, "loss": 0.7533, "step": 6292 }, { "epoch": 0.3234145338678179, "grad_norm": 1.1146621704101562, "learning_rate": 7.90732661027217e-06, "loss": 0.7693, "step": 6293 }, { "epoch": 0.3234659266111625, "grad_norm": 1.1772619485855103, "learning_rate": 7.906649472069294e-06, "loss": 0.8412, "step": 6294 }, { "epoch": 0.32351731935450717, "grad_norm": 1.008787989616394, "learning_rate": 7.905972253335154e-06, "loss": 0.7347, "step": 6295 }, { "epoch": 0.32356871209785176, "grad_norm": 1.1262861490249634, "learning_rate": 7.905294954088515e-06, "loss": 0.7875, "step": 6296 }, { "epoch": 0.3236201048411964, "grad_norm": 1.0279282331466675, "learning_rate": 7.904617574348144e-06, "loss": 0.7042, "step": 6297 }, { "epoch": 0.32367149758454106, "grad_norm": 1.1229885816574097, "learning_rate": 7.903940114132806e-06, "loss": 0.7736, "step": 6298 }, { "epoch": 0.3237228903278857, "grad_norm": 1.104676365852356, "learning_rate": 7.90326257346127e-06, "loss": 0.7734, "step": 6299 }, { "epoch": 0.32377428307123035, "grad_norm": 1.0531375408172607, "learning_rate": 7.90258495235231e-06, "loss": 0.7442, "step": 6300 }, { "epoch": 0.323825675814575, "grad_norm": 1.1484031677246094, "learning_rate": 7.901907250824701e-06, "loss": 0.7827, "step": 6301 }, { "epoch": 0.32387706855791965, "grad_norm": 0.8167237639427185, "learning_rate": 7.901229468897215e-06, "loss": 0.6459, "step": 6302 }, { "epoch": 0.32392846130126424, "grad_norm": 1.0975357294082642, "learning_rate": 7.900551606588633e-06, "loss": 0.7271, "step": 6303 }, { "epoch": 0.3239798540446089, "grad_norm": 1.0882256031036377, "learning_rate": 7.89987366391774e-06, "loss": 0.7253, "step": 6304 }, { "epoch": 0.32403124678795353, "grad_norm": 1.136600136756897, "learning_rate": 7.89919564090331e-06, "loss": 0.7981, "step": 6305 }, { "epoch": 0.3240826395312982, "grad_norm": 1.0951790809631348, "learning_rate": 7.898517537564134e-06, "loss": 0.7874, "step": 6306 }, { "epoch": 0.3241340322746428, "grad_norm": 1.1364099979400635, "learning_rate": 7.897839353919e-06, "loss": 0.7662, "step": 6307 }, { "epoch": 0.32418542501798747, "grad_norm": 0.8488683104515076, "learning_rate": 7.897161089986696e-06, "loss": 0.6872, "step": 6308 }, { "epoch": 0.3242368177613321, "grad_norm": 0.796160876750946, "learning_rate": 7.896482745786014e-06, "loss": 0.7036, "step": 6309 }, { "epoch": 0.32428821050467677, "grad_norm": 1.0552352666854858, "learning_rate": 7.895804321335748e-06, "loss": 0.7113, "step": 6310 }, { "epoch": 0.32433960324802136, "grad_norm": 0.772499144077301, "learning_rate": 7.895125816654693e-06, "loss": 0.6841, "step": 6311 }, { "epoch": 0.324390995991366, "grad_norm": 1.1797508001327515, "learning_rate": 7.89444723176165e-06, "loss": 0.7356, "step": 6312 }, { "epoch": 0.32444238873471065, "grad_norm": 1.0207335948944092, "learning_rate": 7.89376856667542e-06, "loss": 0.7452, "step": 6313 }, { "epoch": 0.3244937814780553, "grad_norm": 1.0907418727874756, "learning_rate": 7.893089821414804e-06, "loss": 0.7626, "step": 6314 }, { "epoch": 0.32454517422139995, "grad_norm": 0.9334765672683716, "learning_rate": 7.892410995998609e-06, "loss": 0.7115, "step": 6315 }, { "epoch": 0.3245965669647446, "grad_norm": 6.051718235015869, "learning_rate": 7.891732090445641e-06, "loss": 0.9259, "step": 6316 }, { "epoch": 0.32464795970808924, "grad_norm": 1.096279263496399, "learning_rate": 7.891053104774709e-06, "loss": 0.729, "step": 6317 }, { "epoch": 0.32469935245143383, "grad_norm": 0.7192781567573547, "learning_rate": 7.890374039004628e-06, "loss": 0.6712, "step": 6318 }, { "epoch": 0.3247507451947785, "grad_norm": 0.8034277558326721, "learning_rate": 7.88969489315421e-06, "loss": 0.679, "step": 6319 }, { "epoch": 0.3248021379381231, "grad_norm": 1.3794063329696655, "learning_rate": 7.88901566724227e-06, "loss": 0.7519, "step": 6320 }, { "epoch": 0.3248535306814678, "grad_norm": 1.125765085220337, "learning_rate": 7.88833636128763e-06, "loss": 0.7518, "step": 6321 }, { "epoch": 0.3249049234248124, "grad_norm": 1.070083737373352, "learning_rate": 7.887656975309106e-06, "loss": 0.7688, "step": 6322 }, { "epoch": 0.32495631616815707, "grad_norm": 1.0824249982833862, "learning_rate": 7.886977509325527e-06, "loss": 0.7803, "step": 6323 }, { "epoch": 0.3250077089115017, "grad_norm": 0.8180423378944397, "learning_rate": 7.886297963355714e-06, "loss": 0.6868, "step": 6324 }, { "epoch": 0.32505910165484636, "grad_norm": 1.103896141052246, "learning_rate": 7.885618337418496e-06, "loss": 0.6936, "step": 6325 }, { "epoch": 0.32511049439819095, "grad_norm": 1.1351890563964844, "learning_rate": 7.884938631532702e-06, "loss": 0.7511, "step": 6326 }, { "epoch": 0.3251618871415356, "grad_norm": 0.8250926733016968, "learning_rate": 7.884258845717163e-06, "loss": 0.7118, "step": 6327 }, { "epoch": 0.32521327988488025, "grad_norm": 0.7486550807952881, "learning_rate": 7.883578979990715e-06, "loss": 0.648, "step": 6328 }, { "epoch": 0.3252646726282249, "grad_norm": 1.0689102411270142, "learning_rate": 7.882899034372193e-06, "loss": 0.7299, "step": 6329 }, { "epoch": 0.32531606537156954, "grad_norm": 1.0918580293655396, "learning_rate": 7.882219008880435e-06, "loss": 0.7562, "step": 6330 }, { "epoch": 0.3253674581149142, "grad_norm": 0.7655850052833557, "learning_rate": 7.881538903534283e-06, "loss": 0.7082, "step": 6331 }, { "epoch": 0.32541885085825883, "grad_norm": 1.088153600692749, "learning_rate": 7.880858718352579e-06, "loss": 0.8159, "step": 6332 }, { "epoch": 0.3254702436016034, "grad_norm": 0.8767863512039185, "learning_rate": 7.88017845335417e-06, "loss": 0.6692, "step": 6333 }, { "epoch": 0.3255216363449481, "grad_norm": 1.0915653705596924, "learning_rate": 7.879498108557902e-06, "loss": 0.7674, "step": 6334 }, { "epoch": 0.3255730290882927, "grad_norm": 1.0005794763565063, "learning_rate": 7.878817683982623e-06, "loss": 0.763, "step": 6335 }, { "epoch": 0.32562442183163737, "grad_norm": 1.0348544120788574, "learning_rate": 7.878137179647187e-06, "loss": 0.748, "step": 6336 }, { "epoch": 0.325675814574982, "grad_norm": 1.0506395101547241, "learning_rate": 7.877456595570446e-06, "loss": 0.7537, "step": 6337 }, { "epoch": 0.32572720731832666, "grad_norm": 1.082154393196106, "learning_rate": 7.876775931771257e-06, "loss": 0.762, "step": 6338 }, { "epoch": 0.3257786000616713, "grad_norm": 1.1421531438827515, "learning_rate": 7.87609518826848e-06, "loss": 0.8107, "step": 6339 }, { "epoch": 0.32582999280501596, "grad_norm": 1.1036674976348877, "learning_rate": 7.875414365080974e-06, "loss": 0.7507, "step": 6340 }, { "epoch": 0.32588138554836055, "grad_norm": 1.0783600807189941, "learning_rate": 7.874733462227602e-06, "loss": 0.7466, "step": 6341 }, { "epoch": 0.3259327782917052, "grad_norm": 1.0899938344955444, "learning_rate": 7.874052479727228e-06, "loss": 0.7836, "step": 6342 }, { "epoch": 0.32598417103504984, "grad_norm": 1.085042119026184, "learning_rate": 7.87337141759872e-06, "loss": 0.7645, "step": 6343 }, { "epoch": 0.3260355637783945, "grad_norm": 1.0419930219650269, "learning_rate": 7.872690275860951e-06, "loss": 0.7524, "step": 6344 }, { "epoch": 0.32608695652173914, "grad_norm": 0.9993297457695007, "learning_rate": 7.872009054532787e-06, "loss": 0.756, "step": 6345 }, { "epoch": 0.3261383492650838, "grad_norm": 0.9247534275054932, "learning_rate": 7.871327753633105e-06, "loss": 0.7107, "step": 6346 }, { "epoch": 0.32618974200842843, "grad_norm": 0.8019363880157471, "learning_rate": 7.87064637318078e-06, "loss": 0.6757, "step": 6347 }, { "epoch": 0.3262411347517731, "grad_norm": 0.8444534540176392, "learning_rate": 7.86996491319469e-06, "loss": 0.6968, "step": 6348 }, { "epoch": 0.32629252749511767, "grad_norm": 1.0891953706741333, "learning_rate": 7.869283373693717e-06, "loss": 0.7516, "step": 6349 }, { "epoch": 0.3263439202384623, "grad_norm": 0.7442377209663391, "learning_rate": 7.868601754696741e-06, "loss": 0.6909, "step": 6350 }, { "epoch": 0.32639531298180696, "grad_norm": 1.0952770709991455, "learning_rate": 7.867920056222648e-06, "loss": 0.7663, "step": 6351 }, { "epoch": 0.3264467057251516, "grad_norm": 1.1148467063903809, "learning_rate": 7.867238278290327e-06, "loss": 0.7757, "step": 6352 }, { "epoch": 0.32649809846849626, "grad_norm": 1.0050616264343262, "learning_rate": 7.866556420918666e-06, "loss": 0.7134, "step": 6353 }, { "epoch": 0.3265494912118409, "grad_norm": 1.083299160003662, "learning_rate": 7.865874484126554e-06, "loss": 0.7625, "step": 6354 }, { "epoch": 0.32660088395518555, "grad_norm": 1.0797746181488037, "learning_rate": 7.86519246793289e-06, "loss": 0.7415, "step": 6355 }, { "epoch": 0.32665227669853014, "grad_norm": 0.979505717754364, "learning_rate": 7.864510372356564e-06, "loss": 0.7267, "step": 6356 }, { "epoch": 0.3267036694418748, "grad_norm": 0.776856541633606, "learning_rate": 7.863828197416479e-06, "loss": 0.6932, "step": 6357 }, { "epoch": 0.32675506218521944, "grad_norm": 1.1441924571990967, "learning_rate": 7.863145943131531e-06, "loss": 0.8434, "step": 6358 }, { "epoch": 0.3268064549285641, "grad_norm": 1.0159094333648682, "learning_rate": 7.862463609520625e-06, "loss": 0.746, "step": 6359 }, { "epoch": 0.32685784767190873, "grad_norm": 1.0966925621032715, "learning_rate": 7.861781196602665e-06, "loss": 0.7595, "step": 6360 }, { "epoch": 0.3269092404152534, "grad_norm": 1.0946656465530396, "learning_rate": 7.861098704396557e-06, "loss": 0.7521, "step": 6361 }, { "epoch": 0.326960633158598, "grad_norm": 1.1001232862472534, "learning_rate": 7.860416132921213e-06, "loss": 0.7894, "step": 6362 }, { "epoch": 0.32701202590194267, "grad_norm": 1.1131579875946045, "learning_rate": 7.85973348219554e-06, "loss": 0.8359, "step": 6363 }, { "epoch": 0.32706341864528726, "grad_norm": 0.9947192668914795, "learning_rate": 7.859050752238455e-06, "loss": 0.7125, "step": 6364 }, { "epoch": 0.3271148113886319, "grad_norm": 1.0519870519638062, "learning_rate": 7.858367943068873e-06, "loss": 0.7246, "step": 6365 }, { "epoch": 0.32716620413197656, "grad_norm": 1.0918675661087036, "learning_rate": 7.857685054705711e-06, "loss": 0.7328, "step": 6366 }, { "epoch": 0.3272175968753212, "grad_norm": 1.1316570043563843, "learning_rate": 7.857002087167886e-06, "loss": 0.7632, "step": 6367 }, { "epoch": 0.32726898961866585, "grad_norm": 1.135451316833496, "learning_rate": 7.856319040474326e-06, "loss": 0.7821, "step": 6368 }, { "epoch": 0.3273203823620105, "grad_norm": 1.0485374927520752, "learning_rate": 7.855635914643953e-06, "loss": 0.7228, "step": 6369 }, { "epoch": 0.32737177510535515, "grad_norm": 1.0320537090301514, "learning_rate": 7.854952709695692e-06, "loss": 0.8066, "step": 6370 }, { "epoch": 0.32742316784869974, "grad_norm": 1.0669602155685425, "learning_rate": 7.854269425648475e-06, "loss": 0.7378, "step": 6371 }, { "epoch": 0.3274745605920444, "grad_norm": 1.1363108158111572, "learning_rate": 7.853586062521229e-06, "loss": 0.7668, "step": 6372 }, { "epoch": 0.32752595333538903, "grad_norm": 1.0675972700119019, "learning_rate": 7.85290262033289e-06, "loss": 0.7882, "step": 6373 }, { "epoch": 0.3275773460787337, "grad_norm": 1.0326762199401855, "learning_rate": 7.852219099102392e-06, "loss": 0.7373, "step": 6374 }, { "epoch": 0.3276287388220783, "grad_norm": 1.1321542263031006, "learning_rate": 7.851535498848674e-06, "loss": 0.6985, "step": 6375 }, { "epoch": 0.32768013156542297, "grad_norm": 1.1553641557693481, "learning_rate": 7.850851819590675e-06, "loss": 0.8503, "step": 6376 }, { "epoch": 0.3277315243087676, "grad_norm": 1.089906096458435, "learning_rate": 7.850168061347336e-06, "loss": 0.7126, "step": 6377 }, { "epoch": 0.32778291705211227, "grad_norm": 1.146667242050171, "learning_rate": 7.849484224137603e-06, "loss": 0.8311, "step": 6378 }, { "epoch": 0.32783430979545686, "grad_norm": 1.105252981185913, "learning_rate": 7.84880030798042e-06, "loss": 0.6919, "step": 6379 }, { "epoch": 0.3278857025388015, "grad_norm": 1.1272486448287964, "learning_rate": 7.848116312894737e-06, "loss": 0.7897, "step": 6380 }, { "epoch": 0.32793709528214615, "grad_norm": 1.0277893543243408, "learning_rate": 7.847432238899507e-06, "loss": 0.7866, "step": 6381 }, { "epoch": 0.3279884880254908, "grad_norm": 1.1312599182128906, "learning_rate": 7.846748086013679e-06, "loss": 0.8072, "step": 6382 }, { "epoch": 0.32803988076883545, "grad_norm": 1.1221188306808472, "learning_rate": 7.846063854256207e-06, "loss": 0.7906, "step": 6383 }, { "epoch": 0.3280912735121801, "grad_norm": 1.190596342086792, "learning_rate": 7.845379543646053e-06, "loss": 0.7597, "step": 6384 }, { "epoch": 0.32814266625552474, "grad_norm": 1.1072745323181152, "learning_rate": 7.844695154202173e-06, "loss": 0.823, "step": 6385 }, { "epoch": 0.3281940589988694, "grad_norm": 1.0647591352462769, "learning_rate": 7.84401068594353e-06, "loss": 0.7572, "step": 6386 }, { "epoch": 0.328245451742214, "grad_norm": 1.1113308668136597, "learning_rate": 7.843326138889088e-06, "loss": 0.7667, "step": 6387 }, { "epoch": 0.3282968444855586, "grad_norm": 0.7089352607727051, "learning_rate": 7.842641513057811e-06, "loss": 0.6799, "step": 6388 }, { "epoch": 0.3283482372289033, "grad_norm": 1.0567331314086914, "learning_rate": 7.84195680846867e-06, "loss": 0.7672, "step": 6389 }, { "epoch": 0.3283996299722479, "grad_norm": 1.1223642826080322, "learning_rate": 7.841272025140633e-06, "loss": 0.7921, "step": 6390 }, { "epoch": 0.32845102271559257, "grad_norm": 1.1001906394958496, "learning_rate": 7.840587163092674e-06, "loss": 0.7513, "step": 6391 }, { "epoch": 0.3285024154589372, "grad_norm": 1.064583420753479, "learning_rate": 7.839902222343767e-06, "loss": 0.7636, "step": 6392 }, { "epoch": 0.32855380820228186, "grad_norm": 0.695277214050293, "learning_rate": 7.83921720291289e-06, "loss": 0.6708, "step": 6393 }, { "epoch": 0.32860520094562645, "grad_norm": 1.039265513420105, "learning_rate": 7.838532104819018e-06, "loss": 0.7441, "step": 6394 }, { "epoch": 0.3286565936889711, "grad_norm": 0.7285810112953186, "learning_rate": 7.837846928081138e-06, "loss": 0.6646, "step": 6395 }, { "epoch": 0.32870798643231575, "grad_norm": 1.0592541694641113, "learning_rate": 7.83716167271823e-06, "loss": 0.7316, "step": 6396 }, { "epoch": 0.3287593791756604, "grad_norm": 1.15116548538208, "learning_rate": 7.836476338749282e-06, "loss": 0.7952, "step": 6397 }, { "epoch": 0.32881077191900504, "grad_norm": 1.1327732801437378, "learning_rate": 7.835790926193277e-06, "loss": 0.8228, "step": 6398 }, { "epoch": 0.3288621646623497, "grad_norm": 1.070181965827942, "learning_rate": 7.83510543506921e-06, "loss": 0.7814, "step": 6399 }, { "epoch": 0.32891355740569433, "grad_norm": 0.8484385013580322, "learning_rate": 7.83441986539607e-06, "loss": 0.7367, "step": 6400 }, { "epoch": 0.328964950149039, "grad_norm": 1.1784979104995728, "learning_rate": 7.833734217192852e-06, "loss": 0.702, "step": 6401 }, { "epoch": 0.3290163428923836, "grad_norm": 0.7750094532966614, "learning_rate": 7.833048490478556e-06, "loss": 0.6754, "step": 6402 }, { "epoch": 0.3290677356357282, "grad_norm": 0.7292079925537109, "learning_rate": 7.832362685272176e-06, "loss": 0.6794, "step": 6403 }, { "epoch": 0.32911912837907287, "grad_norm": 1.128514289855957, "learning_rate": 7.831676801592714e-06, "loss": 0.7626, "step": 6404 }, { "epoch": 0.3291705211224175, "grad_norm": 1.0972299575805664, "learning_rate": 7.830990839459174e-06, "loss": 0.7216, "step": 6405 }, { "epoch": 0.32922191386576216, "grad_norm": 0.9930311441421509, "learning_rate": 7.83030479889056e-06, "loss": 0.7308, "step": 6406 }, { "epoch": 0.3292733066091068, "grad_norm": 1.0696057081222534, "learning_rate": 7.829618679905879e-06, "loss": 0.7831, "step": 6407 }, { "epoch": 0.32932469935245146, "grad_norm": 1.1358466148376465, "learning_rate": 7.828932482524143e-06, "loss": 0.739, "step": 6408 }, { "epoch": 0.32937609209579605, "grad_norm": 1.0338650941848755, "learning_rate": 7.828246206764363e-06, "loss": 0.7882, "step": 6409 }, { "epoch": 0.3294274848391407, "grad_norm": 1.081601619720459, "learning_rate": 7.827559852645549e-06, "loss": 0.7375, "step": 6410 }, { "epoch": 0.32947887758248534, "grad_norm": 1.0984724760055542, "learning_rate": 7.826873420186725e-06, "loss": 0.7758, "step": 6411 }, { "epoch": 0.32953027032583, "grad_norm": 1.1807386875152588, "learning_rate": 7.826186909406901e-06, "loss": 0.7888, "step": 6412 }, { "epoch": 0.32958166306917464, "grad_norm": 1.1002141237258911, "learning_rate": 7.8255003203251e-06, "loss": 0.781, "step": 6413 }, { "epoch": 0.3296330558125193, "grad_norm": 1.0721930265426636, "learning_rate": 7.824813652960347e-06, "loss": 0.799, "step": 6414 }, { "epoch": 0.32968444855586393, "grad_norm": 1.0996019840240479, "learning_rate": 7.824126907331666e-06, "loss": 0.7208, "step": 6415 }, { "epoch": 0.3297358412992086, "grad_norm": 1.1063165664672852, "learning_rate": 7.82344008345808e-06, "loss": 0.7584, "step": 6416 }, { "epoch": 0.32978723404255317, "grad_norm": 1.0351561307907104, "learning_rate": 7.822753181358623e-06, "loss": 0.7222, "step": 6417 }, { "epoch": 0.3298386267858978, "grad_norm": 1.056374430656433, "learning_rate": 7.822066201052324e-06, "loss": 0.7485, "step": 6418 }, { "epoch": 0.32989001952924246, "grad_norm": 1.028637409210205, "learning_rate": 7.821379142558217e-06, "loss": 0.7249, "step": 6419 }, { "epoch": 0.3299414122725871, "grad_norm": 1.0313010215759277, "learning_rate": 7.820692005895335e-06, "loss": 0.7259, "step": 6420 }, { "epoch": 0.32999280501593176, "grad_norm": 1.0892776250839233, "learning_rate": 7.82000479108272e-06, "loss": 0.7456, "step": 6421 }, { "epoch": 0.3300441977592764, "grad_norm": 1.0584865808486938, "learning_rate": 7.819317498139407e-06, "loss": 0.801, "step": 6422 }, { "epoch": 0.33009559050262105, "grad_norm": 0.7319139838218689, "learning_rate": 7.818630127084443e-06, "loss": 0.7185, "step": 6423 }, { "epoch": 0.3301469832459657, "grad_norm": 1.071027398109436, "learning_rate": 7.81794267793687e-06, "loss": 0.788, "step": 6424 }, { "epoch": 0.3301983759893103, "grad_norm": 1.078505516052246, "learning_rate": 7.817255150715734e-06, "loss": 0.7896, "step": 6425 }, { "epoch": 0.33024976873265494, "grad_norm": 0.7777031064033508, "learning_rate": 7.816567545440084e-06, "loss": 0.6669, "step": 6426 }, { "epoch": 0.3303011614759996, "grad_norm": 0.7845080494880676, "learning_rate": 7.815879862128972e-06, "loss": 0.6928, "step": 6427 }, { "epoch": 0.33035255421934423, "grad_norm": 1.0688804388046265, "learning_rate": 7.815192100801446e-06, "loss": 0.744, "step": 6428 }, { "epoch": 0.3304039469626889, "grad_norm": 1.1140738725662231, "learning_rate": 7.814504261476567e-06, "loss": 0.7594, "step": 6429 }, { "epoch": 0.3304553397060335, "grad_norm": 1.0074553489685059, "learning_rate": 7.813816344173388e-06, "loss": 0.7566, "step": 6430 }, { "epoch": 0.33050673244937817, "grad_norm": 0.8538505434989929, "learning_rate": 7.813128348910972e-06, "loss": 0.718, "step": 6431 }, { "epoch": 0.33055812519272276, "grad_norm": 1.0806833505630493, "learning_rate": 7.812440275708377e-06, "loss": 0.7082, "step": 6432 }, { "epoch": 0.3306095179360674, "grad_norm": 1.1388258934020996, "learning_rate": 7.81175212458467e-06, "loss": 0.7251, "step": 6433 }, { "epoch": 0.33066091067941206, "grad_norm": 1.0561637878417969, "learning_rate": 7.811063895558913e-06, "loss": 0.7394, "step": 6434 }, { "epoch": 0.3307123034227567, "grad_norm": 1.701047658920288, "learning_rate": 7.810375588650177e-06, "loss": 0.7975, "step": 6435 }, { "epoch": 0.33076369616610135, "grad_norm": 1.039394497871399, "learning_rate": 7.809687203877532e-06, "loss": 0.6964, "step": 6436 }, { "epoch": 0.330815088909446, "grad_norm": 0.9989181160926819, "learning_rate": 7.808998741260047e-06, "loss": 0.7726, "step": 6437 }, { "epoch": 0.33086648165279064, "grad_norm": 1.0428011417388916, "learning_rate": 7.8083102008168e-06, "loss": 0.7043, "step": 6438 }, { "epoch": 0.3309178743961353, "grad_norm": 1.0471765995025635, "learning_rate": 7.80762158256687e-06, "loss": 0.7559, "step": 6439 }, { "epoch": 0.3309692671394799, "grad_norm": 0.8309373259544373, "learning_rate": 7.806932886529328e-06, "loss": 0.6748, "step": 6440 }, { "epoch": 0.33102065988282453, "grad_norm": 1.0479280948638916, "learning_rate": 7.80624411272326e-06, "loss": 0.798, "step": 6441 }, { "epoch": 0.3310720526261692, "grad_norm": 1.1494965553283691, "learning_rate": 7.80555526116775e-06, "loss": 0.7014, "step": 6442 }, { "epoch": 0.3311234453695138, "grad_norm": 1.1162176132202148, "learning_rate": 7.80486633188188e-06, "loss": 0.8006, "step": 6443 }, { "epoch": 0.33117483811285847, "grad_norm": 1.1520477533340454, "learning_rate": 7.80417732488474e-06, "loss": 0.7906, "step": 6444 }, { "epoch": 0.3312262308562031, "grad_norm": 0.8363535404205322, "learning_rate": 7.803488240195418e-06, "loss": 0.6661, "step": 6445 }, { "epoch": 0.33127762359954777, "grad_norm": 1.0864309072494507, "learning_rate": 7.802799077833005e-06, "loss": 0.7539, "step": 6446 }, { "epoch": 0.33132901634289236, "grad_norm": 1.0707521438598633, "learning_rate": 7.802109837816598e-06, "loss": 0.7461, "step": 6447 }, { "epoch": 0.331380409086237, "grad_norm": 1.1968246698379517, "learning_rate": 7.80142052016529e-06, "loss": 0.7807, "step": 6448 }, { "epoch": 0.33143180182958165, "grad_norm": 1.0774612426757812, "learning_rate": 7.80073112489818e-06, "loss": 0.7502, "step": 6449 }, { "epoch": 0.3314831945729263, "grad_norm": 1.079773187637329, "learning_rate": 7.800041652034369e-06, "loss": 0.7572, "step": 6450 }, { "epoch": 0.33153458731627095, "grad_norm": 1.1255322694778442, "learning_rate": 7.799352101592958e-06, "loss": 0.812, "step": 6451 }, { "epoch": 0.3315859800596156, "grad_norm": 1.0406098365783691, "learning_rate": 7.798662473593054e-06, "loss": 0.738, "step": 6452 }, { "epoch": 0.33163737280296024, "grad_norm": 0.8371950387954712, "learning_rate": 7.797972768053761e-06, "loss": 0.6757, "step": 6453 }, { "epoch": 0.3316887655463049, "grad_norm": 1.1555914878845215, "learning_rate": 7.79728298499419e-06, "loss": 0.7337, "step": 6454 }, { "epoch": 0.3317401582896495, "grad_norm": 1.041063904762268, "learning_rate": 7.796593124433451e-06, "loss": 0.8317, "step": 6455 }, { "epoch": 0.3317915510329941, "grad_norm": 0.7440322637557983, "learning_rate": 7.795903186390656e-06, "loss": 0.7236, "step": 6456 }, { "epoch": 0.3318429437763388, "grad_norm": 1.1185075044631958, "learning_rate": 7.795213170884923e-06, "loss": 0.7827, "step": 6457 }, { "epoch": 0.3318943365196834, "grad_norm": 1.0305125713348389, "learning_rate": 7.79452307793537e-06, "loss": 0.7497, "step": 6458 }, { "epoch": 0.33194572926302807, "grad_norm": 1.0552159547805786, "learning_rate": 7.793832907561113e-06, "loss": 0.7932, "step": 6459 }, { "epoch": 0.3319971220063727, "grad_norm": 0.8533280491828918, "learning_rate": 7.793142659781274e-06, "loss": 0.7078, "step": 6460 }, { "epoch": 0.33204851474971736, "grad_norm": 0.72590571641922, "learning_rate": 7.792452334614981e-06, "loss": 0.7149, "step": 6461 }, { "epoch": 0.33209990749306195, "grad_norm": 1.1133724451065063, "learning_rate": 7.791761932081356e-06, "loss": 0.7286, "step": 6462 }, { "epoch": 0.3321513002364066, "grad_norm": 1.209977388381958, "learning_rate": 7.79107145219953e-06, "loss": 0.8303, "step": 6463 }, { "epoch": 0.33220269297975125, "grad_norm": 0.7041590213775635, "learning_rate": 7.790380894988632e-06, "loss": 0.698, "step": 6464 }, { "epoch": 0.3322540857230959, "grad_norm": 1.0389889478683472, "learning_rate": 7.789690260467793e-06, "loss": 0.6747, "step": 6465 }, { "epoch": 0.33230547846644054, "grad_norm": 1.145019769668579, "learning_rate": 7.78899954865615e-06, "loss": 0.734, "step": 6466 }, { "epoch": 0.3323568712097852, "grad_norm": 1.0577338933944702, "learning_rate": 7.78830875957284e-06, "loss": 0.7551, "step": 6467 }, { "epoch": 0.33240826395312983, "grad_norm": 1.0348252058029175, "learning_rate": 7.787617893237e-06, "loss": 0.7723, "step": 6468 }, { "epoch": 0.3324596566964745, "grad_norm": 1.1440144777297974, "learning_rate": 7.786926949667773e-06, "loss": 0.7815, "step": 6469 }, { "epoch": 0.3325110494398191, "grad_norm": 0.9907153844833374, "learning_rate": 7.7862359288843e-06, "loss": 0.7149, "step": 6470 }, { "epoch": 0.3325624421831637, "grad_norm": 1.0473151206970215, "learning_rate": 7.785544830905729e-06, "loss": 0.7489, "step": 6471 }, { "epoch": 0.33261383492650837, "grad_norm": 1.0405088663101196, "learning_rate": 7.784853655751205e-06, "loss": 0.7444, "step": 6472 }, { "epoch": 0.332665227669853, "grad_norm": 1.0650453567504883, "learning_rate": 7.784162403439877e-06, "loss": 0.7469, "step": 6473 }, { "epoch": 0.33271662041319766, "grad_norm": 1.0978261232376099, "learning_rate": 7.783471073990901e-06, "loss": 0.7974, "step": 6474 }, { "epoch": 0.3327680131565423, "grad_norm": 0.7830113768577576, "learning_rate": 7.782779667423428e-06, "loss": 0.6755, "step": 6475 }, { "epoch": 0.33281940589988696, "grad_norm": 1.2977112531661987, "learning_rate": 7.782088183756613e-06, "loss": 0.8099, "step": 6476 }, { "epoch": 0.3328707986432316, "grad_norm": 1.103999376296997, "learning_rate": 7.781396623009616e-06, "loss": 0.7321, "step": 6477 }, { "epoch": 0.3329221913865762, "grad_norm": 1.0974771976470947, "learning_rate": 7.780704985201598e-06, "loss": 0.746, "step": 6478 }, { "epoch": 0.33297358412992084, "grad_norm": 1.1084760427474976, "learning_rate": 7.780013270351718e-06, "loss": 0.7849, "step": 6479 }, { "epoch": 0.3330249768732655, "grad_norm": 1.0800585746765137, "learning_rate": 7.779321478479143e-06, "loss": 0.7429, "step": 6480 }, { "epoch": 0.33307636961661014, "grad_norm": 1.0699074268341064, "learning_rate": 7.77862960960304e-06, "loss": 0.7638, "step": 6481 }, { "epoch": 0.3331277623599548, "grad_norm": 1.0960007905960083, "learning_rate": 7.777937663742578e-06, "loss": 0.7763, "step": 6482 }, { "epoch": 0.33317915510329943, "grad_norm": 1.0531806945800781, "learning_rate": 7.777245640916926e-06, "loss": 0.7773, "step": 6483 }, { "epoch": 0.3332305478466441, "grad_norm": 0.9032198190689087, "learning_rate": 7.77655354114526e-06, "loss": 0.7, "step": 6484 }, { "epoch": 0.33328194058998867, "grad_norm": 1.0535330772399902, "learning_rate": 7.775861364446752e-06, "loss": 0.7447, "step": 6485 }, { "epoch": 0.3333333333333333, "grad_norm": 1.073972225189209, "learning_rate": 7.77516911084058e-06, "loss": 0.7947, "step": 6486 }, { "epoch": 0.33338472607667796, "grad_norm": 0.7861914038658142, "learning_rate": 7.774476780345926e-06, "loss": 0.739, "step": 6487 }, { "epoch": 0.3334361188200226, "grad_norm": 0.9564030170440674, "learning_rate": 7.77378437298197e-06, "loss": 0.742, "step": 6488 }, { "epoch": 0.33348751156336726, "grad_norm": 1.1033538579940796, "learning_rate": 7.773091888767894e-06, "loss": 0.7681, "step": 6489 }, { "epoch": 0.3335389043067119, "grad_norm": 1.0671360492706299, "learning_rate": 7.772399327722888e-06, "loss": 0.7397, "step": 6490 }, { "epoch": 0.33359029705005655, "grad_norm": 0.7774039506912231, "learning_rate": 7.771706689866137e-06, "loss": 0.6806, "step": 6491 }, { "epoch": 0.3336416897934012, "grad_norm": 1.1928331851959229, "learning_rate": 7.77101397521683e-06, "loss": 0.7744, "step": 6492 }, { "epoch": 0.3336930825367458, "grad_norm": 1.020980954170227, "learning_rate": 7.770321183794164e-06, "loss": 0.7454, "step": 6493 }, { "epoch": 0.33374447528009044, "grad_norm": 0.8944103121757507, "learning_rate": 7.769628315617328e-06, "loss": 0.7115, "step": 6494 }, { "epoch": 0.3337958680234351, "grad_norm": 0.6898053884506226, "learning_rate": 7.768935370705523e-06, "loss": 0.6738, "step": 6495 }, { "epoch": 0.33384726076677973, "grad_norm": 1.0660370588302612, "learning_rate": 7.768242349077944e-06, "loss": 0.7005, "step": 6496 }, { "epoch": 0.3338986535101244, "grad_norm": 1.1497564315795898, "learning_rate": 7.767549250753793e-06, "loss": 0.7826, "step": 6497 }, { "epoch": 0.333950046253469, "grad_norm": 1.0521914958953857, "learning_rate": 7.766856075752276e-06, "loss": 0.7165, "step": 6498 }, { "epoch": 0.33400143899681367, "grad_norm": 0.9812231659889221, "learning_rate": 7.766162824092593e-06, "loss": 0.6942, "step": 6499 }, { "epoch": 0.33405283174015826, "grad_norm": 1.1092708110809326, "learning_rate": 7.765469495793953e-06, "loss": 0.7253, "step": 6500 }, { "epoch": 0.3341042244835029, "grad_norm": 1.0689315795898438, "learning_rate": 7.764776090875566e-06, "loss": 0.7609, "step": 6501 }, { "epoch": 0.33415561722684756, "grad_norm": 1.1116224527359009, "learning_rate": 7.764082609356644e-06, "loss": 0.8015, "step": 6502 }, { "epoch": 0.3342070099701922, "grad_norm": 1.068313717842102, "learning_rate": 7.763389051256399e-06, "loss": 0.7782, "step": 6503 }, { "epoch": 0.33425840271353685, "grad_norm": 1.153111219406128, "learning_rate": 7.762695416594048e-06, "loss": 0.7531, "step": 6504 }, { "epoch": 0.3343097954568815, "grad_norm": 0.7740160226821899, "learning_rate": 7.762001705388807e-06, "loss": 0.7261, "step": 6505 }, { "epoch": 0.33436118820022614, "grad_norm": 1.1248310804367065, "learning_rate": 7.761307917659897e-06, "loss": 0.8001, "step": 6506 }, { "epoch": 0.3344125809435708, "grad_norm": 1.078467845916748, "learning_rate": 7.76061405342654e-06, "loss": 0.7407, "step": 6507 }, { "epoch": 0.3344639736869154, "grad_norm": 1.1185346841812134, "learning_rate": 7.759920112707959e-06, "loss": 0.7744, "step": 6508 }, { "epoch": 0.33451536643026003, "grad_norm": 1.1144089698791504, "learning_rate": 7.759226095523385e-06, "loss": 0.7316, "step": 6509 }, { "epoch": 0.3345667591736047, "grad_norm": 1.2007198333740234, "learning_rate": 7.758532001892039e-06, "loss": 0.7768, "step": 6510 }, { "epoch": 0.3346181519169493, "grad_norm": 1.033096432685852, "learning_rate": 7.757837831833155e-06, "loss": 0.7943, "step": 6511 }, { "epoch": 0.33466954466029397, "grad_norm": 1.1185754537582397, "learning_rate": 7.757143585365967e-06, "loss": 0.8029, "step": 6512 }, { "epoch": 0.3347209374036386, "grad_norm": 1.06898832321167, "learning_rate": 7.756449262509706e-06, "loss": 0.7296, "step": 6513 }, { "epoch": 0.33477233014698327, "grad_norm": 1.115201711654663, "learning_rate": 7.755754863283614e-06, "loss": 0.7986, "step": 6514 }, { "epoch": 0.3348237228903279, "grad_norm": 1.1190185546875, "learning_rate": 7.755060387706926e-06, "loss": 0.7463, "step": 6515 }, { "epoch": 0.3348751156336725, "grad_norm": 1.111128568649292, "learning_rate": 7.754365835798884e-06, "loss": 0.7935, "step": 6516 }, { "epoch": 0.33492650837701715, "grad_norm": 1.0540839433670044, "learning_rate": 7.753671207578731e-06, "loss": 0.7794, "step": 6517 }, { "epoch": 0.3349779011203618, "grad_norm": 1.5448687076568604, "learning_rate": 7.752976503065713e-06, "loss": 0.7345, "step": 6518 }, { "epoch": 0.33502929386370645, "grad_norm": 1.1589034795761108, "learning_rate": 7.752281722279077e-06, "loss": 0.7302, "step": 6519 }, { "epoch": 0.3350806866070511, "grad_norm": 1.0417239665985107, "learning_rate": 7.751586865238073e-06, "loss": 0.7167, "step": 6520 }, { "epoch": 0.33513207935039574, "grad_norm": 1.1238305568695068, "learning_rate": 7.75089193196195e-06, "loss": 0.7765, "step": 6521 }, { "epoch": 0.3351834720937404, "grad_norm": 0.8735576868057251, "learning_rate": 7.750196922469966e-06, "loss": 0.6691, "step": 6522 }, { "epoch": 0.335234864837085, "grad_norm": 1.110530138015747, "learning_rate": 7.749501836781374e-06, "loss": 0.7944, "step": 6523 }, { "epoch": 0.3352862575804296, "grad_norm": 1.03462553024292, "learning_rate": 7.748806674915433e-06, "loss": 0.7473, "step": 6524 }, { "epoch": 0.3353376503237743, "grad_norm": 0.9000665545463562, "learning_rate": 7.748111436891401e-06, "loss": 0.7145, "step": 6525 }, { "epoch": 0.3353890430671189, "grad_norm": 1.0227618217468262, "learning_rate": 7.747416122728544e-06, "loss": 0.7596, "step": 6526 }, { "epoch": 0.33544043581046357, "grad_norm": 1.0274015665054321, "learning_rate": 7.746720732446123e-06, "loss": 0.7805, "step": 6527 }, { "epoch": 0.3354918285538082, "grad_norm": 1.029577374458313, "learning_rate": 7.746025266063406e-06, "loss": 0.7699, "step": 6528 }, { "epoch": 0.33554322129715286, "grad_norm": 1.1308836936950684, "learning_rate": 7.74532972359966e-06, "loss": 0.7941, "step": 6529 }, { "epoch": 0.3355946140404975, "grad_norm": 0.8033400774002075, "learning_rate": 7.744634105074158e-06, "loss": 0.7163, "step": 6530 }, { "epoch": 0.3356460067838421, "grad_norm": 1.0922857522964478, "learning_rate": 7.74393841050617e-06, "loss": 0.7899, "step": 6531 }, { "epoch": 0.33569739952718675, "grad_norm": 0.7340012788772583, "learning_rate": 7.743242639914974e-06, "loss": 0.6737, "step": 6532 }, { "epoch": 0.3357487922705314, "grad_norm": 1.0350805521011353, "learning_rate": 7.742546793319844e-06, "loss": 0.7083, "step": 6533 }, { "epoch": 0.33580018501387604, "grad_norm": 1.076413631439209, "learning_rate": 7.74185087074006e-06, "loss": 0.7588, "step": 6534 }, { "epoch": 0.3358515777572207, "grad_norm": 1.0699633359909058, "learning_rate": 7.741154872194905e-06, "loss": 0.7328, "step": 6535 }, { "epoch": 0.33590297050056533, "grad_norm": 0.7075090408325195, "learning_rate": 7.740458797703658e-06, "loss": 0.6978, "step": 6536 }, { "epoch": 0.33595436324391, "grad_norm": 1.016798973083496, "learning_rate": 7.73976264728561e-06, "loss": 0.733, "step": 6537 }, { "epoch": 0.3360057559872546, "grad_norm": 1.0697447061538696, "learning_rate": 7.739066420960042e-06, "loss": 0.787, "step": 6538 }, { "epoch": 0.3360571487305992, "grad_norm": 1.019687533378601, "learning_rate": 7.738370118746248e-06, "loss": 0.7474, "step": 6539 }, { "epoch": 0.33610854147394387, "grad_norm": 1.1537458896636963, "learning_rate": 7.73767374066352e-06, "loss": 0.7492, "step": 6540 }, { "epoch": 0.3361599342172885, "grad_norm": 1.0208557844161987, "learning_rate": 7.73697728673115e-06, "loss": 0.7001, "step": 6541 }, { "epoch": 0.33621132696063316, "grad_norm": 1.057917833328247, "learning_rate": 7.736280756968433e-06, "loss": 0.7946, "step": 6542 }, { "epoch": 0.3362627197039778, "grad_norm": 1.049900770187378, "learning_rate": 7.73558415139467e-06, "loss": 0.7571, "step": 6543 }, { "epoch": 0.33631411244732246, "grad_norm": 1.0712857246398926, "learning_rate": 7.734887470029157e-06, "loss": 0.8518, "step": 6544 }, { "epoch": 0.3363655051906671, "grad_norm": 1.0285158157348633, "learning_rate": 7.7341907128912e-06, "loss": 0.7323, "step": 6545 }, { "epoch": 0.3364168979340117, "grad_norm": 1.0724751949310303, "learning_rate": 7.733493880000102e-06, "loss": 0.7728, "step": 6546 }, { "epoch": 0.33646829067735634, "grad_norm": 1.0794790983200073, "learning_rate": 7.732796971375167e-06, "loss": 0.776, "step": 6547 }, { "epoch": 0.336519683420701, "grad_norm": 1.0515930652618408, "learning_rate": 7.732099987035707e-06, "loss": 0.792, "step": 6548 }, { "epoch": 0.33657107616404563, "grad_norm": 1.1038976907730103, "learning_rate": 7.731402927001032e-06, "loss": 0.7361, "step": 6549 }, { "epoch": 0.3366224689073903, "grad_norm": 0.7011096477508545, "learning_rate": 7.730705791290452e-06, "loss": 0.705, "step": 6550 }, { "epoch": 0.33667386165073493, "grad_norm": 1.0449178218841553, "learning_rate": 7.730008579923285e-06, "loss": 0.7841, "step": 6551 }, { "epoch": 0.3367252543940796, "grad_norm": 1.1194484233856201, "learning_rate": 7.729311292918844e-06, "loss": 0.8356, "step": 6552 }, { "epoch": 0.3367766471374242, "grad_norm": 1.0231194496154785, "learning_rate": 7.728613930296452e-06, "loss": 0.7613, "step": 6553 }, { "epoch": 0.3368280398807688, "grad_norm": 1.1133205890655518, "learning_rate": 7.727916492075429e-06, "loss": 0.7492, "step": 6554 }, { "epoch": 0.33687943262411346, "grad_norm": 1.065483808517456, "learning_rate": 7.727218978275095e-06, "loss": 0.7644, "step": 6555 }, { "epoch": 0.3369308253674581, "grad_norm": 1.0070241689682007, "learning_rate": 7.72652138891478e-06, "loss": 0.7781, "step": 6556 }, { "epoch": 0.33698221811080276, "grad_norm": 1.058231234550476, "learning_rate": 7.725823724013808e-06, "loss": 0.7534, "step": 6557 }, { "epoch": 0.3370336108541474, "grad_norm": 1.1050045490264893, "learning_rate": 7.725125983591511e-06, "loss": 0.7883, "step": 6558 }, { "epoch": 0.33708500359749205, "grad_norm": 1.0163242816925049, "learning_rate": 7.724428167667216e-06, "loss": 0.7581, "step": 6559 }, { "epoch": 0.3371363963408367, "grad_norm": 1.0442469120025635, "learning_rate": 7.723730276260262e-06, "loss": 0.7913, "step": 6560 }, { "epoch": 0.3371877890841813, "grad_norm": 1.0541136264801025, "learning_rate": 7.723032309389981e-06, "loss": 0.7769, "step": 6561 }, { "epoch": 0.33723918182752594, "grad_norm": 1.15898859500885, "learning_rate": 7.722334267075713e-06, "loss": 0.8122, "step": 6562 }, { "epoch": 0.3372905745708706, "grad_norm": 1.0498425960540771, "learning_rate": 7.721636149336795e-06, "loss": 0.8236, "step": 6563 }, { "epoch": 0.33734196731421523, "grad_norm": 1.058797836303711, "learning_rate": 7.72093795619257e-06, "loss": 0.7305, "step": 6564 }, { "epoch": 0.3373933600575599, "grad_norm": 1.0965588092803955, "learning_rate": 7.720239687662388e-06, "loss": 0.7691, "step": 6565 }, { "epoch": 0.3374447528009045, "grad_norm": 1.0800951719284058, "learning_rate": 7.719541343765586e-06, "loss": 0.7693, "step": 6566 }, { "epoch": 0.33749614554424917, "grad_norm": 1.0936470031738281, "learning_rate": 7.718842924521516e-06, "loss": 0.7813, "step": 6567 }, { "epoch": 0.3375475382875938, "grad_norm": 1.066245675086975, "learning_rate": 7.71814442994953e-06, "loss": 0.7072, "step": 6568 }, { "epoch": 0.3375989310309384, "grad_norm": 1.1159350872039795, "learning_rate": 7.717445860068979e-06, "loss": 0.7107, "step": 6569 }, { "epoch": 0.33765032377428306, "grad_norm": 1.117701530456543, "learning_rate": 7.716747214899217e-06, "loss": 0.6961, "step": 6570 }, { "epoch": 0.3377017165176277, "grad_norm": 0.7588281035423279, "learning_rate": 7.716048494459601e-06, "loss": 0.6804, "step": 6571 }, { "epoch": 0.33775310926097235, "grad_norm": 1.1264597177505493, "learning_rate": 7.71534969876949e-06, "loss": 0.7958, "step": 6572 }, { "epoch": 0.337804502004317, "grad_norm": 1.1038175821304321, "learning_rate": 7.714650827848245e-06, "loss": 0.7376, "step": 6573 }, { "epoch": 0.33785589474766164, "grad_norm": 0.7939095497131348, "learning_rate": 7.713951881715227e-06, "loss": 0.7041, "step": 6574 }, { "epoch": 0.3379072874910063, "grad_norm": 0.7537832856178284, "learning_rate": 7.713252860389803e-06, "loss": 0.6624, "step": 6575 }, { "epoch": 0.3379586802343509, "grad_norm": 1.0547189712524414, "learning_rate": 7.71255376389134e-06, "loss": 0.7439, "step": 6576 }, { "epoch": 0.33801007297769553, "grad_norm": 0.7780723571777344, "learning_rate": 7.711854592239203e-06, "loss": 0.6928, "step": 6577 }, { "epoch": 0.3380614657210402, "grad_norm": 1.0735018253326416, "learning_rate": 7.711155345452769e-06, "loss": 0.8039, "step": 6578 }, { "epoch": 0.3381128584643848, "grad_norm": 0.7522705793380737, "learning_rate": 7.710456023551409e-06, "loss": 0.706, "step": 6579 }, { "epoch": 0.33816425120772947, "grad_norm": 1.1249234676361084, "learning_rate": 7.709756626554496e-06, "loss": 0.7497, "step": 6580 }, { "epoch": 0.3382156439510741, "grad_norm": 1.1204546689987183, "learning_rate": 7.709057154481413e-06, "loss": 0.7495, "step": 6581 }, { "epoch": 0.33826703669441877, "grad_norm": 1.0404729843139648, "learning_rate": 7.70835760735153e-06, "loss": 0.741, "step": 6582 }, { "epoch": 0.3383184294377634, "grad_norm": 1.0971697568893433, "learning_rate": 7.70765798518424e-06, "loss": 0.752, "step": 6583 }, { "epoch": 0.338369822181108, "grad_norm": 1.1227312088012695, "learning_rate": 7.706958287998918e-06, "loss": 0.8168, "step": 6584 }, { "epoch": 0.33842121492445265, "grad_norm": 1.063349723815918, "learning_rate": 7.706258515814953e-06, "loss": 0.8158, "step": 6585 }, { "epoch": 0.3384726076677973, "grad_norm": 1.0252267122268677, "learning_rate": 7.705558668651733e-06, "loss": 0.7443, "step": 6586 }, { "epoch": 0.33852400041114195, "grad_norm": 1.0455039739608765, "learning_rate": 7.704858746528649e-06, "loss": 0.7674, "step": 6587 }, { "epoch": 0.3385753931544866, "grad_norm": 1.143568754196167, "learning_rate": 7.70415874946509e-06, "loss": 0.7628, "step": 6588 }, { "epoch": 0.33862678589783124, "grad_norm": 1.0962506532669067, "learning_rate": 7.703458677480451e-06, "loss": 0.7511, "step": 6589 }, { "epoch": 0.3386781786411759, "grad_norm": 0.7368639707565308, "learning_rate": 7.70275853059413e-06, "loss": 0.6692, "step": 6590 }, { "epoch": 0.3387295713845205, "grad_norm": 1.1238256692886353, "learning_rate": 7.702058308825523e-06, "loss": 0.7117, "step": 6591 }, { "epoch": 0.3387809641278651, "grad_norm": 1.0725135803222656, "learning_rate": 7.701358012194031e-06, "loss": 0.735, "step": 6592 }, { "epoch": 0.33883235687120977, "grad_norm": 1.1122689247131348, "learning_rate": 7.700657640719055e-06, "loss": 0.7568, "step": 6593 }, { "epoch": 0.3388837496145544, "grad_norm": 1.095253348350525, "learning_rate": 7.699957194420004e-06, "loss": 0.7471, "step": 6594 }, { "epoch": 0.33893514235789907, "grad_norm": 0.7063501477241516, "learning_rate": 7.699256673316277e-06, "loss": 0.7085, "step": 6595 }, { "epoch": 0.3389865351012437, "grad_norm": 1.101936936378479, "learning_rate": 7.698556077427291e-06, "loss": 0.7814, "step": 6596 }, { "epoch": 0.33903792784458836, "grad_norm": 1.0311704874038696, "learning_rate": 7.69785540677245e-06, "loss": 0.6829, "step": 6597 }, { "epoch": 0.339089320587933, "grad_norm": 1.50922691822052, "learning_rate": 7.69715466137117e-06, "loss": 0.7658, "step": 6598 }, { "epoch": 0.3391407133312776, "grad_norm": 1.0183446407318115, "learning_rate": 7.696453841242863e-06, "loss": 0.7819, "step": 6599 }, { "epoch": 0.33919210607462225, "grad_norm": 0.956010103225708, "learning_rate": 7.69575294640695e-06, "loss": 0.7294, "step": 6600 }, { "epoch": 0.3392434988179669, "grad_norm": 1.0670047998428345, "learning_rate": 7.695051976882845e-06, "loss": 0.7691, "step": 6601 }, { "epoch": 0.33929489156131154, "grad_norm": 1.0840113162994385, "learning_rate": 7.694350932689974e-06, "loss": 0.7746, "step": 6602 }, { "epoch": 0.3393462843046562, "grad_norm": 0.7704764604568481, "learning_rate": 7.693649813847756e-06, "loss": 0.6765, "step": 6603 }, { "epoch": 0.33939767704800083, "grad_norm": 1.0854947566986084, "learning_rate": 7.69294862037562e-06, "loss": 0.719, "step": 6604 }, { "epoch": 0.3394490697913455, "grad_norm": 1.0851550102233887, "learning_rate": 7.69224735229299e-06, "loss": 0.7043, "step": 6605 }, { "epoch": 0.33950046253469013, "grad_norm": 1.1060842275619507, "learning_rate": 7.691546009619292e-06, "loss": 0.8084, "step": 6606 }, { "epoch": 0.3395518552780347, "grad_norm": 1.1430354118347168, "learning_rate": 7.690844592373967e-06, "loss": 0.7344, "step": 6607 }, { "epoch": 0.33960324802137937, "grad_norm": 0.7923489212989807, "learning_rate": 7.69014310057644e-06, "loss": 0.7025, "step": 6608 }, { "epoch": 0.339654640764724, "grad_norm": 4.38373327255249, "learning_rate": 7.68944153424615e-06, "loss": 0.9484, "step": 6609 }, { "epoch": 0.33970603350806866, "grad_norm": 1.0721818208694458, "learning_rate": 7.688739893402532e-06, "loss": 0.7739, "step": 6610 }, { "epoch": 0.3397574262514133, "grad_norm": 0.8866074085235596, "learning_rate": 7.688038178065029e-06, "loss": 0.6952, "step": 6611 }, { "epoch": 0.33980881899475796, "grad_norm": 1.0030882358551025, "learning_rate": 7.687336388253081e-06, "loss": 0.7384, "step": 6612 }, { "epoch": 0.3398602117381026, "grad_norm": 1.0326414108276367, "learning_rate": 7.686634523986133e-06, "loss": 0.758, "step": 6613 }, { "epoch": 0.3399116044814472, "grad_norm": 0.925369381904602, "learning_rate": 7.685932585283627e-06, "loss": 0.6654, "step": 6614 }, { "epoch": 0.33996299722479184, "grad_norm": 0.7294013500213623, "learning_rate": 7.685230572165014e-06, "loss": 0.6803, "step": 6615 }, { "epoch": 0.3400143899681365, "grad_norm": 1.0630218982696533, "learning_rate": 7.684528484649744e-06, "loss": 0.765, "step": 6616 }, { "epoch": 0.34006578271148113, "grad_norm": 1.0217283964157104, "learning_rate": 7.683826322757268e-06, "loss": 0.726, "step": 6617 }, { "epoch": 0.3401171754548258, "grad_norm": 1.120306372642517, "learning_rate": 7.68312408650704e-06, "loss": 0.7658, "step": 6618 }, { "epoch": 0.34016856819817043, "grad_norm": 0.7621231079101562, "learning_rate": 7.682421775918514e-06, "loss": 0.6936, "step": 6619 }, { "epoch": 0.3402199609415151, "grad_norm": 1.1259628534317017, "learning_rate": 7.681719391011154e-06, "loss": 0.7965, "step": 6620 }, { "epoch": 0.3402713536848597, "grad_norm": 1.0145872831344604, "learning_rate": 7.681016931804413e-06, "loss": 0.8164, "step": 6621 }, { "epoch": 0.3403227464282043, "grad_norm": 0.9857984781265259, "learning_rate": 7.680314398317758e-06, "loss": 0.7505, "step": 6622 }, { "epoch": 0.34037413917154896, "grad_norm": 1.232672095298767, "learning_rate": 7.679611790570653e-06, "loss": 0.8069, "step": 6623 }, { "epoch": 0.3404255319148936, "grad_norm": 1.0144422054290771, "learning_rate": 7.678909108582561e-06, "loss": 0.8147, "step": 6624 }, { "epoch": 0.34047692465823826, "grad_norm": 1.0247306823730469, "learning_rate": 7.678206352372955e-06, "loss": 0.7364, "step": 6625 }, { "epoch": 0.3405283174015829, "grad_norm": 1.0642890930175781, "learning_rate": 7.677503521961303e-06, "loss": 0.758, "step": 6626 }, { "epoch": 0.34057971014492755, "grad_norm": 1.030613660812378, "learning_rate": 7.676800617367078e-06, "loss": 0.7377, "step": 6627 }, { "epoch": 0.3406311028882722, "grad_norm": 1.0734517574310303, "learning_rate": 7.676097638609753e-06, "loss": 0.7733, "step": 6628 }, { "epoch": 0.3406824956316168, "grad_norm": 1.098297119140625, "learning_rate": 7.675394585708807e-06, "loss": 0.7939, "step": 6629 }, { "epoch": 0.34073388837496144, "grad_norm": 1.0911213159561157, "learning_rate": 7.674691458683718e-06, "loss": 0.7609, "step": 6630 }, { "epoch": 0.3407852811183061, "grad_norm": 1.0663267374038696, "learning_rate": 7.673988257553966e-06, "loss": 0.7948, "step": 6631 }, { "epoch": 0.34083667386165073, "grad_norm": 0.6964709162712097, "learning_rate": 7.673284982339035e-06, "loss": 0.6704, "step": 6632 }, { "epoch": 0.3408880666049954, "grad_norm": 1.0632686614990234, "learning_rate": 7.672581633058408e-06, "loss": 0.7707, "step": 6633 }, { "epoch": 0.34093945934834, "grad_norm": 1.0994741916656494, "learning_rate": 7.671878209731573e-06, "loss": 0.7925, "step": 6634 }, { "epoch": 0.34099085209168467, "grad_norm": 1.0407366752624512, "learning_rate": 7.67117471237802e-06, "loss": 0.7492, "step": 6635 }, { "epoch": 0.3410422448350293, "grad_norm": 1.099822998046875, "learning_rate": 7.67047114101724e-06, "loss": 0.7819, "step": 6636 }, { "epoch": 0.3410936375783739, "grad_norm": 1.0465624332427979, "learning_rate": 7.669767495668723e-06, "loss": 0.692, "step": 6637 }, { "epoch": 0.34114503032171856, "grad_norm": 1.0452357530593872, "learning_rate": 7.669063776351966e-06, "loss": 0.7312, "step": 6638 }, { "epoch": 0.3411964230650632, "grad_norm": 0.8059674501419067, "learning_rate": 7.668359983086468e-06, "loss": 0.6994, "step": 6639 }, { "epoch": 0.34124781580840785, "grad_norm": 1.024306058883667, "learning_rate": 7.667656115891726e-06, "loss": 0.7562, "step": 6640 }, { "epoch": 0.3412992085517525, "grad_norm": 0.8661328554153442, "learning_rate": 7.666952174787241e-06, "loss": 0.6936, "step": 6641 }, { "epoch": 0.34135060129509714, "grad_norm": 0.6932468414306641, "learning_rate": 7.666248159792517e-06, "loss": 0.7029, "step": 6642 }, { "epoch": 0.3414019940384418, "grad_norm": 1.052911400794983, "learning_rate": 7.66554407092706e-06, "loss": 0.825, "step": 6643 }, { "epoch": 0.34145338678178644, "grad_norm": 1.0475996732711792, "learning_rate": 7.664839908210378e-06, "loss": 0.7404, "step": 6644 }, { "epoch": 0.34150477952513103, "grad_norm": 1.0537971258163452, "learning_rate": 7.664135671661978e-06, "loss": 0.7326, "step": 6645 }, { "epoch": 0.3415561722684757, "grad_norm": 1.0285451412200928, "learning_rate": 7.663431361301372e-06, "loss": 0.7462, "step": 6646 }, { "epoch": 0.3416075650118203, "grad_norm": 1.0768494606018066, "learning_rate": 7.662726977148077e-06, "loss": 0.8133, "step": 6647 }, { "epoch": 0.34165895775516497, "grad_norm": 1.0296823978424072, "learning_rate": 7.662022519221604e-06, "loss": 0.7481, "step": 6648 }, { "epoch": 0.3417103504985096, "grad_norm": 0.7730618715286255, "learning_rate": 7.661317987541472e-06, "loss": 0.7083, "step": 6649 }, { "epoch": 0.34176174324185427, "grad_norm": 1.1077300310134888, "learning_rate": 7.660613382127201e-06, "loss": 0.7341, "step": 6650 }, { "epoch": 0.3418131359851989, "grad_norm": 1.0408177375793457, "learning_rate": 7.659908702998313e-06, "loss": 0.766, "step": 6651 }, { "epoch": 0.3418645287285435, "grad_norm": 1.0484448671340942, "learning_rate": 7.659203950174333e-06, "loss": 0.7667, "step": 6652 }, { "epoch": 0.34191592147188815, "grad_norm": 1.048295021057129, "learning_rate": 7.658499123674784e-06, "loss": 0.7772, "step": 6653 }, { "epoch": 0.3419673142152328, "grad_norm": 1.1377633810043335, "learning_rate": 7.657794223519198e-06, "loss": 0.7285, "step": 6654 }, { "epoch": 0.34201870695857745, "grad_norm": 0.8304348587989807, "learning_rate": 7.6570892497271e-06, "loss": 0.6968, "step": 6655 }, { "epoch": 0.3420700997019221, "grad_norm": 1.0792810916900635, "learning_rate": 7.656384202318024e-06, "loss": 0.7899, "step": 6656 }, { "epoch": 0.34212149244526674, "grad_norm": 1.0511394739151, "learning_rate": 7.655679081311505e-06, "loss": 0.7919, "step": 6657 }, { "epoch": 0.3421728851886114, "grad_norm": 0.8317001461982727, "learning_rate": 7.654973886727076e-06, "loss": 0.6945, "step": 6658 }, { "epoch": 0.34222427793195603, "grad_norm": 0.8909692764282227, "learning_rate": 7.654268618584277e-06, "loss": 0.6842, "step": 6659 }, { "epoch": 0.3422756706753006, "grad_norm": 1.1138473749160767, "learning_rate": 7.653563276902651e-06, "loss": 0.7422, "step": 6660 }, { "epoch": 0.34232706341864527, "grad_norm": 1.1204191446304321, "learning_rate": 7.652857861701735e-06, "loss": 0.7375, "step": 6661 }, { "epoch": 0.3423784561619899, "grad_norm": 1.0435632467269897, "learning_rate": 7.652152373001077e-06, "loss": 0.7576, "step": 6662 }, { "epoch": 0.34242984890533457, "grad_norm": 0.8679373860359192, "learning_rate": 7.65144681082022e-06, "loss": 0.6911, "step": 6663 }, { "epoch": 0.3424812416486792, "grad_norm": 0.7349409461021423, "learning_rate": 7.650741175178712e-06, "loss": 0.6712, "step": 6664 }, { "epoch": 0.34253263439202386, "grad_norm": 0.8016467690467834, "learning_rate": 7.650035466096109e-06, "loss": 0.7141, "step": 6665 }, { "epoch": 0.3425840271353685, "grad_norm": 1.0859202146530151, "learning_rate": 7.649329683591959e-06, "loss": 0.7475, "step": 6666 }, { "epoch": 0.3426354198787131, "grad_norm": 1.0483160018920898, "learning_rate": 7.648623827685813e-06, "loss": 0.7195, "step": 6667 }, { "epoch": 0.34268681262205775, "grad_norm": 0.7807158827781677, "learning_rate": 7.647917898397235e-06, "loss": 0.695, "step": 6668 }, { "epoch": 0.3427382053654024, "grad_norm": 1.1061903238296509, "learning_rate": 7.647211895745777e-06, "loss": 0.7297, "step": 6669 }, { "epoch": 0.34278959810874704, "grad_norm": 0.7125497460365295, "learning_rate": 7.646505819751e-06, "loss": 0.6822, "step": 6670 }, { "epoch": 0.3428409908520917, "grad_norm": 1.074361801147461, "learning_rate": 7.645799670432473e-06, "loss": 0.8248, "step": 6671 }, { "epoch": 0.34289238359543633, "grad_norm": 1.0187973976135254, "learning_rate": 7.645093447809751e-06, "loss": 0.7552, "step": 6672 }, { "epoch": 0.342943776338781, "grad_norm": 1.1492890119552612, "learning_rate": 7.644387151902408e-06, "loss": 0.7254, "step": 6673 }, { "epoch": 0.34299516908212563, "grad_norm": 1.1094372272491455, "learning_rate": 7.64368078273001e-06, "loss": 0.8263, "step": 6674 }, { "epoch": 0.3430465618254702, "grad_norm": 1.0237451791763306, "learning_rate": 7.642974340312126e-06, "loss": 0.7543, "step": 6675 }, { "epoch": 0.34309795456881487, "grad_norm": 1.027280569076538, "learning_rate": 7.642267824668331e-06, "loss": 0.7684, "step": 6676 }, { "epoch": 0.3431493473121595, "grad_norm": 1.05669105052948, "learning_rate": 7.641561235818197e-06, "loss": 0.7646, "step": 6677 }, { "epoch": 0.34320074005550416, "grad_norm": 1.0904415845870972, "learning_rate": 7.640854573781303e-06, "loss": 0.7659, "step": 6678 }, { "epoch": 0.3432521327988488, "grad_norm": 1.1408905982971191, "learning_rate": 7.640147838577228e-06, "loss": 0.7244, "step": 6679 }, { "epoch": 0.34330352554219346, "grad_norm": 1.0549665689468384, "learning_rate": 7.63944103022555e-06, "loss": 0.7508, "step": 6680 }, { "epoch": 0.3433549182855381, "grad_norm": 1.075059413909912, "learning_rate": 7.638734148745855e-06, "loss": 0.75, "step": 6681 }, { "epoch": 0.34340631102888275, "grad_norm": 0.7598670721054077, "learning_rate": 7.638027194157725e-06, "loss": 0.6965, "step": 6682 }, { "epoch": 0.34345770377222734, "grad_norm": 0.813686192035675, "learning_rate": 7.637320166480746e-06, "loss": 0.6938, "step": 6683 }, { "epoch": 0.343509096515572, "grad_norm": 1.097036361694336, "learning_rate": 7.636613065734513e-06, "loss": 0.737, "step": 6684 }, { "epoch": 0.34356048925891663, "grad_norm": 1.075984001159668, "learning_rate": 7.63590589193861e-06, "loss": 0.7918, "step": 6685 }, { "epoch": 0.3436118820022613, "grad_norm": 1.144142508506775, "learning_rate": 7.635198645112634e-06, "loss": 0.7713, "step": 6686 }, { "epoch": 0.34366327474560593, "grad_norm": 1.0059248208999634, "learning_rate": 7.634491325276176e-06, "loss": 0.7067, "step": 6687 }, { "epoch": 0.3437146674889506, "grad_norm": 1.200853943824768, "learning_rate": 7.633783932448837e-06, "loss": 0.7558, "step": 6688 }, { "epoch": 0.3437660602322952, "grad_norm": 1.134192943572998, "learning_rate": 7.633076466650213e-06, "loss": 0.725, "step": 6689 }, { "epoch": 0.3438174529756398, "grad_norm": 1.0616955757141113, "learning_rate": 7.632368927899909e-06, "loss": 0.7827, "step": 6690 }, { "epoch": 0.34386884571898446, "grad_norm": 1.05637526512146, "learning_rate": 7.631661316217523e-06, "loss": 0.7579, "step": 6691 }, { "epoch": 0.3439202384623291, "grad_norm": 1.069056749343872, "learning_rate": 7.630953631622662e-06, "loss": 0.7994, "step": 6692 }, { "epoch": 0.34397163120567376, "grad_norm": 1.095033049583435, "learning_rate": 7.630245874134934e-06, "loss": 0.7668, "step": 6693 }, { "epoch": 0.3440230239490184, "grad_norm": 1.039291501045227, "learning_rate": 7.629538043773945e-06, "loss": 0.7995, "step": 6694 }, { "epoch": 0.34407441669236305, "grad_norm": 0.8052263855934143, "learning_rate": 7.628830140559311e-06, "loss": 0.6517, "step": 6695 }, { "epoch": 0.3441258094357077, "grad_norm": 1.031767725944519, "learning_rate": 7.628122164510641e-06, "loss": 0.7341, "step": 6696 }, { "epoch": 0.34417720217905234, "grad_norm": 0.7558072209358215, "learning_rate": 7.627414115647554e-06, "loss": 0.7498, "step": 6697 }, { "epoch": 0.34422859492239694, "grad_norm": 1.075257658958435, "learning_rate": 7.626705993989662e-06, "loss": 0.7377, "step": 6698 }, { "epoch": 0.3442799876657416, "grad_norm": 1.0834792852401733, "learning_rate": 7.625997799556586e-06, "loss": 0.849, "step": 6699 }, { "epoch": 0.34433138040908623, "grad_norm": 0.8612902164459229, "learning_rate": 7.625289532367948e-06, "loss": 0.6877, "step": 6700 }, { "epoch": 0.3443827731524309, "grad_norm": 0.7838582396507263, "learning_rate": 7.624581192443372e-06, "loss": 0.697, "step": 6701 }, { "epoch": 0.3444341658957755, "grad_norm": 1.0737199783325195, "learning_rate": 7.623872779802483e-06, "loss": 0.74, "step": 6702 }, { "epoch": 0.34448555863912017, "grad_norm": 1.087113380432129, "learning_rate": 7.623164294464906e-06, "loss": 0.7156, "step": 6703 }, { "epoch": 0.3445369513824648, "grad_norm": 1.1013505458831787, "learning_rate": 7.6224557364502715e-06, "loss": 0.7895, "step": 6704 }, { "epoch": 0.3445883441258094, "grad_norm": 0.8384442329406738, "learning_rate": 7.621747105778212e-06, "loss": 0.6927, "step": 6705 }, { "epoch": 0.34463973686915406, "grad_norm": 1.1046620607376099, "learning_rate": 7.621038402468359e-06, "loss": 0.797, "step": 6706 }, { "epoch": 0.3446911296124987, "grad_norm": 1.0412647724151611, "learning_rate": 7.620329626540348e-06, "loss": 0.7453, "step": 6707 }, { "epoch": 0.34474252235584335, "grad_norm": 1.1194000244140625, "learning_rate": 7.6196207780138164e-06, "loss": 0.7848, "step": 6708 }, { "epoch": 0.344793915099188, "grad_norm": 1.0361796617507935, "learning_rate": 7.6189118569084045e-06, "loss": 0.7678, "step": 6709 }, { "epoch": 0.34484530784253264, "grad_norm": 1.1156220436096191, "learning_rate": 7.618202863243751e-06, "loss": 0.7893, "step": 6710 }, { "epoch": 0.3448967005858773, "grad_norm": 1.0447503328323364, "learning_rate": 7.617493797039501e-06, "loss": 0.7257, "step": 6711 }, { "epoch": 0.34494809332922194, "grad_norm": 1.0587265491485596, "learning_rate": 7.6167846583153e-06, "loss": 0.7635, "step": 6712 }, { "epoch": 0.34499948607256653, "grad_norm": 1.1661818027496338, "learning_rate": 7.616075447090796e-06, "loss": 0.7363, "step": 6713 }, { "epoch": 0.3450508788159112, "grad_norm": 0.8274634480476379, "learning_rate": 7.6153661633856365e-06, "loss": 0.6875, "step": 6714 }, { "epoch": 0.3451022715592558, "grad_norm": 1.0906699895858765, "learning_rate": 7.614656807219474e-06, "loss": 0.7972, "step": 6715 }, { "epoch": 0.34515366430260047, "grad_norm": 1.0328997373580933, "learning_rate": 7.613947378611961e-06, "loss": 0.738, "step": 6716 }, { "epoch": 0.3452050570459451, "grad_norm": 1.0382572412490845, "learning_rate": 7.613237877582753e-06, "loss": 0.7686, "step": 6717 }, { "epoch": 0.34525644978928977, "grad_norm": 1.0837785005569458, "learning_rate": 7.6125283041515085e-06, "loss": 0.7576, "step": 6718 }, { "epoch": 0.3453078425326344, "grad_norm": 1.086661458015442, "learning_rate": 7.611818658337886e-06, "loss": 0.7586, "step": 6719 }, { "epoch": 0.345359235275979, "grad_norm": 1.0097092390060425, "learning_rate": 7.611108940161546e-06, "loss": 0.7825, "step": 6720 }, { "epoch": 0.34541062801932365, "grad_norm": 0.71791672706604, "learning_rate": 7.610399149642151e-06, "loss": 0.6978, "step": 6721 }, { "epoch": 0.3454620207626683, "grad_norm": 1.0498000383377075, "learning_rate": 7.609689286799371e-06, "loss": 0.7426, "step": 6722 }, { "epoch": 0.34551341350601295, "grad_norm": 0.6963216066360474, "learning_rate": 7.60897935165287e-06, "loss": 0.6501, "step": 6723 }, { "epoch": 0.3455648062493576, "grad_norm": 1.1035126447677612, "learning_rate": 7.608269344222317e-06, "loss": 0.8557, "step": 6724 }, { "epoch": 0.34561619899270224, "grad_norm": 1.0338149070739746, "learning_rate": 7.607559264527384e-06, "loss": 0.7149, "step": 6725 }, { "epoch": 0.3456675917360469, "grad_norm": 1.0522608757019043, "learning_rate": 7.606849112587744e-06, "loss": 0.792, "step": 6726 }, { "epoch": 0.34571898447939153, "grad_norm": 1.0813616514205933, "learning_rate": 7.6061388884230755e-06, "loss": 0.7489, "step": 6727 }, { "epoch": 0.3457703772227361, "grad_norm": 1.1051362752914429, "learning_rate": 7.60542859205305e-06, "loss": 0.791, "step": 6728 }, { "epoch": 0.34582176996608077, "grad_norm": 1.0514551401138306, "learning_rate": 7.604718223497352e-06, "loss": 0.7541, "step": 6729 }, { "epoch": 0.3458731627094254, "grad_norm": 1.0483429431915283, "learning_rate": 7.604007782775662e-06, "loss": 0.7409, "step": 6730 }, { "epoch": 0.34592455545277007, "grad_norm": 1.0433050394058228, "learning_rate": 7.60329726990766e-06, "loss": 0.7601, "step": 6731 }, { "epoch": 0.3459759481961147, "grad_norm": 1.1083686351776123, "learning_rate": 7.602586684913036e-06, "loss": 0.773, "step": 6732 }, { "epoch": 0.34602734093945936, "grad_norm": 1.089171051979065, "learning_rate": 7.601876027811475e-06, "loss": 0.7481, "step": 6733 }, { "epoch": 0.346078733682804, "grad_norm": 1.0874563455581665, "learning_rate": 7.601165298622664e-06, "loss": 0.734, "step": 6734 }, { "epoch": 0.34613012642614865, "grad_norm": 0.6653624773025513, "learning_rate": 7.600454497366299e-06, "loss": 0.6827, "step": 6735 }, { "epoch": 0.34618151916949325, "grad_norm": 0.6917997598648071, "learning_rate": 7.5997436240620715e-06, "loss": 0.68, "step": 6736 }, { "epoch": 0.3462329119128379, "grad_norm": 1.1056466102600098, "learning_rate": 7.599032678729676e-06, "loss": 0.7292, "step": 6737 }, { "epoch": 0.34628430465618254, "grad_norm": 1.0500496625900269, "learning_rate": 7.598321661388812e-06, "loss": 0.767, "step": 6738 }, { "epoch": 0.3463356973995272, "grad_norm": 1.1421741247177124, "learning_rate": 7.597610572059175e-06, "loss": 0.7128, "step": 6739 }, { "epoch": 0.34638709014287183, "grad_norm": 1.1197861433029175, "learning_rate": 7.59689941076047e-06, "loss": 0.8204, "step": 6740 }, { "epoch": 0.3464384828862165, "grad_norm": 1.0676395893096924, "learning_rate": 7.5961881775124e-06, "loss": 0.742, "step": 6741 }, { "epoch": 0.34648987562956113, "grad_norm": 1.0702288150787354, "learning_rate": 7.595476872334668e-06, "loss": 0.7079, "step": 6742 }, { "epoch": 0.3465412683729057, "grad_norm": 1.075451135635376, "learning_rate": 7.594765495246984e-06, "loss": 0.7928, "step": 6743 }, { "epoch": 0.34659266111625037, "grad_norm": 1.091914415359497, "learning_rate": 7.594054046269055e-06, "loss": 0.7077, "step": 6744 }, { "epoch": 0.346644053859595, "grad_norm": 1.174403190612793, "learning_rate": 7.593342525420595e-06, "loss": 0.8204, "step": 6745 }, { "epoch": 0.34669544660293966, "grad_norm": 1.0626065731048584, "learning_rate": 7.5926309327213145e-06, "loss": 0.7017, "step": 6746 }, { "epoch": 0.3467468393462843, "grad_norm": 1.1501697301864624, "learning_rate": 7.591919268190929e-06, "loss": 0.7783, "step": 6747 }, { "epoch": 0.34679823208962895, "grad_norm": 1.070454716682434, "learning_rate": 7.591207531849159e-06, "loss": 0.7179, "step": 6748 }, { "epoch": 0.3468496248329736, "grad_norm": 1.0858380794525146, "learning_rate": 7.59049572371572e-06, "loss": 0.7692, "step": 6749 }, { "epoch": 0.34690101757631825, "grad_norm": 1.0918105840682983, "learning_rate": 7.589783843810336e-06, "loss": 0.792, "step": 6750 }, { "epoch": 0.34695241031966284, "grad_norm": 1.0945814847946167, "learning_rate": 7.589071892152728e-06, "loss": 0.7761, "step": 6751 }, { "epoch": 0.3470038030630075, "grad_norm": 1.0814858675003052, "learning_rate": 7.588359868762622e-06, "loss": 0.7801, "step": 6752 }, { "epoch": 0.34705519580635213, "grad_norm": 1.0508838891983032, "learning_rate": 7.587647773659745e-06, "loss": 0.7974, "step": 6753 }, { "epoch": 0.3471065885496968, "grad_norm": 1.156484603881836, "learning_rate": 7.58693560686383e-06, "loss": 0.7397, "step": 6754 }, { "epoch": 0.34715798129304143, "grad_norm": 0.9922685623168945, "learning_rate": 7.5862233683946025e-06, "loss": 0.7558, "step": 6755 }, { "epoch": 0.3472093740363861, "grad_norm": 1.0413216352462769, "learning_rate": 7.585511058271799e-06, "loss": 0.6774, "step": 6756 }, { "epoch": 0.3472607667797307, "grad_norm": 1.5578668117523193, "learning_rate": 7.584798676515154e-06, "loss": 0.7583, "step": 6757 }, { "epoch": 0.3473121595230753, "grad_norm": 1.089167594909668, "learning_rate": 7.5840862231444035e-06, "loss": 0.7416, "step": 6758 }, { "epoch": 0.34736355226641996, "grad_norm": 0.8310548067092896, "learning_rate": 7.583373698179288e-06, "loss": 0.7035, "step": 6759 }, { "epoch": 0.3474149450097646, "grad_norm": 1.0672982931137085, "learning_rate": 7.582661101639548e-06, "loss": 0.7422, "step": 6760 }, { "epoch": 0.34746633775310926, "grad_norm": 1.1020509004592896, "learning_rate": 7.581948433544928e-06, "loss": 0.7593, "step": 6761 }, { "epoch": 0.3475177304964539, "grad_norm": 1.0925347805023193, "learning_rate": 7.581235693915172e-06, "loss": 0.8331, "step": 6762 }, { "epoch": 0.34756912323979855, "grad_norm": 0.8080524206161499, "learning_rate": 7.580522882770025e-06, "loss": 0.6884, "step": 6763 }, { "epoch": 0.3476205159831432, "grad_norm": 0.7478787302970886, "learning_rate": 7.57981000012924e-06, "loss": 0.677, "step": 6764 }, { "epoch": 0.34767190872648784, "grad_norm": 1.0973118543624878, "learning_rate": 7.579097046012565e-06, "loss": 0.7944, "step": 6765 }, { "epoch": 0.34772330146983244, "grad_norm": 1.0880799293518066, "learning_rate": 7.578384020439755e-06, "loss": 0.7507, "step": 6766 }, { "epoch": 0.3477746942131771, "grad_norm": 0.7046155333518982, "learning_rate": 7.577670923430565e-06, "loss": 0.6908, "step": 6767 }, { "epoch": 0.34782608695652173, "grad_norm": 1.08762526512146, "learning_rate": 7.57695775500475e-06, "loss": 0.7944, "step": 6768 }, { "epoch": 0.3478774796998664, "grad_norm": 1.0283652544021606, "learning_rate": 7.576244515182071e-06, "loss": 0.7143, "step": 6769 }, { "epoch": 0.347928872443211, "grad_norm": 1.0499347448349, "learning_rate": 7.575531203982287e-06, "loss": 0.791, "step": 6770 }, { "epoch": 0.34798026518655567, "grad_norm": 1.0651031732559204, "learning_rate": 7.574817821425162e-06, "loss": 0.7428, "step": 6771 }, { "epoch": 0.3480316579299003, "grad_norm": 0.7700686454772949, "learning_rate": 7.574104367530461e-06, "loss": 0.6662, "step": 6772 }, { "epoch": 0.34808305067324496, "grad_norm": 0.790373682975769, "learning_rate": 7.573390842317949e-06, "loss": 0.7072, "step": 6773 }, { "epoch": 0.34813444341658956, "grad_norm": 1.0704606771469116, "learning_rate": 7.5726772458073985e-06, "loss": 0.7894, "step": 6774 }, { "epoch": 0.3481858361599342, "grad_norm": 0.762983500957489, "learning_rate": 7.57196357801858e-06, "loss": 0.6327, "step": 6775 }, { "epoch": 0.34823722890327885, "grad_norm": 1.1258478164672852, "learning_rate": 7.5712498389712615e-06, "loss": 0.8141, "step": 6776 }, { "epoch": 0.3482886216466235, "grad_norm": 0.7803865075111389, "learning_rate": 7.570536028685222e-06, "loss": 0.6919, "step": 6777 }, { "epoch": 0.34834001438996814, "grad_norm": 0.7991212606430054, "learning_rate": 7.569822147180237e-06, "loss": 0.716, "step": 6778 }, { "epoch": 0.3483914071333128, "grad_norm": 1.1707431077957153, "learning_rate": 7.569108194476086e-06, "loss": 0.7836, "step": 6779 }, { "epoch": 0.34844279987665744, "grad_norm": 0.7451980113983154, "learning_rate": 7.568394170592548e-06, "loss": 0.6988, "step": 6780 }, { "epoch": 0.34849419262000203, "grad_norm": 1.089350938796997, "learning_rate": 7.567680075549407e-06, "loss": 0.7931, "step": 6781 }, { "epoch": 0.3485455853633467, "grad_norm": 1.1157748699188232, "learning_rate": 7.566965909366447e-06, "loss": 0.82, "step": 6782 }, { "epoch": 0.3485969781066913, "grad_norm": 1.0351017713546753, "learning_rate": 7.566251672063456e-06, "loss": 0.6785, "step": 6783 }, { "epoch": 0.34864837085003597, "grad_norm": 1.056187391281128, "learning_rate": 7.565537363660221e-06, "loss": 0.7413, "step": 6784 }, { "epoch": 0.3486997635933806, "grad_norm": 1.0081517696380615, "learning_rate": 7.564822984176532e-06, "loss": 0.7038, "step": 6785 }, { "epoch": 0.34875115633672527, "grad_norm": 1.1150524616241455, "learning_rate": 7.564108533632184e-06, "loss": 0.7739, "step": 6786 }, { "epoch": 0.3488025490800699, "grad_norm": 1.0703020095825195, "learning_rate": 7.56339401204697e-06, "loss": 0.7164, "step": 6787 }, { "epoch": 0.34885394182341456, "grad_norm": 1.088294506072998, "learning_rate": 7.562679419440685e-06, "loss": 0.7567, "step": 6788 }, { "epoch": 0.34890533456675915, "grad_norm": 0.7753811478614807, "learning_rate": 7.561964755833129e-06, "loss": 0.7296, "step": 6789 }, { "epoch": 0.3489567273101038, "grad_norm": 0.9371304512023926, "learning_rate": 7.561250021244103e-06, "loss": 0.7233, "step": 6790 }, { "epoch": 0.34900812005344845, "grad_norm": 1.0753734111785889, "learning_rate": 7.560535215693408e-06, "loss": 0.7127, "step": 6791 }, { "epoch": 0.3490595127967931, "grad_norm": 1.0859993696212769, "learning_rate": 7.5598203392008495e-06, "loss": 0.7527, "step": 6792 }, { "epoch": 0.34911090554013774, "grad_norm": 1.0808069705963135, "learning_rate": 7.559105391786232e-06, "loss": 0.7884, "step": 6793 }, { "epoch": 0.3491622982834824, "grad_norm": 1.0388644933700562, "learning_rate": 7.558390373469366e-06, "loss": 0.779, "step": 6794 }, { "epoch": 0.34921369102682703, "grad_norm": 1.0808041095733643, "learning_rate": 7.5576752842700606e-06, "loss": 0.6875, "step": 6795 }, { "epoch": 0.3492650837701716, "grad_norm": 1.0876045227050781, "learning_rate": 7.556960124208128e-06, "loss": 0.7862, "step": 6796 }, { "epoch": 0.34931647651351627, "grad_norm": 1.0943766832351685, "learning_rate": 7.556244893303382e-06, "loss": 0.7539, "step": 6797 }, { "epoch": 0.3493678692568609, "grad_norm": 1.1543569564819336, "learning_rate": 7.555529591575639e-06, "loss": 0.7622, "step": 6798 }, { "epoch": 0.34941926200020557, "grad_norm": 1.072258710861206, "learning_rate": 7.554814219044718e-06, "loss": 0.7477, "step": 6799 }, { "epoch": 0.3494706547435502, "grad_norm": 1.0761510133743286, "learning_rate": 7.554098775730436e-06, "loss": 0.7509, "step": 6800 }, { "epoch": 0.34952204748689486, "grad_norm": 0.7720167636871338, "learning_rate": 7.55338326165262e-06, "loss": 0.6725, "step": 6801 }, { "epoch": 0.3495734402302395, "grad_norm": 0.7899565696716309, "learning_rate": 7.552667676831089e-06, "loss": 0.7295, "step": 6802 }, { "epoch": 0.34962483297358415, "grad_norm": 1.1246100664138794, "learning_rate": 7.55195202128567e-06, "loss": 0.7161, "step": 6803 }, { "epoch": 0.34967622571692875, "grad_norm": 1.1196826696395874, "learning_rate": 7.551236295036193e-06, "loss": 0.7678, "step": 6804 }, { "epoch": 0.3497276184602734, "grad_norm": 1.1064887046813965, "learning_rate": 7.550520498102487e-06, "loss": 0.8177, "step": 6805 }, { "epoch": 0.34977901120361804, "grad_norm": 0.9994227290153503, "learning_rate": 7.549804630504383e-06, "loss": 0.721, "step": 6806 }, { "epoch": 0.3498304039469627, "grad_norm": 1.091808557510376, "learning_rate": 7.549088692261716e-06, "loss": 0.7833, "step": 6807 }, { "epoch": 0.34988179669030733, "grad_norm": 0.8548765182495117, "learning_rate": 7.548372683394318e-06, "loss": 0.6919, "step": 6808 }, { "epoch": 0.349933189433652, "grad_norm": 1.167075276374817, "learning_rate": 7.5476566039220335e-06, "loss": 0.7844, "step": 6809 }, { "epoch": 0.34998458217699663, "grad_norm": 0.8158947229385376, "learning_rate": 7.546940453864695e-06, "loss": 0.693, "step": 6810 }, { "epoch": 0.3500359749203413, "grad_norm": 1.0715219974517822, "learning_rate": 7.546224233242147e-06, "loss": 0.7108, "step": 6811 }, { "epoch": 0.35008736766368587, "grad_norm": 1.1415905952453613, "learning_rate": 7.545507942074235e-06, "loss": 0.7078, "step": 6812 }, { "epoch": 0.3501387604070305, "grad_norm": 1.0799692869186401, "learning_rate": 7.544791580380801e-06, "loss": 0.7155, "step": 6813 }, { "epoch": 0.35019015315037516, "grad_norm": 1.131688117980957, "learning_rate": 7.544075148181696e-06, "loss": 0.7773, "step": 6814 }, { "epoch": 0.3502415458937198, "grad_norm": 1.1263842582702637, "learning_rate": 7.543358645496766e-06, "loss": 0.7664, "step": 6815 }, { "epoch": 0.35029293863706445, "grad_norm": 1.0170152187347412, "learning_rate": 7.542642072345864e-06, "loss": 0.7277, "step": 6816 }, { "epoch": 0.3503443313804091, "grad_norm": 1.0750617980957031, "learning_rate": 7.541925428748843e-06, "loss": 0.7951, "step": 6817 }, { "epoch": 0.35039572412375375, "grad_norm": 1.1086397171020508, "learning_rate": 7.541208714725558e-06, "loss": 0.7836, "step": 6818 }, { "epoch": 0.35044711686709834, "grad_norm": 1.094606637954712, "learning_rate": 7.540491930295867e-06, "loss": 0.7568, "step": 6819 }, { "epoch": 0.350498509610443, "grad_norm": 1.059920072555542, "learning_rate": 7.5397750754796296e-06, "loss": 0.7445, "step": 6820 }, { "epoch": 0.35054990235378763, "grad_norm": 1.0477545261383057, "learning_rate": 7.539058150296703e-06, "loss": 0.762, "step": 6821 }, { "epoch": 0.3506012950971323, "grad_norm": 1.0866957902908325, "learning_rate": 7.538341154766955e-06, "loss": 0.7379, "step": 6822 }, { "epoch": 0.35065268784047693, "grad_norm": 1.0484524965286255, "learning_rate": 7.537624088910249e-06, "loss": 0.7612, "step": 6823 }, { "epoch": 0.3507040805838216, "grad_norm": 0.8495510220527649, "learning_rate": 7.53690695274645e-06, "loss": 0.7191, "step": 6824 }, { "epoch": 0.3507554733271662, "grad_norm": 1.0750449895858765, "learning_rate": 7.5361897462954305e-06, "loss": 0.8086, "step": 6825 }, { "epoch": 0.35080686607051087, "grad_norm": 0.8097819089889526, "learning_rate": 7.535472469577059e-06, "loss": 0.708, "step": 6826 }, { "epoch": 0.35085825881385546, "grad_norm": 1.0629990100860596, "learning_rate": 7.534755122611208e-06, "loss": 0.7699, "step": 6827 }, { "epoch": 0.3509096515572001, "grad_norm": 0.8553597927093506, "learning_rate": 7.534037705417754e-06, "loss": 0.6331, "step": 6828 }, { "epoch": 0.35096104430054476, "grad_norm": 1.1731806993484497, "learning_rate": 7.533320218016571e-06, "loss": 0.8059, "step": 6829 }, { "epoch": 0.3510124370438894, "grad_norm": 1.2728736400604248, "learning_rate": 7.5326026604275395e-06, "loss": 0.7608, "step": 6830 }, { "epoch": 0.35106382978723405, "grad_norm": 1.064273715019226, "learning_rate": 7.531885032670541e-06, "loss": 0.746, "step": 6831 }, { "epoch": 0.3511152225305787, "grad_norm": 1.0850670337677002, "learning_rate": 7.531167334765455e-06, "loss": 0.7815, "step": 6832 }, { "epoch": 0.35116661527392334, "grad_norm": 1.1325316429138184, "learning_rate": 7.530449566732167e-06, "loss": 0.7685, "step": 6833 }, { "epoch": 0.35121800801726794, "grad_norm": 1.0625728368759155, "learning_rate": 7.529731728590567e-06, "loss": 0.7752, "step": 6834 }, { "epoch": 0.3512694007606126, "grad_norm": 1.1588932275772095, "learning_rate": 7.529013820360538e-06, "loss": 0.7049, "step": 6835 }, { "epoch": 0.35132079350395723, "grad_norm": 1.1938048601150513, "learning_rate": 7.528295842061974e-06, "loss": 0.8379, "step": 6836 }, { "epoch": 0.3513721862473019, "grad_norm": 1.054770827293396, "learning_rate": 7.5275777937147645e-06, "loss": 0.7146, "step": 6837 }, { "epoch": 0.3514235789906465, "grad_norm": 1.1136995553970337, "learning_rate": 7.526859675338807e-06, "loss": 0.7427, "step": 6838 }, { "epoch": 0.35147497173399117, "grad_norm": 1.0492607355117798, "learning_rate": 7.526141486953995e-06, "loss": 0.7809, "step": 6839 }, { "epoch": 0.3515263644773358, "grad_norm": 0.8977307081222534, "learning_rate": 7.525423228580227e-06, "loss": 0.6765, "step": 6840 }, { "epoch": 0.35157775722068046, "grad_norm": 1.0170862674713135, "learning_rate": 7.524704900237403e-06, "loss": 0.6955, "step": 6841 }, { "epoch": 0.35162914996402506, "grad_norm": 1.1323602199554443, "learning_rate": 7.523986501945424e-06, "loss": 0.748, "step": 6842 }, { "epoch": 0.3516805427073697, "grad_norm": 1.1696484088897705, "learning_rate": 7.523268033724196e-06, "loss": 0.7598, "step": 6843 }, { "epoch": 0.35173193545071435, "grad_norm": 1.1741268634796143, "learning_rate": 7.522549495593623e-06, "loss": 0.752, "step": 6844 }, { "epoch": 0.351783328194059, "grad_norm": 1.0321629047393799, "learning_rate": 7.521830887573614e-06, "loss": 0.7262, "step": 6845 }, { "epoch": 0.35183472093740364, "grad_norm": 1.0211818218231201, "learning_rate": 7.521112209684079e-06, "loss": 0.7547, "step": 6846 }, { "epoch": 0.3518861136807483, "grad_norm": 0.7404752373695374, "learning_rate": 7.520393461944926e-06, "loss": 0.6625, "step": 6847 }, { "epoch": 0.35193750642409294, "grad_norm": 1.0592786073684692, "learning_rate": 7.519674644376073e-06, "loss": 0.762, "step": 6848 }, { "epoch": 0.3519888991674376, "grad_norm": 1.0230878591537476, "learning_rate": 7.518955756997435e-06, "loss": 0.6874, "step": 6849 }, { "epoch": 0.3520402919107822, "grad_norm": 1.0642518997192383, "learning_rate": 7.518236799828926e-06, "loss": 0.7295, "step": 6850 }, { "epoch": 0.3520916846541268, "grad_norm": 1.1067473888397217, "learning_rate": 7.517517772890468e-06, "loss": 0.74, "step": 6851 }, { "epoch": 0.35214307739747147, "grad_norm": 1.1237318515777588, "learning_rate": 7.516798676201981e-06, "loss": 0.7882, "step": 6852 }, { "epoch": 0.3521944701408161, "grad_norm": 1.055605411529541, "learning_rate": 7.51607950978339e-06, "loss": 0.7554, "step": 6853 }, { "epoch": 0.35224586288416077, "grad_norm": 1.1078089475631714, "learning_rate": 7.5153602736546195e-06, "loss": 0.7919, "step": 6854 }, { "epoch": 0.3522972556275054, "grad_norm": 1.077776551246643, "learning_rate": 7.514640967835595e-06, "loss": 0.7097, "step": 6855 }, { "epoch": 0.35234864837085006, "grad_norm": 1.0163429975509644, "learning_rate": 7.513921592346247e-06, "loss": 0.7649, "step": 6856 }, { "epoch": 0.35240004111419465, "grad_norm": 1.1352893114089966, "learning_rate": 7.513202147206506e-06, "loss": 0.7401, "step": 6857 }, { "epoch": 0.3524514338575393, "grad_norm": 1.0356788635253906, "learning_rate": 7.512482632436304e-06, "loss": 0.7933, "step": 6858 }, { "epoch": 0.35250282660088395, "grad_norm": 1.1079896688461304, "learning_rate": 7.5117630480555785e-06, "loss": 0.7913, "step": 6859 }, { "epoch": 0.3525542193442286, "grad_norm": 1.0858827829360962, "learning_rate": 7.511043394084263e-06, "loss": 0.7737, "step": 6860 }, { "epoch": 0.35260561208757324, "grad_norm": 1.062917709350586, "learning_rate": 7.510323670542298e-06, "loss": 0.7228, "step": 6861 }, { "epoch": 0.3526570048309179, "grad_norm": 1.0806519985198975, "learning_rate": 7.509603877449624e-06, "loss": 0.7324, "step": 6862 }, { "epoch": 0.35270839757426253, "grad_norm": 1.054922103881836, "learning_rate": 7.508884014826181e-06, "loss": 0.8115, "step": 6863 }, { "epoch": 0.3527597903176072, "grad_norm": 1.006226897239685, "learning_rate": 7.508164082691918e-06, "loss": 0.7916, "step": 6864 }, { "epoch": 0.35281118306095177, "grad_norm": 1.0074583292007446, "learning_rate": 7.507444081066777e-06, "loss": 0.7328, "step": 6865 }, { "epoch": 0.3528625758042964, "grad_norm": 1.0810068845748901, "learning_rate": 7.50672400997071e-06, "loss": 0.7895, "step": 6866 }, { "epoch": 0.35291396854764107, "grad_norm": 1.06901216506958, "learning_rate": 7.506003869423664e-06, "loss": 0.763, "step": 6867 }, { "epoch": 0.3529653612909857, "grad_norm": 1.0450772047042847, "learning_rate": 7.505283659445593e-06, "loss": 0.7704, "step": 6868 }, { "epoch": 0.35301675403433036, "grad_norm": 1.1080470085144043, "learning_rate": 7.5045633800564495e-06, "loss": 0.7856, "step": 6869 }, { "epoch": 0.353068146777675, "grad_norm": 0.8002954125404358, "learning_rate": 7.503843031276192e-06, "loss": 0.6677, "step": 6870 }, { "epoch": 0.35311953952101965, "grad_norm": 1.0990715026855469, "learning_rate": 7.5031226131247755e-06, "loss": 0.7996, "step": 6871 }, { "epoch": 0.35317093226436425, "grad_norm": 0.7224442958831787, "learning_rate": 7.502402125622162e-06, "loss": 0.69, "step": 6872 }, { "epoch": 0.3532223250077089, "grad_norm": 0.7838165760040283, "learning_rate": 7.501681568788313e-06, "loss": 0.7186, "step": 6873 }, { "epoch": 0.35327371775105354, "grad_norm": 0.6799275279045105, "learning_rate": 7.500960942643189e-06, "loss": 0.6654, "step": 6874 }, { "epoch": 0.3533251104943982, "grad_norm": 1.0534428358078003, "learning_rate": 7.5002402472067605e-06, "loss": 0.7461, "step": 6875 }, { "epoch": 0.35337650323774283, "grad_norm": 1.0646260976791382, "learning_rate": 7.499519482498992e-06, "loss": 0.7655, "step": 6876 }, { "epoch": 0.3534278959810875, "grad_norm": 1.1189757585525513, "learning_rate": 7.498798648539853e-06, "loss": 0.8056, "step": 6877 }, { "epoch": 0.35347928872443213, "grad_norm": 0.8376971483230591, "learning_rate": 7.498077745349317e-06, "loss": 0.6922, "step": 6878 }, { "epoch": 0.3535306814677768, "grad_norm": 1.1545957326889038, "learning_rate": 7.497356772947355e-06, "loss": 0.7867, "step": 6879 }, { "epoch": 0.35358207421112137, "grad_norm": 1.0761505365371704, "learning_rate": 7.496635731353942e-06, "loss": 0.806, "step": 6880 }, { "epoch": 0.353633466954466, "grad_norm": 0.7250274419784546, "learning_rate": 7.495914620589056e-06, "loss": 0.6909, "step": 6881 }, { "epoch": 0.35368485969781066, "grad_norm": 1.067765712738037, "learning_rate": 7.495193440672676e-06, "loss": 0.767, "step": 6882 }, { "epoch": 0.3537362524411553, "grad_norm": 1.0351186990737915, "learning_rate": 7.494472191624783e-06, "loss": 0.7499, "step": 6883 }, { "epoch": 0.35378764518449995, "grad_norm": 0.761271595954895, "learning_rate": 7.49375087346536e-06, "loss": 0.6923, "step": 6884 }, { "epoch": 0.3538390379278446, "grad_norm": 1.1362197399139404, "learning_rate": 7.49302948621439e-06, "loss": 0.7729, "step": 6885 }, { "epoch": 0.35389043067118925, "grad_norm": 1.0826493501663208, "learning_rate": 7.492308029891863e-06, "loss": 0.7605, "step": 6886 }, { "epoch": 0.35394182341453384, "grad_norm": 1.075717806816101, "learning_rate": 7.491586504517765e-06, "loss": 0.7046, "step": 6887 }, { "epoch": 0.3539932161578785, "grad_norm": 1.0867955684661865, "learning_rate": 7.490864910112086e-06, "loss": 0.7322, "step": 6888 }, { "epoch": 0.35404460890122313, "grad_norm": 1.0871459245681763, "learning_rate": 7.490143246694821e-06, "loss": 0.7619, "step": 6889 }, { "epoch": 0.3540960016445678, "grad_norm": 1.0426596403121948, "learning_rate": 7.4894215142859614e-06, "loss": 0.711, "step": 6890 }, { "epoch": 0.35414739438791243, "grad_norm": 2.9700584411621094, "learning_rate": 7.488699712905506e-06, "loss": 0.7517, "step": 6891 }, { "epoch": 0.3541987871312571, "grad_norm": 1.1073030233383179, "learning_rate": 7.487977842573453e-06, "loss": 0.769, "step": 6892 }, { "epoch": 0.3542501798746017, "grad_norm": 1.1013861894607544, "learning_rate": 7.487255903309798e-06, "loss": 0.7637, "step": 6893 }, { "epoch": 0.35430157261794637, "grad_norm": 1.091839075088501, "learning_rate": 7.486533895134549e-06, "loss": 0.7636, "step": 6894 }, { "epoch": 0.35435296536129096, "grad_norm": 0.7659119367599487, "learning_rate": 7.485811818067705e-06, "loss": 0.6472, "step": 6895 }, { "epoch": 0.3544043581046356, "grad_norm": 1.0987520217895508, "learning_rate": 7.485089672129275e-06, "loss": 0.7292, "step": 6896 }, { "epoch": 0.35445575084798026, "grad_norm": 1.033129096031189, "learning_rate": 7.484367457339265e-06, "loss": 0.7171, "step": 6897 }, { "epoch": 0.3545071435913249, "grad_norm": 1.0865167379379272, "learning_rate": 7.483645173717686e-06, "loss": 0.6974, "step": 6898 }, { "epoch": 0.35455853633466955, "grad_norm": 0.7539694905281067, "learning_rate": 7.4829228212845485e-06, "loss": 0.6827, "step": 6899 }, { "epoch": 0.3546099290780142, "grad_norm": 0.6504347324371338, "learning_rate": 7.482200400059867e-06, "loss": 0.6765, "step": 6900 }, { "epoch": 0.35466132182135884, "grad_norm": 1.068043828010559, "learning_rate": 7.481477910063654e-06, "loss": 0.7365, "step": 6901 }, { "epoch": 0.3547127145647035, "grad_norm": 1.0536192655563354, "learning_rate": 7.480755351315929e-06, "loss": 0.7438, "step": 6902 }, { "epoch": 0.3547641073080481, "grad_norm": 6.880355358123779, "learning_rate": 7.4800327238367125e-06, "loss": 0.7662, "step": 6903 }, { "epoch": 0.35481550005139273, "grad_norm": 1.1039276123046875, "learning_rate": 7.479310027646021e-06, "loss": 0.7473, "step": 6904 }, { "epoch": 0.3548668927947374, "grad_norm": 0.826196014881134, "learning_rate": 7.47858726276388e-06, "loss": 0.6588, "step": 6905 }, { "epoch": 0.354918285538082, "grad_norm": 1.102452039718628, "learning_rate": 7.477864429210315e-06, "loss": 0.8111, "step": 6906 }, { "epoch": 0.35496967828142667, "grad_norm": 1.0219717025756836, "learning_rate": 7.477141527005354e-06, "loss": 0.769, "step": 6907 }, { "epoch": 0.3550210710247713, "grad_norm": 1.101218819618225, "learning_rate": 7.476418556169023e-06, "loss": 0.7274, "step": 6908 }, { "epoch": 0.35507246376811596, "grad_norm": 1.11151123046875, "learning_rate": 7.475695516721353e-06, "loss": 0.7682, "step": 6909 }, { "epoch": 0.35512385651146056, "grad_norm": 1.0312201976776123, "learning_rate": 7.474972408682377e-06, "loss": 0.7367, "step": 6910 }, { "epoch": 0.3551752492548052, "grad_norm": 1.1744149923324585, "learning_rate": 7.47424923207213e-06, "loss": 0.7564, "step": 6911 }, { "epoch": 0.35522664199814985, "grad_norm": 1.1267353296279907, "learning_rate": 7.473525986910646e-06, "loss": 0.7502, "step": 6912 }, { "epoch": 0.3552780347414945, "grad_norm": 1.1210782527923584, "learning_rate": 7.472802673217965e-06, "loss": 0.7855, "step": 6913 }, { "epoch": 0.35532942748483914, "grad_norm": 0.8535274863243103, "learning_rate": 7.472079291014127e-06, "loss": 0.6712, "step": 6914 }, { "epoch": 0.3553808202281838, "grad_norm": 1.039354681968689, "learning_rate": 7.471355840319172e-06, "loss": 0.6666, "step": 6915 }, { "epoch": 0.35543221297152844, "grad_norm": 1.2426090240478516, "learning_rate": 7.470632321153148e-06, "loss": 0.809, "step": 6916 }, { "epoch": 0.3554836057148731, "grad_norm": 1.0780504941940308, "learning_rate": 7.469908733536095e-06, "loss": 0.749, "step": 6917 }, { "epoch": 0.3555349984582177, "grad_norm": 1.0589290857315063, "learning_rate": 7.469185077488066e-06, "loss": 0.7853, "step": 6918 }, { "epoch": 0.3555863912015623, "grad_norm": 1.063048243522644, "learning_rate": 7.468461353029109e-06, "loss": 0.7845, "step": 6919 }, { "epoch": 0.35563778394490697, "grad_norm": 1.0464750528335571, "learning_rate": 7.4677375601792715e-06, "loss": 0.7766, "step": 6920 }, { "epoch": 0.3556891766882516, "grad_norm": 1.0719619989395142, "learning_rate": 7.467013698958613e-06, "loss": 0.7545, "step": 6921 }, { "epoch": 0.35574056943159627, "grad_norm": 1.0718047618865967, "learning_rate": 7.466289769387183e-06, "loss": 0.7719, "step": 6922 }, { "epoch": 0.3557919621749409, "grad_norm": 0.8977873921394348, "learning_rate": 7.465565771485044e-06, "loss": 0.6847, "step": 6923 }, { "epoch": 0.35584335491828556, "grad_norm": 0.7830643653869629, "learning_rate": 7.464841705272251e-06, "loss": 0.6772, "step": 6924 }, { "epoch": 0.35589474766163015, "grad_norm": 0.825669527053833, "learning_rate": 7.464117570768865e-06, "loss": 0.6989, "step": 6925 }, { "epoch": 0.3559461404049748, "grad_norm": 1.020740032196045, "learning_rate": 7.463393367994951e-06, "loss": 0.7576, "step": 6926 }, { "epoch": 0.35599753314831944, "grad_norm": 0.7267536520957947, "learning_rate": 7.462669096970573e-06, "loss": 0.6765, "step": 6927 }, { "epoch": 0.3560489258916641, "grad_norm": 1.1210054159164429, "learning_rate": 7.4619447577157955e-06, "loss": 0.8293, "step": 6928 }, { "epoch": 0.35610031863500874, "grad_norm": 1.0775084495544434, "learning_rate": 7.4612203502506906e-06, "loss": 0.7804, "step": 6929 }, { "epoch": 0.3561517113783534, "grad_norm": 1.1095895767211914, "learning_rate": 7.460495874595325e-06, "loss": 0.7378, "step": 6930 }, { "epoch": 0.35620310412169803, "grad_norm": 0.8248242139816284, "learning_rate": 7.4597713307697735e-06, "loss": 0.7269, "step": 6931 }, { "epoch": 0.3562544968650427, "grad_norm": 1.0864014625549316, "learning_rate": 7.45904671879411e-06, "loss": 0.7733, "step": 6932 }, { "epoch": 0.35630588960838727, "grad_norm": 1.0493333339691162, "learning_rate": 7.458322038688408e-06, "loss": 0.6973, "step": 6933 }, { "epoch": 0.3563572823517319, "grad_norm": 1.1172261238098145, "learning_rate": 7.457597290472749e-06, "loss": 0.8583, "step": 6934 }, { "epoch": 0.35640867509507657, "grad_norm": 0.7226443886756897, "learning_rate": 7.456872474167211e-06, "loss": 0.6483, "step": 6935 }, { "epoch": 0.3564600678384212, "grad_norm": 1.0405539274215698, "learning_rate": 7.4561475897918735e-06, "loss": 0.8193, "step": 6936 }, { "epoch": 0.35651146058176586, "grad_norm": 1.048471450805664, "learning_rate": 7.455422637366823e-06, "loss": 0.761, "step": 6937 }, { "epoch": 0.3565628533251105, "grad_norm": 1.0708593130111694, "learning_rate": 7.454697616912146e-06, "loss": 0.7825, "step": 6938 }, { "epoch": 0.35661424606845515, "grad_norm": 1.019115686416626, "learning_rate": 7.453972528447926e-06, "loss": 0.7776, "step": 6939 }, { "epoch": 0.3566656388117998, "grad_norm": 0.804804265499115, "learning_rate": 7.453247371994256e-06, "loss": 0.7317, "step": 6940 }, { "epoch": 0.3567170315551444, "grad_norm": 1.0177278518676758, "learning_rate": 7.452522147571224e-06, "loss": 0.7484, "step": 6941 }, { "epoch": 0.35676842429848904, "grad_norm": 1.0316647291183472, "learning_rate": 7.451796855198925e-06, "loss": 0.6673, "step": 6942 }, { "epoch": 0.3568198170418337, "grad_norm": 1.0306365489959717, "learning_rate": 7.451071494897452e-06, "loss": 0.7097, "step": 6943 }, { "epoch": 0.35687120978517833, "grad_norm": 0.9948005080223083, "learning_rate": 7.4503460666869036e-06, "loss": 0.7002, "step": 6944 }, { "epoch": 0.356922602528523, "grad_norm": 1.0587838888168335, "learning_rate": 7.449620570587377e-06, "loss": 0.7608, "step": 6945 }, { "epoch": 0.35697399527186763, "grad_norm": 0.7872125506401062, "learning_rate": 7.448895006618973e-06, "loss": 0.6574, "step": 6946 }, { "epoch": 0.3570253880152123, "grad_norm": 1.0763453245162964, "learning_rate": 7.448169374801796e-06, "loss": 0.7341, "step": 6947 }, { "epoch": 0.35707678075855687, "grad_norm": 1.0607833862304688, "learning_rate": 7.4474436751559474e-06, "loss": 0.7192, "step": 6948 }, { "epoch": 0.3571281735019015, "grad_norm": 0.8524363040924072, "learning_rate": 7.446717907701535e-06, "loss": 0.6967, "step": 6949 }, { "epoch": 0.35717956624524616, "grad_norm": 1.1366530656814575, "learning_rate": 7.445992072458666e-06, "loss": 0.7357, "step": 6950 }, { "epoch": 0.3572309589885908, "grad_norm": 1.0873734951019287, "learning_rate": 7.445266169447453e-06, "loss": 0.6983, "step": 6951 }, { "epoch": 0.35728235173193545, "grad_norm": 0.8307279944419861, "learning_rate": 7.444540198688002e-06, "loss": 0.7042, "step": 6952 }, { "epoch": 0.3573337444752801, "grad_norm": 1.1543588638305664, "learning_rate": 7.443814160200432e-06, "loss": 0.7474, "step": 6953 }, { "epoch": 0.35738513721862475, "grad_norm": 1.0813277959823608, "learning_rate": 7.443088054004857e-06, "loss": 0.7472, "step": 6954 }, { "epoch": 0.3574365299619694, "grad_norm": 1.0217931270599365, "learning_rate": 7.442361880121392e-06, "loss": 0.756, "step": 6955 }, { "epoch": 0.357487922705314, "grad_norm": 0.9164536595344543, "learning_rate": 7.441635638570161e-06, "loss": 0.6586, "step": 6956 }, { "epoch": 0.35753931544865863, "grad_norm": 1.15981924533844, "learning_rate": 7.44090932937128e-06, "loss": 0.7406, "step": 6957 }, { "epoch": 0.3575907081920033, "grad_norm": 1.0778522491455078, "learning_rate": 7.440182952544876e-06, "loss": 0.775, "step": 6958 }, { "epoch": 0.35764210093534793, "grad_norm": 0.6957578063011169, "learning_rate": 7.439456508111072e-06, "loss": 0.6491, "step": 6959 }, { "epoch": 0.3576934936786926, "grad_norm": 0.6759206056594849, "learning_rate": 7.438729996089995e-06, "loss": 0.6619, "step": 6960 }, { "epoch": 0.3577448864220372, "grad_norm": 0.7313870191574097, "learning_rate": 7.438003416501774e-06, "loss": 0.7205, "step": 6961 }, { "epoch": 0.35779627916538187, "grad_norm": 1.0625187158584595, "learning_rate": 7.437276769366539e-06, "loss": 0.7365, "step": 6962 }, { "epoch": 0.35784767190872646, "grad_norm": 1.1405127048492432, "learning_rate": 7.436550054704424e-06, "loss": 0.7685, "step": 6963 }, { "epoch": 0.3578990646520711, "grad_norm": 1.028132677078247, "learning_rate": 7.435823272535563e-06, "loss": 0.7423, "step": 6964 }, { "epoch": 0.35795045739541576, "grad_norm": 0.8267134428024292, "learning_rate": 7.4350964228800885e-06, "loss": 0.6942, "step": 6965 }, { "epoch": 0.3580018501387604, "grad_norm": 1.0466917753219604, "learning_rate": 7.434369505758141e-06, "loss": 0.7523, "step": 6966 }, { "epoch": 0.35805324288210505, "grad_norm": 1.0389124155044556, "learning_rate": 7.433642521189863e-06, "loss": 0.7635, "step": 6967 }, { "epoch": 0.3581046356254497, "grad_norm": 1.077820062637329, "learning_rate": 7.4329154691953916e-06, "loss": 0.7773, "step": 6968 }, { "epoch": 0.35815602836879434, "grad_norm": 1.1435121297836304, "learning_rate": 7.432188349794874e-06, "loss": 0.7616, "step": 6969 }, { "epoch": 0.358207421112139, "grad_norm": 1.0581074953079224, "learning_rate": 7.431461163008453e-06, "loss": 0.7587, "step": 6970 }, { "epoch": 0.3582588138554836, "grad_norm": 1.0587475299835205, "learning_rate": 7.430733908856279e-06, "loss": 0.7599, "step": 6971 }, { "epoch": 0.35831020659882823, "grad_norm": 1.0356876850128174, "learning_rate": 7.4300065873584985e-06, "loss": 0.734, "step": 6972 }, { "epoch": 0.3583615993421729, "grad_norm": 1.041116714477539, "learning_rate": 7.429279198535263e-06, "loss": 0.7672, "step": 6973 }, { "epoch": 0.3584129920855175, "grad_norm": 1.041608214378357, "learning_rate": 7.4285517424067266e-06, "loss": 0.748, "step": 6974 }, { "epoch": 0.35846438482886217, "grad_norm": 1.0443669557571411, "learning_rate": 7.4278242189930435e-06, "loss": 0.7727, "step": 6975 }, { "epoch": 0.3585157775722068, "grad_norm": 0.8906344175338745, "learning_rate": 7.42709662831437e-06, "loss": 0.6738, "step": 6976 }, { "epoch": 0.35856717031555146, "grad_norm": 0.8813257217407227, "learning_rate": 7.426368970390865e-06, "loss": 0.6431, "step": 6977 }, { "epoch": 0.3586185630588961, "grad_norm": 1.0723553895950317, "learning_rate": 7.425641245242689e-06, "loss": 0.7724, "step": 6978 }, { "epoch": 0.3586699558022407, "grad_norm": 0.9958428144454956, "learning_rate": 7.424913452890004e-06, "loss": 0.7787, "step": 6979 }, { "epoch": 0.35872134854558535, "grad_norm": 1.1119393110275269, "learning_rate": 7.424185593352975e-06, "loss": 0.8162, "step": 6980 }, { "epoch": 0.35877274128893, "grad_norm": 1.0245420932769775, "learning_rate": 7.423457666651765e-06, "loss": 0.7036, "step": 6981 }, { "epoch": 0.35882413403227464, "grad_norm": 0.8802167773246765, "learning_rate": 7.422729672806547e-06, "loss": 0.7385, "step": 6982 }, { "epoch": 0.3588755267756193, "grad_norm": 0.800028920173645, "learning_rate": 7.422001611837486e-06, "loss": 0.6764, "step": 6983 }, { "epoch": 0.35892691951896394, "grad_norm": 0.7771615982055664, "learning_rate": 7.421273483764755e-06, "loss": 0.6804, "step": 6984 }, { "epoch": 0.3589783122623086, "grad_norm": 1.0924482345581055, "learning_rate": 7.420545288608529e-06, "loss": 0.748, "step": 6985 }, { "epoch": 0.3590297050056532, "grad_norm": 1.154711365699768, "learning_rate": 7.4198170263889815e-06, "loss": 0.7546, "step": 6986 }, { "epoch": 0.3590810977489978, "grad_norm": 1.0513806343078613, "learning_rate": 7.41908869712629e-06, "loss": 0.7679, "step": 6987 }, { "epoch": 0.35913249049234247, "grad_norm": 1.0149319171905518, "learning_rate": 7.418360300840635e-06, "loss": 0.7259, "step": 6988 }, { "epoch": 0.3591838832356871, "grad_norm": 1.0402402877807617, "learning_rate": 7.417631837552194e-06, "loss": 0.7308, "step": 6989 }, { "epoch": 0.35923527597903177, "grad_norm": 1.1022813320159912, "learning_rate": 7.416903307281153e-06, "loss": 0.8066, "step": 6990 }, { "epoch": 0.3592866687223764, "grad_norm": 0.9306803345680237, "learning_rate": 7.416174710047696e-06, "loss": 0.6944, "step": 6991 }, { "epoch": 0.35933806146572106, "grad_norm": 1.0914621353149414, "learning_rate": 7.415446045872007e-06, "loss": 0.8124, "step": 6992 }, { "epoch": 0.3593894542090657, "grad_norm": 1.050011396408081, "learning_rate": 7.4147173147742765e-06, "loss": 0.737, "step": 6993 }, { "epoch": 0.3594408469524103, "grad_norm": 0.7137007713317871, "learning_rate": 7.413988516774695e-06, "loss": 0.7059, "step": 6994 }, { "epoch": 0.35949223969575494, "grad_norm": 1.11740243434906, "learning_rate": 7.413259651893453e-06, "loss": 0.7153, "step": 6995 }, { "epoch": 0.3595436324390996, "grad_norm": 0.7023674845695496, "learning_rate": 7.412530720150746e-06, "loss": 0.6804, "step": 6996 }, { "epoch": 0.35959502518244424, "grad_norm": 1.0877118110656738, "learning_rate": 7.411801721566767e-06, "loss": 0.7757, "step": 6997 }, { "epoch": 0.3596464179257889, "grad_norm": 0.738810658454895, "learning_rate": 7.4110726561617155e-06, "loss": 0.6329, "step": 6998 }, { "epoch": 0.35969781066913353, "grad_norm": 1.0508731603622437, "learning_rate": 7.410343523955791e-06, "loss": 0.7896, "step": 6999 }, { "epoch": 0.3597492034124782, "grad_norm": 1.0721107721328735, "learning_rate": 7.409614324969195e-06, "loss": 0.7313, "step": 7000 }, { "epoch": 0.35980059615582277, "grad_norm": 1.0753889083862305, "learning_rate": 7.40888505922213e-06, "loss": 0.7532, "step": 7001 }, { "epoch": 0.3598519888991674, "grad_norm": 1.0790959596633911, "learning_rate": 7.4081557267348e-06, "loss": 0.757, "step": 7002 }, { "epoch": 0.35990338164251207, "grad_norm": 1.073265552520752, "learning_rate": 7.407426327527413e-06, "loss": 0.7599, "step": 7003 }, { "epoch": 0.3599547743858567, "grad_norm": 1.1742689609527588, "learning_rate": 7.406696861620177e-06, "loss": 0.7199, "step": 7004 }, { "epoch": 0.36000616712920136, "grad_norm": 1.0453686714172363, "learning_rate": 7.405967329033303e-06, "loss": 0.769, "step": 7005 }, { "epoch": 0.360057559872546, "grad_norm": 0.7631792426109314, "learning_rate": 7.4052377297870035e-06, "loss": 0.6846, "step": 7006 }, { "epoch": 0.36010895261589065, "grad_norm": 1.1031794548034668, "learning_rate": 7.404508063901492e-06, "loss": 0.7817, "step": 7007 }, { "epoch": 0.3601603453592353, "grad_norm": 1.057044506072998, "learning_rate": 7.4037783313969845e-06, "loss": 0.7784, "step": 7008 }, { "epoch": 0.3602117381025799, "grad_norm": 1.1388499736785889, "learning_rate": 7.4030485322937e-06, "loss": 0.7329, "step": 7009 }, { "epoch": 0.36026313084592454, "grad_norm": 1.1187939643859863, "learning_rate": 7.402318666611856e-06, "loss": 0.8233, "step": 7010 }, { "epoch": 0.3603145235892692, "grad_norm": 1.1181526184082031, "learning_rate": 7.4015887343716785e-06, "loss": 0.7481, "step": 7011 }, { "epoch": 0.36036591633261383, "grad_norm": 1.0606495141983032, "learning_rate": 7.400858735593386e-06, "loss": 0.7506, "step": 7012 }, { "epoch": 0.3604173090759585, "grad_norm": 1.084176778793335, "learning_rate": 7.400128670297205e-06, "loss": 0.7081, "step": 7013 }, { "epoch": 0.36046870181930313, "grad_norm": 1.1095635890960693, "learning_rate": 7.399398538503365e-06, "loss": 0.8121, "step": 7014 }, { "epoch": 0.3605200945626478, "grad_norm": 0.6976984143257141, "learning_rate": 7.398668340232091e-06, "loss": 0.661, "step": 7015 }, { "epoch": 0.36057148730599237, "grad_norm": 1.0054301023483276, "learning_rate": 7.397938075503619e-06, "loss": 0.7356, "step": 7016 }, { "epoch": 0.360622880049337, "grad_norm": 1.0758758783340454, "learning_rate": 7.3972077443381764e-06, "loss": 0.7667, "step": 7017 }, { "epoch": 0.36067427279268166, "grad_norm": 1.063114047050476, "learning_rate": 7.3964773467560015e-06, "loss": 0.7385, "step": 7018 }, { "epoch": 0.3607256655360263, "grad_norm": 1.0553628206253052, "learning_rate": 7.395746882777327e-06, "loss": 0.7564, "step": 7019 }, { "epoch": 0.36077705827937095, "grad_norm": 1.050002932548523, "learning_rate": 7.395016352422395e-06, "loss": 0.7238, "step": 7020 }, { "epoch": 0.3608284510227156, "grad_norm": 1.0819106101989746, "learning_rate": 7.394285755711441e-06, "loss": 0.7468, "step": 7021 }, { "epoch": 0.36087984376606025, "grad_norm": 0.8149110674858093, "learning_rate": 7.3935550926647125e-06, "loss": 0.6886, "step": 7022 }, { "epoch": 0.3609312365094049, "grad_norm": 1.0606942176818848, "learning_rate": 7.392824363302448e-06, "loss": 0.7653, "step": 7023 }, { "epoch": 0.3609826292527495, "grad_norm": 1.1929858922958374, "learning_rate": 7.392093567644896e-06, "loss": 0.7099, "step": 7024 }, { "epoch": 0.36103402199609413, "grad_norm": 1.099770188331604, "learning_rate": 7.391362705712302e-06, "loss": 0.7854, "step": 7025 }, { "epoch": 0.3610854147394388, "grad_norm": 1.014477014541626, "learning_rate": 7.390631777524916e-06, "loss": 0.7692, "step": 7026 }, { "epoch": 0.36113680748278343, "grad_norm": 1.177830696105957, "learning_rate": 7.38990078310299e-06, "loss": 0.7757, "step": 7027 }, { "epoch": 0.3611882002261281, "grad_norm": 1.0108778476715088, "learning_rate": 7.389169722466773e-06, "loss": 0.7667, "step": 7028 }, { "epoch": 0.3612395929694727, "grad_norm": 1.0059643983840942, "learning_rate": 7.388438595636525e-06, "loss": 0.7173, "step": 7029 }, { "epoch": 0.36129098571281737, "grad_norm": 1.0134066343307495, "learning_rate": 7.3877074026325e-06, "loss": 0.7131, "step": 7030 }, { "epoch": 0.361342378456162, "grad_norm": 0.7488081455230713, "learning_rate": 7.386976143474955e-06, "loss": 0.7106, "step": 7031 }, { "epoch": 0.3613937711995066, "grad_norm": 1.0309405326843262, "learning_rate": 7.386244818184154e-06, "loss": 0.7186, "step": 7032 }, { "epoch": 0.36144516394285126, "grad_norm": 1.044551134109497, "learning_rate": 7.385513426780355e-06, "loss": 0.7383, "step": 7033 }, { "epoch": 0.3614965566861959, "grad_norm": 1.1181553602218628, "learning_rate": 7.384781969283823e-06, "loss": 0.7922, "step": 7034 }, { "epoch": 0.36154794942954055, "grad_norm": 1.1420637369155884, "learning_rate": 7.384050445714825e-06, "loss": 0.7108, "step": 7035 }, { "epoch": 0.3615993421728852, "grad_norm": 1.1736031770706177, "learning_rate": 7.3833188560936275e-06, "loss": 0.7311, "step": 7036 }, { "epoch": 0.36165073491622984, "grad_norm": 1.0539335012435913, "learning_rate": 7.382587200440498e-06, "loss": 0.6899, "step": 7037 }, { "epoch": 0.3617021276595745, "grad_norm": 1.0954970121383667, "learning_rate": 7.381855478775711e-06, "loss": 0.7649, "step": 7038 }, { "epoch": 0.3617535204029191, "grad_norm": 1.039907693862915, "learning_rate": 7.381123691119538e-06, "loss": 0.7344, "step": 7039 }, { "epoch": 0.36180491314626373, "grad_norm": 0.9794743657112122, "learning_rate": 7.3803918374922545e-06, "loss": 0.7264, "step": 7040 }, { "epoch": 0.3618563058896084, "grad_norm": 1.142135739326477, "learning_rate": 7.379659917914136e-06, "loss": 0.7393, "step": 7041 }, { "epoch": 0.361907698632953, "grad_norm": 1.2996269464492798, "learning_rate": 7.378927932405461e-06, "loss": 0.8249, "step": 7042 }, { "epoch": 0.36195909137629767, "grad_norm": 1.0665199756622314, "learning_rate": 7.378195880986511e-06, "loss": 0.7995, "step": 7043 }, { "epoch": 0.3620104841196423, "grad_norm": 1.058315396308899, "learning_rate": 7.377463763677567e-06, "loss": 0.7347, "step": 7044 }, { "epoch": 0.36206187686298696, "grad_norm": 0.9880419373512268, "learning_rate": 7.376731580498912e-06, "loss": 0.7003, "step": 7045 }, { "epoch": 0.3621132696063316, "grad_norm": 1.1635642051696777, "learning_rate": 7.3759993314708355e-06, "loss": 0.8007, "step": 7046 }, { "epoch": 0.3621646623496762, "grad_norm": 1.004340648651123, "learning_rate": 7.375267016613621e-06, "loss": 0.7065, "step": 7047 }, { "epoch": 0.36221605509302085, "grad_norm": 1.037726879119873, "learning_rate": 7.37453463594756e-06, "loss": 0.7536, "step": 7048 }, { "epoch": 0.3622674478363655, "grad_norm": 1.1057242155075073, "learning_rate": 7.373802189492943e-06, "loss": 0.7254, "step": 7049 }, { "epoch": 0.36231884057971014, "grad_norm": 1.1648019552230835, "learning_rate": 7.3730696772700635e-06, "loss": 0.8049, "step": 7050 }, { "epoch": 0.3623702333230548, "grad_norm": 1.0979446172714233, "learning_rate": 7.3723370992992175e-06, "loss": 0.7784, "step": 7051 }, { "epoch": 0.36242162606639944, "grad_norm": 0.8164729475975037, "learning_rate": 7.3716044556007e-06, "loss": 0.6803, "step": 7052 }, { "epoch": 0.3624730188097441, "grad_norm": 1.1668245792388916, "learning_rate": 7.37087174619481e-06, "loss": 0.7875, "step": 7053 }, { "epoch": 0.3625244115530887, "grad_norm": 1.1078276634216309, "learning_rate": 7.370138971101848e-06, "loss": 0.7505, "step": 7054 }, { "epoch": 0.3625758042964333, "grad_norm": 1.0943260192871094, "learning_rate": 7.369406130342115e-06, "loss": 0.7735, "step": 7055 }, { "epoch": 0.36262719703977797, "grad_norm": 1.1172891855239868, "learning_rate": 7.3686732239359174e-06, "loss": 0.8424, "step": 7056 }, { "epoch": 0.3626785897831226, "grad_norm": 0.7857499718666077, "learning_rate": 7.3679402519035595e-06, "loss": 0.6845, "step": 7057 }, { "epoch": 0.36272998252646727, "grad_norm": 1.0472089052200317, "learning_rate": 7.367207214265348e-06, "loss": 0.6895, "step": 7058 }, { "epoch": 0.3627813752698119, "grad_norm": 1.0975582599639893, "learning_rate": 7.366474111041594e-06, "loss": 0.795, "step": 7059 }, { "epoch": 0.36283276801315656, "grad_norm": 0.9648140072822571, "learning_rate": 7.365740942252609e-06, "loss": 0.738, "step": 7060 }, { "epoch": 0.3628841607565012, "grad_norm": 1.0345007181167603, "learning_rate": 7.3650077079187045e-06, "loss": 0.7508, "step": 7061 }, { "epoch": 0.3629355534998458, "grad_norm": 1.1473277807235718, "learning_rate": 7.364274408060197e-06, "loss": 0.7974, "step": 7062 }, { "epoch": 0.36298694624319044, "grad_norm": 1.4830121994018555, "learning_rate": 7.3635410426974015e-06, "loss": 0.7307, "step": 7063 }, { "epoch": 0.3630383389865351, "grad_norm": 1.0932552814483643, "learning_rate": 7.3628076118506386e-06, "loss": 0.7265, "step": 7064 }, { "epoch": 0.36308973172987974, "grad_norm": 1.0681736469268799, "learning_rate": 7.362074115540228e-06, "loss": 0.7573, "step": 7065 }, { "epoch": 0.3631411244732244, "grad_norm": 1.1017327308654785, "learning_rate": 7.361340553786489e-06, "loss": 0.7306, "step": 7066 }, { "epoch": 0.36319251721656903, "grad_norm": 1.062074065208435, "learning_rate": 7.3606069266097504e-06, "loss": 0.7319, "step": 7067 }, { "epoch": 0.3632439099599137, "grad_norm": 1.1176702976226807, "learning_rate": 7.359873234030334e-06, "loss": 0.7683, "step": 7068 }, { "epoch": 0.3632953027032583, "grad_norm": 1.0860753059387207, "learning_rate": 7.359139476068568e-06, "loss": 0.8024, "step": 7069 }, { "epoch": 0.3633466954466029, "grad_norm": 1.0659818649291992, "learning_rate": 7.3584056527447845e-06, "loss": 0.7496, "step": 7070 }, { "epoch": 0.36339808818994757, "grad_norm": 1.4316399097442627, "learning_rate": 7.3576717640793114e-06, "loss": 0.7651, "step": 7071 }, { "epoch": 0.3634494809332922, "grad_norm": 0.9972586035728455, "learning_rate": 7.356937810092486e-06, "loss": 0.7189, "step": 7072 }, { "epoch": 0.36350087367663686, "grad_norm": 0.7550626397132874, "learning_rate": 7.356203790804638e-06, "loss": 0.6586, "step": 7073 }, { "epoch": 0.3635522664199815, "grad_norm": 1.0677191019058228, "learning_rate": 7.355469706236107e-06, "loss": 0.7326, "step": 7074 }, { "epoch": 0.36360365916332615, "grad_norm": 0.7250852584838867, "learning_rate": 7.354735556407231e-06, "loss": 0.6581, "step": 7075 }, { "epoch": 0.3636550519066708, "grad_norm": 1.0729914903640747, "learning_rate": 7.354001341338351e-06, "loss": 0.7754, "step": 7076 }, { "epoch": 0.3637064446500154, "grad_norm": 1.0173065662384033, "learning_rate": 7.353267061049807e-06, "loss": 0.7992, "step": 7077 }, { "epoch": 0.36375783739336004, "grad_norm": 1.1127136945724487, "learning_rate": 7.352532715561944e-06, "loss": 0.7962, "step": 7078 }, { "epoch": 0.3638092301367047, "grad_norm": 1.062265157699585, "learning_rate": 7.351798304895108e-06, "loss": 0.7429, "step": 7079 }, { "epoch": 0.36386062288004933, "grad_norm": 1.0323481559753418, "learning_rate": 7.351063829069647e-06, "loss": 0.7017, "step": 7080 }, { "epoch": 0.363912015623394, "grad_norm": 1.0366523265838623, "learning_rate": 7.350329288105909e-06, "loss": 0.7428, "step": 7081 }, { "epoch": 0.3639634083667386, "grad_norm": 1.0216281414031982, "learning_rate": 7.3495946820242445e-06, "loss": 0.7434, "step": 7082 }, { "epoch": 0.3640148011100833, "grad_norm": 0.732904314994812, "learning_rate": 7.348860010845009e-06, "loss": 0.7228, "step": 7083 }, { "epoch": 0.3640661938534279, "grad_norm": 1.1881695985794067, "learning_rate": 7.348125274588557e-06, "loss": 0.7996, "step": 7084 }, { "epoch": 0.3641175865967725, "grad_norm": 1.0608257055282593, "learning_rate": 7.347390473275242e-06, "loss": 0.759, "step": 7085 }, { "epoch": 0.36416897934011716, "grad_norm": 1.0205358266830444, "learning_rate": 7.346655606925425e-06, "loss": 0.7811, "step": 7086 }, { "epoch": 0.3642203720834618, "grad_norm": 1.0110136270523071, "learning_rate": 7.3459206755594635e-06, "loss": 0.7187, "step": 7087 }, { "epoch": 0.36427176482680645, "grad_norm": 1.0529857873916626, "learning_rate": 7.345185679197723e-06, "loss": 0.7307, "step": 7088 }, { "epoch": 0.3643231575701511, "grad_norm": 1.040067195892334, "learning_rate": 7.344450617860563e-06, "loss": 0.752, "step": 7089 }, { "epoch": 0.36437455031349575, "grad_norm": 0.7865608334541321, "learning_rate": 7.343715491568353e-06, "loss": 0.6868, "step": 7090 }, { "epoch": 0.3644259430568404, "grad_norm": 1.2349356412887573, "learning_rate": 7.34298030034146e-06, "loss": 0.7716, "step": 7091 }, { "epoch": 0.364477335800185, "grad_norm": 1.035965085029602, "learning_rate": 7.3422450442002506e-06, "loss": 0.7697, "step": 7092 }, { "epoch": 0.36452872854352963, "grad_norm": 1.0702332258224487, "learning_rate": 7.341509723165096e-06, "loss": 0.7343, "step": 7093 }, { "epoch": 0.3645801212868743, "grad_norm": 1.0724430084228516, "learning_rate": 7.340774337256371e-06, "loss": 0.8056, "step": 7094 }, { "epoch": 0.36463151403021893, "grad_norm": 1.1434651613235474, "learning_rate": 7.340038886494447e-06, "loss": 0.8159, "step": 7095 }, { "epoch": 0.3646829067735636, "grad_norm": 1.179262399673462, "learning_rate": 7.339303370899705e-06, "loss": 0.837, "step": 7096 }, { "epoch": 0.3647342995169082, "grad_norm": 7.245774745941162, "learning_rate": 7.338567790492519e-06, "loss": 0.8884, "step": 7097 }, { "epoch": 0.36478569226025287, "grad_norm": 1.1961015462875366, "learning_rate": 7.33783214529327e-06, "loss": 0.7691, "step": 7098 }, { "epoch": 0.3648370850035975, "grad_norm": 1.198743462562561, "learning_rate": 7.33709643532234e-06, "loss": 0.7944, "step": 7099 }, { "epoch": 0.3648884777469421, "grad_norm": 1.1719127893447876, "learning_rate": 7.3363606606001125e-06, "loss": 0.7577, "step": 7100 }, { "epoch": 0.36493987049028676, "grad_norm": 0.7972054481506348, "learning_rate": 7.335624821146973e-06, "loss": 0.6822, "step": 7101 }, { "epoch": 0.3649912632336314, "grad_norm": 0.8277395963668823, "learning_rate": 7.3348889169833086e-06, "loss": 0.7072, "step": 7102 }, { "epoch": 0.36504265597697605, "grad_norm": 1.1211965084075928, "learning_rate": 7.334152948129506e-06, "loss": 0.7973, "step": 7103 }, { "epoch": 0.3650940487203207, "grad_norm": 1.1106305122375488, "learning_rate": 7.33341691460596e-06, "loss": 0.7658, "step": 7104 }, { "epoch": 0.36514544146366534, "grad_norm": 1.1830538511276245, "learning_rate": 7.33268081643306e-06, "loss": 0.7867, "step": 7105 }, { "epoch": 0.36519683420701, "grad_norm": 1.0430490970611572, "learning_rate": 7.3319446536312e-06, "loss": 0.7782, "step": 7106 }, { "epoch": 0.36524822695035464, "grad_norm": 1.0280683040618896, "learning_rate": 7.3312084262207775e-06, "loss": 0.786, "step": 7107 }, { "epoch": 0.36529961969369923, "grad_norm": 1.128024697303772, "learning_rate": 7.330472134222191e-06, "loss": 0.7507, "step": 7108 }, { "epoch": 0.3653510124370439, "grad_norm": 0.8541820645332336, "learning_rate": 7.329735777655837e-06, "loss": 0.6279, "step": 7109 }, { "epoch": 0.3654024051803885, "grad_norm": 1.0252243280410767, "learning_rate": 7.328999356542119e-06, "loss": 0.7712, "step": 7110 }, { "epoch": 0.36545379792373317, "grad_norm": 1.1370036602020264, "learning_rate": 7.328262870901441e-06, "loss": 0.7506, "step": 7111 }, { "epoch": 0.3655051906670778, "grad_norm": 1.098652958869934, "learning_rate": 7.327526320754207e-06, "loss": 0.7954, "step": 7112 }, { "epoch": 0.36555658341042246, "grad_norm": 1.1582125425338745, "learning_rate": 7.326789706120824e-06, "loss": 0.7442, "step": 7113 }, { "epoch": 0.3656079761537671, "grad_norm": 1.023566722869873, "learning_rate": 7.3260530270217e-06, "loss": 0.7589, "step": 7114 }, { "epoch": 0.3656593688971117, "grad_norm": 1.0819870233535767, "learning_rate": 7.325316283477246e-06, "loss": 0.7947, "step": 7115 }, { "epoch": 0.36571076164045635, "grad_norm": 0.8672119975090027, "learning_rate": 7.324579475507873e-06, "loss": 0.708, "step": 7116 }, { "epoch": 0.365762154383801, "grad_norm": 1.1197950839996338, "learning_rate": 7.323842603133996e-06, "loss": 0.7675, "step": 7117 }, { "epoch": 0.36581354712714564, "grad_norm": 1.104225516319275, "learning_rate": 7.323105666376031e-06, "loss": 0.7948, "step": 7118 }, { "epoch": 0.3658649398704903, "grad_norm": 1.0510808229446411, "learning_rate": 7.322368665254394e-06, "loss": 0.698, "step": 7119 }, { "epoch": 0.36591633261383494, "grad_norm": 1.1101880073547363, "learning_rate": 7.321631599789506e-06, "loss": 0.7041, "step": 7120 }, { "epoch": 0.3659677253571796, "grad_norm": 0.7290747761726379, "learning_rate": 7.320894470001787e-06, "loss": 0.7158, "step": 7121 }, { "epoch": 0.36601911810052423, "grad_norm": 1.1536113023757935, "learning_rate": 7.320157275911659e-06, "loss": 0.7722, "step": 7122 }, { "epoch": 0.3660705108438688, "grad_norm": 1.1032837629318237, "learning_rate": 7.319420017539549e-06, "loss": 0.6992, "step": 7123 }, { "epoch": 0.36612190358721347, "grad_norm": 1.1017006635665894, "learning_rate": 7.318682694905881e-06, "loss": 0.794, "step": 7124 }, { "epoch": 0.3661732963305581, "grad_norm": 1.120020866394043, "learning_rate": 7.317945308031085e-06, "loss": 0.7736, "step": 7125 }, { "epoch": 0.36622468907390276, "grad_norm": 1.0796051025390625, "learning_rate": 7.31720785693559e-06, "loss": 0.7875, "step": 7126 }, { "epoch": 0.3662760818172474, "grad_norm": 1.0570719242095947, "learning_rate": 7.316470341639827e-06, "loss": 0.762, "step": 7127 }, { "epoch": 0.36632747456059206, "grad_norm": 1.3974111080169678, "learning_rate": 7.3157327621642305e-06, "loss": 0.7371, "step": 7128 }, { "epoch": 0.3663788673039367, "grad_norm": 1.145666480064392, "learning_rate": 7.314995118529237e-06, "loss": 0.8007, "step": 7129 }, { "epoch": 0.3664302600472813, "grad_norm": 0.6940429210662842, "learning_rate": 7.31425741075528e-06, "loss": 0.6541, "step": 7130 }, { "epoch": 0.36648165279062594, "grad_norm": 0.7124250531196594, "learning_rate": 7.313519638862802e-06, "loss": 0.6804, "step": 7131 }, { "epoch": 0.3665330455339706, "grad_norm": 1.100724458694458, "learning_rate": 7.312781802872241e-06, "loss": 0.7615, "step": 7132 }, { "epoch": 0.36658443827731524, "grad_norm": 1.1048794984817505, "learning_rate": 7.312043902804042e-06, "loss": 0.7817, "step": 7133 }, { "epoch": 0.3666358310206599, "grad_norm": 1.1387883424758911, "learning_rate": 7.311305938678647e-06, "loss": 0.7934, "step": 7134 }, { "epoch": 0.36668722376400453, "grad_norm": 1.0954293012619019, "learning_rate": 7.310567910516502e-06, "loss": 0.7611, "step": 7135 }, { "epoch": 0.3667386165073492, "grad_norm": 1.072045087814331, "learning_rate": 7.309829818338057e-06, "loss": 0.7247, "step": 7136 }, { "epoch": 0.3667900092506938, "grad_norm": 1.1318070888519287, "learning_rate": 7.309091662163759e-06, "loss": 0.7754, "step": 7137 }, { "epoch": 0.3668414019940384, "grad_norm": 1.1188750267028809, "learning_rate": 7.308353442014059e-06, "loss": 0.7501, "step": 7138 }, { "epoch": 0.36689279473738307, "grad_norm": 1.0617001056671143, "learning_rate": 7.307615157909413e-06, "loss": 0.7524, "step": 7139 }, { "epoch": 0.3669441874807277, "grad_norm": 0.7577106356620789, "learning_rate": 7.306876809870272e-06, "loss": 0.7164, "step": 7140 }, { "epoch": 0.36699558022407236, "grad_norm": 1.0569356679916382, "learning_rate": 7.306138397917095e-06, "loss": 0.7607, "step": 7141 }, { "epoch": 0.367046972967417, "grad_norm": 1.190929651260376, "learning_rate": 7.305399922070341e-06, "loss": 0.7766, "step": 7142 }, { "epoch": 0.36709836571076165, "grad_norm": 1.1272035837173462, "learning_rate": 7.304661382350467e-06, "loss": 0.7446, "step": 7143 }, { "epoch": 0.3671497584541063, "grad_norm": 1.1266297101974487, "learning_rate": 7.303922778777939e-06, "loss": 0.7303, "step": 7144 }, { "epoch": 0.3672011511974509, "grad_norm": 1.0627626180648804, "learning_rate": 7.303184111373218e-06, "loss": 0.7722, "step": 7145 }, { "epoch": 0.36725254394079554, "grad_norm": 1.1550190448760986, "learning_rate": 7.302445380156769e-06, "loss": 0.7688, "step": 7146 }, { "epoch": 0.3673039366841402, "grad_norm": 0.8579680323600769, "learning_rate": 7.301706585149062e-06, "loss": 0.6771, "step": 7147 }, { "epoch": 0.36735532942748483, "grad_norm": 1.0349979400634766, "learning_rate": 7.300967726370563e-06, "loss": 0.7824, "step": 7148 }, { "epoch": 0.3674067221708295, "grad_norm": 1.0926611423492432, "learning_rate": 7.300228803841744e-06, "loss": 0.7426, "step": 7149 }, { "epoch": 0.3674581149141741, "grad_norm": 1.0555914640426636, "learning_rate": 7.299489817583077e-06, "loss": 0.7364, "step": 7150 }, { "epoch": 0.3675095076575188, "grad_norm": 1.1215845346450806, "learning_rate": 7.298750767615037e-06, "loss": 0.7797, "step": 7151 }, { "epoch": 0.3675609004008634, "grad_norm": 1.173999547958374, "learning_rate": 7.298011653958099e-06, "loss": 0.7846, "step": 7152 }, { "epoch": 0.367612293144208, "grad_norm": 1.2063031196594238, "learning_rate": 7.297272476632742e-06, "loss": 0.7314, "step": 7153 }, { "epoch": 0.36766368588755266, "grad_norm": 1.0700774192810059, "learning_rate": 7.296533235659444e-06, "loss": 0.7645, "step": 7154 }, { "epoch": 0.3677150786308973, "grad_norm": 1.0352728366851807, "learning_rate": 7.295793931058688e-06, "loss": 0.75, "step": 7155 }, { "epoch": 0.36776647137424195, "grad_norm": 1.1127796173095703, "learning_rate": 7.295054562850956e-06, "loss": 0.7731, "step": 7156 }, { "epoch": 0.3678178641175866, "grad_norm": 1.106644868850708, "learning_rate": 7.294315131056734e-06, "loss": 0.7386, "step": 7157 }, { "epoch": 0.36786925686093125, "grad_norm": 1.0459643602371216, "learning_rate": 7.293575635696508e-06, "loss": 0.7528, "step": 7158 }, { "epoch": 0.3679206496042759, "grad_norm": 1.0873206853866577, "learning_rate": 7.292836076790764e-06, "loss": 0.7635, "step": 7159 }, { "epoch": 0.36797204234762054, "grad_norm": 5.0787882804870605, "learning_rate": 7.292096454359997e-06, "loss": 0.838, "step": 7160 }, { "epoch": 0.36802343509096513, "grad_norm": 1.0541611909866333, "learning_rate": 7.291356768424695e-06, "loss": 0.7368, "step": 7161 }, { "epoch": 0.3680748278343098, "grad_norm": 1.167487621307373, "learning_rate": 7.290617019005352e-06, "loss": 0.8085, "step": 7162 }, { "epoch": 0.36812622057765443, "grad_norm": 1.095430850982666, "learning_rate": 7.289877206122466e-06, "loss": 0.7467, "step": 7163 }, { "epoch": 0.3681776133209991, "grad_norm": 1.168697476387024, "learning_rate": 7.28913732979653e-06, "loss": 0.782, "step": 7164 }, { "epoch": 0.3682290060643437, "grad_norm": 0.7951579093933105, "learning_rate": 7.2883973900480475e-06, "loss": 0.6669, "step": 7165 }, { "epoch": 0.36828039880768837, "grad_norm": 1.0883334875106812, "learning_rate": 7.287657386897517e-06, "loss": 0.7838, "step": 7166 }, { "epoch": 0.368331791551033, "grad_norm": 1.1320598125457764, "learning_rate": 7.286917320365439e-06, "loss": 0.721, "step": 7167 }, { "epoch": 0.3683831842943776, "grad_norm": 0.7666705846786499, "learning_rate": 7.286177190472321e-06, "loss": 0.6821, "step": 7168 }, { "epoch": 0.36843457703772226, "grad_norm": 1.1280375719070435, "learning_rate": 7.285436997238668e-06, "loss": 0.7841, "step": 7169 }, { "epoch": 0.3684859697810669, "grad_norm": 1.066644549369812, "learning_rate": 7.284696740684986e-06, "loss": 0.725, "step": 7170 }, { "epoch": 0.36853736252441155, "grad_norm": 1.026785969734192, "learning_rate": 7.283956420831786e-06, "loss": 0.7342, "step": 7171 }, { "epoch": 0.3685887552677562, "grad_norm": 1.0876283645629883, "learning_rate": 7.2832160376995785e-06, "loss": 0.7544, "step": 7172 }, { "epoch": 0.36864014801110084, "grad_norm": 1.0831308364868164, "learning_rate": 7.2824755913088776e-06, "loss": 0.7518, "step": 7173 }, { "epoch": 0.3686915407544455, "grad_norm": 1.1821656227111816, "learning_rate": 7.281735081680197e-06, "loss": 0.7279, "step": 7174 }, { "epoch": 0.36874293349779014, "grad_norm": 1.056152105331421, "learning_rate": 7.280994508834054e-06, "loss": 0.7531, "step": 7175 }, { "epoch": 0.36879432624113473, "grad_norm": 0.9553729295730591, "learning_rate": 7.280253872790965e-06, "loss": 0.6825, "step": 7176 }, { "epoch": 0.3688457189844794, "grad_norm": 1.0687272548675537, "learning_rate": 7.279513173571453e-06, "loss": 0.7781, "step": 7177 }, { "epoch": 0.368897111727824, "grad_norm": 1.0544956922531128, "learning_rate": 7.278772411196037e-06, "loss": 0.7328, "step": 7178 }, { "epoch": 0.36894850447116867, "grad_norm": 0.7564080953598022, "learning_rate": 7.278031585685241e-06, "loss": 0.7531, "step": 7179 }, { "epoch": 0.3689998972145133, "grad_norm": 1.0805671215057373, "learning_rate": 7.2772906970595915e-06, "loss": 0.7741, "step": 7180 }, { "epoch": 0.36905128995785796, "grad_norm": 1.0677911043167114, "learning_rate": 7.276549745339614e-06, "loss": 0.7, "step": 7181 }, { "epoch": 0.3691026827012026, "grad_norm": 1.0912599563598633, "learning_rate": 7.275808730545838e-06, "loss": 0.7368, "step": 7182 }, { "epoch": 0.3691540754445472, "grad_norm": 0.7646737098693848, "learning_rate": 7.275067652698793e-06, "loss": 0.6439, "step": 7183 }, { "epoch": 0.36920546818789185, "grad_norm": 1.1100205183029175, "learning_rate": 7.274326511819014e-06, "loss": 0.7758, "step": 7184 }, { "epoch": 0.3692568609312365, "grad_norm": 1.1088718175888062, "learning_rate": 7.273585307927031e-06, "loss": 0.7446, "step": 7185 }, { "epoch": 0.36930825367458114, "grad_norm": 1.0760447978973389, "learning_rate": 7.272844041043383e-06, "loss": 0.7232, "step": 7186 }, { "epoch": 0.3693596464179258, "grad_norm": 0.724325954914093, "learning_rate": 7.272102711188606e-06, "loss": 0.7281, "step": 7187 }, { "epoch": 0.36941103916127044, "grad_norm": 1.071779727935791, "learning_rate": 7.2713613183832375e-06, "loss": 0.793, "step": 7188 }, { "epoch": 0.3694624319046151, "grad_norm": 1.5506794452667236, "learning_rate": 7.270619862647823e-06, "loss": 0.7342, "step": 7189 }, { "epoch": 0.36951382464795973, "grad_norm": 1.1077038049697876, "learning_rate": 7.269878344002901e-06, "loss": 0.7508, "step": 7190 }, { "epoch": 0.3695652173913043, "grad_norm": 0.760924220085144, "learning_rate": 7.269136762469017e-06, "loss": 0.6567, "step": 7191 }, { "epoch": 0.36961661013464897, "grad_norm": 1.122255563735962, "learning_rate": 7.2683951180667166e-06, "loss": 0.7491, "step": 7192 }, { "epoch": 0.3696680028779936, "grad_norm": 1.021269679069519, "learning_rate": 7.267653410816551e-06, "loss": 0.7505, "step": 7193 }, { "epoch": 0.36971939562133826, "grad_norm": 1.0304324626922607, "learning_rate": 7.266911640739065e-06, "loss": 0.7588, "step": 7194 }, { "epoch": 0.3697707883646829, "grad_norm": 0.7316375970840454, "learning_rate": 7.266169807854815e-06, "loss": 0.6824, "step": 7195 }, { "epoch": 0.36982218110802756, "grad_norm": 1.3811365365982056, "learning_rate": 7.265427912184349e-06, "loss": 0.7709, "step": 7196 }, { "epoch": 0.3698735738513722, "grad_norm": 0.8304383158683777, "learning_rate": 7.264685953748226e-06, "loss": 0.6729, "step": 7197 }, { "epoch": 0.36992496659471685, "grad_norm": 1.0946966409683228, "learning_rate": 7.263943932567001e-06, "loss": 0.7446, "step": 7198 }, { "epoch": 0.36997635933806144, "grad_norm": 1.1057438850402832, "learning_rate": 7.263201848661231e-06, "loss": 0.7434, "step": 7199 }, { "epoch": 0.3700277520814061, "grad_norm": 1.0597110986709595, "learning_rate": 7.2624597020514785e-06, "loss": 0.7846, "step": 7200 }, { "epoch": 0.37007914482475074, "grad_norm": 0.7607244253158569, "learning_rate": 7.261717492758306e-06, "loss": 0.6808, "step": 7201 }, { "epoch": 0.3701305375680954, "grad_norm": 1.0400351285934448, "learning_rate": 7.260975220802272e-06, "loss": 0.7057, "step": 7202 }, { "epoch": 0.37018193031144003, "grad_norm": 1.0781724452972412, "learning_rate": 7.2602328862039465e-06, "loss": 0.7741, "step": 7203 }, { "epoch": 0.3702333230547847, "grad_norm": 1.0425996780395508, "learning_rate": 7.259490488983895e-06, "loss": 0.7363, "step": 7204 }, { "epoch": 0.3702847157981293, "grad_norm": 1.0915305614471436, "learning_rate": 7.258748029162688e-06, "loss": 0.7144, "step": 7205 }, { "epoch": 0.3703361085414739, "grad_norm": 1.0756149291992188, "learning_rate": 7.258005506760892e-06, "loss": 0.7342, "step": 7206 }, { "epoch": 0.37038750128481857, "grad_norm": 1.1805963516235352, "learning_rate": 7.257262921799083e-06, "loss": 0.7976, "step": 7207 }, { "epoch": 0.3704388940281632, "grad_norm": 1.1794517040252686, "learning_rate": 7.256520274297833e-06, "loss": 0.7267, "step": 7208 }, { "epoch": 0.37049028677150786, "grad_norm": 1.0186569690704346, "learning_rate": 7.25577756427772e-06, "loss": 0.7168, "step": 7209 }, { "epoch": 0.3705416795148525, "grad_norm": 1.283857822418213, "learning_rate": 7.255034791759318e-06, "loss": 0.7475, "step": 7210 }, { "epoch": 0.37059307225819715, "grad_norm": 0.827991783618927, "learning_rate": 7.254291956763208e-06, "loss": 0.6881, "step": 7211 }, { "epoch": 0.3706444650015418, "grad_norm": 1.1486775875091553, "learning_rate": 7.253549059309971e-06, "loss": 0.7644, "step": 7212 }, { "epoch": 0.37069585774488645, "grad_norm": 1.1158725023269653, "learning_rate": 7.25280609942019e-06, "loss": 0.8371, "step": 7213 }, { "epoch": 0.37074725048823104, "grad_norm": 1.2006750106811523, "learning_rate": 7.252063077114449e-06, "loss": 0.8024, "step": 7214 }, { "epoch": 0.3707986432315757, "grad_norm": 1.051665186882019, "learning_rate": 7.251319992413333e-06, "loss": 0.7583, "step": 7215 }, { "epoch": 0.37085003597492033, "grad_norm": 1.0720508098602295, "learning_rate": 7.25057684533743e-06, "loss": 0.7633, "step": 7216 }, { "epoch": 0.370901428718265, "grad_norm": 1.020507574081421, "learning_rate": 7.249833635907332e-06, "loss": 0.7524, "step": 7217 }, { "epoch": 0.3709528214616096, "grad_norm": 1.0020560026168823, "learning_rate": 7.249090364143627e-06, "loss": 0.745, "step": 7218 }, { "epoch": 0.3710042142049543, "grad_norm": 1.20321524143219, "learning_rate": 7.24834703006691e-06, "loss": 0.8087, "step": 7219 }, { "epoch": 0.3710556069482989, "grad_norm": 1.0610431432724, "learning_rate": 7.247603633697775e-06, "loss": 0.7839, "step": 7220 }, { "epoch": 0.3711069996916435, "grad_norm": 1.0345391035079956, "learning_rate": 7.246860175056819e-06, "loss": 0.7443, "step": 7221 }, { "epoch": 0.37115839243498816, "grad_norm": 1.1314092874526978, "learning_rate": 7.246116654164639e-06, "loss": 0.7738, "step": 7222 }, { "epoch": 0.3712097851783328, "grad_norm": 1.0557901859283447, "learning_rate": 7.245373071041837e-06, "loss": 0.7226, "step": 7223 }, { "epoch": 0.37126117792167745, "grad_norm": 1.0262720584869385, "learning_rate": 7.244629425709012e-06, "loss": 0.7213, "step": 7224 }, { "epoch": 0.3713125706650221, "grad_norm": 1.1466014385223389, "learning_rate": 7.2438857181867685e-06, "loss": 0.7467, "step": 7225 }, { "epoch": 0.37136396340836675, "grad_norm": 0.8458223938941956, "learning_rate": 7.243141948495714e-06, "loss": 0.6657, "step": 7226 }, { "epoch": 0.3714153561517114, "grad_norm": 1.1147453784942627, "learning_rate": 7.24239811665645e-06, "loss": 0.7902, "step": 7227 }, { "epoch": 0.37146674889505604, "grad_norm": 1.1038767099380493, "learning_rate": 7.241654222689589e-06, "loss": 0.7678, "step": 7228 }, { "epoch": 0.37151814163840063, "grad_norm": 1.1124335527420044, "learning_rate": 7.2409102666157415e-06, "loss": 0.7297, "step": 7229 }, { "epoch": 0.3715695343817453, "grad_norm": 1.0584475994110107, "learning_rate": 7.240166248455518e-06, "loss": 0.6697, "step": 7230 }, { "epoch": 0.37162092712508993, "grad_norm": 1.0489606857299805, "learning_rate": 7.239422168229532e-06, "loss": 0.7699, "step": 7231 }, { "epoch": 0.3716723198684346, "grad_norm": 1.1438466310501099, "learning_rate": 7.238678025958399e-06, "loss": 0.7685, "step": 7232 }, { "epoch": 0.3717237126117792, "grad_norm": 1.0332139730453491, "learning_rate": 7.237933821662736e-06, "loss": 0.8229, "step": 7233 }, { "epoch": 0.37177510535512387, "grad_norm": 0.80059814453125, "learning_rate": 7.237189555363162e-06, "loss": 0.6626, "step": 7234 }, { "epoch": 0.3718264980984685, "grad_norm": 1.0390715599060059, "learning_rate": 7.2364452270802984e-06, "loss": 0.7166, "step": 7235 }, { "epoch": 0.37187789084181316, "grad_norm": 1.0490527153015137, "learning_rate": 7.235700836834767e-06, "loss": 0.7519, "step": 7236 }, { "epoch": 0.37192928358515775, "grad_norm": 1.1012601852416992, "learning_rate": 7.234956384647191e-06, "loss": 0.7765, "step": 7237 }, { "epoch": 0.3719806763285024, "grad_norm": 0.6621500849723816, "learning_rate": 7.234211870538198e-06, "loss": 0.6866, "step": 7238 }, { "epoch": 0.37203206907184705, "grad_norm": 1.015376091003418, "learning_rate": 7.2334672945284135e-06, "loss": 0.7642, "step": 7239 }, { "epoch": 0.3720834618151917, "grad_norm": 1.1455764770507812, "learning_rate": 7.232722656638467e-06, "loss": 0.7583, "step": 7240 }, { "epoch": 0.37213485455853634, "grad_norm": 0.9944524168968201, "learning_rate": 7.2319779568889895e-06, "loss": 0.7014, "step": 7241 }, { "epoch": 0.372186247301881, "grad_norm": 1.0475542545318604, "learning_rate": 7.231233195300613e-06, "loss": 0.7113, "step": 7242 }, { "epoch": 0.37223764004522564, "grad_norm": 0.7069808840751648, "learning_rate": 7.230488371893974e-06, "loss": 0.663, "step": 7243 }, { "epoch": 0.37228903278857023, "grad_norm": 1.0262353420257568, "learning_rate": 7.229743486689705e-06, "loss": 0.7079, "step": 7244 }, { "epoch": 0.3723404255319149, "grad_norm": 0.7837396860122681, "learning_rate": 7.228998539708447e-06, "loss": 0.7176, "step": 7245 }, { "epoch": 0.3723918182752595, "grad_norm": 1.1227233409881592, "learning_rate": 7.228253530970838e-06, "loss": 0.821, "step": 7246 }, { "epoch": 0.37244321101860417, "grad_norm": 1.050466537475586, "learning_rate": 7.227508460497519e-06, "loss": 0.7381, "step": 7247 }, { "epoch": 0.3724946037619488, "grad_norm": 1.0220190286636353, "learning_rate": 7.226763328309132e-06, "loss": 0.7602, "step": 7248 }, { "epoch": 0.37254599650529346, "grad_norm": 1.0997636318206787, "learning_rate": 7.2260181344263226e-06, "loss": 0.7702, "step": 7249 }, { "epoch": 0.3725973892486381, "grad_norm": 1.13875150680542, "learning_rate": 7.225272878869738e-06, "loss": 0.7097, "step": 7250 }, { "epoch": 0.37264878199198276, "grad_norm": 0.7041965126991272, "learning_rate": 7.224527561660025e-06, "loss": 0.7071, "step": 7251 }, { "epoch": 0.37270017473532735, "grad_norm": 1.0214099884033203, "learning_rate": 7.2237821828178325e-06, "loss": 0.7281, "step": 7252 }, { "epoch": 0.372751567478672, "grad_norm": 1.0446522235870361, "learning_rate": 7.223036742363814e-06, "loss": 0.7275, "step": 7253 }, { "epoch": 0.37280296022201664, "grad_norm": 1.0758243799209595, "learning_rate": 7.22229124031862e-06, "loss": 0.7751, "step": 7254 }, { "epoch": 0.3728543529653613, "grad_norm": 1.4733484983444214, "learning_rate": 7.2215456767029055e-06, "loss": 0.7556, "step": 7255 }, { "epoch": 0.37290574570870594, "grad_norm": 0.8676440119743347, "learning_rate": 7.220800051537331e-06, "loss": 0.6917, "step": 7256 }, { "epoch": 0.3729571384520506, "grad_norm": 1.1262989044189453, "learning_rate": 7.220054364842549e-06, "loss": 0.7351, "step": 7257 }, { "epoch": 0.37300853119539523, "grad_norm": 1.1365312337875366, "learning_rate": 7.219308616639225e-06, "loss": 0.725, "step": 7258 }, { "epoch": 0.3730599239387398, "grad_norm": 1.041709303855896, "learning_rate": 7.218562806948016e-06, "loss": 0.7391, "step": 7259 }, { "epoch": 0.37311131668208447, "grad_norm": 0.7140436172485352, "learning_rate": 7.217816935789588e-06, "loss": 0.7031, "step": 7260 }, { "epoch": 0.3731627094254291, "grad_norm": 1.033147931098938, "learning_rate": 7.217071003184604e-06, "loss": 0.7527, "step": 7261 }, { "epoch": 0.37321410216877376, "grad_norm": 0.7554978728294373, "learning_rate": 7.2163250091537325e-06, "loss": 0.6995, "step": 7262 }, { "epoch": 0.3732654949121184, "grad_norm": 1.1233800649642944, "learning_rate": 7.215578953717641e-06, "loss": 0.7994, "step": 7263 }, { "epoch": 0.37331688765546306, "grad_norm": 1.1571245193481445, "learning_rate": 7.214832836896999e-06, "loss": 0.7814, "step": 7264 }, { "epoch": 0.3733682803988077, "grad_norm": 1.062779188156128, "learning_rate": 7.21408665871248e-06, "loss": 0.7486, "step": 7265 }, { "epoch": 0.37341967314215235, "grad_norm": 0.774655818939209, "learning_rate": 7.213340419184758e-06, "loss": 0.705, "step": 7266 }, { "epoch": 0.37347106588549694, "grad_norm": 2.3855013847351074, "learning_rate": 7.212594118334506e-06, "loss": 0.7836, "step": 7267 }, { "epoch": 0.3735224586288416, "grad_norm": 0.779570460319519, "learning_rate": 7.211847756182401e-06, "loss": 0.7014, "step": 7268 }, { "epoch": 0.37357385137218624, "grad_norm": 1.1229965686798096, "learning_rate": 7.211101332749123e-06, "loss": 0.7532, "step": 7269 }, { "epoch": 0.3736252441155309, "grad_norm": 1.0663634538650513, "learning_rate": 7.210354848055352e-06, "loss": 0.7517, "step": 7270 }, { "epoch": 0.37367663685887553, "grad_norm": 1.044818639755249, "learning_rate": 7.209608302121769e-06, "loss": 0.737, "step": 7271 }, { "epoch": 0.3737280296022202, "grad_norm": 1.050997018814087, "learning_rate": 7.208861694969059e-06, "loss": 0.7129, "step": 7272 }, { "epoch": 0.3737794223455648, "grad_norm": 1.0292834043502808, "learning_rate": 7.208115026617906e-06, "loss": 0.7591, "step": 7273 }, { "epoch": 0.3738308150889094, "grad_norm": 1.0508451461791992, "learning_rate": 7.207368297088999e-06, "loss": 0.7011, "step": 7274 }, { "epoch": 0.37388220783225407, "grad_norm": 1.173186182975769, "learning_rate": 7.2066215064030255e-06, "loss": 0.7027, "step": 7275 }, { "epoch": 0.3739336005755987, "grad_norm": 1.0943999290466309, "learning_rate": 7.2058746545806755e-06, "loss": 0.7297, "step": 7276 }, { "epoch": 0.37398499331894336, "grad_norm": 1.2071597576141357, "learning_rate": 7.2051277416426435e-06, "loss": 0.8096, "step": 7277 }, { "epoch": 0.374036386062288, "grad_norm": 0.8069635629653931, "learning_rate": 7.204380767609621e-06, "loss": 0.6631, "step": 7278 }, { "epoch": 0.37408777880563265, "grad_norm": 1.0536696910858154, "learning_rate": 7.203633732502304e-06, "loss": 0.7808, "step": 7279 }, { "epoch": 0.3741391715489773, "grad_norm": 1.1158949136734009, "learning_rate": 7.202886636341391e-06, "loss": 0.745, "step": 7280 }, { "epoch": 0.37419056429232195, "grad_norm": 1.2475515604019165, "learning_rate": 7.202139479147578e-06, "loss": 0.7383, "step": 7281 }, { "epoch": 0.37424195703566654, "grad_norm": 1.0595248937606812, "learning_rate": 7.201392260941571e-06, "loss": 0.7964, "step": 7282 }, { "epoch": 0.3742933497790112, "grad_norm": 0.9792985320091248, "learning_rate": 7.200644981744067e-06, "loss": 0.7101, "step": 7283 }, { "epoch": 0.37434474252235583, "grad_norm": 1.112979531288147, "learning_rate": 7.199897641575773e-06, "loss": 0.7711, "step": 7284 }, { "epoch": 0.3743961352657005, "grad_norm": 1.0467731952667236, "learning_rate": 7.199150240457392e-06, "loss": 0.7324, "step": 7285 }, { "epoch": 0.3744475280090451, "grad_norm": 0.7209861278533936, "learning_rate": 7.1984027784096346e-06, "loss": 0.7212, "step": 7286 }, { "epoch": 0.3744989207523898, "grad_norm": 1.0659093856811523, "learning_rate": 7.197655255453207e-06, "loss": 0.7478, "step": 7287 }, { "epoch": 0.3745503134957344, "grad_norm": 1.1402009725570679, "learning_rate": 7.196907671608824e-06, "loss": 0.7991, "step": 7288 }, { "epoch": 0.37460170623907907, "grad_norm": 0.8056334257125854, "learning_rate": 7.1961600268971945e-06, "loss": 0.6804, "step": 7289 }, { "epoch": 0.37465309898242366, "grad_norm": 1.1002293825149536, "learning_rate": 7.1954123213390324e-06, "loss": 0.6984, "step": 7290 }, { "epoch": 0.3747044917257683, "grad_norm": 1.0335334539413452, "learning_rate": 7.1946645549550575e-06, "loss": 0.7574, "step": 7291 }, { "epoch": 0.37475588446911295, "grad_norm": 1.0619194507598877, "learning_rate": 7.193916727765981e-06, "loss": 0.7646, "step": 7292 }, { "epoch": 0.3748072772124576, "grad_norm": 1.1324498653411865, "learning_rate": 7.193168839792529e-06, "loss": 0.7571, "step": 7293 }, { "epoch": 0.37485866995580225, "grad_norm": 1.1125948429107666, "learning_rate": 7.192420891055418e-06, "loss": 0.7616, "step": 7294 }, { "epoch": 0.3749100626991469, "grad_norm": 1.1340450048446655, "learning_rate": 7.191672881575371e-06, "loss": 0.7423, "step": 7295 }, { "epoch": 0.37496145544249154, "grad_norm": 1.1012948751449585, "learning_rate": 7.190924811373113e-06, "loss": 0.799, "step": 7296 }, { "epoch": 0.37501284818583613, "grad_norm": 1.117787480354309, "learning_rate": 7.190176680469369e-06, "loss": 0.8074, "step": 7297 }, { "epoch": 0.3750642409291808, "grad_norm": 1.1589043140411377, "learning_rate": 7.189428488884871e-06, "loss": 0.7294, "step": 7298 }, { "epoch": 0.37511563367252543, "grad_norm": 1.0923062562942505, "learning_rate": 7.188680236640343e-06, "loss": 0.7172, "step": 7299 }, { "epoch": 0.3751670264158701, "grad_norm": 1.0963752269744873, "learning_rate": 7.187931923756517e-06, "loss": 0.7283, "step": 7300 }, { "epoch": 0.3752184191592147, "grad_norm": 1.082520604133606, "learning_rate": 7.187183550254126e-06, "loss": 0.7337, "step": 7301 }, { "epoch": 0.37526981190255937, "grad_norm": 1.0780723094940186, "learning_rate": 7.186435116153907e-06, "loss": 0.7499, "step": 7302 }, { "epoch": 0.375321204645904, "grad_norm": 0.8717724084854126, "learning_rate": 7.185686621476592e-06, "loss": 0.6621, "step": 7303 }, { "epoch": 0.37537259738924866, "grad_norm": 0.7696239948272705, "learning_rate": 7.184938066242922e-06, "loss": 0.6616, "step": 7304 }, { "epoch": 0.37542399013259325, "grad_norm": 1.0677084922790527, "learning_rate": 7.184189450473633e-06, "loss": 0.7607, "step": 7305 }, { "epoch": 0.3754753828759379, "grad_norm": 1.009419322013855, "learning_rate": 7.183440774189468e-06, "loss": 0.7308, "step": 7306 }, { "epoch": 0.37552677561928255, "grad_norm": 0.7687630653381348, "learning_rate": 7.182692037411172e-06, "loss": 0.6118, "step": 7307 }, { "epoch": 0.3755781683626272, "grad_norm": 1.1391465663909912, "learning_rate": 7.181943240159484e-06, "loss": 0.7638, "step": 7308 }, { "epoch": 0.37562956110597184, "grad_norm": 1.0756276845932007, "learning_rate": 7.181194382455156e-06, "loss": 0.7757, "step": 7309 }, { "epoch": 0.3756809538493165, "grad_norm": 0.70989590883255, "learning_rate": 7.180445464318932e-06, "loss": 0.6735, "step": 7310 }, { "epoch": 0.37573234659266114, "grad_norm": 1.1215742826461792, "learning_rate": 7.179696485771561e-06, "loss": 0.8149, "step": 7311 }, { "epoch": 0.37578373933600573, "grad_norm": 1.0532371997833252, "learning_rate": 7.178947446833798e-06, "loss": 0.768, "step": 7312 }, { "epoch": 0.3758351320793504, "grad_norm": 1.0300507545471191, "learning_rate": 7.1781983475263905e-06, "loss": 0.7628, "step": 7313 }, { "epoch": 0.375886524822695, "grad_norm": 0.938471257686615, "learning_rate": 7.177449187870098e-06, "loss": 0.6749, "step": 7314 }, { "epoch": 0.37593791756603967, "grad_norm": 1.066704273223877, "learning_rate": 7.176699967885672e-06, "loss": 0.7305, "step": 7315 }, { "epoch": 0.3759893103093843, "grad_norm": 0.8025457859039307, "learning_rate": 7.175950687593873e-06, "loss": 0.6846, "step": 7316 }, { "epoch": 0.37604070305272896, "grad_norm": 1.107663631439209, "learning_rate": 7.175201347015463e-06, "loss": 0.7519, "step": 7317 }, { "epoch": 0.3760920957960736, "grad_norm": 1.068961501121521, "learning_rate": 7.174451946171198e-06, "loss": 0.744, "step": 7318 }, { "epoch": 0.37614348853941826, "grad_norm": 1.0240728855133057, "learning_rate": 7.173702485081843e-06, "loss": 0.7047, "step": 7319 }, { "epoch": 0.37619488128276285, "grad_norm": 1.0331439971923828, "learning_rate": 7.172952963768163e-06, "loss": 0.7721, "step": 7320 }, { "epoch": 0.3762462740261075, "grad_norm": 1.0934516191482544, "learning_rate": 7.172203382250922e-06, "loss": 0.7207, "step": 7321 }, { "epoch": 0.37629766676945214, "grad_norm": 1.0872308015823364, "learning_rate": 7.171453740550891e-06, "loss": 0.7285, "step": 7322 }, { "epoch": 0.3763490595127968, "grad_norm": 1.00347101688385, "learning_rate": 7.170704038688838e-06, "loss": 0.77, "step": 7323 }, { "epoch": 0.37640045225614144, "grad_norm": 1.1341794729232788, "learning_rate": 7.169954276685533e-06, "loss": 0.727, "step": 7324 }, { "epoch": 0.3764518449994861, "grad_norm": 0.8027995824813843, "learning_rate": 7.169204454561752e-06, "loss": 0.6679, "step": 7325 }, { "epoch": 0.37650323774283073, "grad_norm": 1.0181519985198975, "learning_rate": 7.168454572338265e-06, "loss": 0.7293, "step": 7326 }, { "epoch": 0.3765546304861754, "grad_norm": 1.0490118265151978, "learning_rate": 7.16770463003585e-06, "loss": 0.7489, "step": 7327 }, { "epoch": 0.37660602322951997, "grad_norm": 1.0263705253601074, "learning_rate": 7.166954627675288e-06, "loss": 0.7423, "step": 7328 }, { "epoch": 0.3766574159728646, "grad_norm": 0.9749269485473633, "learning_rate": 7.166204565277353e-06, "loss": 0.7123, "step": 7329 }, { "epoch": 0.37670880871620926, "grad_norm": 0.7837164402008057, "learning_rate": 7.165454442862832e-06, "loss": 0.6576, "step": 7330 }, { "epoch": 0.3767602014595539, "grad_norm": 0.7601831555366516, "learning_rate": 7.164704260452502e-06, "loss": 0.6958, "step": 7331 }, { "epoch": 0.37681159420289856, "grad_norm": 1.1043617725372314, "learning_rate": 7.1639540180671504e-06, "loss": 0.7359, "step": 7332 }, { "epoch": 0.3768629869462432, "grad_norm": 1.0691887140274048, "learning_rate": 7.163203715727564e-06, "loss": 0.8021, "step": 7333 }, { "epoch": 0.37691437968958785, "grad_norm": 1.1172916889190674, "learning_rate": 7.162453353454531e-06, "loss": 0.8136, "step": 7334 }, { "epoch": 0.37696577243293244, "grad_norm": 0.8592469096183777, "learning_rate": 7.1617029312688365e-06, "loss": 0.7172, "step": 7335 }, { "epoch": 0.3770171651762771, "grad_norm": 1.0296452045440674, "learning_rate": 7.1609524491912765e-06, "loss": 0.8158, "step": 7336 }, { "epoch": 0.37706855791962174, "grad_norm": 1.1944377422332764, "learning_rate": 7.160201907242641e-06, "loss": 0.7349, "step": 7337 }, { "epoch": 0.3771199506629664, "grad_norm": 1.046364426612854, "learning_rate": 7.1594513054437255e-06, "loss": 0.736, "step": 7338 }, { "epoch": 0.37717134340631103, "grad_norm": 1.1314833164215088, "learning_rate": 7.1587006438153265e-06, "loss": 0.7764, "step": 7339 }, { "epoch": 0.3772227361496557, "grad_norm": 1.0466359853744507, "learning_rate": 7.15794992237824e-06, "loss": 0.7549, "step": 7340 }, { "epoch": 0.3772741288930003, "grad_norm": 1.0583604574203491, "learning_rate": 7.157199141153268e-06, "loss": 0.7875, "step": 7341 }, { "epoch": 0.377325521636345, "grad_norm": 0.770566463470459, "learning_rate": 7.156448300161208e-06, "loss": 0.6973, "step": 7342 }, { "epoch": 0.37737691437968957, "grad_norm": 1.0318974256515503, "learning_rate": 7.155697399422866e-06, "loss": 0.7308, "step": 7343 }, { "epoch": 0.3774283071230342, "grad_norm": 1.068851113319397, "learning_rate": 7.154946438959046e-06, "loss": 0.7863, "step": 7344 }, { "epoch": 0.37747969986637886, "grad_norm": 1.0377678871154785, "learning_rate": 7.154195418790551e-06, "loss": 0.679, "step": 7345 }, { "epoch": 0.3775310926097235, "grad_norm": 1.0351192951202393, "learning_rate": 7.153444338938191e-06, "loss": 0.7331, "step": 7346 }, { "epoch": 0.37758248535306815, "grad_norm": 0.8633785247802734, "learning_rate": 7.152693199422778e-06, "loss": 0.6943, "step": 7347 }, { "epoch": 0.3776338780964128, "grad_norm": 0.7407357096672058, "learning_rate": 7.151942000265117e-06, "loss": 0.6631, "step": 7348 }, { "epoch": 0.37768527083975745, "grad_norm": 0.8120464086532593, "learning_rate": 7.1511907414860245e-06, "loss": 0.7125, "step": 7349 }, { "epoch": 0.37773666358310204, "grad_norm": 1.0121363401412964, "learning_rate": 7.150439423106314e-06, "loss": 0.7513, "step": 7350 }, { "epoch": 0.3777880563264467, "grad_norm": 0.8445212841033936, "learning_rate": 7.149688045146802e-06, "loss": 0.6854, "step": 7351 }, { "epoch": 0.37783944906979133, "grad_norm": 1.1821459531784058, "learning_rate": 7.148936607628306e-06, "loss": 0.798, "step": 7352 }, { "epoch": 0.377890841813136, "grad_norm": 1.0836069583892822, "learning_rate": 7.148185110571644e-06, "loss": 0.7694, "step": 7353 }, { "epoch": 0.3779422345564806, "grad_norm": 1.0769526958465576, "learning_rate": 7.147433553997638e-06, "loss": 0.7606, "step": 7354 }, { "epoch": 0.3779936272998253, "grad_norm": 1.072932243347168, "learning_rate": 7.1466819379271095e-06, "loss": 0.7795, "step": 7355 }, { "epoch": 0.3780450200431699, "grad_norm": 1.130855679512024, "learning_rate": 7.145930262380883e-06, "loss": 0.8146, "step": 7356 }, { "epoch": 0.37809641278651457, "grad_norm": 1.0468519926071167, "learning_rate": 7.1451785273797845e-06, "loss": 0.7836, "step": 7357 }, { "epoch": 0.37814780552985916, "grad_norm": 1.1851191520690918, "learning_rate": 7.144426732944642e-06, "loss": 0.7745, "step": 7358 }, { "epoch": 0.3781991982732038, "grad_norm": 1.0813720226287842, "learning_rate": 7.143674879096285e-06, "loss": 0.7202, "step": 7359 }, { "epoch": 0.37825059101654845, "grad_norm": 1.0562858581542969, "learning_rate": 7.142922965855543e-06, "loss": 0.7442, "step": 7360 }, { "epoch": 0.3783019837598931, "grad_norm": 1.1034413576126099, "learning_rate": 7.142170993243249e-06, "loss": 0.7873, "step": 7361 }, { "epoch": 0.37835337650323775, "grad_norm": 1.109531283378601, "learning_rate": 7.141418961280238e-06, "loss": 0.7666, "step": 7362 }, { "epoch": 0.3784047692465824, "grad_norm": 1.1119484901428223, "learning_rate": 7.140666869987344e-06, "loss": 0.7103, "step": 7363 }, { "epoch": 0.37845616198992704, "grad_norm": 0.8298102021217346, "learning_rate": 7.139914719385405e-06, "loss": 0.7316, "step": 7364 }, { "epoch": 0.3785075547332717, "grad_norm": 1.1190065145492554, "learning_rate": 7.13916250949526e-06, "loss": 0.7654, "step": 7365 }, { "epoch": 0.3785589474766163, "grad_norm": 1.0564727783203125, "learning_rate": 7.1384102403377485e-06, "loss": 0.7184, "step": 7366 }, { "epoch": 0.37861034021996093, "grad_norm": 1.0378198623657227, "learning_rate": 7.137657911933714e-06, "loss": 0.7255, "step": 7367 }, { "epoch": 0.3786617329633056, "grad_norm": 0.8552929759025574, "learning_rate": 7.136905524304002e-06, "loss": 0.674, "step": 7368 }, { "epoch": 0.3787131257066502, "grad_norm": 0.7065010666847229, "learning_rate": 7.136153077469456e-06, "loss": 0.6896, "step": 7369 }, { "epoch": 0.37876451844999487, "grad_norm": 1.2000168561935425, "learning_rate": 7.135400571450923e-06, "loss": 0.7731, "step": 7370 }, { "epoch": 0.3788159111933395, "grad_norm": 1.1487095355987549, "learning_rate": 7.134648006269253e-06, "loss": 0.7471, "step": 7371 }, { "epoch": 0.37886730393668416, "grad_norm": 1.0376038551330566, "learning_rate": 7.133895381945295e-06, "loss": 0.7488, "step": 7372 }, { "epoch": 0.37891869668002875, "grad_norm": 0.8188409805297852, "learning_rate": 7.1331426984999045e-06, "loss": 0.6456, "step": 7373 }, { "epoch": 0.3789700894233734, "grad_norm": 1.1227713823318481, "learning_rate": 7.132389955953929e-06, "loss": 0.7836, "step": 7374 }, { "epoch": 0.37902148216671805, "grad_norm": 0.7337707877159119, "learning_rate": 7.131637154328231e-06, "loss": 0.66, "step": 7375 }, { "epoch": 0.3790728749100627, "grad_norm": 3.011390209197998, "learning_rate": 7.130884293643663e-06, "loss": 0.6954, "step": 7376 }, { "epoch": 0.37912426765340734, "grad_norm": 1.0165685415267944, "learning_rate": 7.130131373921084e-06, "loss": 0.6994, "step": 7377 }, { "epoch": 0.379175660396752, "grad_norm": 1.0319876670837402, "learning_rate": 7.129378395181355e-06, "loss": 0.7551, "step": 7378 }, { "epoch": 0.37922705314009664, "grad_norm": 1.0393660068511963, "learning_rate": 7.1286253574453405e-06, "loss": 0.7657, "step": 7379 }, { "epoch": 0.3792784458834413, "grad_norm": 1.1224186420440674, "learning_rate": 7.127872260733899e-06, "loss": 0.7529, "step": 7380 }, { "epoch": 0.3793298386267859, "grad_norm": 0.6923304200172424, "learning_rate": 7.1271191050679e-06, "loss": 0.6582, "step": 7381 }, { "epoch": 0.3793812313701305, "grad_norm": 1.0675216913223267, "learning_rate": 7.126365890468207e-06, "loss": 0.7523, "step": 7382 }, { "epoch": 0.37943262411347517, "grad_norm": 1.0050772428512573, "learning_rate": 7.125612616955693e-06, "loss": 0.688, "step": 7383 }, { "epoch": 0.3794840168568198, "grad_norm": 1.1310421228408813, "learning_rate": 7.124859284551223e-06, "loss": 0.7754, "step": 7384 }, { "epoch": 0.37953540960016446, "grad_norm": 1.026774525642395, "learning_rate": 7.124105893275671e-06, "loss": 0.7354, "step": 7385 }, { "epoch": 0.3795868023435091, "grad_norm": 1.0665091276168823, "learning_rate": 7.123352443149913e-06, "loss": 0.7643, "step": 7386 }, { "epoch": 0.37963819508685376, "grad_norm": 0.7611773610115051, "learning_rate": 7.1225989341948175e-06, "loss": 0.706, "step": 7387 }, { "epoch": 0.37968958783019835, "grad_norm": 1.1037728786468506, "learning_rate": 7.121845366431267e-06, "loss": 0.8075, "step": 7388 }, { "epoch": 0.379740980573543, "grad_norm": 1.048171877861023, "learning_rate": 7.121091739880137e-06, "loss": 0.7157, "step": 7389 }, { "epoch": 0.37979237331688764, "grad_norm": 0.7966815233230591, "learning_rate": 7.120338054562309e-06, "loss": 0.6845, "step": 7390 }, { "epoch": 0.3798437660602323, "grad_norm": 1.0229524374008179, "learning_rate": 7.119584310498664e-06, "loss": 0.75, "step": 7391 }, { "epoch": 0.37989515880357694, "grad_norm": 1.0557529926300049, "learning_rate": 7.118830507710084e-06, "loss": 0.7695, "step": 7392 }, { "epoch": 0.3799465515469216, "grad_norm": 1.0853312015533447, "learning_rate": 7.118076646217454e-06, "loss": 0.7969, "step": 7393 }, { "epoch": 0.37999794429026623, "grad_norm": 0.7926809787750244, "learning_rate": 7.117322726041662e-06, "loss": 0.6688, "step": 7394 }, { "epoch": 0.3800493370336109, "grad_norm": 1.1121256351470947, "learning_rate": 7.116568747203596e-06, "loss": 0.7077, "step": 7395 }, { "epoch": 0.38010072977695547, "grad_norm": 1.1015864610671997, "learning_rate": 7.1158147097241425e-06, "loss": 0.7351, "step": 7396 }, { "epoch": 0.3801521225203001, "grad_norm": 1.099389910697937, "learning_rate": 7.115060613624194e-06, "loss": 0.7038, "step": 7397 }, { "epoch": 0.38020351526364476, "grad_norm": 0.7381818890571594, "learning_rate": 7.114306458924647e-06, "loss": 0.6953, "step": 7398 }, { "epoch": 0.3802549080069894, "grad_norm": 1.0690655708312988, "learning_rate": 7.1135522456463914e-06, "loss": 0.8036, "step": 7399 }, { "epoch": 0.38030630075033406, "grad_norm": 1.0561307668685913, "learning_rate": 7.112797973810326e-06, "loss": 0.75, "step": 7400 }, { "epoch": 0.3803576934936787, "grad_norm": 1.1480324268341064, "learning_rate": 7.112043643437347e-06, "loss": 0.7692, "step": 7401 }, { "epoch": 0.38040908623702335, "grad_norm": 0.9994832277297974, "learning_rate": 7.111289254548355e-06, "loss": 0.712, "step": 7402 }, { "epoch": 0.38046047898036794, "grad_norm": 1.1053189039230347, "learning_rate": 7.110534807164251e-06, "loss": 0.7286, "step": 7403 }, { "epoch": 0.3805118717237126, "grad_norm": 0.7683562636375427, "learning_rate": 7.109780301305935e-06, "loss": 0.6631, "step": 7404 }, { "epoch": 0.38056326446705724, "grad_norm": 1.1451411247253418, "learning_rate": 7.109025736994316e-06, "loss": 0.7953, "step": 7405 }, { "epoch": 0.3806146572104019, "grad_norm": 1.0412806272506714, "learning_rate": 7.108271114250296e-06, "loss": 0.7247, "step": 7406 }, { "epoch": 0.38066604995374653, "grad_norm": 0.706887423992157, "learning_rate": 7.107516433094785e-06, "loss": 0.6699, "step": 7407 }, { "epoch": 0.3807174426970912, "grad_norm": 1.0059337615966797, "learning_rate": 7.1067616935486895e-06, "loss": 0.7278, "step": 7408 }, { "epoch": 0.3807688354404358, "grad_norm": 1.1390876770019531, "learning_rate": 7.106006895632921e-06, "loss": 0.7509, "step": 7409 }, { "epoch": 0.3808202281837805, "grad_norm": 1.060619592666626, "learning_rate": 7.105252039368393e-06, "loss": 0.7573, "step": 7410 }, { "epoch": 0.38087162092712507, "grad_norm": 1.036307454109192, "learning_rate": 7.10449712477602e-06, "loss": 0.7141, "step": 7411 }, { "epoch": 0.3809230136704697, "grad_norm": 1.157056450843811, "learning_rate": 7.103742151876716e-06, "loss": 0.7156, "step": 7412 }, { "epoch": 0.38097440641381436, "grad_norm": 1.0527716875076294, "learning_rate": 7.102987120691398e-06, "loss": 0.7212, "step": 7413 }, { "epoch": 0.381025799157159, "grad_norm": 1.0051586627960205, "learning_rate": 7.102232031240985e-06, "loss": 0.7077, "step": 7414 }, { "epoch": 0.38107719190050365, "grad_norm": 0.8157718777656555, "learning_rate": 7.101476883546399e-06, "loss": 0.6481, "step": 7415 }, { "epoch": 0.3811285846438483, "grad_norm": 1.1330392360687256, "learning_rate": 7.100721677628561e-06, "loss": 0.7749, "step": 7416 }, { "epoch": 0.38117997738719295, "grad_norm": 1.1204426288604736, "learning_rate": 7.099966413508393e-06, "loss": 0.8013, "step": 7417 }, { "epoch": 0.3812313701305376, "grad_norm": 1.0978925228118896, "learning_rate": 7.099211091206822e-06, "loss": 0.6828, "step": 7418 }, { "epoch": 0.3812827628738822, "grad_norm": 1.1226383447647095, "learning_rate": 7.098455710744776e-06, "loss": 0.7879, "step": 7419 }, { "epoch": 0.38133415561722683, "grad_norm": 0.7480544447898865, "learning_rate": 7.09770027214318e-06, "loss": 0.6927, "step": 7420 }, { "epoch": 0.3813855483605715, "grad_norm": 1.2020505666732788, "learning_rate": 7.0969447754229685e-06, "loss": 0.7694, "step": 7421 }, { "epoch": 0.3814369411039161, "grad_norm": 1.1181451082229614, "learning_rate": 7.096189220605069e-06, "loss": 0.7111, "step": 7422 }, { "epoch": 0.3814883338472608, "grad_norm": 0.9593185782432556, "learning_rate": 7.095433607710418e-06, "loss": 0.662, "step": 7423 }, { "epoch": 0.3815397265906054, "grad_norm": 1.0330407619476318, "learning_rate": 7.094677936759948e-06, "loss": 0.6899, "step": 7424 }, { "epoch": 0.38159111933395007, "grad_norm": 1.0459941625595093, "learning_rate": 7.093922207774597e-06, "loss": 0.7674, "step": 7425 }, { "epoch": 0.38164251207729466, "grad_norm": 0.9885093569755554, "learning_rate": 7.0931664207753035e-06, "loss": 0.6997, "step": 7426 }, { "epoch": 0.3816939048206393, "grad_norm": 1.044066309928894, "learning_rate": 7.0924105757830066e-06, "loss": 0.639, "step": 7427 }, { "epoch": 0.38174529756398395, "grad_norm": 0.8201054334640503, "learning_rate": 7.091654672818646e-06, "loss": 0.6981, "step": 7428 }, { "epoch": 0.3817966903073286, "grad_norm": 0.992118239402771, "learning_rate": 7.090898711903166e-06, "loss": 0.6869, "step": 7429 }, { "epoch": 0.38184808305067325, "grad_norm": 1.0499683618545532, "learning_rate": 7.090142693057511e-06, "loss": 0.7147, "step": 7430 }, { "epoch": 0.3818994757940179, "grad_norm": 1.0943446159362793, "learning_rate": 7.08938661630263e-06, "loss": 0.7706, "step": 7431 }, { "epoch": 0.38195086853736254, "grad_norm": 1.1973525285720825, "learning_rate": 7.088630481659466e-06, "loss": 0.8135, "step": 7432 }, { "epoch": 0.3820022612807072, "grad_norm": 1.0697773694992065, "learning_rate": 7.087874289148972e-06, "loss": 0.72, "step": 7433 }, { "epoch": 0.3820536540240518, "grad_norm": 1.1684834957122803, "learning_rate": 7.087118038792097e-06, "loss": 0.821, "step": 7434 }, { "epoch": 0.38210504676739643, "grad_norm": 1.0874080657958984, "learning_rate": 7.086361730609795e-06, "loss": 0.7813, "step": 7435 }, { "epoch": 0.3821564395107411, "grad_norm": 1.0783307552337646, "learning_rate": 7.085605364623018e-06, "loss": 0.7079, "step": 7436 }, { "epoch": 0.3822078322540857, "grad_norm": 1.127070665359497, "learning_rate": 7.0848489408527246e-06, "loss": 0.7824, "step": 7437 }, { "epoch": 0.38225922499743037, "grad_norm": 0.9674443602561951, "learning_rate": 7.084092459319869e-06, "loss": 0.6562, "step": 7438 }, { "epoch": 0.382310617740775, "grad_norm": 1.038142204284668, "learning_rate": 7.083335920045411e-06, "loss": 0.7548, "step": 7439 }, { "epoch": 0.38236201048411966, "grad_norm": 0.7793687582015991, "learning_rate": 7.082579323050315e-06, "loss": 0.6965, "step": 7440 }, { "epoch": 0.38241340322746425, "grad_norm": 0.7511942982673645, "learning_rate": 7.081822668355538e-06, "loss": 0.6844, "step": 7441 }, { "epoch": 0.3824647959708089, "grad_norm": 1.076296329498291, "learning_rate": 7.0810659559820484e-06, "loss": 0.801, "step": 7442 }, { "epoch": 0.38251618871415355, "grad_norm": 1.1076669692993164, "learning_rate": 7.080309185950808e-06, "loss": 0.808, "step": 7443 }, { "epoch": 0.3825675814574982, "grad_norm": 1.051639437675476, "learning_rate": 7.079552358282784e-06, "loss": 0.7372, "step": 7444 }, { "epoch": 0.38261897420084284, "grad_norm": 1.0340163707733154, "learning_rate": 7.078795472998947e-06, "loss": 0.7475, "step": 7445 }, { "epoch": 0.3826703669441875, "grad_norm": 1.1695044040679932, "learning_rate": 7.078038530120266e-06, "loss": 0.7881, "step": 7446 }, { "epoch": 0.38272175968753214, "grad_norm": 1.4706473350524902, "learning_rate": 7.077281529667713e-06, "loss": 0.7352, "step": 7447 }, { "epoch": 0.3827731524308768, "grad_norm": 0.9759430885314941, "learning_rate": 7.076524471662261e-06, "loss": 0.7177, "step": 7448 }, { "epoch": 0.3828245451742214, "grad_norm": 1.0149638652801514, "learning_rate": 7.0757673561248855e-06, "loss": 0.7474, "step": 7449 }, { "epoch": 0.382875937917566, "grad_norm": 1.0085992813110352, "learning_rate": 7.075010183076563e-06, "loss": 0.6806, "step": 7450 }, { "epoch": 0.38292733066091067, "grad_norm": 1.0078824758529663, "learning_rate": 7.0742529525382706e-06, "loss": 0.7324, "step": 7451 }, { "epoch": 0.3829787234042553, "grad_norm": 1.0417908430099487, "learning_rate": 7.073495664530989e-06, "loss": 0.7989, "step": 7452 }, { "epoch": 0.38303011614759996, "grad_norm": 1.040493130683899, "learning_rate": 7.0727383190757e-06, "loss": 0.7376, "step": 7453 }, { "epoch": 0.3830815088909446, "grad_norm": 1.0949937105178833, "learning_rate": 7.071980916193384e-06, "loss": 0.8, "step": 7454 }, { "epoch": 0.38313290163428926, "grad_norm": 1.0469510555267334, "learning_rate": 7.0712234559050295e-06, "loss": 0.7888, "step": 7455 }, { "epoch": 0.3831842943776339, "grad_norm": 1.0587806701660156, "learning_rate": 7.070465938231621e-06, "loss": 0.7709, "step": 7456 }, { "epoch": 0.3832356871209785, "grad_norm": 0.854256808757782, "learning_rate": 7.0697083631941436e-06, "loss": 0.7104, "step": 7457 }, { "epoch": 0.38328707986432314, "grad_norm": 1.1093567609786987, "learning_rate": 7.06895073081359e-06, "loss": 0.7997, "step": 7458 }, { "epoch": 0.3833384726076678, "grad_norm": 1.0292497873306274, "learning_rate": 7.068193041110948e-06, "loss": 0.7054, "step": 7459 }, { "epoch": 0.38338986535101244, "grad_norm": 1.2124760150909424, "learning_rate": 7.067435294107212e-06, "loss": 0.7894, "step": 7460 }, { "epoch": 0.3834412580943571, "grad_norm": 1.109876036643982, "learning_rate": 7.066677489823378e-06, "loss": 0.7056, "step": 7461 }, { "epoch": 0.38349265083770173, "grad_norm": 1.056516408920288, "learning_rate": 7.065919628280437e-06, "loss": 0.6755, "step": 7462 }, { "epoch": 0.3835440435810464, "grad_norm": 1.118664026260376, "learning_rate": 7.065161709499391e-06, "loss": 0.7443, "step": 7463 }, { "epoch": 0.38359543632439097, "grad_norm": 1.0236186981201172, "learning_rate": 7.064403733501235e-06, "loss": 0.7476, "step": 7464 }, { "epoch": 0.3836468290677356, "grad_norm": 1.1197069883346558, "learning_rate": 7.063645700306971e-06, "loss": 0.7836, "step": 7465 }, { "epoch": 0.38369822181108026, "grad_norm": 0.8992980122566223, "learning_rate": 7.062887609937601e-06, "loss": 0.6816, "step": 7466 }, { "epoch": 0.3837496145544249, "grad_norm": 1.0554096698760986, "learning_rate": 7.062129462414128e-06, "loss": 0.7044, "step": 7467 }, { "epoch": 0.38380100729776956, "grad_norm": 0.7279976606369019, "learning_rate": 7.0613712577575576e-06, "loss": 0.6884, "step": 7468 }, { "epoch": 0.3838524000411142, "grad_norm": 0.7279179692268372, "learning_rate": 7.060612995988896e-06, "loss": 0.6688, "step": 7469 }, { "epoch": 0.38390379278445885, "grad_norm": 1.1429977416992188, "learning_rate": 7.059854677129153e-06, "loss": 0.7366, "step": 7470 }, { "epoch": 0.3839551855278035, "grad_norm": 1.150233507156372, "learning_rate": 7.059096301199336e-06, "loss": 0.8176, "step": 7471 }, { "epoch": 0.3840065782711481, "grad_norm": 0.7654420137405396, "learning_rate": 7.058337868220459e-06, "loss": 0.685, "step": 7472 }, { "epoch": 0.38405797101449274, "grad_norm": 1.0729304552078247, "learning_rate": 7.057579378213534e-06, "loss": 0.7771, "step": 7473 }, { "epoch": 0.3841093637578374, "grad_norm": 0.7671759128570557, "learning_rate": 7.056820831199576e-06, "loss": 0.6778, "step": 7474 }, { "epoch": 0.38416075650118203, "grad_norm": 1.0311747789382935, "learning_rate": 7.0560622271995995e-06, "loss": 0.7304, "step": 7475 }, { "epoch": 0.3842121492445267, "grad_norm": 1.1003468036651611, "learning_rate": 7.055303566234624e-06, "loss": 0.7643, "step": 7476 }, { "epoch": 0.3842635419878713, "grad_norm": 0.9820614457130432, "learning_rate": 7.05454484832567e-06, "loss": 0.6997, "step": 7477 }, { "epoch": 0.384314934731216, "grad_norm": 0.9953899383544922, "learning_rate": 7.053786073493753e-06, "loss": 0.6681, "step": 7478 }, { "epoch": 0.38436632747456057, "grad_norm": 1.0100626945495605, "learning_rate": 7.053027241759903e-06, "loss": 0.7153, "step": 7479 }, { "epoch": 0.3844177202179052, "grad_norm": 1.0939350128173828, "learning_rate": 7.05226835314514e-06, "loss": 0.7764, "step": 7480 }, { "epoch": 0.38446911296124986, "grad_norm": 0.8679288625717163, "learning_rate": 7.0515094076704885e-06, "loss": 0.6613, "step": 7481 }, { "epoch": 0.3845205057045945, "grad_norm": 1.0305333137512207, "learning_rate": 7.0507504053569785e-06, "loss": 0.7695, "step": 7482 }, { "epoch": 0.38457189844793915, "grad_norm": 1.0743308067321777, "learning_rate": 7.049991346225636e-06, "loss": 0.7629, "step": 7483 }, { "epoch": 0.3846232911912838, "grad_norm": 1.085890293121338, "learning_rate": 7.049232230297496e-06, "loss": 0.7914, "step": 7484 }, { "epoch": 0.38467468393462845, "grad_norm": 1.103814721107483, "learning_rate": 7.048473057593586e-06, "loss": 0.771, "step": 7485 }, { "epoch": 0.3847260766779731, "grad_norm": 1.1282713413238525, "learning_rate": 7.047713828134941e-06, "loss": 0.8059, "step": 7486 }, { "epoch": 0.3847774694213177, "grad_norm": 1.077272891998291, "learning_rate": 7.046954541942597e-06, "loss": 0.7709, "step": 7487 }, { "epoch": 0.38482886216466233, "grad_norm": 1.0906522274017334, "learning_rate": 7.04619519903759e-06, "loss": 0.7568, "step": 7488 }, { "epoch": 0.384880254908007, "grad_norm": 1.1227941513061523, "learning_rate": 7.045435799440957e-06, "loss": 0.8007, "step": 7489 }, { "epoch": 0.3849316476513516, "grad_norm": 1.031508207321167, "learning_rate": 7.044676343173738e-06, "loss": 0.7166, "step": 7490 }, { "epoch": 0.3849830403946963, "grad_norm": 1.109743356704712, "learning_rate": 7.043916830256977e-06, "loss": 0.8055, "step": 7491 }, { "epoch": 0.3850344331380409, "grad_norm": 1.0598150491714478, "learning_rate": 7.043157260711716e-06, "loss": 0.7053, "step": 7492 }, { "epoch": 0.38508582588138557, "grad_norm": 0.9949901103973389, "learning_rate": 7.042397634559e-06, "loss": 0.7339, "step": 7493 }, { "epoch": 0.3851372186247302, "grad_norm": 0.7405714988708496, "learning_rate": 7.041637951819871e-06, "loss": 0.6678, "step": 7494 }, { "epoch": 0.3851886113680748, "grad_norm": 1.0699007511138916, "learning_rate": 7.040878212515382e-06, "loss": 0.7673, "step": 7495 }, { "epoch": 0.38524000411141945, "grad_norm": 0.8301787972450256, "learning_rate": 7.040118416666579e-06, "loss": 0.6526, "step": 7496 }, { "epoch": 0.3852913968547641, "grad_norm": 0.9950885772705078, "learning_rate": 7.039358564294514e-06, "loss": 0.7471, "step": 7497 }, { "epoch": 0.38534278959810875, "grad_norm": 0.9953995943069458, "learning_rate": 7.038598655420239e-06, "loss": 0.7515, "step": 7498 }, { "epoch": 0.3853941823414534, "grad_norm": 1.0471075773239136, "learning_rate": 7.037838690064807e-06, "loss": 0.7626, "step": 7499 }, { "epoch": 0.38544557508479804, "grad_norm": 1.0842597484588623, "learning_rate": 7.037078668249275e-06, "loss": 0.7422, "step": 7500 }, { "epoch": 0.3854969678281427, "grad_norm": 1.0505601167678833, "learning_rate": 7.036318589994701e-06, "loss": 0.7454, "step": 7501 }, { "epoch": 0.3855483605714873, "grad_norm": 1.1080377101898193, "learning_rate": 7.03555845532214e-06, "loss": 0.8134, "step": 7502 }, { "epoch": 0.38559975331483193, "grad_norm": 1.049881100654602, "learning_rate": 7.034798264252657e-06, "loss": 0.7332, "step": 7503 }, { "epoch": 0.3856511460581766, "grad_norm": 1.0777125358581543, "learning_rate": 7.034038016807309e-06, "loss": 0.6827, "step": 7504 }, { "epoch": 0.3857025388015212, "grad_norm": 1.0869483947753906, "learning_rate": 7.0332777130071625e-06, "loss": 0.8075, "step": 7505 }, { "epoch": 0.38575393154486587, "grad_norm": 1.065034031867981, "learning_rate": 7.032517352873283e-06, "loss": 0.7443, "step": 7506 }, { "epoch": 0.3858053242882105, "grad_norm": 1.0039747953414917, "learning_rate": 7.031756936426733e-06, "loss": 0.7571, "step": 7507 }, { "epoch": 0.38585671703155516, "grad_norm": 1.1473783254623413, "learning_rate": 7.030996463688584e-06, "loss": 0.7946, "step": 7508 }, { "epoch": 0.3859081097748998, "grad_norm": 0.8165815472602844, "learning_rate": 7.030235934679905e-06, "loss": 0.6869, "step": 7509 }, { "epoch": 0.3859595025182444, "grad_norm": 0.8059452176094055, "learning_rate": 7.029475349421765e-06, "loss": 0.6842, "step": 7510 }, { "epoch": 0.38601089526158905, "grad_norm": 1.0866498947143555, "learning_rate": 7.028714707935238e-06, "loss": 0.7538, "step": 7511 }, { "epoch": 0.3860622880049337, "grad_norm": 1.1299792528152466, "learning_rate": 7.0279540102414e-06, "loss": 0.7527, "step": 7512 }, { "epoch": 0.38611368074827834, "grad_norm": 1.0811710357666016, "learning_rate": 7.027193256361324e-06, "loss": 0.6644, "step": 7513 }, { "epoch": 0.386165073491623, "grad_norm": 1.2123768329620361, "learning_rate": 7.026432446316089e-06, "loss": 0.7903, "step": 7514 }, { "epoch": 0.38621646623496764, "grad_norm": 1.1964912414550781, "learning_rate": 7.025671580126774e-06, "loss": 0.7088, "step": 7515 }, { "epoch": 0.3862678589783123, "grad_norm": 0.8216059803962708, "learning_rate": 7.024910657814459e-06, "loss": 0.686, "step": 7516 }, { "epoch": 0.3863192517216569, "grad_norm": 1.0295400619506836, "learning_rate": 7.024149679400226e-06, "loss": 0.7249, "step": 7517 }, { "epoch": 0.3863706444650015, "grad_norm": 1.0168280601501465, "learning_rate": 7.023388644905158e-06, "loss": 0.764, "step": 7518 }, { "epoch": 0.38642203720834617, "grad_norm": 1.2338663339614868, "learning_rate": 7.0226275543503406e-06, "loss": 0.7941, "step": 7519 }, { "epoch": 0.3864734299516908, "grad_norm": 1.1368939876556396, "learning_rate": 7.021866407756861e-06, "loss": 0.7777, "step": 7520 }, { "epoch": 0.38652482269503546, "grad_norm": 1.0670526027679443, "learning_rate": 7.021105205145806e-06, "loss": 0.7379, "step": 7521 }, { "epoch": 0.3865762154383801, "grad_norm": 0.813489556312561, "learning_rate": 7.020343946538269e-06, "loss": 0.7026, "step": 7522 }, { "epoch": 0.38662760818172476, "grad_norm": 1.0601547956466675, "learning_rate": 7.019582631955335e-06, "loss": 0.6988, "step": 7523 }, { "epoch": 0.3866790009250694, "grad_norm": 1.1179819107055664, "learning_rate": 7.018821261418104e-06, "loss": 0.6947, "step": 7524 }, { "epoch": 0.386730393668414, "grad_norm": 1.155249834060669, "learning_rate": 7.018059834947666e-06, "loss": 0.7566, "step": 7525 }, { "epoch": 0.38678178641175864, "grad_norm": 1.030011534690857, "learning_rate": 7.017298352565116e-06, "loss": 0.7497, "step": 7526 }, { "epoch": 0.3868331791551033, "grad_norm": 1.040596604347229, "learning_rate": 7.016536814291557e-06, "loss": 0.7651, "step": 7527 }, { "epoch": 0.38688457189844794, "grad_norm": 1.2649948596954346, "learning_rate": 7.015775220148082e-06, "loss": 0.792, "step": 7528 }, { "epoch": 0.3869359646417926, "grad_norm": 0.7738057374954224, "learning_rate": 7.015013570155795e-06, "loss": 0.6876, "step": 7529 }, { "epoch": 0.38698735738513723, "grad_norm": 1.05807626247406, "learning_rate": 7.014251864335798e-06, "loss": 0.7962, "step": 7530 }, { "epoch": 0.3870387501284819, "grad_norm": 1.0294122695922852, "learning_rate": 7.0134901027091925e-06, "loss": 0.7393, "step": 7531 }, { "epoch": 0.3870901428718265, "grad_norm": 1.131093144416809, "learning_rate": 7.012728285297086e-06, "loss": 0.8163, "step": 7532 }, { "epoch": 0.3871415356151711, "grad_norm": 1.7459410429000854, "learning_rate": 7.011966412120586e-06, "loss": 0.7297, "step": 7533 }, { "epoch": 0.38719292835851576, "grad_norm": 1.1630157232284546, "learning_rate": 7.011204483200799e-06, "loss": 0.7281, "step": 7534 }, { "epoch": 0.3872443211018604, "grad_norm": 1.1783239841461182, "learning_rate": 7.010442498558836e-06, "loss": 0.8154, "step": 7535 }, { "epoch": 0.38729571384520506, "grad_norm": 0.7386965751647949, "learning_rate": 7.009680458215807e-06, "loss": 0.6724, "step": 7536 }, { "epoch": 0.3873471065885497, "grad_norm": 1.050032138824463, "learning_rate": 7.008918362192826e-06, "loss": 0.7518, "step": 7537 }, { "epoch": 0.38739849933189435, "grad_norm": 1.0325521230697632, "learning_rate": 7.008156210511008e-06, "loss": 0.7705, "step": 7538 }, { "epoch": 0.387449892075239, "grad_norm": 1.1409049034118652, "learning_rate": 7.007394003191468e-06, "loss": 0.7998, "step": 7539 }, { "epoch": 0.3875012848185836, "grad_norm": 1.1737031936645508, "learning_rate": 7.006631740255325e-06, "loss": 0.7652, "step": 7540 }, { "epoch": 0.38755267756192824, "grad_norm": 0.8028273582458496, "learning_rate": 7.0058694217236965e-06, "loss": 0.6816, "step": 7541 }, { "epoch": 0.3876040703052729, "grad_norm": 1.0774755477905273, "learning_rate": 7.0051070476177055e-06, "loss": 0.7136, "step": 7542 }, { "epoch": 0.38765546304861753, "grad_norm": 1.0718046426773071, "learning_rate": 7.0043446179584715e-06, "loss": 0.7396, "step": 7543 }, { "epoch": 0.3877068557919622, "grad_norm": 1.0895307064056396, "learning_rate": 7.003582132767121e-06, "loss": 0.717, "step": 7544 }, { "epoch": 0.3877582485353068, "grad_norm": 1.0787065029144287, "learning_rate": 7.002819592064776e-06, "loss": 0.7757, "step": 7545 }, { "epoch": 0.3878096412786515, "grad_norm": 1.1404898166656494, "learning_rate": 7.002056995872566e-06, "loss": 0.7504, "step": 7546 }, { "epoch": 0.3878610340219961, "grad_norm": 1.0683035850524902, "learning_rate": 7.0012943442116185e-06, "loss": 0.7052, "step": 7547 }, { "epoch": 0.3879124267653407, "grad_norm": 1.1014246940612793, "learning_rate": 7.000531637103064e-06, "loss": 0.7426, "step": 7548 }, { "epoch": 0.38796381950868536, "grad_norm": 1.0781656503677368, "learning_rate": 6.999768874568034e-06, "loss": 0.7176, "step": 7549 }, { "epoch": 0.38801521225203, "grad_norm": 1.1062637567520142, "learning_rate": 6.999006056627659e-06, "loss": 0.7572, "step": 7550 }, { "epoch": 0.38806660499537465, "grad_norm": 1.097590684890747, "learning_rate": 6.998243183303078e-06, "loss": 0.7598, "step": 7551 }, { "epoch": 0.3881179977387193, "grad_norm": 1.1087310314178467, "learning_rate": 6.997480254615423e-06, "loss": 0.6769, "step": 7552 }, { "epoch": 0.38816939048206395, "grad_norm": 1.0431005954742432, "learning_rate": 6.996717270585833e-06, "loss": 0.719, "step": 7553 }, { "epoch": 0.3882207832254086, "grad_norm": 0.7828953862190247, "learning_rate": 6.99595423123545e-06, "loss": 0.7011, "step": 7554 }, { "epoch": 0.3882721759687532, "grad_norm": 0.9954776763916016, "learning_rate": 6.9951911365854094e-06, "loss": 0.7286, "step": 7555 }, { "epoch": 0.38832356871209783, "grad_norm": 1.1003565788269043, "learning_rate": 6.994427986656858e-06, "loss": 0.781, "step": 7556 }, { "epoch": 0.3883749614554425, "grad_norm": 1.1294127702713013, "learning_rate": 6.9936647814709364e-06, "loss": 0.7875, "step": 7557 }, { "epoch": 0.3884263541987871, "grad_norm": 0.761332094669342, "learning_rate": 6.992901521048792e-06, "loss": 0.6734, "step": 7558 }, { "epoch": 0.3884777469421318, "grad_norm": 1.077763557434082, "learning_rate": 6.992138205411569e-06, "loss": 0.7434, "step": 7559 }, { "epoch": 0.3885291396854764, "grad_norm": 1.0792584419250488, "learning_rate": 6.99137483458042e-06, "loss": 0.781, "step": 7560 }, { "epoch": 0.38858053242882107, "grad_norm": 0.6737803220748901, "learning_rate": 6.99061140857649e-06, "loss": 0.708, "step": 7561 }, { "epoch": 0.3886319251721657, "grad_norm": 1.057395577430725, "learning_rate": 6.989847927420932e-06, "loss": 0.7369, "step": 7562 }, { "epoch": 0.3886833179155103, "grad_norm": 1.1328836679458618, "learning_rate": 6.9890843911349005e-06, "loss": 0.755, "step": 7563 }, { "epoch": 0.38873471065885495, "grad_norm": 0.6859034299850464, "learning_rate": 6.98832079973955e-06, "loss": 0.6742, "step": 7564 }, { "epoch": 0.3887861034021996, "grad_norm": 1.0645487308502197, "learning_rate": 6.987557153256035e-06, "loss": 0.7565, "step": 7565 }, { "epoch": 0.38883749614554425, "grad_norm": 1.1425814628601074, "learning_rate": 6.986793451705512e-06, "loss": 0.7193, "step": 7566 }, { "epoch": 0.3888888888888889, "grad_norm": 1.0597894191741943, "learning_rate": 6.986029695109145e-06, "loss": 0.693, "step": 7567 }, { "epoch": 0.38894028163223354, "grad_norm": 1.1472972631454468, "learning_rate": 6.985265883488088e-06, "loss": 0.7453, "step": 7568 }, { "epoch": 0.3889916743755782, "grad_norm": 1.0479751825332642, "learning_rate": 6.9845020168635055e-06, "loss": 0.7793, "step": 7569 }, { "epoch": 0.3890430671189228, "grad_norm": 1.183117151260376, "learning_rate": 6.983738095256565e-06, "loss": 0.7869, "step": 7570 }, { "epoch": 0.3890944598622674, "grad_norm": 1.0616142749786377, "learning_rate": 6.9829741186884246e-06, "loss": 0.7289, "step": 7571 }, { "epoch": 0.3891458526056121, "grad_norm": 1.0492910146713257, "learning_rate": 6.982210087180257e-06, "loss": 0.7374, "step": 7572 }, { "epoch": 0.3891972453489567, "grad_norm": 1.0634167194366455, "learning_rate": 6.981446000753227e-06, "loss": 0.7418, "step": 7573 }, { "epoch": 0.38924863809230137, "grad_norm": 1.0403907299041748, "learning_rate": 6.980681859428504e-06, "loss": 0.7377, "step": 7574 }, { "epoch": 0.389300030835646, "grad_norm": 1.0965745449066162, "learning_rate": 6.979917663227262e-06, "loss": 0.789, "step": 7575 }, { "epoch": 0.38935142357899066, "grad_norm": 1.0559697151184082, "learning_rate": 6.979153412170673e-06, "loss": 0.7025, "step": 7576 }, { "epoch": 0.3894028163223353, "grad_norm": 1.099478006362915, "learning_rate": 6.9783891062799106e-06, "loss": 0.7328, "step": 7577 }, { "epoch": 0.3894542090656799, "grad_norm": 1.1058732271194458, "learning_rate": 6.9776247455761495e-06, "loss": 0.7617, "step": 7578 }, { "epoch": 0.38950560180902455, "grad_norm": 1.0469423532485962, "learning_rate": 6.976860330080567e-06, "loss": 0.7848, "step": 7579 }, { "epoch": 0.3895569945523692, "grad_norm": 1.1475212574005127, "learning_rate": 6.976095859814344e-06, "loss": 0.7434, "step": 7580 }, { "epoch": 0.38960838729571384, "grad_norm": 1.081917643547058, "learning_rate": 6.975331334798659e-06, "loss": 0.7943, "step": 7581 }, { "epoch": 0.3896597800390585, "grad_norm": 0.9631706476211548, "learning_rate": 6.9745667550546945e-06, "loss": 0.7106, "step": 7582 }, { "epoch": 0.38971117278240314, "grad_norm": 1.1239150762557983, "learning_rate": 6.973802120603633e-06, "loss": 0.7662, "step": 7583 }, { "epoch": 0.3897625655257478, "grad_norm": 1.059920072555542, "learning_rate": 6.973037431466661e-06, "loss": 0.7551, "step": 7584 }, { "epoch": 0.38981395826909243, "grad_norm": 1.0924482345581055, "learning_rate": 6.972272687664964e-06, "loss": 0.7737, "step": 7585 }, { "epoch": 0.389865351012437, "grad_norm": 1.040684461593628, "learning_rate": 6.971507889219731e-06, "loss": 0.7763, "step": 7586 }, { "epoch": 0.38991674375578167, "grad_norm": 1.0347356796264648, "learning_rate": 6.970743036152148e-06, "loss": 0.687, "step": 7587 }, { "epoch": 0.3899681364991263, "grad_norm": 1.0466582775115967, "learning_rate": 6.96997812848341e-06, "loss": 0.7234, "step": 7588 }, { "epoch": 0.39001952924247096, "grad_norm": 1.0861093997955322, "learning_rate": 6.9692131662347076e-06, "loss": 0.7849, "step": 7589 }, { "epoch": 0.3900709219858156, "grad_norm": 0.6770915389060974, "learning_rate": 6.968448149427233e-06, "loss": 0.6508, "step": 7590 }, { "epoch": 0.39012231472916026, "grad_norm": 1.0380191802978516, "learning_rate": 6.967683078082186e-06, "loss": 0.6917, "step": 7591 }, { "epoch": 0.3901737074725049, "grad_norm": 1.0087499618530273, "learning_rate": 6.96691795222076e-06, "loss": 0.7507, "step": 7592 }, { "epoch": 0.3902251002158495, "grad_norm": 1.125178575515747, "learning_rate": 6.966152771864153e-06, "loss": 0.7583, "step": 7593 }, { "epoch": 0.39027649295919414, "grad_norm": 1.0458521842956543, "learning_rate": 6.965387537033569e-06, "loss": 0.7864, "step": 7594 }, { "epoch": 0.3903278857025388, "grad_norm": 1.1327494382858276, "learning_rate": 6.964622247750205e-06, "loss": 0.7676, "step": 7595 }, { "epoch": 0.39037927844588344, "grad_norm": 0.8194754123687744, "learning_rate": 6.963856904035268e-06, "loss": 0.6339, "step": 7596 }, { "epoch": 0.3904306711892281, "grad_norm": 1.1856040954589844, "learning_rate": 6.96309150590996e-06, "loss": 0.8211, "step": 7597 }, { "epoch": 0.39048206393257273, "grad_norm": 1.0627042055130005, "learning_rate": 6.962326053395486e-06, "loss": 0.715, "step": 7598 }, { "epoch": 0.3905334566759174, "grad_norm": 1.0718475580215454, "learning_rate": 6.961560546513057e-06, "loss": 0.7503, "step": 7599 }, { "epoch": 0.390584849419262, "grad_norm": 0.7488054633140564, "learning_rate": 6.960794985283878e-06, "loss": 0.6311, "step": 7600 }, { "epoch": 0.3906362421626066, "grad_norm": 1.1386374235153198, "learning_rate": 6.9600293697291645e-06, "loss": 0.7533, "step": 7601 }, { "epoch": 0.39068763490595126, "grad_norm": 0.8333669900894165, "learning_rate": 6.959263699870125e-06, "loss": 0.7075, "step": 7602 }, { "epoch": 0.3907390276492959, "grad_norm": 1.0365575551986694, "learning_rate": 6.958497975727973e-06, "loss": 0.715, "step": 7603 }, { "epoch": 0.39079042039264056, "grad_norm": 0.7197472453117371, "learning_rate": 6.957732197323923e-06, "loss": 0.7103, "step": 7604 }, { "epoch": 0.3908418131359852, "grad_norm": 1.1052440404891968, "learning_rate": 6.956966364679195e-06, "loss": 0.7589, "step": 7605 }, { "epoch": 0.39089320587932985, "grad_norm": 0.7330883145332336, "learning_rate": 6.956200477815004e-06, "loss": 0.6794, "step": 7606 }, { "epoch": 0.3909445986226745, "grad_norm": 1.1211355924606323, "learning_rate": 6.955434536752571e-06, "loss": 0.8298, "step": 7607 }, { "epoch": 0.3909959913660191, "grad_norm": 1.0111035108566284, "learning_rate": 6.954668541513115e-06, "loss": 0.6908, "step": 7608 }, { "epoch": 0.39104738410936374, "grad_norm": 1.1451387405395508, "learning_rate": 6.9539024921178635e-06, "loss": 0.7756, "step": 7609 }, { "epoch": 0.3910987768527084, "grad_norm": 1.0160584449768066, "learning_rate": 6.9531363885880355e-06, "loss": 0.7218, "step": 7610 }, { "epoch": 0.39115016959605303, "grad_norm": 1.0737526416778564, "learning_rate": 6.952370230944857e-06, "loss": 0.7762, "step": 7611 }, { "epoch": 0.3912015623393977, "grad_norm": 1.0658982992172241, "learning_rate": 6.951604019209558e-06, "loss": 0.7784, "step": 7612 }, { "epoch": 0.3912529550827423, "grad_norm": 1.0661336183547974, "learning_rate": 6.950837753403364e-06, "loss": 0.7816, "step": 7613 }, { "epoch": 0.391304347826087, "grad_norm": 1.098449468612671, "learning_rate": 6.9500714335475075e-06, "loss": 0.7588, "step": 7614 }, { "epoch": 0.3913557405694316, "grad_norm": 1.124150037765503, "learning_rate": 6.949305059663219e-06, "loss": 0.7906, "step": 7615 }, { "epoch": 0.3914071333127762, "grad_norm": 1.110224962234497, "learning_rate": 6.9485386317717306e-06, "loss": 0.8329, "step": 7616 }, { "epoch": 0.39145852605612086, "grad_norm": 0.7523649334907532, "learning_rate": 6.947772149894278e-06, "loss": 0.6988, "step": 7617 }, { "epoch": 0.3915099187994655, "grad_norm": 1.074568271636963, "learning_rate": 6.947005614052099e-06, "loss": 0.7068, "step": 7618 }, { "epoch": 0.39156131154281015, "grad_norm": 1.1233516931533813, "learning_rate": 6.946239024266428e-06, "loss": 0.7351, "step": 7619 }, { "epoch": 0.3916127042861548, "grad_norm": 1.0954830646514893, "learning_rate": 6.945472380558506e-06, "loss": 0.7486, "step": 7620 }, { "epoch": 0.39166409702949945, "grad_norm": 1.0953837633132935, "learning_rate": 6.944705682949573e-06, "loss": 0.7028, "step": 7621 }, { "epoch": 0.3917154897728441, "grad_norm": 1.115058183670044, "learning_rate": 6.943938931460869e-06, "loss": 0.7717, "step": 7622 }, { "epoch": 0.39176688251618874, "grad_norm": 1.0459257364273071, "learning_rate": 6.943172126113639e-06, "loss": 0.7146, "step": 7623 }, { "epoch": 0.39181827525953333, "grad_norm": 1.108222484588623, "learning_rate": 6.942405266929131e-06, "loss": 0.7465, "step": 7624 }, { "epoch": 0.391869668002878, "grad_norm": 1.0019032955169678, "learning_rate": 6.941638353928587e-06, "loss": 0.7182, "step": 7625 }, { "epoch": 0.3919210607462226, "grad_norm": 1.0332640409469604, "learning_rate": 6.940871387133259e-06, "loss": 0.7265, "step": 7626 }, { "epoch": 0.3919724534895673, "grad_norm": 1.0989500284194946, "learning_rate": 6.940104366564392e-06, "loss": 0.7557, "step": 7627 }, { "epoch": 0.3920238462329119, "grad_norm": 1.0905793905258179, "learning_rate": 6.93933729224324e-06, "loss": 0.7729, "step": 7628 }, { "epoch": 0.39207523897625657, "grad_norm": 1.03756844997406, "learning_rate": 6.9385701641910565e-06, "loss": 0.7939, "step": 7629 }, { "epoch": 0.3921266317196012, "grad_norm": 1.0292121171951294, "learning_rate": 6.9378029824290915e-06, "loss": 0.7701, "step": 7630 }, { "epoch": 0.3921780244629458, "grad_norm": 1.1175249814987183, "learning_rate": 6.937035746978604e-06, "loss": 0.7136, "step": 7631 }, { "epoch": 0.39222941720629045, "grad_norm": 1.1299725770950317, "learning_rate": 6.936268457860848e-06, "loss": 0.7764, "step": 7632 }, { "epoch": 0.3922808099496351, "grad_norm": 1.0661554336547852, "learning_rate": 6.935501115097085e-06, "loss": 0.7235, "step": 7633 }, { "epoch": 0.39233220269297975, "grad_norm": 1.0558375120162964, "learning_rate": 6.934733718708573e-06, "loss": 0.7734, "step": 7634 }, { "epoch": 0.3923835954363244, "grad_norm": 1.0801435708999634, "learning_rate": 6.933966268716575e-06, "loss": 0.8107, "step": 7635 }, { "epoch": 0.39243498817966904, "grad_norm": 0.7441138029098511, "learning_rate": 6.93319876514235e-06, "loss": 0.6853, "step": 7636 }, { "epoch": 0.3924863809230137, "grad_norm": 1.0828814506530762, "learning_rate": 6.932431208007168e-06, "loss": 0.7694, "step": 7637 }, { "epoch": 0.39253777366635834, "grad_norm": 1.090512752532959, "learning_rate": 6.93166359733229e-06, "loss": 0.7703, "step": 7638 }, { "epoch": 0.3925891664097029, "grad_norm": 1.0441005229949951, "learning_rate": 6.930895933138986e-06, "loss": 0.7815, "step": 7639 }, { "epoch": 0.3926405591530476, "grad_norm": 1.1414127349853516, "learning_rate": 6.930128215448524e-06, "loss": 0.7484, "step": 7640 }, { "epoch": 0.3926919518963922, "grad_norm": 0.8270021080970764, "learning_rate": 6.929360444282174e-06, "loss": 0.6734, "step": 7641 }, { "epoch": 0.39274334463973687, "grad_norm": 1.1540815830230713, "learning_rate": 6.92859261966121e-06, "loss": 0.8229, "step": 7642 }, { "epoch": 0.3927947373830815, "grad_norm": 1.0113199949264526, "learning_rate": 6.9278247416069e-06, "loss": 0.754, "step": 7643 }, { "epoch": 0.39284613012642616, "grad_norm": 0.7301357388496399, "learning_rate": 6.9270568101405236e-06, "loss": 0.6849, "step": 7644 }, { "epoch": 0.3928975228697708, "grad_norm": 1.0406712293624878, "learning_rate": 6.926288825283356e-06, "loss": 0.7165, "step": 7645 }, { "epoch": 0.3929489156131154, "grad_norm": 0.794864296913147, "learning_rate": 6.925520787056673e-06, "loss": 0.6918, "step": 7646 }, { "epoch": 0.39300030835646005, "grad_norm": 1.0652917623519897, "learning_rate": 6.924752695481757e-06, "loss": 0.819, "step": 7647 }, { "epoch": 0.3930517010998047, "grad_norm": 0.7337948083877563, "learning_rate": 6.923984550579885e-06, "loss": 0.6858, "step": 7648 }, { "epoch": 0.39310309384314934, "grad_norm": 1.056088924407959, "learning_rate": 6.923216352372343e-06, "loss": 0.77, "step": 7649 }, { "epoch": 0.393154486586494, "grad_norm": 1.0483630895614624, "learning_rate": 6.9224481008804115e-06, "loss": 0.756, "step": 7650 }, { "epoch": 0.39320587932983864, "grad_norm": 1.0716615915298462, "learning_rate": 6.921679796125375e-06, "loss": 0.7688, "step": 7651 }, { "epoch": 0.3932572720731833, "grad_norm": 1.0684136152267456, "learning_rate": 6.920911438128524e-06, "loss": 0.7307, "step": 7652 }, { "epoch": 0.39330866481652793, "grad_norm": 1.0338706970214844, "learning_rate": 6.920143026911143e-06, "loss": 0.734, "step": 7653 }, { "epoch": 0.3933600575598725, "grad_norm": 1.034462809562683, "learning_rate": 6.919374562494523e-06, "loss": 0.7517, "step": 7654 }, { "epoch": 0.39341145030321717, "grad_norm": 1.1041597127914429, "learning_rate": 6.918606044899955e-06, "loss": 0.698, "step": 7655 }, { "epoch": 0.3934628430465618, "grad_norm": 1.0727167129516602, "learning_rate": 6.91783747414873e-06, "loss": 0.7886, "step": 7656 }, { "epoch": 0.39351423578990646, "grad_norm": 0.7407634258270264, "learning_rate": 6.9170688502621445e-06, "loss": 0.6663, "step": 7657 }, { "epoch": 0.3935656285332511, "grad_norm": 1.0173885822296143, "learning_rate": 6.9163001732614925e-06, "loss": 0.7207, "step": 7658 }, { "epoch": 0.39361702127659576, "grad_norm": 1.2487030029296875, "learning_rate": 6.915531443168069e-06, "loss": 0.7171, "step": 7659 }, { "epoch": 0.3936684140199404, "grad_norm": 1.0532139539718628, "learning_rate": 6.9147626600031754e-06, "loss": 0.807, "step": 7660 }, { "epoch": 0.39371980676328505, "grad_norm": 1.1360340118408203, "learning_rate": 6.91399382378811e-06, "loss": 0.8117, "step": 7661 }, { "epoch": 0.39377119950662964, "grad_norm": 1.0676496028900146, "learning_rate": 6.913224934544174e-06, "loss": 0.7427, "step": 7662 }, { "epoch": 0.3938225922499743, "grad_norm": 1.0968384742736816, "learning_rate": 6.912455992292672e-06, "loss": 0.8227, "step": 7663 }, { "epoch": 0.39387398499331894, "grad_norm": 1.1550003290176392, "learning_rate": 6.911686997054905e-06, "loss": 0.6706, "step": 7664 }, { "epoch": 0.3939253777366636, "grad_norm": 1.024246096611023, "learning_rate": 6.910917948852181e-06, "loss": 0.7762, "step": 7665 }, { "epoch": 0.39397677048000823, "grad_norm": 1.1120320558547974, "learning_rate": 6.910148847705805e-06, "loss": 0.7869, "step": 7666 }, { "epoch": 0.3940281632233529, "grad_norm": 0.6909576058387756, "learning_rate": 6.909379693637088e-06, "loss": 0.6489, "step": 7667 }, { "epoch": 0.3940795559666975, "grad_norm": 1.1059415340423584, "learning_rate": 6.908610486667341e-06, "loss": 0.7642, "step": 7668 }, { "epoch": 0.3941309487100421, "grad_norm": 1.0094114542007446, "learning_rate": 6.9078412268178726e-06, "loss": 0.7487, "step": 7669 }, { "epoch": 0.39418234145338676, "grad_norm": 1.045431137084961, "learning_rate": 6.907071914109997e-06, "loss": 0.7167, "step": 7670 }, { "epoch": 0.3942337341967314, "grad_norm": 1.041668176651001, "learning_rate": 6.906302548565029e-06, "loss": 0.7292, "step": 7671 }, { "epoch": 0.39428512694007606, "grad_norm": 1.0912792682647705, "learning_rate": 6.905533130204284e-06, "loss": 0.7102, "step": 7672 }, { "epoch": 0.3943365196834207, "grad_norm": 1.0472631454467773, "learning_rate": 6.9047636590490796e-06, "loss": 0.7358, "step": 7673 }, { "epoch": 0.39438791242676535, "grad_norm": 1.0955181121826172, "learning_rate": 6.903994135120734e-06, "loss": 0.7637, "step": 7674 }, { "epoch": 0.39443930517011, "grad_norm": 1.124322533607483, "learning_rate": 6.903224558440569e-06, "loss": 0.7679, "step": 7675 }, { "epoch": 0.39449069791345465, "grad_norm": 0.7169439792633057, "learning_rate": 6.902454929029904e-06, "loss": 0.6724, "step": 7676 }, { "epoch": 0.39454209065679924, "grad_norm": 1.1768615245819092, "learning_rate": 6.901685246910067e-06, "loss": 0.7041, "step": 7677 }, { "epoch": 0.3945934834001439, "grad_norm": 1.0974135398864746, "learning_rate": 6.900915512102377e-06, "loss": 0.7171, "step": 7678 }, { "epoch": 0.39464487614348853, "grad_norm": 0.7104974985122681, "learning_rate": 6.900145724628164e-06, "loss": 0.6841, "step": 7679 }, { "epoch": 0.3946962688868332, "grad_norm": 1.089743733406067, "learning_rate": 6.899375884508755e-06, "loss": 0.7396, "step": 7680 }, { "epoch": 0.3947476616301778, "grad_norm": 1.0988154411315918, "learning_rate": 6.898605991765478e-06, "loss": 0.7127, "step": 7681 }, { "epoch": 0.3947990543735225, "grad_norm": 1.0543286800384521, "learning_rate": 6.897836046419663e-06, "loss": 0.7126, "step": 7682 }, { "epoch": 0.3948504471168671, "grad_norm": 1.016357660293579, "learning_rate": 6.897066048492645e-06, "loss": 0.7788, "step": 7683 }, { "epoch": 0.3949018398602117, "grad_norm": 1.095605492591858, "learning_rate": 6.8962959980057535e-06, "loss": 0.7163, "step": 7684 }, { "epoch": 0.39495323260355636, "grad_norm": 1.0979453325271606, "learning_rate": 6.895525894980326e-06, "loss": 0.7476, "step": 7685 }, { "epoch": 0.395004625346901, "grad_norm": 0.7965176105499268, "learning_rate": 6.894755739437698e-06, "loss": 0.7273, "step": 7686 }, { "epoch": 0.39505601809024565, "grad_norm": 1.0486069917678833, "learning_rate": 6.893985531399209e-06, "loss": 0.7813, "step": 7687 }, { "epoch": 0.3951074108335903, "grad_norm": 1.0257296562194824, "learning_rate": 6.8932152708861956e-06, "loss": 0.7636, "step": 7688 }, { "epoch": 0.39515880357693495, "grad_norm": 1.0557836294174194, "learning_rate": 6.89244495792e-06, "loss": 0.7587, "step": 7689 }, { "epoch": 0.3952101963202796, "grad_norm": 1.0282378196716309, "learning_rate": 6.891674592521966e-06, "loss": 0.7963, "step": 7690 }, { "epoch": 0.39526158906362424, "grad_norm": 1.1251766681671143, "learning_rate": 6.8909041747134335e-06, "loss": 0.8144, "step": 7691 }, { "epoch": 0.39531298180696883, "grad_norm": 1.0695043802261353, "learning_rate": 6.890133704515751e-06, "loss": 0.8112, "step": 7692 }, { "epoch": 0.3953643745503135, "grad_norm": 0.7831131815910339, "learning_rate": 6.889363181950262e-06, "loss": 0.6809, "step": 7693 }, { "epoch": 0.3954157672936581, "grad_norm": 1.0838066339492798, "learning_rate": 6.888592607038318e-06, "loss": 0.764, "step": 7694 }, { "epoch": 0.3954671600370028, "grad_norm": 1.046748399734497, "learning_rate": 6.887821979801266e-06, "loss": 0.7771, "step": 7695 }, { "epoch": 0.3955185527803474, "grad_norm": 0.9859500527381897, "learning_rate": 6.887051300260456e-06, "loss": 0.7089, "step": 7696 }, { "epoch": 0.39556994552369207, "grad_norm": 0.7578884363174438, "learning_rate": 6.886280568437244e-06, "loss": 0.6525, "step": 7697 }, { "epoch": 0.3956213382670367, "grad_norm": 1.0375925302505493, "learning_rate": 6.885509784352982e-06, "loss": 0.7356, "step": 7698 }, { "epoch": 0.3956727310103813, "grad_norm": 1.0571956634521484, "learning_rate": 6.8847389480290225e-06, "loss": 0.751, "step": 7699 }, { "epoch": 0.39572412375372595, "grad_norm": 1.036126971244812, "learning_rate": 6.883968059486726e-06, "loss": 0.7356, "step": 7700 }, { "epoch": 0.3957755164970706, "grad_norm": 0.7724809050559998, "learning_rate": 6.8831971187474485e-06, "loss": 0.6437, "step": 7701 }, { "epoch": 0.39582690924041525, "grad_norm": 1.061787724494934, "learning_rate": 6.882426125832552e-06, "loss": 0.7447, "step": 7702 }, { "epoch": 0.3958783019837599, "grad_norm": 0.9916962385177612, "learning_rate": 6.8816550807633954e-06, "loss": 0.7157, "step": 7703 }, { "epoch": 0.39592969472710454, "grad_norm": 1.0794934034347534, "learning_rate": 6.88088398356134e-06, "loss": 0.7412, "step": 7704 }, { "epoch": 0.3959810874704492, "grad_norm": 1.0943633317947388, "learning_rate": 6.880112834247754e-06, "loss": 0.7258, "step": 7705 }, { "epoch": 0.39603248021379384, "grad_norm": 1.0183488130569458, "learning_rate": 6.879341632843998e-06, "loss": 0.7245, "step": 7706 }, { "epoch": 0.3960838729571384, "grad_norm": 0.7264434695243835, "learning_rate": 6.878570379371441e-06, "loss": 0.68, "step": 7707 }, { "epoch": 0.3961352657004831, "grad_norm": 1.1244704723358154, "learning_rate": 6.877799073851452e-06, "loss": 0.8166, "step": 7708 }, { "epoch": 0.3961866584438277, "grad_norm": 0.737281084060669, "learning_rate": 6.877027716305399e-06, "loss": 0.7017, "step": 7709 }, { "epoch": 0.39623805118717237, "grad_norm": 1.0160399675369263, "learning_rate": 6.876256306754655e-06, "loss": 0.7876, "step": 7710 }, { "epoch": 0.396289443930517, "grad_norm": 1.013521432876587, "learning_rate": 6.875484845220592e-06, "loss": 0.7181, "step": 7711 }, { "epoch": 0.39634083667386166, "grad_norm": 0.7500552535057068, "learning_rate": 6.874713331724581e-06, "loss": 0.6982, "step": 7712 }, { "epoch": 0.3963922294172063, "grad_norm": 1.0660873651504517, "learning_rate": 6.8739417662880035e-06, "loss": 0.7494, "step": 7713 }, { "epoch": 0.39644362216055096, "grad_norm": 0.8136256337165833, "learning_rate": 6.873170148932231e-06, "loss": 0.6971, "step": 7714 }, { "epoch": 0.39649501490389555, "grad_norm": 0.8580878376960754, "learning_rate": 6.872398479678645e-06, "loss": 0.7295, "step": 7715 }, { "epoch": 0.3965464076472402, "grad_norm": 1.1223771572113037, "learning_rate": 6.871626758548622e-06, "loss": 0.7252, "step": 7716 }, { "epoch": 0.39659780039058484, "grad_norm": 1.0317038297653198, "learning_rate": 6.8708549855635465e-06, "loss": 0.7128, "step": 7717 }, { "epoch": 0.3966491931339295, "grad_norm": 1.0364794731140137, "learning_rate": 6.870083160744801e-06, "loss": 0.7446, "step": 7718 }, { "epoch": 0.39670058587727414, "grad_norm": 1.0909066200256348, "learning_rate": 6.869311284113767e-06, "loss": 0.7308, "step": 7719 }, { "epoch": 0.3967519786206188, "grad_norm": 1.0916565656661987, "learning_rate": 6.868539355691831e-06, "loss": 0.7594, "step": 7720 }, { "epoch": 0.39680337136396343, "grad_norm": 1.0309057235717773, "learning_rate": 6.867767375500382e-06, "loss": 0.8361, "step": 7721 }, { "epoch": 0.396854764107308, "grad_norm": 0.7288246750831604, "learning_rate": 6.866995343560807e-06, "loss": 0.6119, "step": 7722 }, { "epoch": 0.39690615685065267, "grad_norm": 0.7469972372055054, "learning_rate": 6.866223259894493e-06, "loss": 0.6512, "step": 7723 }, { "epoch": 0.3969575495939973, "grad_norm": 1.290073037147522, "learning_rate": 6.865451124522837e-06, "loss": 0.7332, "step": 7724 }, { "epoch": 0.39700894233734196, "grad_norm": 1.102309226989746, "learning_rate": 6.8646789374672274e-06, "loss": 0.7573, "step": 7725 }, { "epoch": 0.3970603350806866, "grad_norm": 1.0716608762741089, "learning_rate": 6.86390669874906e-06, "loss": 0.7479, "step": 7726 }, { "epoch": 0.39711172782403126, "grad_norm": 1.1664611101150513, "learning_rate": 6.863134408389729e-06, "loss": 0.7289, "step": 7727 }, { "epoch": 0.3971631205673759, "grad_norm": 1.1512418985366821, "learning_rate": 6.862362066410631e-06, "loss": 0.7256, "step": 7728 }, { "epoch": 0.39721451331072055, "grad_norm": 0.7525127530097961, "learning_rate": 6.861589672833169e-06, "loss": 0.7066, "step": 7729 }, { "epoch": 0.39726590605406514, "grad_norm": 1.0336024761199951, "learning_rate": 6.860817227678738e-06, "loss": 0.7325, "step": 7730 }, { "epoch": 0.3973172987974098, "grad_norm": 1.0607967376708984, "learning_rate": 6.860044730968739e-06, "loss": 0.7272, "step": 7731 }, { "epoch": 0.39736869154075444, "grad_norm": 0.7145070433616638, "learning_rate": 6.859272182724579e-06, "loss": 0.6964, "step": 7732 }, { "epoch": 0.3974200842840991, "grad_norm": 0.6816957592964172, "learning_rate": 6.858499582967658e-06, "loss": 0.6834, "step": 7733 }, { "epoch": 0.39747147702744373, "grad_norm": 1.0652270317077637, "learning_rate": 6.857726931719384e-06, "loss": 0.6982, "step": 7734 }, { "epoch": 0.3975228697707884, "grad_norm": 1.0576021671295166, "learning_rate": 6.8569542290011634e-06, "loss": 0.7456, "step": 7735 }, { "epoch": 0.397574262514133, "grad_norm": 1.0707768201828003, "learning_rate": 6.856181474834402e-06, "loss": 0.7817, "step": 7736 }, { "epoch": 0.3976256552574776, "grad_norm": 1.0332192182540894, "learning_rate": 6.855408669240514e-06, "loss": 0.7451, "step": 7737 }, { "epoch": 0.39767704800082226, "grad_norm": 1.0896917581558228, "learning_rate": 6.854635812240908e-06, "loss": 0.7264, "step": 7738 }, { "epoch": 0.3977284407441669, "grad_norm": 1.10227632522583, "learning_rate": 6.853862903856998e-06, "loss": 0.7768, "step": 7739 }, { "epoch": 0.39777983348751156, "grad_norm": 1.1464961767196655, "learning_rate": 6.853089944110197e-06, "loss": 0.7836, "step": 7740 }, { "epoch": 0.3978312262308562, "grad_norm": 1.1983598470687866, "learning_rate": 6.852316933021921e-06, "loss": 0.6845, "step": 7741 }, { "epoch": 0.39788261897420085, "grad_norm": 1.0834624767303467, "learning_rate": 6.851543870613586e-06, "loss": 0.7332, "step": 7742 }, { "epoch": 0.3979340117175455, "grad_norm": 0.7786639928817749, "learning_rate": 6.850770756906613e-06, "loss": 0.7053, "step": 7743 }, { "epoch": 0.39798540446089015, "grad_norm": 0.7227832078933716, "learning_rate": 6.849997591922418e-06, "loss": 0.7294, "step": 7744 }, { "epoch": 0.39803679720423474, "grad_norm": 1.0818644762039185, "learning_rate": 6.849224375682426e-06, "loss": 0.714, "step": 7745 }, { "epoch": 0.3980881899475794, "grad_norm": 0.94719398021698, "learning_rate": 6.8484511082080565e-06, "loss": 0.6937, "step": 7746 }, { "epoch": 0.39813958269092403, "grad_norm": 1.0978195667266846, "learning_rate": 6.847677789520735e-06, "loss": 0.7772, "step": 7747 }, { "epoch": 0.3981909754342687, "grad_norm": 1.032971739768982, "learning_rate": 6.846904419641886e-06, "loss": 0.8105, "step": 7748 }, { "epoch": 0.3982423681776133, "grad_norm": 1.0822910070419312, "learning_rate": 6.8461309985929396e-06, "loss": 0.7737, "step": 7749 }, { "epoch": 0.398293760920958, "grad_norm": 1.0729904174804688, "learning_rate": 6.845357526395321e-06, "loss": 0.7449, "step": 7750 }, { "epoch": 0.3983451536643026, "grad_norm": 1.0648449659347534, "learning_rate": 6.844584003070461e-06, "loss": 0.7979, "step": 7751 }, { "epoch": 0.39839654640764727, "grad_norm": 0.8209916353225708, "learning_rate": 6.843810428639789e-06, "loss": 0.6533, "step": 7752 }, { "epoch": 0.39844793915099186, "grad_norm": 1.030351996421814, "learning_rate": 6.843036803124739e-06, "loss": 0.7656, "step": 7753 }, { "epoch": 0.3984993318943365, "grad_norm": 0.8125653862953186, "learning_rate": 6.842263126546746e-06, "loss": 0.6533, "step": 7754 }, { "epoch": 0.39855072463768115, "grad_norm": 1.1078312397003174, "learning_rate": 6.841489398927245e-06, "loss": 0.8019, "step": 7755 }, { "epoch": 0.3986021173810258, "grad_norm": 1.1109648942947388, "learning_rate": 6.840715620287671e-06, "loss": 0.6381, "step": 7756 }, { "epoch": 0.39865351012437045, "grad_norm": 1.1202822923660278, "learning_rate": 6.839941790649462e-06, "loss": 0.8132, "step": 7757 }, { "epoch": 0.3987049028677151, "grad_norm": 1.110429286956787, "learning_rate": 6.839167910034061e-06, "loss": 0.7751, "step": 7758 }, { "epoch": 0.39875629561105974, "grad_norm": 0.9082592725753784, "learning_rate": 6.838393978462907e-06, "loss": 0.6657, "step": 7759 }, { "epoch": 0.39880768835440433, "grad_norm": 1.1499602794647217, "learning_rate": 6.8376199959574405e-06, "loss": 0.7739, "step": 7760 }, { "epoch": 0.398859081097749, "grad_norm": 1.0758951902389526, "learning_rate": 6.836845962539109e-06, "loss": 0.7114, "step": 7761 }, { "epoch": 0.3989104738410936, "grad_norm": 1.0509676933288574, "learning_rate": 6.8360718782293565e-06, "loss": 0.7221, "step": 7762 }, { "epoch": 0.3989618665844383, "grad_norm": 1.0686817169189453, "learning_rate": 6.835297743049628e-06, "loss": 0.7502, "step": 7763 }, { "epoch": 0.3990132593277829, "grad_norm": 0.763863742351532, "learning_rate": 6.834523557021374e-06, "loss": 0.7015, "step": 7764 }, { "epoch": 0.39906465207112757, "grad_norm": 1.0583761930465698, "learning_rate": 6.833749320166042e-06, "loss": 0.7603, "step": 7765 }, { "epoch": 0.3991160448144722, "grad_norm": 1.079943060874939, "learning_rate": 6.832975032505085e-06, "loss": 0.7326, "step": 7766 }, { "epoch": 0.39916743755781686, "grad_norm": 1.066389560699463, "learning_rate": 6.832200694059953e-06, "loss": 0.7635, "step": 7767 }, { "epoch": 0.39921883030116145, "grad_norm": 1.0397090911865234, "learning_rate": 6.8314263048521e-06, "loss": 0.7527, "step": 7768 }, { "epoch": 0.3992702230445061, "grad_norm": 0.812747061252594, "learning_rate": 6.830651864902984e-06, "loss": 0.6601, "step": 7769 }, { "epoch": 0.39932161578785075, "grad_norm": 1.8816694021224976, "learning_rate": 6.829877374234059e-06, "loss": 0.6956, "step": 7770 }, { "epoch": 0.3993730085311954, "grad_norm": 1.0488512516021729, "learning_rate": 6.829102832866782e-06, "loss": 0.7548, "step": 7771 }, { "epoch": 0.39942440127454004, "grad_norm": 1.0703542232513428, "learning_rate": 6.828328240822615e-06, "loss": 0.7687, "step": 7772 }, { "epoch": 0.3994757940178847, "grad_norm": 1.0814608335494995, "learning_rate": 6.8275535981230155e-06, "loss": 0.7274, "step": 7773 }, { "epoch": 0.39952718676122934, "grad_norm": 1.0757063627243042, "learning_rate": 6.826778904789451e-06, "loss": 0.7349, "step": 7774 }, { "epoch": 0.3995785795045739, "grad_norm": 0.7019311785697937, "learning_rate": 6.82600416084338e-06, "loss": 0.6561, "step": 7775 }, { "epoch": 0.3996299722479186, "grad_norm": 1.0670479536056519, "learning_rate": 6.825229366306269e-06, "loss": 0.7322, "step": 7776 }, { "epoch": 0.3996813649912632, "grad_norm": 1.0538848638534546, "learning_rate": 6.824454521199585e-06, "loss": 0.7808, "step": 7777 }, { "epoch": 0.39973275773460787, "grad_norm": 0.6868626475334167, "learning_rate": 6.823679625544794e-06, "loss": 0.7096, "step": 7778 }, { "epoch": 0.3997841504779525, "grad_norm": 0.9946128129959106, "learning_rate": 6.822904679363367e-06, "loss": 0.6895, "step": 7779 }, { "epoch": 0.39983554322129716, "grad_norm": 1.0078725814819336, "learning_rate": 6.822129682676774e-06, "loss": 0.7259, "step": 7780 }, { "epoch": 0.3998869359646418, "grad_norm": 1.1129213571548462, "learning_rate": 6.821354635506489e-06, "loss": 0.6952, "step": 7781 }, { "epoch": 0.39993832870798646, "grad_norm": 0.9731841683387756, "learning_rate": 6.8205795378739815e-06, "loss": 0.6606, "step": 7782 }, { "epoch": 0.39998972145133105, "grad_norm": 1.08409583568573, "learning_rate": 6.819804389800728e-06, "loss": 0.8091, "step": 7783 }, { "epoch": 0.4000411141946757, "grad_norm": 0.7332271933555603, "learning_rate": 6.819029191308205e-06, "loss": 0.6755, "step": 7784 }, { "epoch": 0.40009250693802034, "grad_norm": 1.128983974456787, "learning_rate": 6.818253942417891e-06, "loss": 0.7832, "step": 7785 }, { "epoch": 0.400143899681365, "grad_norm": 1.0079869031906128, "learning_rate": 6.817478643151263e-06, "loss": 0.7207, "step": 7786 }, { "epoch": 0.40019529242470964, "grad_norm": 1.014378309249878, "learning_rate": 6.816703293529802e-06, "loss": 0.7118, "step": 7787 }, { "epoch": 0.4002466851680543, "grad_norm": 0.826568067073822, "learning_rate": 6.8159278935749885e-06, "loss": 0.7164, "step": 7788 }, { "epoch": 0.40029807791139893, "grad_norm": 1.0655832290649414, "learning_rate": 6.815152443308307e-06, "loss": 0.7382, "step": 7789 }, { "epoch": 0.4003494706547436, "grad_norm": 1.2548801898956299, "learning_rate": 6.8143769427512445e-06, "loss": 0.6768, "step": 7790 }, { "epoch": 0.40040086339808817, "grad_norm": 0.6576259732246399, "learning_rate": 6.813601391925283e-06, "loss": 0.6719, "step": 7791 }, { "epoch": 0.4004522561414328, "grad_norm": 1.334196925163269, "learning_rate": 6.81282579085191e-06, "loss": 0.7797, "step": 7792 }, { "epoch": 0.40050364888477746, "grad_norm": 0.7099539041519165, "learning_rate": 6.812050139552617e-06, "loss": 0.653, "step": 7793 }, { "epoch": 0.4005550416281221, "grad_norm": 1.0754698514938354, "learning_rate": 6.8112744380488934e-06, "loss": 0.7745, "step": 7794 }, { "epoch": 0.40060643437146676, "grad_norm": 1.087551474571228, "learning_rate": 6.810498686362228e-06, "loss": 0.7486, "step": 7795 }, { "epoch": 0.4006578271148114, "grad_norm": 1.0259875059127808, "learning_rate": 6.809722884514116e-06, "loss": 0.7185, "step": 7796 }, { "epoch": 0.40070921985815605, "grad_norm": 1.09176504611969, "learning_rate": 6.808947032526051e-06, "loss": 0.7327, "step": 7797 }, { "epoch": 0.40076061260150064, "grad_norm": 1.198707103729248, "learning_rate": 6.808171130419528e-06, "loss": 0.7655, "step": 7798 }, { "epoch": 0.4008120053448453, "grad_norm": 1.1064229011535645, "learning_rate": 6.807395178216047e-06, "loss": 0.7418, "step": 7799 }, { "epoch": 0.40086339808818994, "grad_norm": 1.0397785902023315, "learning_rate": 6.806619175937102e-06, "loss": 0.7306, "step": 7800 }, { "epoch": 0.4009147908315346, "grad_norm": 1.1523398160934448, "learning_rate": 6.805843123604197e-06, "loss": 0.7562, "step": 7801 }, { "epoch": 0.40096618357487923, "grad_norm": 0.7127928733825684, "learning_rate": 6.805067021238831e-06, "loss": 0.6401, "step": 7802 }, { "epoch": 0.4010175763182239, "grad_norm": 1.0998114347457886, "learning_rate": 6.804290868862506e-06, "loss": 0.8263, "step": 7803 }, { "epoch": 0.4010689690615685, "grad_norm": 1.1519544124603271, "learning_rate": 6.803514666496728e-06, "loss": 0.7285, "step": 7804 }, { "epoch": 0.4011203618049132, "grad_norm": 1.0409435033798218, "learning_rate": 6.802738414163e-06, "loss": 0.7433, "step": 7805 }, { "epoch": 0.40117175454825776, "grad_norm": 1.0748502016067505, "learning_rate": 6.801962111882832e-06, "loss": 0.7432, "step": 7806 }, { "epoch": 0.4012231472916024, "grad_norm": 1.0552645921707153, "learning_rate": 6.801185759677729e-06, "loss": 0.7202, "step": 7807 }, { "epoch": 0.40127454003494706, "grad_norm": 1.0588408708572388, "learning_rate": 6.800409357569201e-06, "loss": 0.7903, "step": 7808 }, { "epoch": 0.4013259327782917, "grad_norm": 1.0377616882324219, "learning_rate": 6.799632905578759e-06, "loss": 0.6909, "step": 7809 }, { "epoch": 0.40137732552163635, "grad_norm": 0.7881459593772888, "learning_rate": 6.798856403727916e-06, "loss": 0.669, "step": 7810 }, { "epoch": 0.401428718264981, "grad_norm": 0.7208056449890137, "learning_rate": 6.798079852038187e-06, "loss": 0.6921, "step": 7811 }, { "epoch": 0.40148011100832565, "grad_norm": 1.0429795980453491, "learning_rate": 6.797303250531085e-06, "loss": 0.7296, "step": 7812 }, { "epoch": 0.40153150375167024, "grad_norm": 0.796619713306427, "learning_rate": 6.796526599228127e-06, "loss": 0.6649, "step": 7813 }, { "epoch": 0.4015828964950149, "grad_norm": 1.190920114517212, "learning_rate": 6.795749898150831e-06, "loss": 0.6997, "step": 7814 }, { "epoch": 0.40163428923835953, "grad_norm": 1.0986249446868896, "learning_rate": 6.794973147320716e-06, "loss": 0.8788, "step": 7815 }, { "epoch": 0.4016856819817042, "grad_norm": 0.7709076404571533, "learning_rate": 6.794196346759301e-06, "loss": 0.7282, "step": 7816 }, { "epoch": 0.4017370747250488, "grad_norm": 1.185579776763916, "learning_rate": 6.793419496488112e-06, "loss": 0.824, "step": 7817 }, { "epoch": 0.4017884674683935, "grad_norm": 0.8647105097770691, "learning_rate": 6.792642596528667e-06, "loss": 0.6958, "step": 7818 }, { "epoch": 0.4018398602117381, "grad_norm": 1.033717155456543, "learning_rate": 6.7918656469024935e-06, "loss": 0.7173, "step": 7819 }, { "epoch": 0.40189125295508277, "grad_norm": 0.972399115562439, "learning_rate": 6.79108864763112e-06, "loss": 0.6992, "step": 7820 }, { "epoch": 0.40194264569842736, "grad_norm": 1.12163245677948, "learning_rate": 6.79031159873607e-06, "loss": 0.7386, "step": 7821 }, { "epoch": 0.401994038441772, "grad_norm": 1.0556813478469849, "learning_rate": 6.789534500238874e-06, "loss": 0.7916, "step": 7822 }, { "epoch": 0.40204543118511665, "grad_norm": 1.0941044092178345, "learning_rate": 6.788757352161062e-06, "loss": 0.7231, "step": 7823 }, { "epoch": 0.4020968239284613, "grad_norm": 1.0354557037353516, "learning_rate": 6.787980154524165e-06, "loss": 0.7298, "step": 7824 }, { "epoch": 0.40214821667180595, "grad_norm": 0.8134052753448486, "learning_rate": 6.787202907349717e-06, "loss": 0.7304, "step": 7825 }, { "epoch": 0.4021996094151506, "grad_norm": 1.0776816606521606, "learning_rate": 6.786425610659252e-06, "loss": 0.7704, "step": 7826 }, { "epoch": 0.40225100215849524, "grad_norm": 1.0329506397247314, "learning_rate": 6.785648264474305e-06, "loss": 0.7437, "step": 7827 }, { "epoch": 0.40230239490183983, "grad_norm": 1.117995023727417, "learning_rate": 6.784870868816414e-06, "loss": 0.7991, "step": 7828 }, { "epoch": 0.4023537876451845, "grad_norm": 1.0235388278961182, "learning_rate": 6.784093423707116e-06, "loss": 0.7857, "step": 7829 }, { "epoch": 0.4024051803885291, "grad_norm": 0.7928805351257324, "learning_rate": 6.783315929167953e-06, "loss": 0.6502, "step": 7830 }, { "epoch": 0.4024565731318738, "grad_norm": 0.7468971014022827, "learning_rate": 6.782538385220465e-06, "loss": 0.7484, "step": 7831 }, { "epoch": 0.4025079658752184, "grad_norm": 1.019614815711975, "learning_rate": 6.781760791886193e-06, "loss": 0.6843, "step": 7832 }, { "epoch": 0.40255935861856307, "grad_norm": 1.0580393075942993, "learning_rate": 6.780983149186684e-06, "loss": 0.7099, "step": 7833 }, { "epoch": 0.4026107513619077, "grad_norm": 1.1143561601638794, "learning_rate": 6.7802054571434794e-06, "loss": 0.8028, "step": 7834 }, { "epoch": 0.40266214410525236, "grad_norm": 1.0954301357269287, "learning_rate": 6.77942771577813e-06, "loss": 0.7409, "step": 7835 }, { "epoch": 0.40271353684859695, "grad_norm": 1.0712257623672485, "learning_rate": 6.778649925112181e-06, "loss": 0.7268, "step": 7836 }, { "epoch": 0.4027649295919416, "grad_norm": 0.7284889221191406, "learning_rate": 6.777872085167183e-06, "loss": 0.6794, "step": 7837 }, { "epoch": 0.40281632233528625, "grad_norm": 1.035780906677246, "learning_rate": 6.777094195964686e-06, "loss": 0.688, "step": 7838 }, { "epoch": 0.4028677150786309, "grad_norm": 1.0690861940383911, "learning_rate": 6.776316257526243e-06, "loss": 0.7201, "step": 7839 }, { "epoch": 0.40291910782197554, "grad_norm": 1.0391000509262085, "learning_rate": 6.7755382698734066e-06, "loss": 0.8167, "step": 7840 }, { "epoch": 0.4029705005653202, "grad_norm": 1.0274579524993896, "learning_rate": 6.774760233027732e-06, "loss": 0.6713, "step": 7841 }, { "epoch": 0.40302189330866484, "grad_norm": 1.1139503717422485, "learning_rate": 6.773982147010775e-06, "loss": 0.8049, "step": 7842 }, { "epoch": 0.4030732860520095, "grad_norm": 1.0674599409103394, "learning_rate": 6.773204011844095e-06, "loss": 0.7447, "step": 7843 }, { "epoch": 0.4031246787953541, "grad_norm": 1.0830377340316772, "learning_rate": 6.772425827549248e-06, "loss": 0.7372, "step": 7844 }, { "epoch": 0.4031760715386987, "grad_norm": 1.0315676927566528, "learning_rate": 6.7716475941477956e-06, "loss": 0.7464, "step": 7845 }, { "epoch": 0.40322746428204337, "grad_norm": 1.0504807233810425, "learning_rate": 6.7708693116613e-06, "loss": 0.7497, "step": 7846 }, { "epoch": 0.403278857025388, "grad_norm": 1.0584125518798828, "learning_rate": 6.770090980111324e-06, "loss": 0.7428, "step": 7847 }, { "epoch": 0.40333024976873266, "grad_norm": 1.099342703819275, "learning_rate": 6.7693125995194305e-06, "loss": 0.7731, "step": 7848 }, { "epoch": 0.4033816425120773, "grad_norm": 1.0795953273773193, "learning_rate": 6.768534169907187e-06, "loss": 0.75, "step": 7849 }, { "epoch": 0.40343303525542196, "grad_norm": 0.9759268760681152, "learning_rate": 6.767755691296161e-06, "loss": 0.6626, "step": 7850 }, { "epoch": 0.40348442799876655, "grad_norm": 1.0457463264465332, "learning_rate": 6.766977163707919e-06, "loss": 0.7719, "step": 7851 }, { "epoch": 0.4035358207421112, "grad_norm": 0.7540583610534668, "learning_rate": 6.766198587164031e-06, "loss": 0.6957, "step": 7852 }, { "epoch": 0.40358721348545584, "grad_norm": 0.6900243759155273, "learning_rate": 6.765419961686069e-06, "loss": 0.6657, "step": 7853 }, { "epoch": 0.4036386062288005, "grad_norm": 1.0358715057373047, "learning_rate": 6.764641287295607e-06, "loss": 0.7145, "step": 7854 }, { "epoch": 0.40368999897214514, "grad_norm": 1.078387975692749, "learning_rate": 6.7638625640142165e-06, "loss": 0.7251, "step": 7855 }, { "epoch": 0.4037413917154898, "grad_norm": 1.0359172821044922, "learning_rate": 6.763083791863472e-06, "loss": 0.775, "step": 7856 }, { "epoch": 0.40379278445883443, "grad_norm": 1.1707258224487305, "learning_rate": 6.762304970864952e-06, "loss": 0.7427, "step": 7857 }, { "epoch": 0.4038441772021791, "grad_norm": 1.0416220426559448, "learning_rate": 6.761526101040234e-06, "loss": 0.7636, "step": 7858 }, { "epoch": 0.40389556994552367, "grad_norm": 1.062119722366333, "learning_rate": 6.760747182410898e-06, "loss": 0.7583, "step": 7859 }, { "epoch": 0.4039469626888683, "grad_norm": 1.2153178453445435, "learning_rate": 6.759968214998522e-06, "loss": 0.7639, "step": 7860 }, { "epoch": 0.40399835543221296, "grad_norm": 1.0584911108016968, "learning_rate": 6.759189198824691e-06, "loss": 0.8317, "step": 7861 }, { "epoch": 0.4040497481755576, "grad_norm": 0.7581250667572021, "learning_rate": 6.758410133910988e-06, "loss": 0.671, "step": 7862 }, { "epoch": 0.40410114091890226, "grad_norm": 1.1762150526046753, "learning_rate": 6.7576310202789955e-06, "loss": 0.7506, "step": 7863 }, { "epoch": 0.4041525336622469, "grad_norm": 0.7374364137649536, "learning_rate": 6.7568518579503e-06, "loss": 0.7005, "step": 7864 }, { "epoch": 0.40420392640559155, "grad_norm": 1.0703513622283936, "learning_rate": 6.756072646946491e-06, "loss": 0.8093, "step": 7865 }, { "epoch": 0.40425531914893614, "grad_norm": 1.0418522357940674, "learning_rate": 6.755293387289154e-06, "loss": 0.7566, "step": 7866 }, { "epoch": 0.4043067118922808, "grad_norm": 0.7447736263275146, "learning_rate": 6.754514078999883e-06, "loss": 0.6305, "step": 7867 }, { "epoch": 0.40435810463562544, "grad_norm": 1.0050560235977173, "learning_rate": 6.753734722100267e-06, "loss": 0.7893, "step": 7868 }, { "epoch": 0.4044094973789701, "grad_norm": 0.7225925326347351, "learning_rate": 6.752955316611898e-06, "loss": 0.6707, "step": 7869 }, { "epoch": 0.40446089012231473, "grad_norm": 1.0461152791976929, "learning_rate": 6.752175862556371e-06, "loss": 0.7757, "step": 7870 }, { "epoch": 0.4045122828656594, "grad_norm": 1.0345289707183838, "learning_rate": 6.751396359955283e-06, "loss": 0.7599, "step": 7871 }, { "epoch": 0.404563675609004, "grad_norm": 1.5227004289627075, "learning_rate": 6.750616808830228e-06, "loss": 0.7001, "step": 7872 }, { "epoch": 0.4046150683523487, "grad_norm": 1.1362090110778809, "learning_rate": 6.7498372092028076e-06, "loss": 0.8339, "step": 7873 }, { "epoch": 0.40466646109569326, "grad_norm": 1.0582871437072754, "learning_rate": 6.749057561094618e-06, "loss": 0.7729, "step": 7874 }, { "epoch": 0.4047178538390379, "grad_norm": 0.7246353030204773, "learning_rate": 6.7482778645272615e-06, "loss": 0.6814, "step": 7875 }, { "epoch": 0.40476924658238256, "grad_norm": 0.7528852820396423, "learning_rate": 6.747498119522341e-06, "loss": 0.6821, "step": 7876 }, { "epoch": 0.4048206393257272, "grad_norm": 1.0142005681991577, "learning_rate": 6.7467183261014575e-06, "loss": 0.7247, "step": 7877 }, { "epoch": 0.40487203206907185, "grad_norm": 1.0177489519119263, "learning_rate": 6.745938484286219e-06, "loss": 0.7452, "step": 7878 }, { "epoch": 0.4049234248124165, "grad_norm": 1.0192375183105469, "learning_rate": 6.745158594098229e-06, "loss": 0.7221, "step": 7879 }, { "epoch": 0.40497481755576115, "grad_norm": 1.0155107975006104, "learning_rate": 6.744378655559096e-06, "loss": 0.7573, "step": 7880 }, { "epoch": 0.4050262102991058, "grad_norm": 1.0010485649108887, "learning_rate": 6.74359866869043e-06, "loss": 0.7352, "step": 7881 }, { "epoch": 0.4050776030424504, "grad_norm": 0.7212871313095093, "learning_rate": 6.742818633513839e-06, "loss": 0.6972, "step": 7882 }, { "epoch": 0.40512899578579503, "grad_norm": 1.0373481512069702, "learning_rate": 6.742038550050937e-06, "loss": 0.7539, "step": 7883 }, { "epoch": 0.4051803885291397, "grad_norm": 1.0220096111297607, "learning_rate": 6.741258418323335e-06, "loss": 0.706, "step": 7884 }, { "epoch": 0.4052317812724843, "grad_norm": 1.0339752435684204, "learning_rate": 6.740478238352649e-06, "loss": 0.7506, "step": 7885 }, { "epoch": 0.405283174015829, "grad_norm": 1.0616086721420288, "learning_rate": 6.7396980101604935e-06, "loss": 0.7902, "step": 7886 }, { "epoch": 0.4053345667591736, "grad_norm": 1.0507900714874268, "learning_rate": 6.738917733768485e-06, "loss": 0.7691, "step": 7887 }, { "epoch": 0.40538595950251827, "grad_norm": 1.1763027906417847, "learning_rate": 6.738137409198242e-06, "loss": 0.7282, "step": 7888 }, { "epoch": 0.40543735224586286, "grad_norm": 1.0808122158050537, "learning_rate": 6.7373570364713845e-06, "loss": 0.7612, "step": 7889 }, { "epoch": 0.4054887449892075, "grad_norm": 0.799289882183075, "learning_rate": 6.736576615609532e-06, "loss": 0.6452, "step": 7890 }, { "epoch": 0.40554013773255215, "grad_norm": 1.4809321165084839, "learning_rate": 6.735796146634309e-06, "loss": 0.7288, "step": 7891 }, { "epoch": 0.4055915304758968, "grad_norm": 1.0577455759048462, "learning_rate": 6.735015629567338e-06, "loss": 0.718, "step": 7892 }, { "epoch": 0.40564292321924145, "grad_norm": 1.0974152088165283, "learning_rate": 6.734235064430244e-06, "loss": 0.8018, "step": 7893 }, { "epoch": 0.4056943159625861, "grad_norm": 1.1344894170761108, "learning_rate": 6.733454451244653e-06, "loss": 0.7499, "step": 7894 }, { "epoch": 0.40574570870593074, "grad_norm": 1.103716254234314, "learning_rate": 6.732673790032194e-06, "loss": 0.7605, "step": 7895 }, { "epoch": 0.4057971014492754, "grad_norm": 1.0246015787124634, "learning_rate": 6.731893080814492e-06, "loss": 0.7532, "step": 7896 }, { "epoch": 0.40584849419262, "grad_norm": 0.7563291788101196, "learning_rate": 6.731112323613181e-06, "loss": 0.6639, "step": 7897 }, { "epoch": 0.4058998869359646, "grad_norm": 1.1162755489349365, "learning_rate": 6.730331518449892e-06, "loss": 0.7367, "step": 7898 }, { "epoch": 0.4059512796793093, "grad_norm": 1.0836974382400513, "learning_rate": 6.729550665346257e-06, "loss": 0.6916, "step": 7899 }, { "epoch": 0.4060026724226539, "grad_norm": 1.1318376064300537, "learning_rate": 6.72876976432391e-06, "loss": 0.7257, "step": 7900 }, { "epoch": 0.40605406516599857, "grad_norm": 0.7126916646957397, "learning_rate": 6.727988815404487e-06, "loss": 0.7027, "step": 7901 }, { "epoch": 0.4061054579093432, "grad_norm": 1.0463669300079346, "learning_rate": 6.727207818609625e-06, "loss": 0.7372, "step": 7902 }, { "epoch": 0.40615685065268786, "grad_norm": 0.7074301242828369, "learning_rate": 6.726426773960963e-06, "loss": 0.6659, "step": 7903 }, { "epoch": 0.40620824339603245, "grad_norm": 1.2183748483657837, "learning_rate": 6.7256456814801385e-06, "loss": 0.7869, "step": 7904 }, { "epoch": 0.4062596361393771, "grad_norm": 1.0458272695541382, "learning_rate": 6.724864541188795e-06, "loss": 0.7126, "step": 7905 }, { "epoch": 0.40631102888272175, "grad_norm": 0.7194162011146545, "learning_rate": 6.724083353108572e-06, "loss": 0.6864, "step": 7906 }, { "epoch": 0.4063624216260664, "grad_norm": 0.7353252172470093, "learning_rate": 6.723302117261114e-06, "loss": 0.6489, "step": 7907 }, { "epoch": 0.40641381436941104, "grad_norm": 0.995393693447113, "learning_rate": 6.722520833668067e-06, "loss": 0.7541, "step": 7908 }, { "epoch": 0.4064652071127557, "grad_norm": 1.039131760597229, "learning_rate": 6.721739502351075e-06, "loss": 0.74, "step": 7909 }, { "epoch": 0.40651659985610034, "grad_norm": 1.0343166589736938, "learning_rate": 6.7209581233317865e-06, "loss": 0.7065, "step": 7910 }, { "epoch": 0.406567992599445, "grad_norm": 1.125809907913208, "learning_rate": 6.720176696631851e-06, "loss": 0.7563, "step": 7911 }, { "epoch": 0.4066193853427896, "grad_norm": 1.133452296257019, "learning_rate": 6.719395222272918e-06, "loss": 0.7539, "step": 7912 }, { "epoch": 0.4066707780861342, "grad_norm": 1.1749025583267212, "learning_rate": 6.718613700276638e-06, "loss": 0.7497, "step": 7913 }, { "epoch": 0.40672217082947887, "grad_norm": 1.0754362344741821, "learning_rate": 6.717832130664666e-06, "loss": 0.7167, "step": 7914 }, { "epoch": 0.4067735635728235, "grad_norm": 1.0977122783660889, "learning_rate": 6.717050513458654e-06, "loss": 0.7633, "step": 7915 }, { "epoch": 0.40682495631616816, "grad_norm": 1.1105183362960815, "learning_rate": 6.716268848680258e-06, "loss": 0.7648, "step": 7916 }, { "epoch": 0.4068763490595128, "grad_norm": 1.2383711338043213, "learning_rate": 6.715487136351135e-06, "loss": 0.737, "step": 7917 }, { "epoch": 0.40692774180285746, "grad_norm": 1.0289186239242554, "learning_rate": 6.714705376492944e-06, "loss": 0.726, "step": 7918 }, { "epoch": 0.4069791345462021, "grad_norm": 1.1408699750900269, "learning_rate": 6.713923569127342e-06, "loss": 0.7217, "step": 7919 }, { "epoch": 0.4070305272895467, "grad_norm": 0.7385805249214172, "learning_rate": 6.71314171427599e-06, "loss": 0.6442, "step": 7920 }, { "epoch": 0.40708192003289134, "grad_norm": 1.022669792175293, "learning_rate": 6.712359811960552e-06, "loss": 0.669, "step": 7921 }, { "epoch": 0.407133312776236, "grad_norm": 1.0556048154830933, "learning_rate": 6.71157786220269e-06, "loss": 0.7361, "step": 7922 }, { "epoch": 0.40718470551958064, "grad_norm": 1.0673633813858032, "learning_rate": 6.710795865024069e-06, "loss": 0.7113, "step": 7923 }, { "epoch": 0.4072360982629253, "grad_norm": 0.9981977343559265, "learning_rate": 6.710013820446354e-06, "loss": 0.7581, "step": 7924 }, { "epoch": 0.40728749100626993, "grad_norm": 1.059861421585083, "learning_rate": 6.709231728491212e-06, "loss": 0.7779, "step": 7925 }, { "epoch": 0.4073388837496146, "grad_norm": 1.1826664209365845, "learning_rate": 6.708449589180315e-06, "loss": 0.7793, "step": 7926 }, { "epoch": 0.40739027649295917, "grad_norm": 0.7404249906539917, "learning_rate": 6.707667402535327e-06, "loss": 0.7032, "step": 7927 }, { "epoch": 0.4074416692363038, "grad_norm": 1.1206835508346558, "learning_rate": 6.706885168577926e-06, "loss": 0.727, "step": 7928 }, { "epoch": 0.40749306197964846, "grad_norm": 1.0846922397613525, "learning_rate": 6.706102887329779e-06, "loss": 0.7493, "step": 7929 }, { "epoch": 0.4075444547229931, "grad_norm": 0.9938129782676697, "learning_rate": 6.705320558812561e-06, "loss": 0.727, "step": 7930 }, { "epoch": 0.40759584746633776, "grad_norm": 1.0975090265274048, "learning_rate": 6.704538183047948e-06, "loss": 0.7832, "step": 7931 }, { "epoch": 0.4076472402096824, "grad_norm": 1.2475781440734863, "learning_rate": 6.7037557600576166e-06, "loss": 0.7706, "step": 7932 }, { "epoch": 0.40769863295302705, "grad_norm": 1.024477481842041, "learning_rate": 6.7029732898632435e-06, "loss": 0.7765, "step": 7933 }, { "epoch": 0.4077500256963717, "grad_norm": 3.735666036605835, "learning_rate": 6.70219077248651e-06, "loss": 0.7832, "step": 7934 }, { "epoch": 0.4078014184397163, "grad_norm": 1.024791955947876, "learning_rate": 6.7014082079490925e-06, "loss": 0.7787, "step": 7935 }, { "epoch": 0.40785281118306094, "grad_norm": 1.0390478372573853, "learning_rate": 6.700625596272676e-06, "loss": 0.7742, "step": 7936 }, { "epoch": 0.4079042039264056, "grad_norm": 0.9915521740913391, "learning_rate": 6.699842937478943e-06, "loss": 0.7746, "step": 7937 }, { "epoch": 0.40795559666975023, "grad_norm": 1.098596453666687, "learning_rate": 6.699060231589576e-06, "loss": 0.7376, "step": 7938 }, { "epoch": 0.4080069894130949, "grad_norm": 1.0754375457763672, "learning_rate": 6.698277478626262e-06, "loss": 0.7301, "step": 7939 }, { "epoch": 0.4080583821564395, "grad_norm": 0.6903818845748901, "learning_rate": 6.697494678610687e-06, "loss": 0.6705, "step": 7940 }, { "epoch": 0.4081097748997842, "grad_norm": 1.0542471408843994, "learning_rate": 6.69671183156454e-06, "loss": 0.6802, "step": 7941 }, { "epoch": 0.40816116764312876, "grad_norm": 0.7270194292068481, "learning_rate": 6.69592893750951e-06, "loss": 0.6984, "step": 7942 }, { "epoch": 0.4082125603864734, "grad_norm": 1.0911885499954224, "learning_rate": 6.695145996467287e-06, "loss": 0.7764, "step": 7943 }, { "epoch": 0.40826395312981806, "grad_norm": 1.1180353164672852, "learning_rate": 6.694363008459565e-06, "loss": 0.7209, "step": 7944 }, { "epoch": 0.4083153458731627, "grad_norm": 1.1125692129135132, "learning_rate": 6.693579973508037e-06, "loss": 0.72, "step": 7945 }, { "epoch": 0.40836673861650735, "grad_norm": 1.1563745737075806, "learning_rate": 6.692796891634394e-06, "loss": 0.7786, "step": 7946 }, { "epoch": 0.408418131359852, "grad_norm": 0.7223285436630249, "learning_rate": 6.6920137628603375e-06, "loss": 0.6522, "step": 7947 }, { "epoch": 0.40846952410319665, "grad_norm": 0.7379501461982727, "learning_rate": 6.6912305872075625e-06, "loss": 0.689, "step": 7948 }, { "epoch": 0.4085209168465413, "grad_norm": 1.043828010559082, "learning_rate": 6.690447364697765e-06, "loss": 0.6923, "step": 7949 }, { "epoch": 0.4085723095898859, "grad_norm": 0.6911939382553101, "learning_rate": 6.689664095352649e-06, "loss": 0.6945, "step": 7950 }, { "epoch": 0.40862370233323053, "grad_norm": 1.0108751058578491, "learning_rate": 6.688880779193912e-06, "loss": 0.6998, "step": 7951 }, { "epoch": 0.4086750950765752, "grad_norm": 1.1145551204681396, "learning_rate": 6.68809741624326e-06, "loss": 0.7428, "step": 7952 }, { "epoch": 0.4087264878199198, "grad_norm": 1.1391953229904175, "learning_rate": 6.687314006522394e-06, "loss": 0.7186, "step": 7953 }, { "epoch": 0.4087778805632645, "grad_norm": 1.129879117012024, "learning_rate": 6.686530550053019e-06, "loss": 0.7243, "step": 7954 }, { "epoch": 0.4088292733066091, "grad_norm": 1.0704381465911865, "learning_rate": 6.685747046856844e-06, "loss": 0.7173, "step": 7955 }, { "epoch": 0.40888066604995377, "grad_norm": 1.0790979862213135, "learning_rate": 6.684963496955575e-06, "loss": 0.7887, "step": 7956 }, { "epoch": 0.40893205879329836, "grad_norm": 1.1580049991607666, "learning_rate": 6.68417990037092e-06, "loss": 0.7341, "step": 7957 }, { "epoch": 0.408983451536643, "grad_norm": 1.129349708557129, "learning_rate": 6.6833962571245915e-06, "loss": 0.6943, "step": 7958 }, { "epoch": 0.40903484427998765, "grad_norm": 1.1082468032836914, "learning_rate": 6.682612567238298e-06, "loss": 0.7637, "step": 7959 }, { "epoch": 0.4090862370233323, "grad_norm": 1.122299075126648, "learning_rate": 6.681828830733756e-06, "loss": 0.7117, "step": 7960 }, { "epoch": 0.40913762976667695, "grad_norm": 1.1041449308395386, "learning_rate": 6.681045047632676e-06, "loss": 0.7527, "step": 7961 }, { "epoch": 0.4091890225100216, "grad_norm": 1.1245919466018677, "learning_rate": 6.680261217956775e-06, "loss": 0.7363, "step": 7962 }, { "epoch": 0.40924041525336624, "grad_norm": 1.2942348718643188, "learning_rate": 6.679477341727769e-06, "loss": 0.7553, "step": 7963 }, { "epoch": 0.4092918079967109, "grad_norm": 1.0296908617019653, "learning_rate": 6.678693418967379e-06, "loss": 0.7209, "step": 7964 }, { "epoch": 0.4093432007400555, "grad_norm": 0.9074142575263977, "learning_rate": 6.67790944969732e-06, "loss": 0.7098, "step": 7965 }, { "epoch": 0.4093945934834001, "grad_norm": 1.0555102825164795, "learning_rate": 6.677125433939316e-06, "loss": 0.7526, "step": 7966 }, { "epoch": 0.4094459862267448, "grad_norm": 1.0784554481506348, "learning_rate": 6.676341371715085e-06, "loss": 0.7562, "step": 7967 }, { "epoch": 0.4094973789700894, "grad_norm": 0.8925933837890625, "learning_rate": 6.675557263046355e-06, "loss": 0.6786, "step": 7968 }, { "epoch": 0.40954877171343407, "grad_norm": 1.1209834814071655, "learning_rate": 6.674773107954846e-06, "loss": 0.7722, "step": 7969 }, { "epoch": 0.4096001644567787, "grad_norm": 1.1376556158065796, "learning_rate": 6.673988906462286e-06, "loss": 0.7475, "step": 7970 }, { "epoch": 0.40965155720012336, "grad_norm": 0.8658275604248047, "learning_rate": 6.673204658590402e-06, "loss": 0.678, "step": 7971 }, { "epoch": 0.409702949943468, "grad_norm": 1.1058467626571655, "learning_rate": 6.672420364360922e-06, "loss": 0.7153, "step": 7972 }, { "epoch": 0.4097543426868126, "grad_norm": 1.3219773769378662, "learning_rate": 6.671636023795574e-06, "loss": 0.6954, "step": 7973 }, { "epoch": 0.40980573543015725, "grad_norm": 1.0119574069976807, "learning_rate": 6.670851636916092e-06, "loss": 0.7351, "step": 7974 }, { "epoch": 0.4098571281735019, "grad_norm": 0.7516946196556091, "learning_rate": 6.670067203744205e-06, "loss": 0.7331, "step": 7975 }, { "epoch": 0.40990852091684654, "grad_norm": 1.092466950416565, "learning_rate": 6.669282724301649e-06, "loss": 0.7284, "step": 7976 }, { "epoch": 0.4099599136601912, "grad_norm": 1.09486722946167, "learning_rate": 6.668498198610157e-06, "loss": 0.765, "step": 7977 }, { "epoch": 0.41001130640353584, "grad_norm": 0.8249090313911438, "learning_rate": 6.667713626691464e-06, "loss": 0.645, "step": 7978 }, { "epoch": 0.4100626991468805, "grad_norm": 1.1555579900741577, "learning_rate": 6.666929008567311e-06, "loss": 0.7941, "step": 7979 }, { "epoch": 0.4101140918902251, "grad_norm": 1.028634786605835, "learning_rate": 6.666144344259433e-06, "loss": 0.7189, "step": 7980 }, { "epoch": 0.4101654846335697, "grad_norm": 1.0098729133605957, "learning_rate": 6.665359633789571e-06, "loss": 0.8234, "step": 7981 }, { "epoch": 0.41021687737691437, "grad_norm": 1.10939359664917, "learning_rate": 6.664574877179466e-06, "loss": 0.719, "step": 7982 }, { "epoch": 0.410268270120259, "grad_norm": 1.1669492721557617, "learning_rate": 6.66379007445086e-06, "loss": 0.7629, "step": 7983 }, { "epoch": 0.41031966286360366, "grad_norm": 0.6827564835548401, "learning_rate": 6.663005225625498e-06, "loss": 0.626, "step": 7984 }, { "epoch": 0.4103710556069483, "grad_norm": 1.048548698425293, "learning_rate": 6.662220330725123e-06, "loss": 0.7849, "step": 7985 }, { "epoch": 0.41042244835029296, "grad_norm": 0.7019035816192627, "learning_rate": 6.6614353897714825e-06, "loss": 0.7116, "step": 7986 }, { "epoch": 0.4104738410936376, "grad_norm": 1.0581175088882446, "learning_rate": 6.660650402786324e-06, "loss": 0.7314, "step": 7987 }, { "epoch": 0.4105252338369822, "grad_norm": 1.0912425518035889, "learning_rate": 6.659865369791398e-06, "loss": 0.7665, "step": 7988 }, { "epoch": 0.41057662658032684, "grad_norm": 0.8129478693008423, "learning_rate": 6.659080290808449e-06, "loss": 0.6657, "step": 7989 }, { "epoch": 0.4106280193236715, "grad_norm": 1.0536584854125977, "learning_rate": 6.658295165859233e-06, "loss": 0.7547, "step": 7990 }, { "epoch": 0.41067941206701614, "grad_norm": 1.038142204284668, "learning_rate": 6.6575099949655005e-06, "loss": 0.7629, "step": 7991 }, { "epoch": 0.4107308048103608, "grad_norm": 0.695204496383667, "learning_rate": 6.656724778149008e-06, "loss": 0.6487, "step": 7992 }, { "epoch": 0.41078219755370543, "grad_norm": 1.0843878984451294, "learning_rate": 6.655939515431508e-06, "loss": 0.7468, "step": 7993 }, { "epoch": 0.4108335902970501, "grad_norm": 1.1089707612991333, "learning_rate": 6.655154206834757e-06, "loss": 0.7389, "step": 7994 }, { "epoch": 0.41088498304039467, "grad_norm": 1.0390061140060425, "learning_rate": 6.654368852380515e-06, "loss": 0.7245, "step": 7995 }, { "epoch": 0.4109363757837393, "grad_norm": 0.9975454807281494, "learning_rate": 6.653583452090538e-06, "loss": 0.7596, "step": 7996 }, { "epoch": 0.41098776852708396, "grad_norm": 1.2355459928512573, "learning_rate": 6.652798005986587e-06, "loss": 0.7753, "step": 7997 }, { "epoch": 0.4110391612704286, "grad_norm": 1.093235731124878, "learning_rate": 6.652012514090425e-06, "loss": 0.78, "step": 7998 }, { "epoch": 0.41109055401377326, "grad_norm": 0.8892166018486023, "learning_rate": 6.651226976423813e-06, "loss": 0.6328, "step": 7999 }, { "epoch": 0.4111419467571179, "grad_norm": 1.1210532188415527, "learning_rate": 6.650441393008517e-06, "loss": 0.775, "step": 8000 }, { "epoch": 0.41119333950046255, "grad_norm": 1.0821943283081055, "learning_rate": 6.649655763866301e-06, "loss": 0.7294, "step": 8001 }, { "epoch": 0.4112447322438072, "grad_norm": 0.7084898948669434, "learning_rate": 6.64887008901893e-06, "loss": 0.638, "step": 8002 }, { "epoch": 0.4112961249871518, "grad_norm": 1.068906545639038, "learning_rate": 6.648084368488174e-06, "loss": 0.7367, "step": 8003 }, { "epoch": 0.41134751773049644, "grad_norm": 1.0813502073287964, "learning_rate": 6.6472986022958005e-06, "loss": 0.7753, "step": 8004 }, { "epoch": 0.4113989104738411, "grad_norm": 1.1435678005218506, "learning_rate": 6.646512790463582e-06, "loss": 0.7937, "step": 8005 }, { "epoch": 0.41145030321718573, "grad_norm": 1.0334559679031372, "learning_rate": 6.645726933013288e-06, "loss": 0.7164, "step": 8006 }, { "epoch": 0.4115016959605304, "grad_norm": 1.0630053281784058, "learning_rate": 6.644941029966692e-06, "loss": 0.7616, "step": 8007 }, { "epoch": 0.411553088703875, "grad_norm": 1.265312910079956, "learning_rate": 6.644155081345569e-06, "loss": 0.7655, "step": 8008 }, { "epoch": 0.41160448144721967, "grad_norm": 1.0740901231765747, "learning_rate": 6.643369087171695e-06, "loss": 0.6962, "step": 8009 }, { "epoch": 0.4116558741905643, "grad_norm": 1.0570896863937378, "learning_rate": 6.642583047466843e-06, "loss": 0.7164, "step": 8010 }, { "epoch": 0.4117072669339089, "grad_norm": 1.0792806148529053, "learning_rate": 6.641796962252796e-06, "loss": 0.7496, "step": 8011 }, { "epoch": 0.41175865967725356, "grad_norm": 1.0608534812927246, "learning_rate": 6.641010831551329e-06, "loss": 0.708, "step": 8012 }, { "epoch": 0.4118100524205982, "grad_norm": 0.9808920621871948, "learning_rate": 6.640224655384222e-06, "loss": 0.7496, "step": 8013 }, { "epoch": 0.41186144516394285, "grad_norm": 1.0432575941085815, "learning_rate": 6.63943843377326e-06, "loss": 0.7447, "step": 8014 }, { "epoch": 0.4119128379072875, "grad_norm": 0.9936188459396362, "learning_rate": 6.638652166740225e-06, "loss": 0.7049, "step": 8015 }, { "epoch": 0.41196423065063215, "grad_norm": 0.763602077960968, "learning_rate": 6.637865854306901e-06, "loss": 0.6892, "step": 8016 }, { "epoch": 0.4120156233939768, "grad_norm": 1.1282601356506348, "learning_rate": 6.637079496495073e-06, "loss": 0.749, "step": 8017 }, { "epoch": 0.4120670161373214, "grad_norm": 1.0999804735183716, "learning_rate": 6.636293093326527e-06, "loss": 0.7258, "step": 8018 }, { "epoch": 0.41211840888066603, "grad_norm": 0.7895588874816895, "learning_rate": 6.635506644823053e-06, "loss": 0.6964, "step": 8019 }, { "epoch": 0.4121698016240107, "grad_norm": 0.6827719807624817, "learning_rate": 6.634720151006439e-06, "loss": 0.6429, "step": 8020 }, { "epoch": 0.4122211943673553, "grad_norm": 0.6972300410270691, "learning_rate": 6.633933611898475e-06, "loss": 0.6914, "step": 8021 }, { "epoch": 0.4122725871107, "grad_norm": 1.0962932109832764, "learning_rate": 6.633147027520955e-06, "loss": 0.7809, "step": 8022 }, { "epoch": 0.4123239798540446, "grad_norm": 0.8886080384254456, "learning_rate": 6.63236039789567e-06, "loss": 0.6941, "step": 8023 }, { "epoch": 0.41237537259738927, "grad_norm": 1.0961463451385498, "learning_rate": 6.631573723044413e-06, "loss": 0.7342, "step": 8024 }, { "epoch": 0.4124267653407339, "grad_norm": 1.1105092763900757, "learning_rate": 6.630787002988983e-06, "loss": 0.7911, "step": 8025 }, { "epoch": 0.4124781580840785, "grad_norm": 1.0195692777633667, "learning_rate": 6.630000237751175e-06, "loss": 0.6795, "step": 8026 }, { "epoch": 0.41252955082742315, "grad_norm": 0.8244153261184692, "learning_rate": 6.629213427352787e-06, "loss": 0.6356, "step": 8027 }, { "epoch": 0.4125809435707678, "grad_norm": 1.0605871677398682, "learning_rate": 6.628426571815618e-06, "loss": 0.7748, "step": 8028 }, { "epoch": 0.41263233631411245, "grad_norm": 1.1088330745697021, "learning_rate": 6.6276396711614685e-06, "loss": 0.7195, "step": 8029 }, { "epoch": 0.4126837290574571, "grad_norm": 1.0487561225891113, "learning_rate": 6.626852725412142e-06, "loss": 0.7044, "step": 8030 }, { "epoch": 0.41273512180080174, "grad_norm": 1.0625576972961426, "learning_rate": 6.626065734589437e-06, "loss": 0.7644, "step": 8031 }, { "epoch": 0.4127865145441464, "grad_norm": 1.0724247694015503, "learning_rate": 6.625278698715164e-06, "loss": 0.753, "step": 8032 }, { "epoch": 0.412837907287491, "grad_norm": 1.0660923719406128, "learning_rate": 6.624491617811125e-06, "loss": 0.7268, "step": 8033 }, { "epoch": 0.4128893000308356, "grad_norm": 1.0670807361602783, "learning_rate": 6.6237044918991256e-06, "loss": 0.7494, "step": 8034 }, { "epoch": 0.4129406927741803, "grad_norm": 0.9825435280799866, "learning_rate": 6.622917321000976e-06, "loss": 0.7101, "step": 8035 }, { "epoch": 0.4129920855175249, "grad_norm": 1.0288969278335571, "learning_rate": 6.622130105138485e-06, "loss": 0.71, "step": 8036 }, { "epoch": 0.41304347826086957, "grad_norm": 1.1274778842926025, "learning_rate": 6.621342844333463e-06, "loss": 0.7525, "step": 8037 }, { "epoch": 0.4130948710042142, "grad_norm": 1.1139103174209595, "learning_rate": 6.620555538607722e-06, "loss": 0.7971, "step": 8038 }, { "epoch": 0.41314626374755886, "grad_norm": 1.0781826972961426, "learning_rate": 6.619768187983074e-06, "loss": 0.7599, "step": 8039 }, { "epoch": 0.4131976564909035, "grad_norm": 1.1014935970306396, "learning_rate": 6.6189807924813335e-06, "loss": 0.8046, "step": 8040 }, { "epoch": 0.4132490492342481, "grad_norm": 1.1662489175796509, "learning_rate": 6.618193352124318e-06, "loss": 0.7164, "step": 8041 }, { "epoch": 0.41330044197759275, "grad_norm": 1.1016510725021362, "learning_rate": 6.6174058669338405e-06, "loss": 0.744, "step": 8042 }, { "epoch": 0.4133518347209374, "grad_norm": 1.1341471672058105, "learning_rate": 6.616618336931723e-06, "loss": 0.7417, "step": 8043 }, { "epoch": 0.41340322746428204, "grad_norm": 0.7457482814788818, "learning_rate": 6.6158307621397814e-06, "loss": 0.6385, "step": 8044 }, { "epoch": 0.4134546202076267, "grad_norm": 0.7525314092636108, "learning_rate": 6.6150431425798375e-06, "loss": 0.7421, "step": 8045 }, { "epoch": 0.41350601295097134, "grad_norm": 0.7390742897987366, "learning_rate": 6.6142554782737145e-06, "loss": 0.6755, "step": 8046 }, { "epoch": 0.413557405694316, "grad_norm": 1.1073259115219116, "learning_rate": 6.613467769243231e-06, "loss": 0.7609, "step": 8047 }, { "epoch": 0.41360879843766063, "grad_norm": 1.105319619178772, "learning_rate": 6.612680015510218e-06, "loss": 0.7142, "step": 8048 }, { "epoch": 0.4136601911810052, "grad_norm": 1.0521056652069092, "learning_rate": 6.611892217096494e-06, "loss": 0.769, "step": 8049 }, { "epoch": 0.41371158392434987, "grad_norm": 1.0649930238723755, "learning_rate": 6.611104374023889e-06, "loss": 0.7285, "step": 8050 }, { "epoch": 0.4137629766676945, "grad_norm": 1.0694961547851562, "learning_rate": 6.610316486314231e-06, "loss": 0.7497, "step": 8051 }, { "epoch": 0.41381436941103916, "grad_norm": 1.0268933773040771, "learning_rate": 6.609528553989349e-06, "loss": 0.7127, "step": 8052 }, { "epoch": 0.4138657621543838, "grad_norm": 1.0627905130386353, "learning_rate": 6.608740577071072e-06, "loss": 0.7366, "step": 8053 }, { "epoch": 0.41391715489772846, "grad_norm": 1.1004197597503662, "learning_rate": 6.6079525555812315e-06, "loss": 0.7154, "step": 8054 }, { "epoch": 0.4139685476410731, "grad_norm": 1.1193249225616455, "learning_rate": 6.607164489541664e-06, "loss": 0.7857, "step": 8055 }, { "epoch": 0.4140199403844177, "grad_norm": 1.1357054710388184, "learning_rate": 6.6063763789741985e-06, "loss": 0.7747, "step": 8056 }, { "epoch": 0.41407133312776234, "grad_norm": 1.0460679531097412, "learning_rate": 6.605588223900674e-06, "loss": 0.7287, "step": 8057 }, { "epoch": 0.414122725871107, "grad_norm": 1.0804314613342285, "learning_rate": 6.604800024342925e-06, "loss": 0.7721, "step": 8058 }, { "epoch": 0.41417411861445164, "grad_norm": 1.0311671495437622, "learning_rate": 6.60401178032279e-06, "loss": 0.7684, "step": 8059 }, { "epoch": 0.4142255113577963, "grad_norm": 1.1368834972381592, "learning_rate": 6.603223491862107e-06, "loss": 0.8338, "step": 8060 }, { "epoch": 0.41427690410114093, "grad_norm": 8.082850456237793, "learning_rate": 6.6024351589827195e-06, "loss": 0.7723, "step": 8061 }, { "epoch": 0.4143282968444856, "grad_norm": 1.0659213066101074, "learning_rate": 6.601646781706466e-06, "loss": 0.7203, "step": 8062 }, { "epoch": 0.4143796895878302, "grad_norm": 1.0736312866210938, "learning_rate": 6.600858360055189e-06, "loss": 0.7195, "step": 8063 }, { "epoch": 0.4144310823311748, "grad_norm": 1.2954188585281372, "learning_rate": 6.600069894050734e-06, "loss": 0.7187, "step": 8064 }, { "epoch": 0.41448247507451946, "grad_norm": 1.1182818412780762, "learning_rate": 6.599281383714944e-06, "loss": 0.7746, "step": 8065 }, { "epoch": 0.4145338678178641, "grad_norm": 0.7987220287322998, "learning_rate": 6.598492829069667e-06, "loss": 0.6286, "step": 8066 }, { "epoch": 0.41458526056120876, "grad_norm": 1.0978220701217651, "learning_rate": 6.597704230136751e-06, "loss": 0.7208, "step": 8067 }, { "epoch": 0.4146366533045534, "grad_norm": 1.081256628036499, "learning_rate": 6.596915586938043e-06, "loss": 0.7671, "step": 8068 }, { "epoch": 0.41468804604789805, "grad_norm": 1.067455530166626, "learning_rate": 6.596126899495395e-06, "loss": 0.6885, "step": 8069 }, { "epoch": 0.4147394387912427, "grad_norm": 1.0716016292572021, "learning_rate": 6.595338167830658e-06, "loss": 0.7478, "step": 8070 }, { "epoch": 0.4147908315345873, "grad_norm": 1.0021570920944214, "learning_rate": 6.594549391965683e-06, "loss": 0.7817, "step": 8071 }, { "epoch": 0.41484222427793194, "grad_norm": 1.0885429382324219, "learning_rate": 6.593760571922326e-06, "loss": 0.6927, "step": 8072 }, { "epoch": 0.4148936170212766, "grad_norm": 1.0912247896194458, "learning_rate": 6.59297170772244e-06, "loss": 0.7879, "step": 8073 }, { "epoch": 0.41494500976462123, "grad_norm": 1.1450971364974976, "learning_rate": 6.59218279938788e-06, "loss": 0.7893, "step": 8074 }, { "epoch": 0.4149964025079659, "grad_norm": 1.067766547203064, "learning_rate": 6.591393846940507e-06, "loss": 0.734, "step": 8075 }, { "epoch": 0.4150477952513105, "grad_norm": 1.0950958728790283, "learning_rate": 6.590604850402178e-06, "loss": 0.7708, "step": 8076 }, { "epoch": 0.41509918799465517, "grad_norm": 1.088057518005371, "learning_rate": 6.589815809794752e-06, "loss": 0.7505, "step": 8077 }, { "epoch": 0.4151505807379998, "grad_norm": 1.0441371202468872, "learning_rate": 6.589026725140091e-06, "loss": 0.7165, "step": 8078 }, { "epoch": 0.4152019734813444, "grad_norm": 0.8173975944519043, "learning_rate": 6.588237596460056e-06, "loss": 0.5868, "step": 8079 }, { "epoch": 0.41525336622468906, "grad_norm": 1.0947449207305908, "learning_rate": 6.587448423776512e-06, "loss": 0.781, "step": 8080 }, { "epoch": 0.4153047589680337, "grad_norm": 1.1305118799209595, "learning_rate": 6.586659207111325e-06, "loss": 0.7693, "step": 8081 }, { "epoch": 0.41535615171137835, "grad_norm": 1.2380605936050415, "learning_rate": 6.585869946486356e-06, "loss": 0.7608, "step": 8082 }, { "epoch": 0.415407544454723, "grad_norm": 1.0711721181869507, "learning_rate": 6.585080641923478e-06, "loss": 0.7393, "step": 8083 }, { "epoch": 0.41545893719806765, "grad_norm": 1.151130199432373, "learning_rate": 6.5842912934445545e-06, "loss": 0.7926, "step": 8084 }, { "epoch": 0.4155103299414123, "grad_norm": 0.7503238320350647, "learning_rate": 6.583501901071459e-06, "loss": 0.6844, "step": 8085 }, { "epoch": 0.4155617226847569, "grad_norm": 1.0263811349868774, "learning_rate": 6.582712464826059e-06, "loss": 0.7172, "step": 8086 }, { "epoch": 0.41561311542810153, "grad_norm": 1.135429859161377, "learning_rate": 6.581922984730228e-06, "loss": 0.7376, "step": 8087 }, { "epoch": 0.4156645081714462, "grad_norm": 1.0504059791564941, "learning_rate": 6.581133460805841e-06, "loss": 0.7107, "step": 8088 }, { "epoch": 0.4157159009147908, "grad_norm": 1.010265588760376, "learning_rate": 6.5803438930747695e-06, "loss": 0.8095, "step": 8089 }, { "epoch": 0.4157672936581355, "grad_norm": 0.846817672252655, "learning_rate": 6.579554281558891e-06, "loss": 0.6841, "step": 8090 }, { "epoch": 0.4158186864014801, "grad_norm": 1.105237364768982, "learning_rate": 6.578764626280082e-06, "loss": 0.7898, "step": 8091 }, { "epoch": 0.41587007914482477, "grad_norm": 1.076355218887329, "learning_rate": 6.577974927260219e-06, "loss": 0.7413, "step": 8092 }, { "epoch": 0.4159214718881694, "grad_norm": 1.055192470550537, "learning_rate": 6.577185184521184e-06, "loss": 0.7145, "step": 8093 }, { "epoch": 0.415972864631514, "grad_norm": 1.261038899421692, "learning_rate": 6.576395398084856e-06, "loss": 0.7645, "step": 8094 }, { "epoch": 0.41602425737485865, "grad_norm": 1.0586450099945068, "learning_rate": 6.575605567973117e-06, "loss": 0.7114, "step": 8095 }, { "epoch": 0.4160756501182033, "grad_norm": 1.0073610544204712, "learning_rate": 6.574815694207849e-06, "loss": 0.6922, "step": 8096 }, { "epoch": 0.41612704286154795, "grad_norm": 1.1039044857025146, "learning_rate": 6.574025776810939e-06, "loss": 0.7373, "step": 8097 }, { "epoch": 0.4161784356048926, "grad_norm": 0.76905357837677, "learning_rate": 6.573235815804267e-06, "loss": 0.7025, "step": 8098 }, { "epoch": 0.41622982834823724, "grad_norm": 1.0936994552612305, "learning_rate": 6.572445811209726e-06, "loss": 0.7652, "step": 8099 }, { "epoch": 0.4162812210915819, "grad_norm": 1.1234445571899414, "learning_rate": 6.571655763049198e-06, "loss": 0.7624, "step": 8100 }, { "epoch": 0.41633261383492653, "grad_norm": 1.1500602960586548, "learning_rate": 6.570865671344577e-06, "loss": 0.7512, "step": 8101 }, { "epoch": 0.4163840065782711, "grad_norm": 1.0710879564285278, "learning_rate": 6.5700755361177505e-06, "loss": 0.7082, "step": 8102 }, { "epoch": 0.4164353993216158, "grad_norm": 0.7484697103500366, "learning_rate": 6.569285357390609e-06, "loss": 0.6856, "step": 8103 }, { "epoch": 0.4164867920649604, "grad_norm": 1.1131904125213623, "learning_rate": 6.568495135185048e-06, "loss": 0.7162, "step": 8104 }, { "epoch": 0.41653818480830507, "grad_norm": 1.0708578824996948, "learning_rate": 6.5677048695229586e-06, "loss": 0.686, "step": 8105 }, { "epoch": 0.4165895775516497, "grad_norm": 1.2042489051818848, "learning_rate": 6.566914560426236e-06, "loss": 0.6986, "step": 8106 }, { "epoch": 0.41664097029499436, "grad_norm": 0.8376294374465942, "learning_rate": 6.566124207916778e-06, "loss": 0.6528, "step": 8107 }, { "epoch": 0.416692363038339, "grad_norm": 1.0975004434585571, "learning_rate": 6.5653338120164815e-06, "loss": 0.7798, "step": 8108 }, { "epoch": 0.4167437557816836, "grad_norm": 0.7418920993804932, "learning_rate": 6.564543372747244e-06, "loss": 0.6513, "step": 8109 }, { "epoch": 0.41679514852502825, "grad_norm": 0.8498019576072693, "learning_rate": 6.563752890130968e-06, "loss": 0.6927, "step": 8110 }, { "epoch": 0.4168465412683729, "grad_norm": 0.9223999381065369, "learning_rate": 6.56296236418955e-06, "loss": 0.6627, "step": 8111 }, { "epoch": 0.41689793401171754, "grad_norm": 1.099507451057434, "learning_rate": 6.562171794944897e-06, "loss": 0.7436, "step": 8112 }, { "epoch": 0.4169493267550622, "grad_norm": 1.1419322490692139, "learning_rate": 6.5613811824189096e-06, "loss": 0.7847, "step": 8113 }, { "epoch": 0.41700071949840684, "grad_norm": 0.7048693299293518, "learning_rate": 6.560590526633493e-06, "loss": 0.6705, "step": 8114 }, { "epoch": 0.4170521122417515, "grad_norm": 1.1191236972808838, "learning_rate": 6.559799827610554e-06, "loss": 0.743, "step": 8115 }, { "epoch": 0.41710350498509613, "grad_norm": 1.1514685153961182, "learning_rate": 6.559009085371997e-06, "loss": 0.7673, "step": 8116 }, { "epoch": 0.4171548977284407, "grad_norm": 0.7785151600837708, "learning_rate": 6.558218299939731e-06, "loss": 0.7033, "step": 8117 }, { "epoch": 0.41720629047178537, "grad_norm": 1.200346827507019, "learning_rate": 6.557427471335669e-06, "loss": 0.7584, "step": 8118 }, { "epoch": 0.41725768321513, "grad_norm": 1.0426331758499146, "learning_rate": 6.556636599581717e-06, "loss": 0.7791, "step": 8119 }, { "epoch": 0.41730907595847466, "grad_norm": 1.114567756652832, "learning_rate": 6.55584568469979e-06, "loss": 0.8171, "step": 8120 }, { "epoch": 0.4173604687018193, "grad_norm": 1.1332224607467651, "learning_rate": 6.5550547267117995e-06, "loss": 0.7365, "step": 8121 }, { "epoch": 0.41741186144516396, "grad_norm": 0.8459557294845581, "learning_rate": 6.5542637256396595e-06, "loss": 0.6767, "step": 8122 }, { "epoch": 0.4174632541885086, "grad_norm": 1.145411729812622, "learning_rate": 6.553472681505286e-06, "loss": 0.7782, "step": 8123 }, { "epoch": 0.4175146469318532, "grad_norm": 0.7274598479270935, "learning_rate": 6.552681594330594e-06, "loss": 0.6582, "step": 8124 }, { "epoch": 0.41756603967519784, "grad_norm": 1.0792773962020874, "learning_rate": 6.551890464137505e-06, "loss": 0.8218, "step": 8125 }, { "epoch": 0.4176174324185425, "grad_norm": 1.070538878440857, "learning_rate": 6.551099290947933e-06, "loss": 0.7688, "step": 8126 }, { "epoch": 0.41766882516188714, "grad_norm": 0.6803285479545593, "learning_rate": 6.550308074783802e-06, "loss": 0.7056, "step": 8127 }, { "epoch": 0.4177202179052318, "grad_norm": 1.1391576528549194, "learning_rate": 6.549516815667031e-06, "loss": 0.747, "step": 8128 }, { "epoch": 0.41777161064857643, "grad_norm": 1.0530043840408325, "learning_rate": 6.548725513619545e-06, "loss": 0.7213, "step": 8129 }, { "epoch": 0.4178230033919211, "grad_norm": 1.0601760149002075, "learning_rate": 6.547934168663265e-06, "loss": 0.7486, "step": 8130 }, { "epoch": 0.4178743961352657, "grad_norm": 1.0474406480789185, "learning_rate": 6.547142780820119e-06, "loss": 0.7432, "step": 8131 }, { "epoch": 0.4179257888786103, "grad_norm": 0.7322412133216858, "learning_rate": 6.546351350112028e-06, "loss": 0.6714, "step": 8132 }, { "epoch": 0.41797718162195496, "grad_norm": 0.9110136032104492, "learning_rate": 6.545559876560925e-06, "loss": 0.6782, "step": 8133 }, { "epoch": 0.4180285743652996, "grad_norm": 1.0378695726394653, "learning_rate": 6.544768360188736e-06, "loss": 0.6847, "step": 8134 }, { "epoch": 0.41807996710864426, "grad_norm": 1.090872883796692, "learning_rate": 6.543976801017389e-06, "loss": 0.7547, "step": 8135 }, { "epoch": 0.4181313598519889, "grad_norm": 1.1600438356399536, "learning_rate": 6.543185199068818e-06, "loss": 0.7552, "step": 8136 }, { "epoch": 0.41818275259533355, "grad_norm": 1.0688138008117676, "learning_rate": 6.542393554364952e-06, "loss": 0.795, "step": 8137 }, { "epoch": 0.4182341453386782, "grad_norm": 1.0418423414230347, "learning_rate": 6.541601866927727e-06, "loss": 0.7949, "step": 8138 }, { "epoch": 0.41828553808202285, "grad_norm": 1.0694576501846313, "learning_rate": 6.540810136779075e-06, "loss": 0.7317, "step": 8139 }, { "epoch": 0.41833693082536744, "grad_norm": 1.0427879095077515, "learning_rate": 6.540018363940933e-06, "loss": 0.7773, "step": 8140 }, { "epoch": 0.4183883235687121, "grad_norm": 0.7623458504676819, "learning_rate": 6.539226548435238e-06, "loss": 0.6889, "step": 8141 }, { "epoch": 0.41843971631205673, "grad_norm": 0.7354753017425537, "learning_rate": 6.538434690283928e-06, "loss": 0.6813, "step": 8142 }, { "epoch": 0.4184911090554014, "grad_norm": 0.9941310286521912, "learning_rate": 6.53764278950894e-06, "loss": 0.7138, "step": 8143 }, { "epoch": 0.418542501798746, "grad_norm": 1.0244524478912354, "learning_rate": 6.536850846132217e-06, "loss": 0.7346, "step": 8144 }, { "epoch": 0.41859389454209067, "grad_norm": 1.2205531597137451, "learning_rate": 6.5360588601756994e-06, "loss": 0.7523, "step": 8145 }, { "epoch": 0.4186452872854353, "grad_norm": 1.0979371070861816, "learning_rate": 6.535266831661327e-06, "loss": 0.698, "step": 8146 }, { "epoch": 0.4186966800287799, "grad_norm": 0.9743461012840271, "learning_rate": 6.534474760611049e-06, "loss": 0.7209, "step": 8147 }, { "epoch": 0.41874807277212456, "grad_norm": 1.0523778200149536, "learning_rate": 6.533682647046806e-06, "loss": 0.6897, "step": 8148 }, { "epoch": 0.4187994655154692, "grad_norm": 1.0779714584350586, "learning_rate": 6.532890490990548e-06, "loss": 0.7488, "step": 8149 }, { "epoch": 0.41885085825881385, "grad_norm": 1.1054168939590454, "learning_rate": 6.53209829246422e-06, "loss": 0.7674, "step": 8150 }, { "epoch": 0.4189022510021585, "grad_norm": 1.0905927419662476, "learning_rate": 6.531306051489769e-06, "loss": 0.7606, "step": 8151 }, { "epoch": 0.41895364374550315, "grad_norm": 1.1185486316680908, "learning_rate": 6.530513768089148e-06, "loss": 0.7367, "step": 8152 }, { "epoch": 0.4190050364888478, "grad_norm": 0.8157680630683899, "learning_rate": 6.529721442284308e-06, "loss": 0.7016, "step": 8153 }, { "epoch": 0.41905642923219244, "grad_norm": 1.058084487915039, "learning_rate": 6.5289290740971975e-06, "loss": 0.7433, "step": 8154 }, { "epoch": 0.41910782197553703, "grad_norm": 1.0676947832107544, "learning_rate": 6.5281366635497734e-06, "loss": 0.7135, "step": 8155 }, { "epoch": 0.4191592147188817, "grad_norm": 1.0593070983886719, "learning_rate": 6.527344210663986e-06, "loss": 0.7141, "step": 8156 }, { "epoch": 0.4192106074622263, "grad_norm": 0.774882435798645, "learning_rate": 6.526551715461796e-06, "loss": 0.683, "step": 8157 }, { "epoch": 0.419262000205571, "grad_norm": 1.029242753982544, "learning_rate": 6.525759177965157e-06, "loss": 0.7385, "step": 8158 }, { "epoch": 0.4193133929489156, "grad_norm": 1.0384578704833984, "learning_rate": 6.5249665981960275e-06, "loss": 0.7523, "step": 8159 }, { "epoch": 0.41936478569226027, "grad_norm": 0.8123130798339844, "learning_rate": 6.5241739761763664e-06, "loss": 0.7297, "step": 8160 }, { "epoch": 0.4194161784356049, "grad_norm": 0.7716857194900513, "learning_rate": 6.523381311928134e-06, "loss": 0.7107, "step": 8161 }, { "epoch": 0.4194675711789495, "grad_norm": 0.8681047558784485, "learning_rate": 6.522588605473293e-06, "loss": 0.6663, "step": 8162 }, { "epoch": 0.41951896392229415, "grad_norm": 1.02434504032135, "learning_rate": 6.521795856833806e-06, "loss": 0.7231, "step": 8163 }, { "epoch": 0.4195703566656388, "grad_norm": 1.1238293647766113, "learning_rate": 6.521003066031634e-06, "loss": 0.7505, "step": 8164 }, { "epoch": 0.41962174940898345, "grad_norm": 0.7427038550376892, "learning_rate": 6.520210233088745e-06, "loss": 0.674, "step": 8165 }, { "epoch": 0.4196731421523281, "grad_norm": 1.0484944581985474, "learning_rate": 6.519417358027104e-06, "loss": 0.7615, "step": 8166 }, { "epoch": 0.41972453489567274, "grad_norm": 1.1346802711486816, "learning_rate": 6.5186244408686775e-06, "loss": 0.7546, "step": 8167 }, { "epoch": 0.4197759276390174, "grad_norm": 0.9560454487800598, "learning_rate": 6.517831481635435e-06, "loss": 0.6599, "step": 8168 }, { "epoch": 0.41982732038236203, "grad_norm": 1.0495399236679077, "learning_rate": 6.517038480349345e-06, "loss": 0.8055, "step": 8169 }, { "epoch": 0.4198787131257066, "grad_norm": 0.9989027976989746, "learning_rate": 6.51624543703238e-06, "loss": 0.7269, "step": 8170 }, { "epoch": 0.4199301058690513, "grad_norm": 1.1640007495880127, "learning_rate": 6.515452351706511e-06, "loss": 0.7729, "step": 8171 }, { "epoch": 0.4199814986123959, "grad_norm": 1.127619981765747, "learning_rate": 6.514659224393711e-06, "loss": 0.785, "step": 8172 }, { "epoch": 0.42003289135574057, "grad_norm": 0.7142515778541565, "learning_rate": 6.513866055115956e-06, "loss": 0.6658, "step": 8173 }, { "epoch": 0.4200842840990852, "grad_norm": 1.0726038217544556, "learning_rate": 6.513072843895219e-06, "loss": 0.716, "step": 8174 }, { "epoch": 0.42013567684242986, "grad_norm": 1.1132415533065796, "learning_rate": 6.5122795907534775e-06, "loss": 0.7281, "step": 8175 }, { "epoch": 0.4201870695857745, "grad_norm": 1.0760494470596313, "learning_rate": 6.51148629571271e-06, "loss": 0.7501, "step": 8176 }, { "epoch": 0.42023846232911916, "grad_norm": 1.1639372110366821, "learning_rate": 6.510692958794893e-06, "loss": 0.7707, "step": 8177 }, { "epoch": 0.42028985507246375, "grad_norm": 1.1178914308547974, "learning_rate": 6.50989958002201e-06, "loss": 0.7497, "step": 8178 }, { "epoch": 0.4203412478158084, "grad_norm": 0.7563962340354919, "learning_rate": 6.50910615941604e-06, "loss": 0.6238, "step": 8179 }, { "epoch": 0.42039264055915304, "grad_norm": 1.0762879848480225, "learning_rate": 6.5083126969989656e-06, "loss": 0.7632, "step": 8180 }, { "epoch": 0.4204440333024977, "grad_norm": 0.7737055420875549, "learning_rate": 6.507519192792771e-06, "loss": 0.6472, "step": 8181 }, { "epoch": 0.42049542604584234, "grad_norm": 1.0744820833206177, "learning_rate": 6.5067256468194425e-06, "loss": 0.6937, "step": 8182 }, { "epoch": 0.420546818789187, "grad_norm": 1.1416133642196655, "learning_rate": 6.505932059100962e-06, "loss": 0.7487, "step": 8183 }, { "epoch": 0.42059821153253163, "grad_norm": 1.0580724477767944, "learning_rate": 6.5051384296593204e-06, "loss": 0.7186, "step": 8184 }, { "epoch": 0.4206496042758762, "grad_norm": 1.0841445922851562, "learning_rate": 6.504344758516503e-06, "loss": 0.7685, "step": 8185 }, { "epoch": 0.42070099701922087, "grad_norm": 1.056885004043579, "learning_rate": 6.5035510456945e-06, "loss": 0.7118, "step": 8186 }, { "epoch": 0.4207523897625655, "grad_norm": 0.7877774238586426, "learning_rate": 6.502757291215305e-06, "loss": 0.6776, "step": 8187 }, { "epoch": 0.42080378250591016, "grad_norm": 1.0735968351364136, "learning_rate": 6.5019634951009045e-06, "loss": 0.783, "step": 8188 }, { "epoch": 0.4208551752492548, "grad_norm": 0.78546142578125, "learning_rate": 6.5011696573732926e-06, "loss": 0.6954, "step": 8189 }, { "epoch": 0.42090656799259946, "grad_norm": 1.0860130786895752, "learning_rate": 6.500375778054467e-06, "loss": 0.7494, "step": 8190 }, { "epoch": 0.4209579607359441, "grad_norm": 1.1014548540115356, "learning_rate": 6.499581857166419e-06, "loss": 0.6635, "step": 8191 }, { "epoch": 0.42100935347928875, "grad_norm": 1.1342618465423584, "learning_rate": 6.498787894731148e-06, "loss": 0.8156, "step": 8192 }, { "epoch": 0.42106074622263334, "grad_norm": 0.961471438407898, "learning_rate": 6.4979938907706475e-06, "loss": 0.7226, "step": 8193 }, { "epoch": 0.421112138965978, "grad_norm": 1.146230697631836, "learning_rate": 6.497199845306919e-06, "loss": 0.7963, "step": 8194 }, { "epoch": 0.42116353170932264, "grad_norm": 0.7355208396911621, "learning_rate": 6.496405758361962e-06, "loss": 0.6771, "step": 8195 }, { "epoch": 0.4212149244526673, "grad_norm": 1.1335139274597168, "learning_rate": 6.4956116299577756e-06, "loss": 0.7597, "step": 8196 }, { "epoch": 0.42126631719601193, "grad_norm": 1.089436411857605, "learning_rate": 6.494817460116364e-06, "loss": 0.743, "step": 8197 }, { "epoch": 0.4213177099393566, "grad_norm": 1.0815538167953491, "learning_rate": 6.494023248859728e-06, "loss": 0.7621, "step": 8198 }, { "epoch": 0.4213691026827012, "grad_norm": 0.7486788630485535, "learning_rate": 6.493228996209872e-06, "loss": 0.659, "step": 8199 }, { "epoch": 0.4214204954260458, "grad_norm": 0.7722164392471313, "learning_rate": 6.492434702188806e-06, "loss": 0.6477, "step": 8200 }, { "epoch": 0.42147188816939046, "grad_norm": 1.069804072380066, "learning_rate": 6.491640366818531e-06, "loss": 0.7261, "step": 8201 }, { "epoch": 0.4215232809127351, "grad_norm": 1.0602997541427612, "learning_rate": 6.4908459901210596e-06, "loss": 0.704, "step": 8202 }, { "epoch": 0.42157467365607976, "grad_norm": 1.0329790115356445, "learning_rate": 6.4900515721183966e-06, "loss": 0.7347, "step": 8203 }, { "epoch": 0.4216260663994244, "grad_norm": 1.058082938194275, "learning_rate": 6.489257112832553e-06, "loss": 0.7159, "step": 8204 }, { "epoch": 0.42167745914276905, "grad_norm": 1.0252909660339355, "learning_rate": 6.488462612285542e-06, "loss": 0.7619, "step": 8205 }, { "epoch": 0.4217288518861137, "grad_norm": 1.0015450716018677, "learning_rate": 6.487668070499374e-06, "loss": 0.7076, "step": 8206 }, { "epoch": 0.42178024462945835, "grad_norm": 0.8854926228523254, "learning_rate": 6.486873487496063e-06, "loss": 0.7281, "step": 8207 }, { "epoch": 0.42183163737280294, "grad_norm": 0.7147998213768005, "learning_rate": 6.486078863297623e-06, "loss": 0.708, "step": 8208 }, { "epoch": 0.4218830301161476, "grad_norm": 0.698016345500946, "learning_rate": 6.48528419792607e-06, "loss": 0.6967, "step": 8209 }, { "epoch": 0.42193442285949223, "grad_norm": 1.0992891788482666, "learning_rate": 6.484489491403422e-06, "loss": 0.7381, "step": 8210 }, { "epoch": 0.4219858156028369, "grad_norm": 1.1006677150726318, "learning_rate": 6.483694743751696e-06, "loss": 0.7324, "step": 8211 }, { "epoch": 0.4220372083461815, "grad_norm": 1.140717625617981, "learning_rate": 6.482899954992911e-06, "loss": 0.7719, "step": 8212 }, { "epoch": 0.42208860108952617, "grad_norm": 1.074892282485962, "learning_rate": 6.48210512514909e-06, "loss": 0.7955, "step": 8213 }, { "epoch": 0.4221399938328708, "grad_norm": 1.069056749343872, "learning_rate": 6.481310254242252e-06, "loss": 0.7716, "step": 8214 }, { "epoch": 0.42219138657621547, "grad_norm": 1.1065367460250854, "learning_rate": 6.480515342294418e-06, "loss": 0.774, "step": 8215 }, { "epoch": 0.42224277931956006, "grad_norm": 1.0123153924942017, "learning_rate": 6.479720389327615e-06, "loss": 0.7241, "step": 8216 }, { "epoch": 0.4222941720629047, "grad_norm": 1.10072922706604, "learning_rate": 6.478925395363866e-06, "loss": 0.7444, "step": 8217 }, { "epoch": 0.42234556480624935, "grad_norm": 1.048885464668274, "learning_rate": 6.478130360425197e-06, "loss": 0.6838, "step": 8218 }, { "epoch": 0.422396957549594, "grad_norm": 1.0726813077926636, "learning_rate": 6.4773352845336345e-06, "loss": 0.7387, "step": 8219 }, { "epoch": 0.42244835029293865, "grad_norm": 0.7953245043754578, "learning_rate": 6.47654016771121e-06, "loss": 0.6689, "step": 8220 }, { "epoch": 0.4224997430362833, "grad_norm": 1.119653582572937, "learning_rate": 6.47574500997995e-06, "loss": 0.7108, "step": 8221 }, { "epoch": 0.42255113577962794, "grad_norm": 1.112272024154663, "learning_rate": 6.4749498113618855e-06, "loss": 0.7978, "step": 8222 }, { "epoch": 0.42260252852297253, "grad_norm": 1.0212616920471191, "learning_rate": 6.4741545718790485e-06, "loss": 0.702, "step": 8223 }, { "epoch": 0.4226539212663172, "grad_norm": 1.0341118574142456, "learning_rate": 6.473359291553474e-06, "loss": 0.7637, "step": 8224 }, { "epoch": 0.4227053140096618, "grad_norm": 0.8750165104866028, "learning_rate": 6.472563970407191e-06, "loss": 0.7004, "step": 8225 }, { "epoch": 0.4227567067530065, "grad_norm": 0.7670082449913025, "learning_rate": 6.471768608462239e-06, "loss": 0.6819, "step": 8226 }, { "epoch": 0.4228080994963511, "grad_norm": 1.1221908330917358, "learning_rate": 6.470973205740651e-06, "loss": 0.7734, "step": 8227 }, { "epoch": 0.42285949223969577, "grad_norm": 1.0767858028411865, "learning_rate": 6.4701777622644665e-06, "loss": 0.702, "step": 8228 }, { "epoch": 0.4229108849830404, "grad_norm": 1.0595110654830933, "learning_rate": 6.469382278055723e-06, "loss": 0.765, "step": 8229 }, { "epoch": 0.42296227772638506, "grad_norm": 1.0989242792129517, "learning_rate": 6.468586753136461e-06, "loss": 0.7695, "step": 8230 }, { "epoch": 0.42301367046972965, "grad_norm": 1.024972677230835, "learning_rate": 6.467791187528719e-06, "loss": 0.7237, "step": 8231 }, { "epoch": 0.4230650632130743, "grad_norm": 0.9777402281761169, "learning_rate": 6.466995581254543e-06, "loss": 0.7566, "step": 8232 }, { "epoch": 0.42311645595641895, "grad_norm": 1.055586338043213, "learning_rate": 6.4661999343359705e-06, "loss": 0.71, "step": 8233 }, { "epoch": 0.4231678486997636, "grad_norm": 1.0453256368637085, "learning_rate": 6.465404246795051e-06, "loss": 0.6905, "step": 8234 }, { "epoch": 0.42321924144310824, "grad_norm": 1.0758497714996338, "learning_rate": 6.464608518653827e-06, "loss": 0.6696, "step": 8235 }, { "epoch": 0.4232706341864529, "grad_norm": 1.1033661365509033, "learning_rate": 6.463812749934343e-06, "loss": 0.7654, "step": 8236 }, { "epoch": 0.42332202692979753, "grad_norm": 1.008488416671753, "learning_rate": 6.463016940658649e-06, "loss": 0.703, "step": 8237 }, { "epoch": 0.4233734196731421, "grad_norm": 0.7929936051368713, "learning_rate": 6.4622210908487946e-06, "loss": 0.6784, "step": 8238 }, { "epoch": 0.4234248124164868, "grad_norm": 1.001913070678711, "learning_rate": 6.461425200526828e-06, "loss": 0.7131, "step": 8239 }, { "epoch": 0.4234762051598314, "grad_norm": 0.8796018362045288, "learning_rate": 6.460629269714797e-06, "loss": 0.6782, "step": 8240 }, { "epoch": 0.42352759790317607, "grad_norm": 1.1097356081008911, "learning_rate": 6.459833298434759e-06, "loss": 0.774, "step": 8241 }, { "epoch": 0.4235789906465207, "grad_norm": 1.0712469816207886, "learning_rate": 6.459037286708764e-06, "loss": 0.7674, "step": 8242 }, { "epoch": 0.42363038338986536, "grad_norm": 1.0817714929580688, "learning_rate": 6.458241234558869e-06, "loss": 0.7353, "step": 8243 }, { "epoch": 0.42368177613321, "grad_norm": 1.0979995727539062, "learning_rate": 6.457445142007125e-06, "loss": 0.742, "step": 8244 }, { "epoch": 0.42373316887655466, "grad_norm": 1.081201195716858, "learning_rate": 6.4566490090755904e-06, "loss": 0.7751, "step": 8245 }, { "epoch": 0.42378456161989925, "grad_norm": 1.105096459388733, "learning_rate": 6.455852835786325e-06, "loss": 0.7362, "step": 8246 }, { "epoch": 0.4238359543632439, "grad_norm": 0.9971508383750916, "learning_rate": 6.455056622161384e-06, "loss": 0.701, "step": 8247 }, { "epoch": 0.42388734710658854, "grad_norm": 0.9921983480453491, "learning_rate": 6.4542603682228285e-06, "loss": 0.7474, "step": 8248 }, { "epoch": 0.4239387398499332, "grad_norm": 1.0649241209030151, "learning_rate": 6.45346407399272e-06, "loss": 0.7212, "step": 8249 }, { "epoch": 0.42399013259327784, "grad_norm": 1.233341097831726, "learning_rate": 6.452667739493119e-06, "loss": 0.7854, "step": 8250 }, { "epoch": 0.4240415253366225, "grad_norm": 1.1549886465072632, "learning_rate": 6.451871364746092e-06, "loss": 0.7424, "step": 8251 }, { "epoch": 0.42409291807996713, "grad_norm": 1.1375632286071777, "learning_rate": 6.451074949773699e-06, "loss": 0.7203, "step": 8252 }, { "epoch": 0.4241443108233117, "grad_norm": 1.0938986539840698, "learning_rate": 6.450278494598009e-06, "loss": 0.7527, "step": 8253 }, { "epoch": 0.42419570356665637, "grad_norm": 1.110064148902893, "learning_rate": 6.449481999241086e-06, "loss": 0.8271, "step": 8254 }, { "epoch": 0.424247096310001, "grad_norm": 1.0966453552246094, "learning_rate": 6.4486854637249985e-06, "loss": 0.7106, "step": 8255 }, { "epoch": 0.42429848905334566, "grad_norm": 1.0951966047286987, "learning_rate": 6.447888888071816e-06, "loss": 0.7327, "step": 8256 }, { "epoch": 0.4243498817966903, "grad_norm": 1.0976653099060059, "learning_rate": 6.447092272303606e-06, "loss": 0.7998, "step": 8257 }, { "epoch": 0.42440127454003496, "grad_norm": 1.0412391424179077, "learning_rate": 6.4462956164424415e-06, "loss": 0.7088, "step": 8258 }, { "epoch": 0.4244526672833796, "grad_norm": 1.0415542125701904, "learning_rate": 6.445498920510395e-06, "loss": 0.7646, "step": 8259 }, { "epoch": 0.42450406002672425, "grad_norm": 1.1661365032196045, "learning_rate": 6.444702184529537e-06, "loss": 0.7496, "step": 8260 }, { "epoch": 0.42455545277006884, "grad_norm": 1.090607762336731, "learning_rate": 6.443905408521943e-06, "loss": 0.7133, "step": 8261 }, { "epoch": 0.4246068455134135, "grad_norm": 1.1093566417694092, "learning_rate": 6.44310859250969e-06, "loss": 0.7079, "step": 8262 }, { "epoch": 0.42465823825675814, "grad_norm": 1.0579400062561035, "learning_rate": 6.442311736514853e-06, "loss": 0.6838, "step": 8263 }, { "epoch": 0.4247096310001028, "grad_norm": 4.69133996963501, "learning_rate": 6.441514840559511e-06, "loss": 0.8511, "step": 8264 }, { "epoch": 0.42476102374344743, "grad_norm": 1.1832209825515747, "learning_rate": 6.44071790466574e-06, "loss": 0.7482, "step": 8265 }, { "epoch": 0.4248124164867921, "grad_norm": 1.0471915006637573, "learning_rate": 6.439920928855623e-06, "loss": 0.7638, "step": 8266 }, { "epoch": 0.4248638092301367, "grad_norm": 1.0906472206115723, "learning_rate": 6.439123913151238e-06, "loss": 0.6992, "step": 8267 }, { "epoch": 0.42491520197348137, "grad_norm": 1.0624840259552002, "learning_rate": 6.438326857574668e-06, "loss": 0.7534, "step": 8268 }, { "epoch": 0.42496659471682596, "grad_norm": 0.8258217573165894, "learning_rate": 6.437529762147997e-06, "loss": 0.6908, "step": 8269 }, { "epoch": 0.4250179874601706, "grad_norm": 1.0538914203643799, "learning_rate": 6.436732626893308e-06, "loss": 0.7921, "step": 8270 }, { "epoch": 0.42506938020351526, "grad_norm": 1.1080495119094849, "learning_rate": 6.435935451832686e-06, "loss": 0.7462, "step": 8271 }, { "epoch": 0.4251207729468599, "grad_norm": 1.071796178817749, "learning_rate": 6.435138236988221e-06, "loss": 0.7626, "step": 8272 }, { "epoch": 0.42517216569020455, "grad_norm": 1.04335355758667, "learning_rate": 6.434340982381997e-06, "loss": 0.7178, "step": 8273 }, { "epoch": 0.4252235584335492, "grad_norm": 1.0774140357971191, "learning_rate": 6.4335436880361035e-06, "loss": 0.7633, "step": 8274 }, { "epoch": 0.42527495117689385, "grad_norm": 1.23483407497406, "learning_rate": 6.4327463539726295e-06, "loss": 0.8217, "step": 8275 }, { "epoch": 0.42532634392023844, "grad_norm": 1.067987322807312, "learning_rate": 6.431948980213667e-06, "loss": 0.6737, "step": 8276 }, { "epoch": 0.4253777366635831, "grad_norm": 1.1560516357421875, "learning_rate": 6.4311515667813085e-06, "loss": 0.7805, "step": 8277 }, { "epoch": 0.42542912940692773, "grad_norm": 1.0915915966033936, "learning_rate": 6.430354113697645e-06, "loss": 0.7641, "step": 8278 }, { "epoch": 0.4254805221502724, "grad_norm": 0.8349462151527405, "learning_rate": 6.429556620984772e-06, "loss": 0.6726, "step": 8279 }, { "epoch": 0.425531914893617, "grad_norm": 1.0294008255004883, "learning_rate": 6.4287590886647845e-06, "loss": 0.7656, "step": 8280 }, { "epoch": 0.42558330763696167, "grad_norm": 0.8001941442489624, "learning_rate": 6.42796151675978e-06, "loss": 0.6581, "step": 8281 }, { "epoch": 0.4256347003803063, "grad_norm": 1.0716136693954468, "learning_rate": 6.4271639052918536e-06, "loss": 0.7209, "step": 8282 }, { "epoch": 0.42568609312365097, "grad_norm": 1.0247186422348022, "learning_rate": 6.426366254283105e-06, "loss": 0.7553, "step": 8283 }, { "epoch": 0.42573748586699556, "grad_norm": 0.7531420588493347, "learning_rate": 6.425568563755633e-06, "loss": 0.6475, "step": 8284 }, { "epoch": 0.4257888786103402, "grad_norm": 1.0480122566223145, "learning_rate": 6.4247708337315394e-06, "loss": 0.7917, "step": 8285 }, { "epoch": 0.42584027135368485, "grad_norm": 1.0335288047790527, "learning_rate": 6.423973064232926e-06, "loss": 0.732, "step": 8286 }, { "epoch": 0.4258916640970295, "grad_norm": 0.7679970264434814, "learning_rate": 6.423175255281896e-06, "loss": 0.684, "step": 8287 }, { "epoch": 0.42594305684037415, "grad_norm": 1.181100606918335, "learning_rate": 6.422377406900553e-06, "loss": 0.795, "step": 8288 }, { "epoch": 0.4259944495837188, "grad_norm": 1.07860267162323, "learning_rate": 6.421579519111e-06, "loss": 0.7791, "step": 8289 }, { "epoch": 0.42604584232706344, "grad_norm": 1.1637686491012573, "learning_rate": 6.4207815919353465e-06, "loss": 0.7725, "step": 8290 }, { "epoch": 0.42609723507040803, "grad_norm": 1.0649746656417847, "learning_rate": 6.419983625395697e-06, "loss": 0.7547, "step": 8291 }, { "epoch": 0.4261486278137527, "grad_norm": 1.1197888851165771, "learning_rate": 6.4191856195141625e-06, "loss": 0.7846, "step": 8292 }, { "epoch": 0.4262000205570973, "grad_norm": 1.0475739240646362, "learning_rate": 6.418387574312851e-06, "loss": 0.7388, "step": 8293 }, { "epoch": 0.426251413300442, "grad_norm": 1.0338460206985474, "learning_rate": 6.417589489813871e-06, "loss": 0.7257, "step": 8294 }, { "epoch": 0.4263028060437866, "grad_norm": 1.2435382604599, "learning_rate": 6.416791366039339e-06, "loss": 0.7656, "step": 8295 }, { "epoch": 0.42635419878713127, "grad_norm": 1.0318820476531982, "learning_rate": 6.415993203011365e-06, "loss": 0.7093, "step": 8296 }, { "epoch": 0.4264055915304759, "grad_norm": 0.7912799715995789, "learning_rate": 6.4151950007520616e-06, "loss": 0.6494, "step": 8297 }, { "epoch": 0.42645698427382056, "grad_norm": 0.6895196437835693, "learning_rate": 6.414396759283546e-06, "loss": 0.686, "step": 8298 }, { "epoch": 0.42650837701716515, "grad_norm": 1.133817195892334, "learning_rate": 6.413598478627932e-06, "loss": 0.7652, "step": 8299 }, { "epoch": 0.4265597697605098, "grad_norm": 0.8014428615570068, "learning_rate": 6.412800158807337e-06, "loss": 0.6668, "step": 8300 }, { "epoch": 0.42661116250385445, "grad_norm": 1.1080423593521118, "learning_rate": 6.412001799843881e-06, "loss": 0.813, "step": 8301 }, { "epoch": 0.4266625552471991, "grad_norm": 1.0469075441360474, "learning_rate": 6.411203401759682e-06, "loss": 0.7301, "step": 8302 }, { "epoch": 0.42671394799054374, "grad_norm": 3.1170568466186523, "learning_rate": 6.410404964576861e-06, "loss": 0.708, "step": 8303 }, { "epoch": 0.4267653407338884, "grad_norm": 1.0842232704162598, "learning_rate": 6.409606488317538e-06, "loss": 0.7044, "step": 8304 }, { "epoch": 0.42681673347723303, "grad_norm": 1.0441584587097168, "learning_rate": 6.408807973003837e-06, "loss": 0.6987, "step": 8305 }, { "epoch": 0.4268681262205777, "grad_norm": 1.0478596687316895, "learning_rate": 6.40800941865788e-06, "loss": 0.7312, "step": 8306 }, { "epoch": 0.4269195189639223, "grad_norm": 1.0818253755569458, "learning_rate": 6.407210825301794e-06, "loss": 0.715, "step": 8307 }, { "epoch": 0.4269709117072669, "grad_norm": 1.0268062353134155, "learning_rate": 6.4064121929577016e-06, "loss": 0.7237, "step": 8308 }, { "epoch": 0.42702230445061157, "grad_norm": 1.077306866645813, "learning_rate": 6.405613521647732e-06, "loss": 0.7896, "step": 8309 }, { "epoch": 0.4270736971939562, "grad_norm": 1.078835129737854, "learning_rate": 6.4048148113940115e-06, "loss": 0.7085, "step": 8310 }, { "epoch": 0.42712508993730086, "grad_norm": 1.4233893156051636, "learning_rate": 6.404016062218672e-06, "loss": 0.693, "step": 8311 }, { "epoch": 0.4271764826806455, "grad_norm": 0.7691375017166138, "learning_rate": 6.4032172741438395e-06, "loss": 0.6566, "step": 8312 }, { "epoch": 0.42722787542399016, "grad_norm": 1.108933687210083, "learning_rate": 6.402418447191646e-06, "loss": 0.7449, "step": 8313 }, { "epoch": 0.42727926816733475, "grad_norm": 1.1395201683044434, "learning_rate": 6.401619581384227e-06, "loss": 0.7496, "step": 8314 }, { "epoch": 0.4273306609106794, "grad_norm": 1.030959963798523, "learning_rate": 6.400820676743714e-06, "loss": 0.7094, "step": 8315 }, { "epoch": 0.42738205365402404, "grad_norm": 1.120534062385559, "learning_rate": 6.400021733292239e-06, "loss": 0.8133, "step": 8316 }, { "epoch": 0.4274334463973687, "grad_norm": 0.7026606798171997, "learning_rate": 6.3992227510519415e-06, "loss": 0.6219, "step": 8317 }, { "epoch": 0.42748483914071334, "grad_norm": 1.1032322645187378, "learning_rate": 6.398423730044954e-06, "loss": 0.7206, "step": 8318 }, { "epoch": 0.427536231884058, "grad_norm": 1.0905557870864868, "learning_rate": 6.397624670293417e-06, "loss": 0.7605, "step": 8319 }, { "epoch": 0.42758762462740263, "grad_norm": 1.079391360282898, "learning_rate": 6.396825571819467e-06, "loss": 0.7473, "step": 8320 }, { "epoch": 0.4276390173707473, "grad_norm": 1.072669267654419, "learning_rate": 6.396026434645245e-06, "loss": 0.7349, "step": 8321 }, { "epoch": 0.42769041011409187, "grad_norm": 1.160841703414917, "learning_rate": 6.395227258792891e-06, "loss": 0.7994, "step": 8322 }, { "epoch": 0.4277418028574365, "grad_norm": 0.8698166608810425, "learning_rate": 6.394428044284549e-06, "loss": 0.7168, "step": 8323 }, { "epoch": 0.42779319560078116, "grad_norm": 0.7399750351905823, "learning_rate": 6.393628791142359e-06, "loss": 0.6734, "step": 8324 }, { "epoch": 0.4278445883441258, "grad_norm": 1.2460758686065674, "learning_rate": 6.392829499388467e-06, "loss": 0.6618, "step": 8325 }, { "epoch": 0.42789598108747046, "grad_norm": 1.0808895826339722, "learning_rate": 6.392030169045016e-06, "loss": 0.6902, "step": 8326 }, { "epoch": 0.4279473738308151, "grad_norm": 1.0457595586776733, "learning_rate": 6.391230800134156e-06, "loss": 0.7312, "step": 8327 }, { "epoch": 0.42799876657415975, "grad_norm": 1.0832843780517578, "learning_rate": 6.3904313926780314e-06, "loss": 0.7799, "step": 8328 }, { "epoch": 0.42805015931750434, "grad_norm": 1.0643500089645386, "learning_rate": 6.389631946698789e-06, "loss": 0.7253, "step": 8329 }, { "epoch": 0.428101552060849, "grad_norm": 1.1019455194473267, "learning_rate": 6.388832462218582e-06, "loss": 0.714, "step": 8330 }, { "epoch": 0.42815294480419364, "grad_norm": 1.0317736864089966, "learning_rate": 6.388032939259558e-06, "loss": 0.7292, "step": 8331 }, { "epoch": 0.4282043375475383, "grad_norm": 1.0136743783950806, "learning_rate": 6.387233377843869e-06, "loss": 0.6877, "step": 8332 }, { "epoch": 0.42825573029088293, "grad_norm": 1.0540369749069214, "learning_rate": 6.386433777993668e-06, "loss": 0.7252, "step": 8333 }, { "epoch": 0.4283071230342276, "grad_norm": 1.1630367040634155, "learning_rate": 6.385634139731109e-06, "loss": 0.7385, "step": 8334 }, { "epoch": 0.4283585157775722, "grad_norm": 0.8848607540130615, "learning_rate": 6.384834463078345e-06, "loss": 0.687, "step": 8335 }, { "epoch": 0.42840990852091687, "grad_norm": 1.1070735454559326, "learning_rate": 6.384034748057535e-06, "loss": 0.7533, "step": 8336 }, { "epoch": 0.42846130126426146, "grad_norm": 1.2160663604736328, "learning_rate": 6.383234994690832e-06, "loss": 0.8477, "step": 8337 }, { "epoch": 0.4285126940076061, "grad_norm": 1.0909734964370728, "learning_rate": 6.382435203000396e-06, "loss": 0.7801, "step": 8338 }, { "epoch": 0.42856408675095076, "grad_norm": 1.1377675533294678, "learning_rate": 6.381635373008387e-06, "loss": 0.7847, "step": 8339 }, { "epoch": 0.4286154794942954, "grad_norm": 1.065069317817688, "learning_rate": 6.380835504736962e-06, "loss": 0.697, "step": 8340 }, { "epoch": 0.42866687223764005, "grad_norm": 1.0549118518829346, "learning_rate": 6.3800355982082825e-06, "loss": 0.7453, "step": 8341 }, { "epoch": 0.4287182649809847, "grad_norm": 1.0431360006332397, "learning_rate": 6.379235653444513e-06, "loss": 0.7806, "step": 8342 }, { "epoch": 0.42876965772432934, "grad_norm": 0.929344654083252, "learning_rate": 6.378435670467815e-06, "loss": 0.6732, "step": 8343 }, { "epoch": 0.428821050467674, "grad_norm": 1.1320505142211914, "learning_rate": 6.377635649300353e-06, "loss": 0.7277, "step": 8344 }, { "epoch": 0.4288724432110186, "grad_norm": 1.0407710075378418, "learning_rate": 6.376835589964292e-06, "loss": 0.7322, "step": 8345 }, { "epoch": 0.42892383595436323, "grad_norm": 1.1120134592056274, "learning_rate": 6.3760354924818e-06, "loss": 0.7101, "step": 8346 }, { "epoch": 0.4289752286977079, "grad_norm": 1.073689579963684, "learning_rate": 6.375235356875042e-06, "loss": 0.7608, "step": 8347 }, { "epoch": 0.4290266214410525, "grad_norm": 0.7342345714569092, "learning_rate": 6.374435183166188e-06, "loss": 0.6786, "step": 8348 }, { "epoch": 0.42907801418439717, "grad_norm": 1.0894666910171509, "learning_rate": 6.373634971377407e-06, "loss": 0.7908, "step": 8349 }, { "epoch": 0.4291294069277418, "grad_norm": 1.051718831062317, "learning_rate": 6.372834721530868e-06, "loss": 0.7205, "step": 8350 }, { "epoch": 0.42918079967108647, "grad_norm": 0.7243857383728027, "learning_rate": 6.372034433648746e-06, "loss": 0.6512, "step": 8351 }, { "epoch": 0.42923219241443106, "grad_norm": 0.9980666637420654, "learning_rate": 6.371234107753212e-06, "loss": 0.7141, "step": 8352 }, { "epoch": 0.4292835851577757, "grad_norm": 1.0969419479370117, "learning_rate": 6.370433743866438e-06, "loss": 0.7574, "step": 8353 }, { "epoch": 0.42933497790112035, "grad_norm": 1.1351345777511597, "learning_rate": 6.3696333420106015e-06, "loss": 0.7446, "step": 8354 }, { "epoch": 0.429386370644465, "grad_norm": 1.0555768013000488, "learning_rate": 6.368832902207878e-06, "loss": 0.7227, "step": 8355 }, { "epoch": 0.42943776338780965, "grad_norm": 1.1305559873580933, "learning_rate": 6.368032424480443e-06, "loss": 0.785, "step": 8356 }, { "epoch": 0.4294891561311543, "grad_norm": 0.7501512765884399, "learning_rate": 6.367231908850475e-06, "loss": 0.6534, "step": 8357 }, { "epoch": 0.42954054887449894, "grad_norm": 1.0587468147277832, "learning_rate": 6.366431355340152e-06, "loss": 0.7621, "step": 8358 }, { "epoch": 0.4295919416178436, "grad_norm": 1.061094880104065, "learning_rate": 6.3656307639716565e-06, "loss": 0.7467, "step": 8359 }, { "epoch": 0.4296433343611882, "grad_norm": 0.7430071234703064, "learning_rate": 6.364830134767168e-06, "loss": 0.6524, "step": 8360 }, { "epoch": 0.4296947271045328, "grad_norm": 1.0598183870315552, "learning_rate": 6.3640294677488676e-06, "loss": 0.7758, "step": 8361 }, { "epoch": 0.4297461198478775, "grad_norm": 1.2606260776519775, "learning_rate": 6.363228762938941e-06, "loss": 0.8014, "step": 8362 }, { "epoch": 0.4297975125912221, "grad_norm": 1.1287667751312256, "learning_rate": 6.36242802035957e-06, "loss": 0.777, "step": 8363 }, { "epoch": 0.42984890533456677, "grad_norm": 0.7967458963394165, "learning_rate": 6.361627240032942e-06, "loss": 0.6593, "step": 8364 }, { "epoch": 0.4299002980779114, "grad_norm": 1.077114224433899, "learning_rate": 6.360826421981242e-06, "loss": 0.7326, "step": 8365 }, { "epoch": 0.42995169082125606, "grad_norm": 1.0205336809158325, "learning_rate": 6.3600255662266566e-06, "loss": 0.7667, "step": 8366 }, { "epoch": 0.43000308356460065, "grad_norm": 0.7071298956871033, "learning_rate": 6.359224672791377e-06, "loss": 0.6604, "step": 8367 }, { "epoch": 0.4300544763079453, "grad_norm": 1.0826181173324585, "learning_rate": 6.3584237416975905e-06, "loss": 0.8129, "step": 8368 }, { "epoch": 0.43010586905128995, "grad_norm": 1.1143178939819336, "learning_rate": 6.357622772967487e-06, "loss": 0.776, "step": 8369 }, { "epoch": 0.4301572617946346, "grad_norm": 0.704164981842041, "learning_rate": 6.35682176662326e-06, "loss": 0.6742, "step": 8370 }, { "epoch": 0.43020865453797924, "grad_norm": 1.1142781972885132, "learning_rate": 6.356020722687101e-06, "loss": 0.7303, "step": 8371 }, { "epoch": 0.4302600472813239, "grad_norm": 1.071683406829834, "learning_rate": 6.3552196411812025e-06, "loss": 0.685, "step": 8372 }, { "epoch": 0.43031144002466853, "grad_norm": 1.091601848602295, "learning_rate": 6.354418522127761e-06, "loss": 0.7529, "step": 8373 }, { "epoch": 0.4303628327680132, "grad_norm": 0.7305396795272827, "learning_rate": 6.353617365548972e-06, "loss": 0.6413, "step": 8374 }, { "epoch": 0.4304142255113578, "grad_norm": 1.102630853652954, "learning_rate": 6.352816171467032e-06, "loss": 0.7141, "step": 8375 }, { "epoch": 0.4304656182547024, "grad_norm": 1.0008684396743774, "learning_rate": 6.352014939904139e-06, "loss": 0.7236, "step": 8376 }, { "epoch": 0.43051701099804707, "grad_norm": 1.0773789882659912, "learning_rate": 6.35121367088249e-06, "loss": 0.7424, "step": 8377 }, { "epoch": 0.4305684037413917, "grad_norm": 1.1423330307006836, "learning_rate": 6.350412364424288e-06, "loss": 0.7682, "step": 8378 }, { "epoch": 0.43061979648473636, "grad_norm": 1.1046618223190308, "learning_rate": 6.349611020551732e-06, "loss": 0.7829, "step": 8379 }, { "epoch": 0.430671189228081, "grad_norm": 1.1270986795425415, "learning_rate": 6.348809639287022e-06, "loss": 0.803, "step": 8380 }, { "epoch": 0.43072258197142566, "grad_norm": 1.1230157613754272, "learning_rate": 6.348008220652366e-06, "loss": 0.7069, "step": 8381 }, { "epoch": 0.43077397471477025, "grad_norm": 1.0743664503097534, "learning_rate": 6.347206764669963e-06, "loss": 0.7657, "step": 8382 }, { "epoch": 0.4308253674581149, "grad_norm": 0.7270748615264893, "learning_rate": 6.346405271362022e-06, "loss": 0.6883, "step": 8383 }, { "epoch": 0.43087676020145954, "grad_norm": 1.0805649757385254, "learning_rate": 6.345603740750745e-06, "loss": 0.7839, "step": 8384 }, { "epoch": 0.4309281529448042, "grad_norm": 0.8860407471656799, "learning_rate": 6.344802172858342e-06, "loss": 0.7003, "step": 8385 }, { "epoch": 0.43097954568814884, "grad_norm": 0.800311267375946, "learning_rate": 6.3440005677070216e-06, "loss": 0.6805, "step": 8386 }, { "epoch": 0.4310309384314935, "grad_norm": 1.065180778503418, "learning_rate": 6.343198925318992e-06, "loss": 0.7127, "step": 8387 }, { "epoch": 0.43108233117483813, "grad_norm": 0.7365872859954834, "learning_rate": 6.342397245716461e-06, "loss": 0.6549, "step": 8388 }, { "epoch": 0.4311337239181828, "grad_norm": 1.0598411560058594, "learning_rate": 6.341595528921645e-06, "loss": 0.6857, "step": 8389 }, { "epoch": 0.43118511666152737, "grad_norm": 1.0902824401855469, "learning_rate": 6.340793774956751e-06, "loss": 0.7682, "step": 8390 }, { "epoch": 0.431236509404872, "grad_norm": 1.0106401443481445, "learning_rate": 6.339991983843995e-06, "loss": 0.7361, "step": 8391 }, { "epoch": 0.43128790214821666, "grad_norm": 0.9538063406944275, "learning_rate": 6.339190155605592e-06, "loss": 0.7278, "step": 8392 }, { "epoch": 0.4313392948915613, "grad_norm": 1.0787593126296997, "learning_rate": 6.338388290263755e-06, "loss": 0.7894, "step": 8393 }, { "epoch": 0.43139068763490596, "grad_norm": 1.147099494934082, "learning_rate": 6.337586387840702e-06, "loss": 0.7554, "step": 8394 }, { "epoch": 0.4314420803782506, "grad_norm": 1.0683928728103638, "learning_rate": 6.336784448358649e-06, "loss": 0.7891, "step": 8395 }, { "epoch": 0.43149347312159525, "grad_norm": 1.063873529434204, "learning_rate": 6.3359824718398175e-06, "loss": 0.7523, "step": 8396 }, { "epoch": 0.4315448658649399, "grad_norm": 1.029515266418457, "learning_rate": 6.335180458306424e-06, "loss": 0.7013, "step": 8397 }, { "epoch": 0.4315962586082845, "grad_norm": 1.083827018737793, "learning_rate": 6.334378407780692e-06, "loss": 0.7636, "step": 8398 }, { "epoch": 0.43164765135162914, "grad_norm": 1.037231206893921, "learning_rate": 6.333576320284839e-06, "loss": 0.7377, "step": 8399 }, { "epoch": 0.4316990440949738, "grad_norm": 1.0276010036468506, "learning_rate": 6.3327741958410904e-06, "loss": 0.7665, "step": 8400 }, { "epoch": 0.43175043683831843, "grad_norm": 1.1145011186599731, "learning_rate": 6.3319720344716674e-06, "loss": 0.7614, "step": 8401 }, { "epoch": 0.4318018295816631, "grad_norm": 1.1464964151382446, "learning_rate": 6.3311698361987985e-06, "loss": 0.7818, "step": 8402 }, { "epoch": 0.4318532223250077, "grad_norm": 1.0981749296188354, "learning_rate": 6.330367601044705e-06, "loss": 0.719, "step": 8403 }, { "epoch": 0.43190461506835237, "grad_norm": 1.0901585817337036, "learning_rate": 6.329565329031614e-06, "loss": 0.7758, "step": 8404 }, { "epoch": 0.43195600781169696, "grad_norm": 0.8460590839385986, "learning_rate": 6.328763020181756e-06, "loss": 0.6807, "step": 8405 }, { "epoch": 0.4320074005550416, "grad_norm": 1.1460169553756714, "learning_rate": 6.327960674517358e-06, "loss": 0.7666, "step": 8406 }, { "epoch": 0.43205879329838626, "grad_norm": 1.0639140605926514, "learning_rate": 6.327158292060651e-06, "loss": 0.7805, "step": 8407 }, { "epoch": 0.4321101860417309, "grad_norm": 1.0985881090164185, "learning_rate": 6.326355872833865e-06, "loss": 0.6885, "step": 8408 }, { "epoch": 0.43216157878507555, "grad_norm": 1.2176406383514404, "learning_rate": 6.325553416859229e-06, "loss": 0.7658, "step": 8409 }, { "epoch": 0.4322129715284202, "grad_norm": 0.8720978498458862, "learning_rate": 6.32475092415898e-06, "loss": 0.7014, "step": 8410 }, { "epoch": 0.43226436427176484, "grad_norm": 1.0955424308776855, "learning_rate": 6.323948394755348e-06, "loss": 0.761, "step": 8411 }, { "epoch": 0.4323157570151095, "grad_norm": 1.0364047288894653, "learning_rate": 6.323145828670572e-06, "loss": 0.6833, "step": 8412 }, { "epoch": 0.4323671497584541, "grad_norm": 0.9960904121398926, "learning_rate": 6.3223432259268844e-06, "loss": 0.7505, "step": 8413 }, { "epoch": 0.43241854250179873, "grad_norm": 1.2153621912002563, "learning_rate": 6.321540586546522e-06, "loss": 0.7231, "step": 8414 }, { "epoch": 0.4324699352451434, "grad_norm": 0.7874817252159119, "learning_rate": 6.320737910551724e-06, "loss": 0.701, "step": 8415 }, { "epoch": 0.432521327988488, "grad_norm": 1.0423996448516846, "learning_rate": 6.31993519796473e-06, "loss": 0.7384, "step": 8416 }, { "epoch": 0.43257272073183267, "grad_norm": 0.7048157453536987, "learning_rate": 6.3191324488077765e-06, "loss": 0.6672, "step": 8417 }, { "epoch": 0.4326241134751773, "grad_norm": 1.163750171661377, "learning_rate": 6.318329663103109e-06, "loss": 0.7977, "step": 8418 }, { "epoch": 0.43267550621852197, "grad_norm": 1.0547767877578735, "learning_rate": 6.317526840872965e-06, "loss": 0.7798, "step": 8419 }, { "epoch": 0.43272689896186656, "grad_norm": 1.0751404762268066, "learning_rate": 6.316723982139591e-06, "loss": 0.7206, "step": 8420 }, { "epoch": 0.4327782917052112, "grad_norm": 1.0892095565795898, "learning_rate": 6.315921086925229e-06, "loss": 0.7703, "step": 8421 }, { "epoch": 0.43282968444855585, "grad_norm": 0.7128773927688599, "learning_rate": 6.3151181552521225e-06, "loss": 0.6697, "step": 8422 }, { "epoch": 0.4328810771919005, "grad_norm": 0.850968599319458, "learning_rate": 6.314315187142521e-06, "loss": 0.728, "step": 8423 }, { "epoch": 0.43293246993524515, "grad_norm": 0.8781245350837708, "learning_rate": 6.313512182618669e-06, "loss": 0.696, "step": 8424 }, { "epoch": 0.4329838626785898, "grad_norm": 1.0837726593017578, "learning_rate": 6.312709141702813e-06, "loss": 0.7451, "step": 8425 }, { "epoch": 0.43303525542193444, "grad_norm": 1.0780463218688965, "learning_rate": 6.311906064417206e-06, "loss": 0.7086, "step": 8426 }, { "epoch": 0.4330866481652791, "grad_norm": 1.0713844299316406, "learning_rate": 6.311102950784096e-06, "loss": 0.7407, "step": 8427 }, { "epoch": 0.4331380409086237, "grad_norm": 1.1044763326644897, "learning_rate": 6.310299800825733e-06, "loss": 0.7806, "step": 8428 }, { "epoch": 0.4331894336519683, "grad_norm": 1.1343417167663574, "learning_rate": 6.309496614564371e-06, "loss": 0.8192, "step": 8429 }, { "epoch": 0.433240826395313, "grad_norm": 0.8975526094436646, "learning_rate": 6.30869339202226e-06, "loss": 0.6357, "step": 8430 }, { "epoch": 0.4332922191386576, "grad_norm": 1.0665843486785889, "learning_rate": 6.3078901332216565e-06, "loss": 0.7494, "step": 8431 }, { "epoch": 0.43334361188200227, "grad_norm": 1.038925290107727, "learning_rate": 6.3070868381848175e-06, "loss": 0.7479, "step": 8432 }, { "epoch": 0.4333950046253469, "grad_norm": 1.123015284538269, "learning_rate": 6.306283506933992e-06, "loss": 0.8152, "step": 8433 }, { "epoch": 0.43344639736869156, "grad_norm": 1.0991424322128296, "learning_rate": 6.3054801394914435e-06, "loss": 0.7572, "step": 8434 }, { "epoch": 0.4334977901120362, "grad_norm": 1.0407248735427856, "learning_rate": 6.304676735879427e-06, "loss": 0.7459, "step": 8435 }, { "epoch": 0.4335491828553808, "grad_norm": 1.0816010236740112, "learning_rate": 6.303873296120202e-06, "loss": 0.7547, "step": 8436 }, { "epoch": 0.43360057559872545, "grad_norm": 0.818271279335022, "learning_rate": 6.30306982023603e-06, "loss": 0.6593, "step": 8437 }, { "epoch": 0.4336519683420701, "grad_norm": 1.0635895729064941, "learning_rate": 6.302266308249168e-06, "loss": 0.7574, "step": 8438 }, { "epoch": 0.43370336108541474, "grad_norm": 1.1095529794692993, "learning_rate": 6.3014627601818835e-06, "loss": 0.7463, "step": 8439 }, { "epoch": 0.4337547538287594, "grad_norm": 1.038257122039795, "learning_rate": 6.300659176056436e-06, "loss": 0.7181, "step": 8440 }, { "epoch": 0.43380614657210403, "grad_norm": 0.6907318234443665, "learning_rate": 6.2998555558950905e-06, "loss": 0.6548, "step": 8441 }, { "epoch": 0.4338575393154487, "grad_norm": 1.0988339185714722, "learning_rate": 6.2990518997201115e-06, "loss": 0.7594, "step": 8442 }, { "epoch": 0.4339089320587933, "grad_norm": 0.7527409195899963, "learning_rate": 6.298248207553764e-06, "loss": 0.7062, "step": 8443 }, { "epoch": 0.4339603248021379, "grad_norm": 1.0896986722946167, "learning_rate": 6.2974444794183175e-06, "loss": 0.7499, "step": 8444 }, { "epoch": 0.43401171754548257, "grad_norm": 1.1018531322479248, "learning_rate": 6.296640715336039e-06, "loss": 0.7109, "step": 8445 }, { "epoch": 0.4340631102888272, "grad_norm": 0.7952009439468384, "learning_rate": 6.295836915329195e-06, "loss": 0.6729, "step": 8446 }, { "epoch": 0.43411450303217186, "grad_norm": 0.7238272428512573, "learning_rate": 6.295033079420061e-06, "loss": 0.7238, "step": 8447 }, { "epoch": 0.4341658957755165, "grad_norm": 1.0481033325195312, "learning_rate": 6.294229207630903e-06, "loss": 0.7516, "step": 8448 }, { "epoch": 0.43421728851886116, "grad_norm": 1.0630829334259033, "learning_rate": 6.293425299983994e-06, "loss": 0.707, "step": 8449 }, { "epoch": 0.4342686812622058, "grad_norm": 0.9712737798690796, "learning_rate": 6.292621356501609e-06, "loss": 0.725, "step": 8450 }, { "epoch": 0.4343200740055504, "grad_norm": 1.0961427688598633, "learning_rate": 6.291817377206019e-06, "loss": 0.7857, "step": 8451 }, { "epoch": 0.43437146674889504, "grad_norm": 1.1055337190628052, "learning_rate": 6.2910133621195015e-06, "loss": 0.7871, "step": 8452 }, { "epoch": 0.4344228594922397, "grad_norm": 0.8499512076377869, "learning_rate": 6.290209311264332e-06, "loss": 0.6399, "step": 8453 }, { "epoch": 0.43447425223558434, "grad_norm": 1.0557571649551392, "learning_rate": 6.289405224662786e-06, "loss": 0.7266, "step": 8454 }, { "epoch": 0.434525644978929, "grad_norm": 0.7364407777786255, "learning_rate": 6.2886011023371405e-06, "loss": 0.7212, "step": 8455 }, { "epoch": 0.43457703772227363, "grad_norm": 0.9995099306106567, "learning_rate": 6.287796944309679e-06, "loss": 0.6879, "step": 8456 }, { "epoch": 0.4346284304656183, "grad_norm": 1.1055337190628052, "learning_rate": 6.286992750602677e-06, "loss": 0.7724, "step": 8457 }, { "epoch": 0.43467982320896287, "grad_norm": 1.2343580722808838, "learning_rate": 6.286188521238419e-06, "loss": 0.8389, "step": 8458 }, { "epoch": 0.4347312159523075, "grad_norm": 1.0446220636367798, "learning_rate": 6.285384256239182e-06, "loss": 0.682, "step": 8459 }, { "epoch": 0.43478260869565216, "grad_norm": 1.0600786209106445, "learning_rate": 6.2845799556272535e-06, "loss": 0.7502, "step": 8460 }, { "epoch": 0.4348340014389968, "grad_norm": 1.2921860218048096, "learning_rate": 6.283775619424916e-06, "loss": 0.7364, "step": 8461 }, { "epoch": 0.43488539418234146, "grad_norm": 1.0856540203094482, "learning_rate": 6.282971247654453e-06, "loss": 0.6799, "step": 8462 }, { "epoch": 0.4349367869256861, "grad_norm": 1.0880122184753418, "learning_rate": 6.282166840338152e-06, "loss": 0.7409, "step": 8463 }, { "epoch": 0.43498817966903075, "grad_norm": 1.0276278257369995, "learning_rate": 6.281362397498299e-06, "loss": 0.7382, "step": 8464 }, { "epoch": 0.4350395724123754, "grad_norm": 1.0319180488586426, "learning_rate": 6.2805579191571805e-06, "loss": 0.7132, "step": 8465 }, { "epoch": 0.43509096515572, "grad_norm": 1.0523196458816528, "learning_rate": 6.279753405337087e-06, "loss": 0.7406, "step": 8466 }, { "epoch": 0.43514235789906464, "grad_norm": 1.1038187742233276, "learning_rate": 6.278948856060307e-06, "loss": 0.7263, "step": 8467 }, { "epoch": 0.4351937506424093, "grad_norm": 1.2612719535827637, "learning_rate": 6.2781442713491345e-06, "loss": 0.703, "step": 8468 }, { "epoch": 0.43524514338575393, "grad_norm": 1.410661220550537, "learning_rate": 6.277339651225858e-06, "loss": 0.771, "step": 8469 }, { "epoch": 0.4352965361290986, "grad_norm": 1.0300686359405518, "learning_rate": 6.2765349957127706e-06, "loss": 0.7591, "step": 8470 }, { "epoch": 0.4353479288724432, "grad_norm": 1.0803775787353516, "learning_rate": 6.275730304832167e-06, "loss": 0.7209, "step": 8471 }, { "epoch": 0.43539932161578787, "grad_norm": 1.103232502937317, "learning_rate": 6.274925578606341e-06, "loss": 0.6907, "step": 8472 }, { "epoch": 0.4354507143591325, "grad_norm": 1.0692774057388306, "learning_rate": 6.274120817057588e-06, "loss": 0.7811, "step": 8473 }, { "epoch": 0.4355021071024771, "grad_norm": 1.0819768905639648, "learning_rate": 6.2733160202082064e-06, "loss": 0.7249, "step": 8474 }, { "epoch": 0.43555349984582176, "grad_norm": 1.159351110458374, "learning_rate": 6.272511188080491e-06, "loss": 0.7099, "step": 8475 }, { "epoch": 0.4356048925891664, "grad_norm": 0.8502740263938904, "learning_rate": 6.271706320696742e-06, "loss": 0.6576, "step": 8476 }, { "epoch": 0.43565628533251105, "grad_norm": 0.8586541414260864, "learning_rate": 6.2709014180792605e-06, "loss": 0.6566, "step": 8477 }, { "epoch": 0.4357076780758557, "grad_norm": 1.1031328439712524, "learning_rate": 6.2700964802503425e-06, "loss": 0.7514, "step": 8478 }, { "epoch": 0.43575907081920034, "grad_norm": 1.091124415397644, "learning_rate": 6.2692915072322955e-06, "loss": 0.7326, "step": 8479 }, { "epoch": 0.435810463562545, "grad_norm": 1.1567610502243042, "learning_rate": 6.268486499047418e-06, "loss": 0.7443, "step": 8480 }, { "epoch": 0.4358618563058896, "grad_norm": 0.7727978825569153, "learning_rate": 6.267681455718013e-06, "loss": 0.6679, "step": 8481 }, { "epoch": 0.43591324904923423, "grad_norm": 1.1718244552612305, "learning_rate": 6.266876377266389e-06, "loss": 0.7665, "step": 8482 }, { "epoch": 0.4359646417925789, "grad_norm": 1.080930233001709, "learning_rate": 6.2660712637148455e-06, "loss": 0.7282, "step": 8483 }, { "epoch": 0.4360160345359235, "grad_norm": 1.0717263221740723, "learning_rate": 6.265266115085695e-06, "loss": 0.7224, "step": 8484 }, { "epoch": 0.43606742727926817, "grad_norm": 1.0754872560501099, "learning_rate": 6.264460931401241e-06, "loss": 0.8059, "step": 8485 }, { "epoch": 0.4361188200226128, "grad_norm": 1.1302589178085327, "learning_rate": 6.263655712683791e-06, "loss": 0.7911, "step": 8486 }, { "epoch": 0.43617021276595747, "grad_norm": 1.0252442359924316, "learning_rate": 6.262850458955657e-06, "loss": 0.6952, "step": 8487 }, { "epoch": 0.4362216055093021, "grad_norm": 1.1333891153335571, "learning_rate": 6.262045170239148e-06, "loss": 0.732, "step": 8488 }, { "epoch": 0.4362729982526467, "grad_norm": 1.0965452194213867, "learning_rate": 6.261239846556576e-06, "loss": 0.7171, "step": 8489 }, { "epoch": 0.43632439099599135, "grad_norm": 1.074366807937622, "learning_rate": 6.260434487930254e-06, "loss": 0.7155, "step": 8490 }, { "epoch": 0.436375783739336, "grad_norm": 0.8176406621932983, "learning_rate": 6.259629094382491e-06, "loss": 0.6701, "step": 8491 }, { "epoch": 0.43642717648268065, "grad_norm": 1.1059508323669434, "learning_rate": 6.258823665935606e-06, "loss": 0.7515, "step": 8492 }, { "epoch": 0.4364785692260253, "grad_norm": 0.7819482684135437, "learning_rate": 6.258018202611912e-06, "loss": 0.6725, "step": 8493 }, { "epoch": 0.43652996196936994, "grad_norm": 1.0585912466049194, "learning_rate": 6.2572127044337236e-06, "loss": 0.7367, "step": 8494 }, { "epoch": 0.4365813547127146, "grad_norm": 1.086531400680542, "learning_rate": 6.256407171423361e-06, "loss": 0.7328, "step": 8495 }, { "epoch": 0.4366327474560592, "grad_norm": 1.0657939910888672, "learning_rate": 6.25560160360314e-06, "loss": 0.7229, "step": 8496 }, { "epoch": 0.4366841401994038, "grad_norm": 1.072554349899292, "learning_rate": 6.254796000995379e-06, "loss": 0.7451, "step": 8497 }, { "epoch": 0.4367355329427485, "grad_norm": 1.1120802164077759, "learning_rate": 6.253990363622401e-06, "loss": 0.7705, "step": 8498 }, { "epoch": 0.4367869256860931, "grad_norm": 0.7722738981246948, "learning_rate": 6.253184691506523e-06, "loss": 0.685, "step": 8499 }, { "epoch": 0.43683831842943777, "grad_norm": 1.0963382720947266, "learning_rate": 6.252378984670071e-06, "loss": 0.7465, "step": 8500 }, { "epoch": 0.4368897111727824, "grad_norm": 1.0725950002670288, "learning_rate": 6.251573243135365e-06, "loss": 0.7475, "step": 8501 }, { "epoch": 0.43694110391612706, "grad_norm": 1.048986554145813, "learning_rate": 6.250767466924728e-06, "loss": 0.7421, "step": 8502 }, { "epoch": 0.4369924966594717, "grad_norm": 1.0091770887374878, "learning_rate": 6.249961656060487e-06, "loss": 0.7638, "step": 8503 }, { "epoch": 0.4370438894028163, "grad_norm": 1.0706737041473389, "learning_rate": 6.249155810564967e-06, "loss": 0.7207, "step": 8504 }, { "epoch": 0.43709528214616095, "grad_norm": 1.0429250001907349, "learning_rate": 6.248349930460494e-06, "loss": 0.7979, "step": 8505 }, { "epoch": 0.4371466748895056, "grad_norm": 1.0935955047607422, "learning_rate": 6.247544015769396e-06, "loss": 0.756, "step": 8506 }, { "epoch": 0.43719806763285024, "grad_norm": 1.1420365571975708, "learning_rate": 6.246738066514002e-06, "loss": 0.7523, "step": 8507 }, { "epoch": 0.4372494603761949, "grad_norm": 0.7566020488739014, "learning_rate": 6.245932082716641e-06, "loss": 0.6654, "step": 8508 }, { "epoch": 0.43730085311953953, "grad_norm": 0.7629073262214661, "learning_rate": 6.245126064399644e-06, "loss": 0.6826, "step": 8509 }, { "epoch": 0.4373522458628842, "grad_norm": 1.0363222360610962, "learning_rate": 6.244320011585341e-06, "loss": 0.7194, "step": 8510 }, { "epoch": 0.4374036386062288, "grad_norm": 1.1170072555541992, "learning_rate": 6.2435139242960665e-06, "loss": 0.7848, "step": 8511 }, { "epoch": 0.4374550313495734, "grad_norm": 1.1099908351898193, "learning_rate": 6.242707802554152e-06, "loss": 0.7766, "step": 8512 }, { "epoch": 0.43750642409291807, "grad_norm": 1.034143328666687, "learning_rate": 6.241901646381934e-06, "loss": 0.738, "step": 8513 }, { "epoch": 0.4375578168362627, "grad_norm": 1.0583261251449585, "learning_rate": 6.241095455801746e-06, "loss": 0.7282, "step": 8514 }, { "epoch": 0.43760920957960736, "grad_norm": 1.2888649702072144, "learning_rate": 6.240289230835924e-06, "loss": 0.7319, "step": 8515 }, { "epoch": 0.437660602322952, "grad_norm": 1.0071271657943726, "learning_rate": 6.239482971506806e-06, "loss": 0.6963, "step": 8516 }, { "epoch": 0.43771199506629666, "grad_norm": 1.088568925857544, "learning_rate": 6.2386766778367306e-06, "loss": 0.7491, "step": 8517 }, { "epoch": 0.4377633878096413, "grad_norm": 0.7671567797660828, "learning_rate": 6.237870349848034e-06, "loss": 0.6794, "step": 8518 }, { "epoch": 0.4378147805529859, "grad_norm": 1.0515166521072388, "learning_rate": 6.23706398756306e-06, "loss": 0.7555, "step": 8519 }, { "epoch": 0.43786617329633054, "grad_norm": 1.0878161191940308, "learning_rate": 6.236257591004148e-06, "loss": 0.7214, "step": 8520 }, { "epoch": 0.4379175660396752, "grad_norm": 1.0449882745742798, "learning_rate": 6.23545116019364e-06, "loss": 0.7222, "step": 8521 }, { "epoch": 0.43796895878301983, "grad_norm": 1.11459219455719, "learning_rate": 6.234644695153878e-06, "loss": 0.7393, "step": 8522 }, { "epoch": 0.4380203515263645, "grad_norm": 1.059685230255127, "learning_rate": 6.233838195907207e-06, "loss": 0.7287, "step": 8523 }, { "epoch": 0.43807174426970913, "grad_norm": 1.0576393604278564, "learning_rate": 6.2330316624759725e-06, "loss": 0.7343, "step": 8524 }, { "epoch": 0.4381231370130538, "grad_norm": 1.092231035232544, "learning_rate": 6.232225094882518e-06, "loss": 0.685, "step": 8525 }, { "epoch": 0.4381745297563984, "grad_norm": 0.7335990071296692, "learning_rate": 6.231418493149191e-06, "loss": 0.6551, "step": 8526 }, { "epoch": 0.438225922499743, "grad_norm": 1.165383219718933, "learning_rate": 6.230611857298339e-06, "loss": 0.7834, "step": 8527 }, { "epoch": 0.43827731524308766, "grad_norm": 1.05681312084198, "learning_rate": 6.229805187352311e-06, "loss": 0.7278, "step": 8528 }, { "epoch": 0.4383287079864323, "grad_norm": 1.0848851203918457, "learning_rate": 6.228998483333457e-06, "loss": 0.7211, "step": 8529 }, { "epoch": 0.43838010072977696, "grad_norm": 1.1085296869277954, "learning_rate": 6.228191745264127e-06, "loss": 0.7808, "step": 8530 }, { "epoch": 0.4384314934731216, "grad_norm": 1.0813217163085938, "learning_rate": 6.227384973166671e-06, "loss": 0.7861, "step": 8531 }, { "epoch": 0.43848288621646625, "grad_norm": 1.0476735830307007, "learning_rate": 6.226578167063444e-06, "loss": 0.7587, "step": 8532 }, { "epoch": 0.4385342789598109, "grad_norm": 1.4857110977172852, "learning_rate": 6.225771326976797e-06, "loss": 0.7059, "step": 8533 }, { "epoch": 0.4385856717031555, "grad_norm": 1.1320213079452515, "learning_rate": 6.224964452929085e-06, "loss": 0.7273, "step": 8534 }, { "epoch": 0.43863706444650014, "grad_norm": 0.9844960570335388, "learning_rate": 6.224157544942664e-06, "loss": 0.6839, "step": 8535 }, { "epoch": 0.4386884571898448, "grad_norm": 0.6965320706367493, "learning_rate": 6.2233506030398885e-06, "loss": 0.6668, "step": 8536 }, { "epoch": 0.43873984993318943, "grad_norm": 1.0655978918075562, "learning_rate": 6.222543627243116e-06, "loss": 0.7279, "step": 8537 }, { "epoch": 0.4387912426765341, "grad_norm": 0.7516601085662842, "learning_rate": 6.221736617574704e-06, "loss": 0.6416, "step": 8538 }, { "epoch": 0.4388426354198787, "grad_norm": 1.0668766498565674, "learning_rate": 6.220929574057013e-06, "loss": 0.7315, "step": 8539 }, { "epoch": 0.43889402816322337, "grad_norm": 0.6921804547309875, "learning_rate": 6.2201224967124015e-06, "loss": 0.707, "step": 8540 }, { "epoch": 0.438945420906568, "grad_norm": 1.1046688556671143, "learning_rate": 6.219315385563233e-06, "loss": 0.731, "step": 8541 }, { "epoch": 0.4389968136499126, "grad_norm": 1.024468183517456, "learning_rate": 6.218508240631864e-06, "loss": 0.76, "step": 8542 }, { "epoch": 0.43904820639325726, "grad_norm": 0.8090651631355286, "learning_rate": 6.2177010619406616e-06, "loss": 0.6126, "step": 8543 }, { "epoch": 0.4390995991366019, "grad_norm": 1.0247395038604736, "learning_rate": 6.216893849511988e-06, "loss": 0.6867, "step": 8544 }, { "epoch": 0.43915099187994655, "grad_norm": 0.9569130539894104, "learning_rate": 6.216086603368208e-06, "loss": 0.7252, "step": 8545 }, { "epoch": 0.4392023846232912, "grad_norm": 1.080208659172058, "learning_rate": 6.215279323531687e-06, "loss": 0.7341, "step": 8546 }, { "epoch": 0.43925377736663584, "grad_norm": 1.2321627140045166, "learning_rate": 6.214472010024789e-06, "loss": 0.7489, "step": 8547 }, { "epoch": 0.4393051701099805, "grad_norm": 0.9707562923431396, "learning_rate": 6.213664662869884e-06, "loss": 0.7326, "step": 8548 }, { "epoch": 0.4393565628533251, "grad_norm": 1.0804896354675293, "learning_rate": 6.212857282089341e-06, "loss": 0.7011, "step": 8549 }, { "epoch": 0.43940795559666973, "grad_norm": 0.6854485273361206, "learning_rate": 6.2120498677055265e-06, "loss": 0.6779, "step": 8550 }, { "epoch": 0.4394593483400144, "grad_norm": 1.1602165699005127, "learning_rate": 6.211242419740813e-06, "loss": 0.7596, "step": 8551 }, { "epoch": 0.439510741083359, "grad_norm": 1.1067523956298828, "learning_rate": 6.21043493821757e-06, "loss": 0.7446, "step": 8552 }, { "epoch": 0.43956213382670367, "grad_norm": 1.0530751943588257, "learning_rate": 6.20962742315817e-06, "loss": 0.7996, "step": 8553 }, { "epoch": 0.4396135265700483, "grad_norm": 1.1048904657363892, "learning_rate": 6.208819874584987e-06, "loss": 0.7747, "step": 8554 }, { "epoch": 0.43966491931339297, "grad_norm": 1.120870590209961, "learning_rate": 6.208012292520392e-06, "loss": 0.6899, "step": 8555 }, { "epoch": 0.4397163120567376, "grad_norm": 0.6727537512779236, "learning_rate": 6.207204676986763e-06, "loss": 0.6399, "step": 8556 }, { "epoch": 0.4397677048000822, "grad_norm": 1.0299372673034668, "learning_rate": 6.206397028006473e-06, "loss": 0.6862, "step": 8557 }, { "epoch": 0.43981909754342685, "grad_norm": 1.0700434446334839, "learning_rate": 6.205589345601903e-06, "loss": 0.7517, "step": 8558 }, { "epoch": 0.4398704902867715, "grad_norm": 1.098470687866211, "learning_rate": 6.204781629795424e-06, "loss": 0.7539, "step": 8559 }, { "epoch": 0.43992188303011615, "grad_norm": 1.0596468448638916, "learning_rate": 6.203973880609418e-06, "loss": 0.7009, "step": 8560 }, { "epoch": 0.4399732757734608, "grad_norm": 0.6885043382644653, "learning_rate": 6.203166098066266e-06, "loss": 0.6445, "step": 8561 }, { "epoch": 0.44002466851680544, "grad_norm": 0.7829486131668091, "learning_rate": 6.202358282188346e-06, "loss": 0.6536, "step": 8562 }, { "epoch": 0.4400760612601501, "grad_norm": 1.1044014692306519, "learning_rate": 6.201550432998039e-06, "loss": 0.7799, "step": 8563 }, { "epoch": 0.44012745400349473, "grad_norm": 0.7496541738510132, "learning_rate": 6.200742550517729e-06, "loss": 0.6342, "step": 8564 }, { "epoch": 0.4401788467468393, "grad_norm": 1.0694981813430786, "learning_rate": 6.1999346347697986e-06, "loss": 0.6815, "step": 8565 }, { "epoch": 0.44023023949018397, "grad_norm": 1.1206732988357544, "learning_rate": 6.19912668577663e-06, "loss": 0.7714, "step": 8566 }, { "epoch": 0.4402816322335286, "grad_norm": 1.0986313819885254, "learning_rate": 6.19831870356061e-06, "loss": 0.7447, "step": 8567 }, { "epoch": 0.44033302497687327, "grad_norm": 0.9972346425056458, "learning_rate": 6.197510688144124e-06, "loss": 0.7196, "step": 8568 }, { "epoch": 0.4403844177202179, "grad_norm": 1.0339921712875366, "learning_rate": 6.196702639549557e-06, "loss": 0.7299, "step": 8569 }, { "epoch": 0.44043581046356256, "grad_norm": 1.1181626319885254, "learning_rate": 6.1958945577993e-06, "loss": 0.7213, "step": 8570 }, { "epoch": 0.4404872032069072, "grad_norm": 1.0512242317199707, "learning_rate": 6.19508644291574e-06, "loss": 0.7696, "step": 8571 }, { "epoch": 0.4405385959502518, "grad_norm": 1.1621426343917847, "learning_rate": 6.1942782949212665e-06, "loss": 0.7568, "step": 8572 }, { "epoch": 0.44058998869359645, "grad_norm": 1.0266258716583252, "learning_rate": 6.19347011383827e-06, "loss": 0.7343, "step": 8573 }, { "epoch": 0.4406413814369411, "grad_norm": 1.091570258140564, "learning_rate": 6.192661899689141e-06, "loss": 0.7119, "step": 8574 }, { "epoch": 0.44069277418028574, "grad_norm": 1.021521806716919, "learning_rate": 6.191853652496273e-06, "loss": 0.7096, "step": 8575 }, { "epoch": 0.4407441669236304, "grad_norm": 1.130705714225769, "learning_rate": 6.19104537228206e-06, "loss": 0.7954, "step": 8576 }, { "epoch": 0.44079555966697503, "grad_norm": 1.0801620483398438, "learning_rate": 6.190237059068893e-06, "loss": 0.7062, "step": 8577 }, { "epoch": 0.4408469524103197, "grad_norm": 0.8573058247566223, "learning_rate": 6.18942871287917e-06, "loss": 0.6666, "step": 8578 }, { "epoch": 0.44089834515366433, "grad_norm": 1.0760871171951294, "learning_rate": 6.188620333735284e-06, "loss": 0.7075, "step": 8579 }, { "epoch": 0.4409497378970089, "grad_norm": 1.0348376035690308, "learning_rate": 6.187811921659637e-06, "loss": 0.7557, "step": 8580 }, { "epoch": 0.44100113064035357, "grad_norm": 0.7962344288825989, "learning_rate": 6.187003476674621e-06, "loss": 0.6813, "step": 8581 }, { "epoch": 0.4410525233836982, "grad_norm": 0.7579706907272339, "learning_rate": 6.186194998802638e-06, "loss": 0.6793, "step": 8582 }, { "epoch": 0.44110391612704286, "grad_norm": 1.0536022186279297, "learning_rate": 6.185386488066087e-06, "loss": 0.7242, "step": 8583 }, { "epoch": 0.4411553088703875, "grad_norm": 1.0478723049163818, "learning_rate": 6.184577944487366e-06, "loss": 0.7541, "step": 8584 }, { "epoch": 0.44120670161373216, "grad_norm": 1.0063352584838867, "learning_rate": 6.183769368088882e-06, "loss": 0.7507, "step": 8585 }, { "epoch": 0.4412580943570768, "grad_norm": 0.9992077350616455, "learning_rate": 6.182960758893033e-06, "loss": 0.7332, "step": 8586 }, { "epoch": 0.4413094871004214, "grad_norm": 1.0855505466461182, "learning_rate": 6.182152116922222e-06, "loss": 0.7671, "step": 8587 }, { "epoch": 0.44136087984376604, "grad_norm": 1.0898454189300537, "learning_rate": 6.181343442198855e-06, "loss": 0.7697, "step": 8588 }, { "epoch": 0.4414122725871107, "grad_norm": 0.7950151562690735, "learning_rate": 6.180534734745336e-06, "loss": 0.6433, "step": 8589 }, { "epoch": 0.44146366533045533, "grad_norm": 1.030315637588501, "learning_rate": 6.17972599458407e-06, "loss": 0.7431, "step": 8590 }, { "epoch": 0.4415150580738, "grad_norm": 1.113896369934082, "learning_rate": 6.1789172217374675e-06, "loss": 0.7531, "step": 8591 }, { "epoch": 0.44156645081714463, "grad_norm": 1.0667732954025269, "learning_rate": 6.1781084162279326e-06, "loss": 0.7594, "step": 8592 }, { "epoch": 0.4416178435604893, "grad_norm": 1.1164042949676514, "learning_rate": 6.1772995780778754e-06, "loss": 0.7555, "step": 8593 }, { "epoch": 0.4416692363038339, "grad_norm": 0.8158687949180603, "learning_rate": 6.176490707309707e-06, "loss": 0.6583, "step": 8594 }, { "epoch": 0.4417206290471785, "grad_norm": 1.1119701862335205, "learning_rate": 6.175681803945834e-06, "loss": 0.748, "step": 8595 }, { "epoch": 0.44177202179052316, "grad_norm": 1.0504924058914185, "learning_rate": 6.174872868008671e-06, "loss": 0.7498, "step": 8596 }, { "epoch": 0.4418234145338678, "grad_norm": 0.6794732809066772, "learning_rate": 6.174063899520629e-06, "loss": 0.6722, "step": 8597 }, { "epoch": 0.44187480727721246, "grad_norm": 1.08985435962677, "learning_rate": 6.1732548985041205e-06, "loss": 0.7639, "step": 8598 }, { "epoch": 0.4419262000205571, "grad_norm": 1.125518560409546, "learning_rate": 6.172445864981561e-06, "loss": 0.7351, "step": 8599 }, { "epoch": 0.44197759276390175, "grad_norm": 2.6217942237854004, "learning_rate": 6.1716367989753646e-06, "loss": 0.6925, "step": 8600 }, { "epoch": 0.4420289855072464, "grad_norm": 1.0023314952850342, "learning_rate": 6.170827700507948e-06, "loss": 0.7166, "step": 8601 }, { "epoch": 0.44208037825059104, "grad_norm": 1.099668264389038, "learning_rate": 6.170018569601729e-06, "loss": 0.7054, "step": 8602 }, { "epoch": 0.44213177099393564, "grad_norm": 0.7717718482017517, "learning_rate": 6.169209406279122e-06, "loss": 0.6608, "step": 8603 }, { "epoch": 0.4421831637372803, "grad_norm": 1.0417449474334717, "learning_rate": 6.168400210562548e-06, "loss": 0.6909, "step": 8604 }, { "epoch": 0.44223455648062493, "grad_norm": 1.0432336330413818, "learning_rate": 6.167590982474427e-06, "loss": 0.7388, "step": 8605 }, { "epoch": 0.4422859492239696, "grad_norm": 1.0393587350845337, "learning_rate": 6.1667817220371775e-06, "loss": 0.7338, "step": 8606 }, { "epoch": 0.4423373419673142, "grad_norm": 1.2178142070770264, "learning_rate": 6.165972429273221e-06, "loss": 0.7607, "step": 8607 }, { "epoch": 0.44238873471065887, "grad_norm": 1.0836079120635986, "learning_rate": 6.16516310420498e-06, "loss": 0.7905, "step": 8608 }, { "epoch": 0.4424401274540035, "grad_norm": 1.0543299913406372, "learning_rate": 6.1643537468548785e-06, "loss": 0.7322, "step": 8609 }, { "epoch": 0.4424915201973481, "grad_norm": 1.0480260848999023, "learning_rate": 6.163544357245339e-06, "loss": 0.7329, "step": 8610 }, { "epoch": 0.44254291294069276, "grad_norm": 1.122348427772522, "learning_rate": 6.162734935398786e-06, "loss": 0.7247, "step": 8611 }, { "epoch": 0.4425943056840374, "grad_norm": 1.080051302909851, "learning_rate": 6.161925481337648e-06, "loss": 0.7436, "step": 8612 }, { "epoch": 0.44264569842738205, "grad_norm": 0.7509061098098755, "learning_rate": 6.16111599508435e-06, "loss": 0.6452, "step": 8613 }, { "epoch": 0.4426970911707267, "grad_norm": 1.0758047103881836, "learning_rate": 6.160306476661319e-06, "loss": 0.7995, "step": 8614 }, { "epoch": 0.44274848391407134, "grad_norm": 0.6906732320785522, "learning_rate": 6.159496926090983e-06, "loss": 0.6394, "step": 8615 }, { "epoch": 0.442799876657416, "grad_norm": 1.1165037155151367, "learning_rate": 6.158687343395773e-06, "loss": 0.7238, "step": 8616 }, { "epoch": 0.44285126940076064, "grad_norm": 1.105836033821106, "learning_rate": 6.157877728598118e-06, "loss": 0.7517, "step": 8617 }, { "epoch": 0.44290266214410523, "grad_norm": 0.8277804255485535, "learning_rate": 6.157068081720449e-06, "loss": 0.7159, "step": 8618 }, { "epoch": 0.4429540548874499, "grad_norm": 1.1816530227661133, "learning_rate": 6.1562584027852e-06, "loss": 0.7447, "step": 8619 }, { "epoch": 0.4430054476307945, "grad_norm": 1.0948307514190674, "learning_rate": 6.1554486918148e-06, "loss": 0.7857, "step": 8620 }, { "epoch": 0.44305684037413917, "grad_norm": 1.2470206022262573, "learning_rate": 6.154638948831687e-06, "loss": 0.7774, "step": 8621 }, { "epoch": 0.4431082331174838, "grad_norm": 1.0419647693634033, "learning_rate": 6.153829173858293e-06, "loss": 0.803, "step": 8622 }, { "epoch": 0.44315962586082847, "grad_norm": 1.1359453201293945, "learning_rate": 6.153019366917053e-06, "loss": 0.7395, "step": 8623 }, { "epoch": 0.4432110186041731, "grad_norm": 1.0342309474945068, "learning_rate": 6.152209528030406e-06, "loss": 0.7632, "step": 8624 }, { "epoch": 0.4432624113475177, "grad_norm": 0.8497276902198792, "learning_rate": 6.151399657220788e-06, "loss": 0.6546, "step": 8625 }, { "epoch": 0.44331380409086235, "grad_norm": 1.0763641595840454, "learning_rate": 6.150589754510636e-06, "loss": 0.7182, "step": 8626 }, { "epoch": 0.443365196834207, "grad_norm": 0.9959622621536255, "learning_rate": 6.14977981992239e-06, "loss": 0.7639, "step": 8627 }, { "epoch": 0.44341658957755165, "grad_norm": 1.0687144994735718, "learning_rate": 6.148969853478491e-06, "loss": 0.7245, "step": 8628 }, { "epoch": 0.4434679823208963, "grad_norm": 1.105849027633667, "learning_rate": 6.148159855201379e-06, "loss": 0.786, "step": 8629 }, { "epoch": 0.44351937506424094, "grad_norm": 1.0340808629989624, "learning_rate": 6.147349825113494e-06, "loss": 0.7312, "step": 8630 }, { "epoch": 0.4435707678075856, "grad_norm": 1.0555766820907593, "learning_rate": 6.146539763237283e-06, "loss": 0.7501, "step": 8631 }, { "epoch": 0.44362216055093023, "grad_norm": 1.0347938537597656, "learning_rate": 6.145729669595184e-06, "loss": 0.7033, "step": 8632 }, { "epoch": 0.4436735532942748, "grad_norm": 1.0456756353378296, "learning_rate": 6.144919544209644e-06, "loss": 0.7354, "step": 8633 }, { "epoch": 0.44372494603761947, "grad_norm": 0.9883608222007751, "learning_rate": 6.144109387103111e-06, "loss": 0.6761, "step": 8634 }, { "epoch": 0.4437763387809641, "grad_norm": 1.0230767726898193, "learning_rate": 6.143299198298025e-06, "loss": 0.7083, "step": 8635 }, { "epoch": 0.44382773152430877, "grad_norm": 0.7963536977767944, "learning_rate": 6.142488977816838e-06, "loss": 0.6832, "step": 8636 }, { "epoch": 0.4438791242676534, "grad_norm": 0.7524346113204956, "learning_rate": 6.141678725681997e-06, "loss": 0.6688, "step": 8637 }, { "epoch": 0.44393051701099806, "grad_norm": 1.0277385711669922, "learning_rate": 6.140868441915949e-06, "loss": 0.7022, "step": 8638 }, { "epoch": 0.4439819097543427, "grad_norm": 1.0472514629364014, "learning_rate": 6.1400581265411454e-06, "loss": 0.7274, "step": 8639 }, { "epoch": 0.4440333024976873, "grad_norm": 0.7473666071891785, "learning_rate": 6.139247779580034e-06, "loss": 0.7052, "step": 8640 }, { "epoch": 0.44408469524103195, "grad_norm": 1.0470846891403198, "learning_rate": 6.138437401055068e-06, "loss": 0.7215, "step": 8641 }, { "epoch": 0.4441360879843766, "grad_norm": 0.8383448719978333, "learning_rate": 6.137626990988701e-06, "loss": 0.6827, "step": 8642 }, { "epoch": 0.44418748072772124, "grad_norm": 1.0874866247177124, "learning_rate": 6.136816549403385e-06, "loss": 0.7846, "step": 8643 }, { "epoch": 0.4442388734710659, "grad_norm": 1.1708168983459473, "learning_rate": 6.136006076321573e-06, "loss": 0.7642, "step": 8644 }, { "epoch": 0.44429026621441053, "grad_norm": 0.7425611019134521, "learning_rate": 6.135195571765721e-06, "loss": 0.6725, "step": 8645 }, { "epoch": 0.4443416589577552, "grad_norm": 1.216227650642395, "learning_rate": 6.134385035758284e-06, "loss": 0.7087, "step": 8646 }, { "epoch": 0.44439305170109983, "grad_norm": 1.033188819885254, "learning_rate": 6.13357446832172e-06, "loss": 0.706, "step": 8647 }, { "epoch": 0.4444444444444444, "grad_norm": 1.0633041858673096, "learning_rate": 6.132763869478484e-06, "loss": 0.7301, "step": 8648 }, { "epoch": 0.44449583718778907, "grad_norm": 1.0413168668746948, "learning_rate": 6.131953239251037e-06, "loss": 0.7952, "step": 8649 }, { "epoch": 0.4445472299311337, "grad_norm": 1.0456479787826538, "learning_rate": 6.131142577661836e-06, "loss": 0.7251, "step": 8650 }, { "epoch": 0.44459862267447836, "grad_norm": 1.0407538414001465, "learning_rate": 6.130331884733343e-06, "loss": 0.6853, "step": 8651 }, { "epoch": 0.444650015417823, "grad_norm": 1.0133893489837646, "learning_rate": 6.129521160488018e-06, "loss": 0.742, "step": 8652 }, { "epoch": 0.44470140816116766, "grad_norm": 1.121391773223877, "learning_rate": 6.1287104049483215e-06, "loss": 0.7524, "step": 8653 }, { "epoch": 0.4447528009045123, "grad_norm": 1.0003796815872192, "learning_rate": 6.12789961813672e-06, "loss": 0.7512, "step": 8654 }, { "epoch": 0.44480419364785695, "grad_norm": 1.0605747699737549, "learning_rate": 6.127088800075673e-06, "loss": 0.7475, "step": 8655 }, { "epoch": 0.44485558639120154, "grad_norm": 1.07450270652771, "learning_rate": 6.126277950787647e-06, "loss": 0.7317, "step": 8656 }, { "epoch": 0.4449069791345462, "grad_norm": 1.0545673370361328, "learning_rate": 6.125467070295108e-06, "loss": 0.7602, "step": 8657 }, { "epoch": 0.44495837187789083, "grad_norm": 1.1869577169418335, "learning_rate": 6.12465615862052e-06, "loss": 0.7588, "step": 8658 }, { "epoch": 0.4450097646212355, "grad_norm": 1.0719866752624512, "learning_rate": 6.123845215786351e-06, "loss": 0.7491, "step": 8659 }, { "epoch": 0.44506115736458013, "grad_norm": 1.1084269285202026, "learning_rate": 6.123034241815069e-06, "loss": 0.7077, "step": 8660 }, { "epoch": 0.4451125501079248, "grad_norm": 1.075262188911438, "learning_rate": 6.122223236729141e-06, "loss": 0.745, "step": 8661 }, { "epoch": 0.4451639428512694, "grad_norm": 0.7542333602905273, "learning_rate": 6.12141220055104e-06, "loss": 0.6585, "step": 8662 }, { "epoch": 0.445215335594614, "grad_norm": 1.024070382118225, "learning_rate": 6.1206011333032345e-06, "loss": 0.7133, "step": 8663 }, { "epoch": 0.44526672833795866, "grad_norm": 1.105056881904602, "learning_rate": 6.119790035008195e-06, "loss": 0.7553, "step": 8664 }, { "epoch": 0.4453181210813033, "grad_norm": 1.0491613149642944, "learning_rate": 6.1189789056883975e-06, "loss": 0.7167, "step": 8665 }, { "epoch": 0.44536951382464796, "grad_norm": 1.047937273979187, "learning_rate": 6.11816774536631e-06, "loss": 0.7435, "step": 8666 }, { "epoch": 0.4454209065679926, "grad_norm": 0.7605351805686951, "learning_rate": 6.117356554064409e-06, "loss": 0.699, "step": 8667 }, { "epoch": 0.44547229931133725, "grad_norm": 1.1410140991210938, "learning_rate": 6.116545331805169e-06, "loss": 0.7408, "step": 8668 }, { "epoch": 0.4455236920546819, "grad_norm": 1.062819004058838, "learning_rate": 6.1157340786110656e-06, "loss": 0.7621, "step": 8669 }, { "epoch": 0.44557508479802654, "grad_norm": 0.980811595916748, "learning_rate": 6.114922794504577e-06, "loss": 0.7616, "step": 8670 }, { "epoch": 0.44562647754137114, "grad_norm": 0.9879646301269531, "learning_rate": 6.114111479508176e-06, "loss": 0.7511, "step": 8671 }, { "epoch": 0.4456778702847158, "grad_norm": 1.1515069007873535, "learning_rate": 6.113300133644343e-06, "loss": 0.7127, "step": 8672 }, { "epoch": 0.44572926302806043, "grad_norm": 0.7640554904937744, "learning_rate": 6.11248875693556e-06, "loss": 0.6833, "step": 8673 }, { "epoch": 0.4457806557714051, "grad_norm": 1.026353359222412, "learning_rate": 6.111677349404305e-06, "loss": 0.7026, "step": 8674 }, { "epoch": 0.4458320485147497, "grad_norm": 1.098001480102539, "learning_rate": 6.1108659110730565e-06, "loss": 0.7436, "step": 8675 }, { "epoch": 0.44588344125809437, "grad_norm": 0.7734015583992004, "learning_rate": 6.110054441964298e-06, "loss": 0.7093, "step": 8676 }, { "epoch": 0.445934834001439, "grad_norm": 1.0277550220489502, "learning_rate": 6.109242942100513e-06, "loss": 0.7275, "step": 8677 }, { "epoch": 0.4459862267447836, "grad_norm": 1.0378984212875366, "learning_rate": 6.108431411504183e-06, "loss": 0.7506, "step": 8678 }, { "epoch": 0.44603761948812826, "grad_norm": 1.070737361907959, "learning_rate": 6.107619850197794e-06, "loss": 0.6873, "step": 8679 }, { "epoch": 0.4460890122314729, "grad_norm": 1.014614224433899, "learning_rate": 6.1068082582038264e-06, "loss": 0.7579, "step": 8680 }, { "epoch": 0.44614040497481755, "grad_norm": 1.052404761314392, "learning_rate": 6.105996635544772e-06, "loss": 0.7104, "step": 8681 }, { "epoch": 0.4461917977181622, "grad_norm": 1.0825116634368896, "learning_rate": 6.105184982243115e-06, "loss": 0.7538, "step": 8682 }, { "epoch": 0.44624319046150684, "grad_norm": 1.0540229082107544, "learning_rate": 6.104373298321342e-06, "loss": 0.7316, "step": 8683 }, { "epoch": 0.4462945832048515, "grad_norm": 1.0700398683547974, "learning_rate": 6.103561583801943e-06, "loss": 0.7674, "step": 8684 }, { "epoch": 0.44634597594819614, "grad_norm": 1.041287899017334, "learning_rate": 6.102749838707407e-06, "loss": 0.718, "step": 8685 }, { "epoch": 0.44639736869154073, "grad_norm": 1.0677050352096558, "learning_rate": 6.1019380630602246e-06, "loss": 0.7298, "step": 8686 }, { "epoch": 0.4464487614348854, "grad_norm": 1.1162304878234863, "learning_rate": 6.101126256882885e-06, "loss": 0.7197, "step": 8687 }, { "epoch": 0.44650015417823, "grad_norm": 1.1728863716125488, "learning_rate": 6.100314420197879e-06, "loss": 0.752, "step": 8688 }, { "epoch": 0.44655154692157467, "grad_norm": 1.177954912185669, "learning_rate": 6.099502553027704e-06, "loss": 0.7137, "step": 8689 }, { "epoch": 0.4466029396649193, "grad_norm": 1.1424295902252197, "learning_rate": 6.09869065539485e-06, "loss": 0.7815, "step": 8690 }, { "epoch": 0.44665433240826397, "grad_norm": 0.7147906422615051, "learning_rate": 6.097878727321811e-06, "loss": 0.6552, "step": 8691 }, { "epoch": 0.4467057251516086, "grad_norm": 1.055344820022583, "learning_rate": 6.097066768831083e-06, "loss": 0.8121, "step": 8692 }, { "epoch": 0.44675711789495326, "grad_norm": 1.1017271280288696, "learning_rate": 6.096254779945161e-06, "loss": 0.7953, "step": 8693 }, { "epoch": 0.44680851063829785, "grad_norm": 1.0747833251953125, "learning_rate": 6.095442760686545e-06, "loss": 0.7586, "step": 8694 }, { "epoch": 0.4468599033816425, "grad_norm": 1.033912181854248, "learning_rate": 6.0946307110777316e-06, "loss": 0.7193, "step": 8695 }, { "epoch": 0.44691129612498715, "grad_norm": 1.0123625993728638, "learning_rate": 6.093818631141218e-06, "loss": 0.7526, "step": 8696 }, { "epoch": 0.4469626888683318, "grad_norm": 1.0873355865478516, "learning_rate": 6.093006520899502e-06, "loss": 0.7455, "step": 8697 }, { "epoch": 0.44701408161167644, "grad_norm": 1.0881823301315308, "learning_rate": 6.0921943803750885e-06, "loss": 0.7591, "step": 8698 }, { "epoch": 0.4470654743550211, "grad_norm": 1.1538639068603516, "learning_rate": 6.091382209590474e-06, "loss": 0.793, "step": 8699 }, { "epoch": 0.44711686709836573, "grad_norm": 0.9964667558670044, "learning_rate": 6.090570008568164e-06, "loss": 0.719, "step": 8700 }, { "epoch": 0.4471682598417103, "grad_norm": 1.062361478805542, "learning_rate": 6.089757777330658e-06, "loss": 0.6719, "step": 8701 }, { "epoch": 0.44721965258505497, "grad_norm": 1.140395164489746, "learning_rate": 6.088945515900461e-06, "loss": 0.7699, "step": 8702 }, { "epoch": 0.4472710453283996, "grad_norm": 0.7888919711112976, "learning_rate": 6.088133224300079e-06, "loss": 0.6699, "step": 8703 }, { "epoch": 0.44732243807174427, "grad_norm": 1.0718834400177002, "learning_rate": 6.087320902552014e-06, "loss": 0.7589, "step": 8704 }, { "epoch": 0.4473738308150889, "grad_norm": 1.0600528717041016, "learning_rate": 6.086508550678776e-06, "loss": 0.7509, "step": 8705 }, { "epoch": 0.44742522355843356, "grad_norm": 0.8602684736251831, "learning_rate": 6.085696168702869e-06, "loss": 0.6721, "step": 8706 }, { "epoch": 0.4474766163017782, "grad_norm": 1.0821882486343384, "learning_rate": 6.084883756646801e-06, "loss": 0.7797, "step": 8707 }, { "epoch": 0.44752800904512285, "grad_norm": 1.0473047494888306, "learning_rate": 6.084071314533082e-06, "loss": 0.7213, "step": 8708 }, { "epoch": 0.44757940178846745, "grad_norm": 1.1313908100128174, "learning_rate": 6.0832588423842195e-06, "loss": 0.7358, "step": 8709 }, { "epoch": 0.4476307945318121, "grad_norm": 1.0331586599349976, "learning_rate": 6.082446340222726e-06, "loss": 0.7557, "step": 8710 }, { "epoch": 0.44768218727515674, "grad_norm": 1.075469970703125, "learning_rate": 6.08163380807111e-06, "loss": 0.7549, "step": 8711 }, { "epoch": 0.4477335800185014, "grad_norm": 1.1163846254348755, "learning_rate": 6.0808212459518865e-06, "loss": 0.7633, "step": 8712 }, { "epoch": 0.44778497276184603, "grad_norm": 1.113763451576233, "learning_rate": 6.080008653887566e-06, "loss": 0.6874, "step": 8713 }, { "epoch": 0.4478363655051907, "grad_norm": 0.9843372702598572, "learning_rate": 6.079196031900663e-06, "loss": 0.7293, "step": 8714 }, { "epoch": 0.44788775824853533, "grad_norm": 1.1348650455474854, "learning_rate": 6.078383380013689e-06, "loss": 0.7331, "step": 8715 }, { "epoch": 0.4479391509918799, "grad_norm": 1.012511134147644, "learning_rate": 6.077570698249164e-06, "loss": 0.7359, "step": 8716 }, { "epoch": 0.44799054373522457, "grad_norm": 1.0930002927780151, "learning_rate": 6.076757986629602e-06, "loss": 0.7628, "step": 8717 }, { "epoch": 0.4480419364785692, "grad_norm": 0.7513021230697632, "learning_rate": 6.075945245177519e-06, "loss": 0.6938, "step": 8718 }, { "epoch": 0.44809332922191386, "grad_norm": 1.1562069654464722, "learning_rate": 6.075132473915435e-06, "loss": 0.7635, "step": 8719 }, { "epoch": 0.4481447219652585, "grad_norm": 0.8543563485145569, "learning_rate": 6.074319672865865e-06, "loss": 0.6779, "step": 8720 }, { "epoch": 0.44819611470860315, "grad_norm": 1.1893200874328613, "learning_rate": 6.073506842051331e-06, "loss": 0.7066, "step": 8721 }, { "epoch": 0.4482475074519478, "grad_norm": 1.08341383934021, "learning_rate": 6.072693981494352e-06, "loss": 0.6879, "step": 8722 }, { "epoch": 0.44829890019529245, "grad_norm": 1.070870041847229, "learning_rate": 6.07188109121745e-06, "loss": 0.725, "step": 8723 }, { "epoch": 0.44835029293863704, "grad_norm": 0.7500124573707581, "learning_rate": 6.071068171243146e-06, "loss": 0.6631, "step": 8724 }, { "epoch": 0.4484016856819817, "grad_norm": 1.0653276443481445, "learning_rate": 6.070255221593963e-06, "loss": 0.7522, "step": 8725 }, { "epoch": 0.44845307842532633, "grad_norm": 1.1105051040649414, "learning_rate": 6.069442242292425e-06, "loss": 0.718, "step": 8726 }, { "epoch": 0.448504471168671, "grad_norm": 1.032767415046692, "learning_rate": 6.0686292333610565e-06, "loss": 0.7544, "step": 8727 }, { "epoch": 0.44855586391201563, "grad_norm": 1.1087244749069214, "learning_rate": 6.06781619482238e-06, "loss": 0.7847, "step": 8728 }, { "epoch": 0.4486072566553603, "grad_norm": 1.1503602266311646, "learning_rate": 6.067003126698925e-06, "loss": 0.7618, "step": 8729 }, { "epoch": 0.4486586493987049, "grad_norm": 1.0570268630981445, "learning_rate": 6.066190029013217e-06, "loss": 0.753, "step": 8730 }, { "epoch": 0.44871004214204957, "grad_norm": 1.0824902057647705, "learning_rate": 6.065376901787781e-06, "loss": 0.7894, "step": 8731 }, { "epoch": 0.44876143488539416, "grad_norm": 1.0582715272903442, "learning_rate": 6.064563745045149e-06, "loss": 0.7367, "step": 8732 }, { "epoch": 0.4488128276287388, "grad_norm": 1.2017083168029785, "learning_rate": 6.063750558807848e-06, "loss": 0.7286, "step": 8733 }, { "epoch": 0.44886422037208346, "grad_norm": 1.1653245687484741, "learning_rate": 6.06293734309841e-06, "loss": 0.8064, "step": 8734 }, { "epoch": 0.4489156131154281, "grad_norm": 1.0741320848464966, "learning_rate": 6.062124097939363e-06, "loss": 0.708, "step": 8735 }, { "epoch": 0.44896700585877275, "grad_norm": 1.0196375846862793, "learning_rate": 6.061310823353242e-06, "loss": 0.6681, "step": 8736 }, { "epoch": 0.4490183986021174, "grad_norm": 0.7248873114585876, "learning_rate": 6.060497519362578e-06, "loss": 0.6599, "step": 8737 }, { "epoch": 0.44906979134546204, "grad_norm": 1.0733503103256226, "learning_rate": 6.059684185989905e-06, "loss": 0.7734, "step": 8738 }, { "epoch": 0.44912118408880664, "grad_norm": 1.1071665287017822, "learning_rate": 6.058870823257753e-06, "loss": 0.8375, "step": 8739 }, { "epoch": 0.4491725768321513, "grad_norm": 1.0948023796081543, "learning_rate": 6.058057431188663e-06, "loss": 0.7285, "step": 8740 }, { "epoch": 0.44922396957549593, "grad_norm": 1.1065912246704102, "learning_rate": 6.057244009805167e-06, "loss": 0.7171, "step": 8741 }, { "epoch": 0.4492753623188406, "grad_norm": 1.050552487373352, "learning_rate": 6.0564305591298024e-06, "loss": 0.7294, "step": 8742 }, { "epoch": 0.4493267550621852, "grad_norm": 1.0280983448028564, "learning_rate": 6.055617079185105e-06, "loss": 0.7197, "step": 8743 }, { "epoch": 0.44937814780552987, "grad_norm": 1.1325042247772217, "learning_rate": 6.054803569993617e-06, "loss": 0.6921, "step": 8744 }, { "epoch": 0.4494295405488745, "grad_norm": 1.0866978168487549, "learning_rate": 6.053990031577875e-06, "loss": 0.8101, "step": 8745 }, { "epoch": 0.44948093329221916, "grad_norm": 0.7294654846191406, "learning_rate": 6.053176463960417e-06, "loss": 0.6333, "step": 8746 }, { "epoch": 0.44953232603556376, "grad_norm": 1.0454052686691284, "learning_rate": 6.0523628671637865e-06, "loss": 0.7152, "step": 8747 }, { "epoch": 0.4495837187789084, "grad_norm": 0.7002802491188049, "learning_rate": 6.051549241210525e-06, "loss": 0.6329, "step": 8748 }, { "epoch": 0.44963511152225305, "grad_norm": 0.7702453136444092, "learning_rate": 6.050735586123171e-06, "loss": 0.6356, "step": 8749 }, { "epoch": 0.4496865042655977, "grad_norm": 1.0647058486938477, "learning_rate": 6.049921901924271e-06, "loss": 0.6905, "step": 8750 }, { "epoch": 0.44973789700894234, "grad_norm": 0.7184454798698425, "learning_rate": 6.04910818863637e-06, "loss": 0.641, "step": 8751 }, { "epoch": 0.449789289752287, "grad_norm": 1.13466215133667, "learning_rate": 6.048294446282008e-06, "loss": 0.7849, "step": 8752 }, { "epoch": 0.44984068249563164, "grad_norm": 1.0794284343719482, "learning_rate": 6.0474806748837325e-06, "loss": 0.703, "step": 8753 }, { "epoch": 0.44989207523897623, "grad_norm": 1.0722686052322388, "learning_rate": 6.046666874464091e-06, "loss": 0.7762, "step": 8754 }, { "epoch": 0.4499434679823209, "grad_norm": 1.0447022914886475, "learning_rate": 6.045853045045631e-06, "loss": 0.8023, "step": 8755 }, { "epoch": 0.4499948607256655, "grad_norm": 1.0594232082366943, "learning_rate": 6.0450391866508984e-06, "loss": 0.7303, "step": 8756 }, { "epoch": 0.45004625346901017, "grad_norm": 1.0883451700210571, "learning_rate": 6.044225299302442e-06, "loss": 0.7481, "step": 8757 }, { "epoch": 0.4500976462123548, "grad_norm": 0.7400407791137695, "learning_rate": 6.043411383022812e-06, "loss": 0.7309, "step": 8758 }, { "epoch": 0.45014903895569947, "grad_norm": 1.2627620697021484, "learning_rate": 6.04259743783456e-06, "loss": 0.7637, "step": 8759 }, { "epoch": 0.4502004316990441, "grad_norm": 1.0771883726119995, "learning_rate": 6.041783463760233e-06, "loss": 0.7358, "step": 8760 }, { "epoch": 0.45025182444238876, "grad_norm": 0.8023768663406372, "learning_rate": 6.040969460822387e-06, "loss": 0.6864, "step": 8761 }, { "epoch": 0.45030321718573335, "grad_norm": 0.7750018239021301, "learning_rate": 6.0401554290435724e-06, "loss": 0.6934, "step": 8762 }, { "epoch": 0.450354609929078, "grad_norm": 1.0368411540985107, "learning_rate": 6.039341368446344e-06, "loss": 0.6895, "step": 8763 }, { "epoch": 0.45040600267242265, "grad_norm": 1.1251578330993652, "learning_rate": 6.038527279053255e-06, "loss": 0.7877, "step": 8764 }, { "epoch": 0.4504573954157673, "grad_norm": 1.0740529298782349, "learning_rate": 6.03771316088686e-06, "loss": 0.7752, "step": 8765 }, { "epoch": 0.45050878815911194, "grad_norm": 1.1583425998687744, "learning_rate": 6.036899013969717e-06, "loss": 0.7562, "step": 8766 }, { "epoch": 0.4505601809024566, "grad_norm": 0.7255450487136841, "learning_rate": 6.0360848383243805e-06, "loss": 0.7217, "step": 8767 }, { "epoch": 0.45061157364580123, "grad_norm": 1.0998930931091309, "learning_rate": 6.035270633973409e-06, "loss": 0.7522, "step": 8768 }, { "epoch": 0.4506629663891459, "grad_norm": 0.8069698810577393, "learning_rate": 6.034456400939361e-06, "loss": 0.6706, "step": 8769 }, { "epoch": 0.45071435913249047, "grad_norm": 1.0711617469787598, "learning_rate": 6.033642139244794e-06, "loss": 0.708, "step": 8770 }, { "epoch": 0.4507657518758351, "grad_norm": 1.0387017726898193, "learning_rate": 6.032827848912271e-06, "loss": 0.7344, "step": 8771 }, { "epoch": 0.45081714461917977, "grad_norm": 1.043518304824829, "learning_rate": 6.032013529964349e-06, "loss": 0.7285, "step": 8772 }, { "epoch": 0.4508685373625244, "grad_norm": 1.0986995697021484, "learning_rate": 6.031199182423591e-06, "loss": 0.7518, "step": 8773 }, { "epoch": 0.45091993010586906, "grad_norm": 0.6784643530845642, "learning_rate": 6.0303848063125594e-06, "loss": 0.6686, "step": 8774 }, { "epoch": 0.4509713228492137, "grad_norm": 1.0071924924850464, "learning_rate": 6.029570401653817e-06, "loss": 0.6932, "step": 8775 }, { "epoch": 0.45102271559255835, "grad_norm": 1.1687572002410889, "learning_rate": 6.0287559684699255e-06, "loss": 0.7702, "step": 8776 }, { "epoch": 0.45107410833590295, "grad_norm": 1.1235078573226929, "learning_rate": 6.0279415067834546e-06, "loss": 0.8043, "step": 8777 }, { "epoch": 0.4511255010792476, "grad_norm": 1.0233426094055176, "learning_rate": 6.027127016616965e-06, "loss": 0.755, "step": 8778 }, { "epoch": 0.45117689382259224, "grad_norm": 1.067035436630249, "learning_rate": 6.026312497993025e-06, "loss": 0.683, "step": 8779 }, { "epoch": 0.4512282865659369, "grad_norm": 1.0684348344802856, "learning_rate": 6.0254979509342025e-06, "loss": 0.7165, "step": 8780 }, { "epoch": 0.45127967930928153, "grad_norm": 0.8247617483139038, "learning_rate": 6.0246833754630615e-06, "loss": 0.6698, "step": 8781 }, { "epoch": 0.4513310720526262, "grad_norm": 1.0869888067245483, "learning_rate": 6.023868771602174e-06, "loss": 0.7054, "step": 8782 }, { "epoch": 0.45138246479597083, "grad_norm": 1.0583577156066895, "learning_rate": 6.023054139374107e-06, "loss": 0.7294, "step": 8783 }, { "epoch": 0.4514338575393155, "grad_norm": 0.729859471321106, "learning_rate": 6.022239478801433e-06, "loss": 0.6898, "step": 8784 }, { "epoch": 0.45148525028266007, "grad_norm": 1.0447458028793335, "learning_rate": 6.0214247899067205e-06, "loss": 0.7493, "step": 8785 }, { "epoch": 0.4515366430260047, "grad_norm": 1.1456223726272583, "learning_rate": 6.020610072712542e-06, "loss": 0.7456, "step": 8786 }, { "epoch": 0.45158803576934936, "grad_norm": 1.0732054710388184, "learning_rate": 6.019795327241471e-06, "loss": 0.7066, "step": 8787 }, { "epoch": 0.451639428512694, "grad_norm": 0.8310574293136597, "learning_rate": 6.018980553516081e-06, "loss": 0.6568, "step": 8788 }, { "epoch": 0.45169082125603865, "grad_norm": 1.0504428148269653, "learning_rate": 6.018165751558943e-06, "loss": 0.7152, "step": 8789 }, { "epoch": 0.4517422139993833, "grad_norm": 0.9774937629699707, "learning_rate": 6.017350921392635e-06, "loss": 0.695, "step": 8790 }, { "epoch": 0.45179360674272795, "grad_norm": 0.8530800342559814, "learning_rate": 6.016536063039731e-06, "loss": 0.6731, "step": 8791 }, { "epoch": 0.45184499948607254, "grad_norm": 0.9511625170707703, "learning_rate": 6.015721176522806e-06, "loss": 0.7251, "step": 8792 }, { "epoch": 0.4518963922294172, "grad_norm": 1.0526080131530762, "learning_rate": 6.0149062618644415e-06, "loss": 0.6722, "step": 8793 }, { "epoch": 0.45194778497276183, "grad_norm": 0.9751661419868469, "learning_rate": 6.014091319087211e-06, "loss": 0.678, "step": 8794 }, { "epoch": 0.4519991777161065, "grad_norm": 1.1386967897415161, "learning_rate": 6.013276348213694e-06, "loss": 0.7687, "step": 8795 }, { "epoch": 0.45205057045945113, "grad_norm": 0.778996467590332, "learning_rate": 6.012461349266474e-06, "loss": 0.6821, "step": 8796 }, { "epoch": 0.4521019632027958, "grad_norm": 0.8433127999305725, "learning_rate": 6.011646322268127e-06, "loss": 0.6709, "step": 8797 }, { "epoch": 0.4521533559461404, "grad_norm": 1.1352248191833496, "learning_rate": 6.010831267241235e-06, "loss": 0.6916, "step": 8798 }, { "epoch": 0.45220474868948507, "grad_norm": 1.0445233583450317, "learning_rate": 6.010016184208381e-06, "loss": 0.6579, "step": 8799 }, { "epoch": 0.45225614143282966, "grad_norm": 0.7776066064834595, "learning_rate": 6.0092010731921435e-06, "loss": 0.6662, "step": 8800 }, { "epoch": 0.4523075341761743, "grad_norm": 0.6743345856666565, "learning_rate": 6.008385934215112e-06, "loss": 0.6304, "step": 8801 }, { "epoch": 0.45235892691951896, "grad_norm": 1.046325922012329, "learning_rate": 6.007570767299866e-06, "loss": 0.7266, "step": 8802 }, { "epoch": 0.4524103196628636, "grad_norm": 1.0551177263259888, "learning_rate": 6.006755572468993e-06, "loss": 0.6636, "step": 8803 }, { "epoch": 0.45246171240620825, "grad_norm": 1.0989246368408203, "learning_rate": 6.005940349745077e-06, "loss": 0.7148, "step": 8804 }, { "epoch": 0.4525131051495529, "grad_norm": 1.0230375528335571, "learning_rate": 6.005125099150705e-06, "loss": 0.7244, "step": 8805 }, { "epoch": 0.45256449789289754, "grad_norm": 1.1634154319763184, "learning_rate": 6.004309820708466e-06, "loss": 0.7639, "step": 8806 }, { "epoch": 0.45261589063624214, "grad_norm": 1.0131864547729492, "learning_rate": 6.003494514440946e-06, "loss": 0.6997, "step": 8807 }, { "epoch": 0.4526672833795868, "grad_norm": 1.046074390411377, "learning_rate": 6.002679180370733e-06, "loss": 0.7286, "step": 8808 }, { "epoch": 0.45271867612293143, "grad_norm": 1.0035896301269531, "learning_rate": 6.0018638185204195e-06, "loss": 0.7155, "step": 8809 }, { "epoch": 0.4527700688662761, "grad_norm": 0.8956099152565002, "learning_rate": 6.001048428912591e-06, "loss": 0.6778, "step": 8810 }, { "epoch": 0.4528214616096207, "grad_norm": 1.0704295635223389, "learning_rate": 6.000233011569845e-06, "loss": 0.6985, "step": 8811 }, { "epoch": 0.45287285435296537, "grad_norm": 1.0480855703353882, "learning_rate": 5.999417566514768e-06, "loss": 0.7651, "step": 8812 }, { "epoch": 0.45292424709631, "grad_norm": 1.1389119625091553, "learning_rate": 5.998602093769955e-06, "loss": 0.714, "step": 8813 }, { "epoch": 0.45297563983965466, "grad_norm": 1.1206594705581665, "learning_rate": 5.997786593358e-06, "loss": 0.7262, "step": 8814 }, { "epoch": 0.45302703258299926, "grad_norm": 1.0106931924819946, "learning_rate": 5.996971065301494e-06, "loss": 0.7184, "step": 8815 }, { "epoch": 0.4530784253263439, "grad_norm": 1.01264488697052, "learning_rate": 5.996155509623034e-06, "loss": 0.7316, "step": 8816 }, { "epoch": 0.45312981806968855, "grad_norm": 1.0353342294692993, "learning_rate": 5.995339926345219e-06, "loss": 0.6825, "step": 8817 }, { "epoch": 0.4531812108130332, "grad_norm": 1.0812112092971802, "learning_rate": 5.994524315490639e-06, "loss": 0.8164, "step": 8818 }, { "epoch": 0.45323260355637784, "grad_norm": 1.0570074319839478, "learning_rate": 5.993708677081895e-06, "loss": 0.7224, "step": 8819 }, { "epoch": 0.4532839962997225, "grad_norm": 1.236261010169983, "learning_rate": 5.992893011141585e-06, "loss": 0.7555, "step": 8820 }, { "epoch": 0.45333538904306714, "grad_norm": 1.1103880405426025, "learning_rate": 5.992077317692307e-06, "loss": 0.8053, "step": 8821 }, { "epoch": 0.4533867817864118, "grad_norm": 1.060664415359497, "learning_rate": 5.991261596756661e-06, "loss": 0.7893, "step": 8822 }, { "epoch": 0.4534381745297564, "grad_norm": 1.1645253896713257, "learning_rate": 5.990445848357247e-06, "loss": 0.6486, "step": 8823 }, { "epoch": 0.453489567273101, "grad_norm": 1.1562540531158447, "learning_rate": 5.989630072516665e-06, "loss": 0.7506, "step": 8824 }, { "epoch": 0.45354096001644567, "grad_norm": 1.0875499248504639, "learning_rate": 5.988814269257517e-06, "loss": 0.6898, "step": 8825 }, { "epoch": 0.4535923527597903, "grad_norm": 1.20587956905365, "learning_rate": 5.987998438602406e-06, "loss": 0.7269, "step": 8826 }, { "epoch": 0.45364374550313497, "grad_norm": 1.0175936222076416, "learning_rate": 5.987182580573937e-06, "loss": 0.7027, "step": 8827 }, { "epoch": 0.4536951382464796, "grad_norm": 1.021414875984192, "learning_rate": 5.986366695194713e-06, "loss": 0.7174, "step": 8828 }, { "epoch": 0.45374653098982426, "grad_norm": 1.1210459470748901, "learning_rate": 5.985550782487336e-06, "loss": 0.7505, "step": 8829 }, { "epoch": 0.45379792373316885, "grad_norm": 1.2500932216644287, "learning_rate": 5.984734842474417e-06, "loss": 0.7233, "step": 8830 }, { "epoch": 0.4538493164765135, "grad_norm": 0.8410046696662903, "learning_rate": 5.9839188751785575e-06, "loss": 0.6741, "step": 8831 }, { "epoch": 0.45390070921985815, "grad_norm": 1.081351399421692, "learning_rate": 5.983102880622366e-06, "loss": 0.8385, "step": 8832 }, { "epoch": 0.4539521019632028, "grad_norm": 1.076521396636963, "learning_rate": 5.982286858828452e-06, "loss": 0.7647, "step": 8833 }, { "epoch": 0.45400349470654744, "grad_norm": 1.0220509767532349, "learning_rate": 5.981470809819421e-06, "loss": 0.7243, "step": 8834 }, { "epoch": 0.4540548874498921, "grad_norm": 1.1124690771102905, "learning_rate": 5.980654733617885e-06, "loss": 0.7301, "step": 8835 }, { "epoch": 0.45410628019323673, "grad_norm": 1.1178840398788452, "learning_rate": 5.979838630246454e-06, "loss": 0.7985, "step": 8836 }, { "epoch": 0.4541576729365814, "grad_norm": 1.1785459518432617, "learning_rate": 5.979022499727737e-06, "loss": 0.8169, "step": 8837 }, { "epoch": 0.45420906567992597, "grad_norm": 1.0918692350387573, "learning_rate": 5.978206342084347e-06, "loss": 0.7215, "step": 8838 }, { "epoch": 0.4542604584232706, "grad_norm": 0.6651068329811096, "learning_rate": 5.977390157338897e-06, "loss": 0.6618, "step": 8839 }, { "epoch": 0.45431185116661527, "grad_norm": 1.1593281030654907, "learning_rate": 5.9765739455139986e-06, "loss": 0.7322, "step": 8840 }, { "epoch": 0.4543632439099599, "grad_norm": 0.9651076793670654, "learning_rate": 5.975757706632266e-06, "loss": 0.7421, "step": 8841 }, { "epoch": 0.45441463665330456, "grad_norm": 1.121661901473999, "learning_rate": 5.974941440716314e-06, "loss": 0.8054, "step": 8842 }, { "epoch": 0.4544660293966492, "grad_norm": 1.0248355865478516, "learning_rate": 5.974125147788759e-06, "loss": 0.6987, "step": 8843 }, { "epoch": 0.45451742213999385, "grad_norm": 1.1309994459152222, "learning_rate": 5.973308827872216e-06, "loss": 0.7698, "step": 8844 }, { "epoch": 0.45456881488333845, "grad_norm": 1.0594685077667236, "learning_rate": 5.9724924809893e-06, "loss": 0.7437, "step": 8845 }, { "epoch": 0.4546202076266831, "grad_norm": 1.0823743343353271, "learning_rate": 5.971676107162632e-06, "loss": 0.7885, "step": 8846 }, { "epoch": 0.45467160037002774, "grad_norm": 1.117125391960144, "learning_rate": 5.97085970641483e-06, "loss": 0.7388, "step": 8847 }, { "epoch": 0.4547229931133724, "grad_norm": 0.8462509512901306, "learning_rate": 5.9700432787685105e-06, "loss": 0.7087, "step": 8848 }, { "epoch": 0.45477438585671703, "grad_norm": 0.9970471858978271, "learning_rate": 5.969226824246295e-06, "loss": 0.7647, "step": 8849 }, { "epoch": 0.4548257786000617, "grad_norm": 1.0255184173583984, "learning_rate": 5.968410342870804e-06, "loss": 0.7518, "step": 8850 }, { "epoch": 0.45487717134340633, "grad_norm": 1.0873438119888306, "learning_rate": 5.96759383466466e-06, "loss": 0.7519, "step": 8851 }, { "epoch": 0.454928564086751, "grad_norm": 1.0741733312606812, "learning_rate": 5.966777299650483e-06, "loss": 0.8132, "step": 8852 }, { "epoch": 0.45497995683009557, "grad_norm": 0.7321688532829285, "learning_rate": 5.965960737850897e-06, "loss": 0.6752, "step": 8853 }, { "epoch": 0.4550313495734402, "grad_norm": 1.0968360900878906, "learning_rate": 5.965144149288525e-06, "loss": 0.724, "step": 8854 }, { "epoch": 0.45508274231678486, "grad_norm": 1.0788156986236572, "learning_rate": 5.964327533985991e-06, "loss": 0.8018, "step": 8855 }, { "epoch": 0.4551341350601295, "grad_norm": 1.0266183614730835, "learning_rate": 5.96351089196592e-06, "loss": 0.7472, "step": 8856 }, { "epoch": 0.45518552780347415, "grad_norm": 1.1603820323944092, "learning_rate": 5.96269422325094e-06, "loss": 0.7888, "step": 8857 }, { "epoch": 0.4552369205468188, "grad_norm": 0.708926796913147, "learning_rate": 5.9618775278636745e-06, "loss": 0.6654, "step": 8858 }, { "epoch": 0.45528831329016345, "grad_norm": 1.1360771656036377, "learning_rate": 5.961060805826753e-06, "loss": 0.6886, "step": 8859 }, { "epoch": 0.4553397060335081, "grad_norm": 1.0509719848632812, "learning_rate": 5.9602440571628024e-06, "loss": 0.7626, "step": 8860 }, { "epoch": 0.4553910987768527, "grad_norm": 1.0279475450515747, "learning_rate": 5.959427281894452e-06, "loss": 0.7025, "step": 8861 }, { "epoch": 0.45544249152019733, "grad_norm": 0.990743100643158, "learning_rate": 5.958610480044331e-06, "loss": 0.726, "step": 8862 }, { "epoch": 0.455493884263542, "grad_norm": 1.0370988845825195, "learning_rate": 5.957793651635069e-06, "loss": 0.704, "step": 8863 }, { "epoch": 0.45554527700688663, "grad_norm": 0.713202714920044, "learning_rate": 5.956976796689298e-06, "loss": 0.7083, "step": 8864 }, { "epoch": 0.4555966697502313, "grad_norm": 1.0848804712295532, "learning_rate": 5.95615991522965e-06, "loss": 0.741, "step": 8865 }, { "epoch": 0.4556480624935759, "grad_norm": 1.1045877933502197, "learning_rate": 5.9553430072787545e-06, "loss": 0.7756, "step": 8866 }, { "epoch": 0.45569945523692057, "grad_norm": 1.1364927291870117, "learning_rate": 5.954526072859248e-06, "loss": 0.7517, "step": 8867 }, { "epoch": 0.45575084798026516, "grad_norm": 1.0900721549987793, "learning_rate": 5.953709111993763e-06, "loss": 0.746, "step": 8868 }, { "epoch": 0.4558022407236098, "grad_norm": 0.7207632660865784, "learning_rate": 5.952892124704933e-06, "loss": 0.6536, "step": 8869 }, { "epoch": 0.45585363346695446, "grad_norm": 1.098912239074707, "learning_rate": 5.952075111015396e-06, "loss": 0.7371, "step": 8870 }, { "epoch": 0.4559050262102991, "grad_norm": 1.1477363109588623, "learning_rate": 5.9512580709477865e-06, "loss": 0.8166, "step": 8871 }, { "epoch": 0.45595641895364375, "grad_norm": 1.0114022493362427, "learning_rate": 5.950441004524742e-06, "loss": 0.7537, "step": 8872 }, { "epoch": 0.4560078116969884, "grad_norm": 0.6834369897842407, "learning_rate": 5.949623911768899e-06, "loss": 0.6589, "step": 8873 }, { "epoch": 0.45605920444033304, "grad_norm": 0.9901047945022583, "learning_rate": 5.948806792702896e-06, "loss": 0.733, "step": 8874 }, { "epoch": 0.4561105971836777, "grad_norm": 0.8677506446838379, "learning_rate": 5.947989647349372e-06, "loss": 0.7416, "step": 8875 }, { "epoch": 0.4561619899270223, "grad_norm": 1.0808535814285278, "learning_rate": 5.947172475730967e-06, "loss": 0.756, "step": 8876 }, { "epoch": 0.45621338267036693, "grad_norm": 1.030073642730713, "learning_rate": 5.946355277870322e-06, "loss": 0.7692, "step": 8877 }, { "epoch": 0.4562647754137116, "grad_norm": 1.1000250577926636, "learning_rate": 5.9455380537900776e-06, "loss": 0.7408, "step": 8878 }, { "epoch": 0.4563161681570562, "grad_norm": 1.0278034210205078, "learning_rate": 5.944720803512874e-06, "loss": 0.7735, "step": 8879 }, { "epoch": 0.45636756090040087, "grad_norm": 1.0684130191802979, "learning_rate": 5.943903527061359e-06, "loss": 0.7345, "step": 8880 }, { "epoch": 0.4564189536437455, "grad_norm": 0.7972928881645203, "learning_rate": 5.94308622445817e-06, "loss": 0.6416, "step": 8881 }, { "epoch": 0.45647034638709016, "grad_norm": 1.0249911546707153, "learning_rate": 5.942268895725955e-06, "loss": 0.7868, "step": 8882 }, { "epoch": 0.45652173913043476, "grad_norm": 1.025810718536377, "learning_rate": 5.9414515408873565e-06, "loss": 0.7837, "step": 8883 }, { "epoch": 0.4565731318737794, "grad_norm": 0.9920753240585327, "learning_rate": 5.9406341599650215e-06, "loss": 0.7756, "step": 8884 }, { "epoch": 0.45662452461712405, "grad_norm": 0.7278254628181458, "learning_rate": 5.939816752981594e-06, "loss": 0.6557, "step": 8885 }, { "epoch": 0.4566759173604687, "grad_norm": 1.0390771627426147, "learning_rate": 5.938999319959724e-06, "loss": 0.712, "step": 8886 }, { "epoch": 0.45672731010381334, "grad_norm": 1.0876400470733643, "learning_rate": 5.938181860922059e-06, "loss": 0.7359, "step": 8887 }, { "epoch": 0.456778702847158, "grad_norm": 1.1079777479171753, "learning_rate": 5.937364375891244e-06, "loss": 0.6797, "step": 8888 }, { "epoch": 0.45683009559050264, "grad_norm": 1.06488835811615, "learning_rate": 5.936546864889931e-06, "loss": 0.737, "step": 8889 }, { "epoch": 0.4568814883338473, "grad_norm": 1.0920110940933228, "learning_rate": 5.93572932794077e-06, "loss": 0.7562, "step": 8890 }, { "epoch": 0.4569328810771919, "grad_norm": 1.0269219875335693, "learning_rate": 5.9349117650664115e-06, "loss": 0.7464, "step": 8891 }, { "epoch": 0.4569842738205365, "grad_norm": 1.1151102781295776, "learning_rate": 5.934094176289505e-06, "loss": 0.7241, "step": 8892 }, { "epoch": 0.45703566656388117, "grad_norm": 1.0413362979888916, "learning_rate": 5.9332765616327035e-06, "loss": 0.7139, "step": 8893 }, { "epoch": 0.4570870593072258, "grad_norm": 1.061132788658142, "learning_rate": 5.932458921118661e-06, "loss": 0.7046, "step": 8894 }, { "epoch": 0.45713845205057047, "grad_norm": 1.0489342212677002, "learning_rate": 5.931641254770028e-06, "loss": 0.7577, "step": 8895 }, { "epoch": 0.4571898447939151, "grad_norm": 1.037558913230896, "learning_rate": 5.930823562609464e-06, "loss": 0.725, "step": 8896 }, { "epoch": 0.45724123753725976, "grad_norm": 1.074945330619812, "learning_rate": 5.930005844659616e-06, "loss": 0.7044, "step": 8897 }, { "epoch": 0.4572926302806044, "grad_norm": 1.0921604633331299, "learning_rate": 5.929188100943146e-06, "loss": 0.7408, "step": 8898 }, { "epoch": 0.457344023023949, "grad_norm": 1.00763738155365, "learning_rate": 5.928370331482709e-06, "loss": 0.7523, "step": 8899 }, { "epoch": 0.45739541576729364, "grad_norm": 0.9662325382232666, "learning_rate": 5.927552536300961e-06, "loss": 0.772, "step": 8900 }, { "epoch": 0.4574468085106383, "grad_norm": 1.0094407796859741, "learning_rate": 5.926734715420559e-06, "loss": 0.7609, "step": 8901 }, { "epoch": 0.45749820125398294, "grad_norm": 1.112140417098999, "learning_rate": 5.925916868864163e-06, "loss": 0.7708, "step": 8902 }, { "epoch": 0.4575495939973276, "grad_norm": 1.0625040531158447, "learning_rate": 5.925098996654432e-06, "loss": 0.7352, "step": 8903 }, { "epoch": 0.45760098674067223, "grad_norm": 1.1343601942062378, "learning_rate": 5.924281098814025e-06, "loss": 0.7757, "step": 8904 }, { "epoch": 0.4576523794840169, "grad_norm": 1.1333168745040894, "learning_rate": 5.923463175365603e-06, "loss": 0.7772, "step": 8905 }, { "epoch": 0.45770377222736147, "grad_norm": 1.106407642364502, "learning_rate": 5.922645226331827e-06, "loss": 0.7368, "step": 8906 }, { "epoch": 0.4577551649707061, "grad_norm": 0.9942788481712341, "learning_rate": 5.921827251735359e-06, "loss": 0.6851, "step": 8907 }, { "epoch": 0.45780655771405077, "grad_norm": 1.0715062618255615, "learning_rate": 5.921009251598864e-06, "loss": 0.75, "step": 8908 }, { "epoch": 0.4578579504573954, "grad_norm": 1.036826252937317, "learning_rate": 5.920191225945001e-06, "loss": 0.7448, "step": 8909 }, { "epoch": 0.45790934320074006, "grad_norm": 1.1309218406677246, "learning_rate": 5.919373174796438e-06, "loss": 0.7734, "step": 8910 }, { "epoch": 0.4579607359440847, "grad_norm": 1.0349030494689941, "learning_rate": 5.918555098175838e-06, "loss": 0.6973, "step": 8911 }, { "epoch": 0.45801212868742935, "grad_norm": 1.1430330276489258, "learning_rate": 5.917736996105867e-06, "loss": 0.7525, "step": 8912 }, { "epoch": 0.458063521430774, "grad_norm": 2.8730721473693848, "learning_rate": 5.9169188686091915e-06, "loss": 0.7975, "step": 8913 }, { "epoch": 0.4581149141741186, "grad_norm": 1.1182926893234253, "learning_rate": 5.916100715708477e-06, "loss": 0.7553, "step": 8914 }, { "epoch": 0.45816630691746324, "grad_norm": 0.837189793586731, "learning_rate": 5.9152825374263934e-06, "loss": 0.7017, "step": 8915 }, { "epoch": 0.4582176996608079, "grad_norm": 3.481139659881592, "learning_rate": 5.914464333785608e-06, "loss": 0.7562, "step": 8916 }, { "epoch": 0.45826909240415253, "grad_norm": 1.005302906036377, "learning_rate": 5.913646104808788e-06, "loss": 0.7192, "step": 8917 }, { "epoch": 0.4583204851474972, "grad_norm": 1.0742340087890625, "learning_rate": 5.912827850518606e-06, "loss": 0.7548, "step": 8918 }, { "epoch": 0.45837187789084183, "grad_norm": 0.7497778534889221, "learning_rate": 5.912009570937729e-06, "loss": 0.6611, "step": 8919 }, { "epoch": 0.4584232706341865, "grad_norm": 1.0084547996520996, "learning_rate": 5.911191266088834e-06, "loss": 0.7422, "step": 8920 }, { "epoch": 0.45847466337753107, "grad_norm": 1.0855684280395508, "learning_rate": 5.910372935994587e-06, "loss": 0.7346, "step": 8921 }, { "epoch": 0.4585260561208757, "grad_norm": 1.1071358919143677, "learning_rate": 5.909554580677663e-06, "loss": 0.7494, "step": 8922 }, { "epoch": 0.45857744886422036, "grad_norm": 1.0432698726654053, "learning_rate": 5.908736200160736e-06, "loss": 0.7171, "step": 8923 }, { "epoch": 0.458628841607565, "grad_norm": 1.1482576131820679, "learning_rate": 5.90791779446648e-06, "loss": 0.7566, "step": 8924 }, { "epoch": 0.45868023435090965, "grad_norm": 1.041137933731079, "learning_rate": 5.907099363617567e-06, "loss": 0.7383, "step": 8925 }, { "epoch": 0.4587316270942543, "grad_norm": 1.1440412998199463, "learning_rate": 5.906280907636675e-06, "loss": 0.7703, "step": 8926 }, { "epoch": 0.45878301983759895, "grad_norm": 1.075931429862976, "learning_rate": 5.905462426546476e-06, "loss": 0.7866, "step": 8927 }, { "epoch": 0.4588344125809436, "grad_norm": 1.1004613637924194, "learning_rate": 5.904643920369652e-06, "loss": 0.7265, "step": 8928 }, { "epoch": 0.4588858053242882, "grad_norm": 1.0379250049591064, "learning_rate": 5.903825389128878e-06, "loss": 0.6747, "step": 8929 }, { "epoch": 0.45893719806763283, "grad_norm": 1.0629864931106567, "learning_rate": 5.903006832846833e-06, "loss": 0.792, "step": 8930 }, { "epoch": 0.4589885908109775, "grad_norm": 1.1615045070648193, "learning_rate": 5.9021882515461955e-06, "loss": 0.7542, "step": 8931 }, { "epoch": 0.45903998355432213, "grad_norm": 1.0473079681396484, "learning_rate": 5.901369645249645e-06, "loss": 0.7288, "step": 8932 }, { "epoch": 0.4590913762976668, "grad_norm": 1.0952554941177368, "learning_rate": 5.9005510139798595e-06, "loss": 0.6933, "step": 8933 }, { "epoch": 0.4591427690410114, "grad_norm": 1.0468007326126099, "learning_rate": 5.899732357759523e-06, "loss": 0.7743, "step": 8934 }, { "epoch": 0.45919416178435607, "grad_norm": 1.0818570852279663, "learning_rate": 5.898913676611315e-06, "loss": 0.7555, "step": 8935 }, { "epoch": 0.45924555452770066, "grad_norm": 1.1436657905578613, "learning_rate": 5.898094970557919e-06, "loss": 0.726, "step": 8936 }, { "epoch": 0.4592969472710453, "grad_norm": 0.7811368703842163, "learning_rate": 5.897276239622017e-06, "loss": 0.7249, "step": 8937 }, { "epoch": 0.45934834001438996, "grad_norm": 1.0438807010650635, "learning_rate": 5.8964574838262944e-06, "loss": 0.7383, "step": 8938 }, { "epoch": 0.4593997327577346, "grad_norm": 1.066916823387146, "learning_rate": 5.895638703193434e-06, "loss": 0.7363, "step": 8939 }, { "epoch": 0.45945112550107925, "grad_norm": 1.0424696207046509, "learning_rate": 5.894819897746121e-06, "loss": 0.7623, "step": 8940 }, { "epoch": 0.4595025182444239, "grad_norm": 1.1100372076034546, "learning_rate": 5.894001067507041e-06, "loss": 0.7456, "step": 8941 }, { "epoch": 0.45955391098776854, "grad_norm": 1.1429909467697144, "learning_rate": 5.893182212498882e-06, "loss": 0.7111, "step": 8942 }, { "epoch": 0.4596053037311132, "grad_norm": 1.0366735458374023, "learning_rate": 5.892363332744329e-06, "loss": 0.6266, "step": 8943 }, { "epoch": 0.4596566964744578, "grad_norm": 1.0912282466888428, "learning_rate": 5.891544428266071e-06, "loss": 0.7641, "step": 8944 }, { "epoch": 0.45970808921780243, "grad_norm": 1.0330140590667725, "learning_rate": 5.890725499086796e-06, "loss": 0.7701, "step": 8945 }, { "epoch": 0.4597594819611471, "grad_norm": 1.0573067665100098, "learning_rate": 5.889906545229192e-06, "loss": 0.7292, "step": 8946 }, { "epoch": 0.4598108747044917, "grad_norm": 1.0763170719146729, "learning_rate": 5.889087566715952e-06, "loss": 0.7293, "step": 8947 }, { "epoch": 0.45986226744783637, "grad_norm": 0.6770279407501221, "learning_rate": 5.888268563569762e-06, "loss": 0.6425, "step": 8948 }, { "epoch": 0.459913660191181, "grad_norm": 0.7366620302200317, "learning_rate": 5.887449535813318e-06, "loss": 0.6363, "step": 8949 }, { "epoch": 0.45996505293452566, "grad_norm": 1.0756702423095703, "learning_rate": 5.886630483469309e-06, "loss": 0.7421, "step": 8950 }, { "epoch": 0.4600164456778703, "grad_norm": 0.7815482020378113, "learning_rate": 5.885811406560428e-06, "loss": 0.6845, "step": 8951 }, { "epoch": 0.4600678384212149, "grad_norm": 1.1058789491653442, "learning_rate": 5.88499230510937e-06, "loss": 0.7175, "step": 8952 }, { "epoch": 0.46011923116455955, "grad_norm": 1.1916143894195557, "learning_rate": 5.884173179138826e-06, "loss": 0.6629, "step": 8953 }, { "epoch": 0.4601706239079042, "grad_norm": 1.1176693439483643, "learning_rate": 5.88335402867149e-06, "loss": 0.7127, "step": 8954 }, { "epoch": 0.46022201665124884, "grad_norm": 1.0726125240325928, "learning_rate": 5.882534853730062e-06, "loss": 0.768, "step": 8955 }, { "epoch": 0.4602734093945935, "grad_norm": 1.0672916173934937, "learning_rate": 5.881715654337235e-06, "loss": 0.729, "step": 8956 }, { "epoch": 0.46032480213793814, "grad_norm": 1.0780296325683594, "learning_rate": 5.880896430515706e-06, "loss": 0.72, "step": 8957 }, { "epoch": 0.4603761948812828, "grad_norm": 1.064030647277832, "learning_rate": 5.880077182288169e-06, "loss": 0.7252, "step": 8958 }, { "epoch": 0.4604275876246274, "grad_norm": 0.8556851148605347, "learning_rate": 5.879257909677329e-06, "loss": 0.7201, "step": 8959 }, { "epoch": 0.460478980367972, "grad_norm": 0.7724310159683228, "learning_rate": 5.878438612705879e-06, "loss": 0.6328, "step": 8960 }, { "epoch": 0.46053037311131667, "grad_norm": 1.0718120336532593, "learning_rate": 5.877619291396522e-06, "loss": 0.7448, "step": 8961 }, { "epoch": 0.4605817658546613, "grad_norm": 1.0667632818222046, "learning_rate": 5.876799945771954e-06, "loss": 0.7409, "step": 8962 }, { "epoch": 0.46063315859800597, "grad_norm": 0.7033167481422424, "learning_rate": 5.875980575854878e-06, "loss": 0.6096, "step": 8963 }, { "epoch": 0.4606845513413506, "grad_norm": 1.046469807624817, "learning_rate": 5.875161181667996e-06, "loss": 0.7326, "step": 8964 }, { "epoch": 0.46073594408469526, "grad_norm": 1.1148231029510498, "learning_rate": 5.87434176323401e-06, "loss": 0.7698, "step": 8965 }, { "epoch": 0.4607873368280399, "grad_norm": 1.1007137298583984, "learning_rate": 5.873522320575621e-06, "loss": 0.7532, "step": 8966 }, { "epoch": 0.4608387295713845, "grad_norm": 1.0778814554214478, "learning_rate": 5.872702853715532e-06, "loss": 0.7206, "step": 8967 }, { "epoch": 0.46089012231472914, "grad_norm": 0.7628300786018372, "learning_rate": 5.871883362676451e-06, "loss": 0.6239, "step": 8968 }, { "epoch": 0.4609415150580738, "grad_norm": 1.0552924871444702, "learning_rate": 5.871063847481078e-06, "loss": 0.7395, "step": 8969 }, { "epoch": 0.46099290780141844, "grad_norm": 1.0969772338867188, "learning_rate": 5.87024430815212e-06, "loss": 0.7442, "step": 8970 }, { "epoch": 0.4610443005447631, "grad_norm": 1.033203125, "learning_rate": 5.869424744712285e-06, "loss": 0.7149, "step": 8971 }, { "epoch": 0.46109569328810773, "grad_norm": 1.0113794803619385, "learning_rate": 5.868605157184279e-06, "loss": 0.7185, "step": 8972 }, { "epoch": 0.4611470860314524, "grad_norm": 1.1189467906951904, "learning_rate": 5.867785545590806e-06, "loss": 0.7427, "step": 8973 }, { "epoch": 0.46119847877479697, "grad_norm": 1.08404541015625, "learning_rate": 5.866965909954578e-06, "loss": 0.7678, "step": 8974 }, { "epoch": 0.4612498715181416, "grad_norm": 1.0680245161056519, "learning_rate": 5.8661462502983024e-06, "loss": 0.768, "step": 8975 }, { "epoch": 0.46130126426148627, "grad_norm": 1.0621349811553955, "learning_rate": 5.86532656664469e-06, "loss": 0.6948, "step": 8976 }, { "epoch": 0.4613526570048309, "grad_norm": 1.3989737033843994, "learning_rate": 5.864506859016448e-06, "loss": 0.7728, "step": 8977 }, { "epoch": 0.46140404974817556, "grad_norm": 1.1540238857269287, "learning_rate": 5.863687127436288e-06, "loss": 0.8093, "step": 8978 }, { "epoch": 0.4614554424915202, "grad_norm": 1.0131595134735107, "learning_rate": 5.862867371926922e-06, "loss": 0.7147, "step": 8979 }, { "epoch": 0.46150683523486485, "grad_norm": 1.0851927995681763, "learning_rate": 5.862047592511062e-06, "loss": 0.7201, "step": 8980 }, { "epoch": 0.4615582279782095, "grad_norm": 1.0080941915512085, "learning_rate": 5.861227789211423e-06, "loss": 0.7545, "step": 8981 }, { "epoch": 0.4616096207215541, "grad_norm": 1.176670789718628, "learning_rate": 5.860407962050714e-06, "loss": 0.7277, "step": 8982 }, { "epoch": 0.46166101346489874, "grad_norm": 1.1212631464004517, "learning_rate": 5.859588111051651e-06, "loss": 0.7134, "step": 8983 }, { "epoch": 0.4617124062082434, "grad_norm": 1.1416161060333252, "learning_rate": 5.858768236236949e-06, "loss": 0.7674, "step": 8984 }, { "epoch": 0.46176379895158803, "grad_norm": 1.0965406894683838, "learning_rate": 5.857948337629324e-06, "loss": 0.7633, "step": 8985 }, { "epoch": 0.4618151916949327, "grad_norm": 1.0567373037338257, "learning_rate": 5.85712841525149e-06, "loss": 0.7685, "step": 8986 }, { "epoch": 0.46186658443827733, "grad_norm": 1.1180740594863892, "learning_rate": 5.856308469126165e-06, "loss": 0.6777, "step": 8987 }, { "epoch": 0.461917977181622, "grad_norm": 1.0868412256240845, "learning_rate": 5.855488499276067e-06, "loss": 0.7214, "step": 8988 }, { "epoch": 0.4619693699249666, "grad_norm": 0.7879918813705444, "learning_rate": 5.854668505723911e-06, "loss": 0.6564, "step": 8989 }, { "epoch": 0.4620207626683112, "grad_norm": 1.0919828414916992, "learning_rate": 5.8538484884924195e-06, "loss": 0.708, "step": 8990 }, { "epoch": 0.46207215541165586, "grad_norm": 1.0887128114700317, "learning_rate": 5.853028447604309e-06, "loss": 0.736, "step": 8991 }, { "epoch": 0.4621235481550005, "grad_norm": 1.108296275138855, "learning_rate": 5.8522083830823e-06, "loss": 0.7599, "step": 8992 }, { "epoch": 0.46217494089834515, "grad_norm": 1.0775268077850342, "learning_rate": 5.851388294949116e-06, "loss": 0.7562, "step": 8993 }, { "epoch": 0.4622263336416898, "grad_norm": 1.0385046005249023, "learning_rate": 5.850568183227474e-06, "loss": 0.7003, "step": 8994 }, { "epoch": 0.46227772638503445, "grad_norm": 1.0174345970153809, "learning_rate": 5.849748047940098e-06, "loss": 0.7155, "step": 8995 }, { "epoch": 0.4623291191283791, "grad_norm": 1.1871230602264404, "learning_rate": 5.84892788910971e-06, "loss": 0.7598, "step": 8996 }, { "epoch": 0.4623805118717237, "grad_norm": 1.0645506381988525, "learning_rate": 5.8481077067590344e-06, "loss": 0.7223, "step": 8997 }, { "epoch": 0.46243190461506833, "grad_norm": 1.1522107124328613, "learning_rate": 5.847287500910794e-06, "loss": 0.7543, "step": 8998 }, { "epoch": 0.462483297358413, "grad_norm": 1.0858427286148071, "learning_rate": 5.846467271587712e-06, "loss": 0.7073, "step": 8999 }, { "epoch": 0.46253469010175763, "grad_norm": 0.9995402097702026, "learning_rate": 5.845647018812517e-06, "loss": 0.6965, "step": 9000 }, { "epoch": 0.4625860828451023, "grad_norm": 1.1073576211929321, "learning_rate": 5.844826742607932e-06, "loss": 0.7093, "step": 9001 }, { "epoch": 0.4626374755884469, "grad_norm": 1.0715878009796143, "learning_rate": 5.844006442996684e-06, "loss": 0.7633, "step": 9002 }, { "epoch": 0.46268886833179157, "grad_norm": 1.1467885971069336, "learning_rate": 5.843186120001502e-06, "loss": 0.8419, "step": 9003 }, { "epoch": 0.4627402610751362, "grad_norm": 0.6586687564849854, "learning_rate": 5.84236577364511e-06, "loss": 0.6796, "step": 9004 }, { "epoch": 0.4627916538184808, "grad_norm": 1.0261226892471313, "learning_rate": 5.841545403950241e-06, "loss": 0.7336, "step": 9005 }, { "epoch": 0.46284304656182546, "grad_norm": 1.125173568725586, "learning_rate": 5.840725010939621e-06, "loss": 0.6709, "step": 9006 }, { "epoch": 0.4628944393051701, "grad_norm": 1.0425140857696533, "learning_rate": 5.839904594635979e-06, "loss": 0.7213, "step": 9007 }, { "epoch": 0.46294583204851475, "grad_norm": 1.0032652616500854, "learning_rate": 5.8390841550620485e-06, "loss": 0.6975, "step": 9008 }, { "epoch": 0.4629972247918594, "grad_norm": 1.2033262252807617, "learning_rate": 5.838263692240557e-06, "loss": 0.7951, "step": 9009 }, { "epoch": 0.46304861753520404, "grad_norm": 1.072046160697937, "learning_rate": 5.837443206194239e-06, "loss": 0.7756, "step": 9010 }, { "epoch": 0.4631000102785487, "grad_norm": 1.1966664791107178, "learning_rate": 5.836622696945825e-06, "loss": 0.7884, "step": 9011 }, { "epoch": 0.4631514030218933, "grad_norm": 0.763951301574707, "learning_rate": 5.835802164518049e-06, "loss": 0.661, "step": 9012 }, { "epoch": 0.46320279576523793, "grad_norm": 1.0983941555023193, "learning_rate": 5.834981608933646e-06, "loss": 0.729, "step": 9013 }, { "epoch": 0.4632541885085826, "grad_norm": 1.105281949043274, "learning_rate": 5.8341610302153465e-06, "loss": 0.7541, "step": 9014 }, { "epoch": 0.4633055812519272, "grad_norm": 1.0642547607421875, "learning_rate": 5.8333404283858864e-06, "loss": 0.7125, "step": 9015 }, { "epoch": 0.46335697399527187, "grad_norm": 0.7126036882400513, "learning_rate": 5.832519803468003e-06, "loss": 0.6668, "step": 9016 }, { "epoch": 0.4634083667386165, "grad_norm": 0.7853831052780151, "learning_rate": 5.8316991554844325e-06, "loss": 0.7318, "step": 9017 }, { "epoch": 0.46345975948196116, "grad_norm": 1.0368157625198364, "learning_rate": 5.830878484457909e-06, "loss": 0.7225, "step": 9018 }, { "epoch": 0.4635111522253058, "grad_norm": 1.1008564233779907, "learning_rate": 5.830057790411173e-06, "loss": 0.7108, "step": 9019 }, { "epoch": 0.4635625449686504, "grad_norm": 1.0381215810775757, "learning_rate": 5.829237073366959e-06, "loss": 0.7306, "step": 9020 }, { "epoch": 0.46361393771199505, "grad_norm": 0.8310261964797974, "learning_rate": 5.828416333348009e-06, "loss": 0.6766, "step": 9021 }, { "epoch": 0.4636653304553397, "grad_norm": 1.0303864479064941, "learning_rate": 5.8275955703770615e-06, "loss": 0.6795, "step": 9022 }, { "epoch": 0.46371672319868434, "grad_norm": 1.105635643005371, "learning_rate": 5.826774784476855e-06, "loss": 0.7478, "step": 9023 }, { "epoch": 0.463768115942029, "grad_norm": 1.052188515663147, "learning_rate": 5.825953975670132e-06, "loss": 0.7188, "step": 9024 }, { "epoch": 0.46381950868537364, "grad_norm": 1.0696007013320923, "learning_rate": 5.825133143979633e-06, "loss": 0.736, "step": 9025 }, { "epoch": 0.4638709014287183, "grad_norm": 1.1537314653396606, "learning_rate": 5.824312289428099e-06, "loss": 0.7869, "step": 9026 }, { "epoch": 0.46392229417206293, "grad_norm": 1.0709136724472046, "learning_rate": 5.823491412038273e-06, "loss": 0.6675, "step": 9027 }, { "epoch": 0.4639736869154075, "grad_norm": 1.0840314626693726, "learning_rate": 5.8226705118329e-06, "loss": 0.7709, "step": 9028 }, { "epoch": 0.46402507965875217, "grad_norm": 1.1706689596176147, "learning_rate": 5.82184958883472e-06, "loss": 0.7383, "step": 9029 }, { "epoch": 0.4640764724020968, "grad_norm": 1.029422640800476, "learning_rate": 5.82102864306648e-06, "loss": 0.7412, "step": 9030 }, { "epoch": 0.46412786514544146, "grad_norm": 1.0164307355880737, "learning_rate": 5.820207674550924e-06, "loss": 0.7304, "step": 9031 }, { "epoch": 0.4641792578887861, "grad_norm": 1.068285584449768, "learning_rate": 5.819386683310801e-06, "loss": 0.7903, "step": 9032 }, { "epoch": 0.46423065063213076, "grad_norm": 0.9525502324104309, "learning_rate": 5.818565669368852e-06, "loss": 0.7017, "step": 9033 }, { "epoch": 0.4642820433754754, "grad_norm": 1.0377914905548096, "learning_rate": 5.817744632747826e-06, "loss": 0.7375, "step": 9034 }, { "epoch": 0.46433343611882, "grad_norm": 1.0358093976974487, "learning_rate": 5.816923573470472e-06, "loss": 0.7407, "step": 9035 }, { "epoch": 0.46438482886216464, "grad_norm": 0.7444321513175964, "learning_rate": 5.816102491559536e-06, "loss": 0.6619, "step": 9036 }, { "epoch": 0.4644362216055093, "grad_norm": 1.0119374990463257, "learning_rate": 5.815281387037769e-06, "loss": 0.6936, "step": 9037 }, { "epoch": 0.46448761434885394, "grad_norm": 1.0652567148208618, "learning_rate": 5.814460259927919e-06, "loss": 0.7545, "step": 9038 }, { "epoch": 0.4645390070921986, "grad_norm": 1.0876790285110474, "learning_rate": 5.8136391102527355e-06, "loss": 0.7528, "step": 9039 }, { "epoch": 0.46459039983554323, "grad_norm": 1.068233847618103, "learning_rate": 5.81281793803497e-06, "loss": 0.7577, "step": 9040 }, { "epoch": 0.4646417925788879, "grad_norm": 0.8206691741943359, "learning_rate": 5.811996743297375e-06, "loss": 0.656, "step": 9041 }, { "epoch": 0.4646931853222325, "grad_norm": 0.8031511306762695, "learning_rate": 5.811175526062699e-06, "loss": 0.6547, "step": 9042 }, { "epoch": 0.4647445780655771, "grad_norm": 1.1287580728530884, "learning_rate": 5.810354286353699e-06, "loss": 0.7211, "step": 9043 }, { "epoch": 0.46479597080892177, "grad_norm": 1.0637571811676025, "learning_rate": 5.809533024193124e-06, "loss": 0.7162, "step": 9044 }, { "epoch": 0.4648473635522664, "grad_norm": 1.0583562850952148, "learning_rate": 5.808711739603731e-06, "loss": 0.711, "step": 9045 }, { "epoch": 0.46489875629561106, "grad_norm": 1.0513478517532349, "learning_rate": 5.807890432608272e-06, "loss": 0.7366, "step": 9046 }, { "epoch": 0.4649501490389557, "grad_norm": 1.010457992553711, "learning_rate": 5.807069103229504e-06, "loss": 0.6823, "step": 9047 }, { "epoch": 0.46500154178230035, "grad_norm": 0.7982476353645325, "learning_rate": 5.806247751490182e-06, "loss": 0.6446, "step": 9048 }, { "epoch": 0.465052934525645, "grad_norm": 1.1567963361740112, "learning_rate": 5.805426377413061e-06, "loss": 0.772, "step": 9049 }, { "epoch": 0.4651043272689896, "grad_norm": 1.0834085941314697, "learning_rate": 5.8046049810208985e-06, "loss": 0.7238, "step": 9050 }, { "epoch": 0.46515572001233424, "grad_norm": 1.0637526512145996, "learning_rate": 5.803783562336452e-06, "loss": 0.7572, "step": 9051 }, { "epoch": 0.4652071127556789, "grad_norm": 1.0176827907562256, "learning_rate": 5.8029621213824806e-06, "loss": 0.7217, "step": 9052 }, { "epoch": 0.46525850549902353, "grad_norm": 1.1860554218292236, "learning_rate": 5.802140658181744e-06, "loss": 0.7254, "step": 9053 }, { "epoch": 0.4653098982423682, "grad_norm": 0.7953958511352539, "learning_rate": 5.801319172757e-06, "loss": 0.6777, "step": 9054 }, { "epoch": 0.4653612909857128, "grad_norm": 1.100940465927124, "learning_rate": 5.8004976651310064e-06, "loss": 0.7889, "step": 9055 }, { "epoch": 0.4654126837290575, "grad_norm": 1.034502387046814, "learning_rate": 5.799676135326527e-06, "loss": 0.7369, "step": 9056 }, { "epoch": 0.4654640764724021, "grad_norm": 1.1026217937469482, "learning_rate": 5.798854583366322e-06, "loss": 0.8042, "step": 9057 }, { "epoch": 0.4655154692157467, "grad_norm": 1.0479249954223633, "learning_rate": 5.798033009273154e-06, "loss": 0.7582, "step": 9058 }, { "epoch": 0.46556686195909136, "grad_norm": 1.0674561262130737, "learning_rate": 5.797211413069784e-06, "loss": 0.7607, "step": 9059 }, { "epoch": 0.465618254702436, "grad_norm": 1.0040827989578247, "learning_rate": 5.796389794778975e-06, "loss": 0.7594, "step": 9060 }, { "epoch": 0.46566964744578065, "grad_norm": 1.0610641241073608, "learning_rate": 5.7955681544234906e-06, "loss": 0.7622, "step": 9061 }, { "epoch": 0.4657210401891253, "grad_norm": 1.1866360902786255, "learning_rate": 5.794746492026097e-06, "loss": 0.7197, "step": 9062 }, { "epoch": 0.46577243293246995, "grad_norm": 1.1726354360580444, "learning_rate": 5.7939248076095575e-06, "loss": 0.7333, "step": 9063 }, { "epoch": 0.4658238256758146, "grad_norm": 1.1168599128723145, "learning_rate": 5.7931031011966385e-06, "loss": 0.7548, "step": 9064 }, { "epoch": 0.4658752184191592, "grad_norm": 1.0656801462173462, "learning_rate": 5.792281372810106e-06, "loss": 0.7487, "step": 9065 }, { "epoch": 0.46592661116250383, "grad_norm": 1.05821692943573, "learning_rate": 5.791459622472725e-06, "loss": 0.7262, "step": 9066 }, { "epoch": 0.4659780039058485, "grad_norm": 1.0438910722732544, "learning_rate": 5.790637850207265e-06, "loss": 0.7261, "step": 9067 }, { "epoch": 0.46602939664919313, "grad_norm": 1.0939925909042358, "learning_rate": 5.789816056036492e-06, "loss": 0.7369, "step": 9068 }, { "epoch": 0.4660807893925378, "grad_norm": 1.0153177976608276, "learning_rate": 5.788994239983176e-06, "loss": 0.75, "step": 9069 }, { "epoch": 0.4661321821358824, "grad_norm": 1.0503993034362793, "learning_rate": 5.788172402070086e-06, "loss": 0.7098, "step": 9070 }, { "epoch": 0.46618357487922707, "grad_norm": 1.096700668334961, "learning_rate": 5.78735054231999e-06, "loss": 0.7187, "step": 9071 }, { "epoch": 0.4662349676225717, "grad_norm": 1.0928943157196045, "learning_rate": 5.786528660755659e-06, "loss": 0.8071, "step": 9072 }, { "epoch": 0.4662863603659163, "grad_norm": 1.1031432151794434, "learning_rate": 5.7857067573998675e-06, "loss": 0.7717, "step": 9073 }, { "epoch": 0.46633775310926096, "grad_norm": 1.0703964233398438, "learning_rate": 5.784884832275382e-06, "loss": 0.7649, "step": 9074 }, { "epoch": 0.4663891458526056, "grad_norm": 1.1120059490203857, "learning_rate": 5.784062885404978e-06, "loss": 0.7354, "step": 9075 }, { "epoch": 0.46644053859595025, "grad_norm": 1.059704303741455, "learning_rate": 5.783240916811426e-06, "loss": 0.6959, "step": 9076 }, { "epoch": 0.4664919313392949, "grad_norm": 1.087563157081604, "learning_rate": 5.782418926517501e-06, "loss": 0.692, "step": 9077 }, { "epoch": 0.46654332408263954, "grad_norm": 0.8931577801704407, "learning_rate": 5.781596914545977e-06, "loss": 0.6437, "step": 9078 }, { "epoch": 0.4665947168259842, "grad_norm": 1.1860404014587402, "learning_rate": 5.780774880919626e-06, "loss": 0.7631, "step": 9079 }, { "epoch": 0.46664610956932884, "grad_norm": 1.0655113458633423, "learning_rate": 5.779952825661228e-06, "loss": 0.6939, "step": 9080 }, { "epoch": 0.46669750231267343, "grad_norm": 1.1033114194869995, "learning_rate": 5.779130748793553e-06, "loss": 0.787, "step": 9081 }, { "epoch": 0.4667488950560181, "grad_norm": 0.7280256748199463, "learning_rate": 5.778308650339382e-06, "loss": 0.6181, "step": 9082 }, { "epoch": 0.4668002877993627, "grad_norm": 1.063771367073059, "learning_rate": 5.77748653032149e-06, "loss": 0.6855, "step": 9083 }, { "epoch": 0.46685168054270737, "grad_norm": 1.1214535236358643, "learning_rate": 5.776664388762654e-06, "loss": 0.7276, "step": 9084 }, { "epoch": 0.466903073286052, "grad_norm": 1.08709716796875, "learning_rate": 5.775842225685654e-06, "loss": 0.7217, "step": 9085 }, { "epoch": 0.46695446602939666, "grad_norm": 1.0346719026565552, "learning_rate": 5.775020041113268e-06, "loss": 0.7315, "step": 9086 }, { "epoch": 0.4670058587727413, "grad_norm": 1.0865516662597656, "learning_rate": 5.774197835068273e-06, "loss": 0.7528, "step": 9087 }, { "epoch": 0.4670572515160859, "grad_norm": 1.0703585147857666, "learning_rate": 5.773375607573451e-06, "loss": 0.7563, "step": 9088 }, { "epoch": 0.46710864425943055, "grad_norm": 1.161829948425293, "learning_rate": 5.7725533586515835e-06, "loss": 0.7658, "step": 9089 }, { "epoch": 0.4671600370027752, "grad_norm": 1.0884894132614136, "learning_rate": 5.7717310883254515e-06, "loss": 0.7481, "step": 9090 }, { "epoch": 0.46721142974611984, "grad_norm": 1.3401157855987549, "learning_rate": 5.7709087966178345e-06, "loss": 0.7518, "step": 9091 }, { "epoch": 0.4672628224894645, "grad_norm": 1.1092058420181274, "learning_rate": 5.770086483551516e-06, "loss": 0.758, "step": 9092 }, { "epoch": 0.46731421523280914, "grad_norm": 1.0136786699295044, "learning_rate": 5.769264149149279e-06, "loss": 0.6851, "step": 9093 }, { "epoch": 0.4673656079761538, "grad_norm": 1.1079891920089722, "learning_rate": 5.768441793433908e-06, "loss": 0.7544, "step": 9094 }, { "epoch": 0.46741700071949843, "grad_norm": 1.0423799753189087, "learning_rate": 5.767619416428185e-06, "loss": 0.7469, "step": 9095 }, { "epoch": 0.467468393462843, "grad_norm": 1.047964096069336, "learning_rate": 5.766797018154896e-06, "loss": 0.7103, "step": 9096 }, { "epoch": 0.46751978620618767, "grad_norm": 1.020954966545105, "learning_rate": 5.765974598636826e-06, "loss": 0.7399, "step": 9097 }, { "epoch": 0.4675711789495323, "grad_norm": 1.1197277307510376, "learning_rate": 5.765152157896762e-06, "loss": 0.7711, "step": 9098 }, { "epoch": 0.46762257169287696, "grad_norm": 0.790625274181366, "learning_rate": 5.764329695957489e-06, "loss": 0.6927, "step": 9099 }, { "epoch": 0.4676739644362216, "grad_norm": 1.081231951713562, "learning_rate": 5.763507212841793e-06, "loss": 0.7646, "step": 9100 }, { "epoch": 0.46772535717956626, "grad_norm": 1.1325751543045044, "learning_rate": 5.762684708572464e-06, "loss": 0.7005, "step": 9101 }, { "epoch": 0.4677767499229109, "grad_norm": 1.1213966608047485, "learning_rate": 5.761862183172288e-06, "loss": 0.7804, "step": 9102 }, { "epoch": 0.4678281426662555, "grad_norm": 1.4390102624893188, "learning_rate": 5.761039636664055e-06, "loss": 0.6872, "step": 9103 }, { "epoch": 0.46787953540960014, "grad_norm": 1.0561023950576782, "learning_rate": 5.760217069070555e-06, "loss": 0.7217, "step": 9104 }, { "epoch": 0.4679309281529448, "grad_norm": 1.0902947187423706, "learning_rate": 5.7593944804145775e-06, "loss": 0.7761, "step": 9105 }, { "epoch": 0.46798232089628944, "grad_norm": 0.7485927939414978, "learning_rate": 5.758571870718912e-06, "loss": 0.6497, "step": 9106 }, { "epoch": 0.4680337136396341, "grad_norm": 1.1094627380371094, "learning_rate": 5.757749240006352e-06, "loss": 0.7384, "step": 9107 }, { "epoch": 0.46808510638297873, "grad_norm": 1.058859944343567, "learning_rate": 5.756926588299686e-06, "loss": 0.6883, "step": 9108 }, { "epoch": 0.4681364991263234, "grad_norm": 1.0982186794281006, "learning_rate": 5.756103915621709e-06, "loss": 0.7394, "step": 9109 }, { "epoch": 0.468187891869668, "grad_norm": 1.0921043157577515, "learning_rate": 5.755281221995212e-06, "loss": 0.7453, "step": 9110 }, { "epoch": 0.4682392846130126, "grad_norm": 1.0325762033462524, "learning_rate": 5.754458507442989e-06, "loss": 0.7877, "step": 9111 }, { "epoch": 0.46829067735635727, "grad_norm": 1.0677783489227295, "learning_rate": 5.753635771987834e-06, "loss": 0.7374, "step": 9112 }, { "epoch": 0.4683420700997019, "grad_norm": 0.9708812832832336, "learning_rate": 5.752813015652543e-06, "loss": 0.7168, "step": 9113 }, { "epoch": 0.46839346284304656, "grad_norm": 1.0660696029663086, "learning_rate": 5.751990238459909e-06, "loss": 0.7017, "step": 9114 }, { "epoch": 0.4684448555863912, "grad_norm": 0.7915019392967224, "learning_rate": 5.75116744043273e-06, "loss": 0.6398, "step": 9115 }, { "epoch": 0.46849624832973585, "grad_norm": 1.0071666240692139, "learning_rate": 5.7503446215937995e-06, "loss": 0.7369, "step": 9116 }, { "epoch": 0.4685476410730805, "grad_norm": 1.0285992622375488, "learning_rate": 5.749521781965917e-06, "loss": 0.7048, "step": 9117 }, { "epoch": 0.46859903381642515, "grad_norm": 1.071171760559082, "learning_rate": 5.7486989215718806e-06, "loss": 0.7778, "step": 9118 }, { "epoch": 0.46865042655976974, "grad_norm": 1.0935696363449097, "learning_rate": 5.747876040434484e-06, "loss": 0.7563, "step": 9119 }, { "epoch": 0.4687018193031144, "grad_norm": 0.831864058971405, "learning_rate": 5.747053138576529e-06, "loss": 0.6895, "step": 9120 }, { "epoch": 0.46875321204645903, "grad_norm": 0.7572145462036133, "learning_rate": 5.746230216020815e-06, "loss": 0.7595, "step": 9121 }, { "epoch": 0.4688046047898037, "grad_norm": 1.1116102933883667, "learning_rate": 5.745407272790142e-06, "loss": 0.7687, "step": 9122 }, { "epoch": 0.4688559975331483, "grad_norm": 1.0225255489349365, "learning_rate": 5.744584308907308e-06, "loss": 0.7255, "step": 9123 }, { "epoch": 0.468907390276493, "grad_norm": 0.6658361554145813, "learning_rate": 5.743761324395116e-06, "loss": 0.6835, "step": 9124 }, { "epoch": 0.4689587830198376, "grad_norm": 1.028028964996338, "learning_rate": 5.742938319276368e-06, "loss": 0.7458, "step": 9125 }, { "epoch": 0.4690101757631822, "grad_norm": 1.0913782119750977, "learning_rate": 5.742115293573866e-06, "loss": 0.7938, "step": 9126 }, { "epoch": 0.46906156850652686, "grad_norm": 1.0903044939041138, "learning_rate": 5.7412922473104095e-06, "loss": 0.6991, "step": 9127 }, { "epoch": 0.4691129612498715, "grad_norm": 1.05997633934021, "learning_rate": 5.740469180508804e-06, "loss": 0.7848, "step": 9128 }, { "epoch": 0.46916435399321615, "grad_norm": 1.095229983329773, "learning_rate": 5.739646093191854e-06, "loss": 0.6849, "step": 9129 }, { "epoch": 0.4692157467365608, "grad_norm": 1.0129516124725342, "learning_rate": 5.738822985382364e-06, "loss": 0.7122, "step": 9130 }, { "epoch": 0.46926713947990545, "grad_norm": 1.0196632146835327, "learning_rate": 5.737999857103137e-06, "loss": 0.7535, "step": 9131 }, { "epoch": 0.4693185322232501, "grad_norm": 1.0907652378082275, "learning_rate": 5.737176708376979e-06, "loss": 0.7664, "step": 9132 }, { "epoch": 0.46936992496659474, "grad_norm": 1.0542631149291992, "learning_rate": 5.7363535392266965e-06, "loss": 0.7274, "step": 9133 }, { "epoch": 0.46942131770993933, "grad_norm": 1.0437034368515015, "learning_rate": 5.735530349675098e-06, "loss": 0.7446, "step": 9134 }, { "epoch": 0.469472710453284, "grad_norm": 1.0687042474746704, "learning_rate": 5.734707139744988e-06, "loss": 0.709, "step": 9135 }, { "epoch": 0.46952410319662863, "grad_norm": 1.1929371356964111, "learning_rate": 5.733883909459175e-06, "loss": 0.7948, "step": 9136 }, { "epoch": 0.4695754959399733, "grad_norm": 1.0236198902130127, "learning_rate": 5.7330606588404655e-06, "loss": 0.7457, "step": 9137 }, { "epoch": 0.4696268886833179, "grad_norm": 1.050380825996399, "learning_rate": 5.7322373879116736e-06, "loss": 0.7573, "step": 9138 }, { "epoch": 0.46967828142666257, "grad_norm": 1.04297935962677, "learning_rate": 5.731414096695603e-06, "loss": 0.6499, "step": 9139 }, { "epoch": 0.4697296741700072, "grad_norm": 1.0867043733596802, "learning_rate": 5.7305907852150665e-06, "loss": 0.7553, "step": 9140 }, { "epoch": 0.4697810669133518, "grad_norm": 0.7333614826202393, "learning_rate": 5.729767453492876e-06, "loss": 0.6558, "step": 9141 }, { "epoch": 0.46983245965669646, "grad_norm": 1.0604865550994873, "learning_rate": 5.728944101551841e-06, "loss": 0.7394, "step": 9142 }, { "epoch": 0.4698838524000411, "grad_norm": 1.0322959423065186, "learning_rate": 5.728120729414771e-06, "loss": 0.7742, "step": 9143 }, { "epoch": 0.46993524514338575, "grad_norm": 1.0673558712005615, "learning_rate": 5.727297337104481e-06, "loss": 0.7635, "step": 9144 }, { "epoch": 0.4699866378867304, "grad_norm": 1.0791901350021362, "learning_rate": 5.726473924643783e-06, "loss": 0.7492, "step": 9145 }, { "epoch": 0.47003803063007504, "grad_norm": 1.056095004081726, "learning_rate": 5.725650492055491e-06, "loss": 0.7347, "step": 9146 }, { "epoch": 0.4700894233734197, "grad_norm": 1.1589689254760742, "learning_rate": 5.7248270393624185e-06, "loss": 0.7764, "step": 9147 }, { "epoch": 0.47014081611676434, "grad_norm": 1.053005576133728, "learning_rate": 5.724003566587379e-06, "loss": 0.7065, "step": 9148 }, { "epoch": 0.47019220886010893, "grad_norm": 1.0851510763168335, "learning_rate": 5.723180073753191e-06, "loss": 0.7094, "step": 9149 }, { "epoch": 0.4702436016034536, "grad_norm": 1.0544852018356323, "learning_rate": 5.7223565608826655e-06, "loss": 0.7179, "step": 9150 }, { "epoch": 0.4702949943467982, "grad_norm": 1.0030632019042969, "learning_rate": 5.72153302799862e-06, "loss": 0.727, "step": 9151 }, { "epoch": 0.47034638709014287, "grad_norm": 1.0418330430984497, "learning_rate": 5.720709475123874e-06, "loss": 0.7793, "step": 9152 }, { "epoch": 0.4703977798334875, "grad_norm": 1.0173323154449463, "learning_rate": 5.719885902281241e-06, "loss": 0.7236, "step": 9153 }, { "epoch": 0.47044917257683216, "grad_norm": 1.0436437129974365, "learning_rate": 5.71906230949354e-06, "loss": 0.6893, "step": 9154 }, { "epoch": 0.4705005653201768, "grad_norm": 1.035166621208191, "learning_rate": 5.718238696783592e-06, "loss": 0.7542, "step": 9155 }, { "epoch": 0.47055195806352146, "grad_norm": 1.0700384378433228, "learning_rate": 5.717415064174212e-06, "loss": 0.7123, "step": 9156 }, { "epoch": 0.47060335080686605, "grad_norm": 1.031958818435669, "learning_rate": 5.716591411688222e-06, "loss": 0.7059, "step": 9157 }, { "epoch": 0.4706547435502107, "grad_norm": 1.0683881044387817, "learning_rate": 5.715767739348441e-06, "loss": 0.7741, "step": 9158 }, { "epoch": 0.47070613629355534, "grad_norm": 1.0181117057800293, "learning_rate": 5.714944047177689e-06, "loss": 0.7255, "step": 9159 }, { "epoch": 0.4707575290369, "grad_norm": 1.0431150197982788, "learning_rate": 5.714120335198789e-06, "loss": 0.7045, "step": 9160 }, { "epoch": 0.47080892178024464, "grad_norm": 1.02367103099823, "learning_rate": 5.713296603434561e-06, "loss": 0.7359, "step": 9161 }, { "epoch": 0.4708603145235893, "grad_norm": 1.123472809791565, "learning_rate": 5.712472851907828e-06, "loss": 0.8097, "step": 9162 }, { "epoch": 0.47091170726693393, "grad_norm": 1.0098596811294556, "learning_rate": 5.711649080641411e-06, "loss": 0.7141, "step": 9163 }, { "epoch": 0.4709631000102785, "grad_norm": 1.3075313568115234, "learning_rate": 5.710825289658137e-06, "loss": 0.7292, "step": 9164 }, { "epoch": 0.47101449275362317, "grad_norm": 0.9928123354911804, "learning_rate": 5.710001478980825e-06, "loss": 0.7443, "step": 9165 }, { "epoch": 0.4710658854969678, "grad_norm": 1.088578701019287, "learning_rate": 5.709177648632305e-06, "loss": 0.7477, "step": 9166 }, { "epoch": 0.47111727824031246, "grad_norm": 0.9493699073791504, "learning_rate": 5.708353798635397e-06, "loss": 0.6709, "step": 9167 }, { "epoch": 0.4711686709836571, "grad_norm": 1.180959939956665, "learning_rate": 5.7075299290129296e-06, "loss": 0.7968, "step": 9168 }, { "epoch": 0.47122006372700176, "grad_norm": 1.1137547492980957, "learning_rate": 5.706706039787726e-06, "loss": 0.6983, "step": 9169 }, { "epoch": 0.4712714564703464, "grad_norm": 1.0846682786941528, "learning_rate": 5.705882130982616e-06, "loss": 0.7159, "step": 9170 }, { "epoch": 0.47132284921369105, "grad_norm": 0.9548245072364807, "learning_rate": 5.705058202620426e-06, "loss": 0.7302, "step": 9171 }, { "epoch": 0.47137424195703564, "grad_norm": 0.7919398546218872, "learning_rate": 5.7042342547239806e-06, "loss": 0.6784, "step": 9172 }, { "epoch": 0.4714256347003803, "grad_norm": 0.7552742958068848, "learning_rate": 5.703410287316111e-06, "loss": 0.6712, "step": 9173 }, { "epoch": 0.47147702744372494, "grad_norm": 1.0433566570281982, "learning_rate": 5.702586300419645e-06, "loss": 0.716, "step": 9174 }, { "epoch": 0.4715284201870696, "grad_norm": 1.120636224746704, "learning_rate": 5.701762294057411e-06, "loss": 0.7375, "step": 9175 }, { "epoch": 0.47157981293041423, "grad_norm": 1.1099270582199097, "learning_rate": 5.700938268252243e-06, "loss": 0.8098, "step": 9176 }, { "epoch": 0.4716312056737589, "grad_norm": 0.7387608289718628, "learning_rate": 5.700114223026965e-06, "loss": 0.6638, "step": 9177 }, { "epoch": 0.4716825984171035, "grad_norm": 1.0869698524475098, "learning_rate": 5.699290158404412e-06, "loss": 0.7451, "step": 9178 }, { "epoch": 0.4717339911604481, "grad_norm": 0.7070497870445251, "learning_rate": 5.698466074407416e-06, "loss": 0.6838, "step": 9179 }, { "epoch": 0.47178538390379277, "grad_norm": 1.0209150314331055, "learning_rate": 5.697641971058806e-06, "loss": 0.67, "step": 9180 }, { "epoch": 0.4718367766471374, "grad_norm": 1.1599454879760742, "learning_rate": 5.696817848381417e-06, "loss": 0.6882, "step": 9181 }, { "epoch": 0.47188816939048206, "grad_norm": 0.7231242656707764, "learning_rate": 5.695993706398081e-06, "loss": 0.675, "step": 9182 }, { "epoch": 0.4719395621338267, "grad_norm": 1.169425129890442, "learning_rate": 5.69516954513163e-06, "loss": 0.756, "step": 9183 }, { "epoch": 0.47199095487717135, "grad_norm": 1.0638024806976318, "learning_rate": 5.6943453646049e-06, "loss": 0.7265, "step": 9184 }, { "epoch": 0.472042347620516, "grad_norm": 0.7346010208129883, "learning_rate": 5.693521164840725e-06, "loss": 0.6943, "step": 9185 }, { "epoch": 0.47209374036386065, "grad_norm": 1.1254210472106934, "learning_rate": 5.6926969458619415e-06, "loss": 0.7356, "step": 9186 }, { "epoch": 0.47214513310720524, "grad_norm": 1.079274296760559, "learning_rate": 5.691872707691386e-06, "loss": 0.7296, "step": 9187 }, { "epoch": 0.4721965258505499, "grad_norm": 1.1221206188201904, "learning_rate": 5.69104845035189e-06, "loss": 0.7215, "step": 9188 }, { "epoch": 0.47224791859389453, "grad_norm": 1.0603994131088257, "learning_rate": 5.690224173866294e-06, "loss": 0.7675, "step": 9189 }, { "epoch": 0.4722993113372392, "grad_norm": 1.0694677829742432, "learning_rate": 5.689399878257437e-06, "loss": 0.7758, "step": 9190 }, { "epoch": 0.4723507040805838, "grad_norm": 0.7089584469795227, "learning_rate": 5.688575563548151e-06, "loss": 0.6788, "step": 9191 }, { "epoch": 0.4724020968239285, "grad_norm": 0.7196356654167175, "learning_rate": 5.68775122976128e-06, "loss": 0.6584, "step": 9192 }, { "epoch": 0.4724534895672731, "grad_norm": 1.0938193798065186, "learning_rate": 5.686926876919659e-06, "loss": 0.7301, "step": 9193 }, { "epoch": 0.4725048823106177, "grad_norm": 1.1049164533615112, "learning_rate": 5.686102505046129e-06, "loss": 0.7275, "step": 9194 }, { "epoch": 0.47255627505396236, "grad_norm": 0.9249621629714966, "learning_rate": 5.68527811416353e-06, "loss": 0.6617, "step": 9195 }, { "epoch": 0.472607667797307, "grad_norm": 1.0655007362365723, "learning_rate": 5.684453704294703e-06, "loss": 0.7332, "step": 9196 }, { "epoch": 0.47265906054065165, "grad_norm": 1.0716074705123901, "learning_rate": 5.683629275462489e-06, "loss": 0.7527, "step": 9197 }, { "epoch": 0.4727104532839963, "grad_norm": 0.9782666563987732, "learning_rate": 5.6828048276897295e-06, "loss": 0.7065, "step": 9198 }, { "epoch": 0.47276184602734095, "grad_norm": 0.6938285827636719, "learning_rate": 5.681980360999264e-06, "loss": 0.6788, "step": 9199 }, { "epoch": 0.4728132387706856, "grad_norm": 1.1836494207382202, "learning_rate": 5.681155875413938e-06, "loss": 0.7248, "step": 9200 }, { "epoch": 0.47286463151403024, "grad_norm": 1.1004406213760376, "learning_rate": 5.680331370956593e-06, "loss": 0.7521, "step": 9201 }, { "epoch": 0.47291602425737483, "grad_norm": 1.032076120376587, "learning_rate": 5.6795068476500745e-06, "loss": 0.7492, "step": 9202 }, { "epoch": 0.4729674170007195, "grad_norm": 1.1093873977661133, "learning_rate": 5.6786823055172254e-06, "loss": 0.7914, "step": 9203 }, { "epoch": 0.47301880974406413, "grad_norm": 1.1851825714111328, "learning_rate": 5.677857744580889e-06, "loss": 0.8116, "step": 9204 }, { "epoch": 0.4730702024874088, "grad_norm": 0.9952714443206787, "learning_rate": 5.677033164863912e-06, "loss": 0.7412, "step": 9205 }, { "epoch": 0.4731215952307534, "grad_norm": 1.1736841201782227, "learning_rate": 5.676208566389141e-06, "loss": 0.7375, "step": 9206 }, { "epoch": 0.47317298797409807, "grad_norm": 1.0979254245758057, "learning_rate": 5.67538394917942e-06, "loss": 0.7151, "step": 9207 }, { "epoch": 0.4732243807174427, "grad_norm": 1.0516613721847534, "learning_rate": 5.674559313257598e-06, "loss": 0.737, "step": 9208 }, { "epoch": 0.47327577346078736, "grad_norm": 1.0974156856536865, "learning_rate": 5.673734658646521e-06, "loss": 0.7941, "step": 9209 }, { "epoch": 0.47332716620413195, "grad_norm": 1.0812278985977173, "learning_rate": 5.6729099853690375e-06, "loss": 0.7458, "step": 9210 }, { "epoch": 0.4733785589474766, "grad_norm": 1.0341105461120605, "learning_rate": 5.672085293447994e-06, "loss": 0.7593, "step": 9211 }, { "epoch": 0.47342995169082125, "grad_norm": 0.966256856918335, "learning_rate": 5.671260582906242e-06, "loss": 0.6602, "step": 9212 }, { "epoch": 0.4734813444341659, "grad_norm": 0.6759054064750671, "learning_rate": 5.670435853766628e-06, "loss": 0.6802, "step": 9213 }, { "epoch": 0.47353273717751054, "grad_norm": 1.0618385076522827, "learning_rate": 5.669611106052003e-06, "loss": 0.7326, "step": 9214 }, { "epoch": 0.4735841299208552, "grad_norm": 1.0748823881149292, "learning_rate": 5.668786339785219e-06, "loss": 0.7816, "step": 9215 }, { "epoch": 0.47363552266419984, "grad_norm": 1.0557425022125244, "learning_rate": 5.667961554989124e-06, "loss": 0.7246, "step": 9216 }, { "epoch": 0.47368691540754443, "grad_norm": 1.0283653736114502, "learning_rate": 5.6671367516865705e-06, "loss": 0.7167, "step": 9217 }, { "epoch": 0.4737383081508891, "grad_norm": 1.0345820188522339, "learning_rate": 5.666311929900412e-06, "loss": 0.7196, "step": 9218 }, { "epoch": 0.4737897008942337, "grad_norm": 0.7294495105743408, "learning_rate": 5.665487089653501e-06, "loss": 0.6806, "step": 9219 }, { "epoch": 0.47384109363757837, "grad_norm": 1.0568950176239014, "learning_rate": 5.664662230968686e-06, "loss": 0.7069, "step": 9220 }, { "epoch": 0.473892486380923, "grad_norm": 1.0549429655075073, "learning_rate": 5.663837353868826e-06, "loss": 0.6943, "step": 9221 }, { "epoch": 0.47394387912426766, "grad_norm": 0.826448917388916, "learning_rate": 5.663012458376771e-06, "loss": 0.6993, "step": 9222 }, { "epoch": 0.4739952718676123, "grad_norm": 0.8376103639602661, "learning_rate": 5.6621875445153775e-06, "loss": 0.6577, "step": 9223 }, { "epoch": 0.47404666461095696, "grad_norm": 1.0684700012207031, "learning_rate": 5.661362612307499e-06, "loss": 0.7469, "step": 9224 }, { "epoch": 0.47409805735430155, "grad_norm": 1.058569073677063, "learning_rate": 5.660537661775992e-06, "loss": 0.7842, "step": 9225 }, { "epoch": 0.4741494500976462, "grad_norm": 1.0559556484222412, "learning_rate": 5.659712692943712e-06, "loss": 0.7321, "step": 9226 }, { "epoch": 0.47420084284099084, "grad_norm": 1.1208834648132324, "learning_rate": 5.658887705833517e-06, "loss": 0.7765, "step": 9227 }, { "epoch": 0.4742522355843355, "grad_norm": 1.0904886722564697, "learning_rate": 5.658062700468261e-06, "loss": 0.7011, "step": 9228 }, { "epoch": 0.47430362832768014, "grad_norm": 1.0916576385498047, "learning_rate": 5.657237676870805e-06, "loss": 0.7474, "step": 9229 }, { "epoch": 0.4743550210710248, "grad_norm": 1.088936686515808, "learning_rate": 5.6564126350640035e-06, "loss": 0.7478, "step": 9230 }, { "epoch": 0.47440641381436943, "grad_norm": 1.0974252223968506, "learning_rate": 5.655587575070719e-06, "loss": 0.7374, "step": 9231 }, { "epoch": 0.474457806557714, "grad_norm": 1.0085128545761108, "learning_rate": 5.654762496913807e-06, "loss": 0.6862, "step": 9232 }, { "epoch": 0.47450919930105867, "grad_norm": 1.082457184791565, "learning_rate": 5.653937400616129e-06, "loss": 0.7711, "step": 9233 }, { "epoch": 0.4745605920444033, "grad_norm": 1.0591022968292236, "learning_rate": 5.653112286200543e-06, "loss": 0.7291, "step": 9234 }, { "epoch": 0.47461198478774796, "grad_norm": 1.1501758098602295, "learning_rate": 5.65228715368991e-06, "loss": 0.757, "step": 9235 }, { "epoch": 0.4746633775310926, "grad_norm": 1.012110948562622, "learning_rate": 5.651462003107093e-06, "loss": 0.7252, "step": 9236 }, { "epoch": 0.47471477027443726, "grad_norm": 1.1001081466674805, "learning_rate": 5.650636834474953e-06, "loss": 0.7199, "step": 9237 }, { "epoch": 0.4747661630177819, "grad_norm": 1.1054781675338745, "learning_rate": 5.64981164781635e-06, "loss": 0.7482, "step": 9238 }, { "epoch": 0.47481755576112655, "grad_norm": 1.0530426502227783, "learning_rate": 5.6489864431541495e-06, "loss": 0.7367, "step": 9239 }, { "epoch": 0.47486894850447114, "grad_norm": 1.0405614376068115, "learning_rate": 5.648161220511212e-06, "loss": 0.7247, "step": 9240 }, { "epoch": 0.4749203412478158, "grad_norm": 1.0769857168197632, "learning_rate": 5.647335979910402e-06, "loss": 0.7116, "step": 9241 }, { "epoch": 0.47497173399116044, "grad_norm": 1.116463541984558, "learning_rate": 5.646510721374584e-06, "loss": 0.7222, "step": 9242 }, { "epoch": 0.4750231267345051, "grad_norm": 1.0808955430984497, "learning_rate": 5.645685444926622e-06, "loss": 0.7276, "step": 9243 }, { "epoch": 0.47507451947784973, "grad_norm": 1.027607798576355, "learning_rate": 5.644860150589379e-06, "loss": 0.7095, "step": 9244 }, { "epoch": 0.4751259122211944, "grad_norm": 1.1123250722885132, "learning_rate": 5.644034838385724e-06, "loss": 0.7638, "step": 9245 }, { "epoch": 0.475177304964539, "grad_norm": 0.7411683797836304, "learning_rate": 5.643209508338521e-06, "loss": 0.6575, "step": 9246 }, { "epoch": 0.4752286977078837, "grad_norm": 1.021462082862854, "learning_rate": 5.642384160470635e-06, "loss": 0.7061, "step": 9247 }, { "epoch": 0.47528009045122827, "grad_norm": 0.853138267993927, "learning_rate": 5.641558794804938e-06, "loss": 0.6699, "step": 9248 }, { "epoch": 0.4753314831945729, "grad_norm": 1.188086986541748, "learning_rate": 5.640733411364292e-06, "loss": 0.7402, "step": 9249 }, { "epoch": 0.47538287593791756, "grad_norm": 1.187341570854187, "learning_rate": 5.639908010171568e-06, "loss": 0.7388, "step": 9250 }, { "epoch": 0.4754342686812622, "grad_norm": 1.1461656093597412, "learning_rate": 5.639082591249635e-06, "loss": 0.8267, "step": 9251 }, { "epoch": 0.47548566142460685, "grad_norm": 1.0076953172683716, "learning_rate": 5.63825715462136e-06, "loss": 0.7617, "step": 9252 }, { "epoch": 0.4755370541679515, "grad_norm": 1.0705081224441528, "learning_rate": 5.637431700309613e-06, "loss": 0.7074, "step": 9253 }, { "epoch": 0.47558844691129615, "grad_norm": 0.9094269871711731, "learning_rate": 5.636606228337264e-06, "loss": 0.6961, "step": 9254 }, { "epoch": 0.47563983965464074, "grad_norm": 1.1163562536239624, "learning_rate": 5.635780738727183e-06, "loss": 0.7432, "step": 9255 }, { "epoch": 0.4756912323979854, "grad_norm": 1.0883206129074097, "learning_rate": 5.6349552315022424e-06, "loss": 0.7015, "step": 9256 }, { "epoch": 0.47574262514133003, "grad_norm": 1.1945425271987915, "learning_rate": 5.6341297066853114e-06, "loss": 0.799, "step": 9257 }, { "epoch": 0.4757940178846747, "grad_norm": 0.6965017318725586, "learning_rate": 5.6333041642992646e-06, "loss": 0.6654, "step": 9258 }, { "epoch": 0.4758454106280193, "grad_norm": 1.1033636331558228, "learning_rate": 5.632478604366973e-06, "loss": 0.7533, "step": 9259 }, { "epoch": 0.475896803371364, "grad_norm": 0.885718822479248, "learning_rate": 5.631653026911308e-06, "loss": 0.7126, "step": 9260 }, { "epoch": 0.4759481961147086, "grad_norm": 1.0306239128112793, "learning_rate": 5.630827431955146e-06, "loss": 0.7212, "step": 9261 }, { "epoch": 0.47599958885805327, "grad_norm": 1.0844796895980835, "learning_rate": 5.630001819521358e-06, "loss": 0.7433, "step": 9262 }, { "epoch": 0.47605098160139786, "grad_norm": 1.0109909772872925, "learning_rate": 5.62917618963282e-06, "loss": 0.7381, "step": 9263 }, { "epoch": 0.4761023743447425, "grad_norm": 1.066633939743042, "learning_rate": 5.628350542312407e-06, "loss": 0.7288, "step": 9264 }, { "epoch": 0.47615376708808715, "grad_norm": 1.0770617723464966, "learning_rate": 5.62752487758299e-06, "loss": 0.6651, "step": 9265 }, { "epoch": 0.4762051598314318, "grad_norm": 1.0272241830825806, "learning_rate": 5.6266991954674525e-06, "loss": 0.702, "step": 9266 }, { "epoch": 0.47625655257477645, "grad_norm": 1.0983392000198364, "learning_rate": 5.625873495988663e-06, "loss": 0.7202, "step": 9267 }, { "epoch": 0.4763079453181211, "grad_norm": 1.1578168869018555, "learning_rate": 5.625047779169503e-06, "loss": 0.7339, "step": 9268 }, { "epoch": 0.47635933806146574, "grad_norm": 0.8839200735092163, "learning_rate": 5.62422204503285e-06, "loss": 0.6894, "step": 9269 }, { "epoch": 0.47641073080481033, "grad_norm": 1.0937029123306274, "learning_rate": 5.623396293601578e-06, "loss": 0.6823, "step": 9270 }, { "epoch": 0.476462123548155, "grad_norm": 1.1051486730575562, "learning_rate": 5.622570524898569e-06, "loss": 0.7042, "step": 9271 }, { "epoch": 0.47651351629149963, "grad_norm": 1.0830765962600708, "learning_rate": 5.621744738946699e-06, "loss": 0.7696, "step": 9272 }, { "epoch": 0.4765649090348443, "grad_norm": 1.2515655755996704, "learning_rate": 5.620918935768849e-06, "loss": 0.7592, "step": 9273 }, { "epoch": 0.4766163017781889, "grad_norm": 1.042804479598999, "learning_rate": 5.6200931153878965e-06, "loss": 0.7512, "step": 9274 }, { "epoch": 0.47666769452153357, "grad_norm": 1.1719958782196045, "learning_rate": 5.619267277826724e-06, "loss": 0.7555, "step": 9275 }, { "epoch": 0.4767190872648782, "grad_norm": 1.0380206108093262, "learning_rate": 5.61844142310821e-06, "loss": 0.7093, "step": 9276 }, { "epoch": 0.47677048000822286, "grad_norm": 1.0361785888671875, "learning_rate": 5.617615551255235e-06, "loss": 0.7698, "step": 9277 }, { "epoch": 0.47682187275156745, "grad_norm": 1.1024243831634521, "learning_rate": 5.616789662290684e-06, "loss": 0.7959, "step": 9278 }, { "epoch": 0.4768732654949121, "grad_norm": 1.0970488786697388, "learning_rate": 5.6159637562374355e-06, "loss": 0.7563, "step": 9279 }, { "epoch": 0.47692465823825675, "grad_norm": 0.6821733713150024, "learning_rate": 5.615137833118375e-06, "loss": 0.6571, "step": 9280 }, { "epoch": 0.4769760509816014, "grad_norm": 1.1118887662887573, "learning_rate": 5.6143118929563825e-06, "loss": 0.778, "step": 9281 }, { "epoch": 0.47702744372494604, "grad_norm": 1.0481951236724854, "learning_rate": 5.613485935774342e-06, "loss": 0.7284, "step": 9282 }, { "epoch": 0.4770788364682907, "grad_norm": 1.0448375940322876, "learning_rate": 5.612659961595139e-06, "loss": 0.7286, "step": 9283 }, { "epoch": 0.47713022921163534, "grad_norm": 1.0651406049728394, "learning_rate": 5.611833970441656e-06, "loss": 0.7377, "step": 9284 }, { "epoch": 0.47718162195498, "grad_norm": 1.0012246370315552, "learning_rate": 5.6110079623367795e-06, "loss": 0.7084, "step": 9285 }, { "epoch": 0.4772330146983246, "grad_norm": 1.0567457675933838, "learning_rate": 5.610181937303392e-06, "loss": 0.7865, "step": 9286 }, { "epoch": 0.4772844074416692, "grad_norm": 1.0820527076721191, "learning_rate": 5.609355895364382e-06, "loss": 0.7158, "step": 9287 }, { "epoch": 0.47733580018501387, "grad_norm": 1.0715365409851074, "learning_rate": 5.6085298365426355e-06, "loss": 0.7309, "step": 9288 }, { "epoch": 0.4773871929283585, "grad_norm": 1.015673041343689, "learning_rate": 5.6077037608610376e-06, "loss": 0.6902, "step": 9289 }, { "epoch": 0.47743858567170316, "grad_norm": 0.9144046902656555, "learning_rate": 5.606877668342476e-06, "loss": 0.6646, "step": 9290 }, { "epoch": 0.4774899784150478, "grad_norm": 0.9954757690429688, "learning_rate": 5.6060515590098405e-06, "loss": 0.7603, "step": 9291 }, { "epoch": 0.47754137115839246, "grad_norm": 1.0255017280578613, "learning_rate": 5.605225432886016e-06, "loss": 0.6763, "step": 9292 }, { "epoch": 0.47759276390173705, "grad_norm": 1.0901261568069458, "learning_rate": 5.604399289993893e-06, "loss": 0.7378, "step": 9293 }, { "epoch": 0.4776441566450817, "grad_norm": 1.0971847772598267, "learning_rate": 5.603573130356359e-06, "loss": 0.7691, "step": 9294 }, { "epoch": 0.47769554938842634, "grad_norm": 1.1193262338638306, "learning_rate": 5.602746953996306e-06, "loss": 0.7049, "step": 9295 }, { "epoch": 0.477746942131771, "grad_norm": 1.061022400856018, "learning_rate": 5.601920760936621e-06, "loss": 0.7624, "step": 9296 }, { "epoch": 0.47779833487511564, "grad_norm": 1.0158711671829224, "learning_rate": 5.601094551200194e-06, "loss": 0.7258, "step": 9297 }, { "epoch": 0.4778497276184603, "grad_norm": 0.7592194676399231, "learning_rate": 5.60026832480992e-06, "loss": 0.633, "step": 9298 }, { "epoch": 0.47790112036180493, "grad_norm": 0.6777262091636658, "learning_rate": 5.599442081788687e-06, "loss": 0.7244, "step": 9299 }, { "epoch": 0.4779525131051496, "grad_norm": 1.0996352434158325, "learning_rate": 5.598615822159388e-06, "loss": 0.7241, "step": 9300 }, { "epoch": 0.47800390584849417, "grad_norm": 0.7368532419204712, "learning_rate": 5.597789545944915e-06, "loss": 0.7033, "step": 9301 }, { "epoch": 0.4780552985918388, "grad_norm": 1.0993101596832275, "learning_rate": 5.596963253168159e-06, "loss": 0.7412, "step": 9302 }, { "epoch": 0.47810669133518346, "grad_norm": 1.0420209169387817, "learning_rate": 5.596136943852017e-06, "loss": 0.7484, "step": 9303 }, { "epoch": 0.4781580840785281, "grad_norm": 1.0188112258911133, "learning_rate": 5.595310618019381e-06, "loss": 0.7294, "step": 9304 }, { "epoch": 0.47820947682187276, "grad_norm": 1.0193397998809814, "learning_rate": 5.5944842756931425e-06, "loss": 0.7733, "step": 9305 }, { "epoch": 0.4782608695652174, "grad_norm": 1.035310983657837, "learning_rate": 5.593657916896199e-06, "loss": 0.7174, "step": 9306 }, { "epoch": 0.47831226230856205, "grad_norm": 1.0632489919662476, "learning_rate": 5.592831541651444e-06, "loss": 0.7475, "step": 9307 }, { "epoch": 0.47836365505190664, "grad_norm": 1.091317892074585, "learning_rate": 5.592005149981773e-06, "loss": 0.7007, "step": 9308 }, { "epoch": 0.4784150477952513, "grad_norm": 1.1301343441009521, "learning_rate": 5.5911787419100825e-06, "loss": 0.7182, "step": 9309 }, { "epoch": 0.47846644053859594, "grad_norm": 0.7572804689407349, "learning_rate": 5.5903523174592685e-06, "loss": 0.6974, "step": 9310 }, { "epoch": 0.4785178332819406, "grad_norm": 1.152143955230713, "learning_rate": 5.58952587665223e-06, "loss": 0.7022, "step": 9311 }, { "epoch": 0.47856922602528523, "grad_norm": 1.0656213760375977, "learning_rate": 5.58869941951186e-06, "loss": 0.6877, "step": 9312 }, { "epoch": 0.4786206187686299, "grad_norm": 1.2253516912460327, "learning_rate": 5.5878729460610594e-06, "loss": 0.8016, "step": 9313 }, { "epoch": 0.4786720115119745, "grad_norm": 1.129782795906067, "learning_rate": 5.587046456322726e-06, "loss": 0.776, "step": 9314 }, { "epoch": 0.4787234042553192, "grad_norm": 1.1205326318740845, "learning_rate": 5.586219950319758e-06, "loss": 0.7311, "step": 9315 }, { "epoch": 0.47877479699866377, "grad_norm": 1.1362173557281494, "learning_rate": 5.585393428075053e-06, "loss": 0.7616, "step": 9316 }, { "epoch": 0.4788261897420084, "grad_norm": 1.0390986204147339, "learning_rate": 5.584566889611513e-06, "loss": 0.7227, "step": 9317 }, { "epoch": 0.47887758248535306, "grad_norm": 1.108679175376892, "learning_rate": 5.583740334952037e-06, "loss": 0.7535, "step": 9318 }, { "epoch": 0.4789289752286977, "grad_norm": 1.0474495887756348, "learning_rate": 5.582913764119524e-06, "loss": 0.7272, "step": 9319 }, { "epoch": 0.47898036797204235, "grad_norm": 1.0956218242645264, "learning_rate": 5.582087177136877e-06, "loss": 0.7986, "step": 9320 }, { "epoch": 0.479031760715387, "grad_norm": 0.7247957587242126, "learning_rate": 5.5812605740269955e-06, "loss": 0.6533, "step": 9321 }, { "epoch": 0.47908315345873165, "grad_norm": 0.6783864498138428, "learning_rate": 5.580433954812784e-06, "loss": 0.6294, "step": 9322 }, { "epoch": 0.47913454620207624, "grad_norm": 1.0336880683898926, "learning_rate": 5.5796073195171414e-06, "loss": 0.6682, "step": 9323 }, { "epoch": 0.4791859389454209, "grad_norm": 1.0942810773849487, "learning_rate": 5.578780668162973e-06, "loss": 0.7341, "step": 9324 }, { "epoch": 0.47923733168876553, "grad_norm": 1.0713120698928833, "learning_rate": 5.57795400077318e-06, "loss": 0.8021, "step": 9325 }, { "epoch": 0.4792887244321102, "grad_norm": 1.0736093521118164, "learning_rate": 5.577127317370668e-06, "loss": 0.7408, "step": 9326 }, { "epoch": 0.4793401171754548, "grad_norm": 1.1135426759719849, "learning_rate": 5.576300617978339e-06, "loss": 0.7053, "step": 9327 }, { "epoch": 0.4793915099187995, "grad_norm": 0.7100291848182678, "learning_rate": 5.575473902619096e-06, "loss": 0.6956, "step": 9328 }, { "epoch": 0.4794429026621441, "grad_norm": 1.0849339962005615, "learning_rate": 5.574647171315848e-06, "loss": 0.7425, "step": 9329 }, { "epoch": 0.47949429540548877, "grad_norm": 1.0699530839920044, "learning_rate": 5.5738204240914985e-06, "loss": 0.7548, "step": 9330 }, { "epoch": 0.47954568814883336, "grad_norm": 0.9828718304634094, "learning_rate": 5.572993660968952e-06, "loss": 0.7147, "step": 9331 }, { "epoch": 0.479597080892178, "grad_norm": 1.0109267234802246, "learning_rate": 5.572166881971118e-06, "loss": 0.7218, "step": 9332 }, { "epoch": 0.47964847363552265, "grad_norm": 1.0340708494186401, "learning_rate": 5.571340087120899e-06, "loss": 0.7172, "step": 9333 }, { "epoch": 0.4796998663788673, "grad_norm": 1.0614937543869019, "learning_rate": 5.570513276441203e-06, "loss": 0.7586, "step": 9334 }, { "epoch": 0.47975125912221195, "grad_norm": 1.0970309972763062, "learning_rate": 5.56968644995494e-06, "loss": 0.7797, "step": 9335 }, { "epoch": 0.4798026518655566, "grad_norm": 0.7149417400360107, "learning_rate": 5.5688596076850174e-06, "loss": 0.6384, "step": 9336 }, { "epoch": 0.47985404460890124, "grad_norm": 1.0598479509353638, "learning_rate": 5.5680327496543406e-06, "loss": 0.7106, "step": 9337 }, { "epoch": 0.4799054373522459, "grad_norm": 0.784929633140564, "learning_rate": 5.56720587588582e-06, "loss": 0.6508, "step": 9338 }, { "epoch": 0.4799568300955905, "grad_norm": 1.0527395009994507, "learning_rate": 5.566378986402367e-06, "loss": 0.6873, "step": 9339 }, { "epoch": 0.48000822283893513, "grad_norm": 1.0780612230300903, "learning_rate": 5.5655520812268895e-06, "loss": 0.729, "step": 9340 }, { "epoch": 0.4800596155822798, "grad_norm": 1.0857625007629395, "learning_rate": 5.564725160382298e-06, "loss": 0.7682, "step": 9341 }, { "epoch": 0.4801110083256244, "grad_norm": 1.0418905019760132, "learning_rate": 5.5638982238915016e-06, "loss": 0.7297, "step": 9342 }, { "epoch": 0.48016240106896907, "grad_norm": 1.1343281269073486, "learning_rate": 5.563071271777413e-06, "loss": 0.8151, "step": 9343 }, { "epoch": 0.4802137938123137, "grad_norm": 1.0613363981246948, "learning_rate": 5.5622443040629435e-06, "loss": 0.7074, "step": 9344 }, { "epoch": 0.48026518655565836, "grad_norm": 1.0590349435806274, "learning_rate": 5.561417320771004e-06, "loss": 0.7683, "step": 9345 }, { "epoch": 0.48031657929900295, "grad_norm": 1.0071563720703125, "learning_rate": 5.560590321924508e-06, "loss": 0.6351, "step": 9346 }, { "epoch": 0.4803679720423476, "grad_norm": 1.01652193069458, "learning_rate": 5.559763307546366e-06, "loss": 0.7025, "step": 9347 }, { "epoch": 0.48041936478569225, "grad_norm": 1.1316579580307007, "learning_rate": 5.558936277659496e-06, "loss": 0.7676, "step": 9348 }, { "epoch": 0.4804707575290369, "grad_norm": 1.1674885749816895, "learning_rate": 5.558109232286804e-06, "loss": 0.7172, "step": 9349 }, { "epoch": 0.48052215027238154, "grad_norm": 1.11307954788208, "learning_rate": 5.557282171451212e-06, "loss": 0.7684, "step": 9350 }, { "epoch": 0.4805735430157262, "grad_norm": 1.1166071891784668, "learning_rate": 5.556455095175629e-06, "loss": 0.6948, "step": 9351 }, { "epoch": 0.48062493575907084, "grad_norm": 1.1304198503494263, "learning_rate": 5.555628003482972e-06, "loss": 0.7251, "step": 9352 }, { "epoch": 0.4806763285024155, "grad_norm": 1.0502710342407227, "learning_rate": 5.554800896396156e-06, "loss": 0.7362, "step": 9353 }, { "epoch": 0.4807277212457601, "grad_norm": 1.1019030809402466, "learning_rate": 5.553973773938098e-06, "loss": 0.7354, "step": 9354 }, { "epoch": 0.4807791139891047, "grad_norm": 1.091068148612976, "learning_rate": 5.553146636131711e-06, "loss": 0.7869, "step": 9355 }, { "epoch": 0.48083050673244937, "grad_norm": 0.897142767906189, "learning_rate": 5.552319482999914e-06, "loss": 0.6793, "step": 9356 }, { "epoch": 0.480881899475794, "grad_norm": 1.1070574522018433, "learning_rate": 5.551492314565624e-06, "loss": 0.7477, "step": 9357 }, { "epoch": 0.48093329221913866, "grad_norm": 1.0726337432861328, "learning_rate": 5.550665130851756e-06, "loss": 0.7389, "step": 9358 }, { "epoch": 0.4809846849624833, "grad_norm": 1.0646748542785645, "learning_rate": 5.549837931881231e-06, "loss": 0.6984, "step": 9359 }, { "epoch": 0.48103607770582796, "grad_norm": 0.8094071745872498, "learning_rate": 5.549010717676967e-06, "loss": 0.6983, "step": 9360 }, { "epoch": 0.48108747044917255, "grad_norm": 1.0667892694473267, "learning_rate": 5.5481834882618805e-06, "loss": 0.729, "step": 9361 }, { "epoch": 0.4811388631925172, "grad_norm": 0.7472618222236633, "learning_rate": 5.547356243658891e-06, "loss": 0.6709, "step": 9362 }, { "epoch": 0.48119025593586184, "grad_norm": 0.8461788892745972, "learning_rate": 5.546528983890919e-06, "loss": 0.663, "step": 9363 }, { "epoch": 0.4812416486792065, "grad_norm": 0.9896567463874817, "learning_rate": 5.545701708980886e-06, "loss": 0.7224, "step": 9364 }, { "epoch": 0.48129304142255114, "grad_norm": 1.1093904972076416, "learning_rate": 5.54487441895171e-06, "loss": 0.8096, "step": 9365 }, { "epoch": 0.4813444341658958, "grad_norm": 1.1064847707748413, "learning_rate": 5.544047113826311e-06, "loss": 0.7578, "step": 9366 }, { "epoch": 0.48139582690924043, "grad_norm": 1.9331846237182617, "learning_rate": 5.543219793627613e-06, "loss": 0.66, "step": 9367 }, { "epoch": 0.4814472196525851, "grad_norm": 1.0688841342926025, "learning_rate": 5.542392458378535e-06, "loss": 0.7226, "step": 9368 }, { "epoch": 0.48149861239592967, "grad_norm": 1.0167242288589478, "learning_rate": 5.541565108102002e-06, "loss": 0.748, "step": 9369 }, { "epoch": 0.4815500051392743, "grad_norm": 1.074195146560669, "learning_rate": 5.540737742820933e-06, "loss": 0.7771, "step": 9370 }, { "epoch": 0.48160139788261896, "grad_norm": 0.8099533915519714, "learning_rate": 5.5399103625582525e-06, "loss": 0.7045, "step": 9371 }, { "epoch": 0.4816527906259636, "grad_norm": 0.8186115026473999, "learning_rate": 5.539082967336886e-06, "loss": 0.7044, "step": 9372 }, { "epoch": 0.48170418336930826, "grad_norm": 1.0901892185211182, "learning_rate": 5.538255557179754e-06, "loss": 0.7448, "step": 9373 }, { "epoch": 0.4817555761126529, "grad_norm": 1.1268736124038696, "learning_rate": 5.537428132109782e-06, "loss": 0.7421, "step": 9374 }, { "epoch": 0.48180696885599755, "grad_norm": 0.9127083420753479, "learning_rate": 5.536600692149895e-06, "loss": 0.6473, "step": 9375 }, { "epoch": 0.4818583615993422, "grad_norm": 1.014736294746399, "learning_rate": 5.535773237323017e-06, "loss": 0.705, "step": 9376 }, { "epoch": 0.4819097543426868, "grad_norm": 1.0789954662322998, "learning_rate": 5.534945767652071e-06, "loss": 0.6701, "step": 9377 }, { "epoch": 0.48196114708603144, "grad_norm": 1.0487639904022217, "learning_rate": 5.534118283159988e-06, "loss": 0.7782, "step": 9378 }, { "epoch": 0.4820125398293761, "grad_norm": 1.021418571472168, "learning_rate": 5.533290783869691e-06, "loss": 0.7375, "step": 9379 }, { "epoch": 0.48206393257272073, "grad_norm": 1.0228590965270996, "learning_rate": 5.532463269804106e-06, "loss": 0.7183, "step": 9380 }, { "epoch": 0.4821153253160654, "grad_norm": 1.07512629032135, "learning_rate": 5.531635740986163e-06, "loss": 0.7936, "step": 9381 }, { "epoch": 0.48216671805941, "grad_norm": 1.0449707508087158, "learning_rate": 5.530808197438786e-06, "loss": 0.798, "step": 9382 }, { "epoch": 0.4822181108027547, "grad_norm": 1.104135513305664, "learning_rate": 5.529980639184906e-06, "loss": 0.7702, "step": 9383 }, { "epoch": 0.48226950354609927, "grad_norm": 1.115519404411316, "learning_rate": 5.529153066247449e-06, "loss": 0.746, "step": 9384 }, { "epoch": 0.4823208962894439, "grad_norm": 1.1583386659622192, "learning_rate": 5.5283254786493435e-06, "loss": 0.7519, "step": 9385 }, { "epoch": 0.48237228903278856, "grad_norm": 1.0242222547531128, "learning_rate": 5.52749787641352e-06, "loss": 0.7469, "step": 9386 }, { "epoch": 0.4824236817761332, "grad_norm": 1.0207234621047974, "learning_rate": 5.5266702595629065e-06, "loss": 0.7174, "step": 9387 }, { "epoch": 0.48247507451947785, "grad_norm": 1.2652196884155273, "learning_rate": 5.525842628120433e-06, "loss": 0.7777, "step": 9388 }, { "epoch": 0.4825264672628225, "grad_norm": 1.0312669277191162, "learning_rate": 5.525014982109032e-06, "loss": 0.7458, "step": 9389 }, { "epoch": 0.48257786000616715, "grad_norm": 1.0260474681854248, "learning_rate": 5.5241873215516315e-06, "loss": 0.7336, "step": 9390 }, { "epoch": 0.4826292527495118, "grad_norm": 1.0687013864517212, "learning_rate": 5.523359646471164e-06, "loss": 0.7487, "step": 9391 }, { "epoch": 0.4826806454928564, "grad_norm": 1.1108461618423462, "learning_rate": 5.52253195689056e-06, "loss": 0.735, "step": 9392 }, { "epoch": 0.48273203823620103, "grad_norm": 1.0323104858398438, "learning_rate": 5.521704252832752e-06, "loss": 0.6977, "step": 9393 }, { "epoch": 0.4827834309795457, "grad_norm": 1.0239531993865967, "learning_rate": 5.520876534320672e-06, "loss": 0.6796, "step": 9394 }, { "epoch": 0.4828348237228903, "grad_norm": 1.1626373529434204, "learning_rate": 5.520048801377252e-06, "loss": 0.7381, "step": 9395 }, { "epoch": 0.482886216466235, "grad_norm": 1.0431400537490845, "learning_rate": 5.519221054025428e-06, "loss": 0.7599, "step": 9396 }, { "epoch": 0.4829376092095796, "grad_norm": 0.7426326274871826, "learning_rate": 5.51839329228813e-06, "loss": 0.6716, "step": 9397 }, { "epoch": 0.48298900195292427, "grad_norm": 1.0871039628982544, "learning_rate": 5.517565516188292e-06, "loss": 0.7422, "step": 9398 }, { "epoch": 0.48304039469626886, "grad_norm": 1.1728631258010864, "learning_rate": 5.51673772574885e-06, "loss": 0.7809, "step": 9399 }, { "epoch": 0.4830917874396135, "grad_norm": 1.1152043342590332, "learning_rate": 5.515909920992739e-06, "loss": 0.733, "step": 9400 }, { "epoch": 0.48314318018295815, "grad_norm": 1.0957964658737183, "learning_rate": 5.515082101942893e-06, "loss": 0.7188, "step": 9401 }, { "epoch": 0.4831945729263028, "grad_norm": 1.0207455158233643, "learning_rate": 5.514254268622247e-06, "loss": 0.7203, "step": 9402 }, { "epoch": 0.48324596566964745, "grad_norm": 1.0612454414367676, "learning_rate": 5.513426421053737e-06, "loss": 0.7339, "step": 9403 }, { "epoch": 0.4832973584129921, "grad_norm": 1.0991202592849731, "learning_rate": 5.512598559260302e-06, "loss": 0.7648, "step": 9404 }, { "epoch": 0.48334875115633674, "grad_norm": 1.0323444604873657, "learning_rate": 5.511770683264874e-06, "loss": 0.7526, "step": 9405 }, { "epoch": 0.4834001438996814, "grad_norm": 1.035022258758545, "learning_rate": 5.5109427930903926e-06, "loss": 0.7472, "step": 9406 }, { "epoch": 0.483451536643026, "grad_norm": 1.113804578781128, "learning_rate": 5.510114888759795e-06, "loss": 0.7098, "step": 9407 }, { "epoch": 0.48350292938637063, "grad_norm": 1.0342614650726318, "learning_rate": 5.509286970296021e-06, "loss": 0.718, "step": 9408 }, { "epoch": 0.4835543221297153, "grad_norm": 1.1069092750549316, "learning_rate": 5.5084590377220035e-06, "loss": 0.7418, "step": 9409 }, { "epoch": 0.4836057148730599, "grad_norm": 1.0686357021331787, "learning_rate": 5.5076310910606855e-06, "loss": 0.749, "step": 9410 }, { "epoch": 0.48365710761640457, "grad_norm": 1.043076515197754, "learning_rate": 5.5068031303350046e-06, "loss": 0.7341, "step": 9411 }, { "epoch": 0.4837085003597492, "grad_norm": 0.8362899422645569, "learning_rate": 5.505975155567902e-06, "loss": 0.6765, "step": 9412 }, { "epoch": 0.48375989310309386, "grad_norm": 0.7150397300720215, "learning_rate": 5.505147166782316e-06, "loss": 0.647, "step": 9413 }, { "epoch": 0.4838112858464385, "grad_norm": 1.1081665754318237, "learning_rate": 5.504319164001184e-06, "loss": 0.7098, "step": 9414 }, { "epoch": 0.4838626785897831, "grad_norm": 1.0340710878372192, "learning_rate": 5.5034911472474514e-06, "loss": 0.7336, "step": 9415 }, { "epoch": 0.48391407133312775, "grad_norm": 0.7537180781364441, "learning_rate": 5.502663116544057e-06, "loss": 0.6787, "step": 9416 }, { "epoch": 0.4839654640764724, "grad_norm": 1.1423346996307373, "learning_rate": 5.50183507191394e-06, "loss": 0.8111, "step": 9417 }, { "epoch": 0.48401685681981704, "grad_norm": 1.019592046737671, "learning_rate": 5.501007013380046e-06, "loss": 0.6829, "step": 9418 }, { "epoch": 0.4840682495631617, "grad_norm": 1.0390514135360718, "learning_rate": 5.500178940965314e-06, "loss": 0.6712, "step": 9419 }, { "epoch": 0.48411964230650634, "grad_norm": 1.1257941722869873, "learning_rate": 5.499350854692689e-06, "loss": 0.7841, "step": 9420 }, { "epoch": 0.484171035049851, "grad_norm": 1.1053500175476074, "learning_rate": 5.49852275458511e-06, "loss": 0.7316, "step": 9421 }, { "epoch": 0.4842224277931956, "grad_norm": 1.0224664211273193, "learning_rate": 5.497694640665526e-06, "loss": 0.745, "step": 9422 }, { "epoch": 0.4842738205365402, "grad_norm": 1.0418940782546997, "learning_rate": 5.496866512956876e-06, "loss": 0.6939, "step": 9423 }, { "epoch": 0.48432521327988487, "grad_norm": 1.0702447891235352, "learning_rate": 5.496038371482104e-06, "loss": 0.7418, "step": 9424 }, { "epoch": 0.4843766060232295, "grad_norm": 1.049414873123169, "learning_rate": 5.4952102162641575e-06, "loss": 0.7635, "step": 9425 }, { "epoch": 0.48442799876657416, "grad_norm": 0.9871751070022583, "learning_rate": 5.494382047325979e-06, "loss": 0.7107, "step": 9426 }, { "epoch": 0.4844793915099188, "grad_norm": 1.0417555570602417, "learning_rate": 5.493553864690513e-06, "loss": 0.7369, "step": 9427 }, { "epoch": 0.48453078425326346, "grad_norm": 1.1234605312347412, "learning_rate": 5.492725668380709e-06, "loss": 0.7634, "step": 9428 }, { "epoch": 0.4845821769966081, "grad_norm": 1.023833155632019, "learning_rate": 5.491897458419508e-06, "loss": 0.753, "step": 9429 }, { "epoch": 0.4846335697399527, "grad_norm": 1.1806488037109375, "learning_rate": 5.4910692348298575e-06, "loss": 0.7822, "step": 9430 }, { "epoch": 0.48468496248329734, "grad_norm": 0.9322224259376526, "learning_rate": 5.490240997634706e-06, "loss": 0.686, "step": 9431 }, { "epoch": 0.484736355226642, "grad_norm": 1.041218638420105, "learning_rate": 5.4894127468570014e-06, "loss": 0.7126, "step": 9432 }, { "epoch": 0.48478774796998664, "grad_norm": 1.0199620723724365, "learning_rate": 5.4885844825196866e-06, "loss": 0.7359, "step": 9433 }, { "epoch": 0.4848391407133313, "grad_norm": 1.0956038236618042, "learning_rate": 5.487756204645714e-06, "loss": 0.7252, "step": 9434 }, { "epoch": 0.48489053345667593, "grad_norm": 1.035573124885559, "learning_rate": 5.486927913258028e-06, "loss": 0.6881, "step": 9435 }, { "epoch": 0.4849419262000206, "grad_norm": 1.030729055404663, "learning_rate": 5.48609960837958e-06, "loss": 0.729, "step": 9436 }, { "epoch": 0.48499331894336517, "grad_norm": 1.0253101587295532, "learning_rate": 5.4852712900333185e-06, "loss": 0.6982, "step": 9437 }, { "epoch": 0.4850447116867098, "grad_norm": 1.075419545173645, "learning_rate": 5.484442958242191e-06, "loss": 0.7428, "step": 9438 }, { "epoch": 0.48509610443005446, "grad_norm": 1.1414252519607544, "learning_rate": 5.483614613029149e-06, "loss": 0.7657, "step": 9439 }, { "epoch": 0.4851474971733991, "grad_norm": 1.1154879331588745, "learning_rate": 5.4827862544171414e-06, "loss": 0.716, "step": 9440 }, { "epoch": 0.48519888991674376, "grad_norm": 1.0697299242019653, "learning_rate": 5.48195788242912e-06, "loss": 0.7405, "step": 9441 }, { "epoch": 0.4852502826600884, "grad_norm": 0.9866037368774414, "learning_rate": 5.4811294970880335e-06, "loss": 0.6897, "step": 9442 }, { "epoch": 0.48530167540343305, "grad_norm": 1.1108030080795288, "learning_rate": 5.480301098416834e-06, "loss": 0.7395, "step": 9443 }, { "epoch": 0.4853530681467777, "grad_norm": 1.1003204584121704, "learning_rate": 5.479472686438474e-06, "loss": 0.7614, "step": 9444 }, { "epoch": 0.4854044608901223, "grad_norm": 0.8542898893356323, "learning_rate": 5.478644261175904e-06, "loss": 0.6569, "step": 9445 }, { "epoch": 0.48545585363346694, "grad_norm": 1.0212955474853516, "learning_rate": 5.477815822652076e-06, "loss": 0.7506, "step": 9446 }, { "epoch": 0.4855072463768116, "grad_norm": 1.0662847757339478, "learning_rate": 5.476987370889945e-06, "loss": 0.7031, "step": 9447 }, { "epoch": 0.48555863912015623, "grad_norm": 1.0966449975967407, "learning_rate": 5.476158905912461e-06, "loss": 0.7127, "step": 9448 }, { "epoch": 0.4856100318635009, "grad_norm": 1.1148920059204102, "learning_rate": 5.47533042774258e-06, "loss": 0.7564, "step": 9449 }, { "epoch": 0.4856614246068455, "grad_norm": 1.0633505582809448, "learning_rate": 5.474501936403255e-06, "loss": 0.7693, "step": 9450 }, { "epoch": 0.4857128173501902, "grad_norm": 1.0824952125549316, "learning_rate": 5.473673431917437e-06, "loss": 0.7183, "step": 9451 }, { "epoch": 0.4857642100935348, "grad_norm": 1.0508842468261719, "learning_rate": 5.472844914308084e-06, "loss": 0.7375, "step": 9452 }, { "epoch": 0.4858156028368794, "grad_norm": 0.7544159293174744, "learning_rate": 5.47201638359815e-06, "loss": 0.6689, "step": 9453 }, { "epoch": 0.48586699558022406, "grad_norm": 0.9461003541946411, "learning_rate": 5.47118783981059e-06, "loss": 0.681, "step": 9454 }, { "epoch": 0.4859183883235687, "grad_norm": 1.0841604471206665, "learning_rate": 5.470359282968359e-06, "loss": 0.7852, "step": 9455 }, { "epoch": 0.48596978106691335, "grad_norm": 1.2208683490753174, "learning_rate": 5.4695307130944135e-06, "loss": 0.6862, "step": 9456 }, { "epoch": 0.486021173810258, "grad_norm": 1.054756760597229, "learning_rate": 5.4687021302117105e-06, "loss": 0.6971, "step": 9457 }, { "epoch": 0.48607256655360265, "grad_norm": 1.0326743125915527, "learning_rate": 5.467873534343206e-06, "loss": 0.716, "step": 9458 }, { "epoch": 0.4861239592969473, "grad_norm": 0.7408831119537354, "learning_rate": 5.467044925511854e-06, "loss": 0.7048, "step": 9459 }, { "epoch": 0.4861753520402919, "grad_norm": 1.0980217456817627, "learning_rate": 5.466216303740617e-06, "loss": 0.7241, "step": 9460 }, { "epoch": 0.48622674478363653, "grad_norm": 1.0268486738204956, "learning_rate": 5.4653876690524486e-06, "loss": 0.7361, "step": 9461 }, { "epoch": 0.4862781375269812, "grad_norm": 1.0121040344238281, "learning_rate": 5.464559021470308e-06, "loss": 0.6675, "step": 9462 }, { "epoch": 0.4863295302703258, "grad_norm": 1.1095714569091797, "learning_rate": 5.463730361017155e-06, "loss": 0.7158, "step": 9463 }, { "epoch": 0.4863809230136705, "grad_norm": 1.0649281740188599, "learning_rate": 5.462901687715948e-06, "loss": 0.7667, "step": 9464 }, { "epoch": 0.4864323157570151, "grad_norm": 1.0386686325073242, "learning_rate": 5.4620730015896454e-06, "loss": 0.7056, "step": 9465 }, { "epoch": 0.48648370850035977, "grad_norm": 1.0437140464782715, "learning_rate": 5.461244302661207e-06, "loss": 0.7163, "step": 9466 }, { "epoch": 0.4865351012437044, "grad_norm": 0.8819904327392578, "learning_rate": 5.460415590953592e-06, "loss": 0.6776, "step": 9467 }, { "epoch": 0.486586493987049, "grad_norm": 1.064073920249939, "learning_rate": 5.45958686648976e-06, "loss": 0.6793, "step": 9468 }, { "epoch": 0.48663788673039365, "grad_norm": 1.0936199426651, "learning_rate": 5.458758129292674e-06, "loss": 0.707, "step": 9469 }, { "epoch": 0.4866892794737383, "grad_norm": 0.9179823398590088, "learning_rate": 5.457929379385293e-06, "loss": 0.7087, "step": 9470 }, { "epoch": 0.48674067221708295, "grad_norm": 1.0610612630844116, "learning_rate": 5.4571006167905786e-06, "loss": 0.7201, "step": 9471 }, { "epoch": 0.4867920649604276, "grad_norm": 1.04362952709198, "learning_rate": 5.4562718415314916e-06, "loss": 0.7035, "step": 9472 }, { "epoch": 0.48684345770377224, "grad_norm": 1.0488141775131226, "learning_rate": 5.455443053630995e-06, "loss": 0.7697, "step": 9473 }, { "epoch": 0.4868948504471169, "grad_norm": 1.0690515041351318, "learning_rate": 5.454614253112052e-06, "loss": 0.7591, "step": 9474 }, { "epoch": 0.4869462431904615, "grad_norm": 1.081305742263794, "learning_rate": 5.453785439997623e-06, "loss": 0.6973, "step": 9475 }, { "epoch": 0.48699763593380613, "grad_norm": 0.7594033479690552, "learning_rate": 5.452956614310673e-06, "loss": 0.67, "step": 9476 }, { "epoch": 0.4870490286771508, "grad_norm": 0.7347837090492249, "learning_rate": 5.452127776074164e-06, "loss": 0.6944, "step": 9477 }, { "epoch": 0.4871004214204954, "grad_norm": 1.087239146232605, "learning_rate": 5.45129892531106e-06, "loss": 0.7405, "step": 9478 }, { "epoch": 0.48715181416384007, "grad_norm": 1.3487238883972168, "learning_rate": 5.450470062044326e-06, "loss": 0.7149, "step": 9479 }, { "epoch": 0.4872032069071847, "grad_norm": 1.0602219104766846, "learning_rate": 5.4496411862969244e-06, "loss": 0.7193, "step": 9480 }, { "epoch": 0.48725459965052936, "grad_norm": 1.0953388214111328, "learning_rate": 5.448812298091822e-06, "loss": 0.742, "step": 9481 }, { "epoch": 0.487305992393874, "grad_norm": 1.0527359247207642, "learning_rate": 5.447983397451983e-06, "loss": 0.7173, "step": 9482 }, { "epoch": 0.4873573851372186, "grad_norm": 1.0570027828216553, "learning_rate": 5.447154484400371e-06, "loss": 0.6761, "step": 9483 }, { "epoch": 0.48740877788056325, "grad_norm": 1.0538517236709595, "learning_rate": 5.446325558959955e-06, "loss": 0.7841, "step": 9484 }, { "epoch": 0.4874601706239079, "grad_norm": 0.8465815186500549, "learning_rate": 5.445496621153701e-06, "loss": 0.6656, "step": 9485 }, { "epoch": 0.48751156336725254, "grad_norm": 0.721488893032074, "learning_rate": 5.444667671004572e-06, "loss": 0.6731, "step": 9486 }, { "epoch": 0.4875629561105972, "grad_norm": 1.089809775352478, "learning_rate": 5.443838708535538e-06, "loss": 0.7267, "step": 9487 }, { "epoch": 0.48761434885394184, "grad_norm": 0.7786794900894165, "learning_rate": 5.443009733769563e-06, "loss": 0.7051, "step": 9488 }, { "epoch": 0.4876657415972865, "grad_norm": 0.764552116394043, "learning_rate": 5.442180746729619e-06, "loss": 0.6638, "step": 9489 }, { "epoch": 0.4877171343406311, "grad_norm": 1.057086706161499, "learning_rate": 5.441351747438671e-06, "loss": 0.7144, "step": 9490 }, { "epoch": 0.4877685270839757, "grad_norm": 1.1409790515899658, "learning_rate": 5.440522735919685e-06, "loss": 0.7225, "step": 9491 }, { "epoch": 0.48781991982732037, "grad_norm": 0.8731694221496582, "learning_rate": 5.439693712195635e-06, "loss": 0.673, "step": 9492 }, { "epoch": 0.487871312570665, "grad_norm": 0.6754937767982483, "learning_rate": 5.438864676289484e-06, "loss": 0.6318, "step": 9493 }, { "epoch": 0.48792270531400966, "grad_norm": 1.1085034608840942, "learning_rate": 5.438035628224205e-06, "loss": 0.7108, "step": 9494 }, { "epoch": 0.4879740980573543, "grad_norm": 1.0729222297668457, "learning_rate": 5.437206568022768e-06, "loss": 0.7791, "step": 9495 }, { "epoch": 0.48802549080069896, "grad_norm": 1.0918159484863281, "learning_rate": 5.436377495708139e-06, "loss": 0.7112, "step": 9496 }, { "epoch": 0.4880768835440436, "grad_norm": 1.0716404914855957, "learning_rate": 5.435548411303292e-06, "loss": 0.7601, "step": 9497 }, { "epoch": 0.4881282762873882, "grad_norm": 1.1228057146072388, "learning_rate": 5.434719314831196e-06, "loss": 0.7364, "step": 9498 }, { "epoch": 0.48817966903073284, "grad_norm": 1.076778531074524, "learning_rate": 5.43389020631482e-06, "loss": 0.6947, "step": 9499 }, { "epoch": 0.4882310617740775, "grad_norm": 1.0357011556625366, "learning_rate": 5.433061085777138e-06, "loss": 0.7226, "step": 9500 }, { "epoch": 0.48828245451742214, "grad_norm": 1.1128188371658325, "learning_rate": 5.432231953241122e-06, "loss": 0.7778, "step": 9501 }, { "epoch": 0.4883338472607668, "grad_norm": 1.0051047801971436, "learning_rate": 5.43140280872974e-06, "loss": 0.6871, "step": 9502 }, { "epoch": 0.48838524000411143, "grad_norm": 0.8065167665481567, "learning_rate": 5.4305736522659665e-06, "loss": 0.6781, "step": 9503 }, { "epoch": 0.4884366327474561, "grad_norm": 0.7258554100990295, "learning_rate": 5.429744483872774e-06, "loss": 0.6467, "step": 9504 }, { "epoch": 0.4884880254908007, "grad_norm": 0.989908754825592, "learning_rate": 5.428915303573137e-06, "loss": 0.7021, "step": 9505 }, { "epoch": 0.4885394182341453, "grad_norm": 1.0529425144195557, "learning_rate": 5.428086111390026e-06, "loss": 0.743, "step": 9506 }, { "epoch": 0.48859081097748996, "grad_norm": 0.7950737476348877, "learning_rate": 5.427256907346415e-06, "loss": 0.6514, "step": 9507 }, { "epoch": 0.4886422037208346, "grad_norm": 1.0267528295516968, "learning_rate": 5.4264276914652794e-06, "loss": 0.7475, "step": 9508 }, { "epoch": 0.48869359646417926, "grad_norm": 1.0431227684020996, "learning_rate": 5.4255984637695915e-06, "loss": 0.6702, "step": 9509 }, { "epoch": 0.4887449892075239, "grad_norm": 1.0637890100479126, "learning_rate": 5.424769224282326e-06, "loss": 0.7082, "step": 9510 }, { "epoch": 0.48879638195086855, "grad_norm": 1.1046890020370483, "learning_rate": 5.423939973026459e-06, "loss": 0.7177, "step": 9511 }, { "epoch": 0.4888477746942132, "grad_norm": 0.7119315266609192, "learning_rate": 5.423110710024964e-06, "loss": 0.6912, "step": 9512 }, { "epoch": 0.4888991674375578, "grad_norm": 1.0632457733154297, "learning_rate": 5.422281435300817e-06, "loss": 0.8007, "step": 9513 }, { "epoch": 0.48895056018090244, "grad_norm": 1.1114763021469116, "learning_rate": 5.421452148876995e-06, "loss": 0.7446, "step": 9514 }, { "epoch": 0.4890019529242471, "grad_norm": 1.0081735849380493, "learning_rate": 5.420622850776473e-06, "loss": 0.7081, "step": 9515 }, { "epoch": 0.48905334566759173, "grad_norm": 1.0235774517059326, "learning_rate": 5.419793541022229e-06, "loss": 0.7135, "step": 9516 }, { "epoch": 0.4891047384109364, "grad_norm": 1.0041431188583374, "learning_rate": 5.418964219637237e-06, "loss": 0.7083, "step": 9517 }, { "epoch": 0.489156131154281, "grad_norm": 0.9953317046165466, "learning_rate": 5.418134886644475e-06, "loss": 0.7067, "step": 9518 }, { "epoch": 0.4892075238976257, "grad_norm": 1.015871286392212, "learning_rate": 5.417305542066922e-06, "loss": 0.6647, "step": 9519 }, { "epoch": 0.4892589166409703, "grad_norm": 0.9987191557884216, "learning_rate": 5.416476185927553e-06, "loss": 0.7052, "step": 9520 }, { "epoch": 0.4893103093843149, "grad_norm": 1.0949862003326416, "learning_rate": 5.415646818249349e-06, "loss": 0.7357, "step": 9521 }, { "epoch": 0.48936170212765956, "grad_norm": 0.9590392112731934, "learning_rate": 5.414817439055288e-06, "loss": 0.7013, "step": 9522 }, { "epoch": 0.4894130948710042, "grad_norm": 1.1485745906829834, "learning_rate": 5.413988048368345e-06, "loss": 0.696, "step": 9523 }, { "epoch": 0.48946448761434885, "grad_norm": 1.066043734550476, "learning_rate": 5.413158646211502e-06, "loss": 0.7712, "step": 9524 }, { "epoch": 0.4895158803576935, "grad_norm": 1.0126806497573853, "learning_rate": 5.412329232607739e-06, "loss": 0.7445, "step": 9525 }, { "epoch": 0.48956727310103815, "grad_norm": 0.762461245059967, "learning_rate": 5.411499807580034e-06, "loss": 0.6701, "step": 9526 }, { "epoch": 0.4896186658443828, "grad_norm": 1.063962697982788, "learning_rate": 5.410670371151367e-06, "loss": 0.7519, "step": 9527 }, { "epoch": 0.4896700585877274, "grad_norm": 1.2112607955932617, "learning_rate": 5.409840923344719e-06, "loss": 0.7086, "step": 9528 }, { "epoch": 0.48972145133107203, "grad_norm": 1.100990653038025, "learning_rate": 5.409011464183071e-06, "loss": 0.7695, "step": 9529 }, { "epoch": 0.4897728440744167, "grad_norm": 0.9387048482894897, "learning_rate": 5.408181993689404e-06, "loss": 0.7116, "step": 9530 }, { "epoch": 0.4898242368177613, "grad_norm": 1.0274606943130493, "learning_rate": 5.407352511886696e-06, "loss": 0.632, "step": 9531 }, { "epoch": 0.489875629561106, "grad_norm": 1.0875567197799683, "learning_rate": 5.406523018797933e-06, "loss": 0.7345, "step": 9532 }, { "epoch": 0.4899270223044506, "grad_norm": 1.0277342796325684, "learning_rate": 5.405693514446094e-06, "loss": 0.7155, "step": 9533 }, { "epoch": 0.48997841504779527, "grad_norm": 1.0742981433868408, "learning_rate": 5.404863998854162e-06, "loss": 0.7277, "step": 9534 }, { "epoch": 0.4900298077911399, "grad_norm": 1.1260440349578857, "learning_rate": 5.404034472045119e-06, "loss": 0.7476, "step": 9535 }, { "epoch": 0.4900812005344845, "grad_norm": 1.0490459203720093, "learning_rate": 5.403204934041948e-06, "loss": 0.7474, "step": 9536 }, { "epoch": 0.49013259327782915, "grad_norm": 1.0062735080718994, "learning_rate": 5.4023753848676334e-06, "loss": 0.7582, "step": 9537 }, { "epoch": 0.4901839860211738, "grad_norm": 1.026776909828186, "learning_rate": 5.401545824545157e-06, "loss": 0.6594, "step": 9538 }, { "epoch": 0.49023537876451845, "grad_norm": 1.0998259782791138, "learning_rate": 5.400716253097502e-06, "loss": 0.7063, "step": 9539 }, { "epoch": 0.4902867715078631, "grad_norm": 1.0422205924987793, "learning_rate": 5.399886670547655e-06, "loss": 0.7256, "step": 9540 }, { "epoch": 0.49033816425120774, "grad_norm": 1.0290815830230713, "learning_rate": 5.399057076918598e-06, "loss": 0.7556, "step": 9541 }, { "epoch": 0.4903895569945524, "grad_norm": 1.0331223011016846, "learning_rate": 5.398227472233315e-06, "loss": 0.7588, "step": 9542 }, { "epoch": 0.49044094973789704, "grad_norm": 1.0452048778533936, "learning_rate": 5.397397856514792e-06, "loss": 0.6972, "step": 9543 }, { "epoch": 0.4904923424812416, "grad_norm": 1.0479238033294678, "learning_rate": 5.396568229786015e-06, "loss": 0.7378, "step": 9544 }, { "epoch": 0.4905437352245863, "grad_norm": 1.0410690307617188, "learning_rate": 5.395738592069969e-06, "loss": 0.7262, "step": 9545 }, { "epoch": 0.4905951279679309, "grad_norm": 1.0231990814208984, "learning_rate": 5.39490894338964e-06, "loss": 0.7253, "step": 9546 }, { "epoch": 0.49064652071127557, "grad_norm": 1.0775612592697144, "learning_rate": 5.394079283768013e-06, "loss": 0.7267, "step": 9547 }, { "epoch": 0.4906979134546202, "grad_norm": 1.1124353408813477, "learning_rate": 5.393249613228076e-06, "loss": 0.7743, "step": 9548 }, { "epoch": 0.49074930619796486, "grad_norm": 1.1986351013183594, "learning_rate": 5.392419931792816e-06, "loss": 0.6769, "step": 9549 }, { "epoch": 0.4908006989413095, "grad_norm": 1.0849008560180664, "learning_rate": 5.391590239485217e-06, "loss": 0.6916, "step": 9550 }, { "epoch": 0.4908520916846541, "grad_norm": 0.7937957048416138, "learning_rate": 5.390760536328271e-06, "loss": 0.6646, "step": 9551 }, { "epoch": 0.49090348442799875, "grad_norm": 1.3388746976852417, "learning_rate": 5.389930822344961e-06, "loss": 0.7954, "step": 9552 }, { "epoch": 0.4909548771713434, "grad_norm": 1.1110291481018066, "learning_rate": 5.389101097558278e-06, "loss": 0.7382, "step": 9553 }, { "epoch": 0.49100626991468804, "grad_norm": 1.0181349515914917, "learning_rate": 5.3882713619912085e-06, "loss": 0.6832, "step": 9554 }, { "epoch": 0.4910576626580327, "grad_norm": 0.9963048696517944, "learning_rate": 5.387441615666742e-06, "loss": 0.6808, "step": 9555 }, { "epoch": 0.49110905540137734, "grad_norm": 1.064102292060852, "learning_rate": 5.386611858607867e-06, "loss": 0.7363, "step": 9556 }, { "epoch": 0.491160448144722, "grad_norm": 1.025748610496521, "learning_rate": 5.3857820908375735e-06, "loss": 0.7675, "step": 9557 }, { "epoch": 0.49121184088806663, "grad_norm": 1.1190896034240723, "learning_rate": 5.384952312378851e-06, "loss": 0.7558, "step": 9558 }, { "epoch": 0.4912632336314112, "grad_norm": 1.0449038743972778, "learning_rate": 5.384122523254689e-06, "loss": 0.7436, "step": 9559 }, { "epoch": 0.49131462637475587, "grad_norm": 1.007791519165039, "learning_rate": 5.383292723488075e-06, "loss": 0.7424, "step": 9560 }, { "epoch": 0.4913660191181005, "grad_norm": 1.0655417442321777, "learning_rate": 5.382462913102002e-06, "loss": 0.7508, "step": 9561 }, { "epoch": 0.49141741186144516, "grad_norm": 1.0937548875808716, "learning_rate": 5.381633092119462e-06, "loss": 0.6862, "step": 9562 }, { "epoch": 0.4914688046047898, "grad_norm": 1.0049195289611816, "learning_rate": 5.380803260563441e-06, "loss": 0.6774, "step": 9563 }, { "epoch": 0.49152019734813446, "grad_norm": 1.0798002481460571, "learning_rate": 5.379973418456934e-06, "loss": 0.7574, "step": 9564 }, { "epoch": 0.4915715900914791, "grad_norm": 1.1226277351379395, "learning_rate": 5.379143565822932e-06, "loss": 0.7289, "step": 9565 }, { "epoch": 0.4916229828348237, "grad_norm": 1.043548822402954, "learning_rate": 5.378313702684425e-06, "loss": 0.6853, "step": 9566 }, { "epoch": 0.49167437557816834, "grad_norm": 1.202102780342102, "learning_rate": 5.3774838290644095e-06, "loss": 0.7415, "step": 9567 }, { "epoch": 0.491725768321513, "grad_norm": 1.0347708463668823, "learning_rate": 5.376653944985872e-06, "loss": 0.7296, "step": 9568 }, { "epoch": 0.49177716106485764, "grad_norm": 1.1020474433898926, "learning_rate": 5.375824050471811e-06, "loss": 0.7663, "step": 9569 }, { "epoch": 0.4918285538082023, "grad_norm": 1.070357322692871, "learning_rate": 5.374994145545214e-06, "loss": 0.6992, "step": 9570 }, { "epoch": 0.49187994655154693, "grad_norm": 1.0060158967971802, "learning_rate": 5.374164230229078e-06, "loss": 0.7173, "step": 9571 }, { "epoch": 0.4919313392948916, "grad_norm": 1.0634430646896362, "learning_rate": 5.3733343045463945e-06, "loss": 0.7174, "step": 9572 }, { "epoch": 0.4919827320382362, "grad_norm": 1.14021897315979, "learning_rate": 5.372504368520158e-06, "loss": 0.7603, "step": 9573 }, { "epoch": 0.4920341247815808, "grad_norm": 1.1049827337265015, "learning_rate": 5.371674422173364e-06, "loss": 0.722, "step": 9574 }, { "epoch": 0.49208551752492546, "grad_norm": 1.042099118232727, "learning_rate": 5.3708444655290036e-06, "loss": 0.6942, "step": 9575 }, { "epoch": 0.4921369102682701, "grad_norm": 1.02151358127594, "learning_rate": 5.370014498610075e-06, "loss": 0.7515, "step": 9576 }, { "epoch": 0.49218830301161476, "grad_norm": 0.978813111782074, "learning_rate": 5.369184521439571e-06, "loss": 0.7145, "step": 9577 }, { "epoch": 0.4922396957549594, "grad_norm": 0.862740159034729, "learning_rate": 5.368354534040487e-06, "loss": 0.7193, "step": 9578 }, { "epoch": 0.49229108849830405, "grad_norm": 1.1445499658584595, "learning_rate": 5.3675245364358195e-06, "loss": 0.7195, "step": 9579 }, { "epoch": 0.4923424812416487, "grad_norm": 1.0370557308197021, "learning_rate": 5.366694528648564e-06, "loss": 0.7544, "step": 9580 }, { "epoch": 0.49239387398499335, "grad_norm": 1.1446062326431274, "learning_rate": 5.365864510701716e-06, "loss": 0.7302, "step": 9581 }, { "epoch": 0.49244526672833794, "grad_norm": 0.9933484196662903, "learning_rate": 5.365034482618273e-06, "loss": 0.7739, "step": 9582 }, { "epoch": 0.4924966594716826, "grad_norm": 1.0212934017181396, "learning_rate": 5.36420444442123e-06, "loss": 0.6992, "step": 9583 }, { "epoch": 0.49254805221502723, "grad_norm": 1.083933711051941, "learning_rate": 5.363374396133585e-06, "loss": 0.7044, "step": 9584 }, { "epoch": 0.4925994449583719, "grad_norm": 1.0710134506225586, "learning_rate": 5.3625443377783345e-06, "loss": 0.7062, "step": 9585 }, { "epoch": 0.4926508377017165, "grad_norm": 1.1001828908920288, "learning_rate": 5.361714269378477e-06, "loss": 0.6699, "step": 9586 }, { "epoch": 0.4927022304450612, "grad_norm": 1.1703779697418213, "learning_rate": 5.360884190957009e-06, "loss": 0.6955, "step": 9587 }, { "epoch": 0.4927536231884058, "grad_norm": 1.0764039754867554, "learning_rate": 5.360054102536932e-06, "loss": 0.7366, "step": 9588 }, { "epoch": 0.4928050159317504, "grad_norm": 0.8195311427116394, "learning_rate": 5.359224004141239e-06, "loss": 0.6617, "step": 9589 }, { "epoch": 0.49285640867509506, "grad_norm": 1.093002200126648, "learning_rate": 5.358393895792934e-06, "loss": 0.7736, "step": 9590 }, { "epoch": 0.4929078014184397, "grad_norm": 1.0097301006317139, "learning_rate": 5.357563777515012e-06, "loss": 0.7501, "step": 9591 }, { "epoch": 0.49295919416178435, "grad_norm": 0.8267471194267273, "learning_rate": 5.356733649330471e-06, "loss": 0.6674, "step": 9592 }, { "epoch": 0.493010586905129, "grad_norm": 1.0620254278182983, "learning_rate": 5.3559035112623155e-06, "loss": 0.6703, "step": 9593 }, { "epoch": 0.49306197964847365, "grad_norm": 1.049376368522644, "learning_rate": 5.35507336333354e-06, "loss": 0.7327, "step": 9594 }, { "epoch": 0.4931133723918183, "grad_norm": 1.0751487016677856, "learning_rate": 5.35424320556715e-06, "loss": 0.7658, "step": 9595 }, { "epoch": 0.49316476513516294, "grad_norm": 1.0827155113220215, "learning_rate": 5.35341303798614e-06, "loss": 0.7221, "step": 9596 }, { "epoch": 0.49321615787850753, "grad_norm": 0.735813558101654, "learning_rate": 5.352582860613513e-06, "loss": 0.6938, "step": 9597 }, { "epoch": 0.4932675506218522, "grad_norm": 1.0243816375732422, "learning_rate": 5.3517526734722724e-06, "loss": 0.6826, "step": 9598 }, { "epoch": 0.4933189433651968, "grad_norm": 1.0461496114730835, "learning_rate": 5.350922476585416e-06, "loss": 0.7456, "step": 9599 }, { "epoch": 0.4933703361085415, "grad_norm": 1.1016942262649536, "learning_rate": 5.350092269975945e-06, "loss": 0.7964, "step": 9600 }, { "epoch": 0.4934217288518861, "grad_norm": 1.0775535106658936, "learning_rate": 5.349262053666862e-06, "loss": 0.7673, "step": 9601 }, { "epoch": 0.49347312159523077, "grad_norm": 1.0956441164016724, "learning_rate": 5.348431827681169e-06, "loss": 0.7941, "step": 9602 }, { "epoch": 0.4935245143385754, "grad_norm": 1.108159065246582, "learning_rate": 5.347601592041869e-06, "loss": 0.7566, "step": 9603 }, { "epoch": 0.49357590708192, "grad_norm": 1.030697226524353, "learning_rate": 5.346771346771963e-06, "loss": 0.749, "step": 9604 }, { "epoch": 0.49362729982526465, "grad_norm": 0.7208949327468872, "learning_rate": 5.345941091894453e-06, "loss": 0.6412, "step": 9605 }, { "epoch": 0.4936786925686093, "grad_norm": 1.0395797491073608, "learning_rate": 5.345110827432343e-06, "loss": 0.7254, "step": 9606 }, { "epoch": 0.49373008531195395, "grad_norm": 1.0050891637802124, "learning_rate": 5.3442805534086365e-06, "loss": 0.6659, "step": 9607 }, { "epoch": 0.4937814780552986, "grad_norm": 1.1142754554748535, "learning_rate": 5.343450269846336e-06, "loss": 0.7544, "step": 9608 }, { "epoch": 0.49383287079864324, "grad_norm": 1.0315179824829102, "learning_rate": 5.342619976768447e-06, "loss": 0.7355, "step": 9609 }, { "epoch": 0.4938842635419879, "grad_norm": 1.0394960641860962, "learning_rate": 5.341789674197973e-06, "loss": 0.7096, "step": 9610 }, { "epoch": 0.49393565628533254, "grad_norm": 1.135819673538208, "learning_rate": 5.340959362157916e-06, "loss": 0.801, "step": 9611 }, { "epoch": 0.4939870490286771, "grad_norm": 1.0144143104553223, "learning_rate": 5.340129040671283e-06, "loss": 0.716, "step": 9612 }, { "epoch": 0.4940384417720218, "grad_norm": 1.031213641166687, "learning_rate": 5.339298709761077e-06, "loss": 0.7399, "step": 9613 }, { "epoch": 0.4940898345153664, "grad_norm": 1.077452301979065, "learning_rate": 5.3384683694503045e-06, "loss": 0.7526, "step": 9614 }, { "epoch": 0.49414122725871107, "grad_norm": 5.250999927520752, "learning_rate": 5.33763801976197e-06, "loss": 0.8154, "step": 9615 }, { "epoch": 0.4941926200020557, "grad_norm": 0.738052248954773, "learning_rate": 5.336807660719081e-06, "loss": 0.6513, "step": 9616 }, { "epoch": 0.49424401274540036, "grad_norm": 0.684563159942627, "learning_rate": 5.3359772923446395e-06, "loss": 0.6772, "step": 9617 }, { "epoch": 0.494295405488745, "grad_norm": 1.1076298952102661, "learning_rate": 5.335146914661655e-06, "loss": 0.6777, "step": 9618 }, { "epoch": 0.4943467982320896, "grad_norm": 1.0343161821365356, "learning_rate": 5.3343165276931315e-06, "loss": 0.7311, "step": 9619 }, { "epoch": 0.49439819097543425, "grad_norm": 1.0697482824325562, "learning_rate": 5.333486131462078e-06, "loss": 0.7595, "step": 9620 }, { "epoch": 0.4944495837187789, "grad_norm": 0.8122111558914185, "learning_rate": 5.332655725991498e-06, "loss": 0.673, "step": 9621 }, { "epoch": 0.49450097646212354, "grad_norm": 0.7715480923652649, "learning_rate": 5.331825311304403e-06, "loss": 0.6603, "step": 9622 }, { "epoch": 0.4945523692054682, "grad_norm": 1.0459703207015991, "learning_rate": 5.330994887423797e-06, "loss": 0.7273, "step": 9623 }, { "epoch": 0.49460376194881284, "grad_norm": 1.1299817562103271, "learning_rate": 5.330164454372688e-06, "loss": 0.7502, "step": 9624 }, { "epoch": 0.4946551546921575, "grad_norm": 1.1158483028411865, "learning_rate": 5.329334012174086e-06, "loss": 0.7756, "step": 9625 }, { "epoch": 0.49470654743550213, "grad_norm": 1.011510968208313, "learning_rate": 5.328503560850996e-06, "loss": 0.7395, "step": 9626 }, { "epoch": 0.4947579401788467, "grad_norm": 1.1215327978134155, "learning_rate": 5.327673100426428e-06, "loss": 0.724, "step": 9627 }, { "epoch": 0.49480933292219137, "grad_norm": 1.0712478160858154, "learning_rate": 5.3268426309233915e-06, "loss": 0.7393, "step": 9628 }, { "epoch": 0.494860725665536, "grad_norm": 0.9891983270645142, "learning_rate": 5.326012152364894e-06, "loss": 0.7271, "step": 9629 }, { "epoch": 0.49491211840888066, "grad_norm": 0.728307843208313, "learning_rate": 5.325181664773945e-06, "loss": 0.669, "step": 9630 }, { "epoch": 0.4949635111522253, "grad_norm": 0.8278331160545349, "learning_rate": 5.324351168173555e-06, "loss": 0.6617, "step": 9631 }, { "epoch": 0.49501490389556996, "grad_norm": 1.0681143999099731, "learning_rate": 5.323520662586732e-06, "loss": 0.773, "step": 9632 }, { "epoch": 0.4950662966389146, "grad_norm": 1.1373975276947021, "learning_rate": 5.322690148036487e-06, "loss": 0.6991, "step": 9633 }, { "epoch": 0.49511768938225925, "grad_norm": 1.1680774688720703, "learning_rate": 5.321859624545829e-06, "loss": 0.6914, "step": 9634 }, { "epoch": 0.49516908212560384, "grad_norm": 1.0779521465301514, "learning_rate": 5.321029092137769e-06, "loss": 0.7437, "step": 9635 }, { "epoch": 0.4952204748689485, "grad_norm": 1.073246717453003, "learning_rate": 5.320198550835317e-06, "loss": 0.7275, "step": 9636 }, { "epoch": 0.49527186761229314, "grad_norm": 1.1071537733078003, "learning_rate": 5.319368000661485e-06, "loss": 0.7194, "step": 9637 }, { "epoch": 0.4953232603556378, "grad_norm": 0.8047956824302673, "learning_rate": 5.318537441639284e-06, "loss": 0.6999, "step": 9638 }, { "epoch": 0.49537465309898243, "grad_norm": 1.1221832036972046, "learning_rate": 5.317706873791726e-06, "loss": 0.7097, "step": 9639 }, { "epoch": 0.4954260458423271, "grad_norm": 1.1357895135879517, "learning_rate": 5.316876297141819e-06, "loss": 0.7437, "step": 9640 }, { "epoch": 0.4954774385856717, "grad_norm": 1.0529829263687134, "learning_rate": 5.316045711712581e-06, "loss": 0.7253, "step": 9641 }, { "epoch": 0.4955288313290163, "grad_norm": 1.062212586402893, "learning_rate": 5.31521511752702e-06, "loss": 0.7968, "step": 9642 }, { "epoch": 0.49558022407236096, "grad_norm": 1.1542739868164062, "learning_rate": 5.314384514608147e-06, "loss": 0.7364, "step": 9643 }, { "epoch": 0.4956316168157056, "grad_norm": 1.1220766305923462, "learning_rate": 5.313553902978976e-06, "loss": 0.7826, "step": 9644 }, { "epoch": 0.49568300955905026, "grad_norm": 1.0064659118652344, "learning_rate": 5.312723282662522e-06, "loss": 0.717, "step": 9645 }, { "epoch": 0.4957344023023949, "grad_norm": 1.0356231927871704, "learning_rate": 5.311892653681796e-06, "loss": 0.7153, "step": 9646 }, { "epoch": 0.49578579504573955, "grad_norm": 1.1202448606491089, "learning_rate": 5.31106201605981e-06, "loss": 0.7895, "step": 9647 }, { "epoch": 0.4958371877890842, "grad_norm": 0.703321099281311, "learning_rate": 5.310231369819581e-06, "loss": 0.6845, "step": 9648 }, { "epoch": 0.49588858053242885, "grad_norm": 1.1695502996444702, "learning_rate": 5.309400714984121e-06, "loss": 0.7455, "step": 9649 }, { "epoch": 0.49593997327577344, "grad_norm": 1.094068169593811, "learning_rate": 5.308570051576443e-06, "loss": 0.7101, "step": 9650 }, { "epoch": 0.4959913660191181, "grad_norm": 1.1081759929656982, "learning_rate": 5.307739379619563e-06, "loss": 0.7709, "step": 9651 }, { "epoch": 0.49604275876246273, "grad_norm": 0.7252534627914429, "learning_rate": 5.306908699136496e-06, "loss": 0.6998, "step": 9652 }, { "epoch": 0.4960941515058074, "grad_norm": 1.0350507497787476, "learning_rate": 5.306078010150254e-06, "loss": 0.7511, "step": 9653 }, { "epoch": 0.496145544249152, "grad_norm": 0.706834077835083, "learning_rate": 5.305247312683853e-06, "loss": 0.6506, "step": 9654 }, { "epoch": 0.4961969369924967, "grad_norm": 1.027999758720398, "learning_rate": 5.304416606760311e-06, "loss": 0.7025, "step": 9655 }, { "epoch": 0.4962483297358413, "grad_norm": 1.042326807975769, "learning_rate": 5.303585892402638e-06, "loss": 0.6973, "step": 9656 }, { "epoch": 0.4962997224791859, "grad_norm": 1.2624150514602661, "learning_rate": 5.302755169633854e-06, "loss": 0.7833, "step": 9657 }, { "epoch": 0.49635111522253056, "grad_norm": 1.0304924249649048, "learning_rate": 5.301924438476974e-06, "loss": 0.6808, "step": 9658 }, { "epoch": 0.4964025079658752, "grad_norm": 0.9943413734436035, "learning_rate": 5.301093698955013e-06, "loss": 0.6542, "step": 9659 }, { "epoch": 0.49645390070921985, "grad_norm": 1.1055750846862793, "learning_rate": 5.30026295109099e-06, "loss": 0.7367, "step": 9660 }, { "epoch": 0.4965052934525645, "grad_norm": 1.1270208358764648, "learning_rate": 5.299432194907918e-06, "loss": 0.7885, "step": 9661 }, { "epoch": 0.49655668619590915, "grad_norm": 1.0890278816223145, "learning_rate": 5.298601430428816e-06, "loss": 0.7337, "step": 9662 }, { "epoch": 0.4966080789392538, "grad_norm": 1.017724871635437, "learning_rate": 5.297770657676701e-06, "loss": 0.7582, "step": 9663 }, { "epoch": 0.49665947168259844, "grad_norm": 1.0810452699661255, "learning_rate": 5.296939876674588e-06, "loss": 0.6933, "step": 9664 }, { "epoch": 0.49671086442594303, "grad_norm": 1.093916893005371, "learning_rate": 5.296109087445499e-06, "loss": 0.7374, "step": 9665 }, { "epoch": 0.4967622571692877, "grad_norm": 1.1080012321472168, "learning_rate": 5.295278290012448e-06, "loss": 0.7033, "step": 9666 }, { "epoch": 0.4968136499126323, "grad_norm": 1.0958491563796997, "learning_rate": 5.294447484398454e-06, "loss": 0.6928, "step": 9667 }, { "epoch": 0.496865042655977, "grad_norm": 1.1174042224884033, "learning_rate": 5.293616670626536e-06, "loss": 0.6757, "step": 9668 }, { "epoch": 0.4969164353993216, "grad_norm": 1.0633642673492432, "learning_rate": 5.29278584871971e-06, "loss": 0.7605, "step": 9669 }, { "epoch": 0.49696782814266627, "grad_norm": 1.0971599817276, "learning_rate": 5.291955018700998e-06, "loss": 0.764, "step": 9670 }, { "epoch": 0.4970192208860109, "grad_norm": 0.748594343662262, "learning_rate": 5.291124180593418e-06, "loss": 0.6579, "step": 9671 }, { "epoch": 0.49707061362935556, "grad_norm": 1.0452440977096558, "learning_rate": 5.290293334419986e-06, "loss": 0.7701, "step": 9672 }, { "epoch": 0.49712200637270015, "grad_norm": 0.8301875591278076, "learning_rate": 5.289462480203725e-06, "loss": 0.7051, "step": 9673 }, { "epoch": 0.4971733991160448, "grad_norm": 1.078459620475769, "learning_rate": 5.288631617967653e-06, "loss": 0.7268, "step": 9674 }, { "epoch": 0.49722479185938945, "grad_norm": 0.9991085529327393, "learning_rate": 5.287800747734792e-06, "loss": 0.7196, "step": 9675 }, { "epoch": 0.4972761846027341, "grad_norm": 1.060250997543335, "learning_rate": 5.286969869528158e-06, "loss": 0.7105, "step": 9676 }, { "epoch": 0.49732757734607874, "grad_norm": 1.0783272981643677, "learning_rate": 5.286138983370772e-06, "loss": 0.7821, "step": 9677 }, { "epoch": 0.4973789700894234, "grad_norm": 1.1080095767974854, "learning_rate": 5.285308089285657e-06, "loss": 0.7422, "step": 9678 }, { "epoch": 0.49743036283276804, "grad_norm": 1.0426934957504272, "learning_rate": 5.2844771872958325e-06, "loss": 0.7761, "step": 9679 }, { "epoch": 0.4974817555761126, "grad_norm": 0.7346612215042114, "learning_rate": 5.283646277424318e-06, "loss": 0.6799, "step": 9680 }, { "epoch": 0.4975331483194573, "grad_norm": 1.2342984676361084, "learning_rate": 5.282815359694137e-06, "loss": 0.7685, "step": 9681 }, { "epoch": 0.4975845410628019, "grad_norm": 0.726105809211731, "learning_rate": 5.281984434128307e-06, "loss": 0.6509, "step": 9682 }, { "epoch": 0.49763593380614657, "grad_norm": 0.7369107007980347, "learning_rate": 5.281153500749856e-06, "loss": 0.703, "step": 9683 }, { "epoch": 0.4976873265494912, "grad_norm": 1.0366274118423462, "learning_rate": 5.2803225595818e-06, "loss": 0.7238, "step": 9684 }, { "epoch": 0.49773871929283586, "grad_norm": 1.0933395624160767, "learning_rate": 5.279491610647162e-06, "loss": 0.7605, "step": 9685 }, { "epoch": 0.4977901120361805, "grad_norm": 0.7025826573371887, "learning_rate": 5.278660653968965e-06, "loss": 0.6564, "step": 9686 }, { "epoch": 0.49784150477952516, "grad_norm": 0.988102376461029, "learning_rate": 5.277829689570231e-06, "loss": 0.7503, "step": 9687 }, { "epoch": 0.49789289752286975, "grad_norm": 1.0864988565444946, "learning_rate": 5.2769987174739835e-06, "loss": 0.7011, "step": 9688 }, { "epoch": 0.4979442902662144, "grad_norm": 1.0948187112808228, "learning_rate": 5.276167737703244e-06, "loss": 0.745, "step": 9689 }, { "epoch": 0.49799568300955904, "grad_norm": 1.082934856414795, "learning_rate": 5.275336750281036e-06, "loss": 0.6679, "step": 9690 }, { "epoch": 0.4980470757529037, "grad_norm": 0.7197828888893127, "learning_rate": 5.274505755230384e-06, "loss": 0.6347, "step": 9691 }, { "epoch": 0.49809846849624834, "grad_norm": 1.0458934307098389, "learning_rate": 5.27367475257431e-06, "loss": 0.7365, "step": 9692 }, { "epoch": 0.498149861239593, "grad_norm": 1.0925427675247192, "learning_rate": 5.272843742335838e-06, "loss": 0.7196, "step": 9693 }, { "epoch": 0.49820125398293763, "grad_norm": 1.093076229095459, "learning_rate": 5.272012724537993e-06, "loss": 0.7344, "step": 9694 }, { "epoch": 0.4982526467262822, "grad_norm": 0.8250869512557983, "learning_rate": 5.271181699203796e-06, "loss": 0.6848, "step": 9695 }, { "epoch": 0.49830403946962687, "grad_norm": 1.0631132125854492, "learning_rate": 5.270350666356274e-06, "loss": 0.8014, "step": 9696 }, { "epoch": 0.4983554322129715, "grad_norm": 0.7209571003913879, "learning_rate": 5.269519626018451e-06, "loss": 0.657, "step": 9697 }, { "epoch": 0.49840682495631616, "grad_norm": 1.065102458000183, "learning_rate": 5.26868857821335e-06, "loss": 0.6533, "step": 9698 }, { "epoch": 0.4984582176996608, "grad_norm": 1.0978947877883911, "learning_rate": 5.267857522963999e-06, "loss": 0.765, "step": 9699 }, { "epoch": 0.49850961044300546, "grad_norm": 1.1280215978622437, "learning_rate": 5.267026460293421e-06, "loss": 0.744, "step": 9700 }, { "epoch": 0.4985610031863501, "grad_norm": 1.062023401260376, "learning_rate": 5.26619539022464e-06, "loss": 0.695, "step": 9701 }, { "epoch": 0.49861239592969475, "grad_norm": 1.0390610694885254, "learning_rate": 5.2653643127806854e-06, "loss": 0.754, "step": 9702 }, { "epoch": 0.49866378867303934, "grad_norm": 1.0924310684204102, "learning_rate": 5.264533227984581e-06, "loss": 0.6825, "step": 9703 }, { "epoch": 0.498715181416384, "grad_norm": 0.9988656640052795, "learning_rate": 5.2637021358593496e-06, "loss": 0.6697, "step": 9704 }, { "epoch": 0.49876657415972864, "grad_norm": 1.0447070598602295, "learning_rate": 5.262871036428023e-06, "loss": 0.7346, "step": 9705 }, { "epoch": 0.4988179669030733, "grad_norm": 0.7650237083435059, "learning_rate": 5.262039929713624e-06, "loss": 0.6881, "step": 9706 }, { "epoch": 0.49886935964641793, "grad_norm": 1.054854154586792, "learning_rate": 5.26120881573918e-06, "loss": 0.7329, "step": 9707 }, { "epoch": 0.4989207523897626, "grad_norm": 0.996692955493927, "learning_rate": 5.260377694527717e-06, "loss": 0.7191, "step": 9708 }, { "epoch": 0.4989721451331072, "grad_norm": 1.1335670948028564, "learning_rate": 5.259546566102263e-06, "loss": 0.7073, "step": 9709 }, { "epoch": 0.4990235378764519, "grad_norm": 1.023481845855713, "learning_rate": 5.258715430485846e-06, "loss": 0.7638, "step": 9710 }, { "epoch": 0.49907493061979646, "grad_norm": 1.0852361917495728, "learning_rate": 5.257884287701491e-06, "loss": 0.7277, "step": 9711 }, { "epoch": 0.4991263233631411, "grad_norm": 0.7520588040351868, "learning_rate": 5.257053137772227e-06, "loss": 0.6633, "step": 9712 }, { "epoch": 0.49917771610648576, "grad_norm": 1.060171127319336, "learning_rate": 5.256221980721081e-06, "loss": 0.6772, "step": 9713 }, { "epoch": 0.4992291088498304, "grad_norm": 1.0898305177688599, "learning_rate": 5.255390816571081e-06, "loss": 0.7518, "step": 9714 }, { "epoch": 0.49928050159317505, "grad_norm": 0.8825089931488037, "learning_rate": 5.254559645345255e-06, "loss": 0.6416, "step": 9715 }, { "epoch": 0.4993318943365197, "grad_norm": 1.0771045684814453, "learning_rate": 5.253728467066633e-06, "loss": 0.7767, "step": 9716 }, { "epoch": 0.49938328707986435, "grad_norm": 1.0837466716766357, "learning_rate": 5.252897281758241e-06, "loss": 0.6988, "step": 9717 }, { "epoch": 0.49943467982320894, "grad_norm": 1.1109775304794312, "learning_rate": 5.252066089443109e-06, "loss": 0.7643, "step": 9718 }, { "epoch": 0.4994860725665536, "grad_norm": 0.7742611169815063, "learning_rate": 5.2512348901442674e-06, "loss": 0.6486, "step": 9719 }, { "epoch": 0.49953746530989823, "grad_norm": 1.0676828622817993, "learning_rate": 5.2504036838847425e-06, "loss": 0.748, "step": 9720 }, { "epoch": 0.4995888580532429, "grad_norm": 1.0265713930130005, "learning_rate": 5.249572470687566e-06, "loss": 0.6595, "step": 9721 }, { "epoch": 0.4996402507965875, "grad_norm": 1.0895823240280151, "learning_rate": 5.248741250575765e-06, "loss": 0.6794, "step": 9722 }, { "epoch": 0.4996916435399322, "grad_norm": 1.1792957782745361, "learning_rate": 5.247910023572371e-06, "loss": 0.6982, "step": 9723 }, { "epoch": 0.4997430362832768, "grad_norm": 1.0468356609344482, "learning_rate": 5.247078789700414e-06, "loss": 0.7734, "step": 9724 }, { "epoch": 0.49979442902662147, "grad_norm": 0.9966729283332825, "learning_rate": 5.246247548982922e-06, "loss": 0.761, "step": 9725 }, { "epoch": 0.49984582176996606, "grad_norm": 1.0957149267196655, "learning_rate": 5.245416301442928e-06, "loss": 0.7252, "step": 9726 }, { "epoch": 0.4998972145133107, "grad_norm": 1.0969775915145874, "learning_rate": 5.244585047103461e-06, "loss": 0.7572, "step": 9727 }, { "epoch": 0.49994860725665535, "grad_norm": 1.0184416770935059, "learning_rate": 5.243753785987551e-06, "loss": 0.7151, "step": 9728 }, { "epoch": 0.5, "grad_norm": 0.8881608247756958, "learning_rate": 5.242922518118229e-06, "loss": 0.6984, "step": 9729 }, { "epoch": 0.5000513927433446, "grad_norm": 0.9978525638580322, "learning_rate": 5.2420912435185255e-06, "loss": 0.7395, "step": 9730 }, { "epoch": 0.5001027854866893, "grad_norm": 1.0411361455917358, "learning_rate": 5.241259962211475e-06, "loss": 0.7679, "step": 9731 }, { "epoch": 0.5001541782300339, "grad_norm": 1.0489391088485718, "learning_rate": 5.240428674220106e-06, "loss": 0.6484, "step": 9732 }, { "epoch": 0.5002055709733786, "grad_norm": 0.8208889961242676, "learning_rate": 5.239597379567451e-06, "loss": 0.7005, "step": 9733 }, { "epoch": 0.5002569637167232, "grad_norm": 1.0971370935440063, "learning_rate": 5.238766078276541e-06, "loss": 0.7348, "step": 9734 }, { "epoch": 0.5003083564600679, "grad_norm": 1.0362783670425415, "learning_rate": 5.237934770370408e-06, "loss": 0.7171, "step": 9735 }, { "epoch": 0.5003597492034125, "grad_norm": 1.1381134986877441, "learning_rate": 5.2371034558720845e-06, "loss": 0.7567, "step": 9736 }, { "epoch": 0.5004111419467571, "grad_norm": 1.0195916891098022, "learning_rate": 5.2362721348046035e-06, "loss": 0.7427, "step": 9737 }, { "epoch": 0.5004625346901017, "grad_norm": 1.15377676486969, "learning_rate": 5.235440807190994e-06, "loss": 0.7576, "step": 9738 }, { "epoch": 0.5005139274334464, "grad_norm": 1.032058596611023, "learning_rate": 5.234609473054293e-06, "loss": 0.7367, "step": 9739 }, { "epoch": 0.500565320176791, "grad_norm": 1.0951296091079712, "learning_rate": 5.233778132417533e-06, "loss": 0.7481, "step": 9740 }, { "epoch": 0.5006167129201357, "grad_norm": 1.0305310487747192, "learning_rate": 5.2329467853037444e-06, "loss": 0.6947, "step": 9741 }, { "epoch": 0.5006681056634803, "grad_norm": 1.077505350112915, "learning_rate": 5.2321154317359615e-06, "loss": 0.7395, "step": 9742 }, { "epoch": 0.500719498406825, "grad_norm": 1.0372668504714966, "learning_rate": 5.231284071737219e-06, "loss": 0.7567, "step": 9743 }, { "epoch": 0.5007708911501696, "grad_norm": 1.0231887102127075, "learning_rate": 5.230452705330548e-06, "loss": 0.6947, "step": 9744 }, { "epoch": 0.5008222838935142, "grad_norm": 0.737357497215271, "learning_rate": 5.229621332538985e-06, "loss": 0.7297, "step": 9745 }, { "epoch": 0.5008736766368589, "grad_norm": 1.098196268081665, "learning_rate": 5.228789953385561e-06, "loss": 0.7409, "step": 9746 }, { "epoch": 0.5009250693802035, "grad_norm": 1.1107432842254639, "learning_rate": 5.227958567893312e-06, "loss": 0.7301, "step": 9747 }, { "epoch": 0.5009764621235482, "grad_norm": 1.1124063730239868, "learning_rate": 5.227127176085272e-06, "loss": 0.7323, "step": 9748 }, { "epoch": 0.5010278548668928, "grad_norm": 1.1481200456619263, "learning_rate": 5.2262957779844756e-06, "loss": 0.7028, "step": 9749 }, { "epoch": 0.5010792476102375, "grad_norm": 1.040295124053955, "learning_rate": 5.225464373613956e-06, "loss": 0.6624, "step": 9750 }, { "epoch": 0.5011306403535821, "grad_norm": 1.0675523281097412, "learning_rate": 5.224632962996751e-06, "loss": 0.7519, "step": 9751 }, { "epoch": 0.5011820330969267, "grad_norm": 1.0806888341903687, "learning_rate": 5.223801546155891e-06, "loss": 0.7278, "step": 9752 }, { "epoch": 0.5012334258402713, "grad_norm": 1.166495442390442, "learning_rate": 5.2229701231144155e-06, "loss": 0.7187, "step": 9753 }, { "epoch": 0.501284818583616, "grad_norm": 1.123867154121399, "learning_rate": 5.222138693895358e-06, "loss": 0.7166, "step": 9754 }, { "epoch": 0.5013362113269606, "grad_norm": 1.07319974899292, "learning_rate": 5.221307258521754e-06, "loss": 0.7327, "step": 9755 }, { "epoch": 0.5013876040703052, "grad_norm": 1.0976871252059937, "learning_rate": 5.220475817016639e-06, "loss": 0.7442, "step": 9756 }, { "epoch": 0.5014389968136499, "grad_norm": 0.9777010679244995, "learning_rate": 5.219644369403047e-06, "loss": 0.689, "step": 9757 }, { "epoch": 0.5014903895569945, "grad_norm": 1.0784786939620972, "learning_rate": 5.218812915704019e-06, "loss": 0.6815, "step": 9758 }, { "epoch": 0.5015417823003392, "grad_norm": 1.1116551160812378, "learning_rate": 5.217981455942586e-06, "loss": 0.7102, "step": 9759 }, { "epoch": 0.5015931750436838, "grad_norm": 1.0390983819961548, "learning_rate": 5.217149990141786e-06, "loss": 0.7218, "step": 9760 }, { "epoch": 0.5016445677870285, "grad_norm": 0.9617928862571716, "learning_rate": 5.2163185183246575e-06, "loss": 0.6825, "step": 9761 }, { "epoch": 0.5016959605303731, "grad_norm": 1.035317063331604, "learning_rate": 5.215487040514235e-06, "loss": 0.75, "step": 9762 }, { "epoch": 0.5017473532737178, "grad_norm": 1.0783419609069824, "learning_rate": 5.214655556733556e-06, "loss": 0.7051, "step": 9763 }, { "epoch": 0.5017987460170624, "grad_norm": 1.084179162979126, "learning_rate": 5.213824067005658e-06, "loss": 0.7347, "step": 9764 }, { "epoch": 0.5018501387604071, "grad_norm": 1.1248584985733032, "learning_rate": 5.212992571353577e-06, "loss": 0.7589, "step": 9765 }, { "epoch": 0.5019015315037517, "grad_norm": 1.1401808261871338, "learning_rate": 5.212161069800351e-06, "loss": 0.7782, "step": 9766 }, { "epoch": 0.5019529242470963, "grad_norm": 1.0196930170059204, "learning_rate": 5.211329562369017e-06, "loss": 0.6927, "step": 9767 }, { "epoch": 0.5020043169904409, "grad_norm": 1.1252310276031494, "learning_rate": 5.210498049082613e-06, "loss": 0.7573, "step": 9768 }, { "epoch": 0.5020557097337855, "grad_norm": 1.024275541305542, "learning_rate": 5.209666529964175e-06, "loss": 0.6932, "step": 9769 }, { "epoch": 0.5021071024771302, "grad_norm": 1.0425715446472168, "learning_rate": 5.208835005036745e-06, "loss": 0.7136, "step": 9770 }, { "epoch": 0.5021584952204748, "grad_norm": 1.0093886852264404, "learning_rate": 5.208003474323359e-06, "loss": 0.7201, "step": 9771 }, { "epoch": 0.5022098879638195, "grad_norm": 1.058971643447876, "learning_rate": 5.207171937847054e-06, "loss": 0.7678, "step": 9772 }, { "epoch": 0.5022612807071641, "grad_norm": 1.0527020692825317, "learning_rate": 5.206340395630869e-06, "loss": 0.7302, "step": 9773 }, { "epoch": 0.5023126734505088, "grad_norm": 1.0632209777832031, "learning_rate": 5.205508847697844e-06, "loss": 0.7109, "step": 9774 }, { "epoch": 0.5023640661938534, "grad_norm": 1.0606930255889893, "learning_rate": 5.204677294071018e-06, "loss": 0.7473, "step": 9775 }, { "epoch": 0.5024154589371981, "grad_norm": 1.1294353008270264, "learning_rate": 5.203845734773428e-06, "loss": 0.7257, "step": 9776 }, { "epoch": 0.5024668516805427, "grad_norm": 1.1610182523727417, "learning_rate": 5.203014169828114e-06, "loss": 0.7297, "step": 9777 }, { "epoch": 0.5025182444238874, "grad_norm": 1.1174776554107666, "learning_rate": 5.202182599258113e-06, "loss": 0.7721, "step": 9778 }, { "epoch": 0.502569637167232, "grad_norm": 1.077193021774292, "learning_rate": 5.201351023086469e-06, "loss": 0.7883, "step": 9779 }, { "epoch": 0.5026210299105767, "grad_norm": 1.0956318378448486, "learning_rate": 5.200519441336218e-06, "loss": 0.7533, "step": 9780 }, { "epoch": 0.5026724226539213, "grad_norm": 1.056382179260254, "learning_rate": 5.199687854030401e-06, "loss": 0.7378, "step": 9781 }, { "epoch": 0.502723815397266, "grad_norm": 1.1149080991744995, "learning_rate": 5.198856261192058e-06, "loss": 0.7838, "step": 9782 }, { "epoch": 0.5027752081406105, "grad_norm": 1.1333467960357666, "learning_rate": 5.198024662844229e-06, "loss": 0.8131, "step": 9783 }, { "epoch": 0.5028266008839551, "grad_norm": 1.0871139764785767, "learning_rate": 5.197193059009953e-06, "loss": 0.673, "step": 9784 }, { "epoch": 0.5028779936272998, "grad_norm": 1.2073267698287964, "learning_rate": 5.196361449712272e-06, "loss": 0.7961, "step": 9785 }, { "epoch": 0.5029293863706444, "grad_norm": 0.8785910606384277, "learning_rate": 5.1955298349742235e-06, "loss": 0.7065, "step": 9786 }, { "epoch": 0.5029807791139891, "grad_norm": 1.110816240310669, "learning_rate": 5.194698214818852e-06, "loss": 0.7355, "step": 9787 }, { "epoch": 0.5030321718573337, "grad_norm": 1.1505292654037476, "learning_rate": 5.1938665892691966e-06, "loss": 0.6293, "step": 9788 }, { "epoch": 0.5030835646006784, "grad_norm": 1.11864173412323, "learning_rate": 5.193034958348297e-06, "loss": 0.739, "step": 9789 }, { "epoch": 0.503134957344023, "grad_norm": 1.0772300958633423, "learning_rate": 5.192203322079194e-06, "loss": 0.7926, "step": 9790 }, { "epoch": 0.5031863500873677, "grad_norm": 1.0961942672729492, "learning_rate": 5.191371680484934e-06, "loss": 0.7535, "step": 9791 }, { "epoch": 0.5032377428307123, "grad_norm": 0.8938323259353638, "learning_rate": 5.190540033588551e-06, "loss": 0.691, "step": 9792 }, { "epoch": 0.503289135574057, "grad_norm": 0.7154179215431213, "learning_rate": 5.189708381413092e-06, "loss": 0.6826, "step": 9793 }, { "epoch": 0.5033405283174016, "grad_norm": 0.9982395768165588, "learning_rate": 5.188876723981595e-06, "loss": 0.7256, "step": 9794 }, { "epoch": 0.5033919210607463, "grad_norm": 1.0275741815567017, "learning_rate": 5.1880450613171054e-06, "loss": 0.6308, "step": 9795 }, { "epoch": 0.5034433138040909, "grad_norm": 1.0949006080627441, "learning_rate": 5.187213393442663e-06, "loss": 0.7752, "step": 9796 }, { "epoch": 0.5034947065474356, "grad_norm": 1.186237096786499, "learning_rate": 5.186381720381309e-06, "loss": 0.7493, "step": 9797 }, { "epoch": 0.5035460992907801, "grad_norm": 1.2436245679855347, "learning_rate": 5.185550042156087e-06, "loss": 0.6943, "step": 9798 }, { "epoch": 0.5035974920341247, "grad_norm": 1.0442477464675903, "learning_rate": 5.184718358790037e-06, "loss": 0.7736, "step": 9799 }, { "epoch": 0.5036488847774694, "grad_norm": 1.0788089036941528, "learning_rate": 5.183886670306207e-06, "loss": 0.7142, "step": 9800 }, { "epoch": 0.503700277520814, "grad_norm": 0.7839220762252808, "learning_rate": 5.183054976727634e-06, "loss": 0.6696, "step": 9801 }, { "epoch": 0.5037516702641587, "grad_norm": 0.8155900239944458, "learning_rate": 5.182223278077363e-06, "loss": 0.6816, "step": 9802 }, { "epoch": 0.5038030630075033, "grad_norm": 1.1803193092346191, "learning_rate": 5.1813915743784384e-06, "loss": 0.7111, "step": 9803 }, { "epoch": 0.503854455750848, "grad_norm": 1.0620794296264648, "learning_rate": 5.180559865653901e-06, "loss": 0.7454, "step": 9804 }, { "epoch": 0.5039058484941926, "grad_norm": 1.0659598112106323, "learning_rate": 5.179728151926795e-06, "loss": 0.7483, "step": 9805 }, { "epoch": 0.5039572412375373, "grad_norm": 1.0828704833984375, "learning_rate": 5.178896433220164e-06, "loss": 0.7106, "step": 9806 }, { "epoch": 0.5040086339808819, "grad_norm": 1.0648293495178223, "learning_rate": 5.178064709557051e-06, "loss": 0.7219, "step": 9807 }, { "epoch": 0.5040600267242266, "grad_norm": 1.0650081634521484, "learning_rate": 5.177232980960499e-06, "loss": 0.7155, "step": 9808 }, { "epoch": 0.5041114194675712, "grad_norm": 0.7046471834182739, "learning_rate": 5.176401247453553e-06, "loss": 0.627, "step": 9809 }, { "epoch": 0.5041628122109159, "grad_norm": 1.0383120775222778, "learning_rate": 5.175569509059256e-06, "loss": 0.7392, "step": 9810 }, { "epoch": 0.5042142049542605, "grad_norm": 1.1036735773086548, "learning_rate": 5.174737765800652e-06, "loss": 0.7808, "step": 9811 }, { "epoch": 0.5042655976976052, "grad_norm": 1.081710696220398, "learning_rate": 5.1739060177007875e-06, "loss": 0.7693, "step": 9812 }, { "epoch": 0.5043169904409497, "grad_norm": 1.0076935291290283, "learning_rate": 5.173074264782703e-06, "loss": 0.7361, "step": 9813 }, { "epoch": 0.5043683831842943, "grad_norm": 1.17203950881958, "learning_rate": 5.172242507069446e-06, "loss": 0.7011, "step": 9814 }, { "epoch": 0.504419775927639, "grad_norm": 1.0138545036315918, "learning_rate": 5.171410744584059e-06, "loss": 0.7181, "step": 9815 }, { "epoch": 0.5044711686709836, "grad_norm": 1.0348193645477295, "learning_rate": 5.170578977349589e-06, "loss": 0.6914, "step": 9816 }, { "epoch": 0.5045225614143283, "grad_norm": 0.6981536746025085, "learning_rate": 5.1697472053890785e-06, "loss": 0.6297, "step": 9817 }, { "epoch": 0.5045739541576729, "grad_norm": 0.7072228193283081, "learning_rate": 5.168915428725574e-06, "loss": 0.6388, "step": 9818 }, { "epoch": 0.5046253469010176, "grad_norm": 1.1172759532928467, "learning_rate": 5.16808364738212e-06, "loss": 0.7241, "step": 9819 }, { "epoch": 0.5046767396443622, "grad_norm": 0.7374457716941833, "learning_rate": 5.1672518613817605e-06, "loss": 0.6859, "step": 9820 }, { "epoch": 0.5047281323877069, "grad_norm": 0.9920042157173157, "learning_rate": 5.166420070747543e-06, "loss": 0.708, "step": 9821 }, { "epoch": 0.5047795251310515, "grad_norm": 0.7187145352363586, "learning_rate": 5.1655882755025125e-06, "loss": 0.7196, "step": 9822 }, { "epoch": 0.5048309178743962, "grad_norm": 0.810431182384491, "learning_rate": 5.164756475669713e-06, "loss": 0.6609, "step": 9823 }, { "epoch": 0.5048823106177408, "grad_norm": 1.058018684387207, "learning_rate": 5.163924671272192e-06, "loss": 0.7585, "step": 9824 }, { "epoch": 0.5049337033610855, "grad_norm": 1.0703145265579224, "learning_rate": 5.163092862332997e-06, "loss": 0.6841, "step": 9825 }, { "epoch": 0.5049850961044301, "grad_norm": 1.0309933423995972, "learning_rate": 5.162261048875169e-06, "loss": 0.696, "step": 9826 }, { "epoch": 0.5050364888477747, "grad_norm": 1.0483273267745972, "learning_rate": 5.161429230921759e-06, "loss": 0.7515, "step": 9827 }, { "epoch": 0.5050878815911193, "grad_norm": 0.8678324222564697, "learning_rate": 5.16059740849581e-06, "loss": 0.6417, "step": 9828 }, { "epoch": 0.5051392743344639, "grad_norm": 1.1109814643859863, "learning_rate": 5.159765581620369e-06, "loss": 0.8359, "step": 9829 }, { "epoch": 0.5051906670778086, "grad_norm": 1.0828970670700073, "learning_rate": 5.158933750318484e-06, "loss": 0.7397, "step": 9830 }, { "epoch": 0.5052420598211532, "grad_norm": 1.1663058996200562, "learning_rate": 5.158101914613201e-06, "loss": 0.7285, "step": 9831 }, { "epoch": 0.5052934525644979, "grad_norm": 1.0458813905715942, "learning_rate": 5.157270074527565e-06, "loss": 0.6395, "step": 9832 }, { "epoch": 0.5053448453078425, "grad_norm": 1.175789713859558, "learning_rate": 5.156438230084625e-06, "loss": 0.7267, "step": 9833 }, { "epoch": 0.5053962380511872, "grad_norm": 1.0505309104919434, "learning_rate": 5.155606381307427e-06, "loss": 0.7154, "step": 9834 }, { "epoch": 0.5054476307945318, "grad_norm": 1.005852460861206, "learning_rate": 5.154774528219019e-06, "loss": 0.706, "step": 9835 }, { "epoch": 0.5054990235378765, "grad_norm": 0.7402378916740417, "learning_rate": 5.153942670842448e-06, "loss": 0.6656, "step": 9836 }, { "epoch": 0.5055504162812211, "grad_norm": 1.0661238431930542, "learning_rate": 5.153110809200759e-06, "loss": 0.6759, "step": 9837 }, { "epoch": 0.5056018090245658, "grad_norm": 0.9975918531417847, "learning_rate": 5.152278943317003e-06, "loss": 0.7051, "step": 9838 }, { "epoch": 0.5056532017679104, "grad_norm": 1.0874539613723755, "learning_rate": 5.151447073214224e-06, "loss": 0.7548, "step": 9839 }, { "epoch": 0.505704594511255, "grad_norm": 1.1117216348648071, "learning_rate": 5.150615198915474e-06, "loss": 0.7214, "step": 9840 }, { "epoch": 0.5057559872545997, "grad_norm": 1.1202377080917358, "learning_rate": 5.149783320443796e-06, "loss": 0.742, "step": 9841 }, { "epoch": 0.5058073799979443, "grad_norm": 1.064525842666626, "learning_rate": 5.148951437822241e-06, "loss": 0.6909, "step": 9842 }, { "epoch": 0.5058587727412889, "grad_norm": 1.0754978656768799, "learning_rate": 5.148119551073858e-06, "loss": 0.765, "step": 9843 }, { "epoch": 0.5059101654846335, "grad_norm": 1.0283032655715942, "learning_rate": 5.147287660221693e-06, "loss": 0.7053, "step": 9844 }, { "epoch": 0.5059615582279782, "grad_norm": 1.0967696905136108, "learning_rate": 5.1464557652887935e-06, "loss": 0.7832, "step": 9845 }, { "epoch": 0.5060129509713228, "grad_norm": 0.7381188869476318, "learning_rate": 5.145623866298211e-06, "loss": 0.6268, "step": 9846 }, { "epoch": 0.5060643437146675, "grad_norm": 1.0572466850280762, "learning_rate": 5.1447919632729894e-06, "loss": 0.7453, "step": 9847 }, { "epoch": 0.5061157364580121, "grad_norm": 0.7554457783699036, "learning_rate": 5.143960056236183e-06, "loss": 0.6579, "step": 9848 }, { "epoch": 0.5061671292013568, "grad_norm": 1.0734033584594727, "learning_rate": 5.1431281452108365e-06, "loss": 0.7078, "step": 9849 }, { "epoch": 0.5062185219447014, "grad_norm": 1.057795524597168, "learning_rate": 5.142296230219997e-06, "loss": 0.7036, "step": 9850 }, { "epoch": 0.506269914688046, "grad_norm": 1.1338948011398315, "learning_rate": 5.14146431128672e-06, "loss": 0.7102, "step": 9851 }, { "epoch": 0.5063213074313907, "grad_norm": 1.0521376132965088, "learning_rate": 5.140632388434048e-06, "loss": 0.7244, "step": 9852 }, { "epoch": 0.5063727001747353, "grad_norm": 0.8181077837944031, "learning_rate": 5.139800461685034e-06, "loss": 0.6847, "step": 9853 }, { "epoch": 0.50642409291808, "grad_norm": 1.0888718366622925, "learning_rate": 5.138968531062727e-06, "loss": 0.7596, "step": 9854 }, { "epoch": 0.5064754856614246, "grad_norm": 1.0510457754135132, "learning_rate": 5.138136596590174e-06, "loss": 0.7058, "step": 9855 }, { "epoch": 0.5065268784047693, "grad_norm": 1.0899425745010376, "learning_rate": 5.137304658290427e-06, "loss": 0.7139, "step": 9856 }, { "epoch": 0.5065782711481139, "grad_norm": 1.230408787727356, "learning_rate": 5.136472716186535e-06, "loss": 0.7262, "step": 9857 }, { "epoch": 0.5066296638914586, "grad_norm": 1.0192619562149048, "learning_rate": 5.135640770301545e-06, "loss": 0.7649, "step": 9858 }, { "epoch": 0.5066810566348031, "grad_norm": 0.8518606424331665, "learning_rate": 5.134808820658511e-06, "loss": 0.6881, "step": 9859 }, { "epoch": 0.5067324493781478, "grad_norm": 0.7164705395698547, "learning_rate": 5.133976867280481e-06, "loss": 0.6915, "step": 9860 }, { "epoch": 0.5067838421214924, "grad_norm": 1.0537047386169434, "learning_rate": 5.133144910190504e-06, "loss": 0.7172, "step": 9861 }, { "epoch": 0.5068352348648371, "grad_norm": 1.061172604560852, "learning_rate": 5.132312949411631e-06, "loss": 0.7249, "step": 9862 }, { "epoch": 0.5068866276081817, "grad_norm": 1.153988242149353, "learning_rate": 5.131480984966911e-06, "loss": 0.7244, "step": 9863 }, { "epoch": 0.5069380203515264, "grad_norm": 1.0733693838119507, "learning_rate": 5.130649016879398e-06, "loss": 0.7108, "step": 9864 }, { "epoch": 0.506989413094871, "grad_norm": 2.552502155303955, "learning_rate": 5.1298170451721394e-06, "loss": 0.7569, "step": 9865 }, { "epoch": 0.5070408058382156, "grad_norm": 1.0848277807235718, "learning_rate": 5.128985069868185e-06, "loss": 0.7734, "step": 9866 }, { "epoch": 0.5070921985815603, "grad_norm": 1.121888518333435, "learning_rate": 5.128153090990587e-06, "loss": 0.6832, "step": 9867 }, { "epoch": 0.5071435913249049, "grad_norm": 1.10399329662323, "learning_rate": 5.127321108562398e-06, "loss": 0.7478, "step": 9868 }, { "epoch": 0.5071949840682496, "grad_norm": 1.0932260751724243, "learning_rate": 5.126489122606663e-06, "loss": 0.7018, "step": 9869 }, { "epoch": 0.5072463768115942, "grad_norm": 1.073297142982483, "learning_rate": 5.125657133146439e-06, "loss": 0.7086, "step": 9870 }, { "epoch": 0.5072977695549389, "grad_norm": 1.020331621170044, "learning_rate": 5.1248251402047725e-06, "loss": 0.7026, "step": 9871 }, { "epoch": 0.5073491622982835, "grad_norm": 1.0914050340652466, "learning_rate": 5.1239931438047175e-06, "loss": 0.728, "step": 9872 }, { "epoch": 0.5074005550416282, "grad_norm": 1.0729299783706665, "learning_rate": 5.123161143969323e-06, "loss": 0.7288, "step": 9873 }, { "epoch": 0.5074519477849727, "grad_norm": 1.0422276258468628, "learning_rate": 5.122329140721641e-06, "loss": 0.7662, "step": 9874 }, { "epoch": 0.5075033405283174, "grad_norm": 1.1172897815704346, "learning_rate": 5.121497134084726e-06, "loss": 0.7177, "step": 9875 }, { "epoch": 0.507554733271662, "grad_norm": 1.034440279006958, "learning_rate": 5.120665124081626e-06, "loss": 0.739, "step": 9876 }, { "epoch": 0.5076061260150067, "grad_norm": 1.0757503509521484, "learning_rate": 5.119833110735393e-06, "loss": 0.731, "step": 9877 }, { "epoch": 0.5076575187583513, "grad_norm": 1.1239045858383179, "learning_rate": 5.1190010940690785e-06, "loss": 0.7504, "step": 9878 }, { "epoch": 0.507708911501696, "grad_norm": 1.0658067464828491, "learning_rate": 5.118169074105734e-06, "loss": 0.7231, "step": 9879 }, { "epoch": 0.5077603042450406, "grad_norm": 0.7545881867408752, "learning_rate": 5.117337050868415e-06, "loss": 0.6886, "step": 9880 }, { "epoch": 0.5078116969883852, "grad_norm": 1.0129677057266235, "learning_rate": 5.116505024380168e-06, "loss": 0.6922, "step": 9881 }, { "epoch": 0.5078630897317299, "grad_norm": 1.0667717456817627, "learning_rate": 5.1156729946640485e-06, "loss": 0.7622, "step": 9882 }, { "epoch": 0.5079144824750745, "grad_norm": 1.1132174730300903, "learning_rate": 5.1148409617431075e-06, "loss": 0.7872, "step": 9883 }, { "epoch": 0.5079658752184192, "grad_norm": 1.0740832090377808, "learning_rate": 5.114008925640398e-06, "loss": 0.7618, "step": 9884 }, { "epoch": 0.5080172679617638, "grad_norm": 1.092882513999939, "learning_rate": 5.11317688637897e-06, "loss": 0.6922, "step": 9885 }, { "epoch": 0.5080686607051085, "grad_norm": 1.0890393257141113, "learning_rate": 5.112344843981879e-06, "loss": 0.7061, "step": 9886 }, { "epoch": 0.5081200534484531, "grad_norm": 1.139702558517456, "learning_rate": 5.111512798472176e-06, "loss": 0.8031, "step": 9887 }, { "epoch": 0.5081714461917978, "grad_norm": 1.0248024463653564, "learning_rate": 5.110680749872913e-06, "loss": 0.7195, "step": 9888 }, { "epoch": 0.5082228389351423, "grad_norm": 0.7777139544487, "learning_rate": 5.109848698207144e-06, "loss": 0.6722, "step": 9889 }, { "epoch": 0.508274231678487, "grad_norm": 1.1264641284942627, "learning_rate": 5.109016643497919e-06, "loss": 0.7731, "step": 9890 }, { "epoch": 0.5083256244218316, "grad_norm": 0.7348080277442932, "learning_rate": 5.108184585768294e-06, "loss": 0.6771, "step": 9891 }, { "epoch": 0.5083770171651762, "grad_norm": 1.0893429517745972, "learning_rate": 5.10735252504132e-06, "loss": 0.6846, "step": 9892 }, { "epoch": 0.5084284099085209, "grad_norm": 1.1225247383117676, "learning_rate": 5.106520461340051e-06, "loss": 0.7067, "step": 9893 }, { "epoch": 0.5084798026518655, "grad_norm": 1.0337660312652588, "learning_rate": 5.10568839468754e-06, "loss": 0.7696, "step": 9894 }, { "epoch": 0.5085311953952102, "grad_norm": 1.0286237001419067, "learning_rate": 5.1048563251068395e-06, "loss": 0.6988, "step": 9895 }, { "epoch": 0.5085825881385548, "grad_norm": 1.1379164457321167, "learning_rate": 5.104024252621002e-06, "loss": 0.7518, "step": 9896 }, { "epoch": 0.5086339808818995, "grad_norm": 1.0808881521224976, "learning_rate": 5.103192177253084e-06, "loss": 0.6951, "step": 9897 }, { "epoch": 0.5086853736252441, "grad_norm": 1.0295133590698242, "learning_rate": 5.102360099026134e-06, "loss": 0.7057, "step": 9898 }, { "epoch": 0.5087367663685888, "grad_norm": 1.051543116569519, "learning_rate": 5.10152801796321e-06, "loss": 0.7242, "step": 9899 }, { "epoch": 0.5087881591119334, "grad_norm": 1.0434367656707764, "learning_rate": 5.100695934087363e-06, "loss": 0.7192, "step": 9900 }, { "epoch": 0.5088395518552781, "grad_norm": 1.0412479639053345, "learning_rate": 5.099863847421647e-06, "loss": 0.7107, "step": 9901 }, { "epoch": 0.5088909445986227, "grad_norm": 1.094910979270935, "learning_rate": 5.0990317579891165e-06, "loss": 0.7107, "step": 9902 }, { "epoch": 0.5089423373419674, "grad_norm": 1.0198018550872803, "learning_rate": 5.098199665812823e-06, "loss": 0.7135, "step": 9903 }, { "epoch": 0.5089937300853119, "grad_norm": 0.7979608178138733, "learning_rate": 5.0973675709158234e-06, "loss": 0.6523, "step": 9904 }, { "epoch": 0.5090451228286565, "grad_norm": 0.7448724508285522, "learning_rate": 5.09653547332117e-06, "loss": 0.6832, "step": 9905 }, { "epoch": 0.5090965155720012, "grad_norm": 1.1511354446411133, "learning_rate": 5.095703373051917e-06, "loss": 0.7184, "step": 9906 }, { "epoch": 0.5091479083153458, "grad_norm": 0.7776548266410828, "learning_rate": 5.0948712701311185e-06, "loss": 0.6957, "step": 9907 }, { "epoch": 0.5091993010586905, "grad_norm": 1.082369327545166, "learning_rate": 5.094039164581828e-06, "loss": 0.682, "step": 9908 }, { "epoch": 0.5092506938020351, "grad_norm": 1.1514898538589478, "learning_rate": 5.093207056427101e-06, "loss": 0.816, "step": 9909 }, { "epoch": 0.5093020865453798, "grad_norm": 0.984919011592865, "learning_rate": 5.092374945689992e-06, "loss": 0.6843, "step": 9910 }, { "epoch": 0.5093534792887244, "grad_norm": 1.0366929769515991, "learning_rate": 5.0915428323935525e-06, "loss": 0.7004, "step": 9911 }, { "epoch": 0.5094048720320691, "grad_norm": 1.102841854095459, "learning_rate": 5.0907107165608406e-06, "loss": 0.7391, "step": 9912 }, { "epoch": 0.5094562647754137, "grad_norm": 1.0569156408309937, "learning_rate": 5.089878598214908e-06, "loss": 0.6792, "step": 9913 }, { "epoch": 0.5095076575187584, "grad_norm": 1.0401074886322021, "learning_rate": 5.08904647737881e-06, "loss": 0.7325, "step": 9914 }, { "epoch": 0.509559050262103, "grad_norm": 1.025696873664856, "learning_rate": 5.088214354075603e-06, "loss": 0.7125, "step": 9915 }, { "epoch": 0.5096104430054477, "grad_norm": 0.809995174407959, "learning_rate": 5.087382228328338e-06, "loss": 0.6852, "step": 9916 }, { "epoch": 0.5096618357487923, "grad_norm": 1.0682092905044556, "learning_rate": 5.086550100160074e-06, "loss": 0.7195, "step": 9917 }, { "epoch": 0.509713228492137, "grad_norm": 1.1462408304214478, "learning_rate": 5.0857179695938655e-06, "loss": 0.7639, "step": 9918 }, { "epoch": 0.5097646212354815, "grad_norm": 0.9014225602149963, "learning_rate": 5.084885836652762e-06, "loss": 0.7075, "step": 9919 }, { "epoch": 0.5098160139788261, "grad_norm": 0.7621055841445923, "learning_rate": 5.0840537013598245e-06, "loss": 0.7053, "step": 9920 }, { "epoch": 0.5098674067221708, "grad_norm": 1.0539437532424927, "learning_rate": 5.0832215637381065e-06, "loss": 0.7304, "step": 9921 }, { "epoch": 0.5099187994655154, "grad_norm": 0.6832962036132812, "learning_rate": 5.082389423810661e-06, "loss": 0.6825, "step": 9922 }, { "epoch": 0.5099701922088601, "grad_norm": 1.0831444263458252, "learning_rate": 5.081557281600546e-06, "loss": 0.7533, "step": 9923 }, { "epoch": 0.5100215849522047, "grad_norm": 0.8382664322853088, "learning_rate": 5.080725137130813e-06, "loss": 0.6879, "step": 9924 }, { "epoch": 0.5100729776955494, "grad_norm": 1.068884015083313, "learning_rate": 5.07989299042452e-06, "loss": 0.7419, "step": 9925 }, { "epoch": 0.510124370438894, "grad_norm": 0.7375056743621826, "learning_rate": 5.079060841504722e-06, "loss": 0.6332, "step": 9926 }, { "epoch": 0.5101757631822387, "grad_norm": 1.2274845838546753, "learning_rate": 5.0782286903944756e-06, "loss": 0.6946, "step": 9927 }, { "epoch": 0.5102271559255833, "grad_norm": 1.1557543277740479, "learning_rate": 5.077396537116834e-06, "loss": 0.7152, "step": 9928 }, { "epoch": 0.510278548668928, "grad_norm": 1.0374456644058228, "learning_rate": 5.076564381694855e-06, "loss": 0.7224, "step": 9929 }, { "epoch": 0.5103299414122726, "grad_norm": 1.1077392101287842, "learning_rate": 5.075732224151591e-06, "loss": 0.7681, "step": 9930 }, { "epoch": 0.5103813341556173, "grad_norm": 1.0155898332595825, "learning_rate": 5.0749000645101024e-06, "loss": 0.6725, "step": 9931 }, { "epoch": 0.5104327268989619, "grad_norm": 1.1397408246994019, "learning_rate": 5.0740679027934396e-06, "loss": 0.7295, "step": 9932 }, { "epoch": 0.5104841196423066, "grad_norm": 1.0489726066589355, "learning_rate": 5.073235739024662e-06, "loss": 0.7521, "step": 9933 }, { "epoch": 0.5105355123856511, "grad_norm": 1.1073660850524902, "learning_rate": 5.072403573226824e-06, "loss": 0.7808, "step": 9934 }, { "epoch": 0.5105869051289957, "grad_norm": 0.9545060396194458, "learning_rate": 5.0715714054229805e-06, "loss": 0.6692, "step": 9935 }, { "epoch": 0.5106382978723404, "grad_norm": 1.1086797714233398, "learning_rate": 5.070739235636191e-06, "loss": 0.7343, "step": 9936 }, { "epoch": 0.510689690615685, "grad_norm": 1.048006534576416, "learning_rate": 5.0699070638895085e-06, "loss": 0.704, "step": 9937 }, { "epoch": 0.5107410833590297, "grad_norm": 1.081774115562439, "learning_rate": 5.069074890205988e-06, "loss": 0.7776, "step": 9938 }, { "epoch": 0.5107924761023743, "grad_norm": 1.0957489013671875, "learning_rate": 5.0682427146086895e-06, "loss": 0.705, "step": 9939 }, { "epoch": 0.510843868845719, "grad_norm": 1.0766584873199463, "learning_rate": 5.067410537120666e-06, "loss": 0.6759, "step": 9940 }, { "epoch": 0.5108952615890636, "grad_norm": 0.7760927677154541, "learning_rate": 5.066578357764977e-06, "loss": 0.6445, "step": 9941 }, { "epoch": 0.5109466543324083, "grad_norm": 1.0265936851501465, "learning_rate": 5.065746176564674e-06, "loss": 0.6772, "step": 9942 }, { "epoch": 0.5109980470757529, "grad_norm": 1.101129174232483, "learning_rate": 5.064913993542816e-06, "loss": 0.6589, "step": 9943 }, { "epoch": 0.5110494398190976, "grad_norm": 1.0290597677230835, "learning_rate": 5.0640818087224585e-06, "loss": 0.7251, "step": 9944 }, { "epoch": 0.5111008325624422, "grad_norm": 1.0331300497055054, "learning_rate": 5.063249622126659e-06, "loss": 0.7202, "step": 9945 }, { "epoch": 0.5111522253057869, "grad_norm": 1.0417733192443848, "learning_rate": 5.062417433778474e-06, "loss": 0.7423, "step": 9946 }, { "epoch": 0.5112036180491315, "grad_norm": 1.0495240688323975, "learning_rate": 5.0615852437009595e-06, "loss": 0.7096, "step": 9947 }, { "epoch": 0.5112550107924761, "grad_norm": 0.7170805931091309, "learning_rate": 5.060753051917171e-06, "loss": 0.6781, "step": 9948 }, { "epoch": 0.5113064035358208, "grad_norm": 1.0996216535568237, "learning_rate": 5.059920858450168e-06, "loss": 0.7297, "step": 9949 }, { "epoch": 0.5113577962791653, "grad_norm": 0.9236396551132202, "learning_rate": 5.0590886633230055e-06, "loss": 0.7292, "step": 9950 }, { "epoch": 0.51140918902251, "grad_norm": 1.0355949401855469, "learning_rate": 5.058256466558737e-06, "loss": 0.6779, "step": 9951 }, { "epoch": 0.5114605817658546, "grad_norm": 0.7145227789878845, "learning_rate": 5.057424268180425e-06, "loss": 0.6418, "step": 9952 }, { "epoch": 0.5115119745091993, "grad_norm": 0.7311011552810669, "learning_rate": 5.056592068211123e-06, "loss": 0.6776, "step": 9953 }, { "epoch": 0.5115633672525439, "grad_norm": 1.0962895154953003, "learning_rate": 5.055759866673887e-06, "loss": 0.78, "step": 9954 }, { "epoch": 0.5116147599958886, "grad_norm": 1.0451743602752686, "learning_rate": 5.054927663591777e-06, "loss": 0.7727, "step": 9955 }, { "epoch": 0.5116661527392332, "grad_norm": 1.0709999799728394, "learning_rate": 5.054095458987845e-06, "loss": 0.733, "step": 9956 }, { "epoch": 0.5117175454825779, "grad_norm": 1.0972120761871338, "learning_rate": 5.053263252885154e-06, "loss": 0.7979, "step": 9957 }, { "epoch": 0.5117689382259225, "grad_norm": 0.9974589943885803, "learning_rate": 5.052431045306758e-06, "loss": 0.6952, "step": 9958 }, { "epoch": 0.5118203309692672, "grad_norm": 1.06667959690094, "learning_rate": 5.051598836275713e-06, "loss": 0.7011, "step": 9959 }, { "epoch": 0.5118717237126118, "grad_norm": 1.2247523069381714, "learning_rate": 5.050766625815078e-06, "loss": 0.6712, "step": 9960 }, { "epoch": 0.5119231164559565, "grad_norm": 1.089448094367981, "learning_rate": 5.04993441394791e-06, "loss": 0.7714, "step": 9961 }, { "epoch": 0.5119745091993011, "grad_norm": 1.1249605417251587, "learning_rate": 5.049102200697263e-06, "loss": 0.7324, "step": 9962 }, { "epoch": 0.5120259019426457, "grad_norm": 1.035601019859314, "learning_rate": 5.0482699860862e-06, "loss": 0.7162, "step": 9963 }, { "epoch": 0.5120772946859904, "grad_norm": 1.060017466545105, "learning_rate": 5.047437770137772e-06, "loss": 0.6778, "step": 9964 }, { "epoch": 0.5121286874293349, "grad_norm": 1.140423059463501, "learning_rate": 5.046605552875039e-06, "loss": 0.7114, "step": 9965 }, { "epoch": 0.5121800801726796, "grad_norm": 1.0284943580627441, "learning_rate": 5.04577333432106e-06, "loss": 0.7291, "step": 9966 }, { "epoch": 0.5122314729160242, "grad_norm": 1.0369336605072021, "learning_rate": 5.04494111449889e-06, "loss": 0.7148, "step": 9967 }, { "epoch": 0.5122828656593689, "grad_norm": 0.7402137517929077, "learning_rate": 5.0441088934315875e-06, "loss": 0.687, "step": 9968 }, { "epoch": 0.5123342584027135, "grad_norm": 0.7451249361038208, "learning_rate": 5.0432766711422095e-06, "loss": 0.6985, "step": 9969 }, { "epoch": 0.5123856511460582, "grad_norm": 1.0351440906524658, "learning_rate": 5.042444447653814e-06, "loss": 0.709, "step": 9970 }, { "epoch": 0.5124370438894028, "grad_norm": 1.1339695453643799, "learning_rate": 5.041612222989458e-06, "loss": 0.754, "step": 9971 }, { "epoch": 0.5124884366327475, "grad_norm": 1.0996034145355225, "learning_rate": 5.040779997172198e-06, "loss": 0.7807, "step": 9972 }, { "epoch": 0.5125398293760921, "grad_norm": 1.024046540260315, "learning_rate": 5.039947770225094e-06, "loss": 0.7229, "step": 9973 }, { "epoch": 0.5125912221194368, "grad_norm": 1.1051123142242432, "learning_rate": 5.039115542171201e-06, "loss": 0.7109, "step": 9974 }, { "epoch": 0.5126426148627814, "grad_norm": 1.1773992776870728, "learning_rate": 5.0382833130335785e-06, "loss": 0.7709, "step": 9975 }, { "epoch": 0.512694007606126, "grad_norm": 1.102802038192749, "learning_rate": 5.037451082835282e-06, "loss": 0.7984, "step": 9976 }, { "epoch": 0.5127454003494707, "grad_norm": 1.1007990837097168, "learning_rate": 5.036618851599372e-06, "loss": 0.7723, "step": 9977 }, { "epoch": 0.5127967930928153, "grad_norm": 1.059372901916504, "learning_rate": 5.035786619348904e-06, "loss": 0.724, "step": 9978 }, { "epoch": 0.51284818583616, "grad_norm": 0.7930027842521667, "learning_rate": 5.0349543861069375e-06, "loss": 0.659, "step": 9979 }, { "epoch": 0.5128995785795045, "grad_norm": 1.0394139289855957, "learning_rate": 5.034122151896528e-06, "loss": 0.7419, "step": 9980 }, { "epoch": 0.5129509713228492, "grad_norm": 1.0532726049423218, "learning_rate": 5.0332899167407345e-06, "loss": 0.7668, "step": 9981 }, { "epoch": 0.5130023640661938, "grad_norm": 1.030001163482666, "learning_rate": 5.032457680662617e-06, "loss": 0.7421, "step": 9982 }, { "epoch": 0.5130537568095385, "grad_norm": 1.0722559690475464, "learning_rate": 5.031625443685229e-06, "loss": 0.7684, "step": 9983 }, { "epoch": 0.5131051495528831, "grad_norm": 1.0781745910644531, "learning_rate": 5.03079320583163e-06, "loss": 0.6827, "step": 9984 }, { "epoch": 0.5131565422962278, "grad_norm": 0.8634644150733948, "learning_rate": 5.0299609671248794e-06, "loss": 0.6325, "step": 9985 }, { "epoch": 0.5132079350395724, "grad_norm": 1.0503097772598267, "learning_rate": 5.029128727588033e-06, "loss": 0.7377, "step": 9986 }, { "epoch": 0.513259327782917, "grad_norm": 1.04427969455719, "learning_rate": 5.028296487244151e-06, "loss": 0.7441, "step": 9987 }, { "epoch": 0.5133107205262617, "grad_norm": 1.1315069198608398, "learning_rate": 5.027464246116289e-06, "loss": 0.7332, "step": 9988 }, { "epoch": 0.5133621132696063, "grad_norm": 1.2521798610687256, "learning_rate": 5.026632004227507e-06, "loss": 0.6841, "step": 9989 }, { "epoch": 0.513413506012951, "grad_norm": 1.0465410947799683, "learning_rate": 5.025799761600863e-06, "loss": 0.6803, "step": 9990 }, { "epoch": 0.5134648987562956, "grad_norm": 1.0811731815338135, "learning_rate": 5.024967518259412e-06, "loss": 0.7408, "step": 9991 }, { "epoch": 0.5135162914996403, "grad_norm": 1.020810604095459, "learning_rate": 5.024135274226215e-06, "loss": 0.7477, "step": 9992 }, { "epoch": 0.5135676842429849, "grad_norm": 1.0928665399551392, "learning_rate": 5.02330302952433e-06, "loss": 0.7356, "step": 9993 }, { "epoch": 0.5136190769863296, "grad_norm": 1.1087265014648438, "learning_rate": 5.022470784176813e-06, "loss": 0.6909, "step": 9994 }, { "epoch": 0.5136704697296741, "grad_norm": 1.0444000959396362, "learning_rate": 5.021638538206722e-06, "loss": 0.7003, "step": 9995 }, { "epoch": 0.5137218624730188, "grad_norm": 0.978169322013855, "learning_rate": 5.020806291637119e-06, "loss": 0.6627, "step": 9996 }, { "epoch": 0.5137732552163634, "grad_norm": 1.0952918529510498, "learning_rate": 5.0199740444910585e-06, "loss": 0.714, "step": 9997 }, { "epoch": 0.5138246479597081, "grad_norm": 1.0135573148727417, "learning_rate": 5.0191417967916e-06, "loss": 0.7325, "step": 9998 }, { "epoch": 0.5138760407030527, "grad_norm": 1.144079327583313, "learning_rate": 5.018309548561801e-06, "loss": 0.716, "step": 9999 }, { "epoch": 0.5139274334463974, "grad_norm": 1.0562576055526733, "learning_rate": 5.01747729982472e-06, "loss": 0.7315, "step": 10000 }, { "epoch": 0.513978826189742, "grad_norm": 1.0727083683013916, "learning_rate": 5.016645050603416e-06, "loss": 0.6809, "step": 10001 }, { "epoch": 0.5140302189330866, "grad_norm": 0.7119978070259094, "learning_rate": 5.015812800920945e-06, "loss": 0.6537, "step": 10002 }, { "epoch": 0.5140816116764313, "grad_norm": 0.7321502566337585, "learning_rate": 5.014980550800368e-06, "loss": 0.6737, "step": 10003 }, { "epoch": 0.5141330044197759, "grad_norm": 1.125680923461914, "learning_rate": 5.01414830026474e-06, "loss": 0.7386, "step": 10004 }, { "epoch": 0.5141843971631206, "grad_norm": 1.0520412921905518, "learning_rate": 5.0133160493371225e-06, "loss": 0.7153, "step": 10005 }, { "epoch": 0.5142357899064652, "grad_norm": 1.0806152820587158, "learning_rate": 5.01248379804057e-06, "loss": 0.7748, "step": 10006 }, { "epoch": 0.5142871826498099, "grad_norm": 0.7093769907951355, "learning_rate": 5.0116515463981445e-06, "loss": 0.644, "step": 10007 }, { "epoch": 0.5143385753931545, "grad_norm": 1.1175665855407715, "learning_rate": 5.010819294432903e-06, "loss": 0.7956, "step": 10008 }, { "epoch": 0.5143899681364992, "grad_norm": 1.0346875190734863, "learning_rate": 5.0099870421679045e-06, "loss": 0.8035, "step": 10009 }, { "epoch": 0.5144413608798437, "grad_norm": 1.0701708793640137, "learning_rate": 5.0091547896262035e-06, "loss": 0.781, "step": 10010 }, { "epoch": 0.5144927536231884, "grad_norm": 1.1261237859725952, "learning_rate": 5.008322536830863e-06, "loss": 0.7357, "step": 10011 }, { "epoch": 0.514544146366533, "grad_norm": 1.097928524017334, "learning_rate": 5.007490283804939e-06, "loss": 0.7564, "step": 10012 }, { "epoch": 0.5145955391098777, "grad_norm": 1.0451159477233887, "learning_rate": 5.006658030571489e-06, "loss": 0.6905, "step": 10013 }, { "epoch": 0.5146469318532223, "grad_norm": 1.0826547145843506, "learning_rate": 5.005825777153576e-06, "loss": 0.7205, "step": 10014 }, { "epoch": 0.514698324596567, "grad_norm": 0.7968506813049316, "learning_rate": 5.004993523574251e-06, "loss": 0.6967, "step": 10015 }, { "epoch": 0.5147497173399116, "grad_norm": 1.021269679069519, "learning_rate": 5.004161269856577e-06, "loss": 0.7051, "step": 10016 }, { "epoch": 0.5148011100832562, "grad_norm": 1.056077480316162, "learning_rate": 5.003329016023611e-06, "loss": 0.6972, "step": 10017 }, { "epoch": 0.5148525028266009, "grad_norm": 0.735413670539856, "learning_rate": 5.002496762098412e-06, "loss": 0.6744, "step": 10018 }, { "epoch": 0.5149038955699455, "grad_norm": 0.754229724407196, "learning_rate": 5.00166450810404e-06, "loss": 0.6715, "step": 10019 }, { "epoch": 0.5149552883132902, "grad_norm": 1.1047674417495728, "learning_rate": 5.000832254063549e-06, "loss": 0.7997, "step": 10020 }, { "epoch": 0.5150066810566348, "grad_norm": 1.0972453355789185, "learning_rate": 5e-06, "loss": 0.7358, "step": 10021 }, { "epoch": 0.5150580737999795, "grad_norm": 1.1739020347595215, "learning_rate": 4.999167745936452e-06, "loss": 0.8159, "step": 10022 }, { "epoch": 0.5151094665433241, "grad_norm": 1.0756162405014038, "learning_rate": 4.998335491895963e-06, "loss": 0.7041, "step": 10023 }, { "epoch": 0.5151608592866688, "grad_norm": 1.083341360092163, "learning_rate": 4.9975032379015884e-06, "loss": 0.7095, "step": 10024 }, { "epoch": 0.5152122520300133, "grad_norm": 1.0261797904968262, "learning_rate": 4.9966709839763895e-06, "loss": 0.7285, "step": 10025 }, { "epoch": 0.515263644773358, "grad_norm": 1.1664340496063232, "learning_rate": 4.995838730143425e-06, "loss": 0.751, "step": 10026 }, { "epoch": 0.5153150375167026, "grad_norm": 1.1296825408935547, "learning_rate": 4.995006476425751e-06, "loss": 0.7619, "step": 10027 }, { "epoch": 0.5153664302600472, "grad_norm": 0.9979335069656372, "learning_rate": 4.994174222846426e-06, "loss": 0.7322, "step": 10028 }, { "epoch": 0.5154178230033919, "grad_norm": 0.8124058246612549, "learning_rate": 4.99334196942851e-06, "loss": 0.6606, "step": 10029 }, { "epoch": 0.5154692157467365, "grad_norm": 0.8053849339485168, "learning_rate": 4.992509716195063e-06, "loss": 0.6602, "step": 10030 }, { "epoch": 0.5155206084900812, "grad_norm": 1.1213752031326294, "learning_rate": 4.991677463169138e-06, "loss": 0.7262, "step": 10031 }, { "epoch": 0.5155720012334258, "grad_norm": 0.7765405774116516, "learning_rate": 4.9908452103737965e-06, "loss": 0.7036, "step": 10032 }, { "epoch": 0.5156233939767705, "grad_norm": 1.114177942276001, "learning_rate": 4.990012957832099e-06, "loss": 0.7338, "step": 10033 }, { "epoch": 0.5156747867201151, "grad_norm": 1.068792700767517, "learning_rate": 4.989180705567098e-06, "loss": 0.7376, "step": 10034 }, { "epoch": 0.5157261794634598, "grad_norm": 1.0835320949554443, "learning_rate": 4.988348453601856e-06, "loss": 0.7111, "step": 10035 }, { "epoch": 0.5157775722068044, "grad_norm": 1.068804144859314, "learning_rate": 4.987516201959431e-06, "loss": 0.6811, "step": 10036 }, { "epoch": 0.5158289649501491, "grad_norm": 1.0691921710968018, "learning_rate": 4.986683950662879e-06, "loss": 0.7853, "step": 10037 }, { "epoch": 0.5158803576934937, "grad_norm": 1.025018572807312, "learning_rate": 4.98585169973526e-06, "loss": 0.7909, "step": 10038 }, { "epoch": 0.5159317504368384, "grad_norm": 0.9912868738174438, "learning_rate": 4.985019449199635e-06, "loss": 0.6044, "step": 10039 }, { "epoch": 0.515983143180183, "grad_norm": 1.0241923332214355, "learning_rate": 4.9841871990790565e-06, "loss": 0.7355, "step": 10040 }, { "epoch": 0.5160345359235275, "grad_norm": 0.734035849571228, "learning_rate": 4.9833549493965854e-06, "loss": 0.6246, "step": 10041 }, { "epoch": 0.5160859286668722, "grad_norm": 1.035252332687378, "learning_rate": 4.982522700175282e-06, "loss": 0.7083, "step": 10042 }, { "epoch": 0.5161373214102168, "grad_norm": 1.0407353639602661, "learning_rate": 4.981690451438201e-06, "loss": 0.7477, "step": 10043 }, { "epoch": 0.5161887141535615, "grad_norm": 1.0906702280044556, "learning_rate": 4.980858203208402e-06, "loss": 0.7333, "step": 10044 }, { "epoch": 0.5162401068969061, "grad_norm": 0.9903335571289062, "learning_rate": 4.980025955508942e-06, "loss": 0.6734, "step": 10045 }, { "epoch": 0.5162914996402508, "grad_norm": 1.0677039623260498, "learning_rate": 4.979193708362882e-06, "loss": 0.6967, "step": 10046 }, { "epoch": 0.5163428923835954, "grad_norm": 1.1202876567840576, "learning_rate": 4.978361461793279e-06, "loss": 0.7315, "step": 10047 }, { "epoch": 0.5163942851269401, "grad_norm": 1.0525214672088623, "learning_rate": 4.977529215823189e-06, "loss": 0.7153, "step": 10048 }, { "epoch": 0.5164456778702847, "grad_norm": 1.147531509399414, "learning_rate": 4.9766969704756725e-06, "loss": 0.7864, "step": 10049 }, { "epoch": 0.5164970706136294, "grad_norm": 1.084831953048706, "learning_rate": 4.9758647257737865e-06, "loss": 0.7198, "step": 10050 }, { "epoch": 0.516548463356974, "grad_norm": 0.8449286818504333, "learning_rate": 4.975032481740589e-06, "loss": 0.7134, "step": 10051 }, { "epoch": 0.5165998561003187, "grad_norm": 1.09345543384552, "learning_rate": 4.97420023839914e-06, "loss": 0.7746, "step": 10052 }, { "epoch": 0.5166512488436633, "grad_norm": 1.1523234844207764, "learning_rate": 4.973367995772494e-06, "loss": 0.7652, "step": 10053 }, { "epoch": 0.516702641587008, "grad_norm": 1.1210484504699707, "learning_rate": 4.972535753883712e-06, "loss": 0.7441, "step": 10054 }, { "epoch": 0.5167540343303526, "grad_norm": 0.9923346042633057, "learning_rate": 4.971703512755852e-06, "loss": 0.6943, "step": 10055 }, { "epoch": 0.5168054270736971, "grad_norm": 1.0453273057937622, "learning_rate": 4.970871272411968e-06, "loss": 0.7679, "step": 10056 }, { "epoch": 0.5168568198170418, "grad_norm": 1.0931272506713867, "learning_rate": 4.970039032875122e-06, "loss": 0.71, "step": 10057 }, { "epoch": 0.5169082125603864, "grad_norm": 1.0573817491531372, "learning_rate": 4.969206794168372e-06, "loss": 0.6858, "step": 10058 }, { "epoch": 0.5169596053037311, "grad_norm": 1.043793797492981, "learning_rate": 4.968374556314774e-06, "loss": 0.7537, "step": 10059 }, { "epoch": 0.5170109980470757, "grad_norm": 1.0709426403045654, "learning_rate": 4.967542319337385e-06, "loss": 0.6552, "step": 10060 }, { "epoch": 0.5170623907904204, "grad_norm": 1.0774037837982178, "learning_rate": 4.966710083259265e-06, "loss": 0.758, "step": 10061 }, { "epoch": 0.517113783533765, "grad_norm": 1.1162967681884766, "learning_rate": 4.965877848103474e-06, "loss": 0.7456, "step": 10062 }, { "epoch": 0.5171651762771097, "grad_norm": 1.0241361856460571, "learning_rate": 4.965045613893064e-06, "loss": 0.654, "step": 10063 }, { "epoch": 0.5172165690204543, "grad_norm": 1.0501713752746582, "learning_rate": 4.964213380651096e-06, "loss": 0.6959, "step": 10064 }, { "epoch": 0.517267961763799, "grad_norm": 1.083525538444519, "learning_rate": 4.96338114840063e-06, "loss": 0.667, "step": 10065 }, { "epoch": 0.5173193545071436, "grad_norm": 1.0726380348205566, "learning_rate": 4.962548917164719e-06, "loss": 0.7419, "step": 10066 }, { "epoch": 0.5173707472504883, "grad_norm": 0.7506343722343445, "learning_rate": 4.961716686966423e-06, "loss": 0.6315, "step": 10067 }, { "epoch": 0.5174221399938329, "grad_norm": 0.7659317255020142, "learning_rate": 4.960884457828801e-06, "loss": 0.6765, "step": 10068 }, { "epoch": 0.5174735327371776, "grad_norm": 1.0174864530563354, "learning_rate": 4.960052229774908e-06, "loss": 0.6993, "step": 10069 }, { "epoch": 0.5175249254805222, "grad_norm": 1.0752745866775513, "learning_rate": 4.959220002827802e-06, "loss": 0.7202, "step": 10070 }, { "epoch": 0.5175763182238667, "grad_norm": 1.0877786874771118, "learning_rate": 4.9583877770105446e-06, "loss": 0.7045, "step": 10071 }, { "epoch": 0.5176277109672114, "grad_norm": 1.0791257619857788, "learning_rate": 4.957555552346188e-06, "loss": 0.7238, "step": 10072 }, { "epoch": 0.517679103710556, "grad_norm": 1.1036862134933472, "learning_rate": 4.956723328857791e-06, "loss": 0.7397, "step": 10073 }, { "epoch": 0.5177304964539007, "grad_norm": 1.1329838037490845, "learning_rate": 4.955891106568414e-06, "loss": 0.7505, "step": 10074 }, { "epoch": 0.5177818891972453, "grad_norm": 1.0794751644134521, "learning_rate": 4.9550588855011115e-06, "loss": 0.7816, "step": 10075 }, { "epoch": 0.51783328194059, "grad_norm": 1.0275871753692627, "learning_rate": 4.954226665678941e-06, "loss": 0.696, "step": 10076 }, { "epoch": 0.5178846746839346, "grad_norm": 1.032483696937561, "learning_rate": 4.953394447124961e-06, "loss": 0.7308, "step": 10077 }, { "epoch": 0.5179360674272793, "grad_norm": 1.056928277015686, "learning_rate": 4.95256222986223e-06, "loss": 0.6688, "step": 10078 }, { "epoch": 0.5179874601706239, "grad_norm": 1.0809650421142578, "learning_rate": 4.951730013913803e-06, "loss": 0.7119, "step": 10079 }, { "epoch": 0.5180388529139686, "grad_norm": 0.840225338935852, "learning_rate": 4.950897799302737e-06, "loss": 0.6552, "step": 10080 }, { "epoch": 0.5180902456573132, "grad_norm": 1.2608189582824707, "learning_rate": 4.950065586052093e-06, "loss": 0.7485, "step": 10081 }, { "epoch": 0.5181416384006579, "grad_norm": 1.1171305179595947, "learning_rate": 4.949233374184923e-06, "loss": 0.6957, "step": 10082 }, { "epoch": 0.5181930311440025, "grad_norm": 1.106289029121399, "learning_rate": 4.948401163724288e-06, "loss": 0.7655, "step": 10083 }, { "epoch": 0.5182444238873471, "grad_norm": 1.152886986732483, "learning_rate": 4.9475689546932445e-06, "loss": 0.7348, "step": 10084 }, { "epoch": 0.5182958166306918, "grad_norm": 1.0269783735275269, "learning_rate": 4.946736747114847e-06, "loss": 0.6873, "step": 10085 }, { "epoch": 0.5183472093740363, "grad_norm": 1.0755980014801025, "learning_rate": 4.945904541012155e-06, "loss": 0.7754, "step": 10086 }, { "epoch": 0.518398602117381, "grad_norm": 1.166402816772461, "learning_rate": 4.945072336408226e-06, "loss": 0.7383, "step": 10087 }, { "epoch": 0.5184499948607256, "grad_norm": 1.0920078754425049, "learning_rate": 4.9442401333261134e-06, "loss": 0.7521, "step": 10088 }, { "epoch": 0.5185013876040703, "grad_norm": 1.0589168071746826, "learning_rate": 4.943407931788878e-06, "loss": 0.7312, "step": 10089 }, { "epoch": 0.5185527803474149, "grad_norm": 1.1120007038116455, "learning_rate": 4.942575731819577e-06, "loss": 0.7695, "step": 10090 }, { "epoch": 0.5186041730907596, "grad_norm": 1.0632996559143066, "learning_rate": 4.941743533441264e-06, "loss": 0.7013, "step": 10091 }, { "epoch": 0.5186555658341042, "grad_norm": 1.064518928527832, "learning_rate": 4.940911336676996e-06, "loss": 0.6918, "step": 10092 }, { "epoch": 0.5187069585774489, "grad_norm": 0.9917042851448059, "learning_rate": 4.940079141549832e-06, "loss": 0.693, "step": 10093 }, { "epoch": 0.5187583513207935, "grad_norm": 1.0558648109436035, "learning_rate": 4.93924694808283e-06, "loss": 0.6743, "step": 10094 }, { "epoch": 0.5188097440641382, "grad_norm": 1.0437923669815063, "learning_rate": 4.938414756299041e-06, "loss": 0.7187, "step": 10095 }, { "epoch": 0.5188611368074828, "grad_norm": 1.1169308423995972, "learning_rate": 4.937582566221527e-06, "loss": 0.7063, "step": 10096 }, { "epoch": 0.5189125295508275, "grad_norm": 1.0844709873199463, "learning_rate": 4.9367503778733416e-06, "loss": 0.7864, "step": 10097 }, { "epoch": 0.5189639222941721, "grad_norm": 1.0871585607528687, "learning_rate": 4.935918191277543e-06, "loss": 0.747, "step": 10098 }, { "epoch": 0.5190153150375167, "grad_norm": 1.0182937383651733, "learning_rate": 4.935086006457186e-06, "loss": 0.6907, "step": 10099 }, { "epoch": 0.5190667077808614, "grad_norm": 1.0490105152130127, "learning_rate": 4.934253823435329e-06, "loss": 0.7234, "step": 10100 }, { "epoch": 0.5191181005242059, "grad_norm": 1.078484058380127, "learning_rate": 4.933421642235026e-06, "loss": 0.7675, "step": 10101 }, { "epoch": 0.5191694932675506, "grad_norm": 1.065463662147522, "learning_rate": 4.932589462879334e-06, "loss": 0.6461, "step": 10102 }, { "epoch": 0.5192208860108952, "grad_norm": 1.0733187198638916, "learning_rate": 4.931757285391312e-06, "loss": 0.7449, "step": 10103 }, { "epoch": 0.5192722787542399, "grad_norm": 1.0455801486968994, "learning_rate": 4.930925109794013e-06, "loss": 0.6703, "step": 10104 }, { "epoch": 0.5193236714975845, "grad_norm": 1.0816839933395386, "learning_rate": 4.930092936110493e-06, "loss": 0.7065, "step": 10105 }, { "epoch": 0.5193750642409292, "grad_norm": 1.0817430019378662, "learning_rate": 4.929260764363812e-06, "loss": 0.7376, "step": 10106 }, { "epoch": 0.5194264569842738, "grad_norm": 1.0709303617477417, "learning_rate": 4.92842859457702e-06, "loss": 0.7111, "step": 10107 }, { "epoch": 0.5194778497276185, "grad_norm": 1.1121569871902466, "learning_rate": 4.927596426773178e-06, "loss": 0.7007, "step": 10108 }, { "epoch": 0.5195292424709631, "grad_norm": 0.9839836955070496, "learning_rate": 4.926764260975339e-06, "loss": 0.6718, "step": 10109 }, { "epoch": 0.5195806352143078, "grad_norm": 1.1132720708847046, "learning_rate": 4.925932097206562e-06, "loss": 0.7776, "step": 10110 }, { "epoch": 0.5196320279576524, "grad_norm": 0.9853299260139465, "learning_rate": 4.925099935489899e-06, "loss": 0.6324, "step": 10111 }, { "epoch": 0.519683420700997, "grad_norm": 2.1348841190338135, "learning_rate": 4.924267775848409e-06, "loss": 0.7693, "step": 10112 }, { "epoch": 0.5197348134443417, "grad_norm": 1.0486620664596558, "learning_rate": 4.923435618305147e-06, "loss": 0.6807, "step": 10113 }, { "epoch": 0.5197862061876863, "grad_norm": 1.0264983177185059, "learning_rate": 4.922603462883167e-06, "loss": 0.7206, "step": 10114 }, { "epoch": 0.519837598931031, "grad_norm": 1.076831340789795, "learning_rate": 4.921771309605525e-06, "loss": 0.7243, "step": 10115 }, { "epoch": 0.5198889916743756, "grad_norm": 1.3218351602554321, "learning_rate": 4.92093915849528e-06, "loss": 0.7257, "step": 10116 }, { "epoch": 0.5199403844177202, "grad_norm": 1.0666029453277588, "learning_rate": 4.920107009575482e-06, "loss": 0.7127, "step": 10117 }, { "epoch": 0.5199917771610648, "grad_norm": 1.143746256828308, "learning_rate": 4.919274862869189e-06, "loss": 0.6984, "step": 10118 }, { "epoch": 0.5200431699044095, "grad_norm": 0.7287662029266357, "learning_rate": 4.918442718399458e-06, "loss": 0.7039, "step": 10119 }, { "epoch": 0.5200945626477541, "grad_norm": 1.0016412734985352, "learning_rate": 4.917610576189341e-06, "loss": 0.6697, "step": 10120 }, { "epoch": 0.5201459553910988, "grad_norm": 1.0646061897277832, "learning_rate": 4.916778436261895e-06, "loss": 0.7257, "step": 10121 }, { "epoch": 0.5201973481344434, "grad_norm": 1.1687465906143188, "learning_rate": 4.915946298640177e-06, "loss": 0.7019, "step": 10122 }, { "epoch": 0.520248740877788, "grad_norm": 1.0763282775878906, "learning_rate": 4.915114163347239e-06, "loss": 0.795, "step": 10123 }, { "epoch": 0.5203001336211327, "grad_norm": 1.0665556192398071, "learning_rate": 4.914282030406137e-06, "loss": 0.7865, "step": 10124 }, { "epoch": 0.5203515263644773, "grad_norm": 0.7754822969436646, "learning_rate": 4.913449899839926e-06, "loss": 0.6761, "step": 10125 }, { "epoch": 0.520402919107822, "grad_norm": 1.0359448194503784, "learning_rate": 4.912617771671663e-06, "loss": 0.7521, "step": 10126 }, { "epoch": 0.5204543118511666, "grad_norm": 1.0206087827682495, "learning_rate": 4.911785645924399e-06, "loss": 0.6975, "step": 10127 }, { "epoch": 0.5205057045945113, "grad_norm": 1.1768959760665894, "learning_rate": 4.910953522621191e-06, "loss": 0.6475, "step": 10128 }, { "epoch": 0.5205570973378559, "grad_norm": 1.079666256904602, "learning_rate": 4.910121401785094e-06, "loss": 0.6817, "step": 10129 }, { "epoch": 0.5206084900812006, "grad_norm": 0.7699293494224548, "learning_rate": 4.909289283439161e-06, "loss": 0.671, "step": 10130 }, { "epoch": 0.5206598828245452, "grad_norm": 1.0507467985153198, "learning_rate": 4.9084571676064475e-06, "loss": 0.7393, "step": 10131 }, { "epoch": 0.5207112755678898, "grad_norm": 1.1871628761291504, "learning_rate": 4.9076250543100105e-06, "loss": 0.7654, "step": 10132 }, { "epoch": 0.5207626683112344, "grad_norm": 0.8325191140174866, "learning_rate": 4.9067929435729e-06, "loss": 0.6644, "step": 10133 }, { "epoch": 0.5208140610545791, "grad_norm": 1.1358674764633179, "learning_rate": 4.905960835418173e-06, "loss": 0.7668, "step": 10134 }, { "epoch": 0.5208654537979237, "grad_norm": 1.1057240962982178, "learning_rate": 4.905128729868884e-06, "loss": 0.7026, "step": 10135 }, { "epoch": 0.5209168465412684, "grad_norm": 0.7924284338951111, "learning_rate": 4.904296626948085e-06, "loss": 0.6731, "step": 10136 }, { "epoch": 0.520968239284613, "grad_norm": 1.0748714208602905, "learning_rate": 4.903464526678831e-06, "loss": 0.681, "step": 10137 }, { "epoch": 0.5210196320279576, "grad_norm": 1.0847370624542236, "learning_rate": 4.902632429084177e-06, "loss": 0.7345, "step": 10138 }, { "epoch": 0.5210710247713023, "grad_norm": 1.0287234783172607, "learning_rate": 4.901800334187178e-06, "loss": 0.745, "step": 10139 }, { "epoch": 0.5211224175146469, "grad_norm": 1.1694872379302979, "learning_rate": 4.900968242010885e-06, "loss": 0.7201, "step": 10140 }, { "epoch": 0.5211738102579916, "grad_norm": 1.1210083961486816, "learning_rate": 4.900136152578354e-06, "loss": 0.7147, "step": 10141 }, { "epoch": 0.5212252030013362, "grad_norm": 1.1188421249389648, "learning_rate": 4.899304065912639e-06, "loss": 0.7554, "step": 10142 }, { "epoch": 0.5212765957446809, "grad_norm": 1.0639983415603638, "learning_rate": 4.898471982036792e-06, "loss": 0.7317, "step": 10143 }, { "epoch": 0.5213279884880255, "grad_norm": 0.739722490310669, "learning_rate": 4.897639900973866e-06, "loss": 0.6638, "step": 10144 }, { "epoch": 0.5213793812313702, "grad_norm": 1.1600335836410522, "learning_rate": 4.896807822746919e-06, "loss": 0.7262, "step": 10145 }, { "epoch": 0.5214307739747148, "grad_norm": 1.085508108139038, "learning_rate": 4.8959757473789986e-06, "loss": 0.7449, "step": 10146 }, { "epoch": 0.5214821667180594, "grad_norm": 1.0488207340240479, "learning_rate": 4.895143674893161e-06, "loss": 0.7278, "step": 10147 }, { "epoch": 0.521533559461404, "grad_norm": 1.114403486251831, "learning_rate": 4.8943116053124615e-06, "loss": 0.7415, "step": 10148 }, { "epoch": 0.5215849522047487, "grad_norm": 1.0503954887390137, "learning_rate": 4.89347953865995e-06, "loss": 0.7192, "step": 10149 }, { "epoch": 0.5216363449480933, "grad_norm": 1.0436384677886963, "learning_rate": 4.892647474958681e-06, "loss": 0.706, "step": 10150 }, { "epoch": 0.521687737691438, "grad_norm": 1.090611219406128, "learning_rate": 4.891815414231707e-06, "loss": 0.7629, "step": 10151 }, { "epoch": 0.5217391304347826, "grad_norm": 1.0910191535949707, "learning_rate": 4.890983356502082e-06, "loss": 0.687, "step": 10152 }, { "epoch": 0.5217905231781272, "grad_norm": 1.1816563606262207, "learning_rate": 4.890151301792857e-06, "loss": 0.6715, "step": 10153 }, { "epoch": 0.5218419159214719, "grad_norm": 0.9829698801040649, "learning_rate": 4.889319250127087e-06, "loss": 0.724, "step": 10154 }, { "epoch": 0.5218933086648165, "grad_norm": 1.0997509956359863, "learning_rate": 4.888487201527826e-06, "loss": 0.7435, "step": 10155 }, { "epoch": 0.5219447014081612, "grad_norm": 1.037282109260559, "learning_rate": 4.8876551560181225e-06, "loss": 0.6444, "step": 10156 }, { "epoch": 0.5219960941515058, "grad_norm": 0.6939500570297241, "learning_rate": 4.88682311362103e-06, "loss": 0.6768, "step": 10157 }, { "epoch": 0.5220474868948505, "grad_norm": 0.7951695322990417, "learning_rate": 4.885991074359605e-06, "loss": 0.6591, "step": 10158 }, { "epoch": 0.5220988796381951, "grad_norm": 1.0964356660842896, "learning_rate": 4.885159038256894e-06, "loss": 0.7285, "step": 10159 }, { "epoch": 0.5221502723815398, "grad_norm": 1.0419641733169556, "learning_rate": 4.884327005335952e-06, "loss": 0.7291, "step": 10160 }, { "epoch": 0.5222016651248844, "grad_norm": 1.0542296171188354, "learning_rate": 4.883494975619833e-06, "loss": 0.7241, "step": 10161 }, { "epoch": 0.522253057868229, "grad_norm": 1.0588669776916504, "learning_rate": 4.882662949131587e-06, "loss": 0.7139, "step": 10162 }, { "epoch": 0.5223044506115736, "grad_norm": 1.071363091468811, "learning_rate": 4.881830925894265e-06, "loss": 0.7674, "step": 10163 }, { "epoch": 0.5223558433549182, "grad_norm": 1.142183542251587, "learning_rate": 4.880998905930924e-06, "loss": 0.6654, "step": 10164 }, { "epoch": 0.5224072360982629, "grad_norm": 1.1020852327346802, "learning_rate": 4.88016688926461e-06, "loss": 0.7294, "step": 10165 }, { "epoch": 0.5224586288416075, "grad_norm": 1.1549524068832397, "learning_rate": 4.8793348759183756e-06, "loss": 0.7544, "step": 10166 }, { "epoch": 0.5225100215849522, "grad_norm": 1.0551776885986328, "learning_rate": 4.878502865915276e-06, "loss": 0.698, "step": 10167 }, { "epoch": 0.5225614143282968, "grad_norm": 0.7616724371910095, "learning_rate": 4.87767085927836e-06, "loss": 0.668, "step": 10168 }, { "epoch": 0.5226128070716415, "grad_norm": 1.1036018133163452, "learning_rate": 4.876838856030679e-06, "loss": 0.7435, "step": 10169 }, { "epoch": 0.5226641998149861, "grad_norm": 1.1078691482543945, "learning_rate": 4.876006856195284e-06, "loss": 0.7477, "step": 10170 }, { "epoch": 0.5227155925583308, "grad_norm": 1.1335638761520386, "learning_rate": 4.875174859795229e-06, "loss": 0.7281, "step": 10171 }, { "epoch": 0.5227669853016754, "grad_norm": 0.7079783082008362, "learning_rate": 4.874342866853563e-06, "loss": 0.6881, "step": 10172 }, { "epoch": 0.5228183780450201, "grad_norm": 1.086212396621704, "learning_rate": 4.873510877393337e-06, "loss": 0.7363, "step": 10173 }, { "epoch": 0.5228697707883647, "grad_norm": 1.057417392730713, "learning_rate": 4.872678891437606e-06, "loss": 0.7446, "step": 10174 }, { "epoch": 0.5229211635317094, "grad_norm": 1.0979691743850708, "learning_rate": 4.871846909009414e-06, "loss": 0.7654, "step": 10175 }, { "epoch": 0.522972556275054, "grad_norm": 0.6658157110214233, "learning_rate": 4.8710149301318155e-06, "loss": 0.6953, "step": 10176 }, { "epoch": 0.5230239490183985, "grad_norm": 0.931196391582489, "learning_rate": 4.870182954827863e-06, "loss": 0.6932, "step": 10177 }, { "epoch": 0.5230753417617432, "grad_norm": 0.7058324217796326, "learning_rate": 4.869350983120603e-06, "loss": 0.6797, "step": 10178 }, { "epoch": 0.5231267345050878, "grad_norm": 1.1077871322631836, "learning_rate": 4.868519015033089e-06, "loss": 0.7127, "step": 10179 }, { "epoch": 0.5231781272484325, "grad_norm": 1.0670933723449707, "learning_rate": 4.8676870505883705e-06, "loss": 0.7328, "step": 10180 }, { "epoch": 0.5232295199917771, "grad_norm": 1.0528850555419922, "learning_rate": 4.8668550898094975e-06, "loss": 0.7198, "step": 10181 }, { "epoch": 0.5232809127351218, "grad_norm": 0.7773083448410034, "learning_rate": 4.86602313271952e-06, "loss": 0.6487, "step": 10182 }, { "epoch": 0.5233323054784664, "grad_norm": 1.0999242067337036, "learning_rate": 4.8651911793414905e-06, "loss": 0.752, "step": 10183 }, { "epoch": 0.5233836982218111, "grad_norm": 1.0907025337219238, "learning_rate": 4.864359229698456e-06, "loss": 0.7515, "step": 10184 }, { "epoch": 0.5234350909651557, "grad_norm": 0.6740009784698486, "learning_rate": 4.863527283813467e-06, "loss": 0.6762, "step": 10185 }, { "epoch": 0.5234864837085004, "grad_norm": 1.061476707458496, "learning_rate": 4.862695341709574e-06, "loss": 0.7547, "step": 10186 }, { "epoch": 0.523537876451845, "grad_norm": 1.0332081317901611, "learning_rate": 4.861863403409828e-06, "loss": 0.736, "step": 10187 }, { "epoch": 0.5235892691951897, "grad_norm": 1.1380336284637451, "learning_rate": 4.861031468937275e-06, "loss": 0.7106, "step": 10188 }, { "epoch": 0.5236406619385343, "grad_norm": 1.0341191291809082, "learning_rate": 4.860199538314966e-06, "loss": 0.6609, "step": 10189 }, { "epoch": 0.523692054681879, "grad_norm": 0.7224943041801453, "learning_rate": 4.859367611565953e-06, "loss": 0.6675, "step": 10190 }, { "epoch": 0.5237434474252236, "grad_norm": 1.0201328992843628, "learning_rate": 4.858535688713281e-06, "loss": 0.6438, "step": 10191 }, { "epoch": 0.5237948401685681, "grad_norm": 1.0973433256149292, "learning_rate": 4.857703769780002e-06, "loss": 0.729, "step": 10192 }, { "epoch": 0.5238462329119128, "grad_norm": 1.0729687213897705, "learning_rate": 4.856871854789167e-06, "loss": 0.7575, "step": 10193 }, { "epoch": 0.5238976256552574, "grad_norm": 1.0226545333862305, "learning_rate": 4.856039943763819e-06, "loss": 0.7059, "step": 10194 }, { "epoch": 0.5239490183986021, "grad_norm": 1.0748804807662964, "learning_rate": 4.8552080367270105e-06, "loss": 0.7484, "step": 10195 }, { "epoch": 0.5240004111419467, "grad_norm": 1.5080205202102661, "learning_rate": 4.854376133701792e-06, "loss": 0.6819, "step": 10196 }, { "epoch": 0.5240518038852914, "grad_norm": 1.1196929216384888, "learning_rate": 4.853544234711207e-06, "loss": 0.7374, "step": 10197 }, { "epoch": 0.524103196628636, "grad_norm": 0.6745384335517883, "learning_rate": 4.852712339778308e-06, "loss": 0.6508, "step": 10198 }, { "epoch": 0.5241545893719807, "grad_norm": 1.0236445665359497, "learning_rate": 4.851880448926144e-06, "loss": 0.7673, "step": 10199 }, { "epoch": 0.5242059821153253, "grad_norm": 1.4175792932510376, "learning_rate": 4.85104856217776e-06, "loss": 0.7595, "step": 10200 }, { "epoch": 0.52425737485867, "grad_norm": 1.0875300168991089, "learning_rate": 4.850216679556205e-06, "loss": 0.6993, "step": 10201 }, { "epoch": 0.5243087676020146, "grad_norm": 1.0749180316925049, "learning_rate": 4.849384801084527e-06, "loss": 0.6939, "step": 10202 }, { "epoch": 0.5243601603453593, "grad_norm": 1.117702603340149, "learning_rate": 4.8485529267857765e-06, "loss": 0.6837, "step": 10203 }, { "epoch": 0.5244115530887039, "grad_norm": 0.8182233572006226, "learning_rate": 4.847721056682999e-06, "loss": 0.651, "step": 10204 }, { "epoch": 0.5244629458320486, "grad_norm": 0.6755691766738892, "learning_rate": 4.846889190799241e-06, "loss": 0.6355, "step": 10205 }, { "epoch": 0.5245143385753932, "grad_norm": 1.0055376291275024, "learning_rate": 4.846057329157555e-06, "loss": 0.7129, "step": 10206 }, { "epoch": 0.5245657313187378, "grad_norm": 1.1391932964324951, "learning_rate": 4.8452254717809826e-06, "loss": 0.7123, "step": 10207 }, { "epoch": 0.5246171240620824, "grad_norm": 1.0397183895111084, "learning_rate": 4.844393618692573e-06, "loss": 0.7576, "step": 10208 }, { "epoch": 0.524668516805427, "grad_norm": 1.0056294202804565, "learning_rate": 4.843561769915378e-06, "loss": 0.7177, "step": 10209 }, { "epoch": 0.5247199095487717, "grad_norm": 1.0432686805725098, "learning_rate": 4.842729925472437e-06, "loss": 0.6911, "step": 10210 }, { "epoch": 0.5247713022921163, "grad_norm": 1.0614737272262573, "learning_rate": 4.841898085386802e-06, "loss": 0.6934, "step": 10211 }, { "epoch": 0.524822695035461, "grad_norm": 1.086755394935608, "learning_rate": 4.8410662496815185e-06, "loss": 0.7108, "step": 10212 }, { "epoch": 0.5248740877788056, "grad_norm": 0.7585174441337585, "learning_rate": 4.8402344183796325e-06, "loss": 0.6749, "step": 10213 }, { "epoch": 0.5249254805221503, "grad_norm": 0.9998639225959778, "learning_rate": 4.839402591504192e-06, "loss": 0.7207, "step": 10214 }, { "epoch": 0.5249768732654949, "grad_norm": 1.1510059833526611, "learning_rate": 4.838570769078244e-06, "loss": 0.7575, "step": 10215 }, { "epoch": 0.5250282660088396, "grad_norm": 1.228126883506775, "learning_rate": 4.837738951124832e-06, "loss": 0.7772, "step": 10216 }, { "epoch": 0.5250796587521842, "grad_norm": 1.0061830282211304, "learning_rate": 4.836907137667005e-06, "loss": 0.7468, "step": 10217 }, { "epoch": 0.5251310514955289, "grad_norm": 0.7103826999664307, "learning_rate": 4.836075328727808e-06, "loss": 0.675, "step": 10218 }, { "epoch": 0.5251824442388735, "grad_norm": 1.0398668050765991, "learning_rate": 4.8352435243302884e-06, "loss": 0.7278, "step": 10219 }, { "epoch": 0.5252338369822181, "grad_norm": 1.0127153396606445, "learning_rate": 4.834411724497489e-06, "loss": 0.6853, "step": 10220 }, { "epoch": 0.5252852297255628, "grad_norm": 1.0530105829238892, "learning_rate": 4.833579929252458e-06, "loss": 0.7219, "step": 10221 }, { "epoch": 0.5253366224689074, "grad_norm": 0.8183920979499817, "learning_rate": 4.832748138618241e-06, "loss": 0.6258, "step": 10222 }, { "epoch": 0.525388015212252, "grad_norm": 1.0908894538879395, "learning_rate": 4.831916352617882e-06, "loss": 0.7374, "step": 10223 }, { "epoch": 0.5254394079555966, "grad_norm": 1.1123764514923096, "learning_rate": 4.831084571274427e-06, "loss": 0.6751, "step": 10224 }, { "epoch": 0.5254908006989413, "grad_norm": 1.2426478862762451, "learning_rate": 4.830252794610923e-06, "loss": 0.7505, "step": 10225 }, { "epoch": 0.5255421934422859, "grad_norm": 1.0600411891937256, "learning_rate": 4.829421022650413e-06, "loss": 0.7078, "step": 10226 }, { "epoch": 0.5255935861856306, "grad_norm": 1.0181952714920044, "learning_rate": 4.828589255415942e-06, "loss": 0.7063, "step": 10227 }, { "epoch": 0.5256449789289752, "grad_norm": 1.1194193363189697, "learning_rate": 4.8277574929305565e-06, "loss": 0.7232, "step": 10228 }, { "epoch": 0.5256963716723199, "grad_norm": 1.054977297782898, "learning_rate": 4.826925735217299e-06, "loss": 0.7088, "step": 10229 }, { "epoch": 0.5257477644156645, "grad_norm": 0.798984944820404, "learning_rate": 4.826093982299214e-06, "loss": 0.6856, "step": 10230 }, { "epoch": 0.5257991571590092, "grad_norm": 1.0804462432861328, "learning_rate": 4.825262234199349e-06, "loss": 0.7682, "step": 10231 }, { "epoch": 0.5258505499023538, "grad_norm": 1.0957400798797607, "learning_rate": 4.8244304909407455e-06, "loss": 0.7417, "step": 10232 }, { "epoch": 0.5259019426456985, "grad_norm": 1.0321849584579468, "learning_rate": 4.823598752546448e-06, "loss": 0.7316, "step": 10233 }, { "epoch": 0.5259533353890431, "grad_norm": 1.106554388999939, "learning_rate": 4.822767019039502e-06, "loss": 0.7384, "step": 10234 }, { "epoch": 0.5260047281323877, "grad_norm": 1.0579158067703247, "learning_rate": 4.821935290442951e-06, "loss": 0.7738, "step": 10235 }, { "epoch": 0.5260561208757324, "grad_norm": 1.1413239240646362, "learning_rate": 4.821103566779837e-06, "loss": 0.6916, "step": 10236 }, { "epoch": 0.526107513619077, "grad_norm": 1.0151246786117554, "learning_rate": 4.8202718480732054e-06, "loss": 0.7383, "step": 10237 }, { "epoch": 0.5261589063624216, "grad_norm": 1.1452213525772095, "learning_rate": 4.819440134346101e-06, "loss": 0.7968, "step": 10238 }, { "epoch": 0.5262102991057662, "grad_norm": 1.0373313426971436, "learning_rate": 4.818608425621563e-06, "loss": 0.7407, "step": 10239 }, { "epoch": 0.5262616918491109, "grad_norm": 1.0357334613800049, "learning_rate": 4.817776721922637e-06, "loss": 0.7358, "step": 10240 }, { "epoch": 0.5263130845924555, "grad_norm": 1.0234582424163818, "learning_rate": 4.816945023272368e-06, "loss": 0.7112, "step": 10241 }, { "epoch": 0.5263644773358002, "grad_norm": 1.0189048051834106, "learning_rate": 4.816113329693794e-06, "loss": 0.7221, "step": 10242 }, { "epoch": 0.5264158700791448, "grad_norm": 0.9779747128486633, "learning_rate": 4.815281641209963e-06, "loss": 0.6886, "step": 10243 }, { "epoch": 0.5264672628224895, "grad_norm": 1.1011875867843628, "learning_rate": 4.814449957843916e-06, "loss": 0.7168, "step": 10244 }, { "epoch": 0.5265186555658341, "grad_norm": 1.0463697910308838, "learning_rate": 4.813618279618693e-06, "loss": 0.6907, "step": 10245 }, { "epoch": 0.5265700483091788, "grad_norm": 1.132140040397644, "learning_rate": 4.812786606557339e-06, "loss": 0.7163, "step": 10246 }, { "epoch": 0.5266214410525234, "grad_norm": 1.0805333852767944, "learning_rate": 4.811954938682897e-06, "loss": 0.7183, "step": 10247 }, { "epoch": 0.526672833795868, "grad_norm": 1.066996693611145, "learning_rate": 4.811123276018407e-06, "loss": 0.7228, "step": 10248 }, { "epoch": 0.5267242265392127, "grad_norm": 1.0911773443222046, "learning_rate": 4.810291618586909e-06, "loss": 0.697, "step": 10249 }, { "epoch": 0.5267756192825573, "grad_norm": 1.148009181022644, "learning_rate": 4.80945996641145e-06, "loss": 0.7217, "step": 10250 }, { "epoch": 0.526827012025902, "grad_norm": 1.0908042192459106, "learning_rate": 4.808628319515068e-06, "loss": 0.7276, "step": 10251 }, { "epoch": 0.5268784047692466, "grad_norm": 1.0533244609832764, "learning_rate": 4.807796677920807e-06, "loss": 0.6403, "step": 10252 }, { "epoch": 0.5269297975125912, "grad_norm": 0.8397180438041687, "learning_rate": 4.806965041651704e-06, "loss": 0.7166, "step": 10253 }, { "epoch": 0.5269811902559358, "grad_norm": 1.136452555656433, "learning_rate": 4.806133410730806e-06, "loss": 0.7344, "step": 10254 }, { "epoch": 0.5270325829992805, "grad_norm": 0.7566211819648743, "learning_rate": 4.805301785181149e-06, "loss": 0.6179, "step": 10255 }, { "epoch": 0.5270839757426251, "grad_norm": 1.1047295331954956, "learning_rate": 4.8044701650257765e-06, "loss": 0.7254, "step": 10256 }, { "epoch": 0.5271353684859698, "grad_norm": 0.7362626194953918, "learning_rate": 4.8036385502877315e-06, "loss": 0.6763, "step": 10257 }, { "epoch": 0.5271867612293144, "grad_norm": 1.00727117061615, "learning_rate": 4.802806940990049e-06, "loss": 0.6939, "step": 10258 }, { "epoch": 0.527238153972659, "grad_norm": 1.0371261835098267, "learning_rate": 4.8019753371557725e-06, "loss": 0.7149, "step": 10259 }, { "epoch": 0.5272895467160037, "grad_norm": 0.7272729277610779, "learning_rate": 4.801143738807945e-06, "loss": 0.6567, "step": 10260 }, { "epoch": 0.5273409394593483, "grad_norm": 1.1050797700881958, "learning_rate": 4.8003121459696e-06, "loss": 0.7665, "step": 10261 }, { "epoch": 0.527392332202693, "grad_norm": 1.0831407308578491, "learning_rate": 4.799480558663784e-06, "loss": 0.6998, "step": 10262 }, { "epoch": 0.5274437249460376, "grad_norm": 1.0474841594696045, "learning_rate": 4.798648976913532e-06, "loss": 0.6856, "step": 10263 }, { "epoch": 0.5274951176893823, "grad_norm": 1.081330418586731, "learning_rate": 4.797817400741888e-06, "loss": 0.7484, "step": 10264 }, { "epoch": 0.5275465104327269, "grad_norm": 1.0493674278259277, "learning_rate": 4.796985830171888e-06, "loss": 0.7066, "step": 10265 }, { "epoch": 0.5275979031760716, "grad_norm": 1.0850765705108643, "learning_rate": 4.796154265226573e-06, "loss": 0.6975, "step": 10266 }, { "epoch": 0.5276492959194162, "grad_norm": 1.1307713985443115, "learning_rate": 4.795322705928984e-06, "loss": 0.7218, "step": 10267 }, { "epoch": 0.5277006886627608, "grad_norm": 1.0447183847427368, "learning_rate": 4.794491152302157e-06, "loss": 0.682, "step": 10268 }, { "epoch": 0.5277520814061054, "grad_norm": 0.9921669960021973, "learning_rate": 4.793659604369131e-06, "loss": 0.7082, "step": 10269 }, { "epoch": 0.5278034741494501, "grad_norm": 1.1212100982666016, "learning_rate": 4.792828062152948e-06, "loss": 0.7227, "step": 10270 }, { "epoch": 0.5278548668927947, "grad_norm": 0.8607807755470276, "learning_rate": 4.791996525676643e-06, "loss": 0.6612, "step": 10271 }, { "epoch": 0.5279062596361394, "grad_norm": 1.0803894996643066, "learning_rate": 4.791164994963256e-06, "loss": 0.7714, "step": 10272 }, { "epoch": 0.527957652379484, "grad_norm": 0.9982879161834717, "learning_rate": 4.7903334700358254e-06, "loss": 0.7502, "step": 10273 }, { "epoch": 0.5280090451228286, "grad_norm": 1.014747977256775, "learning_rate": 4.7895019509173885e-06, "loss": 0.708, "step": 10274 }, { "epoch": 0.5280604378661733, "grad_norm": 1.0326505899429321, "learning_rate": 4.788670437630984e-06, "loss": 0.6256, "step": 10275 }, { "epoch": 0.5281118306095179, "grad_norm": 1.1371957063674927, "learning_rate": 4.787838930199651e-06, "loss": 0.7921, "step": 10276 }, { "epoch": 0.5281632233528626, "grad_norm": 1.0599125623703003, "learning_rate": 4.7870074286464245e-06, "loss": 0.7302, "step": 10277 }, { "epoch": 0.5282146160962072, "grad_norm": 1.029662013053894, "learning_rate": 4.7861759329943425e-06, "loss": 0.7183, "step": 10278 }, { "epoch": 0.5282660088395519, "grad_norm": 0.8277777433395386, "learning_rate": 4.785344443266444e-06, "loss": 0.7097, "step": 10279 }, { "epoch": 0.5283174015828965, "grad_norm": 1.1486098766326904, "learning_rate": 4.7845129594857656e-06, "loss": 0.7438, "step": 10280 }, { "epoch": 0.5283687943262412, "grad_norm": 1.0795249938964844, "learning_rate": 4.783681481675343e-06, "loss": 0.7004, "step": 10281 }, { "epoch": 0.5284201870695858, "grad_norm": 1.1701425313949585, "learning_rate": 4.782850009858214e-06, "loss": 0.7015, "step": 10282 }, { "epoch": 0.5284715798129304, "grad_norm": 1.0508663654327393, "learning_rate": 4.782018544057415e-06, "loss": 0.7707, "step": 10283 }, { "epoch": 0.528522972556275, "grad_norm": 1.2026253938674927, "learning_rate": 4.781187084295982e-06, "loss": 0.7821, "step": 10284 }, { "epoch": 0.5285743652996197, "grad_norm": 1.0074617862701416, "learning_rate": 4.780355630596953e-06, "loss": 0.7428, "step": 10285 }, { "epoch": 0.5286257580429643, "grad_norm": 0.7750651836395264, "learning_rate": 4.779524182983364e-06, "loss": 0.7325, "step": 10286 }, { "epoch": 0.528677150786309, "grad_norm": 0.9617918133735657, "learning_rate": 4.778692741478248e-06, "loss": 0.7238, "step": 10287 }, { "epoch": 0.5287285435296536, "grad_norm": 0.992879331111908, "learning_rate": 4.777861306104643e-06, "loss": 0.7373, "step": 10288 }, { "epoch": 0.5287799362729982, "grad_norm": 1.0186505317687988, "learning_rate": 4.777029876885586e-06, "loss": 0.7686, "step": 10289 }, { "epoch": 0.5288313290163429, "grad_norm": 1.0073426961898804, "learning_rate": 4.77619845384411e-06, "loss": 0.7346, "step": 10290 }, { "epoch": 0.5288827217596875, "grad_norm": 1.0588667392730713, "learning_rate": 4.7753670370032516e-06, "loss": 0.7166, "step": 10291 }, { "epoch": 0.5289341145030322, "grad_norm": 1.1066269874572754, "learning_rate": 4.7745356263860455e-06, "loss": 0.7017, "step": 10292 }, { "epoch": 0.5289855072463768, "grad_norm": 0.7340662479400635, "learning_rate": 4.773704222015525e-06, "loss": 0.6767, "step": 10293 }, { "epoch": 0.5290368999897215, "grad_norm": 1.051391363143921, "learning_rate": 4.7728728239147285e-06, "loss": 0.7189, "step": 10294 }, { "epoch": 0.5290882927330661, "grad_norm": 1.0894246101379395, "learning_rate": 4.772041432106689e-06, "loss": 0.7085, "step": 10295 }, { "epoch": 0.5291396854764108, "grad_norm": 0.9623305201530457, "learning_rate": 4.771210046614441e-06, "loss": 0.6336, "step": 10296 }, { "epoch": 0.5291910782197554, "grad_norm": 1.1843966245651245, "learning_rate": 4.770378667461016e-06, "loss": 0.7706, "step": 10297 }, { "epoch": 0.5292424709631001, "grad_norm": 1.008323311805725, "learning_rate": 4.769547294669452e-06, "loss": 0.7694, "step": 10298 }, { "epoch": 0.5292938637064446, "grad_norm": 1.0887399911880493, "learning_rate": 4.7687159282627834e-06, "loss": 0.7312, "step": 10299 }, { "epoch": 0.5293452564497892, "grad_norm": 0.8242732882499695, "learning_rate": 4.767884568264039e-06, "loss": 0.681, "step": 10300 }, { "epoch": 0.5293966491931339, "grad_norm": 1.1898654699325562, "learning_rate": 4.767053214696256e-06, "loss": 0.7314, "step": 10301 }, { "epoch": 0.5294480419364785, "grad_norm": 0.675067663192749, "learning_rate": 4.766221867582469e-06, "loss": 0.6621, "step": 10302 }, { "epoch": 0.5294994346798232, "grad_norm": 1.0693490505218506, "learning_rate": 4.765390526945708e-06, "loss": 0.7473, "step": 10303 }, { "epoch": 0.5295508274231678, "grad_norm": 0.7189530730247498, "learning_rate": 4.7645591928090065e-06, "loss": 0.653, "step": 10304 }, { "epoch": 0.5296022201665125, "grad_norm": 0.7237748503684998, "learning_rate": 4.7637278651954e-06, "loss": 0.6675, "step": 10305 }, { "epoch": 0.5296536129098571, "grad_norm": 0.7539510726928711, "learning_rate": 4.762896544127917e-06, "loss": 0.6449, "step": 10306 }, { "epoch": 0.5297050056532018, "grad_norm": 1.0273839235305786, "learning_rate": 4.762065229629593e-06, "loss": 0.7508, "step": 10307 }, { "epoch": 0.5297563983965464, "grad_norm": 1.063821792602539, "learning_rate": 4.761233921723462e-06, "loss": 0.7068, "step": 10308 }, { "epoch": 0.5298077911398911, "grad_norm": 8.256381034851074, "learning_rate": 4.760402620432551e-06, "loss": 0.7892, "step": 10309 }, { "epoch": 0.5298591838832357, "grad_norm": 1.0470741987228394, "learning_rate": 4.759571325779895e-06, "loss": 0.7787, "step": 10310 }, { "epoch": 0.5299105766265804, "grad_norm": 1.084010124206543, "learning_rate": 4.758740037788525e-06, "loss": 0.6789, "step": 10311 }, { "epoch": 0.529961969369925, "grad_norm": 1.11545729637146, "learning_rate": 4.757908756481475e-06, "loss": 0.7282, "step": 10312 }, { "epoch": 0.5300133621132697, "grad_norm": 1.0468107461929321, "learning_rate": 4.757077481881774e-06, "loss": 0.7078, "step": 10313 }, { "epoch": 0.5300647548566142, "grad_norm": 1.0252079963684082, "learning_rate": 4.756246214012451e-06, "loss": 0.7009, "step": 10314 }, { "epoch": 0.5301161475999588, "grad_norm": 1.0048270225524902, "learning_rate": 4.7554149528965414e-06, "loss": 0.717, "step": 10315 }, { "epoch": 0.5301675403433035, "grad_norm": 1.0224692821502686, "learning_rate": 4.754583698557073e-06, "loss": 0.6806, "step": 10316 }, { "epoch": 0.5302189330866481, "grad_norm": 1.0386464595794678, "learning_rate": 4.7537524510170776e-06, "loss": 0.7489, "step": 10317 }, { "epoch": 0.5302703258299928, "grad_norm": 0.9968034029006958, "learning_rate": 4.752921210299588e-06, "loss": 0.6956, "step": 10318 }, { "epoch": 0.5303217185733374, "grad_norm": 1.0880216360092163, "learning_rate": 4.75208997642763e-06, "loss": 0.7823, "step": 10319 }, { "epoch": 0.5303731113166821, "grad_norm": 1.0688482522964478, "learning_rate": 4.7512587494242355e-06, "loss": 0.711, "step": 10320 }, { "epoch": 0.5304245040600267, "grad_norm": 1.050085186958313, "learning_rate": 4.750427529312436e-06, "loss": 0.7676, "step": 10321 }, { "epoch": 0.5304758968033714, "grad_norm": 1.0743380784988403, "learning_rate": 4.749596316115258e-06, "loss": 0.7693, "step": 10322 }, { "epoch": 0.530527289546716, "grad_norm": 1.1947100162506104, "learning_rate": 4.748765109855733e-06, "loss": 0.7482, "step": 10323 }, { "epoch": 0.5305786822900607, "grad_norm": 1.0616408586502075, "learning_rate": 4.747933910556891e-06, "loss": 0.6889, "step": 10324 }, { "epoch": 0.5306300750334053, "grad_norm": 1.140880823135376, "learning_rate": 4.74710271824176e-06, "loss": 0.7335, "step": 10325 }, { "epoch": 0.53068146777675, "grad_norm": 1.1313060522079468, "learning_rate": 4.746271532933368e-06, "loss": 0.7111, "step": 10326 }, { "epoch": 0.5307328605200946, "grad_norm": 0.9172868728637695, "learning_rate": 4.745440354654745e-06, "loss": 0.704, "step": 10327 }, { "epoch": 0.5307842532634393, "grad_norm": 1.1060254573822021, "learning_rate": 4.744609183428921e-06, "loss": 0.702, "step": 10328 }, { "epoch": 0.5308356460067838, "grad_norm": 1.069342017173767, "learning_rate": 4.7437780192789205e-06, "loss": 0.7726, "step": 10329 }, { "epoch": 0.5308870387501284, "grad_norm": 0.7403047680854797, "learning_rate": 4.742946862227774e-06, "loss": 0.6554, "step": 10330 }, { "epoch": 0.5309384314934731, "grad_norm": 1.0570991039276123, "learning_rate": 4.7421157122985115e-06, "loss": 0.7455, "step": 10331 }, { "epoch": 0.5309898242368177, "grad_norm": 1.0414022207260132, "learning_rate": 4.741284569514156e-06, "loss": 0.7043, "step": 10332 }, { "epoch": 0.5310412169801624, "grad_norm": 1.1439743041992188, "learning_rate": 4.740453433897737e-06, "loss": 0.7093, "step": 10333 }, { "epoch": 0.531092609723507, "grad_norm": 1.0015448331832886, "learning_rate": 4.739622305472284e-06, "loss": 0.7086, "step": 10334 }, { "epoch": 0.5311440024668517, "grad_norm": 1.0276317596435547, "learning_rate": 4.7387911842608206e-06, "loss": 0.7034, "step": 10335 }, { "epoch": 0.5311953952101963, "grad_norm": 1.0847787857055664, "learning_rate": 4.737960070286376e-06, "loss": 0.741, "step": 10336 }, { "epoch": 0.531246787953541, "grad_norm": 0.9731278419494629, "learning_rate": 4.737128963571979e-06, "loss": 0.6941, "step": 10337 }, { "epoch": 0.5312981806968856, "grad_norm": 1.1218215227127075, "learning_rate": 4.736297864140651e-06, "loss": 0.7018, "step": 10338 }, { "epoch": 0.5313495734402303, "grad_norm": 1.1538740396499634, "learning_rate": 4.735466772015421e-06, "loss": 0.7594, "step": 10339 }, { "epoch": 0.5314009661835749, "grad_norm": 1.0631097555160522, "learning_rate": 4.734635687219317e-06, "loss": 0.7043, "step": 10340 }, { "epoch": 0.5314523589269196, "grad_norm": 1.0648765563964844, "learning_rate": 4.733804609775362e-06, "loss": 0.7742, "step": 10341 }, { "epoch": 0.5315037516702642, "grad_norm": 1.1639678478240967, "learning_rate": 4.732973539706581e-06, "loss": 0.7954, "step": 10342 }, { "epoch": 0.5315551444136088, "grad_norm": 1.1686532497406006, "learning_rate": 4.732142477036002e-06, "loss": 0.7192, "step": 10343 }, { "epoch": 0.5316065371569534, "grad_norm": 1.0241726636886597, "learning_rate": 4.731311421786651e-06, "loss": 0.702, "step": 10344 }, { "epoch": 0.531657929900298, "grad_norm": 0.9771272540092468, "learning_rate": 4.730480373981551e-06, "loss": 0.6897, "step": 10345 }, { "epoch": 0.5317093226436427, "grad_norm": 0.9966562390327454, "learning_rate": 4.729649333643728e-06, "loss": 0.742, "step": 10346 }, { "epoch": 0.5317607153869873, "grad_norm": 1.0112990140914917, "learning_rate": 4.728818300796206e-06, "loss": 0.6957, "step": 10347 }, { "epoch": 0.531812108130332, "grad_norm": 1.060575246810913, "learning_rate": 4.727987275462009e-06, "loss": 0.7295, "step": 10348 }, { "epoch": 0.5318635008736766, "grad_norm": 1.0529887676239014, "learning_rate": 4.727156257664162e-06, "loss": 0.7058, "step": 10349 }, { "epoch": 0.5319148936170213, "grad_norm": 1.0608493089675903, "learning_rate": 4.726325247425691e-06, "loss": 0.747, "step": 10350 }, { "epoch": 0.5319662863603659, "grad_norm": 1.175390601158142, "learning_rate": 4.725494244769617e-06, "loss": 0.6832, "step": 10351 }, { "epoch": 0.5320176791037106, "grad_norm": 1.0679497718811035, "learning_rate": 4.724663249718964e-06, "loss": 0.7381, "step": 10352 }, { "epoch": 0.5320690718470552, "grad_norm": 1.0728440284729004, "learning_rate": 4.723832262296757e-06, "loss": 0.7428, "step": 10353 }, { "epoch": 0.5321204645903999, "grad_norm": 1.0997991561889648, "learning_rate": 4.723001282526018e-06, "loss": 0.7459, "step": 10354 }, { "epoch": 0.5321718573337445, "grad_norm": 1.067649483680725, "learning_rate": 4.72217031042977e-06, "loss": 0.6444, "step": 10355 }, { "epoch": 0.5322232500770891, "grad_norm": 1.0473803281784058, "learning_rate": 4.721339346031037e-06, "loss": 0.7296, "step": 10356 }, { "epoch": 0.5322746428204338, "grad_norm": 1.011902928352356, "learning_rate": 4.72050838935284e-06, "loss": 0.7365, "step": 10357 }, { "epoch": 0.5323260355637784, "grad_norm": 1.0421733856201172, "learning_rate": 4.7196774404182016e-06, "loss": 0.7038, "step": 10358 }, { "epoch": 0.532377428307123, "grad_norm": 1.0585135221481323, "learning_rate": 4.718846499250145e-06, "loss": 0.7368, "step": 10359 }, { "epoch": 0.5324288210504676, "grad_norm": 1.1533455848693848, "learning_rate": 4.7180155658716935e-06, "loss": 0.7233, "step": 10360 }, { "epoch": 0.5324802137938123, "grad_norm": 1.0762910842895508, "learning_rate": 4.717184640305865e-06, "loss": 0.7217, "step": 10361 }, { "epoch": 0.5325316065371569, "grad_norm": 1.035648226737976, "learning_rate": 4.7163537225756825e-06, "loss": 0.6751, "step": 10362 }, { "epoch": 0.5325829992805016, "grad_norm": 1.0751457214355469, "learning_rate": 4.71552281270417e-06, "loss": 0.7823, "step": 10363 }, { "epoch": 0.5326343920238462, "grad_norm": 1.0853872299194336, "learning_rate": 4.714691910714345e-06, "loss": 0.7104, "step": 10364 }, { "epoch": 0.5326857847671909, "grad_norm": 1.078939437866211, "learning_rate": 4.713861016629229e-06, "loss": 0.7263, "step": 10365 }, { "epoch": 0.5327371775105355, "grad_norm": 1.0856150388717651, "learning_rate": 4.713030130471845e-06, "loss": 0.7005, "step": 10366 }, { "epoch": 0.5327885702538802, "grad_norm": 0.7965691089630127, "learning_rate": 4.712199252265211e-06, "loss": 0.6635, "step": 10367 }, { "epoch": 0.5328399629972248, "grad_norm": 1.077125072479248, "learning_rate": 4.711368382032347e-06, "loss": 0.7211, "step": 10368 }, { "epoch": 0.5328913557405695, "grad_norm": 1.1458086967468262, "learning_rate": 4.7105375197962765e-06, "loss": 0.7097, "step": 10369 }, { "epoch": 0.5329427484839141, "grad_norm": 1.0119352340698242, "learning_rate": 4.709706665580015e-06, "loss": 0.7196, "step": 10370 }, { "epoch": 0.5329941412272587, "grad_norm": 1.0383985042572021, "learning_rate": 4.708875819406584e-06, "loss": 0.6638, "step": 10371 }, { "epoch": 0.5330455339706034, "grad_norm": 1.0867875814437866, "learning_rate": 4.708044981299002e-06, "loss": 0.7011, "step": 10372 }, { "epoch": 0.533096926713948, "grad_norm": 1.0672067403793335, "learning_rate": 4.7072141512802915e-06, "loss": 0.7662, "step": 10373 }, { "epoch": 0.5331483194572927, "grad_norm": 1.0415112972259521, "learning_rate": 4.7063833293734665e-06, "loss": 0.7055, "step": 10374 }, { "epoch": 0.5331997122006372, "grad_norm": 1.07720947265625, "learning_rate": 4.705552515601547e-06, "loss": 0.7397, "step": 10375 }, { "epoch": 0.5332511049439819, "grad_norm": 0.7575289011001587, "learning_rate": 4.704721709987554e-06, "loss": 0.6784, "step": 10376 }, { "epoch": 0.5333024976873265, "grad_norm": 1.1129956245422363, "learning_rate": 4.7038909125545025e-06, "loss": 0.7165, "step": 10377 }, { "epoch": 0.5333538904306712, "grad_norm": 1.0422471761703491, "learning_rate": 4.703060123325412e-06, "loss": 0.7206, "step": 10378 }, { "epoch": 0.5334052831740158, "grad_norm": 1.0564301013946533, "learning_rate": 4.702229342323302e-06, "loss": 0.7652, "step": 10379 }, { "epoch": 0.5334566759173605, "grad_norm": 0.7374210357666016, "learning_rate": 4.7013985695711854e-06, "loss": 0.6618, "step": 10380 }, { "epoch": 0.5335080686607051, "grad_norm": 1.0290828943252563, "learning_rate": 4.700567805092083e-06, "loss": 0.705, "step": 10381 }, { "epoch": 0.5335594614040498, "grad_norm": 1.0550882816314697, "learning_rate": 4.6997370489090135e-06, "loss": 0.7131, "step": 10382 }, { "epoch": 0.5336108541473944, "grad_norm": 0.6664023399353027, "learning_rate": 4.698906301044988e-06, "loss": 0.6571, "step": 10383 }, { "epoch": 0.533662246890739, "grad_norm": 1.0975451469421387, "learning_rate": 4.698075561523027e-06, "loss": 0.8014, "step": 10384 }, { "epoch": 0.5337136396340837, "grad_norm": 1.128504991531372, "learning_rate": 4.697244830366148e-06, "loss": 0.7923, "step": 10385 }, { "epoch": 0.5337650323774283, "grad_norm": 1.0647557973861694, "learning_rate": 4.696414107597363e-06, "loss": 0.7446, "step": 10386 }, { "epoch": 0.533816425120773, "grad_norm": 1.050640344619751, "learning_rate": 4.695583393239691e-06, "loss": 0.6876, "step": 10387 }, { "epoch": 0.5338678178641176, "grad_norm": 0.8202245831489563, "learning_rate": 4.694752687316146e-06, "loss": 0.6634, "step": 10388 }, { "epoch": 0.5339192106074623, "grad_norm": 1.0535153150558472, "learning_rate": 4.693921989849748e-06, "loss": 0.7107, "step": 10389 }, { "epoch": 0.5339706033508068, "grad_norm": 0.7564883232116699, "learning_rate": 4.693091300863505e-06, "loss": 0.6732, "step": 10390 }, { "epoch": 0.5340219960941515, "grad_norm": 1.0116223096847534, "learning_rate": 4.692260620380436e-06, "loss": 0.6948, "step": 10391 }, { "epoch": 0.5340733888374961, "grad_norm": 0.7743096947669983, "learning_rate": 4.691429948423558e-06, "loss": 0.669, "step": 10392 }, { "epoch": 0.5341247815808408, "grad_norm": 1.0886726379394531, "learning_rate": 4.6905992850158805e-06, "loss": 0.7098, "step": 10393 }, { "epoch": 0.5341761743241854, "grad_norm": 1.0373095273971558, "learning_rate": 4.68976863018042e-06, "loss": 0.7369, "step": 10394 }, { "epoch": 0.53422756706753, "grad_norm": 0.8782917261123657, "learning_rate": 4.688937983940191e-06, "loss": 0.6345, "step": 10395 }, { "epoch": 0.5342789598108747, "grad_norm": 1.0406688451766968, "learning_rate": 4.688107346318206e-06, "loss": 0.675, "step": 10396 }, { "epoch": 0.5343303525542193, "grad_norm": 1.1739256381988525, "learning_rate": 4.68727671733748e-06, "loss": 0.7606, "step": 10397 }, { "epoch": 0.534381745297564, "grad_norm": 0.7114465236663818, "learning_rate": 4.6864460970210254e-06, "loss": 0.6392, "step": 10398 }, { "epoch": 0.5344331380409086, "grad_norm": 1.0907105207443237, "learning_rate": 4.685615485391856e-06, "loss": 0.7979, "step": 10399 }, { "epoch": 0.5344845307842533, "grad_norm": 0.8148313760757446, "learning_rate": 4.684784882472983e-06, "loss": 0.6983, "step": 10400 }, { "epoch": 0.5345359235275979, "grad_norm": 1.0856566429138184, "learning_rate": 4.683954288287422e-06, "loss": 0.7031, "step": 10401 }, { "epoch": 0.5345873162709426, "grad_norm": 1.0352681875228882, "learning_rate": 4.6831237028581815e-06, "loss": 0.6817, "step": 10402 }, { "epoch": 0.5346387090142872, "grad_norm": 1.0405305624008179, "learning_rate": 4.682293126208275e-06, "loss": 0.77, "step": 10403 }, { "epoch": 0.5346901017576319, "grad_norm": 1.0445367097854614, "learning_rate": 4.681462558360716e-06, "loss": 0.7129, "step": 10404 }, { "epoch": 0.5347414945009764, "grad_norm": 1.0563595294952393, "learning_rate": 4.680631999338517e-06, "loss": 0.7885, "step": 10405 }, { "epoch": 0.5347928872443211, "grad_norm": 1.0365999937057495, "learning_rate": 4.6798014491646845e-06, "loss": 0.7276, "step": 10406 }, { "epoch": 0.5348442799876657, "grad_norm": 1.0962785482406616, "learning_rate": 4.678970907862232e-06, "loss": 0.766, "step": 10407 }, { "epoch": 0.5348956727310104, "grad_norm": 1.048727035522461, "learning_rate": 4.678140375454173e-06, "loss": 0.7257, "step": 10408 }, { "epoch": 0.534947065474355, "grad_norm": 1.0664485692977905, "learning_rate": 4.677309851963516e-06, "loss": 0.7797, "step": 10409 }, { "epoch": 0.5349984582176996, "grad_norm": 0.8399204015731812, "learning_rate": 4.676479337413269e-06, "loss": 0.6421, "step": 10410 }, { "epoch": 0.5350498509610443, "grad_norm": 1.209705114364624, "learning_rate": 4.675648831826448e-06, "loss": 0.8031, "step": 10411 }, { "epoch": 0.5351012437043889, "grad_norm": 1.0256298780441284, "learning_rate": 4.674818335226056e-06, "loss": 0.6778, "step": 10412 }, { "epoch": 0.5351526364477336, "grad_norm": 1.0213309526443481, "learning_rate": 4.673987847635107e-06, "loss": 0.7648, "step": 10413 }, { "epoch": 0.5352040291910782, "grad_norm": 0.896597146987915, "learning_rate": 4.673157369076611e-06, "loss": 0.643, "step": 10414 }, { "epoch": 0.5352554219344229, "grad_norm": 1.0874860286712646, "learning_rate": 4.672326899573574e-06, "loss": 0.7262, "step": 10415 }, { "epoch": 0.5353068146777675, "grad_norm": 1.0786807537078857, "learning_rate": 4.671496439149006e-06, "loss": 0.741, "step": 10416 }, { "epoch": 0.5353582074211122, "grad_norm": 1.1190440654754639, "learning_rate": 4.670665987825917e-06, "loss": 0.7801, "step": 10417 }, { "epoch": 0.5354096001644568, "grad_norm": 1.0380901098251343, "learning_rate": 4.669835545627313e-06, "loss": 0.7344, "step": 10418 }, { "epoch": 0.5354609929078015, "grad_norm": 1.1197763681411743, "learning_rate": 4.669005112576204e-06, "loss": 0.6671, "step": 10419 }, { "epoch": 0.535512385651146, "grad_norm": 1.0650744438171387, "learning_rate": 4.668174688695597e-06, "loss": 0.7408, "step": 10420 }, { "epoch": 0.5355637783944907, "grad_norm": 1.0149824619293213, "learning_rate": 4.667344274008503e-06, "loss": 0.7432, "step": 10421 }, { "epoch": 0.5356151711378353, "grad_norm": 1.055267572402954, "learning_rate": 4.6665138685379236e-06, "loss": 0.7176, "step": 10422 }, { "epoch": 0.53566656388118, "grad_norm": 1.0338681936264038, "learning_rate": 4.665683472306869e-06, "loss": 0.8122, "step": 10423 }, { "epoch": 0.5357179566245246, "grad_norm": 1.0707435607910156, "learning_rate": 4.664853085338348e-06, "loss": 0.7333, "step": 10424 }, { "epoch": 0.5357693493678692, "grad_norm": 1.0820441246032715, "learning_rate": 4.664022707655362e-06, "loss": 0.7082, "step": 10425 }, { "epoch": 0.5358207421112139, "grad_norm": 1.1046950817108154, "learning_rate": 4.663192339280921e-06, "loss": 0.7334, "step": 10426 }, { "epoch": 0.5358721348545585, "grad_norm": 0.6828492879867554, "learning_rate": 4.662361980238031e-06, "loss": 0.6424, "step": 10427 }, { "epoch": 0.5359235275979032, "grad_norm": 1.030519723892212, "learning_rate": 4.661531630549696e-06, "loss": 0.6895, "step": 10428 }, { "epoch": 0.5359749203412478, "grad_norm": 1.0477770566940308, "learning_rate": 4.660701290238923e-06, "loss": 0.6857, "step": 10429 }, { "epoch": 0.5360263130845925, "grad_norm": 1.0766668319702148, "learning_rate": 4.659870959328719e-06, "loss": 0.7591, "step": 10430 }, { "epoch": 0.5360777058279371, "grad_norm": 1.157882809638977, "learning_rate": 4.659040637842085e-06, "loss": 0.7332, "step": 10431 }, { "epoch": 0.5361290985712818, "grad_norm": 1.0176080465316772, "learning_rate": 4.658210325802028e-06, "loss": 0.7363, "step": 10432 }, { "epoch": 0.5361804913146264, "grad_norm": 1.0790412425994873, "learning_rate": 4.657380023231556e-06, "loss": 0.7267, "step": 10433 }, { "epoch": 0.5362318840579711, "grad_norm": 1.1094882488250732, "learning_rate": 4.656549730153665e-06, "loss": 0.7133, "step": 10434 }, { "epoch": 0.5362832768013156, "grad_norm": 1.136902928352356, "learning_rate": 4.655719446591364e-06, "loss": 0.7407, "step": 10435 }, { "epoch": 0.5363346695446602, "grad_norm": 1.0712867975234985, "learning_rate": 4.654889172567658e-06, "loss": 0.7605, "step": 10436 }, { "epoch": 0.5363860622880049, "grad_norm": 1.0519243478775024, "learning_rate": 4.654058908105548e-06, "loss": 0.6973, "step": 10437 }, { "epoch": 0.5364374550313495, "grad_norm": 1.1641972064971924, "learning_rate": 4.6532286532280385e-06, "loss": 0.7291, "step": 10438 }, { "epoch": 0.5364888477746942, "grad_norm": 1.0016021728515625, "learning_rate": 4.652398407958132e-06, "loss": 0.6809, "step": 10439 }, { "epoch": 0.5365402405180388, "grad_norm": 1.022668480873108, "learning_rate": 4.651568172318832e-06, "loss": 0.7341, "step": 10440 }, { "epoch": 0.5365916332613835, "grad_norm": 1.185032606124878, "learning_rate": 4.650737946333139e-06, "loss": 0.7567, "step": 10441 }, { "epoch": 0.5366430260047281, "grad_norm": 1.1219017505645752, "learning_rate": 4.649907730024056e-06, "loss": 0.7512, "step": 10442 }, { "epoch": 0.5366944187480728, "grad_norm": 1.0617390871047974, "learning_rate": 4.649077523414587e-06, "loss": 0.7386, "step": 10443 }, { "epoch": 0.5367458114914174, "grad_norm": 1.080819010734558, "learning_rate": 4.648247326527729e-06, "loss": 0.7609, "step": 10444 }, { "epoch": 0.5367972042347621, "grad_norm": 1.0205076932907104, "learning_rate": 4.647417139386487e-06, "loss": 0.7139, "step": 10445 }, { "epoch": 0.5368485969781067, "grad_norm": 1.0862131118774414, "learning_rate": 4.646586962013861e-06, "loss": 0.7192, "step": 10446 }, { "epoch": 0.5368999897214514, "grad_norm": 1.1342219114303589, "learning_rate": 4.645756794432852e-06, "loss": 0.772, "step": 10447 }, { "epoch": 0.536951382464796, "grad_norm": 1.0725730657577515, "learning_rate": 4.644926636666461e-06, "loss": 0.6864, "step": 10448 }, { "epoch": 0.5370027752081407, "grad_norm": 1.062612771987915, "learning_rate": 4.644096488737687e-06, "loss": 0.7076, "step": 10449 }, { "epoch": 0.5370541679514852, "grad_norm": 1.066922903060913, "learning_rate": 4.64326635066953e-06, "loss": 0.7139, "step": 10450 }, { "epoch": 0.5371055606948298, "grad_norm": 1.0566930770874023, "learning_rate": 4.64243622248499e-06, "loss": 0.765, "step": 10451 }, { "epoch": 0.5371569534381745, "grad_norm": 1.1445295810699463, "learning_rate": 4.641606104207067e-06, "loss": 0.7379, "step": 10452 }, { "epoch": 0.5372083461815191, "grad_norm": 1.1217474937438965, "learning_rate": 4.640775995858762e-06, "loss": 0.6979, "step": 10453 }, { "epoch": 0.5372597389248638, "grad_norm": 1.1136583089828491, "learning_rate": 4.63994589746307e-06, "loss": 0.7596, "step": 10454 }, { "epoch": 0.5373111316682084, "grad_norm": 1.1378822326660156, "learning_rate": 4.63911580904299e-06, "loss": 0.7225, "step": 10455 }, { "epoch": 0.5373625244115531, "grad_norm": 1.1026440858840942, "learning_rate": 4.638285730621525e-06, "loss": 0.6889, "step": 10456 }, { "epoch": 0.5374139171548977, "grad_norm": 1.0863673686981201, "learning_rate": 4.637455662221666e-06, "loss": 0.7586, "step": 10457 }, { "epoch": 0.5374653098982424, "grad_norm": 1.096804141998291, "learning_rate": 4.636625603866417e-06, "loss": 0.7488, "step": 10458 }, { "epoch": 0.537516702641587, "grad_norm": 1.1065821647644043, "learning_rate": 4.635795555578772e-06, "loss": 0.701, "step": 10459 }, { "epoch": 0.5375680953849317, "grad_norm": 1.1217025518417358, "learning_rate": 4.634965517381729e-06, "loss": 0.7053, "step": 10460 }, { "epoch": 0.5376194881282763, "grad_norm": 1.0425738096237183, "learning_rate": 4.634135489298284e-06, "loss": 0.699, "step": 10461 }, { "epoch": 0.537670880871621, "grad_norm": 1.0263397693634033, "learning_rate": 4.6333054713514384e-06, "loss": 0.7232, "step": 10462 }, { "epoch": 0.5377222736149656, "grad_norm": 1.1022967100143433, "learning_rate": 4.632475463564182e-06, "loss": 0.698, "step": 10463 }, { "epoch": 0.5377736663583103, "grad_norm": 0.9844361543655396, "learning_rate": 4.631645465959514e-06, "loss": 0.6991, "step": 10464 }, { "epoch": 0.5378250591016549, "grad_norm": 1.078611135482788, "learning_rate": 4.630815478560432e-06, "loss": 0.7363, "step": 10465 }, { "epoch": 0.5378764518449994, "grad_norm": 1.1333802938461304, "learning_rate": 4.629985501389928e-06, "loss": 0.7787, "step": 10466 }, { "epoch": 0.5379278445883441, "grad_norm": 1.0465352535247803, "learning_rate": 4.629155534470997e-06, "loss": 0.6898, "step": 10467 }, { "epoch": 0.5379792373316887, "grad_norm": 1.1531829833984375, "learning_rate": 4.628325577826638e-06, "loss": 0.7499, "step": 10468 }, { "epoch": 0.5380306300750334, "grad_norm": 1.084073543548584, "learning_rate": 4.627495631479843e-06, "loss": 0.7204, "step": 10469 }, { "epoch": 0.538082022818378, "grad_norm": 1.0935046672821045, "learning_rate": 4.626665695453606e-06, "loss": 0.6936, "step": 10470 }, { "epoch": 0.5381334155617227, "grad_norm": 1.030538558959961, "learning_rate": 4.625835769770923e-06, "loss": 0.7003, "step": 10471 }, { "epoch": 0.5381848083050673, "grad_norm": 1.0489671230316162, "learning_rate": 4.625005854454788e-06, "loss": 0.7521, "step": 10472 }, { "epoch": 0.538236201048412, "grad_norm": 1.0836923122406006, "learning_rate": 4.6241759495281916e-06, "loss": 0.7137, "step": 10473 }, { "epoch": 0.5382875937917566, "grad_norm": 1.1485892534255981, "learning_rate": 4.623346055014128e-06, "loss": 0.7482, "step": 10474 }, { "epoch": 0.5383389865351013, "grad_norm": 1.1004501581192017, "learning_rate": 4.622516170935593e-06, "loss": 0.721, "step": 10475 }, { "epoch": 0.5383903792784459, "grad_norm": 1.1818166971206665, "learning_rate": 4.6216862973155756e-06, "loss": 0.708, "step": 10476 }, { "epoch": 0.5384417720217906, "grad_norm": 0.7811095714569092, "learning_rate": 4.62085643417707e-06, "loss": 0.6724, "step": 10477 }, { "epoch": 0.5384931647651352, "grad_norm": 1.1231791973114014, "learning_rate": 4.620026581543068e-06, "loss": 0.75, "step": 10478 }, { "epoch": 0.5385445575084798, "grad_norm": 1.0823745727539062, "learning_rate": 4.619196739436561e-06, "loss": 0.7248, "step": 10479 }, { "epoch": 0.5385959502518245, "grad_norm": 1.0404369831085205, "learning_rate": 4.618366907880541e-06, "loss": 0.7332, "step": 10480 }, { "epoch": 0.538647342995169, "grad_norm": 1.0908387899398804, "learning_rate": 4.617537086898e-06, "loss": 0.6897, "step": 10481 }, { "epoch": 0.5386987357385137, "grad_norm": 1.050247073173523, "learning_rate": 4.616707276511927e-06, "loss": 0.6876, "step": 10482 }, { "epoch": 0.5387501284818583, "grad_norm": 1.2456105947494507, "learning_rate": 4.615877476745313e-06, "loss": 0.7396, "step": 10483 }, { "epoch": 0.538801521225203, "grad_norm": 1.017628788948059, "learning_rate": 4.615047687621149e-06, "loss": 0.7094, "step": 10484 }, { "epoch": 0.5388529139685476, "grad_norm": 1.0478549003601074, "learning_rate": 4.614217909162428e-06, "loss": 0.7151, "step": 10485 }, { "epoch": 0.5389043067118923, "grad_norm": 1.0526535511016846, "learning_rate": 4.613388141392134e-06, "loss": 0.7315, "step": 10486 }, { "epoch": 0.5389556994552369, "grad_norm": 1.0699610710144043, "learning_rate": 4.612558384333259e-06, "loss": 0.6779, "step": 10487 }, { "epoch": 0.5390070921985816, "grad_norm": 0.713754415512085, "learning_rate": 4.611728638008792e-06, "loss": 0.6433, "step": 10488 }, { "epoch": 0.5390584849419262, "grad_norm": 0.6681198477745056, "learning_rate": 4.610898902441723e-06, "loss": 0.6715, "step": 10489 }, { "epoch": 0.5391098776852709, "grad_norm": 1.075799822807312, "learning_rate": 4.61006917765504e-06, "loss": 0.699, "step": 10490 }, { "epoch": 0.5391612704286155, "grad_norm": 1.0532586574554443, "learning_rate": 4.609239463671732e-06, "loss": 0.7361, "step": 10491 }, { "epoch": 0.5392126631719601, "grad_norm": 1.116230845451355, "learning_rate": 4.608409760514784e-06, "loss": 0.6918, "step": 10492 }, { "epoch": 0.5392640559153048, "grad_norm": 1.04694402217865, "learning_rate": 4.607580068207185e-06, "loss": 0.7287, "step": 10493 }, { "epoch": 0.5393154486586494, "grad_norm": 1.0567365884780884, "learning_rate": 4.606750386771926e-06, "loss": 0.7328, "step": 10494 }, { "epoch": 0.5393668414019941, "grad_norm": 1.0793025493621826, "learning_rate": 4.605920716231988e-06, "loss": 0.6959, "step": 10495 }, { "epoch": 0.5394182341453386, "grad_norm": 1.0591254234313965, "learning_rate": 4.605091056610361e-06, "loss": 0.7167, "step": 10496 }, { "epoch": 0.5394696268886833, "grad_norm": 1.094467282295227, "learning_rate": 4.6042614079300316e-06, "loss": 0.7391, "step": 10497 }, { "epoch": 0.5395210196320279, "grad_norm": 1.1201578378677368, "learning_rate": 4.6034317702139865e-06, "loss": 0.7154, "step": 10498 }, { "epoch": 0.5395724123753726, "grad_norm": 1.0707334280014038, "learning_rate": 4.60260214348521e-06, "loss": 0.7424, "step": 10499 }, { "epoch": 0.5396238051187172, "grad_norm": 1.2802926301956177, "learning_rate": 4.601772527766687e-06, "loss": 0.7252, "step": 10500 }, { "epoch": 0.5396751978620619, "grad_norm": 0.8095126748085022, "learning_rate": 4.600942923081405e-06, "loss": 0.6501, "step": 10501 }, { "epoch": 0.5397265906054065, "grad_norm": 1.0807127952575684, "learning_rate": 4.600113329452347e-06, "loss": 0.7407, "step": 10502 }, { "epoch": 0.5397779833487512, "grad_norm": 1.0562797784805298, "learning_rate": 4.599283746902498e-06, "loss": 0.7015, "step": 10503 }, { "epoch": 0.5398293760920958, "grad_norm": 1.066268801689148, "learning_rate": 4.598454175454845e-06, "loss": 0.7697, "step": 10504 }, { "epoch": 0.5398807688354404, "grad_norm": 1.0533407926559448, "learning_rate": 4.597624615132367e-06, "loss": 0.7127, "step": 10505 }, { "epoch": 0.5399321615787851, "grad_norm": 0.6977935433387756, "learning_rate": 4.596795065958053e-06, "loss": 0.6423, "step": 10506 }, { "epoch": 0.5399835543221297, "grad_norm": 0.7883232831954956, "learning_rate": 4.595965527954883e-06, "loss": 0.6595, "step": 10507 }, { "epoch": 0.5400349470654744, "grad_norm": 1.1504698991775513, "learning_rate": 4.59513600114584e-06, "loss": 0.6964, "step": 10508 }, { "epoch": 0.540086339808819, "grad_norm": 1.0998119115829468, "learning_rate": 4.594306485553907e-06, "loss": 0.7463, "step": 10509 }, { "epoch": 0.5401377325521637, "grad_norm": 1.0223561525344849, "learning_rate": 4.5934769812020694e-06, "loss": 0.694, "step": 10510 }, { "epoch": 0.5401891252955082, "grad_norm": 1.0451717376708984, "learning_rate": 4.592647488113304e-06, "loss": 0.7294, "step": 10511 }, { "epoch": 0.5402405180388529, "grad_norm": 1.0131254196166992, "learning_rate": 4.591818006310597e-06, "loss": 0.7333, "step": 10512 }, { "epoch": 0.5402919107821975, "grad_norm": 1.0558116436004639, "learning_rate": 4.590988535816929e-06, "loss": 0.7775, "step": 10513 }, { "epoch": 0.5403433035255422, "grad_norm": 1.0845632553100586, "learning_rate": 4.5901590766552815e-06, "loss": 0.7219, "step": 10514 }, { "epoch": 0.5403946962688868, "grad_norm": 1.0390470027923584, "learning_rate": 4.589329628848634e-06, "loss": 0.6995, "step": 10515 }, { "epoch": 0.5404460890122315, "grad_norm": 1.0534014701843262, "learning_rate": 4.588500192419967e-06, "loss": 0.6922, "step": 10516 }, { "epoch": 0.5404974817555761, "grad_norm": 0.8329530954360962, "learning_rate": 4.587670767392264e-06, "loss": 0.7116, "step": 10517 }, { "epoch": 0.5405488744989208, "grad_norm": 1.092004656791687, "learning_rate": 4.5868413537885e-06, "loss": 0.7492, "step": 10518 }, { "epoch": 0.5406002672422654, "grad_norm": 0.7006005048751831, "learning_rate": 4.586011951631657e-06, "loss": 0.6398, "step": 10519 }, { "epoch": 0.54065165998561, "grad_norm": 1.5161834955215454, "learning_rate": 4.585182560944715e-06, "loss": 0.6713, "step": 10520 }, { "epoch": 0.5407030527289547, "grad_norm": 0.6968628168106079, "learning_rate": 4.5843531817506524e-06, "loss": 0.6589, "step": 10521 }, { "epoch": 0.5407544454722993, "grad_norm": 1.0662380456924438, "learning_rate": 4.583523814072447e-06, "loss": 0.773, "step": 10522 }, { "epoch": 0.540805838215644, "grad_norm": 0.7012926340103149, "learning_rate": 4.58269445793308e-06, "loss": 0.6212, "step": 10523 }, { "epoch": 0.5408572309589886, "grad_norm": 1.1276826858520508, "learning_rate": 4.581865113355526e-06, "loss": 0.6581, "step": 10524 }, { "epoch": 0.5409086237023333, "grad_norm": 1.0877556800842285, "learning_rate": 4.581035780362764e-06, "loss": 0.6767, "step": 10525 }, { "epoch": 0.5409600164456778, "grad_norm": 1.0615692138671875, "learning_rate": 4.580206458977774e-06, "loss": 0.7316, "step": 10526 }, { "epoch": 0.5410114091890225, "grad_norm": 1.1296268701553345, "learning_rate": 4.579377149223528e-06, "loss": 0.7644, "step": 10527 }, { "epoch": 0.5410628019323671, "grad_norm": 1.3686052560806274, "learning_rate": 4.578547851123006e-06, "loss": 0.7025, "step": 10528 }, { "epoch": 0.5411141946757118, "grad_norm": 1.0313549041748047, "learning_rate": 4.577718564699183e-06, "loss": 0.6916, "step": 10529 }, { "epoch": 0.5411655874190564, "grad_norm": 12.718899726867676, "learning_rate": 4.576889289975037e-06, "loss": 0.7649, "step": 10530 }, { "epoch": 0.541216980162401, "grad_norm": 0.8783236742019653, "learning_rate": 4.576060026973542e-06, "loss": 0.6953, "step": 10531 }, { "epoch": 0.5412683729057457, "grad_norm": 0.7239128351211548, "learning_rate": 4.575230775717674e-06, "loss": 0.6934, "step": 10532 }, { "epoch": 0.5413197656490903, "grad_norm": 0.9787454009056091, "learning_rate": 4.574401536230411e-06, "loss": 0.705, "step": 10533 }, { "epoch": 0.541371158392435, "grad_norm": 0.8964075446128845, "learning_rate": 4.573572308534723e-06, "loss": 0.6879, "step": 10534 }, { "epoch": 0.5414225511357796, "grad_norm": 1.1308505535125732, "learning_rate": 4.572743092653586e-06, "loss": 0.7013, "step": 10535 }, { "epoch": 0.5414739438791243, "grad_norm": 0.7774497270584106, "learning_rate": 4.571913888609977e-06, "loss": 0.6941, "step": 10536 }, { "epoch": 0.5415253366224689, "grad_norm": 1.0057257413864136, "learning_rate": 4.571084696426865e-06, "loss": 0.6928, "step": 10537 }, { "epoch": 0.5415767293658136, "grad_norm": 0.7181929349899292, "learning_rate": 4.570255516127227e-06, "loss": 0.6553, "step": 10538 }, { "epoch": 0.5416281221091582, "grad_norm": 1.1059304475784302, "learning_rate": 4.569426347734035e-06, "loss": 0.739, "step": 10539 }, { "epoch": 0.5416795148525029, "grad_norm": 1.07304048538208, "learning_rate": 4.568597191270261e-06, "loss": 0.6747, "step": 10540 }, { "epoch": 0.5417309075958475, "grad_norm": 1.0642088651657104, "learning_rate": 4.56776804675888e-06, "loss": 0.7592, "step": 10541 }, { "epoch": 0.5417823003391921, "grad_norm": 1.059482455253601, "learning_rate": 4.5669389142228634e-06, "loss": 0.7299, "step": 10542 }, { "epoch": 0.5418336930825367, "grad_norm": 1.064316987991333, "learning_rate": 4.5661097936851815e-06, "loss": 0.7224, "step": 10543 }, { "epoch": 0.5418850858258814, "grad_norm": 1.140000581741333, "learning_rate": 4.5652806851688055e-06, "loss": 0.7243, "step": 10544 }, { "epoch": 0.541936478569226, "grad_norm": 1.0654624700546265, "learning_rate": 4.5644515886967086e-06, "loss": 0.7317, "step": 10545 }, { "epoch": 0.5419878713125706, "grad_norm": 1.0676462650299072, "learning_rate": 4.563622504291862e-06, "loss": 0.7393, "step": 10546 }, { "epoch": 0.5420392640559153, "grad_norm": 1.0956944227218628, "learning_rate": 4.562793431977234e-06, "loss": 0.7294, "step": 10547 }, { "epoch": 0.5420906567992599, "grad_norm": 1.0937620401382446, "learning_rate": 4.561964371775795e-06, "loss": 0.7673, "step": 10548 }, { "epoch": 0.5421420495426046, "grad_norm": 1.1047075986862183, "learning_rate": 4.561135323710516e-06, "loss": 0.7172, "step": 10549 }, { "epoch": 0.5421934422859492, "grad_norm": 1.049577236175537, "learning_rate": 4.560306287804366e-06, "loss": 0.7404, "step": 10550 }, { "epoch": 0.5422448350292939, "grad_norm": 0.7473107576370239, "learning_rate": 4.5594772640803155e-06, "loss": 0.6402, "step": 10551 }, { "epoch": 0.5422962277726385, "grad_norm": 1.1504651308059692, "learning_rate": 4.558648252561332e-06, "loss": 0.7579, "step": 10552 }, { "epoch": 0.5423476205159832, "grad_norm": 1.0655155181884766, "learning_rate": 4.557819253270382e-06, "loss": 0.7109, "step": 10553 }, { "epoch": 0.5423990132593278, "grad_norm": 0.7419631481170654, "learning_rate": 4.556990266230437e-06, "loss": 0.6603, "step": 10554 }, { "epoch": 0.5424504060026725, "grad_norm": 1.0555150508880615, "learning_rate": 4.5561612914644646e-06, "loss": 0.7067, "step": 10555 }, { "epoch": 0.5425017987460171, "grad_norm": 0.7106221914291382, "learning_rate": 4.5553323289954295e-06, "loss": 0.6676, "step": 10556 }, { "epoch": 0.5425531914893617, "grad_norm": 1.0670651197433472, "learning_rate": 4.5545033788463015e-06, "loss": 0.7154, "step": 10557 }, { "epoch": 0.5426045842327063, "grad_norm": 1.2171638011932373, "learning_rate": 4.553674441040047e-06, "loss": 0.7477, "step": 10558 }, { "epoch": 0.542655976976051, "grad_norm": 1.1148573160171509, "learning_rate": 4.552845515599631e-06, "loss": 0.7902, "step": 10559 }, { "epoch": 0.5427073697193956, "grad_norm": 1.125661849975586, "learning_rate": 4.55201660254802e-06, "loss": 0.6832, "step": 10560 }, { "epoch": 0.5427587624627402, "grad_norm": 0.7490405440330505, "learning_rate": 4.551187701908179e-06, "loss": 0.6739, "step": 10561 }, { "epoch": 0.5428101552060849, "grad_norm": 1.113082766532898, "learning_rate": 4.550358813703077e-06, "loss": 0.761, "step": 10562 }, { "epoch": 0.5428615479494295, "grad_norm": 1.028760313987732, "learning_rate": 4.549529937955676e-06, "loss": 0.707, "step": 10563 }, { "epoch": 0.5429129406927742, "grad_norm": 1.1576337814331055, "learning_rate": 4.548701074688941e-06, "loss": 0.7206, "step": 10564 }, { "epoch": 0.5429643334361188, "grad_norm": 1.0066628456115723, "learning_rate": 4.547872223925838e-06, "loss": 0.6641, "step": 10565 }, { "epoch": 0.5430157261794635, "grad_norm": 1.1145844459533691, "learning_rate": 4.547043385689329e-06, "loss": 0.7278, "step": 10566 }, { "epoch": 0.5430671189228081, "grad_norm": 0.7140308022499084, "learning_rate": 4.546214560002378e-06, "loss": 0.6166, "step": 10567 }, { "epoch": 0.5431185116661528, "grad_norm": 1.1552926301956177, "learning_rate": 4.545385746887951e-06, "loss": 0.7096, "step": 10568 }, { "epoch": 0.5431699044094974, "grad_norm": 1.1254518032073975, "learning_rate": 4.5445569463690065e-06, "loss": 0.7284, "step": 10569 }, { "epoch": 0.5432212971528421, "grad_norm": 1.1393241882324219, "learning_rate": 4.54372815846851e-06, "loss": 0.7422, "step": 10570 }, { "epoch": 0.5432726898961867, "grad_norm": 0.8248307108879089, "learning_rate": 4.542899383209424e-06, "loss": 0.6839, "step": 10571 }, { "epoch": 0.5433240826395312, "grad_norm": 1.0755550861358643, "learning_rate": 4.5420706206147094e-06, "loss": 0.7685, "step": 10572 }, { "epoch": 0.5433754753828759, "grad_norm": 1.0673508644104004, "learning_rate": 4.541241870707327e-06, "loss": 0.7436, "step": 10573 }, { "epoch": 0.5434268681262205, "grad_norm": 1.147019863128662, "learning_rate": 4.540413133510242e-06, "loss": 0.7231, "step": 10574 }, { "epoch": 0.5434782608695652, "grad_norm": 1.0845848321914673, "learning_rate": 4.53958440904641e-06, "loss": 0.7445, "step": 10575 }, { "epoch": 0.5435296536129098, "grad_norm": 1.2527992725372314, "learning_rate": 4.538755697338795e-06, "loss": 0.8395, "step": 10576 }, { "epoch": 0.5435810463562545, "grad_norm": 1.0688257217407227, "learning_rate": 4.5379269984103545e-06, "loss": 0.7293, "step": 10577 }, { "epoch": 0.5436324390995991, "grad_norm": 1.0712201595306396, "learning_rate": 4.537098312284053e-06, "loss": 0.727, "step": 10578 }, { "epoch": 0.5436838318429438, "grad_norm": 1.0583878755569458, "learning_rate": 4.536269638982846e-06, "loss": 0.6695, "step": 10579 }, { "epoch": 0.5437352245862884, "grad_norm": 1.0115925073623657, "learning_rate": 4.535440978529692e-06, "loss": 0.7264, "step": 10580 }, { "epoch": 0.5437866173296331, "grad_norm": 1.0664162635803223, "learning_rate": 4.534612330947553e-06, "loss": 0.7213, "step": 10581 }, { "epoch": 0.5438380100729777, "grad_norm": 1.0657728910446167, "learning_rate": 4.533783696259384e-06, "loss": 0.7413, "step": 10582 }, { "epoch": 0.5438894028163224, "grad_norm": 1.066246747970581, "learning_rate": 4.532955074488146e-06, "loss": 0.7057, "step": 10583 }, { "epoch": 0.543940795559667, "grad_norm": 1.015142798423767, "learning_rate": 4.532126465656798e-06, "loss": 0.71, "step": 10584 }, { "epoch": 0.5439921883030117, "grad_norm": 1.0686124563217163, "learning_rate": 4.531297869788291e-06, "loss": 0.7174, "step": 10585 }, { "epoch": 0.5440435810463563, "grad_norm": 1.0495305061340332, "learning_rate": 4.5304692869055865e-06, "loss": 0.7284, "step": 10586 }, { "epoch": 0.5440949737897008, "grad_norm": 1.096439242362976, "learning_rate": 4.5296407170316435e-06, "loss": 0.6757, "step": 10587 }, { "epoch": 0.5441463665330455, "grad_norm": 1.0084543228149414, "learning_rate": 4.528812160189412e-06, "loss": 0.6978, "step": 10588 }, { "epoch": 0.5441977592763901, "grad_norm": 0.786526083946228, "learning_rate": 4.527983616401851e-06, "loss": 0.6527, "step": 10589 }, { "epoch": 0.5442491520197348, "grad_norm": 0.7137128114700317, "learning_rate": 4.5271550856919176e-06, "loss": 0.6503, "step": 10590 }, { "epoch": 0.5443005447630794, "grad_norm": 1.0851043462753296, "learning_rate": 4.526326568082564e-06, "loss": 0.7444, "step": 10591 }, { "epoch": 0.5443519375064241, "grad_norm": 1.0560194253921509, "learning_rate": 4.525498063596747e-06, "loss": 0.7349, "step": 10592 }, { "epoch": 0.5444033302497687, "grad_norm": 1.1076501607894897, "learning_rate": 4.52466957225742e-06, "loss": 0.7046, "step": 10593 }, { "epoch": 0.5444547229931134, "grad_norm": 1.0844630002975464, "learning_rate": 4.52384109408754e-06, "loss": 0.7474, "step": 10594 }, { "epoch": 0.544506115736458, "grad_norm": 1.1662654876708984, "learning_rate": 4.523012629110056e-06, "loss": 0.6892, "step": 10595 }, { "epoch": 0.5445575084798027, "grad_norm": 1.0642030239105225, "learning_rate": 4.522184177347923e-06, "loss": 0.7001, "step": 10596 }, { "epoch": 0.5446089012231473, "grad_norm": 1.1036603450775146, "learning_rate": 4.5213557388240985e-06, "loss": 0.7524, "step": 10597 }, { "epoch": 0.544660293966492, "grad_norm": 1.138271689414978, "learning_rate": 4.520527313561528e-06, "loss": 0.7396, "step": 10598 }, { "epoch": 0.5447116867098366, "grad_norm": 1.0434203147888184, "learning_rate": 4.519698901583167e-06, "loss": 0.7112, "step": 10599 }, { "epoch": 0.5447630794531813, "grad_norm": 1.0473898649215698, "learning_rate": 4.518870502911968e-06, "loss": 0.7371, "step": 10600 }, { "epoch": 0.5448144721965259, "grad_norm": 1.0573638677597046, "learning_rate": 4.5180421175708816e-06, "loss": 0.6618, "step": 10601 }, { "epoch": 0.5448658649398704, "grad_norm": 1.0986590385437012, "learning_rate": 4.51721374558286e-06, "loss": 0.7602, "step": 10602 }, { "epoch": 0.5449172576832151, "grad_norm": 0.8032775521278381, "learning_rate": 4.516385386970853e-06, "loss": 0.6567, "step": 10603 }, { "epoch": 0.5449686504265597, "grad_norm": 1.1532690525054932, "learning_rate": 4.515557041757811e-06, "loss": 0.6528, "step": 10604 }, { "epoch": 0.5450200431699044, "grad_norm": 1.1178356409072876, "learning_rate": 4.514728709966682e-06, "loss": 0.7511, "step": 10605 }, { "epoch": 0.545071435913249, "grad_norm": 1.1708389520645142, "learning_rate": 4.51390039162042e-06, "loss": 0.7331, "step": 10606 }, { "epoch": 0.5451228286565937, "grad_norm": 1.0684096813201904, "learning_rate": 4.5130720867419735e-06, "loss": 0.7115, "step": 10607 }, { "epoch": 0.5451742213999383, "grad_norm": 0.6783982515335083, "learning_rate": 4.512243795354288e-06, "loss": 0.6582, "step": 10608 }, { "epoch": 0.545225614143283, "grad_norm": 1.0814152956008911, "learning_rate": 4.511415517480313e-06, "loss": 0.7748, "step": 10609 }, { "epoch": 0.5452770068866276, "grad_norm": 1.036346673965454, "learning_rate": 4.510587253143002e-06, "loss": 0.7264, "step": 10610 }, { "epoch": 0.5453283996299723, "grad_norm": 1.080217957496643, "learning_rate": 4.5097590023652945e-06, "loss": 0.7752, "step": 10611 }, { "epoch": 0.5453797923733169, "grad_norm": 1.0431418418884277, "learning_rate": 4.508930765170143e-06, "loss": 0.6888, "step": 10612 }, { "epoch": 0.5454311851166616, "grad_norm": 0.7632734775543213, "learning_rate": 4.5081025415804944e-06, "loss": 0.6571, "step": 10613 }, { "epoch": 0.5454825778600062, "grad_norm": 1.1495115756988525, "learning_rate": 4.507274331619293e-06, "loss": 0.7203, "step": 10614 }, { "epoch": 0.5455339706033508, "grad_norm": 1.0857124328613281, "learning_rate": 4.506446135309487e-06, "loss": 0.7413, "step": 10615 }, { "epoch": 0.5455853633466955, "grad_norm": 1.0376615524291992, "learning_rate": 4.5056179526740235e-06, "loss": 0.69, "step": 10616 }, { "epoch": 0.54563675609004, "grad_norm": 1.0994131565093994, "learning_rate": 4.504789783735844e-06, "loss": 0.7257, "step": 10617 }, { "epoch": 0.5456881488333847, "grad_norm": 0.972282350063324, "learning_rate": 4.503961628517897e-06, "loss": 0.7818, "step": 10618 }, { "epoch": 0.5457395415767293, "grad_norm": 1.0303558111190796, "learning_rate": 4.503133487043128e-06, "loss": 0.678, "step": 10619 }, { "epoch": 0.545790934320074, "grad_norm": 1.0216469764709473, "learning_rate": 4.502305359334477e-06, "loss": 0.7433, "step": 10620 }, { "epoch": 0.5458423270634186, "grad_norm": 1.0060765743255615, "learning_rate": 4.501477245414891e-06, "loss": 0.7064, "step": 10621 }, { "epoch": 0.5458937198067633, "grad_norm": 1.0621914863586426, "learning_rate": 4.5006491453073125e-06, "loss": 0.7021, "step": 10622 }, { "epoch": 0.5459451125501079, "grad_norm": 1.2142199277877808, "learning_rate": 4.499821059034688e-06, "loss": 0.7415, "step": 10623 }, { "epoch": 0.5459965052934526, "grad_norm": 1.0349504947662354, "learning_rate": 4.4989929866199555e-06, "loss": 0.7128, "step": 10624 }, { "epoch": 0.5460478980367972, "grad_norm": 1.0740280151367188, "learning_rate": 4.4981649280860605e-06, "loss": 0.7782, "step": 10625 }, { "epoch": 0.5460992907801419, "grad_norm": 1.088388442993164, "learning_rate": 4.497336883455945e-06, "loss": 0.7515, "step": 10626 }, { "epoch": 0.5461506835234865, "grad_norm": 0.8848722577095032, "learning_rate": 4.49650885275255e-06, "loss": 0.692, "step": 10627 }, { "epoch": 0.5462020762668311, "grad_norm": 1.0878801345825195, "learning_rate": 4.495680835998815e-06, "loss": 0.7177, "step": 10628 }, { "epoch": 0.5462534690101758, "grad_norm": 1.0452316999435425, "learning_rate": 4.494852833217687e-06, "loss": 0.689, "step": 10629 }, { "epoch": 0.5463048617535204, "grad_norm": 1.0819647312164307, "learning_rate": 4.494024844432099e-06, "loss": 0.663, "step": 10630 }, { "epoch": 0.5463562544968651, "grad_norm": 1.043624997138977, "learning_rate": 4.493196869664995e-06, "loss": 0.7192, "step": 10631 }, { "epoch": 0.5464076472402097, "grad_norm": 1.0298439264297485, "learning_rate": 4.492368908939316e-06, "loss": 0.7128, "step": 10632 }, { "epoch": 0.5464590399835543, "grad_norm": 1.0974156856536865, "learning_rate": 4.491540962277997e-06, "loss": 0.7095, "step": 10633 }, { "epoch": 0.5465104327268989, "grad_norm": 1.0415605306625366, "learning_rate": 4.490713029703981e-06, "loss": 0.7722, "step": 10634 }, { "epoch": 0.5465618254702436, "grad_norm": 1.1123496294021606, "learning_rate": 4.4898851112402066e-06, "loss": 0.839, "step": 10635 }, { "epoch": 0.5466132182135882, "grad_norm": 1.152778148651123, "learning_rate": 4.489057206909609e-06, "loss": 0.7462, "step": 10636 }, { "epoch": 0.5466646109569329, "grad_norm": 1.079625129699707, "learning_rate": 4.488229316735127e-06, "loss": 0.7172, "step": 10637 }, { "epoch": 0.5467160037002775, "grad_norm": 1.1122641563415527, "learning_rate": 4.487401440739699e-06, "loss": 0.7413, "step": 10638 }, { "epoch": 0.5467673964436222, "grad_norm": 1.0967655181884766, "learning_rate": 4.486573578946264e-06, "loss": 0.7317, "step": 10639 }, { "epoch": 0.5468187891869668, "grad_norm": 1.1058483123779297, "learning_rate": 4.485745731377754e-06, "loss": 0.7572, "step": 10640 }, { "epoch": 0.5468701819303114, "grad_norm": 1.0113062858581543, "learning_rate": 4.484917898057108e-06, "loss": 0.7636, "step": 10641 }, { "epoch": 0.5469215746736561, "grad_norm": 1.076033353805542, "learning_rate": 4.4840900790072626e-06, "loss": 0.7558, "step": 10642 }, { "epoch": 0.5469729674170007, "grad_norm": 0.997805118560791, "learning_rate": 4.4832622742511504e-06, "loss": 0.675, "step": 10643 }, { "epoch": 0.5470243601603454, "grad_norm": 1.115605354309082, "learning_rate": 4.482434483811708e-06, "loss": 0.6423, "step": 10644 }, { "epoch": 0.54707575290369, "grad_norm": 1.0271010398864746, "learning_rate": 4.481606707711873e-06, "loss": 0.7278, "step": 10645 }, { "epoch": 0.5471271456470347, "grad_norm": 1.1900086402893066, "learning_rate": 4.480778945974575e-06, "loss": 0.7568, "step": 10646 }, { "epoch": 0.5471785383903793, "grad_norm": 0.9834240078926086, "learning_rate": 4.4799511986227485e-06, "loss": 0.712, "step": 10647 }, { "epoch": 0.5472299311337239, "grad_norm": 1.0759097337722778, "learning_rate": 4.4791234656793305e-06, "loss": 0.7529, "step": 10648 }, { "epoch": 0.5472813238770685, "grad_norm": 0.7260752320289612, "learning_rate": 4.47829574716725e-06, "loss": 0.6678, "step": 10649 }, { "epoch": 0.5473327166204132, "grad_norm": 1.131471037864685, "learning_rate": 4.477468043109441e-06, "loss": 0.7585, "step": 10650 }, { "epoch": 0.5473841093637578, "grad_norm": 1.0357584953308105, "learning_rate": 4.476640353528838e-06, "loss": 0.7842, "step": 10651 }, { "epoch": 0.5474355021071025, "grad_norm": 1.116679310798645, "learning_rate": 4.475812678448369e-06, "loss": 0.69, "step": 10652 }, { "epoch": 0.5474868948504471, "grad_norm": 0.7543790936470032, "learning_rate": 4.47498501789097e-06, "loss": 0.6624, "step": 10653 }, { "epoch": 0.5475382875937918, "grad_norm": 1.011736273765564, "learning_rate": 4.474157371879567e-06, "loss": 0.6862, "step": 10654 }, { "epoch": 0.5475896803371364, "grad_norm": 0.7670367956161499, "learning_rate": 4.473329740437096e-06, "loss": 0.6899, "step": 10655 }, { "epoch": 0.547641073080481, "grad_norm": 1.1023555994033813, "learning_rate": 4.4725021235864815e-06, "loss": 0.7279, "step": 10656 }, { "epoch": 0.5476924658238257, "grad_norm": 0.6802577376365662, "learning_rate": 4.471674521350657e-06, "loss": 0.6448, "step": 10657 }, { "epoch": 0.5477438585671703, "grad_norm": 0.7912315726280212, "learning_rate": 4.4708469337525535e-06, "loss": 0.6582, "step": 10658 }, { "epoch": 0.547795251310515, "grad_norm": 0.9843100309371948, "learning_rate": 4.470019360815095e-06, "loss": 0.669, "step": 10659 }, { "epoch": 0.5478466440538596, "grad_norm": 1.1052237749099731, "learning_rate": 4.469191802561214e-06, "loss": 0.7583, "step": 10660 }, { "epoch": 0.5478980367972043, "grad_norm": 1.303665041923523, "learning_rate": 4.468364259013839e-06, "loss": 0.6935, "step": 10661 }, { "epoch": 0.5479494295405489, "grad_norm": 1.0570329427719116, "learning_rate": 4.467536730195895e-06, "loss": 0.7541, "step": 10662 }, { "epoch": 0.5480008222838935, "grad_norm": 1.0624116659164429, "learning_rate": 4.4667092161303104e-06, "loss": 0.6755, "step": 10663 }, { "epoch": 0.5480522150272381, "grad_norm": 1.0217534303665161, "learning_rate": 4.465881716840014e-06, "loss": 0.7119, "step": 10664 }, { "epoch": 0.5481036077705828, "grad_norm": 1.056186556816101, "learning_rate": 4.46505423234793e-06, "loss": 0.714, "step": 10665 }, { "epoch": 0.5481550005139274, "grad_norm": 1.056942343711853, "learning_rate": 4.464226762676985e-06, "loss": 0.7198, "step": 10666 }, { "epoch": 0.548206393257272, "grad_norm": 1.0565210580825806, "learning_rate": 4.463399307850108e-06, "loss": 0.7017, "step": 10667 }, { "epoch": 0.5482577860006167, "grad_norm": 1.080053687095642, "learning_rate": 4.46257186789022e-06, "loss": 0.7424, "step": 10668 }, { "epoch": 0.5483091787439613, "grad_norm": 0.7140719890594482, "learning_rate": 4.4617444428202464e-06, "loss": 0.6641, "step": 10669 }, { "epoch": 0.548360571487306, "grad_norm": 1.0117716789245605, "learning_rate": 4.460917032663114e-06, "loss": 0.7004, "step": 10670 }, { "epoch": 0.5484119642306506, "grad_norm": 1.001701831817627, "learning_rate": 4.460089637441748e-06, "loss": 0.6945, "step": 10671 }, { "epoch": 0.5484633569739953, "grad_norm": 1.0643306970596313, "learning_rate": 4.459262257179068e-06, "loss": 0.7039, "step": 10672 }, { "epoch": 0.5485147497173399, "grad_norm": 1.0727132558822632, "learning_rate": 4.458434891897999e-06, "loss": 0.7899, "step": 10673 }, { "epoch": 0.5485661424606846, "grad_norm": 0.7940266132354736, "learning_rate": 4.457607541621466e-06, "loss": 0.6834, "step": 10674 }, { "epoch": 0.5486175352040292, "grad_norm": 1.0557655096054077, "learning_rate": 4.456780206372388e-06, "loss": 0.7664, "step": 10675 }, { "epoch": 0.5486689279473739, "grad_norm": 0.9803177118301392, "learning_rate": 4.455952886173689e-06, "loss": 0.7003, "step": 10676 }, { "epoch": 0.5487203206907185, "grad_norm": 1.1900302171707153, "learning_rate": 4.455125581048293e-06, "loss": 0.7249, "step": 10677 }, { "epoch": 0.5487717134340631, "grad_norm": 0.683269202709198, "learning_rate": 4.4542982910191155e-06, "loss": 0.6918, "step": 10678 }, { "epoch": 0.5488231061774077, "grad_norm": 1.1276438236236572, "learning_rate": 4.4534710161090814e-06, "loss": 0.709, "step": 10679 }, { "epoch": 0.5488744989207524, "grad_norm": 1.0157021284103394, "learning_rate": 4.4526437563411114e-06, "loss": 0.6645, "step": 10680 }, { "epoch": 0.548925891664097, "grad_norm": 1.107812523841858, "learning_rate": 4.451816511738122e-06, "loss": 0.7325, "step": 10681 }, { "epoch": 0.5489772844074416, "grad_norm": 1.0462309122085571, "learning_rate": 4.450989282323035e-06, "loss": 0.7089, "step": 10682 }, { "epoch": 0.5490286771507863, "grad_norm": 0.6964511871337891, "learning_rate": 4.45016206811877e-06, "loss": 0.6092, "step": 10683 }, { "epoch": 0.5490800698941309, "grad_norm": 1.1735060214996338, "learning_rate": 4.449334869148244e-06, "loss": 0.759, "step": 10684 }, { "epoch": 0.5491314626374756, "grad_norm": 1.0112004280090332, "learning_rate": 4.448507685434378e-06, "loss": 0.7302, "step": 10685 }, { "epoch": 0.5491828553808202, "grad_norm": 1.0510773658752441, "learning_rate": 4.4476805170000866e-06, "loss": 0.7159, "step": 10686 }, { "epoch": 0.5492342481241649, "grad_norm": 1.1503227949142456, "learning_rate": 4.446853363868291e-06, "loss": 0.7093, "step": 10687 }, { "epoch": 0.5492856408675095, "grad_norm": 1.0337512493133545, "learning_rate": 4.446026226061903e-06, "loss": 0.7178, "step": 10688 }, { "epoch": 0.5493370336108542, "grad_norm": 1.1303627490997314, "learning_rate": 4.445199103603844e-06, "loss": 0.7502, "step": 10689 }, { "epoch": 0.5493884263541988, "grad_norm": 1.1557204723358154, "learning_rate": 4.4443719965170296e-06, "loss": 0.6793, "step": 10690 }, { "epoch": 0.5494398190975435, "grad_norm": 1.1321524381637573, "learning_rate": 4.4435449048243724e-06, "loss": 0.7108, "step": 10691 }, { "epoch": 0.5494912118408881, "grad_norm": 0.7390400171279907, "learning_rate": 4.442717828548789e-06, "loss": 0.6329, "step": 10692 }, { "epoch": 0.5495426045842327, "grad_norm": 1.178370475769043, "learning_rate": 4.4418907677131965e-06, "loss": 0.7761, "step": 10693 }, { "epoch": 0.5495939973275773, "grad_norm": 1.013204574584961, "learning_rate": 4.4410637223405064e-06, "loss": 0.6744, "step": 10694 }, { "epoch": 0.549645390070922, "grad_norm": 1.0659483671188354, "learning_rate": 4.4402366924536335e-06, "loss": 0.7176, "step": 10695 }, { "epoch": 0.5496967828142666, "grad_norm": 0.8338388204574585, "learning_rate": 4.439409678075494e-06, "loss": 0.6632, "step": 10696 }, { "epoch": 0.5497481755576112, "grad_norm": 1.110988736152649, "learning_rate": 4.438582679228997e-06, "loss": 0.717, "step": 10697 }, { "epoch": 0.5497995683009559, "grad_norm": 1.0613336563110352, "learning_rate": 4.437755695937057e-06, "loss": 0.7201, "step": 10698 }, { "epoch": 0.5498509610443005, "grad_norm": 1.077130675315857, "learning_rate": 4.436928728222589e-06, "loss": 0.6953, "step": 10699 }, { "epoch": 0.5499023537876452, "grad_norm": 0.9596279859542847, "learning_rate": 4.4361017761085e-06, "loss": 0.6801, "step": 10700 }, { "epoch": 0.5499537465309898, "grad_norm": 0.7890768647193909, "learning_rate": 4.4352748396177034e-06, "loss": 0.6904, "step": 10701 }, { "epoch": 0.5500051392743345, "grad_norm": 1.0956487655639648, "learning_rate": 4.434447918773111e-06, "loss": 0.7589, "step": 10702 }, { "epoch": 0.5500565320176791, "grad_norm": 1.0249221324920654, "learning_rate": 4.4336210135976335e-06, "loss": 0.698, "step": 10703 }, { "epoch": 0.5501079247610238, "grad_norm": 0.8186615705490112, "learning_rate": 4.4327941241141805e-06, "loss": 0.7116, "step": 10704 }, { "epoch": 0.5501593175043684, "grad_norm": 0.7293616533279419, "learning_rate": 4.43196725034566e-06, "loss": 0.6848, "step": 10705 }, { "epoch": 0.5502107102477131, "grad_norm": 0.7331995368003845, "learning_rate": 4.431140392314985e-06, "loss": 0.6834, "step": 10706 }, { "epoch": 0.5502621029910577, "grad_norm": 0.7464468479156494, "learning_rate": 4.4303135500450604e-06, "loss": 0.6372, "step": 10707 }, { "epoch": 0.5503134957344022, "grad_norm": 1.0478460788726807, "learning_rate": 4.429486723558797e-06, "loss": 0.7135, "step": 10708 }, { "epoch": 0.5503648884777469, "grad_norm": 1.0936639308929443, "learning_rate": 4.428659912879104e-06, "loss": 0.7173, "step": 10709 }, { "epoch": 0.5504162812210915, "grad_norm": 1.091056227684021, "learning_rate": 4.427833118028885e-06, "loss": 0.7534, "step": 10710 }, { "epoch": 0.5504676739644362, "grad_norm": 1.055601954460144, "learning_rate": 4.427006339031048e-06, "loss": 0.7471, "step": 10711 }, { "epoch": 0.5505190667077808, "grad_norm": 0.7356933355331421, "learning_rate": 4.426179575908504e-06, "loss": 0.6325, "step": 10712 }, { "epoch": 0.5505704594511255, "grad_norm": 1.018147349357605, "learning_rate": 4.425352828684154e-06, "loss": 0.7804, "step": 10713 }, { "epoch": 0.5506218521944701, "grad_norm": 1.048281192779541, "learning_rate": 4.4245260973809055e-06, "loss": 0.7115, "step": 10714 }, { "epoch": 0.5506732449378148, "grad_norm": 0.9898094534873962, "learning_rate": 4.423699382021665e-06, "loss": 0.6796, "step": 10715 }, { "epoch": 0.5507246376811594, "grad_norm": 0.7895371317863464, "learning_rate": 4.422872682629335e-06, "loss": 0.6959, "step": 10716 }, { "epoch": 0.5507760304245041, "grad_norm": 1.1945199966430664, "learning_rate": 4.422045999226821e-06, "loss": 0.6928, "step": 10717 }, { "epoch": 0.5508274231678487, "grad_norm": 0.7323358654975891, "learning_rate": 4.421219331837027e-06, "loss": 0.6849, "step": 10718 }, { "epoch": 0.5508788159111934, "grad_norm": 1.1355222463607788, "learning_rate": 4.42039268048286e-06, "loss": 0.7553, "step": 10719 }, { "epoch": 0.550930208654538, "grad_norm": 1.0402915477752686, "learning_rate": 4.4195660451872175e-06, "loss": 0.7106, "step": 10720 }, { "epoch": 0.5509816013978827, "grad_norm": 1.1234004497528076, "learning_rate": 4.4187394259730044e-06, "loss": 0.6836, "step": 10721 }, { "epoch": 0.5510329941412273, "grad_norm": 1.0415055751800537, "learning_rate": 4.4179128228631255e-06, "loss": 0.6834, "step": 10722 }, { "epoch": 0.551084386884572, "grad_norm": 1.0618886947631836, "learning_rate": 4.417086235880478e-06, "loss": 0.7072, "step": 10723 }, { "epoch": 0.5511357796279165, "grad_norm": 11.103466033935547, "learning_rate": 4.416259665047965e-06, "loss": 0.6903, "step": 10724 }, { "epoch": 0.5511871723712611, "grad_norm": 1.159605860710144, "learning_rate": 4.415433110388489e-06, "loss": 0.7612, "step": 10725 }, { "epoch": 0.5512385651146058, "grad_norm": 1.057541012763977, "learning_rate": 4.414606571924948e-06, "loss": 0.7863, "step": 10726 }, { "epoch": 0.5512899578579504, "grad_norm": 1.0768178701400757, "learning_rate": 4.413780049680243e-06, "loss": 0.6737, "step": 10727 }, { "epoch": 0.5513413506012951, "grad_norm": 1.142947793006897, "learning_rate": 4.412953543677276e-06, "loss": 0.7368, "step": 10728 }, { "epoch": 0.5513927433446397, "grad_norm": 1.0690091848373413, "learning_rate": 4.412127053938941e-06, "loss": 0.7309, "step": 10729 }, { "epoch": 0.5514441360879844, "grad_norm": 1.0647177696228027, "learning_rate": 4.4113005804881404e-06, "loss": 0.7471, "step": 10730 }, { "epoch": 0.551495528831329, "grad_norm": 1.0899895429611206, "learning_rate": 4.410474123347771e-06, "loss": 0.7327, "step": 10731 }, { "epoch": 0.5515469215746737, "grad_norm": 1.1271027326583862, "learning_rate": 4.409647682540732e-06, "loss": 0.7575, "step": 10732 }, { "epoch": 0.5515983143180183, "grad_norm": 1.1342661380767822, "learning_rate": 4.408821258089918e-06, "loss": 0.8081, "step": 10733 }, { "epoch": 0.551649707061363, "grad_norm": 1.0233356952667236, "learning_rate": 4.407994850018228e-06, "loss": 0.7154, "step": 10734 }, { "epoch": 0.5517010998047076, "grad_norm": 0.7880732417106628, "learning_rate": 4.407168458348558e-06, "loss": 0.6555, "step": 10735 }, { "epoch": 0.5517524925480523, "grad_norm": 1.5140451192855835, "learning_rate": 4.406342083103802e-06, "loss": 0.7895, "step": 10736 }, { "epoch": 0.5518038852913969, "grad_norm": 1.0206595659255981, "learning_rate": 4.405515724306858e-06, "loss": 0.7037, "step": 10737 }, { "epoch": 0.5518552780347415, "grad_norm": 1.0632920265197754, "learning_rate": 4.404689381980622e-06, "loss": 0.7316, "step": 10738 }, { "epoch": 0.5519066707780861, "grad_norm": 1.092975378036499, "learning_rate": 4.4038630561479836e-06, "loss": 0.671, "step": 10739 }, { "epoch": 0.5519580635214307, "grad_norm": 1.203824520111084, "learning_rate": 4.40303674683184e-06, "loss": 0.714, "step": 10740 }, { "epoch": 0.5520094562647754, "grad_norm": 0.7501099109649658, "learning_rate": 4.402210454055088e-06, "loss": 0.6612, "step": 10741 }, { "epoch": 0.55206084900812, "grad_norm": 1.0705556869506836, "learning_rate": 4.401384177840614e-06, "loss": 0.7577, "step": 10742 }, { "epoch": 0.5521122417514647, "grad_norm": 1.0825904607772827, "learning_rate": 4.400557918211314e-06, "loss": 0.7695, "step": 10743 }, { "epoch": 0.5521636344948093, "grad_norm": 0.8202421069145203, "learning_rate": 4.399731675190081e-06, "loss": 0.6976, "step": 10744 }, { "epoch": 0.552215027238154, "grad_norm": 1.0230708122253418, "learning_rate": 4.398905448799806e-06, "loss": 0.7278, "step": 10745 }, { "epoch": 0.5522664199814986, "grad_norm": 1.0411584377288818, "learning_rate": 4.398079239063381e-06, "loss": 0.6549, "step": 10746 }, { "epoch": 0.5523178127248433, "grad_norm": 1.065542459487915, "learning_rate": 4.397253046003696e-06, "loss": 0.7506, "step": 10747 }, { "epoch": 0.5523692054681879, "grad_norm": 1.110318660736084, "learning_rate": 4.3964268696436425e-06, "loss": 0.7507, "step": 10748 }, { "epoch": 0.5524205982115326, "grad_norm": 1.082706332206726, "learning_rate": 4.395600710006108e-06, "loss": 0.7402, "step": 10749 }, { "epoch": 0.5524719909548772, "grad_norm": 1.0900524854660034, "learning_rate": 4.394774567113985e-06, "loss": 0.7147, "step": 10750 }, { "epoch": 0.5525233836982218, "grad_norm": 1.077059030532837, "learning_rate": 4.393948440990162e-06, "loss": 0.7555, "step": 10751 }, { "epoch": 0.5525747764415665, "grad_norm": 1.0487898588180542, "learning_rate": 4.393122331657525e-06, "loss": 0.6919, "step": 10752 }, { "epoch": 0.5526261691849111, "grad_norm": 1.1181272268295288, "learning_rate": 4.392296239138963e-06, "loss": 0.726, "step": 10753 }, { "epoch": 0.5526775619282557, "grad_norm": 1.0952658653259277, "learning_rate": 4.391470163457366e-06, "loss": 0.7212, "step": 10754 }, { "epoch": 0.5527289546716003, "grad_norm": 1.0867129564285278, "learning_rate": 4.390644104635619e-06, "loss": 0.7362, "step": 10755 }, { "epoch": 0.552780347414945, "grad_norm": 1.0549699068069458, "learning_rate": 4.389818062696609e-06, "loss": 0.7506, "step": 10756 }, { "epoch": 0.5528317401582896, "grad_norm": 1.1104376316070557, "learning_rate": 4.388992037663223e-06, "loss": 0.8272, "step": 10757 }, { "epoch": 0.5528831329016343, "grad_norm": 1.1957920789718628, "learning_rate": 4.388166029558346e-06, "loss": 0.685, "step": 10758 }, { "epoch": 0.5529345256449789, "grad_norm": 1.0681620836257935, "learning_rate": 4.387340038404862e-06, "loss": 0.7239, "step": 10759 }, { "epoch": 0.5529859183883236, "grad_norm": 1.079546570777893, "learning_rate": 4.38651406422566e-06, "loss": 0.68, "step": 10760 }, { "epoch": 0.5530373111316682, "grad_norm": 1.0699070692062378, "learning_rate": 4.38568810704362e-06, "loss": 0.7349, "step": 10761 }, { "epoch": 0.5530887038750129, "grad_norm": 1.018877625465393, "learning_rate": 4.384862166881627e-06, "loss": 0.7077, "step": 10762 }, { "epoch": 0.5531400966183575, "grad_norm": 1.120166540145874, "learning_rate": 4.384036243762564e-06, "loss": 0.7722, "step": 10763 }, { "epoch": 0.5531914893617021, "grad_norm": 1.0192737579345703, "learning_rate": 4.383210337709318e-06, "loss": 0.7532, "step": 10764 }, { "epoch": 0.5532428821050468, "grad_norm": 1.0826292037963867, "learning_rate": 4.382384448744766e-06, "loss": 0.7475, "step": 10765 }, { "epoch": 0.5532942748483914, "grad_norm": 0.9969246983528137, "learning_rate": 4.381558576891791e-06, "loss": 0.6797, "step": 10766 }, { "epoch": 0.5533456675917361, "grad_norm": 1.2029521465301514, "learning_rate": 4.380732722173278e-06, "loss": 0.7745, "step": 10767 }, { "epoch": 0.5533970603350807, "grad_norm": 1.0097633600234985, "learning_rate": 4.379906884612104e-06, "loss": 0.6923, "step": 10768 }, { "epoch": 0.5534484530784253, "grad_norm": 0.749476432800293, "learning_rate": 4.379081064231151e-06, "loss": 0.6584, "step": 10769 }, { "epoch": 0.5534998458217699, "grad_norm": 0.9810890555381775, "learning_rate": 4.378255261053302e-06, "loss": 0.7225, "step": 10770 }, { "epoch": 0.5535512385651146, "grad_norm": 1.0234977006912231, "learning_rate": 4.377429475101432e-06, "loss": 0.7264, "step": 10771 }, { "epoch": 0.5536026313084592, "grad_norm": 1.089060664176941, "learning_rate": 4.376603706398422e-06, "loss": 0.7191, "step": 10772 }, { "epoch": 0.5536540240518039, "grad_norm": 1.1215370893478394, "learning_rate": 4.375777954967152e-06, "loss": 0.7021, "step": 10773 }, { "epoch": 0.5537054167951485, "grad_norm": 1.0508062839508057, "learning_rate": 4.3749522208304975e-06, "loss": 0.7321, "step": 10774 }, { "epoch": 0.5537568095384932, "grad_norm": 1.063835859298706, "learning_rate": 4.374126504011338e-06, "loss": 0.7057, "step": 10775 }, { "epoch": 0.5538082022818378, "grad_norm": 1.1080663204193115, "learning_rate": 4.373300804532551e-06, "loss": 0.7151, "step": 10776 }, { "epoch": 0.5538595950251824, "grad_norm": 1.084398865699768, "learning_rate": 4.372475122417011e-06, "loss": 0.6901, "step": 10777 }, { "epoch": 0.5539109877685271, "grad_norm": 0.7795893549919128, "learning_rate": 4.371649457687596e-06, "loss": 0.6656, "step": 10778 }, { "epoch": 0.5539623805118717, "grad_norm": 1.1194603443145752, "learning_rate": 4.370823810367181e-06, "loss": 0.7441, "step": 10779 }, { "epoch": 0.5540137732552164, "grad_norm": 1.0783047676086426, "learning_rate": 4.369998180478644e-06, "loss": 0.7096, "step": 10780 }, { "epoch": 0.554065165998561, "grad_norm": 0.8340365886688232, "learning_rate": 4.3691725680448555e-06, "loss": 0.6768, "step": 10781 }, { "epoch": 0.5541165587419057, "grad_norm": 0.7244275808334351, "learning_rate": 4.368346973088692e-06, "loss": 0.6651, "step": 10782 }, { "epoch": 0.5541679514852503, "grad_norm": 1.045332908630371, "learning_rate": 4.367521395633029e-06, "loss": 0.6927, "step": 10783 }, { "epoch": 0.5542193442285949, "grad_norm": 1.0329526662826538, "learning_rate": 4.366695835700736e-06, "loss": 0.6818, "step": 10784 }, { "epoch": 0.5542707369719395, "grad_norm": 1.077038288116455, "learning_rate": 4.3658702933146885e-06, "loss": 0.7902, "step": 10785 }, { "epoch": 0.5543221297152842, "grad_norm": 1.0502935647964478, "learning_rate": 4.365044768497759e-06, "loss": 0.7655, "step": 10786 }, { "epoch": 0.5543735224586288, "grad_norm": 0.7210720181465149, "learning_rate": 4.364219261272817e-06, "loss": 0.6974, "step": 10787 }, { "epoch": 0.5544249152019735, "grad_norm": 1.1018387079238892, "learning_rate": 4.363393771662736e-06, "loss": 0.7202, "step": 10788 }, { "epoch": 0.5544763079453181, "grad_norm": 1.1479874849319458, "learning_rate": 4.362568299690389e-06, "loss": 0.7691, "step": 10789 }, { "epoch": 0.5545277006886628, "grad_norm": 1.1982343196868896, "learning_rate": 4.361742845378641e-06, "loss": 0.7179, "step": 10790 }, { "epoch": 0.5545790934320074, "grad_norm": 1.0024508237838745, "learning_rate": 4.360917408750366e-06, "loss": 0.6668, "step": 10791 }, { "epoch": 0.554630486175352, "grad_norm": 1.507300615310669, "learning_rate": 4.360091989828434e-06, "loss": 0.7143, "step": 10792 }, { "epoch": 0.5546818789186967, "grad_norm": 1.1222071647644043, "learning_rate": 4.3592665886357095e-06, "loss": 0.8018, "step": 10793 }, { "epoch": 0.5547332716620413, "grad_norm": 1.0552480220794678, "learning_rate": 4.358441205195064e-06, "loss": 0.71, "step": 10794 }, { "epoch": 0.554784664405386, "grad_norm": 1.0867812633514404, "learning_rate": 4.357615839529365e-06, "loss": 0.7568, "step": 10795 }, { "epoch": 0.5548360571487306, "grad_norm": 1.0883057117462158, "learning_rate": 4.356790491661481e-06, "loss": 0.7587, "step": 10796 }, { "epoch": 0.5548874498920753, "grad_norm": 0.7757565379142761, "learning_rate": 4.355965161614277e-06, "loss": 0.6841, "step": 10797 }, { "epoch": 0.5549388426354199, "grad_norm": 1.0912585258483887, "learning_rate": 4.355139849410622e-06, "loss": 0.7008, "step": 10798 }, { "epoch": 0.5549902353787646, "grad_norm": 1.0982303619384766, "learning_rate": 4.354314555073381e-06, "loss": 0.7354, "step": 10799 }, { "epoch": 0.5550416281221091, "grad_norm": 1.1373717784881592, "learning_rate": 4.353489278625418e-06, "loss": 0.7492, "step": 10800 }, { "epoch": 0.5550930208654538, "grad_norm": 1.125774621963501, "learning_rate": 4.3526640200895985e-06, "loss": 0.7186, "step": 10801 }, { "epoch": 0.5551444136087984, "grad_norm": 1.130272388458252, "learning_rate": 4.35183877948879e-06, "loss": 0.7403, "step": 10802 }, { "epoch": 0.555195806352143, "grad_norm": 1.0017223358154297, "learning_rate": 4.351013556845851e-06, "loss": 0.7075, "step": 10803 }, { "epoch": 0.5552471990954877, "grad_norm": 1.0309321880340576, "learning_rate": 4.35018835218365e-06, "loss": 0.6835, "step": 10804 }, { "epoch": 0.5552985918388323, "grad_norm": 1.0943472385406494, "learning_rate": 4.349363165525048e-06, "loss": 0.6406, "step": 10805 }, { "epoch": 0.555349984582177, "grad_norm": 1.037827968597412, "learning_rate": 4.348537996892908e-06, "loss": 0.7708, "step": 10806 }, { "epoch": 0.5554013773255216, "grad_norm": 1.009874701499939, "learning_rate": 4.34771284631009e-06, "loss": 0.707, "step": 10807 }, { "epoch": 0.5554527700688663, "grad_norm": 1.0244871377944946, "learning_rate": 4.346887713799459e-06, "loss": 0.6938, "step": 10808 }, { "epoch": 0.5555041628122109, "grad_norm": 1.0723369121551514, "learning_rate": 4.346062599383874e-06, "loss": 0.6962, "step": 10809 }, { "epoch": 0.5555555555555556, "grad_norm": 1.0282995700836182, "learning_rate": 4.345237503086194e-06, "loss": 0.7723, "step": 10810 }, { "epoch": 0.5556069482989002, "grad_norm": 1.0838866233825684, "learning_rate": 4.344412424929281e-06, "loss": 0.7677, "step": 10811 }, { "epoch": 0.5556583410422449, "grad_norm": 1.2450435161590576, "learning_rate": 4.343587364935997e-06, "loss": 0.8431, "step": 10812 }, { "epoch": 0.5557097337855895, "grad_norm": 1.1466076374053955, "learning_rate": 4.342762323129196e-06, "loss": 0.7335, "step": 10813 }, { "epoch": 0.5557611265289342, "grad_norm": 1.0430716276168823, "learning_rate": 4.341937299531739e-06, "loss": 0.7385, "step": 10814 }, { "epoch": 0.5558125192722787, "grad_norm": 1.0248312950134277, "learning_rate": 4.341112294166486e-06, "loss": 0.7003, "step": 10815 }, { "epoch": 0.5558639120156234, "grad_norm": 1.0875623226165771, "learning_rate": 4.340287307056289e-06, "loss": 0.6972, "step": 10816 }, { "epoch": 0.555915304758968, "grad_norm": 0.7583824992179871, "learning_rate": 4.339462338224009e-06, "loss": 0.6836, "step": 10817 }, { "epoch": 0.5559666975023126, "grad_norm": 1.0924382209777832, "learning_rate": 4.338637387692503e-06, "loss": 0.7458, "step": 10818 }, { "epoch": 0.5560180902456573, "grad_norm": 1.034134864807129, "learning_rate": 4.337812455484625e-06, "loss": 0.7277, "step": 10819 }, { "epoch": 0.5560694829890019, "grad_norm": 0.7286117672920227, "learning_rate": 4.33698754162323e-06, "loss": 0.6603, "step": 10820 }, { "epoch": 0.5561208757323466, "grad_norm": 1.0440044403076172, "learning_rate": 4.336162646131177e-06, "loss": 0.6971, "step": 10821 }, { "epoch": 0.5561722684756912, "grad_norm": 1.3210711479187012, "learning_rate": 4.3353377690313156e-06, "loss": 0.7244, "step": 10822 }, { "epoch": 0.5562236612190359, "grad_norm": 1.0478872060775757, "learning_rate": 4.334512910346501e-06, "loss": 0.7499, "step": 10823 }, { "epoch": 0.5562750539623805, "grad_norm": 1.0514074563980103, "learning_rate": 4.33368807009959e-06, "loss": 0.6824, "step": 10824 }, { "epoch": 0.5563264467057252, "grad_norm": 1.0398327112197876, "learning_rate": 4.33286324831343e-06, "loss": 0.7399, "step": 10825 }, { "epoch": 0.5563778394490698, "grad_norm": 0.7362720370292664, "learning_rate": 4.332038445010877e-06, "loss": 0.6637, "step": 10826 }, { "epoch": 0.5564292321924145, "grad_norm": 1.0675467252731323, "learning_rate": 4.331213660214782e-06, "loss": 0.7453, "step": 10827 }, { "epoch": 0.5564806249357591, "grad_norm": 1.0229288339614868, "learning_rate": 4.330388893947998e-06, "loss": 0.7025, "step": 10828 }, { "epoch": 0.5565320176791038, "grad_norm": 1.016847014427185, "learning_rate": 4.329564146233373e-06, "loss": 0.7177, "step": 10829 }, { "epoch": 0.5565834104224483, "grad_norm": 1.0456981658935547, "learning_rate": 4.328739417093759e-06, "loss": 0.7461, "step": 10830 }, { "epoch": 0.556634803165793, "grad_norm": 1.0804519653320312, "learning_rate": 4.3279147065520074e-06, "loss": 0.7449, "step": 10831 }, { "epoch": 0.5566861959091376, "grad_norm": 1.106640100479126, "learning_rate": 4.327090014630964e-06, "loss": 0.7086, "step": 10832 }, { "epoch": 0.5567375886524822, "grad_norm": 1.16204833984375, "learning_rate": 4.3262653413534795e-06, "loss": 0.7748, "step": 10833 }, { "epoch": 0.5567889813958269, "grad_norm": 0.830194354057312, "learning_rate": 4.325440686742404e-06, "loss": 0.6554, "step": 10834 }, { "epoch": 0.5568403741391715, "grad_norm": 1.0983648300170898, "learning_rate": 4.324616050820581e-06, "loss": 0.7072, "step": 10835 }, { "epoch": 0.5568917668825162, "grad_norm": 1.1424726247787476, "learning_rate": 4.3237914336108596e-06, "loss": 0.7498, "step": 10836 }, { "epoch": 0.5569431596258608, "grad_norm": 1.0313241481781006, "learning_rate": 4.322966835136089e-06, "loss": 0.6964, "step": 10837 }, { "epoch": 0.5569945523692055, "grad_norm": 0.7365595698356628, "learning_rate": 4.322142255419112e-06, "loss": 0.6867, "step": 10838 }, { "epoch": 0.5570459451125501, "grad_norm": 1.1180793046951294, "learning_rate": 4.321317694482776e-06, "loss": 0.7285, "step": 10839 }, { "epoch": 0.5570973378558948, "grad_norm": 1.0917288064956665, "learning_rate": 4.3204931523499255e-06, "loss": 0.7688, "step": 10840 }, { "epoch": 0.5571487305992394, "grad_norm": 1.1243914365768433, "learning_rate": 4.319668629043408e-06, "loss": 0.7179, "step": 10841 }, { "epoch": 0.5572001233425841, "grad_norm": 1.0081278085708618, "learning_rate": 4.3188441245860625e-06, "loss": 0.6624, "step": 10842 }, { "epoch": 0.5572515160859287, "grad_norm": 1.0330395698547363, "learning_rate": 4.318019639000737e-06, "loss": 0.7453, "step": 10843 }, { "epoch": 0.5573029088292734, "grad_norm": 1.016631841659546, "learning_rate": 4.317195172310273e-06, "loss": 0.7431, "step": 10844 }, { "epoch": 0.5573543015726179, "grad_norm": 1.0297139883041382, "learning_rate": 4.316370724537512e-06, "loss": 0.7007, "step": 10845 }, { "epoch": 0.5574056943159625, "grad_norm": 0.7857033014297485, "learning_rate": 4.3155462957052976e-06, "loss": 0.6847, "step": 10846 }, { "epoch": 0.5574570870593072, "grad_norm": 1.0852837562561035, "learning_rate": 4.3147218858364705e-06, "loss": 0.6979, "step": 10847 }, { "epoch": 0.5575084798026518, "grad_norm": 1.0930466651916504, "learning_rate": 4.313897494953872e-06, "loss": 0.6894, "step": 10848 }, { "epoch": 0.5575598725459965, "grad_norm": 1.0994106531143188, "learning_rate": 4.313073123080342e-06, "loss": 0.7399, "step": 10849 }, { "epoch": 0.5576112652893411, "grad_norm": 1.2448499202728271, "learning_rate": 4.312248770238723e-06, "loss": 0.7095, "step": 10850 }, { "epoch": 0.5576626580326858, "grad_norm": 0.7117018103599548, "learning_rate": 4.311424436451851e-06, "loss": 0.7052, "step": 10851 }, { "epoch": 0.5577140507760304, "grad_norm": 1.0360850095748901, "learning_rate": 4.310600121742566e-06, "loss": 0.6877, "step": 10852 }, { "epoch": 0.5577654435193751, "grad_norm": 1.0797063112258911, "learning_rate": 4.3097758261337075e-06, "loss": 0.7199, "step": 10853 }, { "epoch": 0.5578168362627197, "grad_norm": 1.0390173196792603, "learning_rate": 4.3089515496481116e-06, "loss": 0.6873, "step": 10854 }, { "epoch": 0.5578682290060644, "grad_norm": 0.8859268426895142, "learning_rate": 4.308127292308616e-06, "loss": 0.6546, "step": 10855 }, { "epoch": 0.557919621749409, "grad_norm": 0.7278128266334534, "learning_rate": 4.307303054138058e-06, "loss": 0.6482, "step": 10856 }, { "epoch": 0.5579710144927537, "grad_norm": 1.0542969703674316, "learning_rate": 4.306478835159276e-06, "loss": 0.6939, "step": 10857 }, { "epoch": 0.5580224072360983, "grad_norm": 1.133650541305542, "learning_rate": 4.305654635395101e-06, "loss": 0.773, "step": 10858 }, { "epoch": 0.558073799979443, "grad_norm": 1.0271515846252441, "learning_rate": 4.304830454868372e-06, "loss": 0.7248, "step": 10859 }, { "epoch": 0.5581251927227875, "grad_norm": 1.0311022996902466, "learning_rate": 4.304006293601922e-06, "loss": 0.7105, "step": 10860 }, { "epoch": 0.5581765854661321, "grad_norm": 0.802905261516571, "learning_rate": 4.303182151618585e-06, "loss": 0.6952, "step": 10861 }, { "epoch": 0.5582279782094768, "grad_norm": 1.1078637838363647, "learning_rate": 4.3023580289411945e-06, "loss": 0.6971, "step": 10862 }, { "epoch": 0.5582793709528214, "grad_norm": 1.0535496473312378, "learning_rate": 4.301533925592586e-06, "loss": 0.7196, "step": 10863 }, { "epoch": 0.5583307636961661, "grad_norm": 1.0529030561447144, "learning_rate": 4.3007098415955884e-06, "loss": 0.6926, "step": 10864 }, { "epoch": 0.5583821564395107, "grad_norm": 1.0854411125183105, "learning_rate": 4.299885776973035e-06, "loss": 0.6672, "step": 10865 }, { "epoch": 0.5584335491828554, "grad_norm": 1.0727567672729492, "learning_rate": 4.29906173174776e-06, "loss": 0.7279, "step": 10866 }, { "epoch": 0.5584849419262, "grad_norm": 1.1993035078048706, "learning_rate": 4.29823770594259e-06, "loss": 0.7409, "step": 10867 }, { "epoch": 0.5585363346695447, "grad_norm": 1.045276403427124, "learning_rate": 4.297413699580356e-06, "loss": 0.673, "step": 10868 }, { "epoch": 0.5585877274128893, "grad_norm": 1.0450563430786133, "learning_rate": 4.296589712683891e-06, "loss": 0.7188, "step": 10869 }, { "epoch": 0.558639120156234, "grad_norm": 0.7919402718544006, "learning_rate": 4.295765745276021e-06, "loss": 0.6953, "step": 10870 }, { "epoch": 0.5586905128995786, "grad_norm": 1.091357707977295, "learning_rate": 4.294941797379576e-06, "loss": 0.6813, "step": 10871 }, { "epoch": 0.5587419056429233, "grad_norm": 1.139304757118225, "learning_rate": 4.294117869017384e-06, "loss": 0.7432, "step": 10872 }, { "epoch": 0.5587932983862679, "grad_norm": 1.0857518911361694, "learning_rate": 4.293293960212275e-06, "loss": 0.7152, "step": 10873 }, { "epoch": 0.5588446911296125, "grad_norm": 1.033813714981079, "learning_rate": 4.292470070987072e-06, "loss": 0.7175, "step": 10874 }, { "epoch": 0.5588960838729571, "grad_norm": 1.107561707496643, "learning_rate": 4.2916462013646035e-06, "loss": 0.7022, "step": 10875 }, { "epoch": 0.5589474766163017, "grad_norm": 1.0633291006088257, "learning_rate": 4.290822351367697e-06, "loss": 0.7361, "step": 10876 }, { "epoch": 0.5589988693596464, "grad_norm": 1.0579557418823242, "learning_rate": 4.289998521019176e-06, "loss": 0.6944, "step": 10877 }, { "epoch": 0.559050262102991, "grad_norm": 1.080997109413147, "learning_rate": 4.2891747103418645e-06, "loss": 0.7485, "step": 10878 }, { "epoch": 0.5591016548463357, "grad_norm": 0.9321607947349548, "learning_rate": 4.2883509193585895e-06, "loss": 0.6649, "step": 10879 }, { "epoch": 0.5591530475896803, "grad_norm": 1.020646333694458, "learning_rate": 4.287527148092173e-06, "loss": 0.7294, "step": 10880 }, { "epoch": 0.559204440333025, "grad_norm": 1.0576215982437134, "learning_rate": 4.286703396565439e-06, "loss": 0.7687, "step": 10881 }, { "epoch": 0.5592558330763696, "grad_norm": 1.1232056617736816, "learning_rate": 4.285879664801213e-06, "loss": 0.7099, "step": 10882 }, { "epoch": 0.5593072258197143, "grad_norm": 0.769159197807312, "learning_rate": 4.2850559528223124e-06, "loss": 0.6081, "step": 10883 }, { "epoch": 0.5593586185630589, "grad_norm": 1.1259429454803467, "learning_rate": 4.28423226065156e-06, "loss": 0.7477, "step": 10884 }, { "epoch": 0.5594100113064036, "grad_norm": 0.6801486611366272, "learning_rate": 4.283408588311781e-06, "loss": 0.7073, "step": 10885 }, { "epoch": 0.5594614040497482, "grad_norm": 1.060115098953247, "learning_rate": 4.28258493582579e-06, "loss": 0.7705, "step": 10886 }, { "epoch": 0.5595127967930928, "grad_norm": 0.8084056973457336, "learning_rate": 4.2817613032164094e-06, "loss": 0.6218, "step": 10887 }, { "epoch": 0.5595641895364375, "grad_norm": 0.726311206817627, "learning_rate": 4.28093769050646e-06, "loss": 0.6172, "step": 10888 }, { "epoch": 0.5596155822797821, "grad_norm": 0.6586894392967224, "learning_rate": 4.2801140977187605e-06, "loss": 0.6673, "step": 10889 }, { "epoch": 0.5596669750231268, "grad_norm": 1.1524149179458618, "learning_rate": 4.2792905248761275e-06, "loss": 0.7329, "step": 10890 }, { "epoch": 0.5597183677664713, "grad_norm": 0.7418955564498901, "learning_rate": 4.27846697200138e-06, "loss": 0.6604, "step": 10891 }, { "epoch": 0.559769760509816, "grad_norm": 1.1461540460586548, "learning_rate": 4.277643439117337e-06, "loss": 0.7818, "step": 10892 }, { "epoch": 0.5598211532531606, "grad_norm": 1.064190149307251, "learning_rate": 4.276819926246811e-06, "loss": 0.7055, "step": 10893 }, { "epoch": 0.5598725459965053, "grad_norm": 1.0946849584579468, "learning_rate": 4.2759964334126206e-06, "loss": 0.7415, "step": 10894 }, { "epoch": 0.5599239387398499, "grad_norm": 1.0719082355499268, "learning_rate": 4.275172960637583e-06, "loss": 0.7158, "step": 10895 }, { "epoch": 0.5599753314831946, "grad_norm": 0.7232871651649475, "learning_rate": 4.27434950794451e-06, "loss": 0.6617, "step": 10896 }, { "epoch": 0.5600267242265392, "grad_norm": 1.0991734266281128, "learning_rate": 4.273526075356218e-06, "loss": 0.7213, "step": 10897 }, { "epoch": 0.5600781169698839, "grad_norm": 0.8204271197319031, "learning_rate": 4.272702662895521e-06, "loss": 0.645, "step": 10898 }, { "epoch": 0.5601295097132285, "grad_norm": 0.7794287800788879, "learning_rate": 4.27187927058523e-06, "loss": 0.6511, "step": 10899 }, { "epoch": 0.5601809024565731, "grad_norm": 0.7154523730278015, "learning_rate": 4.271055898448161e-06, "loss": 0.6411, "step": 10900 }, { "epoch": 0.5602322951999178, "grad_norm": 1.1501259803771973, "learning_rate": 4.2702325465071266e-06, "loss": 0.7286, "step": 10901 }, { "epoch": 0.5602836879432624, "grad_norm": 1.219252109527588, "learning_rate": 4.269409214784934e-06, "loss": 0.681, "step": 10902 }, { "epoch": 0.5603350806866071, "grad_norm": 1.021567940711975, "learning_rate": 4.268585903304398e-06, "loss": 0.7136, "step": 10903 }, { "epoch": 0.5603864734299517, "grad_norm": 0.9399927258491516, "learning_rate": 4.267762612088327e-06, "loss": 0.6859, "step": 10904 }, { "epoch": 0.5604378661732964, "grad_norm": 1.0040158033370972, "learning_rate": 4.266939341159535e-06, "loss": 0.6616, "step": 10905 }, { "epoch": 0.5604892589166409, "grad_norm": 1.042953372001648, "learning_rate": 4.266116090540827e-06, "loss": 0.7074, "step": 10906 }, { "epoch": 0.5605406516599856, "grad_norm": 1.1135450601577759, "learning_rate": 4.265292860255013e-06, "loss": 0.8105, "step": 10907 }, { "epoch": 0.5605920444033302, "grad_norm": 1.07875394821167, "learning_rate": 4.264469650324904e-06, "loss": 0.7178, "step": 10908 }, { "epoch": 0.5606434371466749, "grad_norm": 1.16061532497406, "learning_rate": 4.263646460773304e-06, "loss": 0.7192, "step": 10909 }, { "epoch": 0.5606948298900195, "grad_norm": 1.0205036401748657, "learning_rate": 4.262823291623022e-06, "loss": 0.7489, "step": 10910 }, { "epoch": 0.5607462226333642, "grad_norm": 1.0088468790054321, "learning_rate": 4.262000142896865e-06, "loss": 0.7324, "step": 10911 }, { "epoch": 0.5607976153767088, "grad_norm": 1.0353134870529175, "learning_rate": 4.261177014617638e-06, "loss": 0.7536, "step": 10912 }, { "epoch": 0.5608490081200534, "grad_norm": 0.9968785643577576, "learning_rate": 4.260353906808146e-06, "loss": 0.732, "step": 10913 }, { "epoch": 0.5609004008633981, "grad_norm": 1.09078848361969, "learning_rate": 4.2595308194911975e-06, "loss": 0.7079, "step": 10914 }, { "epoch": 0.5609517936067427, "grad_norm": 0.82099449634552, "learning_rate": 4.258707752689593e-06, "loss": 0.6534, "step": 10915 }, { "epoch": 0.5610031863500874, "grad_norm": 1.041460394859314, "learning_rate": 4.257884706426136e-06, "loss": 0.7423, "step": 10916 }, { "epoch": 0.561054579093432, "grad_norm": 1.1150918006896973, "learning_rate": 4.257061680723634e-06, "loss": 0.7062, "step": 10917 }, { "epoch": 0.5611059718367767, "grad_norm": 1.0044100284576416, "learning_rate": 4.2562386756048855e-06, "loss": 0.6841, "step": 10918 }, { "epoch": 0.5611573645801213, "grad_norm": 1.0380232334136963, "learning_rate": 4.255415691092693e-06, "loss": 0.7216, "step": 10919 }, { "epoch": 0.561208757323466, "grad_norm": 1.0614938735961914, "learning_rate": 4.2545927272098595e-06, "loss": 0.7242, "step": 10920 }, { "epoch": 0.5612601500668105, "grad_norm": 0.7795531153678894, "learning_rate": 4.253769783979186e-06, "loss": 0.6142, "step": 10921 }, { "epoch": 0.5613115428101552, "grad_norm": 1.075221061706543, "learning_rate": 4.252946861423472e-06, "loss": 0.7308, "step": 10922 }, { "epoch": 0.5613629355534998, "grad_norm": 1.0202656984329224, "learning_rate": 4.252123959565517e-06, "loss": 0.6351, "step": 10923 }, { "epoch": 0.5614143282968445, "grad_norm": 1.0964025259017944, "learning_rate": 4.251301078428123e-06, "loss": 0.6909, "step": 10924 }, { "epoch": 0.5614657210401891, "grad_norm": 1.060097098350525, "learning_rate": 4.250478218034084e-06, "loss": 0.75, "step": 10925 }, { "epoch": 0.5615171137835338, "grad_norm": 0.9913344383239746, "learning_rate": 4.249655378406201e-06, "loss": 0.7108, "step": 10926 }, { "epoch": 0.5615685065268784, "grad_norm": 1.0450727939605713, "learning_rate": 4.248832559567273e-06, "loss": 0.7362, "step": 10927 }, { "epoch": 0.561619899270223, "grad_norm": 1.1383724212646484, "learning_rate": 4.248009761540092e-06, "loss": 0.7707, "step": 10928 }, { "epoch": 0.5616712920135677, "grad_norm": 1.1207242012023926, "learning_rate": 4.247186984347458e-06, "loss": 0.7397, "step": 10929 }, { "epoch": 0.5617226847569123, "grad_norm": 1.0426716804504395, "learning_rate": 4.246364228012167e-06, "loss": 0.7112, "step": 10930 }, { "epoch": 0.561774077500257, "grad_norm": 1.0183974504470825, "learning_rate": 4.245541492557012e-06, "loss": 0.704, "step": 10931 }, { "epoch": 0.5618254702436016, "grad_norm": 1.0449295043945312, "learning_rate": 4.244718778004789e-06, "loss": 0.7187, "step": 10932 }, { "epoch": 0.5618768629869463, "grad_norm": 1.1089107990264893, "learning_rate": 4.243896084378294e-06, "loss": 0.734, "step": 10933 }, { "epoch": 0.5619282557302909, "grad_norm": 0.8600550889968872, "learning_rate": 4.243073411700315e-06, "loss": 0.6606, "step": 10934 }, { "epoch": 0.5619796484736356, "grad_norm": 1.0280994176864624, "learning_rate": 4.242250759993649e-06, "loss": 0.6897, "step": 10935 }, { "epoch": 0.5620310412169801, "grad_norm": 1.0299351215362549, "learning_rate": 4.2414281292810875e-06, "loss": 0.677, "step": 10936 }, { "epoch": 0.5620824339603248, "grad_norm": 1.1593406200408936, "learning_rate": 4.240605519585424e-06, "loss": 0.7699, "step": 10937 }, { "epoch": 0.5621338267036694, "grad_norm": 1.1487523317337036, "learning_rate": 4.239782930929446e-06, "loss": 0.7264, "step": 10938 }, { "epoch": 0.562185219447014, "grad_norm": 1.085103988647461, "learning_rate": 4.238960363335946e-06, "loss": 0.7375, "step": 10939 }, { "epoch": 0.5622366121903587, "grad_norm": 1.0652331113815308, "learning_rate": 4.238137816827714e-06, "loss": 0.7077, "step": 10940 }, { "epoch": 0.5622880049337033, "grad_norm": 1.1124215126037598, "learning_rate": 4.237315291427538e-06, "loss": 0.7225, "step": 10941 }, { "epoch": 0.562339397677048, "grad_norm": 1.0701571702957153, "learning_rate": 4.236492787158208e-06, "loss": 0.711, "step": 10942 }, { "epoch": 0.5623907904203926, "grad_norm": 1.0151680707931519, "learning_rate": 4.235670304042514e-06, "loss": 0.7009, "step": 10943 }, { "epoch": 0.5624421831637373, "grad_norm": 1.0536092519760132, "learning_rate": 4.23484784210324e-06, "loss": 0.7426, "step": 10944 }, { "epoch": 0.5624935759070819, "grad_norm": 1.0561531782150269, "learning_rate": 4.2340254013631744e-06, "loss": 0.7033, "step": 10945 }, { "epoch": 0.5625449686504266, "grad_norm": 1.090956687927246, "learning_rate": 4.233202981845106e-06, "loss": 0.7297, "step": 10946 }, { "epoch": 0.5625963613937712, "grad_norm": 1.052664875984192, "learning_rate": 4.232380583571817e-06, "loss": 0.7623, "step": 10947 }, { "epoch": 0.5626477541371159, "grad_norm": 0.7026613354682922, "learning_rate": 4.231558206566093e-06, "loss": 0.6838, "step": 10948 }, { "epoch": 0.5626991468804605, "grad_norm": 1.1899166107177734, "learning_rate": 4.230735850850722e-06, "loss": 0.6964, "step": 10949 }, { "epoch": 0.5627505396238052, "grad_norm": 0.8542056083679199, "learning_rate": 4.229913516448485e-06, "loss": 0.6107, "step": 10950 }, { "epoch": 0.5628019323671497, "grad_norm": 1.102874994277954, "learning_rate": 4.229091203382166e-06, "loss": 0.7877, "step": 10951 }, { "epoch": 0.5628533251104944, "grad_norm": 1.0361963510513306, "learning_rate": 4.22826891167455e-06, "loss": 0.7374, "step": 10952 }, { "epoch": 0.562904717853839, "grad_norm": 1.048791527748108, "learning_rate": 4.227446641348418e-06, "loss": 0.712, "step": 10953 }, { "epoch": 0.5629561105971836, "grad_norm": 1.0534428358078003, "learning_rate": 4.2266243924265496e-06, "loss": 0.6611, "step": 10954 }, { "epoch": 0.5630075033405283, "grad_norm": 1.0068188905715942, "learning_rate": 4.225802164931728e-06, "loss": 0.7647, "step": 10955 }, { "epoch": 0.5630588960838729, "grad_norm": 1.0235264301300049, "learning_rate": 4.224979958886736e-06, "loss": 0.6533, "step": 10956 }, { "epoch": 0.5631102888272176, "grad_norm": 1.0482769012451172, "learning_rate": 4.224157774314348e-06, "loss": 0.7444, "step": 10957 }, { "epoch": 0.5631616815705622, "grad_norm": 1.1097075939178467, "learning_rate": 4.223335611237347e-06, "loss": 0.7266, "step": 10958 }, { "epoch": 0.5632130743139069, "grad_norm": 1.046775221824646, "learning_rate": 4.222513469678512e-06, "loss": 0.7734, "step": 10959 }, { "epoch": 0.5632644670572515, "grad_norm": 0.7375898361206055, "learning_rate": 4.22169134966062e-06, "loss": 0.689, "step": 10960 }, { "epoch": 0.5633158598005962, "grad_norm": 1.0279228687286377, "learning_rate": 4.220869251206448e-06, "loss": 0.7163, "step": 10961 }, { "epoch": 0.5633672525439408, "grad_norm": 1.03515625, "learning_rate": 4.220047174338775e-06, "loss": 0.7376, "step": 10962 }, { "epoch": 0.5634186452872855, "grad_norm": 0.7687429785728455, "learning_rate": 4.2192251190803745e-06, "loss": 0.6572, "step": 10963 }, { "epoch": 0.5634700380306301, "grad_norm": 1.0392944812774658, "learning_rate": 4.218403085454025e-06, "loss": 0.6674, "step": 10964 }, { "epoch": 0.5635214307739748, "grad_norm": 1.0999081134796143, "learning_rate": 4.2175810734825e-06, "loss": 0.7551, "step": 10965 }, { "epoch": 0.5635728235173193, "grad_norm": 0.7833535075187683, "learning_rate": 4.2167590831885766e-06, "loss": 0.6795, "step": 10966 }, { "epoch": 0.563624216260664, "grad_norm": 1.1171650886535645, "learning_rate": 4.215937114595024e-06, "loss": 0.6722, "step": 10967 }, { "epoch": 0.5636756090040086, "grad_norm": 1.1054211854934692, "learning_rate": 4.215115167724619e-06, "loss": 0.6316, "step": 10968 }, { "epoch": 0.5637270017473532, "grad_norm": 1.1324361562728882, "learning_rate": 4.214293242600136e-06, "loss": 0.7447, "step": 10969 }, { "epoch": 0.5637783944906979, "grad_norm": 1.0604054927825928, "learning_rate": 4.213471339244342e-06, "loss": 0.7348, "step": 10970 }, { "epoch": 0.5638297872340425, "grad_norm": 1.2330487966537476, "learning_rate": 4.212649457680012e-06, "loss": 0.6863, "step": 10971 }, { "epoch": 0.5638811799773872, "grad_norm": 1.082266092300415, "learning_rate": 4.211827597929917e-06, "loss": 0.6692, "step": 10972 }, { "epoch": 0.5639325727207318, "grad_norm": 1.0797470808029175, "learning_rate": 4.211005760016826e-06, "loss": 0.6614, "step": 10973 }, { "epoch": 0.5639839654640765, "grad_norm": 1.0578923225402832, "learning_rate": 4.210183943963509e-06, "loss": 0.7394, "step": 10974 }, { "epoch": 0.5640353582074211, "grad_norm": 1.0117113590240479, "learning_rate": 4.209362149792739e-06, "loss": 0.7089, "step": 10975 }, { "epoch": 0.5640867509507658, "grad_norm": 1.05546236038208, "learning_rate": 4.208540377527277e-06, "loss": 0.7337, "step": 10976 }, { "epoch": 0.5641381436941104, "grad_norm": 1.1539944410324097, "learning_rate": 4.207718627189896e-06, "loss": 0.7284, "step": 10977 }, { "epoch": 0.5641895364374551, "grad_norm": 0.7652021050453186, "learning_rate": 4.206896898803364e-06, "loss": 0.6619, "step": 10978 }, { "epoch": 0.5642409291807997, "grad_norm": 1.0343468189239502, "learning_rate": 4.206075192390444e-06, "loss": 0.7405, "step": 10979 }, { "epoch": 0.5642923219241444, "grad_norm": 1.0969241857528687, "learning_rate": 4.2052535079739035e-06, "loss": 0.7341, "step": 10980 }, { "epoch": 0.564343714667489, "grad_norm": 1.0535887479782104, "learning_rate": 4.20443184557651e-06, "loss": 0.7278, "step": 10981 }, { "epoch": 0.5643951074108335, "grad_norm": 1.0153982639312744, "learning_rate": 4.203610205221027e-06, "loss": 0.7031, "step": 10982 }, { "epoch": 0.5644465001541782, "grad_norm": 1.0559056997299194, "learning_rate": 4.202788586930217e-06, "loss": 0.7766, "step": 10983 }, { "epoch": 0.5644978928975228, "grad_norm": 1.0815242528915405, "learning_rate": 4.201966990726847e-06, "loss": 0.6552, "step": 10984 }, { "epoch": 0.5645492856408675, "grad_norm": 1.1948115825653076, "learning_rate": 4.20114541663368e-06, "loss": 0.7069, "step": 10985 }, { "epoch": 0.5646006783842121, "grad_norm": 1.096306324005127, "learning_rate": 4.200323864673474e-06, "loss": 0.7564, "step": 10986 }, { "epoch": 0.5646520711275568, "grad_norm": 1.010321021080017, "learning_rate": 4.1995023348689935e-06, "loss": 0.7105, "step": 10987 }, { "epoch": 0.5647034638709014, "grad_norm": 1.0823198556900024, "learning_rate": 4.198680827243004e-06, "loss": 0.7261, "step": 10988 }, { "epoch": 0.5647548566142461, "grad_norm": 0.9965549111366272, "learning_rate": 4.197859341818257e-06, "loss": 0.6983, "step": 10989 }, { "epoch": 0.5648062493575907, "grad_norm": 1.101845145225525, "learning_rate": 4.197037878617519e-06, "loss": 0.7241, "step": 10990 }, { "epoch": 0.5648576421009354, "grad_norm": 1.0959886312484741, "learning_rate": 4.196216437663549e-06, "loss": 0.7129, "step": 10991 }, { "epoch": 0.56490903484428, "grad_norm": 1.0790483951568604, "learning_rate": 4.195395018979102e-06, "loss": 0.7573, "step": 10992 }, { "epoch": 0.5649604275876247, "grad_norm": 1.087011694908142, "learning_rate": 4.19457362258694e-06, "loss": 0.7537, "step": 10993 }, { "epoch": 0.5650118203309693, "grad_norm": 1.0050640106201172, "learning_rate": 4.1937522485098215e-06, "loss": 0.731, "step": 10994 }, { "epoch": 0.565063213074314, "grad_norm": 0.6807528138160706, "learning_rate": 4.192930896770498e-06, "loss": 0.6855, "step": 10995 }, { "epoch": 0.5651146058176586, "grad_norm": 0.7436098456382751, "learning_rate": 4.192109567391729e-06, "loss": 0.6514, "step": 10996 }, { "epoch": 0.5651659985610031, "grad_norm": 1.0787243843078613, "learning_rate": 4.19128826039627e-06, "loss": 0.7702, "step": 10997 }, { "epoch": 0.5652173913043478, "grad_norm": 1.0713164806365967, "learning_rate": 4.190466975806878e-06, "loss": 0.7768, "step": 10998 }, { "epoch": 0.5652687840476924, "grad_norm": 1.1721802949905396, "learning_rate": 4.189645713646303e-06, "loss": 0.7479, "step": 10999 }, { "epoch": 0.5653201767910371, "grad_norm": 1.0569698810577393, "learning_rate": 4.188824473937301e-06, "loss": 0.7025, "step": 11000 }, { "epoch": 0.5653715695343817, "grad_norm": 1.3192484378814697, "learning_rate": 4.188003256702628e-06, "loss": 0.6671, "step": 11001 }, { "epoch": 0.5654229622777264, "grad_norm": 1.0347923040390015, "learning_rate": 4.187182061965031e-06, "loss": 0.6839, "step": 11002 }, { "epoch": 0.565474355021071, "grad_norm": 1.0944939851760864, "learning_rate": 4.186360889747266e-06, "loss": 0.776, "step": 11003 }, { "epoch": 0.5655257477644157, "grad_norm": 1.1577610969543457, "learning_rate": 4.185539740072083e-06, "loss": 0.7471, "step": 11004 }, { "epoch": 0.5655771405077603, "grad_norm": 1.1276535987854004, "learning_rate": 4.184718612962232e-06, "loss": 0.6993, "step": 11005 }, { "epoch": 0.565628533251105, "grad_norm": 1.052153468132019, "learning_rate": 4.1838975084404645e-06, "loss": 0.7171, "step": 11006 }, { "epoch": 0.5656799259944496, "grad_norm": 1.0479240417480469, "learning_rate": 4.183076426529531e-06, "loss": 0.7375, "step": 11007 }, { "epoch": 0.5657313187377943, "grad_norm": 1.0511529445648193, "learning_rate": 4.182255367252175e-06, "loss": 0.6863, "step": 11008 }, { "epoch": 0.5657827114811389, "grad_norm": 0.7077540159225464, "learning_rate": 4.18143433063115e-06, "loss": 0.6704, "step": 11009 }, { "epoch": 0.5658341042244835, "grad_norm": 1.0110533237457275, "learning_rate": 4.180613316689202e-06, "loss": 0.73, "step": 11010 }, { "epoch": 0.5658854969678282, "grad_norm": 1.1433515548706055, "learning_rate": 4.179792325449077e-06, "loss": 0.6777, "step": 11011 }, { "epoch": 0.5659368897111727, "grad_norm": 1.0699175596237183, "learning_rate": 4.178971356933521e-06, "loss": 0.7538, "step": 11012 }, { "epoch": 0.5659882824545174, "grad_norm": 1.1669446229934692, "learning_rate": 4.1781504111652806e-06, "loss": 0.7066, "step": 11013 }, { "epoch": 0.566039675197862, "grad_norm": 1.0704914331436157, "learning_rate": 4.177329488167102e-06, "loss": 0.7082, "step": 11014 }, { "epoch": 0.5660910679412067, "grad_norm": 1.0695264339447021, "learning_rate": 4.176508587961728e-06, "loss": 0.7001, "step": 11015 }, { "epoch": 0.5661424606845513, "grad_norm": 1.090102195739746, "learning_rate": 4.175687710571902e-06, "loss": 0.6625, "step": 11016 }, { "epoch": 0.566193853427896, "grad_norm": 1.1025681495666504, "learning_rate": 4.174866856020369e-06, "loss": 0.7657, "step": 11017 }, { "epoch": 0.5662452461712406, "grad_norm": 1.0325428247451782, "learning_rate": 4.174046024329869e-06, "loss": 0.7016, "step": 11018 }, { "epoch": 0.5662966389145853, "grad_norm": 1.0691859722137451, "learning_rate": 4.173225215523146e-06, "loss": 0.6608, "step": 11019 }, { "epoch": 0.5663480316579299, "grad_norm": 1.178205966949463, "learning_rate": 4.172404429622941e-06, "loss": 0.7005, "step": 11020 }, { "epoch": 0.5663994244012746, "grad_norm": 1.0893807411193848, "learning_rate": 4.1715836666519924e-06, "loss": 0.6973, "step": 11021 }, { "epoch": 0.5664508171446192, "grad_norm": 1.113642930984497, "learning_rate": 4.1707629266330414e-06, "loss": 0.7835, "step": 11022 }, { "epoch": 0.5665022098879638, "grad_norm": 1.0823112726211548, "learning_rate": 4.16994220958883e-06, "loss": 0.7172, "step": 11023 }, { "epoch": 0.5665536026313085, "grad_norm": 1.1308764219284058, "learning_rate": 4.1691215155420924e-06, "loss": 0.7029, "step": 11024 }, { "epoch": 0.5666049953746531, "grad_norm": 0.8499131202697754, "learning_rate": 4.168300844515569e-06, "loss": 0.6848, "step": 11025 }, { "epoch": 0.5666563881179978, "grad_norm": 1.0882580280303955, "learning_rate": 4.167480196531999e-06, "loss": 0.7179, "step": 11026 }, { "epoch": 0.5667077808613423, "grad_norm": 1.0615354776382446, "learning_rate": 4.166659571614115e-06, "loss": 0.7024, "step": 11027 }, { "epoch": 0.566759173604687, "grad_norm": 1.1401622295379639, "learning_rate": 4.165838969784656e-06, "loss": 0.8025, "step": 11028 }, { "epoch": 0.5668105663480316, "grad_norm": 1.0660475492477417, "learning_rate": 4.165018391066355e-06, "loss": 0.699, "step": 11029 }, { "epoch": 0.5668619590913763, "grad_norm": 1.002280831336975, "learning_rate": 4.164197835481952e-06, "loss": 0.7021, "step": 11030 }, { "epoch": 0.5669133518347209, "grad_norm": 0.7960710525512695, "learning_rate": 4.1633773030541756e-06, "loss": 0.6526, "step": 11031 }, { "epoch": 0.5669647445780656, "grad_norm": 0.967799723148346, "learning_rate": 4.162556793805762e-06, "loss": 0.7273, "step": 11032 }, { "epoch": 0.5670161373214102, "grad_norm": 1.10124671459198, "learning_rate": 4.161736307759444e-06, "loss": 0.7687, "step": 11033 }, { "epoch": 0.5670675300647549, "grad_norm": 0.7569063901901245, "learning_rate": 4.160915844937953e-06, "loss": 0.6544, "step": 11034 }, { "epoch": 0.5671189228080995, "grad_norm": 1.1301634311676025, "learning_rate": 4.160095405364021e-06, "loss": 0.7566, "step": 11035 }, { "epoch": 0.5671703155514441, "grad_norm": 1.0733606815338135, "learning_rate": 4.159274989060382e-06, "loss": 0.7214, "step": 11036 }, { "epoch": 0.5672217082947888, "grad_norm": 0.9614720344543457, "learning_rate": 4.158454596049761e-06, "loss": 0.6712, "step": 11037 }, { "epoch": 0.5672731010381334, "grad_norm": 0.7964835166931152, "learning_rate": 4.15763422635489e-06, "loss": 0.7113, "step": 11038 }, { "epoch": 0.5673244937814781, "grad_norm": 1.1233831644058228, "learning_rate": 4.156813879998501e-06, "loss": 0.7044, "step": 11039 }, { "epoch": 0.5673758865248227, "grad_norm": 1.1169955730438232, "learning_rate": 4.155993557003317e-06, "loss": 0.6653, "step": 11040 }, { "epoch": 0.5674272792681674, "grad_norm": 1.0280835628509521, "learning_rate": 4.155173257392069e-06, "loss": 0.7314, "step": 11041 }, { "epoch": 0.5674786720115119, "grad_norm": 0.708518385887146, "learning_rate": 4.154352981187485e-06, "loss": 0.6711, "step": 11042 }, { "epoch": 0.5675300647548566, "grad_norm": 1.1703786849975586, "learning_rate": 4.1535327284122885e-06, "loss": 0.7227, "step": 11043 }, { "epoch": 0.5675814574982012, "grad_norm": 1.1340153217315674, "learning_rate": 4.152712499089207e-06, "loss": 0.6961, "step": 11044 }, { "epoch": 0.5676328502415459, "grad_norm": 1.0886057615280151, "learning_rate": 4.1518922932409655e-06, "loss": 0.7653, "step": 11045 }, { "epoch": 0.5676842429848905, "grad_norm": 1.1166150569915771, "learning_rate": 4.1510721108902916e-06, "loss": 0.7194, "step": 11046 }, { "epoch": 0.5677356357282352, "grad_norm": 1.125437617301941, "learning_rate": 4.150251952059903e-06, "loss": 0.7652, "step": 11047 }, { "epoch": 0.5677870284715798, "grad_norm": 1.1061116456985474, "learning_rate": 4.149431816772526e-06, "loss": 0.7274, "step": 11048 }, { "epoch": 0.5678384212149244, "grad_norm": 1.0566638708114624, "learning_rate": 4.148611705050886e-06, "loss": 0.705, "step": 11049 }, { "epoch": 0.5678898139582691, "grad_norm": 1.1856369972229004, "learning_rate": 4.1477916169177005e-06, "loss": 0.6572, "step": 11050 }, { "epoch": 0.5679412067016137, "grad_norm": 1.1192147731781006, "learning_rate": 4.146971552395692e-06, "loss": 0.6844, "step": 11051 }, { "epoch": 0.5679925994449584, "grad_norm": 0.9797646403312683, "learning_rate": 4.146151511507582e-06, "loss": 0.6754, "step": 11052 }, { "epoch": 0.568043992188303, "grad_norm": 1.043342113494873, "learning_rate": 4.14533149427609e-06, "loss": 0.7951, "step": 11053 }, { "epoch": 0.5680953849316477, "grad_norm": 1.0352638959884644, "learning_rate": 4.144511500723936e-06, "loss": 0.7125, "step": 11054 }, { "epoch": 0.5681467776749923, "grad_norm": 1.108858346939087, "learning_rate": 4.143691530873837e-06, "loss": 0.7257, "step": 11055 }, { "epoch": 0.568198170418337, "grad_norm": 1.123024344444275, "learning_rate": 4.142871584748511e-06, "loss": 0.7509, "step": 11056 }, { "epoch": 0.5682495631616816, "grad_norm": 1.127915859222412, "learning_rate": 4.142051662370678e-06, "loss": 0.7346, "step": 11057 }, { "epoch": 0.5683009559050262, "grad_norm": 1.0987194776535034, "learning_rate": 4.1412317637630526e-06, "loss": 0.7135, "step": 11058 }, { "epoch": 0.5683523486483708, "grad_norm": 1.120337963104248, "learning_rate": 4.140411888948351e-06, "loss": 0.6603, "step": 11059 }, { "epoch": 0.5684037413917155, "grad_norm": 0.7597155570983887, "learning_rate": 4.139592037949287e-06, "loss": 0.6982, "step": 11060 }, { "epoch": 0.5684551341350601, "grad_norm": 1.1014034748077393, "learning_rate": 4.138772210788578e-06, "loss": 0.6997, "step": 11061 }, { "epoch": 0.5685065268784047, "grad_norm": 1.0854308605194092, "learning_rate": 4.137952407488939e-06, "loss": 0.7421, "step": 11062 }, { "epoch": 0.5685579196217494, "grad_norm": 1.0766063928604126, "learning_rate": 4.1371326280730786e-06, "loss": 0.6922, "step": 11063 }, { "epoch": 0.568609312365094, "grad_norm": 1.138703465461731, "learning_rate": 4.136312872563713e-06, "loss": 0.7609, "step": 11064 }, { "epoch": 0.5686607051084387, "grad_norm": 1.2177025079727173, "learning_rate": 4.135493140983554e-06, "loss": 0.7316, "step": 11065 }, { "epoch": 0.5687120978517833, "grad_norm": 1.0706874132156372, "learning_rate": 4.134673433355312e-06, "loss": 0.7754, "step": 11066 }, { "epoch": 0.568763490595128, "grad_norm": 1.150875449180603, "learning_rate": 4.1338537497016975e-06, "loss": 0.6622, "step": 11067 }, { "epoch": 0.5688148833384726, "grad_norm": 1.1102443933486938, "learning_rate": 4.133034090045424e-06, "loss": 0.7811, "step": 11068 }, { "epoch": 0.5688662760818173, "grad_norm": 1.1013644933700562, "learning_rate": 4.132214454409195e-06, "loss": 0.7081, "step": 11069 }, { "epoch": 0.5689176688251619, "grad_norm": 1.1496880054473877, "learning_rate": 4.131394842815723e-06, "loss": 0.77, "step": 11070 }, { "epoch": 0.5689690615685066, "grad_norm": 1.1390855312347412, "learning_rate": 4.130575255287718e-06, "loss": 0.7419, "step": 11071 }, { "epoch": 0.5690204543118512, "grad_norm": 1.0418493747711182, "learning_rate": 4.129755691847882e-06, "loss": 0.6541, "step": 11072 }, { "epoch": 0.5690718470551958, "grad_norm": 1.11246919631958, "learning_rate": 4.128936152518924e-06, "loss": 0.6826, "step": 11073 }, { "epoch": 0.5691232397985404, "grad_norm": 0.7747859358787537, "learning_rate": 4.128116637323552e-06, "loss": 0.641, "step": 11074 }, { "epoch": 0.569174632541885, "grad_norm": 1.0382137298583984, "learning_rate": 4.127297146284469e-06, "loss": 0.6783, "step": 11075 }, { "epoch": 0.5692260252852297, "grad_norm": 0.6698956489562988, "learning_rate": 4.126477679424381e-06, "loss": 0.6544, "step": 11076 }, { "epoch": 0.5692774180285743, "grad_norm": 1.09820556640625, "learning_rate": 4.125658236765991e-06, "loss": 0.7094, "step": 11077 }, { "epoch": 0.569328810771919, "grad_norm": 1.089430570602417, "learning_rate": 4.124838818332006e-06, "loss": 0.6953, "step": 11078 }, { "epoch": 0.5693802035152636, "grad_norm": 1.0253576040267944, "learning_rate": 4.124019424145123e-06, "loss": 0.7345, "step": 11079 }, { "epoch": 0.5694315962586083, "grad_norm": 1.0222501754760742, "learning_rate": 4.123200054228047e-06, "loss": 0.6694, "step": 11080 }, { "epoch": 0.5694829890019529, "grad_norm": 1.0777531862258911, "learning_rate": 4.122380708603481e-06, "loss": 0.7092, "step": 11081 }, { "epoch": 0.5695343817452976, "grad_norm": 1.0338901281356812, "learning_rate": 4.121561387294122e-06, "loss": 0.6733, "step": 11082 }, { "epoch": 0.5695857744886422, "grad_norm": 1.091964840888977, "learning_rate": 4.120742090322672e-06, "loss": 0.7169, "step": 11083 }, { "epoch": 0.5696371672319869, "grad_norm": 1.0256109237670898, "learning_rate": 4.1199228177118315e-06, "loss": 0.6553, "step": 11084 }, { "epoch": 0.5696885599753315, "grad_norm": 1.0932872295379639, "learning_rate": 4.119103569484296e-06, "loss": 0.7236, "step": 11085 }, { "epoch": 0.5697399527186762, "grad_norm": 1.0545307397842407, "learning_rate": 4.1182843456627655e-06, "loss": 0.7133, "step": 11086 }, { "epoch": 0.5697913454620208, "grad_norm": 1.0992408990859985, "learning_rate": 4.11746514626994e-06, "loss": 0.6983, "step": 11087 }, { "epoch": 0.5698427382053654, "grad_norm": 0.7405276298522949, "learning_rate": 4.11664597132851e-06, "loss": 0.6516, "step": 11088 }, { "epoch": 0.56989413094871, "grad_norm": 1.0907763242721558, "learning_rate": 4.115826820861177e-06, "loss": 0.6907, "step": 11089 }, { "epoch": 0.5699455236920546, "grad_norm": 0.798585057258606, "learning_rate": 4.115007694890631e-06, "loss": 0.6318, "step": 11090 }, { "epoch": 0.5699969164353993, "grad_norm": 1.4931803941726685, "learning_rate": 4.114188593439574e-06, "loss": 0.7302, "step": 11091 }, { "epoch": 0.5700483091787439, "grad_norm": 0.7386412620544434, "learning_rate": 4.113369516530693e-06, "loss": 0.6402, "step": 11092 }, { "epoch": 0.5700997019220886, "grad_norm": 1.1752276420593262, "learning_rate": 4.112550464186683e-06, "loss": 0.8014, "step": 11093 }, { "epoch": 0.5701510946654332, "grad_norm": 1.0570437908172607, "learning_rate": 4.111731436430239e-06, "loss": 0.738, "step": 11094 }, { "epoch": 0.5702024874087779, "grad_norm": 1.031988501548767, "learning_rate": 4.11091243328405e-06, "loss": 0.7353, "step": 11095 }, { "epoch": 0.5702538801521225, "grad_norm": 1.2070701122283936, "learning_rate": 4.1100934547708085e-06, "loss": 0.7533, "step": 11096 }, { "epoch": 0.5703052728954672, "grad_norm": 1.1088544130325317, "learning_rate": 4.1092745009132066e-06, "loss": 0.7167, "step": 11097 }, { "epoch": 0.5703566656388118, "grad_norm": 0.655411958694458, "learning_rate": 4.108455571733931e-06, "loss": 0.641, "step": 11098 }, { "epoch": 0.5704080583821565, "grad_norm": 1.1411075592041016, "learning_rate": 4.107636667255671e-06, "loss": 0.7698, "step": 11099 }, { "epoch": 0.5704594511255011, "grad_norm": 1.0562725067138672, "learning_rate": 4.10681778750112e-06, "loss": 0.7221, "step": 11100 }, { "epoch": 0.5705108438688458, "grad_norm": 1.0551602840423584, "learning_rate": 4.10599893249296e-06, "loss": 0.7187, "step": 11101 }, { "epoch": 0.5705622366121904, "grad_norm": 1.1278977394104004, "learning_rate": 4.1051801022538805e-06, "loss": 0.7081, "step": 11102 }, { "epoch": 0.570613629355535, "grad_norm": 1.0314875841140747, "learning_rate": 4.104361296806568e-06, "loss": 0.7068, "step": 11103 }, { "epoch": 0.5706650220988796, "grad_norm": 1.1056450605392456, "learning_rate": 4.103542516173706e-06, "loss": 0.7586, "step": 11104 }, { "epoch": 0.5707164148422242, "grad_norm": 1.0216871500015259, "learning_rate": 4.102723760377984e-06, "loss": 0.6757, "step": 11105 }, { "epoch": 0.5707678075855689, "grad_norm": 1.1434584856033325, "learning_rate": 4.101905029442082e-06, "loss": 0.7134, "step": 11106 }, { "epoch": 0.5708192003289135, "grad_norm": 1.0356143712997437, "learning_rate": 4.1010863233886865e-06, "loss": 0.6647, "step": 11107 }, { "epoch": 0.5708705930722582, "grad_norm": 1.0801175832748413, "learning_rate": 4.100267642240478e-06, "loss": 0.6958, "step": 11108 }, { "epoch": 0.5709219858156028, "grad_norm": 0.7265595197677612, "learning_rate": 4.0994489860201404e-06, "loss": 0.6429, "step": 11109 }, { "epoch": 0.5709733785589475, "grad_norm": 0.6772834062576294, "learning_rate": 4.098630354750358e-06, "loss": 0.6284, "step": 11110 }, { "epoch": 0.5710247713022921, "grad_norm": 1.0246405601501465, "learning_rate": 4.097811748453806e-06, "loss": 0.6914, "step": 11111 }, { "epoch": 0.5710761640456368, "grad_norm": 0.7394922971725464, "learning_rate": 4.0969931671531675e-06, "loss": 0.6366, "step": 11112 }, { "epoch": 0.5711275567889814, "grad_norm": 0.7672628164291382, "learning_rate": 4.096174610871123e-06, "loss": 0.6739, "step": 11113 }, { "epoch": 0.5711789495323261, "grad_norm": 0.7954023480415344, "learning_rate": 4.0953560796303494e-06, "loss": 0.6551, "step": 11114 }, { "epoch": 0.5712303422756707, "grad_norm": 1.0739543437957764, "learning_rate": 4.0945375734535245e-06, "loss": 0.724, "step": 11115 }, { "epoch": 0.5712817350190154, "grad_norm": 0.7781845331192017, "learning_rate": 4.0937190923633285e-06, "loss": 0.6706, "step": 11116 }, { "epoch": 0.57133312776236, "grad_norm": 1.1202223300933838, "learning_rate": 4.092900636382435e-06, "loss": 0.7076, "step": 11117 }, { "epoch": 0.5713845205057045, "grad_norm": 1.1085606813430786, "learning_rate": 4.092082205533522e-06, "loss": 0.7123, "step": 11118 }, { "epoch": 0.5714359132490492, "grad_norm": 1.130695104598999, "learning_rate": 4.091263799839266e-06, "loss": 0.6728, "step": 11119 }, { "epoch": 0.5714873059923938, "grad_norm": 1.096818208694458, "learning_rate": 4.090445419322338e-06, "loss": 0.7559, "step": 11120 }, { "epoch": 0.5715386987357385, "grad_norm": 1.071595549583435, "learning_rate": 4.089627064005414e-06, "loss": 0.7239, "step": 11121 }, { "epoch": 0.5715900914790831, "grad_norm": 1.0662117004394531, "learning_rate": 4.088808733911167e-06, "loss": 0.7038, "step": 11122 }, { "epoch": 0.5716414842224278, "grad_norm": 0.8210490942001343, "learning_rate": 4.087990429062272e-06, "loss": 0.6503, "step": 11123 }, { "epoch": 0.5716928769657724, "grad_norm": 1.068395972251892, "learning_rate": 4.087172149481397e-06, "loss": 0.6696, "step": 11124 }, { "epoch": 0.5717442697091171, "grad_norm": 1.0581468343734741, "learning_rate": 4.086353895191213e-06, "loss": 0.6948, "step": 11125 }, { "epoch": 0.5717956624524617, "grad_norm": 1.0160549879074097, "learning_rate": 4.0855356662143956e-06, "loss": 0.6927, "step": 11126 }, { "epoch": 0.5718470551958064, "grad_norm": 1.1025118827819824, "learning_rate": 4.084717462573608e-06, "loss": 0.7389, "step": 11127 }, { "epoch": 0.571898447939151, "grad_norm": 1.1304795742034912, "learning_rate": 4.083899284291523e-06, "loss": 0.7732, "step": 11128 }, { "epoch": 0.5719498406824957, "grad_norm": 1.076361060142517, "learning_rate": 4.083081131390811e-06, "loss": 0.7042, "step": 11129 }, { "epoch": 0.5720012334258403, "grad_norm": 1.0693769454956055, "learning_rate": 4.082263003894134e-06, "loss": 0.765, "step": 11130 }, { "epoch": 0.572052626169185, "grad_norm": 1.0774987936019897, "learning_rate": 4.081444901824163e-06, "loss": 0.7234, "step": 11131 }, { "epoch": 0.5721040189125296, "grad_norm": 1.0646545886993408, "learning_rate": 4.080626825203564e-06, "loss": 0.724, "step": 11132 }, { "epoch": 0.5721554116558741, "grad_norm": 1.1105787754058838, "learning_rate": 4.0798087740550006e-06, "loss": 0.6962, "step": 11133 }, { "epoch": 0.5722068043992188, "grad_norm": 1.0818690061569214, "learning_rate": 4.078990748401138e-06, "loss": 0.7445, "step": 11134 }, { "epoch": 0.5722581971425634, "grad_norm": 1.064070701599121, "learning_rate": 4.078172748264642e-06, "loss": 0.7317, "step": 11135 }, { "epoch": 0.5723095898859081, "grad_norm": 1.026812195777893, "learning_rate": 4.077354773668174e-06, "loss": 0.6828, "step": 11136 }, { "epoch": 0.5723609826292527, "grad_norm": 1.7307419776916504, "learning_rate": 4.076536824634398e-06, "loss": 0.6693, "step": 11137 }, { "epoch": 0.5724123753725974, "grad_norm": 1.0935274362564087, "learning_rate": 4.075718901185976e-06, "loss": 0.7039, "step": 11138 }, { "epoch": 0.572463768115942, "grad_norm": 0.7822026014328003, "learning_rate": 4.07490100334557e-06, "loss": 0.67, "step": 11139 }, { "epoch": 0.5725151608592867, "grad_norm": 0.9213756918907166, "learning_rate": 4.074083131135838e-06, "loss": 0.6692, "step": 11140 }, { "epoch": 0.5725665536026313, "grad_norm": 1.1129777431488037, "learning_rate": 4.073265284579441e-06, "loss": 0.7597, "step": 11141 }, { "epoch": 0.572617946345976, "grad_norm": 0.7165144681930542, "learning_rate": 4.072447463699042e-06, "loss": 0.6394, "step": 11142 }, { "epoch": 0.5726693390893206, "grad_norm": 1.121860384941101, "learning_rate": 4.071629668517292e-06, "loss": 0.6887, "step": 11143 }, { "epoch": 0.5727207318326653, "grad_norm": 1.1139198541641235, "learning_rate": 4.070811899056854e-06, "loss": 0.7311, "step": 11144 }, { "epoch": 0.5727721245760099, "grad_norm": 1.0638325214385986, "learning_rate": 4.0699941553403845e-06, "loss": 0.7539, "step": 11145 }, { "epoch": 0.5728235173193545, "grad_norm": 1.109394907951355, "learning_rate": 4.069176437390538e-06, "loss": 0.7114, "step": 11146 }, { "epoch": 0.5728749100626992, "grad_norm": 1.0663273334503174, "learning_rate": 4.068358745229971e-06, "loss": 0.733, "step": 11147 }, { "epoch": 0.5729263028060438, "grad_norm": 0.9138264060020447, "learning_rate": 4.067541078881341e-06, "loss": 0.6559, "step": 11148 }, { "epoch": 0.5729776955493884, "grad_norm": 1.0690398216247559, "learning_rate": 4.066723438367297e-06, "loss": 0.7053, "step": 11149 }, { "epoch": 0.573029088292733, "grad_norm": 1.0747218132019043, "learning_rate": 4.0659058237104954e-06, "loss": 0.7706, "step": 11150 }, { "epoch": 0.5730804810360777, "grad_norm": 1.125722885131836, "learning_rate": 4.065088234933591e-06, "loss": 0.7398, "step": 11151 }, { "epoch": 0.5731318737794223, "grad_norm": 1.0590214729309082, "learning_rate": 4.064270672059231e-06, "loss": 0.7327, "step": 11152 }, { "epoch": 0.573183266522767, "grad_norm": 0.7506423592567444, "learning_rate": 4.0634531351100695e-06, "loss": 0.6388, "step": 11153 }, { "epoch": 0.5732346592661116, "grad_norm": 1.1002992391586304, "learning_rate": 4.062635624108756e-06, "loss": 0.7147, "step": 11154 }, { "epoch": 0.5732860520094563, "grad_norm": 1.1006497144699097, "learning_rate": 4.061818139077944e-06, "loss": 0.7417, "step": 11155 }, { "epoch": 0.5733374447528009, "grad_norm": 0.7600101828575134, "learning_rate": 4.061000680040278e-06, "loss": 0.6612, "step": 11156 }, { "epoch": 0.5733888374961456, "grad_norm": 0.9178047776222229, "learning_rate": 4.0601832470184065e-06, "loss": 0.6719, "step": 11157 }, { "epoch": 0.5734402302394902, "grad_norm": 1.0835356712341309, "learning_rate": 4.059365840034981e-06, "loss": 0.7696, "step": 11158 }, { "epoch": 0.5734916229828348, "grad_norm": 1.0631808042526245, "learning_rate": 4.058548459112645e-06, "loss": 0.6997, "step": 11159 }, { "epoch": 0.5735430157261795, "grad_norm": 0.7505224347114563, "learning_rate": 4.057731104274046e-06, "loss": 0.6873, "step": 11160 }, { "epoch": 0.5735944084695241, "grad_norm": 1.0952993631362915, "learning_rate": 4.056913775541832e-06, "loss": 0.7137, "step": 11161 }, { "epoch": 0.5736458012128688, "grad_norm": 1.0359461307525635, "learning_rate": 4.056096472938643e-06, "loss": 0.6479, "step": 11162 }, { "epoch": 0.5736971939562134, "grad_norm": 1.0359959602355957, "learning_rate": 4.0552791964871256e-06, "loss": 0.75, "step": 11163 }, { "epoch": 0.573748586699558, "grad_norm": 1.011143684387207, "learning_rate": 4.054461946209926e-06, "loss": 0.6856, "step": 11164 }, { "epoch": 0.5737999794429026, "grad_norm": 1.0200068950653076, "learning_rate": 4.053644722129681e-06, "loss": 0.6883, "step": 11165 }, { "epoch": 0.5738513721862473, "grad_norm": 1.0730551481246948, "learning_rate": 4.052827524269034e-06, "loss": 0.7292, "step": 11166 }, { "epoch": 0.5739027649295919, "grad_norm": 1.065383791923523, "learning_rate": 4.05201035265063e-06, "loss": 0.7855, "step": 11167 }, { "epoch": 0.5739541576729366, "grad_norm": 1.1123597621917725, "learning_rate": 4.051193207297106e-06, "loss": 0.8085, "step": 11168 }, { "epoch": 0.5740055504162812, "grad_norm": 1.0475029945373535, "learning_rate": 4.050376088231103e-06, "loss": 0.7276, "step": 11169 }, { "epoch": 0.5740569431596259, "grad_norm": 1.0452054738998413, "learning_rate": 4.049558995475259e-06, "loss": 0.7432, "step": 11170 }, { "epoch": 0.5741083359029705, "grad_norm": 1.0660079717636108, "learning_rate": 4.048741929052215e-06, "loss": 0.724, "step": 11171 }, { "epoch": 0.5741597286463151, "grad_norm": 1.050893783569336, "learning_rate": 4.047924888984605e-06, "loss": 0.7273, "step": 11172 }, { "epoch": 0.5742111213896598, "grad_norm": 1.1354444026947021, "learning_rate": 4.047107875295067e-06, "loss": 0.7051, "step": 11173 }, { "epoch": 0.5742625141330044, "grad_norm": 0.8164092898368835, "learning_rate": 4.04629088800624e-06, "loss": 0.6564, "step": 11174 }, { "epoch": 0.5743139068763491, "grad_norm": 0.7610651850700378, "learning_rate": 4.045473927140754e-06, "loss": 0.6924, "step": 11175 }, { "epoch": 0.5743652996196937, "grad_norm": 1.0491775274276733, "learning_rate": 4.044656992721246e-06, "loss": 0.705, "step": 11176 }, { "epoch": 0.5744166923630384, "grad_norm": 1.1353886127471924, "learning_rate": 4.043840084770353e-06, "loss": 0.7558, "step": 11177 }, { "epoch": 0.574468085106383, "grad_norm": 1.1617685556411743, "learning_rate": 4.043023203310703e-06, "loss": 0.7115, "step": 11178 }, { "epoch": 0.5745194778497276, "grad_norm": 1.1784075498580933, "learning_rate": 4.042206348364931e-06, "loss": 0.6914, "step": 11179 }, { "epoch": 0.5745708705930722, "grad_norm": 1.2941726446151733, "learning_rate": 4.0413895199556714e-06, "loss": 0.8257, "step": 11180 }, { "epoch": 0.5746222633364169, "grad_norm": 1.01173734664917, "learning_rate": 4.040572718105549e-06, "loss": 0.6937, "step": 11181 }, { "epoch": 0.5746736560797615, "grad_norm": 1.060337781906128, "learning_rate": 4.039755942837198e-06, "loss": 0.7409, "step": 11182 }, { "epoch": 0.5747250488231062, "grad_norm": 1.071387529373169, "learning_rate": 4.038939194173249e-06, "loss": 0.7677, "step": 11183 }, { "epoch": 0.5747764415664508, "grad_norm": 1.0330414772033691, "learning_rate": 4.038122472136327e-06, "loss": 0.6949, "step": 11184 }, { "epoch": 0.5748278343097954, "grad_norm": 1.0479745864868164, "learning_rate": 4.037305776749062e-06, "loss": 0.7026, "step": 11185 }, { "epoch": 0.5748792270531401, "grad_norm": 0.7281084656715393, "learning_rate": 4.0364891080340805e-06, "loss": 0.6631, "step": 11186 }, { "epoch": 0.5749306197964847, "grad_norm": 1.0737628936767578, "learning_rate": 4.035672466014011e-06, "loss": 0.688, "step": 11187 }, { "epoch": 0.5749820125398294, "grad_norm": 1.040325403213501, "learning_rate": 4.034855850711476e-06, "loss": 0.6704, "step": 11188 }, { "epoch": 0.575033405283174, "grad_norm": 1.0865850448608398, "learning_rate": 4.034039262149104e-06, "loss": 0.7103, "step": 11189 }, { "epoch": 0.5750847980265187, "grad_norm": 0.7549672722816467, "learning_rate": 4.033222700349519e-06, "loss": 0.6513, "step": 11190 }, { "epoch": 0.5751361907698633, "grad_norm": 1.0655280351638794, "learning_rate": 4.032406165335342e-06, "loss": 0.7231, "step": 11191 }, { "epoch": 0.575187583513208, "grad_norm": 1.0084712505340576, "learning_rate": 4.031589657129196e-06, "loss": 0.7265, "step": 11192 }, { "epoch": 0.5752389762565526, "grad_norm": 1.0789988040924072, "learning_rate": 4.030773175753707e-06, "loss": 0.7168, "step": 11193 }, { "epoch": 0.5752903689998972, "grad_norm": 1.0806828737258911, "learning_rate": 4.029956721231492e-06, "loss": 0.7114, "step": 11194 }, { "epoch": 0.5753417617432418, "grad_norm": 1.0372344255447388, "learning_rate": 4.029140293585172e-06, "loss": 0.6991, "step": 11195 }, { "epoch": 0.5753931544865865, "grad_norm": 1.200129747390747, "learning_rate": 4.028323892837369e-06, "loss": 0.6665, "step": 11196 }, { "epoch": 0.5754445472299311, "grad_norm": 1.0972546339035034, "learning_rate": 4.0275075190107e-06, "loss": 0.7224, "step": 11197 }, { "epoch": 0.5754959399732757, "grad_norm": 0.730038046836853, "learning_rate": 4.026691172127786e-06, "loss": 0.5976, "step": 11198 }, { "epoch": 0.5755473327166204, "grad_norm": 1.0375398397445679, "learning_rate": 4.025874852211241e-06, "loss": 0.6598, "step": 11199 }, { "epoch": 0.575598725459965, "grad_norm": 1.113016963005066, "learning_rate": 4.025058559283687e-06, "loss": 0.7246, "step": 11200 }, { "epoch": 0.5756501182033097, "grad_norm": 1.0992497205734253, "learning_rate": 4.024242293367735e-06, "loss": 0.71, "step": 11201 }, { "epoch": 0.5757015109466543, "grad_norm": 1.1115483045578003, "learning_rate": 4.023426054486002e-06, "loss": 0.8117, "step": 11202 }, { "epoch": 0.575752903689999, "grad_norm": 1.040340542793274, "learning_rate": 4.022609842661105e-06, "loss": 0.7448, "step": 11203 }, { "epoch": 0.5758042964333436, "grad_norm": 0.8044306039810181, "learning_rate": 4.021793657915654e-06, "loss": 0.6441, "step": 11204 }, { "epoch": 0.5758556891766883, "grad_norm": 1.0453929901123047, "learning_rate": 4.020977500272264e-06, "loss": 0.7334, "step": 11205 }, { "epoch": 0.5759070819200329, "grad_norm": 1.1692211627960205, "learning_rate": 4.020161369753548e-06, "loss": 0.7911, "step": 11206 }, { "epoch": 0.5759584746633776, "grad_norm": 1.1164125204086304, "learning_rate": 4.0193452663821155e-06, "loss": 0.7096, "step": 11207 }, { "epoch": 0.5760098674067222, "grad_norm": 1.1373391151428223, "learning_rate": 4.01852919018058e-06, "loss": 0.7258, "step": 11208 }, { "epoch": 0.5760612601500668, "grad_norm": 1.2386666536331177, "learning_rate": 4.017713141171551e-06, "loss": 0.6911, "step": 11209 }, { "epoch": 0.5761126528934114, "grad_norm": 1.1416493654251099, "learning_rate": 4.016897119377635e-06, "loss": 0.7764, "step": 11210 }, { "epoch": 0.576164045636756, "grad_norm": 0.7335790395736694, "learning_rate": 4.016081124821443e-06, "loss": 0.7018, "step": 11211 }, { "epoch": 0.5762154383801007, "grad_norm": 0.7119911909103394, "learning_rate": 4.0152651575255865e-06, "loss": 0.6922, "step": 11212 }, { "epoch": 0.5762668311234453, "grad_norm": 1.0537574291229248, "learning_rate": 4.014449217512665e-06, "loss": 0.7002, "step": 11213 }, { "epoch": 0.57631822386679, "grad_norm": 1.0979411602020264, "learning_rate": 4.013633304805289e-06, "loss": 0.7471, "step": 11214 }, { "epoch": 0.5763696166101346, "grad_norm": 1.0876742601394653, "learning_rate": 4.012817419426063e-06, "loss": 0.6873, "step": 11215 }, { "epoch": 0.5764210093534793, "grad_norm": 1.0729069709777832, "learning_rate": 4.0120015613975955e-06, "loss": 0.7417, "step": 11216 }, { "epoch": 0.5764724020968239, "grad_norm": 1.066092610359192, "learning_rate": 4.011185730742485e-06, "loss": 0.7239, "step": 11217 }, { "epoch": 0.5765237948401686, "grad_norm": 1.0434346199035645, "learning_rate": 4.010369927483338e-06, "loss": 0.7042, "step": 11218 }, { "epoch": 0.5765751875835132, "grad_norm": 1.0563926696777344, "learning_rate": 4.0095541516427565e-06, "loss": 0.6929, "step": 11219 }, { "epoch": 0.5766265803268579, "grad_norm": 0.6743776798248291, "learning_rate": 4.008738403243341e-06, "loss": 0.6228, "step": 11220 }, { "epoch": 0.5766779730702025, "grad_norm": 1.0235823392868042, "learning_rate": 4.007922682307693e-06, "loss": 0.7155, "step": 11221 }, { "epoch": 0.5767293658135472, "grad_norm": 1.1041605472564697, "learning_rate": 4.007106988858417e-06, "loss": 0.7318, "step": 11222 }, { "epoch": 0.5767807585568918, "grad_norm": 0.7637249827384949, "learning_rate": 4.006291322918106e-06, "loss": 0.6186, "step": 11223 }, { "epoch": 0.5768321513002365, "grad_norm": 0.9446680545806885, "learning_rate": 4.005475684509362e-06, "loss": 0.6688, "step": 11224 }, { "epoch": 0.576883544043581, "grad_norm": 1.0663249492645264, "learning_rate": 4.004660073654785e-06, "loss": 0.7168, "step": 11225 }, { "epoch": 0.5769349367869256, "grad_norm": 1.0669889450073242, "learning_rate": 4.003844490376967e-06, "loss": 0.7059, "step": 11226 }, { "epoch": 0.5769863295302703, "grad_norm": 1.1404662132263184, "learning_rate": 4.003028934698507e-06, "loss": 0.7279, "step": 11227 }, { "epoch": 0.5770377222736149, "grad_norm": 1.03303062915802, "learning_rate": 4.002213406642003e-06, "loss": 0.6643, "step": 11228 }, { "epoch": 0.5770891150169596, "grad_norm": 1.05778169631958, "learning_rate": 4.001397906230047e-06, "loss": 0.7051, "step": 11229 }, { "epoch": 0.5771405077603042, "grad_norm": 1.0519039630889893, "learning_rate": 4.0005824334852325e-06, "loss": 0.7692, "step": 11230 }, { "epoch": 0.5771919005036489, "grad_norm": 1.0377106666564941, "learning_rate": 3.999766988430156e-06, "loss": 0.6821, "step": 11231 }, { "epoch": 0.5772432932469935, "grad_norm": 1.1369349956512451, "learning_rate": 3.99895157108741e-06, "loss": 0.7297, "step": 11232 }, { "epoch": 0.5772946859903382, "grad_norm": 1.1392085552215576, "learning_rate": 3.998136181479583e-06, "loss": 0.7466, "step": 11233 }, { "epoch": 0.5773460787336828, "grad_norm": 1.1218342781066895, "learning_rate": 3.997320819629268e-06, "loss": 0.7568, "step": 11234 }, { "epoch": 0.5773974714770275, "grad_norm": 1.0153709650039673, "learning_rate": 3.996505485559057e-06, "loss": 0.7426, "step": 11235 }, { "epoch": 0.5774488642203721, "grad_norm": 1.050020456314087, "learning_rate": 3.995690179291536e-06, "loss": 0.7661, "step": 11236 }, { "epoch": 0.5775002569637168, "grad_norm": 1.0119903087615967, "learning_rate": 3.994874900849295e-06, "loss": 0.6648, "step": 11237 }, { "epoch": 0.5775516497070614, "grad_norm": 0.7563510537147522, "learning_rate": 3.994059650254924e-06, "loss": 0.6803, "step": 11238 }, { "epoch": 0.5776030424504061, "grad_norm": 1.0323697328567505, "learning_rate": 3.993244427531008e-06, "loss": 0.6764, "step": 11239 }, { "epoch": 0.5776544351937506, "grad_norm": 1.083554744720459, "learning_rate": 3.992429232700134e-06, "loss": 0.704, "step": 11240 }, { "epoch": 0.5777058279370952, "grad_norm": 0.754530668258667, "learning_rate": 3.99161406578489e-06, "loss": 0.6466, "step": 11241 }, { "epoch": 0.5777572206804399, "grad_norm": 1.0307697057724, "learning_rate": 3.990798926807857e-06, "loss": 0.7134, "step": 11242 }, { "epoch": 0.5778086134237845, "grad_norm": 1.0322731733322144, "learning_rate": 3.989983815791622e-06, "loss": 0.6779, "step": 11243 }, { "epoch": 0.5778600061671292, "grad_norm": 1.0287203788757324, "learning_rate": 3.989168732758768e-06, "loss": 0.668, "step": 11244 }, { "epoch": 0.5779113989104738, "grad_norm": 1.1255407333374023, "learning_rate": 3.988353677731876e-06, "loss": 0.7193, "step": 11245 }, { "epoch": 0.5779627916538185, "grad_norm": 1.1046271324157715, "learning_rate": 3.987538650733527e-06, "loss": 0.6597, "step": 11246 }, { "epoch": 0.5780141843971631, "grad_norm": 1.130760908126831, "learning_rate": 3.986723651786305e-06, "loss": 0.6805, "step": 11247 }, { "epoch": 0.5780655771405078, "grad_norm": 1.0813477039337158, "learning_rate": 3.98590868091279e-06, "loss": 0.8068, "step": 11248 }, { "epoch": 0.5781169698838524, "grad_norm": 1.0859423875808716, "learning_rate": 3.98509373813556e-06, "loss": 0.7072, "step": 11249 }, { "epoch": 0.5781683626271971, "grad_norm": 1.067419409751892, "learning_rate": 3.984278823477193e-06, "loss": 0.7368, "step": 11250 }, { "epoch": 0.5782197553705417, "grad_norm": 1.018715739250183, "learning_rate": 3.983463936960272e-06, "loss": 0.7221, "step": 11251 }, { "epoch": 0.5782711481138864, "grad_norm": 1.115850567817688, "learning_rate": 3.982649078607367e-06, "loss": 0.7565, "step": 11252 }, { "epoch": 0.578322540857231, "grad_norm": 1.0523897409439087, "learning_rate": 3.981834248441058e-06, "loss": 0.7242, "step": 11253 }, { "epoch": 0.5783739336005757, "grad_norm": 1.1480294466018677, "learning_rate": 3.981019446483922e-06, "loss": 0.7345, "step": 11254 }, { "epoch": 0.5784253263439202, "grad_norm": 1.0524932146072388, "learning_rate": 3.98020467275853e-06, "loss": 0.7078, "step": 11255 }, { "epoch": 0.5784767190872648, "grad_norm": 1.0533568859100342, "learning_rate": 3.979389927287458e-06, "loss": 0.6979, "step": 11256 }, { "epoch": 0.5785281118306095, "grad_norm": 1.436596155166626, "learning_rate": 3.978575210093281e-06, "loss": 0.7406, "step": 11257 }, { "epoch": 0.5785795045739541, "grad_norm": 1.0759296417236328, "learning_rate": 3.977760521198568e-06, "loss": 0.6905, "step": 11258 }, { "epoch": 0.5786308973172988, "grad_norm": 1.1142526865005493, "learning_rate": 3.9769458606258946e-06, "loss": 0.7035, "step": 11259 }, { "epoch": 0.5786822900606434, "grad_norm": 0.9966619610786438, "learning_rate": 3.976131228397828e-06, "loss": 0.7216, "step": 11260 }, { "epoch": 0.5787336828039881, "grad_norm": 1.0635515451431274, "learning_rate": 3.97531662453694e-06, "loss": 0.6814, "step": 11261 }, { "epoch": 0.5787850755473327, "grad_norm": 0.7547945380210876, "learning_rate": 3.974502049065799e-06, "loss": 0.6742, "step": 11262 }, { "epoch": 0.5788364682906774, "grad_norm": 1.2300665378570557, "learning_rate": 3.973687502006974e-06, "loss": 0.7288, "step": 11263 }, { "epoch": 0.578887861034022, "grad_norm": 1.023860216140747, "learning_rate": 3.9728729833830355e-06, "loss": 0.6907, "step": 11264 }, { "epoch": 0.5789392537773667, "grad_norm": 1.06327486038208, "learning_rate": 3.972058493216546e-06, "loss": 0.7026, "step": 11265 }, { "epoch": 0.5789906465207113, "grad_norm": 1.0746352672576904, "learning_rate": 3.971244031530074e-06, "loss": 0.6994, "step": 11266 }, { "epoch": 0.579042039264056, "grad_norm": 1.1093498468399048, "learning_rate": 3.970429598346186e-06, "loss": 0.7512, "step": 11267 }, { "epoch": 0.5790934320074006, "grad_norm": 1.1092314720153809, "learning_rate": 3.969615193687443e-06, "loss": 0.7514, "step": 11268 }, { "epoch": 0.5791448247507452, "grad_norm": 1.2824609279632568, "learning_rate": 3.96880081757641e-06, "loss": 0.7228, "step": 11269 }, { "epoch": 0.5791962174940898, "grad_norm": 1.0995031595230103, "learning_rate": 3.967986470035653e-06, "loss": 0.729, "step": 11270 }, { "epoch": 0.5792476102374344, "grad_norm": 1.0830724239349365, "learning_rate": 3.967172151087731e-06, "loss": 0.7386, "step": 11271 }, { "epoch": 0.5792990029807791, "grad_norm": 1.0968003273010254, "learning_rate": 3.966357860755206e-06, "loss": 0.7012, "step": 11272 }, { "epoch": 0.5793503957241237, "grad_norm": 1.0464897155761719, "learning_rate": 3.9655435990606415e-06, "loss": 0.6683, "step": 11273 }, { "epoch": 0.5794017884674684, "grad_norm": 1.1040109395980835, "learning_rate": 3.964729366026593e-06, "loss": 0.7153, "step": 11274 }, { "epoch": 0.579453181210813, "grad_norm": 0.9912078976631165, "learning_rate": 3.96391516167562e-06, "loss": 0.7157, "step": 11275 }, { "epoch": 0.5795045739541577, "grad_norm": 1.0945379734039307, "learning_rate": 3.963100986030286e-06, "loss": 0.7679, "step": 11276 }, { "epoch": 0.5795559666975023, "grad_norm": 1.0576988458633423, "learning_rate": 3.962286839113142e-06, "loss": 0.6607, "step": 11277 }, { "epoch": 0.579607359440847, "grad_norm": 0.7103452086448669, "learning_rate": 3.961472720946748e-06, "loss": 0.6558, "step": 11278 }, { "epoch": 0.5796587521841916, "grad_norm": 1.052274465560913, "learning_rate": 3.960658631553658e-06, "loss": 0.74, "step": 11279 }, { "epoch": 0.5797101449275363, "grad_norm": 0.7780666351318359, "learning_rate": 3.959844570956429e-06, "loss": 0.6468, "step": 11280 }, { "epoch": 0.5797615376708809, "grad_norm": 0.701003909111023, "learning_rate": 3.959030539177614e-06, "loss": 0.6764, "step": 11281 }, { "epoch": 0.5798129304142255, "grad_norm": 1.0648292303085327, "learning_rate": 3.958216536239767e-06, "loss": 0.7013, "step": 11282 }, { "epoch": 0.5798643231575702, "grad_norm": 1.0383782386779785, "learning_rate": 3.9574025621654435e-06, "loss": 0.7399, "step": 11283 }, { "epoch": 0.5799157159009148, "grad_norm": 1.099942684173584, "learning_rate": 3.9565886169771895e-06, "loss": 0.7641, "step": 11284 }, { "epoch": 0.5799671086442594, "grad_norm": 1.0414830446243286, "learning_rate": 3.955774700697559e-06, "loss": 0.73, "step": 11285 }, { "epoch": 0.580018501387604, "grad_norm": 0.825836718082428, "learning_rate": 3.954960813349104e-06, "loss": 0.6899, "step": 11286 }, { "epoch": 0.5800698941309487, "grad_norm": 0.7473108768463135, "learning_rate": 3.954146954954371e-06, "loss": 0.6595, "step": 11287 }, { "epoch": 0.5801212868742933, "grad_norm": 1.0817381143569946, "learning_rate": 3.953333125535909e-06, "loss": 0.7535, "step": 11288 }, { "epoch": 0.580172679617638, "grad_norm": 0.88969886302948, "learning_rate": 3.952519325116268e-06, "loss": 0.6164, "step": 11289 }, { "epoch": 0.5802240723609826, "grad_norm": 1.030044436454773, "learning_rate": 3.951705553717994e-06, "loss": 0.6778, "step": 11290 }, { "epoch": 0.5802754651043273, "grad_norm": 1.0894672870635986, "learning_rate": 3.950891811363632e-06, "loss": 0.7515, "step": 11291 }, { "epoch": 0.5803268578476719, "grad_norm": 1.0215415954589844, "learning_rate": 3.95007809807573e-06, "loss": 0.6758, "step": 11292 }, { "epoch": 0.5803782505910166, "grad_norm": 1.221496820449829, "learning_rate": 3.94926441387683e-06, "loss": 0.7268, "step": 11293 }, { "epoch": 0.5804296433343612, "grad_norm": 1.122450590133667, "learning_rate": 3.948450758789477e-06, "loss": 0.698, "step": 11294 }, { "epoch": 0.5804810360777058, "grad_norm": 1.0512635707855225, "learning_rate": 3.9476371328362135e-06, "loss": 0.708, "step": 11295 }, { "epoch": 0.5805324288210505, "grad_norm": 1.0841056108474731, "learning_rate": 3.946823536039584e-06, "loss": 0.7221, "step": 11296 }, { "epoch": 0.5805838215643951, "grad_norm": 1.1028002500534058, "learning_rate": 3.946009968422127e-06, "loss": 0.7422, "step": 11297 }, { "epoch": 0.5806352143077398, "grad_norm": 1.0233488082885742, "learning_rate": 3.945196430006384e-06, "loss": 0.6852, "step": 11298 }, { "epoch": 0.5806866070510844, "grad_norm": 1.0731375217437744, "learning_rate": 3.944382920814895e-06, "loss": 0.6896, "step": 11299 }, { "epoch": 0.580737999794429, "grad_norm": 1.1066044569015503, "learning_rate": 3.943569440870199e-06, "loss": 0.7257, "step": 11300 }, { "epoch": 0.5807893925377736, "grad_norm": 1.0877400636672974, "learning_rate": 3.942755990194834e-06, "loss": 0.7956, "step": 11301 }, { "epoch": 0.5808407852811183, "grad_norm": 1.018005132675171, "learning_rate": 3.9419425688113396e-06, "loss": 0.6921, "step": 11302 }, { "epoch": 0.5808921780244629, "grad_norm": 1.1270915269851685, "learning_rate": 3.9411291767422476e-06, "loss": 0.7263, "step": 11303 }, { "epoch": 0.5809435707678076, "grad_norm": 1.0828816890716553, "learning_rate": 3.940315814010097e-06, "loss": 0.7462, "step": 11304 }, { "epoch": 0.5809949635111522, "grad_norm": 0.9924828410148621, "learning_rate": 3.939502480637424e-06, "loss": 0.7071, "step": 11305 }, { "epoch": 0.5810463562544969, "grad_norm": 1.0831063985824585, "learning_rate": 3.938689176646759e-06, "loss": 0.7123, "step": 11306 }, { "epoch": 0.5810977489978415, "grad_norm": 1.037266731262207, "learning_rate": 3.9378759020606375e-06, "loss": 0.6558, "step": 11307 }, { "epoch": 0.5811491417411861, "grad_norm": 1.030300498008728, "learning_rate": 3.93706265690159e-06, "loss": 0.678, "step": 11308 }, { "epoch": 0.5812005344845308, "grad_norm": 0.8015631437301636, "learning_rate": 3.936249441192153e-06, "loss": 0.6612, "step": 11309 }, { "epoch": 0.5812519272278754, "grad_norm": 1.0393797159194946, "learning_rate": 3.935436254954853e-06, "loss": 0.7019, "step": 11310 }, { "epoch": 0.5813033199712201, "grad_norm": 1.111364483833313, "learning_rate": 3.93462309821222e-06, "loss": 0.7222, "step": 11311 }, { "epoch": 0.5813547127145647, "grad_norm": 1.0797027349472046, "learning_rate": 3.9338099709867865e-06, "loss": 0.6692, "step": 11312 }, { "epoch": 0.5814061054579094, "grad_norm": 1.0732990503311157, "learning_rate": 3.9329968733010764e-06, "loss": 0.6956, "step": 11313 }, { "epoch": 0.581457498201254, "grad_norm": 1.0442534685134888, "learning_rate": 3.93218380517762e-06, "loss": 0.7168, "step": 11314 }, { "epoch": 0.5815088909445987, "grad_norm": 1.111409306526184, "learning_rate": 3.931370766638946e-06, "loss": 0.7161, "step": 11315 }, { "epoch": 0.5815602836879432, "grad_norm": 0.7506732940673828, "learning_rate": 3.930557757707576e-06, "loss": 0.682, "step": 11316 }, { "epoch": 0.5816116764312879, "grad_norm": 1.1169236898422241, "learning_rate": 3.929744778406037e-06, "loss": 0.7605, "step": 11317 }, { "epoch": 0.5816630691746325, "grad_norm": 1.052832841873169, "learning_rate": 3.928931828756857e-06, "loss": 0.6902, "step": 11318 }, { "epoch": 0.5817144619179772, "grad_norm": 1.0512350797653198, "learning_rate": 3.928118908782552e-06, "loss": 0.6963, "step": 11319 }, { "epoch": 0.5817658546613218, "grad_norm": 1.026297688484192, "learning_rate": 3.927306018505649e-06, "loss": 0.7146, "step": 11320 }, { "epoch": 0.5818172474046664, "grad_norm": 1.0498464107513428, "learning_rate": 3.926493157948672e-06, "loss": 0.7284, "step": 11321 }, { "epoch": 0.5818686401480111, "grad_norm": 1.1579846143722534, "learning_rate": 3.925680327134137e-06, "loss": 0.7335, "step": 11322 }, { "epoch": 0.5819200328913557, "grad_norm": 1.041756510734558, "learning_rate": 3.924867526084567e-06, "loss": 0.6957, "step": 11323 }, { "epoch": 0.5819714256347004, "grad_norm": 1.0770649909973145, "learning_rate": 3.92405475482248e-06, "loss": 0.7097, "step": 11324 }, { "epoch": 0.582022818378045, "grad_norm": 1.1626355648040771, "learning_rate": 3.9232420133704e-06, "loss": 0.7354, "step": 11325 }, { "epoch": 0.5820742111213897, "grad_norm": 1.0778135061264038, "learning_rate": 3.9224293017508365e-06, "loss": 0.719, "step": 11326 }, { "epoch": 0.5821256038647343, "grad_norm": 0.6938640475273132, "learning_rate": 3.921616619986311e-06, "loss": 0.651, "step": 11327 }, { "epoch": 0.582176996608079, "grad_norm": 1.1208105087280273, "learning_rate": 3.920803968099341e-06, "loss": 0.7323, "step": 11328 }, { "epoch": 0.5822283893514236, "grad_norm": 1.0829557180404663, "learning_rate": 3.919991346112436e-06, "loss": 0.7376, "step": 11329 }, { "epoch": 0.5822797820947683, "grad_norm": 1.105506420135498, "learning_rate": 3.919178754048114e-06, "loss": 0.6508, "step": 11330 }, { "epoch": 0.5823311748381128, "grad_norm": 1.0941522121429443, "learning_rate": 3.9183661919288905e-06, "loss": 0.6793, "step": 11331 }, { "epoch": 0.5823825675814575, "grad_norm": 1.1453266143798828, "learning_rate": 3.917553659777276e-06, "loss": 0.7256, "step": 11332 }, { "epoch": 0.5824339603248021, "grad_norm": 1.1139830350875854, "learning_rate": 3.916741157615781e-06, "loss": 0.6947, "step": 11333 }, { "epoch": 0.5824853530681467, "grad_norm": 1.1017076969146729, "learning_rate": 3.915928685466921e-06, "loss": 0.7248, "step": 11334 }, { "epoch": 0.5825367458114914, "grad_norm": 1.1475648880004883, "learning_rate": 3.915116243353201e-06, "loss": 0.7383, "step": 11335 }, { "epoch": 0.582588138554836, "grad_norm": 1.0920730829238892, "learning_rate": 3.914303831297132e-06, "loss": 0.7193, "step": 11336 }, { "epoch": 0.5826395312981807, "grad_norm": 1.0276113748550415, "learning_rate": 3.913491449321227e-06, "loss": 0.674, "step": 11337 }, { "epoch": 0.5826909240415253, "grad_norm": 1.2802693843841553, "learning_rate": 3.912679097447987e-06, "loss": 0.719, "step": 11338 }, { "epoch": 0.58274231678487, "grad_norm": 1.037237524986267, "learning_rate": 3.911866775699923e-06, "loss": 0.7321, "step": 11339 }, { "epoch": 0.5827937095282146, "grad_norm": 1.059210181236267, "learning_rate": 3.911054484099539e-06, "loss": 0.749, "step": 11340 }, { "epoch": 0.5828451022715593, "grad_norm": 1.0539554357528687, "learning_rate": 3.9102422226693436e-06, "loss": 0.7352, "step": 11341 }, { "epoch": 0.5828964950149039, "grad_norm": 1.1592546701431274, "learning_rate": 3.909429991431838e-06, "loss": 0.7065, "step": 11342 }, { "epoch": 0.5829478877582486, "grad_norm": 1.0351492166519165, "learning_rate": 3.9086177904095266e-06, "loss": 0.6888, "step": 11343 }, { "epoch": 0.5829992805015932, "grad_norm": 1.046500563621521, "learning_rate": 3.907805619624914e-06, "loss": 0.6884, "step": 11344 }, { "epoch": 0.5830506732449379, "grad_norm": 1.0641207695007324, "learning_rate": 3.9069934791004985e-06, "loss": 0.7248, "step": 11345 }, { "epoch": 0.5831020659882824, "grad_norm": 1.0540226697921753, "learning_rate": 3.906181368858783e-06, "loss": 0.6541, "step": 11346 }, { "epoch": 0.583153458731627, "grad_norm": 1.0949785709381104, "learning_rate": 3.90536928892227e-06, "loss": 0.7033, "step": 11347 }, { "epoch": 0.5832048514749717, "grad_norm": 1.098457932472229, "learning_rate": 3.904557239313456e-06, "loss": 0.7132, "step": 11348 }, { "epoch": 0.5832562442183163, "grad_norm": 1.0412571430206299, "learning_rate": 3.903745220054838e-06, "loss": 0.6922, "step": 11349 }, { "epoch": 0.583307636961661, "grad_norm": 1.0224560499191284, "learning_rate": 3.902933231168919e-06, "loss": 0.6823, "step": 11350 }, { "epoch": 0.5833590297050056, "grad_norm": 1.0984859466552734, "learning_rate": 3.902121272678191e-06, "loss": 0.7004, "step": 11351 }, { "epoch": 0.5834104224483503, "grad_norm": 1.108841896057129, "learning_rate": 3.901309344605152e-06, "loss": 0.7373, "step": 11352 }, { "epoch": 0.5834618151916949, "grad_norm": 1.081255555152893, "learning_rate": 3.9004974469722986e-06, "loss": 0.7206, "step": 11353 }, { "epoch": 0.5835132079350396, "grad_norm": 1.0958716869354248, "learning_rate": 3.899685579802122e-06, "loss": 0.7595, "step": 11354 }, { "epoch": 0.5835646006783842, "grad_norm": 1.0395506620407104, "learning_rate": 3.898873743117117e-06, "loss": 0.7104, "step": 11355 }, { "epoch": 0.5836159934217289, "grad_norm": 1.0136529207229614, "learning_rate": 3.898061936939776e-06, "loss": 0.6765, "step": 11356 }, { "epoch": 0.5836673861650735, "grad_norm": 1.0542958974838257, "learning_rate": 3.897250161292595e-06, "loss": 0.7047, "step": 11357 }, { "epoch": 0.5837187789084182, "grad_norm": 1.0979572534561157, "learning_rate": 3.896438416198058e-06, "loss": 0.7381, "step": 11358 }, { "epoch": 0.5837701716517628, "grad_norm": 0.7045717239379883, "learning_rate": 3.895626701678658e-06, "loss": 0.6959, "step": 11359 }, { "epoch": 0.5838215643951075, "grad_norm": 0.851304292678833, "learning_rate": 3.894815017756887e-06, "loss": 0.634, "step": 11360 }, { "epoch": 0.583872957138452, "grad_norm": 1.0458976030349731, "learning_rate": 3.89400336445523e-06, "loss": 0.7246, "step": 11361 }, { "epoch": 0.5839243498817966, "grad_norm": 0.8145836591720581, "learning_rate": 3.893191741796174e-06, "loss": 0.6559, "step": 11362 }, { "epoch": 0.5839757426251413, "grad_norm": 1.1119391918182373, "learning_rate": 3.89238014980221e-06, "loss": 0.7032, "step": 11363 }, { "epoch": 0.5840271353684859, "grad_norm": 1.0739480257034302, "learning_rate": 3.8915685884958185e-06, "loss": 0.7189, "step": 11364 }, { "epoch": 0.5840785281118306, "grad_norm": 1.0773121118545532, "learning_rate": 3.890757057899488e-06, "loss": 0.6732, "step": 11365 }, { "epoch": 0.5841299208551752, "grad_norm": 1.0478408336639404, "learning_rate": 3.889945558035703e-06, "loss": 0.7045, "step": 11366 }, { "epoch": 0.5841813135985199, "grad_norm": 1.1085213422775269, "learning_rate": 3.889134088926945e-06, "loss": 0.7146, "step": 11367 }, { "epoch": 0.5842327063418645, "grad_norm": 0.8725783228874207, "learning_rate": 3.8883226505956966e-06, "loss": 0.6623, "step": 11368 }, { "epoch": 0.5842840990852092, "grad_norm": 1.0414930582046509, "learning_rate": 3.887511243064442e-06, "loss": 0.7137, "step": 11369 }, { "epoch": 0.5843354918285538, "grad_norm": 1.217572569847107, "learning_rate": 3.886699866355658e-06, "loss": 0.7737, "step": 11370 }, { "epoch": 0.5843868845718985, "grad_norm": 0.7633144855499268, "learning_rate": 3.8858885204918265e-06, "loss": 0.6156, "step": 11371 }, { "epoch": 0.5844382773152431, "grad_norm": 1.065104365348816, "learning_rate": 3.885077205495426e-06, "loss": 0.7214, "step": 11372 }, { "epoch": 0.5844896700585878, "grad_norm": 0.9881397485733032, "learning_rate": 3.884265921388935e-06, "loss": 0.7561, "step": 11373 }, { "epoch": 0.5845410628019324, "grad_norm": 1.1618558168411255, "learning_rate": 3.883454668194832e-06, "loss": 0.6889, "step": 11374 }, { "epoch": 0.5845924555452771, "grad_norm": 0.7622618079185486, "learning_rate": 3.882643445935591e-06, "loss": 0.7173, "step": 11375 }, { "epoch": 0.5846438482886216, "grad_norm": 1.0533428192138672, "learning_rate": 3.881832254633693e-06, "loss": 0.7157, "step": 11376 }, { "epoch": 0.5846952410319662, "grad_norm": 1.059116244316101, "learning_rate": 3.881021094311604e-06, "loss": 0.716, "step": 11377 }, { "epoch": 0.5847466337753109, "grad_norm": 1.0572422742843628, "learning_rate": 3.880209964991804e-06, "loss": 0.7526, "step": 11378 }, { "epoch": 0.5847980265186555, "grad_norm": 1.0340025424957275, "learning_rate": 3.879398866696767e-06, "loss": 0.7371, "step": 11379 }, { "epoch": 0.5848494192620002, "grad_norm": 0.9992474317550659, "learning_rate": 3.878587799448962e-06, "loss": 0.7007, "step": 11380 }, { "epoch": 0.5849008120053448, "grad_norm": 0.7635375261306763, "learning_rate": 3.87777676327086e-06, "loss": 0.6406, "step": 11381 }, { "epoch": 0.5849522047486895, "grad_norm": 1.1275649070739746, "learning_rate": 3.876965758184934e-06, "loss": 0.7617, "step": 11382 }, { "epoch": 0.5850035974920341, "grad_norm": 1.1199650764465332, "learning_rate": 3.876154784213651e-06, "loss": 0.7407, "step": 11383 }, { "epoch": 0.5850549902353788, "grad_norm": 1.0286909341812134, "learning_rate": 3.875343841379481e-06, "loss": 0.7101, "step": 11384 }, { "epoch": 0.5851063829787234, "grad_norm": 1.0594675540924072, "learning_rate": 3.874532929704895e-06, "loss": 0.759, "step": 11385 }, { "epoch": 0.5851577757220681, "grad_norm": 1.0797507762908936, "learning_rate": 3.873722049212354e-06, "loss": 0.687, "step": 11386 }, { "epoch": 0.5852091684654127, "grad_norm": 1.0735011100769043, "learning_rate": 3.8729111999243275e-06, "loss": 0.7044, "step": 11387 }, { "epoch": 0.5852605612087574, "grad_norm": 0.8172444105148315, "learning_rate": 3.872100381863281e-06, "loss": 0.6631, "step": 11388 }, { "epoch": 0.585311953952102, "grad_norm": 1.151502251625061, "learning_rate": 3.871289595051679e-06, "loss": 0.7113, "step": 11389 }, { "epoch": 0.5853633466954467, "grad_norm": 1.1006399393081665, "learning_rate": 3.870478839511984e-06, "loss": 0.6957, "step": 11390 }, { "epoch": 0.5854147394387912, "grad_norm": 0.6983252167701721, "learning_rate": 3.869668115266658e-06, "loss": 0.5999, "step": 11391 }, { "epoch": 0.5854661321821358, "grad_norm": 1.2201707363128662, "learning_rate": 3.868857422338165e-06, "loss": 0.7259, "step": 11392 }, { "epoch": 0.5855175249254805, "grad_norm": 1.1849849224090576, "learning_rate": 3.868046760748964e-06, "loss": 0.7413, "step": 11393 }, { "epoch": 0.5855689176688251, "grad_norm": 0.8120681643486023, "learning_rate": 3.867236130521516e-06, "loss": 0.6511, "step": 11394 }, { "epoch": 0.5856203104121698, "grad_norm": 1.1112028360366821, "learning_rate": 3.866425531678282e-06, "loss": 0.7398, "step": 11395 }, { "epoch": 0.5856717031555144, "grad_norm": 0.6971831917762756, "learning_rate": 3.865614964241717e-06, "loss": 0.6495, "step": 11396 }, { "epoch": 0.5857230958988591, "grad_norm": 1.069751262664795, "learning_rate": 3.86480442823428e-06, "loss": 0.6733, "step": 11397 }, { "epoch": 0.5857744886422037, "grad_norm": 1.0719739198684692, "learning_rate": 3.863993923678429e-06, "loss": 0.6913, "step": 11398 }, { "epoch": 0.5858258813855484, "grad_norm": 1.015916109085083, "learning_rate": 3.863183450596617e-06, "loss": 0.699, "step": 11399 }, { "epoch": 0.585877274128893, "grad_norm": 9.213147163391113, "learning_rate": 3.8623730090113e-06, "loss": 0.7258, "step": 11400 }, { "epoch": 0.5859286668722377, "grad_norm": 1.0615557432174683, "learning_rate": 3.861562598944933e-06, "loss": 0.7186, "step": 11401 }, { "epoch": 0.5859800596155823, "grad_norm": 0.7782182097434998, "learning_rate": 3.860752220419968e-06, "loss": 0.6361, "step": 11402 }, { "epoch": 0.586031452358927, "grad_norm": 1.0788471698760986, "learning_rate": 3.859941873458857e-06, "loss": 0.6974, "step": 11403 }, { "epoch": 0.5860828451022716, "grad_norm": 0.745480477809906, "learning_rate": 3.859131558084052e-06, "loss": 0.6714, "step": 11404 }, { "epoch": 0.5861342378456162, "grad_norm": 1.0484389066696167, "learning_rate": 3.858321274318006e-06, "loss": 0.7332, "step": 11405 }, { "epoch": 0.5861856305889609, "grad_norm": 0.6571010947227478, "learning_rate": 3.857511022183163e-06, "loss": 0.6702, "step": 11406 }, { "epoch": 0.5862370233323054, "grad_norm": 1.0439367294311523, "learning_rate": 3.856700801701975e-06, "loss": 0.7635, "step": 11407 }, { "epoch": 0.5862884160756501, "grad_norm": 1.0609468221664429, "learning_rate": 3.855890612896892e-06, "loss": 0.6463, "step": 11408 }, { "epoch": 0.5863398088189947, "grad_norm": 1.0257848501205444, "learning_rate": 3.855080455790357e-06, "loss": 0.6744, "step": 11409 }, { "epoch": 0.5863912015623394, "grad_norm": 1.0989704132080078, "learning_rate": 3.854270330404817e-06, "loss": 0.7123, "step": 11410 }, { "epoch": 0.586442594305684, "grad_norm": 1.060628890991211, "learning_rate": 3.853460236762719e-06, "loss": 0.7386, "step": 11411 }, { "epoch": 0.5864939870490287, "grad_norm": 1.069916844367981, "learning_rate": 3.8526501748865075e-06, "loss": 0.7188, "step": 11412 }, { "epoch": 0.5865453797923733, "grad_norm": 1.0351778268814087, "learning_rate": 3.8518401447986225e-06, "loss": 0.7271, "step": 11413 }, { "epoch": 0.586596772535718, "grad_norm": 1.0779837369918823, "learning_rate": 3.8510301465215105e-06, "loss": 0.6725, "step": 11414 }, { "epoch": 0.5866481652790626, "grad_norm": 1.1485346555709839, "learning_rate": 3.850220180077611e-06, "loss": 0.7224, "step": 11415 }, { "epoch": 0.5866995580224073, "grad_norm": 1.1570924520492554, "learning_rate": 3.8494102454893645e-06, "loss": 0.7447, "step": 11416 }, { "epoch": 0.5867509507657519, "grad_norm": 1.137811303138733, "learning_rate": 3.848600342779215e-06, "loss": 0.7356, "step": 11417 }, { "epoch": 0.5868023435090965, "grad_norm": 1.0702612400054932, "learning_rate": 3.847790471969596e-06, "loss": 0.7196, "step": 11418 }, { "epoch": 0.5868537362524412, "grad_norm": 1.0681898593902588, "learning_rate": 3.8469806330829475e-06, "loss": 0.7633, "step": 11419 }, { "epoch": 0.5869051289957858, "grad_norm": 1.1845144033432007, "learning_rate": 3.846170826141708e-06, "loss": 0.7261, "step": 11420 }, { "epoch": 0.5869565217391305, "grad_norm": 1.036198616027832, "learning_rate": 3.8453610511683155e-06, "loss": 0.6733, "step": 11421 }, { "epoch": 0.587007914482475, "grad_norm": 1.0969637632369995, "learning_rate": 3.844551308185202e-06, "loss": 0.7468, "step": 11422 }, { "epoch": 0.5870593072258197, "grad_norm": 0.743741512298584, "learning_rate": 3.843741597214802e-06, "loss": 0.6569, "step": 11423 }, { "epoch": 0.5871106999691643, "grad_norm": 1.019382357597351, "learning_rate": 3.842931918279552e-06, "loss": 0.6991, "step": 11424 }, { "epoch": 0.587162092712509, "grad_norm": 1.1030614376068115, "learning_rate": 3.842122271401883e-06, "loss": 0.7385, "step": 11425 }, { "epoch": 0.5872134854558536, "grad_norm": 0.7521125674247742, "learning_rate": 3.841312656604228e-06, "loss": 0.6453, "step": 11426 }, { "epoch": 0.5872648781991983, "grad_norm": 1.0179952383041382, "learning_rate": 3.840503073909019e-06, "loss": 0.7372, "step": 11427 }, { "epoch": 0.5873162709425429, "grad_norm": 1.0493487119674683, "learning_rate": 3.839693523338684e-06, "loss": 0.7276, "step": 11428 }, { "epoch": 0.5873676636858876, "grad_norm": 0.7874628305435181, "learning_rate": 3.838884004915652e-06, "loss": 0.6575, "step": 11429 }, { "epoch": 0.5874190564292322, "grad_norm": 1.09451162815094, "learning_rate": 3.838074518662355e-06, "loss": 0.7229, "step": 11430 }, { "epoch": 0.5874704491725768, "grad_norm": 0.8572195172309875, "learning_rate": 3.8372650646012155e-06, "loss": 0.6875, "step": 11431 }, { "epoch": 0.5875218419159215, "grad_norm": 1.0479536056518555, "learning_rate": 3.836455642754663e-06, "loss": 0.6967, "step": 11432 }, { "epoch": 0.5875732346592661, "grad_norm": 1.066697359085083, "learning_rate": 3.835646253145123e-06, "loss": 0.6812, "step": 11433 }, { "epoch": 0.5876246274026108, "grad_norm": 1.0161327123641968, "learning_rate": 3.8348368957950215e-06, "loss": 0.6988, "step": 11434 }, { "epoch": 0.5876760201459554, "grad_norm": 1.0635770559310913, "learning_rate": 3.8340275707267804e-06, "loss": 0.6866, "step": 11435 }, { "epoch": 0.5877274128893001, "grad_norm": 1.1258184909820557, "learning_rate": 3.833218277962823e-06, "loss": 0.7857, "step": 11436 }, { "epoch": 0.5877788056326446, "grad_norm": 1.1673272848129272, "learning_rate": 3.832409017525575e-06, "loss": 0.7742, "step": 11437 }, { "epoch": 0.5878301983759893, "grad_norm": 0.7034832835197449, "learning_rate": 3.8315997894374526e-06, "loss": 0.6734, "step": 11438 }, { "epoch": 0.5878815911193339, "grad_norm": 1.0350385904312134, "learning_rate": 3.8307905937208774e-06, "loss": 0.704, "step": 11439 }, { "epoch": 0.5879329838626786, "grad_norm": 0.7118763327598572, "learning_rate": 3.829981430398273e-06, "loss": 0.6371, "step": 11440 }, { "epoch": 0.5879843766060232, "grad_norm": 1.0899955034255981, "learning_rate": 3.8291722994920526e-06, "loss": 0.6795, "step": 11441 }, { "epoch": 0.5880357693493679, "grad_norm": 1.0256341695785522, "learning_rate": 3.828363201024635e-06, "loss": 0.7315, "step": 11442 }, { "epoch": 0.5880871620927125, "grad_norm": 1.04414963722229, "learning_rate": 3.8275541350184405e-06, "loss": 0.7078, "step": 11443 }, { "epoch": 0.5881385548360571, "grad_norm": 1.0545594692230225, "learning_rate": 3.82674510149588e-06, "loss": 0.7147, "step": 11444 }, { "epoch": 0.5881899475794018, "grad_norm": 1.1301032304763794, "learning_rate": 3.8259361004793725e-06, "loss": 0.7732, "step": 11445 }, { "epoch": 0.5882413403227464, "grad_norm": 1.1334842443466187, "learning_rate": 3.825127131991332e-06, "loss": 0.7776, "step": 11446 }, { "epoch": 0.5882927330660911, "grad_norm": 0.7404409050941467, "learning_rate": 3.824318196054167e-06, "loss": 0.636, "step": 11447 }, { "epoch": 0.5883441258094357, "grad_norm": 1.0624581575393677, "learning_rate": 3.823509292690295e-06, "loss": 0.7105, "step": 11448 }, { "epoch": 0.5883955185527804, "grad_norm": 0.9810363054275513, "learning_rate": 3.8227004219221245e-06, "loss": 0.6799, "step": 11449 }, { "epoch": 0.588446911296125, "grad_norm": 1.235578179359436, "learning_rate": 3.821891583772069e-06, "loss": 0.6709, "step": 11450 }, { "epoch": 0.5884983040394697, "grad_norm": 1.0692559480667114, "learning_rate": 3.821082778262533e-06, "loss": 0.7205, "step": 11451 }, { "epoch": 0.5885496967828142, "grad_norm": 1.0192731618881226, "learning_rate": 3.82027400541593e-06, "loss": 0.6985, "step": 11452 }, { "epoch": 0.5886010895261589, "grad_norm": 0.9839527010917664, "learning_rate": 3.819465265254666e-06, "loss": 0.6554, "step": 11453 }, { "epoch": 0.5886524822695035, "grad_norm": 0.7507407069206238, "learning_rate": 3.818656557801146e-06, "loss": 0.6579, "step": 11454 }, { "epoch": 0.5887038750128482, "grad_norm": 0.7295409440994263, "learning_rate": 3.81784788307778e-06, "loss": 0.6973, "step": 11455 }, { "epoch": 0.5887552677561928, "grad_norm": 1.1134618520736694, "learning_rate": 3.81703924110697e-06, "loss": 0.6967, "step": 11456 }, { "epoch": 0.5888066604995374, "grad_norm": 1.0342113971710205, "learning_rate": 3.81623063191112e-06, "loss": 0.6944, "step": 11457 }, { "epoch": 0.5888580532428821, "grad_norm": 1.019368290901184, "learning_rate": 3.815422055512633e-06, "loss": 0.6835, "step": 11458 }, { "epoch": 0.5889094459862267, "grad_norm": 1.0310980081558228, "learning_rate": 3.8146135119339155e-06, "loss": 0.7262, "step": 11459 }, { "epoch": 0.5889608387295714, "grad_norm": 1.0068244934082031, "learning_rate": 3.813805001197364e-06, "loss": 0.6786, "step": 11460 }, { "epoch": 0.589012231472916, "grad_norm": 0.7800037264823914, "learning_rate": 3.8129965233253796e-06, "loss": 0.6307, "step": 11461 }, { "epoch": 0.5890636242162607, "grad_norm": 1.088172435760498, "learning_rate": 3.8121880783403652e-06, "loss": 0.7996, "step": 11462 }, { "epoch": 0.5891150169596053, "grad_norm": 1.0754464864730835, "learning_rate": 3.8113796662647166e-06, "loss": 0.715, "step": 11463 }, { "epoch": 0.58916640970295, "grad_norm": 1.0561606884002686, "learning_rate": 3.8105712871208315e-06, "loss": 0.7602, "step": 11464 }, { "epoch": 0.5892178024462946, "grad_norm": 1.1313804388046265, "learning_rate": 3.8097629409311075e-06, "loss": 0.7305, "step": 11465 }, { "epoch": 0.5892691951896393, "grad_norm": 1.0491067171096802, "learning_rate": 3.8089546277179423e-06, "loss": 0.6884, "step": 11466 }, { "epoch": 0.5893205879329838, "grad_norm": 1.03443443775177, "learning_rate": 3.8081463475037276e-06, "loss": 0.6774, "step": 11467 }, { "epoch": 0.5893719806763285, "grad_norm": 0.9868887066841125, "learning_rate": 3.807338100310859e-06, "loss": 0.6921, "step": 11468 }, { "epoch": 0.5894233734196731, "grad_norm": 1.26797354221344, "learning_rate": 3.806529886161732e-06, "loss": 0.701, "step": 11469 }, { "epoch": 0.5894747661630177, "grad_norm": 0.9926590323448181, "learning_rate": 3.8057217050787348e-06, "loss": 0.6712, "step": 11470 }, { "epoch": 0.5895261589063624, "grad_norm": 1.0562177896499634, "learning_rate": 3.8049135570842604e-06, "loss": 0.7092, "step": 11471 }, { "epoch": 0.589577551649707, "grad_norm": 1.1049439907073975, "learning_rate": 3.8041054422007017e-06, "loss": 0.749, "step": 11472 }, { "epoch": 0.5896289443930517, "grad_norm": 0.768600583076477, "learning_rate": 3.8032973604504443e-06, "loss": 0.6591, "step": 11473 }, { "epoch": 0.5896803371363963, "grad_norm": 0.7454254031181335, "learning_rate": 3.802489311855878e-06, "loss": 0.6671, "step": 11474 }, { "epoch": 0.589731729879741, "grad_norm": 1.1571478843688965, "learning_rate": 3.8016812964393923e-06, "loss": 0.6873, "step": 11475 }, { "epoch": 0.5897831226230856, "grad_norm": 1.0735392570495605, "learning_rate": 3.8008733142233718e-06, "loss": 0.7022, "step": 11476 }, { "epoch": 0.5898345153664303, "grad_norm": 1.075069546699524, "learning_rate": 3.800065365230203e-06, "loss": 0.6875, "step": 11477 }, { "epoch": 0.5898859081097749, "grad_norm": 0.6997358798980713, "learning_rate": 3.7992574494822734e-06, "loss": 0.6649, "step": 11478 }, { "epoch": 0.5899373008531196, "grad_norm": 0.6858736872673035, "learning_rate": 3.798449567001963e-06, "loss": 0.6237, "step": 11479 }, { "epoch": 0.5899886935964642, "grad_norm": 1.1006945371627808, "learning_rate": 3.7976417178116552e-06, "loss": 0.7519, "step": 11480 }, { "epoch": 0.5900400863398089, "grad_norm": 1.0428390502929688, "learning_rate": 3.7968339019337347e-06, "loss": 0.7508, "step": 11481 }, { "epoch": 0.5900914790831535, "grad_norm": 1.025068759918213, "learning_rate": 3.7960261193905836e-06, "loss": 0.746, "step": 11482 }, { "epoch": 0.590142871826498, "grad_norm": 1.090218186378479, "learning_rate": 3.795218370204578e-06, "loss": 0.7187, "step": 11483 }, { "epoch": 0.5901942645698427, "grad_norm": 0.8351790308952332, "learning_rate": 3.7944106543980995e-06, "loss": 0.6172, "step": 11484 }, { "epoch": 0.5902456573131873, "grad_norm": 1.1013123989105225, "learning_rate": 3.7936029719935276e-06, "loss": 0.7106, "step": 11485 }, { "epoch": 0.590297050056532, "grad_norm": 1.015156626701355, "learning_rate": 3.792795323013238e-06, "loss": 0.6869, "step": 11486 }, { "epoch": 0.5903484427998766, "grad_norm": 1.13678777217865, "learning_rate": 3.791987707479608e-06, "loss": 0.8274, "step": 11487 }, { "epoch": 0.5903998355432213, "grad_norm": 0.851416826248169, "learning_rate": 3.7911801254150152e-06, "loss": 0.6657, "step": 11488 }, { "epoch": 0.5904512282865659, "grad_norm": 1.0114825963974, "learning_rate": 3.790372576841831e-06, "loss": 0.661, "step": 11489 }, { "epoch": 0.5905026210299106, "grad_norm": 1.0497239828109741, "learning_rate": 3.789565061782431e-06, "loss": 0.6994, "step": 11490 }, { "epoch": 0.5905540137732552, "grad_norm": 1.1517434120178223, "learning_rate": 3.78875758025919e-06, "loss": 0.7113, "step": 11491 }, { "epoch": 0.5906054065165999, "grad_norm": 1.1280349493026733, "learning_rate": 3.7879501322944756e-06, "loss": 0.6752, "step": 11492 }, { "epoch": 0.5906567992599445, "grad_norm": 1.0300226211547852, "learning_rate": 3.787142717910661e-06, "loss": 0.7306, "step": 11493 }, { "epoch": 0.5907081920032892, "grad_norm": 1.1014257669448853, "learning_rate": 3.7863353371301176e-06, "loss": 0.6951, "step": 11494 }, { "epoch": 0.5907595847466338, "grad_norm": 1.0422186851501465, "learning_rate": 3.785527989975213e-06, "loss": 0.7229, "step": 11495 }, { "epoch": 0.5908109774899785, "grad_norm": 1.0603100061416626, "learning_rate": 3.784720676468315e-06, "loss": 0.7344, "step": 11496 }, { "epoch": 0.5908623702333231, "grad_norm": 1.0279526710510254, "learning_rate": 3.783913396631793e-06, "loss": 0.6351, "step": 11497 }, { "epoch": 0.5909137629766676, "grad_norm": 1.1161231994628906, "learning_rate": 3.783106150488014e-06, "loss": 0.7396, "step": 11498 }, { "epoch": 0.5909651557200123, "grad_norm": 1.1192265748977661, "learning_rate": 3.7822989380593393e-06, "loss": 0.6818, "step": 11499 }, { "epoch": 0.5910165484633569, "grad_norm": 1.1414432525634766, "learning_rate": 3.781491759368136e-06, "loss": 0.7216, "step": 11500 }, { "epoch": 0.5910679412067016, "grad_norm": 1.0672099590301514, "learning_rate": 3.7806846144367704e-06, "loss": 0.6644, "step": 11501 }, { "epoch": 0.5911193339500462, "grad_norm": 1.0559134483337402, "learning_rate": 3.779877503287599e-06, "loss": 0.702, "step": 11502 }, { "epoch": 0.5911707266933909, "grad_norm": 1.0466864109039307, "learning_rate": 3.779070425942988e-06, "loss": 0.6968, "step": 11503 }, { "epoch": 0.5912221194367355, "grad_norm": 1.0700088739395142, "learning_rate": 3.778263382425297e-06, "loss": 0.7692, "step": 11504 }, { "epoch": 0.5912735121800802, "grad_norm": 1.0586183071136475, "learning_rate": 3.7774563727568857e-06, "loss": 0.7382, "step": 11505 }, { "epoch": 0.5913249049234248, "grad_norm": 1.0176730155944824, "learning_rate": 3.7766493969601136e-06, "loss": 0.6501, "step": 11506 }, { "epoch": 0.5913762976667695, "grad_norm": 0.8501995205879211, "learning_rate": 3.7758424550573385e-06, "loss": 0.6794, "step": 11507 }, { "epoch": 0.5914276904101141, "grad_norm": 1.127898931503296, "learning_rate": 3.7750355470709167e-06, "loss": 0.7355, "step": 11508 }, { "epoch": 0.5914790831534588, "grad_norm": 1.0252556800842285, "learning_rate": 3.7742286730232036e-06, "loss": 0.6954, "step": 11509 }, { "epoch": 0.5915304758968034, "grad_norm": 1.053673267364502, "learning_rate": 3.7734218329365586e-06, "loss": 0.7503, "step": 11510 }, { "epoch": 0.5915818686401481, "grad_norm": 0.7045313715934753, "learning_rate": 3.77261502683333e-06, "loss": 0.6904, "step": 11511 }, { "epoch": 0.5916332613834927, "grad_norm": 1.056077003479004, "learning_rate": 3.7718082547358744e-06, "loss": 0.7455, "step": 11512 }, { "epoch": 0.5916846541268372, "grad_norm": 1.1707674264907837, "learning_rate": 3.7710015166665433e-06, "loss": 0.7597, "step": 11513 }, { "epoch": 0.5917360468701819, "grad_norm": 1.1338014602661133, "learning_rate": 3.7701948126476907e-06, "loss": 0.6744, "step": 11514 }, { "epoch": 0.5917874396135265, "grad_norm": 1.035415768623352, "learning_rate": 3.769388142701662e-06, "loss": 0.7209, "step": 11515 }, { "epoch": 0.5918388323568712, "grad_norm": 0.7342637777328491, "learning_rate": 3.76858150685081e-06, "loss": 0.6311, "step": 11516 }, { "epoch": 0.5918902251002158, "grad_norm": 1.0672574043273926, "learning_rate": 3.767774905117484e-06, "loss": 0.7405, "step": 11517 }, { "epoch": 0.5919416178435605, "grad_norm": 1.0254216194152832, "learning_rate": 3.766968337524029e-06, "loss": 0.6809, "step": 11518 }, { "epoch": 0.5919930105869051, "grad_norm": 1.1160180568695068, "learning_rate": 3.766161804092793e-06, "loss": 0.7406, "step": 11519 }, { "epoch": 0.5920444033302498, "grad_norm": 0.7081332206726074, "learning_rate": 3.7653553048461233e-06, "loss": 0.6537, "step": 11520 }, { "epoch": 0.5920957960735944, "grad_norm": 1.1758373975753784, "learning_rate": 3.7645488398063613e-06, "loss": 0.7305, "step": 11521 }, { "epoch": 0.5921471888169391, "grad_norm": 1.0893099308013916, "learning_rate": 3.7637424089958526e-06, "loss": 0.7492, "step": 11522 }, { "epoch": 0.5921985815602837, "grad_norm": 0.9970418810844421, "learning_rate": 3.7629360124369417e-06, "loss": 0.6971, "step": 11523 }, { "epoch": 0.5922499743036284, "grad_norm": 1.1113557815551758, "learning_rate": 3.7621296501519676e-06, "loss": 0.6493, "step": 11524 }, { "epoch": 0.592301367046973, "grad_norm": 1.0411341190338135, "learning_rate": 3.7613233221632715e-06, "loss": 0.6843, "step": 11525 }, { "epoch": 0.5923527597903177, "grad_norm": 1.0228313207626343, "learning_rate": 3.760517028493196e-06, "loss": 0.6722, "step": 11526 }, { "epoch": 0.5924041525336623, "grad_norm": 0.7046017646789551, "learning_rate": 3.7597107691640777e-06, "loss": 0.618, "step": 11527 }, { "epoch": 0.5924555452770068, "grad_norm": 1.0765703916549683, "learning_rate": 3.7589045441982554e-06, "loss": 0.7196, "step": 11528 }, { "epoch": 0.5925069380203515, "grad_norm": 1.1152563095092773, "learning_rate": 3.7580983536180667e-06, "loss": 0.7696, "step": 11529 }, { "epoch": 0.5925583307636961, "grad_norm": 1.0075186491012573, "learning_rate": 3.7572921974458493e-06, "loss": 0.7453, "step": 11530 }, { "epoch": 0.5926097235070408, "grad_norm": 0.7157771587371826, "learning_rate": 3.7564860757039347e-06, "loss": 0.665, "step": 11531 }, { "epoch": 0.5926611162503854, "grad_norm": 1.517165184020996, "learning_rate": 3.7556799884146596e-06, "loss": 0.7078, "step": 11532 }, { "epoch": 0.5927125089937301, "grad_norm": 1.034657597541809, "learning_rate": 3.754873935600359e-06, "loss": 0.6777, "step": 11533 }, { "epoch": 0.5927639017370747, "grad_norm": 0.7848665714263916, "learning_rate": 3.754067917283361e-06, "loss": 0.679, "step": 11534 }, { "epoch": 0.5928152944804194, "grad_norm": 1.0560595989227295, "learning_rate": 3.753261933485999e-06, "loss": 0.6842, "step": 11535 }, { "epoch": 0.592866687223764, "grad_norm": 1.1652435064315796, "learning_rate": 3.7524559842306056e-06, "loss": 0.732, "step": 11536 }, { "epoch": 0.5929180799671087, "grad_norm": 0.7382861375808716, "learning_rate": 3.7516500695395075e-06, "loss": 0.6362, "step": 11537 }, { "epoch": 0.5929694727104533, "grad_norm": 0.7808324694633484, "learning_rate": 3.750844189435034e-06, "loss": 0.6772, "step": 11538 }, { "epoch": 0.593020865453798, "grad_norm": 0.7170704007148743, "learning_rate": 3.7500383439395153e-06, "loss": 0.6522, "step": 11539 }, { "epoch": 0.5930722581971426, "grad_norm": 1.1628870964050293, "learning_rate": 3.749232533075273e-06, "loss": 0.7285, "step": 11540 }, { "epoch": 0.5931236509404872, "grad_norm": 1.0524749755859375, "learning_rate": 3.748426756864637e-06, "loss": 0.7032, "step": 11541 }, { "epoch": 0.5931750436838319, "grad_norm": 1.0761662721633911, "learning_rate": 3.7476210153299295e-06, "loss": 0.7632, "step": 11542 }, { "epoch": 0.5932264364271764, "grad_norm": 1.0588785409927368, "learning_rate": 3.7468153084934783e-06, "loss": 0.7053, "step": 11543 }, { "epoch": 0.5932778291705211, "grad_norm": 1.0721025466918945, "learning_rate": 3.746009636377601e-06, "loss": 0.7278, "step": 11544 }, { "epoch": 0.5933292219138657, "grad_norm": 1.1676280498504639, "learning_rate": 3.7452039990046214e-06, "loss": 0.6952, "step": 11545 }, { "epoch": 0.5933806146572104, "grad_norm": 1.1545284986495972, "learning_rate": 3.7443983963968622e-06, "loss": 0.7053, "step": 11546 }, { "epoch": 0.593432007400555, "grad_norm": 1.070684790611267, "learning_rate": 3.74359282857664e-06, "loss": 0.7544, "step": 11547 }, { "epoch": 0.5934834001438997, "grad_norm": 1.0640026330947876, "learning_rate": 3.7427872955662764e-06, "loss": 0.7137, "step": 11548 }, { "epoch": 0.5935347928872443, "grad_norm": 1.206650972366333, "learning_rate": 3.7419817973880904e-06, "loss": 0.785, "step": 11549 }, { "epoch": 0.593586185630589, "grad_norm": 1.0431939363479614, "learning_rate": 3.741176334064396e-06, "loss": 0.7568, "step": 11550 }, { "epoch": 0.5936375783739336, "grad_norm": 1.1005574464797974, "learning_rate": 3.74037090561751e-06, "loss": 0.6928, "step": 11551 }, { "epoch": 0.5936889711172783, "grad_norm": 1.129004716873169, "learning_rate": 3.7395655120697493e-06, "loss": 0.7232, "step": 11552 }, { "epoch": 0.5937403638606229, "grad_norm": 1.1927634477615356, "learning_rate": 3.7387601534434254e-06, "loss": 0.7031, "step": 11553 }, { "epoch": 0.5937917566039675, "grad_norm": 1.035874605178833, "learning_rate": 3.7379548297608524e-06, "loss": 0.712, "step": 11554 }, { "epoch": 0.5938431493473122, "grad_norm": 1.0102505683898926, "learning_rate": 3.737149541044345e-06, "loss": 0.6867, "step": 11555 }, { "epoch": 0.5938945420906568, "grad_norm": 1.0925077199935913, "learning_rate": 3.7363442873162105e-06, "loss": 0.7103, "step": 11556 }, { "epoch": 0.5939459348340015, "grad_norm": 1.0355783700942993, "learning_rate": 3.7355390685987607e-06, "loss": 0.7611, "step": 11557 }, { "epoch": 0.593997327577346, "grad_norm": 1.0587716102600098, "learning_rate": 3.7347338849143065e-06, "loss": 0.6764, "step": 11558 }, { "epoch": 0.5940487203206907, "grad_norm": 0.7768186926841736, "learning_rate": 3.733928736285155e-06, "loss": 0.6648, "step": 11559 }, { "epoch": 0.5941001130640353, "grad_norm": 1.1297720670700073, "learning_rate": 3.7331236227336125e-06, "loss": 0.7545, "step": 11560 }, { "epoch": 0.59415150580738, "grad_norm": 0.7062411308288574, "learning_rate": 3.7323185442819863e-06, "loss": 0.6308, "step": 11561 }, { "epoch": 0.5942028985507246, "grad_norm": 0.7260512113571167, "learning_rate": 3.731513500952584e-06, "loss": 0.6289, "step": 11562 }, { "epoch": 0.5942542912940693, "grad_norm": 0.683921754360199, "learning_rate": 3.7307084927677058e-06, "loss": 0.7028, "step": 11563 }, { "epoch": 0.5943056840374139, "grad_norm": 0.7255199551582336, "learning_rate": 3.729903519749657e-06, "loss": 0.6375, "step": 11564 }, { "epoch": 0.5943570767807586, "grad_norm": 1.007773518562317, "learning_rate": 3.7290985819207424e-06, "loss": 0.7204, "step": 11565 }, { "epoch": 0.5944084695241032, "grad_norm": 1.0374869108200073, "learning_rate": 3.7282936793032594e-06, "loss": 0.7309, "step": 11566 }, { "epoch": 0.5944598622674478, "grad_norm": 1.0316556692123413, "learning_rate": 3.7274888119195107e-06, "loss": 0.7544, "step": 11567 }, { "epoch": 0.5945112550107925, "grad_norm": 1.0869574546813965, "learning_rate": 3.7266839797917965e-06, "loss": 0.7279, "step": 11568 }, { "epoch": 0.5945626477541371, "grad_norm": 1.0463942289352417, "learning_rate": 3.7258791829424134e-06, "loss": 0.7092, "step": 11569 }, { "epoch": 0.5946140404974818, "grad_norm": 1.1058204174041748, "learning_rate": 3.7250744213936594e-06, "loss": 0.7276, "step": 11570 }, { "epoch": 0.5946654332408264, "grad_norm": 0.7811251878738403, "learning_rate": 3.7242696951678353e-06, "loss": 0.6373, "step": 11571 }, { "epoch": 0.5947168259841711, "grad_norm": 1.1674333810806274, "learning_rate": 3.723465004287231e-06, "loss": 0.8053, "step": 11572 }, { "epoch": 0.5947682187275157, "grad_norm": 1.0823485851287842, "learning_rate": 3.722660348774143e-06, "loss": 0.7337, "step": 11573 }, { "epoch": 0.5948196114708603, "grad_norm": 1.0887356996536255, "learning_rate": 3.7218557286508663e-06, "loss": 0.6892, "step": 11574 }, { "epoch": 0.5948710042142049, "grad_norm": 1.0993797779083252, "learning_rate": 3.7210511439396934e-06, "loss": 0.6934, "step": 11575 }, { "epoch": 0.5949223969575496, "grad_norm": 1.061801791191101, "learning_rate": 3.7202465946629145e-06, "loss": 0.6734, "step": 11576 }, { "epoch": 0.5949737897008942, "grad_norm": 1.0608551502227783, "learning_rate": 3.719442080842821e-06, "loss": 0.6932, "step": 11577 }, { "epoch": 0.5950251824442389, "grad_norm": 1.026380181312561, "learning_rate": 3.7186376025017036e-06, "loss": 0.6585, "step": 11578 }, { "epoch": 0.5950765751875835, "grad_norm": 1.1202996969223022, "learning_rate": 3.71783315966185e-06, "loss": 0.6924, "step": 11579 }, { "epoch": 0.5951279679309281, "grad_norm": 0.9567104578018188, "learning_rate": 3.7170287523455474e-06, "loss": 0.6711, "step": 11580 }, { "epoch": 0.5951793606742728, "grad_norm": 0.7997528910636902, "learning_rate": 3.7162243805750863e-06, "loss": 0.6678, "step": 11581 }, { "epoch": 0.5952307534176174, "grad_norm": 1.0786402225494385, "learning_rate": 3.7154200443727473e-06, "loss": 0.7708, "step": 11582 }, { "epoch": 0.5952821461609621, "grad_norm": 1.1045929193496704, "learning_rate": 3.714615743760818e-06, "loss": 0.6729, "step": 11583 }, { "epoch": 0.5953335389043067, "grad_norm": 0.9946085810661316, "learning_rate": 3.713811478761584e-06, "loss": 0.7403, "step": 11584 }, { "epoch": 0.5953849316476514, "grad_norm": 1.2164396047592163, "learning_rate": 3.7130072493973247e-06, "loss": 0.7642, "step": 11585 }, { "epoch": 0.595436324390996, "grad_norm": 1.07233464717865, "learning_rate": 3.712203055690322e-06, "loss": 0.6574, "step": 11586 }, { "epoch": 0.5954877171343407, "grad_norm": 1.118683099746704, "learning_rate": 3.71139889766286e-06, "loss": 0.7201, "step": 11587 }, { "epoch": 0.5955391098776853, "grad_norm": 0.7063745260238647, "learning_rate": 3.7105947753372164e-06, "loss": 0.6554, "step": 11588 }, { "epoch": 0.5955905026210299, "grad_norm": 1.0506211519241333, "learning_rate": 3.7097906887356694e-06, "loss": 0.6734, "step": 11589 }, { "epoch": 0.5956418953643745, "grad_norm": 1.1040539741516113, "learning_rate": 3.7089866378804985e-06, "loss": 0.7552, "step": 11590 }, { "epoch": 0.5956932881077192, "grad_norm": 1.1247520446777344, "learning_rate": 3.7081826227939826e-06, "loss": 0.7786, "step": 11591 }, { "epoch": 0.5957446808510638, "grad_norm": 1.1174639463424683, "learning_rate": 3.707378643498393e-06, "loss": 0.7758, "step": 11592 }, { "epoch": 0.5957960735944084, "grad_norm": 1.0261585712432861, "learning_rate": 3.7065747000160067e-06, "loss": 0.693, "step": 11593 }, { "epoch": 0.5958474663377531, "grad_norm": 1.1189981698989868, "learning_rate": 3.7057707923690998e-06, "loss": 0.7434, "step": 11594 }, { "epoch": 0.5958988590810977, "grad_norm": 0.8146452903747559, "learning_rate": 3.704966920579941e-06, "loss": 0.6789, "step": 11595 }, { "epoch": 0.5959502518244424, "grad_norm": 1.0878275632858276, "learning_rate": 3.7041630846708047e-06, "loss": 0.6598, "step": 11596 }, { "epoch": 0.596001644567787, "grad_norm": 1.0307964086532593, "learning_rate": 3.7033592846639634e-06, "loss": 0.6947, "step": 11597 }, { "epoch": 0.5960530373111317, "grad_norm": 1.0266512632369995, "learning_rate": 3.7025555205816833e-06, "loss": 0.6963, "step": 11598 }, { "epoch": 0.5961044300544763, "grad_norm": 1.0802282094955444, "learning_rate": 3.7017517924462363e-06, "loss": 0.6882, "step": 11599 }, { "epoch": 0.596155822797821, "grad_norm": 0.7667773962020874, "learning_rate": 3.7009481002798914e-06, "loss": 0.6452, "step": 11600 }, { "epoch": 0.5962072155411656, "grad_norm": 1.1536296606063843, "learning_rate": 3.7001444441049116e-06, "loss": 0.6935, "step": 11601 }, { "epoch": 0.5962586082845103, "grad_norm": 0.7323923707008362, "learning_rate": 3.6993408239435645e-06, "loss": 0.6385, "step": 11602 }, { "epoch": 0.5963100010278549, "grad_norm": 1.0601736307144165, "learning_rate": 3.6985372398181185e-06, "loss": 0.6817, "step": 11603 }, { "epoch": 0.5963613937711995, "grad_norm": 1.1324458122253418, "learning_rate": 3.6977336917508334e-06, "loss": 0.6983, "step": 11604 }, { "epoch": 0.5964127865145441, "grad_norm": 0.690304696559906, "learning_rate": 3.6969301797639725e-06, "loss": 0.6299, "step": 11605 }, { "epoch": 0.5964641792578887, "grad_norm": 1.0298535823822021, "learning_rate": 3.6961267038797988e-06, "loss": 0.717, "step": 11606 }, { "epoch": 0.5965155720012334, "grad_norm": 0.8577505946159363, "learning_rate": 3.695323264120575e-06, "loss": 0.634, "step": 11607 }, { "epoch": 0.596566964744578, "grad_norm": 1.0696932077407837, "learning_rate": 3.6945198605085573e-06, "loss": 0.7, "step": 11608 }, { "epoch": 0.5966183574879227, "grad_norm": 1.0264880657196045, "learning_rate": 3.6937164930660096e-06, "loss": 0.7179, "step": 11609 }, { "epoch": 0.5966697502312673, "grad_norm": 1.0531730651855469, "learning_rate": 3.692913161815186e-06, "loss": 0.7303, "step": 11610 }, { "epoch": 0.596721142974612, "grad_norm": 1.0644382238388062, "learning_rate": 3.692109866778344e-06, "loss": 0.6674, "step": 11611 }, { "epoch": 0.5967725357179566, "grad_norm": 0.6842978000640869, "learning_rate": 3.6913066079777404e-06, "loss": 0.6799, "step": 11612 }, { "epoch": 0.5968239284613013, "grad_norm": 1.1380321979522705, "learning_rate": 3.6905033854356315e-06, "loss": 0.7257, "step": 11613 }, { "epoch": 0.5968753212046459, "grad_norm": 1.0504555702209473, "learning_rate": 3.689700199174268e-06, "loss": 0.6603, "step": 11614 }, { "epoch": 0.5969267139479906, "grad_norm": 1.145670771598816, "learning_rate": 3.688897049215905e-06, "loss": 0.7004, "step": 11615 }, { "epoch": 0.5969781066913352, "grad_norm": 1.1740987300872803, "learning_rate": 3.688093935582795e-06, "loss": 0.7634, "step": 11616 }, { "epoch": 0.5970294994346799, "grad_norm": 1.0503276586532593, "learning_rate": 3.6872908582971878e-06, "loss": 0.7693, "step": 11617 }, { "epoch": 0.5970808921780245, "grad_norm": 1.0400291681289673, "learning_rate": 3.6864878173813333e-06, "loss": 0.7464, "step": 11618 }, { "epoch": 0.597132284921369, "grad_norm": 1.0749807357788086, "learning_rate": 3.6856848128574816e-06, "loss": 0.7078, "step": 11619 }, { "epoch": 0.5971836776647137, "grad_norm": 1.1221987009048462, "learning_rate": 3.6848818447478783e-06, "loss": 0.6786, "step": 11620 }, { "epoch": 0.5972350704080583, "grad_norm": 1.0897870063781738, "learning_rate": 3.6840789130747728e-06, "loss": 0.7569, "step": 11621 }, { "epoch": 0.597286463151403, "grad_norm": 0.7247182130813599, "learning_rate": 3.68327601786041e-06, "loss": 0.6727, "step": 11622 }, { "epoch": 0.5973378558947476, "grad_norm": 1.014936923980713, "learning_rate": 3.6824731591270368e-06, "loss": 0.6759, "step": 11623 }, { "epoch": 0.5973892486380923, "grad_norm": 0.7325012683868408, "learning_rate": 3.681670336896893e-06, "loss": 0.6639, "step": 11624 }, { "epoch": 0.5974406413814369, "grad_norm": 1.0550287961959839, "learning_rate": 3.680867551192224e-06, "loss": 0.7376, "step": 11625 }, { "epoch": 0.5974920341247816, "grad_norm": 1.0291054248809814, "learning_rate": 3.680064802035273e-06, "loss": 0.7328, "step": 11626 }, { "epoch": 0.5975434268681262, "grad_norm": 1.0198391675949097, "learning_rate": 3.679262089448278e-06, "loss": 0.6988, "step": 11627 }, { "epoch": 0.5975948196114709, "grad_norm": 1.2681646347045898, "learning_rate": 3.678459413453479e-06, "loss": 0.7767, "step": 11628 }, { "epoch": 0.5976462123548155, "grad_norm": 1.1136642694473267, "learning_rate": 3.6776567740731185e-06, "loss": 0.7552, "step": 11629 }, { "epoch": 0.5976976050981602, "grad_norm": 1.073568344116211, "learning_rate": 3.676854171329429e-06, "loss": 0.752, "step": 11630 }, { "epoch": 0.5977489978415048, "grad_norm": 0.7971687316894531, "learning_rate": 3.6760516052446514e-06, "loss": 0.6518, "step": 11631 }, { "epoch": 0.5978003905848495, "grad_norm": 1.1023242473602295, "learning_rate": 3.6752490758410218e-06, "loss": 0.7159, "step": 11632 }, { "epoch": 0.5978517833281941, "grad_norm": 0.7493652701377869, "learning_rate": 3.674446583140772e-06, "loss": 0.6232, "step": 11633 }, { "epoch": 0.5979031760715386, "grad_norm": 1.0546517372131348, "learning_rate": 3.6736441271661366e-06, "loss": 0.6756, "step": 11634 }, { "epoch": 0.5979545688148833, "grad_norm": 0.663945198059082, "learning_rate": 3.6728417079393506e-06, "loss": 0.6618, "step": 11635 }, { "epoch": 0.5980059615582279, "grad_norm": 1.0837557315826416, "learning_rate": 3.6720393254826423e-06, "loss": 0.7142, "step": 11636 }, { "epoch": 0.5980573543015726, "grad_norm": 1.0990701913833618, "learning_rate": 3.6712369798182445e-06, "loss": 0.688, "step": 11637 }, { "epoch": 0.5981087470449172, "grad_norm": 0.9982719421386719, "learning_rate": 3.6704346709683863e-06, "loss": 0.7082, "step": 11638 }, { "epoch": 0.5981601397882619, "grad_norm": 1.1350024938583374, "learning_rate": 3.6696323989552973e-06, "loss": 0.6982, "step": 11639 }, { "epoch": 0.5982115325316065, "grad_norm": 1.0660784244537354, "learning_rate": 3.6688301638012036e-06, "loss": 0.7206, "step": 11640 }, { "epoch": 0.5982629252749512, "grad_norm": 1.0749998092651367, "learning_rate": 3.6680279655283325e-06, "loss": 0.7334, "step": 11641 }, { "epoch": 0.5983143180182958, "grad_norm": 1.0760926008224487, "learning_rate": 3.667225804158912e-06, "loss": 0.6986, "step": 11642 }, { "epoch": 0.5983657107616405, "grad_norm": 1.033233880996704, "learning_rate": 3.6664236797151624e-06, "loss": 0.7147, "step": 11643 }, { "epoch": 0.5984171035049851, "grad_norm": 1.0752875804901123, "learning_rate": 3.6656215922193096e-06, "loss": 0.7152, "step": 11644 }, { "epoch": 0.5984684962483298, "grad_norm": 1.0163453817367554, "learning_rate": 3.6648195416935774e-06, "loss": 0.6696, "step": 11645 }, { "epoch": 0.5985198889916744, "grad_norm": 1.0013858079910278, "learning_rate": 3.6640175281601833e-06, "loss": 0.6845, "step": 11646 }, { "epoch": 0.5985712817350191, "grad_norm": 0.7177518606185913, "learning_rate": 3.663215551641351e-06, "loss": 0.6888, "step": 11647 }, { "epoch": 0.5986226744783637, "grad_norm": 1.096608281135559, "learning_rate": 3.6624136121592995e-06, "loss": 0.768, "step": 11648 }, { "epoch": 0.5986740672217082, "grad_norm": 1.0215293169021606, "learning_rate": 3.6616117097362464e-06, "loss": 0.6327, "step": 11649 }, { "epoch": 0.5987254599650529, "grad_norm": 1.0305129289627075, "learning_rate": 3.6608098443944098e-06, "loss": 0.7101, "step": 11650 }, { "epoch": 0.5987768527083975, "grad_norm": 1.0206955671310425, "learning_rate": 3.6600080161560073e-06, "loss": 0.7401, "step": 11651 }, { "epoch": 0.5988282454517422, "grad_norm": 1.0805392265319824, "learning_rate": 3.6592062250432513e-06, "loss": 0.7816, "step": 11652 }, { "epoch": 0.5988796381950868, "grad_norm": 1.2910736799240112, "learning_rate": 3.658404471078357e-06, "loss": 0.7484, "step": 11653 }, { "epoch": 0.5989310309384315, "grad_norm": 1.0983330011367798, "learning_rate": 3.657602754283539e-06, "loss": 0.6854, "step": 11654 }, { "epoch": 0.5989824236817761, "grad_norm": 1.1698567867279053, "learning_rate": 3.6568010746810113e-06, "loss": 0.7388, "step": 11655 }, { "epoch": 0.5990338164251208, "grad_norm": 1.0521223545074463, "learning_rate": 3.6559994322929805e-06, "loss": 0.6431, "step": 11656 }, { "epoch": 0.5990852091684654, "grad_norm": 1.1070897579193115, "learning_rate": 3.6551978271416587e-06, "loss": 0.729, "step": 11657 }, { "epoch": 0.5991366019118101, "grad_norm": 1.0167614221572876, "learning_rate": 3.6543962592492566e-06, "loss": 0.7097, "step": 11658 }, { "epoch": 0.5991879946551547, "grad_norm": 0.8062676191329956, "learning_rate": 3.65359472863798e-06, "loss": 0.7016, "step": 11659 }, { "epoch": 0.5992393873984994, "grad_norm": 0.7227058410644531, "learning_rate": 3.6527932353300385e-06, "loss": 0.6897, "step": 11660 }, { "epoch": 0.599290780141844, "grad_norm": 1.0217188596725464, "learning_rate": 3.6519917793476366e-06, "loss": 0.7292, "step": 11661 }, { "epoch": 0.5993421728851887, "grad_norm": 0.752909243106842, "learning_rate": 3.651190360712978e-06, "loss": 0.6529, "step": 11662 }, { "epoch": 0.5993935656285333, "grad_norm": 1.0698862075805664, "learning_rate": 3.6503889794482695e-06, "loss": 0.7409, "step": 11663 }, { "epoch": 0.599444958371878, "grad_norm": 1.3373955488204956, "learning_rate": 3.6495876355757142e-06, "loss": 0.7123, "step": 11664 }, { "epoch": 0.5994963511152225, "grad_norm": 1.2184749841690063, "learning_rate": 3.6487863291175114e-06, "loss": 0.7078, "step": 11665 }, { "epoch": 0.5995477438585671, "grad_norm": 1.2269978523254395, "learning_rate": 3.6479850600958626e-06, "loss": 0.7708, "step": 11666 }, { "epoch": 0.5995991366019118, "grad_norm": 0.8566799163818359, "learning_rate": 3.6471838285329686e-06, "loss": 0.6976, "step": 11667 }, { "epoch": 0.5996505293452564, "grad_norm": 1.0815638303756714, "learning_rate": 3.64638263445103e-06, "loss": 0.697, "step": 11668 }, { "epoch": 0.5997019220886011, "grad_norm": 1.1381852626800537, "learning_rate": 3.6455814778722406e-06, "loss": 0.7504, "step": 11669 }, { "epoch": 0.5997533148319457, "grad_norm": 0.9927752017974854, "learning_rate": 3.6447803588187987e-06, "loss": 0.6789, "step": 11670 }, { "epoch": 0.5998047075752904, "grad_norm": 1.0346016883850098, "learning_rate": 3.643979277312901e-06, "loss": 0.6972, "step": 11671 }, { "epoch": 0.599856100318635, "grad_norm": 1.1680797338485718, "learning_rate": 3.643178233376742e-06, "loss": 0.7197, "step": 11672 }, { "epoch": 0.5999074930619797, "grad_norm": 0.6752480268478394, "learning_rate": 3.6423772270325135e-06, "loss": 0.6182, "step": 11673 }, { "epoch": 0.5999588858053243, "grad_norm": 1.0828133821487427, "learning_rate": 3.6415762583024124e-06, "loss": 0.7292, "step": 11674 }, { "epoch": 0.600010278548669, "grad_norm": 1.0699411630630493, "learning_rate": 3.6407753272086245e-06, "loss": 0.6896, "step": 11675 }, { "epoch": 0.6000616712920136, "grad_norm": 1.1065906286239624, "learning_rate": 3.6399744337733434e-06, "loss": 0.7443, "step": 11676 }, { "epoch": 0.6001130640353582, "grad_norm": 1.0475575923919678, "learning_rate": 3.639173578018761e-06, "loss": 0.7223, "step": 11677 }, { "epoch": 0.6001644567787029, "grad_norm": 0.7609853148460388, "learning_rate": 3.6383727599670598e-06, "loss": 0.6446, "step": 11678 }, { "epoch": 0.6002158495220475, "grad_norm": 1.1061346530914307, "learning_rate": 3.637571979640431e-06, "loss": 0.7572, "step": 11679 }, { "epoch": 0.6002672422653921, "grad_norm": 1.0751458406448364, "learning_rate": 3.6367712370610607e-06, "loss": 0.7741, "step": 11680 }, { "epoch": 0.6003186350087367, "grad_norm": 0.8085489869117737, "learning_rate": 3.6359705322511337e-06, "loss": 0.6636, "step": 11681 }, { "epoch": 0.6003700277520814, "grad_norm": 1.0430597066879272, "learning_rate": 3.635169865232833e-06, "loss": 0.6927, "step": 11682 }, { "epoch": 0.600421420495426, "grad_norm": 1.0076080560684204, "learning_rate": 3.634369236028344e-06, "loss": 0.7009, "step": 11683 }, { "epoch": 0.6004728132387707, "grad_norm": 1.0606179237365723, "learning_rate": 3.6335686446598497e-06, "loss": 0.7447, "step": 11684 }, { "epoch": 0.6005242059821153, "grad_norm": 0.7435125112533569, "learning_rate": 3.6327680911495265e-06, "loss": 0.678, "step": 11685 }, { "epoch": 0.60057559872546, "grad_norm": 1.2992113828659058, "learning_rate": 3.631967575519558e-06, "loss": 0.7244, "step": 11686 }, { "epoch": 0.6006269914688046, "grad_norm": 0.755803644657135, "learning_rate": 3.6311670977921245e-06, "loss": 0.6691, "step": 11687 }, { "epoch": 0.6006783842121493, "grad_norm": 1.100875735282898, "learning_rate": 3.6303666579893994e-06, "loss": 0.7124, "step": 11688 }, { "epoch": 0.6007297769554939, "grad_norm": 1.0880601406097412, "learning_rate": 3.629566256133562e-06, "loss": 0.753, "step": 11689 }, { "epoch": 0.6007811696988385, "grad_norm": 1.1492772102355957, "learning_rate": 3.62876589224679e-06, "loss": 0.7784, "step": 11690 }, { "epoch": 0.6008325624421832, "grad_norm": 1.1275568008422852, "learning_rate": 3.627965566351255e-06, "loss": 0.7312, "step": 11691 }, { "epoch": 0.6008839551855278, "grad_norm": 10.821277618408203, "learning_rate": 3.627165278469132e-06, "loss": 0.8398, "step": 11692 }, { "epoch": 0.6009353479288725, "grad_norm": 1.0418554544448853, "learning_rate": 3.626365028622596e-06, "loss": 0.7503, "step": 11693 }, { "epoch": 0.6009867406722171, "grad_norm": 1.113433837890625, "learning_rate": 3.625564816833814e-06, "loss": 0.7725, "step": 11694 }, { "epoch": 0.6010381334155617, "grad_norm": 0.708872377872467, "learning_rate": 3.6247646431249596e-06, "loss": 0.6314, "step": 11695 }, { "epoch": 0.6010895261589063, "grad_norm": 0.7126109004020691, "learning_rate": 3.623964507518203e-06, "loss": 0.6458, "step": 11696 }, { "epoch": 0.601140918902251, "grad_norm": 1.0847748517990112, "learning_rate": 3.6231644100357094e-06, "loss": 0.7663, "step": 11697 }, { "epoch": 0.6011923116455956, "grad_norm": 1.0719671249389648, "learning_rate": 3.6223643506996483e-06, "loss": 0.7239, "step": 11698 }, { "epoch": 0.6012437043889403, "grad_norm": 1.075773000717163, "learning_rate": 3.6215643295321856e-06, "loss": 0.7318, "step": 11699 }, { "epoch": 0.6012950971322849, "grad_norm": 1.1973028182983398, "learning_rate": 3.6207643465554886e-06, "loss": 0.7249, "step": 11700 }, { "epoch": 0.6013464898756296, "grad_norm": 1.0512226819992065, "learning_rate": 3.619964401791718e-06, "loss": 0.7533, "step": 11701 }, { "epoch": 0.6013978826189742, "grad_norm": 0.6967232823371887, "learning_rate": 3.619164495263039e-06, "loss": 0.6646, "step": 11702 }, { "epoch": 0.6014492753623188, "grad_norm": 1.0241248607635498, "learning_rate": 3.6183646269916157e-06, "loss": 0.6661, "step": 11703 }, { "epoch": 0.6015006681056635, "grad_norm": 1.0468987226486206, "learning_rate": 3.617564796999605e-06, "loss": 0.7527, "step": 11704 }, { "epoch": 0.6015520608490081, "grad_norm": 1.119964838027954, "learning_rate": 3.6167650053091686e-06, "loss": 0.7496, "step": 11705 }, { "epoch": 0.6016034535923528, "grad_norm": 1.0813889503479004, "learning_rate": 3.615965251942467e-06, "loss": 0.7477, "step": 11706 }, { "epoch": 0.6016548463356974, "grad_norm": 1.116546869277954, "learning_rate": 3.6151655369216553e-06, "loss": 0.7315, "step": 11707 }, { "epoch": 0.6017062390790421, "grad_norm": 0.7476264834403992, "learning_rate": 3.6143658602688918e-06, "loss": 0.6453, "step": 11708 }, { "epoch": 0.6017576318223867, "grad_norm": 0.758554995059967, "learning_rate": 3.613566222006333e-06, "loss": 0.6899, "step": 11709 }, { "epoch": 0.6018090245657313, "grad_norm": 1.0207343101501465, "learning_rate": 3.612766622156132e-06, "loss": 0.6966, "step": 11710 }, { "epoch": 0.6018604173090759, "grad_norm": 1.0654312372207642, "learning_rate": 3.611967060740444e-06, "loss": 0.6971, "step": 11711 }, { "epoch": 0.6019118100524206, "grad_norm": 1.036113977432251, "learning_rate": 3.61116753778142e-06, "loss": 0.6866, "step": 11712 }, { "epoch": 0.6019632027957652, "grad_norm": 1.0024757385253906, "learning_rate": 3.610368053301212e-06, "loss": 0.7112, "step": 11713 }, { "epoch": 0.6020145955391099, "grad_norm": 1.0704784393310547, "learning_rate": 3.6095686073219706e-06, "loss": 0.7163, "step": 11714 }, { "epoch": 0.6020659882824545, "grad_norm": 0.7803016901016235, "learning_rate": 3.6087691998658445e-06, "loss": 0.6724, "step": 11715 }, { "epoch": 0.6021173810257991, "grad_norm": 1.097983479499817, "learning_rate": 3.607969830954985e-06, "loss": 0.7446, "step": 11716 }, { "epoch": 0.6021687737691438, "grad_norm": 0.7536942958831787, "learning_rate": 3.607170500611534e-06, "loss": 0.657, "step": 11717 }, { "epoch": 0.6022201665124884, "grad_norm": 1.1865179538726807, "learning_rate": 3.606371208857642e-06, "loss": 0.7218, "step": 11718 }, { "epoch": 0.6022715592558331, "grad_norm": 1.0578848123550415, "learning_rate": 3.605571955715454e-06, "loss": 0.7401, "step": 11719 }, { "epoch": 0.6023229519991777, "grad_norm": 1.1631569862365723, "learning_rate": 3.604772741207111e-06, "loss": 0.7259, "step": 11720 }, { "epoch": 0.6023743447425224, "grad_norm": 1.0604852437973022, "learning_rate": 3.603973565354757e-06, "loss": 0.7317, "step": 11721 }, { "epoch": 0.602425737485867, "grad_norm": 1.0480883121490479, "learning_rate": 3.6031744281805354e-06, "loss": 0.7332, "step": 11722 }, { "epoch": 0.6024771302292117, "grad_norm": 0.7200015783309937, "learning_rate": 3.6023753297065853e-06, "loss": 0.6605, "step": 11723 }, { "epoch": 0.6025285229725563, "grad_norm": 1.068132758140564, "learning_rate": 3.601576269955047e-06, "loss": 0.7307, "step": 11724 }, { "epoch": 0.6025799157159009, "grad_norm": 1.0491328239440918, "learning_rate": 3.600777248948062e-06, "loss": 0.6706, "step": 11725 }, { "epoch": 0.6026313084592455, "grad_norm": 1.086289882659912, "learning_rate": 3.599978266707762e-06, "loss": 0.7746, "step": 11726 }, { "epoch": 0.6026827012025902, "grad_norm": 1.0240601301193237, "learning_rate": 3.599179323256288e-06, "loss": 0.7034, "step": 11727 }, { "epoch": 0.6027340939459348, "grad_norm": 0.831072211265564, "learning_rate": 3.598380418615775e-06, "loss": 0.6563, "step": 11728 }, { "epoch": 0.6027854866892794, "grad_norm": 0.9224907755851746, "learning_rate": 3.5975815528083545e-06, "loss": 0.6713, "step": 11729 }, { "epoch": 0.6028368794326241, "grad_norm": 1.0543758869171143, "learning_rate": 3.5967827258561626e-06, "loss": 0.7365, "step": 11730 }, { "epoch": 0.6028882721759687, "grad_norm": 1.0721358060836792, "learning_rate": 3.59598393778133e-06, "loss": 0.7286, "step": 11731 }, { "epoch": 0.6029396649193134, "grad_norm": 1.0560622215270996, "learning_rate": 3.5951851886059898e-06, "loss": 0.721, "step": 11732 }, { "epoch": 0.602991057662658, "grad_norm": 1.0434192419052124, "learning_rate": 3.5943864783522693e-06, "loss": 0.6894, "step": 11733 }, { "epoch": 0.6030424504060027, "grad_norm": 0.717589795589447, "learning_rate": 3.5935878070422993e-06, "loss": 0.7028, "step": 11734 }, { "epoch": 0.6030938431493473, "grad_norm": 1.0498300790786743, "learning_rate": 3.592789174698209e-06, "loss": 0.759, "step": 11735 }, { "epoch": 0.603145235892692, "grad_norm": 1.0579242706298828, "learning_rate": 3.5919905813421214e-06, "loss": 0.6718, "step": 11736 }, { "epoch": 0.6031966286360366, "grad_norm": 1.1115270853042603, "learning_rate": 3.5911920269961642e-06, "loss": 0.7154, "step": 11737 }, { "epoch": 0.6032480213793813, "grad_norm": 1.057466983795166, "learning_rate": 3.590393511682464e-06, "loss": 0.7389, "step": 11738 }, { "epoch": 0.6032994141227259, "grad_norm": 1.1278526782989502, "learning_rate": 3.5895950354231414e-06, "loss": 0.7101, "step": 11739 }, { "epoch": 0.6033508068660706, "grad_norm": 1.0959759950637817, "learning_rate": 3.588796598240319e-06, "loss": 0.7177, "step": 11740 }, { "epoch": 0.6034021996094151, "grad_norm": 0.7061886191368103, "learning_rate": 3.5879982001561205e-06, "loss": 0.701, "step": 11741 }, { "epoch": 0.6034535923527597, "grad_norm": 1.0735312700271606, "learning_rate": 3.5871998411926635e-06, "loss": 0.7551, "step": 11742 }, { "epoch": 0.6035049850961044, "grad_norm": 1.0347939729690552, "learning_rate": 3.5864015213720694e-06, "loss": 0.7363, "step": 11743 }, { "epoch": 0.603556377839449, "grad_norm": 1.0885738134384155, "learning_rate": 3.5856032407164572e-06, "loss": 0.6831, "step": 11744 }, { "epoch": 0.6036077705827937, "grad_norm": 1.2064160108566284, "learning_rate": 3.58480499924794e-06, "loss": 0.6967, "step": 11745 }, { "epoch": 0.6036591633261383, "grad_norm": 1.189035415649414, "learning_rate": 3.584006796988636e-06, "loss": 0.7264, "step": 11746 }, { "epoch": 0.603710556069483, "grad_norm": 0.7170475125312805, "learning_rate": 3.583208633960661e-06, "loss": 0.628, "step": 11747 }, { "epoch": 0.6037619488128276, "grad_norm": 1.103580355644226, "learning_rate": 3.5824105101861296e-06, "loss": 0.7175, "step": 11748 }, { "epoch": 0.6038133415561723, "grad_norm": 0.9946369528770447, "learning_rate": 3.581612425687151e-06, "loss": 0.7539, "step": 11749 }, { "epoch": 0.6038647342995169, "grad_norm": 1.0530636310577393, "learning_rate": 3.5808143804858388e-06, "loss": 0.7205, "step": 11750 }, { "epoch": 0.6039161270428616, "grad_norm": 1.059380054473877, "learning_rate": 3.5800163746043037e-06, "loss": 0.7356, "step": 11751 }, { "epoch": 0.6039675197862062, "grad_norm": 1.0513274669647217, "learning_rate": 3.5792184080646547e-06, "loss": 0.6743, "step": 11752 }, { "epoch": 0.6040189125295509, "grad_norm": 1.059709906578064, "learning_rate": 3.578420480889e-06, "loss": 0.7441, "step": 11753 }, { "epoch": 0.6040703052728955, "grad_norm": 1.5214269161224365, "learning_rate": 3.57762259309945e-06, "loss": 0.6808, "step": 11754 }, { "epoch": 0.6041216980162402, "grad_norm": 0.7365110516548157, "learning_rate": 3.576824744718106e-06, "loss": 0.6476, "step": 11755 }, { "epoch": 0.6041730907595847, "grad_norm": 0.7900581955909729, "learning_rate": 3.576026935767074e-06, "loss": 0.6679, "step": 11756 }, { "epoch": 0.6042244835029293, "grad_norm": 0.8507570028305054, "learning_rate": 3.5752291662684622e-06, "loss": 0.6142, "step": 11757 }, { "epoch": 0.604275876246274, "grad_norm": 1.0490598678588867, "learning_rate": 3.5744314362443687e-06, "loss": 0.677, "step": 11758 }, { "epoch": 0.6043272689896186, "grad_norm": 1.072921633720398, "learning_rate": 3.573633745716897e-06, "loss": 0.7443, "step": 11759 }, { "epoch": 0.6043786617329633, "grad_norm": 1.035670518875122, "learning_rate": 3.5728360947081485e-06, "loss": 0.7021, "step": 11760 }, { "epoch": 0.6044300544763079, "grad_norm": 1.1715338230133057, "learning_rate": 3.5720384832402217e-06, "loss": 0.7311, "step": 11761 }, { "epoch": 0.6044814472196526, "grad_norm": 1.0863670110702515, "learning_rate": 3.5712409113352163e-06, "loss": 0.6934, "step": 11762 }, { "epoch": 0.6045328399629972, "grad_norm": 1.0841480493545532, "learning_rate": 3.570443379015229e-06, "loss": 0.7053, "step": 11763 }, { "epoch": 0.6045842327063419, "grad_norm": 0.7957704663276672, "learning_rate": 3.5696458863023563e-06, "loss": 0.6817, "step": 11764 }, { "epoch": 0.6046356254496865, "grad_norm": 1.0549801588058472, "learning_rate": 3.5688484332186923e-06, "loss": 0.682, "step": 11765 }, { "epoch": 0.6046870181930312, "grad_norm": 1.0382803678512573, "learning_rate": 3.5680510197863325e-06, "loss": 0.7133, "step": 11766 }, { "epoch": 0.6047384109363758, "grad_norm": 1.0233252048492432, "learning_rate": 3.567253646027372e-06, "loss": 0.731, "step": 11767 }, { "epoch": 0.6047898036797205, "grad_norm": 1.0693570375442505, "learning_rate": 3.5664563119638986e-06, "loss": 0.7064, "step": 11768 }, { "epoch": 0.6048411964230651, "grad_norm": 1.039784550666809, "learning_rate": 3.565659017618004e-06, "loss": 0.7181, "step": 11769 }, { "epoch": 0.6048925891664098, "grad_norm": 1.1097475290298462, "learning_rate": 3.564861763011781e-06, "loss": 0.6816, "step": 11770 }, { "epoch": 0.6049439819097543, "grad_norm": 1.183729648590088, "learning_rate": 3.564064548167314e-06, "loss": 0.737, "step": 11771 }, { "epoch": 0.6049953746530989, "grad_norm": 1.2708473205566406, "learning_rate": 3.5632673731066935e-06, "loss": 0.7365, "step": 11772 }, { "epoch": 0.6050467673964436, "grad_norm": 1.0473058223724365, "learning_rate": 3.5624702378520047e-06, "loss": 0.7275, "step": 11773 }, { "epoch": 0.6050981601397882, "grad_norm": 1.0184966325759888, "learning_rate": 3.5616731424253337e-06, "loss": 0.753, "step": 11774 }, { "epoch": 0.6051495528831329, "grad_norm": 1.0237458944320679, "learning_rate": 3.5608760868487634e-06, "loss": 0.691, "step": 11775 }, { "epoch": 0.6052009456264775, "grad_norm": 1.064508318901062, "learning_rate": 3.5600790711443776e-06, "loss": 0.6798, "step": 11776 }, { "epoch": 0.6052523383698222, "grad_norm": 1.0603259801864624, "learning_rate": 3.559282095334261e-06, "loss": 0.7544, "step": 11777 }, { "epoch": 0.6053037311131668, "grad_norm": 1.0520302057266235, "learning_rate": 3.55848515944049e-06, "loss": 0.7453, "step": 11778 }, { "epoch": 0.6053551238565115, "grad_norm": 1.138112187385559, "learning_rate": 3.5576882634851467e-06, "loss": 0.7626, "step": 11779 }, { "epoch": 0.6054065165998561, "grad_norm": 1.0735533237457275, "learning_rate": 3.5568914074903114e-06, "loss": 0.6623, "step": 11780 }, { "epoch": 0.6054579093432008, "grad_norm": 1.0294348001480103, "learning_rate": 3.5560945914780575e-06, "loss": 0.7908, "step": 11781 }, { "epoch": 0.6055093020865454, "grad_norm": 1.0777082443237305, "learning_rate": 3.5552978154704644e-06, "loss": 0.7921, "step": 11782 }, { "epoch": 0.6055606948298901, "grad_norm": 0.7403438091278076, "learning_rate": 3.5545010794896073e-06, "loss": 0.6818, "step": 11783 }, { "epoch": 0.6056120875732347, "grad_norm": 1.1131402254104614, "learning_rate": 3.5537043835575597e-06, "loss": 0.67, "step": 11784 }, { "epoch": 0.6056634803165794, "grad_norm": 0.7971148490905762, "learning_rate": 3.5529077276963946e-06, "loss": 0.6476, "step": 11785 }, { "epoch": 0.6057148730599239, "grad_norm": 1.073693037033081, "learning_rate": 3.552111111928187e-06, "loss": 0.711, "step": 11786 }, { "epoch": 0.6057662658032685, "grad_norm": 1.1514228582382202, "learning_rate": 3.5513145362750036e-06, "loss": 0.753, "step": 11787 }, { "epoch": 0.6058176585466132, "grad_norm": 1.116470456123352, "learning_rate": 3.5505180007589146e-06, "loss": 0.6996, "step": 11788 }, { "epoch": 0.6058690512899578, "grad_norm": 0.7149177193641663, "learning_rate": 3.549721505401994e-06, "loss": 0.6794, "step": 11789 }, { "epoch": 0.6059204440333025, "grad_norm": 1.0795890092849731, "learning_rate": 3.5489250502263023e-06, "loss": 0.6836, "step": 11790 }, { "epoch": 0.6059718367766471, "grad_norm": 1.0409691333770752, "learning_rate": 3.5481286352539097e-06, "loss": 0.6564, "step": 11791 }, { "epoch": 0.6060232295199918, "grad_norm": 1.0496233701705933, "learning_rate": 3.5473322605068806e-06, "loss": 0.6415, "step": 11792 }, { "epoch": 0.6060746222633364, "grad_norm": 0.6695075035095215, "learning_rate": 3.5465359260072814e-06, "loss": 0.6733, "step": 11793 }, { "epoch": 0.6061260150066811, "grad_norm": 1.0904182195663452, "learning_rate": 3.545739631777172e-06, "loss": 0.7471, "step": 11794 }, { "epoch": 0.6061774077500257, "grad_norm": 1.098577618598938, "learning_rate": 3.5449433778386167e-06, "loss": 0.7332, "step": 11795 }, { "epoch": 0.6062288004933704, "grad_norm": 1.1283539533615112, "learning_rate": 3.5441471642136773e-06, "loss": 0.7676, "step": 11796 }, { "epoch": 0.606280193236715, "grad_norm": 1.0964044332504272, "learning_rate": 3.54335099092441e-06, "loss": 0.7322, "step": 11797 }, { "epoch": 0.6063315859800597, "grad_norm": 1.1049939393997192, "learning_rate": 3.5425548579928757e-06, "loss": 0.7244, "step": 11798 }, { "epoch": 0.6063829787234043, "grad_norm": 1.0632071495056152, "learning_rate": 3.5417587654411334e-06, "loss": 0.6854, "step": 11799 }, { "epoch": 0.606434371466749, "grad_norm": 0.7904536128044128, "learning_rate": 3.540962713291237e-06, "loss": 0.6806, "step": 11800 }, { "epoch": 0.6064857642100935, "grad_norm": 1.1116286516189575, "learning_rate": 3.5401667015652414e-06, "loss": 0.7135, "step": 11801 }, { "epoch": 0.6065371569534381, "grad_norm": 1.1878255605697632, "learning_rate": 3.539370730285204e-06, "loss": 0.7358, "step": 11802 }, { "epoch": 0.6065885496967828, "grad_norm": 1.1322591304779053, "learning_rate": 3.5385747994731747e-06, "loss": 0.7162, "step": 11803 }, { "epoch": 0.6066399424401274, "grad_norm": 1.041743516921997, "learning_rate": 3.537778909151206e-06, "loss": 0.7333, "step": 11804 }, { "epoch": 0.6066913351834721, "grad_norm": 1.0568022727966309, "learning_rate": 3.5369830593413524e-06, "loss": 0.6746, "step": 11805 }, { "epoch": 0.6067427279268167, "grad_norm": 1.1037957668304443, "learning_rate": 3.5361872500656584e-06, "loss": 0.6883, "step": 11806 }, { "epoch": 0.6067941206701614, "grad_norm": 1.118453860282898, "learning_rate": 3.5353914813461756e-06, "loss": 0.7183, "step": 11807 }, { "epoch": 0.606845513413506, "grad_norm": 1.151315689086914, "learning_rate": 3.5345957532049495e-06, "loss": 0.7435, "step": 11808 }, { "epoch": 0.6068969061568507, "grad_norm": 1.054097056388855, "learning_rate": 3.5338000656640303e-06, "loss": 0.6719, "step": 11809 }, { "epoch": 0.6069482989001953, "grad_norm": 1.0745588541030884, "learning_rate": 3.5330044187454593e-06, "loss": 0.7456, "step": 11810 }, { "epoch": 0.60699969164354, "grad_norm": 1.0319892168045044, "learning_rate": 3.532208812471281e-06, "loss": 0.7353, "step": 11811 }, { "epoch": 0.6070510843868846, "grad_norm": 1.139896273612976, "learning_rate": 3.531413246863541e-06, "loss": 0.7729, "step": 11812 }, { "epoch": 0.6071024771302292, "grad_norm": 1.1132032871246338, "learning_rate": 3.530617721944279e-06, "loss": 0.6698, "step": 11813 }, { "epoch": 0.6071538698735739, "grad_norm": 1.096381425857544, "learning_rate": 3.529822237735535e-06, "loss": 0.7583, "step": 11814 }, { "epoch": 0.6072052626169185, "grad_norm": 1.1011208295822144, "learning_rate": 3.5290267942593513e-06, "loss": 0.7839, "step": 11815 }, { "epoch": 0.6072566553602631, "grad_norm": 0.7371395826339722, "learning_rate": 3.528231391537763e-06, "loss": 0.6777, "step": 11816 }, { "epoch": 0.6073080481036077, "grad_norm": 1.111451268196106, "learning_rate": 3.52743602959281e-06, "loss": 0.7193, "step": 11817 }, { "epoch": 0.6073594408469524, "grad_norm": 1.1175260543823242, "learning_rate": 3.526640708446529e-06, "loss": 0.6724, "step": 11818 }, { "epoch": 0.607410833590297, "grad_norm": 1.0813630819320679, "learning_rate": 3.525845428120952e-06, "loss": 0.7462, "step": 11819 }, { "epoch": 0.6074622263336417, "grad_norm": 1.0229310989379883, "learning_rate": 3.525050188638115e-06, "loss": 0.6461, "step": 11820 }, { "epoch": 0.6075136190769863, "grad_norm": 1.0155729055404663, "learning_rate": 3.524254990020053e-06, "loss": 0.7088, "step": 11821 }, { "epoch": 0.607565011820331, "grad_norm": 1.0471463203430176, "learning_rate": 3.523459832288792e-06, "loss": 0.701, "step": 11822 }, { "epoch": 0.6076164045636756, "grad_norm": 1.0348927974700928, "learning_rate": 3.5226647154663664e-06, "loss": 0.6631, "step": 11823 }, { "epoch": 0.6076677973070203, "grad_norm": 1.0795766115188599, "learning_rate": 3.5218696395748044e-06, "loss": 0.6771, "step": 11824 }, { "epoch": 0.6077191900503649, "grad_norm": 1.0964086055755615, "learning_rate": 3.5210746046361364e-06, "loss": 0.7025, "step": 11825 }, { "epoch": 0.6077705827937095, "grad_norm": 1.0548222064971924, "learning_rate": 3.520279610672387e-06, "loss": 0.7316, "step": 11826 }, { "epoch": 0.6078219755370542, "grad_norm": 1.1473653316497803, "learning_rate": 3.5194846577055823e-06, "loss": 0.749, "step": 11827 }, { "epoch": 0.6078733682803988, "grad_norm": 1.0289922952651978, "learning_rate": 3.518689745757751e-06, "loss": 0.6661, "step": 11828 }, { "epoch": 0.6079247610237435, "grad_norm": 1.1346315145492554, "learning_rate": 3.517894874850911e-06, "loss": 0.7115, "step": 11829 }, { "epoch": 0.6079761537670881, "grad_norm": 1.0271775722503662, "learning_rate": 3.517100045007088e-06, "loss": 0.6493, "step": 11830 }, { "epoch": 0.6080275465104328, "grad_norm": 1.0954210758209229, "learning_rate": 3.5163052562483057e-06, "loss": 0.6552, "step": 11831 }, { "epoch": 0.6080789392537773, "grad_norm": 1.1188998222351074, "learning_rate": 3.5155105085965798e-06, "loss": 0.7238, "step": 11832 }, { "epoch": 0.608130331997122, "grad_norm": 0.8290671110153198, "learning_rate": 3.5147158020739315e-06, "loss": 0.6309, "step": 11833 }, { "epoch": 0.6081817247404666, "grad_norm": 1.0628269910812378, "learning_rate": 3.5139211367023796e-06, "loss": 0.7209, "step": 11834 }, { "epoch": 0.6082331174838113, "grad_norm": 0.6959296464920044, "learning_rate": 3.513126512503939e-06, "loss": 0.6147, "step": 11835 }, { "epoch": 0.6082845102271559, "grad_norm": 1.1849548816680908, "learning_rate": 3.5123319295006277e-06, "loss": 0.7394, "step": 11836 }, { "epoch": 0.6083359029705006, "grad_norm": 1.1849671602249146, "learning_rate": 3.511537387714461e-06, "loss": 0.6911, "step": 11837 }, { "epoch": 0.6083872957138452, "grad_norm": 1.048609733581543, "learning_rate": 3.5107428871674486e-06, "loss": 0.7603, "step": 11838 }, { "epoch": 0.6084386884571898, "grad_norm": 1.0889729261398315, "learning_rate": 3.5099484278816047e-06, "loss": 0.7244, "step": 11839 }, { "epoch": 0.6084900812005345, "grad_norm": 1.1150254011154175, "learning_rate": 3.5091540098789412e-06, "loss": 0.717, "step": 11840 }, { "epoch": 0.6085414739438791, "grad_norm": 1.092042326927185, "learning_rate": 3.5083596331814693e-06, "loss": 0.6727, "step": 11841 }, { "epoch": 0.6085928666872238, "grad_norm": 1.0360329151153564, "learning_rate": 3.5075652978111953e-06, "loss": 0.7372, "step": 11842 }, { "epoch": 0.6086442594305684, "grad_norm": 1.1318461894989014, "learning_rate": 3.506771003790127e-06, "loss": 0.7393, "step": 11843 }, { "epoch": 0.6086956521739131, "grad_norm": 1.1159861087799072, "learning_rate": 3.5059767511402738e-06, "loss": 0.6954, "step": 11844 }, { "epoch": 0.6087470449172577, "grad_norm": 1.0765126943588257, "learning_rate": 3.505182539883638e-06, "loss": 0.7222, "step": 11845 }, { "epoch": 0.6087984376606024, "grad_norm": 0.9926835298538208, "learning_rate": 3.504388370042225e-06, "loss": 0.7, "step": 11846 }, { "epoch": 0.6088498304039469, "grad_norm": 0.8482490181922913, "learning_rate": 3.5035942416380407e-06, "loss": 0.6781, "step": 11847 }, { "epoch": 0.6089012231472916, "grad_norm": 0.7162907719612122, "learning_rate": 3.5028001546930822e-06, "loss": 0.6442, "step": 11848 }, { "epoch": 0.6089526158906362, "grad_norm": 1.0388542413711548, "learning_rate": 3.5020061092293534e-06, "loss": 0.7451, "step": 11849 }, { "epoch": 0.6090040086339809, "grad_norm": 1.0878304243087769, "learning_rate": 3.501212105268855e-06, "loss": 0.7127, "step": 11850 }, { "epoch": 0.6090554013773255, "grad_norm": 0.9801203608512878, "learning_rate": 3.5004181428335816e-06, "loss": 0.7169, "step": 11851 }, { "epoch": 0.6091067941206701, "grad_norm": 1.1127053499221802, "learning_rate": 3.499624221945534e-06, "loss": 0.6478, "step": 11852 }, { "epoch": 0.6091581868640148, "grad_norm": 0.7497096061706543, "learning_rate": 3.498830342626708e-06, "loss": 0.7188, "step": 11853 }, { "epoch": 0.6092095796073594, "grad_norm": 1.3945552110671997, "learning_rate": 3.498036504899097e-06, "loss": 0.729, "step": 11854 }, { "epoch": 0.6092609723507041, "grad_norm": 1.056204080581665, "learning_rate": 3.4972427087846973e-06, "loss": 0.6881, "step": 11855 }, { "epoch": 0.6093123650940487, "grad_norm": 0.8180471062660217, "learning_rate": 3.496448954305499e-06, "loss": 0.6779, "step": 11856 }, { "epoch": 0.6093637578373934, "grad_norm": 0.7603835463523865, "learning_rate": 3.4956552414834987e-06, "loss": 0.5966, "step": 11857 }, { "epoch": 0.609415150580738, "grad_norm": 0.7971979379653931, "learning_rate": 3.4948615703406812e-06, "loss": 0.6861, "step": 11858 }, { "epoch": 0.6094665433240827, "grad_norm": 1.0230458974838257, "learning_rate": 3.4940679408990385e-06, "loss": 0.7339, "step": 11859 }, { "epoch": 0.6095179360674273, "grad_norm": 1.0044119358062744, "learning_rate": 3.49327435318056e-06, "loss": 0.7153, "step": 11860 }, { "epoch": 0.609569328810772, "grad_norm": 1.0402259826660156, "learning_rate": 3.4924808072072295e-06, "loss": 0.7348, "step": 11861 }, { "epoch": 0.6096207215541165, "grad_norm": 1.5505672693252563, "learning_rate": 3.4916873030010344e-06, "loss": 0.7458, "step": 11862 }, { "epoch": 0.6096721142974612, "grad_norm": 1.008429765701294, "learning_rate": 3.4908938405839615e-06, "loss": 0.7018, "step": 11863 }, { "epoch": 0.6097235070408058, "grad_norm": 1.075774073600769, "learning_rate": 3.490100419977992e-06, "loss": 0.6922, "step": 11864 }, { "epoch": 0.6097748997841504, "grad_norm": 1.0135674476623535, "learning_rate": 3.4893070412051076e-06, "loss": 0.7143, "step": 11865 }, { "epoch": 0.6098262925274951, "grad_norm": 1.0693053007125854, "learning_rate": 3.488513704287293e-06, "loss": 0.7557, "step": 11866 }, { "epoch": 0.6098776852708397, "grad_norm": 1.1149888038635254, "learning_rate": 3.4877204092465238e-06, "loss": 0.6834, "step": 11867 }, { "epoch": 0.6099290780141844, "grad_norm": 0.7114417552947998, "learning_rate": 3.4869271561047816e-06, "loss": 0.621, "step": 11868 }, { "epoch": 0.609980470757529, "grad_norm": 1.1114683151245117, "learning_rate": 3.4861339448840464e-06, "loss": 0.6935, "step": 11869 }, { "epoch": 0.6100318635008737, "grad_norm": 0.7656902074813843, "learning_rate": 3.4853407756062898e-06, "loss": 0.6617, "step": 11870 }, { "epoch": 0.6100832562442183, "grad_norm": 1.194212555885315, "learning_rate": 3.484547648293489e-06, "loss": 0.6801, "step": 11871 }, { "epoch": 0.610134648987563, "grad_norm": 1.124890923500061, "learning_rate": 3.4837545629676205e-06, "loss": 0.7283, "step": 11872 }, { "epoch": 0.6101860417309076, "grad_norm": 1.166640043258667, "learning_rate": 3.482961519650657e-06, "loss": 0.7608, "step": 11873 }, { "epoch": 0.6102374344742523, "grad_norm": 1.0783902406692505, "learning_rate": 3.482168518364567e-06, "loss": 0.7195, "step": 11874 }, { "epoch": 0.6102888272175969, "grad_norm": 1.0856562852859497, "learning_rate": 3.481375559131324e-06, "loss": 0.6978, "step": 11875 }, { "epoch": 0.6103402199609416, "grad_norm": 1.1609246730804443, "learning_rate": 3.4805826419728984e-06, "loss": 0.7283, "step": 11876 }, { "epoch": 0.6103916127042861, "grad_norm": 0.6993660926818848, "learning_rate": 3.4797897669112567e-06, "loss": 0.6119, "step": 11877 }, { "epoch": 0.6104430054476307, "grad_norm": 1.0403070449829102, "learning_rate": 3.478996933968366e-06, "loss": 0.6947, "step": 11878 }, { "epoch": 0.6104943981909754, "grad_norm": 1.0169188976287842, "learning_rate": 3.4782041431661966e-06, "loss": 0.7209, "step": 11879 }, { "epoch": 0.61054579093432, "grad_norm": 0.7749274373054504, "learning_rate": 3.477411394526708e-06, "loss": 0.638, "step": 11880 }, { "epoch": 0.6105971836776647, "grad_norm": 1.0414111614227295, "learning_rate": 3.476618688071866e-06, "loss": 0.7039, "step": 11881 }, { "epoch": 0.6106485764210093, "grad_norm": 1.0281057357788086, "learning_rate": 3.4758260238236357e-06, "loss": 0.7346, "step": 11882 }, { "epoch": 0.610699969164354, "grad_norm": 1.0106642246246338, "learning_rate": 3.4750334018039746e-06, "loss": 0.727, "step": 11883 }, { "epoch": 0.6107513619076986, "grad_norm": 1.0776649713516235, "learning_rate": 3.4742408220348446e-06, "loss": 0.7592, "step": 11884 }, { "epoch": 0.6108027546510433, "grad_norm": 0.6897673606872559, "learning_rate": 3.4734482845382055e-06, "loss": 0.651, "step": 11885 }, { "epoch": 0.6108541473943879, "grad_norm": 1.1204874515533447, "learning_rate": 3.472655789336015e-06, "loss": 0.6911, "step": 11886 }, { "epoch": 0.6109055401377326, "grad_norm": 0.741254985332489, "learning_rate": 3.4718633364502287e-06, "loss": 0.6269, "step": 11887 }, { "epoch": 0.6109569328810772, "grad_norm": 1.0893442630767822, "learning_rate": 3.471070925902803e-06, "loss": 0.678, "step": 11888 }, { "epoch": 0.6110083256244219, "grad_norm": 1.04536771774292, "learning_rate": 3.4702785577156946e-06, "loss": 0.6889, "step": 11889 }, { "epoch": 0.6110597183677665, "grad_norm": 1.1782145500183105, "learning_rate": 3.4694862319108523e-06, "loss": 0.7742, "step": 11890 }, { "epoch": 0.6111111111111112, "grad_norm": 1.0272762775421143, "learning_rate": 3.468693948510231e-06, "loss": 0.6625, "step": 11891 }, { "epoch": 0.6111625038544557, "grad_norm": 1.095741629600525, "learning_rate": 3.4679017075357825e-06, "loss": 0.7479, "step": 11892 }, { "epoch": 0.6112138965978003, "grad_norm": 0.8137418627738953, "learning_rate": 3.4671095090094536e-06, "loss": 0.6191, "step": 11893 }, { "epoch": 0.611265289341145, "grad_norm": 1.1190258264541626, "learning_rate": 3.466317352953194e-06, "loss": 0.6869, "step": 11894 }, { "epoch": 0.6113166820844896, "grad_norm": 1.0588442087173462, "learning_rate": 3.4655252393889527e-06, "loss": 0.682, "step": 11895 }, { "epoch": 0.6113680748278343, "grad_norm": 1.09615957736969, "learning_rate": 3.464733168338673e-06, "loss": 0.7663, "step": 11896 }, { "epoch": 0.6114194675711789, "grad_norm": 0.9907885193824768, "learning_rate": 3.4639411398243026e-06, "loss": 0.7413, "step": 11897 }, { "epoch": 0.6114708603145236, "grad_norm": 0.7235219478607178, "learning_rate": 3.463149153867785e-06, "loss": 0.7098, "step": 11898 }, { "epoch": 0.6115222530578682, "grad_norm": 1.0669646263122559, "learning_rate": 3.4623572104910615e-06, "loss": 0.7292, "step": 11899 }, { "epoch": 0.6115736458012129, "grad_norm": 1.0527647733688354, "learning_rate": 3.4615653097160735e-06, "loss": 0.7174, "step": 11900 }, { "epoch": 0.6116250385445575, "grad_norm": 1.3574820756912231, "learning_rate": 3.4607734515647617e-06, "loss": 0.7152, "step": 11901 }, { "epoch": 0.6116764312879022, "grad_norm": 1.0775631666183472, "learning_rate": 3.459981636059068e-06, "loss": 0.7438, "step": 11902 }, { "epoch": 0.6117278240312468, "grad_norm": 1.0844933986663818, "learning_rate": 3.4591898632209257e-06, "loss": 0.6977, "step": 11903 }, { "epoch": 0.6117792167745915, "grad_norm": 1.079079508781433, "learning_rate": 3.4583981330722736e-06, "loss": 0.7233, "step": 11904 }, { "epoch": 0.6118306095179361, "grad_norm": 1.0844032764434814, "learning_rate": 3.4576064456350487e-06, "loss": 0.7563, "step": 11905 }, { "epoch": 0.6118820022612808, "grad_norm": 0.9849695563316345, "learning_rate": 3.456814800931183e-06, "loss": 0.6763, "step": 11906 }, { "epoch": 0.6119333950046254, "grad_norm": 0.9981567859649658, "learning_rate": 3.456023198982611e-06, "loss": 0.7119, "step": 11907 }, { "epoch": 0.6119847877479699, "grad_norm": 1.0896896123886108, "learning_rate": 3.4552316398112666e-06, "loss": 0.7291, "step": 11908 }, { "epoch": 0.6120361804913146, "grad_norm": 1.0749777555465698, "learning_rate": 3.4544401234390768e-06, "loss": 0.7472, "step": 11909 }, { "epoch": 0.6120875732346592, "grad_norm": 0.913811206817627, "learning_rate": 3.453648649887972e-06, "loss": 0.6738, "step": 11910 }, { "epoch": 0.6121389659780039, "grad_norm": 1.0979852676391602, "learning_rate": 3.4528572191798847e-06, "loss": 0.6912, "step": 11911 }, { "epoch": 0.6121903587213485, "grad_norm": 1.0542471408843994, "learning_rate": 3.452065831336737e-06, "loss": 0.714, "step": 11912 }, { "epoch": 0.6122417514646932, "grad_norm": 1.0307493209838867, "learning_rate": 3.4512744863804566e-06, "loss": 0.7737, "step": 11913 }, { "epoch": 0.6122931442080378, "grad_norm": 1.0496041774749756, "learning_rate": 3.45048318433297e-06, "loss": 0.6995, "step": 11914 }, { "epoch": 0.6123445369513825, "grad_norm": 1.135891318321228, "learning_rate": 3.4496919252161997e-06, "loss": 0.7203, "step": 11915 }, { "epoch": 0.6123959296947271, "grad_norm": 1.1007752418518066, "learning_rate": 3.448900709052069e-06, "loss": 0.7755, "step": 11916 }, { "epoch": 0.6124473224380718, "grad_norm": 1.062880277633667, "learning_rate": 3.4481095358624973e-06, "loss": 0.709, "step": 11917 }, { "epoch": 0.6124987151814164, "grad_norm": 1.0238465070724487, "learning_rate": 3.4473184056694078e-06, "loss": 0.7173, "step": 11918 }, { "epoch": 0.6125501079247611, "grad_norm": 0.724390983581543, "learning_rate": 3.4465273184947157e-06, "loss": 0.6394, "step": 11919 }, { "epoch": 0.6126015006681057, "grad_norm": 0.7711619138717651, "learning_rate": 3.4457362743603418e-06, "loss": 0.6519, "step": 11920 }, { "epoch": 0.6126528934114503, "grad_norm": 1.081173300743103, "learning_rate": 3.4449452732882026e-06, "loss": 0.7083, "step": 11921 }, { "epoch": 0.612704286154795, "grad_norm": 1.058774471282959, "learning_rate": 3.4441543153002117e-06, "loss": 0.6996, "step": 11922 }, { "epoch": 0.6127556788981395, "grad_norm": 1.0600477457046509, "learning_rate": 3.4433634004182837e-06, "loss": 0.7427, "step": 11923 }, { "epoch": 0.6128070716414842, "grad_norm": 0.7493856549263, "learning_rate": 3.442572528664334e-06, "loss": 0.6378, "step": 11924 }, { "epoch": 0.6128584643848288, "grad_norm": 1.1099209785461426, "learning_rate": 3.4417817000602705e-06, "loss": 0.7347, "step": 11925 }, { "epoch": 0.6129098571281735, "grad_norm": 1.0539991855621338, "learning_rate": 3.440990914628005e-06, "loss": 0.6716, "step": 11926 }, { "epoch": 0.6129612498715181, "grad_norm": 1.1441667079925537, "learning_rate": 3.4402001723894495e-06, "loss": 0.7727, "step": 11927 }, { "epoch": 0.6130126426148628, "grad_norm": 1.2216852903366089, "learning_rate": 3.4394094733665086e-06, "loss": 0.7134, "step": 11928 }, { "epoch": 0.6130640353582074, "grad_norm": 1.1003215312957764, "learning_rate": 3.4386188175810912e-06, "loss": 0.7187, "step": 11929 }, { "epoch": 0.6131154281015521, "grad_norm": 1.1060786247253418, "learning_rate": 3.4378282050551055e-06, "loss": 0.7638, "step": 11930 }, { "epoch": 0.6131668208448967, "grad_norm": 1.0837609767913818, "learning_rate": 3.4370376358104514e-06, "loss": 0.7587, "step": 11931 }, { "epoch": 0.6132182135882414, "grad_norm": 0.7459571361541748, "learning_rate": 3.436247109869034e-06, "loss": 0.6936, "step": 11932 }, { "epoch": 0.613269606331586, "grad_norm": 0.6606773138046265, "learning_rate": 3.4354566272527558e-06, "loss": 0.6272, "step": 11933 }, { "epoch": 0.6133209990749307, "grad_norm": 0.7025207877159119, "learning_rate": 3.4346661879835206e-06, "loss": 0.6604, "step": 11934 }, { "epoch": 0.6133723918182753, "grad_norm": 1.0718263387680054, "learning_rate": 3.4338757920832232e-06, "loss": 0.7346, "step": 11935 }, { "epoch": 0.61342378456162, "grad_norm": 0.7326095104217529, "learning_rate": 3.4330854395737645e-06, "loss": 0.6343, "step": 11936 }, { "epoch": 0.6134751773049646, "grad_norm": 1.03853440284729, "learning_rate": 3.4322951304770435e-06, "loss": 0.7051, "step": 11937 }, { "epoch": 0.6135265700483091, "grad_norm": 0.8832135200500488, "learning_rate": 3.4315048648149536e-06, "loss": 0.6675, "step": 11938 }, { "epoch": 0.6135779627916538, "grad_norm": 1.0592994689941406, "learning_rate": 3.4307146426093907e-06, "loss": 0.7129, "step": 11939 }, { "epoch": 0.6136293555349984, "grad_norm": 1.0465888977050781, "learning_rate": 3.4299244638822516e-06, "loss": 0.7254, "step": 11940 }, { "epoch": 0.6136807482783431, "grad_norm": 1.0536340475082397, "learning_rate": 3.429134328655424e-06, "loss": 0.6841, "step": 11941 }, { "epoch": 0.6137321410216877, "grad_norm": 1.1462889909744263, "learning_rate": 3.4283442369508015e-06, "loss": 0.7386, "step": 11942 }, { "epoch": 0.6137835337650324, "grad_norm": 0.9829745888710022, "learning_rate": 3.427554188790277e-06, "loss": 0.6611, "step": 11943 }, { "epoch": 0.613834926508377, "grad_norm": 1.0320862531661987, "learning_rate": 3.4267641841957345e-06, "loss": 0.6321, "step": 11944 }, { "epoch": 0.6138863192517217, "grad_norm": 1.1200461387634277, "learning_rate": 3.4259742231890638e-06, "loss": 0.7314, "step": 11945 }, { "epoch": 0.6139377119950663, "grad_norm": 0.9819492101669312, "learning_rate": 3.4251843057921524e-06, "loss": 0.7122, "step": 11946 }, { "epoch": 0.613989104738411, "grad_norm": 1.084065318107605, "learning_rate": 3.4243944320268847e-06, "loss": 0.7351, "step": 11947 }, { "epoch": 0.6140404974817556, "grad_norm": 1.089953064918518, "learning_rate": 3.4236046019151448e-06, "loss": 0.708, "step": 11948 }, { "epoch": 0.6140918902251002, "grad_norm": 0.8278733491897583, "learning_rate": 3.422814815478816e-06, "loss": 0.6769, "step": 11949 }, { "epoch": 0.6141432829684449, "grad_norm": 0.7419480085372925, "learning_rate": 3.422025072739782e-06, "loss": 0.7056, "step": 11950 }, { "epoch": 0.6141946757117895, "grad_norm": 1.2013943195343018, "learning_rate": 3.42123537371992e-06, "loss": 0.7135, "step": 11951 }, { "epoch": 0.6142460684551342, "grad_norm": 1.0517261028289795, "learning_rate": 3.42044571844111e-06, "loss": 0.686, "step": 11952 }, { "epoch": 0.6142974611984787, "grad_norm": 1.1152492761611938, "learning_rate": 3.419656106925232e-06, "loss": 0.7829, "step": 11953 }, { "epoch": 0.6143488539418234, "grad_norm": 0.9811345338821411, "learning_rate": 3.418866539194161e-06, "loss": 0.6442, "step": 11954 }, { "epoch": 0.614400246685168, "grad_norm": 1.0819642543792725, "learning_rate": 3.4180770152697725e-06, "loss": 0.7238, "step": 11955 }, { "epoch": 0.6144516394285127, "grad_norm": 1.0472996234893799, "learning_rate": 3.4172875351739425e-06, "loss": 0.6851, "step": 11956 }, { "epoch": 0.6145030321718573, "grad_norm": 1.075195074081421, "learning_rate": 3.4164980989285424e-06, "loss": 0.6773, "step": 11957 }, { "epoch": 0.614554424915202, "grad_norm": 1.0791219472885132, "learning_rate": 3.4157087065554463e-06, "loss": 0.6986, "step": 11958 }, { "epoch": 0.6146058176585466, "grad_norm": 0.7175921201705933, "learning_rate": 3.414919358076525e-06, "loss": 0.6752, "step": 11959 }, { "epoch": 0.6146572104018913, "grad_norm": 1.1144776344299316, "learning_rate": 3.414130053513645e-06, "loss": 0.6737, "step": 11960 }, { "epoch": 0.6147086031452359, "grad_norm": 1.0300164222717285, "learning_rate": 3.413340792888677e-06, "loss": 0.7013, "step": 11961 }, { "epoch": 0.6147599958885805, "grad_norm": 1.0904394388198853, "learning_rate": 3.4125515762234895e-06, "loss": 0.6695, "step": 11962 }, { "epoch": 0.6148113886319252, "grad_norm": 0.7407886981964111, "learning_rate": 3.4117624035399456e-06, "loss": 0.6589, "step": 11963 }, { "epoch": 0.6148627813752698, "grad_norm": 1.0961823463439941, "learning_rate": 3.410973274859911e-06, "loss": 0.6625, "step": 11964 }, { "epoch": 0.6149141741186145, "grad_norm": 0.8541749119758606, "learning_rate": 3.4101841902052492e-06, "loss": 0.6938, "step": 11965 }, { "epoch": 0.6149655668619591, "grad_norm": 1.0181026458740234, "learning_rate": 3.409395149597823e-06, "loss": 0.7096, "step": 11966 }, { "epoch": 0.6150169596053038, "grad_norm": 1.060071349143982, "learning_rate": 3.4086061530594944e-06, "loss": 0.7042, "step": 11967 }, { "epoch": 0.6150683523486483, "grad_norm": 1.0849549770355225, "learning_rate": 3.40781720061212e-06, "loss": 0.7028, "step": 11968 }, { "epoch": 0.615119745091993, "grad_norm": 0.9998595118522644, "learning_rate": 3.407028292277562e-06, "loss": 0.7085, "step": 11969 }, { "epoch": 0.6151711378353376, "grad_norm": 1.1476051807403564, "learning_rate": 3.4062394280776754e-06, "loss": 0.7082, "step": 11970 }, { "epoch": 0.6152225305786823, "grad_norm": 0.8414899706840515, "learning_rate": 3.405450608034317e-06, "loss": 0.6441, "step": 11971 }, { "epoch": 0.6152739233220269, "grad_norm": 1.0804591178894043, "learning_rate": 3.4046618321693437e-06, "loss": 0.6955, "step": 11972 }, { "epoch": 0.6153253160653716, "grad_norm": 1.0082899332046509, "learning_rate": 3.4038731005046054e-06, "loss": 0.7377, "step": 11973 }, { "epoch": 0.6153767088087162, "grad_norm": 1.03804349899292, "learning_rate": 3.4030844130619573e-06, "loss": 0.7115, "step": 11974 }, { "epoch": 0.6154281015520608, "grad_norm": 0.9946032762527466, "learning_rate": 3.4022957698632517e-06, "loss": 0.6828, "step": 11975 }, { "epoch": 0.6154794942954055, "grad_norm": 1.109576940536499, "learning_rate": 3.4015071709303347e-06, "loss": 0.6204, "step": 11976 }, { "epoch": 0.6155308870387501, "grad_norm": 1.0410388708114624, "learning_rate": 3.4007186162850576e-06, "loss": 0.7003, "step": 11977 }, { "epoch": 0.6155822797820948, "grad_norm": 1.045224666595459, "learning_rate": 3.3999301059492682e-06, "loss": 0.6632, "step": 11978 }, { "epoch": 0.6156336725254394, "grad_norm": 1.0935750007629395, "learning_rate": 3.399141639944813e-06, "loss": 0.7261, "step": 11979 }, { "epoch": 0.6156850652687841, "grad_norm": 1.105319619178772, "learning_rate": 3.3983532182935353e-06, "loss": 0.7271, "step": 11980 }, { "epoch": 0.6157364580121287, "grad_norm": 1.0905101299285889, "learning_rate": 3.39756484101728e-06, "loss": 0.7401, "step": 11981 }, { "epoch": 0.6157878507554734, "grad_norm": 1.0350977182388306, "learning_rate": 3.3967765081378933e-06, "loss": 0.6524, "step": 11982 }, { "epoch": 0.6158392434988179, "grad_norm": 1.127240777015686, "learning_rate": 3.395988219677211e-06, "loss": 0.7135, "step": 11983 }, { "epoch": 0.6158906362421626, "grad_norm": 1.0493940114974976, "learning_rate": 3.3951999756570754e-06, "loss": 0.7156, "step": 11984 }, { "epoch": 0.6159420289855072, "grad_norm": 1.0714149475097656, "learning_rate": 3.394411776099328e-06, "loss": 0.7152, "step": 11985 }, { "epoch": 0.6159934217288519, "grad_norm": 1.0448074340820312, "learning_rate": 3.3936236210258028e-06, "loss": 0.7025, "step": 11986 }, { "epoch": 0.6160448144721965, "grad_norm": 1.0660523176193237, "learning_rate": 3.392835510458338e-06, "loss": 0.7097, "step": 11987 }, { "epoch": 0.6160962072155411, "grad_norm": 1.0485416650772095, "learning_rate": 3.392047444418769e-06, "loss": 0.6797, "step": 11988 }, { "epoch": 0.6161475999588858, "grad_norm": 0.7331197261810303, "learning_rate": 3.3912594229289296e-06, "loss": 0.6641, "step": 11989 }, { "epoch": 0.6161989927022304, "grad_norm": 0.6869722604751587, "learning_rate": 3.3904714460106527e-06, "loss": 0.6761, "step": 11990 }, { "epoch": 0.6162503854455751, "grad_norm": 1.1129176616668701, "learning_rate": 3.3896835136857707e-06, "loss": 0.6979, "step": 11991 }, { "epoch": 0.6163017781889197, "grad_norm": 1.070527195930481, "learning_rate": 3.388895625976112e-06, "loss": 0.7043, "step": 11992 }, { "epoch": 0.6163531709322644, "grad_norm": 1.0062566995620728, "learning_rate": 3.388107782903507e-06, "loss": 0.6904, "step": 11993 }, { "epoch": 0.616404563675609, "grad_norm": 1.1211024522781372, "learning_rate": 3.387319984489785e-06, "loss": 0.725, "step": 11994 }, { "epoch": 0.6164559564189537, "grad_norm": 0.7740526795387268, "learning_rate": 3.3865322307567692e-06, "loss": 0.6344, "step": 11995 }, { "epoch": 0.6165073491622983, "grad_norm": 1.1901206970214844, "learning_rate": 3.3857445217262876e-06, "loss": 0.7121, "step": 11996 }, { "epoch": 0.616558741905643, "grad_norm": 1.119513750076294, "learning_rate": 3.384956857420163e-06, "loss": 0.7488, "step": 11997 }, { "epoch": 0.6166101346489876, "grad_norm": 1.0623103380203247, "learning_rate": 3.38416923786022e-06, "loss": 0.6765, "step": 11998 }, { "epoch": 0.6166615273923322, "grad_norm": 1.1121455430984497, "learning_rate": 3.3833816630682786e-06, "loss": 0.7258, "step": 11999 }, { "epoch": 0.6167129201356768, "grad_norm": 1.2464754581451416, "learning_rate": 3.3825941330661594e-06, "loss": 0.6804, "step": 12000 }, { "epoch": 0.6167643128790214, "grad_norm": 1.045380711555481, "learning_rate": 3.381806647875685e-06, "loss": 0.7242, "step": 12001 }, { "epoch": 0.6168157056223661, "grad_norm": 1.005313754081726, "learning_rate": 3.3810192075186677e-06, "loss": 0.6833, "step": 12002 }, { "epoch": 0.6168670983657107, "grad_norm": 0.6825047731399536, "learning_rate": 3.3802318120169276e-06, "loss": 0.6907, "step": 12003 }, { "epoch": 0.6169184911090554, "grad_norm": 1.12043297290802, "learning_rate": 3.3794444613922813e-06, "loss": 0.7719, "step": 12004 }, { "epoch": 0.6169698838524, "grad_norm": 1.041176199913025, "learning_rate": 3.378657155666539e-06, "loss": 0.7143, "step": 12005 }, { "epoch": 0.6170212765957447, "grad_norm": 1.0308610200881958, "learning_rate": 3.377869894861516e-06, "loss": 0.7262, "step": 12006 }, { "epoch": 0.6170726693390893, "grad_norm": 0.8717643022537231, "learning_rate": 3.3770826789990257e-06, "loss": 0.6585, "step": 12007 }, { "epoch": 0.617124062082434, "grad_norm": 0.9989317655563354, "learning_rate": 3.3762955081008757e-06, "loss": 0.6829, "step": 12008 }, { "epoch": 0.6171754548257786, "grad_norm": 1.0629189014434814, "learning_rate": 3.3755083821888767e-06, "loss": 0.6978, "step": 12009 }, { "epoch": 0.6172268475691233, "grad_norm": 0.8257383704185486, "learning_rate": 3.3747213012848374e-06, "loss": 0.6725, "step": 12010 }, { "epoch": 0.6172782403124679, "grad_norm": 0.7306297421455383, "learning_rate": 3.3739342654105633e-06, "loss": 0.6548, "step": 12011 }, { "epoch": 0.6173296330558126, "grad_norm": 1.1134037971496582, "learning_rate": 3.37314727458786e-06, "loss": 0.7049, "step": 12012 }, { "epoch": 0.6173810257991572, "grad_norm": 1.0994467735290527, "learning_rate": 3.372360328838532e-06, "loss": 0.727, "step": 12013 }, { "epoch": 0.6174324185425017, "grad_norm": 1.106757402420044, "learning_rate": 3.3715734281843836e-06, "loss": 0.702, "step": 12014 }, { "epoch": 0.6174838112858464, "grad_norm": 1.1218456029891968, "learning_rate": 3.3707865726472145e-06, "loss": 0.7051, "step": 12015 }, { "epoch": 0.617535204029191, "grad_norm": 1.109925389289856, "learning_rate": 3.3699997622488258e-06, "loss": 0.7753, "step": 12016 }, { "epoch": 0.6175865967725357, "grad_norm": 1.0417683124542236, "learning_rate": 3.3692129970110177e-06, "loss": 0.6741, "step": 12017 }, { "epoch": 0.6176379895158803, "grad_norm": 1.0980241298675537, "learning_rate": 3.3684262769555877e-06, "loss": 0.7299, "step": 12018 }, { "epoch": 0.617689382259225, "grad_norm": 1.0709458589553833, "learning_rate": 3.3676396021043316e-06, "loss": 0.7048, "step": 12019 }, { "epoch": 0.6177407750025696, "grad_norm": 0.9785841703414917, "learning_rate": 3.3668529724790467e-06, "loss": 0.7338, "step": 12020 }, { "epoch": 0.6177921677459143, "grad_norm": 1.0190116167068481, "learning_rate": 3.3660663881015253e-06, "loss": 0.7098, "step": 12021 }, { "epoch": 0.6178435604892589, "grad_norm": 1.043182373046875, "learning_rate": 3.365279848993561e-06, "loss": 0.6559, "step": 12022 }, { "epoch": 0.6178949532326036, "grad_norm": 1.0624303817749023, "learning_rate": 3.364493355176949e-06, "loss": 0.7556, "step": 12023 }, { "epoch": 0.6179463459759482, "grad_norm": 0.7292889952659607, "learning_rate": 3.363706906673474e-06, "loss": 0.6333, "step": 12024 }, { "epoch": 0.6179977387192929, "grad_norm": 1.083287239074707, "learning_rate": 3.3629205035049286e-06, "loss": 0.7256, "step": 12025 }, { "epoch": 0.6180491314626375, "grad_norm": 1.2116307020187378, "learning_rate": 3.3621341456931e-06, "loss": 0.7038, "step": 12026 }, { "epoch": 0.6181005242059822, "grad_norm": 1.0136851072311401, "learning_rate": 3.361347833259777e-06, "loss": 0.6772, "step": 12027 }, { "epoch": 0.6181519169493268, "grad_norm": 1.057914137840271, "learning_rate": 3.360561566226741e-06, "loss": 0.6631, "step": 12028 }, { "epoch": 0.6182033096926713, "grad_norm": 0.9898151159286499, "learning_rate": 3.359775344615779e-06, "loss": 0.6984, "step": 12029 }, { "epoch": 0.618254702436016, "grad_norm": 1.161797285079956, "learning_rate": 3.3589891684486742e-06, "loss": 0.7316, "step": 12030 }, { "epoch": 0.6183060951793606, "grad_norm": 1.1593981981277466, "learning_rate": 3.3582030377472065e-06, "loss": 0.736, "step": 12031 }, { "epoch": 0.6183574879227053, "grad_norm": 1.067274808883667, "learning_rate": 3.3574169525331567e-06, "loss": 0.7279, "step": 12032 }, { "epoch": 0.6184088806660499, "grad_norm": 1.1198688745498657, "learning_rate": 3.356630912828308e-06, "loss": 0.7116, "step": 12033 }, { "epoch": 0.6184602734093946, "grad_norm": 1.110133171081543, "learning_rate": 3.355844918654432e-06, "loss": 0.7018, "step": 12034 }, { "epoch": 0.6185116661527392, "grad_norm": 0.7323580980300903, "learning_rate": 3.355058970033308e-06, "loss": 0.7122, "step": 12035 }, { "epoch": 0.6185630588960839, "grad_norm": 1.077885627746582, "learning_rate": 3.354273066986714e-06, "loss": 0.7443, "step": 12036 }, { "epoch": 0.6186144516394285, "grad_norm": 0.8660556077957153, "learning_rate": 3.3534872095364203e-06, "loss": 0.6444, "step": 12037 }, { "epoch": 0.6186658443827732, "grad_norm": 1.056745171546936, "learning_rate": 3.3527013977042007e-06, "loss": 0.7077, "step": 12038 }, { "epoch": 0.6187172371261178, "grad_norm": 1.0295047760009766, "learning_rate": 3.3519156315118282e-06, "loss": 0.6066, "step": 12039 }, { "epoch": 0.6187686298694625, "grad_norm": 0.7480875253677368, "learning_rate": 3.3511299109810715e-06, "loss": 0.669, "step": 12040 }, { "epoch": 0.6188200226128071, "grad_norm": 1.0890185832977295, "learning_rate": 3.3503442361337002e-06, "loss": 0.6982, "step": 12041 }, { "epoch": 0.6188714153561518, "grad_norm": 0.8537014722824097, "learning_rate": 3.349558606991483e-06, "loss": 0.7009, "step": 12042 }, { "epoch": 0.6189228080994964, "grad_norm": 1.0447927713394165, "learning_rate": 3.348773023576188e-06, "loss": 0.6654, "step": 12043 }, { "epoch": 0.6189742008428409, "grad_norm": 1.093884825706482, "learning_rate": 3.3479874859095755e-06, "loss": 0.7128, "step": 12044 }, { "epoch": 0.6190255935861856, "grad_norm": 1.9971200227737427, "learning_rate": 3.3472019940134125e-06, "loss": 0.7152, "step": 12045 }, { "epoch": 0.6190769863295302, "grad_norm": 1.0972340106964111, "learning_rate": 3.346416547909464e-06, "loss": 0.7606, "step": 12046 }, { "epoch": 0.6191283790728749, "grad_norm": 0.7798712253570557, "learning_rate": 3.345631147619487e-06, "loss": 0.6707, "step": 12047 }, { "epoch": 0.6191797718162195, "grad_norm": 1.1579269170761108, "learning_rate": 3.3448457931652436e-06, "loss": 0.7189, "step": 12048 }, { "epoch": 0.6192311645595642, "grad_norm": 1.030864953994751, "learning_rate": 3.344060484568493e-06, "loss": 0.6889, "step": 12049 }, { "epoch": 0.6192825573029088, "grad_norm": 0.9841315746307373, "learning_rate": 3.343275221850993e-06, "loss": 0.6264, "step": 12050 }, { "epoch": 0.6193339500462535, "grad_norm": 1.1842811107635498, "learning_rate": 3.3424900050344986e-06, "loss": 0.7136, "step": 12051 }, { "epoch": 0.6193853427895981, "grad_norm": 1.1328599452972412, "learning_rate": 3.341704834140769e-06, "loss": 0.6743, "step": 12052 }, { "epoch": 0.6194367355329428, "grad_norm": 1.0348427295684814, "learning_rate": 3.3409197091915526e-06, "loss": 0.6707, "step": 12053 }, { "epoch": 0.6194881282762874, "grad_norm": 0.7747828960418701, "learning_rate": 3.3401346302086048e-06, "loss": 0.6614, "step": 12054 }, { "epoch": 0.6195395210196321, "grad_norm": 1.0832515954971313, "learning_rate": 3.3393495972136782e-06, "loss": 0.7285, "step": 12055 }, { "epoch": 0.6195909137629767, "grad_norm": 0.8003397583961487, "learning_rate": 3.3385646102285183e-06, "loss": 0.6467, "step": 12056 }, { "epoch": 0.6196423065063213, "grad_norm": 0.9832813143730164, "learning_rate": 3.337779669274878e-06, "loss": 0.6464, "step": 12057 }, { "epoch": 0.619693699249666, "grad_norm": 0.9928159713745117, "learning_rate": 3.3369947743745036e-06, "loss": 0.6648, "step": 12058 }, { "epoch": 0.6197450919930105, "grad_norm": 0.6698116064071655, "learning_rate": 3.336209925549141e-06, "loss": 0.6391, "step": 12059 }, { "epoch": 0.6197964847363552, "grad_norm": 1.108699083328247, "learning_rate": 3.3354251228205354e-06, "loss": 0.7148, "step": 12060 }, { "epoch": 0.6198478774796998, "grad_norm": 0.6748021245002747, "learning_rate": 3.334640366210431e-06, "loss": 0.6486, "step": 12061 }, { "epoch": 0.6198992702230445, "grad_norm": 1.0672430992126465, "learning_rate": 3.3338556557405695e-06, "loss": 0.6815, "step": 12062 }, { "epoch": 0.6199506629663891, "grad_norm": 1.105315923690796, "learning_rate": 3.333070991432691e-06, "loss": 0.6892, "step": 12063 }, { "epoch": 0.6200020557097338, "grad_norm": 1.027273416519165, "learning_rate": 3.332286373308536e-06, "loss": 0.6656, "step": 12064 }, { "epoch": 0.6200534484530784, "grad_norm": 1.1047329902648926, "learning_rate": 3.331501801389846e-06, "loss": 0.7114, "step": 12065 }, { "epoch": 0.6201048411964231, "grad_norm": 0.7975478768348694, "learning_rate": 3.330717275698353e-06, "loss": 0.6423, "step": 12066 }, { "epoch": 0.6201562339397677, "grad_norm": 1.1321862936019897, "learning_rate": 3.329932796255796e-06, "loss": 0.7698, "step": 12067 }, { "epoch": 0.6202076266831124, "grad_norm": 0.7159425616264343, "learning_rate": 3.3291483630839094e-06, "loss": 0.5928, "step": 12068 }, { "epoch": 0.620259019426457, "grad_norm": 1.0901782512664795, "learning_rate": 3.328363976204427e-06, "loss": 0.6935, "step": 12069 }, { "epoch": 0.6203104121698017, "grad_norm": 1.0330448150634766, "learning_rate": 3.32757963563908e-06, "loss": 0.7303, "step": 12070 }, { "epoch": 0.6203618049131463, "grad_norm": 0.7264391183853149, "learning_rate": 3.3267953414095997e-06, "loss": 0.6417, "step": 12071 }, { "epoch": 0.620413197656491, "grad_norm": 1.085404634475708, "learning_rate": 3.3260110935377155e-06, "loss": 0.7207, "step": 12072 }, { "epoch": 0.6204645903998356, "grad_norm": 1.0804234743118286, "learning_rate": 3.3252268920451546e-06, "loss": 0.6962, "step": 12073 }, { "epoch": 0.6205159831431801, "grad_norm": 1.1416406631469727, "learning_rate": 3.324442736953646e-06, "loss": 0.7151, "step": 12074 }, { "epoch": 0.6205673758865248, "grad_norm": 1.110981822013855, "learning_rate": 3.3236586282849164e-06, "loss": 0.7512, "step": 12075 }, { "epoch": 0.6206187686298694, "grad_norm": 1.0306501388549805, "learning_rate": 3.3228745660606864e-06, "loss": 0.7072, "step": 12076 }, { "epoch": 0.6206701613732141, "grad_norm": 1.0540844202041626, "learning_rate": 3.3220905503026806e-06, "loss": 0.7193, "step": 12077 }, { "epoch": 0.6207215541165587, "grad_norm": 1.218295931816101, "learning_rate": 3.321306581032624e-06, "loss": 0.7634, "step": 12078 }, { "epoch": 0.6207729468599034, "grad_norm": 1.0965325832366943, "learning_rate": 3.3205226582722318e-06, "loss": 0.7499, "step": 12079 }, { "epoch": 0.620824339603248, "grad_norm": 1.129643440246582, "learning_rate": 3.3197387820432262e-06, "loss": 0.7261, "step": 12080 }, { "epoch": 0.6208757323465927, "grad_norm": 0.7016200423240662, "learning_rate": 3.3189549523673264e-06, "loss": 0.611, "step": 12081 }, { "epoch": 0.6209271250899373, "grad_norm": 1.1036901473999023, "learning_rate": 3.3181711692662468e-06, "loss": 0.7349, "step": 12082 }, { "epoch": 0.620978517833282, "grad_norm": 1.016593098640442, "learning_rate": 3.3173874327617028e-06, "loss": 0.6979, "step": 12083 }, { "epoch": 0.6210299105766266, "grad_norm": 1.2975292205810547, "learning_rate": 3.3166037428754115e-06, "loss": 0.6955, "step": 12084 }, { "epoch": 0.6210813033199712, "grad_norm": 0.8700991868972778, "learning_rate": 3.315820099629081e-06, "loss": 0.636, "step": 12085 }, { "epoch": 0.6211326960633159, "grad_norm": 1.1041383743286133, "learning_rate": 3.3150365030444264e-06, "loss": 0.7621, "step": 12086 }, { "epoch": 0.6211840888066605, "grad_norm": 1.0860810279846191, "learning_rate": 3.3142529531431584e-06, "loss": 0.737, "step": 12087 }, { "epoch": 0.6212354815500052, "grad_norm": 1.0357568264007568, "learning_rate": 3.313469449946982e-06, "loss": 0.6792, "step": 12088 }, { "epoch": 0.6212868742933498, "grad_norm": 0.7414577603340149, "learning_rate": 3.312685993477608e-06, "loss": 0.6814, "step": 12089 }, { "epoch": 0.6213382670366944, "grad_norm": 0.7697691321372986, "learning_rate": 3.3119025837567415e-06, "loss": 0.6882, "step": 12090 }, { "epoch": 0.621389659780039, "grad_norm": 0.8091595768928528, "learning_rate": 3.3111192208060893e-06, "loss": 0.685, "step": 12091 }, { "epoch": 0.6214410525233837, "grad_norm": 1.2382551431655884, "learning_rate": 3.3103359046473527e-06, "loss": 0.6555, "step": 12092 }, { "epoch": 0.6214924452667283, "grad_norm": 1.0569231510162354, "learning_rate": 3.309552635302235e-06, "loss": 0.712, "step": 12093 }, { "epoch": 0.621543838010073, "grad_norm": 1.1205781698226929, "learning_rate": 3.3087694127924404e-06, "loss": 0.7353, "step": 12094 }, { "epoch": 0.6215952307534176, "grad_norm": 1.0179264545440674, "learning_rate": 3.3079862371396637e-06, "loss": 0.6864, "step": 12095 }, { "epoch": 0.6216466234967623, "grad_norm": 0.8273069858551025, "learning_rate": 3.307203108365606e-06, "loss": 0.6534, "step": 12096 }, { "epoch": 0.6216980162401069, "grad_norm": 0.9565629363059998, "learning_rate": 3.3064200264919667e-06, "loss": 0.6998, "step": 12097 }, { "epoch": 0.6217494089834515, "grad_norm": 1.1001129150390625, "learning_rate": 3.3056369915404364e-06, "loss": 0.7407, "step": 12098 }, { "epoch": 0.6218008017267962, "grad_norm": 1.0582191944122314, "learning_rate": 3.304854003532714e-06, "loss": 0.7166, "step": 12099 }, { "epoch": 0.6218521944701408, "grad_norm": 1.1117619276046753, "learning_rate": 3.304071062490492e-06, "loss": 0.7509, "step": 12100 }, { "epoch": 0.6219035872134855, "grad_norm": 1.1450592279434204, "learning_rate": 3.3032881684354614e-06, "loss": 0.7301, "step": 12101 }, { "epoch": 0.6219549799568301, "grad_norm": 1.064035415649414, "learning_rate": 3.302505321389313e-06, "loss": 0.7209, "step": 12102 }, { "epoch": 0.6220063727001748, "grad_norm": 1.0156742334365845, "learning_rate": 3.30172252137374e-06, "loss": 0.7243, "step": 12103 }, { "epoch": 0.6220577654435194, "grad_norm": 1.143764853477478, "learning_rate": 3.300939768410426e-06, "loss": 0.7546, "step": 12104 }, { "epoch": 0.622109158186864, "grad_norm": 0.804700493812561, "learning_rate": 3.3001570625210583e-06, "loss": 0.6649, "step": 12105 }, { "epoch": 0.6221605509302086, "grad_norm": 1.1368941068649292, "learning_rate": 3.299374403727324e-06, "loss": 0.7285, "step": 12106 }, { "epoch": 0.6222119436735533, "grad_norm": 1.0081709623336792, "learning_rate": 3.298591792050909e-06, "loss": 0.6979, "step": 12107 }, { "epoch": 0.6222633364168979, "grad_norm": 1.1481094360351562, "learning_rate": 3.2978092275134922e-06, "loss": 0.6985, "step": 12108 }, { "epoch": 0.6223147291602426, "grad_norm": 1.1093038320541382, "learning_rate": 3.297026710136757e-06, "loss": 0.7126, "step": 12109 }, { "epoch": 0.6223661219035872, "grad_norm": 1.1402454376220703, "learning_rate": 3.2962442399423842e-06, "loss": 0.6547, "step": 12110 }, { "epoch": 0.6224175146469318, "grad_norm": 1.1249759197235107, "learning_rate": 3.2954618169520523e-06, "loss": 0.698, "step": 12111 }, { "epoch": 0.6224689073902765, "grad_norm": 1.1534699201583862, "learning_rate": 3.2946794411874406e-06, "loss": 0.7443, "step": 12112 }, { "epoch": 0.6225203001336211, "grad_norm": 1.1092865467071533, "learning_rate": 3.2938971126702233e-06, "loss": 0.7318, "step": 12113 }, { "epoch": 0.6225716928769658, "grad_norm": 1.082006812095642, "learning_rate": 3.293114831422076e-06, "loss": 0.67, "step": 12114 }, { "epoch": 0.6226230856203104, "grad_norm": 0.75110924243927, "learning_rate": 3.2923325974646723e-06, "loss": 0.7106, "step": 12115 }, { "epoch": 0.6226744783636551, "grad_norm": 0.6790202856063843, "learning_rate": 3.2915504108196885e-06, "loss": 0.6756, "step": 12116 }, { "epoch": 0.6227258711069997, "grad_norm": 0.7299683094024658, "learning_rate": 3.2907682715087887e-06, "loss": 0.7017, "step": 12117 }, { "epoch": 0.6227772638503444, "grad_norm": 1.1198533773422241, "learning_rate": 3.2899861795536476e-06, "loss": 0.7039, "step": 12118 }, { "epoch": 0.622828656593689, "grad_norm": 1.049757957458496, "learning_rate": 3.2892041349759316e-06, "loss": 0.7259, "step": 12119 }, { "epoch": 0.6228800493370336, "grad_norm": 1.1526069641113281, "learning_rate": 3.2884221377973124e-06, "loss": 0.7179, "step": 12120 }, { "epoch": 0.6229314420803782, "grad_norm": 1.0349680185317993, "learning_rate": 3.2876401880394495e-06, "loss": 0.7414, "step": 12121 }, { "epoch": 0.6229828348237229, "grad_norm": 1.0531561374664307, "learning_rate": 3.2868582857240107e-06, "loss": 0.7109, "step": 12122 }, { "epoch": 0.6230342275670675, "grad_norm": 1.0895947217941284, "learning_rate": 3.28607643087266e-06, "loss": 0.7451, "step": 12123 }, { "epoch": 0.6230856203104121, "grad_norm": 0.6915774345397949, "learning_rate": 3.2852946235070583e-06, "loss": 0.657, "step": 12124 }, { "epoch": 0.6231370130537568, "grad_norm": 1.0454248189926147, "learning_rate": 3.2845128636488656e-06, "loss": 0.6756, "step": 12125 }, { "epoch": 0.6231884057971014, "grad_norm": 1.129713773727417, "learning_rate": 3.283731151319744e-06, "loss": 0.7639, "step": 12126 }, { "epoch": 0.6232397985404461, "grad_norm": 0.7431758642196655, "learning_rate": 3.282949486541348e-06, "loss": 0.7185, "step": 12127 }, { "epoch": 0.6232911912837907, "grad_norm": 1.1020402908325195, "learning_rate": 3.282167869335335e-06, "loss": 0.7462, "step": 12128 }, { "epoch": 0.6233425840271354, "grad_norm": 1.0747895240783691, "learning_rate": 3.281386299723364e-06, "loss": 0.6964, "step": 12129 }, { "epoch": 0.62339397677048, "grad_norm": 0.7203353643417358, "learning_rate": 3.2806047777270845e-06, "loss": 0.6667, "step": 12130 }, { "epoch": 0.6234453695138247, "grad_norm": 1.1273399591445923, "learning_rate": 3.2798233033681504e-06, "loss": 0.7722, "step": 12131 }, { "epoch": 0.6234967622571693, "grad_norm": 1.0584933757781982, "learning_rate": 3.279041876668215e-06, "loss": 0.7034, "step": 12132 }, { "epoch": 0.623548155000514, "grad_norm": 1.0835890769958496, "learning_rate": 3.2782604976489274e-06, "loss": 0.7218, "step": 12133 }, { "epoch": 0.6235995477438586, "grad_norm": 1.0417078733444214, "learning_rate": 3.277479166331935e-06, "loss": 0.7785, "step": 12134 }, { "epoch": 0.6236509404872032, "grad_norm": 1.014622449874878, "learning_rate": 3.2766978827388864e-06, "loss": 0.6622, "step": 12135 }, { "epoch": 0.6237023332305478, "grad_norm": 0.6786527633666992, "learning_rate": 3.2759166468914305e-06, "loss": 0.6627, "step": 12136 }, { "epoch": 0.6237537259738924, "grad_norm": 1.1211113929748535, "learning_rate": 3.275135458811207e-06, "loss": 0.7416, "step": 12137 }, { "epoch": 0.6238051187172371, "grad_norm": 1.0647112131118774, "learning_rate": 3.274354318519862e-06, "loss": 0.7309, "step": 12138 }, { "epoch": 0.6238565114605817, "grad_norm": 1.0913984775543213, "learning_rate": 3.2735732260390397e-06, "loss": 0.7387, "step": 12139 }, { "epoch": 0.6239079042039264, "grad_norm": 0.7015288472175598, "learning_rate": 3.272792181390376e-06, "loss": 0.6744, "step": 12140 }, { "epoch": 0.623959296947271, "grad_norm": 1.1160334348678589, "learning_rate": 3.272011184595514e-06, "loss": 0.7125, "step": 12141 }, { "epoch": 0.6240106896906157, "grad_norm": 1.0897620916366577, "learning_rate": 3.2712302356760917e-06, "loss": 0.7758, "step": 12142 }, { "epoch": 0.6240620824339603, "grad_norm": 1.0814694166183472, "learning_rate": 3.2704493346537448e-06, "loss": 0.7007, "step": 12143 }, { "epoch": 0.624113475177305, "grad_norm": 1.0665570497512817, "learning_rate": 3.269668481550109e-06, "loss": 0.7179, "step": 12144 }, { "epoch": 0.6241648679206496, "grad_norm": 0.8287755250930786, "learning_rate": 3.2688876763868204e-06, "loss": 0.6901, "step": 12145 }, { "epoch": 0.6242162606639943, "grad_norm": 0.6976672410964966, "learning_rate": 3.268106919185509e-06, "loss": 0.6237, "step": 12146 }, { "epoch": 0.6242676534073389, "grad_norm": 1.080417513847351, "learning_rate": 3.267326209967808e-06, "loss": 0.7476, "step": 12147 }, { "epoch": 0.6243190461506836, "grad_norm": 1.1457500457763672, "learning_rate": 3.266545548755349e-06, "loss": 0.7331, "step": 12148 }, { "epoch": 0.6243704388940282, "grad_norm": 1.162287712097168, "learning_rate": 3.2657649355697572e-06, "loss": 0.7774, "step": 12149 }, { "epoch": 0.6244218316373727, "grad_norm": 1.0370932817459106, "learning_rate": 3.2649843704326627e-06, "loss": 0.6921, "step": 12150 }, { "epoch": 0.6244732243807174, "grad_norm": 1.1899876594543457, "learning_rate": 3.2642038533656916e-06, "loss": 0.7344, "step": 12151 }, { "epoch": 0.624524617124062, "grad_norm": 0.7286758422851562, "learning_rate": 3.2634233843904687e-06, "loss": 0.6255, "step": 12152 }, { "epoch": 0.6245760098674067, "grad_norm": 0.7241337895393372, "learning_rate": 3.262642963528617e-06, "loss": 0.6693, "step": 12153 }, { "epoch": 0.6246274026107513, "grad_norm": 1.0441992282867432, "learning_rate": 3.2618625908017587e-06, "loss": 0.6926, "step": 12154 }, { "epoch": 0.624678795354096, "grad_norm": 1.066538691520691, "learning_rate": 3.2610822662315172e-06, "loss": 0.7694, "step": 12155 }, { "epoch": 0.6247301880974406, "grad_norm": 1.091275930404663, "learning_rate": 3.260301989839508e-06, "loss": 0.7237, "step": 12156 }, { "epoch": 0.6247815808407853, "grad_norm": 1.1216894388198853, "learning_rate": 3.2595217616473516e-06, "loss": 0.7191, "step": 12157 }, { "epoch": 0.6248329735841299, "grad_norm": 1.0999263525009155, "learning_rate": 3.2587415816766664e-06, "loss": 0.7281, "step": 12158 }, { "epoch": 0.6248843663274746, "grad_norm": 1.1139849424362183, "learning_rate": 3.257961449949064e-06, "loss": 0.7284, "step": 12159 }, { "epoch": 0.6249357590708192, "grad_norm": 1.034637212753296, "learning_rate": 3.2571813664861616e-06, "loss": 0.7186, "step": 12160 }, { "epoch": 0.6249871518141639, "grad_norm": 1.14315927028656, "learning_rate": 3.256401331309572e-06, "loss": 0.6675, "step": 12161 }, { "epoch": 0.6250385445575085, "grad_norm": 1.2068579196929932, "learning_rate": 3.2556213444409054e-06, "loss": 0.7083, "step": 12162 }, { "epoch": 0.6250899373008532, "grad_norm": 1.15444815158844, "learning_rate": 3.254841405901773e-06, "loss": 0.7337, "step": 12163 }, { "epoch": 0.6251413300441978, "grad_norm": 0.7700082063674927, "learning_rate": 3.254061515713784e-06, "loss": 0.6976, "step": 12164 }, { "epoch": 0.6251927227875425, "grad_norm": 0.7679399251937866, "learning_rate": 3.253281673898544e-06, "loss": 0.6459, "step": 12165 }, { "epoch": 0.625244115530887, "grad_norm": 1.101738691329956, "learning_rate": 3.2525018804776608e-06, "loss": 0.7133, "step": 12166 }, { "epoch": 0.6252955082742316, "grad_norm": 1.0523805618286133, "learning_rate": 3.2517221354727385e-06, "loss": 0.7092, "step": 12167 }, { "epoch": 0.6253469010175763, "grad_norm": 1.1280841827392578, "learning_rate": 3.2509424389053834e-06, "loss": 0.7322, "step": 12168 }, { "epoch": 0.6253982937609209, "grad_norm": 0.9063828587532043, "learning_rate": 3.2501627907971933e-06, "loss": 0.624, "step": 12169 }, { "epoch": 0.6254496865042656, "grad_norm": 1.14544677734375, "learning_rate": 3.2493831911697715e-06, "loss": 0.6969, "step": 12170 }, { "epoch": 0.6255010792476102, "grad_norm": 0.7130489945411682, "learning_rate": 3.248603640044719e-06, "loss": 0.6683, "step": 12171 }, { "epoch": 0.6255524719909549, "grad_norm": 1.0864167213439941, "learning_rate": 3.24782413744363e-06, "loss": 0.7146, "step": 12172 }, { "epoch": 0.6256038647342995, "grad_norm": 0.744078516960144, "learning_rate": 3.2470446833881033e-06, "loss": 0.6721, "step": 12173 }, { "epoch": 0.6256552574776442, "grad_norm": 0.6829371452331543, "learning_rate": 3.246265277899735e-06, "loss": 0.672, "step": 12174 }, { "epoch": 0.6257066502209888, "grad_norm": 1.051866054534912, "learning_rate": 3.245485921000118e-06, "loss": 0.7608, "step": 12175 }, { "epoch": 0.6257580429643335, "grad_norm": 0.7587919235229492, "learning_rate": 3.2447066127108456e-06, "loss": 0.6816, "step": 12176 }, { "epoch": 0.6258094357076781, "grad_norm": 1.0298322439193726, "learning_rate": 3.2439273530535113e-06, "loss": 0.678, "step": 12177 }, { "epoch": 0.6258608284510228, "grad_norm": 1.1157838106155396, "learning_rate": 3.243148142049701e-06, "loss": 0.7231, "step": 12178 }, { "epoch": 0.6259122211943674, "grad_norm": 1.1767150163650513, "learning_rate": 3.242368979721006e-06, "loss": 0.7381, "step": 12179 }, { "epoch": 0.625963613937712, "grad_norm": 1.0673679113388062, "learning_rate": 3.2415898660890154e-06, "loss": 0.7178, "step": 12180 }, { "epoch": 0.6260150066810566, "grad_norm": 1.012874722480774, "learning_rate": 3.2408108011753105e-06, "loss": 0.7231, "step": 12181 }, { "epoch": 0.6260663994244012, "grad_norm": 1.1434369087219238, "learning_rate": 3.2400317850014784e-06, "loss": 0.7232, "step": 12182 }, { "epoch": 0.6261177921677459, "grad_norm": 1.0482686758041382, "learning_rate": 3.2392528175891037e-06, "loss": 0.6872, "step": 12183 }, { "epoch": 0.6261691849110905, "grad_norm": 1.1044414043426514, "learning_rate": 3.238473898959767e-06, "loss": 0.6891, "step": 12184 }, { "epoch": 0.6262205776544352, "grad_norm": 1.0434261560440063, "learning_rate": 3.2376950291350484e-06, "loss": 0.7156, "step": 12185 }, { "epoch": 0.6262719703977798, "grad_norm": 1.1340068578720093, "learning_rate": 3.2369162081365284e-06, "loss": 0.7372, "step": 12186 }, { "epoch": 0.6263233631411245, "grad_norm": 1.112946629524231, "learning_rate": 3.236137435985787e-06, "loss": 0.7676, "step": 12187 }, { "epoch": 0.6263747558844691, "grad_norm": 1.0489273071289062, "learning_rate": 3.2353587127043953e-06, "loss": 0.6939, "step": 12188 }, { "epoch": 0.6264261486278138, "grad_norm": 1.125565528869629, "learning_rate": 3.2345800383139313e-06, "loss": 0.6963, "step": 12189 }, { "epoch": 0.6264775413711584, "grad_norm": 1.1287851333618164, "learning_rate": 3.2338014128359714e-06, "loss": 0.6467, "step": 12190 }, { "epoch": 0.626528934114503, "grad_norm": 1.1763123273849487, "learning_rate": 3.2330228362920833e-06, "loss": 0.7595, "step": 12191 }, { "epoch": 0.6265803268578477, "grad_norm": 1.073868751525879, "learning_rate": 3.2322443087038407e-06, "loss": 0.7075, "step": 12192 }, { "epoch": 0.6266317196011923, "grad_norm": 1.093248963356018, "learning_rate": 3.2314658300928147e-06, "loss": 0.7005, "step": 12193 }, { "epoch": 0.626683112344537, "grad_norm": 0.8614458441734314, "learning_rate": 3.2306874004805704e-06, "loss": 0.673, "step": 12194 }, { "epoch": 0.6267345050878816, "grad_norm": 1.0557905435562134, "learning_rate": 3.2299090198886773e-06, "loss": 0.7496, "step": 12195 }, { "epoch": 0.6267858978312262, "grad_norm": 1.11032235622406, "learning_rate": 3.229130688338702e-06, "loss": 0.7063, "step": 12196 }, { "epoch": 0.6268372905745708, "grad_norm": 1.1229689121246338, "learning_rate": 3.2283524058522065e-06, "loss": 0.7, "step": 12197 }, { "epoch": 0.6268886833179155, "grad_norm": 1.124016523361206, "learning_rate": 3.227574172450754e-06, "loss": 0.7599, "step": 12198 }, { "epoch": 0.6269400760612601, "grad_norm": 0.7309842705726624, "learning_rate": 3.226795988155906e-06, "loss": 0.6996, "step": 12199 }, { "epoch": 0.6269914688046048, "grad_norm": 0.7533501386642456, "learning_rate": 3.2260178529892266e-06, "loss": 0.64, "step": 12200 }, { "epoch": 0.6270428615479494, "grad_norm": 1.1044063568115234, "learning_rate": 3.225239766972269e-06, "loss": 0.6933, "step": 12201 }, { "epoch": 0.6270942542912941, "grad_norm": 0.7195678949356079, "learning_rate": 3.2244617301265947e-06, "loss": 0.6747, "step": 12202 }, { "epoch": 0.6271456470346387, "grad_norm": 1.1011396646499634, "learning_rate": 3.223683742473758e-06, "loss": 0.6973, "step": 12203 }, { "epoch": 0.6271970397779834, "grad_norm": 1.1169767379760742, "learning_rate": 3.2229058040353145e-06, "loss": 0.7128, "step": 12204 }, { "epoch": 0.627248432521328, "grad_norm": 0.6582478284835815, "learning_rate": 3.222127914832817e-06, "loss": 0.6523, "step": 12205 }, { "epoch": 0.6272998252646727, "grad_norm": 1.0497746467590332, "learning_rate": 3.2213500748878207e-06, "loss": 0.6578, "step": 12206 }, { "epoch": 0.6273512180080173, "grad_norm": 1.0629608631134033, "learning_rate": 3.2205722842218714e-06, "loss": 0.6894, "step": 12207 }, { "epoch": 0.6274026107513619, "grad_norm": 1.060852289199829, "learning_rate": 3.219794542856521e-06, "loss": 0.7176, "step": 12208 }, { "epoch": 0.6274540034947066, "grad_norm": 1.0877114534378052, "learning_rate": 3.219016850813319e-06, "loss": 0.7233, "step": 12209 }, { "epoch": 0.6275053962380512, "grad_norm": 0.7119256854057312, "learning_rate": 3.2182392081138085e-06, "loss": 0.6441, "step": 12210 }, { "epoch": 0.6275567889813958, "grad_norm": 1.1235225200653076, "learning_rate": 3.2174616147795367e-06, "loss": 0.7321, "step": 12211 }, { "epoch": 0.6276081817247404, "grad_norm": 1.0369467735290527, "learning_rate": 3.216684070832049e-06, "loss": 0.7062, "step": 12212 }, { "epoch": 0.6276595744680851, "grad_norm": 0.6881214380264282, "learning_rate": 3.2159065762928842e-06, "loss": 0.6475, "step": 12213 }, { "epoch": 0.6277109672114297, "grad_norm": 1.074841856956482, "learning_rate": 3.215129131183586e-06, "loss": 0.7229, "step": 12214 }, { "epoch": 0.6277623599547744, "grad_norm": 0.9904302954673767, "learning_rate": 3.2143517355256958e-06, "loss": 0.7517, "step": 12215 }, { "epoch": 0.627813752698119, "grad_norm": 0.7632718682289124, "learning_rate": 3.2135743893407497e-06, "loss": 0.6763, "step": 12216 }, { "epoch": 0.6278651454414637, "grad_norm": 1.0803008079528809, "learning_rate": 3.2127970926502837e-06, "loss": 0.6965, "step": 12217 }, { "epoch": 0.6279165381848083, "grad_norm": 0.7880908846855164, "learning_rate": 3.2120198454758354e-06, "loss": 0.7054, "step": 12218 }, { "epoch": 0.627967930928153, "grad_norm": 0.8806077241897583, "learning_rate": 3.21124264783894e-06, "loss": 0.6431, "step": 12219 }, { "epoch": 0.6280193236714976, "grad_norm": 1.0449053049087524, "learning_rate": 3.210465499761127e-06, "loss": 0.6591, "step": 12220 }, { "epoch": 0.6280707164148422, "grad_norm": 1.0450180768966675, "learning_rate": 3.2096884012639305e-06, "loss": 0.6619, "step": 12221 }, { "epoch": 0.6281221091581869, "grad_norm": 1.065653681755066, "learning_rate": 3.2089113523688824e-06, "loss": 0.7304, "step": 12222 }, { "epoch": 0.6281735019015315, "grad_norm": 0.7441000938415527, "learning_rate": 3.2081343530975073e-06, "loss": 0.6419, "step": 12223 }, { "epoch": 0.6282248946448762, "grad_norm": 1.1724079847335815, "learning_rate": 3.2073574034713346e-06, "loss": 0.765, "step": 12224 }, { "epoch": 0.6282762873882208, "grad_norm": 1.0313328504562378, "learning_rate": 3.2065805035118913e-06, "loss": 0.7139, "step": 12225 }, { "epoch": 0.6283276801315654, "grad_norm": 0.7392985820770264, "learning_rate": 3.2058036532407e-06, "loss": 0.6963, "step": 12226 }, { "epoch": 0.62837907287491, "grad_norm": 1.040402889251709, "learning_rate": 3.2050268526792855e-06, "loss": 0.7546, "step": 12227 }, { "epoch": 0.6284304656182547, "grad_norm": 0.7218077778816223, "learning_rate": 3.204250101849171e-06, "loss": 0.6574, "step": 12228 }, { "epoch": 0.6284818583615993, "grad_norm": 1.0274429321289062, "learning_rate": 3.203473400771875e-06, "loss": 0.7419, "step": 12229 }, { "epoch": 0.628533251104944, "grad_norm": 1.0963010787963867, "learning_rate": 3.202696749468916e-06, "loss": 0.6768, "step": 12230 }, { "epoch": 0.6285846438482886, "grad_norm": 1.097629189491272, "learning_rate": 3.2019201479618132e-06, "loss": 0.7057, "step": 12231 }, { "epoch": 0.6286360365916333, "grad_norm": 1.1337676048278809, "learning_rate": 3.201143596272085e-06, "loss": 0.7498, "step": 12232 }, { "epoch": 0.6286874293349779, "grad_norm": 1.0891634225845337, "learning_rate": 3.2003670944212424e-06, "loss": 0.7187, "step": 12233 }, { "epoch": 0.6287388220783225, "grad_norm": 1.0536798238754272, "learning_rate": 3.199590642430801e-06, "loss": 0.7157, "step": 12234 }, { "epoch": 0.6287902148216672, "grad_norm": 1.0377498865127563, "learning_rate": 3.1988142403222738e-06, "loss": 0.7957, "step": 12235 }, { "epoch": 0.6288416075650118, "grad_norm": 0.9967600107192993, "learning_rate": 3.1980378881171704e-06, "loss": 0.7272, "step": 12236 }, { "epoch": 0.6288930003083565, "grad_norm": 1.0806214809417725, "learning_rate": 3.1972615858370004e-06, "loss": 0.7157, "step": 12237 }, { "epoch": 0.6289443930517011, "grad_norm": 1.0752514600753784, "learning_rate": 3.1964853335032742e-06, "loss": 0.666, "step": 12238 }, { "epoch": 0.6289957857950458, "grad_norm": 0.7343825697898865, "learning_rate": 3.1957091311374954e-06, "loss": 0.6329, "step": 12239 }, { "epoch": 0.6290471785383904, "grad_norm": 1.0848947763442993, "learning_rate": 3.1949329787611697e-06, "loss": 0.7663, "step": 12240 }, { "epoch": 0.629098571281735, "grad_norm": 1.051859974861145, "learning_rate": 3.1941568763958055e-06, "loss": 0.7091, "step": 12241 }, { "epoch": 0.6291499640250796, "grad_norm": 1.113353967666626, "learning_rate": 3.1933808240628987e-06, "loss": 0.7169, "step": 12242 }, { "epoch": 0.6292013567684243, "grad_norm": 1.1560386419296265, "learning_rate": 3.1926048217839546e-06, "loss": 0.7588, "step": 12243 }, { "epoch": 0.6292527495117689, "grad_norm": 0.9234907031059265, "learning_rate": 3.191828869580472e-06, "loss": 0.6523, "step": 12244 }, { "epoch": 0.6293041422551136, "grad_norm": 1.1048601865768433, "learning_rate": 3.1910529674739505e-06, "loss": 0.7119, "step": 12245 }, { "epoch": 0.6293555349984582, "grad_norm": 1.0674775838851929, "learning_rate": 3.1902771154858846e-06, "loss": 0.7261, "step": 12246 }, { "epoch": 0.6294069277418028, "grad_norm": 1.0577160120010376, "learning_rate": 3.1895013136377727e-06, "loss": 0.7175, "step": 12247 }, { "epoch": 0.6294583204851475, "grad_norm": 1.0778430700302124, "learning_rate": 3.188725561951109e-06, "loss": 0.6769, "step": 12248 }, { "epoch": 0.6295097132284921, "grad_norm": 1.151700496673584, "learning_rate": 3.1879498604473834e-06, "loss": 0.7271, "step": 12249 }, { "epoch": 0.6295611059718368, "grad_norm": 1.0111408233642578, "learning_rate": 3.1871742091480894e-06, "loss": 0.7257, "step": 12250 }, { "epoch": 0.6296124987151814, "grad_norm": 1.1598451137542725, "learning_rate": 3.1863986080747195e-06, "loss": 0.7573, "step": 12251 }, { "epoch": 0.6296638914585261, "grad_norm": 1.0505614280700684, "learning_rate": 3.185623057248757e-06, "loss": 0.6984, "step": 12252 }, { "epoch": 0.6297152842018707, "grad_norm": 1.0693230628967285, "learning_rate": 3.1848475566916927e-06, "loss": 0.7161, "step": 12253 }, { "epoch": 0.6297666769452154, "grad_norm": 1.1077351570129395, "learning_rate": 3.184072106425013e-06, "loss": 0.7347, "step": 12254 }, { "epoch": 0.62981806968856, "grad_norm": 0.740250825881958, "learning_rate": 3.1832967064702005e-06, "loss": 0.6568, "step": 12255 }, { "epoch": 0.6298694624319047, "grad_norm": 1.1873940229415894, "learning_rate": 3.1825213568487384e-06, "loss": 0.7406, "step": 12256 }, { "epoch": 0.6299208551752492, "grad_norm": 1.1760528087615967, "learning_rate": 3.181746057582111e-06, "loss": 0.7124, "step": 12257 }, { "epoch": 0.6299722479185939, "grad_norm": 1.2016164064407349, "learning_rate": 3.1809708086917956e-06, "loss": 0.7082, "step": 12258 }, { "epoch": 0.6300236406619385, "grad_norm": 1.0883525609970093, "learning_rate": 3.1801956101992726e-06, "loss": 0.7478, "step": 12259 }, { "epoch": 0.6300750334052831, "grad_norm": 1.0923559665679932, "learning_rate": 3.179420462126019e-06, "loss": 0.7054, "step": 12260 }, { "epoch": 0.6301264261486278, "grad_norm": 1.0236154794692993, "learning_rate": 3.1786453644935133e-06, "loss": 0.7072, "step": 12261 }, { "epoch": 0.6301778188919724, "grad_norm": 1.0006366968154907, "learning_rate": 3.177870317323226e-06, "loss": 0.7259, "step": 12262 }, { "epoch": 0.6302292116353171, "grad_norm": 0.9935770034790039, "learning_rate": 3.177095320636633e-06, "loss": 0.6984, "step": 12263 }, { "epoch": 0.6302806043786617, "grad_norm": 1.1000913381576538, "learning_rate": 3.1763203744552073e-06, "loss": 0.7252, "step": 12264 }, { "epoch": 0.6303319971220064, "grad_norm": 1.060997724533081, "learning_rate": 3.1755454788004164e-06, "loss": 0.7047, "step": 12265 }, { "epoch": 0.630383389865351, "grad_norm": 1.0514204502105713, "learning_rate": 3.174770633693733e-06, "loss": 0.7194, "step": 12266 }, { "epoch": 0.6304347826086957, "grad_norm": 1.1313573122024536, "learning_rate": 3.173995839156622e-06, "loss": 0.7213, "step": 12267 }, { "epoch": 0.6304861753520403, "grad_norm": 0.7685599327087402, "learning_rate": 3.1732210952105507e-06, "loss": 0.6586, "step": 12268 }, { "epoch": 0.630537568095385, "grad_norm": 1.104049563407898, "learning_rate": 3.172446401876984e-06, "loss": 0.7338, "step": 12269 }, { "epoch": 0.6305889608387296, "grad_norm": 0.7544822096824646, "learning_rate": 3.1716717591773875e-06, "loss": 0.678, "step": 12270 }, { "epoch": 0.6306403535820743, "grad_norm": 1.0510345697402954, "learning_rate": 3.17089716713322e-06, "loss": 0.6371, "step": 12271 }, { "epoch": 0.6306917463254188, "grad_norm": 1.0860135555267334, "learning_rate": 3.1701226257659433e-06, "loss": 0.7147, "step": 12272 }, { "epoch": 0.6307431390687634, "grad_norm": 0.7083735466003418, "learning_rate": 3.169348135097018e-06, "loss": 0.6583, "step": 12273 }, { "epoch": 0.6307945318121081, "grad_norm": 1.051897644996643, "learning_rate": 3.1685736951479017e-06, "loss": 0.7145, "step": 12274 }, { "epoch": 0.6308459245554527, "grad_norm": 0.7621341943740845, "learning_rate": 3.1677993059400493e-06, "loss": 0.6613, "step": 12275 }, { "epoch": 0.6308973172987974, "grad_norm": 1.1170969009399414, "learning_rate": 3.1670249674949167e-06, "loss": 0.7488, "step": 12276 }, { "epoch": 0.630948710042142, "grad_norm": 1.0387990474700928, "learning_rate": 3.1662506798339586e-06, "loss": 0.7166, "step": 12277 }, { "epoch": 0.6310001027854867, "grad_norm": 1.0262210369110107, "learning_rate": 3.1654764429786266e-06, "loss": 0.7043, "step": 12278 }, { "epoch": 0.6310514955288313, "grad_norm": 1.1153687238693237, "learning_rate": 3.1647022569503715e-06, "loss": 0.712, "step": 12279 }, { "epoch": 0.631102888272176, "grad_norm": 1.086102843284607, "learning_rate": 3.163928121770645e-06, "loss": 0.6968, "step": 12280 }, { "epoch": 0.6311542810155206, "grad_norm": 0.7293244004249573, "learning_rate": 3.1631540374608915e-06, "loss": 0.6465, "step": 12281 }, { "epoch": 0.6312056737588653, "grad_norm": 1.1921792030334473, "learning_rate": 3.1623800040425595e-06, "loss": 0.7396, "step": 12282 }, { "epoch": 0.6312570665022099, "grad_norm": 1.1262049674987793, "learning_rate": 3.161606021537096e-06, "loss": 0.7765, "step": 12283 }, { "epoch": 0.6313084592455546, "grad_norm": 1.0663120746612549, "learning_rate": 3.1608320899659404e-06, "loss": 0.679, "step": 12284 }, { "epoch": 0.6313598519888992, "grad_norm": 1.103186845779419, "learning_rate": 3.160058209350538e-06, "loss": 0.7311, "step": 12285 }, { "epoch": 0.6314112447322439, "grad_norm": 1.039108395576477, "learning_rate": 3.159284379712332e-06, "loss": 0.6872, "step": 12286 }, { "epoch": 0.6314626374755884, "grad_norm": 1.0961426496505737, "learning_rate": 3.1585106010727573e-06, "loss": 0.7149, "step": 12287 }, { "epoch": 0.631514030218933, "grad_norm": 1.1371374130249023, "learning_rate": 3.157736873453254e-06, "loss": 0.6906, "step": 12288 }, { "epoch": 0.6315654229622777, "grad_norm": 1.0854449272155762, "learning_rate": 3.1569631968752624e-06, "loss": 0.7419, "step": 12289 }, { "epoch": 0.6316168157056223, "grad_norm": 0.7221952676773071, "learning_rate": 3.156189571360213e-06, "loss": 0.6479, "step": 12290 }, { "epoch": 0.631668208448967, "grad_norm": 1.178221344947815, "learning_rate": 3.155415996929541e-06, "loss": 0.7276, "step": 12291 }, { "epoch": 0.6317196011923116, "grad_norm": 1.1213458776474, "learning_rate": 3.15464247360468e-06, "loss": 0.7578, "step": 12292 }, { "epoch": 0.6317709939356563, "grad_norm": 1.0248063802719116, "learning_rate": 3.153869001407062e-06, "loss": 0.6945, "step": 12293 }, { "epoch": 0.6318223866790009, "grad_norm": 1.091252088546753, "learning_rate": 3.153095580358114e-06, "loss": 0.7276, "step": 12294 }, { "epoch": 0.6318737794223456, "grad_norm": 1.045844554901123, "learning_rate": 3.1523222104792662e-06, "loss": 0.7373, "step": 12295 }, { "epoch": 0.6319251721656902, "grad_norm": 0.9801787734031677, "learning_rate": 3.151548891791945e-06, "loss": 0.6685, "step": 12296 }, { "epoch": 0.6319765649090349, "grad_norm": 0.9949115514755249, "learning_rate": 3.1507756243175756e-06, "loss": 0.6437, "step": 12297 }, { "epoch": 0.6320279576523795, "grad_norm": 1.0982470512390137, "learning_rate": 3.150002408077582e-06, "loss": 0.7014, "step": 12298 }, { "epoch": 0.6320793503957242, "grad_norm": 1.1212944984436035, "learning_rate": 3.149229243093389e-06, "loss": 0.735, "step": 12299 }, { "epoch": 0.6321307431390688, "grad_norm": 1.0576363801956177, "learning_rate": 3.1484561293864147e-06, "loss": 0.7263, "step": 12300 }, { "epoch": 0.6321821358824135, "grad_norm": 1.025613784790039, "learning_rate": 3.14768306697808e-06, "loss": 0.7059, "step": 12301 }, { "epoch": 0.632233528625758, "grad_norm": 0.7626006007194519, "learning_rate": 3.146910055889805e-06, "loss": 0.6293, "step": 12302 }, { "epoch": 0.6322849213691026, "grad_norm": 1.1101508140563965, "learning_rate": 3.146137096143004e-06, "loss": 0.6968, "step": 12303 }, { "epoch": 0.6323363141124473, "grad_norm": 1.0245519876480103, "learning_rate": 3.1453641877590925e-06, "loss": 0.6852, "step": 12304 }, { "epoch": 0.6323877068557919, "grad_norm": 1.0685632228851318, "learning_rate": 3.144591330759487e-06, "loss": 0.77, "step": 12305 }, { "epoch": 0.6324390995991366, "grad_norm": 1.1563650369644165, "learning_rate": 3.1438185251655984e-06, "loss": 0.713, "step": 12306 }, { "epoch": 0.6324904923424812, "grad_norm": 0.8291066884994507, "learning_rate": 3.1430457709988382e-06, "loss": 0.6822, "step": 12307 }, { "epoch": 0.6325418850858259, "grad_norm": 1.0921175479888916, "learning_rate": 3.142273068280616e-06, "loss": 0.7633, "step": 12308 }, { "epoch": 0.6325932778291705, "grad_norm": 1.0751779079437256, "learning_rate": 3.1415004170323436e-06, "loss": 0.7016, "step": 12309 }, { "epoch": 0.6326446705725152, "grad_norm": 1.0517675876617432, "learning_rate": 3.1407278172754224e-06, "loss": 0.6748, "step": 12310 }, { "epoch": 0.6326960633158598, "grad_norm": 0.849648654460907, "learning_rate": 3.139955269031261e-06, "loss": 0.6555, "step": 12311 }, { "epoch": 0.6327474560592045, "grad_norm": 1.071668028831482, "learning_rate": 3.1391827723212653e-06, "loss": 0.7138, "step": 12312 }, { "epoch": 0.6327988488025491, "grad_norm": 1.1235990524291992, "learning_rate": 3.138410327166833e-06, "loss": 0.7671, "step": 12313 }, { "epoch": 0.6328502415458938, "grad_norm": 1.0338060855865479, "learning_rate": 3.137637933589369e-06, "loss": 0.7044, "step": 12314 }, { "epoch": 0.6329016342892384, "grad_norm": 1.044932246208191, "learning_rate": 3.136865591610273e-06, "loss": 0.6394, "step": 12315 }, { "epoch": 0.632953027032583, "grad_norm": 1.049924373626709, "learning_rate": 3.136093301250942e-06, "loss": 0.74, "step": 12316 }, { "epoch": 0.6330044197759276, "grad_norm": 1.0535130500793457, "learning_rate": 3.1353210625327747e-06, "loss": 0.6791, "step": 12317 }, { "epoch": 0.6330558125192722, "grad_norm": 1.1084398031234741, "learning_rate": 3.134548875477165e-06, "loss": 0.7722, "step": 12318 }, { "epoch": 0.6331072052626169, "grad_norm": 1.0418457984924316, "learning_rate": 3.1337767401055076e-06, "loss": 0.6999, "step": 12319 }, { "epoch": 0.6331585980059615, "grad_norm": 1.0596643686294556, "learning_rate": 3.133004656439195e-06, "loss": 0.6816, "step": 12320 }, { "epoch": 0.6332099907493062, "grad_norm": 1.0481611490249634, "learning_rate": 3.1322326244996203e-06, "loss": 0.6726, "step": 12321 }, { "epoch": 0.6332613834926508, "grad_norm": 1.1281737089157104, "learning_rate": 3.13146064430817e-06, "loss": 0.684, "step": 12322 }, { "epoch": 0.6333127762359955, "grad_norm": 1.089752435684204, "learning_rate": 3.1306887158862347e-06, "loss": 0.6844, "step": 12323 }, { "epoch": 0.6333641689793401, "grad_norm": 1.1281038522720337, "learning_rate": 3.1299168392552005e-06, "loss": 0.6907, "step": 12324 }, { "epoch": 0.6334155617226848, "grad_norm": 1.0633339881896973, "learning_rate": 3.1291450144364556e-06, "loss": 0.7193, "step": 12325 }, { "epoch": 0.6334669544660294, "grad_norm": 1.1600886583328247, "learning_rate": 3.128373241451379e-06, "loss": 0.6896, "step": 12326 }, { "epoch": 0.633518347209374, "grad_norm": 1.0765910148620605, "learning_rate": 3.127601520321357e-06, "loss": 0.743, "step": 12327 }, { "epoch": 0.6335697399527187, "grad_norm": 0.7219448089599609, "learning_rate": 3.126829851067771e-06, "loss": 0.6429, "step": 12328 }, { "epoch": 0.6336211326960633, "grad_norm": 1.0843355655670166, "learning_rate": 3.126058233711998e-06, "loss": 0.7596, "step": 12329 }, { "epoch": 0.633672525439408, "grad_norm": 1.1019511222839355, "learning_rate": 3.1252866682754177e-06, "loss": 0.7249, "step": 12330 }, { "epoch": 0.6337239181827526, "grad_norm": 1.0878782272338867, "learning_rate": 3.1245151547794105e-06, "loss": 0.7444, "step": 12331 }, { "epoch": 0.6337753109260973, "grad_norm": 1.104278802871704, "learning_rate": 3.1237436932453463e-06, "loss": 0.7305, "step": 12332 }, { "epoch": 0.6338267036694418, "grad_norm": 1.122657299041748, "learning_rate": 3.1229722836946013e-06, "loss": 0.704, "step": 12333 }, { "epoch": 0.6338780964127865, "grad_norm": 0.7560040950775146, "learning_rate": 3.122200926148551e-06, "loss": 0.7078, "step": 12334 }, { "epoch": 0.6339294891561311, "grad_norm": 1.0697526931762695, "learning_rate": 3.121429620628561e-06, "loss": 0.6981, "step": 12335 }, { "epoch": 0.6339808818994758, "grad_norm": 1.084586262702942, "learning_rate": 3.120658367156004e-06, "loss": 0.7386, "step": 12336 }, { "epoch": 0.6340322746428204, "grad_norm": 1.1103061437606812, "learning_rate": 3.119887165752249e-06, "loss": 0.7629, "step": 12337 }, { "epoch": 0.6340836673861651, "grad_norm": 1.0343531370162964, "learning_rate": 3.119116016438661e-06, "loss": 0.7086, "step": 12338 }, { "epoch": 0.6341350601295097, "grad_norm": 1.0980199575424194, "learning_rate": 3.1183449192366054e-06, "loss": 0.6996, "step": 12339 }, { "epoch": 0.6341864528728544, "grad_norm": 1.092248558998108, "learning_rate": 3.117573874167448e-06, "loss": 0.7276, "step": 12340 }, { "epoch": 0.634237845616199, "grad_norm": 1.055367112159729, "learning_rate": 3.116802881252552e-06, "loss": 0.7339, "step": 12341 }, { "epoch": 0.6342892383595437, "grad_norm": 1.0575984716415405, "learning_rate": 3.116031940513275e-06, "loss": 0.7191, "step": 12342 }, { "epoch": 0.6343406311028883, "grad_norm": 1.083843469619751, "learning_rate": 3.115261051970978e-06, "loss": 0.7353, "step": 12343 }, { "epoch": 0.6343920238462329, "grad_norm": 1.0618653297424316, "learning_rate": 3.1144902156470215e-06, "loss": 0.6522, "step": 12344 }, { "epoch": 0.6344434165895776, "grad_norm": 1.0892795324325562, "learning_rate": 3.113719431562757e-06, "loss": 0.6793, "step": 12345 }, { "epoch": 0.6344948093329222, "grad_norm": 1.1061733961105347, "learning_rate": 3.112948699739544e-06, "loss": 0.7193, "step": 12346 }, { "epoch": 0.6345462020762669, "grad_norm": 0.6837403178215027, "learning_rate": 3.1121780201987363e-06, "loss": 0.6627, "step": 12347 }, { "epoch": 0.6345975948196114, "grad_norm": 1.0572859048843384, "learning_rate": 3.1114073929616828e-06, "loss": 0.7062, "step": 12348 }, { "epoch": 0.6346489875629561, "grad_norm": 0.7688980102539062, "learning_rate": 3.110636818049738e-06, "loss": 0.6447, "step": 12349 }, { "epoch": 0.6347003803063007, "grad_norm": 1.0971711874008179, "learning_rate": 3.1098662954842516e-06, "loss": 0.7202, "step": 12350 }, { "epoch": 0.6347517730496454, "grad_norm": 1.0794202089309692, "learning_rate": 3.1090958252865678e-06, "loss": 0.6876, "step": 12351 }, { "epoch": 0.63480316579299, "grad_norm": 0.7854575514793396, "learning_rate": 3.1083254074780356e-06, "loss": 0.6513, "step": 12352 }, { "epoch": 0.6348545585363347, "grad_norm": 1.058131456375122, "learning_rate": 3.107555042079999e-06, "loss": 0.744, "step": 12353 }, { "epoch": 0.6349059512796793, "grad_norm": 1.0140466690063477, "learning_rate": 3.106784729113806e-06, "loss": 0.6823, "step": 12354 }, { "epoch": 0.634957344023024, "grad_norm": 1.037102460861206, "learning_rate": 3.1060144686007924e-06, "loss": 0.6974, "step": 12355 }, { "epoch": 0.6350087367663686, "grad_norm": 1.0821844339370728, "learning_rate": 3.1052442605623024e-06, "loss": 0.7417, "step": 12356 }, { "epoch": 0.6350601295097132, "grad_norm": 0.7502889037132263, "learning_rate": 3.1044741050196754e-06, "loss": 0.6611, "step": 12357 }, { "epoch": 0.6351115222530579, "grad_norm": 1.0920366048812866, "learning_rate": 3.1037040019942477e-06, "loss": 0.7342, "step": 12358 }, { "epoch": 0.6351629149964025, "grad_norm": 1.055985450744629, "learning_rate": 3.1029339515073566e-06, "loss": 0.7065, "step": 12359 }, { "epoch": 0.6352143077397472, "grad_norm": 1.0918570756912231, "learning_rate": 3.1021639535803384e-06, "loss": 0.6966, "step": 12360 }, { "epoch": 0.6352657004830918, "grad_norm": 1.1720868349075317, "learning_rate": 3.101394008234524e-06, "loss": 0.7501, "step": 12361 }, { "epoch": 0.6353170932264365, "grad_norm": 1.07143235206604, "learning_rate": 3.100624115491246e-06, "loss": 0.7181, "step": 12362 }, { "epoch": 0.635368485969781, "grad_norm": 1.1465545892715454, "learning_rate": 3.0998542753718373e-06, "loss": 0.7531, "step": 12363 }, { "epoch": 0.6354198787131257, "grad_norm": 1.1917341947555542, "learning_rate": 3.099084487897624e-06, "loss": 0.7211, "step": 12364 }, { "epoch": 0.6354712714564703, "grad_norm": 0.9880145788192749, "learning_rate": 3.0983147530899342e-06, "loss": 0.6872, "step": 12365 }, { "epoch": 0.635522664199815, "grad_norm": 1.0699357986450195, "learning_rate": 3.097545070970096e-06, "loss": 0.7496, "step": 12366 }, { "epoch": 0.6355740569431596, "grad_norm": 1.0874959230422974, "learning_rate": 3.096775441559432e-06, "loss": 0.7314, "step": 12367 }, { "epoch": 0.6356254496865043, "grad_norm": 1.0823090076446533, "learning_rate": 3.0960058648792674e-06, "loss": 0.7366, "step": 12368 }, { "epoch": 0.6356768424298489, "grad_norm": 1.1026524305343628, "learning_rate": 3.095236340950922e-06, "loss": 0.7375, "step": 12369 }, { "epoch": 0.6357282351731935, "grad_norm": 1.057992935180664, "learning_rate": 3.094466869795718e-06, "loss": 0.7029, "step": 12370 }, { "epoch": 0.6357796279165382, "grad_norm": 1.031686782836914, "learning_rate": 3.093697451434973e-06, "loss": 0.6818, "step": 12371 }, { "epoch": 0.6358310206598828, "grad_norm": 1.0793780088424683, "learning_rate": 3.0929280858900035e-06, "loss": 0.7, "step": 12372 }, { "epoch": 0.6358824134032275, "grad_norm": 1.1831921339035034, "learning_rate": 3.092158773182129e-06, "loss": 0.6919, "step": 12373 }, { "epoch": 0.6359338061465721, "grad_norm": 1.093214988708496, "learning_rate": 3.0913895133326605e-06, "loss": 0.704, "step": 12374 }, { "epoch": 0.6359851988899168, "grad_norm": 1.0415292978286743, "learning_rate": 3.0906203063629116e-06, "loss": 0.7256, "step": 12375 }, { "epoch": 0.6360365916332614, "grad_norm": 1.0699998140335083, "learning_rate": 3.0898511522941967e-06, "loss": 0.6724, "step": 12376 }, { "epoch": 0.6360879843766061, "grad_norm": 1.642176628112793, "learning_rate": 3.0890820511478216e-06, "loss": 0.6798, "step": 12377 }, { "epoch": 0.6361393771199506, "grad_norm": 0.8243094086647034, "learning_rate": 3.088313002945097e-06, "loss": 0.6767, "step": 12378 }, { "epoch": 0.6361907698632953, "grad_norm": 1.0829601287841797, "learning_rate": 3.087544007707331e-06, "loss": 0.7699, "step": 12379 }, { "epoch": 0.6362421626066399, "grad_norm": 1.046200156211853, "learning_rate": 3.086775065455827e-06, "loss": 0.7258, "step": 12380 }, { "epoch": 0.6362935553499846, "grad_norm": 1.100565791130066, "learning_rate": 3.086006176211891e-06, "loss": 0.7918, "step": 12381 }, { "epoch": 0.6363449480933292, "grad_norm": 1.0411874055862427, "learning_rate": 3.085237339996827e-06, "loss": 0.6958, "step": 12382 }, { "epoch": 0.6363963408366738, "grad_norm": 1.1154569387435913, "learning_rate": 3.0844685568319326e-06, "loss": 0.7152, "step": 12383 }, { "epoch": 0.6364477335800185, "grad_norm": 1.148619532585144, "learning_rate": 3.08369982673851e-06, "loss": 0.6937, "step": 12384 }, { "epoch": 0.6364991263233631, "grad_norm": 1.121852993965149, "learning_rate": 3.0829311497378567e-06, "loss": 0.8027, "step": 12385 }, { "epoch": 0.6365505190667078, "grad_norm": 1.1024125814437866, "learning_rate": 3.0821625258512717e-06, "loss": 0.6982, "step": 12386 }, { "epoch": 0.6366019118100524, "grad_norm": 1.1549736261367798, "learning_rate": 3.0813939551000473e-06, "loss": 0.7525, "step": 12387 }, { "epoch": 0.6366533045533971, "grad_norm": 0.7429879307746887, "learning_rate": 3.080625437505478e-06, "loss": 0.6756, "step": 12388 }, { "epoch": 0.6367046972967417, "grad_norm": 1.0803486108779907, "learning_rate": 3.0798569730888584e-06, "loss": 0.6759, "step": 12389 }, { "epoch": 0.6367560900400864, "grad_norm": 0.711780309677124, "learning_rate": 3.0790885618714773e-06, "loss": 0.6577, "step": 12390 }, { "epoch": 0.636807482783431, "grad_norm": 1.0022251605987549, "learning_rate": 3.0783202038746246e-06, "loss": 0.6691, "step": 12391 }, { "epoch": 0.6368588755267757, "grad_norm": 1.1240925788879395, "learning_rate": 3.0775518991195905e-06, "loss": 0.7406, "step": 12392 }, { "epoch": 0.6369102682701202, "grad_norm": 1.0533243417739868, "learning_rate": 3.076783647627659e-06, "loss": 0.725, "step": 12393 }, { "epoch": 0.6369616610134649, "grad_norm": 1.2586220502853394, "learning_rate": 3.0760154494201155e-06, "loss": 0.6959, "step": 12394 }, { "epoch": 0.6370130537568095, "grad_norm": 0.7132500410079956, "learning_rate": 3.075247304518245e-06, "loss": 0.6668, "step": 12395 }, { "epoch": 0.6370644465001541, "grad_norm": 1.1156556606292725, "learning_rate": 3.0744792129433282e-06, "loss": 0.7514, "step": 12396 }, { "epoch": 0.6371158392434988, "grad_norm": 1.101128101348877, "learning_rate": 3.0737111747166458e-06, "loss": 0.7287, "step": 12397 }, { "epoch": 0.6371672319868434, "grad_norm": 1.0426234006881714, "learning_rate": 3.072943189859478e-06, "loss": 0.6906, "step": 12398 }, { "epoch": 0.6372186247301881, "grad_norm": 1.116647481918335, "learning_rate": 3.072175258393101e-06, "loss": 0.713, "step": 12399 }, { "epoch": 0.6372700174735327, "grad_norm": 1.0076799392700195, "learning_rate": 3.0714073803387925e-06, "loss": 0.7029, "step": 12400 }, { "epoch": 0.6373214102168774, "grad_norm": 1.0805941820144653, "learning_rate": 3.070639555717826e-06, "loss": 0.7455, "step": 12401 }, { "epoch": 0.637372802960222, "grad_norm": 0.8590505719184875, "learning_rate": 3.069871784551478e-06, "loss": 0.6603, "step": 12402 }, { "epoch": 0.6374241957035667, "grad_norm": 0.7081646919250488, "learning_rate": 3.069104066861015e-06, "loss": 0.6401, "step": 12403 }, { "epoch": 0.6374755884469113, "grad_norm": 1.0418975353240967, "learning_rate": 3.06833640266771e-06, "loss": 0.7051, "step": 12404 }, { "epoch": 0.637526981190256, "grad_norm": 1.1633358001708984, "learning_rate": 3.0675687919928345e-06, "loss": 0.7013, "step": 12405 }, { "epoch": 0.6375783739336006, "grad_norm": 1.263550877571106, "learning_rate": 3.0668012348576505e-06, "loss": 0.6992, "step": 12406 }, { "epoch": 0.6376297666769453, "grad_norm": 1.0553500652313232, "learning_rate": 3.0660337312834266e-06, "loss": 0.7252, "step": 12407 }, { "epoch": 0.6376811594202898, "grad_norm": 1.0750170946121216, "learning_rate": 3.065266281291428e-06, "loss": 0.7128, "step": 12408 }, { "epoch": 0.6377325521636344, "grad_norm": 1.058333158493042, "learning_rate": 3.0644988849029158e-06, "loss": 0.7188, "step": 12409 }, { "epoch": 0.6377839449069791, "grad_norm": 1.1271239519119263, "learning_rate": 3.063731542139152e-06, "loss": 0.6908, "step": 12410 }, { "epoch": 0.6378353376503237, "grad_norm": 1.1228219270706177, "learning_rate": 3.0629642530213986e-06, "loss": 0.7446, "step": 12411 }, { "epoch": 0.6378867303936684, "grad_norm": 1.0554416179656982, "learning_rate": 3.0621970175709105e-06, "loss": 0.7341, "step": 12412 }, { "epoch": 0.637938123137013, "grad_norm": 0.7691937685012817, "learning_rate": 3.0614298358089456e-06, "loss": 0.6275, "step": 12413 }, { "epoch": 0.6379895158803577, "grad_norm": 1.3259146213531494, "learning_rate": 3.060662707756762e-06, "loss": 0.6843, "step": 12414 }, { "epoch": 0.6380409086237023, "grad_norm": 0.6934236884117126, "learning_rate": 3.05989563343561e-06, "loss": 0.6542, "step": 12415 }, { "epoch": 0.638092301367047, "grad_norm": 1.0731662511825562, "learning_rate": 3.059128612866743e-06, "loss": 0.7318, "step": 12416 }, { "epoch": 0.6381436941103916, "grad_norm": 0.8232048153877258, "learning_rate": 3.0583616460714127e-06, "loss": 0.6152, "step": 12417 }, { "epoch": 0.6381950868537363, "grad_norm": 0.7083306908607483, "learning_rate": 3.05759473307087e-06, "loss": 0.6502, "step": 12418 }, { "epoch": 0.6382464795970809, "grad_norm": 1.112864375114441, "learning_rate": 3.0568278738863614e-06, "loss": 0.7173, "step": 12419 }, { "epoch": 0.6382978723404256, "grad_norm": 1.088829517364502, "learning_rate": 3.0560610685391323e-06, "loss": 0.7119, "step": 12420 }, { "epoch": 0.6383492650837702, "grad_norm": 1.0680503845214844, "learning_rate": 3.05529431705043e-06, "loss": 0.691, "step": 12421 }, { "epoch": 0.6384006578271149, "grad_norm": 1.0511410236358643, "learning_rate": 3.0545276194414952e-06, "loss": 0.6856, "step": 12422 }, { "epoch": 0.6384520505704595, "grad_norm": 1.0105831623077393, "learning_rate": 3.0537609757335723e-06, "loss": 0.6536, "step": 12423 }, { "epoch": 0.638503443313804, "grad_norm": 0.7362377047538757, "learning_rate": 3.052994385947903e-06, "loss": 0.668, "step": 12424 }, { "epoch": 0.6385548360571487, "grad_norm": 1.0641356706619263, "learning_rate": 3.052227850105722e-06, "loss": 0.6692, "step": 12425 }, { "epoch": 0.6386062288004933, "grad_norm": 1.0279804468154907, "learning_rate": 3.0514613682282703e-06, "loss": 0.7333, "step": 12426 }, { "epoch": 0.638657621543838, "grad_norm": 1.1566846370697021, "learning_rate": 3.0506949403367834e-06, "loss": 0.7814, "step": 12427 }, { "epoch": 0.6387090142871826, "grad_norm": 0.8539129495620728, "learning_rate": 3.0499285664524946e-06, "loss": 0.6488, "step": 12428 }, { "epoch": 0.6387604070305273, "grad_norm": 1.1233267784118652, "learning_rate": 3.0491622465966373e-06, "loss": 0.7209, "step": 12429 }, { "epoch": 0.6388117997738719, "grad_norm": 1.0695856809616089, "learning_rate": 3.0483959807904445e-06, "loss": 0.7237, "step": 12430 }, { "epoch": 0.6388631925172166, "grad_norm": 1.0778601169586182, "learning_rate": 3.047629769055144e-06, "loss": 0.679, "step": 12431 }, { "epoch": 0.6389145852605612, "grad_norm": 0.7820619940757751, "learning_rate": 3.0468636114119666e-06, "loss": 0.6504, "step": 12432 }, { "epoch": 0.6389659780039059, "grad_norm": 1.0276966094970703, "learning_rate": 3.0460975078821377e-06, "loss": 0.7486, "step": 12433 }, { "epoch": 0.6390173707472505, "grad_norm": 0.6749736070632935, "learning_rate": 3.0453314584868854e-06, "loss": 0.6514, "step": 12434 }, { "epoch": 0.6390687634905952, "grad_norm": 1.045594334602356, "learning_rate": 3.044565463247431e-06, "loss": 0.6868, "step": 12435 }, { "epoch": 0.6391201562339398, "grad_norm": 1.1094048023223877, "learning_rate": 3.043799522184997e-06, "loss": 0.7656, "step": 12436 }, { "epoch": 0.6391715489772845, "grad_norm": 0.7875816822052002, "learning_rate": 3.043033635320808e-06, "loss": 0.6507, "step": 12437 }, { "epoch": 0.6392229417206291, "grad_norm": 0.7932091355323792, "learning_rate": 3.0422678026760784e-06, "loss": 0.648, "step": 12438 }, { "epoch": 0.6392743344639736, "grad_norm": 0.8026290535926819, "learning_rate": 3.0415020242720294e-06, "loss": 0.6833, "step": 12439 }, { "epoch": 0.6393257272073183, "grad_norm": 1.044281005859375, "learning_rate": 3.040736300129877e-06, "loss": 0.7642, "step": 12440 }, { "epoch": 0.6393771199506629, "grad_norm": 1.0879682302474976, "learning_rate": 3.0399706302708363e-06, "loss": 0.7336, "step": 12441 }, { "epoch": 0.6394285126940076, "grad_norm": 1.1501655578613281, "learning_rate": 3.0392050147161213e-06, "loss": 0.6896, "step": 12442 }, { "epoch": 0.6394799054373522, "grad_norm": 0.7306084036827087, "learning_rate": 3.0384394534869455e-06, "loss": 0.6706, "step": 12443 }, { "epoch": 0.6395312981806969, "grad_norm": 1.0372000932693481, "learning_rate": 3.0376739466045146e-06, "loss": 0.6945, "step": 12444 }, { "epoch": 0.6395826909240415, "grad_norm": 0.7529664635658264, "learning_rate": 3.036908494090042e-06, "loss": 0.6171, "step": 12445 }, { "epoch": 0.6396340836673862, "grad_norm": 1.092300534248352, "learning_rate": 3.0361430959647343e-06, "loss": 0.6768, "step": 12446 }, { "epoch": 0.6396854764107308, "grad_norm": 1.0681698322296143, "learning_rate": 3.0353777522497963e-06, "loss": 0.7008, "step": 12447 }, { "epoch": 0.6397368691540755, "grad_norm": 1.0579118728637695, "learning_rate": 3.0346124629664326e-06, "loss": 0.7294, "step": 12448 }, { "epoch": 0.6397882618974201, "grad_norm": 0.7365854978561401, "learning_rate": 3.0338472281358466e-06, "loss": 0.6854, "step": 12449 }, { "epoch": 0.6398396546407648, "grad_norm": 1.1051244735717773, "learning_rate": 3.0330820477792424e-06, "loss": 0.7384, "step": 12450 }, { "epoch": 0.6398910473841094, "grad_norm": 0.9895092844963074, "learning_rate": 3.032316921917815e-06, "loss": 0.7111, "step": 12451 }, { "epoch": 0.639942440127454, "grad_norm": 1.0925488471984863, "learning_rate": 3.0315518505727666e-06, "loss": 0.7365, "step": 12452 }, { "epoch": 0.6399938328707987, "grad_norm": 1.0687555074691772, "learning_rate": 3.0307868337652945e-06, "loss": 0.724, "step": 12453 }, { "epoch": 0.6400452256141432, "grad_norm": 1.127245306968689, "learning_rate": 3.0300218715165915e-06, "loss": 0.6994, "step": 12454 }, { "epoch": 0.6400966183574879, "grad_norm": 1.0680683851242065, "learning_rate": 3.029256963847852e-06, "loss": 0.7035, "step": 12455 }, { "epoch": 0.6401480111008325, "grad_norm": 0.766851544380188, "learning_rate": 3.028492110780272e-06, "loss": 0.656, "step": 12456 }, { "epoch": 0.6401994038441772, "grad_norm": 1.0638370513916016, "learning_rate": 3.027727312335037e-06, "loss": 0.7782, "step": 12457 }, { "epoch": 0.6402507965875218, "grad_norm": 1.026733636856079, "learning_rate": 3.02696256853334e-06, "loss": 0.679, "step": 12458 }, { "epoch": 0.6403021893308665, "grad_norm": 1.1674253940582275, "learning_rate": 3.026197879396368e-06, "loss": 0.73, "step": 12459 }, { "epoch": 0.6403535820742111, "grad_norm": 1.1570357084274292, "learning_rate": 3.025433244945307e-06, "loss": 0.7426, "step": 12460 }, { "epoch": 0.6404049748175558, "grad_norm": 1.0358844995498657, "learning_rate": 3.0246686652013425e-06, "loss": 0.7259, "step": 12461 }, { "epoch": 0.6404563675609004, "grad_norm": 1.0972083806991577, "learning_rate": 3.0239041401856583e-06, "loss": 0.6965, "step": 12462 }, { "epoch": 0.640507760304245, "grad_norm": 1.1043134927749634, "learning_rate": 3.023139669919435e-06, "loss": 0.6854, "step": 12463 }, { "epoch": 0.6405591530475897, "grad_norm": 0.754643976688385, "learning_rate": 3.022375254423852e-06, "loss": 0.6492, "step": 12464 }, { "epoch": 0.6406105457909343, "grad_norm": 1.0815545320510864, "learning_rate": 3.0216108937200907e-06, "loss": 0.688, "step": 12465 }, { "epoch": 0.640661938534279, "grad_norm": 1.1285542249679565, "learning_rate": 3.0208465878293287e-06, "loss": 0.7192, "step": 12466 }, { "epoch": 0.6407133312776236, "grad_norm": 0.7049675583839417, "learning_rate": 3.0200823367727384e-06, "loss": 0.6679, "step": 12467 }, { "epoch": 0.6407647240209683, "grad_norm": 0.7776630520820618, "learning_rate": 3.0193181405714954e-06, "loss": 0.6275, "step": 12468 }, { "epoch": 0.6408161167643128, "grad_norm": 1.077939748764038, "learning_rate": 3.018553999246775e-06, "loss": 0.7349, "step": 12469 }, { "epoch": 0.6408675095076575, "grad_norm": 1.1270390748977661, "learning_rate": 3.0177899128197453e-06, "loss": 0.7272, "step": 12470 }, { "epoch": 0.6409189022510021, "grad_norm": 1.1545878648757935, "learning_rate": 3.017025881311576e-06, "loss": 0.6845, "step": 12471 }, { "epoch": 0.6409702949943468, "grad_norm": 0.7495307922363281, "learning_rate": 3.0162619047434384e-06, "loss": 0.6457, "step": 12472 }, { "epoch": 0.6410216877376914, "grad_norm": 1.1431505680084229, "learning_rate": 3.015497983136495e-06, "loss": 0.7267, "step": 12473 }, { "epoch": 0.6410730804810361, "grad_norm": 1.0917288064956665, "learning_rate": 3.014734116511913e-06, "loss": 0.7772, "step": 12474 }, { "epoch": 0.6411244732243807, "grad_norm": 1.1838593482971191, "learning_rate": 3.0139703048908586e-06, "loss": 0.7052, "step": 12475 }, { "epoch": 0.6411758659677254, "grad_norm": 1.0653914213180542, "learning_rate": 3.013206548294488e-06, "loss": 0.7166, "step": 12476 }, { "epoch": 0.64122725871107, "grad_norm": 1.0956439971923828, "learning_rate": 3.012442846743966e-06, "loss": 0.7143, "step": 12477 }, { "epoch": 0.6412786514544146, "grad_norm": 1.0114266872406006, "learning_rate": 3.0116792002604506e-06, "loss": 0.679, "step": 12478 }, { "epoch": 0.6413300441977593, "grad_norm": 0.7221886515617371, "learning_rate": 3.010915608865101e-06, "loss": 0.6654, "step": 12479 }, { "epoch": 0.6413814369411039, "grad_norm": 1.06960129737854, "learning_rate": 3.0101520725790688e-06, "loss": 0.7487, "step": 12480 }, { "epoch": 0.6414328296844486, "grad_norm": 0.7439162135124207, "learning_rate": 3.009388591423512e-06, "loss": 0.6432, "step": 12481 }, { "epoch": 0.6414842224277932, "grad_norm": 1.0023384094238281, "learning_rate": 3.008625165419583e-06, "loss": 0.6473, "step": 12482 }, { "epoch": 0.6415356151711379, "grad_norm": 1.0683302879333496, "learning_rate": 3.0078617945884314e-06, "loss": 0.6876, "step": 12483 }, { "epoch": 0.6415870079144824, "grad_norm": 1.1591871976852417, "learning_rate": 3.0070984789512093e-06, "loss": 0.7096, "step": 12484 }, { "epoch": 0.6416384006578271, "grad_norm": 1.1107230186462402, "learning_rate": 3.0063352185290656e-06, "loss": 0.7589, "step": 12485 }, { "epoch": 0.6416897934011717, "grad_norm": 1.0828583240509033, "learning_rate": 3.0055720133431437e-06, "loss": 0.6317, "step": 12486 }, { "epoch": 0.6417411861445164, "grad_norm": 0.9825509786605835, "learning_rate": 3.004808863414591e-06, "loss": 0.6697, "step": 12487 }, { "epoch": 0.641792578887861, "grad_norm": 1.1393312215805054, "learning_rate": 3.004045768764553e-06, "loss": 0.7427, "step": 12488 }, { "epoch": 0.6418439716312057, "grad_norm": 1.051935076713562, "learning_rate": 3.0032827294141674e-06, "loss": 0.7303, "step": 12489 }, { "epoch": 0.6418953643745503, "grad_norm": 1.01982581615448, "learning_rate": 3.002519745384578e-06, "loss": 0.7243, "step": 12490 }, { "epoch": 0.641946757117895, "grad_norm": 0.7550032734870911, "learning_rate": 3.001756816696924e-06, "loss": 0.6956, "step": 12491 }, { "epoch": 0.6419981498612396, "grad_norm": 0.8016767501831055, "learning_rate": 3.000993943372341e-06, "loss": 0.6447, "step": 12492 }, { "epoch": 0.6420495426045842, "grad_norm": 1.0741279125213623, "learning_rate": 3.000231125431967e-06, "loss": 0.6926, "step": 12493 }, { "epoch": 0.6421009353479289, "grad_norm": 1.014305830001831, "learning_rate": 2.9994683628969364e-06, "loss": 0.7456, "step": 12494 }, { "epoch": 0.6421523280912735, "grad_norm": 0.7242282629013062, "learning_rate": 2.998705655788383e-06, "loss": 0.6611, "step": 12495 }, { "epoch": 0.6422037208346182, "grad_norm": 0.9902424812316895, "learning_rate": 2.9979430041274347e-06, "loss": 0.6154, "step": 12496 }, { "epoch": 0.6422551135779628, "grad_norm": 1.0947375297546387, "learning_rate": 2.9971804079352244e-06, "loss": 0.7472, "step": 12497 }, { "epoch": 0.6423065063213075, "grad_norm": 0.7447814345359802, "learning_rate": 2.996417867232882e-06, "loss": 0.6592, "step": 12498 }, { "epoch": 0.642357899064652, "grad_norm": 1.1309776306152344, "learning_rate": 2.99565538204153e-06, "loss": 0.7527, "step": 12499 }, { "epoch": 0.6424092918079967, "grad_norm": 1.1105777025222778, "learning_rate": 2.9948929523822958e-06, "loss": 0.7291, "step": 12500 }, { "epoch": 0.6424606845513413, "grad_norm": 1.088119387626648, "learning_rate": 2.9941305782763043e-06, "loss": 0.7514, "step": 12501 }, { "epoch": 0.642512077294686, "grad_norm": 0.7007126808166504, "learning_rate": 2.9933682597446755e-06, "loss": 0.6344, "step": 12502 }, { "epoch": 0.6425634700380306, "grad_norm": 1.0505273342132568, "learning_rate": 2.992605996808532e-06, "loss": 0.704, "step": 12503 }, { "epoch": 0.6426148627813753, "grad_norm": 1.147643804550171, "learning_rate": 2.991843789488994e-06, "loss": 0.7022, "step": 12504 }, { "epoch": 0.6426662555247199, "grad_norm": 1.2377270460128784, "learning_rate": 2.9910816378071748e-06, "loss": 0.7445, "step": 12505 }, { "epoch": 0.6427176482680645, "grad_norm": 1.0696444511413574, "learning_rate": 2.990319541784194e-06, "loss": 0.7159, "step": 12506 }, { "epoch": 0.6427690410114092, "grad_norm": 1.0263750553131104, "learning_rate": 2.989557501441167e-06, "loss": 0.7014, "step": 12507 }, { "epoch": 0.6428204337547538, "grad_norm": 1.7530046701431274, "learning_rate": 2.9887955167992026e-06, "loss": 0.755, "step": 12508 }, { "epoch": 0.6428718264980985, "grad_norm": 0.7299738526344299, "learning_rate": 2.9880335878794154e-06, "loss": 0.6656, "step": 12509 }, { "epoch": 0.6429232192414431, "grad_norm": 1.14931321144104, "learning_rate": 2.987271714702914e-06, "loss": 0.7172, "step": 12510 }, { "epoch": 0.6429746119847878, "grad_norm": 1.04103422164917, "learning_rate": 2.9865098972908084e-06, "loss": 0.7124, "step": 12511 }, { "epoch": 0.6430260047281324, "grad_norm": 3.011867046356201, "learning_rate": 2.9857481356642037e-06, "loss": 0.7174, "step": 12512 }, { "epoch": 0.6430773974714771, "grad_norm": 1.0644625425338745, "learning_rate": 2.9849864298442056e-06, "loss": 0.7024, "step": 12513 }, { "epoch": 0.6431287902148217, "grad_norm": 1.0423341989517212, "learning_rate": 2.98422477985192e-06, "loss": 0.6921, "step": 12514 }, { "epoch": 0.6431801829581663, "grad_norm": 1.0194381475448608, "learning_rate": 2.983463185708445e-06, "loss": 0.7109, "step": 12515 }, { "epoch": 0.6432315757015109, "grad_norm": 1.1233144998550415, "learning_rate": 2.9827016474348836e-06, "loss": 0.7522, "step": 12516 }, { "epoch": 0.6432829684448556, "grad_norm": 1.178581953048706, "learning_rate": 2.981940165052337e-06, "loss": 0.7534, "step": 12517 }, { "epoch": 0.6433343611882002, "grad_norm": 1.147438406944275, "learning_rate": 2.981178738581898e-06, "loss": 0.6313, "step": 12518 }, { "epoch": 0.6433857539315448, "grad_norm": 1.0982946157455444, "learning_rate": 2.9804173680446647e-06, "loss": 0.7372, "step": 12519 }, { "epoch": 0.6434371466748895, "grad_norm": 1.0677376985549927, "learning_rate": 2.9796560534617335e-06, "loss": 0.6836, "step": 12520 }, { "epoch": 0.6434885394182341, "grad_norm": 0.813614547252655, "learning_rate": 2.9788947948541945e-06, "loss": 0.6607, "step": 12521 }, { "epoch": 0.6435399321615788, "grad_norm": 1.112439513206482, "learning_rate": 2.9781335922431398e-06, "loss": 0.6763, "step": 12522 }, { "epoch": 0.6435913249049234, "grad_norm": 1.152753233909607, "learning_rate": 2.977372445649661e-06, "loss": 0.6748, "step": 12523 }, { "epoch": 0.6436427176482681, "grad_norm": 1.1023285388946533, "learning_rate": 2.9766113550948434e-06, "loss": 0.6853, "step": 12524 }, { "epoch": 0.6436941103916127, "grad_norm": 1.0749114751815796, "learning_rate": 2.9758503205997747e-06, "loss": 0.7356, "step": 12525 }, { "epoch": 0.6437455031349574, "grad_norm": 1.0768135786056519, "learning_rate": 2.975089342185541e-06, "loss": 0.7214, "step": 12526 }, { "epoch": 0.643796895878302, "grad_norm": 1.0707271099090576, "learning_rate": 2.974328419873227e-06, "loss": 0.7271, "step": 12527 }, { "epoch": 0.6438482886216467, "grad_norm": 0.9974445104598999, "learning_rate": 2.9735675536839115e-06, "loss": 0.6618, "step": 12528 }, { "epoch": 0.6438996813649913, "grad_norm": 1.0249018669128418, "learning_rate": 2.972806743638676e-06, "loss": 0.6411, "step": 12529 }, { "epoch": 0.6439510741083359, "grad_norm": 1.0546780824661255, "learning_rate": 2.972045989758602e-06, "loss": 0.6922, "step": 12530 }, { "epoch": 0.6440024668516805, "grad_norm": 1.1408554315567017, "learning_rate": 2.9712852920647627e-06, "loss": 0.7116, "step": 12531 }, { "epoch": 0.6440538595950251, "grad_norm": 0.7988154888153076, "learning_rate": 2.970524650578236e-06, "loss": 0.639, "step": 12532 }, { "epoch": 0.6441052523383698, "grad_norm": 1.0956555604934692, "learning_rate": 2.9697640653200975e-06, "loss": 0.7277, "step": 12533 }, { "epoch": 0.6441566450817144, "grad_norm": 1.0431976318359375, "learning_rate": 2.9690035363114167e-06, "loss": 0.7722, "step": 12534 }, { "epoch": 0.6442080378250591, "grad_norm": 1.1353570222854614, "learning_rate": 2.9682430635732675e-06, "loss": 0.7007, "step": 12535 }, { "epoch": 0.6442594305684037, "grad_norm": 1.110840916633606, "learning_rate": 2.9674826471267193e-06, "loss": 0.732, "step": 12536 }, { "epoch": 0.6443108233117484, "grad_norm": 1.161787748336792, "learning_rate": 2.966722286992838e-06, "loss": 0.7008, "step": 12537 }, { "epoch": 0.644362216055093, "grad_norm": 1.0624558925628662, "learning_rate": 2.965961983192691e-06, "loss": 0.6562, "step": 12538 }, { "epoch": 0.6444136087984377, "grad_norm": 1.1931118965148926, "learning_rate": 2.9652017357473457e-06, "loss": 0.7242, "step": 12539 }, { "epoch": 0.6444650015417823, "grad_norm": 1.0274269580841064, "learning_rate": 2.964441544677861e-06, "loss": 0.6672, "step": 12540 }, { "epoch": 0.644516394285127, "grad_norm": 1.0350313186645508, "learning_rate": 2.963681410005301e-06, "loss": 0.7218, "step": 12541 }, { "epoch": 0.6445677870284716, "grad_norm": 1.0855530500411987, "learning_rate": 2.9629213317507253e-06, "loss": 0.7169, "step": 12542 }, { "epoch": 0.6446191797718163, "grad_norm": 1.0388375520706177, "learning_rate": 2.962161309935194e-06, "loss": 0.7569, "step": 12543 }, { "epoch": 0.6446705725151609, "grad_norm": 1.0432623624801636, "learning_rate": 2.961401344579763e-06, "loss": 0.6506, "step": 12544 }, { "epoch": 0.6447219652585054, "grad_norm": 1.0363191366195679, "learning_rate": 2.960641435705487e-06, "loss": 0.7355, "step": 12545 }, { "epoch": 0.6447733580018501, "grad_norm": 1.0218799114227295, "learning_rate": 2.959881583333423e-06, "loss": 0.6851, "step": 12546 }, { "epoch": 0.6448247507451947, "grad_norm": 0.9656689763069153, "learning_rate": 2.9591217874846203e-06, "loss": 0.6606, "step": 12547 }, { "epoch": 0.6448761434885394, "grad_norm": 0.7550825476646423, "learning_rate": 2.958362048180129e-06, "loss": 0.6459, "step": 12548 }, { "epoch": 0.644927536231884, "grad_norm": 1.096889615058899, "learning_rate": 2.9576023654410038e-06, "loss": 0.6929, "step": 12549 }, { "epoch": 0.6449789289752287, "grad_norm": 1.1097761392593384, "learning_rate": 2.9568427392882846e-06, "loss": 0.7067, "step": 12550 }, { "epoch": 0.6450303217185733, "grad_norm": 1.0830873250961304, "learning_rate": 2.9560831697430226e-06, "loss": 0.6463, "step": 12551 }, { "epoch": 0.645081714461918, "grad_norm": 1.0513182878494263, "learning_rate": 2.955323656826262e-06, "loss": 0.66, "step": 12552 }, { "epoch": 0.6451331072052626, "grad_norm": 1.1426093578338623, "learning_rate": 2.9545642005590445e-06, "loss": 0.7581, "step": 12553 }, { "epoch": 0.6451844999486073, "grad_norm": 1.0884987115859985, "learning_rate": 2.9538048009624116e-06, "loss": 0.6804, "step": 12554 }, { "epoch": 0.6452358926919519, "grad_norm": 1.0375171899795532, "learning_rate": 2.9530454580574054e-06, "loss": 0.7424, "step": 12555 }, { "epoch": 0.6452872854352966, "grad_norm": 1.057641863822937, "learning_rate": 2.9522861718650608e-06, "loss": 0.744, "step": 12556 }, { "epoch": 0.6453386781786412, "grad_norm": 1.0870685577392578, "learning_rate": 2.9515269424064154e-06, "loss": 0.7206, "step": 12557 }, { "epoch": 0.6453900709219859, "grad_norm": 0.7162367105484009, "learning_rate": 2.950767769702505e-06, "loss": 0.6276, "step": 12558 }, { "epoch": 0.6454414636653305, "grad_norm": 1.1099445819854736, "learning_rate": 2.950008653774365e-06, "loss": 0.733, "step": 12559 }, { "epoch": 0.645492856408675, "grad_norm": 1.1124504804611206, "learning_rate": 2.949249594643023e-06, "loss": 0.7223, "step": 12560 }, { "epoch": 0.6455442491520197, "grad_norm": 1.0755809545516968, "learning_rate": 2.9484905923295127e-06, "loss": 0.6696, "step": 12561 }, { "epoch": 0.6455956418953643, "grad_norm": 1.26218843460083, "learning_rate": 2.947731646854862e-06, "loss": 0.7385, "step": 12562 }, { "epoch": 0.645647034638709, "grad_norm": 1.0858018398284912, "learning_rate": 2.9469727582400982e-06, "loss": 0.6856, "step": 12563 }, { "epoch": 0.6456984273820536, "grad_norm": 1.1529483795166016, "learning_rate": 2.9462139265062464e-06, "loss": 0.7054, "step": 12564 }, { "epoch": 0.6457498201253983, "grad_norm": 1.126605749130249, "learning_rate": 2.945455151674333e-06, "loss": 0.6943, "step": 12565 }, { "epoch": 0.6458012128687429, "grad_norm": 1.055630087852478, "learning_rate": 2.9446964337653773e-06, "loss": 0.7636, "step": 12566 }, { "epoch": 0.6458526056120876, "grad_norm": 1.1092755794525146, "learning_rate": 2.943937772800401e-06, "loss": 0.6737, "step": 12567 }, { "epoch": 0.6459039983554322, "grad_norm": 0.8738458156585693, "learning_rate": 2.9431791688004265e-06, "loss": 0.6607, "step": 12568 }, { "epoch": 0.6459553910987769, "grad_norm": 1.0223608016967773, "learning_rate": 2.942420621786467e-06, "loss": 0.6911, "step": 12569 }, { "epoch": 0.6460067838421215, "grad_norm": 1.0747071504592896, "learning_rate": 2.9416621317795415e-06, "loss": 0.7405, "step": 12570 }, { "epoch": 0.6460581765854662, "grad_norm": 1.1457273960113525, "learning_rate": 2.9409036988006644e-06, "loss": 0.7817, "step": 12571 }, { "epoch": 0.6461095693288108, "grad_norm": 1.0779527425765991, "learning_rate": 2.940145322870848e-06, "loss": 0.6641, "step": 12572 }, { "epoch": 0.6461609620721555, "grad_norm": 0.7959700226783752, "learning_rate": 2.939387004011105e-06, "loss": 0.6562, "step": 12573 }, { "epoch": 0.6462123548155001, "grad_norm": 1.1567611694335938, "learning_rate": 2.9386287422424433e-06, "loss": 0.7105, "step": 12574 }, { "epoch": 0.6462637475588446, "grad_norm": 1.070190668106079, "learning_rate": 2.9378705375858737e-06, "loss": 0.747, "step": 12575 }, { "epoch": 0.6463151403021893, "grad_norm": 1.1524614095687866, "learning_rate": 2.9371123900623997e-06, "loss": 0.6661, "step": 12576 }, { "epoch": 0.6463665330455339, "grad_norm": 1.08661949634552, "learning_rate": 2.9363542996930295e-06, "loss": 0.6878, "step": 12577 }, { "epoch": 0.6464179257888786, "grad_norm": 1.0304526090621948, "learning_rate": 2.935596266498767e-06, "loss": 0.6876, "step": 12578 }, { "epoch": 0.6464693185322232, "grad_norm": 1.019436240196228, "learning_rate": 2.9348382905006113e-06, "loss": 0.6979, "step": 12579 }, { "epoch": 0.6465207112755679, "grad_norm": 1.0169875621795654, "learning_rate": 2.934080371719563e-06, "loss": 0.6683, "step": 12580 }, { "epoch": 0.6465721040189125, "grad_norm": 1.217195987701416, "learning_rate": 2.933322510176625e-06, "loss": 0.7245, "step": 12581 }, { "epoch": 0.6466234967622572, "grad_norm": 1.0641175508499146, "learning_rate": 2.9325647058927887e-06, "loss": 0.7536, "step": 12582 }, { "epoch": 0.6466748895056018, "grad_norm": 1.08638334274292, "learning_rate": 2.931806958889053e-06, "loss": 0.7252, "step": 12583 }, { "epoch": 0.6467262822489465, "grad_norm": 1.148267149925232, "learning_rate": 2.9310492691864128e-06, "loss": 0.7402, "step": 12584 }, { "epoch": 0.6467776749922911, "grad_norm": 1.1270256042480469, "learning_rate": 2.930291636805858e-06, "loss": 0.7209, "step": 12585 }, { "epoch": 0.6468290677356358, "grad_norm": 0.8185450434684753, "learning_rate": 2.929534061768381e-06, "loss": 0.6661, "step": 12586 }, { "epoch": 0.6468804604789804, "grad_norm": 1.0447026491165161, "learning_rate": 2.9287765440949705e-06, "loss": 0.6838, "step": 12587 }, { "epoch": 0.646931853222325, "grad_norm": 1.0965949296951294, "learning_rate": 2.928019083806617e-06, "loss": 0.7017, "step": 12588 }, { "epoch": 0.6469832459656697, "grad_norm": 1.1301207542419434, "learning_rate": 2.9272616809243016e-06, "loss": 0.7186, "step": 12589 }, { "epoch": 0.6470346387090143, "grad_norm": 1.1941970586776733, "learning_rate": 2.9265043354690115e-06, "loss": 0.7307, "step": 12590 }, { "epoch": 0.6470860314523589, "grad_norm": 1.1365361213684082, "learning_rate": 2.9257470474617315e-06, "loss": 0.764, "step": 12591 }, { "epoch": 0.6471374241957035, "grad_norm": 0.9992626905441284, "learning_rate": 2.9249898169234394e-06, "loss": 0.6982, "step": 12592 }, { "epoch": 0.6471888169390482, "grad_norm": 1.0347284078598022, "learning_rate": 2.9242326438751157e-06, "loss": 0.6901, "step": 12593 }, { "epoch": 0.6472402096823928, "grad_norm": 0.7505594491958618, "learning_rate": 2.92347552833774e-06, "loss": 0.7026, "step": 12594 }, { "epoch": 0.6472916024257375, "grad_norm": 1.029219150543213, "learning_rate": 2.922718470332288e-06, "loss": 0.7281, "step": 12595 }, { "epoch": 0.6473429951690821, "grad_norm": 1.0583431720733643, "learning_rate": 2.921961469879734e-06, "loss": 0.6953, "step": 12596 }, { "epoch": 0.6473943879124268, "grad_norm": 1.0568609237670898, "learning_rate": 2.9212045270010547e-06, "loss": 0.7541, "step": 12597 }, { "epoch": 0.6474457806557714, "grad_norm": 1.0321409702301025, "learning_rate": 2.9204476417172166e-06, "loss": 0.7146, "step": 12598 }, { "epoch": 0.647497173399116, "grad_norm": 1.0842598676681519, "learning_rate": 2.9196908140491932e-06, "loss": 0.734, "step": 12599 }, { "epoch": 0.6475485661424607, "grad_norm": 1.086569905281067, "learning_rate": 2.918934044017954e-06, "loss": 0.6725, "step": 12600 }, { "epoch": 0.6475999588858053, "grad_norm": 0.9834355711936951, "learning_rate": 2.9181773316444615e-06, "loss": 0.6445, "step": 12601 }, { "epoch": 0.64765135162915, "grad_norm": 1.111777901649475, "learning_rate": 2.917420676949686e-06, "loss": 0.7441, "step": 12602 }, { "epoch": 0.6477027443724946, "grad_norm": 1.1209532022476196, "learning_rate": 2.9166640799545877e-06, "loss": 0.7463, "step": 12603 }, { "epoch": 0.6477541371158393, "grad_norm": 0.7377808094024658, "learning_rate": 2.915907540680134e-06, "loss": 0.6779, "step": 12604 }, { "epoch": 0.6478055298591839, "grad_norm": 1.082658052444458, "learning_rate": 2.9151510591472775e-06, "loss": 0.6818, "step": 12605 }, { "epoch": 0.6478569226025285, "grad_norm": 1.0856460332870483, "learning_rate": 2.9143946353769836e-06, "loss": 0.7181, "step": 12606 }, { "epoch": 0.6479083153458731, "grad_norm": 1.0511581897735596, "learning_rate": 2.9136382693902075e-06, "loss": 0.7147, "step": 12607 }, { "epoch": 0.6479597080892178, "grad_norm": 0.964617133140564, "learning_rate": 2.9128819612079053e-06, "loss": 0.6952, "step": 12608 }, { "epoch": 0.6480111008325624, "grad_norm": 0.8149796724319458, "learning_rate": 2.912125710851029e-06, "loss": 0.6626, "step": 12609 }, { "epoch": 0.6480624935759071, "grad_norm": 1.1061389446258545, "learning_rate": 2.911369518340535e-06, "loss": 0.6908, "step": 12610 }, { "epoch": 0.6481138863192517, "grad_norm": 1.0958303213119507, "learning_rate": 2.910613383697372e-06, "loss": 0.7164, "step": 12611 }, { "epoch": 0.6481652790625964, "grad_norm": 1.093487024307251, "learning_rate": 2.9098573069424883e-06, "loss": 0.7427, "step": 12612 }, { "epoch": 0.648216671805941, "grad_norm": 1.2232463359832764, "learning_rate": 2.9091012880968373e-06, "loss": 0.7975, "step": 12613 }, { "epoch": 0.6482680645492856, "grad_norm": 1.111070990562439, "learning_rate": 2.908345327181356e-06, "loss": 0.7467, "step": 12614 }, { "epoch": 0.6483194572926303, "grad_norm": 1.0774874687194824, "learning_rate": 2.907589424216997e-06, "loss": 0.6995, "step": 12615 }, { "epoch": 0.6483708500359749, "grad_norm": 1.1049230098724365, "learning_rate": 2.9068335792246994e-06, "loss": 0.7411, "step": 12616 }, { "epoch": 0.6484222427793196, "grad_norm": 0.788194477558136, "learning_rate": 2.9060777922254055e-06, "loss": 0.6591, "step": 12617 }, { "epoch": 0.6484736355226642, "grad_norm": 1.0172597169876099, "learning_rate": 2.905322063240053e-06, "loss": 0.6648, "step": 12618 }, { "epoch": 0.6485250282660089, "grad_norm": 1.1682467460632324, "learning_rate": 2.904566392289584e-06, "loss": 0.7104, "step": 12619 }, { "epoch": 0.6485764210093535, "grad_norm": 0.7147257328033447, "learning_rate": 2.903810779394933e-06, "loss": 0.6399, "step": 12620 }, { "epoch": 0.6486278137526981, "grad_norm": 1.100781798362732, "learning_rate": 2.9030552245770323e-06, "loss": 0.7132, "step": 12621 }, { "epoch": 0.6486792064960427, "grad_norm": 1.0393102169036865, "learning_rate": 2.90229972785682e-06, "loss": 0.7223, "step": 12622 }, { "epoch": 0.6487305992393874, "grad_norm": 1.0879110097885132, "learning_rate": 2.901544289255226e-06, "loss": 0.6582, "step": 12623 }, { "epoch": 0.648781991982732, "grad_norm": 0.9889287352561951, "learning_rate": 2.9007889087931797e-06, "loss": 0.6635, "step": 12624 }, { "epoch": 0.6488333847260767, "grad_norm": 0.9613938927650452, "learning_rate": 2.900033586491607e-06, "loss": 0.6914, "step": 12625 }, { "epoch": 0.6488847774694213, "grad_norm": 1.1242367029190063, "learning_rate": 2.899278322371443e-06, "loss": 0.7101, "step": 12626 }, { "epoch": 0.648936170212766, "grad_norm": 1.062567114830017, "learning_rate": 2.898523116453602e-06, "loss": 0.7377, "step": 12627 }, { "epoch": 0.6489875629561106, "grad_norm": 1.0578736066818237, "learning_rate": 2.897767968759016e-06, "loss": 0.6649, "step": 12628 }, { "epoch": 0.6490389556994552, "grad_norm": 0.8728432059288025, "learning_rate": 2.897012879308604e-06, "loss": 0.6708, "step": 12629 }, { "epoch": 0.6490903484427999, "grad_norm": 1.1333264112472534, "learning_rate": 2.896257848123285e-06, "loss": 0.7075, "step": 12630 }, { "epoch": 0.6491417411861445, "grad_norm": 1.0440527200698853, "learning_rate": 2.895502875223981e-06, "loss": 0.6932, "step": 12631 }, { "epoch": 0.6491931339294892, "grad_norm": 1.0709819793701172, "learning_rate": 2.8947479606316074e-06, "loss": 0.7166, "step": 12632 }, { "epoch": 0.6492445266728338, "grad_norm": 1.0816556215286255, "learning_rate": 2.8939931043670805e-06, "loss": 0.727, "step": 12633 }, { "epoch": 0.6492959194161785, "grad_norm": 1.0810855627059937, "learning_rate": 2.8932383064513114e-06, "loss": 0.7162, "step": 12634 }, { "epoch": 0.6493473121595231, "grad_norm": 1.0960279703140259, "learning_rate": 2.8924835669052166e-06, "loss": 0.675, "step": 12635 }, { "epoch": 0.6493987049028677, "grad_norm": 1.1212399005889893, "learning_rate": 2.891728885749705e-06, "loss": 0.7252, "step": 12636 }, { "epoch": 0.6494500976462123, "grad_norm": 1.1157004833221436, "learning_rate": 2.890974263005686e-06, "loss": 0.7112, "step": 12637 }, { "epoch": 0.649501490389557, "grad_norm": 1.114310622215271, "learning_rate": 2.8902196986940647e-06, "loss": 0.6579, "step": 12638 }, { "epoch": 0.6495528831329016, "grad_norm": 0.6990789771080017, "learning_rate": 2.889465192835751e-06, "loss": 0.6431, "step": 12639 }, { "epoch": 0.6496042758762463, "grad_norm": 1.1571526527404785, "learning_rate": 2.8887107454516467e-06, "loss": 0.7152, "step": 12640 }, { "epoch": 0.6496556686195909, "grad_norm": 0.8259235620498657, "learning_rate": 2.887956356562655e-06, "loss": 0.6688, "step": 12641 }, { "epoch": 0.6497070613629355, "grad_norm": 1.102955937385559, "learning_rate": 2.887202026189677e-06, "loss": 0.8067, "step": 12642 }, { "epoch": 0.6497584541062802, "grad_norm": 1.1036666631698608, "learning_rate": 2.8864477543536094e-06, "loss": 0.7321, "step": 12643 }, { "epoch": 0.6498098468496248, "grad_norm": 1.0310508012771606, "learning_rate": 2.885693541075355e-06, "loss": 0.7342, "step": 12644 }, { "epoch": 0.6498612395929695, "grad_norm": 1.0737802982330322, "learning_rate": 2.884939386375807e-06, "loss": 0.7256, "step": 12645 }, { "epoch": 0.6499126323363141, "grad_norm": 1.4719418287277222, "learning_rate": 2.8841852902758605e-06, "loss": 0.7017, "step": 12646 }, { "epoch": 0.6499640250796588, "grad_norm": 1.1220132112503052, "learning_rate": 2.883431252796406e-06, "loss": 0.7073, "step": 12647 }, { "epoch": 0.6500154178230034, "grad_norm": 1.083936333656311, "learning_rate": 2.8826772739583386e-06, "loss": 0.7129, "step": 12648 }, { "epoch": 0.6500668105663481, "grad_norm": 1.048462986946106, "learning_rate": 2.881923353782547e-06, "loss": 0.6763, "step": 12649 }, { "epoch": 0.6501182033096927, "grad_norm": 0.7035284638404846, "learning_rate": 2.881169492289918e-06, "loss": 0.675, "step": 12650 }, { "epoch": 0.6501695960530373, "grad_norm": 0.8668980598449707, "learning_rate": 2.880415689501337e-06, "loss": 0.6864, "step": 12651 }, { "epoch": 0.6502209887963819, "grad_norm": 1.21080482006073, "learning_rate": 2.8796619454376916e-06, "loss": 0.7665, "step": 12652 }, { "epoch": 0.6502723815397266, "grad_norm": 0.7108464241027832, "learning_rate": 2.8789082601198636e-06, "loss": 0.6643, "step": 12653 }, { "epoch": 0.6503237742830712, "grad_norm": 1.0314874649047852, "learning_rate": 2.8781546335687325e-06, "loss": 0.6364, "step": 12654 }, { "epoch": 0.6503751670264158, "grad_norm": 1.1164394617080688, "learning_rate": 2.877401065805184e-06, "loss": 0.7524, "step": 12655 }, { "epoch": 0.6504265597697605, "grad_norm": 1.0630953311920166, "learning_rate": 2.8766475568500897e-06, "loss": 0.6656, "step": 12656 }, { "epoch": 0.6504779525131051, "grad_norm": 1.0845907926559448, "learning_rate": 2.8758941067243295e-06, "loss": 0.6963, "step": 12657 }, { "epoch": 0.6505293452564498, "grad_norm": 1.0170427560806274, "learning_rate": 2.8751407154487786e-06, "loss": 0.6536, "step": 12658 }, { "epoch": 0.6505807379997944, "grad_norm": 0.7387202978134155, "learning_rate": 2.8743873830443104e-06, "loss": 0.6766, "step": 12659 }, { "epoch": 0.6506321307431391, "grad_norm": 1.1213816404342651, "learning_rate": 2.8736341095317925e-06, "loss": 0.6631, "step": 12660 }, { "epoch": 0.6506835234864837, "grad_norm": 1.0957870483398438, "learning_rate": 2.8728808949321013e-06, "loss": 0.6956, "step": 12661 }, { "epoch": 0.6507349162298284, "grad_norm": 0.9973905682563782, "learning_rate": 2.8721277392661025e-06, "loss": 0.6875, "step": 12662 }, { "epoch": 0.650786308973173, "grad_norm": 1.0251758098602295, "learning_rate": 2.8713746425546608e-06, "loss": 0.6966, "step": 12663 }, { "epoch": 0.6508377017165177, "grad_norm": 1.064182996749878, "learning_rate": 2.870621604818647e-06, "loss": 0.6799, "step": 12664 }, { "epoch": 0.6508890944598623, "grad_norm": 1.0819506645202637, "learning_rate": 2.869868626078917e-06, "loss": 0.6965, "step": 12665 }, { "epoch": 0.6509404872032069, "grad_norm": 1.125706672668457, "learning_rate": 2.869115706356339e-06, "loss": 0.7097, "step": 12666 }, { "epoch": 0.6509918799465515, "grad_norm": 1.1135793924331665, "learning_rate": 2.8683628456717693e-06, "loss": 0.7178, "step": 12667 }, { "epoch": 0.6510432726898961, "grad_norm": 1.0797240734100342, "learning_rate": 2.867610044046073e-06, "loss": 0.7399, "step": 12668 }, { "epoch": 0.6510946654332408, "grad_norm": 1.1318385601043701, "learning_rate": 2.8668573015000976e-06, "loss": 0.7007, "step": 12669 }, { "epoch": 0.6511460581765854, "grad_norm": 0.7572476863861084, "learning_rate": 2.866104618054706e-06, "loss": 0.6308, "step": 12670 }, { "epoch": 0.6511974509199301, "grad_norm": 1.0626306533813477, "learning_rate": 2.8653519937307497e-06, "loss": 0.702, "step": 12671 }, { "epoch": 0.6512488436632747, "grad_norm": 1.0133082866668701, "learning_rate": 2.8645994285490774e-06, "loss": 0.7541, "step": 12672 }, { "epoch": 0.6513002364066194, "grad_norm": 1.0453921556472778, "learning_rate": 2.8638469225305456e-06, "loss": 0.7273, "step": 12673 }, { "epoch": 0.651351629149964, "grad_norm": 1.1614511013031006, "learning_rate": 2.863094475695999e-06, "loss": 0.7755, "step": 12674 }, { "epoch": 0.6514030218933087, "grad_norm": 1.1173795461654663, "learning_rate": 2.8623420880662866e-06, "loss": 0.714, "step": 12675 }, { "epoch": 0.6514544146366533, "grad_norm": 1.1158134937286377, "learning_rate": 2.8615897596622524e-06, "loss": 0.6737, "step": 12676 }, { "epoch": 0.651505807379998, "grad_norm": 1.0734282732009888, "learning_rate": 2.8608374905047436e-06, "loss": 0.7225, "step": 12677 }, { "epoch": 0.6515572001233426, "grad_norm": 1.099980354309082, "learning_rate": 2.8600852806145973e-06, "loss": 0.7148, "step": 12678 }, { "epoch": 0.6516085928666873, "grad_norm": 1.0808162689208984, "learning_rate": 2.859333130012658e-06, "loss": 0.6887, "step": 12679 }, { "epoch": 0.6516599856100319, "grad_norm": 1.070190191268921, "learning_rate": 2.858581038719764e-06, "loss": 0.7285, "step": 12680 }, { "epoch": 0.6517113783533766, "grad_norm": 1.080062985420227, "learning_rate": 2.857829006756751e-06, "loss": 0.7193, "step": 12681 }, { "epoch": 0.6517627710967211, "grad_norm": 0.8259374499320984, "learning_rate": 2.8570770341444577e-06, "loss": 0.6061, "step": 12682 }, { "epoch": 0.6518141638400657, "grad_norm": 1.0425503253936768, "learning_rate": 2.856325120903714e-06, "loss": 0.7194, "step": 12683 }, { "epoch": 0.6518655565834104, "grad_norm": 1.0406701564788818, "learning_rate": 2.8555732670553595e-06, "loss": 0.6595, "step": 12684 }, { "epoch": 0.651916949326755, "grad_norm": 1.2220526933670044, "learning_rate": 2.8548214726202155e-06, "loss": 0.7472, "step": 12685 }, { "epoch": 0.6519683420700997, "grad_norm": 1.0990723371505737, "learning_rate": 2.8540697376191185e-06, "loss": 0.732, "step": 12686 }, { "epoch": 0.6520197348134443, "grad_norm": 0.6881179809570312, "learning_rate": 2.853318062072893e-06, "loss": 0.6395, "step": 12687 }, { "epoch": 0.652071127556789, "grad_norm": 1.0097739696502686, "learning_rate": 2.852566446002365e-06, "loss": 0.6971, "step": 12688 }, { "epoch": 0.6521225203001336, "grad_norm": 1.1167685985565186, "learning_rate": 2.851814889428357e-06, "loss": 0.6819, "step": 12689 }, { "epoch": 0.6521739130434783, "grad_norm": 1.043837070465088, "learning_rate": 2.8510633923716957e-06, "loss": 0.7354, "step": 12690 }, { "epoch": 0.6522253057868229, "grad_norm": 1.015903115272522, "learning_rate": 2.850311954853199e-06, "loss": 0.7729, "step": 12691 }, { "epoch": 0.6522766985301676, "grad_norm": 0.6884838938713074, "learning_rate": 2.8495605768936874e-06, "loss": 0.6754, "step": 12692 }, { "epoch": 0.6523280912735122, "grad_norm": 1.109500527381897, "learning_rate": 2.848809258513977e-06, "loss": 0.704, "step": 12693 }, { "epoch": 0.6523794840168569, "grad_norm": 0.7294967770576477, "learning_rate": 2.8480579997348833e-06, "loss": 0.6779, "step": 12694 }, { "epoch": 0.6524308767602015, "grad_norm": 1.0306261777877808, "learning_rate": 2.847306800577224e-06, "loss": 0.659, "step": 12695 }, { "epoch": 0.6524822695035462, "grad_norm": 0.763022243976593, "learning_rate": 2.8465556610618097e-06, "loss": 0.6796, "step": 12696 }, { "epoch": 0.6525336622468907, "grad_norm": 1.0255707502365112, "learning_rate": 2.845804581209451e-06, "loss": 0.7457, "step": 12697 }, { "epoch": 0.6525850549902353, "grad_norm": 1.1152048110961914, "learning_rate": 2.845053561040956e-06, "loss": 0.7014, "step": 12698 }, { "epoch": 0.65263644773358, "grad_norm": 1.005722999572754, "learning_rate": 2.8443026005771345e-06, "loss": 0.6791, "step": 12699 }, { "epoch": 0.6526878404769246, "grad_norm": 1.0054409503936768, "learning_rate": 2.843551699838793e-06, "loss": 0.6602, "step": 12700 }, { "epoch": 0.6527392332202693, "grad_norm": 1.078627586364746, "learning_rate": 2.842800858846735e-06, "loss": 0.755, "step": 12701 }, { "epoch": 0.6527906259636139, "grad_norm": 1.286373257637024, "learning_rate": 2.8420500776217602e-06, "loss": 0.6785, "step": 12702 }, { "epoch": 0.6528420187069586, "grad_norm": 1.0865894556045532, "learning_rate": 2.8412993561846748e-06, "loss": 0.7048, "step": 12703 }, { "epoch": 0.6528934114503032, "grad_norm": 1.0086597204208374, "learning_rate": 2.8405486945562753e-06, "loss": 0.6798, "step": 12704 }, { "epoch": 0.6529448041936479, "grad_norm": 1.008651852607727, "learning_rate": 2.8397980927573586e-06, "loss": 0.7053, "step": 12705 }, { "epoch": 0.6529961969369925, "grad_norm": 1.0774078369140625, "learning_rate": 2.839047550808726e-06, "loss": 0.6775, "step": 12706 }, { "epoch": 0.6530475896803372, "grad_norm": 1.0694278478622437, "learning_rate": 2.838297068731164e-06, "loss": 0.7183, "step": 12707 }, { "epoch": 0.6530989824236818, "grad_norm": 1.178061842918396, "learning_rate": 2.8375466465454714e-06, "loss": 0.8103, "step": 12708 }, { "epoch": 0.6531503751670265, "grad_norm": 1.1364883184432983, "learning_rate": 2.8367962842724373e-06, "loss": 0.7279, "step": 12709 }, { "epoch": 0.6532017679103711, "grad_norm": 0.8000428080558777, "learning_rate": 2.8360459819328512e-06, "loss": 0.6941, "step": 12710 }, { "epoch": 0.6532531606537157, "grad_norm": 0.7019063234329224, "learning_rate": 2.835295739547499e-06, "loss": 0.6607, "step": 12711 }, { "epoch": 0.6533045533970603, "grad_norm": 1.083438515663147, "learning_rate": 2.8345455571371703e-06, "loss": 0.6721, "step": 12712 }, { "epoch": 0.6533559461404049, "grad_norm": 0.7948020696640015, "learning_rate": 2.8337954347226483e-06, "loss": 0.6524, "step": 12713 }, { "epoch": 0.6534073388837496, "grad_norm": 1.0776002407073975, "learning_rate": 2.833045372324713e-06, "loss": 0.6914, "step": 12714 }, { "epoch": 0.6534587316270942, "grad_norm": 1.107102394104004, "learning_rate": 2.8322953699641497e-06, "loss": 0.7202, "step": 12715 }, { "epoch": 0.6535101243704389, "grad_norm": 1.0733683109283447, "learning_rate": 2.831545427661737e-06, "loss": 0.7222, "step": 12716 }, { "epoch": 0.6535615171137835, "grad_norm": 1.1022833585739136, "learning_rate": 2.830795545438251e-06, "loss": 0.6851, "step": 12717 }, { "epoch": 0.6536129098571282, "grad_norm": 1.0910674333572388, "learning_rate": 2.8300457233144673e-06, "loss": 0.6692, "step": 12718 }, { "epoch": 0.6536643026004728, "grad_norm": 0.7024424076080322, "learning_rate": 2.829295961311165e-06, "loss": 0.6834, "step": 12719 }, { "epoch": 0.6537156953438175, "grad_norm": 1.022907018661499, "learning_rate": 2.82854625944911e-06, "loss": 0.6712, "step": 12720 }, { "epoch": 0.6537670880871621, "grad_norm": 1.0808684825897217, "learning_rate": 2.827796617749079e-06, "loss": 0.7098, "step": 12721 }, { "epoch": 0.6538184808305068, "grad_norm": 0.7135031223297119, "learning_rate": 2.8270470362318403e-06, "loss": 0.6199, "step": 12722 }, { "epoch": 0.6538698735738514, "grad_norm": 1.0814285278320312, "learning_rate": 2.8262975149181583e-06, "loss": 0.6846, "step": 12723 }, { "epoch": 0.653921266317196, "grad_norm": 1.1053740978240967, "learning_rate": 2.825548053828804e-06, "loss": 0.704, "step": 12724 }, { "epoch": 0.6539726590605407, "grad_norm": 0.8136286735534668, "learning_rate": 2.8247986529845397e-06, "loss": 0.6946, "step": 12725 }, { "epoch": 0.6540240518038853, "grad_norm": 1.059436559677124, "learning_rate": 2.8240493124061274e-06, "loss": 0.6908, "step": 12726 }, { "epoch": 0.6540754445472299, "grad_norm": 1.0118805170059204, "learning_rate": 2.8233000321143277e-06, "loss": 0.6703, "step": 12727 }, { "epoch": 0.6541268372905745, "grad_norm": 1.0381046533584595, "learning_rate": 2.8225508121299037e-06, "loss": 0.7546, "step": 12728 }, { "epoch": 0.6541782300339192, "grad_norm": 1.0622750520706177, "learning_rate": 2.8218016524736103e-06, "loss": 0.6797, "step": 12729 }, { "epoch": 0.6542296227772638, "grad_norm": 0.7641800045967102, "learning_rate": 2.821052553166205e-06, "loss": 0.6752, "step": 12730 }, { "epoch": 0.6542810155206085, "grad_norm": 1.0862160921096802, "learning_rate": 2.8203035142284386e-06, "loss": 0.7362, "step": 12731 }, { "epoch": 0.6543324082639531, "grad_norm": 1.108565092086792, "learning_rate": 2.8195545356810696e-06, "loss": 0.7283, "step": 12732 }, { "epoch": 0.6543838010072978, "grad_norm": 1.0464057922363281, "learning_rate": 2.8188056175448454e-06, "loss": 0.6863, "step": 12733 }, { "epoch": 0.6544351937506424, "grad_norm": 1.1162748336791992, "learning_rate": 2.818056759840515e-06, "loss": 0.7499, "step": 12734 }, { "epoch": 0.654486586493987, "grad_norm": 1.0928720235824585, "learning_rate": 2.817307962588831e-06, "loss": 0.7409, "step": 12735 }, { "epoch": 0.6545379792373317, "grad_norm": 1.075702428817749, "learning_rate": 2.8165592258105322e-06, "loss": 0.6874, "step": 12736 }, { "epoch": 0.6545893719806763, "grad_norm": 1.0422711372375488, "learning_rate": 2.815810549526368e-06, "loss": 0.7177, "step": 12737 }, { "epoch": 0.654640764724021, "grad_norm": 1.2073267698287964, "learning_rate": 2.8150619337570804e-06, "loss": 0.7258, "step": 12738 }, { "epoch": 0.6546921574673656, "grad_norm": 0.8831818103790283, "learning_rate": 2.81431337852341e-06, "loss": 0.6535, "step": 12739 }, { "epoch": 0.6547435502107103, "grad_norm": 1.0626106262207031, "learning_rate": 2.8135648838460943e-06, "loss": 0.7188, "step": 12740 }, { "epoch": 0.6547949429540549, "grad_norm": 0.7242793440818787, "learning_rate": 2.812816449745874e-06, "loss": 0.6719, "step": 12741 }, { "epoch": 0.6548463356973995, "grad_norm": 1.0402781963348389, "learning_rate": 2.812068076243485e-06, "loss": 0.6684, "step": 12742 }, { "epoch": 0.6548977284407441, "grad_norm": 1.0340954065322876, "learning_rate": 2.8113197633596577e-06, "loss": 0.7316, "step": 12743 }, { "epoch": 0.6549491211840888, "grad_norm": 0.9980387091636658, "learning_rate": 2.81057151111513e-06, "loss": 0.6872, "step": 12744 }, { "epoch": 0.6550005139274334, "grad_norm": 1.1070702075958252, "learning_rate": 2.8098233195306303e-06, "loss": 0.7241, "step": 12745 }, { "epoch": 0.6550519066707781, "grad_norm": 1.0863522291183472, "learning_rate": 2.809075188626888e-06, "loss": 0.7213, "step": 12746 }, { "epoch": 0.6551032994141227, "grad_norm": 1.1725856065750122, "learning_rate": 2.8083271184246292e-06, "loss": 0.6979, "step": 12747 }, { "epoch": 0.6551546921574674, "grad_norm": 1.0646820068359375, "learning_rate": 2.807579108944585e-06, "loss": 0.7227, "step": 12748 }, { "epoch": 0.655206084900812, "grad_norm": 1.0303170680999756, "learning_rate": 2.806831160207472e-06, "loss": 0.7246, "step": 12749 }, { "epoch": 0.6552574776441566, "grad_norm": 1.069653034210205, "learning_rate": 2.806083272234019e-06, "loss": 0.7021, "step": 12750 }, { "epoch": 0.6553088703875013, "grad_norm": 1.1190775632858276, "learning_rate": 2.8053354450449455e-06, "loss": 0.747, "step": 12751 }, { "epoch": 0.6553602631308459, "grad_norm": 0.773638129234314, "learning_rate": 2.8045876786609692e-06, "loss": 0.6481, "step": 12752 }, { "epoch": 0.6554116558741906, "grad_norm": 1.0609550476074219, "learning_rate": 2.8038399731028067e-06, "loss": 0.7028, "step": 12753 }, { "epoch": 0.6554630486175352, "grad_norm": 1.0849156379699707, "learning_rate": 2.803092328391177e-06, "loss": 0.7044, "step": 12754 }, { "epoch": 0.6555144413608799, "grad_norm": 0.9928613901138306, "learning_rate": 2.802344744546793e-06, "loss": 0.6482, "step": 12755 }, { "epoch": 0.6555658341042245, "grad_norm": 1.0639619827270508, "learning_rate": 2.8015972215903654e-06, "loss": 0.7092, "step": 12756 }, { "epoch": 0.6556172268475691, "grad_norm": 1.1189416646957397, "learning_rate": 2.8008497595426103e-06, "loss": 0.6428, "step": 12757 }, { "epoch": 0.6556686195909137, "grad_norm": 1.0047690868377686, "learning_rate": 2.8001023584242292e-06, "loss": 0.7003, "step": 12758 }, { "epoch": 0.6557200123342584, "grad_norm": 1.0537421703338623, "learning_rate": 2.7993550182559347e-06, "loss": 0.7732, "step": 12759 }, { "epoch": 0.655771405077603, "grad_norm": 1.0244888067245483, "learning_rate": 2.79860773905843e-06, "loss": 0.6736, "step": 12760 }, { "epoch": 0.6558227978209477, "grad_norm": 1.082221508026123, "learning_rate": 2.7978605208524233e-06, "loss": 0.7416, "step": 12761 }, { "epoch": 0.6558741905642923, "grad_norm": 1.081788182258606, "learning_rate": 2.7971133636586106e-06, "loss": 0.6589, "step": 12762 }, { "epoch": 0.655925583307637, "grad_norm": 1.1062562465667725, "learning_rate": 2.7963662674976977e-06, "loss": 0.7442, "step": 12763 }, { "epoch": 0.6559769760509816, "grad_norm": 0.6969239711761475, "learning_rate": 2.7956192323903812e-06, "loss": 0.706, "step": 12764 }, { "epoch": 0.6560283687943262, "grad_norm": 0.6793100237846375, "learning_rate": 2.7948722583573573e-06, "loss": 0.6496, "step": 12765 }, { "epoch": 0.6560797615376709, "grad_norm": 1.0185930728912354, "learning_rate": 2.794125345419325e-06, "loss": 0.7091, "step": 12766 }, { "epoch": 0.6561311542810155, "grad_norm": 0.7710469365119934, "learning_rate": 2.793378493596976e-06, "loss": 0.6798, "step": 12767 }, { "epoch": 0.6561825470243602, "grad_norm": 1.1846907138824463, "learning_rate": 2.7926317029110027e-06, "loss": 0.7357, "step": 12768 }, { "epoch": 0.6562339397677048, "grad_norm": 1.0201274156570435, "learning_rate": 2.791884973382094e-06, "loss": 0.6732, "step": 12769 }, { "epoch": 0.6562853325110495, "grad_norm": 1.0298786163330078, "learning_rate": 2.7911383050309444e-06, "loss": 0.6943, "step": 12770 }, { "epoch": 0.6563367252543941, "grad_norm": 1.082228422164917, "learning_rate": 2.7903916978782332e-06, "loss": 0.7015, "step": 12771 }, { "epoch": 0.6563881179977388, "grad_norm": 1.0424376726150513, "learning_rate": 2.7896451519446505e-06, "loss": 0.7179, "step": 12772 }, { "epoch": 0.6564395107410833, "grad_norm": 1.0396262407302856, "learning_rate": 2.78889866725088e-06, "loss": 0.7077, "step": 12773 }, { "epoch": 0.656490903484428, "grad_norm": 1.1299337148666382, "learning_rate": 2.7881522438176e-06, "loss": 0.6985, "step": 12774 }, { "epoch": 0.6565422962277726, "grad_norm": 1.1063848733901978, "learning_rate": 2.787405881665496e-06, "loss": 0.7178, "step": 12775 }, { "epoch": 0.6565936889711173, "grad_norm": 1.081787347793579, "learning_rate": 2.786659580815242e-06, "loss": 0.7184, "step": 12776 }, { "epoch": 0.6566450817144619, "grad_norm": 1.0519335269927979, "learning_rate": 2.785913341287522e-06, "loss": 0.7293, "step": 12777 }, { "epoch": 0.6566964744578065, "grad_norm": 1.0808764696121216, "learning_rate": 2.785167163103001e-06, "loss": 0.6821, "step": 12778 }, { "epoch": 0.6567478672011512, "grad_norm": 1.0175098180770874, "learning_rate": 2.784421046282361e-06, "loss": 0.6812, "step": 12779 }, { "epoch": 0.6567992599444958, "grad_norm": 1.0509356260299683, "learning_rate": 2.783674990846269e-06, "loss": 0.7871, "step": 12780 }, { "epoch": 0.6568506526878405, "grad_norm": 1.1779853105545044, "learning_rate": 2.782928996815398e-06, "loss": 0.6582, "step": 12781 }, { "epoch": 0.6569020454311851, "grad_norm": 1.0136502981185913, "learning_rate": 2.782183064210413e-06, "loss": 0.6787, "step": 12782 }, { "epoch": 0.6569534381745298, "grad_norm": 1.0517176389694214, "learning_rate": 2.7814371930519844e-06, "loss": 0.7277, "step": 12783 }, { "epoch": 0.6570048309178744, "grad_norm": 0.7343508005142212, "learning_rate": 2.780691383360777e-06, "loss": 0.6633, "step": 12784 }, { "epoch": 0.6570562236612191, "grad_norm": 1.0693659782409668, "learning_rate": 2.7799456351574493e-06, "loss": 0.6577, "step": 12785 }, { "epoch": 0.6571076164045637, "grad_norm": 1.0741453170776367, "learning_rate": 2.779199948462672e-06, "loss": 0.7059, "step": 12786 }, { "epoch": 0.6571590091479084, "grad_norm": 1.0592615604400635, "learning_rate": 2.778454323297094e-06, "loss": 0.7045, "step": 12787 }, { "epoch": 0.6572104018912529, "grad_norm": 1.047134518623352, "learning_rate": 2.777708759681382e-06, "loss": 0.7427, "step": 12788 }, { "epoch": 0.6572617946345976, "grad_norm": 1.1295169591903687, "learning_rate": 2.776963257636188e-06, "loss": 0.7152, "step": 12789 }, { "epoch": 0.6573131873779422, "grad_norm": 0.6891986131668091, "learning_rate": 2.7762178171821696e-06, "loss": 0.6668, "step": 12790 }, { "epoch": 0.6573645801212868, "grad_norm": 1.05350923538208, "learning_rate": 2.7754724383399763e-06, "loss": 0.6845, "step": 12791 }, { "epoch": 0.6574159728646315, "grad_norm": 1.0742684602737427, "learning_rate": 2.774727121130263e-06, "loss": 0.7061, "step": 12792 }, { "epoch": 0.6574673656079761, "grad_norm": 1.1062641143798828, "learning_rate": 2.7739818655736783e-06, "loss": 0.718, "step": 12793 }, { "epoch": 0.6575187583513208, "grad_norm": 1.099281668663025, "learning_rate": 2.773236671690868e-06, "loss": 0.692, "step": 12794 }, { "epoch": 0.6575701510946654, "grad_norm": 1.0931957960128784, "learning_rate": 2.772491539502482e-06, "loss": 0.7272, "step": 12795 }, { "epoch": 0.6576215438380101, "grad_norm": 1.0395638942718506, "learning_rate": 2.7717464690291633e-06, "loss": 0.6752, "step": 12796 }, { "epoch": 0.6576729365813547, "grad_norm": 1.0390788316726685, "learning_rate": 2.7710014602915547e-06, "loss": 0.7019, "step": 12797 }, { "epoch": 0.6577243293246994, "grad_norm": 1.090287208557129, "learning_rate": 2.7702565133102945e-06, "loss": 0.6396, "step": 12798 }, { "epoch": 0.657775722068044, "grad_norm": 1.0715219974517822, "learning_rate": 2.769511628106029e-06, "loss": 0.7007, "step": 12799 }, { "epoch": 0.6578271148113887, "grad_norm": 1.0754685401916504, "learning_rate": 2.768766804699388e-06, "loss": 0.7201, "step": 12800 }, { "epoch": 0.6578785075547333, "grad_norm": 1.155867576599121, "learning_rate": 2.7680220431110126e-06, "loss": 0.7857, "step": 12801 }, { "epoch": 0.657929900298078, "grad_norm": 1.3431057929992676, "learning_rate": 2.767277343361535e-06, "loss": 0.7104, "step": 12802 }, { "epoch": 0.6579812930414225, "grad_norm": 1.015091061592102, "learning_rate": 2.7665327054715895e-06, "loss": 0.7154, "step": 12803 }, { "epoch": 0.6580326857847671, "grad_norm": 1.0379616022109985, "learning_rate": 2.7657881294618027e-06, "loss": 0.6882, "step": 12804 }, { "epoch": 0.6580840785281118, "grad_norm": 1.0949854850769043, "learning_rate": 2.7650436153528093e-06, "loss": 0.7467, "step": 12805 }, { "epoch": 0.6581354712714564, "grad_norm": 1.1441411972045898, "learning_rate": 2.764299163165235e-06, "loss": 0.6855, "step": 12806 }, { "epoch": 0.6581868640148011, "grad_norm": 1.1006847620010376, "learning_rate": 2.7635547729197015e-06, "loss": 0.7071, "step": 12807 }, { "epoch": 0.6582382567581457, "grad_norm": 0.6870553493499756, "learning_rate": 2.7628104446368386e-06, "loss": 0.6185, "step": 12808 }, { "epoch": 0.6582896495014904, "grad_norm": 1.0310174226760864, "learning_rate": 2.7620661783372653e-06, "loss": 0.7299, "step": 12809 }, { "epoch": 0.658341042244835, "grad_norm": 1.0141171216964722, "learning_rate": 2.761321974041603e-06, "loss": 0.6382, "step": 12810 }, { "epoch": 0.6583924349881797, "grad_norm": 1.0433390140533447, "learning_rate": 2.760577831770469e-06, "loss": 0.7513, "step": 12811 }, { "epoch": 0.6584438277315243, "grad_norm": 1.0822899341583252, "learning_rate": 2.759833751544485e-06, "loss": 0.6576, "step": 12812 }, { "epoch": 0.658495220474869, "grad_norm": 1.085644006729126, "learning_rate": 2.7590897333842594e-06, "loss": 0.7532, "step": 12813 }, { "epoch": 0.6585466132182136, "grad_norm": 1.0528186559677124, "learning_rate": 2.7583457773104116e-06, "loss": 0.6937, "step": 12814 }, { "epoch": 0.6585980059615583, "grad_norm": 1.1516897678375244, "learning_rate": 2.7576018833435513e-06, "loss": 0.6773, "step": 12815 }, { "epoch": 0.6586493987049029, "grad_norm": 1.0743348598480225, "learning_rate": 2.7568580515042876e-06, "loss": 0.6823, "step": 12816 }, { "epoch": 0.6587007914482476, "grad_norm": 1.0742267370224, "learning_rate": 2.756114281813232e-06, "loss": 0.7369, "step": 12817 }, { "epoch": 0.6587521841915921, "grad_norm": 1.1002205610275269, "learning_rate": 2.75537057429099e-06, "loss": 0.7683, "step": 12818 }, { "epoch": 0.6588035769349367, "grad_norm": 1.3030164241790771, "learning_rate": 2.7546269289581655e-06, "loss": 0.7255, "step": 12819 }, { "epoch": 0.6588549696782814, "grad_norm": 0.8345321416854858, "learning_rate": 2.7538833458353613e-06, "loss": 0.6662, "step": 12820 }, { "epoch": 0.658906362421626, "grad_norm": 1.0949599742889404, "learning_rate": 2.753139824943182e-06, "loss": 0.7624, "step": 12821 }, { "epoch": 0.6589577551649707, "grad_norm": 0.7916919589042664, "learning_rate": 2.7523963663022267e-06, "loss": 0.6432, "step": 12822 }, { "epoch": 0.6590091479083153, "grad_norm": 1.0783405303955078, "learning_rate": 2.7516529699330917e-06, "loss": 0.7531, "step": 12823 }, { "epoch": 0.65906054065166, "grad_norm": 1.1259506940841675, "learning_rate": 2.7509096358563737e-06, "loss": 0.7125, "step": 12824 }, { "epoch": 0.6591119333950046, "grad_norm": 1.025465965270996, "learning_rate": 2.7501663640926695e-06, "loss": 0.7083, "step": 12825 }, { "epoch": 0.6591633261383493, "grad_norm": 1.1218931674957275, "learning_rate": 2.749423154662571e-06, "loss": 0.6896, "step": 12826 }, { "epoch": 0.6592147188816939, "grad_norm": 1.0682748556137085, "learning_rate": 2.748680007586667e-06, "loss": 0.6971, "step": 12827 }, { "epoch": 0.6592661116250386, "grad_norm": 1.0668566226959229, "learning_rate": 2.747936922885554e-06, "loss": 0.7245, "step": 12828 }, { "epoch": 0.6593175043683832, "grad_norm": 1.0837361812591553, "learning_rate": 2.747193900579811e-06, "loss": 0.6987, "step": 12829 }, { "epoch": 0.6593688971117279, "grad_norm": 1.0189871788024902, "learning_rate": 2.7464509406900297e-06, "loss": 0.6969, "step": 12830 }, { "epoch": 0.6594202898550725, "grad_norm": 1.057712435722351, "learning_rate": 2.7457080432367934e-06, "loss": 0.6866, "step": 12831 }, { "epoch": 0.6594716825984172, "grad_norm": 0.6882720589637756, "learning_rate": 2.744965208240684e-06, "loss": 0.6211, "step": 12832 }, { "epoch": 0.6595230753417617, "grad_norm": 0.7165482640266418, "learning_rate": 2.744222435722281e-06, "loss": 0.6807, "step": 12833 }, { "epoch": 0.6595744680851063, "grad_norm": 0.7354871034622192, "learning_rate": 2.743479725702167e-06, "loss": 0.6405, "step": 12834 }, { "epoch": 0.659625860828451, "grad_norm": 1.1313343048095703, "learning_rate": 2.7427370782009184e-06, "loss": 0.7235, "step": 12835 }, { "epoch": 0.6596772535717956, "grad_norm": 1.085094928741455, "learning_rate": 2.7419944932391072e-06, "loss": 0.7108, "step": 12836 }, { "epoch": 0.6597286463151403, "grad_norm": 1.1693204641342163, "learning_rate": 2.741251970837313e-06, "loss": 0.6974, "step": 12837 }, { "epoch": 0.6597800390584849, "grad_norm": 1.0934221744537354, "learning_rate": 2.7405095110161052e-06, "loss": 0.7361, "step": 12838 }, { "epoch": 0.6598314318018296, "grad_norm": 1.0419096946716309, "learning_rate": 2.7397671137960547e-06, "loss": 0.7079, "step": 12839 }, { "epoch": 0.6598828245451742, "grad_norm": 1.1381500959396362, "learning_rate": 2.7390247791977277e-06, "loss": 0.7339, "step": 12840 }, { "epoch": 0.6599342172885189, "grad_norm": 1.132917881011963, "learning_rate": 2.738282507241698e-06, "loss": 0.7557, "step": 12841 }, { "epoch": 0.6599856100318635, "grad_norm": 1.0529555082321167, "learning_rate": 2.7375402979485223e-06, "loss": 0.7413, "step": 12842 }, { "epoch": 0.6600370027752082, "grad_norm": 1.0734312534332275, "learning_rate": 2.73679815133877e-06, "loss": 0.645, "step": 12843 }, { "epoch": 0.6600883955185528, "grad_norm": 1.0571225881576538, "learning_rate": 2.7360560674330015e-06, "loss": 0.6877, "step": 12844 }, { "epoch": 0.6601397882618975, "grad_norm": 1.1606024503707886, "learning_rate": 2.735314046251775e-06, "loss": 0.7497, "step": 12845 }, { "epoch": 0.6601911810052421, "grad_norm": 0.8661072254180908, "learning_rate": 2.7345720878156513e-06, "loss": 0.6594, "step": 12846 }, { "epoch": 0.6602425737485867, "grad_norm": 1.015209674835205, "learning_rate": 2.733830192145187e-06, "loss": 0.6661, "step": 12847 }, { "epoch": 0.6602939664919314, "grad_norm": 0.7820641398429871, "learning_rate": 2.7330883592609356e-06, "loss": 0.6775, "step": 12848 }, { "epoch": 0.6603453592352759, "grad_norm": 1.1234233379364014, "learning_rate": 2.73234658918345e-06, "loss": 0.6843, "step": 12849 }, { "epoch": 0.6603967519786206, "grad_norm": 0.7217654585838318, "learning_rate": 2.731604881933285e-06, "loss": 0.6066, "step": 12850 }, { "epoch": 0.6604481447219652, "grad_norm": 1.137237548828125, "learning_rate": 2.7308632375309845e-06, "loss": 0.7596, "step": 12851 }, { "epoch": 0.6604995374653099, "grad_norm": 1.1154121160507202, "learning_rate": 2.730121655997101e-06, "loss": 0.7482, "step": 12852 }, { "epoch": 0.6605509302086545, "grad_norm": 1.032662034034729, "learning_rate": 2.7293801373521776e-06, "loss": 0.715, "step": 12853 }, { "epoch": 0.6606023229519992, "grad_norm": 1.0052317380905151, "learning_rate": 2.728638681616764e-06, "loss": 0.6993, "step": 12854 }, { "epoch": 0.6606537156953438, "grad_norm": 0.6921722292900085, "learning_rate": 2.7278972888113954e-06, "loss": 0.6759, "step": 12855 }, { "epoch": 0.6607051084386885, "grad_norm": 1.146413803100586, "learning_rate": 2.7271559589566187e-06, "loss": 0.7389, "step": 12856 }, { "epoch": 0.6607565011820331, "grad_norm": 0.8863875269889832, "learning_rate": 2.7264146920729704e-06, "loss": 0.6774, "step": 12857 }, { "epoch": 0.6608078939253778, "grad_norm": 1.112107515335083, "learning_rate": 2.7256734881809864e-06, "loss": 0.6846, "step": 12858 }, { "epoch": 0.6608592866687224, "grad_norm": 1.153350830078125, "learning_rate": 2.7249323473012067e-06, "loss": 0.7165, "step": 12859 }, { "epoch": 0.660910679412067, "grad_norm": 1.064603328704834, "learning_rate": 2.7241912694541635e-06, "loss": 0.6703, "step": 12860 }, { "epoch": 0.6609620721554117, "grad_norm": 1.0802053213119507, "learning_rate": 2.723450254660388e-06, "loss": 0.7535, "step": 12861 }, { "epoch": 0.6610134648987563, "grad_norm": 1.0695346593856812, "learning_rate": 2.7227093029404093e-06, "loss": 0.7583, "step": 12862 }, { "epoch": 0.661064857642101, "grad_norm": 1.1594820022583008, "learning_rate": 2.7219684143147616e-06, "loss": 0.7265, "step": 12863 }, { "epoch": 0.6611162503854455, "grad_norm": 1.1278560161590576, "learning_rate": 2.7212275888039647e-06, "loss": 0.6458, "step": 12864 }, { "epoch": 0.6611676431287902, "grad_norm": 1.0376242399215698, "learning_rate": 2.720486826428549e-06, "loss": 0.6832, "step": 12865 }, { "epoch": 0.6612190358721348, "grad_norm": 1.1174986362457275, "learning_rate": 2.7197461272090364e-06, "loss": 0.7007, "step": 12866 }, { "epoch": 0.6612704286154795, "grad_norm": 1.0603545904159546, "learning_rate": 2.7190054911659467e-06, "loss": 0.703, "step": 12867 }, { "epoch": 0.6613218213588241, "grad_norm": 1.0593544244766235, "learning_rate": 2.718264918319804e-06, "loss": 0.7296, "step": 12868 }, { "epoch": 0.6613732141021688, "grad_norm": 1.0537619590759277, "learning_rate": 2.7175244086911224e-06, "loss": 0.7316, "step": 12869 }, { "epoch": 0.6614246068455134, "grad_norm": 0.8461965322494507, "learning_rate": 2.716783962300423e-06, "loss": 0.6542, "step": 12870 }, { "epoch": 0.661475999588858, "grad_norm": 1.0707175731658936, "learning_rate": 2.716043579168215e-06, "loss": 0.7165, "step": 12871 }, { "epoch": 0.6615273923322027, "grad_norm": 1.0807058811187744, "learning_rate": 2.715303259315016e-06, "loss": 0.6933, "step": 12872 }, { "epoch": 0.6615787850755473, "grad_norm": 1.0899757146835327, "learning_rate": 2.7145630027613345e-06, "loss": 0.7436, "step": 12873 }, { "epoch": 0.661630177818892, "grad_norm": 1.1173241138458252, "learning_rate": 2.7138228095276814e-06, "loss": 0.6769, "step": 12874 }, { "epoch": 0.6616815705622366, "grad_norm": 0.8813436627388, "learning_rate": 2.713082679634561e-06, "loss": 0.6524, "step": 12875 }, { "epoch": 0.6617329633055813, "grad_norm": 1.043320894241333, "learning_rate": 2.712342613102485e-06, "loss": 0.7338, "step": 12876 }, { "epoch": 0.6617843560489259, "grad_norm": 1.0370912551879883, "learning_rate": 2.7116026099519545e-06, "loss": 0.7598, "step": 12877 }, { "epoch": 0.6618357487922706, "grad_norm": 1.0835531949996948, "learning_rate": 2.7108626702034692e-06, "loss": 0.6604, "step": 12878 }, { "epoch": 0.6618871415356151, "grad_norm": 1.0562360286712646, "learning_rate": 2.7101227938775372e-06, "loss": 0.7276, "step": 12879 }, { "epoch": 0.6619385342789598, "grad_norm": 1.0672177076339722, "learning_rate": 2.7093829809946487e-06, "loss": 0.6926, "step": 12880 }, { "epoch": 0.6619899270223044, "grad_norm": 1.1066380739212036, "learning_rate": 2.7086432315753065e-06, "loss": 0.7237, "step": 12881 }, { "epoch": 0.6620413197656491, "grad_norm": 1.0359033346176147, "learning_rate": 2.707903545640005e-06, "loss": 0.6986, "step": 12882 }, { "epoch": 0.6620927125089937, "grad_norm": 1.0642324686050415, "learning_rate": 2.7071639232092372e-06, "loss": 0.7203, "step": 12883 }, { "epoch": 0.6621441052523384, "grad_norm": 1.1208524703979492, "learning_rate": 2.706424364303494e-06, "loss": 0.7454, "step": 12884 }, { "epoch": 0.662195497995683, "grad_norm": 1.046903133392334, "learning_rate": 2.705684868943267e-06, "loss": 0.7152, "step": 12885 }, { "epoch": 0.6622468907390276, "grad_norm": 1.1236436367034912, "learning_rate": 2.7049454371490453e-06, "loss": 0.7415, "step": 12886 }, { "epoch": 0.6622982834823723, "grad_norm": 1.056667447090149, "learning_rate": 2.704206068941312e-06, "loss": 0.7266, "step": 12887 }, { "epoch": 0.6623496762257169, "grad_norm": 1.0895304679870605, "learning_rate": 2.7034667643405565e-06, "loss": 0.7088, "step": 12888 }, { "epoch": 0.6624010689690616, "grad_norm": 1.0453078746795654, "learning_rate": 2.7027275233672595e-06, "loss": 0.6953, "step": 12889 }, { "epoch": 0.6624524617124062, "grad_norm": 0.7355624437332153, "learning_rate": 2.7019883460419027e-06, "loss": 0.6284, "step": 12890 }, { "epoch": 0.6625038544557509, "grad_norm": 1.0479023456573486, "learning_rate": 2.7012492323849637e-06, "loss": 0.7041, "step": 12891 }, { "epoch": 0.6625552471990955, "grad_norm": 0.8359283208847046, "learning_rate": 2.7005101824169254e-06, "loss": 0.6644, "step": 12892 }, { "epoch": 0.6626066399424402, "grad_norm": 1.0445494651794434, "learning_rate": 2.699771196158257e-06, "loss": 0.749, "step": 12893 }, { "epoch": 0.6626580326857847, "grad_norm": 1.1334178447723389, "learning_rate": 2.699032273629439e-06, "loss": 0.7641, "step": 12894 }, { "epoch": 0.6627094254291294, "grad_norm": 1.087429404258728, "learning_rate": 2.69829341485094e-06, "loss": 0.6796, "step": 12895 }, { "epoch": 0.662760818172474, "grad_norm": 1.2374082803726196, "learning_rate": 2.6975546198432307e-06, "loss": 0.6819, "step": 12896 }, { "epoch": 0.6628122109158187, "grad_norm": 1.1020188331604004, "learning_rate": 2.6968158886267826e-06, "loss": 0.714, "step": 12897 }, { "epoch": 0.6628636036591633, "grad_norm": 1.0726667642593384, "learning_rate": 2.696077221222062e-06, "loss": 0.7025, "step": 12898 }, { "epoch": 0.662914996402508, "grad_norm": 1.064894437789917, "learning_rate": 2.6953386176495333e-06, "loss": 0.7028, "step": 12899 }, { "epoch": 0.6629663891458526, "grad_norm": 1.1138255596160889, "learning_rate": 2.6946000779296595e-06, "loss": 0.6948, "step": 12900 }, { "epoch": 0.6630177818891972, "grad_norm": 1.0681095123291016, "learning_rate": 2.6938616020829055e-06, "loss": 0.7504, "step": 12901 }, { "epoch": 0.6630691746325419, "grad_norm": 1.101899266242981, "learning_rate": 2.6931231901297296e-06, "loss": 0.6759, "step": 12902 }, { "epoch": 0.6631205673758865, "grad_norm": 1.0485601425170898, "learning_rate": 2.6923848420905894e-06, "loss": 0.7164, "step": 12903 }, { "epoch": 0.6631719601192312, "grad_norm": 1.1175611019134521, "learning_rate": 2.6916465579859413e-06, "loss": 0.6848, "step": 12904 }, { "epoch": 0.6632233528625758, "grad_norm": 1.0747559070587158, "learning_rate": 2.6909083378362443e-06, "loss": 0.7115, "step": 12905 }, { "epoch": 0.6632747456059205, "grad_norm": 1.0026954412460327, "learning_rate": 2.690170181661945e-06, "loss": 0.7054, "step": 12906 }, { "epoch": 0.6633261383492651, "grad_norm": 1.0330870151519775, "learning_rate": 2.6894320894835e-06, "loss": 0.6807, "step": 12907 }, { "epoch": 0.6633775310926098, "grad_norm": 0.8364273905754089, "learning_rate": 2.688694061321355e-06, "loss": 0.6631, "step": 12908 }, { "epoch": 0.6634289238359543, "grad_norm": 0.7108138799667358, "learning_rate": 2.6879560971959596e-06, "loss": 0.6312, "step": 12909 }, { "epoch": 0.663480316579299, "grad_norm": 0.9977409243583679, "learning_rate": 2.68721819712776e-06, "loss": 0.7048, "step": 12910 }, { "epoch": 0.6635317093226436, "grad_norm": 1.1724025011062622, "learning_rate": 2.6864803611372005e-06, "loss": 0.7372, "step": 12911 }, { "epoch": 0.6635831020659883, "grad_norm": 0.702157735824585, "learning_rate": 2.685742589244722e-06, "loss": 0.6002, "step": 12912 }, { "epoch": 0.6636344948093329, "grad_norm": 0.8037451505661011, "learning_rate": 2.685004881470765e-06, "loss": 0.636, "step": 12913 }, { "epoch": 0.6636858875526775, "grad_norm": 1.1100704669952393, "learning_rate": 2.6842672378357724e-06, "loss": 0.7237, "step": 12914 }, { "epoch": 0.6637372802960222, "grad_norm": 1.0537354946136475, "learning_rate": 2.683529658360175e-06, "loss": 0.664, "step": 12915 }, { "epoch": 0.6637886730393668, "grad_norm": 1.122029423713684, "learning_rate": 2.6827921430644134e-06, "loss": 0.7384, "step": 12916 }, { "epoch": 0.6638400657827115, "grad_norm": 0.7626706957817078, "learning_rate": 2.682054691968916e-06, "loss": 0.6549, "step": 12917 }, { "epoch": 0.6638914585260561, "grad_norm": 1.077371597290039, "learning_rate": 2.68131730509412e-06, "loss": 0.7114, "step": 12918 }, { "epoch": 0.6639428512694008, "grad_norm": 1.0130869150161743, "learning_rate": 2.680579982460453e-06, "loss": 0.7152, "step": 12919 }, { "epoch": 0.6639942440127454, "grad_norm": 0.9791831374168396, "learning_rate": 2.6798427240883406e-06, "loss": 0.6747, "step": 12920 }, { "epoch": 0.6640456367560901, "grad_norm": 1.0818803310394287, "learning_rate": 2.679105529998216e-06, "loss": 0.7692, "step": 12921 }, { "epoch": 0.6640970294994347, "grad_norm": 1.022172212600708, "learning_rate": 2.6783684002104947e-06, "loss": 0.7294, "step": 12922 }, { "epoch": 0.6641484222427794, "grad_norm": 1.145811915397644, "learning_rate": 2.6776313347456075e-06, "loss": 0.7068, "step": 12923 }, { "epoch": 0.6641998149861239, "grad_norm": 0.75337815284729, "learning_rate": 2.6768943336239713e-06, "loss": 0.6577, "step": 12924 }, { "epoch": 0.6642512077294686, "grad_norm": 1.0113253593444824, "learning_rate": 2.676157396866006e-06, "loss": 0.6871, "step": 12925 }, { "epoch": 0.6643026004728132, "grad_norm": 1.1185624599456787, "learning_rate": 2.6754205244921276e-06, "loss": 0.6686, "step": 12926 }, { "epoch": 0.6643539932161578, "grad_norm": 1.1275732517242432, "learning_rate": 2.674683716522756e-06, "loss": 0.7195, "step": 12927 }, { "epoch": 0.6644053859595025, "grad_norm": 0.7710570096969604, "learning_rate": 2.673946972978302e-06, "loss": 0.6069, "step": 12928 }, { "epoch": 0.6644567787028471, "grad_norm": 1.1093297004699707, "learning_rate": 2.6732102938791758e-06, "loss": 0.7286, "step": 12929 }, { "epoch": 0.6645081714461918, "grad_norm": 1.0068796873092651, "learning_rate": 2.672473679245795e-06, "loss": 0.6898, "step": 12930 }, { "epoch": 0.6645595641895364, "grad_norm": 1.092211127281189, "learning_rate": 2.6717371290985596e-06, "loss": 0.6837, "step": 12931 }, { "epoch": 0.6646109569328811, "grad_norm": 1.2151323556900024, "learning_rate": 2.6710006434578817e-06, "loss": 0.689, "step": 12932 }, { "epoch": 0.6646623496762257, "grad_norm": 0.7107070088386536, "learning_rate": 2.670264222344163e-06, "loss": 0.6365, "step": 12933 }, { "epoch": 0.6647137424195704, "grad_norm": 1.0214279890060425, "learning_rate": 2.6695278657778123e-06, "loss": 0.6662, "step": 12934 }, { "epoch": 0.664765135162915, "grad_norm": 1.0154188871383667, "learning_rate": 2.6687915737792237e-06, "loss": 0.6869, "step": 12935 }, { "epoch": 0.6648165279062597, "grad_norm": 1.0958317518234253, "learning_rate": 2.668055346368802e-06, "loss": 0.739, "step": 12936 }, { "epoch": 0.6648679206496043, "grad_norm": 0.9915972948074341, "learning_rate": 2.6673191835669434e-06, "loss": 0.7194, "step": 12937 }, { "epoch": 0.664919313392949, "grad_norm": 1.2342073917388916, "learning_rate": 2.666583085394041e-06, "loss": 0.7498, "step": 12938 }, { "epoch": 0.6649707061362936, "grad_norm": 0.6850623488426208, "learning_rate": 2.665847051870494e-06, "loss": 0.6408, "step": 12939 }, { "epoch": 0.6650220988796381, "grad_norm": 1.1085309982299805, "learning_rate": 2.665111083016694e-06, "loss": 0.7008, "step": 12940 }, { "epoch": 0.6650734916229828, "grad_norm": 0.9892670512199402, "learning_rate": 2.6643751788530293e-06, "loss": 0.6826, "step": 12941 }, { "epoch": 0.6651248843663274, "grad_norm": 1.1044886112213135, "learning_rate": 2.663639339399887e-06, "loss": 0.7445, "step": 12942 }, { "epoch": 0.6651762771096721, "grad_norm": 1.0368824005126953, "learning_rate": 2.6629035646776625e-06, "loss": 0.6899, "step": 12943 }, { "epoch": 0.6652276698530167, "grad_norm": 1.1664491891860962, "learning_rate": 2.662167854706731e-06, "loss": 0.7509, "step": 12944 }, { "epoch": 0.6652790625963614, "grad_norm": 1.092483639717102, "learning_rate": 2.6614322095074827e-06, "loss": 0.6688, "step": 12945 }, { "epoch": 0.665330455339706, "grad_norm": 1.0791107416152954, "learning_rate": 2.6606966291002956e-06, "loss": 0.7076, "step": 12946 }, { "epoch": 0.6653818480830507, "grad_norm": 1.0799063444137573, "learning_rate": 2.6599611135055527e-06, "loss": 0.7059, "step": 12947 }, { "epoch": 0.6654332408263953, "grad_norm": 1.0485109090805054, "learning_rate": 2.659225662743631e-06, "loss": 0.7363, "step": 12948 }, { "epoch": 0.66548463356974, "grad_norm": 1.0942001342773438, "learning_rate": 2.658490276834905e-06, "loss": 0.7132, "step": 12949 }, { "epoch": 0.6655360263130846, "grad_norm": 1.1476563215255737, "learning_rate": 2.6577549557997515e-06, "loss": 0.7211, "step": 12950 }, { "epoch": 0.6655874190564293, "grad_norm": 0.9851348400115967, "learning_rate": 2.6570196996585407e-06, "loss": 0.6968, "step": 12951 }, { "epoch": 0.6656388117997739, "grad_norm": 1.1379748582839966, "learning_rate": 2.6562845084316467e-06, "loss": 0.6957, "step": 12952 }, { "epoch": 0.6656902045431186, "grad_norm": 0.843419075012207, "learning_rate": 2.6555493821394373e-06, "loss": 0.6851, "step": 12953 }, { "epoch": 0.6657415972864632, "grad_norm": 1.04120934009552, "learning_rate": 2.6548143208022794e-06, "loss": 0.6963, "step": 12954 }, { "epoch": 0.6657929900298077, "grad_norm": 1.0185667276382446, "learning_rate": 2.6540793244405364e-06, "loss": 0.667, "step": 12955 }, { "epoch": 0.6658443827731524, "grad_norm": 1.038561224937439, "learning_rate": 2.6533443930745788e-06, "loss": 0.6686, "step": 12956 }, { "epoch": 0.665895775516497, "grad_norm": 1.105364441871643, "learning_rate": 2.6526095267247598e-06, "loss": 0.7036, "step": 12957 }, { "epoch": 0.6659471682598417, "grad_norm": 0.9806315302848816, "learning_rate": 2.651874725411445e-06, "loss": 0.6941, "step": 12958 }, { "epoch": 0.6659985610031863, "grad_norm": 1.0594521760940552, "learning_rate": 2.6511399891549927e-06, "loss": 0.7132, "step": 12959 }, { "epoch": 0.666049953746531, "grad_norm": 1.1371122598648071, "learning_rate": 2.650405317975755e-06, "loss": 0.7084, "step": 12960 }, { "epoch": 0.6661013464898756, "grad_norm": 1.045655369758606, "learning_rate": 2.6496707118940924e-06, "loss": 0.7327, "step": 12961 }, { "epoch": 0.6661527392332203, "grad_norm": 1.119173288345337, "learning_rate": 2.648936170930353e-06, "loss": 0.7237, "step": 12962 }, { "epoch": 0.6662041319765649, "grad_norm": 0.920009195804596, "learning_rate": 2.648201695104894e-06, "loss": 0.6461, "step": 12963 }, { "epoch": 0.6662555247199096, "grad_norm": 0.7543112635612488, "learning_rate": 2.6474672844380566e-06, "loss": 0.6639, "step": 12964 }, { "epoch": 0.6663069174632542, "grad_norm": 1.097749948501587, "learning_rate": 2.6467329389501946e-06, "loss": 0.7405, "step": 12965 }, { "epoch": 0.6663583102065989, "grad_norm": 1.0818537473678589, "learning_rate": 2.6459986586616515e-06, "loss": 0.7568, "step": 12966 }, { "epoch": 0.6664097029499435, "grad_norm": 1.1216773986816406, "learning_rate": 2.6452644435927712e-06, "loss": 0.7066, "step": 12967 }, { "epoch": 0.6664610956932882, "grad_norm": 1.1157472133636475, "learning_rate": 2.644530293763893e-06, "loss": 0.7251, "step": 12968 }, { "epoch": 0.6665124884366328, "grad_norm": 1.1142723560333252, "learning_rate": 2.6437962091953628e-06, "loss": 0.7961, "step": 12969 }, { "epoch": 0.6665638811799773, "grad_norm": 0.7631222009658813, "learning_rate": 2.643062189907516e-06, "loss": 0.6527, "step": 12970 }, { "epoch": 0.666615273923322, "grad_norm": 1.0619889497756958, "learning_rate": 2.6423282359206877e-06, "loss": 0.689, "step": 12971 }, { "epoch": 0.6666666666666666, "grad_norm": 1.1143907308578491, "learning_rate": 2.6415943472552176e-06, "loss": 0.7299, "step": 12972 }, { "epoch": 0.6667180594100113, "grad_norm": 0.7048237919807434, "learning_rate": 2.640860523931432e-06, "loss": 0.6148, "step": 12973 }, { "epoch": 0.6667694521533559, "grad_norm": 1.0469070672988892, "learning_rate": 2.6401267659696684e-06, "loss": 0.7182, "step": 12974 }, { "epoch": 0.6668208448967006, "grad_norm": 0.7999299168586731, "learning_rate": 2.6393930733902525e-06, "loss": 0.6549, "step": 12975 }, { "epoch": 0.6668722376400452, "grad_norm": 1.1045491695404053, "learning_rate": 2.6386594462135128e-06, "loss": 0.702, "step": 12976 }, { "epoch": 0.6669236303833899, "grad_norm": 1.201661467552185, "learning_rate": 2.6379258844597738e-06, "loss": 0.7121, "step": 12977 }, { "epoch": 0.6669750231267345, "grad_norm": 1.1330125331878662, "learning_rate": 2.6371923881493627e-06, "loss": 0.6542, "step": 12978 }, { "epoch": 0.6670264158700792, "grad_norm": 1.0941286087036133, "learning_rate": 2.6364589573026e-06, "loss": 0.6682, "step": 12979 }, { "epoch": 0.6670778086134238, "grad_norm": 1.0680410861968994, "learning_rate": 2.6357255919398027e-06, "loss": 0.6904, "step": 12980 }, { "epoch": 0.6671292013567685, "grad_norm": 1.1758029460906982, "learning_rate": 2.634992292081296e-06, "loss": 0.7069, "step": 12981 }, { "epoch": 0.6671805941001131, "grad_norm": 1.1242934465408325, "learning_rate": 2.6342590577473926e-06, "loss": 0.7487, "step": 12982 }, { "epoch": 0.6672319868434577, "grad_norm": 1.089093804359436, "learning_rate": 2.633525888958407e-06, "loss": 0.6794, "step": 12983 }, { "epoch": 0.6672833795868024, "grad_norm": 1.077668309211731, "learning_rate": 2.632792785734652e-06, "loss": 0.7132, "step": 12984 }, { "epoch": 0.6673347723301469, "grad_norm": 1.1061662435531616, "learning_rate": 2.632059748096444e-06, "loss": 0.6843, "step": 12985 }, { "epoch": 0.6673861650734916, "grad_norm": 0.7946038246154785, "learning_rate": 2.6313267760640842e-06, "loss": 0.6564, "step": 12986 }, { "epoch": 0.6674375578168362, "grad_norm": 1.0307117700576782, "learning_rate": 2.6305938696578866e-06, "loss": 0.6981, "step": 12987 }, { "epoch": 0.6674889505601809, "grad_norm": 1.066892147064209, "learning_rate": 2.6298610288981553e-06, "loss": 0.7817, "step": 12988 }, { "epoch": 0.6675403433035255, "grad_norm": 1.0612431764602661, "learning_rate": 2.629128253805191e-06, "loss": 0.7093, "step": 12989 }, { "epoch": 0.6675917360468702, "grad_norm": 1.1109445095062256, "learning_rate": 2.6283955443993015e-06, "loss": 0.7142, "step": 12990 }, { "epoch": 0.6676431287902148, "grad_norm": 1.0170804262161255, "learning_rate": 2.6276629007007838e-06, "loss": 0.6319, "step": 12991 }, { "epoch": 0.6676945215335595, "grad_norm": 1.1019597053527832, "learning_rate": 2.626930322729938e-06, "loss": 0.6879, "step": 12992 }, { "epoch": 0.6677459142769041, "grad_norm": 1.0517843961715698, "learning_rate": 2.6261978105070575e-06, "loss": 0.6755, "step": 12993 }, { "epoch": 0.6677973070202488, "grad_norm": 1.1824597120285034, "learning_rate": 2.625465364052441e-06, "loss": 0.7672, "step": 12994 }, { "epoch": 0.6678486997635934, "grad_norm": 1.098812460899353, "learning_rate": 2.6247329833863804e-06, "loss": 0.7355, "step": 12995 }, { "epoch": 0.667900092506938, "grad_norm": 0.7628695964813232, "learning_rate": 2.624000668529167e-06, "loss": 0.6582, "step": 12996 }, { "epoch": 0.6679514852502827, "grad_norm": 0.7828240394592285, "learning_rate": 2.6232684195010876e-06, "loss": 0.6547, "step": 12997 }, { "epoch": 0.6680028779936273, "grad_norm": 1.0493202209472656, "learning_rate": 2.6225362363224346e-06, "loss": 0.7351, "step": 12998 }, { "epoch": 0.668054270736972, "grad_norm": 1.0406752824783325, "learning_rate": 2.621804119013491e-06, "loss": 0.728, "step": 12999 }, { "epoch": 0.6681056634803165, "grad_norm": 0.8195496201515198, "learning_rate": 2.6210720675945407e-06, "loss": 0.6664, "step": 13000 }, { "epoch": 0.6681570562236612, "grad_norm": 1.0710667371749878, "learning_rate": 2.620340082085866e-06, "loss": 0.718, "step": 13001 }, { "epoch": 0.6682084489670058, "grad_norm": 1.0106840133666992, "learning_rate": 2.6196081625077463e-06, "loss": 0.6745, "step": 13002 }, { "epoch": 0.6682598417103505, "grad_norm": 1.0486606359481812, "learning_rate": 2.6188763088804624e-06, "loss": 0.7013, "step": 13003 }, { "epoch": 0.6683112344536951, "grad_norm": 1.022236704826355, "learning_rate": 2.6181445212242897e-06, "loss": 0.6632, "step": 13004 }, { "epoch": 0.6683626271970398, "grad_norm": 0.8202311992645264, "learning_rate": 2.6174127995595035e-06, "loss": 0.6832, "step": 13005 }, { "epoch": 0.6684140199403844, "grad_norm": 1.1213316917419434, "learning_rate": 2.6166811439063733e-06, "loss": 0.7157, "step": 13006 }, { "epoch": 0.668465412683729, "grad_norm": 1.0338537693023682, "learning_rate": 2.615949554285178e-06, "loss": 0.7103, "step": 13007 }, { "epoch": 0.6685168054270737, "grad_norm": 1.1118669509887695, "learning_rate": 2.6152180307161777e-06, "loss": 0.7517, "step": 13008 }, { "epoch": 0.6685681981704183, "grad_norm": 1.1068660020828247, "learning_rate": 2.6144865732196467e-06, "loss": 0.72, "step": 13009 }, { "epoch": 0.668619590913763, "grad_norm": 1.0240241289138794, "learning_rate": 2.6137551818158467e-06, "loss": 0.6949, "step": 13010 }, { "epoch": 0.6686709836571076, "grad_norm": 1.0715107917785645, "learning_rate": 2.6130238565250443e-06, "loss": 0.659, "step": 13011 }, { "epoch": 0.6687223764004523, "grad_norm": 1.0885446071624756, "learning_rate": 2.6122925973675006e-06, "loss": 0.7081, "step": 13012 }, { "epoch": 0.6687737691437969, "grad_norm": 1.142139196395874, "learning_rate": 2.611561404363474e-06, "loss": 0.7213, "step": 13013 }, { "epoch": 0.6688251618871416, "grad_norm": 1.0383515357971191, "learning_rate": 2.6108302775332285e-06, "loss": 0.6533, "step": 13014 }, { "epoch": 0.6688765546304862, "grad_norm": 1.1004455089569092, "learning_rate": 2.610099216897012e-06, "loss": 0.7198, "step": 13015 }, { "epoch": 0.6689279473738308, "grad_norm": 1.1366934776306152, "learning_rate": 2.6093682224750856e-06, "loss": 0.7104, "step": 13016 }, { "epoch": 0.6689793401171754, "grad_norm": 1.07968270778656, "learning_rate": 2.6086372942877e-06, "loss": 0.7275, "step": 13017 }, { "epoch": 0.6690307328605201, "grad_norm": 0.7909573316574097, "learning_rate": 2.6079064323551072e-06, "loss": 0.6762, "step": 13018 }, { "epoch": 0.6690821256038647, "grad_norm": 1.092541217803955, "learning_rate": 2.607175636697553e-06, "loss": 0.7093, "step": 13019 }, { "epoch": 0.6691335183472094, "grad_norm": 1.0228408575057983, "learning_rate": 2.606444907335289e-06, "loss": 0.6737, "step": 13020 }, { "epoch": 0.669184911090554, "grad_norm": 1.0342910289764404, "learning_rate": 2.60571424428856e-06, "loss": 0.6593, "step": 13021 }, { "epoch": 0.6692363038338986, "grad_norm": 1.0905957221984863, "learning_rate": 2.604983647577606e-06, "loss": 0.7159, "step": 13022 }, { "epoch": 0.6692876965772433, "grad_norm": 1.0802081823349, "learning_rate": 2.6042531172226755e-06, "loss": 0.7369, "step": 13023 }, { "epoch": 0.6693390893205879, "grad_norm": 1.055462121963501, "learning_rate": 2.6035226532440006e-06, "loss": 0.7044, "step": 13024 }, { "epoch": 0.6693904820639326, "grad_norm": 0.8474434018135071, "learning_rate": 2.6027922556618252e-06, "loss": 0.7038, "step": 13025 }, { "epoch": 0.6694418748072772, "grad_norm": 1.0953435897827148, "learning_rate": 2.602061924496382e-06, "loss": 0.7057, "step": 13026 }, { "epoch": 0.6694932675506219, "grad_norm": 1.085310697555542, "learning_rate": 2.6013316597679105e-06, "loss": 0.6776, "step": 13027 }, { "epoch": 0.6695446602939665, "grad_norm": 0.6719794273376465, "learning_rate": 2.6006014614966364e-06, "loss": 0.6349, "step": 13028 }, { "epoch": 0.6695960530373112, "grad_norm": 1.0670769214630127, "learning_rate": 2.5998713297027955e-06, "loss": 0.7228, "step": 13029 }, { "epoch": 0.6696474457806558, "grad_norm": 1.145293951034546, "learning_rate": 2.599141264406616e-06, "loss": 0.755, "step": 13030 }, { "epoch": 0.6696988385240004, "grad_norm": 0.8037762641906738, "learning_rate": 2.5984112656283223e-06, "loss": 0.6194, "step": 13031 }, { "epoch": 0.669750231267345, "grad_norm": 1.149341106414795, "learning_rate": 2.5976813333881436e-06, "loss": 0.7279, "step": 13032 }, { "epoch": 0.6698016240106897, "grad_norm": 1.0252666473388672, "learning_rate": 2.596951467706301e-06, "loss": 0.6754, "step": 13033 }, { "epoch": 0.6698530167540343, "grad_norm": 1.0942459106445312, "learning_rate": 2.5962216686030172e-06, "loss": 0.6908, "step": 13034 }, { "epoch": 0.669904409497379, "grad_norm": 1.129592776298523, "learning_rate": 2.5954919360985086e-06, "loss": 0.7408, "step": 13035 }, { "epoch": 0.6699558022407236, "grad_norm": 1.0698856115341187, "learning_rate": 2.5947622702129994e-06, "loss": 0.7293, "step": 13036 }, { "epoch": 0.6700071949840682, "grad_norm": 1.0777108669281006, "learning_rate": 2.5940326709666986e-06, "loss": 0.6972, "step": 13037 }, { "epoch": 0.6700585877274129, "grad_norm": 1.1078829765319824, "learning_rate": 2.5933031383798248e-06, "loss": 0.7244, "step": 13038 }, { "epoch": 0.6701099804707575, "grad_norm": 1.1121273040771484, "learning_rate": 2.59257367247259e-06, "loss": 0.6602, "step": 13039 }, { "epoch": 0.6701613732141022, "grad_norm": 1.0727003812789917, "learning_rate": 2.5918442732652017e-06, "loss": 0.7541, "step": 13040 }, { "epoch": 0.6702127659574468, "grad_norm": 1.0119311809539795, "learning_rate": 2.5911149407778723e-06, "loss": 0.7204, "step": 13041 }, { "epoch": 0.6702641587007915, "grad_norm": 1.0637989044189453, "learning_rate": 2.590385675030805e-06, "loss": 0.7006, "step": 13042 }, { "epoch": 0.6703155514441361, "grad_norm": 1.087314248085022, "learning_rate": 2.589656476044211e-06, "loss": 0.7378, "step": 13043 }, { "epoch": 0.6703669441874808, "grad_norm": 1.0874582529067993, "learning_rate": 2.588927343838285e-06, "loss": 0.7493, "step": 13044 }, { "epoch": 0.6704183369308254, "grad_norm": 0.7219361066818237, "learning_rate": 2.5881982784332344e-06, "loss": 0.6477, "step": 13045 }, { "epoch": 0.67046972967417, "grad_norm": 1.0843855142593384, "learning_rate": 2.5874692798492567e-06, "loss": 0.6362, "step": 13046 }, { "epoch": 0.6705211224175146, "grad_norm": 1.0886350870132446, "learning_rate": 2.586740348106549e-06, "loss": 0.6998, "step": 13047 }, { "epoch": 0.6705725151608593, "grad_norm": 1.1643319129943848, "learning_rate": 2.586011483225306e-06, "loss": 0.7174, "step": 13048 }, { "epoch": 0.6706239079042039, "grad_norm": 0.7661274671554565, "learning_rate": 2.585282685225724e-06, "loss": 0.65, "step": 13049 }, { "epoch": 0.6706753006475485, "grad_norm": 1.0566658973693848, "learning_rate": 2.5845539541279946e-06, "loss": 0.6845, "step": 13050 }, { "epoch": 0.6707266933908932, "grad_norm": 1.0441278219223022, "learning_rate": 2.583825289952307e-06, "loss": 0.6807, "step": 13051 }, { "epoch": 0.6707780861342378, "grad_norm": 1.0378021001815796, "learning_rate": 2.5830966927188495e-06, "loss": 0.7092, "step": 13052 }, { "epoch": 0.6708294788775825, "grad_norm": 1.061447024345398, "learning_rate": 2.582368162447807e-06, "loss": 0.7138, "step": 13053 }, { "epoch": 0.6708808716209271, "grad_norm": 1.0806057453155518, "learning_rate": 2.5816396991593672e-06, "loss": 0.7193, "step": 13054 }, { "epoch": 0.6709322643642718, "grad_norm": 1.2929465770721436, "learning_rate": 2.5809113028737097e-06, "loss": 0.7552, "step": 13055 }, { "epoch": 0.6709836571076164, "grad_norm": 1.0460340976715088, "learning_rate": 2.5801829736110206e-06, "loss": 0.7011, "step": 13056 }, { "epoch": 0.6710350498509611, "grad_norm": 1.1173027753829956, "learning_rate": 2.579454711391472e-06, "loss": 0.6868, "step": 13057 }, { "epoch": 0.6710864425943057, "grad_norm": 1.0581424236297607, "learning_rate": 2.578726516235246e-06, "loss": 0.6749, "step": 13058 }, { "epoch": 0.6711378353376504, "grad_norm": 0.7346340417861938, "learning_rate": 2.5779983881625158e-06, "loss": 0.6523, "step": 13059 }, { "epoch": 0.671189228080995, "grad_norm": 0.7397363185882568, "learning_rate": 2.577270327193456e-06, "loss": 0.6498, "step": 13060 }, { "epoch": 0.6712406208243396, "grad_norm": 1.037415862083435, "learning_rate": 2.5765423333482355e-06, "loss": 0.7242, "step": 13061 }, { "epoch": 0.6712920135676842, "grad_norm": 1.1445791721343994, "learning_rate": 2.575814406647027e-06, "loss": 0.7272, "step": 13062 }, { "epoch": 0.6713434063110288, "grad_norm": 1.1003161668777466, "learning_rate": 2.575086547109997e-06, "loss": 0.7337, "step": 13063 }, { "epoch": 0.6713947990543735, "grad_norm": 1.170903205871582, "learning_rate": 2.5743587547573114e-06, "loss": 0.6773, "step": 13064 }, { "epoch": 0.6714461917977181, "grad_norm": 1.1652607917785645, "learning_rate": 2.573631029609137e-06, "loss": 0.6811, "step": 13065 }, { "epoch": 0.6714975845410628, "grad_norm": 1.06890869140625, "learning_rate": 2.5729033716856313e-06, "loss": 0.6962, "step": 13066 }, { "epoch": 0.6715489772844074, "grad_norm": 1.0035547018051147, "learning_rate": 2.572175781006958e-06, "loss": 0.6714, "step": 13067 }, { "epoch": 0.6716003700277521, "grad_norm": 1.0934373140335083, "learning_rate": 2.5714482575932755e-06, "loss": 0.6975, "step": 13068 }, { "epoch": 0.6716517627710967, "grad_norm": 1.0862419605255127, "learning_rate": 2.570720801464739e-06, "loss": 0.7528, "step": 13069 }, { "epoch": 0.6717031555144414, "grad_norm": 1.1682851314544678, "learning_rate": 2.5699934126415027e-06, "loss": 0.7053, "step": 13070 }, { "epoch": 0.671754548257786, "grad_norm": 1.2616493701934814, "learning_rate": 2.569266091143723e-06, "loss": 0.704, "step": 13071 }, { "epoch": 0.6718059410011307, "grad_norm": 1.0981091260910034, "learning_rate": 2.5685388369915477e-06, "loss": 0.7537, "step": 13072 }, { "epoch": 0.6718573337444753, "grad_norm": 0.7039056420326233, "learning_rate": 2.5678116502051263e-06, "loss": 0.6654, "step": 13073 }, { "epoch": 0.67190872648782, "grad_norm": 1.1649174690246582, "learning_rate": 2.567084530804609e-06, "loss": 0.7219, "step": 13074 }, { "epoch": 0.6719601192311646, "grad_norm": 0.7009938359260559, "learning_rate": 2.5663574788101385e-06, "loss": 0.6424, "step": 13075 }, { "epoch": 0.6720115119745091, "grad_norm": 0.9895606637001038, "learning_rate": 2.5656304942418596e-06, "loss": 0.6761, "step": 13076 }, { "epoch": 0.6720629047178538, "grad_norm": 1.0448269844055176, "learning_rate": 2.5649035771199114e-06, "loss": 0.6861, "step": 13077 }, { "epoch": 0.6721142974611984, "grad_norm": 1.188977599143982, "learning_rate": 2.5641767274644408e-06, "loss": 0.75, "step": 13078 }, { "epoch": 0.6721656902045431, "grad_norm": 1.0759861469268799, "learning_rate": 2.5634499452955765e-06, "loss": 0.7192, "step": 13079 }, { "epoch": 0.6722170829478877, "grad_norm": 0.6729865670204163, "learning_rate": 2.5627232306334615e-06, "loss": 0.6795, "step": 13080 }, { "epoch": 0.6722684756912324, "grad_norm": 1.1020084619522095, "learning_rate": 2.5619965834982275e-06, "loss": 0.6903, "step": 13081 }, { "epoch": 0.672319868434577, "grad_norm": 1.0308765172958374, "learning_rate": 2.5612700039100053e-06, "loss": 0.7, "step": 13082 }, { "epoch": 0.6723712611779217, "grad_norm": 1.1218749284744263, "learning_rate": 2.5605434918889287e-06, "loss": 0.6872, "step": 13083 }, { "epoch": 0.6724226539212663, "grad_norm": 1.1067495346069336, "learning_rate": 2.5598170474551254e-06, "loss": 0.6765, "step": 13084 }, { "epoch": 0.672474046664611, "grad_norm": 1.04864501953125, "learning_rate": 2.5590906706287217e-06, "loss": 0.6928, "step": 13085 }, { "epoch": 0.6725254394079556, "grad_norm": 1.1000691652297974, "learning_rate": 2.55836436142984e-06, "loss": 0.6992, "step": 13086 }, { "epoch": 0.6725768321513003, "grad_norm": 1.046045184135437, "learning_rate": 2.5576381198786084e-06, "loss": 0.75, "step": 13087 }, { "epoch": 0.6726282248946449, "grad_norm": 1.0704320669174194, "learning_rate": 2.556911945995145e-06, "loss": 0.6569, "step": 13088 }, { "epoch": 0.6726796176379896, "grad_norm": 1.063007116317749, "learning_rate": 2.5561858397995696e-06, "loss": 0.7299, "step": 13089 }, { "epoch": 0.6727310103813342, "grad_norm": 1.0866018533706665, "learning_rate": 2.555459801311998e-06, "loss": 0.7866, "step": 13090 }, { "epoch": 0.6727824031246787, "grad_norm": 1.0894092321395874, "learning_rate": 2.5547338305525492e-06, "loss": 0.7167, "step": 13091 }, { "epoch": 0.6728337958680234, "grad_norm": 0.7166454195976257, "learning_rate": 2.554007927541334e-06, "loss": 0.6988, "step": 13092 }, { "epoch": 0.672885188611368, "grad_norm": 1.141790747642517, "learning_rate": 2.5532820922984648e-06, "loss": 0.7166, "step": 13093 }, { "epoch": 0.6729365813547127, "grad_norm": 1.0133863687515259, "learning_rate": 2.5525563248440542e-06, "loss": 0.6717, "step": 13094 }, { "epoch": 0.6729879740980573, "grad_norm": 1.2195563316345215, "learning_rate": 2.551830625198205e-06, "loss": 0.782, "step": 13095 }, { "epoch": 0.673039366841402, "grad_norm": 1.040109634399414, "learning_rate": 2.5511049933810274e-06, "loss": 0.6378, "step": 13096 }, { "epoch": 0.6730907595847466, "grad_norm": 1.1011369228363037, "learning_rate": 2.550379429412625e-06, "loss": 0.7248, "step": 13097 }, { "epoch": 0.6731421523280913, "grad_norm": 1.0901676416397095, "learning_rate": 2.549653933313099e-06, "loss": 0.6657, "step": 13098 }, { "epoch": 0.6731935450714359, "grad_norm": 1.1082632541656494, "learning_rate": 2.5489285051025492e-06, "loss": 0.6533, "step": 13099 }, { "epoch": 0.6732449378147806, "grad_norm": 1.2068206071853638, "learning_rate": 2.5482031448010767e-06, "loss": 0.7539, "step": 13100 }, { "epoch": 0.6732963305581252, "grad_norm": 1.1151360273361206, "learning_rate": 2.547477852428778e-06, "loss": 0.7532, "step": 13101 }, { "epoch": 0.6733477233014699, "grad_norm": 1.0475815534591675, "learning_rate": 2.5467526280057463e-06, "loss": 0.7231, "step": 13102 }, { "epoch": 0.6733991160448145, "grad_norm": 1.0282297134399414, "learning_rate": 2.5460274715520737e-06, "loss": 0.6675, "step": 13103 }, { "epoch": 0.6734505087881592, "grad_norm": 1.0747696161270142, "learning_rate": 2.5453023830878554e-06, "loss": 0.6627, "step": 13104 }, { "epoch": 0.6735019015315038, "grad_norm": 1.0827078819274902, "learning_rate": 2.544577362633177e-06, "loss": 0.7425, "step": 13105 }, { "epoch": 0.6735532942748484, "grad_norm": 1.057867169380188, "learning_rate": 2.5438524102081256e-06, "loss": 0.6886, "step": 13106 }, { "epoch": 0.673604687018193, "grad_norm": 1.0974222421646118, "learning_rate": 2.5431275258327927e-06, "loss": 0.6921, "step": 13107 }, { "epoch": 0.6736560797615376, "grad_norm": 0.7434971332550049, "learning_rate": 2.5424027095272517e-06, "loss": 0.6501, "step": 13108 }, { "epoch": 0.6737074725048823, "grad_norm": 0.9824162721633911, "learning_rate": 2.5416779613115927e-06, "loss": 0.6371, "step": 13109 }, { "epoch": 0.6737588652482269, "grad_norm": 1.0667482614517212, "learning_rate": 2.5409532812058923e-06, "loss": 0.7152, "step": 13110 }, { "epoch": 0.6738102579915716, "grad_norm": 1.1122701168060303, "learning_rate": 2.540228669230228e-06, "loss": 0.7292, "step": 13111 }, { "epoch": 0.6738616507349162, "grad_norm": 1.0843881368637085, "learning_rate": 2.5395041254046747e-06, "loss": 0.7345, "step": 13112 }, { "epoch": 0.6739130434782609, "grad_norm": 1.272737741470337, "learning_rate": 2.538779649749311e-06, "loss": 0.7153, "step": 13113 }, { "epoch": 0.6739644362216055, "grad_norm": 1.0220656394958496, "learning_rate": 2.5380552422842053e-06, "loss": 0.6688, "step": 13114 }, { "epoch": 0.6740158289649502, "grad_norm": 1.0114638805389404, "learning_rate": 2.537330903029428e-06, "loss": 0.6889, "step": 13115 }, { "epoch": 0.6740672217082948, "grad_norm": 1.102752447128296, "learning_rate": 2.536606632005052e-06, "loss": 0.6953, "step": 13116 }, { "epoch": 0.6741186144516395, "grad_norm": 1.0608450174331665, "learning_rate": 2.535882429231136e-06, "loss": 0.6504, "step": 13117 }, { "epoch": 0.6741700071949841, "grad_norm": 1.0559003353118896, "learning_rate": 2.535158294727752e-06, "loss": 0.6457, "step": 13118 }, { "epoch": 0.6742213999383287, "grad_norm": 1.0380640029907227, "learning_rate": 2.534434228514957e-06, "loss": 0.7546, "step": 13119 }, { "epoch": 0.6742727926816734, "grad_norm": 1.0353333950042725, "learning_rate": 2.533710230612819e-06, "loss": 0.7126, "step": 13120 }, { "epoch": 0.674324185425018, "grad_norm": 1.0319677591323853, "learning_rate": 2.532986301041389e-06, "loss": 0.7293, "step": 13121 }, { "epoch": 0.6743755781683626, "grad_norm": 1.1223087310791016, "learning_rate": 2.5322624398207297e-06, "loss": 0.7445, "step": 13122 }, { "epoch": 0.6744269709117072, "grad_norm": 1.0661731958389282, "learning_rate": 2.531538646970894e-06, "loss": 0.7617, "step": 13123 }, { "epoch": 0.6744783636550519, "grad_norm": 1.0735751390457153, "learning_rate": 2.5308149225119337e-06, "loss": 0.6671, "step": 13124 }, { "epoch": 0.6745297563983965, "grad_norm": 1.0273689031600952, "learning_rate": 2.5300912664639047e-06, "loss": 0.6753, "step": 13125 }, { "epoch": 0.6745811491417412, "grad_norm": 0.8318812251091003, "learning_rate": 2.529367678846854e-06, "loss": 0.6313, "step": 13126 }, { "epoch": 0.6746325418850858, "grad_norm": 0.7973877787590027, "learning_rate": 2.5286441596808286e-06, "loss": 0.6141, "step": 13127 }, { "epoch": 0.6746839346284305, "grad_norm": 0.7554808259010315, "learning_rate": 2.5279207089858736e-06, "loss": 0.6792, "step": 13128 }, { "epoch": 0.6747353273717751, "grad_norm": 1.1440098285675049, "learning_rate": 2.5271973267820384e-06, "loss": 0.7345, "step": 13129 }, { "epoch": 0.6747867201151198, "grad_norm": 1.0106546878814697, "learning_rate": 2.526474013089355e-06, "loss": 0.697, "step": 13130 }, { "epoch": 0.6748381128584644, "grad_norm": 1.041168451309204, "learning_rate": 2.5257507679278723e-06, "loss": 0.6767, "step": 13131 }, { "epoch": 0.674889505601809, "grad_norm": 1.0405133962631226, "learning_rate": 2.525027591317625e-06, "loss": 0.6881, "step": 13132 }, { "epoch": 0.6749408983451537, "grad_norm": 1.0461935997009277, "learning_rate": 2.524304483278648e-06, "loss": 0.6866, "step": 13133 }, { "epoch": 0.6749922910884983, "grad_norm": 0.6814484596252441, "learning_rate": 2.523581443830978e-06, "loss": 0.6561, "step": 13134 }, { "epoch": 0.675043683831843, "grad_norm": 1.0452812910079956, "learning_rate": 2.5228584729946455e-06, "loss": 0.6878, "step": 13135 }, { "epoch": 0.6750950765751876, "grad_norm": 1.7889001369476318, "learning_rate": 2.5221355707896855e-06, "loss": 0.728, "step": 13136 }, { "epoch": 0.6751464693185322, "grad_norm": 1.1480262279510498, "learning_rate": 2.52141273723612e-06, "loss": 0.744, "step": 13137 }, { "epoch": 0.6751978620618768, "grad_norm": 1.0950225591659546, "learning_rate": 2.520689972353981e-06, "loss": 0.778, "step": 13138 }, { "epoch": 0.6752492548052215, "grad_norm": 1.0076096057891846, "learning_rate": 2.519967276163291e-06, "loss": 0.6658, "step": 13139 }, { "epoch": 0.6753006475485661, "grad_norm": 1.102768898010254, "learning_rate": 2.5192446486840733e-06, "loss": 0.7342, "step": 13140 }, { "epoch": 0.6753520402919108, "grad_norm": 1.06729257106781, "learning_rate": 2.518522089936347e-06, "loss": 0.7352, "step": 13141 }, { "epoch": 0.6754034330352554, "grad_norm": 1.1567792892456055, "learning_rate": 2.517799599940135e-06, "loss": 0.6884, "step": 13142 }, { "epoch": 0.6754548257786, "grad_norm": 1.1541435718536377, "learning_rate": 2.5170771787154523e-06, "loss": 0.6953, "step": 13143 }, { "epoch": 0.6755062185219447, "grad_norm": 0.9961956739425659, "learning_rate": 2.5163548262823135e-06, "loss": 0.7342, "step": 13144 }, { "epoch": 0.6755576112652893, "grad_norm": 1.0637978315353394, "learning_rate": 2.5156325426607363e-06, "loss": 0.697, "step": 13145 }, { "epoch": 0.675609004008634, "grad_norm": 0.7624397277832031, "learning_rate": 2.5149103278707254e-06, "loss": 0.6472, "step": 13146 }, { "epoch": 0.6756603967519786, "grad_norm": 1.0833979845046997, "learning_rate": 2.5141881819322955e-06, "loss": 0.6561, "step": 13147 }, { "epoch": 0.6757117894953233, "grad_norm": 1.073819875717163, "learning_rate": 2.5134661048654534e-06, "loss": 0.7123, "step": 13148 }, { "epoch": 0.6757631822386679, "grad_norm": 0.8595784306526184, "learning_rate": 2.512744096690204e-06, "loss": 0.6467, "step": 13149 }, { "epoch": 0.6758145749820126, "grad_norm": 0.7041749954223633, "learning_rate": 2.512022157426549e-06, "loss": 0.7005, "step": 13150 }, { "epoch": 0.6758659677253572, "grad_norm": 0.7271613478660583, "learning_rate": 2.5113002870944953e-06, "loss": 0.6349, "step": 13151 }, { "epoch": 0.6759173604687018, "grad_norm": 1.0554580688476562, "learning_rate": 2.51057848571404e-06, "loss": 0.6827, "step": 13152 }, { "epoch": 0.6759687532120464, "grad_norm": 1.0741956233978271, "learning_rate": 2.5098567533051813e-06, "loss": 0.7252, "step": 13153 }, { "epoch": 0.6760201459553911, "grad_norm": 0.8376405239105225, "learning_rate": 2.509135089887914e-06, "loss": 0.6402, "step": 13154 }, { "epoch": 0.6760715386987357, "grad_norm": 1.0455571413040161, "learning_rate": 2.508413495482237e-06, "loss": 0.662, "step": 13155 }, { "epoch": 0.6761229314420804, "grad_norm": 1.0987800359725952, "learning_rate": 2.5076919701081395e-06, "loss": 0.7186, "step": 13156 }, { "epoch": 0.676174324185425, "grad_norm": 0.8191101551055908, "learning_rate": 2.5069705137856095e-06, "loss": 0.6314, "step": 13157 }, { "epoch": 0.6762257169287696, "grad_norm": 1.1435883045196533, "learning_rate": 2.506249126534643e-06, "loss": 0.7153, "step": 13158 }, { "epoch": 0.6762771096721143, "grad_norm": 0.7505073547363281, "learning_rate": 2.505527808375218e-06, "loss": 0.6463, "step": 13159 }, { "epoch": 0.6763285024154589, "grad_norm": 1.098889708518982, "learning_rate": 2.504806559327325e-06, "loss": 0.72, "step": 13160 }, { "epoch": 0.6763798951588036, "grad_norm": 1.1002869606018066, "learning_rate": 2.504085379410946e-06, "loss": 0.7659, "step": 13161 }, { "epoch": 0.6764312879021482, "grad_norm": 0.8311879634857178, "learning_rate": 2.503364268646061e-06, "loss": 0.6268, "step": 13162 }, { "epoch": 0.6764826806454929, "grad_norm": 1.0641425848007202, "learning_rate": 2.5026432270526467e-06, "loss": 0.6958, "step": 13163 }, { "epoch": 0.6765340733888375, "grad_norm": 0.7869943976402283, "learning_rate": 2.501922254650685e-06, "loss": 0.6594, "step": 13164 }, { "epoch": 0.6765854661321822, "grad_norm": 1.012304425239563, "learning_rate": 2.501201351460148e-06, "loss": 0.6975, "step": 13165 }, { "epoch": 0.6766368588755268, "grad_norm": 1.1329584121704102, "learning_rate": 2.500480517501008e-06, "loss": 0.7034, "step": 13166 }, { "epoch": 0.6766882516188714, "grad_norm": 1.1433192491531372, "learning_rate": 2.49975975279324e-06, "loss": 0.7134, "step": 13167 }, { "epoch": 0.676739644362216, "grad_norm": 1.0830217599868774, "learning_rate": 2.499039057356811e-06, "loss": 0.752, "step": 13168 }, { "epoch": 0.6767910371055607, "grad_norm": 1.05928373336792, "learning_rate": 2.498318431211689e-06, "loss": 0.7033, "step": 13169 }, { "epoch": 0.6768424298489053, "grad_norm": 1.1782184839248657, "learning_rate": 2.497597874377838e-06, "loss": 0.7422, "step": 13170 }, { "epoch": 0.67689382259225, "grad_norm": 1.0819706916809082, "learning_rate": 2.4968773868752265e-06, "loss": 0.6857, "step": 13171 }, { "epoch": 0.6769452153355946, "grad_norm": 1.0625133514404297, "learning_rate": 2.4961569687238092e-06, "loss": 0.7047, "step": 13172 }, { "epoch": 0.6769966080789392, "grad_norm": 1.0723130702972412, "learning_rate": 2.4954366199435513e-06, "loss": 0.7172, "step": 13173 }, { "epoch": 0.6770480008222839, "grad_norm": 1.082820177078247, "learning_rate": 2.4947163405544093e-06, "loss": 0.7607, "step": 13174 }, { "epoch": 0.6770993935656285, "grad_norm": 0.7482179403305054, "learning_rate": 2.493996130576337e-06, "loss": 0.6166, "step": 13175 }, { "epoch": 0.6771507863089732, "grad_norm": 1.071678638458252, "learning_rate": 2.4932759900292915e-06, "loss": 0.6903, "step": 13176 }, { "epoch": 0.6772021790523178, "grad_norm": 1.0330917835235596, "learning_rate": 2.4925559189332237e-06, "loss": 0.7554, "step": 13177 }, { "epoch": 0.6772535717956625, "grad_norm": 1.0285574197769165, "learning_rate": 2.4918359173080843e-06, "loss": 0.6661, "step": 13178 }, { "epoch": 0.6773049645390071, "grad_norm": 0.8143740296363831, "learning_rate": 2.491115985173819e-06, "loss": 0.6854, "step": 13179 }, { "epoch": 0.6773563572823518, "grad_norm": 1.0581188201904297, "learning_rate": 2.4903961225503775e-06, "loss": 0.6696, "step": 13180 }, { "epoch": 0.6774077500256964, "grad_norm": 1.0039702653884888, "learning_rate": 2.4896763294577036e-06, "loss": 0.6635, "step": 13181 }, { "epoch": 0.677459142769041, "grad_norm": 0.7126507759094238, "learning_rate": 2.488956605915739e-06, "loss": 0.5948, "step": 13182 }, { "epoch": 0.6775105355123856, "grad_norm": 1.0816986560821533, "learning_rate": 2.4882369519444223e-06, "loss": 0.7411, "step": 13183 }, { "epoch": 0.6775619282557303, "grad_norm": 0.7402935028076172, "learning_rate": 2.4875173675636965e-06, "loss": 0.6537, "step": 13184 }, { "epoch": 0.6776133209990749, "grad_norm": 1.0375361442565918, "learning_rate": 2.4867978527934957e-06, "loss": 0.7393, "step": 13185 }, { "epoch": 0.6776647137424195, "grad_norm": 1.1466938257217407, "learning_rate": 2.486078407653753e-06, "loss": 0.7166, "step": 13186 }, { "epoch": 0.6777161064857642, "grad_norm": 0.7103905081748962, "learning_rate": 2.4853590321644074e-06, "loss": 0.6382, "step": 13187 }, { "epoch": 0.6777674992291088, "grad_norm": 1.08000910282135, "learning_rate": 2.484639726345382e-06, "loss": 0.7243, "step": 13188 }, { "epoch": 0.6778188919724535, "grad_norm": 1.0228893756866455, "learning_rate": 2.483920490216611e-06, "loss": 0.7619, "step": 13189 }, { "epoch": 0.6778702847157981, "grad_norm": 1.0764765739440918, "learning_rate": 2.4832013237980203e-06, "loss": 0.6977, "step": 13190 }, { "epoch": 0.6779216774591428, "grad_norm": 1.0704166889190674, "learning_rate": 2.4824822271095344e-06, "loss": 0.6862, "step": 13191 }, { "epoch": 0.6779730702024874, "grad_norm": 1.0351858139038086, "learning_rate": 2.4817632001710753e-06, "loss": 0.6875, "step": 13192 }, { "epoch": 0.6780244629458321, "grad_norm": 0.7257068753242493, "learning_rate": 2.481044243002567e-06, "loss": 0.614, "step": 13193 }, { "epoch": 0.6780758556891767, "grad_norm": 1.0680242776870728, "learning_rate": 2.4803253556239282e-06, "loss": 0.7121, "step": 13194 }, { "epoch": 0.6781272484325214, "grad_norm": 1.045596718788147, "learning_rate": 2.479606538055074e-06, "loss": 0.7402, "step": 13195 }, { "epoch": 0.678178641175866, "grad_norm": 1.0770820379257202, "learning_rate": 2.478887790315923e-06, "loss": 0.6673, "step": 13196 }, { "epoch": 0.6782300339192107, "grad_norm": 1.0784111022949219, "learning_rate": 2.4781691124263875e-06, "loss": 0.6795, "step": 13197 }, { "epoch": 0.6782814266625552, "grad_norm": 1.2641180753707886, "learning_rate": 2.4774505044063785e-06, "loss": 0.6842, "step": 13198 }, { "epoch": 0.6783328194058998, "grad_norm": 1.0442169904708862, "learning_rate": 2.476731966275805e-06, "loss": 0.6943, "step": 13199 }, { "epoch": 0.6783842121492445, "grad_norm": 1.047584891319275, "learning_rate": 2.476013498054579e-06, "loss": 0.7136, "step": 13200 }, { "epoch": 0.6784356048925891, "grad_norm": 1.0777981281280518, "learning_rate": 2.475295099762599e-06, "loss": 0.74, "step": 13201 }, { "epoch": 0.6784869976359338, "grad_norm": 1.1164733171463013, "learning_rate": 2.4745767714197757e-06, "loss": 0.7033, "step": 13202 }, { "epoch": 0.6785383903792784, "grad_norm": 1.1021857261657715, "learning_rate": 2.473858513046008e-06, "loss": 0.7242, "step": 13203 }, { "epoch": 0.6785897831226231, "grad_norm": 1.1564886569976807, "learning_rate": 2.473140324661196e-06, "loss": 0.718, "step": 13204 }, { "epoch": 0.6786411758659677, "grad_norm": 0.8104077577590942, "learning_rate": 2.4724222062852355e-06, "loss": 0.6392, "step": 13205 }, { "epoch": 0.6786925686093124, "grad_norm": 0.9967166185379028, "learning_rate": 2.4717041579380273e-06, "loss": 0.7127, "step": 13206 }, { "epoch": 0.678743961352657, "grad_norm": 1.0755122900009155, "learning_rate": 2.4709861796394634e-06, "loss": 0.7394, "step": 13207 }, { "epoch": 0.6787953540960017, "grad_norm": 0.7414858937263489, "learning_rate": 2.470268271409434e-06, "loss": 0.7221, "step": 13208 }, { "epoch": 0.6788467468393463, "grad_norm": 1.034874677658081, "learning_rate": 2.4695504332678345e-06, "loss": 0.6585, "step": 13209 }, { "epoch": 0.678898139582691, "grad_norm": 1.091726303100586, "learning_rate": 2.468832665234546e-06, "loss": 0.7072, "step": 13210 }, { "epoch": 0.6789495323260356, "grad_norm": 0.8990628719329834, "learning_rate": 2.468114967329462e-06, "loss": 0.639, "step": 13211 }, { "epoch": 0.6790009250693803, "grad_norm": 1.001253366470337, "learning_rate": 2.4673973395724605e-06, "loss": 0.7196, "step": 13212 }, { "epoch": 0.6790523178127248, "grad_norm": 1.0703651905059814, "learning_rate": 2.466679781983432e-06, "loss": 0.7042, "step": 13213 }, { "epoch": 0.6791037105560694, "grad_norm": 1.0038541555404663, "learning_rate": 2.4659622945822483e-06, "loss": 0.664, "step": 13214 }, { "epoch": 0.6791551032994141, "grad_norm": 1.06258225440979, "learning_rate": 2.465244877388794e-06, "loss": 0.7247, "step": 13215 }, { "epoch": 0.6792064960427587, "grad_norm": 0.6911278367042542, "learning_rate": 2.464527530422943e-06, "loss": 0.6154, "step": 13216 }, { "epoch": 0.6792578887861034, "grad_norm": 0.7348666191101074, "learning_rate": 2.46381025370457e-06, "loss": 0.6736, "step": 13217 }, { "epoch": 0.679309281529448, "grad_norm": 1.0497105121612549, "learning_rate": 2.46309304725355e-06, "loss": 0.6491, "step": 13218 }, { "epoch": 0.6793606742727927, "grad_norm": 1.0711970329284668, "learning_rate": 2.4623759110897528e-06, "loss": 0.7084, "step": 13219 }, { "epoch": 0.6794120670161373, "grad_norm": 1.1374893188476562, "learning_rate": 2.4616588452330465e-06, "loss": 0.7312, "step": 13220 }, { "epoch": 0.679463459759482, "grad_norm": 1.112452745437622, "learning_rate": 2.460941849703297e-06, "loss": 0.7414, "step": 13221 }, { "epoch": 0.6795148525028266, "grad_norm": 0.8322348594665527, "learning_rate": 2.4602249245203746e-06, "loss": 0.657, "step": 13222 }, { "epoch": 0.6795662452461713, "grad_norm": 1.106318473815918, "learning_rate": 2.4595080697041345e-06, "loss": 0.6896, "step": 13223 }, { "epoch": 0.6796176379895159, "grad_norm": 1.0241153240203857, "learning_rate": 2.4587912852744435e-06, "loss": 0.6776, "step": 13224 }, { "epoch": 0.6796690307328606, "grad_norm": 1.0289332866668701, "learning_rate": 2.4580745712511592e-06, "loss": 0.7418, "step": 13225 }, { "epoch": 0.6797204234762052, "grad_norm": 1.04972243309021, "learning_rate": 2.457357927654137e-06, "loss": 0.6447, "step": 13226 }, { "epoch": 0.6797718162195499, "grad_norm": 0.7166408896446228, "learning_rate": 2.456641354503235e-06, "loss": 0.667, "step": 13227 }, { "epoch": 0.6798232089628944, "grad_norm": 1.132569670677185, "learning_rate": 2.4559248518183042e-06, "loss": 0.6948, "step": 13228 }, { "epoch": 0.679874601706239, "grad_norm": 1.0630922317504883, "learning_rate": 2.4552084196192005e-06, "loss": 0.6853, "step": 13229 }, { "epoch": 0.6799259944495837, "grad_norm": 1.1038269996643066, "learning_rate": 2.4544920579257658e-06, "loss": 0.7022, "step": 13230 }, { "epoch": 0.6799773871929283, "grad_norm": 1.0308912992477417, "learning_rate": 2.453775766757853e-06, "loss": 0.6921, "step": 13231 }, { "epoch": 0.680028779936273, "grad_norm": 1.1923452615737915, "learning_rate": 2.453059546135307e-06, "loss": 0.6986, "step": 13232 }, { "epoch": 0.6800801726796176, "grad_norm": 1.0724221467971802, "learning_rate": 2.4523433960779695e-06, "loss": 0.7207, "step": 13233 }, { "epoch": 0.6801315654229623, "grad_norm": 1.0431641340255737, "learning_rate": 2.4516273166056814e-06, "loss": 0.7492, "step": 13234 }, { "epoch": 0.6801829581663069, "grad_norm": 1.1156055927276611, "learning_rate": 2.4509113077382855e-06, "loss": 0.7093, "step": 13235 }, { "epoch": 0.6802343509096516, "grad_norm": 0.8234076499938965, "learning_rate": 2.4501953694956183e-06, "loss": 0.6548, "step": 13236 }, { "epoch": 0.6802857436529962, "grad_norm": 1.1025968790054321, "learning_rate": 2.449479501897513e-06, "loss": 0.6919, "step": 13237 }, { "epoch": 0.6803371363963409, "grad_norm": 1.0031371116638184, "learning_rate": 2.4487637049638085e-06, "loss": 0.7183, "step": 13238 }, { "epoch": 0.6803885291396855, "grad_norm": 1.0633676052093506, "learning_rate": 2.4480479787143303e-06, "loss": 0.7401, "step": 13239 }, { "epoch": 0.6804399218830302, "grad_norm": 1.0840507745742798, "learning_rate": 2.4473323231689127e-06, "loss": 0.7186, "step": 13240 }, { "epoch": 0.6804913146263748, "grad_norm": 1.0725923776626587, "learning_rate": 2.4466167383473826e-06, "loss": 0.692, "step": 13241 }, { "epoch": 0.6805427073697194, "grad_norm": 0.7000992894172668, "learning_rate": 2.4459012242695656e-06, "loss": 0.6804, "step": 13242 }, { "epoch": 0.680594100113064, "grad_norm": 1.0073797702789307, "learning_rate": 2.4451857809552833e-06, "loss": 0.6798, "step": 13243 }, { "epoch": 0.6806454928564086, "grad_norm": 1.0870059728622437, "learning_rate": 2.444470408424362e-06, "loss": 0.7102, "step": 13244 }, { "epoch": 0.6806968855997533, "grad_norm": 1.053800344467163, "learning_rate": 2.443755106696619e-06, "loss": 0.6913, "step": 13245 }, { "epoch": 0.6807482783430979, "grad_norm": 1.0393702983856201, "learning_rate": 2.4430398757918717e-06, "loss": 0.7253, "step": 13246 }, { "epoch": 0.6807996710864426, "grad_norm": 0.830040693283081, "learning_rate": 2.4423247157299394e-06, "loss": 0.637, "step": 13247 }, { "epoch": 0.6808510638297872, "grad_norm": 0.7397857904434204, "learning_rate": 2.4416096265306344e-06, "loss": 0.667, "step": 13248 }, { "epoch": 0.6809024565731319, "grad_norm": 0.798831582069397, "learning_rate": 2.4408946082137684e-06, "loss": 0.6415, "step": 13249 }, { "epoch": 0.6809538493164765, "grad_norm": 1.1541287899017334, "learning_rate": 2.440179660799151e-06, "loss": 0.7792, "step": 13250 }, { "epoch": 0.6810052420598212, "grad_norm": 1.0994206666946411, "learning_rate": 2.439464784306594e-06, "loss": 0.7289, "step": 13251 }, { "epoch": 0.6810566348031658, "grad_norm": 0.9598152041435242, "learning_rate": 2.4387499787558976e-06, "loss": 0.6187, "step": 13252 }, { "epoch": 0.6811080275465105, "grad_norm": 1.051993727684021, "learning_rate": 2.438035244166872e-06, "loss": 0.6985, "step": 13253 }, { "epoch": 0.6811594202898551, "grad_norm": 0.7552441358566284, "learning_rate": 2.437320580559317e-06, "loss": 0.6809, "step": 13254 }, { "epoch": 0.6812108130331997, "grad_norm": 1.1250953674316406, "learning_rate": 2.4366059879530335e-06, "loss": 0.7288, "step": 13255 }, { "epoch": 0.6812622057765444, "grad_norm": 1.0953460931777954, "learning_rate": 2.4358914663678173e-06, "loss": 0.7486, "step": 13256 }, { "epoch": 0.681313598519889, "grad_norm": 1.3267933130264282, "learning_rate": 2.435177015823469e-06, "loss": 0.7038, "step": 13257 }, { "epoch": 0.6813649912632336, "grad_norm": 1.0587003231048584, "learning_rate": 2.434462636339781e-06, "loss": 0.6923, "step": 13258 }, { "epoch": 0.6814163840065782, "grad_norm": 1.1268763542175293, "learning_rate": 2.433748327936544e-06, "loss": 0.6824, "step": 13259 }, { "epoch": 0.6814677767499229, "grad_norm": 1.0469001531600952, "learning_rate": 2.433034090633553e-06, "loss": 0.697, "step": 13260 }, { "epoch": 0.6815191694932675, "grad_norm": 1.0895551443099976, "learning_rate": 2.4323199244505943e-06, "loss": 0.7072, "step": 13261 }, { "epoch": 0.6815705622366122, "grad_norm": 0.778810441493988, "learning_rate": 2.431605829407454e-06, "loss": 0.6563, "step": 13262 }, { "epoch": 0.6816219549799568, "grad_norm": 1.102554202079773, "learning_rate": 2.430891805523915e-06, "loss": 0.6573, "step": 13263 }, { "epoch": 0.6816733477233015, "grad_norm": 1.0904885530471802, "learning_rate": 2.4301778528197654e-06, "loss": 0.7017, "step": 13264 }, { "epoch": 0.6817247404666461, "grad_norm": 1.1557235717773438, "learning_rate": 2.4294639713147795e-06, "loss": 0.7046, "step": 13265 }, { "epoch": 0.6817761332099908, "grad_norm": 1.0727252960205078, "learning_rate": 2.42875016102874e-06, "loss": 0.6893, "step": 13266 }, { "epoch": 0.6818275259533354, "grad_norm": 1.0447843074798584, "learning_rate": 2.4280364219814234e-06, "loss": 0.7351, "step": 13267 }, { "epoch": 0.68187891869668, "grad_norm": 1.0589386224746704, "learning_rate": 2.4273227541926015e-06, "loss": 0.7276, "step": 13268 }, { "epoch": 0.6819303114400247, "grad_norm": 1.015015959739685, "learning_rate": 2.426609157682051e-06, "loss": 0.7006, "step": 13269 }, { "epoch": 0.6819817041833693, "grad_norm": 0.8710305094718933, "learning_rate": 2.425895632469541e-06, "loss": 0.6122, "step": 13270 }, { "epoch": 0.682033096926714, "grad_norm": 0.677774965763092, "learning_rate": 2.42518217857484e-06, "loss": 0.6479, "step": 13271 }, { "epoch": 0.6820844896700586, "grad_norm": 1.0322576761245728, "learning_rate": 2.424468796017714e-06, "loss": 0.6599, "step": 13272 }, { "epoch": 0.6821358824134033, "grad_norm": 1.0469520092010498, "learning_rate": 2.423755484817933e-06, "loss": 0.6744, "step": 13273 }, { "epoch": 0.6821872751567478, "grad_norm": 0.7185530066490173, "learning_rate": 2.4230422449952517e-06, "loss": 0.6228, "step": 13274 }, { "epoch": 0.6822386679000925, "grad_norm": 1.095075011253357, "learning_rate": 2.4223290765694375e-06, "loss": 0.7384, "step": 13275 }, { "epoch": 0.6822900606434371, "grad_norm": 1.0157147645950317, "learning_rate": 2.4216159795602444e-06, "loss": 0.657, "step": 13276 }, { "epoch": 0.6823414533867818, "grad_norm": 0.8043586015701294, "learning_rate": 2.420902953987435e-06, "loss": 0.6369, "step": 13277 }, { "epoch": 0.6823928461301264, "grad_norm": 0.8233100175857544, "learning_rate": 2.4201899998707613e-06, "loss": 0.6281, "step": 13278 }, { "epoch": 0.682444238873471, "grad_norm": 1.0913152694702148, "learning_rate": 2.4194771172299743e-06, "loss": 0.7014, "step": 13279 }, { "epoch": 0.6824956316168157, "grad_norm": 0.7298514246940613, "learning_rate": 2.418764306084831e-06, "loss": 0.6591, "step": 13280 }, { "epoch": 0.6825470243601603, "grad_norm": 1.0800707340240479, "learning_rate": 2.418051566455073e-06, "loss": 0.7377, "step": 13281 }, { "epoch": 0.682598417103505, "grad_norm": 1.0422908067703247, "learning_rate": 2.4173388983604527e-06, "loss": 0.6468, "step": 13282 }, { "epoch": 0.6826498098468496, "grad_norm": 1.1076951026916504, "learning_rate": 2.416626301820714e-06, "loss": 0.6665, "step": 13283 }, { "epoch": 0.6827012025901943, "grad_norm": 1.0576202869415283, "learning_rate": 2.415913776855599e-06, "loss": 0.6721, "step": 13284 }, { "epoch": 0.6827525953335389, "grad_norm": 0.7094531059265137, "learning_rate": 2.4152013234848476e-06, "loss": 0.6243, "step": 13285 }, { "epoch": 0.6828039880768836, "grad_norm": 1.1675782203674316, "learning_rate": 2.4144889417282026e-06, "loss": 0.7949, "step": 13286 }, { "epoch": 0.6828553808202282, "grad_norm": 1.0918580293655396, "learning_rate": 2.413776631605399e-06, "loss": 0.7481, "step": 13287 }, { "epoch": 0.6829067735635729, "grad_norm": 1.1489192247390747, "learning_rate": 2.413064393136171e-06, "loss": 0.74, "step": 13288 }, { "epoch": 0.6829581663069174, "grad_norm": 1.0683095455169678, "learning_rate": 2.4123522263402545e-06, "loss": 0.7544, "step": 13289 }, { "epoch": 0.6830095590502621, "grad_norm": 1.093979835510254, "learning_rate": 2.4116401312373788e-06, "loss": 0.7256, "step": 13290 }, { "epoch": 0.6830609517936067, "grad_norm": 1.0903630256652832, "learning_rate": 2.410928107847274e-06, "loss": 0.6746, "step": 13291 }, { "epoch": 0.6831123445369514, "grad_norm": 1.079805612564087, "learning_rate": 2.4102161561896655e-06, "loss": 0.7199, "step": 13292 }, { "epoch": 0.683163737280296, "grad_norm": 1.100926160812378, "learning_rate": 2.409504276284283e-06, "loss": 0.7454, "step": 13293 }, { "epoch": 0.6832151300236406, "grad_norm": 1.0561484098434448, "learning_rate": 2.408792468150843e-06, "loss": 0.6259, "step": 13294 }, { "epoch": 0.6832665227669853, "grad_norm": 1.003684639930725, "learning_rate": 2.4080807318090723e-06, "loss": 0.6844, "step": 13295 }, { "epoch": 0.6833179155103299, "grad_norm": 1.0702459812164307, "learning_rate": 2.407369067278688e-06, "loss": 0.6459, "step": 13296 }, { "epoch": 0.6833693082536746, "grad_norm": 1.1424384117126465, "learning_rate": 2.4066574745794064e-06, "loss": 0.7039, "step": 13297 }, { "epoch": 0.6834207009970192, "grad_norm": 1.064834475517273, "learning_rate": 2.4059459537309452e-06, "loss": 0.7194, "step": 13298 }, { "epoch": 0.6834720937403639, "grad_norm": 1.0064125061035156, "learning_rate": 2.4052345047530175e-06, "loss": 0.7249, "step": 13299 }, { "epoch": 0.6835234864837085, "grad_norm": 1.097679853439331, "learning_rate": 2.404523127665333e-06, "loss": 0.6973, "step": 13300 }, { "epoch": 0.6835748792270532, "grad_norm": 1.0807406902313232, "learning_rate": 2.4038118224876e-06, "loss": 0.6983, "step": 13301 }, { "epoch": 0.6836262719703978, "grad_norm": 1.0093823671340942, "learning_rate": 2.4031005892395314e-06, "loss": 0.6845, "step": 13302 }, { "epoch": 0.6836776647137425, "grad_norm": 1.0717673301696777, "learning_rate": 2.4023894279408257e-06, "loss": 0.7054, "step": 13303 }, { "epoch": 0.683729057457087, "grad_norm": 1.0829806327819824, "learning_rate": 2.4016783386111904e-06, "loss": 0.675, "step": 13304 }, { "epoch": 0.6837804502004317, "grad_norm": 1.0419515371322632, "learning_rate": 2.4009673212703233e-06, "loss": 0.6973, "step": 13305 }, { "epoch": 0.6838318429437763, "grad_norm": 1.0893298387527466, "learning_rate": 2.4002563759379305e-06, "loss": 0.73, "step": 13306 }, { "epoch": 0.683883235687121, "grad_norm": 1.0366350412368774, "learning_rate": 2.3995455026337015e-06, "loss": 0.6772, "step": 13307 }, { "epoch": 0.6839346284304656, "grad_norm": 1.0730319023132324, "learning_rate": 2.3988347013773364e-06, "loss": 0.7325, "step": 13308 }, { "epoch": 0.6839860211738102, "grad_norm": 1.1614218950271606, "learning_rate": 2.398123972188528e-06, "loss": 0.7419, "step": 13309 }, { "epoch": 0.6840374139171549, "grad_norm": 0.7046939730644226, "learning_rate": 2.397413315086965e-06, "loss": 0.6414, "step": 13310 }, { "epoch": 0.6840888066604995, "grad_norm": 1.092928409576416, "learning_rate": 2.396702730092341e-06, "loss": 0.7321, "step": 13311 }, { "epoch": 0.6841401994038442, "grad_norm": 1.149901032447815, "learning_rate": 2.39599221722434e-06, "loss": 0.7164, "step": 13312 }, { "epoch": 0.6841915921471888, "grad_norm": 1.1414217948913574, "learning_rate": 2.3952817765026494e-06, "loss": 0.7404, "step": 13313 }, { "epoch": 0.6842429848905335, "grad_norm": 1.098250150680542, "learning_rate": 2.3945714079469494e-06, "loss": 0.7233, "step": 13314 }, { "epoch": 0.6842943776338781, "grad_norm": 0.7288789749145508, "learning_rate": 2.3938611115769283e-06, "loss": 0.6713, "step": 13315 }, { "epoch": 0.6843457703772228, "grad_norm": 1.1219171285629272, "learning_rate": 2.3931508874122565e-06, "loss": 0.7707, "step": 13316 }, { "epoch": 0.6843971631205674, "grad_norm": 0.8191121220588684, "learning_rate": 2.3924407354726175e-06, "loss": 0.6386, "step": 13317 }, { "epoch": 0.6844485558639121, "grad_norm": 1.048741102218628, "learning_rate": 2.3917306557776853e-06, "loss": 0.6591, "step": 13318 }, { "epoch": 0.6844999486072566, "grad_norm": 1.049906611442566, "learning_rate": 2.391020648347131e-06, "loss": 0.8278, "step": 13319 }, { "epoch": 0.6845513413506013, "grad_norm": 1.100466251373291, "learning_rate": 2.39031071320063e-06, "loss": 0.7202, "step": 13320 }, { "epoch": 0.6846027340939459, "grad_norm": 0.7114228010177612, "learning_rate": 2.3896008503578476e-06, "loss": 0.6507, "step": 13321 }, { "epoch": 0.6846541268372905, "grad_norm": 1.1019946336746216, "learning_rate": 2.3888910598384567e-06, "loss": 0.6786, "step": 13322 }, { "epoch": 0.6847055195806352, "grad_norm": 0.7408274412155151, "learning_rate": 2.3881813416621157e-06, "loss": 0.6671, "step": 13323 }, { "epoch": 0.6847569123239798, "grad_norm": 0.7356608510017395, "learning_rate": 2.3874716958484927e-06, "loss": 0.6953, "step": 13324 }, { "epoch": 0.6848083050673245, "grad_norm": 1.0569781064987183, "learning_rate": 2.386762122417248e-06, "loss": 0.6974, "step": 13325 }, { "epoch": 0.6848596978106691, "grad_norm": 0.7239935994148254, "learning_rate": 2.3860526213880413e-06, "loss": 0.7091, "step": 13326 }, { "epoch": 0.6849110905540138, "grad_norm": 1.1210187673568726, "learning_rate": 2.3853431927805267e-06, "loss": 0.6825, "step": 13327 }, { "epoch": 0.6849624832973584, "grad_norm": 0.694304883480072, "learning_rate": 2.3846338366143644e-06, "loss": 0.7121, "step": 13328 }, { "epoch": 0.6850138760407031, "grad_norm": 1.097902536392212, "learning_rate": 2.3839245529092055e-06, "loss": 0.685, "step": 13329 }, { "epoch": 0.6850652687840477, "grad_norm": 1.0709285736083984, "learning_rate": 2.3832153416846995e-06, "loss": 0.6925, "step": 13330 }, { "epoch": 0.6851166615273924, "grad_norm": 1.1218461990356445, "learning_rate": 2.3825062029605014e-06, "loss": 0.7291, "step": 13331 }, { "epoch": 0.685168054270737, "grad_norm": 1.0941627025604248, "learning_rate": 2.3817971367562505e-06, "loss": 0.7527, "step": 13332 }, { "epoch": 0.6852194470140817, "grad_norm": 1.0594651699066162, "learning_rate": 2.3810881430915975e-06, "loss": 0.7014, "step": 13333 }, { "epoch": 0.6852708397574262, "grad_norm": 1.0984879732131958, "learning_rate": 2.380379221986186e-06, "loss": 0.6687, "step": 13334 }, { "epoch": 0.6853222325007708, "grad_norm": 0.6971931457519531, "learning_rate": 2.379670373459655e-06, "loss": 0.6356, "step": 13335 }, { "epoch": 0.6853736252441155, "grad_norm": 1.117618441581726, "learning_rate": 2.3789615975316423e-06, "loss": 0.6995, "step": 13336 }, { "epoch": 0.6854250179874601, "grad_norm": 1.022958517074585, "learning_rate": 2.37825289422179e-06, "loss": 0.6433, "step": 13337 }, { "epoch": 0.6854764107308048, "grad_norm": 1.023323893547058, "learning_rate": 2.3775442635497297e-06, "loss": 0.6601, "step": 13338 }, { "epoch": 0.6855278034741494, "grad_norm": 1.0816253423690796, "learning_rate": 2.3768357055350945e-06, "loss": 0.7245, "step": 13339 }, { "epoch": 0.6855791962174941, "grad_norm": 1.0332515239715576, "learning_rate": 2.3761272201975184e-06, "loss": 0.7225, "step": 13340 }, { "epoch": 0.6856305889608387, "grad_norm": 1.1607924699783325, "learning_rate": 2.375418807556629e-06, "loss": 0.7492, "step": 13341 }, { "epoch": 0.6856819817041834, "grad_norm": 1.0634335279464722, "learning_rate": 2.3747104676320533e-06, "loss": 0.6862, "step": 13342 }, { "epoch": 0.685733374447528, "grad_norm": 1.1076996326446533, "learning_rate": 2.3740022004434145e-06, "loss": 0.745, "step": 13343 }, { "epoch": 0.6857847671908727, "grad_norm": 1.1111207008361816, "learning_rate": 2.3732940060103417e-06, "loss": 0.7504, "step": 13344 }, { "epoch": 0.6858361599342173, "grad_norm": 1.04714834690094, "learning_rate": 2.3725858843524487e-06, "loss": 0.7171, "step": 13345 }, { "epoch": 0.685887552677562, "grad_norm": 0.6929269433021545, "learning_rate": 2.37187783548936e-06, "loss": 0.5974, "step": 13346 }, { "epoch": 0.6859389454209066, "grad_norm": 1.1622371673583984, "learning_rate": 2.371169859440691e-06, "loss": 0.7771, "step": 13347 }, { "epoch": 0.6859903381642513, "grad_norm": 1.1110748052597046, "learning_rate": 2.3704619562260545e-06, "loss": 0.7381, "step": 13348 }, { "epoch": 0.6860417309075958, "grad_norm": 1.0892292261123657, "learning_rate": 2.3697541258650676e-06, "loss": 0.6758, "step": 13349 }, { "epoch": 0.6860931236509404, "grad_norm": 0.8300483822822571, "learning_rate": 2.36904636837734e-06, "loss": 0.6711, "step": 13350 }, { "epoch": 0.6861445163942851, "grad_norm": 1.0972872972488403, "learning_rate": 2.3683386837824794e-06, "loss": 0.657, "step": 13351 }, { "epoch": 0.6861959091376297, "grad_norm": 0.759110152721405, "learning_rate": 2.3676310721000922e-06, "loss": 0.6485, "step": 13352 }, { "epoch": 0.6862473018809744, "grad_norm": 1.0260483026504517, "learning_rate": 2.3669235333497868e-06, "loss": 0.6444, "step": 13353 }, { "epoch": 0.686298694624319, "grad_norm": 1.0845814943313599, "learning_rate": 2.366216067551164e-06, "loss": 0.7671, "step": 13354 }, { "epoch": 0.6863500873676637, "grad_norm": 1.1047589778900146, "learning_rate": 2.365508674723825e-06, "loss": 0.6906, "step": 13355 }, { "epoch": 0.6864014801110083, "grad_norm": 1.0388273000717163, "learning_rate": 2.3648013548873672e-06, "loss": 0.6842, "step": 13356 }, { "epoch": 0.686452872854353, "grad_norm": 1.0922319889068604, "learning_rate": 2.3640941080613927e-06, "loss": 0.7262, "step": 13357 }, { "epoch": 0.6865042655976976, "grad_norm": 1.0639010667800903, "learning_rate": 2.3633869342654884e-06, "loss": 0.7291, "step": 13358 }, { "epoch": 0.6865556583410423, "grad_norm": 0.7619389295578003, "learning_rate": 2.362679833519254e-06, "loss": 0.6772, "step": 13359 }, { "epoch": 0.6866070510843869, "grad_norm": 1.097631812095642, "learning_rate": 2.361972805842278e-06, "loss": 0.767, "step": 13360 }, { "epoch": 0.6866584438277316, "grad_norm": 1.1154394149780273, "learning_rate": 2.3612658512541468e-06, "loss": 0.7049, "step": 13361 }, { "epoch": 0.6867098365710762, "grad_norm": 0.7978020310401917, "learning_rate": 2.360558969774451e-06, "loss": 0.6517, "step": 13362 }, { "epoch": 0.6867612293144209, "grad_norm": 1.103040337562561, "learning_rate": 2.359852161422774e-06, "loss": 0.6785, "step": 13363 }, { "epoch": 0.6868126220577655, "grad_norm": 0.9830541610717773, "learning_rate": 2.359145426218699e-06, "loss": 0.6595, "step": 13364 }, { "epoch": 0.68686401480111, "grad_norm": 1.0240007638931274, "learning_rate": 2.3584387641818035e-06, "loss": 0.7125, "step": 13365 }, { "epoch": 0.6869154075444547, "grad_norm": 1.100358247756958, "learning_rate": 2.357732175331673e-06, "loss": 0.6663, "step": 13366 }, { "epoch": 0.6869668002877993, "grad_norm": 1.0087822675704956, "learning_rate": 2.357025659687876e-06, "loss": 0.7115, "step": 13367 }, { "epoch": 0.687018193031144, "grad_norm": 1.0406197309494019, "learning_rate": 2.356319217269993e-06, "loss": 0.7565, "step": 13368 }, { "epoch": 0.6870695857744886, "grad_norm": 1.1621811389923096, "learning_rate": 2.3556128480975926e-06, "loss": 0.7173, "step": 13369 }, { "epoch": 0.6871209785178333, "grad_norm": 0.697023332118988, "learning_rate": 2.3549065521902493e-06, "loss": 0.6172, "step": 13370 }, { "epoch": 0.6871723712611779, "grad_norm": 1.0437123775482178, "learning_rate": 2.35420032956753e-06, "loss": 0.6942, "step": 13371 }, { "epoch": 0.6872237640045226, "grad_norm": 1.0900206565856934, "learning_rate": 2.3534941802489985e-06, "loss": 0.7209, "step": 13372 }, { "epoch": 0.6872751567478672, "grad_norm": 1.0648189783096313, "learning_rate": 2.352788104254226e-06, "loss": 0.7509, "step": 13373 }, { "epoch": 0.6873265494912119, "grad_norm": 0.8383201360702515, "learning_rate": 2.3520821016027674e-06, "loss": 0.6318, "step": 13374 }, { "epoch": 0.6873779422345565, "grad_norm": 1.157762885093689, "learning_rate": 2.3513761723141874e-06, "loss": 0.7062, "step": 13375 }, { "epoch": 0.6874293349779012, "grad_norm": 1.1178290843963623, "learning_rate": 2.3506703164080443e-06, "loss": 0.6757, "step": 13376 }, { "epoch": 0.6874807277212458, "grad_norm": 0.7638944983482361, "learning_rate": 2.349964533903893e-06, "loss": 0.6576, "step": 13377 }, { "epoch": 0.6875321204645904, "grad_norm": 1.0895625352859497, "learning_rate": 2.3492588248212868e-06, "loss": 0.7007, "step": 13378 }, { "epoch": 0.6875835132079351, "grad_norm": 1.0964897871017456, "learning_rate": 2.3485531891797812e-06, "loss": 0.6726, "step": 13379 }, { "epoch": 0.6876349059512796, "grad_norm": 1.1167876720428467, "learning_rate": 2.3478476269989254e-06, "loss": 0.8101, "step": 13380 }, { "epoch": 0.6876862986946243, "grad_norm": 1.0434703826904297, "learning_rate": 2.347142138298265e-06, "loss": 0.69, "step": 13381 }, { "epoch": 0.6877376914379689, "grad_norm": 1.1000373363494873, "learning_rate": 2.3464367230973516e-06, "loss": 0.717, "step": 13382 }, { "epoch": 0.6877890841813136, "grad_norm": 1.0852264165878296, "learning_rate": 2.345731381415723e-06, "loss": 0.6297, "step": 13383 }, { "epoch": 0.6878404769246582, "grad_norm": 1.1253141164779663, "learning_rate": 2.345026113272925e-06, "loss": 0.6463, "step": 13384 }, { "epoch": 0.6878918696680029, "grad_norm": 1.0757182836532593, "learning_rate": 2.3443209186884963e-06, "loss": 0.7259, "step": 13385 }, { "epoch": 0.6879432624113475, "grad_norm": 0.664968729019165, "learning_rate": 2.343615797681979e-06, "loss": 0.651, "step": 13386 }, { "epoch": 0.6879946551546922, "grad_norm": 0.7261338829994202, "learning_rate": 2.3429107502729016e-06, "loss": 0.699, "step": 13387 }, { "epoch": 0.6880460478980368, "grad_norm": 1.1462910175323486, "learning_rate": 2.3422057764808043e-06, "loss": 0.6695, "step": 13388 }, { "epoch": 0.6880974406413815, "grad_norm": 1.0899831056594849, "learning_rate": 2.3415008763252168e-06, "loss": 0.6508, "step": 13389 }, { "epoch": 0.6881488333847261, "grad_norm": 1.0417860746383667, "learning_rate": 2.340796049825667e-06, "loss": 0.7076, "step": 13390 }, { "epoch": 0.6882002261280707, "grad_norm": 1.070149302482605, "learning_rate": 2.3400912970016872e-06, "loss": 0.6723, "step": 13391 }, { "epoch": 0.6882516188714154, "grad_norm": 1.0535063743591309, "learning_rate": 2.3393866178728e-06, "loss": 0.7734, "step": 13392 }, { "epoch": 0.68830301161476, "grad_norm": 1.0936784744262695, "learning_rate": 2.3386820124585304e-06, "loss": 0.7356, "step": 13393 }, { "epoch": 0.6883544043581047, "grad_norm": 1.1746679544448853, "learning_rate": 2.337977480778398e-06, "loss": 0.7335, "step": 13394 }, { "epoch": 0.6884057971014492, "grad_norm": 1.1055418252944946, "learning_rate": 2.337273022851927e-06, "loss": 0.657, "step": 13395 }, { "epoch": 0.6884571898447939, "grad_norm": 1.104048728942871, "learning_rate": 2.336568638698629e-06, "loss": 0.7105, "step": 13396 }, { "epoch": 0.6885085825881385, "grad_norm": 1.1065303087234497, "learning_rate": 2.3358643283380243e-06, "loss": 0.6949, "step": 13397 }, { "epoch": 0.6885599753314832, "grad_norm": 1.1341803073883057, "learning_rate": 2.335160091789625e-06, "loss": 0.7281, "step": 13398 }, { "epoch": 0.6886113680748278, "grad_norm": 1.0388866662979126, "learning_rate": 2.33445592907294e-06, "loss": 0.6661, "step": 13399 }, { "epoch": 0.6886627608181725, "grad_norm": 0.7395660877227783, "learning_rate": 2.333751840207484e-06, "loss": 0.6482, "step": 13400 }, { "epoch": 0.6887141535615171, "grad_norm": 1.0575189590454102, "learning_rate": 2.333047825212761e-06, "loss": 0.7135, "step": 13401 }, { "epoch": 0.6887655463048618, "grad_norm": 1.059338092803955, "learning_rate": 2.3323438841082767e-06, "loss": 0.678, "step": 13402 }, { "epoch": 0.6888169390482064, "grad_norm": 0.7790700793266296, "learning_rate": 2.3316400169135332e-06, "loss": 0.6319, "step": 13403 }, { "epoch": 0.688868331791551, "grad_norm": 1.1395809650421143, "learning_rate": 2.330936223648035e-06, "loss": 0.7004, "step": 13404 }, { "epoch": 0.6889197245348957, "grad_norm": 1.043841004371643, "learning_rate": 2.330232504331279e-06, "loss": 0.6711, "step": 13405 }, { "epoch": 0.6889711172782403, "grad_norm": 1.0228657722473145, "learning_rate": 2.329528858982763e-06, "loss": 0.6974, "step": 13406 }, { "epoch": 0.689022510021585, "grad_norm": 1.0640373229980469, "learning_rate": 2.32882528762198e-06, "loss": 0.7111, "step": 13407 }, { "epoch": 0.6890739027649296, "grad_norm": 1.0011875629425049, "learning_rate": 2.3281217902684292e-06, "loss": 0.6827, "step": 13408 }, { "epoch": 0.6891252955082743, "grad_norm": 1.0655626058578491, "learning_rate": 2.3274183669415934e-06, "loss": 0.6575, "step": 13409 }, { "epoch": 0.6891766882516188, "grad_norm": 1.0391764640808105, "learning_rate": 2.3267150176609676e-06, "loss": 0.6831, "step": 13410 }, { "epoch": 0.6892280809949635, "grad_norm": 1.046884536743164, "learning_rate": 2.326011742446036e-06, "loss": 0.7275, "step": 13411 }, { "epoch": 0.6892794737383081, "grad_norm": 0.7641355991363525, "learning_rate": 2.325308541316283e-06, "loss": 0.6717, "step": 13412 }, { "epoch": 0.6893308664816528, "grad_norm": 1.0446792840957642, "learning_rate": 2.324605414291194e-06, "loss": 0.6809, "step": 13413 }, { "epoch": 0.6893822592249974, "grad_norm": 1.0867435932159424, "learning_rate": 2.3239023613902466e-06, "loss": 0.6979, "step": 13414 }, { "epoch": 0.689433651968342, "grad_norm": 0.813461184501648, "learning_rate": 2.3231993826329248e-06, "loss": 0.6214, "step": 13415 }, { "epoch": 0.6894850447116867, "grad_norm": 1.0633823871612549, "learning_rate": 2.322496478038698e-06, "loss": 0.7265, "step": 13416 }, { "epoch": 0.6895364374550313, "grad_norm": 1.0876883268356323, "learning_rate": 2.3217936476270457e-06, "loss": 0.7279, "step": 13417 }, { "epoch": 0.689587830198376, "grad_norm": 1.064242959022522, "learning_rate": 2.3210908914174396e-06, "loss": 0.686, "step": 13418 }, { "epoch": 0.6896392229417206, "grad_norm": 1.1113409996032715, "learning_rate": 2.3203882094293493e-06, "loss": 0.7161, "step": 13419 }, { "epoch": 0.6896906156850653, "grad_norm": 1.0891555547714233, "learning_rate": 2.319685601682242e-06, "loss": 0.6922, "step": 13420 }, { "epoch": 0.6897420084284099, "grad_norm": 0.9466959238052368, "learning_rate": 2.318983068195588e-06, "loss": 0.6919, "step": 13421 }, { "epoch": 0.6897934011717546, "grad_norm": 1.0454496145248413, "learning_rate": 2.3182806089888486e-06, "loss": 0.6857, "step": 13422 }, { "epoch": 0.6898447939150992, "grad_norm": 0.6607553362846375, "learning_rate": 2.3175782240814853e-06, "loss": 0.6897, "step": 13423 }, { "epoch": 0.6898961866584439, "grad_norm": 1.1564693450927734, "learning_rate": 2.3168759134929634e-06, "loss": 0.7195, "step": 13424 }, { "epoch": 0.6899475794017884, "grad_norm": 1.0280423164367676, "learning_rate": 2.3161736772427335e-06, "loss": 0.7321, "step": 13425 }, { "epoch": 0.6899989721451331, "grad_norm": 1.0199776887893677, "learning_rate": 2.3154715153502577e-06, "loss": 0.713, "step": 13426 }, { "epoch": 0.6900503648884777, "grad_norm": 1.0492305755615234, "learning_rate": 2.3147694278349873e-06, "loss": 0.6872, "step": 13427 }, { "epoch": 0.6901017576318224, "grad_norm": 1.0582655668258667, "learning_rate": 2.314067414716375e-06, "loss": 0.6894, "step": 13428 }, { "epoch": 0.690153150375167, "grad_norm": 1.0092233419418335, "learning_rate": 2.3133654760138683e-06, "loss": 0.6479, "step": 13429 }, { "epoch": 0.6902045431185116, "grad_norm": 1.145283579826355, "learning_rate": 2.3126636117469193e-06, "loss": 0.6867, "step": 13430 }, { "epoch": 0.6902559358618563, "grad_norm": 1.1579627990722656, "learning_rate": 2.3119618219349714e-06, "loss": 0.6665, "step": 13431 }, { "epoch": 0.6903073286052009, "grad_norm": 1.0809438228607178, "learning_rate": 2.311260106597467e-06, "loss": 0.7118, "step": 13432 }, { "epoch": 0.6903587213485456, "grad_norm": 1.0483686923980713, "learning_rate": 2.310558465753851e-06, "loss": 0.7024, "step": 13433 }, { "epoch": 0.6904101140918902, "grad_norm": 1.046481728553772, "learning_rate": 2.3098568994235615e-06, "loss": 0.6673, "step": 13434 }, { "epoch": 0.6904615068352349, "grad_norm": 1.0845974683761597, "learning_rate": 2.3091554076260352e-06, "loss": 0.712, "step": 13435 }, { "epoch": 0.6905128995785795, "grad_norm": 0.8732377886772156, "learning_rate": 2.3084539903807068e-06, "loss": 0.635, "step": 13436 }, { "epoch": 0.6905642923219242, "grad_norm": 1.1515207290649414, "learning_rate": 2.3077526477070146e-06, "loss": 0.7139, "step": 13437 }, { "epoch": 0.6906156850652688, "grad_norm": 0.6914734244346619, "learning_rate": 2.307051379624382e-06, "loss": 0.643, "step": 13438 }, { "epoch": 0.6906670778086135, "grad_norm": 1.1208144426345825, "learning_rate": 2.306350186152245e-06, "loss": 0.6664, "step": 13439 }, { "epoch": 0.690718470551958, "grad_norm": 0.7578917145729065, "learning_rate": 2.305649067310028e-06, "loss": 0.6688, "step": 13440 }, { "epoch": 0.6907698632953027, "grad_norm": 1.1689575910568237, "learning_rate": 2.3049480231171544e-06, "loss": 0.7268, "step": 13441 }, { "epoch": 0.6908212560386473, "grad_norm": 1.102469801902771, "learning_rate": 2.3042470535930516e-06, "loss": 0.8059, "step": 13442 }, { "epoch": 0.690872648781992, "grad_norm": 1.068149209022522, "learning_rate": 2.3035461587571378e-06, "loss": 0.7272, "step": 13443 }, { "epoch": 0.6909240415253366, "grad_norm": 1.055262565612793, "learning_rate": 2.3028453386288323e-06, "loss": 0.6787, "step": 13444 }, { "epoch": 0.6909754342686812, "grad_norm": 1.060615062713623, "learning_rate": 2.3021445932275506e-06, "loss": 0.7376, "step": 13445 }, { "epoch": 0.6910268270120259, "grad_norm": 1.119568943977356, "learning_rate": 2.3014439225727103e-06, "loss": 0.7477, "step": 13446 }, { "epoch": 0.6910782197553705, "grad_norm": 1.0908480882644653, "learning_rate": 2.300743326683723e-06, "loss": 0.7012, "step": 13447 }, { "epoch": 0.6911296124987152, "grad_norm": 1.092620611190796, "learning_rate": 2.3000428055799984e-06, "loss": 0.744, "step": 13448 }, { "epoch": 0.6911810052420598, "grad_norm": 1.0482251644134521, "learning_rate": 2.2993423592809445e-06, "loss": 0.6821, "step": 13449 }, { "epoch": 0.6912323979854045, "grad_norm": 1.1021394729614258, "learning_rate": 2.2986419878059703e-06, "loss": 0.6713, "step": 13450 }, { "epoch": 0.6912837907287491, "grad_norm": 1.1428685188293457, "learning_rate": 2.2979416911744784e-06, "loss": 0.7413, "step": 13451 }, { "epoch": 0.6913351834720938, "grad_norm": 1.078089714050293, "learning_rate": 2.297241469405872e-06, "loss": 0.6882, "step": 13452 }, { "epoch": 0.6913865762154384, "grad_norm": 1.0971280336380005, "learning_rate": 2.2965413225195502e-06, "loss": 0.7274, "step": 13453 }, { "epoch": 0.6914379689587831, "grad_norm": 0.7383515238761902, "learning_rate": 2.295841250534911e-06, "loss": 0.6681, "step": 13454 }, { "epoch": 0.6914893617021277, "grad_norm": 1.099220871925354, "learning_rate": 2.2951412534713525e-06, "loss": 0.6791, "step": 13455 }, { "epoch": 0.6915407544454722, "grad_norm": 1.2490869760513306, "learning_rate": 2.294441331348268e-06, "loss": 0.709, "step": 13456 }, { "epoch": 0.6915921471888169, "grad_norm": 1.112113356590271, "learning_rate": 2.293741484185048e-06, "loss": 0.7566, "step": 13457 }, { "epoch": 0.6916435399321615, "grad_norm": 0.7345757484436035, "learning_rate": 2.293041712001083e-06, "loss": 0.6528, "step": 13458 }, { "epoch": 0.6916949326755062, "grad_norm": 1.0814324617385864, "learning_rate": 2.292342014815762e-06, "loss": 0.7344, "step": 13459 }, { "epoch": 0.6917463254188508, "grad_norm": 1.0851945877075195, "learning_rate": 2.2916423926484705e-06, "loss": 0.6907, "step": 13460 }, { "epoch": 0.6917977181621955, "grad_norm": 1.0835740566253662, "learning_rate": 2.290942845518591e-06, "loss": 0.7407, "step": 13461 }, { "epoch": 0.6918491109055401, "grad_norm": 1.079351544380188, "learning_rate": 2.2902433734455044e-06, "loss": 0.7473, "step": 13462 }, { "epoch": 0.6919005036488848, "grad_norm": 1.071176528930664, "learning_rate": 2.2895439764485926e-06, "loss": 0.6979, "step": 13463 }, { "epoch": 0.6919518963922294, "grad_norm": 1.0512492656707764, "learning_rate": 2.2888446545472324e-06, "loss": 0.681, "step": 13464 }, { "epoch": 0.6920032891355741, "grad_norm": 1.0513546466827393, "learning_rate": 2.2881454077607968e-06, "loss": 0.715, "step": 13465 }, { "epoch": 0.6920546818789187, "grad_norm": 0.6778315305709839, "learning_rate": 2.287446236108664e-06, "loss": 0.643, "step": 13466 }, { "epoch": 0.6921060746222634, "grad_norm": 1.071773886680603, "learning_rate": 2.286747139610198e-06, "loss": 0.6871, "step": 13467 }, { "epoch": 0.692157467365608, "grad_norm": 1.0454281568527222, "learning_rate": 2.2860481182847742e-06, "loss": 0.7161, "step": 13468 }, { "epoch": 0.6922088601089527, "grad_norm": 1.0766897201538086, "learning_rate": 2.2853491721517574e-06, "loss": 0.6666, "step": 13469 }, { "epoch": 0.6922602528522973, "grad_norm": 1.078318476676941, "learning_rate": 2.2846503012305123e-06, "loss": 0.699, "step": 13470 }, { "epoch": 0.6923116455956418, "grad_norm": 1.1118658781051636, "learning_rate": 2.283951505540399e-06, "loss": 0.7074, "step": 13471 }, { "epoch": 0.6923630383389865, "grad_norm": 1.0923362970352173, "learning_rate": 2.283252785100784e-06, "loss": 0.7368, "step": 13472 }, { "epoch": 0.6924144310823311, "grad_norm": 0.7332751750946045, "learning_rate": 2.2825541399310223e-06, "loss": 0.6362, "step": 13473 }, { "epoch": 0.6924658238256758, "grad_norm": 1.035706877708435, "learning_rate": 2.2818555700504697e-06, "loss": 0.7355, "step": 13474 }, { "epoch": 0.6925172165690204, "grad_norm": 1.084615707397461, "learning_rate": 2.2811570754784856e-06, "loss": 0.7225, "step": 13475 }, { "epoch": 0.6925686093123651, "grad_norm": 0.7340329885482788, "learning_rate": 2.2804586562344155e-06, "loss": 0.6465, "step": 13476 }, { "epoch": 0.6926200020557097, "grad_norm": 1.044554591178894, "learning_rate": 2.279760312337615e-06, "loss": 0.6787, "step": 13477 }, { "epoch": 0.6926713947990544, "grad_norm": 1.0937559604644775, "learning_rate": 2.2790620438074278e-06, "loss": 0.6992, "step": 13478 }, { "epoch": 0.692722787542399, "grad_norm": 1.1471049785614014, "learning_rate": 2.2783638506632073e-06, "loss": 0.7134, "step": 13479 }, { "epoch": 0.6927741802857437, "grad_norm": 1.0870516300201416, "learning_rate": 2.277665732924289e-06, "loss": 0.7164, "step": 13480 }, { "epoch": 0.6928255730290883, "grad_norm": 0.9905552268028259, "learning_rate": 2.2769676906100207e-06, "loss": 0.6769, "step": 13481 }, { "epoch": 0.692876965772433, "grad_norm": 1.099609613418579, "learning_rate": 2.2762697237397406e-06, "loss": 0.7233, "step": 13482 }, { "epoch": 0.6929283585157776, "grad_norm": 1.0504956245422363, "learning_rate": 2.275571832332784e-06, "loss": 0.7112, "step": 13483 }, { "epoch": 0.6929797512591223, "grad_norm": 1.0653971433639526, "learning_rate": 2.2748740164084907e-06, "loss": 0.6814, "step": 13484 }, { "epoch": 0.6930311440024669, "grad_norm": 1.154149055480957, "learning_rate": 2.274176275986193e-06, "loss": 0.7133, "step": 13485 }, { "epoch": 0.6930825367458114, "grad_norm": 0.7868914008140564, "learning_rate": 2.273478611085222e-06, "loss": 0.6441, "step": 13486 }, { "epoch": 0.6931339294891561, "grad_norm": 1.097804307937622, "learning_rate": 2.2727810217249045e-06, "loss": 0.7107, "step": 13487 }, { "epoch": 0.6931853222325007, "grad_norm": 1.0980697870254517, "learning_rate": 2.2720835079245744e-06, "loss": 0.7395, "step": 13488 }, { "epoch": 0.6932367149758454, "grad_norm": 1.0722997188568115, "learning_rate": 2.271386069703549e-06, "loss": 0.7372, "step": 13489 }, { "epoch": 0.69328810771919, "grad_norm": 1.0293450355529785, "learning_rate": 2.270688707081157e-06, "loss": 0.7051, "step": 13490 }, { "epoch": 0.6933395004625347, "grad_norm": 1.1046091318130493, "learning_rate": 2.269991420076718e-06, "loss": 0.7526, "step": 13491 }, { "epoch": 0.6933908932058793, "grad_norm": 1.0929967164993286, "learning_rate": 2.2692942087095487e-06, "loss": 0.6725, "step": 13492 }, { "epoch": 0.693442285949224, "grad_norm": 0.8360573053359985, "learning_rate": 2.2685970729989696e-06, "loss": 0.6657, "step": 13493 }, { "epoch": 0.6934936786925686, "grad_norm": 0.7532451748847961, "learning_rate": 2.267900012964292e-06, "loss": 0.6644, "step": 13494 }, { "epoch": 0.6935450714359133, "grad_norm": 1.1184346675872803, "learning_rate": 2.2672030286248342e-06, "loss": 0.7184, "step": 13495 }, { "epoch": 0.6935964641792579, "grad_norm": 1.026456594467163, "learning_rate": 2.2665061199998996e-06, "loss": 0.6437, "step": 13496 }, { "epoch": 0.6936478569226026, "grad_norm": 1.0560096502304077, "learning_rate": 2.2658092871088013e-06, "loss": 0.6751, "step": 13497 }, { "epoch": 0.6936992496659472, "grad_norm": 1.028740644454956, "learning_rate": 2.265112529970844e-06, "loss": 0.7228, "step": 13498 }, { "epoch": 0.6937506424092919, "grad_norm": 0.863117516040802, "learning_rate": 2.2644158486053323e-06, "loss": 0.598, "step": 13499 }, { "epoch": 0.6938020351526365, "grad_norm": 1.0150525569915771, "learning_rate": 2.263719243031567e-06, "loss": 0.7142, "step": 13500 }, { "epoch": 0.693853427895981, "grad_norm": 1.0095702409744263, "learning_rate": 2.2630227132688515e-06, "loss": 0.6977, "step": 13501 }, { "epoch": 0.6939048206393257, "grad_norm": 1.0393542051315308, "learning_rate": 2.2623262593364813e-06, "loss": 0.6549, "step": 13502 }, { "epoch": 0.6939562133826703, "grad_norm": 0.9956376552581787, "learning_rate": 2.261629881253753e-06, "loss": 0.6542, "step": 13503 }, { "epoch": 0.694007606126015, "grad_norm": 1.1031354665756226, "learning_rate": 2.2609335790399602e-06, "loss": 0.6867, "step": 13504 }, { "epoch": 0.6940589988693596, "grad_norm": 1.0696535110473633, "learning_rate": 2.260237352714392e-06, "loss": 0.7057, "step": 13505 }, { "epoch": 0.6941103916127043, "grad_norm": 1.059991717338562, "learning_rate": 2.2595412022963424e-06, "loss": 0.691, "step": 13506 }, { "epoch": 0.6941617843560489, "grad_norm": 1.232047438621521, "learning_rate": 2.258845127805098e-06, "loss": 0.6761, "step": 13507 }, { "epoch": 0.6942131770993936, "grad_norm": 1.053749680519104, "learning_rate": 2.2581491292599415e-06, "loss": 0.6931, "step": 13508 }, { "epoch": 0.6942645698427382, "grad_norm": 1.1948935985565186, "learning_rate": 2.2574532066801567e-06, "loss": 0.694, "step": 13509 }, { "epoch": 0.6943159625860829, "grad_norm": 0.7406054139137268, "learning_rate": 2.2567573600850273e-06, "loss": 0.6674, "step": 13510 }, { "epoch": 0.6943673553294275, "grad_norm": 1.170021414756775, "learning_rate": 2.256061589493831e-06, "loss": 0.6722, "step": 13511 }, { "epoch": 0.6944187480727722, "grad_norm": 0.665573000907898, "learning_rate": 2.2553658949258445e-06, "loss": 0.6127, "step": 13512 }, { "epoch": 0.6944701408161168, "grad_norm": 1.0770032405853271, "learning_rate": 2.2546702764003403e-06, "loss": 0.7105, "step": 13513 }, { "epoch": 0.6945215335594614, "grad_norm": 1.0802977085113525, "learning_rate": 2.2539747339365957e-06, "loss": 0.6523, "step": 13514 }, { "epoch": 0.6945729263028061, "grad_norm": 1.0573500394821167, "learning_rate": 2.253279267553879e-06, "loss": 0.6757, "step": 13515 }, { "epoch": 0.6946243190461506, "grad_norm": 1.161020278930664, "learning_rate": 2.2525838772714564e-06, "loss": 0.6903, "step": 13516 }, { "epoch": 0.6946757117894953, "grad_norm": 1.08132004737854, "learning_rate": 2.251888563108601e-06, "loss": 0.7068, "step": 13517 }, { "epoch": 0.6947271045328399, "grad_norm": 0.6985142827033997, "learning_rate": 2.251193325084569e-06, "loss": 0.6941, "step": 13518 }, { "epoch": 0.6947784972761846, "grad_norm": 1.0044920444488525, "learning_rate": 2.2504981632186278e-06, "loss": 0.7025, "step": 13519 }, { "epoch": 0.6948298900195292, "grad_norm": 1.0493305921554565, "learning_rate": 2.249803077530036e-06, "loss": 0.7143, "step": 13520 }, { "epoch": 0.6948812827628739, "grad_norm": 1.0844831466674805, "learning_rate": 2.2491080680380515e-06, "loss": 0.6629, "step": 13521 }, { "epoch": 0.6949326755062185, "grad_norm": 0.7185050845146179, "learning_rate": 2.2484131347619285e-06, "loss": 0.6661, "step": 13522 }, { "epoch": 0.6949840682495632, "grad_norm": 1.09365713596344, "learning_rate": 2.2477182777209244e-06, "loss": 0.6656, "step": 13523 }, { "epoch": 0.6950354609929078, "grad_norm": 0.8049207329750061, "learning_rate": 2.2470234969342887e-06, "loss": 0.679, "step": 13524 }, { "epoch": 0.6950868537362525, "grad_norm": 0.7386157512664795, "learning_rate": 2.246328792421269e-06, "loss": 0.6768, "step": 13525 }, { "epoch": 0.6951382464795971, "grad_norm": 0.6995337009429932, "learning_rate": 2.245634164201117e-06, "loss": 0.6571, "step": 13526 }, { "epoch": 0.6951896392229417, "grad_norm": 1.1221362352371216, "learning_rate": 2.2449396122930755e-06, "loss": 0.7141, "step": 13527 }, { "epoch": 0.6952410319662864, "grad_norm": 1.2116707563400269, "learning_rate": 2.2442451367163877e-06, "loss": 0.7267, "step": 13528 }, { "epoch": 0.695292424709631, "grad_norm": 1.0572103261947632, "learning_rate": 2.243550737490293e-06, "loss": 0.7153, "step": 13529 }, { "epoch": 0.6953438174529757, "grad_norm": 1.1448333263397217, "learning_rate": 2.2428564146340357e-06, "loss": 0.7181, "step": 13530 }, { "epoch": 0.6953952101963203, "grad_norm": 1.1060892343521118, "learning_rate": 2.242162168166846e-06, "loss": 0.7155, "step": 13531 }, { "epoch": 0.6954466029396649, "grad_norm": 1.121079444885254, "learning_rate": 2.2414679981079635e-06, "loss": 0.7484, "step": 13532 }, { "epoch": 0.6954979956830095, "grad_norm": 0.8193677663803101, "learning_rate": 2.2407739044766187e-06, "loss": 0.6458, "step": 13533 }, { "epoch": 0.6955493884263542, "grad_norm": 1.0856173038482666, "learning_rate": 2.2400798872920404e-06, "loss": 0.7099, "step": 13534 }, { "epoch": 0.6956007811696988, "grad_norm": 1.0929173231124878, "learning_rate": 2.239385946573461e-06, "loss": 0.6294, "step": 13535 }, { "epoch": 0.6956521739130435, "grad_norm": 1.244460940361023, "learning_rate": 2.238692082340104e-06, "loss": 0.7127, "step": 13536 }, { "epoch": 0.6957035666563881, "grad_norm": 1.0711065530776978, "learning_rate": 2.237998294611195e-06, "loss": 0.6519, "step": 13537 }, { "epoch": 0.6957549593997328, "grad_norm": 1.119640827178955, "learning_rate": 2.237304583405953e-06, "loss": 0.6988, "step": 13538 }, { "epoch": 0.6958063521430774, "grad_norm": 0.7877973914146423, "learning_rate": 2.2366109487436016e-06, "loss": 0.6568, "step": 13539 }, { "epoch": 0.695857744886422, "grad_norm": 1.0677130222320557, "learning_rate": 2.2359173906433573e-06, "loss": 0.7198, "step": 13540 }, { "epoch": 0.6959091376297667, "grad_norm": 1.0715129375457764, "learning_rate": 2.2352239091244355e-06, "loss": 0.7079, "step": 13541 }, { "epoch": 0.6959605303731113, "grad_norm": 1.0717582702636719, "learning_rate": 2.234530504206047e-06, "loss": 0.6789, "step": 13542 }, { "epoch": 0.696011923116456, "grad_norm": 1.1999636888504028, "learning_rate": 2.2338371759074086e-06, "loss": 0.7582, "step": 13543 }, { "epoch": 0.6960633158598006, "grad_norm": 0.7118518948554993, "learning_rate": 2.2331439242477266e-06, "loss": 0.6599, "step": 13544 }, { "epoch": 0.6961147086031453, "grad_norm": 1.1292623281478882, "learning_rate": 2.232450749246206e-06, "loss": 0.7757, "step": 13545 }, { "epoch": 0.6961661013464899, "grad_norm": 1.00393545627594, "learning_rate": 2.2317576509220583e-06, "loss": 0.6949, "step": 13546 }, { "epoch": 0.6962174940898345, "grad_norm": 0.772158682346344, "learning_rate": 2.2310646292944782e-06, "loss": 0.634, "step": 13547 }, { "epoch": 0.6962688868331791, "grad_norm": 1.1463512182235718, "learning_rate": 2.230371684382673e-06, "loss": 0.7078, "step": 13548 }, { "epoch": 0.6963202795765238, "grad_norm": 1.0920727252960205, "learning_rate": 2.229678816205838e-06, "loss": 0.6741, "step": 13549 }, { "epoch": 0.6963716723198684, "grad_norm": 1.132951259613037, "learning_rate": 2.2289860247831708e-06, "loss": 0.7125, "step": 13550 }, { "epoch": 0.696423065063213, "grad_norm": 1.0562787055969238, "learning_rate": 2.228293310133864e-06, "loss": 0.6815, "step": 13551 }, { "epoch": 0.6964744578065577, "grad_norm": 1.0256842374801636, "learning_rate": 2.227600672277113e-06, "loss": 0.677, "step": 13552 }, { "epoch": 0.6965258505499023, "grad_norm": 1.1402944326400757, "learning_rate": 2.226908111232106e-06, "loss": 0.7094, "step": 13553 }, { "epoch": 0.696577243293247, "grad_norm": 1.100414514541626, "learning_rate": 2.2262156270180325e-06, "loss": 0.7393, "step": 13554 }, { "epoch": 0.6966286360365916, "grad_norm": 1.1295472383499146, "learning_rate": 2.2255232196540744e-06, "loss": 0.734, "step": 13555 }, { "epoch": 0.6966800287799363, "grad_norm": 1.111074686050415, "learning_rate": 2.2248308891594205e-06, "loss": 0.7036, "step": 13556 }, { "epoch": 0.6967314215232809, "grad_norm": 1.144273281097412, "learning_rate": 2.2241386355532502e-06, "loss": 0.7026, "step": 13557 }, { "epoch": 0.6967828142666256, "grad_norm": 0.6851841807365417, "learning_rate": 2.223446458854741e-06, "loss": 0.6114, "step": 13558 }, { "epoch": 0.6968342070099702, "grad_norm": 1.1043798923492432, "learning_rate": 2.2227543590830764e-06, "loss": 0.6963, "step": 13559 }, { "epoch": 0.6968855997533149, "grad_norm": 1.0370383262634277, "learning_rate": 2.2220623362574235e-06, "loss": 0.7016, "step": 13560 }, { "epoch": 0.6969369924966595, "grad_norm": 1.183712363243103, "learning_rate": 2.2213703903969607e-06, "loss": 0.752, "step": 13561 }, { "epoch": 0.6969883852400041, "grad_norm": 1.0715758800506592, "learning_rate": 2.220678521520858e-06, "loss": 0.707, "step": 13562 }, { "epoch": 0.6970397779833487, "grad_norm": 1.1316934823989868, "learning_rate": 2.2199867296482842e-06, "loss": 0.693, "step": 13563 }, { "epoch": 0.6970911707266934, "grad_norm": 1.0494171380996704, "learning_rate": 2.219295014798404e-06, "loss": 0.709, "step": 13564 }, { "epoch": 0.697142563470038, "grad_norm": 1.0881223678588867, "learning_rate": 2.2186033769903846e-06, "loss": 0.7428, "step": 13565 }, { "epoch": 0.6971939562133826, "grad_norm": 1.07633638381958, "learning_rate": 2.217911816243388e-06, "loss": 0.6992, "step": 13566 }, { "epoch": 0.6972453489567273, "grad_norm": 1.093612551689148, "learning_rate": 2.217220332576573e-06, "loss": 0.7377, "step": 13567 }, { "epoch": 0.6972967417000719, "grad_norm": 1.0358095169067383, "learning_rate": 2.216528926009101e-06, "loss": 0.7396, "step": 13568 }, { "epoch": 0.6973481344434166, "grad_norm": 1.046341896057129, "learning_rate": 2.215837596560123e-06, "loss": 0.7273, "step": 13569 }, { "epoch": 0.6973995271867612, "grad_norm": 1.0692846775054932, "learning_rate": 2.215146344248797e-06, "loss": 0.7117, "step": 13570 }, { "epoch": 0.6974509199301059, "grad_norm": 1.111298680305481, "learning_rate": 2.214455169094272e-06, "loss": 0.7233, "step": 13571 }, { "epoch": 0.6975023126734505, "grad_norm": 1.1238657236099243, "learning_rate": 2.2137640711157023e-06, "loss": 0.643, "step": 13572 }, { "epoch": 0.6975537054167952, "grad_norm": 1.131379246711731, "learning_rate": 2.2130730503322282e-06, "loss": 0.7643, "step": 13573 }, { "epoch": 0.6976050981601398, "grad_norm": 1.0902824401855469, "learning_rate": 2.2123821067630012e-06, "loss": 0.6756, "step": 13574 }, { "epoch": 0.6976564909034845, "grad_norm": 0.7707480192184448, "learning_rate": 2.2116912404271616e-06, "loss": 0.6181, "step": 13575 }, { "epoch": 0.6977078836468291, "grad_norm": 1.068686604499817, "learning_rate": 2.2110004513438498e-06, "loss": 0.6517, "step": 13576 }, { "epoch": 0.6977592763901737, "grad_norm": 1.0500993728637695, "learning_rate": 2.2103097395322074e-06, "loss": 0.7644, "step": 13577 }, { "epoch": 0.6978106691335183, "grad_norm": 1.1153894662857056, "learning_rate": 2.2096191050113703e-06, "loss": 0.6588, "step": 13578 }, { "epoch": 0.697862061876863, "grad_norm": 1.0967663526535034, "learning_rate": 2.2089285478004724e-06, "loss": 0.6989, "step": 13579 }, { "epoch": 0.6979134546202076, "grad_norm": 1.071048617362976, "learning_rate": 2.2082380679186445e-06, "loss": 0.7652, "step": 13580 }, { "epoch": 0.6979648473635522, "grad_norm": 1.0856297016143799, "learning_rate": 2.2075476653850227e-06, "loss": 0.7067, "step": 13581 }, { "epoch": 0.6980162401068969, "grad_norm": 0.8347252607345581, "learning_rate": 2.206857340218727e-06, "loss": 0.668, "step": 13582 }, { "epoch": 0.6980676328502415, "grad_norm": 1.0944101810455322, "learning_rate": 2.20616709243889e-06, "loss": 0.6891, "step": 13583 }, { "epoch": 0.6981190255935862, "grad_norm": 0.8433620929718018, "learning_rate": 2.205476922064633e-06, "loss": 0.6901, "step": 13584 }, { "epoch": 0.6981704183369308, "grad_norm": 1.1295604705810547, "learning_rate": 2.204786829115077e-06, "loss": 0.7019, "step": 13585 }, { "epoch": 0.6982218110802755, "grad_norm": 1.091489553451538, "learning_rate": 2.2040968136093445e-06, "loss": 0.7056, "step": 13586 }, { "epoch": 0.6982732038236201, "grad_norm": 1.0570809841156006, "learning_rate": 2.203406875566549e-06, "loss": 0.7053, "step": 13587 }, { "epoch": 0.6983245965669648, "grad_norm": 0.6707478761672974, "learning_rate": 2.202717015005812e-06, "loss": 0.698, "step": 13588 }, { "epoch": 0.6983759893103094, "grad_norm": 1.073694109916687, "learning_rate": 2.202027231946239e-06, "loss": 0.7072, "step": 13589 }, { "epoch": 0.6984273820536541, "grad_norm": 1.1681567430496216, "learning_rate": 2.2013375264069468e-06, "loss": 0.6689, "step": 13590 }, { "epoch": 0.6984787747969987, "grad_norm": 1.2627829313278198, "learning_rate": 2.200647898407043e-06, "loss": 0.7366, "step": 13591 }, { "epoch": 0.6985301675403432, "grad_norm": 1.079843521118164, "learning_rate": 2.1999583479656327e-06, "loss": 0.629, "step": 13592 }, { "epoch": 0.6985815602836879, "grad_norm": 1.0948940515518188, "learning_rate": 2.1992688751018203e-06, "loss": 0.6984, "step": 13593 }, { "epoch": 0.6986329530270325, "grad_norm": 1.0765708684921265, "learning_rate": 2.198579479834711e-06, "loss": 0.7202, "step": 13594 }, { "epoch": 0.6986843457703772, "grad_norm": 1.0639162063598633, "learning_rate": 2.1978901621834037e-06, "loss": 0.6726, "step": 13595 }, { "epoch": 0.6987357385137218, "grad_norm": 1.3543606996536255, "learning_rate": 2.1972009221669943e-06, "loss": 0.7086, "step": 13596 }, { "epoch": 0.6987871312570665, "grad_norm": 1.0584834814071655, "learning_rate": 2.1965117598045848e-06, "loss": 0.7193, "step": 13597 }, { "epoch": 0.6988385240004111, "grad_norm": 1.082047939300537, "learning_rate": 2.1958226751152613e-06, "loss": 0.6781, "step": 13598 }, { "epoch": 0.6988899167437558, "grad_norm": 1.0912306308746338, "learning_rate": 2.1951336681181214e-06, "loss": 0.665, "step": 13599 }, { "epoch": 0.6989413094871004, "grad_norm": 0.7504315376281738, "learning_rate": 2.194444738832252e-06, "loss": 0.6587, "step": 13600 }, { "epoch": 0.6989927022304451, "grad_norm": 1.216542363166809, "learning_rate": 2.1937558872767416e-06, "loss": 0.705, "step": 13601 }, { "epoch": 0.6990440949737897, "grad_norm": 0.9907384514808655, "learning_rate": 2.1930671134706727e-06, "loss": 0.6931, "step": 13602 }, { "epoch": 0.6990954877171344, "grad_norm": 1.0784937143325806, "learning_rate": 2.1923784174331323e-06, "loss": 0.7274, "step": 13603 }, { "epoch": 0.699146880460479, "grad_norm": 1.0981013774871826, "learning_rate": 2.1916897991832e-06, "loss": 0.7222, "step": 13604 }, { "epoch": 0.6991982732038237, "grad_norm": 0.7300714254379272, "learning_rate": 2.1910012587399544e-06, "loss": 0.6654, "step": 13605 }, { "epoch": 0.6992496659471683, "grad_norm": 0.6990795135498047, "learning_rate": 2.190312796122469e-06, "loss": 0.667, "step": 13606 }, { "epoch": 0.6993010586905128, "grad_norm": 1.1190791130065918, "learning_rate": 2.1896244113498237e-06, "loss": 0.6827, "step": 13607 }, { "epoch": 0.6993524514338575, "grad_norm": 1.0576037168502808, "learning_rate": 2.188936104441088e-06, "loss": 0.6927, "step": 13608 }, { "epoch": 0.6994038441772021, "grad_norm": 1.2227036952972412, "learning_rate": 2.188247875415331e-06, "loss": 0.7733, "step": 13609 }, { "epoch": 0.6994552369205468, "grad_norm": 0.752952516078949, "learning_rate": 2.1875597242916246e-06, "loss": 0.6075, "step": 13610 }, { "epoch": 0.6995066296638914, "grad_norm": 1.0708752870559692, "learning_rate": 2.186871651089029e-06, "loss": 0.7218, "step": 13611 }, { "epoch": 0.6995580224072361, "grad_norm": 1.0147924423217773, "learning_rate": 2.1861836558266127e-06, "loss": 0.7239, "step": 13612 }, { "epoch": 0.6996094151505807, "grad_norm": 1.0690596103668213, "learning_rate": 2.1854957385234347e-06, "loss": 0.6639, "step": 13613 }, { "epoch": 0.6996608078939254, "grad_norm": 1.1709914207458496, "learning_rate": 2.184807899198556e-06, "loss": 0.6993, "step": 13614 }, { "epoch": 0.69971220063727, "grad_norm": 1.0704842805862427, "learning_rate": 2.1841201378710303e-06, "loss": 0.7126, "step": 13615 }, { "epoch": 0.6997635933806147, "grad_norm": 1.31644606590271, "learning_rate": 2.183432454559917e-06, "loss": 0.6585, "step": 13616 }, { "epoch": 0.6998149861239593, "grad_norm": 1.1318049430847168, "learning_rate": 2.1827448492842675e-06, "loss": 0.7502, "step": 13617 }, { "epoch": 0.699866378867304, "grad_norm": 1.091261625289917, "learning_rate": 2.1820573220631304e-06, "loss": 0.7084, "step": 13618 }, { "epoch": 0.6999177716106486, "grad_norm": 0.7350858449935913, "learning_rate": 2.181369872915557e-06, "loss": 0.6686, "step": 13619 }, { "epoch": 0.6999691643539933, "grad_norm": 1.0864428281784058, "learning_rate": 2.180682501860593e-06, "loss": 0.7073, "step": 13620 }, { "epoch": 0.7000205570973379, "grad_norm": 1.1340830326080322, "learning_rate": 2.179995208917282e-06, "loss": 0.7126, "step": 13621 }, { "epoch": 0.7000719498406826, "grad_norm": 1.1045676469802856, "learning_rate": 2.1793079941046653e-06, "loss": 0.7106, "step": 13622 }, { "epoch": 0.7001233425840271, "grad_norm": 1.1023622751235962, "learning_rate": 2.1786208574417865e-06, "loss": 0.7351, "step": 13623 }, { "epoch": 0.7001747353273717, "grad_norm": 1.1220405101776123, "learning_rate": 2.1779337989476773e-06, "loss": 0.7003, "step": 13624 }, { "epoch": 0.7002261280707164, "grad_norm": 1.0507183074951172, "learning_rate": 2.177246818641378e-06, "loss": 0.694, "step": 13625 }, { "epoch": 0.700277520814061, "grad_norm": 1.1043332815170288, "learning_rate": 2.176559916541921e-06, "loss": 0.6942, "step": 13626 }, { "epoch": 0.7003289135574057, "grad_norm": 1.080931544303894, "learning_rate": 2.1758730926683353e-06, "loss": 0.7695, "step": 13627 }, { "epoch": 0.7003803063007503, "grad_norm": 1.1108373403549194, "learning_rate": 2.1751863470396535e-06, "loss": 0.7056, "step": 13628 }, { "epoch": 0.700431699044095, "grad_norm": 1.075114369392395, "learning_rate": 2.174499679674901e-06, "loss": 0.6637, "step": 13629 }, { "epoch": 0.7004830917874396, "grad_norm": 1.0331659317016602, "learning_rate": 2.1738130905931015e-06, "loss": 0.6879, "step": 13630 }, { "epoch": 0.7005344845307843, "grad_norm": 0.952104926109314, "learning_rate": 2.1731265798132765e-06, "loss": 0.6385, "step": 13631 }, { "epoch": 0.7005858772741289, "grad_norm": 1.091810941696167, "learning_rate": 2.172440147354452e-06, "loss": 0.7078, "step": 13632 }, { "epoch": 0.7006372700174736, "grad_norm": 1.0442560911178589, "learning_rate": 2.1717537932356387e-06, "loss": 0.7162, "step": 13633 }, { "epoch": 0.7006886627608182, "grad_norm": 1.119232177734375, "learning_rate": 2.1710675174758585e-06, "loss": 0.6665, "step": 13634 }, { "epoch": 0.7007400555041629, "grad_norm": 1.1632204055786133, "learning_rate": 2.170381320094121e-06, "loss": 0.6902, "step": 13635 }, { "epoch": 0.7007914482475075, "grad_norm": 1.0333095788955688, "learning_rate": 2.1696952011094413e-06, "loss": 0.7014, "step": 13636 }, { "epoch": 0.7008428409908521, "grad_norm": 1.061029314994812, "learning_rate": 2.169009160540828e-06, "loss": 0.7383, "step": 13637 }, { "epoch": 0.7008942337341967, "grad_norm": 1.0474401712417603, "learning_rate": 2.1683231984072866e-06, "loss": 0.741, "step": 13638 }, { "epoch": 0.7009456264775413, "grad_norm": 1.0333564281463623, "learning_rate": 2.167637314727827e-06, "loss": 0.6946, "step": 13639 }, { "epoch": 0.700997019220886, "grad_norm": 1.0708327293395996, "learning_rate": 2.1669515095214457e-06, "loss": 0.6804, "step": 13640 }, { "epoch": 0.7010484119642306, "grad_norm": 1.0710110664367676, "learning_rate": 2.1662657828071484e-06, "loss": 0.6693, "step": 13641 }, { "epoch": 0.7010998047075753, "grad_norm": 0.9644345045089722, "learning_rate": 2.1655801346039317e-06, "loss": 0.662, "step": 13642 }, { "epoch": 0.7011511974509199, "grad_norm": 1.1154215335845947, "learning_rate": 2.164894564930793e-06, "loss": 0.7623, "step": 13643 }, { "epoch": 0.7012025901942646, "grad_norm": 1.115875244140625, "learning_rate": 2.164209073806724e-06, "loss": 0.6539, "step": 13644 }, { "epoch": 0.7012539829376092, "grad_norm": 1.1323747634887695, "learning_rate": 2.1635236612507204e-06, "loss": 0.7305, "step": 13645 }, { "epoch": 0.7013053756809539, "grad_norm": 1.0729527473449707, "learning_rate": 2.162838327281771e-06, "loss": 0.6332, "step": 13646 }, { "epoch": 0.7013567684242985, "grad_norm": 1.1192591190338135, "learning_rate": 2.162153071918862e-06, "loss": 0.7029, "step": 13647 }, { "epoch": 0.7014081611676432, "grad_norm": 0.7902805805206299, "learning_rate": 2.1614678951809814e-06, "loss": 0.6678, "step": 13648 }, { "epoch": 0.7014595539109878, "grad_norm": 1.0752100944519043, "learning_rate": 2.160782797087112e-06, "loss": 0.6562, "step": 13649 }, { "epoch": 0.7015109466543324, "grad_norm": 1.044222116470337, "learning_rate": 2.1600977776562343e-06, "loss": 0.6867, "step": 13650 }, { "epoch": 0.7015623393976771, "grad_norm": 1.077748417854309, "learning_rate": 2.159412836907326e-06, "loss": 0.6493, "step": 13651 }, { "epoch": 0.7016137321410217, "grad_norm": 1.09913969039917, "learning_rate": 2.1587279748593685e-06, "loss": 0.6576, "step": 13652 }, { "epoch": 0.7016651248843663, "grad_norm": 1.2124676704406738, "learning_rate": 2.158043191531331e-06, "loss": 0.7248, "step": 13653 }, { "epoch": 0.7017165176277109, "grad_norm": 1.0583033561706543, "learning_rate": 2.1573584869421897e-06, "loss": 0.6929, "step": 13654 }, { "epoch": 0.7017679103710556, "grad_norm": 1.1056854724884033, "learning_rate": 2.156673861110914e-06, "loss": 0.7566, "step": 13655 }, { "epoch": 0.7018193031144002, "grad_norm": 1.1423996686935425, "learning_rate": 2.155989314056472e-06, "loss": 0.7329, "step": 13656 }, { "epoch": 0.7018706958577449, "grad_norm": 1.0696130990982056, "learning_rate": 2.1553048457978276e-06, "loss": 0.7174, "step": 13657 }, { "epoch": 0.7019220886010895, "grad_norm": 1.0908503532409668, "learning_rate": 2.1546204563539486e-06, "loss": 0.7047, "step": 13658 }, { "epoch": 0.7019734813444342, "grad_norm": 1.066223382949829, "learning_rate": 2.153936145743794e-06, "loss": 0.6784, "step": 13659 }, { "epoch": 0.7020248740877788, "grad_norm": 1.0545718669891357, "learning_rate": 2.153251913986322e-06, "loss": 0.7361, "step": 13660 }, { "epoch": 0.7020762668311235, "grad_norm": 0.8303635120391846, "learning_rate": 2.1525677611004964e-06, "loss": 0.6801, "step": 13661 }, { "epoch": 0.7021276595744681, "grad_norm": 0.7222477197647095, "learning_rate": 2.151883687105263e-06, "loss": 0.6509, "step": 13662 }, { "epoch": 0.7021790523178127, "grad_norm": 1.2067302465438843, "learning_rate": 2.151199692019581e-06, "loss": 0.6995, "step": 13663 }, { "epoch": 0.7022304450611574, "grad_norm": 1.164230465888977, "learning_rate": 2.1505157758623974e-06, "loss": 0.6388, "step": 13664 }, { "epoch": 0.702281837804502, "grad_norm": 1.1490386724472046, "learning_rate": 2.149831938652666e-06, "loss": 0.7215, "step": 13665 }, { "epoch": 0.7023332305478467, "grad_norm": 1.0562026500701904, "learning_rate": 2.1491481804093263e-06, "loss": 0.6535, "step": 13666 }, { "epoch": 0.7023846232911913, "grad_norm": 0.7007404565811157, "learning_rate": 2.1484645011513277e-06, "loss": 0.6503, "step": 13667 }, { "epoch": 0.7024360160345359, "grad_norm": 1.154572606086731, "learning_rate": 2.14778090089761e-06, "loss": 0.6918, "step": 13668 }, { "epoch": 0.7024874087778805, "grad_norm": 1.0800907611846924, "learning_rate": 2.1470973796671114e-06, "loss": 0.6634, "step": 13669 }, { "epoch": 0.7025388015212252, "grad_norm": 1.0401551723480225, "learning_rate": 2.146413937478772e-06, "loss": 0.6496, "step": 13670 }, { "epoch": 0.7025901942645698, "grad_norm": 1.0757418870925903, "learning_rate": 2.1457305743515278e-06, "loss": 0.7151, "step": 13671 }, { "epoch": 0.7026415870079145, "grad_norm": 1.047582745552063, "learning_rate": 2.14504729030431e-06, "loss": 0.6632, "step": 13672 }, { "epoch": 0.7026929797512591, "grad_norm": 1.1095973253250122, "learning_rate": 2.1443640853560484e-06, "loss": 0.6873, "step": 13673 }, { "epoch": 0.7027443724946038, "grad_norm": 1.0561424493789673, "learning_rate": 2.1436809595256762e-06, "loss": 0.6767, "step": 13674 }, { "epoch": 0.7027957652379484, "grad_norm": 1.0903648138046265, "learning_rate": 2.1429979128321147e-06, "loss": 0.6625, "step": 13675 }, { "epoch": 0.702847157981293, "grad_norm": 1.1231417655944824, "learning_rate": 2.142314945294293e-06, "loss": 0.6695, "step": 13676 }, { "epoch": 0.7028985507246377, "grad_norm": 0.7106789350509644, "learning_rate": 2.14163205693113e-06, "loss": 0.6061, "step": 13677 }, { "epoch": 0.7029499434679823, "grad_norm": 1.1653095483779907, "learning_rate": 2.140949247761546e-06, "loss": 0.7084, "step": 13678 }, { "epoch": 0.703001336211327, "grad_norm": 0.7626535892486572, "learning_rate": 2.1402665178044607e-06, "loss": 0.6794, "step": 13679 }, { "epoch": 0.7030527289546716, "grad_norm": 1.0738590955734253, "learning_rate": 2.139583867078788e-06, "loss": 0.7252, "step": 13680 }, { "epoch": 0.7031041216980163, "grad_norm": 1.0791456699371338, "learning_rate": 2.1389012956034446e-06, "loss": 0.6915, "step": 13681 }, { "epoch": 0.7031555144413609, "grad_norm": 1.16554856300354, "learning_rate": 2.138218803397336e-06, "loss": 0.6945, "step": 13682 }, { "epoch": 0.7032069071847055, "grad_norm": 1.117922067642212, "learning_rate": 2.1375363904793765e-06, "loss": 0.7271, "step": 13683 }, { "epoch": 0.7032582999280501, "grad_norm": 1.1187388896942139, "learning_rate": 2.136854056868471e-06, "loss": 0.6586, "step": 13684 }, { "epoch": 0.7033096926713948, "grad_norm": 1.023292064666748, "learning_rate": 2.1361718025835244e-06, "loss": 0.7324, "step": 13685 }, { "epoch": 0.7033610854147394, "grad_norm": 1.1801109313964844, "learning_rate": 2.135489627643436e-06, "loss": 0.6385, "step": 13686 }, { "epoch": 0.703412478158084, "grad_norm": 1.1244659423828125, "learning_rate": 2.1348075320671118e-06, "loss": 0.7755, "step": 13687 }, { "epoch": 0.7034638709014287, "grad_norm": 0.9937413334846497, "learning_rate": 2.1341255158734463e-06, "loss": 0.6908, "step": 13688 }, { "epoch": 0.7035152636447733, "grad_norm": 1.0457442998886108, "learning_rate": 2.1334435790813346e-06, "loss": 0.7156, "step": 13689 }, { "epoch": 0.703566656388118, "grad_norm": 1.1232115030288696, "learning_rate": 2.132761721709675e-06, "loss": 0.7111, "step": 13690 }, { "epoch": 0.7036180491314626, "grad_norm": 1.0953432321548462, "learning_rate": 2.132079943777352e-06, "loss": 0.7031, "step": 13691 }, { "epoch": 0.7036694418748073, "grad_norm": 1.060590147972107, "learning_rate": 2.1313982453032606e-06, "loss": 0.6321, "step": 13692 }, { "epoch": 0.7037208346181519, "grad_norm": 1.0582643747329712, "learning_rate": 2.1307166263062856e-06, "loss": 0.7644, "step": 13693 }, { "epoch": 0.7037722273614966, "grad_norm": 1.2292600870132446, "learning_rate": 2.1300350868053123e-06, "loss": 0.7469, "step": 13694 }, { "epoch": 0.7038236201048412, "grad_norm": 1.0825990438461304, "learning_rate": 2.129353626819221e-06, "loss": 0.6431, "step": 13695 }, { "epoch": 0.7038750128481859, "grad_norm": 1.0749919414520264, "learning_rate": 2.128672246366896e-06, "loss": 0.7363, "step": 13696 }, { "epoch": 0.7039264055915305, "grad_norm": 1.0899678468704224, "learning_rate": 2.1279909454672143e-06, "loss": 0.7532, "step": 13697 }, { "epoch": 0.7039777983348752, "grad_norm": 1.1461503505706787, "learning_rate": 2.1273097241390494e-06, "loss": 0.7137, "step": 13698 }, { "epoch": 0.7040291910782197, "grad_norm": 1.0445613861083984, "learning_rate": 2.1266285824012787e-06, "loss": 0.7256, "step": 13699 }, { "epoch": 0.7040805838215644, "grad_norm": 1.0993098020553589, "learning_rate": 2.1259475202727724e-06, "loss": 0.6933, "step": 13700 }, { "epoch": 0.704131976564909, "grad_norm": 1.067650556564331, "learning_rate": 2.1252665377724003e-06, "loss": 0.7316, "step": 13701 }, { "epoch": 0.7041833693082536, "grad_norm": 1.031410813331604, "learning_rate": 2.1245856349190262e-06, "loss": 0.7068, "step": 13702 }, { "epoch": 0.7042347620515983, "grad_norm": 1.032565951347351, "learning_rate": 2.1239048117315225e-06, "loss": 0.6464, "step": 13703 }, { "epoch": 0.7042861547949429, "grad_norm": 1.0571365356445312, "learning_rate": 2.1232240682287437e-06, "loss": 0.668, "step": 13704 }, { "epoch": 0.7043375475382876, "grad_norm": 1.1523305177688599, "learning_rate": 2.122543404429556e-06, "loss": 0.639, "step": 13705 }, { "epoch": 0.7043889402816322, "grad_norm": 1.123197317123413, "learning_rate": 2.1218628203528165e-06, "loss": 0.7134, "step": 13706 }, { "epoch": 0.7044403330249769, "grad_norm": 0.6851266622543335, "learning_rate": 2.12118231601738e-06, "loss": 0.6797, "step": 13707 }, { "epoch": 0.7044917257683215, "grad_norm": 1.1299890279769897, "learning_rate": 2.1205018914421e-06, "loss": 0.7074, "step": 13708 }, { "epoch": 0.7045431185116662, "grad_norm": 1.061153769493103, "learning_rate": 2.1198215466458317e-06, "loss": 0.6165, "step": 13709 }, { "epoch": 0.7045945112550108, "grad_norm": 1.0980801582336426, "learning_rate": 2.1191412816474215e-06, "loss": 0.7455, "step": 13710 }, { "epoch": 0.7046459039983555, "grad_norm": 1.1461031436920166, "learning_rate": 2.118461096465717e-06, "loss": 0.7646, "step": 13711 }, { "epoch": 0.7046972967417001, "grad_norm": 1.044925570487976, "learning_rate": 2.1177809911195655e-06, "loss": 0.6647, "step": 13712 }, { "epoch": 0.7047486894850448, "grad_norm": 1.2419744729995728, "learning_rate": 2.1171009656278086e-06, "loss": 0.7271, "step": 13713 }, { "epoch": 0.7048000822283893, "grad_norm": 1.0512958765029907, "learning_rate": 2.116421020009287e-06, "loss": 0.7044, "step": 13714 }, { "epoch": 0.704851474971734, "grad_norm": 1.0639828443527222, "learning_rate": 2.115741154282837e-06, "loss": 0.7075, "step": 13715 }, { "epoch": 0.7049028677150786, "grad_norm": 1.0687519311904907, "learning_rate": 2.1150613684673012e-06, "loss": 0.6832, "step": 13716 }, { "epoch": 0.7049542604584232, "grad_norm": 1.0934782028198242, "learning_rate": 2.1143816625815057e-06, "loss": 0.689, "step": 13717 }, { "epoch": 0.7050056532017679, "grad_norm": 1.0897703170776367, "learning_rate": 2.113702036644287e-06, "loss": 0.6598, "step": 13718 }, { "epoch": 0.7050570459451125, "grad_norm": 1.0416258573532104, "learning_rate": 2.1130224906744745e-06, "loss": 0.6947, "step": 13719 }, { "epoch": 0.7051084386884572, "grad_norm": 1.1381744146347046, "learning_rate": 2.1123430246908933e-06, "loss": 0.684, "step": 13720 }, { "epoch": 0.7051598314318018, "grad_norm": 1.0941307544708252, "learning_rate": 2.1116636387123717e-06, "loss": 0.7423, "step": 13721 }, { "epoch": 0.7052112241751465, "grad_norm": 1.0425844192504883, "learning_rate": 2.1109843327577312e-06, "loss": 0.6757, "step": 13722 }, { "epoch": 0.7052626169184911, "grad_norm": 1.129780650138855, "learning_rate": 2.110305106845793e-06, "loss": 0.7123, "step": 13723 }, { "epoch": 0.7053140096618358, "grad_norm": 1.0874446630477905, "learning_rate": 2.109625960995373e-06, "loss": 0.719, "step": 13724 }, { "epoch": 0.7053654024051804, "grad_norm": 0.7113963961601257, "learning_rate": 2.108946895225293e-06, "loss": 0.6682, "step": 13725 }, { "epoch": 0.7054167951485251, "grad_norm": 0.7825025320053101, "learning_rate": 2.108267909554361e-06, "loss": 0.6526, "step": 13726 }, { "epoch": 0.7054681878918697, "grad_norm": 1.0926584005355835, "learning_rate": 2.1075890040013937e-06, "loss": 0.7228, "step": 13727 }, { "epoch": 0.7055195806352144, "grad_norm": 1.1640859842300415, "learning_rate": 2.1069101785851964e-06, "loss": 0.7344, "step": 13728 }, { "epoch": 0.7055709733785589, "grad_norm": 1.1008111238479614, "learning_rate": 2.1062314333245807e-06, "loss": 0.6915, "step": 13729 }, { "epoch": 0.7056223661219035, "grad_norm": 1.0449261665344238, "learning_rate": 2.105552768238351e-06, "loss": 0.727, "step": 13730 }, { "epoch": 0.7056737588652482, "grad_norm": 1.0833659172058105, "learning_rate": 2.104874183345307e-06, "loss": 0.6768, "step": 13731 }, { "epoch": 0.7057251516085928, "grad_norm": 1.0926237106323242, "learning_rate": 2.1041956786642554e-06, "loss": 0.6703, "step": 13732 }, { "epoch": 0.7057765443519375, "grad_norm": 0.732568085193634, "learning_rate": 2.1035172542139876e-06, "loss": 0.6521, "step": 13733 }, { "epoch": 0.7058279370952821, "grad_norm": 1.1078683137893677, "learning_rate": 2.1028389100133055e-06, "loss": 0.693, "step": 13734 }, { "epoch": 0.7058793298386268, "grad_norm": 1.1113091707229614, "learning_rate": 2.102160646081002e-06, "loss": 0.6908, "step": 13735 }, { "epoch": 0.7059307225819714, "grad_norm": 1.0711385011672974, "learning_rate": 2.101482462435867e-06, "loss": 0.7083, "step": 13736 }, { "epoch": 0.7059821153253161, "grad_norm": 1.0911685228347778, "learning_rate": 2.100804359096691e-06, "loss": 0.6975, "step": 13737 }, { "epoch": 0.7060335080686607, "grad_norm": 1.0011365413665771, "learning_rate": 2.100126336082263e-06, "loss": 0.7599, "step": 13738 }, { "epoch": 0.7060849008120054, "grad_norm": 1.0789313316345215, "learning_rate": 2.099448393411367e-06, "loss": 0.6865, "step": 13739 }, { "epoch": 0.70613629355535, "grad_norm": 1.0851807594299316, "learning_rate": 2.0987705311027845e-06, "loss": 0.7197, "step": 13740 }, { "epoch": 0.7061876862986947, "grad_norm": 1.0809543132781982, "learning_rate": 2.0980927491753017e-06, "loss": 0.6783, "step": 13741 }, { "epoch": 0.7062390790420393, "grad_norm": 1.0799906253814697, "learning_rate": 2.09741504764769e-06, "loss": 0.7913, "step": 13742 }, { "epoch": 0.706290471785384, "grad_norm": 1.0651135444641113, "learning_rate": 2.096737426538731e-06, "loss": 0.6883, "step": 13743 }, { "epoch": 0.7063418645287285, "grad_norm": 0.6875742673873901, "learning_rate": 2.0960598858671942e-06, "loss": 0.6115, "step": 13744 }, { "epoch": 0.7063932572720731, "grad_norm": 1.1060433387756348, "learning_rate": 2.095382425651858e-06, "loss": 0.687, "step": 13745 }, { "epoch": 0.7064446500154178, "grad_norm": 1.0396978855133057, "learning_rate": 2.094705045911485e-06, "loss": 0.7132, "step": 13746 }, { "epoch": 0.7064960427587624, "grad_norm": 1.1153088808059692, "learning_rate": 2.0940277466648474e-06, "loss": 0.7064, "step": 13747 }, { "epoch": 0.7065474355021071, "grad_norm": 1.168765902519226, "learning_rate": 2.0933505279307083e-06, "loss": 0.7042, "step": 13748 }, { "epoch": 0.7065988282454517, "grad_norm": 1.0957200527191162, "learning_rate": 2.0926733897278294e-06, "loss": 0.6664, "step": 13749 }, { "epoch": 0.7066502209887964, "grad_norm": 0.757698118686676, "learning_rate": 2.0919963320749746e-06, "loss": 0.6588, "step": 13750 }, { "epoch": 0.706701613732141, "grad_norm": 1.0701937675476074, "learning_rate": 2.091319354990902e-06, "loss": 0.7045, "step": 13751 }, { "epoch": 0.7067530064754857, "grad_norm": 0.7378687262535095, "learning_rate": 2.0906424584943653e-06, "loss": 0.6012, "step": 13752 }, { "epoch": 0.7068043992188303, "grad_norm": 1.1651259660720825, "learning_rate": 2.0899656426041183e-06, "loss": 0.7384, "step": 13753 }, { "epoch": 0.706855791962175, "grad_norm": 1.07802152633667, "learning_rate": 2.0892889073389184e-06, "loss": 0.7122, "step": 13754 }, { "epoch": 0.7069071847055196, "grad_norm": 1.0923494100570679, "learning_rate": 2.0886122527175074e-06, "loss": 0.7109, "step": 13755 }, { "epoch": 0.7069585774488643, "grad_norm": 1.0939182043075562, "learning_rate": 2.0879356787586374e-06, "loss": 0.7248, "step": 13756 }, { "epoch": 0.7070099701922089, "grad_norm": 0.7409538626670837, "learning_rate": 2.0872591854810516e-06, "loss": 0.6616, "step": 13757 }, { "epoch": 0.7070613629355536, "grad_norm": 1.0640689134597778, "learning_rate": 2.0865827729034947e-06, "loss": 0.7159, "step": 13758 }, { "epoch": 0.7071127556788981, "grad_norm": 1.0520906448364258, "learning_rate": 2.0859064410447066e-06, "loss": 0.7389, "step": 13759 }, { "epoch": 0.7071641484222427, "grad_norm": 1.068968415260315, "learning_rate": 2.0852301899234252e-06, "loss": 0.7194, "step": 13760 }, { "epoch": 0.7072155411655874, "grad_norm": 1.1356557607650757, "learning_rate": 2.0845540195583862e-06, "loss": 0.6747, "step": 13761 }, { "epoch": 0.707266933908932, "grad_norm": 1.0984845161437988, "learning_rate": 2.0838779299683225e-06, "loss": 0.699, "step": 13762 }, { "epoch": 0.7073183266522767, "grad_norm": 1.0935633182525635, "learning_rate": 2.0832019211719696e-06, "loss": 0.6528, "step": 13763 }, { "epoch": 0.7073697193956213, "grad_norm": 1.1489708423614502, "learning_rate": 2.0825259931880537e-06, "loss": 0.7808, "step": 13764 }, { "epoch": 0.707421112138966, "grad_norm": 1.0749751329421997, "learning_rate": 2.081850146035303e-06, "loss": 0.7348, "step": 13765 }, { "epoch": 0.7074725048823106, "grad_norm": 1.0883599519729614, "learning_rate": 2.081174379732441e-06, "loss": 0.6776, "step": 13766 }, { "epoch": 0.7075238976256553, "grad_norm": 1.0754282474517822, "learning_rate": 2.080498694298195e-06, "loss": 0.7203, "step": 13767 }, { "epoch": 0.7075752903689999, "grad_norm": 1.0727996826171875, "learning_rate": 2.079823089751278e-06, "loss": 0.6779, "step": 13768 }, { "epoch": 0.7076266831123446, "grad_norm": 1.0764833688735962, "learning_rate": 2.0791475661104142e-06, "loss": 0.7063, "step": 13769 }, { "epoch": 0.7076780758556892, "grad_norm": 1.0532652139663696, "learning_rate": 2.0784721233943183e-06, "loss": 0.7222, "step": 13770 }, { "epoch": 0.7077294685990339, "grad_norm": 1.0890300273895264, "learning_rate": 2.0777967616217017e-06, "loss": 0.7131, "step": 13771 }, { "epoch": 0.7077808613423785, "grad_norm": 1.0651859045028687, "learning_rate": 2.077121480811279e-06, "loss": 0.6851, "step": 13772 }, { "epoch": 0.7078322540857231, "grad_norm": 1.0930585861206055, "learning_rate": 2.076446280981756e-06, "loss": 0.706, "step": 13773 }, { "epoch": 0.7078836468290677, "grad_norm": 0.7050151824951172, "learning_rate": 2.0757711621518458e-06, "loss": 0.6378, "step": 13774 }, { "epoch": 0.7079350395724123, "grad_norm": 1.042059302330017, "learning_rate": 2.0750961243402453e-06, "loss": 0.6709, "step": 13775 }, { "epoch": 0.707986432315757, "grad_norm": 0.9877983927726746, "learning_rate": 2.074421167565663e-06, "loss": 0.6792, "step": 13776 }, { "epoch": 0.7080378250591016, "grad_norm": 1.1301740407943726, "learning_rate": 2.0737462918467967e-06, "loss": 0.7207, "step": 13777 }, { "epoch": 0.7080892178024463, "grad_norm": 1.0695161819458008, "learning_rate": 2.0730714972023447e-06, "loss": 0.6362, "step": 13778 }, { "epoch": 0.7081406105457909, "grad_norm": 1.102811336517334, "learning_rate": 2.0723967836510004e-06, "loss": 0.7513, "step": 13779 }, { "epoch": 0.7081920032891356, "grad_norm": 1.2144571542739868, "learning_rate": 2.0717221512114626e-06, "loss": 0.7055, "step": 13780 }, { "epoch": 0.7082433960324802, "grad_norm": 1.067043423652649, "learning_rate": 2.0710475999024193e-06, "loss": 0.713, "step": 13781 }, { "epoch": 0.7082947887758249, "grad_norm": 1.0727665424346924, "learning_rate": 2.0703731297425577e-06, "loss": 0.7316, "step": 13782 }, { "epoch": 0.7083461815191695, "grad_norm": 1.0858210325241089, "learning_rate": 2.0696987407505707e-06, "loss": 0.6639, "step": 13783 }, { "epoch": 0.7083975742625142, "grad_norm": 1.067585825920105, "learning_rate": 2.069024432945136e-06, "loss": 0.7528, "step": 13784 }, { "epoch": 0.7084489670058588, "grad_norm": 1.07047700881958, "learning_rate": 2.06835020634494e-06, "loss": 0.7175, "step": 13785 }, { "epoch": 0.7085003597492034, "grad_norm": 1.1306891441345215, "learning_rate": 2.0676760609686615e-06, "loss": 0.6858, "step": 13786 }, { "epoch": 0.7085517524925481, "grad_norm": 1.1133527755737305, "learning_rate": 2.0670019968349787e-06, "loss": 0.7053, "step": 13787 }, { "epoch": 0.7086031452358927, "grad_norm": 1.0710773468017578, "learning_rate": 2.0663280139625647e-06, "loss": 0.7222, "step": 13788 }, { "epoch": 0.7086545379792374, "grad_norm": 0.791092574596405, "learning_rate": 2.065654112370097e-06, "loss": 0.6752, "step": 13789 }, { "epoch": 0.7087059307225819, "grad_norm": 1.046260118484497, "learning_rate": 2.064980292076244e-06, "loss": 0.7061, "step": 13790 }, { "epoch": 0.7087573234659266, "grad_norm": 1.0017520189285278, "learning_rate": 2.0643065530996736e-06, "loss": 0.6519, "step": 13791 }, { "epoch": 0.7088087162092712, "grad_norm": 1.0316901206970215, "learning_rate": 2.0636328954590555e-06, "loss": 0.6939, "step": 13792 }, { "epoch": 0.7088601089526159, "grad_norm": 1.106510043144226, "learning_rate": 2.0629593191730525e-06, "loss": 0.7262, "step": 13793 }, { "epoch": 0.7089115016959605, "grad_norm": 1.1042814254760742, "learning_rate": 2.0622858242603254e-06, "loss": 0.7186, "step": 13794 }, { "epoch": 0.7089628944393052, "grad_norm": 1.098565697669983, "learning_rate": 2.061612410739534e-06, "loss": 0.6607, "step": 13795 }, { "epoch": 0.7090142871826498, "grad_norm": 1.1267523765563965, "learning_rate": 2.0609390786293397e-06, "loss": 0.7097, "step": 13796 }, { "epoch": 0.7090656799259945, "grad_norm": 1.0906375646591187, "learning_rate": 2.0602658279483917e-06, "loss": 0.6841, "step": 13797 }, { "epoch": 0.7091170726693391, "grad_norm": 1.144810676574707, "learning_rate": 2.0595926587153477e-06, "loss": 0.6916, "step": 13798 }, { "epoch": 0.7091684654126837, "grad_norm": 1.0772943496704102, "learning_rate": 2.0589195709488564e-06, "loss": 0.7012, "step": 13799 }, { "epoch": 0.7092198581560284, "grad_norm": 1.0579253435134888, "learning_rate": 2.058246564667565e-06, "loss": 0.661, "step": 13800 }, { "epoch": 0.709271250899373, "grad_norm": 1.1185640096664429, "learning_rate": 2.057573639890123e-06, "loss": 0.6695, "step": 13801 }, { "epoch": 0.7093226436427177, "grad_norm": 0.7512776851654053, "learning_rate": 2.0569007966351727e-06, "loss": 0.6546, "step": 13802 }, { "epoch": 0.7093740363860623, "grad_norm": 1.1023831367492676, "learning_rate": 2.0562280349213564e-06, "loss": 0.7313, "step": 13803 }, { "epoch": 0.709425429129407, "grad_norm": 1.3334959745407104, "learning_rate": 2.0555553547673114e-06, "loss": 0.6624, "step": 13804 }, { "epoch": 0.7094768218727515, "grad_norm": 1.0482275485992432, "learning_rate": 2.0548827561916778e-06, "loss": 0.7459, "step": 13805 }, { "epoch": 0.7095282146160962, "grad_norm": 1.027115821838379, "learning_rate": 2.054210239213089e-06, "loss": 0.693, "step": 13806 }, { "epoch": 0.7095796073594408, "grad_norm": 1.113142967224121, "learning_rate": 2.0535378038501783e-06, "loss": 0.7185, "step": 13807 }, { "epoch": 0.7096310001027855, "grad_norm": 1.131225347518921, "learning_rate": 2.052865450121574e-06, "loss": 0.7162, "step": 13808 }, { "epoch": 0.7096823928461301, "grad_norm": 1.0713504552841187, "learning_rate": 2.0521931780459075e-06, "loss": 0.7023, "step": 13809 }, { "epoch": 0.7097337855894748, "grad_norm": 1.00968337059021, "learning_rate": 2.0515209876418034e-06, "loss": 0.6934, "step": 13810 }, { "epoch": 0.7097851783328194, "grad_norm": 0.7862069606781006, "learning_rate": 2.050848878927885e-06, "loss": 0.6301, "step": 13811 }, { "epoch": 0.709836571076164, "grad_norm": 1.2094237804412842, "learning_rate": 2.0501768519227736e-06, "loss": 0.702, "step": 13812 }, { "epoch": 0.7098879638195087, "grad_norm": 1.0520743131637573, "learning_rate": 2.0495049066450874e-06, "loss": 0.7487, "step": 13813 }, { "epoch": 0.7099393565628533, "grad_norm": 1.0909069776535034, "learning_rate": 2.048833043113445e-06, "loss": 0.6838, "step": 13814 }, { "epoch": 0.709990749306198, "grad_norm": 1.0861800909042358, "learning_rate": 2.0481612613464607e-06, "loss": 0.6901, "step": 13815 }, { "epoch": 0.7100421420495426, "grad_norm": 1.060874581336975, "learning_rate": 2.0474895613627465e-06, "loss": 0.7049, "step": 13816 }, { "epoch": 0.7100935347928873, "grad_norm": 1.1056462526321411, "learning_rate": 2.0468179431809106e-06, "loss": 0.7195, "step": 13817 }, { "epoch": 0.7101449275362319, "grad_norm": 0.744979202747345, "learning_rate": 2.046146406819565e-06, "loss": 0.6323, "step": 13818 }, { "epoch": 0.7101963202795766, "grad_norm": 1.1755642890930176, "learning_rate": 2.0454749522973098e-06, "loss": 0.6866, "step": 13819 }, { "epoch": 0.7102477130229211, "grad_norm": 1.148440957069397, "learning_rate": 2.044803579632753e-06, "loss": 0.6843, "step": 13820 }, { "epoch": 0.7102991057662658, "grad_norm": 1.0315439701080322, "learning_rate": 2.0441322888444913e-06, "loss": 0.6905, "step": 13821 }, { "epoch": 0.7103504985096104, "grad_norm": 0.800396203994751, "learning_rate": 2.0434610799511286e-06, "loss": 0.6693, "step": 13822 }, { "epoch": 0.710401891252955, "grad_norm": 1.1546156406402588, "learning_rate": 2.0427899529712577e-06, "loss": 0.7169, "step": 13823 }, { "epoch": 0.7104532839962997, "grad_norm": 1.1220064163208008, "learning_rate": 2.0421189079234717e-06, "loss": 0.7465, "step": 13824 }, { "epoch": 0.7105046767396443, "grad_norm": 1.1018762588500977, "learning_rate": 2.0414479448263674e-06, "loss": 0.6829, "step": 13825 }, { "epoch": 0.710556069482989, "grad_norm": 1.1131882667541504, "learning_rate": 2.0407770636985285e-06, "loss": 0.7004, "step": 13826 }, { "epoch": 0.7106074622263336, "grad_norm": 1.0459210872650146, "learning_rate": 2.0401062645585467e-06, "loss": 0.6362, "step": 13827 }, { "epoch": 0.7106588549696783, "grad_norm": 1.0698922872543335, "learning_rate": 2.0394355474250053e-06, "loss": 0.6971, "step": 13828 }, { "epoch": 0.7107102477130229, "grad_norm": 1.2453351020812988, "learning_rate": 2.0387649123164876e-06, "loss": 0.674, "step": 13829 }, { "epoch": 0.7107616404563676, "grad_norm": 1.2080566883087158, "learning_rate": 2.038094359251572e-06, "loss": 0.7013, "step": 13830 }, { "epoch": 0.7108130331997122, "grad_norm": 1.1145142316818237, "learning_rate": 2.0374238882488414e-06, "loss": 0.6904, "step": 13831 }, { "epoch": 0.7108644259430569, "grad_norm": 0.7786082029342651, "learning_rate": 2.036753499326868e-06, "loss": 0.652, "step": 13832 }, { "epoch": 0.7109158186864015, "grad_norm": 1.110437273979187, "learning_rate": 2.0360831925042246e-06, "loss": 0.7317, "step": 13833 }, { "epoch": 0.7109672114297462, "grad_norm": 1.044748306274414, "learning_rate": 2.0354129677994883e-06, "loss": 0.6938, "step": 13834 }, { "epoch": 0.7110186041730907, "grad_norm": 1.1183432340621948, "learning_rate": 2.0347428252312214e-06, "loss": 0.6841, "step": 13835 }, { "epoch": 0.7110699969164354, "grad_norm": 1.1269700527191162, "learning_rate": 2.0340727648179954e-06, "loss": 0.7813, "step": 13836 }, { "epoch": 0.71112138965978, "grad_norm": 0.8584217429161072, "learning_rate": 2.033402786578372e-06, "loss": 0.6511, "step": 13837 }, { "epoch": 0.7111727824031246, "grad_norm": 1.0577318668365479, "learning_rate": 2.032732890530918e-06, "loss": 0.6555, "step": 13838 }, { "epoch": 0.7112241751464693, "grad_norm": 0.7752557992935181, "learning_rate": 2.0320630766941866e-06, "loss": 0.6371, "step": 13839 }, { "epoch": 0.7112755678898139, "grad_norm": 1.0737941265106201, "learning_rate": 2.0313933450867417e-06, "loss": 0.6728, "step": 13840 }, { "epoch": 0.7113269606331586, "grad_norm": 1.0756685733795166, "learning_rate": 2.030723695727137e-06, "loss": 0.6498, "step": 13841 }, { "epoch": 0.7113783533765032, "grad_norm": 1.1216992139816284, "learning_rate": 2.0300541286339227e-06, "loss": 0.6904, "step": 13842 }, { "epoch": 0.7114297461198479, "grad_norm": 0.7423892021179199, "learning_rate": 2.029384643825654e-06, "loss": 0.6925, "step": 13843 }, { "epoch": 0.7114811388631925, "grad_norm": 1.0447964668273926, "learning_rate": 2.0287152413208784e-06, "loss": 0.7417, "step": 13844 }, { "epoch": 0.7115325316065372, "grad_norm": 1.0752873420715332, "learning_rate": 2.028045921138141e-06, "loss": 0.679, "step": 13845 }, { "epoch": 0.7115839243498818, "grad_norm": 1.0866988897323608, "learning_rate": 2.027376683295985e-06, "loss": 0.6897, "step": 13846 }, { "epoch": 0.7116353170932265, "grad_norm": 1.0621236562728882, "learning_rate": 2.026707527812958e-06, "loss": 0.7151, "step": 13847 }, { "epoch": 0.7116867098365711, "grad_norm": 1.0988514423370361, "learning_rate": 2.026038454707591e-06, "loss": 0.7421, "step": 13848 }, { "epoch": 0.7117381025799158, "grad_norm": 1.0312613248825073, "learning_rate": 2.025369463998428e-06, "loss": 0.689, "step": 13849 }, { "epoch": 0.7117894953232603, "grad_norm": 1.0904605388641357, "learning_rate": 2.0247005557040007e-06, "loss": 0.7156, "step": 13850 }, { "epoch": 0.711840888066605, "grad_norm": 1.1624822616577148, "learning_rate": 2.0240317298428412e-06, "loss": 0.7459, "step": 13851 }, { "epoch": 0.7118922808099496, "grad_norm": 1.103622317314148, "learning_rate": 2.0233629864334837e-06, "loss": 0.7406, "step": 13852 }, { "epoch": 0.7119436735532942, "grad_norm": 1.1310709714889526, "learning_rate": 2.0226943254944536e-06, "loss": 0.7409, "step": 13853 }, { "epoch": 0.7119950662966389, "grad_norm": 1.1085220575332642, "learning_rate": 2.022025747044277e-06, "loss": 0.6887, "step": 13854 }, { "epoch": 0.7120464590399835, "grad_norm": 1.0527399778366089, "learning_rate": 2.021357251101476e-06, "loss": 0.6504, "step": 13855 }, { "epoch": 0.7120978517833282, "grad_norm": 1.1091697216033936, "learning_rate": 2.020688837684576e-06, "loss": 0.7078, "step": 13856 }, { "epoch": 0.7121492445266728, "grad_norm": 0.7286615371704102, "learning_rate": 2.020020506812092e-06, "loss": 0.6149, "step": 13857 }, { "epoch": 0.7122006372700175, "grad_norm": 1.0779544115066528, "learning_rate": 2.019352258502543e-06, "loss": 0.6673, "step": 13858 }, { "epoch": 0.7122520300133621, "grad_norm": 0.7192875742912292, "learning_rate": 2.0186840927744407e-06, "loss": 0.6374, "step": 13859 }, { "epoch": 0.7123034227567068, "grad_norm": 1.079571008682251, "learning_rate": 2.0180160096463005e-06, "loss": 0.6542, "step": 13860 }, { "epoch": 0.7123548155000514, "grad_norm": 1.1336817741394043, "learning_rate": 2.0173480091366304e-06, "loss": 0.6861, "step": 13861 }, { "epoch": 0.7124062082433961, "grad_norm": 1.1800259351730347, "learning_rate": 2.016680091263939e-06, "loss": 0.7058, "step": 13862 }, { "epoch": 0.7124576009867407, "grad_norm": 1.0751947164535522, "learning_rate": 2.0160122560467306e-06, "loss": 0.7187, "step": 13863 }, { "epoch": 0.7125089937300854, "grad_norm": 1.0949985980987549, "learning_rate": 2.015344503503506e-06, "loss": 0.6907, "step": 13864 }, { "epoch": 0.7125603864734299, "grad_norm": 1.0753933191299438, "learning_rate": 2.014676833652771e-06, "loss": 0.7159, "step": 13865 }, { "epoch": 0.7126117792167745, "grad_norm": 0.7183886170387268, "learning_rate": 2.014009246513019e-06, "loss": 0.6559, "step": 13866 }, { "epoch": 0.7126631719601192, "grad_norm": 1.0763211250305176, "learning_rate": 2.0133417421027517e-06, "loss": 0.7202, "step": 13867 }, { "epoch": 0.7127145647034638, "grad_norm": 1.0732475519180298, "learning_rate": 2.0126743204404565e-06, "loss": 0.6899, "step": 13868 }, { "epoch": 0.7127659574468085, "grad_norm": 1.0360926389694214, "learning_rate": 2.0120069815446293e-06, "loss": 0.6815, "step": 13869 }, { "epoch": 0.7128173501901531, "grad_norm": 0.6912949085235596, "learning_rate": 2.011339725433758e-06, "loss": 0.6544, "step": 13870 }, { "epoch": 0.7128687429334978, "grad_norm": 1.1039221286773682, "learning_rate": 2.010672552126329e-06, "loss": 0.7274, "step": 13871 }, { "epoch": 0.7129201356768424, "grad_norm": 1.1151742935180664, "learning_rate": 2.0100054616408266e-06, "loss": 0.7279, "step": 13872 }, { "epoch": 0.7129715284201871, "grad_norm": 1.0857514142990112, "learning_rate": 2.009338453995735e-06, "loss": 0.6484, "step": 13873 }, { "epoch": 0.7130229211635317, "grad_norm": 0.9999823570251465, "learning_rate": 2.0086715292095334e-06, "loss": 0.72, "step": 13874 }, { "epoch": 0.7130743139068764, "grad_norm": 0.7725629210472107, "learning_rate": 2.008004687300697e-06, "loss": 0.685, "step": 13875 }, { "epoch": 0.713125706650221, "grad_norm": 1.081921935081482, "learning_rate": 2.0073379282877076e-06, "loss": 0.6604, "step": 13876 }, { "epoch": 0.7131770993935657, "grad_norm": 1.0342121124267578, "learning_rate": 2.0066712521890303e-06, "loss": 0.6824, "step": 13877 }, { "epoch": 0.7132284921369103, "grad_norm": 1.1725395917892456, "learning_rate": 2.0060046590231414e-06, "loss": 0.7204, "step": 13878 }, { "epoch": 0.713279884880255, "grad_norm": 1.1094027757644653, "learning_rate": 2.005338148808508e-06, "loss": 0.7062, "step": 13879 }, { "epoch": 0.7133312776235996, "grad_norm": 1.149617075920105, "learning_rate": 2.004671721563597e-06, "loss": 0.757, "step": 13880 }, { "epoch": 0.7133826703669441, "grad_norm": 1.102684736251831, "learning_rate": 2.004005377306869e-06, "loss": 0.731, "step": 13881 }, { "epoch": 0.7134340631102888, "grad_norm": 1.0622472763061523, "learning_rate": 2.0033391160567906e-06, "loss": 0.6775, "step": 13882 }, { "epoch": 0.7134854558536334, "grad_norm": 0.7642338871955872, "learning_rate": 2.0026729378318184e-06, "loss": 0.6592, "step": 13883 }, { "epoch": 0.7135368485969781, "grad_norm": 1.162483811378479, "learning_rate": 2.002006842650408e-06, "loss": 0.6824, "step": 13884 }, { "epoch": 0.7135882413403227, "grad_norm": 1.0743509531021118, "learning_rate": 2.0013408305310184e-06, "loss": 0.6999, "step": 13885 }, { "epoch": 0.7136396340836674, "grad_norm": 1.0861505270004272, "learning_rate": 2.000674901492099e-06, "loss": 0.6917, "step": 13886 }, { "epoch": 0.713691026827012, "grad_norm": 1.0744562149047852, "learning_rate": 2.0000090555521006e-06, "loss": 0.715, "step": 13887 }, { "epoch": 0.7137424195703567, "grad_norm": 1.0702255964279175, "learning_rate": 1.99934329272947e-06, "loss": 0.7236, "step": 13888 }, { "epoch": 0.7137938123137013, "grad_norm": 1.0426336526870728, "learning_rate": 1.9986776130426566e-06, "loss": 0.7253, "step": 13889 }, { "epoch": 0.713845205057046, "grad_norm": 1.0441347360610962, "learning_rate": 1.9980120165100974e-06, "loss": 0.6385, "step": 13890 }, { "epoch": 0.7138965978003906, "grad_norm": 0.9434767961502075, "learning_rate": 1.997346503150239e-06, "loss": 0.6494, "step": 13891 }, { "epoch": 0.7139479905437353, "grad_norm": 1.1863094568252563, "learning_rate": 1.9966810729815175e-06, "loss": 0.7334, "step": 13892 }, { "epoch": 0.7139993832870799, "grad_norm": 1.0655384063720703, "learning_rate": 1.9960157260223673e-06, "loss": 0.7167, "step": 13893 }, { "epoch": 0.7140507760304245, "grad_norm": 0.6862566471099854, "learning_rate": 1.995350462291227e-06, "loss": 0.6332, "step": 13894 }, { "epoch": 0.7141021687737692, "grad_norm": 1.0659171342849731, "learning_rate": 1.9946852818065253e-06, "loss": 0.6854, "step": 13895 }, { "epoch": 0.7141535615171137, "grad_norm": 1.0813877582550049, "learning_rate": 1.9940201845866925e-06, "loss": 0.6685, "step": 13896 }, { "epoch": 0.7142049542604584, "grad_norm": 1.033320426940918, "learning_rate": 1.9933551706501537e-06, "loss": 0.6956, "step": 13897 }, { "epoch": 0.714256347003803, "grad_norm": 0.7505856156349182, "learning_rate": 1.9926902400153374e-06, "loss": 0.6595, "step": 13898 }, { "epoch": 0.7143077397471477, "grad_norm": 1.1093891859054565, "learning_rate": 1.992025392700664e-06, "loss": 0.7293, "step": 13899 }, { "epoch": 0.7143591324904923, "grad_norm": 1.073056936264038, "learning_rate": 1.9913606287245537e-06, "loss": 0.6624, "step": 13900 }, { "epoch": 0.714410525233837, "grad_norm": 1.0899986028671265, "learning_rate": 1.990695948105423e-06, "loss": 0.7161, "step": 13901 }, { "epoch": 0.7144619179771816, "grad_norm": 0.7137531042098999, "learning_rate": 1.9900313508616904e-06, "loss": 0.6422, "step": 13902 }, { "epoch": 0.7145133107205263, "grad_norm": 1.0512052774429321, "learning_rate": 1.9893668370117677e-06, "loss": 0.6713, "step": 13903 }, { "epoch": 0.7145647034638709, "grad_norm": 0.7321227192878723, "learning_rate": 1.9887024065740657e-06, "loss": 0.6648, "step": 13904 }, { "epoch": 0.7146160962072156, "grad_norm": 1.0789406299591064, "learning_rate": 1.988038059566993e-06, "loss": 0.7391, "step": 13905 }, { "epoch": 0.7146674889505602, "grad_norm": 0.730172872543335, "learning_rate": 1.9873737960089545e-06, "loss": 0.6705, "step": 13906 }, { "epoch": 0.7147188816939049, "grad_norm": 1.0356365442276, "learning_rate": 1.9867096159183576e-06, "loss": 0.6734, "step": 13907 }, { "epoch": 0.7147702744372495, "grad_norm": 1.6314005851745605, "learning_rate": 1.986045519313602e-06, "loss": 0.7727, "step": 13908 }, { "epoch": 0.7148216671805941, "grad_norm": 1.0658164024353027, "learning_rate": 1.985381506213087e-06, "loss": 0.7016, "step": 13909 }, { "epoch": 0.7148730599239388, "grad_norm": 1.1861186027526855, "learning_rate": 1.9847175766352083e-06, "loss": 0.7104, "step": 13910 }, { "epoch": 0.7149244526672833, "grad_norm": 1.0932592153549194, "learning_rate": 1.9840537305983643e-06, "loss": 0.7547, "step": 13911 }, { "epoch": 0.714975845410628, "grad_norm": 0.8178917765617371, "learning_rate": 1.9833899681209444e-06, "loss": 0.6544, "step": 13912 }, { "epoch": 0.7150272381539726, "grad_norm": 0.7472593784332275, "learning_rate": 1.9827262892213408e-06, "loss": 0.6445, "step": 13913 }, { "epoch": 0.7150786308973173, "grad_norm": 0.9168313145637512, "learning_rate": 1.9820626939179378e-06, "loss": 0.6383, "step": 13914 }, { "epoch": 0.7151300236406619, "grad_norm": 1.0839271545410156, "learning_rate": 1.981399182229125e-06, "loss": 0.7092, "step": 13915 }, { "epoch": 0.7151814163840066, "grad_norm": 1.069693684577942, "learning_rate": 1.980735754173284e-06, "loss": 0.6845, "step": 13916 }, { "epoch": 0.7152328091273512, "grad_norm": 0.9683200716972351, "learning_rate": 1.980072409768794e-06, "loss": 0.6869, "step": 13917 }, { "epoch": 0.7152842018706959, "grad_norm": 0.9229991436004639, "learning_rate": 1.9794091490340385e-06, "loss": 0.6556, "step": 13918 }, { "epoch": 0.7153355946140405, "grad_norm": 1.0928629636764526, "learning_rate": 1.9787459719873857e-06, "loss": 0.6958, "step": 13919 }, { "epoch": 0.7153869873573852, "grad_norm": 1.0500705242156982, "learning_rate": 1.9780828786472167e-06, "loss": 0.6618, "step": 13920 }, { "epoch": 0.7154383801007298, "grad_norm": 1.1658707857131958, "learning_rate": 1.9774198690319004e-06, "loss": 0.7347, "step": 13921 }, { "epoch": 0.7154897728440744, "grad_norm": 1.0598798990249634, "learning_rate": 1.976756943159806e-06, "loss": 0.6932, "step": 13922 }, { "epoch": 0.7155411655874191, "grad_norm": 1.0335803031921387, "learning_rate": 1.9760941010492983e-06, "loss": 0.694, "step": 13923 }, { "epoch": 0.7155925583307637, "grad_norm": 1.1211134195327759, "learning_rate": 1.9754313427187466e-06, "loss": 0.6964, "step": 13924 }, { "epoch": 0.7156439510741084, "grad_norm": 1.3624458312988281, "learning_rate": 1.9747686681865107e-06, "loss": 0.6904, "step": 13925 }, { "epoch": 0.7156953438174529, "grad_norm": 1.302241325378418, "learning_rate": 1.974106077470949e-06, "loss": 0.6994, "step": 13926 }, { "epoch": 0.7157467365607976, "grad_norm": 1.0483657121658325, "learning_rate": 1.973443570590423e-06, "loss": 0.672, "step": 13927 }, { "epoch": 0.7157981293041422, "grad_norm": 1.1144992113113403, "learning_rate": 1.9727811475632836e-06, "loss": 0.7257, "step": 13928 }, { "epoch": 0.7158495220474869, "grad_norm": 1.0696719884872437, "learning_rate": 1.972118808407887e-06, "loss": 0.7456, "step": 13929 }, { "epoch": 0.7159009147908315, "grad_norm": 1.0393975973129272, "learning_rate": 1.971456553142582e-06, "loss": 0.6634, "step": 13930 }, { "epoch": 0.7159523075341762, "grad_norm": 1.0471985340118408, "learning_rate": 1.9707943817857206e-06, "loss": 0.6918, "step": 13931 }, { "epoch": 0.7160037002775208, "grad_norm": 1.1355485916137695, "learning_rate": 1.970132294355643e-06, "loss": 0.7217, "step": 13932 }, { "epoch": 0.7160550930208655, "grad_norm": 0.7269394993782043, "learning_rate": 1.969470290870697e-06, "loss": 0.6349, "step": 13933 }, { "epoch": 0.7161064857642101, "grad_norm": 1.1481826305389404, "learning_rate": 1.968808371349224e-06, "loss": 0.6968, "step": 13934 }, { "epoch": 0.7161578785075547, "grad_norm": 0.8469072580337524, "learning_rate": 1.9681465358095596e-06, "loss": 0.6801, "step": 13935 }, { "epoch": 0.7162092712508994, "grad_norm": 1.08064603805542, "learning_rate": 1.967484784270045e-06, "loss": 0.6688, "step": 13936 }, { "epoch": 0.716260663994244, "grad_norm": 0.706304132938385, "learning_rate": 1.9668231167490126e-06, "loss": 0.6405, "step": 13937 }, { "epoch": 0.7163120567375887, "grad_norm": 1.154129147529602, "learning_rate": 1.9661615332647944e-06, "loss": 0.6728, "step": 13938 }, { "epoch": 0.7163634494809333, "grad_norm": 0.9919379949569702, "learning_rate": 1.9655000338357184e-06, "loss": 0.6142, "step": 13939 }, { "epoch": 0.716414842224278, "grad_norm": 1.1220649480819702, "learning_rate": 1.9648386184801173e-06, "loss": 0.7197, "step": 13940 }, { "epoch": 0.7164662349676225, "grad_norm": 1.0807689428329468, "learning_rate": 1.964177287216309e-06, "loss": 0.7458, "step": 13941 }, { "epoch": 0.7165176277109672, "grad_norm": 1.0619655847549438, "learning_rate": 1.9635160400626217e-06, "loss": 0.6963, "step": 13942 }, { "epoch": 0.7165690204543118, "grad_norm": 1.0736488103866577, "learning_rate": 1.9628548770373745e-06, "loss": 0.7633, "step": 13943 }, { "epoch": 0.7166204131976565, "grad_norm": 1.0870729684829712, "learning_rate": 1.9621937981588832e-06, "loss": 0.7087, "step": 13944 }, { "epoch": 0.7166718059410011, "grad_norm": 0.7145068049430847, "learning_rate": 1.961532803445467e-06, "loss": 0.688, "step": 13945 }, { "epoch": 0.7167231986843458, "grad_norm": 1.0990524291992188, "learning_rate": 1.960871892915437e-06, "loss": 0.7296, "step": 13946 }, { "epoch": 0.7167745914276904, "grad_norm": 1.1086374521255493, "learning_rate": 1.9602110665871078e-06, "loss": 0.7141, "step": 13947 }, { "epoch": 0.716825984171035, "grad_norm": 1.03158438205719, "learning_rate": 1.9595503244787816e-06, "loss": 0.6675, "step": 13948 }, { "epoch": 0.7168773769143797, "grad_norm": 1.1512248516082764, "learning_rate": 1.9588896666087715e-06, "loss": 0.6856, "step": 13949 }, { "epoch": 0.7169287696577243, "grad_norm": 0.7473583221435547, "learning_rate": 1.958229092995378e-06, "loss": 0.675, "step": 13950 }, { "epoch": 0.716980162401069, "grad_norm": 1.0900137424468994, "learning_rate": 1.957568603656904e-06, "loss": 0.7451, "step": 13951 }, { "epoch": 0.7170315551444136, "grad_norm": 0.8128623962402344, "learning_rate": 1.956908198611647e-06, "loss": 0.6789, "step": 13952 }, { "epoch": 0.7170829478877583, "grad_norm": 1.0715359449386597, "learning_rate": 1.9562478778779072e-06, "loss": 0.7375, "step": 13953 }, { "epoch": 0.7171343406311029, "grad_norm": 1.1331875324249268, "learning_rate": 1.9555876414739775e-06, "loss": 0.7885, "step": 13954 }, { "epoch": 0.7171857333744476, "grad_norm": 1.055611491203308, "learning_rate": 1.954927489418151e-06, "loss": 0.7203, "step": 13955 }, { "epoch": 0.7172371261177922, "grad_norm": 1.1216692924499512, "learning_rate": 1.9542674217287185e-06, "loss": 0.7524, "step": 13956 }, { "epoch": 0.7172885188611368, "grad_norm": 1.0702455043792725, "learning_rate": 1.953607438423964e-06, "loss": 0.7292, "step": 13957 }, { "epoch": 0.7173399116044814, "grad_norm": 1.0641452074050903, "learning_rate": 1.9529475395221773e-06, "loss": 0.6756, "step": 13958 }, { "epoch": 0.717391304347826, "grad_norm": 1.0655573606491089, "learning_rate": 1.95228772504164e-06, "loss": 0.7202, "step": 13959 }, { "epoch": 0.7174426970911707, "grad_norm": 1.077936053276062, "learning_rate": 1.951627995000633e-06, "loss": 0.7355, "step": 13960 }, { "epoch": 0.7174940898345153, "grad_norm": 0.7696951031684875, "learning_rate": 1.9509683494174326e-06, "loss": 0.6018, "step": 13961 }, { "epoch": 0.71754548257786, "grad_norm": 1.0416902303695679, "learning_rate": 1.9503087883103176e-06, "loss": 0.7373, "step": 13962 }, { "epoch": 0.7175968753212046, "grad_norm": 1.1297434568405151, "learning_rate": 1.949649311697562e-06, "loss": 0.7428, "step": 13963 }, { "epoch": 0.7176482680645493, "grad_norm": 1.2995857000350952, "learning_rate": 1.948989919597435e-06, "loss": 0.7006, "step": 13964 }, { "epoch": 0.7176996608078939, "grad_norm": 1.142939805984497, "learning_rate": 1.9483306120282046e-06, "loss": 0.7303, "step": 13965 }, { "epoch": 0.7177510535512386, "grad_norm": 1.0437626838684082, "learning_rate": 1.9476713890081416e-06, "loss": 0.6739, "step": 13966 }, { "epoch": 0.7178024462945832, "grad_norm": 1.120582103729248, "learning_rate": 1.9470122505555083e-06, "loss": 0.6981, "step": 13967 }, { "epoch": 0.7178538390379279, "grad_norm": 1.157538890838623, "learning_rate": 1.946353196688565e-06, "loss": 0.6581, "step": 13968 }, { "epoch": 0.7179052317812725, "grad_norm": 1.0822207927703857, "learning_rate": 1.9456942274255762e-06, "loss": 0.7081, "step": 13969 }, { "epoch": 0.7179566245246172, "grad_norm": 0.7534598708152771, "learning_rate": 1.9450353427847922e-06, "loss": 0.6442, "step": 13970 }, { "epoch": 0.7180080172679618, "grad_norm": 1.0986030101776123, "learning_rate": 1.9443765427844743e-06, "loss": 0.7262, "step": 13971 }, { "epoch": 0.7180594100113064, "grad_norm": 1.1220083236694336, "learning_rate": 1.943717827442872e-06, "loss": 0.6488, "step": 13972 }, { "epoch": 0.718110802754651, "grad_norm": 1.099743366241455, "learning_rate": 1.9430591967782364e-06, "loss": 0.6651, "step": 13973 }, { "epoch": 0.7181621954979956, "grad_norm": 1.0417327880859375, "learning_rate": 1.9424006508088134e-06, "loss": 0.7712, "step": 13974 }, { "epoch": 0.7182135882413403, "grad_norm": 1.0913273096084595, "learning_rate": 1.941742189552852e-06, "loss": 0.7461, "step": 13975 }, { "epoch": 0.7182649809846849, "grad_norm": 1.1196143627166748, "learning_rate": 1.9410838130285937e-06, "loss": 0.6778, "step": 13976 }, { "epoch": 0.7183163737280296, "grad_norm": 1.1071196794509888, "learning_rate": 1.9404255212542783e-06, "loss": 0.7394, "step": 13977 }, { "epoch": 0.7183677664713742, "grad_norm": 1.0539600849151611, "learning_rate": 1.939767314248147e-06, "loss": 0.6849, "step": 13978 }, { "epoch": 0.7184191592147189, "grad_norm": 1.1147326231002808, "learning_rate": 1.9391091920284346e-06, "loss": 0.7075, "step": 13979 }, { "epoch": 0.7184705519580635, "grad_norm": 1.140195369720459, "learning_rate": 1.938451154613375e-06, "loss": 0.7137, "step": 13980 }, { "epoch": 0.7185219447014082, "grad_norm": 1.1057136058807373, "learning_rate": 1.9377932020211974e-06, "loss": 0.7117, "step": 13981 }, { "epoch": 0.7185733374447528, "grad_norm": 1.087666392326355, "learning_rate": 1.9371353342701376e-06, "loss": 0.6851, "step": 13982 }, { "epoch": 0.7186247301880975, "grad_norm": 1.0839101076126099, "learning_rate": 1.936477551378414e-06, "loss": 0.7102, "step": 13983 }, { "epoch": 0.7186761229314421, "grad_norm": 1.0515133142471313, "learning_rate": 1.935819853364257e-06, "loss": 0.7062, "step": 13984 }, { "epoch": 0.7187275156747868, "grad_norm": 1.3131293058395386, "learning_rate": 1.9351622402458864e-06, "loss": 0.7499, "step": 13985 }, { "epoch": 0.7187789084181314, "grad_norm": 0.992249608039856, "learning_rate": 1.9345047120415207e-06, "loss": 0.7061, "step": 13986 }, { "epoch": 0.718830301161476, "grad_norm": 1.1263328790664673, "learning_rate": 1.933847268769381e-06, "loss": 0.7353, "step": 13987 }, { "epoch": 0.7188816939048206, "grad_norm": 1.087959885597229, "learning_rate": 1.93318991044768e-06, "loss": 0.6852, "step": 13988 }, { "epoch": 0.7189330866481652, "grad_norm": 1.0750374794006348, "learning_rate": 1.93253263709463e-06, "loss": 0.7051, "step": 13989 }, { "epoch": 0.7189844793915099, "grad_norm": 1.0830811262130737, "learning_rate": 1.9318754487284407e-06, "loss": 0.7198, "step": 13990 }, { "epoch": 0.7190358721348545, "grad_norm": 1.0481847524642944, "learning_rate": 1.9312183453673232e-06, "loss": 0.647, "step": 13991 }, { "epoch": 0.7190872648781992, "grad_norm": 1.0567548274993896, "learning_rate": 1.9305613270294808e-06, "loss": 0.727, "step": 13992 }, { "epoch": 0.7191386576215438, "grad_norm": 1.1105051040649414, "learning_rate": 1.9299043937331173e-06, "loss": 0.7782, "step": 13993 }, { "epoch": 0.7191900503648885, "grad_norm": 1.083558440208435, "learning_rate": 1.929247545496432e-06, "loss": 0.7314, "step": 13994 }, { "epoch": 0.7192414431082331, "grad_norm": 1.1892979145050049, "learning_rate": 1.928590782337627e-06, "loss": 0.6949, "step": 13995 }, { "epoch": 0.7192928358515778, "grad_norm": 1.1118824481964111, "learning_rate": 1.9279341042748963e-06, "loss": 0.727, "step": 13996 }, { "epoch": 0.7193442285949224, "grad_norm": 1.0165057182312012, "learning_rate": 1.9272775113264313e-06, "loss": 0.7082, "step": 13997 }, { "epoch": 0.7193956213382671, "grad_norm": 1.100739598274231, "learning_rate": 1.9266210035104305e-06, "loss": 0.7027, "step": 13998 }, { "epoch": 0.7194470140816117, "grad_norm": 1.0895593166351318, "learning_rate": 1.925964580845075e-06, "loss": 0.7116, "step": 13999 }, { "epoch": 0.7194984068249564, "grad_norm": 1.1292909383773804, "learning_rate": 1.9253082433485566e-06, "loss": 0.7149, "step": 14000 }, { "epoch": 0.719549799568301, "grad_norm": 1.0232349634170532, "learning_rate": 1.9246519910390584e-06, "loss": 0.6561, "step": 14001 }, { "epoch": 0.7196011923116455, "grad_norm": 1.0902801752090454, "learning_rate": 1.923995823934762e-06, "loss": 0.7163, "step": 14002 }, { "epoch": 0.7196525850549902, "grad_norm": 1.124680995941162, "learning_rate": 1.923339742053846e-06, "loss": 0.7194, "step": 14003 }, { "epoch": 0.7197039777983348, "grad_norm": 1.135898232460022, "learning_rate": 1.9226837454144914e-06, "loss": 0.7171, "step": 14004 }, { "epoch": 0.7197553705416795, "grad_norm": 1.0684188604354858, "learning_rate": 1.92202783403487e-06, "loss": 0.7454, "step": 14005 }, { "epoch": 0.7198067632850241, "grad_norm": 0.7131996154785156, "learning_rate": 1.921372007933156e-06, "loss": 0.6487, "step": 14006 }, { "epoch": 0.7198581560283688, "grad_norm": 1.1133602857589722, "learning_rate": 1.9207162671275165e-06, "loss": 0.6443, "step": 14007 }, { "epoch": 0.7199095487717134, "grad_norm": 1.0943559408187866, "learning_rate": 1.9200606116361243e-06, "loss": 0.7142, "step": 14008 }, { "epoch": 0.7199609415150581, "grad_norm": 1.0612866878509521, "learning_rate": 1.9194050414771415e-06, "loss": 0.675, "step": 14009 }, { "epoch": 0.7200123342584027, "grad_norm": 1.0420299768447876, "learning_rate": 1.918749556668731e-06, "loss": 0.6321, "step": 14010 }, { "epoch": 0.7200637270017474, "grad_norm": 0.9825411438941956, "learning_rate": 1.918094157229058e-06, "loss": 0.6837, "step": 14011 }, { "epoch": 0.720115119745092, "grad_norm": 1.0787869691848755, "learning_rate": 1.9174388431762744e-06, "loss": 0.7198, "step": 14012 }, { "epoch": 0.7201665124884367, "grad_norm": 1.0238080024719238, "learning_rate": 1.9167836145285406e-06, "loss": 0.7404, "step": 14013 }, { "epoch": 0.7202179052317813, "grad_norm": 0.8096597790718079, "learning_rate": 1.9161284713040097e-06, "loss": 0.6377, "step": 14014 }, { "epoch": 0.720269297975126, "grad_norm": 1.0968657732009888, "learning_rate": 1.915473413520832e-06, "loss": 0.6932, "step": 14015 }, { "epoch": 0.7203206907184706, "grad_norm": 1.1483261585235596, "learning_rate": 1.9148184411971555e-06, "loss": 0.6613, "step": 14016 }, { "epoch": 0.7203720834618151, "grad_norm": 1.1127554178237915, "learning_rate": 1.91416355435113e-06, "loss": 0.7074, "step": 14017 }, { "epoch": 0.7204234762051598, "grad_norm": 0.795612096786499, "learning_rate": 1.9135087530008977e-06, "loss": 0.6438, "step": 14018 }, { "epoch": 0.7204748689485044, "grad_norm": 1.0631234645843506, "learning_rate": 1.9128540371645992e-06, "loss": 0.664, "step": 14019 }, { "epoch": 0.7205262616918491, "grad_norm": 1.9766687154769897, "learning_rate": 1.912199406860379e-06, "loss": 0.6613, "step": 14020 }, { "epoch": 0.7205776544351937, "grad_norm": 1.1213140487670898, "learning_rate": 1.911544862106367e-06, "loss": 0.7122, "step": 14021 }, { "epoch": 0.7206290471785384, "grad_norm": 1.2482061386108398, "learning_rate": 1.910890402920703e-06, "loss": 0.7507, "step": 14022 }, { "epoch": 0.720680439921883, "grad_norm": 0.9869159460067749, "learning_rate": 1.9102360293215164e-06, "loss": 0.6705, "step": 14023 }, { "epoch": 0.7207318326652277, "grad_norm": 1.0798088312149048, "learning_rate": 1.9095817413269425e-06, "loss": 0.6587, "step": 14024 }, { "epoch": 0.7207832254085723, "grad_norm": 1.0550565719604492, "learning_rate": 1.908927538955101e-06, "loss": 0.7206, "step": 14025 }, { "epoch": 0.720834618151917, "grad_norm": 2.960447311401367, "learning_rate": 1.908273422224123e-06, "loss": 0.7113, "step": 14026 }, { "epoch": 0.7208860108952616, "grad_norm": 0.9569618105888367, "learning_rate": 1.907619391152131e-06, "loss": 0.6544, "step": 14027 }, { "epoch": 0.7209374036386063, "grad_norm": 1.2490954399108887, "learning_rate": 1.9069654457572411e-06, "loss": 0.7229, "step": 14028 }, { "epoch": 0.7209887963819509, "grad_norm": 0.6951115727424622, "learning_rate": 1.9063115860575764e-06, "loss": 0.6467, "step": 14029 }, { "epoch": 0.7210401891252955, "grad_norm": 1.1127711534500122, "learning_rate": 1.9056578120712517e-06, "loss": 0.7608, "step": 14030 }, { "epoch": 0.7210915818686402, "grad_norm": 0.7124976515769958, "learning_rate": 1.9050041238163786e-06, "loss": 0.633, "step": 14031 }, { "epoch": 0.7211429746119847, "grad_norm": 1.055277943611145, "learning_rate": 1.9043505213110668e-06, "loss": 0.7155, "step": 14032 }, { "epoch": 0.7211943673553294, "grad_norm": 1.1538535356521606, "learning_rate": 1.9036970045734316e-06, "loss": 0.75, "step": 14033 }, { "epoch": 0.721245760098674, "grad_norm": 1.0537784099578857, "learning_rate": 1.9030435736215708e-06, "loss": 0.695, "step": 14034 }, { "epoch": 0.7212971528420187, "grad_norm": 1.0950802564620972, "learning_rate": 1.9023902284735934e-06, "loss": 0.6873, "step": 14035 }, { "epoch": 0.7213485455853633, "grad_norm": 1.0882608890533447, "learning_rate": 1.9017369691476e-06, "loss": 0.6744, "step": 14036 }, { "epoch": 0.721399938328708, "grad_norm": 0.7362953424453735, "learning_rate": 1.9010837956616878e-06, "loss": 0.6281, "step": 14037 }, { "epoch": 0.7214513310720526, "grad_norm": 1.0581896305084229, "learning_rate": 1.9004307080339574e-06, "loss": 0.7212, "step": 14038 }, { "epoch": 0.7215027238153973, "grad_norm": 0.8562883734703064, "learning_rate": 1.8997777062824984e-06, "loss": 0.6542, "step": 14039 }, { "epoch": 0.7215541165587419, "grad_norm": 1.0497134923934937, "learning_rate": 1.899124790425409e-06, "loss": 0.7343, "step": 14040 }, { "epoch": 0.7216055093020866, "grad_norm": 1.1082801818847656, "learning_rate": 1.8984719604807717e-06, "loss": 0.7149, "step": 14041 }, { "epoch": 0.7216569020454312, "grad_norm": 0.8463016152381897, "learning_rate": 1.8978192164666786e-06, "loss": 0.6197, "step": 14042 }, { "epoch": 0.7217082947887759, "grad_norm": 0.6962987780570984, "learning_rate": 1.897166558401214e-06, "loss": 0.6048, "step": 14043 }, { "epoch": 0.7217596875321205, "grad_norm": 1.0162689685821533, "learning_rate": 1.8965139863024584e-06, "loss": 0.7056, "step": 14044 }, { "epoch": 0.7218110802754651, "grad_norm": 1.0665395259857178, "learning_rate": 1.8958615001884917e-06, "loss": 0.6721, "step": 14045 }, { "epoch": 0.7218624730188098, "grad_norm": 0.8321764469146729, "learning_rate": 1.8952091000773943e-06, "loss": 0.6403, "step": 14046 }, { "epoch": 0.7219138657621544, "grad_norm": 1.1409231424331665, "learning_rate": 1.8945567859872399e-06, "loss": 0.7102, "step": 14047 }, { "epoch": 0.721965258505499, "grad_norm": 1.1195776462554932, "learning_rate": 1.8939045579360998e-06, "loss": 0.7258, "step": 14048 }, { "epoch": 0.7220166512488436, "grad_norm": 1.047645926475525, "learning_rate": 1.8932524159420502e-06, "loss": 0.686, "step": 14049 }, { "epoch": 0.7220680439921883, "grad_norm": 0.8034675121307373, "learning_rate": 1.8926003600231512e-06, "loss": 0.6225, "step": 14050 }, { "epoch": 0.7221194367355329, "grad_norm": 1.1398842334747314, "learning_rate": 1.891948390197475e-06, "loss": 0.7359, "step": 14051 }, { "epoch": 0.7221708294788776, "grad_norm": 1.0306109189987183, "learning_rate": 1.891296506483083e-06, "loss": 0.7065, "step": 14052 }, { "epoch": 0.7222222222222222, "grad_norm": 1.0688304901123047, "learning_rate": 1.8906447088980357e-06, "loss": 0.7684, "step": 14053 }, { "epoch": 0.7222736149655669, "grad_norm": 1.1035021543502808, "learning_rate": 1.8899929974603897e-06, "loss": 0.7223, "step": 14054 }, { "epoch": 0.7223250077089115, "grad_norm": 1.081215739250183, "learning_rate": 1.8893413721882059e-06, "loss": 0.6959, "step": 14055 }, { "epoch": 0.7223764004522562, "grad_norm": 1.0626963376998901, "learning_rate": 1.8886898330995358e-06, "loss": 0.7463, "step": 14056 }, { "epoch": 0.7224277931956008, "grad_norm": 1.0905474424362183, "learning_rate": 1.8880383802124302e-06, "loss": 0.7273, "step": 14057 }, { "epoch": 0.7224791859389454, "grad_norm": 0.6941466331481934, "learning_rate": 1.8873870135449373e-06, "loss": 0.6575, "step": 14058 }, { "epoch": 0.7225305786822901, "grad_norm": 1.054403305053711, "learning_rate": 1.8867357331151071e-06, "loss": 0.7456, "step": 14059 }, { "epoch": 0.7225819714256347, "grad_norm": 1.079525113105774, "learning_rate": 1.886084538940982e-06, "loss": 0.6352, "step": 14060 }, { "epoch": 0.7226333641689794, "grad_norm": 1.1174298524856567, "learning_rate": 1.8854334310406025e-06, "loss": 0.7563, "step": 14061 }, { "epoch": 0.722684756912324, "grad_norm": 0.6860200762748718, "learning_rate": 1.8847824094320128e-06, "loss": 0.6246, "step": 14062 }, { "epoch": 0.7227361496556686, "grad_norm": 1.0960761308670044, "learning_rate": 1.8841314741332429e-06, "loss": 0.7113, "step": 14063 }, { "epoch": 0.7227875423990132, "grad_norm": 1.1676884889602661, "learning_rate": 1.8834806251623338e-06, "loss": 0.7305, "step": 14064 }, { "epoch": 0.7228389351423579, "grad_norm": 0.7589775919914246, "learning_rate": 1.8828298625373148e-06, "loss": 0.6474, "step": 14065 }, { "epoch": 0.7228903278857025, "grad_norm": 1.184144139289856, "learning_rate": 1.8821791862762173e-06, "loss": 0.6919, "step": 14066 }, { "epoch": 0.7229417206290472, "grad_norm": 1.256714940071106, "learning_rate": 1.8815285963970658e-06, "loss": 0.6732, "step": 14067 }, { "epoch": 0.7229931133723918, "grad_norm": 0.6841539740562439, "learning_rate": 1.8808780929178894e-06, "loss": 0.6129, "step": 14068 }, { "epoch": 0.7230445061157365, "grad_norm": 1.092739462852478, "learning_rate": 1.8802276758567096e-06, "loss": 0.6867, "step": 14069 }, { "epoch": 0.7230958988590811, "grad_norm": 1.0674060583114624, "learning_rate": 1.8795773452315441e-06, "loss": 0.635, "step": 14070 }, { "epoch": 0.7231472916024257, "grad_norm": 1.213179111480713, "learning_rate": 1.8789271010604153e-06, "loss": 0.6657, "step": 14071 }, { "epoch": 0.7231986843457704, "grad_norm": 1.1276285648345947, "learning_rate": 1.8782769433613368e-06, "loss": 0.6452, "step": 14072 }, { "epoch": 0.723250077089115, "grad_norm": 0.6878656148910522, "learning_rate": 1.8776268721523212e-06, "loss": 0.6281, "step": 14073 }, { "epoch": 0.7233014698324597, "grad_norm": 1.158353567123413, "learning_rate": 1.8769768874513778e-06, "loss": 0.76, "step": 14074 }, { "epoch": 0.7233528625758043, "grad_norm": 1.1134287118911743, "learning_rate": 1.8763269892765206e-06, "loss": 0.7207, "step": 14075 }, { "epoch": 0.723404255319149, "grad_norm": 0.8018239140510559, "learning_rate": 1.875677177645749e-06, "loss": 0.6293, "step": 14076 }, { "epoch": 0.7234556480624936, "grad_norm": 0.7907557487487793, "learning_rate": 1.8750274525770707e-06, "loss": 0.6646, "step": 14077 }, { "epoch": 0.7235070408058382, "grad_norm": 1.0302646160125732, "learning_rate": 1.8743778140884855e-06, "loss": 0.6593, "step": 14078 }, { "epoch": 0.7235584335491828, "grad_norm": 1.0931450128555298, "learning_rate": 1.8737282621979908e-06, "loss": 0.6881, "step": 14079 }, { "epoch": 0.7236098262925275, "grad_norm": 1.0379433631896973, "learning_rate": 1.8730787969235864e-06, "loss": 0.6924, "step": 14080 }, { "epoch": 0.7236612190358721, "grad_norm": 0.7393413186073303, "learning_rate": 1.8724294182832642e-06, "loss": 0.6743, "step": 14081 }, { "epoch": 0.7237126117792168, "grad_norm": 1.1454415321350098, "learning_rate": 1.8717801262950164e-06, "loss": 0.7286, "step": 14082 }, { "epoch": 0.7237640045225614, "grad_norm": 1.0606586933135986, "learning_rate": 1.8711309209768303e-06, "loss": 0.721, "step": 14083 }, { "epoch": 0.723815397265906, "grad_norm": 1.0268588066101074, "learning_rate": 1.8704818023466975e-06, "loss": 0.7052, "step": 14084 }, { "epoch": 0.7238667900092507, "grad_norm": 1.0326272249221802, "learning_rate": 1.8698327704225955e-06, "loss": 0.6871, "step": 14085 }, { "epoch": 0.7239181827525953, "grad_norm": 1.0355812311172485, "learning_rate": 1.8691838252225125e-06, "loss": 0.662, "step": 14086 }, { "epoch": 0.72396957549594, "grad_norm": 1.1086996793746948, "learning_rate": 1.868534966764423e-06, "loss": 0.6974, "step": 14087 }, { "epoch": 0.7240209682392846, "grad_norm": 1.0858620405197144, "learning_rate": 1.867886195066309e-06, "loss": 0.6928, "step": 14088 }, { "epoch": 0.7240723609826293, "grad_norm": 0.8011069297790527, "learning_rate": 1.8672375101461433e-06, "loss": 0.6587, "step": 14089 }, { "epoch": 0.7241237537259739, "grad_norm": 1.1187071800231934, "learning_rate": 1.8665889120218954e-06, "loss": 0.7291, "step": 14090 }, { "epoch": 0.7241751464693186, "grad_norm": 1.0703941583633423, "learning_rate": 1.8659404007115422e-06, "loss": 0.7174, "step": 14091 }, { "epoch": 0.7242265392126632, "grad_norm": 0.7623540759086609, "learning_rate": 1.8652919762330434e-06, "loss": 0.604, "step": 14092 }, { "epoch": 0.7242779319560078, "grad_norm": 1.1057769060134888, "learning_rate": 1.8646436386043692e-06, "loss": 0.7634, "step": 14093 }, { "epoch": 0.7243293246993524, "grad_norm": 1.120051383972168, "learning_rate": 1.8639953878434813e-06, "loss": 0.7032, "step": 14094 }, { "epoch": 0.724380717442697, "grad_norm": 1.0555294752120972, "learning_rate": 1.8633472239683392e-06, "loss": 0.7034, "step": 14095 }, { "epoch": 0.7244321101860417, "grad_norm": 1.1033895015716553, "learning_rate": 1.8626991469969002e-06, "loss": 0.6716, "step": 14096 }, { "epoch": 0.7244835029293863, "grad_norm": 1.047999382019043, "learning_rate": 1.8620511569471222e-06, "loss": 0.6336, "step": 14097 }, { "epoch": 0.724534895672731, "grad_norm": 0.998139500617981, "learning_rate": 1.8614032538369576e-06, "loss": 0.6696, "step": 14098 }, { "epoch": 0.7245862884160756, "grad_norm": 1.0759371519088745, "learning_rate": 1.8607554376843546e-06, "loss": 0.6873, "step": 14099 }, { "epoch": 0.7246376811594203, "grad_norm": 0.6747581362724304, "learning_rate": 1.860107708507265e-06, "loss": 0.6293, "step": 14100 }, { "epoch": 0.7246890739027649, "grad_norm": 1.1367789506912231, "learning_rate": 1.8594600663236334e-06, "loss": 0.7328, "step": 14101 }, { "epoch": 0.7247404666461096, "grad_norm": 1.0425395965576172, "learning_rate": 1.8588125111514032e-06, "loss": 0.7091, "step": 14102 }, { "epoch": 0.7247918593894542, "grad_norm": 1.0911006927490234, "learning_rate": 1.8581650430085135e-06, "loss": 0.7584, "step": 14103 }, { "epoch": 0.7248432521327989, "grad_norm": 1.1549819707870483, "learning_rate": 1.8575176619129086e-06, "loss": 0.7299, "step": 14104 }, { "epoch": 0.7248946448761435, "grad_norm": 1.0662363767623901, "learning_rate": 1.8568703678825172e-06, "loss": 0.7376, "step": 14105 }, { "epoch": 0.7249460376194882, "grad_norm": 1.162440299987793, "learning_rate": 1.8562231609352788e-06, "loss": 0.7287, "step": 14106 }, { "epoch": 0.7249974303628328, "grad_norm": 0.8354968428611755, "learning_rate": 1.8555760410891232e-06, "loss": 0.707, "step": 14107 }, { "epoch": 0.7250488231061774, "grad_norm": 1.0202405452728271, "learning_rate": 1.8549290083619776e-06, "loss": 0.6983, "step": 14108 }, { "epoch": 0.725100215849522, "grad_norm": 1.1565581560134888, "learning_rate": 1.8542820627717717e-06, "loss": 0.7545, "step": 14109 }, { "epoch": 0.7251516085928666, "grad_norm": 1.0327166318893433, "learning_rate": 1.8536352043364287e-06, "loss": 0.7067, "step": 14110 }, { "epoch": 0.7252030013362113, "grad_norm": 1.0117297172546387, "learning_rate": 1.852988433073869e-06, "loss": 0.6417, "step": 14111 }, { "epoch": 0.7252543940795559, "grad_norm": 1.0451098680496216, "learning_rate": 1.8523417490020119e-06, "loss": 0.7, "step": 14112 }, { "epoch": 0.7253057868229006, "grad_norm": 1.1725757122039795, "learning_rate": 1.851695152138779e-06, "loss": 0.7188, "step": 14113 }, { "epoch": 0.7253571795662452, "grad_norm": 1.090002179145813, "learning_rate": 1.8510486425020769e-06, "loss": 0.7425, "step": 14114 }, { "epoch": 0.7254085723095899, "grad_norm": 1.048673391342163, "learning_rate": 1.8504022201098237e-06, "loss": 0.7581, "step": 14115 }, { "epoch": 0.7254599650529345, "grad_norm": 0.691389799118042, "learning_rate": 1.8497558849799264e-06, "loss": 0.6438, "step": 14116 }, { "epoch": 0.7255113577962792, "grad_norm": 1.0125349760055542, "learning_rate": 1.8491096371302962e-06, "loss": 0.6702, "step": 14117 }, { "epoch": 0.7255627505396238, "grad_norm": 1.1240495443344116, "learning_rate": 1.8484634765788316e-06, "loss": 0.6547, "step": 14118 }, { "epoch": 0.7256141432829685, "grad_norm": 1.1280101537704468, "learning_rate": 1.84781740334344e-06, "loss": 0.6748, "step": 14119 }, { "epoch": 0.7256655360263131, "grad_norm": 1.1014515161514282, "learning_rate": 1.8471714174420202e-06, "loss": 0.6859, "step": 14120 }, { "epoch": 0.7257169287696578, "grad_norm": 1.1023160219192505, "learning_rate": 1.8465255188924674e-06, "loss": 0.6654, "step": 14121 }, { "epoch": 0.7257683215130024, "grad_norm": 1.0864132642745972, "learning_rate": 1.8458797077126806e-06, "loss": 0.6781, "step": 14122 }, { "epoch": 0.725819714256347, "grad_norm": 0.7458215355873108, "learning_rate": 1.8452339839205512e-06, "loss": 0.7035, "step": 14123 }, { "epoch": 0.7258711069996916, "grad_norm": 1.095097303390503, "learning_rate": 1.8445883475339688e-06, "loss": 0.73, "step": 14124 }, { "epoch": 0.7259224997430362, "grad_norm": 1.1990714073181152, "learning_rate": 1.8439427985708202e-06, "loss": 0.729, "step": 14125 }, { "epoch": 0.7259738924863809, "grad_norm": 1.03083336353302, "learning_rate": 1.8432973370489953e-06, "loss": 0.7261, "step": 14126 }, { "epoch": 0.7260252852297255, "grad_norm": 1.364574670791626, "learning_rate": 1.8426519629863715e-06, "loss": 0.6632, "step": 14127 }, { "epoch": 0.7260766779730702, "grad_norm": 0.7561004161834717, "learning_rate": 1.8420066764008338e-06, "loss": 0.6426, "step": 14128 }, { "epoch": 0.7261280707164148, "grad_norm": 1.0767942667007446, "learning_rate": 1.8413614773102584e-06, "loss": 0.6533, "step": 14129 }, { "epoch": 0.7261794634597595, "grad_norm": 1.125990629196167, "learning_rate": 1.84071636573252e-06, "loss": 0.6825, "step": 14130 }, { "epoch": 0.7262308562031041, "grad_norm": 0.7646001577377319, "learning_rate": 1.8400713416854954e-06, "loss": 0.6578, "step": 14131 }, { "epoch": 0.7262822489464488, "grad_norm": 1.138761281967163, "learning_rate": 1.8394264051870519e-06, "loss": 0.7373, "step": 14132 }, { "epoch": 0.7263336416897934, "grad_norm": 1.1558908224105835, "learning_rate": 1.838781556255063e-06, "loss": 0.6711, "step": 14133 }, { "epoch": 0.7263850344331381, "grad_norm": 1.098095417022705, "learning_rate": 1.8381367949073882e-06, "loss": 0.6601, "step": 14134 }, { "epoch": 0.7264364271764827, "grad_norm": 0.6936180591583252, "learning_rate": 1.8374921211618969e-06, "loss": 0.6593, "step": 14135 }, { "epoch": 0.7264878199198274, "grad_norm": 1.1750619411468506, "learning_rate": 1.8368475350364473e-06, "loss": 0.6687, "step": 14136 }, { "epoch": 0.726539212663172, "grad_norm": 1.1021217107772827, "learning_rate": 1.8362030365488993e-06, "loss": 0.729, "step": 14137 }, { "epoch": 0.7265906054065167, "grad_norm": 0.8244487047195435, "learning_rate": 1.835558625717107e-06, "loss": 0.7265, "step": 14138 }, { "epoch": 0.7266419981498612, "grad_norm": 1.092629313468933, "learning_rate": 1.8349143025589278e-06, "loss": 0.686, "step": 14139 }, { "epoch": 0.7266933908932058, "grad_norm": 1.0888848304748535, "learning_rate": 1.834270067092212e-06, "loss": 0.683, "step": 14140 }, { "epoch": 0.7267447836365505, "grad_norm": 1.0825302600860596, "learning_rate": 1.8336259193348071e-06, "loss": 0.7254, "step": 14141 }, { "epoch": 0.7267961763798951, "grad_norm": 1.1431008577346802, "learning_rate": 1.832981859304564e-06, "loss": 0.707, "step": 14142 }, { "epoch": 0.7268475691232398, "grad_norm": 1.1113783121109009, "learning_rate": 1.832337887019321e-06, "loss": 0.7117, "step": 14143 }, { "epoch": 0.7268989618665844, "grad_norm": 1.0729255676269531, "learning_rate": 1.8316940024969244e-06, "loss": 0.6891, "step": 14144 }, { "epoch": 0.7269503546099291, "grad_norm": 1.5255107879638672, "learning_rate": 1.8310502057552127e-06, "loss": 0.7599, "step": 14145 }, { "epoch": 0.7270017473532737, "grad_norm": 1.0644080638885498, "learning_rate": 1.8304064968120216e-06, "loss": 0.6899, "step": 14146 }, { "epoch": 0.7270531400966184, "grad_norm": 1.007764458656311, "learning_rate": 1.8297628756851848e-06, "loss": 0.6524, "step": 14147 }, { "epoch": 0.727104532839963, "grad_norm": 1.0331767797470093, "learning_rate": 1.829119342392538e-06, "loss": 0.6395, "step": 14148 }, { "epoch": 0.7271559255833077, "grad_norm": 1.0513592958450317, "learning_rate": 1.8284758969519085e-06, "loss": 0.6908, "step": 14149 }, { "epoch": 0.7272073183266523, "grad_norm": 1.090409278869629, "learning_rate": 1.8278325393811226e-06, "loss": 0.6788, "step": 14150 }, { "epoch": 0.727258711069997, "grad_norm": 1.162103295326233, "learning_rate": 1.827189269698007e-06, "loss": 0.7348, "step": 14151 }, { "epoch": 0.7273101038133416, "grad_norm": 1.1262058019638062, "learning_rate": 1.8265460879203839e-06, "loss": 0.69, "step": 14152 }, { "epoch": 0.7273614965566862, "grad_norm": 1.0211646556854248, "learning_rate": 1.8259029940660728e-06, "loss": 0.6366, "step": 14153 }, { "epoch": 0.7274128893000308, "grad_norm": 0.7680974006652832, "learning_rate": 1.8252599881528893e-06, "loss": 0.6604, "step": 14154 }, { "epoch": 0.7274642820433754, "grad_norm": 0.7253857851028442, "learning_rate": 1.824617070198653e-06, "loss": 0.64, "step": 14155 }, { "epoch": 0.7275156747867201, "grad_norm": 1.0396854877471924, "learning_rate": 1.8239742402211709e-06, "loss": 0.6732, "step": 14156 }, { "epoch": 0.7275670675300647, "grad_norm": 0.8034530282020569, "learning_rate": 1.8233314982382571e-06, "loss": 0.6612, "step": 14157 }, { "epoch": 0.7276184602734094, "grad_norm": 1.0856924057006836, "learning_rate": 1.8226888442677182e-06, "loss": 0.6572, "step": 14158 }, { "epoch": 0.727669853016754, "grad_norm": 1.221211314201355, "learning_rate": 1.822046278327358e-06, "loss": 0.762, "step": 14159 }, { "epoch": 0.7277212457600987, "grad_norm": 1.0133737325668335, "learning_rate": 1.8214038004349826e-06, "loss": 0.721, "step": 14160 }, { "epoch": 0.7277726385034433, "grad_norm": 1.088274359703064, "learning_rate": 1.8207614106083905e-06, "loss": 0.7186, "step": 14161 }, { "epoch": 0.727824031246788, "grad_norm": 1.056518316268921, "learning_rate": 1.820119108865379e-06, "loss": 0.6927, "step": 14162 }, { "epoch": 0.7278754239901326, "grad_norm": 1.1000196933746338, "learning_rate": 1.8194768952237436e-06, "loss": 0.6606, "step": 14163 }, { "epoch": 0.7279268167334773, "grad_norm": 1.164321780204773, "learning_rate": 1.8188347697012798e-06, "loss": 0.7214, "step": 14164 }, { "epoch": 0.7279782094768219, "grad_norm": 1.1015284061431885, "learning_rate": 1.8181927323157765e-06, "loss": 0.6996, "step": 14165 }, { "epoch": 0.7280296022201665, "grad_norm": 1.1950316429138184, "learning_rate": 1.817550783085022e-06, "loss": 0.7327, "step": 14166 }, { "epoch": 0.7280809949635112, "grad_norm": 1.1232049465179443, "learning_rate": 1.8169089220268004e-06, "loss": 0.7516, "step": 14167 }, { "epoch": 0.7281323877068558, "grad_norm": 1.0463675260543823, "learning_rate": 1.8162671491589e-06, "loss": 0.6534, "step": 14168 }, { "epoch": 0.7281837804502004, "grad_norm": 1.077032446861267, "learning_rate": 1.815625464499095e-06, "loss": 0.737, "step": 14169 }, { "epoch": 0.728235173193545, "grad_norm": 1.099536418914795, "learning_rate": 1.8149838680651694e-06, "loss": 0.7212, "step": 14170 }, { "epoch": 0.7282865659368897, "grad_norm": 1.050209403038025, "learning_rate": 1.8143423598748967e-06, "loss": 0.7196, "step": 14171 }, { "epoch": 0.7283379586802343, "grad_norm": 1.2169212102890015, "learning_rate": 1.8137009399460492e-06, "loss": 0.7135, "step": 14172 }, { "epoch": 0.728389351423579, "grad_norm": 1.1904245615005493, "learning_rate": 1.8130596082964008e-06, "loss": 0.7275, "step": 14173 }, { "epoch": 0.7284407441669236, "grad_norm": 1.1048997640609741, "learning_rate": 1.8124183649437193e-06, "loss": 0.7154, "step": 14174 }, { "epoch": 0.7284921369102683, "grad_norm": 1.1846435070037842, "learning_rate": 1.8117772099057706e-06, "loss": 0.6776, "step": 14175 }, { "epoch": 0.7285435296536129, "grad_norm": 1.1215087175369263, "learning_rate": 1.8111361432003166e-06, "loss": 0.6722, "step": 14176 }, { "epoch": 0.7285949223969576, "grad_norm": 1.1111115217208862, "learning_rate": 1.810495164845124e-06, "loss": 0.6578, "step": 14177 }, { "epoch": 0.7286463151403022, "grad_norm": 1.0743190050125122, "learning_rate": 1.8098542748579446e-06, "loss": 0.642, "step": 14178 }, { "epoch": 0.7286977078836469, "grad_norm": 1.1126295328140259, "learning_rate": 1.8092134732565396e-06, "loss": 0.672, "step": 14179 }, { "epoch": 0.7287491006269915, "grad_norm": 1.0887190103530884, "learning_rate": 1.8085727600586606e-06, "loss": 0.6818, "step": 14180 }, { "epoch": 0.7288004933703361, "grad_norm": 1.1138381958007812, "learning_rate": 1.8079321352820616e-06, "loss": 0.7149, "step": 14181 }, { "epoch": 0.7288518861136808, "grad_norm": 1.0463840961456299, "learning_rate": 1.80729159894449e-06, "loss": 0.6846, "step": 14182 }, { "epoch": 0.7289032788570254, "grad_norm": 0.7222232818603516, "learning_rate": 1.8066511510636913e-06, "loss": 0.6855, "step": 14183 }, { "epoch": 0.72895467160037, "grad_norm": 1.0851243734359741, "learning_rate": 1.8060107916574143e-06, "loss": 0.7056, "step": 14184 }, { "epoch": 0.7290060643437146, "grad_norm": 1.147598147392273, "learning_rate": 1.8053705207433941e-06, "loss": 0.7511, "step": 14185 }, { "epoch": 0.7290574570870593, "grad_norm": 1.0548043251037598, "learning_rate": 1.804730338339375e-06, "loss": 0.7677, "step": 14186 }, { "epoch": 0.7291088498304039, "grad_norm": 1.1363869905471802, "learning_rate": 1.804090244463092e-06, "loss": 0.7889, "step": 14187 }, { "epoch": 0.7291602425737486, "grad_norm": 1.0757659673690796, "learning_rate": 1.8034502391322794e-06, "loss": 0.6833, "step": 14188 }, { "epoch": 0.7292116353170932, "grad_norm": 1.0416321754455566, "learning_rate": 1.802810322364668e-06, "loss": 0.6796, "step": 14189 }, { "epoch": 0.7292630280604379, "grad_norm": 1.0118740797042847, "learning_rate": 1.80217049417799e-06, "loss": 0.6859, "step": 14190 }, { "epoch": 0.7293144208037825, "grad_norm": 0.7777758240699768, "learning_rate": 1.8015307545899714e-06, "loss": 0.6491, "step": 14191 }, { "epoch": 0.7293658135471272, "grad_norm": 1.1217104196548462, "learning_rate": 1.800891103618334e-06, "loss": 0.6992, "step": 14192 }, { "epoch": 0.7294172062904718, "grad_norm": 1.2467244863510132, "learning_rate": 1.8002515412808058e-06, "loss": 0.6686, "step": 14193 }, { "epoch": 0.7294685990338164, "grad_norm": 1.090186595916748, "learning_rate": 1.7996120675950994e-06, "loss": 0.6892, "step": 14194 }, { "epoch": 0.7295199917771611, "grad_norm": 1.0485175848007202, "learning_rate": 1.7989726825789371e-06, "loss": 0.694, "step": 14195 }, { "epoch": 0.7295713845205057, "grad_norm": 1.1058467626571655, "learning_rate": 1.7983333862500301e-06, "loss": 0.7156, "step": 14196 }, { "epoch": 0.7296227772638504, "grad_norm": 1.1257439851760864, "learning_rate": 1.797694178626096e-06, "loss": 0.7333, "step": 14197 }, { "epoch": 0.729674170007195, "grad_norm": 1.0711151361465454, "learning_rate": 1.7970550597248377e-06, "loss": 0.6853, "step": 14198 }, { "epoch": 0.7297255627505396, "grad_norm": 1.0446034669876099, "learning_rate": 1.7964160295639678e-06, "loss": 0.6607, "step": 14199 }, { "epoch": 0.7297769554938842, "grad_norm": 0.6731815338134766, "learning_rate": 1.7957770881611886e-06, "loss": 0.6413, "step": 14200 }, { "epoch": 0.7298283482372289, "grad_norm": 1.097685694694519, "learning_rate": 1.7951382355342024e-06, "loss": 0.7283, "step": 14201 }, { "epoch": 0.7298797409805735, "grad_norm": 1.049747109413147, "learning_rate": 1.7944994717007108e-06, "loss": 0.7695, "step": 14202 }, { "epoch": 0.7299311337239182, "grad_norm": 1.0693048238754272, "learning_rate": 1.7938607966784111e-06, "loss": 0.7518, "step": 14203 }, { "epoch": 0.7299825264672628, "grad_norm": 1.0700889825820923, "learning_rate": 1.7932222104849979e-06, "loss": 0.7016, "step": 14204 }, { "epoch": 0.7300339192106075, "grad_norm": 1.0565557479858398, "learning_rate": 1.7925837131381618e-06, "loss": 0.674, "step": 14205 }, { "epoch": 0.7300853119539521, "grad_norm": 1.109625220298767, "learning_rate": 1.791945304655598e-06, "loss": 0.7059, "step": 14206 }, { "epoch": 0.7301367046972967, "grad_norm": 1.0743491649627686, "learning_rate": 1.7913069850549874e-06, "loss": 0.6613, "step": 14207 }, { "epoch": 0.7301880974406414, "grad_norm": 0.6909602284431458, "learning_rate": 1.7906687543540212e-06, "loss": 0.6621, "step": 14208 }, { "epoch": 0.730239490183986, "grad_norm": 1.108607292175293, "learning_rate": 1.7900306125703792e-06, "loss": 0.7114, "step": 14209 }, { "epoch": 0.7302908829273307, "grad_norm": 1.1064765453338623, "learning_rate": 1.7893925597217404e-06, "loss": 0.6997, "step": 14210 }, { "epoch": 0.7303422756706753, "grad_norm": 1.1871590614318848, "learning_rate": 1.7887545958257863e-06, "loss": 0.7421, "step": 14211 }, { "epoch": 0.73039366841402, "grad_norm": 0.7593753337860107, "learning_rate": 1.78811672090019e-06, "loss": 0.6463, "step": 14212 }, { "epoch": 0.7304450611573646, "grad_norm": 1.0574991703033447, "learning_rate": 1.7874789349626248e-06, "loss": 0.7147, "step": 14213 }, { "epoch": 0.7304964539007093, "grad_norm": 1.008554458618164, "learning_rate": 1.7868412380307599e-06, "loss": 0.7168, "step": 14214 }, { "epoch": 0.7305478466440538, "grad_norm": 0.71867835521698, "learning_rate": 1.7862036301222652e-06, "loss": 0.6985, "step": 14215 }, { "epoch": 0.7305992393873985, "grad_norm": 1.1462565660476685, "learning_rate": 1.7855661112548056e-06, "loss": 0.6941, "step": 14216 }, { "epoch": 0.7306506321307431, "grad_norm": 1.1414536237716675, "learning_rate": 1.7849286814460442e-06, "loss": 0.702, "step": 14217 }, { "epoch": 0.7307020248740878, "grad_norm": 0.8802616000175476, "learning_rate": 1.7842913407136392e-06, "loss": 0.6394, "step": 14218 }, { "epoch": 0.7307534176174324, "grad_norm": 1.0863372087478638, "learning_rate": 1.7836540890752546e-06, "loss": 0.6592, "step": 14219 }, { "epoch": 0.730804810360777, "grad_norm": 0.7025415301322937, "learning_rate": 1.783016926548538e-06, "loss": 0.6558, "step": 14220 }, { "epoch": 0.7308562031041217, "grad_norm": 1.0959371328353882, "learning_rate": 1.7823798531511487e-06, "loss": 0.6766, "step": 14221 }, { "epoch": 0.7309075958474663, "grad_norm": 1.0273948907852173, "learning_rate": 1.7817428689007354e-06, "loss": 0.6428, "step": 14222 }, { "epoch": 0.730958988590811, "grad_norm": 1.3654849529266357, "learning_rate": 1.7811059738149445e-06, "loss": 0.6434, "step": 14223 }, { "epoch": 0.7310103813341556, "grad_norm": 1.0721516609191895, "learning_rate": 1.780469167911425e-06, "loss": 0.6959, "step": 14224 }, { "epoch": 0.7310617740775003, "grad_norm": 1.1090396642684937, "learning_rate": 1.7798324512078174e-06, "loss": 0.7262, "step": 14225 }, { "epoch": 0.7311131668208449, "grad_norm": 1.0582424402236938, "learning_rate": 1.7791958237217666e-06, "loss": 0.6957, "step": 14226 }, { "epoch": 0.7311645595641896, "grad_norm": 1.1268717050552368, "learning_rate": 1.778559285470905e-06, "loss": 0.71, "step": 14227 }, { "epoch": 0.7312159523075342, "grad_norm": 1.053268551826477, "learning_rate": 1.7779228364728729e-06, "loss": 0.702, "step": 14228 }, { "epoch": 0.7312673450508789, "grad_norm": 1.062662124633789, "learning_rate": 1.777286476745303e-06, "loss": 0.7051, "step": 14229 }, { "epoch": 0.7313187377942234, "grad_norm": 1.0808087587356567, "learning_rate": 1.7766502063058245e-06, "loss": 0.718, "step": 14230 }, { "epoch": 0.731370130537568, "grad_norm": 1.0907949209213257, "learning_rate": 1.7760140251720658e-06, "loss": 0.7524, "step": 14231 }, { "epoch": 0.7314215232809127, "grad_norm": 0.7170012593269348, "learning_rate": 1.775377933361655e-06, "loss": 0.6203, "step": 14232 }, { "epoch": 0.7314729160242573, "grad_norm": 1.159892201423645, "learning_rate": 1.7747419308922147e-06, "loss": 0.7086, "step": 14233 }, { "epoch": 0.731524308767602, "grad_norm": 1.0847471952438354, "learning_rate": 1.774106017781364e-06, "loss": 0.7012, "step": 14234 }, { "epoch": 0.7315757015109466, "grad_norm": 1.0239468812942505, "learning_rate": 1.7734701940467263e-06, "loss": 0.6976, "step": 14235 }, { "epoch": 0.7316270942542913, "grad_norm": 1.0780420303344727, "learning_rate": 1.7728344597059117e-06, "loss": 0.7741, "step": 14236 }, { "epoch": 0.7316784869976359, "grad_norm": 1.148107886314392, "learning_rate": 1.7721988147765372e-06, "loss": 0.7254, "step": 14237 }, { "epoch": 0.7317298797409806, "grad_norm": 1.0864347219467163, "learning_rate": 1.7715632592762138e-06, "loss": 0.7095, "step": 14238 }, { "epoch": 0.7317812724843252, "grad_norm": 1.0514644384384155, "learning_rate": 1.7709277932225494e-06, "loss": 0.6897, "step": 14239 }, { "epoch": 0.7318326652276699, "grad_norm": 1.0319058895111084, "learning_rate": 1.7702924166331487e-06, "loss": 0.7307, "step": 14240 }, { "epoch": 0.7318840579710145, "grad_norm": 1.0796493291854858, "learning_rate": 1.7696571295256182e-06, "loss": 0.73, "step": 14241 }, { "epoch": 0.7319354507143592, "grad_norm": 1.0330322980880737, "learning_rate": 1.7690219319175584e-06, "loss": 0.6603, "step": 14242 }, { "epoch": 0.7319868434577038, "grad_norm": 0.7170283198356628, "learning_rate": 1.768386823826565e-06, "loss": 0.6325, "step": 14243 }, { "epoch": 0.7320382362010485, "grad_norm": 0.6943623423576355, "learning_rate": 1.7677518052702387e-06, "loss": 0.6328, "step": 14244 }, { "epoch": 0.732089628944393, "grad_norm": 0.6924095749855042, "learning_rate": 1.7671168762661712e-06, "loss": 0.6872, "step": 14245 }, { "epoch": 0.7321410216877376, "grad_norm": 1.1192182302474976, "learning_rate": 1.7664820368319534e-06, "loss": 0.752, "step": 14246 }, { "epoch": 0.7321924144310823, "grad_norm": 1.1731281280517578, "learning_rate": 1.7658472869851733e-06, "loss": 0.7455, "step": 14247 }, { "epoch": 0.7322438071744269, "grad_norm": 1.0350943803787231, "learning_rate": 1.7652126267434217e-06, "loss": 0.6899, "step": 14248 }, { "epoch": 0.7322951999177716, "grad_norm": 1.086379885673523, "learning_rate": 1.7645780561242748e-06, "loss": 0.714, "step": 14249 }, { "epoch": 0.7323465926611162, "grad_norm": 1.1786599159240723, "learning_rate": 1.7639435751453205e-06, "loss": 0.73, "step": 14250 }, { "epoch": 0.7323979854044609, "grad_norm": 1.144720196723938, "learning_rate": 1.7633091838241356e-06, "loss": 0.7297, "step": 14251 }, { "epoch": 0.7324493781478055, "grad_norm": 1.0773013830184937, "learning_rate": 1.762674882178294e-06, "loss": 0.7268, "step": 14252 }, { "epoch": 0.7325007708911502, "grad_norm": 1.0976192951202393, "learning_rate": 1.7620406702253734e-06, "loss": 0.6985, "step": 14253 }, { "epoch": 0.7325521636344948, "grad_norm": 1.1154916286468506, "learning_rate": 1.761406547982944e-06, "loss": 0.6963, "step": 14254 }, { "epoch": 0.7326035563778395, "grad_norm": 1.089724063873291, "learning_rate": 1.760772515468574e-06, "loss": 0.7551, "step": 14255 }, { "epoch": 0.7326549491211841, "grad_norm": 1.0380268096923828, "learning_rate": 1.7601385726998282e-06, "loss": 0.6359, "step": 14256 }, { "epoch": 0.7327063418645288, "grad_norm": 1.1182377338409424, "learning_rate": 1.7595047196942745e-06, "loss": 0.6982, "step": 14257 }, { "epoch": 0.7327577346078734, "grad_norm": 1.1102083921432495, "learning_rate": 1.7588709564694724e-06, "loss": 0.649, "step": 14258 }, { "epoch": 0.7328091273512181, "grad_norm": 1.058334231376648, "learning_rate": 1.75823728304298e-06, "loss": 0.6842, "step": 14259 }, { "epoch": 0.7328605200945626, "grad_norm": 1.0519452095031738, "learning_rate": 1.7576036994323537e-06, "loss": 0.7538, "step": 14260 }, { "epoch": 0.7329119128379072, "grad_norm": 0.7923277616500854, "learning_rate": 1.7569702056551492e-06, "loss": 0.677, "step": 14261 }, { "epoch": 0.7329633055812519, "grad_norm": 1.0972868204116821, "learning_rate": 1.7563368017289178e-06, "loss": 0.6621, "step": 14262 }, { "epoch": 0.7330146983245965, "grad_norm": 1.1292816400527954, "learning_rate": 1.7557034876712076e-06, "loss": 0.7358, "step": 14263 }, { "epoch": 0.7330660910679412, "grad_norm": 1.1095266342163086, "learning_rate": 1.7550702634995647e-06, "loss": 0.7365, "step": 14264 }, { "epoch": 0.7331174838112858, "grad_norm": 1.0919475555419922, "learning_rate": 1.7544371292315326e-06, "loss": 0.6828, "step": 14265 }, { "epoch": 0.7331688765546305, "grad_norm": 1.0480883121490479, "learning_rate": 1.753804084884656e-06, "loss": 0.7281, "step": 14266 }, { "epoch": 0.7332202692979751, "grad_norm": 1.12640380859375, "learning_rate": 1.7531711304764714e-06, "loss": 0.6605, "step": 14267 }, { "epoch": 0.7332716620413198, "grad_norm": 1.1045379638671875, "learning_rate": 1.7525382660245166e-06, "loss": 0.7229, "step": 14268 }, { "epoch": 0.7333230547846644, "grad_norm": 0.7560640573501587, "learning_rate": 1.751905491546323e-06, "loss": 0.6923, "step": 14269 }, { "epoch": 0.7333744475280091, "grad_norm": 1.1003350019454956, "learning_rate": 1.751272807059427e-06, "loss": 0.6773, "step": 14270 }, { "epoch": 0.7334258402713537, "grad_norm": 1.0724040269851685, "learning_rate": 1.7506402125813522e-06, "loss": 0.7364, "step": 14271 }, { "epoch": 0.7334772330146984, "grad_norm": 1.1029902696609497, "learning_rate": 1.7500077081296284e-06, "loss": 0.6844, "step": 14272 }, { "epoch": 0.733528625758043, "grad_norm": 1.0249310731887817, "learning_rate": 1.7493752937217784e-06, "loss": 0.6801, "step": 14273 }, { "epoch": 0.7335800185013877, "grad_norm": 1.2113527059555054, "learning_rate": 1.748742969375326e-06, "loss": 0.8207, "step": 14274 }, { "epoch": 0.7336314112447322, "grad_norm": 1.0830127000808716, "learning_rate": 1.7481107351077887e-06, "loss": 0.7696, "step": 14275 }, { "epoch": 0.7336828039880768, "grad_norm": 1.1088954210281372, "learning_rate": 1.7474785909366808e-06, "loss": 0.6499, "step": 14276 }, { "epoch": 0.7337341967314215, "grad_norm": 1.0736351013183594, "learning_rate": 1.7468465368795228e-06, "loss": 0.6734, "step": 14277 }, { "epoch": 0.7337855894747661, "grad_norm": 1.0742754936218262, "learning_rate": 1.7462145729538193e-06, "loss": 0.7059, "step": 14278 }, { "epoch": 0.7338369822181108, "grad_norm": 1.0745306015014648, "learning_rate": 1.7455826991770836e-06, "loss": 0.6538, "step": 14279 }, { "epoch": 0.7338883749614554, "grad_norm": 1.0449812412261963, "learning_rate": 1.7449509155668208e-06, "loss": 0.6604, "step": 14280 }, { "epoch": 0.7339397677048001, "grad_norm": 1.1699858903884888, "learning_rate": 1.744319222140536e-06, "loss": 0.6723, "step": 14281 }, { "epoch": 0.7339911604481447, "grad_norm": 1.1008058786392212, "learning_rate": 1.743687618915728e-06, "loss": 0.754, "step": 14282 }, { "epoch": 0.7340425531914894, "grad_norm": 0.7757901549339294, "learning_rate": 1.7430561059099e-06, "loss": 0.6547, "step": 14283 }, { "epoch": 0.734093945934834, "grad_norm": 1.1030393838882446, "learning_rate": 1.7424246831405466e-06, "loss": 0.7704, "step": 14284 }, { "epoch": 0.7341453386781787, "grad_norm": 0.7826477289199829, "learning_rate": 1.7417933506251605e-06, "loss": 0.6921, "step": 14285 }, { "epoch": 0.7341967314215233, "grad_norm": 1.1917823553085327, "learning_rate": 1.741162108381238e-06, "loss": 0.7574, "step": 14286 }, { "epoch": 0.734248124164868, "grad_norm": 1.259516954421997, "learning_rate": 1.7405309564262619e-06, "loss": 0.6814, "step": 14287 }, { "epoch": 0.7342995169082126, "grad_norm": 1.1350409984588623, "learning_rate": 1.7398998947777235e-06, "loss": 0.7587, "step": 14288 }, { "epoch": 0.7343509096515572, "grad_norm": 1.0344046354293823, "learning_rate": 1.7392689234531036e-06, "loss": 0.6896, "step": 14289 }, { "epoch": 0.7344023023949018, "grad_norm": 0.6549025177955627, "learning_rate": 1.7386380424698889e-06, "loss": 0.6381, "step": 14290 }, { "epoch": 0.7344536951382464, "grad_norm": 1.049855351448059, "learning_rate": 1.738007251845552e-06, "loss": 0.6983, "step": 14291 }, { "epoch": 0.7345050878815911, "grad_norm": 1.0957801342010498, "learning_rate": 1.7373765515975744e-06, "loss": 0.6404, "step": 14292 }, { "epoch": 0.7345564806249357, "grad_norm": 1.079935073852539, "learning_rate": 1.7367459417434285e-06, "loss": 0.709, "step": 14293 }, { "epoch": 0.7346078733682804, "grad_norm": 0.6613960862159729, "learning_rate": 1.7361154223005839e-06, "loss": 0.6348, "step": 14294 }, { "epoch": 0.734659266111625, "grad_norm": 1.0420163869857788, "learning_rate": 1.7354849932865136e-06, "loss": 0.6925, "step": 14295 }, { "epoch": 0.7347106588549697, "grad_norm": 1.0916849374771118, "learning_rate": 1.7348546547186824e-06, "loss": 0.6552, "step": 14296 }, { "epoch": 0.7347620515983143, "grad_norm": 0.6854373216629028, "learning_rate": 1.7342244066145542e-06, "loss": 0.6627, "step": 14297 }, { "epoch": 0.734813444341659, "grad_norm": 1.0763461589813232, "learning_rate": 1.7335942489915892e-06, "loss": 0.6931, "step": 14298 }, { "epoch": 0.7348648370850036, "grad_norm": 1.142867088317871, "learning_rate": 1.7329641818672505e-06, "loss": 0.7603, "step": 14299 }, { "epoch": 0.7349162298283483, "grad_norm": 1.0380083322525024, "learning_rate": 1.7323342052589892e-06, "loss": 0.6619, "step": 14300 }, { "epoch": 0.7349676225716929, "grad_norm": 1.0602900981903076, "learning_rate": 1.7317043191842642e-06, "loss": 0.6645, "step": 14301 }, { "epoch": 0.7350190153150375, "grad_norm": 1.081571102142334, "learning_rate": 1.7310745236605248e-06, "loss": 0.7094, "step": 14302 }, { "epoch": 0.7350704080583822, "grad_norm": 1.0831965208053589, "learning_rate": 1.7304448187052182e-06, "loss": 0.6577, "step": 14303 }, { "epoch": 0.7351218008017268, "grad_norm": 1.1221976280212402, "learning_rate": 1.7298152043357952e-06, "loss": 0.7251, "step": 14304 }, { "epoch": 0.7351731935450715, "grad_norm": 1.0369336605072021, "learning_rate": 1.7291856805696972e-06, "loss": 0.6848, "step": 14305 }, { "epoch": 0.735224586288416, "grad_norm": 1.2235352993011475, "learning_rate": 1.7285562474243667e-06, "loss": 0.6594, "step": 14306 }, { "epoch": 0.7352759790317607, "grad_norm": 1.0369309186935425, "learning_rate": 1.7279269049172403e-06, "loss": 0.6617, "step": 14307 }, { "epoch": 0.7353273717751053, "grad_norm": 0.7648594975471497, "learning_rate": 1.7272976530657575e-06, "loss": 0.6213, "step": 14308 }, { "epoch": 0.73537876451845, "grad_norm": 1.1536024808883667, "learning_rate": 1.7266684918873516e-06, "loss": 0.7317, "step": 14309 }, { "epoch": 0.7354301572617946, "grad_norm": 1.1113276481628418, "learning_rate": 1.7260394213994536e-06, "loss": 0.7238, "step": 14310 }, { "epoch": 0.7354815500051393, "grad_norm": 0.8133655190467834, "learning_rate": 1.72541044161949e-06, "loss": 0.6997, "step": 14311 }, { "epoch": 0.7355329427484839, "grad_norm": 1.0221985578536987, "learning_rate": 1.724781552564892e-06, "loss": 0.697, "step": 14312 }, { "epoch": 0.7355843354918286, "grad_norm": 1.0915533304214478, "learning_rate": 1.7241527542530812e-06, "loss": 0.6756, "step": 14313 }, { "epoch": 0.7356357282351732, "grad_norm": 1.094248652458191, "learning_rate": 1.7235240467014785e-06, "loss": 0.7057, "step": 14314 }, { "epoch": 0.7356871209785179, "grad_norm": 1.0718410015106201, "learning_rate": 1.7228954299275035e-06, "loss": 0.7053, "step": 14315 }, { "epoch": 0.7357385137218625, "grad_norm": 1.017695665359497, "learning_rate": 1.7222669039485707e-06, "loss": 0.7073, "step": 14316 }, { "epoch": 0.7357899064652071, "grad_norm": 1.0942466259002686, "learning_rate": 1.721638468782097e-06, "loss": 0.6962, "step": 14317 }, { "epoch": 0.7358412992085518, "grad_norm": 1.107970118522644, "learning_rate": 1.7210101244454923e-06, "loss": 0.6773, "step": 14318 }, { "epoch": 0.7358926919518964, "grad_norm": 1.056878685951233, "learning_rate": 1.720381870956166e-06, "loss": 0.733, "step": 14319 }, { "epoch": 0.7359440846952411, "grad_norm": 0.9937541484832764, "learning_rate": 1.719753708331522e-06, "loss": 0.6672, "step": 14320 }, { "epoch": 0.7359954774385856, "grad_norm": 1.0694400072097778, "learning_rate": 1.7191256365889674e-06, "loss": 0.7122, "step": 14321 }, { "epoch": 0.7360468701819303, "grad_norm": 0.792662501335144, "learning_rate": 1.718497655745902e-06, "loss": 0.6199, "step": 14322 }, { "epoch": 0.7360982629252749, "grad_norm": 1.1098343133926392, "learning_rate": 1.7178697658197246e-06, "loss": 0.6926, "step": 14323 }, { "epoch": 0.7361496556686196, "grad_norm": 0.7159417271614075, "learning_rate": 1.71724196682783e-06, "loss": 0.6301, "step": 14324 }, { "epoch": 0.7362010484119642, "grad_norm": 1.1120346784591675, "learning_rate": 1.716614258787615e-06, "loss": 0.804, "step": 14325 }, { "epoch": 0.7362524411553089, "grad_norm": 1.102393627166748, "learning_rate": 1.7159866417164688e-06, "loss": 0.7112, "step": 14326 }, { "epoch": 0.7363038338986535, "grad_norm": 1.1136939525604248, "learning_rate": 1.7153591156317796e-06, "loss": 0.7227, "step": 14327 }, { "epoch": 0.7363552266419982, "grad_norm": 1.145032286643982, "learning_rate": 1.7147316805509367e-06, "loss": 0.7819, "step": 14328 }, { "epoch": 0.7364066193853428, "grad_norm": 1.1060703992843628, "learning_rate": 1.714104336491319e-06, "loss": 0.7155, "step": 14329 }, { "epoch": 0.7364580121286874, "grad_norm": 1.4844474792480469, "learning_rate": 1.7134770834703112e-06, "loss": 0.7068, "step": 14330 }, { "epoch": 0.7365094048720321, "grad_norm": 1.0718817710876465, "learning_rate": 1.7128499215052908e-06, "loss": 0.6977, "step": 14331 }, { "epoch": 0.7365607976153767, "grad_norm": 1.1282298564910889, "learning_rate": 1.712222850613634e-06, "loss": 0.7441, "step": 14332 }, { "epoch": 0.7366121903587214, "grad_norm": 1.1066234111785889, "learning_rate": 1.7115958708127123e-06, "loss": 0.7603, "step": 14333 }, { "epoch": 0.736663583102066, "grad_norm": 1.1001508235931396, "learning_rate": 1.7109689821199e-06, "loss": 0.7377, "step": 14334 }, { "epoch": 0.7367149758454107, "grad_norm": 1.1116912364959717, "learning_rate": 1.7103421845525648e-06, "loss": 0.6974, "step": 14335 }, { "epoch": 0.7367663685887552, "grad_norm": 0.6937499046325684, "learning_rate": 1.70971547812807e-06, "loss": 0.6376, "step": 14336 }, { "epoch": 0.7368177613320999, "grad_norm": 1.0903215408325195, "learning_rate": 1.7090888628637825e-06, "loss": 0.6859, "step": 14337 }, { "epoch": 0.7368691540754445, "grad_norm": 1.060340166091919, "learning_rate": 1.708462338777062e-06, "loss": 0.689, "step": 14338 }, { "epoch": 0.7369205468187892, "grad_norm": 1.1633929014205933, "learning_rate": 1.7078359058852673e-06, "loss": 0.7314, "step": 14339 }, { "epoch": 0.7369719395621338, "grad_norm": 0.7298997640609741, "learning_rate": 1.7072095642057512e-06, "loss": 0.6711, "step": 14340 }, { "epoch": 0.7370233323054785, "grad_norm": 1.212416648864746, "learning_rate": 1.7065833137558736e-06, "loss": 0.7083, "step": 14341 }, { "epoch": 0.7370747250488231, "grad_norm": 1.0263867378234863, "learning_rate": 1.7059571545529775e-06, "loss": 0.6839, "step": 14342 }, { "epoch": 0.7371261177921677, "grad_norm": 1.078334093093872, "learning_rate": 1.7053310866144162e-06, "loss": 0.6891, "step": 14343 }, { "epoch": 0.7371775105355124, "grad_norm": 1.0823578834533691, "learning_rate": 1.7047051099575345e-06, "loss": 0.7408, "step": 14344 }, { "epoch": 0.737228903278857, "grad_norm": 1.1141408681869507, "learning_rate": 1.704079224599674e-06, "loss": 0.6997, "step": 14345 }, { "epoch": 0.7372802960222017, "grad_norm": 1.0204076766967773, "learning_rate": 1.7034534305581785e-06, "loss": 0.6229, "step": 14346 }, { "epoch": 0.7373316887655463, "grad_norm": 1.1284871101379395, "learning_rate": 1.7028277278503841e-06, "loss": 0.7607, "step": 14347 }, { "epoch": 0.737383081508891, "grad_norm": 1.080053448677063, "learning_rate": 1.7022021164936265e-06, "loss": 0.7151, "step": 14348 }, { "epoch": 0.7374344742522356, "grad_norm": 1.167927622795105, "learning_rate": 1.7015765965052377e-06, "loss": 0.7632, "step": 14349 }, { "epoch": 0.7374858669955803, "grad_norm": 1.137816309928894, "learning_rate": 1.700951167902551e-06, "loss": 0.6982, "step": 14350 }, { "epoch": 0.7375372597389248, "grad_norm": 0.7807933688163757, "learning_rate": 1.700325830702893e-06, "loss": 0.6673, "step": 14351 }, { "epoch": 0.7375886524822695, "grad_norm": 1.0947612524032593, "learning_rate": 1.6997005849235897e-06, "loss": 0.6932, "step": 14352 }, { "epoch": 0.7376400452256141, "grad_norm": 0.917332112789154, "learning_rate": 1.6990754305819618e-06, "loss": 0.6411, "step": 14353 }, { "epoch": 0.7376914379689588, "grad_norm": 1.0428218841552734, "learning_rate": 1.6984503676953333e-06, "loss": 0.7369, "step": 14354 }, { "epoch": 0.7377428307123034, "grad_norm": 1.125120997428894, "learning_rate": 1.6978253962810204e-06, "loss": 0.6916, "step": 14355 }, { "epoch": 0.737794223455648, "grad_norm": 1.096920132637024, "learning_rate": 1.6972005163563387e-06, "loss": 0.6965, "step": 14356 }, { "epoch": 0.7378456161989927, "grad_norm": 1.1084500551223755, "learning_rate": 1.6965757279386003e-06, "loss": 0.7592, "step": 14357 }, { "epoch": 0.7378970089423373, "grad_norm": 1.150902271270752, "learning_rate": 1.695951031045115e-06, "loss": 0.6657, "step": 14358 }, { "epoch": 0.737948401685682, "grad_norm": 1.1046724319458008, "learning_rate": 1.6953264256931928e-06, "loss": 0.6891, "step": 14359 }, { "epoch": 0.7379997944290266, "grad_norm": 0.7009958028793335, "learning_rate": 1.6947019119001378e-06, "loss": 0.6395, "step": 14360 }, { "epoch": 0.7380511871723713, "grad_norm": 0.7393320798873901, "learning_rate": 1.694077489683253e-06, "loss": 0.6307, "step": 14361 }, { "epoch": 0.7381025799157159, "grad_norm": 1.0800014734268188, "learning_rate": 1.6934531590598363e-06, "loss": 0.711, "step": 14362 }, { "epoch": 0.7381539726590606, "grad_norm": 0.7191575765609741, "learning_rate": 1.6928289200471893e-06, "loss": 0.6079, "step": 14363 }, { "epoch": 0.7382053654024052, "grad_norm": 1.0938640832901, "learning_rate": 1.6922047726626045e-06, "loss": 0.7293, "step": 14364 }, { "epoch": 0.7382567581457499, "grad_norm": 1.196191430091858, "learning_rate": 1.6915807169233756e-06, "loss": 0.7094, "step": 14365 }, { "epoch": 0.7383081508890944, "grad_norm": 0.7167832851409912, "learning_rate": 1.6909567528467897e-06, "loss": 0.6422, "step": 14366 }, { "epoch": 0.738359543632439, "grad_norm": 0.7350412011146545, "learning_rate": 1.6903328804501385e-06, "loss": 0.6648, "step": 14367 }, { "epoch": 0.7384109363757837, "grad_norm": 0.6949954032897949, "learning_rate": 1.6897090997507054e-06, "loss": 0.7046, "step": 14368 }, { "epoch": 0.7384623291191283, "grad_norm": 1.0517213344573975, "learning_rate": 1.68908541076577e-06, "loss": 0.7248, "step": 14369 }, { "epoch": 0.738513721862473, "grad_norm": 1.0870717763900757, "learning_rate": 1.6884618135126179e-06, "loss": 0.7081, "step": 14370 }, { "epoch": 0.7385651146058176, "grad_norm": 1.0994977951049805, "learning_rate": 1.6878383080085203e-06, "loss": 0.7038, "step": 14371 }, { "epoch": 0.7386165073491623, "grad_norm": 1.1249569654464722, "learning_rate": 1.6872148942707561e-06, "loss": 0.6783, "step": 14372 }, { "epoch": 0.7386679000925069, "grad_norm": 1.1715688705444336, "learning_rate": 1.686591572316596e-06, "loss": 0.6919, "step": 14373 }, { "epoch": 0.7387192928358516, "grad_norm": 1.0909693241119385, "learning_rate": 1.68596834216331e-06, "loss": 0.6743, "step": 14374 }, { "epoch": 0.7387706855791962, "grad_norm": 1.146000623703003, "learning_rate": 1.6853452038281631e-06, "loss": 0.7643, "step": 14375 }, { "epoch": 0.7388220783225409, "grad_norm": 1.1377971172332764, "learning_rate": 1.6847221573284234e-06, "loss": 0.7406, "step": 14376 }, { "epoch": 0.7388734710658855, "grad_norm": 0.9911730885505676, "learning_rate": 1.6840992026813518e-06, "loss": 0.6448, "step": 14377 }, { "epoch": 0.7389248638092302, "grad_norm": 1.0707061290740967, "learning_rate": 1.6834763399042054e-06, "loss": 0.6871, "step": 14378 }, { "epoch": 0.7389762565525748, "grad_norm": 1.0724838972091675, "learning_rate": 1.6828535690142467e-06, "loss": 0.7014, "step": 14379 }, { "epoch": 0.7390276492959195, "grad_norm": 1.0569941997528076, "learning_rate": 1.682230890028723e-06, "loss": 0.641, "step": 14380 }, { "epoch": 0.7390790420392641, "grad_norm": 1.000306248664856, "learning_rate": 1.6816083029648916e-06, "loss": 0.6758, "step": 14381 }, { "epoch": 0.7391304347826086, "grad_norm": 1.0481690168380737, "learning_rate": 1.6809858078399983e-06, "loss": 0.7056, "step": 14382 }, { "epoch": 0.7391818275259533, "grad_norm": 1.120600938796997, "learning_rate": 1.6803634046712946e-06, "loss": 0.6824, "step": 14383 }, { "epoch": 0.7392332202692979, "grad_norm": 1.11635160446167, "learning_rate": 1.6797410934760184e-06, "loss": 0.7611, "step": 14384 }, { "epoch": 0.7392846130126426, "grad_norm": 1.0553030967712402, "learning_rate": 1.679118874271416e-06, "loss": 0.6849, "step": 14385 }, { "epoch": 0.7393360057559872, "grad_norm": 1.143857717514038, "learning_rate": 1.6784967470747255e-06, "loss": 0.7238, "step": 14386 }, { "epoch": 0.7393873984993319, "grad_norm": 1.0616540908813477, "learning_rate": 1.6778747119031812e-06, "loss": 0.6871, "step": 14387 }, { "epoch": 0.7394387912426765, "grad_norm": 0.7484264969825745, "learning_rate": 1.6772527687740208e-06, "loss": 0.6454, "step": 14388 }, { "epoch": 0.7394901839860212, "grad_norm": 0.7689594030380249, "learning_rate": 1.6766309177044743e-06, "loss": 0.6393, "step": 14389 }, { "epoch": 0.7395415767293658, "grad_norm": 1.110905647277832, "learning_rate": 1.67600915871177e-06, "loss": 0.6834, "step": 14390 }, { "epoch": 0.7395929694727105, "grad_norm": 0.7399776577949524, "learning_rate": 1.675387491813133e-06, "loss": 0.6214, "step": 14391 }, { "epoch": 0.7396443622160551, "grad_norm": 1.1431277990341187, "learning_rate": 1.674765917025792e-06, "loss": 0.7284, "step": 14392 }, { "epoch": 0.7396957549593998, "grad_norm": 0.7543904781341553, "learning_rate": 1.6741444343669616e-06, "loss": 0.6676, "step": 14393 }, { "epoch": 0.7397471477027444, "grad_norm": 1.1097379922866821, "learning_rate": 1.6735230438538657e-06, "loss": 0.6909, "step": 14394 }, { "epoch": 0.7397985404460891, "grad_norm": 1.19999098777771, "learning_rate": 1.6729017455037188e-06, "loss": 0.6795, "step": 14395 }, { "epoch": 0.7398499331894337, "grad_norm": 0.8068374395370483, "learning_rate": 1.6722805393337328e-06, "loss": 0.6355, "step": 14396 }, { "epoch": 0.7399013259327782, "grad_norm": 1.103184461593628, "learning_rate": 1.6716594253611218e-06, "loss": 0.6873, "step": 14397 }, { "epoch": 0.7399527186761229, "grad_norm": 1.0564887523651123, "learning_rate": 1.6710384036030913e-06, "loss": 0.6572, "step": 14398 }, { "epoch": 0.7400041114194675, "grad_norm": 1.0392671823501587, "learning_rate": 1.670417474076852e-06, "loss": 0.6831, "step": 14399 }, { "epoch": 0.7400555041628122, "grad_norm": 1.2166334390640259, "learning_rate": 1.669796636799601e-06, "loss": 0.702, "step": 14400 }, { "epoch": 0.7401068969061568, "grad_norm": 1.0539792776107788, "learning_rate": 1.6691758917885436e-06, "loss": 0.7066, "step": 14401 }, { "epoch": 0.7401582896495015, "grad_norm": 1.0950050354003906, "learning_rate": 1.6685552390608773e-06, "loss": 0.7173, "step": 14402 }, { "epoch": 0.7402096823928461, "grad_norm": 1.070680022239685, "learning_rate": 1.6679346786337968e-06, "loss": 0.6797, "step": 14403 }, { "epoch": 0.7402610751361908, "grad_norm": 1.0760188102722168, "learning_rate": 1.6673142105244944e-06, "loss": 0.7342, "step": 14404 }, { "epoch": 0.7403124678795354, "grad_norm": 1.0499087572097778, "learning_rate": 1.6666938347501639e-06, "loss": 0.6629, "step": 14405 }, { "epoch": 0.7403638606228801, "grad_norm": 1.176893711090088, "learning_rate": 1.6660735513279908e-06, "loss": 0.67, "step": 14406 }, { "epoch": 0.7404152533662247, "grad_norm": 1.0847781896591187, "learning_rate": 1.66545336027516e-06, "loss": 0.7105, "step": 14407 }, { "epoch": 0.7404666461095694, "grad_norm": 1.1286346912384033, "learning_rate": 1.6648332616088591e-06, "loss": 0.713, "step": 14408 }, { "epoch": 0.740518038852914, "grad_norm": 1.0948981046676636, "learning_rate": 1.6642132553462614e-06, "loss": 0.739, "step": 14409 }, { "epoch": 0.7405694315962587, "grad_norm": 1.1173588037490845, "learning_rate": 1.6635933415045508e-06, "loss": 0.7488, "step": 14410 }, { "epoch": 0.7406208243396033, "grad_norm": 1.100874423980713, "learning_rate": 1.6629735201008995e-06, "loss": 0.6462, "step": 14411 }, { "epoch": 0.7406722170829478, "grad_norm": 1.084777593612671, "learning_rate": 1.6623537911524811e-06, "loss": 0.7555, "step": 14412 }, { "epoch": 0.7407236098262925, "grad_norm": 1.1028956174850464, "learning_rate": 1.6617341546764637e-06, "loss": 0.7103, "step": 14413 }, { "epoch": 0.7407750025696371, "grad_norm": 1.0656365156173706, "learning_rate": 1.6611146106900188e-06, "loss": 0.703, "step": 14414 }, { "epoch": 0.7408263953129818, "grad_norm": 1.5592055320739746, "learning_rate": 1.6604951592103085e-06, "loss": 0.6662, "step": 14415 }, { "epoch": 0.7408777880563264, "grad_norm": 1.1778279542922974, "learning_rate": 1.6598758002544968e-06, "loss": 0.7409, "step": 14416 }, { "epoch": 0.7409291807996711, "grad_norm": 0.9984576106071472, "learning_rate": 1.659256533839741e-06, "loss": 0.6356, "step": 14417 }, { "epoch": 0.7409805735430157, "grad_norm": 1.0771429538726807, "learning_rate": 1.658637359983201e-06, "loss": 0.6912, "step": 14418 }, { "epoch": 0.7410319662863604, "grad_norm": 1.1313060522079468, "learning_rate": 1.6580182787020316e-06, "loss": 0.6706, "step": 14419 }, { "epoch": 0.741083359029705, "grad_norm": 1.1150556802749634, "learning_rate": 1.6573992900133829e-06, "loss": 0.7111, "step": 14420 }, { "epoch": 0.7411347517730497, "grad_norm": 0.7771188616752625, "learning_rate": 1.6567803939344079e-06, "loss": 0.679, "step": 14421 }, { "epoch": 0.7411861445163943, "grad_norm": 1.1444941759109497, "learning_rate": 1.6561615904822492e-06, "loss": 0.6856, "step": 14422 }, { "epoch": 0.741237537259739, "grad_norm": 0.7549691200256348, "learning_rate": 1.6555428796740547e-06, "loss": 0.6639, "step": 14423 }, { "epoch": 0.7412889300030836, "grad_norm": 1.0170756578445435, "learning_rate": 1.6549242615269657e-06, "loss": 0.6378, "step": 14424 }, { "epoch": 0.7413403227464282, "grad_norm": 0.9287945628166199, "learning_rate": 1.6543057360581205e-06, "loss": 0.6573, "step": 14425 }, { "epoch": 0.7413917154897729, "grad_norm": 1.1105414628982544, "learning_rate": 1.6536873032846557e-06, "loss": 0.7085, "step": 14426 }, { "epoch": 0.7414431082331174, "grad_norm": 1.11565363407135, "learning_rate": 1.6530689632237074e-06, "loss": 0.7008, "step": 14427 }, { "epoch": 0.7414945009764621, "grad_norm": 1.0347980260849, "learning_rate": 1.6524507158924059e-06, "loss": 0.6648, "step": 14428 }, { "epoch": 0.7415458937198067, "grad_norm": 1.012943983078003, "learning_rate": 1.6518325613078796e-06, "loss": 0.7093, "step": 14429 }, { "epoch": 0.7415972864631514, "grad_norm": 0.7016245722770691, "learning_rate": 1.651214499487257e-06, "loss": 0.6068, "step": 14430 }, { "epoch": 0.741648679206496, "grad_norm": 1.0012305974960327, "learning_rate": 1.6505965304476612e-06, "loss": 0.6458, "step": 14431 }, { "epoch": 0.7417000719498407, "grad_norm": 0.7446156740188599, "learning_rate": 1.6499786542062135e-06, "loss": 0.6415, "step": 14432 }, { "epoch": 0.7417514646931853, "grad_norm": 1.1068799495697021, "learning_rate": 1.6493608707800307e-06, "loss": 0.7141, "step": 14433 }, { "epoch": 0.74180285743653, "grad_norm": 1.1145105361938477, "learning_rate": 1.648743180186234e-06, "loss": 0.693, "step": 14434 }, { "epoch": 0.7418542501798746, "grad_norm": 0.6873275637626648, "learning_rate": 1.6481255824419312e-06, "loss": 0.6775, "step": 14435 }, { "epoch": 0.7419056429232193, "grad_norm": 0.6833871603012085, "learning_rate": 1.647508077564237e-06, "loss": 0.6518, "step": 14436 }, { "epoch": 0.7419570356665639, "grad_norm": 1.0355788469314575, "learning_rate": 1.6468906655702598e-06, "loss": 0.6826, "step": 14437 }, { "epoch": 0.7420084284099085, "grad_norm": 1.0751303434371948, "learning_rate": 1.646273346477103e-06, "loss": 0.6574, "step": 14438 }, { "epoch": 0.7420598211532532, "grad_norm": 0.7193439602851868, "learning_rate": 1.6456561203018735e-06, "loss": 0.6668, "step": 14439 }, { "epoch": 0.7421112138965978, "grad_norm": 0.657912015914917, "learning_rate": 1.6450389870616701e-06, "loss": 0.6613, "step": 14440 }, { "epoch": 0.7421626066399425, "grad_norm": 1.0806620121002197, "learning_rate": 1.644421946773591e-06, "loss": 0.6639, "step": 14441 }, { "epoch": 0.742213999383287, "grad_norm": 1.0534764528274536, "learning_rate": 1.6438049994547312e-06, "loss": 0.6853, "step": 14442 }, { "epoch": 0.7422653921266317, "grad_norm": 1.0801892280578613, "learning_rate": 1.6431881451221876e-06, "loss": 0.6718, "step": 14443 }, { "epoch": 0.7423167848699763, "grad_norm": 0.8470436334609985, "learning_rate": 1.6425713837930446e-06, "loss": 0.6289, "step": 14444 }, { "epoch": 0.742368177613321, "grad_norm": 1.0590234994888306, "learning_rate": 1.6419547154843945e-06, "loss": 0.7243, "step": 14445 }, { "epoch": 0.7424195703566656, "grad_norm": 1.1374567747116089, "learning_rate": 1.6413381402133199e-06, "loss": 0.7092, "step": 14446 }, { "epoch": 0.7424709631000103, "grad_norm": 1.0237888097763062, "learning_rate": 1.640721657996907e-06, "loss": 0.7368, "step": 14447 }, { "epoch": 0.7425223558433549, "grad_norm": 0.7743707299232483, "learning_rate": 1.640105268852234e-06, "loss": 0.6768, "step": 14448 }, { "epoch": 0.7425737485866996, "grad_norm": 1.0602787733078003, "learning_rate": 1.6394889727963765e-06, "loss": 0.7498, "step": 14449 }, { "epoch": 0.7426251413300442, "grad_norm": 1.0571208000183105, "learning_rate": 1.6388727698464151e-06, "loss": 0.6865, "step": 14450 }, { "epoch": 0.7426765340733888, "grad_norm": 1.067276120185852, "learning_rate": 1.638256660019415e-06, "loss": 0.7306, "step": 14451 }, { "epoch": 0.7427279268167335, "grad_norm": 1.0972620248794556, "learning_rate": 1.6376406433324521e-06, "loss": 0.7325, "step": 14452 }, { "epoch": 0.7427793195600781, "grad_norm": 1.0665996074676514, "learning_rate": 1.6370247198025908e-06, "loss": 0.6595, "step": 14453 }, { "epoch": 0.7428307123034228, "grad_norm": 1.0487874746322632, "learning_rate": 1.6364088894468966e-06, "loss": 0.6999, "step": 14454 }, { "epoch": 0.7428821050467674, "grad_norm": 1.130619764328003, "learning_rate": 1.6357931522824294e-06, "loss": 0.7126, "step": 14455 }, { "epoch": 0.7429334977901121, "grad_norm": 1.067199468612671, "learning_rate": 1.6351775083262527e-06, "loss": 0.7005, "step": 14456 }, { "epoch": 0.7429848905334566, "grad_norm": 0.9996190667152405, "learning_rate": 1.6345619575954213e-06, "loss": 0.7012, "step": 14457 }, { "epoch": 0.7430362832768013, "grad_norm": 1.0502296686172485, "learning_rate": 1.6339465001069881e-06, "loss": 0.6348, "step": 14458 }, { "epoch": 0.7430876760201459, "grad_norm": 1.1761276721954346, "learning_rate": 1.633331135878008e-06, "loss": 0.694, "step": 14459 }, { "epoch": 0.7431390687634906, "grad_norm": 0.6698073148727417, "learning_rate": 1.6327158649255292e-06, "loss": 0.6386, "step": 14460 }, { "epoch": 0.7431904615068352, "grad_norm": 1.075586199760437, "learning_rate": 1.6321006872665973e-06, "loss": 0.7532, "step": 14461 }, { "epoch": 0.7432418542501799, "grad_norm": 1.1371773481369019, "learning_rate": 1.6314856029182551e-06, "loss": 0.7344, "step": 14462 }, { "epoch": 0.7432932469935245, "grad_norm": 0.7527408599853516, "learning_rate": 1.6308706118975497e-06, "loss": 0.6385, "step": 14463 }, { "epoch": 0.7433446397368692, "grad_norm": 1.0566409826278687, "learning_rate": 1.6302557142215126e-06, "loss": 0.744, "step": 14464 }, { "epoch": 0.7433960324802138, "grad_norm": 1.1459152698516846, "learning_rate": 1.6296409099071847e-06, "loss": 0.656, "step": 14465 }, { "epoch": 0.7434474252235584, "grad_norm": 1.0621795654296875, "learning_rate": 1.6290261989715988e-06, "loss": 0.7039, "step": 14466 }, { "epoch": 0.7434988179669031, "grad_norm": 1.0709971189498901, "learning_rate": 1.6284115814317858e-06, "loss": 0.6479, "step": 14467 }, { "epoch": 0.7435502107102477, "grad_norm": 1.2010709047317505, "learning_rate": 1.6277970573047725e-06, "loss": 0.6964, "step": 14468 }, { "epoch": 0.7436016034535924, "grad_norm": 1.0765489339828491, "learning_rate": 1.6271826266075879e-06, "loss": 0.6754, "step": 14469 }, { "epoch": 0.743652996196937, "grad_norm": 1.0910835266113281, "learning_rate": 1.6265682893572542e-06, "loss": 0.6746, "step": 14470 }, { "epoch": 0.7437043889402817, "grad_norm": 1.1003977060317993, "learning_rate": 1.6259540455707905e-06, "loss": 0.6998, "step": 14471 }, { "epoch": 0.7437557816836263, "grad_norm": 1.060490369796753, "learning_rate": 1.6253398952652195e-06, "loss": 0.6764, "step": 14472 }, { "epoch": 0.7438071744269709, "grad_norm": 0.7307368516921997, "learning_rate": 1.6247258384575498e-06, "loss": 0.6816, "step": 14473 }, { "epoch": 0.7438585671703155, "grad_norm": 1.0595277547836304, "learning_rate": 1.6241118751648e-06, "loss": 0.6952, "step": 14474 }, { "epoch": 0.7439099599136602, "grad_norm": 1.1569379568099976, "learning_rate": 1.6234980054039772e-06, "loss": 0.6301, "step": 14475 }, { "epoch": 0.7439613526570048, "grad_norm": 1.0260765552520752, "learning_rate": 1.6228842291920944e-06, "loss": 0.6693, "step": 14476 }, { "epoch": 0.7440127454003495, "grad_norm": 0.7341328263282776, "learning_rate": 1.622270546546149e-06, "loss": 0.6419, "step": 14477 }, { "epoch": 0.7440641381436941, "grad_norm": 1.0996835231781006, "learning_rate": 1.6216569574831498e-06, "loss": 0.718, "step": 14478 }, { "epoch": 0.7441155308870387, "grad_norm": 1.1043181419372559, "learning_rate": 1.6210434620200943e-06, "loss": 0.712, "step": 14479 }, { "epoch": 0.7441669236303834, "grad_norm": 1.1032187938690186, "learning_rate": 1.6204300601739793e-06, "loss": 0.7516, "step": 14480 }, { "epoch": 0.744218316373728, "grad_norm": 1.1184886693954468, "learning_rate": 1.619816751961802e-06, "loss": 0.7377, "step": 14481 }, { "epoch": 0.7442697091170727, "grad_norm": 1.0468957424163818, "learning_rate": 1.6192035374005527e-06, "loss": 0.6952, "step": 14482 }, { "epoch": 0.7443211018604173, "grad_norm": 1.1374778747558594, "learning_rate": 1.6185904165072224e-06, "loss": 0.6656, "step": 14483 }, { "epoch": 0.744372494603762, "grad_norm": 1.1544244289398193, "learning_rate": 1.6179773892987954e-06, "loss": 0.7108, "step": 14484 }, { "epoch": 0.7444238873471066, "grad_norm": 1.0571223497390747, "learning_rate": 1.6173644557922618e-06, "loss": 0.7322, "step": 14485 }, { "epoch": 0.7444752800904513, "grad_norm": 1.1012685298919678, "learning_rate": 1.6167516160045966e-06, "loss": 0.7105, "step": 14486 }, { "epoch": 0.7445266728337959, "grad_norm": 1.0537281036376953, "learning_rate": 1.6161388699527837e-06, "loss": 0.7002, "step": 14487 }, { "epoch": 0.7445780655771405, "grad_norm": 1.0902161598205566, "learning_rate": 1.6155262176537984e-06, "loss": 0.6969, "step": 14488 }, { "epoch": 0.7446294583204851, "grad_norm": 0.7881810069084167, "learning_rate": 1.6149136591246128e-06, "loss": 0.6332, "step": 14489 }, { "epoch": 0.7446808510638298, "grad_norm": 1.1265263557434082, "learning_rate": 1.6143011943822023e-06, "loss": 0.7117, "step": 14490 }, { "epoch": 0.7447322438071744, "grad_norm": 1.0892252922058105, "learning_rate": 1.6136888234435316e-06, "loss": 0.7196, "step": 14491 }, { "epoch": 0.744783636550519, "grad_norm": 1.1547425985336304, "learning_rate": 1.6130765463255727e-06, "loss": 0.7087, "step": 14492 }, { "epoch": 0.7448350292938637, "grad_norm": 1.1573731899261475, "learning_rate": 1.6124643630452824e-06, "loss": 0.6912, "step": 14493 }, { "epoch": 0.7448864220372083, "grad_norm": 1.1074811220169067, "learning_rate": 1.6118522736196268e-06, "loss": 0.7017, "step": 14494 }, { "epoch": 0.744937814780553, "grad_norm": 1.10630464553833, "learning_rate": 1.6112402780655628e-06, "loss": 0.7021, "step": 14495 }, { "epoch": 0.7449892075238976, "grad_norm": 1.064739465713501, "learning_rate": 1.6106283764000457e-06, "loss": 0.6311, "step": 14496 }, { "epoch": 0.7450406002672423, "grad_norm": 1.0302397012710571, "learning_rate": 1.6100165686400276e-06, "loss": 0.6682, "step": 14497 }, { "epoch": 0.7450919930105869, "grad_norm": 1.0948736667633057, "learning_rate": 1.6094048548024627e-06, "loss": 0.7271, "step": 14498 }, { "epoch": 0.7451433857539316, "grad_norm": 1.002073049545288, "learning_rate": 1.6087932349042972e-06, "loss": 0.6741, "step": 14499 }, { "epoch": 0.7451947784972762, "grad_norm": 0.7170271873474121, "learning_rate": 1.6081817089624747e-06, "loss": 0.6165, "step": 14500 }, { "epoch": 0.7452461712406209, "grad_norm": 1.085148572921753, "learning_rate": 1.6075702769939428e-06, "loss": 0.6686, "step": 14501 }, { "epoch": 0.7452975639839655, "grad_norm": 1.159510612487793, "learning_rate": 1.6069589390156354e-06, "loss": 0.6885, "step": 14502 }, { "epoch": 0.74534895672731, "grad_norm": 0.6998153924942017, "learning_rate": 1.6063476950444956e-06, "loss": 0.6614, "step": 14503 }, { "epoch": 0.7454003494706547, "grad_norm": 1.1275975704193115, "learning_rate": 1.6057365450974565e-06, "loss": 0.6925, "step": 14504 }, { "epoch": 0.7454517422139993, "grad_norm": 0.7556573748588562, "learning_rate": 1.6051254891914503e-06, "loss": 0.6465, "step": 14505 }, { "epoch": 0.745503134957344, "grad_norm": 1.081508755683899, "learning_rate": 1.6045145273434049e-06, "loss": 0.6697, "step": 14506 }, { "epoch": 0.7455545277006886, "grad_norm": 1.098134160041809, "learning_rate": 1.6039036595702516e-06, "loss": 0.6929, "step": 14507 }, { "epoch": 0.7456059204440333, "grad_norm": 1.0652738809585571, "learning_rate": 1.6032928858889129e-06, "loss": 0.7027, "step": 14508 }, { "epoch": 0.7456573131873779, "grad_norm": 1.0449364185333252, "learning_rate": 1.6026822063163094e-06, "loss": 0.714, "step": 14509 }, { "epoch": 0.7457087059307226, "grad_norm": 1.1428554058074951, "learning_rate": 1.6020716208693638e-06, "loss": 0.7099, "step": 14510 }, { "epoch": 0.7457600986740672, "grad_norm": 1.1615890264511108, "learning_rate": 1.6014611295649913e-06, "loss": 0.6992, "step": 14511 }, { "epoch": 0.7458114914174119, "grad_norm": 1.09706711769104, "learning_rate": 1.6008507324201057e-06, "loss": 0.7401, "step": 14512 }, { "epoch": 0.7458628841607565, "grad_norm": 1.0818554162979126, "learning_rate": 1.6002404294516172e-06, "loss": 0.6539, "step": 14513 }, { "epoch": 0.7459142769041012, "grad_norm": 1.068363904953003, "learning_rate": 1.5996302206764397e-06, "loss": 0.696, "step": 14514 }, { "epoch": 0.7459656696474458, "grad_norm": 1.1185141801834106, "learning_rate": 1.5990201061114735e-06, "loss": 0.6795, "step": 14515 }, { "epoch": 0.7460170623907905, "grad_norm": 1.102892279624939, "learning_rate": 1.5984100857736262e-06, "loss": 0.6981, "step": 14516 }, { "epoch": 0.7460684551341351, "grad_norm": 1.1448637247085571, "learning_rate": 1.5978001596797982e-06, "loss": 0.7107, "step": 14517 }, { "epoch": 0.7461198478774796, "grad_norm": 1.0524957180023193, "learning_rate": 1.5971903278468876e-06, "loss": 0.7193, "step": 14518 }, { "epoch": 0.7461712406208243, "grad_norm": 1.0600471496582031, "learning_rate": 1.5965805902917887e-06, "loss": 0.709, "step": 14519 }, { "epoch": 0.7462226333641689, "grad_norm": 0.7105703353881836, "learning_rate": 1.5959709470313983e-06, "loss": 0.6114, "step": 14520 }, { "epoch": 0.7462740261075136, "grad_norm": 1.126447081565857, "learning_rate": 1.595361398082605e-06, "loss": 0.6907, "step": 14521 }, { "epoch": 0.7463254188508582, "grad_norm": 0.76372891664505, "learning_rate": 1.594751943462296e-06, "loss": 0.6258, "step": 14522 }, { "epoch": 0.7463768115942029, "grad_norm": 1.1397889852523804, "learning_rate": 1.5941425831873598e-06, "loss": 0.7151, "step": 14523 }, { "epoch": 0.7464282043375475, "grad_norm": 1.0411078929901123, "learning_rate": 1.593533317274677e-06, "loss": 0.6991, "step": 14524 }, { "epoch": 0.7464795970808922, "grad_norm": 1.0593031644821167, "learning_rate": 1.5929241457411287e-06, "loss": 0.6751, "step": 14525 }, { "epoch": 0.7465309898242368, "grad_norm": 1.074785828590393, "learning_rate": 1.5923150686035904e-06, "loss": 0.7108, "step": 14526 }, { "epoch": 0.7465823825675815, "grad_norm": 1.077697992324829, "learning_rate": 1.5917060858789425e-06, "loss": 0.6936, "step": 14527 }, { "epoch": 0.7466337753109261, "grad_norm": 1.0711596012115479, "learning_rate": 1.59109719758405e-06, "loss": 0.7351, "step": 14528 }, { "epoch": 0.7466851680542708, "grad_norm": 1.1203749179840088, "learning_rate": 1.5904884037357881e-06, "loss": 0.7026, "step": 14529 }, { "epoch": 0.7467365607976154, "grad_norm": 1.076998233795166, "learning_rate": 1.5898797043510222e-06, "loss": 0.7283, "step": 14530 }, { "epoch": 0.7467879535409601, "grad_norm": 1.0998997688293457, "learning_rate": 1.589271099446616e-06, "loss": 0.68, "step": 14531 }, { "epoch": 0.7468393462843047, "grad_norm": 0.846117377281189, "learning_rate": 1.588662589039433e-06, "loss": 0.6679, "step": 14532 }, { "epoch": 0.7468907390276492, "grad_norm": 1.0174329280853271, "learning_rate": 1.5880541731463328e-06, "loss": 0.6802, "step": 14533 }, { "epoch": 0.7469421317709939, "grad_norm": 0.7177878618240356, "learning_rate": 1.5874458517841706e-06, "loss": 0.645, "step": 14534 }, { "epoch": 0.7469935245143385, "grad_norm": 1.0515228509902954, "learning_rate": 1.5868376249697997e-06, "loss": 0.6867, "step": 14535 }, { "epoch": 0.7470449172576832, "grad_norm": 1.0683770179748535, "learning_rate": 1.5862294927200767e-06, "loss": 0.7196, "step": 14536 }, { "epoch": 0.7470963100010278, "grad_norm": 1.0987048149108887, "learning_rate": 1.5856214550518428e-06, "loss": 0.72, "step": 14537 }, { "epoch": 0.7471477027443725, "grad_norm": 1.0804156064987183, "learning_rate": 1.5850135119819493e-06, "loss": 0.6493, "step": 14538 }, { "epoch": 0.7471990954877171, "grad_norm": 1.1463483572006226, "learning_rate": 1.5844056635272375e-06, "loss": 0.7099, "step": 14539 }, { "epoch": 0.7472504882310618, "grad_norm": 1.0617399215698242, "learning_rate": 1.5837979097045513e-06, "loss": 0.7329, "step": 14540 }, { "epoch": 0.7473018809744064, "grad_norm": 1.160759687423706, "learning_rate": 1.5831902505307266e-06, "loss": 0.6865, "step": 14541 }, { "epoch": 0.7473532737177511, "grad_norm": 1.0092291831970215, "learning_rate": 1.5825826860225984e-06, "loss": 0.6861, "step": 14542 }, { "epoch": 0.7474046664610957, "grad_norm": 0.7182475328445435, "learning_rate": 1.5819752161970041e-06, "loss": 0.6849, "step": 14543 }, { "epoch": 0.7474560592044404, "grad_norm": 1.113362431526184, "learning_rate": 1.5813678410707683e-06, "loss": 0.6957, "step": 14544 }, { "epoch": 0.747507451947785, "grad_norm": 1.0936427116394043, "learning_rate": 1.5807605606607234e-06, "loss": 0.7136, "step": 14545 }, { "epoch": 0.7475588446911297, "grad_norm": 1.088171362876892, "learning_rate": 1.5801533749836928e-06, "loss": 0.7113, "step": 14546 }, { "epoch": 0.7476102374344743, "grad_norm": 1.0859395265579224, "learning_rate": 1.5795462840564995e-06, "loss": 0.7175, "step": 14547 }, { "epoch": 0.7476616301778188, "grad_norm": 1.073412299156189, "learning_rate": 1.5789392878959613e-06, "loss": 0.7141, "step": 14548 }, { "epoch": 0.7477130229211635, "grad_norm": 1.0909572839736938, "learning_rate": 1.5783323865189e-06, "loss": 0.6582, "step": 14549 }, { "epoch": 0.7477644156645081, "grad_norm": 1.123697280883789, "learning_rate": 1.577725579942127e-06, "loss": 0.694, "step": 14550 }, { "epoch": 0.7478158084078528, "grad_norm": 1.052595615386963, "learning_rate": 1.577118868182454e-06, "loss": 0.664, "step": 14551 }, { "epoch": 0.7478672011511974, "grad_norm": 1.0164849758148193, "learning_rate": 1.576512251256695e-06, "loss": 0.7173, "step": 14552 }, { "epoch": 0.7479185938945421, "grad_norm": 1.3022266626358032, "learning_rate": 1.5759057291816504e-06, "loss": 0.7396, "step": 14553 }, { "epoch": 0.7479699866378867, "grad_norm": 1.105928659439087, "learning_rate": 1.5752993019741293e-06, "loss": 0.7448, "step": 14554 }, { "epoch": 0.7480213793812314, "grad_norm": 0.6971480250358582, "learning_rate": 1.5746929696509295e-06, "loss": 0.6364, "step": 14555 }, { "epoch": 0.748072772124576, "grad_norm": 1.050684928894043, "learning_rate": 1.574086732228856e-06, "loss": 0.7055, "step": 14556 }, { "epoch": 0.7481241648679207, "grad_norm": 0.8172042965888977, "learning_rate": 1.5734805897246975e-06, "loss": 0.6546, "step": 14557 }, { "epoch": 0.7481755576112653, "grad_norm": 1.0912295579910278, "learning_rate": 1.5728745421552533e-06, "loss": 0.6857, "step": 14558 }, { "epoch": 0.74822695035461, "grad_norm": 1.078141212463379, "learning_rate": 1.572268589537313e-06, "loss": 0.6535, "step": 14559 }, { "epoch": 0.7482783430979546, "grad_norm": 1.1294174194335938, "learning_rate": 1.5716627318876625e-06, "loss": 0.7245, "step": 14560 }, { "epoch": 0.7483297358412992, "grad_norm": 1.2866188287734985, "learning_rate": 1.5710569692230915e-06, "loss": 0.6683, "step": 14561 }, { "epoch": 0.7483811285846439, "grad_norm": 1.177375316619873, "learning_rate": 1.5704513015603816e-06, "loss": 0.7064, "step": 14562 }, { "epoch": 0.7484325213279885, "grad_norm": 1.1644792556762695, "learning_rate": 1.5698457289163133e-06, "loss": 0.7272, "step": 14563 }, { "epoch": 0.7484839140713331, "grad_norm": 1.1118727922439575, "learning_rate": 1.5692402513076626e-06, "loss": 0.7249, "step": 14564 }, { "epoch": 0.7485353068146777, "grad_norm": 1.0487784147262573, "learning_rate": 1.5686348687512104e-06, "loss": 0.6323, "step": 14565 }, { "epoch": 0.7485866995580224, "grad_norm": 1.0837593078613281, "learning_rate": 1.568029581263722e-06, "loss": 0.634, "step": 14566 }, { "epoch": 0.748638092301367, "grad_norm": 0.6909868121147156, "learning_rate": 1.5674243888619723e-06, "loss": 0.6632, "step": 14567 }, { "epoch": 0.7486894850447117, "grad_norm": 1.0808559656143188, "learning_rate": 1.566819291562726e-06, "loss": 0.697, "step": 14568 }, { "epoch": 0.7487408777880563, "grad_norm": 1.1089955568313599, "learning_rate": 1.5662142893827526e-06, "loss": 0.657, "step": 14569 }, { "epoch": 0.748792270531401, "grad_norm": 1.077532172203064, "learning_rate": 1.5656093823388075e-06, "loss": 0.6939, "step": 14570 }, { "epoch": 0.7488436632747456, "grad_norm": 1.0215346813201904, "learning_rate": 1.5650045704476551e-06, "loss": 0.655, "step": 14571 }, { "epoch": 0.7488950560180903, "grad_norm": 1.1013273000717163, "learning_rate": 1.5643998537260508e-06, "loss": 0.7004, "step": 14572 }, { "epoch": 0.7489464487614349, "grad_norm": 1.0711328983306885, "learning_rate": 1.5637952321907468e-06, "loss": 0.7203, "step": 14573 }, { "epoch": 0.7489978415047795, "grad_norm": 1.0642130374908447, "learning_rate": 1.5631907058584984e-06, "loss": 0.6854, "step": 14574 }, { "epoch": 0.7490492342481242, "grad_norm": 1.0488909482955933, "learning_rate": 1.5625862747460525e-06, "loss": 0.6764, "step": 14575 }, { "epoch": 0.7491006269914688, "grad_norm": 1.0491408109664917, "learning_rate": 1.5619819388701556e-06, "loss": 0.6731, "step": 14576 }, { "epoch": 0.7491520197348135, "grad_norm": 0.727572500705719, "learning_rate": 1.5613776982475498e-06, "loss": 0.6767, "step": 14577 }, { "epoch": 0.7492034124781581, "grad_norm": 1.0920815467834473, "learning_rate": 1.5607735528949809e-06, "loss": 0.6964, "step": 14578 }, { "epoch": 0.7492548052215027, "grad_norm": 1.144430160522461, "learning_rate": 1.5601695028291803e-06, "loss": 0.7364, "step": 14579 }, { "epoch": 0.7493061979648473, "grad_norm": 1.1286284923553467, "learning_rate": 1.5595655480668892e-06, "loss": 0.769, "step": 14580 }, { "epoch": 0.749357590708192, "grad_norm": 1.0696187019348145, "learning_rate": 1.5589616886248392e-06, "loss": 0.6849, "step": 14581 }, { "epoch": 0.7494089834515366, "grad_norm": 1.0763758420944214, "learning_rate": 1.558357924519759e-06, "loss": 0.736, "step": 14582 }, { "epoch": 0.7494603761948813, "grad_norm": 1.0998051166534424, "learning_rate": 1.5577542557683795e-06, "loss": 0.766, "step": 14583 }, { "epoch": 0.7495117689382259, "grad_norm": 1.0414323806762695, "learning_rate": 1.5571506823874222e-06, "loss": 0.7192, "step": 14584 }, { "epoch": 0.7495631616815706, "grad_norm": 0.7306990623474121, "learning_rate": 1.5565472043936153e-06, "loss": 0.648, "step": 14585 }, { "epoch": 0.7496145544249152, "grad_norm": 1.2100245952606201, "learning_rate": 1.5559438218036715e-06, "loss": 0.703, "step": 14586 }, { "epoch": 0.7496659471682598, "grad_norm": 1.096915602684021, "learning_rate": 1.5553405346343136e-06, "loss": 0.7429, "step": 14587 }, { "epoch": 0.7497173399116045, "grad_norm": 1.0621086359024048, "learning_rate": 1.5547373429022538e-06, "loss": 0.7181, "step": 14588 }, { "epoch": 0.7497687326549491, "grad_norm": 1.1159355640411377, "learning_rate": 1.5541342466242048e-06, "loss": 0.7111, "step": 14589 }, { "epoch": 0.7498201253982938, "grad_norm": 1.1127530336380005, "learning_rate": 1.553531245816874e-06, "loss": 0.716, "step": 14590 }, { "epoch": 0.7498715181416384, "grad_norm": 1.049605369567871, "learning_rate": 1.5529283404969708e-06, "loss": 0.7121, "step": 14591 }, { "epoch": 0.7499229108849831, "grad_norm": 1.033848762512207, "learning_rate": 1.5523255306811985e-06, "loss": 0.6896, "step": 14592 }, { "epoch": 0.7499743036283277, "grad_norm": 1.0976485013961792, "learning_rate": 1.5517228163862564e-06, "loss": 0.7484, "step": 14593 }, { "epoch": 0.7500256963716723, "grad_norm": 0.80330491065979, "learning_rate": 1.5511201976288482e-06, "loss": 0.654, "step": 14594 }, { "epoch": 0.7500770891150169, "grad_norm": 1.0927814245224, "learning_rate": 1.5505176744256635e-06, "loss": 0.7452, "step": 14595 }, { "epoch": 0.7501284818583616, "grad_norm": 1.0446815490722656, "learning_rate": 1.5499152467933997e-06, "loss": 0.6778, "step": 14596 }, { "epoch": 0.7501798746017062, "grad_norm": 1.1000257730484009, "learning_rate": 1.5493129147487472e-06, "loss": 0.7208, "step": 14597 }, { "epoch": 0.7502312673450509, "grad_norm": 1.0486077070236206, "learning_rate": 1.5487106783083938e-06, "loss": 0.7552, "step": 14598 }, { "epoch": 0.7502826600883955, "grad_norm": 1.0124183893203735, "learning_rate": 1.548108537489023e-06, "loss": 0.7464, "step": 14599 }, { "epoch": 0.7503340528317402, "grad_norm": 1.1284147500991821, "learning_rate": 1.5475064923073218e-06, "loss": 0.7104, "step": 14600 }, { "epoch": 0.7503854455750848, "grad_norm": 1.072666049003601, "learning_rate": 1.5469045427799673e-06, "loss": 0.7136, "step": 14601 }, { "epoch": 0.7504368383184294, "grad_norm": 1.0590327978134155, "learning_rate": 1.5463026889236366e-06, "loss": 0.6623, "step": 14602 }, { "epoch": 0.7504882310617741, "grad_norm": 0.6954993009567261, "learning_rate": 1.5457009307550075e-06, "loss": 0.6194, "step": 14603 }, { "epoch": 0.7505396238051187, "grad_norm": 1.0340416431427002, "learning_rate": 1.5450992682907505e-06, "loss": 0.6904, "step": 14604 }, { "epoch": 0.7505910165484634, "grad_norm": 0.7251468896865845, "learning_rate": 1.5444977015475348e-06, "loss": 0.6404, "step": 14605 }, { "epoch": 0.750642409291808, "grad_norm": 0.8599929809570312, "learning_rate": 1.5438962305420268e-06, "loss": 0.649, "step": 14606 }, { "epoch": 0.7506938020351527, "grad_norm": 1.076378583908081, "learning_rate": 1.543294855290895e-06, "loss": 0.6665, "step": 14607 }, { "epoch": 0.7507451947784973, "grad_norm": 1.1116105318069458, "learning_rate": 1.542693575810794e-06, "loss": 0.6896, "step": 14608 }, { "epoch": 0.7507965875218419, "grad_norm": 1.1071462631225586, "learning_rate": 1.5420923921183883e-06, "loss": 0.716, "step": 14609 }, { "epoch": 0.7508479802651865, "grad_norm": 1.0668339729309082, "learning_rate": 1.5414913042303326e-06, "loss": 0.6528, "step": 14610 }, { "epoch": 0.7508993730085312, "grad_norm": 1.0244096517562866, "learning_rate": 1.540890312163279e-06, "loss": 0.6694, "step": 14611 }, { "epoch": 0.7509507657518758, "grad_norm": 1.0975127220153809, "learning_rate": 1.5402894159338815e-06, "loss": 0.6952, "step": 14612 }, { "epoch": 0.7510021584952205, "grad_norm": 1.1539146900177002, "learning_rate": 1.5396886155587865e-06, "loss": 0.6837, "step": 14613 }, { "epoch": 0.7510535512385651, "grad_norm": 1.0508637428283691, "learning_rate": 1.539087911054641e-06, "loss": 0.7746, "step": 14614 }, { "epoch": 0.7511049439819097, "grad_norm": 0.8155038952827454, "learning_rate": 1.5384873024380847e-06, "loss": 0.603, "step": 14615 }, { "epoch": 0.7511563367252544, "grad_norm": 1.1555852890014648, "learning_rate": 1.5378867897257621e-06, "loss": 0.7162, "step": 14616 }, { "epoch": 0.751207729468599, "grad_norm": 0.7882441282272339, "learning_rate": 1.5372863729343095e-06, "loss": 0.6445, "step": 14617 }, { "epoch": 0.7512591222119437, "grad_norm": 1.050296664237976, "learning_rate": 1.5366860520803617e-06, "loss": 0.6924, "step": 14618 }, { "epoch": 0.7513105149552883, "grad_norm": 1.0270732641220093, "learning_rate": 1.5360858271805495e-06, "loss": 0.7052, "step": 14619 }, { "epoch": 0.751361907698633, "grad_norm": 1.0977312326431274, "learning_rate": 1.5354856982515075e-06, "loss": 0.6419, "step": 14620 }, { "epoch": 0.7514133004419776, "grad_norm": 1.0647611618041992, "learning_rate": 1.5348856653098564e-06, "loss": 0.6847, "step": 14621 }, { "epoch": 0.7514646931853223, "grad_norm": 0.7457675933837891, "learning_rate": 1.5342857283722252e-06, "loss": 0.6512, "step": 14622 }, { "epoch": 0.7515160859286669, "grad_norm": 1.1003379821777344, "learning_rate": 1.5336858874552347e-06, "loss": 0.702, "step": 14623 }, { "epoch": 0.7515674786720115, "grad_norm": 1.0859150886535645, "learning_rate": 1.5330861425755016e-06, "loss": 0.6916, "step": 14624 }, { "epoch": 0.7516188714153561, "grad_norm": 1.0299313068389893, "learning_rate": 1.5324864937496464e-06, "loss": 0.6725, "step": 14625 }, { "epoch": 0.7516702641587008, "grad_norm": 1.1331722736358643, "learning_rate": 1.5318869409942805e-06, "loss": 0.6525, "step": 14626 }, { "epoch": 0.7517216569020454, "grad_norm": 1.0439374446868896, "learning_rate": 1.5312874843260155e-06, "loss": 0.6756, "step": 14627 }, { "epoch": 0.75177304964539, "grad_norm": 1.1752055883407593, "learning_rate": 1.5306881237614585e-06, "loss": 0.7062, "step": 14628 }, { "epoch": 0.7518244423887347, "grad_norm": 1.2122249603271484, "learning_rate": 1.5300888593172197e-06, "loss": 0.7152, "step": 14629 }, { "epoch": 0.7518758351320793, "grad_norm": 0.7624787092208862, "learning_rate": 1.529489691009896e-06, "loss": 0.6764, "step": 14630 }, { "epoch": 0.751927227875424, "grad_norm": 1.1658755540847778, "learning_rate": 1.5288906188560921e-06, "loss": 0.6781, "step": 14631 }, { "epoch": 0.7519786206187686, "grad_norm": 1.1925791501998901, "learning_rate": 1.5282916428724037e-06, "loss": 0.7448, "step": 14632 }, { "epoch": 0.7520300133621133, "grad_norm": 1.104042649269104, "learning_rate": 1.5276927630754284e-06, "loss": 0.7336, "step": 14633 }, { "epoch": 0.7520814061054579, "grad_norm": 1.0433967113494873, "learning_rate": 1.5270939794817568e-06, "loss": 0.6799, "step": 14634 }, { "epoch": 0.7521327988488026, "grad_norm": 0.7943648099899292, "learning_rate": 1.5264952921079785e-06, "loss": 0.6486, "step": 14635 }, { "epoch": 0.7521841915921472, "grad_norm": 6.587066173553467, "learning_rate": 1.5258967009706838e-06, "loss": 0.8681, "step": 14636 }, { "epoch": 0.7522355843354919, "grad_norm": 1.0755301713943481, "learning_rate": 1.5252982060864519e-06, "loss": 0.7224, "step": 14637 }, { "epoch": 0.7522869770788365, "grad_norm": 1.0901204347610474, "learning_rate": 1.524699807471869e-06, "loss": 0.6499, "step": 14638 }, { "epoch": 0.7523383698221812, "grad_norm": 1.1285970211029053, "learning_rate": 1.5241015051435126e-06, "loss": 0.6964, "step": 14639 }, { "epoch": 0.7523897625655257, "grad_norm": 1.020275354385376, "learning_rate": 1.5235032991179594e-06, "loss": 0.6933, "step": 14640 }, { "epoch": 0.7524411553088703, "grad_norm": 0.7390571236610413, "learning_rate": 1.5229051894117814e-06, "loss": 0.6698, "step": 14641 }, { "epoch": 0.752492548052215, "grad_norm": 1.0690643787384033, "learning_rate": 1.5223071760415531e-06, "loss": 0.6811, "step": 14642 }, { "epoch": 0.7525439407955596, "grad_norm": 1.2547463178634644, "learning_rate": 1.5217092590238414e-06, "loss": 0.6922, "step": 14643 }, { "epoch": 0.7525953335389043, "grad_norm": 1.1222835779190063, "learning_rate": 1.5211114383752107e-06, "loss": 0.7404, "step": 14644 }, { "epoch": 0.7526467262822489, "grad_norm": 1.0229933261871338, "learning_rate": 1.5205137141122283e-06, "loss": 0.6338, "step": 14645 }, { "epoch": 0.7526981190255936, "grad_norm": 1.1105070114135742, "learning_rate": 1.5199160862514489e-06, "loss": 0.7126, "step": 14646 }, { "epoch": 0.7527495117689382, "grad_norm": 0.6789641380310059, "learning_rate": 1.5193185548094352e-06, "loss": 0.6643, "step": 14647 }, { "epoch": 0.7528009045122829, "grad_norm": 1.119982123374939, "learning_rate": 1.5187211198027384e-06, "loss": 0.714, "step": 14648 }, { "epoch": 0.7528522972556275, "grad_norm": 1.0780493021011353, "learning_rate": 1.5181237812479167e-06, "loss": 0.7298, "step": 14649 }, { "epoch": 0.7529036899989722, "grad_norm": 1.0204311609268188, "learning_rate": 1.5175265391615124e-06, "loss": 0.7045, "step": 14650 }, { "epoch": 0.7529550827423168, "grad_norm": 1.0576122999191284, "learning_rate": 1.5169293935600781e-06, "loss": 0.7343, "step": 14651 }, { "epoch": 0.7530064754856615, "grad_norm": 1.0363264083862305, "learning_rate": 1.5163323444601564e-06, "loss": 0.6365, "step": 14652 }, { "epoch": 0.7530578682290061, "grad_norm": 0.7709307074546814, "learning_rate": 1.515735391878288e-06, "loss": 0.6544, "step": 14653 }, { "epoch": 0.7531092609723508, "grad_norm": 1.1073668003082275, "learning_rate": 1.5151385358310145e-06, "loss": 0.7518, "step": 14654 }, { "epoch": 0.7531606537156953, "grad_norm": 1.0389786958694458, "learning_rate": 1.5145417763348707e-06, "loss": 0.7049, "step": 14655 }, { "epoch": 0.7532120464590399, "grad_norm": 1.0750513076782227, "learning_rate": 1.513945113406391e-06, "loss": 0.6797, "step": 14656 }, { "epoch": 0.7532634392023846, "grad_norm": 1.1291142702102661, "learning_rate": 1.5133485470621045e-06, "loss": 0.7425, "step": 14657 }, { "epoch": 0.7533148319457292, "grad_norm": 1.0670005083084106, "learning_rate": 1.5127520773185444e-06, "loss": 0.6299, "step": 14658 }, { "epoch": 0.7533662246890739, "grad_norm": 0.7200586199760437, "learning_rate": 1.5121557041922303e-06, "loss": 0.6849, "step": 14659 }, { "epoch": 0.7534176174324185, "grad_norm": 1.0573009252548218, "learning_rate": 1.5115594276996892e-06, "loss": 0.7155, "step": 14660 }, { "epoch": 0.7534690101757632, "grad_norm": 1.1047613620758057, "learning_rate": 1.510963247857441e-06, "loss": 0.7314, "step": 14661 }, { "epoch": 0.7535204029191078, "grad_norm": 1.0709912776947021, "learning_rate": 1.5103671646820005e-06, "loss": 0.6699, "step": 14662 }, { "epoch": 0.7535717956624525, "grad_norm": 1.3862683773040771, "learning_rate": 1.5097711781898872e-06, "loss": 0.6999, "step": 14663 }, { "epoch": 0.7536231884057971, "grad_norm": 1.0711385011672974, "learning_rate": 1.5091752883976108e-06, "loss": 0.6777, "step": 14664 }, { "epoch": 0.7536745811491418, "grad_norm": 0.711447536945343, "learning_rate": 1.5085794953216814e-06, "loss": 0.6691, "step": 14665 }, { "epoch": 0.7537259738924864, "grad_norm": 1.0052300691604614, "learning_rate": 1.507983798978605e-06, "loss": 0.6469, "step": 14666 }, { "epoch": 0.7537773666358311, "grad_norm": 1.0860706567764282, "learning_rate": 1.5073881993848878e-06, "loss": 0.7218, "step": 14667 }, { "epoch": 0.7538287593791757, "grad_norm": 1.0909597873687744, "learning_rate": 1.5067926965570306e-06, "loss": 0.6937, "step": 14668 }, { "epoch": 0.7538801521225204, "grad_norm": 1.0869536399841309, "learning_rate": 1.5061972905115325e-06, "loss": 0.6805, "step": 14669 }, { "epoch": 0.7539315448658649, "grad_norm": 1.1279218196868896, "learning_rate": 1.505601981264887e-06, "loss": 0.6851, "step": 14670 }, { "epoch": 0.7539829376092095, "grad_norm": 0.9909319877624512, "learning_rate": 1.505006768833594e-06, "loss": 0.7182, "step": 14671 }, { "epoch": 0.7540343303525542, "grad_norm": 1.1667131185531616, "learning_rate": 1.5044116532341368e-06, "loss": 0.7457, "step": 14672 }, { "epoch": 0.7540857230958988, "grad_norm": 0.986508309841156, "learning_rate": 1.503816634483009e-06, "loss": 0.6818, "step": 14673 }, { "epoch": 0.7541371158392435, "grad_norm": 1.0790176391601562, "learning_rate": 1.5032217125966941e-06, "loss": 0.6703, "step": 14674 }, { "epoch": 0.7541885085825881, "grad_norm": 0.6582726240158081, "learning_rate": 1.5026268875916744e-06, "loss": 0.6245, "step": 14675 }, { "epoch": 0.7542399013259328, "grad_norm": 1.1031982898712158, "learning_rate": 1.5020321594844318e-06, "loss": 0.7204, "step": 14676 }, { "epoch": 0.7542912940692774, "grad_norm": 1.0822887420654297, "learning_rate": 1.5014375282914428e-06, "loss": 0.7385, "step": 14677 }, { "epoch": 0.7543426868126221, "grad_norm": 0.7739769816398621, "learning_rate": 1.5008429940291824e-06, "loss": 0.6487, "step": 14678 }, { "epoch": 0.7543940795559667, "grad_norm": 1.0661025047302246, "learning_rate": 1.5002485567141206e-06, "loss": 0.6887, "step": 14679 }, { "epoch": 0.7544454722993114, "grad_norm": 1.2991708517074585, "learning_rate": 1.4996542163627304e-06, "loss": 0.7283, "step": 14680 }, { "epoch": 0.754496865042656, "grad_norm": 1.0514014959335327, "learning_rate": 1.4990599729914767e-06, "loss": 0.627, "step": 14681 }, { "epoch": 0.7545482577860007, "grad_norm": 1.0946869850158691, "learning_rate": 1.4984658266168238e-06, "loss": 0.6776, "step": 14682 }, { "epoch": 0.7545996505293453, "grad_norm": 1.0848274230957031, "learning_rate": 1.4978717772552315e-06, "loss": 0.6517, "step": 14683 }, { "epoch": 0.75465104327269, "grad_norm": 1.080322265625, "learning_rate": 1.4972778249231612e-06, "loss": 0.6928, "step": 14684 }, { "epoch": 0.7547024360160345, "grad_norm": 1.077034831047058, "learning_rate": 1.4966839696370672e-06, "loss": 0.6815, "step": 14685 }, { "epoch": 0.7547538287593791, "grad_norm": 0.8386796712875366, "learning_rate": 1.4960902114134018e-06, "loss": 0.668, "step": 14686 }, { "epoch": 0.7548052215027238, "grad_norm": 0.7833622694015503, "learning_rate": 1.4954965502686197e-06, "loss": 0.6679, "step": 14687 }, { "epoch": 0.7548566142460684, "grad_norm": 1.0739221572875977, "learning_rate": 1.4949029862191633e-06, "loss": 0.6743, "step": 14688 }, { "epoch": 0.7549080069894131, "grad_norm": 1.1791303157806396, "learning_rate": 1.4943095192814817e-06, "loss": 0.7518, "step": 14689 }, { "epoch": 0.7549593997327577, "grad_norm": 0.7052087187767029, "learning_rate": 1.4937161494720165e-06, "loss": 0.6566, "step": 14690 }, { "epoch": 0.7550107924761024, "grad_norm": 1.0414574146270752, "learning_rate": 1.4931228768072064e-06, "loss": 0.7458, "step": 14691 }, { "epoch": 0.755062185219447, "grad_norm": 1.0964345932006836, "learning_rate": 1.4925297013034885e-06, "loss": 0.697, "step": 14692 }, { "epoch": 0.7551135779627917, "grad_norm": 1.0862501859664917, "learning_rate": 1.4919366229772998e-06, "loss": 0.7084, "step": 14693 }, { "epoch": 0.7551649707061363, "grad_norm": 0.8173523545265198, "learning_rate": 1.49134364184507e-06, "loss": 0.6284, "step": 14694 }, { "epoch": 0.755216363449481, "grad_norm": 1.1427497863769531, "learning_rate": 1.4907507579232271e-06, "loss": 0.717, "step": 14695 }, { "epoch": 0.7552677561928256, "grad_norm": 1.0648058652877808, "learning_rate": 1.4901579712282005e-06, "loss": 0.6521, "step": 14696 }, { "epoch": 0.7553191489361702, "grad_norm": 1.0985503196716309, "learning_rate": 1.4895652817764123e-06, "loss": 0.7094, "step": 14697 }, { "epoch": 0.7553705416795149, "grad_norm": 0.7156886458396912, "learning_rate": 1.488972689584283e-06, "loss": 0.6671, "step": 14698 }, { "epoch": 0.7554219344228595, "grad_norm": 0.6683071255683899, "learning_rate": 1.4883801946682302e-06, "loss": 0.619, "step": 14699 }, { "epoch": 0.7554733271662041, "grad_norm": 1.0860047340393066, "learning_rate": 1.4877877970446736e-06, "loss": 0.7145, "step": 14700 }, { "epoch": 0.7555247199095487, "grad_norm": 1.1880815029144287, "learning_rate": 1.4871954967300195e-06, "loss": 0.7069, "step": 14701 }, { "epoch": 0.7555761126528934, "grad_norm": 1.0995229482650757, "learning_rate": 1.4866032937406838e-06, "loss": 0.7391, "step": 14702 }, { "epoch": 0.755627505396238, "grad_norm": 0.9969474673271179, "learning_rate": 1.4860111880930717e-06, "loss": 0.6347, "step": 14703 }, { "epoch": 0.7556788981395827, "grad_norm": 1.1377861499786377, "learning_rate": 1.4854191798035872e-06, "loss": 0.7374, "step": 14704 }, { "epoch": 0.7557302908829273, "grad_norm": 1.0894659757614136, "learning_rate": 1.4848272688886345e-06, "loss": 0.678, "step": 14705 }, { "epoch": 0.755781683626272, "grad_norm": 0.7088754773139954, "learning_rate": 1.4842354553646116e-06, "loss": 0.6116, "step": 14706 }, { "epoch": 0.7558330763696166, "grad_norm": 0.8325372934341431, "learning_rate": 1.4836437392479158e-06, "loss": 0.6579, "step": 14707 }, { "epoch": 0.7558844691129613, "grad_norm": 1.1061888933181763, "learning_rate": 1.4830521205549391e-06, "loss": 0.7424, "step": 14708 }, { "epoch": 0.7559358618563059, "grad_norm": 1.202398419380188, "learning_rate": 1.4824605993020768e-06, "loss": 0.7193, "step": 14709 }, { "epoch": 0.7559872545996505, "grad_norm": 0.6812346577644348, "learning_rate": 1.4818691755057147e-06, "loss": 0.6518, "step": 14710 }, { "epoch": 0.7560386473429952, "grad_norm": 1.055953860282898, "learning_rate": 1.4812778491822395e-06, "loss": 0.6658, "step": 14711 }, { "epoch": 0.7560900400863398, "grad_norm": 1.0800637006759644, "learning_rate": 1.4806866203480325e-06, "loss": 0.6986, "step": 14712 }, { "epoch": 0.7561414328296845, "grad_norm": 1.1118544340133667, "learning_rate": 1.4800954890194774e-06, "loss": 0.6833, "step": 14713 }, { "epoch": 0.7561928255730291, "grad_norm": 1.1362606287002563, "learning_rate": 1.4795044552129506e-06, "loss": 0.7288, "step": 14714 }, { "epoch": 0.7562442183163737, "grad_norm": 1.050248146057129, "learning_rate": 1.4789135189448272e-06, "loss": 0.6767, "step": 14715 }, { "epoch": 0.7562956110597183, "grad_norm": 1.141561508178711, "learning_rate": 1.4783226802314793e-06, "loss": 0.698, "step": 14716 }, { "epoch": 0.756347003803063, "grad_norm": 1.0791537761688232, "learning_rate": 1.4777319390892753e-06, "loss": 0.6131, "step": 14717 }, { "epoch": 0.7563983965464076, "grad_norm": 1.104942798614502, "learning_rate": 1.4771412955345854e-06, "loss": 0.6856, "step": 14718 }, { "epoch": 0.7564497892897523, "grad_norm": 0.9015743732452393, "learning_rate": 1.4765507495837723e-06, "loss": 0.6687, "step": 14719 }, { "epoch": 0.7565011820330969, "grad_norm": 0.8958661556243896, "learning_rate": 1.4759603012531977e-06, "loss": 0.6087, "step": 14720 }, { "epoch": 0.7565525747764416, "grad_norm": 1.1765196323394775, "learning_rate": 1.4753699505592183e-06, "loss": 0.682, "step": 14721 }, { "epoch": 0.7566039675197862, "grad_norm": 1.1300238370895386, "learning_rate": 1.4747796975181955e-06, "loss": 0.7107, "step": 14722 }, { "epoch": 0.7566553602631308, "grad_norm": 1.0682175159454346, "learning_rate": 1.4741895421464763e-06, "loss": 0.7295, "step": 14723 }, { "epoch": 0.7567067530064755, "grad_norm": 0.7258800268173218, "learning_rate": 1.4735994844604168e-06, "loss": 0.648, "step": 14724 }, { "epoch": 0.7567581457498201, "grad_norm": 0.7390783429145813, "learning_rate": 1.473009524476361e-06, "loss": 0.6368, "step": 14725 }, { "epoch": 0.7568095384931648, "grad_norm": 11.998215675354004, "learning_rate": 1.472419662210658e-06, "loss": 0.6954, "step": 14726 }, { "epoch": 0.7568609312365094, "grad_norm": 1.0905632972717285, "learning_rate": 1.4718298976796486e-06, "loss": 0.6824, "step": 14727 }, { "epoch": 0.7569123239798541, "grad_norm": 1.0100054740905762, "learning_rate": 1.471240230899671e-06, "loss": 0.6895, "step": 14728 }, { "epoch": 0.7569637167231987, "grad_norm": 1.1355903148651123, "learning_rate": 1.470650661887068e-06, "loss": 0.7073, "step": 14729 }, { "epoch": 0.7570151094665434, "grad_norm": 1.0258853435516357, "learning_rate": 1.4700611906581668e-06, "loss": 0.6989, "step": 14730 }, { "epoch": 0.7570665022098879, "grad_norm": 1.1530462503433228, "learning_rate": 1.4694718172293043e-06, "loss": 0.7427, "step": 14731 }, { "epoch": 0.7571178949532326, "grad_norm": 1.10906183719635, "learning_rate": 1.4688825416168085e-06, "loss": 0.7482, "step": 14732 }, { "epoch": 0.7571692876965772, "grad_norm": 1.081913709640503, "learning_rate": 1.468293363837005e-06, "loss": 0.6975, "step": 14733 }, { "epoch": 0.7572206804399219, "grad_norm": 1.0596214532852173, "learning_rate": 1.4677042839062167e-06, "loss": 0.6836, "step": 14734 }, { "epoch": 0.7572720731832665, "grad_norm": 1.1221777200698853, "learning_rate": 1.4671153018407675e-06, "loss": 0.7091, "step": 14735 }, { "epoch": 0.7573234659266112, "grad_norm": 1.0711395740509033, "learning_rate": 1.4665264176569732e-06, "loss": 0.71, "step": 14736 }, { "epoch": 0.7573748586699558, "grad_norm": 1.0611110925674438, "learning_rate": 1.4659376313711493e-06, "loss": 0.7518, "step": 14737 }, { "epoch": 0.7574262514133004, "grad_norm": 0.8284400701522827, "learning_rate": 1.465348942999612e-06, "loss": 0.621, "step": 14738 }, { "epoch": 0.7574776441566451, "grad_norm": 0.8643609285354614, "learning_rate": 1.4647603525586662e-06, "loss": 0.6798, "step": 14739 }, { "epoch": 0.7575290368999897, "grad_norm": 1.1170579195022583, "learning_rate": 1.4641718600646232e-06, "loss": 0.652, "step": 14740 }, { "epoch": 0.7575804296433344, "grad_norm": 1.1139161586761475, "learning_rate": 1.463583465533785e-06, "loss": 0.7205, "step": 14741 }, { "epoch": 0.757631822386679, "grad_norm": 0.8386148810386658, "learning_rate": 1.462995168982458e-06, "loss": 0.6349, "step": 14742 }, { "epoch": 0.7576832151300237, "grad_norm": 1.07239830493927, "learning_rate": 1.4624069704269356e-06, "loss": 0.7042, "step": 14743 }, { "epoch": 0.7577346078733683, "grad_norm": 1.2405868768692017, "learning_rate": 1.4618188698835183e-06, "loss": 0.7029, "step": 14744 }, { "epoch": 0.757786000616713, "grad_norm": 1.2066837549209595, "learning_rate": 1.4612308673684995e-06, "loss": 0.7578, "step": 14745 }, { "epoch": 0.7578373933600575, "grad_norm": 1.0945523977279663, "learning_rate": 1.4606429628981684e-06, "loss": 0.7309, "step": 14746 }, { "epoch": 0.7578887861034022, "grad_norm": 1.1321070194244385, "learning_rate": 1.4600551564888155e-06, "loss": 0.6889, "step": 14747 }, { "epoch": 0.7579401788467468, "grad_norm": 0.7038831114768982, "learning_rate": 1.4594674481567261e-06, "loss": 0.6646, "step": 14748 }, { "epoch": 0.7579915715900915, "grad_norm": 0.767866313457489, "learning_rate": 1.4588798379181828e-06, "loss": 0.6535, "step": 14749 }, { "epoch": 0.7580429643334361, "grad_norm": 1.0805637836456299, "learning_rate": 1.4582923257894644e-06, "loss": 0.6994, "step": 14750 }, { "epoch": 0.7580943570767807, "grad_norm": 1.0691684484481812, "learning_rate": 1.4577049117868531e-06, "loss": 0.6924, "step": 14751 }, { "epoch": 0.7581457498201254, "grad_norm": 1.1572397947311401, "learning_rate": 1.457117595926617e-06, "loss": 0.7133, "step": 14752 }, { "epoch": 0.75819714256347, "grad_norm": 1.1316540241241455, "learning_rate": 1.4565303782250334e-06, "loss": 0.6821, "step": 14753 }, { "epoch": 0.7582485353068147, "grad_norm": 1.01896333694458, "learning_rate": 1.45594325869837e-06, "loss": 0.6375, "step": 14754 }, { "epoch": 0.7582999280501593, "grad_norm": 1.1571788787841797, "learning_rate": 1.455356237362892e-06, "loss": 0.7122, "step": 14755 }, { "epoch": 0.758351320793504, "grad_norm": 0.7224657535552979, "learning_rate": 1.4547693142348662e-06, "loss": 0.6561, "step": 14756 }, { "epoch": 0.7584027135368486, "grad_norm": 0.7299119830131531, "learning_rate": 1.454182489330551e-06, "loss": 0.6316, "step": 14757 }, { "epoch": 0.7584541062801933, "grad_norm": 1.1546157598495483, "learning_rate": 1.4535957626662094e-06, "loss": 0.7291, "step": 14758 }, { "epoch": 0.7585054990235379, "grad_norm": 1.2008512020111084, "learning_rate": 1.453009134258091e-06, "loss": 0.7141, "step": 14759 }, { "epoch": 0.7585568917668826, "grad_norm": 1.039949893951416, "learning_rate": 1.4524226041224531e-06, "loss": 0.7258, "step": 14760 }, { "epoch": 0.7586082845102271, "grad_norm": 1.1076335906982422, "learning_rate": 1.4518361722755453e-06, "loss": 0.7173, "step": 14761 }, { "epoch": 0.7586596772535718, "grad_norm": 1.1180522441864014, "learning_rate": 1.4512498387336143e-06, "loss": 0.7612, "step": 14762 }, { "epoch": 0.7587110699969164, "grad_norm": 1.1019178628921509, "learning_rate": 1.450663603512904e-06, "loss": 0.6551, "step": 14763 }, { "epoch": 0.758762462740261, "grad_norm": 1.144449234008789, "learning_rate": 1.4500774666296603e-06, "loss": 0.7015, "step": 14764 }, { "epoch": 0.7588138554836057, "grad_norm": 0.9088379144668579, "learning_rate": 1.4494914281001194e-06, "loss": 0.6546, "step": 14765 }, { "epoch": 0.7588652482269503, "grad_norm": 1.0962613821029663, "learning_rate": 1.448905487940519e-06, "loss": 0.6962, "step": 14766 }, { "epoch": 0.758916640970295, "grad_norm": 1.1001778841018677, "learning_rate": 1.4483196461670934e-06, "loss": 0.7164, "step": 14767 }, { "epoch": 0.7589680337136396, "grad_norm": 1.1231653690338135, "learning_rate": 1.447733902796072e-06, "loss": 0.6976, "step": 14768 }, { "epoch": 0.7590194264569843, "grad_norm": 0.7405795454978943, "learning_rate": 1.4471482578436862e-06, "loss": 0.6452, "step": 14769 }, { "epoch": 0.7590708192003289, "grad_norm": 1.0876694917678833, "learning_rate": 1.4465627113261604e-06, "loss": 0.7455, "step": 14770 }, { "epoch": 0.7591222119436736, "grad_norm": 1.0004351139068604, "learning_rate": 1.4459772632597179e-06, "loss": 0.632, "step": 14771 }, { "epoch": 0.7591736046870182, "grad_norm": 1.0393434762954712, "learning_rate": 1.445391913660577e-06, "loss": 0.74, "step": 14772 }, { "epoch": 0.7592249974303629, "grad_norm": 0.7711547017097473, "learning_rate": 1.4448066625449587e-06, "loss": 0.6707, "step": 14773 }, { "epoch": 0.7592763901737075, "grad_norm": 0.8378944396972656, "learning_rate": 1.4442215099290768e-06, "loss": 0.6671, "step": 14774 }, { "epoch": 0.7593277829170522, "grad_norm": 1.117581844329834, "learning_rate": 1.4436364558291426e-06, "loss": 0.7867, "step": 14775 }, { "epoch": 0.7593791756603967, "grad_norm": 1.0259259939193726, "learning_rate": 1.4430515002613643e-06, "loss": 0.6509, "step": 14776 }, { "epoch": 0.7594305684037413, "grad_norm": 1.0407464504241943, "learning_rate": 1.442466643241952e-06, "loss": 0.6471, "step": 14777 }, { "epoch": 0.759481961147086, "grad_norm": 1.0910242795944214, "learning_rate": 1.4418818847871086e-06, "loss": 0.7256, "step": 14778 }, { "epoch": 0.7595333538904306, "grad_norm": 1.0766626596450806, "learning_rate": 1.4412972249130324e-06, "loss": 0.7338, "step": 14779 }, { "epoch": 0.7595847466337753, "grad_norm": 0.8886849284172058, "learning_rate": 1.4407126636359275e-06, "loss": 0.6092, "step": 14780 }, { "epoch": 0.7596361393771199, "grad_norm": 1.1051263809204102, "learning_rate": 1.440128200971983e-06, "loss": 0.6475, "step": 14781 }, { "epoch": 0.7596875321204646, "grad_norm": 1.1441853046417236, "learning_rate": 1.439543836937397e-06, "loss": 0.7735, "step": 14782 }, { "epoch": 0.7597389248638092, "grad_norm": 1.1015620231628418, "learning_rate": 1.4389595715483584e-06, "loss": 0.6712, "step": 14783 }, { "epoch": 0.7597903176071539, "grad_norm": 0.7938957810401917, "learning_rate": 1.4383754048210547e-06, "loss": 0.6771, "step": 14784 }, { "epoch": 0.7598417103504985, "grad_norm": 1.0900685787200928, "learning_rate": 1.4377913367716683e-06, "loss": 0.6953, "step": 14785 }, { "epoch": 0.7598931030938432, "grad_norm": 1.1259419918060303, "learning_rate": 1.4372073674163856e-06, "loss": 0.7018, "step": 14786 }, { "epoch": 0.7599444958371878, "grad_norm": 1.1968770027160645, "learning_rate": 1.4366234967713838e-06, "loss": 0.6813, "step": 14787 }, { "epoch": 0.7599958885805325, "grad_norm": 1.0884437561035156, "learning_rate": 1.4360397248528385e-06, "loss": 0.7358, "step": 14788 }, { "epoch": 0.7600472813238771, "grad_norm": 0.9581515192985535, "learning_rate": 1.435456051676926e-06, "loss": 0.634, "step": 14789 }, { "epoch": 0.7600986740672218, "grad_norm": 0.8046326041221619, "learning_rate": 1.4348724772598166e-06, "loss": 0.663, "step": 14790 }, { "epoch": 0.7601500668105663, "grad_norm": 1.0776023864746094, "learning_rate": 1.434289001617678e-06, "loss": 0.6815, "step": 14791 }, { "epoch": 0.7602014595539109, "grad_norm": 1.0970484018325806, "learning_rate": 1.4337056247666754e-06, "loss": 0.6592, "step": 14792 }, { "epoch": 0.7602528522972556, "grad_norm": 1.1023536920547485, "learning_rate": 1.4331223467229755e-06, "loss": 0.7536, "step": 14793 }, { "epoch": 0.7603042450406002, "grad_norm": 1.074668049812317, "learning_rate": 1.4325391675027328e-06, "loss": 0.6585, "step": 14794 }, { "epoch": 0.7603556377839449, "grad_norm": 1.072434425354004, "learning_rate": 1.431956087122109e-06, "loss": 0.7494, "step": 14795 }, { "epoch": 0.7604070305272895, "grad_norm": 0.6674770712852478, "learning_rate": 1.4313731055972575e-06, "loss": 0.6366, "step": 14796 }, { "epoch": 0.7604584232706342, "grad_norm": 1.145717740058899, "learning_rate": 1.4307902229443293e-06, "loss": 0.7026, "step": 14797 }, { "epoch": 0.7605098160139788, "grad_norm": 0.8038463592529297, "learning_rate": 1.4302074391794758e-06, "loss": 0.635, "step": 14798 }, { "epoch": 0.7605612087573235, "grad_norm": 1.1579630374908447, "learning_rate": 1.429624754318843e-06, "loss": 0.7373, "step": 14799 }, { "epoch": 0.7606126015006681, "grad_norm": 1.0624281167984009, "learning_rate": 1.4290421683785738e-06, "loss": 0.656, "step": 14800 }, { "epoch": 0.7606639942440128, "grad_norm": 1.0585752725601196, "learning_rate": 1.428459681374808e-06, "loss": 0.6767, "step": 14801 }, { "epoch": 0.7607153869873574, "grad_norm": 1.0929564237594604, "learning_rate": 1.4278772933236873e-06, "loss": 0.7143, "step": 14802 }, { "epoch": 0.7607667797307021, "grad_norm": 0.697571337223053, "learning_rate": 1.427295004241346e-06, "loss": 0.7019, "step": 14803 }, { "epoch": 0.7608181724740467, "grad_norm": 1.0117690563201904, "learning_rate": 1.4267128141439157e-06, "loss": 0.6817, "step": 14804 }, { "epoch": 0.7608695652173914, "grad_norm": 1.1021381616592407, "learning_rate": 1.4261307230475263e-06, "loss": 0.7084, "step": 14805 }, { "epoch": 0.7609209579607359, "grad_norm": 0.7960483431816101, "learning_rate": 1.425548730968307e-06, "loss": 0.6393, "step": 14806 }, { "epoch": 0.7609723507040805, "grad_norm": 1.080965280532837, "learning_rate": 1.4249668379223818e-06, "loss": 0.7124, "step": 14807 }, { "epoch": 0.7610237434474252, "grad_norm": 1.0404525995254517, "learning_rate": 1.4243850439258705e-06, "loss": 0.6919, "step": 14808 }, { "epoch": 0.7610751361907698, "grad_norm": 1.1059893369674683, "learning_rate": 1.4238033489948977e-06, "loss": 0.7502, "step": 14809 }, { "epoch": 0.7611265289341145, "grad_norm": 1.030486822128296, "learning_rate": 1.4232217531455722e-06, "loss": 0.7452, "step": 14810 }, { "epoch": 0.7611779216774591, "grad_norm": 1.0789388418197632, "learning_rate": 1.4226402563940133e-06, "loss": 0.698, "step": 14811 }, { "epoch": 0.7612293144208038, "grad_norm": 1.0779368877410889, "learning_rate": 1.42205885875633e-06, "loss": 0.6414, "step": 14812 }, { "epoch": 0.7612807071641484, "grad_norm": 1.033353567123413, "learning_rate": 1.4214775602486302e-06, "loss": 0.7108, "step": 14813 }, { "epoch": 0.7613320999074931, "grad_norm": 1.06680428981781, "learning_rate": 1.420896360887018e-06, "loss": 0.6727, "step": 14814 }, { "epoch": 0.7613834926508377, "grad_norm": 1.1153900623321533, "learning_rate": 1.4203152606875992e-06, "loss": 0.6829, "step": 14815 }, { "epoch": 0.7614348853941824, "grad_norm": 1.0962063074111938, "learning_rate": 1.419734259666472e-06, "loss": 0.6768, "step": 14816 }, { "epoch": 0.761486278137527, "grad_norm": 1.1300619840621948, "learning_rate": 1.4191533578397336e-06, "loss": 0.677, "step": 14817 }, { "epoch": 0.7615376708808717, "grad_norm": 0.736458420753479, "learning_rate": 1.4185725552234769e-06, "loss": 0.6705, "step": 14818 }, { "epoch": 0.7615890636242163, "grad_norm": 1.1062917709350586, "learning_rate": 1.4179918518337966e-06, "loss": 0.7441, "step": 14819 }, { "epoch": 0.761640456367561, "grad_norm": 1.133357048034668, "learning_rate": 1.4174112476867796e-06, "loss": 0.7031, "step": 14820 }, { "epoch": 0.7616918491109056, "grad_norm": 0.7239965796470642, "learning_rate": 1.4168307427985111e-06, "loss": 0.6595, "step": 14821 }, { "epoch": 0.7617432418542501, "grad_norm": 0.6841790080070496, "learning_rate": 1.4162503371850795e-06, "loss": 0.6471, "step": 14822 }, { "epoch": 0.7617946345975948, "grad_norm": 1.050122857093811, "learning_rate": 1.4156700308625582e-06, "loss": 0.6615, "step": 14823 }, { "epoch": 0.7618460273409394, "grad_norm": 0.7692776918411255, "learning_rate": 1.41508982384703e-06, "loss": 0.6293, "step": 14824 }, { "epoch": 0.7618974200842841, "grad_norm": 0.7168476581573486, "learning_rate": 1.4145097161545696e-06, "loss": 0.6624, "step": 14825 }, { "epoch": 0.7619488128276287, "grad_norm": 1.054405927658081, "learning_rate": 1.4139297078012476e-06, "loss": 0.6635, "step": 14826 }, { "epoch": 0.7620002055709734, "grad_norm": 1.1064034700393677, "learning_rate": 1.4133497988031337e-06, "loss": 0.7106, "step": 14827 }, { "epoch": 0.762051598314318, "grad_norm": 0.7077066898345947, "learning_rate": 1.4127699891762963e-06, "loss": 0.6495, "step": 14828 }, { "epoch": 0.7621029910576627, "grad_norm": 1.1734358072280884, "learning_rate": 1.4121902789367997e-06, "loss": 0.7292, "step": 14829 }, { "epoch": 0.7621543838010073, "grad_norm": 1.2431397438049316, "learning_rate": 1.4116106681007024e-06, "loss": 0.7022, "step": 14830 }, { "epoch": 0.762205776544352, "grad_norm": 0.696506917476654, "learning_rate": 1.4110311566840683e-06, "loss": 0.6283, "step": 14831 }, { "epoch": 0.7622571692876966, "grad_norm": 1.1775492429733276, "learning_rate": 1.4104517447029475e-06, "loss": 0.7581, "step": 14832 }, { "epoch": 0.7623085620310412, "grad_norm": 1.12771475315094, "learning_rate": 1.409872432173397e-06, "loss": 0.6704, "step": 14833 }, { "epoch": 0.7623599547743859, "grad_norm": 0.7990912795066833, "learning_rate": 1.4092932191114639e-06, "loss": 0.6794, "step": 14834 }, { "epoch": 0.7624113475177305, "grad_norm": 1.093575358390808, "learning_rate": 1.408714105533201e-06, "loss": 0.7362, "step": 14835 }, { "epoch": 0.7624627402610752, "grad_norm": 1.1284068822860718, "learning_rate": 1.4081350914546465e-06, "loss": 0.6783, "step": 14836 }, { "epoch": 0.7625141330044197, "grad_norm": 1.047597050666809, "learning_rate": 1.4075561768918477e-06, "loss": 0.6806, "step": 14837 }, { "epoch": 0.7625655257477644, "grad_norm": 1.091753602027893, "learning_rate": 1.4069773618608423e-06, "loss": 0.6769, "step": 14838 }, { "epoch": 0.762616918491109, "grad_norm": 1.1113297939300537, "learning_rate": 1.4063986463776646e-06, "loss": 0.6935, "step": 14839 }, { "epoch": 0.7626683112344537, "grad_norm": 1.0237194299697876, "learning_rate": 1.4058200304583524e-06, "loss": 0.6717, "step": 14840 }, { "epoch": 0.7627197039777983, "grad_norm": 1.0335760116577148, "learning_rate": 1.4052415141189346e-06, "loss": 0.6838, "step": 14841 }, { "epoch": 0.762771096721143, "grad_norm": 1.1562623977661133, "learning_rate": 1.4046630973754399e-06, "loss": 0.6877, "step": 14842 }, { "epoch": 0.7628224894644876, "grad_norm": 1.0531816482543945, "learning_rate": 1.4040847802438922e-06, "loss": 0.6834, "step": 14843 }, { "epoch": 0.7628738822078323, "grad_norm": 1.113742709159851, "learning_rate": 1.4035065627403187e-06, "loss": 0.7374, "step": 14844 }, { "epoch": 0.7629252749511769, "grad_norm": 0.7361916899681091, "learning_rate": 1.402928444880734e-06, "loss": 0.6501, "step": 14845 }, { "epoch": 0.7629766676945215, "grad_norm": 0.7651697993278503, "learning_rate": 1.4023504266811588e-06, "loss": 0.6502, "step": 14846 }, { "epoch": 0.7630280604378662, "grad_norm": 1.1182572841644287, "learning_rate": 1.4017725081576067e-06, "loss": 0.7058, "step": 14847 }, { "epoch": 0.7630794531812108, "grad_norm": 1.1113694906234741, "learning_rate": 1.4011946893260881e-06, "loss": 0.674, "step": 14848 }, { "epoch": 0.7631308459245555, "grad_norm": 0.7384142279624939, "learning_rate": 1.4006169702026146e-06, "loss": 0.6358, "step": 14849 }, { "epoch": 0.7631822386679001, "grad_norm": 1.1747992038726807, "learning_rate": 1.4000393508031896e-06, "loss": 0.7197, "step": 14850 }, { "epoch": 0.7632336314112448, "grad_norm": 0.6869306564331055, "learning_rate": 1.3994618311438214e-06, "loss": 0.6508, "step": 14851 }, { "epoch": 0.7632850241545893, "grad_norm": 1.0907849073410034, "learning_rate": 1.3988844112405037e-06, "loss": 0.6634, "step": 14852 }, { "epoch": 0.763336416897934, "grad_norm": 0.7894008755683899, "learning_rate": 1.39830709110924e-06, "loss": 0.6114, "step": 14853 }, { "epoch": 0.7633878096412786, "grad_norm": 0.693315327167511, "learning_rate": 1.397729870766023e-06, "loss": 0.6705, "step": 14854 }, { "epoch": 0.7634392023846233, "grad_norm": 1.155637502670288, "learning_rate": 1.397152750226846e-06, "loss": 0.6436, "step": 14855 }, { "epoch": 0.7634905951279679, "grad_norm": 0.7805944085121155, "learning_rate": 1.3965757295076966e-06, "loss": 0.631, "step": 14856 }, { "epoch": 0.7635419878713126, "grad_norm": 1.0873417854309082, "learning_rate": 1.3959988086245646e-06, "loss": 0.701, "step": 14857 }, { "epoch": 0.7635933806146572, "grad_norm": 1.0130430459976196, "learning_rate": 1.3954219875934332e-06, "loss": 0.7126, "step": 14858 }, { "epoch": 0.7636447733580018, "grad_norm": 1.1657843589782715, "learning_rate": 1.3948452664302814e-06, "loss": 0.6496, "step": 14859 }, { "epoch": 0.7636961661013465, "grad_norm": 1.0380887985229492, "learning_rate": 1.394268645151093e-06, "loss": 0.736, "step": 14860 }, { "epoch": 0.7637475588446911, "grad_norm": 1.1529345512390137, "learning_rate": 1.393692123771837e-06, "loss": 0.7752, "step": 14861 }, { "epoch": 0.7637989515880358, "grad_norm": 1.0513157844543457, "learning_rate": 1.3931157023084918e-06, "loss": 0.6373, "step": 14862 }, { "epoch": 0.7638503443313804, "grad_norm": 1.035420298576355, "learning_rate": 1.392539380777026e-06, "loss": 0.7055, "step": 14863 }, { "epoch": 0.7639017370747251, "grad_norm": 1.0435914993286133, "learning_rate": 1.3919631591934063e-06, "loss": 0.6596, "step": 14864 }, { "epoch": 0.7639531298180697, "grad_norm": 1.1054826974868774, "learning_rate": 1.3913870375735965e-06, "loss": 0.7074, "step": 14865 }, { "epoch": 0.7640045225614144, "grad_norm": 1.087292194366455, "learning_rate": 1.390811015933562e-06, "loss": 0.7279, "step": 14866 }, { "epoch": 0.7640559153047589, "grad_norm": 1.1077383756637573, "learning_rate": 1.3902350942892595e-06, "loss": 0.7102, "step": 14867 }, { "epoch": 0.7641073080481036, "grad_norm": 1.1028647422790527, "learning_rate": 1.389659272656646e-06, "loss": 0.7165, "step": 14868 }, { "epoch": 0.7641587007914482, "grad_norm": 1.0598477125167847, "learning_rate": 1.3890835510516738e-06, "loss": 0.6959, "step": 14869 }, { "epoch": 0.7642100935347929, "grad_norm": 1.1149882078170776, "learning_rate": 1.3885079294902965e-06, "loss": 0.6943, "step": 14870 }, { "epoch": 0.7642614862781375, "grad_norm": 1.100407361984253, "learning_rate": 1.387932407988461e-06, "loss": 0.7161, "step": 14871 }, { "epoch": 0.7643128790214821, "grad_norm": 1.3909032344818115, "learning_rate": 1.38735698656211e-06, "loss": 0.663, "step": 14872 }, { "epoch": 0.7643642717648268, "grad_norm": 0.7601723074913025, "learning_rate": 1.3867816652271921e-06, "loss": 0.6769, "step": 14873 }, { "epoch": 0.7644156645081714, "grad_norm": 1.1539779901504517, "learning_rate": 1.3862064439996403e-06, "loss": 0.746, "step": 14874 }, { "epoch": 0.7644670572515161, "grad_norm": 1.1712108850479126, "learning_rate": 1.385631322895396e-06, "loss": 0.7196, "step": 14875 }, { "epoch": 0.7645184499948607, "grad_norm": 0.7044581174850464, "learning_rate": 1.385056301930392e-06, "loss": 0.6592, "step": 14876 }, { "epoch": 0.7645698427382054, "grad_norm": 1.1520460844039917, "learning_rate": 1.3844813811205605e-06, "loss": 0.707, "step": 14877 }, { "epoch": 0.76462123548155, "grad_norm": 1.0719215869903564, "learning_rate": 1.3839065604818276e-06, "loss": 0.6557, "step": 14878 }, { "epoch": 0.7646726282248947, "grad_norm": 3.792450189590454, "learning_rate": 1.3833318400301227e-06, "loss": 0.7009, "step": 14879 }, { "epoch": 0.7647240209682393, "grad_norm": 0.7956342697143555, "learning_rate": 1.3827572197813672e-06, "loss": 0.6804, "step": 14880 }, { "epoch": 0.764775413711584, "grad_norm": 1.1279836893081665, "learning_rate": 1.3821826997514803e-06, "loss": 0.6941, "step": 14881 }, { "epoch": 0.7648268064549285, "grad_norm": 1.1273633241653442, "learning_rate": 1.3816082799563825e-06, "loss": 0.7042, "step": 14882 }, { "epoch": 0.7648781991982732, "grad_norm": 1.061460256576538, "learning_rate": 1.381033960411987e-06, "loss": 0.6705, "step": 14883 }, { "epoch": 0.7649295919416178, "grad_norm": 1.1251585483551025, "learning_rate": 1.3804597411342064e-06, "loss": 0.6871, "step": 14884 }, { "epoch": 0.7649809846849625, "grad_norm": 1.101520299911499, "learning_rate": 1.3798856221389473e-06, "loss": 0.7176, "step": 14885 }, { "epoch": 0.7650323774283071, "grad_norm": 0.98215651512146, "learning_rate": 1.379311603442121e-06, "loss": 0.6466, "step": 14886 }, { "epoch": 0.7650837701716517, "grad_norm": 0.984893262386322, "learning_rate": 1.3787376850596263e-06, "loss": 0.6967, "step": 14887 }, { "epoch": 0.7651351629149964, "grad_norm": 1.140602469444275, "learning_rate": 1.3781638670073672e-06, "loss": 0.7316, "step": 14888 }, { "epoch": 0.765186555658341, "grad_norm": 1.0875083208084106, "learning_rate": 1.3775901493012412e-06, "loss": 0.7019, "step": 14889 }, { "epoch": 0.7652379484016857, "grad_norm": 1.0610740184783936, "learning_rate": 1.3770165319571416e-06, "loss": 0.6589, "step": 14890 }, { "epoch": 0.7652893411450303, "grad_norm": 1.0957528352737427, "learning_rate": 1.3764430149909646e-06, "loss": 0.6619, "step": 14891 }, { "epoch": 0.765340733888375, "grad_norm": 1.1849086284637451, "learning_rate": 1.3758695984185976e-06, "loss": 0.6909, "step": 14892 }, { "epoch": 0.7653921266317196, "grad_norm": 1.0704739093780518, "learning_rate": 1.3752962822559285e-06, "loss": 0.6758, "step": 14893 }, { "epoch": 0.7654435193750643, "grad_norm": 1.0993798971176147, "learning_rate": 1.3747230665188394e-06, "loss": 0.734, "step": 14894 }, { "epoch": 0.7654949121184089, "grad_norm": 1.117469072341919, "learning_rate": 1.374149951223217e-06, "loss": 0.7104, "step": 14895 }, { "epoch": 0.7655463048617536, "grad_norm": 1.0320302248001099, "learning_rate": 1.3735769363849327e-06, "loss": 0.7013, "step": 14896 }, { "epoch": 0.7655976976050982, "grad_norm": 1.1136685609817505, "learning_rate": 1.3730040220198682e-06, "loss": 0.7212, "step": 14897 }, { "epoch": 0.7656490903484428, "grad_norm": 1.1024675369262695, "learning_rate": 1.3724312081438928e-06, "loss": 0.6983, "step": 14898 }, { "epoch": 0.7657004830917874, "grad_norm": 1.1504346132278442, "learning_rate": 1.3718584947728802e-06, "loss": 0.6847, "step": 14899 }, { "epoch": 0.765751875835132, "grad_norm": 1.0730969905853271, "learning_rate": 1.3712858819226959e-06, "loss": 0.6796, "step": 14900 }, { "epoch": 0.7658032685784767, "grad_norm": 1.0776852369308472, "learning_rate": 1.370713369609204e-06, "loss": 0.6921, "step": 14901 }, { "epoch": 0.7658546613218213, "grad_norm": 1.0063329935073853, "learning_rate": 1.3701409578482699e-06, "loss": 0.6096, "step": 14902 }, { "epoch": 0.765906054065166, "grad_norm": 1.2496848106384277, "learning_rate": 1.3695686466557479e-06, "loss": 0.7476, "step": 14903 }, { "epoch": 0.7659574468085106, "grad_norm": 1.0727111101150513, "learning_rate": 1.3689964360474977e-06, "loss": 0.6952, "step": 14904 }, { "epoch": 0.7660088395518553, "grad_norm": 1.101004958152771, "learning_rate": 1.3684243260393727e-06, "loss": 0.6648, "step": 14905 }, { "epoch": 0.7660602322951999, "grad_norm": 1.0858149528503418, "learning_rate": 1.3678523166472224e-06, "loss": 0.7236, "step": 14906 }, { "epoch": 0.7661116250385446, "grad_norm": 1.024114727973938, "learning_rate": 1.3672804078868939e-06, "loss": 0.6261, "step": 14907 }, { "epoch": 0.7661630177818892, "grad_norm": 1.0218740701675415, "learning_rate": 1.3667085997742357e-06, "loss": 0.6815, "step": 14908 }, { "epoch": 0.7662144105252339, "grad_norm": 1.0070140361785889, "learning_rate": 1.3661368923250884e-06, "loss": 0.6892, "step": 14909 }, { "epoch": 0.7662658032685785, "grad_norm": 1.0742318630218506, "learning_rate": 1.3655652855552903e-06, "loss": 0.7174, "step": 14910 }, { "epoch": 0.7663171960119232, "grad_norm": 1.1130491495132446, "learning_rate": 1.3649937794806828e-06, "loss": 0.709, "step": 14911 }, { "epoch": 0.7663685887552678, "grad_norm": 1.0463439226150513, "learning_rate": 1.3644223741170937e-06, "loss": 0.6848, "step": 14912 }, { "epoch": 0.7664199814986123, "grad_norm": 1.0667227506637573, "learning_rate": 1.3638510694803592e-06, "loss": 0.665, "step": 14913 }, { "epoch": 0.766471374241957, "grad_norm": 1.1058173179626465, "learning_rate": 1.3632798655863044e-06, "loss": 0.6682, "step": 14914 }, { "epoch": 0.7665227669853016, "grad_norm": 1.1155399084091187, "learning_rate": 1.3627087624507595e-06, "loss": 0.7422, "step": 14915 }, { "epoch": 0.7665741597286463, "grad_norm": 1.0278761386871338, "learning_rate": 1.362137760089542e-06, "loss": 0.6727, "step": 14916 }, { "epoch": 0.7666255524719909, "grad_norm": 1.0731332302093506, "learning_rate": 1.3615668585184755e-06, "loss": 0.6743, "step": 14917 }, { "epoch": 0.7666769452153356, "grad_norm": 1.1312066316604614, "learning_rate": 1.3609960577533771e-06, "loss": 0.6897, "step": 14918 }, { "epoch": 0.7667283379586802, "grad_norm": 1.134623646736145, "learning_rate": 1.36042535781006e-06, "loss": 0.7043, "step": 14919 }, { "epoch": 0.7667797307020249, "grad_norm": 0.6827690601348877, "learning_rate": 1.3598547587043353e-06, "loss": 0.669, "step": 14920 }, { "epoch": 0.7668311234453695, "grad_norm": 0.7556540966033936, "learning_rate": 1.359284260452015e-06, "loss": 0.6947, "step": 14921 }, { "epoch": 0.7668825161887142, "grad_norm": 1.1143728494644165, "learning_rate": 1.3587138630689034e-06, "loss": 0.695, "step": 14922 }, { "epoch": 0.7669339089320588, "grad_norm": 1.123047113418579, "learning_rate": 1.3581435665708026e-06, "loss": 0.7115, "step": 14923 }, { "epoch": 0.7669853016754035, "grad_norm": 1.070523977279663, "learning_rate": 1.3575733709735173e-06, "loss": 0.6743, "step": 14924 }, { "epoch": 0.7670366944187481, "grad_norm": 1.0614688396453857, "learning_rate": 1.3570032762928393e-06, "loss": 0.7387, "step": 14925 }, { "epoch": 0.7670880871620928, "grad_norm": 1.0980879068374634, "learning_rate": 1.3564332825445686e-06, "loss": 0.7061, "step": 14926 }, { "epoch": 0.7671394799054374, "grad_norm": 1.0785927772521973, "learning_rate": 1.355863389744494e-06, "loss": 0.763, "step": 14927 }, { "epoch": 0.7671908726487819, "grad_norm": 1.0811175107955933, "learning_rate": 1.355293597908409e-06, "loss": 0.6883, "step": 14928 }, { "epoch": 0.7672422653921266, "grad_norm": 1.1359682083129883, "learning_rate": 1.3547239070520952e-06, "loss": 0.6881, "step": 14929 }, { "epoch": 0.7672936581354712, "grad_norm": 1.1414706707000732, "learning_rate": 1.3541543171913402e-06, "loss": 0.7133, "step": 14930 }, { "epoch": 0.7673450508788159, "grad_norm": 1.1339125633239746, "learning_rate": 1.353584828341923e-06, "loss": 0.6387, "step": 14931 }, { "epoch": 0.7673964436221605, "grad_norm": 0.865020215511322, "learning_rate": 1.3530154405196212e-06, "loss": 0.6716, "step": 14932 }, { "epoch": 0.7674478363655052, "grad_norm": 1.1197324991226196, "learning_rate": 1.3524461537402128e-06, "loss": 0.728, "step": 14933 }, { "epoch": 0.7674992291088498, "grad_norm": 1.1189656257629395, "learning_rate": 1.3518769680194689e-06, "loss": 0.7117, "step": 14934 }, { "epoch": 0.7675506218521945, "grad_norm": 1.1070607900619507, "learning_rate": 1.351307883373159e-06, "loss": 0.65, "step": 14935 }, { "epoch": 0.7676020145955391, "grad_norm": 1.1507623195648193, "learning_rate": 1.3507388998170495e-06, "loss": 0.7271, "step": 14936 }, { "epoch": 0.7676534073388838, "grad_norm": 0.7905425429344177, "learning_rate": 1.350170017366908e-06, "loss": 0.5978, "step": 14937 }, { "epoch": 0.7677048000822284, "grad_norm": 1.0898300409317017, "learning_rate": 1.3496012360384909e-06, "loss": 0.647, "step": 14938 }, { "epoch": 0.7677561928255731, "grad_norm": 1.1691676378250122, "learning_rate": 1.3490325558475604e-06, "loss": 0.6905, "step": 14939 }, { "epoch": 0.7678075855689177, "grad_norm": 1.0773496627807617, "learning_rate": 1.3484639768098707e-06, "loss": 0.6641, "step": 14940 }, { "epoch": 0.7678589783122624, "grad_norm": 0.9809443354606628, "learning_rate": 1.3478954989411741e-06, "loss": 0.6975, "step": 14941 }, { "epoch": 0.767910371055607, "grad_norm": 1.1087486743927002, "learning_rate": 1.347327122257223e-06, "loss": 0.6884, "step": 14942 }, { "epoch": 0.7679617637989515, "grad_norm": 1.082742691040039, "learning_rate": 1.346758846773763e-06, "loss": 0.6886, "step": 14943 }, { "epoch": 0.7680131565422962, "grad_norm": 1.0822051763534546, "learning_rate": 1.3461906725065415e-06, "loss": 0.6829, "step": 14944 }, { "epoch": 0.7680645492856408, "grad_norm": 1.1438727378845215, "learning_rate": 1.3456225994712957e-06, "loss": 0.7385, "step": 14945 }, { "epoch": 0.7681159420289855, "grad_norm": 0.7072693109512329, "learning_rate": 1.3450546276837683e-06, "loss": 0.6002, "step": 14946 }, { "epoch": 0.7681673347723301, "grad_norm": 1.0974787473678589, "learning_rate": 1.3444867571596936e-06, "loss": 0.6895, "step": 14947 }, { "epoch": 0.7682187275156748, "grad_norm": 1.055829644203186, "learning_rate": 1.3439189879148063e-06, "loss": 0.6673, "step": 14948 }, { "epoch": 0.7682701202590194, "grad_norm": 1.089518427848816, "learning_rate": 1.343351319964834e-06, "loss": 0.6743, "step": 14949 }, { "epoch": 0.7683215130023641, "grad_norm": 1.088774561882019, "learning_rate": 1.342783753325508e-06, "loss": 0.6744, "step": 14950 }, { "epoch": 0.7683729057457087, "grad_norm": 1.1432021856307983, "learning_rate": 1.3422162880125516e-06, "loss": 0.6767, "step": 14951 }, { "epoch": 0.7684242984890534, "grad_norm": 1.0914415121078491, "learning_rate": 1.3416489240416863e-06, "loss": 0.6912, "step": 14952 }, { "epoch": 0.768475691232398, "grad_norm": 1.0884901285171509, "learning_rate": 1.3410816614286343e-06, "loss": 0.6841, "step": 14953 }, { "epoch": 0.7685270839757427, "grad_norm": 1.0588417053222656, "learning_rate": 1.340514500189108e-06, "loss": 0.6725, "step": 14954 }, { "epoch": 0.7685784767190873, "grad_norm": 0.8193063139915466, "learning_rate": 1.3399474403388236e-06, "loss": 0.6368, "step": 14955 }, { "epoch": 0.768629869462432, "grad_norm": 0.7181390523910522, "learning_rate": 1.339380481893492e-06, "loss": 0.6361, "step": 14956 }, { "epoch": 0.7686812622057766, "grad_norm": 1.122700572013855, "learning_rate": 1.3388136248688204e-06, "loss": 0.6439, "step": 14957 }, { "epoch": 0.7687326549491211, "grad_norm": 1.11155366897583, "learning_rate": 1.3382468692805134e-06, "loss": 0.7646, "step": 14958 }, { "epoch": 0.7687840476924658, "grad_norm": 1.2224559783935547, "learning_rate": 1.3376802151442758e-06, "loss": 0.7152, "step": 14959 }, { "epoch": 0.7688354404358104, "grad_norm": 1.1393802165985107, "learning_rate": 1.3371136624758062e-06, "loss": 0.7458, "step": 14960 }, { "epoch": 0.7688868331791551, "grad_norm": 1.054969072341919, "learning_rate": 1.3365472112907996e-06, "loss": 0.7064, "step": 14961 }, { "epoch": 0.7689382259224997, "grad_norm": 1.2036988735198975, "learning_rate": 1.3359808616049523e-06, "loss": 0.6983, "step": 14962 }, { "epoch": 0.7689896186658444, "grad_norm": 0.832382321357727, "learning_rate": 1.3354146134339556e-06, "loss": 0.6447, "step": 14963 }, { "epoch": 0.769041011409189, "grad_norm": 1.0457384586334229, "learning_rate": 1.3348484667934968e-06, "loss": 0.662, "step": 14964 }, { "epoch": 0.7690924041525337, "grad_norm": 1.051928997039795, "learning_rate": 1.3342824216992606e-06, "loss": 0.6897, "step": 14965 }, { "epoch": 0.7691437968958783, "grad_norm": 1.0934467315673828, "learning_rate": 1.3337164781669338e-06, "loss": 0.7029, "step": 14966 }, { "epoch": 0.769195189639223, "grad_norm": 1.10590398311615, "learning_rate": 1.3331506362121905e-06, "loss": 0.6498, "step": 14967 }, { "epoch": 0.7692465823825676, "grad_norm": 0.7254303693771362, "learning_rate": 1.3325848958507126e-06, "loss": 0.6183, "step": 14968 }, { "epoch": 0.7692979751259122, "grad_norm": 1.1386992931365967, "learning_rate": 1.3320192570981728e-06, "loss": 0.7404, "step": 14969 }, { "epoch": 0.7693493678692569, "grad_norm": 1.0649176836013794, "learning_rate": 1.3314537199702422e-06, "loss": 0.693, "step": 14970 }, { "epoch": 0.7694007606126015, "grad_norm": 1.1090952157974243, "learning_rate": 1.3308882844825882e-06, "loss": 0.6947, "step": 14971 }, { "epoch": 0.7694521533559462, "grad_norm": 1.0703777074813843, "learning_rate": 1.3303229506508796e-06, "loss": 0.7141, "step": 14972 }, { "epoch": 0.7695035460992907, "grad_norm": 1.0385386943817139, "learning_rate": 1.329757718490779e-06, "loss": 0.7045, "step": 14973 }, { "epoch": 0.7695549388426354, "grad_norm": 1.0577046871185303, "learning_rate": 1.3291925880179435e-06, "loss": 0.7178, "step": 14974 }, { "epoch": 0.76960633158598, "grad_norm": 1.0998378992080688, "learning_rate": 1.3286275592480347e-06, "loss": 0.6835, "step": 14975 }, { "epoch": 0.7696577243293247, "grad_norm": 1.1373803615570068, "learning_rate": 1.3280626321967054e-06, "loss": 0.6773, "step": 14976 }, { "epoch": 0.7697091170726693, "grad_norm": 1.0475999116897583, "learning_rate": 1.3274978068796074e-06, "loss": 0.6601, "step": 14977 }, { "epoch": 0.769760509816014, "grad_norm": 1.0746185779571533, "learning_rate": 1.3269330833123883e-06, "loss": 0.7003, "step": 14978 }, { "epoch": 0.7698119025593586, "grad_norm": 1.1823216676712036, "learning_rate": 1.3263684615106986e-06, "loss": 0.7216, "step": 14979 }, { "epoch": 0.7698632953027033, "grad_norm": 1.2271422147750854, "learning_rate": 1.3258039414901753e-06, "loss": 0.7208, "step": 14980 }, { "epoch": 0.7699146880460479, "grad_norm": 0.740078330039978, "learning_rate": 1.3252395232664638e-06, "loss": 0.7025, "step": 14981 }, { "epoch": 0.7699660807893925, "grad_norm": 1.1314533948898315, "learning_rate": 1.3246752068552005e-06, "loss": 0.6747, "step": 14982 }, { "epoch": 0.7700174735327372, "grad_norm": 1.1109609603881836, "learning_rate": 1.3241109922720185e-06, "loss": 0.6648, "step": 14983 }, { "epoch": 0.7700688662760818, "grad_norm": 0.7932596802711487, "learning_rate": 1.323546879532553e-06, "loss": 0.6468, "step": 14984 }, { "epoch": 0.7701202590194265, "grad_norm": 1.5118470191955566, "learning_rate": 1.3229828686524316e-06, "loss": 0.7246, "step": 14985 }, { "epoch": 0.7701716517627711, "grad_norm": 1.0029833316802979, "learning_rate": 1.3224189596472802e-06, "loss": 0.6867, "step": 14986 }, { "epoch": 0.7702230445061158, "grad_norm": 1.1663767099380493, "learning_rate": 1.3218551525327223e-06, "loss": 0.6746, "step": 14987 }, { "epoch": 0.7702744372494604, "grad_norm": 1.1489217281341553, "learning_rate": 1.321291447324382e-06, "loss": 0.73, "step": 14988 }, { "epoch": 0.770325829992805, "grad_norm": 1.1388678550720215, "learning_rate": 1.3207278440378712e-06, "loss": 0.6422, "step": 14989 }, { "epoch": 0.7703772227361496, "grad_norm": 1.083748459815979, "learning_rate": 1.3201643426888105e-06, "loss": 0.7003, "step": 14990 }, { "epoch": 0.7704286154794943, "grad_norm": 1.0556535720825195, "learning_rate": 1.319600943292808e-06, "loss": 0.644, "step": 14991 }, { "epoch": 0.7704800082228389, "grad_norm": 0.81584632396698, "learning_rate": 1.319037645865477e-06, "loss": 0.6414, "step": 14992 }, { "epoch": 0.7705314009661836, "grad_norm": 1.117989420890808, "learning_rate": 1.3184744504224223e-06, "loss": 0.6837, "step": 14993 }, { "epoch": 0.7705827937095282, "grad_norm": 1.0433285236358643, "learning_rate": 1.3179113569792468e-06, "loss": 0.6268, "step": 14994 }, { "epoch": 0.7706341864528728, "grad_norm": 1.0543354749679565, "learning_rate": 1.3173483655515551e-06, "loss": 0.6874, "step": 14995 }, { "epoch": 0.7706855791962175, "grad_norm": 0.692695140838623, "learning_rate": 1.3167854761549398e-06, "loss": 0.6265, "step": 14996 }, { "epoch": 0.7707369719395621, "grad_norm": 1.086273431777954, "learning_rate": 1.3162226888050006e-06, "loss": 0.653, "step": 14997 }, { "epoch": 0.7707883646829068, "grad_norm": 1.1172932386398315, "learning_rate": 1.315660003517329e-06, "loss": 0.719, "step": 14998 }, { "epoch": 0.7708397574262514, "grad_norm": 1.1586240530014038, "learning_rate": 1.315097420307514e-06, "loss": 0.7082, "step": 14999 }, { "epoch": 0.7708911501695961, "grad_norm": 1.1955419778823853, "learning_rate": 1.3145349391911411e-06, "loss": 0.7632, "step": 15000 }, { "epoch": 0.7709425429129407, "grad_norm": 1.1116758584976196, "learning_rate": 1.3139725601837983e-06, "loss": 0.76, "step": 15001 }, { "epoch": 0.7709939356562854, "grad_norm": 1.1061992645263672, "learning_rate": 1.3134102833010636e-06, "loss": 0.7492, "step": 15002 }, { "epoch": 0.77104532839963, "grad_norm": 1.1486345529556274, "learning_rate": 1.3128481085585155e-06, "loss": 0.7697, "step": 15003 }, { "epoch": 0.7710967211429746, "grad_norm": 1.0619031190872192, "learning_rate": 1.3122860359717331e-06, "loss": 0.6692, "step": 15004 }, { "epoch": 0.7711481138863192, "grad_norm": 1.133262276649475, "learning_rate": 1.311724065556283e-06, "loss": 0.7028, "step": 15005 }, { "epoch": 0.7711995066296639, "grad_norm": 1.0883941650390625, "learning_rate": 1.3111621973277406e-06, "loss": 0.633, "step": 15006 }, { "epoch": 0.7712508993730085, "grad_norm": 1.0570839643478394, "learning_rate": 1.310600431301669e-06, "loss": 0.651, "step": 15007 }, { "epoch": 0.7713022921163531, "grad_norm": 1.1378048658370972, "learning_rate": 1.3100387674936371e-06, "loss": 0.7082, "step": 15008 }, { "epoch": 0.7713536848596978, "grad_norm": 1.053115963935852, "learning_rate": 1.3094772059192e-06, "loss": 0.6867, "step": 15009 }, { "epoch": 0.7714050776030424, "grad_norm": 1.1144675016403198, "learning_rate": 1.3089157465939217e-06, "loss": 0.6988, "step": 15010 }, { "epoch": 0.7714564703463871, "grad_norm": 0.7625012993812561, "learning_rate": 1.3083543895333555e-06, "loss": 0.6512, "step": 15011 }, { "epoch": 0.7715078630897317, "grad_norm": 1.072308897972107, "learning_rate": 1.3077931347530537e-06, "loss": 0.6754, "step": 15012 }, { "epoch": 0.7715592558330764, "grad_norm": 1.020975947380066, "learning_rate": 1.307231982268568e-06, "loss": 0.694, "step": 15013 }, { "epoch": 0.771610648576421, "grad_norm": 1.1260422468185425, "learning_rate": 1.3066709320954457e-06, "loss": 0.706, "step": 15014 }, { "epoch": 0.7716620413197657, "grad_norm": 1.1120591163635254, "learning_rate": 1.3061099842492298e-06, "loss": 0.6959, "step": 15015 }, { "epoch": 0.7717134340631103, "grad_norm": 0.7297281622886658, "learning_rate": 1.3055491387454616e-06, "loss": 0.605, "step": 15016 }, { "epoch": 0.771764826806455, "grad_norm": 1.133056402206421, "learning_rate": 1.3049883955996827e-06, "loss": 0.697, "step": 15017 }, { "epoch": 0.7718162195497996, "grad_norm": 1.0469683408737183, "learning_rate": 1.3044277548274248e-06, "loss": 0.7059, "step": 15018 }, { "epoch": 0.7718676122931442, "grad_norm": 1.0993491411209106, "learning_rate": 1.3038672164442245e-06, "loss": 0.6438, "step": 15019 }, { "epoch": 0.7719190050364888, "grad_norm": 1.0702852010726929, "learning_rate": 1.3033067804656103e-06, "loss": 0.6761, "step": 15020 }, { "epoch": 0.7719703977798335, "grad_norm": 1.047142505645752, "learning_rate": 1.30274644690711e-06, "loss": 0.6255, "step": 15021 }, { "epoch": 0.7720217905231781, "grad_norm": 1.1507539749145508, "learning_rate": 1.3021862157842463e-06, "loss": 0.7017, "step": 15022 }, { "epoch": 0.7720731832665227, "grad_norm": 1.118160605430603, "learning_rate": 1.301626087112544e-06, "loss": 0.7276, "step": 15023 }, { "epoch": 0.7721245760098674, "grad_norm": 1.0919917821884155, "learning_rate": 1.3010660609075209e-06, "loss": 0.6976, "step": 15024 }, { "epoch": 0.772175968753212, "grad_norm": 1.0616801977157593, "learning_rate": 1.300506137184691e-06, "loss": 0.6918, "step": 15025 }, { "epoch": 0.7722273614965567, "grad_norm": 1.1338061094284058, "learning_rate": 1.299946315959571e-06, "loss": 0.7087, "step": 15026 }, { "epoch": 0.7722787542399013, "grad_norm": 1.1329405307769775, "learning_rate": 1.2993865972476688e-06, "loss": 0.7991, "step": 15027 }, { "epoch": 0.772330146983246, "grad_norm": 1.0974472761154175, "learning_rate": 1.2988269810644928e-06, "loss": 0.6758, "step": 15028 }, { "epoch": 0.7723815397265906, "grad_norm": 1.0620429515838623, "learning_rate": 1.2982674674255457e-06, "loss": 0.6248, "step": 15029 }, { "epoch": 0.7724329324699353, "grad_norm": 1.0933665037155151, "learning_rate": 1.297708056346334e-06, "loss": 0.7396, "step": 15030 }, { "epoch": 0.7724843252132799, "grad_norm": 1.067371129989624, "learning_rate": 1.2971487478423505e-06, "loss": 0.73, "step": 15031 }, { "epoch": 0.7725357179566246, "grad_norm": 1.1072922945022583, "learning_rate": 1.296589541929096e-06, "loss": 0.7467, "step": 15032 }, { "epoch": 0.7725871106999692, "grad_norm": 1.0448694229125977, "learning_rate": 1.2960304386220623e-06, "loss": 0.6815, "step": 15033 }, { "epoch": 0.7726385034433138, "grad_norm": 0.7934486865997314, "learning_rate": 1.2954714379367377e-06, "loss": 0.6184, "step": 15034 }, { "epoch": 0.7726898961866584, "grad_norm": 1.0337358713150024, "learning_rate": 1.2949125398886142e-06, "loss": 0.6445, "step": 15035 }, { "epoch": 0.772741288930003, "grad_norm": 1.1336174011230469, "learning_rate": 1.294353744493172e-06, "loss": 0.7281, "step": 15036 }, { "epoch": 0.7727926816733477, "grad_norm": 1.1540257930755615, "learning_rate": 1.2937950517658988e-06, "loss": 0.7515, "step": 15037 }, { "epoch": 0.7728440744166923, "grad_norm": 1.073453426361084, "learning_rate": 1.293236461722267e-06, "loss": 0.6647, "step": 15038 }, { "epoch": 0.772895467160037, "grad_norm": 1.1061946153640747, "learning_rate": 1.2926779743777573e-06, "loss": 0.6697, "step": 15039 }, { "epoch": 0.7729468599033816, "grad_norm": 1.3312920331954956, "learning_rate": 1.2921195897478417e-06, "loss": 0.7546, "step": 15040 }, { "epoch": 0.7729982526467263, "grad_norm": 1.1164494752883911, "learning_rate": 1.2915613078479905e-06, "loss": 0.6776, "step": 15041 }, { "epoch": 0.7730496453900709, "grad_norm": 1.0668320655822754, "learning_rate": 1.2910031286936708e-06, "loss": 0.708, "step": 15042 }, { "epoch": 0.7731010381334156, "grad_norm": 1.1103875637054443, "learning_rate": 1.2904450523003493e-06, "loss": 0.6616, "step": 15043 }, { "epoch": 0.7731524308767602, "grad_norm": 1.1029402017593384, "learning_rate": 1.2898870786834872e-06, "loss": 0.6941, "step": 15044 }, { "epoch": 0.7732038236201049, "grad_norm": 1.1395201683044434, "learning_rate": 1.2893292078585418e-06, "loss": 0.6971, "step": 15045 }, { "epoch": 0.7732552163634495, "grad_norm": 1.071264386177063, "learning_rate": 1.288771439840974e-06, "loss": 0.6728, "step": 15046 }, { "epoch": 0.7733066091067942, "grad_norm": 1.0800262689590454, "learning_rate": 1.2882137746462309e-06, "loss": 0.6482, "step": 15047 }, { "epoch": 0.7733580018501388, "grad_norm": 0.7044035196304321, "learning_rate": 1.2876562122897684e-06, "loss": 0.6256, "step": 15048 }, { "epoch": 0.7734093945934833, "grad_norm": 1.07472825050354, "learning_rate": 1.2870987527870326e-06, "loss": 0.665, "step": 15049 }, { "epoch": 0.773460787336828, "grad_norm": 0.7600728869438171, "learning_rate": 1.2865413961534679e-06, "loss": 0.6658, "step": 15050 }, { "epoch": 0.7735121800801726, "grad_norm": 1.0378344058990479, "learning_rate": 1.2859841424045145e-06, "loss": 0.7015, "step": 15051 }, { "epoch": 0.7735635728235173, "grad_norm": 1.1569468975067139, "learning_rate": 1.2854269915556155e-06, "loss": 0.7062, "step": 15052 }, { "epoch": 0.7736149655668619, "grad_norm": 1.0347727537155151, "learning_rate": 1.2848699436222057e-06, "loss": 0.7039, "step": 15053 }, { "epoch": 0.7736663583102066, "grad_norm": 1.0622179508209229, "learning_rate": 1.2843129986197167e-06, "loss": 0.715, "step": 15054 }, { "epoch": 0.7737177510535512, "grad_norm": 1.0950325727462769, "learning_rate": 1.2837561565635826e-06, "loss": 0.6871, "step": 15055 }, { "epoch": 0.7737691437968959, "grad_norm": 0.9359648823738098, "learning_rate": 1.2831994174692287e-06, "loss": 0.6615, "step": 15056 }, { "epoch": 0.7738205365402405, "grad_norm": 1.0624192953109741, "learning_rate": 1.2826427813520808e-06, "loss": 0.7033, "step": 15057 }, { "epoch": 0.7738719292835852, "grad_norm": 0.9694546461105347, "learning_rate": 1.2820862482275597e-06, "loss": 0.6468, "step": 15058 }, { "epoch": 0.7739233220269298, "grad_norm": 1.0054559707641602, "learning_rate": 1.2815298181110885e-06, "loss": 0.6781, "step": 15059 }, { "epoch": 0.7739747147702745, "grad_norm": 1.1061476469039917, "learning_rate": 1.2809734910180776e-06, "loss": 0.681, "step": 15060 }, { "epoch": 0.7740261075136191, "grad_norm": 0.656936764717102, "learning_rate": 1.280417266963946e-06, "loss": 0.6281, "step": 15061 }, { "epoch": 0.7740775002569638, "grad_norm": 1.0671902894973755, "learning_rate": 1.2798611459641014e-06, "loss": 0.6318, "step": 15062 }, { "epoch": 0.7741288930003084, "grad_norm": 1.0009052753448486, "learning_rate": 1.2793051280339507e-06, "loss": 0.6545, "step": 15063 }, { "epoch": 0.774180285743653, "grad_norm": 1.0898479223251343, "learning_rate": 1.2787492131889024e-06, "loss": 0.6765, "step": 15064 }, { "epoch": 0.7742316784869976, "grad_norm": 1.1622505187988281, "learning_rate": 1.2781934014443564e-06, "loss": 0.7107, "step": 15065 }, { "epoch": 0.7742830712303422, "grad_norm": 1.1035284996032715, "learning_rate": 1.2776376928157124e-06, "loss": 0.6869, "step": 15066 }, { "epoch": 0.7743344639736869, "grad_norm": 1.0921597480773926, "learning_rate": 1.2770820873183653e-06, "loss": 0.6996, "step": 15067 }, { "epoch": 0.7743858567170315, "grad_norm": 0.7903449535369873, "learning_rate": 1.2765265849677117e-06, "loss": 0.6826, "step": 15068 }, { "epoch": 0.7744372494603762, "grad_norm": 1.0121012926101685, "learning_rate": 1.2759711857791402e-06, "loss": 0.6794, "step": 15069 }, { "epoch": 0.7744886422037208, "grad_norm": 0.6857595443725586, "learning_rate": 1.2754158897680391e-06, "loss": 0.649, "step": 15070 }, { "epoch": 0.7745400349470655, "grad_norm": 1.1309070587158203, "learning_rate": 1.2748606969497918e-06, "loss": 0.7093, "step": 15071 }, { "epoch": 0.7745914276904101, "grad_norm": 1.2390469312667847, "learning_rate": 1.2743056073397847e-06, "loss": 0.6574, "step": 15072 }, { "epoch": 0.7746428204337548, "grad_norm": 1.2025063037872314, "learning_rate": 1.2737506209533913e-06, "loss": 0.7497, "step": 15073 }, { "epoch": 0.7746942131770994, "grad_norm": 1.122231125831604, "learning_rate": 1.2731957378059923e-06, "loss": 0.6793, "step": 15074 }, { "epoch": 0.7747456059204441, "grad_norm": 1.0672181844711304, "learning_rate": 1.2726409579129596e-06, "loss": 0.689, "step": 15075 }, { "epoch": 0.7747969986637887, "grad_norm": 1.0475307703018188, "learning_rate": 1.2720862812896628e-06, "loss": 0.6629, "step": 15076 }, { "epoch": 0.7748483914071334, "grad_norm": 1.089813232421875, "learning_rate": 1.2715317079514723e-06, "loss": 0.7069, "step": 15077 }, { "epoch": 0.774899784150478, "grad_norm": 1.0784388780593872, "learning_rate": 1.270977237913752e-06, "loss": 0.707, "step": 15078 }, { "epoch": 0.7749511768938226, "grad_norm": 0.7371863722801208, "learning_rate": 1.2704228711918631e-06, "loss": 0.6438, "step": 15079 }, { "epoch": 0.7750025696371672, "grad_norm": 1.062500238418579, "learning_rate": 1.269868607801164e-06, "loss": 0.7122, "step": 15080 }, { "epoch": 0.7750539623805118, "grad_norm": 1.16629159450531, "learning_rate": 1.2693144477570156e-06, "loss": 0.7199, "step": 15081 }, { "epoch": 0.7751053551238565, "grad_norm": 1.1163733005523682, "learning_rate": 1.268760391074766e-06, "loss": 0.6973, "step": 15082 }, { "epoch": 0.7751567478672011, "grad_norm": 1.0068421363830566, "learning_rate": 1.2682064377697684e-06, "loss": 0.6833, "step": 15083 }, { "epoch": 0.7752081406105458, "grad_norm": 1.196900486946106, "learning_rate": 1.2676525878573693e-06, "loss": 0.7338, "step": 15084 }, { "epoch": 0.7752595333538904, "grad_norm": 1.1061978340148926, "learning_rate": 1.2670988413529157e-06, "loss": 0.7436, "step": 15085 }, { "epoch": 0.7753109260972351, "grad_norm": 1.0913710594177246, "learning_rate": 1.2665451982717486e-06, "loss": 0.7292, "step": 15086 }, { "epoch": 0.7753623188405797, "grad_norm": 1.090458631515503, "learning_rate": 1.2659916586292054e-06, "loss": 0.6974, "step": 15087 }, { "epoch": 0.7754137115839244, "grad_norm": 1.120648980140686, "learning_rate": 1.265438222440627e-06, "loss": 0.7051, "step": 15088 }, { "epoch": 0.775465104327269, "grad_norm": 1.091753363609314, "learning_rate": 1.2648848897213412e-06, "loss": 0.6921, "step": 15089 }, { "epoch": 0.7755164970706137, "grad_norm": 1.0596282482147217, "learning_rate": 1.2643316604866822e-06, "loss": 0.6449, "step": 15090 }, { "epoch": 0.7755678898139583, "grad_norm": 1.0974825620651245, "learning_rate": 1.2637785347519771e-06, "loss": 0.6889, "step": 15091 }, { "epoch": 0.775619282557303, "grad_norm": 1.1222844123840332, "learning_rate": 1.26322551253255e-06, "loss": 0.7257, "step": 15092 }, { "epoch": 0.7756706753006476, "grad_norm": 1.111581802368164, "learning_rate": 1.2626725938437217e-06, "loss": 0.7162, "step": 15093 }, { "epoch": 0.7757220680439922, "grad_norm": 1.0950855016708374, "learning_rate": 1.2621197787008144e-06, "loss": 0.6725, "step": 15094 }, { "epoch": 0.7757734607873368, "grad_norm": 1.0455998182296753, "learning_rate": 1.2615670671191426e-06, "loss": 0.6864, "step": 15095 }, { "epoch": 0.7758248535306814, "grad_norm": 1.1095768213272095, "learning_rate": 1.2610144591140188e-06, "loss": 0.6393, "step": 15096 }, { "epoch": 0.7758762462740261, "grad_norm": 1.0568294525146484, "learning_rate": 1.2604619547007568e-06, "loss": 0.6678, "step": 15097 }, { "epoch": 0.7759276390173707, "grad_norm": 1.0725914239883423, "learning_rate": 1.2599095538946593e-06, "loss": 0.6584, "step": 15098 }, { "epoch": 0.7759790317607154, "grad_norm": 1.1389981508255005, "learning_rate": 1.2593572567110345e-06, "loss": 0.7026, "step": 15099 }, { "epoch": 0.77603042450406, "grad_norm": 1.1912168264389038, "learning_rate": 1.2588050631651826e-06, "loss": 0.7231, "step": 15100 }, { "epoch": 0.7760818172474047, "grad_norm": 1.0318366289138794, "learning_rate": 1.2582529732724063e-06, "loss": 0.6063, "step": 15101 }, { "epoch": 0.7761332099907493, "grad_norm": 1.0504744052886963, "learning_rate": 1.2577009870479956e-06, "loss": 0.6213, "step": 15102 }, { "epoch": 0.776184602734094, "grad_norm": 1.0184589624404907, "learning_rate": 1.2571491045072487e-06, "loss": 0.7065, "step": 15103 }, { "epoch": 0.7762359954774386, "grad_norm": 0.8171871900558472, "learning_rate": 1.256597325665454e-06, "loss": 0.6558, "step": 15104 }, { "epoch": 0.7762873882207832, "grad_norm": 1.0988686084747314, "learning_rate": 1.2560456505378976e-06, "loss": 0.717, "step": 15105 }, { "epoch": 0.7763387809641279, "grad_norm": 1.0973354578018188, "learning_rate": 1.255494079139868e-06, "loss": 0.6966, "step": 15106 }, { "epoch": 0.7763901737074725, "grad_norm": 1.0841270685195923, "learning_rate": 1.2549426114866443e-06, "loss": 0.7029, "step": 15107 }, { "epoch": 0.7764415664508172, "grad_norm": 1.041652798652649, "learning_rate": 1.2543912475935062e-06, "loss": 0.6904, "step": 15108 }, { "epoch": 0.7764929591941618, "grad_norm": 1.1016466617584229, "learning_rate": 1.2538399874757279e-06, "loss": 0.642, "step": 15109 }, { "epoch": 0.7765443519375064, "grad_norm": 1.1137545108795166, "learning_rate": 1.253288831148587e-06, "loss": 0.6431, "step": 15110 }, { "epoch": 0.776595744680851, "grad_norm": 1.0999317169189453, "learning_rate": 1.2527377786273477e-06, "loss": 0.6882, "step": 15111 }, { "epoch": 0.7766471374241957, "grad_norm": 1.1158596277236938, "learning_rate": 1.2521868299272815e-06, "loss": 0.7171, "step": 15112 }, { "epoch": 0.7766985301675403, "grad_norm": 1.0803213119506836, "learning_rate": 1.2516359850636528e-06, "loss": 0.6983, "step": 15113 }, { "epoch": 0.776749922910885, "grad_norm": 1.068266749382019, "learning_rate": 1.2510852440517207e-06, "loss": 0.7012, "step": 15114 }, { "epoch": 0.7768013156542296, "grad_norm": 1.1030173301696777, "learning_rate": 1.250534606906747e-06, "loss": 0.6707, "step": 15115 }, { "epoch": 0.7768527083975743, "grad_norm": 1.0002933740615845, "learning_rate": 1.249984073643986e-06, "loss": 0.6658, "step": 15116 }, { "epoch": 0.7769041011409189, "grad_norm": 1.0675413608551025, "learning_rate": 1.2494336442786913e-06, "loss": 0.7577, "step": 15117 }, { "epoch": 0.7769554938842635, "grad_norm": 1.1166417598724365, "learning_rate": 1.2488833188261107e-06, "loss": 0.7587, "step": 15118 }, { "epoch": 0.7770068866276082, "grad_norm": 1.1370668411254883, "learning_rate": 1.248333097301495e-06, "loss": 0.7457, "step": 15119 }, { "epoch": 0.7770582793709528, "grad_norm": 1.0643632411956787, "learning_rate": 1.2477829797200874e-06, "loss": 0.7122, "step": 15120 }, { "epoch": 0.7771096721142975, "grad_norm": 0.7214568853378296, "learning_rate": 1.2472329660971289e-06, "loss": 0.6655, "step": 15121 }, { "epoch": 0.7771610648576421, "grad_norm": 1.1130198240280151, "learning_rate": 1.246683056447856e-06, "loss": 0.6996, "step": 15122 }, { "epoch": 0.7772124576009868, "grad_norm": 0.9522799849510193, "learning_rate": 1.2461332507875085e-06, "loss": 0.6574, "step": 15123 }, { "epoch": 0.7772638503443314, "grad_norm": 1.0614334344863892, "learning_rate": 1.2455835491313173e-06, "loss": 0.7139, "step": 15124 }, { "epoch": 0.777315243087676, "grad_norm": 1.0643733739852905, "learning_rate": 1.2450339514945126e-06, "loss": 0.6592, "step": 15125 }, { "epoch": 0.7773666358310206, "grad_norm": 1.0488003492355347, "learning_rate": 1.244484457892321e-06, "loss": 0.6923, "step": 15126 }, { "epoch": 0.7774180285743653, "grad_norm": 0.7421144843101501, "learning_rate": 1.243935068339966e-06, "loss": 0.6331, "step": 15127 }, { "epoch": 0.7774694213177099, "grad_norm": 1.0389087200164795, "learning_rate": 1.2433857828526707e-06, "loss": 0.6883, "step": 15128 }, { "epoch": 0.7775208140610546, "grad_norm": 0.9748851656913757, "learning_rate": 1.2428366014456527e-06, "loss": 0.6395, "step": 15129 }, { "epoch": 0.7775722068043992, "grad_norm": 1.0269935131072998, "learning_rate": 1.242287524134128e-06, "loss": 0.6585, "step": 15130 }, { "epoch": 0.7776235995477438, "grad_norm": 1.0559674501419067, "learning_rate": 1.2417385509333075e-06, "loss": 0.7055, "step": 15131 }, { "epoch": 0.7776749922910885, "grad_norm": 0.7085198163986206, "learning_rate": 1.2411896818584035e-06, "loss": 0.6683, "step": 15132 }, { "epoch": 0.7777263850344331, "grad_norm": 0.6997009515762329, "learning_rate": 1.2406409169246214e-06, "loss": 0.6628, "step": 15133 }, { "epoch": 0.7777777777777778, "grad_norm": 1.1082754135131836, "learning_rate": 1.2400922561471663e-06, "loss": 0.6936, "step": 15134 }, { "epoch": 0.7778291705211224, "grad_norm": 0.7113308906555176, "learning_rate": 1.2395436995412362e-06, "loss": 0.622, "step": 15135 }, { "epoch": 0.7778805632644671, "grad_norm": 1.0882643461227417, "learning_rate": 1.238995247122034e-06, "loss": 0.6737, "step": 15136 }, { "epoch": 0.7779319560078117, "grad_norm": 0.8400201201438904, "learning_rate": 1.238446898904752e-06, "loss": 0.6753, "step": 15137 }, { "epoch": 0.7779833487511564, "grad_norm": 1.024397373199463, "learning_rate": 1.2378986549045823e-06, "loss": 0.6465, "step": 15138 }, { "epoch": 0.778034741494501, "grad_norm": 1.0820677280426025, "learning_rate": 1.2373505151367187e-06, "loss": 0.6479, "step": 15139 }, { "epoch": 0.7780861342378456, "grad_norm": 1.1748380661010742, "learning_rate": 1.236802479616342e-06, "loss": 0.6763, "step": 15140 }, { "epoch": 0.7781375269811902, "grad_norm": 0.7659570574760437, "learning_rate": 1.2362545483586397e-06, "loss": 0.6296, "step": 15141 }, { "epoch": 0.7781889197245349, "grad_norm": 1.075256109237671, "learning_rate": 1.235706721378792e-06, "loss": 0.6817, "step": 15142 }, { "epoch": 0.7782403124678795, "grad_norm": 1.0412192344665527, "learning_rate": 1.2351589986919772e-06, "loss": 0.7378, "step": 15143 }, { "epoch": 0.7782917052112241, "grad_norm": 0.7334829568862915, "learning_rate": 1.234611380313368e-06, "loss": 0.624, "step": 15144 }, { "epoch": 0.7783430979545688, "grad_norm": 1.034638524055481, "learning_rate": 1.23406386625814e-06, "loss": 0.656, "step": 15145 }, { "epoch": 0.7783944906979134, "grad_norm": 1.365031361579895, "learning_rate": 1.2335164565414615e-06, "loss": 0.6989, "step": 15146 }, { "epoch": 0.7784458834412581, "grad_norm": 0.7359990477561951, "learning_rate": 1.2329691511784968e-06, "loss": 0.5912, "step": 15147 }, { "epoch": 0.7784972761846027, "grad_norm": 1.104994297027588, "learning_rate": 1.232421950184413e-06, "loss": 0.7589, "step": 15148 }, { "epoch": 0.7785486689279474, "grad_norm": 1.056930661201477, "learning_rate": 1.2318748535743692e-06, "loss": 0.7097, "step": 15149 }, { "epoch": 0.778600061671292, "grad_norm": 0.7590477466583252, "learning_rate": 1.2313278613635227e-06, "loss": 0.6598, "step": 15150 }, { "epoch": 0.7786514544146367, "grad_norm": 1.1030726432800293, "learning_rate": 1.2307809735670278e-06, "loss": 0.6233, "step": 15151 }, { "epoch": 0.7787028471579813, "grad_norm": 1.1402710676193237, "learning_rate": 1.2302341902000404e-06, "loss": 0.664, "step": 15152 }, { "epoch": 0.778754239901326, "grad_norm": 1.0985804796218872, "learning_rate": 1.229687511277703e-06, "loss": 0.6254, "step": 15153 }, { "epoch": 0.7788056326446706, "grad_norm": 1.1713682413101196, "learning_rate": 1.2291409368151674e-06, "loss": 0.6847, "step": 15154 }, { "epoch": 0.7788570253880153, "grad_norm": 1.0770275592803955, "learning_rate": 1.2285944668275751e-06, "loss": 0.7433, "step": 15155 }, { "epoch": 0.7789084181313598, "grad_norm": 1.106186032295227, "learning_rate": 1.2280481013300655e-06, "loss": 0.7381, "step": 15156 }, { "epoch": 0.7789598108747045, "grad_norm": 1.0342353582382202, "learning_rate": 1.2275018403377775e-06, "loss": 0.6545, "step": 15157 }, { "epoch": 0.7790112036180491, "grad_norm": 0.7113685011863708, "learning_rate": 1.226955683865846e-06, "loss": 0.6543, "step": 15158 }, { "epoch": 0.7790625963613937, "grad_norm": 1.1921164989471436, "learning_rate": 1.2264096319294022e-06, "loss": 0.669, "step": 15159 }, { "epoch": 0.7791139891047384, "grad_norm": 1.063372015953064, "learning_rate": 1.2258636845435728e-06, "loss": 0.7078, "step": 15160 }, { "epoch": 0.779165381848083, "grad_norm": 1.0098252296447754, "learning_rate": 1.2253178417234872e-06, "loss": 0.6882, "step": 15161 }, { "epoch": 0.7792167745914277, "grad_norm": 0.8311984539031982, "learning_rate": 1.2247721034842674e-06, "loss": 0.6027, "step": 15162 }, { "epoch": 0.7792681673347723, "grad_norm": 1.0969524383544922, "learning_rate": 1.2242264698410333e-06, "loss": 0.6666, "step": 15163 }, { "epoch": 0.779319560078117, "grad_norm": 1.117874264717102, "learning_rate": 1.2236809408089e-06, "loss": 0.6585, "step": 15164 }, { "epoch": 0.7793709528214616, "grad_norm": 1.074995994567871, "learning_rate": 1.2231355164029862e-06, "loss": 0.6696, "step": 15165 }, { "epoch": 0.7794223455648063, "grad_norm": 1.0222442150115967, "learning_rate": 1.2225901966384002e-06, "loss": 0.7345, "step": 15166 }, { "epoch": 0.7794737383081509, "grad_norm": 1.0814427137374878, "learning_rate": 1.222044981530252e-06, "loss": 0.6676, "step": 15167 }, { "epoch": 0.7795251310514956, "grad_norm": 1.0580130815505981, "learning_rate": 1.221499871093646e-06, "loss": 0.6562, "step": 15168 }, { "epoch": 0.7795765237948402, "grad_norm": 1.1052751541137695, "learning_rate": 1.2209548653436848e-06, "loss": 0.7139, "step": 15169 }, { "epoch": 0.7796279165381849, "grad_norm": 1.0552520751953125, "learning_rate": 1.2204099642954702e-06, "loss": 0.6606, "step": 15170 }, { "epoch": 0.7796793092815294, "grad_norm": 1.0600188970565796, "learning_rate": 1.2198651679640983e-06, "loss": 0.6948, "step": 15171 }, { "epoch": 0.779730702024874, "grad_norm": 0.8103666305541992, "learning_rate": 1.2193204763646626e-06, "loss": 0.6701, "step": 15172 }, { "epoch": 0.7797820947682187, "grad_norm": 1.1309090852737427, "learning_rate": 1.218775889512253e-06, "loss": 0.6662, "step": 15173 }, { "epoch": 0.7798334875115633, "grad_norm": 1.0502512454986572, "learning_rate": 1.2182314074219615e-06, "loss": 0.6774, "step": 15174 }, { "epoch": 0.779884880254908, "grad_norm": 0.7737354636192322, "learning_rate": 1.2176870301088706e-06, "loss": 0.6206, "step": 15175 }, { "epoch": 0.7799362729982526, "grad_norm": 1.1204779148101807, "learning_rate": 1.2171427575880634e-06, "loss": 0.7456, "step": 15176 }, { "epoch": 0.7799876657415973, "grad_norm": 1.0258342027664185, "learning_rate": 1.216598589874618e-06, "loss": 0.6855, "step": 15177 }, { "epoch": 0.7800390584849419, "grad_norm": 1.162435531616211, "learning_rate": 1.2160545269836144e-06, "loss": 0.7076, "step": 15178 }, { "epoch": 0.7800904512282866, "grad_norm": 0.7873539328575134, "learning_rate": 1.215510568930124e-06, "loss": 0.6741, "step": 15179 }, { "epoch": 0.7801418439716312, "grad_norm": 1.03671133518219, "learning_rate": 1.2149667157292167e-06, "loss": 0.6566, "step": 15180 }, { "epoch": 0.7801932367149759, "grad_norm": 1.0335502624511719, "learning_rate": 1.2144229673959652e-06, "loss": 0.6418, "step": 15181 }, { "epoch": 0.7802446294583205, "grad_norm": 1.0655313730239868, "learning_rate": 1.2138793239454277e-06, "loss": 0.7284, "step": 15182 }, { "epoch": 0.7802960222016652, "grad_norm": 1.1996794939041138, "learning_rate": 1.213335785392671e-06, "loss": 0.7062, "step": 15183 }, { "epoch": 0.7803474149450098, "grad_norm": 0.7722945809364319, "learning_rate": 1.2127923517527535e-06, "loss": 0.6413, "step": 15184 }, { "epoch": 0.7803988076883545, "grad_norm": 1.2419521808624268, "learning_rate": 1.2122490230407307e-06, "loss": 0.7201, "step": 15185 }, { "epoch": 0.780450200431699, "grad_norm": 1.0601229667663574, "learning_rate": 1.2117057992716553e-06, "loss": 0.6556, "step": 15186 }, { "epoch": 0.7805015931750436, "grad_norm": 0.6914064884185791, "learning_rate": 1.2111626804605798e-06, "loss": 0.6338, "step": 15187 }, { "epoch": 0.7805529859183883, "grad_norm": 1.099282145500183, "learning_rate": 1.210619666622551e-06, "loss": 0.6962, "step": 15188 }, { "epoch": 0.7806043786617329, "grad_norm": 1.1018799543380737, "learning_rate": 1.210076757772612e-06, "loss": 0.7409, "step": 15189 }, { "epoch": 0.7806557714050776, "grad_norm": 1.0707240104675293, "learning_rate": 1.2095339539258088e-06, "loss": 0.6935, "step": 15190 }, { "epoch": 0.7807071641484222, "grad_norm": 1.0771310329437256, "learning_rate": 1.208991255097175e-06, "loss": 0.6624, "step": 15191 }, { "epoch": 0.7807585568917669, "grad_norm": 1.1325247287750244, "learning_rate": 1.20844866130175e-06, "loss": 0.7262, "step": 15192 }, { "epoch": 0.7808099496351115, "grad_norm": 1.0927236080169678, "learning_rate": 1.2079061725545638e-06, "loss": 0.7139, "step": 15193 }, { "epoch": 0.7808613423784562, "grad_norm": 1.1666457653045654, "learning_rate": 1.2073637888706518e-06, "loss": 0.6611, "step": 15194 }, { "epoch": 0.7809127351218008, "grad_norm": 1.1042253971099854, "learning_rate": 1.2068215102650355e-06, "loss": 0.6723, "step": 15195 }, { "epoch": 0.7809641278651455, "grad_norm": 0.7333838939666748, "learning_rate": 1.2062793367527425e-06, "loss": 0.6692, "step": 15196 }, { "epoch": 0.7810155206084901, "grad_norm": 0.9074286818504333, "learning_rate": 1.2057372683487938e-06, "loss": 0.6638, "step": 15197 }, { "epoch": 0.7810669133518348, "grad_norm": 1.0616259574890137, "learning_rate": 1.2051953050682058e-06, "loss": 0.7319, "step": 15198 }, { "epoch": 0.7811183060951794, "grad_norm": 1.1737288236618042, "learning_rate": 1.204653446925997e-06, "loss": 0.7286, "step": 15199 }, { "epoch": 0.781169698838524, "grad_norm": 1.0466140508651733, "learning_rate": 1.2041116939371782e-06, "loss": 0.6698, "step": 15200 }, { "epoch": 0.7812210915818686, "grad_norm": 1.0597411394119263, "learning_rate": 1.2035700461167605e-06, "loss": 0.6609, "step": 15201 }, { "epoch": 0.7812724843252132, "grad_norm": 1.098227858543396, "learning_rate": 1.203028503479748e-06, "loss": 0.7091, "step": 15202 }, { "epoch": 0.7813238770685579, "grad_norm": 1.0967403650283813, "learning_rate": 1.2024870660411497e-06, "loss": 0.7121, "step": 15203 }, { "epoch": 0.7813752698119025, "grad_norm": 0.6840171217918396, "learning_rate": 1.20194573381596e-06, "loss": 0.6192, "step": 15204 }, { "epoch": 0.7814266625552472, "grad_norm": 1.0861412286758423, "learning_rate": 1.2014045068191822e-06, "loss": 0.7349, "step": 15205 }, { "epoch": 0.7814780552985918, "grad_norm": 1.1589401960372925, "learning_rate": 1.2008633850658097e-06, "loss": 0.6742, "step": 15206 }, { "epoch": 0.7815294480419365, "grad_norm": 1.1869323253631592, "learning_rate": 1.200322368570833e-06, "loss": 0.7438, "step": 15207 }, { "epoch": 0.7815808407852811, "grad_norm": 1.146286129951477, "learning_rate": 1.1997814573492445e-06, "loss": 0.6719, "step": 15208 }, { "epoch": 0.7816322335286258, "grad_norm": 1.1644409894943237, "learning_rate": 1.1992406514160281e-06, "loss": 0.711, "step": 15209 }, { "epoch": 0.7816836262719704, "grad_norm": 1.0913883447647095, "learning_rate": 1.1986999507861714e-06, "loss": 0.6735, "step": 15210 }, { "epoch": 0.7817350190153151, "grad_norm": 1.1090627908706665, "learning_rate": 1.1981593554746485e-06, "loss": 0.6973, "step": 15211 }, { "epoch": 0.7817864117586597, "grad_norm": 0.744002103805542, "learning_rate": 1.1976188654964427e-06, "loss": 0.6588, "step": 15212 }, { "epoch": 0.7818378045020044, "grad_norm": 1.063124418258667, "learning_rate": 1.1970784808665264e-06, "loss": 0.7322, "step": 15213 }, { "epoch": 0.781889197245349, "grad_norm": 0.7242627143859863, "learning_rate": 1.1965382015998716e-06, "loss": 0.6701, "step": 15214 }, { "epoch": 0.7819405899886936, "grad_norm": 1.0454884767532349, "learning_rate": 1.1959980277114458e-06, "loss": 0.6623, "step": 15215 }, { "epoch": 0.7819919827320382, "grad_norm": 1.0639580488204956, "learning_rate": 1.195457959216218e-06, "loss": 0.7034, "step": 15216 }, { "epoch": 0.7820433754753828, "grad_norm": 1.209542989730835, "learning_rate": 1.1949179961291497e-06, "loss": 0.7425, "step": 15217 }, { "epoch": 0.7820947682187275, "grad_norm": 1.1384985446929932, "learning_rate": 1.194378138465201e-06, "loss": 0.6406, "step": 15218 }, { "epoch": 0.7821461609620721, "grad_norm": 1.1262634992599487, "learning_rate": 1.1938383862393294e-06, "loss": 0.6952, "step": 15219 }, { "epoch": 0.7821975537054168, "grad_norm": 1.0201456546783447, "learning_rate": 1.1932987394664874e-06, "loss": 0.6561, "step": 15220 }, { "epoch": 0.7822489464487614, "grad_norm": 1.0551997423171997, "learning_rate": 1.1927591981616294e-06, "loss": 0.7186, "step": 15221 }, { "epoch": 0.7823003391921061, "grad_norm": 1.1329519748687744, "learning_rate": 1.1922197623397025e-06, "loss": 0.7296, "step": 15222 }, { "epoch": 0.7823517319354507, "grad_norm": 1.069242000579834, "learning_rate": 1.1916804320156522e-06, "loss": 0.7311, "step": 15223 }, { "epoch": 0.7824031246787954, "grad_norm": 1.2301722764968872, "learning_rate": 1.1911412072044198e-06, "loss": 0.6621, "step": 15224 }, { "epoch": 0.78245451742214, "grad_norm": 1.0648915767669678, "learning_rate": 1.190602087920948e-06, "loss": 0.7255, "step": 15225 }, { "epoch": 0.7825059101654847, "grad_norm": 1.0701582431793213, "learning_rate": 1.1900630741801717e-06, "loss": 0.7313, "step": 15226 }, { "epoch": 0.7825573029088293, "grad_norm": 1.0615111589431763, "learning_rate": 1.1895241659970248e-06, "loss": 0.6473, "step": 15227 }, { "epoch": 0.782608695652174, "grad_norm": 1.0918419361114502, "learning_rate": 1.1889853633864367e-06, "loss": 0.6589, "step": 15228 }, { "epoch": 0.7826600883955186, "grad_norm": 1.0165928602218628, "learning_rate": 1.1884466663633388e-06, "loss": 0.6802, "step": 15229 }, { "epoch": 0.7827114811388632, "grad_norm": 1.063353419303894, "learning_rate": 1.1879080749426542e-06, "loss": 0.7442, "step": 15230 }, { "epoch": 0.7827628738822078, "grad_norm": 1.0520106554031372, "learning_rate": 1.1873695891393045e-06, "loss": 0.6641, "step": 15231 }, { "epoch": 0.7828142666255524, "grad_norm": 1.113895297050476, "learning_rate": 1.1868312089682115e-06, "loss": 0.7177, "step": 15232 }, { "epoch": 0.7828656593688971, "grad_norm": 1.0736457109451294, "learning_rate": 1.1862929344442876e-06, "loss": 0.6614, "step": 15233 }, { "epoch": 0.7829170521122417, "grad_norm": 1.0752116441726685, "learning_rate": 1.1857547655824498e-06, "loss": 0.6932, "step": 15234 }, { "epoch": 0.7829684448555864, "grad_norm": 1.212699294090271, "learning_rate": 1.1852167023976063e-06, "loss": 0.7333, "step": 15235 }, { "epoch": 0.783019837598931, "grad_norm": 1.0576047897338867, "learning_rate": 1.1846787449046653e-06, "loss": 0.6776, "step": 15236 }, { "epoch": 0.7830712303422757, "grad_norm": 0.7595146298408508, "learning_rate": 1.1841408931185304e-06, "loss": 0.6211, "step": 15237 }, { "epoch": 0.7831226230856203, "grad_norm": 1.0573607683181763, "learning_rate": 1.183603147054106e-06, "loss": 0.6799, "step": 15238 }, { "epoch": 0.783174015828965, "grad_norm": 0.7038010954856873, "learning_rate": 1.1830655067262886e-06, "loss": 0.6533, "step": 15239 }, { "epoch": 0.7832254085723096, "grad_norm": 1.1273553371429443, "learning_rate": 1.1825279721499726e-06, "loss": 0.7197, "step": 15240 }, { "epoch": 0.7832768013156542, "grad_norm": 1.1088274717330933, "learning_rate": 1.1819905433400547e-06, "loss": 0.6839, "step": 15241 }, { "epoch": 0.7833281940589989, "grad_norm": 1.1247124671936035, "learning_rate": 1.1814532203114226e-06, "loss": 0.7254, "step": 15242 }, { "epoch": 0.7833795868023435, "grad_norm": 1.101311445236206, "learning_rate": 1.1809160030789641e-06, "loss": 0.6767, "step": 15243 }, { "epoch": 0.7834309795456882, "grad_norm": 1.079833984375, "learning_rate": 1.1803788916575603e-06, "loss": 0.7373, "step": 15244 }, { "epoch": 0.7834823722890328, "grad_norm": 1.0802195072174072, "learning_rate": 1.1798418860620985e-06, "loss": 0.7036, "step": 15245 }, { "epoch": 0.7835337650323775, "grad_norm": 1.0954346656799316, "learning_rate": 1.1793049863074503e-06, "loss": 0.7337, "step": 15246 }, { "epoch": 0.783585157775722, "grad_norm": 1.1220605373382568, "learning_rate": 1.1787681924084948e-06, "loss": 0.76, "step": 15247 }, { "epoch": 0.7836365505190667, "grad_norm": 1.0888350009918213, "learning_rate": 1.1782315043801034e-06, "loss": 0.685, "step": 15248 }, { "epoch": 0.7836879432624113, "grad_norm": 1.0027142763137817, "learning_rate": 1.1776949222371442e-06, "loss": 0.617, "step": 15249 }, { "epoch": 0.783739336005756, "grad_norm": 1.1378785371780396, "learning_rate": 1.1771584459944862e-06, "loss": 0.7059, "step": 15250 }, { "epoch": 0.7837907287491006, "grad_norm": 1.0144895315170288, "learning_rate": 1.176622075666992e-06, "loss": 0.6381, "step": 15251 }, { "epoch": 0.7838421214924453, "grad_norm": 1.144880771636963, "learning_rate": 1.1760858112695222e-06, "loss": 0.6633, "step": 15252 }, { "epoch": 0.7838935142357899, "grad_norm": 1.119289755821228, "learning_rate": 1.1755496528169325e-06, "loss": 0.7189, "step": 15253 }, { "epoch": 0.7839449069791345, "grad_norm": 1.0901485681533813, "learning_rate": 1.175013600324082e-06, "loss": 0.7165, "step": 15254 }, { "epoch": 0.7839962997224792, "grad_norm": 1.0795358419418335, "learning_rate": 1.1744776538058173e-06, "loss": 0.6789, "step": 15255 }, { "epoch": 0.7840476924658238, "grad_norm": 1.0920484066009521, "learning_rate": 1.1739418132769915e-06, "loss": 0.6735, "step": 15256 }, { "epoch": 0.7840990852091685, "grad_norm": 1.137935996055603, "learning_rate": 1.173406078752447e-06, "loss": 0.72, "step": 15257 }, { "epoch": 0.7841504779525131, "grad_norm": 1.0923997163772583, "learning_rate": 1.1728704502470302e-06, "loss": 0.7203, "step": 15258 }, { "epoch": 0.7842018706958578, "grad_norm": 1.0696948766708374, "learning_rate": 1.172334927775579e-06, "loss": 0.7199, "step": 15259 }, { "epoch": 0.7842532634392024, "grad_norm": 1.1216126680374146, "learning_rate": 1.1717995113529306e-06, "loss": 0.6549, "step": 15260 }, { "epoch": 0.7843046561825471, "grad_norm": 0.7955917716026306, "learning_rate": 1.1712642009939219e-06, "loss": 0.6891, "step": 15261 }, { "epoch": 0.7843560489258916, "grad_norm": 1.0609737634658813, "learning_rate": 1.1707289967133794e-06, "loss": 0.7095, "step": 15262 }, { "epoch": 0.7844074416692363, "grad_norm": 1.0039712190628052, "learning_rate": 1.1701938985261353e-06, "loss": 0.6901, "step": 15263 }, { "epoch": 0.7844588344125809, "grad_norm": 1.0993261337280273, "learning_rate": 1.1696589064470138e-06, "loss": 0.6642, "step": 15264 }, { "epoch": 0.7845102271559256, "grad_norm": 1.063266396522522, "learning_rate": 1.1691240204908366e-06, "loss": 0.6465, "step": 15265 }, { "epoch": 0.7845616198992702, "grad_norm": 1.0830645561218262, "learning_rate": 1.1685892406724225e-06, "loss": 0.7528, "step": 15266 }, { "epoch": 0.7846130126426148, "grad_norm": 1.0366984605789185, "learning_rate": 1.1680545670065907e-06, "loss": 0.6849, "step": 15267 }, { "epoch": 0.7846644053859595, "grad_norm": 1.0818408727645874, "learning_rate": 1.1675199995081538e-06, "loss": 0.665, "step": 15268 }, { "epoch": 0.7847157981293041, "grad_norm": 1.0609105825424194, "learning_rate": 1.1669855381919214e-06, "loss": 0.6838, "step": 15269 }, { "epoch": 0.7847671908726488, "grad_norm": 1.129634976387024, "learning_rate": 1.1664511830727004e-06, "loss": 0.7014, "step": 15270 }, { "epoch": 0.7848185836159934, "grad_norm": 1.104885458946228, "learning_rate": 1.1659169341652986e-06, "loss": 0.6391, "step": 15271 }, { "epoch": 0.7848699763593381, "grad_norm": 1.1087974309921265, "learning_rate": 1.1653827914845162e-06, "loss": 0.6553, "step": 15272 }, { "epoch": 0.7849213691026827, "grad_norm": 1.07155442237854, "learning_rate": 1.1648487550451509e-06, "loss": 0.7328, "step": 15273 }, { "epoch": 0.7849727618460274, "grad_norm": 1.0907989740371704, "learning_rate": 1.1643148248620024e-06, "loss": 0.7626, "step": 15274 }, { "epoch": 0.785024154589372, "grad_norm": 1.080358862876892, "learning_rate": 1.1637810009498584e-06, "loss": 0.6801, "step": 15275 }, { "epoch": 0.7850755473327167, "grad_norm": 1.0830459594726562, "learning_rate": 1.163247283323513e-06, "loss": 0.7129, "step": 15276 }, { "epoch": 0.7851269400760612, "grad_norm": 1.1767476797103882, "learning_rate": 1.1627136719977521e-06, "loss": 0.7469, "step": 15277 }, { "epoch": 0.7851783328194059, "grad_norm": 1.0379974842071533, "learning_rate": 1.16218016698736e-06, "loss": 0.7045, "step": 15278 }, { "epoch": 0.7852297255627505, "grad_norm": 0.7395645380020142, "learning_rate": 1.1616467683071164e-06, "loss": 0.641, "step": 15279 }, { "epoch": 0.7852811183060951, "grad_norm": 0.718439519405365, "learning_rate": 1.1611134759718024e-06, "loss": 0.6423, "step": 15280 }, { "epoch": 0.7853325110494398, "grad_norm": 1.0622670650482178, "learning_rate": 1.1605802899961916e-06, "loss": 0.7016, "step": 15281 }, { "epoch": 0.7853839037927844, "grad_norm": 1.12211275100708, "learning_rate": 1.160047210395055e-06, "loss": 0.6991, "step": 15282 }, { "epoch": 0.7854352965361291, "grad_norm": 1.0998120307922363, "learning_rate": 1.1595142371831664e-06, "loss": 0.6689, "step": 15283 }, { "epoch": 0.7854866892794737, "grad_norm": 1.0639524459838867, "learning_rate": 1.1589813703752873e-06, "loss": 0.7095, "step": 15284 }, { "epoch": 0.7855380820228184, "grad_norm": 1.0444622039794922, "learning_rate": 1.1584486099861846e-06, "loss": 0.7119, "step": 15285 }, { "epoch": 0.785589474766163, "grad_norm": 1.0309337377548218, "learning_rate": 1.1579159560306162e-06, "loss": 0.6581, "step": 15286 }, { "epoch": 0.7856408675095077, "grad_norm": 1.1340227127075195, "learning_rate": 1.1573834085233443e-06, "loss": 0.712, "step": 15287 }, { "epoch": 0.7856922602528523, "grad_norm": 1.0500249862670898, "learning_rate": 1.1568509674791178e-06, "loss": 0.6363, "step": 15288 }, { "epoch": 0.785743652996197, "grad_norm": 1.0418407917022705, "learning_rate": 1.1563186329126925e-06, "loss": 0.6394, "step": 15289 }, { "epoch": 0.7857950457395416, "grad_norm": 1.14478600025177, "learning_rate": 1.1557864048388161e-06, "loss": 0.7266, "step": 15290 }, { "epoch": 0.7858464384828863, "grad_norm": 1.0929548740386963, "learning_rate": 1.155254283272233e-06, "loss": 0.6493, "step": 15291 }, { "epoch": 0.7858978312262308, "grad_norm": 0.7912446856498718, "learning_rate": 1.1547222682276882e-06, "loss": 0.6176, "step": 15292 }, { "epoch": 0.7859492239695755, "grad_norm": 1.0888994932174683, "learning_rate": 1.1541903597199216e-06, "loss": 0.7002, "step": 15293 }, { "epoch": 0.7860006167129201, "grad_norm": 1.1142997741699219, "learning_rate": 1.1536585577636688e-06, "loss": 0.6936, "step": 15294 }, { "epoch": 0.7860520094562647, "grad_norm": 1.058814287185669, "learning_rate": 1.1531268623736636e-06, "loss": 0.6909, "step": 15295 }, { "epoch": 0.7861034021996094, "grad_norm": 1.095106840133667, "learning_rate": 1.15259527356464e-06, "loss": 0.7284, "step": 15296 }, { "epoch": 0.786154794942954, "grad_norm": 0.8747300505638123, "learning_rate": 1.1520637913513222e-06, "loss": 0.6686, "step": 15297 }, { "epoch": 0.7862061876862987, "grad_norm": 1.1078985929489136, "learning_rate": 1.1515324157484382e-06, "loss": 0.7278, "step": 15298 }, { "epoch": 0.7862575804296433, "grad_norm": 1.117053747177124, "learning_rate": 1.151001146770709e-06, "loss": 0.6755, "step": 15299 }, { "epoch": 0.786308973172988, "grad_norm": 1.0862746238708496, "learning_rate": 1.1504699844328527e-06, "loss": 0.6879, "step": 15300 }, { "epoch": 0.7863603659163326, "grad_norm": 1.131516456604004, "learning_rate": 1.1499389287495888e-06, "loss": 0.7534, "step": 15301 }, { "epoch": 0.7864117586596773, "grad_norm": 1.0414159297943115, "learning_rate": 1.1494079797356271e-06, "loss": 0.6801, "step": 15302 }, { "epoch": 0.7864631514030219, "grad_norm": 1.0961095094680786, "learning_rate": 1.1488771374056834e-06, "loss": 0.6936, "step": 15303 }, { "epoch": 0.7865145441463666, "grad_norm": 1.0260212421417236, "learning_rate": 1.1483464017744583e-06, "loss": 0.6569, "step": 15304 }, { "epoch": 0.7865659368897112, "grad_norm": 1.1671698093414307, "learning_rate": 1.1478157728566608e-06, "loss": 0.6708, "step": 15305 }, { "epoch": 0.7866173296330559, "grad_norm": 1.0307930707931519, "learning_rate": 1.1472852506669917e-06, "loss": 0.6759, "step": 15306 }, { "epoch": 0.7866687223764004, "grad_norm": 0.755401611328125, "learning_rate": 1.146754835220149e-06, "loss": 0.6727, "step": 15307 }, { "epoch": 0.786720115119745, "grad_norm": 1.0879428386688232, "learning_rate": 1.1462245265308264e-06, "loss": 0.7193, "step": 15308 }, { "epoch": 0.7867715078630897, "grad_norm": 0.7504734992980957, "learning_rate": 1.145694324613721e-06, "loss": 0.6725, "step": 15309 }, { "epoch": 0.7868229006064343, "grad_norm": 1.0496242046356201, "learning_rate": 1.1451642294835192e-06, "loss": 0.6197, "step": 15310 }, { "epoch": 0.786874293349779, "grad_norm": 0.7963430285453796, "learning_rate": 1.1446342411549071e-06, "loss": 0.5974, "step": 15311 }, { "epoch": 0.7869256860931236, "grad_norm": 1.0633864402770996, "learning_rate": 1.1441043596425738e-06, "loss": 0.7066, "step": 15312 }, { "epoch": 0.7869770788364683, "grad_norm": 0.7577975988388062, "learning_rate": 1.143574584961193e-06, "loss": 0.6446, "step": 15313 }, { "epoch": 0.7870284715798129, "grad_norm": 0.7951050400733948, "learning_rate": 1.1430449171254472e-06, "loss": 0.6478, "step": 15314 }, { "epoch": 0.7870798643231576, "grad_norm": 1.067762851715088, "learning_rate": 1.1425153561500103e-06, "loss": 0.6979, "step": 15315 }, { "epoch": 0.7871312570665022, "grad_norm": 1.1863747835159302, "learning_rate": 1.141985902049554e-06, "loss": 0.6375, "step": 15316 }, { "epoch": 0.7871826498098469, "grad_norm": 1.1192349195480347, "learning_rate": 1.141456554838745e-06, "loss": 0.6847, "step": 15317 }, { "epoch": 0.7872340425531915, "grad_norm": 1.0687884092330933, "learning_rate": 1.140927314532254e-06, "loss": 0.6998, "step": 15318 }, { "epoch": 0.7872854352965362, "grad_norm": 0.7029162049293518, "learning_rate": 1.1403981811447412e-06, "loss": 0.6393, "step": 15319 }, { "epoch": 0.7873368280398808, "grad_norm": 1.165034294128418, "learning_rate": 1.1398691546908674e-06, "loss": 0.6864, "step": 15320 }, { "epoch": 0.7873882207832255, "grad_norm": 1.159232258796692, "learning_rate": 1.1393402351852884e-06, "loss": 0.7412, "step": 15321 }, { "epoch": 0.7874396135265701, "grad_norm": 1.13481605052948, "learning_rate": 1.13881142264266e-06, "loss": 0.7026, "step": 15322 }, { "epoch": 0.7874910062699146, "grad_norm": 1.14860999584198, "learning_rate": 1.1382827170776334e-06, "loss": 0.7037, "step": 15323 }, { "epoch": 0.7875423990132593, "grad_norm": 1.0993632078170776, "learning_rate": 1.137754118504855e-06, "loss": 0.7057, "step": 15324 }, { "epoch": 0.7875937917566039, "grad_norm": 1.106679916381836, "learning_rate": 1.1372256269389742e-06, "loss": 0.7162, "step": 15325 }, { "epoch": 0.7876451844999486, "grad_norm": 0.7836660146713257, "learning_rate": 1.1366972423946276e-06, "loss": 0.6259, "step": 15326 }, { "epoch": 0.7876965772432932, "grad_norm": 1.051287293434143, "learning_rate": 1.1361689648864592e-06, "loss": 0.6736, "step": 15327 }, { "epoch": 0.7877479699866379, "grad_norm": 1.0648866891860962, "learning_rate": 1.1356407944291037e-06, "loss": 0.7138, "step": 15328 }, { "epoch": 0.7877993627299825, "grad_norm": 1.0551220178604126, "learning_rate": 1.1351127310371946e-06, "loss": 0.6718, "step": 15329 }, { "epoch": 0.7878507554733272, "grad_norm": 1.0599972009658813, "learning_rate": 1.1345847747253602e-06, "loss": 0.6995, "step": 15330 }, { "epoch": 0.7879021482166718, "grad_norm": 0.6620064377784729, "learning_rate": 1.1340569255082318e-06, "loss": 0.6081, "step": 15331 }, { "epoch": 0.7879535409600165, "grad_norm": 0.7149621248245239, "learning_rate": 1.1335291834004324e-06, "loss": 0.6475, "step": 15332 }, { "epoch": 0.7880049337033611, "grad_norm": 1.1159652471542358, "learning_rate": 1.133001548416582e-06, "loss": 0.7404, "step": 15333 }, { "epoch": 0.7880563264467058, "grad_norm": 1.1008071899414062, "learning_rate": 1.1324740205713014e-06, "loss": 0.7867, "step": 15334 }, { "epoch": 0.7881077191900504, "grad_norm": 1.0862202644348145, "learning_rate": 1.1319465998792057e-06, "loss": 0.6913, "step": 15335 }, { "epoch": 0.788159111933395, "grad_norm": 1.120643973350525, "learning_rate": 1.1314192863549072e-06, "loss": 0.7168, "step": 15336 }, { "epoch": 0.7882105046767397, "grad_norm": 1.07773756980896, "learning_rate": 1.1308920800130146e-06, "loss": 0.6948, "step": 15337 }, { "epoch": 0.7882618974200842, "grad_norm": 1.143275260925293, "learning_rate": 1.1303649808681377e-06, "loss": 0.7255, "step": 15338 }, { "epoch": 0.7883132901634289, "grad_norm": 1.0647166967391968, "learning_rate": 1.1298379889348759e-06, "loss": 0.6771, "step": 15339 }, { "epoch": 0.7883646829067735, "grad_norm": 1.062179684638977, "learning_rate": 1.1293111042278332e-06, "loss": 0.6931, "step": 15340 }, { "epoch": 0.7884160756501182, "grad_norm": 0.6962597966194153, "learning_rate": 1.1287843267616067e-06, "loss": 0.6333, "step": 15341 }, { "epoch": 0.7884674683934628, "grad_norm": 1.111122727394104, "learning_rate": 1.128257656550789e-06, "loss": 0.7336, "step": 15342 }, { "epoch": 0.7885188611368075, "grad_norm": 1.185849905014038, "learning_rate": 1.1277310936099762e-06, "loss": 0.736, "step": 15343 }, { "epoch": 0.7885702538801521, "grad_norm": 1.091580867767334, "learning_rate": 1.1272046379537538e-06, "loss": 0.678, "step": 15344 }, { "epoch": 0.7886216466234968, "grad_norm": 0.7518974542617798, "learning_rate": 1.1266782895967098e-06, "loss": 0.6122, "step": 15345 }, { "epoch": 0.7886730393668414, "grad_norm": 1.0606197118759155, "learning_rate": 1.1261520485534238e-06, "loss": 0.7046, "step": 15346 }, { "epoch": 0.7887244321101861, "grad_norm": 1.096616506576538, "learning_rate": 1.1256259148384818e-06, "loss": 0.6493, "step": 15347 }, { "epoch": 0.7887758248535307, "grad_norm": 1.1386187076568604, "learning_rate": 1.125099888466454e-06, "loss": 0.6984, "step": 15348 }, { "epoch": 0.7888272175968754, "grad_norm": 1.1115142107009888, "learning_rate": 1.1245739694519187e-06, "loss": 0.7221, "step": 15349 }, { "epoch": 0.78887861034022, "grad_norm": 1.0191231966018677, "learning_rate": 1.1240481578094448e-06, "loss": 0.6827, "step": 15350 }, { "epoch": 0.7889300030835646, "grad_norm": 1.1206845045089722, "learning_rate": 1.123522453553602e-06, "loss": 0.6595, "step": 15351 }, { "epoch": 0.7889813958269093, "grad_norm": 1.060234546661377, "learning_rate": 1.1229968566989552e-06, "loss": 0.6212, "step": 15352 }, { "epoch": 0.7890327885702538, "grad_norm": 1.059466004371643, "learning_rate": 1.1224713672600646e-06, "loss": 0.6671, "step": 15353 }, { "epoch": 0.7890841813135985, "grad_norm": 0.873365044593811, "learning_rate": 1.1219459852514937e-06, "loss": 0.6293, "step": 15354 }, { "epoch": 0.7891355740569431, "grad_norm": 0.9886416792869568, "learning_rate": 1.1214207106877928e-06, "loss": 0.7025, "step": 15355 }, { "epoch": 0.7891869668002878, "grad_norm": 1.0985876321792603, "learning_rate": 1.1208955435835201e-06, "loss": 0.7038, "step": 15356 }, { "epoch": 0.7892383595436324, "grad_norm": 0.6853715777397156, "learning_rate": 1.1203704839532232e-06, "loss": 0.6188, "step": 15357 }, { "epoch": 0.7892897522869771, "grad_norm": 1.1219056844711304, "learning_rate": 1.1198455318114499e-06, "loss": 0.6938, "step": 15358 }, { "epoch": 0.7893411450303217, "grad_norm": 1.0656853914260864, "learning_rate": 1.119320687172743e-06, "loss": 0.6616, "step": 15359 }, { "epoch": 0.7893925377736664, "grad_norm": 1.1165146827697754, "learning_rate": 1.1187959500516465e-06, "loss": 0.6651, "step": 15360 }, { "epoch": 0.789443930517011, "grad_norm": 0.7125632762908936, "learning_rate": 1.1182713204626978e-06, "loss": 0.6287, "step": 15361 }, { "epoch": 0.7894953232603557, "grad_norm": 1.0847622156143188, "learning_rate": 1.1177467984204304e-06, "loss": 0.6631, "step": 15362 }, { "epoch": 0.7895467160037003, "grad_norm": 1.1944921016693115, "learning_rate": 1.1172223839393808e-06, "loss": 0.6907, "step": 15363 }, { "epoch": 0.789598108747045, "grad_norm": 1.137958288192749, "learning_rate": 1.116698077034073e-06, "loss": 0.6962, "step": 15364 }, { "epoch": 0.7896495014903896, "grad_norm": 1.092236876487732, "learning_rate": 1.1161738777190374e-06, "loss": 0.6685, "step": 15365 }, { "epoch": 0.7897008942337342, "grad_norm": 1.1049565076828003, "learning_rate": 1.1156497860087945e-06, "loss": 0.7135, "step": 15366 }, { "epoch": 0.7897522869770789, "grad_norm": 1.1603111028671265, "learning_rate": 1.115125801917869e-06, "loss": 0.6997, "step": 15367 }, { "epoch": 0.7898036797204234, "grad_norm": 1.22853684425354, "learning_rate": 1.1146019254607732e-06, "loss": 0.7021, "step": 15368 }, { "epoch": 0.7898550724637681, "grad_norm": 1.1579856872558594, "learning_rate": 1.1140781566520248e-06, "loss": 0.6951, "step": 15369 }, { "epoch": 0.7899064652071127, "grad_norm": 1.136399745941162, "learning_rate": 1.1135544955061344e-06, "loss": 0.7212, "step": 15370 }, { "epoch": 0.7899578579504574, "grad_norm": 1.0282552242279053, "learning_rate": 1.1130309420376112e-06, "loss": 0.6759, "step": 15371 }, { "epoch": 0.790009250693802, "grad_norm": 1.2187716960906982, "learning_rate": 1.1125074962609584e-06, "loss": 0.7373, "step": 15372 }, { "epoch": 0.7900606434371467, "grad_norm": 0.6978018879890442, "learning_rate": 1.1119841581906815e-06, "loss": 0.6225, "step": 15373 }, { "epoch": 0.7901120361804913, "grad_norm": 1.1205804347991943, "learning_rate": 1.1114609278412785e-06, "loss": 0.6929, "step": 15374 }, { "epoch": 0.790163428923836, "grad_norm": 1.1470102071762085, "learning_rate": 1.1109378052272446e-06, "loss": 0.6497, "step": 15375 }, { "epoch": 0.7902148216671806, "grad_norm": 1.073972463607788, "learning_rate": 1.110414790363078e-06, "loss": 0.6911, "step": 15376 }, { "epoch": 0.7902662144105252, "grad_norm": 1.080496072769165, "learning_rate": 1.1098918832632632e-06, "loss": 0.7286, "step": 15377 }, { "epoch": 0.7903176071538699, "grad_norm": 1.0411713123321533, "learning_rate": 1.1093690839422927e-06, "loss": 0.7172, "step": 15378 }, { "epoch": 0.7903689998972145, "grad_norm": 1.1401727199554443, "learning_rate": 1.1088463924146487e-06, "loss": 0.6686, "step": 15379 }, { "epoch": 0.7904203926405592, "grad_norm": 1.0712045431137085, "learning_rate": 1.1083238086948133e-06, "loss": 0.6868, "step": 15380 }, { "epoch": 0.7904717853839038, "grad_norm": 0.747482180595398, "learning_rate": 1.1078013327972636e-06, "loss": 0.6288, "step": 15381 }, { "epoch": 0.7905231781272485, "grad_norm": 0.8077569007873535, "learning_rate": 1.107278964736479e-06, "loss": 0.6822, "step": 15382 }, { "epoch": 0.790574570870593, "grad_norm": 1.1478655338287354, "learning_rate": 1.1067567045269295e-06, "loss": 0.7183, "step": 15383 }, { "epoch": 0.7906259636139377, "grad_norm": 1.1394224166870117, "learning_rate": 1.1062345521830837e-06, "loss": 0.6814, "step": 15384 }, { "epoch": 0.7906773563572823, "grad_norm": 1.108833909034729, "learning_rate": 1.1057125077194113e-06, "loss": 0.69, "step": 15385 }, { "epoch": 0.790728749100627, "grad_norm": 1.116007685661316, "learning_rate": 1.1051905711503746e-06, "loss": 0.647, "step": 15386 }, { "epoch": 0.7907801418439716, "grad_norm": 1.1368153095245361, "learning_rate": 1.104668742490434e-06, "loss": 0.6832, "step": 15387 }, { "epoch": 0.7908315345873163, "grad_norm": 0.707216203212738, "learning_rate": 1.1041470217540467e-06, "loss": 0.6569, "step": 15388 }, { "epoch": 0.7908829273306609, "grad_norm": 1.1801246404647827, "learning_rate": 1.1036254089556702e-06, "loss": 0.7399, "step": 15389 }, { "epoch": 0.7909343200740055, "grad_norm": 0.8007926344871521, "learning_rate": 1.1031039041097518e-06, "loss": 0.6408, "step": 15390 }, { "epoch": 0.7909857128173502, "grad_norm": 1.0790886878967285, "learning_rate": 1.1025825072307445e-06, "loss": 0.6522, "step": 15391 }, { "epoch": 0.7910371055606948, "grad_norm": 1.1342428922653198, "learning_rate": 1.1020612183330914e-06, "loss": 0.7072, "step": 15392 }, { "epoch": 0.7910884983040395, "grad_norm": 1.1169917583465576, "learning_rate": 1.101540037431235e-06, "loss": 0.7417, "step": 15393 }, { "epoch": 0.7911398910473841, "grad_norm": 1.0921988487243652, "learning_rate": 1.101018964539618e-06, "loss": 0.7439, "step": 15394 }, { "epoch": 0.7911912837907288, "grad_norm": 1.0874522924423218, "learning_rate": 1.100497999672674e-06, "loss": 0.7239, "step": 15395 }, { "epoch": 0.7912426765340734, "grad_norm": 1.16403329372406, "learning_rate": 1.0999771428448403e-06, "loss": 0.7077, "step": 15396 }, { "epoch": 0.7912940692774181, "grad_norm": 1.1167305707931519, "learning_rate": 1.0994563940705433e-06, "loss": 0.7549, "step": 15397 }, { "epoch": 0.7913454620207626, "grad_norm": 1.0662370920181274, "learning_rate": 1.0989357533642138e-06, "loss": 0.7008, "step": 15398 }, { "epoch": 0.7913968547641073, "grad_norm": 1.1400477886199951, "learning_rate": 1.0984152207402766e-06, "loss": 0.6512, "step": 15399 }, { "epoch": 0.7914482475074519, "grad_norm": 1.0472333431243896, "learning_rate": 1.097894796213152e-06, "loss": 0.7194, "step": 15400 }, { "epoch": 0.7914996402507966, "grad_norm": 1.099311113357544, "learning_rate": 1.0973744797972585e-06, "loss": 0.6885, "step": 15401 }, { "epoch": 0.7915510329941412, "grad_norm": 0.9624381065368652, "learning_rate": 1.096854271507014e-06, "loss": 0.6412, "step": 15402 }, { "epoch": 0.7916024257374858, "grad_norm": 0.995807945728302, "learning_rate": 1.096334171356831e-06, "loss": 0.6849, "step": 15403 }, { "epoch": 0.7916538184808305, "grad_norm": 1.0772331953048706, "learning_rate": 1.095814179361117e-06, "loss": 0.7222, "step": 15404 }, { "epoch": 0.7917052112241751, "grad_norm": 0.7315527200698853, "learning_rate": 1.0952942955342833e-06, "loss": 0.6396, "step": 15405 }, { "epoch": 0.7917566039675198, "grad_norm": 1.0514954328536987, "learning_rate": 1.0947745198907279e-06, "loss": 0.6554, "step": 15406 }, { "epoch": 0.7918079967108644, "grad_norm": 1.1457874774932861, "learning_rate": 1.094254852444856e-06, "loss": 0.7222, "step": 15407 }, { "epoch": 0.7918593894542091, "grad_norm": 1.104487419128418, "learning_rate": 1.093735293211064e-06, "loss": 0.727, "step": 15408 }, { "epoch": 0.7919107821975537, "grad_norm": 0.6998841762542725, "learning_rate": 1.093215842203747e-06, "loss": 0.6702, "step": 15409 }, { "epoch": 0.7919621749408984, "grad_norm": 0.7906347513198853, "learning_rate": 1.0926964994372952e-06, "loss": 0.6085, "step": 15410 }, { "epoch": 0.792013567684243, "grad_norm": 1.0360815525054932, "learning_rate": 1.0921772649261003e-06, "loss": 0.7055, "step": 15411 }, { "epoch": 0.7920649604275877, "grad_norm": 1.1705381870269775, "learning_rate": 1.091658138684547e-06, "loss": 0.7185, "step": 15412 }, { "epoch": 0.7921163531709323, "grad_norm": 0.7343947887420654, "learning_rate": 1.0911391207270167e-06, "loss": 0.6142, "step": 15413 }, { "epoch": 0.7921677459142769, "grad_norm": 0.7360662221908569, "learning_rate": 1.0906202110678909e-06, "loss": 0.6221, "step": 15414 }, { "epoch": 0.7922191386576215, "grad_norm": 1.0816441774368286, "learning_rate": 1.0901014097215468e-06, "loss": 0.7096, "step": 15415 }, { "epoch": 0.7922705314009661, "grad_norm": 1.0699526071548462, "learning_rate": 1.089582716702357e-06, "loss": 0.6984, "step": 15416 }, { "epoch": 0.7923219241443108, "grad_norm": 0.679084062576294, "learning_rate": 1.089064132024692e-06, "loss": 0.6247, "step": 15417 }, { "epoch": 0.7923733168876554, "grad_norm": 1.0251060724258423, "learning_rate": 1.0885456557029227e-06, "loss": 0.6763, "step": 15418 }, { "epoch": 0.7924247096310001, "grad_norm": 1.1182841062545776, "learning_rate": 1.0880272877514093e-06, "loss": 0.7153, "step": 15419 }, { "epoch": 0.7924761023743447, "grad_norm": 1.0087915658950806, "learning_rate": 1.087509028184517e-06, "loss": 0.67, "step": 15420 }, { "epoch": 0.7925274951176894, "grad_norm": 0.7238808870315552, "learning_rate": 1.0869908770166037e-06, "loss": 0.6492, "step": 15421 }, { "epoch": 0.792578887861034, "grad_norm": 1.124735713005066, "learning_rate": 1.0864728342620235e-06, "loss": 0.7536, "step": 15422 }, { "epoch": 0.7926302806043787, "grad_norm": 1.2828058004379272, "learning_rate": 1.0859548999351327e-06, "loss": 0.6964, "step": 15423 }, { "epoch": 0.7926816733477233, "grad_norm": 1.071630835533142, "learning_rate": 1.085437074050279e-06, "loss": 0.7005, "step": 15424 }, { "epoch": 0.792733066091068, "grad_norm": 1.17682945728302, "learning_rate": 1.0849193566218097e-06, "loss": 0.7131, "step": 15425 }, { "epoch": 0.7927844588344126, "grad_norm": 1.1434905529022217, "learning_rate": 1.0844017476640673e-06, "loss": 0.7137, "step": 15426 }, { "epoch": 0.7928358515777573, "grad_norm": 1.0828107595443726, "learning_rate": 1.0838842471913945e-06, "loss": 0.7025, "step": 15427 }, { "epoch": 0.7928872443211019, "grad_norm": 1.1218725442886353, "learning_rate": 1.0833668552181292e-06, "loss": 0.6961, "step": 15428 }, { "epoch": 0.7929386370644464, "grad_norm": 1.1331369876861572, "learning_rate": 1.0828495717586046e-06, "loss": 0.6944, "step": 15429 }, { "epoch": 0.7929900298077911, "grad_norm": 1.12319016456604, "learning_rate": 1.082332396827152e-06, "loss": 0.7044, "step": 15430 }, { "epoch": 0.7930414225511357, "grad_norm": 1.061699390411377, "learning_rate": 1.0818153304381046e-06, "loss": 0.6747, "step": 15431 }, { "epoch": 0.7930928152944804, "grad_norm": 1.1284937858581543, "learning_rate": 1.0812983726057818e-06, "loss": 0.7328, "step": 15432 }, { "epoch": 0.793144208037825, "grad_norm": 1.093911051750183, "learning_rate": 1.0807815233445113e-06, "loss": 0.6951, "step": 15433 }, { "epoch": 0.7931956007811697, "grad_norm": 0.8249751925468445, "learning_rate": 1.0802647826686107e-06, "loss": 0.6463, "step": 15434 }, { "epoch": 0.7932469935245143, "grad_norm": 1.108451247215271, "learning_rate": 1.0797481505923962e-06, "loss": 0.6782, "step": 15435 }, { "epoch": 0.793298386267859, "grad_norm": 0.9854241013526917, "learning_rate": 1.079231627130184e-06, "loss": 0.6602, "step": 15436 }, { "epoch": 0.7933497790112036, "grad_norm": 0.8660914897918701, "learning_rate": 1.0787152122962829e-06, "loss": 0.6254, "step": 15437 }, { "epoch": 0.7934011717545483, "grad_norm": 0.7369109988212585, "learning_rate": 1.0781989061050013e-06, "loss": 0.6672, "step": 15438 }, { "epoch": 0.7934525644978929, "grad_norm": 1.0640764236450195, "learning_rate": 1.0776827085706425e-06, "loss": 0.7052, "step": 15439 }, { "epoch": 0.7935039572412376, "grad_norm": 1.1049494743347168, "learning_rate": 1.077166619707512e-06, "loss": 0.7184, "step": 15440 }, { "epoch": 0.7935553499845822, "grad_norm": 0.8125507831573486, "learning_rate": 1.0766506395299032e-06, "loss": 0.6624, "step": 15441 }, { "epoch": 0.7936067427279269, "grad_norm": 1.0871307849884033, "learning_rate": 1.0761347680521157e-06, "loss": 0.7105, "step": 15442 }, { "epoch": 0.7936581354712715, "grad_norm": 1.1494214534759521, "learning_rate": 1.0756190052884396e-06, "loss": 0.6716, "step": 15443 }, { "epoch": 0.793709528214616, "grad_norm": 1.099273920059204, "learning_rate": 1.0751033512531672e-06, "loss": 0.7484, "step": 15444 }, { "epoch": 0.7937609209579607, "grad_norm": 0.7576067447662354, "learning_rate": 1.074587805960584e-06, "loss": 0.6579, "step": 15445 }, { "epoch": 0.7938123137013053, "grad_norm": 0.6758350729942322, "learning_rate": 1.0740723694249722e-06, "loss": 0.6545, "step": 15446 }, { "epoch": 0.79386370644465, "grad_norm": 1.1015040874481201, "learning_rate": 1.0735570416606161e-06, "loss": 0.7055, "step": 15447 }, { "epoch": 0.7939150991879946, "grad_norm": 1.1130210161209106, "learning_rate": 1.0730418226817885e-06, "loss": 0.7451, "step": 15448 }, { "epoch": 0.7939664919313393, "grad_norm": 1.0987616777420044, "learning_rate": 1.0725267125027676e-06, "loss": 0.7341, "step": 15449 }, { "epoch": 0.7940178846746839, "grad_norm": 1.0817186832427979, "learning_rate": 1.0720117111378236e-06, "loss": 0.6707, "step": 15450 }, { "epoch": 0.7940692774180286, "grad_norm": 1.2252196073532104, "learning_rate": 1.0714968186012254e-06, "loss": 0.6502, "step": 15451 }, { "epoch": 0.7941206701613732, "grad_norm": 1.096917986869812, "learning_rate": 1.070982034907237e-06, "loss": 0.7314, "step": 15452 }, { "epoch": 0.7941720629047179, "grad_norm": 0.806534469127655, "learning_rate": 1.0704673600701237e-06, "loss": 0.6288, "step": 15453 }, { "epoch": 0.7942234556480625, "grad_norm": 1.2084870338439941, "learning_rate": 1.0699527941041438e-06, "loss": 0.7791, "step": 15454 }, { "epoch": 0.7942748483914072, "grad_norm": 0.7811590433120728, "learning_rate": 1.0694383370235523e-06, "loss": 0.6449, "step": 15455 }, { "epoch": 0.7943262411347518, "grad_norm": 0.7063359022140503, "learning_rate": 1.0689239888426062e-06, "loss": 0.6451, "step": 15456 }, { "epoch": 0.7943776338780965, "grad_norm": 1.0560468435287476, "learning_rate": 1.0684097495755514e-06, "loss": 0.7243, "step": 15457 }, { "epoch": 0.7944290266214411, "grad_norm": 1.0755469799041748, "learning_rate": 1.067895619236639e-06, "loss": 0.6501, "step": 15458 }, { "epoch": 0.7944804193647856, "grad_norm": 1.1115938425064087, "learning_rate": 1.0673815978401108e-06, "loss": 0.6624, "step": 15459 }, { "epoch": 0.7945318121081303, "grad_norm": 1.1269582509994507, "learning_rate": 1.0668676854002124e-06, "loss": 0.706, "step": 15460 }, { "epoch": 0.7945832048514749, "grad_norm": 1.1205912828445435, "learning_rate": 1.066353881931177e-06, "loss": 0.7469, "step": 15461 }, { "epoch": 0.7946345975948196, "grad_norm": 0.8226692080497742, "learning_rate": 1.065840187447243e-06, "loss": 0.6725, "step": 15462 }, { "epoch": 0.7946859903381642, "grad_norm": 0.7626643776893616, "learning_rate": 1.0653266019626424e-06, "loss": 0.6521, "step": 15463 }, { "epoch": 0.7947373830815089, "grad_norm": 1.1196069717407227, "learning_rate": 1.0648131254916027e-06, "loss": 0.6626, "step": 15464 }, { "epoch": 0.7947887758248535, "grad_norm": 1.0794119834899902, "learning_rate": 1.0642997580483532e-06, "loss": 0.7218, "step": 15465 }, { "epoch": 0.7948401685681982, "grad_norm": 1.02251398563385, "learning_rate": 1.063786499647116e-06, "loss": 0.6871, "step": 15466 }, { "epoch": 0.7948915613115428, "grad_norm": 1.0550682544708252, "learning_rate": 1.063273350302111e-06, "loss": 0.7014, "step": 15467 }, { "epoch": 0.7949429540548875, "grad_norm": 1.0607175827026367, "learning_rate": 1.062760310027554e-06, "loss": 0.6451, "step": 15468 }, { "epoch": 0.7949943467982321, "grad_norm": 1.029678225517273, "learning_rate": 1.0622473788376636e-06, "loss": 0.7026, "step": 15469 }, { "epoch": 0.7950457395415768, "grad_norm": 1.1250770092010498, "learning_rate": 1.0617345567466453e-06, "loss": 0.7139, "step": 15470 }, { "epoch": 0.7950971322849214, "grad_norm": 1.1087660789489746, "learning_rate": 1.0612218437687117e-06, "loss": 0.7356, "step": 15471 }, { "epoch": 0.795148525028266, "grad_norm": 1.1242544651031494, "learning_rate": 1.0607092399180662e-06, "loss": 0.7083, "step": 15472 }, { "epoch": 0.7951999177716107, "grad_norm": 1.0315282344818115, "learning_rate": 1.06019674520891e-06, "loss": 0.7517, "step": 15473 }, { "epoch": 0.7952513105149552, "grad_norm": 1.2478526830673218, "learning_rate": 1.0596843596554452e-06, "loss": 0.6419, "step": 15474 }, { "epoch": 0.7953027032582999, "grad_norm": 1.0698009729385376, "learning_rate": 1.0591720832718654e-06, "loss": 0.6791, "step": 15475 }, { "epoch": 0.7953540960016445, "grad_norm": 1.1302348375320435, "learning_rate": 1.0586599160723643e-06, "loss": 0.7517, "step": 15476 }, { "epoch": 0.7954054887449892, "grad_norm": 1.3847373723983765, "learning_rate": 1.0581478580711307e-06, "loss": 0.661, "step": 15477 }, { "epoch": 0.7954568814883338, "grad_norm": 0.8708206415176392, "learning_rate": 1.0576359092823546e-06, "loss": 0.6355, "step": 15478 }, { "epoch": 0.7955082742316785, "grad_norm": 1.0562161207199097, "learning_rate": 1.057124069720218e-06, "loss": 0.6408, "step": 15479 }, { "epoch": 0.7955596669750231, "grad_norm": 1.0930520296096802, "learning_rate": 1.0566123393989025e-06, "loss": 0.663, "step": 15480 }, { "epoch": 0.7956110597183678, "grad_norm": 1.1158961057662964, "learning_rate": 1.0561007183325839e-06, "loss": 0.6789, "step": 15481 }, { "epoch": 0.7956624524617124, "grad_norm": 0.692581057548523, "learning_rate": 1.0555892065354416e-06, "loss": 0.6434, "step": 15482 }, { "epoch": 0.7957138452050571, "grad_norm": 1.0573724508285522, "learning_rate": 1.0550778040216426e-06, "loss": 0.6777, "step": 15483 }, { "epoch": 0.7957652379484017, "grad_norm": 1.0789636373519897, "learning_rate": 1.0545665108053588e-06, "loss": 0.6642, "step": 15484 }, { "epoch": 0.7958166306917464, "grad_norm": 1.0245907306671143, "learning_rate": 1.0540553269007547e-06, "loss": 0.6543, "step": 15485 }, { "epoch": 0.795868023435091, "grad_norm": 1.120058536529541, "learning_rate": 1.053544252321993e-06, "loss": 0.6469, "step": 15486 }, { "epoch": 0.7959194161784356, "grad_norm": 0.7369247078895569, "learning_rate": 1.053033287083235e-06, "loss": 0.6577, "step": 15487 }, { "epoch": 0.7959708089217803, "grad_norm": 1.1340211629867554, "learning_rate": 1.0525224311986365e-06, "loss": 0.6867, "step": 15488 }, { "epoch": 0.7960222016651248, "grad_norm": 0.7096269130706787, "learning_rate": 1.0520116846823514e-06, "loss": 0.6239, "step": 15489 }, { "epoch": 0.7960735944084695, "grad_norm": 0.7398607134819031, "learning_rate": 1.0515010475485283e-06, "loss": 0.6242, "step": 15490 }, { "epoch": 0.7961249871518141, "grad_norm": 1.0964971780776978, "learning_rate": 1.050990519811318e-06, "loss": 0.664, "step": 15491 }, { "epoch": 0.7961763798951588, "grad_norm": 1.0639350414276123, "learning_rate": 1.0504801014848642e-06, "loss": 0.6732, "step": 15492 }, { "epoch": 0.7962277726385034, "grad_norm": 1.0505406856536865, "learning_rate": 1.049969792583308e-06, "loss": 0.6634, "step": 15493 }, { "epoch": 0.7962791653818481, "grad_norm": 1.1659843921661377, "learning_rate": 1.0494595931207868e-06, "loss": 0.7412, "step": 15494 }, { "epoch": 0.7963305581251927, "grad_norm": 0.6134080290794373, "learning_rate": 1.0489495031114383e-06, "loss": 0.5861, "step": 15495 }, { "epoch": 0.7963819508685374, "grad_norm": 1.1059802770614624, "learning_rate": 1.0484395225693944e-06, "loss": 0.718, "step": 15496 }, { "epoch": 0.796433343611882, "grad_norm": 0.7163943648338318, "learning_rate": 1.0479296515087829e-06, "loss": 0.6066, "step": 15497 }, { "epoch": 0.7964847363552267, "grad_norm": 1.0998553037643433, "learning_rate": 1.0474198899437338e-06, "loss": 0.6791, "step": 15498 }, { "epoch": 0.7965361290985713, "grad_norm": 1.0531493425369263, "learning_rate": 1.0469102378883655e-06, "loss": 0.689, "step": 15499 }, { "epoch": 0.796587521841916, "grad_norm": 1.0829755067825317, "learning_rate": 1.0464006953568028e-06, "loss": 0.6864, "step": 15500 }, { "epoch": 0.7966389145852606, "grad_norm": 1.0901342630386353, "learning_rate": 1.0458912623631612e-06, "loss": 0.7151, "step": 15501 }, { "epoch": 0.7966903073286052, "grad_norm": 1.1875600814819336, "learning_rate": 1.0453819389215552e-06, "loss": 0.65, "step": 15502 }, { "epoch": 0.7967417000719499, "grad_norm": 1.1538118124008179, "learning_rate": 1.0448727250460945e-06, "loss": 0.6925, "step": 15503 }, { "epoch": 0.7967930928152945, "grad_norm": 1.0133543014526367, "learning_rate": 1.0443636207508901e-06, "loss": 0.6742, "step": 15504 }, { "epoch": 0.7968444855586391, "grad_norm": 1.0463714599609375, "learning_rate": 1.0438546260500455e-06, "loss": 0.6745, "step": 15505 }, { "epoch": 0.7968958783019837, "grad_norm": 1.0775257349014282, "learning_rate": 1.043345740957662e-06, "loss": 0.7062, "step": 15506 }, { "epoch": 0.7969472710453284, "grad_norm": 1.1011615991592407, "learning_rate": 1.0428369654878411e-06, "loss": 0.6864, "step": 15507 }, { "epoch": 0.796998663788673, "grad_norm": 1.0516574382781982, "learning_rate": 1.042328299654678e-06, "loss": 0.6813, "step": 15508 }, { "epoch": 0.7970500565320177, "grad_norm": 1.0544841289520264, "learning_rate": 1.0418197434722654e-06, "loss": 0.6963, "step": 15509 }, { "epoch": 0.7971014492753623, "grad_norm": 1.0776758193969727, "learning_rate": 1.0413112969546919e-06, "loss": 0.7233, "step": 15510 }, { "epoch": 0.797152842018707, "grad_norm": 1.0615752935409546, "learning_rate": 1.040802960116048e-06, "loss": 0.7203, "step": 15511 }, { "epoch": 0.7972042347620516, "grad_norm": 0.8504183292388916, "learning_rate": 1.0402947329704128e-06, "loss": 0.6262, "step": 15512 }, { "epoch": 0.7972556275053962, "grad_norm": 1.1260610818862915, "learning_rate": 1.0397866155318715e-06, "loss": 0.7167, "step": 15513 }, { "epoch": 0.7973070202487409, "grad_norm": 1.0777418613433838, "learning_rate": 1.0392786078145e-06, "loss": 0.7125, "step": 15514 }, { "epoch": 0.7973584129920855, "grad_norm": 1.129145860671997, "learning_rate": 1.0387707098323723e-06, "loss": 0.7395, "step": 15515 }, { "epoch": 0.7974098057354302, "grad_norm": 1.115346908569336, "learning_rate": 1.0382629215995627e-06, "loss": 0.7768, "step": 15516 }, { "epoch": 0.7974611984787748, "grad_norm": 1.2034854888916016, "learning_rate": 1.037755243130138e-06, "loss": 0.6906, "step": 15517 }, { "epoch": 0.7975125912221195, "grad_norm": 1.0902252197265625, "learning_rate": 1.0372476744381644e-06, "loss": 0.6732, "step": 15518 }, { "epoch": 0.7975639839654641, "grad_norm": 1.0775169134140015, "learning_rate": 1.0367402155377031e-06, "loss": 0.6899, "step": 15519 }, { "epoch": 0.7976153767088087, "grad_norm": 1.1425414085388184, "learning_rate": 1.0362328664428163e-06, "loss": 0.6947, "step": 15520 }, { "epoch": 0.7976667694521533, "grad_norm": 1.0651463270187378, "learning_rate": 1.0357256271675592e-06, "loss": 0.6408, "step": 15521 }, { "epoch": 0.797718162195498, "grad_norm": 1.1091195344924927, "learning_rate": 1.0352184977259855e-06, "loss": 0.6675, "step": 15522 }, { "epoch": 0.7977695549388426, "grad_norm": 1.086436152458191, "learning_rate": 1.0347114781321443e-06, "loss": 0.7005, "step": 15523 }, { "epoch": 0.7978209476821873, "grad_norm": 1.1279137134552002, "learning_rate": 1.0342045684000857e-06, "loss": 0.7386, "step": 15524 }, { "epoch": 0.7978723404255319, "grad_norm": 1.048555612564087, "learning_rate": 1.033697768543852e-06, "loss": 0.7227, "step": 15525 }, { "epoch": 0.7979237331688765, "grad_norm": 1.0486903190612793, "learning_rate": 1.0331910785774856e-06, "loss": 0.6992, "step": 15526 }, { "epoch": 0.7979751259122212, "grad_norm": 1.0420340299606323, "learning_rate": 1.0326844985150248e-06, "loss": 0.686, "step": 15527 }, { "epoch": 0.7980265186555658, "grad_norm": 1.1786748170852661, "learning_rate": 1.0321780283705023e-06, "loss": 0.675, "step": 15528 }, { "epoch": 0.7980779113989105, "grad_norm": 0.7970410585403442, "learning_rate": 1.0316716681579542e-06, "loss": 0.6326, "step": 15529 }, { "epoch": 0.7981293041422551, "grad_norm": 1.1323000192642212, "learning_rate": 1.0311654178914077e-06, "loss": 0.7366, "step": 15530 }, { "epoch": 0.7981806968855998, "grad_norm": 1.047073245048523, "learning_rate": 1.0306592775848895e-06, "loss": 0.5966, "step": 15531 }, { "epoch": 0.7982320896289444, "grad_norm": 1.116508960723877, "learning_rate": 1.03015324725242e-06, "loss": 0.6602, "step": 15532 }, { "epoch": 0.7982834823722891, "grad_norm": 1.1088706254959106, "learning_rate": 1.0296473269080248e-06, "loss": 0.712, "step": 15533 }, { "epoch": 0.7983348751156337, "grad_norm": 0.7519833445549011, "learning_rate": 1.0291415165657143e-06, "loss": 0.6156, "step": 15534 }, { "epoch": 0.7983862678589783, "grad_norm": 1.0853588581085205, "learning_rate": 1.0286358162395078e-06, "loss": 0.7073, "step": 15535 }, { "epoch": 0.7984376606023229, "grad_norm": 1.1213548183441162, "learning_rate": 1.0281302259434117e-06, "loss": 0.7099, "step": 15536 }, { "epoch": 0.7984890533456676, "grad_norm": 1.0626076459884644, "learning_rate": 1.0276247456914379e-06, "loss": 0.6133, "step": 15537 }, { "epoch": 0.7985404460890122, "grad_norm": 1.1052672863006592, "learning_rate": 1.0271193754975895e-06, "loss": 0.7001, "step": 15538 }, { "epoch": 0.7985918388323568, "grad_norm": 0.8310667276382446, "learning_rate": 1.0266141153758663e-06, "loss": 0.6647, "step": 15539 }, { "epoch": 0.7986432315757015, "grad_norm": 1.1308774948120117, "learning_rate": 1.0261089653402718e-06, "loss": 0.6995, "step": 15540 }, { "epoch": 0.7986946243190461, "grad_norm": 1.0704110860824585, "learning_rate": 1.0256039254047957e-06, "loss": 0.6167, "step": 15541 }, { "epoch": 0.7987460170623908, "grad_norm": 1.0361599922180176, "learning_rate": 1.025098995583435e-06, "loss": 0.7248, "step": 15542 }, { "epoch": 0.7987974098057354, "grad_norm": 1.0793198347091675, "learning_rate": 1.0245941758901773e-06, "loss": 0.6968, "step": 15543 }, { "epoch": 0.7988488025490801, "grad_norm": 0.6834498047828674, "learning_rate": 1.0240894663390099e-06, "loss": 0.6779, "step": 15544 }, { "epoch": 0.7989001952924247, "grad_norm": 1.0209816694259644, "learning_rate": 1.0235848669439142e-06, "loss": 0.6628, "step": 15545 }, { "epoch": 0.7989515880357694, "grad_norm": 1.0612114667892456, "learning_rate": 1.0230803777188735e-06, "loss": 0.7005, "step": 15546 }, { "epoch": 0.799002980779114, "grad_norm": 1.0969185829162598, "learning_rate": 1.0225759986778639e-06, "loss": 0.7234, "step": 15547 }, { "epoch": 0.7990543735224587, "grad_norm": 1.0239009857177734, "learning_rate": 1.022071729834858e-06, "loss": 0.663, "step": 15548 }, { "epoch": 0.7991057662658033, "grad_norm": 1.0455204248428345, "learning_rate": 1.0215675712038314e-06, "loss": 0.6833, "step": 15549 }, { "epoch": 0.7991571590091479, "grad_norm": 1.0256009101867676, "learning_rate": 1.0210635227987464e-06, "loss": 0.6624, "step": 15550 }, { "epoch": 0.7992085517524925, "grad_norm": 1.0611093044281006, "learning_rate": 1.0205595846335725e-06, "loss": 0.6867, "step": 15551 }, { "epoch": 0.7992599444958371, "grad_norm": 1.095306158065796, "learning_rate": 1.020055756722269e-06, "loss": 0.6809, "step": 15552 }, { "epoch": 0.7993113372391818, "grad_norm": 1.1080607175827026, "learning_rate": 1.0195520390787994e-06, "loss": 0.7465, "step": 15553 }, { "epoch": 0.7993627299825264, "grad_norm": 0.6830762624740601, "learning_rate": 1.0190484317171135e-06, "loss": 0.6493, "step": 15554 }, { "epoch": 0.7994141227258711, "grad_norm": 1.0532892942428589, "learning_rate": 1.0185449346511682e-06, "loss": 0.693, "step": 15555 }, { "epoch": 0.7994655154692157, "grad_norm": 1.1248561143875122, "learning_rate": 1.0180415478949124e-06, "loss": 0.7388, "step": 15556 }, { "epoch": 0.7995169082125604, "grad_norm": 1.112711787223816, "learning_rate": 1.0175382714622918e-06, "loss": 0.6962, "step": 15557 }, { "epoch": 0.799568300955905, "grad_norm": 1.0684731006622314, "learning_rate": 1.0170351053672516e-06, "loss": 0.7177, "step": 15558 }, { "epoch": 0.7996196936992497, "grad_norm": 1.0826455354690552, "learning_rate": 1.0165320496237324e-06, "loss": 0.6994, "step": 15559 }, { "epoch": 0.7996710864425943, "grad_norm": 1.101013422012329, "learning_rate": 1.0160291042456711e-06, "loss": 0.6792, "step": 15560 }, { "epoch": 0.799722479185939, "grad_norm": 0.7452312707901001, "learning_rate": 1.0155262692470013e-06, "loss": 0.6396, "step": 15561 }, { "epoch": 0.7997738719292836, "grad_norm": 1.071894884109497, "learning_rate": 1.0150235446416585e-06, "loss": 0.6931, "step": 15562 }, { "epoch": 0.7998252646726283, "grad_norm": 0.8513612747192383, "learning_rate": 1.014520930443565e-06, "loss": 0.6294, "step": 15563 }, { "epoch": 0.7998766574159729, "grad_norm": 1.0244433879852295, "learning_rate": 1.0140184266666508e-06, "loss": 0.6574, "step": 15564 }, { "epoch": 0.7999280501593174, "grad_norm": 1.205319881439209, "learning_rate": 1.0135160333248373e-06, "loss": 0.6913, "step": 15565 }, { "epoch": 0.7999794429026621, "grad_norm": 1.0883944034576416, "learning_rate": 1.013013750432042e-06, "loss": 0.7568, "step": 15566 }, { "epoch": 0.8000308356460067, "grad_norm": 1.0591858625411987, "learning_rate": 1.0125115780021833e-06, "loss": 0.6339, "step": 15567 }, { "epoch": 0.8000822283893514, "grad_norm": 1.0410797595977783, "learning_rate": 1.0120095160491732e-06, "loss": 0.6937, "step": 15568 }, { "epoch": 0.800133621132696, "grad_norm": 0.7365021109580994, "learning_rate": 1.0115075645869221e-06, "loss": 0.6459, "step": 15569 }, { "epoch": 0.8001850138760407, "grad_norm": 1.1553517580032349, "learning_rate": 1.0110057236293358e-06, "loss": 0.6838, "step": 15570 }, { "epoch": 0.8002364066193853, "grad_norm": 1.0949389934539795, "learning_rate": 1.0105039931903203e-06, "loss": 0.6504, "step": 15571 }, { "epoch": 0.80028779936273, "grad_norm": 1.1046245098114014, "learning_rate": 1.010002373283776e-06, "loss": 0.6931, "step": 15572 }, { "epoch": 0.8003391921060746, "grad_norm": 1.1114506721496582, "learning_rate": 1.0095008639235997e-06, "loss": 0.6671, "step": 15573 }, { "epoch": 0.8003905848494193, "grad_norm": 1.2202574014663696, "learning_rate": 1.0089994651236857e-06, "loss": 0.6798, "step": 15574 }, { "epoch": 0.8004419775927639, "grad_norm": 1.0099931955337524, "learning_rate": 1.0084981768979275e-06, "loss": 0.7015, "step": 15575 }, { "epoch": 0.8004933703361086, "grad_norm": 1.0747121572494507, "learning_rate": 1.007996999260213e-06, "loss": 0.7437, "step": 15576 }, { "epoch": 0.8005447630794532, "grad_norm": 1.162327766418457, "learning_rate": 1.0074959322244277e-06, "loss": 0.7627, "step": 15577 }, { "epoch": 0.8005961558227979, "grad_norm": 0.7410362362861633, "learning_rate": 1.006994975804454e-06, "loss": 0.6269, "step": 15578 }, { "epoch": 0.8006475485661425, "grad_norm": 1.1591094732284546, "learning_rate": 1.0064941300141701e-06, "loss": 0.7315, "step": 15579 }, { "epoch": 0.8006989413094872, "grad_norm": 1.108821153640747, "learning_rate": 1.0059933948674549e-06, "loss": 0.6776, "step": 15580 }, { "epoch": 0.8007503340528317, "grad_norm": 1.0784499645233154, "learning_rate": 1.0054927703781803e-06, "loss": 0.6911, "step": 15581 }, { "epoch": 0.8008017267961763, "grad_norm": 1.0099598169326782, "learning_rate": 1.0049922565602172e-06, "loss": 0.6709, "step": 15582 }, { "epoch": 0.800853119539521, "grad_norm": 1.070371150970459, "learning_rate": 1.00449185342743e-06, "loss": 0.6939, "step": 15583 }, { "epoch": 0.8009045122828656, "grad_norm": 1.055928349494934, "learning_rate": 1.0039915609936873e-06, "loss": 0.6671, "step": 15584 }, { "epoch": 0.8009559050262103, "grad_norm": 1.0562026500701904, "learning_rate": 1.0034913792728473e-06, "loss": 0.6894, "step": 15585 }, { "epoch": 0.8010072977695549, "grad_norm": 1.091769814491272, "learning_rate": 1.002991308278769e-06, "loss": 0.7148, "step": 15586 }, { "epoch": 0.8010586905128996, "grad_norm": 1.0034046173095703, "learning_rate": 1.0024913480253052e-06, "loss": 0.6782, "step": 15587 }, { "epoch": 0.8011100832562442, "grad_norm": 0.6950538754463196, "learning_rate": 1.0019914985263107e-06, "loss": 0.6747, "step": 15588 }, { "epoch": 0.8011614759995889, "grad_norm": 1.0451956987380981, "learning_rate": 1.001491759795633e-06, "loss": 0.6771, "step": 15589 }, { "epoch": 0.8012128687429335, "grad_norm": 1.072151780128479, "learning_rate": 1.0009921318471166e-06, "loss": 0.6786, "step": 15590 }, { "epoch": 0.8012642614862782, "grad_norm": 1.0466493368148804, "learning_rate": 1.0004926146946082e-06, "loss": 0.6758, "step": 15591 }, { "epoch": 0.8013156542296228, "grad_norm": 1.0870583057403564, "learning_rate": 9.999932083519414e-07, "loss": 0.718, "step": 15592 }, { "epoch": 0.8013670469729675, "grad_norm": 1.0378315448760986, "learning_rate": 9.994939128329572e-07, "loss": 0.7239, "step": 15593 }, { "epoch": 0.8014184397163121, "grad_norm": 1.083591341972351, "learning_rate": 9.989947281514872e-07, "loss": 0.7308, "step": 15594 }, { "epoch": 0.8014698324596568, "grad_norm": 1.0793935060501099, "learning_rate": 9.984956543213626e-07, "loss": 0.7228, "step": 15595 }, { "epoch": 0.8015212252030013, "grad_norm": 1.0766280889511108, "learning_rate": 9.979966913564088e-07, "loss": 0.7099, "step": 15596 }, { "epoch": 0.8015726179463459, "grad_norm": 0.7160735130310059, "learning_rate": 9.974978392704526e-07, "loss": 0.6255, "step": 15597 }, { "epoch": 0.8016240106896906, "grad_norm": 1.1363067626953125, "learning_rate": 9.969990980773143e-07, "loss": 0.6699, "step": 15598 }, { "epoch": 0.8016754034330352, "grad_norm": 1.161634922027588, "learning_rate": 9.965004677908097e-07, "loss": 0.6926, "step": 15599 }, { "epoch": 0.8017267961763799, "grad_norm": 1.2038350105285645, "learning_rate": 9.96001948424757e-07, "loss": 0.7305, "step": 15600 }, { "epoch": 0.8017781889197245, "grad_norm": 1.0605764389038086, "learning_rate": 9.955035399929668e-07, "loss": 0.6734, "step": 15601 }, { "epoch": 0.8018295816630692, "grad_norm": 1.115917444229126, "learning_rate": 9.950052425092482e-07, "loss": 0.7073, "step": 15602 }, { "epoch": 0.8018809744064138, "grad_norm": 1.080880880355835, "learning_rate": 9.945070559874054e-07, "loss": 0.6887, "step": 15603 }, { "epoch": 0.8019323671497585, "grad_norm": 1.098900556564331, "learning_rate": 9.94008980441245e-07, "loss": 0.6543, "step": 15604 }, { "epoch": 0.8019837598931031, "grad_norm": 1.1283313035964966, "learning_rate": 9.935110158845613e-07, "loss": 0.6558, "step": 15605 }, { "epoch": 0.8020351526364478, "grad_norm": 0.6943267583847046, "learning_rate": 9.930131623311545e-07, "loss": 0.686, "step": 15606 }, { "epoch": 0.8020865453797924, "grad_norm": 1.0808005332946777, "learning_rate": 9.925154197948178e-07, "loss": 0.7146, "step": 15607 }, { "epoch": 0.802137938123137, "grad_norm": 1.1013894081115723, "learning_rate": 9.92017788289339e-07, "loss": 0.7331, "step": 15608 }, { "epoch": 0.8021893308664817, "grad_norm": 1.070826530456543, "learning_rate": 9.91520267828509e-07, "loss": 0.6792, "step": 15609 }, { "epoch": 0.8022407236098263, "grad_norm": 1.0613547563552856, "learning_rate": 9.9102285842611e-07, "loss": 0.687, "step": 15610 }, { "epoch": 0.8022921163531709, "grad_norm": 0.7654101252555847, "learning_rate": 9.90525560095924e-07, "loss": 0.6429, "step": 15611 }, { "epoch": 0.8023435090965155, "grad_norm": 1.1360700130462646, "learning_rate": 9.900283728517268e-07, "loss": 0.7617, "step": 15612 }, { "epoch": 0.8023949018398602, "grad_norm": 1.096570372581482, "learning_rate": 9.89531296707298e-07, "loss": 0.6974, "step": 15613 }, { "epoch": 0.8024462945832048, "grad_norm": 1.1864348649978638, "learning_rate": 9.890343316764044e-07, "loss": 0.7214, "step": 15614 }, { "epoch": 0.8024976873265495, "grad_norm": 1.13390052318573, "learning_rate": 9.885374777728179e-07, "loss": 0.6595, "step": 15615 }, { "epoch": 0.8025490800698941, "grad_norm": 1.0865827798843384, "learning_rate": 9.880407350103026e-07, "loss": 0.7085, "step": 15616 }, { "epoch": 0.8026004728132388, "grad_norm": 1.1118522882461548, "learning_rate": 9.87544103402624e-07, "loss": 0.6965, "step": 15617 }, { "epoch": 0.8026518655565834, "grad_norm": 1.0904381275177002, "learning_rate": 9.87047582963539e-07, "loss": 0.7246, "step": 15618 }, { "epoch": 0.8027032582999281, "grad_norm": 1.0271846055984497, "learning_rate": 9.865511737068056e-07, "loss": 0.699, "step": 15619 }, { "epoch": 0.8027546510432727, "grad_norm": 1.064740538597107, "learning_rate": 9.860548756461763e-07, "loss": 0.7583, "step": 15620 }, { "epoch": 0.8028060437866174, "grad_norm": 1.0679099559783936, "learning_rate": 9.855586887954006e-07, "loss": 0.7152, "step": 15621 }, { "epoch": 0.802857436529962, "grad_norm": 1.084946870803833, "learning_rate": 9.850626131682283e-07, "loss": 0.6279, "step": 15622 }, { "epoch": 0.8029088292733066, "grad_norm": 1.075135588645935, "learning_rate": 9.845666487784027e-07, "loss": 0.652, "step": 15623 }, { "epoch": 0.8029602220166513, "grad_norm": 1.0716438293457031, "learning_rate": 9.84070795639664e-07, "loss": 0.6568, "step": 15624 }, { "epoch": 0.8030116147599959, "grad_norm": 1.0080878734588623, "learning_rate": 9.835750537657496e-07, "loss": 0.6888, "step": 15625 }, { "epoch": 0.8030630075033405, "grad_norm": 1.2478488683700562, "learning_rate": 9.830794231703972e-07, "loss": 0.6563, "step": 15626 }, { "epoch": 0.8031144002466851, "grad_norm": 1.0444831848144531, "learning_rate": 9.825839038673368e-07, "loss": 0.6629, "step": 15627 }, { "epoch": 0.8031657929900298, "grad_norm": 1.0878138542175293, "learning_rate": 9.820884958702982e-07, "loss": 0.6598, "step": 15628 }, { "epoch": 0.8032171857333744, "grad_norm": 0.7106992602348328, "learning_rate": 9.815931991930043e-07, "loss": 0.6209, "step": 15629 }, { "epoch": 0.8032685784767191, "grad_norm": 1.0674991607666016, "learning_rate": 9.810980138491816e-07, "loss": 0.7091, "step": 15630 }, { "epoch": 0.8033199712200637, "grad_norm": 1.1658207178115845, "learning_rate": 9.80602939852548e-07, "loss": 0.7056, "step": 15631 }, { "epoch": 0.8033713639634084, "grad_norm": 1.1125903129577637, "learning_rate": 9.801079772168182e-07, "loss": 0.693, "step": 15632 }, { "epoch": 0.803422756706753, "grad_norm": 1.257317066192627, "learning_rate": 9.796131259557102e-07, "loss": 0.7015, "step": 15633 }, { "epoch": 0.8034741494500977, "grad_norm": 1.1985628604888916, "learning_rate": 9.791183860829284e-07, "loss": 0.7352, "step": 15634 }, { "epoch": 0.8035255421934423, "grad_norm": 1.1209681034088135, "learning_rate": 9.786237576121843e-07, "loss": 0.6715, "step": 15635 }, { "epoch": 0.803576934936787, "grad_norm": 1.0488269329071045, "learning_rate": 9.781292405571808e-07, "loss": 0.6897, "step": 15636 }, { "epoch": 0.8036283276801316, "grad_norm": 1.1063227653503418, "learning_rate": 9.776348349316188e-07, "loss": 0.6677, "step": 15637 }, { "epoch": 0.8036797204234762, "grad_norm": 1.105486512184143, "learning_rate": 9.771405407491945e-07, "loss": 0.6687, "step": 15638 }, { "epoch": 0.8037311131668209, "grad_norm": 1.1207412481307983, "learning_rate": 9.766463580236063e-07, "loss": 0.7404, "step": 15639 }, { "epoch": 0.8037825059101655, "grad_norm": 1.1348786354064941, "learning_rate": 9.761522867685441e-07, "loss": 0.6946, "step": 15640 }, { "epoch": 0.8038338986535101, "grad_norm": 1.0985136032104492, "learning_rate": 9.75658326997695e-07, "loss": 0.6845, "step": 15641 }, { "epoch": 0.8038852913968547, "grad_norm": 1.0935940742492676, "learning_rate": 9.751644787247488e-07, "loss": 0.6789, "step": 15642 }, { "epoch": 0.8039366841401994, "grad_norm": 1.1621572971343994, "learning_rate": 9.746707419633827e-07, "loss": 0.6679, "step": 15643 }, { "epoch": 0.803988076883544, "grad_norm": 1.0795621871948242, "learning_rate": 9.741771167272802e-07, "loss": 0.6454, "step": 15644 }, { "epoch": 0.8040394696268887, "grad_norm": 0.7665844559669495, "learning_rate": 9.73683603030115e-07, "loss": 0.64, "step": 15645 }, { "epoch": 0.8040908623702333, "grad_norm": 0.7586016058921814, "learning_rate": 9.731902008855637e-07, "loss": 0.6952, "step": 15646 }, { "epoch": 0.804142255113578, "grad_norm": 1.0784357786178589, "learning_rate": 9.72696910307292e-07, "loss": 0.6533, "step": 15647 }, { "epoch": 0.8041936478569226, "grad_norm": 1.0640716552734375, "learning_rate": 9.722037313089706e-07, "loss": 0.746, "step": 15648 }, { "epoch": 0.8042450406002672, "grad_norm": 1.0395636558532715, "learning_rate": 9.717106639042623e-07, "loss": 0.6932, "step": 15649 }, { "epoch": 0.8042964333436119, "grad_norm": 1.0173046588897705, "learning_rate": 9.712177081068259e-07, "loss": 0.6643, "step": 15650 }, { "epoch": 0.8043478260869565, "grad_norm": 1.0855402946472168, "learning_rate": 9.707248639303223e-07, "loss": 0.662, "step": 15651 }, { "epoch": 0.8043992188303012, "grad_norm": 1.06850004196167, "learning_rate": 9.702321313884055e-07, "loss": 0.6414, "step": 15652 }, { "epoch": 0.8044506115736458, "grad_norm": 1.0983762741088867, "learning_rate": 9.69739510494726e-07, "loss": 0.7262, "step": 15653 }, { "epoch": 0.8045020043169905, "grad_norm": 1.365220069885254, "learning_rate": 9.692470012629318e-07, "loss": 0.6667, "step": 15654 }, { "epoch": 0.8045533970603351, "grad_norm": 1.0826640129089355, "learning_rate": 9.687546037066714e-07, "loss": 0.7195, "step": 15655 }, { "epoch": 0.8046047898036797, "grad_norm": 0.7553123235702515, "learning_rate": 9.682623178395828e-07, "loss": 0.6178, "step": 15656 }, { "epoch": 0.8046561825470243, "grad_norm": 1.1085318326950073, "learning_rate": 9.677701436753083e-07, "loss": 0.7208, "step": 15657 }, { "epoch": 0.804707575290369, "grad_norm": 1.1513320207595825, "learning_rate": 9.672780812274828e-07, "loss": 0.7226, "step": 15658 }, { "epoch": 0.8047589680337136, "grad_norm": 1.1253992319107056, "learning_rate": 9.667861305097392e-07, "loss": 0.6981, "step": 15659 }, { "epoch": 0.8048103607770583, "grad_norm": 1.0486522912979126, "learning_rate": 9.662942915357089e-07, "loss": 0.6468, "step": 15660 }, { "epoch": 0.8048617535204029, "grad_norm": 1.1129220724105835, "learning_rate": 9.658025643190156e-07, "loss": 0.6828, "step": 15661 }, { "epoch": 0.8049131462637475, "grad_norm": 1.104549765586853, "learning_rate": 9.653109488732886e-07, "loss": 0.7193, "step": 15662 }, { "epoch": 0.8049645390070922, "grad_norm": 1.1184258460998535, "learning_rate": 9.648194452121417e-07, "loss": 0.7119, "step": 15663 }, { "epoch": 0.8050159317504368, "grad_norm": 1.0413025617599487, "learning_rate": 9.643280533491978e-07, "loss": 0.6775, "step": 15664 }, { "epoch": 0.8050673244937815, "grad_norm": 1.085171103477478, "learning_rate": 9.638367732980686e-07, "loss": 0.7297, "step": 15665 }, { "epoch": 0.8051187172371261, "grad_norm": 1.055050015449524, "learning_rate": 9.633456050723666e-07, "loss": 0.6911, "step": 15666 }, { "epoch": 0.8051701099804708, "grad_norm": 0.7985503673553467, "learning_rate": 9.628545486856984e-07, "loss": 0.6405, "step": 15667 }, { "epoch": 0.8052215027238154, "grad_norm": 1.0397311449050903, "learning_rate": 9.62363604151671e-07, "loss": 0.6581, "step": 15668 }, { "epoch": 0.8052728954671601, "grad_norm": 1.1171287298202515, "learning_rate": 9.618727714838865e-07, "loss": 0.7058, "step": 15669 }, { "epoch": 0.8053242882105047, "grad_norm": 1.140602707862854, "learning_rate": 9.613820506959425e-07, "loss": 0.6892, "step": 15670 }, { "epoch": 0.8053756809538494, "grad_norm": 1.0671157836914062, "learning_rate": 9.608914418014358e-07, "loss": 0.6819, "step": 15671 }, { "epoch": 0.8054270736971939, "grad_norm": 1.0495786666870117, "learning_rate": 9.604009448139578e-07, "loss": 0.684, "step": 15672 }, { "epoch": 0.8054784664405386, "grad_norm": 1.1356549263000488, "learning_rate": 9.599105597471004e-07, "loss": 0.6342, "step": 15673 }, { "epoch": 0.8055298591838832, "grad_norm": 1.1024576425552368, "learning_rate": 9.594202866144487e-07, "loss": 0.7053, "step": 15674 }, { "epoch": 0.8055812519272278, "grad_norm": 1.5508676767349243, "learning_rate": 9.589301254295863e-07, "loss": 0.6786, "step": 15675 }, { "epoch": 0.8056326446705725, "grad_norm": 1.115859031677246, "learning_rate": 9.584400762060925e-07, "loss": 0.6749, "step": 15676 }, { "epoch": 0.8056840374139171, "grad_norm": 1.1650100946426392, "learning_rate": 9.579501389575468e-07, "loss": 0.7519, "step": 15677 }, { "epoch": 0.8057354301572618, "grad_norm": 1.087889313697815, "learning_rate": 9.574603136975224e-07, "loss": 0.743, "step": 15678 }, { "epoch": 0.8057868229006064, "grad_norm": 1.1158632040023804, "learning_rate": 9.569706004395902e-07, "loss": 0.7035, "step": 15679 }, { "epoch": 0.8058382156439511, "grad_norm": 1.1405482292175293, "learning_rate": 9.564809991973162e-07, "loss": 0.7114, "step": 15680 }, { "epoch": 0.8058896083872957, "grad_norm": 1.0613936185836792, "learning_rate": 9.559915099842686e-07, "loss": 0.7328, "step": 15681 }, { "epoch": 0.8059410011306404, "grad_norm": 1.132988691329956, "learning_rate": 9.555021328140079e-07, "loss": 0.7375, "step": 15682 }, { "epoch": 0.805992393873985, "grad_norm": 1.0173375606536865, "learning_rate": 9.550128677000913e-07, "loss": 0.6894, "step": 15683 }, { "epoch": 0.8060437866173297, "grad_norm": 1.0887941122055054, "learning_rate": 9.545237146560776e-07, "loss": 0.7042, "step": 15684 }, { "epoch": 0.8060951793606743, "grad_norm": 1.068013072013855, "learning_rate": 9.540346736955146e-07, "loss": 0.6867, "step": 15685 }, { "epoch": 0.806146572104019, "grad_norm": 0.7514823079109192, "learning_rate": 9.535457448319557e-07, "loss": 0.6406, "step": 15686 }, { "epoch": 0.8061979648473635, "grad_norm": 1.1355196237564087, "learning_rate": 9.530569280789453e-07, "loss": 0.6679, "step": 15687 }, { "epoch": 0.8062493575907081, "grad_norm": 1.115376353263855, "learning_rate": 9.525682234500266e-07, "loss": 0.7105, "step": 15688 }, { "epoch": 0.8063007503340528, "grad_norm": 1.0226393938064575, "learning_rate": 9.520796309587382e-07, "loss": 0.6378, "step": 15689 }, { "epoch": 0.8063521430773974, "grad_norm": 1.0538554191589355, "learning_rate": 9.515911506186198e-07, "loss": 0.7031, "step": 15690 }, { "epoch": 0.8064035358207421, "grad_norm": 1.3419560194015503, "learning_rate": 9.51102782443204e-07, "loss": 0.6946, "step": 15691 }, { "epoch": 0.8064549285640867, "grad_norm": 1.055940866470337, "learning_rate": 9.506145264460198e-07, "loss": 0.7036, "step": 15692 }, { "epoch": 0.8065063213074314, "grad_norm": 1.057896375656128, "learning_rate": 9.501263826405977e-07, "loss": 0.693, "step": 15693 }, { "epoch": 0.806557714050776, "grad_norm": 0.7647779583930969, "learning_rate": 9.496383510404605e-07, "loss": 0.6237, "step": 15694 }, { "epoch": 0.8066091067941207, "grad_norm": 1.1295554637908936, "learning_rate": 9.491504316591305e-07, "loss": 0.7537, "step": 15695 }, { "epoch": 0.8066604995374653, "grad_norm": 1.0404016971588135, "learning_rate": 9.486626245101227e-07, "loss": 0.6972, "step": 15696 }, { "epoch": 0.80671189228081, "grad_norm": 1.1245297193527222, "learning_rate": 9.481749296069581e-07, "loss": 0.7007, "step": 15697 }, { "epoch": 0.8067632850241546, "grad_norm": 1.0916781425476074, "learning_rate": 9.476873469631425e-07, "loss": 0.6921, "step": 15698 }, { "epoch": 0.8068146777674993, "grad_norm": 0.7407129406929016, "learning_rate": 9.471998765921886e-07, "loss": 0.5987, "step": 15699 }, { "epoch": 0.8068660705108439, "grad_norm": 1.0746461153030396, "learning_rate": 9.467125185076015e-07, "loss": 0.6822, "step": 15700 }, { "epoch": 0.8069174632541886, "grad_norm": 0.7065196633338928, "learning_rate": 9.462252727228827e-07, "loss": 0.701, "step": 15701 }, { "epoch": 0.8069688559975331, "grad_norm": 1.111271858215332, "learning_rate": 9.45738139251533e-07, "loss": 0.7077, "step": 15702 }, { "epoch": 0.8070202487408777, "grad_norm": 0.7323873043060303, "learning_rate": 9.452511181070495e-07, "loss": 0.6751, "step": 15703 }, { "epoch": 0.8070716414842224, "grad_norm": 1.0814621448516846, "learning_rate": 9.447642093029241e-07, "loss": 0.6295, "step": 15704 }, { "epoch": 0.807123034227567, "grad_norm": 0.7119811773300171, "learning_rate": 9.44277412852646e-07, "loss": 0.627, "step": 15705 }, { "epoch": 0.8071744269709117, "grad_norm": 1.097100853919983, "learning_rate": 9.437907287697063e-07, "loss": 0.7065, "step": 15706 }, { "epoch": 0.8072258197142563, "grad_norm": 1.092657446861267, "learning_rate": 9.433041570675844e-07, "loss": 0.7178, "step": 15707 }, { "epoch": 0.807277212457601, "grad_norm": 1.057586669921875, "learning_rate": 9.428176977597641e-07, "loss": 0.6167, "step": 15708 }, { "epoch": 0.8073286052009456, "grad_norm": 1.0280319452285767, "learning_rate": 9.423313508597215e-07, "loss": 0.6656, "step": 15709 }, { "epoch": 0.8073799979442903, "grad_norm": 1.059216022491455, "learning_rate": 9.418451163809334e-07, "loss": 0.7224, "step": 15710 }, { "epoch": 0.8074313906876349, "grad_norm": 1.1042299270629883, "learning_rate": 9.413589943368701e-07, "loss": 0.6767, "step": 15711 }, { "epoch": 0.8074827834309796, "grad_norm": 1.123189926147461, "learning_rate": 9.408729847409986e-07, "loss": 0.686, "step": 15712 }, { "epoch": 0.8075341761743242, "grad_norm": 1.057746171951294, "learning_rate": 9.403870876067883e-07, "loss": 0.7288, "step": 15713 }, { "epoch": 0.8075855689176689, "grad_norm": 1.0662380456924438, "learning_rate": 9.399013029476966e-07, "loss": 0.7151, "step": 15714 }, { "epoch": 0.8076369616610135, "grad_norm": 0.7433615922927856, "learning_rate": 9.394156307771857e-07, "loss": 0.6595, "step": 15715 }, { "epoch": 0.8076883544043582, "grad_norm": 1.012619137763977, "learning_rate": 9.389300711087107e-07, "loss": 0.6692, "step": 15716 }, { "epoch": 0.8077397471477027, "grad_norm": 0.6762338876724243, "learning_rate": 9.384446239557243e-07, "loss": 0.6169, "step": 15717 }, { "epoch": 0.8077911398910473, "grad_norm": 1.132218360900879, "learning_rate": 9.379592893316747e-07, "loss": 0.6916, "step": 15718 }, { "epoch": 0.807842532634392, "grad_norm": 1.0885467529296875, "learning_rate": 9.374740672500121e-07, "loss": 0.6729, "step": 15719 }, { "epoch": 0.8078939253777366, "grad_norm": 1.108532190322876, "learning_rate": 9.369889577241775e-07, "loss": 0.707, "step": 15720 }, { "epoch": 0.8079453181210813, "grad_norm": 1.0918254852294922, "learning_rate": 9.365039607676119e-07, "loss": 0.6692, "step": 15721 }, { "epoch": 0.8079967108644259, "grad_norm": 1.087377905845642, "learning_rate": 9.360190763937527e-07, "loss": 0.674, "step": 15722 }, { "epoch": 0.8080481036077706, "grad_norm": 1.094320297241211, "learning_rate": 9.355343046160326e-07, "loss": 0.67, "step": 15723 }, { "epoch": 0.8080994963511152, "grad_norm": 1.0726280212402344, "learning_rate": 9.350496454478852e-07, "loss": 0.7016, "step": 15724 }, { "epoch": 0.8081508890944599, "grad_norm": 1.114052414894104, "learning_rate": 9.345650989027355e-07, "loss": 0.756, "step": 15725 }, { "epoch": 0.8082022818378045, "grad_norm": 1.0577454566955566, "learning_rate": 9.340806649940126e-07, "loss": 0.6422, "step": 15726 }, { "epoch": 0.8082536745811492, "grad_norm": 1.0712518692016602, "learning_rate": 9.335963437351325e-07, "loss": 0.6679, "step": 15727 }, { "epoch": 0.8083050673244938, "grad_norm": 1.0712239742279053, "learning_rate": 9.331121351395184e-07, "loss": 0.7428, "step": 15728 }, { "epoch": 0.8083564600678385, "grad_norm": 1.047443151473999, "learning_rate": 9.326280392205838e-07, "loss": 0.6785, "step": 15729 }, { "epoch": 0.8084078528111831, "grad_norm": 0.7112809419631958, "learning_rate": 9.321440559917411e-07, "loss": 0.6528, "step": 15730 }, { "epoch": 0.8084592455545278, "grad_norm": 1.0286002159118652, "learning_rate": 9.316601854663982e-07, "loss": 0.6493, "step": 15731 }, { "epoch": 0.8085106382978723, "grad_norm": 1.0858556032180786, "learning_rate": 9.311764276579638e-07, "loss": 0.7672, "step": 15732 }, { "epoch": 0.8085620310412169, "grad_norm": 1.1062660217285156, "learning_rate": 9.306927825798401e-07, "loss": 0.7081, "step": 15733 }, { "epoch": 0.8086134237845616, "grad_norm": 1.0216107368469238, "learning_rate": 9.302092502454246e-07, "loss": 0.6897, "step": 15734 }, { "epoch": 0.8086648165279062, "grad_norm": 1.0044503211975098, "learning_rate": 9.297258306681184e-07, "loss": 0.6502, "step": 15735 }, { "epoch": 0.8087162092712509, "grad_norm": 1.053539752960205, "learning_rate": 9.292425238613095e-07, "loss": 0.6859, "step": 15736 }, { "epoch": 0.8087676020145955, "grad_norm": 1.121359944343567, "learning_rate": 9.287593298383929e-07, "loss": 0.7202, "step": 15737 }, { "epoch": 0.8088189947579402, "grad_norm": 1.1422903537750244, "learning_rate": 9.282762486127527e-07, "loss": 0.6971, "step": 15738 }, { "epoch": 0.8088703875012848, "grad_norm": 0.7986714243888855, "learning_rate": 9.277932801977773e-07, "loss": 0.6354, "step": 15739 }, { "epoch": 0.8089217802446295, "grad_norm": 1.117967128753662, "learning_rate": 9.273104246068426e-07, "loss": 0.6674, "step": 15740 }, { "epoch": 0.8089731729879741, "grad_norm": 1.1038676500320435, "learning_rate": 9.268276818533306e-07, "loss": 0.7255, "step": 15741 }, { "epoch": 0.8090245657313188, "grad_norm": 1.1117148399353027, "learning_rate": 9.263450519506146e-07, "loss": 0.6595, "step": 15742 }, { "epoch": 0.8090759584746634, "grad_norm": 1.1843831539154053, "learning_rate": 9.258625349120643e-07, "loss": 0.7194, "step": 15743 }, { "epoch": 0.809127351218008, "grad_norm": 1.0743772983551025, "learning_rate": 9.253801307510518e-07, "loss": 0.7205, "step": 15744 }, { "epoch": 0.8091787439613527, "grad_norm": 1.0779274702072144, "learning_rate": 9.24897839480941e-07, "loss": 0.7372, "step": 15745 }, { "epoch": 0.8092301367046973, "grad_norm": 1.1169421672821045, "learning_rate": 9.244156611150939e-07, "loss": 0.6943, "step": 15746 }, { "epoch": 0.809281529448042, "grad_norm": 1.1247409582138062, "learning_rate": 9.239335956668688e-07, "loss": 0.7663, "step": 15747 }, { "epoch": 0.8093329221913865, "grad_norm": 1.0870590209960938, "learning_rate": 9.234516431496255e-07, "loss": 0.6561, "step": 15748 }, { "epoch": 0.8093843149347312, "grad_norm": 1.01999032497406, "learning_rate": 9.229698035767115e-07, "loss": 0.689, "step": 15749 }, { "epoch": 0.8094357076780758, "grad_norm": 1.1100109815597534, "learning_rate": 9.224880769614802e-07, "loss": 0.7131, "step": 15750 }, { "epoch": 0.8094871004214205, "grad_norm": 1.0621927976608276, "learning_rate": 9.220064633172782e-07, "loss": 0.716, "step": 15751 }, { "epoch": 0.8095384931647651, "grad_norm": 1.1482672691345215, "learning_rate": 9.215249626574463e-07, "loss": 0.7401, "step": 15752 }, { "epoch": 0.8095898859081098, "grad_norm": 1.0536185503005981, "learning_rate": 9.210435749953283e-07, "loss": 0.7111, "step": 15753 }, { "epoch": 0.8096412786514544, "grad_norm": 0.6769371032714844, "learning_rate": 9.205623003442587e-07, "loss": 0.6057, "step": 15754 }, { "epoch": 0.8096926713947991, "grad_norm": 1.1154134273529053, "learning_rate": 9.200811387175757e-07, "loss": 0.7378, "step": 15755 }, { "epoch": 0.8097440641381437, "grad_norm": 1.0264484882354736, "learning_rate": 9.196000901286051e-07, "loss": 0.7002, "step": 15756 }, { "epoch": 0.8097954568814884, "grad_norm": 1.0683865547180176, "learning_rate": 9.19119154590678e-07, "loss": 0.7229, "step": 15757 }, { "epoch": 0.809846849624833, "grad_norm": 0.7656886577606201, "learning_rate": 9.186383321171183e-07, "loss": 0.6523, "step": 15758 }, { "epoch": 0.8098982423681776, "grad_norm": 1.0203948020935059, "learning_rate": 9.18157622721248e-07, "loss": 0.6423, "step": 15759 }, { "epoch": 0.8099496351115223, "grad_norm": 1.1084619760513306, "learning_rate": 9.176770264163837e-07, "loss": 0.7276, "step": 15760 }, { "epoch": 0.8100010278548669, "grad_norm": 1.1010531187057495, "learning_rate": 9.171965432158436e-07, "loss": 0.6365, "step": 15761 }, { "epoch": 0.8100524205982116, "grad_norm": 1.0976752042770386, "learning_rate": 9.167161731329383e-07, "loss": 0.6359, "step": 15762 }, { "epoch": 0.8101038133415561, "grad_norm": 1.0391898155212402, "learning_rate": 9.162359161809759e-07, "loss": 0.7008, "step": 15763 }, { "epoch": 0.8101552060849008, "grad_norm": 1.2269951105117798, "learning_rate": 9.157557723732663e-07, "loss": 0.6584, "step": 15764 }, { "epoch": 0.8102065988282454, "grad_norm": 1.0294278860092163, "learning_rate": 9.152757417231073e-07, "loss": 0.6456, "step": 15765 }, { "epoch": 0.8102579915715901, "grad_norm": 1.0425649881362915, "learning_rate": 9.147958242438015e-07, "loss": 0.6254, "step": 15766 }, { "epoch": 0.8103093843149347, "grad_norm": 1.0869574546813965, "learning_rate": 9.143160199486451e-07, "loss": 0.6795, "step": 15767 }, { "epoch": 0.8103607770582794, "grad_norm": 1.1499159336090088, "learning_rate": 9.138363288509311e-07, "loss": 0.6874, "step": 15768 }, { "epoch": 0.810412169801624, "grad_norm": 1.0265332460403442, "learning_rate": 9.133567509639485e-07, "loss": 0.6901, "step": 15769 }, { "epoch": 0.8104635625449687, "grad_norm": 1.1268881559371948, "learning_rate": 9.128772863009872e-07, "loss": 0.7581, "step": 15770 }, { "epoch": 0.8105149552883133, "grad_norm": 1.051356315612793, "learning_rate": 9.123979348753298e-07, "loss": 0.6763, "step": 15771 }, { "epoch": 0.810566348031658, "grad_norm": 1.1271179914474487, "learning_rate": 9.119186967002552e-07, "loss": 0.6837, "step": 15772 }, { "epoch": 0.8106177407750026, "grad_norm": 1.136104702949524, "learning_rate": 9.114395717890451e-07, "loss": 0.7132, "step": 15773 }, { "epoch": 0.8106691335183472, "grad_norm": 1.0177066326141357, "learning_rate": 9.109605601549715e-07, "loss": 0.635, "step": 15774 }, { "epoch": 0.8107205262616919, "grad_norm": 1.0858715772628784, "learning_rate": 9.10481661811306e-07, "loss": 0.6865, "step": 15775 }, { "epoch": 0.8107719190050365, "grad_norm": 1.1957052946090698, "learning_rate": 9.100028767713165e-07, "loss": 0.7596, "step": 15776 }, { "epoch": 0.8108233117483812, "grad_norm": 0.7590300440788269, "learning_rate": 9.095242050482711e-07, "loss": 0.6472, "step": 15777 }, { "epoch": 0.8108747044917257, "grad_norm": 1.0864298343658447, "learning_rate": 9.090456466554276e-07, "loss": 0.6912, "step": 15778 }, { "epoch": 0.8109260972350704, "grad_norm": 1.184112310409546, "learning_rate": 9.085672016060476e-07, "loss": 0.6964, "step": 15779 }, { "epoch": 0.810977489978415, "grad_norm": 0.9929559230804443, "learning_rate": 9.080888699133861e-07, "loss": 0.6842, "step": 15780 }, { "epoch": 0.8110288827217597, "grad_norm": 3.1788060665130615, "learning_rate": 9.076106515906957e-07, "loss": 0.6627, "step": 15781 }, { "epoch": 0.8110802754651043, "grad_norm": 1.07465660572052, "learning_rate": 9.071325466512248e-07, "loss": 0.6892, "step": 15782 }, { "epoch": 0.811131668208449, "grad_norm": 1.1318098306655884, "learning_rate": 9.066545551082217e-07, "loss": 0.7382, "step": 15783 }, { "epoch": 0.8111830609517936, "grad_norm": 1.1264047622680664, "learning_rate": 9.061766769749292e-07, "loss": 0.7033, "step": 15784 }, { "epoch": 0.8112344536951382, "grad_norm": 1.0931671857833862, "learning_rate": 9.056989122645849e-07, "loss": 0.6685, "step": 15785 }, { "epoch": 0.8112858464384829, "grad_norm": 0.7591493129730225, "learning_rate": 9.052212609904292e-07, "loss": 0.6653, "step": 15786 }, { "epoch": 0.8113372391818275, "grad_norm": 1.1107436418533325, "learning_rate": 9.047437231656941e-07, "loss": 0.7339, "step": 15787 }, { "epoch": 0.8113886319251722, "grad_norm": 1.0172797441482544, "learning_rate": 9.042662988036105e-07, "loss": 0.6834, "step": 15788 }, { "epoch": 0.8114400246685168, "grad_norm": 1.06940495967865, "learning_rate": 9.037889879174039e-07, "loss": 0.6598, "step": 15789 }, { "epoch": 0.8114914174118615, "grad_norm": 1.105717658996582, "learning_rate": 9.033117905203031e-07, "loss": 0.6735, "step": 15790 }, { "epoch": 0.8115428101552061, "grad_norm": 1.1460373401641846, "learning_rate": 9.028347066255244e-07, "loss": 0.7049, "step": 15791 }, { "epoch": 0.8115942028985508, "grad_norm": 1.0101784467697144, "learning_rate": 9.023577362462887e-07, "loss": 0.6599, "step": 15792 }, { "epoch": 0.8116455956418953, "grad_norm": 1.0904449224472046, "learning_rate": 9.018808793958105e-07, "loss": 0.7056, "step": 15793 }, { "epoch": 0.81169698838524, "grad_norm": 1.0892186164855957, "learning_rate": 9.014041360872999e-07, "loss": 0.6919, "step": 15794 }, { "epoch": 0.8117483811285846, "grad_norm": 1.088026523590088, "learning_rate": 9.009275063339679e-07, "loss": 0.6781, "step": 15795 }, { "epoch": 0.8117997738719293, "grad_norm": 1.1358389854431152, "learning_rate": 9.004509901490188e-07, "loss": 0.6701, "step": 15796 }, { "epoch": 0.8118511666152739, "grad_norm": 1.2061302661895752, "learning_rate": 8.99974587545655e-07, "loss": 0.7178, "step": 15797 }, { "epoch": 0.8119025593586185, "grad_norm": 0.6747239232063293, "learning_rate": 8.994982985370748e-07, "loss": 0.6577, "step": 15798 }, { "epoch": 0.8119539521019632, "grad_norm": 1.0582994222640991, "learning_rate": 8.990221231364771e-07, "loss": 0.6677, "step": 15799 }, { "epoch": 0.8120053448453078, "grad_norm": 0.6579222083091736, "learning_rate": 8.985460613570495e-07, "loss": 0.6675, "step": 15800 }, { "epoch": 0.8120567375886525, "grad_norm": 1.0619511604309082, "learning_rate": 8.980701132119868e-07, "loss": 0.7018, "step": 15801 }, { "epoch": 0.8121081303319971, "grad_norm": 1.12665855884552, "learning_rate": 8.975942787144726e-07, "loss": 0.7361, "step": 15802 }, { "epoch": 0.8121595230753418, "grad_norm": 1.1392865180969238, "learning_rate": 8.97118557877692e-07, "loss": 0.7145, "step": 15803 }, { "epoch": 0.8122109158186864, "grad_norm": 1.1067801713943481, "learning_rate": 8.966429507148244e-07, "loss": 0.6579, "step": 15804 }, { "epoch": 0.8122623085620311, "grad_norm": 1.0840297937393188, "learning_rate": 8.96167457239046e-07, "loss": 0.653, "step": 15805 }, { "epoch": 0.8123137013053757, "grad_norm": 1.095035195350647, "learning_rate": 8.956920774635347e-07, "loss": 0.6193, "step": 15806 }, { "epoch": 0.8123650940487204, "grad_norm": 1.1100565195083618, "learning_rate": 8.952168114014558e-07, "loss": 0.6164, "step": 15807 }, { "epoch": 0.8124164867920649, "grad_norm": 1.1385915279388428, "learning_rate": 8.947416590659808e-07, "loss": 0.7084, "step": 15808 }, { "epoch": 0.8124678795354096, "grad_norm": 0.7350379824638367, "learning_rate": 8.942666204702732e-07, "loss": 0.6476, "step": 15809 }, { "epoch": 0.8125192722787542, "grad_norm": 1.1220670938491821, "learning_rate": 8.937916956274939e-07, "loss": 0.7247, "step": 15810 }, { "epoch": 0.8125706650220988, "grad_norm": 1.195489525794983, "learning_rate": 8.933168845508006e-07, "loss": 0.7369, "step": 15811 }, { "epoch": 0.8126220577654435, "grad_norm": 1.1213343143463135, "learning_rate": 8.9284218725335e-07, "loss": 0.679, "step": 15812 }, { "epoch": 0.8126734505087881, "grad_norm": 1.129107117652893, "learning_rate": 8.923676037482931e-07, "loss": 0.6866, "step": 15813 }, { "epoch": 0.8127248432521328, "grad_norm": 0.6698294281959534, "learning_rate": 8.918931340487774e-07, "loss": 0.6338, "step": 15814 }, { "epoch": 0.8127762359954774, "grad_norm": 1.0568783283233643, "learning_rate": 8.914187781679529e-07, "loss": 0.6364, "step": 15815 }, { "epoch": 0.8128276287388221, "grad_norm": 1.124681830406189, "learning_rate": 8.909445361189556e-07, "loss": 0.7724, "step": 15816 }, { "epoch": 0.8128790214821667, "grad_norm": 1.0001587867736816, "learning_rate": 8.904704079149302e-07, "loss": 0.6339, "step": 15817 }, { "epoch": 0.8129304142255114, "grad_norm": 1.0655757188796997, "learning_rate": 8.899963935690087e-07, "loss": 0.7274, "step": 15818 }, { "epoch": 0.812981806968856, "grad_norm": 1.112015724182129, "learning_rate": 8.895224930943292e-07, "loss": 0.7039, "step": 15819 }, { "epoch": 0.8130331997122007, "grad_norm": 1.1138441562652588, "learning_rate": 8.89048706504016e-07, "loss": 0.6476, "step": 15820 }, { "epoch": 0.8130845924555453, "grad_norm": 1.0901150703430176, "learning_rate": 8.885750338111992e-07, "loss": 0.6919, "step": 15821 }, { "epoch": 0.81313598519889, "grad_norm": 1.0973469018936157, "learning_rate": 8.881014750290017e-07, "loss": 0.7019, "step": 15822 }, { "epoch": 0.8131873779422345, "grad_norm": 1.0800434350967407, "learning_rate": 8.876280301705419e-07, "loss": 0.6311, "step": 15823 }, { "epoch": 0.8132387706855791, "grad_norm": 1.2064049243927002, "learning_rate": 8.8715469924894e-07, "loss": 0.7132, "step": 15824 }, { "epoch": 0.8132901634289238, "grad_norm": 0.6733985543251038, "learning_rate": 8.86681482277309e-07, "loss": 0.6409, "step": 15825 }, { "epoch": 0.8133415561722684, "grad_norm": 1.106088638305664, "learning_rate": 8.862083792687592e-07, "loss": 0.7264, "step": 15826 }, { "epoch": 0.8133929489156131, "grad_norm": 0.6929330229759216, "learning_rate": 8.857353902363975e-07, "loss": 0.6303, "step": 15827 }, { "epoch": 0.8134443416589577, "grad_norm": 0.731674075126648, "learning_rate": 8.852625151933313e-07, "loss": 0.6337, "step": 15828 }, { "epoch": 0.8134957344023024, "grad_norm": 1.1046757698059082, "learning_rate": 8.84789754152659e-07, "loss": 0.7117, "step": 15829 }, { "epoch": 0.813547127145647, "grad_norm": 1.1163442134857178, "learning_rate": 8.843171071274803e-07, "loss": 0.6577, "step": 15830 }, { "epoch": 0.8135985198889917, "grad_norm": 1.0644265413284302, "learning_rate": 8.83844574130891e-07, "loss": 0.6822, "step": 15831 }, { "epoch": 0.8136499126323363, "grad_norm": 1.1497644186019897, "learning_rate": 8.833721551759817e-07, "loss": 0.674, "step": 15832 }, { "epoch": 0.813701305375681, "grad_norm": 1.060119390487671, "learning_rate": 8.828998502758407e-07, "loss": 0.638, "step": 15833 }, { "epoch": 0.8137526981190256, "grad_norm": 0.7519299983978271, "learning_rate": 8.824276594435554e-07, "loss": 0.6374, "step": 15834 }, { "epoch": 0.8138040908623703, "grad_norm": 1.2447428703308105, "learning_rate": 8.819555826922077e-07, "loss": 0.6984, "step": 15835 }, { "epoch": 0.8138554836057149, "grad_norm": 1.0567222833633423, "learning_rate": 8.814836200348753e-07, "loss": 0.7035, "step": 15836 }, { "epoch": 0.8139068763490596, "grad_norm": 1.0885140895843506, "learning_rate": 8.810117714846373e-07, "loss": 0.6285, "step": 15837 }, { "epoch": 0.8139582690924042, "grad_norm": 0.6613754630088806, "learning_rate": 8.805400370545647e-07, "loss": 0.6542, "step": 15838 }, { "epoch": 0.8140096618357487, "grad_norm": 1.100843906402588, "learning_rate": 8.800684167577278e-07, "loss": 0.6729, "step": 15839 }, { "epoch": 0.8140610545790934, "grad_norm": 1.0903635025024414, "learning_rate": 8.795969106071917e-07, "loss": 0.7063, "step": 15840 }, { "epoch": 0.814112447322438, "grad_norm": 1.142377495765686, "learning_rate": 8.791255186160236e-07, "loss": 0.7742, "step": 15841 }, { "epoch": 0.8141638400657827, "grad_norm": 1.097847819328308, "learning_rate": 8.786542407972793e-07, "loss": 0.7238, "step": 15842 }, { "epoch": 0.8142152328091273, "grad_norm": 1.040615200996399, "learning_rate": 8.781830771640198e-07, "loss": 0.7057, "step": 15843 }, { "epoch": 0.814266625552472, "grad_norm": 1.0631991624832153, "learning_rate": 8.777120277292972e-07, "loss": 0.6258, "step": 15844 }, { "epoch": 0.8143180182958166, "grad_norm": 1.0461479425430298, "learning_rate": 8.772410925061614e-07, "loss": 0.6607, "step": 15845 }, { "epoch": 0.8143694110391613, "grad_norm": 1.0002843141555786, "learning_rate": 8.767702715076626e-07, "loss": 0.6086, "step": 15846 }, { "epoch": 0.8144208037825059, "grad_norm": 1.0043505430221558, "learning_rate": 8.762995647468425e-07, "loss": 0.6784, "step": 15847 }, { "epoch": 0.8144721965258506, "grad_norm": 1.0576022863388062, "learning_rate": 8.758289722367463e-07, "loss": 0.6626, "step": 15848 }, { "epoch": 0.8145235892691952, "grad_norm": 1.1268022060394287, "learning_rate": 8.753584939904081e-07, "loss": 0.7438, "step": 15849 }, { "epoch": 0.8145749820125399, "grad_norm": 1.1017508506774902, "learning_rate": 8.748881300208651e-07, "loss": 0.6866, "step": 15850 }, { "epoch": 0.8146263747558845, "grad_norm": 0.8794753551483154, "learning_rate": 8.744178803411491e-07, "loss": 0.6672, "step": 15851 }, { "epoch": 0.8146777674992292, "grad_norm": 1.0583040714263916, "learning_rate": 8.739477449642885e-07, "loss": 0.6832, "step": 15852 }, { "epoch": 0.8147291602425738, "grad_norm": 1.0865466594696045, "learning_rate": 8.734777239033071e-07, "loss": 0.6752, "step": 15853 }, { "epoch": 0.8147805529859183, "grad_norm": 1.0640757083892822, "learning_rate": 8.730078171712303e-07, "loss": 0.6987, "step": 15854 }, { "epoch": 0.814831945729263, "grad_norm": 1.0666866302490234, "learning_rate": 8.725380247810755e-07, "loss": 0.7264, "step": 15855 }, { "epoch": 0.8148833384726076, "grad_norm": 1.0499472618103027, "learning_rate": 8.72068346745858e-07, "loss": 0.6379, "step": 15856 }, { "epoch": 0.8149347312159523, "grad_norm": 1.0980334281921387, "learning_rate": 8.715987830785944e-07, "loss": 0.704, "step": 15857 }, { "epoch": 0.8149861239592969, "grad_norm": 1.1336294412612915, "learning_rate": 8.711293337922883e-07, "loss": 0.7608, "step": 15858 }, { "epoch": 0.8150375167026416, "grad_norm": 1.1186856031417847, "learning_rate": 8.706599988999515e-07, "loss": 0.7171, "step": 15859 }, { "epoch": 0.8150889094459862, "grad_norm": 1.075716495513916, "learning_rate": 8.701907784145852e-07, "loss": 0.7125, "step": 15860 }, { "epoch": 0.8151403021893309, "grad_norm": 1.0665866136550903, "learning_rate": 8.697216723491897e-07, "loss": 0.6522, "step": 15861 }, { "epoch": 0.8151916949326755, "grad_norm": 0.7025566697120667, "learning_rate": 8.692526807167606e-07, "loss": 0.6735, "step": 15862 }, { "epoch": 0.8152430876760202, "grad_norm": 1.0913625955581665, "learning_rate": 8.687838035302942e-07, "loss": 0.6904, "step": 15863 }, { "epoch": 0.8152944804193648, "grad_norm": 1.0596872568130493, "learning_rate": 8.683150408027807e-07, "loss": 0.6904, "step": 15864 }, { "epoch": 0.8153458731627095, "grad_norm": 0.8113095760345459, "learning_rate": 8.678463925472052e-07, "loss": 0.6741, "step": 15865 }, { "epoch": 0.8153972659060541, "grad_norm": 1.0824666023254395, "learning_rate": 8.673778587765552e-07, "loss": 0.6476, "step": 15866 }, { "epoch": 0.8154486586493987, "grad_norm": 1.0351488590240479, "learning_rate": 8.669094395038103e-07, "loss": 0.6752, "step": 15867 }, { "epoch": 0.8155000513927434, "grad_norm": 1.0663225650787354, "learning_rate": 8.664411347419488e-07, "loss": 0.6551, "step": 15868 }, { "epoch": 0.8155514441360879, "grad_norm": 1.1081591844558716, "learning_rate": 8.659729445039439e-07, "loss": 0.6619, "step": 15869 }, { "epoch": 0.8156028368794326, "grad_norm": 1.0211765766143799, "learning_rate": 8.655048688027712e-07, "loss": 0.6844, "step": 15870 }, { "epoch": 0.8156542296227772, "grad_norm": 1.0937654972076416, "learning_rate": 8.650369076513937e-07, "loss": 0.7102, "step": 15871 }, { "epoch": 0.8157056223661219, "grad_norm": 1.0783026218414307, "learning_rate": 8.645690610627811e-07, "loss": 0.721, "step": 15872 }, { "epoch": 0.8157570151094665, "grad_norm": 1.101480484008789, "learning_rate": 8.641013290498934e-07, "loss": 0.7678, "step": 15873 }, { "epoch": 0.8158084078528112, "grad_norm": 1.0206547975540161, "learning_rate": 8.636337116256893e-07, "loss": 0.6929, "step": 15874 }, { "epoch": 0.8158598005961558, "grad_norm": 1.0886304378509521, "learning_rate": 8.631662088031262e-07, "loss": 0.667, "step": 15875 }, { "epoch": 0.8159111933395005, "grad_norm": 0.7084832787513733, "learning_rate": 8.626988205951558e-07, "loss": 0.6301, "step": 15876 }, { "epoch": 0.8159625860828451, "grad_norm": 1.1814095973968506, "learning_rate": 8.62231547014728e-07, "loss": 0.7283, "step": 15877 }, { "epoch": 0.8160139788261898, "grad_norm": 1.0779844522476196, "learning_rate": 8.617643880747867e-07, "loss": 0.6825, "step": 15878 }, { "epoch": 0.8160653715695344, "grad_norm": 1.0795459747314453, "learning_rate": 8.612973437882777e-07, "loss": 0.6855, "step": 15879 }, { "epoch": 0.816116764312879, "grad_norm": 1.026180386543274, "learning_rate": 8.608304141681406e-07, "loss": 0.6884, "step": 15880 }, { "epoch": 0.8161681570562237, "grad_norm": 1.0571925640106201, "learning_rate": 8.603635992273108e-07, "loss": 0.7251, "step": 15881 }, { "epoch": 0.8162195497995683, "grad_norm": 1.054360270500183, "learning_rate": 8.598968989787216e-07, "loss": 0.663, "step": 15882 }, { "epoch": 0.816270942542913, "grad_norm": 0.6769542694091797, "learning_rate": 8.59430313435306e-07, "loss": 0.6717, "step": 15883 }, { "epoch": 0.8163223352862575, "grad_norm": 1.154556155204773, "learning_rate": 8.589638426099873e-07, "loss": 0.7038, "step": 15884 }, { "epoch": 0.8163737280296022, "grad_norm": 1.064639687538147, "learning_rate": 8.584974865156925e-07, "loss": 0.679, "step": 15885 }, { "epoch": 0.8164251207729468, "grad_norm": 1.133123755455017, "learning_rate": 8.580312451653416e-07, "loss": 0.6521, "step": 15886 }, { "epoch": 0.8164765135162915, "grad_norm": 1.0538917779922485, "learning_rate": 8.575651185718503e-07, "loss": 0.6705, "step": 15887 }, { "epoch": 0.8165279062596361, "grad_norm": 1.0894396305084229, "learning_rate": 8.570991067481366e-07, "loss": 0.6961, "step": 15888 }, { "epoch": 0.8165792990029808, "grad_norm": 1.129911184310913, "learning_rate": 8.566332097071095e-07, "loss": 0.7178, "step": 15889 }, { "epoch": 0.8166306917463254, "grad_norm": 1.0769083499908447, "learning_rate": 8.561674274616777e-07, "loss": 0.7147, "step": 15890 }, { "epoch": 0.8166820844896701, "grad_norm": 1.1535837650299072, "learning_rate": 8.557017600247447e-07, "loss": 0.7175, "step": 15891 }, { "epoch": 0.8167334772330147, "grad_norm": 1.0542861223220825, "learning_rate": 8.552362074092157e-07, "loss": 0.6972, "step": 15892 }, { "epoch": 0.8167848699763594, "grad_norm": 1.0870426893234253, "learning_rate": 8.547707696279844e-07, "loss": 0.7268, "step": 15893 }, { "epoch": 0.816836262719704, "grad_norm": 1.1196643114089966, "learning_rate": 8.543054466939505e-07, "loss": 0.6925, "step": 15894 }, { "epoch": 0.8168876554630486, "grad_norm": 0.7005282640457153, "learning_rate": 8.538402386200023e-07, "loss": 0.6796, "step": 15895 }, { "epoch": 0.8169390482063933, "grad_norm": 1.0782498121261597, "learning_rate": 8.533751454190326e-07, "loss": 0.6963, "step": 15896 }, { "epoch": 0.8169904409497379, "grad_norm": 1.1897894144058228, "learning_rate": 8.529101671039258e-07, "loss": 0.7401, "step": 15897 }, { "epoch": 0.8170418336930826, "grad_norm": 1.0737711191177368, "learning_rate": 8.524453036875624e-07, "loss": 0.6973, "step": 15898 }, { "epoch": 0.8170932264364271, "grad_norm": 1.0845646858215332, "learning_rate": 8.519805551828269e-07, "loss": 0.691, "step": 15899 }, { "epoch": 0.8171446191797718, "grad_norm": 0.7155020833015442, "learning_rate": 8.515159216025893e-07, "loss": 0.7, "step": 15900 }, { "epoch": 0.8171960119231164, "grad_norm": 1.0626174211502075, "learning_rate": 8.510514029597272e-07, "loss": 0.6663, "step": 15901 }, { "epoch": 0.8172474046664611, "grad_norm": 0.6911786794662476, "learning_rate": 8.50586999267109e-07, "loss": 0.6554, "step": 15902 }, { "epoch": 0.8172987974098057, "grad_norm": 1.0124671459197998, "learning_rate": 8.501227105376015e-07, "loss": 0.6492, "step": 15903 }, { "epoch": 0.8173501901531504, "grad_norm": 0.7251526117324829, "learning_rate": 8.496585367840665e-07, "loss": 0.6263, "step": 15904 }, { "epoch": 0.817401582896495, "grad_norm": 1.0921012163162231, "learning_rate": 8.491944780193679e-07, "loss": 0.7155, "step": 15905 }, { "epoch": 0.8174529756398397, "grad_norm": 1.0037120580673218, "learning_rate": 8.487305342563601e-07, "loss": 0.7249, "step": 15906 }, { "epoch": 0.8175043683831843, "grad_norm": 1.0369433164596558, "learning_rate": 8.482667055078975e-07, "loss": 0.6621, "step": 15907 }, { "epoch": 0.817555761126529, "grad_norm": 1.105870246887207, "learning_rate": 8.478029917868336e-07, "loss": 0.6747, "step": 15908 }, { "epoch": 0.8176071538698736, "grad_norm": 1.1297708749771118, "learning_rate": 8.473393931060109e-07, "loss": 0.7091, "step": 15909 }, { "epoch": 0.8176585466132182, "grad_norm": 1.0341790914535522, "learning_rate": 8.468759094782781e-07, "loss": 0.7428, "step": 15910 }, { "epoch": 0.8177099393565629, "grad_norm": 1.0771733522415161, "learning_rate": 8.464125409164736e-07, "loss": 0.692, "step": 15911 }, { "epoch": 0.8177613320999075, "grad_norm": 0.8159832954406738, "learning_rate": 8.45949287433439e-07, "loss": 0.6751, "step": 15912 }, { "epoch": 0.8178127248432522, "grad_norm": 1.0718497037887573, "learning_rate": 8.454861490420046e-07, "loss": 0.6109, "step": 15913 }, { "epoch": 0.8178641175865967, "grad_norm": 1.0749939680099487, "learning_rate": 8.450231257550057e-07, "loss": 0.6514, "step": 15914 }, { "epoch": 0.8179155103299414, "grad_norm": 1.0642589330673218, "learning_rate": 8.445602175852691e-07, "loss": 0.7372, "step": 15915 }, { "epoch": 0.817966903073286, "grad_norm": 0.7097607254981995, "learning_rate": 8.440974245456196e-07, "loss": 0.6364, "step": 15916 }, { "epoch": 0.8180182958166307, "grad_norm": 1.0714733600616455, "learning_rate": 8.436347466488809e-07, "loss": 0.6899, "step": 15917 }, { "epoch": 0.8180696885599753, "grad_norm": 1.0946086645126343, "learning_rate": 8.43172183907871e-07, "loss": 0.6588, "step": 15918 }, { "epoch": 0.81812108130332, "grad_norm": 0.9855013489723206, "learning_rate": 8.427097363354065e-07, "loss": 0.6564, "step": 15919 }, { "epoch": 0.8181724740466646, "grad_norm": 0.7948664426803589, "learning_rate": 8.422474039442969e-07, "loss": 0.656, "step": 15920 }, { "epoch": 0.8182238667900092, "grad_norm": 1.0878255367279053, "learning_rate": 8.417851867473564e-07, "loss": 0.665, "step": 15921 }, { "epoch": 0.8182752595333539, "grad_norm": 1.0695507526397705, "learning_rate": 8.413230847573861e-07, "loss": 0.6444, "step": 15922 }, { "epoch": 0.8183266522766985, "grad_norm": 1.062950611114502, "learning_rate": 8.408610979871928e-07, "loss": 0.6869, "step": 15923 }, { "epoch": 0.8183780450200432, "grad_norm": 1.1164363622665405, "learning_rate": 8.403992264495742e-07, "loss": 0.7104, "step": 15924 }, { "epoch": 0.8184294377633878, "grad_norm": 1.0398523807525635, "learning_rate": 8.399374701573265e-07, "loss": 0.6347, "step": 15925 }, { "epoch": 0.8184808305067325, "grad_norm": 1.0860638618469238, "learning_rate": 8.394758291232446e-07, "loss": 0.7073, "step": 15926 }, { "epoch": 0.8185322232500771, "grad_norm": 1.0642421245574951, "learning_rate": 8.39014303360119e-07, "loss": 0.6874, "step": 15927 }, { "epoch": 0.8185836159934218, "grad_norm": 1.1776020526885986, "learning_rate": 8.385528928807346e-07, "loss": 0.7039, "step": 15928 }, { "epoch": 0.8186350087367664, "grad_norm": 1.1142851114273071, "learning_rate": 8.380915976978759e-07, "loss": 0.6707, "step": 15929 }, { "epoch": 0.818686401480111, "grad_norm": 1.0520637035369873, "learning_rate": 8.376304178243245e-07, "loss": 0.6815, "step": 15930 }, { "epoch": 0.8187377942234556, "grad_norm": 1.0598965883255005, "learning_rate": 8.371693532728575e-07, "loss": 0.6533, "step": 15931 }, { "epoch": 0.8187891869668003, "grad_norm": 1.1046322584152222, "learning_rate": 8.367084040562485e-07, "loss": 0.6589, "step": 15932 }, { "epoch": 0.8188405797101449, "grad_norm": 1.0862507820129395, "learning_rate": 8.362475701872675e-07, "loss": 0.6511, "step": 15933 }, { "epoch": 0.8188919724534895, "grad_norm": 1.106633186340332, "learning_rate": 8.357868516786861e-07, "loss": 0.6607, "step": 15934 }, { "epoch": 0.8189433651968342, "grad_norm": 1.1368271112442017, "learning_rate": 8.35326248543264e-07, "loss": 0.6783, "step": 15935 }, { "epoch": 0.8189947579401788, "grad_norm": 1.1690280437469482, "learning_rate": 8.34865760793766e-07, "loss": 0.6623, "step": 15936 }, { "epoch": 0.8190461506835235, "grad_norm": 1.1522434949874878, "learning_rate": 8.344053884429493e-07, "loss": 0.6536, "step": 15937 }, { "epoch": 0.8190975434268681, "grad_norm": 1.1830905675888062, "learning_rate": 8.33945131503568e-07, "loss": 0.6672, "step": 15938 }, { "epoch": 0.8191489361702128, "grad_norm": 1.0953369140625, "learning_rate": 8.334849899883757e-07, "loss": 0.6696, "step": 15939 }, { "epoch": 0.8192003289135574, "grad_norm": 1.0722713470458984, "learning_rate": 8.330249639101201e-07, "loss": 0.729, "step": 15940 }, { "epoch": 0.8192517216569021, "grad_norm": 1.1886411905288696, "learning_rate": 8.325650532815466e-07, "loss": 0.7251, "step": 15941 }, { "epoch": 0.8193031144002467, "grad_norm": 0.78179532289505, "learning_rate": 8.321052581153965e-07, "loss": 0.6692, "step": 15942 }, { "epoch": 0.8193545071435914, "grad_norm": 1.1280386447906494, "learning_rate": 8.31645578424411e-07, "loss": 0.6886, "step": 15943 }, { "epoch": 0.819405899886936, "grad_norm": 1.090631365776062, "learning_rate": 8.311860142213246e-07, "loss": 0.6939, "step": 15944 }, { "epoch": 0.8194572926302806, "grad_norm": 0.8102370500564575, "learning_rate": 8.307265655188701e-07, "loss": 0.6321, "step": 15945 }, { "epoch": 0.8195086853736252, "grad_norm": 0.7136482000350952, "learning_rate": 8.302672323297756e-07, "loss": 0.6752, "step": 15946 }, { "epoch": 0.8195600781169698, "grad_norm": 1.1100010871887207, "learning_rate": 8.298080146667698e-07, "loss": 0.6744, "step": 15947 }, { "epoch": 0.8196114708603145, "grad_norm": 1.2398725748062134, "learning_rate": 8.293489125425747e-07, "loss": 0.6642, "step": 15948 }, { "epoch": 0.8196628636036591, "grad_norm": 1.135555386543274, "learning_rate": 8.28889925969909e-07, "loss": 0.7059, "step": 15949 }, { "epoch": 0.8197142563470038, "grad_norm": 1.1473207473754883, "learning_rate": 8.284310549614922e-07, "loss": 0.6538, "step": 15950 }, { "epoch": 0.8197656490903484, "grad_norm": 1.1159553527832031, "learning_rate": 8.279722995300338e-07, "loss": 0.7, "step": 15951 }, { "epoch": 0.8198170418336931, "grad_norm": 1.1276623010635376, "learning_rate": 8.275136596882471e-07, "loss": 0.6852, "step": 15952 }, { "epoch": 0.8198684345770377, "grad_norm": 1.0680826902389526, "learning_rate": 8.270551354488382e-07, "loss": 0.74, "step": 15953 }, { "epoch": 0.8199198273203824, "grad_norm": 1.0428073406219482, "learning_rate": 8.265967268245106e-07, "loss": 0.7024, "step": 15954 }, { "epoch": 0.819971220063727, "grad_norm": 1.06885826587677, "learning_rate": 8.261384338279638e-07, "loss": 0.6511, "step": 15955 }, { "epoch": 0.8200226128070717, "grad_norm": 0.8059698939323425, "learning_rate": 8.25680256471898e-07, "loss": 0.6535, "step": 15956 }, { "epoch": 0.8200740055504163, "grad_norm": 0.7939055562019348, "learning_rate": 8.252221947690053e-07, "loss": 0.6495, "step": 15957 }, { "epoch": 0.820125398293761, "grad_norm": 1.1296801567077637, "learning_rate": 8.247642487319768e-07, "loss": 0.6414, "step": 15958 }, { "epoch": 0.8201767910371056, "grad_norm": 0.7933281064033508, "learning_rate": 8.243064183735017e-07, "loss": 0.6602, "step": 15959 }, { "epoch": 0.8202281837804501, "grad_norm": 0.8116136193275452, "learning_rate": 8.238487037062637e-07, "loss": 0.6465, "step": 15960 }, { "epoch": 0.8202795765237948, "grad_norm": 1.060389757156372, "learning_rate": 8.233911047429438e-07, "loss": 0.7281, "step": 15961 }, { "epoch": 0.8203309692671394, "grad_norm": 1.1530861854553223, "learning_rate": 8.229336214962197e-07, "loss": 0.7058, "step": 15962 }, { "epoch": 0.8203823620104841, "grad_norm": 1.0510258674621582, "learning_rate": 8.224762539787701e-07, "loss": 0.6691, "step": 15963 }, { "epoch": 0.8204337547538287, "grad_norm": 1.144066333770752, "learning_rate": 8.220190022032604e-07, "loss": 0.762, "step": 15964 }, { "epoch": 0.8204851474971734, "grad_norm": 0.7330953478813171, "learning_rate": 8.215618661823649e-07, "loss": 0.6832, "step": 15965 }, { "epoch": 0.820536540240518, "grad_norm": 1.1114577054977417, "learning_rate": 8.211048459287458e-07, "loss": 0.6776, "step": 15966 }, { "epoch": 0.8205879329838627, "grad_norm": 1.085736870765686, "learning_rate": 8.206479414550656e-07, "loss": 0.7289, "step": 15967 }, { "epoch": 0.8206393257272073, "grad_norm": 0.6623334288597107, "learning_rate": 8.201911527739847e-07, "loss": 0.6637, "step": 15968 }, { "epoch": 0.820690718470552, "grad_norm": 1.1326062679290771, "learning_rate": 8.197344798981577e-07, "loss": 0.692, "step": 15969 }, { "epoch": 0.8207421112138966, "grad_norm": 1.131390929222107, "learning_rate": 8.192779228402375e-07, "loss": 0.7013, "step": 15970 }, { "epoch": 0.8207935039572413, "grad_norm": 1.1198616027832031, "learning_rate": 8.18821481612872e-07, "loss": 0.682, "step": 15971 }, { "epoch": 0.8208448967005859, "grad_norm": 1.0767366886138916, "learning_rate": 8.183651562287098e-07, "loss": 0.7275, "step": 15972 }, { "epoch": 0.8208962894439306, "grad_norm": 0.6785878539085388, "learning_rate": 8.179089467003926e-07, "loss": 0.6398, "step": 15973 }, { "epoch": 0.8209476821872752, "grad_norm": 1.1476516723632812, "learning_rate": 8.174528530405602e-07, "loss": 0.6802, "step": 15974 }, { "epoch": 0.8209990749306197, "grad_norm": 0.7410844564437866, "learning_rate": 8.169968752618474e-07, "loss": 0.6358, "step": 15975 }, { "epoch": 0.8210504676739644, "grad_norm": 1.1050662994384766, "learning_rate": 8.165410133768897e-07, "loss": 0.7511, "step": 15976 }, { "epoch": 0.821101860417309, "grad_norm": 1.136380910873413, "learning_rate": 8.160852673983172e-07, "loss": 0.6957, "step": 15977 }, { "epoch": 0.8211532531606537, "grad_norm": 1.0785022974014282, "learning_rate": 8.156296373387557e-07, "loss": 0.6639, "step": 15978 }, { "epoch": 0.8212046459039983, "grad_norm": 0.7310556769371033, "learning_rate": 8.15174123210829e-07, "loss": 0.6271, "step": 15979 }, { "epoch": 0.821256038647343, "grad_norm": 0.8285425901412964, "learning_rate": 8.147187250271566e-07, "loss": 0.6761, "step": 15980 }, { "epoch": 0.8213074313906876, "grad_norm": 0.7083662152290344, "learning_rate": 8.142634428003577e-07, "loss": 0.6591, "step": 15981 }, { "epoch": 0.8213588241340323, "grad_norm": 1.0761287212371826, "learning_rate": 8.13808276543045e-07, "loss": 0.7542, "step": 15982 }, { "epoch": 0.8214102168773769, "grad_norm": 1.0235462188720703, "learning_rate": 8.133532262678301e-07, "loss": 0.682, "step": 15983 }, { "epoch": 0.8214616096207216, "grad_norm": 1.121659517288208, "learning_rate": 8.128982919873185e-07, "loss": 0.7204, "step": 15984 }, { "epoch": 0.8215130023640662, "grad_norm": 0.7196667790412903, "learning_rate": 8.124434737141184e-07, "loss": 0.6672, "step": 15985 }, { "epoch": 0.8215643951074109, "grad_norm": 1.232259750366211, "learning_rate": 8.119887714608265e-07, "loss": 0.7338, "step": 15986 }, { "epoch": 0.8216157878507555, "grad_norm": 0.8438336849212646, "learning_rate": 8.115341852400437e-07, "loss": 0.639, "step": 15987 }, { "epoch": 0.8216671805941002, "grad_norm": 1.0620945692062378, "learning_rate": 8.110797150643629e-07, "loss": 0.7196, "step": 15988 }, { "epoch": 0.8217185733374448, "grad_norm": 1.0905804634094238, "learning_rate": 8.106253609463776e-07, "loss": 0.7173, "step": 15989 }, { "epoch": 0.8217699660807893, "grad_norm": 0.7311794757843018, "learning_rate": 8.101711228986753e-07, "loss": 0.6082, "step": 15990 }, { "epoch": 0.821821358824134, "grad_norm": 1.1373704671859741, "learning_rate": 8.097170009338395e-07, "loss": 0.6917, "step": 15991 }, { "epoch": 0.8218727515674786, "grad_norm": 1.0940972566604614, "learning_rate": 8.092629950644553e-07, "loss": 0.6892, "step": 15992 }, { "epoch": 0.8219241443108233, "grad_norm": 1.1069122552871704, "learning_rate": 8.088091053030972e-07, "loss": 0.678, "step": 15993 }, { "epoch": 0.8219755370541679, "grad_norm": 1.0717417001724243, "learning_rate": 8.083553316623443e-07, "loss": 0.7159, "step": 15994 }, { "epoch": 0.8220269297975126, "grad_norm": 1.1317168474197388, "learning_rate": 8.079016741547669e-07, "loss": 0.7149, "step": 15995 }, { "epoch": 0.8220783225408572, "grad_norm": 1.109566569328308, "learning_rate": 8.074481327929345e-07, "loss": 0.7168, "step": 15996 }, { "epoch": 0.8221297152842019, "grad_norm": 1.0114786624908447, "learning_rate": 8.069947075894113e-07, "loss": 0.6269, "step": 15997 }, { "epoch": 0.8221811080275465, "grad_norm": 0.742664098739624, "learning_rate": 8.065413985567628e-07, "loss": 0.6498, "step": 15998 }, { "epoch": 0.8222325007708912, "grad_norm": 1.0992227792739868, "learning_rate": 8.060882057075464e-07, "loss": 0.6676, "step": 15999 }, { "epoch": 0.8222838935142358, "grad_norm": 1.0877968072891235, "learning_rate": 8.056351290543179e-07, "loss": 0.6772, "step": 16000 }, { "epoch": 0.8223352862575805, "grad_norm": 1.0807422399520874, "learning_rate": 8.051821686096328e-07, "loss": 0.6546, "step": 16001 }, { "epoch": 0.8223866790009251, "grad_norm": 1.1446477174758911, "learning_rate": 8.047293243860366e-07, "loss": 0.6785, "step": 16002 }, { "epoch": 0.8224380717442697, "grad_norm": 1.0511821508407593, "learning_rate": 8.042765963960786e-07, "loss": 0.7442, "step": 16003 }, { "epoch": 0.8224894644876144, "grad_norm": 1.1064132452011108, "learning_rate": 8.03823984652301e-07, "loss": 0.717, "step": 16004 }, { "epoch": 0.822540857230959, "grad_norm": 1.1460384130477905, "learning_rate": 8.033714891672462e-07, "loss": 0.6871, "step": 16005 }, { "epoch": 0.8225922499743036, "grad_norm": 1.099399447441101, "learning_rate": 8.029191099534467e-07, "loss": 0.694, "step": 16006 }, { "epoch": 0.8226436427176482, "grad_norm": 1.0820860862731934, "learning_rate": 8.024668470234393e-07, "loss": 0.6966, "step": 16007 }, { "epoch": 0.8226950354609929, "grad_norm": 1.063926339149475, "learning_rate": 8.020147003897533e-07, "loss": 0.6927, "step": 16008 }, { "epoch": 0.8227464282043375, "grad_norm": 0.8330867886543274, "learning_rate": 8.015626700649148e-07, "loss": 0.6903, "step": 16009 }, { "epoch": 0.8227978209476822, "grad_norm": 1.0246433019638062, "learning_rate": 8.0111075606145e-07, "loss": 0.644, "step": 16010 }, { "epoch": 0.8228492136910268, "grad_norm": 1.0607528686523438, "learning_rate": 8.00658958391879e-07, "loss": 0.67, "step": 16011 }, { "epoch": 0.8229006064343715, "grad_norm": 1.104867935180664, "learning_rate": 8.00207277068718e-07, "loss": 0.7561, "step": 16012 }, { "epoch": 0.8229519991777161, "grad_norm": 1.1388285160064697, "learning_rate": 7.997557121044803e-07, "loss": 0.7228, "step": 16013 }, { "epoch": 0.8230033919210608, "grad_norm": 1.1482667922973633, "learning_rate": 7.993042635116815e-07, "loss": 0.7778, "step": 16014 }, { "epoch": 0.8230547846644054, "grad_norm": 1.1118996143341064, "learning_rate": 7.988529313028237e-07, "loss": 0.7741, "step": 16015 }, { "epoch": 0.82310617740775, "grad_norm": 0.7849704623222351, "learning_rate": 7.984017154904151e-07, "loss": 0.69, "step": 16016 }, { "epoch": 0.8231575701510947, "grad_norm": 1.1126105785369873, "learning_rate": 7.97950616086956e-07, "loss": 0.678, "step": 16017 }, { "epoch": 0.8232089628944393, "grad_norm": 1.1017298698425293, "learning_rate": 7.974996331049434e-07, "loss": 0.6851, "step": 16018 }, { "epoch": 0.823260355637784, "grad_norm": 1.0966075658798218, "learning_rate": 7.970487665568743e-07, "loss": 0.6925, "step": 16019 }, { "epoch": 0.8233117483811286, "grad_norm": 1.1526294946670532, "learning_rate": 7.965980164552395e-07, "loss": 0.7452, "step": 16020 }, { "epoch": 0.8233631411244732, "grad_norm": 1.063724398612976, "learning_rate": 7.961473828125271e-07, "loss": 0.7166, "step": 16021 }, { "epoch": 0.8234145338678178, "grad_norm": 1.1120411157608032, "learning_rate": 7.956968656412217e-07, "loss": 0.6839, "step": 16022 }, { "epoch": 0.8234659266111625, "grad_norm": 1.0558279752731323, "learning_rate": 7.952464649538067e-07, "loss": 0.7003, "step": 16023 }, { "epoch": 0.8235173193545071, "grad_norm": 1.1339508295059204, "learning_rate": 7.947961807627602e-07, "loss": 0.7116, "step": 16024 }, { "epoch": 0.8235687120978518, "grad_norm": 0.7437736392021179, "learning_rate": 7.94346013080558e-07, "loss": 0.6388, "step": 16025 }, { "epoch": 0.8236201048411964, "grad_norm": 0.6681776642799377, "learning_rate": 7.938959619196707e-07, "loss": 0.6323, "step": 16026 }, { "epoch": 0.8236714975845411, "grad_norm": 1.0692873001098633, "learning_rate": 7.934460272925698e-07, "loss": 0.6977, "step": 16027 }, { "epoch": 0.8237228903278857, "grad_norm": 0.9259945750236511, "learning_rate": 7.929962092117205e-07, "loss": 0.6605, "step": 16028 }, { "epoch": 0.8237742830712304, "grad_norm": 1.094130516052246, "learning_rate": 7.925465076895844e-07, "loss": 0.677, "step": 16029 }, { "epoch": 0.823825675814575, "grad_norm": 1.062848687171936, "learning_rate": 7.920969227386216e-07, "loss": 0.7362, "step": 16030 }, { "epoch": 0.8238770685579196, "grad_norm": 1.1261876821517944, "learning_rate": 7.916474543712871e-07, "loss": 0.7126, "step": 16031 }, { "epoch": 0.8239284613012643, "grad_norm": 1.08109450340271, "learning_rate": 7.91198102600036e-07, "loss": 0.7026, "step": 16032 }, { "epoch": 0.8239798540446089, "grad_norm": 1.1750117540359497, "learning_rate": 7.907488674373165e-07, "loss": 0.6998, "step": 16033 }, { "epoch": 0.8240312467879536, "grad_norm": 1.0774067640304565, "learning_rate": 7.902997488955755e-07, "loss": 0.6957, "step": 16034 }, { "epoch": 0.8240826395312982, "grad_norm": 1.1907278299331665, "learning_rate": 7.898507469872546e-07, "loss": 0.68, "step": 16035 }, { "epoch": 0.8241340322746428, "grad_norm": 1.1572355031967163, "learning_rate": 7.894018617247968e-07, "loss": 0.6576, "step": 16036 }, { "epoch": 0.8241854250179874, "grad_norm": 1.0717113018035889, "learning_rate": 7.889530931206368e-07, "loss": 0.6763, "step": 16037 }, { "epoch": 0.8242368177613321, "grad_norm": 1.1185137033462524, "learning_rate": 7.88504441187209e-07, "loss": 0.6822, "step": 16038 }, { "epoch": 0.8242882105046767, "grad_norm": 1.1623841524124146, "learning_rate": 7.880559059369425e-07, "loss": 0.7189, "step": 16039 }, { "epoch": 0.8243396032480214, "grad_norm": 1.1048818826675415, "learning_rate": 7.876074873822659e-07, "loss": 0.6854, "step": 16040 }, { "epoch": 0.824390995991366, "grad_norm": 1.0829689502716064, "learning_rate": 7.87159185535602e-07, "loss": 0.7216, "step": 16041 }, { "epoch": 0.8244423887347107, "grad_norm": 1.2525339126586914, "learning_rate": 7.867110004093708e-07, "loss": 0.6481, "step": 16042 }, { "epoch": 0.8244937814780553, "grad_norm": 1.1360790729522705, "learning_rate": 7.862629320159931e-07, "loss": 0.685, "step": 16043 }, { "epoch": 0.8245451742214, "grad_norm": 1.1168122291564941, "learning_rate": 7.858149803678782e-07, "loss": 0.686, "step": 16044 }, { "epoch": 0.8245965669647446, "grad_norm": 1.1179600954055786, "learning_rate": 7.853671454774404e-07, "loss": 0.7085, "step": 16045 }, { "epoch": 0.8246479597080892, "grad_norm": 1.1545031070709229, "learning_rate": 7.84919427357086e-07, "loss": 0.7036, "step": 16046 }, { "epoch": 0.8246993524514339, "grad_norm": 0.673156201839447, "learning_rate": 7.844718260192196e-07, "loss": 0.6171, "step": 16047 }, { "epoch": 0.8247507451947785, "grad_norm": 1.1194005012512207, "learning_rate": 7.840243414762417e-07, "loss": 0.7191, "step": 16048 }, { "epoch": 0.8248021379381232, "grad_norm": 1.0752993822097778, "learning_rate": 7.835769737405518e-07, "loss": 0.7062, "step": 16049 }, { "epoch": 0.8248535306814678, "grad_norm": 1.0822778940200806, "learning_rate": 7.831297228245443e-07, "loss": 0.7466, "step": 16050 }, { "epoch": 0.8249049234248124, "grad_norm": 1.1792621612548828, "learning_rate": 7.826825887406086e-07, "loss": 0.694, "step": 16051 }, { "epoch": 0.824956316168157, "grad_norm": 0.7651743292808533, "learning_rate": 7.822355715011354e-07, "loss": 0.6672, "step": 16052 }, { "epoch": 0.8250077089115017, "grad_norm": 1.0082892179489136, "learning_rate": 7.817886711185091e-07, "loss": 0.6501, "step": 16053 }, { "epoch": 0.8250591016548463, "grad_norm": 1.137579083442688, "learning_rate": 7.813418876051115e-07, "loss": 0.7425, "step": 16054 }, { "epoch": 0.825110494398191, "grad_norm": 0.7235168218612671, "learning_rate": 7.808952209733195e-07, "loss": 0.6594, "step": 16055 }, { "epoch": 0.8251618871415356, "grad_norm": 1.1250555515289307, "learning_rate": 7.804486712355119e-07, "loss": 0.7151, "step": 16056 }, { "epoch": 0.8252132798848802, "grad_norm": 1.1263036727905273, "learning_rate": 7.800022384040562e-07, "loss": 0.7208, "step": 16057 }, { "epoch": 0.8252646726282249, "grad_norm": 1.0906765460968018, "learning_rate": 7.795559224913252e-07, "loss": 0.732, "step": 16058 }, { "epoch": 0.8253160653715695, "grad_norm": 1.1602904796600342, "learning_rate": 7.791097235096823e-07, "loss": 0.7041, "step": 16059 }, { "epoch": 0.8253674581149142, "grad_norm": 1.016738772392273, "learning_rate": 7.786636414714893e-07, "loss": 0.6601, "step": 16060 }, { "epoch": 0.8254188508582588, "grad_norm": 1.0749131441116333, "learning_rate": 7.782176763891075e-07, "loss": 0.7016, "step": 16061 }, { "epoch": 0.8254702436016035, "grad_norm": 1.0796241760253906, "learning_rate": 7.777718282748919e-07, "loss": 0.6833, "step": 16062 }, { "epoch": 0.8255216363449481, "grad_norm": 0.7335962057113647, "learning_rate": 7.773260971411944e-07, "loss": 0.647, "step": 16063 }, { "epoch": 0.8255730290882928, "grad_norm": 1.0635814666748047, "learning_rate": 7.76880483000364e-07, "loss": 0.6934, "step": 16064 }, { "epoch": 0.8256244218316374, "grad_norm": 1.1309388875961304, "learning_rate": 7.764349858647496e-07, "loss": 0.7013, "step": 16065 }, { "epoch": 0.825675814574982, "grad_norm": 0.7266185879707336, "learning_rate": 7.759896057466904e-07, "loss": 0.6527, "step": 16066 }, { "epoch": 0.8257272073183266, "grad_norm": 1.14070725440979, "learning_rate": 7.755443426585286e-07, "loss": 0.6868, "step": 16067 }, { "epoch": 0.8257786000616713, "grad_norm": 1.0829790830612183, "learning_rate": 7.750991966125987e-07, "loss": 0.687, "step": 16068 }, { "epoch": 0.8258299928050159, "grad_norm": 0.8859814405441284, "learning_rate": 7.746541676212355e-07, "loss": 0.6498, "step": 16069 }, { "epoch": 0.8258813855483605, "grad_norm": 1.080967664718628, "learning_rate": 7.742092556967689e-07, "loss": 0.6482, "step": 16070 }, { "epoch": 0.8259327782917052, "grad_norm": 1.0481303930282593, "learning_rate": 7.737644608515238e-07, "loss": 0.7033, "step": 16071 }, { "epoch": 0.8259841710350498, "grad_norm": 1.061299443244934, "learning_rate": 7.733197830978273e-07, "loss": 0.6405, "step": 16072 }, { "epoch": 0.8260355637783945, "grad_norm": 1.4374608993530273, "learning_rate": 7.728752224479946e-07, "loss": 0.6839, "step": 16073 }, { "epoch": 0.8260869565217391, "grad_norm": 1.106293797492981, "learning_rate": 7.724307789143465e-07, "loss": 0.6805, "step": 16074 }, { "epoch": 0.8261383492650838, "grad_norm": 1.0680798292160034, "learning_rate": 7.719864525091952e-07, "loss": 0.7044, "step": 16075 }, { "epoch": 0.8261897420084284, "grad_norm": 1.1610217094421387, "learning_rate": 7.71542243244851e-07, "loss": 0.7416, "step": 16076 }, { "epoch": 0.8262411347517731, "grad_norm": 1.1235777139663696, "learning_rate": 7.710981511336207e-07, "loss": 0.6604, "step": 16077 }, { "epoch": 0.8262925274951177, "grad_norm": 1.005269169807434, "learning_rate": 7.706541761878101e-07, "loss": 0.6551, "step": 16078 }, { "epoch": 0.8263439202384624, "grad_norm": 0.7603880763053894, "learning_rate": 7.70210318419719e-07, "loss": 0.633, "step": 16079 }, { "epoch": 0.826395312981807, "grad_norm": 1.0602203607559204, "learning_rate": 7.697665778416441e-07, "loss": 0.678, "step": 16080 }, { "epoch": 0.8264467057251516, "grad_norm": 0.7427306175231934, "learning_rate": 7.693229544658798e-07, "loss": 0.6284, "step": 16081 }, { "epoch": 0.8264980984684962, "grad_norm": 1.0884521007537842, "learning_rate": 7.688794483047179e-07, "loss": 0.7479, "step": 16082 }, { "epoch": 0.8265494912118408, "grad_norm": 0.9648956656455994, "learning_rate": 7.68436059370446e-07, "loss": 0.6634, "step": 16083 }, { "epoch": 0.8266008839551855, "grad_norm": 1.006766676902771, "learning_rate": 7.679927876753468e-07, "loss": 0.633, "step": 16084 }, { "epoch": 0.8266522766985301, "grad_norm": 1.0697578191757202, "learning_rate": 7.675496332317057e-07, "loss": 0.7172, "step": 16085 }, { "epoch": 0.8267036694418748, "grad_norm": 1.0052945613861084, "learning_rate": 7.671065960517954e-07, "loss": 0.6364, "step": 16086 }, { "epoch": 0.8267550621852194, "grad_norm": 1.4203604459762573, "learning_rate": 7.666636761478947e-07, "loss": 0.6864, "step": 16087 }, { "epoch": 0.8268064549285641, "grad_norm": 1.027005910873413, "learning_rate": 7.662208735322735e-07, "loss": 0.7022, "step": 16088 }, { "epoch": 0.8268578476719087, "grad_norm": 1.0732640027999878, "learning_rate": 7.657781882172e-07, "loss": 0.6407, "step": 16089 }, { "epoch": 0.8269092404152534, "grad_norm": 1.0870028734207153, "learning_rate": 7.653356202149381e-07, "loss": 0.7223, "step": 16090 }, { "epoch": 0.826960633158598, "grad_norm": 1.1163480281829834, "learning_rate": 7.648931695377521e-07, "loss": 0.6542, "step": 16091 }, { "epoch": 0.8270120259019427, "grad_norm": 1.1286154985427856, "learning_rate": 7.644508361978987e-07, "loss": 0.7438, "step": 16092 }, { "epoch": 0.8270634186452873, "grad_norm": 1.0654898881912231, "learning_rate": 7.640086202076325e-07, "loss": 0.7066, "step": 16093 }, { "epoch": 0.827114811388632, "grad_norm": 1.0906801223754883, "learning_rate": 7.635665215792093e-07, "loss": 0.7085, "step": 16094 }, { "epoch": 0.8271662041319766, "grad_norm": 1.1705752611160278, "learning_rate": 7.631245403248722e-07, "loss": 0.7286, "step": 16095 }, { "epoch": 0.8272175968753213, "grad_norm": 1.196956753730774, "learning_rate": 7.626826764568712e-07, "loss": 0.6975, "step": 16096 }, { "epoch": 0.8272689896186658, "grad_norm": 1.1648024320602417, "learning_rate": 7.622409299874451e-07, "loss": 0.6395, "step": 16097 }, { "epoch": 0.8273203823620104, "grad_norm": 1.0711920261383057, "learning_rate": 7.617993009288371e-07, "loss": 0.6844, "step": 16098 }, { "epoch": 0.8273717751053551, "grad_norm": 1.1348947286605835, "learning_rate": 7.613577892932783e-07, "loss": 0.7215, "step": 16099 }, { "epoch": 0.8274231678486997, "grad_norm": 1.0739679336547852, "learning_rate": 7.609163950930048e-07, "loss": 0.6815, "step": 16100 }, { "epoch": 0.8274745605920444, "grad_norm": 1.1137958765029907, "learning_rate": 7.604751183402437e-07, "loss": 0.6929, "step": 16101 }, { "epoch": 0.827525953335389, "grad_norm": 1.099880337715149, "learning_rate": 7.600339590472211e-07, "loss": 0.7106, "step": 16102 }, { "epoch": 0.8275773460787337, "grad_norm": 1.077462911605835, "learning_rate": 7.595929172261607e-07, "loss": 0.6746, "step": 16103 }, { "epoch": 0.8276287388220783, "grad_norm": 1.2197293043136597, "learning_rate": 7.591519928892816e-07, "loss": 0.6516, "step": 16104 }, { "epoch": 0.827680131565423, "grad_norm": 1.0328541994094849, "learning_rate": 7.587111860488e-07, "loss": 0.6424, "step": 16105 }, { "epoch": 0.8277315243087676, "grad_norm": 1.0963376760482788, "learning_rate": 7.582704967169274e-07, "loss": 0.689, "step": 16106 }, { "epoch": 0.8277829170521123, "grad_norm": 1.0226638317108154, "learning_rate": 7.578299249058774e-07, "loss": 0.6684, "step": 16107 }, { "epoch": 0.8278343097954569, "grad_norm": 1.1614270210266113, "learning_rate": 7.573894706278512e-07, "loss": 0.7209, "step": 16108 }, { "epoch": 0.8278857025388016, "grad_norm": 1.091646432876587, "learning_rate": 7.569491338950557e-07, "loss": 0.7003, "step": 16109 }, { "epoch": 0.8279370952821462, "grad_norm": 1.0209165811538696, "learning_rate": 7.565089147196897e-07, "loss": 0.6775, "step": 16110 }, { "epoch": 0.8279884880254909, "grad_norm": 1.0242887735366821, "learning_rate": 7.560688131139482e-07, "loss": 0.6827, "step": 16111 }, { "epoch": 0.8280398807688354, "grad_norm": 1.1423033475875854, "learning_rate": 7.556288290900283e-07, "loss": 0.696, "step": 16112 }, { "epoch": 0.82809127351218, "grad_norm": 1.103794813156128, "learning_rate": 7.551889626601161e-07, "loss": 0.6038, "step": 16113 }, { "epoch": 0.8281426662555247, "grad_norm": 1.0342439413070679, "learning_rate": 7.547492138364032e-07, "loss": 0.6382, "step": 16114 }, { "epoch": 0.8281940589988693, "grad_norm": 1.0883582830429077, "learning_rate": 7.543095826310676e-07, "loss": 0.6889, "step": 16115 }, { "epoch": 0.828245451742214, "grad_norm": 1.073947787284851, "learning_rate": 7.538700690562945e-07, "loss": 0.6807, "step": 16116 }, { "epoch": 0.8282968444855586, "grad_norm": 0.7490939497947693, "learning_rate": 7.534306731242585e-07, "loss": 0.6208, "step": 16117 }, { "epoch": 0.8283482372289033, "grad_norm": 0.8126642107963562, "learning_rate": 7.52991394847134e-07, "loss": 0.6487, "step": 16118 }, { "epoch": 0.8283996299722479, "grad_norm": 1.0209972858428955, "learning_rate": 7.525522342370906e-07, "loss": 0.6973, "step": 16119 }, { "epoch": 0.8284510227155926, "grad_norm": 0.8093645572662354, "learning_rate": 7.521131913062979e-07, "loss": 0.6163, "step": 16120 }, { "epoch": 0.8285024154589372, "grad_norm": 1.1898071765899658, "learning_rate": 7.516742660669185e-07, "loss": 0.6904, "step": 16121 }, { "epoch": 0.8285538082022819, "grad_norm": 1.0881743431091309, "learning_rate": 7.512354585311121e-07, "loss": 0.707, "step": 16122 }, { "epoch": 0.8286052009456265, "grad_norm": 1.1162421703338623, "learning_rate": 7.507967687110401e-07, "loss": 0.7229, "step": 16123 }, { "epoch": 0.8286565936889712, "grad_norm": 1.0954328775405884, "learning_rate": 7.503581966188517e-07, "loss": 0.6772, "step": 16124 }, { "epoch": 0.8287079864323158, "grad_norm": 1.0674210786819458, "learning_rate": 7.499197422667015e-07, "loss": 0.6896, "step": 16125 }, { "epoch": 0.8287593791756604, "grad_norm": 0.7597302198410034, "learning_rate": 7.494814056667366e-07, "loss": 0.6557, "step": 16126 }, { "epoch": 0.828810771919005, "grad_norm": 1.0989633798599243, "learning_rate": 7.490431868311005e-07, "loss": 0.6646, "step": 16127 }, { "epoch": 0.8288621646623496, "grad_norm": 1.0887296199798584, "learning_rate": 7.486050857719346e-07, "loss": 0.667, "step": 16128 }, { "epoch": 0.8289135574056943, "grad_norm": 1.1052231788635254, "learning_rate": 7.481671025013776e-07, "loss": 0.6669, "step": 16129 }, { "epoch": 0.8289649501490389, "grad_norm": 1.1002193689346313, "learning_rate": 7.477292370315647e-07, "loss": 0.7092, "step": 16130 }, { "epoch": 0.8290163428923836, "grad_norm": 1.110308289527893, "learning_rate": 7.472914893746263e-07, "loss": 0.692, "step": 16131 }, { "epoch": 0.8290677356357282, "grad_norm": 1.0620559453964233, "learning_rate": 7.468538595426894e-07, "loss": 0.7053, "step": 16132 }, { "epoch": 0.8291191283790729, "grad_norm": 0.6852643489837646, "learning_rate": 7.464163475478819e-07, "loss": 0.6557, "step": 16133 }, { "epoch": 0.8291705211224175, "grad_norm": 0.7366395592689514, "learning_rate": 7.459789534023237e-07, "loss": 0.6525, "step": 16134 }, { "epoch": 0.8292219138657622, "grad_norm": 1.0354663133621216, "learning_rate": 7.455416771181323e-07, "loss": 0.6428, "step": 16135 }, { "epoch": 0.8292733066091068, "grad_norm": 1.0779176950454712, "learning_rate": 7.451045187074263e-07, "loss": 0.6637, "step": 16136 }, { "epoch": 0.8293246993524515, "grad_norm": 0.8375317454338074, "learning_rate": 7.446674781823126e-07, "loss": 0.677, "step": 16137 }, { "epoch": 0.8293760920957961, "grad_norm": 1.077837586402893, "learning_rate": 7.442305555549034e-07, "loss": 0.6783, "step": 16138 }, { "epoch": 0.8294274848391407, "grad_norm": 1.0832537412643433, "learning_rate": 7.437937508373034e-07, "loss": 0.6302, "step": 16139 }, { "epoch": 0.8294788775824854, "grad_norm": 1.1278173923492432, "learning_rate": 7.433570640416144e-07, "loss": 0.7134, "step": 16140 }, { "epoch": 0.82953027032583, "grad_norm": 1.041581392288208, "learning_rate": 7.429204951799334e-07, "loss": 0.6818, "step": 16141 }, { "epoch": 0.8295816630691746, "grad_norm": 1.1409856081008911, "learning_rate": 7.424840442643588e-07, "loss": 0.6551, "step": 16142 }, { "epoch": 0.8296330558125192, "grad_norm": 1.043656587600708, "learning_rate": 7.420477113069818e-07, "loss": 0.7066, "step": 16143 }, { "epoch": 0.8296844485558639, "grad_norm": 1.0693556070327759, "learning_rate": 7.416114963198895e-07, "loss": 0.6849, "step": 16144 }, { "epoch": 0.8297358412992085, "grad_norm": 1.0682543516159058, "learning_rate": 7.41175399315171e-07, "loss": 0.7332, "step": 16145 }, { "epoch": 0.8297872340425532, "grad_norm": 0.7439308762550354, "learning_rate": 7.407394203049068e-07, "loss": 0.6183, "step": 16146 }, { "epoch": 0.8298386267858978, "grad_norm": 1.0198359489440918, "learning_rate": 7.403035593011765e-07, "loss": 0.7305, "step": 16147 }, { "epoch": 0.8298900195292425, "grad_norm": 1.1578389406204224, "learning_rate": 7.398678163160549e-07, "loss": 0.7185, "step": 16148 }, { "epoch": 0.8299414122725871, "grad_norm": 1.1866463422775269, "learning_rate": 7.394321913616176e-07, "loss": 0.7673, "step": 16149 }, { "epoch": 0.8299928050159318, "grad_norm": 1.1783944368362427, "learning_rate": 7.389966844499297e-07, "loss": 0.6968, "step": 16150 }, { "epoch": 0.8300441977592764, "grad_norm": 1.2017390727996826, "learning_rate": 7.385612955930605e-07, "loss": 0.7328, "step": 16151 }, { "epoch": 0.830095590502621, "grad_norm": 0.6819837093353271, "learning_rate": 7.38126024803072e-07, "loss": 0.6178, "step": 16152 }, { "epoch": 0.8301469832459657, "grad_norm": 1.1740351915359497, "learning_rate": 7.376908720920228e-07, "loss": 0.7542, "step": 16153 }, { "epoch": 0.8301983759893103, "grad_norm": 1.1745269298553467, "learning_rate": 7.372558374719707e-07, "loss": 0.7291, "step": 16154 }, { "epoch": 0.830249768732655, "grad_norm": 0.746801495552063, "learning_rate": 7.368209209549682e-07, "loss": 0.6578, "step": 16155 }, { "epoch": 0.8303011614759996, "grad_norm": 1.080525517463684, "learning_rate": 7.36386122553065e-07, "loss": 0.6715, "step": 16156 }, { "epoch": 0.8303525542193442, "grad_norm": 1.1389060020446777, "learning_rate": 7.35951442278306e-07, "loss": 0.7078, "step": 16157 }, { "epoch": 0.8304039469626888, "grad_norm": 1.033244013786316, "learning_rate": 7.355168801427387e-07, "loss": 0.6639, "step": 16158 }, { "epoch": 0.8304553397060335, "grad_norm": 1.1089292764663696, "learning_rate": 7.350824361583975e-07, "loss": 0.7096, "step": 16159 }, { "epoch": 0.8305067324493781, "grad_norm": 1.0920753479003906, "learning_rate": 7.346481103373227e-07, "loss": 0.7368, "step": 16160 }, { "epoch": 0.8305581251927228, "grad_norm": 1.0647251605987549, "learning_rate": 7.342139026915457e-07, "loss": 0.7333, "step": 16161 }, { "epoch": 0.8306095179360674, "grad_norm": 0.9290080070495605, "learning_rate": 7.337798132330992e-07, "loss": 0.65, "step": 16162 }, { "epoch": 0.8306609106794121, "grad_norm": 0.6671953797340393, "learning_rate": 7.33345841974008e-07, "loss": 0.635, "step": 16163 }, { "epoch": 0.8307123034227567, "grad_norm": 1.1195402145385742, "learning_rate": 7.329119889262948e-07, "loss": 0.7318, "step": 16164 }, { "epoch": 0.8307636961661014, "grad_norm": 1.1184241771697998, "learning_rate": 7.324782541019837e-07, "loss": 0.6835, "step": 16165 }, { "epoch": 0.830815088909446, "grad_norm": 1.0903064012527466, "learning_rate": 7.320446375130869e-07, "loss": 0.6564, "step": 16166 }, { "epoch": 0.8308664816527906, "grad_norm": 1.032010555267334, "learning_rate": 7.316111391716213e-07, "loss": 0.7193, "step": 16167 }, { "epoch": 0.8309178743961353, "grad_norm": 1.075393795967102, "learning_rate": 7.311777590895963e-07, "loss": 0.6545, "step": 16168 }, { "epoch": 0.8309692671394799, "grad_norm": 1.101967453956604, "learning_rate": 7.307444972790195e-07, "loss": 0.7002, "step": 16169 }, { "epoch": 0.8310206598828246, "grad_norm": 0.7552602887153625, "learning_rate": 7.30311353751893e-07, "loss": 0.6425, "step": 16170 }, { "epoch": 0.8310720526261692, "grad_norm": 1.045198678970337, "learning_rate": 7.298783285202205e-07, "loss": 0.7022, "step": 16171 }, { "epoch": 0.8311234453695138, "grad_norm": 1.169352650642395, "learning_rate": 7.294454215959979e-07, "loss": 0.6902, "step": 16172 }, { "epoch": 0.8311748381128584, "grad_norm": 1.1398063898086548, "learning_rate": 7.290126329912172e-07, "loss": 0.7111, "step": 16173 }, { "epoch": 0.8312262308562031, "grad_norm": 1.03053879737854, "learning_rate": 7.285799627178741e-07, "loss": 0.6814, "step": 16174 }, { "epoch": 0.8312776235995477, "grad_norm": 0.7771539688110352, "learning_rate": 7.28147410787951e-07, "loss": 0.6455, "step": 16175 }, { "epoch": 0.8313290163428924, "grad_norm": 1.0525847673416138, "learning_rate": 7.277149772134346e-07, "loss": 0.6832, "step": 16176 }, { "epoch": 0.831380409086237, "grad_norm": 1.0635712146759033, "learning_rate": 7.27282662006305e-07, "loss": 0.7151, "step": 16177 }, { "epoch": 0.8314318018295817, "grad_norm": 1.0379291772842407, "learning_rate": 7.268504651785424e-07, "loss": 0.6594, "step": 16178 }, { "epoch": 0.8314831945729263, "grad_norm": 1.095997929573059, "learning_rate": 7.26418386742117e-07, "loss": 0.721, "step": 16179 }, { "epoch": 0.831534587316271, "grad_norm": 0.6822413802146912, "learning_rate": 7.259864267090033e-07, "loss": 0.6756, "step": 16180 }, { "epoch": 0.8315859800596156, "grad_norm": 1.026462197303772, "learning_rate": 7.255545850911677e-07, "loss": 0.6399, "step": 16181 }, { "epoch": 0.8316373728029602, "grad_norm": 1.073300838470459, "learning_rate": 7.251228619005751e-07, "loss": 0.6341, "step": 16182 }, { "epoch": 0.8316887655463049, "grad_norm": 0.6554083824157715, "learning_rate": 7.246912571491854e-07, "loss": 0.6503, "step": 16183 }, { "epoch": 0.8317401582896495, "grad_norm": 1.1766184568405151, "learning_rate": 7.242597708489585e-07, "loss": 0.6874, "step": 16184 }, { "epoch": 0.8317915510329942, "grad_norm": 1.0305601358413696, "learning_rate": 7.238284030118492e-07, "loss": 0.6669, "step": 16185 }, { "epoch": 0.8318429437763388, "grad_norm": 1.1265326738357544, "learning_rate": 7.233971536498064e-07, "loss": 0.6988, "step": 16186 }, { "epoch": 0.8318943365196835, "grad_norm": 1.0956284999847412, "learning_rate": 7.229660227747825e-07, "loss": 0.7151, "step": 16187 }, { "epoch": 0.831945729263028, "grad_norm": 0.8320324420928955, "learning_rate": 7.225350103987178e-07, "loss": 0.608, "step": 16188 }, { "epoch": 0.8319971220063727, "grad_norm": 1.2274229526519775, "learning_rate": 7.221041165335568e-07, "loss": 0.6854, "step": 16189 }, { "epoch": 0.8320485147497173, "grad_norm": 1.117397427558899, "learning_rate": 7.216733411912369e-07, "loss": 0.707, "step": 16190 }, { "epoch": 0.832099907493062, "grad_norm": 1.0491365194320679, "learning_rate": 7.212426843836928e-07, "loss": 0.6797, "step": 16191 }, { "epoch": 0.8321513002364066, "grad_norm": 0.6913713216781616, "learning_rate": 7.208121461228556e-07, "loss": 0.6848, "step": 16192 }, { "epoch": 0.8322026929797512, "grad_norm": 1.117197871208191, "learning_rate": 7.203817264206558e-07, "loss": 0.7322, "step": 16193 }, { "epoch": 0.8322540857230959, "grad_norm": 0.727449893951416, "learning_rate": 7.199514252890178e-07, "loss": 0.6134, "step": 16194 }, { "epoch": 0.8323054784664405, "grad_norm": 1.123633861541748, "learning_rate": 7.195212427398618e-07, "loss": 0.7045, "step": 16195 }, { "epoch": 0.8323568712097852, "grad_norm": 1.0169224739074707, "learning_rate": 7.190911787851085e-07, "loss": 0.6806, "step": 16196 }, { "epoch": 0.8324082639531298, "grad_norm": 0.6878763437271118, "learning_rate": 7.186612334366727e-07, "loss": 0.6304, "step": 16197 }, { "epoch": 0.8324596566964745, "grad_norm": 1.0516421794891357, "learning_rate": 7.182314067064656e-07, "loss": 0.7084, "step": 16198 }, { "epoch": 0.8325110494398191, "grad_norm": 0.6842736601829529, "learning_rate": 7.178016986063957e-07, "loss": 0.6295, "step": 16199 }, { "epoch": 0.8325624421831638, "grad_norm": 1.0728563070297241, "learning_rate": 7.173721091483715e-07, "loss": 0.6564, "step": 16200 }, { "epoch": 0.8326138349265084, "grad_norm": 1.0831098556518555, "learning_rate": 7.169426383442901e-07, "loss": 0.7153, "step": 16201 }, { "epoch": 0.8326652276698531, "grad_norm": 1.1927567720413208, "learning_rate": 7.165132862060542e-07, "loss": 0.7333, "step": 16202 }, { "epoch": 0.8327166204131976, "grad_norm": 1.0496019124984741, "learning_rate": 7.160840527455587e-07, "loss": 0.6874, "step": 16203 }, { "epoch": 0.8327680131565423, "grad_norm": 1.063751459121704, "learning_rate": 7.156549379746941e-07, "loss": 0.6742, "step": 16204 }, { "epoch": 0.8328194058998869, "grad_norm": 0.6623212099075317, "learning_rate": 7.152259419053514e-07, "loss": 0.6529, "step": 16205 }, { "epoch": 0.8328707986432315, "grad_norm": 1.016613483428955, "learning_rate": 7.147970645494151e-07, "loss": 0.6868, "step": 16206 }, { "epoch": 0.8329221913865762, "grad_norm": 1.121164083480835, "learning_rate": 7.143683059187701e-07, "loss": 0.6552, "step": 16207 }, { "epoch": 0.8329735841299208, "grad_norm": 0.7065137028694153, "learning_rate": 7.139396660252917e-07, "loss": 0.6633, "step": 16208 }, { "epoch": 0.8330249768732655, "grad_norm": 1.082448959350586, "learning_rate": 7.135111448808585e-07, "loss": 0.6849, "step": 16209 }, { "epoch": 0.8330763696166101, "grad_norm": 1.1236902475357056, "learning_rate": 7.130827424973419e-07, "loss": 0.7513, "step": 16210 }, { "epoch": 0.8331277623599548, "grad_norm": 1.0635037422180176, "learning_rate": 7.126544588866119e-07, "loss": 0.7184, "step": 16211 }, { "epoch": 0.8331791551032994, "grad_norm": 0.6885570883750916, "learning_rate": 7.122262940605324e-07, "loss": 0.6387, "step": 16212 }, { "epoch": 0.8332305478466441, "grad_norm": 1.0762639045715332, "learning_rate": 7.117982480309693e-07, "loss": 0.6728, "step": 16213 }, { "epoch": 0.8332819405899887, "grad_norm": 1.1587618589401245, "learning_rate": 7.1137032080978e-07, "loss": 0.6812, "step": 16214 }, { "epoch": 0.8333333333333334, "grad_norm": 1.0824652910232544, "learning_rate": 7.109425124088204e-07, "loss": 0.6974, "step": 16215 }, { "epoch": 0.833384726076678, "grad_norm": 1.0130155086517334, "learning_rate": 7.105148228399455e-07, "loss": 0.6715, "step": 16216 }, { "epoch": 0.8334361188200227, "grad_norm": 1.1074610948562622, "learning_rate": 7.100872521150009e-07, "loss": 0.6995, "step": 16217 }, { "epoch": 0.8334875115633672, "grad_norm": 1.0440499782562256, "learning_rate": 7.096598002458372e-07, "loss": 0.6908, "step": 16218 }, { "epoch": 0.8335389043067118, "grad_norm": 1.0427745580673218, "learning_rate": 7.092324672442941e-07, "loss": 0.6806, "step": 16219 }, { "epoch": 0.8335902970500565, "grad_norm": 1.0647588968276978, "learning_rate": 7.088052531222134e-07, "loss": 0.6443, "step": 16220 }, { "epoch": 0.8336416897934011, "grad_norm": 1.069153904914856, "learning_rate": 7.083781578914284e-07, "loss": 0.6948, "step": 16221 }, { "epoch": 0.8336930825367458, "grad_norm": 1.1501058340072632, "learning_rate": 7.079511815637757e-07, "loss": 0.7796, "step": 16222 }, { "epoch": 0.8337444752800904, "grad_norm": 0.7508118748664856, "learning_rate": 7.075243241510837e-07, "loss": 0.6778, "step": 16223 }, { "epoch": 0.8337958680234351, "grad_norm": 1.1611324548721313, "learning_rate": 7.070975856651774e-07, "loss": 0.7381, "step": 16224 }, { "epoch": 0.8338472607667797, "grad_norm": 1.108817219734192, "learning_rate": 7.066709661178822e-07, "loss": 0.6526, "step": 16225 }, { "epoch": 0.8338986535101244, "grad_norm": 1.1026725769042969, "learning_rate": 7.062444655210171e-07, "loss": 0.7125, "step": 16226 }, { "epoch": 0.833950046253469, "grad_norm": 1.0851575136184692, "learning_rate": 7.058180838863987e-07, "loss": 0.7321, "step": 16227 }, { "epoch": 0.8340014389968137, "grad_norm": 1.0817135572433472, "learning_rate": 7.053918212258387e-07, "loss": 0.7122, "step": 16228 }, { "epoch": 0.8340528317401583, "grad_norm": 1.1189020872116089, "learning_rate": 7.049656775511509e-07, "loss": 0.6908, "step": 16229 }, { "epoch": 0.834104224483503, "grad_norm": 0.6704785227775574, "learning_rate": 7.045396528741377e-07, "loss": 0.6625, "step": 16230 }, { "epoch": 0.8341556172268476, "grad_norm": 1.001562237739563, "learning_rate": 7.041137472066051e-07, "loss": 0.6339, "step": 16231 }, { "epoch": 0.8342070099701923, "grad_norm": 1.1632343530654907, "learning_rate": 7.03687960560353e-07, "loss": 0.678, "step": 16232 }, { "epoch": 0.8342584027135368, "grad_norm": 1.0946893692016602, "learning_rate": 7.032622929471771e-07, "loss": 0.706, "step": 16233 }, { "epoch": 0.8343097954568814, "grad_norm": 1.1696473360061646, "learning_rate": 7.028367443788708e-07, "loss": 0.6746, "step": 16234 }, { "epoch": 0.8343611882002261, "grad_norm": 0.8556324243545532, "learning_rate": 7.024113148672257e-07, "loss": 0.6312, "step": 16235 }, { "epoch": 0.8344125809435707, "grad_norm": 0.7275747060775757, "learning_rate": 7.019860044240285e-07, "loss": 0.6215, "step": 16236 }, { "epoch": 0.8344639736869154, "grad_norm": 1.0374876260757446, "learning_rate": 7.015608130610607e-07, "loss": 0.684, "step": 16237 }, { "epoch": 0.83451536643026, "grad_norm": 1.1674046516418457, "learning_rate": 7.011357407901053e-07, "loss": 0.6725, "step": 16238 }, { "epoch": 0.8345667591736047, "grad_norm": 1.089556336402893, "learning_rate": 7.007107876229385e-07, "loss": 0.7047, "step": 16239 }, { "epoch": 0.8346181519169493, "grad_norm": 1.1114647388458252, "learning_rate": 7.00285953571333e-07, "loss": 0.6829, "step": 16240 }, { "epoch": 0.834669544660294, "grad_norm": 1.0776846408843994, "learning_rate": 6.998612386470593e-07, "loss": 0.7058, "step": 16241 }, { "epoch": 0.8347209374036386, "grad_norm": 1.0587656497955322, "learning_rate": 6.99436642861887e-07, "loss": 0.6502, "step": 16242 }, { "epoch": 0.8347723301469833, "grad_norm": 1.1687204837799072, "learning_rate": 6.990121662275761e-07, "loss": 0.6645, "step": 16243 }, { "epoch": 0.8348237228903279, "grad_norm": 0.7828230261802673, "learning_rate": 6.985878087558894e-07, "loss": 0.6551, "step": 16244 }, { "epoch": 0.8348751156336726, "grad_norm": 0.6792510747909546, "learning_rate": 6.981635704585843e-07, "loss": 0.6532, "step": 16245 }, { "epoch": 0.8349265083770172, "grad_norm": 1.1054781675338745, "learning_rate": 6.977394513474129e-07, "loss": 0.703, "step": 16246 }, { "epoch": 0.8349779011203619, "grad_norm": 0.7244647145271301, "learning_rate": 6.973154514341279e-07, "loss": 0.6849, "step": 16247 }, { "epoch": 0.8350292938637064, "grad_norm": 1.0690271854400635, "learning_rate": 6.968915707304751e-07, "loss": 0.6911, "step": 16248 }, { "epoch": 0.835080686607051, "grad_norm": 1.1193323135375977, "learning_rate": 6.964678092481996e-07, "loss": 0.6743, "step": 16249 }, { "epoch": 0.8351320793503957, "grad_norm": 1.0752524137496948, "learning_rate": 6.960441669990403e-07, "loss": 0.6596, "step": 16250 }, { "epoch": 0.8351834720937403, "grad_norm": 1.090964674949646, "learning_rate": 6.956206439947377e-07, "loss": 0.6928, "step": 16251 }, { "epoch": 0.835234864837085, "grad_norm": 1.9304898977279663, "learning_rate": 6.95197240247022e-07, "loss": 0.7414, "step": 16252 }, { "epoch": 0.8352862575804296, "grad_norm": 1.0531097650527954, "learning_rate": 6.94773955767627e-07, "loss": 0.669, "step": 16253 }, { "epoch": 0.8353376503237743, "grad_norm": 1.1478848457336426, "learning_rate": 6.943507905682772e-07, "loss": 0.7156, "step": 16254 }, { "epoch": 0.8353890430671189, "grad_norm": 1.0878046751022339, "learning_rate": 6.939277446607007e-07, "loss": 0.6616, "step": 16255 }, { "epoch": 0.8354404358104636, "grad_norm": 1.080569863319397, "learning_rate": 6.935048180566162e-07, "loss": 0.6983, "step": 16256 }, { "epoch": 0.8354918285538082, "grad_norm": 0.7603037357330322, "learning_rate": 6.930820107677394e-07, "loss": 0.6153, "step": 16257 }, { "epoch": 0.8355432212971529, "grad_norm": 1.0902410745620728, "learning_rate": 6.926593228057893e-07, "loss": 0.6853, "step": 16258 }, { "epoch": 0.8355946140404975, "grad_norm": 1.0827269554138184, "learning_rate": 6.92236754182472e-07, "loss": 0.7207, "step": 16259 }, { "epoch": 0.8356460067838422, "grad_norm": 1.0454212427139282, "learning_rate": 6.918143049094983e-07, "loss": 0.7168, "step": 16260 }, { "epoch": 0.8356973995271868, "grad_norm": 1.09187650680542, "learning_rate": 6.913919749985709e-07, "loss": 0.6864, "step": 16261 }, { "epoch": 0.8357487922705314, "grad_norm": 1.0645188093185425, "learning_rate": 6.909697644613916e-07, "loss": 0.6779, "step": 16262 }, { "epoch": 0.8358001850138761, "grad_norm": 1.1095867156982422, "learning_rate": 6.905476733096566e-07, "loss": 0.6902, "step": 16263 }, { "epoch": 0.8358515777572206, "grad_norm": 1.1850234270095825, "learning_rate": 6.901257015550627e-07, "loss": 0.7136, "step": 16264 }, { "epoch": 0.8359029705005653, "grad_norm": 1.1090548038482666, "learning_rate": 6.897038492092994e-07, "loss": 0.7023, "step": 16265 }, { "epoch": 0.8359543632439099, "grad_norm": 1.0831363201141357, "learning_rate": 6.89282116284054e-07, "loss": 0.7088, "step": 16266 }, { "epoch": 0.8360057559872546, "grad_norm": 1.1062325239181519, "learning_rate": 6.888605027910145e-07, "loss": 0.6641, "step": 16267 }, { "epoch": 0.8360571487305992, "grad_norm": 1.0894643068313599, "learning_rate": 6.884390087418569e-07, "loss": 0.7337, "step": 16268 }, { "epoch": 0.8361085414739439, "grad_norm": 1.1248421669006348, "learning_rate": 6.880176341482625e-07, "loss": 0.6244, "step": 16269 }, { "epoch": 0.8361599342172885, "grad_norm": 1.091792345046997, "learning_rate": 6.875963790219043e-07, "loss": 0.682, "step": 16270 }, { "epoch": 0.8362113269606332, "grad_norm": 1.0689754486083984, "learning_rate": 6.87175243374456e-07, "loss": 0.6837, "step": 16271 }, { "epoch": 0.8362627197039778, "grad_norm": 1.037709355354309, "learning_rate": 6.86754227217582e-07, "loss": 0.6723, "step": 16272 }, { "epoch": 0.8363141124473225, "grad_norm": 1.0764809846878052, "learning_rate": 6.863333305629494e-07, "loss": 0.6833, "step": 16273 }, { "epoch": 0.8363655051906671, "grad_norm": 1.0703129768371582, "learning_rate": 6.859125534222189e-07, "loss": 0.6661, "step": 16274 }, { "epoch": 0.8364168979340117, "grad_norm": 1.1416085958480835, "learning_rate": 6.854918958070472e-07, "loss": 0.6382, "step": 16275 }, { "epoch": 0.8364682906773564, "grad_norm": 0.7774978280067444, "learning_rate": 6.850713577290913e-07, "loss": 0.6624, "step": 16276 }, { "epoch": 0.836519683420701, "grad_norm": 1.1273070573806763, "learning_rate": 6.846509392000011e-07, "loss": 0.6531, "step": 16277 }, { "epoch": 0.8365710761640457, "grad_norm": 1.182376503944397, "learning_rate": 6.842306402314258e-07, "loss": 0.662, "step": 16278 }, { "epoch": 0.8366224689073902, "grad_norm": 0.6770730018615723, "learning_rate": 6.838104608350077e-07, "loss": 0.6126, "step": 16279 }, { "epoch": 0.8366738616507349, "grad_norm": 1.0436102151870728, "learning_rate": 6.833904010223919e-07, "loss": 0.6845, "step": 16280 }, { "epoch": 0.8367252543940795, "grad_norm": 1.0707000494003296, "learning_rate": 6.829704608052123e-07, "loss": 0.7269, "step": 16281 }, { "epoch": 0.8367766471374242, "grad_norm": 1.0488498210906982, "learning_rate": 6.825506401951071e-07, "loss": 0.6341, "step": 16282 }, { "epoch": 0.8368280398807688, "grad_norm": 1.0473618507385254, "learning_rate": 6.821309392037068e-07, "loss": 0.7156, "step": 16283 }, { "epoch": 0.8368794326241135, "grad_norm": 1.1246974468231201, "learning_rate": 6.817113578426393e-07, "loss": 0.7223, "step": 16284 }, { "epoch": 0.8369308253674581, "grad_norm": 1.1153661012649536, "learning_rate": 6.812918961235282e-07, "loss": 0.6496, "step": 16285 }, { "epoch": 0.8369822181108028, "grad_norm": 1.1102979183197021, "learning_rate": 6.808725540579975e-07, "loss": 0.6941, "step": 16286 }, { "epoch": 0.8370336108541474, "grad_norm": 1.0231292247772217, "learning_rate": 6.804533316576644e-07, "loss": 0.6468, "step": 16287 }, { "epoch": 0.837085003597492, "grad_norm": 1.0611438751220703, "learning_rate": 6.800342289341433e-07, "loss": 0.6834, "step": 16288 }, { "epoch": 0.8371363963408367, "grad_norm": 1.0873777866363525, "learning_rate": 6.796152458990469e-07, "loss": 0.7043, "step": 16289 }, { "epoch": 0.8371877890841813, "grad_norm": 1.006508469581604, "learning_rate": 6.791963825639825e-07, "loss": 0.6857, "step": 16290 }, { "epoch": 0.837239181827526, "grad_norm": 1.046751618385315, "learning_rate": 6.787776389405559e-07, "loss": 0.7373, "step": 16291 }, { "epoch": 0.8372905745708706, "grad_norm": 1.0341377258300781, "learning_rate": 6.783590150403669e-07, "loss": 0.6417, "step": 16292 }, { "epoch": 0.8373419673142153, "grad_norm": 1.1443153619766235, "learning_rate": 6.779405108750175e-07, "loss": 0.6744, "step": 16293 }, { "epoch": 0.8373933600575598, "grad_norm": 1.1029880046844482, "learning_rate": 6.775221264560983e-07, "loss": 0.7115, "step": 16294 }, { "epoch": 0.8374447528009045, "grad_norm": 1.034118413925171, "learning_rate": 6.771038617952041e-07, "loss": 0.7104, "step": 16295 }, { "epoch": 0.8374961455442491, "grad_norm": 1.0693303346633911, "learning_rate": 6.766857169039226e-07, "loss": 0.6469, "step": 16296 }, { "epoch": 0.8375475382875938, "grad_norm": 1.14186429977417, "learning_rate": 6.762676917938377e-07, "loss": 0.712, "step": 16297 }, { "epoch": 0.8375989310309384, "grad_norm": 1.1504127979278564, "learning_rate": 6.758497864765329e-07, "loss": 0.7231, "step": 16298 }, { "epoch": 0.8376503237742831, "grad_norm": 1.084836483001709, "learning_rate": 6.754320009635856e-07, "loss": 0.7136, "step": 16299 }, { "epoch": 0.8377017165176277, "grad_norm": 1.0781267881393433, "learning_rate": 6.750143352665717e-07, "loss": 0.702, "step": 16300 }, { "epoch": 0.8377531092609724, "grad_norm": 1.1036078929901123, "learning_rate": 6.745967893970606e-07, "loss": 0.7042, "step": 16301 }, { "epoch": 0.837804502004317, "grad_norm": 1.1051130294799805, "learning_rate": 6.741793633666244e-07, "loss": 0.6755, "step": 16302 }, { "epoch": 0.8378558947476616, "grad_norm": 1.036118745803833, "learning_rate": 6.737620571868258e-07, "loss": 0.6627, "step": 16303 }, { "epoch": 0.8379072874910063, "grad_norm": 0.9984489679336548, "learning_rate": 6.733448708692275e-07, "loss": 0.6507, "step": 16304 }, { "epoch": 0.8379586802343509, "grad_norm": 1.1033415794372559, "learning_rate": 6.729278044253867e-07, "loss": 0.7091, "step": 16305 }, { "epoch": 0.8380100729776956, "grad_norm": 1.1419497728347778, "learning_rate": 6.725108578668609e-07, "loss": 0.6371, "step": 16306 }, { "epoch": 0.8380614657210402, "grad_norm": 1.183470606803894, "learning_rate": 6.720940312052004e-07, "loss": 0.6731, "step": 16307 }, { "epoch": 0.8381128584643849, "grad_norm": 1.1097166538238525, "learning_rate": 6.716773244519531e-07, "loss": 0.7235, "step": 16308 }, { "epoch": 0.8381642512077294, "grad_norm": 1.1303751468658447, "learning_rate": 6.712607376186675e-07, "loss": 0.7271, "step": 16309 }, { "epoch": 0.8382156439510741, "grad_norm": 1.2080334424972534, "learning_rate": 6.708442707168811e-07, "loss": 0.7088, "step": 16310 }, { "epoch": 0.8382670366944187, "grad_norm": 1.0644322633743286, "learning_rate": 6.704279237581357e-07, "loss": 0.6901, "step": 16311 }, { "epoch": 0.8383184294377634, "grad_norm": 0.7498881816864014, "learning_rate": 6.700116967539655e-07, "loss": 0.6282, "step": 16312 }, { "epoch": 0.838369822181108, "grad_norm": 1.0880926847457886, "learning_rate": 6.695955897159023e-07, "loss": 0.7377, "step": 16313 }, { "epoch": 0.8384212149244527, "grad_norm": 1.056373119354248, "learning_rate": 6.691796026554731e-07, "loss": 0.722, "step": 16314 }, { "epoch": 0.8384726076677973, "grad_norm": 1.1985082626342773, "learning_rate": 6.687637355842069e-07, "loss": 0.6538, "step": 16315 }, { "epoch": 0.838524000411142, "grad_norm": 1.159277319908142, "learning_rate": 6.683479885136229e-07, "loss": 0.6737, "step": 16316 }, { "epoch": 0.8385753931544866, "grad_norm": 1.1270928382873535, "learning_rate": 6.679323614552396e-07, "loss": 0.735, "step": 16317 }, { "epoch": 0.8386267858978312, "grad_norm": 0.7263604402542114, "learning_rate": 6.675168544205745e-07, "loss": 0.6337, "step": 16318 }, { "epoch": 0.8386781786411759, "grad_norm": 1.1149317026138306, "learning_rate": 6.671014674211379e-07, "loss": 0.6978, "step": 16319 }, { "epoch": 0.8387295713845205, "grad_norm": 0.7285401225090027, "learning_rate": 6.666862004684393e-07, "loss": 0.675, "step": 16320 }, { "epoch": 0.8387809641278652, "grad_norm": 1.0955021381378174, "learning_rate": 6.662710535739819e-07, "loss": 0.7389, "step": 16321 }, { "epoch": 0.8388323568712098, "grad_norm": 1.038291335105896, "learning_rate": 6.65856026749272e-07, "loss": 0.6909, "step": 16322 }, { "epoch": 0.8388837496145545, "grad_norm": 1.1523593664169312, "learning_rate": 6.654411200058037e-07, "loss": 0.7083, "step": 16323 }, { "epoch": 0.838935142357899, "grad_norm": 1.0337821245193481, "learning_rate": 6.650263333550744e-07, "loss": 0.6697, "step": 16324 }, { "epoch": 0.8389865351012437, "grad_norm": 0.6926131844520569, "learning_rate": 6.646116668085767e-07, "loss": 0.6304, "step": 16325 }, { "epoch": 0.8390379278445883, "grad_norm": 1.12691330909729, "learning_rate": 6.641971203777975e-07, "loss": 0.7095, "step": 16326 }, { "epoch": 0.839089320587933, "grad_norm": 1.0844695568084717, "learning_rate": 6.637826940742242e-07, "loss": 0.6429, "step": 16327 }, { "epoch": 0.8391407133312776, "grad_norm": 1.1112098693847656, "learning_rate": 6.633683879093383e-07, "loss": 0.6705, "step": 16328 }, { "epoch": 0.8391921060746222, "grad_norm": 1.0747679471969604, "learning_rate": 6.629542018946178e-07, "loss": 0.7109, "step": 16329 }, { "epoch": 0.8392434988179669, "grad_norm": 1.2787449359893799, "learning_rate": 6.625401360415374e-07, "loss": 0.6847, "step": 16330 }, { "epoch": 0.8392948915613115, "grad_norm": 1.0589516162872314, "learning_rate": 6.621261903615711e-07, "loss": 0.6688, "step": 16331 }, { "epoch": 0.8393462843046562, "grad_norm": 1.0881831645965576, "learning_rate": 6.617123648661871e-07, "loss": 0.7177, "step": 16332 }, { "epoch": 0.8393976770480008, "grad_norm": 1.0508501529693604, "learning_rate": 6.612986595668503e-07, "loss": 0.6539, "step": 16333 }, { "epoch": 0.8394490697913455, "grad_norm": 1.1027936935424805, "learning_rate": 6.608850744750217e-07, "loss": 0.7164, "step": 16334 }, { "epoch": 0.8395004625346901, "grad_norm": 1.0460678339004517, "learning_rate": 6.604716096021634e-07, "loss": 0.6495, "step": 16335 }, { "epoch": 0.8395518552780348, "grad_norm": 1.0753124952316284, "learning_rate": 6.600582649597265e-07, "loss": 0.696, "step": 16336 }, { "epoch": 0.8396032480213794, "grad_norm": 0.7549222707748413, "learning_rate": 6.596450405591665e-07, "loss": 0.6546, "step": 16337 }, { "epoch": 0.8396546407647241, "grad_norm": 1.0794966220855713, "learning_rate": 6.592319364119304e-07, "loss": 0.6884, "step": 16338 }, { "epoch": 0.8397060335080686, "grad_norm": 1.1128748655319214, "learning_rate": 6.58818952529463e-07, "loss": 0.7199, "step": 16339 }, { "epoch": 0.8397574262514133, "grad_norm": 1.1007788181304932, "learning_rate": 6.584060889232091e-07, "loss": 0.6986, "step": 16340 }, { "epoch": 0.8398088189947579, "grad_norm": 1.101826786994934, "learning_rate": 6.57993345604605e-07, "loss": 0.6214, "step": 16341 }, { "epoch": 0.8398602117381025, "grad_norm": 1.1578271389007568, "learning_rate": 6.575807225850872e-07, "loss": 0.6847, "step": 16342 }, { "epoch": 0.8399116044814472, "grad_norm": 1.1246073246002197, "learning_rate": 6.571682198760864e-07, "loss": 0.7604, "step": 16343 }, { "epoch": 0.8399629972247918, "grad_norm": 1.0500749349594116, "learning_rate": 6.567558374890348e-07, "loss": 0.6671, "step": 16344 }, { "epoch": 0.8400143899681365, "grad_norm": 1.1606776714324951, "learning_rate": 6.563435754353531e-07, "loss": 0.6884, "step": 16345 }, { "epoch": 0.8400657827114811, "grad_norm": 0.7513163089752197, "learning_rate": 6.559314337264666e-07, "loss": 0.654, "step": 16346 }, { "epoch": 0.8401171754548258, "grad_norm": 1.069115400314331, "learning_rate": 6.555194123737924e-07, "loss": 0.7108, "step": 16347 }, { "epoch": 0.8401685681981704, "grad_norm": 1.1407817602157593, "learning_rate": 6.551075113887478e-07, "loss": 0.7177, "step": 16348 }, { "epoch": 0.8402199609415151, "grad_norm": 0.8228931427001953, "learning_rate": 6.546957307827434e-07, "loss": 0.6328, "step": 16349 }, { "epoch": 0.8402713536848597, "grad_norm": 0.8667601346969604, "learning_rate": 6.542840705671871e-07, "loss": 0.6562, "step": 16350 }, { "epoch": 0.8403227464282044, "grad_norm": 1.1828827857971191, "learning_rate": 6.538725307534882e-07, "loss": 0.7112, "step": 16351 }, { "epoch": 0.840374139171549, "grad_norm": 1.0517867803573608, "learning_rate": 6.534611113530437e-07, "loss": 0.6732, "step": 16352 }, { "epoch": 0.8404255319148937, "grad_norm": 0.772987961769104, "learning_rate": 6.530498123772556e-07, "loss": 0.6486, "step": 16353 }, { "epoch": 0.8404769246582383, "grad_norm": 1.1312000751495361, "learning_rate": 6.526386338375185e-07, "loss": 0.658, "step": 16354 }, { "epoch": 0.8405283174015828, "grad_norm": 1.168704867362976, "learning_rate": 6.522275757452245e-07, "loss": 0.6775, "step": 16355 }, { "epoch": 0.8405797101449275, "grad_norm": 7.032253265380859, "learning_rate": 6.518166381117608e-07, "loss": 0.7372, "step": 16356 }, { "epoch": 0.8406311028882721, "grad_norm": 1.097245454788208, "learning_rate": 6.514058209485152e-07, "loss": 0.7519, "step": 16357 }, { "epoch": 0.8406824956316168, "grad_norm": 1.080039381980896, "learning_rate": 6.509951242668688e-07, "loss": 0.721, "step": 16358 }, { "epoch": 0.8407338883749614, "grad_norm": 1.0440746545791626, "learning_rate": 6.505845480781997e-07, "loss": 0.6478, "step": 16359 }, { "epoch": 0.8407852811183061, "grad_norm": 0.8059173822402954, "learning_rate": 6.501740923938854e-07, "loss": 0.6225, "step": 16360 }, { "epoch": 0.8408366738616507, "grad_norm": 1.125105857849121, "learning_rate": 6.497637572252941e-07, "loss": 0.71, "step": 16361 }, { "epoch": 0.8408880666049954, "grad_norm": 15.266189575195312, "learning_rate": 6.493535425837982e-07, "loss": 0.7837, "step": 16362 }, { "epoch": 0.84093945934834, "grad_norm": 1.1267484426498413, "learning_rate": 6.489434484807599e-07, "loss": 0.7231, "step": 16363 }, { "epoch": 0.8409908520916847, "grad_norm": 1.1437889337539673, "learning_rate": 6.485334749275457e-07, "loss": 0.6679, "step": 16364 }, { "epoch": 0.8410422448350293, "grad_norm": 1.1569018363952637, "learning_rate": 6.481236219355092e-07, "loss": 0.7389, "step": 16365 }, { "epoch": 0.841093637578374, "grad_norm": 1.107492208480835, "learning_rate": 6.477138895160085e-07, "loss": 0.7022, "step": 16366 }, { "epoch": 0.8411450303217186, "grad_norm": 1.1194957494735718, "learning_rate": 6.473042776803956e-07, "loss": 0.7319, "step": 16367 }, { "epoch": 0.8411964230650633, "grad_norm": 1.1241450309753418, "learning_rate": 6.468947864400177e-07, "loss": 0.6814, "step": 16368 }, { "epoch": 0.8412478158084079, "grad_norm": 3.13297438621521, "learning_rate": 6.464854158062217e-07, "loss": 0.7357, "step": 16369 }, { "epoch": 0.8412992085517524, "grad_norm": 0.7655166983604431, "learning_rate": 6.460761657903492e-07, "loss": 0.6447, "step": 16370 }, { "epoch": 0.8413506012950971, "grad_norm": 1.1183664798736572, "learning_rate": 6.456670364037388e-07, "loss": 0.7154, "step": 16371 }, { "epoch": 0.8414019940384417, "grad_norm": 1.0953044891357422, "learning_rate": 6.452580276577236e-07, "loss": 0.6693, "step": 16372 }, { "epoch": 0.8414533867817864, "grad_norm": 1.1274372339248657, "learning_rate": 6.448491395636403e-07, "loss": 0.7206, "step": 16373 }, { "epoch": 0.841504779525131, "grad_norm": 1.1264081001281738, "learning_rate": 6.44440372132813e-07, "loss": 0.7555, "step": 16374 }, { "epoch": 0.8415561722684757, "grad_norm": 1.1442147493362427, "learning_rate": 6.440317253765688e-07, "loss": 0.6602, "step": 16375 }, { "epoch": 0.8416075650118203, "grad_norm": 1.1056187152862549, "learning_rate": 6.436231993062298e-07, "loss": 0.6912, "step": 16376 }, { "epoch": 0.841658957755165, "grad_norm": 1.1231343746185303, "learning_rate": 6.432147939331134e-07, "loss": 0.6844, "step": 16377 }, { "epoch": 0.8417103504985096, "grad_norm": 1.0995243787765503, "learning_rate": 6.428065092685365e-07, "loss": 0.6904, "step": 16378 }, { "epoch": 0.8417617432418543, "grad_norm": 1.0518836975097656, "learning_rate": 6.423983453238103e-07, "loss": 0.7049, "step": 16379 }, { "epoch": 0.8418131359851989, "grad_norm": 0.8346656560897827, "learning_rate": 6.419903021102431e-07, "loss": 0.645, "step": 16380 }, { "epoch": 0.8418645287285436, "grad_norm": 1.084513545036316, "learning_rate": 6.41582379639139e-07, "loss": 0.6827, "step": 16381 }, { "epoch": 0.8419159214718882, "grad_norm": 1.0619189739227295, "learning_rate": 6.411745779218026e-07, "loss": 0.6575, "step": 16382 }, { "epoch": 0.8419673142152329, "grad_norm": 1.150114893913269, "learning_rate": 6.407668969695302e-07, "loss": 0.694, "step": 16383 }, { "epoch": 0.8420187069585775, "grad_norm": 0.7526631951332092, "learning_rate": 6.403593367936178e-07, "loss": 0.6828, "step": 16384 }, { "epoch": 0.842070099701922, "grad_norm": 1.0531716346740723, "learning_rate": 6.399518974053564e-07, "loss": 0.6863, "step": 16385 }, { "epoch": 0.8421214924452667, "grad_norm": 0.6946149468421936, "learning_rate": 6.395445788160365e-07, "loss": 0.6405, "step": 16386 }, { "epoch": 0.8421728851886113, "grad_norm": 0.8326166868209839, "learning_rate": 6.3913738103694e-07, "loss": 0.6384, "step": 16387 }, { "epoch": 0.842224277931956, "grad_norm": 0.793912947177887, "learning_rate": 6.387303040793519e-07, "loss": 0.6449, "step": 16388 }, { "epoch": 0.8422756706753006, "grad_norm": 0.9851839542388916, "learning_rate": 6.383233479545486e-07, "loss": 0.6542, "step": 16389 }, { "epoch": 0.8423270634186453, "grad_norm": 1.1075139045715332, "learning_rate": 6.379165126738046e-07, "loss": 0.7194, "step": 16390 }, { "epoch": 0.8423784561619899, "grad_norm": 1.078636884689331, "learning_rate": 6.375097982483941e-07, "loss": 0.6857, "step": 16391 }, { "epoch": 0.8424298489053346, "grad_norm": 1.0742202997207642, "learning_rate": 6.371032046895842e-07, "loss": 0.703, "step": 16392 }, { "epoch": 0.8424812416486792, "grad_norm": 1.1089184284210205, "learning_rate": 6.366967320086398e-07, "loss": 0.6882, "step": 16393 }, { "epoch": 0.8425326343920239, "grad_norm": 1.0909452438354492, "learning_rate": 6.362903802168219e-07, "loss": 0.695, "step": 16394 }, { "epoch": 0.8425840271353685, "grad_norm": 1.1308059692382812, "learning_rate": 6.358841493253904e-07, "loss": 0.7211, "step": 16395 }, { "epoch": 0.8426354198787132, "grad_norm": 1.1464941501617432, "learning_rate": 6.354780393455995e-07, "loss": 0.6763, "step": 16396 }, { "epoch": 0.8426868126220578, "grad_norm": 1.0735931396484375, "learning_rate": 6.350720502887004e-07, "loss": 0.6413, "step": 16397 }, { "epoch": 0.8427382053654024, "grad_norm": 1.1571218967437744, "learning_rate": 6.346661821659411e-07, "loss": 0.7242, "step": 16398 }, { "epoch": 0.8427895981087471, "grad_norm": 1.7675156593322754, "learning_rate": 6.342604349885684e-07, "loss": 0.7324, "step": 16399 }, { "epoch": 0.8428409908520916, "grad_norm": 1.088990569114685, "learning_rate": 6.338548087678226e-07, "loss": 0.6705, "step": 16400 }, { "epoch": 0.8428923835954363, "grad_norm": 1.1002097129821777, "learning_rate": 6.334493035149408e-07, "loss": 0.7192, "step": 16401 }, { "epoch": 0.8429437763387809, "grad_norm": 1.0985361337661743, "learning_rate": 6.330439192411614e-07, "loss": 0.7188, "step": 16402 }, { "epoch": 0.8429951690821256, "grad_norm": 1.1096352338790894, "learning_rate": 6.326386559577119e-07, "loss": 0.7135, "step": 16403 }, { "epoch": 0.8430465618254702, "grad_norm": 1.1229389905929565, "learning_rate": 6.322335136758229e-07, "loss": 0.7016, "step": 16404 }, { "epoch": 0.8430979545688149, "grad_norm": 1.0732941627502441, "learning_rate": 6.318284924067181e-07, "loss": 0.6627, "step": 16405 }, { "epoch": 0.8431493473121595, "grad_norm": 1.118199348449707, "learning_rate": 6.314235921616202e-07, "loss": 0.6382, "step": 16406 }, { "epoch": 0.8432007400555042, "grad_norm": 1.1047911643981934, "learning_rate": 6.310188129517447e-07, "loss": 0.6734, "step": 16407 }, { "epoch": 0.8432521327988488, "grad_norm": 1.0798594951629639, "learning_rate": 6.306141547883099e-07, "loss": 0.7037, "step": 16408 }, { "epoch": 0.8433035255421935, "grad_norm": 1.058784008026123, "learning_rate": 6.302096176825251e-07, "loss": 0.6673, "step": 16409 }, { "epoch": 0.8433549182855381, "grad_norm": 0.7257029414176941, "learning_rate": 6.298052016455974e-07, "loss": 0.6424, "step": 16410 }, { "epoch": 0.8434063110288827, "grad_norm": 1.08196222782135, "learning_rate": 6.294009066887346e-07, "loss": 0.6476, "step": 16411 }, { "epoch": 0.8434577037722274, "grad_norm": 1.0518192052841187, "learning_rate": 6.289967328231355e-07, "loss": 0.6606, "step": 16412 }, { "epoch": 0.843509096515572, "grad_norm": 1.0450294017791748, "learning_rate": 6.285926800599995e-07, "loss": 0.7022, "step": 16413 }, { "epoch": 0.8435604892589167, "grad_norm": 1.0903228521347046, "learning_rate": 6.281887484105192e-07, "loss": 0.7399, "step": 16414 }, { "epoch": 0.8436118820022612, "grad_norm": 1.0687614679336548, "learning_rate": 6.277849378858897e-07, "loss": 0.7175, "step": 16415 }, { "epoch": 0.8436632747456059, "grad_norm": 1.0549064874649048, "learning_rate": 6.273812484972941e-07, "loss": 0.6782, "step": 16416 }, { "epoch": 0.8437146674889505, "grad_norm": 1.0292104482650757, "learning_rate": 6.269776802559208e-07, "loss": 0.6921, "step": 16417 }, { "epoch": 0.8437660602322952, "grad_norm": 1.0841951370239258, "learning_rate": 6.265742331729491e-07, "loss": 0.6522, "step": 16418 }, { "epoch": 0.8438174529756398, "grad_norm": 1.1700786352157593, "learning_rate": 6.261709072595568e-07, "loss": 0.6904, "step": 16419 }, { "epoch": 0.8438688457189845, "grad_norm": 1.1372413635253906, "learning_rate": 6.257677025269193e-07, "loss": 0.7169, "step": 16420 }, { "epoch": 0.8439202384623291, "grad_norm": 1.1404823064804077, "learning_rate": 6.253646189862078e-07, "loss": 0.7212, "step": 16421 }, { "epoch": 0.8439716312056738, "grad_norm": 1.1127538681030273, "learning_rate": 6.249616566485899e-07, "loss": 0.6829, "step": 16422 }, { "epoch": 0.8440230239490184, "grad_norm": 0.7741541266441345, "learning_rate": 6.245588155252285e-07, "loss": 0.6228, "step": 16423 }, { "epoch": 0.844074416692363, "grad_norm": 1.3361481428146362, "learning_rate": 6.24156095627288e-07, "loss": 0.6653, "step": 16424 }, { "epoch": 0.8441258094357077, "grad_norm": 1.226232886314392, "learning_rate": 6.23753496965922e-07, "loss": 0.5761, "step": 16425 }, { "epoch": 0.8441772021790523, "grad_norm": 1.0449851751327515, "learning_rate": 6.233510195522874e-07, "loss": 0.6375, "step": 16426 }, { "epoch": 0.844228594922397, "grad_norm": 1.0675405263900757, "learning_rate": 6.229486633975346e-07, "loss": 0.6994, "step": 16427 }, { "epoch": 0.8442799876657416, "grad_norm": 2.415781021118164, "learning_rate": 6.225464285128119e-07, "loss": 0.7259, "step": 16428 }, { "epoch": 0.8443313804090863, "grad_norm": 0.8723052740097046, "learning_rate": 6.221443149092626e-07, "loss": 0.6411, "step": 16429 }, { "epoch": 0.8443827731524309, "grad_norm": 1.0396190881729126, "learning_rate": 6.217423225980285e-07, "loss": 0.6605, "step": 16430 }, { "epoch": 0.8444341658957755, "grad_norm": 1.068182349205017, "learning_rate": 6.213404515902466e-07, "loss": 0.7565, "step": 16431 }, { "epoch": 0.8444855586391201, "grad_norm": 1.012959361076355, "learning_rate": 6.209387018970498e-07, "loss": 0.6902, "step": 16432 }, { "epoch": 0.8445369513824648, "grad_norm": 1.0755106210708618, "learning_rate": 6.205370735295713e-07, "loss": 0.71, "step": 16433 }, { "epoch": 0.8445883441258094, "grad_norm": 0.8704649806022644, "learning_rate": 6.201355664989378e-07, "loss": 0.7052, "step": 16434 }, { "epoch": 0.8446397368691541, "grad_norm": 1.0970100164413452, "learning_rate": 6.197341808162732e-07, "loss": 0.6276, "step": 16435 }, { "epoch": 0.8446911296124987, "grad_norm": 1.0853278636932373, "learning_rate": 6.193329164926964e-07, "loss": 0.7041, "step": 16436 }, { "epoch": 0.8447425223558434, "grad_norm": 0.7823596596717834, "learning_rate": 6.189317735393297e-07, "loss": 0.6432, "step": 16437 }, { "epoch": 0.844793915099188, "grad_norm": 1.1115933656692505, "learning_rate": 6.185307519672812e-07, "loss": 0.6838, "step": 16438 }, { "epoch": 0.8448453078425326, "grad_norm": 1.098714828491211, "learning_rate": 6.181298517876655e-07, "loss": 0.724, "step": 16439 }, { "epoch": 0.8448967005858773, "grad_norm": 1.0852597951889038, "learning_rate": 6.177290730115876e-07, "loss": 0.7007, "step": 16440 }, { "epoch": 0.8449480933292219, "grad_norm": 1.1243349313735962, "learning_rate": 6.173284156501535e-07, "loss": 0.6795, "step": 16441 }, { "epoch": 0.8449994860725666, "grad_norm": 1.1116089820861816, "learning_rate": 6.169278797144629e-07, "loss": 0.6869, "step": 16442 }, { "epoch": 0.8450508788159112, "grad_norm": 1.0651346445083618, "learning_rate": 6.165274652156116e-07, "loss": 0.6933, "step": 16443 }, { "epoch": 0.8451022715592559, "grad_norm": 1.040443778038025, "learning_rate": 6.161271721646972e-07, "loss": 0.7091, "step": 16444 }, { "epoch": 0.8451536643026005, "grad_norm": 0.7358945608139038, "learning_rate": 6.15727000572805e-07, "loss": 0.647, "step": 16445 }, { "epoch": 0.8452050570459451, "grad_norm": 1.0403320789337158, "learning_rate": 6.153269504510262e-07, "loss": 0.719, "step": 16446 }, { "epoch": 0.8452564497892897, "grad_norm": 1.0431816577911377, "learning_rate": 6.149270218104436e-07, "loss": 0.645, "step": 16447 }, { "epoch": 0.8453078425326344, "grad_norm": 1.062342882156372, "learning_rate": 6.145272146621367e-07, "loss": 0.686, "step": 16448 }, { "epoch": 0.845359235275979, "grad_norm": 1.0591074228286743, "learning_rate": 6.141275290171816e-07, "loss": 0.6523, "step": 16449 }, { "epoch": 0.8454106280193237, "grad_norm": 0.7416768074035645, "learning_rate": 6.137279648866546e-07, "loss": 0.6724, "step": 16450 }, { "epoch": 0.8454620207626683, "grad_norm": 1.0046964883804321, "learning_rate": 6.133285222816243e-07, "loss": 0.6232, "step": 16451 }, { "epoch": 0.845513413506013, "grad_norm": 1.06511652469635, "learning_rate": 6.129292012131571e-07, "loss": 0.6533, "step": 16452 }, { "epoch": 0.8455648062493576, "grad_norm": 0.7565569281578064, "learning_rate": 6.125300016923197e-07, "loss": 0.6444, "step": 16453 }, { "epoch": 0.8456161989927022, "grad_norm": 0.7711178660392761, "learning_rate": 6.121309237301676e-07, "loss": 0.6965, "step": 16454 }, { "epoch": 0.8456675917360469, "grad_norm": 1.035863995552063, "learning_rate": 6.117319673377609e-07, "loss": 0.6818, "step": 16455 }, { "epoch": 0.8457189844793915, "grad_norm": 1.1163461208343506, "learning_rate": 6.113331325261512e-07, "loss": 0.6807, "step": 16456 }, { "epoch": 0.8457703772227362, "grad_norm": 1.1405702829360962, "learning_rate": 6.10934419306391e-07, "loss": 0.6186, "step": 16457 }, { "epoch": 0.8458217699660808, "grad_norm": 1.0459213256835938, "learning_rate": 6.105358276895241e-07, "loss": 0.6521, "step": 16458 }, { "epoch": 0.8458731627094255, "grad_norm": 1.0791577100753784, "learning_rate": 6.10137357686596e-07, "loss": 0.6981, "step": 16459 }, { "epoch": 0.8459245554527701, "grad_norm": 0.6540331244468689, "learning_rate": 6.097390093086453e-07, "loss": 0.6295, "step": 16460 }, { "epoch": 0.8459759481961147, "grad_norm": 1.1189758777618408, "learning_rate": 6.093407825667086e-07, "loss": 0.669, "step": 16461 }, { "epoch": 0.8460273409394593, "grad_norm": 1.04123854637146, "learning_rate": 6.089426774718205e-07, "loss": 0.6561, "step": 16462 }, { "epoch": 0.846078733682804, "grad_norm": 1.1685868501663208, "learning_rate": 6.085446940350104e-07, "loss": 0.6897, "step": 16463 }, { "epoch": 0.8461301264261486, "grad_norm": 1.245519757270813, "learning_rate": 6.08146832267304e-07, "loss": 0.7106, "step": 16464 }, { "epoch": 0.8461815191694932, "grad_norm": 1.0681719779968262, "learning_rate": 6.077490921797236e-07, "loss": 0.7225, "step": 16465 }, { "epoch": 0.8462329119128379, "grad_norm": 1.158044457435608, "learning_rate": 6.07351473783292e-07, "loss": 0.6879, "step": 16466 }, { "epoch": 0.8462843046561825, "grad_norm": 1.0493100881576538, "learning_rate": 6.06953977089022e-07, "loss": 0.6946, "step": 16467 }, { "epoch": 0.8463356973995272, "grad_norm": 0.7046316862106323, "learning_rate": 6.065566021079294e-07, "loss": 0.6065, "step": 16468 }, { "epoch": 0.8463870901428718, "grad_norm": 1.0526148080825806, "learning_rate": 6.061593488510226e-07, "loss": 0.6351, "step": 16469 }, { "epoch": 0.8464384828862165, "grad_norm": 1.16148042678833, "learning_rate": 6.057622173293065e-07, "loss": 0.7008, "step": 16470 }, { "epoch": 0.8464898756295611, "grad_norm": 1.1463459730148315, "learning_rate": 6.053652075537869e-07, "loss": 0.7136, "step": 16471 }, { "epoch": 0.8465412683729058, "grad_norm": 1.1279423236846924, "learning_rate": 6.049683195354605e-07, "loss": 0.697, "step": 16472 }, { "epoch": 0.8465926611162504, "grad_norm": 1.0883005857467651, "learning_rate": 6.045715532853269e-07, "loss": 0.6773, "step": 16473 }, { "epoch": 0.8466440538595951, "grad_norm": 1.1768378019332886, "learning_rate": 6.041749088143744e-07, "loss": 0.6551, "step": 16474 }, { "epoch": 0.8466954466029397, "grad_norm": 0.79721599817276, "learning_rate": 6.03778386133596e-07, "loss": 0.6368, "step": 16475 }, { "epoch": 0.8467468393462843, "grad_norm": 1.1473215818405151, "learning_rate": 6.033819852539769e-07, "loss": 0.6891, "step": 16476 }, { "epoch": 0.8467982320896289, "grad_norm": 1.019971251487732, "learning_rate": 6.029857061864986e-07, "loss": 0.6982, "step": 16477 }, { "epoch": 0.8468496248329735, "grad_norm": 1.1158809661865234, "learning_rate": 6.025895489421397e-07, "loss": 0.703, "step": 16478 }, { "epoch": 0.8469010175763182, "grad_norm": 1.1332000494003296, "learning_rate": 6.021935135318791e-07, "loss": 0.6711, "step": 16479 }, { "epoch": 0.8469524103196628, "grad_norm": 1.0930920839309692, "learning_rate": 6.017975999666869e-07, "loss": 0.6922, "step": 16480 }, { "epoch": 0.8470038030630075, "grad_norm": 1.1375925540924072, "learning_rate": 6.014018082575329e-07, "loss": 0.7206, "step": 16481 }, { "epoch": 0.8470551958063521, "grad_norm": 1.0558089017868042, "learning_rate": 6.010061384153831e-07, "loss": 0.6713, "step": 16482 }, { "epoch": 0.8471065885496968, "grad_norm": 1.0715219974517822, "learning_rate": 6.006105904511989e-07, "loss": 0.6917, "step": 16483 }, { "epoch": 0.8471579812930414, "grad_norm": 1.0560548305511475, "learning_rate": 6.002151643759407e-07, "loss": 0.7248, "step": 16484 }, { "epoch": 0.8472093740363861, "grad_norm": 1.0935081243515015, "learning_rate": 5.998198602005634e-07, "loss": 0.6817, "step": 16485 }, { "epoch": 0.8472607667797307, "grad_norm": 0.7103713750839233, "learning_rate": 5.994246779360197e-07, "loss": 0.6353, "step": 16486 }, { "epoch": 0.8473121595230754, "grad_norm": 0.7412858009338379, "learning_rate": 5.990296175932564e-07, "loss": 0.6136, "step": 16487 }, { "epoch": 0.84736355226642, "grad_norm": 0.7032665014266968, "learning_rate": 5.98634679183222e-07, "loss": 0.6573, "step": 16488 }, { "epoch": 0.8474149450097647, "grad_norm": 0.7189478278160095, "learning_rate": 5.982398627168579e-07, "loss": 0.6413, "step": 16489 }, { "epoch": 0.8474663377531093, "grad_norm": 1.0538734197616577, "learning_rate": 5.978451682051012e-07, "loss": 0.6247, "step": 16490 }, { "epoch": 0.8475177304964538, "grad_norm": 0.7165653109550476, "learning_rate": 5.974505956588877e-07, "loss": 0.5821, "step": 16491 }, { "epoch": 0.8475691232397985, "grad_norm": 1.0727893114089966, "learning_rate": 5.97056145089151e-07, "loss": 0.6604, "step": 16492 }, { "epoch": 0.8476205159831431, "grad_norm": 1.101873755455017, "learning_rate": 5.966618165068189e-07, "loss": 0.7053, "step": 16493 }, { "epoch": 0.8476719087264878, "grad_norm": 1.107740044593811, "learning_rate": 5.962676099228148e-07, "loss": 0.6712, "step": 16494 }, { "epoch": 0.8477233014698324, "grad_norm": 1.0503225326538086, "learning_rate": 5.958735253480647e-07, "loss": 0.6514, "step": 16495 }, { "epoch": 0.8477746942131771, "grad_norm": 1.180428147315979, "learning_rate": 5.954795627934818e-07, "loss": 0.696, "step": 16496 }, { "epoch": 0.8478260869565217, "grad_norm": 0.7621245384216309, "learning_rate": 5.950857222699852e-07, "loss": 0.6549, "step": 16497 }, { "epoch": 0.8478774796998664, "grad_norm": 1.0458861589431763, "learning_rate": 5.946920037884851e-07, "loss": 0.6516, "step": 16498 }, { "epoch": 0.847928872443211, "grad_norm": 0.7500308156013489, "learning_rate": 5.942984073598901e-07, "loss": 0.6519, "step": 16499 }, { "epoch": 0.8479802651865557, "grad_norm": 1.07485830783844, "learning_rate": 5.939049329951041e-07, "loss": 0.6915, "step": 16500 }, { "epoch": 0.8480316579299003, "grad_norm": 1.1496615409851074, "learning_rate": 5.935115807050301e-07, "loss": 0.7269, "step": 16501 }, { "epoch": 0.848083050673245, "grad_norm": 1.0851554870605469, "learning_rate": 5.931183505005661e-07, "loss": 0.6538, "step": 16502 }, { "epoch": 0.8481344434165896, "grad_norm": 1.1087366342544556, "learning_rate": 5.92725242392605e-07, "loss": 0.7092, "step": 16503 }, { "epoch": 0.8481858361599343, "grad_norm": 1.0834993124008179, "learning_rate": 5.923322563920409e-07, "loss": 0.6432, "step": 16504 }, { "epoch": 0.8482372289032789, "grad_norm": 1.0779434442520142, "learning_rate": 5.919393925097611e-07, "loss": 0.6554, "step": 16505 }, { "epoch": 0.8482886216466234, "grad_norm": 1.258431077003479, "learning_rate": 5.915466507566492e-07, "loss": 0.7716, "step": 16506 }, { "epoch": 0.8483400143899681, "grad_norm": 0.6868913769721985, "learning_rate": 5.911540311435859e-07, "loss": 0.651, "step": 16507 }, { "epoch": 0.8483914071333127, "grad_norm": 1.0884453058242798, "learning_rate": 5.907615336814527e-07, "loss": 0.668, "step": 16508 }, { "epoch": 0.8484427998766574, "grad_norm": 1.068174958229065, "learning_rate": 5.903691583811189e-07, "loss": 0.6714, "step": 16509 }, { "epoch": 0.848494192620002, "grad_norm": 1.0838615894317627, "learning_rate": 5.899769052534598e-07, "loss": 0.6706, "step": 16510 }, { "epoch": 0.8485455853633467, "grad_norm": 1.1418522596359253, "learning_rate": 5.895847743093414e-07, "loss": 0.7165, "step": 16511 }, { "epoch": 0.8485969781066913, "grad_norm": 1.1390329599380493, "learning_rate": 5.891927655596269e-07, "loss": 0.6505, "step": 16512 }, { "epoch": 0.848648370850036, "grad_norm": 1.1139330863952637, "learning_rate": 5.8880087901518e-07, "loss": 0.6799, "step": 16513 }, { "epoch": 0.8486997635933806, "grad_norm": 1.0549362897872925, "learning_rate": 5.884091146868565e-07, "loss": 0.6929, "step": 16514 }, { "epoch": 0.8487511563367253, "grad_norm": 1.1215424537658691, "learning_rate": 5.880174725855109e-07, "loss": 0.7369, "step": 16515 }, { "epoch": 0.8488025490800699, "grad_norm": 0.7168447971343994, "learning_rate": 5.87625952721993e-07, "loss": 0.6686, "step": 16516 }, { "epoch": 0.8488539418234146, "grad_norm": 0.6985307931900024, "learning_rate": 5.872345551071529e-07, "loss": 0.637, "step": 16517 }, { "epoch": 0.8489053345667592, "grad_norm": 1.109653115272522, "learning_rate": 5.868432797518314e-07, "loss": 0.7013, "step": 16518 }, { "epoch": 0.8489567273101039, "grad_norm": 1.08949875831604, "learning_rate": 5.86452126666871e-07, "loss": 0.7005, "step": 16519 }, { "epoch": 0.8490081200534485, "grad_norm": 1.080488681793213, "learning_rate": 5.86061095863108e-07, "loss": 0.6812, "step": 16520 }, { "epoch": 0.8490595127967931, "grad_norm": 1.1085983514785767, "learning_rate": 5.856701873513776e-07, "loss": 0.6932, "step": 16521 }, { "epoch": 0.8491109055401377, "grad_norm": 0.7504760026931763, "learning_rate": 5.852794011425094e-07, "loss": 0.6083, "step": 16522 }, { "epoch": 0.8491622982834823, "grad_norm": 1.0299628973007202, "learning_rate": 5.848887372473299e-07, "loss": 0.6972, "step": 16523 }, { "epoch": 0.849213691026827, "grad_norm": 1.0379050970077515, "learning_rate": 5.844981956766649e-07, "loss": 0.7466, "step": 16524 }, { "epoch": 0.8492650837701716, "grad_norm": 1.0614036321640015, "learning_rate": 5.841077764413317e-07, "loss": 0.7266, "step": 16525 }, { "epoch": 0.8493164765135163, "grad_norm": 0.8220763802528381, "learning_rate": 5.837174795521494e-07, "loss": 0.6375, "step": 16526 }, { "epoch": 0.8493678692568609, "grad_norm": 1.0771716833114624, "learning_rate": 5.833273050199306e-07, "loss": 0.7028, "step": 16527 }, { "epoch": 0.8494192620002056, "grad_norm": 1.1031699180603027, "learning_rate": 5.829372528554861e-07, "loss": 0.6758, "step": 16528 }, { "epoch": 0.8494706547435502, "grad_norm": 0.7094926238059998, "learning_rate": 5.82547323069621e-07, "loss": 0.6222, "step": 16529 }, { "epoch": 0.8495220474868949, "grad_norm": 1.102644681930542, "learning_rate": 5.821575156731407e-07, "loss": 0.6791, "step": 16530 }, { "epoch": 0.8495734402302395, "grad_norm": 1.1244462728500366, "learning_rate": 5.817678306768443e-07, "loss": 0.6972, "step": 16531 }, { "epoch": 0.8496248329735842, "grad_norm": 0.7482755780220032, "learning_rate": 5.813782680915287e-07, "loss": 0.641, "step": 16532 }, { "epoch": 0.8496762257169288, "grad_norm": 1.0783021450042725, "learning_rate": 5.809888279279863e-07, "loss": 0.6847, "step": 16533 }, { "epoch": 0.8497276184602734, "grad_norm": 1.063064455986023, "learning_rate": 5.805995101970063e-07, "loss": 0.6684, "step": 16534 }, { "epoch": 0.8497790112036181, "grad_norm": 1.0792648792266846, "learning_rate": 5.802103149093774e-07, "loss": 0.6628, "step": 16535 }, { "epoch": 0.8498304039469627, "grad_norm": 0.7009086608886719, "learning_rate": 5.798212420758798e-07, "loss": 0.7046, "step": 16536 }, { "epoch": 0.8498817966903073, "grad_norm": 1.1129181385040283, "learning_rate": 5.79432291707297e-07, "loss": 0.6975, "step": 16537 }, { "epoch": 0.8499331894336519, "grad_norm": 1.1756367683410645, "learning_rate": 5.790434638144005e-07, "loss": 0.6945, "step": 16538 }, { "epoch": 0.8499845821769966, "grad_norm": 1.1293703317642212, "learning_rate": 5.786547584079666e-07, "loss": 0.6768, "step": 16539 }, { "epoch": 0.8500359749203412, "grad_norm": 0.843086302280426, "learning_rate": 5.782661754987634e-07, "loss": 0.617, "step": 16540 }, { "epoch": 0.8500873676636859, "grad_norm": 1.1602028608322144, "learning_rate": 5.778777150975573e-07, "loss": 0.6618, "step": 16541 }, { "epoch": 0.8501387604070305, "grad_norm": 1.1112890243530273, "learning_rate": 5.774893772151097e-07, "loss": 0.7418, "step": 16542 }, { "epoch": 0.8501901531503752, "grad_norm": 1.0591968297958374, "learning_rate": 5.771011618621813e-07, "loss": 0.6521, "step": 16543 }, { "epoch": 0.8502415458937198, "grad_norm": 1.0433895587921143, "learning_rate": 5.767130690495281e-07, "loss": 0.6735, "step": 16544 }, { "epoch": 0.8502929386370645, "grad_norm": 1.1210577487945557, "learning_rate": 5.763250987879004e-07, "loss": 0.6845, "step": 16545 }, { "epoch": 0.8503443313804091, "grad_norm": 1.080849051475525, "learning_rate": 5.759372510880512e-07, "loss": 0.6186, "step": 16546 }, { "epoch": 0.8503957241237537, "grad_norm": 1.1271902322769165, "learning_rate": 5.755495259607219e-07, "loss": 0.7142, "step": 16547 }, { "epoch": 0.8504471168670984, "grad_norm": 1.0916759967803955, "learning_rate": 5.751619234166573e-07, "loss": 0.7257, "step": 16548 }, { "epoch": 0.850498509610443, "grad_norm": 0.9473386406898499, "learning_rate": 5.747744434665942e-07, "loss": 0.6341, "step": 16549 }, { "epoch": 0.8505499023537877, "grad_norm": 1.0855633020401, "learning_rate": 5.743870861212719e-07, "loss": 0.6276, "step": 16550 }, { "epoch": 0.8506012950971323, "grad_norm": 1.021533489227295, "learning_rate": 5.73999851391418e-07, "loss": 0.6438, "step": 16551 }, { "epoch": 0.8506526878404769, "grad_norm": 1.1036608219146729, "learning_rate": 5.736127392877644e-07, "loss": 0.7253, "step": 16552 }, { "epoch": 0.8507040805838215, "grad_norm": 1.1576470136642456, "learning_rate": 5.732257498210347e-07, "loss": 0.6909, "step": 16553 }, { "epoch": 0.8507554733271662, "grad_norm": 1.08005952835083, "learning_rate": 5.728388830019505e-07, "loss": 0.7271, "step": 16554 }, { "epoch": 0.8508068660705108, "grad_norm": 1.0497759580612183, "learning_rate": 5.724521388412319e-07, "loss": 0.6959, "step": 16555 }, { "epoch": 0.8508582588138555, "grad_norm": 1.0315431356430054, "learning_rate": 5.720655173495931e-07, "loss": 0.6368, "step": 16556 }, { "epoch": 0.8509096515572001, "grad_norm": 1.0845293998718262, "learning_rate": 5.716790185377464e-07, "loss": 0.7169, "step": 16557 }, { "epoch": 0.8509610443005448, "grad_norm": 0.82472163438797, "learning_rate": 5.712926424163978e-07, "loss": 0.6232, "step": 16558 }, { "epoch": 0.8510124370438894, "grad_norm": 1.0753751993179321, "learning_rate": 5.709063889962563e-07, "loss": 0.7009, "step": 16559 }, { "epoch": 0.851063829787234, "grad_norm": 1.1726080179214478, "learning_rate": 5.705202582880187e-07, "loss": 0.7508, "step": 16560 }, { "epoch": 0.8511152225305787, "grad_norm": 1.0945407152175903, "learning_rate": 5.701342503023865e-07, "loss": 0.6906, "step": 16561 }, { "epoch": 0.8511666152739233, "grad_norm": 1.2457739114761353, "learning_rate": 5.697483650500529e-07, "loss": 0.6486, "step": 16562 }, { "epoch": 0.851218008017268, "grad_norm": 1.0417375564575195, "learning_rate": 5.693626025417087e-07, "loss": 0.6945, "step": 16563 }, { "epoch": 0.8512694007606126, "grad_norm": 0.7327407002449036, "learning_rate": 5.689769627880442e-07, "loss": 0.6523, "step": 16564 }, { "epoch": 0.8513207935039573, "grad_norm": 1.0250177383422852, "learning_rate": 5.685914457997405e-07, "loss": 0.7026, "step": 16565 }, { "epoch": 0.8513721862473019, "grad_norm": 1.1260474920272827, "learning_rate": 5.682060515874833e-07, "loss": 0.7034, "step": 16566 }, { "epoch": 0.8514235789906465, "grad_norm": 1.1305336952209473, "learning_rate": 5.678207801619445e-07, "loss": 0.7121, "step": 16567 }, { "epoch": 0.8514749717339911, "grad_norm": 1.0871243476867676, "learning_rate": 5.674356315338031e-07, "loss": 0.6933, "step": 16568 }, { "epoch": 0.8515263644773358, "grad_norm": 1.0837122201919556, "learning_rate": 5.670506057137282e-07, "loss": 0.6857, "step": 16569 }, { "epoch": 0.8515777572206804, "grad_norm": 1.1311004161834717, "learning_rate": 5.666657027123868e-07, "loss": 0.7232, "step": 16570 }, { "epoch": 0.8516291499640251, "grad_norm": 1.0921376943588257, "learning_rate": 5.66280922540443e-07, "loss": 0.7059, "step": 16571 }, { "epoch": 0.8516805427073697, "grad_norm": 1.0684804916381836, "learning_rate": 5.658962652085586e-07, "loss": 0.7173, "step": 16572 }, { "epoch": 0.8517319354507144, "grad_norm": 1.148685336112976, "learning_rate": 5.655117307273905e-07, "loss": 0.7143, "step": 16573 }, { "epoch": 0.851783328194059, "grad_norm": 1.1506116390228271, "learning_rate": 5.651273191075912e-07, "loss": 0.7358, "step": 16574 }, { "epoch": 0.8518347209374036, "grad_norm": 0.7286731600761414, "learning_rate": 5.647430303598145e-07, "loss": 0.6468, "step": 16575 }, { "epoch": 0.8518861136807483, "grad_norm": 1.1082874536514282, "learning_rate": 5.643588644947029e-07, "loss": 0.7421, "step": 16576 }, { "epoch": 0.8519375064240929, "grad_norm": 1.151232361793518, "learning_rate": 5.639748215229035e-07, "loss": 0.7636, "step": 16577 }, { "epoch": 0.8519888991674376, "grad_norm": 0.7597556114196777, "learning_rate": 5.63590901455055e-07, "loss": 0.6994, "step": 16578 }, { "epoch": 0.8520402919107822, "grad_norm": 1.140904426574707, "learning_rate": 5.632071043017951e-07, "loss": 0.7032, "step": 16579 }, { "epoch": 0.8520916846541269, "grad_norm": 1.0783642530441284, "learning_rate": 5.628234300737556e-07, "loss": 0.6932, "step": 16580 }, { "epoch": 0.8521430773974715, "grad_norm": 1.0700945854187012, "learning_rate": 5.624398787815688e-07, "loss": 0.6619, "step": 16581 }, { "epoch": 0.8521944701408161, "grad_norm": 1.1012630462646484, "learning_rate": 5.620564504358599e-07, "loss": 0.739, "step": 16582 }, { "epoch": 0.8522458628841607, "grad_norm": 1.0722440481185913, "learning_rate": 5.61673145047253e-07, "loss": 0.679, "step": 16583 }, { "epoch": 0.8522972556275054, "grad_norm": 0.8380346894264221, "learning_rate": 5.612899626263657e-07, "loss": 0.6816, "step": 16584 }, { "epoch": 0.85234864837085, "grad_norm": 1.1677442789077759, "learning_rate": 5.609069031838176e-07, "loss": 0.7221, "step": 16585 }, { "epoch": 0.8524000411141947, "grad_norm": 1.1117154359817505, "learning_rate": 5.605239667302198e-07, "loss": 0.6751, "step": 16586 }, { "epoch": 0.8524514338575393, "grad_norm": 1.1337196826934814, "learning_rate": 5.601411532761814e-07, "loss": 0.6848, "step": 16587 }, { "epoch": 0.852502826600884, "grad_norm": 0.7420323491096497, "learning_rate": 5.597584628323116e-07, "loss": 0.6952, "step": 16588 }, { "epoch": 0.8525542193442286, "grad_norm": 1.1757662296295166, "learning_rate": 5.593758954092088e-07, "loss": 0.6498, "step": 16589 }, { "epoch": 0.8526056120875732, "grad_norm": 1.0438846349716187, "learning_rate": 5.589934510174755e-07, "loss": 0.6772, "step": 16590 }, { "epoch": 0.8526570048309179, "grad_norm": 1.0811257362365723, "learning_rate": 5.586111296677071e-07, "loss": 0.7021, "step": 16591 }, { "epoch": 0.8527083975742625, "grad_norm": 1.133449912071228, "learning_rate": 5.582289313704953e-07, "loss": 0.6771, "step": 16592 }, { "epoch": 0.8527597903176072, "grad_norm": 1.09807550907135, "learning_rate": 5.578468561364292e-07, "loss": 0.7462, "step": 16593 }, { "epoch": 0.8528111830609518, "grad_norm": 1.0715973377227783, "learning_rate": 5.574649039760955e-07, "loss": 0.7161, "step": 16594 }, { "epoch": 0.8528625758042965, "grad_norm": 1.092837929725647, "learning_rate": 5.570830749000766e-07, "loss": 0.6549, "step": 16595 }, { "epoch": 0.8529139685476411, "grad_norm": 1.0895129442214966, "learning_rate": 5.567013689189493e-07, "loss": 0.6543, "step": 16596 }, { "epoch": 0.8529653612909857, "grad_norm": 1.0654332637786865, "learning_rate": 5.563197860432917e-07, "loss": 0.7447, "step": 16597 }, { "epoch": 0.8530167540343303, "grad_norm": 1.063740611076355, "learning_rate": 5.559383262836754e-07, "loss": 0.7001, "step": 16598 }, { "epoch": 0.853068146777675, "grad_norm": 1.0838167667388916, "learning_rate": 5.555569896506679e-07, "loss": 0.648, "step": 16599 }, { "epoch": 0.8531195395210196, "grad_norm": 1.110093355178833, "learning_rate": 5.551757761548343e-07, "loss": 0.7094, "step": 16600 }, { "epoch": 0.8531709322643642, "grad_norm": 1.1284714937210083, "learning_rate": 5.547946858067393e-07, "loss": 0.6962, "step": 16601 }, { "epoch": 0.8532223250077089, "grad_norm": 1.098952293395996, "learning_rate": 5.544137186169373e-07, "loss": 0.6761, "step": 16602 }, { "epoch": 0.8532737177510535, "grad_norm": 1.1246854066848755, "learning_rate": 5.540328745959866e-07, "loss": 0.6712, "step": 16603 }, { "epoch": 0.8533251104943982, "grad_norm": 1.0508147478103638, "learning_rate": 5.536521537544371e-07, "loss": 0.7032, "step": 16604 }, { "epoch": 0.8533765032377428, "grad_norm": 1.0919137001037598, "learning_rate": 5.532715561028363e-07, "loss": 0.7274, "step": 16605 }, { "epoch": 0.8534278959810875, "grad_norm": 1.107042670249939, "learning_rate": 5.528910816517314e-07, "loss": 0.6867, "step": 16606 }, { "epoch": 0.8534792887244321, "grad_norm": 0.813794732093811, "learning_rate": 5.525107304116622e-07, "loss": 0.671, "step": 16607 }, { "epoch": 0.8535306814677768, "grad_norm": 1.0404741764068604, "learning_rate": 5.521305023931673e-07, "loss": 0.6457, "step": 16608 }, { "epoch": 0.8535820742111214, "grad_norm": 0.6791060566902161, "learning_rate": 5.517503976067801e-07, "loss": 0.6318, "step": 16609 }, { "epoch": 0.8536334669544661, "grad_norm": 1.0482901334762573, "learning_rate": 5.513704160630339e-07, "loss": 0.6512, "step": 16610 }, { "epoch": 0.8536848596978107, "grad_norm": 1.09709632396698, "learning_rate": 5.509905577724533e-07, "loss": 0.672, "step": 16611 }, { "epoch": 0.8537362524411554, "grad_norm": 1.1396543979644775, "learning_rate": 5.506108227455659e-07, "loss": 0.6814, "step": 16612 }, { "epoch": 0.8537876451844999, "grad_norm": 1.0864802598953247, "learning_rate": 5.502312109928892e-07, "loss": 0.688, "step": 16613 }, { "epoch": 0.8538390379278445, "grad_norm": 1.0504721403121948, "learning_rate": 5.498517225249439e-07, "loss": 0.7082, "step": 16614 }, { "epoch": 0.8538904306711892, "grad_norm": 1.0559278726577759, "learning_rate": 5.494723573522432e-07, "loss": 0.6786, "step": 16615 }, { "epoch": 0.8539418234145338, "grad_norm": 1.146001935005188, "learning_rate": 5.490931154852952e-07, "loss": 0.6969, "step": 16616 }, { "epoch": 0.8539932161578785, "grad_norm": 1.1648839712142944, "learning_rate": 5.487139969346122e-07, "loss": 0.6706, "step": 16617 }, { "epoch": 0.8540446089012231, "grad_norm": 1.0918409824371338, "learning_rate": 5.483350017106925e-07, "loss": 0.7036, "step": 16618 }, { "epoch": 0.8540960016445678, "grad_norm": 1.14625084400177, "learning_rate": 5.479561298240399e-07, "loss": 0.7293, "step": 16619 }, { "epoch": 0.8541473943879124, "grad_norm": 1.0785630941390991, "learning_rate": 5.475773812851503e-07, "loss": 0.6498, "step": 16620 }, { "epoch": 0.8541987871312571, "grad_norm": 1.0916883945465088, "learning_rate": 5.471987561045178e-07, "loss": 0.6678, "step": 16621 }, { "epoch": 0.8542501798746017, "grad_norm": 1.105644702911377, "learning_rate": 5.468202542926309e-07, "loss": 0.6719, "step": 16622 }, { "epoch": 0.8543015726179464, "grad_norm": 0.7795040607452393, "learning_rate": 5.464418758599787e-07, "loss": 0.6497, "step": 16623 }, { "epoch": 0.854352965361291, "grad_norm": 1.072161078453064, "learning_rate": 5.460636208170433e-07, "loss": 0.7423, "step": 16624 }, { "epoch": 0.8544043581046357, "grad_norm": 1.0713341236114502, "learning_rate": 5.456854891743035e-07, "loss": 0.6957, "step": 16625 }, { "epoch": 0.8544557508479803, "grad_norm": 1.0741219520568848, "learning_rate": 5.453074809422393e-07, "loss": 0.7021, "step": 16626 }, { "epoch": 0.854507143591325, "grad_norm": 0.7816008925437927, "learning_rate": 5.449295961313189e-07, "loss": 0.6245, "step": 16627 }, { "epoch": 0.8545585363346695, "grad_norm": 1.1115132570266724, "learning_rate": 5.445518347520157e-07, "loss": 0.7324, "step": 16628 }, { "epoch": 0.8546099290780141, "grad_norm": 1.1087119579315186, "learning_rate": 5.441741968147934e-07, "loss": 0.6728, "step": 16629 }, { "epoch": 0.8546613218213588, "grad_norm": 1.0497866868972778, "learning_rate": 5.437966823301183e-07, "loss": 0.6685, "step": 16630 }, { "epoch": 0.8547127145647034, "grad_norm": 1.119650959968567, "learning_rate": 5.434192913084452e-07, "loss": 0.6763, "step": 16631 }, { "epoch": 0.8547641073080481, "grad_norm": 1.067808985710144, "learning_rate": 5.430420237602336e-07, "loss": 0.6199, "step": 16632 }, { "epoch": 0.8548155000513927, "grad_norm": 1.0712556838989258, "learning_rate": 5.426648796959344e-07, "loss": 0.659, "step": 16633 }, { "epoch": 0.8548668927947374, "grad_norm": 0.8623567819595337, "learning_rate": 5.422878591259972e-07, "loss": 0.6347, "step": 16634 }, { "epoch": 0.854918285538082, "grad_norm": 1.0654454231262207, "learning_rate": 5.419109620608664e-07, "loss": 0.674, "step": 16635 }, { "epoch": 0.8549696782814267, "grad_norm": 0.6622124910354614, "learning_rate": 5.415341885109864e-07, "loss": 0.6255, "step": 16636 }, { "epoch": 0.8550210710247713, "grad_norm": 0.7096377611160278, "learning_rate": 5.411575384867957e-07, "loss": 0.6116, "step": 16637 }, { "epoch": 0.855072463768116, "grad_norm": 0.8410235047340393, "learning_rate": 5.407810119987272e-07, "loss": 0.6101, "step": 16638 }, { "epoch": 0.8551238565114606, "grad_norm": 1.1687554121017456, "learning_rate": 5.404046090572173e-07, "loss": 0.718, "step": 16639 }, { "epoch": 0.8551752492548053, "grad_norm": 0.7542539834976196, "learning_rate": 5.400283296726899e-07, "loss": 0.6332, "step": 16640 }, { "epoch": 0.8552266419981499, "grad_norm": 1.1134101152420044, "learning_rate": 5.396521738555732e-07, "loss": 0.7243, "step": 16641 }, { "epoch": 0.8552780347414946, "grad_norm": 1.123837947845459, "learning_rate": 5.392761416162879e-07, "loss": 0.6416, "step": 16642 }, { "epoch": 0.8553294274848391, "grad_norm": 1.086321473121643, "learning_rate": 5.389002329652521e-07, "loss": 0.6779, "step": 16643 }, { "epoch": 0.8553808202281837, "grad_norm": 1.0343292951583862, "learning_rate": 5.385244479128804e-07, "loss": 0.669, "step": 16644 }, { "epoch": 0.8554322129715284, "grad_norm": 1.0483826398849487, "learning_rate": 5.381487864695856e-07, "loss": 0.7161, "step": 16645 }, { "epoch": 0.855483605714873, "grad_norm": 1.0882412195205688, "learning_rate": 5.377732486457754e-07, "loss": 0.6996, "step": 16646 }, { "epoch": 0.8555349984582177, "grad_norm": 1.2087448835372925, "learning_rate": 5.373978344518527e-07, "loss": 0.7191, "step": 16647 }, { "epoch": 0.8555863912015623, "grad_norm": 1.1023222208023071, "learning_rate": 5.370225438982208e-07, "loss": 0.6971, "step": 16648 }, { "epoch": 0.855637783944907, "grad_norm": 1.0749213695526123, "learning_rate": 5.366473769952768e-07, "loss": 0.6856, "step": 16649 }, { "epoch": 0.8556891766882516, "grad_norm": 0.7783789038658142, "learning_rate": 5.362723337534148e-07, "loss": 0.6804, "step": 16650 }, { "epoch": 0.8557405694315963, "grad_norm": 1.0635075569152832, "learning_rate": 5.358974141830242e-07, "loss": 0.6816, "step": 16651 }, { "epoch": 0.8557919621749409, "grad_norm": 1.1363420486450195, "learning_rate": 5.355226182944967e-07, "loss": 0.7324, "step": 16652 }, { "epoch": 0.8558433549182856, "grad_norm": 1.0363080501556396, "learning_rate": 5.351479460982112e-07, "loss": 0.6409, "step": 16653 }, { "epoch": 0.8558947476616302, "grad_norm": 1.0236976146697998, "learning_rate": 5.347733976045522e-07, "loss": 0.6776, "step": 16654 }, { "epoch": 0.8559461404049749, "grad_norm": 1.1086493730545044, "learning_rate": 5.343989728238947e-07, "loss": 0.6687, "step": 16655 }, { "epoch": 0.8559975331483195, "grad_norm": 0.7606008648872375, "learning_rate": 5.340246717666131e-07, "loss": 0.662, "step": 16656 }, { "epoch": 0.8560489258916641, "grad_norm": 1.1598230600357056, "learning_rate": 5.336504944430781e-07, "loss": 0.725, "step": 16657 }, { "epoch": 0.8561003186350087, "grad_norm": 1.1804630756378174, "learning_rate": 5.33276440863657e-07, "loss": 0.6857, "step": 16658 }, { "epoch": 0.8561517113783533, "grad_norm": 1.0253392457962036, "learning_rate": 5.329025110387121e-07, "loss": 0.6515, "step": 16659 }, { "epoch": 0.856203104121698, "grad_norm": 1.1216741800308228, "learning_rate": 5.325287049786032e-07, "loss": 0.7206, "step": 16660 }, { "epoch": 0.8562544968650426, "grad_norm": 1.1707128286361694, "learning_rate": 5.321550226936889e-07, "loss": 0.7118, "step": 16661 }, { "epoch": 0.8563058896083873, "grad_norm": 1.0712249279022217, "learning_rate": 5.317814641943208e-07, "loss": 0.6598, "step": 16662 }, { "epoch": 0.8563572823517319, "grad_norm": 1.0757219791412354, "learning_rate": 5.314080294908497e-07, "loss": 0.7312, "step": 16663 }, { "epoch": 0.8564086750950766, "grad_norm": 1.1548324823379517, "learning_rate": 5.310347185936199e-07, "loss": 0.7441, "step": 16664 }, { "epoch": 0.8564600678384212, "grad_norm": 1.1156595945358276, "learning_rate": 5.306615315129765e-07, "loss": 0.7264, "step": 16665 }, { "epoch": 0.8565114605817659, "grad_norm": 1.0754835605621338, "learning_rate": 5.302884682592585e-07, "loss": 0.7197, "step": 16666 }, { "epoch": 0.8565628533251105, "grad_norm": 1.0774306058883667, "learning_rate": 5.299155288428004e-07, "loss": 0.7065, "step": 16667 }, { "epoch": 0.8566142460684552, "grad_norm": 1.128065586090088, "learning_rate": 5.295427132739384e-07, "loss": 0.7355, "step": 16668 }, { "epoch": 0.8566656388117998, "grad_norm": 1.1027228832244873, "learning_rate": 5.291700215629969e-07, "loss": 0.6763, "step": 16669 }, { "epoch": 0.8567170315551444, "grad_norm": 1.0568816661834717, "learning_rate": 5.287974537203056e-07, "loss": 0.6639, "step": 16670 }, { "epoch": 0.8567684242984891, "grad_norm": 1.1418324708938599, "learning_rate": 5.284250097561849e-07, "loss": 0.6881, "step": 16671 }, { "epoch": 0.8568198170418337, "grad_norm": 1.0666179656982422, "learning_rate": 5.280526896809545e-07, "loss": 0.7143, "step": 16672 }, { "epoch": 0.8568712097851783, "grad_norm": 1.1284116506576538, "learning_rate": 5.276804935049279e-07, "loss": 0.7045, "step": 16673 }, { "epoch": 0.8569226025285229, "grad_norm": 1.3307828903198242, "learning_rate": 5.273084212384199e-07, "loss": 0.6912, "step": 16674 }, { "epoch": 0.8569739952718676, "grad_norm": 1.1961811780929565, "learning_rate": 5.269364728917381e-07, "loss": 0.6717, "step": 16675 }, { "epoch": 0.8570253880152122, "grad_norm": 1.0898549556732178, "learning_rate": 5.265646484751857e-07, "loss": 0.6774, "step": 16676 }, { "epoch": 0.8570767807585569, "grad_norm": 1.1135098934173584, "learning_rate": 5.261929479990679e-07, "loss": 0.7188, "step": 16677 }, { "epoch": 0.8571281735019015, "grad_norm": 1.0735715627670288, "learning_rate": 5.25821371473681e-07, "loss": 0.6889, "step": 16678 }, { "epoch": 0.8571795662452462, "grad_norm": 1.0151816606521606, "learning_rate": 5.254499189093198e-07, "loss": 0.6786, "step": 16679 }, { "epoch": 0.8572309589885908, "grad_norm": 1.1660007238388062, "learning_rate": 5.250785903162747e-07, "loss": 0.7259, "step": 16680 }, { "epoch": 0.8572823517319355, "grad_norm": 1.1349910497665405, "learning_rate": 5.247073857048373e-07, "loss": 0.6512, "step": 16681 }, { "epoch": 0.8573337444752801, "grad_norm": 1.0447020530700684, "learning_rate": 5.243363050852878e-07, "loss": 0.6788, "step": 16682 }, { "epoch": 0.8573851372186247, "grad_norm": 1.1052254438400269, "learning_rate": 5.239653484679103e-07, "loss": 0.6984, "step": 16683 }, { "epoch": 0.8574365299619694, "grad_norm": 0.7357926368713379, "learning_rate": 5.235945158629818e-07, "loss": 0.6401, "step": 16684 }, { "epoch": 0.857487922705314, "grad_norm": 1.1499524116516113, "learning_rate": 5.232238072807761e-07, "loss": 0.7138, "step": 16685 }, { "epoch": 0.8575393154486587, "grad_norm": 1.1138731241226196, "learning_rate": 5.228532227315625e-07, "loss": 0.6717, "step": 16686 }, { "epoch": 0.8575907081920033, "grad_norm": 1.1965906620025635, "learning_rate": 5.224827622256118e-07, "loss": 0.6934, "step": 16687 }, { "epoch": 0.857642100935348, "grad_norm": 1.103754997253418, "learning_rate": 5.221124257731858e-07, "loss": 0.6486, "step": 16688 }, { "epoch": 0.8576934936786925, "grad_norm": 1.1095292568206787, "learning_rate": 5.21742213384544e-07, "loss": 0.6947, "step": 16689 }, { "epoch": 0.8577448864220372, "grad_norm": 1.1932556629180908, "learning_rate": 5.213721250699466e-07, "loss": 0.6911, "step": 16690 }, { "epoch": 0.8577962791653818, "grad_norm": 1.0708427429199219, "learning_rate": 5.210021608396449e-07, "loss": 0.7525, "step": 16691 }, { "epoch": 0.8578476719087265, "grad_norm": 0.7635583877563477, "learning_rate": 5.206323207038893e-07, "loss": 0.6695, "step": 16692 }, { "epoch": 0.8578990646520711, "grad_norm": 1.1842479705810547, "learning_rate": 5.202626046729265e-07, "loss": 0.6687, "step": 16693 }, { "epoch": 0.8579504573954158, "grad_norm": 1.0723521709442139, "learning_rate": 5.198930127570018e-07, "loss": 0.6747, "step": 16694 }, { "epoch": 0.8580018501387604, "grad_norm": 1.1177728176116943, "learning_rate": 5.19523544966351e-07, "loss": 0.7236, "step": 16695 }, { "epoch": 0.858053242882105, "grad_norm": 1.0427969694137573, "learning_rate": 5.191542013112144e-07, "loss": 0.6475, "step": 16696 }, { "epoch": 0.8581046356254497, "grad_norm": 1.0824629068374634, "learning_rate": 5.187849818018237e-07, "loss": 0.7029, "step": 16697 }, { "epoch": 0.8581560283687943, "grad_norm": 1.159592628479004, "learning_rate": 5.184158864484068e-07, "loss": 0.6888, "step": 16698 }, { "epoch": 0.858207421112139, "grad_norm": 1.1469663381576538, "learning_rate": 5.180469152611928e-07, "loss": 0.7269, "step": 16699 }, { "epoch": 0.8582588138554836, "grad_norm": 1.1179442405700684, "learning_rate": 5.176780682504023e-07, "loss": 0.7206, "step": 16700 }, { "epoch": 0.8583102065988283, "grad_norm": 1.0898016691207886, "learning_rate": 5.173093454262557e-07, "loss": 0.6476, "step": 16701 }, { "epoch": 0.8583615993421729, "grad_norm": 0.6671383380889893, "learning_rate": 5.169407467989668e-07, "loss": 0.6461, "step": 16702 }, { "epoch": 0.8584129920855176, "grad_norm": 1.044257640838623, "learning_rate": 5.165722723787514e-07, "loss": 0.7014, "step": 16703 }, { "epoch": 0.8584643848288621, "grad_norm": 1.1633086204528809, "learning_rate": 5.162039221758141e-07, "loss": 0.7519, "step": 16704 }, { "epoch": 0.8585157775722068, "grad_norm": 1.120275616645813, "learning_rate": 5.158356962003635e-07, "loss": 0.7061, "step": 16705 }, { "epoch": 0.8585671703155514, "grad_norm": 1.105878233909607, "learning_rate": 5.154675944626003e-07, "loss": 0.6624, "step": 16706 }, { "epoch": 0.8586185630588961, "grad_norm": 1.1321300268173218, "learning_rate": 5.15099616972724e-07, "loss": 0.7271, "step": 16707 }, { "epoch": 0.8586699558022407, "grad_norm": 1.064968228340149, "learning_rate": 5.147317637409294e-07, "loss": 0.671, "step": 16708 }, { "epoch": 0.8587213485455854, "grad_norm": 0.7342528104782104, "learning_rate": 5.14364034777407e-07, "loss": 0.638, "step": 16709 }, { "epoch": 0.85877274128893, "grad_norm": 1.0235021114349365, "learning_rate": 5.139964300923478e-07, "loss": 0.6852, "step": 16710 }, { "epoch": 0.8588241340322746, "grad_norm": 1.071006417274475, "learning_rate": 5.136289496959329e-07, "loss": 0.6826, "step": 16711 }, { "epoch": 0.8588755267756193, "grad_norm": 1.2732007503509521, "learning_rate": 5.132615935983471e-07, "loss": 0.7183, "step": 16712 }, { "epoch": 0.8589269195189639, "grad_norm": 1.1122158765792847, "learning_rate": 5.128943618097664e-07, "loss": 0.6739, "step": 16713 }, { "epoch": 0.8589783122623086, "grad_norm": 1.1001310348510742, "learning_rate": 5.125272543403658e-07, "loss": 0.6661, "step": 16714 }, { "epoch": 0.8590297050056532, "grad_norm": 1.1717740297317505, "learning_rate": 5.12160271200316e-07, "loss": 0.7329, "step": 16715 }, { "epoch": 0.8590810977489979, "grad_norm": 1.100318431854248, "learning_rate": 5.117934123997853e-07, "loss": 0.684, "step": 16716 }, { "epoch": 0.8591324904923425, "grad_norm": 1.121384620666504, "learning_rate": 5.114266779489375e-07, "loss": 0.7575, "step": 16717 }, { "epoch": 0.8591838832356872, "grad_norm": 1.1172490119934082, "learning_rate": 5.11060067857932e-07, "loss": 0.7201, "step": 16718 }, { "epoch": 0.8592352759790317, "grad_norm": 1.1001996994018555, "learning_rate": 5.106935821369291e-07, "loss": 0.7118, "step": 16719 }, { "epoch": 0.8592866687223764, "grad_norm": 1.110534429550171, "learning_rate": 5.103272207960791e-07, "loss": 0.6584, "step": 16720 }, { "epoch": 0.859338061465721, "grad_norm": 1.066957712173462, "learning_rate": 5.09960983845535e-07, "loss": 0.65, "step": 16721 }, { "epoch": 0.8593894542090657, "grad_norm": 0.7040334343910217, "learning_rate": 5.095948712954418e-07, "loss": 0.6257, "step": 16722 }, { "epoch": 0.8594408469524103, "grad_norm": 1.029909372329712, "learning_rate": 5.092288831559455e-07, "loss": 0.66, "step": 16723 }, { "epoch": 0.859492239695755, "grad_norm": 1.3412113189697266, "learning_rate": 5.088630194371829e-07, "loss": 0.6876, "step": 16724 }, { "epoch": 0.8595436324390996, "grad_norm": 1.1116526126861572, "learning_rate": 5.084972801492932e-07, "loss": 0.6487, "step": 16725 }, { "epoch": 0.8595950251824442, "grad_norm": 1.0862735509872437, "learning_rate": 5.081316653024088e-07, "loss": 0.6232, "step": 16726 }, { "epoch": 0.8596464179257889, "grad_norm": 1.088112473487854, "learning_rate": 5.077661749066575e-07, "loss": 0.6921, "step": 16727 }, { "epoch": 0.8596978106691335, "grad_norm": 1.1253446340560913, "learning_rate": 5.074008089721683e-07, "loss": 0.676, "step": 16728 }, { "epoch": 0.8597492034124782, "grad_norm": 1.0455559492111206, "learning_rate": 5.070355675090632e-07, "loss": 0.6144, "step": 16729 }, { "epoch": 0.8598005961558228, "grad_norm": 1.0786539316177368, "learning_rate": 5.066704505274606e-07, "loss": 0.659, "step": 16730 }, { "epoch": 0.8598519888991675, "grad_norm": 1.0298736095428467, "learning_rate": 5.063054580374766e-07, "loss": 0.7549, "step": 16731 }, { "epoch": 0.8599033816425121, "grad_norm": 1.0899608135223389, "learning_rate": 5.059405900492254e-07, "loss": 0.7428, "step": 16732 }, { "epoch": 0.8599547743858568, "grad_norm": 1.1084903478622437, "learning_rate": 5.055758465728128e-07, "loss": 0.6971, "step": 16733 }, { "epoch": 0.8600061671292013, "grad_norm": 1.1381936073303223, "learning_rate": 5.052112276183469e-07, "loss": 0.6827, "step": 16734 }, { "epoch": 0.860057559872546, "grad_norm": 1.0742419958114624, "learning_rate": 5.048467331959294e-07, "loss": 0.7003, "step": 16735 }, { "epoch": 0.8601089526158906, "grad_norm": 1.0268282890319824, "learning_rate": 5.044823633156581e-07, "loss": 0.6808, "step": 16736 }, { "epoch": 0.8601603453592352, "grad_norm": 1.0812846422195435, "learning_rate": 5.041181179876275e-07, "loss": 0.7411, "step": 16737 }, { "epoch": 0.8602117381025799, "grad_norm": 0.6776163578033447, "learning_rate": 5.037539972219319e-07, "loss": 0.6491, "step": 16738 }, { "epoch": 0.8602631308459245, "grad_norm": 1.1219476461410522, "learning_rate": 5.033900010286579e-07, "loss": 0.6753, "step": 16739 }, { "epoch": 0.8603145235892692, "grad_norm": 1.073449730873108, "learning_rate": 5.030261294178895e-07, "loss": 0.7098, "step": 16740 }, { "epoch": 0.8603659163326138, "grad_norm": 1.0801972150802612, "learning_rate": 5.026623823997107e-07, "loss": 0.7147, "step": 16741 }, { "epoch": 0.8604173090759585, "grad_norm": 0.7268709540367126, "learning_rate": 5.022987599841972e-07, "loss": 0.6277, "step": 16742 }, { "epoch": 0.8604687018193031, "grad_norm": 1.1501014232635498, "learning_rate": 5.019352621814244e-07, "loss": 0.6576, "step": 16743 }, { "epoch": 0.8605200945626478, "grad_norm": 1.0481343269348145, "learning_rate": 5.015718890014621e-07, "loss": 0.6887, "step": 16744 }, { "epoch": 0.8605714873059924, "grad_norm": 1.1956849098205566, "learning_rate": 5.012086404543809e-07, "loss": 0.6988, "step": 16745 }, { "epoch": 0.8606228800493371, "grad_norm": 1.1003162860870361, "learning_rate": 5.008455165502407e-07, "loss": 0.6321, "step": 16746 }, { "epoch": 0.8606742727926817, "grad_norm": 1.1154745817184448, "learning_rate": 5.004825172991051e-07, "loss": 0.691, "step": 16747 }, { "epoch": 0.8607256655360264, "grad_norm": 1.1095490455627441, "learning_rate": 5.001196427110306e-07, "loss": 0.7082, "step": 16748 }, { "epoch": 0.8607770582793709, "grad_norm": 1.1589680910110474, "learning_rate": 4.9975689279607e-07, "loss": 0.7155, "step": 16749 }, { "epoch": 0.8608284510227155, "grad_norm": 1.1479979753494263, "learning_rate": 4.99394267564276e-07, "loss": 0.6979, "step": 16750 }, { "epoch": 0.8608798437660602, "grad_norm": 0.7034879326820374, "learning_rate": 4.990317670256933e-07, "loss": 0.5978, "step": 16751 }, { "epoch": 0.8609312365094048, "grad_norm": 0.723787248134613, "learning_rate": 4.986693911903661e-07, "loss": 0.6805, "step": 16752 }, { "epoch": 0.8609826292527495, "grad_norm": 1.1227329969406128, "learning_rate": 4.983071400683331e-07, "loss": 0.6883, "step": 16753 }, { "epoch": 0.8610340219960941, "grad_norm": 1.050891637802124, "learning_rate": 4.979450136696329e-07, "loss": 0.6655, "step": 16754 }, { "epoch": 0.8610854147394388, "grad_norm": 1.0858683586120605, "learning_rate": 4.97583012004298e-07, "loss": 0.6742, "step": 16755 }, { "epoch": 0.8611368074827834, "grad_norm": 1.113417387008667, "learning_rate": 4.972211350823569e-07, "loss": 0.6589, "step": 16756 }, { "epoch": 0.8611882002261281, "grad_norm": 1.0991747379302979, "learning_rate": 4.968593829138352e-07, "loss": 0.6365, "step": 16757 }, { "epoch": 0.8612395929694727, "grad_norm": 1.1093100309371948, "learning_rate": 4.96497755508758e-07, "loss": 0.7659, "step": 16758 }, { "epoch": 0.8612909857128174, "grad_norm": 0.6461854577064514, "learning_rate": 4.96136252877143e-07, "loss": 0.6095, "step": 16759 }, { "epoch": 0.861342378456162, "grad_norm": 1.1379939317703247, "learning_rate": 4.957748750290047e-07, "loss": 0.6798, "step": 16760 }, { "epoch": 0.8613937711995067, "grad_norm": 1.0954571962356567, "learning_rate": 4.954136219743594e-07, "loss": 0.684, "step": 16761 }, { "epoch": 0.8614451639428513, "grad_norm": 1.130601406097412, "learning_rate": 4.95052493723211e-07, "loss": 0.6352, "step": 16762 }, { "epoch": 0.861496556686196, "grad_norm": 1.0393316745758057, "learning_rate": 4.946914902855682e-07, "loss": 0.6575, "step": 16763 }, { "epoch": 0.8615479494295405, "grad_norm": 1.0717134475708008, "learning_rate": 4.94330611671432e-07, "loss": 0.7174, "step": 16764 }, { "epoch": 0.8615993421728851, "grad_norm": 0.9992402791976929, "learning_rate": 4.939698578908003e-07, "loss": 0.6666, "step": 16765 }, { "epoch": 0.8616507349162298, "grad_norm": 1.1043227910995483, "learning_rate": 4.936092289536682e-07, "loss": 0.6913, "step": 16766 }, { "epoch": 0.8617021276595744, "grad_norm": 1.1066546440124512, "learning_rate": 4.932487248700279e-07, "loss": 0.7, "step": 16767 }, { "epoch": 0.8617535204029191, "grad_norm": 1.0850659608840942, "learning_rate": 4.928883456498679e-07, "loss": 0.6618, "step": 16768 }, { "epoch": 0.8618049131462637, "grad_norm": 1.1021714210510254, "learning_rate": 4.925280913031704e-07, "loss": 0.7284, "step": 16769 }, { "epoch": 0.8618563058896084, "grad_norm": 1.115631103515625, "learning_rate": 4.921679618399199e-07, "loss": 0.6663, "step": 16770 }, { "epoch": 0.861907698632953, "grad_norm": 1.1310640573501587, "learning_rate": 4.918079572700923e-07, "loss": 0.6884, "step": 16771 }, { "epoch": 0.8619590913762977, "grad_norm": 1.0832988023757935, "learning_rate": 4.914480776036617e-07, "loss": 0.6939, "step": 16772 }, { "epoch": 0.8620104841196423, "grad_norm": 1.234472632408142, "learning_rate": 4.91088322850598e-07, "loss": 0.7179, "step": 16773 }, { "epoch": 0.862061876862987, "grad_norm": 1.0742464065551758, "learning_rate": 4.907286930208721e-07, "loss": 0.6987, "step": 16774 }, { "epoch": 0.8621132696063316, "grad_norm": 1.0897985696792603, "learning_rate": 4.903691881244438e-07, "loss": 0.7024, "step": 16775 }, { "epoch": 0.8621646623496763, "grad_norm": 1.197669506072998, "learning_rate": 4.900098081712756e-07, "loss": 0.6767, "step": 16776 }, { "epoch": 0.8622160550930209, "grad_norm": 0.6522785425186157, "learning_rate": 4.896505531713241e-07, "loss": 0.6178, "step": 16777 }, { "epoch": 0.8622674478363656, "grad_norm": 1.1411634683609009, "learning_rate": 4.892914231345414e-07, "loss": 0.6359, "step": 16778 }, { "epoch": 0.8623188405797102, "grad_norm": 1.2458677291870117, "learning_rate": 4.889324180708799e-07, "loss": 0.6808, "step": 16779 }, { "epoch": 0.8623702333230547, "grad_norm": 1.1351290941238403, "learning_rate": 4.885735379902856e-07, "loss": 0.6967, "step": 16780 }, { "epoch": 0.8624216260663994, "grad_norm": 0.7984334826469421, "learning_rate": 4.882147829027e-07, "loss": 0.6796, "step": 16781 }, { "epoch": 0.862473018809744, "grad_norm": 1.198954701423645, "learning_rate": 4.878561528180637e-07, "loss": 0.7134, "step": 16782 }, { "epoch": 0.8625244115530887, "grad_norm": 1.120751976966858, "learning_rate": 4.874976477463134e-07, "loss": 0.7647, "step": 16783 }, { "epoch": 0.8625758042964333, "grad_norm": 1.118221402168274, "learning_rate": 4.871392676973813e-07, "loss": 0.643, "step": 16784 }, { "epoch": 0.862627197039778, "grad_norm": 0.700772225856781, "learning_rate": 4.867810126811967e-07, "loss": 0.6841, "step": 16785 }, { "epoch": 0.8626785897831226, "grad_norm": 1.156740665435791, "learning_rate": 4.864228827076845e-07, "loss": 0.7139, "step": 16786 }, { "epoch": 0.8627299825264673, "grad_norm": 1.0679891109466553, "learning_rate": 4.860648777867682e-07, "loss": 0.6439, "step": 16787 }, { "epoch": 0.8627813752698119, "grad_norm": 0.7223211526870728, "learning_rate": 4.857069979283669e-07, "loss": 0.662, "step": 16788 }, { "epoch": 0.8628327680131566, "grad_norm": 1.1113128662109375, "learning_rate": 4.853492431423945e-07, "loss": 0.6702, "step": 16789 }, { "epoch": 0.8628841607565012, "grad_norm": 0.7723284363746643, "learning_rate": 4.849916134387644e-07, "loss": 0.6584, "step": 16790 }, { "epoch": 0.8629355534998459, "grad_norm": 1.112114667892456, "learning_rate": 4.846341088273832e-07, "loss": 0.6743, "step": 16791 }, { "epoch": 0.8629869462431905, "grad_norm": 1.0344780683517456, "learning_rate": 4.842767293181577e-07, "loss": 0.7201, "step": 16792 }, { "epoch": 0.8630383389865351, "grad_norm": 1.1592353582382202, "learning_rate": 4.839194749209891e-07, "loss": 0.7357, "step": 16793 }, { "epoch": 0.8630897317298798, "grad_norm": 1.1069899797439575, "learning_rate": 4.83562345645775e-07, "loss": 0.6683, "step": 16794 }, { "epoch": 0.8631411244732243, "grad_norm": 1.118985652923584, "learning_rate": 4.83205341502409e-07, "loss": 0.6904, "step": 16795 }, { "epoch": 0.863192517216569, "grad_norm": 1.0612621307373047, "learning_rate": 4.828484625007851e-07, "loss": 0.69, "step": 16796 }, { "epoch": 0.8632439099599136, "grad_norm": 1.110080599784851, "learning_rate": 4.824917086507874e-07, "loss": 0.7174, "step": 16797 }, { "epoch": 0.8632953027032583, "grad_norm": 1.1500543355941772, "learning_rate": 4.821350799623031e-07, "loss": 0.6781, "step": 16798 }, { "epoch": 0.8633466954466029, "grad_norm": 0.6933485865592957, "learning_rate": 4.817785764452098e-07, "loss": 0.624, "step": 16799 }, { "epoch": 0.8633980881899476, "grad_norm": 1.131424903869629, "learning_rate": 4.814221981093881e-07, "loss": 0.6977, "step": 16800 }, { "epoch": 0.8634494809332922, "grad_norm": 1.1123830080032349, "learning_rate": 4.810659449647103e-07, "loss": 0.6901, "step": 16801 }, { "epoch": 0.8635008736766369, "grad_norm": 1.0635474920272827, "learning_rate": 4.807098170210455e-07, "loss": 0.664, "step": 16802 }, { "epoch": 0.8635522664199815, "grad_norm": 0.886468768119812, "learning_rate": 4.803538142882636e-07, "loss": 0.6189, "step": 16803 }, { "epoch": 0.8636036591633262, "grad_norm": 1.081760048866272, "learning_rate": 4.799979367762236e-07, "loss": 0.6632, "step": 16804 }, { "epoch": 0.8636550519066708, "grad_norm": 1.0781593322753906, "learning_rate": 4.796421844947896e-07, "loss": 0.685, "step": 16805 }, { "epoch": 0.8637064446500154, "grad_norm": 1.1444255113601685, "learning_rate": 4.792865574538159e-07, "loss": 0.7414, "step": 16806 }, { "epoch": 0.8637578373933601, "grad_norm": 0.7110108733177185, "learning_rate": 4.789310556631554e-07, "loss": 0.7078, "step": 16807 }, { "epoch": 0.8638092301367047, "grad_norm": 0.7261713147163391, "learning_rate": 4.785756791326579e-07, "loss": 0.6736, "step": 16808 }, { "epoch": 0.8638606228800494, "grad_norm": 1.0720264911651611, "learning_rate": 4.782204278721697e-07, "loss": 0.693, "step": 16809 }, { "epoch": 0.8639120156233939, "grad_norm": 0.8120423555374146, "learning_rate": 4.778653018915335e-07, "loss": 0.6489, "step": 16810 }, { "epoch": 0.8639634083667386, "grad_norm": 1.1376157999038696, "learning_rate": 4.775103012005871e-07, "loss": 0.7053, "step": 16811 }, { "epoch": 0.8640148011100832, "grad_norm": 1.0715097188949585, "learning_rate": 4.771554258091687e-07, "loss": 0.7039, "step": 16812 }, { "epoch": 0.8640661938534279, "grad_norm": 1.1204653978347778, "learning_rate": 4.768006757271071e-07, "loss": 0.7578, "step": 16813 }, { "epoch": 0.8641175865967725, "grad_norm": 1.0762317180633545, "learning_rate": 4.764460509642338e-07, "loss": 0.7354, "step": 16814 }, { "epoch": 0.8641689793401172, "grad_norm": 0.7659726738929749, "learning_rate": 4.7609155153037157e-07, "loss": 0.6574, "step": 16815 }, { "epoch": 0.8642203720834618, "grad_norm": 1.1349427700042725, "learning_rate": 4.757371774353453e-07, "loss": 0.7107, "step": 16816 }, { "epoch": 0.8642717648268065, "grad_norm": 1.0839574337005615, "learning_rate": 4.753829286889694e-07, "loss": 0.7279, "step": 16817 }, { "epoch": 0.8643231575701511, "grad_norm": 1.0780730247497559, "learning_rate": 4.7502880530106155e-07, "loss": 0.6423, "step": 16818 }, { "epoch": 0.8643745503134957, "grad_norm": 1.0728671550750732, "learning_rate": 4.746748072814322e-07, "loss": 0.6332, "step": 16819 }, { "epoch": 0.8644259430568404, "grad_norm": 1.0914570093154907, "learning_rate": 4.7432093463988805e-07, "loss": 0.6592, "step": 16820 }, { "epoch": 0.864477335800185, "grad_norm": 1.0886261463165283, "learning_rate": 4.73967187386235e-07, "loss": 0.6487, "step": 16821 }, { "epoch": 0.8645287285435297, "grad_norm": 1.1144911050796509, "learning_rate": 4.7361356553027373e-07, "loss": 0.6653, "step": 16822 }, { "epoch": 0.8645801212868743, "grad_norm": 1.1381844282150269, "learning_rate": 4.7326006908180123e-07, "loss": 0.6586, "step": 16823 }, { "epoch": 0.864631514030219, "grad_norm": 0.6732823848724365, "learning_rate": 4.729066980506103e-07, "loss": 0.6453, "step": 16824 }, { "epoch": 0.8646829067735635, "grad_norm": 1.1333794593811035, "learning_rate": 4.7255345244649474e-07, "loss": 0.6407, "step": 16825 }, { "epoch": 0.8647342995169082, "grad_norm": 0.7401777505874634, "learning_rate": 4.722003322792373e-07, "loss": 0.6517, "step": 16826 }, { "epoch": 0.8647856922602528, "grad_norm": 1.072718858718872, "learning_rate": 4.718473375586241e-07, "loss": 0.737, "step": 16827 }, { "epoch": 0.8648370850035975, "grad_norm": 1.0631109476089478, "learning_rate": 4.71494468294435e-07, "loss": 0.6742, "step": 16828 }, { "epoch": 0.8648884777469421, "grad_norm": 1.1551052331924438, "learning_rate": 4.7114172449644503e-07, "loss": 0.6964, "step": 16829 }, { "epoch": 0.8649398704902868, "grad_norm": 1.108849048614502, "learning_rate": 4.7078910617442964e-07, "loss": 0.7132, "step": 16830 }, { "epoch": 0.8649912632336314, "grad_norm": 0.6628881096839905, "learning_rate": 4.7043661333815717e-07, "loss": 0.6132, "step": 16831 }, { "epoch": 0.865042655976976, "grad_norm": 1.0856351852416992, "learning_rate": 4.7008424599739366e-07, "loss": 0.6759, "step": 16832 }, { "epoch": 0.8650940487203207, "grad_norm": 1.041588544845581, "learning_rate": 4.697320041619008e-07, "loss": 0.6833, "step": 16833 }, { "epoch": 0.8651454414636653, "grad_norm": 1.1799362897872925, "learning_rate": 4.693798878414396e-07, "loss": 0.7056, "step": 16834 }, { "epoch": 0.86519683420701, "grad_norm": 1.0882309675216675, "learning_rate": 4.6902789704576514e-07, "loss": 0.6648, "step": 16835 }, { "epoch": 0.8652482269503546, "grad_norm": 1.047500491142273, "learning_rate": 4.6867603178462895e-07, "loss": 0.6859, "step": 16836 }, { "epoch": 0.8652996196936993, "grad_norm": 1.1451234817504883, "learning_rate": 4.6832429206777995e-07, "loss": 0.7493, "step": 16837 }, { "epoch": 0.8653510124370439, "grad_norm": 1.0401136875152588, "learning_rate": 4.679726779049643e-07, "loss": 0.7051, "step": 16838 }, { "epoch": 0.8654024051803886, "grad_norm": 0.6487236022949219, "learning_rate": 4.676211893059235e-07, "loss": 0.6387, "step": 16839 }, { "epoch": 0.8654537979237331, "grad_norm": 1.1266751289367676, "learning_rate": 4.6726982628039483e-07, "loss": 0.6932, "step": 16840 }, { "epoch": 0.8655051906670778, "grad_norm": 1.118712306022644, "learning_rate": 4.669185888381145e-07, "loss": 0.7436, "step": 16841 }, { "epoch": 0.8655565834104224, "grad_norm": 0.6916739344596863, "learning_rate": 4.665674769888118e-07, "loss": 0.6631, "step": 16842 }, { "epoch": 0.8656079761537671, "grad_norm": 1.1347112655639648, "learning_rate": 4.662164907422173e-07, "loss": 0.6974, "step": 16843 }, { "epoch": 0.8656593688971117, "grad_norm": 1.0755008459091187, "learning_rate": 4.658656301080539e-07, "loss": 0.6884, "step": 16844 }, { "epoch": 0.8657107616404563, "grad_norm": 1.0861234664916992, "learning_rate": 4.6551489509604196e-07, "loss": 0.7077, "step": 16845 }, { "epoch": 0.865762154383801, "grad_norm": 1.0912024974822998, "learning_rate": 4.6516428571589943e-07, "loss": 0.694, "step": 16846 }, { "epoch": 0.8658135471271456, "grad_norm": 1.0918269157409668, "learning_rate": 4.6481380197734125e-07, "loss": 0.7055, "step": 16847 }, { "epoch": 0.8658649398704903, "grad_norm": 1.010430097579956, "learning_rate": 4.6446344389007637e-07, "loss": 0.6547, "step": 16848 }, { "epoch": 0.8659163326138349, "grad_norm": 1.085803747177124, "learning_rate": 4.6411321146381305e-07, "loss": 0.7055, "step": 16849 }, { "epoch": 0.8659677253571796, "grad_norm": 1.1148240566253662, "learning_rate": 4.63763104708253e-07, "loss": 0.6879, "step": 16850 }, { "epoch": 0.8660191181005242, "grad_norm": 1.0023490190505981, "learning_rate": 4.634131236330985e-07, "loss": 0.686, "step": 16851 }, { "epoch": 0.8660705108438689, "grad_norm": 0.8238533735275269, "learning_rate": 4.630632682480446e-07, "loss": 0.6476, "step": 16852 }, { "epoch": 0.8661219035872135, "grad_norm": 1.116344928741455, "learning_rate": 4.62713538562784e-07, "loss": 0.7547, "step": 16853 }, { "epoch": 0.8661732963305582, "grad_norm": 1.0539709329605103, "learning_rate": 4.6236393458700844e-07, "loss": 0.6905, "step": 16854 }, { "epoch": 0.8662246890739027, "grad_norm": 1.033897876739502, "learning_rate": 4.620144563304013e-07, "loss": 0.7008, "step": 16855 }, { "epoch": 0.8662760818172474, "grad_norm": 0.9878309965133667, "learning_rate": 4.6166510380264705e-07, "loss": 0.6707, "step": 16856 }, { "epoch": 0.866327474560592, "grad_norm": 0.7867979407310486, "learning_rate": 4.6131587701342407e-07, "loss": 0.6497, "step": 16857 }, { "epoch": 0.8663788673039367, "grad_norm": 1.0457791090011597, "learning_rate": 4.609667759724079e-07, "loss": 0.6508, "step": 16858 }, { "epoch": 0.8664302600472813, "grad_norm": 1.085776925086975, "learning_rate": 4.606178006892703e-07, "loss": 0.6231, "step": 16859 }, { "epoch": 0.866481652790626, "grad_norm": 1.0443991422653198, "learning_rate": 4.6026895117368187e-07, "loss": 0.7116, "step": 16860 }, { "epoch": 0.8665330455339706, "grad_norm": 1.1229698657989502, "learning_rate": 4.5992022743530594e-07, "loss": 0.6647, "step": 16861 }, { "epoch": 0.8665844382773152, "grad_norm": 1.1352931261062622, "learning_rate": 4.5957162948380374e-07, "loss": 0.7684, "step": 16862 }, { "epoch": 0.8666358310206599, "grad_norm": 1.0604121685028076, "learning_rate": 4.5922315732883526e-07, "loss": 0.6671, "step": 16863 }, { "epoch": 0.8666872237640045, "grad_norm": 1.0564987659454346, "learning_rate": 4.588748109800545e-07, "loss": 0.6579, "step": 16864 }, { "epoch": 0.8667386165073492, "grad_norm": 1.0370519161224365, "learning_rate": 4.5852659044711257e-07, "loss": 0.7028, "step": 16865 }, { "epoch": 0.8667900092506938, "grad_norm": 1.1641144752502441, "learning_rate": 4.581784957396568e-07, "loss": 0.6741, "step": 16866 }, { "epoch": 0.8668414019940385, "grad_norm": 1.130568027496338, "learning_rate": 4.5783052686733333e-07, "loss": 0.6503, "step": 16867 }, { "epoch": 0.8668927947373831, "grad_norm": 1.0574378967285156, "learning_rate": 4.5748268383977947e-07, "loss": 0.6943, "step": 16868 }, { "epoch": 0.8669441874807278, "grad_norm": 1.090811014175415, "learning_rate": 4.571349666666358e-07, "loss": 0.621, "step": 16869 }, { "epoch": 0.8669955802240724, "grad_norm": 0.7930713891983032, "learning_rate": 4.5678737535753526e-07, "loss": 0.6198, "step": 16870 }, { "epoch": 0.867046972967417, "grad_norm": 1.0515788793563843, "learning_rate": 4.564399099221062e-07, "loss": 0.6566, "step": 16871 }, { "epoch": 0.8670983657107616, "grad_norm": 1.1588231325149536, "learning_rate": 4.5609257036997865e-07, "loss": 0.6557, "step": 16872 }, { "epoch": 0.8671497584541062, "grad_norm": 0.8916311860084534, "learning_rate": 4.557453567107739e-07, "loss": 0.6183, "step": 16873 }, { "epoch": 0.8672011511974509, "grad_norm": 1.1547328233718872, "learning_rate": 4.5539826895411255e-07, "loss": 0.6691, "step": 16874 }, { "epoch": 0.8672525439407955, "grad_norm": 1.139276385307312, "learning_rate": 4.5505130710960963e-07, "loss": 0.6564, "step": 16875 }, { "epoch": 0.8673039366841402, "grad_norm": 0.8685891032218933, "learning_rate": 4.5470447118688086e-07, "loss": 0.6492, "step": 16876 }, { "epoch": 0.8673553294274848, "grad_norm": 1.0801135301589966, "learning_rate": 4.5435776119553245e-07, "loss": 0.692, "step": 16877 }, { "epoch": 0.8674067221708295, "grad_norm": 1.0958422422409058, "learning_rate": 4.540111771451722e-07, "loss": 0.7214, "step": 16878 }, { "epoch": 0.8674581149141741, "grad_norm": 1.0810171365737915, "learning_rate": 4.536647190454013e-07, "loss": 0.7197, "step": 16879 }, { "epoch": 0.8675095076575188, "grad_norm": 0.7447600364685059, "learning_rate": 4.5331838690582053e-07, "loss": 0.5945, "step": 16880 }, { "epoch": 0.8675609004008634, "grad_norm": 1.0942082405090332, "learning_rate": 4.529721807360238e-07, "loss": 0.7102, "step": 16881 }, { "epoch": 0.8676122931442081, "grad_norm": 1.0947860479354858, "learning_rate": 4.526261005456034e-07, "loss": 0.7141, "step": 16882 }, { "epoch": 0.8676636858875527, "grad_norm": 1.1190485954284668, "learning_rate": 4.522801463441484e-07, "loss": 0.7156, "step": 16883 }, { "epoch": 0.8677150786308974, "grad_norm": 1.0785343647003174, "learning_rate": 4.519343181412422e-07, "loss": 0.711, "step": 16884 }, { "epoch": 0.867766471374242, "grad_norm": 1.2084046602249146, "learning_rate": 4.515886159464677e-07, "loss": 0.7145, "step": 16885 }, { "epoch": 0.8678178641175865, "grad_norm": 1.1176583766937256, "learning_rate": 4.512430397694029e-07, "loss": 0.7545, "step": 16886 }, { "epoch": 0.8678692568609312, "grad_norm": 1.2191412448883057, "learning_rate": 4.508975896196216e-07, "loss": 0.7322, "step": 16887 }, { "epoch": 0.8679206496042758, "grad_norm": 1.145444631576538, "learning_rate": 4.505522655066941e-07, "loss": 0.7391, "step": 16888 }, { "epoch": 0.8679720423476205, "grad_norm": 1.1362560987472534, "learning_rate": 4.502070674401904e-07, "loss": 0.7129, "step": 16889 }, { "epoch": 0.8680234350909651, "grad_norm": 1.0449599027633667, "learning_rate": 4.4986199542967236e-07, "loss": 0.6966, "step": 16890 }, { "epoch": 0.8680748278343098, "grad_norm": 1.0726850032806396, "learning_rate": 4.495170494847012e-07, "loss": 0.7247, "step": 16891 }, { "epoch": 0.8681262205776544, "grad_norm": 1.1508533954620361, "learning_rate": 4.4917222961483377e-07, "loss": 0.7308, "step": 16892 }, { "epoch": 0.8681776133209991, "grad_norm": 0.698868453502655, "learning_rate": 4.488275358296229e-07, "loss": 0.6552, "step": 16893 }, { "epoch": 0.8682290060643437, "grad_norm": 1.1259434223175049, "learning_rate": 4.4848296813862046e-07, "loss": 0.6771, "step": 16894 }, { "epoch": 0.8682803988076884, "grad_norm": 0.7530171871185303, "learning_rate": 4.48138526551371e-07, "loss": 0.6721, "step": 16895 }, { "epoch": 0.868331791551033, "grad_norm": 1.0810846090316772, "learning_rate": 4.477942110774203e-07, "loss": 0.695, "step": 16896 }, { "epoch": 0.8683831842943777, "grad_norm": 1.1041934490203857, "learning_rate": 4.4745002172630446e-07, "loss": 0.7008, "step": 16897 }, { "epoch": 0.8684345770377223, "grad_norm": 1.0875623226165771, "learning_rate": 4.471059585075621e-07, "loss": 0.6957, "step": 16898 }, { "epoch": 0.868485969781067, "grad_norm": 1.1131097078323364, "learning_rate": 4.46762021430725e-07, "loss": 0.6997, "step": 16899 }, { "epoch": 0.8685373625244116, "grad_norm": 1.0392400026321411, "learning_rate": 4.464182105053222e-07, "loss": 0.6217, "step": 16900 }, { "epoch": 0.8685887552677561, "grad_norm": 1.0921518802642822, "learning_rate": 4.460745257408783e-07, "loss": 0.7343, "step": 16901 }, { "epoch": 0.8686401480111008, "grad_norm": 1.1028876304626465, "learning_rate": 4.457309671469173e-07, "loss": 0.6946, "step": 16902 }, { "epoch": 0.8686915407544454, "grad_norm": 1.0970611572265625, "learning_rate": 4.453875347329567e-07, "loss": 0.6745, "step": 16903 }, { "epoch": 0.8687429334977901, "grad_norm": 1.046669602394104, "learning_rate": 4.4504422850851105e-07, "loss": 0.6348, "step": 16904 }, { "epoch": 0.8687943262411347, "grad_norm": 1.0449820756912231, "learning_rate": 4.447010484830944e-07, "loss": 0.7197, "step": 16905 }, { "epoch": 0.8688457189844794, "grad_norm": 1.1876931190490723, "learning_rate": 4.4435799466621134e-07, "loss": 0.7236, "step": 16906 }, { "epoch": 0.868897111727824, "grad_norm": 0.7819764018058777, "learning_rate": 4.4401506706736874e-07, "loss": 0.6557, "step": 16907 }, { "epoch": 0.8689485044711687, "grad_norm": 1.081925630569458, "learning_rate": 4.436722656960668e-07, "loss": 0.6573, "step": 16908 }, { "epoch": 0.8689998972145133, "grad_norm": 1.1064388751983643, "learning_rate": 4.433295905618046e-07, "loss": 0.7174, "step": 16909 }, { "epoch": 0.869051289957858, "grad_norm": 1.175676941871643, "learning_rate": 4.4298704167407394e-07, "loss": 0.7226, "step": 16910 }, { "epoch": 0.8691026827012026, "grad_norm": 0.7838454246520996, "learning_rate": 4.4264461904236777e-07, "loss": 0.6597, "step": 16911 }, { "epoch": 0.8691540754445473, "grad_norm": 1.2905205488204956, "learning_rate": 4.4230232267617136e-07, "loss": 0.6507, "step": 16912 }, { "epoch": 0.8692054681878919, "grad_norm": 0.6603265404701233, "learning_rate": 4.419601525849687e-07, "loss": 0.661, "step": 16913 }, { "epoch": 0.8692568609312366, "grad_norm": 1.0421782732009888, "learning_rate": 4.416181087782412e-07, "loss": 0.7322, "step": 16914 }, { "epoch": 0.8693082536745812, "grad_norm": 1.2059253454208374, "learning_rate": 4.412761912654645e-07, "loss": 0.7073, "step": 16915 }, { "epoch": 0.8693596464179257, "grad_norm": 1.0222489833831787, "learning_rate": 4.4093440005611164e-07, "loss": 0.6582, "step": 16916 }, { "epoch": 0.8694110391612704, "grad_norm": 1.096178412437439, "learning_rate": 4.405927351596517e-07, "loss": 0.6855, "step": 16917 }, { "epoch": 0.869462431904615, "grad_norm": 1.0448393821716309, "learning_rate": 4.402511965855533e-07, "loss": 0.6482, "step": 16918 }, { "epoch": 0.8695138246479597, "grad_norm": 1.0728740692138672, "learning_rate": 4.399097843432754e-07, "loss": 0.6711, "step": 16919 }, { "epoch": 0.8695652173913043, "grad_norm": 1.1431784629821777, "learning_rate": 4.395684984422799e-07, "loss": 0.6921, "step": 16920 }, { "epoch": 0.869616610134649, "grad_norm": 1.1259459257125854, "learning_rate": 4.3922733889202207e-07, "loss": 0.6434, "step": 16921 }, { "epoch": 0.8696680028779936, "grad_norm": 1.0877840518951416, "learning_rate": 4.388863057019516e-07, "loss": 0.6958, "step": 16922 }, { "epoch": 0.8697193956213383, "grad_norm": 1.0904501676559448, "learning_rate": 4.385453988815208e-07, "loss": 0.7547, "step": 16923 }, { "epoch": 0.8697707883646829, "grad_norm": 0.7371160387992859, "learning_rate": 4.3820461844017117e-07, "loss": 0.6405, "step": 16924 }, { "epoch": 0.8698221811080276, "grad_norm": 1.105760097503662, "learning_rate": 4.3786396438734834e-07, "loss": 0.7315, "step": 16925 }, { "epoch": 0.8698735738513722, "grad_norm": 1.1106667518615723, "learning_rate": 4.3752343673248655e-07, "loss": 0.7158, "step": 16926 }, { "epoch": 0.8699249665947169, "grad_norm": 1.1510581970214844, "learning_rate": 4.371830354850232e-07, "loss": 0.7538, "step": 16927 }, { "epoch": 0.8699763593380615, "grad_norm": 1.1151307821273804, "learning_rate": 4.368427606543879e-07, "loss": 0.7084, "step": 16928 }, { "epoch": 0.8700277520814061, "grad_norm": 1.1802327632904053, "learning_rate": 4.365026122500088e-07, "loss": 0.6855, "step": 16929 }, { "epoch": 0.8700791448247508, "grad_norm": 1.0912760496139526, "learning_rate": 4.3616259028130936e-07, "loss": 0.7153, "step": 16930 }, { "epoch": 0.8701305375680953, "grad_norm": 1.0497270822525024, "learning_rate": 4.35822694757711e-07, "loss": 0.6775, "step": 16931 }, { "epoch": 0.87018193031144, "grad_norm": 1.1995489597320557, "learning_rate": 4.354829256886306e-07, "loss": 0.7457, "step": 16932 }, { "epoch": 0.8702333230547846, "grad_norm": 1.0532625913619995, "learning_rate": 4.3514328308348173e-07, "loss": 0.7321, "step": 16933 }, { "epoch": 0.8702847157981293, "grad_norm": 0.7766775488853455, "learning_rate": 4.348037669516747e-07, "loss": 0.6436, "step": 16934 }, { "epoch": 0.8703361085414739, "grad_norm": 1.0565733909606934, "learning_rate": 4.344643773026147e-07, "loss": 0.721, "step": 16935 }, { "epoch": 0.8703875012848186, "grad_norm": 1.1334953308105469, "learning_rate": 4.34125114145707e-07, "loss": 0.7056, "step": 16936 }, { "epoch": 0.8704388940281632, "grad_norm": 1.0884066820144653, "learning_rate": 4.337859774903502e-07, "loss": 0.6608, "step": 16937 }, { "epoch": 0.8704902867715079, "grad_norm": 0.7810848355293274, "learning_rate": 4.334469673459402e-07, "loss": 0.6304, "step": 16938 }, { "epoch": 0.8705416795148525, "grad_norm": 0.7114217877388, "learning_rate": 4.3310808372186877e-07, "loss": 0.6161, "step": 16939 }, { "epoch": 0.8705930722581972, "grad_norm": 1.1316041946411133, "learning_rate": 4.327693266275268e-07, "loss": 0.7404, "step": 16940 }, { "epoch": 0.8706444650015418, "grad_norm": 1.0426186323165894, "learning_rate": 4.32430696072299e-07, "loss": 0.6963, "step": 16941 }, { "epoch": 0.8706958577448864, "grad_norm": 1.0765613317489624, "learning_rate": 4.3209219206556687e-07, "loss": 0.6334, "step": 16942 }, { "epoch": 0.8707472504882311, "grad_norm": 1.1478737592697144, "learning_rate": 4.317538146167094e-07, "loss": 0.7115, "step": 16943 }, { "epoch": 0.8707986432315757, "grad_norm": 1.4248632192611694, "learning_rate": 4.3141556373510197e-07, "loss": 0.7735, "step": 16944 }, { "epoch": 0.8708500359749204, "grad_norm": 1.1269899606704712, "learning_rate": 4.310774394301159e-07, "loss": 0.7027, "step": 16945 }, { "epoch": 0.870901428718265, "grad_norm": 1.0911827087402344, "learning_rate": 4.3073944171111824e-07, "loss": 0.6712, "step": 16946 }, { "epoch": 0.8709528214616096, "grad_norm": 1.0040462017059326, "learning_rate": 4.3040157058747645e-07, "loss": 0.6913, "step": 16947 }, { "epoch": 0.8710042142049542, "grad_norm": 1.0728493928909302, "learning_rate": 4.3006382606854745e-07, "loss": 0.7461, "step": 16948 }, { "epoch": 0.8710556069482989, "grad_norm": 1.1414399147033691, "learning_rate": 4.2972620816369217e-07, "loss": 0.7077, "step": 16949 }, { "epoch": 0.8711069996916435, "grad_norm": 1.069277286529541, "learning_rate": 4.29388716882263e-07, "loss": 0.7157, "step": 16950 }, { "epoch": 0.8711583924349882, "grad_norm": 1.1577059030532837, "learning_rate": 4.290513522336104e-07, "loss": 0.7125, "step": 16951 }, { "epoch": 0.8712097851783328, "grad_norm": 1.2562531232833862, "learning_rate": 4.287141142270812e-07, "loss": 0.6602, "step": 16952 }, { "epoch": 0.8712611779216775, "grad_norm": 1.2184945344924927, "learning_rate": 4.283770028720202e-07, "loss": 0.7121, "step": 16953 }, { "epoch": 0.8713125706650221, "grad_norm": 0.6707696914672852, "learning_rate": 4.280400181777661e-07, "loss": 0.6451, "step": 16954 }, { "epoch": 0.8713639634083667, "grad_norm": 1.0605617761611938, "learning_rate": 4.277031601536552e-07, "loss": 0.7154, "step": 16955 }, { "epoch": 0.8714153561517114, "grad_norm": 0.7428040504455566, "learning_rate": 4.2736642880902124e-07, "loss": 0.665, "step": 16956 }, { "epoch": 0.871466748895056, "grad_norm": 1.0502722263336182, "learning_rate": 4.2702982415319393e-07, "loss": 0.709, "step": 16957 }, { "epoch": 0.8715181416384007, "grad_norm": 1.0288259983062744, "learning_rate": 4.266933461954986e-07, "loss": 0.6846, "step": 16958 }, { "epoch": 0.8715695343817453, "grad_norm": 0.7152932286262512, "learning_rate": 4.2635699494525676e-07, "loss": 0.6152, "step": 16959 }, { "epoch": 0.87162092712509, "grad_norm": 0.6775751709938049, "learning_rate": 4.2602077041179024e-07, "loss": 0.6399, "step": 16960 }, { "epoch": 0.8716723198684346, "grad_norm": 1.0512100458145142, "learning_rate": 4.256846726044106e-07, "loss": 0.6679, "step": 16961 }, { "epoch": 0.8717237126117792, "grad_norm": 1.1368736028671265, "learning_rate": 4.2534870153243256e-07, "loss": 0.6939, "step": 16962 }, { "epoch": 0.8717751053551238, "grad_norm": 1.0631284713745117, "learning_rate": 4.250128572051632e-07, "loss": 0.6812, "step": 16963 }, { "epoch": 0.8718264980984685, "grad_norm": 1.1007957458496094, "learning_rate": 4.2467713963190713e-07, "loss": 0.701, "step": 16964 }, { "epoch": 0.8718778908418131, "grad_norm": 0.7270546555519104, "learning_rate": 4.243415488219671e-07, "loss": 0.6183, "step": 16965 }, { "epoch": 0.8719292835851578, "grad_norm": 1.0154081583023071, "learning_rate": 4.2400608478464e-07, "loss": 0.7474, "step": 16966 }, { "epoch": 0.8719806763285024, "grad_norm": 1.1003563404083252, "learning_rate": 4.2367074752922067e-07, "loss": 0.7346, "step": 16967 }, { "epoch": 0.872032069071847, "grad_norm": 0.7726216316223145, "learning_rate": 4.233355370649983e-07, "loss": 0.6565, "step": 16968 }, { "epoch": 0.8720834618151917, "grad_norm": 1.0713075399398804, "learning_rate": 4.230004534012633e-07, "loss": 0.6785, "step": 16969 }, { "epoch": 0.8721348545585363, "grad_norm": 0.7042410969734192, "learning_rate": 4.226654965472954e-07, "loss": 0.5947, "step": 16970 }, { "epoch": 0.872186247301881, "grad_norm": 1.1804038286209106, "learning_rate": 4.223306665123783e-07, "loss": 0.6305, "step": 16971 }, { "epoch": 0.8722376400452256, "grad_norm": 1.0762962102890015, "learning_rate": 4.2199596330578685e-07, "loss": 0.7012, "step": 16972 }, { "epoch": 0.8722890327885703, "grad_norm": 1.124481201171875, "learning_rate": 4.216613869367953e-07, "loss": 0.739, "step": 16973 }, { "epoch": 0.8723404255319149, "grad_norm": 0.6997160315513611, "learning_rate": 4.213269374146733e-07, "loss": 0.6694, "step": 16974 }, { "epoch": 0.8723918182752596, "grad_norm": 0.9837872385978699, "learning_rate": 4.2099261474868593e-07, "loss": 0.635, "step": 16975 }, { "epoch": 0.8724432110186042, "grad_norm": 1.1050201654434204, "learning_rate": 4.206584189480989e-07, "loss": 0.7337, "step": 16976 }, { "epoch": 0.8724946037619488, "grad_norm": 1.2221908569335938, "learning_rate": 4.203243500221671e-07, "loss": 0.7028, "step": 16977 }, { "epoch": 0.8725459965052934, "grad_norm": 1.0891327857971191, "learning_rate": 4.1999040798014924e-07, "loss": 0.6645, "step": 16978 }, { "epoch": 0.8725973892486381, "grad_norm": 1.0673365592956543, "learning_rate": 4.1965659283129625e-07, "loss": 0.6755, "step": 16979 }, { "epoch": 0.8726487819919827, "grad_norm": 1.0438331365585327, "learning_rate": 4.193229045848574e-07, "loss": 0.7106, "step": 16980 }, { "epoch": 0.8727001747353273, "grad_norm": 1.093877911567688, "learning_rate": 4.189893432500769e-07, "loss": 0.6243, "step": 16981 }, { "epoch": 0.872751567478672, "grad_norm": 1.0681830644607544, "learning_rate": 4.1865590883619743e-07, "loss": 0.7061, "step": 16982 }, { "epoch": 0.8728029602220166, "grad_norm": 0.8856194615364075, "learning_rate": 4.1832260135245715e-07, "loss": 0.6435, "step": 16983 }, { "epoch": 0.8728543529653613, "grad_norm": 1.1188421249389648, "learning_rate": 4.1798942080808977e-07, "loss": 0.6262, "step": 16984 }, { "epoch": 0.8729057457087059, "grad_norm": 1.0492881536483765, "learning_rate": 4.176563672123268e-07, "loss": 0.7, "step": 16985 }, { "epoch": 0.8729571384520506, "grad_norm": 1.0606663227081299, "learning_rate": 4.1732344057439477e-07, "loss": 0.6981, "step": 16986 }, { "epoch": 0.8730085311953952, "grad_norm": 1.2632490396499634, "learning_rate": 4.169906409035196e-07, "loss": 0.7753, "step": 16987 }, { "epoch": 0.8730599239387399, "grad_norm": 1.1579920053482056, "learning_rate": 4.1665796820891947e-07, "loss": 0.6391, "step": 16988 }, { "epoch": 0.8731113166820845, "grad_norm": 1.0685985088348389, "learning_rate": 4.163254224998148e-07, "loss": 0.7087, "step": 16989 }, { "epoch": 0.8731627094254292, "grad_norm": 1.0190953016281128, "learning_rate": 4.159930037854154e-07, "loss": 0.6902, "step": 16990 }, { "epoch": 0.8732141021687738, "grad_norm": 0.7726448178291321, "learning_rate": 4.156607120749334e-07, "loss": 0.5996, "step": 16991 }, { "epoch": 0.8732654949121184, "grad_norm": 1.1112573146820068, "learning_rate": 4.1532854737757475e-07, "loss": 0.69, "step": 16992 }, { "epoch": 0.873316887655463, "grad_norm": 1.0939804315567017, "learning_rate": 4.1499650970254214e-07, "loss": 0.6989, "step": 16993 }, { "epoch": 0.8733682803988077, "grad_norm": 1.156049132347107, "learning_rate": 4.146645990590342e-07, "loss": 0.7641, "step": 16994 }, { "epoch": 0.8734196731421523, "grad_norm": 1.0336335897445679, "learning_rate": 4.143328154562487e-07, "loss": 0.638, "step": 16995 }, { "epoch": 0.873471065885497, "grad_norm": 1.1202205419540405, "learning_rate": 4.1400115890337657e-07, "loss": 0.7361, "step": 16996 }, { "epoch": 0.8735224586288416, "grad_norm": 1.1035032272338867, "learning_rate": 4.1366962940960655e-07, "loss": 0.6466, "step": 16997 }, { "epoch": 0.8735738513721862, "grad_norm": 1.1604799032211304, "learning_rate": 4.1333822698412575e-07, "loss": 0.7788, "step": 16998 }, { "epoch": 0.8736252441155309, "grad_norm": 1.0190480947494507, "learning_rate": 4.130069516361135e-07, "loss": 0.6711, "step": 16999 }, { "epoch": 0.8736766368588755, "grad_norm": 0.7498182058334351, "learning_rate": 4.1267580337474965e-07, "loss": 0.6106, "step": 17000 }, { "epoch": 0.8737280296022202, "grad_norm": 1.0064923763275146, "learning_rate": 4.123447822092086e-07, "loss": 0.6719, "step": 17001 }, { "epoch": 0.8737794223455648, "grad_norm": 0.7679840922355652, "learning_rate": 4.120138881486613e-07, "loss": 0.6541, "step": 17002 }, { "epoch": 0.8738308150889095, "grad_norm": 0.8344942927360535, "learning_rate": 4.1168312120227537e-07, "loss": 0.6718, "step": 17003 }, { "epoch": 0.8738822078322541, "grad_norm": 1.1132385730743408, "learning_rate": 4.113524813792158e-07, "loss": 0.6883, "step": 17004 }, { "epoch": 0.8739336005755988, "grad_norm": 1.1151834726333618, "learning_rate": 4.110219686886424e-07, "loss": 0.6155, "step": 17005 }, { "epoch": 0.8739849933189434, "grad_norm": 1.0761266946792603, "learning_rate": 4.1069158313971237e-07, "loss": 0.708, "step": 17006 }, { "epoch": 0.874036386062288, "grad_norm": 1.1007834672927856, "learning_rate": 4.1036132474157995e-07, "loss": 0.7577, "step": 17007 }, { "epoch": 0.8740877788056326, "grad_norm": 1.1064989566802979, "learning_rate": 4.1003119350339513e-07, "loss": 0.6493, "step": 17008 }, { "epoch": 0.8741391715489772, "grad_norm": 1.1839499473571777, "learning_rate": 4.097011894343045e-07, "loss": 0.6702, "step": 17009 }, { "epoch": 0.8741905642923219, "grad_norm": 0.6931377649307251, "learning_rate": 4.093713125434501e-07, "loss": 0.6321, "step": 17010 }, { "epoch": 0.8742419570356665, "grad_norm": 1.1268229484558105, "learning_rate": 4.0904156283997353e-07, "loss": 0.6781, "step": 17011 }, { "epoch": 0.8742933497790112, "grad_norm": 1.1244171857833862, "learning_rate": 4.087119403330075e-07, "loss": 0.717, "step": 17012 }, { "epoch": 0.8743447425223558, "grad_norm": 1.1495572328567505, "learning_rate": 4.083824450316881e-07, "loss": 0.7065, "step": 17013 }, { "epoch": 0.8743961352657005, "grad_norm": 1.1147764921188354, "learning_rate": 4.080530769451424e-07, "loss": 0.7236, "step": 17014 }, { "epoch": 0.8744475280090451, "grad_norm": 0.7313366532325745, "learning_rate": 4.0772383608249475e-07, "loss": 0.68, "step": 17015 }, { "epoch": 0.8744989207523898, "grad_norm": 1.0453277826309204, "learning_rate": 4.073947224528696e-07, "loss": 0.6273, "step": 17016 }, { "epoch": 0.8745503134957344, "grad_norm": 1.080526351928711, "learning_rate": 4.070657360653835e-07, "loss": 0.6397, "step": 17017 }, { "epoch": 0.8746017062390791, "grad_norm": 0.7586373686790466, "learning_rate": 4.0673687692915353e-07, "loss": 0.6023, "step": 17018 }, { "epoch": 0.8746530989824237, "grad_norm": 1.0428462028503418, "learning_rate": 4.064081450532875e-07, "loss": 0.7343, "step": 17019 }, { "epoch": 0.8747044917257684, "grad_norm": 1.0922504663467407, "learning_rate": 4.060795404468959e-07, "loss": 0.6437, "step": 17020 }, { "epoch": 0.874755884469113, "grad_norm": 1.1428388357162476, "learning_rate": 4.0575106311908254e-07, "loss": 0.6635, "step": 17021 }, { "epoch": 0.8748072772124575, "grad_norm": 1.1208579540252686, "learning_rate": 4.0542271307894785e-07, "loss": 0.6959, "step": 17022 }, { "epoch": 0.8748586699558022, "grad_norm": 1.1612337827682495, "learning_rate": 4.05094490335588e-07, "loss": 0.7422, "step": 17023 }, { "epoch": 0.8749100626991468, "grad_norm": 0.7855663299560547, "learning_rate": 4.04766394898099e-07, "loss": 0.6204, "step": 17024 }, { "epoch": 0.8749614554424915, "grad_norm": 1.0562723875045776, "learning_rate": 4.0443842677556967e-07, "loss": 0.6721, "step": 17025 }, { "epoch": 0.8750128481858361, "grad_norm": 0.7155593037605286, "learning_rate": 4.0411058597708553e-07, "loss": 0.6404, "step": 17026 }, { "epoch": 0.8750642409291808, "grad_norm": 1.0214122533798218, "learning_rate": 4.0378287251173323e-07, "loss": 0.6278, "step": 17027 }, { "epoch": 0.8751156336725254, "grad_norm": 0.999601423740387, "learning_rate": 4.034552863885877e-07, "loss": 0.6726, "step": 17028 }, { "epoch": 0.8751670264158701, "grad_norm": 0.7710046768188477, "learning_rate": 4.0312782761672886e-07, "loss": 0.6891, "step": 17029 }, { "epoch": 0.8752184191592147, "grad_norm": 1.0801796913146973, "learning_rate": 4.0280049620522733e-07, "loss": 0.6855, "step": 17030 }, { "epoch": 0.8752698119025594, "grad_norm": 0.7587679028511047, "learning_rate": 4.02473292163153e-07, "loss": 0.663, "step": 17031 }, { "epoch": 0.875321204645904, "grad_norm": 1.1141160726547241, "learning_rate": 4.0214621549956925e-07, "loss": 0.7661, "step": 17032 }, { "epoch": 0.8753725973892487, "grad_norm": 1.1480093002319336, "learning_rate": 4.0181926622354096e-07, "loss": 0.6645, "step": 17033 }, { "epoch": 0.8754239901325933, "grad_norm": 1.0717296600341797, "learning_rate": 4.014924443441254e-07, "loss": 0.6562, "step": 17034 }, { "epoch": 0.875475382875938, "grad_norm": 1.123856544494629, "learning_rate": 4.0116574987037693e-07, "loss": 0.7454, "step": 17035 }, { "epoch": 0.8755267756192826, "grad_norm": 1.0959911346435547, "learning_rate": 4.008391828113467e-07, "loss": 0.7102, "step": 17036 }, { "epoch": 0.8755781683626273, "grad_norm": 1.1420667171478271, "learning_rate": 4.0051274317608357e-07, "loss": 0.6927, "step": 17037 }, { "epoch": 0.8756295611059718, "grad_norm": 1.1462041139602661, "learning_rate": 4.00186430973632e-07, "loss": 0.65, "step": 17038 }, { "epoch": 0.8756809538493164, "grad_norm": 1.1185624599456787, "learning_rate": 3.9986024621303077e-07, "loss": 0.6293, "step": 17039 }, { "epoch": 0.8757323465926611, "grad_norm": 1.210189938545227, "learning_rate": 3.9953418890331996e-07, "loss": 0.6595, "step": 17040 }, { "epoch": 0.8757837393360057, "grad_norm": 1.047241449356079, "learning_rate": 3.9920825905353065e-07, "loss": 0.6026, "step": 17041 }, { "epoch": 0.8758351320793504, "grad_norm": 0.7243605256080627, "learning_rate": 3.9888245667269456e-07, "loss": 0.6618, "step": 17042 }, { "epoch": 0.875886524822695, "grad_norm": 0.8229589462280273, "learning_rate": 3.9855678176983824e-07, "loss": 0.6564, "step": 17043 }, { "epoch": 0.8759379175660397, "grad_norm": 1.052469253540039, "learning_rate": 3.9823123435398403e-07, "loss": 0.6739, "step": 17044 }, { "epoch": 0.8759893103093843, "grad_norm": 0.6812047362327576, "learning_rate": 3.9790581443415135e-07, "loss": 0.653, "step": 17045 }, { "epoch": 0.876040703052729, "grad_norm": 1.0542981624603271, "learning_rate": 3.975805220193579e-07, "loss": 0.6129, "step": 17046 }, { "epoch": 0.8760920957960736, "grad_norm": 1.0855557918548584, "learning_rate": 3.9725535711861494e-07, "loss": 0.6861, "step": 17047 }, { "epoch": 0.8761434885394183, "grad_norm": 0.7398030757904053, "learning_rate": 3.969303197409308e-07, "loss": 0.646, "step": 17048 }, { "epoch": 0.8761948812827629, "grad_norm": 1.0615806579589844, "learning_rate": 3.966054098953126e-07, "loss": 0.6651, "step": 17049 }, { "epoch": 0.8762462740261076, "grad_norm": 1.1380072832107544, "learning_rate": 3.96280627590761e-07, "loss": 0.7179, "step": 17050 }, { "epoch": 0.8762976667694522, "grad_norm": 0.731192946434021, "learning_rate": 3.9595597283627553e-07, "loss": 0.6292, "step": 17051 }, { "epoch": 0.8763490595127968, "grad_norm": 1.2247822284698486, "learning_rate": 3.956314456408489e-07, "loss": 0.6566, "step": 17052 }, { "epoch": 0.8764004522561414, "grad_norm": 1.4047714471817017, "learning_rate": 3.953070460134756e-07, "loss": 0.7413, "step": 17053 }, { "epoch": 0.876451844999486, "grad_norm": 0.7880280613899231, "learning_rate": 3.949827739631401e-07, "loss": 0.6214, "step": 17054 }, { "epoch": 0.8765032377428307, "grad_norm": 1.0637768507003784, "learning_rate": 3.946586294988286e-07, "loss": 0.7104, "step": 17055 }, { "epoch": 0.8765546304861753, "grad_norm": 1.141261100769043, "learning_rate": 3.943346126295217e-07, "loss": 0.7002, "step": 17056 }, { "epoch": 0.87660602322952, "grad_norm": 1.0902400016784668, "learning_rate": 3.940107233641949e-07, "loss": 0.7139, "step": 17057 }, { "epoch": 0.8766574159728646, "grad_norm": 1.0471253395080566, "learning_rate": 3.9368696171182443e-07, "loss": 0.6483, "step": 17058 }, { "epoch": 0.8767088087162093, "grad_norm": 1.1976720094680786, "learning_rate": 3.933633276813792e-07, "loss": 0.6956, "step": 17059 }, { "epoch": 0.8767602014595539, "grad_norm": 0.7958855032920837, "learning_rate": 3.930398212818254e-07, "loss": 0.6231, "step": 17060 }, { "epoch": 0.8768115942028986, "grad_norm": 0.8885971307754517, "learning_rate": 3.927164425221258e-07, "loss": 0.6294, "step": 17061 }, { "epoch": 0.8768629869462432, "grad_norm": 1.0475728511810303, "learning_rate": 3.923931914112422e-07, "loss": 0.6967, "step": 17062 }, { "epoch": 0.8769143796895879, "grad_norm": 1.1210602521896362, "learning_rate": 3.9207006795812684e-07, "loss": 0.711, "step": 17063 }, { "epoch": 0.8769657724329325, "grad_norm": 1.095068335533142, "learning_rate": 3.917470721717348e-07, "loss": 0.6779, "step": 17064 }, { "epoch": 0.8770171651762771, "grad_norm": 1.1076130867004395, "learning_rate": 3.914242040610139e-07, "loss": 0.6994, "step": 17065 }, { "epoch": 0.8770685579196218, "grad_norm": 0.6902804970741272, "learning_rate": 3.911014636349103e-07, "loss": 0.6896, "step": 17066 }, { "epoch": 0.8771199506629664, "grad_norm": 1.0327762365341187, "learning_rate": 3.9077885090236524e-07, "loss": 0.637, "step": 17067 }, { "epoch": 0.877171343406311, "grad_norm": 1.0222896337509155, "learning_rate": 3.904563658723165e-07, "loss": 0.6979, "step": 17068 }, { "epoch": 0.8772227361496556, "grad_norm": 1.09306800365448, "learning_rate": 3.901340085537009e-07, "loss": 0.6501, "step": 17069 }, { "epoch": 0.8772741288930003, "grad_norm": 1.106996774673462, "learning_rate": 3.8981177895544677e-07, "loss": 0.7223, "step": 17070 }, { "epoch": 0.8773255216363449, "grad_norm": 1.091869592666626, "learning_rate": 3.894896770864837e-07, "loss": 0.6865, "step": 17071 }, { "epoch": 0.8773769143796896, "grad_norm": 1.0750385522842407, "learning_rate": 3.8916770295573503e-07, "loss": 0.63, "step": 17072 }, { "epoch": 0.8774283071230342, "grad_norm": 1.1212286949157715, "learning_rate": 3.8884585657212205e-07, "loss": 0.6388, "step": 17073 }, { "epoch": 0.8774796998663789, "grad_norm": 0.7513415217399597, "learning_rate": 3.8852413794456033e-07, "loss": 0.6426, "step": 17074 }, { "epoch": 0.8775310926097235, "grad_norm": 1.1841797828674316, "learning_rate": 3.88202547081965e-07, "loss": 0.6688, "step": 17075 }, { "epoch": 0.8775824853530682, "grad_norm": 1.114106297492981, "learning_rate": 3.878810839932451e-07, "loss": 0.7129, "step": 17076 }, { "epoch": 0.8776338780964128, "grad_norm": 1.1150649785995483, "learning_rate": 3.875597486873067e-07, "loss": 0.6795, "step": 17077 }, { "epoch": 0.8776852708397574, "grad_norm": 1.0625505447387695, "learning_rate": 3.8723854117305505e-07, "loss": 0.6587, "step": 17078 }, { "epoch": 0.8777366635831021, "grad_norm": 1.066200852394104, "learning_rate": 3.869174614593857e-07, "loss": 0.638, "step": 17079 }, { "epoch": 0.8777880563264467, "grad_norm": 1.0165842771530151, "learning_rate": 3.8659650955519714e-07, "loss": 0.6605, "step": 17080 }, { "epoch": 0.8778394490697914, "grad_norm": 0.8071630001068115, "learning_rate": 3.8627568546938056e-07, "loss": 0.6659, "step": 17081 }, { "epoch": 0.877890841813136, "grad_norm": 1.0759340524673462, "learning_rate": 3.859549892108261e-07, "loss": 0.7469, "step": 17082 }, { "epoch": 0.8779422345564806, "grad_norm": 0.786113440990448, "learning_rate": 3.856344207884166e-07, "loss": 0.6353, "step": 17083 }, { "epoch": 0.8779936272998252, "grad_norm": 1.109815239906311, "learning_rate": 3.853139802110356e-07, "loss": 0.6829, "step": 17084 }, { "epoch": 0.8780450200431699, "grad_norm": 1.0783814191818237, "learning_rate": 3.84993667487561e-07, "loss": 0.6328, "step": 17085 }, { "epoch": 0.8780964127865145, "grad_norm": 1.0265936851501465, "learning_rate": 3.846734826268661e-07, "loss": 0.7089, "step": 17086 }, { "epoch": 0.8781478055298592, "grad_norm": 1.0987348556518555, "learning_rate": 3.843534256378223e-07, "loss": 0.6818, "step": 17087 }, { "epoch": 0.8781991982732038, "grad_norm": 1.0983035564422607, "learning_rate": 3.8403349652929856e-07, "loss": 0.6716, "step": 17088 }, { "epoch": 0.8782505910165485, "grad_norm": 1.1450324058532715, "learning_rate": 3.8371369531015726e-07, "loss": 0.7073, "step": 17089 }, { "epoch": 0.8783019837598931, "grad_norm": 1.1366676092147827, "learning_rate": 3.833940219892579e-07, "loss": 0.7047, "step": 17090 }, { "epoch": 0.8783533765032377, "grad_norm": 1.061814785003662, "learning_rate": 3.8307447657546125e-07, "loss": 0.7501, "step": 17091 }, { "epoch": 0.8784047692465824, "grad_norm": 1.2162518501281738, "learning_rate": 3.827550590776152e-07, "loss": 0.7238, "step": 17092 }, { "epoch": 0.878456161989927, "grad_norm": 1.1522462368011475, "learning_rate": 3.8243576950457385e-07, "loss": 0.7244, "step": 17093 }, { "epoch": 0.8785075547332717, "grad_norm": 1.204479455947876, "learning_rate": 3.821166078651817e-07, "loss": 0.6709, "step": 17094 }, { "epoch": 0.8785589474766163, "grad_norm": 1.0347235202789307, "learning_rate": 3.817975741682811e-07, "loss": 0.6616, "step": 17095 }, { "epoch": 0.878610340219961, "grad_norm": 1.080794095993042, "learning_rate": 3.8147866842271066e-07, "loss": 0.7197, "step": 17096 }, { "epoch": 0.8786617329633056, "grad_norm": 0.790163516998291, "learning_rate": 3.811598906373082e-07, "loss": 0.5996, "step": 17097 }, { "epoch": 0.8787131257066502, "grad_norm": 1.1208338737487793, "learning_rate": 3.808412408209039e-07, "loss": 0.6863, "step": 17098 }, { "epoch": 0.8787645184499948, "grad_norm": 0.8032639622688293, "learning_rate": 3.8052271898232574e-07, "loss": 0.6662, "step": 17099 }, { "epoch": 0.8788159111933395, "grad_norm": 1.1396254301071167, "learning_rate": 3.802043251304005e-07, "loss": 0.6935, "step": 17100 }, { "epoch": 0.8788673039366841, "grad_norm": 1.0788861513137817, "learning_rate": 3.798860592739489e-07, "loss": 0.7188, "step": 17101 }, { "epoch": 0.8789186966800288, "grad_norm": 0.6574702262878418, "learning_rate": 3.7956792142178845e-07, "loss": 0.6552, "step": 17102 }, { "epoch": 0.8789700894233734, "grad_norm": 1.0810402631759644, "learning_rate": 3.792499115827325e-07, "loss": 0.7173, "step": 17103 }, { "epoch": 0.879021482166718, "grad_norm": 1.0911890268325806, "learning_rate": 3.789320297655941e-07, "loss": 0.6813, "step": 17104 }, { "epoch": 0.8790728749100627, "grad_norm": 1.0754843950271606, "learning_rate": 3.786142759791783e-07, "loss": 0.683, "step": 17105 }, { "epoch": 0.8791242676534073, "grad_norm": 1.038679599761963, "learning_rate": 3.782966502322899e-07, "loss": 0.6868, "step": 17106 }, { "epoch": 0.879175660396752, "grad_norm": 1.1028778553009033, "learning_rate": 3.779791525337284e-07, "loss": 0.7308, "step": 17107 }, { "epoch": 0.8792270531400966, "grad_norm": 1.2103615999221802, "learning_rate": 3.7766178289229014e-07, "loss": 0.7268, "step": 17108 }, { "epoch": 0.8792784458834413, "grad_norm": 1.0694783926010132, "learning_rate": 3.773445413167698e-07, "loss": 0.711, "step": 17109 }, { "epoch": 0.8793298386267859, "grad_norm": 1.1376357078552246, "learning_rate": 3.770274278159553e-07, "loss": 0.6296, "step": 17110 }, { "epoch": 0.8793812313701306, "grad_norm": 1.0509940385818481, "learning_rate": 3.767104423986329e-07, "loss": 0.72, "step": 17111 }, { "epoch": 0.8794326241134752, "grad_norm": 1.1283636093139648, "learning_rate": 3.763935850735839e-07, "loss": 0.7131, "step": 17112 }, { "epoch": 0.8794840168568199, "grad_norm": 1.086683988571167, "learning_rate": 3.760768558495892e-07, "loss": 0.7486, "step": 17113 }, { "epoch": 0.8795354096001644, "grad_norm": 0.722305178642273, "learning_rate": 3.757602547354233e-07, "loss": 0.6162, "step": 17114 }, { "epoch": 0.879586802343509, "grad_norm": 0.7169036269187927, "learning_rate": 3.7544378173985706e-07, "loss": 0.6856, "step": 17115 }, { "epoch": 0.8796381950868537, "grad_norm": 1.0900222063064575, "learning_rate": 3.7512743687165896e-07, "loss": 0.6827, "step": 17116 }, { "epoch": 0.8796895878301983, "grad_norm": 1.1443791389465332, "learning_rate": 3.7481122013959424e-07, "loss": 0.6717, "step": 17117 }, { "epoch": 0.879740980573543, "grad_norm": 0.9293333888053894, "learning_rate": 3.7449513155242366e-07, "loss": 0.6604, "step": 17118 }, { "epoch": 0.8797923733168876, "grad_norm": 1.1026860475540161, "learning_rate": 3.741791711189041e-07, "loss": 0.695, "step": 17119 }, { "epoch": 0.8798437660602323, "grad_norm": 1.3580210208892822, "learning_rate": 3.738633388477919e-07, "loss": 0.6824, "step": 17120 }, { "epoch": 0.8798951588035769, "grad_norm": 1.1297781467437744, "learning_rate": 3.735476347478334e-07, "loss": 0.7158, "step": 17121 }, { "epoch": 0.8799465515469216, "grad_norm": 1.098528504371643, "learning_rate": 3.732320588277788e-07, "loss": 0.6784, "step": 17122 }, { "epoch": 0.8799979442902662, "grad_norm": 1.134292721748352, "learning_rate": 3.7291661109637003e-07, "loss": 0.668, "step": 17123 }, { "epoch": 0.8800493370336109, "grad_norm": 1.2878371477127075, "learning_rate": 3.7260129156234783e-07, "loss": 0.7027, "step": 17124 }, { "epoch": 0.8801007297769555, "grad_norm": 0.738251268863678, "learning_rate": 3.7228610023444633e-07, "loss": 0.6305, "step": 17125 }, { "epoch": 0.8801521225203002, "grad_norm": 1.0267661809921265, "learning_rate": 3.719710371214003e-07, "loss": 0.6491, "step": 17126 }, { "epoch": 0.8802035152636448, "grad_norm": 1.0673110485076904, "learning_rate": 3.7165610223193814e-07, "loss": 0.6799, "step": 17127 }, { "epoch": 0.8802549080069895, "grad_norm": 1.0275362730026245, "learning_rate": 3.7134129557478473e-07, "loss": 0.6619, "step": 17128 }, { "epoch": 0.880306300750334, "grad_norm": 1.0604251623153687, "learning_rate": 3.7102661715866353e-07, "loss": 0.6816, "step": 17129 }, { "epoch": 0.8803576934936787, "grad_norm": 1.0553553104400635, "learning_rate": 3.7071206699229147e-07, "loss": 0.6749, "step": 17130 }, { "epoch": 0.8804090862370233, "grad_norm": 0.7612324357032776, "learning_rate": 3.7039764508438493e-07, "loss": 0.6552, "step": 17131 }, { "epoch": 0.8804604789803679, "grad_norm": 1.1975078582763672, "learning_rate": 3.7008335144365306e-07, "loss": 0.7274, "step": 17132 }, { "epoch": 0.8805118717237126, "grad_norm": 1.1076902151107788, "learning_rate": 3.6976918607880664e-07, "loss": 0.7138, "step": 17133 }, { "epoch": 0.8805632644670572, "grad_norm": 1.0978312492370605, "learning_rate": 3.694551489985471e-07, "loss": 0.6985, "step": 17134 }, { "epoch": 0.8806146572104019, "grad_norm": 1.1310263872146606, "learning_rate": 3.6914124021157685e-07, "loss": 0.6929, "step": 17135 }, { "epoch": 0.8806660499537465, "grad_norm": 0.6573857665061951, "learning_rate": 3.6882745972659227e-07, "loss": 0.6299, "step": 17136 }, { "epoch": 0.8807174426970912, "grad_norm": 0.7280827760696411, "learning_rate": 3.685138075522859e-07, "loss": 0.6414, "step": 17137 }, { "epoch": 0.8807688354404358, "grad_norm": 1.081909418106079, "learning_rate": 3.6820028369735026e-07, "loss": 0.7212, "step": 17138 }, { "epoch": 0.8808202281837805, "grad_norm": 1.086043357849121, "learning_rate": 3.6788688817047e-07, "loss": 0.7434, "step": 17139 }, { "epoch": 0.8808716209271251, "grad_norm": 1.0470410585403442, "learning_rate": 3.675736209803288e-07, "loss": 0.6161, "step": 17140 }, { "epoch": 0.8809230136704698, "grad_norm": 1.09235417842865, "learning_rate": 3.672604821356052e-07, "loss": 0.6776, "step": 17141 }, { "epoch": 0.8809744064138144, "grad_norm": 1.1200886964797974, "learning_rate": 3.669474716449756e-07, "loss": 0.7197, "step": 17142 }, { "epoch": 0.8810257991571591, "grad_norm": 1.022282361984253, "learning_rate": 3.666345895171125e-07, "loss": 0.6691, "step": 17143 }, { "epoch": 0.8810771919005036, "grad_norm": 0.8025686740875244, "learning_rate": 3.66321835760684e-07, "loss": 0.6521, "step": 17144 }, { "epoch": 0.8811285846438482, "grad_norm": 1.1165648698806763, "learning_rate": 3.6600921038435435e-07, "loss": 0.6811, "step": 17145 }, { "epoch": 0.8811799773871929, "grad_norm": 1.0491769313812256, "learning_rate": 3.656967133967881e-07, "loss": 0.668, "step": 17146 }, { "epoch": 0.8812313701305375, "grad_norm": 0.7947109937667847, "learning_rate": 3.6538434480663963e-07, "loss": 0.6447, "step": 17147 }, { "epoch": 0.8812827628738822, "grad_norm": 1.084948182106018, "learning_rate": 3.650721046225658e-07, "loss": 0.7042, "step": 17148 }, { "epoch": 0.8813341556172268, "grad_norm": 1.0844550132751465, "learning_rate": 3.647599928532164e-07, "loss": 0.7248, "step": 17149 }, { "epoch": 0.8813855483605715, "grad_norm": 0.7221179008483887, "learning_rate": 3.6444800950723836e-07, "loss": 0.6842, "step": 17150 }, { "epoch": 0.8814369411039161, "grad_norm": 1.0798547267913818, "learning_rate": 3.6413615459327755e-07, "loss": 0.687, "step": 17151 }, { "epoch": 0.8814883338472608, "grad_norm": 1.0676544904708862, "learning_rate": 3.638244281199721e-07, "loss": 0.7005, "step": 17152 }, { "epoch": 0.8815397265906054, "grad_norm": 1.1150600910186768, "learning_rate": 3.6351283009596004e-07, "loss": 0.6879, "step": 17153 }, { "epoch": 0.8815911193339501, "grad_norm": 1.0744106769561768, "learning_rate": 3.632013605298723e-07, "loss": 0.6762, "step": 17154 }, { "epoch": 0.8816425120772947, "grad_norm": 0.7658979296684265, "learning_rate": 3.62890019430342e-07, "loss": 0.6196, "step": 17155 }, { "epoch": 0.8816939048206394, "grad_norm": 1.108672022819519, "learning_rate": 3.6257880680599157e-07, "loss": 0.6874, "step": 17156 }, { "epoch": 0.881745297563984, "grad_norm": 0.760303258895874, "learning_rate": 3.622677226654453e-07, "loss": 0.6609, "step": 17157 }, { "epoch": 0.8817966903073287, "grad_norm": 1.0185706615447998, "learning_rate": 3.619567670173213e-07, "loss": 0.7192, "step": 17158 }, { "epoch": 0.8818480830506732, "grad_norm": 1.014692783355713, "learning_rate": 3.6164593987023545e-07, "loss": 0.6503, "step": 17159 }, { "epoch": 0.8818994757940178, "grad_norm": 1.0535719394683838, "learning_rate": 3.613352412327997e-07, "loss": 0.6994, "step": 17160 }, { "epoch": 0.8819508685373625, "grad_norm": 1.0766398906707764, "learning_rate": 3.610246711136206e-07, "loss": 0.7083, "step": 17161 }, { "epoch": 0.8820022612807071, "grad_norm": 1.1554217338562012, "learning_rate": 3.6071422952130563e-07, "loss": 0.6962, "step": 17162 }, { "epoch": 0.8820536540240518, "grad_norm": 1.1262308359146118, "learning_rate": 3.6040391646445293e-07, "loss": 0.7428, "step": 17163 }, { "epoch": 0.8821050467673964, "grad_norm": 1.0671658515930176, "learning_rate": 3.6009373195166176e-07, "loss": 0.7132, "step": 17164 }, { "epoch": 0.8821564395107411, "grad_norm": 1.0519405603408813, "learning_rate": 3.5978367599152576e-07, "loss": 0.6301, "step": 17165 }, { "epoch": 0.8822078322540857, "grad_norm": 1.252974033355713, "learning_rate": 3.594737485926347e-07, "loss": 0.7072, "step": 17166 }, { "epoch": 0.8822592249974304, "grad_norm": 1.0198771953582764, "learning_rate": 3.5916394976357513e-07, "loss": 0.6498, "step": 17167 }, { "epoch": 0.882310617740775, "grad_norm": 0.8625896573066711, "learning_rate": 3.5885427951293125e-07, "loss": 0.6274, "step": 17168 }, { "epoch": 0.8823620104841197, "grad_norm": 1.2059311866760254, "learning_rate": 3.585447378492829e-07, "loss": 0.688, "step": 17169 }, { "epoch": 0.8824134032274643, "grad_norm": 0.7967343926429749, "learning_rate": 3.582353247812048e-07, "loss": 0.6173, "step": 17170 }, { "epoch": 0.882464795970809, "grad_norm": 1.1352158784866333, "learning_rate": 3.579260403172724e-07, "loss": 0.7154, "step": 17171 }, { "epoch": 0.8825161887141536, "grad_norm": 1.1393189430236816, "learning_rate": 3.5761688446605047e-07, "loss": 0.6907, "step": 17172 }, { "epoch": 0.8825675814574983, "grad_norm": 1.172644853591919, "learning_rate": 3.573078572361077e-07, "loss": 0.6973, "step": 17173 }, { "epoch": 0.8826189742008428, "grad_norm": 1.0227291584014893, "learning_rate": 3.5699895863600455e-07, "loss": 0.6784, "step": 17174 }, { "epoch": 0.8826703669441874, "grad_norm": 1.035005807876587, "learning_rate": 3.5669018867430074e-07, "loss": 0.6739, "step": 17175 }, { "epoch": 0.8827217596875321, "grad_norm": 0.857428252696991, "learning_rate": 3.563815473595489e-07, "loss": 0.592, "step": 17176 }, { "epoch": 0.8827731524308767, "grad_norm": 1.0636605024337769, "learning_rate": 3.560730347003016e-07, "loss": 0.6919, "step": 17177 }, { "epoch": 0.8828245451742214, "grad_norm": 0.8680917024612427, "learning_rate": 3.557646507051066e-07, "loss": 0.6863, "step": 17178 }, { "epoch": 0.882875937917566, "grad_norm": 1.1427152156829834, "learning_rate": 3.554563953825063e-07, "loss": 0.7739, "step": 17179 }, { "epoch": 0.8829273306609107, "grad_norm": 1.0131632089614868, "learning_rate": 3.5514826874104347e-07, "loss": 0.6803, "step": 17180 }, { "epoch": 0.8829787234042553, "grad_norm": 1.1011669635772705, "learning_rate": 3.5484027078925343e-07, "loss": 0.6962, "step": 17181 }, { "epoch": 0.8830301161476, "grad_norm": 0.9995465874671936, "learning_rate": 3.545324015356705e-07, "loss": 0.6699, "step": 17182 }, { "epoch": 0.8830815088909446, "grad_norm": 0.7657961845397949, "learning_rate": 3.5422466098882337e-07, "loss": 0.612, "step": 17183 }, { "epoch": 0.8831329016342893, "grad_norm": 1.0281312465667725, "learning_rate": 3.5391704915724026e-07, "loss": 0.7438, "step": 17184 }, { "epoch": 0.8831842943776339, "grad_norm": 0.9848723411560059, "learning_rate": 3.5360956604944096e-07, "loss": 0.6659, "step": 17185 }, { "epoch": 0.8832356871209786, "grad_norm": 0.8695343732833862, "learning_rate": 3.533022116739465e-07, "loss": 0.6334, "step": 17186 }, { "epoch": 0.8832870798643232, "grad_norm": 0.7787792682647705, "learning_rate": 3.529949860392723e-07, "loss": 0.6347, "step": 17187 }, { "epoch": 0.8833384726076678, "grad_norm": 1.1888320446014404, "learning_rate": 3.5268788915392927e-07, "loss": 0.6613, "step": 17188 }, { "epoch": 0.8833898653510124, "grad_norm": 1.0413286685943604, "learning_rate": 3.523809210264273e-07, "loss": 0.6318, "step": 17189 }, { "epoch": 0.883441258094357, "grad_norm": 1.09832763671875, "learning_rate": 3.5207408166527014e-07, "loss": 0.6938, "step": 17190 }, { "epoch": 0.8834926508377017, "grad_norm": 1.105941891670227, "learning_rate": 3.5176737107895985e-07, "loss": 0.7184, "step": 17191 }, { "epoch": 0.8835440435810463, "grad_norm": 1.061026930809021, "learning_rate": 3.5146078927599245e-07, "loss": 0.7236, "step": 17192 }, { "epoch": 0.883595436324391, "grad_norm": 1.2754851579666138, "learning_rate": 3.5115433626486394e-07, "loss": 0.7179, "step": 17193 }, { "epoch": 0.8836468290677356, "grad_norm": 1.118636965751648, "learning_rate": 3.5084801205406414e-07, "loss": 0.697, "step": 17194 }, { "epoch": 0.8836982218110803, "grad_norm": 1.045110821723938, "learning_rate": 3.5054181665207967e-07, "loss": 0.6629, "step": 17195 }, { "epoch": 0.8837496145544249, "grad_norm": 1.0930958986282349, "learning_rate": 3.502357500673942e-07, "loss": 0.7012, "step": 17196 }, { "epoch": 0.8838010072977696, "grad_norm": 1.1034013032913208, "learning_rate": 3.499298123084888e-07, "loss": 0.6638, "step": 17197 }, { "epoch": 0.8838524000411142, "grad_norm": 1.1060434579849243, "learning_rate": 3.496240033838366e-07, "loss": 0.6397, "step": 17198 }, { "epoch": 0.8839037927844589, "grad_norm": 1.0753295421600342, "learning_rate": 3.4931832330191374e-07, "loss": 0.6864, "step": 17199 }, { "epoch": 0.8839551855278035, "grad_norm": 0.7311631441116333, "learning_rate": 3.490127720711878e-07, "loss": 0.6148, "step": 17200 }, { "epoch": 0.8840065782711481, "grad_norm": 0.7730428576469421, "learning_rate": 3.4870734970012363e-07, "loss": 0.6601, "step": 17201 }, { "epoch": 0.8840579710144928, "grad_norm": 1.1114429235458374, "learning_rate": 3.484020561971846e-07, "loss": 0.7105, "step": 17202 }, { "epoch": 0.8841093637578374, "grad_norm": 1.1501654386520386, "learning_rate": 3.4809689157082884e-07, "loss": 0.6854, "step": 17203 }, { "epoch": 0.8841607565011821, "grad_norm": 1.0526442527770996, "learning_rate": 3.4779185582951125e-07, "loss": 0.6718, "step": 17204 }, { "epoch": 0.8842121492445266, "grad_norm": 1.0376602411270142, "learning_rate": 3.474869489816812e-07, "loss": 0.6595, "step": 17205 }, { "epoch": 0.8842635419878713, "grad_norm": 0.7162024974822998, "learning_rate": 3.471821710357892e-07, "loss": 0.6387, "step": 17206 }, { "epoch": 0.8843149347312159, "grad_norm": 1.1031944751739502, "learning_rate": 3.4687752200027847e-07, "loss": 0.6319, "step": 17207 }, { "epoch": 0.8843663274745606, "grad_norm": 1.0819947719573975, "learning_rate": 3.4657300188358887e-07, "loss": 0.7434, "step": 17208 }, { "epoch": 0.8844177202179052, "grad_norm": 1.099614143371582, "learning_rate": 3.462686106941576e-07, "loss": 0.7059, "step": 17209 }, { "epoch": 0.8844691129612499, "grad_norm": 0.7581198811531067, "learning_rate": 3.459643484404185e-07, "loss": 0.6451, "step": 17210 }, { "epoch": 0.8845205057045945, "grad_norm": 1.0846989154815674, "learning_rate": 3.456602151308014e-07, "loss": 0.7023, "step": 17211 }, { "epoch": 0.8845718984479392, "grad_norm": 0.7723881006240845, "learning_rate": 3.453562107737318e-07, "loss": 0.6233, "step": 17212 }, { "epoch": 0.8846232911912838, "grad_norm": 1.0163217782974243, "learning_rate": 3.450523353776347e-07, "loss": 0.6981, "step": 17213 }, { "epoch": 0.8846746839346284, "grad_norm": 1.166298270225525, "learning_rate": 3.4474858895092554e-07, "loss": 0.7331, "step": 17214 }, { "epoch": 0.8847260766779731, "grad_norm": 1.1289334297180176, "learning_rate": 3.4444497150202315e-07, "loss": 0.6679, "step": 17215 }, { "epoch": 0.8847774694213177, "grad_norm": 0.7483891248703003, "learning_rate": 3.4414148303933805e-07, "loss": 0.6432, "step": 17216 }, { "epoch": 0.8848288621646624, "grad_norm": 0.6482483744621277, "learning_rate": 3.438381235712784e-07, "loss": 0.6072, "step": 17217 }, { "epoch": 0.884880254908007, "grad_norm": 0.7421467304229736, "learning_rate": 3.4353489310624935e-07, "loss": 0.6693, "step": 17218 }, { "epoch": 0.8849316476513517, "grad_norm": 1.106765627861023, "learning_rate": 3.4323179165265283e-07, "loss": 0.6869, "step": 17219 }, { "epoch": 0.8849830403946962, "grad_norm": 1.1078455448150635, "learning_rate": 3.429288192188862e-07, "loss": 0.7216, "step": 17220 }, { "epoch": 0.8850344331380409, "grad_norm": 1.230933666229248, "learning_rate": 3.426259758133427e-07, "loss": 0.6991, "step": 17221 }, { "epoch": 0.8850858258813855, "grad_norm": 1.0159106254577637, "learning_rate": 3.4232326144441387e-07, "loss": 0.6507, "step": 17222 }, { "epoch": 0.8851372186247302, "grad_norm": 1.1438149213790894, "learning_rate": 3.420206761204864e-07, "loss": 0.7181, "step": 17223 }, { "epoch": 0.8851886113680748, "grad_norm": 1.0809037685394287, "learning_rate": 3.417182198499441e-07, "loss": 0.6712, "step": 17224 }, { "epoch": 0.8852400041114195, "grad_norm": 1.1030441522598267, "learning_rate": 3.4141589264116537e-07, "loss": 0.7274, "step": 17225 }, { "epoch": 0.8852913968547641, "grad_norm": 1.121673345565796, "learning_rate": 3.4111369450252895e-07, "loss": 0.704, "step": 17226 }, { "epoch": 0.8853427895981087, "grad_norm": 1.067434549331665, "learning_rate": 3.408116254424043e-07, "loss": 0.6818, "step": 17227 }, { "epoch": 0.8853941823414534, "grad_norm": 0.679084062576294, "learning_rate": 3.4050968546916307e-07, "loss": 0.5973, "step": 17228 }, { "epoch": 0.885445575084798, "grad_norm": 1.0908101797103882, "learning_rate": 3.4020787459116967e-07, "loss": 0.7169, "step": 17229 }, { "epoch": 0.8854969678281427, "grad_norm": 1.1170778274536133, "learning_rate": 3.399061928167857e-07, "loss": 0.6537, "step": 17230 }, { "epoch": 0.8855483605714873, "grad_norm": 1.1773847341537476, "learning_rate": 3.396046401543712e-07, "loss": 0.704, "step": 17231 }, { "epoch": 0.885599753314832, "grad_norm": 0.824786901473999, "learning_rate": 3.393032166122789e-07, "loss": 0.6336, "step": 17232 }, { "epoch": 0.8856511460581766, "grad_norm": 1.1812467575073242, "learning_rate": 3.390019221988616e-07, "loss": 0.6963, "step": 17233 }, { "epoch": 0.8857025388015213, "grad_norm": 1.030355453491211, "learning_rate": 3.387007569224654e-07, "loss": 0.6843, "step": 17234 }, { "epoch": 0.8857539315448658, "grad_norm": 1.1622542142868042, "learning_rate": 3.3839972079143635e-07, "loss": 0.6844, "step": 17235 }, { "epoch": 0.8858053242882105, "grad_norm": 1.0862395763397217, "learning_rate": 3.3809881381411224e-07, "loss": 0.6833, "step": 17236 }, { "epoch": 0.8858567170315551, "grad_norm": 1.1335065364837646, "learning_rate": 3.3779803599883257e-07, "loss": 0.6996, "step": 17237 }, { "epoch": 0.8859081097748998, "grad_norm": 1.078650712966919, "learning_rate": 3.374973873539289e-07, "loss": 0.7087, "step": 17238 }, { "epoch": 0.8859595025182444, "grad_norm": 1.1388357877731323, "learning_rate": 3.3719686788773187e-07, "loss": 0.6844, "step": 17239 }, { "epoch": 0.886010895261589, "grad_norm": 1.078320860862732, "learning_rate": 3.368964776085676e-07, "loss": 0.6191, "step": 17240 }, { "epoch": 0.8860622880049337, "grad_norm": 1.1209315061569214, "learning_rate": 3.3659621652475885e-07, "loss": 0.6476, "step": 17241 }, { "epoch": 0.8861136807482783, "grad_norm": 1.1069517135620117, "learning_rate": 3.3629608464462336e-07, "loss": 0.6787, "step": 17242 }, { "epoch": 0.886165073491623, "grad_norm": 1.0445693731307983, "learning_rate": 3.359960819764774e-07, "loss": 0.7287, "step": 17243 }, { "epoch": 0.8862164662349676, "grad_norm": 1.06179678440094, "learning_rate": 3.3569620852863303e-07, "loss": 0.6473, "step": 17244 }, { "epoch": 0.8862678589783123, "grad_norm": 1.0277901887893677, "learning_rate": 3.353964643093988e-07, "loss": 0.6769, "step": 17245 }, { "epoch": 0.8863192517216569, "grad_norm": 1.227080225944519, "learning_rate": 3.350968493270784e-07, "loss": 0.6961, "step": 17246 }, { "epoch": 0.8863706444650016, "grad_norm": 0.7237336039543152, "learning_rate": 3.347973635899726e-07, "loss": 0.645, "step": 17247 }, { "epoch": 0.8864220372083462, "grad_norm": 1.1391007900238037, "learning_rate": 3.344980071063819e-07, "loss": 0.6654, "step": 17248 }, { "epoch": 0.8864734299516909, "grad_norm": 1.0825445652008057, "learning_rate": 3.341987798845958e-07, "loss": 0.7326, "step": 17249 }, { "epoch": 0.8865248226950354, "grad_norm": 1.0735896825790405, "learning_rate": 3.338996819329077e-07, "loss": 0.6699, "step": 17250 }, { "epoch": 0.88657621543838, "grad_norm": 1.126123070716858, "learning_rate": 3.3360071325960307e-07, "loss": 0.6431, "step": 17251 }, { "epoch": 0.8866276081817247, "grad_norm": 0.6752283573150635, "learning_rate": 3.333018738729665e-07, "loss": 0.6071, "step": 17252 }, { "epoch": 0.8866790009250693, "grad_norm": 1.0911318063735962, "learning_rate": 3.330031637812764e-07, "loss": 0.6596, "step": 17253 }, { "epoch": 0.886730393668414, "grad_norm": 1.1691069602966309, "learning_rate": 3.327045829928083e-07, "loss": 0.7034, "step": 17254 }, { "epoch": 0.8867817864117586, "grad_norm": 1.122514247894287, "learning_rate": 3.3240613151583665e-07, "loss": 0.7618, "step": 17255 }, { "epoch": 0.8868331791551033, "grad_norm": 1.0930514335632324, "learning_rate": 3.3210780935862773e-07, "loss": 0.7227, "step": 17256 }, { "epoch": 0.8868845718984479, "grad_norm": 0.7268034815788269, "learning_rate": 3.318096165294493e-07, "loss": 0.5955, "step": 17257 }, { "epoch": 0.8869359646417926, "grad_norm": 1.1669716835021973, "learning_rate": 3.3151155303656144e-07, "loss": 0.706, "step": 17258 }, { "epoch": 0.8869873573851372, "grad_norm": 1.1718838214874268, "learning_rate": 3.312136188882231e-07, "loss": 0.6574, "step": 17259 }, { "epoch": 0.8870387501284819, "grad_norm": 0.831355094909668, "learning_rate": 3.309158140926877e-07, "loss": 0.6441, "step": 17260 }, { "epoch": 0.8870901428718265, "grad_norm": 1.1457546949386597, "learning_rate": 3.3061813865820744e-07, "loss": 0.6401, "step": 17261 }, { "epoch": 0.8871415356151712, "grad_norm": 0.7980967164039612, "learning_rate": 3.303205925930292e-07, "loss": 0.656, "step": 17262 }, { "epoch": 0.8871929283585158, "grad_norm": 1.0373868942260742, "learning_rate": 3.3002317590539623e-07, "loss": 0.6995, "step": 17263 }, { "epoch": 0.8872443211018605, "grad_norm": 1.0957270860671997, "learning_rate": 3.2972588860355035e-07, "loss": 0.6401, "step": 17264 }, { "epoch": 0.887295713845205, "grad_norm": 1.0539743900299072, "learning_rate": 3.2942873069572555e-07, "loss": 0.7054, "step": 17265 }, { "epoch": 0.8873471065885497, "grad_norm": 0.7700062394142151, "learning_rate": 3.291317021901569e-07, "loss": 0.6568, "step": 17266 }, { "epoch": 0.8873984993318943, "grad_norm": 1.053623080253601, "learning_rate": 3.2883480309507274e-07, "loss": 0.6993, "step": 17267 }, { "epoch": 0.8874498920752389, "grad_norm": 1.094169020652771, "learning_rate": 3.2853803341870105e-07, "loss": 0.7444, "step": 17268 }, { "epoch": 0.8875012848185836, "grad_norm": 0.8515596985816956, "learning_rate": 3.2824139316926074e-07, "loss": 0.6505, "step": 17269 }, { "epoch": 0.8875526775619282, "grad_norm": 1.090950846672058, "learning_rate": 3.27944882354973e-07, "loss": 0.6659, "step": 17270 }, { "epoch": 0.8876040703052729, "grad_norm": 1.0787317752838135, "learning_rate": 3.2764850098405245e-07, "loss": 0.6642, "step": 17271 }, { "epoch": 0.8876554630486175, "grad_norm": 0.7060444355010986, "learning_rate": 3.2735224906470906e-07, "loss": 0.6242, "step": 17272 }, { "epoch": 0.8877068557919622, "grad_norm": 1.141175389289856, "learning_rate": 3.2705612660515305e-07, "loss": 0.6913, "step": 17273 }, { "epoch": 0.8877582485353068, "grad_norm": 1.135288953781128, "learning_rate": 3.267601336135878e-07, "loss": 0.7261, "step": 17274 }, { "epoch": 0.8878096412786515, "grad_norm": 1.1385952234268188, "learning_rate": 3.264642700982135e-07, "loss": 0.664, "step": 17275 }, { "epoch": 0.8878610340219961, "grad_norm": 1.092997670173645, "learning_rate": 3.261685360672273e-07, "loss": 0.7433, "step": 17276 }, { "epoch": 0.8879124267653408, "grad_norm": 1.1415539979934692, "learning_rate": 3.2587293152882446e-07, "loss": 0.7035, "step": 17277 }, { "epoch": 0.8879638195086854, "grad_norm": 1.068906307220459, "learning_rate": 3.255774564911923e-07, "loss": 0.7129, "step": 17278 }, { "epoch": 0.8880152122520301, "grad_norm": 1.0996875762939453, "learning_rate": 3.2528211096251926e-07, "loss": 0.665, "step": 17279 }, { "epoch": 0.8880666049953746, "grad_norm": 1.0697276592254639, "learning_rate": 3.2498689495098767e-07, "loss": 0.6782, "step": 17280 }, { "epoch": 0.8881179977387192, "grad_norm": 0.8283963203430176, "learning_rate": 3.24691808464776e-07, "loss": 0.6436, "step": 17281 }, { "epoch": 0.8881693904820639, "grad_norm": 1.0669564008712769, "learning_rate": 3.2439685151206104e-07, "loss": 0.6697, "step": 17282 }, { "epoch": 0.8882207832254085, "grad_norm": 0.6826446056365967, "learning_rate": 3.24102024101014e-07, "loss": 0.6768, "step": 17283 }, { "epoch": 0.8882721759687532, "grad_norm": 0.661725640296936, "learning_rate": 3.2380732623980395e-07, "loss": 0.6234, "step": 17284 }, { "epoch": 0.8883235687120978, "grad_norm": 1.1046956777572632, "learning_rate": 3.235127579365943e-07, "loss": 0.6978, "step": 17285 }, { "epoch": 0.8883749614554425, "grad_norm": 1.2081642150878906, "learning_rate": 3.232183191995486e-07, "loss": 0.6554, "step": 17286 }, { "epoch": 0.8884263541987871, "grad_norm": 1.044550895690918, "learning_rate": 3.2292401003682303e-07, "loss": 0.6809, "step": 17287 }, { "epoch": 0.8884777469421318, "grad_norm": 1.076706886291504, "learning_rate": 3.226298304565717e-07, "loss": 0.6951, "step": 17288 }, { "epoch": 0.8885291396854764, "grad_norm": 1.12276029586792, "learning_rate": 3.2233578046694523e-07, "loss": 0.6735, "step": 17289 }, { "epoch": 0.8885805324288211, "grad_norm": 1.0970485210418701, "learning_rate": 3.2204186007609105e-07, "loss": 0.6897, "step": 17290 }, { "epoch": 0.8886319251721657, "grad_norm": 1.118424654006958, "learning_rate": 3.2174806929215206e-07, "loss": 0.7407, "step": 17291 }, { "epoch": 0.8886833179155104, "grad_norm": 1.053068995475769, "learning_rate": 3.2145440812326845e-07, "loss": 0.6691, "step": 17292 }, { "epoch": 0.888734710658855, "grad_norm": 1.1214486360549927, "learning_rate": 3.211608765775759e-07, "loss": 0.7023, "step": 17293 }, { "epoch": 0.8887861034021997, "grad_norm": 1.0410770177841187, "learning_rate": 3.2086747466320624e-07, "loss": 0.6647, "step": 17294 }, { "epoch": 0.8888374961455443, "grad_norm": 1.104561448097229, "learning_rate": 3.205742023882896e-07, "loss": 0.6612, "step": 17295 }, { "epoch": 0.8888888888888888, "grad_norm": 0.693846583366394, "learning_rate": 3.2028105976095125e-07, "loss": 0.6653, "step": 17296 }, { "epoch": 0.8889402816322335, "grad_norm": 1.1310362815856934, "learning_rate": 3.1998804678931295e-07, "loss": 0.6702, "step": 17297 }, { "epoch": 0.8889916743755781, "grad_norm": 1.1292986869812012, "learning_rate": 3.1969516348149155e-07, "loss": 0.7311, "step": 17298 }, { "epoch": 0.8890430671189228, "grad_norm": 1.0828626155853271, "learning_rate": 3.1940240984560333e-07, "loss": 0.6821, "step": 17299 }, { "epoch": 0.8890944598622674, "grad_norm": 0.7424675226211548, "learning_rate": 3.1910978588975905e-07, "loss": 0.6195, "step": 17300 }, { "epoch": 0.8891458526056121, "grad_norm": 1.0057932138442993, "learning_rate": 3.1881729162206556e-07, "loss": 0.6125, "step": 17301 }, { "epoch": 0.8891972453489567, "grad_norm": 1.1064280271530151, "learning_rate": 3.185249270506258e-07, "loss": 0.768, "step": 17302 }, { "epoch": 0.8892486380923014, "grad_norm": 1.0534429550170898, "learning_rate": 3.182326921835421e-07, "loss": 0.6472, "step": 17303 }, { "epoch": 0.889300030835646, "grad_norm": 1.0426263809204102, "learning_rate": 3.1794058702890983e-07, "loss": 0.7162, "step": 17304 }, { "epoch": 0.8893514235789907, "grad_norm": 1.062767744064331, "learning_rate": 3.176486115948213e-07, "loss": 0.6931, "step": 17305 }, { "epoch": 0.8894028163223353, "grad_norm": 1.0462026596069336, "learning_rate": 3.173567658893684e-07, "loss": 0.6355, "step": 17306 }, { "epoch": 0.88945420906568, "grad_norm": 1.1213829517364502, "learning_rate": 3.1706504992063403e-07, "loss": 0.7193, "step": 17307 }, { "epoch": 0.8895056018090246, "grad_norm": 1.0517857074737549, "learning_rate": 3.1677346369670235e-07, "loss": 0.673, "step": 17308 }, { "epoch": 0.8895569945523693, "grad_norm": 0.8106634020805359, "learning_rate": 3.164820072256514e-07, "loss": 0.6134, "step": 17309 }, { "epoch": 0.8896083872957139, "grad_norm": 1.0553170442581177, "learning_rate": 3.1619068051555624e-07, "loss": 0.709, "step": 17310 }, { "epoch": 0.8896597800390584, "grad_norm": 1.1086674928665161, "learning_rate": 3.1589948357448776e-07, "loss": 0.69, "step": 17311 }, { "epoch": 0.8897111727824031, "grad_norm": 1.1421409845352173, "learning_rate": 3.1560841641051555e-07, "loss": 0.6936, "step": 17312 }, { "epoch": 0.8897625655257477, "grad_norm": 1.3569501638412476, "learning_rate": 3.153174790317026e-07, "loss": 0.6742, "step": 17313 }, { "epoch": 0.8898139582690924, "grad_norm": 1.2307621240615845, "learning_rate": 3.1502667144610867e-07, "loss": 0.6787, "step": 17314 }, { "epoch": 0.889865351012437, "grad_norm": 0.6956360340118408, "learning_rate": 3.1473599366179277e-07, "loss": 0.64, "step": 17315 }, { "epoch": 0.8899167437557817, "grad_norm": 1.0923476219177246, "learning_rate": 3.144454456868079e-07, "loss": 0.6824, "step": 17316 }, { "epoch": 0.8899681364991263, "grad_norm": 0.9958836436271667, "learning_rate": 3.141550275292032e-07, "loss": 0.7347, "step": 17317 }, { "epoch": 0.890019529242471, "grad_norm": 1.0987982749938965, "learning_rate": 3.1386473919702507e-07, "loss": 0.7168, "step": 17318 }, { "epoch": 0.8900709219858156, "grad_norm": 1.1015421152114868, "learning_rate": 3.135745806983176e-07, "loss": 0.6392, "step": 17319 }, { "epoch": 0.8901223147291603, "grad_norm": 1.020914077758789, "learning_rate": 3.1328455204111705e-07, "loss": 0.7061, "step": 17320 }, { "epoch": 0.8901737074725049, "grad_norm": 1.1055606603622437, "learning_rate": 3.129946532334621e-07, "loss": 0.708, "step": 17321 }, { "epoch": 0.8902251002158496, "grad_norm": 1.0513299703598022, "learning_rate": 3.127048842833824e-07, "loss": 0.7093, "step": 17322 }, { "epoch": 0.8902764929591942, "grad_norm": 1.0631810426712036, "learning_rate": 3.124152451989071e-07, "loss": 0.7007, "step": 17323 }, { "epoch": 0.8903278857025388, "grad_norm": 1.0524544715881348, "learning_rate": 3.121257359880608e-07, "loss": 0.7452, "step": 17324 }, { "epoch": 0.8903792784458835, "grad_norm": 0.6523012518882751, "learning_rate": 3.1183635665886504e-07, "loss": 0.6578, "step": 17325 }, { "epoch": 0.890430671189228, "grad_norm": 1.1447899341583252, "learning_rate": 3.115471072193366e-07, "loss": 0.7014, "step": 17326 }, { "epoch": 0.8904820639325727, "grad_norm": 1.0926249027252197, "learning_rate": 3.112579876774896e-07, "loss": 0.644, "step": 17327 }, { "epoch": 0.8905334566759173, "grad_norm": 1.05857253074646, "learning_rate": 3.1096899804133553e-07, "loss": 0.7275, "step": 17328 }, { "epoch": 0.890584849419262, "grad_norm": 1.0685769319534302, "learning_rate": 3.1068013831887847e-07, "loss": 0.6488, "step": 17329 }, { "epoch": 0.8906362421626066, "grad_norm": 0.7496011257171631, "learning_rate": 3.103914085181242e-07, "loss": 0.6485, "step": 17330 }, { "epoch": 0.8906876349059513, "grad_norm": 1.159683108329773, "learning_rate": 3.101028086470698e-07, "loss": 0.7643, "step": 17331 }, { "epoch": 0.8907390276492959, "grad_norm": 1.2198076248168945, "learning_rate": 3.0981433871371367e-07, "loss": 0.7089, "step": 17332 }, { "epoch": 0.8907904203926406, "grad_norm": 1.1778459548950195, "learning_rate": 3.095259987260468e-07, "loss": 0.7314, "step": 17333 }, { "epoch": 0.8908418131359852, "grad_norm": 1.0457491874694824, "learning_rate": 3.092377886920583e-07, "loss": 0.6493, "step": 17334 }, { "epoch": 0.8908932058793299, "grad_norm": 1.1541686058044434, "learning_rate": 3.0894970861973295e-07, "loss": 0.6408, "step": 17335 }, { "epoch": 0.8909445986226745, "grad_norm": 0.7138638496398926, "learning_rate": 3.086617585170515e-07, "loss": 0.6332, "step": 17336 }, { "epoch": 0.8909959913660191, "grad_norm": 1.0447943210601807, "learning_rate": 3.083739383919937e-07, "loss": 0.6963, "step": 17337 }, { "epoch": 0.8910473841093638, "grad_norm": 1.1010397672653198, "learning_rate": 3.0808624825253265e-07, "loss": 0.7216, "step": 17338 }, { "epoch": 0.8910987768527084, "grad_norm": 0.7465928196907043, "learning_rate": 3.0779868810663917e-07, "loss": 0.6351, "step": 17339 }, { "epoch": 0.8911501695960531, "grad_norm": 0.7380722761154175, "learning_rate": 3.0751125796227965e-07, "loss": 0.6614, "step": 17340 }, { "epoch": 0.8912015623393976, "grad_norm": 1.1245673894882202, "learning_rate": 3.0722395782741945e-07, "loss": 0.7219, "step": 17341 }, { "epoch": 0.8912529550827423, "grad_norm": 0.7626135945320129, "learning_rate": 3.0693678771001765e-07, "loss": 0.6608, "step": 17342 }, { "epoch": 0.8913043478260869, "grad_norm": 0.9172635078430176, "learning_rate": 3.066497476180297e-07, "loss": 0.6449, "step": 17343 }, { "epoch": 0.8913557405694316, "grad_norm": 0.771536111831665, "learning_rate": 3.063628375594091e-07, "loss": 0.6464, "step": 17344 }, { "epoch": 0.8914071333127762, "grad_norm": 0.7993407249450684, "learning_rate": 3.0607605754210457e-07, "loss": 0.6547, "step": 17345 }, { "epoch": 0.8914585260561209, "grad_norm": 1.0910483598709106, "learning_rate": 3.057894075740619e-07, "loss": 0.6606, "step": 17346 }, { "epoch": 0.8915099187994655, "grad_norm": 1.0440376996994019, "learning_rate": 3.055028876632227e-07, "loss": 0.6993, "step": 17347 }, { "epoch": 0.8915613115428102, "grad_norm": 1.0500766038894653, "learning_rate": 3.052164978175265e-07, "loss": 0.6839, "step": 17348 }, { "epoch": 0.8916127042861548, "grad_norm": 0.6825932264328003, "learning_rate": 3.049302380449054e-07, "loss": 0.664, "step": 17349 }, { "epoch": 0.8916640970294994, "grad_norm": 1.0863875150680542, "learning_rate": 3.046441083532931e-07, "loss": 0.6512, "step": 17350 }, { "epoch": 0.8917154897728441, "grad_norm": 1.0442588329315186, "learning_rate": 3.043581087506159e-07, "loss": 0.6741, "step": 17351 }, { "epoch": 0.8917668825161887, "grad_norm": 1.3062458038330078, "learning_rate": 3.0407223924479757e-07, "loss": 0.631, "step": 17352 }, { "epoch": 0.8918182752595334, "grad_norm": 0.7802101373672485, "learning_rate": 3.0378649984375784e-07, "loss": 0.7097, "step": 17353 }, { "epoch": 0.891869668002878, "grad_norm": 1.0873173475265503, "learning_rate": 3.035008905554149e-07, "loss": 0.6628, "step": 17354 }, { "epoch": 0.8919210607462227, "grad_norm": 1.1318784952163696, "learning_rate": 3.0321541138768064e-07, "loss": 0.6697, "step": 17355 }, { "epoch": 0.8919724534895672, "grad_norm": 1.121634840965271, "learning_rate": 3.029300623484643e-07, "loss": 0.799, "step": 17356 }, { "epoch": 0.8920238462329119, "grad_norm": 1.04374361038208, "learning_rate": 3.026448434456741e-07, "loss": 0.6791, "step": 17357 }, { "epoch": 0.8920752389762565, "grad_norm": 1.0634995698928833, "learning_rate": 3.023597546872087e-07, "loss": 0.7081, "step": 17358 }, { "epoch": 0.8921266317196012, "grad_norm": 1.1587491035461426, "learning_rate": 3.020747960809689e-07, "loss": 0.7061, "step": 17359 }, { "epoch": 0.8921780244629458, "grad_norm": 1.088513970375061, "learning_rate": 3.017899676348496e-07, "loss": 0.6553, "step": 17360 }, { "epoch": 0.8922294172062905, "grad_norm": 1.0593944787979126, "learning_rate": 3.0150526935674216e-07, "loss": 0.7001, "step": 17361 }, { "epoch": 0.8922808099496351, "grad_norm": 1.0292344093322754, "learning_rate": 3.012207012545332e-07, "loss": 0.6967, "step": 17362 }, { "epoch": 0.8923322026929797, "grad_norm": 0.760574460029602, "learning_rate": 3.009362633361085e-07, "loss": 0.6814, "step": 17363 }, { "epoch": 0.8923835954363244, "grad_norm": 0.6671168208122253, "learning_rate": 3.006519556093479e-07, "loss": 0.5869, "step": 17364 }, { "epoch": 0.892434988179669, "grad_norm": 1.077892780303955, "learning_rate": 3.003677780821285e-07, "loss": 0.6401, "step": 17365 }, { "epoch": 0.8924863809230137, "grad_norm": 1.105648398399353, "learning_rate": 3.000837307623239e-07, "loss": 0.6991, "step": 17366 }, { "epoch": 0.8925377736663583, "grad_norm": 1.10355806350708, "learning_rate": 2.9979981365780397e-07, "loss": 0.6626, "step": 17367 }, { "epoch": 0.892589166409703, "grad_norm": 1.225762128829956, "learning_rate": 2.995160267764341e-07, "loss": 0.6537, "step": 17368 }, { "epoch": 0.8926405591530476, "grad_norm": 1.0736891031265259, "learning_rate": 2.992323701260774e-07, "loss": 0.6986, "step": 17369 }, { "epoch": 0.8926919518963923, "grad_norm": 1.0879831314086914, "learning_rate": 2.989488437145938e-07, "loss": 0.6776, "step": 17370 }, { "epoch": 0.8927433446397369, "grad_norm": 1.0839269161224365, "learning_rate": 2.9866544754983626e-07, "loss": 0.6477, "step": 17371 }, { "epoch": 0.8927947373830815, "grad_norm": 1.1312848329544067, "learning_rate": 2.983821816396587e-07, "loss": 0.7612, "step": 17372 }, { "epoch": 0.8928461301264261, "grad_norm": 1.0883126258850098, "learning_rate": 2.9809904599190865e-07, "loss": 0.7205, "step": 17373 }, { "epoch": 0.8928975228697708, "grad_norm": 1.0878690481185913, "learning_rate": 2.9781604061442927e-07, "loss": 0.6763, "step": 17374 }, { "epoch": 0.8929489156131154, "grad_norm": 1.1662545204162598, "learning_rate": 2.975331655150637e-07, "loss": 0.6928, "step": 17375 }, { "epoch": 0.89300030835646, "grad_norm": 1.0824042558670044, "learning_rate": 2.9725042070164745e-07, "loss": 0.7007, "step": 17376 }, { "epoch": 0.8930517010998047, "grad_norm": 0.6959069967269897, "learning_rate": 2.969678061820164e-07, "loss": 0.6076, "step": 17377 }, { "epoch": 0.8931030938431493, "grad_norm": 1.0717687606811523, "learning_rate": 2.9668532196399704e-07, "loss": 0.6654, "step": 17378 }, { "epoch": 0.893154486586494, "grad_norm": 1.1564409732818604, "learning_rate": 2.9640296805541925e-07, "loss": 0.7075, "step": 17379 }, { "epoch": 0.8932058793298386, "grad_norm": 1.1828687191009521, "learning_rate": 2.961207444641051e-07, "loss": 0.7335, "step": 17380 }, { "epoch": 0.8932572720731833, "grad_norm": 1.1136672496795654, "learning_rate": 2.958386511978728e-07, "loss": 0.6683, "step": 17381 }, { "epoch": 0.8933086648165279, "grad_norm": 1.1237658262252808, "learning_rate": 2.9555668826453774e-07, "loss": 0.688, "step": 17382 }, { "epoch": 0.8933600575598726, "grad_norm": 1.0470094680786133, "learning_rate": 2.952748556719137e-07, "loss": 0.7095, "step": 17383 }, { "epoch": 0.8934114503032172, "grad_norm": 0.6859115958213806, "learning_rate": 2.949931534278083e-07, "loss": 0.6434, "step": 17384 }, { "epoch": 0.8934628430465619, "grad_norm": 1.0506435632705688, "learning_rate": 2.947115815400259e-07, "loss": 0.6776, "step": 17385 }, { "epoch": 0.8935142357899065, "grad_norm": 1.1310999393463135, "learning_rate": 2.9443014001636796e-07, "loss": 0.7016, "step": 17386 }, { "epoch": 0.893565628533251, "grad_norm": 1.1167365312576294, "learning_rate": 2.941488288646316e-07, "loss": 0.6769, "step": 17387 }, { "epoch": 0.8936170212765957, "grad_norm": 1.1728516817092896, "learning_rate": 2.938676480926117e-07, "loss": 0.6583, "step": 17388 }, { "epoch": 0.8936684140199403, "grad_norm": 1.078352451324463, "learning_rate": 2.935865977080987e-07, "loss": 0.6753, "step": 17389 }, { "epoch": 0.893719806763285, "grad_norm": 1.1132880449295044, "learning_rate": 2.933056777188786e-07, "loss": 0.7001, "step": 17390 }, { "epoch": 0.8937711995066296, "grad_norm": 1.0806270837783813, "learning_rate": 2.930248881327341e-07, "loss": 0.6396, "step": 17391 }, { "epoch": 0.8938225922499743, "grad_norm": 1.035416603088379, "learning_rate": 2.927442289574461e-07, "loss": 0.679, "step": 17392 }, { "epoch": 0.8938739849933189, "grad_norm": 1.0838271379470825, "learning_rate": 2.924637002007896e-07, "loss": 0.6865, "step": 17393 }, { "epoch": 0.8939253777366636, "grad_norm": 0.7409743666648865, "learning_rate": 2.9218330187053723e-07, "loss": 0.6674, "step": 17394 }, { "epoch": 0.8939767704800082, "grad_norm": 1.0827436447143555, "learning_rate": 2.919030339744572e-07, "loss": 0.7165, "step": 17395 }, { "epoch": 0.8940281632233529, "grad_norm": 0.7001832723617554, "learning_rate": 2.916228965203155e-07, "loss": 0.6598, "step": 17396 }, { "epoch": 0.8940795559666975, "grad_norm": 1.1202679872512817, "learning_rate": 2.913428895158726e-07, "loss": 0.7093, "step": 17397 }, { "epoch": 0.8941309487100422, "grad_norm": 1.038336992263794, "learning_rate": 2.910630129688863e-07, "loss": 0.736, "step": 17398 }, { "epoch": 0.8941823414533868, "grad_norm": 1.0931763648986816, "learning_rate": 2.90783266887113e-07, "loss": 0.7558, "step": 17399 }, { "epoch": 0.8942337341967315, "grad_norm": 1.1437512636184692, "learning_rate": 2.9050365127829995e-07, "loss": 0.6804, "step": 17400 }, { "epoch": 0.8942851269400761, "grad_norm": 1.1102626323699951, "learning_rate": 2.9022416615019645e-07, "loss": 0.6726, "step": 17401 }, { "epoch": 0.8943365196834206, "grad_norm": 0.6824184656143188, "learning_rate": 2.899448115105452e-07, "loss": 0.6311, "step": 17402 }, { "epoch": 0.8943879124267653, "grad_norm": 0.7354761958122253, "learning_rate": 2.896655873670856e-07, "loss": 0.6558, "step": 17403 }, { "epoch": 0.8944393051701099, "grad_norm": 1.1177719831466675, "learning_rate": 2.8938649372755425e-07, "loss": 0.7151, "step": 17404 }, { "epoch": 0.8944906979134546, "grad_norm": 1.1368799209594727, "learning_rate": 2.891075305996838e-07, "loss": 0.7442, "step": 17405 }, { "epoch": 0.8945420906567992, "grad_norm": 1.176924228668213, "learning_rate": 2.8882869799120305e-07, "loss": 0.6836, "step": 17406 }, { "epoch": 0.8945934834001439, "grad_norm": 1.1300816535949707, "learning_rate": 2.885499959098365e-07, "loss": 0.6808, "step": 17407 }, { "epoch": 0.8946448761434885, "grad_norm": 1.0469435453414917, "learning_rate": 2.8827142436330726e-07, "loss": 0.7138, "step": 17408 }, { "epoch": 0.8946962688868332, "grad_norm": 1.0598068237304688, "learning_rate": 2.8799298335933255e-07, "loss": 0.6859, "step": 17409 }, { "epoch": 0.8947476616301778, "grad_norm": 1.0993692874908447, "learning_rate": 2.877146729056274e-07, "loss": 0.6955, "step": 17410 }, { "epoch": 0.8947990543735225, "grad_norm": 1.1401753425598145, "learning_rate": 2.8743649300990104e-07, "loss": 0.6953, "step": 17411 }, { "epoch": 0.8948504471168671, "grad_norm": 1.0517836809158325, "learning_rate": 2.871584436798636e-07, "loss": 0.7002, "step": 17412 }, { "epoch": 0.8949018398602118, "grad_norm": 1.127001166343689, "learning_rate": 2.868805249232154e-07, "loss": 0.682, "step": 17413 }, { "epoch": 0.8949532326035564, "grad_norm": 1.0674580335617065, "learning_rate": 2.866027367476587e-07, "loss": 0.651, "step": 17414 }, { "epoch": 0.8950046253469011, "grad_norm": 1.0983985662460327, "learning_rate": 2.86325079160889e-07, "loss": 0.7076, "step": 17415 }, { "epoch": 0.8950560180902457, "grad_norm": 1.1478139162063599, "learning_rate": 2.8604755217059853e-07, "loss": 0.7278, "step": 17416 }, { "epoch": 0.8951074108335902, "grad_norm": 0.7941725254058838, "learning_rate": 2.8577015578447774e-07, "loss": 0.6176, "step": 17417 }, { "epoch": 0.8951588035769349, "grad_norm": 0.7267408967018127, "learning_rate": 2.854928900102116e-07, "loss": 0.6702, "step": 17418 }, { "epoch": 0.8952101963202795, "grad_norm": 0.7024105191230774, "learning_rate": 2.852157548554818e-07, "loss": 0.6089, "step": 17419 }, { "epoch": 0.8952615890636242, "grad_norm": 1.1136268377304077, "learning_rate": 2.8493875032796547e-07, "loss": 0.7164, "step": 17420 }, { "epoch": 0.8953129818069688, "grad_norm": 1.101357340812683, "learning_rate": 2.846618764353404e-07, "loss": 0.7242, "step": 17421 }, { "epoch": 0.8953643745503135, "grad_norm": 1.1436948776245117, "learning_rate": 2.8438513318527426e-07, "loss": 0.6694, "step": 17422 }, { "epoch": 0.8954157672936581, "grad_norm": 1.0445762872695923, "learning_rate": 2.8410852058543656e-07, "loss": 0.674, "step": 17423 }, { "epoch": 0.8954671600370028, "grad_norm": 1.0948631763458252, "learning_rate": 2.838320386434895e-07, "loss": 0.6276, "step": 17424 }, { "epoch": 0.8955185527803474, "grad_norm": 1.0978718996047974, "learning_rate": 2.8355568736709524e-07, "loss": 0.7145, "step": 17425 }, { "epoch": 0.8955699455236921, "grad_norm": 1.0621562004089355, "learning_rate": 2.8327946676390884e-07, "loss": 0.7117, "step": 17426 }, { "epoch": 0.8956213382670367, "grad_norm": 1.1699554920196533, "learning_rate": 2.830033768415835e-07, "loss": 0.6862, "step": 17427 }, { "epoch": 0.8956727310103814, "grad_norm": 0.6954039335250854, "learning_rate": 2.827274176077699e-07, "loss": 0.6233, "step": 17428 }, { "epoch": 0.895724123753726, "grad_norm": 1.0798120498657227, "learning_rate": 2.824515890701107e-07, "loss": 0.6781, "step": 17429 }, { "epoch": 0.8957755164970707, "grad_norm": 0.7310761213302612, "learning_rate": 2.82175891236251e-07, "loss": 0.6765, "step": 17430 }, { "epoch": 0.8958269092404153, "grad_norm": 1.0690207481384277, "learning_rate": 2.8190032411382795e-07, "loss": 0.7012, "step": 17431 }, { "epoch": 0.8958783019837598, "grad_norm": 1.1398894786834717, "learning_rate": 2.8162488771047604e-07, "loss": 0.6625, "step": 17432 }, { "epoch": 0.8959296947271045, "grad_norm": 1.1216741800308228, "learning_rate": 2.813495820338269e-07, "loss": 0.7135, "step": 17433 }, { "epoch": 0.8959810874704491, "grad_norm": 0.895706295967102, "learning_rate": 2.8107440709150837e-07, "loss": 0.6564, "step": 17434 }, { "epoch": 0.8960324802137938, "grad_norm": 1.2029927968978882, "learning_rate": 2.807993628911443e-07, "loss": 0.7077, "step": 17435 }, { "epoch": 0.8960838729571384, "grad_norm": 1.173804521560669, "learning_rate": 2.805244494403542e-07, "loss": 0.6985, "step": 17436 }, { "epoch": 0.8961352657004831, "grad_norm": 1.109657883644104, "learning_rate": 2.8024966674675636e-07, "loss": 0.6731, "step": 17437 }, { "epoch": 0.8961866584438277, "grad_norm": 1.0929486751556396, "learning_rate": 2.7997501481796253e-07, "loss": 0.7252, "step": 17438 }, { "epoch": 0.8962380511871724, "grad_norm": 1.126557469367981, "learning_rate": 2.797004936615827e-07, "loss": 0.6925, "step": 17439 }, { "epoch": 0.896289443930517, "grad_norm": 1.048191785812378, "learning_rate": 2.794261032852219e-07, "loss": 0.6795, "step": 17440 }, { "epoch": 0.8963408366738617, "grad_norm": 1.1220759153366089, "learning_rate": 2.791518436964846e-07, "loss": 0.6437, "step": 17441 }, { "epoch": 0.8963922294172063, "grad_norm": 1.0785547494888306, "learning_rate": 2.78877714902967e-07, "loss": 0.6876, "step": 17442 }, { "epoch": 0.896443622160551, "grad_norm": 0.664884626865387, "learning_rate": 2.786037169122652e-07, "loss": 0.6377, "step": 17443 }, { "epoch": 0.8964950149038956, "grad_norm": 1.0685147047042847, "learning_rate": 2.7832984973197084e-07, "loss": 0.6792, "step": 17444 }, { "epoch": 0.8965464076472403, "grad_norm": 1.152539610862732, "learning_rate": 2.7805611336967077e-07, "loss": 0.7028, "step": 17445 }, { "epoch": 0.8965978003905849, "grad_norm": 1.1342029571533203, "learning_rate": 2.777825078329488e-07, "loss": 0.6706, "step": 17446 }, { "epoch": 0.8966491931339294, "grad_norm": 1.1150037050247192, "learning_rate": 2.775090331293867e-07, "loss": 0.6691, "step": 17447 }, { "epoch": 0.8967005858772741, "grad_norm": 1.0736795663833618, "learning_rate": 2.772356892665612e-07, "loss": 0.6861, "step": 17448 }, { "epoch": 0.8967519786206187, "grad_norm": 1.1482558250427246, "learning_rate": 2.7696247625204397e-07, "loss": 0.6929, "step": 17449 }, { "epoch": 0.8968033713639634, "grad_norm": 0.6963222622871399, "learning_rate": 2.7668939409340677e-07, "loss": 0.6597, "step": 17450 }, { "epoch": 0.896854764107308, "grad_norm": 1.0902416706085205, "learning_rate": 2.7641644279821356e-07, "loss": 0.6753, "step": 17451 }, { "epoch": 0.8969061568506527, "grad_norm": 1.0697299242019653, "learning_rate": 2.7614362237402825e-07, "loss": 0.6434, "step": 17452 }, { "epoch": 0.8969575495939973, "grad_norm": 1.2497276067733765, "learning_rate": 2.758709328284093e-07, "loss": 0.6828, "step": 17453 }, { "epoch": 0.897008942337342, "grad_norm": 0.7118691205978394, "learning_rate": 2.755983741689111e-07, "loss": 0.6071, "step": 17454 }, { "epoch": 0.8970603350806866, "grad_norm": 1.0583820343017578, "learning_rate": 2.7532594640308496e-07, "loss": 0.6873, "step": 17455 }, { "epoch": 0.8971117278240313, "grad_norm": 1.100954294204712, "learning_rate": 2.750536495384798e-07, "loss": 0.6892, "step": 17456 }, { "epoch": 0.8971631205673759, "grad_norm": 1.1245986223220825, "learning_rate": 2.7478148358263956e-07, "loss": 0.7124, "step": 17457 }, { "epoch": 0.8972145133107206, "grad_norm": 0.8061022758483887, "learning_rate": 2.7450944854310323e-07, "loss": 0.6273, "step": 17458 }, { "epoch": 0.8972659060540652, "grad_norm": 1.0604676008224487, "learning_rate": 2.742375444274104e-07, "loss": 0.6666, "step": 17459 }, { "epoch": 0.8973172987974098, "grad_norm": 1.0862524509429932, "learning_rate": 2.739657712430932e-07, "loss": 0.681, "step": 17460 }, { "epoch": 0.8973686915407545, "grad_norm": 0.732409656047821, "learning_rate": 2.736941289976813e-07, "loss": 0.6236, "step": 17461 }, { "epoch": 0.8974200842840991, "grad_norm": 1.0655211210250854, "learning_rate": 2.7342261769869984e-07, "loss": 0.6367, "step": 17462 }, { "epoch": 0.8974714770274437, "grad_norm": 1.0893183946609497, "learning_rate": 2.7315123735367376e-07, "loss": 0.7372, "step": 17463 }, { "epoch": 0.8975228697707883, "grad_norm": 1.0969533920288086, "learning_rate": 2.7287998797011885e-07, "loss": 0.6763, "step": 17464 }, { "epoch": 0.897574262514133, "grad_norm": 1.0983387231826782, "learning_rate": 2.726088695555523e-07, "loss": 0.7383, "step": 17465 }, { "epoch": 0.8976256552574776, "grad_norm": 1.101660132408142, "learning_rate": 2.723378821174855e-07, "loss": 0.6732, "step": 17466 }, { "epoch": 0.8976770480008223, "grad_norm": 1.1684496402740479, "learning_rate": 2.7206702566342504e-07, "loss": 0.6641, "step": 17467 }, { "epoch": 0.8977284407441669, "grad_norm": 0.637290894985199, "learning_rate": 2.717963002008772e-07, "loss": 0.6731, "step": 17468 }, { "epoch": 0.8977798334875116, "grad_norm": 1.0875515937805176, "learning_rate": 2.715257057373416e-07, "loss": 0.7013, "step": 17469 }, { "epoch": 0.8978312262308562, "grad_norm": 0.6846639513969421, "learning_rate": 2.712552422803161e-07, "loss": 0.6243, "step": 17470 }, { "epoch": 0.8978826189742009, "grad_norm": 0.7346695065498352, "learning_rate": 2.709849098372919e-07, "loss": 0.6454, "step": 17471 }, { "epoch": 0.8979340117175455, "grad_norm": 1.1032160520553589, "learning_rate": 2.707147084157613e-07, "loss": 0.6511, "step": 17472 }, { "epoch": 0.8979854044608901, "grad_norm": 1.036848783493042, "learning_rate": 2.704446380232095e-07, "loss": 0.6478, "step": 17473 }, { "epoch": 0.8980367972042348, "grad_norm": 1.024643063545227, "learning_rate": 2.7017469866711943e-07, "loss": 0.6535, "step": 17474 }, { "epoch": 0.8980881899475794, "grad_norm": 1.10590660572052, "learning_rate": 2.699048903549684e-07, "loss": 0.7101, "step": 17475 }, { "epoch": 0.8981395826909241, "grad_norm": 1.0675209760665894, "learning_rate": 2.6963521309423424e-07, "loss": 0.6177, "step": 17476 }, { "epoch": 0.8981909754342687, "grad_norm": 1.039973497390747, "learning_rate": 2.693656668923866e-07, "loss": 0.6704, "step": 17477 }, { "epoch": 0.8982423681776133, "grad_norm": 1.1084675788879395, "learning_rate": 2.6909625175689403e-07, "loss": 0.6838, "step": 17478 }, { "epoch": 0.8982937609209579, "grad_norm": 0.7855969071388245, "learning_rate": 2.688269676952221e-07, "loss": 0.6471, "step": 17479 }, { "epoch": 0.8983451536643026, "grad_norm": 1.2746654748916626, "learning_rate": 2.6855781471482876e-07, "loss": 0.6899, "step": 17480 }, { "epoch": 0.8983965464076472, "grad_norm": 1.088821530342102, "learning_rate": 2.6828879282317424e-07, "loss": 0.6392, "step": 17481 }, { "epoch": 0.8984479391509919, "grad_norm": 1.2113670110702515, "learning_rate": 2.680199020277102e-07, "loss": 0.776, "step": 17482 }, { "epoch": 0.8984993318943365, "grad_norm": 1.0478318929672241, "learning_rate": 2.6775114233588696e-07, "loss": 0.7157, "step": 17483 }, { "epoch": 0.8985507246376812, "grad_norm": 0.7268092036247253, "learning_rate": 2.6748251375515065e-07, "loss": 0.6763, "step": 17484 }, { "epoch": 0.8986021173810258, "grad_norm": 0.7095730900764465, "learning_rate": 2.6721401629294373e-07, "loss": 0.6687, "step": 17485 }, { "epoch": 0.8986535101243704, "grad_norm": 0.8008632659912109, "learning_rate": 2.669456499567058e-07, "loss": 0.6591, "step": 17486 }, { "epoch": 0.8987049028677151, "grad_norm": 1.0375851392745972, "learning_rate": 2.6667741475387144e-07, "loss": 0.6689, "step": 17487 }, { "epoch": 0.8987562956110597, "grad_norm": 0.7210838794708252, "learning_rate": 2.664093106918725e-07, "loss": 0.6735, "step": 17488 }, { "epoch": 0.8988076883544044, "grad_norm": 1.0483851432800293, "learning_rate": 2.66141337778138e-07, "loss": 0.7108, "step": 17489 }, { "epoch": 0.898859081097749, "grad_norm": 1.1259170770645142, "learning_rate": 2.6587349602009095e-07, "loss": 0.7455, "step": 17490 }, { "epoch": 0.8989104738410937, "grad_norm": 1.042736291885376, "learning_rate": 2.656057854251526e-07, "loss": 0.6843, "step": 17491 }, { "epoch": 0.8989618665844383, "grad_norm": 1.0363774299621582, "learning_rate": 2.653382060007409e-07, "loss": 0.7176, "step": 17492 }, { "epoch": 0.8990132593277829, "grad_norm": 0.7217842936515808, "learning_rate": 2.650707577542677e-07, "loss": 0.5943, "step": 17493 }, { "epoch": 0.8990646520711275, "grad_norm": 1.0505690574645996, "learning_rate": 2.648034406931449e-07, "loss": 0.7204, "step": 17494 }, { "epoch": 0.8991160448144722, "grad_norm": 1.1260002851486206, "learning_rate": 2.645362548247776e-07, "loss": 0.7355, "step": 17495 }, { "epoch": 0.8991674375578168, "grad_norm": 0.8638566732406616, "learning_rate": 2.642692001565683e-07, "loss": 0.6529, "step": 17496 }, { "epoch": 0.8992188303011615, "grad_norm": 1.1522737741470337, "learning_rate": 2.6400227669591547e-07, "loss": 0.7245, "step": 17497 }, { "epoch": 0.8992702230445061, "grad_norm": 1.0978188514709473, "learning_rate": 2.637354844502166e-07, "loss": 0.7045, "step": 17498 }, { "epoch": 0.8993216157878507, "grad_norm": 1.102148413658142, "learning_rate": 2.634688234268612e-07, "loss": 0.6682, "step": 17499 }, { "epoch": 0.8993730085311954, "grad_norm": 1.1625529527664185, "learning_rate": 2.632022936332385e-07, "loss": 0.703, "step": 17500 }, { "epoch": 0.89942440127454, "grad_norm": 1.1003016233444214, "learning_rate": 2.6293589507673243e-07, "loss": 0.7322, "step": 17501 }, { "epoch": 0.8994757940178847, "grad_norm": 0.7062330842018127, "learning_rate": 2.626696277647245e-07, "loss": 0.6137, "step": 17502 }, { "epoch": 0.8995271867612293, "grad_norm": 0.6347520351409912, "learning_rate": 2.624034917045909e-07, "loss": 0.7068, "step": 17503 }, { "epoch": 0.899578579504574, "grad_norm": 0.6669344902038574, "learning_rate": 2.621374869037052e-07, "loss": 0.6828, "step": 17504 }, { "epoch": 0.8996299722479186, "grad_norm": 1.1745636463165283, "learning_rate": 2.618716133694393e-07, "loss": 0.6928, "step": 17505 }, { "epoch": 0.8996813649912633, "grad_norm": 1.1013671159744263, "learning_rate": 2.6160587110915626e-07, "loss": 0.6903, "step": 17506 }, { "epoch": 0.8997327577346079, "grad_norm": 1.1720566749572754, "learning_rate": 2.613402601302212e-07, "loss": 0.6909, "step": 17507 }, { "epoch": 0.8997841504779525, "grad_norm": 1.0640180110931396, "learning_rate": 2.6107478043999213e-07, "loss": 0.6217, "step": 17508 }, { "epoch": 0.8998355432212971, "grad_norm": 1.1407326459884644, "learning_rate": 2.608094320458243e-07, "loss": 0.6271, "step": 17509 }, { "epoch": 0.8998869359646418, "grad_norm": 1.2122441530227661, "learning_rate": 2.6054421495506963e-07, "loss": 0.6891, "step": 17510 }, { "epoch": 0.8999383287079864, "grad_norm": 1.0725260972976685, "learning_rate": 2.602791291750767e-07, "loss": 0.7182, "step": 17511 }, { "epoch": 0.899989721451331, "grad_norm": 0.7054963111877441, "learning_rate": 2.60014174713189e-07, "loss": 0.6274, "step": 17512 }, { "epoch": 0.9000411141946757, "grad_norm": 1.1932792663574219, "learning_rate": 2.597493515767474e-07, "loss": 0.6956, "step": 17513 }, { "epoch": 0.9000925069380203, "grad_norm": 1.0281811952590942, "learning_rate": 2.5948465977309047e-07, "loss": 0.6684, "step": 17514 }, { "epoch": 0.900143899681365, "grad_norm": 1.1068369150161743, "learning_rate": 2.592200993095495e-07, "loss": 0.7083, "step": 17515 }, { "epoch": 0.9001952924247096, "grad_norm": 1.0526407957077026, "learning_rate": 2.589556701934559e-07, "loss": 0.7109, "step": 17516 }, { "epoch": 0.9002466851680543, "grad_norm": 1.0962176322937012, "learning_rate": 2.586913724321349e-07, "loss": 0.6545, "step": 17517 }, { "epoch": 0.9002980779113989, "grad_norm": 1.1656428575515747, "learning_rate": 2.584272060329107e-07, "loss": 0.7366, "step": 17518 }, { "epoch": 0.9003494706547436, "grad_norm": 1.1307579278945923, "learning_rate": 2.581631710031013e-07, "loss": 0.6816, "step": 17519 }, { "epoch": 0.9004008633980882, "grad_norm": 0.9736624956130981, "learning_rate": 2.5789926735002137e-07, "loss": 0.6494, "step": 17520 }, { "epoch": 0.9004522561414329, "grad_norm": 1.0942718982696533, "learning_rate": 2.576354950809845e-07, "loss": 0.7304, "step": 17521 }, { "epoch": 0.9005036488847775, "grad_norm": 1.0813865661621094, "learning_rate": 2.5737185420329604e-07, "loss": 0.7095, "step": 17522 }, { "epoch": 0.900555041628122, "grad_norm": 1.0433142185211182, "learning_rate": 2.571083447242628e-07, "loss": 0.6947, "step": 17523 }, { "epoch": 0.9006064343714667, "grad_norm": 1.2292811870574951, "learning_rate": 2.568449666511846e-07, "loss": 0.7057, "step": 17524 }, { "epoch": 0.9006578271148113, "grad_norm": 1.1177713871002197, "learning_rate": 2.565817199913584e-07, "loss": 0.6524, "step": 17525 }, { "epoch": 0.900709219858156, "grad_norm": 1.1463268995285034, "learning_rate": 2.563186047520766e-07, "loss": 0.6729, "step": 17526 }, { "epoch": 0.9007606126015006, "grad_norm": 1.084991693496704, "learning_rate": 2.5605562094063175e-07, "loss": 0.6614, "step": 17527 }, { "epoch": 0.9008120053448453, "grad_norm": 1.038216233253479, "learning_rate": 2.5579276856430804e-07, "loss": 0.6621, "step": 17528 }, { "epoch": 0.9008633980881899, "grad_norm": 1.148944616317749, "learning_rate": 2.5553004763038794e-07, "loss": 0.72, "step": 17529 }, { "epoch": 0.9009147908315346, "grad_norm": 1.0615544319152832, "learning_rate": 2.552674581461523e-07, "loss": 0.7131, "step": 17530 }, { "epoch": 0.9009661835748792, "grad_norm": 1.047399878501892, "learning_rate": 2.5500500011887364e-07, "loss": 0.6985, "step": 17531 }, { "epoch": 0.9010175763182239, "grad_norm": 0.6706374883651733, "learning_rate": 2.547426735558256e-07, "loss": 0.6488, "step": 17532 }, { "epoch": 0.9010689690615685, "grad_norm": 1.1104978322982788, "learning_rate": 2.5448047846427514e-07, "loss": 0.6666, "step": 17533 }, { "epoch": 0.9011203618049132, "grad_norm": 1.0795434713363647, "learning_rate": 2.542184148514881e-07, "loss": 0.7017, "step": 17534 }, { "epoch": 0.9011717545482578, "grad_norm": 1.070583462715149, "learning_rate": 2.5395648272472317e-07, "loss": 0.7094, "step": 17535 }, { "epoch": 0.9012231472916025, "grad_norm": 1.2641292810440063, "learning_rate": 2.536946820912384e-07, "loss": 0.6791, "step": 17536 }, { "epoch": 0.9012745400349471, "grad_norm": 1.1463603973388672, "learning_rate": 2.5343301295828795e-07, "loss": 0.7449, "step": 17537 }, { "epoch": 0.9013259327782918, "grad_norm": 1.107149600982666, "learning_rate": 2.5317147533311936e-07, "loss": 0.7314, "step": 17538 }, { "epoch": 0.9013773255216363, "grad_norm": 1.133347511291504, "learning_rate": 2.5291006922298136e-07, "loss": 0.6955, "step": 17539 }, { "epoch": 0.9014287182649809, "grad_norm": 1.1084579229354858, "learning_rate": 2.526487946351147e-07, "loss": 0.7171, "step": 17540 }, { "epoch": 0.9014801110083256, "grad_norm": 1.060705542564392, "learning_rate": 2.5238765157675924e-07, "loss": 0.7278, "step": 17541 }, { "epoch": 0.9015315037516702, "grad_norm": 1.055263876914978, "learning_rate": 2.521266400551492e-07, "loss": 0.6789, "step": 17542 }, { "epoch": 0.9015828964950149, "grad_norm": 1.0538804531097412, "learning_rate": 2.518657600775176e-07, "loss": 0.7049, "step": 17543 }, { "epoch": 0.9016342892383595, "grad_norm": 0.8037257790565491, "learning_rate": 2.5160501165108984e-07, "loss": 0.6265, "step": 17544 }, { "epoch": 0.9016856819817042, "grad_norm": 0.7324722409248352, "learning_rate": 2.513443947830929e-07, "loss": 0.6477, "step": 17545 }, { "epoch": 0.9017370747250488, "grad_norm": 1.1232225894927979, "learning_rate": 2.5108390948074603e-07, "loss": 0.6465, "step": 17546 }, { "epoch": 0.9017884674683935, "grad_norm": 1.0749430656433105, "learning_rate": 2.508235557512667e-07, "loss": 0.6529, "step": 17547 }, { "epoch": 0.9018398602117381, "grad_norm": 1.0457696914672852, "learning_rate": 2.50563333601867e-07, "loss": 0.6793, "step": 17548 }, { "epoch": 0.9018912529550828, "grad_norm": 1.1819281578063965, "learning_rate": 2.5030324303975784e-07, "loss": 0.7068, "step": 17549 }, { "epoch": 0.9019426456984274, "grad_norm": 1.0802580118179321, "learning_rate": 2.5004328407214564e-07, "loss": 0.6806, "step": 17550 }, { "epoch": 0.9019940384417721, "grad_norm": 1.0565345287322998, "learning_rate": 2.497834567062313e-07, "loss": 0.6683, "step": 17551 }, { "epoch": 0.9020454311851167, "grad_norm": 1.0929385423660278, "learning_rate": 2.495237609492146e-07, "loss": 0.6901, "step": 17552 }, { "epoch": 0.9020968239284614, "grad_norm": 1.1595039367675781, "learning_rate": 2.4926419680829095e-07, "loss": 0.6942, "step": 17553 }, { "epoch": 0.9021482166718059, "grad_norm": 1.161083459854126, "learning_rate": 2.4900476429065125e-07, "loss": 0.7011, "step": 17554 }, { "epoch": 0.9021996094151505, "grad_norm": 0.7919915318489075, "learning_rate": 2.487454634034825e-07, "loss": 0.6477, "step": 17555 }, { "epoch": 0.9022510021584952, "grad_norm": 1.0947983264923096, "learning_rate": 2.484862941539712e-07, "loss": 0.6962, "step": 17556 }, { "epoch": 0.9023023949018398, "grad_norm": 1.1308001279830933, "learning_rate": 2.4822725654929493e-07, "loss": 0.7396, "step": 17557 }, { "epoch": 0.9023537876451845, "grad_norm": 1.1335115432739258, "learning_rate": 2.479683505966324e-07, "loss": 0.6828, "step": 17558 }, { "epoch": 0.9024051803885291, "grad_norm": 0.7448621392250061, "learning_rate": 2.4770957630315673e-07, "loss": 0.6477, "step": 17559 }, { "epoch": 0.9024565731318738, "grad_norm": 1.0525184869766235, "learning_rate": 2.474509336760361e-07, "loss": 0.6839, "step": 17560 }, { "epoch": 0.9025079658752184, "grad_norm": 1.0095527172088623, "learning_rate": 2.4719242272243814e-07, "loss": 0.6964, "step": 17561 }, { "epoch": 0.9025593586185631, "grad_norm": 1.0868803262710571, "learning_rate": 2.469340434495249e-07, "loss": 0.7057, "step": 17562 }, { "epoch": 0.9026107513619077, "grad_norm": 1.071757435798645, "learning_rate": 2.4667579586445443e-07, "loss": 0.6829, "step": 17563 }, { "epoch": 0.9026621441052524, "grad_norm": 1.1874746084213257, "learning_rate": 2.464176799743812e-07, "loss": 0.6933, "step": 17564 }, { "epoch": 0.902713536848597, "grad_norm": 1.072497844696045, "learning_rate": 2.461596957864576e-07, "loss": 0.6324, "step": 17565 }, { "epoch": 0.9027649295919417, "grad_norm": 1.4241782426834106, "learning_rate": 2.459018433078314e-07, "loss": 0.6884, "step": 17566 }, { "epoch": 0.9028163223352863, "grad_norm": 1.0346876382827759, "learning_rate": 2.4564412254564575e-07, "loss": 0.7117, "step": 17567 }, { "epoch": 0.902867715078631, "grad_norm": 1.1888883113861084, "learning_rate": 2.4538653350704046e-07, "loss": 0.6793, "step": 17568 }, { "epoch": 0.9029191078219755, "grad_norm": 0.7513110041618347, "learning_rate": 2.451290761991543e-07, "loss": 0.6316, "step": 17569 }, { "epoch": 0.9029705005653201, "grad_norm": 0.7497759461402893, "learning_rate": 2.4487175062911926e-07, "loss": 0.6597, "step": 17570 }, { "epoch": 0.9030218933086648, "grad_norm": 1.0527058839797974, "learning_rate": 2.4461455680406363e-07, "loss": 0.6738, "step": 17571 }, { "epoch": 0.9030732860520094, "grad_norm": 1.0991442203521729, "learning_rate": 2.4435749473111613e-07, "loss": 0.6607, "step": 17572 }, { "epoch": 0.9031246787953541, "grad_norm": 1.0166374444961548, "learning_rate": 2.441005644173955e-07, "loss": 0.6571, "step": 17573 }, { "epoch": 0.9031760715386987, "grad_norm": 1.0188210010528564, "learning_rate": 2.438437658700227e-07, "loss": 0.6701, "step": 17574 }, { "epoch": 0.9032274642820434, "grad_norm": 1.0827404260635376, "learning_rate": 2.4358709909611144e-07, "loss": 0.701, "step": 17575 }, { "epoch": 0.903278857025388, "grad_norm": 1.0952612161636353, "learning_rate": 2.4333056410277276e-07, "loss": 0.6859, "step": 17576 }, { "epoch": 0.9033302497687327, "grad_norm": 1.0821459293365479, "learning_rate": 2.430741608971143e-07, "loss": 0.7334, "step": 17577 }, { "epoch": 0.9033816425120773, "grad_norm": 1.0059535503387451, "learning_rate": 2.42817889486241e-07, "loss": 0.7098, "step": 17578 }, { "epoch": 0.903433035255422, "grad_norm": 0.711694061756134, "learning_rate": 2.4256174987725203e-07, "loss": 0.648, "step": 17579 }, { "epoch": 0.9034844279987666, "grad_norm": 1.06101393699646, "learning_rate": 2.4230574207724344e-07, "loss": 0.7155, "step": 17580 }, { "epoch": 0.9035358207421113, "grad_norm": 1.069606900215149, "learning_rate": 2.420498660933096e-07, "loss": 0.6617, "step": 17581 }, { "epoch": 0.9035872134854559, "grad_norm": 1.1617414951324463, "learning_rate": 2.4179412193253925e-07, "loss": 0.7332, "step": 17582 }, { "epoch": 0.9036386062288005, "grad_norm": 1.1796826124191284, "learning_rate": 2.415385096020173e-07, "loss": 0.6709, "step": 17583 }, { "epoch": 0.9036899989721451, "grad_norm": 1.1939054727554321, "learning_rate": 2.412830291088264e-07, "loss": 0.6892, "step": 17584 }, { "epoch": 0.9037413917154897, "grad_norm": 1.090199589729309, "learning_rate": 2.4102768046004534e-07, "loss": 0.6956, "step": 17585 }, { "epoch": 0.9037927844588344, "grad_norm": 1.2293330430984497, "learning_rate": 2.407724636627473e-07, "loss": 0.7121, "step": 17586 }, { "epoch": 0.903844177202179, "grad_norm": 0.6924625039100647, "learning_rate": 2.405173787240045e-07, "loss": 0.6323, "step": 17587 }, { "epoch": 0.9038955699455237, "grad_norm": 1.0404231548309326, "learning_rate": 2.4026242565088396e-07, "loss": 0.7147, "step": 17588 }, { "epoch": 0.9039469626888683, "grad_norm": 1.1004329919815063, "learning_rate": 2.4000760445044845e-07, "loss": 0.6618, "step": 17589 }, { "epoch": 0.903998355432213, "grad_norm": 1.0404647588729858, "learning_rate": 2.397529151297595e-07, "loss": 0.6641, "step": 17590 }, { "epoch": 0.9040497481755576, "grad_norm": 0.7179667353630066, "learning_rate": 2.3949835769587316e-07, "loss": 0.6424, "step": 17591 }, { "epoch": 0.9041011409189023, "grad_norm": 1.1021227836608887, "learning_rate": 2.392439321558421e-07, "loss": 0.7239, "step": 17592 }, { "epoch": 0.9041525336622469, "grad_norm": 1.0678948163986206, "learning_rate": 2.389896385167145e-07, "loss": 0.7369, "step": 17593 }, { "epoch": 0.9042039264055916, "grad_norm": 1.1009504795074463, "learning_rate": 2.3873547678553654e-07, "loss": 0.6711, "step": 17594 }, { "epoch": 0.9042553191489362, "grad_norm": 1.0780911445617676, "learning_rate": 2.384814469693508e-07, "loss": 0.7129, "step": 17595 }, { "epoch": 0.9043067118922808, "grad_norm": 3.6993801593780518, "learning_rate": 2.3822754907519396e-07, "loss": 0.6512, "step": 17596 }, { "epoch": 0.9043581046356255, "grad_norm": 1.0272951126098633, "learning_rate": 2.3797378311010032e-07, "loss": 0.686, "step": 17597 }, { "epoch": 0.9044094973789701, "grad_norm": 1.0487034320831299, "learning_rate": 2.377201490811032e-07, "loss": 0.7008, "step": 17598 }, { "epoch": 0.9044608901223147, "grad_norm": 1.1117587089538574, "learning_rate": 2.3746664699522637e-07, "loss": 0.6846, "step": 17599 }, { "epoch": 0.9045122828656593, "grad_norm": 1.1480002403259277, "learning_rate": 2.3721327685949536e-07, "loss": 0.7135, "step": 17600 }, { "epoch": 0.904563675609004, "grad_norm": 1.1156461238861084, "learning_rate": 2.3696003868093008e-07, "loss": 0.6864, "step": 17601 }, { "epoch": 0.9046150683523486, "grad_norm": 1.243971586227417, "learning_rate": 2.3670693246654553e-07, "loss": 0.6934, "step": 17602 }, { "epoch": 0.9046664610956933, "grad_norm": 1.1333811283111572, "learning_rate": 2.3645395822335548e-07, "loss": 0.6973, "step": 17603 }, { "epoch": 0.9047178538390379, "grad_norm": 1.0605882406234741, "learning_rate": 2.3620111595836826e-07, "loss": 0.6887, "step": 17604 }, { "epoch": 0.9047692465823826, "grad_norm": 1.0705245733261108, "learning_rate": 2.3594840567858878e-07, "loss": 0.6629, "step": 17605 }, { "epoch": 0.9048206393257272, "grad_norm": 0.739646315574646, "learning_rate": 2.356958273910187e-07, "loss": 0.6576, "step": 17606 }, { "epoch": 0.9048720320690719, "grad_norm": 1.1915605068206787, "learning_rate": 2.3544338110265685e-07, "loss": 0.7338, "step": 17607 }, { "epoch": 0.9049234248124165, "grad_norm": 1.0127440690994263, "learning_rate": 2.3519106682049597e-07, "loss": 0.6327, "step": 17608 }, { "epoch": 0.9049748175557611, "grad_norm": 1.1401029825210571, "learning_rate": 2.349388845515277e-07, "loss": 0.7122, "step": 17609 }, { "epoch": 0.9050262102991058, "grad_norm": 0.7195321917533875, "learning_rate": 2.3468683430273865e-07, "loss": 0.6326, "step": 17610 }, { "epoch": 0.9050776030424504, "grad_norm": 1.0024667978286743, "learning_rate": 2.3443491608111212e-07, "loss": 0.6751, "step": 17611 }, { "epoch": 0.9051289957857951, "grad_norm": 1.102150797843933, "learning_rate": 2.3418312989362812e-07, "loss": 0.6674, "step": 17612 }, { "epoch": 0.9051803885291397, "grad_norm": 1.0790152549743652, "learning_rate": 2.339314757472616e-07, "loss": 0.6958, "step": 17613 }, { "epoch": 0.9052317812724843, "grad_norm": 1.0946900844573975, "learning_rate": 2.3367995364898643e-07, "loss": 0.7233, "step": 17614 }, { "epoch": 0.9052831740158289, "grad_norm": 1.199942708015442, "learning_rate": 2.3342856360576927e-07, "loss": 0.7061, "step": 17615 }, { "epoch": 0.9053345667591736, "grad_norm": 1.0425904989242554, "learning_rate": 2.3317730562457676e-07, "loss": 0.6991, "step": 17616 }, { "epoch": 0.9053859595025182, "grad_norm": 1.1337605714797974, "learning_rate": 2.3292617971236942e-07, "loss": 0.7351, "step": 17617 }, { "epoch": 0.9054373522458629, "grad_norm": 1.0822337865829468, "learning_rate": 2.3267518587610504e-07, "loss": 0.675, "step": 17618 }, { "epoch": 0.9054887449892075, "grad_norm": 1.1378329992294312, "learning_rate": 2.324243241227375e-07, "loss": 0.7095, "step": 17619 }, { "epoch": 0.9055401377325522, "grad_norm": 1.0523250102996826, "learning_rate": 2.3217359445921738e-07, "loss": 0.6914, "step": 17620 }, { "epoch": 0.9055915304758968, "grad_norm": 1.1309000253677368, "learning_rate": 2.3192299689249187e-07, "loss": 0.6765, "step": 17621 }, { "epoch": 0.9056429232192414, "grad_norm": 0.7675410509109497, "learning_rate": 2.3167253142950265e-07, "loss": 0.6302, "step": 17622 }, { "epoch": 0.9056943159625861, "grad_norm": 0.6988844871520996, "learning_rate": 2.314221980771908e-07, "loss": 0.6648, "step": 17623 }, { "epoch": 0.9057457087059307, "grad_norm": 1.072287678718567, "learning_rate": 2.3117199684249024e-07, "loss": 0.7482, "step": 17624 }, { "epoch": 0.9057971014492754, "grad_norm": 1.0469870567321777, "learning_rate": 2.309219277323338e-07, "loss": 0.7341, "step": 17625 }, { "epoch": 0.90584849419262, "grad_norm": 1.1441148519515991, "learning_rate": 2.3067199075364977e-07, "loss": 0.7115, "step": 17626 }, { "epoch": 0.9058998869359647, "grad_norm": 1.1056292057037354, "learning_rate": 2.304221859133643e-07, "loss": 0.728, "step": 17627 }, { "epoch": 0.9059512796793093, "grad_norm": 1.1033676862716675, "learning_rate": 2.3017251321839573e-07, "loss": 0.7147, "step": 17628 }, { "epoch": 0.906002672422654, "grad_norm": 1.1317777633666992, "learning_rate": 2.299229726756641e-07, "loss": 0.7327, "step": 17629 }, { "epoch": 0.9060540651659985, "grad_norm": 0.8893811106681824, "learning_rate": 2.296735642920811e-07, "loss": 0.6015, "step": 17630 }, { "epoch": 0.9061054579093432, "grad_norm": 1.0364857912063599, "learning_rate": 2.2942428807455787e-07, "loss": 0.6612, "step": 17631 }, { "epoch": 0.9061568506526878, "grad_norm": 0.8596569299697876, "learning_rate": 2.291751440300005e-07, "loss": 0.6318, "step": 17632 }, { "epoch": 0.9062082433960325, "grad_norm": 1.11715567111969, "learning_rate": 2.2892613216531245e-07, "loss": 0.7318, "step": 17633 }, { "epoch": 0.9062596361393771, "grad_norm": 1.2381598949432373, "learning_rate": 2.2867725248739204e-07, "loss": 0.6975, "step": 17634 }, { "epoch": 0.9063110288827217, "grad_norm": 1.1477817296981812, "learning_rate": 2.2842850500313375e-07, "loss": 0.6779, "step": 17635 }, { "epoch": 0.9063624216260664, "grad_norm": 1.0620248317718506, "learning_rate": 2.281798897194326e-07, "loss": 0.6825, "step": 17636 }, { "epoch": 0.906413814369411, "grad_norm": 1.1184896230697632, "learning_rate": 2.279314066431726e-07, "loss": 0.721, "step": 17637 }, { "epoch": 0.9064652071127557, "grad_norm": 1.133842945098877, "learning_rate": 2.2768305578124094e-07, "loss": 0.6855, "step": 17638 }, { "epoch": 0.9065165998561003, "grad_norm": 1.1051045656204224, "learning_rate": 2.2743483714051773e-07, "loss": 0.6454, "step": 17639 }, { "epoch": 0.906567992599445, "grad_norm": 1.0730048418045044, "learning_rate": 2.271867507278791e-07, "loss": 0.6709, "step": 17640 }, { "epoch": 0.9066193853427896, "grad_norm": 1.114850640296936, "learning_rate": 2.2693879655020067e-07, "loss": 0.7345, "step": 17641 }, { "epoch": 0.9066707780861343, "grad_norm": 1.1023399829864502, "learning_rate": 2.266909746143503e-07, "loss": 0.6861, "step": 17642 }, { "epoch": 0.9067221708294789, "grad_norm": 1.1586685180664062, "learning_rate": 2.264432849271947e-07, "loss": 0.7853, "step": 17643 }, { "epoch": 0.9067735635728236, "grad_norm": 0.6670803427696228, "learning_rate": 2.2619572749559616e-07, "loss": 0.5806, "step": 17644 }, { "epoch": 0.9068249563161681, "grad_norm": 1.0671591758728027, "learning_rate": 2.259483023264142e-07, "loss": 0.7052, "step": 17645 }, { "epoch": 0.9068763490595128, "grad_norm": 1.0895864963531494, "learning_rate": 2.257010094265033e-07, "loss": 0.699, "step": 17646 }, { "epoch": 0.9069277418028574, "grad_norm": 1.0388092994689941, "learning_rate": 2.2545384880271526e-07, "loss": 0.6396, "step": 17647 }, { "epoch": 0.906979134546202, "grad_norm": 1.1465235948562622, "learning_rate": 2.252068204618968e-07, "loss": 0.6874, "step": 17648 }, { "epoch": 0.9070305272895467, "grad_norm": 1.1031323671340942, "learning_rate": 2.2495992441089408e-07, "loss": 0.739, "step": 17649 }, { "epoch": 0.9070819200328913, "grad_norm": 1.0973830223083496, "learning_rate": 2.247131606565456e-07, "loss": 0.7616, "step": 17650 }, { "epoch": 0.907133312776236, "grad_norm": 1.1110271215438843, "learning_rate": 2.2446652920568912e-07, "loss": 0.7284, "step": 17651 }, { "epoch": 0.9071847055195806, "grad_norm": 0.7484662532806396, "learning_rate": 2.2422003006515812e-07, "loss": 0.6462, "step": 17652 }, { "epoch": 0.9072360982629253, "grad_norm": 0.7035232186317444, "learning_rate": 2.2397366324178049e-07, "loss": 0.6542, "step": 17653 }, { "epoch": 0.9072874910062699, "grad_norm": 1.0518049001693726, "learning_rate": 2.2372742874238406e-07, "loss": 0.7262, "step": 17654 }, { "epoch": 0.9073388837496146, "grad_norm": 0.7808499932289124, "learning_rate": 2.2348132657379008e-07, "loss": 0.6693, "step": 17655 }, { "epoch": 0.9073902764929592, "grad_norm": 1.089228630065918, "learning_rate": 2.2323535674281748e-07, "loss": 0.7012, "step": 17656 }, { "epoch": 0.9074416692363039, "grad_norm": 1.0767018795013428, "learning_rate": 2.229895192562792e-07, "loss": 0.6955, "step": 17657 }, { "epoch": 0.9074930619796485, "grad_norm": 1.1553523540496826, "learning_rate": 2.2274381412098866e-07, "loss": 0.7309, "step": 17658 }, { "epoch": 0.9075444547229932, "grad_norm": 1.2027223110198975, "learning_rate": 2.2249824134375264e-07, "loss": 0.681, "step": 17659 }, { "epoch": 0.9075958474663377, "grad_norm": 1.060681939125061, "learning_rate": 2.2225280093137457e-07, "loss": 0.7304, "step": 17660 }, { "epoch": 0.9076472402096823, "grad_norm": 0.7268868684768677, "learning_rate": 2.2200749289065405e-07, "loss": 0.6632, "step": 17661 }, { "epoch": 0.907698632953027, "grad_norm": 1.1161484718322754, "learning_rate": 2.2176231722838947e-07, "loss": 0.6604, "step": 17662 }, { "epoch": 0.9077500256963716, "grad_norm": 0.7811411023139954, "learning_rate": 2.2151727395137213e-07, "loss": 0.6444, "step": 17663 }, { "epoch": 0.9078014184397163, "grad_norm": 1.0133854150772095, "learning_rate": 2.2127236306639043e-07, "loss": 0.6589, "step": 17664 }, { "epoch": 0.9078528111830609, "grad_norm": 1.115313172340393, "learning_rate": 2.2102758458023288e-07, "loss": 0.7179, "step": 17665 }, { "epoch": 0.9079042039264056, "grad_norm": 0.7578287124633789, "learning_rate": 2.207829384996779e-07, "loss": 0.6047, "step": 17666 }, { "epoch": 0.9079555966697502, "grad_norm": 0.7083166837692261, "learning_rate": 2.2053842483150513e-07, "loss": 0.6708, "step": 17667 }, { "epoch": 0.9080069894130949, "grad_norm": 1.0738131999969482, "learning_rate": 2.2029404358248963e-07, "loss": 0.6785, "step": 17668 }, { "epoch": 0.9080583821564395, "grad_norm": 1.0604991912841797, "learning_rate": 2.2004979475940102e-07, "loss": 0.7095, "step": 17669 }, { "epoch": 0.9081097748997842, "grad_norm": 1.0235544443130493, "learning_rate": 2.1980567836900612e-07, "loss": 0.6533, "step": 17670 }, { "epoch": 0.9081611676431288, "grad_norm": 1.0896077156066895, "learning_rate": 2.1956169441807007e-07, "loss": 0.685, "step": 17671 }, { "epoch": 0.9082125603864735, "grad_norm": 1.1579216718673706, "learning_rate": 2.193178429133519e-07, "loss": 0.7168, "step": 17672 }, { "epoch": 0.9082639531298181, "grad_norm": 1.1074789762496948, "learning_rate": 2.1907412386160675e-07, "loss": 0.7014, "step": 17673 }, { "epoch": 0.9083153458731628, "grad_norm": 1.0487475395202637, "learning_rate": 2.188305372695887e-07, "loss": 0.6941, "step": 17674 }, { "epoch": 0.9083667386165073, "grad_norm": 1.0870540142059326, "learning_rate": 2.1858708314404564e-07, "loss": 0.7055, "step": 17675 }, { "epoch": 0.9084181313598519, "grad_norm": 1.1733434200286865, "learning_rate": 2.1834376149172332e-07, "loss": 0.7008, "step": 17676 }, { "epoch": 0.9084695241031966, "grad_norm": 1.145087480545044, "learning_rate": 2.1810057231936132e-07, "loss": 0.6924, "step": 17677 }, { "epoch": 0.9085209168465412, "grad_norm": 1.0450141429901123, "learning_rate": 2.178575156337004e-07, "loss": 0.6385, "step": 17678 }, { "epoch": 0.9085723095898859, "grad_norm": 1.0399049520492554, "learning_rate": 2.1761459144147122e-07, "loss": 0.6529, "step": 17679 }, { "epoch": 0.9086237023332305, "grad_norm": 1.1304844617843628, "learning_rate": 2.173717997494068e-07, "loss": 0.7142, "step": 17680 }, { "epoch": 0.9086750950765752, "grad_norm": 1.0297542810440063, "learning_rate": 2.1712914056423339e-07, "loss": 0.6957, "step": 17681 }, { "epoch": 0.9087264878199198, "grad_norm": 1.1754237413406372, "learning_rate": 2.1688661389267286e-07, "loss": 0.7333, "step": 17682 }, { "epoch": 0.9087778805632645, "grad_norm": 1.07144033908844, "learning_rate": 2.166442197414459e-07, "loss": 0.6882, "step": 17683 }, { "epoch": 0.9088292733066091, "grad_norm": 1.0926289558410645, "learning_rate": 2.1640195811726828e-07, "loss": 0.7352, "step": 17684 }, { "epoch": 0.9088806660499538, "grad_norm": 1.04472815990448, "learning_rate": 2.1615982902685183e-07, "loss": 0.6658, "step": 17685 }, { "epoch": 0.9089320587932984, "grad_norm": 1.1104344129562378, "learning_rate": 2.1591783247690347e-07, "loss": 0.6567, "step": 17686 }, { "epoch": 0.9089834515366431, "grad_norm": 0.9975858926773071, "learning_rate": 2.1567596847413108e-07, "loss": 0.6493, "step": 17687 }, { "epoch": 0.9090348442799877, "grad_norm": 1.1546063423156738, "learning_rate": 2.1543423702523214e-07, "loss": 0.7152, "step": 17688 }, { "epoch": 0.9090862370233324, "grad_norm": 1.0524946451187134, "learning_rate": 2.1519263813690683e-07, "loss": 0.7139, "step": 17689 }, { "epoch": 0.9091376297666769, "grad_norm": 1.0565636157989502, "learning_rate": 2.14951171815847e-07, "loss": 0.7233, "step": 17690 }, { "epoch": 0.9091890225100215, "grad_norm": 1.0292373895645142, "learning_rate": 2.1470983806874402e-07, "loss": 0.703, "step": 17691 }, { "epoch": 0.9092404152533662, "grad_norm": 1.064986228942871, "learning_rate": 2.144686369022836e-07, "loss": 0.69, "step": 17692 }, { "epoch": 0.9092918079967108, "grad_norm": 0.7384403347969055, "learning_rate": 2.1422756832314873e-07, "loss": 0.6815, "step": 17693 }, { "epoch": 0.9093432007400555, "grad_norm": 1.149746298789978, "learning_rate": 2.1398663233801798e-07, "loss": 0.6969, "step": 17694 }, { "epoch": 0.9093945934834001, "grad_norm": 1.0494534969329834, "learning_rate": 2.1374582895356656e-07, "loss": 0.6655, "step": 17695 }, { "epoch": 0.9094459862267448, "grad_norm": 1.1233456134796143, "learning_rate": 2.135051581764669e-07, "loss": 0.7122, "step": 17696 }, { "epoch": 0.9094973789700894, "grad_norm": 1.1172221899032593, "learning_rate": 2.1326462001338698e-07, "loss": 0.7224, "step": 17697 }, { "epoch": 0.9095487717134341, "grad_norm": 1.131014108657837, "learning_rate": 2.1302421447099042e-07, "loss": 0.6785, "step": 17698 }, { "epoch": 0.9096001644567787, "grad_norm": 0.7532368302345276, "learning_rate": 2.1278394155593796e-07, "loss": 0.6683, "step": 17699 }, { "epoch": 0.9096515572001234, "grad_norm": 1.1580088138580322, "learning_rate": 2.125438012748876e-07, "loss": 0.6971, "step": 17700 }, { "epoch": 0.909702949943468, "grad_norm": 0.7308835387229919, "learning_rate": 2.123037936344907e-07, "loss": 0.671, "step": 17701 }, { "epoch": 0.9097543426868127, "grad_norm": 1.0930366516113281, "learning_rate": 2.1206391864139863e-07, "loss": 0.6377, "step": 17702 }, { "epoch": 0.9098057354301573, "grad_norm": 1.067945957183838, "learning_rate": 2.1182417630225715e-07, "loss": 0.7089, "step": 17703 }, { "epoch": 0.909857128173502, "grad_norm": 1.0786175727844238, "learning_rate": 2.1158456662370708e-07, "loss": 0.6979, "step": 17704 }, { "epoch": 0.9099085209168465, "grad_norm": 0.9157930612564087, "learning_rate": 2.1134508961238864e-07, "loss": 0.6594, "step": 17705 }, { "epoch": 0.9099599136601911, "grad_norm": 1.1287634372711182, "learning_rate": 2.11105745274936e-07, "loss": 0.6862, "step": 17706 }, { "epoch": 0.9100113064035358, "grad_norm": 1.0697829723358154, "learning_rate": 2.1086653361798103e-07, "loss": 0.7419, "step": 17707 }, { "epoch": 0.9100626991468804, "grad_norm": 1.0793765783309937, "learning_rate": 2.1062745464815014e-07, "loss": 0.6835, "step": 17708 }, { "epoch": 0.9101140918902251, "grad_norm": 1.0769258737564087, "learning_rate": 2.103885083720686e-07, "loss": 0.6873, "step": 17709 }, { "epoch": 0.9101654846335697, "grad_norm": 1.1262171268463135, "learning_rate": 2.101496947963555e-07, "loss": 0.7213, "step": 17710 }, { "epoch": 0.9102168773769144, "grad_norm": 1.0951290130615234, "learning_rate": 2.0991101392762781e-07, "loss": 0.6967, "step": 17711 }, { "epoch": 0.910268270120259, "grad_norm": 1.0736819505691528, "learning_rate": 2.09672465772498e-07, "loss": 0.6898, "step": 17712 }, { "epoch": 0.9103196628636037, "grad_norm": 1.1122201681137085, "learning_rate": 2.0943405033757634e-07, "loss": 0.6705, "step": 17713 }, { "epoch": 0.9103710556069483, "grad_norm": 1.1191297769546509, "learning_rate": 2.0919576762946758e-07, "loss": 0.7433, "step": 17714 }, { "epoch": 0.910422448350293, "grad_norm": 1.0796895027160645, "learning_rate": 2.0895761765477306e-07, "loss": 0.6825, "step": 17715 }, { "epoch": 0.9104738410936376, "grad_norm": 1.08818781375885, "learning_rate": 2.0871960042009254e-07, "loss": 0.7101, "step": 17716 }, { "epoch": 0.9105252338369823, "grad_norm": 1.1995338201522827, "learning_rate": 2.084817159320185e-07, "loss": 0.6683, "step": 17717 }, { "epoch": 0.9105766265803269, "grad_norm": 1.1802960634231567, "learning_rate": 2.0824396419714287e-07, "loss": 0.7788, "step": 17718 }, { "epoch": 0.9106280193236715, "grad_norm": 1.0783082246780396, "learning_rate": 2.0800634522205265e-07, "loss": 0.6783, "step": 17719 }, { "epoch": 0.9106794120670162, "grad_norm": 1.0899839401245117, "learning_rate": 2.0776885901333198e-07, "loss": 0.7086, "step": 17720 }, { "epoch": 0.9107308048103607, "grad_norm": 1.0576839447021484, "learning_rate": 2.075315055775584e-07, "loss": 0.7077, "step": 17721 }, { "epoch": 0.9107821975537054, "grad_norm": 0.774390459060669, "learning_rate": 2.0729428492131054e-07, "loss": 0.6612, "step": 17722 }, { "epoch": 0.91083359029705, "grad_norm": 1.103029489517212, "learning_rate": 2.0705719705115923e-07, "loss": 0.6882, "step": 17723 }, { "epoch": 0.9108849830403947, "grad_norm": 1.1453486680984497, "learning_rate": 2.0682024197367369e-07, "loss": 0.7754, "step": 17724 }, { "epoch": 0.9109363757837393, "grad_norm": 1.0678218603134155, "learning_rate": 2.065834196954192e-07, "loss": 0.6912, "step": 17725 }, { "epoch": 0.910987768527084, "grad_norm": 1.1938998699188232, "learning_rate": 2.0634673022295658e-07, "loss": 0.6873, "step": 17726 }, { "epoch": 0.9110391612704286, "grad_norm": 1.1752475500106812, "learning_rate": 2.0611017356284458e-07, "loss": 0.7194, "step": 17727 }, { "epoch": 0.9110905540137733, "grad_norm": 1.0744637250900269, "learning_rate": 2.058737497216351e-07, "loss": 0.7082, "step": 17728 }, { "epoch": 0.9111419467571179, "grad_norm": 1.134577989578247, "learning_rate": 2.0563745870588126e-07, "loss": 0.7246, "step": 17729 }, { "epoch": 0.9111933395004626, "grad_norm": 1.091447353363037, "learning_rate": 2.0540130052212726e-07, "loss": 0.7129, "step": 17730 }, { "epoch": 0.9112447322438072, "grad_norm": 1.077867031097412, "learning_rate": 2.0516527517691786e-07, "loss": 0.7092, "step": 17731 }, { "epoch": 0.9112961249871518, "grad_norm": 0.7471120953559875, "learning_rate": 2.049293826767912e-07, "loss": 0.6073, "step": 17732 }, { "epoch": 0.9113475177304965, "grad_norm": 1.0988645553588867, "learning_rate": 2.0469362302828254e-07, "loss": 0.7275, "step": 17733 }, { "epoch": 0.9113989104738411, "grad_norm": 1.1255877017974854, "learning_rate": 2.0445799623792563e-07, "loss": 0.714, "step": 17734 }, { "epoch": 0.9114503032171858, "grad_norm": 1.0762730836868286, "learning_rate": 2.0422250231224626e-07, "loss": 0.7668, "step": 17735 }, { "epoch": 0.9115016959605303, "grad_norm": 0.6903653740882874, "learning_rate": 2.0398714125777208e-07, "loss": 0.6411, "step": 17736 }, { "epoch": 0.911553088703875, "grad_norm": 1.15397047996521, "learning_rate": 2.0375191308102115e-07, "loss": 0.6814, "step": 17737 }, { "epoch": 0.9116044814472196, "grad_norm": 0.996845006942749, "learning_rate": 2.035168177885122e-07, "loss": 0.6779, "step": 17738 }, { "epoch": 0.9116558741905643, "grad_norm": 1.0848451852798462, "learning_rate": 2.0328185538675827e-07, "loss": 0.7232, "step": 17739 }, { "epoch": 0.9117072669339089, "grad_norm": 1.0763123035430908, "learning_rate": 2.0304702588226922e-07, "loss": 0.6744, "step": 17740 }, { "epoch": 0.9117586596772536, "grad_norm": 1.039921760559082, "learning_rate": 2.0281232928155092e-07, "loss": 0.6636, "step": 17741 }, { "epoch": 0.9118100524205982, "grad_norm": 1.1046186685562134, "learning_rate": 2.0257776559110655e-07, "loss": 0.666, "step": 17742 }, { "epoch": 0.9118614451639429, "grad_norm": 0.7246026992797852, "learning_rate": 2.0234333481743417e-07, "loss": 0.7017, "step": 17743 }, { "epoch": 0.9119128379072875, "grad_norm": 1.0738811492919922, "learning_rate": 2.0210903696702977e-07, "loss": 0.6968, "step": 17744 }, { "epoch": 0.9119642306506321, "grad_norm": 1.1218420267105103, "learning_rate": 2.0187487204638367e-07, "loss": 0.7006, "step": 17745 }, { "epoch": 0.9120156233939768, "grad_norm": 1.013877511024475, "learning_rate": 2.0164084006198405e-07, "loss": 0.6611, "step": 17746 }, { "epoch": 0.9120670161373214, "grad_norm": 1.1758811473846436, "learning_rate": 2.014069410203151e-07, "loss": 0.7399, "step": 17747 }, { "epoch": 0.9121184088806661, "grad_norm": 1.1796398162841797, "learning_rate": 2.0117317492785783e-07, "loss": 0.6768, "step": 17748 }, { "epoch": 0.9121698016240107, "grad_norm": 1.0896128416061401, "learning_rate": 2.009395417910881e-07, "loss": 0.6752, "step": 17749 }, { "epoch": 0.9122211943673554, "grad_norm": 1.1138525009155273, "learning_rate": 2.0070604161647856e-07, "loss": 0.7005, "step": 17750 }, { "epoch": 0.9122725871106999, "grad_norm": 1.0697894096374512, "learning_rate": 2.0047267441049957e-07, "loss": 0.7039, "step": 17751 }, { "epoch": 0.9123239798540446, "grad_norm": 1.1171438694000244, "learning_rate": 2.0023944017961594e-07, "loss": 0.7016, "step": 17752 }, { "epoch": 0.9123753725973892, "grad_norm": 1.0748374462127686, "learning_rate": 2.0000633893029087e-07, "loss": 0.6646, "step": 17753 }, { "epoch": 0.9124267653407339, "grad_norm": 0.6407703757286072, "learning_rate": 1.9977337066898029e-07, "loss": 0.6729, "step": 17754 }, { "epoch": 0.9124781580840785, "grad_norm": 1.0660828351974487, "learning_rate": 1.9954053540214123e-07, "loss": 0.7267, "step": 17755 }, { "epoch": 0.9125295508274232, "grad_norm": 1.1067396402359009, "learning_rate": 1.993078331362236e-07, "loss": 0.6487, "step": 17756 }, { "epoch": 0.9125809435707678, "grad_norm": 1.0695616006851196, "learning_rate": 1.9907526387767384e-07, "loss": 0.6973, "step": 17757 }, { "epoch": 0.9126323363141124, "grad_norm": 0.8864750862121582, "learning_rate": 1.9884282763293794e-07, "loss": 0.6197, "step": 17758 }, { "epoch": 0.9126837290574571, "grad_norm": 1.024201512336731, "learning_rate": 1.9861052440845241e-07, "loss": 0.6704, "step": 17759 }, { "epoch": 0.9127351218008017, "grad_norm": 1.0804156064987183, "learning_rate": 1.98378354210656e-07, "loss": 0.6893, "step": 17760 }, { "epoch": 0.9127865145441464, "grad_norm": 1.0635980367660522, "learning_rate": 1.981463170459802e-07, "loss": 0.6679, "step": 17761 }, { "epoch": 0.912837907287491, "grad_norm": 0.8467568755149841, "learning_rate": 1.979144129208538e-07, "loss": 0.6619, "step": 17762 }, { "epoch": 0.9128893000308357, "grad_norm": 1.0786676406860352, "learning_rate": 1.9768264184170104e-07, "loss": 0.661, "step": 17763 }, { "epoch": 0.9129406927741803, "grad_norm": 1.092750906944275, "learning_rate": 1.974510038149452e-07, "loss": 0.7409, "step": 17764 }, { "epoch": 0.912992085517525, "grad_norm": 0.672427773475647, "learning_rate": 1.9721949884700332e-07, "loss": 0.6193, "step": 17765 }, { "epoch": 0.9130434782608695, "grad_norm": 1.0358028411865234, "learning_rate": 1.9698812694428805e-07, "loss": 0.5805, "step": 17766 }, { "epoch": 0.9130948710042142, "grad_norm": 1.2200502157211304, "learning_rate": 1.9675688811321203e-07, "loss": 0.6835, "step": 17767 }, { "epoch": 0.9131462637475588, "grad_norm": 1.1212867498397827, "learning_rate": 1.9652578236018072e-07, "loss": 0.6879, "step": 17768 }, { "epoch": 0.9131976564909035, "grad_norm": 1.145961046218872, "learning_rate": 1.962948096915973e-07, "loss": 0.7041, "step": 17769 }, { "epoch": 0.9132490492342481, "grad_norm": 0.8149420619010925, "learning_rate": 1.9606397011386002e-07, "loss": 0.7071, "step": 17770 }, { "epoch": 0.9133004419775927, "grad_norm": 1.0562644004821777, "learning_rate": 1.9583326363336707e-07, "loss": 0.6844, "step": 17771 }, { "epoch": 0.9133518347209374, "grad_norm": 1.100762963294983, "learning_rate": 1.9560269025650725e-07, "loss": 0.6844, "step": 17772 }, { "epoch": 0.913403227464282, "grad_norm": 1.0478250980377197, "learning_rate": 1.9537224998967098e-07, "loss": 0.6432, "step": 17773 }, { "epoch": 0.9134546202076267, "grad_norm": 1.1058193445205688, "learning_rate": 1.9514194283924203e-07, "loss": 0.6993, "step": 17774 }, { "epoch": 0.9135060129509713, "grad_norm": 1.1286722421646118, "learning_rate": 1.949117688116009e-07, "loss": 0.6624, "step": 17775 }, { "epoch": 0.913557405694316, "grad_norm": 1.1158396005630493, "learning_rate": 1.9468172791312633e-07, "loss": 0.7058, "step": 17776 }, { "epoch": 0.9136087984376606, "grad_norm": 1.0591977834701538, "learning_rate": 1.9445182015018994e-07, "loss": 0.6493, "step": 17777 }, { "epoch": 0.9136601911810053, "grad_norm": 1.0822550058364868, "learning_rate": 1.942220455291627e-07, "loss": 0.6697, "step": 17778 }, { "epoch": 0.9137115839243499, "grad_norm": 1.115945816040039, "learning_rate": 1.939924040564095e-07, "loss": 0.6911, "step": 17779 }, { "epoch": 0.9137629766676946, "grad_norm": 1.1579450368881226, "learning_rate": 1.9376289573829478e-07, "loss": 0.7013, "step": 17780 }, { "epoch": 0.9138143694110391, "grad_norm": 1.08558988571167, "learning_rate": 1.9353352058117503e-07, "loss": 0.6466, "step": 17781 }, { "epoch": 0.9138657621543838, "grad_norm": 1.0792367458343506, "learning_rate": 1.9330427859140687e-07, "loss": 0.6893, "step": 17782 }, { "epoch": 0.9139171548977284, "grad_norm": 0.7175288200378418, "learning_rate": 1.9307516977534024e-07, "loss": 0.681, "step": 17783 }, { "epoch": 0.913968547641073, "grad_norm": 1.1783541440963745, "learning_rate": 1.9284619413932447e-07, "loss": 0.6724, "step": 17784 }, { "epoch": 0.9140199403844177, "grad_norm": 1.3228797912597656, "learning_rate": 1.926173516897023e-07, "loss": 0.6928, "step": 17785 }, { "epoch": 0.9140713331277623, "grad_norm": 1.1209787130355835, "learning_rate": 1.9238864243281418e-07, "loss": 0.7191, "step": 17786 }, { "epoch": 0.914122725871107, "grad_norm": 0.998951256275177, "learning_rate": 1.921600663749984e-07, "loss": 0.6556, "step": 17787 }, { "epoch": 0.9141741186144516, "grad_norm": 1.0374977588653564, "learning_rate": 1.9193162352258487e-07, "loss": 0.639, "step": 17788 }, { "epoch": 0.9142255113577963, "grad_norm": 1.1528420448303223, "learning_rate": 1.9170331388190466e-07, "loss": 0.7109, "step": 17789 }, { "epoch": 0.9142769041011409, "grad_norm": 1.1000696420669556, "learning_rate": 1.9147513745928382e-07, "loss": 0.6808, "step": 17790 }, { "epoch": 0.9143282968444856, "grad_norm": 0.7669256925582886, "learning_rate": 1.9124709426104226e-07, "loss": 0.6307, "step": 17791 }, { "epoch": 0.9143796895878302, "grad_norm": 0.8601362109184265, "learning_rate": 1.910191842934994e-07, "loss": 0.6311, "step": 17792 }, { "epoch": 0.9144310823311749, "grad_norm": 0.9897601008415222, "learning_rate": 1.9079140756296965e-07, "loss": 0.6982, "step": 17793 }, { "epoch": 0.9144824750745195, "grad_norm": 1.0738475322723389, "learning_rate": 1.905637640757635e-07, "loss": 0.6841, "step": 17794 }, { "epoch": 0.9145338678178642, "grad_norm": 1.1342777013778687, "learning_rate": 1.903362538381881e-07, "loss": 0.713, "step": 17795 }, { "epoch": 0.9145852605612088, "grad_norm": 1.0233083963394165, "learning_rate": 1.9010887685654677e-07, "loss": 0.6551, "step": 17796 }, { "epoch": 0.9146366533045533, "grad_norm": 1.0669301748275757, "learning_rate": 1.8988163313713892e-07, "loss": 0.6697, "step": 17797 }, { "epoch": 0.914688046047898, "grad_norm": 1.1149855852127075, "learning_rate": 1.8965452268626117e-07, "loss": 0.6729, "step": 17798 }, { "epoch": 0.9147394387912426, "grad_norm": 1.126332402229309, "learning_rate": 1.8942754551020515e-07, "loss": 0.712, "step": 17799 }, { "epoch": 0.9147908315345873, "grad_norm": 1.0495896339416504, "learning_rate": 1.8920070161526083e-07, "loss": 0.6657, "step": 17800 }, { "epoch": 0.9148422242779319, "grad_norm": 1.0570639371871948, "learning_rate": 1.8897399100771097e-07, "loss": 0.6632, "step": 17801 }, { "epoch": 0.9148936170212766, "grad_norm": 1.1035386323928833, "learning_rate": 1.887474136938383e-07, "loss": 0.6965, "step": 17802 }, { "epoch": 0.9149450097646212, "grad_norm": 1.0673751831054688, "learning_rate": 1.885209696799195e-07, "loss": 0.6602, "step": 17803 }, { "epoch": 0.9149964025079659, "grad_norm": 0.7812763452529907, "learning_rate": 1.8829465897222954e-07, "loss": 0.6384, "step": 17804 }, { "epoch": 0.9150477952513105, "grad_norm": 0.7205644845962524, "learning_rate": 1.880684815770373e-07, "loss": 0.644, "step": 17805 }, { "epoch": 0.9150991879946552, "grad_norm": 1.0499277114868164, "learning_rate": 1.8784243750060994e-07, "loss": 0.7078, "step": 17806 }, { "epoch": 0.9151505807379998, "grad_norm": 1.0734301805496216, "learning_rate": 1.8761652674921028e-07, "loss": 0.6693, "step": 17807 }, { "epoch": 0.9152019734813445, "grad_norm": 1.072252631187439, "learning_rate": 1.8739074932909607e-07, "loss": 0.689, "step": 17808 }, { "epoch": 0.9152533662246891, "grad_norm": 1.0846703052520752, "learning_rate": 1.8716510524652508e-07, "loss": 0.7349, "step": 17809 }, { "epoch": 0.9153047589680338, "grad_norm": 1.0556458234786987, "learning_rate": 1.869395945077468e-07, "loss": 0.6902, "step": 17810 }, { "epoch": 0.9153561517113784, "grad_norm": 1.0917582511901855, "learning_rate": 1.867142171190106e-07, "loss": 0.6932, "step": 17811 }, { "epoch": 0.9154075444547229, "grad_norm": 1.06710946559906, "learning_rate": 1.864889730865599e-07, "loss": 0.6904, "step": 17812 }, { "epoch": 0.9154589371980676, "grad_norm": 1.252770185470581, "learning_rate": 1.8626386241663574e-07, "loss": 0.6539, "step": 17813 }, { "epoch": 0.9155103299414122, "grad_norm": 1.099575161933899, "learning_rate": 1.860388851154743e-07, "loss": 0.7279, "step": 17814 }, { "epoch": 0.9155617226847569, "grad_norm": 0.753142774105072, "learning_rate": 1.8581404118931e-07, "loss": 0.6547, "step": 17815 }, { "epoch": 0.9156131154281015, "grad_norm": 1.179952621459961, "learning_rate": 1.8558933064437122e-07, "loss": 0.6646, "step": 17816 }, { "epoch": 0.9156645081714462, "grad_norm": 1.1514180898666382, "learning_rate": 1.8536475348688353e-07, "loss": 0.7113, "step": 17817 }, { "epoch": 0.9157159009147908, "grad_norm": 1.0712791681289673, "learning_rate": 1.8514030972307085e-07, "loss": 0.6942, "step": 17818 }, { "epoch": 0.9157672936581355, "grad_norm": 1.1599644422531128, "learning_rate": 1.849159993591504e-07, "loss": 0.6463, "step": 17819 }, { "epoch": 0.9158186864014801, "grad_norm": 1.1334662437438965, "learning_rate": 1.8469182240133666e-07, "loss": 0.7299, "step": 17820 }, { "epoch": 0.9158700791448248, "grad_norm": 1.1012070178985596, "learning_rate": 1.844677788558402e-07, "loss": 0.6718, "step": 17821 }, { "epoch": 0.9159214718881694, "grad_norm": 1.053802490234375, "learning_rate": 1.8424386872887057e-07, "loss": 0.6836, "step": 17822 }, { "epoch": 0.9159728646315141, "grad_norm": 1.1249608993530273, "learning_rate": 1.840200920266283e-07, "loss": 0.7243, "step": 17823 }, { "epoch": 0.9160242573748587, "grad_norm": 1.0726464986801147, "learning_rate": 1.8379644875531566e-07, "loss": 0.662, "step": 17824 }, { "epoch": 0.9160756501182034, "grad_norm": 1.080886721611023, "learning_rate": 1.8357293892112825e-07, "loss": 0.7304, "step": 17825 }, { "epoch": 0.916127042861548, "grad_norm": 0.9601016640663147, "learning_rate": 1.8334956253025783e-07, "loss": 0.5948, "step": 17826 }, { "epoch": 0.9161784356048925, "grad_norm": 0.9973931312561035, "learning_rate": 1.8312631958889438e-07, "loss": 0.6503, "step": 17827 }, { "epoch": 0.9162298283482372, "grad_norm": 1.097583532333374, "learning_rate": 1.8290321010322243e-07, "loss": 0.6965, "step": 17828 }, { "epoch": 0.9162812210915818, "grad_norm": 1.0854599475860596, "learning_rate": 1.8268023407942426e-07, "loss": 0.6956, "step": 17829 }, { "epoch": 0.9163326138349265, "grad_norm": 1.0833860635757446, "learning_rate": 1.8245739152367602e-07, "loss": 0.674, "step": 17830 }, { "epoch": 0.9163840065782711, "grad_norm": 1.1195471286773682, "learning_rate": 1.8223468244215281e-07, "loss": 0.6629, "step": 17831 }, { "epoch": 0.9164353993216158, "grad_norm": 0.7234694361686707, "learning_rate": 1.8201210684102522e-07, "loss": 0.6559, "step": 17832 }, { "epoch": 0.9164867920649604, "grad_norm": 1.0424590110778809, "learning_rate": 1.8178966472645888e-07, "loss": 0.6896, "step": 17833 }, { "epoch": 0.9165381848083051, "grad_norm": 1.1470211744308472, "learning_rate": 1.8156735610461717e-07, "loss": 0.7357, "step": 17834 }, { "epoch": 0.9165895775516497, "grad_norm": 1.0699808597564697, "learning_rate": 1.813451809816602e-07, "loss": 0.6889, "step": 17835 }, { "epoch": 0.9166409702949944, "grad_norm": 1.0896185636520386, "learning_rate": 1.8112313936374303e-07, "loss": 0.7216, "step": 17836 }, { "epoch": 0.916692363038339, "grad_norm": 0.7181626558303833, "learning_rate": 1.809012312570163e-07, "loss": 0.6245, "step": 17837 }, { "epoch": 0.9167437557816837, "grad_norm": 1.095384120941162, "learning_rate": 1.8067945666763064e-07, "loss": 0.7207, "step": 17838 }, { "epoch": 0.9167951485250283, "grad_norm": 1.0941030979156494, "learning_rate": 1.8045781560172782e-07, "loss": 0.711, "step": 17839 }, { "epoch": 0.916846541268373, "grad_norm": 1.152273178100586, "learning_rate": 1.8023630806545066e-07, "loss": 0.7366, "step": 17840 }, { "epoch": 0.9168979340117176, "grad_norm": 1.1516669988632202, "learning_rate": 1.8001493406493486e-07, "loss": 0.6904, "step": 17841 }, { "epoch": 0.9169493267550621, "grad_norm": 1.1944884061813354, "learning_rate": 1.7979369360631495e-07, "loss": 0.7475, "step": 17842 }, { "epoch": 0.9170007194984068, "grad_norm": 1.1919951438903809, "learning_rate": 1.7957258669571877e-07, "loss": 0.6788, "step": 17843 }, { "epoch": 0.9170521122417514, "grad_norm": 1.0706806182861328, "learning_rate": 1.7935161333927476e-07, "loss": 0.7046, "step": 17844 }, { "epoch": 0.9171035049850961, "grad_norm": 1.1335201263427734, "learning_rate": 1.791307735431036e-07, "loss": 0.6717, "step": 17845 }, { "epoch": 0.9171548977284407, "grad_norm": 0.7956576943397522, "learning_rate": 1.7891006731332372e-07, "loss": 0.65, "step": 17846 }, { "epoch": 0.9172062904717854, "grad_norm": 1.0746384859085083, "learning_rate": 1.7868949465605024e-07, "loss": 0.6858, "step": 17847 }, { "epoch": 0.91725768321513, "grad_norm": 1.1434177160263062, "learning_rate": 1.784690555773949e-07, "loss": 0.6993, "step": 17848 }, { "epoch": 0.9173090759584747, "grad_norm": 1.1466773748397827, "learning_rate": 1.7824875008346508e-07, "loss": 0.6809, "step": 17849 }, { "epoch": 0.9173604687018193, "grad_norm": 1.2224323749542236, "learning_rate": 1.7802857818036367e-07, "loss": 0.6858, "step": 17850 }, { "epoch": 0.917411861445164, "grad_norm": 1.1147270202636719, "learning_rate": 1.778085398741919e-07, "loss": 0.7083, "step": 17851 }, { "epoch": 0.9174632541885086, "grad_norm": 1.0163531303405762, "learning_rate": 1.7758863517104485e-07, "loss": 0.6154, "step": 17852 }, { "epoch": 0.9175146469318533, "grad_norm": 0.787189781665802, "learning_rate": 1.7736886407701604e-07, "loss": 0.6313, "step": 17853 }, { "epoch": 0.9175660396751979, "grad_norm": 1.0340983867645264, "learning_rate": 1.7714922659819443e-07, "loss": 0.6594, "step": 17854 }, { "epoch": 0.9176174324185425, "grad_norm": 1.2157496213912964, "learning_rate": 1.7692972274066467e-07, "loss": 0.7109, "step": 17855 }, { "epoch": 0.9176688251618872, "grad_norm": 1.1568785905838013, "learning_rate": 1.767103525105085e-07, "loss": 0.6675, "step": 17856 }, { "epoch": 0.9177202179052317, "grad_norm": 1.1193177700042725, "learning_rate": 1.7649111591380385e-07, "loss": 0.698, "step": 17857 }, { "epoch": 0.9177716106485764, "grad_norm": 0.686129629611969, "learning_rate": 1.7627201295662532e-07, "loss": 0.6321, "step": 17858 }, { "epoch": 0.917823003391921, "grad_norm": 1.2017323970794678, "learning_rate": 1.760530436450425e-07, "loss": 0.6788, "step": 17859 }, { "epoch": 0.9178743961352657, "grad_norm": 1.0828529596328735, "learning_rate": 1.758342079851233e-07, "loss": 0.6632, "step": 17860 }, { "epoch": 0.9179257888786103, "grad_norm": 1.1641581058502197, "learning_rate": 1.7561550598292954e-07, "loss": 0.7203, "step": 17861 }, { "epoch": 0.917977181621955, "grad_norm": 1.0762056112289429, "learning_rate": 1.7539693764452137e-07, "loss": 0.6676, "step": 17862 }, { "epoch": 0.9180285743652996, "grad_norm": 1.129097580909729, "learning_rate": 1.751785029759534e-07, "loss": 0.7054, "step": 17863 }, { "epoch": 0.9180799671086443, "grad_norm": 1.044602632522583, "learning_rate": 1.7496020198327967e-07, "loss": 0.6067, "step": 17864 }, { "epoch": 0.9181313598519889, "grad_norm": 0.7199539542198181, "learning_rate": 1.7474203467254592e-07, "loss": 0.6171, "step": 17865 }, { "epoch": 0.9181827525953336, "grad_norm": 1.0825364589691162, "learning_rate": 1.7452400104979782e-07, "loss": 0.6215, "step": 17866 }, { "epoch": 0.9182341453386782, "grad_norm": 1.0826314687728882, "learning_rate": 1.7430610112107616e-07, "loss": 0.7204, "step": 17867 }, { "epoch": 0.9182855380820228, "grad_norm": 1.1075942516326904, "learning_rate": 1.7408833489241772e-07, "loss": 0.6439, "step": 17868 }, { "epoch": 0.9183369308253675, "grad_norm": 1.0964702367782593, "learning_rate": 1.7387070236985658e-07, "loss": 0.6887, "step": 17869 }, { "epoch": 0.9183883235687121, "grad_norm": 0.8237515687942505, "learning_rate": 1.7365320355942185e-07, "loss": 0.6221, "step": 17870 }, { "epoch": 0.9184397163120568, "grad_norm": 1.3141134977340698, "learning_rate": 1.7343583846713974e-07, "loss": 0.6506, "step": 17871 }, { "epoch": 0.9184911090554013, "grad_norm": 1.1173369884490967, "learning_rate": 1.7321860709903214e-07, "loss": 0.667, "step": 17872 }, { "epoch": 0.918542501798746, "grad_norm": 1.0614476203918457, "learning_rate": 1.7300150946111872e-07, "loss": 0.6661, "step": 17873 }, { "epoch": 0.9185938945420906, "grad_norm": 1.0272514820098877, "learning_rate": 1.7278454555941294e-07, "loss": 0.6641, "step": 17874 }, { "epoch": 0.9186452872854353, "grad_norm": 1.1299598217010498, "learning_rate": 1.7256771539992723e-07, "loss": 0.703, "step": 17875 }, { "epoch": 0.9186966800287799, "grad_norm": 1.131804347038269, "learning_rate": 1.7235101898866736e-07, "loss": 0.7107, "step": 17876 }, { "epoch": 0.9187480727721246, "grad_norm": 1.0820996761322021, "learning_rate": 1.7213445633163961e-07, "loss": 0.6868, "step": 17877 }, { "epoch": 0.9187994655154692, "grad_norm": 1.128631591796875, "learning_rate": 1.7191802743484198e-07, "loss": 0.7497, "step": 17878 }, { "epoch": 0.9188508582588139, "grad_norm": 1.10720956325531, "learning_rate": 1.7170173230427135e-07, "loss": 0.7093, "step": 17879 }, { "epoch": 0.9189022510021585, "grad_norm": 1.1000370979309082, "learning_rate": 1.7148557094592122e-07, "loss": 0.7232, "step": 17880 }, { "epoch": 0.9189536437455031, "grad_norm": 0.7628276348114014, "learning_rate": 1.7126954336577906e-07, "loss": 0.6514, "step": 17881 }, { "epoch": 0.9190050364888478, "grad_norm": 1.0870556831359863, "learning_rate": 1.7105364956983116e-07, "loss": 0.7081, "step": 17882 }, { "epoch": 0.9190564292321924, "grad_norm": 1.016682744026184, "learning_rate": 1.7083788956405889e-07, "loss": 0.711, "step": 17883 }, { "epoch": 0.9191078219755371, "grad_norm": 0.6881650686264038, "learning_rate": 1.7062226335443964e-07, "loss": 0.6305, "step": 17884 }, { "epoch": 0.9191592147188817, "grad_norm": 1.153055191040039, "learning_rate": 1.7040677094694756e-07, "loss": 0.6684, "step": 17885 }, { "epoch": 0.9192106074622264, "grad_norm": 0.6730453968048096, "learning_rate": 1.7019141234755398e-07, "loss": 0.668, "step": 17886 }, { "epoch": 0.919262000205571, "grad_norm": 1.1237142086029053, "learning_rate": 1.6997618756222466e-07, "loss": 0.6778, "step": 17887 }, { "epoch": 0.9193133929489156, "grad_norm": 1.0704950094223022, "learning_rate": 1.6976109659692263e-07, "loss": 0.6483, "step": 17888 }, { "epoch": 0.9193647856922602, "grad_norm": 1.0592703819274902, "learning_rate": 1.6954613945760812e-07, "loss": 0.7091, "step": 17889 }, { "epoch": 0.9194161784356049, "grad_norm": 1.1600102186203003, "learning_rate": 1.693313161502347e-07, "loss": 0.6939, "step": 17890 }, { "epoch": 0.9194675711789495, "grad_norm": 1.1618740558624268, "learning_rate": 1.6911662668075645e-07, "loss": 0.6894, "step": 17891 }, { "epoch": 0.9195189639222942, "grad_norm": 1.0890191793441772, "learning_rate": 1.689020710551198e-07, "loss": 0.6423, "step": 17892 }, { "epoch": 0.9195703566656388, "grad_norm": 1.0456568002700806, "learning_rate": 1.686876492792716e-07, "loss": 0.6654, "step": 17893 }, { "epoch": 0.9196217494089834, "grad_norm": 1.165205717086792, "learning_rate": 1.6847336135914937e-07, "loss": 0.6929, "step": 17894 }, { "epoch": 0.9196731421523281, "grad_norm": 1.1483205556869507, "learning_rate": 1.6825920730069279e-07, "loss": 0.7267, "step": 17895 }, { "epoch": 0.9197245348956727, "grad_norm": 1.3313127756118774, "learning_rate": 1.6804518710983374e-07, "loss": 0.7284, "step": 17896 }, { "epoch": 0.9197759276390174, "grad_norm": 1.0919408798217773, "learning_rate": 1.6783130079250255e-07, "loss": 0.6587, "step": 17897 }, { "epoch": 0.919827320382362, "grad_norm": 1.1168464422225952, "learning_rate": 1.6761754835462386e-07, "loss": 0.7168, "step": 17898 }, { "epoch": 0.9198787131257067, "grad_norm": 1.1493359804153442, "learning_rate": 1.6740392980212185e-07, "loss": 0.7368, "step": 17899 }, { "epoch": 0.9199301058690513, "grad_norm": 0.9786962866783142, "learning_rate": 1.6719044514091344e-07, "loss": 0.6042, "step": 17900 }, { "epoch": 0.919981498612396, "grad_norm": 0.7389718294143677, "learning_rate": 1.6697709437691388e-07, "loss": 0.6495, "step": 17901 }, { "epoch": 0.9200328913557406, "grad_norm": 0.6559603214263916, "learning_rate": 1.6676387751603518e-07, "loss": 0.6308, "step": 17902 }, { "epoch": 0.9200842840990852, "grad_norm": 1.1020094156265259, "learning_rate": 1.6655079456418312e-07, "loss": 0.6671, "step": 17903 }, { "epoch": 0.9201356768424298, "grad_norm": 1.1517366170883179, "learning_rate": 1.663378455272624e-07, "loss": 0.7296, "step": 17904 }, { "epoch": 0.9201870695857745, "grad_norm": 1.1258994340896606, "learning_rate": 1.6612503041117278e-07, "loss": 0.7193, "step": 17905 }, { "epoch": 0.9202384623291191, "grad_norm": 1.096066951751709, "learning_rate": 1.659123492218101e-07, "loss": 0.6918, "step": 17906 }, { "epoch": 0.9202898550724637, "grad_norm": 1.0259110927581787, "learning_rate": 1.656998019650663e-07, "loss": 0.665, "step": 17907 }, { "epoch": 0.9203412478158084, "grad_norm": 1.0766032934188843, "learning_rate": 1.6548738864683222e-07, "loss": 0.6748, "step": 17908 }, { "epoch": 0.920392640559153, "grad_norm": 0.6675497889518738, "learning_rate": 1.6527510927299152e-07, "loss": 0.6452, "step": 17909 }, { "epoch": 0.9204440333024977, "grad_norm": 1.140841007232666, "learning_rate": 1.65062963849425e-07, "loss": 0.729, "step": 17910 }, { "epoch": 0.9204954260458423, "grad_norm": 1.0372449159622192, "learning_rate": 1.6485095238201187e-07, "loss": 0.7559, "step": 17911 }, { "epoch": 0.920546818789187, "grad_norm": 1.134925127029419, "learning_rate": 1.6463907487662523e-07, "loss": 0.6825, "step": 17912 }, { "epoch": 0.9205982115325316, "grad_norm": 1.0917657613754272, "learning_rate": 1.644273313391359e-07, "loss": 0.7089, "step": 17913 }, { "epoch": 0.9206496042758763, "grad_norm": 1.1895254850387573, "learning_rate": 1.6421572177540867e-07, "loss": 0.7135, "step": 17914 }, { "epoch": 0.9207009970192209, "grad_norm": 1.102960467338562, "learning_rate": 1.640042461913094e-07, "loss": 0.697, "step": 17915 }, { "epoch": 0.9207523897625656, "grad_norm": 1.1201682090759277, "learning_rate": 1.6379290459269448e-07, "loss": 0.7418, "step": 17916 }, { "epoch": 0.9208037825059102, "grad_norm": 1.084162712097168, "learning_rate": 1.6358169698542037e-07, "loss": 0.6873, "step": 17917 }, { "epoch": 0.9208551752492548, "grad_norm": 1.1272410154342651, "learning_rate": 1.6337062337533904e-07, "loss": 0.7226, "step": 17918 }, { "epoch": 0.9209065679925994, "grad_norm": 1.1469653844833374, "learning_rate": 1.631596837682975e-07, "loss": 0.6975, "step": 17919 }, { "epoch": 0.920957960735944, "grad_norm": 0.6749574542045593, "learning_rate": 1.629488781701416e-07, "loss": 0.6311, "step": 17920 }, { "epoch": 0.9210093534792887, "grad_norm": 1.1012417078018188, "learning_rate": 1.6273820658671057e-07, "loss": 0.7211, "step": 17921 }, { "epoch": 0.9210607462226333, "grad_norm": 1.129227638244629, "learning_rate": 1.6252766902384144e-07, "loss": 0.7202, "step": 17922 }, { "epoch": 0.921112138965978, "grad_norm": 1.0793527364730835, "learning_rate": 1.6231726548736727e-07, "loss": 0.7049, "step": 17923 }, { "epoch": 0.9211635317093226, "grad_norm": 1.0719245672225952, "learning_rate": 1.6210699598311842e-07, "loss": 0.6267, "step": 17924 }, { "epoch": 0.9212149244526673, "grad_norm": 1.088513731956482, "learning_rate": 1.6189686051691968e-07, "loss": 0.6345, "step": 17925 }, { "epoch": 0.9212663171960119, "grad_norm": 1.0609462261199951, "learning_rate": 1.6168685909459304e-07, "loss": 0.6734, "step": 17926 }, { "epoch": 0.9213177099393566, "grad_norm": 1.1078190803527832, "learning_rate": 1.6147699172195718e-07, "loss": 0.6983, "step": 17927 }, { "epoch": 0.9213691026827012, "grad_norm": 1.1390665769577026, "learning_rate": 1.6126725840482637e-07, "loss": 0.7211, "step": 17928 }, { "epoch": 0.9214204954260459, "grad_norm": 0.9897744655609131, "learning_rate": 1.610576591490115e-07, "loss": 0.6412, "step": 17929 }, { "epoch": 0.9214718881693905, "grad_norm": 1.0611399412155151, "learning_rate": 1.6084819396031958e-07, "loss": 0.6866, "step": 17930 }, { "epoch": 0.9215232809127352, "grad_norm": 1.1009806394577026, "learning_rate": 1.6063886284455544e-07, "loss": 0.6719, "step": 17931 }, { "epoch": 0.9215746736560798, "grad_norm": 1.0061864852905273, "learning_rate": 1.6042966580751607e-07, "loss": 0.653, "step": 17932 }, { "epoch": 0.9216260663994243, "grad_norm": 1.1049132347106934, "learning_rate": 1.6022060285499964e-07, "loss": 0.7158, "step": 17933 }, { "epoch": 0.921677459142769, "grad_norm": 0.7581444382667542, "learning_rate": 1.600116739927976e-07, "loss": 0.607, "step": 17934 }, { "epoch": 0.9217288518861136, "grad_norm": 1.1605666875839233, "learning_rate": 1.5980287922669868e-07, "loss": 0.6627, "step": 17935 }, { "epoch": 0.9217802446294583, "grad_norm": 1.0792490243911743, "learning_rate": 1.5959421856248715e-07, "loss": 0.6769, "step": 17936 }, { "epoch": 0.9218316373728029, "grad_norm": 0.876400351524353, "learning_rate": 1.59385692005945e-07, "loss": 0.6524, "step": 17937 }, { "epoch": 0.9218830301161476, "grad_norm": 1.0118316411972046, "learning_rate": 1.5917729956284934e-07, "loss": 0.676, "step": 17938 }, { "epoch": 0.9219344228594922, "grad_norm": 0.8122013211250305, "learning_rate": 1.589690412389733e-07, "loss": 0.6534, "step": 17939 }, { "epoch": 0.9219858156028369, "grad_norm": 1.1455602645874023, "learning_rate": 1.5876091704008834e-07, "loss": 0.6792, "step": 17940 }, { "epoch": 0.9220372083461815, "grad_norm": 1.1031770706176758, "learning_rate": 1.5855292697195935e-07, "loss": 0.6814, "step": 17941 }, { "epoch": 0.9220886010895262, "grad_norm": 1.0946985483169556, "learning_rate": 1.5834507104034947e-07, "loss": 0.6588, "step": 17942 }, { "epoch": 0.9221399938328708, "grad_norm": 1.110788345336914, "learning_rate": 1.581373492510163e-07, "loss": 0.6937, "step": 17943 }, { "epoch": 0.9221913865762155, "grad_norm": 1.1252504587173462, "learning_rate": 1.5792976160971752e-07, "loss": 0.7146, "step": 17944 }, { "epoch": 0.9222427793195601, "grad_norm": 1.187690019607544, "learning_rate": 1.577223081222018e-07, "loss": 0.7248, "step": 17945 }, { "epoch": 0.9222941720629048, "grad_norm": 1.0790342092514038, "learning_rate": 1.5751498879421901e-07, "loss": 0.6883, "step": 17946 }, { "epoch": 0.9223455648062494, "grad_norm": 1.1112626791000366, "learning_rate": 1.5730780363151176e-07, "loss": 0.6861, "step": 17947 }, { "epoch": 0.9223969575495939, "grad_norm": 1.1326247453689575, "learning_rate": 1.571007526398205e-07, "loss": 0.6877, "step": 17948 }, { "epoch": 0.9224483502929386, "grad_norm": 1.1459630727767944, "learning_rate": 1.5689383582488117e-07, "loss": 0.7348, "step": 17949 }, { "epoch": 0.9224997430362832, "grad_norm": 1.1149672269821167, "learning_rate": 1.566870531924286e-07, "loss": 0.7281, "step": 17950 }, { "epoch": 0.9225511357796279, "grad_norm": 1.1064398288726807, "learning_rate": 1.5648040474818994e-07, "loss": 0.7009, "step": 17951 }, { "epoch": 0.9226025285229725, "grad_norm": 1.0870492458343506, "learning_rate": 1.562738904978911e-07, "loss": 0.6913, "step": 17952 }, { "epoch": 0.9226539212663172, "grad_norm": 1.108618140220642, "learning_rate": 1.5606751044725422e-07, "loss": 0.6749, "step": 17953 }, { "epoch": 0.9227053140096618, "grad_norm": 0.8157503008842468, "learning_rate": 1.558612646019969e-07, "loss": 0.611, "step": 17954 }, { "epoch": 0.9227567067530065, "grad_norm": 1.049039602279663, "learning_rate": 1.556551529678335e-07, "loss": 0.6776, "step": 17955 }, { "epoch": 0.9228080994963511, "grad_norm": 1.0380388498306274, "learning_rate": 1.554491755504739e-07, "loss": 0.6338, "step": 17956 }, { "epoch": 0.9228594922396958, "grad_norm": 1.132527232170105, "learning_rate": 1.5524333235562628e-07, "loss": 0.6685, "step": 17957 }, { "epoch": 0.9229108849830404, "grad_norm": 1.0159029960632324, "learning_rate": 1.5503762338899165e-07, "loss": 0.6518, "step": 17958 }, { "epoch": 0.9229622777263851, "grad_norm": 1.0918611288070679, "learning_rate": 1.54832048656271e-07, "loss": 0.7168, "step": 17959 }, { "epoch": 0.9230136704697297, "grad_norm": 1.1056890487670898, "learning_rate": 1.5462660816315978e-07, "loss": 0.6672, "step": 17960 }, { "epoch": 0.9230650632130744, "grad_norm": 1.0868240594863892, "learning_rate": 1.54421301915349e-07, "loss": 0.6946, "step": 17961 }, { "epoch": 0.923116455956419, "grad_norm": 1.0969133377075195, "learning_rate": 1.5421612991852743e-07, "loss": 0.6758, "step": 17962 }, { "epoch": 0.9231678486997635, "grad_norm": 1.0977437496185303, "learning_rate": 1.5401109217837996e-07, "loss": 0.7188, "step": 17963 }, { "epoch": 0.9232192414431082, "grad_norm": 1.1663697957992554, "learning_rate": 1.5380618870058705e-07, "loss": 0.7497, "step": 17964 }, { "epoch": 0.9232706341864528, "grad_norm": 1.1673861742019653, "learning_rate": 1.5360141949082474e-07, "loss": 0.6828, "step": 17965 }, { "epoch": 0.9233220269297975, "grad_norm": 1.1102330684661865, "learning_rate": 1.5339678455476848e-07, "loss": 0.6548, "step": 17966 }, { "epoch": 0.9233734196731421, "grad_norm": 1.0657141208648682, "learning_rate": 1.5319228389808538e-07, "loss": 0.6564, "step": 17967 }, { "epoch": 0.9234248124164868, "grad_norm": 1.0036215782165527, "learning_rate": 1.5298791752644316e-07, "loss": 0.6803, "step": 17968 }, { "epoch": 0.9234762051598314, "grad_norm": 1.0851986408233643, "learning_rate": 1.5278368544550282e-07, "loss": 0.7094, "step": 17969 }, { "epoch": 0.9235275979031761, "grad_norm": 1.0764050483703613, "learning_rate": 1.5257958766092318e-07, "loss": 0.6776, "step": 17970 }, { "epoch": 0.9235789906465207, "grad_norm": 0.7904413342475891, "learning_rate": 1.5237562417835973e-07, "loss": 0.6367, "step": 17971 }, { "epoch": 0.9236303833898654, "grad_norm": 1.0781728029251099, "learning_rate": 1.521717950034618e-07, "loss": 0.7381, "step": 17972 }, { "epoch": 0.92368177613321, "grad_norm": 1.1759557723999023, "learning_rate": 1.5196810014187936e-07, "loss": 0.7618, "step": 17973 }, { "epoch": 0.9237331688765547, "grad_norm": 1.092697262763977, "learning_rate": 1.517645395992523e-07, "loss": 0.6808, "step": 17974 }, { "epoch": 0.9237845616198993, "grad_norm": 1.0965087413787842, "learning_rate": 1.5156111338122338e-07, "loss": 0.737, "step": 17975 }, { "epoch": 0.923835954363244, "grad_norm": 1.1602504253387451, "learning_rate": 1.5135782149342749e-07, "loss": 0.6723, "step": 17976 }, { "epoch": 0.9238873471065886, "grad_norm": 1.0647333860397339, "learning_rate": 1.5115466394149737e-07, "loss": 0.6961, "step": 17977 }, { "epoch": 0.9239387398499332, "grad_norm": 1.0834171772003174, "learning_rate": 1.5095164073106128e-07, "loss": 0.6798, "step": 17978 }, { "epoch": 0.9239901325932778, "grad_norm": 1.12798011302948, "learning_rate": 1.507487518677442e-07, "loss": 0.727, "step": 17979 }, { "epoch": 0.9240415253366224, "grad_norm": 1.1388719081878662, "learning_rate": 1.5054599735716824e-07, "loss": 0.6982, "step": 17980 }, { "epoch": 0.9240929180799671, "grad_norm": 1.1117812395095825, "learning_rate": 1.5034337720494897e-07, "loss": 0.7107, "step": 17981 }, { "epoch": 0.9241443108233117, "grad_norm": 1.1133978366851807, "learning_rate": 1.5014089141670296e-07, "loss": 0.7032, "step": 17982 }, { "epoch": 0.9241957035666564, "grad_norm": 1.1378601789474487, "learning_rate": 1.4993853999803743e-07, "loss": 0.6859, "step": 17983 }, { "epoch": 0.924247096310001, "grad_norm": 1.073419451713562, "learning_rate": 1.497363229545601e-07, "loss": 0.6765, "step": 17984 }, { "epoch": 0.9242984890533457, "grad_norm": 1.0413177013397217, "learning_rate": 1.4953424029187315e-07, "loss": 0.733, "step": 17985 }, { "epoch": 0.9243498817966903, "grad_norm": 1.1038250923156738, "learning_rate": 1.4933229201557707e-07, "loss": 0.6946, "step": 17986 }, { "epoch": 0.924401274540035, "grad_norm": 1.0266940593719482, "learning_rate": 1.4913047813126414e-07, "loss": 0.6643, "step": 17987 }, { "epoch": 0.9244526672833796, "grad_norm": 1.0726960897445679, "learning_rate": 1.4892879864452813e-07, "loss": 0.6568, "step": 17988 }, { "epoch": 0.9245040600267243, "grad_norm": 1.1040953397750854, "learning_rate": 1.4872725356095575e-07, "loss": 0.7084, "step": 17989 }, { "epoch": 0.9245554527700689, "grad_norm": 1.145744800567627, "learning_rate": 1.4852584288613082e-07, "loss": 0.7068, "step": 17990 }, { "epoch": 0.9246068455134135, "grad_norm": 1.1365375518798828, "learning_rate": 1.4832456662563443e-07, "loss": 0.7196, "step": 17991 }, { "epoch": 0.9246582382567582, "grad_norm": 1.2515432834625244, "learning_rate": 1.4812342478504216e-07, "loss": 0.7381, "step": 17992 }, { "epoch": 0.9247096310001028, "grad_norm": 1.1745240688323975, "learning_rate": 1.4792241736992785e-07, "loss": 0.7026, "step": 17993 }, { "epoch": 0.9247610237434474, "grad_norm": 1.0568336248397827, "learning_rate": 1.4772154438585929e-07, "loss": 0.6942, "step": 17994 }, { "epoch": 0.924812416486792, "grad_norm": 1.0647705793380737, "learning_rate": 1.4752080583840366e-07, "loss": 0.6512, "step": 17995 }, { "epoch": 0.9248638092301367, "grad_norm": 0.7068923115730286, "learning_rate": 1.4732020173312045e-07, "loss": 0.6776, "step": 17996 }, { "epoch": 0.9249152019734813, "grad_norm": 1.0651558637619019, "learning_rate": 1.4711973207556907e-07, "loss": 0.7404, "step": 17997 }, { "epoch": 0.924966594716826, "grad_norm": 1.0641396045684814, "learning_rate": 1.469193968713034e-07, "loss": 0.6985, "step": 17998 }, { "epoch": 0.9250179874601706, "grad_norm": 1.077093243598938, "learning_rate": 1.467191961258735e-07, "loss": 0.7171, "step": 17999 }, { "epoch": 0.9250693802035153, "grad_norm": 1.0949444770812988, "learning_rate": 1.4651912984482596e-07, "loss": 0.6862, "step": 18000 }, { "epoch": 0.9251207729468599, "grad_norm": 1.1452710628509521, "learning_rate": 1.4631919803370477e-07, "loss": 0.6974, "step": 18001 }, { "epoch": 0.9251721656902046, "grad_norm": 1.048422932624817, "learning_rate": 1.4611940069804875e-07, "loss": 0.6839, "step": 18002 }, { "epoch": 0.9252235584335492, "grad_norm": 0.7867533564567566, "learning_rate": 1.4591973784339242e-07, "loss": 0.6723, "step": 18003 }, { "epoch": 0.9252749511768938, "grad_norm": 1.0264827013015747, "learning_rate": 1.4572020947526966e-07, "loss": 0.6688, "step": 18004 }, { "epoch": 0.9253263439202385, "grad_norm": 0.737784743309021, "learning_rate": 1.4552081559920716e-07, "loss": 0.6514, "step": 18005 }, { "epoch": 0.9253777366635831, "grad_norm": 1.1032506227493286, "learning_rate": 1.4532155622072941e-07, "loss": 0.7214, "step": 18006 }, { "epoch": 0.9254291294069278, "grad_norm": 1.0456485748291016, "learning_rate": 1.4512243134535698e-07, "loss": 0.6525, "step": 18007 }, { "epoch": 0.9254805221502724, "grad_norm": 1.049696683883667, "learning_rate": 1.4492344097860823e-07, "loss": 0.6479, "step": 18008 }, { "epoch": 0.925531914893617, "grad_norm": 1.0641655921936035, "learning_rate": 1.4472458512599374e-07, "loss": 0.7152, "step": 18009 }, { "epoch": 0.9255833076369616, "grad_norm": 0.6849913001060486, "learning_rate": 1.4452586379302524e-07, "loss": 0.635, "step": 18010 }, { "epoch": 0.9256347003803063, "grad_norm": 1.038744330406189, "learning_rate": 1.4432727698520775e-07, "loss": 0.702, "step": 18011 }, { "epoch": 0.9256860931236509, "grad_norm": 1.1021252870559692, "learning_rate": 1.4412882470804302e-07, "loss": 0.6578, "step": 18012 }, { "epoch": 0.9257374858669956, "grad_norm": 1.0691472291946411, "learning_rate": 1.4393050696702938e-07, "loss": 0.6845, "step": 18013 }, { "epoch": 0.9257888786103402, "grad_norm": 0.7026569247245789, "learning_rate": 1.437323237676619e-07, "loss": 0.6146, "step": 18014 }, { "epoch": 0.9258402713536849, "grad_norm": 1.1386979818344116, "learning_rate": 1.4353427511543117e-07, "loss": 0.7405, "step": 18015 }, { "epoch": 0.9258916640970295, "grad_norm": 0.780444324016571, "learning_rate": 1.4333636101582395e-07, "loss": 0.6601, "step": 18016 }, { "epoch": 0.9259430568403741, "grad_norm": 0.7260696291923523, "learning_rate": 1.4313858147432414e-07, "loss": 0.6353, "step": 18017 }, { "epoch": 0.9259944495837188, "grad_norm": 1.1066051721572876, "learning_rate": 1.4294093649641071e-07, "loss": 0.6975, "step": 18018 }, { "epoch": 0.9260458423270634, "grad_norm": 0.8170315623283386, "learning_rate": 1.4274342608756042e-07, "loss": 0.649, "step": 18019 }, { "epoch": 0.9260972350704081, "grad_norm": 0.6989414691925049, "learning_rate": 1.4254605025324497e-07, "loss": 0.6103, "step": 18020 }, { "epoch": 0.9261486278137527, "grad_norm": 0.7070466876029968, "learning_rate": 1.4234880899893279e-07, "loss": 0.6416, "step": 18021 }, { "epoch": 0.9262000205570974, "grad_norm": 1.0816798210144043, "learning_rate": 1.4215170233008945e-07, "loss": 0.6863, "step": 18022 }, { "epoch": 0.926251413300442, "grad_norm": 1.0600920915603638, "learning_rate": 1.41954730252174e-07, "loss": 0.7263, "step": 18023 }, { "epoch": 0.9263028060437866, "grad_norm": 1.089324951171875, "learning_rate": 1.4175789277064645e-07, "loss": 0.6384, "step": 18024 }, { "epoch": 0.9263541987871312, "grad_norm": 1.1319739818572998, "learning_rate": 1.4156118989095801e-07, "loss": 0.705, "step": 18025 }, { "epoch": 0.9264055915304759, "grad_norm": 0.6767879724502563, "learning_rate": 1.4136462161855992e-07, "loss": 0.6315, "step": 18026 }, { "epoch": 0.9264569842738205, "grad_norm": 1.1289499998092651, "learning_rate": 1.4116818795889776e-07, "loss": 0.7108, "step": 18027 }, { "epoch": 0.9265083770171652, "grad_norm": 1.0119119882583618, "learning_rate": 1.409718889174133e-07, "loss": 0.6005, "step": 18028 }, { "epoch": 0.9265597697605098, "grad_norm": 1.117863655090332, "learning_rate": 1.4077572449954612e-07, "loss": 0.7288, "step": 18029 }, { "epoch": 0.9266111625038544, "grad_norm": 1.124996542930603, "learning_rate": 1.4057969471073074e-07, "loss": 0.6745, "step": 18030 }, { "epoch": 0.9266625552471991, "grad_norm": 1.1155446767807007, "learning_rate": 1.4038379955639835e-07, "loss": 0.6959, "step": 18031 }, { "epoch": 0.9267139479905437, "grad_norm": 1.0934009552001953, "learning_rate": 1.401880390419763e-07, "loss": 0.7568, "step": 18032 }, { "epoch": 0.9267653407338884, "grad_norm": 1.1934900283813477, "learning_rate": 1.3999241317288857e-07, "loss": 0.6443, "step": 18033 }, { "epoch": 0.926816733477233, "grad_norm": 1.1255019903182983, "learning_rate": 1.397969219545553e-07, "loss": 0.6712, "step": 18034 }, { "epoch": 0.9268681262205777, "grad_norm": 1.0650707483291626, "learning_rate": 1.396015653923921e-07, "loss": 0.6907, "step": 18035 }, { "epoch": 0.9269195189639223, "grad_norm": 1.1233806610107422, "learning_rate": 1.3940634349181136e-07, "loss": 0.6878, "step": 18036 }, { "epoch": 0.926970911707267, "grad_norm": 1.1066895723342896, "learning_rate": 1.3921125625822373e-07, "loss": 0.7318, "step": 18037 }, { "epoch": 0.9270223044506116, "grad_norm": 1.0932739973068237, "learning_rate": 1.3901630369703156e-07, "loss": 0.6856, "step": 18038 }, { "epoch": 0.9270736971939562, "grad_norm": 1.108330488204956, "learning_rate": 1.388214858136383e-07, "loss": 0.6651, "step": 18039 }, { "epoch": 0.9271250899373008, "grad_norm": 1.0956828594207764, "learning_rate": 1.3862680261344075e-07, "loss": 0.6894, "step": 18040 }, { "epoch": 0.9271764826806455, "grad_norm": 1.1883400678634644, "learning_rate": 1.384322541018318e-07, "loss": 0.709, "step": 18041 }, { "epoch": 0.9272278754239901, "grad_norm": 1.0750535726547241, "learning_rate": 1.3823784028420385e-07, "loss": 0.6749, "step": 18042 }, { "epoch": 0.9272792681673347, "grad_norm": 1.0885261297225952, "learning_rate": 1.3804356116594141e-07, "loss": 0.6947, "step": 18043 }, { "epoch": 0.9273306609106794, "grad_norm": 0.6639559864997864, "learning_rate": 1.3784941675242803e-07, "loss": 0.6407, "step": 18044 }, { "epoch": 0.927382053654024, "grad_norm": 1.0531586408615112, "learning_rate": 1.3765540704904211e-07, "loss": 0.6762, "step": 18045 }, { "epoch": 0.9274334463973687, "grad_norm": 1.0709749460220337, "learning_rate": 1.3746153206115997e-07, "loss": 0.691, "step": 18046 }, { "epoch": 0.9274848391407133, "grad_norm": 1.1919680833816528, "learning_rate": 1.3726779179415116e-07, "loss": 0.7133, "step": 18047 }, { "epoch": 0.927536231884058, "grad_norm": 1.0678884983062744, "learning_rate": 1.3707418625338476e-07, "loss": 0.7406, "step": 18048 }, { "epoch": 0.9275876246274026, "grad_norm": 1.1151939630508423, "learning_rate": 1.3688071544422475e-07, "loss": 0.7516, "step": 18049 }, { "epoch": 0.9276390173707473, "grad_norm": 0.9404031038284302, "learning_rate": 1.3668737937203136e-07, "loss": 0.6303, "step": 18050 }, { "epoch": 0.9276904101140919, "grad_norm": 1.0909656286239624, "learning_rate": 1.3649417804216082e-07, "loss": 0.7374, "step": 18051 }, { "epoch": 0.9277418028574366, "grad_norm": 1.043218970298767, "learning_rate": 1.3630111145996605e-07, "loss": 0.7064, "step": 18052 }, { "epoch": 0.9277931956007812, "grad_norm": 0.7091201543807983, "learning_rate": 1.361081796307967e-07, "loss": 0.6206, "step": 18053 }, { "epoch": 0.9278445883441259, "grad_norm": 1.042454719543457, "learning_rate": 1.3591538255999626e-07, "loss": 0.6757, "step": 18054 }, { "epoch": 0.9278959810874704, "grad_norm": 1.013356328010559, "learning_rate": 1.3572272025290879e-07, "loss": 0.6694, "step": 18055 }, { "epoch": 0.927947373830815, "grad_norm": 1.0977790355682373, "learning_rate": 1.3553019271487112e-07, "loss": 0.6886, "step": 18056 }, { "epoch": 0.9279987665741597, "grad_norm": 1.0828428268432617, "learning_rate": 1.3533779995121731e-07, "loss": 0.6951, "step": 18057 }, { "epoch": 0.9280501593175043, "grad_norm": 0.7001410722732544, "learning_rate": 1.3514554196727702e-07, "loss": 0.6289, "step": 18058 }, { "epoch": 0.928101552060849, "grad_norm": 1.0742887258529663, "learning_rate": 1.3495341876837875e-07, "loss": 0.6576, "step": 18059 }, { "epoch": 0.9281529448041936, "grad_norm": 0.7802107930183411, "learning_rate": 1.3476143035984323e-07, "loss": 0.6604, "step": 18060 }, { "epoch": 0.9282043375475383, "grad_norm": 1.2168136835098267, "learning_rate": 1.3456957674699178e-07, "loss": 0.6847, "step": 18061 }, { "epoch": 0.9282557302908829, "grad_norm": 1.1582164764404297, "learning_rate": 1.3437785793513792e-07, "loss": 0.6891, "step": 18062 }, { "epoch": 0.9283071230342276, "grad_norm": 1.107991337776184, "learning_rate": 1.3418627392959515e-07, "loss": 0.7038, "step": 18063 }, { "epoch": 0.9283585157775722, "grad_norm": 0.6763691902160645, "learning_rate": 1.3399482473567093e-07, "loss": 0.6363, "step": 18064 }, { "epoch": 0.9284099085209169, "grad_norm": 1.1387147903442383, "learning_rate": 1.3380351035866823e-07, "loss": 0.6673, "step": 18065 }, { "epoch": 0.9284613012642615, "grad_norm": 1.0933600664138794, "learning_rate": 1.3361233080389e-07, "loss": 0.741, "step": 18066 }, { "epoch": 0.9285126940076062, "grad_norm": 1.0726501941680908, "learning_rate": 1.3342128607663096e-07, "loss": 0.6755, "step": 18067 }, { "epoch": 0.9285640867509508, "grad_norm": 1.0702552795410156, "learning_rate": 1.3323037618218516e-07, "loss": 0.6277, "step": 18068 }, { "epoch": 0.9286154794942955, "grad_norm": 0.6824581623077393, "learning_rate": 1.3303960112584113e-07, "loss": 0.6073, "step": 18069 }, { "epoch": 0.92866687223764, "grad_norm": 1.0876410007476807, "learning_rate": 1.328489609128858e-07, "loss": 0.7121, "step": 18070 }, { "epoch": 0.9287182649809846, "grad_norm": 1.094212293624878, "learning_rate": 1.3265845554859934e-07, "loss": 0.7332, "step": 18071 }, { "epoch": 0.9287696577243293, "grad_norm": 1.1048212051391602, "learning_rate": 1.324680850382609e-07, "loss": 0.6964, "step": 18072 }, { "epoch": 0.9288210504676739, "grad_norm": 1.1248199939727783, "learning_rate": 1.3227784938714517e-07, "loss": 0.7202, "step": 18073 }, { "epoch": 0.9288724432110186, "grad_norm": 1.1405552625656128, "learning_rate": 1.3208774860052177e-07, "loss": 0.6945, "step": 18074 }, { "epoch": 0.9289238359543632, "grad_norm": 1.0174750089645386, "learning_rate": 1.3189778268365928e-07, "loss": 0.7266, "step": 18075 }, { "epoch": 0.9289752286977079, "grad_norm": 0.7862551212310791, "learning_rate": 1.3170795164181847e-07, "loss": 0.6761, "step": 18076 }, { "epoch": 0.9290266214410525, "grad_norm": 1.0244574546813965, "learning_rate": 1.3151825548026077e-07, "loss": 0.6437, "step": 18077 }, { "epoch": 0.9290780141843972, "grad_norm": 1.0804916620254517, "learning_rate": 1.3132869420424077e-07, "loss": 0.7387, "step": 18078 }, { "epoch": 0.9291294069277418, "grad_norm": 1.245983600616455, "learning_rate": 1.3113926781901155e-07, "loss": 0.7472, "step": 18079 }, { "epoch": 0.9291807996710865, "grad_norm": 1.058756709098816, "learning_rate": 1.3094997632981998e-07, "loss": 0.7022, "step": 18080 }, { "epoch": 0.9292321924144311, "grad_norm": 1.1186561584472656, "learning_rate": 1.3076081974191136e-07, "loss": 0.6915, "step": 18081 }, { "epoch": 0.9292835851577758, "grad_norm": 1.1571509838104248, "learning_rate": 1.3057179806052645e-07, "loss": 0.7133, "step": 18082 }, { "epoch": 0.9293349779011204, "grad_norm": 1.2750399112701416, "learning_rate": 1.3038291129090108e-07, "loss": 0.6953, "step": 18083 }, { "epoch": 0.929386370644465, "grad_norm": 0.7940494418144226, "learning_rate": 1.3019415943827052e-07, "loss": 0.6546, "step": 18084 }, { "epoch": 0.9294377633878096, "grad_norm": 1.0582627058029175, "learning_rate": 1.3000554250786334e-07, "loss": 0.6783, "step": 18085 }, { "epoch": 0.9294891561311542, "grad_norm": 0.7538990378379822, "learning_rate": 1.2981706050490484e-07, "loss": 0.6563, "step": 18086 }, { "epoch": 0.9295405488744989, "grad_norm": 1.0719081163406372, "learning_rate": 1.2962871343461747e-07, "loss": 0.6901, "step": 18087 }, { "epoch": 0.9295919416178435, "grad_norm": 1.0997651815414429, "learning_rate": 1.2944050130222042e-07, "loss": 0.6567, "step": 18088 }, { "epoch": 0.9296433343611882, "grad_norm": 0.785801351070404, "learning_rate": 1.292524241129267e-07, "loss": 0.6251, "step": 18089 }, { "epoch": 0.9296947271045328, "grad_norm": 1.0831655263900757, "learning_rate": 1.290644818719483e-07, "loss": 0.725, "step": 18090 }, { "epoch": 0.9297461198478775, "grad_norm": 1.1604515314102173, "learning_rate": 1.288766745844916e-07, "loss": 0.7021, "step": 18091 }, { "epoch": 0.9297975125912221, "grad_norm": 1.066574215888977, "learning_rate": 1.286890022557602e-07, "loss": 0.6769, "step": 18092 }, { "epoch": 0.9298489053345668, "grad_norm": 1.069810152053833, "learning_rate": 1.285014648909544e-07, "loss": 0.699, "step": 18093 }, { "epoch": 0.9299002980779114, "grad_norm": 1.0855931043624878, "learning_rate": 1.2831406249526946e-07, "loss": 0.6541, "step": 18094 }, { "epoch": 0.9299516908212561, "grad_norm": 1.1196109056472778, "learning_rate": 1.2812679507389737e-07, "loss": 0.7654, "step": 18095 }, { "epoch": 0.9300030835646007, "grad_norm": 1.1178741455078125, "learning_rate": 1.2793966263202616e-07, "loss": 0.6859, "step": 18096 }, { "epoch": 0.9300544763079454, "grad_norm": 1.1831738948822021, "learning_rate": 1.2775266517484176e-07, "loss": 0.6567, "step": 18097 }, { "epoch": 0.93010586905129, "grad_norm": 0.6771738529205322, "learning_rate": 1.2756580270752439e-07, "loss": 0.6295, "step": 18098 }, { "epoch": 0.9301572617946346, "grad_norm": 1.0817644596099854, "learning_rate": 1.2737907523525162e-07, "loss": 0.6954, "step": 18099 }, { "epoch": 0.9302086545379792, "grad_norm": 1.0571473836898804, "learning_rate": 1.2719248276319595e-07, "loss": 0.716, "step": 18100 }, { "epoch": 0.9302600472813238, "grad_norm": 1.1686515808105469, "learning_rate": 1.270060252965283e-07, "loss": 0.6883, "step": 18101 }, { "epoch": 0.9303114400246685, "grad_norm": 1.051928997039795, "learning_rate": 1.2681970284041333e-07, "loss": 0.6587, "step": 18102 }, { "epoch": 0.9303628327680131, "grad_norm": 1.0963581800460815, "learning_rate": 1.2663351540001423e-07, "loss": 0.6692, "step": 18103 }, { "epoch": 0.9304142255113578, "grad_norm": 1.1356115341186523, "learning_rate": 1.264474629804896e-07, "loss": 0.6768, "step": 18104 }, { "epoch": 0.9304656182547024, "grad_norm": 1.045095682144165, "learning_rate": 1.262615455869931e-07, "loss": 0.6881, "step": 18105 }, { "epoch": 0.9305170109980471, "grad_norm": 1.0176856517791748, "learning_rate": 1.2607576322467674e-07, "loss": 0.66, "step": 18106 }, { "epoch": 0.9305684037413917, "grad_norm": 1.0867902040481567, "learning_rate": 1.258901158986875e-07, "loss": 0.6903, "step": 18107 }, { "epoch": 0.9306197964847364, "grad_norm": 1.1248114109039307, "learning_rate": 1.2570460361416958e-07, "loss": 0.7506, "step": 18108 }, { "epoch": 0.930671189228081, "grad_norm": 1.0654813051223755, "learning_rate": 1.2551922637626058e-07, "loss": 0.7032, "step": 18109 }, { "epoch": 0.9307225819714257, "grad_norm": 1.0783593654632568, "learning_rate": 1.2533398419009913e-07, "loss": 0.6621, "step": 18110 }, { "epoch": 0.9307739747147703, "grad_norm": 1.0867053270339966, "learning_rate": 1.2514887706081613e-07, "loss": 0.6601, "step": 18111 }, { "epoch": 0.930825367458115, "grad_norm": 1.0424574613571167, "learning_rate": 1.2496390499354082e-07, "loss": 0.6828, "step": 18112 }, { "epoch": 0.9308767602014596, "grad_norm": 1.0914846658706665, "learning_rate": 1.247790679933969e-07, "loss": 0.6806, "step": 18113 }, { "epoch": 0.9309281529448042, "grad_norm": 1.053343653678894, "learning_rate": 1.2459436606550634e-07, "loss": 0.73, "step": 18114 }, { "epoch": 0.9309795456881488, "grad_norm": 0.7563942074775696, "learning_rate": 1.2440979921498675e-07, "loss": 0.6721, "step": 18115 }, { "epoch": 0.9310309384314934, "grad_norm": 1.080246090888977, "learning_rate": 1.242253674469507e-07, "loss": 0.6865, "step": 18116 }, { "epoch": 0.9310823311748381, "grad_norm": 1.0843212604522705, "learning_rate": 1.240410707665096e-07, "loss": 0.6468, "step": 18117 }, { "epoch": 0.9311337239181827, "grad_norm": 1.0800042152404785, "learning_rate": 1.2385690917876724e-07, "loss": 0.6428, "step": 18118 }, { "epoch": 0.9311851166615274, "grad_norm": 1.0935783386230469, "learning_rate": 1.2367288268882783e-07, "loss": 0.7159, "step": 18119 }, { "epoch": 0.931236509404872, "grad_norm": 1.0715067386627197, "learning_rate": 1.234889913017895e-07, "loss": 0.6638, "step": 18120 }, { "epoch": 0.9312879021482167, "grad_norm": 0.7493892908096313, "learning_rate": 1.2330523502274705e-07, "loss": 0.6586, "step": 18121 }, { "epoch": 0.9313392948915613, "grad_norm": 0.7267151474952698, "learning_rate": 1.2312161385679144e-07, "loss": 0.6652, "step": 18122 }, { "epoch": 0.931390687634906, "grad_norm": 1.0882261991500854, "learning_rate": 1.2293812780901027e-07, "loss": 0.705, "step": 18123 }, { "epoch": 0.9314420803782506, "grad_norm": 0.7736518979072571, "learning_rate": 1.2275477688448723e-07, "loss": 0.6191, "step": 18124 }, { "epoch": 0.9314934731215953, "grad_norm": 1.0837070941925049, "learning_rate": 1.2257156108830159e-07, "loss": 0.7129, "step": 18125 }, { "epoch": 0.9315448658649399, "grad_norm": 1.089508056640625, "learning_rate": 1.2238848042553042e-07, "loss": 0.6928, "step": 18126 }, { "epoch": 0.9315962586082845, "grad_norm": 0.7178660035133362, "learning_rate": 1.2220553490124632e-07, "loss": 0.6273, "step": 18127 }, { "epoch": 0.9316476513516292, "grad_norm": 1.1137096881866455, "learning_rate": 1.220227245205169e-07, "loss": 0.6703, "step": 18128 }, { "epoch": 0.9316990440949738, "grad_norm": 1.1520177125930786, "learning_rate": 1.21840049288407e-07, "loss": 0.6951, "step": 18129 }, { "epoch": 0.9317504368383184, "grad_norm": 1.0321643352508545, "learning_rate": 1.2165750920997977e-07, "loss": 0.6955, "step": 18130 }, { "epoch": 0.931801829581663, "grad_norm": 0.8172734379768372, "learning_rate": 1.2147510429028952e-07, "loss": 0.693, "step": 18131 }, { "epoch": 0.9318532223250077, "grad_norm": 1.1170563697814941, "learning_rate": 1.2129283453439278e-07, "loss": 0.6769, "step": 18132 }, { "epoch": 0.9319046150683523, "grad_norm": 1.016937494277954, "learning_rate": 1.2111069994733827e-07, "loss": 0.6628, "step": 18133 }, { "epoch": 0.931956007811697, "grad_norm": 1.0824862718582153, "learning_rate": 1.2092870053417138e-07, "loss": 0.6717, "step": 18134 }, { "epoch": 0.9320074005550416, "grad_norm": 1.1746697425842285, "learning_rate": 1.2074683629993643e-07, "loss": 0.6985, "step": 18135 }, { "epoch": 0.9320587932983863, "grad_norm": 1.1815232038497925, "learning_rate": 1.2056510724967107e-07, "loss": 0.7213, "step": 18136 }, { "epoch": 0.9321101860417309, "grad_norm": 5.467514514923096, "learning_rate": 1.2038351338841014e-07, "loss": 0.7101, "step": 18137 }, { "epoch": 0.9321615787850756, "grad_norm": 0.7454895973205566, "learning_rate": 1.2020205472118462e-07, "loss": 0.6788, "step": 18138 }, { "epoch": 0.9322129715284202, "grad_norm": 1.0649522542953491, "learning_rate": 1.2002073125302382e-07, "loss": 0.7258, "step": 18139 }, { "epoch": 0.9322643642717648, "grad_norm": 0.7957323789596558, "learning_rate": 1.1983954298894874e-07, "loss": 0.6508, "step": 18140 }, { "epoch": 0.9323157570151095, "grad_norm": 1.0418696403503418, "learning_rate": 1.1965848993398088e-07, "loss": 0.703, "step": 18141 }, { "epoch": 0.9323671497584541, "grad_norm": 0.7692862749099731, "learning_rate": 1.1947757209313626e-07, "loss": 0.6486, "step": 18142 }, { "epoch": 0.9324185425017988, "grad_norm": 1.0710469484329224, "learning_rate": 1.1929678947142753e-07, "loss": 0.6533, "step": 18143 }, { "epoch": 0.9324699352451434, "grad_norm": 1.2012804746627808, "learning_rate": 1.1911614207386346e-07, "loss": 0.6444, "step": 18144 }, { "epoch": 0.9325213279884881, "grad_norm": 0.9740645885467529, "learning_rate": 1.1893562990544894e-07, "loss": 0.6646, "step": 18145 }, { "epoch": 0.9325727207318326, "grad_norm": 0.7126825451850891, "learning_rate": 1.1875525297118496e-07, "loss": 0.6395, "step": 18146 }, { "epoch": 0.9326241134751773, "grad_norm": 1.0794944763183594, "learning_rate": 1.1857501127606919e-07, "loss": 0.709, "step": 18147 }, { "epoch": 0.9326755062185219, "grad_norm": 0.7529399394989014, "learning_rate": 1.1839490482509541e-07, "loss": 0.6599, "step": 18148 }, { "epoch": 0.9327268989618666, "grad_norm": 1.0956978797912598, "learning_rate": 1.182149336232541e-07, "loss": 0.7296, "step": 18149 }, { "epoch": 0.9327782917052112, "grad_norm": 0.8156173825263977, "learning_rate": 1.1803509767553067e-07, "loss": 0.6159, "step": 18150 }, { "epoch": 0.9328296844485559, "grad_norm": 0.7395505905151367, "learning_rate": 1.1785539698690728e-07, "loss": 0.6283, "step": 18151 }, { "epoch": 0.9328810771919005, "grad_norm": 1.1007943153381348, "learning_rate": 1.176758315623644e-07, "loss": 0.6486, "step": 18152 }, { "epoch": 0.9329324699352451, "grad_norm": 1.126417875289917, "learning_rate": 1.1749640140687579e-07, "loss": 0.7318, "step": 18153 }, { "epoch": 0.9329838626785898, "grad_norm": 1.0709338188171387, "learning_rate": 1.1731710652541306e-07, "loss": 0.6678, "step": 18154 }, { "epoch": 0.9330352554219344, "grad_norm": 0.7269437313079834, "learning_rate": 1.171379469229439e-07, "loss": 0.6835, "step": 18155 }, { "epoch": 0.9330866481652791, "grad_norm": 1.0362740755081177, "learning_rate": 1.1695892260443098e-07, "loss": 0.6701, "step": 18156 }, { "epoch": 0.9331380409086237, "grad_norm": 1.2100704908370972, "learning_rate": 1.1678003357483591e-07, "loss": 0.6363, "step": 18157 }, { "epoch": 0.9331894336519684, "grad_norm": 1.074168086051941, "learning_rate": 1.1660127983911362e-07, "loss": 0.6848, "step": 18158 }, { "epoch": 0.933240826395313, "grad_norm": 0.7845690846443176, "learning_rate": 1.1642266140221848e-07, "loss": 0.6489, "step": 18159 }, { "epoch": 0.9332922191386577, "grad_norm": 1.0633260011672974, "learning_rate": 1.162441782690965e-07, "loss": 0.6798, "step": 18160 }, { "epoch": 0.9333436118820022, "grad_norm": 1.2773399353027344, "learning_rate": 1.1606583044469544e-07, "loss": 0.7016, "step": 18161 }, { "epoch": 0.9333950046253469, "grad_norm": 0.8328320980072021, "learning_rate": 1.158876179339552e-07, "loss": 0.6482, "step": 18162 }, { "epoch": 0.9334463973686915, "grad_norm": 1.1610026359558105, "learning_rate": 1.157095407418135e-07, "loss": 0.6985, "step": 18163 }, { "epoch": 0.9334977901120362, "grad_norm": 1.1135119199752808, "learning_rate": 1.1553159887320365e-07, "loss": 0.736, "step": 18164 }, { "epoch": 0.9335491828553808, "grad_norm": 1.1179707050323486, "learning_rate": 1.1535379233305665e-07, "loss": 0.7314, "step": 18165 }, { "epoch": 0.9336005755987254, "grad_norm": 1.0149478912353516, "learning_rate": 1.1517612112629805e-07, "loss": 0.7079, "step": 18166 }, { "epoch": 0.9336519683420701, "grad_norm": 1.0771840810775757, "learning_rate": 1.1499858525785057e-07, "loss": 0.6469, "step": 18167 }, { "epoch": 0.9337033610854147, "grad_norm": 1.1967853307724, "learning_rate": 1.1482118473263415e-07, "loss": 0.7518, "step": 18168 }, { "epoch": 0.9337547538287594, "grad_norm": 1.1043626070022583, "learning_rate": 1.1464391955556154e-07, "loss": 0.691, "step": 18169 }, { "epoch": 0.933806146572104, "grad_norm": 1.1084086894989014, "learning_rate": 1.1446678973154546e-07, "loss": 0.6766, "step": 18170 }, { "epoch": 0.9338575393154487, "grad_norm": 1.1623708009719849, "learning_rate": 1.1428979526549366e-07, "loss": 0.6974, "step": 18171 }, { "epoch": 0.9339089320587933, "grad_norm": 1.1333773136138916, "learning_rate": 1.1411293616230945e-07, "loss": 0.7476, "step": 18172 }, { "epoch": 0.933960324802138, "grad_norm": 1.109848141670227, "learning_rate": 1.1393621242689224e-07, "loss": 0.6969, "step": 18173 }, { "epoch": 0.9340117175454826, "grad_norm": 1.0783395767211914, "learning_rate": 1.1375962406413976e-07, "loss": 0.6664, "step": 18174 }, { "epoch": 0.9340631102888273, "grad_norm": 1.1152595281600952, "learning_rate": 1.1358317107894312e-07, "loss": 0.7164, "step": 18175 }, { "epoch": 0.9341145030321718, "grad_norm": 0.6855306625366211, "learning_rate": 1.1340685347619229e-07, "loss": 0.6204, "step": 18176 }, { "epoch": 0.9341658957755165, "grad_norm": 1.045112133026123, "learning_rate": 1.1323067126077169e-07, "loss": 0.634, "step": 18177 }, { "epoch": 0.9342172885188611, "grad_norm": 1.041113257408142, "learning_rate": 1.1305462443756244e-07, "loss": 0.681, "step": 18178 }, { "epoch": 0.9342686812622057, "grad_norm": 0.7230408191680908, "learning_rate": 1.1287871301144283e-07, "loss": 0.6317, "step": 18179 }, { "epoch": 0.9343200740055504, "grad_norm": 0.7001280188560486, "learning_rate": 1.1270293698728507e-07, "loss": 0.6498, "step": 18180 }, { "epoch": 0.934371466748895, "grad_norm": 1.0436370372772217, "learning_rate": 1.125272963699614e-07, "loss": 0.6558, "step": 18181 }, { "epoch": 0.9344228594922397, "grad_norm": 1.116808295249939, "learning_rate": 1.1235179116433626e-07, "loss": 0.7204, "step": 18182 }, { "epoch": 0.9344742522355843, "grad_norm": 1.1701769828796387, "learning_rate": 1.1217642137527296e-07, "loss": 0.6766, "step": 18183 }, { "epoch": 0.934525644978929, "grad_norm": 1.1348007917404175, "learning_rate": 1.120011870076304e-07, "loss": 0.7672, "step": 18184 }, { "epoch": 0.9345770377222736, "grad_norm": 0.7406056523323059, "learning_rate": 1.1182608806626306e-07, "loss": 0.6386, "step": 18185 }, { "epoch": 0.9346284304656183, "grad_norm": 1.0408074855804443, "learning_rate": 1.1165112455602312e-07, "loss": 0.6991, "step": 18186 }, { "epoch": 0.9346798232089629, "grad_norm": 1.0372551679611206, "learning_rate": 1.1147629648175617e-07, "loss": 0.7773, "step": 18187 }, { "epoch": 0.9347312159523076, "grad_norm": 1.0081762075424194, "learning_rate": 1.113016038483089e-07, "loss": 0.6095, "step": 18188 }, { "epoch": 0.9347826086956522, "grad_norm": 1.0785664319992065, "learning_rate": 1.1112704666051855e-07, "loss": 0.6638, "step": 18189 }, { "epoch": 0.9348340014389969, "grad_norm": 1.2045140266418457, "learning_rate": 1.1095262492322346e-07, "loss": 0.7384, "step": 18190 }, { "epoch": 0.9348853941823414, "grad_norm": 1.100014090538025, "learning_rate": 1.1077833864125476e-07, "loss": 0.6898, "step": 18191 }, { "epoch": 0.934936786925686, "grad_norm": 1.0657894611358643, "learning_rate": 1.1060418781944193e-07, "loss": 0.7225, "step": 18192 }, { "epoch": 0.9349881796690307, "grad_norm": 1.1204603910446167, "learning_rate": 1.104301724626089e-07, "loss": 0.6584, "step": 18193 }, { "epoch": 0.9350395724123753, "grad_norm": 1.1003681421279907, "learning_rate": 1.1025629257557847e-07, "loss": 0.7331, "step": 18194 }, { "epoch": 0.93509096515572, "grad_norm": 1.094254732131958, "learning_rate": 1.1008254816316733e-07, "loss": 0.6812, "step": 18195 }, { "epoch": 0.9351423578990646, "grad_norm": 1.1393364667892456, "learning_rate": 1.0990893923018942e-07, "loss": 0.7013, "step": 18196 }, { "epoch": 0.9351937506424093, "grad_norm": 1.0725802183151245, "learning_rate": 1.0973546578145422e-07, "loss": 0.6973, "step": 18197 }, { "epoch": 0.9352451433857539, "grad_norm": 1.0916386842727661, "learning_rate": 1.0956212782176845e-07, "loss": 0.693, "step": 18198 }, { "epoch": 0.9352965361290986, "grad_norm": 0.7486464977264404, "learning_rate": 1.0938892535593437e-07, "loss": 0.6095, "step": 18199 }, { "epoch": 0.9353479288724432, "grad_norm": 1.12406325340271, "learning_rate": 1.0921585838875092e-07, "loss": 0.7451, "step": 18200 }, { "epoch": 0.9353993216157879, "grad_norm": 1.081967830657959, "learning_rate": 1.0904292692501317e-07, "loss": 0.7024, "step": 18201 }, { "epoch": 0.9354507143591325, "grad_norm": 1.0904582738876343, "learning_rate": 1.0887013096951172e-07, "loss": 0.7023, "step": 18202 }, { "epoch": 0.9355021071024772, "grad_norm": 0.721435546875, "learning_rate": 1.0869747052703439e-07, "loss": 0.6842, "step": 18203 }, { "epoch": 0.9355534998458218, "grad_norm": 0.758167564868927, "learning_rate": 1.0852494560236571e-07, "loss": 0.635, "step": 18204 }, { "epoch": 0.9356048925891665, "grad_norm": 1.0998713970184326, "learning_rate": 1.0835255620028406e-07, "loss": 0.6779, "step": 18205 }, { "epoch": 0.935656285332511, "grad_norm": 1.051513910293579, "learning_rate": 1.081803023255662e-07, "loss": 0.6925, "step": 18206 }, { "epoch": 0.9357076780758556, "grad_norm": 1.098307728767395, "learning_rate": 1.0800818398298552e-07, "loss": 0.7537, "step": 18207 }, { "epoch": 0.9357590708192003, "grad_norm": 0.7755401134490967, "learning_rate": 1.0783620117730986e-07, "loss": 0.6301, "step": 18208 }, { "epoch": 0.9358104635625449, "grad_norm": 1.1143397092819214, "learning_rate": 1.0766435391330432e-07, "loss": 0.7014, "step": 18209 }, { "epoch": 0.9358618563058896, "grad_norm": 1.092690110206604, "learning_rate": 1.0749264219573008e-07, "loss": 0.6818, "step": 18210 }, { "epoch": 0.9359132490492342, "grad_norm": 0.7035539150238037, "learning_rate": 1.0732106602934445e-07, "loss": 0.6334, "step": 18211 }, { "epoch": 0.9359646417925789, "grad_norm": 1.1421818733215332, "learning_rate": 1.071496254189014e-07, "loss": 0.6927, "step": 18212 }, { "epoch": 0.9360160345359235, "grad_norm": 1.1793495416641235, "learning_rate": 1.0697832036915046e-07, "loss": 0.7286, "step": 18213 }, { "epoch": 0.9360674272792682, "grad_norm": 0.7602949738502502, "learning_rate": 1.0680715088483784e-07, "loss": 0.6722, "step": 18214 }, { "epoch": 0.9361188200226128, "grad_norm": 1.0900987386703491, "learning_rate": 1.0663611697070586e-07, "loss": 0.6839, "step": 18215 }, { "epoch": 0.9361702127659575, "grad_norm": 1.140613079071045, "learning_rate": 1.064652186314935e-07, "loss": 0.7412, "step": 18216 }, { "epoch": 0.9362216055093021, "grad_norm": 1.2014966011047363, "learning_rate": 1.0629445587193587e-07, "loss": 0.6991, "step": 18217 }, { "epoch": 0.9362729982526468, "grad_norm": 1.1197752952575684, "learning_rate": 1.0612382869676307e-07, "loss": 0.7534, "step": 18218 }, { "epoch": 0.9363243909959914, "grad_norm": 1.0953505039215088, "learning_rate": 1.0595333711070354e-07, "loss": 0.7493, "step": 18219 }, { "epoch": 0.936375783739336, "grad_norm": 1.0983437299728394, "learning_rate": 1.0578298111848073e-07, "loss": 0.68, "step": 18220 }, { "epoch": 0.9364271764826807, "grad_norm": 1.0882781744003296, "learning_rate": 1.0561276072481419e-07, "loss": 0.7087, "step": 18221 }, { "epoch": 0.9364785692260252, "grad_norm": 0.8025135397911072, "learning_rate": 1.0544267593441959e-07, "loss": 0.6379, "step": 18222 }, { "epoch": 0.9365299619693699, "grad_norm": 0.6718245148658752, "learning_rate": 1.0527272675201094e-07, "loss": 0.6663, "step": 18223 }, { "epoch": 0.9365813547127145, "grad_norm": 1.1004219055175781, "learning_rate": 1.0510291318229449e-07, "loss": 0.6912, "step": 18224 }, { "epoch": 0.9366327474560592, "grad_norm": 0.7890014052391052, "learning_rate": 1.0493323522997644e-07, "loss": 0.6632, "step": 18225 }, { "epoch": 0.9366841401994038, "grad_norm": 1.1225265264511108, "learning_rate": 1.0476369289975808e-07, "loss": 0.6737, "step": 18226 }, { "epoch": 0.9367355329427485, "grad_norm": 1.1219775676727295, "learning_rate": 1.0459428619633616e-07, "loss": 0.6931, "step": 18227 }, { "epoch": 0.9367869256860931, "grad_norm": 1.0455548763275146, "learning_rate": 1.0442501512440473e-07, "loss": 0.71, "step": 18228 }, { "epoch": 0.9368383184294378, "grad_norm": 1.0516871213912964, "learning_rate": 1.0425587968865281e-07, "loss": 0.6394, "step": 18229 }, { "epoch": 0.9368897111727824, "grad_norm": 1.1444944143295288, "learning_rate": 1.040868798937672e-07, "loss": 0.6991, "step": 18230 }, { "epoch": 0.9369411039161271, "grad_norm": 1.0673075914382935, "learning_rate": 1.0391801574442972e-07, "loss": 0.6934, "step": 18231 }, { "epoch": 0.9369924966594717, "grad_norm": 1.1196258068084717, "learning_rate": 1.0374928724531997e-07, "loss": 0.7063, "step": 18232 }, { "epoch": 0.9370438894028164, "grad_norm": 0.8464105129241943, "learning_rate": 1.0358069440111029e-07, "loss": 0.6209, "step": 18233 }, { "epoch": 0.937095282146161, "grad_norm": 1.1044212579727173, "learning_rate": 1.034122372164742e-07, "loss": 0.6579, "step": 18234 }, { "epoch": 0.9371466748895056, "grad_norm": 1.1360880136489868, "learning_rate": 1.0324391569607739e-07, "loss": 0.7236, "step": 18235 }, { "epoch": 0.9371980676328503, "grad_norm": 1.061560034751892, "learning_rate": 1.0307572984458502e-07, "loss": 0.6446, "step": 18236 }, { "epoch": 0.9372494603761948, "grad_norm": 1.0182867050170898, "learning_rate": 1.0290767966665504e-07, "loss": 0.6643, "step": 18237 }, { "epoch": 0.9373008531195395, "grad_norm": 0.9916114807128906, "learning_rate": 1.0273976516694372e-07, "loss": 0.6987, "step": 18238 }, { "epoch": 0.9373522458628841, "grad_norm": 1.0453910827636719, "learning_rate": 1.0257198635010512e-07, "loss": 0.6849, "step": 18239 }, { "epoch": 0.9374036386062288, "grad_norm": 1.1969722509384155, "learning_rate": 1.0240434322078497e-07, "loss": 0.6617, "step": 18240 }, { "epoch": 0.9374550313495734, "grad_norm": 0.7796474695205688, "learning_rate": 1.0223683578362953e-07, "loss": 0.6448, "step": 18241 }, { "epoch": 0.9375064240929181, "grad_norm": 1.079524040222168, "learning_rate": 1.020694640432801e-07, "loss": 0.7608, "step": 18242 }, { "epoch": 0.9375578168362627, "grad_norm": 0.709758460521698, "learning_rate": 1.01902228004373e-07, "loss": 0.6162, "step": 18243 }, { "epoch": 0.9376092095796074, "grad_norm": 1.0926910638809204, "learning_rate": 1.0173512767154114e-07, "loss": 0.6622, "step": 18244 }, { "epoch": 0.937660602322952, "grad_norm": 0.8188534379005432, "learning_rate": 1.0156816304941586e-07, "loss": 0.6202, "step": 18245 }, { "epoch": 0.9377119950662967, "grad_norm": 1.1509997844696045, "learning_rate": 1.0140133414262177e-07, "loss": 0.7219, "step": 18246 }, { "epoch": 0.9377633878096413, "grad_norm": 0.6941210627555847, "learning_rate": 1.0123464095578128e-07, "loss": 0.5924, "step": 18247 }, { "epoch": 0.937814780552986, "grad_norm": 1.1383109092712402, "learning_rate": 1.0106808349351349e-07, "loss": 0.7223, "step": 18248 }, { "epoch": 0.9378661732963306, "grad_norm": 1.0416836738586426, "learning_rate": 1.0090166176043137e-07, "loss": 0.667, "step": 18249 }, { "epoch": 0.9379175660396752, "grad_norm": 1.0473331212997437, "learning_rate": 1.0073537576114789e-07, "loss": 0.6733, "step": 18250 }, { "epoch": 0.9379689587830199, "grad_norm": 0.7374266386032104, "learning_rate": 1.0056922550026826e-07, "loss": 0.652, "step": 18251 }, { "epoch": 0.9380203515263644, "grad_norm": 1.0661418437957764, "learning_rate": 1.0040321098239713e-07, "loss": 0.6975, "step": 18252 }, { "epoch": 0.9380717442697091, "grad_norm": 1.1141732931137085, "learning_rate": 1.0023733221213305e-07, "loss": 0.6581, "step": 18253 }, { "epoch": 0.9381231370130537, "grad_norm": 1.0481083393096924, "learning_rate": 1.0007158919407289e-07, "loss": 0.7174, "step": 18254 }, { "epoch": 0.9381745297563984, "grad_norm": 1.0148179531097412, "learning_rate": 9.990598193280854e-08, "loss": 0.6616, "step": 18255 }, { "epoch": 0.938225922499743, "grad_norm": 1.132919192314148, "learning_rate": 9.974051043292742e-08, "loss": 0.7723, "step": 18256 }, { "epoch": 0.9382773152430877, "grad_norm": 1.1283656358718872, "learning_rate": 9.957517469901423e-08, "loss": 0.7251, "step": 18257 }, { "epoch": 0.9383287079864323, "grad_norm": 1.1088967323303223, "learning_rate": 9.940997473565028e-08, "loss": 0.7, "step": 18258 }, { "epoch": 0.938380100729777, "grad_norm": 1.087043046951294, "learning_rate": 9.924491054741303e-08, "loss": 0.7361, "step": 18259 }, { "epoch": 0.9384314934731216, "grad_norm": 1.0823676586151123, "learning_rate": 9.907998213887437e-08, "loss": 0.7075, "step": 18260 }, { "epoch": 0.9384828862164662, "grad_norm": 0.7894309163093567, "learning_rate": 9.89151895146051e-08, "loss": 0.6497, "step": 18261 }, { "epoch": 0.9385342789598109, "grad_norm": 0.7110560536384583, "learning_rate": 9.875053267916934e-08, "loss": 0.6489, "step": 18262 }, { "epoch": 0.9385856717031555, "grad_norm": 0.7982341051101685, "learning_rate": 9.858601163713122e-08, "loss": 0.6452, "step": 18263 }, { "epoch": 0.9386370644465002, "grad_norm": 1.098705530166626, "learning_rate": 9.842162639304708e-08, "loss": 0.7364, "step": 18264 }, { "epoch": 0.9386884571898448, "grad_norm": 0.7360174655914307, "learning_rate": 9.82573769514722e-08, "loss": 0.6685, "step": 18265 }, { "epoch": 0.9387398499331895, "grad_norm": 1.0338443517684937, "learning_rate": 9.809326331695734e-08, "loss": 0.7089, "step": 18266 }, { "epoch": 0.938791242676534, "grad_norm": 1.0561853647232056, "learning_rate": 9.79292854940489e-08, "loss": 0.6529, "step": 18267 }, { "epoch": 0.9388426354198787, "grad_norm": 1.1618887186050415, "learning_rate": 9.776544348729044e-08, "loss": 0.6524, "step": 18268 }, { "epoch": 0.9388940281632233, "grad_norm": 1.0948460102081299, "learning_rate": 9.760173730122057e-08, "loss": 0.6707, "step": 18269 }, { "epoch": 0.938945420906568, "grad_norm": 0.8047192096710205, "learning_rate": 9.743816694037622e-08, "loss": 0.6497, "step": 18270 }, { "epoch": 0.9389968136499126, "grad_norm": 1.1081783771514893, "learning_rate": 9.727473240928875e-08, "loss": 0.7123, "step": 18271 }, { "epoch": 0.9390482063932573, "grad_norm": 1.1524940729141235, "learning_rate": 9.711143371248621e-08, "loss": 0.6751, "step": 18272 }, { "epoch": 0.9390995991366019, "grad_norm": 1.0993659496307373, "learning_rate": 9.694827085449221e-08, "loss": 0.7311, "step": 18273 }, { "epoch": 0.9391509918799466, "grad_norm": 1.0823835134506226, "learning_rate": 9.678524383982868e-08, "loss": 0.6678, "step": 18274 }, { "epoch": 0.9392023846232912, "grad_norm": 1.086285948753357, "learning_rate": 9.66223526730109e-08, "loss": 0.7078, "step": 18275 }, { "epoch": 0.9392537773666358, "grad_norm": 1.1212517023086548, "learning_rate": 9.645959735855304e-08, "loss": 0.742, "step": 18276 }, { "epoch": 0.9393051701099805, "grad_norm": 1.112223505973816, "learning_rate": 9.629697790096371e-08, "loss": 0.6737, "step": 18277 }, { "epoch": 0.9393565628533251, "grad_norm": 1.0868277549743652, "learning_rate": 9.613449430474875e-08, "loss": 0.6769, "step": 18278 }, { "epoch": 0.9394079555966698, "grad_norm": 0.7513636350631714, "learning_rate": 9.597214657441012e-08, "loss": 0.6482, "step": 18279 }, { "epoch": 0.9394593483400144, "grad_norm": 1.1184829473495483, "learning_rate": 9.580993471444588e-08, "loss": 0.7089, "step": 18280 }, { "epoch": 0.9395107410833591, "grad_norm": 1.064218282699585, "learning_rate": 9.564785872934967e-08, "loss": 0.7012, "step": 18281 }, { "epoch": 0.9395621338267036, "grad_norm": 1.0187371969223022, "learning_rate": 9.548591862361179e-08, "loss": 0.6293, "step": 18282 }, { "epoch": 0.9396135265700483, "grad_norm": 1.1224628686904907, "learning_rate": 9.532411440171974e-08, "loss": 0.6794, "step": 18283 }, { "epoch": 0.9396649193133929, "grad_norm": 1.158615231513977, "learning_rate": 9.516244606815662e-08, "loss": 0.7135, "step": 18284 }, { "epoch": 0.9397163120567376, "grad_norm": 1.115944743156433, "learning_rate": 9.500091362740049e-08, "loss": 0.7009, "step": 18285 }, { "epoch": 0.9397677048000822, "grad_norm": 1.0644429922103882, "learning_rate": 9.483951708392669e-08, "loss": 0.7317, "step": 18286 }, { "epoch": 0.9398190975434269, "grad_norm": 1.1075092554092407, "learning_rate": 9.467825644220829e-08, "loss": 0.7078, "step": 18287 }, { "epoch": 0.9398704902867715, "grad_norm": 1.0846257209777832, "learning_rate": 9.451713170671229e-08, "loss": 0.6725, "step": 18288 }, { "epoch": 0.9399218830301161, "grad_norm": 1.2795671224594116, "learning_rate": 9.435614288190232e-08, "loss": 0.7004, "step": 18289 }, { "epoch": 0.9399732757734608, "grad_norm": 1.1195169687271118, "learning_rate": 9.419528997224037e-08, "loss": 0.7202, "step": 18290 }, { "epoch": 0.9400246685168054, "grad_norm": 1.0363930463790894, "learning_rate": 9.403457298218066e-08, "loss": 0.7148, "step": 18291 }, { "epoch": 0.9400760612601501, "grad_norm": 1.0344195365905762, "learning_rate": 9.387399191617796e-08, "loss": 0.679, "step": 18292 }, { "epoch": 0.9401274540034947, "grad_norm": 1.1695897579193115, "learning_rate": 9.371354677868039e-08, "loss": 0.6831, "step": 18293 }, { "epoch": 0.9401788467468394, "grad_norm": 1.0498465299606323, "learning_rate": 9.355323757413326e-08, "loss": 0.6771, "step": 18294 }, { "epoch": 0.940230239490184, "grad_norm": 1.1350390911102295, "learning_rate": 9.339306430697803e-08, "loss": 0.6453, "step": 18295 }, { "epoch": 0.9402816322335287, "grad_norm": 1.1396948099136353, "learning_rate": 9.323302698165282e-08, "loss": 0.699, "step": 18296 }, { "epoch": 0.9403330249768732, "grad_norm": 0.6648402214050293, "learning_rate": 9.307312560259186e-08, "loss": 0.6738, "step": 18297 }, { "epoch": 0.9403844177202179, "grad_norm": 1.0341657400131226, "learning_rate": 9.291336017422437e-08, "loss": 0.7235, "step": 18298 }, { "epoch": 0.9404358104635625, "grad_norm": 0.6653953194618225, "learning_rate": 9.275373070097682e-08, "loss": 0.6229, "step": 18299 }, { "epoch": 0.9404872032069072, "grad_norm": 1.1513853073120117, "learning_rate": 9.259423718727345e-08, "loss": 0.7019, "step": 18300 }, { "epoch": 0.9405385959502518, "grad_norm": 1.1183154582977295, "learning_rate": 9.243487963753128e-08, "loss": 0.6989, "step": 18301 }, { "epoch": 0.9405899886935964, "grad_norm": 1.1340559720993042, "learning_rate": 9.227565805616678e-08, "loss": 0.6513, "step": 18302 }, { "epoch": 0.9406413814369411, "grad_norm": 0.7577893137931824, "learning_rate": 9.211657244759087e-08, "loss": 0.6582, "step": 18303 }, { "epoch": 0.9406927741802857, "grad_norm": 1.1421209573745728, "learning_rate": 9.195762281621057e-08, "loss": 0.7183, "step": 18304 }, { "epoch": 0.9407441669236304, "grad_norm": 1.1564736366271973, "learning_rate": 9.17988091664307e-08, "loss": 0.7751, "step": 18305 }, { "epoch": 0.940795559666975, "grad_norm": 0.7559486627578735, "learning_rate": 9.16401315026505e-08, "loss": 0.6824, "step": 18306 }, { "epoch": 0.9408469524103197, "grad_norm": 1.1291884183883667, "learning_rate": 9.148158982926703e-08, "loss": 0.6904, "step": 18307 }, { "epoch": 0.9408983451536643, "grad_norm": 1.1255857944488525, "learning_rate": 9.132318415067232e-08, "loss": 0.6642, "step": 18308 }, { "epoch": 0.940949737897009, "grad_norm": 1.05878484249115, "learning_rate": 9.116491447125508e-08, "loss": 0.671, "step": 18309 }, { "epoch": 0.9410011306403536, "grad_norm": 0.713230550289154, "learning_rate": 9.10067807954007e-08, "loss": 0.6761, "step": 18310 }, { "epoch": 0.9410525233836983, "grad_norm": 1.1593071222305298, "learning_rate": 9.084878312749012e-08, "loss": 0.7015, "step": 18311 }, { "epoch": 0.9411039161270429, "grad_norm": 1.050907015800476, "learning_rate": 9.069092147190095e-08, "loss": 0.6591, "step": 18312 }, { "epoch": 0.9411553088703875, "grad_norm": 1.0818685293197632, "learning_rate": 9.05331958330069e-08, "loss": 0.685, "step": 18313 }, { "epoch": 0.9412067016137321, "grad_norm": 1.1976109743118286, "learning_rate": 9.037560621517783e-08, "loss": 0.7308, "step": 18314 }, { "epoch": 0.9412580943570767, "grad_norm": 1.112247109413147, "learning_rate": 9.021815262277911e-08, "loss": 0.7052, "step": 18315 }, { "epoch": 0.9413094871004214, "grad_norm": 1.0798821449279785, "learning_rate": 9.006083506017505e-08, "loss": 0.6859, "step": 18316 }, { "epoch": 0.941360879843766, "grad_norm": 0.871130645275116, "learning_rate": 8.990365353172215e-08, "loss": 0.6242, "step": 18317 }, { "epoch": 0.9414122725871107, "grad_norm": 1.0228276252746582, "learning_rate": 8.974660804177693e-08, "loss": 0.6635, "step": 18318 }, { "epoch": 0.9414636653304553, "grad_norm": 0.7340797781944275, "learning_rate": 8.95896985946898e-08, "loss": 0.6458, "step": 18319 }, { "epoch": 0.9415150580738, "grad_norm": 1.172174334526062, "learning_rate": 8.943292519480729e-08, "loss": 0.6582, "step": 18320 }, { "epoch": 0.9415664508171446, "grad_norm": 1.1598737239837646, "learning_rate": 8.92762878464748e-08, "loss": 0.6876, "step": 18321 }, { "epoch": 0.9416178435604893, "grad_norm": 1.3235054016113281, "learning_rate": 8.911978655403052e-08, "loss": 0.6558, "step": 18322 }, { "epoch": 0.9416692363038339, "grad_norm": 1.0557688474655151, "learning_rate": 8.896342132181101e-08, "loss": 0.7143, "step": 18323 }, { "epoch": 0.9417206290471786, "grad_norm": 0.9993652701377869, "learning_rate": 8.88071921541478e-08, "loss": 0.645, "step": 18324 }, { "epoch": 0.9417720217905232, "grad_norm": 1.1330052614212036, "learning_rate": 8.865109905537128e-08, "loss": 0.664, "step": 18325 }, { "epoch": 0.9418234145338679, "grad_norm": 1.106482744216919, "learning_rate": 8.849514202980358e-08, "loss": 0.7293, "step": 18326 }, { "epoch": 0.9418748072772125, "grad_norm": 1.0812040567398071, "learning_rate": 8.833932108176735e-08, "loss": 0.6779, "step": 18327 }, { "epoch": 0.941926200020557, "grad_norm": 1.1602320671081543, "learning_rate": 8.818363621557913e-08, "loss": 0.6617, "step": 18328 }, { "epoch": 0.9419775927639017, "grad_norm": 1.165284514427185, "learning_rate": 8.802808743555269e-08, "loss": 0.7208, "step": 18329 }, { "epoch": 0.9420289855072463, "grad_norm": 1.0488582849502563, "learning_rate": 8.787267474599792e-08, "loss": 0.7283, "step": 18330 }, { "epoch": 0.942080378250591, "grad_norm": 1.07056725025177, "learning_rate": 8.771739815121916e-08, "loss": 0.6867, "step": 18331 }, { "epoch": 0.9421317709939356, "grad_norm": 1.0258830785751343, "learning_rate": 8.756225765552074e-08, "loss": 0.6411, "step": 18332 }, { "epoch": 0.9421831637372803, "grad_norm": 1.058482050895691, "learning_rate": 8.740725326319865e-08, "loss": 0.7051, "step": 18333 }, { "epoch": 0.9422345564806249, "grad_norm": 1.1020543575286865, "learning_rate": 8.725238497854893e-08, "loss": 0.6497, "step": 18334 }, { "epoch": 0.9422859492239696, "grad_norm": 1.072866439819336, "learning_rate": 8.709765280586202e-08, "loss": 0.683, "step": 18335 }, { "epoch": 0.9423373419673142, "grad_norm": 1.0985640287399292, "learning_rate": 8.694305674942504e-08, "loss": 0.6738, "step": 18336 }, { "epoch": 0.9423887347106589, "grad_norm": 0.7717066407203674, "learning_rate": 8.678859681352014e-08, "loss": 0.637, "step": 18337 }, { "epoch": 0.9424401274540035, "grad_norm": 1.1254777908325195, "learning_rate": 8.663427300242833e-08, "loss": 0.6683, "step": 18338 }, { "epoch": 0.9424915201973482, "grad_norm": 1.1327439546585083, "learning_rate": 8.648008532042451e-08, "loss": 0.692, "step": 18339 }, { "epoch": 0.9425429129406928, "grad_norm": 1.2893112897872925, "learning_rate": 8.632603377177972e-08, "loss": 0.7192, "step": 18340 }, { "epoch": 0.9425943056840375, "grad_norm": 0.8734267354011536, "learning_rate": 8.617211836076445e-08, "loss": 0.6341, "step": 18341 }, { "epoch": 0.9426456984273821, "grad_norm": 1.0354949235916138, "learning_rate": 8.601833909164081e-08, "loss": 0.5949, "step": 18342 }, { "epoch": 0.9426970911707266, "grad_norm": 0.7370154857635498, "learning_rate": 8.586469596866987e-08, "loss": 0.6318, "step": 18343 }, { "epoch": 0.9427484839140713, "grad_norm": 1.1292835474014282, "learning_rate": 8.571118899610875e-08, "loss": 0.7157, "step": 18344 }, { "epoch": 0.9427998766574159, "grad_norm": 1.090070366859436, "learning_rate": 8.555781817821185e-08, "loss": 0.693, "step": 18345 }, { "epoch": 0.9428512694007606, "grad_norm": 1.0881165266036987, "learning_rate": 8.540458351922576e-08, "loss": 0.6593, "step": 18346 }, { "epoch": 0.9429026621441052, "grad_norm": 1.0677086114883423, "learning_rate": 8.525148502339764e-08, "loss": 0.6528, "step": 18347 }, { "epoch": 0.9429540548874499, "grad_norm": 1.086267113685608, "learning_rate": 8.509852269496909e-08, "loss": 0.6647, "step": 18348 }, { "epoch": 0.9430054476307945, "grad_norm": 0.7654056549072266, "learning_rate": 8.494569653817786e-08, "loss": 0.6634, "step": 18349 }, { "epoch": 0.9430568403741392, "grad_norm": 1.1124404668807983, "learning_rate": 8.479300655725775e-08, "loss": 0.6679, "step": 18350 }, { "epoch": 0.9431082331174838, "grad_norm": 0.6850045919418335, "learning_rate": 8.464045275643984e-08, "loss": 0.6282, "step": 18351 }, { "epoch": 0.9431596258608285, "grad_norm": 0.9976409673690796, "learning_rate": 8.448803513995074e-08, "loss": 0.6193, "step": 18352 }, { "epoch": 0.9432110186041731, "grad_norm": 1.107151746749878, "learning_rate": 8.433575371201264e-08, "loss": 0.7061, "step": 18353 }, { "epoch": 0.9432624113475178, "grad_norm": 1.0202232599258423, "learning_rate": 8.418360847684548e-08, "loss": 0.6469, "step": 18354 }, { "epoch": 0.9433138040908624, "grad_norm": 1.1062217950820923, "learning_rate": 8.403159943866368e-08, "loss": 0.7196, "step": 18355 }, { "epoch": 0.943365196834207, "grad_norm": 1.110945463180542, "learning_rate": 8.387972660167942e-08, "loss": 0.7138, "step": 18356 }, { "epoch": 0.9434165895775517, "grad_norm": 1.1200275421142578, "learning_rate": 8.372798997010045e-08, "loss": 0.6775, "step": 18357 }, { "epoch": 0.9434679823208962, "grad_norm": 1.0690885782241821, "learning_rate": 8.357638954813063e-08, "loss": 0.6893, "step": 18358 }, { "epoch": 0.9435193750642409, "grad_norm": 1.0283961296081543, "learning_rate": 8.342492533996993e-08, "loss": 0.6597, "step": 18359 }, { "epoch": 0.9435707678075855, "grad_norm": 1.0629364252090454, "learning_rate": 8.327359734981554e-08, "loss": 0.6675, "step": 18360 }, { "epoch": 0.9436221605509302, "grad_norm": 1.0458266735076904, "learning_rate": 8.312240558185914e-08, "loss": 0.6334, "step": 18361 }, { "epoch": 0.9436735532942748, "grad_norm": 0.7651318311691284, "learning_rate": 8.297135004029011e-08, "loss": 0.6268, "step": 18362 }, { "epoch": 0.9437249460376195, "grad_norm": 0.7195444107055664, "learning_rate": 8.282043072929347e-08, "loss": 0.6298, "step": 18363 }, { "epoch": 0.9437763387809641, "grad_norm": 1.1340622901916504, "learning_rate": 8.26696476530514e-08, "loss": 0.7046, "step": 18364 }, { "epoch": 0.9438277315243088, "grad_norm": 0.7147236466407776, "learning_rate": 8.25190008157406e-08, "loss": 0.5916, "step": 18365 }, { "epoch": 0.9438791242676534, "grad_norm": 1.1565423011779785, "learning_rate": 8.236849022153436e-08, "loss": 0.7452, "step": 18366 }, { "epoch": 0.9439305170109981, "grad_norm": 0.6798750758171082, "learning_rate": 8.221811587460438e-08, "loss": 0.6579, "step": 18367 }, { "epoch": 0.9439819097543427, "grad_norm": 1.0616264343261719, "learning_rate": 8.206787777911507e-08, "loss": 0.632, "step": 18368 }, { "epoch": 0.9440333024976874, "grad_norm": 1.1190334558486938, "learning_rate": 8.191777593922978e-08, "loss": 0.671, "step": 18369 }, { "epoch": 0.944084695241032, "grad_norm": 1.081934928894043, "learning_rate": 8.176781035910796e-08, "loss": 0.712, "step": 18370 }, { "epoch": 0.9441360879843766, "grad_norm": 1.1126043796539307, "learning_rate": 8.161798104290297e-08, "loss": 0.614, "step": 18371 }, { "epoch": 0.9441874807277213, "grad_norm": 1.0566575527191162, "learning_rate": 8.146828799476703e-08, "loss": 0.6895, "step": 18372 }, { "epoch": 0.9442388734710658, "grad_norm": 1.3962303400039673, "learning_rate": 8.131873121884737e-08, "loss": 0.6978, "step": 18373 }, { "epoch": 0.9442902662144105, "grad_norm": 1.080667495727539, "learning_rate": 8.116931071928736e-08, "loss": 0.7137, "step": 18374 }, { "epoch": 0.9443416589577551, "grad_norm": 1.0879675149917603, "learning_rate": 8.102002650022645e-08, "loss": 0.6849, "step": 18375 }, { "epoch": 0.9443930517010998, "grad_norm": 1.0781880617141724, "learning_rate": 8.087087856580189e-08, "loss": 0.7159, "step": 18376 }, { "epoch": 0.9444444444444444, "grad_norm": 1.1003646850585938, "learning_rate": 8.072186692014538e-08, "loss": 0.7314, "step": 18377 }, { "epoch": 0.9444958371877891, "grad_norm": 0.8745474219322205, "learning_rate": 8.057299156738474e-08, "loss": 0.6111, "step": 18378 }, { "epoch": 0.9445472299311337, "grad_norm": 1.1004819869995117, "learning_rate": 8.042425251164553e-08, "loss": 0.6808, "step": 18379 }, { "epoch": 0.9445986226744784, "grad_norm": 1.131296992301941, "learning_rate": 8.027564975704838e-08, "loss": 0.6901, "step": 18380 }, { "epoch": 0.944650015417823, "grad_norm": 0.7593706846237183, "learning_rate": 8.012718330770997e-08, "loss": 0.6197, "step": 18381 }, { "epoch": 0.9447014081611677, "grad_norm": 1.0173735618591309, "learning_rate": 7.997885316774423e-08, "loss": 0.6394, "step": 18382 }, { "epoch": 0.9447528009045123, "grad_norm": 1.039408802986145, "learning_rate": 7.983065934126177e-08, "loss": 0.6665, "step": 18383 }, { "epoch": 0.944804193647857, "grad_norm": 1.1528152227401733, "learning_rate": 7.968260183236654e-08, "loss": 0.7075, "step": 18384 }, { "epoch": 0.9448555863912016, "grad_norm": 1.156407117843628, "learning_rate": 7.95346806451619e-08, "loss": 0.6699, "step": 18385 }, { "epoch": 0.9449069791345462, "grad_norm": 1.0548685789108276, "learning_rate": 7.938689578374514e-08, "loss": 0.6925, "step": 18386 }, { "epoch": 0.9449583718778909, "grad_norm": 1.0613758563995361, "learning_rate": 7.923924725221188e-08, "loss": 0.663, "step": 18387 }, { "epoch": 0.9450097646212354, "grad_norm": 1.060703992843628, "learning_rate": 7.909173505465162e-08, "loss": 0.6922, "step": 18388 }, { "epoch": 0.9450611573645801, "grad_norm": 1.0817965269088745, "learning_rate": 7.894435919515275e-08, "loss": 0.6459, "step": 18389 }, { "epoch": 0.9451125501079247, "grad_norm": 1.023704171180725, "learning_rate": 7.879711967779757e-08, "loss": 0.7078, "step": 18390 }, { "epoch": 0.9451639428512694, "grad_norm": 1.124657392501831, "learning_rate": 7.865001650666504e-08, "loss": 0.6715, "step": 18391 }, { "epoch": 0.945215335594614, "grad_norm": 0.6721488237380981, "learning_rate": 7.850304968583189e-08, "loss": 0.6756, "step": 18392 }, { "epoch": 0.9452667283379587, "grad_norm": 0.7647435069084167, "learning_rate": 7.835621921936931e-08, "loss": 0.6516, "step": 18393 }, { "epoch": 0.9453181210813033, "grad_norm": 1.546966314315796, "learning_rate": 7.820952511134516e-08, "loss": 0.6842, "step": 18394 }, { "epoch": 0.945369513824648, "grad_norm": 1.109662413597107, "learning_rate": 7.806296736582398e-08, "loss": 0.7224, "step": 18395 }, { "epoch": 0.9454209065679926, "grad_norm": 1.084359049797058, "learning_rate": 7.791654598686693e-08, "loss": 0.6387, "step": 18396 }, { "epoch": 0.9454722993113372, "grad_norm": 1.2082669734954834, "learning_rate": 7.777026097852968e-08, "loss": 0.6831, "step": 18397 }, { "epoch": 0.9455236920546819, "grad_norm": 0.7884851098060608, "learning_rate": 7.762411234486623e-08, "loss": 0.6896, "step": 18398 }, { "epoch": 0.9455750847980265, "grad_norm": 1.0399426221847534, "learning_rate": 7.74781000899244e-08, "loss": 0.6466, "step": 18399 }, { "epoch": 0.9456264775413712, "grad_norm": 1.0866239070892334, "learning_rate": 7.7332224217751e-08, "loss": 0.6741, "step": 18400 }, { "epoch": 0.9456778702847158, "grad_norm": 0.8036141395568848, "learning_rate": 7.718648473238666e-08, "loss": 0.6583, "step": 18401 }, { "epoch": 0.9457292630280605, "grad_norm": 1.0273306369781494, "learning_rate": 7.704088163786982e-08, "loss": 0.6924, "step": 18402 }, { "epoch": 0.9457806557714051, "grad_norm": 1.1397476196289062, "learning_rate": 7.689541493823393e-08, "loss": 0.7054, "step": 18403 }, { "epoch": 0.9458320485147497, "grad_norm": 0.7133983969688416, "learning_rate": 7.675008463750966e-08, "loss": 0.6529, "step": 18404 }, { "epoch": 0.9458834412580943, "grad_norm": 1.0985074043273926, "learning_rate": 7.660489073972433e-08, "loss": 0.6644, "step": 18405 }, { "epoch": 0.945934834001439, "grad_norm": 0.7019321322441101, "learning_rate": 7.645983324889861e-08, "loss": 0.6304, "step": 18406 }, { "epoch": 0.9459862267447836, "grad_norm": 1.1341408491134644, "learning_rate": 7.631491216905318e-08, "loss": 0.67, "step": 18407 }, { "epoch": 0.9460376194881283, "grad_norm": 1.1129119396209717, "learning_rate": 7.61701275042026e-08, "loss": 0.6939, "step": 18408 }, { "epoch": 0.9460890122314729, "grad_norm": 1.1298763751983643, "learning_rate": 7.602547925835923e-08, "loss": 0.6977, "step": 18409 }, { "epoch": 0.9461404049748176, "grad_norm": 1.1325587034225464, "learning_rate": 7.588096743552875e-08, "loss": 0.7151, "step": 18410 }, { "epoch": 0.9461917977181622, "grad_norm": 1.1187431812286377, "learning_rate": 7.573659203971628e-08, "loss": 0.6598, "step": 18411 }, { "epoch": 0.9462431904615068, "grad_norm": 1.1162960529327393, "learning_rate": 7.55923530749214e-08, "loss": 0.7255, "step": 18412 }, { "epoch": 0.9462945832048515, "grad_norm": 1.0391203165054321, "learning_rate": 7.544825054514094e-08, "loss": 0.7168, "step": 18413 }, { "epoch": 0.9463459759481961, "grad_norm": 0.7869401574134827, "learning_rate": 7.530428445436667e-08, "loss": 0.6264, "step": 18414 }, { "epoch": 0.9463973686915408, "grad_norm": 1.0711759328842163, "learning_rate": 7.516045480658818e-08, "loss": 0.6997, "step": 18415 }, { "epoch": 0.9464487614348854, "grad_norm": 1.1328225135803223, "learning_rate": 7.501676160578953e-08, "loss": 0.6689, "step": 18416 }, { "epoch": 0.9465001541782301, "grad_norm": 1.0806596279144287, "learning_rate": 7.487320485595195e-08, "loss": 0.7014, "step": 18417 }, { "epoch": 0.9465515469215747, "grad_norm": 1.107001781463623, "learning_rate": 7.472978456105396e-08, "loss": 0.7079, "step": 18418 }, { "epoch": 0.9466029396649193, "grad_norm": 1.094678521156311, "learning_rate": 7.458650072506734e-08, "loss": 0.6879, "step": 18419 }, { "epoch": 0.9466543324082639, "grad_norm": 1.179389476776123, "learning_rate": 7.44433533519634e-08, "loss": 0.7319, "step": 18420 }, { "epoch": 0.9467057251516086, "grad_norm": 1.1737526655197144, "learning_rate": 7.430034244570672e-08, "loss": 0.673, "step": 18421 }, { "epoch": 0.9467571178949532, "grad_norm": 0.7394821047782898, "learning_rate": 7.415746801026135e-08, "loss": 0.6793, "step": 18422 }, { "epoch": 0.9468085106382979, "grad_norm": 1.0772780179977417, "learning_rate": 7.401473004958415e-08, "loss": 0.6769, "step": 18423 }, { "epoch": 0.9468599033816425, "grad_norm": 1.0923675298690796, "learning_rate": 7.387212856763082e-08, "loss": 0.7749, "step": 18424 }, { "epoch": 0.9469112961249871, "grad_norm": 1.0914546251296997, "learning_rate": 7.37296635683521e-08, "loss": 0.6714, "step": 18425 }, { "epoch": 0.9469626888683318, "grad_norm": 1.1394906044006348, "learning_rate": 7.358733505569426e-08, "loss": 0.711, "step": 18426 }, { "epoch": 0.9470140816116764, "grad_norm": 1.0678943395614624, "learning_rate": 7.34451430336014e-08, "loss": 0.6717, "step": 18427 }, { "epoch": 0.9470654743550211, "grad_norm": 1.1875513792037964, "learning_rate": 7.330308750601312e-08, "loss": 0.6756, "step": 18428 }, { "epoch": 0.9471168670983657, "grad_norm": 1.0514755249023438, "learning_rate": 7.316116847686517e-08, "loss": 0.7395, "step": 18429 }, { "epoch": 0.9471682598417104, "grad_norm": 1.1398285627365112, "learning_rate": 7.301938595008883e-08, "loss": 0.7533, "step": 18430 }, { "epoch": 0.947219652585055, "grad_norm": 1.077080249786377, "learning_rate": 7.287773992961323e-08, "loss": 0.6521, "step": 18431 }, { "epoch": 0.9472710453283997, "grad_norm": 1.0949287414550781, "learning_rate": 7.27362304193624e-08, "loss": 0.7117, "step": 18432 }, { "epoch": 0.9473224380717443, "grad_norm": 0.8304964303970337, "learning_rate": 7.259485742325712e-08, "loss": 0.6638, "step": 18433 }, { "epoch": 0.9473738308150889, "grad_norm": 1.0441664457321167, "learning_rate": 7.245362094521424e-08, "loss": 0.7334, "step": 18434 }, { "epoch": 0.9474252235584335, "grad_norm": 1.149086594581604, "learning_rate": 7.23125209891462e-08, "loss": 0.7262, "step": 18435 }, { "epoch": 0.9474766163017782, "grad_norm": 1.147416591644287, "learning_rate": 7.217155755896321e-08, "loss": 0.7572, "step": 18436 }, { "epoch": 0.9475280090451228, "grad_norm": 1.0969284772872925, "learning_rate": 7.203073065857046e-08, "loss": 0.7028, "step": 18437 }, { "epoch": 0.9475794017884674, "grad_norm": 1.163584589958191, "learning_rate": 7.189004029186986e-08, "loss": 0.6995, "step": 18438 }, { "epoch": 0.9476307945318121, "grad_norm": 1.0175880193710327, "learning_rate": 7.174948646275826e-08, "loss": 0.6561, "step": 18439 }, { "epoch": 0.9476821872751567, "grad_norm": 1.1271517276763916, "learning_rate": 7.160906917513144e-08, "loss": 0.6774, "step": 18440 }, { "epoch": 0.9477335800185014, "grad_norm": 1.194360375404358, "learning_rate": 7.146878843287907e-08, "loss": 0.6766, "step": 18441 }, { "epoch": 0.947784972761846, "grad_norm": 1.1572412252426147, "learning_rate": 7.132864423988695e-08, "loss": 0.699, "step": 18442 }, { "epoch": 0.9478363655051907, "grad_norm": 1.0989176034927368, "learning_rate": 7.118863660003972e-08, "loss": 0.6701, "step": 18443 }, { "epoch": 0.9478877582485353, "grad_norm": 1.117856502532959, "learning_rate": 7.104876551721485e-08, "loss": 0.7303, "step": 18444 }, { "epoch": 0.94793915099188, "grad_norm": 1.0269311666488647, "learning_rate": 7.090903099528812e-08, "loss": 0.7116, "step": 18445 }, { "epoch": 0.9479905437352246, "grad_norm": 1.1562519073486328, "learning_rate": 7.076943303813088e-08, "loss": 0.6809, "step": 18446 }, { "epoch": 0.9480419364785693, "grad_norm": 1.0489423274993896, "learning_rate": 7.062997164961116e-08, "loss": 0.7346, "step": 18447 }, { "epoch": 0.9480933292219139, "grad_norm": 1.0649052858352661, "learning_rate": 7.049064683359252e-08, "loss": 0.6867, "step": 18448 }, { "epoch": 0.9481447219652585, "grad_norm": 1.0856635570526123, "learning_rate": 7.035145859393521e-08, "loss": 0.7256, "step": 18449 }, { "epoch": 0.9481961147086031, "grad_norm": 1.096313714981079, "learning_rate": 7.021240693449561e-08, "loss": 0.6991, "step": 18450 }, { "epoch": 0.9482475074519477, "grad_norm": 1.1210533380508423, "learning_rate": 7.007349185912616e-08, "loss": 0.6776, "step": 18451 }, { "epoch": 0.9482989001952924, "grad_norm": 1.1090292930603027, "learning_rate": 6.99347133716749e-08, "loss": 0.7521, "step": 18452 }, { "epoch": 0.948350292938637, "grad_norm": 1.0571128129959106, "learning_rate": 6.979607147598821e-08, "loss": 0.7019, "step": 18453 }, { "epoch": 0.9484016856819817, "grad_norm": 1.097296953201294, "learning_rate": 6.965756617590691e-08, "loss": 0.7167, "step": 18454 }, { "epoch": 0.9484530784253263, "grad_norm": 1.053031086921692, "learning_rate": 6.951919747526737e-08, "loss": 0.7015, "step": 18455 }, { "epoch": 0.948504471168671, "grad_norm": 1.0557738542556763, "learning_rate": 6.938096537790484e-08, "loss": 0.7166, "step": 18456 }, { "epoch": 0.9485558639120156, "grad_norm": 1.0871689319610596, "learning_rate": 6.924286988764795e-08, "loss": 0.6804, "step": 18457 }, { "epoch": 0.9486072566553603, "grad_norm": 0.8251727819442749, "learning_rate": 6.910491100832306e-08, "loss": 0.5833, "step": 18458 }, { "epoch": 0.9486586493987049, "grad_norm": 1.1040583848953247, "learning_rate": 6.896708874375213e-08, "loss": 0.7224, "step": 18459 }, { "epoch": 0.9487100421420496, "grad_norm": 1.0890392065048218, "learning_rate": 6.882940309775487e-08, "loss": 0.7274, "step": 18460 }, { "epoch": 0.9487614348853942, "grad_norm": 1.268336296081543, "learning_rate": 6.86918540741438e-08, "loss": 0.6523, "step": 18461 }, { "epoch": 0.9488128276287389, "grad_norm": 1.0202683210372925, "learning_rate": 6.855444167673197e-08, "loss": 0.6574, "step": 18462 }, { "epoch": 0.9488642203720835, "grad_norm": 0.7436390519142151, "learning_rate": 6.841716590932579e-08, "loss": 0.622, "step": 18463 }, { "epoch": 0.948915613115428, "grad_norm": 1.1500734090805054, "learning_rate": 6.828002677572831e-08, "loss": 0.6943, "step": 18464 }, { "epoch": 0.9489670058587727, "grad_norm": 0.7075164318084717, "learning_rate": 6.814302427973929e-08, "loss": 0.6018, "step": 18465 }, { "epoch": 0.9490183986021173, "grad_norm": 1.1226712465286255, "learning_rate": 6.800615842515457e-08, "loss": 0.7104, "step": 18466 }, { "epoch": 0.949069791345462, "grad_norm": 1.0663801431655884, "learning_rate": 6.786942921576611e-08, "loss": 0.6888, "step": 18467 }, { "epoch": 0.9491211840888066, "grad_norm": 1.1259154081344604, "learning_rate": 6.773283665536145e-08, "loss": 0.7188, "step": 18468 }, { "epoch": 0.9491725768321513, "grad_norm": 1.0178067684173584, "learning_rate": 6.759638074772645e-08, "loss": 0.6545, "step": 18469 }, { "epoch": 0.9492239695754959, "grad_norm": 1.0998835563659668, "learning_rate": 6.746006149664086e-08, "loss": 0.6667, "step": 18470 }, { "epoch": 0.9492753623188406, "grad_norm": 0.8189653754234314, "learning_rate": 6.73238789058811e-08, "loss": 0.626, "step": 18471 }, { "epoch": 0.9493267550621852, "grad_norm": 1.165387511253357, "learning_rate": 6.718783297922082e-08, "loss": 0.707, "step": 18472 }, { "epoch": 0.9493781478055299, "grad_norm": 1.1666316986083984, "learning_rate": 6.705192372042923e-08, "loss": 0.7033, "step": 18473 }, { "epoch": 0.9494295405488745, "grad_norm": 1.0873501300811768, "learning_rate": 6.691615113327165e-08, "loss": 0.6716, "step": 18474 }, { "epoch": 0.9494809332922192, "grad_norm": 0.7831141948699951, "learning_rate": 6.678051522151008e-08, "loss": 0.6287, "step": 18475 }, { "epoch": 0.9495323260355638, "grad_norm": 1.0405778884887695, "learning_rate": 6.66450159889026e-08, "loss": 0.6559, "step": 18476 }, { "epoch": 0.9495837187789085, "grad_norm": 0.7674185037612915, "learning_rate": 6.650965343920235e-08, "loss": 0.6167, "step": 18477 }, { "epoch": 0.9496351115222531, "grad_norm": 0.745793879032135, "learning_rate": 6.63744275761602e-08, "loss": 0.6089, "step": 18478 }, { "epoch": 0.9496865042655978, "grad_norm": 1.1060012578964233, "learning_rate": 6.62393384035237e-08, "loss": 0.651, "step": 18479 }, { "epoch": 0.9497378970089423, "grad_norm": 1.1515978574752808, "learning_rate": 6.610438592503376e-08, "loss": 0.7066, "step": 18480 }, { "epoch": 0.9497892897522869, "grad_norm": 0.7230024337768555, "learning_rate": 6.596957014443073e-08, "loss": 0.6767, "step": 18481 }, { "epoch": 0.9498406824956316, "grad_norm": 1.0488051176071167, "learning_rate": 6.583489106544938e-08, "loss": 0.6732, "step": 18482 }, { "epoch": 0.9498920752389762, "grad_norm": 1.0918362140655518, "learning_rate": 6.570034869182062e-08, "loss": 0.671, "step": 18483 }, { "epoch": 0.9499434679823209, "grad_norm": 1.0410696268081665, "learning_rate": 6.556594302727259e-08, "loss": 0.7569, "step": 18484 }, { "epoch": 0.9499948607256655, "grad_norm": 1.0427526235580444, "learning_rate": 6.543167407552953e-08, "loss": 0.6488, "step": 18485 }, { "epoch": 0.9500462534690102, "grad_norm": 1.0695544481277466, "learning_rate": 6.529754184031123e-08, "loss": 0.6999, "step": 18486 }, { "epoch": 0.9500976462123548, "grad_norm": 1.0486271381378174, "learning_rate": 6.516354632533361e-08, "loss": 0.7023, "step": 18487 }, { "epoch": 0.9501490389556995, "grad_norm": 1.1256617307662964, "learning_rate": 6.502968753430871e-08, "loss": 0.6925, "step": 18488 }, { "epoch": 0.9502004316990441, "grad_norm": 1.1408568620681763, "learning_rate": 6.489596547094634e-08, "loss": 0.6998, "step": 18489 }, { "epoch": 0.9502518244423888, "grad_norm": 1.0978319644927979, "learning_rate": 6.476238013895019e-08, "loss": 0.7172, "step": 18490 }, { "epoch": 0.9503032171857334, "grad_norm": 1.1008049249649048, "learning_rate": 6.462893154202232e-08, "loss": 0.7479, "step": 18491 }, { "epoch": 0.950354609929078, "grad_norm": 1.0873156785964966, "learning_rate": 6.449561968385976e-08, "loss": 0.724, "step": 18492 }, { "epoch": 0.9504060026724227, "grad_norm": 1.0022153854370117, "learning_rate": 6.436244456815566e-08, "loss": 0.6398, "step": 18493 }, { "epoch": 0.9504573954157673, "grad_norm": 1.0211819410324097, "learning_rate": 6.42294061986004e-08, "loss": 0.6903, "step": 18494 }, { "epoch": 0.9505087881591119, "grad_norm": 1.1302131414413452, "learning_rate": 6.40965045788794e-08, "loss": 0.6584, "step": 18495 }, { "epoch": 0.9505601809024565, "grad_norm": 1.1093788146972656, "learning_rate": 6.396373971267523e-08, "loss": 0.6889, "step": 18496 }, { "epoch": 0.9506115736458012, "grad_norm": 1.0910133123397827, "learning_rate": 6.383111160366551e-08, "loss": 0.6918, "step": 18497 }, { "epoch": 0.9506629663891458, "grad_norm": 1.0377365350723267, "learning_rate": 6.36986202555262e-08, "loss": 0.7185, "step": 18498 }, { "epoch": 0.9507143591324905, "grad_norm": 1.1549595594406128, "learning_rate": 6.356626567192603e-08, "loss": 0.6857, "step": 18499 }, { "epoch": 0.9507657518758351, "grad_norm": 1.1532100439071655, "learning_rate": 6.343404785653373e-08, "loss": 0.691, "step": 18500 }, { "epoch": 0.9508171446191798, "grad_norm": 1.0925688743591309, "learning_rate": 6.330196681301192e-08, "loss": 0.7071, "step": 18501 }, { "epoch": 0.9508685373625244, "grad_norm": 0.7527129054069519, "learning_rate": 6.317002254502047e-08, "loss": 0.6358, "step": 18502 }, { "epoch": 0.9509199301058691, "grad_norm": 1.0495054721832275, "learning_rate": 6.303821505621421e-08, "loss": 0.6567, "step": 18503 }, { "epoch": 0.9509713228492137, "grad_norm": 1.1129714250564575, "learning_rate": 6.290654435024523e-08, "loss": 0.6739, "step": 18504 }, { "epoch": 0.9510227155925584, "grad_norm": 1.1473026275634766, "learning_rate": 6.277501043076229e-08, "loss": 0.7202, "step": 18505 }, { "epoch": 0.951074108335903, "grad_norm": 1.154187560081482, "learning_rate": 6.264361330140856e-08, "loss": 0.7749, "step": 18506 }, { "epoch": 0.9511255010792476, "grad_norm": 1.1078130006790161, "learning_rate": 6.251235296582503e-08, "loss": 0.6846, "step": 18507 }, { "epoch": 0.9511768938225923, "grad_norm": 1.0758453607559204, "learning_rate": 6.23812294276488e-08, "loss": 0.6819, "step": 18508 }, { "epoch": 0.9512282865659369, "grad_norm": 1.125129222869873, "learning_rate": 6.225024269051194e-08, "loss": 0.711, "step": 18509 }, { "epoch": 0.9512796793092815, "grad_norm": 0.9998478293418884, "learning_rate": 6.211939275804379e-08, "loss": 0.5959, "step": 18510 }, { "epoch": 0.9513310720526261, "grad_norm": 1.058196783065796, "learning_rate": 6.19886796338709e-08, "loss": 0.6995, "step": 18511 }, { "epoch": 0.9513824647959708, "grad_norm": 1.0488908290863037, "learning_rate": 6.185810332161257e-08, "loss": 0.7032, "step": 18512 }, { "epoch": 0.9514338575393154, "grad_norm": 1.2267917394638062, "learning_rate": 6.172766382488815e-08, "loss": 0.7711, "step": 18513 }, { "epoch": 0.9514852502826601, "grad_norm": 1.1512950658798218, "learning_rate": 6.159736114731141e-08, "loss": 0.7681, "step": 18514 }, { "epoch": 0.9515366430260047, "grad_norm": 1.1782149076461792, "learning_rate": 6.146719529249168e-08, "loss": 0.7714, "step": 18515 }, { "epoch": 0.9515880357693494, "grad_norm": 1.1275291442871094, "learning_rate": 6.133716626403607e-08, "loss": 0.7664, "step": 18516 }, { "epoch": 0.951639428512694, "grad_norm": 1.0166822671890259, "learning_rate": 6.120727406554672e-08, "loss": 0.6924, "step": 18517 }, { "epoch": 0.9516908212560387, "grad_norm": 1.0569884777069092, "learning_rate": 6.107751870062296e-08, "loss": 0.7543, "step": 18518 }, { "epoch": 0.9517422139993833, "grad_norm": 1.1686623096466064, "learning_rate": 6.094790017285911e-08, "loss": 0.7537, "step": 18519 }, { "epoch": 0.951793606742728, "grad_norm": 1.0049107074737549, "learning_rate": 6.08184184858468e-08, "loss": 0.649, "step": 18520 }, { "epoch": 0.9518449994860726, "grad_norm": 1.090505838394165, "learning_rate": 6.068907364317367e-08, "loss": 0.7165, "step": 18521 }, { "epoch": 0.9518963922294172, "grad_norm": 1.0637328624725342, "learning_rate": 6.055986564842242e-08, "loss": 0.734, "step": 18522 }, { "epoch": 0.9519477849727619, "grad_norm": 1.1752525568008423, "learning_rate": 6.043079450517353e-08, "loss": 0.7089, "step": 18523 }, { "epoch": 0.9519991777161065, "grad_norm": 1.0466455221176147, "learning_rate": 6.030186021700301e-08, "loss": 0.6929, "step": 18524 }, { "epoch": 0.9520505704594511, "grad_norm": 1.2045466899871826, "learning_rate": 6.017306278748303e-08, "loss": 0.7534, "step": 18525 }, { "epoch": 0.9521019632027957, "grad_norm": 1.0915586948394775, "learning_rate": 6.004440222018181e-08, "loss": 0.6362, "step": 18526 }, { "epoch": 0.9521533559461404, "grad_norm": 1.0318011045455933, "learning_rate": 5.991587851866488e-08, "loss": 0.6808, "step": 18527 }, { "epoch": 0.952204748689485, "grad_norm": 1.1518017053604126, "learning_rate": 5.978749168649212e-08, "loss": 0.7415, "step": 18528 }, { "epoch": 0.9522561414328297, "grad_norm": 1.1035850048065186, "learning_rate": 5.965924172722071e-08, "loss": 0.6648, "step": 18529 }, { "epoch": 0.9523075341761743, "grad_norm": 1.0127605199813843, "learning_rate": 5.953112864440391e-08, "loss": 0.6659, "step": 18530 }, { "epoch": 0.952358926919519, "grad_norm": 1.1557966470718384, "learning_rate": 5.9403152441592224e-08, "loss": 0.7113, "step": 18531 }, { "epoch": 0.9524103196628636, "grad_norm": 0.8569692969322205, "learning_rate": 5.927531312232948e-08, "loss": 0.6355, "step": 18532 }, { "epoch": 0.9524617124062082, "grad_norm": 1.1663459539413452, "learning_rate": 5.914761069015951e-08, "loss": 0.6925, "step": 18533 }, { "epoch": 0.9525131051495529, "grad_norm": 1.171074628829956, "learning_rate": 5.9020045148619474e-08, "loss": 0.716, "step": 18534 }, { "epoch": 0.9525644978928975, "grad_norm": 1.059232473373413, "learning_rate": 5.889261650124323e-08, "loss": 0.7402, "step": 18535 }, { "epoch": 0.9526158906362422, "grad_norm": 1.0407756567001343, "learning_rate": 5.876532475156238e-08, "loss": 0.6882, "step": 18536 }, { "epoch": 0.9526672833795868, "grad_norm": 1.0759800672531128, "learning_rate": 5.863816990310245e-08, "loss": 0.6955, "step": 18537 }, { "epoch": 0.9527186761229315, "grad_norm": 1.0647433996200562, "learning_rate": 5.8511151959387836e-08, "loss": 0.7005, "step": 18538 }, { "epoch": 0.9527700688662761, "grad_norm": 1.098070502281189, "learning_rate": 5.838427092393628e-08, "loss": 0.6648, "step": 18539 }, { "epoch": 0.9528214616096207, "grad_norm": 1.0953816175460815, "learning_rate": 5.825752680026442e-08, "loss": 0.7157, "step": 18540 }, { "epoch": 0.9528728543529653, "grad_norm": 1.0858277082443237, "learning_rate": 5.8130919591882215e-08, "loss": 0.7058, "step": 18541 }, { "epoch": 0.95292424709631, "grad_norm": 1.1203078031539917, "learning_rate": 5.800444930229909e-08, "loss": 0.6798, "step": 18542 }, { "epoch": 0.9529756398396546, "grad_norm": 1.0937460660934448, "learning_rate": 5.78781159350178e-08, "loss": 0.6466, "step": 18543 }, { "epoch": 0.9530270325829993, "grad_norm": 1.0030782222747803, "learning_rate": 5.7751919493538865e-08, "loss": 0.6236, "step": 18544 }, { "epoch": 0.9530784253263439, "grad_norm": 1.0455782413482666, "learning_rate": 5.76258599813595e-08, "loss": 0.7133, "step": 18545 }, { "epoch": 0.9531298180696886, "grad_norm": 1.125820279121399, "learning_rate": 5.7499937401970796e-08, "loss": 0.7165, "step": 18546 }, { "epoch": 0.9531812108130332, "grad_norm": 1.0471553802490234, "learning_rate": 5.7374151758863296e-08, "loss": 0.6555, "step": 18547 }, { "epoch": 0.9532326035563778, "grad_norm": 1.0910669565200806, "learning_rate": 5.724850305551977e-08, "loss": 0.67, "step": 18548 }, { "epoch": 0.9532839962997225, "grad_norm": 0.7481269240379333, "learning_rate": 5.7122991295423534e-08, "loss": 0.6264, "step": 18549 }, { "epoch": 0.9533353890430671, "grad_norm": 1.1256262063980103, "learning_rate": 5.699761648205126e-08, "loss": 0.6816, "step": 18550 }, { "epoch": 0.9533867817864118, "grad_norm": 0.8002947568893433, "learning_rate": 5.687237861887629e-08, "loss": 0.6334, "step": 18551 }, { "epoch": 0.9534381745297564, "grad_norm": 1.1651090383529663, "learning_rate": 5.6747277709368055e-08, "loss": 0.6828, "step": 18552 }, { "epoch": 0.9534895672731011, "grad_norm": 1.054434895515442, "learning_rate": 5.662231375699379e-08, "loss": 0.6985, "step": 18553 }, { "epoch": 0.9535409600164457, "grad_norm": 1.073267936706543, "learning_rate": 5.649748676521516e-08, "loss": 0.6687, "step": 18554 }, { "epoch": 0.9535923527597903, "grad_norm": 1.1348817348480225, "learning_rate": 5.637279673748997e-08, "loss": 0.6842, "step": 18555 }, { "epoch": 0.9536437455031349, "grad_norm": 1.079379916191101, "learning_rate": 5.6248243677274326e-08, "loss": 0.6614, "step": 18556 }, { "epoch": 0.9536951382464796, "grad_norm": 1.1933705806732178, "learning_rate": 5.6123827588017155e-08, "loss": 0.6782, "step": 18557 }, { "epoch": 0.9537465309898242, "grad_norm": 1.1850683689117432, "learning_rate": 5.5999548473167355e-08, "loss": 0.7161, "step": 18558 }, { "epoch": 0.9537979237331689, "grad_norm": 0.9700796604156494, "learning_rate": 5.587540633616662e-08, "loss": 0.6576, "step": 18559 }, { "epoch": 0.9538493164765135, "grad_norm": 1.1091818809509277, "learning_rate": 5.5751401180456074e-08, "loss": 0.6841, "step": 18560 }, { "epoch": 0.9539007092198581, "grad_norm": 1.0881222486495972, "learning_rate": 5.5627533009469657e-08, "loss": 0.6912, "step": 18561 }, { "epoch": 0.9539521019632028, "grad_norm": 1.0959827899932861, "learning_rate": 5.5503801826640726e-08, "loss": 0.7079, "step": 18562 }, { "epoch": 0.9540034947065474, "grad_norm": 1.0534908771514893, "learning_rate": 5.538020763539653e-08, "loss": 0.7006, "step": 18563 }, { "epoch": 0.9540548874498921, "grad_norm": 1.0893689393997192, "learning_rate": 5.5256750439161565e-08, "loss": 0.723, "step": 18564 }, { "epoch": 0.9541062801932367, "grad_norm": 1.057265281677246, "learning_rate": 5.513343024135587e-08, "loss": 0.7079, "step": 18565 }, { "epoch": 0.9541576729365814, "grad_norm": 1.0557259321212769, "learning_rate": 5.50102470453967e-08, "loss": 0.7296, "step": 18566 }, { "epoch": 0.954209065679926, "grad_norm": 1.082780361175537, "learning_rate": 5.4887200854697455e-08, "loss": 0.6796, "step": 18567 }, { "epoch": 0.9542604584232707, "grad_norm": 1.0965237617492676, "learning_rate": 5.4764291672665395e-08, "loss": 0.6664, "step": 18568 }, { "epoch": 0.9543118511666153, "grad_norm": 1.02618408203125, "learning_rate": 5.464151950270835e-08, "loss": 0.6697, "step": 18569 }, { "epoch": 0.95436324390996, "grad_norm": 1.0253819227218628, "learning_rate": 5.451888434822583e-08, "loss": 0.6777, "step": 18570 }, { "epoch": 0.9544146366533045, "grad_norm": 1.0898034572601318, "learning_rate": 5.4396386212616225e-08, "loss": 0.7201, "step": 18571 }, { "epoch": 0.9544660293966492, "grad_norm": 0.7667589783668518, "learning_rate": 5.427402509927404e-08, "loss": 0.648, "step": 18572 }, { "epoch": 0.9545174221399938, "grad_norm": 1.1205601692199707, "learning_rate": 5.415180101158823e-08, "loss": 0.6971, "step": 18573 }, { "epoch": 0.9545688148833384, "grad_norm": 1.1298917531967163, "learning_rate": 5.402971395294554e-08, "loss": 0.7141, "step": 18574 }, { "epoch": 0.9546202076266831, "grad_norm": 1.2310457229614258, "learning_rate": 5.3907763926728804e-08, "loss": 0.6687, "step": 18575 }, { "epoch": 0.9546716003700277, "grad_norm": 1.1133778095245361, "learning_rate": 5.3785950936317e-08, "loss": 0.7137, "step": 18576 }, { "epoch": 0.9547229931133724, "grad_norm": 1.1581921577453613, "learning_rate": 5.366427498508409e-08, "loss": 0.7358, "step": 18577 }, { "epoch": 0.954774385856717, "grad_norm": 1.101252555847168, "learning_rate": 5.3542736076402366e-08, "loss": 0.7525, "step": 18578 }, { "epoch": 0.9548257786000617, "grad_norm": 1.1212066411972046, "learning_rate": 5.3421334213638597e-08, "loss": 0.6701, "step": 18579 }, { "epoch": 0.9548771713434063, "grad_norm": 1.0468226671218872, "learning_rate": 5.330006940015564e-08, "loss": 0.7034, "step": 18580 }, { "epoch": 0.954928564086751, "grad_norm": 1.1018781661987305, "learning_rate": 5.317894163931414e-08, "loss": 0.6954, "step": 18581 }, { "epoch": 0.9549799568300956, "grad_norm": 0.666149914264679, "learning_rate": 5.3057950934470856e-08, "loss": 0.6264, "step": 18582 }, { "epoch": 0.9550313495734403, "grad_norm": 1.0723998546600342, "learning_rate": 5.293709728897589e-08, "loss": 0.7156, "step": 18583 }, { "epoch": 0.9550827423167849, "grad_norm": 1.2199606895446777, "learning_rate": 5.281638070617878e-08, "loss": 0.6588, "step": 18584 }, { "epoch": 0.9551341350601296, "grad_norm": 1.0921270847320557, "learning_rate": 5.269580118942408e-08, "loss": 0.7056, "step": 18585 }, { "epoch": 0.9551855278034741, "grad_norm": 1.1010504961013794, "learning_rate": 5.257535874205244e-08, "loss": 0.7068, "step": 18586 }, { "epoch": 0.9552369205468187, "grad_norm": 1.134931206703186, "learning_rate": 5.245505336740064e-08, "loss": 0.7145, "step": 18587 }, { "epoch": 0.9552883132901634, "grad_norm": 0.7802860140800476, "learning_rate": 5.2334885068802135e-08, "loss": 0.6304, "step": 18588 }, { "epoch": 0.955339706033508, "grad_norm": 1.088397741317749, "learning_rate": 5.2214853849586465e-08, "loss": 0.7122, "step": 18589 }, { "epoch": 0.9553910987768527, "grad_norm": 1.1143532991409302, "learning_rate": 5.209495971307821e-08, "loss": 0.7046, "step": 18590 }, { "epoch": 0.9554424915201973, "grad_norm": 1.1202495098114014, "learning_rate": 5.197520266260081e-08, "loss": 0.712, "step": 18591 }, { "epoch": 0.955493884263542, "grad_norm": 1.0739589929580688, "learning_rate": 5.1855582701469955e-08, "loss": 0.6694, "step": 18592 }, { "epoch": 0.9555452770068866, "grad_norm": 1.1087214946746826, "learning_rate": 5.1736099833001876e-08, "loss": 0.7205, "step": 18593 }, { "epoch": 0.9555966697502313, "grad_norm": 1.1017102003097534, "learning_rate": 5.161675406050615e-08, "loss": 0.7547, "step": 18594 }, { "epoch": 0.9556480624935759, "grad_norm": 0.7690007090568542, "learning_rate": 5.149754538728902e-08, "loss": 0.612, "step": 18595 }, { "epoch": 0.9556994552369206, "grad_norm": 0.8100370168685913, "learning_rate": 5.137847381665451e-08, "loss": 0.6654, "step": 18596 }, { "epoch": 0.9557508479802652, "grad_norm": 1.0310572385787964, "learning_rate": 5.1259539351899976e-08, "loss": 0.7062, "step": 18597 }, { "epoch": 0.9558022407236099, "grad_norm": 1.1250792741775513, "learning_rate": 5.1140741996321685e-08, "loss": 0.6585, "step": 18598 }, { "epoch": 0.9558536334669545, "grad_norm": 1.0422991514205933, "learning_rate": 5.102208175321033e-08, "loss": 0.7023, "step": 18599 }, { "epoch": 0.9559050262102992, "grad_norm": 1.1391725540161133, "learning_rate": 5.090355862585439e-08, "loss": 0.7041, "step": 18600 }, { "epoch": 0.9559564189536437, "grad_norm": 0.7220892310142517, "learning_rate": 5.078517261753679e-08, "loss": 0.5987, "step": 18601 }, { "epoch": 0.9560078116969883, "grad_norm": 0.6920177936553955, "learning_rate": 5.06669237315377e-08, "loss": 0.65, "step": 18602 }, { "epoch": 0.956059204440333, "grad_norm": 1.032349944114685, "learning_rate": 5.054881197113337e-08, "loss": 0.6448, "step": 18603 }, { "epoch": 0.9561105971836776, "grad_norm": 1.101815104484558, "learning_rate": 5.043083733959675e-08, "loss": 0.7323, "step": 18604 }, { "epoch": 0.9561619899270223, "grad_norm": 1.0962743759155273, "learning_rate": 5.031299984019633e-08, "loss": 0.6853, "step": 18605 }, { "epoch": 0.9562133826703669, "grad_norm": 1.0746657848358154, "learning_rate": 5.0195299476195616e-08, "loss": 0.7137, "step": 18606 }, { "epoch": 0.9562647754137116, "grad_norm": 1.0905104875564575, "learning_rate": 5.007773625085699e-08, "loss": 0.7003, "step": 18607 }, { "epoch": 0.9563161681570562, "grad_norm": 1.0539453029632568, "learning_rate": 4.996031016743674e-08, "loss": 0.6621, "step": 18608 }, { "epoch": 0.9563675609004009, "grad_norm": 1.0639744997024536, "learning_rate": 4.9843021229189495e-08, "loss": 0.6989, "step": 18609 }, { "epoch": 0.9564189536437455, "grad_norm": 1.128825068473816, "learning_rate": 4.9725869439363195e-08, "loss": 0.7088, "step": 18610 }, { "epoch": 0.9564703463870902, "grad_norm": 1.1343121528625488, "learning_rate": 4.960885480120525e-08, "loss": 0.662, "step": 18611 }, { "epoch": 0.9565217391304348, "grad_norm": 1.107842206954956, "learning_rate": 4.949197731795641e-08, "loss": 0.6836, "step": 18612 }, { "epoch": 0.9565731318737795, "grad_norm": 1.1908289194107056, "learning_rate": 4.937523699285518e-08, "loss": 0.7169, "step": 18613 }, { "epoch": 0.9566245246171241, "grad_norm": 1.1074457168579102, "learning_rate": 4.925863382913676e-08, "loss": 0.7086, "step": 18614 }, { "epoch": 0.9566759173604688, "grad_norm": 1.1247050762176514, "learning_rate": 4.914216783003079e-08, "loss": 0.6628, "step": 18615 }, { "epoch": 0.9567273101038133, "grad_norm": 0.801388680934906, "learning_rate": 4.902583899876357e-08, "loss": 0.6696, "step": 18616 }, { "epoch": 0.9567787028471579, "grad_norm": 1.1273503303527832, "learning_rate": 4.890964733855974e-08, "loss": 0.7419, "step": 18617 }, { "epoch": 0.9568300955905026, "grad_norm": 1.1985262632369995, "learning_rate": 4.879359285263785e-08, "loss": 0.7112, "step": 18618 }, { "epoch": 0.9568814883338472, "grad_norm": 1.101425290107727, "learning_rate": 4.867767554421199e-08, "loss": 0.6751, "step": 18619 }, { "epoch": 0.9569328810771919, "grad_norm": 1.1299301385879517, "learning_rate": 4.856189541649625e-08, "loss": 0.7197, "step": 18620 }, { "epoch": 0.9569842738205365, "grad_norm": 1.1485008001327515, "learning_rate": 4.844625247269585e-08, "loss": 0.7466, "step": 18621 }, { "epoch": 0.9570356665638812, "grad_norm": 1.1370301246643066, "learning_rate": 4.833074671601656e-08, "loss": 0.7132, "step": 18622 }, { "epoch": 0.9570870593072258, "grad_norm": 1.148215413093567, "learning_rate": 4.821537814965804e-08, "loss": 0.707, "step": 18623 }, { "epoch": 0.9571384520505705, "grad_norm": 1.044503092765808, "learning_rate": 4.810014677681607e-08, "loss": 0.6896, "step": 18624 }, { "epoch": 0.9571898447939151, "grad_norm": 1.0706899166107178, "learning_rate": 4.7985052600683644e-08, "loss": 0.6793, "step": 18625 }, { "epoch": 0.9572412375372598, "grad_norm": 1.0646222829818726, "learning_rate": 4.787009562445044e-08, "loss": 0.6842, "step": 18626 }, { "epoch": 0.9572926302806044, "grad_norm": 0.7521428465843201, "learning_rate": 4.775527585130002e-08, "loss": 0.6474, "step": 18627 }, { "epoch": 0.957344023023949, "grad_norm": 1.0449061393737793, "learning_rate": 4.7640593284414283e-08, "loss": 0.6459, "step": 18628 }, { "epoch": 0.9573954157672937, "grad_norm": 1.061620831489563, "learning_rate": 4.7526047926970686e-08, "loss": 0.7335, "step": 18629 }, { "epoch": 0.9574468085106383, "grad_norm": 1.1447478532791138, "learning_rate": 4.741163978214281e-08, "loss": 0.7452, "step": 18630 }, { "epoch": 0.9574982012539829, "grad_norm": 1.1448407173156738, "learning_rate": 4.7297368853100325e-08, "loss": 0.6594, "step": 18631 }, { "epoch": 0.9575495939973275, "grad_norm": 1.2221343517303467, "learning_rate": 4.718323514300849e-08, "loss": 0.7158, "step": 18632 }, { "epoch": 0.9576009867406722, "grad_norm": 1.0822041034698486, "learning_rate": 4.706923865503088e-08, "loss": 0.6869, "step": 18633 }, { "epoch": 0.9576523794840168, "grad_norm": 1.082029938697815, "learning_rate": 4.695537939232442e-08, "loss": 0.6735, "step": 18634 }, { "epoch": 0.9577037722273615, "grad_norm": 1.0523247718811035, "learning_rate": 4.684165735804491e-08, "loss": 0.6601, "step": 18635 }, { "epoch": 0.9577551649707061, "grad_norm": 1.1622936725616455, "learning_rate": 4.6728072555342616e-08, "loss": 0.7172, "step": 18636 }, { "epoch": 0.9578065577140508, "grad_norm": 0.7009083032608032, "learning_rate": 4.661462498736391e-08, "loss": 0.6466, "step": 18637 }, { "epoch": 0.9578579504573954, "grad_norm": 1.0581218004226685, "learning_rate": 4.650131465725294e-08, "loss": 0.7291, "step": 18638 }, { "epoch": 0.9579093432007401, "grad_norm": 0.7652928829193115, "learning_rate": 4.638814156814886e-08, "loss": 0.6236, "step": 18639 }, { "epoch": 0.9579607359440847, "grad_norm": 1.0943200588226318, "learning_rate": 4.627510572318694e-08, "loss": 0.7053, "step": 18640 }, { "epoch": 0.9580121286874294, "grad_norm": 1.1425758600234985, "learning_rate": 4.616220712549857e-08, "loss": 0.6497, "step": 18641 }, { "epoch": 0.958063521430774, "grad_norm": 1.0686031579971313, "learning_rate": 4.604944577821235e-08, "loss": 0.6867, "step": 18642 }, { "epoch": 0.9581149141741186, "grad_norm": 1.1265840530395508, "learning_rate": 4.593682168445246e-08, "loss": 0.7082, "step": 18643 }, { "epoch": 0.9581663069174633, "grad_norm": 1.138424277305603, "learning_rate": 4.582433484733917e-08, "loss": 0.7235, "step": 18644 }, { "epoch": 0.9582176996608079, "grad_norm": 1.1426639556884766, "learning_rate": 4.571198526998832e-08, "loss": 0.6913, "step": 18645 }, { "epoch": 0.9582690924041525, "grad_norm": 0.7291485071182251, "learning_rate": 4.5599772955514096e-08, "loss": 0.6014, "step": 18646 }, { "epoch": 0.9583204851474971, "grad_norm": 0.8170145750045776, "learning_rate": 4.5487697907024006e-08, "loss": 0.6222, "step": 18647 }, { "epoch": 0.9583718778908418, "grad_norm": 1.0010935068130493, "learning_rate": 4.53757601276239e-08, "loss": 0.6867, "step": 18648 }, { "epoch": 0.9584232706341864, "grad_norm": 1.1479108333587646, "learning_rate": 4.5263959620414633e-08, "loss": 0.6372, "step": 18649 }, { "epoch": 0.9584746633775311, "grad_norm": 1.09955894947052, "learning_rate": 4.5152296388494276e-08, "loss": 0.6808, "step": 18650 }, { "epoch": 0.9585260561208757, "grad_norm": 1.039372205734253, "learning_rate": 4.504077043495592e-08, "loss": 0.6703, "step": 18651 }, { "epoch": 0.9585774488642204, "grad_norm": 0.8475385904312134, "learning_rate": 4.492938176289041e-08, "loss": 0.6748, "step": 18652 }, { "epoch": 0.958628841607565, "grad_norm": 1.1384634971618652, "learning_rate": 4.481813037538363e-08, "loss": 0.709, "step": 18653 }, { "epoch": 0.9586802343509097, "grad_norm": 0.7860634326934814, "learning_rate": 4.470701627551699e-08, "loss": 0.6332, "step": 18654 }, { "epoch": 0.9587316270942543, "grad_norm": 1.1513521671295166, "learning_rate": 4.4596039466369704e-08, "loss": 0.6856, "step": 18655 }, { "epoch": 0.958783019837599, "grad_norm": 1.098516821861267, "learning_rate": 4.448519995101652e-08, "loss": 0.6498, "step": 18656 }, { "epoch": 0.9588344125809436, "grad_norm": 0.7560486197471619, "learning_rate": 4.437449773252889e-08, "loss": 0.6133, "step": 18657 }, { "epoch": 0.9588858053242882, "grad_norm": 1.203675627708435, "learning_rate": 4.426393281397212e-08, "loss": 0.6939, "step": 18658 }, { "epoch": 0.9589371980676329, "grad_norm": 2.11722469329834, "learning_rate": 4.415350519841155e-08, "loss": 0.6967, "step": 18659 }, { "epoch": 0.9589885908109775, "grad_norm": 1.1580649614334106, "learning_rate": 4.404321488890528e-08, "loss": 0.6797, "step": 18660 }, { "epoch": 0.9590399835543222, "grad_norm": 0.8317680358886719, "learning_rate": 4.3933061888509765e-08, "loss": 0.6352, "step": 18661 }, { "epoch": 0.9590913762976667, "grad_norm": 1.0601463317871094, "learning_rate": 4.3823046200277e-08, "loss": 0.6917, "step": 18662 }, { "epoch": 0.9591427690410114, "grad_norm": 1.0961825847625732, "learning_rate": 4.3713167827253986e-08, "loss": 0.7036, "step": 18663 }, { "epoch": 0.959194161784356, "grad_norm": 1.1072916984558105, "learning_rate": 4.360342677248608e-08, "loss": 0.7072, "step": 18664 }, { "epoch": 0.9592455545277007, "grad_norm": 1.132230281829834, "learning_rate": 4.349382303901306e-08, "loss": 0.7106, "step": 18665 }, { "epoch": 0.9592969472710453, "grad_norm": 1.0210397243499756, "learning_rate": 4.338435662987195e-08, "loss": 0.6692, "step": 18666 }, { "epoch": 0.95934834001439, "grad_norm": 0.7035905718803406, "learning_rate": 4.327502754809587e-08, "loss": 0.6078, "step": 18667 }, { "epoch": 0.9593997327577346, "grad_norm": 1.1251174211502075, "learning_rate": 4.316583579671352e-08, "loss": 0.7309, "step": 18668 }, { "epoch": 0.9594511255010792, "grad_norm": 1.049394130706787, "learning_rate": 4.3056781378750246e-08, "loss": 0.6852, "step": 18669 }, { "epoch": 0.9595025182444239, "grad_norm": 1.1074708700180054, "learning_rate": 4.294786429722697e-08, "loss": 0.7042, "step": 18670 }, { "epoch": 0.9595539109877685, "grad_norm": 1.0724937915802002, "learning_rate": 4.283908455516239e-08, "loss": 0.6443, "step": 18671 }, { "epoch": 0.9596053037311132, "grad_norm": 1.0756522417068481, "learning_rate": 4.273044215556965e-08, "loss": 0.7478, "step": 18672 }, { "epoch": 0.9596566964744578, "grad_norm": 1.1108465194702148, "learning_rate": 4.262193710145912e-08, "loss": 0.7439, "step": 18673 }, { "epoch": 0.9597080892178025, "grad_norm": 1.127938151359558, "learning_rate": 4.2513569395836176e-08, "loss": 0.7338, "step": 18674 }, { "epoch": 0.9597594819611471, "grad_norm": 1.1073179244995117, "learning_rate": 4.240533904170452e-08, "loss": 0.6913, "step": 18675 }, { "epoch": 0.9598108747044918, "grad_norm": 0.7691919803619385, "learning_rate": 4.2297246042061754e-08, "loss": 0.6461, "step": 18676 }, { "epoch": 0.9598622674478363, "grad_norm": 1.1023718118667603, "learning_rate": 4.2189290399903274e-08, "loss": 0.6794, "step": 18677 }, { "epoch": 0.959913660191181, "grad_norm": 0.8810712695121765, "learning_rate": 4.208147211822e-08, "loss": 0.6708, "step": 18678 }, { "epoch": 0.9599650529345256, "grad_norm": 1.1336606740951538, "learning_rate": 4.197379119999845e-08, "loss": 0.6516, "step": 18679 }, { "epoch": 0.9600164456778703, "grad_norm": 1.0686670541763306, "learning_rate": 4.186624764822289e-08, "loss": 0.6642, "step": 18680 }, { "epoch": 0.9600678384212149, "grad_norm": 1.115654706954956, "learning_rate": 4.175884146587261e-08, "loss": 0.6875, "step": 18681 }, { "epoch": 0.9601192311645596, "grad_norm": 0.7161726355552673, "learning_rate": 4.1651572655923564e-08, "loss": 0.6589, "step": 18682 }, { "epoch": 0.9601706239079042, "grad_norm": 1.1350669860839844, "learning_rate": 4.154444122134726e-08, "loss": 0.7162, "step": 18683 }, { "epoch": 0.9602220166512488, "grad_norm": 1.060953140258789, "learning_rate": 4.1437447165112446e-08, "loss": 0.6982, "step": 18684 }, { "epoch": 0.9602734093945935, "grad_norm": 1.0805357694625854, "learning_rate": 4.133059049018284e-08, "loss": 0.6512, "step": 18685 }, { "epoch": 0.9603248021379381, "grad_norm": 1.132425308227539, "learning_rate": 4.122387119951998e-08, "loss": 0.7607, "step": 18686 }, { "epoch": 0.9603761948812828, "grad_norm": 1.140392780303955, "learning_rate": 4.1117289296079274e-08, "loss": 0.6856, "step": 18687 }, { "epoch": 0.9604275876246274, "grad_norm": 0.7822204828262329, "learning_rate": 4.1010844782814475e-08, "loss": 0.606, "step": 18688 }, { "epoch": 0.9604789803679721, "grad_norm": 1.117435097694397, "learning_rate": 4.090453766267488e-08, "loss": 0.7681, "step": 18689 }, { "epoch": 0.9605303731113167, "grad_norm": 1.07142174243927, "learning_rate": 4.079836793860537e-08, "loss": 0.6953, "step": 18690 }, { "epoch": 0.9605817658546614, "grad_norm": 1.1133992671966553, "learning_rate": 4.0692335613548576e-08, "loss": 0.7242, "step": 18691 }, { "epoch": 0.9606331585980059, "grad_norm": 0.6592369079589844, "learning_rate": 4.058644069043993e-08, "loss": 0.6383, "step": 18692 }, { "epoch": 0.9606845513413506, "grad_norm": 1.2187731266021729, "learning_rate": 4.048068317221543e-08, "loss": 0.7294, "step": 18693 }, { "epoch": 0.9607359440846952, "grad_norm": 1.0944730043411255, "learning_rate": 4.037506306180439e-08, "loss": 0.6569, "step": 18694 }, { "epoch": 0.9607873368280399, "grad_norm": 1.1488984823226929, "learning_rate": 4.0269580362133354e-08, "loss": 0.7034, "step": 18695 }, { "epoch": 0.9608387295713845, "grad_norm": 1.1664592027664185, "learning_rate": 4.016423507612443e-08, "loss": 0.6774, "step": 18696 }, { "epoch": 0.9608901223147291, "grad_norm": 1.3573521375656128, "learning_rate": 4.005902720669641e-08, "loss": 0.7438, "step": 18697 }, { "epoch": 0.9609415150580738, "grad_norm": 1.0690838098526, "learning_rate": 3.995395675676472e-08, "loss": 0.6623, "step": 18698 }, { "epoch": 0.9609929078014184, "grad_norm": 0.8270371556282043, "learning_rate": 3.984902372923982e-08, "loss": 0.6056, "step": 18699 }, { "epoch": 0.9610443005447631, "grad_norm": 1.1175559759140015, "learning_rate": 3.9744228127028825e-08, "loss": 0.6835, "step": 18700 }, { "epoch": 0.9610956932881077, "grad_norm": 1.1107882261276245, "learning_rate": 3.963956995303553e-08, "loss": 0.6283, "step": 18701 }, { "epoch": 0.9611470860314524, "grad_norm": 1.188991665840149, "learning_rate": 3.953504921015983e-08, "loss": 0.6881, "step": 18702 }, { "epoch": 0.961198478774797, "grad_norm": 0.8681862950325012, "learning_rate": 3.943066590129663e-08, "loss": 0.5955, "step": 18703 }, { "epoch": 0.9612498715181417, "grad_norm": 1.1701823472976685, "learning_rate": 3.932642002933973e-08, "loss": 0.6943, "step": 18704 }, { "epoch": 0.9613012642614863, "grad_norm": 1.1131714582443237, "learning_rate": 3.922231159717516e-08, "loss": 0.735, "step": 18705 }, { "epoch": 0.961352657004831, "grad_norm": 1.1054378747940063, "learning_rate": 3.9118340607689484e-08, "loss": 0.6973, "step": 18706 }, { "epoch": 0.9614040497481755, "grad_norm": 1.0071227550506592, "learning_rate": 3.901450706376153e-08, "loss": 0.6695, "step": 18707 }, { "epoch": 0.9614554424915202, "grad_norm": 0.6596884727478027, "learning_rate": 3.891081096826899e-08, "loss": 0.6239, "step": 18708 }, { "epoch": 0.9615068352348648, "grad_norm": 1.1368803977966309, "learning_rate": 3.880725232408455e-08, "loss": 0.7025, "step": 18709 }, { "epoch": 0.9615582279782094, "grad_norm": 1.1497546434402466, "learning_rate": 3.870383113407761e-08, "loss": 0.7066, "step": 18710 }, { "epoch": 0.9616096207215541, "grad_norm": 1.006225347518921, "learning_rate": 3.860054740111363e-08, "loss": 0.6488, "step": 18711 }, { "epoch": 0.9616610134648987, "grad_norm": 1.1463208198547363, "learning_rate": 3.8497401128053114e-08, "loss": 0.6369, "step": 18712 }, { "epoch": 0.9617124062082434, "grad_norm": 0.6772070527076721, "learning_rate": 3.839439231775599e-08, "loss": 0.6253, "step": 18713 }, { "epoch": 0.961763798951588, "grad_norm": 1.136577844619751, "learning_rate": 3.829152097307387e-08, "loss": 0.6974, "step": 18714 }, { "epoch": 0.9618151916949327, "grad_norm": 1.1231223344802856, "learning_rate": 3.818878709685836e-08, "loss": 0.6748, "step": 18715 }, { "epoch": 0.9618665844382773, "grad_norm": 1.065050482749939, "learning_rate": 3.808619069195552e-08, "loss": 0.6619, "step": 18716 }, { "epoch": 0.961917977181622, "grad_norm": 1.0246723890304565, "learning_rate": 3.798373176120806e-08, "loss": 0.7145, "step": 18717 }, { "epoch": 0.9619693699249666, "grad_norm": 1.034804105758667, "learning_rate": 3.7881410307453735e-08, "loss": 0.6466, "step": 18718 }, { "epoch": 0.9620207626683113, "grad_norm": 1.031685709953308, "learning_rate": 3.777922633352804e-08, "loss": 0.7178, "step": 18719 }, { "epoch": 0.9620721554116559, "grad_norm": 1.070880651473999, "learning_rate": 3.76771798422626e-08, "loss": 0.688, "step": 18720 }, { "epoch": 0.9621235481550006, "grad_norm": 1.0734368562698364, "learning_rate": 3.7575270836483493e-08, "loss": 0.6145, "step": 18721 }, { "epoch": 0.9621749408983451, "grad_norm": 1.1854833364486694, "learning_rate": 3.747349931901512e-08, "loss": 0.681, "step": 18722 }, { "epoch": 0.9622263336416897, "grad_norm": 0.7495351433753967, "learning_rate": 3.7371865292677444e-08, "loss": 0.6212, "step": 18723 }, { "epoch": 0.9622777263850344, "grad_norm": 1.100218653678894, "learning_rate": 3.7270368760285445e-08, "loss": 0.6763, "step": 18724 }, { "epoch": 0.962329119128379, "grad_norm": 1.0150842666625977, "learning_rate": 3.716900972465187e-08, "loss": 0.6888, "step": 18725 }, { "epoch": 0.9623805118717237, "grad_norm": 1.0960780382156372, "learning_rate": 3.706778818858448e-08, "loss": 0.7086, "step": 18726 }, { "epoch": 0.9624319046150683, "grad_norm": 1.0719295740127563, "learning_rate": 3.696670415488768e-08, "loss": 0.6987, "step": 18727 }, { "epoch": 0.962483297358413, "grad_norm": 1.0648680925369263, "learning_rate": 3.686575762636257e-08, "loss": 0.686, "step": 18728 }, { "epoch": 0.9625346901017576, "grad_norm": 1.0990171432495117, "learning_rate": 3.6764948605805265e-08, "loss": 0.6769, "step": 18729 }, { "epoch": 0.9625860828451023, "grad_norm": 1.0862153768539429, "learning_rate": 3.666427709600961e-08, "loss": 0.7492, "step": 18730 }, { "epoch": 0.9626374755884469, "grad_norm": 0.7320635318756104, "learning_rate": 3.656374309976396e-08, "loss": 0.6885, "step": 18731 }, { "epoch": 0.9626888683317916, "grad_norm": 1.1207351684570312, "learning_rate": 3.64633466198544e-08, "loss": 0.707, "step": 18732 }, { "epoch": 0.9627402610751362, "grad_norm": 1.0549672842025757, "learning_rate": 3.6363087659062047e-08, "loss": 0.6952, "step": 18733 }, { "epoch": 0.9627916538184809, "grad_norm": 1.0477707386016846, "learning_rate": 3.6262966220164676e-08, "loss": 0.6906, "step": 18734 }, { "epoch": 0.9628430465618255, "grad_norm": 1.064225435256958, "learning_rate": 3.616298230593673e-08, "loss": 0.694, "step": 18735 }, { "epoch": 0.9628944393051702, "grad_norm": 1.1177211999893188, "learning_rate": 3.6063135919147675e-08, "loss": 0.7156, "step": 18736 }, { "epoch": 0.9629458320485148, "grad_norm": 1.108678936958313, "learning_rate": 3.596342706256473e-08, "loss": 0.7006, "step": 18737 }, { "epoch": 0.9629972247918593, "grad_norm": 1.0977599620819092, "learning_rate": 3.5863855738949015e-08, "loss": 0.6981, "step": 18738 }, { "epoch": 0.963048617535204, "grad_norm": 1.0243377685546875, "learning_rate": 3.576442195106056e-08, "loss": 0.6644, "step": 18739 }, { "epoch": 0.9631000102785486, "grad_norm": 1.1440057754516602, "learning_rate": 3.5665125701653816e-08, "loss": 0.7731, "step": 18740 }, { "epoch": 0.9631514030218933, "grad_norm": 1.1067330837249756, "learning_rate": 3.5565966993479363e-08, "loss": 0.6632, "step": 18741 }, { "epoch": 0.9632027957652379, "grad_norm": 1.2536667585372925, "learning_rate": 3.546694582928611e-08, "loss": 0.6299, "step": 18742 }, { "epoch": 0.9632541885085826, "grad_norm": 1.1487494707107544, "learning_rate": 3.5368062211815216e-08, "loss": 0.6975, "step": 18743 }, { "epoch": 0.9633055812519272, "grad_norm": 1.0988941192626953, "learning_rate": 3.5269316143807796e-08, "loss": 0.6806, "step": 18744 }, { "epoch": 0.9633569739952719, "grad_norm": 1.055826187133789, "learning_rate": 3.517070762799946e-08, "loss": 0.6882, "step": 18745 }, { "epoch": 0.9634083667386165, "grad_norm": 1.4291555881500244, "learning_rate": 3.507223666712245e-08, "loss": 0.7055, "step": 18746 }, { "epoch": 0.9634597594819612, "grad_norm": 0.75838702917099, "learning_rate": 3.497390326390404e-08, "loss": 0.6424, "step": 18747 }, { "epoch": 0.9635111522253058, "grad_norm": 1.1375784873962402, "learning_rate": 3.487570742106983e-08, "loss": 0.6865, "step": 18748 }, { "epoch": 0.9635625449686505, "grad_norm": 1.0791219472885132, "learning_rate": 3.4777649141339854e-08, "loss": 0.7142, "step": 18749 }, { "epoch": 0.9636139377119951, "grad_norm": 1.0075712203979492, "learning_rate": 3.467972842743139e-08, "loss": 0.6366, "step": 18750 }, { "epoch": 0.9636653304553398, "grad_norm": 1.1120567321777344, "learning_rate": 3.458194528205616e-08, "loss": 0.715, "step": 18751 }, { "epoch": 0.9637167231986844, "grad_norm": 1.1362496614456177, "learning_rate": 3.4484299707924216e-08, "loss": 0.7038, "step": 18752 }, { "epoch": 0.9637681159420289, "grad_norm": 1.0460567474365234, "learning_rate": 3.4386791707741177e-08, "loss": 0.6677, "step": 18753 }, { "epoch": 0.9638195086853736, "grad_norm": 1.0803955793380737, "learning_rate": 3.42894212842082e-08, "loss": 0.6836, "step": 18754 }, { "epoch": 0.9638709014287182, "grad_norm": 0.6990349888801575, "learning_rate": 3.4192188440023696e-08, "loss": 0.6523, "step": 18755 }, { "epoch": 0.9639222941720629, "grad_norm": 1.0684380531311035, "learning_rate": 3.409509317787996e-08, "loss": 0.7144, "step": 18756 }, { "epoch": 0.9639736869154075, "grad_norm": 1.1243247985839844, "learning_rate": 3.3998135500468707e-08, "loss": 0.659, "step": 18757 }, { "epoch": 0.9640250796587522, "grad_norm": 1.0722790956497192, "learning_rate": 3.3901315410475586e-08, "loss": 0.6733, "step": 18758 }, { "epoch": 0.9640764724020968, "grad_norm": 0.674856424331665, "learning_rate": 3.380463291058289e-08, "loss": 0.6423, "step": 18759 }, { "epoch": 0.9641278651454415, "grad_norm": 1.1426196098327637, "learning_rate": 3.370808800346959e-08, "loss": 0.6972, "step": 18760 }, { "epoch": 0.9641792578887861, "grad_norm": 1.0138877630233765, "learning_rate": 3.361168069181075e-08, "loss": 0.6707, "step": 18761 }, { "epoch": 0.9642306506321308, "grad_norm": 1.0983363389968872, "learning_rate": 3.351541097827704e-08, "loss": 0.6222, "step": 18762 }, { "epoch": 0.9642820433754754, "grad_norm": 1.1426610946655273, "learning_rate": 3.3419278865535196e-08, "loss": 0.7086, "step": 18763 }, { "epoch": 0.96433343611882, "grad_norm": 1.1298636198043823, "learning_rate": 3.3323284356250316e-08, "loss": 0.69, "step": 18764 }, { "epoch": 0.9643848288621647, "grad_norm": 1.0991857051849365, "learning_rate": 3.322742745308028e-08, "loss": 0.6982, "step": 18765 }, { "epoch": 0.9644362216055093, "grad_norm": 1.1431920528411865, "learning_rate": 3.313170815868183e-08, "loss": 0.7004, "step": 18766 }, { "epoch": 0.964487614348854, "grad_norm": 1.0627944469451904, "learning_rate": 3.3036126475706196e-08, "loss": 0.6928, "step": 18767 }, { "epoch": 0.9645390070921985, "grad_norm": 1.060699224472046, "learning_rate": 3.294068240680293e-08, "loss": 0.6553, "step": 18768 }, { "epoch": 0.9645903998355432, "grad_norm": 1.1477888822555542, "learning_rate": 3.28453759546149e-08, "loss": 0.7421, "step": 18769 }, { "epoch": 0.9646417925788878, "grad_norm": 1.1723428964614868, "learning_rate": 3.275020712178334e-08, "loss": 0.6844, "step": 18770 }, { "epoch": 0.9646931853222325, "grad_norm": 1.0870871543884277, "learning_rate": 3.265517591094503e-08, "loss": 0.681, "step": 18771 }, { "epoch": 0.9647445780655771, "grad_norm": 1.1194822788238525, "learning_rate": 3.256028232473285e-08, "loss": 0.6448, "step": 18772 }, { "epoch": 0.9647959708089218, "grad_norm": 1.0316683053970337, "learning_rate": 3.246552636577582e-08, "loss": 0.6653, "step": 18773 }, { "epoch": 0.9648473635522664, "grad_norm": 1.206403374671936, "learning_rate": 3.237090803669907e-08, "loss": 0.6406, "step": 18774 }, { "epoch": 0.9648987562956111, "grad_norm": 1.1178646087646484, "learning_rate": 3.227642734012437e-08, "loss": 0.7032, "step": 18775 }, { "epoch": 0.9649501490389557, "grad_norm": 1.0351780652999878, "learning_rate": 3.218208427866909e-08, "loss": 0.6666, "step": 18776 }, { "epoch": 0.9650015417823004, "grad_norm": 1.0891104936599731, "learning_rate": 3.20878788549478e-08, "loss": 0.6841, "step": 18777 }, { "epoch": 0.965052934525645, "grad_norm": 1.0699228048324585, "learning_rate": 3.199381107156951e-08, "loss": 0.713, "step": 18778 }, { "epoch": 0.9651043272689896, "grad_norm": 1.0828994512557983, "learning_rate": 3.1899880931141045e-08, "loss": 0.7336, "step": 18779 }, { "epoch": 0.9651557200123343, "grad_norm": 1.0952237844467163, "learning_rate": 3.180608843626476e-08, "loss": 0.6548, "step": 18780 }, { "epoch": 0.9652071127556789, "grad_norm": 1.10565984249115, "learning_rate": 3.171243358953913e-08, "loss": 0.71, "step": 18781 }, { "epoch": 0.9652585054990236, "grad_norm": 1.1103250980377197, "learning_rate": 3.1618916393559294e-08, "loss": 0.71, "step": 18782 }, { "epoch": 0.9653098982423681, "grad_norm": 1.110566258430481, "learning_rate": 3.152553685091597e-08, "loss": 0.6751, "step": 18783 }, { "epoch": 0.9653612909857128, "grad_norm": 1.1556313037872314, "learning_rate": 3.143229496419653e-08, "loss": 0.7287, "step": 18784 }, { "epoch": 0.9654126837290574, "grad_norm": 0.7787653803825378, "learning_rate": 3.1339190735983905e-08, "loss": 0.626, "step": 18785 }, { "epoch": 0.9654640764724021, "grad_norm": 1.0406484603881836, "learning_rate": 3.12462241688577e-08, "loss": 0.6647, "step": 18786 }, { "epoch": 0.9655154692157467, "grad_norm": 0.6849263906478882, "learning_rate": 3.115339526539418e-08, "loss": 0.6409, "step": 18787 }, { "epoch": 0.9655668619590914, "grad_norm": 1.0911227464675903, "learning_rate": 3.1060704028164634e-08, "loss": 0.6564, "step": 18788 }, { "epoch": 0.965618254702436, "grad_norm": 1.0870689153671265, "learning_rate": 3.0968150459737e-08, "loss": 0.6546, "step": 18789 }, { "epoch": 0.9656696474457807, "grad_norm": 1.6593421697616577, "learning_rate": 3.087573456267645e-08, "loss": 0.674, "step": 18790 }, { "epoch": 0.9657210401891253, "grad_norm": 0.9238305687904358, "learning_rate": 3.0783456339542605e-08, "loss": 0.6586, "step": 18791 }, { "epoch": 0.96577243293247, "grad_norm": 1.0731452703475952, "learning_rate": 3.0691315792892306e-08, "loss": 0.7265, "step": 18792 }, { "epoch": 0.9658238256758146, "grad_norm": 1.1256870031356812, "learning_rate": 3.059931292527907e-08, "loss": 0.7384, "step": 18793 }, { "epoch": 0.9658752184191592, "grad_norm": 1.1552778482437134, "learning_rate": 3.0507447739250854e-08, "loss": 0.6979, "step": 18794 }, { "epoch": 0.9659266111625039, "grad_norm": 1.077541470527649, "learning_rate": 3.0415720237353954e-08, "loss": 0.7119, "step": 18795 }, { "epoch": 0.9659780039058485, "grad_norm": 1.0648802518844604, "learning_rate": 3.032413042212856e-08, "loss": 0.6847, "step": 18796 }, { "epoch": 0.9660293966491932, "grad_norm": 1.0678517818450928, "learning_rate": 3.023267829611376e-08, "loss": 0.7152, "step": 18797 }, { "epoch": 0.9660807893925377, "grad_norm": 1.1466152667999268, "learning_rate": 3.014136386184141e-08, "loss": 0.7051, "step": 18798 }, { "epoch": 0.9661321821358824, "grad_norm": 0.8240313529968262, "learning_rate": 3.005018712184282e-08, "loss": 0.6166, "step": 18799 }, { "epoch": 0.966183574879227, "grad_norm": 1.032137393951416, "learning_rate": 2.995914807864375e-08, "loss": 0.6492, "step": 18800 }, { "epoch": 0.9662349676225717, "grad_norm": 1.1151278018951416, "learning_rate": 2.9868246734766624e-08, "loss": 0.6852, "step": 18801 }, { "epoch": 0.9662863603659163, "grad_norm": 1.1414090394973755, "learning_rate": 2.9777483092729985e-08, "loss": 0.7051, "step": 18802 }, { "epoch": 0.966337753109261, "grad_norm": 1.1444951295852661, "learning_rate": 2.9686857155048488e-08, "loss": 0.7062, "step": 18803 }, { "epoch": 0.9663891458526056, "grad_norm": 1.1039438247680664, "learning_rate": 2.9596368924232345e-08, "loss": 0.6648, "step": 18804 }, { "epoch": 0.9664405385959502, "grad_norm": 1.0023292303085327, "learning_rate": 2.950601840278955e-08, "loss": 0.6446, "step": 18805 }, { "epoch": 0.9664919313392949, "grad_norm": 1.122821569442749, "learning_rate": 2.9415805593223102e-08, "loss": 0.6938, "step": 18806 }, { "epoch": 0.9665433240826395, "grad_norm": 1.0668556690216064, "learning_rate": 2.9325730498032113e-08, "loss": 0.651, "step": 18807 }, { "epoch": 0.9665947168259842, "grad_norm": 1.1478619575500488, "learning_rate": 2.9235793119712362e-08, "loss": 0.6952, "step": 18808 }, { "epoch": 0.9666461095693288, "grad_norm": 1.1086454391479492, "learning_rate": 2.914599346075575e-08, "loss": 0.6652, "step": 18809 }, { "epoch": 0.9666975023126735, "grad_norm": 1.151504635810852, "learning_rate": 2.9056331523650282e-08, "loss": 0.7271, "step": 18810 }, { "epoch": 0.9667488950560181, "grad_norm": 1.0807229280471802, "learning_rate": 2.896680731087953e-08, "loss": 0.6634, "step": 18811 }, { "epoch": 0.9668002877993628, "grad_norm": 1.118722915649414, "learning_rate": 2.8877420824924285e-08, "loss": 0.6864, "step": 18812 }, { "epoch": 0.9668516805427073, "grad_norm": 1.062164545059204, "learning_rate": 2.8788172068261457e-08, "loss": 0.7006, "step": 18813 }, { "epoch": 0.966903073286052, "grad_norm": 1.0743906497955322, "learning_rate": 2.8699061043363508e-08, "loss": 0.7387, "step": 18814 }, { "epoch": 0.9669544660293966, "grad_norm": 1.076715350151062, "learning_rate": 2.8610087752699024e-08, "loss": 0.6877, "step": 18815 }, { "epoch": 0.9670058587727413, "grad_norm": 1.1419105529785156, "learning_rate": 2.852125219873325e-08, "loss": 0.6819, "step": 18816 }, { "epoch": 0.9670572515160859, "grad_norm": 1.1111364364624023, "learning_rate": 2.8432554383927557e-08, "loss": 0.6808, "step": 18817 }, { "epoch": 0.9671086442594305, "grad_norm": 1.0722932815551758, "learning_rate": 2.834399431073942e-08, "loss": 0.7049, "step": 18818 }, { "epoch": 0.9671600370027752, "grad_norm": 1.0485846996307373, "learning_rate": 2.8255571981622433e-08, "loss": 0.684, "step": 18819 }, { "epoch": 0.9672114297461198, "grad_norm": 1.0673350095748901, "learning_rate": 2.8167287399026298e-08, "loss": 0.7262, "step": 18820 }, { "epoch": 0.9672628224894645, "grad_norm": 1.1458088159561157, "learning_rate": 2.8079140565396846e-08, "loss": 0.6988, "step": 18821 }, { "epoch": 0.9673142152328091, "grad_norm": 1.1272093057632446, "learning_rate": 2.799113148317656e-08, "loss": 0.7168, "step": 18822 }, { "epoch": 0.9673656079761538, "grad_norm": 1.0721516609191895, "learning_rate": 2.7903260154804044e-08, "loss": 0.672, "step": 18823 }, { "epoch": 0.9674170007194984, "grad_norm": 1.0681589841842651, "learning_rate": 2.781552658271347e-08, "loss": 0.7147, "step": 18824 }, { "epoch": 0.9674683934628431, "grad_norm": 1.1155004501342773, "learning_rate": 2.7727930769335663e-08, "loss": 0.6529, "step": 18825 }, { "epoch": 0.9675197862061877, "grad_norm": 1.0461015701293945, "learning_rate": 2.764047271709702e-08, "loss": 0.6724, "step": 18826 }, { "epoch": 0.9675711789495324, "grad_norm": 1.0760018825531006, "learning_rate": 2.7553152428421715e-08, "loss": 0.683, "step": 18827 }, { "epoch": 0.967622571692877, "grad_norm": 1.0824872255325317, "learning_rate": 2.7465969905728363e-08, "loss": 0.6644, "step": 18828 }, { "epoch": 0.9676739644362216, "grad_norm": 0.7416027188301086, "learning_rate": 2.737892515143281e-08, "loss": 0.6674, "step": 18829 }, { "epoch": 0.9677253571795662, "grad_norm": 1.061812400817871, "learning_rate": 2.729201816794591e-08, "loss": 0.6864, "step": 18830 }, { "epoch": 0.9677767499229109, "grad_norm": 1.1292790174484253, "learning_rate": 2.720524895767629e-08, "loss": 0.6905, "step": 18831 }, { "epoch": 0.9678281426662555, "grad_norm": 1.0828323364257812, "learning_rate": 2.7118617523027578e-08, "loss": 0.7002, "step": 18832 }, { "epoch": 0.9678795354096001, "grad_norm": 1.0326259136199951, "learning_rate": 2.703212386640064e-08, "loss": 0.7304, "step": 18833 }, { "epoch": 0.9679309281529448, "grad_norm": 1.0880370140075684, "learning_rate": 2.6945767990190774e-08, "loss": 0.6747, "step": 18834 }, { "epoch": 0.9679823208962894, "grad_norm": 1.0722930431365967, "learning_rate": 2.685954989679107e-08, "loss": 0.6474, "step": 18835 }, { "epoch": 0.9680337136396341, "grad_norm": 0.7151961922645569, "learning_rate": 2.677346958859073e-08, "loss": 0.6064, "step": 18836 }, { "epoch": 0.9680851063829787, "grad_norm": 1.0241864919662476, "learning_rate": 2.668752706797395e-08, "loss": 0.7276, "step": 18837 }, { "epoch": 0.9681364991263234, "grad_norm": 1.0279613733291626, "learning_rate": 2.6601722337322166e-08, "loss": 0.7318, "step": 18838 }, { "epoch": 0.968187891869668, "grad_norm": 1.106692910194397, "learning_rate": 2.6516055399012917e-08, "loss": 0.7229, "step": 18839 }, { "epoch": 0.9682392846130127, "grad_norm": 1.0555864572525024, "learning_rate": 2.6430526255418753e-08, "loss": 0.7012, "step": 18840 }, { "epoch": 0.9682906773563573, "grad_norm": 1.212843894958496, "learning_rate": 2.6345134908910552e-08, "loss": 0.6389, "step": 18841 }, { "epoch": 0.968342070099702, "grad_norm": 1.074379324913025, "learning_rate": 2.6259881361853646e-08, "loss": 0.6806, "step": 18842 }, { "epoch": 0.9683934628430466, "grad_norm": 0.7655879259109497, "learning_rate": 2.6174765616609478e-08, "loss": 0.6488, "step": 18843 }, { "epoch": 0.9684448555863912, "grad_norm": 1.108168363571167, "learning_rate": 2.608978767553727e-08, "loss": 0.7261, "step": 18844 }, { "epoch": 0.9684962483297358, "grad_norm": 1.0376719236373901, "learning_rate": 2.60049475409907e-08, "loss": 0.6662, "step": 18845 }, { "epoch": 0.9685476410730804, "grad_norm": 1.1275608539581299, "learning_rate": 2.592024521532066e-08, "loss": 0.6734, "step": 18846 }, { "epoch": 0.9685990338164251, "grad_norm": 1.0924030542373657, "learning_rate": 2.583568070087361e-08, "loss": 0.6998, "step": 18847 }, { "epoch": 0.9686504265597697, "grad_norm": 1.1389002799987793, "learning_rate": 2.5751253999993232e-08, "loss": 0.7333, "step": 18848 }, { "epoch": 0.9687018193031144, "grad_norm": 1.0855473279953003, "learning_rate": 2.566696511501765e-08, "loss": 0.7072, "step": 18849 }, { "epoch": 0.968753212046459, "grad_norm": 1.1324273347854614, "learning_rate": 2.558281404828278e-08, "loss": 0.7419, "step": 18850 }, { "epoch": 0.9688046047898037, "grad_norm": 1.0241976976394653, "learning_rate": 2.5498800802120082e-08, "loss": 0.6583, "step": 18851 }, { "epoch": 0.9688559975331483, "grad_norm": 1.0598891973495483, "learning_rate": 2.541492537885659e-08, "loss": 0.6827, "step": 18852 }, { "epoch": 0.968907390276493, "grad_norm": 1.0443761348724365, "learning_rate": 2.5331187780817113e-08, "loss": 0.6373, "step": 18853 }, { "epoch": 0.9689587830198376, "grad_norm": 1.0728639364242554, "learning_rate": 2.52475880103209e-08, "loss": 0.6728, "step": 18854 }, { "epoch": 0.9690101757631823, "grad_norm": 1.129329800605774, "learning_rate": 2.5164126069684434e-08, "loss": 0.7043, "step": 18855 }, { "epoch": 0.9690615685065269, "grad_norm": 1.1329033374786377, "learning_rate": 2.508080196121976e-08, "loss": 0.6538, "step": 18856 }, { "epoch": 0.9691129612498716, "grad_norm": 0.8134285807609558, "learning_rate": 2.4997615687236687e-08, "loss": 0.6852, "step": 18857 }, { "epoch": 0.9691643539932162, "grad_norm": 1.0913729667663574, "learning_rate": 2.4914567250038378e-08, "loss": 0.6454, "step": 18858 }, { "epoch": 0.9692157467365607, "grad_norm": 1.1296486854553223, "learning_rate": 2.483165665192633e-08, "loss": 0.697, "step": 18859 }, { "epoch": 0.9692671394799054, "grad_norm": 0.6473573446273804, "learning_rate": 2.474888389519814e-08, "loss": 0.6588, "step": 18860 }, { "epoch": 0.96931853222325, "grad_norm": 1.0771087408065796, "learning_rate": 2.4666248982146978e-08, "loss": 0.66, "step": 18861 }, { "epoch": 0.9693699249665947, "grad_norm": 1.0867892503738403, "learning_rate": 2.458375191506157e-08, "loss": 0.6749, "step": 18862 }, { "epoch": 0.9694213177099393, "grad_norm": 0.7781486511230469, "learning_rate": 2.4501392696227864e-08, "loss": 0.6233, "step": 18863 }, { "epoch": 0.969472710453284, "grad_norm": 0.7067942023277283, "learning_rate": 2.441917132792848e-08, "loss": 0.675, "step": 18864 }, { "epoch": 0.9695241031966286, "grad_norm": 1.1310036182403564, "learning_rate": 2.4337087812440485e-08, "loss": 0.6696, "step": 18865 }, { "epoch": 0.9695754959399733, "grad_norm": 0.7079627513885498, "learning_rate": 2.4255142152038725e-08, "loss": 0.6427, "step": 18866 }, { "epoch": 0.9696268886833179, "grad_norm": 1.0893439054489136, "learning_rate": 2.417333434899305e-08, "loss": 0.7214, "step": 18867 }, { "epoch": 0.9696782814266626, "grad_norm": 0.8117467164993286, "learning_rate": 2.4091664405570536e-08, "loss": 0.6717, "step": 18868 }, { "epoch": 0.9697296741700072, "grad_norm": 1.0447922945022583, "learning_rate": 2.4010132324033265e-08, "loss": 0.6386, "step": 18869 }, { "epoch": 0.9697810669133519, "grad_norm": 1.278834581375122, "learning_rate": 2.3928738106640536e-08, "loss": 0.6499, "step": 18870 }, { "epoch": 0.9698324596566965, "grad_norm": 1.2126810550689697, "learning_rate": 2.3847481755647773e-08, "loss": 0.6415, "step": 18871 }, { "epoch": 0.9698838524000412, "grad_norm": 1.0899922847747803, "learning_rate": 2.376636327330595e-08, "loss": 0.7028, "step": 18872 }, { "epoch": 0.9699352451433858, "grad_norm": 0.6740182638168335, "learning_rate": 2.3685382661862154e-08, "loss": 0.6199, "step": 18873 }, { "epoch": 0.9699866378867303, "grad_norm": 1.1607987880706787, "learning_rate": 2.3604539923560154e-08, "loss": 0.6388, "step": 18874 }, { "epoch": 0.970038030630075, "grad_norm": 1.4651659727096558, "learning_rate": 2.3523835060640376e-08, "loss": 0.698, "step": 18875 }, { "epoch": 0.9700894233734196, "grad_norm": 1.1489415168762207, "learning_rate": 2.3443268075338253e-08, "loss": 0.7354, "step": 18876 }, { "epoch": 0.9701408161167643, "grad_norm": 1.141338586807251, "learning_rate": 2.3362838969886447e-08, "loss": 0.6988, "step": 18877 }, { "epoch": 0.9701922088601089, "grad_norm": 1.1430846452713013, "learning_rate": 2.3282547746512618e-08, "loss": 0.6567, "step": 18878 }, { "epoch": 0.9702436016034536, "grad_norm": 1.0344953536987305, "learning_rate": 2.320239440744221e-08, "loss": 0.629, "step": 18879 }, { "epoch": 0.9702949943467982, "grad_norm": 1.1419172286987305, "learning_rate": 2.3122378954895107e-08, "loss": 0.7167, "step": 18880 }, { "epoch": 0.9703463870901429, "grad_norm": 1.0531339645385742, "learning_rate": 2.3042501391088436e-08, "loss": 0.7069, "step": 18881 }, { "epoch": 0.9703977798334875, "grad_norm": 1.0569449663162231, "learning_rate": 2.2962761718235415e-08, "loss": 0.6758, "step": 18882 }, { "epoch": 0.9704491725768322, "grad_norm": 1.0781910419464111, "learning_rate": 2.2883159938545396e-08, "loss": 0.7064, "step": 18883 }, { "epoch": 0.9705005653201768, "grad_norm": 1.0687355995178223, "learning_rate": 2.2803696054223832e-08, "loss": 0.6388, "step": 18884 }, { "epoch": 0.9705519580635215, "grad_norm": 1.134851098060608, "learning_rate": 2.272437006747119e-08, "loss": 0.6626, "step": 18885 }, { "epoch": 0.9706033508068661, "grad_norm": 1.061223030090332, "learning_rate": 2.2645181980487376e-08, "loss": 0.6844, "step": 18886 }, { "epoch": 0.9706547435502108, "grad_norm": 1.1131073236465454, "learning_rate": 2.256613179546452e-08, "loss": 0.6816, "step": 18887 }, { "epoch": 0.9707061362935554, "grad_norm": 0.6761428713798523, "learning_rate": 2.2487219514594206e-08, "loss": 0.6354, "step": 18888 }, { "epoch": 0.9707575290368999, "grad_norm": 1.1077100038528442, "learning_rate": 2.2408445140061352e-08, "loss": 0.6963, "step": 18889 }, { "epoch": 0.9708089217802446, "grad_norm": 1.0538028478622437, "learning_rate": 2.2329808674049768e-08, "loss": 0.7109, "step": 18890 }, { "epoch": 0.9708603145235892, "grad_norm": 1.0268654823303223, "learning_rate": 2.2251310118737156e-08, "loss": 0.6202, "step": 18891 }, { "epoch": 0.9709117072669339, "grad_norm": 1.0834881067276, "learning_rate": 2.2172949476299e-08, "loss": 0.6789, "step": 18892 }, { "epoch": 0.9709631000102785, "grad_norm": 0.7152554988861084, "learning_rate": 2.209472674890578e-08, "loss": 0.6402, "step": 18893 }, { "epoch": 0.9710144927536232, "grad_norm": 1.0728943347930908, "learning_rate": 2.2016641938725213e-08, "loss": 0.7107, "step": 18894 }, { "epoch": 0.9710658854969678, "grad_norm": 1.1177711486816406, "learning_rate": 2.1938695047920566e-08, "loss": 0.7024, "step": 18895 }, { "epoch": 0.9711172782403125, "grad_norm": 0.7367702722549438, "learning_rate": 2.1860886078651777e-08, "loss": 0.6362, "step": 18896 }, { "epoch": 0.9711686709836571, "grad_norm": 1.178797960281372, "learning_rate": 2.178321503307379e-08, "loss": 0.7302, "step": 18897 }, { "epoch": 0.9712200637270018, "grad_norm": 1.1022377014160156, "learning_rate": 2.170568191333933e-08, "loss": 0.7075, "step": 18898 }, { "epoch": 0.9712714564703464, "grad_norm": 1.0785107612609863, "learning_rate": 2.1628286721596115e-08, "loss": 0.6978, "step": 18899 }, { "epoch": 0.971322849213691, "grad_norm": 1.1143381595611572, "learning_rate": 2.155102945998855e-08, "loss": 0.6221, "step": 18900 }, { "epoch": 0.9713742419570357, "grad_norm": 1.0877841711044312, "learning_rate": 2.1473910130657137e-08, "loss": 0.6679, "step": 18901 }, { "epoch": 0.9714256347003803, "grad_norm": 1.1170271635055542, "learning_rate": 2.139692873573851e-08, "loss": 0.6631, "step": 18902 }, { "epoch": 0.971477027443725, "grad_norm": 1.1502450704574585, "learning_rate": 2.1320085277365398e-08, "loss": 0.7399, "step": 18903 }, { "epoch": 0.9715284201870696, "grad_norm": 1.104975938796997, "learning_rate": 2.1243379757667215e-08, "loss": 0.6933, "step": 18904 }, { "epoch": 0.9715798129304142, "grad_norm": 1.1125210523605347, "learning_rate": 2.1166812178768924e-08, "loss": 0.7014, "step": 18905 }, { "epoch": 0.9716312056737588, "grad_norm": 1.2464022636413574, "learning_rate": 2.1090382542791608e-08, "loss": 0.7458, "step": 18906 }, { "epoch": 0.9716825984171035, "grad_norm": 1.0029170513153076, "learning_rate": 2.101409085185302e-08, "loss": 0.6611, "step": 18907 }, { "epoch": 0.9717339911604481, "grad_norm": 1.1367905139923096, "learning_rate": 2.093793710806702e-08, "loss": 0.7378, "step": 18908 }, { "epoch": 0.9717853839037928, "grad_norm": 0.7762461304664612, "learning_rate": 2.0861921313543033e-08, "loss": 0.6297, "step": 18909 }, { "epoch": 0.9718367766471374, "grad_norm": 1.0437346696853638, "learning_rate": 2.0786043470387706e-08, "loss": 0.7059, "step": 18910 }, { "epoch": 0.9718881693904821, "grad_norm": 1.066454529762268, "learning_rate": 2.0710303580703252e-08, "loss": 0.6618, "step": 18911 }, { "epoch": 0.9719395621338267, "grad_norm": 1.1015859842300415, "learning_rate": 2.0634701646587983e-08, "loss": 0.7192, "step": 18912 }, { "epoch": 0.9719909548771714, "grad_norm": 1.1418269872665405, "learning_rate": 2.0559237670136345e-08, "loss": 0.7073, "step": 18913 }, { "epoch": 0.972042347620516, "grad_norm": 0.8296297192573547, "learning_rate": 2.0483911653439436e-08, "loss": 0.6096, "step": 18914 }, { "epoch": 0.9720937403638606, "grad_norm": 1.1228785514831543, "learning_rate": 2.0408723598584478e-08, "loss": 0.6911, "step": 18915 }, { "epoch": 0.9721451331072053, "grad_norm": 1.0520683526992798, "learning_rate": 2.033367350765314e-08, "loss": 0.6733, "step": 18916 }, { "epoch": 0.9721965258505499, "grad_norm": 1.0208766460418701, "learning_rate": 2.0258761382726533e-08, "loss": 0.6707, "step": 18917 }, { "epoch": 0.9722479185938946, "grad_norm": 1.069037914276123, "learning_rate": 2.0183987225879665e-08, "loss": 0.7541, "step": 18918 }, { "epoch": 0.9722993113372392, "grad_norm": 1.0833656787872314, "learning_rate": 2.0109351039183656e-08, "loss": 0.7455, "step": 18919 }, { "epoch": 0.9723507040805838, "grad_norm": 1.0597163438796997, "learning_rate": 2.0034852824706852e-08, "loss": 0.6202, "step": 18920 }, { "epoch": 0.9724020968239284, "grad_norm": 1.0821118354797363, "learning_rate": 1.9960492584513157e-08, "loss": 0.663, "step": 18921 }, { "epoch": 0.9724534895672731, "grad_norm": 1.1545156240463257, "learning_rate": 1.988627032066315e-08, "loss": 0.7197, "step": 18922 }, { "epoch": 0.9725048823106177, "grad_norm": 1.0051571130752563, "learning_rate": 1.98121860352124e-08, "loss": 0.6172, "step": 18923 }, { "epoch": 0.9725562750539624, "grad_norm": 1.1615413427352905, "learning_rate": 1.973823973021427e-08, "loss": 0.6971, "step": 18924 }, { "epoch": 0.972607667797307, "grad_norm": 1.130119800567627, "learning_rate": 1.9664431407716568e-08, "loss": 0.6997, "step": 18925 }, { "epoch": 0.9726590605406517, "grad_norm": 1.0433279275894165, "learning_rate": 1.959076106976543e-08, "loss": 0.6686, "step": 18926 }, { "epoch": 0.9727104532839963, "grad_norm": 1.029630184173584, "learning_rate": 1.9517228718401447e-08, "loss": 0.7213, "step": 18927 }, { "epoch": 0.972761846027341, "grad_norm": 1.0754896402359009, "learning_rate": 1.9443834355661884e-08, "loss": 0.6846, "step": 18928 }, { "epoch": 0.9728132387706856, "grad_norm": 1.111117959022522, "learning_rate": 1.937057798357955e-08, "loss": 0.6897, "step": 18929 }, { "epoch": 0.9728646315140302, "grad_norm": 1.1520622968673706, "learning_rate": 1.9297459604185052e-08, "loss": 0.708, "step": 18930 }, { "epoch": 0.9729160242573749, "grad_norm": 0.7266581058502197, "learning_rate": 1.9224479219503434e-08, "loss": 0.6598, "step": 18931 }, { "epoch": 0.9729674170007195, "grad_norm": 1.060086727142334, "learning_rate": 1.9151636831557518e-08, "loss": 0.6833, "step": 18932 }, { "epoch": 0.9730188097440642, "grad_norm": 1.08528733253479, "learning_rate": 1.9078932442364584e-08, "loss": 0.7053, "step": 18933 }, { "epoch": 0.9730702024874088, "grad_norm": 1.0269505977630615, "learning_rate": 1.9006366053939686e-08, "loss": 0.7215, "step": 18934 }, { "epoch": 0.9731215952307534, "grad_norm": 1.0795613527297974, "learning_rate": 1.8933937668292878e-08, "loss": 0.6935, "step": 18935 }, { "epoch": 0.973172987974098, "grad_norm": 1.0081359148025513, "learning_rate": 1.8861647287430897e-08, "loss": 0.6532, "step": 18936 }, { "epoch": 0.9732243807174427, "grad_norm": 0.7232218980789185, "learning_rate": 1.878949491335713e-08, "loss": 0.644, "step": 18937 }, { "epoch": 0.9732757734607873, "grad_norm": 0.7001712918281555, "learning_rate": 1.8717480548069987e-08, "loss": 0.6383, "step": 18938 }, { "epoch": 0.973327166204132, "grad_norm": 1.030350685119629, "learning_rate": 1.864560419356509e-08, "loss": 0.6483, "step": 18939 }, { "epoch": 0.9733785589474766, "grad_norm": 1.1158052682876587, "learning_rate": 1.8573865851833627e-08, "loss": 0.6686, "step": 18940 }, { "epoch": 0.9734299516908212, "grad_norm": 1.2173320055007935, "learning_rate": 1.8502265524862895e-08, "loss": 0.7525, "step": 18941 }, { "epoch": 0.9734813444341659, "grad_norm": 0.7782997488975525, "learning_rate": 1.8430803214637416e-08, "loss": 0.6527, "step": 18942 }, { "epoch": 0.9735327371775105, "grad_norm": 1.0380682945251465, "learning_rate": 1.8359478923136164e-08, "loss": 0.6861, "step": 18943 }, { "epoch": 0.9735841299208552, "grad_norm": 1.0890341997146606, "learning_rate": 1.828829265233589e-08, "loss": 0.7506, "step": 18944 }, { "epoch": 0.9736355226641998, "grad_norm": 1.091010570526123, "learning_rate": 1.8217244404208912e-08, "loss": 0.7029, "step": 18945 }, { "epoch": 0.9736869154075445, "grad_norm": 1.1977953910827637, "learning_rate": 1.8146334180723092e-08, "loss": 0.7042, "step": 18946 }, { "epoch": 0.9737383081508891, "grad_norm": 1.111505150794983, "learning_rate": 1.8075561983844083e-08, "loss": 0.6777, "step": 18947 }, { "epoch": 0.9737897008942338, "grad_norm": 1.0528273582458496, "learning_rate": 1.8004927815531982e-08, "loss": 0.7168, "step": 18948 }, { "epoch": 0.9738410936375784, "grad_norm": 1.0274717807769775, "learning_rate": 1.7934431677743002e-08, "loss": 0.6444, "step": 18949 }, { "epoch": 0.973892486380923, "grad_norm": 1.1316269636154175, "learning_rate": 1.786407357243225e-08, "loss": 0.6739, "step": 18950 }, { "epoch": 0.9739438791242676, "grad_norm": 0.7171473503112793, "learning_rate": 1.7793853501547054e-08, "loss": 0.6203, "step": 18951 }, { "epoch": 0.9739952718676123, "grad_norm": 0.6089109778404236, "learning_rate": 1.772377146703419e-08, "loss": 0.6375, "step": 18952 }, { "epoch": 0.9740466646109569, "grad_norm": 1.164149284362793, "learning_rate": 1.7653827470834882e-08, "loss": 0.7535, "step": 18953 }, { "epoch": 0.9740980573543015, "grad_norm": 1.049311637878418, "learning_rate": 1.7584021514887583e-08, "loss": 0.6888, "step": 18954 }, { "epoch": 0.9741494500976462, "grad_norm": 1.0824832916259766, "learning_rate": 1.7514353601125188e-08, "loss": 0.72, "step": 18955 }, { "epoch": 0.9742008428409908, "grad_norm": 1.101812481880188, "learning_rate": 1.7444823731478934e-08, "loss": 0.6634, "step": 18956 }, { "epoch": 0.9742522355843355, "grad_norm": 0.8164855241775513, "learning_rate": 1.7375431907875053e-08, "loss": 0.629, "step": 18957 }, { "epoch": 0.9743036283276801, "grad_norm": 1.0493357181549072, "learning_rate": 1.730617813223534e-08, "loss": 0.6938, "step": 18958 }, { "epoch": 0.9743550210710248, "grad_norm": 1.0917471647262573, "learning_rate": 1.723706240647882e-08, "loss": 0.7068, "step": 18959 }, { "epoch": 0.9744064138143694, "grad_norm": 1.1101254224777222, "learning_rate": 1.7168084732521186e-08, "loss": 0.6629, "step": 18960 }, { "epoch": 0.9744578065577141, "grad_norm": 1.1838537454605103, "learning_rate": 1.709924511227312e-08, "loss": 0.6758, "step": 18961 }, { "epoch": 0.9745091993010587, "grad_norm": 1.032524824142456, "learning_rate": 1.703054354764144e-08, "loss": 0.6681, "step": 18962 }, { "epoch": 0.9745605920444034, "grad_norm": 1.1587255001068115, "learning_rate": 1.696198004053018e-08, "loss": 0.6838, "step": 18963 }, { "epoch": 0.974611984787748, "grad_norm": 1.0701427459716797, "learning_rate": 1.689355459283837e-08, "loss": 0.6337, "step": 18964 }, { "epoch": 0.9746633775310926, "grad_norm": 1.155348300933838, "learning_rate": 1.6825267206462272e-08, "loss": 0.7387, "step": 18965 }, { "epoch": 0.9747147702744372, "grad_norm": 1.1337854862213135, "learning_rate": 1.6757117883293706e-08, "loss": 0.7209, "step": 18966 }, { "epoch": 0.9747661630177819, "grad_norm": 1.1096447706222534, "learning_rate": 1.6689106625220607e-08, "loss": 0.6925, "step": 18967 }, { "epoch": 0.9748175557611265, "grad_norm": 1.0637927055358887, "learning_rate": 1.662123343412758e-08, "loss": 0.6569, "step": 18968 }, { "epoch": 0.9748689485044711, "grad_norm": 1.0688611268997192, "learning_rate": 1.6553498311894233e-08, "loss": 0.6654, "step": 18969 }, { "epoch": 0.9749203412478158, "grad_norm": 1.0771616697311401, "learning_rate": 1.6485901260399062e-08, "loss": 0.6641, "step": 18970 }, { "epoch": 0.9749717339911604, "grad_norm": 1.153753399848938, "learning_rate": 1.6418442281513346e-08, "loss": 0.6914, "step": 18971 }, { "epoch": 0.9750231267345051, "grad_norm": 1.1474782228469849, "learning_rate": 1.6351121377106705e-08, "loss": 0.6978, "step": 18972 }, { "epoch": 0.9750745194778497, "grad_norm": 0.7070923447608948, "learning_rate": 1.628393854904431e-08, "loss": 0.6452, "step": 18973 }, { "epoch": 0.9751259122211944, "grad_norm": 1.225093126296997, "learning_rate": 1.6216893799186896e-08, "loss": 0.7352, "step": 18974 }, { "epoch": 0.975177304964539, "grad_norm": 1.0549525022506714, "learning_rate": 1.6149987129392973e-08, "loss": 0.7255, "step": 18975 }, { "epoch": 0.9752286977078837, "grad_norm": 1.0518620014190674, "learning_rate": 1.608321854151551e-08, "loss": 0.7042, "step": 18976 }, { "epoch": 0.9752800904512283, "grad_norm": 1.199503779411316, "learning_rate": 1.601658803740469e-08, "loss": 0.7512, "step": 18977 }, { "epoch": 0.975331483194573, "grad_norm": 1.1221708059310913, "learning_rate": 1.5950095618906812e-08, "loss": 0.6922, "step": 18978 }, { "epoch": 0.9753828759379176, "grad_norm": 1.103562831878662, "learning_rate": 1.5883741287863742e-08, "loss": 0.7131, "step": 18979 }, { "epoch": 0.9754342686812622, "grad_norm": 1.138983130455017, "learning_rate": 1.5817525046113447e-08, "loss": 0.6674, "step": 18980 }, { "epoch": 0.9754856614246068, "grad_norm": 0.6696897745132446, "learning_rate": 1.5751446895491685e-08, "loss": 0.6362, "step": 18981 }, { "epoch": 0.9755370541679514, "grad_norm": 1.128674864768982, "learning_rate": 1.56855068378281e-08, "loss": 0.6228, "step": 18982 }, { "epoch": 0.9755884469112961, "grad_norm": 1.146132230758667, "learning_rate": 1.5619704874950126e-08, "loss": 0.7186, "step": 18983 }, { "epoch": 0.9756398396546407, "grad_norm": 1.0576441287994385, "learning_rate": 1.5554041008680742e-08, "loss": 0.6689, "step": 18984 }, { "epoch": 0.9756912323979854, "grad_norm": 0.684005081653595, "learning_rate": 1.548851524083961e-08, "loss": 0.6373, "step": 18985 }, { "epoch": 0.97574262514133, "grad_norm": 1.107236385345459, "learning_rate": 1.5423127573241938e-08, "loss": 0.7091, "step": 18986 }, { "epoch": 0.9757940178846747, "grad_norm": 1.1063700914382935, "learning_rate": 1.5357878007699055e-08, "loss": 0.6594, "step": 18987 }, { "epoch": 0.9758454106280193, "grad_norm": 1.116257667541504, "learning_rate": 1.5292766546018966e-08, "loss": 0.6564, "step": 18988 }, { "epoch": 0.975896803371364, "grad_norm": 1.096600890159607, "learning_rate": 1.5227793190005226e-08, "loss": 0.6635, "step": 18989 }, { "epoch": 0.9759481961147086, "grad_norm": 1.089584231376648, "learning_rate": 1.5162957941459165e-08, "loss": 0.6859, "step": 18990 }, { "epoch": 0.9759995888580533, "grad_norm": 1.076155424118042, "learning_rate": 1.5098260802176022e-08, "loss": 0.6196, "step": 18991 }, { "epoch": 0.9760509816013979, "grad_norm": 1.0884466171264648, "learning_rate": 1.503370177394825e-08, "loss": 0.6579, "step": 18992 }, { "epoch": 0.9761023743447426, "grad_norm": 1.042235255241394, "learning_rate": 1.4969280858565526e-08, "loss": 0.7141, "step": 18993 }, { "epoch": 0.9761537670880872, "grad_norm": 1.0301247835159302, "learning_rate": 1.4904998057811428e-08, "loss": 0.6748, "step": 18994 }, { "epoch": 0.9762051598314319, "grad_norm": 1.1477546691894531, "learning_rate": 1.4840853373467856e-08, "loss": 0.7178, "step": 18995 }, { "epoch": 0.9762565525747764, "grad_norm": 1.0929409265518188, "learning_rate": 1.4776846807311175e-08, "loss": 0.6667, "step": 18996 }, { "epoch": 0.976307945318121, "grad_norm": 1.0887374877929688, "learning_rate": 1.471297836111607e-08, "loss": 0.6609, "step": 18997 }, { "epoch": 0.9763593380614657, "grad_norm": 1.0710246562957764, "learning_rate": 1.4649248036651131e-08, "loss": 0.6651, "step": 18998 }, { "epoch": 0.9764107308048103, "grad_norm": 1.0719025135040283, "learning_rate": 1.4585655835681612e-08, "loss": 0.6781, "step": 18999 }, { "epoch": 0.976462123548155, "grad_norm": 1.145033836364746, "learning_rate": 1.4522201759970544e-08, "loss": 0.689, "step": 19000 }, { "epoch": 0.9765135162914996, "grad_norm": 1.1196221113204956, "learning_rate": 1.445888581127486e-08, "loss": 0.694, "step": 19001 }, { "epoch": 0.9765649090348443, "grad_norm": 1.1073291301727295, "learning_rate": 1.4395707991349817e-08, "loss": 0.6768, "step": 19002 }, { "epoch": 0.9766163017781889, "grad_norm": 1.0918573141098022, "learning_rate": 1.4332668301945684e-08, "loss": 0.6901, "step": 19003 }, { "epoch": 0.9766676945215336, "grad_norm": 1.0817763805389404, "learning_rate": 1.4269766744807733e-08, "loss": 0.7065, "step": 19004 }, { "epoch": 0.9767190872648782, "grad_norm": 1.1678199768066406, "learning_rate": 1.4207003321680125e-08, "loss": 0.6653, "step": 19005 }, { "epoch": 0.9767704800082229, "grad_norm": 1.160949945449829, "learning_rate": 1.4144378034301465e-08, "loss": 0.675, "step": 19006 }, { "epoch": 0.9768218727515675, "grad_norm": 1.1462547779083252, "learning_rate": 1.4081890884406479e-08, "loss": 0.7078, "step": 19007 }, { "epoch": 0.9768732654949122, "grad_norm": 1.067353367805481, "learning_rate": 1.401954187372656e-08, "loss": 0.7229, "step": 19008 }, { "epoch": 0.9769246582382568, "grad_norm": 1.1847907304763794, "learning_rate": 1.3957331003989216e-08, "loss": 0.7025, "step": 19009 }, { "epoch": 0.9769760509816015, "grad_norm": 1.0363231897354126, "learning_rate": 1.3895258276918621e-08, "loss": 0.7048, "step": 19010 }, { "epoch": 0.977027443724946, "grad_norm": 1.0179197788238525, "learning_rate": 1.3833323694233402e-08, "loss": 0.6591, "step": 19011 }, { "epoch": 0.9770788364682906, "grad_norm": 1.1255227327346802, "learning_rate": 1.3771527257649963e-08, "loss": 0.6692, "step": 19012 }, { "epoch": 0.9771302292116353, "grad_norm": 1.121323823928833, "learning_rate": 1.3709868968880825e-08, "loss": 0.6459, "step": 19013 }, { "epoch": 0.9771816219549799, "grad_norm": 0.6892160773277283, "learning_rate": 1.3648348829634061e-08, "loss": 0.6392, "step": 19014 }, { "epoch": 0.9772330146983246, "grad_norm": 1.1293246746063232, "learning_rate": 1.3586966841613869e-08, "loss": 0.6854, "step": 19015 }, { "epoch": 0.9772844074416692, "grad_norm": 1.0964460372924805, "learning_rate": 1.3525723006521663e-08, "loss": 0.6706, "step": 19016 }, { "epoch": 0.9773358001850139, "grad_norm": 1.0584444999694824, "learning_rate": 1.3464617326052754e-08, "loss": 0.717, "step": 19017 }, { "epoch": 0.9773871929283585, "grad_norm": 1.067427158355713, "learning_rate": 1.3403649801901896e-08, "loss": 0.7216, "step": 19018 }, { "epoch": 0.9774385856717032, "grad_norm": 0.7050039768218994, "learning_rate": 1.3342820435757186e-08, "loss": 0.6969, "step": 19019 }, { "epoch": 0.9774899784150478, "grad_norm": 0.8224236965179443, "learning_rate": 1.3282129229303941e-08, "loss": 0.6617, "step": 19020 }, { "epoch": 0.9775413711583925, "grad_norm": 1.0814045667648315, "learning_rate": 1.322157618422415e-08, "loss": 0.6938, "step": 19021 }, { "epoch": 0.9775927639017371, "grad_norm": 1.1549818515777588, "learning_rate": 1.3161161302195359e-08, "loss": 0.6565, "step": 19022 }, { "epoch": 0.9776441566450818, "grad_norm": 1.1385267972946167, "learning_rate": 1.3100884584891227e-08, "loss": 0.7123, "step": 19023 }, { "epoch": 0.9776955493884264, "grad_norm": 1.1226462125778198, "learning_rate": 1.3040746033982088e-08, "loss": 0.7123, "step": 19024 }, { "epoch": 0.977746942131771, "grad_norm": 1.0890467166900635, "learning_rate": 1.2980745651133831e-08, "loss": 0.6765, "step": 19025 }, { "epoch": 0.9777983348751156, "grad_norm": 1.048478126525879, "learning_rate": 1.2920883438008457e-08, "loss": 0.678, "step": 19026 }, { "epoch": 0.9778497276184602, "grad_norm": 0.7622886300086975, "learning_rate": 1.28611593962652e-08, "loss": 0.6034, "step": 19027 }, { "epoch": 0.9779011203618049, "grad_norm": 1.1089476346969604, "learning_rate": 1.2801573527558841e-08, "loss": 0.6964, "step": 19028 }, { "epoch": 0.9779525131051495, "grad_norm": 1.1935968399047852, "learning_rate": 1.2742125833539176e-08, "loss": 0.7238, "step": 19029 }, { "epoch": 0.9780039058484942, "grad_norm": 0.7410804033279419, "learning_rate": 1.2682816315854329e-08, "loss": 0.6483, "step": 19030 }, { "epoch": 0.9780552985918388, "grad_norm": 0.6861765384674072, "learning_rate": 1.2623644976147432e-08, "loss": 0.6595, "step": 19031 }, { "epoch": 0.9781066913351835, "grad_norm": 1.1236472129821777, "learning_rate": 1.2564611816057725e-08, "loss": 0.6189, "step": 19032 }, { "epoch": 0.9781580840785281, "grad_norm": 1.0733482837677002, "learning_rate": 1.2505716837220572e-08, "loss": 0.6657, "step": 19033 }, { "epoch": 0.9782094768218728, "grad_norm": 1.0664821863174438, "learning_rate": 1.2446960041267997e-08, "loss": 0.6509, "step": 19034 }, { "epoch": 0.9782608695652174, "grad_norm": 1.1141270399093628, "learning_rate": 1.2388341429827034e-08, "loss": 0.6939, "step": 19035 }, { "epoch": 0.978312262308562, "grad_norm": 1.1200549602508545, "learning_rate": 1.2329861004523047e-08, "loss": 0.6354, "step": 19036 }, { "epoch": 0.9783636550519067, "grad_norm": 1.109206199645996, "learning_rate": 1.2271518766975854e-08, "loss": 0.6896, "step": 19037 }, { "epoch": 0.9784150477952513, "grad_norm": 1.0326014757156372, "learning_rate": 1.2213314718801383e-08, "loss": 0.6771, "step": 19038 }, { "epoch": 0.978466440538596, "grad_norm": 1.0878466367721558, "learning_rate": 1.215524886161279e-08, "loss": 0.6717, "step": 19039 }, { "epoch": 0.9785178332819406, "grad_norm": 1.0529557466506958, "learning_rate": 1.2097321197018785e-08, "loss": 0.6714, "step": 19040 }, { "epoch": 0.9785692260252852, "grad_norm": 1.041327714920044, "learning_rate": 1.2039531726623644e-08, "loss": 0.6721, "step": 19041 }, { "epoch": 0.9786206187686298, "grad_norm": 1.0779091119766235, "learning_rate": 1.1981880452029415e-08, "loss": 0.692, "step": 19042 }, { "epoch": 0.9786720115119745, "grad_norm": 1.1115553379058838, "learning_rate": 1.1924367374832602e-08, "loss": 0.6781, "step": 19043 }, { "epoch": 0.9787234042553191, "grad_norm": 1.041497826576233, "learning_rate": 1.1866992496626928e-08, "loss": 0.7056, "step": 19044 }, { "epoch": 0.9787747969986638, "grad_norm": 1.071725845336914, "learning_rate": 1.1809755819002788e-08, "loss": 0.6658, "step": 19045 }, { "epoch": 0.9788261897420084, "grad_norm": 1.0545936822891235, "learning_rate": 1.1752657343544471e-08, "loss": 0.6122, "step": 19046 }, { "epoch": 0.9788775824853531, "grad_norm": 0.6993469595909119, "learning_rate": 1.1695697071835154e-08, "loss": 0.6136, "step": 19047 }, { "epoch": 0.9789289752286977, "grad_norm": 1.0951720476150513, "learning_rate": 1.1638875005452466e-08, "loss": 0.6981, "step": 19048 }, { "epoch": 0.9789803679720424, "grad_norm": 0.7597584128379822, "learning_rate": 1.15821911459707e-08, "loss": 0.6327, "step": 19049 }, { "epoch": 0.979031760715387, "grad_norm": 0.7588790059089661, "learning_rate": 1.1525645494960824e-08, "loss": 0.6513, "step": 19050 }, { "epoch": 0.9790831534587316, "grad_norm": 1.1419697999954224, "learning_rate": 1.1469238053988808e-08, "loss": 0.6718, "step": 19051 }, { "epoch": 0.9791345462020763, "grad_norm": 1.011348009109497, "learning_rate": 1.1412968824617842e-08, "loss": 0.6044, "step": 19052 }, { "epoch": 0.9791859389454209, "grad_norm": 1.0667184591293335, "learning_rate": 1.1356837808406685e-08, "loss": 0.6536, "step": 19053 }, { "epoch": 0.9792373316887656, "grad_norm": 1.1459214687347412, "learning_rate": 1.1300845006910755e-08, "loss": 0.6748, "step": 19054 }, { "epoch": 0.9792887244321102, "grad_norm": 1.1163365840911865, "learning_rate": 1.1244990421681035e-08, "loss": 0.7342, "step": 19055 }, { "epoch": 0.9793401171754548, "grad_norm": 1.2121094465255737, "learning_rate": 1.1189274054265175e-08, "loss": 0.6452, "step": 19056 }, { "epoch": 0.9793915099187994, "grad_norm": 0.7309182286262512, "learning_rate": 1.1133695906207497e-08, "loss": 0.6596, "step": 19057 }, { "epoch": 0.9794429026621441, "grad_norm": 1.0707401037216187, "learning_rate": 1.107825597904677e-08, "loss": 0.6239, "step": 19058 }, { "epoch": 0.9794942954054887, "grad_norm": 0.7262753844261169, "learning_rate": 1.1022954274320096e-08, "loss": 0.6664, "step": 19059 }, { "epoch": 0.9795456881488334, "grad_norm": 1.1145539283752441, "learning_rate": 1.0967790793558475e-08, "loss": 0.7254, "step": 19060 }, { "epoch": 0.979597080892178, "grad_norm": 1.0860663652420044, "learning_rate": 1.0912765538290681e-08, "loss": 0.6381, "step": 19061 }, { "epoch": 0.9796484736355227, "grad_norm": 1.0046390295028687, "learning_rate": 1.0857878510041608e-08, "loss": 0.6395, "step": 19062 }, { "epoch": 0.9796998663788673, "grad_norm": 1.0336318016052246, "learning_rate": 1.0803129710332261e-08, "loss": 0.701, "step": 19063 }, { "epoch": 0.979751259122212, "grad_norm": 1.2074172496795654, "learning_rate": 1.0748519140678648e-08, "loss": 0.7768, "step": 19064 }, { "epoch": 0.9798026518655566, "grad_norm": 0.695376455783844, "learning_rate": 1.0694046802594005e-08, "loss": 0.6197, "step": 19065 }, { "epoch": 0.9798540446089012, "grad_norm": 1.2134979963302612, "learning_rate": 1.0639712697587679e-08, "loss": 0.6872, "step": 19066 }, { "epoch": 0.9799054373522459, "grad_norm": 1.1151384115219116, "learning_rate": 1.0585516827165132e-08, "loss": 0.7196, "step": 19067 }, { "epoch": 0.9799568300955905, "grad_norm": 1.2165758609771729, "learning_rate": 1.0531459192827941e-08, "loss": 0.6879, "step": 19068 }, { "epoch": 0.9800082228389352, "grad_norm": 1.089278221130371, "learning_rate": 1.0477539796073799e-08, "loss": 0.7039, "step": 19069 }, { "epoch": 0.9800596155822798, "grad_norm": 1.030637264251709, "learning_rate": 1.0423758638395953e-08, "loss": 0.7178, "step": 19070 }, { "epoch": 0.9801110083256244, "grad_norm": 1.1957106590270996, "learning_rate": 1.0370115721285433e-08, "loss": 0.6833, "step": 19071 }, { "epoch": 0.980162401068969, "grad_norm": 1.101899266242981, "learning_rate": 1.031661104622772e-08, "loss": 0.6531, "step": 19072 }, { "epoch": 0.9802137938123137, "grad_norm": 1.059823989868164, "learning_rate": 1.026324461470607e-08, "loss": 0.6482, "step": 19073 }, { "epoch": 0.9802651865556583, "grad_norm": 1.1145623922348022, "learning_rate": 1.0210016428197633e-08, "loss": 0.7277, "step": 19074 }, { "epoch": 0.980316579299003, "grad_norm": 1.0254786014556885, "learning_rate": 1.015692648817901e-08, "loss": 0.6898, "step": 19075 }, { "epoch": 0.9803679720423476, "grad_norm": 1.081668734550476, "learning_rate": 1.010397479611902e-08, "loss": 0.6799, "step": 19076 }, { "epoch": 0.9804193647856922, "grad_norm": 1.0847656726837158, "learning_rate": 1.0051161353486495e-08, "loss": 0.6227, "step": 19077 }, { "epoch": 0.9804707575290369, "grad_norm": 0.767213761806488, "learning_rate": 9.998486161743592e-09, "loss": 0.6262, "step": 19078 }, { "epoch": 0.9805221502723815, "grad_norm": 1.0824520587921143, "learning_rate": 9.945949222350814e-09, "loss": 0.6713, "step": 19079 }, { "epoch": 0.9805735430157262, "grad_norm": 0.7505239248275757, "learning_rate": 9.893550536761997e-09, "loss": 0.6221, "step": 19080 }, { "epoch": 0.9806249357590708, "grad_norm": 1.0721256732940674, "learning_rate": 9.841290106430979e-09, "loss": 0.6697, "step": 19081 }, { "epoch": 0.9806763285024155, "grad_norm": 1.0725972652435303, "learning_rate": 9.789167932803822e-09, "loss": 0.7019, "step": 19082 }, { "epoch": 0.9807277212457601, "grad_norm": 1.0916476249694824, "learning_rate": 9.73718401732604e-09, "loss": 0.7018, "step": 19083 }, { "epoch": 0.9807791139891048, "grad_norm": 1.1089128255844116, "learning_rate": 9.685338361437036e-09, "loss": 0.741, "step": 19084 }, { "epoch": 0.9808305067324494, "grad_norm": 1.081346035003662, "learning_rate": 9.633630966573437e-09, "loss": 0.6489, "step": 19085 }, { "epoch": 0.9808818994757941, "grad_norm": 0.7334508299827576, "learning_rate": 9.582061834167988e-09, "loss": 0.6498, "step": 19086 }, { "epoch": 0.9809332922191386, "grad_norm": 1.1141550540924072, "learning_rate": 9.530630965649546e-09, "loss": 0.5947, "step": 19087 }, { "epoch": 0.9809846849624833, "grad_norm": 1.1384509801864624, "learning_rate": 9.479338362442525e-09, "loss": 0.7322, "step": 19088 }, { "epoch": 0.9810360777058279, "grad_norm": 1.1362167596817017, "learning_rate": 9.428184025968567e-09, "loss": 0.651, "step": 19089 }, { "epoch": 0.9810874704491725, "grad_norm": 1.0448548793792725, "learning_rate": 9.377167957644317e-09, "loss": 0.6895, "step": 19090 }, { "epoch": 0.9811388631925172, "grad_norm": 1.0639455318450928, "learning_rate": 9.326290158884199e-09, "loss": 0.6757, "step": 19091 }, { "epoch": 0.9811902559358618, "grad_norm": 1.1399660110473633, "learning_rate": 9.275550631097086e-09, "loss": 0.7301, "step": 19092 }, { "epoch": 0.9812416486792065, "grad_norm": 1.0856454372406006, "learning_rate": 9.224949375689074e-09, "loss": 0.6618, "step": 19093 }, { "epoch": 0.9812930414225511, "grad_norm": 1.1854712963104248, "learning_rate": 9.17448639406182e-09, "loss": 0.699, "step": 19094 }, { "epoch": 0.9813444341658958, "grad_norm": 1.0969607830047607, "learning_rate": 9.124161687613653e-09, "loss": 0.665, "step": 19095 }, { "epoch": 0.9813958269092404, "grad_norm": 1.1026151180267334, "learning_rate": 9.07397525773901e-09, "loss": 0.667, "step": 19096 }, { "epoch": 0.9814472196525851, "grad_norm": 1.168717622756958, "learning_rate": 9.02392710582789e-09, "loss": 0.7122, "step": 19097 }, { "epoch": 0.9814986123959297, "grad_norm": 1.1737709045410156, "learning_rate": 8.974017233267517e-09, "loss": 0.6891, "step": 19098 }, { "epoch": 0.9815500051392744, "grad_norm": 1.1070634126663208, "learning_rate": 8.92424564144012e-09, "loss": 0.6932, "step": 19099 }, { "epoch": 0.981601397882619, "grad_norm": 1.1919206380844116, "learning_rate": 8.874612331725151e-09, "loss": 0.6701, "step": 19100 }, { "epoch": 0.9816527906259637, "grad_norm": 0.7962684631347656, "learning_rate": 8.825117305497622e-09, "loss": 0.6349, "step": 19101 }, { "epoch": 0.9817041833693082, "grad_norm": 1.1634429693222046, "learning_rate": 8.775760564128654e-09, "loss": 0.6741, "step": 19102 }, { "epoch": 0.9817555761126529, "grad_norm": 1.1207243204116821, "learning_rate": 8.726542108985492e-09, "loss": 0.6821, "step": 19103 }, { "epoch": 0.9818069688559975, "grad_norm": 0.7481328845024109, "learning_rate": 8.677461941432041e-09, "loss": 0.6464, "step": 19104 }, { "epoch": 0.9818583615993421, "grad_norm": 1.1418112516403198, "learning_rate": 8.628520062828882e-09, "loss": 0.7229, "step": 19105 }, { "epoch": 0.9819097543426868, "grad_norm": 1.108088493347168, "learning_rate": 8.579716474530486e-09, "loss": 0.6829, "step": 19106 }, { "epoch": 0.9819611470860314, "grad_norm": 1.153940200805664, "learning_rate": 8.531051177890215e-09, "loss": 0.7257, "step": 19107 }, { "epoch": 0.9820125398293761, "grad_norm": 1.1209362745285034, "learning_rate": 8.482524174255325e-09, "loss": 0.7353, "step": 19108 }, { "epoch": 0.9820639325727207, "grad_norm": 1.1414313316345215, "learning_rate": 8.434135464971404e-09, "loss": 0.7462, "step": 19109 }, { "epoch": 0.9821153253160654, "grad_norm": 1.0669515132904053, "learning_rate": 8.385885051378495e-09, "loss": 0.7085, "step": 19110 }, { "epoch": 0.98216671805941, "grad_norm": 1.107173204421997, "learning_rate": 8.337772934813859e-09, "loss": 0.7037, "step": 19111 }, { "epoch": 0.9822181108027547, "grad_norm": 0.7353394031524658, "learning_rate": 8.289799116609764e-09, "loss": 0.6598, "step": 19112 }, { "epoch": 0.9822695035460993, "grad_norm": 1.118727445602417, "learning_rate": 8.241963598095703e-09, "loss": 0.6675, "step": 19113 }, { "epoch": 0.982320896289444, "grad_norm": 1.026202917098999, "learning_rate": 8.194266380597838e-09, "loss": 0.6836, "step": 19114 }, { "epoch": 0.9823722890327886, "grad_norm": 1.1317459344863892, "learning_rate": 8.146707465436221e-09, "loss": 0.7484, "step": 19115 }, { "epoch": 0.9824236817761333, "grad_norm": 1.1192821264266968, "learning_rate": 8.099286853929245e-09, "loss": 0.6845, "step": 19116 }, { "epoch": 0.9824750745194778, "grad_norm": 1.1494070291519165, "learning_rate": 8.052004547390302e-09, "loss": 0.6491, "step": 19117 }, { "epoch": 0.9825264672628224, "grad_norm": 1.0828487873077393, "learning_rate": 8.004860547130566e-09, "loss": 0.7016, "step": 19118 }, { "epoch": 0.9825778600061671, "grad_norm": 1.1415433883666992, "learning_rate": 7.95785485445455e-09, "loss": 0.736, "step": 19119 }, { "epoch": 0.9826292527495117, "grad_norm": 0.7266352772712708, "learning_rate": 7.91098747066621e-09, "loss": 0.6134, "step": 19120 }, { "epoch": 0.9826806454928564, "grad_norm": 0.755668580532074, "learning_rate": 7.86425839706284e-09, "loss": 0.6347, "step": 19121 }, { "epoch": 0.982732038236201, "grad_norm": 1.0913411378860474, "learning_rate": 7.81766763493952e-09, "loss": 0.6555, "step": 19122 }, { "epoch": 0.9827834309795457, "grad_norm": 1.141624927520752, "learning_rate": 7.771215185586877e-09, "loss": 0.6939, "step": 19123 }, { "epoch": 0.9828348237228903, "grad_norm": 0.7152939438819885, "learning_rate": 7.724901050292776e-09, "loss": 0.688, "step": 19124 }, { "epoch": 0.982886216466235, "grad_norm": 1.0655516386032104, "learning_rate": 7.678725230339523e-09, "loss": 0.7159, "step": 19125 }, { "epoch": 0.9829376092095796, "grad_norm": 1.1815950870513916, "learning_rate": 7.632687727006649e-09, "loss": 0.6745, "step": 19126 }, { "epoch": 0.9829890019529243, "grad_norm": 1.0789704322814941, "learning_rate": 7.5867885415698e-09, "loss": 0.6666, "step": 19127 }, { "epoch": 0.9830403946962689, "grad_norm": 1.1177507638931274, "learning_rate": 7.541027675300183e-09, "loss": 0.7692, "step": 19128 }, { "epoch": 0.9830917874396136, "grad_norm": 1.1244651079177856, "learning_rate": 7.495405129466227e-09, "loss": 0.6679, "step": 19129 }, { "epoch": 0.9831431801829582, "grad_norm": 1.0718439817428589, "learning_rate": 7.4499209053319195e-09, "loss": 0.6809, "step": 19130 }, { "epoch": 0.9831945729263029, "grad_norm": 0.9903975129127502, "learning_rate": 7.404575004157366e-09, "loss": 0.6413, "step": 19131 }, { "epoch": 0.9832459656696474, "grad_norm": 1.2095125913619995, "learning_rate": 7.3593674271987826e-09, "loss": 0.7274, "step": 19132 }, { "epoch": 0.983297358412992, "grad_norm": 1.072476863861084, "learning_rate": 7.3142981757085006e-09, "loss": 0.6719, "step": 19133 }, { "epoch": 0.9833487511563367, "grad_norm": 1.0492796897888184, "learning_rate": 7.2693672509355215e-09, "loss": 0.6504, "step": 19134 }, { "epoch": 0.9834001438996813, "grad_norm": 1.0729732513427734, "learning_rate": 7.2245746541244055e-09, "loss": 0.6778, "step": 19135 }, { "epoch": 0.983451536643026, "grad_norm": 1.055814266204834, "learning_rate": 7.179920386516381e-09, "loss": 0.6753, "step": 19136 }, { "epoch": 0.9835029293863706, "grad_norm": 0.7535542845726013, "learning_rate": 7.135404449348793e-09, "loss": 0.6658, "step": 19137 }, { "epoch": 0.9835543221297153, "grad_norm": 1.0257213115692139, "learning_rate": 7.091026843855098e-09, "loss": 0.6883, "step": 19138 }, { "epoch": 0.9836057148730599, "grad_norm": 0.7484007477760315, "learning_rate": 7.046787571263758e-09, "loss": 0.5925, "step": 19139 }, { "epoch": 0.9836571076164046, "grad_norm": 1.067734956741333, "learning_rate": 7.002686632802124e-09, "loss": 0.6775, "step": 19140 }, { "epoch": 0.9837085003597492, "grad_norm": 0.7192723155021667, "learning_rate": 6.958724029690334e-09, "loss": 0.6252, "step": 19141 }, { "epoch": 0.9837598931030939, "grad_norm": 1.1488723754882812, "learning_rate": 6.914899763147409e-09, "loss": 0.6476, "step": 19142 }, { "epoch": 0.9838112858464385, "grad_norm": 1.1378847360610962, "learning_rate": 6.871213834387936e-09, "loss": 0.6822, "step": 19143 }, { "epoch": 0.9838626785897832, "grad_norm": 1.102319598197937, "learning_rate": 6.8276662446215e-09, "loss": 0.6462, "step": 19144 }, { "epoch": 0.9839140713331278, "grad_norm": 1.0780017375946045, "learning_rate": 6.784256995054361e-09, "loss": 0.6819, "step": 19145 }, { "epoch": 0.9839654640764725, "grad_norm": 1.1294132471084595, "learning_rate": 6.74098608689e-09, "loss": 0.7185, "step": 19146 }, { "epoch": 0.984016856819817, "grad_norm": 0.9775769710540771, "learning_rate": 6.697853521326903e-09, "loss": 0.6296, "step": 19147 }, { "epoch": 0.9840682495631616, "grad_norm": 0.6856719255447388, "learning_rate": 6.654859299560223e-09, "loss": 0.6196, "step": 19148 }, { "epoch": 0.9841196423065063, "grad_norm": 1.0954852104187012, "learning_rate": 6.6120034227812325e-09, "loss": 0.7642, "step": 19149 }, { "epoch": 0.9841710350498509, "grad_norm": 1.256247639656067, "learning_rate": 6.5692858921767575e-09, "loss": 0.7164, "step": 19150 }, { "epoch": 0.9842224277931956, "grad_norm": 1.199968934059143, "learning_rate": 6.526706708930853e-09, "loss": 0.6516, "step": 19151 }, { "epoch": 0.9842738205365402, "grad_norm": 1.0853835344314575, "learning_rate": 6.484265874223128e-09, "loss": 0.6676, "step": 19152 }, { "epoch": 0.9843252132798849, "grad_norm": 0.7812142372131348, "learning_rate": 6.441963389229311e-09, "loss": 0.6347, "step": 19153 }, { "epoch": 0.9843766060232295, "grad_norm": 0.741860032081604, "learning_rate": 6.399799255121242e-09, "loss": 0.6906, "step": 19154 }, { "epoch": 0.9844279987665742, "grad_norm": 1.0499294996261597, "learning_rate": 6.35777347306743e-09, "loss": 0.7231, "step": 19155 }, { "epoch": 0.9844793915099188, "grad_norm": 1.0360826253890991, "learning_rate": 6.3158860442325e-09, "loss": 0.6269, "step": 19156 }, { "epoch": 0.9845307842532635, "grad_norm": 1.0612317323684692, "learning_rate": 6.274136969776634e-09, "loss": 0.6786, "step": 19157 }, { "epoch": 0.9845821769966081, "grad_norm": 0.6755431294441223, "learning_rate": 6.23252625085613e-09, "loss": 0.6322, "step": 19158 }, { "epoch": 0.9846335697399528, "grad_norm": 0.7971909046173096, "learning_rate": 6.19105388862451e-09, "loss": 0.6373, "step": 19159 }, { "epoch": 0.9846849624832974, "grad_norm": 1.1279340982437134, "learning_rate": 6.1497198842308535e-09, "loss": 0.7082, "step": 19160 }, { "epoch": 0.984736355226642, "grad_norm": 1.1069625616073608, "learning_rate": 6.108524238819802e-09, "loss": 0.7229, "step": 19161 }, { "epoch": 0.9847877479699867, "grad_norm": 1.1133421659469604, "learning_rate": 6.067466953532663e-09, "loss": 0.7153, "step": 19162 }, { "epoch": 0.9848391407133312, "grad_norm": 1.0618606805801392, "learning_rate": 6.0265480295079724e-09, "loss": 0.67, "step": 19163 }, { "epoch": 0.9848905334566759, "grad_norm": 1.1177117824554443, "learning_rate": 5.985767467878157e-09, "loss": 0.7185, "step": 19164 }, { "epoch": 0.9849419262000205, "grad_norm": 1.081763744354248, "learning_rate": 5.9451252697739766e-09, "loss": 0.6822, "step": 19165 }, { "epoch": 0.9849933189433652, "grad_norm": 1.1279723644256592, "learning_rate": 5.904621436320645e-09, "loss": 0.6519, "step": 19166 }, { "epoch": 0.9850447116867098, "grad_norm": 1.1024889945983887, "learning_rate": 5.864255968641153e-09, "loss": 0.694, "step": 19167 }, { "epoch": 0.9850961044300545, "grad_norm": 1.078163981437683, "learning_rate": 5.824028867853493e-09, "loss": 0.7349, "step": 19168 }, { "epoch": 0.9851474971733991, "grad_norm": 1.0550600290298462, "learning_rate": 5.78394013507233e-09, "loss": 0.6831, "step": 19169 }, { "epoch": 0.9851988899167438, "grad_norm": 1.13813316822052, "learning_rate": 5.743989771408443e-09, "loss": 0.7109, "step": 19170 }, { "epoch": 0.9852502826600884, "grad_norm": 1.0683363676071167, "learning_rate": 5.704177777968167e-09, "loss": 0.6855, "step": 19171 }, { "epoch": 0.985301675403433, "grad_norm": 1.1147421598434448, "learning_rate": 5.664504155855066e-09, "loss": 0.6867, "step": 19172 }, { "epoch": 0.9853530681467777, "grad_norm": 1.1705666780471802, "learning_rate": 5.62496890616826e-09, "loss": 0.7796, "step": 19173 }, { "epoch": 0.9854044608901223, "grad_norm": 1.0893540382385254, "learning_rate": 5.5855720300024285e-09, "loss": 0.7028, "step": 19174 }, { "epoch": 0.985455853633467, "grad_norm": 1.084223985671997, "learning_rate": 5.546313528450031e-09, "loss": 0.6677, "step": 19175 }, { "epoch": 0.9855072463768116, "grad_norm": 1.045938491821289, "learning_rate": 5.507193402598532e-09, "loss": 0.6436, "step": 19176 }, { "epoch": 0.9855586391201563, "grad_norm": 1.0392664670944214, "learning_rate": 5.468211653531508e-09, "loss": 0.6835, "step": 19177 }, { "epoch": 0.9856100318635008, "grad_norm": 0.6864719986915588, "learning_rate": 5.429368282329206e-09, "loss": 0.6343, "step": 19178 }, { "epoch": 0.9856614246068455, "grad_norm": 1.2241264581680298, "learning_rate": 5.390663290067988e-09, "loss": 0.7432, "step": 19179 }, { "epoch": 0.9857128173501901, "grad_norm": 1.0778214931488037, "learning_rate": 5.352096677819774e-09, "loss": 0.6722, "step": 19180 }, { "epoch": 0.9857642100935348, "grad_norm": 0.6745914220809937, "learning_rate": 5.313668446653153e-09, "loss": 0.6947, "step": 19181 }, { "epoch": 0.9858156028368794, "grad_norm": 1.120334506034851, "learning_rate": 5.27537859763283e-09, "loss": 0.6699, "step": 19182 }, { "epoch": 0.9858669955802241, "grad_norm": 1.0697579383850098, "learning_rate": 5.237227131819622e-09, "loss": 0.7022, "step": 19183 }, { "epoch": 0.9859183883235687, "grad_norm": 1.0539544820785522, "learning_rate": 5.199214050271018e-09, "loss": 0.7388, "step": 19184 }, { "epoch": 0.9859697810669134, "grad_norm": 1.1074775457382202, "learning_rate": 5.161339354040062e-09, "loss": 0.6944, "step": 19185 }, { "epoch": 0.986021173810258, "grad_norm": 1.140161395072937, "learning_rate": 5.123603044175362e-09, "loss": 0.6932, "step": 19186 }, { "epoch": 0.9860725665536026, "grad_norm": 0.7605612277984619, "learning_rate": 5.086005121723303e-09, "loss": 0.6379, "step": 19187 }, { "epoch": 0.9861239592969473, "grad_norm": 1.0699491500854492, "learning_rate": 5.0485455877252735e-09, "loss": 0.6653, "step": 19188 }, { "epoch": 0.9861753520402919, "grad_norm": 0.8272678256034851, "learning_rate": 5.011224443218776e-09, "loss": 0.622, "step": 19189 }, { "epoch": 0.9862267447836366, "grad_norm": 0.7901523113250732, "learning_rate": 4.97404168923854e-09, "loss": 0.6459, "step": 19190 }, { "epoch": 0.9862781375269812, "grad_norm": 0.7620747089385986, "learning_rate": 4.936997326813742e-09, "loss": 0.6946, "step": 19191 }, { "epoch": 0.9863295302703259, "grad_norm": 1.148488998413086, "learning_rate": 4.900091356971892e-09, "loss": 0.7208, "step": 19192 }, { "epoch": 0.9863809230136704, "grad_norm": 0.7960978746414185, "learning_rate": 4.863323780734952e-09, "loss": 0.6766, "step": 19193 }, { "epoch": 0.9864323157570151, "grad_norm": 1.1352416276931763, "learning_rate": 4.826694599121551e-09, "loss": 0.6709, "step": 19194 }, { "epoch": 0.9864837085003597, "grad_norm": 1.0732593536376953, "learning_rate": 4.790203813146433e-09, "loss": 0.6695, "step": 19195 }, { "epoch": 0.9865351012437044, "grad_norm": 1.127615213394165, "learning_rate": 4.75385142382101e-09, "loss": 0.6609, "step": 19196 }, { "epoch": 0.986586493987049, "grad_norm": 0.8002522587776184, "learning_rate": 4.7176374321517004e-09, "loss": 0.6381, "step": 19197 }, { "epoch": 0.9866378867303937, "grad_norm": 1.1601793766021729, "learning_rate": 4.6815618391427005e-09, "loss": 0.6586, "step": 19198 }, { "epoch": 0.9866892794737383, "grad_norm": 1.1371480226516724, "learning_rate": 4.645624645793212e-09, "loss": 0.7339, "step": 19199 }, { "epoch": 0.986740672217083, "grad_norm": 1.1167134046554565, "learning_rate": 4.609825853099104e-09, "loss": 0.7273, "step": 19200 }, { "epoch": 0.9867920649604276, "grad_norm": 1.046738624572754, "learning_rate": 4.57416546205125e-09, "loss": 0.6663, "step": 19201 }, { "epoch": 0.9868434577037722, "grad_norm": 1.2537602186203003, "learning_rate": 4.53864347363886e-09, "loss": 0.6941, "step": 19202 }, { "epoch": 0.9868948504471169, "grad_norm": 1.073128581047058, "learning_rate": 4.50325988884559e-09, "loss": 0.7056, "step": 19203 }, { "epoch": 0.9869462431904615, "grad_norm": 1.1635302305221558, "learning_rate": 4.468014708651769e-09, "loss": 0.6861, "step": 19204 }, { "epoch": 0.9869976359338062, "grad_norm": 1.0978872776031494, "learning_rate": 4.432907934033836e-09, "loss": 0.6761, "step": 19205 }, { "epoch": 0.9870490286771508, "grad_norm": 1.0596505403518677, "learning_rate": 4.397939565964349e-09, "loss": 0.6528, "step": 19206 }, { "epoch": 0.9871004214204955, "grad_norm": 1.0515906810760498, "learning_rate": 4.36310960541253e-09, "loss": 0.6755, "step": 19207 }, { "epoch": 0.98715181416384, "grad_norm": 0.7850010991096497, "learning_rate": 4.328418053343164e-09, "loss": 0.5906, "step": 19208 }, { "epoch": 0.9872032069071847, "grad_norm": 1.1051132678985596, "learning_rate": 4.293864910717704e-09, "loss": 0.6903, "step": 19209 }, { "epoch": 0.9872545996505293, "grad_norm": 1.102198600769043, "learning_rate": 4.259450178492608e-09, "loss": 0.7134, "step": 19210 }, { "epoch": 0.987305992393874, "grad_norm": 1.0436303615570068, "learning_rate": 4.225173857622111e-09, "loss": 0.6876, "step": 19211 }, { "epoch": 0.9873573851372186, "grad_norm": 1.0251169204711914, "learning_rate": 4.191035949056011e-09, "loss": 0.7034, "step": 19212 }, { "epoch": 0.9874087778805632, "grad_norm": 1.1378138065338135, "learning_rate": 4.15703645373966e-09, "loss": 0.7146, "step": 19213 }, { "epoch": 0.9874601706239079, "grad_norm": 1.1331069469451904, "learning_rate": 4.1231753726150844e-09, "loss": 0.6707, "step": 19214 }, { "epoch": 0.9875115633672525, "grad_norm": 1.0966565608978271, "learning_rate": 4.0894527066204226e-09, "loss": 0.6803, "step": 19215 }, { "epoch": 0.9875629561105972, "grad_norm": 1.1502457857131958, "learning_rate": 4.055868456690482e-09, "loss": 0.7299, "step": 19216 }, { "epoch": 0.9876143488539418, "grad_norm": 1.1860049962997437, "learning_rate": 4.022422623755073e-09, "loss": 0.6803, "step": 19217 }, { "epoch": 0.9876657415972865, "grad_norm": 1.07864511013031, "learning_rate": 3.989115208741789e-09, "loss": 0.7067, "step": 19218 }, { "epoch": 0.9877171343406311, "grad_norm": 1.0841528177261353, "learning_rate": 3.955946212572115e-09, "loss": 0.7246, "step": 19219 }, { "epoch": 0.9877685270839758, "grad_norm": 1.13443124294281, "learning_rate": 3.922915636165869e-09, "loss": 0.6953, "step": 19220 }, { "epoch": 0.9878199198273204, "grad_norm": 1.132979154586792, "learning_rate": 3.890023480437876e-09, "loss": 0.7386, "step": 19221 }, { "epoch": 0.9878713125706651, "grad_norm": 1.1379449367523193, "learning_rate": 3.857269746300185e-09, "loss": 0.663, "step": 19222 }, { "epoch": 0.9879227053140096, "grad_norm": 1.183051347732544, "learning_rate": 3.824654434659291e-09, "loss": 0.6806, "step": 19223 }, { "epoch": 0.9879740980573543, "grad_norm": 0.6957756876945496, "learning_rate": 3.792177546418918e-09, "loss": 0.6417, "step": 19224 }, { "epoch": 0.9880254908006989, "grad_norm": 1.1658682823181152, "learning_rate": 3.7598390824794554e-09, "loss": 0.6569, "step": 19225 }, { "epoch": 0.9880768835440435, "grad_norm": 1.2409968376159668, "learning_rate": 3.7276390437368526e-09, "loss": 0.75, "step": 19226 }, { "epoch": 0.9881282762873882, "grad_norm": 0.7006943225860596, "learning_rate": 3.69557743108262e-09, "loss": 0.6273, "step": 19227 }, { "epoch": 0.9881796690307328, "grad_norm": 1.140687346458435, "learning_rate": 3.66365424540549e-09, "loss": 0.7123, "step": 19228 }, { "epoch": 0.9882310617740775, "grad_norm": 1.1553950309753418, "learning_rate": 3.6318694875903114e-09, "loss": 0.709, "step": 19229 }, { "epoch": 0.9882824545174221, "grad_norm": 1.0695443153381348, "learning_rate": 3.6002231585163807e-09, "loss": 0.6669, "step": 19230 }, { "epoch": 0.9883338472607668, "grad_norm": 1.0534558296203613, "learning_rate": 3.5687152590618833e-09, "loss": 0.642, "step": 19231 }, { "epoch": 0.9883852400041114, "grad_norm": 1.0932321548461914, "learning_rate": 3.5373457900994557e-09, "loss": 0.6906, "step": 19232 }, { "epoch": 0.9884366327474561, "grad_norm": 1.0996112823486328, "learning_rate": 3.5061147524972916e-09, "loss": 0.7088, "step": 19233 }, { "epoch": 0.9884880254908007, "grad_norm": 1.1056674718856812, "learning_rate": 3.4750221471219203e-09, "loss": 0.7206, "step": 19234 }, { "epoch": 0.9885394182341454, "grad_norm": 1.1441634893417358, "learning_rate": 3.4440679748337647e-09, "loss": 0.7442, "step": 19235 }, { "epoch": 0.98859081097749, "grad_norm": 1.2682429552078247, "learning_rate": 3.413252236491582e-09, "loss": 0.702, "step": 19236 }, { "epoch": 0.9886422037208347, "grad_norm": 1.0489912033081055, "learning_rate": 3.382574932948024e-09, "loss": 0.6646, "step": 19237 }, { "epoch": 0.9886935964641792, "grad_norm": 1.0879982709884644, "learning_rate": 3.352036065053521e-09, "loss": 0.6808, "step": 19238 }, { "epoch": 0.9887449892075239, "grad_norm": 1.1172878742218018, "learning_rate": 3.3216356336540632e-09, "loss": 0.7368, "step": 19239 }, { "epoch": 0.9887963819508685, "grad_norm": 1.107009768486023, "learning_rate": 3.29137363959231e-09, "loss": 0.6807, "step": 19240 }, { "epoch": 0.9888477746942131, "grad_norm": 1.0634379386901855, "learning_rate": 3.2612500837064797e-09, "loss": 0.6292, "step": 19241 }, { "epoch": 0.9888991674375578, "grad_norm": 0.7787332534790039, "learning_rate": 3.2312649668309047e-09, "loss": 0.624, "step": 19242 }, { "epoch": 0.9889505601809024, "grad_norm": 1.0656942129135132, "learning_rate": 3.201418289796032e-09, "loss": 0.6707, "step": 19243 }, { "epoch": 0.9890019529242471, "grad_norm": 1.1543809175491333, "learning_rate": 3.171710053430088e-09, "loss": 0.6567, "step": 19244 }, { "epoch": 0.9890533456675917, "grad_norm": 1.1073237657546997, "learning_rate": 3.142140258555193e-09, "loss": 0.7106, "step": 19245 }, { "epoch": 0.9891047384109364, "grad_norm": 1.0966774225234985, "learning_rate": 3.112708905990136e-09, "loss": 0.6869, "step": 19246 }, { "epoch": 0.989156131154281, "grad_norm": 1.0618302822113037, "learning_rate": 3.0834159965514866e-09, "loss": 0.6506, "step": 19247 }, { "epoch": 0.9892075238976257, "grad_norm": 1.0517992973327637, "learning_rate": 3.0542615310502623e-09, "loss": 0.644, "step": 19248 }, { "epoch": 0.9892589166409703, "grad_norm": 1.1183230876922607, "learning_rate": 3.0252455102947056e-09, "loss": 0.7027, "step": 19249 }, { "epoch": 0.989310309384315, "grad_norm": 1.1745147705078125, "learning_rate": 2.9963679350875075e-09, "loss": 0.744, "step": 19250 }, { "epoch": 0.9893617021276596, "grad_norm": 0.7406668663024902, "learning_rate": 2.967628806229694e-09, "loss": 0.6428, "step": 19251 }, { "epoch": 0.9894130948710043, "grad_norm": 1.0840258598327637, "learning_rate": 2.939028124517851e-09, "loss": 0.7137, "step": 19252 }, { "epoch": 0.9894644876143489, "grad_norm": 1.0861191749572754, "learning_rate": 2.910565890743566e-09, "loss": 0.7174, "step": 19253 }, { "epoch": 0.9895158803576934, "grad_norm": 1.0830777883529663, "learning_rate": 2.882242105695654e-09, "loss": 0.7272, "step": 19254 }, { "epoch": 0.9895672731010381, "grad_norm": 1.05614173412323, "learning_rate": 2.8540567701590414e-09, "loss": 0.6591, "step": 19255 }, { "epoch": 0.9896186658443827, "grad_norm": 1.005201816558838, "learning_rate": 2.826009884914771e-09, "loss": 0.6358, "step": 19256 }, { "epoch": 0.9896700585877274, "grad_norm": 0.6927486062049866, "learning_rate": 2.7981014507394434e-09, "loss": 0.645, "step": 19257 }, { "epoch": 0.989721451331072, "grad_norm": 1.0683655738830566, "learning_rate": 2.7703314684063286e-09, "loss": 0.664, "step": 19258 }, { "epoch": 0.9897728440744167, "grad_norm": 1.0542893409729004, "learning_rate": 2.742699938684812e-09, "loss": 0.7179, "step": 19259 }, { "epoch": 0.9898242368177613, "grad_norm": 1.1283996105194092, "learning_rate": 2.7152068623409466e-09, "loss": 0.7214, "step": 19260 }, { "epoch": 0.989875629561106, "grad_norm": 1.095484733581543, "learning_rate": 2.687852240136346e-09, "loss": 0.6845, "step": 19261 }, { "epoch": 0.9899270223044506, "grad_norm": 1.7568156719207764, "learning_rate": 2.6606360728281823e-09, "loss": 0.6367, "step": 19262 }, { "epoch": 0.9899784150477953, "grad_norm": 1.0557917356491089, "learning_rate": 2.6335583611714067e-09, "loss": 0.6645, "step": 19263 }, { "epoch": 0.9900298077911399, "grad_norm": 1.1051298379898071, "learning_rate": 2.60661910591542e-09, "loss": 0.6987, "step": 19264 }, { "epoch": 0.9900812005344846, "grad_norm": 1.1440355777740479, "learning_rate": 2.5798183078074023e-09, "loss": 0.6958, "step": 19265 }, { "epoch": 0.9901325932778292, "grad_norm": 1.139714002609253, "learning_rate": 2.5531559675889827e-09, "loss": 0.728, "step": 19266 }, { "epoch": 0.9901839860211739, "grad_norm": 1.1090095043182373, "learning_rate": 2.5266320859995697e-09, "loss": 0.7134, "step": 19267 }, { "epoch": 0.9902353787645185, "grad_norm": 0.6799173355102539, "learning_rate": 2.5002466637741306e-09, "loss": 0.6505, "step": 19268 }, { "epoch": 0.990286771507863, "grad_norm": 1.1176661252975464, "learning_rate": 2.473999701643193e-09, "loss": 0.7034, "step": 19269 }, { "epoch": 0.9903381642512077, "grad_norm": 1.1120586395263672, "learning_rate": 2.4478912003339517e-09, "loss": 0.6973, "step": 19270 }, { "epoch": 0.9903895569945523, "grad_norm": 1.097770094871521, "learning_rate": 2.4219211605702733e-09, "loss": 0.6337, "step": 19271 }, { "epoch": 0.990440949737897, "grad_norm": 0.6981989145278931, "learning_rate": 2.3960895830710262e-09, "loss": 0.6547, "step": 19272 }, { "epoch": 0.9904923424812416, "grad_norm": 1.1528936624526978, "learning_rate": 2.370396468552305e-09, "loss": 0.6989, "step": 19273 }, { "epoch": 0.9905437352245863, "grad_norm": 1.1162132024765015, "learning_rate": 2.3448418177263177e-09, "loss": 0.7451, "step": 19274 }, { "epoch": 0.9905951279679309, "grad_norm": 1.099804401397705, "learning_rate": 2.319425631300276e-09, "loss": 0.6712, "step": 19275 }, { "epoch": 0.9906465207112756, "grad_norm": 1.039446473121643, "learning_rate": 2.294147909978617e-09, "loss": 0.6803, "step": 19276 }, { "epoch": 0.9906979134546202, "grad_norm": 1.0218850374221802, "learning_rate": 2.2690086544624456e-09, "loss": 0.7064, "step": 19277 }, { "epoch": 0.9907493061979649, "grad_norm": 1.1670374870300293, "learning_rate": 2.244007865446762e-09, "loss": 0.7159, "step": 19278 }, { "epoch": 0.9908006989413095, "grad_norm": 1.0085628032684326, "learning_rate": 2.2191455436254562e-09, "loss": 0.6435, "step": 19279 }, { "epoch": 0.9908520916846542, "grad_norm": 1.1783610582351685, "learning_rate": 2.1944216896874203e-09, "loss": 0.6745, "step": 19280 }, { "epoch": 0.9909034844279988, "grad_norm": 1.1046825647354126, "learning_rate": 2.169836304317108e-09, "loss": 0.6931, "step": 19281 }, { "epoch": 0.9909548771713435, "grad_norm": 1.093119740486145, "learning_rate": 2.145389388195085e-09, "loss": 0.6337, "step": 19282 }, { "epoch": 0.9910062699146881, "grad_norm": 1.1016041040420532, "learning_rate": 2.1210809420002533e-09, "loss": 0.7049, "step": 19283 }, { "epoch": 0.9910576626580326, "grad_norm": 1.0662263631820679, "learning_rate": 2.0969109664048525e-09, "loss": 0.6937, "step": 19284 }, { "epoch": 0.9911090554013773, "grad_norm": 1.060379981994629, "learning_rate": 2.0728794620789026e-09, "loss": 0.6962, "step": 19285 }, { "epoch": 0.9911604481447219, "grad_norm": 1.0481971502304077, "learning_rate": 2.0489864296879825e-09, "loss": 0.7024, "step": 19286 }, { "epoch": 0.9912118408880666, "grad_norm": 1.0933082103729248, "learning_rate": 2.0252318698948946e-09, "loss": 0.6486, "step": 19287 }, { "epoch": 0.9912632336314112, "grad_norm": 0.6570514440536499, "learning_rate": 2.001615783356892e-09, "loss": 0.6203, "step": 19288 }, { "epoch": 0.9913146263747559, "grad_norm": 1.102957010269165, "learning_rate": 1.978138170729005e-09, "loss": 0.7241, "step": 19289 }, { "epoch": 0.9913660191181005, "grad_norm": 0.7595608234405518, "learning_rate": 1.954799032660715e-09, "loss": 0.6532, "step": 19290 }, { "epoch": 0.9914174118614452, "grad_norm": 1.0640813112258911, "learning_rate": 1.931598369799281e-09, "loss": 0.7241, "step": 19291 }, { "epoch": 0.9914688046047898, "grad_norm": 1.180168628692627, "learning_rate": 1.908536182787524e-09, "loss": 0.7511, "step": 19292 }, { "epoch": 0.9915201973481345, "grad_norm": 1.077062964439392, "learning_rate": 1.885612472264375e-09, "loss": 0.674, "step": 19293 }, { "epoch": 0.9915715900914791, "grad_norm": 1.130330204963684, "learning_rate": 1.862827238865439e-09, "loss": 0.7047, "step": 19294 }, { "epoch": 0.9916229828348238, "grad_norm": 1.1044999361038208, "learning_rate": 1.8401804832207659e-09, "loss": 0.7291, "step": 19295 }, { "epoch": 0.9916743755781684, "grad_norm": 0.7969647645950317, "learning_rate": 1.8176722059587426e-09, "loss": 0.6745, "step": 19296 }, { "epoch": 0.991725768321513, "grad_norm": 1.1171493530273438, "learning_rate": 1.7953024077027591e-09, "loss": 0.6517, "step": 19297 }, { "epoch": 0.9917771610648577, "grad_norm": 1.0566861629486084, "learning_rate": 1.77307108907232e-09, "loss": 0.7163, "step": 19298 }, { "epoch": 0.9918285538082022, "grad_norm": 0.8097507357597351, "learning_rate": 1.7509782506841544e-09, "loss": 0.6759, "step": 19299 }, { "epoch": 0.9918799465515469, "grad_norm": 1.1557252407073975, "learning_rate": 1.7290238931494396e-09, "loss": 0.6738, "step": 19300 }, { "epoch": 0.9919313392948915, "grad_norm": 1.0709545612335205, "learning_rate": 1.707208017076578e-09, "loss": 0.6949, "step": 19301 }, { "epoch": 0.9919827320382362, "grad_norm": 1.0855231285095215, "learning_rate": 1.6855306230706414e-09, "loss": 0.6368, "step": 19302 }, { "epoch": 0.9920341247815808, "grad_norm": 1.8344392776489258, "learning_rate": 1.663991711731705e-09, "loss": 0.6544, "step": 19303 }, { "epoch": 0.9920855175249255, "grad_norm": 1.1377217769622803, "learning_rate": 1.642591283656514e-09, "loss": 0.7408, "step": 19304 }, { "epoch": 0.9921369102682701, "grad_norm": 0.8285262584686279, "learning_rate": 1.621329339438482e-09, "loss": 0.6368, "step": 19305 }, { "epoch": 0.9921883030116148, "grad_norm": 1.1041715145111084, "learning_rate": 1.600205879666028e-09, "loss": 0.7292, "step": 19306 }, { "epoch": 0.9922396957549594, "grad_norm": 1.0027024745941162, "learning_rate": 1.5792209049247942e-09, "loss": 0.6552, "step": 19307 }, { "epoch": 0.992291088498304, "grad_norm": 1.0270957946777344, "learning_rate": 1.5583744157959824e-09, "loss": 0.6531, "step": 19308 }, { "epoch": 0.9923424812416487, "grad_norm": 0.9946281909942627, "learning_rate": 1.5376664128574636e-09, "loss": 0.6692, "step": 19309 }, { "epoch": 0.9923938739849933, "grad_norm": 1.036358118057251, "learning_rate": 1.5170968966826682e-09, "loss": 0.6735, "step": 19310 }, { "epoch": 0.992445266728338, "grad_norm": 1.0617839097976685, "learning_rate": 1.4966658678416957e-09, "loss": 0.6708, "step": 19311 }, { "epoch": 0.9924966594716826, "grad_norm": 1.2486793994903564, "learning_rate": 1.4763733269002044e-09, "loss": 0.7248, "step": 19312 }, { "epoch": 0.9925480522150273, "grad_norm": 1.0622004270553589, "learning_rate": 1.456219274421078e-09, "loss": 0.6449, "step": 19313 }, { "epoch": 0.9925994449583718, "grad_norm": 1.1487758159637451, "learning_rate": 1.4362037109622028e-09, "loss": 0.6286, "step": 19314 }, { "epoch": 0.9926508377017165, "grad_norm": 1.1095880270004272, "learning_rate": 1.4163266370781358e-09, "loss": 0.7358, "step": 19315 }, { "epoch": 0.9927022304450611, "grad_norm": 1.0970951318740845, "learning_rate": 1.3965880533195476e-09, "loss": 0.6845, "step": 19316 }, { "epoch": 0.9927536231884058, "grad_norm": 1.1831855773925781, "learning_rate": 1.3769879602337776e-09, "loss": 0.649, "step": 19317 }, { "epoch": 0.9928050159317504, "grad_norm": 1.0302222967147827, "learning_rate": 1.3575263583637256e-09, "loss": 0.6804, "step": 19318 }, { "epoch": 0.9928564086750951, "grad_norm": 0.7512422800064087, "learning_rate": 1.338203248248404e-09, "loss": 0.629, "step": 19319 }, { "epoch": 0.9929078014184397, "grad_norm": 0.7811951041221619, "learning_rate": 1.3190186304229413e-09, "loss": 0.61, "step": 19320 }, { "epoch": 0.9929591941617844, "grad_norm": 1.157462239265442, "learning_rate": 1.2999725054191338e-09, "loss": 0.694, "step": 19321 }, { "epoch": 0.993010586905129, "grad_norm": 0.8165507912635803, "learning_rate": 1.2810648737648924e-09, "loss": 0.6401, "step": 19322 }, { "epoch": 0.9930619796484736, "grad_norm": 1.0097500085830688, "learning_rate": 1.2622957359836873e-09, "loss": 0.6468, "step": 19323 }, { "epoch": 0.9931133723918183, "grad_norm": 0.6846978068351746, "learning_rate": 1.2436650925956584e-09, "loss": 0.632, "step": 19324 }, { "epoch": 0.9931647651351629, "grad_norm": 0.7729439735412598, "learning_rate": 1.2251729441176142e-09, "loss": 0.5841, "step": 19325 }, { "epoch": 0.9932161578785076, "grad_norm": 1.0360852479934692, "learning_rate": 1.2068192910602573e-09, "loss": 0.6509, "step": 19326 }, { "epoch": 0.9932675506218522, "grad_norm": 0.6819604635238647, "learning_rate": 1.1886041339337352e-09, "loss": 0.6006, "step": 19327 }, { "epoch": 0.9933189433651969, "grad_norm": 1.020704746246338, "learning_rate": 1.1705274732420891e-09, "loss": 0.6806, "step": 19328 }, { "epoch": 0.9933703361085414, "grad_norm": 1.1895776987075806, "learning_rate": 1.1525893094865848e-09, "loss": 0.7034, "step": 19329 }, { "epoch": 0.9934217288518861, "grad_norm": 1.1127362251281738, "learning_rate": 1.1347896431629368e-09, "loss": 0.6764, "step": 19330 }, { "epoch": 0.9934731215952307, "grad_norm": 1.1071585416793823, "learning_rate": 1.1171284747657497e-09, "loss": 0.6842, "step": 19331 }, { "epoch": 0.9935245143385754, "grad_norm": 1.3185594081878662, "learning_rate": 1.0996058047835212e-09, "loss": 0.7139, "step": 19332 }, { "epoch": 0.99357590708192, "grad_norm": 0.7500571608543396, "learning_rate": 1.0822216337025293e-09, "loss": 0.6276, "step": 19333 }, { "epoch": 0.9936272998252647, "grad_norm": 1.1356713771820068, "learning_rate": 1.0649759620029454e-09, "loss": 0.7597, "step": 19334 }, { "epoch": 0.9936786925686093, "grad_norm": 1.1430342197418213, "learning_rate": 1.0478687901643858e-09, "loss": 0.6902, "step": 19335 }, { "epoch": 0.993730085311954, "grad_norm": 0.8056836128234863, "learning_rate": 1.0309001186592504e-09, "loss": 0.6206, "step": 19336 }, { "epoch": 0.9937814780552986, "grad_norm": 1.074803352355957, "learning_rate": 1.014069947958829e-09, "loss": 0.6539, "step": 19337 }, { "epoch": 0.9938328707986432, "grad_norm": 0.9977484941482544, "learning_rate": 9.973782785283048e-10, "loss": 0.6842, "step": 19338 }, { "epoch": 0.9938842635419879, "grad_norm": 0.7673470973968506, "learning_rate": 9.808251108311961e-10, "loss": 0.6204, "step": 19339 }, { "epoch": 0.9939356562853325, "grad_norm": 0.760940670967102, "learning_rate": 9.6441044532547e-10, "loss": 0.6269, "step": 19340 }, { "epoch": 0.9939870490286772, "grad_norm": 1.1634966135025024, "learning_rate": 9.481342824663175e-10, "loss": 0.7114, "step": 19341 }, { "epoch": 0.9940384417720218, "grad_norm": 0.7205185294151306, "learning_rate": 9.319966227044897e-10, "loss": 0.6165, "step": 19342 }, { "epoch": 0.9940898345153665, "grad_norm": 1.0723607540130615, "learning_rate": 9.159974664874061e-10, "loss": 0.6538, "step": 19343 }, { "epoch": 0.9941412272587111, "grad_norm": 1.0852857828140259, "learning_rate": 9.001368142574907e-10, "loss": 0.6644, "step": 19344 }, { "epoch": 0.9941926200020557, "grad_norm": 1.0827651023864746, "learning_rate": 8.844146664549468e-10, "loss": 0.7022, "step": 19345 }, { "epoch": 0.9942440127454003, "grad_norm": 1.10652494430542, "learning_rate": 8.688310235149822e-10, "loss": 0.673, "step": 19346 }, { "epoch": 0.994295405488745, "grad_norm": 1.0643011331558228, "learning_rate": 8.533858858700283e-10, "loss": 0.7093, "step": 19347 }, { "epoch": 0.9943467982320896, "grad_norm": 1.102324366569519, "learning_rate": 8.380792539469663e-10, "loss": 0.6672, "step": 19348 }, { "epoch": 0.9943981909754342, "grad_norm": 1.1710870265960693, "learning_rate": 8.229111281704561e-10, "loss": 0.7245, "step": 19349 }, { "epoch": 0.9944495837187789, "grad_norm": 1.0414108037948608, "learning_rate": 8.078815089607173e-10, "loss": 0.6828, "step": 19350 }, { "epoch": 0.9945009764621235, "grad_norm": 1.0861867666244507, "learning_rate": 7.929903967346387e-10, "loss": 0.6848, "step": 19351 }, { "epoch": 0.9945523692054682, "grad_norm": 0.8204453587532043, "learning_rate": 7.782377919035577e-10, "loss": 0.665, "step": 19352 }, { "epoch": 0.9946037619488128, "grad_norm": 0.7217210531234741, "learning_rate": 7.636236948771469e-10, "loss": 0.6393, "step": 19353 }, { "epoch": 0.9946551546921575, "grad_norm": 1.1099402904510498, "learning_rate": 7.491481060606376e-10, "loss": 0.741, "step": 19354 }, { "epoch": 0.9947065474355021, "grad_norm": 1.1761648654937744, "learning_rate": 7.3481102585371e-10, "loss": 0.7044, "step": 19355 }, { "epoch": 0.9947579401788468, "grad_norm": 1.095995306968689, "learning_rate": 7.206124546549342e-10, "loss": 0.7392, "step": 19356 }, { "epoch": 0.9948093329221914, "grad_norm": 1.0842957496643066, "learning_rate": 7.065523928567741e-10, "loss": 0.7186, "step": 19357 }, { "epoch": 0.9948607256655361, "grad_norm": 1.0737346410751343, "learning_rate": 6.926308408494731e-10, "loss": 0.6959, "step": 19358 }, { "epoch": 0.9949121184088807, "grad_norm": 1.1387611627578735, "learning_rate": 6.788477990188336e-10, "loss": 0.7084, "step": 19359 }, { "epoch": 0.9949635111522253, "grad_norm": 1.0786956548690796, "learning_rate": 6.652032677456622e-10, "loss": 0.7256, "step": 19360 }, { "epoch": 0.9950149038955699, "grad_norm": 1.0833635330200195, "learning_rate": 6.516972474090999e-10, "loss": 0.6744, "step": 19361 }, { "epoch": 0.9950662966389145, "grad_norm": 0.7598153948783875, "learning_rate": 6.383297383827369e-10, "loss": 0.6838, "step": 19362 }, { "epoch": 0.9951176893822592, "grad_norm": 1.0951838493347168, "learning_rate": 6.251007410373877e-10, "loss": 0.7148, "step": 19363 }, { "epoch": 0.9951690821256038, "grad_norm": 1.1390982866287231, "learning_rate": 6.120102557388707e-10, "loss": 0.6945, "step": 19364 }, { "epoch": 0.9952204748689485, "grad_norm": 1.0435545444488525, "learning_rate": 5.990582828502289e-10, "loss": 0.6466, "step": 19365 }, { "epoch": 0.9952718676122931, "grad_norm": 1.119020938873291, "learning_rate": 5.862448227306195e-10, "loss": 0.6679, "step": 19366 }, { "epoch": 0.9953232603556378, "grad_norm": 1.1077371835708618, "learning_rate": 5.735698757347586e-10, "loss": 0.6964, "step": 19367 }, { "epoch": 0.9953746530989824, "grad_norm": 1.0444180965423584, "learning_rate": 5.610334422140317e-10, "loss": 0.6235, "step": 19368 }, { "epoch": 0.9954260458423271, "grad_norm": 1.052249550819397, "learning_rate": 5.486355225153838e-10, "loss": 0.6228, "step": 19369 }, { "epoch": 0.9954774385856717, "grad_norm": 1.1058058738708496, "learning_rate": 5.36376116982984e-10, "loss": 0.6653, "step": 19370 }, { "epoch": 0.9955288313290164, "grad_norm": 1.1063816547393799, "learning_rate": 5.242552259554501e-10, "loss": 0.6784, "step": 19371 }, { "epoch": 0.995580224072361, "grad_norm": 0.6766038537025452, "learning_rate": 5.122728497691798e-10, "loss": 0.6236, "step": 19372 }, { "epoch": 0.9956316168157057, "grad_norm": 1.0495966672897339, "learning_rate": 5.004289887566849e-10, "loss": 0.6843, "step": 19373 }, { "epoch": 0.9956830095590503, "grad_norm": 1.1163429021835327, "learning_rate": 4.887236432449261e-10, "loss": 0.6562, "step": 19374 }, { "epoch": 0.9957344023023948, "grad_norm": 1.1211323738098145, "learning_rate": 4.771568135591986e-10, "loss": 0.7072, "step": 19375 }, { "epoch": 0.9957857950457395, "grad_norm": 1.0442214012145996, "learning_rate": 4.657285000198019e-10, "loss": 0.713, "step": 19376 }, { "epoch": 0.9958371877890841, "grad_norm": 1.17401921749115, "learning_rate": 4.544387029431496e-10, "loss": 0.7511, "step": 19377 }, { "epoch": 0.9958885805324288, "grad_norm": 0.7029767632484436, "learning_rate": 4.4328742264176937e-10, "loss": 0.625, "step": 19378 }, { "epoch": 0.9959399732757734, "grad_norm": 1.0480568408966064, "learning_rate": 4.322746594254135e-10, "loss": 0.6551, "step": 19379 }, { "epoch": 0.9959913660191181, "grad_norm": 1.1074014902114868, "learning_rate": 4.21400413598283e-10, "loss": 0.6887, "step": 19380 }, { "epoch": 0.9960427587624627, "grad_norm": 1.145377278327942, "learning_rate": 4.106646854623586e-10, "loss": 0.734, "step": 19381 }, { "epoch": 0.9960941515058074, "grad_norm": 1.07652747631073, "learning_rate": 4.000674753151801e-10, "loss": 0.746, "step": 19382 }, { "epoch": 0.996145544249152, "grad_norm": 1.073360800743103, "learning_rate": 3.896087834492912e-10, "loss": 0.6807, "step": 19383 }, { "epoch": 0.9961969369924967, "grad_norm": 1.1766273975372314, "learning_rate": 3.792886101555704e-10, "loss": 0.6981, "step": 19384 }, { "epoch": 0.9962483297358413, "grad_norm": 1.1354820728302002, "learning_rate": 3.6910695571934497e-10, "loss": 0.6654, "step": 19385 }, { "epoch": 0.996299722479186, "grad_norm": 1.211204171180725, "learning_rate": 3.590638204231667e-10, "loss": 0.678, "step": 19386 }, { "epoch": 0.9963511152225306, "grad_norm": 0.7538310289382935, "learning_rate": 3.491592045451464e-10, "loss": 0.6567, "step": 19387 }, { "epoch": 0.9964025079658753, "grad_norm": 0.8086171746253967, "learning_rate": 3.393931083589541e-10, "loss": 0.6838, "step": 19388 }, { "epoch": 0.9964539007092199, "grad_norm": 1.116568684577942, "learning_rate": 3.297655321365945e-10, "loss": 0.7269, "step": 19389 }, { "epoch": 0.9965052934525644, "grad_norm": 1.0952311754226685, "learning_rate": 3.2027647614341074e-10, "loss": 0.7112, "step": 19390 }, { "epoch": 0.9965566861959091, "grad_norm": 1.048426628112793, "learning_rate": 3.109259406430809e-10, "loss": 0.7166, "step": 19391 }, { "epoch": 0.9966080789392537, "grad_norm": 0.8646671772003174, "learning_rate": 3.0171392589428693e-10, "loss": 0.6076, "step": 19392 }, { "epoch": 0.9966594716825984, "grad_norm": 0.8289965391159058, "learning_rate": 2.9264043215293523e-10, "loss": 0.6482, "step": 19393 }, { "epoch": 0.996710864425943, "grad_norm": 1.1529563665390015, "learning_rate": 2.837054596693811e-10, "loss": 0.6602, "step": 19394 }, { "epoch": 0.9967622571692877, "grad_norm": 1.1179028749465942, "learning_rate": 2.7490900869231453e-10, "loss": 0.7117, "step": 19395 }, { "epoch": 0.9968136499126323, "grad_norm": 1.038568139076233, "learning_rate": 2.6625107946431917e-10, "loss": 0.619, "step": 19396 }, { "epoch": 0.996865042655977, "grad_norm": 1.0913738012313843, "learning_rate": 2.5773167222631344e-10, "loss": 0.6518, "step": 19397 }, { "epoch": 0.9969164353993216, "grad_norm": 1.0856178998947144, "learning_rate": 2.4935078721366467e-10, "loss": 0.763, "step": 19398 }, { "epoch": 0.9969678281426663, "grad_norm": 1.0060220956802368, "learning_rate": 2.4110842465840943e-10, "loss": 0.6939, "step": 19399 }, { "epoch": 0.9970192208860109, "grad_norm": 1.146921992301941, "learning_rate": 2.3300458478980883e-10, "loss": 0.7225, "step": 19400 }, { "epoch": 0.9970706136293556, "grad_norm": 1.088821530342102, "learning_rate": 2.250392678315727e-10, "loss": 0.642, "step": 19401 }, { "epoch": 0.9971220063727002, "grad_norm": 1.101847767829895, "learning_rate": 2.1721247400463553e-10, "loss": 0.7211, "step": 19402 }, { "epoch": 0.9971733991160449, "grad_norm": 1.1838881969451904, "learning_rate": 2.0952420352604587e-10, "loss": 0.6958, "step": 19403 }, { "epoch": 0.9972247918593895, "grad_norm": 1.1068248748779297, "learning_rate": 2.0197445660841141e-10, "loss": 0.6346, "step": 19404 }, { "epoch": 0.997276184602734, "grad_norm": 1.0783617496490479, "learning_rate": 1.9456323346100926e-10, "loss": 0.6496, "step": 19405 }, { "epoch": 0.9973275773460787, "grad_norm": 0.8151327967643738, "learning_rate": 1.872905342897857e-10, "loss": 0.6704, "step": 19406 }, { "epoch": 0.9973789700894233, "grad_norm": 1.2066408395767212, "learning_rate": 1.8015635929513608e-10, "loss": 0.6906, "step": 19407 }, { "epoch": 0.997430362832768, "grad_norm": 1.1067622900009155, "learning_rate": 1.7316070867579027e-10, "loss": 0.683, "step": 19408 }, { "epoch": 0.9974817555761126, "grad_norm": 1.1232110261917114, "learning_rate": 1.6630358262437195e-10, "loss": 0.6773, "step": 19409 }, { "epoch": 0.9975331483194573, "grad_norm": 1.0760105848312378, "learning_rate": 1.5958498133239465e-10, "loss": 0.6787, "step": 19410 }, { "epoch": 0.9975845410628019, "grad_norm": 1.0958530902862549, "learning_rate": 1.5300490498471044e-10, "loss": 0.7423, "step": 19411 }, { "epoch": 0.9976359338061466, "grad_norm": 1.0523499250411987, "learning_rate": 1.4656335376450614e-10, "loss": 0.7048, "step": 19412 }, { "epoch": 0.9976873265494912, "grad_norm": 1.113553524017334, "learning_rate": 1.4026032784941745e-10, "loss": 0.7031, "step": 19413 }, { "epoch": 0.9977387192928359, "grad_norm": 1.0779064893722534, "learning_rate": 1.340958274148596e-10, "loss": 0.6924, "step": 19414 }, { "epoch": 0.9977901120361805, "grad_norm": 1.0766246318817139, "learning_rate": 1.2806985263125182e-10, "loss": 0.6902, "step": 19415 }, { "epoch": 0.9978415047795252, "grad_norm": 1.086360216140747, "learning_rate": 1.2218240366512756e-10, "loss": 0.6549, "step": 19416 }, { "epoch": 0.9978928975228698, "grad_norm": 1.1849026679992676, "learning_rate": 1.1643348068024474e-10, "loss": 0.7057, "step": 19417 }, { "epoch": 0.9979442902662145, "grad_norm": 1.1411043405532837, "learning_rate": 1.1082308383592033e-10, "loss": 0.6592, "step": 19418 }, { "epoch": 0.9979956830095591, "grad_norm": 1.136001467704773, "learning_rate": 1.0535121328758557e-10, "loss": 0.6929, "step": 19419 }, { "epoch": 0.9980470757529037, "grad_norm": 1.0285239219665527, "learning_rate": 1.000178691862308e-10, "loss": 0.685, "step": 19420 }, { "epoch": 0.9980984684962483, "grad_norm": 1.1170852184295654, "learning_rate": 9.482305168007078e-11, "loss": 0.7042, "step": 19421 }, { "epoch": 0.9981498612395929, "grad_norm": 1.0366523265838623, "learning_rate": 8.976676091287939e-11, "loss": 0.7244, "step": 19422 }, { "epoch": 0.9982012539829376, "grad_norm": 1.0879096984863281, "learning_rate": 8.484899702509986e-11, "loss": 0.6623, "step": 19423 }, { "epoch": 0.9982526467262822, "grad_norm": 1.2013845443725586, "learning_rate": 8.006976015273448e-11, "loss": 0.6941, "step": 19424 }, { "epoch": 0.9983040394696269, "grad_norm": 1.1568068265914917, "learning_rate": 7.542905042789983e-11, "loss": 0.7231, "step": 19425 }, { "epoch": 0.9983554322129715, "grad_norm": 1.1374436616897583, "learning_rate": 7.092686797938176e-11, "loss": 0.7134, "step": 19426 }, { "epoch": 0.9984068249563162, "grad_norm": 0.8318619728088379, "learning_rate": 6.656321293208034e-11, "loss": 0.6832, "step": 19427 }, { "epoch": 0.9984582176996608, "grad_norm": 1.015324592590332, "learning_rate": 6.233808540700992e-11, "loss": 0.6737, "step": 19428 }, { "epoch": 0.9985096104430055, "grad_norm": 1.079624056816101, "learning_rate": 5.82514855207439e-11, "loss": 0.7477, "step": 19429 }, { "epoch": 0.9985610031863501, "grad_norm": 1.205488681793213, "learning_rate": 5.430341338708012e-11, "loss": 0.7202, "step": 19430 }, { "epoch": 0.9986123959296948, "grad_norm": 1.1007046699523926, "learning_rate": 5.0493869115375574e-11, "loss": 0.7049, "step": 19431 }, { "epoch": 0.9986637886730394, "grad_norm": 0.7101621031761169, "learning_rate": 4.6822852810546325e-11, "loss": 0.6442, "step": 19432 }, { "epoch": 0.998715181416384, "grad_norm": 1.0886563062667847, "learning_rate": 4.329036457417779e-11, "loss": 0.6984, "step": 19433 }, { "epoch": 0.9987665741597287, "grad_norm": 1.0681804418563843, "learning_rate": 3.9896404505634924e-11, "loss": 0.6496, "step": 19434 }, { "epoch": 0.9988179669030733, "grad_norm": 1.0775365829467773, "learning_rate": 3.6640972697066234e-11, "loss": 0.6964, "step": 19435 }, { "epoch": 0.9988693596464179, "grad_norm": 0.7069593071937561, "learning_rate": 3.352406924006513e-11, "loss": 0.6577, "step": 19436 }, { "epoch": 0.9989207523897625, "grad_norm": 1.030396580696106, "learning_rate": 3.0545694220118774e-11, "loss": 0.6419, "step": 19437 }, { "epoch": 0.9989721451331072, "grad_norm": 1.0528852939605713, "learning_rate": 2.7705847720493895e-11, "loss": 0.6406, "step": 19438 }, { "epoch": 0.9990235378764518, "grad_norm": 1.1139971017837524, "learning_rate": 2.5004529819461222e-11, "loss": 0.644, "step": 19439 }, { "epoch": 0.9990749306197965, "grad_norm": 1.064696192741394, "learning_rate": 2.2441740591405692e-11, "loss": 0.7158, "step": 19440 }, { "epoch": 0.9991263233631411, "grad_norm": 1.0799723863601685, "learning_rate": 2.0017480107936693e-11, "loss": 0.6679, "step": 19441 }, { "epoch": 0.9991777161064858, "grad_norm": 1.082567811012268, "learning_rate": 1.7731748436222718e-11, "loss": 0.6849, "step": 19442 }, { "epoch": 0.9992291088498304, "grad_norm": 1.1231456995010376, "learning_rate": 1.5584545639546477e-11, "loss": 0.7249, "step": 19443 }, { "epoch": 0.999280501593175, "grad_norm": 1.1078248023986816, "learning_rate": 1.3575871777304905e-11, "loss": 0.7129, "step": 19444 }, { "epoch": 0.9993318943365197, "grad_norm": 1.1178227663040161, "learning_rate": 1.1705726905009152e-11, "loss": 0.664, "step": 19445 }, { "epoch": 0.9993832870798643, "grad_norm": 1.0704572200775146, "learning_rate": 9.974111074839698e-12, "loss": 0.6856, "step": 19446 }, { "epoch": 0.999434679823209, "grad_norm": 1.1912915706634521, "learning_rate": 8.381024333981025e-12, "loss": 0.7084, "step": 19447 }, { "epoch": 0.9994860725665536, "grad_norm": 1.1131463050842285, "learning_rate": 6.9264667279522744e-12, "loss": 0.7317, "step": 19448 }, { "epoch": 0.9995374653098983, "grad_norm": 1.0881973505020142, "learning_rate": 5.6104382956112534e-12, "loss": 0.7371, "step": 19449 }, { "epoch": 0.9995888580532429, "grad_norm": 1.1469289064407349, "learning_rate": 4.432939074150433e-12, "loss": 0.7116, "step": 19450 }, { "epoch": 0.9996402507965875, "grad_norm": 1.0609543323516846, "learning_rate": 3.3939690963213923e-12, "loss": 0.7003, "step": 19451 }, { "epoch": 0.9996916435399321, "grad_norm": 1.1198300123214722, "learning_rate": 2.4935283904348183e-12, "loss": 0.6302, "step": 19452 }, { "epoch": 0.9997430362832768, "grad_norm": 0.68081134557724, "learning_rate": 1.731616982025841e-12, "loss": 0.6537, "step": 19453 }, { "epoch": 0.9997944290266214, "grad_norm": 1.081189751625061, "learning_rate": 1.1082348916335861e-12, "loss": 0.6603, "step": 19454 }, { "epoch": 0.9998458217699661, "grad_norm": 0.9850096106529236, "learning_rate": 6.233821364665105e-13, "loss": 0.6412, "step": 19455 }, { "epoch": 0.9998972145133107, "grad_norm": 1.0728039741516113, "learning_rate": 2.7705873040240196e-13, "loss": 0.6702, "step": 19456 }, { "epoch": 0.9999486072566554, "grad_norm": 0.9957613348960876, "learning_rate": 6.926468287815624e-14, "loss": 0.7068, "step": 19457 }, { "epoch": 1.0, "grad_norm": 0.7922065854072571, "learning_rate": 0.0, "loss": 0.6775, "step": 19458 }, { "epoch": 1.0, "step": 19458, "total_flos": 3.0958737148945734e+20, "train_loss": 0.7380289890248087, "train_runtime": 68709.733, "train_samples_per_second": 144.993, "train_steps_per_second": 0.283 } ], "logging_steps": 1.0, "max_steps": 19458, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.0958737148945734e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }