|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.000789265982636, |
|
"eval_steps": 500, |
|
"global_step": 1268, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007892659826361484, |
|
"grad_norm": 2.738257646560669, |
|
"learning_rate": 7.5e-07, |
|
"loss": 2.0033, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0015785319652722968, |
|
"grad_norm": 1.657310128211975, |
|
"learning_rate": 1.5e-06, |
|
"loss": 2.0098, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0023677979479084454, |
|
"grad_norm": 3.086771011352539, |
|
"learning_rate": 2.25e-06, |
|
"loss": 2.0645, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0031570639305445935, |
|
"grad_norm": 3.7529759407043457, |
|
"learning_rate": 3e-06, |
|
"loss": 1.9987, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.003946329913180742, |
|
"grad_norm": 2.473452091217041, |
|
"learning_rate": 3.75e-06, |
|
"loss": 2.042, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004735595895816891, |
|
"grad_norm": 2.465566873550415, |
|
"learning_rate": 4.5e-06, |
|
"loss": 1.9511, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0055248618784530384, |
|
"grad_norm": 1.4017177820205688, |
|
"learning_rate": 5.25e-06, |
|
"loss": 2.038, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.006314127861089187, |
|
"grad_norm": 1.7734719514846802, |
|
"learning_rate": 6e-06, |
|
"loss": 2.1121, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.007103393843725336, |
|
"grad_norm": 1.63801109790802, |
|
"learning_rate": 6.750000000000001e-06, |
|
"loss": 2.0107, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.007892659826361484, |
|
"grad_norm": 1.9717129468917847, |
|
"learning_rate": 7.5e-06, |
|
"loss": 2.0291, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.008681925808997633, |
|
"grad_norm": 1.5256599187850952, |
|
"learning_rate": 8.25e-06, |
|
"loss": 2.0407, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.009471191791633781, |
|
"grad_norm": 1.4094102382659912, |
|
"learning_rate": 9e-06, |
|
"loss": 2.0014, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.010260457774269928, |
|
"grad_norm": 2.010549306869507, |
|
"learning_rate": 9.75e-06, |
|
"loss": 2.0341, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.011049723756906077, |
|
"grad_norm": 3.010610342025757, |
|
"learning_rate": 1.05e-05, |
|
"loss": 2.0388, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.011838989739542225, |
|
"grad_norm": 2.2516043186187744, |
|
"learning_rate": 1.125e-05, |
|
"loss": 2.0162, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.012628255722178374, |
|
"grad_norm": 3.590932607650757, |
|
"learning_rate": 1.2e-05, |
|
"loss": 2.0371, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.013417521704814523, |
|
"grad_norm": 2.2385761737823486, |
|
"learning_rate": 1.275e-05, |
|
"loss": 2.1105, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.014206787687450671, |
|
"grad_norm": 4.703427314758301, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"loss": 2.1296, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01499605367008682, |
|
"grad_norm": 2.485727310180664, |
|
"learning_rate": 1.4249999999999999e-05, |
|
"loss": 2.0493, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01578531965272297, |
|
"grad_norm": 3.6647562980651855, |
|
"learning_rate": 1.5e-05, |
|
"loss": 2.006, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.016574585635359115, |
|
"grad_norm": 2.9405100345611572, |
|
"learning_rate": 1.575e-05, |
|
"loss": 2.045, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.017363851617995266, |
|
"grad_norm": 5.117101192474365, |
|
"learning_rate": 1.65e-05, |
|
"loss": 2.0551, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.018153117600631413, |
|
"grad_norm": 4.730511665344238, |
|
"learning_rate": 1.725e-05, |
|
"loss": 2.0584, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.018942383583267563, |
|
"grad_norm": 3.1207115650177, |
|
"learning_rate": 1.8e-05, |
|
"loss": 2.029, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01973164956590371, |
|
"grad_norm": 3.1241533756256104, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 1.989, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.020520915548539857, |
|
"grad_norm": 3.6228599548339844, |
|
"learning_rate": 1.95e-05, |
|
"loss": 1.9951, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.021310181531176007, |
|
"grad_norm": 2.3428969383239746, |
|
"learning_rate": 2.025e-05, |
|
"loss": 2.0484, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.022099447513812154, |
|
"grad_norm": 5.09832239151001, |
|
"learning_rate": 2.1e-05, |
|
"loss": 2.1136, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.022888713496448304, |
|
"grad_norm": 114.6532211303711, |
|
"learning_rate": 2.175e-05, |
|
"loss": 1.9234, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.02367797947908445, |
|
"grad_norm": 4.587088108062744, |
|
"learning_rate": 2.25e-05, |
|
"loss": 1.9931, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0244672454617206, |
|
"grad_norm": 24.365489959716797, |
|
"learning_rate": 2.3250000000000003e-05, |
|
"loss": 2.1458, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.025256511444356748, |
|
"grad_norm": 4.502379417419434, |
|
"learning_rate": 2.4e-05, |
|
"loss": 2.0772, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.026045777426992895, |
|
"grad_norm": 3.6276373863220215, |
|
"learning_rate": 2.475e-05, |
|
"loss": 2.0606, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.026835043409629045, |
|
"grad_norm": 5.00884485244751, |
|
"learning_rate": 2.55e-05, |
|
"loss": 2.1513, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.027624309392265192, |
|
"grad_norm": 3.615124225616455, |
|
"learning_rate": 2.625e-05, |
|
"loss": 2.0823, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.028413575374901343, |
|
"grad_norm": 2.078237771987915, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 2.0344, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.02920284135753749, |
|
"grad_norm": 4.229931354522705, |
|
"learning_rate": 2.7750000000000004e-05, |
|
"loss": 2.0605, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.02999210734017364, |
|
"grad_norm": 4.0947113037109375, |
|
"learning_rate": 2.8499999999999998e-05, |
|
"loss": 2.1708, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.030781373322809787, |
|
"grad_norm": 12.784259796142578, |
|
"learning_rate": 2.925e-05, |
|
"loss": 2.2762, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03157063930544594, |
|
"grad_norm": 3.9042775630950928, |
|
"learning_rate": 3e-05, |
|
"loss": 2.0888, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03235990528808208, |
|
"grad_norm": 4.025945663452148, |
|
"learning_rate": 2.999998809942206e-05, |
|
"loss": 2.031, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.03314917127071823, |
|
"grad_norm": 4.266918659210205, |
|
"learning_rate": 2.9999952397707115e-05, |
|
"loss": 2.0641, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.03393843725335438, |
|
"grad_norm": 5.787127494812012, |
|
"learning_rate": 2.9999892894911822e-05, |
|
"loss": 2.0462, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.03472770323599053, |
|
"grad_norm": 8.19904613494873, |
|
"learning_rate": 2.999980959113059e-05, |
|
"loss": 2.1786, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.035516969218626675, |
|
"grad_norm": 5.060120582580566, |
|
"learning_rate": 2.999970248649561e-05, |
|
"loss": 2.1286, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.036306235201262825, |
|
"grad_norm": 3.672504425048828, |
|
"learning_rate": 2.9999571581176817e-05, |
|
"loss": 2.0609, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.037095501183898975, |
|
"grad_norm": 10.494200706481934, |
|
"learning_rate": 2.999941687538193e-05, |
|
"loss": 2.125, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.037884767166535126, |
|
"grad_norm": 10.003849029541016, |
|
"learning_rate": 2.9999238369356434e-05, |
|
"loss": 2.1308, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.03867403314917127, |
|
"grad_norm": 33.990047454833984, |
|
"learning_rate": 2.9999036063383564e-05, |
|
"loss": 2.1923, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.03946329913180742, |
|
"grad_norm": 4.710549831390381, |
|
"learning_rate": 2.999880995778433e-05, |
|
"loss": 2.1082, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04025256511444357, |
|
"grad_norm": 7.869533061981201, |
|
"learning_rate": 2.9998560052917504e-05, |
|
"loss": 2.1733, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.04104183109707971, |
|
"grad_norm": 3.3192622661590576, |
|
"learning_rate": 2.999828634917962e-05, |
|
"loss": 2.1189, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.041831097079715863, |
|
"grad_norm": 4.055788516998291, |
|
"learning_rate": 2.999798884700498e-05, |
|
"loss": 2.1299, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.042620363062352014, |
|
"grad_norm": 4.032358169555664, |
|
"learning_rate": 2.999766754686564e-05, |
|
"loss": 2.1634, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.043409629044988164, |
|
"grad_norm": 2.2070775032043457, |
|
"learning_rate": 2.9997322449271417e-05, |
|
"loss": 2.0969, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04419889502762431, |
|
"grad_norm": 3.4423580169677734, |
|
"learning_rate": 2.99969535547699e-05, |
|
"loss": 2.0635, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.04498816101026046, |
|
"grad_norm": 2.4554710388183594, |
|
"learning_rate": 2.9996560863946424e-05, |
|
"loss": 2.0655, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.04577742699289661, |
|
"grad_norm": 4.392270088195801, |
|
"learning_rate": 2.999614437742409e-05, |
|
"loss": 2.1193, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.04656669297553275, |
|
"grad_norm": 2.630579710006714, |
|
"learning_rate": 2.999570409586376e-05, |
|
"loss": 2.0359, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.0473559589581689, |
|
"grad_norm": 7.560144424438477, |
|
"learning_rate": 2.9995240019964048e-05, |
|
"loss": 2.0961, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04814522494080505, |
|
"grad_norm": 3.4002232551574707, |
|
"learning_rate": 2.9994752150461317e-05, |
|
"loss": 2.089, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0489344909234412, |
|
"grad_norm": 2.383335590362549, |
|
"learning_rate": 2.9994240488129696e-05, |
|
"loss": 2.0589, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.049723756906077346, |
|
"grad_norm": 2.5884249210357666, |
|
"learning_rate": 2.9993705033781057e-05, |
|
"loss": 2.0677, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.050513022888713496, |
|
"grad_norm": 6.4155120849609375, |
|
"learning_rate": 2.9993145788265035e-05, |
|
"loss": 2.1962, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.05130228887134965, |
|
"grad_norm": 3.5735418796539307, |
|
"learning_rate": 2.9992562752469007e-05, |
|
"loss": 2.1703, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.05209155485398579, |
|
"grad_norm": 2.7267649173736572, |
|
"learning_rate": 2.9991955927318098e-05, |
|
"loss": 2.084, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.05288082083662194, |
|
"grad_norm": 5.19821834564209, |
|
"learning_rate": 2.9991325313775187e-05, |
|
"loss": 2.0788, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.05367008681925809, |
|
"grad_norm": 4.282698631286621, |
|
"learning_rate": 2.9990670912840894e-05, |
|
"loss": 2.0828, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.05445935280189424, |
|
"grad_norm": 3.5214319229125977, |
|
"learning_rate": 2.998999272555359e-05, |
|
"loss": 2.0621, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.055248618784530384, |
|
"grad_norm": 2.759268283843994, |
|
"learning_rate": 2.9989290752989383e-05, |
|
"loss": 2.0133, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.056037884767166535, |
|
"grad_norm": 3.1185007095336914, |
|
"learning_rate": 2.9988564996262122e-05, |
|
"loss": 2.055, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.056827150749802685, |
|
"grad_norm": 2.4150500297546387, |
|
"learning_rate": 2.9987815456523395e-05, |
|
"loss": 2.0358, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.05761641673243883, |
|
"grad_norm": 2.4296348094940186, |
|
"learning_rate": 2.9987042134962534e-05, |
|
"loss": 2.0821, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.05840568271507498, |
|
"grad_norm": 4.594061851501465, |
|
"learning_rate": 2.99862450328066e-05, |
|
"loss": 2.0983, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.05919494869771113, |
|
"grad_norm": 3.1533656120300293, |
|
"learning_rate": 2.9985424151320388e-05, |
|
"loss": 2.0465, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05998421468034728, |
|
"grad_norm": 3.550783634185791, |
|
"learning_rate": 2.9984579491806428e-05, |
|
"loss": 2.2023, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.06077348066298342, |
|
"grad_norm": 3.503725051879883, |
|
"learning_rate": 2.998371105560498e-05, |
|
"loss": 2.1149, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.06156274664561957, |
|
"grad_norm": 2.6297385692596436, |
|
"learning_rate": 2.998281884409403e-05, |
|
"loss": 2.1017, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.062352012628255724, |
|
"grad_norm": 2.355755090713501, |
|
"learning_rate": 2.9981902858689287e-05, |
|
"loss": 2.0615, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.06314127861089187, |
|
"grad_norm": 4.401884078979492, |
|
"learning_rate": 2.9980963100844184e-05, |
|
"loss": 2.0501, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06393054459352802, |
|
"grad_norm": 2.54091215133667, |
|
"learning_rate": 2.9979999572049876e-05, |
|
"loss": 2.0138, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.06471981057616416, |
|
"grad_norm": 6.047756671905518, |
|
"learning_rate": 2.9979012273835237e-05, |
|
"loss": 2.082, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.06550907655880031, |
|
"grad_norm": 6.175260543823242, |
|
"learning_rate": 2.9978001207766858e-05, |
|
"loss": 2.2342, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.06629834254143646, |
|
"grad_norm": 2.3510050773620605, |
|
"learning_rate": 2.997696637544904e-05, |
|
"loss": 2.0092, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.06708760852407261, |
|
"grad_norm": 5.738100051879883, |
|
"learning_rate": 2.9975907778523802e-05, |
|
"loss": 2.0805, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06787687450670876, |
|
"grad_norm": 3.1302692890167236, |
|
"learning_rate": 2.9974825418670854e-05, |
|
"loss": 2.08, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.06866614048934491, |
|
"grad_norm": 3.3828234672546387, |
|
"learning_rate": 2.9973719297607634e-05, |
|
"loss": 2.0314, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.06945540647198106, |
|
"grad_norm": 2.4826507568359375, |
|
"learning_rate": 2.9972589417089267e-05, |
|
"loss": 2.0798, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0702446724546172, |
|
"grad_norm": 1.8656960725784302, |
|
"learning_rate": 2.997143577890859e-05, |
|
"loss": 2.005, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.07103393843725335, |
|
"grad_norm": 2.417743444442749, |
|
"learning_rate": 2.9970258384896127e-05, |
|
"loss": 2.0352, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0718232044198895, |
|
"grad_norm": 5.928717613220215, |
|
"learning_rate": 2.9969057236920102e-05, |
|
"loss": 2.1684, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.07261247040252565, |
|
"grad_norm": 2.976854085922241, |
|
"learning_rate": 2.9967832336886425e-05, |
|
"loss": 2.0208, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0734017363851618, |
|
"grad_norm": 2.003722906112671, |
|
"learning_rate": 2.99665836867387e-05, |
|
"loss": 2.008, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.07419100236779795, |
|
"grad_norm": 3.6728177070617676, |
|
"learning_rate": 2.996531128845822e-05, |
|
"loss": 2.0774, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.0749802683504341, |
|
"grad_norm": 2.2868685722351074, |
|
"learning_rate": 2.996401514406395e-05, |
|
"loss": 2.0518, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.07576953433307025, |
|
"grad_norm": 16.315637588500977, |
|
"learning_rate": 2.996269525561254e-05, |
|
"loss": 2.265, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.07655880031570639, |
|
"grad_norm": 6.387162685394287, |
|
"learning_rate": 2.9961351625198315e-05, |
|
"loss": 2.0423, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.07734806629834254, |
|
"grad_norm": 3.667564868927002, |
|
"learning_rate": 2.9959984254953276e-05, |
|
"loss": 2.0816, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.07813733228097869, |
|
"grad_norm": 5.106146812438965, |
|
"learning_rate": 2.9958593147047084e-05, |
|
"loss": 2.1309, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.07892659826361484, |
|
"grad_norm": 4.6215925216674805, |
|
"learning_rate": 2.9957178303687066e-05, |
|
"loss": 2.0565, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07971586424625099, |
|
"grad_norm": 3.13112735748291, |
|
"learning_rate": 2.9955739727118227e-05, |
|
"loss": 2.1061, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.08050513022888714, |
|
"grad_norm": 2.501467227935791, |
|
"learning_rate": 2.995427741962321e-05, |
|
"loss": 1.9896, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.08129439621152329, |
|
"grad_norm": 2.8199145793914795, |
|
"learning_rate": 2.9952791383522333e-05, |
|
"loss": 2.0923, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.08208366219415943, |
|
"grad_norm": 4.113518714904785, |
|
"learning_rate": 2.9951281621173547e-05, |
|
"loss": 2.1615, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.08287292817679558, |
|
"grad_norm": 2.821820020675659, |
|
"learning_rate": 2.9949748134972454e-05, |
|
"loss": 2.0332, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08366219415943173, |
|
"grad_norm": 1.9956532716751099, |
|
"learning_rate": 2.9948190927352313e-05, |
|
"loss": 2.0673, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.08445146014206788, |
|
"grad_norm": 2.073834180831909, |
|
"learning_rate": 2.9946610000784006e-05, |
|
"loss": 2.0746, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.08524072612470403, |
|
"grad_norm": 2.5564310550689697, |
|
"learning_rate": 2.9945005357776064e-05, |
|
"loss": 1.9765, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.08602999210734018, |
|
"grad_norm": 1.676325798034668, |
|
"learning_rate": 2.9943377000874635e-05, |
|
"loss": 1.9988, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.08681925808997633, |
|
"grad_norm": 2.4475619792938232, |
|
"learning_rate": 2.9941724932663517e-05, |
|
"loss": 1.9959, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08760852407261246, |
|
"grad_norm": 5.736175060272217, |
|
"learning_rate": 2.9940049155764106e-05, |
|
"loss": 2.1744, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.08839779005524862, |
|
"grad_norm": 3.406623363494873, |
|
"learning_rate": 2.9938349672835443e-05, |
|
"loss": 2.1555, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.08918705603788477, |
|
"grad_norm": 2.378791332244873, |
|
"learning_rate": 2.9936626486574165e-05, |
|
"loss": 2.0615, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.08997632202052092, |
|
"grad_norm": 2.432135581970215, |
|
"learning_rate": 2.9934879599714525e-05, |
|
"loss": 2.0743, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.09076558800315707, |
|
"grad_norm": 2.3118343353271484, |
|
"learning_rate": 2.9933109015028388e-05, |
|
"loss": 2.0682, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.09155485398579322, |
|
"grad_norm": 1.8455097675323486, |
|
"learning_rate": 2.993131473532522e-05, |
|
"loss": 2.022, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.09234411996842937, |
|
"grad_norm": 1.897011637687683, |
|
"learning_rate": 2.9929496763452077e-05, |
|
"loss": 2.0156, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.0931333859510655, |
|
"grad_norm": 1.9447441101074219, |
|
"learning_rate": 2.992765510229362e-05, |
|
"loss": 2.021, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.09392265193370165, |
|
"grad_norm": 1.878902554512024, |
|
"learning_rate": 2.9925789754772097e-05, |
|
"loss": 2.0258, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.0947119179163378, |
|
"grad_norm": 1.7902443408966064, |
|
"learning_rate": 2.9923900723847323e-05, |
|
"loss": 2.0541, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09550118389897395, |
|
"grad_norm": 2.5857603549957275, |
|
"learning_rate": 2.9921988012516717e-05, |
|
"loss": 2.0374, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.0962904498816101, |
|
"grad_norm": 1.7749550342559814, |
|
"learning_rate": 2.992005162381526e-05, |
|
"loss": 2.0192, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.09707971586424625, |
|
"grad_norm": 1.388109564781189, |
|
"learning_rate": 2.99180915608155e-05, |
|
"loss": 2.025, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.0978689818468824, |
|
"grad_norm": 2.3288400173187256, |
|
"learning_rate": 2.9916107826627557e-05, |
|
"loss": 1.9993, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.09865824782951854, |
|
"grad_norm": 1.9639314413070679, |
|
"learning_rate": 2.9914100424399115e-05, |
|
"loss": 2.0216, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.09944751381215469, |
|
"grad_norm": 5.6544108390808105, |
|
"learning_rate": 2.9912069357315394e-05, |
|
"loss": 2.158, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.10023677979479084, |
|
"grad_norm": 1.9935773611068726, |
|
"learning_rate": 2.9910014628599188e-05, |
|
"loss": 2.0447, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.10102604577742699, |
|
"grad_norm": 1.5992683172225952, |
|
"learning_rate": 2.9907936241510822e-05, |
|
"loss": 2.084, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.10181531176006314, |
|
"grad_norm": 2.9652304649353027, |
|
"learning_rate": 2.9905834199348165e-05, |
|
"loss": 2.0843, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.1026045777426993, |
|
"grad_norm": 1.8616968393325806, |
|
"learning_rate": 2.9903708505446618e-05, |
|
"loss": 2.0871, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10339384372533544, |
|
"grad_norm": 1.7061188220977783, |
|
"learning_rate": 2.9901559163179105e-05, |
|
"loss": 2.0342, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.10418310970797158, |
|
"grad_norm": 2.443295955657959, |
|
"learning_rate": 2.989938617595609e-05, |
|
"loss": 2.0476, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.10497237569060773, |
|
"grad_norm": 2.450977087020874, |
|
"learning_rate": 2.989718954722555e-05, |
|
"loss": 2.0669, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.10576164167324388, |
|
"grad_norm": 1.9823421239852905, |
|
"learning_rate": 2.9894969280472964e-05, |
|
"loss": 2.0512, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.10655090765588003, |
|
"grad_norm": 2.8827438354492188, |
|
"learning_rate": 2.9892725379221327e-05, |
|
"loss": 2.0744, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.10734017363851618, |
|
"grad_norm": 3.418274164199829, |
|
"learning_rate": 2.989045784703114e-05, |
|
"loss": 2.0862, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.10812943962115233, |
|
"grad_norm": 3.35532808303833, |
|
"learning_rate": 2.9888166687500397e-05, |
|
"loss": 2.1452, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.10891870560378848, |
|
"grad_norm": 12.882590293884277, |
|
"learning_rate": 2.988585190426457e-05, |
|
"loss": 2.045, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.10970797158642462, |
|
"grad_norm": 7.744283676147461, |
|
"learning_rate": 2.9883513500996636e-05, |
|
"loss": 2.0628, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.11049723756906077, |
|
"grad_norm": 3.180764675140381, |
|
"learning_rate": 2.988115148140704e-05, |
|
"loss": 2.1304, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11128650355169692, |
|
"grad_norm": 2.4699342250823975, |
|
"learning_rate": 2.9878765849243697e-05, |
|
"loss": 2.0432, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.11207576953433307, |
|
"grad_norm": 2.3269429206848145, |
|
"learning_rate": 2.9876356608292002e-05, |
|
"loss": 2.1071, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.11286503551696922, |
|
"grad_norm": 3.243727684020996, |
|
"learning_rate": 2.9873923762374794e-05, |
|
"loss": 2.0571, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.11365430149960537, |
|
"grad_norm": 1.7754957675933838, |
|
"learning_rate": 2.9871467315352386e-05, |
|
"loss": 2.0491, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.11444356748224152, |
|
"grad_norm": 3.8351848125457764, |
|
"learning_rate": 2.9868987271122523e-05, |
|
"loss": 1.9965, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.11523283346487766, |
|
"grad_norm": 1.8786120414733887, |
|
"learning_rate": 2.9866483633620404e-05, |
|
"loss": 2.0471, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.11602209944751381, |
|
"grad_norm": 2.9970719814300537, |
|
"learning_rate": 2.9863956406818656e-05, |
|
"loss": 2.0535, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.11681136543014996, |
|
"grad_norm": 2.0963001251220703, |
|
"learning_rate": 2.986140559472734e-05, |
|
"loss": 2.0334, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.11760063141278611, |
|
"grad_norm": 4.035261154174805, |
|
"learning_rate": 2.9858831201393943e-05, |
|
"loss": 2.0474, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.11838989739542226, |
|
"grad_norm": 2.755239963531494, |
|
"learning_rate": 2.9856233230903368e-05, |
|
"loss": 2.0366, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11917916337805841, |
|
"grad_norm": 3.6331722736358643, |
|
"learning_rate": 2.985361168737793e-05, |
|
"loss": 2.0431, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.11996842936069456, |
|
"grad_norm": 2.390591859817505, |
|
"learning_rate": 2.985096657497734e-05, |
|
"loss": 2.0486, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.12075769534333071, |
|
"grad_norm": 5.158343315124512, |
|
"learning_rate": 2.9848297897898724e-05, |
|
"loss": 2.09, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.12154696132596685, |
|
"grad_norm": 5.338906764984131, |
|
"learning_rate": 2.9845605660376577e-05, |
|
"loss": 2.1369, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.122336227308603, |
|
"grad_norm": 2.3366825580596924, |
|
"learning_rate": 2.98428898666828e-05, |
|
"loss": 2.0233, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.12312549329123915, |
|
"grad_norm": 3.8773341178894043, |
|
"learning_rate": 2.9840150521126656e-05, |
|
"loss": 2.0668, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.1239147592738753, |
|
"grad_norm": 2.9581716060638428, |
|
"learning_rate": 2.9837387628054782e-05, |
|
"loss": 2.0564, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.12470402525651145, |
|
"grad_norm": 3.0984768867492676, |
|
"learning_rate": 2.9834601191851187e-05, |
|
"loss": 2.0288, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.1254932912391476, |
|
"grad_norm": 2.5730841159820557, |
|
"learning_rate": 2.9831791216937227e-05, |
|
"loss": 2.0555, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.12628255722178375, |
|
"grad_norm": 1.9970016479492188, |
|
"learning_rate": 2.982895770777162e-05, |
|
"loss": 2.0139, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1270718232044199, |
|
"grad_norm": 3.020761728286743, |
|
"learning_rate": 2.9826100668850408e-05, |
|
"loss": 2.0301, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.12786108918705605, |
|
"grad_norm": 2.456298828125, |
|
"learning_rate": 2.9823220104706988e-05, |
|
"loss": 2.0326, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.1286503551696922, |
|
"grad_norm": 3.4278247356414795, |
|
"learning_rate": 2.982031601991207e-05, |
|
"loss": 2.0017, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.12943962115232832, |
|
"grad_norm": 3.299609899520874, |
|
"learning_rate": 2.9817388419073695e-05, |
|
"loss": 2.0219, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.13022888713496447, |
|
"grad_norm": 3.0248196125030518, |
|
"learning_rate": 2.981443730683722e-05, |
|
"loss": 2.1489, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.13101815311760062, |
|
"grad_norm": 3.821983575820923, |
|
"learning_rate": 2.98114626878853e-05, |
|
"loss": 2.0333, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.13180741910023677, |
|
"grad_norm": 2.1882872581481934, |
|
"learning_rate": 2.9808464566937887e-05, |
|
"loss": 2.0813, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.13259668508287292, |
|
"grad_norm": 4.968083381652832, |
|
"learning_rate": 2.9805442948752242e-05, |
|
"loss": 2.0832, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.13338595106550907, |
|
"grad_norm": 4.123311519622803, |
|
"learning_rate": 2.9802397838122895e-05, |
|
"loss": 2.0222, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.13417521704814522, |
|
"grad_norm": 2.42797589302063, |
|
"learning_rate": 2.979932923988165e-05, |
|
"loss": 2.0298, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.13496448303078137, |
|
"grad_norm": 2.7780134677886963, |
|
"learning_rate": 2.979623715889759e-05, |
|
"loss": 1.9963, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.13575374901341752, |
|
"grad_norm": 1.9025119543075562, |
|
"learning_rate": 2.9793121600077058e-05, |
|
"loss": 1.9975, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.13654301499605367, |
|
"grad_norm": 1.8044270277023315, |
|
"learning_rate": 2.9789982568363643e-05, |
|
"loss": 2.0171, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.13733228097868982, |
|
"grad_norm": 2.5506300926208496, |
|
"learning_rate": 2.9786820068738186e-05, |
|
"loss": 2.0043, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.13812154696132597, |
|
"grad_norm": 2.364445447921753, |
|
"learning_rate": 2.978363410621877e-05, |
|
"loss": 2.0384, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.13891081294396213, |
|
"grad_norm": 3.070680618286133, |
|
"learning_rate": 2.9780424685860686e-05, |
|
"loss": 2.0334, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.13970007892659828, |
|
"grad_norm": 2.494616746902466, |
|
"learning_rate": 2.9777191812756474e-05, |
|
"loss": 2.0816, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.1404893449092344, |
|
"grad_norm": 2.9060347080230713, |
|
"learning_rate": 2.9773935492035868e-05, |
|
"loss": 2.1325, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.14127861089187055, |
|
"grad_norm": 3.662177801132202, |
|
"learning_rate": 2.977065572886582e-05, |
|
"loss": 2.0774, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.1420678768745067, |
|
"grad_norm": 1.8172063827514648, |
|
"learning_rate": 2.976735252845047e-05, |
|
"loss": 1.9909, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 2.4916176795959473, |
|
"learning_rate": 2.9764025896031154e-05, |
|
"loss": 2.0537, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.143646408839779, |
|
"grad_norm": 1.9986275434494019, |
|
"learning_rate": 2.9760675836886383e-05, |
|
"loss": 2.055, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.14443567482241515, |
|
"grad_norm": 1.7363250255584717, |
|
"learning_rate": 2.975730235633184e-05, |
|
"loss": 1.9759, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.1452249408050513, |
|
"grad_norm": 2.0071136951446533, |
|
"learning_rate": 2.9753905459720373e-05, |
|
"loss": 2.074, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.14601420678768745, |
|
"grad_norm": 1.985868215560913, |
|
"learning_rate": 2.975048515244199e-05, |
|
"loss": 1.9924, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.1468034727703236, |
|
"grad_norm": 1.816689133644104, |
|
"learning_rate": 2.9747041439923848e-05, |
|
"loss": 1.9802, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.14759273875295975, |
|
"grad_norm": 1.7806591987609863, |
|
"learning_rate": 2.9743574327630223e-05, |
|
"loss": 1.9926, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.1483820047355959, |
|
"grad_norm": 2.3675436973571777, |
|
"learning_rate": 2.9740083821062548e-05, |
|
"loss": 2.0365, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.14917127071823205, |
|
"grad_norm": 2.0345346927642822, |
|
"learning_rate": 2.9736569925759348e-05, |
|
"loss": 2.0975, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.1499605367008682, |
|
"grad_norm": 1.685162901878357, |
|
"learning_rate": 2.9733032647296285e-05, |
|
"loss": 2.036, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15074980268350435, |
|
"grad_norm": 2.479959487915039, |
|
"learning_rate": 2.9729471991286112e-05, |
|
"loss": 2.0894, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.1515390686661405, |
|
"grad_norm": 1.6787084341049194, |
|
"learning_rate": 2.972588796337867e-05, |
|
"loss": 2.0565, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.15232833464877663, |
|
"grad_norm": 1.9295579195022583, |
|
"learning_rate": 2.9722280569260903e-05, |
|
"loss": 2.0168, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.15311760063141278, |
|
"grad_norm": 2.2043139934539795, |
|
"learning_rate": 2.971864981465681e-05, |
|
"loss": 2.0041, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.15390686661404893, |
|
"grad_norm": 1.823622703552246, |
|
"learning_rate": 2.971499570532748e-05, |
|
"loss": 2.032, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.15469613259668508, |
|
"grad_norm": 2.536930561065674, |
|
"learning_rate": 2.971131824707104e-05, |
|
"loss": 2.0314, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.15548539857932123, |
|
"grad_norm": 1.9996862411499023, |
|
"learning_rate": 2.9707617445722675e-05, |
|
"loss": 2.0447, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.15627466456195738, |
|
"grad_norm": 3.0000147819519043, |
|
"learning_rate": 2.970389330715461e-05, |
|
"loss": 2.005, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.15706393054459353, |
|
"grad_norm": 2.3367671966552734, |
|
"learning_rate": 2.9700145837276104e-05, |
|
"loss": 2.0679, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.15785319652722968, |
|
"grad_norm": 2.8640778064727783, |
|
"learning_rate": 2.9696375042033418e-05, |
|
"loss": 2.0307, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15864246250986583, |
|
"grad_norm": 3.0491254329681396, |
|
"learning_rate": 2.9692580927409845e-05, |
|
"loss": 2.0275, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.15943172849250198, |
|
"grad_norm": 2.3943986892700195, |
|
"learning_rate": 2.9688763499425674e-05, |
|
"loss": 2.0167, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.16022099447513813, |
|
"grad_norm": 3.376763343811035, |
|
"learning_rate": 2.9684922764138184e-05, |
|
"loss": 2.0733, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.16101026045777428, |
|
"grad_norm": 1.7241394519805908, |
|
"learning_rate": 2.9681058727641635e-05, |
|
"loss": 2.0159, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.16179952644041043, |
|
"grad_norm": 2.2213003635406494, |
|
"learning_rate": 2.9677171396067268e-05, |
|
"loss": 2.0833, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.16258879242304658, |
|
"grad_norm": 2.832461357116699, |
|
"learning_rate": 2.967326077558328e-05, |
|
"loss": 2.0226, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.1633780584056827, |
|
"grad_norm": 1.7477656602859497, |
|
"learning_rate": 2.9669326872394816e-05, |
|
"loss": 2.0271, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.16416732438831885, |
|
"grad_norm": 2.8195369243621826, |
|
"learning_rate": 2.9665369692743982e-05, |
|
"loss": 2.0262, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.164956590370955, |
|
"grad_norm": 2.8796699047088623, |
|
"learning_rate": 2.96613892429098e-05, |
|
"loss": 2.0593, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.16574585635359115, |
|
"grad_norm": 2.3450050354003906, |
|
"learning_rate": 2.9657385529208232e-05, |
|
"loss": 2.0637, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1665351223362273, |
|
"grad_norm": 1.9920148849487305, |
|
"learning_rate": 2.9653358557992144e-05, |
|
"loss": 2.0372, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.16732438831886345, |
|
"grad_norm": 2.1870229244232178, |
|
"learning_rate": 2.96493083356513e-05, |
|
"loss": 2.0295, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.1681136543014996, |
|
"grad_norm": 1.792230248451233, |
|
"learning_rate": 2.964523486861237e-05, |
|
"loss": 2.0277, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.16890292028413575, |
|
"grad_norm": 2.0910332202911377, |
|
"learning_rate": 2.964113816333891e-05, |
|
"loss": 1.9782, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.1696921862667719, |
|
"grad_norm": 1.8934677839279175, |
|
"learning_rate": 2.9637018226331324e-05, |
|
"loss": 2.02, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.17048145224940806, |
|
"grad_norm": 2.7041609287261963, |
|
"learning_rate": 2.9632875064126913e-05, |
|
"loss": 1.9993, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.1712707182320442, |
|
"grad_norm": 2.318772077560425, |
|
"learning_rate": 2.9628708683299803e-05, |
|
"loss": 2.0641, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.17205998421468036, |
|
"grad_norm": 2.512057304382324, |
|
"learning_rate": 2.9624519090460977e-05, |
|
"loss": 2.092, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.1728492501973165, |
|
"grad_norm": 4.763547897338867, |
|
"learning_rate": 2.9620306292258244e-05, |
|
"loss": 1.9952, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.17363851617995266, |
|
"grad_norm": 1.4289495944976807, |
|
"learning_rate": 2.9616070295376236e-05, |
|
"loss": 1.9942, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.17442778216258878, |
|
"grad_norm": 1.9937539100646973, |
|
"learning_rate": 2.9611811106536392e-05, |
|
"loss": 1.979, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.17521704814522493, |
|
"grad_norm": 3.0298221111297607, |
|
"learning_rate": 2.9607528732496956e-05, |
|
"loss": 2.0111, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.17600631412786108, |
|
"grad_norm": 1.8235526084899902, |
|
"learning_rate": 2.9603223180052958e-05, |
|
"loss": 2.023, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.17679558011049723, |
|
"grad_norm": 1.3632335662841797, |
|
"learning_rate": 2.9598894456036202e-05, |
|
"loss": 2.0136, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.17758484609313338, |
|
"grad_norm": 1.8402268886566162, |
|
"learning_rate": 2.959454256731527e-05, |
|
"loss": 1.9844, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.17837411207576953, |
|
"grad_norm": 1.385807991027832, |
|
"learning_rate": 2.9590167520795487e-05, |
|
"loss": 2.0579, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.17916337805840568, |
|
"grad_norm": 1.4608477354049683, |
|
"learning_rate": 2.9585769323418944e-05, |
|
"loss": 1.9635, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.17995264404104183, |
|
"grad_norm": 4.646909236907959, |
|
"learning_rate": 2.9581347982164436e-05, |
|
"loss": 2.0006, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.18074191002367798, |
|
"grad_norm": 4.385404586791992, |
|
"learning_rate": 2.9576903504047507e-05, |
|
"loss": 2.0464, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.18153117600631413, |
|
"grad_norm": 2.0274832248687744, |
|
"learning_rate": 2.9572435896120408e-05, |
|
"loss": 2.0775, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.18232044198895028, |
|
"grad_norm": 2.401803493499756, |
|
"learning_rate": 2.9567945165472082e-05, |
|
"loss": 2.0186, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.18310970797158643, |
|
"grad_norm": 2.278960943222046, |
|
"learning_rate": 2.9563431319228168e-05, |
|
"loss": 2.0229, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.18389897395422258, |
|
"grad_norm": 2.016186237335205, |
|
"learning_rate": 2.955889436455099e-05, |
|
"loss": 2.0598, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.18468823993685873, |
|
"grad_norm": 5.457252025604248, |
|
"learning_rate": 2.955433430863952e-05, |
|
"loss": 2.0805, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.18547750591949486, |
|
"grad_norm": 3.5198957920074463, |
|
"learning_rate": 2.9549751158729413e-05, |
|
"loss": 2.0738, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.186266771902131, |
|
"grad_norm": 1.9037076234817505, |
|
"learning_rate": 2.954514492209294e-05, |
|
"loss": 2.0021, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.18705603788476716, |
|
"grad_norm": 3.1855623722076416, |
|
"learning_rate": 2.9540515606039027e-05, |
|
"loss": 2.0158, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.1878453038674033, |
|
"grad_norm": 3.4484121799468994, |
|
"learning_rate": 2.9535863217913207e-05, |
|
"loss": 2.0114, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.18863456985003946, |
|
"grad_norm": 2.1158602237701416, |
|
"learning_rate": 2.9531187765097628e-05, |
|
"loss": 2.0541, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.1894238358326756, |
|
"grad_norm": 2.0095179080963135, |
|
"learning_rate": 2.9526489255011045e-05, |
|
"loss": 2.0216, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.19021310181531176, |
|
"grad_norm": 2.5604724884033203, |
|
"learning_rate": 2.9521767695108774e-05, |
|
"loss": 2.1093, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.1910023677979479, |
|
"grad_norm": 2.263054609298706, |
|
"learning_rate": 2.951702309288273e-05, |
|
"loss": 2.1129, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.19179163378058406, |
|
"grad_norm": 2.0415761470794678, |
|
"learning_rate": 2.9512255455861378e-05, |
|
"loss": 2.0314, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.1925808997632202, |
|
"grad_norm": 2.0080976486206055, |
|
"learning_rate": 2.950746479160974e-05, |
|
"loss": 2.0588, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.19337016574585636, |
|
"grad_norm": 2.1279237270355225, |
|
"learning_rate": 2.9502651107729368e-05, |
|
"loss": 2.0065, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.1941594317284925, |
|
"grad_norm": 1.4809678792953491, |
|
"learning_rate": 2.9497814411858345e-05, |
|
"loss": 2.0148, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.19494869771112866, |
|
"grad_norm": 1.6292145252227783, |
|
"learning_rate": 2.949295471167127e-05, |
|
"loss": 2.0296, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.1957379636937648, |
|
"grad_norm": 1.6641464233398438, |
|
"learning_rate": 2.948807201487924e-05, |
|
"loss": 2.0343, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.19652722967640096, |
|
"grad_norm": 1.5582904815673828, |
|
"learning_rate": 2.9483166329229848e-05, |
|
"loss": 2.0254, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.19731649565903708, |
|
"grad_norm": 1.716148853302002, |
|
"learning_rate": 2.947823766250715e-05, |
|
"loss": 2.0208, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.19810576164167323, |
|
"grad_norm": 1.9706841707229614, |
|
"learning_rate": 2.947328602253169e-05, |
|
"loss": 2.0787, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.19889502762430938, |
|
"grad_norm": 1.5452086925506592, |
|
"learning_rate": 2.9468311417160437e-05, |
|
"loss": 2.0004, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.19968429360694553, |
|
"grad_norm": 2.418027400970459, |
|
"learning_rate": 2.9463313854286827e-05, |
|
"loss": 2.1183, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.20047355958958168, |
|
"grad_norm": 1.6865825653076172, |
|
"learning_rate": 2.9458293341840708e-05, |
|
"loss": 2.0038, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.20126282557221783, |
|
"grad_norm": 2.1472463607788086, |
|
"learning_rate": 2.9453249887788343e-05, |
|
"loss": 2.0005, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.20205209155485399, |
|
"grad_norm": 1.4692081212997437, |
|
"learning_rate": 2.9448183500132407e-05, |
|
"loss": 2.0358, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.20284135753749014, |
|
"grad_norm": 1.3552985191345215, |
|
"learning_rate": 2.9443094186911955e-05, |
|
"loss": 2.0313, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.20363062352012629, |
|
"grad_norm": 1.4547805786132812, |
|
"learning_rate": 2.9437981956202422e-05, |
|
"loss": 2.0203, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.20441988950276244, |
|
"grad_norm": 1.7660293579101562, |
|
"learning_rate": 2.9432846816115614e-05, |
|
"loss": 2.0385, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.2052091554853986, |
|
"grad_norm": 1.439816951751709, |
|
"learning_rate": 2.942768877479967e-05, |
|
"loss": 1.9926, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.20599842146803474, |
|
"grad_norm": 1.6005187034606934, |
|
"learning_rate": 2.9422507840439095e-05, |
|
"loss": 1.973, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.2067876874506709, |
|
"grad_norm": 1.9543637037277222, |
|
"learning_rate": 2.9417304021254697e-05, |
|
"loss": 2.0519, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.20757695343330704, |
|
"grad_norm": 1.4007548093795776, |
|
"learning_rate": 2.94120773255036e-05, |
|
"loss": 2.0292, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.20836621941594316, |
|
"grad_norm": 1.6729865074157715, |
|
"learning_rate": 2.9406827761479233e-05, |
|
"loss": 1.9558, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.2091554853985793, |
|
"grad_norm": 1.6150462627410889, |
|
"learning_rate": 2.940155533751131e-05, |
|
"loss": 1.9898, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.20994475138121546, |
|
"grad_norm": 1.9057928323745728, |
|
"learning_rate": 2.9396260061965816e-05, |
|
"loss": 2.0723, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.2107340173638516, |
|
"grad_norm": 1.8108017444610596, |
|
"learning_rate": 2.9390941943244996e-05, |
|
"loss": 2.0714, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.21152328334648776, |
|
"grad_norm": 1.887980341911316, |
|
"learning_rate": 2.9385600989787346e-05, |
|
"loss": 2.015, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.2123125493291239, |
|
"grad_norm": 1.840850830078125, |
|
"learning_rate": 2.938023721006758e-05, |
|
"loss": 2.033, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.21310181531176006, |
|
"grad_norm": 2.9777164459228516, |
|
"learning_rate": 2.9374850612596652e-05, |
|
"loss": 2.0825, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2138910812943962, |
|
"grad_norm": 3.170605421066284, |
|
"learning_rate": 2.9369441205921708e-05, |
|
"loss": 2.0824, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.21468034727703236, |
|
"grad_norm": 2.142885446548462, |
|
"learning_rate": 2.936400899862609e-05, |
|
"loss": 2.0829, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.2154696132596685, |
|
"grad_norm": 1.4462889432907104, |
|
"learning_rate": 2.9358553999329317e-05, |
|
"loss": 2.021, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.21625887924230466, |
|
"grad_norm": 1.5857114791870117, |
|
"learning_rate": 2.935307621668708e-05, |
|
"loss": 2.057, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.2170481452249408, |
|
"grad_norm": 1.7854455709457397, |
|
"learning_rate": 2.934757565939121e-05, |
|
"loss": 2.0408, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.21783741120757696, |
|
"grad_norm": 1.98285710811615, |
|
"learning_rate": 2.9342052336169688e-05, |
|
"loss": 2.0795, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.21862667719021311, |
|
"grad_norm": 1.760247826576233, |
|
"learning_rate": 2.9336506255786605e-05, |
|
"loss": 2.0655, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.21941594317284924, |
|
"grad_norm": 1.7804441452026367, |
|
"learning_rate": 2.933093742704218e-05, |
|
"loss": 2.0511, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.2202052091554854, |
|
"grad_norm": 2.0356266498565674, |
|
"learning_rate": 2.9325345858772704e-05, |
|
"loss": 2.0701, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.22099447513812154, |
|
"grad_norm": 1.364184021949768, |
|
"learning_rate": 2.9319731559850575e-05, |
|
"loss": 2.0427, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2217837411207577, |
|
"grad_norm": 2.4935107231140137, |
|
"learning_rate": 2.9314094539184238e-05, |
|
"loss": 2.0072, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.22257300710339384, |
|
"grad_norm": 1.3386174440383911, |
|
"learning_rate": 2.9308434805718202e-05, |
|
"loss": 2.0346, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.22336227308603, |
|
"grad_norm": 2.427462577819824, |
|
"learning_rate": 2.9302752368433012e-05, |
|
"loss": 2.0328, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.22415153906866614, |
|
"grad_norm": 1.5349195003509521, |
|
"learning_rate": 2.9297047236345248e-05, |
|
"loss": 2.0586, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.2249408050513023, |
|
"grad_norm": 2.037698745727539, |
|
"learning_rate": 2.9291319418507487e-05, |
|
"loss": 2.0782, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.22573007103393844, |
|
"grad_norm": 1.759464979171753, |
|
"learning_rate": 2.928556892400831e-05, |
|
"loss": 2.018, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.2265193370165746, |
|
"grad_norm": 1.4675992727279663, |
|
"learning_rate": 2.927979576197227e-05, |
|
"loss": 2.0049, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.22730860299921074, |
|
"grad_norm": 2.1224958896636963, |
|
"learning_rate": 2.92739999415599e-05, |
|
"loss": 2.0442, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.2280978689818469, |
|
"grad_norm": 1.3020617961883545, |
|
"learning_rate": 2.926818147196769e-05, |
|
"loss": 2.0149, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.22888713496448304, |
|
"grad_norm": 2.0251049995422363, |
|
"learning_rate": 2.926234036242805e-05, |
|
"loss": 2.015, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2296764009471192, |
|
"grad_norm": 1.5819907188415527, |
|
"learning_rate": 2.925647662220933e-05, |
|
"loss": 2.0674, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.23046566692975531, |
|
"grad_norm": 2.1385371685028076, |
|
"learning_rate": 2.9250590260615782e-05, |
|
"loss": 2.051, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.23125493291239146, |
|
"grad_norm": 1.6703637838363647, |
|
"learning_rate": 2.924468128698755e-05, |
|
"loss": 2.0661, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.23204419889502761, |
|
"grad_norm": 1.527028203010559, |
|
"learning_rate": 2.923874971070066e-05, |
|
"loss": 2.0273, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.23283346487766376, |
|
"grad_norm": 1.87993323802948, |
|
"learning_rate": 2.9232795541167007e-05, |
|
"loss": 2.035, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.23362273086029992, |
|
"grad_norm": 1.5972620248794556, |
|
"learning_rate": 2.922681878783433e-05, |
|
"loss": 2.052, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.23441199684293607, |
|
"grad_norm": 2.544999122619629, |
|
"learning_rate": 2.9220819460186212e-05, |
|
"loss": 2.0747, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.23520126282557222, |
|
"grad_norm": 1.865227222442627, |
|
"learning_rate": 2.9214797567742036e-05, |
|
"loss": 2.0921, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.23599052880820837, |
|
"grad_norm": 1.6884143352508545, |
|
"learning_rate": 2.9208753120057012e-05, |
|
"loss": 1.9842, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.23677979479084452, |
|
"grad_norm": 1.422402024269104, |
|
"learning_rate": 2.920268612672213e-05, |
|
"loss": 2.0283, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23756906077348067, |
|
"grad_norm": 2.1607301235198975, |
|
"learning_rate": 2.919659659736414e-05, |
|
"loss": 2.0836, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.23835832675611682, |
|
"grad_norm": 2.1439242362976074, |
|
"learning_rate": 2.919048454164558e-05, |
|
"loss": 2.0337, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.23914759273875297, |
|
"grad_norm": 1.4744690656661987, |
|
"learning_rate": 2.9184349969264713e-05, |
|
"loss": 2.1089, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.23993685872138912, |
|
"grad_norm": 1.688265323638916, |
|
"learning_rate": 2.9178192889955525e-05, |
|
"loss": 1.9577, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.24072612470402527, |
|
"grad_norm": 1.8507063388824463, |
|
"learning_rate": 2.917201331348773e-05, |
|
"loss": 2.0491, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.24151539068666142, |
|
"grad_norm": 1.7672779560089111, |
|
"learning_rate": 2.916581124966673e-05, |
|
"loss": 1.9864, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.24230465666929754, |
|
"grad_norm": 1.3851258754730225, |
|
"learning_rate": 2.915958670833361e-05, |
|
"loss": 2.0329, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.2430939226519337, |
|
"grad_norm": 1.7702224254608154, |
|
"learning_rate": 2.9153339699365127e-05, |
|
"loss": 2.0583, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.24388318863456984, |
|
"grad_norm": 1.283464789390564, |
|
"learning_rate": 2.9147070232673678e-05, |
|
"loss": 2.0354, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.244672454617206, |
|
"grad_norm": 1.3770912885665894, |
|
"learning_rate": 2.9140778318207304e-05, |
|
"loss": 2.0359, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.24546172059984214, |
|
"grad_norm": 1.590747594833374, |
|
"learning_rate": 2.9134463965949657e-05, |
|
"loss": 2.0104, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.2462509865824783, |
|
"grad_norm": 1.3684974908828735, |
|
"learning_rate": 2.9128127185919995e-05, |
|
"loss": 1.9965, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.24704025256511444, |
|
"grad_norm": 1.4951163530349731, |
|
"learning_rate": 2.9121767988173166e-05, |
|
"loss": 2.0349, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.2478295185477506, |
|
"grad_norm": 1.373107671737671, |
|
"learning_rate": 2.9115386382799594e-05, |
|
"loss": 2.0083, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.24861878453038674, |
|
"grad_norm": 7.275319576263428, |
|
"learning_rate": 2.9108982379925237e-05, |
|
"loss": 2.0614, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2494080505130229, |
|
"grad_norm": 1.5124423503875732, |
|
"learning_rate": 2.9102555989711617e-05, |
|
"loss": 2.0778, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.250197316495659, |
|
"grad_norm": 1.264237403869629, |
|
"learning_rate": 2.9096107222355764e-05, |
|
"loss": 2.0035, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.2509865824782952, |
|
"grad_norm": 1.6124038696289062, |
|
"learning_rate": 2.9089636088090222e-05, |
|
"loss": 1.9842, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.2517758484609313, |
|
"grad_norm": 1.5616995096206665, |
|
"learning_rate": 2.908314259718302e-05, |
|
"loss": 2.0289, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.2525651144435675, |
|
"grad_norm": 1.362492322921753, |
|
"learning_rate": 2.9076626759937665e-05, |
|
"loss": 2.0416, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2533543804262036, |
|
"grad_norm": 2.4406826496124268, |
|
"learning_rate": 2.9070088586693124e-05, |
|
"loss": 2.1443, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.2541436464088398, |
|
"grad_norm": 1.6452536582946777, |
|
"learning_rate": 2.9063528087823795e-05, |
|
"loss": 2.0788, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.2549329123914759, |
|
"grad_norm": 1.5950769186019897, |
|
"learning_rate": 2.9056945273739516e-05, |
|
"loss": 2.0651, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.2557221783741121, |
|
"grad_norm": 1.3139842748641968, |
|
"learning_rate": 2.9050340154885522e-05, |
|
"loss": 1.9862, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.2565114443567482, |
|
"grad_norm": 1.4966543912887573, |
|
"learning_rate": 2.9043712741742446e-05, |
|
"loss": 2.0707, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2573007103393844, |
|
"grad_norm": 1.750475525856018, |
|
"learning_rate": 2.9037063044826287e-05, |
|
"loss": 2.0497, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.2580899763220205, |
|
"grad_norm": 1.473083257675171, |
|
"learning_rate": 2.9030391074688422e-05, |
|
"loss": 2.0227, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.25887924230465664, |
|
"grad_norm": 1.3315284252166748, |
|
"learning_rate": 2.9023696841915547e-05, |
|
"loss": 2.0061, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.2596685082872928, |
|
"grad_norm": 1.4846017360687256, |
|
"learning_rate": 2.90169803571297e-05, |
|
"loss": 1.9782, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.26045777426992894, |
|
"grad_norm": 1.4215277433395386, |
|
"learning_rate": 2.901024163098822e-05, |
|
"loss": 2.0208, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2612470402525651, |
|
"grad_norm": 1.4329077005386353, |
|
"learning_rate": 2.900348067418374e-05, |
|
"loss": 2.0733, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.26203630623520124, |
|
"grad_norm": 1.7334977388381958, |
|
"learning_rate": 2.899669749744416e-05, |
|
"loss": 2.0124, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.2628255722178374, |
|
"grad_norm": 1.5247516632080078, |
|
"learning_rate": 2.8989892111532646e-05, |
|
"loss": 2.0615, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.26361483820047354, |
|
"grad_norm": 1.699466347694397, |
|
"learning_rate": 2.8983064527247603e-05, |
|
"loss": 2.0213, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.2644041041831097, |
|
"grad_norm": 1.425671935081482, |
|
"learning_rate": 2.897621475542266e-05, |
|
"loss": 2.0026, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.26519337016574585, |
|
"grad_norm": 2.0017428398132324, |
|
"learning_rate": 2.8969342806926644e-05, |
|
"loss": 2.0526, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.265982636148382, |
|
"grad_norm": 1.9548540115356445, |
|
"learning_rate": 2.8962448692663578e-05, |
|
"loss": 2.0681, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.26677190213101815, |
|
"grad_norm": 1.8230706453323364, |
|
"learning_rate": 2.8955532423572654e-05, |
|
"loss": 2.0354, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.2675611681136543, |
|
"grad_norm": 5.8361287117004395, |
|
"learning_rate": 2.8948594010628223e-05, |
|
"loss": 2.0588, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.26835043409629045, |
|
"grad_norm": 1.7668081521987915, |
|
"learning_rate": 2.894163346483976e-05, |
|
"loss": 2.0865, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2691397000789266, |
|
"grad_norm": 1.423452615737915, |
|
"learning_rate": 2.8934650797251875e-05, |
|
"loss": 1.9987, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.26992896606156275, |
|
"grad_norm": 1.6870286464691162, |
|
"learning_rate": 2.8927646018944264e-05, |
|
"loss": 2.0765, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.27071823204419887, |
|
"grad_norm": 1.5475367307662964, |
|
"learning_rate": 2.8920619141031722e-05, |
|
"loss": 1.9911, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.27150749802683505, |
|
"grad_norm": 1.4201185703277588, |
|
"learning_rate": 2.8913570174664104e-05, |
|
"loss": 2.0093, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.27229676400947117, |
|
"grad_norm": 1.6851767301559448, |
|
"learning_rate": 2.8906499131026313e-05, |
|
"loss": 2.019, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.27308602999210735, |
|
"grad_norm": 2.842083215713501, |
|
"learning_rate": 2.8899406021338277e-05, |
|
"loss": 2.0587, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.27387529597474347, |
|
"grad_norm": 2.324366569519043, |
|
"learning_rate": 2.8892290856854945e-05, |
|
"loss": 2.0381, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.27466456195737965, |
|
"grad_norm": 1.7157739400863647, |
|
"learning_rate": 2.8885153648866266e-05, |
|
"loss": 2.1044, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.27545382794001577, |
|
"grad_norm": 1.7006040811538696, |
|
"learning_rate": 2.887799440869715e-05, |
|
"loss": 2.0283, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.27624309392265195, |
|
"grad_norm": 1.27659010887146, |
|
"learning_rate": 2.8870813147707486e-05, |
|
"loss": 1.9939, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2770323599052881, |
|
"grad_norm": 1.2692986726760864, |
|
"learning_rate": 2.8863609877292095e-05, |
|
"loss": 2.0046, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.27782162588792425, |
|
"grad_norm": 1.8094099760055542, |
|
"learning_rate": 2.885638460888071e-05, |
|
"loss": 2.0605, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.2786108918705604, |
|
"grad_norm": 1.6548364162445068, |
|
"learning_rate": 2.884913735393799e-05, |
|
"loss": 2.0143, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.27940015785319655, |
|
"grad_norm": 1.4649075269699097, |
|
"learning_rate": 2.8841868123963467e-05, |
|
"loss": 2.0126, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.2801894238358327, |
|
"grad_norm": 1.3697763681411743, |
|
"learning_rate": 2.883457693049155e-05, |
|
"loss": 2.0532, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.2809786898184688, |
|
"grad_norm": 1.5171440839767456, |
|
"learning_rate": 2.882726378509149e-05, |
|
"loss": 1.9809, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.281767955801105, |
|
"grad_norm": 1.9760620594024658, |
|
"learning_rate": 2.881992869936738e-05, |
|
"loss": 2.0904, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.2825572217837411, |
|
"grad_norm": 1.4889613389968872, |
|
"learning_rate": 2.8812571684958112e-05, |
|
"loss": 2.0099, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.2833464877663773, |
|
"grad_norm": 1.700632095336914, |
|
"learning_rate": 2.880519275353739e-05, |
|
"loss": 2.0635, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.2841357537490134, |
|
"grad_norm": 1.9366352558135986, |
|
"learning_rate": 2.8797791916813693e-05, |
|
"loss": 1.9718, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2849250197316496, |
|
"grad_norm": 1.4939085245132446, |
|
"learning_rate": 2.8790369186530234e-05, |
|
"loss": 2.0218, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 2.3025503158569336, |
|
"learning_rate": 2.8782924574465003e-05, |
|
"loss": 2.0402, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.2865035516969219, |
|
"grad_norm": 1.7214744091033936, |
|
"learning_rate": 2.877545809243068e-05, |
|
"loss": 1.9928, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.287292817679558, |
|
"grad_norm": 3.420003890991211, |
|
"learning_rate": 2.8767969752274658e-05, |
|
"loss": 2.0177, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.2880820836621942, |
|
"grad_norm": 2.178987741470337, |
|
"learning_rate": 2.8760459565879024e-05, |
|
"loss": 1.9814, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2888713496448303, |
|
"grad_norm": 2.3021793365478516, |
|
"learning_rate": 2.8752927545160506e-05, |
|
"loss": 2.0564, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.2896606156274665, |
|
"grad_norm": 1.8379982709884644, |
|
"learning_rate": 2.8745373702070503e-05, |
|
"loss": 1.9817, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.2904498816101026, |
|
"grad_norm": 2.0152549743652344, |
|
"learning_rate": 2.8737798048595014e-05, |
|
"loss": 1.9944, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.2912391475927388, |
|
"grad_norm": 2.254018783569336, |
|
"learning_rate": 2.8730200596754676e-05, |
|
"loss": 2.0644, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.2920284135753749, |
|
"grad_norm": 2.5273802280426025, |
|
"learning_rate": 2.8722581358604686e-05, |
|
"loss": 1.9702, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.292817679558011, |
|
"grad_norm": 1.9010764360427856, |
|
"learning_rate": 2.871494034623483e-05, |
|
"loss": 2.0317, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.2936069455406472, |
|
"grad_norm": 3.991340160369873, |
|
"learning_rate": 2.8707277571769427e-05, |
|
"loss": 2.0036, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.2943962115232833, |
|
"grad_norm": 3.4760918617248535, |
|
"learning_rate": 2.8699593047367346e-05, |
|
"loss": 2.0028, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.2951854775059195, |
|
"grad_norm": 2.2568345069885254, |
|
"learning_rate": 2.8691886785221945e-05, |
|
"loss": 2.0038, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.2959747434885556, |
|
"grad_norm": 3.1988019943237305, |
|
"learning_rate": 2.8684158797561108e-05, |
|
"loss": 2.0642, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2967640094711918, |
|
"grad_norm": 1.9911775588989258, |
|
"learning_rate": 2.867640909664715e-05, |
|
"loss": 2.0453, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.2975532754538279, |
|
"grad_norm": 2.7963943481445312, |
|
"learning_rate": 2.866863769477687e-05, |
|
"loss": 2.1297, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.2983425414364641, |
|
"grad_norm": 2.355445384979248, |
|
"learning_rate": 2.8660844604281496e-05, |
|
"loss": 1.9831, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.2991318074191002, |
|
"grad_norm": 1.601205587387085, |
|
"learning_rate": 2.8653029837526657e-05, |
|
"loss": 1.9842, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.2999210734017364, |
|
"grad_norm": 1.3466482162475586, |
|
"learning_rate": 2.8645193406912384e-05, |
|
"loss": 2.0044, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3007103393843725, |
|
"grad_norm": 2.001037836074829, |
|
"learning_rate": 2.8637335324873094e-05, |
|
"loss": 2.0035, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.3014996053670087, |
|
"grad_norm": 1.4719334840774536, |
|
"learning_rate": 2.8629455603877538e-05, |
|
"loss": 1.9992, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.3022888713496448, |
|
"grad_norm": 2.8064234256744385, |
|
"learning_rate": 2.862155425642882e-05, |
|
"loss": 1.9626, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.303078137332281, |
|
"grad_norm": 2.7371597290039062, |
|
"learning_rate": 2.861363129506436e-05, |
|
"loss": 2.0108, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.30386740331491713, |
|
"grad_norm": 10.789750099182129, |
|
"learning_rate": 2.8605686732355848e-05, |
|
"loss": 2.1043, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.30465666929755325, |
|
"grad_norm": 3.1278905868530273, |
|
"learning_rate": 2.859772058090929e-05, |
|
"loss": 2.0117, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.30544593528018943, |
|
"grad_norm": 1.378149151802063, |
|
"learning_rate": 2.8589732853364914e-05, |
|
"loss": 2.0337, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.30623520126282555, |
|
"grad_norm": 2.791260004043579, |
|
"learning_rate": 2.8581723562397203e-05, |
|
"loss": 2.0247, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.30702446724546173, |
|
"grad_norm": 2.1987431049346924, |
|
"learning_rate": 2.857369272071484e-05, |
|
"loss": 2.029, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.30781373322809785, |
|
"grad_norm": 2.8841540813446045, |
|
"learning_rate": 2.8565640341060726e-05, |
|
"loss": 1.9728, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.30860299921073403, |
|
"grad_norm": 3.048370838165283, |
|
"learning_rate": 2.8557566436211915e-05, |
|
"loss": 2.0747, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.30939226519337015, |
|
"grad_norm": 1.3164457082748413, |
|
"learning_rate": 2.8549471018979622e-05, |
|
"loss": 2.0121, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.31018153117600633, |
|
"grad_norm": 2.2098472118377686, |
|
"learning_rate": 2.854135410220921e-05, |
|
"loss": 2.0156, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.31097079715864245, |
|
"grad_norm": 1.937860131263733, |
|
"learning_rate": 2.853321569878014e-05, |
|
"loss": 1.9367, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.31176006314127863, |
|
"grad_norm": 1.6437093019485474, |
|
"learning_rate": 2.8525055821605966e-05, |
|
"loss": 1.9702, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.31254932912391475, |
|
"grad_norm": 1.7104240655899048, |
|
"learning_rate": 2.8516874483634336e-05, |
|
"loss": 2.0901, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.31333859510655093, |
|
"grad_norm": 2.3748621940612793, |
|
"learning_rate": 2.850867169784693e-05, |
|
"loss": 2.0178, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.31412786108918705, |
|
"grad_norm": 1.400006651878357, |
|
"learning_rate": 2.8500447477259458e-05, |
|
"loss": 1.9689, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.3149171270718232, |
|
"grad_norm": 1.2704992294311523, |
|
"learning_rate": 2.849220183492167e-05, |
|
"loss": 1.9398, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.31570639305445936, |
|
"grad_norm": 1.4304686784744263, |
|
"learning_rate": 2.8483934783917266e-05, |
|
"loss": 2.0251, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3164956590370955, |
|
"grad_norm": 2.9045751094818115, |
|
"learning_rate": 2.847564633736395e-05, |
|
"loss": 2.0429, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.31728492501973166, |
|
"grad_norm": 1.477043867111206, |
|
"learning_rate": 2.8467336508413366e-05, |
|
"loss": 1.9737, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.3180741910023678, |
|
"grad_norm": 1.5823172330856323, |
|
"learning_rate": 2.845900531025107e-05, |
|
"loss": 1.9746, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.31886345698500396, |
|
"grad_norm": 2.0735702514648438, |
|
"learning_rate": 2.8450652756096544e-05, |
|
"loss": 2.0304, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.3196527229676401, |
|
"grad_norm": 2.1452434062957764, |
|
"learning_rate": 2.8442278859203154e-05, |
|
"loss": 2.0198, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.32044198895027626, |
|
"grad_norm": 1.5755338668823242, |
|
"learning_rate": 2.8433883632858125e-05, |
|
"loss": 1.9722, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.3212312549329124, |
|
"grad_norm": 1.497015118598938, |
|
"learning_rate": 2.8425467090382533e-05, |
|
"loss": 2.0369, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.32202052091554856, |
|
"grad_norm": 1.4064276218414307, |
|
"learning_rate": 2.8417029245131272e-05, |
|
"loss": 1.9983, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.3228097868981847, |
|
"grad_norm": 2.0976450443267822, |
|
"learning_rate": 2.8408570110493038e-05, |
|
"loss": 1.9712, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.32359905288082086, |
|
"grad_norm": 1.6904927492141724, |
|
"learning_rate": 2.8400089699890313e-05, |
|
"loss": 2.0335, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.324388318863457, |
|
"grad_norm": 1.9519057273864746, |
|
"learning_rate": 2.8391588026779334e-05, |
|
"loss": 2.0334, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.32517758484609316, |
|
"grad_norm": 2.1449975967407227, |
|
"learning_rate": 2.838306510465008e-05, |
|
"loss": 2.0053, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.3259668508287293, |
|
"grad_norm": 2.0840375423431396, |
|
"learning_rate": 2.837452094702624e-05, |
|
"loss": 1.9542, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.3267561168113654, |
|
"grad_norm": 2.5114152431488037, |
|
"learning_rate": 2.8365955567465203e-05, |
|
"loss": 1.9939, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.3275453827940016, |
|
"grad_norm": 2.250431537628174, |
|
"learning_rate": 2.8357368979558035e-05, |
|
"loss": 2.0081, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3283346487766377, |
|
"grad_norm": 3.218123197555542, |
|
"learning_rate": 2.8348761196929443e-05, |
|
"loss": 1.9852, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.3291239147592739, |
|
"grad_norm": 4.118273735046387, |
|
"learning_rate": 2.8340132233237784e-05, |
|
"loss": 2.0008, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.32991318074191, |
|
"grad_norm": 2.0143914222717285, |
|
"learning_rate": 2.8331482102175e-05, |
|
"loss": 1.9703, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.3307024467245462, |
|
"grad_norm": 2.4409642219543457, |
|
"learning_rate": 2.832281081746664e-05, |
|
"loss": 1.9678, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.3314917127071823, |
|
"grad_norm": 1.8680471181869507, |
|
"learning_rate": 2.831411839287181e-05, |
|
"loss": 1.988, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3322809786898185, |
|
"grad_norm": 2.5271923542022705, |
|
"learning_rate": 2.8305404842183154e-05, |
|
"loss": 1.9601, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.3330702446724546, |
|
"grad_norm": 1.9228181838989258, |
|
"learning_rate": 2.829667017922685e-05, |
|
"loss": 1.9337, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.3338595106550908, |
|
"grad_norm": 2.778698444366455, |
|
"learning_rate": 2.8287914417862565e-05, |
|
"loss": 2.0357, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.3346487766377269, |
|
"grad_norm": 1.8243480920791626, |
|
"learning_rate": 2.8279137571983456e-05, |
|
"loss": 1.9876, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.3354380426203631, |
|
"grad_norm": 2.0867981910705566, |
|
"learning_rate": 2.827033965551612e-05, |
|
"loss": 2.0039, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3362273086029992, |
|
"grad_norm": 2.3519058227539062, |
|
"learning_rate": 2.82615206824206e-05, |
|
"loss": 1.9389, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.3370165745856354, |
|
"grad_norm": 2.992839813232422, |
|
"learning_rate": 2.8252680666690346e-05, |
|
"loss": 2.0878, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.3378058405682715, |
|
"grad_norm": 3.436065435409546, |
|
"learning_rate": 2.8243819622352197e-05, |
|
"loss": 2.0401, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.33859510655090763, |
|
"grad_norm": 1.8708529472351074, |
|
"learning_rate": 2.8234937563466355e-05, |
|
"loss": 2.0056, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.3393843725335438, |
|
"grad_norm": 2.743220090866089, |
|
"learning_rate": 2.822603450412638e-05, |
|
"loss": 2.0001, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.34017363851617993, |
|
"grad_norm": 3.055884838104248, |
|
"learning_rate": 2.8217110458459136e-05, |
|
"loss": 2.0223, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3409629044988161, |
|
"grad_norm": 1.3831731081008911, |
|
"learning_rate": 2.8208165440624804e-05, |
|
"loss": 2.0223, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.34175217048145223, |
|
"grad_norm": 2.483407735824585, |
|
"learning_rate": 2.8199199464816834e-05, |
|
"loss": 2.0198, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3425414364640884, |
|
"grad_norm": 1.9653549194335938, |
|
"learning_rate": 2.8190212545261928e-05, |
|
"loss": 2.0058, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.34333070244672453, |
|
"grad_norm": 2.2429397106170654, |
|
"learning_rate": 2.818120469622003e-05, |
|
"loss": 1.9628, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3441199684293607, |
|
"grad_norm": 2.5655837059020996, |
|
"learning_rate": 2.8172175931984276e-05, |
|
"loss": 2.0032, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.34490923441199683, |
|
"grad_norm": 1.482216238975525, |
|
"learning_rate": 2.8163126266881012e-05, |
|
"loss": 1.9787, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.345698500394633, |
|
"grad_norm": 2.1151742935180664, |
|
"learning_rate": 2.815405571526974e-05, |
|
"loss": 1.94, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.34648776637726914, |
|
"grad_norm": 1.2237361669540405, |
|
"learning_rate": 2.8144964291543093e-05, |
|
"loss": 2.0106, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.3472770323599053, |
|
"grad_norm": 2.514650821685791, |
|
"learning_rate": 2.8135852010126836e-05, |
|
"loss": 2.0033, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.34806629834254144, |
|
"grad_norm": 1.4608144760131836, |
|
"learning_rate": 2.812671888547982e-05, |
|
"loss": 1.9822, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.34885556432517756, |
|
"grad_norm": 2.3360090255737305, |
|
"learning_rate": 2.811756493209397e-05, |
|
"loss": 1.9659, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.34964483030781374, |
|
"grad_norm": 2.3090360164642334, |
|
"learning_rate": 2.8108390164494272e-05, |
|
"loss": 1.9949, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.35043409629044986, |
|
"grad_norm": 1.496638536453247, |
|
"learning_rate": 2.8099194597238727e-05, |
|
"loss": 1.992, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.35122336227308604, |
|
"grad_norm": 2.009535551071167, |
|
"learning_rate": 2.8089978244918344e-05, |
|
"loss": 2.0079, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.35201262825572216, |
|
"grad_norm": 1.2042880058288574, |
|
"learning_rate": 2.8080741122157116e-05, |
|
"loss": 2.0639, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.35280189423835834, |
|
"grad_norm": 2.214263916015625, |
|
"learning_rate": 2.8071483243611982e-05, |
|
"loss": 1.9812, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.35359116022099446, |
|
"grad_norm": 1.3645316362380981, |
|
"learning_rate": 2.8062204623972826e-05, |
|
"loss": 2.0229, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.35438042620363064, |
|
"grad_norm": 2.3980894088745117, |
|
"learning_rate": 2.8052905277962445e-05, |
|
"loss": 2.0767, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.35516969218626676, |
|
"grad_norm": 2.262523651123047, |
|
"learning_rate": 2.804358522033651e-05, |
|
"loss": 1.9719, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.35595895816890294, |
|
"grad_norm": 1.6269384622573853, |
|
"learning_rate": 2.803424446588357e-05, |
|
"loss": 1.9849, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.35674822415153906, |
|
"grad_norm": 1.989469051361084, |
|
"learning_rate": 2.8024883029425007e-05, |
|
"loss": 2.0425, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.35753749013417524, |
|
"grad_norm": 1.4726110696792603, |
|
"learning_rate": 2.8015500925815014e-05, |
|
"loss": 1.9886, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.35832675611681136, |
|
"grad_norm": 1.4802323579788208, |
|
"learning_rate": 2.8006098169940594e-05, |
|
"loss": 1.9858, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.35911602209944754, |
|
"grad_norm": 2.984384059906006, |
|
"learning_rate": 2.799667477672151e-05, |
|
"loss": 2.0638, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.35990528808208366, |
|
"grad_norm": 2.767064094543457, |
|
"learning_rate": 2.7987230761110268e-05, |
|
"loss": 2.0321, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3606945540647198, |
|
"grad_norm": 1.6278806924819946, |
|
"learning_rate": 2.7977766138092105e-05, |
|
"loss": 1.9889, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.36148382004735596, |
|
"grad_norm": 2.8049097061157227, |
|
"learning_rate": 2.796828092268495e-05, |
|
"loss": 2.0226, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.3622730860299921, |
|
"grad_norm": 2.840355396270752, |
|
"learning_rate": 2.7958775129939407e-05, |
|
"loss": 1.9445, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.36306235201262826, |
|
"grad_norm": 1.337878942489624, |
|
"learning_rate": 2.7949248774938735e-05, |
|
"loss": 1.9669, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3638516179952644, |
|
"grad_norm": 2.2922017574310303, |
|
"learning_rate": 2.793970187279882e-05, |
|
"loss": 1.9675, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.36464088397790057, |
|
"grad_norm": 1.5649480819702148, |
|
"learning_rate": 2.7930134438668147e-05, |
|
"loss": 1.9923, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.3654301499605367, |
|
"grad_norm": 1.7279510498046875, |
|
"learning_rate": 2.7920546487727782e-05, |
|
"loss": 1.9885, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.36621941594317287, |
|
"grad_norm": 1.4108428955078125, |
|
"learning_rate": 2.7910938035191344e-05, |
|
"loss": 2.0052, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.367008681925809, |
|
"grad_norm": 1.3832406997680664, |
|
"learning_rate": 2.790130909630499e-05, |
|
"loss": 1.9843, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.36779794790844517, |
|
"grad_norm": 1.4402744770050049, |
|
"learning_rate": 2.7891659686347372e-05, |
|
"loss": 1.9644, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.3685872138910813, |
|
"grad_norm": 1.1699187755584717, |
|
"learning_rate": 2.7881989820629634e-05, |
|
"loss": 1.9496, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.36937647987371747, |
|
"grad_norm": 1.7933238744735718, |
|
"learning_rate": 2.787229951449538e-05, |
|
"loss": 1.9949, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.3701657458563536, |
|
"grad_norm": 1.3118610382080078, |
|
"learning_rate": 2.7862588783320634e-05, |
|
"loss": 1.945, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.3709550118389897, |
|
"grad_norm": 1.6701432466506958, |
|
"learning_rate": 2.7852857642513838e-05, |
|
"loss": 1.9523, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3717442778216259, |
|
"grad_norm": 1.4986763000488281, |
|
"learning_rate": 2.7843106107515824e-05, |
|
"loss": 1.9515, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.372533543804262, |
|
"grad_norm": 1.5882444381713867, |
|
"learning_rate": 2.783333419379978e-05, |
|
"loss": 2.0187, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.3733228097868982, |
|
"grad_norm": 1.8366268873214722, |
|
"learning_rate": 2.782354191687122e-05, |
|
"loss": 1.984, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.3741120757695343, |
|
"grad_norm": 1.6720727682113647, |
|
"learning_rate": 2.7813729292267987e-05, |
|
"loss": 1.9898, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.3749013417521705, |
|
"grad_norm": 1.5399422645568848, |
|
"learning_rate": 2.78038963355602e-05, |
|
"loss": 1.9989, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3756906077348066, |
|
"grad_norm": 2.1057066917419434, |
|
"learning_rate": 2.7794043062350234e-05, |
|
"loss": 1.996, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.3764798737174428, |
|
"grad_norm": 1.6760990619659424, |
|
"learning_rate": 2.7784169488272714e-05, |
|
"loss": 2.0258, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.3772691397000789, |
|
"grad_norm": 1.8123892545700073, |
|
"learning_rate": 2.7774275628994474e-05, |
|
"loss": 1.9563, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.3780584056827151, |
|
"grad_norm": 1.5573099851608276, |
|
"learning_rate": 2.776436150021453e-05, |
|
"loss": 1.9674, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.3788476716653512, |
|
"grad_norm": 1.8921067714691162, |
|
"learning_rate": 2.7754427117664064e-05, |
|
"loss": 1.9627, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3796369376479874, |
|
"grad_norm": 1.409118890762329, |
|
"learning_rate": 2.7744472497106396e-05, |
|
"loss": 1.9807, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.3804262036306235, |
|
"grad_norm": 1.9282482862472534, |
|
"learning_rate": 2.7734497654336955e-05, |
|
"loss": 2.0048, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.3812154696132597, |
|
"grad_norm": 1.6770762205123901, |
|
"learning_rate": 2.7724502605183263e-05, |
|
"loss": 2.0104, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.3820047355958958, |
|
"grad_norm": 1.8302911520004272, |
|
"learning_rate": 2.7714487365504903e-05, |
|
"loss": 1.9957, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.38279400157853194, |
|
"grad_norm": 1.6130921840667725, |
|
"learning_rate": 2.770445195119349e-05, |
|
"loss": 2.0331, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.3835832675611681, |
|
"grad_norm": 1.7150806188583374, |
|
"learning_rate": 2.769439637817265e-05, |
|
"loss": 1.9678, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.38437253354380424, |
|
"grad_norm": 1.2525075674057007, |
|
"learning_rate": 2.7684320662398017e-05, |
|
"loss": 1.9609, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.3851617995264404, |
|
"grad_norm": 1.7981503009796143, |
|
"learning_rate": 2.7674224819857155e-05, |
|
"loss": 1.9904, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.38595106550907654, |
|
"grad_norm": 1.3988957405090332, |
|
"learning_rate": 2.7664108866569583e-05, |
|
"loss": 2.0331, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.3867403314917127, |
|
"grad_norm": 1.6103547811508179, |
|
"learning_rate": 2.7653972818586725e-05, |
|
"loss": 1.9774, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.38752959747434884, |
|
"grad_norm": 1.465564489364624, |
|
"learning_rate": 2.7643816691991896e-05, |
|
"loss": 2.0019, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.388318863456985, |
|
"grad_norm": 1.4811826944351196, |
|
"learning_rate": 2.7633640502900258e-05, |
|
"loss": 2.0019, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.38910812943962114, |
|
"grad_norm": 1.9267323017120361, |
|
"learning_rate": 2.762344426745883e-05, |
|
"loss": 1.9857, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.3898973954222573, |
|
"grad_norm": 1.2511990070343018, |
|
"learning_rate": 2.7613228001846408e-05, |
|
"loss": 1.9663, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.39068666140489344, |
|
"grad_norm": 1.2942070960998535, |
|
"learning_rate": 2.7602991722273595e-05, |
|
"loss": 1.9845, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3914759273875296, |
|
"grad_norm": 1.2999018430709839, |
|
"learning_rate": 2.7592735444982745e-05, |
|
"loss": 2.0299, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.39226519337016574, |
|
"grad_norm": 1.3434914350509644, |
|
"learning_rate": 2.758245918624794e-05, |
|
"loss": 1.9909, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.3930544593528019, |
|
"grad_norm": 1.5425822734832764, |
|
"learning_rate": 2.757216296237496e-05, |
|
"loss": 1.9483, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.39384372533543804, |
|
"grad_norm": 1.1178399324417114, |
|
"learning_rate": 2.7561846789701295e-05, |
|
"loss": 1.9843, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.39463299131807417, |
|
"grad_norm": 1.8598966598510742, |
|
"learning_rate": 2.755151068459605e-05, |
|
"loss": 1.9679, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.39542225730071034, |
|
"grad_norm": 1.3829669952392578, |
|
"learning_rate": 2.7541154663459973e-05, |
|
"loss": 2.0293, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.39621152328334647, |
|
"grad_norm": 1.8445639610290527, |
|
"learning_rate": 2.7530778742725428e-05, |
|
"loss": 1.9737, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.39700078926598265, |
|
"grad_norm": 1.229123830795288, |
|
"learning_rate": 2.7520382938856332e-05, |
|
"loss": 2.0013, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.39779005524861877, |
|
"grad_norm": 1.8316127061843872, |
|
"learning_rate": 2.7509967268348168e-05, |
|
"loss": 1.9645, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.39857932123125495, |
|
"grad_norm": 1.3551149368286133, |
|
"learning_rate": 2.7499531747727938e-05, |
|
"loss": 1.9771, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.39936858721389107, |
|
"grad_norm": 1.8307702541351318, |
|
"learning_rate": 2.7489076393554128e-05, |
|
"loss": 2.0686, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.40015785319652725, |
|
"grad_norm": 2.3857483863830566, |
|
"learning_rate": 2.747860122241671e-05, |
|
"loss": 2.0652, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.40094711917916337, |
|
"grad_norm": 5.453534126281738, |
|
"learning_rate": 2.7468106250937104e-05, |
|
"loss": 2.0154, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.40173638516179955, |
|
"grad_norm": 4.784702301025391, |
|
"learning_rate": 2.745759149576813e-05, |
|
"loss": 2.0359, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.40252565114443567, |
|
"grad_norm": 1.8123985528945923, |
|
"learning_rate": 2.7447056973594018e-05, |
|
"loss": 1.9937, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.40331491712707185, |
|
"grad_norm": 1.9731765985488892, |
|
"learning_rate": 2.7436502701130346e-05, |
|
"loss": 2.0277, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.40410418310970797, |
|
"grad_norm": 2.117807149887085, |
|
"learning_rate": 2.742592869512405e-05, |
|
"loss": 2.0225, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.4048934490923441, |
|
"grad_norm": 1.983472466468811, |
|
"learning_rate": 2.741533497235336e-05, |
|
"loss": 2.0592, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.40568271507498027, |
|
"grad_norm": 1.5282464027404785, |
|
"learning_rate": 2.74047215496278e-05, |
|
"loss": 2.0422, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.4064719810576164, |
|
"grad_norm": 1.679261326789856, |
|
"learning_rate": 2.7394088443788154e-05, |
|
"loss": 1.9958, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.40726124704025257, |
|
"grad_norm": 1.5584269762039185, |
|
"learning_rate": 2.7383435671706433e-05, |
|
"loss": 2.0147, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.4080505130228887, |
|
"grad_norm": 1.3049349784851074, |
|
"learning_rate": 2.7372763250285865e-05, |
|
"loss": 2.0012, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.4088397790055249, |
|
"grad_norm": 1.1305190324783325, |
|
"learning_rate": 2.7362071196460833e-05, |
|
"loss": 2.0246, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.409629044988161, |
|
"grad_norm": 1.8814990520477295, |
|
"learning_rate": 2.73513595271969e-05, |
|
"loss": 2.0355, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.4104183109707972, |
|
"grad_norm": 1.9675487279891968, |
|
"learning_rate": 2.7340628259490732e-05, |
|
"loss": 2.0102, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4112075769534333, |
|
"grad_norm": 1.3380517959594727, |
|
"learning_rate": 2.7329877410370103e-05, |
|
"loss": 1.9709, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.4119968429360695, |
|
"grad_norm": 1.272178053855896, |
|
"learning_rate": 2.731910699689386e-05, |
|
"loss": 2.0224, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.4127861089187056, |
|
"grad_norm": 1.2764815092086792, |
|
"learning_rate": 2.730831703615188e-05, |
|
"loss": 1.9994, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.4135753749013418, |
|
"grad_norm": 2.1558752059936523, |
|
"learning_rate": 2.729750754526507e-05, |
|
"loss": 2.034, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.4143646408839779, |
|
"grad_norm": 1.6258021593093872, |
|
"learning_rate": 2.7286678541385327e-05, |
|
"loss": 1.9565, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.4151539068666141, |
|
"grad_norm": 1.502463698387146, |
|
"learning_rate": 2.727583004169549e-05, |
|
"loss": 1.9866, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.4159431728492502, |
|
"grad_norm": 1.258079171180725, |
|
"learning_rate": 2.7264962063409368e-05, |
|
"loss": 1.9524, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.4167324388318863, |
|
"grad_norm": 1.5579389333724976, |
|
"learning_rate": 2.7254074623771643e-05, |
|
"loss": 2.034, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.4175217048145225, |
|
"grad_norm": 1.9363354444503784, |
|
"learning_rate": 2.7243167740057894e-05, |
|
"loss": 1.952, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.4183109707971586, |
|
"grad_norm": 2.8109583854675293, |
|
"learning_rate": 2.723224142957455e-05, |
|
"loss": 1.9794, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4191002367797948, |
|
"grad_norm": 1.8886324167251587, |
|
"learning_rate": 2.7221295709658873e-05, |
|
"loss": 2.0423, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.4198895027624309, |
|
"grad_norm": 1.2472280263900757, |
|
"learning_rate": 2.721033059767891e-05, |
|
"loss": 2.0076, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.4206787687450671, |
|
"grad_norm": 2.1861677169799805, |
|
"learning_rate": 2.719934611103348e-05, |
|
"loss": 2.0189, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.4214680347277032, |
|
"grad_norm": 1.2035226821899414, |
|
"learning_rate": 2.7188342267152155e-05, |
|
"loss": 1.9594, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.4222573007103394, |
|
"grad_norm": 2.6411736011505127, |
|
"learning_rate": 2.7177319083495212e-05, |
|
"loss": 1.9578, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.4230465666929755, |
|
"grad_norm": 63.83803939819336, |
|
"learning_rate": 2.7166276577553612e-05, |
|
"loss": 2.1051, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.4238358326756117, |
|
"grad_norm": 1.9330120086669922, |
|
"learning_rate": 2.7155214766848996e-05, |
|
"loss": 1.9559, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.4246250986582478, |
|
"grad_norm": 1.8951784372329712, |
|
"learning_rate": 2.714413366893361e-05, |
|
"loss": 1.9691, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.425414364640884, |
|
"grad_norm": 1.6028685569763184, |
|
"learning_rate": 2.7133033301390328e-05, |
|
"loss": 1.9484, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.4262036306235201, |
|
"grad_norm": 2.4249136447906494, |
|
"learning_rate": 2.712191368183258e-05, |
|
"loss": 2.1158, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.42699289660615625, |
|
"grad_norm": 3.1744580268859863, |
|
"learning_rate": 2.711077482790435e-05, |
|
"loss": 1.9602, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.4277821625887924, |
|
"grad_norm": 1.779587745666504, |
|
"learning_rate": 2.7099616757280158e-05, |
|
"loss": 2.037, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 2.5144872665405273, |
|
"learning_rate": 2.708843948766499e-05, |
|
"loss": 2.0066, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.4293606945540647, |
|
"grad_norm": 3.0390021800994873, |
|
"learning_rate": 2.707724303679431e-05, |
|
"loss": 2.0017, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.43014996053670085, |
|
"grad_norm": 2.0000808238983154, |
|
"learning_rate": 2.706602742243402e-05, |
|
"loss": 1.9526, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.430939226519337, |
|
"grad_norm": 2.5513486862182617, |
|
"learning_rate": 2.7054792662380427e-05, |
|
"loss": 1.9689, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.43172849250197315, |
|
"grad_norm": 2.5275909900665283, |
|
"learning_rate": 2.704353877446021e-05, |
|
"loss": 1.9719, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.4325177584846093, |
|
"grad_norm": 1.781296968460083, |
|
"learning_rate": 2.7032265776530414e-05, |
|
"loss": 1.9811, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.43330702446724545, |
|
"grad_norm": 1.563276767730713, |
|
"learning_rate": 2.7020973686478388e-05, |
|
"loss": 2.0099, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.4340962904498816, |
|
"grad_norm": 2.2312963008880615, |
|
"learning_rate": 2.700966252222179e-05, |
|
"loss": 2.0063, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.43488555643251775, |
|
"grad_norm": 2.1669869422912598, |
|
"learning_rate": 2.699833230170854e-05, |
|
"loss": 1.9857, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.43567482241515393, |
|
"grad_norm": 2.051197052001953, |
|
"learning_rate": 2.6986983042916792e-05, |
|
"loss": 2.0003, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.43646408839779005, |
|
"grad_norm": 1.9057836532592773, |
|
"learning_rate": 2.697561476385491e-05, |
|
"loss": 2.0375, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.43725335438042623, |
|
"grad_norm": 1.7578641176223755, |
|
"learning_rate": 2.6964227482561442e-05, |
|
"loss": 2.0319, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.43804262036306235, |
|
"grad_norm": 2.087156295776367, |
|
"learning_rate": 2.6952821217105086e-05, |
|
"loss": 1.9948, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.4388318863456985, |
|
"grad_norm": 1.399621844291687, |
|
"learning_rate": 2.6941395985584656e-05, |
|
"loss": 1.9759, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.43962115232833465, |
|
"grad_norm": 1.4898017644882202, |
|
"learning_rate": 2.6929951806129076e-05, |
|
"loss": 1.9606, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.4404104183109708, |
|
"grad_norm": 1.5503097772598267, |
|
"learning_rate": 2.6918488696897317e-05, |
|
"loss": 1.9867, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.44119968429360695, |
|
"grad_norm": 1.4455583095550537, |
|
"learning_rate": 2.69070066760784e-05, |
|
"loss": 1.9556, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.4419889502762431, |
|
"grad_norm": 1.4497873783111572, |
|
"learning_rate": 2.689550576189135e-05, |
|
"loss": 1.9932, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.44277821625887925, |
|
"grad_norm": 1.2433416843414307, |
|
"learning_rate": 2.688398597258517e-05, |
|
"loss": 1.9857, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.4435674822415154, |
|
"grad_norm": 1.5041720867156982, |
|
"learning_rate": 2.6872447326438813e-05, |
|
"loss": 1.9872, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.44435674822415155, |
|
"grad_norm": 1.2008202075958252, |
|
"learning_rate": 2.6860889841761152e-05, |
|
"loss": 2.0249, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.4451460142067877, |
|
"grad_norm": 1.420989751815796, |
|
"learning_rate": 2.6849313536890956e-05, |
|
"loss": 1.9807, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.44593528018942385, |
|
"grad_norm": 1.4576897621154785, |
|
"learning_rate": 2.6837718430196848e-05, |
|
"loss": 1.9909, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.44672454617206, |
|
"grad_norm": 1.538425087928772, |
|
"learning_rate": 2.68261045400773e-05, |
|
"loss": 1.9566, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.44751381215469616, |
|
"grad_norm": 1.6612803936004639, |
|
"learning_rate": 2.681447188496057e-05, |
|
"loss": 1.9703, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.4483030781373323, |
|
"grad_norm": 1.5408464670181274, |
|
"learning_rate": 2.6802820483304713e-05, |
|
"loss": 1.9929, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.44909234411996846, |
|
"grad_norm": 1.468129277229309, |
|
"learning_rate": 2.6791150353597507e-05, |
|
"loss": 1.9623, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.4498816101026046, |
|
"grad_norm": 1.5219614505767822, |
|
"learning_rate": 2.6779461514356454e-05, |
|
"loss": 2.0299, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4506708760852407, |
|
"grad_norm": 1.2168009281158447, |
|
"learning_rate": 2.6767753984128756e-05, |
|
"loss": 1.9743, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.4514601420678769, |
|
"grad_norm": 1.8264933824539185, |
|
"learning_rate": 2.6756027781491262e-05, |
|
"loss": 2.0069, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.452249408050513, |
|
"grad_norm": 1.409223198890686, |
|
"learning_rate": 2.6744282925050443e-05, |
|
"loss": 2.0129, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.4530386740331492, |
|
"grad_norm": 2.189690113067627, |
|
"learning_rate": 2.6732519433442386e-05, |
|
"loss": 1.9749, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.4538279400157853, |
|
"grad_norm": 1.3944450616836548, |
|
"learning_rate": 2.672073732533273e-05, |
|
"loss": 2.039, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4546172059984215, |
|
"grad_norm": 2.574265718460083, |
|
"learning_rate": 2.670893661941666e-05, |
|
"loss": 2.0016, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.4554064719810576, |
|
"grad_norm": 2.220921039581299, |
|
"learning_rate": 2.669711733441888e-05, |
|
"loss": 2.0456, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.4561957379636938, |
|
"grad_norm": 1.492487907409668, |
|
"learning_rate": 2.668527948909356e-05, |
|
"loss": 1.9773, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.4569850039463299, |
|
"grad_norm": 2.086151123046875, |
|
"learning_rate": 2.667342310222433e-05, |
|
"loss": 2.0039, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.4577742699289661, |
|
"grad_norm": 1.9060444831848145, |
|
"learning_rate": 2.6661548192624234e-05, |
|
"loss": 1.9759, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4585635359116022, |
|
"grad_norm": 15.723176956176758, |
|
"learning_rate": 2.6649654779135715e-05, |
|
"loss": 2.0215, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.4593528018942384, |
|
"grad_norm": 3.334221124649048, |
|
"learning_rate": 2.663774288063057e-05, |
|
"loss": 2.0489, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.4601420678768745, |
|
"grad_norm": 2.814065933227539, |
|
"learning_rate": 2.6625812516009925e-05, |
|
"loss": 2.0048, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.46093133385951063, |
|
"grad_norm": 3.551992177963257, |
|
"learning_rate": 2.6613863704204217e-05, |
|
"loss": 2.0969, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.4617205998421468, |
|
"grad_norm": 3.252483367919922, |
|
"learning_rate": 2.660189646417315e-05, |
|
"loss": 2.0006, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.46250986582478293, |
|
"grad_norm": 3.306950092315674, |
|
"learning_rate": 2.658991081490566e-05, |
|
"loss": 2.0138, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.4632991318074191, |
|
"grad_norm": 1.512024998664856, |
|
"learning_rate": 2.65779067754199e-05, |
|
"loss": 1.9825, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.46408839779005523, |
|
"grad_norm": 1.600062608718872, |
|
"learning_rate": 2.6565884364763214e-05, |
|
"loss": 2.013, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.4648776637726914, |
|
"grad_norm": 1.3948335647583008, |
|
"learning_rate": 2.655384360201208e-05, |
|
"loss": 1.9622, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.46566692975532753, |
|
"grad_norm": 1.922743558883667, |
|
"learning_rate": 2.6541784506272108e-05, |
|
"loss": 1.9972, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4664561957379637, |
|
"grad_norm": 1.3690232038497925, |
|
"learning_rate": 2.652970709667798e-05, |
|
"loss": 1.9871, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.46724546172059983, |
|
"grad_norm": 1.273729681968689, |
|
"learning_rate": 2.6517611392393463e-05, |
|
"loss": 1.9675, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.468034727703236, |
|
"grad_norm": 1.565245509147644, |
|
"learning_rate": 2.6505497412611335e-05, |
|
"loss": 1.9785, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.46882399368587213, |
|
"grad_norm": 1.3492815494537354, |
|
"learning_rate": 2.6493365176553376e-05, |
|
"loss": 1.9781, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.4696132596685083, |
|
"grad_norm": 1.7030278444290161, |
|
"learning_rate": 2.648121470347034e-05, |
|
"loss": 1.9991, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.47040252565114443, |
|
"grad_norm": 1.3920458555221558, |
|
"learning_rate": 2.646904601264191e-05, |
|
"loss": 1.9711, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.4711917916337806, |
|
"grad_norm": 1.3776328563690186, |
|
"learning_rate": 2.6456859123376676e-05, |
|
"loss": 1.9506, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.47198105761641673, |
|
"grad_norm": 1.3518524169921875, |
|
"learning_rate": 2.644465405501212e-05, |
|
"loss": 1.9457, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.47277032359905286, |
|
"grad_norm": 1.43320631980896, |
|
"learning_rate": 2.643243082691455e-05, |
|
"loss": 1.9906, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.47355958958168903, |
|
"grad_norm": 1.2658172845840454, |
|
"learning_rate": 2.6420189458479095e-05, |
|
"loss": 1.9749, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.47434885556432516, |
|
"grad_norm": 1.543461799621582, |
|
"learning_rate": 2.6407929969129668e-05, |
|
"loss": 1.968, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.47513812154696133, |
|
"grad_norm": 1.238538384437561, |
|
"learning_rate": 2.6395652378318945e-05, |
|
"loss": 1.97, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.47592738752959746, |
|
"grad_norm": 1.790334701538086, |
|
"learning_rate": 2.6383356705528306e-05, |
|
"loss": 2.0287, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.47671665351223363, |
|
"grad_norm": 1.5136432647705078, |
|
"learning_rate": 2.6371042970267846e-05, |
|
"loss": 1.9737, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.47750591949486976, |
|
"grad_norm": 1.5719919204711914, |
|
"learning_rate": 2.6358711192076294e-05, |
|
"loss": 2.0797, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.47829518547750594, |
|
"grad_norm": 1.8503254652023315, |
|
"learning_rate": 2.6346361390521026e-05, |
|
"loss": 1.9932, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.47908445146014206, |
|
"grad_norm": 1.1544816493988037, |
|
"learning_rate": 2.6333993585198014e-05, |
|
"loss": 1.9213, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.47987371744277824, |
|
"grad_norm": 1.2639306783676147, |
|
"learning_rate": 2.63216077957318e-05, |
|
"loss": 1.9033, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.48066298342541436, |
|
"grad_norm": 1.110762357711792, |
|
"learning_rate": 2.6309204041775444e-05, |
|
"loss": 1.9829, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.48145224940805054, |
|
"grad_norm": 1.6563924551010132, |
|
"learning_rate": 2.629678234301054e-05, |
|
"loss": 2.078, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.48224151539068666, |
|
"grad_norm": 1.3371410369873047, |
|
"learning_rate": 2.6284342719147134e-05, |
|
"loss": 2.024, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.48303078137332284, |
|
"grad_norm": 1.093579649925232, |
|
"learning_rate": 2.6271885189923714e-05, |
|
"loss": 1.9387, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.48382004735595896, |
|
"grad_norm": 1.2362289428710938, |
|
"learning_rate": 2.6259409775107198e-05, |
|
"loss": 2.0065, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.4846093133385951, |
|
"grad_norm": 1.3334242105484009, |
|
"learning_rate": 2.6246916494492866e-05, |
|
"loss": 2.003, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.48539857932123126, |
|
"grad_norm": 1.247543454170227, |
|
"learning_rate": 2.6234405367904354e-05, |
|
"loss": 1.9803, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4861878453038674, |
|
"grad_norm": 1.462421178817749, |
|
"learning_rate": 2.622187641519361e-05, |
|
"loss": 1.9861, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.48697711128650356, |
|
"grad_norm": 1.3130522966384888, |
|
"learning_rate": 2.6209329656240883e-05, |
|
"loss": 1.9314, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.4877663772691397, |
|
"grad_norm": 1.2004978656768799, |
|
"learning_rate": 2.619676511095465e-05, |
|
"loss": 2.0066, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.48855564325177586, |
|
"grad_norm": 1.11477530002594, |
|
"learning_rate": 2.618418279927163e-05, |
|
"loss": 1.9476, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.489344909234412, |
|
"grad_norm": 1.0705735683441162, |
|
"learning_rate": 2.617158274115673e-05, |
|
"loss": 1.9561, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.49013417521704816, |
|
"grad_norm": 1.1721019744873047, |
|
"learning_rate": 2.6158964956603008e-05, |
|
"loss": 1.9738, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.4909234411996843, |
|
"grad_norm": 1.3675673007965088, |
|
"learning_rate": 2.6146329465631657e-05, |
|
"loss": 1.9825, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.49171270718232046, |
|
"grad_norm": 1.2252568006515503, |
|
"learning_rate": 2.6133676288291964e-05, |
|
"loss": 1.9561, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.4925019731649566, |
|
"grad_norm": 1.335405707359314, |
|
"learning_rate": 2.6121005444661275e-05, |
|
"loss": 2.0124, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.49329123914759276, |
|
"grad_norm": 1.20765221118927, |
|
"learning_rate": 2.610831695484498e-05, |
|
"loss": 2.0189, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4940805051302289, |
|
"grad_norm": 1.2233660221099854, |
|
"learning_rate": 2.6095610838976453e-05, |
|
"loss": 1.9865, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.494869771112865, |
|
"grad_norm": 1.3525431156158447, |
|
"learning_rate": 2.608288711721704e-05, |
|
"loss": 1.9831, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.4956590370955012, |
|
"grad_norm": 1.565442681312561, |
|
"learning_rate": 2.6070145809756036e-05, |
|
"loss": 2.0341, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.4964483030781373, |
|
"grad_norm": 1.7925246953964233, |
|
"learning_rate": 2.6057386936810627e-05, |
|
"loss": 2.0274, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.4972375690607735, |
|
"grad_norm": 2.0463998317718506, |
|
"learning_rate": 2.6044610518625875e-05, |
|
"loss": 2.1091, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4980268350434096, |
|
"grad_norm": 1.641654133796692, |
|
"learning_rate": 2.603181657547468e-05, |
|
"loss": 2.0029, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.4988161010260458, |
|
"grad_norm": 1.214727759361267, |
|
"learning_rate": 2.601900512765775e-05, |
|
"loss": 1.9857, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.4996053670086819, |
|
"grad_norm": 1.7539560794830322, |
|
"learning_rate": 2.6006176195503576e-05, |
|
"loss": 1.9751, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.500394632991318, |
|
"grad_norm": 1.4065206050872803, |
|
"learning_rate": 2.5993329799368377e-05, |
|
"loss": 1.9431, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.5011838989739542, |
|
"grad_norm": 1.6131680011749268, |
|
"learning_rate": 2.598046595963609e-05, |
|
"loss": 1.9809, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5019731649565904, |
|
"grad_norm": 1.5111154317855835, |
|
"learning_rate": 2.5967584696718346e-05, |
|
"loss": 1.9888, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.5027624309392266, |
|
"grad_norm": 1.5211158990859985, |
|
"learning_rate": 2.5954686031054402e-05, |
|
"loss": 1.9922, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.5035516969218626, |
|
"grad_norm": 3.4300498962402344, |
|
"learning_rate": 2.5941769983111126e-05, |
|
"loss": 1.9395, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.5043409629044988, |
|
"grad_norm": 1.8587327003479004, |
|
"learning_rate": 2.5928836573382982e-05, |
|
"loss": 2.012, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.505130228887135, |
|
"grad_norm": 3.757934808731079, |
|
"learning_rate": 2.591588582239198e-05, |
|
"loss": 2.038, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5059194948697711, |
|
"grad_norm": 1.6540387868881226, |
|
"learning_rate": 2.5902917750687637e-05, |
|
"loss": 1.948, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.5067087608524072, |
|
"grad_norm": 1.5322803258895874, |
|
"learning_rate": 2.5889932378846963e-05, |
|
"loss": 1.9642, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.5074980268350434, |
|
"grad_norm": 1.324696660041809, |
|
"learning_rate": 2.5876929727474415e-05, |
|
"loss": 1.961, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.5082872928176796, |
|
"grad_norm": 1.3825550079345703, |
|
"learning_rate": 2.586390981720187e-05, |
|
"loss": 2.0007, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.5090765588003157, |
|
"grad_norm": 1.4527854919433594, |
|
"learning_rate": 2.5850872668688585e-05, |
|
"loss": 2.0226, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5098658247829518, |
|
"grad_norm": 1.7354285717010498, |
|
"learning_rate": 2.5837818302621177e-05, |
|
"loss": 2.0154, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.510655090765588, |
|
"grad_norm": 1.1257487535476685, |
|
"learning_rate": 2.5824746739713574e-05, |
|
"loss": 1.951, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.5114443567482242, |
|
"grad_norm": 1.931472897529602, |
|
"learning_rate": 2.5811658000707002e-05, |
|
"loss": 1.9932, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.5122336227308603, |
|
"grad_norm": 1.536661148071289, |
|
"learning_rate": 2.579855210636994e-05, |
|
"loss": 2.0213, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.5130228887134964, |
|
"grad_norm": 1.3281984329223633, |
|
"learning_rate": 2.578542907749807e-05, |
|
"loss": 2.0358, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5138121546961326, |
|
"grad_norm": 1.228049874305725, |
|
"learning_rate": 2.5772288934914287e-05, |
|
"loss": 1.9683, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.5146014206787688, |
|
"grad_norm": 1.1916272640228271, |
|
"learning_rate": 2.5759131699468624e-05, |
|
"loss": 1.9604, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.5153906866614049, |
|
"grad_norm": 1.534188985824585, |
|
"learning_rate": 2.5745957392038252e-05, |
|
"loss": 1.9949, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.516179952644041, |
|
"grad_norm": 1.3599672317504883, |
|
"learning_rate": 2.5732766033527403e-05, |
|
"loss": 2.0305, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.5169692186266772, |
|
"grad_norm": 1.546063780784607, |
|
"learning_rate": 2.5719557644867395e-05, |
|
"loss": 1.9581, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5177584846093133, |
|
"grad_norm": 1.4883053302764893, |
|
"learning_rate": 2.570633224701655e-05, |
|
"loss": 1.9556, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.5185477505919495, |
|
"grad_norm": 1.2917836904525757, |
|
"learning_rate": 2.569308986096019e-05, |
|
"loss": 1.973, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.5193370165745856, |
|
"grad_norm": 1.249537467956543, |
|
"learning_rate": 2.5679830507710586e-05, |
|
"loss": 2.0075, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.5201262825572218, |
|
"grad_norm": 1.1445908546447754, |
|
"learning_rate": 2.5666554208306933e-05, |
|
"loss": 1.9672, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.5209155485398579, |
|
"grad_norm": 1.1478815078735352, |
|
"learning_rate": 2.565326098381532e-05, |
|
"loss": 1.9705, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5217048145224941, |
|
"grad_norm": 1.1105977296829224, |
|
"learning_rate": 2.5639950855328678e-05, |
|
"loss": 1.9644, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.5224940805051302, |
|
"grad_norm": 1.1826235055923462, |
|
"learning_rate": 2.562662384396678e-05, |
|
"loss": 1.925, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.5232833464877664, |
|
"grad_norm": 1.4168332815170288, |
|
"learning_rate": 2.561327997087617e-05, |
|
"loss": 1.9569, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.5240726124704025, |
|
"grad_norm": 1.261547327041626, |
|
"learning_rate": 2.5599919257230158e-05, |
|
"loss": 2.0187, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.5248618784530387, |
|
"grad_norm": 1.1711071729660034, |
|
"learning_rate": 2.5586541724228774e-05, |
|
"loss": 1.9609, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5256511444356748, |
|
"grad_norm": 1.1934360265731812, |
|
"learning_rate": 2.5573147393098734e-05, |
|
"loss": 1.955, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.526440410418311, |
|
"grad_norm": 1.6360708475112915, |
|
"learning_rate": 2.5559736285093408e-05, |
|
"loss": 1.975, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.5272296764009471, |
|
"grad_norm": 1.4871546030044556, |
|
"learning_rate": 2.5546308421492785e-05, |
|
"loss": 2.01, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.5280189423835833, |
|
"grad_norm": 1.4333875179290771, |
|
"learning_rate": 2.5532863823603446e-05, |
|
"loss": 1.9847, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.5288082083662194, |
|
"grad_norm": 1.2392849922180176, |
|
"learning_rate": 2.5519402512758524e-05, |
|
"loss": 1.9323, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5295974743488555, |
|
"grad_norm": 1.5046405792236328, |
|
"learning_rate": 2.550592451031767e-05, |
|
"loss": 1.9297, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.5303867403314917, |
|
"grad_norm": 1.231242299079895, |
|
"learning_rate": 2.549242983766702e-05, |
|
"loss": 1.9706, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.5311760063141279, |
|
"grad_norm": 1.2985905408859253, |
|
"learning_rate": 2.547891851621915e-05, |
|
"loss": 1.9668, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.531965272296764, |
|
"grad_norm": 1.5686601400375366, |
|
"learning_rate": 2.5465390567413078e-05, |
|
"loss": 1.9397, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.5327545382794001, |
|
"grad_norm": 2.6767473220825195, |
|
"learning_rate": 2.5451846012714186e-05, |
|
"loss": 1.9895, |
|
"step": 675 |
|
}, |
|
{
"epoch": 0.5335438042620363,
"grad_norm": 1.4398545026779175,
"learning_rate": 2.543828487361421e-05,
"loss": 2.0005,
"step": 676
},
{
"epoch": 0.5343330702446725,
"grad_norm": 1.6226674318313599,
"learning_rate": 2.5424707171631206e-05,
"loss": 1.9832,
"step": 677
},
{
"epoch": 0.5351223362273086,
"grad_norm": 1.2014738321304321,
"learning_rate": 2.541111292830951e-05,
"loss": 2.0015,
"step": 678
},
{
"epoch": 0.5359116022099447,
"grad_norm": 1.2680522203445435,
"learning_rate": 2.5397502165219696e-05,
"loss": 1.9777,
"step": 679
},
{
"epoch": 0.5367008681925809,
"grad_norm": 1.573599100112915,
"learning_rate": 2.5383874903958557e-05,
"loss": 1.9867,
"step": 680
},
{
"epoch": 0.5374901341752171,
"grad_norm": 1.1822348833084106,
"learning_rate": 2.537023116614907e-05,
"loss": 1.9619,
"step": 681
},
{
"epoch": 0.5382794001578532,
"grad_norm": 1.0724636316299438,
"learning_rate": 2.5356570973440348e-05,
"loss": 1.9544,
"step": 682
},
{
"epoch": 0.5390686661404893,
"grad_norm": 1.2558928728103638,
"learning_rate": 2.5342894347507614e-05,
"loss": 1.9375,
"step": 683
},
{
"epoch": 0.5398579321231255,
"grad_norm": 64.77701568603516,
"learning_rate": 2.5329201310052162e-05,
"loss": 2.4849,
"step": 684
},
{
"epoch": 0.5406471981057617,
"grad_norm": 1.963006615638733,
"learning_rate": 2.531549188280135e-05,
"loss": 1.9229,
"step": 685
},
{
"epoch": 0.5414364640883977,
"grad_norm": 1.3791204690933228,
"learning_rate": 2.5301766087508515e-05,
"loss": 1.97,
"step": 686
},
{
"epoch": 0.5422257300710339,
"grad_norm": 1.607014775276184,
"learning_rate": 2.5288023945952974e-05,
"loss": 1.9664,
"step": 687
},
{
"epoch": 0.5430149960536701,
"grad_norm": 1.2975430488586426,
"learning_rate": 2.527426547993999e-05,
"loss": 1.98,
"step": 688
},
{
"epoch": 0.5438042620363063,
"grad_norm": 1.4744203090667725,
"learning_rate": 2.5260490711300724e-05,
"loss": 2.021,
"step": 689
},
{
"epoch": 0.5445935280189423,
"grad_norm": 1.405918836593628,
"learning_rate": 2.5246699661892193e-05,
"loss": 1.9456,
"step": 690
},
{
"epoch": 0.5453827940015785,
"grad_norm": 1.3272600173950195,
"learning_rate": 2.5232892353597273e-05,
"loss": 1.9648,
"step": 691
},
{
"epoch": 0.5461720599842147,
"grad_norm": 1.346197247505188,
"learning_rate": 2.5219068808324612e-05,
"loss": 1.9528,
"step": 692
},
{
"epoch": 0.5469613259668509,
"grad_norm": 1.2085739374160767,
"learning_rate": 2.5205229048008635e-05,
"loss": 1.9736,
"step": 693
},
{
"epoch": 0.5477505919494869,
"grad_norm": 1.3460921049118042,
"learning_rate": 2.5191373094609505e-05,
"loss": 1.9744,
"step": 694
},
{
"epoch": 0.5485398579321231,
"grad_norm": 1.324518084526062,
"learning_rate": 2.517750097011306e-05,
"loss": 1.9659,
"step": 695
},
{
"epoch": 0.5493291239147593,
"grad_norm": 1.448301911354065,
"learning_rate": 2.5163612696530805e-05,
"loss": 1.9793,
"step": 696
},
{
"epoch": 0.5501183898973955,
"grad_norm": 1.9733283519744873,
"learning_rate": 2.5149708295899873e-05,
"loss": 2.0423,
"step": 697
},
{
"epoch": 0.5509076558800315,
"grad_norm": 1.3468515872955322,
"learning_rate": 2.5135787790282992e-05,
"loss": 1.9799,
"step": 698
},
{
"epoch": 0.5516969218626677,
"grad_norm": 2.0482213497161865,
"learning_rate": 2.5121851201768425e-05,
"loss": 2.0022,
"step": 699
},
{
"epoch": 0.5524861878453039,
"grad_norm": 1.2711023092269897,
"learning_rate": 2.5107898552469974e-05,
"loss": 1.9457,
"step": 700
},
{
"epoch": 0.55327545382794,
"grad_norm": 1.7975988388061523,
"learning_rate": 2.5093929864526915e-05,
"loss": 2.0417,
"step": 701
},
{
"epoch": 0.5540647198105761,
"grad_norm": 1.7747515439987183,
"learning_rate": 2.507994516010398e-05,
"loss": 1.9492,
"step": 702
},
{
"epoch": 0.5548539857932123,
"grad_norm": 1.7052737474441528,
"learning_rate": 2.5065944461391305e-05,
"loss": 1.9589,
"step": 703
},
{
"epoch": 0.5556432517758485,
"grad_norm": 2.9147486686706543,
"learning_rate": 2.5051927790604412e-05,
"loss": 2.0123,
"step": 704
},
{
"epoch": 0.5564325177584846,
"grad_norm": 1.5917088985443115,
"learning_rate": 2.5037895169984174e-05,
"loss": 1.9582,
"step": 705
},
{
"epoch": 0.5572217837411207,
"grad_norm": 1.5023882389068604,
"learning_rate": 2.502384662179675e-05,
"loss": 1.9572,
"step": 706
},
{
"epoch": 0.5580110497237569,
"grad_norm": 1.551519513130188,
"learning_rate": 2.500978216833359e-05,
"loss": 1.9601,
"step": 707
},
{
"epoch": 0.5588003157063931,
"grad_norm": 1.5883748531341553,
"learning_rate": 2.4995701831911388e-05,
"loss": 2.0028,
"step": 708
},
{
"epoch": 0.5595895816890292,
"grad_norm": 2.114210605621338,
"learning_rate": 2.4981605634872013e-05,
"loss": 1.9387,
"step": 709
},
{
"epoch": 0.5603788476716653,
"grad_norm": 1.8715765476226807,
"learning_rate": 2.496749359958253e-05,
"loss": 2.0478,
"step": 710
},
{
"epoch": 0.5611681136543015,
"grad_norm": 1.5651837587356567,
"learning_rate": 2.495336574843512e-05,
"loss": 1.9964,
"step": 711
},
{
"epoch": 0.5619573796369376,
"grad_norm": 1.7899484634399414,
"learning_rate": 2.4939222103847056e-05,
"loss": 1.9748,
"step": 712
},
{
"epoch": 0.5627466456195738,
"grad_norm": 1.461633324623108,
"learning_rate": 2.4925062688260683e-05,
"loss": 1.9682,
"step": 713
},
{
"epoch": 0.56353591160221,
"grad_norm": 2.1834540367126465,
"learning_rate": 2.4910887524143364e-05,
"loss": 1.9991,
"step": 714
},
{
"epoch": 0.5643251775848461,
"grad_norm": 1.5680309534072876,
"learning_rate": 2.4896696633987448e-05,
"loss": 1.9627,
"step": 715
},
{
"epoch": 0.5651144435674822,
"grad_norm": 1.121997356414795,
"learning_rate": 2.4882490040310244e-05,
"loss": 1.9605,
"step": 716
},
{
"epoch": 0.5659037095501184,
"grad_norm": 1.4992661476135254,
"learning_rate": 2.4868267765653976e-05,
"loss": 1.9297,
"step": 717
},
{
"epoch": 0.5666929755327546,
"grad_norm": 1.1993900537490845,
"learning_rate": 2.485402983258575e-05,
"loss": 1.9219,
"step": 718
},
{
"epoch": 0.5674822415153907,
"grad_norm": 2.7265751361846924,
"learning_rate": 2.4839776263697514e-05,
"loss": 1.9954,
"step": 719
},
{
"epoch": 0.5682715074980268,
"grad_norm": 2.0000016689300537,
"learning_rate": 2.482550708160603e-05,
"loss": 1.9929,
"step": 720
},
{
"epoch": 0.569060773480663,
"grad_norm": 1.3767764568328857,
"learning_rate": 2.4811222308952836e-05,
"loss": 2.0236,
"step": 721
},
{
"epoch": 0.5698500394632992,
"grad_norm": 2.6863558292388916,
"learning_rate": 2.4796921968404204e-05,
"loss": 2.0154,
"step": 722
},
{
"epoch": 0.5706393054459353,
"grad_norm": 1.9830318689346313,
"learning_rate": 2.4782606082651102e-05,
"loss": 1.9738,
"step": 723
},
{
"epoch": 0.5714285714285714,
"grad_norm": 1.2655317783355713,
"learning_rate": 2.4768274674409178e-05,
"loss": 1.9938,
"step": 724
},
{
"epoch": 0.5722178374112076,
"grad_norm": 1.693976879119873,
"learning_rate": 2.4753927766418708e-05,
"loss": 2.0487,
"step": 725
},
{
"epoch": 0.5730071033938438,
"grad_norm": 1.6992710828781128,
"learning_rate": 2.4739565381444554e-05,
"loss": 2.0051,
"step": 726
},
{
"epoch": 0.5737963693764798,
"grad_norm": 1.0462723970413208,
"learning_rate": 2.4725187542276144e-05,
"loss": 1.9466,
"step": 727
},
{
"epoch": 0.574585635359116,
"grad_norm": 1.8844130039215088,
"learning_rate": 2.4710794271727415e-05,
"loss": 1.9626,
"step": 728
},
{
"epoch": 0.5753749013417522,
"grad_norm": 1.1489008665084839,
"learning_rate": 2.469638559263681e-05,
"loss": 1.9644,
"step": 729
},
{
"epoch": 0.5761641673243884,
"grad_norm": 1.4816726446151733,
"learning_rate": 2.46819615278672e-05,
"loss": 1.9771,
"step": 730
},
{
"epoch": 0.5769534333070244,
"grad_norm": 1.660891056060791,
"learning_rate": 2.4667522100305886e-05,
"loss": 1.9484,
"step": 731
},
{
"epoch": 0.5777426992896606,
"grad_norm": 1.0999447107315063,
"learning_rate": 2.4653067332864537e-05,
"loss": 1.9751,
"step": 732
},
{
"epoch": 0.5785319652722968,
"grad_norm": 1.3727487325668335,
"learning_rate": 2.4638597248479165e-05,
"loss": 1.8958,
"step": 733
},
{
"epoch": 0.579321231254933,
"grad_norm": 1.0751103162765503,
"learning_rate": 2.462411187011009e-05,
"loss": 1.933,
"step": 734
},
{
"epoch": 0.580110497237569,
"grad_norm": 1.3762593269348145,
"learning_rate": 2.4609611220741884e-05,
"loss": 1.9646,
"step": 735
},
{
"epoch": 0.5808997632202052,
"grad_norm": 1.5363467931747437,
"learning_rate": 2.459509532338337e-05,
"loss": 1.9666,
"step": 736
},
{
"epoch": 0.5816890292028414,
"grad_norm": 1.1099787950515747,
"learning_rate": 2.4580564201067557e-05,
"loss": 1.9915,
"step": 737
},
{
"epoch": 0.5824782951854776,
"grad_norm": 1.2934099435806274,
"learning_rate": 2.4566017876851605e-05,
"loss": 1.9321,
"step": 738
},
{
"epoch": 0.5832675611681136,
"grad_norm": 2.3133106231689453,
"learning_rate": 2.4551456373816815e-05,
"loss": 1.9448,
"step": 739
},
{
"epoch": 0.5840568271507498,
"grad_norm": 1.6514102220535278,
"learning_rate": 2.4536879715068546e-05,
"loss": 2.0637,
"step": 740
},
{
"epoch": 0.584846093133386,
"grad_norm": 1.7410539388656616,
"learning_rate": 2.452228792373623e-05,
"loss": 1.9651,
"step": 741
},
{
"epoch": 0.585635359116022,
"grad_norm": 1.1819558143615723,
"learning_rate": 2.45076810229733e-05,
"loss": 1.9527,
"step": 742
},
{
"epoch": 0.5864246250986582,
"grad_norm": 1.6729192733764648,
"learning_rate": 2.4493059035957164e-05,
"loss": 2.0146,
"step": 743
},
{
"epoch": 0.5872138910812944,
"grad_norm": 1.0809391736984253,
"learning_rate": 2.4478421985889162e-05,
"loss": 1.9934,
"step": 744
},
{
"epoch": 0.5880031570639306,
"grad_norm": 1.5663741827011108,
"learning_rate": 2.4463769895994545e-05,
"loss": 1.8878,
"step": 745
},
{
"epoch": 0.5887924230465666,
"grad_norm": 1.1649593114852905,
"learning_rate": 2.4449102789522427e-05,
"loss": 1.9876,
"step": 746
},
{
"epoch": 0.5895816890292028,
"grad_norm": 1.8039592504501343,
"learning_rate": 2.443442068974574e-05,
"loss": 1.9676,
"step": 747
},
{
"epoch": 0.590370955011839,
"grad_norm": 1.341096043586731,
"learning_rate": 2.441972361996122e-05,
"loss": 1.9812,
"step": 748
},
{
"epoch": 0.5911602209944752,
"grad_norm": 1.758004903793335,
"learning_rate": 2.4405011603489353e-05,
"loss": 1.9761,
"step": 749
},
{
"epoch": 0.5919494869771112,
"grad_norm": 1.4644911289215088,
"learning_rate": 2.439028466367433e-05,
"loss": 1.9381,
"step": 750
},
{
"epoch": 0.5927387529597474,
"grad_norm": 1.7480524778366089,
"learning_rate": 2.4375542823884044e-05,
"loss": 1.9949,
"step": 751
},
{
"epoch": 0.5935280189423836,
"grad_norm": 2.019632577896118,
"learning_rate": 2.4360786107510003e-05,
"loss": 1.9843,
"step": 752
},
{
"epoch": 0.5943172849250198,
"grad_norm": 1.6516532897949219,
"learning_rate": 2.434601453796734e-05,
"loss": 1.9856,
"step": 753
},
{
"epoch": 0.5951065509076559,
"grad_norm": 1.9035701751708984,
"learning_rate": 2.433122813869475e-05,
"loss": 1.9736,
"step": 754
},
{
"epoch": 0.595895816890292,
"grad_norm": 1.3820042610168457,
"learning_rate": 2.4316426933154457e-05,
"loss": 1.9113,
"step": 755
},
{
"epoch": 0.5966850828729282,
"grad_norm": 32.268516540527344,
"learning_rate": 2.430161094483218e-05,
"loss": 2.0811,
"step": 756
},
{
"epoch": 0.5974743488555643,
"grad_norm": 1.958722472190857,
"learning_rate": 2.4286780197237098e-05,
"loss": 1.9285,
"step": 757
},
{
"epoch": 0.5982636148382005,
"grad_norm": 1.1933417320251465,
"learning_rate": 2.427193471390181e-05,
"loss": 1.9512,
"step": 758
},
{
"epoch": 0.5990528808208366,
"grad_norm": 3.102102756500244,
"learning_rate": 2.425707451838229e-05,
"loss": 1.9804,
"step": 759
},
{
"epoch": 0.5998421468034728,
"grad_norm": 1.4609273672103882,
"learning_rate": 2.4242199634257865e-05,
"loss": 1.9406,
"step": 760
},
{
"epoch": 0.6006314127861089,
"grad_norm": 1.467287302017212,
"learning_rate": 2.4227310085131156e-05,
"loss": 1.9203,
"step": 761
},
{
"epoch": 0.601420678768745,
"grad_norm": 1.8098467588424683,
"learning_rate": 2.4212405894628063e-05,
"loss": 1.9826,
"step": 762
},
{
"epoch": 0.6022099447513812,
"grad_norm": 1.479901671409607,
"learning_rate": 2.4197487086397724e-05,
"loss": 2.0035,
"step": 763
},
{
"epoch": 0.6029992107340174,
"grad_norm": 1.6941622495651245,
"learning_rate": 2.4182553684112454e-05,
"loss": 1.9921,
"step": 764
},
{
"epoch": 0.6037884767166535,
"grad_norm": 1.103091835975647,
"learning_rate": 2.416760571146774e-05,
"loss": 1.9229,
"step": 765
},
{
"epoch": 0.6045777426992897,
"grad_norm": 1.2702820301055908,
"learning_rate": 2.4152643192182188e-05,
"loss": 1.9755,
"step": 766
},
{
"epoch": 0.6053670086819258,
"grad_norm": 1.2228517532348633,
"learning_rate": 2.4137666149997478e-05,
"loss": 1.9695,
"step": 767
},
{
"epoch": 0.606156274664562,
"grad_norm": 1.2769513130187988,
"learning_rate": 2.4122674608678334e-05,
"loss": 1.9965,
"step": 768
},
{
"epoch": 0.6069455406471981,
"grad_norm": 1.25937020778656,
"learning_rate": 2.4107668592012498e-05,
"loss": 1.9219,
"step": 769
},
{
"epoch": 0.6077348066298343,
"grad_norm": 1.0595743656158447,
"learning_rate": 2.409264812381067e-05,
"loss": 1.988,
"step": 770
},
{
"epoch": 0.6085240726124704,
"grad_norm": 1.3666781187057495,
"learning_rate": 2.407761322790648e-05,
"loss": 1.9581,
"step": 771
},
{
"epoch": 0.6093133385951065,
"grad_norm": 1.167434573173523,
"learning_rate": 2.4062563928156455e-05,
"loss": 1.9685,
"step": 772
},
{
"epoch": 0.6101026045777427,
"grad_norm": 1.627456784248352,
"learning_rate": 2.404750024843998e-05,
"loss": 1.9174,
"step": 773
},
{
"epoch": 0.6108918705603789,
"grad_norm": 1.2988723516464233,
"learning_rate": 2.4032422212659257e-05,
"loss": 1.971,
"step": 774
},
{
"epoch": 0.611681136543015,
"grad_norm": 1.6529747247695923,
"learning_rate": 2.4017329844739257e-05,
"loss": 1.9778,
"step": 775
},
{
"epoch": 0.6124704025256511,
"grad_norm": 1.8538501262664795,
"learning_rate": 2.4002223168627707e-05,
"loss": 1.9594,
"step": 776
},
{
"epoch": 0.6132596685082873,
"grad_norm": 1.302612066268921,
"learning_rate": 2.398710220829503e-05,
"loss": 1.9616,
"step": 777
},
{
"epoch": 0.6140489344909235,
"grad_norm": 1.9736380577087402,
"learning_rate": 2.3971966987734306e-05,
"loss": 1.9303,
"step": 778
},
{
"epoch": 0.6148382004735596,
"grad_norm": 1.284517765045166,
"learning_rate": 2.3956817530961262e-05,
"loss": 1.9755,
"step": 779
},
{
"epoch": 0.6156274664561957,
"grad_norm": 1.2649601697921753,
"learning_rate": 2.3941653862014195e-05,
"loss": 1.9127,
"step": 780
},
{
"epoch": 0.6164167324388319,
"grad_norm": 1.1102463006973267,
"learning_rate": 2.392647600495397e-05,
"loss": 1.9198,
"step": 781
},
{
"epoch": 0.6172059984214681,
"grad_norm": 1.0401090383529663,
"learning_rate": 2.3911283983863953e-05,
"loss": 1.9438,
"step": 782
},
{
"epoch": 0.6179952644041041,
"grad_norm": 1.1567270755767822,
"learning_rate": 2.3896077822849984e-05,
"loss": 1.9812,
"step": 783
},
{
"epoch": 0.6187845303867403,
"grad_norm": 1.1251745223999023,
"learning_rate": 2.3880857546040355e-05,
"loss": 1.9033,
"step": 784
},
{
"epoch": 0.6195737963693765,
"grad_norm": 1.1831730604171753,
"learning_rate": 2.386562317758573e-05,
"loss": 1.9404,
"step": 785
},
{
"epoch": 0.6203630623520127,
"grad_norm": 4.819545269012451,
"learning_rate": 2.3850374741659155e-05,
"loss": 1.9528,
"step": 786
},
{
"epoch": 0.6211523283346487,
"grad_norm": 1.254148244857788,
"learning_rate": 2.3835112262455992e-05,
"loss": 1.9311,
"step": 787
},
{
"epoch": 0.6219415943172849,
"grad_norm": 3.181715250015259,
"learning_rate": 2.3819835764193875e-05,
"loss": 2.0101,
"step": 788
},
{
"epoch": 0.6227308602999211,
"grad_norm": 1.117749810218811,
"learning_rate": 2.38045452711127e-05,
"loss": 1.9697,
"step": 789
},
{
"epoch": 0.6235201262825573,
"grad_norm": 1.4008678197860718,
"learning_rate": 2.3789240807474566e-05,
"loss": 1.9163,
"step": 790
},
{
"epoch": 0.6243093922651933,
"grad_norm": 1.0686087608337402,
"learning_rate": 2.3773922397563723e-05,
"loss": 1.9319,
"step": 791
},
{
"epoch": 0.6250986582478295,
"grad_norm": 1.0253478288650513,
"learning_rate": 2.3758590065686567e-05,
"loss": 1.9838,
"step": 792
},
{
"epoch": 0.6258879242304657,
"grad_norm": 1.427983045578003,
"learning_rate": 2.3743243836171577e-05,
"loss": 1.9409,
"step": 793
},
{
"epoch": 0.6266771902131019,
"grad_norm": 1.0589593648910522,
"learning_rate": 2.3727883733369295e-05,
"loss": 1.9825,
"step": 794
},
{
"epoch": 0.6274664561957379,
"grad_norm": 2.2683448791503906,
"learning_rate": 2.3712509781652258e-05,
"loss": 2.007,
"step": 795
},
{
"epoch": 0.6282557221783741,
"grad_norm": 1.4350131750106812,
"learning_rate": 2.3697122005414987e-05,
"loss": 1.969,
"step": 796
},
{
"epoch": 0.6290449881610103,
"grad_norm": 1.2323224544525146,
"learning_rate": 2.3681720429073947e-05,
"loss": 1.9373,
"step": 797
},
{
"epoch": 0.6298342541436464,
"grad_norm": 0.9967265129089355,
"learning_rate": 2.3666305077067487e-05,
"loss": 1.9147,
"step": 798
},
{
"epoch": 0.6306235201262825,
"grad_norm": 1.2962182760238647,
"learning_rate": 2.3650875973855825e-05,
"loss": 1.9588,
"step": 799
},
{
"epoch": 0.6314127861089187,
"grad_norm": 1.6102293729782104,
"learning_rate": 2.3635433143920985e-05,
"loss": 2.0478,
"step": 800
},
{
"epoch": 0.6322020520915549,
"grad_norm": 1.213399052619934,
"learning_rate": 2.3619976611766793e-05,
"loss": 1.9225,
"step": 801
},
{
"epoch": 0.632991318074191,
"grad_norm": 1.4079943895339966,
"learning_rate": 2.360450640191879e-05,
"loss": 1.9589,
"step": 802
},
{
"epoch": 0.6337805840568271,
"grad_norm": 1.3271418809890747,
"learning_rate": 2.3589022538924246e-05,
"loss": 2.0268,
"step": 803
},
{
"epoch": 0.6345698500394633,
"grad_norm": 1.1346262693405151,
"learning_rate": 2.3573525047352078e-05,
"loss": 1.9397,
"step": 804
},
{
"epoch": 0.6353591160220995,
"grad_norm": 1.3805230855941772,
"learning_rate": 2.3558013951792836e-05,
"loss": 1.9674,
"step": 805
},
{
"epoch": 0.6361483820047356,
"grad_norm": 1.2992908954620361,
"learning_rate": 2.3542489276858655e-05,
"loss": 1.9824,
"step": 806
},
{
"epoch": 0.6369376479873717,
"grad_norm": 1.2167001962661743,
"learning_rate": 2.3526951047183208e-05,
"loss": 1.9646,
"step": 807
},
{
"epoch": 0.6377269139700079,
"grad_norm": 1.2849534749984741,
"learning_rate": 2.3511399287421688e-05,
"loss": 1.9261,
"step": 808
},
{
"epoch": 0.6385161799526441,
"grad_norm": 1.2633917331695557,
"learning_rate": 2.3495834022250752e-05,
"loss": 1.9408,
"step": 809
},
{
"epoch": 0.6393054459352802,
"grad_norm": 1.0912851095199585,
"learning_rate": 2.3480255276368493e-05,
"loss": 1.9653,
"step": 810
},
{
"epoch": 0.6400947119179163,
"grad_norm": 1.1520500183105469,
"learning_rate": 2.346466307449438e-05,
"loss": 1.9434,
"step": 811
},
{
"epoch": 0.6408839779005525,
"grad_norm": 1.21257483959198,
"learning_rate": 2.3449057441369243e-05,
"loss": 2.0161,
"step": 812
},
{
"epoch": 0.6416732438831886,
"grad_norm": 1.5208064317703247,
"learning_rate": 2.3433438401755224e-05,
"loss": 1.915,
"step": 813
},
{
"epoch": 0.6424625098658248,
"grad_norm": 1.200332760810852,
"learning_rate": 2.341780598043574e-05,
"loss": 1.9753,
"step": 814
},
{
"epoch": 0.6432517758484609,
"grad_norm": 1.5574405193328857,
"learning_rate": 2.3402160202215426e-05,
"loss": 1.9294,
"step": 815
},
{
"epoch": 0.6440410418310971,
"grad_norm": 1.2709983587265015,
"learning_rate": 2.3386501091920134e-05,
"loss": 1.9696,
"step": 816
},
{
"epoch": 0.6448303078137332,
"grad_norm": 1.9312561750411987,
"learning_rate": 2.3370828674396855e-05,
"loss": 2.034,
"step": 817
},
{
"epoch": 0.6456195737963694,
"grad_norm": 1.6993128061294556,
"learning_rate": 2.3355142974513694e-05,
"loss": 1.9364,
"step": 818
},
{
"epoch": 0.6464088397790055,
"grad_norm": 1.7830700874328613,
"learning_rate": 2.3339444017159847e-05,
"loss": 1.9691,
"step": 819
},
{
"epoch": 0.6471981057616417,
"grad_norm": 1.1403906345367432,
"learning_rate": 2.3323731827245526e-05,
"loss": 1.952,
"step": 820
},
{
"epoch": 0.6479873717442778,
"grad_norm": 1.5226829051971436,
"learning_rate": 2.3308006429701956e-05,
"loss": 1.9802,
"step": 821
},
{
"epoch": 0.648776637726914,
"grad_norm": 1.1285881996154785,
"learning_rate": 2.3292267849481313e-05,
"loss": 1.9184,
"step": 822
},
{
"epoch": 0.6495659037095501,
"grad_norm": 1.2501252889633179,
"learning_rate": 2.327651611155669e-05,
"loss": 1.8888,
"step": 823
},
{
"epoch": 0.6503551696921863,
"grad_norm": 1.576758623123169,
"learning_rate": 2.3260751240922054e-05,
"loss": 1.9909,
"step": 824
},
{
"epoch": 0.6511444356748224,
"grad_norm": 1.2548635005950928,
"learning_rate": 2.324497326259222e-05,
"loss": 1.9063,
"step": 825
},
{
"epoch": 0.6519337016574586,
"grad_norm": 1.1014522314071655,
"learning_rate": 2.322918220160279e-05,
"loss": 1.9507,
"step": 826
},
{
"epoch": 0.6527229676400947,
"grad_norm": 1.3672009706497192,
"learning_rate": 2.321337808301014e-05,
"loss": 1.9708,
"step": 827
},
{
"epoch": 0.6535122336227308,
"grad_norm": 1.2893126010894775,
"learning_rate": 2.3197560931891347e-05,
"loss": 1.951,
"step": 828
},
{
"epoch": 0.654301499605367,
"grad_norm": 1.176769733428955,
"learning_rate": 2.3181730773344182e-05,
"loss": 1.9732,
"step": 829
},
{
"epoch": 0.6550907655880032,
"grad_norm": 1.6181094646453857,
"learning_rate": 2.3165887632487046e-05,
"loss": 1.8668,
"step": 830
},
{
"epoch": 0.6558800315706393,
"grad_norm": 1.608091950416565,
"learning_rate": 2.3150031534458947e-05,
"loss": 1.9172,
"step": 831
},
{
"epoch": 0.6566692975532754,
"grad_norm": 1.127637267112732,
"learning_rate": 2.313416250441945e-05,
"loss": 1.9291,
"step": 832
},
{
"epoch": 0.6574585635359116,
"grad_norm": 1.0388165712356567,
"learning_rate": 2.3118280567548633e-05,
"loss": 1.9392,
"step": 833
},
{
"epoch": 0.6582478295185478,
"grad_norm": 1.3837047815322876,
"learning_rate": 2.3102385749047058e-05,
"loss": 1.9586,
"step": 834
},
{
"epoch": 0.659037095501184,
"grad_norm": 1.0598629713058472,
"learning_rate": 2.3086478074135742e-05,
"loss": 1.9177,
"step": 835
},
{
"epoch": 0.65982636148382,
"grad_norm": 1.442895770072937,
"learning_rate": 2.307055756805607e-05,
"loss": 1.9223,
"step": 836
},
{
"epoch": 0.6606156274664562,
"grad_norm": 1.247636079788208,
"learning_rate": 2.3054624256069824e-05,
"loss": 1.9878,
"step": 837
},
{
"epoch": 0.6614048934490924,
"grad_norm": 1.8692328929901123,
"learning_rate": 2.303867816345907e-05,
"loss": 1.9418,
"step": 838
},
{
"epoch": 0.6621941594317285,
"grad_norm": 1.4947094917297363,
"learning_rate": 2.3022719315526184e-05,
"loss": 1.9157,
"step": 839
},
{
"epoch": 0.6629834254143646,
"grad_norm": 1.2936433553695679,
"learning_rate": 2.3006747737593756e-05,
"loss": 1.9789,
"step": 840
},
{
"epoch": 0.6637726913970008,
"grad_norm": 1.081805944442749,
"learning_rate": 2.2990763455004597e-05,
"loss": 1.8891,
"step": 841
},
{
"epoch": 0.664561957379637,
"grad_norm": 10.924898147583008,
"learning_rate": 2.2974766493121666e-05,
"loss": 2.0674,
"step": 842
},
{
"epoch": 0.665351223362273,
"grad_norm": 1.4154882431030273,
"learning_rate": 2.2958756877328037e-05,
"loss": 1.966,
"step": 843
},
{
"epoch": 0.6661404893449092,
"grad_norm": 1.1723241806030273,
"learning_rate": 2.2942734633026864e-05,
"loss": 1.966,
"step": 844
},
{
"epoch": 0.6669297553275454,
"grad_norm": 1.3635941743850708,
"learning_rate": 2.292669978564135e-05,
"loss": 1.9216,
"step": 845
},
{
"epoch": 0.6677190213101816,
"grad_norm": 2.8145909309387207,
"learning_rate": 2.2910652360614688e-05,
"loss": 1.9837,
"step": 846
},
{
"epoch": 0.6685082872928176,
"grad_norm": 1.623931646347046,
"learning_rate": 2.2894592383410027e-05,
"loss": 1.9557,
"step": 847
},
{
"epoch": 0.6692975532754538,
"grad_norm": 1.248794436454773,
"learning_rate": 2.2878519879510437e-05,
"loss": 1.9235,
"step": 848
},
{
"epoch": 0.67008681925809,
"grad_norm": 2.0067548751831055,
"learning_rate": 2.2862434874418857e-05,
"loss": 1.9316,
"step": 849
},
{
"epoch": 0.6708760852407262,
"grad_norm": 1.3988443613052368,
"learning_rate": 2.2846337393658074e-05,
"loss": 1.944,
"step": 850
},
{
"epoch": 0.6716653512233622,
"grad_norm": 1.6866941452026367,
"learning_rate": 2.2830227462770665e-05,
"loss": 2.0047,
"step": 851
},
{
"epoch": 0.6724546172059984,
"grad_norm": 1.9285333156585693,
"learning_rate": 2.2814105107318955e-05,
"loss": 2.0428,
"step": 852
},
{
"epoch": 0.6732438831886346,
"grad_norm": 1.3415141105651855,
"learning_rate": 2.2797970352884997e-05,
"loss": 1.9088,
"step": 853
},
{
"epoch": 0.6740331491712708,
"grad_norm": 1.4724236726760864,
"learning_rate": 2.2781823225070507e-05,
"loss": 1.9685,
"step": 854
},
{
"epoch": 0.6748224151539068,
"grad_norm": 1.3407270908355713,
"learning_rate": 2.2765663749496846e-05,
"loss": 1.9887,
"step": 855
},
{
"epoch": 0.675611681136543,
"grad_norm": 1.1044732332229614,
"learning_rate": 2.274949195180495e-05,
"loss": 1.8941,
"step": 856
},
{
"epoch": 0.6764009471191792,
"grad_norm": 1.3121356964111328,
"learning_rate": 2.2733307857655327e-05,
"loss": 1.9777,
"step": 857
},
{
"epoch": 0.6771902131018153,
"grad_norm": 1.1786476373672485,
"learning_rate": 2.271711149272798e-05,
"loss": 1.923,
"step": 858
},
{
"epoch": 0.6779794790844514,
"grad_norm": 1.3217613697052002,
"learning_rate": 2.2700902882722396e-05,
"loss": 1.8686,
"step": 859
},
{
"epoch": 0.6787687450670876,
"grad_norm": 1.3290356397628784,
"learning_rate": 2.2684682053357472e-05,
"loss": 1.9165,
"step": 860
},
{
"epoch": 0.6795580110497238,
"grad_norm": 1.4889332056045532,
"learning_rate": 2.2668449030371527e-05,
"loss": 1.9085,
"step": 861
},
{
"epoch": 0.6803472770323599,
"grad_norm": 1.156880259513855,
"learning_rate": 2.2652203839522196e-05,
"loss": 1.9228,
"step": 862
},
{
"epoch": 0.681136543014996,
"grad_norm": 1.52714204788208,
"learning_rate": 2.2635946506586435e-05,
"loss": 1.9908,
"step": 863
},
{
"epoch": 0.6819258089976322,
"grad_norm": 1.362430453300476,
"learning_rate": 2.261967705736046e-05,
"loss": 1.9262,
"step": 864
},
{
"epoch": 0.6827150749802684,
"grad_norm": 1.495282769203186,
"learning_rate": 2.2603395517659728e-05,
"loss": 1.9529,
"step": 865
},
{
"epoch": 0.6835043409629045,
"grad_norm": 1.3992247581481934,
"learning_rate": 2.2587101913318864e-05,
"loss": 1.9274,
"step": 866
},
{
"epoch": 0.6842936069455406,
"grad_norm": 1.3318471908569336,
"learning_rate": 2.257079627019164e-05,
"loss": 1.9572,
"step": 867
},
{
"epoch": 0.6850828729281768,
"grad_norm": 1.420928716659546,
"learning_rate": 2.255447861415094e-05,
"loss": 1.9143,
"step": 868
},
{
"epoch": 0.6858721389108129,
"grad_norm": 1.0273611545562744,
"learning_rate": 2.25381489710887e-05,
"loss": 1.9147,
"step": 869
},
{
"epoch": 0.6866614048934491,
"grad_norm": 1.3935787677764893,
"learning_rate": 2.2521807366915876e-05,
"loss": 1.9353,
"step": 870
},
{
"epoch": 0.6874506708760852,
"grad_norm": 1.21060049533844,
"learning_rate": 2.250545382756241e-05,
"loss": 2.0201,
"step": 871
},
{
"epoch": 0.6882399368587214,
"grad_norm": 10.238750457763672,
"learning_rate": 2.2489088378977176e-05,
"loss": 2.0379,
"step": 872
},
{
"epoch": 0.6890292028413575,
"grad_norm": 4.480075359344482,
"learning_rate": 2.247271104712794e-05,
"loss": 2.0273,
"step": 873
},
{
"epoch": 0.6898184688239937,
"grad_norm": 2.047750234603882,
"learning_rate": 2.245632185800134e-05,
"loss": 1.9601,
"step": 874
},
{
"epoch": 0.6906077348066298,
"grad_norm": 1.5088602304458618,
"learning_rate": 2.2439920837602817e-05,
"loss": 1.9402,
"step": 875
},
{
"epoch": 0.691397000789266,
"grad_norm": 1.2437899112701416,
"learning_rate": 2.2423508011956583e-05,
"loss": 1.9443,
"step": 876
},
{
"epoch": 0.6921862667719021,
"grad_norm": 7.469998836517334,
"learning_rate": 2.240708340710559e-05,
"loss": 1.971,
"step": 877
},
{
"epoch": 0.6929755327545383,
"grad_norm": 2.0380475521087646,
"learning_rate": 2.2390647049111472e-05,
"loss": 1.9815,
"step": 878
},
{
"epoch": 0.6937647987371744,
"grad_norm": 2.9355788230895996,
"learning_rate": 2.237419896405453e-05,
"loss": 1.8744,
"step": 879
},
{
"epoch": 0.6945540647198106,
"grad_norm": 1.66250479221344,
"learning_rate": 2.2357739178033645e-05,
"loss": 1.9304,
"step": 880
},
{
"epoch": 0.6953433307024467,
"grad_norm": 1.2899961471557617,
"learning_rate": 2.2341267717166285e-05,
"loss": 1.9682,
"step": 881
},
{
"epoch": 0.6961325966850829,
"grad_norm": 1.6746995449066162,
"learning_rate": 2.2324784607588432e-05,
"loss": 1.9546,
"step": 882
},
{
"epoch": 0.696921862667719,
"grad_norm": 2.286348342895508,
"learning_rate": 2.2308289875454573e-05,
"loss": 1.9209,
"step": 883
},
{
"epoch": 0.6977111286503551,
"grad_norm": 1.3030370473861694,
"learning_rate": 2.2291783546937596e-05,
"loss": 1.9305,
"step": 884
},
{
"epoch": 0.6985003946329913,
"grad_norm": 1.2656025886535645,
"learning_rate": 2.2275265648228833e-05,
"loss": 1.9644,
"step": 885
},
{
"epoch": 0.6992896606156275,
"grad_norm": 1.3782883882522583,
"learning_rate": 2.2258736205537955e-05,
"loss": 1.9385,
"step": 886
},
{
"epoch": 0.7000789265982637,
"grad_norm": 1.1452478170394897,
"learning_rate": 2.2242195245092942e-05,
"loss": 1.9486,
"step": 887
},
{
"epoch": 0.7008681925808997,
"grad_norm": 1.3117746114730835,
"learning_rate": 2.2225642793140067e-05,
"loss": 1.9841,
"step": 888
},
{
"epoch": 0.7016574585635359,
"grad_norm": 1.22527015209198,
"learning_rate": 2.2209078875943822e-05,
"loss": 1.9265,
"step": 889
},
{
"epoch": 0.7024467245461721,
"grad_norm": 1.0896904468536377,
"learning_rate": 2.219250351978691e-05,
"loss": 1.9565,
"step": 890
},
{
"epoch": 0.7032359905288083,
"grad_norm": 1.1967089176177979,
"learning_rate": 2.2175916750970164e-05,
"loss": 1.9371,
"step": 891
},
{
"epoch": 0.7040252565114443,
"grad_norm": 1.3110548257827759,
"learning_rate": 2.2159318595812532e-05,
"loss": 1.9225,
"step": 892
},
{
"epoch": 0.7048145224940805,
"grad_norm": 1.1737967729568481,
"learning_rate": 2.2142709080651047e-05,
"loss": 1.9572,
"step": 893
},
{
"epoch": 0.7056037884767167,
"grad_norm": 1.9293705224990845,
"learning_rate": 2.212608823184074e-05,
"loss": 1.9495,
"step": 894
},
{
"epoch": 0.7063930544593529,
"grad_norm": 1.6178711652755737,
"learning_rate": 2.2109456075754644e-05,
"loss": 1.9028,
"step": 895
},
{
"epoch": 0.7071823204419889,
"grad_norm": 1.4091240167617798,
"learning_rate": 2.2092812638783723e-05,
"loss": 1.8941,
"step": 896
},
{
"epoch": 0.7079715864246251,
"grad_norm": 1.9126784801483154,
"learning_rate": 2.207615794733686e-05,
"loss": 2.013,
"step": 897
},
{
"epoch": 0.7087608524072613,
"grad_norm": 1.1694210767745972,
"learning_rate": 2.2059492027840766e-05,
"loss": 1.9768,
"step": 898
},
{
"epoch": 0.7095501183898973,
"grad_norm": 3.8965656757354736,
"learning_rate": 2.204281490673999e-05,
"loss": 2.0802,
"step": 899
},
{
"epoch": 0.7103393843725335,
"grad_norm": 1.5217310190200806,
"learning_rate": 2.2026126610496852e-05,
"loss": 1.9425,
"step": 900
},
{
"epoch": 0.7111286503551697,
"grad_norm": 1.3671518564224243,
"learning_rate": 2.20094271655914e-05,
"loss": 1.9806,
"step": 901
},
{
"epoch": 0.7119179163378059,
"grad_norm": 1.2386888265609741,
"learning_rate": 2.1992716598521372e-05,
"loss": 1.9273,
"step": 902
},
{
"epoch": 0.712707182320442,
"grad_norm": 1.5315154790878296,
"learning_rate": 2.197599493580216e-05,
"loss": 1.8953,
"step": 903
},
{
"epoch": 0.7134964483030781,
"grad_norm": 1.662078857421875,
"learning_rate": 2.1959262203966748e-05,
"loss": 1.9719,
"step": 904
},
{
"epoch": 0.7142857142857143,
"grad_norm": 1.7234045267105103,
"learning_rate": 2.1942518429565703e-05,
"loss": 1.9798,
"step": 905
},
{
"epoch": 0.7150749802683505,
"grad_norm": 1.38892662525177,
"learning_rate": 2.19257636391671e-05,
"loss": 1.9861,
"step": 906
},
{
"epoch": 0.7158642462509865,
"grad_norm": 1.0747907161712646,
"learning_rate": 2.1908997859356496e-05,
"loss": 1.9242,
"step": 907
},
{
"epoch": 0.7166535122336227,
"grad_norm": 1.7632391452789307,
"learning_rate": 2.189222111673689e-05,
"loss": 1.9238,
"step": 908
},
{
"epoch": 0.7174427782162589,
"grad_norm": 1.0350416898727417,
"learning_rate": 2.1875433437928666e-05,
"loss": 1.8935,
"step": 909
},
{
"epoch": 0.7182320441988951,
"grad_norm": 1.5194156169891357,
"learning_rate": 2.1858634849569578e-05,
"loss": 1.9061,
"step": 910
},
{
"epoch": 0.7190213101815311,
"grad_norm": 1.0854952335357666,
"learning_rate": 2.184182537831468e-05,
"loss": 1.8827,
"step": 911
},
{
"epoch": 0.7198105761641673,
"grad_norm": 1.9698389768600464,
"learning_rate": 2.1825005050836284e-05,
"loss": 1.9542,
"step": 912
},
{
"epoch": 0.7205998421468035,
"grad_norm": 1.6458585262298584,
"learning_rate": 2.180817389382395e-05,
"loss": 1.9505,
"step": 913
},
{
"epoch": 0.7213891081294396,
"grad_norm": 1.779136061668396,
"learning_rate": 2.1791331933984407e-05,
"loss": 1.9921,
"step": 914
},
{
"epoch": 0.7221783741120757,
"grad_norm": 2.018765687942505,
"learning_rate": 2.1774479198041526e-05,
"loss": 1.9579,
"step": 915
},
{
"epoch": 0.7229676400947119,
"grad_norm": 1.0813268423080444,
"learning_rate": 2.1757615712736284e-05,
"loss": 1.9442,
"step": 916
},
{
"epoch": 0.7237569060773481,
"grad_norm": 2.2704761028289795,
"learning_rate": 2.174074150482672e-05,
"loss": 1.9334,
"step": 917
},
{
"epoch": 0.7245461720599842,
"grad_norm": 4.316535949707031,
"learning_rate": 2.1723856601087854e-05,
"loss": 1.9386,
"step": 918
},
{
"epoch": 0.7253354380426204,
"grad_norm": 2.0900955200195312,
"learning_rate": 2.170696102831172e-05,
"loss": 1.9106,
"step": 919
},
{
"epoch": 0.7261247040252565,
"grad_norm": 2.0656304359436035,
"learning_rate": 2.1690054813307255e-05,
"loss": 1.9515,
"step": 920
},
{
"epoch": 0.7269139700078927,
"grad_norm": 2.1848411560058594,
"learning_rate": 2.1673137982900297e-05,
"loss": 1.9959,
"step": 921
},
{
"epoch": 0.7277032359905288,
"grad_norm": 1.9023263454437256,
"learning_rate": 2.1656210563933508e-05,
"loss": 1.9947,
"step": 922
},
{
"epoch": 0.728492501973165,
"grad_norm": 1.5430853366851807,
"learning_rate": 2.163927258326637e-05,
"loss": 1.9767,
"step": 923
},
{
"epoch": 0.7292817679558011,
"grad_norm": 1.293797254562378,
"learning_rate": 2.1622324067775118e-05,
"loss": 1.903,
"step": 924
},
{
"epoch": 0.7300710339384373,
"grad_norm": 1.5131195783615112,
"learning_rate": 2.1605365044352704e-05,
"loss": 1.9292,
"step": 925
},
{
"epoch": 0.7308602999210734,
"grad_norm": 1.0771164894104004,
"learning_rate": 2.1588395539908753e-05,
"loss": 1.9476,
"step": 926
},
{
"epoch": 0.7316495659037096,
"grad_norm": 1.6526238918304443,
"learning_rate": 2.1571415581369516e-05,
"loss": 1.94,
"step": 927
},
{
"epoch": 0.7324388318863457,
"grad_norm": 1.312951683998108,
"learning_rate": 2.1554425195677838e-05,
"loss": 1.9687,
"step": 928
},
{
"epoch": 0.7332280978689818,
"grad_norm": 1.599605679512024,
"learning_rate": 2.1537424409793104e-05,
"loss": 1.9032,
"step": 929
},
{
"epoch": 0.734017363851618,
"grad_norm": 1.2115885019302368,
"learning_rate": 2.1520413250691202e-05,
"loss": 1.94,
"step": 930
},
{
"epoch": 0.7348066298342542,
"grad_norm": 1.6560498476028442,
"learning_rate": 2.1503391745364494e-05,
"loss": 1.9307,
"step": 931
},
{
"epoch": 0.7355958958168903,
"grad_norm": 2.2198646068573,
"learning_rate": 2.148635992082173e-05,
"loss": 1.9424,
"step": 932
},
{
"epoch": 0.7363851617995264,
"grad_norm": 1.0756328105926514,
"learning_rate": 2.1469317804088066e-05,
"loss": 1.9332,
"step": 933
},
{
"epoch": 0.7371744277821626,
"grad_norm": 1.542237401008606,
"learning_rate": 2.1452265422204967e-05,
"loss": 1.9586,
"step": 934
},
{
"epoch": 0.7379636937647988,
"grad_norm": 1.2590047121047974,
"learning_rate": 2.1435202802230193e-05,
"loss": 1.937,
"step": 935
},
{
"epoch": 0.7387529597474349,
"grad_norm": 1.3695781230926514,
"learning_rate": 2.141812997123775e-05,
"loss": 1.9052,
"step": 936
},
{
"epoch": 0.739542225730071,
"grad_norm": 1.2480250597000122,
"learning_rate": 2.1401046956317843e-05,
"loss": 1.996,
"step": 937
},
{
"epoch": 0.7403314917127072,
"grad_norm": 1.5121519565582275,
"learning_rate": 2.1383953784576843e-05,
"loss": 1.9314,
"step": 938
},
{
"epoch": 0.7411207576953434,
"grad_norm": 1.3273252248764038,
"learning_rate": 2.136685048313723e-05,
"loss": 1.9594,
"step": 939
},
{
"epoch": 0.7419100236779794,
"grad_norm": 1.2657073736190796,
"learning_rate": 2.1349737079137554e-05,
"loss": 1.9398,
"step": 940
},
{
"epoch": 0.7426992896606156,
"grad_norm": 1.3078733682632446,
"learning_rate": 2.133261359973242e-05,
"loss": 1.9175,
"step": 941
},
{
"epoch": 0.7434885556432518,
"grad_norm": 1.0810556411743164,
"learning_rate": 2.1315480072092385e-05,
"loss": 1.9058,
"step": 942
},
{
"epoch": 0.744277821625888,
"grad_norm": 1.3791708946228027,
"learning_rate": 2.129833652340397e-05,
"loss": 1.9271,
"step": 943
},
{
"epoch": 0.745067087608524,
"grad_norm": 1.2780756950378418,
"learning_rate": 2.1281182980869594e-05,
"loss": 1.9071,
"step": 944
},
{
"epoch": 0.7458563535911602,
"grad_norm": 1.1226168870925903,
"learning_rate": 2.126401947170754e-05,
"loss": 1.9827,
"step": 945
},
{
"epoch": 0.7466456195737964,
"grad_norm": 1.133967638015747,
"learning_rate": 2.1246846023151888e-05,
"loss": 1.8706,
"step": 946
},
{
"epoch": 0.7474348855564326,
"grad_norm": 1.0483019351959229,
"learning_rate": 2.12296626624525e-05,
"loss": 1.8822,
"step": 947
},
{
"epoch": 0.7482241515390686,
"grad_norm": 2.100027322769165,
"learning_rate": 2.1212469416874972e-05,
"loss": 1.9556,
"step": 948
},
{
"epoch": 0.7490134175217048,
"grad_norm": 1.4822384119033813,
"learning_rate": 2.119526631370058e-05,
"loss": 1.9661,
"step": 949
},
{
"epoch": 0.749802683504341,
"grad_norm": 1.2308375835418701,
"learning_rate": 2.1178053380226234e-05,
"loss": 1.9075,
"step": 950
},
{
"epoch": 0.7505919494869772,
"grad_norm": 1.7181750535964966,
"learning_rate": 2.1160830643764448e-05,
"loss": 1.9816,
"step": 951
},
{
"epoch": 0.7513812154696132,
"grad_norm": 1.7420463562011719,
"learning_rate": 2.11435981316433e-05,
"loss": 1.9134,
"step": 952
},
{
"epoch": 0.7521704814522494,
"grad_norm": 1.654775619506836,
"learning_rate": 2.1126355871206358e-05,
"loss": 1.9577,
"step": 953
},
{
"epoch": 0.7529597474348856,
"grad_norm": 1.5406017303466797,
"learning_rate": 2.110910388981268e-05,
"loss": 1.9192,
"step": 954
},
{
"epoch": 0.7537490134175217,
"grad_norm": 1.4977563619613647,
"learning_rate": 2.1091842214836736e-05,
"loss": 1.9376,
"step": 955
},
{
"epoch": 0.7545382794001578,
"grad_norm": 1.0576728582382202,
"learning_rate": 2.1074570873668374e-05,
"loss": 1.8936,
"step": 956
},
{
"epoch": 0.755327545382794,
"grad_norm": 1.1977719068527222,
"learning_rate": 2.1057289893712796e-05,
"loss": 1.8901,
"step": 957
},
{
"epoch": 0.7561168113654302,
"grad_norm": 1.399145483970642,
"learning_rate": 2.103999930239049e-05,
"loss": 1.8795,
"step": 958
},
{
"epoch": 0.7569060773480663,
"grad_norm": 1.3279935121536255,
"learning_rate": 2.1022699127137184e-05,
"loss": 1.9639,
"step": 959
},
{
"epoch": 0.7576953433307024,
"grad_norm": 1.220153570175171,
"learning_rate": 2.1005389395403827e-05,
"loss": 1.9882,
"step": 960
},
{
"epoch": 0.7584846093133386,
"grad_norm": 1.3342796564102173,
"learning_rate": 2.0988070134656525e-05,
"loss": 1.9771,
"step": 961
},
{
"epoch": 0.7592738752959748,
"grad_norm": 1.1898157596588135,
"learning_rate": 2.097074137237651e-05,
"loss": 1.9551,
"step": 962
},
{
"epoch": 0.7600631412786109,
"grad_norm": 1.3458317518234253,
"learning_rate": 2.0953403136060088e-05,
"loss": 2.0118,
"step": 963
},
{
"epoch": 0.760852407261247,
"grad_norm": 1.055283546447754,
"learning_rate": 2.093605545321859e-05,
"loss": 1.9307,
"step": 964
},
{
"epoch": 0.7616416732438832,
"grad_norm": 1.1726508140563965,
"learning_rate": 2.091869835137835e-05,
"loss": 1.9662,
"step": 965
},
{
"epoch": 0.7624309392265194,
"grad_norm": 1.4072753190994263,
"learning_rate": 2.0901331858080633e-05,
"loss": 1.9281,
"step": 966
},
{
"epoch": 0.7632202052091555,
"grad_norm": 1.1238325834274292,
"learning_rate": 2.088395600088162e-05,
"loss": 1.8911,
"step": 967
},
{
"epoch": 0.7640094711917916,
"grad_norm": 1.2030211687088013,
"learning_rate": 2.086657080735234e-05,
"loss": 1.8815,
"step": 968
},
{
"epoch": 0.7647987371744278,
"grad_norm": 1.2055410146713257,
"learning_rate": 2.0849176305078646e-05,
"loss": 1.9201,
"step": 969
},
{
"epoch": 0.7655880031570639,
"grad_norm": 1.2860301733016968,
"learning_rate": 2.083177252166114e-05,
"loss": 1.9055,
"step": 970
},
{
"epoch": 0.7663772691397001,
"grad_norm": 1.2509900331497192,
"learning_rate": 2.0814359484715183e-05,
"loss": 1.957,
"step": 971
},
{
"epoch": 0.7671665351223362,
"grad_norm": 1.172758936882019,
"learning_rate": 2.0796937221870792e-05,
"loss": 1.9532,
"step": 972
},
{
"epoch": 0.7679558011049724,
"grad_norm": 1.1523761749267578,
"learning_rate": 2.077950576077264e-05,
"loss": 1.92,
"step": 973
},
{
"epoch": 0.7687450670876085,
"grad_norm": 1.1627498865127563,
"learning_rate": 2.076206512907998e-05,
"loss": 1.927,
"step": 974
},
{
"epoch": 0.7695343330702447,
"grad_norm": 1.4067708253860474,
"learning_rate": 2.074461535446663e-05,
"loss": 1.934,
"step": 975
},
{
"epoch": 0.7703235990528808,
"grad_norm": 1.0437569618225098,
"learning_rate": 2.072715646462092e-05,
"loss": 1.9641,
"step": 976
},
{
"epoch": 0.771112865035517,
"grad_norm": 1.206112265586853,
"learning_rate": 2.0709688487245616e-05,
"loss": 1.9312,
"step": 977
},
{
"epoch": 0.7719021310181531,
"grad_norm": 1.1898462772369385,
"learning_rate": 2.069221145005793e-05,
"loss": 1.9562,
"step": 978
},
{
"epoch": 0.7726913970007893,
"grad_norm": 1.2786809206008911,
"learning_rate": 2.0674725380789444e-05,
"loss": 1.9559,
"step": 979
},
{
"epoch": 0.7734806629834254,
"grad_norm": 1.164592981338501,
"learning_rate": 2.065723030718606e-05,
"loss": 1.9825,
"step": 980
},
{
"epoch": 0.7742699289660616,
"grad_norm": 1.3167165517807007,
"learning_rate": 2.0639726257007986e-05,
"loss": 1.9216,
"step": 981
},
{
"epoch": 0.7750591949486977,
"grad_norm": 1.2257906198501587,
"learning_rate": 2.0622213258029657e-05,
"loss": 1.9491,
"step": 982
},
{
"epoch": 0.7758484609313339,
"grad_norm": 1.0711567401885986,
"learning_rate": 2.060469133803972e-05,
"loss": 1.9191,
"step": 983
},
{
"epoch": 0.77663772691397,
"grad_norm": 1.8117008209228516,
"learning_rate": 2.058716052484097e-05,
"loss": 1.9311,
"step": 984
},
{
"epoch": 0.7774269928966061,
"grad_norm": 1.137128233909607,
"learning_rate": 2.056962084625031e-05,
"loss": 1.8654,
"step": 985
},
{
"epoch": 0.7782162588792423,
"grad_norm": 1.1593658924102783,
"learning_rate": 2.055207233009872e-05,
"loss": 1.9392,
"step": 986
},
{
"epoch": 0.7790055248618785,
"grad_norm": 1.0210576057434082,
"learning_rate": 2.0534515004231193e-05,
"loss": 1.9445,
"step": 987
},
{
"epoch": 0.7797947908445146,
"grad_norm": 1.2282181978225708,
"learning_rate": 2.0516948896506706e-05,
"loss": 1.8879,
"step": 988
},
{
"epoch": 0.7805840568271507,
"grad_norm": 1.1221119165420532,
"learning_rate": 2.049937403479818e-05,
"loss": 1.9226,
"step": 989
},
{
"epoch": 0.7813733228097869,
"grad_norm": 1.6811386346817017,
"learning_rate": 2.0481790446992405e-05,
"loss": 1.9722,
"step": 990
},
{
"epoch": 0.7821625887924231,
"grad_norm": 1.2378782033920288,
"learning_rate": 2.0464198160990034e-05,
"loss": 1.9014,
"step": 991
},
{
"epoch": 0.7829518547750592,
"grad_norm": 1.1285789012908936,
"learning_rate": 2.044659720470552e-05,
"loss": 1.9549,
"step": 992
},
{
"epoch": 0.7837411207576953,
"grad_norm": 1.1845968961715698,
"learning_rate": 2.042898760606706e-05,
"loss": 1.9145,
"step": 993
},
{
"epoch": 0.7845303867403315,
"grad_norm": 98.27835083007812,
"learning_rate": 2.0411369393016583e-05,
"loss": 2.1377,
"step": 994
},
{
"epoch": 0.7853196527229677,
"grad_norm": 1.2735936641693115,
"learning_rate": 2.0393742593509673e-05,
"loss": 1.9253,
"step": 995
},
{
"epoch": 0.7861089187056038,
"grad_norm": 1.335058569908142,
"learning_rate": 2.0376107235515545e-05,
"loss": 1.9287,
"step": 996
},
{
"epoch": 0.7868981846882399,
"grad_norm": 1.3353217840194702,
"learning_rate": 2.035846334701699e-05,
"loss": 1.9171,
"step": 997
},
{
"epoch": 0.7876874506708761,
"grad_norm": 1.3583861589431763,
"learning_rate": 2.0340810956010347e-05,
"loss": 1.8695,
"step": 998
},
{
"epoch": 0.7884767166535123,
"grad_norm": 1.0094035863876343,
"learning_rate": 2.0323150090505425e-05,
"loss": 1.959,
"step": 999
},
{
"epoch": 0.7892659826361483,
"grad_norm": 1.0646247863769531,
"learning_rate": 2.03054807785255e-05,
"loss": 1.9456,
"step": 1000
},
{
"epoch": 0.7900552486187845,
"grad_norm": 1.4316010475158691,
"learning_rate": 2.0287803048107237e-05,
"loss": 1.9598,
"step": 1001
},
{
"epoch": 0.7908445146014207,
"grad_norm": 1.2731982469558716,
"learning_rate": 2.027011692730066e-05,
"loss": 1.8874,
"step": 1002
},
{
"epoch": 0.7916337805840569,
"grad_norm": 0.9664216041564941,
"learning_rate": 2.0252422444169122e-05,
"loss": 1.9212,
"step": 1003
},
{
"epoch": 0.7924230465666929,
"grad_norm": 1.229513168334961,
"learning_rate": 2.0234719626789222e-05,
"loss": 1.9094,
"step": 1004
},
{
"epoch": 0.7932123125493291,
"grad_norm": 1.2635388374328613,
"learning_rate": 2.0217008503250804e-05,
"loss": 1.9462,
"step": 1005
},
{
"epoch": 0.7940015785319653,
"grad_norm": 1.0137258768081665,
"learning_rate": 2.019928910165687e-05,
"loss": 1.9129,
"step": 1006
},
{
"epoch": 0.7947908445146015,
"grad_norm": 4.261168003082275,
"learning_rate": 2.0181561450123584e-05,
"loss": 1.9242,
"step": 1007
},
{
"epoch": 0.7955801104972375,
"grad_norm": 1.5557130575180054,
"learning_rate": 2.0163825576780177e-05,
"loss": 1.9141,
"step": 1008
},
{
"epoch": 0.7963693764798737,
"grad_norm": 1.5184749364852905,
"learning_rate": 2.0146081509768932e-05,
"loss": 1.9849,
"step": 1009
},
{
"epoch": 0.7971586424625099,
"grad_norm": 1.0676449537277222,
"learning_rate": 2.0128329277245147e-05,
"loss": 1.9777,
"step": 1010
},
{
"epoch": 0.797947908445146,
"grad_norm": 1.1058017015457153,
"learning_rate": 2.011056890737705e-05,
"loss": 1.9017,
"step": 1011
},
{
"epoch": 0.7987371744277821,
"grad_norm": 3.1086857318878174,
"learning_rate": 2.0092800428345803e-05,
"loss": 1.9944,
"step": 1012
},
{
"epoch": 0.7995264404104183,
"grad_norm": 2.2823331356048584,
"learning_rate": 2.007502386834544e-05,
"loss": 1.9451,
"step": 1013
},
{
"epoch": 0.8003157063930545,
"grad_norm": 1.4383108615875244,
"learning_rate": 2.005723925558279e-05,
"loss": 1.8993,
"step": 1014
},
{
"epoch": 0.8011049723756906,
"grad_norm": 1.7305094003677368,
"learning_rate": 2.0039446618277486e-05,
"loss": 1.9778,
"step": 1015
},
{
"epoch": 0.8018942383583267,
"grad_norm": 1.3104729652404785,
"learning_rate": 2.0021645984661877e-05,
"loss": 1.9218,
"step": 1016
},
{
"epoch": 0.8026835043409629,
"grad_norm": 1.3720124959945679,
"learning_rate": 2.000383738298101e-05,
"loss": 1.9605,
"step": 1017
},
{
"epoch": 0.8034727703235991,
"grad_norm": 1.67930006980896,
"learning_rate": 1.9986020841492575e-05,
"loss": 1.9081,
"step": 1018
},
{
"epoch": 0.8042620363062352,
"grad_norm": 1.313557505607605,
"learning_rate": 1.9968196388466852e-05,
"loss": 2.0005,
"step": 1019
},
{
"epoch": 0.8050513022888713,
"grad_norm": 1.0877718925476074,
"learning_rate": 1.9950364052186682e-05,
"loss": 1.9576,
"step": 1020
},
{
"epoch": 0.8058405682715075,
"grad_norm": 1.4061955213546753,
"learning_rate": 1.993252386094741e-05,
"loss": 1.9535,
"step": 1021
},
{
"epoch": 0.8066298342541437,
"grad_norm": 1.2338870763778687,
"learning_rate": 1.9914675843056855e-05,
"loss": 1.9022,
"step": 1022
},
{
"epoch": 0.8074191002367798,
"grad_norm": 1.0897296667099,
"learning_rate": 1.9896820026835237e-05,
"loss": 1.9264,
"step": 1023
},
{
"epoch": 0.8082083662194159,
"grad_norm": 1.21879243850708,
"learning_rate": 1.9878956440615172e-05,
"loss": 1.9245,
"step": 1024
},
{
"epoch": 0.8089976322020521,
"grad_norm": 1.0368714332580566,
"learning_rate": 1.9861085112741587e-05,
"loss": 1.905,
"step": 1025
},
{
"epoch": 0.8097868981846882,
"grad_norm": 1.2078007459640503,
"learning_rate": 1.9843206071571692e-05,
"loss": 1.9068,
"step": 1026
},
{
"epoch": 0.8105761641673244,
"grad_norm": 1.3901704549789429,
"learning_rate": 1.982531934547496e-05,
"loss": 1.9295,
"step": 1027
},
{
"epoch": 0.8113654301499605,
"grad_norm": 1.098552942276001,
"learning_rate": 1.980742496283303e-05,
"loss": 1.9386,
"step": 1028
},
{
"epoch": 0.8121546961325967,
"grad_norm": 1.073913812637329,
"learning_rate": 1.9789522952039697e-05,
"loss": 1.9274,
"step": 1029
},
{
"epoch": 0.8129439621152328,
"grad_norm": 1.0683954954147339,
"learning_rate": 1.977161334150088e-05,
"loss": 1.9,
"step": 1030
},
{
"epoch": 0.813733228097869,
"grad_norm": 0.9743616580963135,
"learning_rate": 1.9753696159634532e-05,
"loss": 1.9312,
"step": 1031
},
{
"epoch": 0.8145224940805051,
"grad_norm": 2.691880226135254,
"learning_rate": 1.9735771434870624e-05,
"loss": 1.8925,
"step": 1032
},
{
"epoch": 0.8153117600631413,
"grad_norm": 1.1001895666122437,
"learning_rate": 1.9717839195651112e-05,
"loss": 1.9242,
"step": 1033
},
{
"epoch": 0.8161010260457774,
"grad_norm": 1.2172091007232666,
"learning_rate": 1.9699899470429852e-05,
"loss": 1.9218,
"step": 1034
},
{
"epoch": 0.8168902920284136,
"grad_norm": 1.0749343633651733,
"learning_rate": 1.9681952287672603e-05,
"loss": 1.9229,
"step": 1035
},
{
"epoch": 0.8176795580110497,
"grad_norm": 1.0054171085357666,
"learning_rate": 1.9663997675856928e-05,
"loss": 1.9917,
"step": 1036
},
{
"epoch": 0.8184688239936859,
"grad_norm": 1.0160495042800903,
"learning_rate": 1.964603566347221e-05,
"loss": 1.9582,
"step": 1037
},
{
"epoch": 0.819258089976322,
"grad_norm": 1.068723201751709,
"learning_rate": 1.9628066279019557e-05,
"loss": 1.9772,
"step": 1038
},
{
"epoch": 0.8200473559589582,
"grad_norm": 4.0177507400512695,
"learning_rate": 1.961008955101177e-05,
"loss": 1.9647,
"step": 1039
},
{
"epoch": 0.8208366219415943,
"grad_norm": 1.0878604650497437,
"learning_rate": 1.959210550797331e-05,
"loss": 1.9027,
"step": 1040
},
{
"epoch": 0.8216258879242304,
"grad_norm": 1.042733073234558,
"learning_rate": 1.9574114178440258e-05,
"loss": 1.8878,
"step": 1041
},
{
"epoch": 0.8224151539068666,
"grad_norm": 0.9671216011047363,
"learning_rate": 1.955611559096023e-05,
"loss": 1.9175,
"step": 1042
},
{
"epoch": 0.8232044198895028,
"grad_norm": 1.0640370845794678,
"learning_rate": 1.953810977409237e-05,
"loss": 1.9226,
"step": 1043
},
{
"epoch": 0.823993685872139,
"grad_norm": 0.9828730821609497,
"learning_rate": 1.9520096756407302e-05,
"loss": 1.9383,
"step": 1044
},
{
"epoch": 0.824782951854775,
"grad_norm": 0.9962606430053711,
"learning_rate": 1.950207656648707e-05,
"loss": 1.8699,
"step": 1045
},
{
"epoch": 0.8255722178374112,
"grad_norm": 1.1136631965637207,
"learning_rate": 1.948404923292509e-05,
"loss": 1.9337,
"step": 1046
},
{
"epoch": 0.8263614838200474,
"grad_norm": 0.9540908932685852,
"learning_rate": 1.9466014784326124e-05,
"loss": 1.8999,
"step": 1047
},
{
"epoch": 0.8271507498026835,
"grad_norm": 1.3623814582824707,
"learning_rate": 1.9447973249306225e-05,
"loss": 1.9309,
"step": 1048
},
{
"epoch": 0.8279400157853196,
"grad_norm": 1.2193188667297363,
"learning_rate": 1.942992465649268e-05,
"loss": 1.9221,
"step": 1049
},
{
"epoch": 0.8287292817679558,
"grad_norm": 1.0682237148284912,
"learning_rate": 1.9411869034523977e-05,
"loss": 1.9563,
"step": 1050
},
{
"epoch": 0.829518547750592,
"grad_norm": 1.0715112686157227,
"learning_rate": 1.9393806412049765e-05,
"loss": 1.889,
"step": 1051
},
{
"epoch": 0.8303078137332282,
"grad_norm": 1.0543572902679443,
"learning_rate": 1.93757368177308e-05,
"loss": 1.8968,
"step": 1052
},
{
"epoch": 0.8310970797158642,
"grad_norm": 1.1826039552688599,
"learning_rate": 1.9357660280238896e-05,
"loss": 1.9357,
"step": 1053
},
{
"epoch": 0.8318863456985004,
"grad_norm": 1.1282274723052979,
"learning_rate": 1.933957682825688e-05,
"loss": 1.9127,
"step": 1054
},
{
"epoch": 0.8326756116811366,
"grad_norm": 0.9678381085395813,
"learning_rate": 1.9321486490478565e-05,
"loss": 1.9063,
"step": 1055
},
{
"epoch": 0.8334648776637726,
"grad_norm": 1.0463242530822754,
"learning_rate": 1.9303389295608677e-05,
"loss": 1.9163,
"step": 1056
},
{
"epoch": 0.8342541436464088,
"grad_norm": 1.168262243270874,
"learning_rate": 1.9285285272362816e-05,
"loss": 1.8283,
"step": 1057
},
{
"epoch": 0.835043409629045,
"grad_norm": 1.1833221912384033,
"learning_rate": 1.9267174449467442e-05,
"loss": 1.9377,
"step": 1058
},
{
"epoch": 0.8358326756116812,
"grad_norm": 1.2744213342666626,
"learning_rate": 1.924905685565979e-05,
"loss": 1.9625,
"step": 1059
},
{
"epoch": 0.8366219415943172,
"grad_norm": 1.243308663368225,
"learning_rate": 1.9230932519687822e-05,
"loss": 1.8508,
"step": 1060
},
{
"epoch": 0.8374112075769534,
"grad_norm": 1.043876051902771,
"learning_rate": 1.921280147031023e-05,
"loss": 1.911,
"step": 1061
},
{
"epoch": 0.8382004735595896,
"grad_norm": 1.0375399589538574,
"learning_rate": 1.919466373629634e-05,
"loss": 1.9219,
"step": 1062
},
{
"epoch": 0.8389897395422258,
"grad_norm": 0.9551234245300293,
"learning_rate": 1.9176519346426084e-05,
"loss": 1.8883,
"step": 1063
},
{
"epoch": 0.8397790055248618,
"grad_norm": 1.1786881685256958,
"learning_rate": 1.9158368329489957e-05,
"loss": 1.9367,
"step": 1064
},
{
"epoch": 0.840568271507498,
"grad_norm": 1.134186029434204,
"learning_rate": 1.914021071428898e-05,
"loss": 1.9078,
"step": 1065
},
{
"epoch": 0.8413575374901342,
"grad_norm": 0.9656745791435242,
"learning_rate": 1.9122046529634625e-05,
"loss": 1.9052,
"step": 1066
},
{
"epoch": 0.8421468034727704,
"grad_norm": 0.9540627002716064,
"learning_rate": 1.9103875804348806e-05,
"loss": 1.8895,
"step": 1067
|
}, |
|
{ |
|
"epoch": 0.8429360694554064, |
|
"grad_norm": 1.0714752674102783, |
|
"learning_rate": 1.90856985672638e-05, |
|
"loss": 1.9444, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.8437253354380426, |
|
"grad_norm": 1.54597806930542, |
|
"learning_rate": 1.9067514847222227e-05, |
|
"loss": 1.965, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.8445146014206788, |
|
"grad_norm": 1.068199634552002, |
|
"learning_rate": 1.9049324673076994e-05, |
|
"loss": 1.9616, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8453038674033149, |
|
"grad_norm": 1.2419066429138184, |
|
"learning_rate": 1.903112807369124e-05, |
|
"loss": 1.9696, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.846093133385951, |
|
"grad_norm": 0.986475944519043, |
|
"learning_rate": 1.9012925077938318e-05, |
|
"loss": 1.9167, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.8468823993685872, |
|
"grad_norm": 0.9953616261482239, |
|
"learning_rate": 1.89947157147017e-05, |
|
"loss": 1.9293, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.8476716653512234, |
|
"grad_norm": 1.0164732933044434, |
|
"learning_rate": 1.897650001287498e-05, |
|
"loss": 1.9196, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.8484609313338595, |
|
"grad_norm": 2.1091668605804443, |
|
"learning_rate": 1.8958278001361823e-05, |
|
"loss": 1.9055, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.8492501973164956, |
|
"grad_norm": 2.535614252090454, |
|
"learning_rate": 1.8940049709075877e-05, |
|
"loss": 1.9882, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.8500394632991318, |
|
"grad_norm": 1.2041168212890625, |
|
"learning_rate": 1.8921815164940784e-05, |
|
"loss": 1.9206, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.850828729281768, |
|
"grad_norm": 1.0966860055923462, |
|
"learning_rate": 1.890357439789008e-05, |
|
"loss": 1.9412, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.8516179952644041, |
|
"grad_norm": 1.3406330347061157, |
|
"learning_rate": 1.8885327436867194e-05, |
|
"loss": 1.9086, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.8524072612470402, |
|
"grad_norm": 2.0100197792053223, |
|
"learning_rate": 1.8867074310825377e-05, |
|
"loss": 1.9412, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8531965272296764, |
|
"grad_norm": 1.0935620069503784, |
|
"learning_rate": 1.884881504872766e-05, |
|
"loss": 1.9451, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.8539857932123125, |
|
"grad_norm": 1.2751333713531494, |
|
"learning_rate": 1.883054967954681e-05, |
|
"loss": 1.9423, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.8547750591949487, |
|
"grad_norm": 1.2702161073684692, |
|
"learning_rate": 1.8812278232265297e-05, |
|
"loss": 1.9101, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.8555643251775849, |
|
"grad_norm": 1.0920089483261108, |
|
"learning_rate": 1.879400073587521e-05, |
|
"loss": 1.9152, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.856353591160221, |
|
"grad_norm": 1.0169867277145386, |
|
"learning_rate": 1.8775717219378264e-05, |
|
"loss": 1.892, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 1.1083533763885498, |
|
"learning_rate": 1.8757427711785714e-05, |
|
"loss": 1.943, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.8579321231254933, |
|
"grad_norm": 0.9779300093650818, |
|
"learning_rate": 1.873913224211832e-05, |
|
"loss": 1.93, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.8587213891081295, |
|
"grad_norm": 1.2874062061309814, |
|
"learning_rate": 1.8720830839406294e-05, |
|
"loss": 1.9395, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.8595106550907656, |
|
"grad_norm": 1.7089831829071045, |
|
"learning_rate": 1.8702523532689282e-05, |
|
"loss": 1.938, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.8602999210734017, |
|
"grad_norm": 1.2932785749435425, |
|
"learning_rate": 1.8684210351016288e-05, |
|
"loss": 1.9323, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8610891870560379, |
|
"grad_norm": 1.0572917461395264, |
|
"learning_rate": 1.8665891323445635e-05, |
|
"loss": 1.934, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.861878453038674, |
|
"grad_norm": 0.9761425256729126, |
|
"learning_rate": 1.864756647904492e-05, |
|
"loss": 1.8805, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.8626677190213102, |
|
"grad_norm": 0.97571861743927, |
|
"learning_rate": 1.862923584689099e-05, |
|
"loss": 1.891, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.8634569850039463, |
|
"grad_norm": 1.1124482154846191, |
|
"learning_rate": 1.8610899456069846e-05, |
|
"loss": 1.9309, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.8642462509865825, |
|
"grad_norm": 1.0374202728271484, |
|
"learning_rate": 1.8592557335676648e-05, |
|
"loss": 1.9369, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.8650355169692187, |
|
"grad_norm": 1.0762258768081665, |
|
"learning_rate": 1.857420951481564e-05, |
|
"loss": 1.916, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.8658247829518547, |
|
"grad_norm": 1.2466862201690674, |
|
"learning_rate": 1.8555856022600105e-05, |
|
"loss": 1.9623, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.8666140489344909, |
|
"grad_norm": 1.0643996000289917, |
|
"learning_rate": 1.853749688815234e-05, |
|
"loss": 1.9485, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.8674033149171271, |
|
"grad_norm": 1.2198410034179688, |
|
"learning_rate": 1.8519132140603584e-05, |
|
"loss": 1.9576, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.8681925808997633, |
|
"grad_norm": 1.1695075035095215, |
|
"learning_rate": 1.8500761809093983e-05, |
|
"loss": 1.8975, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8689818468823993, |
|
"grad_norm": 1.2702025175094604, |
|
"learning_rate": 1.848238592277255e-05, |
|
"loss": 1.9415, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.8697711128650355, |
|
"grad_norm": 1.2971569299697876, |
|
"learning_rate": 1.84640045107971e-05, |
|
"loss": 1.9521, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.8705603788476717, |
|
"grad_norm": 1.3112415075302124, |
|
"learning_rate": 1.8445617602334228e-05, |
|
"loss": 1.9009, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.8713496448303079, |
|
"grad_norm": 1.192368507385254, |
|
"learning_rate": 1.8427225226559247e-05, |
|
"loss": 1.9095, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.8721389108129439, |
|
"grad_norm": 1.4217482805252075, |
|
"learning_rate": 1.840882741265614e-05, |
|
"loss": 1.9417, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.8729281767955801, |
|
"grad_norm": 1.1348249912261963, |
|
"learning_rate": 1.839042418981752e-05, |
|
"loss": 1.8749, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.8737174427782163, |
|
"grad_norm": 1.2523006200790405, |
|
"learning_rate": 1.8372015587244596e-05, |
|
"loss": 1.9132, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.8745067087608525, |
|
"grad_norm": 1.1433618068695068, |
|
"learning_rate": 1.8353601634147092e-05, |
|
"loss": 1.9525, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.8752959747434885, |
|
"grad_norm": 1.5293651819229126, |
|
"learning_rate": 1.833518235974324e-05, |
|
"loss": 1.8907, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.8760852407261247, |
|
"grad_norm": 1.7137699127197266, |
|
"learning_rate": 1.8316757793259704e-05, |
|
"loss": 1.9531, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8768745067087609, |
|
"grad_norm": 1.1961606740951538, |
|
"learning_rate": 1.829832796393155e-05, |
|
"loss": 1.9241, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.877663772691397, |
|
"grad_norm": 1.5045583248138428, |
|
"learning_rate": 1.8279892901002193e-05, |
|
"loss": 1.9374, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.8784530386740331, |
|
"grad_norm": 1.1014037132263184, |
|
"learning_rate": 1.8261452633723356e-05, |
|
"loss": 1.8816, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.8792423046566693, |
|
"grad_norm": 1.487324595451355, |
|
"learning_rate": 1.824300719135502e-05, |
|
"loss": 1.8671, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.8800315706393055, |
|
"grad_norm": 1.1038920879364014, |
|
"learning_rate": 1.8224556603165363e-05, |
|
"loss": 1.9133, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.8808208366219415, |
|
"grad_norm": 1.3335046768188477, |
|
"learning_rate": 1.820610089843075e-05, |
|
"loss": 1.9142, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.8816101026045777, |
|
"grad_norm": 1.2794814109802246, |
|
"learning_rate": 1.8187640106435654e-05, |
|
"loss": 1.8914, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.8823993685872139, |
|
"grad_norm": 1.3214670419692993, |
|
"learning_rate": 1.8169174256472623e-05, |
|
"loss": 1.9615, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.8831886345698501, |
|
"grad_norm": 1.5310771465301514, |
|
"learning_rate": 1.815070337784222e-05, |
|
"loss": 1.8994, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.8839779005524862, |
|
"grad_norm": 1.1699209213256836, |
|
"learning_rate": 1.8132227499853003e-05, |
|
"loss": 1.9175, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8847671665351223, |
|
"grad_norm": 1.2050460577011108, |
|
"learning_rate": 1.8113746651821457e-05, |
|
"loss": 1.8937, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.8855564325177585, |
|
"grad_norm": 1.040279746055603, |
|
"learning_rate": 1.8095260863071943e-05, |
|
"loss": 1.9171, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.8863456985003947, |
|
"grad_norm": 1.1379011869430542, |
|
"learning_rate": 1.8076770162936678e-05, |
|
"loss": 1.9004, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.8871349644830308, |
|
"grad_norm": 1.071140170097351, |
|
"learning_rate": 1.805827458075566e-05, |
|
"loss": 1.8959, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.8879242304656669, |
|
"grad_norm": 0.9981568455696106, |
|
"learning_rate": 1.8039774145876643e-05, |
|
"loss": 1.8838, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8887134964483031, |
|
"grad_norm": 1.1366488933563232, |
|
"learning_rate": 1.802126888765507e-05, |
|
"loss": 1.8907, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.8895027624309392, |
|
"grad_norm": 1.379737377166748, |
|
"learning_rate": 1.8002758835454046e-05, |
|
"loss": 1.9165, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.8902920284135754, |
|
"grad_norm": 0.9969932436943054, |
|
"learning_rate": 1.7984244018644283e-05, |
|
"loss": 1.9267, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.8910812943962115, |
|
"grad_norm": 1.143640398979187, |
|
"learning_rate": 1.7965724466604046e-05, |
|
"loss": 1.8801, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.8918705603788477, |
|
"grad_norm": 1.122267723083496, |
|
"learning_rate": 1.794720020871912e-05, |
|
"loss": 1.9291, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8926598263614838, |
|
"grad_norm": 1.3195860385894775, |
|
"learning_rate": 1.7928671274382754e-05, |
|
"loss": 1.922, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.89344909234412, |
|
"grad_norm": 1.3526853322982788, |
|
"learning_rate": 1.7910137692995616e-05, |
|
"loss": 1.947, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.8942383583267561, |
|
"grad_norm": 1.3031686544418335, |
|
"learning_rate": 1.7891599493965756e-05, |
|
"loss": 1.9207, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.8950276243093923, |
|
"grad_norm": 0.967303991317749, |
|
"learning_rate": 1.7873056706708546e-05, |
|
"loss": 1.9416, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.8958168902920284, |
|
"grad_norm": 1.5189329385757446, |
|
"learning_rate": 1.7854509360646627e-05, |
|
"loss": 1.9273, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.8966061562746646, |
|
"grad_norm": 1.0588527917861938, |
|
"learning_rate": 1.7835957485209894e-05, |
|
"loss": 1.8705, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.8973954222573007, |
|
"grad_norm": 1.061260461807251, |
|
"learning_rate": 1.7817401109835412e-05, |
|
"loss": 1.9345, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.8981846882399369, |
|
"grad_norm": 1.0034102201461792, |
|
"learning_rate": 1.7798840263967405e-05, |
|
"loss": 1.9364, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.898973954222573, |
|
"grad_norm": 1.059441089630127, |
|
"learning_rate": 1.7780274977057162e-05, |
|
"loss": 1.8858, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.8997632202052092, |
|
"grad_norm": 1.0713084936141968, |
|
"learning_rate": 1.776170527856304e-05, |
|
"loss": 1.9348, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9005524861878453, |
|
"grad_norm": 1.074656367301941, |
|
"learning_rate": 1.7743131197950405e-05, |
|
"loss": 1.878, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.9013417521704814, |
|
"grad_norm": 1.2053810358047485, |
|
"learning_rate": 1.7724552764691545e-05, |
|
"loss": 1.9581, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.9021310181531176, |
|
"grad_norm": 1.0136345624923706, |
|
"learning_rate": 1.7705970008265687e-05, |
|
"loss": 1.8901, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.9029202841357538, |
|
"grad_norm": 1.1384106874465942, |
|
"learning_rate": 1.7687382958158893e-05, |
|
"loss": 1.8547, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.9037095501183899, |
|
"grad_norm": 1.0804816484451294, |
|
"learning_rate": 1.7668791643864056e-05, |
|
"loss": 1.9461, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.904498816101026, |
|
"grad_norm": 1.1892673969268799, |
|
"learning_rate": 1.7650196094880817e-05, |
|
"loss": 1.9242, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.9052880820836622, |
|
"grad_norm": 1.0078619718551636, |
|
"learning_rate": 1.763159634071556e-05, |
|
"loss": 1.9495, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.9060773480662984, |
|
"grad_norm": 0.978377103805542, |
|
"learning_rate": 1.7612992410881314e-05, |
|
"loss": 1.9139, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.9068666140489345, |
|
"grad_norm": 1.0232102870941162, |
|
"learning_rate": 1.7594384334897757e-05, |
|
"loss": 1.8801, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.9076558800315706, |
|
"grad_norm": 1.1658730506896973, |
|
"learning_rate": 1.7575772142291136e-05, |
|
"loss": 1.9321, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9084451460142068, |
|
"grad_norm": 0.961025059223175, |
|
"learning_rate": 1.7557155862594232e-05, |
|
"loss": 1.9078, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.909234411996843, |
|
"grad_norm": 1.1790615320205688, |
|
"learning_rate": 1.7538535525346304e-05, |
|
"loss": 1.9617, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.9100236779794791, |
|
"grad_norm": 0.976462721824646, |
|
"learning_rate": 1.751991116009306e-05, |
|
"loss": 1.8995, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.9108129439621152, |
|
"grad_norm": 0.9908099174499512, |
|
"learning_rate": 1.7501282796386593e-05, |
|
"loss": 1.9021, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.9116022099447514, |
|
"grad_norm": 1.0563790798187256, |
|
"learning_rate": 1.748265046378535e-05, |
|
"loss": 1.9061, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.9123914759273876, |
|
"grad_norm": 1.0215762853622437, |
|
"learning_rate": 1.7464014191854046e-05, |
|
"loss": 1.8833, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.9131807419100236, |
|
"grad_norm": 1.1506677865982056, |
|
"learning_rate": 1.744537401016369e-05, |
|
"loss": 1.8939, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.9139700078926598, |
|
"grad_norm": 1.1882095336914062, |
|
"learning_rate": 1.7426729948291474e-05, |
|
"loss": 1.9077, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.914759273875296, |
|
"grad_norm": 0.929111897945404, |
|
"learning_rate": 1.7408082035820733e-05, |
|
"loss": 1.854, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.9155485398579322, |
|
"grad_norm": 1.0913232564926147, |
|
"learning_rate": 1.7389430302340928e-05, |
|
"loss": 1.8809, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9163378058405682, |
|
"grad_norm": 0.9732852578163147, |
|
"learning_rate": 1.7370774777447583e-05, |
|
"loss": 1.8555, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.9171270718232044, |
|
"grad_norm": 1.0545657873153687, |
|
"learning_rate": 1.7352115490742243e-05, |
|
"loss": 1.9186, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.9179163378058406, |
|
"grad_norm": 1.105906367301941, |
|
"learning_rate": 1.7333452471832403e-05, |
|
"loss": 1.9243, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.9187056037884768, |
|
"grad_norm": 1.3180484771728516, |
|
"learning_rate": 1.7314785750331486e-05, |
|
"loss": 1.9367, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.9194948697711128, |
|
"grad_norm": 0.9373801350593567, |
|
"learning_rate": 1.7296115355858812e-05, |
|
"loss": 1.8631, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.920284135753749, |
|
"grad_norm": 1.2233669757843018, |
|
"learning_rate": 1.7277441318039503e-05, |
|
"loss": 1.9653, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.9210734017363852, |
|
"grad_norm": 1.0992927551269531, |
|
"learning_rate": 1.725876366650447e-05, |
|
"loss": 1.8945, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.9218626677190213, |
|
"grad_norm": 1.4309226274490356, |
|
"learning_rate": 1.724008243089036e-05, |
|
"loss": 1.892, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.9226519337016574, |
|
"grad_norm": 1.1195532083511353, |
|
"learning_rate": 1.7221397640839516e-05, |
|
"loss": 1.9198, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.9234411996842936, |
|
"grad_norm": 1.1537940502166748, |
|
"learning_rate": 1.7202709325999893e-05, |
|
"loss": 1.9336, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9242304656669298, |
|
"grad_norm": 1.222401738166809, |
|
"learning_rate": 1.7184017516025075e-05, |
|
"loss": 1.878, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.9250197316495659, |
|
"grad_norm": 1.4775243997573853, |
|
"learning_rate": 1.7165322240574162e-05, |
|
"loss": 1.9012, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.925808997632202, |
|
"grad_norm": 1.2868552207946777, |
|
"learning_rate": 1.7146623529311772e-05, |
|
"loss": 1.9462, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.9265982636148382, |
|
"grad_norm": 1.0581059455871582, |
|
"learning_rate": 1.7127921411907965e-05, |
|
"loss": 1.9898, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.9273875295974744, |
|
"grad_norm": 1.020804762840271, |
|
"learning_rate": 1.710921591803821e-05, |
|
"loss": 1.8327, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.9281767955801105, |
|
"grad_norm": 1.3169434070587158, |
|
"learning_rate": 1.7090507077383332e-05, |
|
"loss": 1.9644, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.9289660615627466, |
|
"grad_norm": 0.9919707179069519, |
|
"learning_rate": 1.7071794919629466e-05, |
|
"loss": 1.8822, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.9297553275453828, |
|
"grad_norm": 1.044602870941162, |
|
"learning_rate": 1.7053079474468006e-05, |
|
"loss": 1.941, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.930544593528019, |
|
"grad_norm": 0.9796439409255981, |
|
"learning_rate": 1.703436077159558e-05, |
|
"loss": 1.8714, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.9313338595106551, |
|
"grad_norm": 1.1889114379882812, |
|
"learning_rate": 1.7015638840713954e-05, |
|
"loss": 1.9272, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.9321231254932912, |
|
"grad_norm": 0.9672757983207703, |
|
"learning_rate": 1.699691371153005e-05, |
|
"loss": 1.9113, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.9329123914759274, |
|
"grad_norm": 1.0609270334243774, |
|
"learning_rate": 1.6978185413755844e-05, |
|
"loss": 1.9508, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.9337016574585635, |
|
"grad_norm": 1.0816676616668701, |
|
"learning_rate": 1.6959453977108345e-05, |
|
"loss": 1.8926, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.9344909234411997, |
|
"grad_norm": 1.1348642110824585, |
|
"learning_rate": 1.694071943130954e-05, |
|
"loss": 1.9565, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.9352801894238358, |
|
"grad_norm": 0.9974650740623474, |
|
"learning_rate": 1.6921981806086354e-05, |
|
"loss": 1.969, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.936069455406472, |
|
"grad_norm": 1.0185261964797974, |
|
"learning_rate": 1.6903241131170597e-05, |
|
"loss": 1.8792, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.9368587213891081, |
|
"grad_norm": 1.1831624507904053, |
|
"learning_rate": 1.6884497436298918e-05, |
|
"loss": 1.9001, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.9376479873717443, |
|
"grad_norm": 1.4525943994522095, |
|
"learning_rate": 1.6865750751212752e-05, |
|
"loss": 1.9121, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.9384372533543804, |
|
"grad_norm": 1.0227642059326172, |
|
"learning_rate": 1.6847001105658296e-05, |
|
"loss": 1.9067, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.9392265193370166, |
|
"grad_norm": 1.0105981826782227, |
|
"learning_rate": 1.6828248529386418e-05, |
|
"loss": 1.9006, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9400157853196527, |
|
"grad_norm": 1.0613151788711548, |
|
"learning_rate": 1.6809493052152655e-05, |
|
"loss": 1.942, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.9408050513022889, |
|
"grad_norm": 1.254150390625, |
|
"learning_rate": 1.6790734703717153e-05, |
|
"loss": 1.8991, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.941594317284925, |
|
"grad_norm": 1.0217347145080566, |
|
"learning_rate": 1.677197351384459e-05, |
|
"loss": 1.9152, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.9423835832675612, |
|
"grad_norm": 1.1719932556152344, |
|
"learning_rate": 1.6753209512304174e-05, |
|
"loss": 1.9262, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.9431728492501973, |
|
"grad_norm": 1.062974452972412, |
|
"learning_rate": 1.6734442728869566e-05, |
|
"loss": 1.8823, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.9439621152328335, |
|
"grad_norm": 1.2222181558609009, |
|
"learning_rate": 1.6715673193318834e-05, |
|
"loss": 2.0189, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.9447513812154696, |
|
"grad_norm": 1.1024019718170166, |
|
"learning_rate": 1.669690093543443e-05, |
|
"loss": 1.909, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.9455406471981057, |
|
"grad_norm": 1.2238271236419678, |
|
"learning_rate": 1.667812598500312e-05, |
|
"loss": 1.9175, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.9463299131807419, |
|
"grad_norm": 1.004940152168274, |
|
"learning_rate": 1.6659348371815927e-05, |
|
"loss": 1.9056, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.9471191791633781, |
|
"grad_norm": 0.9510829448699951, |
|
"learning_rate": 1.664056812566812e-05, |
|
"loss": 1.9269, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9479084451460142, |
|
"grad_norm": 0.9175992608070374, |
|
"learning_rate": 1.662178527635913e-05, |
|
"loss": 1.9337, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.9486977111286503, |
|
"grad_norm": 0.9384323358535767, |
|
"learning_rate": 1.6602999853692528e-05, |
|
"loss": 1.8387, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.9494869771112865, |
|
"grad_norm": 1.3229904174804688, |
|
"learning_rate": 1.6584211887475968e-05, |
|
"loss": 1.9395, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.9502762430939227, |
|
"grad_norm": 0.9942592978477478, |
|
"learning_rate": 1.6565421407521134e-05, |
|
"loss": 1.9253, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.9510655090765588, |
|
"grad_norm": 1.01997709274292, |
|
"learning_rate": 1.65466284436437e-05, |
|
"loss": 1.9165, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.9518547750591949, |
|
"grad_norm": 0.9606868028640747, |
|
"learning_rate": 1.6527833025663294e-05, |
|
"loss": 1.895, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.9526440410418311, |
|
"grad_norm": 0.9341292381286621, |
|
"learning_rate": 1.650903518340342e-05, |
|
"loss": 1.9, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.9534333070244673, |
|
"grad_norm": 1.058000922203064, |
|
"learning_rate": 1.6490234946691435e-05, |
|
"loss": 1.8823, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.9542225730071034, |
|
"grad_norm": 0.9709818959236145, |
|
"learning_rate": 1.6471432345358498e-05, |
|
"loss": 1.9495, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.9550118389897395, |
|
"grad_norm": 0.9561473727226257, |
|
"learning_rate": 1.6452627409239523e-05, |
|
"loss": 1.8852, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.9558011049723757, |
|
"grad_norm": 0.9571905732154846, |
|
"learning_rate": 1.6433820168173116e-05, |
|
"loss": 1.93, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.9565903709550119, |
|
"grad_norm": 1.0493736267089844, |
|
"learning_rate": 1.6415010652001553e-05, |
|
"loss": 1.9129, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.9573796369376479, |
|
"grad_norm": 0.975974977016449, |
|
"learning_rate": 1.6396198890570724e-05, |
|
"loss": 1.8683, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.9581689029202841, |
|
"grad_norm": 1.1953823566436768, |
|
"learning_rate": 1.637738491373006e-05, |
|
"loss": 1.929, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.9589581689029203, |
|
"grad_norm": 0.9741194248199463, |
|
"learning_rate": 1.6358568751332524e-05, |
|
"loss": 1.9201, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9597474348855565, |
|
"grad_norm": 1.1271497011184692, |
|
"learning_rate": 1.633975043323455e-05, |
|
"loss": 1.8898, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.9605367008681925, |
|
"grad_norm": 1.1122384071350098, |
|
"learning_rate": 1.632092998929598e-05, |
|
"loss": 1.9652, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.9613259668508287, |
|
"grad_norm": 0.9283862113952637, |
|
"learning_rate": 1.6302107449380042e-05, |
|
"loss": 1.917, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.9621152328334649, |
|
"grad_norm": 1.0600271224975586, |
|
"learning_rate": 1.628328284335327e-05, |
|
"loss": 1.9492, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.9629044988161011, |
|
"grad_norm": 1.1159858703613281, |
|
"learning_rate": 1.6264456201085506e-05, |
|
"loss": 1.9413, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9636937647987371, |
|
"grad_norm": 1.1544772386550903, |
|
"learning_rate": 1.6245627552449796e-05, |
|
"loss": 1.939, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.9644830307813733, |
|
"grad_norm": 0.9870274662971497, |
|
"learning_rate": 1.622679692732238e-05, |
|
"loss": 1.926, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.9652722967640095, |
|
"grad_norm": 0.9417735934257507, |
|
"learning_rate": 1.620796435558264e-05, |
|
"loss": 1.8375, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.9660615627466457, |
|
"grad_norm": 1.059479832649231, |
|
"learning_rate": 1.618912986711304e-05, |
|
"loss": 1.913, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.9668508287292817, |
|
"grad_norm": 0.9993748068809509, |
|
"learning_rate": 1.6170293491799083e-05, |
|
"loss": 1.836, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.9676400947119179, |
|
"grad_norm": 1.0149083137512207, |
|
"learning_rate": 1.615145525952927e-05, |
|
"loss": 1.9319, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.9684293606945541, |
|
"grad_norm": 1.4480969905853271, |
|
"learning_rate": 1.6132615200195044e-05, |
|
"loss": 1.9768, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.9692186266771902, |
|
"grad_norm": 1.1640135049819946, |
|
"learning_rate": 1.611377334369076e-05, |
|
"loss": 1.8763, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.9700078926598263, |
|
"grad_norm": 1.0086321830749512, |
|
"learning_rate": 1.6094929719913614e-05, |
|
"loss": 1.8939, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.9707971586424625, |
|
"grad_norm": 1.4108259677886963, |
|
"learning_rate": 1.60760843587636e-05, |
|
"loss": 1.8972, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9715864246250987, |
|
"grad_norm": 1.1569288969039917, |
|
"learning_rate": 1.605723729014349e-05, |
|
"loss": 1.899, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.9723756906077348, |
|
"grad_norm": 1.085387110710144, |
|
"learning_rate": 1.6038388543958734e-05, |
|
"loss": 1.91, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.9731649565903709, |
|
"grad_norm": 6.412627696990967, |
|
"learning_rate": 1.6019538150117473e-05, |
|
"loss": 1.8456, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.9739542225730071, |
|
"grad_norm": 1.1839845180511475, |
|
"learning_rate": 1.6000686138530452e-05, |
|
"loss": 1.8995, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.9747434885556433, |
|
"grad_norm": 1.0736435651779175, |
|
"learning_rate": 1.598183253911098e-05, |
|
"loss": 1.8954, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.9755327545382794, |
|
"grad_norm": 1.1705999374389648, |
|
"learning_rate": 1.5962977381774883e-05, |
|
"loss": 1.8938, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.9763220205209155, |
|
"grad_norm": 0.9498980045318604, |
|
"learning_rate": 1.5944120696440467e-05, |
|
"loss": 1.8688, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.9771112865035517, |
|
"grad_norm": 1.0730433464050293, |
|
"learning_rate": 1.5925262513028463e-05, |
|
"loss": 1.9343, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.9779005524861878, |
|
"grad_norm": 1.2047873735427856, |
|
"learning_rate": 1.590640286146197e-05, |
|
"loss": 1.8976, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.978689818468824, |
|
"grad_norm": 1.1192723512649536, |
|
"learning_rate": 1.5887541771666424e-05, |
|
"loss": 1.8691, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9794790844514601, |
|
"grad_norm": 1.0943949222564697, |
|
"learning_rate": 1.5868679273569543e-05, |
|
"loss": 1.8912, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.9802683504340963, |
|
"grad_norm": 1.2639052867889404, |
|
"learning_rate": 1.5849815397101276e-05, |
|
"loss": 1.9344, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.9810576164167324, |
|
"grad_norm": 0.9903395175933838, |
|
"learning_rate": 1.5830950172193756e-05, |
|
"loss": 1.8876, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.9818468823993686, |
|
"grad_norm": 1.1683999300003052, |
|
"learning_rate": 1.5812083628781265e-05, |
|
"loss": 1.8467, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.9826361483820047, |
|
"grad_norm": 0.9717577695846558, |
|
"learning_rate": 1.5793215796800167e-05, |
|
"loss": 1.9227, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.9834254143646409, |
|
"grad_norm": 1.2117033004760742, |
|
"learning_rate": 1.5774346706188886e-05, |
|
"loss": 1.9106, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.984214680347277, |
|
"grad_norm": 1.0700875520706177, |
|
"learning_rate": 1.5755476386887828e-05, |
|
"loss": 1.9613, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.9850039463299132, |
|
"grad_norm": 0.9804224967956543, |
|
"learning_rate": 1.5736604868839355e-05, |
|
"loss": 1.8936, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.9857932123125493, |
|
"grad_norm": 0.9192711710929871, |
|
"learning_rate": 1.5717732181987723e-05, |
|
"loss": 1.9023, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.9865824782951855, |
|
"grad_norm": 1.2049267292022705, |
|
"learning_rate": 1.5698858356279057e-05, |
|
"loss": 1.9268, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9873717442778216, |
|
"grad_norm": 1.061391830444336, |
|
"learning_rate": 1.5679983421661277e-05, |
|
"loss": 1.8722, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.9881610102604578, |
|
"grad_norm": 1.2003884315490723, |
|
"learning_rate": 1.5661107408084073e-05, |
|
"loss": 1.8623, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.988950276243094, |
|
"grad_norm": 1.0538649559020996, |
|
"learning_rate": 1.564223034549883e-05, |
|
"loss": 1.8866, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.98973954222573, |
|
"grad_norm": 0.9783664345741272, |
|
"learning_rate": 1.5623352263858622e-05, |
|
"loss": 1.8931, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.9905288082083662, |
|
"grad_norm": 1.0133979320526123, |
|
"learning_rate": 1.5604473193118124e-05, |
|
"loss": 1.8642, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.9913180741910024, |
|
"grad_norm": 0.913181722164154, |
|
"learning_rate": 1.5585593163233572e-05, |
|
"loss": 1.8951, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.9921073401736386, |
|
"grad_norm": 0.8568845987319946, |
|
"learning_rate": 1.5566712204162744e-05, |
|
"loss": 1.8658, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.9928966061562746, |
|
"grad_norm": 0.9925752282142639, |
|
"learning_rate": 1.5547830345864887e-05, |
|
"loss": 1.9071, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.9936858721389108, |
|
"grad_norm": 1.0075455904006958, |
|
"learning_rate": 1.552894761830066e-05, |
|
"loss": 1.8626, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.994475138121547, |
|
"grad_norm": 0.9527983665466309, |
|
"learning_rate": 1.551006405143212e-05, |
|
"loss": 1.8863, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9952644041041832, |
|
"grad_norm": 0.9452120065689087, |
|
"learning_rate": 1.5491179675222645e-05, |
|
"loss": 1.8764, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.9960536700868192, |
|
"grad_norm": 1.0155888795852661, |
|
"learning_rate": 1.5472294519636906e-05, |
|
"loss": 1.9361, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.9968429360694554, |
|
"grad_norm": 0.955237627029419, |
|
"learning_rate": 1.54534086146408e-05, |
|
"loss": 1.8533, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.9976322020520916, |
|
"grad_norm": 1.4082919359207153, |
|
"learning_rate": 1.5434521990201417e-05, |
|
"loss": 1.8902, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.9984214680347278, |
|
"grad_norm": 0.9780258536338806, |
|
"learning_rate": 1.5415634676287e-05, |
|
"loss": 1.8814, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.9992107340173638, |
|
"grad_norm": 1.0558334589004517, |
|
"learning_rate": 1.5396746702866863e-05, |
|
"loss": 1.9069, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.1238291263580322, |
|
"learning_rate": 1.5377858099911384e-05, |
|
"loss": 1.9228, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 1.000789265982636, |
|
"grad_norm": 1.2115720510482788, |
|
"learning_rate": 1.5358968897391935e-05, |
|
"loss": 2.6076, |
|
"step": 1268 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 2534, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 634, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.619513039696586e+19, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|