{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5000236910684672, "eval_steps": 10553, "global_step": 10553, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.738213693437574e-05, "grad_norm": 0.265625, "learning_rate": 2e-05, "loss": 1.5032, "step": 1 }, { "epoch": 4.738213693437574e-05, "eval_loss": 1.6555964946746826, "eval_runtime": 1297.243, "eval_samples_per_second": 1.808, "eval_steps_per_second": 1.808, "step": 1 }, { "epoch": 9.476427386875148e-05, "grad_norm": 0.455078125, "learning_rate": 4e-05, "loss": 1.7934, "step": 2 }, { "epoch": 0.00014214641080312722, "grad_norm": 0.232421875, "learning_rate": 6e-05, "loss": 1.5472, "step": 3 }, { "epoch": 0.00018952854773750296, "grad_norm": 0.35546875, "learning_rate": 8e-05, "loss": 1.7256, "step": 4 }, { "epoch": 0.00023691068467187872, "grad_norm": 0.296875, "learning_rate": 0.0001, "loss": 1.2578, "step": 5 }, { "epoch": 0.00028429282160625445, "grad_norm": 0.2431640625, "learning_rate": 0.00012, "loss": 1.3301, "step": 6 }, { "epoch": 0.0003316749585406302, "grad_norm": 0.306640625, "learning_rate": 0.00014, "loss": 1.5469, "step": 7 }, { "epoch": 0.0003790570954750059, "grad_norm": 0.35546875, "learning_rate": 0.00016, "loss": 1.4694, "step": 8 }, { "epoch": 0.00042643923240938164, "grad_norm": 0.267578125, "learning_rate": 0.00018, "loss": 0.9671, "step": 9 }, { "epoch": 0.00047382136934375743, "grad_norm": 0.431640625, "learning_rate": 0.0002, "loss": 1.6308, "step": 10 }, { "epoch": 0.0005212035062781332, "grad_norm": 0.4453125, "learning_rate": 0.00019999999889105325, "loss": 1.5963, "step": 11 }, { "epoch": 0.0005685856432125089, "grad_norm": 0.5, "learning_rate": 0.00019999999556421307, "loss": 1.694, "step": 12 }, { "epoch": 0.0006159677801468846, "grad_norm": 1.3046875, "learning_rate": 0.00019999999001947948, "loss": 2.0886, "step": 13 }, { "epoch": 0.0006633499170812604, "grad_norm": 0.5703125, "learning_rate": 0.00019999998225685262, "loss": 1.9375, "step": 14 }, { "epoch": 0.0007107320540156361, "grad_norm": 0.396484375, "learning_rate": 0.00019999997227633268, "loss": 1.429, "step": 15 }, { "epoch": 0.0007581141909500118, "grad_norm": 0.458984375, "learning_rate": 0.00019999996007791988, "loss": 1.2525, "step": 16 }, { "epoch": 0.0008054963278843876, "grad_norm": 0.47265625, "learning_rate": 0.00019999994566161444, "loss": 0.9333, "step": 17 }, { "epoch": 0.0008528784648187633, "grad_norm": 0.75, "learning_rate": 0.00019999992902741678, "loss": 1.07, "step": 18 }, { "epoch": 0.000900260601753139, "grad_norm": 0.7890625, "learning_rate": 0.00019999991017532716, "loss": 1.0739, "step": 19 }, { "epoch": 0.0009476427386875149, "grad_norm": 0.640625, "learning_rate": 0.00019999988910534606, "loss": 0.9974, "step": 20 }, { "epoch": 0.0009950248756218905, "grad_norm": 0.416015625, "learning_rate": 0.00019999986581747394, "loss": 1.147, "step": 21 }, { "epoch": 0.0010424070125562663, "grad_norm": 0.43359375, "learning_rate": 0.0001999998403117113, "loss": 1.2521, "step": 22 }, { "epoch": 0.001089789149490642, "grad_norm": 1.4296875, "learning_rate": 0.00019999981258805874, "loss": 0.9572, "step": 23 }, { "epoch": 0.0011371712864250178, "grad_norm": 0.54296875, "learning_rate": 0.00019999978264651684, "loss": 1.0038, "step": 24 }, { "epoch": 0.0011845534233593934, "grad_norm": 0.53125, "learning_rate": 0.00019999975048708626, "loss": 1.1757, "step": 25 }, { "epoch": 0.0012319355602937693, "grad_norm": 0.6171875, "learning_rate": 0.00019999971610976775, "loss": 0.8609, "step": 26 }, { "epoch": 0.001279317697228145, "grad_norm": 0.734375, "learning_rate": 0.00019999967951456204, "loss": 1.7934, "step": 27 }, { "epoch": 0.0013266998341625207, "grad_norm": 0.51171875, "learning_rate": 0.00019999964070146998, "loss": 1.3382, "step": 28 }, { "epoch": 0.0013740819710968966, "grad_norm": 0.427734375, "learning_rate": 0.00019999959967049237, "loss": 1.0583, "step": 29 }, { "epoch": 0.0014214641080312722, "grad_norm": 0.482421875, "learning_rate": 0.00019999955642163015, "loss": 1.0192, "step": 30 }, { "epoch": 0.001468846244965648, "grad_norm": 0.45703125, "learning_rate": 0.00019999951095488433, "loss": 1.5542, "step": 31 }, { "epoch": 0.0015162283819000236, "grad_norm": 0.44921875, "learning_rate": 0.00019999946327025584, "loss": 1.3448, "step": 32 }, { "epoch": 0.0015636105188343995, "grad_norm": 0.474609375, "learning_rate": 0.00019999941336774576, "loss": 1.6881, "step": 33 }, { "epoch": 0.0016109926557687751, "grad_norm": 0.65625, "learning_rate": 0.00019999936124735524, "loss": 0.7713, "step": 34 }, { "epoch": 0.001658374792703151, "grad_norm": 0.44921875, "learning_rate": 0.00019999930690908535, "loss": 1.1188, "step": 35 }, { "epoch": 0.0017057569296375266, "grad_norm": 0.859375, "learning_rate": 0.00019999925035293738, "loss": 1.0596, "step": 36 }, { "epoch": 0.0017531390665719024, "grad_norm": 0.5546875, "learning_rate": 0.00019999919157891256, "loss": 1.3195, "step": 37 }, { "epoch": 0.001800521203506278, "grad_norm": 0.33984375, "learning_rate": 0.00019999913058701217, "loss": 0.9574, "step": 38 }, { "epoch": 0.0018479033404406539, "grad_norm": 0.93359375, "learning_rate": 0.00019999906737723757, "loss": 0.7413, "step": 39 }, { "epoch": 0.0018952854773750297, "grad_norm": 0.6875, "learning_rate": 0.00019999900194959017, "loss": 1.1054, "step": 40 }, { "epoch": 0.0019426676143094053, "grad_norm": 0.5234375, "learning_rate": 0.00019999893430407145, "loss": 0.8824, "step": 41 }, { "epoch": 0.001990049751243781, "grad_norm": 0.39453125, "learning_rate": 0.00019999886444068286, "loss": 1.0459, "step": 42 }, { "epoch": 0.002037431888178157, "grad_norm": 0.455078125, "learning_rate": 0.000199998792359426, "loss": 0.7952, "step": 43 }, { "epoch": 0.0020848140251125327, "grad_norm": 0.7265625, "learning_rate": 0.00019999871806030239, "loss": 1.1249, "step": 44 }, { "epoch": 0.0021321961620469083, "grad_norm": 0.462890625, "learning_rate": 0.00019999864154331376, "loss": 1.4, "step": 45 }, { "epoch": 0.002179578298981284, "grad_norm": 0.4296875, "learning_rate": 0.00019999856280846173, "loss": 1.5702, "step": 46 }, { "epoch": 0.00222696043591566, "grad_norm": 0.578125, "learning_rate": 0.00019999848185574815, "loss": 1.3921, "step": 47 }, { "epoch": 0.0022743425728500356, "grad_norm": 0.5546875, "learning_rate": 0.00019999839868517475, "loss": 1.0725, "step": 48 }, { "epoch": 0.002321724709784411, "grad_norm": 0.51171875, "learning_rate": 0.00019999831329674334, "loss": 1.0056, "step": 49 }, { "epoch": 0.002369106846718787, "grad_norm": 0.53125, "learning_rate": 0.00019999822569045589, "loss": 1.0241, "step": 50 }, { "epoch": 0.002416488983653163, "grad_norm": 0.498046875, "learning_rate": 0.00019999813586631427, "loss": 1.0889, "step": 51 }, { "epoch": 0.0024638711205875385, "grad_norm": 0.5234375, "learning_rate": 0.00019999804382432053, "loss": 1.1529, "step": 52 }, { "epoch": 0.002511253257521914, "grad_norm": 0.5234375, "learning_rate": 0.00019999794956447673, "loss": 1.4482, "step": 53 }, { "epoch": 0.00255863539445629, "grad_norm": 0.5390625, "learning_rate": 0.00019999785308678488, "loss": 0.7506, "step": 54 }, { "epoch": 0.002606017531390666, "grad_norm": 0.6015625, "learning_rate": 0.00019999775439124716, "loss": 0.9041, "step": 55 }, { "epoch": 0.0026533996683250414, "grad_norm": 0.57421875, "learning_rate": 0.00019999765347786578, "loss": 1.0598, "step": 56 }, { "epoch": 0.002700781805259417, "grad_norm": 0.64453125, "learning_rate": 0.00019999755034664295, "loss": 0.9802, "step": 57 }, { "epoch": 0.002748163942193793, "grad_norm": 0.5078125, "learning_rate": 0.00019999744499758096, "loss": 0.7913, "step": 58 }, { "epoch": 0.0027955460791281687, "grad_norm": 0.515625, "learning_rate": 0.00019999733743068215, "loss": 0.9532, "step": 59 }, { "epoch": 0.0028429282160625444, "grad_norm": 0.54296875, "learning_rate": 0.0001999972276459489, "loss": 1.6101, "step": 60 }, { "epoch": 0.00289031035299692, "grad_norm": 0.515625, "learning_rate": 0.00019999711564338367, "loss": 1.2862, "step": 61 }, { "epoch": 0.002937692489931296, "grad_norm": 0.72265625, "learning_rate": 0.0001999970014229889, "loss": 0.1047, "step": 62 }, { "epoch": 0.0029850746268656717, "grad_norm": 0.609375, "learning_rate": 0.0001999968849847672, "loss": 1.4375, "step": 63 }, { "epoch": 0.0030324567638000473, "grad_norm": 0.5390625, "learning_rate": 0.00019999676632872108, "loss": 1.3146, "step": 64 }, { "epoch": 0.003079838900734423, "grad_norm": 0.7890625, "learning_rate": 0.0001999966454548532, "loss": 0.2704, "step": 65 }, { "epoch": 0.003127221037668799, "grad_norm": 0.46875, "learning_rate": 0.0001999965223631662, "loss": 1.0367, "step": 66 }, { "epoch": 0.0031746031746031746, "grad_norm": 0.54296875, "learning_rate": 0.0001999963970536629, "loss": 0.7727, "step": 67 }, { "epoch": 0.0032219853115375502, "grad_norm": 0.43359375, "learning_rate": 0.00019999626952634599, "loss": 1.244, "step": 68 }, { "epoch": 0.0032693674484719263, "grad_norm": 0.45703125, "learning_rate": 0.00019999613978121834, "loss": 1.08, "step": 69 }, { "epoch": 0.003316749585406302, "grad_norm": 0.50390625, "learning_rate": 0.0001999960078182828, "loss": 1.1897, "step": 70 }, { "epoch": 0.0033641317223406775, "grad_norm": 0.51953125, "learning_rate": 0.00019999587363754234, "loss": 1.0152, "step": 71 }, { "epoch": 0.003411513859275053, "grad_norm": 0.50390625, "learning_rate": 0.00019999573723899992, "loss": 1.1051, "step": 72 }, { "epoch": 0.003458895996209429, "grad_norm": 0.421875, "learning_rate": 0.00019999559862265856, "loss": 0.8628, "step": 73 }, { "epoch": 0.003506278133143805, "grad_norm": 0.408203125, "learning_rate": 0.00019999545778852132, "loss": 0.9969, "step": 74 }, { "epoch": 0.0035536602700781805, "grad_norm": 0.458984375, "learning_rate": 0.00019999531473659135, "loss": 1.3045, "step": 75 }, { "epoch": 0.003601042407012556, "grad_norm": 0.59765625, "learning_rate": 0.0001999951694668718, "loss": 0.7187, "step": 76 }, { "epoch": 0.003648424543946932, "grad_norm": 0.53515625, "learning_rate": 0.0001999950219793659, "loss": 1.2484, "step": 77 }, { "epoch": 0.0036958066808813078, "grad_norm": 0.58984375, "learning_rate": 0.0001999948722740769, "loss": 1.1648, "step": 78 }, { "epoch": 0.0037431888178156834, "grad_norm": 0.73046875, "learning_rate": 0.0001999947203510082, "loss": 1.1182, "step": 79 }, { "epoch": 0.0037905709547500594, "grad_norm": 0.52734375, "learning_rate": 0.0001999945662101631, "loss": 0.9652, "step": 80 }, { "epoch": 0.003837953091684435, "grad_norm": 0.390625, "learning_rate": 0.00019999440985154498, "loss": 1.1329, "step": 81 }, { "epoch": 0.0038853352286188107, "grad_norm": 0.400390625, "learning_rate": 0.0001999942512751574, "loss": 0.6673, "step": 82 }, { "epoch": 0.003932717365553186, "grad_norm": 0.57421875, "learning_rate": 0.00019999409048100382, "loss": 0.743, "step": 83 }, { "epoch": 0.003980099502487562, "grad_norm": 0.400390625, "learning_rate": 0.0001999939274690878, "loss": 0.8757, "step": 84 }, { "epoch": 0.0040274816394219376, "grad_norm": 0.66015625, "learning_rate": 0.000199993762239413, "loss": 1.2065, "step": 85 }, { "epoch": 0.004074863776356314, "grad_norm": 0.56640625, "learning_rate": 0.0001999935947919831, "loss": 1.5393, "step": 86 }, { "epoch": 0.00412224591329069, "grad_norm": 0.302734375, "learning_rate": 0.00019999342512680172, "loss": 0.6055, "step": 87 }, { "epoch": 0.004169628050225065, "grad_norm": 0.388671875, "learning_rate": 0.0001999932532438727, "loss": 0.963, "step": 88 }, { "epoch": 0.004217010187159441, "grad_norm": 0.482421875, "learning_rate": 0.00019999307914319981, "loss": 0.5133, "step": 89 }, { "epoch": 0.0042643923240938165, "grad_norm": 0.56640625, "learning_rate": 0.00019999290282478698, "loss": 1.1098, "step": 90 }, { "epoch": 0.004311774461028192, "grad_norm": 0.515625, "learning_rate": 0.00019999272428863804, "loss": 0.9357, "step": 91 }, { "epoch": 0.004359156597962568, "grad_norm": 0.4375, "learning_rate": 0.00019999254353475702, "loss": 0.9475, "step": 92 }, { "epoch": 0.004406538734896943, "grad_norm": 0.56640625, "learning_rate": 0.00019999236056314783, "loss": 0.9209, "step": 93 }, { "epoch": 0.00445392087183132, "grad_norm": 0.71484375, "learning_rate": 0.00019999217537381464, "loss": 1.2401, "step": 94 }, { "epoch": 0.0045013030087656955, "grad_norm": 0.421875, "learning_rate": 0.0001999919879667615, "loss": 1.2056, "step": 95 }, { "epoch": 0.004548685145700071, "grad_norm": 0.484375, "learning_rate": 0.00019999179834199256, "loss": 0.8911, "step": 96 }, { "epoch": 0.004596067282634447, "grad_norm": 0.71484375, "learning_rate": 0.00019999160649951202, "loss": 1.5826, "step": 97 }, { "epoch": 0.004643449419568822, "grad_norm": 0.55078125, "learning_rate": 0.00019999141243932418, "loss": 1.5075, "step": 98 }, { "epoch": 0.004690831556503198, "grad_norm": 0.51171875, "learning_rate": 0.00019999121616143332, "loss": 1.2497, "step": 99 }, { "epoch": 0.004738213693437574, "grad_norm": 0.439453125, "learning_rate": 0.00019999101766584378, "loss": 1.0032, "step": 100 }, { "epoch": 0.00478559583037195, "grad_norm": 0.419921875, "learning_rate": 0.00019999081695255998, "loss": 1.241, "step": 101 }, { "epoch": 0.004832977967306326, "grad_norm": 0.6484375, "learning_rate": 0.00019999061402158636, "loss": 0.1521, "step": 102 }, { "epoch": 0.004880360104240701, "grad_norm": 0.42578125, "learning_rate": 0.00019999040887292745, "loss": 1.5866, "step": 103 }, { "epoch": 0.004927742241175077, "grad_norm": 0.41015625, "learning_rate": 0.00019999020150658774, "loss": 1.6, "step": 104 }, { "epoch": 0.004975124378109453, "grad_norm": 0.546875, "learning_rate": 0.00019998999192257188, "loss": 1.1149, "step": 105 }, { "epoch": 0.005022506515043828, "grad_norm": 0.447265625, "learning_rate": 0.0001999897801208845, "loss": 0.9298, "step": 106 }, { "epoch": 0.005069888651978204, "grad_norm": 0.359375, "learning_rate": 0.0001999895661015303, "loss": 1.2095, "step": 107 }, { "epoch": 0.00511727078891258, "grad_norm": 0.59375, "learning_rate": 0.00019998934986451404, "loss": 0.8567, "step": 108 }, { "epoch": 0.005164652925846956, "grad_norm": 0.421875, "learning_rate": 0.0001999891314098405, "loss": 0.9423, "step": 109 }, { "epoch": 0.005212035062781332, "grad_norm": 0.392578125, "learning_rate": 0.00019998891073751452, "loss": 0.8099, "step": 110 }, { "epoch": 0.005259417199715707, "grad_norm": 0.51171875, "learning_rate": 0.00019998868784754103, "loss": 1.0169, "step": 111 }, { "epoch": 0.005306799336650083, "grad_norm": 0.546875, "learning_rate": 0.00019998846273992492, "loss": 1.4406, "step": 112 }, { "epoch": 0.0053541814735844585, "grad_norm": 0.52734375, "learning_rate": 0.00019998823541467122, "loss": 0.9936, "step": 113 }, { "epoch": 0.005401563610518834, "grad_norm": 0.37890625, "learning_rate": 0.00019998800587178495, "loss": 1.3192, "step": 114 }, { "epoch": 0.00544894574745321, "grad_norm": 0.4296875, "learning_rate": 0.00019998777411127123, "loss": 0.8875, "step": 115 }, { "epoch": 0.005496327884387586, "grad_norm": 0.494140625, "learning_rate": 0.00019998754013313515, "loss": 0.9446, "step": 116 }, { "epoch": 0.005543710021321962, "grad_norm": 0.63671875, "learning_rate": 0.00019998730393738198, "loss": 1.1031, "step": 117 }, { "epoch": 0.0055910921582563375, "grad_norm": 0.4453125, "learning_rate": 0.0001999870655240169, "loss": 1.1828, "step": 118 }, { "epoch": 0.005638474295190713, "grad_norm": 0.48828125, "learning_rate": 0.00019998682489304515, "loss": 1.3674, "step": 119 }, { "epoch": 0.005685856432125089, "grad_norm": 0.50390625, "learning_rate": 0.00019998658204447217, "loss": 0.8399, "step": 120 }, { "epoch": 0.005733238569059464, "grad_norm": 0.5, "learning_rate": 0.0001999863369783033, "loss": 1.0854, "step": 121 }, { "epoch": 0.00578062070599384, "grad_norm": 0.59375, "learning_rate": 0.000199986089694544, "loss": 0.4969, "step": 122 }, { "epoch": 0.0058280028429282165, "grad_norm": 0.1982421875, "learning_rate": 0.0001999858401931997, "loss": 0.0215, "step": 123 }, { "epoch": 0.005875384979862592, "grad_norm": 0.70703125, "learning_rate": 0.00019998558847427597, "loss": 1.1571, "step": 124 }, { "epoch": 0.005922767116796968, "grad_norm": 0.515625, "learning_rate": 0.00019998533453777838, "loss": 1.3631, "step": 125 }, { "epoch": 0.005970149253731343, "grad_norm": 0.515625, "learning_rate": 0.0001999850783837126, "loss": 0.6508, "step": 126 }, { "epoch": 0.006017531390665719, "grad_norm": 0.63671875, "learning_rate": 0.00019998482001208425, "loss": 0.8974, "step": 127 }, { "epoch": 0.006064913527600095, "grad_norm": 1.078125, "learning_rate": 0.00019998455942289912, "loss": 1.0397, "step": 128 }, { "epoch": 0.00611229566453447, "grad_norm": 0.7578125, "learning_rate": 0.00019998429661616292, "loss": 0.8684, "step": 129 }, { "epoch": 0.006159677801468846, "grad_norm": 0.51953125, "learning_rate": 0.00019998403159188154, "loss": 1.4368, "step": 130 }, { "epoch": 0.006207059938403222, "grad_norm": 0.451171875, "learning_rate": 0.00019998376435006082, "loss": 0.6405, "step": 131 }, { "epoch": 0.006254442075337598, "grad_norm": 0.65234375, "learning_rate": 0.00019998349489070677, "loss": 0.6771, "step": 132 }, { "epoch": 0.006301824212271974, "grad_norm": 0.70703125, "learning_rate": 0.00019998322321382523, "loss": 1.0881, "step": 133 }, { "epoch": 0.006349206349206349, "grad_norm": 0.30859375, "learning_rate": 0.00019998294931942233, "loss": 0.6219, "step": 134 }, { "epoch": 0.006396588486140725, "grad_norm": 0.45703125, "learning_rate": 0.0001999826732075041, "loss": 0.9019, "step": 135 }, { "epoch": 0.0064439706230751004, "grad_norm": 0.47265625, "learning_rate": 0.00019998239487807666, "loss": 1.1105, "step": 136 }, { "epoch": 0.006491352760009476, "grad_norm": 0.50390625, "learning_rate": 0.00019998211433114622, "loss": 1.1331, "step": 137 }, { "epoch": 0.0065387348969438526, "grad_norm": 1.3828125, "learning_rate": 0.00019998183156671898, "loss": 0.9425, "step": 138 }, { "epoch": 0.006586117033878228, "grad_norm": 0.51171875, "learning_rate": 0.00019998154658480122, "loss": 0.9189, "step": 139 }, { "epoch": 0.006633499170812604, "grad_norm": 1.140625, "learning_rate": 0.00019998125938539924, "loss": 0.8435, "step": 140 }, { "epoch": 0.0066808813077469794, "grad_norm": 0.5625, "learning_rate": 0.0001999809699685194, "loss": 1.3486, "step": 141 }, { "epoch": 0.006728263444681355, "grad_norm": 0.408203125, "learning_rate": 0.00019998067833416815, "loss": 1.1616, "step": 142 }, { "epoch": 0.006775645581615731, "grad_norm": 0.8515625, "learning_rate": 0.00019998038448235195, "loss": 1.0803, "step": 143 }, { "epoch": 0.006823027718550106, "grad_norm": 0.703125, "learning_rate": 0.00019998008841307736, "loss": 0.9512, "step": 144 }, { "epoch": 0.006870409855484482, "grad_norm": 1.046875, "learning_rate": 0.00019997979012635085, "loss": 0.2863, "step": 145 }, { "epoch": 0.006917791992418858, "grad_norm": 0.41796875, "learning_rate": 0.00019997948962217912, "loss": 0.7258, "step": 146 }, { "epoch": 0.006965174129353234, "grad_norm": 0.43359375, "learning_rate": 0.0001999791869005688, "loss": 1.2192, "step": 147 }, { "epoch": 0.00701255626628761, "grad_norm": 0.58203125, "learning_rate": 0.0001999788819615266, "loss": 1.2817, "step": 148 }, { "epoch": 0.007059938403221985, "grad_norm": 0.419921875, "learning_rate": 0.00019997857480505928, "loss": 0.8469, "step": 149 }, { "epoch": 0.007107320540156361, "grad_norm": 0.51171875, "learning_rate": 0.0001999782654311737, "loss": 0.6144, "step": 150 }, { "epoch": 0.0071547026770907365, "grad_norm": 0.466796875, "learning_rate": 0.00019997795383987663, "loss": 1.7007, "step": 151 }, { "epoch": 0.007202084814025112, "grad_norm": 0.380859375, "learning_rate": 0.00019997764003117509, "loss": 0.8697, "step": 152 }, { "epoch": 0.007249466950959489, "grad_norm": 0.35546875, "learning_rate": 0.00019997732400507597, "loss": 1.0246, "step": 153 }, { "epoch": 0.007296849087893864, "grad_norm": 0.4453125, "learning_rate": 0.00019997700576158628, "loss": 0.9222, "step": 154 }, { "epoch": 0.00734423122482824, "grad_norm": 0.83984375, "learning_rate": 0.00019997668530071308, "loss": 1.0439, "step": 155 }, { "epoch": 0.0073916133617626155, "grad_norm": 0.439453125, "learning_rate": 0.0001999763626224635, "loss": 1.2715, "step": 156 }, { "epoch": 0.007438995498696991, "grad_norm": 0.33203125, "learning_rate": 0.0001999760377268447, "loss": 0.5978, "step": 157 }, { "epoch": 0.007486377635631367, "grad_norm": 0.423828125, "learning_rate": 0.00019997571061386386, "loss": 1.2903, "step": 158 }, { "epoch": 0.007533759772565742, "grad_norm": 0.5234375, "learning_rate": 0.00019997538128352826, "loss": 0.6958, "step": 159 }, { "epoch": 0.007581141909500119, "grad_norm": 0.48828125, "learning_rate": 0.0001999750497358452, "loss": 1.2971, "step": 160 }, { "epoch": 0.0076285240464344945, "grad_norm": 0.62890625, "learning_rate": 0.000199974715970822, "loss": 0.7183, "step": 161 }, { "epoch": 0.00767590618336887, "grad_norm": 0.5703125, "learning_rate": 0.0001999743799884661, "loss": 1.6736, "step": 162 }, { "epoch": 0.007723288320303246, "grad_norm": 0.376953125, "learning_rate": 0.00019997404178878495, "loss": 0.7535, "step": 163 }, { "epoch": 0.007770670457237621, "grad_norm": 0.423828125, "learning_rate": 0.00019997370137178603, "loss": 1.0898, "step": 164 }, { "epoch": 0.007818052594171997, "grad_norm": 0.298828125, "learning_rate": 0.0001999733587374769, "loss": 0.5674, "step": 165 }, { "epoch": 0.007865434731106373, "grad_norm": 0.58203125, "learning_rate": 0.00019997301388586519, "loss": 0.8733, "step": 166 }, { "epoch": 0.007912816868040748, "grad_norm": 0.890625, "learning_rate": 0.00019997266681695845, "loss": 0.6212, "step": 167 }, { "epoch": 0.007960199004975124, "grad_norm": 0.435546875, "learning_rate": 0.00019997231753076452, "loss": 1.2933, "step": 168 }, { "epoch": 0.0080075811419095, "grad_norm": 0.75, "learning_rate": 0.00019997196602729102, "loss": 0.9396, "step": 169 }, { "epoch": 0.008054963278843875, "grad_norm": 0.482421875, "learning_rate": 0.0001999716123065458, "loss": 0.7431, "step": 170 }, { "epoch": 0.00810234541577825, "grad_norm": 0.9296875, "learning_rate": 0.00019997125636853676, "loss": 1.022, "step": 171 }, { "epoch": 0.008149727552712628, "grad_norm": 0.3359375, "learning_rate": 0.00019997089821327172, "loss": 0.5743, "step": 172 }, { "epoch": 0.008197109689647004, "grad_norm": 0.58203125, "learning_rate": 0.00019997053784075858, "loss": 0.4988, "step": 173 }, { "epoch": 0.00824449182658138, "grad_norm": 0.53125, "learning_rate": 0.00019997017525100546, "loss": 0.9723, "step": 174 }, { "epoch": 0.008291873963515755, "grad_norm": 0.52734375, "learning_rate": 0.00019996981044402033, "loss": 1.2186, "step": 175 }, { "epoch": 0.00833925610045013, "grad_norm": 0.5234375, "learning_rate": 0.00019996944341981124, "loss": 0.628, "step": 176 }, { "epoch": 0.008386638237384506, "grad_norm": 0.3203125, "learning_rate": 0.00019996907417838642, "loss": 1.0701, "step": 177 }, { "epoch": 0.008434020374318882, "grad_norm": 0.400390625, "learning_rate": 0.00019996870271975402, "loss": 0.8977, "step": 178 }, { "epoch": 0.008481402511253257, "grad_norm": 0.4375, "learning_rate": 0.00019996832904392226, "loss": 1.2282, "step": 179 }, { "epoch": 0.008528784648187633, "grad_norm": 0.44140625, "learning_rate": 0.0001999679531508994, "loss": 1.3241, "step": 180 }, { "epoch": 0.008576166785122009, "grad_norm": 0.466796875, "learning_rate": 0.0001999675750406939, "loss": 0.7242, "step": 181 }, { "epoch": 0.008623548922056384, "grad_norm": 0.50390625, "learning_rate": 0.00019996719471331403, "loss": 0.5549, "step": 182 }, { "epoch": 0.00867093105899076, "grad_norm": 0.412109375, "learning_rate": 0.00019996681216876826, "loss": 0.8259, "step": 183 }, { "epoch": 0.008718313195925136, "grad_norm": 0.5, "learning_rate": 0.00019996642740706508, "loss": 1.0661, "step": 184 }, { "epoch": 0.008765695332859511, "grad_norm": 0.45703125, "learning_rate": 0.000199966040428213, "loss": 1.3544, "step": 185 }, { "epoch": 0.008813077469793887, "grad_norm": 0.515625, "learning_rate": 0.00019996565123222066, "loss": 1.1912, "step": 186 }, { "epoch": 0.008860459606728264, "grad_norm": 0.44140625, "learning_rate": 0.00019996525981909663, "loss": 0.4306, "step": 187 }, { "epoch": 0.00890784174366264, "grad_norm": 0.734375, "learning_rate": 0.0001999648661888496, "loss": 0.7892, "step": 188 }, { "epoch": 0.008955223880597015, "grad_norm": 0.455078125, "learning_rate": 0.00019996447034148837, "loss": 1.3539, "step": 189 }, { "epoch": 0.009002606017531391, "grad_norm": 0.486328125, "learning_rate": 0.00019996407227702162, "loss": 1.5831, "step": 190 }, { "epoch": 0.009049988154465767, "grad_norm": 0.57421875, "learning_rate": 0.00019996367199545824, "loss": 0.1309, "step": 191 }, { "epoch": 0.009097370291400142, "grad_norm": 0.310546875, "learning_rate": 0.00019996326949680708, "loss": 1.0598, "step": 192 }, { "epoch": 0.009144752428334518, "grad_norm": 0.40234375, "learning_rate": 0.00019996286478107708, "loss": 0.7607, "step": 193 }, { "epoch": 0.009192134565268894, "grad_norm": 0.48828125, "learning_rate": 0.00019996245784827723, "loss": 0.9167, "step": 194 }, { "epoch": 0.00923951670220327, "grad_norm": 0.396484375, "learning_rate": 0.00019996204869841654, "loss": 0.5669, "step": 195 }, { "epoch": 0.009286898839137645, "grad_norm": 0.42578125, "learning_rate": 0.00019996163733150408, "loss": 0.2394, "step": 196 }, { "epoch": 0.00933428097607202, "grad_norm": 0.515625, "learning_rate": 0.00019996122374754896, "loss": 0.5832, "step": 197 }, { "epoch": 0.009381663113006396, "grad_norm": 0.365234375, "learning_rate": 0.00019996080794656038, "loss": 0.8465, "step": 198 }, { "epoch": 0.009429045249940772, "grad_norm": 0.6875, "learning_rate": 0.00019996038992854757, "loss": 0.631, "step": 199 }, { "epoch": 0.009476427386875147, "grad_norm": 0.6484375, "learning_rate": 0.00019995996969351978, "loss": 1.4833, "step": 200 }, { "epoch": 0.009523809523809525, "grad_norm": 0.5234375, "learning_rate": 0.0001999595472414863, "loss": 1.349, "step": 201 }, { "epoch": 0.0095711916607439, "grad_norm": 0.431640625, "learning_rate": 0.0001999591225724566, "loss": 0.9398, "step": 202 }, { "epoch": 0.009618573797678276, "grad_norm": 0.396484375, "learning_rate": 0.00019995869568643996, "loss": 0.6837, "step": 203 }, { "epoch": 0.009665955934612652, "grad_norm": 0.37890625, "learning_rate": 0.000199958266583446, "loss": 0.6042, "step": 204 }, { "epoch": 0.009713338071547027, "grad_norm": 0.421875, "learning_rate": 0.0001999578352634841, "loss": 1.0077, "step": 205 }, { "epoch": 0.009760720208481403, "grad_norm": 1.3828125, "learning_rate": 0.00019995740172656386, "loss": 0.671, "step": 206 }, { "epoch": 0.009808102345415778, "grad_norm": 1.046875, "learning_rate": 0.00019995696597269498, "loss": 0.7752, "step": 207 }, { "epoch": 0.009855484482350154, "grad_norm": 0.52734375, "learning_rate": 0.00019995652800188705, "loss": 1.3379, "step": 208 }, { "epoch": 0.00990286661928453, "grad_norm": 1.5234375, "learning_rate": 0.00019995608781414977, "loss": 1.0481, "step": 209 }, { "epoch": 0.009950248756218905, "grad_norm": 0.4609375, "learning_rate": 0.00019995564540949297, "loss": 1.1031, "step": 210 }, { "epoch": 0.009997630893153281, "grad_norm": 0.49609375, "learning_rate": 0.0001999552007879264, "loss": 0.9984, "step": 211 }, { "epoch": 0.010045013030087657, "grad_norm": 0.498046875, "learning_rate": 0.00019995475394945996, "loss": 1.442, "step": 212 }, { "epoch": 0.010092395167022032, "grad_norm": 0.58203125, "learning_rate": 0.00019995430489410353, "loss": 1.1315, "step": 213 }, { "epoch": 0.010139777303956408, "grad_norm": 0.5, "learning_rate": 0.00019995385362186707, "loss": 1.4601, "step": 214 }, { "epoch": 0.010187159440890783, "grad_norm": 0.48828125, "learning_rate": 0.00019995340013276064, "loss": 1.091, "step": 215 }, { "epoch": 0.01023454157782516, "grad_norm": 0.412109375, "learning_rate": 0.00019995294442679421, "loss": 0.94, "step": 216 }, { "epoch": 0.010281923714759536, "grad_norm": 0.55078125, "learning_rate": 0.00019995248650397799, "loss": 1.067, "step": 217 }, { "epoch": 0.010329305851693912, "grad_norm": 0.84765625, "learning_rate": 0.00019995202636432203, "loss": 1.0678, "step": 218 }, { "epoch": 0.010376687988628288, "grad_norm": 0.49609375, "learning_rate": 0.00019995156400783663, "loss": 0.968, "step": 219 }, { "epoch": 0.010424070125562663, "grad_norm": 0.52734375, "learning_rate": 0.00019995109943453198, "loss": 1.2878, "step": 220 }, { "epoch": 0.010471452262497039, "grad_norm": 0.408203125, "learning_rate": 0.00019995063264441844, "loss": 1.0963, "step": 221 }, { "epoch": 0.010518834399431415, "grad_norm": 0.50390625, "learning_rate": 0.0001999501636375063, "loss": 1.3468, "step": 222 }, { "epoch": 0.01056621653636579, "grad_norm": 0.375, "learning_rate": 0.000199949692413806, "loss": 0.6686, "step": 223 }, { "epoch": 0.010613598673300166, "grad_norm": 0.4453125, "learning_rate": 0.000199949218973328, "loss": 1.2847, "step": 224 }, { "epoch": 0.010660980810234541, "grad_norm": 0.5, "learning_rate": 0.0001999487433160828, "loss": 1.4145, "step": 225 }, { "epoch": 0.010708362947168917, "grad_norm": 0.58203125, "learning_rate": 0.00019994826544208086, "loss": 1.1932, "step": 226 }, { "epoch": 0.010755745084103293, "grad_norm": 0.57421875, "learning_rate": 0.00019994778535133292, "loss": 1.2457, "step": 227 }, { "epoch": 0.010803127221037668, "grad_norm": 0.388671875, "learning_rate": 0.00019994730304384955, "loss": 0.5284, "step": 228 }, { "epoch": 0.010850509357972044, "grad_norm": 0.625, "learning_rate": 0.00019994681851964144, "loss": 1.2445, "step": 229 }, { "epoch": 0.01089789149490642, "grad_norm": 0.447265625, "learning_rate": 0.00019994633177871935, "loss": 1.1842, "step": 230 }, { "epoch": 0.010945273631840797, "grad_norm": 0.482421875, "learning_rate": 0.0001999458428210941, "loss": 1.0567, "step": 231 }, { "epoch": 0.010992655768775172, "grad_norm": 0.59375, "learning_rate": 0.00019994535164677651, "loss": 0.9081, "step": 232 }, { "epoch": 0.011040037905709548, "grad_norm": 0.390625, "learning_rate": 0.00019994485825577748, "loss": 0.788, "step": 233 }, { "epoch": 0.011087420042643924, "grad_norm": 0.44921875, "learning_rate": 0.0001999443626481079, "loss": 1.2978, "step": 234 }, { "epoch": 0.0111348021795783, "grad_norm": 0.498046875, "learning_rate": 0.00019994386482377887, "loss": 1.8736, "step": 235 }, { "epoch": 0.011182184316512675, "grad_norm": 0.400390625, "learning_rate": 0.00019994336478280138, "loss": 0.6525, "step": 236 }, { "epoch": 0.01122956645344705, "grad_norm": 0.4765625, "learning_rate": 0.00019994286252518646, "loss": 1.6496, "step": 237 }, { "epoch": 0.011276948590381426, "grad_norm": 0.53125, "learning_rate": 0.00019994235805094536, "loss": 0.8822, "step": 238 }, { "epoch": 0.011324330727315802, "grad_norm": 0.68359375, "learning_rate": 0.00019994185136008917, "loss": 0.8822, "step": 239 }, { "epoch": 0.011371712864250177, "grad_norm": 0.435546875, "learning_rate": 0.00019994134245262922, "loss": 1.0044, "step": 240 }, { "epoch": 0.011419095001184553, "grad_norm": 0.41796875, "learning_rate": 0.0001999408313285767, "loss": 1.0126, "step": 241 }, { "epoch": 0.011466477138118929, "grad_norm": 0.369140625, "learning_rate": 0.00019994031798794298, "loss": 1.2154, "step": 242 }, { "epoch": 0.011513859275053304, "grad_norm": 0.482421875, "learning_rate": 0.0001999398024307395, "loss": 1.3496, "step": 243 }, { "epoch": 0.01156124141198768, "grad_norm": 0.34375, "learning_rate": 0.00019993928465697765, "loss": 1.4015, "step": 244 }, { "epoch": 0.011608623548922056, "grad_norm": 0.44140625, "learning_rate": 0.0001999387646666689, "loss": 1.2544, "step": 245 }, { "epoch": 0.011656005685856433, "grad_norm": 0.330078125, "learning_rate": 0.0001999382424598248, "loss": 0.6131, "step": 246 }, { "epoch": 0.011703387822790809, "grad_norm": 0.58984375, "learning_rate": 0.00019993771803645695, "loss": 0.7921, "step": 247 }, { "epoch": 0.011750769959725184, "grad_norm": 0.48046875, "learning_rate": 0.00019993719139657694, "loss": 1.1962, "step": 248 }, { "epoch": 0.01179815209665956, "grad_norm": 0.4609375, "learning_rate": 0.00019993666254019648, "loss": 1.268, "step": 249 }, { "epoch": 0.011845534233593935, "grad_norm": 0.458984375, "learning_rate": 0.0001999361314673273, "loss": 0.8002, "step": 250 }, { "epoch": 0.011892916370528311, "grad_norm": 0.5, "learning_rate": 0.00019993559817798118, "loss": 0.6962, "step": 251 }, { "epoch": 0.011940298507462687, "grad_norm": 0.50390625, "learning_rate": 0.00019993506267216993, "loss": 0.9077, "step": 252 }, { "epoch": 0.011987680644397062, "grad_norm": 0.6640625, "learning_rate": 0.00019993452494990543, "loss": 0.9264, "step": 253 }, { "epoch": 0.012035062781331438, "grad_norm": 0.375, "learning_rate": 0.0001999339850111996, "loss": 1.1098, "step": 254 }, { "epoch": 0.012082444918265814, "grad_norm": 0.35546875, "learning_rate": 0.00019993344285606447, "loss": 1.0, "step": 255 }, { "epoch": 0.01212982705520019, "grad_norm": 0.404296875, "learning_rate": 0.00019993289848451197, "loss": 1.2045, "step": 256 }, { "epoch": 0.012177209192134565, "grad_norm": 0.453125, "learning_rate": 0.00019993235189655426, "loss": 0.9974, "step": 257 }, { "epoch": 0.01222459132906894, "grad_norm": 0.70703125, "learning_rate": 0.0001999318030922034, "loss": 0.9157, "step": 258 }, { "epoch": 0.012271973466003316, "grad_norm": 0.73046875, "learning_rate": 0.0001999312520714716, "loss": 0.7541, "step": 259 }, { "epoch": 0.012319355602937692, "grad_norm": 0.498046875, "learning_rate": 0.0001999306988343711, "loss": 0.8848, "step": 260 }, { "epoch": 0.012366737739872069, "grad_norm": 0.4375, "learning_rate": 0.00019993014338091412, "loss": 1.1219, "step": 261 }, { "epoch": 0.012414119876806445, "grad_norm": 0.51953125, "learning_rate": 0.000199929585711113, "loss": 1.0632, "step": 262 }, { "epoch": 0.01246150201374082, "grad_norm": 0.4140625, "learning_rate": 0.0001999290258249801, "loss": 0.7072, "step": 263 }, { "epoch": 0.012508884150675196, "grad_norm": 0.384765625, "learning_rate": 0.00019992846372252787, "loss": 1.2486, "step": 264 }, { "epoch": 0.012556266287609572, "grad_norm": 0.478515625, "learning_rate": 0.00019992789940376872, "loss": 0.6055, "step": 265 }, { "epoch": 0.012603648424543947, "grad_norm": 0.61328125, "learning_rate": 0.00019992733286871523, "loss": 0.7229, "step": 266 }, { "epoch": 0.012651030561478323, "grad_norm": 0.76953125, "learning_rate": 0.00019992676411737992, "loss": 0.4119, "step": 267 }, { "epoch": 0.012698412698412698, "grad_norm": 0.384765625, "learning_rate": 0.00019992619314977543, "loss": 1.1707, "step": 268 }, { "epoch": 0.012745794835347074, "grad_norm": 0.68359375, "learning_rate": 0.0001999256199659144, "loss": 0.8128, "step": 269 }, { "epoch": 0.01279317697228145, "grad_norm": 0.46875, "learning_rate": 0.0001999250445658096, "loss": 1.109, "step": 270 }, { "epoch": 0.012840559109215825, "grad_norm": 0.462890625, "learning_rate": 0.00019992446694947367, "loss": 1.1046, "step": 271 }, { "epoch": 0.012887941246150201, "grad_norm": 0.64453125, "learning_rate": 0.00019992388711691955, "loss": 0.9036, "step": 272 }, { "epoch": 0.012935323383084577, "grad_norm": 0.443359375, "learning_rate": 0.00019992330506816001, "loss": 1.2072, "step": 273 }, { "epoch": 0.012982705520018952, "grad_norm": 1.0859375, "learning_rate": 0.00019992272080320803, "loss": 0.0806, "step": 274 }, { "epoch": 0.013030087656953328, "grad_norm": 0.59375, "learning_rate": 0.00019992213432207655, "loss": 1.1417, "step": 275 }, { "epoch": 0.013077469793887705, "grad_norm": 0.609375, "learning_rate": 0.00019992154562477855, "loss": 1.5429, "step": 276 }, { "epoch": 0.01312485193082208, "grad_norm": 0.41015625, "learning_rate": 0.00019992095471132705, "loss": 0.7147, "step": 277 }, { "epoch": 0.013172234067756456, "grad_norm": 0.42578125, "learning_rate": 0.00019992036158173525, "loss": 1.4001, "step": 278 }, { "epoch": 0.013219616204690832, "grad_norm": 0.43359375, "learning_rate": 0.00019991976623601624, "loss": 0.8493, "step": 279 }, { "epoch": 0.013266998341625208, "grad_norm": 0.466796875, "learning_rate": 0.00019991916867418327, "loss": 0.8411, "step": 280 }, { "epoch": 0.013314380478559583, "grad_norm": 0.98046875, "learning_rate": 0.00019991856889624957, "loss": 1.1472, "step": 281 }, { "epoch": 0.013361762615493959, "grad_norm": 0.6171875, "learning_rate": 0.00019991796690222843, "loss": 1.1025, "step": 282 }, { "epoch": 0.013409144752428334, "grad_norm": 0.53515625, "learning_rate": 0.00019991736269213322, "loss": 1.2855, "step": 283 }, { "epoch": 0.01345652688936271, "grad_norm": 0.462890625, "learning_rate": 0.0001999167562659773, "loss": 0.8279, "step": 284 }, { "epoch": 0.013503909026297086, "grad_norm": 0.515625, "learning_rate": 0.00019991614762377417, "loss": 1.1669, "step": 285 }, { "epoch": 0.013551291163231461, "grad_norm": 0.7734375, "learning_rate": 0.00019991553676553734, "loss": 0.5494, "step": 286 }, { "epoch": 0.013598673300165837, "grad_norm": 0.423828125, "learning_rate": 0.0001999149236912803, "loss": 0.7617, "step": 287 }, { "epoch": 0.013646055437100213, "grad_norm": 0.40234375, "learning_rate": 0.00019991430840101668, "loss": 0.9924, "step": 288 }, { "epoch": 0.013693437574034588, "grad_norm": 0.462890625, "learning_rate": 0.00019991369089476013, "loss": 0.924, "step": 289 }, { "epoch": 0.013740819710968964, "grad_norm": 0.48828125, "learning_rate": 0.00019991307117252433, "loss": 1.1997, "step": 290 }, { "epoch": 0.013788201847903341, "grad_norm": 0.62890625, "learning_rate": 0.00019991244923432303, "loss": 0.4997, "step": 291 }, { "epoch": 0.013835583984837717, "grad_norm": 0.4921875, "learning_rate": 0.00019991182508017006, "loss": 0.5573, "step": 292 }, { "epoch": 0.013882966121772092, "grad_norm": 0.59765625, "learning_rate": 0.0001999111987100792, "loss": 1.2159, "step": 293 }, { "epoch": 0.013930348258706468, "grad_norm": 0.80859375, "learning_rate": 0.0001999105701240644, "loss": 0.5836, "step": 294 }, { "epoch": 0.013977730395640844, "grad_norm": 0.30078125, "learning_rate": 0.00019990993932213956, "loss": 0.5885, "step": 295 }, { "epoch": 0.01402511253257522, "grad_norm": 0.25390625, "learning_rate": 0.00019990930630431865, "loss": 0.0234, "step": 296 }, { "epoch": 0.014072494669509595, "grad_norm": 0.443359375, "learning_rate": 0.0001999086710706158, "loss": 1.3111, "step": 297 }, { "epoch": 0.01411987680644397, "grad_norm": 0.490234375, "learning_rate": 0.000199908033621045, "loss": 0.678, "step": 298 }, { "epoch": 0.014167258943378346, "grad_norm": 0.419921875, "learning_rate": 0.00019990739395562047, "loss": 1.1531, "step": 299 }, { "epoch": 0.014214641080312722, "grad_norm": 0.28125, "learning_rate": 0.00019990675207435634, "loss": 0.0211, "step": 300 }, { "epoch": 0.014262023217247097, "grad_norm": 0.875, "learning_rate": 0.00019990610797726688, "loss": 0.8344, "step": 301 }, { "epoch": 0.014309405354181473, "grad_norm": 0.43359375, "learning_rate": 0.00019990546166436635, "loss": 1.1513, "step": 302 }, { "epoch": 0.014356787491115849, "grad_norm": 0.56640625, "learning_rate": 0.00019990481313566906, "loss": 1.1517, "step": 303 }, { "epoch": 0.014404169628050224, "grad_norm": 0.478515625, "learning_rate": 0.0001999041623911895, "loss": 1.2003, "step": 304 }, { "epoch": 0.0144515517649846, "grad_norm": 0.359375, "learning_rate": 0.00019990350943094196, "loss": 0.6667, "step": 305 }, { "epoch": 0.014498933901918977, "grad_norm": 0.49609375, "learning_rate": 0.00019990285425494105, "loss": 1.274, "step": 306 }, { "epoch": 0.014546316038853353, "grad_norm": 0.515625, "learning_rate": 0.0001999021968632012, "loss": 1.4139, "step": 307 }, { "epoch": 0.014593698175787729, "grad_norm": 0.5546875, "learning_rate": 0.00019990153725573705, "loss": 0.7986, "step": 308 }, { "epoch": 0.014641080312722104, "grad_norm": 0.3359375, "learning_rate": 0.00019990087543256323, "loss": 0.6945, "step": 309 }, { "epoch": 0.01468846244965648, "grad_norm": 0.609375, "learning_rate": 0.00019990021139369436, "loss": 0.6369, "step": 310 }, { "epoch": 0.014735844586590855, "grad_norm": 0.50390625, "learning_rate": 0.00019989954513914527, "loss": 0.8179, "step": 311 }, { "epoch": 0.014783226723525231, "grad_norm": 0.435546875, "learning_rate": 0.00019989887666893062, "loss": 0.7585, "step": 312 }, { "epoch": 0.014830608860459607, "grad_norm": 0.388671875, "learning_rate": 0.00019989820598306532, "loss": 1.2037, "step": 313 }, { "epoch": 0.014877990997393982, "grad_norm": 0.9453125, "learning_rate": 0.00019989753308156423, "loss": 0.9347, "step": 314 }, { "epoch": 0.014925373134328358, "grad_norm": 0.44921875, "learning_rate": 0.00019989685796444225, "loss": 1.1497, "step": 315 }, { "epoch": 0.014972755271262734, "grad_norm": 0.890625, "learning_rate": 0.00019989618063171436, "loss": 0.8473, "step": 316 }, { "epoch": 0.01502013740819711, "grad_norm": 0.8671875, "learning_rate": 0.0001998955010833956, "loss": 0.8583, "step": 317 }, { "epoch": 0.015067519545131485, "grad_norm": 0.69921875, "learning_rate": 0.00019989481931950102, "loss": 0.7101, "step": 318 }, { "epoch": 0.01511490168206586, "grad_norm": 0.453125, "learning_rate": 0.00019989413534004575, "loss": 0.9949, "step": 319 }, { "epoch": 0.015162283819000238, "grad_norm": 0.515625, "learning_rate": 0.00019989344914504497, "loss": 1.3381, "step": 320 }, { "epoch": 0.015209665955934613, "grad_norm": 0.390625, "learning_rate": 0.0001998927607345139, "loss": 0.9605, "step": 321 }, { "epoch": 0.015257048092868989, "grad_norm": 0.96484375, "learning_rate": 0.00019989207010846777, "loss": 0.8851, "step": 322 }, { "epoch": 0.015304430229803365, "grad_norm": 0.423828125, "learning_rate": 0.00019989137726692194, "loss": 1.1357, "step": 323 }, { "epoch": 0.01535181236673774, "grad_norm": 0.4921875, "learning_rate": 0.00019989068220989175, "loss": 0.9308, "step": 324 }, { "epoch": 0.015399194503672116, "grad_norm": 0.462890625, "learning_rate": 0.00019988998493739263, "loss": 1.1891, "step": 325 }, { "epoch": 0.015446576640606492, "grad_norm": 0.5703125, "learning_rate": 0.00019988928544944007, "loss": 0.6323, "step": 326 }, { "epoch": 0.015493958777540867, "grad_norm": 0.578125, "learning_rate": 0.00019988858374604953, "loss": 1.0981, "step": 327 }, { "epoch": 0.015541340914475243, "grad_norm": 0.39453125, "learning_rate": 0.0001998878798272366, "loss": 1.325, "step": 328 }, { "epoch": 0.015588723051409618, "grad_norm": 0.36328125, "learning_rate": 0.00019988717369301688, "loss": 0.0412, "step": 329 }, { "epoch": 0.015636105188343994, "grad_norm": 0.51953125, "learning_rate": 0.00019988646534340606, "loss": 0.5804, "step": 330 }, { "epoch": 0.01568348732527837, "grad_norm": 0.37890625, "learning_rate": 0.00019988575477841985, "loss": 1.3774, "step": 331 }, { "epoch": 0.015730869462212745, "grad_norm": 0.28125, "learning_rate": 0.00019988504199807395, "loss": 0.6024, "step": 332 }, { "epoch": 0.01577825159914712, "grad_norm": 0.423828125, "learning_rate": 0.00019988432700238424, "loss": 1.3106, "step": 333 }, { "epoch": 0.015825633736081497, "grad_norm": 0.69921875, "learning_rate": 0.00019988360979136655, "loss": 0.9213, "step": 334 }, { "epoch": 0.015873015873015872, "grad_norm": 0.484375, "learning_rate": 0.0001998828903650368, "loss": 1.7168, "step": 335 }, { "epoch": 0.015920398009950248, "grad_norm": 0.5, "learning_rate": 0.0001998821687234109, "loss": 0.8373, "step": 336 }, { "epoch": 0.015967780146884623, "grad_norm": 0.494140625, "learning_rate": 0.00019988144486650491, "loss": 1.3647, "step": 337 }, { "epoch": 0.016015162283819, "grad_norm": 0.427734375, "learning_rate": 0.00019988071879433483, "loss": 0.8493, "step": 338 }, { "epoch": 0.016062544420753375, "grad_norm": 0.578125, "learning_rate": 0.0001998799905069168, "loss": 0.9083, "step": 339 }, { "epoch": 0.01610992655768775, "grad_norm": 0.427734375, "learning_rate": 0.00019987926000426703, "loss": 1.4788, "step": 340 }, { "epoch": 0.016157308694622126, "grad_norm": 0.34375, "learning_rate": 0.0001998785272864016, "loss": 0.0531, "step": 341 }, { "epoch": 0.0162046908315565, "grad_norm": 0.482421875, "learning_rate": 0.00019987779235333683, "loss": 0.5946, "step": 342 }, { "epoch": 0.01625207296849088, "grad_norm": 0.44921875, "learning_rate": 0.000199877055205089, "loss": 1.3108, "step": 343 }, { "epoch": 0.016299455105425256, "grad_norm": 0.8359375, "learning_rate": 0.0001998763158416745, "loss": 0.6112, "step": 344 }, { "epoch": 0.016346837242359632, "grad_norm": 0.421875, "learning_rate": 0.0001998755742631097, "loss": 0.7658, "step": 345 }, { "epoch": 0.016394219379294007, "grad_norm": 0.625, "learning_rate": 0.000199874830469411, "loss": 0.6044, "step": 346 }, { "epoch": 0.016441601516228383, "grad_norm": 0.53125, "learning_rate": 0.000199874084460595, "loss": 1.118, "step": 347 }, { "epoch": 0.01648898365316276, "grad_norm": 0.6015625, "learning_rate": 0.00019987333623667814, "loss": 0.4469, "step": 348 }, { "epoch": 0.016536365790097134, "grad_norm": 0.49609375, "learning_rate": 0.0001998725857976771, "loss": 0.9925, "step": 349 }, { "epoch": 0.01658374792703151, "grad_norm": 0.515625, "learning_rate": 0.00019987183314360848, "loss": 1.8437, "step": 350 }, { "epoch": 0.016631130063965886, "grad_norm": 0.41796875, "learning_rate": 0.00019987107827448895, "loss": 1.2108, "step": 351 }, { "epoch": 0.01667851220090026, "grad_norm": 0.451171875, "learning_rate": 0.00019987032119033528, "loss": 1.193, "step": 352 }, { "epoch": 0.016725894337834637, "grad_norm": 0.59375, "learning_rate": 0.0001998695618911643, "loss": 1.4704, "step": 353 }, { "epoch": 0.016773276474769012, "grad_norm": 0.4921875, "learning_rate": 0.00019986880037699278, "loss": 1.2658, "step": 354 }, { "epoch": 0.016820658611703388, "grad_norm": 0.29296875, "learning_rate": 0.00019986803664783767, "loss": 0.5875, "step": 355 }, { "epoch": 0.016868040748637764, "grad_norm": 0.384765625, "learning_rate": 0.00019986727070371587, "loss": 1.1199, "step": 356 }, { "epoch": 0.01691542288557214, "grad_norm": 0.412109375, "learning_rate": 0.00019986650254464437, "loss": 0.5635, "step": 357 }, { "epoch": 0.016962805022506515, "grad_norm": 0.494140625, "learning_rate": 0.00019986573217064022, "loss": 1.0004, "step": 358 }, { "epoch": 0.01701018715944089, "grad_norm": 0.55078125, "learning_rate": 0.00019986495958172054, "loss": 1.3553, "step": 359 }, { "epoch": 0.017057569296375266, "grad_norm": 0.90625, "learning_rate": 0.00019986418477790235, "loss": 0.6413, "step": 360 }, { "epoch": 0.017104951433309642, "grad_norm": 0.0869140625, "learning_rate": 0.00019986340775920297, "loss": 0.0047, "step": 361 }, { "epoch": 0.017152333570244017, "grad_norm": 0.92578125, "learning_rate": 0.00019986262852563958, "loss": 0.7254, "step": 362 }, { "epoch": 0.017199715707178393, "grad_norm": 1.3203125, "learning_rate": 0.00019986184707722945, "loss": 0.4382, "step": 363 }, { "epoch": 0.01724709784411277, "grad_norm": 0.408203125, "learning_rate": 0.00019986106341398992, "loss": 0.7554, "step": 364 }, { "epoch": 0.017294479981047144, "grad_norm": 1.0546875, "learning_rate": 0.00019986027753593835, "loss": 0.866, "step": 365 }, { "epoch": 0.01734186211798152, "grad_norm": 0.46875, "learning_rate": 0.00019985948944309221, "loss": 1.6714, "step": 366 }, { "epoch": 0.017389244254915896, "grad_norm": 0.765625, "learning_rate": 0.00019985869913546894, "loss": 0.4531, "step": 367 }, { "epoch": 0.01743662639185027, "grad_norm": 0.384765625, "learning_rate": 0.00019985790661308613, "loss": 0.7205, "step": 368 }, { "epoch": 0.017484008528784647, "grad_norm": 0.609375, "learning_rate": 0.00019985711187596125, "loss": 0.3726, "step": 369 }, { "epoch": 0.017531390665719022, "grad_norm": 0.765625, "learning_rate": 0.00019985631492411206, "loss": 1.2968, "step": 370 }, { "epoch": 0.017578772802653398, "grad_norm": 0.859375, "learning_rate": 0.00019985551575755613, "loss": 0.7039, "step": 371 }, { "epoch": 0.017626154939587774, "grad_norm": 0.390625, "learning_rate": 0.0001998547143763112, "loss": 1.1429, "step": 372 }, { "epoch": 0.017673537076522153, "grad_norm": 0.451171875, "learning_rate": 0.00019985391078039514, "loss": 0.865, "step": 373 }, { "epoch": 0.01772091921345653, "grad_norm": 0.357421875, "learning_rate": 0.00019985310496982564, "loss": 1.5814, "step": 374 }, { "epoch": 0.017768301350390904, "grad_norm": 0.478515625, "learning_rate": 0.00019985229694462065, "loss": 1.4083, "step": 375 }, { "epoch": 0.01781568348732528, "grad_norm": 0.703125, "learning_rate": 0.00019985148670479804, "loss": 0.3757, "step": 376 }, { "epoch": 0.017863065624259655, "grad_norm": 0.45703125, "learning_rate": 0.00019985067425037583, "loss": 1.2033, "step": 377 }, { "epoch": 0.01791044776119403, "grad_norm": 0.466796875, "learning_rate": 0.00019984985958137203, "loss": 1.2772, "step": 378 }, { "epoch": 0.017957829898128407, "grad_norm": 1.328125, "learning_rate": 0.0001998490426978047, "loss": 1.0075, "step": 379 }, { "epoch": 0.018005212035062782, "grad_norm": 0.7578125, "learning_rate": 0.00019984822359969196, "loss": 0.7024, "step": 380 }, { "epoch": 0.018052594171997158, "grad_norm": 0.474609375, "learning_rate": 0.00019984740228705196, "loss": 1.2398, "step": 381 }, { "epoch": 0.018099976308931533, "grad_norm": 0.416015625, "learning_rate": 0.00019984657875990296, "loss": 0.8402, "step": 382 }, { "epoch": 0.01814735844586591, "grad_norm": 0.5703125, "learning_rate": 0.00019984575301826315, "loss": 1.4791, "step": 383 }, { "epoch": 0.018194740582800285, "grad_norm": 0.490234375, "learning_rate": 0.00019984492506215092, "loss": 1.2764, "step": 384 }, { "epoch": 0.01824212271973466, "grad_norm": 0.47265625, "learning_rate": 0.0001998440948915846, "loss": 0.9856, "step": 385 }, { "epoch": 0.018289504856669036, "grad_norm": 0.56640625, "learning_rate": 0.0001998432625065826, "loss": 0.8699, "step": 386 }, { "epoch": 0.01833688699360341, "grad_norm": 0.486328125, "learning_rate": 0.00019984242790716339, "loss": 0.9452, "step": 387 }, { "epoch": 0.018384269130537787, "grad_norm": 0.08740234375, "learning_rate": 0.00019984159109334547, "loss": 0.0059, "step": 388 }, { "epoch": 0.018431651267472163, "grad_norm": 0.458984375, "learning_rate": 0.00019984075206514742, "loss": 1.2753, "step": 389 }, { "epoch": 0.01847903340440654, "grad_norm": 0.361328125, "learning_rate": 0.0001998399108225878, "loss": 1.1387, "step": 390 }, { "epoch": 0.018526415541340914, "grad_norm": 0.54296875, "learning_rate": 0.00019983906736568532, "loss": 1.1879, "step": 391 }, { "epoch": 0.01857379767827529, "grad_norm": 0.7421875, "learning_rate": 0.00019983822169445867, "loss": 0.5234, "step": 392 }, { "epoch": 0.018621179815209665, "grad_norm": 0.62109375, "learning_rate": 0.00019983737380892662, "loss": 0.5158, "step": 393 }, { "epoch": 0.01866856195214404, "grad_norm": 0.396484375, "learning_rate": 0.00019983652370910796, "loss": 0.9222, "step": 394 }, { "epoch": 0.018715944089078417, "grad_norm": 0.4609375, "learning_rate": 0.00019983567139502152, "loss": 0.7296, "step": 395 }, { "epoch": 0.018763326226012792, "grad_norm": 0.48828125, "learning_rate": 0.00019983481686668627, "loss": 0.8614, "step": 396 }, { "epoch": 0.018810708362947168, "grad_norm": 0.35546875, "learning_rate": 0.00019983396012412109, "loss": 0.6279, "step": 397 }, { "epoch": 0.018858090499881543, "grad_norm": 0.431640625, "learning_rate": 0.00019983310116734502, "loss": 1.1019, "step": 398 }, { "epoch": 0.01890547263681592, "grad_norm": 0.56640625, "learning_rate": 0.0001998322399963771, "loss": 1.1183, "step": 399 }, { "epoch": 0.018952854773750295, "grad_norm": 0.396484375, "learning_rate": 0.00019983137661123642, "loss": 0.9234, "step": 400 }, { "epoch": 0.01900023691068467, "grad_norm": 0.51953125, "learning_rate": 0.00019983051101194217, "loss": 1.2332, "step": 401 }, { "epoch": 0.01904761904761905, "grad_norm": 0.439453125, "learning_rate": 0.00019982964319851352, "loss": 0.9963, "step": 402 }, { "epoch": 0.019095001184553425, "grad_norm": 0.3984375, "learning_rate": 0.0001998287731709697, "loss": 0.875, "step": 403 }, { "epoch": 0.0191423833214878, "grad_norm": 0.470703125, "learning_rate": 0.00019982790092933002, "loss": 1.4477, "step": 404 }, { "epoch": 0.019189765458422176, "grad_norm": 0.40234375, "learning_rate": 0.00019982702647361385, "loss": 1.0502, "step": 405 }, { "epoch": 0.019237147595356552, "grad_norm": 0.51171875, "learning_rate": 0.00019982614980384056, "loss": 1.3743, "step": 406 }, { "epoch": 0.019284529732290927, "grad_norm": 0.55859375, "learning_rate": 0.0001998252709200296, "loss": 0.9722, "step": 407 }, { "epoch": 0.019331911869225303, "grad_norm": 0.69140625, "learning_rate": 0.00019982438982220043, "loss": 0.9775, "step": 408 }, { "epoch": 0.01937929400615968, "grad_norm": 0.6875, "learning_rate": 0.00019982350651037264, "loss": 0.4177, "step": 409 }, { "epoch": 0.019426676143094054, "grad_norm": 0.3984375, "learning_rate": 0.00019982262098456582, "loss": 1.3982, "step": 410 }, { "epoch": 0.01947405828002843, "grad_norm": 0.359375, "learning_rate": 0.00019982173324479955, "loss": 0.9668, "step": 411 }, { "epoch": 0.019521440416962806, "grad_norm": 0.369140625, "learning_rate": 0.0001998208432910936, "loss": 0.5722, "step": 412 }, { "epoch": 0.01956882255389718, "grad_norm": 0.46875, "learning_rate": 0.00019981995112346764, "loss": 0.0664, "step": 413 }, { "epoch": 0.019616204690831557, "grad_norm": 0.49609375, "learning_rate": 0.00019981905674194153, "loss": 1.4617, "step": 414 }, { "epoch": 0.019663586827765932, "grad_norm": 0.34765625, "learning_rate": 0.00019981816014653502, "loss": 1.0365, "step": 415 }, { "epoch": 0.019710968964700308, "grad_norm": 0.421875, "learning_rate": 0.00019981726133726807, "loss": 0.9083, "step": 416 }, { "epoch": 0.019758351101634684, "grad_norm": 0.8828125, "learning_rate": 0.0001998163603141606, "loss": 0.5751, "step": 417 }, { "epoch": 0.01980573323856906, "grad_norm": 0.921875, "learning_rate": 0.00019981545707723256, "loss": 0.3827, "step": 418 }, { "epoch": 0.019853115375503435, "grad_norm": 0.72265625, "learning_rate": 0.00019981455162650398, "loss": 0.7891, "step": 419 }, { "epoch": 0.01990049751243781, "grad_norm": 0.7734375, "learning_rate": 0.00019981364396199497, "loss": 1.0511, "step": 420 }, { "epoch": 0.019947879649372186, "grad_norm": 1.078125, "learning_rate": 0.00019981273408372564, "loss": 1.1038, "step": 421 }, { "epoch": 0.019995261786306562, "grad_norm": 0.455078125, "learning_rate": 0.00019981182199171622, "loss": 1.2424, "step": 422 }, { "epoch": 0.020042643923240937, "grad_norm": 0.6171875, "learning_rate": 0.0001998109076859869, "loss": 0.5426, "step": 423 }, { "epoch": 0.020090026060175313, "grad_norm": 1.203125, "learning_rate": 0.00019980999116655794, "loss": 1.1897, "step": 424 }, { "epoch": 0.02013740819710969, "grad_norm": 0.7890625, "learning_rate": 0.00019980907243344968, "loss": 1.1586, "step": 425 }, { "epoch": 0.020184790334044064, "grad_norm": 0.3671875, "learning_rate": 0.00019980815148668251, "loss": 0.6897, "step": 426 }, { "epoch": 0.02023217247097844, "grad_norm": 0.482421875, "learning_rate": 0.0001998072283262769, "loss": 1.2089, "step": 427 }, { "epoch": 0.020279554607912816, "grad_norm": 0.85546875, "learning_rate": 0.00019980630295225323, "loss": 0.7024, "step": 428 }, { "epoch": 0.02032693674484719, "grad_norm": 0.416015625, "learning_rate": 0.00019980537536463207, "loss": 0.3366, "step": 429 }, { "epoch": 0.020374318881781567, "grad_norm": 0.796875, "learning_rate": 0.000199804445563434, "loss": 1.0042, "step": 430 }, { "epoch": 0.020421701018715942, "grad_norm": 0.419921875, "learning_rate": 0.00019980351354867963, "loss": 0.5644, "step": 431 }, { "epoch": 0.02046908315565032, "grad_norm": 0.37109375, "learning_rate": 0.00019980257932038966, "loss": 1.0856, "step": 432 }, { "epoch": 0.020516465292584697, "grad_norm": 0.53515625, "learning_rate": 0.00019980164287858475, "loss": 0.4346, "step": 433 }, { "epoch": 0.020563847429519073, "grad_norm": 0.5234375, "learning_rate": 0.00019980070422328573, "loss": 0.3099, "step": 434 }, { "epoch": 0.02061122956645345, "grad_norm": 0.42578125, "learning_rate": 0.00019979976335451338, "loss": 0.991, "step": 435 }, { "epoch": 0.020658611703387824, "grad_norm": 0.56640625, "learning_rate": 0.0001997988202722886, "loss": 1.0886, "step": 436 }, { "epoch": 0.0207059938403222, "grad_norm": 0.44921875, "learning_rate": 0.00019979787497663228, "loss": 1.0824, "step": 437 }, { "epoch": 0.020753375977256575, "grad_norm": 0.451171875, "learning_rate": 0.00019979692746756536, "loss": 1.2677, "step": 438 }, { "epoch": 0.02080075811419095, "grad_norm": 0.5625, "learning_rate": 0.00019979597774510892, "loss": 0.7553, "step": 439 }, { "epoch": 0.020848140251125327, "grad_norm": 0.466796875, "learning_rate": 0.000199795025809284, "loss": 1.0373, "step": 440 }, { "epoch": 0.020895522388059702, "grad_norm": 0.498046875, "learning_rate": 0.00019979407166011165, "loss": 1.2302, "step": 441 }, { "epoch": 0.020942904524994078, "grad_norm": 0.51171875, "learning_rate": 0.00019979311529761312, "loss": 0.3575, "step": 442 }, { "epoch": 0.020990286661928453, "grad_norm": 0.390625, "learning_rate": 0.00019979215672180962, "loss": 0.9975, "step": 443 }, { "epoch": 0.02103766879886283, "grad_norm": 0.490234375, "learning_rate": 0.00019979119593272236, "loss": 0.4121, "step": 444 }, { "epoch": 0.021085050935797205, "grad_norm": 0.44140625, "learning_rate": 0.00019979023293037265, "loss": 1.1058, "step": 445 }, { "epoch": 0.02113243307273158, "grad_norm": 0.43359375, "learning_rate": 0.00019978926771478187, "loss": 1.045, "step": 446 }, { "epoch": 0.021179815209665956, "grad_norm": 0.78515625, "learning_rate": 0.00019978830028597141, "loss": 1.5996, "step": 447 }, { "epoch": 0.02122719734660033, "grad_norm": 0.7734375, "learning_rate": 0.00019978733064396277, "loss": 0.6733, "step": 448 }, { "epoch": 0.021274579483534707, "grad_norm": 0.451171875, "learning_rate": 0.00019978635878877742, "loss": 1.1244, "step": 449 }, { "epoch": 0.021321961620469083, "grad_norm": 0.546875, "learning_rate": 0.00019978538472043692, "loss": 1.5741, "step": 450 }, { "epoch": 0.02136934375740346, "grad_norm": 0.419921875, "learning_rate": 0.00019978440843896285, "loss": 0.7887, "step": 451 }, { "epoch": 0.021416725894337834, "grad_norm": 0.5625, "learning_rate": 0.00019978342994437688, "loss": 0.4479, "step": 452 }, { "epoch": 0.02146410803127221, "grad_norm": 0.50390625, "learning_rate": 0.00019978244923670076, "loss": 1.525, "step": 453 }, { "epoch": 0.021511490168206585, "grad_norm": 0.38671875, "learning_rate": 0.00019978146631595615, "loss": 1.161, "step": 454 }, { "epoch": 0.02155887230514096, "grad_norm": 0.462890625, "learning_rate": 0.00019978048118216496, "loss": 1.2118, "step": 455 }, { "epoch": 0.021606254442075336, "grad_norm": 0.625, "learning_rate": 0.00019977949383534894, "loss": 0.6029, "step": 456 }, { "epoch": 0.021653636579009712, "grad_norm": 0.48046875, "learning_rate": 0.00019977850427553, "loss": 0.9705, "step": 457 }, { "epoch": 0.021701018715944088, "grad_norm": 0.39453125, "learning_rate": 0.0001997775125027302, "loss": 1.2315, "step": 458 }, { "epoch": 0.021748400852878463, "grad_norm": 0.455078125, "learning_rate": 0.00019977651851697136, "loss": 1.1845, "step": 459 }, { "epoch": 0.02179578298981284, "grad_norm": 0.310546875, "learning_rate": 0.00019977552231827564, "loss": 0.4129, "step": 460 }, { "epoch": 0.021843165126747215, "grad_norm": 0.70703125, "learning_rate": 0.00019977452390666515, "loss": 0.3194, "step": 461 }, { "epoch": 0.021890547263681594, "grad_norm": 0.578125, "learning_rate": 0.00019977352328216197, "loss": 0.784, "step": 462 }, { "epoch": 0.02193792940061597, "grad_norm": 0.431640625, "learning_rate": 0.0001997725204447883, "loss": 1.1318, "step": 463 }, { "epoch": 0.021985311537550345, "grad_norm": 0.400390625, "learning_rate": 0.00019977151539456642, "loss": 1.6012, "step": 464 }, { "epoch": 0.02203269367448472, "grad_norm": 0.69921875, "learning_rate": 0.0001997705081315186, "loss": 0.3294, "step": 465 }, { "epoch": 0.022080075811419096, "grad_norm": 0.9453125, "learning_rate": 0.0001997694986556672, "loss": 0.5496, "step": 466 }, { "epoch": 0.022127457948353472, "grad_norm": 0.69921875, "learning_rate": 0.00019976848696703456, "loss": 1.0092, "step": 467 }, { "epoch": 0.022174840085287847, "grad_norm": 0.375, "learning_rate": 0.00019976747306564314, "loss": 0.9297, "step": 468 }, { "epoch": 0.022222222222222223, "grad_norm": 0.451171875, "learning_rate": 0.00019976645695151546, "loss": 1.193, "step": 469 }, { "epoch": 0.0222696043591566, "grad_norm": 0.48828125, "learning_rate": 0.00019976543862467404, "loss": 1.4561, "step": 470 }, { "epoch": 0.022316986496090974, "grad_norm": 1.0234375, "learning_rate": 0.0001997644180851414, "loss": 0.9823, "step": 471 }, { "epoch": 0.02236436863302535, "grad_norm": 0.7578125, "learning_rate": 0.00019976339533294028, "loss": 0.5589, "step": 472 }, { "epoch": 0.022411750769959726, "grad_norm": 0.54296875, "learning_rate": 0.00019976237036809332, "loss": 0.8081, "step": 473 }, { "epoch": 0.0224591329068941, "grad_norm": 0.59375, "learning_rate": 0.00019976134319062323, "loss": 0.6358, "step": 474 }, { "epoch": 0.022506515043828477, "grad_norm": 0.7421875, "learning_rate": 0.0001997603138005528, "loss": 0.8958, "step": 475 }, { "epoch": 0.022553897180762852, "grad_norm": 0.3828125, "learning_rate": 0.0001997592821979049, "loss": 1.0532, "step": 476 }, { "epoch": 0.022601279317697228, "grad_norm": 0.400390625, "learning_rate": 0.00019975824838270234, "loss": 1.1853, "step": 477 }, { "epoch": 0.022648661454631604, "grad_norm": 0.640625, "learning_rate": 0.00019975721235496811, "loss": 0.4938, "step": 478 }, { "epoch": 0.02269604359156598, "grad_norm": 0.52734375, "learning_rate": 0.00019975617411472518, "loss": 1.1591, "step": 479 }, { "epoch": 0.022743425728500355, "grad_norm": 0.359375, "learning_rate": 0.00019975513366199654, "loss": 1.3403, "step": 480 }, { "epoch": 0.02279080786543473, "grad_norm": 0.6015625, "learning_rate": 0.0001997540909968053, "loss": 1.0315, "step": 481 }, { "epoch": 0.022838190002369106, "grad_norm": 0.92578125, "learning_rate": 0.00019975304611917456, "loss": 0.3196, "step": 482 }, { "epoch": 0.022885572139303482, "grad_norm": 0.349609375, "learning_rate": 0.00019975199902912754, "loss": 0.6876, "step": 483 }, { "epoch": 0.022932954276237857, "grad_norm": 0.55859375, "learning_rate": 0.0001997509497266874, "loss": 1.6713, "step": 484 }, { "epoch": 0.022980336413172233, "grad_norm": 0.8046875, "learning_rate": 0.00019974989821187745, "loss": 0.1393, "step": 485 }, { "epoch": 0.02302771855010661, "grad_norm": 0.66796875, "learning_rate": 0.00019974884448472103, "loss": 0.2337, "step": 486 }, { "epoch": 0.023075100687040984, "grad_norm": 0.64453125, "learning_rate": 0.00019974778854524148, "loss": 0.1739, "step": 487 }, { "epoch": 0.02312248282397536, "grad_norm": 0.486328125, "learning_rate": 0.00019974673039346223, "loss": 1.2593, "step": 488 }, { "epoch": 0.023169864960909736, "grad_norm": 0.6640625, "learning_rate": 0.00019974567002940675, "loss": 1.5246, "step": 489 }, { "epoch": 0.02321724709784411, "grad_norm": 0.419921875, "learning_rate": 0.0001997446074530985, "loss": 0.9928, "step": 490 }, { "epoch": 0.023264629234778487, "grad_norm": 0.5, "learning_rate": 0.00019974354266456116, "loss": 0.9088, "step": 491 }, { "epoch": 0.023312011371712866, "grad_norm": 0.486328125, "learning_rate": 0.00019974247566381824, "loss": 1.3176, "step": 492 }, { "epoch": 0.02335939350864724, "grad_norm": 0.51953125, "learning_rate": 0.00019974140645089347, "loss": 0.9397, "step": 493 }, { "epoch": 0.023406775645581617, "grad_norm": 0.48828125, "learning_rate": 0.00019974033502581054, "loss": 0.8935, "step": 494 }, { "epoch": 0.023454157782515993, "grad_norm": 0.46875, "learning_rate": 0.00019973926138859324, "loss": 1.5524, "step": 495 }, { "epoch": 0.02350153991945037, "grad_norm": 0.4921875, "learning_rate": 0.00019973818553926535, "loss": 1.1607, "step": 496 }, { "epoch": 0.023548922056384744, "grad_norm": 0.44140625, "learning_rate": 0.00019973710747785075, "loss": 1.2721, "step": 497 }, { "epoch": 0.02359630419331912, "grad_norm": 0.375, "learning_rate": 0.00019973602720437336, "loss": 0.7482, "step": 498 }, { "epoch": 0.023643686330253495, "grad_norm": 0.458984375, "learning_rate": 0.00019973494471885707, "loss": 1.1923, "step": 499 }, { "epoch": 0.02369106846718787, "grad_norm": 0.3828125, "learning_rate": 0.00019973386002132597, "loss": 1.0473, "step": 500 }, { "epoch": 0.023738450604122247, "grad_norm": 0.462890625, "learning_rate": 0.00019973277311180409, "loss": 1.5173, "step": 501 }, { "epoch": 0.023785832741056622, "grad_norm": 0.84765625, "learning_rate": 0.00019973168399031548, "loss": 0.5056, "step": 502 }, { "epoch": 0.023833214877990998, "grad_norm": 0.6484375, "learning_rate": 0.00019973059265688438, "loss": 0.9238, "step": 503 }, { "epoch": 0.023880597014925373, "grad_norm": 0.984375, "learning_rate": 0.000199729499111535, "loss": 0.3908, "step": 504 }, { "epoch": 0.02392797915185975, "grad_norm": 0.392578125, "learning_rate": 0.00019972840335429152, "loss": 1.0612, "step": 505 }, { "epoch": 0.023975361288794125, "grad_norm": 0.412109375, "learning_rate": 0.00019972730538517827, "loss": 1.0104, "step": 506 }, { "epoch": 0.0240227434257285, "grad_norm": 0.38671875, "learning_rate": 0.00019972620520421964, "loss": 1.2123, "step": 507 }, { "epoch": 0.024070125562662876, "grad_norm": 0.41015625, "learning_rate": 0.00019972510281144, "loss": 1.2562, "step": 508 }, { "epoch": 0.02411750769959725, "grad_norm": 0.75, "learning_rate": 0.00019972399820686378, "loss": 0.3947, "step": 509 }, { "epoch": 0.024164889836531627, "grad_norm": 0.333984375, "learning_rate": 0.00019972289139051551, "loss": 0.5638, "step": 510 }, { "epoch": 0.024212271973466003, "grad_norm": 0.341796875, "learning_rate": 0.00019972178236241973, "loss": 0.053, "step": 511 }, { "epoch": 0.02425965411040038, "grad_norm": 0.451171875, "learning_rate": 0.00019972067112260103, "loss": 0.997, "step": 512 }, { "epoch": 0.024307036247334754, "grad_norm": 0.43359375, "learning_rate": 0.0001997195576710841, "loss": 1.133, "step": 513 }, { "epoch": 0.02435441838426913, "grad_norm": 0.51953125, "learning_rate": 0.00019971844200789357, "loss": 0.9191, "step": 514 }, { "epoch": 0.024401800521203505, "grad_norm": 0.45703125, "learning_rate": 0.0001997173241330542, "loss": 0.9256, "step": 515 }, { "epoch": 0.02444918265813788, "grad_norm": 0.64453125, "learning_rate": 0.0001997162040465908, "loss": 1.015, "step": 516 }, { "epoch": 0.024496564795072256, "grad_norm": 0.625, "learning_rate": 0.00019971508174852822, "loss": 0.9148, "step": 517 }, { "epoch": 0.024543946932006632, "grad_norm": 0.478515625, "learning_rate": 0.00019971395723889133, "loss": 1.2158, "step": 518 }, { "epoch": 0.024591329068941008, "grad_norm": 0.4921875, "learning_rate": 0.0001997128305177051, "loss": 0.9604, "step": 519 }, { "epoch": 0.024638711205875383, "grad_norm": 0.41015625, "learning_rate": 0.00019971170158499443, "loss": 1.173, "step": 520 }, { "epoch": 0.024686093342809762, "grad_norm": 0.462890625, "learning_rate": 0.0001997105704407845, "loss": 1.0204, "step": 521 }, { "epoch": 0.024733475479744138, "grad_norm": 0.427734375, "learning_rate": 0.0001997094370851003, "loss": 1.2085, "step": 522 }, { "epoch": 0.024780857616678514, "grad_norm": 0.408203125, "learning_rate": 0.00019970830151796697, "loss": 0.9763, "step": 523 }, { "epoch": 0.02482823975361289, "grad_norm": 0.5625, "learning_rate": 0.00019970716373940976, "loss": 1.2341, "step": 524 }, { "epoch": 0.024875621890547265, "grad_norm": 0.9140625, "learning_rate": 0.0001997060237494538, "loss": 0.5408, "step": 525 }, { "epoch": 0.02492300402748164, "grad_norm": 0.12353515625, "learning_rate": 0.0001997048815481245, "loss": 0.0094, "step": 526 }, { "epoch": 0.024970386164416016, "grad_norm": 0.5703125, "learning_rate": 0.0001997037371354471, "loss": 1.1377, "step": 527 }, { "epoch": 0.025017768301350392, "grad_norm": 0.4140625, "learning_rate": 0.000199702590511447, "loss": 0.3099, "step": 528 }, { "epoch": 0.025065150438284767, "grad_norm": 0.396484375, "learning_rate": 0.00019970144167614967, "loss": 1.0221, "step": 529 }, { "epoch": 0.025112532575219143, "grad_norm": 0.64453125, "learning_rate": 0.00019970029062958054, "loss": 0.4058, "step": 530 }, { "epoch": 0.02515991471215352, "grad_norm": 0.408203125, "learning_rate": 0.00019969913737176515, "loss": 1.2623, "step": 531 }, { "epoch": 0.025207296849087894, "grad_norm": 0.58203125, "learning_rate": 0.0001996979819027291, "loss": 1.618, "step": 532 }, { "epoch": 0.02525467898602227, "grad_norm": 1.046875, "learning_rate": 0.00019969682422249803, "loss": 0.4917, "step": 533 }, { "epoch": 0.025302061122956646, "grad_norm": 0.470703125, "learning_rate": 0.00019969566433109757, "loss": 1.1174, "step": 534 }, { "epoch": 0.02534944325989102, "grad_norm": 0.5078125, "learning_rate": 0.00019969450222855347, "loss": 1.2709, "step": 535 }, { "epoch": 0.025396825396825397, "grad_norm": 0.435546875, "learning_rate": 0.0001996933379148915, "loss": 1.5649, "step": 536 }, { "epoch": 0.025444207533759772, "grad_norm": 0.30859375, "learning_rate": 0.00019969217139013745, "loss": 0.5722, "step": 537 }, { "epoch": 0.025491589670694148, "grad_norm": 0.361328125, "learning_rate": 0.00019969100265431727, "loss": 1.0741, "step": 538 }, { "epoch": 0.025538971807628524, "grad_norm": 0.396484375, "learning_rate": 0.0001996898317074568, "loss": 0.2779, "step": 539 }, { "epoch": 0.0255863539445629, "grad_norm": 0.412109375, "learning_rate": 0.00019968865854958208, "loss": 0.9093, "step": 540 }, { "epoch": 0.025633736081497275, "grad_norm": 0.478515625, "learning_rate": 0.00019968748318071908, "loss": 1.1398, "step": 541 }, { "epoch": 0.02568111821843165, "grad_norm": 0.54296875, "learning_rate": 0.0001996863056008939, "loss": 1.2883, "step": 542 }, { "epoch": 0.025728500355366026, "grad_norm": 0.5546875, "learning_rate": 0.0001996851258101326, "loss": 1.5229, "step": 543 }, { "epoch": 0.025775882492300402, "grad_norm": 0.47265625, "learning_rate": 0.00019968394380846146, "loss": 0.4906, "step": 544 }, { "epoch": 0.025823264629234777, "grad_norm": 0.5390625, "learning_rate": 0.0001996827595959066, "loss": 0.8691, "step": 545 }, { "epoch": 0.025870646766169153, "grad_norm": 0.46484375, "learning_rate": 0.00019968157317249428, "loss": 0.9585, "step": 546 }, { "epoch": 0.02591802890310353, "grad_norm": 0.4296875, "learning_rate": 0.00019968038453825084, "loss": 0.9995, "step": 547 }, { "epoch": 0.025965411040037904, "grad_norm": 0.54296875, "learning_rate": 0.00019967919369320267, "loss": 0.9304, "step": 548 }, { "epoch": 0.02601279317697228, "grad_norm": 0.6328125, "learning_rate": 0.00019967800063737617, "loss": 0.2167, "step": 549 }, { "epoch": 0.026060175313906656, "grad_norm": 0.56640625, "learning_rate": 0.00019967680537079775, "loss": 0.8259, "step": 550 }, { "epoch": 0.026107557450841035, "grad_norm": 0.2421875, "learning_rate": 0.000199675607893494, "loss": 0.0206, "step": 551 }, { "epoch": 0.02615493958777541, "grad_norm": 0.455078125, "learning_rate": 0.0001996744082054914, "loss": 1.3086, "step": 552 }, { "epoch": 0.026202321724709786, "grad_norm": 0.427734375, "learning_rate": 0.0001996732063068166, "loss": 1.187, "step": 553 }, { "epoch": 0.02624970386164416, "grad_norm": 0.97265625, "learning_rate": 0.00019967200219749628, "loss": 0.6433, "step": 554 }, { "epoch": 0.026297085998578537, "grad_norm": 0.671875, "learning_rate": 0.0001996707958775571, "loss": 0.8348, "step": 555 }, { "epoch": 0.026344468135512913, "grad_norm": 0.58984375, "learning_rate": 0.00019966958734702584, "loss": 1.079, "step": 556 }, { "epoch": 0.02639185027244729, "grad_norm": 0.373046875, "learning_rate": 0.00019966837660592926, "loss": 1.209, "step": 557 }, { "epoch": 0.026439232409381664, "grad_norm": 0.39453125, "learning_rate": 0.0001996671636542943, "loss": 0.5099, "step": 558 }, { "epoch": 0.02648661454631604, "grad_norm": 0.43359375, "learning_rate": 0.0001996659484921478, "loss": 1.0275, "step": 559 }, { "epoch": 0.026533996683250415, "grad_norm": 0.84765625, "learning_rate": 0.00019966473111951669, "loss": 0.6466, "step": 560 }, { "epoch": 0.02658137882018479, "grad_norm": 0.41015625, "learning_rate": 0.000199663511536428, "loss": 1.2751, "step": 561 }, { "epoch": 0.026628760957119166, "grad_norm": 0.76953125, "learning_rate": 0.0001996622897429088, "loss": 0.6194, "step": 562 }, { "epoch": 0.026676143094053542, "grad_norm": 0.48828125, "learning_rate": 0.00019966106573898618, "loss": 0.6814, "step": 563 }, { "epoch": 0.026723525230987918, "grad_norm": 0.4375, "learning_rate": 0.00019965983952468727, "loss": 0.4884, "step": 564 }, { "epoch": 0.026770907367922293, "grad_norm": 0.53515625, "learning_rate": 0.00019965861110003927, "loss": 0.7638, "step": 565 }, { "epoch": 0.02681828950485667, "grad_norm": 0.5390625, "learning_rate": 0.00019965738046506945, "loss": 1.5112, "step": 566 }, { "epoch": 0.026865671641791045, "grad_norm": 0.3828125, "learning_rate": 0.000199656147619805, "loss": 0.6674, "step": 567 }, { "epoch": 0.02691305377872542, "grad_norm": 0.486328125, "learning_rate": 0.00019965491256427345, "loss": 1.3322, "step": 568 }, { "epoch": 0.026960435915659796, "grad_norm": 0.46484375, "learning_rate": 0.000199653675298502, "loss": 1.0886, "step": 569 }, { "epoch": 0.02700781805259417, "grad_norm": 0.484375, "learning_rate": 0.00019965243582251824, "loss": 0.4547, "step": 570 }, { "epoch": 0.027055200189528547, "grad_norm": 0.64453125, "learning_rate": 0.00019965119413634956, "loss": 0.7786, "step": 571 }, { "epoch": 0.027102582326462923, "grad_norm": 0.41796875, "learning_rate": 0.0001996499502400236, "loss": 1.0901, "step": 572 }, { "epoch": 0.0271499644633973, "grad_norm": 0.408203125, "learning_rate": 0.00019964870413356783, "loss": 0.6203, "step": 573 }, { "epoch": 0.027197346600331674, "grad_norm": 0.5625, "learning_rate": 0.00019964745581700993, "loss": 0.309, "step": 574 }, { "epoch": 0.02724472873726605, "grad_norm": 0.423828125, "learning_rate": 0.00019964620529037763, "loss": 0.8369, "step": 575 }, { "epoch": 0.027292110874200425, "grad_norm": 0.486328125, "learning_rate": 0.0001996449525536986, "loss": 1.0702, "step": 576 }, { "epoch": 0.0273394930111348, "grad_norm": 0.5234375, "learning_rate": 0.00019964369760700073, "loss": 0.7614, "step": 577 }, { "epoch": 0.027386875148069176, "grad_norm": 0.6875, "learning_rate": 0.0001996424404503117, "loss": 0.8197, "step": 578 }, { "epoch": 0.027434257285003552, "grad_norm": 0.451171875, "learning_rate": 0.00019964118108365954, "loss": 0.9399, "step": 579 }, { "epoch": 0.027481639421937928, "grad_norm": 0.400390625, "learning_rate": 0.0001996399195070721, "loss": 1.1644, "step": 580 }, { "epoch": 0.027529021558872307, "grad_norm": 0.439453125, "learning_rate": 0.00019963865572057734, "loss": 1.0886, "step": 581 }, { "epoch": 0.027576403695806682, "grad_norm": 0.93359375, "learning_rate": 0.00019963738972420336, "loss": 1.1885, "step": 582 }, { "epoch": 0.027623785832741058, "grad_norm": 0.498046875, "learning_rate": 0.00019963612151797819, "loss": 0.2672, "step": 583 }, { "epoch": 0.027671167969675434, "grad_norm": 0.4609375, "learning_rate": 0.00019963485110193, "loss": 1.0301, "step": 584 }, { "epoch": 0.02771855010660981, "grad_norm": 0.59765625, "learning_rate": 0.00019963357847608692, "loss": 0.1862, "step": 585 }, { "epoch": 0.027765932243544185, "grad_norm": 0.431640625, "learning_rate": 0.0001996323036404772, "loss": 0.9836, "step": 586 }, { "epoch": 0.02781331438047856, "grad_norm": 0.54296875, "learning_rate": 0.00019963102659512912, "loss": 1.0559, "step": 587 }, { "epoch": 0.027860696517412936, "grad_norm": 0.396484375, "learning_rate": 0.00019962974734007095, "loss": 0.7318, "step": 588 }, { "epoch": 0.027908078654347312, "grad_norm": 0.419921875, "learning_rate": 0.00019962846587533113, "loss": 0.8617, "step": 589 }, { "epoch": 0.027955460791281687, "grad_norm": 0.462890625, "learning_rate": 0.00019962718220093806, "loss": 1.2558, "step": 590 }, { "epoch": 0.028002842928216063, "grad_norm": 0.578125, "learning_rate": 0.0001996258963169202, "loss": 1.2238, "step": 591 }, { "epoch": 0.02805022506515044, "grad_norm": 0.5234375, "learning_rate": 0.00019962460822330608, "loss": 0.9095, "step": 592 }, { "epoch": 0.028097607202084814, "grad_norm": 0.40625, "learning_rate": 0.00019962331792012426, "loss": 1.1602, "step": 593 }, { "epoch": 0.02814498933901919, "grad_norm": 0.4375, "learning_rate": 0.0001996220254074034, "loss": 0.6363, "step": 594 }, { "epoch": 0.028192371475953566, "grad_norm": 0.498046875, "learning_rate": 0.00019962073068517205, "loss": 0.1952, "step": 595 }, { "epoch": 0.02823975361288794, "grad_norm": 0.51953125, "learning_rate": 0.0001996194337534591, "loss": 0.2863, "step": 596 }, { "epoch": 0.028287135749822317, "grad_norm": 0.44140625, "learning_rate": 0.00019961813461229314, "loss": 1.1668, "step": 597 }, { "epoch": 0.028334517886756692, "grad_norm": 0.478515625, "learning_rate": 0.0001996168332617031, "loss": 1.333, "step": 598 }, { "epoch": 0.028381900023691068, "grad_norm": 0.392578125, "learning_rate": 0.00019961552970171778, "loss": 1.0673, "step": 599 }, { "epoch": 0.028429282160625444, "grad_norm": 0.296875, "learning_rate": 0.00019961422393236617, "loss": 1.4091, "step": 600 }, { "epoch": 0.02847666429755982, "grad_norm": 0.51171875, "learning_rate": 0.00019961291595367714, "loss": 1.3616, "step": 601 }, { "epoch": 0.028524046434494195, "grad_norm": 0.45703125, "learning_rate": 0.00019961160576567973, "loss": 1.1052, "step": 602 }, { "epoch": 0.02857142857142857, "grad_norm": 0.9453125, "learning_rate": 0.00019961029336840302, "loss": 0.095, "step": 603 }, { "epoch": 0.028618810708362946, "grad_norm": 0.43359375, "learning_rate": 0.0001996089787618761, "loss": 1.1025, "step": 604 }, { "epoch": 0.028666192845297322, "grad_norm": 0.466796875, "learning_rate": 0.00019960766194612815, "loss": 0.6186, "step": 605 }, { "epoch": 0.028713574982231697, "grad_norm": 0.6484375, "learning_rate": 0.0001996063429211883, "loss": 0.8258, "step": 606 }, { "epoch": 0.028760957119166073, "grad_norm": 1.046875, "learning_rate": 0.0001996050216870859, "loss": 1.029, "step": 607 }, { "epoch": 0.02880833925610045, "grad_norm": 0.84375, "learning_rate": 0.0001996036982438502, "loss": 0.6272, "step": 608 }, { "epoch": 0.028855721393034824, "grad_norm": 0.421875, "learning_rate": 0.0001996023725915106, "loss": 0.7262, "step": 609 }, { "epoch": 0.0289031035299692, "grad_norm": 0.453125, "learning_rate": 0.00019960104473009643, "loss": 1.2872, "step": 610 }, { "epoch": 0.02895048566690358, "grad_norm": 0.4453125, "learning_rate": 0.0001995997146596372, "loss": 0.0761, "step": 611 }, { "epoch": 0.028997867803837955, "grad_norm": 0.52734375, "learning_rate": 0.0001995983823801624, "loss": 1.1936, "step": 612 }, { "epoch": 0.02904524994077233, "grad_norm": 0.47265625, "learning_rate": 0.00019959704789170152, "loss": 1.2569, "step": 613 }, { "epoch": 0.029092632077706706, "grad_norm": 0.890625, "learning_rate": 0.0001995957111942842, "loss": 0.5025, "step": 614 }, { "epoch": 0.02914001421464108, "grad_norm": 0.443359375, "learning_rate": 0.00019959437228794013, "loss": 0.9133, "step": 615 }, { "epoch": 0.029187396351575457, "grad_norm": 0.62109375, "learning_rate": 0.00019959303117269897, "loss": 0.635, "step": 616 }, { "epoch": 0.029234778488509833, "grad_norm": 0.416015625, "learning_rate": 0.00019959168784859044, "loss": 0.3316, "step": 617 }, { "epoch": 0.02928216062544421, "grad_norm": 1.171875, "learning_rate": 0.00019959034231564434, "loss": 0.7864, "step": 618 }, { "epoch": 0.029329542762378584, "grad_norm": 0.44921875, "learning_rate": 0.00019958899457389056, "loss": 0.9831, "step": 619 }, { "epoch": 0.02937692489931296, "grad_norm": 0.48046875, "learning_rate": 0.00019958764462335894, "loss": 1.1185, "step": 620 }, { "epoch": 0.029424307036247335, "grad_norm": 0.3046875, "learning_rate": 0.00019958629246407945, "loss": 0.511, "step": 621 }, { "epoch": 0.02947168917318171, "grad_norm": 0.546875, "learning_rate": 0.00019958493809608206, "loss": 0.1045, "step": 622 }, { "epoch": 0.029519071310116086, "grad_norm": 0.37109375, "learning_rate": 0.0001995835815193968, "loss": 0.8508, "step": 623 }, { "epoch": 0.029566453447050462, "grad_norm": 0.578125, "learning_rate": 0.0001995822227340538, "loss": 1.005, "step": 624 }, { "epoch": 0.029613835583984838, "grad_norm": 0.4609375, "learning_rate": 0.00019958086174008314, "loss": 0.9452, "step": 625 }, { "epoch": 0.029661217720919213, "grad_norm": 0.0322265625, "learning_rate": 0.00019957949853751506, "loss": 0.0025, "step": 626 }, { "epoch": 0.02970859985785359, "grad_norm": 0.60546875, "learning_rate": 0.00019957813312637977, "loss": 0.6284, "step": 627 }, { "epoch": 0.029755981994787965, "grad_norm": 0.5078125, "learning_rate": 0.00019957676550670753, "loss": 0.4623, "step": 628 }, { "epoch": 0.02980336413172234, "grad_norm": 0.5234375, "learning_rate": 0.00019957539567852872, "loss": 1.3487, "step": 629 }, { "epoch": 0.029850746268656716, "grad_norm": 0.84765625, "learning_rate": 0.00019957402364187367, "loss": 0.3309, "step": 630 }, { "epoch": 0.02989812840559109, "grad_norm": 0.73828125, "learning_rate": 0.00019957264939677287, "loss": 0.4242, "step": 631 }, { "epoch": 0.029945510542525467, "grad_norm": 0.404296875, "learning_rate": 0.00019957127294325676, "loss": 0.2819, "step": 632 }, { "epoch": 0.029992892679459843, "grad_norm": 1.3671875, "learning_rate": 0.00019956989428135584, "loss": 0.5646, "step": 633 }, { "epoch": 0.03004027481639422, "grad_norm": 0.6015625, "learning_rate": 0.00019956851341110075, "loss": 1.522, "step": 634 }, { "epoch": 0.030087656953328594, "grad_norm": 0.52734375, "learning_rate": 0.00019956713033252211, "loss": 1.0439, "step": 635 }, { "epoch": 0.03013503909026297, "grad_norm": 1.2109375, "learning_rate": 0.00019956574504565054, "loss": 0.4926, "step": 636 }, { "epoch": 0.030182421227197345, "grad_norm": 0.265625, "learning_rate": 0.0001995643575505168, "loss": 0.0841, "step": 637 }, { "epoch": 0.03022980336413172, "grad_norm": 0.1298828125, "learning_rate": 0.00019956296784715168, "loss": 0.0148, "step": 638 }, { "epoch": 0.030277185501066096, "grad_norm": 0.7421875, "learning_rate": 0.00019956157593558596, "loss": 0.3761, "step": 639 }, { "epoch": 0.030324567638000476, "grad_norm": 0.55859375, "learning_rate": 0.00019956018181585054, "loss": 1.1064, "step": 640 }, { "epoch": 0.03037194977493485, "grad_norm": 0.5078125, "learning_rate": 0.00019955878548797636, "loss": 0.987, "step": 641 }, { "epoch": 0.030419331911869227, "grad_norm": 0.3828125, "learning_rate": 0.00019955738695199432, "loss": 0.3002, "step": 642 }, { "epoch": 0.030466714048803602, "grad_norm": 0.41796875, "learning_rate": 0.00019955598620793552, "loss": 0.7511, "step": 643 }, { "epoch": 0.030514096185737978, "grad_norm": 0.3359375, "learning_rate": 0.00019955458325583096, "loss": 0.936, "step": 644 }, { "epoch": 0.030561478322672354, "grad_norm": 0.498046875, "learning_rate": 0.00019955317809571178, "loss": 1.0025, "step": 645 }, { "epoch": 0.03060886045960673, "grad_norm": 0.52734375, "learning_rate": 0.0001995517707276092, "loss": 1.0406, "step": 646 }, { "epoch": 0.030656242596541105, "grad_norm": 0.828125, "learning_rate": 0.00019955036115155435, "loss": 1.1344, "step": 647 }, { "epoch": 0.03070362473347548, "grad_norm": 0.77734375, "learning_rate": 0.0001995489493675785, "loss": 0.1149, "step": 648 }, { "epoch": 0.030751006870409856, "grad_norm": 0.45703125, "learning_rate": 0.00019954753537571303, "loss": 1.2429, "step": 649 }, { "epoch": 0.030798389007344232, "grad_norm": 0.3359375, "learning_rate": 0.00019954611917598922, "loss": 0.0303, "step": 650 }, { "epoch": 0.030845771144278607, "grad_norm": 0.65234375, "learning_rate": 0.00019954470076843854, "loss": 0.8687, "step": 651 }, { "epoch": 0.030893153281212983, "grad_norm": 0.7109375, "learning_rate": 0.00019954328015309243, "loss": 0.2572, "step": 652 }, { "epoch": 0.03094053541814736, "grad_norm": 0.78515625, "learning_rate": 0.0001995418573299824, "loss": 0.5588, "step": 653 }, { "epoch": 0.030987917555081734, "grad_norm": 0.640625, "learning_rate": 0.00019954043229913996, "loss": 0.1976, "step": 654 }, { "epoch": 0.03103529969201611, "grad_norm": 0.376953125, "learning_rate": 0.0001995390050605968, "loss": 0.5403, "step": 655 }, { "epoch": 0.031082681828950486, "grad_norm": 0.49609375, "learning_rate": 0.00019953757561438454, "loss": 1.1379, "step": 656 }, { "epoch": 0.03113006396588486, "grad_norm": 0.6953125, "learning_rate": 0.00019953614396053487, "loss": 0.4124, "step": 657 }, { "epoch": 0.031177446102819237, "grad_norm": 0.51171875, "learning_rate": 0.00019953471009907952, "loss": 1.3503, "step": 658 }, { "epoch": 0.031224828239753612, "grad_norm": 0.478515625, "learning_rate": 0.00019953327403005034, "loss": 0.8431, "step": 659 }, { "epoch": 0.03127221037668799, "grad_norm": 0.42578125, "learning_rate": 0.00019953183575347914, "loss": 0.8172, "step": 660 }, { "epoch": 0.03131959251362237, "grad_norm": 0.69140625, "learning_rate": 0.00019953039526939784, "loss": 0.2071, "step": 661 }, { "epoch": 0.03136697465055674, "grad_norm": 0.51171875, "learning_rate": 0.00019952895257783842, "loss": 0.2853, "step": 662 }, { "epoch": 0.03141435678749112, "grad_norm": 0.39453125, "learning_rate": 0.00019952750767883283, "loss": 0.8138, "step": 663 }, { "epoch": 0.03146173892442549, "grad_norm": 0.494140625, "learning_rate": 0.00019952606057241313, "loss": 1.1287, "step": 664 }, { "epoch": 0.03150912106135987, "grad_norm": 0.40234375, "learning_rate": 0.0001995246112586114, "loss": 0.8817, "step": 665 }, { "epoch": 0.03155650319829424, "grad_norm": 1.3671875, "learning_rate": 0.00019952315973745984, "loss": 0.6365, "step": 666 }, { "epoch": 0.03160388533522862, "grad_norm": 0.51171875, "learning_rate": 0.00019952170600899058, "loss": 1.1044, "step": 667 }, { "epoch": 0.03165126747216299, "grad_norm": 0.859375, "learning_rate": 0.00019952025007323587, "loss": 0.631, "step": 668 }, { "epoch": 0.03169864960909737, "grad_norm": 0.423828125, "learning_rate": 0.00019951879193022806, "loss": 0.6363, "step": 669 }, { "epoch": 0.031746031746031744, "grad_norm": 1.0703125, "learning_rate": 0.0001995173315799994, "loss": 0.662, "step": 670 }, { "epoch": 0.03179341388296612, "grad_norm": 0.71484375, "learning_rate": 0.00019951586902258237, "loss": 0.2984, "step": 671 }, { "epoch": 0.031840796019900496, "grad_norm": 0.55078125, "learning_rate": 0.00019951440425800934, "loss": 0.2706, "step": 672 }, { "epoch": 0.031888178156834875, "grad_norm": 0.341796875, "learning_rate": 0.00019951293728631282, "loss": 0.9611, "step": 673 }, { "epoch": 0.03193556029376925, "grad_norm": 0.376953125, "learning_rate": 0.00019951146810752535, "loss": 0.2135, "step": 674 }, { "epoch": 0.031982942430703626, "grad_norm": 0.41796875, "learning_rate": 0.0001995099967216795, "loss": 0.224, "step": 675 }, { "epoch": 0.032030324567638, "grad_norm": 1.0703125, "learning_rate": 0.00019950852312880795, "loss": 0.7542, "step": 676 }, { "epoch": 0.03207770670457238, "grad_norm": 0.42578125, "learning_rate": 0.0001995070473289433, "loss": 0.7266, "step": 677 }, { "epoch": 0.03212508884150675, "grad_norm": 0.373046875, "learning_rate": 0.0001995055693221184, "loss": 0.2165, "step": 678 }, { "epoch": 0.03217247097844113, "grad_norm": 1.2421875, "learning_rate": 0.0001995040891083659, "loss": 1.2225, "step": 679 }, { "epoch": 0.0322198531153755, "grad_norm": 0.60546875, "learning_rate": 0.00019950260668771873, "loss": 1.2573, "step": 680 }, { "epoch": 0.03226723525230988, "grad_norm": 0.3828125, "learning_rate": 0.0001995011220602097, "loss": 0.1674, "step": 681 }, { "epoch": 0.03231461738924425, "grad_norm": 0.3515625, "learning_rate": 0.00019949963522587178, "loss": 0.4874, "step": 682 }, { "epoch": 0.03236199952617863, "grad_norm": 0.52734375, "learning_rate": 0.00019949814618473792, "loss": 1.2075, "step": 683 }, { "epoch": 0.032409381663113, "grad_norm": 0.427734375, "learning_rate": 0.00019949665493684119, "loss": 1.2133, "step": 684 }, { "epoch": 0.03245676380004738, "grad_norm": 0.431640625, "learning_rate": 0.00019949516148221462, "loss": 1.1786, "step": 685 }, { "epoch": 0.03250414593698176, "grad_norm": 0.408203125, "learning_rate": 0.00019949366582089134, "loss": 1.2238, "step": 686 }, { "epoch": 0.03255152807391613, "grad_norm": 0.734375, "learning_rate": 0.00019949216795290452, "loss": 0.5681, "step": 687 }, { "epoch": 0.03259891021085051, "grad_norm": 0.41796875, "learning_rate": 0.00019949066787828737, "loss": 1.0104, "step": 688 }, { "epoch": 0.032646292347784885, "grad_norm": 0.4375, "learning_rate": 0.0001994891655970732, "loss": 1.0594, "step": 689 }, { "epoch": 0.032693674484719264, "grad_norm": 0.8515625, "learning_rate": 0.00019948766110929533, "loss": 1.0748, "step": 690 }, { "epoch": 0.032741056621653636, "grad_norm": 0.71875, "learning_rate": 0.00019948615441498708, "loss": 0.7086, "step": 691 }, { "epoch": 0.032788438758588015, "grad_norm": 0.419921875, "learning_rate": 0.0001994846455141819, "loss": 0.9541, "step": 692 }, { "epoch": 0.03283582089552239, "grad_norm": 0.5078125, "learning_rate": 0.00019948313440691325, "loss": 1.1669, "step": 693 }, { "epoch": 0.032883203032456766, "grad_norm": 0.44921875, "learning_rate": 0.00019948162109321464, "loss": 1.1247, "step": 694 }, { "epoch": 0.03293058516939114, "grad_norm": 0.53125, "learning_rate": 0.00019948010557311964, "loss": 1.3189, "step": 695 }, { "epoch": 0.03297796730632552, "grad_norm": 0.390625, "learning_rate": 0.00019947858784666187, "loss": 0.8425, "step": 696 }, { "epoch": 0.03302534944325989, "grad_norm": 0.478515625, "learning_rate": 0.00019947706791387498, "loss": 1.0511, "step": 697 }, { "epoch": 0.03307273158019427, "grad_norm": 0.6328125, "learning_rate": 0.00019947554577479267, "loss": 0.6043, "step": 698 }, { "epoch": 0.03312011371712864, "grad_norm": 0.44921875, "learning_rate": 0.00019947402142944868, "loss": 0.9978, "step": 699 }, { "epoch": 0.03316749585406302, "grad_norm": 0.435546875, "learning_rate": 0.00019947249487787692, "loss": 1.0231, "step": 700 }, { "epoch": 0.03321487799099739, "grad_norm": 0.4296875, "learning_rate": 0.00019947096612011112, "loss": 0.7844, "step": 701 }, { "epoch": 0.03326226012793177, "grad_norm": 0.78125, "learning_rate": 0.00019946943515618525, "loss": 0.6324, "step": 702 }, { "epoch": 0.03330964226486614, "grad_norm": 0.357421875, "learning_rate": 0.0001994679019861333, "loss": 0.88, "step": 703 }, { "epoch": 0.03335702440180052, "grad_norm": 1.046875, "learning_rate": 0.0001994663666099892, "loss": 0.0794, "step": 704 }, { "epoch": 0.033404406538734895, "grad_norm": 0.427734375, "learning_rate": 0.00019946482902778704, "loss": 1.3594, "step": 705 }, { "epoch": 0.033451788675669274, "grad_norm": 0.45703125, "learning_rate": 0.00019946328923956092, "loss": 1.3723, "step": 706 }, { "epoch": 0.033499170812603646, "grad_norm": 0.484375, "learning_rate": 0.00019946174724534498, "loss": 0.7756, "step": 707 }, { "epoch": 0.033546552949538025, "grad_norm": 1.2421875, "learning_rate": 0.00019946020304517347, "loss": 0.5788, "step": 708 }, { "epoch": 0.0335939350864724, "grad_norm": 0.60546875, "learning_rate": 0.00019945865663908055, "loss": 0.1196, "step": 709 }, { "epoch": 0.033641317223406776, "grad_norm": 0.921875, "learning_rate": 0.00019945710802710056, "loss": 0.7798, "step": 710 }, { "epoch": 0.03368869936034115, "grad_norm": 0.396484375, "learning_rate": 0.00019945555720926787, "loss": 1.0431, "step": 711 }, { "epoch": 0.03373608149727553, "grad_norm": 0.71875, "learning_rate": 0.00019945400418561686, "loss": 0.2399, "step": 712 }, { "epoch": 0.0337834636342099, "grad_norm": 0.71875, "learning_rate": 0.00019945244895618194, "loss": 0.7883, "step": 713 }, { "epoch": 0.03383084577114428, "grad_norm": 0.328125, "learning_rate": 0.00019945089152099765, "loss": 0.5802, "step": 714 }, { "epoch": 0.03387822790807866, "grad_norm": 0.54296875, "learning_rate": 0.00019944933188009855, "loss": 1.1913, "step": 715 }, { "epoch": 0.03392561004501303, "grad_norm": 0.431640625, "learning_rate": 0.00019944777003351916, "loss": 0.844, "step": 716 }, { "epoch": 0.03397299218194741, "grad_norm": 0.46484375, "learning_rate": 0.00019944620598129418, "loss": 1.5475, "step": 717 }, { "epoch": 0.03402037431888178, "grad_norm": 0.46484375, "learning_rate": 0.00019944463972345827, "loss": 1.2742, "step": 718 }, { "epoch": 0.03406775645581616, "grad_norm": 1.2109375, "learning_rate": 0.00019944307126004614, "loss": 0.3556, "step": 719 }, { "epoch": 0.03411513859275053, "grad_norm": 0.462890625, "learning_rate": 0.00019944150059109266, "loss": 0.7456, "step": 720 }, { "epoch": 0.03416252072968491, "grad_norm": 0.56640625, "learning_rate": 0.0001994399277166326, "loss": 1.1194, "step": 721 }, { "epoch": 0.034209902866619284, "grad_norm": 0.59765625, "learning_rate": 0.00019943835263670084, "loss": 1.0139, "step": 722 }, { "epoch": 0.03425728500355366, "grad_norm": 0.494140625, "learning_rate": 0.0001994367753513324, "loss": 1.2507, "step": 723 }, { "epoch": 0.034304667140488035, "grad_norm": 0.3828125, "learning_rate": 0.00019943519586056212, "loss": 0.8208, "step": 724 }, { "epoch": 0.034352049277422414, "grad_norm": 0.53125, "learning_rate": 0.00019943361416442515, "loss": 1.0066, "step": 725 }, { "epoch": 0.034399431414356786, "grad_norm": 0.62109375, "learning_rate": 0.0001994320302629565, "loss": 1.3527, "step": 726 }, { "epoch": 0.034446813551291165, "grad_norm": 0.57421875, "learning_rate": 0.00019943044415619138, "loss": 0.8505, "step": 727 }, { "epoch": 0.03449419568822554, "grad_norm": 1.15625, "learning_rate": 0.00019942885584416488, "loss": 0.097, "step": 728 }, { "epoch": 0.034541577825159916, "grad_norm": 0.55078125, "learning_rate": 0.00019942726532691228, "loss": 0.2142, "step": 729 }, { "epoch": 0.03458895996209429, "grad_norm": 0.484375, "learning_rate": 0.00019942567260446885, "loss": 0.8172, "step": 730 }, { "epoch": 0.03463634209902867, "grad_norm": 0.55078125, "learning_rate": 0.0001994240776768699, "loss": 0.2967, "step": 731 }, { "epoch": 0.03468372423596304, "grad_norm": 0.287109375, "learning_rate": 0.0001994224805441508, "loss": 0.0132, "step": 732 }, { "epoch": 0.03473110637289742, "grad_norm": 0.5078125, "learning_rate": 0.00019942088120634694, "loss": 1.1554, "step": 733 }, { "epoch": 0.03477848850983179, "grad_norm": 0.5, "learning_rate": 0.00019941927966349388, "loss": 0.7384, "step": 734 }, { "epoch": 0.03482587064676617, "grad_norm": 0.5078125, "learning_rate": 0.0001994176759156271, "loss": 1.2028, "step": 735 }, { "epoch": 0.03487325278370054, "grad_norm": 0.478515625, "learning_rate": 0.00019941606996278215, "loss": 0.3103, "step": 736 }, { "epoch": 0.03492063492063492, "grad_norm": 0.5625, "learning_rate": 0.00019941446180499466, "loss": 0.557, "step": 737 }, { "epoch": 0.034968017057569294, "grad_norm": 0.462890625, "learning_rate": 0.00019941285144230029, "loss": 0.9375, "step": 738 }, { "epoch": 0.03501539919450367, "grad_norm": 0.55859375, "learning_rate": 0.00019941123887473475, "loss": 1.1975, "step": 739 }, { "epoch": 0.035062781331438045, "grad_norm": 1.3203125, "learning_rate": 0.00019940962410233386, "loss": 0.589, "step": 740 }, { "epoch": 0.035110163468372424, "grad_norm": 0.5078125, "learning_rate": 0.0001994080071251334, "loss": 1.0395, "step": 741 }, { "epoch": 0.035157545605306796, "grad_norm": 0.58203125, "learning_rate": 0.0001994063879431692, "loss": 1.1947, "step": 742 }, { "epoch": 0.035204927742241175, "grad_norm": 0.5234375, "learning_rate": 0.0001994047665564772, "loss": 1.1165, "step": 743 }, { "epoch": 0.03525230987917555, "grad_norm": 0.65234375, "learning_rate": 0.00019940314296509337, "loss": 0.31, "step": 744 }, { "epoch": 0.035299692016109926, "grad_norm": 0.81640625, "learning_rate": 0.00019940151716905371, "loss": 0.68, "step": 745 }, { "epoch": 0.035347074153044306, "grad_norm": 1.1328125, "learning_rate": 0.00019939988916839425, "loss": 0.7773, "step": 746 }, { "epoch": 0.03539445628997868, "grad_norm": 0.453125, "learning_rate": 0.00019939825896315115, "loss": 1.0553, "step": 747 }, { "epoch": 0.03544183842691306, "grad_norm": 0.380859375, "learning_rate": 0.00019939662655336053, "loss": 0.7158, "step": 748 }, { "epoch": 0.03548922056384743, "grad_norm": 0.443359375, "learning_rate": 0.00019939499193905862, "loss": 0.2265, "step": 749 }, { "epoch": 0.03553660270078181, "grad_norm": 0.53125, "learning_rate": 0.00019939335512028164, "loss": 0.96, "step": 750 }, { "epoch": 0.03558398483771618, "grad_norm": 0.27734375, "learning_rate": 0.0001993917160970659, "loss": 0.7222, "step": 751 }, { "epoch": 0.03563136697465056, "grad_norm": 0.423828125, "learning_rate": 0.0001993900748694478, "loss": 1.0403, "step": 752 }, { "epoch": 0.03567874911158493, "grad_norm": 0.44140625, "learning_rate": 0.00019938843143746369, "loss": 0.9728, "step": 753 }, { "epoch": 0.03572613124851931, "grad_norm": 0.484375, "learning_rate": 0.00019938678580115005, "loss": 1.0014, "step": 754 }, { "epoch": 0.03577351338545368, "grad_norm": 0.46484375, "learning_rate": 0.00019938513796054333, "loss": 1.2322, "step": 755 }, { "epoch": 0.03582089552238806, "grad_norm": 0.51171875, "learning_rate": 0.00019938348791568013, "loss": 1.2852, "step": 756 }, { "epoch": 0.035868277659322434, "grad_norm": 0.58984375, "learning_rate": 0.00019938183566659703, "loss": 1.0591, "step": 757 }, { "epoch": 0.03591565979625681, "grad_norm": 0.482421875, "learning_rate": 0.00019938018121333064, "loss": 1.7937, "step": 758 }, { "epoch": 0.035963041933191185, "grad_norm": 0.59765625, "learning_rate": 0.00019937852455591772, "loss": 1.0624, "step": 759 }, { "epoch": 0.036010424070125564, "grad_norm": 1.09375, "learning_rate": 0.00019937686569439496, "loss": 0.564, "step": 760 }, { "epoch": 0.036057806207059936, "grad_norm": 1.2265625, "learning_rate": 0.00019937520462879918, "loss": 0.9257, "step": 761 }, { "epoch": 0.036105188343994316, "grad_norm": 0.38671875, "learning_rate": 0.00019937354135916721, "loss": 1.023, "step": 762 }, { "epoch": 0.03615257048092869, "grad_norm": 0.94921875, "learning_rate": 0.00019937187588553595, "loss": 0.7796, "step": 763 }, { "epoch": 0.03619995261786307, "grad_norm": 0.41796875, "learning_rate": 0.00019937020820794233, "loss": 0.0903, "step": 764 }, { "epoch": 0.03624733475479744, "grad_norm": 2.0, "learning_rate": 0.00019936853832642332, "loss": 0.5821, "step": 765 }, { "epoch": 0.03629471689173182, "grad_norm": 0.9140625, "learning_rate": 0.00019936686624101596, "loss": 0.3511, "step": 766 }, { "epoch": 0.03634209902866619, "grad_norm": 0.48046875, "learning_rate": 0.0001993651919517574, "loss": 0.314, "step": 767 }, { "epoch": 0.03638948116560057, "grad_norm": 0.48046875, "learning_rate": 0.00019936351545868467, "loss": 0.8487, "step": 768 }, { "epoch": 0.03643686330253494, "grad_norm": 0.427734375, "learning_rate": 0.000199361836761835, "loss": 0.6685, "step": 769 }, { "epoch": 0.03648424543946932, "grad_norm": 0.83984375, "learning_rate": 0.0001993601558612457, "loss": 0.7398, "step": 770 }, { "epoch": 0.03653162757640369, "grad_norm": 0.5, "learning_rate": 0.00019935847275695393, "loss": 1.2331, "step": 771 }, { "epoch": 0.03657900971333807, "grad_norm": 0.439453125, "learning_rate": 0.00019935678744899705, "loss": 1.2426, "step": 772 }, { "epoch": 0.036626391850272444, "grad_norm": 0.1435546875, "learning_rate": 0.00019935509993741245, "loss": 0.0053, "step": 773 }, { "epoch": 0.03667377398720682, "grad_norm": 0.470703125, "learning_rate": 0.0001993534102222376, "loss": 0.9312, "step": 774 }, { "epoch": 0.0367211561241412, "grad_norm": 0.65234375, "learning_rate": 0.0001993517183035099, "loss": 1.0097, "step": 775 }, { "epoch": 0.036768538261075574, "grad_norm": 0.052734375, "learning_rate": 0.00019935002418126693, "loss": 0.004, "step": 776 }, { "epoch": 0.03681592039800995, "grad_norm": 0.56640625, "learning_rate": 0.00019934832785554625, "loss": 1.0161, "step": 777 }, { "epoch": 0.036863302534944326, "grad_norm": 0.515625, "learning_rate": 0.00019934662932638548, "loss": 0.9238, "step": 778 }, { "epoch": 0.036910684671878705, "grad_norm": 0.5078125, "learning_rate": 0.00019934492859382226, "loss": 0.1305, "step": 779 }, { "epoch": 0.03695806680881308, "grad_norm": 0.5546875, "learning_rate": 0.00019934322565789438, "loss": 1.173, "step": 780 }, { "epoch": 0.037005448945747456, "grad_norm": 0.83984375, "learning_rate": 0.00019934152051863957, "loss": 0.2643, "step": 781 }, { "epoch": 0.03705283108268183, "grad_norm": 0.451171875, "learning_rate": 0.00019933981317609562, "loss": 1.4096, "step": 782 }, { "epoch": 0.03710021321961621, "grad_norm": 0.4765625, "learning_rate": 0.00019933810363030046, "loss": 0.2747, "step": 783 }, { "epoch": 0.03714759535655058, "grad_norm": 0.5078125, "learning_rate": 0.00019933639188129195, "loss": 0.9685, "step": 784 }, { "epoch": 0.03719497749348496, "grad_norm": 0.6484375, "learning_rate": 0.00019933467792910805, "loss": 1.578, "step": 785 }, { "epoch": 0.03724235963041933, "grad_norm": 0.68359375, "learning_rate": 0.00019933296177378684, "loss": 1.4234, "step": 786 }, { "epoch": 0.03728974176735371, "grad_norm": 0.52734375, "learning_rate": 0.00019933124341536633, "loss": 1.0826, "step": 787 }, { "epoch": 0.03733712390428808, "grad_norm": 0.59375, "learning_rate": 0.00019932952285388463, "loss": 0.0578, "step": 788 }, { "epoch": 0.03738450604122246, "grad_norm": 0.392578125, "learning_rate": 0.00019932780008937993, "loss": 1.0469, "step": 789 }, { "epoch": 0.03743188817815683, "grad_norm": 0.57421875, "learning_rate": 0.00019932607512189042, "loss": 0.0993, "step": 790 }, { "epoch": 0.03747927031509121, "grad_norm": 0.83203125, "learning_rate": 0.00019932434795145437, "loss": 0.4258, "step": 791 }, { "epoch": 0.037526652452025584, "grad_norm": 0.2373046875, "learning_rate": 0.0001993226185781101, "loss": 0.0317, "step": 792 }, { "epoch": 0.03757403458895996, "grad_norm": 0.65625, "learning_rate": 0.0001993208870018959, "loss": 0.2297, "step": 793 }, { "epoch": 0.037621416725894335, "grad_norm": 0.2333984375, "learning_rate": 0.00019931915322285025, "loss": 0.0231, "step": 794 }, { "epoch": 0.037668798862828715, "grad_norm": 0.470703125, "learning_rate": 0.00019931741724101153, "loss": 0.7809, "step": 795 }, { "epoch": 0.03771618099976309, "grad_norm": 0.48828125, "learning_rate": 0.00019931567905641834, "loss": 0.8968, "step": 796 }, { "epoch": 0.037763563136697466, "grad_norm": 0.7578125, "learning_rate": 0.00019931393866910914, "loss": 0.3473, "step": 797 }, { "epoch": 0.03781094527363184, "grad_norm": 0.33203125, "learning_rate": 0.00019931219607912258, "loss": 0.0396, "step": 798 }, { "epoch": 0.03785832741056622, "grad_norm": 0.59765625, "learning_rate": 0.00019931045128649725, "loss": 0.4401, "step": 799 }, { "epoch": 0.03790570954750059, "grad_norm": 0.5078125, "learning_rate": 0.00019930870429127193, "loss": 0.345, "step": 800 }, { "epoch": 0.03795309168443497, "grad_norm": 0.52734375, "learning_rate": 0.00019930695509348534, "loss": 1.2678, "step": 801 }, { "epoch": 0.03800047382136934, "grad_norm": 0.498046875, "learning_rate": 0.00019930520369317622, "loss": 0.1882, "step": 802 }, { "epoch": 0.03804785595830372, "grad_norm": 0.59765625, "learning_rate": 0.00019930345009038351, "loss": 0.4099, "step": 803 }, { "epoch": 0.0380952380952381, "grad_norm": 0.40234375, "learning_rate": 0.000199301694285146, "loss": 0.2211, "step": 804 }, { "epoch": 0.03814262023217247, "grad_norm": 0.490234375, "learning_rate": 0.00019929993627750272, "loss": 0.8162, "step": 805 }, { "epoch": 0.03819000236910685, "grad_norm": 0.435546875, "learning_rate": 0.0001992981760674926, "loss": 1.9835, "step": 806 }, { "epoch": 0.03823738450604122, "grad_norm": 0.431640625, "learning_rate": 0.00019929641365515474, "loss": 0.9136, "step": 807 }, { "epoch": 0.0382847666429756, "grad_norm": 1.3125, "learning_rate": 0.00019929464904052812, "loss": 0.6974, "step": 808 }, { "epoch": 0.03833214877990997, "grad_norm": 0.4609375, "learning_rate": 0.00019929288222365202, "loss": 0.7456, "step": 809 }, { "epoch": 0.03837953091684435, "grad_norm": 0.484375, "learning_rate": 0.0001992911132045655, "loss": 1.1999, "step": 810 }, { "epoch": 0.038426913053778725, "grad_norm": 0.53125, "learning_rate": 0.0001992893419833079, "loss": 0.7235, "step": 811 }, { "epoch": 0.038474295190713104, "grad_norm": 0.5234375, "learning_rate": 0.0001992875685599184, "loss": 1.0206, "step": 812 }, { "epoch": 0.038521677327647476, "grad_norm": 0.890625, "learning_rate": 0.0001992857929344364, "loss": 0.3944, "step": 813 }, { "epoch": 0.038569059464581855, "grad_norm": 0.52734375, "learning_rate": 0.0001992840151069013, "loss": 1.2379, "step": 814 }, { "epoch": 0.03861644160151623, "grad_norm": 0.69140625, "learning_rate": 0.00019928223507735248, "loss": 1.1114, "step": 815 }, { "epoch": 0.038663823738450606, "grad_norm": 0.37890625, "learning_rate": 0.00019928045284582941, "loss": 0.2301, "step": 816 }, { "epoch": 0.03871120587538498, "grad_norm": 0.578125, "learning_rate": 0.00019927866841237167, "loss": 1.174, "step": 817 }, { "epoch": 0.03875858801231936, "grad_norm": 0.55078125, "learning_rate": 0.00019927688177701883, "loss": 0.6894, "step": 818 }, { "epoch": 0.03880597014925373, "grad_norm": 1.1875, "learning_rate": 0.00019927509293981048, "loss": 1.3543, "step": 819 }, { "epoch": 0.03885335228618811, "grad_norm": 0.384765625, "learning_rate": 0.0001992733019007863, "loss": 0.2246, "step": 820 }, { "epoch": 0.03890073442312248, "grad_norm": 0.78515625, "learning_rate": 0.00019927150865998604, "loss": 0.6539, "step": 821 }, { "epoch": 0.03894811656005686, "grad_norm": 0.43359375, "learning_rate": 0.00019926971321744942, "loss": 1.4651, "step": 822 }, { "epoch": 0.03899549869699123, "grad_norm": 0.423828125, "learning_rate": 0.00019926791557321635, "loss": 0.8091, "step": 823 }, { "epoch": 0.03904288083392561, "grad_norm": 0.55078125, "learning_rate": 0.00019926611572732662, "loss": 1.3761, "step": 824 }, { "epoch": 0.03909026297085998, "grad_norm": 0.625, "learning_rate": 0.0001992643136798202, "loss": 0.3683, "step": 825 }, { "epoch": 0.03913764510779436, "grad_norm": 0.412109375, "learning_rate": 0.00019926250943073698, "loss": 0.5105, "step": 826 }, { "epoch": 0.039185027244728735, "grad_norm": 1.390625, "learning_rate": 0.0001992607029801171, "loss": 0.542, "step": 827 }, { "epoch": 0.039232409381663114, "grad_norm": 0.51953125, "learning_rate": 0.0001992588943280005, "loss": 0.2755, "step": 828 }, { "epoch": 0.039279791518597486, "grad_norm": 0.3828125, "learning_rate": 0.0001992570834744274, "loss": 0.2822, "step": 829 }, { "epoch": 0.039327173655531865, "grad_norm": 0.8515625, "learning_rate": 0.0001992552704194379, "loss": 0.9901, "step": 830 }, { "epoch": 0.03937455579246624, "grad_norm": 0.0615234375, "learning_rate": 0.00019925345516307217, "loss": 0.0039, "step": 831 }, { "epoch": 0.039421937929400616, "grad_norm": 0.478515625, "learning_rate": 0.00019925163770537059, "loss": 1.0106, "step": 832 }, { "epoch": 0.03946932006633499, "grad_norm": 0.034423828125, "learning_rate": 0.00019924981804637337, "loss": 0.0026, "step": 833 }, { "epoch": 0.03951670220326937, "grad_norm": 0.5234375, "learning_rate": 0.0001992479961861209, "loss": 0.9969, "step": 834 }, { "epoch": 0.039564084340203746, "grad_norm": 1.125, "learning_rate": 0.0001992461721246536, "loss": 0.2964, "step": 835 }, { "epoch": 0.03961146647713812, "grad_norm": 0.42578125, "learning_rate": 0.00019924434586201191, "loss": 1.2602, "step": 836 }, { "epoch": 0.0396588486140725, "grad_norm": 0.396484375, "learning_rate": 0.00019924251739823637, "loss": 0.7571, "step": 837 }, { "epoch": 0.03970623075100687, "grad_norm": 0.115234375, "learning_rate": 0.00019924068673336746, "loss": 0.0111, "step": 838 }, { "epoch": 0.03975361288794125, "grad_norm": 0.3046875, "learning_rate": 0.00019923885386744582, "loss": 0.02, "step": 839 }, { "epoch": 0.03980099502487562, "grad_norm": 0.5546875, "learning_rate": 0.00019923701880051212, "loss": 1.8461, "step": 840 }, { "epoch": 0.03984837716181, "grad_norm": 0.486328125, "learning_rate": 0.00019923518153260706, "loss": 0.3594, "step": 841 }, { "epoch": 0.03989575929874437, "grad_norm": 0.408203125, "learning_rate": 0.00019923334206377135, "loss": 0.2136, "step": 842 }, { "epoch": 0.03994314143567875, "grad_norm": 0.388671875, "learning_rate": 0.00019923150039404582, "loss": 0.7874, "step": 843 }, { "epoch": 0.039990523572613124, "grad_norm": 0.451171875, "learning_rate": 0.00019922965652347134, "loss": 1.266, "step": 844 }, { "epoch": 0.0400379057095475, "grad_norm": 0.51953125, "learning_rate": 0.00019922781045208875, "loss": 1.0366, "step": 845 }, { "epoch": 0.040085287846481875, "grad_norm": 0.4609375, "learning_rate": 0.000199225962179939, "loss": 1.495, "step": 846 }, { "epoch": 0.040132669983416254, "grad_norm": 0.5390625, "learning_rate": 0.00019922411170706313, "loss": 0.9435, "step": 847 }, { "epoch": 0.040180052120350626, "grad_norm": 0.4609375, "learning_rate": 0.00019922225903350212, "loss": 1.0605, "step": 848 }, { "epoch": 0.040227434257285005, "grad_norm": 0.482421875, "learning_rate": 0.0001992204041592971, "loss": 1.4564, "step": 849 }, { "epoch": 0.04027481639421938, "grad_norm": 0.59765625, "learning_rate": 0.0001992185470844892, "loss": 0.6306, "step": 850 }, { "epoch": 0.040322198531153756, "grad_norm": 0.474609375, "learning_rate": 0.00019921668780911963, "loss": 0.2552, "step": 851 }, { "epoch": 0.04036958066808813, "grad_norm": 0.310546875, "learning_rate": 0.0001992148263332296, "loss": 0.1815, "step": 852 }, { "epoch": 0.04041696280502251, "grad_norm": 0.427734375, "learning_rate": 0.0001992129626568604, "loss": 1.2073, "step": 853 }, { "epoch": 0.04046434494195688, "grad_norm": 0.04296875, "learning_rate": 0.00019921109678005335, "loss": 0.0031, "step": 854 }, { "epoch": 0.04051172707889126, "grad_norm": 0.6875, "learning_rate": 0.00019920922870284984, "loss": 1.0907, "step": 855 }, { "epoch": 0.04055910921582563, "grad_norm": 0.447265625, "learning_rate": 0.0001992073584252913, "loss": 0.8843, "step": 856 }, { "epoch": 0.04060649135276001, "grad_norm": 0.400390625, "learning_rate": 0.00019920548594741928, "loss": 1.1248, "step": 857 }, { "epoch": 0.04065387348969438, "grad_norm": 0.388671875, "learning_rate": 0.00019920361126927522, "loss": 0.2766, "step": 858 }, { "epoch": 0.04070125562662876, "grad_norm": 0.439453125, "learning_rate": 0.00019920173439090072, "loss": 0.9496, "step": 859 }, { "epoch": 0.040748637763563134, "grad_norm": 0.45703125, "learning_rate": 0.00019919985531233742, "loss": 0.2032, "step": 860 }, { "epoch": 0.04079601990049751, "grad_norm": 0.435546875, "learning_rate": 0.000199197974033627, "loss": 0.7034, "step": 861 }, { "epoch": 0.040843402037431885, "grad_norm": 0.455078125, "learning_rate": 0.00019919609055481116, "loss": 1.1872, "step": 862 }, { "epoch": 0.040890784174366264, "grad_norm": 0.50390625, "learning_rate": 0.0001991942048759317, "loss": 1.3044, "step": 863 }, { "epoch": 0.04093816631130064, "grad_norm": 0.58984375, "learning_rate": 0.00019919231699703046, "loss": 1.1731, "step": 864 }, { "epoch": 0.040985548448235015, "grad_norm": 0.8984375, "learning_rate": 0.00019919042691814924, "loss": 0.2639, "step": 865 }, { "epoch": 0.041032930585169394, "grad_norm": 0.4453125, "learning_rate": 0.00019918853463933003, "loss": 0.83, "step": 866 }, { "epoch": 0.041080312722103766, "grad_norm": 0.435546875, "learning_rate": 0.00019918664016061474, "loss": 1.0699, "step": 867 }, { "epoch": 0.041127694859038146, "grad_norm": 0.435546875, "learning_rate": 0.00019918474348204544, "loss": 1.2956, "step": 868 }, { "epoch": 0.04117507699597252, "grad_norm": 0.51171875, "learning_rate": 0.0001991828446036642, "loss": 0.8589, "step": 869 }, { "epoch": 0.0412224591329069, "grad_norm": 0.5390625, "learning_rate": 0.00019918094352551312, "loss": 0.6539, "step": 870 }, { "epoch": 0.04126984126984127, "grad_norm": 0.318359375, "learning_rate": 0.00019917904024763428, "loss": 0.4389, "step": 871 }, { "epoch": 0.04131722340677565, "grad_norm": 0.474609375, "learning_rate": 0.00019917713477007003, "loss": 1.028, "step": 872 }, { "epoch": 0.04136460554371002, "grad_norm": 0.447265625, "learning_rate": 0.00019917522709286256, "loss": 0.2418, "step": 873 }, { "epoch": 0.0414119876806444, "grad_norm": 0.318359375, "learning_rate": 0.00019917331721605418, "loss": 0.2146, "step": 874 }, { "epoch": 0.04145936981757877, "grad_norm": 0.44921875, "learning_rate": 0.00019917140513968725, "loss": 1.2755, "step": 875 }, { "epoch": 0.04150675195451315, "grad_norm": 0.43359375, "learning_rate": 0.0001991694908638042, "loss": 0.8287, "step": 876 }, { "epoch": 0.04155413409144752, "grad_norm": 0.6484375, "learning_rate": 0.0001991675743884475, "loss": 0.2201, "step": 877 }, { "epoch": 0.0416015162283819, "grad_norm": 0.07275390625, "learning_rate": 0.0001991656557136596, "loss": 0.0061, "step": 878 }, { "epoch": 0.041648898365316274, "grad_norm": 0.5703125, "learning_rate": 0.00019916373483948308, "loss": 1.242, "step": 879 }, { "epoch": 0.04169628050225065, "grad_norm": 0.5625, "learning_rate": 0.00019916181176596055, "loss": 1.2381, "step": 880 }, { "epoch": 0.041743662639185025, "grad_norm": 0.474609375, "learning_rate": 0.00019915988649313467, "loss": 1.2693, "step": 881 }, { "epoch": 0.041791044776119404, "grad_norm": 0.515625, "learning_rate": 0.0001991579590210481, "loss": 1.3847, "step": 882 }, { "epoch": 0.041838426913053776, "grad_norm": 0.48828125, "learning_rate": 0.00019915602934974364, "loss": 1.032, "step": 883 }, { "epoch": 0.041885809049988156, "grad_norm": 0.6015625, "learning_rate": 0.00019915409747926405, "loss": 1.0213, "step": 884 }, { "epoch": 0.04193319118692253, "grad_norm": 0.30859375, "learning_rate": 0.0001991521634096522, "loss": 0.7964, "step": 885 }, { "epoch": 0.04198057332385691, "grad_norm": 0.447265625, "learning_rate": 0.00019915022714095098, "loss": 0.8511, "step": 886 }, { "epoch": 0.04202795546079128, "grad_norm": 0.478515625, "learning_rate": 0.00019914828867320335, "loss": 0.734, "step": 887 }, { "epoch": 0.04207533759772566, "grad_norm": 0.5546875, "learning_rate": 0.00019914634800645225, "loss": 0.9853, "step": 888 }, { "epoch": 0.04212271973466003, "grad_norm": 0.451171875, "learning_rate": 0.00019914440514074078, "loss": 0.9674, "step": 889 }, { "epoch": 0.04217010187159441, "grad_norm": 0.408203125, "learning_rate": 0.000199142460076112, "loss": 1.0232, "step": 890 }, { "epoch": 0.04221748400852878, "grad_norm": 0.39453125, "learning_rate": 0.00019914051281260905, "loss": 0.8152, "step": 891 }, { "epoch": 0.04226486614546316, "grad_norm": 0.298828125, "learning_rate": 0.00019913856335027514, "loss": 0.0387, "step": 892 }, { "epoch": 0.04231224828239753, "grad_norm": 0.58984375, "learning_rate": 0.0001991366116891535, "loss": 0.8438, "step": 893 }, { "epoch": 0.04235963041933191, "grad_norm": 0.74609375, "learning_rate": 0.00019913465782928736, "loss": 0.1963, "step": 894 }, { "epoch": 0.04240701255626629, "grad_norm": 0.443359375, "learning_rate": 0.00019913270177072015, "loss": 0.8991, "step": 895 }, { "epoch": 0.04245439469320066, "grad_norm": 0.26953125, "learning_rate": 0.0001991307435134952, "loss": 0.6034, "step": 896 }, { "epoch": 0.04250177683013504, "grad_norm": 0.490234375, "learning_rate": 0.00019912878305765593, "loss": 1.2035, "step": 897 }, { "epoch": 0.042549158967069414, "grad_norm": 0.46875, "learning_rate": 0.00019912682040324587, "loss": 1.2343, "step": 898 }, { "epoch": 0.04259654110400379, "grad_norm": 0.62109375, "learning_rate": 0.0001991248555503085, "loss": 0.1443, "step": 899 }, { "epoch": 0.042643923240938165, "grad_norm": 0.416015625, "learning_rate": 0.00019912288849888743, "loss": 1.281, "step": 900 }, { "epoch": 0.042691305377872545, "grad_norm": 0.64453125, "learning_rate": 0.00019912091924902624, "loss": 0.0633, "step": 901 }, { "epoch": 0.04273868751480692, "grad_norm": 0.470703125, "learning_rate": 0.00019911894780076867, "loss": 1.2478, "step": 902 }, { "epoch": 0.042786069651741296, "grad_norm": 0.51171875, "learning_rate": 0.0001991169741541584, "loss": 0.7873, "step": 903 }, { "epoch": 0.04283345178867567, "grad_norm": 0.3984375, "learning_rate": 0.00019911499830923922, "loss": 0.9015, "step": 904 }, { "epoch": 0.04288083392561005, "grad_norm": 0.46484375, "learning_rate": 0.00019911302026605495, "loss": 1.2971, "step": 905 }, { "epoch": 0.04292821606254442, "grad_norm": 0.421875, "learning_rate": 0.00019911104002464947, "loss": 0.965, "step": 906 }, { "epoch": 0.0429755981994788, "grad_norm": 0.5546875, "learning_rate": 0.00019910905758506667, "loss": 1.2898, "step": 907 }, { "epoch": 0.04302298033641317, "grad_norm": 0.6875, "learning_rate": 0.00019910707294735057, "loss": 0.9892, "step": 908 }, { "epoch": 0.04307036247334755, "grad_norm": 0.5078125, "learning_rate": 0.00019910508611154515, "loss": 0.2499, "step": 909 }, { "epoch": 0.04311774461028192, "grad_norm": 0.51171875, "learning_rate": 0.00019910309707769447, "loss": 0.1086, "step": 910 }, { "epoch": 0.0431651267472163, "grad_norm": 0.43359375, "learning_rate": 0.00019910110584584265, "loss": 0.824, "step": 911 }, { "epoch": 0.04321250888415067, "grad_norm": 0.451171875, "learning_rate": 0.00019909911241603386, "loss": 1.3195, "step": 912 }, { "epoch": 0.04325989102108505, "grad_norm": 0.58203125, "learning_rate": 0.0001990971167883123, "loss": 0.1024, "step": 913 }, { "epoch": 0.043307273158019424, "grad_norm": 0.42578125, "learning_rate": 0.00019909511896272229, "loss": 0.6601, "step": 914 }, { "epoch": 0.0433546552949538, "grad_norm": 0.7734375, "learning_rate": 0.00019909311893930807, "loss": 0.3921, "step": 915 }, { "epoch": 0.043402037431888175, "grad_norm": 0.51171875, "learning_rate": 0.00019909111671811402, "loss": 0.7832, "step": 916 }, { "epoch": 0.043449419568822555, "grad_norm": 0.578125, "learning_rate": 0.00019908911229918452, "loss": 1.384, "step": 917 }, { "epoch": 0.04349680170575693, "grad_norm": 1.1171875, "learning_rate": 0.0001990871056825641, "loss": 1.0337, "step": 918 }, { "epoch": 0.043544183842691306, "grad_norm": 0.7578125, "learning_rate": 0.0001990850968682972, "loss": 0.2049, "step": 919 }, { "epoch": 0.04359156597962568, "grad_norm": 1.6640625, "learning_rate": 0.00019908308585642838, "loss": 0.6315, "step": 920 }, { "epoch": 0.04363894811656006, "grad_norm": 0.51171875, "learning_rate": 0.00019908107264700225, "loss": 1.143, "step": 921 }, { "epoch": 0.04368633025349443, "grad_norm": 0.6640625, "learning_rate": 0.0001990790572400635, "loss": 0.3487, "step": 922 }, { "epoch": 0.04373371239042881, "grad_norm": 0.427734375, "learning_rate": 0.00019907703963565677, "loss": 1.0009, "step": 923 }, { "epoch": 0.04378109452736319, "grad_norm": 0.337890625, "learning_rate": 0.00019907501983382683, "loss": 0.0066, "step": 924 }, { "epoch": 0.04382847666429756, "grad_norm": 0.74609375, "learning_rate": 0.00019907299783461852, "loss": 0.513, "step": 925 }, { "epoch": 0.04387585880123194, "grad_norm": 0.416015625, "learning_rate": 0.0001990709736380766, "loss": 0.1052, "step": 926 }, { "epoch": 0.04392324093816631, "grad_norm": 0.447265625, "learning_rate": 0.000199068947244246, "loss": 1.0993, "step": 927 }, { "epoch": 0.04397062307510069, "grad_norm": 0.42578125, "learning_rate": 0.00019906691865317173, "loss": 0.859, "step": 928 }, { "epoch": 0.04401800521203506, "grad_norm": 0.51171875, "learning_rate": 0.00019906488786489867, "loss": 1.018, "step": 929 }, { "epoch": 0.04406538734896944, "grad_norm": 0.51171875, "learning_rate": 0.00019906285487947197, "loss": 1.2107, "step": 930 }, { "epoch": 0.04411276948590381, "grad_norm": 0.625, "learning_rate": 0.0001990608196969366, "loss": 1.1488, "step": 931 }, { "epoch": 0.04416015162283819, "grad_norm": 0.5390625, "learning_rate": 0.00019905878231733781, "loss": 1.1261, "step": 932 }, { "epoch": 0.044207533759772565, "grad_norm": 0.71875, "learning_rate": 0.00019905674274072076, "loss": 0.1464, "step": 933 }, { "epoch": 0.044254915896706944, "grad_norm": 1.265625, "learning_rate": 0.0001990547009671306, "loss": 0.7288, "step": 934 }, { "epoch": 0.044302298033641316, "grad_norm": 0.375, "learning_rate": 0.00019905265699661273, "loss": 0.3238, "step": 935 }, { "epoch": 0.044349680170575695, "grad_norm": 0.458984375, "learning_rate": 0.00019905061082921242, "loss": 0.8499, "step": 936 }, { "epoch": 0.04439706230751007, "grad_norm": 0.53125, "learning_rate": 0.00019904856246497508, "loss": 1.4673, "step": 937 }, { "epoch": 0.044444444444444446, "grad_norm": 0.494140625, "learning_rate": 0.0001990465119039461, "loss": 1.1882, "step": 938 }, { "epoch": 0.04449182658137882, "grad_norm": 0.470703125, "learning_rate": 0.000199044459146171, "loss": 1.2626, "step": 939 }, { "epoch": 0.0445392087183132, "grad_norm": 0.263671875, "learning_rate": 0.0001990424041916953, "loss": 0.0356, "step": 940 }, { "epoch": 0.04458659085524757, "grad_norm": 0.60546875, "learning_rate": 0.00019904034704056454, "loss": 0.2159, "step": 941 }, { "epoch": 0.04463397299218195, "grad_norm": 0.40234375, "learning_rate": 0.0001990382876928244, "loss": 0.8416, "step": 942 }, { "epoch": 0.04468135512911632, "grad_norm": 0.4453125, "learning_rate": 0.00019903622614852055, "loss": 0.7343, "step": 943 }, { "epoch": 0.0447287372660507, "grad_norm": 0.439453125, "learning_rate": 0.00019903416240769865, "loss": 0.2001, "step": 944 }, { "epoch": 0.04477611940298507, "grad_norm": 0.50390625, "learning_rate": 0.00019903209647040458, "loss": 0.928, "step": 945 }, { "epoch": 0.04482350153991945, "grad_norm": 0.34765625, "learning_rate": 0.00019903002833668402, "loss": 0.2404, "step": 946 }, { "epoch": 0.04487088367685382, "grad_norm": 0.1201171875, "learning_rate": 0.00019902795800658295, "loss": 0.0152, "step": 947 }, { "epoch": 0.0449182658137882, "grad_norm": 0.4765625, "learning_rate": 0.00019902588548014724, "loss": 0.8822, "step": 948 }, { "epoch": 0.044965647950722575, "grad_norm": 0.439453125, "learning_rate": 0.0001990238107574229, "loss": 0.8807, "step": 949 }, { "epoch": 0.045013030087656954, "grad_norm": 0.455078125, "learning_rate": 0.00019902173383845587, "loss": 0.8176, "step": 950 }, { "epoch": 0.045060412224591326, "grad_norm": 0.1689453125, "learning_rate": 0.00019901965472329228, "loss": 0.0183, "step": 951 }, { "epoch": 0.045107794361525705, "grad_norm": 0.90625, "learning_rate": 0.0001990175734119782, "loss": 0.7378, "step": 952 }, { "epoch": 0.045155176498460084, "grad_norm": 0.51171875, "learning_rate": 0.0001990154899045598, "loss": 1.3953, "step": 953 }, { "epoch": 0.045202558635394456, "grad_norm": 0.56640625, "learning_rate": 0.00019901340420108333, "loss": 1.292, "step": 954 }, { "epoch": 0.045249940772328835, "grad_norm": 0.5390625, "learning_rate": 0.00019901131630159502, "loss": 1.3624, "step": 955 }, { "epoch": 0.04529732290926321, "grad_norm": 0.55078125, "learning_rate": 0.00019900922620614119, "loss": 1.2252, "step": 956 }, { "epoch": 0.045344705046197586, "grad_norm": 0.7578125, "learning_rate": 0.00019900713391476815, "loss": 0.4908, "step": 957 }, { "epoch": 0.04539208718313196, "grad_norm": 0.55859375, "learning_rate": 0.00019900503942752235, "loss": 1.1443, "step": 958 }, { "epoch": 0.04543946932006634, "grad_norm": 0.46484375, "learning_rate": 0.0001990029427444502, "loss": 0.9862, "step": 959 }, { "epoch": 0.04548685145700071, "grad_norm": 0.11279296875, "learning_rate": 0.00019900084386559826, "loss": 0.0205, "step": 960 }, { "epoch": 0.04553423359393509, "grad_norm": 0.5703125, "learning_rate": 0.00019899874279101306, "loss": 0.1988, "step": 961 }, { "epoch": 0.04558161573086946, "grad_norm": 0.55859375, "learning_rate": 0.00019899663952074122, "loss": 1.3923, "step": 962 }, { "epoch": 0.04562899786780384, "grad_norm": 0.60546875, "learning_rate": 0.00019899453405482933, "loss": 1.2035, "step": 963 }, { "epoch": 0.04567638000473821, "grad_norm": 1.0390625, "learning_rate": 0.00019899242639332413, "loss": 0.1353, "step": 964 }, { "epoch": 0.04572376214167259, "grad_norm": 0.51171875, "learning_rate": 0.0001989903165362723, "loss": 0.1484, "step": 965 }, { "epoch": 0.045771144278606964, "grad_norm": 0.5078125, "learning_rate": 0.00019898820448372074, "loss": 1.3354, "step": 966 }, { "epoch": 0.04581852641554134, "grad_norm": 0.79296875, "learning_rate": 0.00019898609023571626, "loss": 0.4631, "step": 967 }, { "epoch": 0.045865908552475715, "grad_norm": 1.078125, "learning_rate": 0.0001989839737923057, "loss": 0.4975, "step": 968 }, { "epoch": 0.045913290689410094, "grad_norm": 0.431640625, "learning_rate": 0.00019898185515353608, "loss": 1.0471, "step": 969 }, { "epoch": 0.045960672826344466, "grad_norm": 0.494140625, "learning_rate": 0.0001989797343194543, "loss": 0.8438, "step": 970 }, { "epoch": 0.046008054963278845, "grad_norm": 0.578125, "learning_rate": 0.00019897761129010743, "loss": 1.1028, "step": 971 }, { "epoch": 0.04605543710021322, "grad_norm": 0.400390625, "learning_rate": 0.00019897548606554258, "loss": 0.7336, "step": 972 }, { "epoch": 0.046102819237147596, "grad_norm": 0.482421875, "learning_rate": 0.0001989733586458069, "loss": 0.954, "step": 973 }, { "epoch": 0.04615020137408197, "grad_norm": 0.423828125, "learning_rate": 0.00019897122903094752, "loss": 1.1067, "step": 974 }, { "epoch": 0.04619758351101635, "grad_norm": 0.53125, "learning_rate": 0.0001989690972210117, "loss": 1.6079, "step": 975 }, { "epoch": 0.04624496564795072, "grad_norm": 0.6796875, "learning_rate": 0.00019896696321604674, "loss": 0.3096, "step": 976 }, { "epoch": 0.0462923477848851, "grad_norm": 0.482421875, "learning_rate": 0.00019896482701609993, "loss": 0.9771, "step": 977 }, { "epoch": 0.04633972992181947, "grad_norm": 0.474609375, "learning_rate": 0.00019896268862121868, "loss": 0.9484, "step": 978 }, { "epoch": 0.04638711205875385, "grad_norm": 0.34375, "learning_rate": 0.00019896054803145039, "loss": 0.2152, "step": 979 }, { "epoch": 0.04643449419568822, "grad_norm": 0.51171875, "learning_rate": 0.00019895840524684257, "loss": 0.9165, "step": 980 }, { "epoch": 0.0464818763326226, "grad_norm": 0.486328125, "learning_rate": 0.0001989562602674427, "loss": 1.0838, "step": 981 }, { "epoch": 0.046529258469556974, "grad_norm": 0.515625, "learning_rate": 0.00019895411309329845, "loss": 1.2539, "step": 982 }, { "epoch": 0.04657664060649135, "grad_norm": 0.59765625, "learning_rate": 0.0001989519637244573, "loss": 1.1238, "step": 983 }, { "epoch": 0.04662402274342573, "grad_norm": 0.48828125, "learning_rate": 0.00019894981216096703, "loss": 1.024, "step": 984 }, { "epoch": 0.046671404880360104, "grad_norm": 0.50390625, "learning_rate": 0.00019894765840287532, "loss": 1.2483, "step": 985 }, { "epoch": 0.04671878701729448, "grad_norm": 0.43359375, "learning_rate": 0.00019894550245022993, "loss": 1.0601, "step": 986 }, { "epoch": 0.046766169154228855, "grad_norm": 0.59375, "learning_rate": 0.00019894334430307868, "loss": 0.9888, "step": 987 }, { "epoch": 0.046813551291163234, "grad_norm": 0.44921875, "learning_rate": 0.0001989411839614695, "loss": 0.8472, "step": 988 }, { "epoch": 0.046860933428097606, "grad_norm": 0.39453125, "learning_rate": 0.00019893902142545015, "loss": 0.0727, "step": 989 }, { "epoch": 0.046908315565031986, "grad_norm": 0.80078125, "learning_rate": 0.00019893685669506876, "loss": 0.3552, "step": 990 }, { "epoch": 0.04695569770196636, "grad_norm": 0.50390625, "learning_rate": 0.00019893468977037325, "loss": 0.9933, "step": 991 }, { "epoch": 0.04700307983890074, "grad_norm": 0.1904296875, "learning_rate": 0.0001989325206514117, "loss": 0.0244, "step": 992 }, { "epoch": 0.04705046197583511, "grad_norm": 0.58203125, "learning_rate": 0.00019893034933823222, "loss": 0.9056, "step": 993 }, { "epoch": 0.04709784411276949, "grad_norm": 1.015625, "learning_rate": 0.000198928175830883, "loss": 0.3743, "step": 994 }, { "epoch": 0.04714522624970386, "grad_norm": 0.44921875, "learning_rate": 0.00019892600012941217, "loss": 0.8073, "step": 995 }, { "epoch": 0.04719260838663824, "grad_norm": 0.490234375, "learning_rate": 0.00019892382223386806, "loss": 0.947, "step": 996 }, { "epoch": 0.04723999052357261, "grad_norm": 0.498046875, "learning_rate": 0.0001989216421442989, "loss": 1.0194, "step": 997 }, { "epoch": 0.04728737266050699, "grad_norm": 0.58984375, "learning_rate": 0.0001989194598607531, "loss": 1.2811, "step": 998 }, { "epoch": 0.04733475479744136, "grad_norm": 0.48828125, "learning_rate": 0.0001989172753832791, "loss": 0.7862, "step": 999 }, { "epoch": 0.04738213693437574, "grad_norm": 0.640625, "learning_rate": 0.00019891508871192523, "loss": 1.246, "step": 1000 }, { "epoch": 0.047429519071310114, "grad_norm": 0.373046875, "learning_rate": 0.0001989128998467401, "loss": 0.2165, "step": 1001 }, { "epoch": 0.04747690120824449, "grad_norm": 0.419921875, "learning_rate": 0.00019891070878777213, "loss": 1.1898, "step": 1002 }, { "epoch": 0.047524283345178865, "grad_norm": 0.435546875, "learning_rate": 0.00019890851553507006, "loss": 0.3253, "step": 1003 }, { "epoch": 0.047571665482113244, "grad_norm": 0.61328125, "learning_rate": 0.00019890632008868244, "loss": 0.8785, "step": 1004 }, { "epoch": 0.047619047619047616, "grad_norm": 0.9921875, "learning_rate": 0.000198904122448658, "loss": 0.6022, "step": 1005 }, { "epoch": 0.047666429755981995, "grad_norm": 0.48046875, "learning_rate": 0.00019890192261504548, "loss": 1.3854, "step": 1006 }, { "epoch": 0.04771381189291637, "grad_norm": 0.23828125, "learning_rate": 0.00019889972058789366, "loss": 0.0278, "step": 1007 }, { "epoch": 0.04776119402985075, "grad_norm": 0.59375, "learning_rate": 0.00019889751636725138, "loss": 1.2147, "step": 1008 }, { "epoch": 0.04780857616678512, "grad_norm": 0.51171875, "learning_rate": 0.00019889530995316753, "loss": 1.0106, "step": 1009 }, { "epoch": 0.0478559583037195, "grad_norm": 0.59765625, "learning_rate": 0.00019889310134569104, "loss": 0.8297, "step": 1010 }, { "epoch": 0.04790334044065387, "grad_norm": 0.37109375, "learning_rate": 0.00019889089054487088, "loss": 0.8559, "step": 1011 }, { "epoch": 0.04795072257758825, "grad_norm": 0.71484375, "learning_rate": 0.00019888867755075613, "loss": 0.322, "step": 1012 }, { "epoch": 0.04799810471452263, "grad_norm": 0.0576171875, "learning_rate": 0.00019888646236339584, "loss": 0.0064, "step": 1013 }, { "epoch": 0.048045486851457, "grad_norm": 1.1484375, "learning_rate": 0.00019888424498283914, "loss": 0.1653, "step": 1014 }, { "epoch": 0.04809286898839138, "grad_norm": 1.453125, "learning_rate": 0.00019888202540913522, "loss": 0.8939, "step": 1015 }, { "epoch": 0.04814025112532575, "grad_norm": 0.6015625, "learning_rate": 0.00019887980364233327, "loss": 0.6065, "step": 1016 }, { "epoch": 0.04818763326226013, "grad_norm": 0.06201171875, "learning_rate": 0.00019887757968248263, "loss": 0.007, "step": 1017 }, { "epoch": 0.0482350153991945, "grad_norm": 0.875, "learning_rate": 0.00019887535352963257, "loss": 1.1738, "step": 1018 }, { "epoch": 0.04828239753612888, "grad_norm": 0.74609375, "learning_rate": 0.0001988731251838325, "loss": 0.4478, "step": 1019 }, { "epoch": 0.048329779673063254, "grad_norm": 0.82421875, "learning_rate": 0.00019887089464513182, "loss": 1.1147, "step": 1020 }, { "epoch": 0.04837716180999763, "grad_norm": 0.490234375, "learning_rate": 0.00019886866191358, "loss": 0.625, "step": 1021 }, { "epoch": 0.048424543946932005, "grad_norm": 0.458984375, "learning_rate": 0.0001988664269892266, "loss": 0.8374, "step": 1022 }, { "epoch": 0.048471926083866385, "grad_norm": 0.453125, "learning_rate": 0.00019886418987212113, "loss": 0.7784, "step": 1023 }, { "epoch": 0.04851930822080076, "grad_norm": 0.65625, "learning_rate": 0.00019886195056231326, "loss": 0.1788, "step": 1024 }, { "epoch": 0.048566690357735136, "grad_norm": 1.0859375, "learning_rate": 0.0001988597090598526, "loss": 1.3, "step": 1025 }, { "epoch": 0.04861407249466951, "grad_norm": 0.416015625, "learning_rate": 0.0001988574653647889, "loss": 0.0471, "step": 1026 }, { "epoch": 0.04866145463160389, "grad_norm": 0.6484375, "learning_rate": 0.00019885521947717193, "loss": 0.835, "step": 1027 }, { "epoch": 0.04870883676853826, "grad_norm": 0.466796875, "learning_rate": 0.0001988529713970515, "loss": 1.1535, "step": 1028 }, { "epoch": 0.04875621890547264, "grad_norm": 0.435546875, "learning_rate": 0.00019885072112447741, "loss": 0.7651, "step": 1029 }, { "epoch": 0.04880360104240701, "grad_norm": 0.6640625, "learning_rate": 0.00019884846865949967, "loss": 0.3877, "step": 1030 }, { "epoch": 0.04885098317934139, "grad_norm": 0.3828125, "learning_rate": 0.00019884621400216815, "loss": 0.2153, "step": 1031 }, { "epoch": 0.04889836531627576, "grad_norm": 0.67578125, "learning_rate": 0.0001988439571525329, "loss": 1.0758, "step": 1032 }, { "epoch": 0.04894574745321014, "grad_norm": 0.1630859375, "learning_rate": 0.00019884169811064395, "loss": 0.015, "step": 1033 }, { "epoch": 0.04899312959014451, "grad_norm": 0.322265625, "learning_rate": 0.00019883943687655143, "loss": 0.0581, "step": 1034 }, { "epoch": 0.04904051172707889, "grad_norm": 0.48828125, "learning_rate": 0.0001988371734503055, "loss": 1.1398, "step": 1035 }, { "epoch": 0.049087893864013264, "grad_norm": 0.515625, "learning_rate": 0.00019883490783195628, "loss": 1.2845, "step": 1036 }, { "epoch": 0.04913527600094764, "grad_norm": 0.58203125, "learning_rate": 0.00019883264002155414, "loss": 1.0436, "step": 1037 }, { "epoch": 0.049182658137882015, "grad_norm": 0.66796875, "learning_rate": 0.0001988303700191493, "loss": 0.2564, "step": 1038 }, { "epoch": 0.049230040274816395, "grad_norm": 0.494140625, "learning_rate": 0.0001988280978247921, "loss": 1.0179, "step": 1039 }, { "epoch": 0.04927742241175077, "grad_norm": 0.53515625, "learning_rate": 0.00019882582343853298, "loss": 0.7744, "step": 1040 }, { "epoch": 0.049324804548685146, "grad_norm": 0.65625, "learning_rate": 0.00019882354686042236, "loss": 1.5745, "step": 1041 }, { "epoch": 0.049372186685619525, "grad_norm": 0.609375, "learning_rate": 0.00019882126809051071, "loss": 0.6615, "step": 1042 }, { "epoch": 0.0494195688225539, "grad_norm": 0.4375, "learning_rate": 0.0001988189871288486, "loss": 0.9245, "step": 1043 }, { "epoch": 0.049466950959488276, "grad_norm": 0.470703125, "learning_rate": 0.00019881670397548664, "loss": 0.8864, "step": 1044 }, { "epoch": 0.04951433309642265, "grad_norm": 0.4140625, "learning_rate": 0.00019881441863047543, "loss": 0.6943, "step": 1045 }, { "epoch": 0.04956171523335703, "grad_norm": 0.462890625, "learning_rate": 0.00019881213109386567, "loss": 1.1239, "step": 1046 }, { "epoch": 0.0496090973702914, "grad_norm": 0.384765625, "learning_rate": 0.00019880984136570805, "loss": 1.1835, "step": 1047 }, { "epoch": 0.04965647950722578, "grad_norm": 0.73046875, "learning_rate": 0.00019880754944605344, "loss": 0.058, "step": 1048 }, { "epoch": 0.04970386164416015, "grad_norm": 0.455078125, "learning_rate": 0.00019880525533495262, "loss": 0.9848, "step": 1049 }, { "epoch": 0.04975124378109453, "grad_norm": 0.478515625, "learning_rate": 0.0001988029590324565, "loss": 0.0888, "step": 1050 }, { "epoch": 0.0497986259180289, "grad_norm": 0.490234375, "learning_rate": 0.00019880066053861594, "loss": 1.3071, "step": 1051 }, { "epoch": 0.04984600805496328, "grad_norm": 0.70703125, "learning_rate": 0.00019879835985348198, "loss": 0.1956, "step": 1052 }, { "epoch": 0.04989339019189765, "grad_norm": 0.52734375, "learning_rate": 0.00019879605697710565, "loss": 0.8406, "step": 1053 }, { "epoch": 0.04994077232883203, "grad_norm": 0.306640625, "learning_rate": 0.00019879375190953803, "loss": 0.2062, "step": 1054 }, { "epoch": 0.049988154465766405, "grad_norm": 0.85546875, "learning_rate": 0.00019879144465083018, "loss": 1.3425, "step": 1055 }, { "epoch": 0.050035536602700784, "grad_norm": 0.546875, "learning_rate": 0.00019878913520103334, "loss": 0.5386, "step": 1056 }, { "epoch": 0.050082918739635156, "grad_norm": 0.546875, "learning_rate": 0.00019878682356019872, "loss": 0.7189, "step": 1057 }, { "epoch": 0.050130300876569535, "grad_norm": 0.5234375, "learning_rate": 0.00019878450972837753, "loss": 1.426, "step": 1058 }, { "epoch": 0.05017768301350391, "grad_norm": 0.33203125, "learning_rate": 0.00019878219370562117, "loss": 0.7201, "step": 1059 }, { "epoch": 0.050225065150438286, "grad_norm": 0.99609375, "learning_rate": 0.00019877987549198097, "loss": 0.0785, "step": 1060 }, { "epoch": 0.05027244728737266, "grad_norm": 0.68359375, "learning_rate": 0.00019877755508750832, "loss": 0.1805, "step": 1061 }, { "epoch": 0.05031982942430704, "grad_norm": 0.515625, "learning_rate": 0.00019877523249225477, "loss": 1.2644, "step": 1062 }, { "epoch": 0.05036721156124141, "grad_norm": 0.431640625, "learning_rate": 0.0001987729077062717, "loss": 0.9881, "step": 1063 }, { "epoch": 0.05041459369817579, "grad_norm": 0.453125, "learning_rate": 0.0001987705807296108, "loss": 0.7509, "step": 1064 }, { "epoch": 0.05046197583511016, "grad_norm": 0.123046875, "learning_rate": 0.0001987682515623236, "loss": 0.0115, "step": 1065 }, { "epoch": 0.05050935797204454, "grad_norm": 0.421875, "learning_rate": 0.00019876592020446178, "loss": 0.2251, "step": 1066 }, { "epoch": 0.05055674010897891, "grad_norm": 0.55078125, "learning_rate": 0.00019876358665607706, "loss": 1.3754, "step": 1067 }, { "epoch": 0.05060412224591329, "grad_norm": 0.53515625, "learning_rate": 0.0001987612509172212, "loss": 0.1373, "step": 1068 }, { "epoch": 0.05065150438284766, "grad_norm": 0.185546875, "learning_rate": 0.00019875891298794596, "loss": 0.0087, "step": 1069 }, { "epoch": 0.05069888651978204, "grad_norm": 0.54296875, "learning_rate": 0.00019875657286830324, "loss": 1.1929, "step": 1070 }, { "epoch": 0.050746268656716415, "grad_norm": 0.515625, "learning_rate": 0.00019875423055834492, "loss": 0.0568, "step": 1071 }, { "epoch": 0.050793650793650794, "grad_norm": 0.7578125, "learning_rate": 0.00019875188605812297, "loss": 0.7779, "step": 1072 }, { "epoch": 0.05084103293058517, "grad_norm": 0.50390625, "learning_rate": 0.00019874953936768936, "loss": 1.5075, "step": 1073 }, { "epoch": 0.050888415067519545, "grad_norm": 0.392578125, "learning_rate": 0.00019874719048709616, "loss": 1.1444, "step": 1074 }, { "epoch": 0.050935797204453924, "grad_norm": 0.51171875, "learning_rate": 0.00019874483941639546, "loss": 1.0448, "step": 1075 }, { "epoch": 0.050983179341388296, "grad_norm": 0.5546875, "learning_rate": 0.00019874248615563936, "loss": 1.245, "step": 1076 }, { "epoch": 0.051030561478322675, "grad_norm": 0.50390625, "learning_rate": 0.00019874013070488014, "loss": 0.8945, "step": 1077 }, { "epoch": 0.05107794361525705, "grad_norm": 0.484375, "learning_rate": 0.00019873777306416996, "loss": 0.7757, "step": 1078 }, { "epoch": 0.051125325752191426, "grad_norm": 0.5078125, "learning_rate": 0.00019873541323356118, "loss": 1.0556, "step": 1079 }, { "epoch": 0.0511727078891258, "grad_norm": 0.5703125, "learning_rate": 0.00019873305121310609, "loss": 0.1753, "step": 1080 }, { "epoch": 0.05122009002606018, "grad_norm": 0.5078125, "learning_rate": 0.00019873068700285704, "loss": 1.4021, "step": 1081 }, { "epoch": 0.05126747216299455, "grad_norm": 0.9140625, "learning_rate": 0.00019872832060286656, "loss": 0.8074, "step": 1082 }, { "epoch": 0.05131485429992893, "grad_norm": 0.44921875, "learning_rate": 0.00019872595201318708, "loss": 0.9757, "step": 1083 }, { "epoch": 0.0513622364368633, "grad_norm": 0.302734375, "learning_rate": 0.00019872358123387116, "loss": 0.1936, "step": 1084 }, { "epoch": 0.05140961857379768, "grad_norm": 0.484375, "learning_rate": 0.00019872120826497136, "loss": 1.2161, "step": 1085 }, { "epoch": 0.05145700071073205, "grad_norm": 0.53515625, "learning_rate": 0.00019871883310654031, "loss": 1.0108, "step": 1086 }, { "epoch": 0.05150438284766643, "grad_norm": 0.44921875, "learning_rate": 0.0001987164557586307, "loss": 0.9926, "step": 1087 }, { "epoch": 0.051551764984600804, "grad_norm": 0.65625, "learning_rate": 0.00019871407622129523, "loss": 1.2252, "step": 1088 }, { "epoch": 0.05159914712153518, "grad_norm": 0.64453125, "learning_rate": 0.0001987116944945867, "loss": 0.2188, "step": 1089 }, { "epoch": 0.051646529258469555, "grad_norm": 0.036865234375, "learning_rate": 0.00019870931057855792, "loss": 0.0031, "step": 1090 }, { "epoch": 0.051693911395403934, "grad_norm": 0.51171875, "learning_rate": 0.0001987069244732618, "loss": 1.4619, "step": 1091 }, { "epoch": 0.051741293532338306, "grad_norm": 0.5703125, "learning_rate": 0.00019870453617875123, "loss": 0.9762, "step": 1092 }, { "epoch": 0.051788675669272685, "grad_norm": 1.5390625, "learning_rate": 0.00019870214569507914, "loss": 0.3705, "step": 1093 }, { "epoch": 0.05183605780620706, "grad_norm": 0.53125, "learning_rate": 0.00019869975302229864, "loss": 1.2158, "step": 1094 }, { "epoch": 0.051883439943141436, "grad_norm": 0.4296875, "learning_rate": 0.0001986973581604627, "loss": 0.9881, "step": 1095 }, { "epoch": 0.05193082208007581, "grad_norm": 0.494140625, "learning_rate": 0.00019869496110962452, "loss": 1.101, "step": 1096 }, { "epoch": 0.05197820421701019, "grad_norm": 0.9921875, "learning_rate": 0.00019869256186983724, "loss": 0.5085, "step": 1097 }, { "epoch": 0.05202558635394456, "grad_norm": 0.41015625, "learning_rate": 0.00019869016044115405, "loss": 0.767, "step": 1098 }, { "epoch": 0.05207296849087894, "grad_norm": 0.640625, "learning_rate": 0.0001986877568236282, "loss": 0.0881, "step": 1099 }, { "epoch": 0.05212035062781331, "grad_norm": 0.86328125, "learning_rate": 0.00019868535101731305, "loss": 0.2183, "step": 1100 }, { "epoch": 0.05216773276474769, "grad_norm": 0.4296875, "learning_rate": 0.00019868294302226192, "loss": 1.084, "step": 1101 }, { "epoch": 0.05221511490168207, "grad_norm": 1.6953125, "learning_rate": 0.0001986805328385282, "loss": 0.9669, "step": 1102 }, { "epoch": 0.05226249703861644, "grad_norm": 0.07177734375, "learning_rate": 0.0001986781204661654, "loss": 0.0065, "step": 1103 }, { "epoch": 0.05230987917555082, "grad_norm": 0.640625, "learning_rate": 0.00019867570590522698, "loss": 1.0672, "step": 1104 }, { "epoch": 0.05235726131248519, "grad_norm": 0.5078125, "learning_rate": 0.0001986732891557665, "loss": 0.205, "step": 1105 }, { "epoch": 0.05240464344941957, "grad_norm": 0.69140625, "learning_rate": 0.0001986708702178376, "loss": 0.5019, "step": 1106 }, { "epoch": 0.052452025586353944, "grad_norm": 0.419921875, "learning_rate": 0.00019866844909149388, "loss": 0.9741, "step": 1107 }, { "epoch": 0.05249940772328832, "grad_norm": 0.5234375, "learning_rate": 0.00019866602577678905, "loss": 1.3385, "step": 1108 }, { "epoch": 0.052546789860222695, "grad_norm": 0.306640625, "learning_rate": 0.00019866360027377686, "loss": 0.6885, "step": 1109 }, { "epoch": 0.052594171997157074, "grad_norm": 1.09375, "learning_rate": 0.00019866117258251112, "loss": 0.1899, "step": 1110 }, { "epoch": 0.052641554134091446, "grad_norm": 0.5390625, "learning_rate": 0.00019865874270304565, "loss": 0.7239, "step": 1111 }, { "epoch": 0.052688936271025825, "grad_norm": 0.04638671875, "learning_rate": 0.00019865631063543438, "loss": 0.0036, "step": 1112 }, { "epoch": 0.0527363184079602, "grad_norm": 0.55078125, "learning_rate": 0.0001986538763797312, "loss": 0.9317, "step": 1113 }, { "epoch": 0.05278370054489458, "grad_norm": 0.29296875, "learning_rate": 0.00019865143993599014, "loss": 0.0368, "step": 1114 }, { "epoch": 0.05283108268182895, "grad_norm": 0.5, "learning_rate": 0.0001986490013042652, "loss": 1.0516, "step": 1115 }, { "epoch": 0.05287846481876333, "grad_norm": 0.4765625, "learning_rate": 0.0001986465604846105, "loss": 1.0051, "step": 1116 }, { "epoch": 0.0529258469556977, "grad_norm": 0.6484375, "learning_rate": 0.00019864411747708016, "loss": 0.5711, "step": 1117 }, { "epoch": 0.05297322909263208, "grad_norm": 0.51953125, "learning_rate": 0.00019864167228172836, "loss": 1.2666, "step": 1118 }, { "epoch": 0.05302061122956645, "grad_norm": 0.5625, "learning_rate": 0.00019863922489860936, "loss": 1.4304, "step": 1119 }, { "epoch": 0.05306799336650083, "grad_norm": 0.48046875, "learning_rate": 0.00019863677532777738, "loss": 0.9424, "step": 1120 }, { "epoch": 0.0531153755034352, "grad_norm": 0.59765625, "learning_rate": 0.0001986343235692868, "loss": 1.1107, "step": 1121 }, { "epoch": 0.05316275764036958, "grad_norm": 0.4609375, "learning_rate": 0.000198631869623192, "loss": 1.2194, "step": 1122 }, { "epoch": 0.053210139777303954, "grad_norm": 0.5546875, "learning_rate": 0.00019862941348954737, "loss": 0.9687, "step": 1123 }, { "epoch": 0.05325752191423833, "grad_norm": 0.423828125, "learning_rate": 0.00019862695516840743, "loss": 0.7604, "step": 1124 }, { "epoch": 0.053304904051172705, "grad_norm": 0.470703125, "learning_rate": 0.00019862449465982666, "loss": 1.1728, "step": 1125 }, { "epoch": 0.053352286188107084, "grad_norm": 0.5625, "learning_rate": 0.00019862203196385964, "loss": 1.0629, "step": 1126 }, { "epoch": 0.053399668325041456, "grad_norm": 0.734375, "learning_rate": 0.000198619567080561, "loss": 0.3663, "step": 1127 }, { "epoch": 0.053447050461975835, "grad_norm": 0.59375, "learning_rate": 0.0001986171000099854, "loss": 1.1313, "step": 1128 }, { "epoch": 0.05349443259891021, "grad_norm": 0.494140625, "learning_rate": 0.0001986146307521876, "loss": 0.9301, "step": 1129 }, { "epoch": 0.05354181473584459, "grad_norm": 0.55078125, "learning_rate": 0.00019861215930722234, "loss": 0.9257, "step": 1130 }, { "epoch": 0.05358919687277896, "grad_norm": 0.52734375, "learning_rate": 0.00019860968567514436, "loss": 1.0074, "step": 1131 }, { "epoch": 0.05363657900971334, "grad_norm": 0.57421875, "learning_rate": 0.0001986072098560086, "loss": 0.9213, "step": 1132 }, { "epoch": 0.05368396114664772, "grad_norm": 0.4765625, "learning_rate": 0.00019860473184987, "loss": 0.7373, "step": 1133 }, { "epoch": 0.05373134328358209, "grad_norm": 0.50390625, "learning_rate": 0.00019860225165678345, "loss": 1.1619, "step": 1134 }, { "epoch": 0.05377872542051647, "grad_norm": 0.91015625, "learning_rate": 0.00019859976927680397, "loss": 1.2205, "step": 1135 }, { "epoch": 0.05382610755745084, "grad_norm": 0.578125, "learning_rate": 0.00019859728470998666, "loss": 1.1632, "step": 1136 }, { "epoch": 0.05387348969438522, "grad_norm": 0.06591796875, "learning_rate": 0.00019859479795638658, "loss": 0.0056, "step": 1137 }, { "epoch": 0.05392087183131959, "grad_norm": 0.138671875, "learning_rate": 0.0001985923090160589, "loss": 0.0159, "step": 1138 }, { "epoch": 0.05396825396825397, "grad_norm": 0.73828125, "learning_rate": 0.00019858981788905883, "loss": 1.1557, "step": 1139 }, { "epoch": 0.05401563610518834, "grad_norm": 0.578125, "learning_rate": 0.0001985873245754416, "loss": 0.7595, "step": 1140 }, { "epoch": 0.05406301824212272, "grad_norm": 0.53125, "learning_rate": 0.00019858482907526254, "loss": 1.2538, "step": 1141 }, { "epoch": 0.054110400379057094, "grad_norm": 0.73828125, "learning_rate": 0.00019858233138857697, "loss": 0.5855, "step": 1142 }, { "epoch": 0.05415778251599147, "grad_norm": 0.55859375, "learning_rate": 0.0001985798315154403, "loss": 0.5724, "step": 1143 }, { "epoch": 0.054205164652925845, "grad_norm": 0.29296875, "learning_rate": 0.00019857732945590794, "loss": 0.2013, "step": 1144 }, { "epoch": 0.054252546789860225, "grad_norm": 0.388671875, "learning_rate": 0.00019857482521003545, "loss": 1.0684, "step": 1145 }, { "epoch": 0.0542999289267946, "grad_norm": 0.5390625, "learning_rate": 0.0001985723187778783, "loss": 0.821, "step": 1146 }, { "epoch": 0.054347311063728976, "grad_norm": 0.50390625, "learning_rate": 0.00019856981015949215, "loss": 0.8093, "step": 1147 }, { "epoch": 0.05439469320066335, "grad_norm": 0.5078125, "learning_rate": 0.00019856729935493258, "loss": 0.831, "step": 1148 }, { "epoch": 0.05444207533759773, "grad_norm": 0.439453125, "learning_rate": 0.0001985647863642553, "loss": 0.9024, "step": 1149 }, { "epoch": 0.0544894574745321, "grad_norm": 0.9140625, "learning_rate": 0.00019856227118751605, "loss": 0.8989, "step": 1150 }, { "epoch": 0.05453683961146648, "grad_norm": 0.65234375, "learning_rate": 0.0001985597538247706, "loss": 0.4668, "step": 1151 }, { "epoch": 0.05458422174840085, "grad_norm": 0.248046875, "learning_rate": 0.00019855723427607483, "loss": 0.0173, "step": 1152 }, { "epoch": 0.05463160388533523, "grad_norm": 0.5390625, "learning_rate": 0.0001985547125414845, "loss": 1.0429, "step": 1153 }, { "epoch": 0.0546789860222696, "grad_norm": 0.41015625, "learning_rate": 0.00019855218862105568, "loss": 1.1953, "step": 1154 }, { "epoch": 0.05472636815920398, "grad_norm": 0.54296875, "learning_rate": 0.0001985496625148443, "loss": 0.9374, "step": 1155 }, { "epoch": 0.05477375029613835, "grad_norm": 0.5234375, "learning_rate": 0.00019854713422290637, "loss": 1.2047, "step": 1156 }, { "epoch": 0.05482113243307273, "grad_norm": 0.57421875, "learning_rate": 0.00019854460374529794, "loss": 1.514, "step": 1157 }, { "epoch": 0.054868514570007104, "grad_norm": 0.859375, "learning_rate": 0.00019854207108207521, "loss": 0.2, "step": 1158 }, { "epoch": 0.05491589670694148, "grad_norm": 0.953125, "learning_rate": 0.00019853953623329428, "loss": 0.8601, "step": 1159 }, { "epoch": 0.054963278843875855, "grad_norm": 0.5703125, "learning_rate": 0.00019853699919901137, "loss": 0.6545, "step": 1160 }, { "epoch": 0.055010660980810235, "grad_norm": 0.50390625, "learning_rate": 0.0001985344599792828, "loss": 1.0417, "step": 1161 }, { "epoch": 0.055058043117744614, "grad_norm": 0.51953125, "learning_rate": 0.00019853191857416487, "loss": 1.3017, "step": 1162 }, { "epoch": 0.055105425254678986, "grad_norm": 0.85546875, "learning_rate": 0.00019852937498371394, "loss": 0.6266, "step": 1163 }, { "epoch": 0.055152807391613365, "grad_norm": 0.515625, "learning_rate": 0.0001985268292079864, "loss": 0.9562, "step": 1164 }, { "epoch": 0.05520018952854774, "grad_norm": 0.2099609375, "learning_rate": 0.00019852428124703876, "loss": 0.0217, "step": 1165 }, { "epoch": 0.055247571665482116, "grad_norm": 0.5078125, "learning_rate": 0.0001985217311009275, "loss": 0.8682, "step": 1166 }, { "epoch": 0.05529495380241649, "grad_norm": 0.447265625, "learning_rate": 0.00019851917876970916, "loss": 1.2461, "step": 1167 }, { "epoch": 0.05534233593935087, "grad_norm": 0.396484375, "learning_rate": 0.00019851662425344037, "loss": 0.0585, "step": 1168 }, { "epoch": 0.05538971807628524, "grad_norm": 0.6015625, "learning_rate": 0.0001985140675521778, "loss": 0.1793, "step": 1169 }, { "epoch": 0.05543710021321962, "grad_norm": 0.341796875, "learning_rate": 0.00019851150866597816, "loss": 0.1714, "step": 1170 }, { "epoch": 0.05548448235015399, "grad_norm": 0.546875, "learning_rate": 0.0001985089475948982, "loss": 0.1487, "step": 1171 }, { "epoch": 0.05553186448708837, "grad_norm": 0.5390625, "learning_rate": 0.00019850638433899467, "loss": 0.7608, "step": 1172 }, { "epoch": 0.05557924662402274, "grad_norm": 0.5234375, "learning_rate": 0.00019850381889832447, "loss": 1.2617, "step": 1173 }, { "epoch": 0.05562662876095712, "grad_norm": 0.5546875, "learning_rate": 0.0001985012512729445, "loss": 1.1914, "step": 1174 }, { "epoch": 0.05567401089789149, "grad_norm": 0.46484375, "learning_rate": 0.00019849868146291168, "loss": 1.0033, "step": 1175 }, { "epoch": 0.05572139303482587, "grad_norm": 0.59765625, "learning_rate": 0.00019849610946828306, "loss": 0.8603, "step": 1176 }, { "epoch": 0.055768775171760245, "grad_norm": 0.462890625, "learning_rate": 0.0001984935352891156, "loss": 0.8712, "step": 1177 }, { "epoch": 0.055816157308694624, "grad_norm": 0.484375, "learning_rate": 0.00019849095892546646, "loss": 0.648, "step": 1178 }, { "epoch": 0.055863539445628996, "grad_norm": 0.6171875, "learning_rate": 0.00019848838037739275, "loss": 1.5737, "step": 1179 }, { "epoch": 0.055910921582563375, "grad_norm": 0.416015625, "learning_rate": 0.0001984857996449517, "loss": 0.2043, "step": 1180 }, { "epoch": 0.05595830371949775, "grad_norm": 0.546875, "learning_rate": 0.00019848321672820047, "loss": 0.8374, "step": 1181 }, { "epoch": 0.056005685856432126, "grad_norm": 0.490234375, "learning_rate": 0.0001984806316271964, "loss": 0.6035, "step": 1182 }, { "epoch": 0.0560530679933665, "grad_norm": 1.1015625, "learning_rate": 0.00019847804434199685, "loss": 0.3609, "step": 1183 }, { "epoch": 0.05610045013030088, "grad_norm": 0.42578125, "learning_rate": 0.00019847545487265914, "loss": 0.8299, "step": 1184 }, { "epoch": 0.05614783226723525, "grad_norm": 0.396484375, "learning_rate": 0.00019847286321924074, "loss": 0.2107, "step": 1185 }, { "epoch": 0.05619521440416963, "grad_norm": 0.65625, "learning_rate": 0.00019847026938179914, "loss": 1.2353, "step": 1186 }, { "epoch": 0.056242596541104, "grad_norm": 0.09765625, "learning_rate": 0.00019846767336039183, "loss": 0.0066, "step": 1187 }, { "epoch": 0.05628997867803838, "grad_norm": 1.609375, "learning_rate": 0.0001984650751550764, "loss": 0.1846, "step": 1188 }, { "epoch": 0.05633736081497275, "grad_norm": 0.490234375, "learning_rate": 0.0001984624747659105, "loss": 0.9159, "step": 1189 }, { "epoch": 0.05638474295190713, "grad_norm": 0.73046875, "learning_rate": 0.0001984598721929518, "loss": 0.0875, "step": 1190 }, { "epoch": 0.05643212508884151, "grad_norm": 0.48828125, "learning_rate": 0.00019845726743625798, "loss": 1.0923, "step": 1191 }, { "epoch": 0.05647950722577588, "grad_norm": 0.498046875, "learning_rate": 0.00019845466049588686, "loss": 1.2815, "step": 1192 }, { "epoch": 0.05652688936271026, "grad_norm": 0.6015625, "learning_rate": 0.00019845205137189624, "loss": 1.1373, "step": 1193 }, { "epoch": 0.056574271499644634, "grad_norm": 0.51171875, "learning_rate": 0.00019844944006434397, "loss": 1.4879, "step": 1194 }, { "epoch": 0.05662165363657901, "grad_norm": 0.43359375, "learning_rate": 0.000198446826573288, "loss": 0.7608, "step": 1195 }, { "epoch": 0.056669035773513385, "grad_norm": 1.078125, "learning_rate": 0.00019844421089878627, "loss": 0.2131, "step": 1196 }, { "epoch": 0.056716417910447764, "grad_norm": 0.671875, "learning_rate": 0.0001984415930408968, "loss": 1.0517, "step": 1197 }, { "epoch": 0.056763800047382136, "grad_norm": 0.60546875, "learning_rate": 0.00019843897299967765, "loss": 0.9982, "step": 1198 }, { "epoch": 0.056811182184316515, "grad_norm": 0.52734375, "learning_rate": 0.00019843635077518691, "loss": 1.1964, "step": 1199 }, { "epoch": 0.05685856432125089, "grad_norm": 0.65625, "learning_rate": 0.00019843372636748282, "loss": 1.0349, "step": 1200 }, { "epoch": 0.056905946458185266, "grad_norm": 0.490234375, "learning_rate": 0.00019843109977662348, "loss": 0.8252, "step": 1201 }, { "epoch": 0.05695332859511964, "grad_norm": 0.5234375, "learning_rate": 0.00019842847100266718, "loss": 0.7719, "step": 1202 }, { "epoch": 0.05700071073205402, "grad_norm": 0.4609375, "learning_rate": 0.00019842584004567225, "loss": 1.2267, "step": 1203 }, { "epoch": 0.05704809286898839, "grad_norm": 0.53125, "learning_rate": 0.00019842320690569704, "loss": 1.3288, "step": 1204 }, { "epoch": 0.05709547500592277, "grad_norm": 0.52734375, "learning_rate": 0.00019842057158279993, "loss": 1.3921, "step": 1205 }, { "epoch": 0.05714285714285714, "grad_norm": 0.62890625, "learning_rate": 0.0001984179340770394, "loss": 0.2586, "step": 1206 }, { "epoch": 0.05719023927979152, "grad_norm": 0.8671875, "learning_rate": 0.00019841529438847387, "loss": 0.8311, "step": 1207 }, { "epoch": 0.05723762141672589, "grad_norm": 0.443359375, "learning_rate": 0.00019841265251716196, "loss": 0.0966, "step": 1208 }, { "epoch": 0.05728500355366027, "grad_norm": 0.44921875, "learning_rate": 0.00019841000846316224, "loss": 1.0222, "step": 1209 }, { "epoch": 0.057332385690594644, "grad_norm": 0.67578125, "learning_rate": 0.00019840736222653335, "loss": 0.2326, "step": 1210 }, { "epoch": 0.05737976782752902, "grad_norm": 0.46484375, "learning_rate": 0.00019840471380733402, "loss": 1.1139, "step": 1211 }, { "epoch": 0.057427149964463395, "grad_norm": 0.419921875, "learning_rate": 0.00019840206320562295, "loss": 0.9745, "step": 1212 }, { "epoch": 0.057474532101397774, "grad_norm": 0.4453125, "learning_rate": 0.00019839941042145887, "loss": 0.6654, "step": 1213 }, { "epoch": 0.057521914238332146, "grad_norm": 0.6015625, "learning_rate": 0.00019839675545490072, "loss": 0.2254, "step": 1214 }, { "epoch": 0.057569296375266525, "grad_norm": 0.482421875, "learning_rate": 0.00019839409830600733, "loss": 0.9748, "step": 1215 }, { "epoch": 0.0576166785122009, "grad_norm": 0.34765625, "learning_rate": 0.00019839143897483767, "loss": 0.2524, "step": 1216 }, { "epoch": 0.057664060649135276, "grad_norm": 0.330078125, "learning_rate": 0.00019838877746145066, "loss": 0.1985, "step": 1217 }, { "epoch": 0.05771144278606965, "grad_norm": 0.39453125, "learning_rate": 0.00019838611376590538, "loss": 0.1742, "step": 1218 }, { "epoch": 0.05775882492300403, "grad_norm": 0.486328125, "learning_rate": 0.0001983834478882609, "loss": 1.2711, "step": 1219 }, { "epoch": 0.0578062070599384, "grad_norm": 0.515625, "learning_rate": 0.00019838077982857634, "loss": 0.7279, "step": 1220 }, { "epoch": 0.05785358919687278, "grad_norm": 0.447265625, "learning_rate": 0.00019837810958691087, "loss": 0.6179, "step": 1221 }, { "epoch": 0.05790097133380716, "grad_norm": 0.546875, "learning_rate": 0.0001983754371633237, "loss": 0.9773, "step": 1222 }, { "epoch": 0.05794835347074153, "grad_norm": 0.404296875, "learning_rate": 0.00019837276255787415, "loss": 0.3931, "step": 1223 }, { "epoch": 0.05799573560767591, "grad_norm": 0.515625, "learning_rate": 0.0001983700857706215, "loss": 1.3234, "step": 1224 }, { "epoch": 0.05804311774461028, "grad_norm": 0.39453125, "learning_rate": 0.00019836740680162512, "loss": 0.8854, "step": 1225 }, { "epoch": 0.05809049988154466, "grad_norm": 0.53515625, "learning_rate": 0.00019836472565094442, "loss": 0.9522, "step": 1226 }, { "epoch": 0.05813788201847903, "grad_norm": 0.44921875, "learning_rate": 0.00019836204231863888, "loss": 1.0914, "step": 1227 }, { "epoch": 0.05818526415541341, "grad_norm": 0.7890625, "learning_rate": 0.00019835935680476803, "loss": 1.223, "step": 1228 }, { "epoch": 0.058232646292347784, "grad_norm": 0.439453125, "learning_rate": 0.00019835666910939138, "loss": 1.3825, "step": 1229 }, { "epoch": 0.05828002842928216, "grad_norm": 0.5625, "learning_rate": 0.0001983539792325686, "loss": 1.5757, "step": 1230 }, { "epoch": 0.058327410566216535, "grad_norm": 0.54296875, "learning_rate": 0.00019835128717435934, "loss": 0.8738, "step": 1231 }, { "epoch": 0.058374792703150914, "grad_norm": 0.423828125, "learning_rate": 0.00019834859293482328, "loss": 0.8583, "step": 1232 }, { "epoch": 0.058422174840085286, "grad_norm": 0.453125, "learning_rate": 0.0001983458965140202, "loss": 0.2097, "step": 1233 }, { "epoch": 0.058469556977019665, "grad_norm": 0.46875, "learning_rate": 0.00019834319791200983, "loss": 0.9326, "step": 1234 }, { "epoch": 0.05851693911395404, "grad_norm": 0.45703125, "learning_rate": 0.0001983404971288521, "loss": 1.0178, "step": 1235 }, { "epoch": 0.05856432125088842, "grad_norm": 0.55078125, "learning_rate": 0.00019833779416460692, "loss": 1.1593, "step": 1236 }, { "epoch": 0.05861170338782279, "grad_norm": 0.50390625, "learning_rate": 0.0001983350890193342, "loss": 1.1695, "step": 1237 }, { "epoch": 0.05865908552475717, "grad_norm": 1.203125, "learning_rate": 0.00019833238169309395, "loss": 0.4912, "step": 1238 }, { "epoch": 0.05870646766169154, "grad_norm": 0.64453125, "learning_rate": 0.0001983296721859462, "loss": 0.3485, "step": 1239 }, { "epoch": 0.05875384979862592, "grad_norm": 0.5390625, "learning_rate": 0.00019832696049795108, "loss": 0.9958, "step": 1240 }, { "epoch": 0.05880123193556029, "grad_norm": 0.49609375, "learning_rate": 0.0001983242466291687, "loss": 1.262, "step": 1241 }, { "epoch": 0.05884861407249467, "grad_norm": 0.7265625, "learning_rate": 0.00019832153057965926, "loss": 0.1754, "step": 1242 }, { "epoch": 0.05889599620942904, "grad_norm": 0.72265625, "learning_rate": 0.00019831881234948296, "loss": 0.0748, "step": 1243 }, { "epoch": 0.05894337834636342, "grad_norm": 0.353515625, "learning_rate": 0.00019831609193870015, "loss": 0.2001, "step": 1244 }, { "epoch": 0.058990760483297794, "grad_norm": 0.474609375, "learning_rate": 0.00019831336934737117, "loss": 1.4368, "step": 1245 }, { "epoch": 0.05903814262023217, "grad_norm": 1.2265625, "learning_rate": 0.00019831064457555636, "loss": 0.8099, "step": 1246 }, { "epoch": 0.059085524757166545, "grad_norm": 0.53125, "learning_rate": 0.00019830791762331617, "loss": 1.5033, "step": 1247 }, { "epoch": 0.059132906894100924, "grad_norm": 0.57421875, "learning_rate": 0.0001983051884907111, "loss": 1.082, "step": 1248 }, { "epoch": 0.059180289031035296, "grad_norm": 0.357421875, "learning_rate": 0.0001983024571778016, "loss": 0.442, "step": 1249 }, { "epoch": 0.059227671167969675, "grad_norm": 0.4453125, "learning_rate": 0.00019829972368464835, "loss": 0.855, "step": 1250 }, { "epoch": 0.059275053304904055, "grad_norm": 0.03271484375, "learning_rate": 0.00019829698801131194, "loss": 0.0025, "step": 1251 }, { "epoch": 0.05932243544183843, "grad_norm": 0.69921875, "learning_rate": 0.00019829425015785305, "loss": 1.1192, "step": 1252 }, { "epoch": 0.059369817578772806, "grad_norm": 0.3203125, "learning_rate": 0.00019829151012433233, "loss": 0.1857, "step": 1253 }, { "epoch": 0.05941719971570718, "grad_norm": 0.419921875, "learning_rate": 0.00019828876791081065, "loss": 0.0894, "step": 1254 }, { "epoch": 0.05946458185264156, "grad_norm": 0.41796875, "learning_rate": 0.00019828602351734879, "loss": 1.186, "step": 1255 }, { "epoch": 0.05951196398957593, "grad_norm": 0.435546875, "learning_rate": 0.0001982832769440076, "loss": 0.7192, "step": 1256 }, { "epoch": 0.05955934612651031, "grad_norm": 0.51953125, "learning_rate": 0.00019828052819084803, "loss": 1.0787, "step": 1257 }, { "epoch": 0.05960672826344468, "grad_norm": 0.451171875, "learning_rate": 0.00019827777725793104, "loss": 1.3895, "step": 1258 }, { "epoch": 0.05965411040037906, "grad_norm": 0.43359375, "learning_rate": 0.0001982750241453176, "loss": 0.7737, "step": 1259 }, { "epoch": 0.05970149253731343, "grad_norm": 0.44140625, "learning_rate": 0.0001982722688530688, "loss": 0.8477, "step": 1260 }, { "epoch": 0.05974887467424781, "grad_norm": 0.380859375, "learning_rate": 0.00019826951138124578, "loss": 0.0817, "step": 1261 }, { "epoch": 0.05979625681118218, "grad_norm": 0.62109375, "learning_rate": 0.00019826675172990966, "loss": 1.0333, "step": 1262 }, { "epoch": 0.05984363894811656, "grad_norm": 0.474609375, "learning_rate": 0.00019826398989912167, "loss": 0.9203, "step": 1263 }, { "epoch": 0.059891021085050934, "grad_norm": 0.6171875, "learning_rate": 0.00019826122588894305, "loss": 0.1183, "step": 1264 }, { "epoch": 0.05993840322198531, "grad_norm": 0.49609375, "learning_rate": 0.00019825845969943505, "loss": 1.0712, "step": 1265 }, { "epoch": 0.059985785358919685, "grad_norm": 0.53515625, "learning_rate": 0.00019825569133065913, "loss": 0.9647, "step": 1266 }, { "epoch": 0.060033167495854065, "grad_norm": 0.326171875, "learning_rate": 0.0001982529207826766, "loss": 0.3805, "step": 1267 }, { "epoch": 0.06008054963278844, "grad_norm": 0.26953125, "learning_rate": 0.000198250148055549, "loss": 0.5593, "step": 1268 }, { "epoch": 0.060127931769722816, "grad_norm": 0.474609375, "learning_rate": 0.0001982473731493377, "loss": 0.9391, "step": 1269 }, { "epoch": 0.06017531390665719, "grad_norm": 0.48828125, "learning_rate": 0.00019824459606410436, "loss": 0.6489, "step": 1270 }, { "epoch": 0.06022269604359157, "grad_norm": 0.447265625, "learning_rate": 0.0001982418167999105, "loss": 0.7921, "step": 1271 }, { "epoch": 0.06027007818052594, "grad_norm": 0.6015625, "learning_rate": 0.00019823903535681777, "loss": 0.7933, "step": 1272 }, { "epoch": 0.06031746031746032, "grad_norm": 0.69140625, "learning_rate": 0.0001982362517348879, "loss": 0.8601, "step": 1273 }, { "epoch": 0.06036484245439469, "grad_norm": 0.55078125, "learning_rate": 0.0001982334659341826, "loss": 0.7573, "step": 1274 }, { "epoch": 0.06041222459132907, "grad_norm": 0.58203125, "learning_rate": 0.00019823067795476365, "loss": 0.2582, "step": 1275 }, { "epoch": 0.06045960672826344, "grad_norm": 0.42578125, "learning_rate": 0.0001982278877966929, "loss": 0.8929, "step": 1276 }, { "epoch": 0.06050698886519782, "grad_norm": 0.427734375, "learning_rate": 0.0001982250954600322, "loss": 0.9258, "step": 1277 }, { "epoch": 0.06055437100213219, "grad_norm": 0.443359375, "learning_rate": 0.00019822230094484355, "loss": 1.0504, "step": 1278 }, { "epoch": 0.06060175313906657, "grad_norm": 0.5234375, "learning_rate": 0.00019821950425118887, "loss": 0.8885, "step": 1279 }, { "epoch": 0.06064913527600095, "grad_norm": 0.478515625, "learning_rate": 0.00019821670537913022, "loss": 0.1725, "step": 1280 }, { "epoch": 0.06069651741293532, "grad_norm": 0.408203125, "learning_rate": 0.0001982139043287296, "loss": 0.65, "step": 1281 }, { "epoch": 0.0607438995498697, "grad_norm": 0.64453125, "learning_rate": 0.0001982111011000493, "loss": 0.0851, "step": 1282 }, { "epoch": 0.060791281686804075, "grad_norm": 0.52734375, "learning_rate": 0.0001982082956931513, "loss": 1.096, "step": 1283 }, { "epoch": 0.060838663823738454, "grad_norm": 0.302734375, "learning_rate": 0.0001982054881080979, "loss": 0.0401, "step": 1284 }, { "epoch": 0.060886045960672826, "grad_norm": 0.515625, "learning_rate": 0.00019820267834495144, "loss": 1.1779, "step": 1285 }, { "epoch": 0.060933428097607205, "grad_norm": 0.5859375, "learning_rate": 0.00019819986640377414, "loss": 1.2213, "step": 1286 }, { "epoch": 0.06098081023454158, "grad_norm": 0.53515625, "learning_rate": 0.00019819705228462842, "loss": 1.2332, "step": 1287 }, { "epoch": 0.061028192371475956, "grad_norm": 0.515625, "learning_rate": 0.00019819423598757668, "loss": 1.1858, "step": 1288 }, { "epoch": 0.06107557450841033, "grad_norm": 0.60546875, "learning_rate": 0.00019819141751268134, "loss": 0.0969, "step": 1289 }, { "epoch": 0.06112295664534471, "grad_norm": 0.453125, "learning_rate": 0.00019818859686000496, "loss": 1.1394, "step": 1290 }, { "epoch": 0.06117033878227908, "grad_norm": 0.51953125, "learning_rate": 0.00019818577402961013, "loss": 1.091, "step": 1291 }, { "epoch": 0.06121772091921346, "grad_norm": 0.81640625, "learning_rate": 0.00019818294902155937, "loss": 0.5775, "step": 1292 }, { "epoch": 0.06126510305614783, "grad_norm": 0.5859375, "learning_rate": 0.00019818012183591538, "loss": 0.8449, "step": 1293 }, { "epoch": 0.06131248519308221, "grad_norm": 0.61328125, "learning_rate": 0.0001981772924727409, "loss": 1.2039, "step": 1294 }, { "epoch": 0.06135986733001658, "grad_norm": 0.515625, "learning_rate": 0.0001981744609320986, "loss": 0.0453, "step": 1295 }, { "epoch": 0.06140724946695096, "grad_norm": 0.671875, "learning_rate": 0.00019817162721405134, "loss": 0.814, "step": 1296 }, { "epoch": 0.06145463160388533, "grad_norm": 0.52734375, "learning_rate": 0.00019816879131866197, "loss": 0.6862, "step": 1297 }, { "epoch": 0.06150201374081971, "grad_norm": 0.4765625, "learning_rate": 0.00019816595324599335, "loss": 0.1128, "step": 1298 }, { "epoch": 0.061549395877754084, "grad_norm": 0.21875, "learning_rate": 0.0001981631129961085, "loss": 0.0284, "step": 1299 }, { "epoch": 0.061596778014688464, "grad_norm": 0.61328125, "learning_rate": 0.00019816027056907034, "loss": 1.3758, "step": 1300 }, { "epoch": 0.061644160151622836, "grad_norm": 0.8515625, "learning_rate": 0.00019815742596494192, "loss": 0.558, "step": 1301 }, { "epoch": 0.061691542288557215, "grad_norm": 0.53125, "learning_rate": 0.00019815457918378635, "loss": 0.3337, "step": 1302 }, { "epoch": 0.06173892442549159, "grad_norm": 0.63671875, "learning_rate": 0.00019815173022566675, "loss": 1.1537, "step": 1303 }, { "epoch": 0.061786306562425966, "grad_norm": 0.52734375, "learning_rate": 0.00019814887909064632, "loss": 1.2278, "step": 1304 }, { "epoch": 0.06183368869936034, "grad_norm": 0.5078125, "learning_rate": 0.00019814602577878835, "loss": 1.1332, "step": 1305 }, { "epoch": 0.06188107083629472, "grad_norm": 0.546875, "learning_rate": 0.000198143170290156, "loss": 1.1716, "step": 1306 }, { "epoch": 0.06192845297322909, "grad_norm": 0.515625, "learning_rate": 0.0001981403126248127, "loss": 0.3662, "step": 1307 }, { "epoch": 0.06197583511016347, "grad_norm": 0.4609375, "learning_rate": 0.0001981374527828218, "loss": 1.4196, "step": 1308 }, { "epoch": 0.06202321724709784, "grad_norm": 1.546875, "learning_rate": 0.00019813459076424672, "loss": 0.5595, "step": 1309 }, { "epoch": 0.06207059938403222, "grad_norm": 0.4609375, "learning_rate": 0.00019813172656915094, "loss": 0.2389, "step": 1310 }, { "epoch": 0.0621179815209666, "grad_norm": 0.45703125, "learning_rate": 0.000198128860197598, "loss": 0.6669, "step": 1311 }, { "epoch": 0.06216536365790097, "grad_norm": 0.86328125, "learning_rate": 0.00019812599164965148, "loss": 0.5671, "step": 1312 }, { "epoch": 0.06221274579483535, "grad_norm": 0.416015625, "learning_rate": 0.00019812312092537497, "loss": 1.0529, "step": 1313 }, { "epoch": 0.06226012793176972, "grad_norm": 0.470703125, "learning_rate": 0.00019812024802483212, "loss": 0.9078, "step": 1314 }, { "epoch": 0.0623075100687041, "grad_norm": 0.55859375, "learning_rate": 0.00019811737294808673, "loss": 1.1995, "step": 1315 }, { "epoch": 0.062354892205638474, "grad_norm": 0.50390625, "learning_rate": 0.00019811449569520252, "loss": 1.0019, "step": 1316 }, { "epoch": 0.06240227434257285, "grad_norm": 0.486328125, "learning_rate": 0.00019811161626624328, "loss": 0.8394, "step": 1317 }, { "epoch": 0.062449656479507225, "grad_norm": 0.58203125, "learning_rate": 0.00019810873466127288, "loss": 1.4806, "step": 1318 }, { "epoch": 0.062497038616441604, "grad_norm": 0.5703125, "learning_rate": 0.00019810585088035526, "loss": 0.7276, "step": 1319 }, { "epoch": 0.06254442075337598, "grad_norm": 0.5078125, "learning_rate": 0.0001981029649235544, "loss": 0.8709, "step": 1320 }, { "epoch": 0.06259180289031036, "grad_norm": 0.412109375, "learning_rate": 0.00019810007679093422, "loss": 0.0994, "step": 1321 }, { "epoch": 0.06263918502724473, "grad_norm": 0.57421875, "learning_rate": 0.00019809718648255888, "loss": 1.3156, "step": 1322 }, { "epoch": 0.0626865671641791, "grad_norm": 0.4765625, "learning_rate": 0.00019809429399849238, "loss": 0.8856, "step": 1323 }, { "epoch": 0.06273394930111348, "grad_norm": 0.486328125, "learning_rate": 0.00019809139933879897, "loss": 1.0598, "step": 1324 }, { "epoch": 0.06278133143804786, "grad_norm": 0.59375, "learning_rate": 0.00019808850250354278, "loss": 1.2765, "step": 1325 }, { "epoch": 0.06282871357498224, "grad_norm": 0.53125, "learning_rate": 0.00019808560349278808, "loss": 1.4646, "step": 1326 }, { "epoch": 0.0628760957119166, "grad_norm": 0.470703125, "learning_rate": 0.0001980827023065992, "loss": 1.0001, "step": 1327 }, { "epoch": 0.06292347784885098, "grad_norm": 0.275390625, "learning_rate": 0.00019807979894504043, "loss": 0.1755, "step": 1328 }, { "epoch": 0.06297085998578536, "grad_norm": 0.5, "learning_rate": 0.00019807689340817618, "loss": 1.2925, "step": 1329 }, { "epoch": 0.06301824212271974, "grad_norm": 0.65234375, "learning_rate": 0.0001980739856960709, "loss": 0.8089, "step": 1330 }, { "epoch": 0.0630656242596541, "grad_norm": 0.609375, "learning_rate": 0.0001980710758087891, "loss": 1.3729, "step": 1331 }, { "epoch": 0.06311300639658848, "grad_norm": 0.35546875, "learning_rate": 0.0001980681637463953, "loss": 0.2018, "step": 1332 }, { "epoch": 0.06316038853352286, "grad_norm": 0.84765625, "learning_rate": 0.00019806524950895406, "loss": 0.626, "step": 1333 }, { "epoch": 0.06320777067045724, "grad_norm": 0.50390625, "learning_rate": 0.00019806233309653008, "loss": 1.0182, "step": 1334 }, { "epoch": 0.06325515280739161, "grad_norm": 0.640625, "learning_rate": 0.00019805941450918798, "loss": 1.1142, "step": 1335 }, { "epoch": 0.06330253494432599, "grad_norm": 0.91015625, "learning_rate": 0.0001980564937469925, "loss": 0.4311, "step": 1336 }, { "epoch": 0.06334991708126037, "grad_norm": 0.3125, "learning_rate": 0.00019805357081000845, "loss": 0.3452, "step": 1337 }, { "epoch": 0.06339729921819474, "grad_norm": 0.451171875, "learning_rate": 0.00019805064569830067, "loss": 0.0286, "step": 1338 }, { "epoch": 0.06344468135512911, "grad_norm": 0.671875, "learning_rate": 0.000198047718411934, "loss": 1.5651, "step": 1339 }, { "epoch": 0.06349206349206349, "grad_norm": 0.478515625, "learning_rate": 0.00019804478895097335, "loss": 0.9273, "step": 1340 }, { "epoch": 0.06353944562899787, "grad_norm": 0.361328125, "learning_rate": 0.00019804185731548367, "loss": 0.4678, "step": 1341 }, { "epoch": 0.06358682776593225, "grad_norm": 0.5859375, "learning_rate": 0.0001980389235055301, "loss": 0.9688, "step": 1342 }, { "epoch": 0.06363420990286663, "grad_norm": 1.09375, "learning_rate": 0.0001980359875211776, "loss": 0.4255, "step": 1343 }, { "epoch": 0.06368159203980099, "grad_norm": 0.5546875, "learning_rate": 0.00019803304936249133, "loss": 1.298, "step": 1344 }, { "epoch": 0.06372897417673537, "grad_norm": 0.5625, "learning_rate": 0.00019803010902953643, "loss": 0.4256, "step": 1345 }, { "epoch": 0.06377635631366975, "grad_norm": 0.5078125, "learning_rate": 0.00019802716652237811, "loss": 1.1817, "step": 1346 }, { "epoch": 0.06382373845060413, "grad_norm": 0.6640625, "learning_rate": 0.0001980242218410817, "loss": 0.2303, "step": 1347 }, { "epoch": 0.0638711205875385, "grad_norm": 0.470703125, "learning_rate": 0.00019802127498571244, "loss": 0.8684, "step": 1348 }, { "epoch": 0.06391850272447287, "grad_norm": 0.5078125, "learning_rate": 0.00019801832595633568, "loss": 1.2578, "step": 1349 }, { "epoch": 0.06396588486140725, "grad_norm": 0.48046875, "learning_rate": 0.0001980153747530169, "loss": 0.9623, "step": 1350 }, { "epoch": 0.06401326699834163, "grad_norm": 0.39453125, "learning_rate": 0.00019801242137582148, "loss": 0.8354, "step": 1351 }, { "epoch": 0.064060649135276, "grad_norm": 0.52734375, "learning_rate": 0.00019800946582481497, "loss": 0.9811, "step": 1352 }, { "epoch": 0.06410803127221038, "grad_norm": 0.4765625, "learning_rate": 0.0001980065081000629, "loss": 0.7545, "step": 1353 }, { "epoch": 0.06415541340914475, "grad_norm": 0.5859375, "learning_rate": 0.00019800354820163088, "loss": 0.7627, "step": 1354 }, { "epoch": 0.06420279554607913, "grad_norm": 0.470703125, "learning_rate": 0.00019800058612958453, "loss": 0.6435, "step": 1355 }, { "epoch": 0.0642501776830135, "grad_norm": 1.421875, "learning_rate": 0.00019799762188398953, "loss": 0.9989, "step": 1356 }, { "epoch": 0.06429755981994788, "grad_norm": 0.484375, "learning_rate": 0.00019799465546491173, "loss": 0.8533, "step": 1357 }, { "epoch": 0.06434494195688226, "grad_norm": 0.306640625, "learning_rate": 0.0001979916868724168, "loss": 0.2298, "step": 1358 }, { "epoch": 0.06439232409381664, "grad_norm": 0.51171875, "learning_rate": 0.00019798871610657068, "loss": 1.1798, "step": 1359 }, { "epoch": 0.064439706230751, "grad_norm": 0.408203125, "learning_rate": 0.00019798574316743915, "loss": 0.8694, "step": 1360 }, { "epoch": 0.06448708836768538, "grad_norm": 0.4609375, "learning_rate": 0.00019798276805508826, "loss": 1.2998, "step": 1361 }, { "epoch": 0.06453447050461976, "grad_norm": 0.609375, "learning_rate": 0.0001979797907695839, "loss": 0.8954, "step": 1362 }, { "epoch": 0.06458185264155414, "grad_norm": 0.51171875, "learning_rate": 0.0001979768113109922, "loss": 1.4044, "step": 1363 }, { "epoch": 0.0646292347784885, "grad_norm": 0.5, "learning_rate": 0.00019797382967937912, "loss": 0.919, "step": 1364 }, { "epoch": 0.06467661691542288, "grad_norm": 0.390625, "learning_rate": 0.0001979708458748109, "loss": 0.0239, "step": 1365 }, { "epoch": 0.06472399905235726, "grad_norm": 0.53125, "learning_rate": 0.00019796785989735364, "loss": 1.1159, "step": 1366 }, { "epoch": 0.06477138118929164, "grad_norm": 0.5625, "learning_rate": 0.00019796487174707363, "loss": 0.0594, "step": 1367 }, { "epoch": 0.064818763326226, "grad_norm": 0.486328125, "learning_rate": 0.00019796188142403708, "loss": 1.0021, "step": 1368 }, { "epoch": 0.06486614546316039, "grad_norm": 0.78125, "learning_rate": 0.0001979588889283104, "loss": 0.4291, "step": 1369 }, { "epoch": 0.06491352760009476, "grad_norm": 0.478515625, "learning_rate": 0.00019795589425995985, "loss": 1.3191, "step": 1370 }, { "epoch": 0.06496090973702914, "grad_norm": 0.5390625, "learning_rate": 0.0001979528974190519, "loss": 1.1967, "step": 1371 }, { "epoch": 0.06500829187396352, "grad_norm": 0.458984375, "learning_rate": 0.00019794989840565307, "loss": 1.1394, "step": 1372 }, { "epoch": 0.06505567401089789, "grad_norm": 0.8359375, "learning_rate": 0.00019794689721982977, "loss": 0.9012, "step": 1373 }, { "epoch": 0.06510305614783227, "grad_norm": 0.453125, "learning_rate": 0.00019794389386164864, "loss": 0.1004, "step": 1374 }, { "epoch": 0.06515043828476665, "grad_norm": 0.2490234375, "learning_rate": 0.00019794088833117627, "loss": 0.4262, "step": 1375 }, { "epoch": 0.06519782042170102, "grad_norm": 0.546875, "learning_rate": 0.00019793788062847932, "loss": 1.051, "step": 1376 }, { "epoch": 0.06524520255863539, "grad_norm": 0.419921875, "learning_rate": 0.00019793487075362448, "loss": 0.4535, "step": 1377 }, { "epoch": 0.06529258469556977, "grad_norm": 0.828125, "learning_rate": 0.00019793185870667856, "loss": 0.2997, "step": 1378 }, { "epoch": 0.06533996683250415, "grad_norm": 0.458984375, "learning_rate": 0.00019792884448770827, "loss": 0.7945, "step": 1379 }, { "epoch": 0.06538734896943853, "grad_norm": 0.484375, "learning_rate": 0.00019792582809678057, "loss": 0.6099, "step": 1380 }, { "epoch": 0.06543473110637289, "grad_norm": 0.85546875, "learning_rate": 0.0001979228095339623, "loss": 0.5786, "step": 1381 }, { "epoch": 0.06548211324330727, "grad_norm": 0.23828125, "learning_rate": 0.00019791978879932042, "loss": 0.0349, "step": 1382 }, { "epoch": 0.06552949538024165, "grad_norm": 0.62890625, "learning_rate": 0.00019791676589292189, "loss": 0.3752, "step": 1383 }, { "epoch": 0.06557687751717603, "grad_norm": 0.5078125, "learning_rate": 0.0001979137408148338, "loss": 0.8369, "step": 1384 }, { "epoch": 0.0656242596541104, "grad_norm": 0.052978515625, "learning_rate": 0.00019791071356512326, "loss": 0.0036, "step": 1385 }, { "epoch": 0.06567164179104477, "grad_norm": 0.3125, "learning_rate": 0.00019790768414385736, "loss": 0.0213, "step": 1386 }, { "epoch": 0.06571902392797915, "grad_norm": 0.353515625, "learning_rate": 0.00019790465255110334, "loss": 0.1986, "step": 1387 }, { "epoch": 0.06576640606491353, "grad_norm": 0.61328125, "learning_rate": 0.00019790161878692836, "loss": 1.0587, "step": 1388 }, { "epoch": 0.0658137882018479, "grad_norm": 0.47265625, "learning_rate": 0.0001978985828513998, "loss": 1.2339, "step": 1389 }, { "epoch": 0.06586117033878228, "grad_norm": 0.66015625, "learning_rate": 0.00019789554474458493, "loss": 1.2089, "step": 1390 }, { "epoch": 0.06590855247571666, "grad_norm": 0.546875, "learning_rate": 0.00019789250446655116, "loss": 1.0333, "step": 1391 }, { "epoch": 0.06595593461265103, "grad_norm": 1.296875, "learning_rate": 0.0001978894620173659, "loss": 0.8205, "step": 1392 }, { "epoch": 0.0660033167495854, "grad_norm": 1.4765625, "learning_rate": 0.00019788641739709663, "loss": 0.6403, "step": 1393 }, { "epoch": 0.06605069888651978, "grad_norm": 0.5546875, "learning_rate": 0.00019788337060581092, "loss": 0.7673, "step": 1394 }, { "epoch": 0.06609808102345416, "grad_norm": 0.392578125, "learning_rate": 0.00019788032164357627, "loss": 0.7747, "step": 1395 }, { "epoch": 0.06614546316038854, "grad_norm": 0.412109375, "learning_rate": 0.0001978772705104604, "loss": 0.916, "step": 1396 }, { "epoch": 0.0661928452973229, "grad_norm": 0.30859375, "learning_rate": 0.00019787421720653087, "loss": 0.1706, "step": 1397 }, { "epoch": 0.06624022743425728, "grad_norm": 0.5546875, "learning_rate": 0.00019787116173185546, "loss": 1.6059, "step": 1398 }, { "epoch": 0.06628760957119166, "grad_norm": 0.44921875, "learning_rate": 0.00019786810408650195, "loss": 0.4818, "step": 1399 }, { "epoch": 0.06633499170812604, "grad_norm": 0.7578125, "learning_rate": 0.00019786504427053814, "loss": 0.3894, "step": 1400 }, { "epoch": 0.06638237384506042, "grad_norm": 0.61328125, "learning_rate": 0.0001978619822840319, "loss": 0.8414, "step": 1401 }, { "epoch": 0.06642975598199478, "grad_norm": 0.546875, "learning_rate": 0.00019785891812705106, "loss": 0.6433, "step": 1402 }, { "epoch": 0.06647713811892916, "grad_norm": 0.63671875, "learning_rate": 0.00019785585179966372, "loss": 1.1861, "step": 1403 }, { "epoch": 0.06652452025586354, "grad_norm": 0.43359375, "learning_rate": 0.0001978527833019378, "loss": 0.5291, "step": 1404 }, { "epoch": 0.06657190239279792, "grad_norm": 0.51171875, "learning_rate": 0.00019784971263394136, "loss": 1.133, "step": 1405 }, { "epoch": 0.06661928452973229, "grad_norm": 0.63671875, "learning_rate": 0.00019784663979574254, "loss": 0.9909, "step": 1406 }, { "epoch": 0.06666666666666667, "grad_norm": 0.9453125, "learning_rate": 0.00019784356478740945, "loss": 0.7536, "step": 1407 }, { "epoch": 0.06671404880360104, "grad_norm": 0.15625, "learning_rate": 0.00019784048760901031, "loss": 0.0098, "step": 1408 }, { "epoch": 0.06676143094053542, "grad_norm": 0.404296875, "learning_rate": 0.0001978374082606134, "loss": 1.0359, "step": 1409 }, { "epoch": 0.06680881307746979, "grad_norm": 0.55078125, "learning_rate": 0.00019783432674228696, "loss": 0.9342, "step": 1410 }, { "epoch": 0.06685619521440417, "grad_norm": 1.2421875, "learning_rate": 0.0001978312430540994, "loss": 1.0898, "step": 1411 }, { "epoch": 0.06690357735133855, "grad_norm": 0.57421875, "learning_rate": 0.000197828157196119, "loss": 1.1065, "step": 1412 }, { "epoch": 0.06695095948827293, "grad_norm": 1.0078125, "learning_rate": 0.00019782506916841435, "loss": 0.5028, "step": 1413 }, { "epoch": 0.06699834162520729, "grad_norm": 0.578125, "learning_rate": 0.00019782197897105384, "loss": 1.3315, "step": 1414 }, { "epoch": 0.06704572376214167, "grad_norm": 0.83984375, "learning_rate": 0.00019781888660410602, "loss": 0.0758, "step": 1415 }, { "epoch": 0.06709310589907605, "grad_norm": 0.55078125, "learning_rate": 0.0001978157920676395, "loss": 1.1478, "step": 1416 }, { "epoch": 0.06714048803601043, "grad_norm": 0.451171875, "learning_rate": 0.00019781269536172288, "loss": 0.1899, "step": 1417 }, { "epoch": 0.0671878701729448, "grad_norm": 0.4609375, "learning_rate": 0.00019780959648642489, "loss": 1.3329, "step": 1418 }, { "epoch": 0.06723525230987917, "grad_norm": 0.451171875, "learning_rate": 0.00019780649544181423, "loss": 1.0074, "step": 1419 }, { "epoch": 0.06728263444681355, "grad_norm": 0.609375, "learning_rate": 0.00019780339222795964, "loss": 1.6396, "step": 1420 }, { "epoch": 0.06733001658374793, "grad_norm": 0.6953125, "learning_rate": 0.00019780028684493, "loss": 0.5259, "step": 1421 }, { "epoch": 0.0673773987206823, "grad_norm": 0.431640625, "learning_rate": 0.00019779717929279422, "loss": 1.2355, "step": 1422 }, { "epoch": 0.06742478085761668, "grad_norm": 0.54296875, "learning_rate": 0.0001977940695716211, "loss": 0.1711, "step": 1423 }, { "epoch": 0.06747216299455105, "grad_norm": 0.89453125, "learning_rate": 0.00019779095768147973, "loss": 0.0939, "step": 1424 }, { "epoch": 0.06751954513148543, "grad_norm": 0.33203125, "learning_rate": 0.0001977878436224391, "loss": 0.0777, "step": 1425 }, { "epoch": 0.0675669272684198, "grad_norm": 0.7890625, "learning_rate": 0.0001977847273945682, "loss": 0.2649, "step": 1426 }, { "epoch": 0.06761430940535418, "grad_norm": 0.6875, "learning_rate": 0.00019778160899793624, "loss": 1.2112, "step": 1427 }, { "epoch": 0.06766169154228856, "grad_norm": 0.5859375, "learning_rate": 0.00019777848843261232, "loss": 1.1193, "step": 1428 }, { "epoch": 0.06770907367922294, "grad_norm": 0.216796875, "learning_rate": 0.0001977753656986657, "loss": 0.0224, "step": 1429 }, { "epoch": 0.06775645581615732, "grad_norm": 0.515625, "learning_rate": 0.00019777224079616562, "loss": 1.253, "step": 1430 }, { "epoch": 0.06780383795309168, "grad_norm": 0.81640625, "learning_rate": 0.00019776911372518135, "loss": 0.5958, "step": 1431 }, { "epoch": 0.06785122009002606, "grad_norm": 1.375, "learning_rate": 0.00019776598448578229, "loss": 1.0039, "step": 1432 }, { "epoch": 0.06789860222696044, "grad_norm": 0.46875, "learning_rate": 0.00019776285307803782, "loss": 1.2828, "step": 1433 }, { "epoch": 0.06794598436389482, "grad_norm": 0.494140625, "learning_rate": 0.00019775971950201742, "loss": 1.4507, "step": 1434 }, { "epoch": 0.06799336650082918, "grad_norm": 0.443359375, "learning_rate": 0.00019775658375779056, "loss": 0.9144, "step": 1435 }, { "epoch": 0.06804074863776356, "grad_norm": 0.490234375, "learning_rate": 0.0001977534458454268, "loss": 1.1339, "step": 1436 }, { "epoch": 0.06808813077469794, "grad_norm": 0.64453125, "learning_rate": 0.00019775030576499574, "loss": 0.8736, "step": 1437 }, { "epoch": 0.06813551291163232, "grad_norm": 0.040283203125, "learning_rate": 0.000197747163516567, "loss": 0.0034, "step": 1438 }, { "epoch": 0.06818289504856669, "grad_norm": 0.47265625, "learning_rate": 0.00019774401910021031, "loss": 0.9527, "step": 1439 }, { "epoch": 0.06823027718550106, "grad_norm": 0.36328125, "learning_rate": 0.00019774087251599537, "loss": 0.6436, "step": 1440 }, { "epoch": 0.06827765932243544, "grad_norm": 0.310546875, "learning_rate": 0.000197737723763992, "loss": 0.2099, "step": 1441 }, { "epoch": 0.06832504145936982, "grad_norm": 0.5078125, "learning_rate": 0.00019773457284427, "loss": 1.2293, "step": 1442 }, { "epoch": 0.06837242359630419, "grad_norm": 0.5, "learning_rate": 0.0001977314197568993, "loss": 0.7126, "step": 1443 }, { "epoch": 0.06841980573323857, "grad_norm": 0.515625, "learning_rate": 0.00019772826450194982, "loss": 0.0341, "step": 1444 }, { "epoch": 0.06846718787017295, "grad_norm": 0.45703125, "learning_rate": 0.00019772510707949154, "loss": 1.6576, "step": 1445 }, { "epoch": 0.06851457000710733, "grad_norm": 0.58984375, "learning_rate": 0.00019772194748959442, "loss": 1.482, "step": 1446 }, { "epoch": 0.06856195214404169, "grad_norm": 0.478515625, "learning_rate": 0.0001977187857323286, "loss": 0.8749, "step": 1447 }, { "epoch": 0.06860933428097607, "grad_norm": 0.625, "learning_rate": 0.00019771562180776424, "loss": 0.2753, "step": 1448 }, { "epoch": 0.06865671641791045, "grad_norm": 0.5625, "learning_rate": 0.00019771245571597142, "loss": 0.1535, "step": 1449 }, { "epoch": 0.06870409855484483, "grad_norm": 0.828125, "learning_rate": 0.00019770928745702047, "loss": 0.5296, "step": 1450 }, { "epoch": 0.0687514806917792, "grad_norm": 0.30078125, "learning_rate": 0.00019770611703098155, "loss": 0.1664, "step": 1451 }, { "epoch": 0.06879886282871357, "grad_norm": 0.51953125, "learning_rate": 0.00019770294443792507, "loss": 1.132, "step": 1452 }, { "epoch": 0.06884624496564795, "grad_norm": 0.09033203125, "learning_rate": 0.00019769976967792132, "loss": 0.0082, "step": 1453 }, { "epoch": 0.06889362710258233, "grad_norm": 0.498046875, "learning_rate": 0.00019769659275104074, "loss": 1.4106, "step": 1454 }, { "epoch": 0.0689410092395167, "grad_norm": 1.0234375, "learning_rate": 0.00019769341365735382, "loss": 0.3698, "step": 1455 }, { "epoch": 0.06898839137645107, "grad_norm": 0.421875, "learning_rate": 0.00019769023239693103, "loss": 0.9214, "step": 1456 }, { "epoch": 0.06903577351338545, "grad_norm": 0.44921875, "learning_rate": 0.00019768704896984293, "loss": 0.7105, "step": 1457 }, { "epoch": 0.06908315565031983, "grad_norm": 0.63671875, "learning_rate": 0.00019768386337616013, "loss": 1.3286, "step": 1458 }, { "epoch": 0.06913053778725421, "grad_norm": 0.451171875, "learning_rate": 0.0001976806756159533, "loss": 1.117, "step": 1459 }, { "epoch": 0.06917791992418858, "grad_norm": 0.68359375, "learning_rate": 0.00019767748568929317, "loss": 0.8185, "step": 1460 }, { "epoch": 0.06922530206112296, "grad_norm": 0.494140625, "learning_rate": 0.00019767429359625037, "loss": 1.0807, "step": 1461 }, { "epoch": 0.06927268419805734, "grad_norm": 0.60546875, "learning_rate": 0.00019767109933689584, "loss": 0.8929, "step": 1462 }, { "epoch": 0.06932006633499171, "grad_norm": 0.482421875, "learning_rate": 0.00019766790291130036, "loss": 0.708, "step": 1463 }, { "epoch": 0.06936744847192608, "grad_norm": 0.04248046875, "learning_rate": 0.00019766470431953478, "loss": 0.0032, "step": 1464 }, { "epoch": 0.06941483060886046, "grad_norm": 0.388671875, "learning_rate": 0.0001976615035616701, "loss": 0.58, "step": 1465 }, { "epoch": 0.06946221274579484, "grad_norm": 0.326171875, "learning_rate": 0.00019765830063777734, "loss": 0.0398, "step": 1466 }, { "epoch": 0.06950959488272922, "grad_norm": 0.50390625, "learning_rate": 0.00019765509554792746, "loss": 1.1605, "step": 1467 }, { "epoch": 0.06955697701966358, "grad_norm": 0.5859375, "learning_rate": 0.00019765188829219156, "loss": 1.0195, "step": 1468 }, { "epoch": 0.06960435915659796, "grad_norm": 0.54296875, "learning_rate": 0.0001976486788706408, "loss": 1.1521, "step": 1469 }, { "epoch": 0.06965174129353234, "grad_norm": 0.482421875, "learning_rate": 0.00019764546728334636, "loss": 0.8, "step": 1470 }, { "epoch": 0.06969912343046672, "grad_norm": 0.61328125, "learning_rate": 0.00019764225353037946, "loss": 1.2107, "step": 1471 }, { "epoch": 0.06974650556740108, "grad_norm": 0.59765625, "learning_rate": 0.0001976390376118114, "loss": 1.4391, "step": 1472 }, { "epoch": 0.06979388770433546, "grad_norm": 1.234375, "learning_rate": 0.00019763581952771347, "loss": 0.6741, "step": 1473 }, { "epoch": 0.06984126984126984, "grad_norm": 0.431640625, "learning_rate": 0.00019763259927815704, "loss": 0.7293, "step": 1474 }, { "epoch": 0.06988865197820422, "grad_norm": 0.49609375, "learning_rate": 0.0001976293768632136, "loss": 0.9597, "step": 1475 }, { "epoch": 0.06993603411513859, "grad_norm": 0.435546875, "learning_rate": 0.0001976261522829545, "loss": 0.9556, "step": 1476 }, { "epoch": 0.06998341625207297, "grad_norm": 0.6015625, "learning_rate": 0.00019762292553745142, "loss": 1.0678, "step": 1477 }, { "epoch": 0.07003079838900735, "grad_norm": 0.5234375, "learning_rate": 0.00019761969662677578, "loss": 1.1374, "step": 1478 }, { "epoch": 0.07007818052594172, "grad_norm": 0.55859375, "learning_rate": 0.00019761646555099924, "loss": 1.0522, "step": 1479 }, { "epoch": 0.07012556266287609, "grad_norm": 0.486328125, "learning_rate": 0.00019761323231019348, "loss": 1.129, "step": 1480 }, { "epoch": 0.07017294479981047, "grad_norm": 0.330078125, "learning_rate": 0.00019760999690443022, "loss": 0.1712, "step": 1481 }, { "epoch": 0.07022032693674485, "grad_norm": 0.46484375, "learning_rate": 0.0001976067593337812, "loss": 1.0533, "step": 1482 }, { "epoch": 0.07026770907367923, "grad_norm": 0.455078125, "learning_rate": 0.00019760351959831817, "loss": 1.1679, "step": 1483 }, { "epoch": 0.07031509121061359, "grad_norm": 0.5703125, "learning_rate": 0.0001976002776981131, "loss": 0.9336, "step": 1484 }, { "epoch": 0.07036247334754797, "grad_norm": 0.369140625, "learning_rate": 0.0001975970336332378, "loss": 0.0366, "step": 1485 }, { "epoch": 0.07040985548448235, "grad_norm": 0.5625, "learning_rate": 0.00019759378740376426, "loss": 0.9989, "step": 1486 }, { "epoch": 0.07045723762141673, "grad_norm": 0.91796875, "learning_rate": 0.00019759053900976446, "loss": 0.8363, "step": 1487 }, { "epoch": 0.0705046197583511, "grad_norm": 1.2109375, "learning_rate": 0.00019758728845131044, "loss": 0.4774, "step": 1488 }, { "epoch": 0.07055200189528547, "grad_norm": 1.171875, "learning_rate": 0.00019758403572847435, "loss": 0.1674, "step": 1489 }, { "epoch": 0.07059938403221985, "grad_norm": 0.609375, "learning_rate": 0.00019758078084132827, "loss": 1.1625, "step": 1490 }, { "epoch": 0.07064676616915423, "grad_norm": 0.47265625, "learning_rate": 0.00019757752378994438, "loss": 1.334, "step": 1491 }, { "epoch": 0.07069414830608861, "grad_norm": 0.53515625, "learning_rate": 0.00019757426457439497, "loss": 1.1302, "step": 1492 }, { "epoch": 0.07074153044302298, "grad_norm": 0.91796875, "learning_rate": 0.00019757100319475233, "loss": 0.3671, "step": 1493 }, { "epoch": 0.07078891257995736, "grad_norm": 1.7734375, "learning_rate": 0.00019756773965108875, "loss": 0.384, "step": 1494 }, { "epoch": 0.07083629471689173, "grad_norm": 0.484375, "learning_rate": 0.0001975644739434766, "loss": 0.8549, "step": 1495 }, { "epoch": 0.07088367685382611, "grad_norm": 0.455078125, "learning_rate": 0.00019756120607198835, "loss": 0.8868, "step": 1496 }, { "epoch": 0.07093105899076048, "grad_norm": 0.48828125, "learning_rate": 0.0001975579360366965, "loss": 1.2682, "step": 1497 }, { "epoch": 0.07097844112769486, "grad_norm": 0.5546875, "learning_rate": 0.0001975546638376735, "loss": 0.968, "step": 1498 }, { "epoch": 0.07102582326462924, "grad_norm": 0.58203125, "learning_rate": 0.000197551389474992, "loss": 1.1436, "step": 1499 }, { "epoch": 0.07107320540156362, "grad_norm": 0.498046875, "learning_rate": 0.00019754811294872456, "loss": 0.1859, "step": 1500 }, { "epoch": 0.07112058753849798, "grad_norm": 0.462890625, "learning_rate": 0.0001975448342589439, "loss": 0.9387, "step": 1501 }, { "epoch": 0.07116796967543236, "grad_norm": 0.458984375, "learning_rate": 0.00019754155340572272, "loss": 0.7409, "step": 1502 }, { "epoch": 0.07121535181236674, "grad_norm": 0.478515625, "learning_rate": 0.00019753827038913375, "loss": 1.5493, "step": 1503 }, { "epoch": 0.07126273394930112, "grad_norm": 0.375, "learning_rate": 0.00019753498520924987, "loss": 0.1718, "step": 1504 }, { "epoch": 0.07131011608623548, "grad_norm": 0.64453125, "learning_rate": 0.0001975316978661439, "loss": 1.1719, "step": 1505 }, { "epoch": 0.07135749822316986, "grad_norm": 0.6953125, "learning_rate": 0.00019752840835988872, "loss": 1.3937, "step": 1506 }, { "epoch": 0.07140488036010424, "grad_norm": 0.53515625, "learning_rate": 0.00019752511669055738, "loss": 1.2344, "step": 1507 }, { "epoch": 0.07145226249703862, "grad_norm": 0.474609375, "learning_rate": 0.0001975218228582228, "loss": 0.1861, "step": 1508 }, { "epoch": 0.07149964463397299, "grad_norm": 0.63671875, "learning_rate": 0.00019751852686295806, "loss": 1.4951, "step": 1509 }, { "epoch": 0.07154702677090737, "grad_norm": 0.384765625, "learning_rate": 0.0001975152287048363, "loss": 0.6003, "step": 1510 }, { "epoch": 0.07159440890784174, "grad_norm": 0.54296875, "learning_rate": 0.00019751192838393062, "loss": 1.3992, "step": 1511 }, { "epoch": 0.07164179104477612, "grad_norm": 0.80859375, "learning_rate": 0.00019750862590031424, "loss": 0.9387, "step": 1512 }, { "epoch": 0.07168917318171049, "grad_norm": 0.462890625, "learning_rate": 0.00019750532125406035, "loss": 1.4259, "step": 1513 }, { "epoch": 0.07173655531864487, "grad_norm": 0.49609375, "learning_rate": 0.00019750201444524235, "loss": 1.1457, "step": 1514 }, { "epoch": 0.07178393745557925, "grad_norm": 0.06591796875, "learning_rate": 0.0001974987054739335, "loss": 0.0032, "step": 1515 }, { "epoch": 0.07183131959251363, "grad_norm": 0.470703125, "learning_rate": 0.0001974953943402072, "loss": 0.745, "step": 1516 }, { "epoch": 0.07187870172944799, "grad_norm": 0.515625, "learning_rate": 0.00019749208104413694, "loss": 1.2319, "step": 1517 }, { "epoch": 0.07192608386638237, "grad_norm": 0.6796875, "learning_rate": 0.00019748876558579612, "loss": 1.3171, "step": 1518 }, { "epoch": 0.07197346600331675, "grad_norm": 0.490234375, "learning_rate": 0.00019748544796525835, "loss": 0.0755, "step": 1519 }, { "epoch": 0.07202084814025113, "grad_norm": 0.6171875, "learning_rate": 0.0001974821281825972, "loss": 1.3535, "step": 1520 }, { "epoch": 0.07206823027718551, "grad_norm": 0.435546875, "learning_rate": 0.00019747880623788625, "loss": 0.876, "step": 1521 }, { "epoch": 0.07211561241411987, "grad_norm": 0.6484375, "learning_rate": 0.00019747548213119918, "loss": 0.7843, "step": 1522 }, { "epoch": 0.07216299455105425, "grad_norm": 0.52734375, "learning_rate": 0.00019747215586260982, "loss": 1.4776, "step": 1523 }, { "epoch": 0.07221037668798863, "grad_norm": 0.4765625, "learning_rate": 0.0001974688274321918, "loss": 0.6193, "step": 1524 }, { "epoch": 0.07225775882492301, "grad_norm": 0.5625, "learning_rate": 0.00019746549684001902, "loss": 0.9984, "step": 1525 }, { "epoch": 0.07230514096185738, "grad_norm": 0.494140625, "learning_rate": 0.00019746216408616536, "loss": 1.0317, "step": 1526 }, { "epoch": 0.07235252309879175, "grad_norm": 0.5546875, "learning_rate": 0.0001974588291707047, "loss": 0.0613, "step": 1527 }, { "epoch": 0.07239990523572613, "grad_norm": 0.408203125, "learning_rate": 0.000197455492093711, "loss": 0.7384, "step": 1528 }, { "epoch": 0.07244728737266051, "grad_norm": 0.431640625, "learning_rate": 0.0001974521528552583, "loss": 0.0511, "step": 1529 }, { "epoch": 0.07249466950959488, "grad_norm": 0.48046875, "learning_rate": 0.00019744881145542068, "loss": 0.969, "step": 1530 }, { "epoch": 0.07254205164652926, "grad_norm": 1.09375, "learning_rate": 0.0001974454678942722, "loss": 0.6165, "step": 1531 }, { "epoch": 0.07258943378346364, "grad_norm": 0.42578125, "learning_rate": 0.000197442122171887, "loss": 0.8376, "step": 1532 }, { "epoch": 0.07263681592039802, "grad_norm": 0.73046875, "learning_rate": 0.00019743877428833934, "loss": 0.1542, "step": 1533 }, { "epoch": 0.07268419805733238, "grad_norm": 0.63671875, "learning_rate": 0.00019743542424370346, "loss": 0.0372, "step": 1534 }, { "epoch": 0.07273158019426676, "grad_norm": 0.4765625, "learning_rate": 0.00019743207203805368, "loss": 1.7193, "step": 1535 }, { "epoch": 0.07277896233120114, "grad_norm": 0.80078125, "learning_rate": 0.00019742871767146428, "loss": 1.0667, "step": 1536 }, { "epoch": 0.07282634446813552, "grad_norm": 0.50390625, "learning_rate": 0.00019742536114400973, "loss": 0.1448, "step": 1537 }, { "epoch": 0.07287372660506988, "grad_norm": 0.60546875, "learning_rate": 0.00019742200245576443, "loss": 1.2105, "step": 1538 }, { "epoch": 0.07292110874200426, "grad_norm": 0.54296875, "learning_rate": 0.0001974186416068029, "loss": 1.3034, "step": 1539 }, { "epoch": 0.07296849087893864, "grad_norm": 0.58203125, "learning_rate": 0.00019741527859719966, "loss": 1.3104, "step": 1540 }, { "epoch": 0.07301587301587302, "grad_norm": 0.484375, "learning_rate": 0.00019741191342702928, "loss": 0.1806, "step": 1541 }, { "epoch": 0.07306325515280739, "grad_norm": 0.57421875, "learning_rate": 0.00019740854609636644, "loss": 1.0215, "step": 1542 }, { "epoch": 0.07311063728974176, "grad_norm": 0.47265625, "learning_rate": 0.00019740517660528579, "loss": 1.149, "step": 1543 }, { "epoch": 0.07315801942667614, "grad_norm": 0.5625, "learning_rate": 0.0001974018049538621, "loss": 0.8894, "step": 1544 }, { "epoch": 0.07320540156361052, "grad_norm": 0.59375, "learning_rate": 0.00019739843114217006, "loss": 1.1542, "step": 1545 }, { "epoch": 0.07325278370054489, "grad_norm": 0.498046875, "learning_rate": 0.00019739505517028463, "loss": 1.0084, "step": 1546 }, { "epoch": 0.07330016583747927, "grad_norm": 0.443359375, "learning_rate": 0.00019739167703828058, "loss": 0.6863, "step": 1547 }, { "epoch": 0.07334754797441365, "grad_norm": 0.53515625, "learning_rate": 0.0001973882967462329, "loss": 0.8286, "step": 1548 }, { "epoch": 0.07339493011134803, "grad_norm": 0.48828125, "learning_rate": 0.0001973849142942165, "loss": 0.8003, "step": 1549 }, { "epoch": 0.0734423122482824, "grad_norm": 0.55859375, "learning_rate": 0.00019738152968230645, "loss": 0.8031, "step": 1550 }, { "epoch": 0.07348969438521677, "grad_norm": 1.578125, "learning_rate": 0.0001973781429105778, "loss": 0.6852, "step": 1551 }, { "epoch": 0.07353707652215115, "grad_norm": 0.369140625, "learning_rate": 0.00019737475397910563, "loss": 0.027, "step": 1552 }, { "epoch": 0.07358445865908553, "grad_norm": 0.53515625, "learning_rate": 0.00019737136288796515, "loss": 0.1264, "step": 1553 }, { "epoch": 0.0736318407960199, "grad_norm": 0.48046875, "learning_rate": 0.0001973679696372316, "loss": 0.6384, "step": 1554 }, { "epoch": 0.07367922293295427, "grad_norm": 0.55078125, "learning_rate": 0.00019736457422698015, "loss": 0.8887, "step": 1555 }, { "epoch": 0.07372660506988865, "grad_norm": 0.494140625, "learning_rate": 0.00019736117665728617, "loss": 0.9889, "step": 1556 }, { "epoch": 0.07377398720682303, "grad_norm": 0.5078125, "learning_rate": 0.000197357776928225, "loss": 1.2968, "step": 1557 }, { "epoch": 0.07382136934375741, "grad_norm": 0.4921875, "learning_rate": 0.00019735437503987202, "loss": 0.8644, "step": 1558 }, { "epoch": 0.07386875148069177, "grad_norm": 0.5078125, "learning_rate": 0.0001973509709923027, "loss": 1.1251, "step": 1559 }, { "epoch": 0.07391613361762615, "grad_norm": 0.439453125, "learning_rate": 0.00019734756478559254, "loss": 0.2686, "step": 1560 }, { "epoch": 0.07396351575456053, "grad_norm": 0.44921875, "learning_rate": 0.0001973441564198171, "loss": 0.7783, "step": 1561 }, { "epoch": 0.07401089789149491, "grad_norm": 0.44140625, "learning_rate": 0.00019734074589505195, "loss": 1.1619, "step": 1562 }, { "epoch": 0.07405828002842928, "grad_norm": 0.515625, "learning_rate": 0.00019733733321137273, "loss": 0.8184, "step": 1563 }, { "epoch": 0.07410566216536366, "grad_norm": 0.546875, "learning_rate": 0.00019733391836885514, "loss": 1.2703, "step": 1564 }, { "epoch": 0.07415304430229804, "grad_norm": 0.83203125, "learning_rate": 0.0001973305013675749, "loss": 0.1781, "step": 1565 }, { "epoch": 0.07420042643923241, "grad_norm": 0.8359375, "learning_rate": 0.00019732708220760782, "loss": 1.2109, "step": 1566 }, { "epoch": 0.07424780857616678, "grad_norm": 0.498046875, "learning_rate": 0.00019732366088902976, "loss": 0.7686, "step": 1567 }, { "epoch": 0.07429519071310116, "grad_norm": 0.90625, "learning_rate": 0.00019732023741191653, "loss": 0.5418, "step": 1568 }, { "epoch": 0.07434257285003554, "grad_norm": 0.43359375, "learning_rate": 0.00019731681177634412, "loss": 0.6996, "step": 1569 }, { "epoch": 0.07438995498696992, "grad_norm": 0.515625, "learning_rate": 0.00019731338398238846, "loss": 1.1675, "step": 1570 }, { "epoch": 0.07443733712390428, "grad_norm": 0.396484375, "learning_rate": 0.0001973099540301256, "loss": 0.891, "step": 1571 }, { "epoch": 0.07448471926083866, "grad_norm": 0.66796875, "learning_rate": 0.0001973065219196316, "loss": 1.2264, "step": 1572 }, { "epoch": 0.07453210139777304, "grad_norm": 0.890625, "learning_rate": 0.00019730308765098263, "loss": 1.1571, "step": 1573 }, { "epoch": 0.07457948353470742, "grad_norm": 0.376953125, "learning_rate": 0.0001972996512242548, "loss": 0.6126, "step": 1574 }, { "epoch": 0.07462686567164178, "grad_norm": 0.4765625, "learning_rate": 0.0001972962126395243, "loss": 0.6741, "step": 1575 }, { "epoch": 0.07467424780857616, "grad_norm": 0.875, "learning_rate": 0.00019729277189686748, "loss": 0.4545, "step": 1576 }, { "epoch": 0.07472162994551054, "grad_norm": 0.404296875, "learning_rate": 0.0001972893289963606, "loss": 0.7829, "step": 1577 }, { "epoch": 0.07476901208244492, "grad_norm": 0.56640625, "learning_rate": 0.00019728588393808005, "loss": 1.5995, "step": 1578 }, { "epoch": 0.0748163942193793, "grad_norm": 0.423828125, "learning_rate": 0.00019728243672210223, "loss": 0.4342, "step": 1579 }, { "epoch": 0.07486377635631367, "grad_norm": 0.478515625, "learning_rate": 0.00019727898734850355, "loss": 0.7056, "step": 1580 }, { "epoch": 0.07491115849324805, "grad_norm": 0.46484375, "learning_rate": 0.00019727553581736054, "loss": 0.0762, "step": 1581 }, { "epoch": 0.07495854063018242, "grad_norm": 0.7109375, "learning_rate": 0.00019727208212874978, "loss": 0.3199, "step": 1582 }, { "epoch": 0.0750059227671168, "grad_norm": 0.5390625, "learning_rate": 0.00019726862628274784, "loss": 1.4885, "step": 1583 }, { "epoch": 0.07505330490405117, "grad_norm": 0.022216796875, "learning_rate": 0.0001972651682794314, "loss": 0.0019, "step": 1584 }, { "epoch": 0.07510068704098555, "grad_norm": 0.54296875, "learning_rate": 0.0001972617081188771, "loss": 0.8026, "step": 1585 }, { "epoch": 0.07514806917791993, "grad_norm": 0.546875, "learning_rate": 0.00019725824580116172, "loss": 0.9925, "step": 1586 }, { "epoch": 0.0751954513148543, "grad_norm": 0.5234375, "learning_rate": 0.00019725478132636207, "loss": 1.1016, "step": 1587 }, { "epoch": 0.07524283345178867, "grad_norm": 0.5625, "learning_rate": 0.0001972513146945549, "loss": 0.6538, "step": 1588 }, { "epoch": 0.07529021558872305, "grad_norm": 0.65234375, "learning_rate": 0.0001972478459058172, "loss": 1.4446, "step": 1589 }, { "epoch": 0.07533759772565743, "grad_norm": 1.2578125, "learning_rate": 0.00019724437496022588, "loss": 0.8096, "step": 1590 }, { "epoch": 0.07538497986259181, "grad_norm": 0.53515625, "learning_rate": 0.00019724090185785787, "loss": 0.0483, "step": 1591 }, { "epoch": 0.07543236199952617, "grad_norm": 0.5390625, "learning_rate": 0.00019723742659879024, "loss": 1.3905, "step": 1592 }, { "epoch": 0.07547974413646055, "grad_norm": 0.5703125, "learning_rate": 0.00019723394918310007, "loss": 0.8611, "step": 1593 }, { "epoch": 0.07552712627339493, "grad_norm": 0.4921875, "learning_rate": 0.00019723046961086444, "loss": 1.3968, "step": 1594 }, { "epoch": 0.07557450841032931, "grad_norm": 0.494140625, "learning_rate": 0.0001972269878821606, "loss": 1.1161, "step": 1595 }, { "epoch": 0.07562189054726368, "grad_norm": 0.56640625, "learning_rate": 0.0001972235039970657, "loss": 1.2614, "step": 1596 }, { "epoch": 0.07566927268419806, "grad_norm": 0.5546875, "learning_rate": 0.00019722001795565705, "loss": 1.1092, "step": 1597 }, { "epoch": 0.07571665482113243, "grad_norm": 0.498046875, "learning_rate": 0.00019721652975801196, "loss": 1.2873, "step": 1598 }, { "epoch": 0.07576403695806681, "grad_norm": 0.466796875, "learning_rate": 0.0001972130394042078, "loss": 1.0667, "step": 1599 }, { "epoch": 0.07581141909500118, "grad_norm": 0.498046875, "learning_rate": 0.00019720954689432199, "loss": 0.9517, "step": 1600 }, { "epoch": 0.07585880123193556, "grad_norm": 0.5078125, "learning_rate": 0.00019720605222843193, "loss": 1.1561, "step": 1601 }, { "epoch": 0.07590618336886994, "grad_norm": 0.4921875, "learning_rate": 0.0001972025554066152, "loss": 1.052, "step": 1602 }, { "epoch": 0.07595356550580432, "grad_norm": 0.73828125, "learning_rate": 0.0001971990564289493, "loss": 0.2228, "step": 1603 }, { "epoch": 0.07600094764273868, "grad_norm": 0.58984375, "learning_rate": 0.0001971955552955119, "loss": 1.0376, "step": 1604 }, { "epoch": 0.07604832977967306, "grad_norm": 0.2734375, "learning_rate": 0.00019719205200638057, "loss": 0.0396, "step": 1605 }, { "epoch": 0.07609571191660744, "grad_norm": 0.546875, "learning_rate": 0.00019718854656163308, "loss": 0.8643, "step": 1606 }, { "epoch": 0.07614309405354182, "grad_norm": 0.431640625, "learning_rate": 0.00019718503896134712, "loss": 1.0733, "step": 1607 }, { "epoch": 0.0761904761904762, "grad_norm": 0.482421875, "learning_rate": 0.00019718152920560056, "loss": 1.1269, "step": 1608 }, { "epoch": 0.07623785832741056, "grad_norm": 0.83984375, "learning_rate": 0.00019717801729447114, "loss": 0.4496, "step": 1609 }, { "epoch": 0.07628524046434494, "grad_norm": 0.458984375, "learning_rate": 0.00019717450322803684, "loss": 1.1536, "step": 1610 }, { "epoch": 0.07633262260127932, "grad_norm": 0.8125, "learning_rate": 0.00019717098700637554, "loss": 0.6839, "step": 1611 }, { "epoch": 0.0763800047382137, "grad_norm": 0.515625, "learning_rate": 0.00019716746862956527, "loss": 1.3748, "step": 1612 }, { "epoch": 0.07642738687514807, "grad_norm": 0.494140625, "learning_rate": 0.00019716394809768403, "loss": 1.0119, "step": 1613 }, { "epoch": 0.07647476901208244, "grad_norm": 0.55859375, "learning_rate": 0.00019716042541080992, "loss": 1.0805, "step": 1614 }, { "epoch": 0.07652215114901682, "grad_norm": 0.6171875, "learning_rate": 0.00019715690056902108, "loss": 1.3178, "step": 1615 }, { "epoch": 0.0765695332859512, "grad_norm": 0.57421875, "learning_rate": 0.00019715337357239566, "loss": 1.1677, "step": 1616 }, { "epoch": 0.07661691542288557, "grad_norm": 0.55078125, "learning_rate": 0.0001971498444210119, "loss": 0.7856, "step": 1617 }, { "epoch": 0.07666429755981995, "grad_norm": 0.396484375, "learning_rate": 0.00019714631311494807, "loss": 0.8945, "step": 1618 }, { "epoch": 0.07671167969675433, "grad_norm": 0.59765625, "learning_rate": 0.0001971427796542825, "loss": 1.3548, "step": 1619 }, { "epoch": 0.0767590618336887, "grad_norm": 0.48828125, "learning_rate": 0.00019713924403909352, "loss": 1.1525, "step": 1620 }, { "epoch": 0.07680644397062307, "grad_norm": 0.5078125, "learning_rate": 0.0001971357062694596, "loss": 0.0897, "step": 1621 }, { "epoch": 0.07685382610755745, "grad_norm": 0.474609375, "learning_rate": 0.0001971321663454592, "loss": 0.826, "step": 1622 }, { "epoch": 0.07690120824449183, "grad_norm": 0.48828125, "learning_rate": 0.00019712862426717075, "loss": 0.8226, "step": 1623 }, { "epoch": 0.07694859038142621, "grad_norm": 0.33203125, "learning_rate": 0.0001971250800346729, "loss": 0.8715, "step": 1624 }, { "epoch": 0.07699597251836057, "grad_norm": 0.375, "learning_rate": 0.00019712153364804424, "loss": 0.9103, "step": 1625 }, { "epoch": 0.07704335465529495, "grad_norm": 0.416015625, "learning_rate": 0.0001971179851073634, "loss": 1.124, "step": 1626 }, { "epoch": 0.07709073679222933, "grad_norm": 0.57421875, "learning_rate": 0.00019711443441270912, "loss": 0.8573, "step": 1627 }, { "epoch": 0.07713811892916371, "grad_norm": 0.3984375, "learning_rate": 0.00019711088156416012, "loss": 0.524, "step": 1628 }, { "epoch": 0.07718550106609808, "grad_norm": 0.54296875, "learning_rate": 0.00019710732656179518, "loss": 1.0545, "step": 1629 }, { "epoch": 0.07723288320303245, "grad_norm": 0.56640625, "learning_rate": 0.00019710376940569317, "loss": 0.681, "step": 1630 }, { "epoch": 0.07728026533996683, "grad_norm": 0.62109375, "learning_rate": 0.000197100210095933, "loss": 1.4263, "step": 1631 }, { "epoch": 0.07732764747690121, "grad_norm": 0.392578125, "learning_rate": 0.00019709664863259358, "loss": 0.2156, "step": 1632 }, { "epoch": 0.07737502961383558, "grad_norm": 0.78125, "learning_rate": 0.00019709308501575398, "loss": 1.1149, "step": 1633 }, { "epoch": 0.07742241175076996, "grad_norm": 0.515625, "learning_rate": 0.00019708951924549307, "loss": 1.2251, "step": 1634 }, { "epoch": 0.07746979388770434, "grad_norm": 0.55078125, "learning_rate": 0.0001970859513218901, "loss": 0.7541, "step": 1635 }, { "epoch": 0.07751717602463871, "grad_norm": 0.373046875, "learning_rate": 0.00019708238124502417, "loss": 1.5339, "step": 1636 }, { "epoch": 0.07756455816157308, "grad_norm": 0.64453125, "learning_rate": 0.00019707880901497437, "loss": 1.0511, "step": 1637 }, { "epoch": 0.07761194029850746, "grad_norm": 0.240234375, "learning_rate": 0.00019707523463182, "loss": 0.0204, "step": 1638 }, { "epoch": 0.07765932243544184, "grad_norm": 0.55078125, "learning_rate": 0.00019707165809564034, "loss": 0.9405, "step": 1639 }, { "epoch": 0.07770670457237622, "grad_norm": 0.439453125, "learning_rate": 0.00019706807940651473, "loss": 1.1919, "step": 1640 }, { "epoch": 0.0777540867093106, "grad_norm": 0.482421875, "learning_rate": 0.00019706449856452248, "loss": 0.7183, "step": 1641 }, { "epoch": 0.07780146884624496, "grad_norm": 0.67578125, "learning_rate": 0.00019706091556974303, "loss": 1.4933, "step": 1642 }, { "epoch": 0.07784885098317934, "grad_norm": 0.4140625, "learning_rate": 0.00019705733042225588, "loss": 1.0473, "step": 1643 }, { "epoch": 0.07789623312011372, "grad_norm": 0.01409912109375, "learning_rate": 0.0001970537431221405, "loss": 0.001, "step": 1644 }, { "epoch": 0.0779436152570481, "grad_norm": 0.36328125, "learning_rate": 0.00019705015366947647, "loss": 0.9944, "step": 1645 }, { "epoch": 0.07799099739398246, "grad_norm": 0.435546875, "learning_rate": 0.00019704656206434343, "loss": 0.9846, "step": 1646 }, { "epoch": 0.07803837953091684, "grad_norm": 0.3671875, "learning_rate": 0.00019704296830682098, "loss": 0.9973, "step": 1647 }, { "epoch": 0.07808576166785122, "grad_norm": 0.271484375, "learning_rate": 0.0001970393723969889, "loss": 0.0165, "step": 1648 }, { "epoch": 0.0781331438047856, "grad_norm": 0.62890625, "learning_rate": 0.00019703577433492688, "loss": 1.2609, "step": 1649 }, { "epoch": 0.07818052594171997, "grad_norm": 0.314453125, "learning_rate": 0.00019703217412071476, "loss": 0.2079, "step": 1650 }, { "epoch": 0.07822790807865435, "grad_norm": 0.9453125, "learning_rate": 0.00019702857175443234, "loss": 0.2338, "step": 1651 }, { "epoch": 0.07827529021558872, "grad_norm": 0.259765625, "learning_rate": 0.00019702496723615956, "loss": 0.1863, "step": 1652 }, { "epoch": 0.0783226723525231, "grad_norm": 0.427734375, "learning_rate": 0.00019702136056597635, "loss": 0.7679, "step": 1653 }, { "epoch": 0.07837005448945747, "grad_norm": 0.53125, "learning_rate": 0.00019701775174396272, "loss": 1.2124, "step": 1654 }, { "epoch": 0.07841743662639185, "grad_norm": 0.232421875, "learning_rate": 0.00019701414077019871, "loss": 0.0344, "step": 1655 }, { "epoch": 0.07846481876332623, "grad_norm": 1.5859375, "learning_rate": 0.00019701052764476437, "loss": 1.0308, "step": 1656 }, { "epoch": 0.0785122009002606, "grad_norm": 0.1845703125, "learning_rate": 0.00019700691236773987, "loss": 0.0132, "step": 1657 }, { "epoch": 0.07855958303719497, "grad_norm": 0.453125, "learning_rate": 0.00019700329493920534, "loss": 1.3492, "step": 1658 }, { "epoch": 0.07860696517412935, "grad_norm": 0.2412109375, "learning_rate": 0.00019699967535924108, "loss": 0.1897, "step": 1659 }, { "epoch": 0.07865434731106373, "grad_norm": 0.6171875, "learning_rate": 0.00019699605362792736, "loss": 1.41, "step": 1660 }, { "epoch": 0.07870172944799811, "grad_norm": 0.5390625, "learning_rate": 0.00019699242974534445, "loss": 1.0693, "step": 1661 }, { "epoch": 0.07874911158493247, "grad_norm": 0.6015625, "learning_rate": 0.0001969888037115728, "loss": 1.2154, "step": 1662 }, { "epoch": 0.07879649372186685, "grad_norm": 0.455078125, "learning_rate": 0.00019698517552669276, "loss": 0.7537, "step": 1663 }, { "epoch": 0.07884387585880123, "grad_norm": 0.4921875, "learning_rate": 0.00019698154519078484, "loss": 1.3872, "step": 1664 }, { "epoch": 0.07889125799573561, "grad_norm": 0.515625, "learning_rate": 0.00019697791270392956, "loss": 1.2254, "step": 1665 }, { "epoch": 0.07893864013266998, "grad_norm": 0.01055908203125, "learning_rate": 0.00019697427806620744, "loss": 0.0009, "step": 1666 }, { "epoch": 0.07898602226960436, "grad_norm": 0.5859375, "learning_rate": 0.00019697064127769916, "loss": 0.8843, "step": 1667 }, { "epoch": 0.07903340440653873, "grad_norm": 0.5234375, "learning_rate": 0.00019696700233848532, "loss": 0.954, "step": 1668 }, { "epoch": 0.07908078654347311, "grad_norm": 0.515625, "learning_rate": 0.00019696336124864667, "loss": 0.8493, "step": 1669 }, { "epoch": 0.07912816868040749, "grad_norm": 0.287109375, "learning_rate": 0.00019695971800826395, "loss": 0.1835, "step": 1670 }, { "epoch": 0.07917555081734186, "grad_norm": 0.6171875, "learning_rate": 0.00019695607261741794, "loss": 0.2005, "step": 1671 }, { "epoch": 0.07922293295427624, "grad_norm": 0.427734375, "learning_rate": 0.00019695242507618952, "loss": 0.7346, "step": 1672 }, { "epoch": 0.07927031509121062, "grad_norm": 0.65234375, "learning_rate": 0.0001969487753846596, "loss": 0.7522, "step": 1673 }, { "epoch": 0.079317697228145, "grad_norm": 0.46484375, "learning_rate": 0.00019694512354290908, "loss": 0.6101, "step": 1674 }, { "epoch": 0.07936507936507936, "grad_norm": 0.58203125, "learning_rate": 0.000196941469551019, "loss": 0.7977, "step": 1675 }, { "epoch": 0.07941246150201374, "grad_norm": 0.62890625, "learning_rate": 0.00019693781340907038, "loss": 0.2747, "step": 1676 }, { "epoch": 0.07945984363894812, "grad_norm": 0.44921875, "learning_rate": 0.00019693415511714432, "loss": 0.9906, "step": 1677 }, { "epoch": 0.0795072257758825, "grad_norm": 0.67578125, "learning_rate": 0.00019693049467532192, "loss": 1.2076, "step": 1678 }, { "epoch": 0.07955460791281686, "grad_norm": 0.3828125, "learning_rate": 0.00019692683208368444, "loss": 0.1237, "step": 1679 }, { "epoch": 0.07960199004975124, "grad_norm": 0.65234375, "learning_rate": 0.00019692316734231302, "loss": 0.645, "step": 1680 }, { "epoch": 0.07964937218668562, "grad_norm": 1.03125, "learning_rate": 0.00019691950045128901, "loss": 1.1272, "step": 1681 }, { "epoch": 0.07969675432362, "grad_norm": 0.474609375, "learning_rate": 0.00019691583141069372, "loss": 1.2546, "step": 1682 }, { "epoch": 0.07974413646055437, "grad_norm": 0.890625, "learning_rate": 0.0001969121602206085, "loss": 0.1895, "step": 1683 }, { "epoch": 0.07979151859748874, "grad_norm": 0.6015625, "learning_rate": 0.0001969084868811148, "loss": 0.8877, "step": 1684 }, { "epoch": 0.07983890073442312, "grad_norm": 0.82421875, "learning_rate": 0.0001969048113922941, "loss": 0.7107, "step": 1685 }, { "epoch": 0.0798862828713575, "grad_norm": 0.416015625, "learning_rate": 0.00019690113375422789, "loss": 0.0773, "step": 1686 }, { "epoch": 0.07993366500829187, "grad_norm": 0.59765625, "learning_rate": 0.0001968974539669977, "loss": 1.6357, "step": 1687 }, { "epoch": 0.07998104714522625, "grad_norm": 0.51171875, "learning_rate": 0.00019689377203068524, "loss": 1.1769, "step": 1688 }, { "epoch": 0.08002842928216063, "grad_norm": 0.51171875, "learning_rate": 0.00019689008794537212, "loss": 0.2874, "step": 1689 }, { "epoch": 0.080075811419095, "grad_norm": 0.5859375, "learning_rate": 0.00019688640171114006, "loss": 1.636, "step": 1690 }, { "epoch": 0.08012319355602937, "grad_norm": 0.6015625, "learning_rate": 0.0001968827133280708, "loss": 0.9001, "step": 1691 }, { "epoch": 0.08017057569296375, "grad_norm": 0.447265625, "learning_rate": 0.00019687902279624616, "loss": 0.9937, "step": 1692 }, { "epoch": 0.08021795782989813, "grad_norm": 0.40234375, "learning_rate": 0.00019687533011574797, "loss": 0.0951, "step": 1693 }, { "epoch": 0.08026533996683251, "grad_norm": 0.56640625, "learning_rate": 0.00019687163528665815, "loss": 1.5956, "step": 1694 }, { "epoch": 0.08031272210376687, "grad_norm": 0.298828125, "learning_rate": 0.00019686793830905864, "loss": 0.0098, "step": 1695 }, { "epoch": 0.08036010424070125, "grad_norm": 0.5625, "learning_rate": 0.00019686423918303144, "loss": 0.219, "step": 1696 }, { "epoch": 0.08040748637763563, "grad_norm": 0.55078125, "learning_rate": 0.0001968605379086586, "loss": 0.236, "step": 1697 }, { "epoch": 0.08045486851457001, "grad_norm": 0.48828125, "learning_rate": 0.0001968568344860222, "loss": 0.1505, "step": 1698 }, { "epoch": 0.08050225065150439, "grad_norm": 0.5390625, "learning_rate": 0.00019685312891520437, "loss": 0.9413, "step": 1699 }, { "epoch": 0.08054963278843875, "grad_norm": 0.49609375, "learning_rate": 0.0001968494211962873, "loss": 0.0553, "step": 1700 }, { "epoch": 0.08059701492537313, "grad_norm": 0.58203125, "learning_rate": 0.00019684571132935324, "loss": 1.17, "step": 1701 }, { "epoch": 0.08064439706230751, "grad_norm": 0.609375, "learning_rate": 0.00019684199931448444, "loss": 1.0493, "step": 1702 }, { "epoch": 0.08069177919924189, "grad_norm": 0.5546875, "learning_rate": 0.00019683828515176325, "loss": 0.108, "step": 1703 }, { "epoch": 0.08073916133617626, "grad_norm": 0.65234375, "learning_rate": 0.00019683456884127205, "loss": 0.8528, "step": 1704 }, { "epoch": 0.08078654347311064, "grad_norm": 0.5625, "learning_rate": 0.00019683085038309326, "loss": 0.2052, "step": 1705 }, { "epoch": 0.08083392561004502, "grad_norm": 0.515625, "learning_rate": 0.00019682712977730935, "loss": 0.7148, "step": 1706 }, { "epoch": 0.0808813077469794, "grad_norm": 0.90625, "learning_rate": 0.00019682340702400285, "loss": 0.2189, "step": 1707 }, { "epoch": 0.08092868988391376, "grad_norm": 0.671875, "learning_rate": 0.00019681968212325628, "loss": 0.773, "step": 1708 }, { "epoch": 0.08097607202084814, "grad_norm": 0.71875, "learning_rate": 0.0001968159550751523, "loss": 0.2618, "step": 1709 }, { "epoch": 0.08102345415778252, "grad_norm": 0.4296875, "learning_rate": 0.00019681222587977356, "loss": 0.6582, "step": 1710 }, { "epoch": 0.0810708362947169, "grad_norm": 0.578125, "learning_rate": 0.00019680849453720275, "loss": 1.3603, "step": 1711 }, { "epoch": 0.08111821843165126, "grad_norm": 0.66015625, "learning_rate": 0.00019680476104752269, "loss": 0.078, "step": 1712 }, { "epoch": 0.08116560056858564, "grad_norm": 0.7421875, "learning_rate": 0.00019680102541081614, "loss": 1.651, "step": 1713 }, { "epoch": 0.08121298270552002, "grad_norm": 0.5625, "learning_rate": 0.00019679728762716592, "loss": 0.8839, "step": 1714 }, { "epoch": 0.0812603648424544, "grad_norm": 0.66015625, "learning_rate": 0.00019679354769665497, "loss": 0.1678, "step": 1715 }, { "epoch": 0.08130774697938876, "grad_norm": 0.69140625, "learning_rate": 0.00019678980561936625, "loss": 0.1759, "step": 1716 }, { "epoch": 0.08135512911632314, "grad_norm": 0.51171875, "learning_rate": 0.00019678606139538274, "loss": 1.0799, "step": 1717 }, { "epoch": 0.08140251125325752, "grad_norm": 0.53515625, "learning_rate": 0.00019678231502478745, "loss": 0.816, "step": 1718 }, { "epoch": 0.0814498933901919, "grad_norm": 0.44140625, "learning_rate": 0.00019677856650766353, "loss": 1.0411, "step": 1719 }, { "epoch": 0.08149727552712627, "grad_norm": 0.2421875, "learning_rate": 0.00019677481584409406, "loss": 0.0126, "step": 1720 }, { "epoch": 0.08154465766406065, "grad_norm": 0.9921875, "learning_rate": 0.00019677106303416227, "loss": 1.2617, "step": 1721 }, { "epoch": 0.08159203980099503, "grad_norm": 0.5234375, "learning_rate": 0.0001967673080779514, "loss": 0.6694, "step": 1722 }, { "epoch": 0.0816394219379294, "grad_norm": 0.53125, "learning_rate": 0.00019676355097554468, "loss": 1.0204, "step": 1723 }, { "epoch": 0.08168680407486377, "grad_norm": 0.51171875, "learning_rate": 0.0001967597917270255, "loss": 1.0117, "step": 1724 }, { "epoch": 0.08173418621179815, "grad_norm": 1.03125, "learning_rate": 0.00019675603033247717, "loss": 0.4565, "step": 1725 }, { "epoch": 0.08178156834873253, "grad_norm": 0.5390625, "learning_rate": 0.0001967522667919832, "loss": 1.0873, "step": 1726 }, { "epoch": 0.08182895048566691, "grad_norm": 0.224609375, "learning_rate": 0.00019674850110562692, "loss": 0.0202, "step": 1727 }, { "epoch": 0.08187633262260129, "grad_norm": 0.2412109375, "learning_rate": 0.000196744733273492, "loss": 0.1749, "step": 1728 }, { "epoch": 0.08192371475953565, "grad_norm": 0.484375, "learning_rate": 0.00019674096329566192, "loss": 0.2461, "step": 1729 }, { "epoch": 0.08197109689647003, "grad_norm": 0.6796875, "learning_rate": 0.0001967371911722203, "loss": 0.9353, "step": 1730 }, { "epoch": 0.08201847903340441, "grad_norm": 0.515625, "learning_rate": 0.00019673341690325087, "loss": 1.253, "step": 1731 }, { "epoch": 0.08206586117033879, "grad_norm": 0.462890625, "learning_rate": 0.00019672964048883727, "loss": 0.072, "step": 1732 }, { "epoch": 0.08211324330727315, "grad_norm": 0.32421875, "learning_rate": 0.00019672586192906325, "loss": 0.829, "step": 1733 }, { "epoch": 0.08216062544420753, "grad_norm": 0.3984375, "learning_rate": 0.00019672208122401268, "loss": 0.2471, "step": 1734 }, { "epoch": 0.08220800758114191, "grad_norm": 0.5546875, "learning_rate": 0.00019671829837376935, "loss": 0.9004, "step": 1735 }, { "epoch": 0.08225538971807629, "grad_norm": 0.515625, "learning_rate": 0.00019671451337841718, "loss": 1.1285, "step": 1736 }, { "epoch": 0.08230277185501066, "grad_norm": 0.3828125, "learning_rate": 0.00019671072623804012, "loss": 0.0457, "step": 1737 }, { "epoch": 0.08235015399194504, "grad_norm": 0.64453125, "learning_rate": 0.00019670693695272216, "loss": 1.2687, "step": 1738 }, { "epoch": 0.08239753612887941, "grad_norm": 0.69140625, "learning_rate": 0.00019670314552254736, "loss": 0.8366, "step": 1739 }, { "epoch": 0.0824449182658138, "grad_norm": 0.5234375, "learning_rate": 0.00019669935194759978, "loss": 1.333, "step": 1740 }, { "epoch": 0.08249230040274816, "grad_norm": 0.6015625, "learning_rate": 0.00019669555622796358, "loss": 1.0499, "step": 1741 }, { "epoch": 0.08253968253968254, "grad_norm": 0.009033203125, "learning_rate": 0.00019669175836372293, "loss": 0.001, "step": 1742 }, { "epoch": 0.08258706467661692, "grad_norm": 0.5546875, "learning_rate": 0.0001966879583549621, "loss": 0.8029, "step": 1743 }, { "epoch": 0.0826344468135513, "grad_norm": 0.5078125, "learning_rate": 0.0001966841562017653, "loss": 0.8387, "step": 1744 }, { "epoch": 0.08268182895048566, "grad_norm": 0.80859375, "learning_rate": 0.00019668035190421694, "loss": 0.2109, "step": 1745 }, { "epoch": 0.08272921108742004, "grad_norm": 0.91796875, "learning_rate": 0.00019667654546240133, "loss": 0.2607, "step": 1746 }, { "epoch": 0.08277659322435442, "grad_norm": 0.54296875, "learning_rate": 0.00019667273687640292, "loss": 1.5496, "step": 1747 }, { "epoch": 0.0828239753612888, "grad_norm": 0.53125, "learning_rate": 0.00019666892614630618, "loss": 0.4689, "step": 1748 }, { "epoch": 0.08287135749822316, "grad_norm": 0.44921875, "learning_rate": 0.00019666511327219563, "loss": 0.9652, "step": 1749 }, { "epoch": 0.08291873963515754, "grad_norm": 0.51171875, "learning_rate": 0.00019666129825415582, "loss": 0.789, "step": 1750 }, { "epoch": 0.08296612177209192, "grad_norm": 0.5625, "learning_rate": 0.0001966574810922714, "loss": 0.3402, "step": 1751 }, { "epoch": 0.0830135039090263, "grad_norm": 0.76171875, "learning_rate": 0.00019665366178662697, "loss": 0.1491, "step": 1752 }, { "epoch": 0.08306088604596067, "grad_norm": 0.703125, "learning_rate": 0.0001966498403373073, "loss": 0.0646, "step": 1753 }, { "epoch": 0.08310826818289505, "grad_norm": 0.68359375, "learning_rate": 0.00019664601674439708, "loss": 0.2229, "step": 1754 }, { "epoch": 0.08315565031982942, "grad_norm": 0.5390625, "learning_rate": 0.00019664219100798118, "loss": 0.0445, "step": 1755 }, { "epoch": 0.0832030324567638, "grad_norm": 0.45703125, "learning_rate": 0.0001966383631281444, "loss": 0.9004, "step": 1756 }, { "epoch": 0.08325041459369818, "grad_norm": 0.546875, "learning_rate": 0.0001966345331049717, "loss": 1.1838, "step": 1757 }, { "epoch": 0.08329779673063255, "grad_norm": 0.416015625, "learning_rate": 0.00019663070093854797, "loss": 0.8954, "step": 1758 }, { "epoch": 0.08334517886756693, "grad_norm": 0.57421875, "learning_rate": 0.00019662686662895822, "loss": 0.9397, "step": 1759 }, { "epoch": 0.0833925610045013, "grad_norm": 0.515625, "learning_rate": 0.00019662303017628746, "loss": 1.0675, "step": 1760 }, { "epoch": 0.08343994314143569, "grad_norm": 0.388671875, "learning_rate": 0.00019661919158062084, "loss": 0.0825, "step": 1761 }, { "epoch": 0.08348732527837005, "grad_norm": 0.53125, "learning_rate": 0.00019661535084204346, "loss": 0.9899, "step": 1762 }, { "epoch": 0.08353470741530443, "grad_norm": 0.4296875, "learning_rate": 0.0001966115079606405, "loss": 0.5338, "step": 1763 }, { "epoch": 0.08358208955223881, "grad_norm": 0.8984375, "learning_rate": 0.00019660766293649718, "loss": 0.8799, "step": 1764 }, { "epoch": 0.08362947168917319, "grad_norm": 0.55859375, "learning_rate": 0.00019660381576969886, "loss": 0.9777, "step": 1765 }, { "epoch": 0.08367685382610755, "grad_norm": 0.455078125, "learning_rate": 0.00019659996646033076, "loss": 0.7217, "step": 1766 }, { "epoch": 0.08372423596304193, "grad_norm": 0.5625, "learning_rate": 0.0001965961150084783, "loss": 0.978, "step": 1767 }, { "epoch": 0.08377161809997631, "grad_norm": 0.6484375, "learning_rate": 0.0001965922614142269, "loss": 0.4204, "step": 1768 }, { "epoch": 0.08381900023691069, "grad_norm": 0.5546875, "learning_rate": 0.00019658840567766205, "loss": 0.9237, "step": 1769 }, { "epoch": 0.08386638237384506, "grad_norm": 0.482421875, "learning_rate": 0.0001965845477988692, "loss": 1.1636, "step": 1770 }, { "epoch": 0.08391376451077943, "grad_norm": 0.62890625, "learning_rate": 0.000196580687777934, "loss": 0.9592, "step": 1771 }, { "epoch": 0.08396114664771381, "grad_norm": 0.55859375, "learning_rate": 0.00019657682561494198, "loss": 0.9794, "step": 1772 }, { "epoch": 0.08400852878464819, "grad_norm": 0.5859375, "learning_rate": 0.00019657296130997886, "loss": 1.0554, "step": 1773 }, { "epoch": 0.08405591092158256, "grad_norm": 0.34765625, "learning_rate": 0.00019656909486313033, "loss": 0.0215, "step": 1774 }, { "epoch": 0.08410329305851694, "grad_norm": 1.0546875, "learning_rate": 0.00019656522627448212, "loss": 0.4928, "step": 1775 }, { "epoch": 0.08415067519545132, "grad_norm": 0.443359375, "learning_rate": 0.00019656135554412004, "loss": 1.4965, "step": 1776 }, { "epoch": 0.0841980573323857, "grad_norm": 0.34375, "learning_rate": 0.00019655748267212998, "loss": 0.1763, "step": 1777 }, { "epoch": 0.08424543946932006, "grad_norm": 0.486328125, "learning_rate": 0.00019655360765859778, "loss": 1.0559, "step": 1778 }, { "epoch": 0.08429282160625444, "grad_norm": 0.2470703125, "learning_rate": 0.00019654973050360942, "loss": 0.0206, "step": 1779 }, { "epoch": 0.08434020374318882, "grad_norm": 0.6953125, "learning_rate": 0.00019654585120725085, "loss": 1.4635, "step": 1780 }, { "epoch": 0.0843875858801232, "grad_norm": 0.51171875, "learning_rate": 0.00019654196976960818, "loss": 1.0031, "step": 1781 }, { "epoch": 0.08443496801705756, "grad_norm": 0.5546875, "learning_rate": 0.0001965380861907674, "loss": 0.6398, "step": 1782 }, { "epoch": 0.08448235015399194, "grad_norm": 0.6015625, "learning_rate": 0.00019653420047081473, "loss": 1.2147, "step": 1783 }, { "epoch": 0.08452973229092632, "grad_norm": 0.7421875, "learning_rate": 0.0001965303126098363, "loss": 0.259, "step": 1784 }, { "epoch": 0.0845771144278607, "grad_norm": 0.609375, "learning_rate": 0.00019652642260791837, "loss": 0.988, "step": 1785 }, { "epoch": 0.08462449656479507, "grad_norm": 0.419921875, "learning_rate": 0.0001965225304651472, "loss": 0.7848, "step": 1786 }, { "epoch": 0.08467187870172944, "grad_norm": 0.3046875, "learning_rate": 0.0001965186361816091, "loss": 0.022, "step": 1787 }, { "epoch": 0.08471926083866382, "grad_norm": 0.251953125, "learning_rate": 0.00019651473975739047, "loss": 0.0087, "step": 1788 }, { "epoch": 0.0847666429755982, "grad_norm": 0.58984375, "learning_rate": 0.0001965108411925777, "loss": 1.2164, "step": 1789 }, { "epoch": 0.08481402511253258, "grad_norm": 1.015625, "learning_rate": 0.00019650694048725732, "loss": 0.8642, "step": 1790 }, { "epoch": 0.08486140724946695, "grad_norm": 0.57421875, "learning_rate": 0.00019650303764151574, "loss": 1.1453, "step": 1791 }, { "epoch": 0.08490878938640133, "grad_norm": 0.14453125, "learning_rate": 0.0001964991326554396, "loss": 0.0116, "step": 1792 }, { "epoch": 0.0849561715233357, "grad_norm": 0.32421875, "learning_rate": 0.00019649522552911547, "loss": 0.1656, "step": 1793 }, { "epoch": 0.08500355366027008, "grad_norm": 0.447265625, "learning_rate": 0.00019649131626263002, "loss": 0.791, "step": 1794 }, { "epoch": 0.08505093579720445, "grad_norm": 0.421875, "learning_rate": 0.00019648740485606996, "loss": 0.0354, "step": 1795 }, { "epoch": 0.08509831793413883, "grad_norm": 0.48828125, "learning_rate": 0.00019648349130952207, "loss": 0.9545, "step": 1796 }, { "epoch": 0.08514570007107321, "grad_norm": 0.6796875, "learning_rate": 0.00019647957562307305, "loss": 0.036, "step": 1797 }, { "epoch": 0.08519308220800759, "grad_norm": 0.296875, "learning_rate": 0.00019647565779680983, "loss": 0.1932, "step": 1798 }, { "epoch": 0.08524046434494195, "grad_norm": 0.51953125, "learning_rate": 0.00019647173783081932, "loss": 0.8699, "step": 1799 }, { "epoch": 0.08528784648187633, "grad_norm": 0.0556640625, "learning_rate": 0.00019646781572518838, "loss": 0.0034, "step": 1800 }, { "epoch": 0.08533522861881071, "grad_norm": 0.2451171875, "learning_rate": 0.00019646389148000404, "loss": 0.0264, "step": 1801 }, { "epoch": 0.08538261075574509, "grad_norm": 0.4296875, "learning_rate": 0.00019645996509535334, "loss": 0.0255, "step": 1802 }, { "epoch": 0.08542999289267945, "grad_norm": 0.474609375, "learning_rate": 0.00019645603657132335, "loss": 1.1372, "step": 1803 }, { "epoch": 0.08547737502961383, "grad_norm": 0.455078125, "learning_rate": 0.00019645210590800124, "loss": 1.076, "step": 1804 }, { "epoch": 0.08552475716654821, "grad_norm": 0.2392578125, "learning_rate": 0.00019644817310547416, "loss": 0.0127, "step": 1805 }, { "epoch": 0.08557213930348259, "grad_norm": 0.76171875, "learning_rate": 0.0001964442381638293, "loss": 0.7601, "step": 1806 }, { "epoch": 0.08561952144041696, "grad_norm": 0.625, "learning_rate": 0.00019644030108315397, "loss": 1.2834, "step": 1807 }, { "epoch": 0.08566690357735134, "grad_norm": 0.63671875, "learning_rate": 0.0001964363618635355, "loss": 0.9358, "step": 1808 }, { "epoch": 0.08571428571428572, "grad_norm": 0.4375, "learning_rate": 0.00019643242050506124, "loss": 0.1022, "step": 1809 }, { "epoch": 0.0857616678512201, "grad_norm": 0.6171875, "learning_rate": 0.0001964284770078186, "loss": 1.0071, "step": 1810 }, { "epoch": 0.08580904998815446, "grad_norm": 1.203125, "learning_rate": 0.00019642453137189505, "loss": 0.0967, "step": 1811 }, { "epoch": 0.08585643212508884, "grad_norm": 0.9921875, "learning_rate": 0.00019642058359737813, "loss": 0.9234, "step": 1812 }, { "epoch": 0.08590381426202322, "grad_norm": 0.60546875, "learning_rate": 0.00019641663368435533, "loss": 1.2388, "step": 1813 }, { "epoch": 0.0859511963989576, "grad_norm": 0.1484375, "learning_rate": 0.0001964126816329143, "loss": 0.007, "step": 1814 }, { "epoch": 0.08599857853589196, "grad_norm": 0.953125, "learning_rate": 0.0001964087274431427, "loss": 0.0974, "step": 1815 }, { "epoch": 0.08604596067282634, "grad_norm": 1.234375, "learning_rate": 0.0001964047711151282, "loss": 0.4457, "step": 1816 }, { "epoch": 0.08609334280976072, "grad_norm": 0.44921875, "learning_rate": 0.00019640081264895857, "loss": 1.1292, "step": 1817 }, { "epoch": 0.0861407249466951, "grad_norm": 0.5390625, "learning_rate": 0.0001963968520447216, "loss": 0.7719, "step": 1818 }, { "epoch": 0.08618810708362948, "grad_norm": 0.52734375, "learning_rate": 0.00019639288930250516, "loss": 0.7508, "step": 1819 }, { "epoch": 0.08623548922056384, "grad_norm": 0.384765625, "learning_rate": 0.00019638892442239706, "loss": 1.1699, "step": 1820 }, { "epoch": 0.08628287135749822, "grad_norm": 0.78515625, "learning_rate": 0.00019638495740448528, "loss": 0.9526, "step": 1821 }, { "epoch": 0.0863302534944326, "grad_norm": 0.5234375, "learning_rate": 0.00019638098824885784, "loss": 1.1326, "step": 1822 }, { "epoch": 0.08637763563136698, "grad_norm": 0.470703125, "learning_rate": 0.00019637701695560274, "loss": 0.1677, "step": 1823 }, { "epoch": 0.08642501776830135, "grad_norm": 0.462890625, "learning_rate": 0.00019637304352480806, "loss": 1.0371, "step": 1824 }, { "epoch": 0.08647239990523573, "grad_norm": 0.34375, "learning_rate": 0.0001963690679565619, "loss": 0.208, "step": 1825 }, { "epoch": 0.0865197820421701, "grad_norm": 0.2177734375, "learning_rate": 0.00019636509025095248, "loss": 0.0226, "step": 1826 }, { "epoch": 0.08656716417910448, "grad_norm": 0.333984375, "learning_rate": 0.00019636111040806798, "loss": 0.1305, "step": 1827 }, { "epoch": 0.08661454631603885, "grad_norm": 0.54296875, "learning_rate": 0.00019635712842799673, "loss": 1.0012, "step": 1828 }, { "epoch": 0.08666192845297323, "grad_norm": 0.6015625, "learning_rate": 0.00019635314431082698, "loss": 0.9523, "step": 1829 }, { "epoch": 0.0867093105899076, "grad_norm": 0.89453125, "learning_rate": 0.00019634915805664713, "loss": 0.7971, "step": 1830 }, { "epoch": 0.08675669272684199, "grad_norm": 0.54296875, "learning_rate": 0.0001963451696655456, "loss": 0.8661, "step": 1831 }, { "epoch": 0.08680407486377635, "grad_norm": 0.51953125, "learning_rate": 0.0001963411791376108, "loss": 1.1867, "step": 1832 }, { "epoch": 0.08685145700071073, "grad_norm": 0.5703125, "learning_rate": 0.00019633718647293127, "loss": 0.7809, "step": 1833 }, { "epoch": 0.08689883913764511, "grad_norm": 0.60546875, "learning_rate": 0.00019633319167159557, "loss": 0.9822, "step": 1834 }, { "epoch": 0.08694622127457949, "grad_norm": 0.58984375, "learning_rate": 0.00019632919473369228, "loss": 1.0346, "step": 1835 }, { "epoch": 0.08699360341151385, "grad_norm": 0.265625, "learning_rate": 0.00019632519565931006, "loss": 0.1719, "step": 1836 }, { "epoch": 0.08704098554844823, "grad_norm": 0.498046875, "learning_rate": 0.00019632119444853757, "loss": 0.3176, "step": 1837 }, { "epoch": 0.08708836768538261, "grad_norm": 0.59375, "learning_rate": 0.00019631719110146362, "loss": 1.0714, "step": 1838 }, { "epoch": 0.08713574982231699, "grad_norm": 0.330078125, "learning_rate": 0.00019631318561817696, "loss": 0.2127, "step": 1839 }, { "epoch": 0.08718313195925136, "grad_norm": 0.44140625, "learning_rate": 0.00019630917799876642, "loss": 1.0841, "step": 1840 }, { "epoch": 0.08723051409618574, "grad_norm": 0.69921875, "learning_rate": 0.0001963051682433209, "loss": 0.55, "step": 1841 }, { "epoch": 0.08727789623312011, "grad_norm": 0.48046875, "learning_rate": 0.00019630115635192933, "loss": 0.7196, "step": 1842 }, { "epoch": 0.0873252783700545, "grad_norm": 0.5859375, "learning_rate": 0.0001962971423246807, "loss": 1.0728, "step": 1843 }, { "epoch": 0.08737266050698886, "grad_norm": 0.24609375, "learning_rate": 0.000196293126161664, "loss": 0.0204, "step": 1844 }, { "epoch": 0.08742004264392324, "grad_norm": 0.6171875, "learning_rate": 0.00019628910786296833, "loss": 0.126, "step": 1845 }, { "epoch": 0.08746742478085762, "grad_norm": 0.55078125, "learning_rate": 0.00019628508742868285, "loss": 1.0929, "step": 1846 }, { "epoch": 0.087514806917792, "grad_norm": 0.498046875, "learning_rate": 0.00019628106485889663, "loss": 1.3432, "step": 1847 }, { "epoch": 0.08756218905472637, "grad_norm": 0.6328125, "learning_rate": 0.000196277040153699, "loss": 1.217, "step": 1848 }, { "epoch": 0.08760957119166074, "grad_norm": 0.57421875, "learning_rate": 0.0001962730133131791, "loss": 1.0241, "step": 1849 }, { "epoch": 0.08765695332859512, "grad_norm": 0.01507568359375, "learning_rate": 0.00019626898433742636, "loss": 0.0011, "step": 1850 }, { "epoch": 0.0877043354655295, "grad_norm": 0.55859375, "learning_rate": 0.0001962649532265301, "loss": 1.0595, "step": 1851 }, { "epoch": 0.08775171760246388, "grad_norm": 0.51953125, "learning_rate": 0.0001962609199805797, "loss": 0.9597, "step": 1852 }, { "epoch": 0.08779909973939824, "grad_norm": 0.74609375, "learning_rate": 0.00019625688459966464, "loss": 0.3966, "step": 1853 }, { "epoch": 0.08784648187633262, "grad_norm": 0.58984375, "learning_rate": 0.00019625284708387435, "loss": 0.17, "step": 1854 }, { "epoch": 0.087893864013267, "grad_norm": 0.0927734375, "learning_rate": 0.00019624880743329847, "loss": 0.006, "step": 1855 }, { "epoch": 0.08794124615020138, "grad_norm": 0.58203125, "learning_rate": 0.00019624476564802657, "loss": 0.2036, "step": 1856 }, { "epoch": 0.08798862828713575, "grad_norm": 0.59765625, "learning_rate": 0.00019624072172814828, "loss": 1.4628, "step": 1857 }, { "epoch": 0.08803601042407012, "grad_norm": 0.93359375, "learning_rate": 0.0001962366756737533, "loss": 0.377, "step": 1858 }, { "epoch": 0.0880833925610045, "grad_norm": 0.65234375, "learning_rate": 0.00019623262748493135, "loss": 1.082, "step": 1859 }, { "epoch": 0.08813077469793888, "grad_norm": 0.435546875, "learning_rate": 0.00019622857716177224, "loss": 0.1617, "step": 1860 }, { "epoch": 0.08817815683487325, "grad_norm": 0.474609375, "learning_rate": 0.00019622452470436578, "loss": 1.0647, "step": 1861 }, { "epoch": 0.08822553897180763, "grad_norm": 0.5234375, "learning_rate": 0.00019622047011280184, "loss": 1.0775, "step": 1862 }, { "epoch": 0.088272921108742, "grad_norm": 0.5703125, "learning_rate": 0.00019621641338717037, "loss": 1.3918, "step": 1863 }, { "epoch": 0.08832030324567638, "grad_norm": 0.796875, "learning_rate": 0.00019621235452756136, "loss": 0.1497, "step": 1864 }, { "epoch": 0.08836768538261075, "grad_norm": 0.373046875, "learning_rate": 0.00019620829353406477, "loss": 1.1239, "step": 1865 }, { "epoch": 0.08841506751954513, "grad_norm": 0.53515625, "learning_rate": 0.00019620423040677074, "loss": 0.9789, "step": 1866 }, { "epoch": 0.08846244965647951, "grad_norm": 0.51171875, "learning_rate": 0.00019620016514576932, "loss": 1.0354, "step": 1867 }, { "epoch": 0.08850983179341389, "grad_norm": 0.5, "learning_rate": 0.00019619609775115072, "loss": 1.2406, "step": 1868 }, { "epoch": 0.08855721393034825, "grad_norm": 0.458984375, "learning_rate": 0.00019619202822300513, "loss": 1.5745, "step": 1869 }, { "epoch": 0.08860459606728263, "grad_norm": 0.61328125, "learning_rate": 0.0001961879565614228, "loss": 1.1745, "step": 1870 }, { "epoch": 0.08865197820421701, "grad_norm": 1.0234375, "learning_rate": 0.00019618388276649407, "loss": 0.2836, "step": 1871 }, { "epoch": 0.08869936034115139, "grad_norm": 1.21875, "learning_rate": 0.00019617980683830926, "loss": 0.379, "step": 1872 }, { "epoch": 0.08874674247808576, "grad_norm": 0.81640625, "learning_rate": 0.00019617572877695882, "loss": 0.1641, "step": 1873 }, { "epoch": 0.08879412461502013, "grad_norm": 0.33984375, "learning_rate": 0.00019617164858253311, "loss": 0.0742, "step": 1874 }, { "epoch": 0.08884150675195451, "grad_norm": 0.49609375, "learning_rate": 0.0001961675662551227, "loss": 1.1144, "step": 1875 }, { "epoch": 0.08888888888888889, "grad_norm": 0.5546875, "learning_rate": 0.00019616348179481808, "loss": 0.0538, "step": 1876 }, { "epoch": 0.08893627102582327, "grad_norm": 0.44921875, "learning_rate": 0.00019615939520170988, "loss": 0.9442, "step": 1877 }, { "epoch": 0.08898365316275764, "grad_norm": 0.6640625, "learning_rate": 0.00019615530647588872, "loss": 1.5793, "step": 1878 }, { "epoch": 0.08903103529969202, "grad_norm": 0.478515625, "learning_rate": 0.00019615121561744524, "loss": 0.7052, "step": 1879 }, { "epoch": 0.0890784174366264, "grad_norm": 0.60546875, "learning_rate": 0.00019614712262647026, "loss": 1.4601, "step": 1880 }, { "epoch": 0.08912579957356077, "grad_norm": 0.65625, "learning_rate": 0.00019614302750305452, "loss": 1.1465, "step": 1881 }, { "epoch": 0.08917318171049514, "grad_norm": 0.83984375, "learning_rate": 0.0001961389302472888, "loss": 1.4365, "step": 1882 }, { "epoch": 0.08922056384742952, "grad_norm": 0.427734375, "learning_rate": 0.000196134830859264, "loss": 1.0184, "step": 1883 }, { "epoch": 0.0892679459843639, "grad_norm": 0.65625, "learning_rate": 0.0001961307293390711, "loss": 1.4051, "step": 1884 }, { "epoch": 0.08931532812129828, "grad_norm": 0.498046875, "learning_rate": 0.00019612662568680098, "loss": 0.7338, "step": 1885 }, { "epoch": 0.08936271025823264, "grad_norm": 0.490234375, "learning_rate": 0.0001961225199025447, "loss": 1.3631, "step": 1886 }, { "epoch": 0.08941009239516702, "grad_norm": 0.5234375, "learning_rate": 0.00019611841198639333, "loss": 0.8304, "step": 1887 }, { "epoch": 0.0894574745321014, "grad_norm": 0.58203125, "learning_rate": 0.00019611430193843793, "loss": 0.8106, "step": 1888 }, { "epoch": 0.08950485666903578, "grad_norm": 0.66015625, "learning_rate": 0.00019611018975876972, "loss": 0.0576, "step": 1889 }, { "epoch": 0.08955223880597014, "grad_norm": 0.48046875, "learning_rate": 0.00019610607544747987, "loss": 0.5524, "step": 1890 }, { "epoch": 0.08959962094290452, "grad_norm": 0.58984375, "learning_rate": 0.00019610195900465963, "loss": 1.0035, "step": 1891 }, { "epoch": 0.0896470030798389, "grad_norm": 0.53125, "learning_rate": 0.00019609784043040032, "loss": 1.0234, "step": 1892 }, { "epoch": 0.08969438521677328, "grad_norm": 0.62109375, "learning_rate": 0.0001960937197247932, "loss": 1.0955, "step": 1893 }, { "epoch": 0.08974176735370765, "grad_norm": 0.2060546875, "learning_rate": 0.0001960895968879298, "loss": 0.0322, "step": 1894 }, { "epoch": 0.08978914949064203, "grad_norm": 0.48828125, "learning_rate": 0.00019608547191990146, "loss": 0.7134, "step": 1895 }, { "epoch": 0.0898365316275764, "grad_norm": 0.498046875, "learning_rate": 0.00019608134482079973, "loss": 0.8305, "step": 1896 }, { "epoch": 0.08988391376451078, "grad_norm": 0.703125, "learning_rate": 0.00019607721559071608, "loss": 1.1003, "step": 1897 }, { "epoch": 0.08993129590144515, "grad_norm": 0.625, "learning_rate": 0.00019607308422974216, "loss": 0.9224, "step": 1898 }, { "epoch": 0.08997867803837953, "grad_norm": 0.47265625, "learning_rate": 0.0001960689507379695, "loss": 0.2233, "step": 1899 }, { "epoch": 0.09002606017531391, "grad_norm": 0.5546875, "learning_rate": 0.0001960648151154899, "loss": 0.1031, "step": 1900 }, { "epoch": 0.09007344231224829, "grad_norm": 0.3359375, "learning_rate": 0.000196060677362395, "loss": 0.1767, "step": 1901 }, { "epoch": 0.09012082444918265, "grad_norm": 0.042236328125, "learning_rate": 0.0001960565374787766, "loss": 0.0024, "step": 1902 }, { "epoch": 0.09016820658611703, "grad_norm": 0.51953125, "learning_rate": 0.0001960523954647265, "loss": 1.0201, "step": 1903 }, { "epoch": 0.09021558872305141, "grad_norm": 0.5390625, "learning_rate": 0.00019604825132033663, "loss": 1.044, "step": 1904 }, { "epoch": 0.09026297085998579, "grad_norm": 0.1884765625, "learning_rate": 0.0001960441050456988, "loss": 0.0299, "step": 1905 }, { "epoch": 0.09031035299692017, "grad_norm": 0.498046875, "learning_rate": 0.000196039956640905, "loss": 1.0385, "step": 1906 }, { "epoch": 0.09035773513385453, "grad_norm": 0.53125, "learning_rate": 0.0001960358061060473, "loss": 1.0603, "step": 1907 }, { "epoch": 0.09040511727078891, "grad_norm": 0.38671875, "learning_rate": 0.00019603165344121774, "loss": 0.8072, "step": 1908 }, { "epoch": 0.09045249940772329, "grad_norm": 0.8046875, "learning_rate": 0.00019602749864650835, "loss": 0.3386, "step": 1909 }, { "epoch": 0.09049988154465767, "grad_norm": 0.6875, "learning_rate": 0.00019602334172201137, "loss": 1.4393, "step": 1910 }, { "epoch": 0.09054726368159204, "grad_norm": 0.6015625, "learning_rate": 0.0001960191826678189, "loss": 1.5312, "step": 1911 }, { "epoch": 0.09059464581852641, "grad_norm": 0.96484375, "learning_rate": 0.00019601502148402323, "loss": 0.3824, "step": 1912 }, { "epoch": 0.0906420279554608, "grad_norm": 0.80078125, "learning_rate": 0.0001960108581707167, "loss": 0.2593, "step": 1913 }, { "epoch": 0.09068941009239517, "grad_norm": 0.51171875, "learning_rate": 0.00019600669272799157, "loss": 0.102, "step": 1914 }, { "epoch": 0.09073679222932954, "grad_norm": 0.62109375, "learning_rate": 0.00019600252515594027, "loss": 0.9024, "step": 1915 }, { "epoch": 0.09078417436626392, "grad_norm": 0.59765625, "learning_rate": 0.0001959983554546552, "loss": 0.6981, "step": 1916 }, { "epoch": 0.0908315565031983, "grad_norm": 0.5234375, "learning_rate": 0.0001959941836242289, "loss": 1.0576, "step": 1917 }, { "epoch": 0.09087893864013268, "grad_norm": 0.5390625, "learning_rate": 0.00019599000966475383, "loss": 0.6964, "step": 1918 }, { "epoch": 0.09092632077706704, "grad_norm": 0.53515625, "learning_rate": 0.00019598583357632258, "loss": 0.0821, "step": 1919 }, { "epoch": 0.09097370291400142, "grad_norm": 0.5546875, "learning_rate": 0.00019598165535902778, "loss": 0.8816, "step": 1920 }, { "epoch": 0.0910210850509358, "grad_norm": 0.55859375, "learning_rate": 0.0001959774750129621, "loss": 1.0731, "step": 1921 }, { "epoch": 0.09106846718787018, "grad_norm": 0.58984375, "learning_rate": 0.0001959732925382183, "loss": 0.7151, "step": 1922 }, { "epoch": 0.09111584932480454, "grad_norm": 0.67578125, "learning_rate": 0.00019596910793488902, "loss": 1.176, "step": 1923 }, { "epoch": 0.09116323146173892, "grad_norm": 0.85546875, "learning_rate": 0.0001959649212030672, "loss": 0.3397, "step": 1924 }, { "epoch": 0.0912106135986733, "grad_norm": 0.484375, "learning_rate": 0.00019596073234284561, "loss": 1.1506, "step": 1925 }, { "epoch": 0.09125799573560768, "grad_norm": 0.58203125, "learning_rate": 0.00019595654135431723, "loss": 0.7399, "step": 1926 }, { "epoch": 0.09130537787254205, "grad_norm": 0.58203125, "learning_rate": 0.00019595234823757495, "loss": 0.1751, "step": 1927 }, { "epoch": 0.09135276000947642, "grad_norm": 0.5625, "learning_rate": 0.0001959481529927118, "loss": 1.2673, "step": 1928 }, { "epoch": 0.0914001421464108, "grad_norm": 0.53125, "learning_rate": 0.00019594395561982081, "loss": 0.9565, "step": 1929 }, { "epoch": 0.09144752428334518, "grad_norm": 0.34375, "learning_rate": 0.00019593975611899506, "loss": 0.4006, "step": 1930 }, { "epoch": 0.09149490642027955, "grad_norm": 0.451171875, "learning_rate": 0.00019593555449032773, "loss": 0.62, "step": 1931 }, { "epoch": 0.09154228855721393, "grad_norm": 0.671875, "learning_rate": 0.000195931350733912, "loss": 1.1285, "step": 1932 }, { "epoch": 0.0915896706941483, "grad_norm": 0.447265625, "learning_rate": 0.00019592714484984106, "loss": 0.8639, "step": 1933 }, { "epoch": 0.09163705283108269, "grad_norm": 0.55859375, "learning_rate": 0.00019592293683820826, "loss": 1.1909, "step": 1934 }, { "epoch": 0.09168443496801706, "grad_norm": 0.546875, "learning_rate": 0.0001959187266991069, "loss": 1.2091, "step": 1935 }, { "epoch": 0.09173181710495143, "grad_norm": 0.57421875, "learning_rate": 0.0001959145144326303, "loss": 1.3906, "step": 1936 }, { "epoch": 0.09177919924188581, "grad_norm": 0.54296875, "learning_rate": 0.00019591030003887196, "loss": 0.248, "step": 1937 }, { "epoch": 0.09182658137882019, "grad_norm": 0.5625, "learning_rate": 0.00019590608351792533, "loss": 1.1041, "step": 1938 }, { "epoch": 0.09187396351575457, "grad_norm": 0.609375, "learning_rate": 0.00019590186486988391, "loss": 1.1581, "step": 1939 }, { "epoch": 0.09192134565268893, "grad_norm": 0.46875, "learning_rate": 0.0001958976440948413, "loss": 0.9681, "step": 1940 }, { "epoch": 0.09196872778962331, "grad_norm": 0.447265625, "learning_rate": 0.00019589342119289105, "loss": 0.8862, "step": 1941 }, { "epoch": 0.09201610992655769, "grad_norm": 0.6171875, "learning_rate": 0.0001958891961641269, "loss": 1.1146, "step": 1942 }, { "epoch": 0.09206349206349207, "grad_norm": 0.439453125, "learning_rate": 0.0001958849690086425, "loss": 0.5821, "step": 1943 }, { "epoch": 0.09211087420042643, "grad_norm": 1.140625, "learning_rate": 0.0001958807397265316, "loss": 0.6119, "step": 1944 }, { "epoch": 0.09215825633736081, "grad_norm": 0.88671875, "learning_rate": 0.00019587650831788805, "loss": 0.5591, "step": 1945 }, { "epoch": 0.09220563847429519, "grad_norm": 0.546875, "learning_rate": 0.00019587227478280562, "loss": 0.3989, "step": 1946 }, { "epoch": 0.09225302061122957, "grad_norm": 0.57421875, "learning_rate": 0.0001958680391213783, "loss": 1.3922, "step": 1947 }, { "epoch": 0.09230040274816394, "grad_norm": 0.50390625, "learning_rate": 0.00019586380133369998, "loss": 0.1463, "step": 1948 }, { "epoch": 0.09234778488509832, "grad_norm": 1.1484375, "learning_rate": 0.00019585956141986467, "loss": 0.922, "step": 1949 }, { "epoch": 0.0923951670220327, "grad_norm": 0.3671875, "learning_rate": 0.00019585531937996636, "loss": 0.2105, "step": 1950 }, { "epoch": 0.09244254915896707, "grad_norm": 0.50390625, "learning_rate": 0.00019585107521409917, "loss": 0.9343, "step": 1951 }, { "epoch": 0.09248993129590144, "grad_norm": 0.44921875, "learning_rate": 0.00019584682892235725, "loss": 1.093, "step": 1952 }, { "epoch": 0.09253731343283582, "grad_norm": 0.58203125, "learning_rate": 0.00019584258050483474, "loss": 1.3191, "step": 1953 }, { "epoch": 0.0925846955697702, "grad_norm": 0.259765625, "learning_rate": 0.0001958383299616259, "loss": 0.0516, "step": 1954 }, { "epoch": 0.09263207770670458, "grad_norm": 0.4765625, "learning_rate": 0.00019583407729282498, "loss": 0.8796, "step": 1955 }, { "epoch": 0.09267945984363894, "grad_norm": 0.3984375, "learning_rate": 0.00019582982249852625, "loss": 0.5609, "step": 1956 }, { "epoch": 0.09272684198057332, "grad_norm": 0.404296875, "learning_rate": 0.0001958255655788242, "loss": 0.6923, "step": 1957 }, { "epoch": 0.0927742241175077, "grad_norm": 0.43359375, "learning_rate": 0.00019582130653381314, "loss": 0.2134, "step": 1958 }, { "epoch": 0.09282160625444208, "grad_norm": 0.671875, "learning_rate": 0.00019581704536358755, "loss": 1.4877, "step": 1959 }, { "epoch": 0.09286898839137644, "grad_norm": 0.5625, "learning_rate": 0.000195812782068242, "loss": 0.8046, "step": 1960 }, { "epoch": 0.09291637052831082, "grad_norm": 0.52734375, "learning_rate": 0.00019580851664787098, "loss": 1.401, "step": 1961 }, { "epoch": 0.0929637526652452, "grad_norm": 0.1533203125, "learning_rate": 0.0001958042491025691, "loss": 0.017, "step": 1962 }, { "epoch": 0.09301113480217958, "grad_norm": 0.53515625, "learning_rate": 0.00019579997943243102, "loss": 0.961, "step": 1963 }, { "epoch": 0.09305851693911395, "grad_norm": 0.5859375, "learning_rate": 0.00019579570763755144, "loss": 0.157, "step": 1964 }, { "epoch": 0.09310589907604833, "grad_norm": 0.37890625, "learning_rate": 0.0001957914337180251, "loss": 0.1928, "step": 1965 }, { "epoch": 0.0931532812129827, "grad_norm": 0.55859375, "learning_rate": 0.00019578715767394682, "loss": 0.2222, "step": 1966 }, { "epoch": 0.09320066334991708, "grad_norm": 0.30078125, "learning_rate": 0.00019578287950541137, "loss": 0.2013, "step": 1967 }, { "epoch": 0.09324804548685146, "grad_norm": 0.4765625, "learning_rate": 0.00019577859921251372, "loss": 0.1711, "step": 1968 }, { "epoch": 0.09329542762378583, "grad_norm": 0.5, "learning_rate": 0.00019577431679534876, "loss": 1.3195, "step": 1969 }, { "epoch": 0.09334280976072021, "grad_norm": 0.625, "learning_rate": 0.00019577003225401142, "loss": 0.9525, "step": 1970 }, { "epoch": 0.09339019189765459, "grad_norm": 0.5390625, "learning_rate": 0.00019576574558859678, "loss": 0.8443, "step": 1971 }, { "epoch": 0.09343757403458897, "grad_norm": 0.56640625, "learning_rate": 0.00019576145679919992, "loss": 0.8663, "step": 1972 }, { "epoch": 0.09348495617152333, "grad_norm": 0.62109375, "learning_rate": 0.00019575716588591597, "loss": 0.1549, "step": 1973 }, { "epoch": 0.09353233830845771, "grad_norm": 0.490234375, "learning_rate": 0.00019575287284884005, "loss": 0.211, "step": 1974 }, { "epoch": 0.09357972044539209, "grad_norm": 0.494140625, "learning_rate": 0.00019574857768806742, "loss": 0.5792, "step": 1975 }, { "epoch": 0.09362710258232647, "grad_norm": 0.53125, "learning_rate": 0.0001957442804036933, "loss": 1.4567, "step": 1976 }, { "epoch": 0.09367448471926083, "grad_norm": 0.125, "learning_rate": 0.00019573998099581304, "loss": 0.012, "step": 1977 }, { "epoch": 0.09372186685619521, "grad_norm": 0.404296875, "learning_rate": 0.00019573567946452197, "loss": 0.8247, "step": 1978 }, { "epoch": 0.09376924899312959, "grad_norm": 0.18359375, "learning_rate": 0.0001957313758099155, "loss": 0.0189, "step": 1979 }, { "epoch": 0.09381663113006397, "grad_norm": 0.58203125, "learning_rate": 0.00019572707003208913, "loss": 1.1493, "step": 1980 }, { "epoch": 0.09386401326699834, "grad_norm": 0.58984375, "learning_rate": 0.00019572276213113824, "loss": 1.4851, "step": 1981 }, { "epoch": 0.09391139540393272, "grad_norm": 0.58203125, "learning_rate": 0.00019571845210715849, "loss": 1.2883, "step": 1982 }, { "epoch": 0.0939587775408671, "grad_norm": 0.5859375, "learning_rate": 0.00019571413996024542, "loss": 1.0645, "step": 1983 }, { "epoch": 0.09400615967780147, "grad_norm": 0.71484375, "learning_rate": 0.00019570982569049467, "loss": 0.0704, "step": 1984 }, { "epoch": 0.09405354181473584, "grad_norm": 0.5078125, "learning_rate": 0.00019570550929800192, "loss": 0.9932, "step": 1985 }, { "epoch": 0.09410092395167022, "grad_norm": 0.57421875, "learning_rate": 0.00019570119078286293, "loss": 0.9952, "step": 1986 }, { "epoch": 0.0941483060886046, "grad_norm": 0.34765625, "learning_rate": 0.00019569687014517348, "loss": 0.2108, "step": 1987 }, { "epoch": 0.09419568822553898, "grad_norm": 0.46875, "learning_rate": 0.0001956925473850294, "loss": 0.8497, "step": 1988 }, { "epoch": 0.09424307036247334, "grad_norm": 0.59375, "learning_rate": 0.00019568822250252648, "loss": 1.1613, "step": 1989 }, { "epoch": 0.09429045249940772, "grad_norm": 0.498046875, "learning_rate": 0.0001956838954977607, "loss": 0.5845, "step": 1990 }, { "epoch": 0.0943378346363421, "grad_norm": 0.6640625, "learning_rate": 0.0001956795663708281, "loss": 1.0806, "step": 1991 }, { "epoch": 0.09438521677327648, "grad_norm": 0.498046875, "learning_rate": 0.0001956752351218246, "loss": 0.8906, "step": 1992 }, { "epoch": 0.09443259891021084, "grad_norm": 0.609375, "learning_rate": 0.00019567090175084633, "loss": 1.265, "step": 1993 }, { "epoch": 0.09447998104714522, "grad_norm": 0.7890625, "learning_rate": 0.0001956665662579893, "loss": 0.295, "step": 1994 }, { "epoch": 0.0945273631840796, "grad_norm": 0.6640625, "learning_rate": 0.0001956622286433498, "loss": 0.0936, "step": 1995 }, { "epoch": 0.09457474532101398, "grad_norm": 0.345703125, "learning_rate": 0.0001956578889070239, "loss": 0.4778, "step": 1996 }, { "epoch": 0.09462212745794836, "grad_norm": 0.55859375, "learning_rate": 0.00019565354704910792, "loss": 1.3791, "step": 1997 }, { "epoch": 0.09466950959488273, "grad_norm": 0.55078125, "learning_rate": 0.00019564920306969818, "loss": 1.0183, "step": 1998 }, { "epoch": 0.0947168917318171, "grad_norm": 1.2265625, "learning_rate": 0.00019564485696889098, "loss": 1.125, "step": 1999 }, { "epoch": 0.09476427386875148, "grad_norm": 0.59375, "learning_rate": 0.00019564050874678276, "loss": 0.881, "step": 2000 }, { "epoch": 0.09481165600568586, "grad_norm": 0.07861328125, "learning_rate": 0.0001956361584034699, "loss": 0.0042, "step": 2001 }, { "epoch": 0.09485903814262023, "grad_norm": 0.55078125, "learning_rate": 0.0001956318059390489, "loss": 0.9589, "step": 2002 }, { "epoch": 0.0949064202795546, "grad_norm": 0.53125, "learning_rate": 0.00019562745135361635, "loss": 0.8589, "step": 2003 }, { "epoch": 0.09495380241648899, "grad_norm": 0.427734375, "learning_rate": 0.00019562309464726875, "loss": 0.8546, "step": 2004 }, { "epoch": 0.09500118455342337, "grad_norm": 0.59765625, "learning_rate": 0.0001956187358201028, "loss": 1.1615, "step": 2005 }, { "epoch": 0.09504856669035773, "grad_norm": 0.76171875, "learning_rate": 0.0001956143748722151, "loss": 1.1912, "step": 2006 }, { "epoch": 0.09509594882729211, "grad_norm": 0.56640625, "learning_rate": 0.0001956100118037024, "loss": 1.4969, "step": 2007 }, { "epoch": 0.09514333096422649, "grad_norm": 0.5, "learning_rate": 0.0001956056466146615, "loss": 1.0647, "step": 2008 }, { "epoch": 0.09519071310116087, "grad_norm": 0.6328125, "learning_rate": 0.00019560127930518922, "loss": 0.7003, "step": 2009 }, { "epoch": 0.09523809523809523, "grad_norm": 0.7109375, "learning_rate": 0.00019559690987538234, "loss": 0.4448, "step": 2010 }, { "epoch": 0.09528547737502961, "grad_norm": 0.5390625, "learning_rate": 0.00019559253832533786, "loss": 1.2646, "step": 2011 }, { "epoch": 0.09533285951196399, "grad_norm": 0.546875, "learning_rate": 0.0001955881646551527, "loss": 1.2954, "step": 2012 }, { "epoch": 0.09538024164889837, "grad_norm": 0.5703125, "learning_rate": 0.00019558378886492387, "loss": 0.9051, "step": 2013 }, { "epoch": 0.09542762378583274, "grad_norm": 0.5078125, "learning_rate": 0.0001955794109547484, "loss": 0.9611, "step": 2014 }, { "epoch": 0.09547500592276711, "grad_norm": 0.68359375, "learning_rate": 0.0001955750309247234, "loss": 1.608, "step": 2015 }, { "epoch": 0.0955223880597015, "grad_norm": 0.0086669921875, "learning_rate": 0.000195570648774946, "loss": 0.0008, "step": 2016 }, { "epoch": 0.09556977019663587, "grad_norm": 0.67578125, "learning_rate": 0.00019556626450551343, "loss": 1.1494, "step": 2017 }, { "epoch": 0.09561715233357024, "grad_norm": 0.470703125, "learning_rate": 0.0001955618781165229, "loss": 1.3059, "step": 2018 }, { "epoch": 0.09566453447050462, "grad_norm": 0.470703125, "learning_rate": 0.0001955574896080717, "loss": 1.0875, "step": 2019 }, { "epoch": 0.095711916607439, "grad_norm": 0.5234375, "learning_rate": 0.00019555309898025718, "loss": 0.706, "step": 2020 }, { "epoch": 0.09575929874437338, "grad_norm": 0.61328125, "learning_rate": 0.00019554870623317668, "loss": 0.9968, "step": 2021 }, { "epoch": 0.09580668088130774, "grad_norm": 0.451171875, "learning_rate": 0.00019554431136692765, "loss": 0.8222, "step": 2022 }, { "epoch": 0.09585406301824212, "grad_norm": 0.53515625, "learning_rate": 0.0001955399143816076, "loss": 1.1134, "step": 2023 }, { "epoch": 0.0959014451551765, "grad_norm": 0.64453125, "learning_rate": 0.00019553551527731397, "loss": 0.9769, "step": 2024 }, { "epoch": 0.09594882729211088, "grad_norm": 0.341796875, "learning_rate": 0.0001955311140541444, "loss": 0.0852, "step": 2025 }, { "epoch": 0.09599620942904526, "grad_norm": 0.474609375, "learning_rate": 0.00019552671071219644, "loss": 1.1401, "step": 2026 }, { "epoch": 0.09604359156597962, "grad_norm": 0.453125, "learning_rate": 0.00019552230525156784, "loss": 0.9021, "step": 2027 }, { "epoch": 0.096090973702914, "grad_norm": 0.4765625, "learning_rate": 0.0001955178976723562, "loss": 0.5925, "step": 2028 }, { "epoch": 0.09613835583984838, "grad_norm": 0.91015625, "learning_rate": 0.00019551348797465935, "loss": 1.123, "step": 2029 }, { "epoch": 0.09618573797678276, "grad_norm": 0.66015625, "learning_rate": 0.00019550907615857507, "loss": 0.0715, "step": 2030 }, { "epoch": 0.09623312011371712, "grad_norm": 0.515625, "learning_rate": 0.00019550466222420125, "loss": 1.0904, "step": 2031 }, { "epoch": 0.0962805022506515, "grad_norm": 0.61328125, "learning_rate": 0.0001955002461716357, "loss": 1.1416, "step": 2032 }, { "epoch": 0.09632788438758588, "grad_norm": 0.486328125, "learning_rate": 0.00019549582800097644, "loss": 1.2338, "step": 2033 }, { "epoch": 0.09637526652452026, "grad_norm": 0.4375, "learning_rate": 0.00019549140771232143, "loss": 0.8136, "step": 2034 }, { "epoch": 0.09642264866145463, "grad_norm": 0.51171875, "learning_rate": 0.0001954869853057687, "loss": 0.857, "step": 2035 }, { "epoch": 0.096470030798389, "grad_norm": 1.8828125, "learning_rate": 0.00019548256078141636, "loss": 1.0993, "step": 2036 }, { "epoch": 0.09651741293532339, "grad_norm": 0.76953125, "learning_rate": 0.0001954781341393625, "loss": 0.6322, "step": 2037 }, { "epoch": 0.09656479507225776, "grad_norm": 0.5625, "learning_rate": 0.00019547370537970532, "loss": 0.1113, "step": 2038 }, { "epoch": 0.09661217720919213, "grad_norm": 0.62890625, "learning_rate": 0.0001954692745025431, "loss": 0.9889, "step": 2039 }, { "epoch": 0.09665955934612651, "grad_norm": 0.6015625, "learning_rate": 0.00019546484150797402, "loss": 1.0365, "step": 2040 }, { "epoch": 0.09670694148306089, "grad_norm": 0.462890625, "learning_rate": 0.00019546040639609644, "loss": 1.5881, "step": 2041 }, { "epoch": 0.09675432361999527, "grad_norm": 0.546875, "learning_rate": 0.00019545596916700872, "loss": 1.3077, "step": 2042 }, { "epoch": 0.09680170575692963, "grad_norm": 0.62890625, "learning_rate": 0.00019545152982080932, "loss": 1.132, "step": 2043 }, { "epoch": 0.09684908789386401, "grad_norm": 0.6015625, "learning_rate": 0.00019544708835759662, "loss": 1.2448, "step": 2044 }, { "epoch": 0.09689647003079839, "grad_norm": 0.59375, "learning_rate": 0.0001954426447774692, "loss": 1.0699, "step": 2045 }, { "epoch": 0.09694385216773277, "grad_norm": 0.78515625, "learning_rate": 0.00019543819908052554, "loss": 0.4107, "step": 2046 }, { "epoch": 0.09699123430466713, "grad_norm": 0.083984375, "learning_rate": 0.0001954337512668643, "loss": 0.0083, "step": 2047 }, { "epoch": 0.09703861644160151, "grad_norm": 0.7578125, "learning_rate": 0.00019542930133658408, "loss": 0.2417, "step": 2048 }, { "epoch": 0.09708599857853589, "grad_norm": 0.5859375, "learning_rate": 0.00019542484928978363, "loss": 1.1712, "step": 2049 }, { "epoch": 0.09713338071547027, "grad_norm": 0.486328125, "learning_rate": 0.00019542039512656167, "loss": 1.1497, "step": 2050 }, { "epoch": 0.09718076285240464, "grad_norm": 0.3984375, "learning_rate": 0.00019541593884701697, "loss": 0.7131, "step": 2051 }, { "epoch": 0.09722814498933902, "grad_norm": 0.58203125, "learning_rate": 0.0001954114804512484, "loss": 0.7727, "step": 2052 }, { "epoch": 0.0972755271262734, "grad_norm": 0.56640625, "learning_rate": 0.0001954070199393548, "loss": 0.9297, "step": 2053 }, { "epoch": 0.09732290926320777, "grad_norm": 0.56640625, "learning_rate": 0.00019540255731143513, "loss": 0.1139, "step": 2054 }, { "epoch": 0.09737029140014215, "grad_norm": 0.0693359375, "learning_rate": 0.00019539809256758836, "loss": 0.0037, "step": 2055 }, { "epoch": 0.09741767353707652, "grad_norm": 0.294921875, "learning_rate": 0.00019539362570791352, "loss": 0.208, "step": 2056 }, { "epoch": 0.0974650556740109, "grad_norm": 0.62890625, "learning_rate": 0.00019538915673250964, "loss": 1.1762, "step": 2057 }, { "epoch": 0.09751243781094528, "grad_norm": 0.52734375, "learning_rate": 0.00019538468564147588, "loss": 0.9383, "step": 2058 }, { "epoch": 0.09755981994787966, "grad_norm": 0.4921875, "learning_rate": 0.0001953802124349114, "loss": 1.2892, "step": 2059 }, { "epoch": 0.09760720208481402, "grad_norm": 1.0078125, "learning_rate": 0.0001953757371129154, "loss": 0.3101, "step": 2060 }, { "epoch": 0.0976545842217484, "grad_norm": 0.416015625, "learning_rate": 0.00019537125967558714, "loss": 0.0716, "step": 2061 }, { "epoch": 0.09770196635868278, "grad_norm": 0.64453125, "learning_rate": 0.00019536678012302592, "loss": 1.0844, "step": 2062 }, { "epoch": 0.09774934849561716, "grad_norm": 0.6796875, "learning_rate": 0.0001953622984553311, "loss": 0.9733, "step": 2063 }, { "epoch": 0.09779673063255152, "grad_norm": 0.39453125, "learning_rate": 0.00019535781467260206, "loss": 0.0277, "step": 2064 }, { "epoch": 0.0978441127694859, "grad_norm": 0.66796875, "learning_rate": 0.0001953533287749383, "loss": 0.0644, "step": 2065 }, { "epoch": 0.09789149490642028, "grad_norm": 0.59375, "learning_rate": 0.00019534884076243922, "loss": 0.8489, "step": 2066 }, { "epoch": 0.09793887704335466, "grad_norm": 0.55859375, "learning_rate": 0.00019534435063520446, "loss": 0.8202, "step": 2067 }, { "epoch": 0.09798625918028903, "grad_norm": 0.75, "learning_rate": 0.00019533985839333354, "loss": 0.9201, "step": 2068 }, { "epoch": 0.0980336413172234, "grad_norm": 0.25390625, "learning_rate": 0.0001953353640369261, "loss": 0.0525, "step": 2069 }, { "epoch": 0.09808102345415778, "grad_norm": 0.43359375, "learning_rate": 0.00019533086756608187, "loss": 0.9008, "step": 2070 }, { "epoch": 0.09812840559109216, "grad_norm": 0.62109375, "learning_rate": 0.0001953263689809005, "loss": 1.6623, "step": 2071 }, { "epoch": 0.09817578772802653, "grad_norm": 0.5546875, "learning_rate": 0.00019532186828148183, "loss": 1.2798, "step": 2072 }, { "epoch": 0.09822316986496091, "grad_norm": 0.48046875, "learning_rate": 0.00019531736546792562, "loss": 0.8696, "step": 2073 }, { "epoch": 0.09827055200189529, "grad_norm": 0.65234375, "learning_rate": 0.0001953128605403318, "loss": 1.2022, "step": 2074 }, { "epoch": 0.09831793413882967, "grad_norm": 0.47265625, "learning_rate": 0.00019530835349880027, "loss": 0.8575, "step": 2075 }, { "epoch": 0.09836531627576403, "grad_norm": 0.55859375, "learning_rate": 0.00019530384434343097, "loss": 1.1681, "step": 2076 }, { "epoch": 0.09841269841269841, "grad_norm": 0.765625, "learning_rate": 0.0001952993330743239, "loss": 1.0974, "step": 2077 }, { "epoch": 0.09846008054963279, "grad_norm": 0.451171875, "learning_rate": 0.00019529481969157912, "loss": 0.135, "step": 2078 }, { "epoch": 0.09850746268656717, "grad_norm": 0.58203125, "learning_rate": 0.00019529030419529675, "loss": 1.3588, "step": 2079 }, { "epoch": 0.09855484482350153, "grad_norm": 0.482421875, "learning_rate": 0.00019528578658557696, "loss": 1.0843, "step": 2080 }, { "epoch": 0.09860222696043591, "grad_norm": 0.5703125, "learning_rate": 0.00019528126686251988, "loss": 0.9951, "step": 2081 }, { "epoch": 0.09864960909737029, "grad_norm": 1.0078125, "learning_rate": 0.0001952767450262258, "loss": 0.3888, "step": 2082 }, { "epoch": 0.09869699123430467, "grad_norm": 0.5390625, "learning_rate": 0.00019527222107679502, "loss": 1.2278, "step": 2083 }, { "epoch": 0.09874437337123905, "grad_norm": 0.10546875, "learning_rate": 0.00019526769501432785, "loss": 0.0112, "step": 2084 }, { "epoch": 0.09879175550817342, "grad_norm": 0.8046875, "learning_rate": 0.00019526316683892464, "loss": 0.9511, "step": 2085 }, { "epoch": 0.0988391376451078, "grad_norm": 0.5, "learning_rate": 0.00019525863655068593, "loss": 1.0531, "step": 2086 }, { "epoch": 0.09888651978204217, "grad_norm": 0.51953125, "learning_rate": 0.00019525410414971206, "loss": 1.47, "step": 2087 }, { "epoch": 0.09893390191897655, "grad_norm": 0.37109375, "learning_rate": 0.00019524956963610365, "loss": 0.2093, "step": 2088 }, { "epoch": 0.09898128405591092, "grad_norm": 1.2734375, "learning_rate": 0.00019524503300996125, "loss": 0.4769, "step": 2089 }, { "epoch": 0.0990286661928453, "grad_norm": 0.494140625, "learning_rate": 0.00019524049427138544, "loss": 1.1945, "step": 2090 }, { "epoch": 0.09907604832977968, "grad_norm": 0.458984375, "learning_rate": 0.00019523595342047694, "loss": 0.7347, "step": 2091 }, { "epoch": 0.09912343046671405, "grad_norm": 0.1748046875, "learning_rate": 0.00019523141045733642, "loss": 0.0159, "step": 2092 }, { "epoch": 0.09917081260364842, "grad_norm": 0.58984375, "learning_rate": 0.00019522686538206465, "loss": 1.2201, "step": 2093 }, { "epoch": 0.0992181947405828, "grad_norm": 0.515625, "learning_rate": 0.00019522231819476244, "loss": 0.8225, "step": 2094 }, { "epoch": 0.09926557687751718, "grad_norm": 0.75390625, "learning_rate": 0.00019521776889553066, "loss": 0.9317, "step": 2095 }, { "epoch": 0.09931295901445156, "grad_norm": 0.306640625, "learning_rate": 0.00019521321748447015, "loss": 0.1652, "step": 2096 }, { "epoch": 0.09936034115138592, "grad_norm": 0.91015625, "learning_rate": 0.0001952086639616819, "loss": 0.2254, "step": 2097 }, { "epoch": 0.0994077232883203, "grad_norm": 0.1318359375, "learning_rate": 0.00019520410832726693, "loss": 0.0149, "step": 2098 }, { "epoch": 0.09945510542525468, "grad_norm": 0.53125, "learning_rate": 0.0001951995505813262, "loss": 1.0392, "step": 2099 }, { "epoch": 0.09950248756218906, "grad_norm": 0.44140625, "learning_rate": 0.00019519499072396087, "loss": 0.2113, "step": 2100 }, { "epoch": 0.09954986969912342, "grad_norm": 0.443359375, "learning_rate": 0.00019519042875527202, "loss": 0.7009, "step": 2101 }, { "epoch": 0.0995972518360578, "grad_norm": 0.64453125, "learning_rate": 0.00019518586467536086, "loss": 1.1118, "step": 2102 }, { "epoch": 0.09964463397299218, "grad_norm": 0.4765625, "learning_rate": 0.0001951812984843286, "loss": 0.8469, "step": 2103 }, { "epoch": 0.09969201610992656, "grad_norm": 0.275390625, "learning_rate": 0.00019517673018227654, "loss": 0.0265, "step": 2104 }, { "epoch": 0.09973939824686093, "grad_norm": 0.53515625, "learning_rate": 0.000195172159769306, "loss": 0.7084, "step": 2105 }, { "epoch": 0.0997867803837953, "grad_norm": 0.67578125, "learning_rate": 0.0001951675872455183, "loss": 1.1953, "step": 2106 }, { "epoch": 0.09983416252072969, "grad_norm": 0.95703125, "learning_rate": 0.0001951630126110149, "loss": 1.233, "step": 2107 }, { "epoch": 0.09988154465766406, "grad_norm": 0.451171875, "learning_rate": 0.0001951584358658972, "loss": 1.1104, "step": 2108 }, { "epoch": 0.09992892679459843, "grad_norm": 0.00634765625, "learning_rate": 0.0001951538570102668, "loss": 0.0004, "step": 2109 }, { "epoch": 0.09997630893153281, "grad_norm": 0.427734375, "learning_rate": 0.0001951492760442252, "loss": 0.917, "step": 2110 }, { "epoch": 0.10002369106846719, "grad_norm": 1.0546875, "learning_rate": 0.000195144692967874, "loss": 0.4697, "step": 2111 }, { "epoch": 0.10007107320540157, "grad_norm": 0.6015625, "learning_rate": 0.00019514010778131483, "loss": 1.0373, "step": 2112 }, { "epoch": 0.10011845534233593, "grad_norm": 0.62109375, "learning_rate": 0.00019513552048464942, "loss": 0.9711, "step": 2113 }, { "epoch": 0.10016583747927031, "grad_norm": 0.625, "learning_rate": 0.0001951309310779795, "loss": 1.4549, "step": 2114 }, { "epoch": 0.10021321961620469, "grad_norm": 0.0244140625, "learning_rate": 0.00019512633956140688, "loss": 0.002, "step": 2115 }, { "epoch": 0.10026060175313907, "grad_norm": 0.37890625, "learning_rate": 0.00019512174593503336, "loss": 0.0166, "step": 2116 }, { "epoch": 0.10030798389007345, "grad_norm": 0.55078125, "learning_rate": 0.00019511715019896082, "loss": 1.2642, "step": 2117 }, { "epoch": 0.10035536602700781, "grad_norm": 0.38671875, "learning_rate": 0.0001951125523532912, "loss": 1.1373, "step": 2118 }, { "epoch": 0.1004027481639422, "grad_norm": 0.66015625, "learning_rate": 0.0001951079523981265, "loss": 0.2338, "step": 2119 }, { "epoch": 0.10045013030087657, "grad_norm": 0.47265625, "learning_rate": 0.00019510335033356873, "loss": 0.5619, "step": 2120 }, { "epoch": 0.10049751243781095, "grad_norm": 0.1728515625, "learning_rate": 0.00019509874615971992, "loss": 0.018, "step": 2121 }, { "epoch": 0.10054489457474532, "grad_norm": 0.416015625, "learning_rate": 0.00019509413987668222, "loss": 0.939, "step": 2122 }, { "epoch": 0.1005922767116797, "grad_norm": 0.63671875, "learning_rate": 0.0001950895314845578, "loss": 1.0705, "step": 2123 }, { "epoch": 0.10063965884861407, "grad_norm": 0.6484375, "learning_rate": 0.00019508492098344884, "loss": 1.0411, "step": 2124 }, { "epoch": 0.10068704098554845, "grad_norm": 0.6484375, "learning_rate": 0.00019508030837345764, "loss": 1.0597, "step": 2125 }, { "epoch": 0.10073442312248282, "grad_norm": 0.36328125, "learning_rate": 0.00019507569365468644, "loss": 0.0604, "step": 2126 }, { "epoch": 0.1007818052594172, "grad_norm": 0.51953125, "learning_rate": 0.00019507107682723764, "loss": 1.1313, "step": 2127 }, { "epoch": 0.10082918739635158, "grad_norm": 0.07080078125, "learning_rate": 0.00019506645789121364, "loss": 0.0049, "step": 2128 }, { "epoch": 0.10087656953328596, "grad_norm": 0.51953125, "learning_rate": 0.00019506183684671685, "loss": 0.6684, "step": 2129 }, { "epoch": 0.10092395167022032, "grad_norm": 0.4140625, "learning_rate": 0.00019505721369384975, "loss": 0.1914, "step": 2130 }, { "epoch": 0.1009713338071547, "grad_norm": 0.306640625, "learning_rate": 0.0001950525884327149, "loss": 0.2069, "step": 2131 }, { "epoch": 0.10101871594408908, "grad_norm": 0.392578125, "learning_rate": 0.00019504796106341492, "loss": 0.9878, "step": 2132 }, { "epoch": 0.10106609808102346, "grad_norm": 0.53125, "learning_rate": 0.0001950433315860524, "loss": 1.1931, "step": 2133 }, { "epoch": 0.10111348021795782, "grad_norm": 0.498046875, "learning_rate": 0.00019503870000073001, "loss": 1.2078, "step": 2134 }, { "epoch": 0.1011608623548922, "grad_norm": 0.69921875, "learning_rate": 0.00019503406630755048, "loss": 0.0691, "step": 2135 }, { "epoch": 0.10120824449182658, "grad_norm": 0.6015625, "learning_rate": 0.0001950294305066166, "loss": 1.4365, "step": 2136 }, { "epoch": 0.10125562662876096, "grad_norm": 0.7109375, "learning_rate": 0.00019502479259803117, "loss": 1.1509, "step": 2137 }, { "epoch": 0.10130300876569533, "grad_norm": 0.453125, "learning_rate": 0.00019502015258189703, "loss": 0.3573, "step": 2138 }, { "epoch": 0.1013503909026297, "grad_norm": 0.5859375, "learning_rate": 0.00019501551045831715, "loss": 1.3825, "step": 2139 }, { "epoch": 0.10139777303956408, "grad_norm": 0.494140625, "learning_rate": 0.00019501086622739444, "loss": 0.8767, "step": 2140 }, { "epoch": 0.10144515517649846, "grad_norm": 0.45703125, "learning_rate": 0.00019500621988923191, "loss": 0.9961, "step": 2141 }, { "epoch": 0.10149253731343283, "grad_norm": 0.53515625, "learning_rate": 0.0001950015714439326, "loss": 0.7829, "step": 2142 }, { "epoch": 0.10153991945036721, "grad_norm": 0.578125, "learning_rate": 0.0001949969208915997, "loss": 0.8525, "step": 2143 }, { "epoch": 0.10158730158730159, "grad_norm": 0.56640625, "learning_rate": 0.0001949922682323362, "loss": 1.2719, "step": 2144 }, { "epoch": 0.10163468372423597, "grad_norm": 0.44140625, "learning_rate": 0.00019498761346624542, "loss": 0.6178, "step": 2145 }, { "epoch": 0.10168206586117035, "grad_norm": 0.455078125, "learning_rate": 0.0001949829565934305, "loss": 0.7717, "step": 2146 }, { "epoch": 0.10172944799810471, "grad_norm": 0.439453125, "learning_rate": 0.00019497829761399484, "loss": 0.7072, "step": 2147 }, { "epoch": 0.10177683013503909, "grad_norm": 0.5234375, "learning_rate": 0.00019497363652804168, "loss": 0.8323, "step": 2148 }, { "epoch": 0.10182421227197347, "grad_norm": 0.65625, "learning_rate": 0.0001949689733356744, "loss": 0.1043, "step": 2149 }, { "epoch": 0.10187159440890785, "grad_norm": 0.5234375, "learning_rate": 0.00019496430803699646, "loss": 1.0638, "step": 2150 }, { "epoch": 0.10191897654584221, "grad_norm": 0.5703125, "learning_rate": 0.00019495964063211135, "loss": 0.8882, "step": 2151 }, { "epoch": 0.10196635868277659, "grad_norm": 0.2451171875, "learning_rate": 0.00019495497112112254, "loss": 0.146, "step": 2152 }, { "epoch": 0.10201374081971097, "grad_norm": 0.796875, "learning_rate": 0.0001949502995041336, "loss": 0.4443, "step": 2153 }, { "epoch": 0.10206112295664535, "grad_norm": 0.58984375, "learning_rate": 0.00019494562578124813, "loss": 0.9381, "step": 2154 }, { "epoch": 0.10210850509357972, "grad_norm": 0.640625, "learning_rate": 0.00019494094995256984, "loss": 1.013, "step": 2155 }, { "epoch": 0.1021558872305141, "grad_norm": 0.6015625, "learning_rate": 0.00019493627201820242, "loss": 0.9014, "step": 2156 }, { "epoch": 0.10220326936744847, "grad_norm": 0.453125, "learning_rate": 0.0001949315919782496, "loss": 0.9222, "step": 2157 }, { "epoch": 0.10225065150438285, "grad_norm": 0.89453125, "learning_rate": 0.00019492690983281515, "loss": 0.0349, "step": 2158 }, { "epoch": 0.10229803364131722, "grad_norm": 0.26171875, "learning_rate": 0.000194922225582003, "loss": 0.1476, "step": 2159 }, { "epoch": 0.1023454157782516, "grad_norm": 0.7578125, "learning_rate": 0.00019491753922591695, "loss": 0.1474, "step": 2160 }, { "epoch": 0.10239279791518598, "grad_norm": 0.62890625, "learning_rate": 0.00019491285076466102, "loss": 0.107, "step": 2161 }, { "epoch": 0.10244018005212036, "grad_norm": 0.06591796875, "learning_rate": 0.00019490816019833914, "loss": 0.0058, "step": 2162 }, { "epoch": 0.10248756218905472, "grad_norm": 0.4765625, "learning_rate": 0.00019490346752705536, "loss": 0.2579, "step": 2163 }, { "epoch": 0.1025349443259891, "grad_norm": 0.482421875, "learning_rate": 0.00019489877275091378, "loss": 1.0398, "step": 2164 }, { "epoch": 0.10258232646292348, "grad_norm": 0.546875, "learning_rate": 0.0001948940758700185, "loss": 1.6905, "step": 2165 }, { "epoch": 0.10262970859985786, "grad_norm": 0.330078125, "learning_rate": 0.00019488937688447365, "loss": 0.2404, "step": 2166 }, { "epoch": 0.10267709073679222, "grad_norm": 0.56640625, "learning_rate": 0.0001948846757943835, "loss": 0.8703, "step": 2167 }, { "epoch": 0.1027244728737266, "grad_norm": 0.66015625, "learning_rate": 0.00019487997259985234, "loss": 1.3456, "step": 2168 }, { "epoch": 0.10277185501066098, "grad_norm": 0.00750732421875, "learning_rate": 0.00019487526730098441, "loss": 0.0007, "step": 2169 }, { "epoch": 0.10281923714759536, "grad_norm": 0.41796875, "learning_rate": 0.00019487055989788417, "loss": 1.2684, "step": 2170 }, { "epoch": 0.10286661928452973, "grad_norm": 0.51953125, "learning_rate": 0.0001948658503906559, "loss": 0.1757, "step": 2171 }, { "epoch": 0.1029140014214641, "grad_norm": 0.412109375, "learning_rate": 0.00019486113877940415, "loss": 0.1336, "step": 2172 }, { "epoch": 0.10296138355839848, "grad_norm": 0.56640625, "learning_rate": 0.00019485642506423338, "loss": 0.6852, "step": 2173 }, { "epoch": 0.10300876569533286, "grad_norm": 0.59765625, "learning_rate": 0.00019485170924524813, "loss": 1.1825, "step": 2174 }, { "epoch": 0.10305614783226724, "grad_norm": 0.400390625, "learning_rate": 0.000194846991322553, "loss": 0.2251, "step": 2175 }, { "epoch": 0.10310352996920161, "grad_norm": 0.69140625, "learning_rate": 0.00019484227129625264, "loss": 0.3324, "step": 2176 }, { "epoch": 0.10315091210613599, "grad_norm": 0.38671875, "learning_rate": 0.00019483754916645168, "loss": 0.0443, "step": 2177 }, { "epoch": 0.10319829424307037, "grad_norm": 0.55859375, "learning_rate": 0.00019483282493325493, "loss": 0.9885, "step": 2178 }, { "epoch": 0.10324567638000474, "grad_norm": 0.03125, "learning_rate": 0.00019482809859676717, "loss": 0.0012, "step": 2179 }, { "epoch": 0.10329305851693911, "grad_norm": 0.4296875, "learning_rate": 0.00019482337015709313, "loss": 0.7013, "step": 2180 }, { "epoch": 0.10334044065387349, "grad_norm": 0.65625, "learning_rate": 0.00019481863961433776, "loss": 1.396, "step": 2181 }, { "epoch": 0.10338782279080787, "grad_norm": 0.435546875, "learning_rate": 0.00019481390696860596, "loss": 0.5346, "step": 2182 }, { "epoch": 0.10343520492774225, "grad_norm": 0.64453125, "learning_rate": 0.00019480917222000272, "loss": 1.4599, "step": 2183 }, { "epoch": 0.10348258706467661, "grad_norm": 0.494140625, "learning_rate": 0.000194804435368633, "loss": 0.0461, "step": 2184 }, { "epoch": 0.10352996920161099, "grad_norm": 0.5859375, "learning_rate": 0.0001947996964146019, "loss": 1.3128, "step": 2185 }, { "epoch": 0.10357735133854537, "grad_norm": 0.68359375, "learning_rate": 0.0001947949553580145, "loss": 1.5236, "step": 2186 }, { "epoch": 0.10362473347547975, "grad_norm": 0.462890625, "learning_rate": 0.00019479021219897594, "loss": 0.6163, "step": 2187 }, { "epoch": 0.10367211561241411, "grad_norm": 0.498046875, "learning_rate": 0.00019478546693759148, "loss": 0.9992, "step": 2188 }, { "epoch": 0.1037194977493485, "grad_norm": 0.45703125, "learning_rate": 0.00019478071957396628, "loss": 1.2316, "step": 2189 }, { "epoch": 0.10376687988628287, "grad_norm": 0.2294921875, "learning_rate": 0.0001947759701082057, "loss": 0.0197, "step": 2190 }, { "epoch": 0.10381426202321725, "grad_norm": 1.0859375, "learning_rate": 0.0001947712185404151, "loss": 0.1965, "step": 2191 }, { "epoch": 0.10386164416015162, "grad_norm": 0.4921875, "learning_rate": 0.00019476646487069977, "loss": 0.1305, "step": 2192 }, { "epoch": 0.103909026297086, "grad_norm": 0.5625, "learning_rate": 0.00019476170909916518, "loss": 0.6922, "step": 2193 }, { "epoch": 0.10395640843402038, "grad_norm": 0.486328125, "learning_rate": 0.0001947569512259168, "loss": 1.1491, "step": 2194 }, { "epoch": 0.10400379057095475, "grad_norm": 0.7578125, "learning_rate": 0.00019475219125106023, "loss": 1.0979, "step": 2195 }, { "epoch": 0.10405117270788912, "grad_norm": 0.78515625, "learning_rate": 0.00019474742917470096, "loss": 0.2263, "step": 2196 }, { "epoch": 0.1040985548448235, "grad_norm": 0.55078125, "learning_rate": 0.00019474266499694464, "loss": 0.2181, "step": 2197 }, { "epoch": 0.10414593698175788, "grad_norm": 0.546875, "learning_rate": 0.0001947378987178969, "loss": 0.9503, "step": 2198 }, { "epoch": 0.10419331911869226, "grad_norm": 0.671875, "learning_rate": 0.0001947331303376635, "loss": 1.2979, "step": 2199 }, { "epoch": 0.10424070125562662, "grad_norm": 0.64453125, "learning_rate": 0.00019472835985635017, "loss": 1.5636, "step": 2200 }, { "epoch": 0.104288083392561, "grad_norm": 0.6484375, "learning_rate": 0.0001947235872740627, "loss": 0.915, "step": 2201 }, { "epoch": 0.10433546552949538, "grad_norm": 0.83203125, "learning_rate": 0.00019471881259090697, "loss": 0.8837, "step": 2202 }, { "epoch": 0.10438284766642976, "grad_norm": 0.57421875, "learning_rate": 0.00019471403580698885, "loss": 1.2649, "step": 2203 }, { "epoch": 0.10443022980336414, "grad_norm": 0.1142578125, "learning_rate": 0.00019470925692241436, "loss": 0.0166, "step": 2204 }, { "epoch": 0.1044776119402985, "grad_norm": 0.92578125, "learning_rate": 0.0001947044759372894, "loss": 0.2473, "step": 2205 }, { "epoch": 0.10452499407723288, "grad_norm": 0.5234375, "learning_rate": 0.00019469969285172, "loss": 1.3336, "step": 2206 }, { "epoch": 0.10457237621416726, "grad_norm": 0.79296875, "learning_rate": 0.0001946949076658123, "loss": 0.089, "step": 2207 }, { "epoch": 0.10461975835110164, "grad_norm": 0.55859375, "learning_rate": 0.00019469012037967245, "loss": 0.2336, "step": 2208 }, { "epoch": 0.104667140488036, "grad_norm": 0.451171875, "learning_rate": 0.00019468533099340656, "loss": 0.5127, "step": 2209 }, { "epoch": 0.10471452262497039, "grad_norm": 0.625, "learning_rate": 0.00019468053950712086, "loss": 1.1701, "step": 2210 }, { "epoch": 0.10476190476190476, "grad_norm": 1.390625, "learning_rate": 0.00019467574592092168, "loss": 0.7627, "step": 2211 }, { "epoch": 0.10480928689883914, "grad_norm": 0.52734375, "learning_rate": 0.00019467095023491528, "loss": 1.283, "step": 2212 }, { "epoch": 0.10485666903577351, "grad_norm": 0.02294921875, "learning_rate": 0.00019466615244920807, "loss": 0.0013, "step": 2213 }, { "epoch": 0.10490405117270789, "grad_norm": 0.166015625, "learning_rate": 0.0001946613525639064, "loss": 0.0062, "step": 2214 }, { "epoch": 0.10495143330964227, "grad_norm": 0.69140625, "learning_rate": 0.00019465655057911678, "loss": 0.9722, "step": 2215 }, { "epoch": 0.10499881544657665, "grad_norm": 0.61328125, "learning_rate": 0.0001946517464949457, "loss": 1.1721, "step": 2216 }, { "epoch": 0.10504619758351101, "grad_norm": 0.435546875, "learning_rate": 0.00019464694031149968, "loss": 0.5812, "step": 2217 }, { "epoch": 0.10509357972044539, "grad_norm": 0.42578125, "learning_rate": 0.00019464213202888535, "loss": 0.8201, "step": 2218 }, { "epoch": 0.10514096185737977, "grad_norm": 0.6015625, "learning_rate": 0.0001946373216472093, "loss": 1.0211, "step": 2219 }, { "epoch": 0.10518834399431415, "grad_norm": 0.62890625, "learning_rate": 0.00019463250916657834, "loss": 1.2849, "step": 2220 }, { "epoch": 0.10523572613124851, "grad_norm": 0.82421875, "learning_rate": 0.00019462769458709905, "loss": 0.2717, "step": 2221 }, { "epoch": 0.10528310826818289, "grad_norm": 0.51171875, "learning_rate": 0.00019462287790887833, "loss": 1.056, "step": 2222 }, { "epoch": 0.10533049040511727, "grad_norm": 0.53125, "learning_rate": 0.00019461805913202293, "loss": 0.1818, "step": 2223 }, { "epoch": 0.10537787254205165, "grad_norm": 0.62890625, "learning_rate": 0.0001946132382566398, "loss": 1.3601, "step": 2224 }, { "epoch": 0.10542525467898602, "grad_norm": 0.6640625, "learning_rate": 0.0001946084152828358, "loss": 1.411, "step": 2225 }, { "epoch": 0.1054726368159204, "grad_norm": 0.00494384765625, "learning_rate": 0.00019460359021071793, "loss": 0.0003, "step": 2226 }, { "epoch": 0.10552001895285477, "grad_norm": 0.5390625, "learning_rate": 0.0001945987630403932, "loss": 0.1902, "step": 2227 }, { "epoch": 0.10556740108978915, "grad_norm": 0.59375, "learning_rate": 0.00019459393377196864, "loss": 1.4867, "step": 2228 }, { "epoch": 0.10561478322672352, "grad_norm": 0.703125, "learning_rate": 0.0001945891024055514, "loss": 0.1368, "step": 2229 }, { "epoch": 0.1056621653636579, "grad_norm": 1.0625, "learning_rate": 0.00019458426894124864, "loss": 0.4235, "step": 2230 }, { "epoch": 0.10570954750059228, "grad_norm": 0.5, "learning_rate": 0.0001945794333791675, "loss": 1.5729, "step": 2231 }, { "epoch": 0.10575692963752666, "grad_norm": 0.494140625, "learning_rate": 0.00019457459571941532, "loss": 0.5732, "step": 2232 }, { "epoch": 0.10580431177446104, "grad_norm": 0.59375, "learning_rate": 0.00019456975596209933, "loss": 1.2299, "step": 2233 }, { "epoch": 0.1058516939113954, "grad_norm": 0.48046875, "learning_rate": 0.00019456491410732688, "loss": 0.8259, "step": 2234 }, { "epoch": 0.10589907604832978, "grad_norm": 0.61328125, "learning_rate": 0.00019456007015520533, "loss": 0.0567, "step": 2235 }, { "epoch": 0.10594645818526416, "grad_norm": 0.5859375, "learning_rate": 0.00019455522410584218, "loss": 0.9271, "step": 2236 }, { "epoch": 0.10599384032219854, "grad_norm": 0.6796875, "learning_rate": 0.00019455037595934486, "loss": 1.4491, "step": 2237 }, { "epoch": 0.1060412224591329, "grad_norm": 0.302734375, "learning_rate": 0.0001945455257158209, "loss": 0.2037, "step": 2238 }, { "epoch": 0.10608860459606728, "grad_norm": 0.5546875, "learning_rate": 0.0001945406733753779, "loss": 1.2239, "step": 2239 }, { "epoch": 0.10613598673300166, "grad_norm": 0.435546875, "learning_rate": 0.0001945358189381235, "loss": 0.7899, "step": 2240 }, { "epoch": 0.10618336886993604, "grad_norm": 0.55078125, "learning_rate": 0.00019453096240416528, "loss": 0.9769, "step": 2241 }, { "epoch": 0.1062307510068704, "grad_norm": 0.68359375, "learning_rate": 0.00019452610377361103, "loss": 1.6305, "step": 2242 }, { "epoch": 0.10627813314380478, "grad_norm": 0.419921875, "learning_rate": 0.00019452124304656846, "loss": 0.6719, "step": 2243 }, { "epoch": 0.10632551528073916, "grad_norm": 0.482421875, "learning_rate": 0.00019451638022314541, "loss": 0.8991, "step": 2244 }, { "epoch": 0.10637289741767354, "grad_norm": 1.4140625, "learning_rate": 0.00019451151530344973, "loss": 1.1208, "step": 2245 }, { "epoch": 0.10642027955460791, "grad_norm": 1.0234375, "learning_rate": 0.0001945066482875893, "loss": 0.0206, "step": 2246 }, { "epoch": 0.10646766169154229, "grad_norm": 0.0026397705078125, "learning_rate": 0.00019450177917567206, "loss": 0.0002, "step": 2247 }, { "epoch": 0.10651504382847667, "grad_norm": 0.58984375, "learning_rate": 0.00019449690796780606, "loss": 0.8428, "step": 2248 }, { "epoch": 0.10656242596541105, "grad_norm": 0.65625, "learning_rate": 0.00019449203466409928, "loss": 1.1472, "step": 2249 }, { "epoch": 0.10660980810234541, "grad_norm": 0.4140625, "learning_rate": 0.00019448715926465978, "loss": 0.9232, "step": 2250 }, { "epoch": 0.10665719023927979, "grad_norm": 2.34375, "learning_rate": 0.00019448228176959577, "loss": 0.5728, "step": 2251 }, { "epoch": 0.10670457237621417, "grad_norm": 0.64453125, "learning_rate": 0.0001944774021790154, "loss": 0.9843, "step": 2252 }, { "epoch": 0.10675195451314855, "grad_norm": 0.63671875, "learning_rate": 0.00019447252049302686, "loss": 0.2289, "step": 2253 }, { "epoch": 0.10679933665008291, "grad_norm": 0.58203125, "learning_rate": 0.00019446763671173843, "loss": 1.2277, "step": 2254 }, { "epoch": 0.10684671878701729, "grad_norm": 0.5625, "learning_rate": 0.00019446275083525848, "loss": 0.5088, "step": 2255 }, { "epoch": 0.10689410092395167, "grad_norm": 0.55078125, "learning_rate": 0.00019445786286369527, "loss": 0.1824, "step": 2256 }, { "epoch": 0.10694148306088605, "grad_norm": 0.63671875, "learning_rate": 0.0001944529727971573, "loss": 0.7188, "step": 2257 }, { "epoch": 0.10698886519782042, "grad_norm": 0.5078125, "learning_rate": 0.00019444808063575302, "loss": 1.2987, "step": 2258 }, { "epoch": 0.1070362473347548, "grad_norm": 0.51953125, "learning_rate": 0.00019444318637959091, "loss": 0.9129, "step": 2259 }, { "epoch": 0.10708362947168917, "grad_norm": 0.50390625, "learning_rate": 0.00019443829002877951, "loss": 0.5481, "step": 2260 }, { "epoch": 0.10713101160862355, "grad_norm": 0.490234375, "learning_rate": 0.00019443339158342745, "loss": 0.6164, "step": 2261 }, { "epoch": 0.10717839374555792, "grad_norm": 0.341796875, "learning_rate": 0.00019442849104364334, "loss": 0.0807, "step": 2262 }, { "epoch": 0.1072257758824923, "grad_norm": 0.51171875, "learning_rate": 0.00019442358840953588, "loss": 1.028, "step": 2263 }, { "epoch": 0.10727315801942668, "grad_norm": 0.6640625, "learning_rate": 0.0001944186836812138, "loss": 1.1778, "step": 2264 }, { "epoch": 0.10732054015636106, "grad_norm": 0.73828125, "learning_rate": 0.00019441377685878587, "loss": 1.3525, "step": 2265 }, { "epoch": 0.10736792229329543, "grad_norm": 0.7265625, "learning_rate": 0.00019440886794236097, "loss": 0.7213, "step": 2266 }, { "epoch": 0.1074153044302298, "grad_norm": 0.45703125, "learning_rate": 0.0001944039569320479, "loss": 0.8769, "step": 2267 }, { "epoch": 0.10746268656716418, "grad_norm": 0.45703125, "learning_rate": 0.00019439904382795564, "loss": 0.8678, "step": 2268 }, { "epoch": 0.10751006870409856, "grad_norm": 0.2265625, "learning_rate": 0.00019439412863019314, "loss": 0.1564, "step": 2269 }, { "epoch": 0.10755745084103294, "grad_norm": 0.1552734375, "learning_rate": 0.0001943892113388694, "loss": 0.0198, "step": 2270 }, { "epoch": 0.1076048329779673, "grad_norm": 0.34765625, "learning_rate": 0.00019438429195409352, "loss": 0.0609, "step": 2271 }, { "epoch": 0.10765221511490168, "grad_norm": 0.4375, "learning_rate": 0.00019437937047597455, "loss": 0.508, "step": 2272 }, { "epoch": 0.10769959725183606, "grad_norm": 0.94140625, "learning_rate": 0.0001943744469046217, "loss": 0.3306, "step": 2273 }, { "epoch": 0.10774697938877044, "grad_norm": 0.296875, "learning_rate": 0.0001943695212401441, "loss": 0.0205, "step": 2274 }, { "epoch": 0.1077943615257048, "grad_norm": 0.5, "learning_rate": 0.00019436459348265106, "loss": 0.569, "step": 2275 }, { "epoch": 0.10784174366263918, "grad_norm": 0.38671875, "learning_rate": 0.0001943596636322518, "loss": 0.0908, "step": 2276 }, { "epoch": 0.10788912579957356, "grad_norm": 0.734375, "learning_rate": 0.00019435473168905577, "loss": 1.1299, "step": 2277 }, { "epoch": 0.10793650793650794, "grad_norm": 0.234375, "learning_rate": 0.0001943497976531723, "loss": 0.0519, "step": 2278 }, { "epoch": 0.1079838900734423, "grad_norm": 0.63671875, "learning_rate": 0.00019434486152471075, "loss": 0.9495, "step": 2279 }, { "epoch": 0.10803127221037669, "grad_norm": 0.388671875, "learning_rate": 0.00019433992330378073, "loss": 0.0155, "step": 2280 }, { "epoch": 0.10807865434731107, "grad_norm": 0.5625, "learning_rate": 0.00019433498299049168, "loss": 0.94, "step": 2281 }, { "epoch": 0.10812603648424544, "grad_norm": 0.5234375, "learning_rate": 0.00019433004058495317, "loss": 0.8623, "step": 2282 }, { "epoch": 0.10817341862117981, "grad_norm": 0.66796875, "learning_rate": 0.00019432509608727485, "loss": 1.4621, "step": 2283 }, { "epoch": 0.10822080075811419, "grad_norm": 0.045654296875, "learning_rate": 0.00019432014949756637, "loss": 0.0021, "step": 2284 }, { "epoch": 0.10826818289504857, "grad_norm": 0.69140625, "learning_rate": 0.00019431520081593742, "loss": 1.0244, "step": 2285 }, { "epoch": 0.10831556503198295, "grad_norm": 0.59765625, "learning_rate": 0.0001943102500424978, "loss": 1.214, "step": 2286 }, { "epoch": 0.10836294716891731, "grad_norm": 0.65625, "learning_rate": 0.00019430529717735727, "loss": 1.6764, "step": 2287 }, { "epoch": 0.10841032930585169, "grad_norm": 0.62109375, "learning_rate": 0.00019430034222062573, "loss": 0.8461, "step": 2288 }, { "epoch": 0.10845771144278607, "grad_norm": 0.62109375, "learning_rate": 0.00019429538517241302, "loss": 1.0771, "step": 2289 }, { "epoch": 0.10850509357972045, "grad_norm": 0.21875, "learning_rate": 0.0001942904260328291, "loss": 0.1528, "step": 2290 }, { "epoch": 0.10855247571665481, "grad_norm": 0.6015625, "learning_rate": 0.00019428546480198397, "loss": 1.1033, "step": 2291 }, { "epoch": 0.1085998578535892, "grad_norm": 0.703125, "learning_rate": 0.00019428050147998765, "loss": 1.2995, "step": 2292 }, { "epoch": 0.10864723999052357, "grad_norm": 0.65234375, "learning_rate": 0.00019427553606695024, "loss": 0.656, "step": 2293 }, { "epoch": 0.10869462212745795, "grad_norm": 0.51171875, "learning_rate": 0.00019427056856298185, "loss": 0.9073, "step": 2294 }, { "epoch": 0.10874200426439233, "grad_norm": 0.25390625, "learning_rate": 0.0001942655989681927, "loss": 0.1786, "step": 2295 }, { "epoch": 0.1087893864013267, "grad_norm": 0.71875, "learning_rate": 0.0001942606272826929, "loss": 0.4551, "step": 2296 }, { "epoch": 0.10883676853826108, "grad_norm": 0.5859375, "learning_rate": 0.00019425565350659286, "loss": 0.9024, "step": 2297 }, { "epoch": 0.10888415067519545, "grad_norm": 0.416015625, "learning_rate": 0.00019425067764000276, "loss": 0.028, "step": 2298 }, { "epoch": 0.10893153281212983, "grad_norm": 0.48828125, "learning_rate": 0.0001942456996830331, "loss": 0.9282, "step": 2299 }, { "epoch": 0.1089789149490642, "grad_norm": 0.55859375, "learning_rate": 0.00019424071963579414, "loss": 1.1809, "step": 2300 }, { "epoch": 0.10902629708599858, "grad_norm": 0.62890625, "learning_rate": 0.00019423573749839643, "loss": 0.8025, "step": 2301 }, { "epoch": 0.10907367922293296, "grad_norm": 0.59765625, "learning_rate": 0.0001942307532709504, "loss": 1.4061, "step": 2302 }, { "epoch": 0.10912106135986734, "grad_norm": 0.578125, "learning_rate": 0.00019422576695356667, "loss": 1.2611, "step": 2303 }, { "epoch": 0.1091684434968017, "grad_norm": 0.5234375, "learning_rate": 0.0001942207785463558, "loss": 1.0934, "step": 2304 }, { "epoch": 0.10921582563373608, "grad_norm": 0.59765625, "learning_rate": 0.00019421578804942842, "loss": 1.3257, "step": 2305 }, { "epoch": 0.10926320777067046, "grad_norm": 0.62109375, "learning_rate": 0.00019421079546289518, "loss": 0.4124, "step": 2306 }, { "epoch": 0.10931058990760484, "grad_norm": 0.4609375, "learning_rate": 0.00019420580078686689, "loss": 1.3487, "step": 2307 }, { "epoch": 0.1093579720445392, "grad_norm": 0.6640625, "learning_rate": 0.00019420080402145424, "loss": 0.8191, "step": 2308 }, { "epoch": 0.10940535418147358, "grad_norm": 0.73828125, "learning_rate": 0.0001941958051667681, "loss": 1.0877, "step": 2309 }, { "epoch": 0.10945273631840796, "grad_norm": 0.4921875, "learning_rate": 0.00019419080422291936, "loss": 1.1972, "step": 2310 }, { "epoch": 0.10950011845534234, "grad_norm": 0.259765625, "learning_rate": 0.00019418580119001888, "loss": 0.0473, "step": 2311 }, { "epoch": 0.1095475005922767, "grad_norm": 0.64453125, "learning_rate": 0.00019418079606817767, "loss": 0.0514, "step": 2312 }, { "epoch": 0.10959488272921108, "grad_norm": 0.5859375, "learning_rate": 0.00019417578885750673, "loss": 0.1785, "step": 2313 }, { "epoch": 0.10964226486614546, "grad_norm": 0.609375, "learning_rate": 0.00019417077955811708, "loss": 1.6825, "step": 2314 }, { "epoch": 0.10968964700307984, "grad_norm": 0.5625, "learning_rate": 0.00019416576817011988, "loss": 1.0818, "step": 2315 }, { "epoch": 0.10973702914001421, "grad_norm": 0.7265625, "learning_rate": 0.0001941607546936262, "loss": 1.2398, "step": 2316 }, { "epoch": 0.10978441127694859, "grad_norm": 0.37109375, "learning_rate": 0.00019415573912874733, "loss": 0.0085, "step": 2317 }, { "epoch": 0.10983179341388297, "grad_norm": 0.1611328125, "learning_rate": 0.0001941507214755944, "loss": 0.0102, "step": 2318 }, { "epoch": 0.10987917555081735, "grad_norm": 0.466796875, "learning_rate": 0.0001941457017342788, "loss": 0.8173, "step": 2319 }, { "epoch": 0.10992655768775171, "grad_norm": 0.59375, "learning_rate": 0.00019414067990491178, "loss": 0.8912, "step": 2320 }, { "epoch": 0.10997393982468609, "grad_norm": 0.5234375, "learning_rate": 0.00019413565598760477, "loss": 0.9078, "step": 2321 }, { "epoch": 0.11002132196162047, "grad_norm": 0.41015625, "learning_rate": 0.00019413062998246917, "loss": 0.1027, "step": 2322 }, { "epoch": 0.11006870409855485, "grad_norm": 0.703125, "learning_rate": 0.00019412560188961648, "loss": 0.0374, "step": 2323 }, { "epoch": 0.11011608623548923, "grad_norm": 0.458984375, "learning_rate": 0.0001941205717091582, "loss": 0.0314, "step": 2324 }, { "epoch": 0.11016346837242359, "grad_norm": 0.6328125, "learning_rate": 0.00019411553944120584, "loss": 1.018, "step": 2325 }, { "epoch": 0.11021085050935797, "grad_norm": 0.703125, "learning_rate": 0.0001941105050858711, "loss": 1.2728, "step": 2326 }, { "epoch": 0.11025823264629235, "grad_norm": 0.76171875, "learning_rate": 0.0001941054686432656, "loss": 0.3018, "step": 2327 }, { "epoch": 0.11030561478322673, "grad_norm": 0.67578125, "learning_rate": 0.00019410043011350102, "loss": 1.0628, "step": 2328 }, { "epoch": 0.1103529969201611, "grad_norm": 0.62890625, "learning_rate": 0.00019409538949668916, "loss": 0.2092, "step": 2329 }, { "epoch": 0.11040037905709547, "grad_norm": 1.40625, "learning_rate": 0.0001940903467929418, "loss": 0.3697, "step": 2330 }, { "epoch": 0.11044776119402985, "grad_norm": 0.51953125, "learning_rate": 0.00019408530200237074, "loss": 1.355, "step": 2331 }, { "epoch": 0.11049514333096423, "grad_norm": 0.4921875, "learning_rate": 0.0001940802551250879, "loss": 1.1421, "step": 2332 }, { "epoch": 0.1105425254678986, "grad_norm": 0.5078125, "learning_rate": 0.00019407520616120523, "loss": 0.7896, "step": 2333 }, { "epoch": 0.11058990760483298, "grad_norm": 0.5625, "learning_rate": 0.00019407015511083465, "loss": 1.1293, "step": 2334 }, { "epoch": 0.11063728974176736, "grad_norm": 0.53515625, "learning_rate": 0.00019406510197408826, "loss": 0.8314, "step": 2335 }, { "epoch": 0.11068467187870173, "grad_norm": 0.478515625, "learning_rate": 0.00019406004675107813, "loss": 1.0182, "step": 2336 }, { "epoch": 0.1107320540156361, "grad_norm": 0.498046875, "learning_rate": 0.0001940549894419163, "loss": 0.9839, "step": 2337 }, { "epoch": 0.11077943615257048, "grad_norm": 0.55078125, "learning_rate": 0.000194049930046715, "loss": 1.1501, "step": 2338 }, { "epoch": 0.11082681828950486, "grad_norm": 0.419921875, "learning_rate": 0.00019404486856558644, "loss": 0.4599, "step": 2339 }, { "epoch": 0.11087420042643924, "grad_norm": 0.6328125, "learning_rate": 0.00019403980499864285, "loss": 1.2662, "step": 2340 }, { "epoch": 0.1109215825633736, "grad_norm": 0.15234375, "learning_rate": 0.00019403473934599655, "loss": 0.0025, "step": 2341 }, { "epoch": 0.11096896470030798, "grad_norm": 0.48046875, "learning_rate": 0.0001940296716077599, "loss": 0.6028, "step": 2342 }, { "epoch": 0.11101634683724236, "grad_norm": 0.58984375, "learning_rate": 0.00019402460178404534, "loss": 0.8875, "step": 2343 }, { "epoch": 0.11106372897417674, "grad_norm": 0.51171875, "learning_rate": 0.0001940195298749652, "loss": 1.0461, "step": 2344 }, { "epoch": 0.1111111111111111, "grad_norm": 0.248046875, "learning_rate": 0.00019401445588063205, "loss": 0.0401, "step": 2345 }, { "epoch": 0.11115849324804548, "grad_norm": 0.474609375, "learning_rate": 0.0001940093798011584, "loss": 0.9364, "step": 2346 }, { "epoch": 0.11120587538497986, "grad_norm": 0.59375, "learning_rate": 0.00019400430163665685, "loss": 0.7033, "step": 2347 }, { "epoch": 0.11125325752191424, "grad_norm": 0.62109375, "learning_rate": 0.00019399922138724004, "loss": 1.3435, "step": 2348 }, { "epoch": 0.11130063965884861, "grad_norm": 0.5703125, "learning_rate": 0.0001939941390530206, "loss": 0.2642, "step": 2349 }, { "epoch": 0.11134802179578299, "grad_norm": 1.2890625, "learning_rate": 0.00019398905463411124, "loss": 0.9699, "step": 2350 }, { "epoch": 0.11139540393271737, "grad_norm": 0.71484375, "learning_rate": 0.00019398396813062482, "loss": 1.213, "step": 2351 }, { "epoch": 0.11144278606965174, "grad_norm": 0.3203125, "learning_rate": 0.00019397887954267408, "loss": 0.1429, "step": 2352 }, { "epoch": 0.11149016820658612, "grad_norm": 0.6171875, "learning_rate": 0.00019397378887037187, "loss": 1.3877, "step": 2353 }, { "epoch": 0.11153755034352049, "grad_norm": 0.361328125, "learning_rate": 0.00019396869611383114, "loss": 0.1812, "step": 2354 }, { "epoch": 0.11158493248045487, "grad_norm": 0.34375, "learning_rate": 0.00019396360127316482, "loss": 0.0505, "step": 2355 }, { "epoch": 0.11163231461738925, "grad_norm": 0.05078125, "learning_rate": 0.0001939585043484859, "loss": 0.0035, "step": 2356 }, { "epoch": 0.11167969675432363, "grad_norm": 0.51953125, "learning_rate": 0.00019395340533990744, "loss": 0.2011, "step": 2357 }, { "epoch": 0.11172707889125799, "grad_norm": 0.7109375, "learning_rate": 0.00019394830424754252, "loss": 0.8884, "step": 2358 }, { "epoch": 0.11177446102819237, "grad_norm": 0.53125, "learning_rate": 0.00019394320107150428, "loss": 0.7974, "step": 2359 }, { "epoch": 0.11182184316512675, "grad_norm": 0.494140625, "learning_rate": 0.0001939380958119059, "loss": 0.2071, "step": 2360 }, { "epoch": 0.11186922530206113, "grad_norm": 0.267578125, "learning_rate": 0.00019393298846886062, "loss": 0.1647, "step": 2361 }, { "epoch": 0.1119166074389955, "grad_norm": 0.66015625, "learning_rate": 0.0001939278790424817, "loss": 0.9444, "step": 2362 }, { "epoch": 0.11196398957592987, "grad_norm": 0.64453125, "learning_rate": 0.00019392276753288248, "loss": 0.9224, "step": 2363 }, { "epoch": 0.11201137171286425, "grad_norm": 0.49609375, "learning_rate": 0.0001939176539401763, "loss": 0.7519, "step": 2364 }, { "epoch": 0.11205875384979863, "grad_norm": 0.3203125, "learning_rate": 0.00019391253826447663, "loss": 0.1332, "step": 2365 }, { "epoch": 0.112106135986733, "grad_norm": 0.53125, "learning_rate": 0.00019390742050589687, "loss": 0.548, "step": 2366 }, { "epoch": 0.11215351812366738, "grad_norm": 0.482421875, "learning_rate": 0.00019390230066455058, "loss": 1.065, "step": 2367 }, { "epoch": 0.11220090026060175, "grad_norm": 0.30078125, "learning_rate": 0.00019389717874055125, "loss": 0.1695, "step": 2368 }, { "epoch": 0.11224828239753613, "grad_norm": 0.734375, "learning_rate": 0.0001938920547340125, "loss": 0.9457, "step": 2369 }, { "epoch": 0.1122956645344705, "grad_norm": 0.76171875, "learning_rate": 0.00019388692864504798, "loss": 0.2065, "step": 2370 }, { "epoch": 0.11234304667140488, "grad_norm": 1.1875, "learning_rate": 0.00019388180047377144, "loss": 0.761, "step": 2371 }, { "epoch": 0.11239042880833926, "grad_norm": 0.56640625, "learning_rate": 0.0001938766702202965, "loss": 0.9179, "step": 2372 }, { "epoch": 0.11243781094527364, "grad_norm": 0.54296875, "learning_rate": 0.00019387153788473705, "loss": 1.2718, "step": 2373 }, { "epoch": 0.112485193082208, "grad_norm": 0.6015625, "learning_rate": 0.00019386640346720686, "loss": 0.8863, "step": 2374 }, { "epoch": 0.11253257521914238, "grad_norm": 0.6328125, "learning_rate": 0.00019386126696781982, "loss": 0.0903, "step": 2375 }, { "epoch": 0.11257995735607676, "grad_norm": 0.474609375, "learning_rate": 0.00019385612838668985, "loss": 0.3469, "step": 2376 }, { "epoch": 0.11262733949301114, "grad_norm": 0.498046875, "learning_rate": 0.00019385098772393096, "loss": 0.8277, "step": 2377 }, { "epoch": 0.1126747216299455, "grad_norm": 0.55078125, "learning_rate": 0.0001938458449796571, "loss": 0.7903, "step": 2378 }, { "epoch": 0.11272210376687988, "grad_norm": 0.58203125, "learning_rate": 0.00019384070015398235, "loss": 0.6248, "step": 2379 }, { "epoch": 0.11276948590381426, "grad_norm": 0.455078125, "learning_rate": 0.00019383555324702082, "loss": 0.5589, "step": 2380 }, { "epoch": 0.11281686804074864, "grad_norm": 0.388671875, "learning_rate": 0.00019383040425888668, "loss": 0.4301, "step": 2381 }, { "epoch": 0.11286425017768302, "grad_norm": 0.490234375, "learning_rate": 0.00019382525318969413, "loss": 1.2382, "step": 2382 }, { "epoch": 0.11291163231461739, "grad_norm": 0.58984375, "learning_rate": 0.0001938201000395574, "loss": 0.9135, "step": 2383 }, { "epoch": 0.11295901445155176, "grad_norm": 0.5390625, "learning_rate": 0.00019381494480859076, "loss": 0.4733, "step": 2384 }, { "epoch": 0.11300639658848614, "grad_norm": 0.59375, "learning_rate": 0.0001938097874969086, "loss": 1.1656, "step": 2385 }, { "epoch": 0.11305377872542052, "grad_norm": 0.234375, "learning_rate": 0.00019380462810462525, "loss": 0.0332, "step": 2386 }, { "epoch": 0.11310116086235489, "grad_norm": 0.83984375, "learning_rate": 0.0001937994666318552, "loss": 1.0793, "step": 2387 }, { "epoch": 0.11314854299928927, "grad_norm": 0.55859375, "learning_rate": 0.00019379430307871288, "loss": 0.7872, "step": 2388 }, { "epoch": 0.11319592513622365, "grad_norm": 0.8046875, "learning_rate": 0.0001937891374453128, "loss": 0.0647, "step": 2389 }, { "epoch": 0.11324330727315803, "grad_norm": 0.455078125, "learning_rate": 0.00019378396973176955, "loss": 0.4569, "step": 2390 }, { "epoch": 0.11329068941009239, "grad_norm": 0.478515625, "learning_rate": 0.00019377879993819777, "loss": 0.8047, "step": 2391 }, { "epoch": 0.11333807154702677, "grad_norm": 0.53125, "learning_rate": 0.00019377362806471208, "loss": 1.0586, "step": 2392 }, { "epoch": 0.11338545368396115, "grad_norm": 0.474609375, "learning_rate": 0.0001937684541114272, "loss": 1.0862, "step": 2393 }, { "epoch": 0.11343283582089553, "grad_norm": 0.625, "learning_rate": 0.00019376327807845792, "loss": 1.7054, "step": 2394 }, { "epoch": 0.1134802179578299, "grad_norm": 0.53125, "learning_rate": 0.00019375809996591896, "loss": 1.0919, "step": 2395 }, { "epoch": 0.11352760009476427, "grad_norm": 0.54296875, "learning_rate": 0.00019375291977392523, "loss": 0.983, "step": 2396 }, { "epoch": 0.11357498223169865, "grad_norm": 0.4609375, "learning_rate": 0.0001937477375025916, "loss": 0.7082, "step": 2397 }, { "epoch": 0.11362236436863303, "grad_norm": 0.4921875, "learning_rate": 0.000193742553152033, "loss": 0.8129, "step": 2398 }, { "epoch": 0.1136697465055674, "grad_norm": 0.69921875, "learning_rate": 0.00019373736672236445, "loss": 1.3653, "step": 2399 }, { "epoch": 0.11371712864250177, "grad_norm": 0.484375, "learning_rate": 0.0001937321782137009, "loss": 1.09, "step": 2400 }, { "epoch": 0.11376451077943615, "grad_norm": 0.58984375, "learning_rate": 0.0001937269876261575, "loss": 0.521, "step": 2401 }, { "epoch": 0.11381189291637053, "grad_norm": 0.375, "learning_rate": 0.00019372179495984936, "loss": 0.9942, "step": 2402 }, { "epoch": 0.1138592750533049, "grad_norm": 0.61328125, "learning_rate": 0.00019371660021489162, "loss": 1.1217, "step": 2403 }, { "epoch": 0.11390665719023928, "grad_norm": 0.5234375, "learning_rate": 0.00019371140339139952, "loss": 1.177, "step": 2404 }, { "epoch": 0.11395403932717366, "grad_norm": 0.453125, "learning_rate": 0.0001937062044894883, "loss": 0.0618, "step": 2405 }, { "epoch": 0.11400142146410804, "grad_norm": 0.515625, "learning_rate": 0.00019370100350927328, "loss": 1.4012, "step": 2406 }, { "epoch": 0.1140488036010424, "grad_norm": 0.6796875, "learning_rate": 0.0001936958004508698, "loss": 0.2863, "step": 2407 }, { "epoch": 0.11409618573797678, "grad_norm": 0.7734375, "learning_rate": 0.00019369059531439332, "loss": 1.1873, "step": 2408 }, { "epoch": 0.11414356787491116, "grad_norm": 0.44921875, "learning_rate": 0.00019368538809995918, "loss": 1.0045, "step": 2409 }, { "epoch": 0.11419095001184554, "grad_norm": 1.0703125, "learning_rate": 0.0001936801788076829, "loss": 0.8343, "step": 2410 }, { "epoch": 0.1142383321487799, "grad_norm": 0.40625, "learning_rate": 0.00019367496743768009, "loss": 0.5111, "step": 2411 }, { "epoch": 0.11428571428571428, "grad_norm": 0.271484375, "learning_rate": 0.00019366975399006626, "loss": 0.1618, "step": 2412 }, { "epoch": 0.11433309642264866, "grad_norm": 0.61328125, "learning_rate": 0.00019366453846495705, "loss": 0.8744, "step": 2413 }, { "epoch": 0.11438047855958304, "grad_norm": 0.78125, "learning_rate": 0.00019365932086246813, "loss": 0.7495, "step": 2414 }, { "epoch": 0.11442786069651742, "grad_norm": 0.341796875, "learning_rate": 0.00019365410118271528, "loss": 0.179, "step": 2415 }, { "epoch": 0.11447524283345178, "grad_norm": 0.56640625, "learning_rate": 0.00019364887942581417, "loss": 0.8303, "step": 2416 }, { "epoch": 0.11452262497038616, "grad_norm": 0.515625, "learning_rate": 0.0001936436555918807, "loss": 0.5321, "step": 2417 }, { "epoch": 0.11457000710732054, "grad_norm": 0.64453125, "learning_rate": 0.0001936384296810307, "loss": 0.6623, "step": 2418 }, { "epoch": 0.11461738924425492, "grad_norm": 0.75390625, "learning_rate": 0.00019363320169338004, "loss": 0.2194, "step": 2419 }, { "epoch": 0.11466477138118929, "grad_norm": 0.326171875, "learning_rate": 0.0001936279716290447, "loss": 0.5661, "step": 2420 }, { "epoch": 0.11471215351812367, "grad_norm": 0.6640625, "learning_rate": 0.00019362273948814068, "loss": 1.2285, "step": 2421 }, { "epoch": 0.11475953565505805, "grad_norm": 0.609375, "learning_rate": 0.00019361750527078405, "loss": 1.3478, "step": 2422 }, { "epoch": 0.11480691779199242, "grad_norm": 0.52734375, "learning_rate": 0.00019361226897709086, "loss": 1.6165, "step": 2423 }, { "epoch": 0.11485429992892679, "grad_norm": 0.6953125, "learning_rate": 0.00019360703060717724, "loss": 0.9164, "step": 2424 }, { "epoch": 0.11490168206586117, "grad_norm": 0.609375, "learning_rate": 0.0001936017901611594, "loss": 0.9924, "step": 2425 }, { "epoch": 0.11494906420279555, "grad_norm": 0.52734375, "learning_rate": 0.00019359654763915354, "loss": 0.7366, "step": 2426 }, { "epoch": 0.11499644633972993, "grad_norm": 0.48046875, "learning_rate": 0.00019359130304127595, "loss": 0.0893, "step": 2427 }, { "epoch": 0.11504382847666429, "grad_norm": 0.58203125, "learning_rate": 0.00019358605636764296, "loss": 0.8715, "step": 2428 }, { "epoch": 0.11509121061359867, "grad_norm": 0.578125, "learning_rate": 0.0001935808076183709, "loss": 1.0793, "step": 2429 }, { "epoch": 0.11513859275053305, "grad_norm": 0.69140625, "learning_rate": 0.00019357555679357623, "loss": 0.9252, "step": 2430 }, { "epoch": 0.11518597488746743, "grad_norm": 0.5390625, "learning_rate": 0.0001935703038933754, "loss": 0.8074, "step": 2431 }, { "epoch": 0.1152333570244018, "grad_norm": 0.6640625, "learning_rate": 0.00019356504891788486, "loss": 1.7112, "step": 2432 }, { "epoch": 0.11528073916133617, "grad_norm": 0.55859375, "learning_rate": 0.00019355979186722118, "loss": 1.0211, "step": 2433 }, { "epoch": 0.11532812129827055, "grad_norm": 0.2578125, "learning_rate": 0.000193554532741501, "loss": 0.1586, "step": 2434 }, { "epoch": 0.11537550343520493, "grad_norm": 0.56640625, "learning_rate": 0.00019354927154084095, "loss": 0.1016, "step": 2435 }, { "epoch": 0.1154228855721393, "grad_norm": 0.57421875, "learning_rate": 0.00019354400826535767, "loss": 1.0657, "step": 2436 }, { "epoch": 0.11547026770907368, "grad_norm": 0.42578125, "learning_rate": 0.00019353874291516793, "loss": 0.4119, "step": 2437 }, { "epoch": 0.11551764984600806, "grad_norm": 0.70703125, "learning_rate": 0.0001935334754903885, "loss": 0.0556, "step": 2438 }, { "epoch": 0.11556503198294243, "grad_norm": 0.05908203125, "learning_rate": 0.00019352820599113622, "loss": 0.0062, "step": 2439 }, { "epoch": 0.1156124141198768, "grad_norm": 0.60546875, "learning_rate": 0.00019352293441752798, "loss": 1.1129, "step": 2440 }, { "epoch": 0.11565979625681118, "grad_norm": 0.640625, "learning_rate": 0.00019351766076968065, "loss": 1.6857, "step": 2441 }, { "epoch": 0.11570717839374556, "grad_norm": 0.54296875, "learning_rate": 0.0001935123850477112, "loss": 1.5279, "step": 2442 }, { "epoch": 0.11575456053067994, "grad_norm": 0.484375, "learning_rate": 0.00019350710725173667, "loss": 0.855, "step": 2443 }, { "epoch": 0.11580194266761432, "grad_norm": 0.515625, "learning_rate": 0.0001935018273818741, "loss": 1.1485, "step": 2444 }, { "epoch": 0.11584932480454868, "grad_norm": 1.1171875, "learning_rate": 0.00019349654543824059, "loss": 0.5004, "step": 2445 }, { "epoch": 0.11589670694148306, "grad_norm": 1.1484375, "learning_rate": 0.00019349126142095328, "loss": 0.9367, "step": 2446 }, { "epoch": 0.11594408907841744, "grad_norm": 0.65625, "learning_rate": 0.00019348597533012937, "loss": 0.3459, "step": 2447 }, { "epoch": 0.11599147121535182, "grad_norm": 0.435546875, "learning_rate": 0.00019348068716588615, "loss": 0.1398, "step": 2448 }, { "epoch": 0.11603885335228618, "grad_norm": 0.6796875, "learning_rate": 0.0001934753969283408, "loss": 0.9748, "step": 2449 }, { "epoch": 0.11608623548922056, "grad_norm": 0.58203125, "learning_rate": 0.00019347010461761075, "loss": 1.0431, "step": 2450 }, { "epoch": 0.11613361762615494, "grad_norm": 0.5859375, "learning_rate": 0.00019346481023381333, "loss": 1.1636, "step": 2451 }, { "epoch": 0.11618099976308932, "grad_norm": 0.6953125, "learning_rate": 0.00019345951377706597, "loss": 0.1638, "step": 2452 }, { "epoch": 0.11622838190002369, "grad_norm": 0.5625, "learning_rate": 0.00019345421524748614, "loss": 1.0393, "step": 2453 }, { "epoch": 0.11627576403695807, "grad_norm": 0.59375, "learning_rate": 0.00019344891464519138, "loss": 0.4738, "step": 2454 }, { "epoch": 0.11632314617389244, "grad_norm": 0.58984375, "learning_rate": 0.00019344361197029918, "loss": 0.9189, "step": 2455 }, { "epoch": 0.11637052831082682, "grad_norm": 0.51171875, "learning_rate": 0.00019343830722292726, "loss": 0.9371, "step": 2456 }, { "epoch": 0.11641791044776119, "grad_norm": 0.62109375, "learning_rate": 0.00019343300040319317, "loss": 0.3964, "step": 2457 }, { "epoch": 0.11646529258469557, "grad_norm": 0.46875, "learning_rate": 0.00019342769151121467, "loss": 0.8681, "step": 2458 }, { "epoch": 0.11651267472162995, "grad_norm": 0.5625, "learning_rate": 0.0001934223805471095, "loss": 0.8003, "step": 2459 }, { "epoch": 0.11656005685856433, "grad_norm": 0.1708984375, "learning_rate": 0.00019341706751099542, "loss": 0.0212, "step": 2460 }, { "epoch": 0.11660743899549869, "grad_norm": 0.08837890625, "learning_rate": 0.00019341175240299028, "loss": 0.0065, "step": 2461 }, { "epoch": 0.11665482113243307, "grad_norm": 0.53125, "learning_rate": 0.000193406435223212, "loss": 1.2321, "step": 2462 }, { "epoch": 0.11670220326936745, "grad_norm": 0.69140625, "learning_rate": 0.00019340111597177843, "loss": 1.1462, "step": 2463 }, { "epoch": 0.11674958540630183, "grad_norm": 0.578125, "learning_rate": 0.00019339579464880763, "loss": 0.9835, "step": 2464 }, { "epoch": 0.1167969675432362, "grad_norm": 0.41015625, "learning_rate": 0.00019339047125441756, "loss": 0.8235, "step": 2465 }, { "epoch": 0.11684434968017057, "grad_norm": 0.56640625, "learning_rate": 0.00019338514578872633, "loss": 0.8367, "step": 2466 }, { "epoch": 0.11689173181710495, "grad_norm": 0.65234375, "learning_rate": 0.00019337981825185202, "loss": 1.147, "step": 2467 }, { "epoch": 0.11693911395403933, "grad_norm": 0.52734375, "learning_rate": 0.00019337448864391283, "loss": 0.4575, "step": 2468 }, { "epoch": 0.1169864960909737, "grad_norm": 0.05712890625, "learning_rate": 0.00019336915696502693, "loss": 0.0063, "step": 2469 }, { "epoch": 0.11703387822790808, "grad_norm": 0.8359375, "learning_rate": 0.0001933638232153126, "loss": 1.6739, "step": 2470 }, { "epoch": 0.11708126036484245, "grad_norm": 0.04345703125, "learning_rate": 0.00019335848739488807, "loss": 0.0051, "step": 2471 }, { "epoch": 0.11712864250177683, "grad_norm": 0.2314453125, "learning_rate": 0.00019335314950387174, "loss": 0.0252, "step": 2472 }, { "epoch": 0.11717602463871121, "grad_norm": 0.72265625, "learning_rate": 0.00019334780954238204, "loss": 1.0257, "step": 2473 }, { "epoch": 0.11722340677564558, "grad_norm": 0.474609375, "learning_rate": 0.0001933424675105373, "loss": 0.0673, "step": 2474 }, { "epoch": 0.11727078891257996, "grad_norm": 0.53515625, "learning_rate": 0.00019333712340845608, "loss": 1.181, "step": 2475 }, { "epoch": 0.11731817104951434, "grad_norm": 0.53515625, "learning_rate": 0.0001933317772362569, "loss": 1.1553, "step": 2476 }, { "epoch": 0.11736555318644872, "grad_norm": 0.609375, "learning_rate": 0.00019332642899405825, "loss": 0.8072, "step": 2477 }, { "epoch": 0.11741293532338308, "grad_norm": 0.5078125, "learning_rate": 0.00019332107868197886, "loss": 0.0409, "step": 2478 }, { "epoch": 0.11746031746031746, "grad_norm": 0.4609375, "learning_rate": 0.00019331572630013736, "loss": 0.2116, "step": 2479 }, { "epoch": 0.11750769959725184, "grad_norm": 0.53125, "learning_rate": 0.0001933103718486524, "loss": 0.1154, "step": 2480 }, { "epoch": 0.11755508173418622, "grad_norm": 0.458984375, "learning_rate": 0.00019330501532764283, "loss": 0.9118, "step": 2481 }, { "epoch": 0.11760246387112058, "grad_norm": 0.443359375, "learning_rate": 0.00019329965673722737, "loss": 0.5453, "step": 2482 }, { "epoch": 0.11764984600805496, "grad_norm": 0.2001953125, "learning_rate": 0.0001932942960775249, "loss": 0.026, "step": 2483 }, { "epoch": 0.11769722814498934, "grad_norm": 0.51171875, "learning_rate": 0.00019328893334865431, "loss": 1.1812, "step": 2484 }, { "epoch": 0.11774461028192372, "grad_norm": 0.51953125, "learning_rate": 0.0001932835685507346, "loss": 1.4514, "step": 2485 }, { "epoch": 0.11779199241885809, "grad_norm": 0.6171875, "learning_rate": 0.00019327820168388464, "loss": 0.6865, "step": 2486 }, { "epoch": 0.11783937455579246, "grad_norm": 0.69921875, "learning_rate": 0.00019327283274822357, "loss": 1.2399, "step": 2487 }, { "epoch": 0.11788675669272684, "grad_norm": 1.03125, "learning_rate": 0.00019326746174387038, "loss": 0.2101, "step": 2488 }, { "epoch": 0.11793413882966122, "grad_norm": 0.318359375, "learning_rate": 0.00019326208867094424, "loss": 0.0275, "step": 2489 }, { "epoch": 0.11798152096659559, "grad_norm": 0.59375, "learning_rate": 0.0001932567135295643, "loss": 0.8127, "step": 2490 }, { "epoch": 0.11802890310352997, "grad_norm": 0.52734375, "learning_rate": 0.00019325133631984981, "loss": 0.7025, "step": 2491 }, { "epoch": 0.11807628524046435, "grad_norm": 0.69140625, "learning_rate": 0.00019324595704192, "loss": 0.1387, "step": 2492 }, { "epoch": 0.11812366737739873, "grad_norm": 0.56640625, "learning_rate": 0.0001932405756958942, "loss": 1.3282, "step": 2493 }, { "epoch": 0.11817104951433309, "grad_norm": 0.2392578125, "learning_rate": 0.00019323519228189173, "loss": 0.1715, "step": 2494 }, { "epoch": 0.11821843165126747, "grad_norm": 0.26953125, "learning_rate": 0.000193229806800032, "loss": 0.1249, "step": 2495 }, { "epoch": 0.11826581378820185, "grad_norm": 0.62109375, "learning_rate": 0.00019322441925043448, "loss": 1.3299, "step": 2496 }, { "epoch": 0.11831319592513623, "grad_norm": 0.59375, "learning_rate": 0.00019321902963321863, "loss": 1.0653, "step": 2497 }, { "epoch": 0.11836057806207059, "grad_norm": 0.458984375, "learning_rate": 0.00019321363794850397, "loss": 0.9062, "step": 2498 }, { "epoch": 0.11840796019900497, "grad_norm": 0.6484375, "learning_rate": 0.00019320824419641016, "loss": 1.5567, "step": 2499 }, { "epoch": 0.11845534233593935, "grad_norm": 0.5546875, "learning_rate": 0.00019320284837705671, "loss": 0.9094, "step": 2500 }, { "epoch": 0.11850272447287373, "grad_norm": 0.6484375, "learning_rate": 0.0001931974504905634, "loss": 1.2802, "step": 2501 }, { "epoch": 0.11855010660980811, "grad_norm": 0.55859375, "learning_rate": 0.00019319205053704993, "loss": 1.1052, "step": 2502 }, { "epoch": 0.11859748874674247, "grad_norm": 0.34765625, "learning_rate": 0.00019318664851663599, "loss": 0.3962, "step": 2503 }, { "epoch": 0.11864487088367685, "grad_norm": 0.48828125, "learning_rate": 0.00019318124442944146, "loss": 1.1065, "step": 2504 }, { "epoch": 0.11869225302061123, "grad_norm": 0.5625, "learning_rate": 0.0001931758382755862, "loss": 1.4303, "step": 2505 }, { "epoch": 0.11873963515754561, "grad_norm": 0.640625, "learning_rate": 0.00019317043005519008, "loss": 0.8571, "step": 2506 }, { "epoch": 0.11878701729447998, "grad_norm": 0.478515625, "learning_rate": 0.00019316501976837308, "loss": 1.3223, "step": 2507 }, { "epoch": 0.11883439943141436, "grad_norm": 0.98046875, "learning_rate": 0.00019315960741525512, "loss": 0.8867, "step": 2508 }, { "epoch": 0.11888178156834874, "grad_norm": 0.55078125, "learning_rate": 0.00019315419299595633, "loss": 0.8417, "step": 2509 }, { "epoch": 0.11892916370528311, "grad_norm": 0.57421875, "learning_rate": 0.00019314877651059676, "loss": 0.8918, "step": 2510 }, { "epoch": 0.11897654584221748, "grad_norm": 0.51953125, "learning_rate": 0.0001931433579592966, "loss": 0.7333, "step": 2511 }, { "epoch": 0.11902392797915186, "grad_norm": 0.5078125, "learning_rate": 0.0001931379373421759, "loss": 1.0359, "step": 2512 }, { "epoch": 0.11907131011608624, "grad_norm": 0.828125, "learning_rate": 0.00019313251465935498, "loss": 1.1578, "step": 2513 }, { "epoch": 0.11911869225302062, "grad_norm": 0.5859375, "learning_rate": 0.00019312708991095408, "loss": 1.3204, "step": 2514 }, { "epoch": 0.11916607438995498, "grad_norm": 1.0625, "learning_rate": 0.00019312166309709352, "loss": 0.3033, "step": 2515 }, { "epoch": 0.11921345652688936, "grad_norm": 0.5625, "learning_rate": 0.00019311623421789368, "loss": 1.3375, "step": 2516 }, { "epoch": 0.11926083866382374, "grad_norm": 0.68359375, "learning_rate": 0.00019311080327347492, "loss": 1.2571, "step": 2517 }, { "epoch": 0.11930822080075812, "grad_norm": 0.62890625, "learning_rate": 0.00019310537026395773, "loss": 0.9547, "step": 2518 }, { "epoch": 0.11935560293769248, "grad_norm": 0.302734375, "learning_rate": 0.00019309993518946264, "loss": 0.0299, "step": 2519 }, { "epoch": 0.11940298507462686, "grad_norm": 0.408203125, "learning_rate": 0.0001930944980501101, "loss": 0.7248, "step": 2520 }, { "epoch": 0.11945036721156124, "grad_norm": 0.515625, "learning_rate": 0.00019308905884602074, "loss": 0.7792, "step": 2521 }, { "epoch": 0.11949774934849562, "grad_norm": 0.546875, "learning_rate": 0.00019308361757731528, "loss": 1.0552, "step": 2522 }, { "epoch": 0.11954513148542999, "grad_norm": 0.61328125, "learning_rate": 0.00019307817424411426, "loss": 0.7193, "step": 2523 }, { "epoch": 0.11959251362236437, "grad_norm": 0.431640625, "learning_rate": 0.0001930727288465385, "loss": 1.0973, "step": 2524 }, { "epoch": 0.11963989575929874, "grad_norm": 0.75, "learning_rate": 0.00019306728138470877, "loss": 0.8714, "step": 2525 }, { "epoch": 0.11968727789623312, "grad_norm": 0.1455078125, "learning_rate": 0.00019306183185874585, "loss": 0.0116, "step": 2526 }, { "epoch": 0.11973466003316749, "grad_norm": 0.236328125, "learning_rate": 0.00019305638026877064, "loss": 0.1626, "step": 2527 }, { "epoch": 0.11978204217010187, "grad_norm": 0.37890625, "learning_rate": 0.00019305092661490406, "loss": 0.0571, "step": 2528 }, { "epoch": 0.11982942430703625, "grad_norm": 0.625, "learning_rate": 0.000193045470897267, "loss": 0.9813, "step": 2529 }, { "epoch": 0.11987680644397063, "grad_norm": 0.55078125, "learning_rate": 0.0001930400131159805, "loss": 1.069, "step": 2530 }, { "epoch": 0.119924188580905, "grad_norm": 0.5234375, "learning_rate": 0.0001930345532711656, "loss": 1.0806, "step": 2531 }, { "epoch": 0.11997157071783937, "grad_norm": 1.0703125, "learning_rate": 0.00019302909136294344, "loss": 0.8771, "step": 2532 }, { "epoch": 0.12001895285477375, "grad_norm": 0.2470703125, "learning_rate": 0.00019302362739143512, "loss": 0.1197, "step": 2533 }, { "epoch": 0.12006633499170813, "grad_norm": 0.62890625, "learning_rate": 0.00019301816135676182, "loss": 1.5407, "step": 2534 }, { "epoch": 0.12011371712864251, "grad_norm": 0.62890625, "learning_rate": 0.00019301269325904476, "loss": 0.9555, "step": 2535 }, { "epoch": 0.12016109926557687, "grad_norm": 0.053466796875, "learning_rate": 0.00019300722309840526, "loss": 0.0034, "step": 2536 }, { "epoch": 0.12020848140251125, "grad_norm": 0.9765625, "learning_rate": 0.00019300175087496463, "loss": 0.8896, "step": 2537 }, { "epoch": 0.12025586353944563, "grad_norm": 0.1376953125, "learning_rate": 0.00019299627658884421, "loss": 0.0083, "step": 2538 }, { "epoch": 0.12030324567638001, "grad_norm": 0.60546875, "learning_rate": 0.00019299080024016543, "loss": 0.7692, "step": 2539 }, { "epoch": 0.12035062781331438, "grad_norm": 0.49609375, "learning_rate": 0.00019298532182904975, "loss": 0.9235, "step": 2540 }, { "epoch": 0.12039800995024875, "grad_norm": 0.66015625, "learning_rate": 0.00019297984135561866, "loss": 1.2387, "step": 2541 }, { "epoch": 0.12044539208718313, "grad_norm": 0.32421875, "learning_rate": 0.00019297435881999376, "loss": 0.1512, "step": 2542 }, { "epoch": 0.12049277422411751, "grad_norm": 0.5234375, "learning_rate": 0.0001929688742222966, "loss": 0.6726, "step": 2543 }, { "epoch": 0.12054015636105188, "grad_norm": 0.65234375, "learning_rate": 0.00019296338756264882, "loss": 1.4926, "step": 2544 }, { "epoch": 0.12058753849798626, "grad_norm": 0.54296875, "learning_rate": 0.00019295789884117212, "loss": 1.0649, "step": 2545 }, { "epoch": 0.12063492063492064, "grad_norm": 0.298828125, "learning_rate": 0.00019295240805798826, "loss": 0.0948, "step": 2546 }, { "epoch": 0.12068230277185502, "grad_norm": 0.35546875, "learning_rate": 0.000192946915213219, "loss": 0.7311, "step": 2547 }, { "epoch": 0.12072968490878938, "grad_norm": 0.2236328125, "learning_rate": 0.00019294142030698615, "loss": 0.1531, "step": 2548 }, { "epoch": 0.12077706704572376, "grad_norm": 0.58984375, "learning_rate": 0.00019293592333941158, "loss": 0.6935, "step": 2549 }, { "epoch": 0.12082444918265814, "grad_norm": 0.49609375, "learning_rate": 0.00019293042431061725, "loss": 1.1029, "step": 2550 }, { "epoch": 0.12087183131959252, "grad_norm": 0.79296875, "learning_rate": 0.00019292492322072507, "loss": 1.3596, "step": 2551 }, { "epoch": 0.12091921345652688, "grad_norm": 0.019775390625, "learning_rate": 0.0001929194200698571, "loss": 0.0017, "step": 2552 }, { "epoch": 0.12096659559346126, "grad_norm": 1.03125, "learning_rate": 0.00019291391485813533, "loss": 0.8052, "step": 2553 }, { "epoch": 0.12101397773039564, "grad_norm": 0.33203125, "learning_rate": 0.00019290840758568194, "loss": 0.0633, "step": 2554 }, { "epoch": 0.12106135986733002, "grad_norm": 0.57421875, "learning_rate": 0.000192902898252619, "loss": 0.4026, "step": 2555 }, { "epoch": 0.12110874200426439, "grad_norm": 0.7578125, "learning_rate": 0.00019289738685906874, "loss": 1.0573, "step": 2556 }, { "epoch": 0.12115612414119876, "grad_norm": 0.07861328125, "learning_rate": 0.0001928918734051534, "loss": 0.008, "step": 2557 }, { "epoch": 0.12120350627813314, "grad_norm": 0.8515625, "learning_rate": 0.00019288635789099524, "loss": 1.0692, "step": 2558 }, { "epoch": 0.12125088841506752, "grad_norm": 0.6953125, "learning_rate": 0.0001928808403167166, "loss": 1.4574, "step": 2559 }, { "epoch": 0.1212982705520019, "grad_norm": 0.53125, "learning_rate": 0.00019287532068243984, "loss": 0.7272, "step": 2560 }, { "epoch": 0.12134565268893627, "grad_norm": 1.578125, "learning_rate": 0.00019286979898828742, "loss": 0.0126, "step": 2561 }, { "epoch": 0.12139303482587065, "grad_norm": 0.56640625, "learning_rate": 0.00019286427523438178, "loss": 1.0641, "step": 2562 }, { "epoch": 0.12144041696280503, "grad_norm": 0.88671875, "learning_rate": 0.0001928587494208454, "loss": 0.1086, "step": 2563 }, { "epoch": 0.1214877990997394, "grad_norm": 0.58203125, "learning_rate": 0.0001928532215478009, "loss": 1.4355, "step": 2564 }, { "epoch": 0.12153518123667377, "grad_norm": 0.53515625, "learning_rate": 0.00019284769161537083, "loss": 0.7239, "step": 2565 }, { "epoch": 0.12158256337360815, "grad_norm": 0.8203125, "learning_rate": 0.00019284215962367786, "loss": 1.2765, "step": 2566 }, { "epoch": 0.12162994551054253, "grad_norm": 0.478515625, "learning_rate": 0.0001928366255728447, "loss": 0.2377, "step": 2567 }, { "epoch": 0.12167732764747691, "grad_norm": 0.439453125, "learning_rate": 0.00019283108946299403, "loss": 0.0398, "step": 2568 }, { "epoch": 0.12172470978441127, "grad_norm": 0.5234375, "learning_rate": 0.0001928255512942487, "loss": 0.9694, "step": 2569 }, { "epoch": 0.12177209192134565, "grad_norm": 0.474609375, "learning_rate": 0.00019282001106673153, "loss": 1.048, "step": 2570 }, { "epoch": 0.12181947405828003, "grad_norm": 0.76953125, "learning_rate": 0.00019281446878056534, "loss": 0.5022, "step": 2571 }, { "epoch": 0.12186685619521441, "grad_norm": 0.10791015625, "learning_rate": 0.00019280892443587316, "loss": 0.0089, "step": 2572 }, { "epoch": 0.12191423833214877, "grad_norm": 0.416015625, "learning_rate": 0.00019280337803277781, "loss": 0.6116, "step": 2573 }, { "epoch": 0.12196162046908315, "grad_norm": 0.6484375, "learning_rate": 0.00019279782957140243, "loss": 0.2022, "step": 2574 }, { "epoch": 0.12200900260601753, "grad_norm": 0.06689453125, "learning_rate": 0.00019279227905187005, "loss": 0.0068, "step": 2575 }, { "epoch": 0.12205638474295191, "grad_norm": 1.359375, "learning_rate": 0.00019278672647430375, "loss": 0.4393, "step": 2576 }, { "epoch": 0.12210376687988628, "grad_norm": 0.64453125, "learning_rate": 0.00019278117183882664, "loss": 1.1396, "step": 2577 }, { "epoch": 0.12215114901682066, "grad_norm": 1.265625, "learning_rate": 0.00019277561514556202, "loss": 0.3896, "step": 2578 }, { "epoch": 0.12219853115375504, "grad_norm": 0.55859375, "learning_rate": 0.00019277005639463304, "loss": 1.1313, "step": 2579 }, { "epoch": 0.12224591329068941, "grad_norm": 0.58984375, "learning_rate": 0.00019276449558616306, "loss": 0.6474, "step": 2580 }, { "epoch": 0.12229329542762378, "grad_norm": 0.341796875, "learning_rate": 0.00019275893272027535, "loss": 0.0849, "step": 2581 }, { "epoch": 0.12234067756455816, "grad_norm": 0.6640625, "learning_rate": 0.0001927533677970933, "loss": 1.3711, "step": 2582 }, { "epoch": 0.12238805970149254, "grad_norm": 0.7421875, "learning_rate": 0.0001927478008167404, "loss": 0.8757, "step": 2583 }, { "epoch": 0.12243544183842692, "grad_norm": 0.412109375, "learning_rate": 0.00019274223177934, "loss": 0.7932, "step": 2584 }, { "epoch": 0.12248282397536128, "grad_norm": 0.65625, "learning_rate": 0.00019273666068501575, "loss": 0.963, "step": 2585 }, { "epoch": 0.12253020611229566, "grad_norm": 0.875, "learning_rate": 0.0001927310875338911, "loss": 1.1819, "step": 2586 }, { "epoch": 0.12257758824923004, "grad_norm": 0.466796875, "learning_rate": 0.00019272551232608974, "loss": 0.968, "step": 2587 }, { "epoch": 0.12262497038616442, "grad_norm": 0.4609375, "learning_rate": 0.00019271993506173526, "loss": 0.5281, "step": 2588 }, { "epoch": 0.12267235252309878, "grad_norm": 0.490234375, "learning_rate": 0.00019271435574095137, "loss": 0.8354, "step": 2589 }, { "epoch": 0.12271973466003316, "grad_norm": 0.5234375, "learning_rate": 0.00019270877436386186, "loss": 1.0137, "step": 2590 }, { "epoch": 0.12276711679696754, "grad_norm": 0.6484375, "learning_rate": 0.00019270319093059044, "loss": 0.9656, "step": 2591 }, { "epoch": 0.12281449893390192, "grad_norm": 0.4765625, "learning_rate": 0.000192697605441261, "loss": 0.6587, "step": 2592 }, { "epoch": 0.1228618810708363, "grad_norm": 0.314453125, "learning_rate": 0.00019269201789599743, "loss": 0.191, "step": 2593 }, { "epoch": 0.12290926320777067, "grad_norm": 0.423828125, "learning_rate": 0.00019268642829492363, "loss": 0.1004, "step": 2594 }, { "epoch": 0.12295664534470505, "grad_norm": 0.296875, "learning_rate": 0.00019268083663816357, "loss": 0.0405, "step": 2595 }, { "epoch": 0.12300402748163942, "grad_norm": 0.1201171875, "learning_rate": 0.00019267524292584126, "loss": 0.0198, "step": 2596 }, { "epoch": 0.1230514096185738, "grad_norm": 0.103515625, "learning_rate": 0.00019266964715808078, "loss": 0.0101, "step": 2597 }, { "epoch": 0.12309879175550817, "grad_norm": 0.48828125, "learning_rate": 0.00019266404933500624, "loss": 0.7614, "step": 2598 }, { "epoch": 0.12314617389244255, "grad_norm": 0.384765625, "learning_rate": 0.00019265844945674177, "loss": 0.6976, "step": 2599 }, { "epoch": 0.12319355602937693, "grad_norm": 0.53125, "learning_rate": 0.00019265284752341158, "loss": 0.7968, "step": 2600 }, { "epoch": 0.1232409381663113, "grad_norm": 0.392578125, "learning_rate": 0.00019264724353514, "loss": 0.6219, "step": 2601 }, { "epoch": 0.12328832030324567, "grad_norm": 0.625, "learning_rate": 0.00019264163749205116, "loss": 1.4453, "step": 2602 }, { "epoch": 0.12333570244018005, "grad_norm": 0.734375, "learning_rate": 0.0001926360293942695, "loss": 1.453, "step": 2603 }, { "epoch": 0.12338308457711443, "grad_norm": 0.267578125, "learning_rate": 0.00019263041924191937, "loss": 0.0269, "step": 2604 }, { "epoch": 0.12343046671404881, "grad_norm": 0.51953125, "learning_rate": 0.00019262480703512521, "loss": 0.6972, "step": 2605 }, { "epoch": 0.12347784885098317, "grad_norm": 0.54296875, "learning_rate": 0.00019261919277401154, "loss": 0.6406, "step": 2606 }, { "epoch": 0.12352523098791755, "grad_norm": 0.333984375, "learning_rate": 0.00019261357645870275, "loss": 0.2155, "step": 2607 }, { "epoch": 0.12357261312485193, "grad_norm": 0.48828125, "learning_rate": 0.00019260795808932353, "loss": 0.0741, "step": 2608 }, { "epoch": 0.12361999526178631, "grad_norm": 0.53125, "learning_rate": 0.00019260233766599843, "loss": 1.0198, "step": 2609 }, { "epoch": 0.12366737739872068, "grad_norm": 0.6640625, "learning_rate": 0.0001925967151888521, "loss": 1.0583, "step": 2610 }, { "epoch": 0.12371475953565506, "grad_norm": 0.84765625, "learning_rate": 0.0001925910906580093, "loss": 1.1802, "step": 2611 }, { "epoch": 0.12376214167258943, "grad_norm": 0.412109375, "learning_rate": 0.00019258546407359468, "loss": 0.5909, "step": 2612 }, { "epoch": 0.12380952380952381, "grad_norm": 0.72265625, "learning_rate": 0.0001925798354357331, "loss": 1.1352, "step": 2613 }, { "epoch": 0.12385690594645818, "grad_norm": 1.265625, "learning_rate": 0.0001925742047445494, "loss": 0.2152, "step": 2614 }, { "epoch": 0.12390428808339256, "grad_norm": 0.546875, "learning_rate": 0.00019256857200016845, "loss": 0.5009, "step": 2615 }, { "epoch": 0.12395167022032694, "grad_norm": 0.474609375, "learning_rate": 0.00019256293720271514, "loss": 0.6322, "step": 2616 }, { "epoch": 0.12399905235726132, "grad_norm": 0.470703125, "learning_rate": 0.0001925573003523145, "loss": 0.7375, "step": 2617 }, { "epoch": 0.12404643449419568, "grad_norm": 0.5703125, "learning_rate": 0.00019255166144909152, "loss": 0.8515, "step": 2618 }, { "epoch": 0.12409381663113006, "grad_norm": 0.5390625, "learning_rate": 0.00019254602049317127, "loss": 0.8582, "step": 2619 }, { "epoch": 0.12414119876806444, "grad_norm": 0.66796875, "learning_rate": 0.00019254037748467885, "loss": 0.7536, "step": 2620 }, { "epoch": 0.12418858090499882, "grad_norm": 0.74609375, "learning_rate": 0.00019253473242373945, "loss": 1.3561, "step": 2621 }, { "epoch": 0.1242359630419332, "grad_norm": 0.75390625, "learning_rate": 0.00019252908531047823, "loss": 1.0946, "step": 2622 }, { "epoch": 0.12428334517886756, "grad_norm": 0.50390625, "learning_rate": 0.00019252343614502046, "loss": 1.1829, "step": 2623 }, { "epoch": 0.12433072731580194, "grad_norm": 0.93359375, "learning_rate": 0.00019251778492749145, "loss": 0.5284, "step": 2624 }, { "epoch": 0.12437810945273632, "grad_norm": 0.431640625, "learning_rate": 0.00019251213165801648, "loss": 0.7203, "step": 2625 }, { "epoch": 0.1244254915896707, "grad_norm": 0.5234375, "learning_rate": 0.000192506476336721, "loss": 1.3022, "step": 2626 }, { "epoch": 0.12447287372660507, "grad_norm": 0.5625, "learning_rate": 0.00019250081896373037, "loss": 1.3903, "step": 2627 }, { "epoch": 0.12452025586353944, "grad_norm": 0.490234375, "learning_rate": 0.00019249515953917014, "loss": 1.207, "step": 2628 }, { "epoch": 0.12456763800047382, "grad_norm": 0.75, "learning_rate": 0.00019248949806316578, "loss": 0.473, "step": 2629 }, { "epoch": 0.1246150201374082, "grad_norm": 0.32421875, "learning_rate": 0.00019248383453584288, "loss": 0.1061, "step": 2630 }, { "epoch": 0.12466240227434257, "grad_norm": 0.73046875, "learning_rate": 0.000192478168957327, "loss": 0.4016, "step": 2631 }, { "epoch": 0.12470978441127695, "grad_norm": 0.734375, "learning_rate": 0.0001924725013277439, "loss": 0.8508, "step": 2632 }, { "epoch": 0.12475716654821133, "grad_norm": 0.65625, "learning_rate": 0.00019246683164721916, "loss": 0.8059, "step": 2633 }, { "epoch": 0.1248045486851457, "grad_norm": 0.58984375, "learning_rate": 0.00019246115991587858, "loss": 1.0854, "step": 2634 }, { "epoch": 0.12485193082208007, "grad_norm": 0.45703125, "learning_rate": 0.000192455486133848, "loss": 1.3106, "step": 2635 }, { "epoch": 0.12489931295901445, "grad_norm": 0.53515625, "learning_rate": 0.0001924498103012532, "loss": 1.2176, "step": 2636 }, { "epoch": 0.12494669509594883, "grad_norm": 0.484375, "learning_rate": 0.00019244413241822008, "loss": 0.8235, "step": 2637 }, { "epoch": 0.12499407723288321, "grad_norm": 0.54296875, "learning_rate": 0.00019243845248487456, "loss": 1.1107, "step": 2638 }, { "epoch": 0.1250414593698176, "grad_norm": 0.57421875, "learning_rate": 0.00019243277050134266, "loss": 0.9661, "step": 2639 }, { "epoch": 0.12508884150675195, "grad_norm": 0.59375, "learning_rate": 0.00019242708646775034, "loss": 0.8563, "step": 2640 }, { "epoch": 0.12513622364368632, "grad_norm": 0.6015625, "learning_rate": 0.0001924214003842237, "loss": 1.1063, "step": 2641 }, { "epoch": 0.1251836057806207, "grad_norm": 0.49609375, "learning_rate": 0.0001924157122508888, "loss": 0.0833, "step": 2642 }, { "epoch": 0.12523098791755508, "grad_norm": 0.75, "learning_rate": 0.0001924100220678719, "loss": 1.1375, "step": 2643 }, { "epoch": 0.12527837005448947, "grad_norm": 0.6640625, "learning_rate": 0.0001924043298352991, "loss": 0.0631, "step": 2644 }, { "epoch": 0.12532575219142383, "grad_norm": 0.7421875, "learning_rate": 0.00019239863555329671, "loss": 1.0201, "step": 2645 }, { "epoch": 0.1253731343283582, "grad_norm": 0.94140625, "learning_rate": 0.000192392939221991, "loss": 0.2895, "step": 2646 }, { "epoch": 0.1254205164652926, "grad_norm": 0.578125, "learning_rate": 0.0001923872408415083, "loss": 0.9183, "step": 2647 }, { "epoch": 0.12546789860222696, "grad_norm": 0.185546875, "learning_rate": 0.000192381540411975, "loss": 0.0157, "step": 2648 }, { "epoch": 0.12551528073916132, "grad_norm": 0.5078125, "learning_rate": 0.00019237583793351758, "loss": 0.6224, "step": 2649 }, { "epoch": 0.12556266287609572, "grad_norm": 0.44921875, "learning_rate": 0.0001923701334062624, "loss": 0.5273, "step": 2650 }, { "epoch": 0.12561004501303008, "grad_norm": 0.56640625, "learning_rate": 0.0001923644268303361, "loss": 0.8104, "step": 2651 }, { "epoch": 0.12565742714996447, "grad_norm": 0.609375, "learning_rate": 0.00019235871820586517, "loss": 0.8764, "step": 2652 }, { "epoch": 0.12570480928689884, "grad_norm": 0.60546875, "learning_rate": 0.00019235300753297626, "loss": 0.928, "step": 2653 }, { "epoch": 0.1257521914238332, "grad_norm": 0.51171875, "learning_rate": 0.000192347294811796, "loss": 0.51, "step": 2654 }, { "epoch": 0.1257995735607676, "grad_norm": 0.5859375, "learning_rate": 0.00019234158004245112, "loss": 1.4853, "step": 2655 }, { "epoch": 0.12584695569770196, "grad_norm": 0.8046875, "learning_rate": 0.00019233586322506832, "loss": 1.1049, "step": 2656 }, { "epoch": 0.12589433783463636, "grad_norm": 0.39453125, "learning_rate": 0.00019233014435977448, "loss": 0.4638, "step": 2657 }, { "epoch": 0.12594171997157072, "grad_norm": 0.8046875, "learning_rate": 0.00019232442344669634, "loss": 0.6833, "step": 2658 }, { "epoch": 0.12598910210850509, "grad_norm": 0.50390625, "learning_rate": 0.00019231870048596085, "loss": 0.0987, "step": 2659 }, { "epoch": 0.12603648424543948, "grad_norm": 0.84375, "learning_rate": 0.00019231297547769492, "loss": 0.349, "step": 2660 }, { "epoch": 0.12608386638237384, "grad_norm": 0.546875, "learning_rate": 0.0001923072484220255, "loss": 1.1312, "step": 2661 }, { "epoch": 0.1261312485193082, "grad_norm": 0.5, "learning_rate": 0.00019230151931907963, "loss": 1.0402, "step": 2662 }, { "epoch": 0.1261786306562426, "grad_norm": 0.53125, "learning_rate": 0.0001922957881689844, "loss": 1.4235, "step": 2663 }, { "epoch": 0.12622601279317697, "grad_norm": 0.6484375, "learning_rate": 0.00019229005497186687, "loss": 1.2282, "step": 2664 }, { "epoch": 0.12627339493011136, "grad_norm": 0.671875, "learning_rate": 0.00019228431972785425, "loss": 0.1748, "step": 2665 }, { "epoch": 0.12632077706704573, "grad_norm": 0.38671875, "learning_rate": 0.00019227858243707371, "loss": 0.4302, "step": 2666 }, { "epoch": 0.1263681592039801, "grad_norm": 0.55859375, "learning_rate": 0.0001922728430996525, "loss": 0.7094, "step": 2667 }, { "epoch": 0.12641554134091448, "grad_norm": 0.58984375, "learning_rate": 0.00019226710171571791, "loss": 0.7875, "step": 2668 }, { "epoch": 0.12646292347784885, "grad_norm": 0.5546875, "learning_rate": 0.00019226135828539728, "loss": 0.8099, "step": 2669 }, { "epoch": 0.12651030561478321, "grad_norm": 0.291015625, "learning_rate": 0.000192255612808818, "loss": 0.1927, "step": 2670 }, { "epoch": 0.1265576877517176, "grad_norm": 0.78125, "learning_rate": 0.0001922498652861075, "loss": 0.7767, "step": 2671 }, { "epoch": 0.12660506988865197, "grad_norm": 0.6171875, "learning_rate": 0.0001922441157173932, "loss": 0.8704, "step": 2672 }, { "epoch": 0.12665245202558637, "grad_norm": 0.59375, "learning_rate": 0.00019223836410280274, "loss": 1.3345, "step": 2673 }, { "epoch": 0.12669983416252073, "grad_norm": 0.5625, "learning_rate": 0.00019223261044246356, "loss": 0.2152, "step": 2674 }, { "epoch": 0.1267472162994551, "grad_norm": 0.498046875, "learning_rate": 0.00019222685473650338, "loss": 0.6204, "step": 2675 }, { "epoch": 0.1267945984363895, "grad_norm": 0.71875, "learning_rate": 0.00019222109698504975, "loss": 0.3218, "step": 2676 }, { "epoch": 0.12684198057332385, "grad_norm": 0.65234375, "learning_rate": 0.00019221533718823044, "loss": 1.485, "step": 2677 }, { "epoch": 0.12688936271025822, "grad_norm": 0.431640625, "learning_rate": 0.00019220957534617314, "loss": 0.0332, "step": 2678 }, { "epoch": 0.1269367448471926, "grad_norm": 0.515625, "learning_rate": 0.0001922038114590057, "loss": 1.5897, "step": 2679 }, { "epoch": 0.12698412698412698, "grad_norm": 0.53125, "learning_rate": 0.00019219804552685595, "loss": 0.7048, "step": 2680 }, { "epoch": 0.12703150912106137, "grad_norm": 0.328125, "learning_rate": 0.00019219227754985175, "loss": 0.0491, "step": 2681 }, { "epoch": 0.12707889125799574, "grad_norm": 1.359375, "learning_rate": 0.000192186507528121, "loss": 0.8848, "step": 2682 }, { "epoch": 0.1271262733949301, "grad_norm": 0.69140625, "learning_rate": 0.00019218073546179172, "loss": 1.0105, "step": 2683 }, { "epoch": 0.1271736555318645, "grad_norm": 0.2470703125, "learning_rate": 0.0001921749613509919, "loss": 0.1505, "step": 2684 }, { "epoch": 0.12722103766879886, "grad_norm": 0.53515625, "learning_rate": 0.00019216918519584966, "loss": 0.5415, "step": 2685 }, { "epoch": 0.12726841980573325, "grad_norm": 0.2265625, "learning_rate": 0.00019216340699649304, "loss": 0.145, "step": 2686 }, { "epoch": 0.12731580194266762, "grad_norm": 0.55078125, "learning_rate": 0.0001921576267530502, "loss": 1.0475, "step": 2687 }, { "epoch": 0.12736318407960198, "grad_norm": 0.69140625, "learning_rate": 0.00019215184446564942, "loss": 0.1473, "step": 2688 }, { "epoch": 0.12741056621653638, "grad_norm": 0.5078125, "learning_rate": 0.00019214606013441885, "loss": 0.7821, "step": 2689 }, { "epoch": 0.12745794835347074, "grad_norm": 0.609375, "learning_rate": 0.0001921402737594868, "loss": 0.5792, "step": 2690 }, { "epoch": 0.1275053304904051, "grad_norm": 0.65625, "learning_rate": 0.00019213448534098165, "loss": 0.8598, "step": 2691 }, { "epoch": 0.1275527126273395, "grad_norm": 0.298828125, "learning_rate": 0.00019212869487903174, "loss": 0.1843, "step": 2692 }, { "epoch": 0.12760009476427386, "grad_norm": 0.431640625, "learning_rate": 0.0001921229023737655, "loss": 0.3087, "step": 2693 }, { "epoch": 0.12764747690120826, "grad_norm": 0.625, "learning_rate": 0.00019211710782531143, "loss": 0.8627, "step": 2694 }, { "epoch": 0.12769485903814262, "grad_norm": 0.040771484375, "learning_rate": 0.00019211131123379803, "loss": 0.0027, "step": 2695 }, { "epoch": 0.127742241175077, "grad_norm": 0.5390625, "learning_rate": 0.00019210551259935384, "loss": 0.7263, "step": 2696 }, { "epoch": 0.12778962331201138, "grad_norm": 0.7265625, "learning_rate": 0.00019209971192210747, "loss": 1.5114, "step": 2697 }, { "epoch": 0.12783700544894575, "grad_norm": 0.5078125, "learning_rate": 0.0001920939092021876, "loss": 1.0862, "step": 2698 }, { "epoch": 0.1278843875858801, "grad_norm": 0.62890625, "learning_rate": 0.00019208810443972296, "loss": 1.0623, "step": 2699 }, { "epoch": 0.1279317697228145, "grad_norm": 0.51953125, "learning_rate": 0.00019208229763484222, "loss": 1.2576, "step": 2700 }, { "epoch": 0.12797915185974887, "grad_norm": 0.51171875, "learning_rate": 0.00019207648878767422, "loss": 0.7411, "step": 2701 }, { "epoch": 0.12802653399668326, "grad_norm": 0.57421875, "learning_rate": 0.00019207067789834775, "loss": 1.1092, "step": 2702 }, { "epoch": 0.12807391613361763, "grad_norm": 0.828125, "learning_rate": 0.0001920648649669917, "loss": 1.7907, "step": 2703 }, { "epoch": 0.128121298270552, "grad_norm": 0.7109375, "learning_rate": 0.00019205904999373502, "loss": 1.2354, "step": 2704 }, { "epoch": 0.12816868040748639, "grad_norm": 0.86328125, "learning_rate": 0.00019205323297870667, "loss": 0.4706, "step": 2705 }, { "epoch": 0.12821606254442075, "grad_norm": 0.609375, "learning_rate": 0.00019204741392203568, "loss": 1.2389, "step": 2706 }, { "epoch": 0.12826344468135512, "grad_norm": 0.66796875, "learning_rate": 0.00019204159282385107, "loss": 1.2217, "step": 2707 }, { "epoch": 0.1283108268182895, "grad_norm": 0.3671875, "learning_rate": 0.000192035769684282, "loss": 0.4758, "step": 2708 }, { "epoch": 0.12835820895522387, "grad_norm": 0.42578125, "learning_rate": 0.00019202994450345757, "loss": 1.1666, "step": 2709 }, { "epoch": 0.12840559109215827, "grad_norm": 0.61328125, "learning_rate": 0.00019202411728150702, "loss": 1.061, "step": 2710 }, { "epoch": 0.12845297322909263, "grad_norm": 1.046875, "learning_rate": 0.0001920182880185595, "loss": 1.5762, "step": 2711 }, { "epoch": 0.128500355366027, "grad_norm": 0.5625, "learning_rate": 0.00019201245671474445, "loss": 1.1611, "step": 2712 }, { "epoch": 0.1285477375029614, "grad_norm": 0.5625, "learning_rate": 0.00019200662337019107, "loss": 1.3035, "step": 2713 }, { "epoch": 0.12859511963989576, "grad_norm": 0.3203125, "learning_rate": 0.0001920007879850288, "loss": 0.1655, "step": 2714 }, { "epoch": 0.12864250177683015, "grad_norm": 0.5390625, "learning_rate": 0.00019199495055938703, "loss": 0.8907, "step": 2715 }, { "epoch": 0.1286898839137645, "grad_norm": 0.55078125, "learning_rate": 0.00019198911109339525, "loss": 0.8073, "step": 2716 }, { "epoch": 0.12873726605069888, "grad_norm": 0.43359375, "learning_rate": 0.000191983269587183, "loss": 0.2654, "step": 2717 }, { "epoch": 0.12878464818763327, "grad_norm": 0.6171875, "learning_rate": 0.0001919774260408798, "loss": 0.3346, "step": 2718 }, { "epoch": 0.12883203032456764, "grad_norm": 0.68359375, "learning_rate": 0.00019197158045461524, "loss": 0.1698, "step": 2719 }, { "epoch": 0.128879412461502, "grad_norm": 0.353515625, "learning_rate": 0.00019196573282851897, "loss": 0.0602, "step": 2720 }, { "epoch": 0.1289267945984364, "grad_norm": 0.251953125, "learning_rate": 0.00019195988316272073, "loss": 0.1667, "step": 2721 }, { "epoch": 0.12897417673537076, "grad_norm": 0.73046875, "learning_rate": 0.00019195403145735025, "loss": 0.5652, "step": 2722 }, { "epoch": 0.12902155887230515, "grad_norm": 0.431640625, "learning_rate": 0.00019194817771253726, "loss": 0.7821, "step": 2723 }, { "epoch": 0.12906894100923952, "grad_norm": 0.57421875, "learning_rate": 0.00019194232192841165, "loss": 0.9453, "step": 2724 }, { "epoch": 0.12911632314617388, "grad_norm": 0.91796875, "learning_rate": 0.00019193646410510325, "loss": 0.2774, "step": 2725 }, { "epoch": 0.12916370528310828, "grad_norm": 0.49609375, "learning_rate": 0.00019193060424274204, "loss": 0.0525, "step": 2726 }, { "epoch": 0.12921108742004264, "grad_norm": 0.3125, "learning_rate": 0.0001919247423414579, "loss": 0.1739, "step": 2727 }, { "epoch": 0.129258469556977, "grad_norm": 0.52734375, "learning_rate": 0.00019191887840138092, "loss": 0.6484, "step": 2728 }, { "epoch": 0.1293058516939114, "grad_norm": 1.3828125, "learning_rate": 0.0001919130124226411, "loss": 1.1583, "step": 2729 }, { "epoch": 0.12935323383084577, "grad_norm": 0.3828125, "learning_rate": 0.0001919071444053686, "loss": 0.7938, "step": 2730 }, { "epoch": 0.12940061596778016, "grad_norm": 0.62890625, "learning_rate": 0.0001919012743496935, "loss": 0.8336, "step": 2731 }, { "epoch": 0.12944799810471452, "grad_norm": 0.80859375, "learning_rate": 0.00019189540225574608, "loss": 0.5599, "step": 2732 }, { "epoch": 0.1294953802416489, "grad_norm": 0.61328125, "learning_rate": 0.00019188952812365645, "loss": 1.0495, "step": 2733 }, { "epoch": 0.12954276237858328, "grad_norm": 0.1865234375, "learning_rate": 0.000191883651953555, "loss": 0.0177, "step": 2734 }, { "epoch": 0.12959014451551765, "grad_norm": 0.54296875, "learning_rate": 0.000191877773745572, "loss": 0.4574, "step": 2735 }, { "epoch": 0.129637526652452, "grad_norm": 0.69921875, "learning_rate": 0.00019187189349983787, "loss": 0.2015, "step": 2736 }, { "epoch": 0.1296849087893864, "grad_norm": 0.62109375, "learning_rate": 0.000191866011216483, "loss": 1.0366, "step": 2737 }, { "epoch": 0.12973229092632077, "grad_norm": 0.494140625, "learning_rate": 0.00019186012689563785, "loss": 0.2846, "step": 2738 }, { "epoch": 0.12977967306325516, "grad_norm": 0.8046875, "learning_rate": 0.0001918542405374329, "loss": 1.2374, "step": 2739 }, { "epoch": 0.12982705520018953, "grad_norm": 0.2734375, "learning_rate": 0.00019184835214199874, "loss": 0.2086, "step": 2740 }, { "epoch": 0.1298744373371239, "grad_norm": 0.6484375, "learning_rate": 0.00019184246170946597, "loss": 1.0787, "step": 2741 }, { "epoch": 0.1299218194740583, "grad_norm": 0.55078125, "learning_rate": 0.00019183656923996525, "loss": 0.8791, "step": 2742 }, { "epoch": 0.12996920161099265, "grad_norm": 0.447265625, "learning_rate": 0.00019183067473362722, "loss": 0.7641, "step": 2743 }, { "epoch": 0.13001658374792704, "grad_norm": 0.376953125, "learning_rate": 0.00019182477819058264, "loss": 0.735, "step": 2744 }, { "epoch": 0.1300639658848614, "grad_norm": 0.7890625, "learning_rate": 0.0001918188796109623, "loss": 0.598, "step": 2745 }, { "epoch": 0.13011134802179578, "grad_norm": 0.5703125, "learning_rate": 0.00019181297899489698, "loss": 0.1809, "step": 2746 }, { "epoch": 0.13015873015873017, "grad_norm": 0.6328125, "learning_rate": 0.00019180707634251762, "loss": 1.3001, "step": 2747 }, { "epoch": 0.13020611229566453, "grad_norm": 0.0234375, "learning_rate": 0.0001918011716539551, "loss": 0.0017, "step": 2748 }, { "epoch": 0.1302534944325989, "grad_norm": 1.0, "learning_rate": 0.00019179526492934032, "loss": 0.2991, "step": 2749 }, { "epoch": 0.1303008765695333, "grad_norm": 0.47265625, "learning_rate": 0.00019178935616880435, "loss": 0.6896, "step": 2750 }, { "epoch": 0.13034825870646766, "grad_norm": 0.6796875, "learning_rate": 0.00019178344537247827, "loss": 1.1772, "step": 2751 }, { "epoch": 0.13039564084340205, "grad_norm": 0.71875, "learning_rate": 0.0001917775325404931, "loss": 0.8827, "step": 2752 }, { "epoch": 0.13044302298033641, "grad_norm": 0.87890625, "learning_rate": 0.00019177161767298004, "loss": 0.3459, "step": 2753 }, { "epoch": 0.13049040511727078, "grad_norm": 0.3203125, "learning_rate": 0.00019176570077007025, "loss": 0.0461, "step": 2754 }, { "epoch": 0.13053778725420517, "grad_norm": 0.59375, "learning_rate": 0.00019175978183189494, "loss": 1.1708, "step": 2755 }, { "epoch": 0.13058516939113954, "grad_norm": 0.515625, "learning_rate": 0.00019175386085858542, "loss": 0.1037, "step": 2756 }, { "epoch": 0.1306325515280739, "grad_norm": 0.59375, "learning_rate": 0.000191747937850273, "loss": 1.2245, "step": 2757 }, { "epoch": 0.1306799336650083, "grad_norm": 0.5390625, "learning_rate": 0.00019174201280708903, "loss": 0.968, "step": 2758 }, { "epoch": 0.13072731580194266, "grad_norm": 0.609375, "learning_rate": 0.00019173608572916497, "loss": 0.7292, "step": 2759 }, { "epoch": 0.13077469793887705, "grad_norm": 0.578125, "learning_rate": 0.00019173015661663222, "loss": 1.3887, "step": 2760 }, { "epoch": 0.13082208007581142, "grad_norm": 0.5703125, "learning_rate": 0.0001917242254696223, "loss": 1.1602, "step": 2761 }, { "epoch": 0.13086946221274579, "grad_norm": 0.62890625, "learning_rate": 0.00019171829228826676, "loss": 1.1013, "step": 2762 }, { "epoch": 0.13091684434968018, "grad_norm": 0.388671875, "learning_rate": 0.00019171235707269717, "loss": 0.5124, "step": 2763 }, { "epoch": 0.13096422648661454, "grad_norm": 0.322265625, "learning_rate": 0.00019170641982304524, "loss": 0.2174, "step": 2764 }, { "epoch": 0.1310116086235489, "grad_norm": 0.578125, "learning_rate": 0.00019170048053944255, "loss": 0.9312, "step": 2765 }, { "epoch": 0.1310589907604833, "grad_norm": 0.0203857421875, "learning_rate": 0.0001916945392220209, "loss": 0.0015, "step": 2766 }, { "epoch": 0.13110637289741767, "grad_norm": 1.171875, "learning_rate": 0.00019168859587091205, "loss": 0.5787, "step": 2767 }, { "epoch": 0.13115375503435206, "grad_norm": 0.515625, "learning_rate": 0.00019168265048624775, "loss": 0.7287, "step": 2768 }, { "epoch": 0.13120113717128642, "grad_norm": 0.1630859375, "learning_rate": 0.00019167670306815998, "loss": 0.0285, "step": 2769 }, { "epoch": 0.1312485193082208, "grad_norm": 0.58984375, "learning_rate": 0.00019167075361678054, "loss": 0.8246, "step": 2770 }, { "epoch": 0.13129590144515518, "grad_norm": 0.486328125, "learning_rate": 0.00019166480213224146, "loss": 0.5416, "step": 2771 }, { "epoch": 0.13134328358208955, "grad_norm": 0.2314453125, "learning_rate": 0.00019165884861467472, "loss": 0.0086, "step": 2772 }, { "epoch": 0.13139066571902394, "grad_norm": 0.59375, "learning_rate": 0.00019165289306421232, "loss": 1.7462, "step": 2773 }, { "epoch": 0.1314380478559583, "grad_norm": 0.59375, "learning_rate": 0.0001916469354809864, "loss": 1.0287, "step": 2774 }, { "epoch": 0.13148542999289267, "grad_norm": 0.10546875, "learning_rate": 0.00019164097586512902, "loss": 0.0097, "step": 2775 }, { "epoch": 0.13153281212982706, "grad_norm": 0.5703125, "learning_rate": 0.00019163501421677243, "loss": 1.4429, "step": 2776 }, { "epoch": 0.13158019426676143, "grad_norm": 0.75, "learning_rate": 0.00019162905053604886, "loss": 1.0607, "step": 2777 }, { "epoch": 0.1316275764036958, "grad_norm": 0.41015625, "learning_rate": 0.00019162308482309053, "loss": 0.9388, "step": 2778 }, { "epoch": 0.1316749585406302, "grad_norm": 0.75390625, "learning_rate": 0.00019161711707802977, "loss": 0.1465, "step": 2779 }, { "epoch": 0.13172234067756455, "grad_norm": 0.625, "learning_rate": 0.00019161114730099893, "loss": 1.1465, "step": 2780 }, { "epoch": 0.13176972281449895, "grad_norm": 0.314453125, "learning_rate": 0.00019160517549213042, "loss": 0.1622, "step": 2781 }, { "epoch": 0.1318171049514333, "grad_norm": 0.578125, "learning_rate": 0.00019159920165155668, "loss": 1.3999, "step": 2782 }, { "epoch": 0.13186448708836768, "grad_norm": 0.828125, "learning_rate": 0.00019159322577941024, "loss": 0.3647, "step": 2783 }, { "epoch": 0.13191186922530207, "grad_norm": 0.578125, "learning_rate": 0.00019158724787582363, "loss": 1.1849, "step": 2784 }, { "epoch": 0.13195925136223643, "grad_norm": 0.54296875, "learning_rate": 0.0001915812679409294, "loss": 0.7259, "step": 2785 }, { "epoch": 0.1320066334991708, "grad_norm": 0.54296875, "learning_rate": 0.00019157528597486017, "loss": 0.9717, "step": 2786 }, { "epoch": 0.1320540156361052, "grad_norm": 0.380859375, "learning_rate": 0.00019156930197774866, "loss": 0.0891, "step": 2787 }, { "epoch": 0.13210139777303956, "grad_norm": 1.0859375, "learning_rate": 0.00019156331594972757, "loss": 0.2622, "step": 2788 }, { "epoch": 0.13214877990997395, "grad_norm": 0.53515625, "learning_rate": 0.00019155732789092965, "loss": 1.1889, "step": 2789 }, { "epoch": 0.13219616204690832, "grad_norm": 0.5078125, "learning_rate": 0.00019155133780148772, "loss": 0.9256, "step": 2790 }, { "epoch": 0.13224354418384268, "grad_norm": 0.6015625, "learning_rate": 0.00019154534568153464, "loss": 0.9825, "step": 2791 }, { "epoch": 0.13229092632077707, "grad_norm": 0.578125, "learning_rate": 0.0001915393515312033, "loss": 1.0119, "step": 2792 }, { "epoch": 0.13233830845771144, "grad_norm": 0.546875, "learning_rate": 0.00019153335535062666, "loss": 0.9351, "step": 2793 }, { "epoch": 0.1323856905946458, "grad_norm": 0.50390625, "learning_rate": 0.00019152735713993767, "loss": 0.7316, "step": 2794 }, { "epoch": 0.1324330727315802, "grad_norm": 0.60546875, "learning_rate": 0.0001915213568992694, "loss": 0.9324, "step": 2795 }, { "epoch": 0.13248045486851456, "grad_norm": 0.5234375, "learning_rate": 0.00019151535462875495, "loss": 0.7162, "step": 2796 }, { "epoch": 0.13252783700544896, "grad_norm": 0.470703125, "learning_rate": 0.00019150935032852736, "loss": 0.7289, "step": 2797 }, { "epoch": 0.13257521914238332, "grad_norm": 0.61328125, "learning_rate": 0.0001915033439987199, "loss": 1.1233, "step": 2798 }, { "epoch": 0.1326226012793177, "grad_norm": 0.73046875, "learning_rate": 0.00019149733563946568, "loss": 0.9656, "step": 2799 }, { "epoch": 0.13266998341625208, "grad_norm": 0.5546875, "learning_rate": 0.00019149132525089806, "loss": 1.158, "step": 2800 }, { "epoch": 0.13271736555318644, "grad_norm": 0.58984375, "learning_rate": 0.00019148531283315028, "loss": 0.8042, "step": 2801 }, { "epoch": 0.13276474769012084, "grad_norm": 0.6953125, "learning_rate": 0.00019147929838635573, "loss": 0.2657, "step": 2802 }, { "epoch": 0.1328121298270552, "grad_norm": 0.443359375, "learning_rate": 0.00019147328191064774, "loss": 0.6868, "step": 2803 }, { "epoch": 0.13285951196398957, "grad_norm": 0.5625, "learning_rate": 0.0001914672634061598, "loss": 0.9452, "step": 2804 }, { "epoch": 0.13290689410092396, "grad_norm": 0.408203125, "learning_rate": 0.0001914612428730254, "loss": 0.5108, "step": 2805 }, { "epoch": 0.13295427623785833, "grad_norm": 0.486328125, "learning_rate": 0.00019145522031137807, "loss": 0.863, "step": 2806 }, { "epoch": 0.1330016583747927, "grad_norm": 0.51171875, "learning_rate": 0.00019144919572135135, "loss": 0.0661, "step": 2807 }, { "epoch": 0.13304904051172708, "grad_norm": 0.5234375, "learning_rate": 0.00019144316910307883, "loss": 0.8761, "step": 2808 }, { "epoch": 0.13309642264866145, "grad_norm": 0.447265625, "learning_rate": 0.00019143714045669427, "loss": 0.0686, "step": 2809 }, { "epoch": 0.13314380478559584, "grad_norm": 0.2392578125, "learning_rate": 0.00019143110978233133, "loss": 0.0148, "step": 2810 }, { "epoch": 0.1331911869225302, "grad_norm": 0.49609375, "learning_rate": 0.00019142507708012376, "loss": 0.0215, "step": 2811 }, { "epoch": 0.13323856905946457, "grad_norm": 0.10595703125, "learning_rate": 0.00019141904235020537, "loss": 0.0066, "step": 2812 }, { "epoch": 0.13328595119639897, "grad_norm": 0.53125, "learning_rate": 0.00019141300559270995, "loss": 0.6714, "step": 2813 }, { "epoch": 0.13333333333333333, "grad_norm": 0.51171875, "learning_rate": 0.00019140696680777146, "loss": 0.7004, "step": 2814 }, { "epoch": 0.1333807154702677, "grad_norm": 0.8671875, "learning_rate": 0.00019140092599552385, "loss": 1.147, "step": 2815 }, { "epoch": 0.1334280976072021, "grad_norm": 0.10400390625, "learning_rate": 0.000191394883156101, "loss": 0.0055, "step": 2816 }, { "epoch": 0.13347547974413645, "grad_norm": 0.87890625, "learning_rate": 0.000191388838289637, "loss": 0.4192, "step": 2817 }, { "epoch": 0.13352286188107085, "grad_norm": 1.90625, "learning_rate": 0.00019138279139626594, "loss": 0.714, "step": 2818 }, { "epoch": 0.1335702440180052, "grad_norm": 0.59375, "learning_rate": 0.00019137674247612186, "loss": 0.732, "step": 2819 }, { "epoch": 0.13361762615493958, "grad_norm": 0.455078125, "learning_rate": 0.000191370691529339, "loss": 0.4689, "step": 2820 }, { "epoch": 0.13366500829187397, "grad_norm": 0.62890625, "learning_rate": 0.0001913646385560515, "loss": 0.087, "step": 2821 }, { "epoch": 0.13371239042880834, "grad_norm": 0.55859375, "learning_rate": 0.00019135858355639367, "loss": 0.7453, "step": 2822 }, { "epoch": 0.1337597725657427, "grad_norm": 0.6015625, "learning_rate": 0.00019135252653049975, "loss": 0.992, "step": 2823 }, { "epoch": 0.1338071547026771, "grad_norm": 0.67578125, "learning_rate": 0.00019134646747850408, "loss": 0.8448, "step": 2824 }, { "epoch": 0.13385453683961146, "grad_norm": 0.57421875, "learning_rate": 0.00019134040640054106, "loss": 0.6233, "step": 2825 }, { "epoch": 0.13390191897654585, "grad_norm": 0.01019287109375, "learning_rate": 0.00019133434329674512, "loss": 0.0008, "step": 2826 }, { "epoch": 0.13394930111348022, "grad_norm": 0.6640625, "learning_rate": 0.00019132827816725073, "loss": 1.0276, "step": 2827 }, { "epoch": 0.13399668325041458, "grad_norm": 0.546875, "learning_rate": 0.00019132221101219243, "loss": 1.1445, "step": 2828 }, { "epoch": 0.13404406538734898, "grad_norm": 0.337890625, "learning_rate": 0.00019131614183170477, "loss": 0.0918, "step": 2829 }, { "epoch": 0.13409144752428334, "grad_norm": 0.4765625, "learning_rate": 0.00019131007062592233, "loss": 0.8945, "step": 2830 }, { "epoch": 0.13413882966121773, "grad_norm": 0.453125, "learning_rate": 0.00019130399739497977, "loss": 0.3067, "step": 2831 }, { "epoch": 0.1341862117981521, "grad_norm": 0.609375, "learning_rate": 0.00019129792213901182, "loss": 1.005, "step": 2832 }, { "epoch": 0.13423359393508646, "grad_norm": 0.302734375, "learning_rate": 0.0001912918448581532, "loss": 0.0211, "step": 2833 }, { "epoch": 0.13428097607202086, "grad_norm": 0.72265625, "learning_rate": 0.00019128576555253868, "loss": 1.0393, "step": 2834 }, { "epoch": 0.13432835820895522, "grad_norm": 0.5546875, "learning_rate": 0.00019127968422230315, "loss": 0.6882, "step": 2835 }, { "epoch": 0.1343757403458896, "grad_norm": 0.0927734375, "learning_rate": 0.0001912736008675814, "loss": 0.0056, "step": 2836 }, { "epoch": 0.13442312248282398, "grad_norm": 0.63671875, "learning_rate": 0.00019126751548850844, "loss": 1.6895, "step": 2837 }, { "epoch": 0.13447050461975835, "grad_norm": 0.84375, "learning_rate": 0.00019126142808521918, "loss": 0.7938, "step": 2838 }, { "epoch": 0.13451788675669274, "grad_norm": 0.609375, "learning_rate": 0.00019125533865784868, "loss": 1.026, "step": 2839 }, { "epoch": 0.1345652688936271, "grad_norm": 0.0155029296875, "learning_rate": 0.00019124924720653196, "loss": 0.0009, "step": 2840 }, { "epoch": 0.13461265103056147, "grad_norm": 0.578125, "learning_rate": 0.0001912431537314041, "loss": 0.8351, "step": 2841 }, { "epoch": 0.13466003316749586, "grad_norm": 0.4921875, "learning_rate": 0.00019123705823260033, "loss": 0.245, "step": 2842 }, { "epoch": 0.13470741530443023, "grad_norm": 0.48046875, "learning_rate": 0.00019123096071025576, "loss": 0.8535, "step": 2843 }, { "epoch": 0.1347547974413646, "grad_norm": 0.59375, "learning_rate": 0.00019122486116450566, "loss": 0.469, "step": 2844 }, { "epoch": 0.134802179578299, "grad_norm": 0.640625, "learning_rate": 0.0001912187595954853, "loss": 0.3835, "step": 2845 }, { "epoch": 0.13484956171523335, "grad_norm": 0.2236328125, "learning_rate": 0.00019121265600333004, "loss": 0.022, "step": 2846 }, { "epoch": 0.13489694385216774, "grad_norm": 0.546875, "learning_rate": 0.0001912065503881752, "loss": 0.1573, "step": 2847 }, { "epoch": 0.1349443259891021, "grad_norm": 0.208984375, "learning_rate": 0.00019120044275015624, "loss": 0.1412, "step": 2848 }, { "epoch": 0.13499170812603647, "grad_norm": 0.54296875, "learning_rate": 0.00019119433308940857, "loss": 1.1661, "step": 2849 }, { "epoch": 0.13503909026297087, "grad_norm": 0.76953125, "learning_rate": 0.00019118822140606776, "loss": 1.1511, "step": 2850 }, { "epoch": 0.13508647239990523, "grad_norm": 1.1875, "learning_rate": 0.0001911821077002693, "loss": 0.3723, "step": 2851 }, { "epoch": 0.1351338545368396, "grad_norm": 0.8046875, "learning_rate": 0.00019117599197214884, "loss": 0.1053, "step": 2852 }, { "epoch": 0.135181236673774, "grad_norm": 0.26171875, "learning_rate": 0.00019116987422184198, "loss": 0.1539, "step": 2853 }, { "epoch": 0.13522861881070836, "grad_norm": 0.59765625, "learning_rate": 0.0001911637544494844, "loss": 0.8968, "step": 2854 }, { "epoch": 0.13527600094764275, "grad_norm": 0.69921875, "learning_rate": 0.0001911576326552119, "loss": 0.7788, "step": 2855 }, { "epoch": 0.13532338308457711, "grad_norm": 0.002777099609375, "learning_rate": 0.00019115150883916018, "loss": 0.0002, "step": 2856 }, { "epoch": 0.13537076522151148, "grad_norm": 0.41015625, "learning_rate": 0.00019114538300146508, "loss": 0.1204, "step": 2857 }, { "epoch": 0.13541814735844587, "grad_norm": 0.400390625, "learning_rate": 0.00019113925514226246, "loss": 0.0465, "step": 2858 }, { "epoch": 0.13546552949538024, "grad_norm": 0.55078125, "learning_rate": 0.00019113312526168827, "loss": 0.9245, "step": 2859 }, { "epoch": 0.13551291163231463, "grad_norm": 0.796875, "learning_rate": 0.00019112699335987842, "loss": 0.3414, "step": 2860 }, { "epoch": 0.135560293769249, "grad_norm": 0.49609375, "learning_rate": 0.00019112085943696888, "loss": 0.7549, "step": 2861 }, { "epoch": 0.13560767590618336, "grad_norm": 0.5859375, "learning_rate": 0.00019111472349309577, "loss": 0.6086, "step": 2862 }, { "epoch": 0.13565505804311775, "grad_norm": 0.470703125, "learning_rate": 0.00019110858552839517, "loss": 0.5047, "step": 2863 }, { "epoch": 0.13570244018005212, "grad_norm": 0.55859375, "learning_rate": 0.00019110244554300314, "loss": 0.9906, "step": 2864 }, { "epoch": 0.13574982231698648, "grad_norm": 1.015625, "learning_rate": 0.00019109630353705592, "loss": 0.1466, "step": 2865 }, { "epoch": 0.13579720445392088, "grad_norm": 0.15234375, "learning_rate": 0.00019109015951068975, "loss": 0.0174, "step": 2866 }, { "epoch": 0.13584458659085524, "grad_norm": 0.2421875, "learning_rate": 0.00019108401346404084, "loss": 0.0408, "step": 2867 }, { "epoch": 0.13589196872778964, "grad_norm": 0.69140625, "learning_rate": 0.00019107786539724554, "loss": 1.2381, "step": 2868 }, { "epoch": 0.135939350864724, "grad_norm": 0.5859375, "learning_rate": 0.00019107171531044018, "loss": 0.967, "step": 2869 }, { "epoch": 0.13598673300165837, "grad_norm": 0.2197265625, "learning_rate": 0.0001910655632037612, "loss": 0.0303, "step": 2870 }, { "epoch": 0.13603411513859276, "grad_norm": 0.65234375, "learning_rate": 0.00019105940907734498, "loss": 0.6367, "step": 2871 }, { "epoch": 0.13608149727552712, "grad_norm": 0.67578125, "learning_rate": 0.00019105325293132812, "loss": 0.944, "step": 2872 }, { "epoch": 0.1361288794124615, "grad_norm": 0.78515625, "learning_rate": 0.00019104709476584707, "loss": 1.4161, "step": 2873 }, { "epoch": 0.13617626154939588, "grad_norm": 0.44921875, "learning_rate": 0.00019104093458103841, "loss": 0.6082, "step": 2874 }, { "epoch": 0.13622364368633025, "grad_norm": 0.59375, "learning_rate": 0.00019103477237703885, "loss": 1.1064, "step": 2875 }, { "epoch": 0.13627102582326464, "grad_norm": 0.02099609375, "learning_rate": 0.00019102860815398495, "loss": 0.0011, "step": 2876 }, { "epoch": 0.136318407960199, "grad_norm": 0.482421875, "learning_rate": 0.0001910224419120135, "loss": 0.8008, "step": 2877 }, { "epoch": 0.13636579009713337, "grad_norm": 0.451171875, "learning_rate": 0.00019101627365126125, "loss": 0.0692, "step": 2878 }, { "epoch": 0.13641317223406776, "grad_norm": 0.2177734375, "learning_rate": 0.000191010103371865, "loss": 0.0145, "step": 2879 }, { "epoch": 0.13646055437100213, "grad_norm": 1.1328125, "learning_rate": 0.00019100393107396158, "loss": 0.2141, "step": 2880 }, { "epoch": 0.1365079365079365, "grad_norm": 0.3203125, "learning_rate": 0.00019099775675768791, "loss": 0.0824, "step": 2881 }, { "epoch": 0.1365553186448709, "grad_norm": 0.609375, "learning_rate": 0.00019099158042318096, "loss": 0.1288, "step": 2882 }, { "epoch": 0.13660270078180525, "grad_norm": 0.00469970703125, "learning_rate": 0.0001909854020705776, "loss": 0.0003, "step": 2883 }, { "epoch": 0.13665008291873965, "grad_norm": 0.671875, "learning_rate": 0.00019097922170001498, "loss": 0.8375, "step": 2884 }, { "epoch": 0.136697465055674, "grad_norm": 0.48828125, "learning_rate": 0.00019097303931163014, "loss": 1.0394, "step": 2885 }, { "epoch": 0.13674484719260838, "grad_norm": 0.52734375, "learning_rate": 0.00019096685490556018, "loss": 0.7349, "step": 2886 }, { "epoch": 0.13679222932954277, "grad_norm": 0.74609375, "learning_rate": 0.00019096066848194225, "loss": 1.3313, "step": 2887 }, { "epoch": 0.13683961146647713, "grad_norm": 0.74609375, "learning_rate": 0.00019095448004091358, "loss": 1.1952, "step": 2888 }, { "epoch": 0.13688699360341153, "grad_norm": 0.044189453125, "learning_rate": 0.00019094828958261145, "loss": 0.002, "step": 2889 }, { "epoch": 0.1369343757403459, "grad_norm": 0.625, "learning_rate": 0.0001909420971071731, "loss": 1.0378, "step": 2890 }, { "epoch": 0.13698175787728026, "grad_norm": 0.55859375, "learning_rate": 0.00019093590261473592, "loss": 1.2131, "step": 2891 }, { "epoch": 0.13702914001421465, "grad_norm": 1.078125, "learning_rate": 0.0001909297061054373, "loss": 0.2519, "step": 2892 }, { "epoch": 0.13707652215114902, "grad_norm": 0.36328125, "learning_rate": 0.00019092350757941463, "loss": 0.1355, "step": 2893 }, { "epoch": 0.13712390428808338, "grad_norm": 0.55078125, "learning_rate": 0.0001909173070368054, "loss": 0.0684, "step": 2894 }, { "epoch": 0.13717128642501777, "grad_norm": 0.59765625, "learning_rate": 0.0001909111044777472, "loss": 1.0816, "step": 2895 }, { "epoch": 0.13721866856195214, "grad_norm": 0.59765625, "learning_rate": 0.00019090489990237746, "loss": 0.1926, "step": 2896 }, { "epoch": 0.13726605069888653, "grad_norm": 0.5546875, "learning_rate": 0.0001908986933108339, "loss": 0.7542, "step": 2897 }, { "epoch": 0.1373134328358209, "grad_norm": 1.171875, "learning_rate": 0.00019089248470325414, "loss": 1.1729, "step": 2898 }, { "epoch": 0.13736081497275526, "grad_norm": 0.5859375, "learning_rate": 0.00019088627407977588, "loss": 1.0235, "step": 2899 }, { "epoch": 0.13740819710968966, "grad_norm": 0.482421875, "learning_rate": 0.00019088006144053686, "loss": 1.096, "step": 2900 }, { "epoch": 0.13745557924662402, "grad_norm": 0.294921875, "learning_rate": 0.0001908738467856749, "loss": 0.1479, "step": 2901 }, { "epoch": 0.1375029613835584, "grad_norm": 0.5234375, "learning_rate": 0.0001908676301153278, "loss": 0.6035, "step": 2902 }, { "epoch": 0.13755034352049278, "grad_norm": 0.2001953125, "learning_rate": 0.0001908614114296335, "loss": 0.0202, "step": 2903 }, { "epoch": 0.13759772565742714, "grad_norm": 0.271484375, "learning_rate": 0.0001908551907287298, "loss": 0.1763, "step": 2904 }, { "epoch": 0.13764510779436154, "grad_norm": 0.640625, "learning_rate": 0.00019084896801275478, "loss": 1.3041, "step": 2905 }, { "epoch": 0.1376924899312959, "grad_norm": 0.498046875, "learning_rate": 0.0001908427432818464, "loss": 1.2016, "step": 2906 }, { "epoch": 0.13773987206823027, "grad_norm": 0.59765625, "learning_rate": 0.00019083651653614277, "loss": 1.1691, "step": 2907 }, { "epoch": 0.13778725420516466, "grad_norm": 0.47265625, "learning_rate": 0.00019083028777578192, "loss": 0.8341, "step": 2908 }, { "epoch": 0.13783463634209903, "grad_norm": 0.244140625, "learning_rate": 0.00019082405700090207, "loss": 0.1651, "step": 2909 }, { "epoch": 0.1378820184790334, "grad_norm": 0.578125, "learning_rate": 0.00019081782421164137, "loss": 1.4576, "step": 2910 }, { "epoch": 0.13792940061596778, "grad_norm": 0.5859375, "learning_rate": 0.00019081158940813806, "loss": 0.9515, "step": 2911 }, { "epoch": 0.13797678275290215, "grad_norm": 0.435546875, "learning_rate": 0.0001908053525905304, "loss": 0.2158, "step": 2912 }, { "epoch": 0.13802416488983654, "grad_norm": 0.7265625, "learning_rate": 0.00019079911375895675, "loss": 1.3892, "step": 2913 }, { "epoch": 0.1380715470267709, "grad_norm": 0.5703125, "learning_rate": 0.0001907928729135555, "loss": 0.9338, "step": 2914 }, { "epoch": 0.13811892916370527, "grad_norm": 0.361328125, "learning_rate": 0.000190786630054465, "loss": 0.0649, "step": 2915 }, { "epoch": 0.13816631130063967, "grad_norm": 0.58984375, "learning_rate": 0.00019078038518182376, "loss": 0.9949, "step": 2916 }, { "epoch": 0.13821369343757403, "grad_norm": 0.62890625, "learning_rate": 0.0001907741382957703, "loss": 1.1079, "step": 2917 }, { "epoch": 0.13826107557450842, "grad_norm": 0.482421875, "learning_rate": 0.00019076788939644313, "loss": 0.1373, "step": 2918 }, { "epoch": 0.1383084577114428, "grad_norm": 0.49609375, "learning_rate": 0.0001907616384839808, "loss": 0.8246, "step": 2919 }, { "epoch": 0.13835583984837715, "grad_norm": 0.56640625, "learning_rate": 0.00019075538555852207, "loss": 1.201, "step": 2920 }, { "epoch": 0.13840322198531155, "grad_norm": 0.69140625, "learning_rate": 0.00019074913062020553, "loss": 0.1198, "step": 2921 }, { "epoch": 0.1384506041222459, "grad_norm": 0.50390625, "learning_rate": 0.00019074287366916995, "loss": 0.9153, "step": 2922 }, { "epoch": 0.13849798625918028, "grad_norm": 0.6328125, "learning_rate": 0.00019073661470555406, "loss": 1.2473, "step": 2923 }, { "epoch": 0.13854536839611467, "grad_norm": 0.53125, "learning_rate": 0.00019073035372949671, "loss": 0.9549, "step": 2924 }, { "epoch": 0.13859275053304904, "grad_norm": 0.97265625, "learning_rate": 0.00019072409074113677, "loss": 0.7476, "step": 2925 }, { "epoch": 0.13864013266998343, "grad_norm": 0.462890625, "learning_rate": 0.00019071782574061312, "loss": 0.6304, "step": 2926 }, { "epoch": 0.1386875148069178, "grad_norm": 0.57421875, "learning_rate": 0.00019071155872806475, "loss": 1.4026, "step": 2927 }, { "epoch": 0.13873489694385216, "grad_norm": 0.58203125, "learning_rate": 0.0001907052897036306, "loss": 0.1602, "step": 2928 }, { "epoch": 0.13878227908078655, "grad_norm": 0.52734375, "learning_rate": 0.00019069901866744976, "loss": 0.7066, "step": 2929 }, { "epoch": 0.13882966121772092, "grad_norm": 0.478515625, "learning_rate": 0.00019069274561966127, "loss": 0.9224, "step": 2930 }, { "epoch": 0.13887704335465528, "grad_norm": 0.546875, "learning_rate": 0.0001906864705604043, "loss": 1.169, "step": 2931 }, { "epoch": 0.13892442549158968, "grad_norm": 0.64453125, "learning_rate": 0.000190680193489818, "loss": 0.5324, "step": 2932 }, { "epoch": 0.13897180762852404, "grad_norm": 0.46875, "learning_rate": 0.00019067391440804162, "loss": 0.424, "step": 2933 }, { "epoch": 0.13901918976545843, "grad_norm": 0.038330078125, "learning_rate": 0.00019066763331521437, "loss": 0.0024, "step": 2934 }, { "epoch": 0.1390665719023928, "grad_norm": 0.53125, "learning_rate": 0.0001906613502114756, "loss": 0.7701, "step": 2935 }, { "epoch": 0.13911395403932716, "grad_norm": 0.625, "learning_rate": 0.00019065506509696465, "loss": 1.1735, "step": 2936 }, { "epoch": 0.13916133617626156, "grad_norm": 0.62890625, "learning_rate": 0.0001906487779718209, "loss": 1.383, "step": 2937 }, { "epoch": 0.13920871831319592, "grad_norm": 0.4453125, "learning_rate": 0.00019064248883618383, "loss": 0.4393, "step": 2938 }, { "epoch": 0.1392561004501303, "grad_norm": 0.73046875, "learning_rate": 0.0001906361976901929, "loss": 0.4204, "step": 2939 }, { "epoch": 0.13930348258706468, "grad_norm": 0.5625, "learning_rate": 0.00019062990453398766, "loss": 1.2174, "step": 2940 }, { "epoch": 0.13935086472399905, "grad_norm": 0.5234375, "learning_rate": 0.00019062360936770767, "loss": 0.6505, "step": 2941 }, { "epoch": 0.13939824686093344, "grad_norm": 0.796875, "learning_rate": 0.00019061731219149251, "loss": 1.3342, "step": 2942 }, { "epoch": 0.1394456289978678, "grad_norm": 0.625, "learning_rate": 0.00019061101300548195, "loss": 1.5339, "step": 2943 }, { "epoch": 0.13949301113480217, "grad_norm": 0.953125, "learning_rate": 0.00019060471180981558, "loss": 1.3235, "step": 2944 }, { "epoch": 0.13954039327173656, "grad_norm": 0.6796875, "learning_rate": 0.00019059840860463325, "loss": 1.2126, "step": 2945 }, { "epoch": 0.13958777540867093, "grad_norm": 0.61328125, "learning_rate": 0.0001905921033900747, "loss": 1.1159, "step": 2946 }, { "epoch": 0.13963515754560532, "grad_norm": 0.423828125, "learning_rate": 0.0001905857961662798, "loss": 0.1903, "step": 2947 }, { "epoch": 0.13968253968253969, "grad_norm": 0.70703125, "learning_rate": 0.00019057948693338843, "loss": 0.8658, "step": 2948 }, { "epoch": 0.13972992181947405, "grad_norm": 0.58203125, "learning_rate": 0.0001905731756915405, "loss": 0.0776, "step": 2949 }, { "epoch": 0.13977730395640844, "grad_norm": 0.455078125, "learning_rate": 0.00019056686244087602, "loss": 0.8222, "step": 2950 }, { "epoch": 0.1398246860933428, "grad_norm": 0.66015625, "learning_rate": 0.000190560547181535, "loss": 1.0425, "step": 2951 }, { "epoch": 0.13987206823027717, "grad_norm": 0.62109375, "learning_rate": 0.00019055422991365754, "loss": 0.9886, "step": 2952 }, { "epoch": 0.13991945036721157, "grad_norm": 0.48828125, "learning_rate": 0.00019054791063738365, "loss": 0.6058, "step": 2953 }, { "epoch": 0.13996683250414593, "grad_norm": 0.53125, "learning_rate": 0.00019054158935285357, "loss": 0.2055, "step": 2954 }, { "epoch": 0.14001421464108033, "grad_norm": 1.15625, "learning_rate": 0.0001905352660602075, "loss": 1.2486, "step": 2955 }, { "epoch": 0.1400615967780147, "grad_norm": 0.3046875, "learning_rate": 0.00019052894075958566, "loss": 0.1538, "step": 2956 }, { "epoch": 0.14010897891494906, "grad_norm": 0.47265625, "learning_rate": 0.0001905226134511283, "loss": 0.6826, "step": 2957 }, { "epoch": 0.14015636105188345, "grad_norm": 0.640625, "learning_rate": 0.00019051628413497583, "loss": 1.2466, "step": 2958 }, { "epoch": 0.14020374318881781, "grad_norm": 0.4609375, "learning_rate": 0.0001905099528112686, "loss": 0.7137, "step": 2959 }, { "epoch": 0.14025112532575218, "grad_norm": 1.0625, "learning_rate": 0.00019050361948014702, "loss": 0.0606, "step": 2960 }, { "epoch": 0.14029850746268657, "grad_norm": 0.58984375, "learning_rate": 0.00019049728414175153, "loss": 0.8559, "step": 2961 }, { "epoch": 0.14034588959962094, "grad_norm": 0.578125, "learning_rate": 0.00019049094679622267, "loss": 0.6058, "step": 2962 }, { "epoch": 0.14039327173655533, "grad_norm": 0.65234375, "learning_rate": 0.00019048460744370104, "loss": 0.1514, "step": 2963 }, { "epoch": 0.1404406538734897, "grad_norm": 0.50390625, "learning_rate": 0.00019047826608432718, "loss": 1.0782, "step": 2964 }, { "epoch": 0.14048803601042406, "grad_norm": 0.4765625, "learning_rate": 0.00019047192271824174, "loss": 0.1652, "step": 2965 }, { "epoch": 0.14053541814735845, "grad_norm": 0.66015625, "learning_rate": 0.00019046557734558542, "loss": 0.8274, "step": 2966 }, { "epoch": 0.14058280028429282, "grad_norm": 0.5, "learning_rate": 0.00019045922996649897, "loss": 0.9063, "step": 2967 }, { "epoch": 0.14063018242122718, "grad_norm": 0.62109375, "learning_rate": 0.00019045288058112313, "loss": 1.4254, "step": 2968 }, { "epoch": 0.14067756455816158, "grad_norm": 0.435546875, "learning_rate": 0.00019044652918959876, "loss": 0.1689, "step": 2969 }, { "epoch": 0.14072494669509594, "grad_norm": 1.109375, "learning_rate": 0.00019044017579206672, "loss": 0.1385, "step": 2970 }, { "epoch": 0.14077232883203034, "grad_norm": 0.6796875, "learning_rate": 0.00019043382038866789, "loss": 0.9092, "step": 2971 }, { "epoch": 0.1408197109689647, "grad_norm": 0.2490234375, "learning_rate": 0.00019042746297954328, "loss": 0.1566, "step": 2972 }, { "epoch": 0.14086709310589907, "grad_norm": 0.5859375, "learning_rate": 0.00019042110356483382, "loss": 1.2021, "step": 2973 }, { "epoch": 0.14091447524283346, "grad_norm": 0.7265625, "learning_rate": 0.0001904147421446806, "loss": 1.2113, "step": 2974 }, { "epoch": 0.14096185737976782, "grad_norm": 0.6328125, "learning_rate": 0.00019040837871922475, "loss": 1.3793, "step": 2975 }, { "epoch": 0.1410092395167022, "grad_norm": 0.95703125, "learning_rate": 0.00019040201328860733, "loss": 0.2159, "step": 2976 }, { "epoch": 0.14105662165363658, "grad_norm": 0.99609375, "learning_rate": 0.00019039564585296957, "loss": 0.1552, "step": 2977 }, { "epoch": 0.14110400379057095, "grad_norm": 0.640625, "learning_rate": 0.00019038927641245264, "loss": 1.4, "step": 2978 }, { "epoch": 0.14115138592750534, "grad_norm": 0.5390625, "learning_rate": 0.0001903829049671978, "loss": 0.9316, "step": 2979 }, { "epoch": 0.1411987680644397, "grad_norm": 0.6171875, "learning_rate": 0.00019037653151734646, "loss": 0.8791, "step": 2980 }, { "epoch": 0.14124615020137407, "grad_norm": 0.5546875, "learning_rate": 0.00019037015606303991, "loss": 1.2623, "step": 2981 }, { "epoch": 0.14129353233830846, "grad_norm": 0.5390625, "learning_rate": 0.00019036377860441955, "loss": 0.7812, "step": 2982 }, { "epoch": 0.14134091447524283, "grad_norm": 0.92578125, "learning_rate": 0.00019035739914162683, "loss": 0.2049, "step": 2983 }, { "epoch": 0.14138829661217722, "grad_norm": 0.5625, "learning_rate": 0.00019035101767480322, "loss": 1.0255, "step": 2984 }, { "epoch": 0.1414356787491116, "grad_norm": 0.4921875, "learning_rate": 0.0001903446342040903, "loss": 0.916, "step": 2985 }, { "epoch": 0.14148306088604595, "grad_norm": 0.7578125, "learning_rate": 0.00019033824872962963, "loss": 1.4644, "step": 2986 }, { "epoch": 0.14153044302298035, "grad_norm": 0.66015625, "learning_rate": 0.00019033186125156282, "loss": 1.173, "step": 2987 }, { "epoch": 0.1415778251599147, "grad_norm": 0.016845703125, "learning_rate": 0.00019032547177003152, "loss": 0.0009, "step": 2988 }, { "epoch": 0.14162520729684908, "grad_norm": 0.482421875, "learning_rate": 0.0001903190802851775, "loss": 0.8895, "step": 2989 }, { "epoch": 0.14167258943378347, "grad_norm": 0.6171875, "learning_rate": 0.0001903126867971425, "loss": 0.7037, "step": 2990 }, { "epoch": 0.14171997157071783, "grad_norm": 1.328125, "learning_rate": 0.00019030629130606825, "loss": 0.5518, "step": 2991 }, { "epoch": 0.14176735370765223, "grad_norm": 0.369140625, "learning_rate": 0.00019029989381209669, "loss": 0.968, "step": 2992 }, { "epoch": 0.1418147358445866, "grad_norm": 0.59765625, "learning_rate": 0.00019029349431536963, "loss": 0.8648, "step": 2993 }, { "epoch": 0.14186211798152096, "grad_norm": 0.51171875, "learning_rate": 0.0001902870928160291, "loss": 0.2046, "step": 2994 }, { "epoch": 0.14190950011845535, "grad_norm": 0.6953125, "learning_rate": 0.00019028068931421695, "loss": 1.3475, "step": 2995 }, { "epoch": 0.14195688225538972, "grad_norm": 0.478515625, "learning_rate": 0.00019027428381007534, "loss": 0.3664, "step": 2996 }, { "epoch": 0.14200426439232408, "grad_norm": 0.546875, "learning_rate": 0.00019026787630374621, "loss": 1.3609, "step": 2997 }, { "epoch": 0.14205164652925847, "grad_norm": 0.6328125, "learning_rate": 0.00019026146679537175, "loss": 1.3, "step": 2998 }, { "epoch": 0.14209902866619284, "grad_norm": 0.58984375, "learning_rate": 0.00019025505528509414, "loss": 0.9183, "step": 2999 }, { "epoch": 0.14214641080312723, "grad_norm": 0.7109375, "learning_rate": 0.00019024864177305547, "loss": 0.1665, "step": 3000 }, { "epoch": 0.1421937929400616, "grad_norm": 0.69921875, "learning_rate": 0.00019024222625939812, "loss": 0.9524, "step": 3001 }, { "epoch": 0.14224117507699596, "grad_norm": 0.8984375, "learning_rate": 0.0001902358087442643, "loss": 0.9756, "step": 3002 }, { "epoch": 0.14228855721393036, "grad_norm": 1.296875, "learning_rate": 0.00019022938922779633, "loss": 0.3226, "step": 3003 }, { "epoch": 0.14233593935086472, "grad_norm": 0.65625, "learning_rate": 0.00019022296771013664, "loss": 0.9146, "step": 3004 }, { "epoch": 0.14238332148779909, "grad_norm": 0.55078125, "learning_rate": 0.0001902165441914276, "loss": 1.1521, "step": 3005 }, { "epoch": 0.14243070362473348, "grad_norm": 0.53515625, "learning_rate": 0.0001902101186718117, "loss": 0.9258, "step": 3006 }, { "epoch": 0.14247808576166784, "grad_norm": 0.63671875, "learning_rate": 0.0001902036911514315, "loss": 1.0645, "step": 3007 }, { "epoch": 0.14252546789860224, "grad_norm": 0.6875, "learning_rate": 0.0001901972616304295, "loss": 1.4107, "step": 3008 }, { "epoch": 0.1425728500355366, "grad_norm": 0.71875, "learning_rate": 0.0001901908301089483, "loss": 1.3563, "step": 3009 }, { "epoch": 0.14262023217247097, "grad_norm": 0.75, "learning_rate": 0.00019018439658713055, "loss": 0.1836, "step": 3010 }, { "epoch": 0.14266761430940536, "grad_norm": 0.74609375, "learning_rate": 0.00019017796106511893, "loss": 0.0152, "step": 3011 }, { "epoch": 0.14271499644633973, "grad_norm": 0.63671875, "learning_rate": 0.0001901715235430562, "loss": 1.1516, "step": 3012 }, { "epoch": 0.14276237858327412, "grad_norm": 0.46484375, "learning_rate": 0.00019016508402108513, "loss": 1.2557, "step": 3013 }, { "epoch": 0.14280976072020848, "grad_norm": 0.6171875, "learning_rate": 0.00019015864249934854, "loss": 1.0156, "step": 3014 }, { "epoch": 0.14285714285714285, "grad_norm": 0.65234375, "learning_rate": 0.00019015219897798927, "loss": 1.5077, "step": 3015 }, { "epoch": 0.14290452499407724, "grad_norm": 0.53125, "learning_rate": 0.00019014575345715028, "loss": 0.8094, "step": 3016 }, { "epoch": 0.1429519071310116, "grad_norm": 0.59375, "learning_rate": 0.0001901393059369745, "loss": 1.1134, "step": 3017 }, { "epoch": 0.14299928926794597, "grad_norm": 0.37109375, "learning_rate": 0.0001901328564176049, "loss": 0.5888, "step": 3018 }, { "epoch": 0.14304667140488037, "grad_norm": 0.5390625, "learning_rate": 0.00019012640489918456, "loss": 1.2123, "step": 3019 }, { "epoch": 0.14309405354181473, "grad_norm": 0.640625, "learning_rate": 0.00019011995138185656, "loss": 0.0636, "step": 3020 }, { "epoch": 0.14314143567874912, "grad_norm": 0.5234375, "learning_rate": 0.00019011349586576403, "loss": 0.9244, "step": 3021 }, { "epoch": 0.1431888178156835, "grad_norm": 0.66796875, "learning_rate": 0.00019010703835105016, "loss": 1.1969, "step": 3022 }, { "epoch": 0.14323619995261785, "grad_norm": 0.4921875, "learning_rate": 0.0001901005788378581, "loss": 0.0429, "step": 3023 }, { "epoch": 0.14328358208955225, "grad_norm": 0.2451171875, "learning_rate": 0.00019009411732633124, "loss": 0.128, "step": 3024 }, { "epoch": 0.1433309642264866, "grad_norm": 0.58984375, "learning_rate": 0.0001900876538166128, "loss": 1.0117, "step": 3025 }, { "epoch": 0.14337834636342098, "grad_norm": 0.20703125, "learning_rate": 0.00019008118830884615, "loss": 0.1456, "step": 3026 }, { "epoch": 0.14342572850035537, "grad_norm": 0.48828125, "learning_rate": 0.0001900747208031747, "loss": 1.0432, "step": 3027 }, { "epoch": 0.14347311063728974, "grad_norm": 0.54296875, "learning_rate": 0.00019006825129974186, "loss": 0.7482, "step": 3028 }, { "epoch": 0.14352049277422413, "grad_norm": 0.61328125, "learning_rate": 0.00019006177979869116, "loss": 1.1897, "step": 3029 }, { "epoch": 0.1435678749111585, "grad_norm": 0.6328125, "learning_rate": 0.00019005530630016612, "loss": 0.0816, "step": 3030 }, { "epoch": 0.14361525704809286, "grad_norm": 0.703125, "learning_rate": 0.00019004883080431034, "loss": 1.1493, "step": 3031 }, { "epoch": 0.14366263918502725, "grad_norm": 0.5234375, "learning_rate": 0.00019004235331126736, "loss": 0.0867, "step": 3032 }, { "epoch": 0.14371002132196162, "grad_norm": 0.5625, "learning_rate": 0.00019003587382118088, "loss": 1.1148, "step": 3033 }, { "epoch": 0.14375740345889598, "grad_norm": 0.0164794921875, "learning_rate": 0.00019002939233419465, "loss": 0.0006, "step": 3034 }, { "epoch": 0.14380478559583038, "grad_norm": 0.61328125, "learning_rate": 0.00019002290885045242, "loss": 0.1768, "step": 3035 }, { "epoch": 0.14385216773276474, "grad_norm": 0.62890625, "learning_rate": 0.00019001642337009792, "loss": 0.8938, "step": 3036 }, { "epoch": 0.14389954986969913, "grad_norm": 0.61328125, "learning_rate": 0.00019000993589327503, "loss": 0.9925, "step": 3037 }, { "epoch": 0.1439469320066335, "grad_norm": 0.5390625, "learning_rate": 0.00019000344642012765, "loss": 0.9377, "step": 3038 }, { "epoch": 0.14399431414356786, "grad_norm": 0.72265625, "learning_rate": 0.0001899969549507997, "loss": 1.0997, "step": 3039 }, { "epoch": 0.14404169628050226, "grad_norm": 0.5703125, "learning_rate": 0.00018999046148543514, "loss": 0.2018, "step": 3040 }, { "epoch": 0.14408907841743662, "grad_norm": 0.5078125, "learning_rate": 0.000189983966024178, "loss": 0.7018, "step": 3041 }, { "epoch": 0.14413646055437102, "grad_norm": 0.220703125, "learning_rate": 0.00018997746856717234, "loss": 0.0291, "step": 3042 }, { "epoch": 0.14418384269130538, "grad_norm": 0.78125, "learning_rate": 0.00018997096911456228, "loss": 0.0811, "step": 3043 }, { "epoch": 0.14423122482823975, "grad_norm": 0.419921875, "learning_rate": 0.00018996446766649192, "loss": 0.0217, "step": 3044 }, { "epoch": 0.14427860696517414, "grad_norm": 0.66015625, "learning_rate": 0.00018995796422310553, "loss": 0.9543, "step": 3045 }, { "epoch": 0.1443259891021085, "grad_norm": 0.50390625, "learning_rate": 0.0001899514587845473, "loss": 0.0619, "step": 3046 }, { "epoch": 0.14437337123904287, "grad_norm": 0.2392578125, "learning_rate": 0.00018994495135096152, "loss": 0.1531, "step": 3047 }, { "epoch": 0.14442075337597726, "grad_norm": 0.2158203125, "learning_rate": 0.00018993844192249256, "loss": 0.0108, "step": 3048 }, { "epoch": 0.14446813551291163, "grad_norm": 0.89453125, "learning_rate": 0.0001899319304992847, "loss": 1.1234, "step": 3049 }, { "epoch": 0.14451551764984602, "grad_norm": 0.6015625, "learning_rate": 0.00018992541708148246, "loss": 1.077, "step": 3050 }, { "epoch": 0.14456289978678039, "grad_norm": 0.53515625, "learning_rate": 0.0001899189016692302, "loss": 0.5022, "step": 3051 }, { "epoch": 0.14461028192371475, "grad_norm": 0.5859375, "learning_rate": 0.00018991238426267252, "loss": 0.6068, "step": 3052 }, { "epoch": 0.14465766406064914, "grad_norm": 0.61328125, "learning_rate": 0.00018990586486195392, "loss": 1.5448, "step": 3053 }, { "epoch": 0.1447050461975835, "grad_norm": 0.404296875, "learning_rate": 0.00018989934346721901, "loss": 0.582, "step": 3054 }, { "epoch": 0.14475242833451787, "grad_norm": 0.490234375, "learning_rate": 0.0001898928200786124, "loss": 0.877, "step": 3055 }, { "epoch": 0.14479981047145227, "grad_norm": 0.9765625, "learning_rate": 0.0001898862946962788, "loss": 1.1089, "step": 3056 }, { "epoch": 0.14484719260838663, "grad_norm": 0.5703125, "learning_rate": 0.00018987976732036293, "loss": 0.8184, "step": 3057 }, { "epoch": 0.14489457474532103, "grad_norm": 0.462890625, "learning_rate": 0.00018987323795100955, "loss": 0.9716, "step": 3058 }, { "epoch": 0.1449419568822554, "grad_norm": 0.5546875, "learning_rate": 0.00018986670658836346, "loss": 0.8909, "step": 3059 }, { "epoch": 0.14498933901918976, "grad_norm": 0.64453125, "learning_rate": 0.0001898601732325696, "loss": 1.1241, "step": 3060 }, { "epoch": 0.14503672115612415, "grad_norm": 0.6328125, "learning_rate": 0.00018985363788377274, "loss": 0.8768, "step": 3061 }, { "epoch": 0.14508410329305851, "grad_norm": 0.045654296875, "learning_rate": 0.00018984710054211794, "loss": 0.0018, "step": 3062 }, { "epoch": 0.14513148542999288, "grad_norm": 0.55078125, "learning_rate": 0.00018984056120775015, "loss": 1.1582, "step": 3063 }, { "epoch": 0.14517886756692727, "grad_norm": 0.7109375, "learning_rate": 0.0001898340198808144, "loss": 1.1511, "step": 3064 }, { "epoch": 0.14522624970386164, "grad_norm": 0.75390625, "learning_rate": 0.0001898274765614558, "loss": 0.1091, "step": 3065 }, { "epoch": 0.14527363184079603, "grad_norm": 0.1416015625, "learning_rate": 0.00018982093124981941, "loss": 0.0181, "step": 3066 }, { "epoch": 0.1453210139777304, "grad_norm": 0.73828125, "learning_rate": 0.00018981438394605044, "loss": 1.2479, "step": 3067 }, { "epoch": 0.14536839611466476, "grad_norm": 0.494140625, "learning_rate": 0.00018980783465029412, "loss": 0.6365, "step": 3068 }, { "epoch": 0.14541577825159915, "grad_norm": 0.71484375, "learning_rate": 0.0001898012833626957, "loss": 1.0946, "step": 3069 }, { "epoch": 0.14546316038853352, "grad_norm": 0.66796875, "learning_rate": 0.00018979473008340044, "loss": 1.1289, "step": 3070 }, { "epoch": 0.1455105425254679, "grad_norm": 0.251953125, "learning_rate": 0.0001897881748125537, "loss": 0.1688, "step": 3071 }, { "epoch": 0.14555792466240228, "grad_norm": 0.578125, "learning_rate": 0.00018978161755030094, "loss": 1.2244, "step": 3072 }, { "epoch": 0.14560530679933664, "grad_norm": 0.72265625, "learning_rate": 0.00018977505829678747, "loss": 1.4977, "step": 3073 }, { "epoch": 0.14565268893627104, "grad_norm": 0.7109375, "learning_rate": 0.00018976849705215883, "loss": 1.0672, "step": 3074 }, { "epoch": 0.1457000710732054, "grad_norm": 0.65234375, "learning_rate": 0.0001897619338165606, "loss": 0.1644, "step": 3075 }, { "epoch": 0.14574745321013977, "grad_norm": 0.7109375, "learning_rate": 0.00018975536859013826, "loss": 1.2112, "step": 3076 }, { "epoch": 0.14579483534707416, "grad_norm": 0.734375, "learning_rate": 0.00018974880137303743, "loss": 1.2029, "step": 3077 }, { "epoch": 0.14584221748400852, "grad_norm": 0.57421875, "learning_rate": 0.00018974223216540378, "loss": 0.9227, "step": 3078 }, { "epoch": 0.14588959962094292, "grad_norm": 0.6328125, "learning_rate": 0.00018973566096738305, "loss": 1.0532, "step": 3079 }, { "epoch": 0.14593698175787728, "grad_norm": 0.99609375, "learning_rate": 0.0001897290877791209, "loss": 0.6377, "step": 3080 }, { "epoch": 0.14598436389481165, "grad_norm": 0.6328125, "learning_rate": 0.00018972251260076316, "loss": 1.3745, "step": 3081 }, { "epoch": 0.14603174603174604, "grad_norm": 0.546875, "learning_rate": 0.0001897159354324557, "loss": 0.8157, "step": 3082 }, { "epoch": 0.1460791281686804, "grad_norm": 0.01007080078125, "learning_rate": 0.00018970935627434432, "loss": 0.0007, "step": 3083 }, { "epoch": 0.14612651030561477, "grad_norm": 0.640625, "learning_rate": 0.00018970277512657497, "loss": 0.8798, "step": 3084 }, { "epoch": 0.14617389244254916, "grad_norm": 1.15625, "learning_rate": 0.00018969619198929363, "loss": 0.2542, "step": 3085 }, { "epoch": 0.14622127457948353, "grad_norm": 0.76953125, "learning_rate": 0.00018968960686264628, "loss": 1.2204, "step": 3086 }, { "epoch": 0.14626865671641792, "grad_norm": 0.6328125, "learning_rate": 0.00018968301974677898, "loss": 1.1266, "step": 3087 }, { "epoch": 0.1463160388533523, "grad_norm": 0.56640625, "learning_rate": 0.00018967643064183784, "loss": 0.9517, "step": 3088 }, { "epoch": 0.14636342099028665, "grad_norm": 0.60546875, "learning_rate": 0.00018966983954796896, "loss": 0.1777, "step": 3089 }, { "epoch": 0.14641080312722105, "grad_norm": 0.2021484375, "learning_rate": 0.0001896632464653186, "loss": 0.0187, "step": 3090 }, { "epoch": 0.1464581852641554, "grad_norm": 0.453125, "learning_rate": 0.0001896566513940329, "loss": 1.1612, "step": 3091 }, { "epoch": 0.14650556740108978, "grad_norm": 0.6484375, "learning_rate": 0.0001896500543342582, "loss": 1.0695, "step": 3092 }, { "epoch": 0.14655294953802417, "grad_norm": 0.62890625, "learning_rate": 0.00018964345528614077, "loss": 1.1036, "step": 3093 }, { "epoch": 0.14660033167495853, "grad_norm": 0.62890625, "learning_rate": 0.000189636854249827, "loss": 1.5296, "step": 3094 }, { "epoch": 0.14664771381189293, "grad_norm": 1.1953125, "learning_rate": 0.00018963025122546324, "loss": 0.0533, "step": 3095 }, { "epoch": 0.1466950959488273, "grad_norm": 0.69140625, "learning_rate": 0.000189623646213196, "loss": 1.1633, "step": 3096 }, { "epoch": 0.14674247808576166, "grad_norm": 0.279296875, "learning_rate": 0.00018961703921317174, "loss": 0.1973, "step": 3097 }, { "epoch": 0.14678986022269605, "grad_norm": 0.62109375, "learning_rate": 0.00018961043022553705, "loss": 1.3722, "step": 3098 }, { "epoch": 0.14683724235963042, "grad_norm": 0.2265625, "learning_rate": 0.00018960381925043842, "loss": 0.1504, "step": 3099 }, { "epoch": 0.1468846244965648, "grad_norm": 0.52734375, "learning_rate": 0.00018959720628802253, "loss": 0.2417, "step": 3100 }, { "epoch": 0.14693200663349917, "grad_norm": 0.5703125, "learning_rate": 0.00018959059133843607, "loss": 0.8952, "step": 3101 }, { "epoch": 0.14697938877043354, "grad_norm": 0.703125, "learning_rate": 0.00018958397440182567, "loss": 1.1358, "step": 3102 }, { "epoch": 0.14702677090736793, "grad_norm": 0.53125, "learning_rate": 0.00018957735547833816, "loss": 1.035, "step": 3103 }, { "epoch": 0.1470741530443023, "grad_norm": 0.8671875, "learning_rate": 0.00018957073456812032, "loss": 0.1452, "step": 3104 }, { "epoch": 0.14712153518123666, "grad_norm": 0.4609375, "learning_rate": 0.000189564111671319, "loss": 0.9038, "step": 3105 }, { "epoch": 0.14716891731817106, "grad_norm": 0.498046875, "learning_rate": 0.00018955748678808106, "loss": 0.7211, "step": 3106 }, { "epoch": 0.14721629945510542, "grad_norm": 0.69921875, "learning_rate": 0.0001895508599185535, "loss": 0.0434, "step": 3107 }, { "epoch": 0.1472636815920398, "grad_norm": 0.6328125, "learning_rate": 0.00018954423106288322, "loss": 0.996, "step": 3108 }, { "epoch": 0.14731106372897418, "grad_norm": 0.59375, "learning_rate": 0.0001895376002212173, "loss": 1.033, "step": 3109 }, { "epoch": 0.14735844586590854, "grad_norm": 0.65625, "learning_rate": 0.00018953096739370272, "loss": 1.0574, "step": 3110 }, { "epoch": 0.14740582800284294, "grad_norm": 0.53515625, "learning_rate": 0.0001895243325804867, "loss": 1.3177, "step": 3111 }, { "epoch": 0.1474532101397773, "grad_norm": 0.46484375, "learning_rate": 0.0001895176957817163, "loss": 0.1774, "step": 3112 }, { "epoch": 0.14750059227671167, "grad_norm": 0.47265625, "learning_rate": 0.0001895110569975388, "loss": 0.6744, "step": 3113 }, { "epoch": 0.14754797441364606, "grad_norm": 0.44140625, "learning_rate": 0.0001895044162281014, "loss": 0.9408, "step": 3114 }, { "epoch": 0.14759535655058043, "grad_norm": 0.75, "learning_rate": 0.00018949777347355138, "loss": 0.8715, "step": 3115 }, { "epoch": 0.14764273868751482, "grad_norm": 1.2265625, "learning_rate": 0.00018949112873403604, "loss": 0.6799, "step": 3116 }, { "epoch": 0.14769012082444918, "grad_norm": 0.6640625, "learning_rate": 0.0001894844820097028, "loss": 0.7846, "step": 3117 }, { "epoch": 0.14773750296138355, "grad_norm": 0.5390625, "learning_rate": 0.00018947783330069908, "loss": 0.7918, "step": 3118 }, { "epoch": 0.14778488509831794, "grad_norm": 0.58984375, "learning_rate": 0.0001894711826071723, "loss": 1.1212, "step": 3119 }, { "epoch": 0.1478322672352523, "grad_norm": 1.421875, "learning_rate": 0.00018946452992927, "loss": 1.0106, "step": 3120 }, { "epoch": 0.14787964937218667, "grad_norm": 0.81640625, "learning_rate": 0.00018945787526713974, "loss": 0.8408, "step": 3121 }, { "epoch": 0.14792703150912107, "grad_norm": 0.228515625, "learning_rate": 0.00018945121862092907, "loss": 0.1438, "step": 3122 }, { "epoch": 0.14797441364605543, "grad_norm": 0.6875, "learning_rate": 0.00018944455999078566, "loss": 1.0483, "step": 3123 }, { "epoch": 0.14802179578298982, "grad_norm": 0.76953125, "learning_rate": 0.00018943789937685718, "loss": 0.7446, "step": 3124 }, { "epoch": 0.1480691779199242, "grad_norm": 0.625, "learning_rate": 0.00018943123677929135, "loss": 1.4493, "step": 3125 }, { "epoch": 0.14811656005685855, "grad_norm": 0.62890625, "learning_rate": 0.00018942457219823594, "loss": 1.049, "step": 3126 }, { "epoch": 0.14816394219379295, "grad_norm": 0.48828125, "learning_rate": 0.0001894179056338388, "loss": 1.1085, "step": 3127 }, { "epoch": 0.1482113243307273, "grad_norm": 0.337890625, "learning_rate": 0.00018941123708624772, "loss": 0.0172, "step": 3128 }, { "epoch": 0.1482587064676617, "grad_norm": 0.60546875, "learning_rate": 0.00018940456655561064, "loss": 0.0563, "step": 3129 }, { "epoch": 0.14830608860459607, "grad_norm": 0.94921875, "learning_rate": 0.00018939789404207556, "loss": 0.3327, "step": 3130 }, { "epoch": 0.14835347074153044, "grad_norm": 0.458984375, "learning_rate": 0.00018939121954579036, "loss": 0.8707, "step": 3131 }, { "epoch": 0.14840085287846483, "grad_norm": 1.0703125, "learning_rate": 0.00018938454306690315, "loss": 0.489, "step": 3132 }, { "epoch": 0.1484482350153992, "grad_norm": 0.421875, "learning_rate": 0.00018937786460556196, "loss": 0.1558, "step": 3133 }, { "epoch": 0.14849561715233356, "grad_norm": 0.578125, "learning_rate": 0.00018937118416191494, "loss": 1.3409, "step": 3134 }, { "epoch": 0.14854299928926795, "grad_norm": 0.287109375, "learning_rate": 0.00018936450173611026, "loss": 0.0315, "step": 3135 }, { "epoch": 0.14859038142620232, "grad_norm": 0.203125, "learning_rate": 0.0001893578173282961, "loss": 0.1632, "step": 3136 }, { "epoch": 0.1486377635631367, "grad_norm": 0.357421875, "learning_rate": 0.00018935113093862074, "loss": 0.031, "step": 3137 }, { "epoch": 0.14868514570007108, "grad_norm": 0.6484375, "learning_rate": 0.00018934444256723246, "loss": 0.6992, "step": 3138 }, { "epoch": 0.14873252783700544, "grad_norm": 0.419921875, "learning_rate": 0.00018933775221427964, "loss": 0.453, "step": 3139 }, { "epoch": 0.14877990997393983, "grad_norm": 0.5078125, "learning_rate": 0.0001893310598799106, "loss": 0.9826, "step": 3140 }, { "epoch": 0.1488272921108742, "grad_norm": 0.6484375, "learning_rate": 0.00018932436556427383, "loss": 0.1852, "step": 3141 }, { "epoch": 0.14887467424780856, "grad_norm": 0.578125, "learning_rate": 0.00018931766926751778, "loss": 1.0186, "step": 3142 }, { "epoch": 0.14892205638474296, "grad_norm": 0.58984375, "learning_rate": 0.00018931097098979095, "loss": 0.8663, "step": 3143 }, { "epoch": 0.14896943852167732, "grad_norm": 0.52734375, "learning_rate": 0.00018930427073124187, "loss": 1.1418, "step": 3144 }, { "epoch": 0.14901682065861172, "grad_norm": 0.423828125, "learning_rate": 0.00018929756849201925, "loss": 0.0588, "step": 3145 }, { "epoch": 0.14906420279554608, "grad_norm": 0.412109375, "learning_rate": 0.00018929086427227164, "loss": 0.5915, "step": 3146 }, { "epoch": 0.14911158493248045, "grad_norm": 0.66015625, "learning_rate": 0.00018928415807214778, "loss": 0.9467, "step": 3147 }, { "epoch": 0.14915896706941484, "grad_norm": 0.73828125, "learning_rate": 0.0001892774498917964, "loss": 0.8641, "step": 3148 }, { "epoch": 0.1492063492063492, "grad_norm": 0.236328125, "learning_rate": 0.00018927073973136626, "loss": 0.1645, "step": 3149 }, { "epoch": 0.14925373134328357, "grad_norm": 0.5859375, "learning_rate": 0.00018926402759100622, "loss": 1.561, "step": 3150 }, { "epoch": 0.14930111348021796, "grad_norm": 0.1494140625, "learning_rate": 0.00018925731347086512, "loss": 0.0236, "step": 3151 }, { "epoch": 0.14934849561715233, "grad_norm": 0.51171875, "learning_rate": 0.00018925059737109188, "loss": 0.66, "step": 3152 }, { "epoch": 0.14939587775408672, "grad_norm": 0.57421875, "learning_rate": 0.00018924387929183546, "loss": 1.2031, "step": 3153 }, { "epoch": 0.14944325989102109, "grad_norm": 0.5859375, "learning_rate": 0.00018923715923324484, "loss": 0.9212, "step": 3154 }, { "epoch": 0.14949064202795545, "grad_norm": 0.96484375, "learning_rate": 0.0001892304371954691, "loss": 0.5071, "step": 3155 }, { "epoch": 0.14953802416488984, "grad_norm": 0.65625, "learning_rate": 0.0001892237131786573, "loss": 1.53, "step": 3156 }, { "epoch": 0.1495854063018242, "grad_norm": 0.08544921875, "learning_rate": 0.00018921698718295856, "loss": 0.003, "step": 3157 }, { "epoch": 0.1496327884387586, "grad_norm": 0.67578125, "learning_rate": 0.0001892102592085221, "loss": 0.8592, "step": 3158 }, { "epoch": 0.14968017057569297, "grad_norm": 0.6171875, "learning_rate": 0.00018920352925549708, "loss": 1.1312, "step": 3159 }, { "epoch": 0.14972755271262733, "grad_norm": 0.57421875, "learning_rate": 0.00018919679732403284, "loss": 0.9752, "step": 3160 }, { "epoch": 0.14977493484956172, "grad_norm": 0.66796875, "learning_rate": 0.0001891900634142786, "loss": 0.942, "step": 3161 }, { "epoch": 0.1498223169864961, "grad_norm": 0.65625, "learning_rate": 0.0001891833275263838, "loss": 1.1579, "step": 3162 }, { "epoch": 0.14986969912343046, "grad_norm": 0.345703125, "learning_rate": 0.00018917658966049778, "loss": 0.173, "step": 3163 }, { "epoch": 0.14991708126036485, "grad_norm": 0.7421875, "learning_rate": 0.00018916984981676995, "loss": 1.3253, "step": 3164 }, { "epoch": 0.1499644633972992, "grad_norm": 0.1416015625, "learning_rate": 0.00018916310799534986, "loss": 0.0086, "step": 3165 }, { "epoch": 0.1500118455342336, "grad_norm": 0.51953125, "learning_rate": 0.00018915636419638703, "loss": 1.1239, "step": 3166 }, { "epoch": 0.15005922767116797, "grad_norm": 0.59375, "learning_rate": 0.00018914961842003095, "loss": 1.4062, "step": 3167 }, { "epoch": 0.15010660980810234, "grad_norm": 1.515625, "learning_rate": 0.00018914287066643134, "loss": 0.9106, "step": 3168 }, { "epoch": 0.15015399194503673, "grad_norm": 0.5390625, "learning_rate": 0.00018913612093573778, "loss": 0.9539, "step": 3169 }, { "epoch": 0.1502013740819711, "grad_norm": 0.216796875, "learning_rate": 0.0001891293692281, "loss": 0.1564, "step": 3170 }, { "epoch": 0.15024875621890546, "grad_norm": 0.36328125, "learning_rate": 0.00018912261554366778, "loss": 0.1612, "step": 3171 }, { "epoch": 0.15029613835583985, "grad_norm": 0.66015625, "learning_rate": 0.00018911585988259084, "loss": 0.8047, "step": 3172 }, { "epoch": 0.15034352049277422, "grad_norm": 0.67578125, "learning_rate": 0.00018910910224501906, "loss": 1.1706, "step": 3173 }, { "epoch": 0.1503909026297086, "grad_norm": 0.66796875, "learning_rate": 0.0001891023426311023, "loss": 0.8821, "step": 3174 }, { "epoch": 0.15043828476664298, "grad_norm": 0.984375, "learning_rate": 0.0001890955810409905, "loss": 0.4706, "step": 3175 }, { "epoch": 0.15048566690357734, "grad_norm": 0.54296875, "learning_rate": 0.00018908881747483364, "loss": 0.8414, "step": 3176 }, { "epoch": 0.15053304904051173, "grad_norm": 0.5234375, "learning_rate": 0.00018908205193278165, "loss": 0.9074, "step": 3177 }, { "epoch": 0.1505804311774461, "grad_norm": 0.5234375, "learning_rate": 0.00018907528441498465, "loss": 0.1279, "step": 3178 }, { "epoch": 0.15062781331438047, "grad_norm": 0.57421875, "learning_rate": 0.00018906851492159275, "loss": 0.0764, "step": 3179 }, { "epoch": 0.15067519545131486, "grad_norm": 0.54296875, "learning_rate": 0.00018906174345275603, "loss": 0.9166, "step": 3180 }, { "epoch": 0.15072257758824922, "grad_norm": 0.7578125, "learning_rate": 0.00018905497000862474, "loss": 0.9086, "step": 3181 }, { "epoch": 0.15076995972518362, "grad_norm": 0.546875, "learning_rate": 0.000189048194589349, "loss": 0.5789, "step": 3182 }, { "epoch": 0.15081734186211798, "grad_norm": 0.69140625, "learning_rate": 0.00018904141719507922, "loss": 0.7374, "step": 3183 }, { "epoch": 0.15086472399905235, "grad_norm": 0.7578125, "learning_rate": 0.00018903463782596563, "loss": 0.9864, "step": 3184 }, { "epoch": 0.15091210613598674, "grad_norm": 0.58203125, "learning_rate": 0.00018902785648215858, "loss": 0.9219, "step": 3185 }, { "epoch": 0.1509594882729211, "grad_norm": 0.10302734375, "learning_rate": 0.00018902107316380855, "loss": 0.0079, "step": 3186 }, { "epoch": 0.1510068704098555, "grad_norm": 0.71484375, "learning_rate": 0.00018901428787106592, "loss": 0.0924, "step": 3187 }, { "epoch": 0.15105425254678986, "grad_norm": 0.6640625, "learning_rate": 0.00018900750060408118, "loss": 0.8376, "step": 3188 }, { "epoch": 0.15110163468372423, "grad_norm": 0.6484375, "learning_rate": 0.00018900071136300492, "loss": 1.1438, "step": 3189 }, { "epoch": 0.15114901682065862, "grad_norm": 0.171875, "learning_rate": 0.00018899392014798766, "loss": 0.009, "step": 3190 }, { "epoch": 0.151196398957593, "grad_norm": 0.27734375, "learning_rate": 0.00018898712695918004, "loss": 0.1356, "step": 3191 }, { "epoch": 0.15124378109452735, "grad_norm": 0.63671875, "learning_rate": 0.00018898033179673274, "loss": 1.1841, "step": 3192 }, { "epoch": 0.15129116323146174, "grad_norm": 0.58984375, "learning_rate": 0.00018897353466079645, "loss": 1.0475, "step": 3193 }, { "epoch": 0.1513385453683961, "grad_norm": 0.80078125, "learning_rate": 0.00018896673555152195, "loss": 0.2508, "step": 3194 }, { "epoch": 0.1513859275053305, "grad_norm": 0.703125, "learning_rate": 0.00018895993446905998, "loss": 0.8208, "step": 3195 }, { "epoch": 0.15143330964226487, "grad_norm": 0.333984375, "learning_rate": 0.00018895313141356143, "loss": 0.0463, "step": 3196 }, { "epoch": 0.15148069177919923, "grad_norm": 0.69140625, "learning_rate": 0.00018894632638517716, "loss": 0.0668, "step": 3197 }, { "epoch": 0.15152807391613363, "grad_norm": 0.53515625, "learning_rate": 0.00018893951938405817, "loss": 0.8999, "step": 3198 }, { "epoch": 0.151575456053068, "grad_norm": 0.65625, "learning_rate": 0.00018893271041035533, "loss": 1.1003, "step": 3199 }, { "epoch": 0.15162283819000236, "grad_norm": 0.6015625, "learning_rate": 0.0001889258994642197, "loss": 1.0037, "step": 3200 }, { "epoch": 0.15167022032693675, "grad_norm": 0.6484375, "learning_rate": 0.00018891908654580238, "loss": 0.8504, "step": 3201 }, { "epoch": 0.15171760246387112, "grad_norm": 0.46484375, "learning_rate": 0.00018891227165525437, "loss": 0.1001, "step": 3202 }, { "epoch": 0.1517649846008055, "grad_norm": 0.66796875, "learning_rate": 0.00018890545479272692, "loss": 0.9516, "step": 3203 }, { "epoch": 0.15181236673773987, "grad_norm": 0.5703125, "learning_rate": 0.00018889863595837118, "loss": 1.1313, "step": 3204 }, { "epoch": 0.15185974887467424, "grad_norm": 0.76953125, "learning_rate": 0.00018889181515233835, "loss": 1.1998, "step": 3205 }, { "epoch": 0.15190713101160863, "grad_norm": 0.578125, "learning_rate": 0.0001888849923747798, "loss": 0.2061, "step": 3206 }, { "epoch": 0.151954513148543, "grad_norm": 0.5546875, "learning_rate": 0.00018887816762584676, "loss": 0.8141, "step": 3207 }, { "epoch": 0.15200189528547736, "grad_norm": 0.65234375, "learning_rate": 0.00018887134090569063, "loss": 1.5754, "step": 3208 }, { "epoch": 0.15204927742241175, "grad_norm": 0.6640625, "learning_rate": 0.00018886451221446283, "loss": 1.2315, "step": 3209 }, { "epoch": 0.15209665955934612, "grad_norm": 0.61328125, "learning_rate": 0.00018885768155231481, "loss": 1.3574, "step": 3210 }, { "epoch": 0.1521440416962805, "grad_norm": 0.62109375, "learning_rate": 0.00018885084891939803, "loss": 1.0823, "step": 3211 }, { "epoch": 0.15219142383321488, "grad_norm": 0.62890625, "learning_rate": 0.00018884401431586408, "loss": 0.7045, "step": 3212 }, { "epoch": 0.15223880597014924, "grad_norm": 0.609375, "learning_rate": 0.00018883717774186454, "loss": 0.9636, "step": 3213 }, { "epoch": 0.15228618810708364, "grad_norm": 0.248046875, "learning_rate": 0.000188830339197551, "loss": 0.1879, "step": 3214 }, { "epoch": 0.152333570244018, "grad_norm": 0.83203125, "learning_rate": 0.00018882349868307516, "loss": 0.955, "step": 3215 }, { "epoch": 0.1523809523809524, "grad_norm": 0.365234375, "learning_rate": 0.00018881665619858873, "loss": 0.2655, "step": 3216 }, { "epoch": 0.15242833451788676, "grad_norm": 0.45703125, "learning_rate": 0.00018880981174424348, "loss": 0.8976, "step": 3217 }, { "epoch": 0.15247571665482113, "grad_norm": 1.109375, "learning_rate": 0.0001888029653201912, "loss": 0.0646, "step": 3218 }, { "epoch": 0.15252309879175552, "grad_norm": 0.88671875, "learning_rate": 0.00018879611692658373, "loss": 0.2538, "step": 3219 }, { "epoch": 0.15257048092868988, "grad_norm": 0.578125, "learning_rate": 0.00018878926656357297, "loss": 1.1655, "step": 3220 }, { "epoch": 0.15261786306562425, "grad_norm": 0.6640625, "learning_rate": 0.00018878241423131084, "loss": 1.2484, "step": 3221 }, { "epoch": 0.15266524520255864, "grad_norm": 0.5390625, "learning_rate": 0.00018877555992994935, "loss": 0.7451, "step": 3222 }, { "epoch": 0.152712627339493, "grad_norm": 0.83984375, "learning_rate": 0.00018876870365964048, "loss": 0.2073, "step": 3223 }, { "epoch": 0.1527600094764274, "grad_norm": 0.6171875, "learning_rate": 0.00018876184542053633, "loss": 1.1962, "step": 3224 }, { "epoch": 0.15280739161336176, "grad_norm": 0.59375, "learning_rate": 0.000188754985212789, "loss": 1.0728, "step": 3225 }, { "epoch": 0.15285477375029613, "grad_norm": 0.40625, "learning_rate": 0.0001887481230365506, "loss": 0.671, "step": 3226 }, { "epoch": 0.15290215588723052, "grad_norm": 0.4453125, "learning_rate": 0.00018874125889197337, "loss": 1.2873, "step": 3227 }, { "epoch": 0.1529495380241649, "grad_norm": 0.8046875, "learning_rate": 0.00018873439277920957, "loss": 0.7527, "step": 3228 }, { "epoch": 0.15299692016109925, "grad_norm": 0.8125, "learning_rate": 0.00018872752469841145, "loss": 0.3701, "step": 3229 }, { "epoch": 0.15304430229803365, "grad_norm": 0.447265625, "learning_rate": 0.0001887206546497313, "loss": 1.3629, "step": 3230 }, { "epoch": 0.153091684434968, "grad_norm": 0.5078125, "learning_rate": 0.0001887137826333216, "loss": 0.8888, "step": 3231 }, { "epoch": 0.1531390665719024, "grad_norm": 0.61328125, "learning_rate": 0.00018870690864933467, "loss": 1.2509, "step": 3232 }, { "epoch": 0.15318644870883677, "grad_norm": 0.453125, "learning_rate": 0.00018870003269792296, "loss": 0.5475, "step": 3233 }, { "epoch": 0.15323383084577114, "grad_norm": 0.59375, "learning_rate": 0.00018869315477923904, "loss": 1.0475, "step": 3234 }, { "epoch": 0.15328121298270553, "grad_norm": 0.3125, "learning_rate": 0.00018868627489343538, "loss": 0.1699, "step": 3235 }, { "epoch": 0.1533285951196399, "grad_norm": 0.072265625, "learning_rate": 0.00018867939304066463, "loss": 0.0069, "step": 3236 }, { "epoch": 0.15337597725657426, "grad_norm": 0.56640625, "learning_rate": 0.0001886725092210794, "loss": 0.9158, "step": 3237 }, { "epoch": 0.15342335939350865, "grad_norm": 1.5234375, "learning_rate": 0.00018866562343483237, "loss": 0.7676, "step": 3238 }, { "epoch": 0.15347074153044302, "grad_norm": 0.484375, "learning_rate": 0.00018865873568207623, "loss": 0.9575, "step": 3239 }, { "epoch": 0.1535181236673774, "grad_norm": 0.3828125, "learning_rate": 0.0001886518459629638, "loss": 0.2172, "step": 3240 }, { "epoch": 0.15356550580431177, "grad_norm": 0.71484375, "learning_rate": 0.00018864495427764783, "loss": 0.9754, "step": 3241 }, { "epoch": 0.15361288794124614, "grad_norm": 0.625, "learning_rate": 0.0001886380606262812, "loss": 1.0084, "step": 3242 }, { "epoch": 0.15366027007818053, "grad_norm": 0.59375, "learning_rate": 0.00018863116500901682, "loss": 1.1972, "step": 3243 }, { "epoch": 0.1537076522151149, "grad_norm": 0.578125, "learning_rate": 0.00018862426742600756, "loss": 0.046, "step": 3244 }, { "epoch": 0.1537550343520493, "grad_norm": 0.384765625, "learning_rate": 0.0001886173678774065, "loss": 0.0177, "step": 3245 }, { "epoch": 0.15380241648898366, "grad_norm": 0.66015625, "learning_rate": 0.00018861046636336657, "loss": 1.08, "step": 3246 }, { "epoch": 0.15384979862591802, "grad_norm": 0.6796875, "learning_rate": 0.0001886035628840409, "loss": 0.9977, "step": 3247 }, { "epoch": 0.15389718076285241, "grad_norm": 0.48828125, "learning_rate": 0.0001885966574395826, "loss": 0.55, "step": 3248 }, { "epoch": 0.15394456289978678, "grad_norm": 0.6640625, "learning_rate": 0.00018858975003014476, "loss": 1.112, "step": 3249 }, { "epoch": 0.15399194503672115, "grad_norm": 0.5625, "learning_rate": 0.00018858284065588065, "loss": 1.4552, "step": 3250 }, { "epoch": 0.15403932717365554, "grad_norm": 0.6796875, "learning_rate": 0.00018857592931694348, "loss": 1.1829, "step": 3251 }, { "epoch": 0.1540867093105899, "grad_norm": 0.78125, "learning_rate": 0.00018856901601348657, "loss": 0.1491, "step": 3252 }, { "epoch": 0.1541340914475243, "grad_norm": 0.5390625, "learning_rate": 0.0001885621007456632, "loss": 0.7196, "step": 3253 }, { "epoch": 0.15418147358445866, "grad_norm": 0.59765625, "learning_rate": 0.00018855518351362677, "loss": 1.1786, "step": 3254 }, { "epoch": 0.15422885572139303, "grad_norm": 0.2158203125, "learning_rate": 0.0001885482643175307, "loss": 0.1435, "step": 3255 }, { "epoch": 0.15427623785832742, "grad_norm": 0.59765625, "learning_rate": 0.00018854134315752846, "loss": 0.7636, "step": 3256 }, { "epoch": 0.15432361999526178, "grad_norm": 0.400390625, "learning_rate": 0.00018853442003377352, "loss": 0.7926, "step": 3257 }, { "epoch": 0.15437100213219615, "grad_norm": 0.6328125, "learning_rate": 0.00018852749494641943, "loss": 1.0515, "step": 3258 }, { "epoch": 0.15441838426913054, "grad_norm": 1.2265625, "learning_rate": 0.00018852056789561982, "loss": 0.7984, "step": 3259 }, { "epoch": 0.1544657664060649, "grad_norm": 0.82421875, "learning_rate": 0.00018851363888152832, "loss": 1.0246, "step": 3260 }, { "epoch": 0.1545131485429993, "grad_norm": 0.5625, "learning_rate": 0.00018850670790429854, "loss": 0.9434, "step": 3261 }, { "epoch": 0.15456053067993367, "grad_norm": 0.5859375, "learning_rate": 0.0001884997749640843, "loss": 0.8916, "step": 3262 }, { "epoch": 0.15460791281686803, "grad_norm": 1.03125, "learning_rate": 0.0001884928400610393, "loss": 0.81, "step": 3263 }, { "epoch": 0.15465529495380242, "grad_norm": 0.8203125, "learning_rate": 0.00018848590319531735, "loss": 1.3236, "step": 3264 }, { "epoch": 0.1547026770907368, "grad_norm": 0.5703125, "learning_rate": 0.00018847896436707234, "loss": 1.135, "step": 3265 }, { "epoch": 0.15475005922767116, "grad_norm": 0.380859375, "learning_rate": 0.00018847202357645814, "loss": 0.0196, "step": 3266 }, { "epoch": 0.15479744136460555, "grad_norm": 0.53125, "learning_rate": 0.00018846508082362865, "loss": 0.8033, "step": 3267 }, { "epoch": 0.1548448235015399, "grad_norm": 0.51171875, "learning_rate": 0.00018845813610873796, "loss": 0.871, "step": 3268 }, { "epoch": 0.1548922056384743, "grad_norm": 0.3828125, "learning_rate": 0.00018845118943194, "loss": 0.0952, "step": 3269 }, { "epoch": 0.15493958777540867, "grad_norm": 0.47265625, "learning_rate": 0.0001884442407933889, "loss": 1.1118, "step": 3270 }, { "epoch": 0.15498696991234304, "grad_norm": 1.0234375, "learning_rate": 0.0001884372901932387, "loss": 0.2118, "step": 3271 }, { "epoch": 0.15503435204927743, "grad_norm": 0.5390625, "learning_rate": 0.00018843033763164363, "loss": 0.1658, "step": 3272 }, { "epoch": 0.1550817341862118, "grad_norm": 0.443359375, "learning_rate": 0.00018842338310875786, "loss": 0.8745, "step": 3273 }, { "epoch": 0.15512911632314616, "grad_norm": 0.546875, "learning_rate": 0.00018841642662473565, "loss": 0.0557, "step": 3274 }, { "epoch": 0.15517649846008055, "grad_norm": 0.8046875, "learning_rate": 0.00018840946817973126, "loss": 0.1246, "step": 3275 }, { "epoch": 0.15522388059701492, "grad_norm": 0.625, "learning_rate": 0.00018840250777389902, "loss": 0.8924, "step": 3276 }, { "epoch": 0.1552712627339493, "grad_norm": 0.48828125, "learning_rate": 0.00018839554540739335, "loss": 0.5771, "step": 3277 }, { "epoch": 0.15531864487088368, "grad_norm": 0.65234375, "learning_rate": 0.00018838858108036864, "loss": 1.1268, "step": 3278 }, { "epoch": 0.15536602700781804, "grad_norm": 0.609375, "learning_rate": 0.00018838161479297933, "loss": 1.1074, "step": 3279 }, { "epoch": 0.15541340914475243, "grad_norm": 0.65234375, "learning_rate": 0.00018837464654537995, "loss": 1.6613, "step": 3280 }, { "epoch": 0.1554607912816868, "grad_norm": 0.5234375, "learning_rate": 0.00018836767633772502, "loss": 1.0114, "step": 3281 }, { "epoch": 0.1555081734186212, "grad_norm": 0.67578125, "learning_rate": 0.0001883607041701692, "loss": 1.2269, "step": 3282 }, { "epoch": 0.15555555555555556, "grad_norm": 0.54296875, "learning_rate": 0.00018835373004286702, "loss": 1.402, "step": 3283 }, { "epoch": 0.15560293769248992, "grad_norm": 0.59375, "learning_rate": 0.00018834675395597324, "loss": 1.0193, "step": 3284 }, { "epoch": 0.15565031982942432, "grad_norm": 0.5859375, "learning_rate": 0.00018833977590964257, "loss": 1.3892, "step": 3285 }, { "epoch": 0.15569770196635868, "grad_norm": 0.376953125, "learning_rate": 0.0001883327959040298, "loss": 0.4987, "step": 3286 }, { "epoch": 0.15574508410329305, "grad_norm": 0.60546875, "learning_rate": 0.00018832581393928965, "loss": 1.3572, "step": 3287 }, { "epoch": 0.15579246624022744, "grad_norm": 0.50390625, "learning_rate": 0.00018831883001557706, "loss": 1.2606, "step": 3288 }, { "epoch": 0.1558398483771618, "grad_norm": 0.49609375, "learning_rate": 0.0001883118441330469, "loss": 1.7267, "step": 3289 }, { "epoch": 0.1558872305140962, "grad_norm": 0.56640625, "learning_rate": 0.00018830485629185405, "loss": 1.2203, "step": 3290 }, { "epoch": 0.15593461265103056, "grad_norm": 0.91796875, "learning_rate": 0.00018829786649215358, "loss": 0.3889, "step": 3291 }, { "epoch": 0.15598199478796493, "grad_norm": 0.67578125, "learning_rate": 0.00018829087473410048, "loss": 1.3834, "step": 3292 }, { "epoch": 0.15602937692489932, "grad_norm": 0.7109375, "learning_rate": 0.00018828388101784985, "loss": 1.2501, "step": 3293 }, { "epoch": 0.1560767590618337, "grad_norm": 0.4609375, "learning_rate": 0.0001882768853435567, "loss": 0.1653, "step": 3294 }, { "epoch": 0.15612414119876805, "grad_norm": 0.578125, "learning_rate": 0.00018826988771137635, "loss": 0.2747, "step": 3295 }, { "epoch": 0.15617152333570244, "grad_norm": 0.5390625, "learning_rate": 0.00018826288812146387, "loss": 0.7825, "step": 3296 }, { "epoch": 0.1562189054726368, "grad_norm": 0.62109375, "learning_rate": 0.00018825588657397452, "loss": 0.854, "step": 3297 }, { "epoch": 0.1562662876095712, "grad_norm": 0.443359375, "learning_rate": 0.00018824888306906366, "loss": 1.0428, "step": 3298 }, { "epoch": 0.15631366974650557, "grad_norm": 0.71484375, "learning_rate": 0.00018824187760688654, "loss": 1.6231, "step": 3299 }, { "epoch": 0.15636105188343993, "grad_norm": 0.478515625, "learning_rate": 0.0001882348701875986, "loss": 0.2232, "step": 3300 }, { "epoch": 0.15640843402037433, "grad_norm": 0.53515625, "learning_rate": 0.00018822786081135518, "loss": 0.8423, "step": 3301 }, { "epoch": 0.1564558161573087, "grad_norm": 0.61328125, "learning_rate": 0.00018822084947831181, "loss": 0.9263, "step": 3302 }, { "epoch": 0.15650319829424306, "grad_norm": 0.376953125, "learning_rate": 0.00018821383618862397, "loss": 0.1621, "step": 3303 }, { "epoch": 0.15655058043117745, "grad_norm": 0.58984375, "learning_rate": 0.00018820682094244717, "loss": 1.0773, "step": 3304 }, { "epoch": 0.15659796256811181, "grad_norm": 0.216796875, "learning_rate": 0.00018819980373993705, "loss": 0.0241, "step": 3305 }, { "epoch": 0.1566453447050462, "grad_norm": 0.5859375, "learning_rate": 0.00018819278458124923, "loss": 1.2362, "step": 3306 }, { "epoch": 0.15669272684198057, "grad_norm": 0.62109375, "learning_rate": 0.0001881857634665394, "loss": 1.1092, "step": 3307 }, { "epoch": 0.15674010897891494, "grad_norm": 0.73046875, "learning_rate": 0.00018817874039596326, "loss": 0.7047, "step": 3308 }, { "epoch": 0.15678749111584933, "grad_norm": 0.095703125, "learning_rate": 0.00018817171536967658, "loss": 0.0124, "step": 3309 }, { "epoch": 0.1568348732527837, "grad_norm": 1.046875, "learning_rate": 0.00018816468838783518, "loss": 0.4429, "step": 3310 }, { "epoch": 0.1568822553897181, "grad_norm": 0.51171875, "learning_rate": 0.0001881576594505949, "loss": 0.8604, "step": 3311 }, { "epoch": 0.15692963752665245, "grad_norm": 0.3359375, "learning_rate": 0.0001881506285581116, "loss": 0.155, "step": 3312 }, { "epoch": 0.15697701966358682, "grad_norm": 0.546875, "learning_rate": 0.0001881435957105413, "loss": 0.875, "step": 3313 }, { "epoch": 0.1570244018005212, "grad_norm": 0.8984375, "learning_rate": 0.00018813656090803992, "loss": 1.6006, "step": 3314 }, { "epoch": 0.15707178393745558, "grad_norm": 0.251953125, "learning_rate": 0.00018812952415076347, "loss": 0.1606, "step": 3315 }, { "epoch": 0.15711916607438994, "grad_norm": 0.25, "learning_rate": 0.00018812248543886807, "loss": 0.0366, "step": 3316 }, { "epoch": 0.15716654821132434, "grad_norm": 1.171875, "learning_rate": 0.0001881154447725098, "loss": 0.5749, "step": 3317 }, { "epoch": 0.1572139303482587, "grad_norm": 0.69921875, "learning_rate": 0.00018810840215184485, "loss": 0.1889, "step": 3318 }, { "epoch": 0.1572613124851931, "grad_norm": 0.59765625, "learning_rate": 0.00018810135757702935, "loss": 1.1963, "step": 3319 }, { "epoch": 0.15730869462212746, "grad_norm": 0.6328125, "learning_rate": 0.0001880943110482196, "loss": 1.172, "step": 3320 }, { "epoch": 0.15735607675906182, "grad_norm": 0.478515625, "learning_rate": 0.00018808726256557185, "loss": 0.8667, "step": 3321 }, { "epoch": 0.15740345889599622, "grad_norm": 0.6640625, "learning_rate": 0.00018808021212924245, "loss": 1.3381, "step": 3322 }, { "epoch": 0.15745084103293058, "grad_norm": 0.6484375, "learning_rate": 0.00018807315973938776, "loss": 1.1065, "step": 3323 }, { "epoch": 0.15749822316986495, "grad_norm": 0.53125, "learning_rate": 0.00018806610539616423, "loss": 1.1935, "step": 3324 }, { "epoch": 0.15754560530679934, "grad_norm": 0.361328125, "learning_rate": 0.00018805904909972824, "loss": 0.7319, "step": 3325 }, { "epoch": 0.1575929874437337, "grad_norm": 0.51953125, "learning_rate": 0.00018805199085023637, "loss": 1.0317, "step": 3326 }, { "epoch": 0.1576403695806681, "grad_norm": 0.21484375, "learning_rate": 0.00018804493064784511, "loss": 0.1413, "step": 3327 }, { "epoch": 0.15768775171760246, "grad_norm": 0.65234375, "learning_rate": 0.00018803786849271107, "loss": 1.0061, "step": 3328 }, { "epoch": 0.15773513385453683, "grad_norm": 0.486328125, "learning_rate": 0.0001880308043849909, "loss": 0.4742, "step": 3329 }, { "epoch": 0.15778251599147122, "grad_norm": 0.412109375, "learning_rate": 0.00018802373832484123, "loss": 0.0569, "step": 3330 }, { "epoch": 0.1578298981284056, "grad_norm": 0.6875, "learning_rate": 0.00018801667031241882, "loss": 0.3141, "step": 3331 }, { "epoch": 0.15787728026533995, "grad_norm": 0.6640625, "learning_rate": 0.00018800960034788043, "loss": 0.4049, "step": 3332 }, { "epoch": 0.15792466240227435, "grad_norm": 0.6796875, "learning_rate": 0.00018800252843138282, "loss": 1.3762, "step": 3333 }, { "epoch": 0.1579720445392087, "grad_norm": 0.546875, "learning_rate": 0.00018799545456308289, "loss": 1.1696, "step": 3334 }, { "epoch": 0.1580194266761431, "grad_norm": 0.203125, "learning_rate": 0.00018798837874313748, "loss": 0.1247, "step": 3335 }, { "epoch": 0.15806680881307747, "grad_norm": 0.486328125, "learning_rate": 0.00018798130097170358, "loss": 0.962, "step": 3336 }, { "epoch": 0.15811419095001183, "grad_norm": 0.7734375, "learning_rate": 0.00018797422124893807, "loss": 0.8894, "step": 3337 }, { "epoch": 0.15816157308694623, "grad_norm": 0.66015625, "learning_rate": 0.0001879671395749981, "loss": 0.8901, "step": 3338 }, { "epoch": 0.1582089552238806, "grad_norm": 0.486328125, "learning_rate": 0.00018796005595004067, "loss": 0.9047, "step": 3339 }, { "epoch": 0.15825633736081499, "grad_norm": 0.65234375, "learning_rate": 0.0001879529703742229, "loss": 1.0445, "step": 3340 }, { "epoch": 0.15830371949774935, "grad_norm": 0.68359375, "learning_rate": 0.00018794588284770186, "loss": 1.084, "step": 3341 }, { "epoch": 0.15835110163468372, "grad_norm": 0.53125, "learning_rate": 0.00018793879337063488, "loss": 1.3039, "step": 3342 }, { "epoch": 0.1583984837716181, "grad_norm": 0.765625, "learning_rate": 0.00018793170194317913, "loss": 1.1952, "step": 3343 }, { "epoch": 0.15844586590855247, "grad_norm": 0.5859375, "learning_rate": 0.00018792460856549185, "loss": 1.0394, "step": 3344 }, { "epoch": 0.15849324804548684, "grad_norm": 0.4609375, "learning_rate": 0.00018791751323773043, "loss": 0.5739, "step": 3345 }, { "epoch": 0.15854063018242123, "grad_norm": 0.640625, "learning_rate": 0.00018791041596005225, "loss": 0.9165, "step": 3346 }, { "epoch": 0.1585880123193556, "grad_norm": 0.8046875, "learning_rate": 0.00018790331673261464, "loss": 1.1403, "step": 3347 }, { "epoch": 0.15863539445629, "grad_norm": 0.6796875, "learning_rate": 0.0001878962155555751, "loss": 1.0392, "step": 3348 }, { "epoch": 0.15868277659322436, "grad_norm": 0.7890625, "learning_rate": 0.00018788911242909113, "loss": 1.1052, "step": 3349 }, { "epoch": 0.15873015873015872, "grad_norm": 0.71484375, "learning_rate": 0.00018788200735332024, "loss": 1.0922, "step": 3350 }, { "epoch": 0.15877754086709311, "grad_norm": 0.55859375, "learning_rate": 0.00018787490032842006, "loss": 1.0305, "step": 3351 }, { "epoch": 0.15882492300402748, "grad_norm": 0.54296875, "learning_rate": 0.00018786779135454817, "loss": 0.1058, "step": 3352 }, { "epoch": 0.15887230514096184, "grad_norm": 0.5234375, "learning_rate": 0.00018786068043186226, "loss": 0.1585, "step": 3353 }, { "epoch": 0.15891968727789624, "grad_norm": 0.62890625, "learning_rate": 0.00018785356756052007, "loss": 0.3798, "step": 3354 }, { "epoch": 0.1589670694148306, "grad_norm": 0.734375, "learning_rate": 0.00018784645274067931, "loss": 0.9803, "step": 3355 }, { "epoch": 0.159014451551765, "grad_norm": 0.1376953125, "learning_rate": 0.0001878393359724978, "loss": 0.0151, "step": 3356 }, { "epoch": 0.15906183368869936, "grad_norm": 0.66015625, "learning_rate": 0.00018783221725613336, "loss": 0.762, "step": 3357 }, { "epoch": 0.15910921582563373, "grad_norm": 0.21484375, "learning_rate": 0.00018782509659174393, "loss": 0.0296, "step": 3358 }, { "epoch": 0.15915659796256812, "grad_norm": 0.455078125, "learning_rate": 0.0001878179739794874, "loss": 0.0517, "step": 3359 }, { "epoch": 0.15920398009950248, "grad_norm": 0.80859375, "learning_rate": 0.00018781084941952173, "loss": 0.2991, "step": 3360 }, { "epoch": 0.15925136223643685, "grad_norm": 0.7890625, "learning_rate": 0.00018780372291200497, "loss": 0.8215, "step": 3361 }, { "epoch": 0.15929874437337124, "grad_norm": 0.53515625, "learning_rate": 0.00018779659445709515, "loss": 0.6526, "step": 3362 }, { "epoch": 0.1593461265103056, "grad_norm": 0.67578125, "learning_rate": 0.00018778946405495036, "loss": 0.0389, "step": 3363 }, { "epoch": 0.15939350864724, "grad_norm": 0.248046875, "learning_rate": 0.00018778233170572877, "loss": 0.1111, "step": 3364 }, { "epoch": 0.15944089078417437, "grad_norm": 0.6796875, "learning_rate": 0.00018777519740958862, "loss": 1.1919, "step": 3365 }, { "epoch": 0.15948827292110873, "grad_norm": 0.1259765625, "learning_rate": 0.00018776806116668803, "loss": 0.0036, "step": 3366 }, { "epoch": 0.15953565505804312, "grad_norm": 0.5859375, "learning_rate": 0.00018776092297718535, "loss": 0.8826, "step": 3367 }, { "epoch": 0.1595830371949775, "grad_norm": 0.228515625, "learning_rate": 0.00018775378284123888, "loss": 0.0293, "step": 3368 }, { "epoch": 0.15963041933191188, "grad_norm": 0.5078125, "learning_rate": 0.00018774664075900697, "loss": 0.8727, "step": 3369 }, { "epoch": 0.15967780146884625, "grad_norm": 0.48046875, "learning_rate": 0.00018773949673064804, "loss": 0.9371, "step": 3370 }, { "epoch": 0.1597251836057806, "grad_norm": 0.65234375, "learning_rate": 0.00018773235075632052, "loss": 1.1879, "step": 3371 }, { "epoch": 0.159772565742715, "grad_norm": 0.55078125, "learning_rate": 0.0001877252028361829, "loss": 0.7749, "step": 3372 }, { "epoch": 0.15981994787964937, "grad_norm": 0.6171875, "learning_rate": 0.00018771805297039374, "loss": 0.829, "step": 3373 }, { "epoch": 0.15986733001658374, "grad_norm": 0.62890625, "learning_rate": 0.0001877109011591116, "loss": 1.4261, "step": 3374 }, { "epoch": 0.15991471215351813, "grad_norm": 0.68359375, "learning_rate": 0.00018770374740249507, "loss": 0.3003, "step": 3375 }, { "epoch": 0.1599620942904525, "grad_norm": 0.7265625, "learning_rate": 0.00018769659170070287, "loss": 0.0686, "step": 3376 }, { "epoch": 0.1600094764273869, "grad_norm": 0.85546875, "learning_rate": 0.00018768943405389366, "loss": 0.1909, "step": 3377 }, { "epoch": 0.16005685856432125, "grad_norm": 0.20703125, "learning_rate": 0.00018768227446222622, "loss": 0.1452, "step": 3378 }, { "epoch": 0.16010424070125562, "grad_norm": 0.6328125, "learning_rate": 0.0001876751129258593, "loss": 0.8432, "step": 3379 }, { "epoch": 0.16015162283819, "grad_norm": 0.71484375, "learning_rate": 0.00018766794944495178, "loss": 0.8528, "step": 3380 }, { "epoch": 0.16019900497512438, "grad_norm": 0.6171875, "learning_rate": 0.0001876607840196625, "loss": 0.9679, "step": 3381 }, { "epoch": 0.16024638711205874, "grad_norm": 0.59765625, "learning_rate": 0.00018765361665015043, "loss": 0.9675, "step": 3382 }, { "epoch": 0.16029376924899313, "grad_norm": 0.64453125, "learning_rate": 0.00018764644733657452, "loss": 0.1435, "step": 3383 }, { "epoch": 0.1603411513859275, "grad_norm": 0.63671875, "learning_rate": 0.00018763927607909375, "loss": 0.6409, "step": 3384 }, { "epoch": 0.1603885335228619, "grad_norm": 0.337890625, "learning_rate": 0.0001876321028778672, "loss": 0.0473, "step": 3385 }, { "epoch": 0.16043591565979626, "grad_norm": 0.5859375, "learning_rate": 0.0001876249277330539, "loss": 1.3328, "step": 3386 }, { "epoch": 0.16048329779673062, "grad_norm": 0.62890625, "learning_rate": 0.00018761775064481308, "loss": 0.0951, "step": 3387 }, { "epoch": 0.16053067993366502, "grad_norm": 0.734375, "learning_rate": 0.00018761057161330386, "loss": 1.1392, "step": 3388 }, { "epoch": 0.16057806207059938, "grad_norm": 0.279296875, "learning_rate": 0.0001876033906386855, "loss": 0.037, "step": 3389 }, { "epoch": 0.16062544420753375, "grad_norm": 0.62109375, "learning_rate": 0.00018759620772111721, "loss": 0.0903, "step": 3390 }, { "epoch": 0.16067282634446814, "grad_norm": 0.419921875, "learning_rate": 0.00018758902286075837, "loss": 0.7807, "step": 3391 }, { "epoch": 0.1607202084814025, "grad_norm": 0.6171875, "learning_rate": 0.00018758183605776827, "loss": 1.0448, "step": 3392 }, { "epoch": 0.1607675906183369, "grad_norm": 0.61328125, "learning_rate": 0.00018757464731230635, "loss": 1.4464, "step": 3393 }, { "epoch": 0.16081497275527126, "grad_norm": 0.65234375, "learning_rate": 0.00018756745662453205, "loss": 1.3067, "step": 3394 }, { "epoch": 0.16086235489220563, "grad_norm": 0.57421875, "learning_rate": 0.0001875602639946048, "loss": 1.1364, "step": 3395 }, { "epoch": 0.16090973702914002, "grad_norm": 0.7734375, "learning_rate": 0.00018755306942268418, "loss": 0.1941, "step": 3396 }, { "epoch": 0.16095711916607439, "grad_norm": 0.55859375, "learning_rate": 0.00018754587290892974, "loss": 0.7959, "step": 3397 }, { "epoch": 0.16100450130300878, "grad_norm": 0.77734375, "learning_rate": 0.00018753867445350108, "loss": 1.3102, "step": 3398 }, { "epoch": 0.16105188343994314, "grad_norm": 0.51953125, "learning_rate": 0.00018753147405655787, "loss": 0.9047, "step": 3399 }, { "epoch": 0.1610992655768775, "grad_norm": 0.33984375, "learning_rate": 0.0001875242717182598, "loss": 0.0682, "step": 3400 }, { "epoch": 0.1611466477138119, "grad_norm": 0.59375, "learning_rate": 0.0001875170674387666, "loss": 1.1625, "step": 3401 }, { "epoch": 0.16119402985074627, "grad_norm": 0.68359375, "learning_rate": 0.0001875098612182381, "loss": 1.2987, "step": 3402 }, { "epoch": 0.16124141198768063, "grad_norm": 0.59765625, "learning_rate": 0.00018750265305683404, "loss": 1.1018, "step": 3403 }, { "epoch": 0.16128879412461503, "grad_norm": 0.671875, "learning_rate": 0.00018749544295471436, "loss": 1.2829, "step": 3404 }, { "epoch": 0.1613361762615494, "grad_norm": 1.3984375, "learning_rate": 0.00018748823091203892, "loss": 0.4753, "step": 3405 }, { "epoch": 0.16138355839848378, "grad_norm": 0.65625, "learning_rate": 0.00018748101692896775, "loss": 1.0085, "step": 3406 }, { "epoch": 0.16143094053541815, "grad_norm": 0.53515625, "learning_rate": 0.0001874738010056608, "loss": 0.0389, "step": 3407 }, { "epoch": 0.16147832267235251, "grad_norm": 0.490234375, "learning_rate": 0.0001874665831422781, "loss": 0.6164, "step": 3408 }, { "epoch": 0.1615257048092869, "grad_norm": 0.65625, "learning_rate": 0.00018745936333897976, "loss": 0.8375, "step": 3409 }, { "epoch": 0.16157308694622127, "grad_norm": 0.408203125, "learning_rate": 0.0001874521415959259, "loss": 0.0557, "step": 3410 }, { "epoch": 0.16162046908315564, "grad_norm": 0.68359375, "learning_rate": 0.00018744491791327668, "loss": 1.331, "step": 3411 }, { "epoch": 0.16166785122009003, "grad_norm": 0.57421875, "learning_rate": 0.00018743769229119232, "loss": 0.8699, "step": 3412 }, { "epoch": 0.1617152333570244, "grad_norm": 0.419921875, "learning_rate": 0.0001874304647298331, "loss": 0.0352, "step": 3413 }, { "epoch": 0.1617626154939588, "grad_norm": 0.1962890625, "learning_rate": 0.0001874232352293593, "loss": 0.0239, "step": 3414 }, { "epoch": 0.16180999763089315, "grad_norm": 0.62890625, "learning_rate": 0.00018741600378993124, "loss": 0.9394, "step": 3415 }, { "epoch": 0.16185737976782752, "grad_norm": 0.58203125, "learning_rate": 0.00018740877041170935, "loss": 0.6944, "step": 3416 }, { "epoch": 0.1619047619047619, "grad_norm": 0.5234375, "learning_rate": 0.000187401535094854, "loss": 1.0595, "step": 3417 }, { "epoch": 0.16195214404169628, "grad_norm": 0.5078125, "learning_rate": 0.00018739429783952574, "loss": 0.993, "step": 3418 }, { "epoch": 0.16199952617863064, "grad_norm": 0.470703125, "learning_rate": 0.000187387058645885, "loss": 0.6458, "step": 3419 }, { "epoch": 0.16204690831556504, "grad_norm": 0.7109375, "learning_rate": 0.00018737981751409241, "loss": 0.9488, "step": 3420 }, { "epoch": 0.1620942904524994, "grad_norm": 0.52734375, "learning_rate": 0.00018737257444430855, "loss": 0.7799, "step": 3421 }, { "epoch": 0.1621416725894338, "grad_norm": 0.88671875, "learning_rate": 0.00018736532943669404, "loss": 1.1519, "step": 3422 }, { "epoch": 0.16218905472636816, "grad_norm": 0.6953125, "learning_rate": 0.00018735808249140953, "loss": 1.179, "step": 3423 }, { "epoch": 0.16223643686330252, "grad_norm": 0.703125, "learning_rate": 0.00018735083360861587, "loss": 0.9812, "step": 3424 }, { "epoch": 0.16228381900023692, "grad_norm": 0.60546875, "learning_rate": 0.00018734358278847376, "loss": 1.1356, "step": 3425 }, { "epoch": 0.16233120113717128, "grad_norm": 0.65625, "learning_rate": 0.00018733633003114396, "loss": 1.0417, "step": 3426 }, { "epoch": 0.16237858327410568, "grad_norm": 0.65234375, "learning_rate": 0.00018732907533678742, "loss": 0.8498, "step": 3427 }, { "epoch": 0.16242596541104004, "grad_norm": 0.63671875, "learning_rate": 0.00018732181870556502, "loss": 0.1318, "step": 3428 }, { "epoch": 0.1624733475479744, "grad_norm": 0.59375, "learning_rate": 0.0001873145601376377, "loss": 0.9391, "step": 3429 }, { "epoch": 0.1625207296849088, "grad_norm": 0.58984375, "learning_rate": 0.00018730729963316642, "loss": 0.9662, "step": 3430 }, { "epoch": 0.16256811182184316, "grad_norm": 0.5859375, "learning_rate": 0.00018730003719231224, "loss": 1.0315, "step": 3431 }, { "epoch": 0.16261549395877753, "grad_norm": 0.69140625, "learning_rate": 0.0001872927728152362, "loss": 1.0004, "step": 3432 }, { "epoch": 0.16266287609571192, "grad_norm": 0.61328125, "learning_rate": 0.00018728550650209946, "loss": 0.0759, "step": 3433 }, { "epoch": 0.1627102582326463, "grad_norm": 0.57421875, "learning_rate": 0.00018727823825306317, "loss": 0.9821, "step": 3434 }, { "epoch": 0.16275764036958068, "grad_norm": 0.6953125, "learning_rate": 0.00018727096806828847, "loss": 0.1027, "step": 3435 }, { "epoch": 0.16280502250651505, "grad_norm": 0.443359375, "learning_rate": 0.00018726369594793672, "loss": 0.5308, "step": 3436 }, { "epoch": 0.1628524046434494, "grad_norm": 0.58984375, "learning_rate": 0.00018725642189216908, "loss": 1.4648, "step": 3437 }, { "epoch": 0.1628997867803838, "grad_norm": 0.6328125, "learning_rate": 0.000187249145901147, "loss": 1.4506, "step": 3438 }, { "epoch": 0.16294716891731817, "grad_norm": 0.5703125, "learning_rate": 0.00018724186797503177, "loss": 0.5585, "step": 3439 }, { "epoch": 0.16299455105425253, "grad_norm": 0.58984375, "learning_rate": 0.00018723458811398483, "loss": 0.1731, "step": 3440 }, { "epoch": 0.16304193319118693, "grad_norm": 0.69921875, "learning_rate": 0.00018722730631816765, "loss": 1.1493, "step": 3441 }, { "epoch": 0.1630893153281213, "grad_norm": 0.34375, "learning_rate": 0.00018722002258774172, "loss": 0.0177, "step": 3442 }, { "epoch": 0.16313669746505569, "grad_norm": 0.462890625, "learning_rate": 0.0001872127369228686, "loss": 0.3601, "step": 3443 }, { "epoch": 0.16318407960199005, "grad_norm": 0.490234375, "learning_rate": 0.00018720544932370988, "loss": 0.0731, "step": 3444 }, { "epoch": 0.16323146173892442, "grad_norm": 0.458984375, "learning_rate": 0.00018719815979042714, "loss": 0.8687, "step": 3445 }, { "epoch": 0.1632788438758588, "grad_norm": 0.89453125, "learning_rate": 0.00018719086832318213, "loss": 0.9003, "step": 3446 }, { "epoch": 0.16332622601279317, "grad_norm": 0.59765625, "learning_rate": 0.00018718357492213654, "loss": 0.1883, "step": 3447 }, { "epoch": 0.16337360814972754, "grad_norm": 0.498046875, "learning_rate": 0.0001871762795874521, "loss": 0.9352, "step": 3448 }, { "epoch": 0.16342099028666193, "grad_norm": 0.6484375, "learning_rate": 0.00018716898231929064, "loss": 1.4955, "step": 3449 }, { "epoch": 0.1634683724235963, "grad_norm": 0.421875, "learning_rate": 0.000187161683117814, "loss": 0.0188, "step": 3450 }, { "epoch": 0.1635157545605307, "grad_norm": 0.59375, "learning_rate": 0.00018715438198318407, "loss": 1.0635, "step": 3451 }, { "epoch": 0.16356313669746506, "grad_norm": 0.6953125, "learning_rate": 0.0001871470789155628, "loss": 0.8331, "step": 3452 }, { "epoch": 0.16361051883439942, "grad_norm": 0.7109375, "learning_rate": 0.00018713977391511213, "loss": 1.1757, "step": 3453 }, { "epoch": 0.16365790097133381, "grad_norm": 0.61328125, "learning_rate": 0.00018713246698199407, "loss": 1.1352, "step": 3454 }, { "epoch": 0.16370528310826818, "grad_norm": 0.48828125, "learning_rate": 0.00018712515811637073, "loss": 0.7567, "step": 3455 }, { "epoch": 0.16375266524520257, "grad_norm": 0.2275390625, "learning_rate": 0.00018711784731840415, "loss": 0.1355, "step": 3456 }, { "epoch": 0.16380004738213694, "grad_norm": 0.5703125, "learning_rate": 0.00018711053458825655, "loss": 1.2875, "step": 3457 }, { "epoch": 0.1638474295190713, "grad_norm": 0.6328125, "learning_rate": 0.00018710321992609008, "loss": 0.0153, "step": 3458 }, { "epoch": 0.1638948116560057, "grad_norm": 0.5234375, "learning_rate": 0.00018709590333206694, "loss": 0.0086, "step": 3459 }, { "epoch": 0.16394219379294006, "grad_norm": 0.9140625, "learning_rate": 0.00018708858480634945, "loss": 0.3582, "step": 3460 }, { "epoch": 0.16398957592987443, "grad_norm": 0.58984375, "learning_rate": 0.00018708126434909992, "loss": 1.1043, "step": 3461 }, { "epoch": 0.16403695806680882, "grad_norm": 0.921875, "learning_rate": 0.0001870739419604807, "loss": 0.7051, "step": 3462 }, { "epoch": 0.16408434020374318, "grad_norm": 0.57421875, "learning_rate": 0.00018706661764065417, "loss": 0.8314, "step": 3463 }, { "epoch": 0.16413172234067758, "grad_norm": 0.326171875, "learning_rate": 0.0001870592913897828, "loss": 0.0076, "step": 3464 }, { "epoch": 0.16417910447761194, "grad_norm": 0.5703125, "learning_rate": 0.0001870519632080291, "loss": 0.216, "step": 3465 }, { "epoch": 0.1642264866145463, "grad_norm": 0.380859375, "learning_rate": 0.0001870446330955556, "loss": 0.1688, "step": 3466 }, { "epoch": 0.1642738687514807, "grad_norm": 0.5859375, "learning_rate": 0.00018703730105252482, "loss": 1.0041, "step": 3467 }, { "epoch": 0.16432125088841507, "grad_norm": 0.31640625, "learning_rate": 0.0001870299670790994, "loss": 0.0299, "step": 3468 }, { "epoch": 0.16436863302534943, "grad_norm": 0.7890625, "learning_rate": 0.00018702263117544203, "loss": 0.1967, "step": 3469 }, { "epoch": 0.16441601516228382, "grad_norm": 0.6328125, "learning_rate": 0.0001870152933417154, "loss": 0.9879, "step": 3470 }, { "epoch": 0.1644633972992182, "grad_norm": 0.51171875, "learning_rate": 0.00018700795357808224, "loss": 0.6545, "step": 3471 }, { "epoch": 0.16451077943615258, "grad_norm": 0.4765625, "learning_rate": 0.00018700061188470533, "loss": 1.3322, "step": 3472 }, { "epoch": 0.16455816157308695, "grad_norm": 0.63671875, "learning_rate": 0.00018699326826174755, "loss": 1.2804, "step": 3473 }, { "epoch": 0.1646055437100213, "grad_norm": 0.546875, "learning_rate": 0.00018698592270937172, "loss": 1.1649, "step": 3474 }, { "epoch": 0.1646529258469557, "grad_norm": 0.56640625, "learning_rate": 0.0001869785752277408, "loss": 0.8521, "step": 3475 }, { "epoch": 0.16470030798389007, "grad_norm": 0.46484375, "learning_rate": 0.00018697122581701767, "loss": 0.6356, "step": 3476 }, { "epoch": 0.16474769012082444, "grad_norm": 0.365234375, "learning_rate": 0.00018696387447736544, "loss": 0.1956, "step": 3477 }, { "epoch": 0.16479507225775883, "grad_norm": 0.62109375, "learning_rate": 0.0001869565212089471, "loss": 0.9507, "step": 3478 }, { "epoch": 0.1648424543946932, "grad_norm": 0.9453125, "learning_rate": 0.00018694916601192573, "loss": 0.1632, "step": 3479 }, { "epoch": 0.1648898365316276, "grad_norm": 0.609375, "learning_rate": 0.00018694180888646447, "loss": 0.953, "step": 3480 }, { "epoch": 0.16493721866856195, "grad_norm": 0.462890625, "learning_rate": 0.0001869344498327265, "loss": 1.1483, "step": 3481 }, { "epoch": 0.16498460080549632, "grad_norm": 0.9140625, "learning_rate": 0.00018692708885087504, "loss": 1.2897, "step": 3482 }, { "epoch": 0.1650319829424307, "grad_norm": 0.71484375, "learning_rate": 0.00018691972594107333, "loss": 0.8568, "step": 3483 }, { "epoch": 0.16507936507936508, "grad_norm": 0.73046875, "learning_rate": 0.00018691236110348467, "loss": 0.276, "step": 3484 }, { "epoch": 0.16512674721629947, "grad_norm": 0.75, "learning_rate": 0.00018690499433827244, "loss": 0.7986, "step": 3485 }, { "epoch": 0.16517412935323383, "grad_norm": 0.4453125, "learning_rate": 0.00018689762564559997, "loss": 0.0847, "step": 3486 }, { "epoch": 0.1652215114901682, "grad_norm": 0.255859375, "learning_rate": 0.00018689025502563076, "loss": 0.156, "step": 3487 }, { "epoch": 0.1652688936271026, "grad_norm": 0.6640625, "learning_rate": 0.00018688288247852822, "loss": 1.1043, "step": 3488 }, { "epoch": 0.16531627576403696, "grad_norm": 0.6640625, "learning_rate": 0.00018687550800445591, "loss": 1.3915, "step": 3489 }, { "epoch": 0.16536365790097132, "grad_norm": 0.640625, "learning_rate": 0.00018686813160357732, "loss": 0.8655, "step": 3490 }, { "epoch": 0.16541104003790572, "grad_norm": 0.55078125, "learning_rate": 0.00018686075327605615, "loss": 1.0847, "step": 3491 }, { "epoch": 0.16545842217484008, "grad_norm": 0.7265625, "learning_rate": 0.000186853373022056, "loss": 1.0577, "step": 3492 }, { "epoch": 0.16550580431177447, "grad_norm": 0.365234375, "learning_rate": 0.00018684599084174051, "loss": 0.199, "step": 3493 }, { "epoch": 0.16555318644870884, "grad_norm": 0.640625, "learning_rate": 0.00018683860673527348, "loss": 1.3364, "step": 3494 }, { "epoch": 0.1656005685856432, "grad_norm": 0.51953125, "learning_rate": 0.0001868312207028186, "loss": 0.2806, "step": 3495 }, { "epoch": 0.1656479507225776, "grad_norm": 0.6328125, "learning_rate": 0.00018682383274453977, "loss": 0.9413, "step": 3496 }, { "epoch": 0.16569533285951196, "grad_norm": 0.734375, "learning_rate": 0.00018681644286060083, "loss": 0.8934, "step": 3497 }, { "epoch": 0.16574271499644633, "grad_norm": 0.5390625, "learning_rate": 0.00018680905105116562, "loss": 0.0685, "step": 3498 }, { "epoch": 0.16579009713338072, "grad_norm": 0.71484375, "learning_rate": 0.00018680165731639815, "loss": 0.8225, "step": 3499 }, { "epoch": 0.16583747927031509, "grad_norm": 0.52734375, "learning_rate": 0.00018679426165646237, "loss": 0.9894, "step": 3500 }, { "epoch": 0.16588486140724948, "grad_norm": 0.53125, "learning_rate": 0.00018678686407152227, "loss": 1.1245, "step": 3501 }, { "epoch": 0.16593224354418384, "grad_norm": 0.498046875, "learning_rate": 0.00018677946456174206, "loss": 0.104, "step": 3502 }, { "epoch": 0.1659796256811182, "grad_norm": 0.55078125, "learning_rate": 0.00018677206312728569, "loss": 0.9713, "step": 3503 }, { "epoch": 0.1660270078180526, "grad_norm": 0.79296875, "learning_rate": 0.0001867646597683174, "loss": 0.0467, "step": 3504 }, { "epoch": 0.16607438995498697, "grad_norm": 0.30859375, "learning_rate": 0.00018675725448500136, "loss": 0.1175, "step": 3505 }, { "epoch": 0.16612177209192133, "grad_norm": 0.7109375, "learning_rate": 0.00018674984727750184, "loss": 0.274, "step": 3506 }, { "epoch": 0.16616915422885573, "grad_norm": 0.52734375, "learning_rate": 0.0001867424381459831, "loss": 0.8968, "step": 3507 }, { "epoch": 0.1662165363657901, "grad_norm": 0.703125, "learning_rate": 0.0001867350270906095, "loss": 1.1854, "step": 3508 }, { "epoch": 0.16626391850272448, "grad_norm": 0.6484375, "learning_rate": 0.00018672761411154536, "loss": 1.1038, "step": 3509 }, { "epoch": 0.16631130063965885, "grad_norm": 0.578125, "learning_rate": 0.00018672019920895513, "loss": 0.5199, "step": 3510 }, { "epoch": 0.16635868277659321, "grad_norm": 0.5546875, "learning_rate": 0.00018671278238300328, "loss": 1.3755, "step": 3511 }, { "epoch": 0.1664060649135276, "grad_norm": 0.64453125, "learning_rate": 0.0001867053636338542, "loss": 0.9573, "step": 3512 }, { "epoch": 0.16645344705046197, "grad_norm": 0.2392578125, "learning_rate": 0.00018669794296167258, "loss": 0.0389, "step": 3513 }, { "epoch": 0.16650082918739637, "grad_norm": 0.111328125, "learning_rate": 0.0001866905203666229, "loss": 0.0144, "step": 3514 }, { "epoch": 0.16654821132433073, "grad_norm": 0.78515625, "learning_rate": 0.00018668309584886984, "loss": 1.4951, "step": 3515 }, { "epoch": 0.1665955934612651, "grad_norm": 0.58203125, "learning_rate": 0.00018667566940857805, "loss": 0.8611, "step": 3516 }, { "epoch": 0.1666429755981995, "grad_norm": 0.484375, "learning_rate": 0.00018666824104591218, "loss": 0.9698, "step": 3517 }, { "epoch": 0.16669035773513385, "grad_norm": 0.65234375, "learning_rate": 0.0001866608107610371, "loss": 1.074, "step": 3518 }, { "epoch": 0.16673773987206822, "grad_norm": 0.6484375, "learning_rate": 0.0001866533785541175, "loss": 0.5675, "step": 3519 }, { "epoch": 0.1667851220090026, "grad_norm": 0.20703125, "learning_rate": 0.00018664594442531832, "loss": 0.0248, "step": 3520 }, { "epoch": 0.16683250414593698, "grad_norm": 0.38671875, "learning_rate": 0.0001866385083748043, "loss": 0.0185, "step": 3521 }, { "epoch": 0.16687988628287137, "grad_norm": 0.66015625, "learning_rate": 0.0001866310704027405, "loss": 0.7097, "step": 3522 }, { "epoch": 0.16692726841980574, "grad_norm": 0.482421875, "learning_rate": 0.00018662363050929182, "loss": 0.1294, "step": 3523 }, { "epoch": 0.1669746505567401, "grad_norm": 0.58984375, "learning_rate": 0.00018661618869462328, "loss": 0.8745, "step": 3524 }, { "epoch": 0.1670220326936745, "grad_norm": 0.5859375, "learning_rate": 0.00018660874495889996, "loss": 1.0901, "step": 3525 }, { "epoch": 0.16706941483060886, "grad_norm": 0.296875, "learning_rate": 0.00018660129930228695, "loss": 0.1195, "step": 3526 }, { "epoch": 0.16711679696754322, "grad_norm": 0.310546875, "learning_rate": 0.0001865938517249493, "loss": 0.0636, "step": 3527 }, { "epoch": 0.16716417910447762, "grad_norm": 0.63671875, "learning_rate": 0.0001865864022270523, "loss": 0.9913, "step": 3528 }, { "epoch": 0.16721156124141198, "grad_norm": 0.6328125, "learning_rate": 0.00018657895080876109, "loss": 0.8216, "step": 3529 }, { "epoch": 0.16725894337834638, "grad_norm": 0.671875, "learning_rate": 0.00018657149747024099, "loss": 1.1584, "step": 3530 }, { "epoch": 0.16730632551528074, "grad_norm": 0.78515625, "learning_rate": 0.00018656404221165728, "loss": 1.5047, "step": 3531 }, { "epoch": 0.1673537076522151, "grad_norm": 0.53125, "learning_rate": 0.00018655658503317533, "loss": 1.0045, "step": 3532 }, { "epoch": 0.1674010897891495, "grad_norm": 0.53515625, "learning_rate": 0.0001865491259349605, "loss": 1.0516, "step": 3533 }, { "epoch": 0.16744847192608386, "grad_norm": 0.5234375, "learning_rate": 0.00018654166491717826, "loss": 0.742, "step": 3534 }, { "epoch": 0.16749585406301823, "grad_norm": 0.58203125, "learning_rate": 0.0001865342019799941, "loss": 0.2457, "step": 3535 }, { "epoch": 0.16754323619995262, "grad_norm": 0.5234375, "learning_rate": 0.00018652673712357345, "loss": 1.1417, "step": 3536 }, { "epoch": 0.167590618336887, "grad_norm": 0.3671875, "learning_rate": 0.00018651927034808198, "loss": 0.1849, "step": 3537 }, { "epoch": 0.16763800047382138, "grad_norm": 0.62109375, "learning_rate": 0.00018651180165368524, "loss": 1.0322, "step": 3538 }, { "epoch": 0.16768538261075575, "grad_norm": 0.68359375, "learning_rate": 0.00018650433104054888, "loss": 1.1652, "step": 3539 }, { "epoch": 0.1677327647476901, "grad_norm": 0.6484375, "learning_rate": 0.0001864968585088386, "loss": 1.1635, "step": 3540 }, { "epoch": 0.1677801468846245, "grad_norm": 0.1728515625, "learning_rate": 0.00018648938405872015, "loss": 0.0201, "step": 3541 }, { "epoch": 0.16782752902155887, "grad_norm": 0.66015625, "learning_rate": 0.00018648190769035927, "loss": 1.3377, "step": 3542 }, { "epoch": 0.16787491115849326, "grad_norm": 0.1572265625, "learning_rate": 0.0001864744294039218, "loss": 0.0092, "step": 3543 }, { "epoch": 0.16792229329542763, "grad_norm": 0.609375, "learning_rate": 0.00018646694919957356, "loss": 0.9292, "step": 3544 }, { "epoch": 0.167969675432362, "grad_norm": 0.0072021484375, "learning_rate": 0.00018645946707748052, "loss": 0.0004, "step": 3545 }, { "epoch": 0.16801705756929639, "grad_norm": 0.69140625, "learning_rate": 0.0001864519830378086, "loss": 1.5274, "step": 3546 }, { "epoch": 0.16806443970623075, "grad_norm": 0.56640625, "learning_rate": 0.00018644449708072377, "loss": 0.6784, "step": 3547 }, { "epoch": 0.16811182184316512, "grad_norm": 0.6484375, "learning_rate": 0.00018643700920639207, "loss": 1.0314, "step": 3548 }, { "epoch": 0.1681592039800995, "grad_norm": 0.61328125, "learning_rate": 0.00018642951941497956, "loss": 1.3053, "step": 3549 }, { "epoch": 0.16820658611703387, "grad_norm": 0.546875, "learning_rate": 0.00018642202770665237, "loss": 1.3362, "step": 3550 }, { "epoch": 0.16825396825396827, "grad_norm": 0.69140625, "learning_rate": 0.0001864145340815767, "loss": 1.524, "step": 3551 }, { "epoch": 0.16830135039090263, "grad_norm": 0.6171875, "learning_rate": 0.00018640703853991869, "loss": 0.9737, "step": 3552 }, { "epoch": 0.168348732527837, "grad_norm": 0.40625, "learning_rate": 0.00018639954108184457, "loss": 0.5459, "step": 3553 }, { "epoch": 0.1683961146647714, "grad_norm": 0.498046875, "learning_rate": 0.00018639204170752067, "loss": 0.8134, "step": 3554 }, { "epoch": 0.16844349680170576, "grad_norm": 0.490234375, "learning_rate": 0.00018638454041711332, "loss": 1.1741, "step": 3555 }, { "epoch": 0.16849087893864012, "grad_norm": 0.484375, "learning_rate": 0.00018637703721078886, "loss": 0.6779, "step": 3556 }, { "epoch": 0.1685382610755745, "grad_norm": 0.0123291015625, "learning_rate": 0.00018636953208871372, "loss": 0.0007, "step": 3557 }, { "epoch": 0.16858564321250888, "grad_norm": 0.61328125, "learning_rate": 0.0001863620250510544, "loss": 0.7599, "step": 3558 }, { "epoch": 0.16863302534944327, "grad_norm": 0.2412109375, "learning_rate": 0.0001863545160979773, "loss": 0.0232, "step": 3559 }, { "epoch": 0.16868040748637764, "grad_norm": 0.322265625, "learning_rate": 0.00018634700522964903, "loss": 0.0911, "step": 3560 }, { "epoch": 0.168727789623312, "grad_norm": 0.7734375, "learning_rate": 0.00018633949244623615, "loss": 1.1788, "step": 3561 }, { "epoch": 0.1687751717602464, "grad_norm": 0.2138671875, "learning_rate": 0.00018633197774790535, "loss": 0.0551, "step": 3562 }, { "epoch": 0.16882255389718076, "grad_norm": 0.490234375, "learning_rate": 0.00018632446113482317, "loss": 0.9242, "step": 3563 }, { "epoch": 0.16886993603411513, "grad_norm": 0.5625, "learning_rate": 0.0001863169426071564, "loss": 0.9733, "step": 3564 }, { "epoch": 0.16891731817104952, "grad_norm": 0.58984375, "learning_rate": 0.00018630942216507182, "loss": 1.0315, "step": 3565 }, { "epoch": 0.16896470030798388, "grad_norm": 0.91015625, "learning_rate": 0.00018630189980873617, "loss": 1.143, "step": 3566 }, { "epoch": 0.16901208244491828, "grad_norm": 0.515625, "learning_rate": 0.00018629437553831628, "loss": 0.8955, "step": 3567 }, { "epoch": 0.16905946458185264, "grad_norm": 0.291015625, "learning_rate": 0.00018628684935397907, "loss": 0.0415, "step": 3568 }, { "epoch": 0.169106846718787, "grad_norm": 0.10791015625, "learning_rate": 0.00018627932125589146, "loss": 0.0113, "step": 3569 }, { "epoch": 0.1691542288557214, "grad_norm": 0.447265625, "learning_rate": 0.0001862717912442204, "loss": 0.8555, "step": 3570 }, { "epoch": 0.16920161099265577, "grad_norm": 0.5625, "learning_rate": 0.00018626425931913293, "loss": 0.1145, "step": 3571 }, { "epoch": 0.16924899312959013, "grad_norm": 0.546875, "learning_rate": 0.00018625672548079606, "loss": 0.8483, "step": 3572 }, { "epoch": 0.16929637526652452, "grad_norm": 0.57421875, "learning_rate": 0.00018624918972937686, "loss": 1.2921, "step": 3573 }, { "epoch": 0.1693437574034589, "grad_norm": 0.62109375, "learning_rate": 0.00018624165206504253, "loss": 1.0215, "step": 3574 }, { "epoch": 0.16939113954039328, "grad_norm": 0.07958984375, "learning_rate": 0.0001862341124879602, "loss": 0.0082, "step": 3575 }, { "epoch": 0.16943852167732765, "grad_norm": 0.63671875, "learning_rate": 0.00018622657099829715, "loss": 1.2434, "step": 3576 }, { "epoch": 0.169485903814262, "grad_norm": 0.5390625, "learning_rate": 0.00018621902759622056, "loss": 0.1075, "step": 3577 }, { "epoch": 0.1695332859511964, "grad_norm": 0.91015625, "learning_rate": 0.00018621148228189778, "loss": 0.5215, "step": 3578 }, { "epoch": 0.16958066808813077, "grad_norm": 0.625, "learning_rate": 0.00018620393505549616, "loss": 0.1551, "step": 3579 }, { "epoch": 0.16962805022506516, "grad_norm": 0.314453125, "learning_rate": 0.00018619638591718307, "loss": 0.0896, "step": 3580 }, { "epoch": 0.16967543236199953, "grad_norm": 0.5390625, "learning_rate": 0.00018618883486712595, "loss": 0.6216, "step": 3581 }, { "epoch": 0.1697228144989339, "grad_norm": 0.7578125, "learning_rate": 0.0001861812819054923, "loss": 0.8138, "step": 3582 }, { "epoch": 0.1697701966358683, "grad_norm": 0.6640625, "learning_rate": 0.0001861737270324496, "loss": 1.1879, "step": 3583 }, { "epoch": 0.16981757877280265, "grad_norm": 0.6875, "learning_rate": 0.0001861661702481654, "loss": 1.7843, "step": 3584 }, { "epoch": 0.16986496090973702, "grad_norm": 0.3515625, "learning_rate": 0.00018615861155280735, "loss": 0.1884, "step": 3585 }, { "epoch": 0.1699123430466714, "grad_norm": 0.52734375, "learning_rate": 0.0001861510509465431, "loss": 0.214, "step": 3586 }, { "epoch": 0.16995972518360578, "grad_norm": 0.625, "learning_rate": 0.00018614348842954024, "loss": 0.0472, "step": 3587 }, { "epoch": 0.17000710732054017, "grad_norm": 0.859375, "learning_rate": 0.0001861359240019666, "loss": 0.7151, "step": 3588 }, { "epoch": 0.17005448945747453, "grad_norm": 0.69140625, "learning_rate": 0.00018612835766398987, "loss": 1.2108, "step": 3589 }, { "epoch": 0.1701018715944089, "grad_norm": 0.98046875, "learning_rate": 0.00018612078941577799, "loss": 1.048, "step": 3590 }, { "epoch": 0.1701492537313433, "grad_norm": 0.5390625, "learning_rate": 0.00018611321925749867, "loss": 1.0845, "step": 3591 }, { "epoch": 0.17019663586827766, "grad_norm": 0.6328125, "learning_rate": 0.0001861056471893199, "loss": 1.0258, "step": 3592 }, { "epoch": 0.17024401800521202, "grad_norm": 0.625, "learning_rate": 0.00018609807321140956, "loss": 1.1531, "step": 3593 }, { "epoch": 0.17029140014214642, "grad_norm": 0.412109375, "learning_rate": 0.0001860904973239357, "loss": 0.7115, "step": 3594 }, { "epoch": 0.17033878227908078, "grad_norm": 0.65234375, "learning_rate": 0.00018608291952706626, "loss": 0.7323, "step": 3595 }, { "epoch": 0.17038616441601517, "grad_norm": 0.53515625, "learning_rate": 0.0001860753398209694, "loss": 1.2005, "step": 3596 }, { "epoch": 0.17043354655294954, "grad_norm": 0.375, "learning_rate": 0.00018606775820581315, "loss": 0.0186, "step": 3597 }, { "epoch": 0.1704809286898839, "grad_norm": 0.62109375, "learning_rate": 0.0001860601746817657, "loss": 0.8583, "step": 3598 }, { "epoch": 0.1705283108268183, "grad_norm": 0.62890625, "learning_rate": 0.00018605258924899527, "loss": 0.8947, "step": 3599 }, { "epoch": 0.17057569296375266, "grad_norm": 0.482421875, "learning_rate": 0.00018604500190767007, "loss": 0.7192, "step": 3600 }, { "epoch": 0.17062307510068703, "grad_norm": 0.6640625, "learning_rate": 0.00018603741265795835, "loss": 0.0833, "step": 3601 }, { "epoch": 0.17067045723762142, "grad_norm": 0.58984375, "learning_rate": 0.0001860298215000285, "loss": 0.8278, "step": 3602 }, { "epoch": 0.17071783937455579, "grad_norm": 0.83203125, "learning_rate": 0.00018602222843404882, "loss": 0.6577, "step": 3603 }, { "epoch": 0.17076522151149018, "grad_norm": 0.76953125, "learning_rate": 0.00018601463346018776, "loss": 0.9069, "step": 3604 }, { "epoch": 0.17081260364842454, "grad_norm": 0.49609375, "learning_rate": 0.00018600703657861372, "loss": 0.4135, "step": 3605 }, { "epoch": 0.1708599857853589, "grad_norm": 0.65625, "learning_rate": 0.00018599943778949523, "loss": 0.2287, "step": 3606 }, { "epoch": 0.1709073679222933, "grad_norm": 0.78125, "learning_rate": 0.0001859918370930008, "loss": 0.5911, "step": 3607 }, { "epoch": 0.17095475005922767, "grad_norm": 0.2099609375, "learning_rate": 0.00018598423448929906, "loss": 0.0499, "step": 3608 }, { "epoch": 0.17100213219616206, "grad_norm": 0.7421875, "learning_rate": 0.00018597662997855855, "loss": 1.2342, "step": 3609 }, { "epoch": 0.17104951433309643, "grad_norm": 0.83984375, "learning_rate": 0.00018596902356094797, "loss": 1.1811, "step": 3610 }, { "epoch": 0.1710968964700308, "grad_norm": 0.58203125, "learning_rate": 0.00018596141523663601, "loss": 1.1274, "step": 3611 }, { "epoch": 0.17114427860696518, "grad_norm": 0.5546875, "learning_rate": 0.00018595380500579142, "loss": 0.9245, "step": 3612 }, { "epoch": 0.17119166074389955, "grad_norm": 0.73828125, "learning_rate": 0.00018594619286858301, "loss": 0.4514, "step": 3613 }, { "epoch": 0.1712390428808339, "grad_norm": 0.275390625, "learning_rate": 0.00018593857882517957, "loss": 0.0604, "step": 3614 }, { "epoch": 0.1712864250177683, "grad_norm": 0.5859375, "learning_rate": 0.00018593096287575, "loss": 0.7656, "step": 3615 }, { "epoch": 0.17133380715470267, "grad_norm": 0.059814453125, "learning_rate": 0.0001859233450204632, "loss": 0.0035, "step": 3616 }, { "epoch": 0.17138118929163706, "grad_norm": 0.4921875, "learning_rate": 0.00018591572525948814, "loss": 0.8791, "step": 3617 }, { "epoch": 0.17142857142857143, "grad_norm": 0.6484375, "learning_rate": 0.0001859081035929938, "loss": 1.2135, "step": 3618 }, { "epoch": 0.1714759535655058, "grad_norm": 0.6015625, "learning_rate": 0.0001859004800211492, "loss": 0.5741, "step": 3619 }, { "epoch": 0.1715233357024402, "grad_norm": 0.84765625, "learning_rate": 0.00018589285454412348, "loss": 0.5292, "step": 3620 }, { "epoch": 0.17157071783937455, "grad_norm": 0.859375, "learning_rate": 0.00018588522716208575, "loss": 0.0636, "step": 3621 }, { "epoch": 0.17161809997630892, "grad_norm": 0.65234375, "learning_rate": 0.00018587759787520514, "loss": 0.921, "step": 3622 }, { "epoch": 0.1716654821132433, "grad_norm": 0.6171875, "learning_rate": 0.0001858699666836509, "loss": 1.152, "step": 3623 }, { "epoch": 0.17171286425017768, "grad_norm": 0.54296875, "learning_rate": 0.00018586233358759222, "loss": 0.4444, "step": 3624 }, { "epoch": 0.17176024638711207, "grad_norm": 0.625, "learning_rate": 0.00018585469858719845, "loss": 0.6908, "step": 3625 }, { "epoch": 0.17180762852404644, "grad_norm": 0.67578125, "learning_rate": 0.00018584706168263895, "loss": 1.046, "step": 3626 }, { "epoch": 0.1718550106609808, "grad_norm": 0.58203125, "learning_rate": 0.00018583942287408303, "loss": 1.1907, "step": 3627 }, { "epoch": 0.1719023927979152, "grad_norm": 0.58984375, "learning_rate": 0.00018583178216170017, "loss": 0.9779, "step": 3628 }, { "epoch": 0.17194977493484956, "grad_norm": 0.515625, "learning_rate": 0.0001858241395456598, "loss": 1.2669, "step": 3629 }, { "epoch": 0.17199715707178392, "grad_norm": 0.39453125, "learning_rate": 0.00018581649502613138, "loss": 0.0819, "step": 3630 }, { "epoch": 0.17204453920871832, "grad_norm": 0.50390625, "learning_rate": 0.00018580884860328455, "loss": 0.0316, "step": 3631 }, { "epoch": 0.17209192134565268, "grad_norm": 0.474609375, "learning_rate": 0.00018580120027728887, "loss": 1.0357, "step": 3632 }, { "epoch": 0.17213930348258707, "grad_norm": 0.6640625, "learning_rate": 0.00018579355004831393, "loss": 1.4687, "step": 3633 }, { "epoch": 0.17218668561952144, "grad_norm": 0.8515625, "learning_rate": 0.00018578589791652946, "loss": 0.9925, "step": 3634 }, { "epoch": 0.1722340677564558, "grad_norm": 0.546875, "learning_rate": 0.00018577824388210515, "loss": 0.9039, "step": 3635 }, { "epoch": 0.1722814498933902, "grad_norm": 0.44921875, "learning_rate": 0.00018577058794521075, "loss": 0.0527, "step": 3636 }, { "epoch": 0.17232883203032456, "grad_norm": 0.5546875, "learning_rate": 0.00018576293010601604, "loss": 0.5258, "step": 3637 }, { "epoch": 0.17237621416725896, "grad_norm": 0.451171875, "learning_rate": 0.0001857552703646909, "loss": 0.1037, "step": 3638 }, { "epoch": 0.17242359630419332, "grad_norm": 0.0888671875, "learning_rate": 0.00018574760872140523, "loss": 0.0104, "step": 3639 }, { "epoch": 0.1724709784411277, "grad_norm": 0.55078125, "learning_rate": 0.00018573994517632892, "loss": 1.4852, "step": 3640 }, { "epoch": 0.17251836057806208, "grad_norm": 0.6640625, "learning_rate": 0.00018573227972963196, "loss": 1.1529, "step": 3641 }, { "epoch": 0.17256574271499645, "grad_norm": 0.6015625, "learning_rate": 0.00018572461238148432, "loss": 1.4178, "step": 3642 }, { "epoch": 0.1726131248519308, "grad_norm": 1.15625, "learning_rate": 0.00018571694313205614, "loss": 0.4535, "step": 3643 }, { "epoch": 0.1726605069888652, "grad_norm": 0.6484375, "learning_rate": 0.00018570927198151743, "loss": 0.0695, "step": 3644 }, { "epoch": 0.17270788912579957, "grad_norm": 1.078125, "learning_rate": 0.00018570159893003835, "loss": 0.0868, "step": 3645 }, { "epoch": 0.17275527126273396, "grad_norm": 0.54296875, "learning_rate": 0.0001856939239777891, "loss": 0.6132, "step": 3646 }, { "epoch": 0.17280265339966833, "grad_norm": 0.609375, "learning_rate": 0.0001856862471249399, "loss": 1.2419, "step": 3647 }, { "epoch": 0.1728500355366027, "grad_norm": 0.482421875, "learning_rate": 0.00018567856837166104, "loss": 0.3735, "step": 3648 }, { "epoch": 0.17289741767353708, "grad_norm": 0.55078125, "learning_rate": 0.00018567088771812275, "loss": 0.7501, "step": 3649 }, { "epoch": 0.17294479981047145, "grad_norm": 0.59765625, "learning_rate": 0.00018566320516449545, "loss": 1.2527, "step": 3650 }, { "epoch": 0.17299218194740582, "grad_norm": 0.5390625, "learning_rate": 0.00018565552071094947, "loss": 1.0334, "step": 3651 }, { "epoch": 0.1730395640843402, "grad_norm": 0.58203125, "learning_rate": 0.0001856478343576553, "loss": 0.0948, "step": 3652 }, { "epoch": 0.17308694622127457, "grad_norm": 0.8515625, "learning_rate": 0.0001856401461047834, "loss": 0.1733, "step": 3653 }, { "epoch": 0.17313432835820897, "grad_norm": 0.55859375, "learning_rate": 0.00018563245595250427, "loss": 1.0906, "step": 3654 }, { "epoch": 0.17318171049514333, "grad_norm": 0.5546875, "learning_rate": 0.00018562476390098848, "loss": 0.8008, "step": 3655 }, { "epoch": 0.1732290926320777, "grad_norm": 0.23046875, "learning_rate": 0.00018561706995040661, "loss": 0.0755, "step": 3656 }, { "epoch": 0.1732764747690121, "grad_norm": 0.609375, "learning_rate": 0.00018560937410092934, "loss": 1.0517, "step": 3657 }, { "epoch": 0.17332385690594646, "grad_norm": 0.58984375, "learning_rate": 0.00018560167635272735, "loss": 1.3768, "step": 3658 }, { "epoch": 0.17337123904288082, "grad_norm": 0.234375, "learning_rate": 0.00018559397670597135, "loss": 0.0153, "step": 3659 }, { "epoch": 0.1734186211798152, "grad_norm": 0.59375, "learning_rate": 0.00018558627516083212, "loss": 1.3332, "step": 3660 }, { "epoch": 0.17346600331674958, "grad_norm": 0.23828125, "learning_rate": 0.00018557857171748045, "loss": 0.1742, "step": 3661 }, { "epoch": 0.17351338545368397, "grad_norm": 0.66015625, "learning_rate": 0.00018557086637608722, "loss": 0.9789, "step": 3662 }, { "epoch": 0.17356076759061834, "grad_norm": 0.36328125, "learning_rate": 0.00018556315913682335, "loss": 0.1682, "step": 3663 }, { "epoch": 0.1736081497275527, "grad_norm": 0.67578125, "learning_rate": 0.00018555544999985973, "loss": 1.0887, "step": 3664 }, { "epoch": 0.1736555318644871, "grad_norm": 0.64453125, "learning_rate": 0.00018554773896536735, "loss": 1.5074, "step": 3665 }, { "epoch": 0.17370291400142146, "grad_norm": 0.060302734375, "learning_rate": 0.00018554002603351724, "loss": 0.0026, "step": 3666 }, { "epoch": 0.17375029613835585, "grad_norm": 0.375, "learning_rate": 0.00018553231120448048, "loss": 0.0545, "step": 3667 }, { "epoch": 0.17379767827529022, "grad_norm": 0.6015625, "learning_rate": 0.00018552459447842813, "loss": 1.0061, "step": 3668 }, { "epoch": 0.17384506041222458, "grad_norm": 0.68359375, "learning_rate": 0.0001855168758555314, "loss": 1.1792, "step": 3669 }, { "epoch": 0.17389244254915898, "grad_norm": 0.3203125, "learning_rate": 0.00018550915533596145, "loss": 0.0447, "step": 3670 }, { "epoch": 0.17393982468609334, "grad_norm": 0.6171875, "learning_rate": 0.0001855014329198895, "loss": 1.3617, "step": 3671 }, { "epoch": 0.1739872068230277, "grad_norm": 0.65234375, "learning_rate": 0.00018549370860748688, "loss": 0.8235, "step": 3672 }, { "epoch": 0.1740345889599621, "grad_norm": 0.62890625, "learning_rate": 0.00018548598239892484, "loss": 1.2566, "step": 3673 }, { "epoch": 0.17408197109689647, "grad_norm": 0.5703125, "learning_rate": 0.00018547825429437474, "loss": 0.8101, "step": 3674 }, { "epoch": 0.17412935323383086, "grad_norm": 0.48828125, "learning_rate": 0.00018547052429400803, "loss": 1.1601, "step": 3675 }, { "epoch": 0.17417673537076522, "grad_norm": 0.66015625, "learning_rate": 0.00018546279239799613, "loss": 0.0429, "step": 3676 }, { "epoch": 0.1742241175076996, "grad_norm": 0.65625, "learning_rate": 0.00018545505860651055, "loss": 1.0094, "step": 3677 }, { "epoch": 0.17427149964463398, "grad_norm": 0.4921875, "learning_rate": 0.00018544732291972274, "loss": 1.0445, "step": 3678 }, { "epoch": 0.17431888178156835, "grad_norm": 0.64453125, "learning_rate": 0.00018543958533780437, "loss": 0.3371, "step": 3679 }, { "epoch": 0.1743662639185027, "grad_norm": 0.6484375, "learning_rate": 0.000185431845860927, "loss": 0.7795, "step": 3680 }, { "epoch": 0.1744136460554371, "grad_norm": 0.5546875, "learning_rate": 0.00018542410448926227, "loss": 0.0906, "step": 3681 }, { "epoch": 0.17446102819237147, "grad_norm": 0.609375, "learning_rate": 0.00018541636122298188, "loss": 0.7835, "step": 3682 }, { "epoch": 0.17450841032930586, "grad_norm": 0.40625, "learning_rate": 0.0001854086160622576, "loss": 0.36, "step": 3683 }, { "epoch": 0.17455579246624023, "grad_norm": 0.34375, "learning_rate": 0.00018540086900726118, "loss": 0.0148, "step": 3684 }, { "epoch": 0.1746031746031746, "grad_norm": 0.64453125, "learning_rate": 0.00018539312005816445, "loss": 1.1809, "step": 3685 }, { "epoch": 0.174650556740109, "grad_norm": 0.6484375, "learning_rate": 0.00018538536921513927, "loss": 0.9764, "step": 3686 }, { "epoch": 0.17469793887704335, "grad_norm": 0.5546875, "learning_rate": 0.00018537761647835754, "loss": 0.9014, "step": 3687 }, { "epoch": 0.17474532101397772, "grad_norm": 0.7421875, "learning_rate": 0.00018536986184799123, "loss": 0.2781, "step": 3688 }, { "epoch": 0.1747927031509121, "grad_norm": 0.68359375, "learning_rate": 0.00018536210532421233, "loss": 0.8563, "step": 3689 }, { "epoch": 0.17484008528784648, "grad_norm": 0.32421875, "learning_rate": 0.00018535434690719285, "loss": 0.0401, "step": 3690 }, { "epoch": 0.17488746742478087, "grad_norm": 0.81640625, "learning_rate": 0.00018534658659710487, "loss": 1.1854, "step": 3691 }, { "epoch": 0.17493484956171523, "grad_norm": 0.66015625, "learning_rate": 0.0001853388243941205, "loss": 1.3527, "step": 3692 }, { "epoch": 0.1749822316986496, "grad_norm": 0.4921875, "learning_rate": 0.00018533106029841188, "loss": 1.0618, "step": 3693 }, { "epoch": 0.175029613835584, "grad_norm": 0.234375, "learning_rate": 0.00018532329431015127, "loss": 0.1632, "step": 3694 }, { "epoch": 0.17507699597251836, "grad_norm": 0.609375, "learning_rate": 0.00018531552642951087, "loss": 1.3013, "step": 3695 }, { "epoch": 0.17512437810945275, "grad_norm": 0.78125, "learning_rate": 0.00018530775665666295, "loss": 1.2847, "step": 3696 }, { "epoch": 0.17517176024638711, "grad_norm": 0.77734375, "learning_rate": 0.0001852999849917799, "loss": 0.2259, "step": 3697 }, { "epoch": 0.17521914238332148, "grad_norm": 1.6328125, "learning_rate": 0.000185292211435034, "loss": 1.0169, "step": 3698 }, { "epoch": 0.17526652452025587, "grad_norm": 0.87890625, "learning_rate": 0.00018528443598659768, "loss": 0.9275, "step": 3699 }, { "epoch": 0.17531390665719024, "grad_norm": 0.671875, "learning_rate": 0.00018527665864664344, "loss": 0.1529, "step": 3700 }, { "epoch": 0.1753612887941246, "grad_norm": 0.58203125, "learning_rate": 0.00018526887941534373, "loss": 1.0, "step": 3701 }, { "epoch": 0.175408670931059, "grad_norm": 0.5078125, "learning_rate": 0.00018526109829287112, "loss": 0.6171, "step": 3702 }, { "epoch": 0.17545605306799336, "grad_norm": 0.341796875, "learning_rate": 0.00018525331527939818, "loss": 0.1511, "step": 3703 }, { "epoch": 0.17550343520492775, "grad_norm": 0.099609375, "learning_rate": 0.00018524553037509747, "loss": 0.0093, "step": 3704 }, { "epoch": 0.17555081734186212, "grad_norm": 0.65234375, "learning_rate": 0.00018523774358014173, "loss": 1.238, "step": 3705 }, { "epoch": 0.17559819947879649, "grad_norm": 0.51171875, "learning_rate": 0.0001852299548947036, "loss": 0.6388, "step": 3706 }, { "epoch": 0.17564558161573088, "grad_norm": 0.6015625, "learning_rate": 0.00018522216431895587, "loss": 0.8282, "step": 3707 }, { "epoch": 0.17569296375266524, "grad_norm": 0.53125, "learning_rate": 0.00018521437185307132, "loss": 0.5269, "step": 3708 }, { "epoch": 0.1757403458895996, "grad_norm": 0.466796875, "learning_rate": 0.00018520657749722275, "loss": 0.666, "step": 3709 }, { "epoch": 0.175787728026534, "grad_norm": 0.53125, "learning_rate": 0.00018519878125158306, "loss": 0.1391, "step": 3710 }, { "epoch": 0.17583511016346837, "grad_norm": 0.43359375, "learning_rate": 0.00018519098311632512, "loss": 0.1767, "step": 3711 }, { "epoch": 0.17588249230040276, "grad_norm": 0.61328125, "learning_rate": 0.00018518318309162195, "loss": 0.7675, "step": 3712 }, { "epoch": 0.17592987443733712, "grad_norm": 0.4921875, "learning_rate": 0.0001851753811776465, "loss": 0.9559, "step": 3713 }, { "epoch": 0.1759772565742715, "grad_norm": 0.65234375, "learning_rate": 0.00018516757737457182, "loss": 1.0426, "step": 3714 }, { "epoch": 0.17602463871120588, "grad_norm": 1.328125, "learning_rate": 0.000185159771682571, "loss": 1.6759, "step": 3715 }, { "epoch": 0.17607202084814025, "grad_norm": 0.62109375, "learning_rate": 0.00018515196410181714, "loss": 1.2135, "step": 3716 }, { "epoch": 0.1761194029850746, "grad_norm": 0.53125, "learning_rate": 0.00018514415463248343, "loss": 1.1519, "step": 3717 }, { "epoch": 0.176166785122009, "grad_norm": 0.734375, "learning_rate": 0.00018513634327474305, "loss": 1.2489, "step": 3718 }, { "epoch": 0.17621416725894337, "grad_norm": 0.68359375, "learning_rate": 0.00018512853002876928, "loss": 1.2299, "step": 3719 }, { "epoch": 0.17626154939587776, "grad_norm": 0.703125, "learning_rate": 0.00018512071489473536, "loss": 0.8664, "step": 3720 }, { "epoch": 0.17630893153281213, "grad_norm": 0.609375, "learning_rate": 0.00018511289787281467, "loss": 0.5929, "step": 3721 }, { "epoch": 0.1763563136697465, "grad_norm": 0.515625, "learning_rate": 0.00018510507896318056, "loss": 0.1134, "step": 3722 }, { "epoch": 0.1764036958066809, "grad_norm": 0.5390625, "learning_rate": 0.00018509725816600643, "loss": 0.9311, "step": 3723 }, { "epoch": 0.17645107794361525, "grad_norm": 0.58203125, "learning_rate": 0.0001850894354814658, "loss": 1.0983, "step": 3724 }, { "epoch": 0.17649846008054965, "grad_norm": 0.6953125, "learning_rate": 0.00018508161090973206, "loss": 0.3915, "step": 3725 }, { "epoch": 0.176545842217484, "grad_norm": 0.515625, "learning_rate": 0.00018507378445097885, "loss": 0.7702, "step": 3726 }, { "epoch": 0.17659322435441838, "grad_norm": 0.59375, "learning_rate": 0.0001850659561053797, "loss": 0.9071, "step": 3727 }, { "epoch": 0.17664060649135277, "grad_norm": 0.703125, "learning_rate": 0.00018505812587310829, "loss": 1.133, "step": 3728 }, { "epoch": 0.17668798862828713, "grad_norm": 0.2275390625, "learning_rate": 0.0001850502937543382, "loss": 0.0478, "step": 3729 }, { "epoch": 0.1767353707652215, "grad_norm": 0.26171875, "learning_rate": 0.00018504245974924324, "loss": 0.0362, "step": 3730 }, { "epoch": 0.1767827529021559, "grad_norm": 0.7734375, "learning_rate": 0.00018503462385799707, "loss": 1.0033, "step": 3731 }, { "epoch": 0.17683013503909026, "grad_norm": 0.54296875, "learning_rate": 0.00018502678608077355, "loss": 1.1681, "step": 3732 }, { "epoch": 0.17687751717602465, "grad_norm": 0.3828125, "learning_rate": 0.00018501894641774643, "loss": 0.0484, "step": 3733 }, { "epoch": 0.17692489931295902, "grad_norm": 0.6796875, "learning_rate": 0.00018501110486908968, "loss": 0.8182, "step": 3734 }, { "epoch": 0.17697228144989338, "grad_norm": 0.74609375, "learning_rate": 0.00018500326143497715, "loss": 0.9496, "step": 3735 }, { "epoch": 0.17701966358682777, "grad_norm": 0.2353515625, "learning_rate": 0.00018499541611558283, "loss": 0.1437, "step": 3736 }, { "epoch": 0.17706704572376214, "grad_norm": 0.52734375, "learning_rate": 0.00018498756891108072, "loss": 0.1331, "step": 3737 }, { "epoch": 0.1771144278606965, "grad_norm": 1.0625, "learning_rate": 0.00018497971982164483, "loss": 1.0338, "step": 3738 }, { "epoch": 0.1771618099976309, "grad_norm": 0.28515625, "learning_rate": 0.0001849718688474493, "loss": 0.0395, "step": 3739 }, { "epoch": 0.17720919213456526, "grad_norm": 0.5625, "learning_rate": 0.0001849640159886682, "loss": 0.9706, "step": 3740 }, { "epoch": 0.17725657427149966, "grad_norm": 0.578125, "learning_rate": 0.00018495616124547578, "loss": 0.9039, "step": 3741 }, { "epoch": 0.17730395640843402, "grad_norm": 0.384765625, "learning_rate": 0.00018494830461804617, "loss": 0.09, "step": 3742 }, { "epoch": 0.1773513385453684, "grad_norm": 0.7421875, "learning_rate": 0.00018494044610655358, "loss": 0.9534, "step": 3743 }, { "epoch": 0.17739872068230278, "grad_norm": 0.58984375, "learning_rate": 0.00018493258571117244, "loss": 1.2597, "step": 3744 }, { "epoch": 0.17744610281923714, "grad_norm": 0.4453125, "learning_rate": 0.000184924723432077, "loss": 0.0954, "step": 3745 }, { "epoch": 0.1774934849561715, "grad_norm": 0.427734375, "learning_rate": 0.00018491685926944165, "loss": 0.0738, "step": 3746 }, { "epoch": 0.1775408670931059, "grad_norm": 0.73828125, "learning_rate": 0.00018490899322344082, "loss": 1.2257, "step": 3747 }, { "epoch": 0.17758824923004027, "grad_norm": 0.671875, "learning_rate": 0.00018490112529424897, "loss": 1.2488, "step": 3748 }, { "epoch": 0.17763563136697466, "grad_norm": 0.69140625, "learning_rate": 0.00018489325548204057, "loss": 1.0829, "step": 3749 }, { "epoch": 0.17768301350390903, "grad_norm": 0.90625, "learning_rate": 0.00018488538378699017, "loss": 1.1709, "step": 3750 }, { "epoch": 0.1777303956408434, "grad_norm": 0.55859375, "learning_rate": 0.00018487751020927239, "loss": 0.1868, "step": 3751 }, { "epoch": 0.17777777777777778, "grad_norm": 0.5625, "learning_rate": 0.00018486963474906187, "loss": 0.0744, "step": 3752 }, { "epoch": 0.17782515991471215, "grad_norm": 0.57421875, "learning_rate": 0.00018486175740653322, "loss": 1.1554, "step": 3753 }, { "epoch": 0.17787254205164654, "grad_norm": 0.69921875, "learning_rate": 0.0001848538781818612, "loss": 1.1292, "step": 3754 }, { "epoch": 0.1779199241885809, "grad_norm": 0.65234375, "learning_rate": 0.00018484599707522053, "loss": 0.9446, "step": 3755 }, { "epoch": 0.17796730632551527, "grad_norm": 0.55859375, "learning_rate": 0.00018483811408678602, "loss": 1.2564, "step": 3756 }, { "epoch": 0.17801468846244967, "grad_norm": 0.69140625, "learning_rate": 0.00018483022921673249, "loss": 1.0344, "step": 3757 }, { "epoch": 0.17806207059938403, "grad_norm": 0.9765625, "learning_rate": 0.00018482234246523487, "loss": 1.0584, "step": 3758 }, { "epoch": 0.1781094527363184, "grad_norm": 0.6328125, "learning_rate": 0.00018481445383246802, "loss": 0.8575, "step": 3759 }, { "epoch": 0.1781568348732528, "grad_norm": 0.51953125, "learning_rate": 0.00018480656331860694, "loss": 0.7026, "step": 3760 }, { "epoch": 0.17820421701018715, "grad_norm": 0.9296875, "learning_rate": 0.0001847986709238266, "loss": 0.1739, "step": 3761 }, { "epoch": 0.17825159914712155, "grad_norm": 1.0703125, "learning_rate": 0.00018479077664830206, "loss": 1.1396, "step": 3762 }, { "epoch": 0.1782989812840559, "grad_norm": 0.2392578125, "learning_rate": 0.00018478288049220842, "loss": 0.1417, "step": 3763 }, { "epoch": 0.17834636342099028, "grad_norm": 0.7265625, "learning_rate": 0.0001847749824557208, "loss": 1.2362, "step": 3764 }, { "epoch": 0.17839374555792467, "grad_norm": 0.4140625, "learning_rate": 0.00018476708253901435, "loss": 0.0794, "step": 3765 }, { "epoch": 0.17844112769485904, "grad_norm": 0.50390625, "learning_rate": 0.00018475918074226433, "loss": 1.0855, "step": 3766 }, { "epoch": 0.1784885098317934, "grad_norm": 0.09765625, "learning_rate": 0.00018475127706564593, "loss": 0.002, "step": 3767 }, { "epoch": 0.1785358919687278, "grad_norm": 0.28515625, "learning_rate": 0.0001847433715093345, "loss": 0.0393, "step": 3768 }, { "epoch": 0.17858327410566216, "grad_norm": 0.7109375, "learning_rate": 0.00018473546407350532, "loss": 0.8788, "step": 3769 }, { "epoch": 0.17863065624259655, "grad_norm": 0.7890625, "learning_rate": 0.00018472755475833384, "loss": 0.5201, "step": 3770 }, { "epoch": 0.17867803837953092, "grad_norm": 0.81640625, "learning_rate": 0.00018471964356399546, "loss": 0.8121, "step": 3771 }, { "epoch": 0.17872542051646528, "grad_norm": 0.9140625, "learning_rate": 0.0001847117304906656, "loss": 1.2936, "step": 3772 }, { "epoch": 0.17877280265339968, "grad_norm": 0.02099609375, "learning_rate": 0.00018470381553851976, "loss": 0.0016, "step": 3773 }, { "epoch": 0.17882018479033404, "grad_norm": 0.625, "learning_rate": 0.00018469589870773355, "loss": 1.0161, "step": 3774 }, { "epoch": 0.1788675669272684, "grad_norm": 0.546875, "learning_rate": 0.0001846879799984825, "loss": 1.019, "step": 3775 }, { "epoch": 0.1789149490642028, "grad_norm": 0.97265625, "learning_rate": 0.00018468005941094226, "loss": 0.4715, "step": 3776 }, { "epoch": 0.17896233120113716, "grad_norm": 0.66796875, "learning_rate": 0.00018467213694528853, "loss": 0.1571, "step": 3777 }, { "epoch": 0.17900971333807156, "grad_norm": 0.609375, "learning_rate": 0.00018466421260169695, "loss": 1.4319, "step": 3778 }, { "epoch": 0.17905709547500592, "grad_norm": 0.482421875, "learning_rate": 0.00018465628638034332, "loss": 1.1197, "step": 3779 }, { "epoch": 0.1791044776119403, "grad_norm": 0.59375, "learning_rate": 0.00018464835828140347, "loss": 1.3157, "step": 3780 }, { "epoch": 0.17915185974887468, "grad_norm": 0.65234375, "learning_rate": 0.00018464042830505317, "loss": 1.264, "step": 3781 }, { "epoch": 0.17919924188580905, "grad_norm": 0.48828125, "learning_rate": 0.00018463249645146834, "loss": 0.2718, "step": 3782 }, { "epoch": 0.17924662402274344, "grad_norm": 0.50390625, "learning_rate": 0.00018462456272082487, "loss": 0.7724, "step": 3783 }, { "epoch": 0.1792940061596778, "grad_norm": 0.2001953125, "learning_rate": 0.00018461662711329876, "loss": 0.0419, "step": 3784 }, { "epoch": 0.17934138829661217, "grad_norm": 1.0859375, "learning_rate": 0.00018460868962906594, "loss": 0.3372, "step": 3785 }, { "epoch": 0.17938877043354656, "grad_norm": 0.435546875, "learning_rate": 0.00018460075026830252, "loss": 1.0154, "step": 3786 }, { "epoch": 0.17943615257048093, "grad_norm": 0.59765625, "learning_rate": 0.0001845928090311846, "loss": 0.8292, "step": 3787 }, { "epoch": 0.1794835347074153, "grad_norm": 0.244140625, "learning_rate": 0.00018458486591788826, "loss": 0.1723, "step": 3788 }, { "epoch": 0.17953091684434969, "grad_norm": 0.5546875, "learning_rate": 0.0001845769209285897, "loss": 0.6954, "step": 3789 }, { "epoch": 0.17957829898128405, "grad_norm": 0.6640625, "learning_rate": 0.0001845689740634651, "loss": 0.8468, "step": 3790 }, { "epoch": 0.17962568111821844, "grad_norm": 0.6640625, "learning_rate": 0.00018456102532269077, "loss": 1.1859, "step": 3791 }, { "epoch": 0.1796730632551528, "grad_norm": 0.208984375, "learning_rate": 0.00018455307470644294, "loss": 0.1165, "step": 3792 }, { "epoch": 0.17972044539208717, "grad_norm": 0.640625, "learning_rate": 0.000184545122214898, "loss": 0.6493, "step": 3793 }, { "epoch": 0.17976782752902157, "grad_norm": 0.244140625, "learning_rate": 0.00018453716784823227, "loss": 0.1474, "step": 3794 }, { "epoch": 0.17981520966595593, "grad_norm": 0.67578125, "learning_rate": 0.00018452921160662223, "loss": 0.1255, "step": 3795 }, { "epoch": 0.1798625918028903, "grad_norm": 0.58984375, "learning_rate": 0.0001845212534902443, "loss": 0.6474, "step": 3796 }, { "epoch": 0.1799099739398247, "grad_norm": 0.70703125, "learning_rate": 0.000184513293499275, "loss": 0.7447, "step": 3797 }, { "epoch": 0.17995735607675906, "grad_norm": 0.5078125, "learning_rate": 0.00018450533163389085, "loss": 0.9997, "step": 3798 }, { "epoch": 0.18000473821369345, "grad_norm": 0.5703125, "learning_rate": 0.00018449736789426848, "loss": 1.0661, "step": 3799 }, { "epoch": 0.18005212035062781, "grad_norm": 0.6953125, "learning_rate": 0.0001844894022805845, "loss": 1.5189, "step": 3800 }, { "epoch": 0.18009950248756218, "grad_norm": 0.2001953125, "learning_rate": 0.00018448143479301554, "loss": 0.0322, "step": 3801 }, { "epoch": 0.18014688462449657, "grad_norm": 0.76171875, "learning_rate": 0.00018447346543173836, "loss": 0.9759, "step": 3802 }, { "epoch": 0.18019426676143094, "grad_norm": 0.5703125, "learning_rate": 0.00018446549419692972, "loss": 1.4936, "step": 3803 }, { "epoch": 0.1802416488983653, "grad_norm": 0.58984375, "learning_rate": 0.00018445752108876633, "loss": 1.0951, "step": 3804 }, { "epoch": 0.1802890310352997, "grad_norm": 0.70703125, "learning_rate": 0.00018444954610742512, "loss": 0.0722, "step": 3805 }, { "epoch": 0.18033641317223406, "grad_norm": 0.248046875, "learning_rate": 0.0001844415692530829, "loss": 0.0524, "step": 3806 }, { "epoch": 0.18038379530916845, "grad_norm": 0.431640625, "learning_rate": 0.00018443359052591665, "loss": 0.0333, "step": 3807 }, { "epoch": 0.18043117744610282, "grad_norm": 0.67578125, "learning_rate": 0.0001844256099261033, "loss": 0.6686, "step": 3808 }, { "epoch": 0.18047855958303718, "grad_norm": 0.5859375, "learning_rate": 0.0001844176274538198, "loss": 1.0334, "step": 3809 }, { "epoch": 0.18052594171997158, "grad_norm": 0.69140625, "learning_rate": 0.00018440964310924328, "loss": 0.979, "step": 3810 }, { "epoch": 0.18057332385690594, "grad_norm": 0.4921875, "learning_rate": 0.0001844016568925508, "loss": 0.6279, "step": 3811 }, { "epoch": 0.18062070599384034, "grad_norm": 0.158203125, "learning_rate": 0.00018439366880391943, "loss": 0.0123, "step": 3812 }, { "epoch": 0.1806680881307747, "grad_norm": 0.51171875, "learning_rate": 0.0001843856788435264, "loss": 1.1228, "step": 3813 }, { "epoch": 0.18071547026770907, "grad_norm": 0.71875, "learning_rate": 0.00018437768701154891, "loss": 1.2088, "step": 3814 }, { "epoch": 0.18076285240464346, "grad_norm": 0.67578125, "learning_rate": 0.00018436969330816417, "loss": 0.9791, "step": 3815 }, { "epoch": 0.18081023454157782, "grad_norm": 0.6484375, "learning_rate": 0.00018436169773354953, "loss": 1.1451, "step": 3816 }, { "epoch": 0.1808576166785122, "grad_norm": 0.53515625, "learning_rate": 0.00018435370028788226, "loss": 0.9227, "step": 3817 }, { "epoch": 0.18090499881544658, "grad_norm": 0.578125, "learning_rate": 0.00018434570097133977, "loss": 1.1372, "step": 3818 }, { "epoch": 0.18095238095238095, "grad_norm": 0.5234375, "learning_rate": 0.0001843376997840995, "loss": 0.8091, "step": 3819 }, { "epoch": 0.18099976308931534, "grad_norm": 0.2255859375, "learning_rate": 0.00018432969672633887, "loss": 0.1405, "step": 3820 }, { "epoch": 0.1810471452262497, "grad_norm": 0.2197265625, "learning_rate": 0.00018432169179823538, "loss": 0.1652, "step": 3821 }, { "epoch": 0.18109452736318407, "grad_norm": 0.28515625, "learning_rate": 0.00018431368499996658, "loss": 0.1791, "step": 3822 }, { "epoch": 0.18114190950011846, "grad_norm": 0.0069580078125, "learning_rate": 0.00018430567633171004, "loss": 0.0005, "step": 3823 }, { "epoch": 0.18118929163705283, "grad_norm": 0.58984375, "learning_rate": 0.00018429766579364343, "loss": 0.8203, "step": 3824 }, { "epoch": 0.1812366737739872, "grad_norm": 0.1904296875, "learning_rate": 0.00018428965338594436, "loss": 0.1378, "step": 3825 }, { "epoch": 0.1812840559109216, "grad_norm": 0.81640625, "learning_rate": 0.00018428163910879058, "loss": 1.7155, "step": 3826 }, { "epoch": 0.18133143804785595, "grad_norm": 0.59765625, "learning_rate": 0.0001842736229623598, "loss": 0.8864, "step": 3827 }, { "epoch": 0.18137882018479035, "grad_norm": 0.65234375, "learning_rate": 0.00018426560494682982, "loss": 1.245, "step": 3828 }, { "epoch": 0.1814262023217247, "grad_norm": 0.48828125, "learning_rate": 0.00018425758506237852, "loss": 0.1062, "step": 3829 }, { "epoch": 0.18147358445865908, "grad_norm": 0.263671875, "learning_rate": 0.00018424956330918369, "loss": 0.1808, "step": 3830 }, { "epoch": 0.18152096659559347, "grad_norm": 0.74609375, "learning_rate": 0.0001842415396874233, "loss": 0.4874, "step": 3831 }, { "epoch": 0.18156834873252783, "grad_norm": 0.609375, "learning_rate": 0.0001842335141972753, "loss": 1.0836, "step": 3832 }, { "epoch": 0.1816157308694622, "grad_norm": 1.1640625, "learning_rate": 0.00018422548683891764, "loss": 0.5258, "step": 3833 }, { "epoch": 0.1816631130063966, "grad_norm": 0.447265625, "learning_rate": 0.0001842174576125284, "loss": 0.5478, "step": 3834 }, { "epoch": 0.18171049514333096, "grad_norm": 0.51953125, "learning_rate": 0.0001842094265182857, "loss": 0.8814, "step": 3835 }, { "epoch": 0.18175787728026535, "grad_norm": 0.275390625, "learning_rate": 0.00018420139355636756, "loss": 0.0369, "step": 3836 }, { "epoch": 0.18180525941719972, "grad_norm": 0.54296875, "learning_rate": 0.00018419335872695225, "loss": 0.8465, "step": 3837 }, { "epoch": 0.18185264155413408, "grad_norm": 0.55859375, "learning_rate": 0.00018418532203021788, "loss": 0.9397, "step": 3838 }, { "epoch": 0.18190002369106847, "grad_norm": 0.59375, "learning_rate": 0.00018417728346634276, "loss": 1.1063, "step": 3839 }, { "epoch": 0.18194740582800284, "grad_norm": 0.0546875, "learning_rate": 0.00018416924303550516, "loss": 0.0021, "step": 3840 }, { "epoch": 0.18199478796493723, "grad_norm": 0.68359375, "learning_rate": 0.00018416120073788342, "loss": 0.9548, "step": 3841 }, { "epoch": 0.1820421701018716, "grad_norm": 0.4453125, "learning_rate": 0.00018415315657365584, "loss": 0.0584, "step": 3842 }, { "epoch": 0.18208955223880596, "grad_norm": 0.57421875, "learning_rate": 0.0001841451105430009, "loss": 0.9115, "step": 3843 }, { "epoch": 0.18213693437574036, "grad_norm": 0.7265625, "learning_rate": 0.000184137062646097, "loss": 1.344, "step": 3844 }, { "epoch": 0.18218431651267472, "grad_norm": 0.51953125, "learning_rate": 0.00018412901288312276, "loss": 0.7186, "step": 3845 }, { "epoch": 0.1822316986496091, "grad_norm": 0.5390625, "learning_rate": 0.00018412096125425658, "loss": 0.0939, "step": 3846 }, { "epoch": 0.18227908078654348, "grad_norm": 0.609375, "learning_rate": 0.00018411290775967708, "loss": 1.3313, "step": 3847 }, { "epoch": 0.18232646292347784, "grad_norm": 0.69921875, "learning_rate": 0.00018410485239956286, "loss": 0.9501, "step": 3848 }, { "epoch": 0.18237384506041224, "grad_norm": 0.515625, "learning_rate": 0.0001840967951740926, "loss": 0.5894, "step": 3849 }, { "epoch": 0.1824212271973466, "grad_norm": 0.59765625, "learning_rate": 0.00018408873608344504, "loss": 0.3189, "step": 3850 }, { "epoch": 0.18246860933428097, "grad_norm": 0.3828125, "learning_rate": 0.00018408067512779884, "loss": 0.0554, "step": 3851 }, { "epoch": 0.18251599147121536, "grad_norm": 0.68359375, "learning_rate": 0.00018407261230733287, "loss": 0.9996, "step": 3852 }, { "epoch": 0.18256337360814973, "grad_norm": 0.392578125, "learning_rate": 0.00018406454762222584, "loss": 0.1408, "step": 3853 }, { "epoch": 0.1826107557450841, "grad_norm": 0.6875, "learning_rate": 0.00018405648107265674, "loss": 0.6393, "step": 3854 }, { "epoch": 0.18265813788201848, "grad_norm": 0.8359375, "learning_rate": 0.00018404841265880442, "loss": 1.1064, "step": 3855 }, { "epoch": 0.18270552001895285, "grad_norm": 0.2392578125, "learning_rate": 0.0001840403423808478, "loss": 0.1576, "step": 3856 }, { "epoch": 0.18275290215588724, "grad_norm": 0.62109375, "learning_rate": 0.00018403227023896593, "loss": 1.107, "step": 3857 }, { "epoch": 0.1828002842928216, "grad_norm": 0.455078125, "learning_rate": 0.0001840241962333378, "loss": 0.6858, "step": 3858 }, { "epoch": 0.18284766642975597, "grad_norm": 0.765625, "learning_rate": 0.0001840161203641425, "loss": 0.9427, "step": 3859 }, { "epoch": 0.18289504856669037, "grad_norm": 0.484375, "learning_rate": 0.00018400804263155913, "loss": 0.0761, "step": 3860 }, { "epoch": 0.18294243070362473, "grad_norm": 0.6484375, "learning_rate": 0.00018399996303576687, "loss": 0.9487, "step": 3861 }, { "epoch": 0.1829898128405591, "grad_norm": 0.69921875, "learning_rate": 0.0001839918815769449, "loss": 1.2183, "step": 3862 }, { "epoch": 0.1830371949774935, "grad_norm": 0.63671875, "learning_rate": 0.00018398379825527246, "loss": 0.6553, "step": 3863 }, { "epoch": 0.18308457711442785, "grad_norm": 0.8046875, "learning_rate": 0.00018397571307092881, "loss": 1.3488, "step": 3864 }, { "epoch": 0.18313195925136225, "grad_norm": 0.546875, "learning_rate": 0.00018396762602409332, "loss": 1.0609, "step": 3865 }, { "epoch": 0.1831793413882966, "grad_norm": 1.6953125, "learning_rate": 0.00018395953711494533, "loss": 0.2589, "step": 3866 }, { "epoch": 0.18322672352523098, "grad_norm": 0.63671875, "learning_rate": 0.0001839514463436642, "loss": 1.275, "step": 3867 }, { "epoch": 0.18327410566216537, "grad_norm": 0.97265625, "learning_rate": 0.00018394335371042943, "loss": 1.2991, "step": 3868 }, { "epoch": 0.18332148779909974, "grad_norm": 0.66796875, "learning_rate": 0.00018393525921542048, "loss": 1.434, "step": 3869 }, { "epoch": 0.18336886993603413, "grad_norm": 0.609375, "learning_rate": 0.00018392716285881692, "loss": 1.0181, "step": 3870 }, { "epoch": 0.1834162520729685, "grad_norm": 0.5390625, "learning_rate": 0.00018391906464079822, "loss": 0.7575, "step": 3871 }, { "epoch": 0.18346363420990286, "grad_norm": 0.68359375, "learning_rate": 0.0001839109645615441, "loss": 1.0288, "step": 3872 }, { "epoch": 0.18351101634683725, "grad_norm": 1.0546875, "learning_rate": 0.00018390286262123416, "loss": 0.3225, "step": 3873 }, { "epoch": 0.18355839848377162, "grad_norm": 0.60546875, "learning_rate": 0.0001838947588200481, "loss": 1.5614, "step": 3874 }, { "epoch": 0.18360578062070598, "grad_norm": 0.80859375, "learning_rate": 0.00018388665315816562, "loss": 1.0404, "step": 3875 }, { "epoch": 0.18365316275764038, "grad_norm": 0.6640625, "learning_rate": 0.00018387854563576655, "loss": 0.7009, "step": 3876 }, { "epoch": 0.18370054489457474, "grad_norm": 0.55859375, "learning_rate": 0.00018387043625303068, "loss": 0.0823, "step": 3877 }, { "epoch": 0.18374792703150913, "grad_norm": 0.5703125, "learning_rate": 0.00018386232501013786, "loss": 0.7296, "step": 3878 }, { "epoch": 0.1837953091684435, "grad_norm": 0.65625, "learning_rate": 0.000183854211907268, "loss": 0.1192, "step": 3879 }, { "epoch": 0.18384269130537786, "grad_norm": 0.453125, "learning_rate": 0.00018384609694460102, "loss": 0.1747, "step": 3880 }, { "epoch": 0.18389007344231226, "grad_norm": 0.66796875, "learning_rate": 0.00018383798012231695, "loss": 1.5189, "step": 3881 }, { "epoch": 0.18393745557924662, "grad_norm": 0.50390625, "learning_rate": 0.00018382986144059577, "loss": 0.0617, "step": 3882 }, { "epoch": 0.183984837716181, "grad_norm": 0.87109375, "learning_rate": 0.00018382174089961754, "loss": 1.3788, "step": 3883 }, { "epoch": 0.18403221985311538, "grad_norm": 0.94921875, "learning_rate": 0.0001838136184995624, "loss": 1.0907, "step": 3884 }, { "epoch": 0.18407960199004975, "grad_norm": 0.8828125, "learning_rate": 0.00018380549424061045, "loss": 0.2911, "step": 3885 }, { "epoch": 0.18412698412698414, "grad_norm": 0.51171875, "learning_rate": 0.00018379736812294194, "loss": 1.1701, "step": 3886 }, { "epoch": 0.1841743662639185, "grad_norm": 0.1650390625, "learning_rate": 0.00018378924014673703, "loss": 0.0224, "step": 3887 }, { "epoch": 0.18422174840085287, "grad_norm": 0.01446533203125, "learning_rate": 0.00018378111031217605, "loss": 0.0009, "step": 3888 }, { "epoch": 0.18426913053778726, "grad_norm": 0.5390625, "learning_rate": 0.00018377297861943927, "loss": 0.7568, "step": 3889 }, { "epoch": 0.18431651267472163, "grad_norm": 0.470703125, "learning_rate": 0.00018376484506870707, "loss": 0.6754, "step": 3890 }, { "epoch": 0.184363894811656, "grad_norm": 0.72265625, "learning_rate": 0.0001837567096601598, "loss": 1.0827, "step": 3891 }, { "epoch": 0.18441127694859039, "grad_norm": 0.703125, "learning_rate": 0.00018374857239397794, "loss": 1.0899, "step": 3892 }, { "epoch": 0.18445865908552475, "grad_norm": 0.859375, "learning_rate": 0.00018374043327034194, "loss": 1.1447, "step": 3893 }, { "epoch": 0.18450604122245914, "grad_norm": 0.79296875, "learning_rate": 0.0001837322922894323, "loss": 0.729, "step": 3894 }, { "epoch": 0.1845534233593935, "grad_norm": 1.140625, "learning_rate": 0.00018372414945142963, "loss": 0.6065, "step": 3895 }, { "epoch": 0.18460080549632787, "grad_norm": 0.3671875, "learning_rate": 0.00018371600475651455, "loss": 0.0833, "step": 3896 }, { "epoch": 0.18464818763326227, "grad_norm": 0.51171875, "learning_rate": 0.0001837078582048676, "loss": 1.0352, "step": 3897 }, { "epoch": 0.18469556977019663, "grad_norm": 0.59765625, "learning_rate": 0.00018369970979666952, "loss": 0.5847, "step": 3898 }, { "epoch": 0.184742951907131, "grad_norm": 0.6640625, "learning_rate": 0.00018369155953210103, "loss": 0.871, "step": 3899 }, { "epoch": 0.1847903340440654, "grad_norm": 0.83203125, "learning_rate": 0.00018368340741134294, "loss": 1.4962, "step": 3900 }, { "epoch": 0.18483771618099976, "grad_norm": 0.52734375, "learning_rate": 0.00018367525343457596, "loss": 0.5314, "step": 3901 }, { "epoch": 0.18488509831793415, "grad_norm": 0.69140625, "learning_rate": 0.000183667097601981, "loss": 1.46, "step": 3902 }, { "epoch": 0.18493248045486851, "grad_norm": 0.294921875, "learning_rate": 0.00018365893991373892, "loss": 0.071, "step": 3903 }, { "epoch": 0.18497986259180288, "grad_norm": 0.62890625, "learning_rate": 0.00018365078037003069, "loss": 0.9815, "step": 3904 }, { "epoch": 0.18502724472873727, "grad_norm": 0.7890625, "learning_rate": 0.00018364261897103724, "loss": 1.2039, "step": 3905 }, { "epoch": 0.18507462686567164, "grad_norm": 0.8828125, "learning_rate": 0.0001836344557169396, "loss": 1.0092, "step": 3906 }, { "epoch": 0.18512200900260603, "grad_norm": 0.58984375, "learning_rate": 0.00018362629060791877, "loss": 1.2131, "step": 3907 }, { "epoch": 0.1851693911395404, "grad_norm": 0.01556396484375, "learning_rate": 0.00018361812364415595, "loss": 0.001, "step": 3908 }, { "epoch": 0.18521677327647476, "grad_norm": 0.404296875, "learning_rate": 0.0001836099548258322, "loss": 0.0326, "step": 3909 }, { "epoch": 0.18526415541340915, "grad_norm": 0.8359375, "learning_rate": 0.00018360178415312867, "loss": 0.6078, "step": 3910 }, { "epoch": 0.18531153755034352, "grad_norm": 0.62109375, "learning_rate": 0.00018359361162622662, "loss": 1.2607, "step": 3911 }, { "epoch": 0.18535891968727788, "grad_norm": 0.671875, "learning_rate": 0.00018358543724530737, "loss": 1.049, "step": 3912 }, { "epoch": 0.18540630182421228, "grad_norm": 0.32421875, "learning_rate": 0.00018357726101055207, "loss": 0.0309, "step": 3913 }, { "epoch": 0.18545368396114664, "grad_norm": 0.71484375, "learning_rate": 0.0001835690829221422, "loss": 1.0746, "step": 3914 }, { "epoch": 0.18550106609808104, "grad_norm": 0.310546875, "learning_rate": 0.0001835609029802591, "loss": 0.0499, "step": 3915 }, { "epoch": 0.1855484482350154, "grad_norm": 0.70703125, "learning_rate": 0.00018355272118508414, "loss": 1.2679, "step": 3916 }, { "epoch": 0.18559583037194977, "grad_norm": 0.56640625, "learning_rate": 0.00018354453753679882, "loss": 1.0898, "step": 3917 }, { "epoch": 0.18564321250888416, "grad_norm": 0.51953125, "learning_rate": 0.00018353635203558467, "loss": 0.6901, "step": 3918 }, { "epoch": 0.18569059464581852, "grad_norm": 0.828125, "learning_rate": 0.00018352816468162318, "loss": 1.3228, "step": 3919 }, { "epoch": 0.1857379767827529, "grad_norm": 0.58984375, "learning_rate": 0.00018351997547509598, "loss": 1.2299, "step": 3920 }, { "epoch": 0.18578535891968728, "grad_norm": 0.62109375, "learning_rate": 0.00018351178441618467, "loss": 1.2638, "step": 3921 }, { "epoch": 0.18583274105662165, "grad_norm": 0.6328125, "learning_rate": 0.00018350359150507095, "loss": 0.7652, "step": 3922 }, { "epoch": 0.18588012319355604, "grad_norm": 0.41796875, "learning_rate": 0.00018349539674193652, "loss": 0.0936, "step": 3923 }, { "epoch": 0.1859275053304904, "grad_norm": 0.515625, "learning_rate": 0.00018348720012696312, "loss": 1.1508, "step": 3924 }, { "epoch": 0.18597488746742477, "grad_norm": 0.96875, "learning_rate": 0.00018347900166033253, "loss": 0.3565, "step": 3925 }, { "epoch": 0.18602226960435916, "grad_norm": 0.498046875, "learning_rate": 0.00018347080134222663, "loss": 0.5486, "step": 3926 }, { "epoch": 0.18606965174129353, "grad_norm": 0.9921875, "learning_rate": 0.00018346259917282725, "loss": 0.9104, "step": 3927 }, { "epoch": 0.1861170338782279, "grad_norm": 0.609375, "learning_rate": 0.00018345439515231633, "loss": 1.1383, "step": 3928 }, { "epoch": 0.1861644160151623, "grad_norm": 0.8515625, "learning_rate": 0.00018344618928087584, "loss": 0.5591, "step": 3929 }, { "epoch": 0.18621179815209665, "grad_norm": 0.3984375, "learning_rate": 0.0001834379815586877, "loss": 0.1086, "step": 3930 }, { "epoch": 0.18625918028903105, "grad_norm": 0.23046875, "learning_rate": 0.00018342977198593404, "loss": 0.0302, "step": 3931 }, { "epoch": 0.1863065624259654, "grad_norm": 0.50390625, "learning_rate": 0.00018342156056279686, "loss": 0.8353, "step": 3932 }, { "epoch": 0.18635394456289978, "grad_norm": 0.1298828125, "learning_rate": 0.0001834133472894584, "loss": 0.007, "step": 3933 }, { "epoch": 0.18640132669983417, "grad_norm": 0.5546875, "learning_rate": 0.00018340513216610066, "loss": 1.2658, "step": 3934 }, { "epoch": 0.18644870883676853, "grad_norm": 0.53125, "learning_rate": 0.00018339691519290597, "loss": 0.1188, "step": 3935 }, { "epoch": 0.18649609097370293, "grad_norm": 0.84765625, "learning_rate": 0.00018338869637005652, "loss": 0.4885, "step": 3936 }, { "epoch": 0.1865434731106373, "grad_norm": 0.6953125, "learning_rate": 0.00018338047569773464, "loss": 1.4999, "step": 3937 }, { "epoch": 0.18659085524757166, "grad_norm": 0.5625, "learning_rate": 0.00018337225317612262, "loss": 0.925, "step": 3938 }, { "epoch": 0.18663823738450605, "grad_norm": 0.51953125, "learning_rate": 0.00018336402880540277, "loss": 0.9007, "step": 3939 }, { "epoch": 0.18668561952144042, "grad_norm": 0.83203125, "learning_rate": 0.00018335580258575763, "loss": 1.3914, "step": 3940 }, { "epoch": 0.18673300165837478, "grad_norm": 0.61328125, "learning_rate": 0.00018334757451736951, "loss": 1.3489, "step": 3941 }, { "epoch": 0.18678038379530917, "grad_norm": 0.53125, "learning_rate": 0.000183339344600421, "loss": 0.9137, "step": 3942 }, { "epoch": 0.18682776593224354, "grad_norm": 0.4609375, "learning_rate": 0.00018333111283509458, "loss": 0.9077, "step": 3943 }, { "epoch": 0.18687514806917793, "grad_norm": 0.451171875, "learning_rate": 0.00018332287922157288, "loss": 0.8801, "step": 3944 }, { "epoch": 0.1869225302061123, "grad_norm": 0.431640625, "learning_rate": 0.00018331464376003843, "loss": 0.5744, "step": 3945 }, { "epoch": 0.18696991234304666, "grad_norm": 0.546875, "learning_rate": 0.00018330640645067392, "loss": 0.6218, "step": 3946 }, { "epoch": 0.18701729447998106, "grad_norm": 1.0390625, "learning_rate": 0.00018329816729366206, "loss": 0.5785, "step": 3947 }, { "epoch": 0.18706467661691542, "grad_norm": 0.58984375, "learning_rate": 0.00018328992628918557, "loss": 0.4056, "step": 3948 }, { "epoch": 0.18711205875384979, "grad_norm": 0.490234375, "learning_rate": 0.0001832816834374272, "loss": 0.8687, "step": 3949 }, { "epoch": 0.18715944089078418, "grad_norm": 0.310546875, "learning_rate": 0.00018327343873856985, "loss": 0.1306, "step": 3950 }, { "epoch": 0.18720682302771854, "grad_norm": 0.55859375, "learning_rate": 0.00018326519219279632, "loss": 1.1476, "step": 3951 }, { "epoch": 0.18725420516465294, "grad_norm": 0.4453125, "learning_rate": 0.00018325694380028952, "loss": 0.6137, "step": 3952 }, { "epoch": 0.1873015873015873, "grad_norm": 0.546875, "learning_rate": 0.00018324869356123238, "loss": 0.6959, "step": 3953 }, { "epoch": 0.18734896943852167, "grad_norm": 0.66015625, "learning_rate": 0.0001832404414758079, "loss": 0.6488, "step": 3954 }, { "epoch": 0.18739635157545606, "grad_norm": 0.7421875, "learning_rate": 0.00018323218754419905, "loss": 0.7129, "step": 3955 }, { "epoch": 0.18744373371239043, "grad_norm": 0.67578125, "learning_rate": 0.00018322393176658898, "loss": 0.9986, "step": 3956 }, { "epoch": 0.1874911158493248, "grad_norm": 0.490234375, "learning_rate": 0.00018321567414316073, "loss": 0.7868, "step": 3957 }, { "epoch": 0.18753849798625918, "grad_norm": 0.6953125, "learning_rate": 0.00018320741467409748, "loss": 0.3172, "step": 3958 }, { "epoch": 0.18758588012319355, "grad_norm": 0.76171875, "learning_rate": 0.0001831991533595824, "loss": 1.3278, "step": 3959 }, { "epoch": 0.18763326226012794, "grad_norm": 0.546875, "learning_rate": 0.00018319089019979868, "loss": 0.6262, "step": 3960 }, { "epoch": 0.1876806443970623, "grad_norm": 0.578125, "learning_rate": 0.00018318262519492965, "loss": 1.1593, "step": 3961 }, { "epoch": 0.18772802653399667, "grad_norm": 0.287109375, "learning_rate": 0.00018317435834515862, "loss": 0.0484, "step": 3962 }, { "epoch": 0.18777540867093107, "grad_norm": 0.57421875, "learning_rate": 0.00018316608965066887, "loss": 0.9068, "step": 3963 }, { "epoch": 0.18782279080786543, "grad_norm": 0.30859375, "learning_rate": 0.00018315781911164386, "loss": 0.1292, "step": 3964 }, { "epoch": 0.18787017294479982, "grad_norm": 0.69921875, "learning_rate": 0.00018314954672826703, "loss": 0.2688, "step": 3965 }, { "epoch": 0.1879175550817342, "grad_norm": 0.55859375, "learning_rate": 0.00018314127250072178, "loss": 1.1685, "step": 3966 }, { "epoch": 0.18796493721866855, "grad_norm": 0.6484375, "learning_rate": 0.00018313299642919167, "loss": 1.2903, "step": 3967 }, { "epoch": 0.18801231935560295, "grad_norm": 0.6875, "learning_rate": 0.00018312471851386026, "loss": 0.9526, "step": 3968 }, { "epoch": 0.1880597014925373, "grad_norm": 0.66796875, "learning_rate": 0.00018311643875491113, "loss": 1.6321, "step": 3969 }, { "epoch": 0.18810708362947168, "grad_norm": 0.62890625, "learning_rate": 0.00018310815715252788, "loss": 1.1504, "step": 3970 }, { "epoch": 0.18815446576640607, "grad_norm": 0.6171875, "learning_rate": 0.00018309987370689428, "loss": 0.6617, "step": 3971 }, { "epoch": 0.18820184790334044, "grad_norm": 1.4296875, "learning_rate": 0.00018309158841819397, "loss": 0.7718, "step": 3972 }, { "epoch": 0.18824923004027483, "grad_norm": 0.59765625, "learning_rate": 0.00018308330128661074, "loss": 0.8172, "step": 3973 }, { "epoch": 0.1882966121772092, "grad_norm": 0.76171875, "learning_rate": 0.0001830750123123284, "loss": 0.7052, "step": 3974 }, { "epoch": 0.18834399431414356, "grad_norm": 0.80078125, "learning_rate": 0.00018306672149553076, "loss": 1.2497, "step": 3975 }, { "epoch": 0.18839137645107795, "grad_norm": 0.58203125, "learning_rate": 0.0001830584288364017, "loss": 0.7786, "step": 3976 }, { "epoch": 0.18843875858801232, "grad_norm": 0.39453125, "learning_rate": 0.0001830501343351252, "loss": 0.1944, "step": 3977 }, { "epoch": 0.18848614072494668, "grad_norm": 0.30859375, "learning_rate": 0.00018304183799188514, "loss": 0.043, "step": 3978 }, { "epoch": 0.18853352286188108, "grad_norm": 0.0064697265625, "learning_rate": 0.00018303353980686558, "loss": 0.0004, "step": 3979 }, { "epoch": 0.18858090499881544, "grad_norm": 0.490234375, "learning_rate": 0.00018302523978025058, "loss": 0.1154, "step": 3980 }, { "epoch": 0.18862828713574983, "grad_norm": 0.138671875, "learning_rate": 0.00018301693791222413, "loss": 0.0054, "step": 3981 }, { "epoch": 0.1886756692726842, "grad_norm": 0.609375, "learning_rate": 0.0001830086342029705, "loss": 0.6819, "step": 3982 }, { "epoch": 0.18872305140961856, "grad_norm": 0.671875, "learning_rate": 0.0001830003286526737, "loss": 0.9113, "step": 3983 }, { "epoch": 0.18877043354655296, "grad_norm": 0.625, "learning_rate": 0.00018299202126151808, "loss": 1.3575, "step": 3984 }, { "epoch": 0.18881781568348732, "grad_norm": 0.419921875, "learning_rate": 0.00018298371202968782, "loss": 0.74, "step": 3985 }, { "epoch": 0.1888651978204217, "grad_norm": 0.71484375, "learning_rate": 0.0001829754009573672, "loss": 1.3455, "step": 3986 }, { "epoch": 0.18891257995735608, "grad_norm": 1.0625, "learning_rate": 0.00018296708804474058, "loss": 0.1818, "step": 3987 }, { "epoch": 0.18895996209429045, "grad_norm": 0.08154296875, "learning_rate": 0.0001829587732919923, "loss": 0.0035, "step": 3988 }, { "epoch": 0.18900734423122484, "grad_norm": 0.8203125, "learning_rate": 0.00018295045669930682, "loss": 0.9317, "step": 3989 }, { "epoch": 0.1890547263681592, "grad_norm": 0.75390625, "learning_rate": 0.00018294213826686853, "loss": 1.188, "step": 3990 }, { "epoch": 0.18910210850509357, "grad_norm": 0.5703125, "learning_rate": 0.000182933817994862, "loss": 0.7484, "step": 3991 }, { "epoch": 0.18914949064202796, "grad_norm": 0.56640625, "learning_rate": 0.00018292549588347167, "loss": 1.4723, "step": 3992 }, { "epoch": 0.18919687277896233, "grad_norm": 0.6171875, "learning_rate": 0.00018291717193288224, "loss": 0.9236, "step": 3993 }, { "epoch": 0.18924425491589672, "grad_norm": 0.640625, "learning_rate": 0.00018290884614327818, "loss": 0.9172, "step": 3994 }, { "epoch": 0.18929163705283109, "grad_norm": 0.9609375, "learning_rate": 0.0001829005185148443, "loss": 0.6576, "step": 3995 }, { "epoch": 0.18933901918976545, "grad_norm": 0.75390625, "learning_rate": 0.00018289218904776516, "loss": 0.8123, "step": 3996 }, { "epoch": 0.18938640132669984, "grad_norm": 0.69921875, "learning_rate": 0.00018288385774222558, "loss": 1.4964, "step": 3997 }, { "epoch": 0.1894337834636342, "grad_norm": 0.95703125, "learning_rate": 0.00018287552459841033, "loss": 0.0254, "step": 3998 }, { "epoch": 0.18948116560056857, "grad_norm": 1.3984375, "learning_rate": 0.00018286718961650422, "loss": 1.1917, "step": 3999 }, { "epoch": 0.18952854773750297, "grad_norm": 0.6328125, "learning_rate": 0.00018285885279669209, "loss": 0.7383, "step": 4000 }, { "epoch": 0.18957592987443733, "grad_norm": 0.6328125, "learning_rate": 0.00018285051413915885, "loss": 1.2202, "step": 4001 }, { "epoch": 0.18962331201137173, "grad_norm": 1.015625, "learning_rate": 0.0001828421736440895, "loss": 0.1662, "step": 4002 }, { "epoch": 0.1896706941483061, "grad_norm": 0.671875, "learning_rate": 0.00018283383131166895, "loss": 0.8653, "step": 4003 }, { "epoch": 0.18971807628524046, "grad_norm": 0.67578125, "learning_rate": 0.00018282548714208227, "loss": 1.4906, "step": 4004 }, { "epoch": 0.18976545842217485, "grad_norm": 0.58984375, "learning_rate": 0.00018281714113551448, "loss": 0.043, "step": 4005 }, { "epoch": 0.1898128405591092, "grad_norm": 0.51171875, "learning_rate": 0.00018280879329215076, "loss": 0.642, "step": 4006 }, { "epoch": 0.18986022269604358, "grad_norm": 0.61328125, "learning_rate": 0.00018280044361217615, "loss": 0.9417, "step": 4007 }, { "epoch": 0.18990760483297797, "grad_norm": 0.63671875, "learning_rate": 0.00018279209209577592, "loss": 1.0475, "step": 4008 }, { "epoch": 0.18995498696991234, "grad_norm": 0.70703125, "learning_rate": 0.0001827837387431353, "loss": 1.0232, "step": 4009 }, { "epoch": 0.19000236910684673, "grad_norm": 0.06005859375, "learning_rate": 0.0001827753835544395, "loss": 0.005, "step": 4010 }, { "epoch": 0.1900497512437811, "grad_norm": 0.5546875, "learning_rate": 0.00018276702652987389, "loss": 1.1849, "step": 4011 }, { "epoch": 0.19009713338071546, "grad_norm": 0.314453125, "learning_rate": 0.00018275866766962376, "loss": 0.1887, "step": 4012 }, { "epoch": 0.19014451551764985, "grad_norm": 0.53515625, "learning_rate": 0.0001827503069738745, "loss": 1.0838, "step": 4013 }, { "epoch": 0.19019189765458422, "grad_norm": 0.58984375, "learning_rate": 0.00018274194444281165, "loss": 1.0973, "step": 4014 }, { "epoch": 0.19023927979151858, "grad_norm": 0.6328125, "learning_rate": 0.00018273358007662055, "loss": 0.9148, "step": 4015 }, { "epoch": 0.19028666192845298, "grad_norm": 0.6875, "learning_rate": 0.0001827252138754868, "loss": 0.805, "step": 4016 }, { "epoch": 0.19033404406538734, "grad_norm": 0.671875, "learning_rate": 0.0001827168458395959, "loss": 0.6369, "step": 4017 }, { "epoch": 0.19038142620232174, "grad_norm": 0.76953125, "learning_rate": 0.0001827084759691335, "loss": 1.0181, "step": 4018 }, { "epoch": 0.1904288083392561, "grad_norm": 0.55859375, "learning_rate": 0.00018270010426428516, "loss": 0.5452, "step": 4019 }, { "epoch": 0.19047619047619047, "grad_norm": 0.48046875, "learning_rate": 0.00018269173072523663, "loss": 1.1883, "step": 4020 }, { "epoch": 0.19052357261312486, "grad_norm": 0.828125, "learning_rate": 0.00018268335535217355, "loss": 1.6229, "step": 4021 }, { "epoch": 0.19057095475005922, "grad_norm": 0.60546875, "learning_rate": 0.00018267497814528175, "loss": 0.7858, "step": 4022 }, { "epoch": 0.19061833688699362, "grad_norm": 0.5625, "learning_rate": 0.00018266659910474696, "loss": 1.03, "step": 4023 }, { "epoch": 0.19066571902392798, "grad_norm": 0.65234375, "learning_rate": 0.00018265821823075512, "loss": 0.8899, "step": 4024 }, { "epoch": 0.19071310116086235, "grad_norm": 0.6171875, "learning_rate": 0.00018264983552349198, "loss": 0.2098, "step": 4025 }, { "epoch": 0.19076048329779674, "grad_norm": 0.46484375, "learning_rate": 0.00018264145098314356, "loss": 0.0286, "step": 4026 }, { "epoch": 0.1908078654347311, "grad_norm": 0.578125, "learning_rate": 0.00018263306460989575, "loss": 0.5669, "step": 4027 }, { "epoch": 0.19085524757166547, "grad_norm": 0.08251953125, "learning_rate": 0.00018262467640393462, "loss": 0.008, "step": 4028 }, { "epoch": 0.19090262970859986, "grad_norm": 0.470703125, "learning_rate": 0.00018261628636544614, "loss": 0.5928, "step": 4029 }, { "epoch": 0.19095001184553423, "grad_norm": 0.5625, "learning_rate": 0.00018260789449461645, "loss": 0.8542, "step": 4030 }, { "epoch": 0.19099739398246862, "grad_norm": 1.0859375, "learning_rate": 0.00018259950079163164, "loss": 0.9255, "step": 4031 }, { "epoch": 0.191044776119403, "grad_norm": 0.71484375, "learning_rate": 0.0001825911052566779, "loss": 1.2408, "step": 4032 }, { "epoch": 0.19109215825633735, "grad_norm": 0.6171875, "learning_rate": 0.00018258270788994142, "loss": 1.4613, "step": 4033 }, { "epoch": 0.19113954039327175, "grad_norm": 0.796875, "learning_rate": 0.00018257430869160843, "loss": 1.1682, "step": 4034 }, { "epoch": 0.1911869225302061, "grad_norm": 0.95703125, "learning_rate": 0.00018256590766186522, "loss": 1.0742, "step": 4035 }, { "epoch": 0.19123430466714048, "grad_norm": 0.56640625, "learning_rate": 0.00018255750480089812, "loss": 0.4825, "step": 4036 }, { "epoch": 0.19128168680407487, "grad_norm": 0.796875, "learning_rate": 0.00018254910010889354, "loss": 0.9041, "step": 4037 }, { "epoch": 0.19132906894100923, "grad_norm": 0.73828125, "learning_rate": 0.0001825406935860378, "loss": 1.0246, "step": 4038 }, { "epoch": 0.19137645107794363, "grad_norm": 0.6640625, "learning_rate": 0.0001825322852325174, "loss": 1.1757, "step": 4039 }, { "epoch": 0.191423833214878, "grad_norm": 0.53515625, "learning_rate": 0.00018252387504851883, "loss": 1.2713, "step": 4040 }, { "epoch": 0.19147121535181236, "grad_norm": 0.63671875, "learning_rate": 0.00018251546303422865, "loss": 1.3427, "step": 4041 }, { "epoch": 0.19151859748874675, "grad_norm": 0.84375, "learning_rate": 0.00018250704918983335, "loss": 1.2905, "step": 4042 }, { "epoch": 0.19156597962568112, "grad_norm": 0.578125, "learning_rate": 0.00018249863351551957, "loss": 0.9729, "step": 4043 }, { "epoch": 0.19161336176261548, "grad_norm": 0.419921875, "learning_rate": 0.000182490216011474, "loss": 0.6328, "step": 4044 }, { "epoch": 0.19166074389954987, "grad_norm": 0.408203125, "learning_rate": 0.0001824817966778833, "loss": 0.0854, "step": 4045 }, { "epoch": 0.19170812603648424, "grad_norm": 0.048095703125, "learning_rate": 0.0001824733755149342, "loss": 0.0029, "step": 4046 }, { "epoch": 0.19175550817341863, "grad_norm": 0.01312255859375, "learning_rate": 0.0001824649525228135, "loss": 0.0006, "step": 4047 }, { "epoch": 0.191802890310353, "grad_norm": 0.56640625, "learning_rate": 0.00018245652770170794, "loss": 0.3941, "step": 4048 }, { "epoch": 0.19185027244728736, "grad_norm": 0.609375, "learning_rate": 0.00018244810105180444, "loss": 1.1245, "step": 4049 }, { "epoch": 0.19189765458422176, "grad_norm": 0.65234375, "learning_rate": 0.0001824396725732899, "loss": 0.1825, "step": 4050 }, { "epoch": 0.19194503672115612, "grad_norm": 0.65625, "learning_rate": 0.00018243124226635123, "loss": 0.8779, "step": 4051 }, { "epoch": 0.1919924188580905, "grad_norm": 0.55859375, "learning_rate": 0.0001824228101311754, "loss": 0.842, "step": 4052 }, { "epoch": 0.19203980099502488, "grad_norm": 0.29296875, "learning_rate": 0.00018241437616794946, "loss": 0.1626, "step": 4053 }, { "epoch": 0.19208718313195924, "grad_norm": 0.484375, "learning_rate": 0.0001824059403768604, "loss": 0.7362, "step": 4054 }, { "epoch": 0.19213456526889364, "grad_norm": 0.6171875, "learning_rate": 0.00018239750275809538, "loss": 1.2624, "step": 4055 }, { "epoch": 0.192181947405828, "grad_norm": 0.515625, "learning_rate": 0.0001823890633118415, "loss": 0.8972, "step": 4056 }, { "epoch": 0.19222932954276237, "grad_norm": 0.4453125, "learning_rate": 0.00018238062203828598, "loss": 0.5408, "step": 4057 }, { "epoch": 0.19227671167969676, "grad_norm": 0.65234375, "learning_rate": 0.00018237217893761598, "loss": 1.1592, "step": 4058 }, { "epoch": 0.19232409381663113, "grad_norm": 0.58984375, "learning_rate": 0.00018236373401001878, "loss": 1.0925, "step": 4059 }, { "epoch": 0.19237147595356552, "grad_norm": 0.01043701171875, "learning_rate": 0.00018235528725568174, "loss": 0.0004, "step": 4060 }, { "epoch": 0.19241885809049988, "grad_norm": 0.5859375, "learning_rate": 0.0001823468386747921, "loss": 1.0052, "step": 4061 }, { "epoch": 0.19246624022743425, "grad_norm": 0.609375, "learning_rate": 0.00018233838826753733, "loss": 0.3306, "step": 4062 }, { "epoch": 0.19251362236436864, "grad_norm": 0.671875, "learning_rate": 0.00018232993603410478, "loss": 0.1146, "step": 4063 }, { "epoch": 0.192561004501303, "grad_norm": 0.61328125, "learning_rate": 0.00018232148197468194, "loss": 1.229, "step": 4064 }, { "epoch": 0.19260838663823737, "grad_norm": 1.0390625, "learning_rate": 0.00018231302608945636, "loss": 0.1671, "step": 4065 }, { "epoch": 0.19265576877517177, "grad_norm": 0.1005859375, "learning_rate": 0.0001823045683786155, "loss": 0.0051, "step": 4066 }, { "epoch": 0.19270315091210613, "grad_norm": 0.51171875, "learning_rate": 0.00018229610884234698, "loss": 0.7844, "step": 4067 }, { "epoch": 0.19275053304904052, "grad_norm": 1.2109375, "learning_rate": 0.00018228764748083847, "loss": 0.9091, "step": 4068 }, { "epoch": 0.1927979151859749, "grad_norm": 0.57421875, "learning_rate": 0.00018227918429427752, "loss": 0.7842, "step": 4069 }, { "epoch": 0.19284529732290925, "grad_norm": 0.51171875, "learning_rate": 0.00018227071928285192, "loss": 1.0486, "step": 4070 }, { "epoch": 0.19289267945984365, "grad_norm": 0.259765625, "learning_rate": 0.00018226225244674944, "loss": 0.0681, "step": 4071 }, { "epoch": 0.192940061596778, "grad_norm": 0.12158203125, "learning_rate": 0.00018225378378615778, "loss": 0.0134, "step": 4072 }, { "epoch": 0.19298744373371238, "grad_norm": 0.498046875, "learning_rate": 0.00018224531330126483, "loss": 0.9821, "step": 4073 }, { "epoch": 0.19303482587064677, "grad_norm": 0.828125, "learning_rate": 0.00018223684099225843, "loss": 0.246, "step": 4074 }, { "epoch": 0.19308220800758114, "grad_norm": 0.59765625, "learning_rate": 0.00018222836685932646, "loss": 1.0779, "step": 4075 }, { "epoch": 0.19312959014451553, "grad_norm": 0.4453125, "learning_rate": 0.00018221989090265694, "loss": 0.4981, "step": 4076 }, { "epoch": 0.1931769722814499, "grad_norm": 0.6484375, "learning_rate": 0.0001822114131224378, "loss": 0.8945, "step": 4077 }, { "epoch": 0.19322435441838426, "grad_norm": 0.435546875, "learning_rate": 0.0001822029335188571, "loss": 0.3565, "step": 4078 }, { "epoch": 0.19327173655531865, "grad_norm": 0.703125, "learning_rate": 0.00018219445209210287, "loss": 1.3734, "step": 4079 }, { "epoch": 0.19331911869225302, "grad_norm": 0.63671875, "learning_rate": 0.00018218596884236326, "loss": 1.1543, "step": 4080 }, { "epoch": 0.1933665008291874, "grad_norm": 0.0107421875, "learning_rate": 0.00018217748376982636, "loss": 0.0005, "step": 4081 }, { "epoch": 0.19341388296612178, "grad_norm": 0.7109375, "learning_rate": 0.00018216899687468048, "loss": 0.8825, "step": 4082 }, { "epoch": 0.19346126510305614, "grad_norm": 0.51953125, "learning_rate": 0.0001821605081571137, "loss": 1.0132, "step": 4083 }, { "epoch": 0.19350864723999053, "grad_norm": 0.4921875, "learning_rate": 0.0001821520176173144, "loss": 1.0012, "step": 4084 }, { "epoch": 0.1935560293769249, "grad_norm": 0.57421875, "learning_rate": 0.00018214352525547085, "loss": 1.0989, "step": 4085 }, { "epoch": 0.19360341151385926, "grad_norm": 0.69140625, "learning_rate": 0.0001821350310717714, "loss": 0.8541, "step": 4086 }, { "epoch": 0.19365079365079366, "grad_norm": 0.73828125, "learning_rate": 0.00018212653506640442, "loss": 0.8196, "step": 4087 }, { "epoch": 0.19369817578772802, "grad_norm": 0.2265625, "learning_rate": 0.0001821180372395584, "loss": 0.0037, "step": 4088 }, { "epoch": 0.19374555792466241, "grad_norm": 0.5859375, "learning_rate": 0.00018210953759142178, "loss": 0.2064, "step": 4089 }, { "epoch": 0.19379294006159678, "grad_norm": 0.466796875, "learning_rate": 0.00018210103612218307, "loss": 0.0975, "step": 4090 }, { "epoch": 0.19384032219853115, "grad_norm": 0.578125, "learning_rate": 0.00018209253283203082, "loss": 0.7448, "step": 4091 }, { "epoch": 0.19388770433546554, "grad_norm": 0.51171875, "learning_rate": 0.00018208402772115366, "loss": 0.0596, "step": 4092 }, { "epoch": 0.1939350864723999, "grad_norm": 0.59375, "learning_rate": 0.00018207552078974018, "loss": 0.0836, "step": 4093 }, { "epoch": 0.19398246860933427, "grad_norm": 0.6328125, "learning_rate": 0.00018206701203797907, "loss": 0.9699, "step": 4094 }, { "epoch": 0.19402985074626866, "grad_norm": 0.6015625, "learning_rate": 0.00018205850146605905, "loss": 0.9955, "step": 4095 }, { "epoch": 0.19407723288320303, "grad_norm": 0.56640625, "learning_rate": 0.00018204998907416888, "loss": 1.0723, "step": 4096 }, { "epoch": 0.19412461502013742, "grad_norm": 0.006317138671875, "learning_rate": 0.00018204147486249733, "loss": 0.0005, "step": 4097 }, { "epoch": 0.19417199715707179, "grad_norm": 0.6015625, "learning_rate": 0.0001820329588312333, "loss": 1.2173, "step": 4098 }, { "epoch": 0.19421937929400615, "grad_norm": 0.298828125, "learning_rate": 0.00018202444098056554, "loss": 0.1614, "step": 4099 }, { "epoch": 0.19426676143094054, "grad_norm": 0.39453125, "learning_rate": 0.00018201592131068312, "loss": 0.4065, "step": 4100 }, { "epoch": 0.1943141435678749, "grad_norm": 0.451171875, "learning_rate": 0.00018200739982177487, "loss": 0.0372, "step": 4101 }, { "epoch": 0.19436152570480927, "grad_norm": 0.439453125, "learning_rate": 0.00018199887651402987, "loss": 0.4186, "step": 4102 }, { "epoch": 0.19440890784174367, "grad_norm": 0.51953125, "learning_rate": 0.00018199035138763712, "loss": 0.9015, "step": 4103 }, { "epoch": 0.19445628997867803, "grad_norm": 0.5234375, "learning_rate": 0.00018198182444278572, "loss": 1.0228, "step": 4104 }, { "epoch": 0.19450367211561242, "grad_norm": 0.48046875, "learning_rate": 0.0001819732956796648, "loss": 1.0538, "step": 4105 }, { "epoch": 0.1945510542525468, "grad_norm": 0.765625, "learning_rate": 0.00018196476509846346, "loss": 1.1581, "step": 4106 }, { "epoch": 0.19459843638948116, "grad_norm": 0.57421875, "learning_rate": 0.00018195623269937094, "loss": 0.9356, "step": 4107 }, { "epoch": 0.19464581852641555, "grad_norm": 0.63671875, "learning_rate": 0.00018194769848257648, "loss": 0.9523, "step": 4108 }, { "epoch": 0.1946932006633499, "grad_norm": 0.0194091796875, "learning_rate": 0.00018193916244826938, "loss": 0.0006, "step": 4109 }, { "epoch": 0.1947405828002843, "grad_norm": 0.546875, "learning_rate": 0.0001819306245966389, "loss": 1.0516, "step": 4110 }, { "epoch": 0.19478796493721867, "grad_norm": 0.4765625, "learning_rate": 0.00018192208492787444, "loss": 0.8879, "step": 4111 }, { "epoch": 0.19483534707415304, "grad_norm": 0.54296875, "learning_rate": 0.0001819135434421654, "loss": 0.6391, "step": 4112 }, { "epoch": 0.19488272921108743, "grad_norm": 0.408203125, "learning_rate": 0.0001819050001397012, "loss": 0.0754, "step": 4113 }, { "epoch": 0.1949301113480218, "grad_norm": 0.546875, "learning_rate": 0.0001818964550206714, "loss": 1.0995, "step": 4114 }, { "epoch": 0.19497749348495616, "grad_norm": 0.640625, "learning_rate": 0.0001818879080852654, "loss": 0.9117, "step": 4115 }, { "epoch": 0.19502487562189055, "grad_norm": 0.53515625, "learning_rate": 0.00018187935933367281, "loss": 0.6531, "step": 4116 }, { "epoch": 0.19507225775882492, "grad_norm": 0.193359375, "learning_rate": 0.00018187080876608328, "loss": 0.0202, "step": 4117 }, { "epoch": 0.1951196398957593, "grad_norm": 0.55859375, "learning_rate": 0.00018186225638268643, "loss": 0.0461, "step": 4118 }, { "epoch": 0.19516702203269368, "grad_norm": 0.671875, "learning_rate": 0.00018185370218367188, "loss": 0.8954, "step": 4119 }, { "epoch": 0.19521440416962804, "grad_norm": 0.419921875, "learning_rate": 0.00018184514616922945, "loss": 0.0695, "step": 4120 }, { "epoch": 0.19526178630656243, "grad_norm": 0.53515625, "learning_rate": 0.00018183658833954884, "loss": 0.4098, "step": 4121 }, { "epoch": 0.1953091684434968, "grad_norm": 1.1796875, "learning_rate": 0.00018182802869481986, "loss": 1.5883, "step": 4122 }, { "epoch": 0.19535655058043117, "grad_norm": 0.640625, "learning_rate": 0.00018181946723523236, "loss": 1.1525, "step": 4123 }, { "epoch": 0.19540393271736556, "grad_norm": 0.53125, "learning_rate": 0.00018181090396097625, "loss": 1.054, "step": 4124 }, { "epoch": 0.19545131485429992, "grad_norm": 0.578125, "learning_rate": 0.00018180233887224142, "loss": 0.6363, "step": 4125 }, { "epoch": 0.19549869699123432, "grad_norm": 0.421875, "learning_rate": 0.0001817937719692178, "loss": 0.5821, "step": 4126 }, { "epoch": 0.19554607912816868, "grad_norm": 0.470703125, "learning_rate": 0.0001817852032520955, "loss": 0.2442, "step": 4127 }, { "epoch": 0.19559346126510305, "grad_norm": 0.69140625, "learning_rate": 0.00018177663272106448, "loss": 0.5642, "step": 4128 }, { "epoch": 0.19564084340203744, "grad_norm": 0.51171875, "learning_rate": 0.00018176806037631485, "loss": 0.5534, "step": 4129 }, { "epoch": 0.1956882255389718, "grad_norm": 0.59765625, "learning_rate": 0.00018175948621803676, "loss": 0.7327, "step": 4130 }, { "epoch": 0.19573560767590617, "grad_norm": 0.32421875, "learning_rate": 0.00018175091024642034, "loss": 0.142, "step": 4131 }, { "epoch": 0.19578298981284056, "grad_norm": 0.5234375, "learning_rate": 0.0001817423324616558, "loss": 0.7017, "step": 4132 }, { "epoch": 0.19583037194977493, "grad_norm": 0.28125, "learning_rate": 0.0001817337528639334, "loss": 0.0416, "step": 4133 }, { "epoch": 0.19587775408670932, "grad_norm": 0.2236328125, "learning_rate": 0.00018172517145344341, "loss": 0.1415, "step": 4134 }, { "epoch": 0.1959251362236437, "grad_norm": 0.56640625, "learning_rate": 0.00018171658823037617, "loss": 1.0319, "step": 4135 }, { "epoch": 0.19597251836057805, "grad_norm": 0.64453125, "learning_rate": 0.00018170800319492204, "loss": 0.9732, "step": 4136 }, { "epoch": 0.19601990049751244, "grad_norm": 0.671875, "learning_rate": 0.00018169941634727142, "loss": 1.0862, "step": 4137 }, { "epoch": 0.1960672826344468, "grad_norm": 0.20703125, "learning_rate": 0.0001816908276876148, "loss": 0.0289, "step": 4138 }, { "epoch": 0.1961146647713812, "grad_norm": 0.609375, "learning_rate": 0.00018168223721614263, "loss": 0.6891, "step": 4139 }, { "epoch": 0.19616204690831557, "grad_norm": 0.609375, "learning_rate": 0.00018167364493304545, "loss": 1.2633, "step": 4140 }, { "epoch": 0.19620942904524993, "grad_norm": 0.59375, "learning_rate": 0.0001816650508385138, "loss": 0.9559, "step": 4141 }, { "epoch": 0.19625681118218433, "grad_norm": 0.609375, "learning_rate": 0.0001816564549327383, "loss": 1.3139, "step": 4142 }, { "epoch": 0.1963041933191187, "grad_norm": 0.75390625, "learning_rate": 0.00018164785721590961, "loss": 1.0766, "step": 4143 }, { "epoch": 0.19635157545605306, "grad_norm": 0.65625, "learning_rate": 0.00018163925768821843, "loss": 0.7621, "step": 4144 }, { "epoch": 0.19639895759298745, "grad_norm": 0.859375, "learning_rate": 0.00018163065634985547, "loss": 1.3806, "step": 4145 }, { "epoch": 0.19644633972992182, "grad_norm": 0.66796875, "learning_rate": 0.0001816220532010115, "loss": 1.0591, "step": 4146 }, { "epoch": 0.1964937218668562, "grad_norm": 0.76953125, "learning_rate": 0.00018161344824187733, "loss": 1.3723, "step": 4147 }, { "epoch": 0.19654110400379057, "grad_norm": 0.34765625, "learning_rate": 0.0001816048414726438, "loss": 0.2652, "step": 4148 }, { "epoch": 0.19658848614072494, "grad_norm": 0.86328125, "learning_rate": 0.00018159623289350183, "loss": 1.0016, "step": 4149 }, { "epoch": 0.19663586827765933, "grad_norm": 0.4375, "learning_rate": 0.0001815876225046423, "loss": 0.0529, "step": 4150 }, { "epoch": 0.1966832504145937, "grad_norm": 0.482421875, "learning_rate": 0.00018157901030625625, "loss": 0.7826, "step": 4151 }, { "epoch": 0.19673063255152806, "grad_norm": 0.60546875, "learning_rate": 0.00018157039629853458, "loss": 0.7337, "step": 4152 }, { "epoch": 0.19677801468846245, "grad_norm": 0.78125, "learning_rate": 0.00018156178048166842, "loss": 1.1036, "step": 4153 }, { "epoch": 0.19682539682539682, "grad_norm": 0.56640625, "learning_rate": 0.00018155316285584886, "loss": 0.751, "step": 4154 }, { "epoch": 0.1968727789623312, "grad_norm": 0.59765625, "learning_rate": 0.00018154454342126703, "loss": 1.113, "step": 4155 }, { "epoch": 0.19692016109926558, "grad_norm": 0.72265625, "learning_rate": 0.00018153592217811407, "loss": 1.2036, "step": 4156 }, { "epoch": 0.19696754323619994, "grad_norm": 0.5078125, "learning_rate": 0.00018152729912658118, "loss": 0.1611, "step": 4157 }, { "epoch": 0.19701492537313434, "grad_norm": 0.21875, "learning_rate": 0.0001815186742668597, "loss": 0.1472, "step": 4158 }, { "epoch": 0.1970623075100687, "grad_norm": 0.70703125, "learning_rate": 0.0001815100475991408, "loss": 1.2335, "step": 4159 }, { "epoch": 0.19710968964700307, "grad_norm": 0.66796875, "learning_rate": 0.00018150141912361587, "loss": 0.9885, "step": 4160 }, { "epoch": 0.19715707178393746, "grad_norm": 0.40625, "learning_rate": 0.0001814927888404763, "loss": 0.9222, "step": 4161 }, { "epoch": 0.19720445392087183, "grad_norm": 0.0185546875, "learning_rate": 0.00018148415674991344, "loss": 0.0008, "step": 4162 }, { "epoch": 0.19725183605780622, "grad_norm": 0.447265625, "learning_rate": 0.0001814755228521188, "loss": 0.2763, "step": 4163 }, { "epoch": 0.19729921819474058, "grad_norm": 0.7578125, "learning_rate": 0.00018146688714728386, "loss": 0.9484, "step": 4164 }, { "epoch": 0.19734660033167495, "grad_norm": 0.65234375, "learning_rate": 0.0001814582496356001, "loss": 1.0825, "step": 4165 }, { "epoch": 0.19739398246860934, "grad_norm": 0.64453125, "learning_rate": 0.00018144961031725916, "loss": 1.1818, "step": 4166 }, { "epoch": 0.1974413646055437, "grad_norm": 0.671875, "learning_rate": 0.0001814409691924526, "loss": 1.0338, "step": 4167 }, { "epoch": 0.1974887467424781, "grad_norm": 0.34375, "learning_rate": 0.0001814323262613721, "loss": 0.0489, "step": 4168 }, { "epoch": 0.19753612887941246, "grad_norm": 0.55078125, "learning_rate": 0.00018142368152420933, "loss": 0.9375, "step": 4169 }, { "epoch": 0.19758351101634683, "grad_norm": 0.66015625, "learning_rate": 0.00018141503498115603, "loss": 1.2822, "step": 4170 }, { "epoch": 0.19763089315328122, "grad_norm": 0.62109375, "learning_rate": 0.00018140638663240402, "loss": 1.137, "step": 4171 }, { "epoch": 0.1976782752902156, "grad_norm": 1.09375, "learning_rate": 0.00018139773647814501, "loss": 1.0599, "step": 4172 }, { "epoch": 0.19772565742714995, "grad_norm": 0.5703125, "learning_rate": 0.00018138908451857094, "loss": 0.2821, "step": 4173 }, { "epoch": 0.19777303956408435, "grad_norm": 0.81640625, "learning_rate": 0.00018138043075387365, "loss": 1.3287, "step": 4174 }, { "epoch": 0.1978204217010187, "grad_norm": 0.55859375, "learning_rate": 0.00018137177518424508, "loss": 0.9248, "step": 4175 }, { "epoch": 0.1978678038379531, "grad_norm": 0.7265625, "learning_rate": 0.00018136311780987723, "loss": 0.8726, "step": 4176 }, { "epoch": 0.19791518597488747, "grad_norm": 0.54296875, "learning_rate": 0.00018135445863096206, "loss": 0.7506, "step": 4177 }, { "epoch": 0.19796256811182183, "grad_norm": 0.67578125, "learning_rate": 0.00018134579764769168, "loss": 0.2422, "step": 4178 }, { "epoch": 0.19800995024875623, "grad_norm": 0.77734375, "learning_rate": 0.00018133713486025816, "loss": 1.2308, "step": 4179 }, { "epoch": 0.1980573323856906, "grad_norm": 0.54296875, "learning_rate": 0.0001813284702688536, "loss": 0.961, "step": 4180 }, { "epoch": 0.19810471452262496, "grad_norm": 0.6328125, "learning_rate": 0.00018131980387367023, "loss": 1.2177, "step": 4181 }, { "epoch": 0.19815209665955935, "grad_norm": 0.51171875, "learning_rate": 0.00018131113567490021, "loss": 0.0505, "step": 4182 }, { "epoch": 0.19819947879649372, "grad_norm": 0.5078125, "learning_rate": 0.0001813024656727358, "loss": 0.7928, "step": 4183 }, { "epoch": 0.1982468609334281, "grad_norm": 0.7109375, "learning_rate": 0.00018129379386736932, "loss": 0.4456, "step": 4184 }, { "epoch": 0.19829424307036247, "grad_norm": 0.64453125, "learning_rate": 0.00018128512025899305, "loss": 0.2851, "step": 4185 }, { "epoch": 0.19834162520729684, "grad_norm": 0.06787109375, "learning_rate": 0.00018127644484779943, "loss": 0.0022, "step": 4186 }, { "epoch": 0.19838900734423123, "grad_norm": 0.6640625, "learning_rate": 0.0001812677676339808, "loss": 0.8009, "step": 4187 }, { "epoch": 0.1984363894811656, "grad_norm": 0.6953125, "learning_rate": 0.00018125908861772966, "loss": 0.9012, "step": 4188 }, { "epoch": 0.19848377161809996, "grad_norm": 0.67578125, "learning_rate": 0.0001812504077992385, "loss": 0.9618, "step": 4189 }, { "epoch": 0.19853115375503436, "grad_norm": 0.30078125, "learning_rate": 0.00018124172517869984, "loss": 0.1837, "step": 4190 }, { "epoch": 0.19857853589196872, "grad_norm": 0.671875, "learning_rate": 0.00018123304075630625, "loss": 1.0119, "step": 4191 }, { "epoch": 0.19862591802890311, "grad_norm": 0.68359375, "learning_rate": 0.00018122435453225033, "loss": 1.0928, "step": 4192 }, { "epoch": 0.19867330016583748, "grad_norm": 0.63671875, "learning_rate": 0.00018121566650672474, "loss": 0.8389, "step": 4193 }, { "epoch": 0.19872068230277184, "grad_norm": 0.609375, "learning_rate": 0.00018120697667992216, "loss": 1.0344, "step": 4194 }, { "epoch": 0.19876806443970624, "grad_norm": 0.65234375, "learning_rate": 0.00018119828505203537, "loss": 1.2736, "step": 4195 }, { "epoch": 0.1988154465766406, "grad_norm": 0.7578125, "learning_rate": 0.00018118959162325711, "loss": 0.4803, "step": 4196 }, { "epoch": 0.19886282871357497, "grad_norm": 0.26171875, "learning_rate": 0.00018118089639378016, "loss": 0.146, "step": 4197 }, { "epoch": 0.19891021085050936, "grad_norm": 0.0037994384765625, "learning_rate": 0.00018117219936379742, "loss": 0.0002, "step": 4198 }, { "epoch": 0.19895759298744373, "grad_norm": 0.6796875, "learning_rate": 0.00018116350053350172, "loss": 1.1075, "step": 4199 }, { "epoch": 0.19900497512437812, "grad_norm": 0.5859375, "learning_rate": 0.0001811547999030861, "loss": 0.7409, "step": 4200 }, { "epoch": 0.19905235726131248, "grad_norm": 1.2578125, "learning_rate": 0.0001811460974727434, "loss": 0.0631, "step": 4201 }, { "epoch": 0.19909973939824685, "grad_norm": 0.25390625, "learning_rate": 0.00018113739324266673, "loss": 0.1505, "step": 4202 }, { "epoch": 0.19914712153518124, "grad_norm": 0.66015625, "learning_rate": 0.00018112868721304904, "loss": 1.0933, "step": 4203 }, { "epoch": 0.1991945036721156, "grad_norm": 0.515625, "learning_rate": 0.00018111997938408353, "loss": 0.8051, "step": 4204 }, { "epoch": 0.19924188580905, "grad_norm": 0.177734375, "learning_rate": 0.00018111126975596328, "loss": 0.0196, "step": 4205 }, { "epoch": 0.19928926794598437, "grad_norm": 0.3203125, "learning_rate": 0.00018110255832888146, "loss": 0.1584, "step": 4206 }, { "epoch": 0.19933665008291873, "grad_norm": 0.8203125, "learning_rate": 0.00018109384510303127, "loss": 0.1847, "step": 4207 }, { "epoch": 0.19938403221985312, "grad_norm": 0.75390625, "learning_rate": 0.00018108513007860597, "loss": 0.6955, "step": 4208 }, { "epoch": 0.1994314143567875, "grad_norm": 0.53515625, "learning_rate": 0.00018107641325579887, "loss": 0.0291, "step": 4209 }, { "epoch": 0.19947879649372185, "grad_norm": 0.69921875, "learning_rate": 0.0001810676946348033, "loss": 1.3563, "step": 4210 }, { "epoch": 0.19952617863065625, "grad_norm": 0.251953125, "learning_rate": 0.00018105897421581257, "loss": 0.1529, "step": 4211 }, { "epoch": 0.1995735607675906, "grad_norm": 0.330078125, "learning_rate": 0.00018105025199902016, "loss": 0.1252, "step": 4212 }, { "epoch": 0.199620942904525, "grad_norm": 0.62890625, "learning_rate": 0.00018104152798461946, "loss": 0.1457, "step": 4213 }, { "epoch": 0.19966832504145937, "grad_norm": 0.78515625, "learning_rate": 0.00018103280217280406, "loss": 1.43, "step": 4214 }, { "epoch": 0.19971570717839374, "grad_norm": 0.80078125, "learning_rate": 0.00018102407456376733, "loss": 1.4493, "step": 4215 }, { "epoch": 0.19976308931532813, "grad_norm": 0.91015625, "learning_rate": 0.00018101534515770298, "loss": 1.3808, "step": 4216 }, { "epoch": 0.1998104714522625, "grad_norm": 0.6015625, "learning_rate": 0.00018100661395480455, "loss": 0.8198, "step": 4217 }, { "epoch": 0.19985785358919686, "grad_norm": 0.2177734375, "learning_rate": 0.00018099788095526576, "loss": 0.1655, "step": 4218 }, { "epoch": 0.19990523572613125, "grad_norm": 0.447265625, "learning_rate": 0.00018098914615928018, "loss": 0.8965, "step": 4219 }, { "epoch": 0.19995261786306562, "grad_norm": 0.390625, "learning_rate": 0.00018098040956704162, "loss": 0.0433, "step": 4220 }, { "epoch": 0.2, "grad_norm": 0.69140625, "learning_rate": 0.00018097167117874386, "loss": 1.3795, "step": 4221 }, { "epoch": 0.20004738213693438, "grad_norm": 0.54296875, "learning_rate": 0.00018096293099458067, "loss": 1.1495, "step": 4222 }, { "epoch": 0.20009476427386874, "grad_norm": 0.345703125, "learning_rate": 0.00018095418901474587, "loss": 0.0622, "step": 4223 }, { "epoch": 0.20014214641080313, "grad_norm": 0.59765625, "learning_rate": 0.0001809454452394334, "loss": 0.1261, "step": 4224 }, { "epoch": 0.2001895285477375, "grad_norm": 0.90234375, "learning_rate": 0.00018093669966883722, "loss": 0.1317, "step": 4225 }, { "epoch": 0.20023691068467186, "grad_norm": 0.65625, "learning_rate": 0.0001809279523031512, "loss": 1.4214, "step": 4226 }, { "epoch": 0.20028429282160626, "grad_norm": 0.8125, "learning_rate": 0.00018091920314256945, "loss": 0.301, "step": 4227 }, { "epoch": 0.20033167495854062, "grad_norm": 0.234375, "learning_rate": 0.00018091045218728593, "loss": 0.1512, "step": 4228 }, { "epoch": 0.20037905709547502, "grad_norm": 0.28515625, "learning_rate": 0.00018090169943749476, "loss": 0.1496, "step": 4229 }, { "epoch": 0.20042643923240938, "grad_norm": 0.6953125, "learning_rate": 0.00018089294489339008, "loss": 0.88, "step": 4230 }, { "epoch": 0.20047382136934375, "grad_norm": 0.515625, "learning_rate": 0.00018088418855516603, "loss": 0.201, "step": 4231 }, { "epoch": 0.20052120350627814, "grad_norm": 0.40234375, "learning_rate": 0.00018087543042301685, "loss": 0.675, "step": 4232 }, { "epoch": 0.2005685856432125, "grad_norm": 0.1552734375, "learning_rate": 0.00018086667049713675, "loss": 0.0207, "step": 4233 }, { "epoch": 0.2006159677801469, "grad_norm": 0.64453125, "learning_rate": 0.00018085790877772005, "loss": 1.1899, "step": 4234 }, { "epoch": 0.20066334991708126, "grad_norm": 0.6171875, "learning_rate": 0.00018084914526496105, "loss": 1.0218, "step": 4235 }, { "epoch": 0.20071073205401563, "grad_norm": 0.314453125, "learning_rate": 0.0001808403799590541, "loss": 0.0254, "step": 4236 }, { "epoch": 0.20075811419095002, "grad_norm": 0.494140625, "learning_rate": 0.00018083161286019367, "loss": 0.8982, "step": 4237 }, { "epoch": 0.2008054963278844, "grad_norm": 0.625, "learning_rate": 0.0001808228439685742, "loss": 1.0802, "step": 4238 }, { "epoch": 0.20085287846481875, "grad_norm": 0.73828125, "learning_rate": 0.00018081407328439007, "loss": 0.926, "step": 4239 }, { "epoch": 0.20090026060175314, "grad_norm": 0.640625, "learning_rate": 0.0001808053008078359, "loss": 0.8248, "step": 4240 }, { "epoch": 0.2009476427386875, "grad_norm": 0.59375, "learning_rate": 0.0001807965265391062, "loss": 0.9299, "step": 4241 }, { "epoch": 0.2009950248756219, "grad_norm": 0.58203125, "learning_rate": 0.00018078775047839564, "loss": 0.2623, "step": 4242 }, { "epoch": 0.20104240701255627, "grad_norm": 0.2236328125, "learning_rate": 0.00018077897262589883, "loss": 0.1651, "step": 4243 }, { "epoch": 0.20108978914949063, "grad_norm": 1.5, "learning_rate": 0.00018077019298181043, "loss": 0.0975, "step": 4244 }, { "epoch": 0.20113717128642503, "grad_norm": 0.8359375, "learning_rate": 0.00018076141154632516, "loss": 0.8288, "step": 4245 }, { "epoch": 0.2011845534233594, "grad_norm": 0.5703125, "learning_rate": 0.0001807526283196378, "loss": 0.7461, "step": 4246 }, { "epoch": 0.20123193556029376, "grad_norm": 0.09228515625, "learning_rate": 0.00018074384330194322, "loss": 0.0066, "step": 4247 }, { "epoch": 0.20127931769722815, "grad_norm": 0.734375, "learning_rate": 0.00018073505649343616, "loss": 0.1853, "step": 4248 }, { "epoch": 0.20132669983416251, "grad_norm": 0.625, "learning_rate": 0.00018072626789431156, "loss": 0.8787, "step": 4249 }, { "epoch": 0.2013740819710969, "grad_norm": 0.51171875, "learning_rate": 0.0001807174775047643, "loss": 0.4628, "step": 4250 }, { "epoch": 0.20142146410803127, "grad_norm": 1.265625, "learning_rate": 0.0001807086853249894, "loss": 0.3134, "step": 4251 }, { "epoch": 0.20146884624496564, "grad_norm": 0.66796875, "learning_rate": 0.0001806998913551818, "loss": 0.8921, "step": 4252 }, { "epoch": 0.20151622838190003, "grad_norm": 0.69921875, "learning_rate": 0.00018069109559553655, "loss": 0.7831, "step": 4253 }, { "epoch": 0.2015636105188344, "grad_norm": 0.796875, "learning_rate": 0.00018068229804624878, "loss": 0.0292, "step": 4254 }, { "epoch": 0.20161099265576876, "grad_norm": 0.53125, "learning_rate": 0.00018067349870751355, "loss": 0.0452, "step": 4255 }, { "epoch": 0.20165837479270315, "grad_norm": 0.59765625, "learning_rate": 0.00018066469757952608, "loss": 1.1553, "step": 4256 }, { "epoch": 0.20170575692963752, "grad_norm": 0.53515625, "learning_rate": 0.00018065589466248152, "loss": 0.9697, "step": 4257 }, { "epoch": 0.2017531390665719, "grad_norm": 0.5703125, "learning_rate": 0.00018064708995657513, "loss": 0.9858, "step": 4258 }, { "epoch": 0.20180052120350628, "grad_norm": 0.734375, "learning_rate": 0.0001806382834620022, "loss": 0.994, "step": 4259 }, { "epoch": 0.20184790334044064, "grad_norm": 0.6953125, "learning_rate": 0.000180629475178958, "loss": 0.1986, "step": 4260 }, { "epoch": 0.20189528547737504, "grad_norm": 0.330078125, "learning_rate": 0.00018062066510763795, "loss": 0.2004, "step": 4261 }, { "epoch": 0.2019426676143094, "grad_norm": 0.1787109375, "learning_rate": 0.00018061185324823743, "loss": 0.0201, "step": 4262 }, { "epoch": 0.2019900497512438, "grad_norm": 0.69921875, "learning_rate": 0.00018060303960095185, "loss": 1.223, "step": 4263 }, { "epoch": 0.20203743188817816, "grad_norm": 0.6171875, "learning_rate": 0.0001805942241659767, "loss": 0.8158, "step": 4264 }, { "epoch": 0.20208481402511252, "grad_norm": 0.11279296875, "learning_rate": 0.00018058540694350752, "loss": 0.0056, "step": 4265 }, { "epoch": 0.20213219616204692, "grad_norm": 0.70703125, "learning_rate": 0.00018057658793373983, "loss": 1.104, "step": 4266 }, { "epoch": 0.20217957829898128, "grad_norm": 0.87890625, "learning_rate": 0.00018056776713686927, "loss": 1.2196, "step": 4267 }, { "epoch": 0.20222696043591565, "grad_norm": 0.443359375, "learning_rate": 0.00018055894455309144, "loss": 1.0654, "step": 4268 }, { "epoch": 0.20227434257285004, "grad_norm": 0.55078125, "learning_rate": 0.00018055012018260204, "loss": 1.1841, "step": 4269 }, { "epoch": 0.2023217247097844, "grad_norm": 0.6015625, "learning_rate": 0.00018054129402559675, "loss": 1.1881, "step": 4270 }, { "epoch": 0.2023691068467188, "grad_norm": 0.79296875, "learning_rate": 0.00018053246608227137, "loss": 0.5166, "step": 4271 }, { "epoch": 0.20241648898365316, "grad_norm": 0.51953125, "learning_rate": 0.00018052363635282163, "loss": 0.0744, "step": 4272 }, { "epoch": 0.20246387112058753, "grad_norm": 0.55859375, "learning_rate": 0.00018051480483744346, "loss": 0.9402, "step": 4273 }, { "epoch": 0.20251125325752192, "grad_norm": 0.02197265625, "learning_rate": 0.00018050597153633263, "loss": 0.0009, "step": 4274 }, { "epoch": 0.2025586353944563, "grad_norm": 0.28515625, "learning_rate": 0.00018049713644968516, "loss": 0.1977, "step": 4275 }, { "epoch": 0.20260601753139065, "grad_norm": 0.6484375, "learning_rate": 0.0001804882995776969, "loss": 1.4933, "step": 4276 }, { "epoch": 0.20265339966832505, "grad_norm": 0.01129150390625, "learning_rate": 0.00018047946092056391, "loss": 0.0006, "step": 4277 }, { "epoch": 0.2027007818052594, "grad_norm": 0.58984375, "learning_rate": 0.0001804706204784822, "loss": 1.4036, "step": 4278 }, { "epoch": 0.2027481639421938, "grad_norm": 0.1513671875, "learning_rate": 0.00018046177825164784, "loss": 0.015, "step": 4279 }, { "epoch": 0.20279554607912817, "grad_norm": 0.65625, "learning_rate": 0.0001804529342402569, "loss": 0.6647, "step": 4280 }, { "epoch": 0.20284292821606253, "grad_norm": 0.69140625, "learning_rate": 0.00018044408844450563, "loss": 0.3855, "step": 4281 }, { "epoch": 0.20289031035299693, "grad_norm": 0.5546875, "learning_rate": 0.00018043524086459013, "loss": 0.9161, "step": 4282 }, { "epoch": 0.2029376924899313, "grad_norm": 0.55078125, "learning_rate": 0.0001804263915007067, "loss": 1.0379, "step": 4283 }, { "epoch": 0.20298507462686566, "grad_norm": 0.53125, "learning_rate": 0.00018041754035305152, "loss": 0.0514, "step": 4284 }, { "epoch": 0.20303245676380005, "grad_norm": 1.09375, "learning_rate": 0.00018040868742182098, "loss": 0.0456, "step": 4285 }, { "epoch": 0.20307983890073442, "grad_norm": 0.67578125, "learning_rate": 0.0001803998327072114, "loss": 1.2112, "step": 4286 }, { "epoch": 0.2031272210376688, "grad_norm": 0.349609375, "learning_rate": 0.00018039097620941915, "loss": 0.0263, "step": 4287 }, { "epoch": 0.20317460317460317, "grad_norm": 0.14453125, "learning_rate": 0.0001803821179286407, "loss": 0.0274, "step": 4288 }, { "epoch": 0.20322198531153754, "grad_norm": 0.171875, "learning_rate": 0.00018037325786507248, "loss": 0.0196, "step": 4289 }, { "epoch": 0.20326936744847193, "grad_norm": 0.5703125, "learning_rate": 0.00018036439601891102, "loss": 1.1825, "step": 4290 }, { "epoch": 0.2033167495854063, "grad_norm": 0.79296875, "learning_rate": 0.00018035553239035285, "loss": 0.8397, "step": 4291 }, { "epoch": 0.2033641317223407, "grad_norm": 0.66015625, "learning_rate": 0.00018034666697959456, "loss": 0.9871, "step": 4292 }, { "epoch": 0.20341151385927506, "grad_norm": 0.609375, "learning_rate": 0.0001803377997868328, "loss": 0.7991, "step": 4293 }, { "epoch": 0.20345889599620942, "grad_norm": 0.70703125, "learning_rate": 0.0001803289308122642, "loss": 0.9436, "step": 4294 }, { "epoch": 0.20350627813314381, "grad_norm": 0.78125, "learning_rate": 0.0001803200600560855, "loss": 1.4093, "step": 4295 }, { "epoch": 0.20355366027007818, "grad_norm": 0.1904296875, "learning_rate": 0.00018031118751849338, "loss": 0.0173, "step": 4296 }, { "epoch": 0.20360104240701254, "grad_norm": 0.326171875, "learning_rate": 0.00018030231319968466, "loss": 0.1785, "step": 4297 }, { "epoch": 0.20364842454394694, "grad_norm": 0.5234375, "learning_rate": 0.0001802934370998562, "loss": 0.6897, "step": 4298 }, { "epoch": 0.2036958066808813, "grad_norm": 0.158203125, "learning_rate": 0.0001802845592192048, "loss": 0.0282, "step": 4299 }, { "epoch": 0.2037431888178157, "grad_norm": 0.5703125, "learning_rate": 0.0001802756795579274, "loss": 1.045, "step": 4300 }, { "epoch": 0.20379057095475006, "grad_norm": 0.65625, "learning_rate": 0.00018026679811622098, "loss": 1.0125, "step": 4301 }, { "epoch": 0.20383795309168443, "grad_norm": 0.984375, "learning_rate": 0.0001802579148942824, "loss": 1.3856, "step": 4302 }, { "epoch": 0.20388533522861882, "grad_norm": 0.5859375, "learning_rate": 0.00018024902989230882, "loss": 0.8329, "step": 4303 }, { "epoch": 0.20393271736555318, "grad_norm": 0.9453125, "learning_rate": 0.0001802401431104972, "loss": 1.3463, "step": 4304 }, { "epoch": 0.20398009950248755, "grad_norm": 0.83984375, "learning_rate": 0.00018023125454904467, "loss": 0.2841, "step": 4305 }, { "epoch": 0.20402748163942194, "grad_norm": 0.5390625, "learning_rate": 0.00018022236420814838, "loss": 0.7527, "step": 4306 }, { "epoch": 0.2040748637763563, "grad_norm": 0.00122833251953125, "learning_rate": 0.0001802134720880055, "loss": 0.0001, "step": 4307 }, { "epoch": 0.2041222459132907, "grad_norm": 0.67578125, "learning_rate": 0.00018020457818881326, "loss": 0.7237, "step": 4308 }, { "epoch": 0.20416962805022507, "grad_norm": 0.90234375, "learning_rate": 0.0001801956825107689, "loss": 1.564, "step": 4309 }, { "epoch": 0.20421701018715943, "grad_norm": 0.8984375, "learning_rate": 0.00018018678505406972, "loss": 1.4349, "step": 4310 }, { "epoch": 0.20426439232409382, "grad_norm": 0.87109375, "learning_rate": 0.00018017788581891307, "loss": 1.4531, "step": 4311 }, { "epoch": 0.2043117744610282, "grad_norm": 0.65625, "learning_rate": 0.0001801689848054963, "loss": 0.8812, "step": 4312 }, { "epoch": 0.20435915659796255, "grad_norm": 0.58203125, "learning_rate": 0.00018016008201401684, "loss": 0.8732, "step": 4313 }, { "epoch": 0.20440653873489695, "grad_norm": 0.345703125, "learning_rate": 0.00018015117744467213, "loss": 0.1634, "step": 4314 }, { "epoch": 0.2044539208718313, "grad_norm": 0.099609375, "learning_rate": 0.0001801422710976597, "loss": 0.0031, "step": 4315 }, { "epoch": 0.2045013030087657, "grad_norm": 0.88671875, "learning_rate": 0.00018013336297317703, "loss": 0.8116, "step": 4316 }, { "epoch": 0.20454868514570007, "grad_norm": 0.671875, "learning_rate": 0.00018012445307142175, "loss": 1.1377, "step": 4317 }, { "epoch": 0.20459606728263444, "grad_norm": 0.5625, "learning_rate": 0.00018011554139259146, "loss": 0.8103, "step": 4318 }, { "epoch": 0.20464344941956883, "grad_norm": 0.609375, "learning_rate": 0.00018010662793688378, "loss": 0.8936, "step": 4319 }, { "epoch": 0.2046908315565032, "grad_norm": 0.50390625, "learning_rate": 0.0001800977127044964, "loss": 0.1203, "step": 4320 }, { "epoch": 0.2047382136934376, "grad_norm": 0.76171875, "learning_rate": 0.0001800887956956271, "loss": 0.9999, "step": 4321 }, { "epoch": 0.20478559583037195, "grad_norm": 0.8125, "learning_rate": 0.00018007987691047358, "loss": 1.2671, "step": 4322 }, { "epoch": 0.20483297796730632, "grad_norm": 0.640625, "learning_rate": 0.00018007095634923372, "loss": 1.2057, "step": 4323 }, { "epoch": 0.2048803601042407, "grad_norm": 0.71875, "learning_rate": 0.00018006203401210535, "loss": 1.0501, "step": 4324 }, { "epoch": 0.20492774224117508, "grad_norm": 0.67578125, "learning_rate": 0.00018005310989928632, "loss": 1.1723, "step": 4325 }, { "epoch": 0.20497512437810944, "grad_norm": 0.6484375, "learning_rate": 0.00018004418401097456, "loss": 1.1301, "step": 4326 }, { "epoch": 0.20502250651504383, "grad_norm": 0.60546875, "learning_rate": 0.00018003525634736808, "loss": 0.8912, "step": 4327 }, { "epoch": 0.2050698886519782, "grad_norm": 0.6328125, "learning_rate": 0.00018002632690866487, "loss": 1.0652, "step": 4328 }, { "epoch": 0.2051172707889126, "grad_norm": 0.8125, "learning_rate": 0.000180017395695063, "loss": 1.0792, "step": 4329 }, { "epoch": 0.20516465292584696, "grad_norm": 0.79296875, "learning_rate": 0.00018000846270676047, "loss": 1.1601, "step": 4330 }, { "epoch": 0.20521203506278132, "grad_norm": 0.31640625, "learning_rate": 0.00017999952794395548, "loss": 0.0512, "step": 4331 }, { "epoch": 0.20525941719971572, "grad_norm": 0.703125, "learning_rate": 0.00017999059140684615, "loss": 0.7923, "step": 4332 }, { "epoch": 0.20530679933665008, "grad_norm": 0.9609375, "learning_rate": 0.00017998165309563073, "loss": 0.7649, "step": 4333 }, { "epoch": 0.20535418147358445, "grad_norm": 0.671875, "learning_rate": 0.0001799727130105074, "loss": 0.9117, "step": 4334 }, { "epoch": 0.20540156361051884, "grad_norm": 0.76171875, "learning_rate": 0.00017996377115167452, "loss": 0.3355, "step": 4335 }, { "epoch": 0.2054489457474532, "grad_norm": 0.75, "learning_rate": 0.00017995482751933037, "loss": 1.2005, "step": 4336 }, { "epoch": 0.2054963278843876, "grad_norm": 0.65234375, "learning_rate": 0.0001799458821136733, "loss": 0.8636, "step": 4337 }, { "epoch": 0.20554371002132196, "grad_norm": 0.8203125, "learning_rate": 0.00017993693493490175, "loss": 0.5847, "step": 4338 }, { "epoch": 0.20559109215825633, "grad_norm": 0.6484375, "learning_rate": 0.00017992798598321405, "loss": 1.2174, "step": 4339 }, { "epoch": 0.20563847429519072, "grad_norm": 0.640625, "learning_rate": 0.00017991903525880882, "loss": 1.2868, "step": 4340 }, { "epoch": 0.20568585643212509, "grad_norm": 0.494140625, "learning_rate": 0.00017991008276188448, "loss": 0.0422, "step": 4341 }, { "epoch": 0.20573323856905945, "grad_norm": 0.6796875, "learning_rate": 0.00017990112849263965, "loss": 0.8283, "step": 4342 }, { "epoch": 0.20578062070599384, "grad_norm": 0.7109375, "learning_rate": 0.00017989217245127287, "loss": 1.3559, "step": 4343 }, { "epoch": 0.2058280028429282, "grad_norm": 0.578125, "learning_rate": 0.00017988321463798283, "loss": 0.6048, "step": 4344 }, { "epoch": 0.2058753849798626, "grad_norm": 0.51953125, "learning_rate": 0.00017987425505296815, "loss": 0.9089, "step": 4345 }, { "epoch": 0.20592276711679697, "grad_norm": 0.75, "learning_rate": 0.00017986529369642758, "loss": 0.8273, "step": 4346 }, { "epoch": 0.20597014925373133, "grad_norm": 0.60546875, "learning_rate": 0.00017985633056855988, "loss": 0.1078, "step": 4347 }, { "epoch": 0.20601753139066573, "grad_norm": 0.6015625, "learning_rate": 0.00017984736566956382, "loss": 0.8183, "step": 4348 }, { "epoch": 0.2060649135276001, "grad_norm": 0.00225830078125, "learning_rate": 0.00017983839899963822, "loss": 0.0002, "step": 4349 }, { "epoch": 0.20611229566453448, "grad_norm": 0.330078125, "learning_rate": 0.000179829430558982, "loss": 0.1592, "step": 4350 }, { "epoch": 0.20615967780146885, "grad_norm": 0.5859375, "learning_rate": 0.000179820460347794, "loss": 0.9371, "step": 4351 }, { "epoch": 0.20620705993840321, "grad_norm": 0.68359375, "learning_rate": 0.00017981148836627325, "loss": 1.0387, "step": 4352 }, { "epoch": 0.2062544420753376, "grad_norm": 0.984375, "learning_rate": 0.00017980251461461867, "loss": 0.4302, "step": 4353 }, { "epoch": 0.20630182421227197, "grad_norm": 0.51953125, "learning_rate": 0.00017979353909302934, "loss": 0.5641, "step": 4354 }, { "epoch": 0.20634920634920634, "grad_norm": 0.6953125, "learning_rate": 0.0001797845618017043, "loss": 0.986, "step": 4355 }, { "epoch": 0.20639658848614073, "grad_norm": 0.6171875, "learning_rate": 0.00017977558274084266, "loss": 1.3083, "step": 4356 }, { "epoch": 0.2064439706230751, "grad_norm": 0.56640625, "learning_rate": 0.00017976660191064357, "loss": 0.7942, "step": 4357 }, { "epoch": 0.2064913527600095, "grad_norm": 0.703125, "learning_rate": 0.0001797576193113062, "loss": 0.26, "step": 4358 }, { "epoch": 0.20653873489694385, "grad_norm": 0.6484375, "learning_rate": 0.00017974863494302982, "loss": 0.9274, "step": 4359 }, { "epoch": 0.20658611703387822, "grad_norm": 0.5703125, "learning_rate": 0.00017973964880601363, "loss": 0.7014, "step": 4360 }, { "epoch": 0.2066334991708126, "grad_norm": 0.462890625, "learning_rate": 0.000179730660900457, "loss": 0.059, "step": 4361 }, { "epoch": 0.20668088130774698, "grad_norm": 0.396484375, "learning_rate": 0.00017972167122655918, "loss": 0.1828, "step": 4362 }, { "epoch": 0.20672826344468134, "grad_norm": 0.8359375, "learning_rate": 0.00017971267978451967, "loss": 0.6896, "step": 4363 }, { "epoch": 0.20677564558161574, "grad_norm": 0.515625, "learning_rate": 0.00017970368657453778, "loss": 0.767, "step": 4364 }, { "epoch": 0.2068230277185501, "grad_norm": 0.26953125, "learning_rate": 0.000179694691596813, "loss": 0.1646, "step": 4365 }, { "epoch": 0.2068704098554845, "grad_norm": 0.62890625, "learning_rate": 0.00017968569485154489, "loss": 1.4281, "step": 4366 }, { "epoch": 0.20691779199241886, "grad_norm": 0.7421875, "learning_rate": 0.00017967669633893297, "loss": 0.9969, "step": 4367 }, { "epoch": 0.20696517412935322, "grad_norm": 0.66015625, "learning_rate": 0.00017966769605917674, "loss": 0.8421, "step": 4368 }, { "epoch": 0.20701255626628762, "grad_norm": 0.56640625, "learning_rate": 0.0001796586940124759, "loss": 0.9112, "step": 4369 }, { "epoch": 0.20705993840322198, "grad_norm": 0.5390625, "learning_rate": 0.00017964969019903005, "loss": 0.5215, "step": 4370 }, { "epoch": 0.20710732054015635, "grad_norm": 0.65234375, "learning_rate": 0.00017964068461903896, "loss": 0.8646, "step": 4371 }, { "epoch": 0.20715470267709074, "grad_norm": 0.8984375, "learning_rate": 0.00017963167727270225, "loss": 0.1278, "step": 4372 }, { "epoch": 0.2072020848140251, "grad_norm": 0.75, "learning_rate": 0.0001796226681602198, "loss": 1.0172, "step": 4373 }, { "epoch": 0.2072494669509595, "grad_norm": 0.6328125, "learning_rate": 0.00017961365728179138, "loss": 0.7388, "step": 4374 }, { "epoch": 0.20729684908789386, "grad_norm": 0.65625, "learning_rate": 0.00017960464463761682, "loss": 0.9097, "step": 4375 }, { "epoch": 0.20734423122482823, "grad_norm": 1.9609375, "learning_rate": 0.00017959563022789605, "loss": 0.4247, "step": 4376 }, { "epoch": 0.20739161336176262, "grad_norm": 0.6953125, "learning_rate": 0.00017958661405282898, "loss": 0.4378, "step": 4377 }, { "epoch": 0.207438995498697, "grad_norm": 0.703125, "learning_rate": 0.0001795775961126156, "loss": 0.8121, "step": 4378 }, { "epoch": 0.20748637763563138, "grad_norm": 0.57421875, "learning_rate": 0.00017956857640745585, "loss": 0.1325, "step": 4379 }, { "epoch": 0.20753375977256575, "grad_norm": 0.26953125, "learning_rate": 0.00017955955493754986, "loss": 0.1541, "step": 4380 }, { "epoch": 0.2075811419095001, "grad_norm": 0.53125, "learning_rate": 0.0001795505317030977, "loss": 1.0451, "step": 4381 }, { "epoch": 0.2076285240464345, "grad_norm": 0.419921875, "learning_rate": 0.00017954150670429944, "loss": 0.6229, "step": 4382 }, { "epoch": 0.20767590618336887, "grad_norm": 0.671875, "learning_rate": 0.0001795324799413553, "loss": 1.2359, "step": 4383 }, { "epoch": 0.20772328832030323, "grad_norm": 0.4375, "learning_rate": 0.00017952345141446552, "loss": 0.1026, "step": 4384 }, { "epoch": 0.20777067045723763, "grad_norm": 1.265625, "learning_rate": 0.0001795144211238302, "loss": 0.7769, "step": 4385 }, { "epoch": 0.207818052594172, "grad_norm": 0.65625, "learning_rate": 0.00017950538906964979, "loss": 0.53, "step": 4386 }, { "epoch": 0.20786543473110639, "grad_norm": 0.73828125, "learning_rate": 0.0001794963552521245, "loss": 1.4608, "step": 4387 }, { "epoch": 0.20791281686804075, "grad_norm": 0.57421875, "learning_rate": 0.00017948731967145474, "loss": 1.4962, "step": 4388 }, { "epoch": 0.20796019900497512, "grad_norm": 0.6171875, "learning_rate": 0.0001794782823278409, "loss": 1.2669, "step": 4389 }, { "epoch": 0.2080075811419095, "grad_norm": 0.67578125, "learning_rate": 0.00017946924322148343, "loss": 1.1537, "step": 4390 }, { "epoch": 0.20805496327884387, "grad_norm": 0.546875, "learning_rate": 0.00017946020235258274, "loss": 0.9543, "step": 4391 }, { "epoch": 0.20810234541577824, "grad_norm": 0.67578125, "learning_rate": 0.00017945115972133944, "loss": 0.7522, "step": 4392 }, { "epoch": 0.20814972755271263, "grad_norm": 0.61328125, "learning_rate": 0.000179442115327954, "loss": 0.1398, "step": 4393 }, { "epoch": 0.208197109689647, "grad_norm": 0.66015625, "learning_rate": 0.0001794330691726271, "loss": 0.2923, "step": 4394 }, { "epoch": 0.2082444918265814, "grad_norm": 0.244140625, "learning_rate": 0.00017942402125555935, "loss": 0.1591, "step": 4395 }, { "epoch": 0.20829187396351576, "grad_norm": 0.3515625, "learning_rate": 0.00017941497157695138, "loss": 0.0288, "step": 4396 }, { "epoch": 0.20833925610045012, "grad_norm": 0.68359375, "learning_rate": 0.00017940592013700394, "loss": 1.0994, "step": 4397 }, { "epoch": 0.2083866382373845, "grad_norm": 0.6484375, "learning_rate": 0.00017939686693591776, "loss": 1.5042, "step": 4398 }, { "epoch": 0.20843402037431888, "grad_norm": 0.671875, "learning_rate": 0.00017938781197389365, "loss": 0.4372, "step": 4399 }, { "epoch": 0.20848140251125324, "grad_norm": 0.53515625, "learning_rate": 0.00017937875525113242, "loss": 1.1026, "step": 4400 }, { "epoch": 0.20852878464818764, "grad_norm": 0.5390625, "learning_rate": 0.00017936969676783494, "loss": 0.1716, "step": 4401 }, { "epoch": 0.208576166785122, "grad_norm": 0.0037689208984375, "learning_rate": 0.00017936063652420215, "loss": 0.0003, "step": 4402 }, { "epoch": 0.2086235489220564, "grad_norm": 0.197265625, "learning_rate": 0.00017935157452043495, "loss": 0.0158, "step": 4403 }, { "epoch": 0.20867093105899076, "grad_norm": 0.2099609375, "learning_rate": 0.00017934251075673436, "loss": 0.0206, "step": 4404 }, { "epoch": 0.20871831319592513, "grad_norm": 0.66015625, "learning_rate": 0.0001793334452333014, "loss": 0.8304, "step": 4405 }, { "epoch": 0.20876569533285952, "grad_norm": 0.376953125, "learning_rate": 0.00017932437795033712, "loss": 0.0437, "step": 4406 }, { "epoch": 0.20881307746979388, "grad_norm": 0.7578125, "learning_rate": 0.00017931530890804263, "loss": 1.0193, "step": 4407 }, { "epoch": 0.20886045960672828, "grad_norm": 0.65625, "learning_rate": 0.0001793062381066191, "loss": 0.4923, "step": 4408 }, { "epoch": 0.20890784174366264, "grad_norm": 1.109375, "learning_rate": 0.00017929716554626764, "loss": 0.4406, "step": 4409 }, { "epoch": 0.208955223880597, "grad_norm": 0.578125, "learning_rate": 0.00017928809122718953, "loss": 0.8709, "step": 4410 }, { "epoch": 0.2090026060175314, "grad_norm": 0.65625, "learning_rate": 0.000179279015149586, "loss": 0.6877, "step": 4411 }, { "epoch": 0.20904998815446577, "grad_norm": 0.142578125, "learning_rate": 0.00017926993731365838, "loss": 0.0178, "step": 4412 }, { "epoch": 0.20909737029140013, "grad_norm": 0.578125, "learning_rate": 0.00017926085771960797, "loss": 0.9612, "step": 4413 }, { "epoch": 0.20914475242833452, "grad_norm": 0.55859375, "learning_rate": 0.00017925177636763615, "loss": 0.3026, "step": 4414 }, { "epoch": 0.2091921345652689, "grad_norm": 0.52734375, "learning_rate": 0.00017924269325794435, "loss": 0.6007, "step": 4415 }, { "epoch": 0.20923951670220328, "grad_norm": 0.6171875, "learning_rate": 0.00017923360839073406, "loss": 0.7906, "step": 4416 }, { "epoch": 0.20928689883913765, "grad_norm": 0.5703125, "learning_rate": 0.00017922452176620667, "loss": 0.7725, "step": 4417 }, { "epoch": 0.209334280976072, "grad_norm": 0.8671875, "learning_rate": 0.0001792154333845638, "loss": 1.5173, "step": 4418 }, { "epoch": 0.2093816631130064, "grad_norm": 0.013671875, "learning_rate": 0.000179206343246007, "loss": 0.0007, "step": 4419 }, { "epoch": 0.20942904524994077, "grad_norm": 0.65625, "learning_rate": 0.00017919725135073788, "loss": 0.9344, "step": 4420 }, { "epoch": 0.20947642738687514, "grad_norm": 0.69921875, "learning_rate": 0.00017918815769895808, "loss": 0.0335, "step": 4421 }, { "epoch": 0.20952380952380953, "grad_norm": 0.2431640625, "learning_rate": 0.00017917906229086925, "loss": 0.0333, "step": 4422 }, { "epoch": 0.2095711916607439, "grad_norm": 0.55078125, "learning_rate": 0.0001791699651266732, "loss": 1.1535, "step": 4423 }, { "epoch": 0.2096185737976783, "grad_norm": 0.61328125, "learning_rate": 0.00017916086620657165, "loss": 1.2566, "step": 4424 }, { "epoch": 0.20966595593461265, "grad_norm": 0.60546875, "learning_rate": 0.0001791517655307664, "loss": 0.9216, "step": 4425 }, { "epoch": 0.20971333807154702, "grad_norm": 0.330078125, "learning_rate": 0.00017914266309945927, "loss": 0.033, "step": 4426 }, { "epoch": 0.2097607202084814, "grad_norm": 0.423828125, "learning_rate": 0.0001791335589128522, "loss": 0.6292, "step": 4427 }, { "epoch": 0.20980810234541578, "grad_norm": 0.51171875, "learning_rate": 0.00017912445297114707, "loss": 0.8958, "step": 4428 }, { "epoch": 0.20985548448235014, "grad_norm": 0.6875, "learning_rate": 0.00017911534527454586, "loss": 0.7853, "step": 4429 }, { "epoch": 0.20990286661928453, "grad_norm": 0.546875, "learning_rate": 0.00017910623582325058, "loss": 1.0217, "step": 4430 }, { "epoch": 0.2099502487562189, "grad_norm": 0.640625, "learning_rate": 0.0001790971246174632, "loss": 0.9569, "step": 4431 }, { "epoch": 0.2099976308931533, "grad_norm": 0.671875, "learning_rate": 0.0001790880116573859, "loss": 1.152, "step": 4432 }, { "epoch": 0.21004501303008766, "grad_norm": 0.69921875, "learning_rate": 0.0001790788969432207, "loss": 0.9903, "step": 4433 }, { "epoch": 0.21009239516702202, "grad_norm": 0.53515625, "learning_rate": 0.00017906978047516983, "loss": 0.6095, "step": 4434 }, { "epoch": 0.21013977730395642, "grad_norm": 0.58203125, "learning_rate": 0.00017906066225343542, "loss": 0.7025, "step": 4435 }, { "epoch": 0.21018715944089078, "grad_norm": 0.2041015625, "learning_rate": 0.00017905154227821978, "loss": 0.0122, "step": 4436 }, { "epoch": 0.21023454157782517, "grad_norm": 0.68359375, "learning_rate": 0.0001790424205497251, "loss": 1.0213, "step": 4437 }, { "epoch": 0.21028192371475954, "grad_norm": 0.76171875, "learning_rate": 0.00017903329706815373, "loss": 1.4509, "step": 4438 }, { "epoch": 0.2103293058516939, "grad_norm": 0.58984375, "learning_rate": 0.000179024171833708, "loss": 0.7588, "step": 4439 }, { "epoch": 0.2103766879886283, "grad_norm": 0.5234375, "learning_rate": 0.00017901504484659034, "loss": 1.0901, "step": 4440 }, { "epoch": 0.21042407012556266, "grad_norm": 0.55859375, "learning_rate": 0.00017900591610700312, "loss": 1.1255, "step": 4441 }, { "epoch": 0.21047145226249703, "grad_norm": 0.64453125, "learning_rate": 0.00017899678561514884, "loss": 0.5801, "step": 4442 }, { "epoch": 0.21051883439943142, "grad_norm": 0.66015625, "learning_rate": 0.00017898765337123003, "loss": 0.9075, "step": 4443 }, { "epoch": 0.21056621653636579, "grad_norm": 0.58203125, "learning_rate": 0.00017897851937544916, "loss": 0.7969, "step": 4444 }, { "epoch": 0.21061359867330018, "grad_norm": 0.55859375, "learning_rate": 0.00017896938362800885, "loss": 0.7594, "step": 4445 }, { "epoch": 0.21066098081023454, "grad_norm": 0.86328125, "learning_rate": 0.00017896024612911178, "loss": 0.959, "step": 4446 }, { "epoch": 0.2107083629471689, "grad_norm": 0.60546875, "learning_rate": 0.00017895110687896052, "loss": 1.2508, "step": 4447 }, { "epoch": 0.2107557450841033, "grad_norm": 0.60546875, "learning_rate": 0.0001789419658777578, "loss": 0.6613, "step": 4448 }, { "epoch": 0.21080312722103767, "grad_norm": 0.71875, "learning_rate": 0.00017893282312570635, "loss": 1.3908, "step": 4449 }, { "epoch": 0.21085050935797203, "grad_norm": 0.78125, "learning_rate": 0.000178923678623009, "loss": 1.1284, "step": 4450 }, { "epoch": 0.21089789149490643, "grad_norm": 0.5625, "learning_rate": 0.00017891453236986847, "loss": 0.7242, "step": 4451 }, { "epoch": 0.2109452736318408, "grad_norm": 0.5859375, "learning_rate": 0.0001789053843664877, "loss": 1.0444, "step": 4452 }, { "epoch": 0.21099265576877518, "grad_norm": 0.6328125, "learning_rate": 0.00017889623461306956, "loss": 0.8223, "step": 4453 }, { "epoch": 0.21104003790570955, "grad_norm": 0.2734375, "learning_rate": 0.00017888708310981696, "loss": 0.1471, "step": 4454 }, { "epoch": 0.21108742004264391, "grad_norm": 0.68359375, "learning_rate": 0.0001788779298569329, "loss": 1.1383, "step": 4455 }, { "epoch": 0.2111348021795783, "grad_norm": 0.62109375, "learning_rate": 0.00017886877485462038, "loss": 0.0846, "step": 4456 }, { "epoch": 0.21118218431651267, "grad_norm": 0.86328125, "learning_rate": 0.00017885961810308238, "loss": 0.1903, "step": 4457 }, { "epoch": 0.21122956645344704, "grad_norm": 0.6796875, "learning_rate": 0.0001788504596025221, "loss": 0.0582, "step": 4458 }, { "epoch": 0.21127694859038143, "grad_norm": 0.57421875, "learning_rate": 0.00017884129935314262, "loss": 1.2312, "step": 4459 }, { "epoch": 0.2113243307273158, "grad_norm": 0.62890625, "learning_rate": 0.00017883213735514706, "loss": 0.8433, "step": 4460 }, { "epoch": 0.2113717128642502, "grad_norm": 0.71484375, "learning_rate": 0.0001788229736087387, "loss": 0.9221, "step": 4461 }, { "epoch": 0.21141909500118455, "grad_norm": 0.486328125, "learning_rate": 0.00017881380811412073, "loss": 0.8076, "step": 4462 }, { "epoch": 0.21146647713811892, "grad_norm": 0.5390625, "learning_rate": 0.00017880464087149643, "loss": 1.1539, "step": 4463 }, { "epoch": 0.2115138592750533, "grad_norm": 0.5859375, "learning_rate": 0.00017879547188106914, "loss": 0.7532, "step": 4464 }, { "epoch": 0.21156124141198768, "grad_norm": 0.58984375, "learning_rate": 0.00017878630114304223, "loss": 0.9691, "step": 4465 }, { "epoch": 0.21160862354892207, "grad_norm": 0.4453125, "learning_rate": 0.00017877712865761906, "loss": 0.5623, "step": 4466 }, { "epoch": 0.21165600568585644, "grad_norm": 0.56640625, "learning_rate": 0.0001787679544250031, "loss": 1.3412, "step": 4467 }, { "epoch": 0.2117033878227908, "grad_norm": 0.0810546875, "learning_rate": 0.0001787587784453978, "loss": 0.0055, "step": 4468 }, { "epoch": 0.2117507699597252, "grad_norm": 0.69921875, "learning_rate": 0.00017874960071900668, "loss": 1.0678, "step": 4469 }, { "epoch": 0.21179815209665956, "grad_norm": 0.65234375, "learning_rate": 0.0001787404212460333, "loss": 1.2227, "step": 4470 }, { "epoch": 0.21184553423359392, "grad_norm": 0.21484375, "learning_rate": 0.00017873124002668126, "loss": 0.1423, "step": 4471 }, { "epoch": 0.21189291637052832, "grad_norm": 0.57421875, "learning_rate": 0.00017872205706115417, "loss": 0.7837, "step": 4472 }, { "epoch": 0.21194029850746268, "grad_norm": 0.46875, "learning_rate": 0.0001787128723496557, "loss": 0.4995, "step": 4473 }, { "epoch": 0.21198768064439708, "grad_norm": 0.6484375, "learning_rate": 0.00017870368589238957, "loss": 1.6933, "step": 4474 }, { "epoch": 0.21203506278133144, "grad_norm": 0.2294921875, "learning_rate": 0.00017869449768955953, "loss": 0.0053, "step": 4475 }, { "epoch": 0.2120824449182658, "grad_norm": 0.58203125, "learning_rate": 0.00017868530774136932, "loss": 1.1833, "step": 4476 }, { "epoch": 0.2121298270552002, "grad_norm": 0.75390625, "learning_rate": 0.00017867611604802282, "loss": 0.7067, "step": 4477 }, { "epoch": 0.21217720919213456, "grad_norm": 1.5703125, "learning_rate": 0.00017866692260972388, "loss": 0.8348, "step": 4478 }, { "epoch": 0.21222459132906893, "grad_norm": 0.0303955078125, "learning_rate": 0.00017865772742667636, "loss": 0.0019, "step": 4479 }, { "epoch": 0.21227197346600332, "grad_norm": 0.62890625, "learning_rate": 0.00017864853049908422, "loss": 0.9081, "step": 4480 }, { "epoch": 0.2123193556029377, "grad_norm": 0.5546875, "learning_rate": 0.00017863933182715147, "loss": 0.5684, "step": 4481 }, { "epoch": 0.21236673773987208, "grad_norm": 0.69921875, "learning_rate": 0.0001786301314110821, "loss": 0.9752, "step": 4482 }, { "epoch": 0.21241411987680645, "grad_norm": 0.5234375, "learning_rate": 0.00017862092925108016, "loss": 0.0964, "step": 4483 }, { "epoch": 0.2124615020137408, "grad_norm": 0.1884765625, "learning_rate": 0.00017861172534734977, "loss": 0.0359, "step": 4484 }, { "epoch": 0.2125088841506752, "grad_norm": 0.671875, "learning_rate": 0.00017860251970009503, "loss": 1.3729, "step": 4485 }, { "epoch": 0.21255626628760957, "grad_norm": 0.640625, "learning_rate": 0.00017859331230952013, "loss": 0.6791, "step": 4486 }, { "epoch": 0.21260364842454393, "grad_norm": 0.6875, "learning_rate": 0.00017858410317582929, "loss": 0.2489, "step": 4487 }, { "epoch": 0.21265103056147833, "grad_norm": 0.52734375, "learning_rate": 0.0001785748922992267, "loss": 0.7125, "step": 4488 }, { "epoch": 0.2126984126984127, "grad_norm": 0.5234375, "learning_rate": 0.00017856567967991675, "loss": 0.8741, "step": 4489 }, { "epoch": 0.21274579483534709, "grad_norm": 0.640625, "learning_rate": 0.00017855646531810367, "loss": 0.2095, "step": 4490 }, { "epoch": 0.21279317697228145, "grad_norm": 0.890625, "learning_rate": 0.0001785472492139919, "loss": 0.9846, "step": 4491 }, { "epoch": 0.21284055910921582, "grad_norm": 0.6640625, "learning_rate": 0.0001785380313677858, "loss": 0.8484, "step": 4492 }, { "epoch": 0.2128879412461502, "grad_norm": 0.5234375, "learning_rate": 0.00017852881177968978, "loss": 0.8543, "step": 4493 }, { "epoch": 0.21293532338308457, "grad_norm": 0.62890625, "learning_rate": 0.0001785195904499084, "loss": 1.2075, "step": 4494 }, { "epoch": 0.21298270552001897, "grad_norm": 0.6171875, "learning_rate": 0.00017851036737864614, "loss": 0.9833, "step": 4495 }, { "epoch": 0.21303008765695333, "grad_norm": 0.58203125, "learning_rate": 0.00017850114256610754, "loss": 1.2396, "step": 4496 }, { "epoch": 0.2130774697938877, "grad_norm": 0.1953125, "learning_rate": 0.0001784919160124972, "loss": 0.0141, "step": 4497 }, { "epoch": 0.2131248519308221, "grad_norm": 0.46875, "learning_rate": 0.00017848268771801978, "loss": 0.5485, "step": 4498 }, { "epoch": 0.21317223406775646, "grad_norm": 0.625, "learning_rate": 0.00017847345768287994, "loss": 1.1611, "step": 4499 }, { "epoch": 0.21321961620469082, "grad_norm": 0.671875, "learning_rate": 0.00017846422590728241, "loss": 1.4193, "step": 4500 }, { "epoch": 0.2132669983416252, "grad_norm": 0.6640625, "learning_rate": 0.0001784549923914319, "loss": 1.0991, "step": 4501 }, { "epoch": 0.21331438047855958, "grad_norm": 0.62109375, "learning_rate": 0.00017844575713553325, "loss": 1.0719, "step": 4502 }, { "epoch": 0.21336176261549397, "grad_norm": 0.8984375, "learning_rate": 0.00017843652013979123, "loss": 1.1205, "step": 4503 }, { "epoch": 0.21340914475242834, "grad_norm": 0.5234375, "learning_rate": 0.00017842728140441078, "loss": 0.7693, "step": 4504 }, { "epoch": 0.2134565268893627, "grad_norm": 0.671875, "learning_rate": 0.00017841804092959675, "loss": 1.0125, "step": 4505 }, { "epoch": 0.2135039090262971, "grad_norm": 0.5546875, "learning_rate": 0.00017840879871555408, "loss": 1.2968, "step": 4506 }, { "epoch": 0.21355129116323146, "grad_norm": 0.62109375, "learning_rate": 0.00017839955476248783, "loss": 1.0322, "step": 4507 }, { "epoch": 0.21359867330016583, "grad_norm": 0.55078125, "learning_rate": 0.0001783903090706029, "loss": 0.039, "step": 4508 }, { "epoch": 0.21364605543710022, "grad_norm": 0.65234375, "learning_rate": 0.00017838106164010445, "loss": 1.449, "step": 4509 }, { "epoch": 0.21369343757403458, "grad_norm": 0.359375, "learning_rate": 0.00017837181247119753, "loss": 0.2595, "step": 4510 }, { "epoch": 0.21374081971096898, "grad_norm": 0.52734375, "learning_rate": 0.00017836256156408728, "loss": 0.5309, "step": 4511 }, { "epoch": 0.21378820184790334, "grad_norm": 0.60546875, "learning_rate": 0.0001783533089189789, "loss": 1.3286, "step": 4512 }, { "epoch": 0.2138355839848377, "grad_norm": 0.609375, "learning_rate": 0.0001783440545360776, "loss": 0.9724, "step": 4513 }, { "epoch": 0.2138829661217721, "grad_norm": 0.5234375, "learning_rate": 0.00017833479841558857, "loss": 0.7573, "step": 4514 }, { "epoch": 0.21393034825870647, "grad_norm": 0.80078125, "learning_rate": 0.00017832554055771714, "loss": 0.1716, "step": 4515 }, { "epoch": 0.21397773039564083, "grad_norm": 0.5390625, "learning_rate": 0.0001783162809626687, "loss": 0.066, "step": 4516 }, { "epoch": 0.21402511253257522, "grad_norm": 0.59765625, "learning_rate": 0.00017830701963064856, "loss": 0.9212, "step": 4517 }, { "epoch": 0.2140724946695096, "grad_norm": 0.60546875, "learning_rate": 0.00017829775656186213, "loss": 1.08, "step": 4518 }, { "epoch": 0.21411987680644398, "grad_norm": 0.67578125, "learning_rate": 0.00017828849175651482, "loss": 1.1641, "step": 4519 }, { "epoch": 0.21416725894337835, "grad_norm": 0.78515625, "learning_rate": 0.0001782792252148122, "loss": 0.5539, "step": 4520 }, { "epoch": 0.2142146410803127, "grad_norm": 0.4765625, "learning_rate": 0.00017826995693695974, "loss": 0.356, "step": 4521 }, { "epoch": 0.2142620232172471, "grad_norm": 0.8671875, "learning_rate": 0.00017826068692316298, "loss": 1.1708, "step": 4522 }, { "epoch": 0.21430940535418147, "grad_norm": 0.40625, "learning_rate": 0.00017825141517362756, "loss": 0.0662, "step": 4523 }, { "epoch": 0.21435678749111584, "grad_norm": 0.46484375, "learning_rate": 0.00017824214168855908, "loss": 0.6175, "step": 4524 }, { "epoch": 0.21440416962805023, "grad_norm": 0.91015625, "learning_rate": 0.00017823286646816326, "loss": 0.9617, "step": 4525 }, { "epoch": 0.2144515517649846, "grad_norm": 0.78125, "learning_rate": 0.0001782235895126458, "loss": 1.2364, "step": 4526 }, { "epoch": 0.214498933901919, "grad_norm": 0.62890625, "learning_rate": 0.0001782143108222124, "loss": 1.1671, "step": 4527 }, { "epoch": 0.21454631603885335, "grad_norm": 0.69921875, "learning_rate": 0.00017820503039706894, "loss": 0.1485, "step": 4528 }, { "epoch": 0.21459369817578772, "grad_norm": 0.5546875, "learning_rate": 0.0001781957482374212, "loss": 1.2461, "step": 4529 }, { "epoch": 0.2146410803127221, "grad_norm": 0.80078125, "learning_rate": 0.00017818646434347504, "loss": 0.8072, "step": 4530 }, { "epoch": 0.21468846244965648, "grad_norm": 0.75390625, "learning_rate": 0.0001781771787154364, "loss": 1.2577, "step": 4531 }, { "epoch": 0.21473584458659087, "grad_norm": 0.73828125, "learning_rate": 0.00017816789135351117, "loss": 1.0809, "step": 4532 }, { "epoch": 0.21478322672352523, "grad_norm": 0.2197265625, "learning_rate": 0.0001781586022579054, "loss": 0.0052, "step": 4533 }, { "epoch": 0.2148306088604596, "grad_norm": 0.65625, "learning_rate": 0.00017814931142882506, "loss": 0.7403, "step": 4534 }, { "epoch": 0.214877990997394, "grad_norm": 0.76953125, "learning_rate": 0.00017814001886647628, "loss": 0.9979, "step": 4535 }, { "epoch": 0.21492537313432836, "grad_norm": 0.302734375, "learning_rate": 0.00017813072457106504, "loss": 0.1139, "step": 4536 }, { "epoch": 0.21497275527126272, "grad_norm": 0.69140625, "learning_rate": 0.00017812142854279763, "loss": 1.0816, "step": 4537 }, { "epoch": 0.21502013740819712, "grad_norm": 0.6015625, "learning_rate": 0.00017811213078188008, "loss": 0.0649, "step": 4538 }, { "epoch": 0.21506751954513148, "grad_norm": 0.625, "learning_rate": 0.00017810283128851873, "loss": 0.0291, "step": 4539 }, { "epoch": 0.21511490168206587, "grad_norm": 0.08984375, "learning_rate": 0.00017809353006291974, "loss": 0.0044, "step": 4540 }, { "epoch": 0.21516228381900024, "grad_norm": 0.70703125, "learning_rate": 0.00017808422710528942, "loss": 1.1405, "step": 4541 }, { "epoch": 0.2152096659559346, "grad_norm": 0.57421875, "learning_rate": 0.00017807492241583413, "loss": 0.5905, "step": 4542 }, { "epoch": 0.215257048092869, "grad_norm": 0.64453125, "learning_rate": 0.00017806561599476023, "loss": 0.9847, "step": 4543 }, { "epoch": 0.21530443022980336, "grad_norm": 0.8125, "learning_rate": 0.0001780563078422741, "loss": 0.9864, "step": 4544 }, { "epoch": 0.21535181236673773, "grad_norm": 0.275390625, "learning_rate": 0.00017804699795858224, "loss": 0.1668, "step": 4545 }, { "epoch": 0.21539919450367212, "grad_norm": 0.87109375, "learning_rate": 0.00017803768634389107, "loss": 0.5681, "step": 4546 }, { "epoch": 0.21544657664060649, "grad_norm": 0.66796875, "learning_rate": 0.00017802837299840714, "loss": 0.9761, "step": 4547 }, { "epoch": 0.21549395877754088, "grad_norm": 0.78515625, "learning_rate": 0.00017801905792233703, "loss": 1.0556, "step": 4548 }, { "epoch": 0.21554134091447524, "grad_norm": 0.6171875, "learning_rate": 0.0001780097411158873, "loss": 1.2234, "step": 4549 }, { "epoch": 0.2155887230514096, "grad_norm": 0.640625, "learning_rate": 0.0001780004225792646, "loss": 1.0361, "step": 4550 }, { "epoch": 0.215636105188344, "grad_norm": 0.75, "learning_rate": 0.0001779911023126756, "loss": 0.9324, "step": 4551 }, { "epoch": 0.21568348732527837, "grad_norm": 0.6328125, "learning_rate": 0.00017798178031632703, "loss": 1.1086, "step": 4552 }, { "epoch": 0.21573086946221273, "grad_norm": 0.373046875, "learning_rate": 0.00017797245659042566, "loss": 0.0465, "step": 4553 }, { "epoch": 0.21577825159914713, "grad_norm": 0.546875, "learning_rate": 0.00017796313113517824, "loss": 0.846, "step": 4554 }, { "epoch": 0.2158256337360815, "grad_norm": 0.4453125, "learning_rate": 0.0001779538039507916, "loss": 0.7183, "step": 4555 }, { "epoch": 0.21587301587301588, "grad_norm": 0.53515625, "learning_rate": 0.00017794447503747263, "loss": 0.9674, "step": 4556 }, { "epoch": 0.21592039800995025, "grad_norm": 0.5625, "learning_rate": 0.00017793514439542825, "loss": 0.6377, "step": 4557 }, { "epoch": 0.2159677801468846, "grad_norm": 0.6171875, "learning_rate": 0.00017792581202486535, "loss": 0.7499, "step": 4558 }, { "epoch": 0.216015162283819, "grad_norm": 0.59765625, "learning_rate": 0.00017791647792599093, "loss": 0.9097, "step": 4559 }, { "epoch": 0.21606254442075337, "grad_norm": 1.0, "learning_rate": 0.00017790714209901207, "loss": 1.3563, "step": 4560 }, { "epoch": 0.21610992655768776, "grad_norm": 0.6015625, "learning_rate": 0.00017789780454413573, "loss": 1.0608, "step": 4561 }, { "epoch": 0.21615730869462213, "grad_norm": 0.63671875, "learning_rate": 0.00017788846526156911, "loss": 0.089, "step": 4562 }, { "epoch": 0.2162046908315565, "grad_norm": 0.5078125, "learning_rate": 0.00017787912425151924, "loss": 0.5966, "step": 4563 }, { "epoch": 0.2162520729684909, "grad_norm": 0.8125, "learning_rate": 0.00017786978151419338, "loss": 0.7353, "step": 4564 }, { "epoch": 0.21629945510542525, "grad_norm": 0.2080078125, "learning_rate": 0.0001778604370497987, "loss": 0.1329, "step": 4565 }, { "epoch": 0.21634683724235962, "grad_norm": 0.61328125, "learning_rate": 0.00017785109085854245, "loss": 0.356, "step": 4566 }, { "epoch": 0.216394219379294, "grad_norm": 0.2119140625, "learning_rate": 0.00017784174294063195, "loss": 0.0174, "step": 4567 }, { "epoch": 0.21644160151622838, "grad_norm": 0.75, "learning_rate": 0.00017783239329627454, "loss": 1.3163, "step": 4568 }, { "epoch": 0.21648898365316277, "grad_norm": 0.41015625, "learning_rate": 0.00017782304192567747, "loss": 0.1526, "step": 4569 }, { "epoch": 0.21653636579009714, "grad_norm": 0.9453125, "learning_rate": 0.00017781368882904824, "loss": 0.0569, "step": 4570 }, { "epoch": 0.2165837479270315, "grad_norm": 0.6640625, "learning_rate": 0.00017780433400659432, "loss": 1.3177, "step": 4571 }, { "epoch": 0.2166311300639659, "grad_norm": 0.92578125, "learning_rate": 0.0001777949774585231, "loss": 0.0829, "step": 4572 }, { "epoch": 0.21667851220090026, "grad_norm": 0.62109375, "learning_rate": 0.00017778561918504218, "loss": 0.0749, "step": 4573 }, { "epoch": 0.21672589433783462, "grad_norm": 0.640625, "learning_rate": 0.00017777625918635907, "loss": 1.1553, "step": 4574 }, { "epoch": 0.21677327647476902, "grad_norm": 0.59765625, "learning_rate": 0.00017776689746268137, "loss": 1.1068, "step": 4575 }, { "epoch": 0.21682065861170338, "grad_norm": 0.53515625, "learning_rate": 0.0001777575340142167, "loss": 0.6795, "step": 4576 }, { "epoch": 0.21686804074863777, "grad_norm": 0.734375, "learning_rate": 0.00017774816884117277, "loss": 1.1908, "step": 4577 }, { "epoch": 0.21691542288557214, "grad_norm": 0.58203125, "learning_rate": 0.00017773880194375726, "loss": 1.3618, "step": 4578 }, { "epoch": 0.2169628050225065, "grad_norm": 0.259765625, "learning_rate": 0.00017772943332217792, "loss": 0.0409, "step": 4579 }, { "epoch": 0.2170101871594409, "grad_norm": 0.953125, "learning_rate": 0.00017772006297664256, "loss": 0.6163, "step": 4580 }, { "epoch": 0.21705756929637526, "grad_norm": 0.6015625, "learning_rate": 0.00017771069090735897, "loss": 1.048, "step": 4581 }, { "epoch": 0.21710495143330963, "grad_norm": 0.58984375, "learning_rate": 0.00017770131711453507, "loss": 1.1447, "step": 4582 }, { "epoch": 0.21715233357024402, "grad_norm": 0.58984375, "learning_rate": 0.00017769194159837867, "loss": 1.2723, "step": 4583 }, { "epoch": 0.2171997157071784, "grad_norm": 0.67578125, "learning_rate": 0.00017768256435909778, "loss": 1.3821, "step": 4584 }, { "epoch": 0.21724709784411278, "grad_norm": 0.1865234375, "learning_rate": 0.00017767318539690035, "loss": 0.0339, "step": 4585 }, { "epoch": 0.21729447998104715, "grad_norm": 0.578125, "learning_rate": 0.0001776638047119944, "loss": 0.6054, "step": 4586 }, { "epoch": 0.2173418621179815, "grad_norm": 0.5703125, "learning_rate": 0.000177654422304588, "loss": 1.1137, "step": 4587 }, { "epoch": 0.2173892442549159, "grad_norm": 0.6328125, "learning_rate": 0.00017764503817488923, "loss": 1.4631, "step": 4588 }, { "epoch": 0.21743662639185027, "grad_norm": 1.1015625, "learning_rate": 0.0001776356523231062, "loss": 0.9437, "step": 4589 }, { "epoch": 0.21748400852878466, "grad_norm": 0.4765625, "learning_rate": 0.0001776262647494471, "loss": 0.6411, "step": 4590 }, { "epoch": 0.21753139066571903, "grad_norm": 0.53125, "learning_rate": 0.00017761687545412013, "loss": 0.7975, "step": 4591 }, { "epoch": 0.2175787728026534, "grad_norm": 0.234375, "learning_rate": 0.00017760748443733352, "loss": 0.1838, "step": 4592 }, { "epoch": 0.21762615493958778, "grad_norm": 0.578125, "learning_rate": 0.0001775980916992956, "loss": 1.1014, "step": 4593 }, { "epoch": 0.21767353707652215, "grad_norm": 0.5234375, "learning_rate": 0.00017758869724021463, "loss": 0.7344, "step": 4594 }, { "epoch": 0.21772091921345652, "grad_norm": 0.62109375, "learning_rate": 0.00017757930106029903, "loss": 0.9556, "step": 4595 }, { "epoch": 0.2177683013503909, "grad_norm": 0.578125, "learning_rate": 0.00017756990315975714, "loss": 1.1179, "step": 4596 }, { "epoch": 0.21781568348732527, "grad_norm": 0.474609375, "learning_rate": 0.00017756050353879746, "loss": 0.6082, "step": 4597 }, { "epoch": 0.21786306562425967, "grad_norm": 0.48046875, "learning_rate": 0.0001775511021976284, "loss": 0.6329, "step": 4598 }, { "epoch": 0.21791044776119403, "grad_norm": 0.75, "learning_rate": 0.0001775416991364585, "loss": 1.2488, "step": 4599 }, { "epoch": 0.2179578298981284, "grad_norm": 1.859375, "learning_rate": 0.00017753229435549627, "loss": 1.1134, "step": 4600 }, { "epoch": 0.2180052120350628, "grad_norm": 0.55859375, "learning_rate": 0.00017752288785495037, "loss": 0.9355, "step": 4601 }, { "epoch": 0.21805259417199715, "grad_norm": 1.2734375, "learning_rate": 0.0001775134796350294, "loss": 0.064, "step": 4602 }, { "epoch": 0.21809997630893152, "grad_norm": 1.234375, "learning_rate": 0.000177504069695942, "loss": 0.3083, "step": 4603 }, { "epoch": 0.2181473584458659, "grad_norm": 0.25390625, "learning_rate": 0.0001774946580378969, "loss": 0.1614, "step": 4604 }, { "epoch": 0.21819474058280028, "grad_norm": 0.46484375, "learning_rate": 0.0001774852446611028, "loss": 0.2464, "step": 4605 }, { "epoch": 0.21824212271973467, "grad_norm": 0.6171875, "learning_rate": 0.00017747582956576853, "loss": 0.5314, "step": 4606 }, { "epoch": 0.21828950485666904, "grad_norm": 0.248046875, "learning_rate": 0.0001774664127521029, "loss": 0.0323, "step": 4607 }, { "epoch": 0.2183368869936034, "grad_norm": 0.53515625, "learning_rate": 0.0001774569942203147, "loss": 0.5314, "step": 4608 }, { "epoch": 0.2183842691305378, "grad_norm": 0.57421875, "learning_rate": 0.0001774475739706129, "loss": 0.8986, "step": 4609 }, { "epoch": 0.21843165126747216, "grad_norm": 0.71875, "learning_rate": 0.0001774381520032064, "loss": 1.1637, "step": 4610 }, { "epoch": 0.21847903340440653, "grad_norm": 0.75, "learning_rate": 0.00017742872831830422, "loss": 0.8177, "step": 4611 }, { "epoch": 0.21852641554134092, "grad_norm": 0.62890625, "learning_rate": 0.00017741930291611526, "loss": 1.1722, "step": 4612 }, { "epoch": 0.21857379767827528, "grad_norm": 0.55859375, "learning_rate": 0.00017740987579684863, "loss": 0.8712, "step": 4613 }, { "epoch": 0.21862117981520968, "grad_norm": 0.248046875, "learning_rate": 0.00017740044696071342, "loss": 0.062, "step": 4614 }, { "epoch": 0.21866856195214404, "grad_norm": 0.671875, "learning_rate": 0.00017739101640791875, "loss": 1.4296, "step": 4615 }, { "epoch": 0.2187159440890784, "grad_norm": 0.07568359375, "learning_rate": 0.00017738158413867377, "loss": 0.0048, "step": 4616 }, { "epoch": 0.2187633262260128, "grad_norm": 0.53125, "learning_rate": 0.00017737215015318765, "loss": 0.1394, "step": 4617 }, { "epoch": 0.21881070836294716, "grad_norm": 0.58203125, "learning_rate": 0.00017736271445166965, "loss": 1.2406, "step": 4618 }, { "epoch": 0.21885809049988156, "grad_norm": 0.74609375, "learning_rate": 0.00017735327703432906, "loss": 1.0548, "step": 4619 }, { "epoch": 0.21890547263681592, "grad_norm": 0.6640625, "learning_rate": 0.0001773438379013752, "loss": 1.4781, "step": 4620 }, { "epoch": 0.2189528547737503, "grad_norm": 0.7265625, "learning_rate": 0.00017733439705301738, "loss": 1.6623, "step": 4621 }, { "epoch": 0.21900023691068468, "grad_norm": 0.62109375, "learning_rate": 0.000177324954489465, "loss": 1.0556, "step": 4622 }, { "epoch": 0.21904761904761905, "grad_norm": 0.26953125, "learning_rate": 0.00017731551021092748, "loss": 0.0364, "step": 4623 }, { "epoch": 0.2190950011845534, "grad_norm": 0.380859375, "learning_rate": 0.00017730606421761434, "loss": 0.5981, "step": 4624 }, { "epoch": 0.2191423833214878, "grad_norm": 0.7734375, "learning_rate": 0.00017729661650973502, "loss": 1.2069, "step": 4625 }, { "epoch": 0.21918976545842217, "grad_norm": 0.5859375, "learning_rate": 0.00017728716708749907, "loss": 0.628, "step": 4626 }, { "epoch": 0.21923714759535656, "grad_norm": 0.84765625, "learning_rate": 0.0001772777159511161, "loss": 0.9068, "step": 4627 }, { "epoch": 0.21928452973229093, "grad_norm": 0.57421875, "learning_rate": 0.00017726826310079566, "loss": 0.7564, "step": 4628 }, { "epoch": 0.2193319118692253, "grad_norm": 0.79296875, "learning_rate": 0.00017725880853674744, "loss": 0.256, "step": 4629 }, { "epoch": 0.2193792940061597, "grad_norm": 0.59375, "learning_rate": 0.0001772493522591812, "loss": 0.9717, "step": 4630 }, { "epoch": 0.21942667614309405, "grad_norm": 0.65625, "learning_rate": 0.00017723989426830656, "loss": 0.1078, "step": 4631 }, { "epoch": 0.21947405828002842, "grad_norm": 0.7734375, "learning_rate": 0.00017723043456433334, "loss": 1.1237, "step": 4632 }, { "epoch": 0.2195214404169628, "grad_norm": 0.5703125, "learning_rate": 0.00017722097314747137, "loss": 1.2465, "step": 4633 }, { "epoch": 0.21956882255389717, "grad_norm": 0.6875, "learning_rate": 0.00017721151001793046, "loss": 1.145, "step": 4634 }, { "epoch": 0.21961620469083157, "grad_norm": 0.546875, "learning_rate": 0.0001772020451759205, "loss": 0.5836, "step": 4635 }, { "epoch": 0.21966358682776593, "grad_norm": 0.48046875, "learning_rate": 0.00017719257862165139, "loss": 0.0576, "step": 4636 }, { "epoch": 0.2197109689647003, "grad_norm": 0.6640625, "learning_rate": 0.00017718311035533314, "loss": 1.0329, "step": 4637 }, { "epoch": 0.2197583511016347, "grad_norm": 0.640625, "learning_rate": 0.00017717364037717566, "loss": 1.1943, "step": 4638 }, { "epoch": 0.21980573323856906, "grad_norm": 0.77734375, "learning_rate": 0.0001771641686873891, "loss": 0.9518, "step": 4639 }, { "epoch": 0.21985311537550342, "grad_norm": 0.70703125, "learning_rate": 0.00017715469528618342, "loss": 0.5623, "step": 4640 }, { "epoch": 0.21990049751243781, "grad_norm": 0.31640625, "learning_rate": 0.0001771452201737688, "loss": 0.1291, "step": 4641 }, { "epoch": 0.21994787964937218, "grad_norm": 0.8984375, "learning_rate": 0.00017713574335035542, "loss": 0.7221, "step": 4642 }, { "epoch": 0.21999526178630657, "grad_norm": 0.57421875, "learning_rate": 0.00017712626481615335, "loss": 0.8539, "step": 4643 }, { "epoch": 0.22004264392324094, "grad_norm": 0.67578125, "learning_rate": 0.00017711678457137288, "loss": 1.3783, "step": 4644 }, { "epoch": 0.2200900260601753, "grad_norm": 0.228515625, "learning_rate": 0.0001771073026162243, "loss": 0.0489, "step": 4645 }, { "epoch": 0.2201374081971097, "grad_norm": 0.55078125, "learning_rate": 0.00017709781895091785, "loss": 1.1835, "step": 4646 }, { "epoch": 0.22018479033404406, "grad_norm": 0.53125, "learning_rate": 0.0001770883335756639, "loss": 0.906, "step": 4647 }, { "epoch": 0.22023217247097845, "grad_norm": 0.56640625, "learning_rate": 0.00017707884649067283, "loss": 0.7656, "step": 4648 }, { "epoch": 0.22027955460791282, "grad_norm": 0.224609375, "learning_rate": 0.00017706935769615508, "loss": 0.1213, "step": 4649 }, { "epoch": 0.22032693674484718, "grad_norm": 0.74609375, "learning_rate": 0.00017705986719232102, "loss": 0.9539, "step": 4650 }, { "epoch": 0.22037431888178158, "grad_norm": 0.6328125, "learning_rate": 0.0001770503749793812, "loss": 0.8432, "step": 4651 }, { "epoch": 0.22042170101871594, "grad_norm": 0.71484375, "learning_rate": 0.00017704088105754612, "loss": 0.7951, "step": 4652 }, { "epoch": 0.2204690831556503, "grad_norm": 0.734375, "learning_rate": 0.0001770313854270264, "loss": 0.9206, "step": 4653 }, { "epoch": 0.2205164652925847, "grad_norm": 0.53515625, "learning_rate": 0.00017702188808803255, "loss": 0.8624, "step": 4654 }, { "epoch": 0.22056384742951907, "grad_norm": 0.162109375, "learning_rate": 0.00017701238904077528, "loss": 0.0222, "step": 4655 }, { "epoch": 0.22061122956645346, "grad_norm": 0.71484375, "learning_rate": 0.00017700288828546525, "loss": 0.0418, "step": 4656 }, { "epoch": 0.22065861170338782, "grad_norm": 1.1171875, "learning_rate": 0.00017699338582231316, "loss": 0.0485, "step": 4657 }, { "epoch": 0.2207059938403222, "grad_norm": 0.69921875, "learning_rate": 0.00017698388165152983, "loss": 1.0035, "step": 4658 }, { "epoch": 0.22075337597725658, "grad_norm": 0.62109375, "learning_rate": 0.000176974375773326, "loss": 1.1605, "step": 4659 }, { "epoch": 0.22080075811419095, "grad_norm": 0.609375, "learning_rate": 0.00017696486818791248, "loss": 1.1143, "step": 4660 }, { "epoch": 0.2208481402511253, "grad_norm": 0.318359375, "learning_rate": 0.00017695535889550014, "loss": 0.046, "step": 4661 }, { "epoch": 0.2208955223880597, "grad_norm": 0.5703125, "learning_rate": 0.00017694584789629996, "loss": 0.9403, "step": 4662 }, { "epoch": 0.22094290452499407, "grad_norm": 0.5234375, "learning_rate": 0.0001769363351905228, "loss": 1.1337, "step": 4663 }, { "epoch": 0.22099028666192846, "grad_norm": 0.5625, "learning_rate": 0.00017692682077837966, "loss": 1.2104, "step": 4664 }, { "epoch": 0.22103766879886283, "grad_norm": 0.5078125, "learning_rate": 0.00017691730466008163, "loss": 0.6819, "step": 4665 }, { "epoch": 0.2210850509357972, "grad_norm": 0.640625, "learning_rate": 0.00017690778683583967, "loss": 0.8609, "step": 4666 }, { "epoch": 0.2211324330727316, "grad_norm": 0.67578125, "learning_rate": 0.00017689826730586493, "loss": 1.376, "step": 4667 }, { "epoch": 0.22117981520966595, "grad_norm": 0.71875, "learning_rate": 0.00017688874607036853, "loss": 1.4283, "step": 4668 }, { "epoch": 0.22122719734660032, "grad_norm": 0.98828125, "learning_rate": 0.00017687922312956163, "loss": 0.2257, "step": 4669 }, { "epoch": 0.2212745794835347, "grad_norm": 0.69140625, "learning_rate": 0.00017686969848365545, "loss": 0.0948, "step": 4670 }, { "epoch": 0.22132196162046908, "grad_norm": 0.51171875, "learning_rate": 0.00017686017213286123, "loss": 1.052, "step": 4671 }, { "epoch": 0.22136934375740347, "grad_norm": 0.6875, "learning_rate": 0.00017685064407739026, "loss": 1.2214, "step": 4672 }, { "epoch": 0.22141672589433783, "grad_norm": 0.6640625, "learning_rate": 0.0001768411143174539, "loss": 1.276, "step": 4673 }, { "epoch": 0.2214641080312722, "grad_norm": 0.279296875, "learning_rate": 0.00017683158285326344, "loss": 0.1367, "step": 4674 }, { "epoch": 0.2215114901682066, "grad_norm": 0.80078125, "learning_rate": 0.00017682204968503032, "loss": 1.3712, "step": 4675 }, { "epoch": 0.22155887230514096, "grad_norm": 0.703125, "learning_rate": 0.00017681251481296595, "loss": 1.4865, "step": 4676 }, { "epoch": 0.22160625444207535, "grad_norm": 0.8984375, "learning_rate": 0.00017680297823728183, "loss": 0.5422, "step": 4677 }, { "epoch": 0.22165363657900972, "grad_norm": 0.6640625, "learning_rate": 0.00017679343995818947, "loss": 0.7689, "step": 4678 }, { "epoch": 0.22170101871594408, "grad_norm": 0.2578125, "learning_rate": 0.00017678389997590038, "loss": 0.1221, "step": 4679 }, { "epoch": 0.22174840085287847, "grad_norm": 0.59765625, "learning_rate": 0.00017677435829062618, "loss": 0.6552, "step": 4680 }, { "epoch": 0.22179578298981284, "grad_norm": 0.26953125, "learning_rate": 0.0001767648149025785, "loss": 0.1533, "step": 4681 }, { "epoch": 0.2218431651267472, "grad_norm": 0.64453125, "learning_rate": 0.000176755269811969, "loss": 1.3087, "step": 4682 }, { "epoch": 0.2218905472636816, "grad_norm": 0.341796875, "learning_rate": 0.00017674572301900934, "loss": 0.4479, "step": 4683 }, { "epoch": 0.22193792940061596, "grad_norm": 0.349609375, "learning_rate": 0.00017673617452391134, "loss": 0.0224, "step": 4684 }, { "epoch": 0.22198531153755036, "grad_norm": 0.57421875, "learning_rate": 0.0001767266243268867, "loss": 0.4738, "step": 4685 }, { "epoch": 0.22203269367448472, "grad_norm": 0.90234375, "learning_rate": 0.00017671707242814723, "loss": 0.8659, "step": 4686 }, { "epoch": 0.2220800758114191, "grad_norm": 0.6640625, "learning_rate": 0.00017670751882790486, "loss": 1.4518, "step": 4687 }, { "epoch": 0.22212745794835348, "grad_norm": 0.70703125, "learning_rate": 0.00017669796352637137, "loss": 1.1001, "step": 4688 }, { "epoch": 0.22217484008528784, "grad_norm": 0.69140625, "learning_rate": 0.0001766884065237588, "loss": 0.6323, "step": 4689 }, { "epoch": 0.2222222222222222, "grad_norm": 0.71875, "learning_rate": 0.00017667884782027903, "loss": 1.1724, "step": 4690 }, { "epoch": 0.2222696043591566, "grad_norm": 0.49609375, "learning_rate": 0.0001766692874161441, "loss": 1.0944, "step": 4691 }, { "epoch": 0.22231698649609097, "grad_norm": 0.78515625, "learning_rate": 0.00017665972531156603, "loss": 0.0107, "step": 4692 }, { "epoch": 0.22236436863302536, "grad_norm": 0.71875, "learning_rate": 0.0001766501615067569, "loss": 0.9774, "step": 4693 }, { "epoch": 0.22241175076995973, "grad_norm": 0.61328125, "learning_rate": 0.00017664059600192884, "loss": 1.0095, "step": 4694 }, { "epoch": 0.2224591329068941, "grad_norm": 0.55859375, "learning_rate": 0.00017663102879729401, "loss": 1.158, "step": 4695 }, { "epoch": 0.22250651504382848, "grad_norm": 0.65234375, "learning_rate": 0.00017662145989306456, "loss": 0.918, "step": 4696 }, { "epoch": 0.22255389718076285, "grad_norm": 0.3984375, "learning_rate": 0.00017661188928945275, "loss": 0.0291, "step": 4697 }, { "epoch": 0.22260127931769721, "grad_norm": 0.498046875, "learning_rate": 0.00017660231698667084, "loss": 0.0249, "step": 4698 }, { "epoch": 0.2226486614546316, "grad_norm": 0.11328125, "learning_rate": 0.00017659274298493114, "loss": 0.0116, "step": 4699 }, { "epoch": 0.22269604359156597, "grad_norm": 0.58203125, "learning_rate": 0.00017658316728444597, "loss": 0.6287, "step": 4700 }, { "epoch": 0.22274342572850037, "grad_norm": 0.63671875, "learning_rate": 0.00017657358988542774, "loss": 1.0842, "step": 4701 }, { "epoch": 0.22279080786543473, "grad_norm": 0.72265625, "learning_rate": 0.00017656401078808883, "loss": 0.6734, "step": 4702 }, { "epoch": 0.2228381900023691, "grad_norm": 0.65234375, "learning_rate": 0.00017655442999264174, "loss": 0.7644, "step": 4703 }, { "epoch": 0.2228855721393035, "grad_norm": 0.62109375, "learning_rate": 0.00017654484749929893, "loss": 0.8459, "step": 4704 }, { "epoch": 0.22293295427623785, "grad_norm": 0.60546875, "learning_rate": 0.0001765352633082729, "loss": 0.315, "step": 4705 }, { "epoch": 0.22298033641317225, "grad_norm": 0.5546875, "learning_rate": 0.00017652567741977627, "loss": 0.5902, "step": 4706 }, { "epoch": 0.2230277185501066, "grad_norm": 0.7265625, "learning_rate": 0.00017651608983402164, "loss": 1.2075, "step": 4707 }, { "epoch": 0.22307510068704098, "grad_norm": 0.5703125, "learning_rate": 0.0001765065005512216, "loss": 0.7806, "step": 4708 }, { "epoch": 0.22312248282397537, "grad_norm": 1.109375, "learning_rate": 0.00017649690957158892, "loss": 0.3137, "step": 4709 }, { "epoch": 0.22316986496090974, "grad_norm": 0.703125, "learning_rate": 0.00017648731689533627, "loss": 0.5101, "step": 4710 }, { "epoch": 0.2232172470978441, "grad_norm": 0.6796875, "learning_rate": 0.00017647772252267637, "loss": 0.6607, "step": 4711 }, { "epoch": 0.2232646292347785, "grad_norm": 0.7109375, "learning_rate": 0.00017646812645382201, "loss": 1.1553, "step": 4712 }, { "epoch": 0.22331201137171286, "grad_norm": 0.69140625, "learning_rate": 0.00017645852868898611, "loss": 1.0347, "step": 4713 }, { "epoch": 0.22335939350864725, "grad_norm": 0.609375, "learning_rate": 0.00017644892922838147, "loss": 0.8938, "step": 4714 }, { "epoch": 0.22340677564558162, "grad_norm": 0.66796875, "learning_rate": 0.00017643932807222102, "loss": 1.466, "step": 4715 }, { "epoch": 0.22345415778251598, "grad_norm": 0.58203125, "learning_rate": 0.00017642972522071768, "loss": 0.7273, "step": 4716 }, { "epoch": 0.22350153991945038, "grad_norm": 0.6015625, "learning_rate": 0.00017642012067408444, "loss": 1.5163, "step": 4717 }, { "epoch": 0.22354892205638474, "grad_norm": 0.7578125, "learning_rate": 0.00017641051443253433, "loss": 0.925, "step": 4718 }, { "epoch": 0.2235963041933191, "grad_norm": 0.00823974609375, "learning_rate": 0.00017640090649628042, "loss": 0.0005, "step": 4719 }, { "epoch": 0.2236436863302535, "grad_norm": 0.69140625, "learning_rate": 0.00017639129686553573, "loss": 0.9881, "step": 4720 }, { "epoch": 0.22369106846718786, "grad_norm": 0.46875, "learning_rate": 0.00017638168554051352, "loss": 0.5051, "step": 4721 }, { "epoch": 0.22373845060412226, "grad_norm": 0.62890625, "learning_rate": 0.0001763720725214268, "loss": 0.9316, "step": 4722 }, { "epoch": 0.22378583274105662, "grad_norm": 0.71484375, "learning_rate": 0.0001763624578084889, "loss": 1.0418, "step": 4723 }, { "epoch": 0.223833214877991, "grad_norm": 0.52734375, "learning_rate": 0.00017635284140191302, "loss": 1.1204, "step": 4724 }, { "epoch": 0.22388059701492538, "grad_norm": 0.58984375, "learning_rate": 0.00017634322330191244, "loss": 0.8188, "step": 4725 }, { "epoch": 0.22392797915185975, "grad_norm": 0.56640625, "learning_rate": 0.0001763336035087005, "loss": 1.2027, "step": 4726 }, { "epoch": 0.2239753612887941, "grad_norm": 0.71875, "learning_rate": 0.00017632398202249055, "loss": 1.5133, "step": 4727 }, { "epoch": 0.2240227434257285, "grad_norm": 0.90234375, "learning_rate": 0.00017631435884349594, "loss": 0.3672, "step": 4728 }, { "epoch": 0.22407012556266287, "grad_norm": 0.50390625, "learning_rate": 0.00017630473397193015, "loss": 0.3307, "step": 4729 }, { "epoch": 0.22411750769959726, "grad_norm": 0.5390625, "learning_rate": 0.00017629510740800662, "loss": 0.0769, "step": 4730 }, { "epoch": 0.22416488983653163, "grad_norm": 0.703125, "learning_rate": 0.00017628547915193892, "loss": 1.293, "step": 4731 }, { "epoch": 0.224212271973466, "grad_norm": 0.6953125, "learning_rate": 0.0001762758492039405, "loss": 0.8374, "step": 4732 }, { "epoch": 0.22425965411040039, "grad_norm": 1.1484375, "learning_rate": 0.00017626621756422503, "loss": 0.2328, "step": 4733 }, { "epoch": 0.22430703624733475, "grad_norm": 0.69140625, "learning_rate": 0.00017625658423300606, "loss": 1.0732, "step": 4734 }, { "epoch": 0.22435441838426914, "grad_norm": 0.57421875, "learning_rate": 0.00017624694921049728, "loss": 0.0418, "step": 4735 }, { "epoch": 0.2244018005212035, "grad_norm": 1.65625, "learning_rate": 0.0001762373124969124, "loss": 0.1589, "step": 4736 }, { "epoch": 0.22444918265813787, "grad_norm": 0.69140625, "learning_rate": 0.0001762276740924651, "loss": 1.3373, "step": 4737 }, { "epoch": 0.22449656479507227, "grad_norm": 0.453125, "learning_rate": 0.00017621803399736922, "loss": 0.5409, "step": 4738 }, { "epoch": 0.22454394693200663, "grad_norm": 0.44140625, "learning_rate": 0.00017620839221183852, "loss": 0.2755, "step": 4739 }, { "epoch": 0.224591329068941, "grad_norm": 0.61328125, "learning_rate": 0.00017619874873608685, "loss": 0.9164, "step": 4740 }, { "epoch": 0.2246387112058754, "grad_norm": 0.50390625, "learning_rate": 0.00017618910357032808, "loss": 0.3311, "step": 4741 }, { "epoch": 0.22468609334280976, "grad_norm": 0.8828125, "learning_rate": 0.00017617945671477618, "loss": 1.7412, "step": 4742 }, { "epoch": 0.22473347547974415, "grad_norm": 0.265625, "learning_rate": 0.00017616980816964503, "loss": 0.0396, "step": 4743 }, { "epoch": 0.22478085761667851, "grad_norm": 0.515625, "learning_rate": 0.0001761601579351487, "loss": 0.7049, "step": 4744 }, { "epoch": 0.22482823975361288, "grad_norm": 0.1748046875, "learning_rate": 0.00017615050601150117, "loss": 0.0126, "step": 4745 }, { "epoch": 0.22487562189054727, "grad_norm": 0.73828125, "learning_rate": 0.00017614085239891654, "loss": 1.1157, "step": 4746 }, { "epoch": 0.22492300402748164, "grad_norm": 0.671875, "learning_rate": 0.0001761311970976089, "loss": 0.8671, "step": 4747 }, { "epoch": 0.224970386164416, "grad_norm": 0.2490234375, "learning_rate": 0.00017612154010779242, "loss": 0.0453, "step": 4748 }, { "epoch": 0.2250177683013504, "grad_norm": 0.77734375, "learning_rate": 0.00017611188142968126, "loss": 0.8946, "step": 4749 }, { "epoch": 0.22506515043828476, "grad_norm": 0.21484375, "learning_rate": 0.0001761022210634896, "loss": 0.024, "step": 4750 }, { "epoch": 0.22511253257521915, "grad_norm": 0.6484375, "learning_rate": 0.00017609255900943177, "loss": 1.0729, "step": 4751 }, { "epoch": 0.22515991471215352, "grad_norm": 0.486328125, "learning_rate": 0.000176082895267722, "loss": 0.9089, "step": 4752 }, { "epoch": 0.22520729684908788, "grad_norm": 0.66015625, "learning_rate": 0.0001760732298385747, "loss": 1.1987, "step": 4753 }, { "epoch": 0.22525467898602228, "grad_norm": 0.5234375, "learning_rate": 0.00017606356272220416, "loss": 0.9727, "step": 4754 }, { "epoch": 0.22530206112295664, "grad_norm": 0.36328125, "learning_rate": 0.00017605389391882483, "loss": 0.0066, "step": 4755 }, { "epoch": 0.225349443259891, "grad_norm": 0.412109375, "learning_rate": 0.00017604422342865113, "loss": 0.1331, "step": 4756 }, { "epoch": 0.2253968253968254, "grad_norm": 0.71484375, "learning_rate": 0.00017603455125189758, "loss": 0.9788, "step": 4757 }, { "epoch": 0.22544420753375977, "grad_norm": 0.74609375, "learning_rate": 0.00017602487738877862, "loss": 1.3464, "step": 4758 }, { "epoch": 0.22549158967069416, "grad_norm": 0.6875, "learning_rate": 0.0001760152018395089, "loss": 1.2365, "step": 4759 }, { "epoch": 0.22553897180762852, "grad_norm": 0.59765625, "learning_rate": 0.00017600552460430297, "loss": 0.9028, "step": 4760 }, { "epoch": 0.2255863539445629, "grad_norm": 0.60546875, "learning_rate": 0.00017599584568337546, "loss": 1.0353, "step": 4761 }, { "epoch": 0.22563373608149728, "grad_norm": 0.59765625, "learning_rate": 0.00017598616507694104, "loss": 1.1336, "step": 4762 }, { "epoch": 0.22568111821843165, "grad_norm": 0.80078125, "learning_rate": 0.0001759764827852144, "loss": 0.012, "step": 4763 }, { "epoch": 0.22572850035536604, "grad_norm": 0.6328125, "learning_rate": 0.0001759667988084103, "loss": 0.8005, "step": 4764 }, { "epoch": 0.2257758824923004, "grad_norm": 0.62109375, "learning_rate": 0.00017595711314674352, "loss": 0.0862, "step": 4765 }, { "epoch": 0.22582326462923477, "grad_norm": 0.7265625, "learning_rate": 0.00017594742580042888, "loss": 1.197, "step": 4766 }, { "epoch": 0.22587064676616916, "grad_norm": 0.75, "learning_rate": 0.00017593773676968124, "loss": 0.0649, "step": 4767 }, { "epoch": 0.22591802890310353, "grad_norm": 0.484375, "learning_rate": 0.00017592804605471546, "loss": 0.6548, "step": 4768 }, { "epoch": 0.2259654110400379, "grad_norm": 0.33203125, "learning_rate": 0.00017591835365574654, "loss": 0.013, "step": 4769 }, { "epoch": 0.2260127931769723, "grad_norm": 0.52734375, "learning_rate": 0.00017590865957298934, "loss": 1.1218, "step": 4770 }, { "epoch": 0.22606017531390665, "grad_norm": 0.466796875, "learning_rate": 0.00017589896380665896, "loss": 0.7666, "step": 4771 }, { "epoch": 0.22610755745084105, "grad_norm": 0.58984375, "learning_rate": 0.0001758892663569704, "loss": 0.8846, "step": 4772 }, { "epoch": 0.2261549395877754, "grad_norm": 0.52734375, "learning_rate": 0.00017587956722413877, "loss": 0.6879, "step": 4773 }, { "epoch": 0.22620232172470978, "grad_norm": 0.9296875, "learning_rate": 0.00017586986640837914, "loss": 0.0823, "step": 4774 }, { "epoch": 0.22624970386164417, "grad_norm": 0.66796875, "learning_rate": 0.00017586016390990668, "loss": 1.276, "step": 4775 }, { "epoch": 0.22629708599857853, "grad_norm": 0.41796875, "learning_rate": 0.00017585045972893658, "loss": 0.008, "step": 4776 }, { "epoch": 0.2263444681355129, "grad_norm": 0.57421875, "learning_rate": 0.0001758407538656841, "loss": 0.9002, "step": 4777 }, { "epoch": 0.2263918502724473, "grad_norm": 0.6484375, "learning_rate": 0.00017583104632036447, "loss": 0.7278, "step": 4778 }, { "epoch": 0.22643923240938166, "grad_norm": 1.03125, "learning_rate": 0.00017582133709319298, "loss": 0.2357, "step": 4779 }, { "epoch": 0.22648661454631605, "grad_norm": 0.033203125, "learning_rate": 0.00017581162618438505, "loss": 0.0013, "step": 4780 }, { "epoch": 0.22653399668325042, "grad_norm": 0.2177734375, "learning_rate": 0.00017580191359415595, "loss": 0.0245, "step": 4781 }, { "epoch": 0.22658137882018478, "grad_norm": 0.7109375, "learning_rate": 0.00017579219932272117, "loss": 1.1036, "step": 4782 }, { "epoch": 0.22662876095711917, "grad_norm": 0.59765625, "learning_rate": 0.00017578248337029613, "loss": 0.8106, "step": 4783 }, { "epoch": 0.22667614309405354, "grad_norm": 0.55859375, "learning_rate": 0.00017577276573709635, "loss": 0.772, "step": 4784 }, { "epoch": 0.2267235252309879, "grad_norm": 0.75, "learning_rate": 0.00017576304642333732, "loss": 1.1472, "step": 4785 }, { "epoch": 0.2267709073679223, "grad_norm": 0.3046875, "learning_rate": 0.00017575332542923465, "loss": 0.1988, "step": 4786 }, { "epoch": 0.22681828950485666, "grad_norm": 0.70703125, "learning_rate": 0.00017574360275500386, "loss": 1.0495, "step": 4787 }, { "epoch": 0.22686567164179106, "grad_norm": 0.447265625, "learning_rate": 0.00017573387840086067, "loss": 0.1959, "step": 4788 }, { "epoch": 0.22691305377872542, "grad_norm": 0.6484375, "learning_rate": 0.00017572415236702068, "loss": 0.2046, "step": 4789 }, { "epoch": 0.2269604359156598, "grad_norm": 0.7734375, "learning_rate": 0.0001757144246536997, "loss": 0.9627, "step": 4790 }, { "epoch": 0.22700781805259418, "grad_norm": 0.42578125, "learning_rate": 0.0001757046952611134, "loss": 0.1513, "step": 4791 }, { "epoch": 0.22705520018952854, "grad_norm": 0.2197265625, "learning_rate": 0.0001756949641894776, "loss": 0.1538, "step": 4792 }, { "epoch": 0.22710258232646294, "grad_norm": 0.5390625, "learning_rate": 0.00017568523143900815, "loss": 0.9176, "step": 4793 }, { "epoch": 0.2271499644633973, "grad_norm": 0.7734375, "learning_rate": 0.00017567549700992085, "loss": 0.9416, "step": 4794 }, { "epoch": 0.22719734660033167, "grad_norm": 0.79296875, "learning_rate": 0.00017566576090243164, "loss": 1.1508, "step": 4795 }, { "epoch": 0.22724472873726606, "grad_norm": 0.6640625, "learning_rate": 0.00017565602311675647, "loss": 1.1279, "step": 4796 }, { "epoch": 0.22729211087420043, "grad_norm": 0.51953125, "learning_rate": 0.00017564628365311129, "loss": 0.9059, "step": 4797 }, { "epoch": 0.2273394930111348, "grad_norm": 0.765625, "learning_rate": 0.00017563654251171208, "loss": 0.0381, "step": 4798 }, { "epoch": 0.22738687514806918, "grad_norm": 0.64453125, "learning_rate": 0.00017562679969277496, "loss": 0.2966, "step": 4799 }, { "epoch": 0.22743425728500355, "grad_norm": 0.77734375, "learning_rate": 0.00017561705519651593, "loss": 1.2171, "step": 4800 }, { "epoch": 0.22748163942193794, "grad_norm": 0.546875, "learning_rate": 0.0001756073090231512, "loss": 1.2832, "step": 4801 }, { "epoch": 0.2275290215588723, "grad_norm": 0.462890625, "learning_rate": 0.00017559756117289688, "loss": 0.7064, "step": 4802 }, { "epoch": 0.22757640369580667, "grad_norm": 0.66796875, "learning_rate": 0.00017558781164596918, "loss": 0.7603, "step": 4803 }, { "epoch": 0.22762378583274107, "grad_norm": 0.78515625, "learning_rate": 0.00017557806044258433, "loss": 0.9623, "step": 4804 }, { "epoch": 0.22767116796967543, "grad_norm": 0.69140625, "learning_rate": 0.00017556830756295858, "loss": 1.2106, "step": 4805 }, { "epoch": 0.2277185501066098, "grad_norm": 0.57421875, "learning_rate": 0.00017555855300730827, "loss": 1.2195, "step": 4806 }, { "epoch": 0.2277659322435442, "grad_norm": 0.703125, "learning_rate": 0.00017554879677584973, "loss": 0.9812, "step": 4807 }, { "epoch": 0.22781331438047855, "grad_norm": 0.578125, "learning_rate": 0.00017553903886879937, "loss": 0.0847, "step": 4808 }, { "epoch": 0.22786069651741295, "grad_norm": 0.66796875, "learning_rate": 0.0001755292792863736, "loss": 1.2341, "step": 4809 }, { "epoch": 0.2279080786543473, "grad_norm": 0.58203125, "learning_rate": 0.00017551951802878882, "loss": 0.7845, "step": 4810 }, { "epoch": 0.22795546079128168, "grad_norm": 0.2578125, "learning_rate": 0.00017550975509626163, "loss": 0.1396, "step": 4811 }, { "epoch": 0.22800284292821607, "grad_norm": 0.67578125, "learning_rate": 0.00017549999048900846, "loss": 1.2626, "step": 4812 }, { "epoch": 0.22805022506515044, "grad_norm": 0.6015625, "learning_rate": 0.0001754902242072459, "loss": 1.0554, "step": 4813 }, { "epoch": 0.2280976072020848, "grad_norm": 0.474609375, "learning_rate": 0.0001754804562511906, "loss": 0.6975, "step": 4814 }, { "epoch": 0.2281449893390192, "grad_norm": 0.79296875, "learning_rate": 0.0001754706866210592, "loss": 0.1331, "step": 4815 }, { "epoch": 0.22819237147595356, "grad_norm": 0.6953125, "learning_rate": 0.00017546091531706832, "loss": 1.4851, "step": 4816 }, { "epoch": 0.22823975361288795, "grad_norm": 0.5234375, "learning_rate": 0.00017545114233943472, "loss": 0.8983, "step": 4817 }, { "epoch": 0.22828713574982232, "grad_norm": 0.72265625, "learning_rate": 0.0001754413676883752, "loss": 1.0767, "step": 4818 }, { "epoch": 0.22833451788675668, "grad_norm": 0.75, "learning_rate": 0.00017543159136410643, "loss": 1.3045, "step": 4819 }, { "epoch": 0.22838190002369108, "grad_norm": 0.73046875, "learning_rate": 0.00017542181336684535, "loss": 0.7192, "step": 4820 }, { "epoch": 0.22842928216062544, "grad_norm": 0.59375, "learning_rate": 0.00017541203369680875, "loss": 0.5849, "step": 4821 }, { "epoch": 0.2284766642975598, "grad_norm": 0.59765625, "learning_rate": 0.0001754022523542136, "loss": 0.9732, "step": 4822 }, { "epoch": 0.2285240464344942, "grad_norm": 0.69140625, "learning_rate": 0.00017539246933927682, "loss": 1.1261, "step": 4823 }, { "epoch": 0.22857142857142856, "grad_norm": 0.6171875, "learning_rate": 0.00017538268465221534, "loss": 0.9075, "step": 4824 }, { "epoch": 0.22861881070836296, "grad_norm": 0.1845703125, "learning_rate": 0.00017537289829324624, "loss": 0.134, "step": 4825 }, { "epoch": 0.22866619284529732, "grad_norm": 0.671875, "learning_rate": 0.00017536311026258652, "loss": 0.6335, "step": 4826 }, { "epoch": 0.2287135749822317, "grad_norm": 0.466796875, "learning_rate": 0.00017535332056045332, "loss": 0.5317, "step": 4827 }, { "epoch": 0.22876095711916608, "grad_norm": 0.640625, "learning_rate": 0.0001753435291870637, "loss": 0.789, "step": 4828 }, { "epoch": 0.22880833925610045, "grad_norm": 0.7734375, "learning_rate": 0.00017533373614263487, "loss": 1.2605, "step": 4829 }, { "epoch": 0.22885572139303484, "grad_norm": 0.236328125, "learning_rate": 0.000175323941427384, "loss": 0.1808, "step": 4830 }, { "epoch": 0.2289031035299692, "grad_norm": 0.11962890625, "learning_rate": 0.00017531414504152833, "loss": 0.01, "step": 4831 }, { "epoch": 0.22895048566690357, "grad_norm": 0.55078125, "learning_rate": 0.00017530434698528516, "loss": 0.9347, "step": 4832 }, { "epoch": 0.22899786780383796, "grad_norm": 0.6015625, "learning_rate": 0.00017529454725887178, "loss": 0.5792, "step": 4833 }, { "epoch": 0.22904524994077233, "grad_norm": 0.76171875, "learning_rate": 0.00017528474586250554, "loss": 1.219, "step": 4834 }, { "epoch": 0.2290926320777067, "grad_norm": 0.6328125, "learning_rate": 0.00017527494279640383, "loss": 0.7787, "step": 4835 }, { "epoch": 0.22914001421464109, "grad_norm": 0.5078125, "learning_rate": 0.00017526513806078407, "loss": 0.8009, "step": 4836 }, { "epoch": 0.22918739635157545, "grad_norm": 0.57421875, "learning_rate": 0.00017525533165586374, "loss": 1.0709, "step": 4837 }, { "epoch": 0.22923477848850984, "grad_norm": 0.63671875, "learning_rate": 0.00017524552358186027, "loss": 0.8568, "step": 4838 }, { "epoch": 0.2292821606254442, "grad_norm": 0.12890625, "learning_rate": 0.00017523571383899127, "loss": 0.012, "step": 4839 }, { "epoch": 0.22932954276237857, "grad_norm": 0.76953125, "learning_rate": 0.00017522590242747426, "loss": 1.3154, "step": 4840 }, { "epoch": 0.22937692489931297, "grad_norm": 0.609375, "learning_rate": 0.00017521608934752684, "loss": 0.8839, "step": 4841 }, { "epoch": 0.22942430703624733, "grad_norm": 0.400390625, "learning_rate": 0.00017520627459936673, "loss": 0.1537, "step": 4842 }, { "epoch": 0.2294716891731817, "grad_norm": 0.5625, "learning_rate": 0.0001751964581832115, "loss": 1.0254, "step": 4843 }, { "epoch": 0.2295190713101161, "grad_norm": 0.63671875, "learning_rate": 0.00017518664009927895, "loss": 0.2249, "step": 4844 }, { "epoch": 0.22956645344705046, "grad_norm": 0.5703125, "learning_rate": 0.0001751768203477868, "loss": 1.1852, "step": 4845 }, { "epoch": 0.22961383558398485, "grad_norm": 0.56640625, "learning_rate": 0.00017516699892895286, "loss": 0.4592, "step": 4846 }, { "epoch": 0.22966121772091921, "grad_norm": 0.53515625, "learning_rate": 0.00017515717584299493, "loss": 1.0775, "step": 4847 }, { "epoch": 0.22970859985785358, "grad_norm": 0.57421875, "learning_rate": 0.0001751473510901309, "loss": 0.9325, "step": 4848 }, { "epoch": 0.22975598199478797, "grad_norm": 0.30859375, "learning_rate": 0.00017513752467057867, "loss": 0.1666, "step": 4849 }, { "epoch": 0.22980336413172234, "grad_norm": 0.67578125, "learning_rate": 0.00017512769658455617, "loss": 0.8059, "step": 4850 }, { "epoch": 0.2298507462686567, "grad_norm": 0.28125, "learning_rate": 0.0001751178668322814, "loss": 0.1496, "step": 4851 }, { "epoch": 0.2298981284055911, "grad_norm": 0.6875, "learning_rate": 0.00017510803541397234, "loss": 1.0282, "step": 4852 }, { "epoch": 0.22994551054252546, "grad_norm": 0.59375, "learning_rate": 0.00017509820232984705, "loss": 0.0131, "step": 4853 }, { "epoch": 0.22999289267945985, "grad_norm": 0.69921875, "learning_rate": 0.00017508836758012365, "loss": 1.2326, "step": 4854 }, { "epoch": 0.23004027481639422, "grad_norm": 0.6875, "learning_rate": 0.00017507853116502023, "loss": 1.0969, "step": 4855 }, { "epoch": 0.23008765695332858, "grad_norm": 0.65234375, "learning_rate": 0.00017506869308475494, "loss": 1.248, "step": 4856 }, { "epoch": 0.23013503909026298, "grad_norm": 0.44921875, "learning_rate": 0.000175058853339546, "loss": 0.1875, "step": 4857 }, { "epoch": 0.23018242122719734, "grad_norm": 0.47265625, "learning_rate": 0.00017504901192961163, "loss": 0.5449, "step": 4858 }, { "epoch": 0.23022980336413174, "grad_norm": 0.828125, "learning_rate": 0.00017503916885517015, "loss": 1.0936, "step": 4859 }, { "epoch": 0.2302771855010661, "grad_norm": 0.09619140625, "learning_rate": 0.0001750293241164398, "loss": 0.0034, "step": 4860 }, { "epoch": 0.23032456763800047, "grad_norm": 0.8671875, "learning_rate": 0.00017501947771363896, "loss": 0.1314, "step": 4861 }, { "epoch": 0.23037194977493486, "grad_norm": 0.6875, "learning_rate": 0.00017500962964698603, "loss": 1.2821, "step": 4862 }, { "epoch": 0.23041933191186922, "grad_norm": 0.56640625, "learning_rate": 0.0001749997799166994, "loss": 0.0218, "step": 4863 }, { "epoch": 0.2304667140488036, "grad_norm": 0.7890625, "learning_rate": 0.00017498992852299757, "loss": 0.8939, "step": 4864 }, { "epoch": 0.23051409618573798, "grad_norm": 0.68359375, "learning_rate": 0.00017498007546609895, "loss": 1.309, "step": 4865 }, { "epoch": 0.23056147832267235, "grad_norm": 0.6875, "learning_rate": 0.00017497022074622215, "loss": 0.9838, "step": 4866 }, { "epoch": 0.23060886045960674, "grad_norm": 0.58203125, "learning_rate": 0.0001749603643635857, "loss": 1.042, "step": 4867 }, { "epoch": 0.2306562425965411, "grad_norm": 0.0213623046875, "learning_rate": 0.0001749505063184082, "loss": 0.0017, "step": 4868 }, { "epoch": 0.23070362473347547, "grad_norm": 0.74609375, "learning_rate": 0.00017494064661090835, "loss": 0.8515, "step": 4869 }, { "epoch": 0.23075100687040986, "grad_norm": 0.53515625, "learning_rate": 0.00017493078524130474, "loss": 1.0213, "step": 4870 }, { "epoch": 0.23079838900734423, "grad_norm": 0.35546875, "learning_rate": 0.00017492092220981612, "loss": 0.1837, "step": 4871 }, { "epoch": 0.2308457711442786, "grad_norm": 0.498046875, "learning_rate": 0.0001749110575166613, "loss": 0.3354, "step": 4872 }, { "epoch": 0.230893153281213, "grad_norm": 0.010009765625, "learning_rate": 0.00017490119116205896, "loss": 0.0006, "step": 4873 }, { "epoch": 0.23094053541814735, "grad_norm": 0.59375, "learning_rate": 0.000174891323146228, "loss": 0.6591, "step": 4874 }, { "epoch": 0.23098791755508175, "grad_norm": 0.20703125, "learning_rate": 0.00017488145346938728, "loss": 0.0333, "step": 4875 }, { "epoch": 0.2310352996920161, "grad_norm": 0.625, "learning_rate": 0.00017487158213175564, "loss": 1.029, "step": 4876 }, { "epoch": 0.23108268182895048, "grad_norm": 0.546875, "learning_rate": 0.00017486170913355212, "loss": 0.6199, "step": 4877 }, { "epoch": 0.23113006396588487, "grad_norm": 0.68359375, "learning_rate": 0.0001748518344749956, "loss": 0.8363, "step": 4878 }, { "epoch": 0.23117744610281923, "grad_norm": 0.041748046875, "learning_rate": 0.0001748419581563051, "loss": 0.0018, "step": 4879 }, { "epoch": 0.2312248282397536, "grad_norm": 0.57421875, "learning_rate": 0.0001748320801776997, "loss": 1.0243, "step": 4880 }, { "epoch": 0.231272210376688, "grad_norm": 0.48828125, "learning_rate": 0.0001748222005393985, "loss": 1.0112, "step": 4881 }, { "epoch": 0.23131959251362236, "grad_norm": 0.361328125, "learning_rate": 0.00017481231924162054, "loss": 0.035, "step": 4882 }, { "epoch": 0.23136697465055675, "grad_norm": 0.2265625, "learning_rate": 0.00017480243628458504, "loss": 0.0418, "step": 4883 }, { "epoch": 0.23141435678749112, "grad_norm": 0.546875, "learning_rate": 0.0001747925516685112, "loss": 0.6955, "step": 4884 }, { "epoch": 0.23146173892442548, "grad_norm": 0.3203125, "learning_rate": 0.0001747826653936182, "loss": 0.1441, "step": 4885 }, { "epoch": 0.23150912106135987, "grad_norm": 0.01904296875, "learning_rate": 0.00017477277746012537, "loss": 0.0011, "step": 4886 }, { "epoch": 0.23155650319829424, "grad_norm": 0.451171875, "learning_rate": 0.00017476288786825195, "loss": 0.4778, "step": 4887 }, { "epoch": 0.23160388533522863, "grad_norm": 0.58203125, "learning_rate": 0.0001747529966182173, "loss": 0.5974, "step": 4888 }, { "epoch": 0.231651267472163, "grad_norm": 0.69140625, "learning_rate": 0.00017474310371024085, "loss": 1.2586, "step": 4889 }, { "epoch": 0.23169864960909736, "grad_norm": 0.58984375, "learning_rate": 0.00017473320914454193, "loss": 0.9656, "step": 4890 }, { "epoch": 0.23174603174603176, "grad_norm": 0.58203125, "learning_rate": 0.0001747233129213401, "loss": 1.1482, "step": 4891 }, { "epoch": 0.23179341388296612, "grad_norm": 0.88671875, "learning_rate": 0.00017471341504085472, "loss": 0.1904, "step": 4892 }, { "epoch": 0.23184079601990049, "grad_norm": 0.6875, "learning_rate": 0.0001747035155033054, "loss": 0.7755, "step": 4893 }, { "epoch": 0.23188817815683488, "grad_norm": 0.458984375, "learning_rate": 0.00017469361430891167, "loss": 0.0128, "step": 4894 }, { "epoch": 0.23193556029376924, "grad_norm": 0.5, "learning_rate": 0.00017468371145789314, "loss": 0.1607, "step": 4895 }, { "epoch": 0.23198294243070364, "grad_norm": 0.70703125, "learning_rate": 0.00017467380695046942, "loss": 1.0165, "step": 4896 }, { "epoch": 0.232030324567638, "grad_norm": 0.028076171875, "learning_rate": 0.00017466390078686023, "loss": 0.0017, "step": 4897 }, { "epoch": 0.23207770670457237, "grad_norm": 0.55078125, "learning_rate": 0.00017465399296728523, "loss": 0.9253, "step": 4898 }, { "epoch": 0.23212508884150676, "grad_norm": 0.55078125, "learning_rate": 0.00017464408349196416, "loss": 0.0854, "step": 4899 }, { "epoch": 0.23217247097844113, "grad_norm": 0.59375, "learning_rate": 0.00017463417236111684, "loss": 1.1781, "step": 4900 }, { "epoch": 0.2322198531153755, "grad_norm": 0.71875, "learning_rate": 0.0001746242595749631, "loss": 0.3372, "step": 4901 }, { "epoch": 0.23226723525230988, "grad_norm": 0.6015625, "learning_rate": 0.00017461434513372275, "loss": 1.125, "step": 4902 }, { "epoch": 0.23231461738924425, "grad_norm": 0.486328125, "learning_rate": 0.00017460442903761573, "loss": 0.0674, "step": 4903 }, { "epoch": 0.23236199952617864, "grad_norm": 0.6796875, "learning_rate": 0.00017459451128686192, "loss": 1.2133, "step": 4904 }, { "epoch": 0.232409381663113, "grad_norm": 0.494140625, "learning_rate": 0.00017458459188168132, "loss": 0.3097, "step": 4905 }, { "epoch": 0.23245676380004737, "grad_norm": 0.166015625, "learning_rate": 0.00017457467082229386, "loss": 0.1143, "step": 4906 }, { "epoch": 0.23250414593698177, "grad_norm": 0.48046875, "learning_rate": 0.0001745647481089197, "loss": 0.0897, "step": 4907 }, { "epoch": 0.23255152807391613, "grad_norm": 0.8125, "learning_rate": 0.0001745548237417788, "loss": 0.6822, "step": 4908 }, { "epoch": 0.2325989102108505, "grad_norm": 0.70703125, "learning_rate": 0.00017454489772109134, "loss": 1.2888, "step": 4909 }, { "epoch": 0.2326462923477849, "grad_norm": 0.5390625, "learning_rate": 0.00017453497004707747, "loss": 0.613, "step": 4910 }, { "epoch": 0.23269367448471925, "grad_norm": 1.03125, "learning_rate": 0.00017452504071995733, "loss": 0.8391, "step": 4911 }, { "epoch": 0.23274105662165365, "grad_norm": 0.67578125, "learning_rate": 0.00017451510973995115, "loss": 1.3987, "step": 4912 }, { "epoch": 0.232788438758588, "grad_norm": 0.640625, "learning_rate": 0.00017450517710727924, "loss": 0.8413, "step": 4913 }, { "epoch": 0.23283582089552238, "grad_norm": 0.396484375, "learning_rate": 0.00017449524282216186, "loss": 0.1535, "step": 4914 }, { "epoch": 0.23288320303245677, "grad_norm": 0.67578125, "learning_rate": 0.00017448530688481934, "loss": 0.9345, "step": 4915 }, { "epoch": 0.23293058516939114, "grad_norm": 0.515625, "learning_rate": 0.00017447536929547202, "loss": 0.4542, "step": 4916 }, { "epoch": 0.23297796730632553, "grad_norm": 0.52734375, "learning_rate": 0.00017446543005434037, "loss": 0.7109, "step": 4917 }, { "epoch": 0.2330253494432599, "grad_norm": 0.37890625, "learning_rate": 0.0001744554891616448, "loss": 0.0557, "step": 4918 }, { "epoch": 0.23307273158019426, "grad_norm": 0.74609375, "learning_rate": 0.00017444554661760577, "loss": 1.5583, "step": 4919 }, { "epoch": 0.23312011371712865, "grad_norm": 0.7734375, "learning_rate": 0.00017443560242244384, "loss": 0.8715, "step": 4920 }, { "epoch": 0.23316749585406302, "grad_norm": 0.65625, "learning_rate": 0.0001744256565763795, "loss": 0.6857, "step": 4921 }, { "epoch": 0.23321487799099738, "grad_norm": 1.6796875, "learning_rate": 0.0001744157090796334, "loss": 1.169, "step": 4922 }, { "epoch": 0.23326226012793178, "grad_norm": 0.61328125, "learning_rate": 0.0001744057599324261, "loss": 0.8634, "step": 4923 }, { "epoch": 0.23330964226486614, "grad_norm": 0.61328125, "learning_rate": 0.00017439580913497832, "loss": 1.1702, "step": 4924 }, { "epoch": 0.23335702440180053, "grad_norm": 0.73828125, "learning_rate": 0.00017438585668751074, "loss": 1.1863, "step": 4925 }, { "epoch": 0.2334044065387349, "grad_norm": 0.53125, "learning_rate": 0.0001743759025902441, "loss": 1.2634, "step": 4926 }, { "epoch": 0.23345178867566926, "grad_norm": 0.32421875, "learning_rate": 0.00017436594684339912, "loss": 0.0342, "step": 4927 }, { "epoch": 0.23349917081260366, "grad_norm": 0.384765625, "learning_rate": 0.0001743559894471967, "loss": 0.1795, "step": 4928 }, { "epoch": 0.23354655294953802, "grad_norm": 1.53125, "learning_rate": 0.00017434603040185763, "loss": 0.7102, "step": 4929 }, { "epoch": 0.2335939350864724, "grad_norm": 0.57421875, "learning_rate": 0.00017433606970760276, "loss": 1.11, "step": 4930 }, { "epoch": 0.23364131722340678, "grad_norm": 0.259765625, "learning_rate": 0.00017432610736465307, "loss": 0.1318, "step": 4931 }, { "epoch": 0.23368869936034115, "grad_norm": 0.6015625, "learning_rate": 0.00017431614337322948, "loss": 1.1823, "step": 4932 }, { "epoch": 0.23373608149727554, "grad_norm": 0.54296875, "learning_rate": 0.00017430617773355297, "loss": 0.7964, "step": 4933 }, { "epoch": 0.2337834636342099, "grad_norm": 0.6953125, "learning_rate": 0.00017429621044584464, "loss": 0.956, "step": 4934 }, { "epoch": 0.23383084577114427, "grad_norm": 0.54296875, "learning_rate": 0.00017428624151032544, "loss": 0.1908, "step": 4935 }, { "epoch": 0.23387822790807866, "grad_norm": 0.67578125, "learning_rate": 0.00017427627092721654, "loss": 0.8539, "step": 4936 }, { "epoch": 0.23392561004501303, "grad_norm": 0.498046875, "learning_rate": 0.0001742662986967391, "loss": 0.4029, "step": 4937 }, { "epoch": 0.2339729921819474, "grad_norm": 0.052001953125, "learning_rate": 0.00017425632481911423, "loss": 0.0029, "step": 4938 }, { "epoch": 0.23402037431888179, "grad_norm": 0.546875, "learning_rate": 0.0001742463492945632, "loss": 1.1076, "step": 4939 }, { "epoch": 0.23406775645581615, "grad_norm": 0.91015625, "learning_rate": 0.00017423637212330716, "loss": 0.6563, "step": 4940 }, { "epoch": 0.23411513859275054, "grad_norm": 0.498046875, "learning_rate": 0.00017422639330556754, "loss": 0.7361, "step": 4941 }, { "epoch": 0.2341625207296849, "grad_norm": 0.84765625, "learning_rate": 0.00017421641284156553, "loss": 1.0227, "step": 4942 }, { "epoch": 0.23420990286661927, "grad_norm": 0.71875, "learning_rate": 0.00017420643073152254, "loss": 0.5419, "step": 4943 }, { "epoch": 0.23425728500355367, "grad_norm": 0.76171875, "learning_rate": 0.00017419644697565996, "loss": 0.9013, "step": 4944 }, { "epoch": 0.23430466714048803, "grad_norm": 0.1728515625, "learning_rate": 0.00017418646157419922, "loss": 0.0414, "step": 4945 }, { "epoch": 0.23435204927742243, "grad_norm": 0.61328125, "learning_rate": 0.00017417647452736178, "loss": 0.0804, "step": 4946 }, { "epoch": 0.2343994314143568, "grad_norm": 0.6953125, "learning_rate": 0.00017416648583536915, "loss": 0.9069, "step": 4947 }, { "epoch": 0.23444681355129116, "grad_norm": 0.85546875, "learning_rate": 0.00017415649549844286, "loss": 1.0225, "step": 4948 }, { "epoch": 0.23449419568822555, "grad_norm": 0.625, "learning_rate": 0.00017414650351680447, "loss": 1.2589, "step": 4949 }, { "epoch": 0.2345415778251599, "grad_norm": 0.66015625, "learning_rate": 0.00017413650989067564, "loss": 0.8184, "step": 4950 }, { "epoch": 0.23458895996209428, "grad_norm": 0.625, "learning_rate": 0.00017412651462027798, "loss": 1.5527, "step": 4951 }, { "epoch": 0.23463634209902867, "grad_norm": 0.48046875, "learning_rate": 0.00017411651770583318, "loss": 0.1233, "step": 4952 }, { "epoch": 0.23468372423596304, "grad_norm": 0.984375, "learning_rate": 0.00017410651914756295, "loss": 1.028, "step": 4953 }, { "epoch": 0.23473110637289743, "grad_norm": 0.7265625, "learning_rate": 0.00017409651894568907, "loss": 0.8294, "step": 4954 }, { "epoch": 0.2347784885098318, "grad_norm": 0.7421875, "learning_rate": 0.00017408651710043333, "loss": 0.0641, "step": 4955 }, { "epoch": 0.23482587064676616, "grad_norm": 0.46875, "learning_rate": 0.00017407651361201756, "loss": 1.0256, "step": 4956 }, { "epoch": 0.23487325278370055, "grad_norm": 0.95703125, "learning_rate": 0.0001740665084806636, "loss": 1.0128, "step": 4957 }, { "epoch": 0.23492063492063492, "grad_norm": 0.7734375, "learning_rate": 0.00017405650170659339, "loss": 1.0555, "step": 4958 }, { "epoch": 0.23496801705756928, "grad_norm": 0.66796875, "learning_rate": 0.00017404649329002883, "loss": 0.9352, "step": 4959 }, { "epoch": 0.23501539919450368, "grad_norm": 0.224609375, "learning_rate": 0.00017403648323119196, "loss": 0.1448, "step": 4960 }, { "epoch": 0.23506278133143804, "grad_norm": 0.64453125, "learning_rate": 0.0001740264715303047, "loss": 1.1987, "step": 4961 }, { "epoch": 0.23511016346837244, "grad_norm": 0.28125, "learning_rate": 0.00017401645818758917, "loss": 0.0393, "step": 4962 }, { "epoch": 0.2351575456053068, "grad_norm": 0.326171875, "learning_rate": 0.00017400644320326745, "loss": 0.1885, "step": 4963 }, { "epoch": 0.23520492774224117, "grad_norm": 0.6015625, "learning_rate": 0.00017399642657756162, "loss": 0.9889, "step": 4964 }, { "epoch": 0.23525230987917556, "grad_norm": 0.87109375, "learning_rate": 0.0001739864083106939, "loss": 0.9626, "step": 4965 }, { "epoch": 0.23529969201610992, "grad_norm": 0.7265625, "learning_rate": 0.00017397638840288643, "loss": 0.9734, "step": 4966 }, { "epoch": 0.2353470741530443, "grad_norm": 0.7265625, "learning_rate": 0.00017396636685436149, "loss": 0.3847, "step": 4967 }, { "epoch": 0.23539445628997868, "grad_norm": 0.74609375, "learning_rate": 0.00017395634366534131, "loss": 1.0344, "step": 4968 }, { "epoch": 0.23544183842691305, "grad_norm": 0.91796875, "learning_rate": 0.00017394631883604818, "loss": 1.1247, "step": 4969 }, { "epoch": 0.23548922056384744, "grad_norm": 0.345703125, "learning_rate": 0.00017393629236670446, "loss": 0.0628, "step": 4970 }, { "epoch": 0.2355366027007818, "grad_norm": 0.56640625, "learning_rate": 0.00017392626425753255, "loss": 0.7047, "step": 4971 }, { "epoch": 0.23558398483771617, "grad_norm": 0.78515625, "learning_rate": 0.00017391623450875482, "loss": 1.1307, "step": 4972 }, { "epoch": 0.23563136697465056, "grad_norm": 0.6484375, "learning_rate": 0.00017390620312059376, "loss": 1.0037, "step": 4973 }, { "epoch": 0.23567874911158493, "grad_norm": 1.75, "learning_rate": 0.00017389617009327184, "loss": 1.0204, "step": 4974 }, { "epoch": 0.23572613124851932, "grad_norm": 0.2294921875, "learning_rate": 0.00017388613542701156, "loss": 0.1655, "step": 4975 }, { "epoch": 0.2357735133854537, "grad_norm": 0.671875, "learning_rate": 0.0001738760991220355, "loss": 0.9813, "step": 4976 }, { "epoch": 0.23582089552238805, "grad_norm": 0.1513671875, "learning_rate": 0.00017386606117856626, "loss": 0.0226, "step": 4977 }, { "epoch": 0.23586827765932245, "grad_norm": 0.494140625, "learning_rate": 0.00017385602159682647, "loss": 1.083, "step": 4978 }, { "epoch": 0.2359156597962568, "grad_norm": 0.4765625, "learning_rate": 0.00017384598037703877, "loss": 1.045, "step": 4979 }, { "epoch": 0.23596304193319118, "grad_norm": 0.65234375, "learning_rate": 0.0001738359375194259, "loss": 1.4046, "step": 4980 }, { "epoch": 0.23601042407012557, "grad_norm": 0.6953125, "learning_rate": 0.00017382589302421055, "loss": 0.4487, "step": 4981 }, { "epoch": 0.23605780620705993, "grad_norm": 0.7109375, "learning_rate": 0.00017381584689161555, "loss": 1.0711, "step": 4982 }, { "epoch": 0.23610518834399433, "grad_norm": 0.5234375, "learning_rate": 0.0001738057991218637, "loss": 0.8805, "step": 4983 }, { "epoch": 0.2361525704809287, "grad_norm": 0.4921875, "learning_rate": 0.00017379574971517782, "loss": 0.5025, "step": 4984 }, { "epoch": 0.23619995261786306, "grad_norm": 0.62109375, "learning_rate": 0.00017378569867178083, "loss": 1.1161, "step": 4985 }, { "epoch": 0.23624733475479745, "grad_norm": 0.26953125, "learning_rate": 0.00017377564599189562, "loss": 0.0181, "step": 4986 }, { "epoch": 0.23629471689173182, "grad_norm": 0.2236328125, "learning_rate": 0.00017376559167574517, "loss": 0.1703, "step": 4987 }, { "epoch": 0.23634209902866618, "grad_norm": 0.5546875, "learning_rate": 0.00017375553572355248, "loss": 0.5635, "step": 4988 }, { "epoch": 0.23638948116560057, "grad_norm": 0.953125, "learning_rate": 0.00017374547813554057, "loss": 1.1397, "step": 4989 }, { "epoch": 0.23643686330253494, "grad_norm": 0.80078125, "learning_rate": 0.0001737354189119325, "loss": 1.2917, "step": 4990 }, { "epoch": 0.23648424543946933, "grad_norm": 1.171875, "learning_rate": 0.00017372535805295136, "loss": 0.1013, "step": 4991 }, { "epoch": 0.2365316275764037, "grad_norm": 0.7890625, "learning_rate": 0.00017371529555882032, "loss": 0.8985, "step": 4992 }, { "epoch": 0.23657900971333806, "grad_norm": 0.78125, "learning_rate": 0.00017370523142976255, "loss": 0.9875, "step": 4993 }, { "epoch": 0.23662639185027246, "grad_norm": 0.703125, "learning_rate": 0.00017369516566600126, "loss": 0.7521, "step": 4994 }, { "epoch": 0.23667377398720682, "grad_norm": 0.6640625, "learning_rate": 0.00017368509826775968, "loss": 1.1462, "step": 4995 }, { "epoch": 0.23672115612414119, "grad_norm": 0.8671875, "learning_rate": 0.00017367502923526108, "loss": 1.8729, "step": 4996 }, { "epoch": 0.23676853826107558, "grad_norm": 0.515625, "learning_rate": 0.00017366495856872884, "loss": 0.9345, "step": 4997 }, { "epoch": 0.23681592039800994, "grad_norm": 0.78515625, "learning_rate": 0.00017365488626838632, "loss": 1.3382, "step": 4998 }, { "epoch": 0.23686330253494434, "grad_norm": 0.64453125, "learning_rate": 0.0001736448123344568, "loss": 0.9335, "step": 4999 }, { "epoch": 0.2369106846718787, "grad_norm": 0.56640625, "learning_rate": 0.00017363473676716384, "loss": 0.7158, "step": 5000 }, { "epoch": 0.23695806680881307, "grad_norm": 0.609375, "learning_rate": 0.00017362465956673078, "loss": 1.3346, "step": 5001 }, { "epoch": 0.23700544894574746, "grad_norm": 0.765625, "learning_rate": 0.00017361458073338127, "loss": 1.39, "step": 5002 }, { "epoch": 0.23705283108268183, "grad_norm": 0.515625, "learning_rate": 0.00017360450026733873, "loss": 0.6038, "step": 5003 }, { "epoch": 0.23710021321961622, "grad_norm": 0.65234375, "learning_rate": 0.0001735944181688268, "loss": 0.9036, "step": 5004 }, { "epoch": 0.23714759535655058, "grad_norm": 0.5546875, "learning_rate": 0.00017358433443806905, "loss": 1.1041, "step": 5005 }, { "epoch": 0.23719497749348495, "grad_norm": 0.56640625, "learning_rate": 0.00017357424907528914, "loss": 0.152, "step": 5006 }, { "epoch": 0.23724235963041934, "grad_norm": 0.625, "learning_rate": 0.00017356416208071074, "loss": 0.0677, "step": 5007 }, { "epoch": 0.2372897417673537, "grad_norm": 0.390625, "learning_rate": 0.00017355407345455762, "loss": 0.1848, "step": 5008 }, { "epoch": 0.23733712390428807, "grad_norm": 0.7109375, "learning_rate": 0.00017354398319705346, "loss": 1.084, "step": 5009 }, { "epoch": 0.23738450604122247, "grad_norm": 0.64453125, "learning_rate": 0.0001735338913084221, "loss": 1.2232, "step": 5010 }, { "epoch": 0.23743188817815683, "grad_norm": 0.69140625, "learning_rate": 0.00017352379778888736, "loss": 0.6343, "step": 5011 }, { "epoch": 0.23747927031509122, "grad_norm": 0.578125, "learning_rate": 0.0001735137026386731, "loss": 0.811, "step": 5012 }, { "epoch": 0.2375266524520256, "grad_norm": 0.5, "learning_rate": 0.0001735036058580032, "loss": 0.4561, "step": 5013 }, { "epoch": 0.23757403458895995, "grad_norm": 0.609375, "learning_rate": 0.00017349350744710163, "loss": 0.6335, "step": 5014 }, { "epoch": 0.23762141672589435, "grad_norm": 0.3046875, "learning_rate": 0.00017348340740619235, "loss": 0.1175, "step": 5015 }, { "epoch": 0.2376687988628287, "grad_norm": 0.65625, "learning_rate": 0.00017347330573549936, "loss": 1.1408, "step": 5016 }, { "epoch": 0.23771618099976308, "grad_norm": 0.61328125, "learning_rate": 0.0001734632024352467, "loss": 0.99, "step": 5017 }, { "epoch": 0.23776356313669747, "grad_norm": 0.609375, "learning_rate": 0.00017345309750565848, "loss": 0.1639, "step": 5018 }, { "epoch": 0.23781094527363184, "grad_norm": 0.9609375, "learning_rate": 0.0001734429909469588, "loss": 0.7657, "step": 5019 }, { "epoch": 0.23785832741056623, "grad_norm": 0.1962890625, "learning_rate": 0.00017343288275937176, "loss": 0.1353, "step": 5020 }, { "epoch": 0.2379057095475006, "grad_norm": 0.3828125, "learning_rate": 0.00017342277294312165, "loss": 0.1467, "step": 5021 }, { "epoch": 0.23795309168443496, "grad_norm": 0.8046875, "learning_rate": 0.00017341266149843262, "loss": 0.7441, "step": 5022 }, { "epoch": 0.23800047382136935, "grad_norm": 0.234375, "learning_rate": 0.00017340254842552897, "loss": 0.1331, "step": 5023 }, { "epoch": 0.23804785595830372, "grad_norm": 0.77734375, "learning_rate": 0.00017339243372463495, "loss": 1.0166, "step": 5024 }, { "epoch": 0.23809523809523808, "grad_norm": 0.65625, "learning_rate": 0.00017338231739597496, "loss": 0.151, "step": 5025 }, { "epoch": 0.23814262023217247, "grad_norm": 0.451171875, "learning_rate": 0.00017337219943977332, "loss": 0.0549, "step": 5026 }, { "epoch": 0.23819000236910684, "grad_norm": 0.8046875, "learning_rate": 0.00017336207985625443, "loss": 0.4165, "step": 5027 }, { "epoch": 0.23823738450604123, "grad_norm": 0.66796875, "learning_rate": 0.00017335195864564277, "loss": 1.1232, "step": 5028 }, { "epoch": 0.2382847666429756, "grad_norm": 0.6171875, "learning_rate": 0.00017334183580816279, "loss": 1.1028, "step": 5029 }, { "epoch": 0.23833214877990996, "grad_norm": 0.578125, "learning_rate": 0.000173331711344039, "loss": 0.9572, "step": 5030 }, { "epoch": 0.23837953091684436, "grad_norm": 0.75, "learning_rate": 0.000173321585253496, "loss": 1.3829, "step": 5031 }, { "epoch": 0.23842691305377872, "grad_norm": 0.042724609375, "learning_rate": 0.0001733114575367583, "loss": 0.0021, "step": 5032 }, { "epoch": 0.23847429519071311, "grad_norm": 0.6015625, "learning_rate": 0.00017330132819405058, "loss": 0.3454, "step": 5033 }, { "epoch": 0.23852167732764748, "grad_norm": 0.6796875, "learning_rate": 0.00017329119722559749, "loss": 0.557, "step": 5034 }, { "epoch": 0.23856905946458185, "grad_norm": 0.017578125, "learning_rate": 0.00017328106463162369, "loss": 0.0011, "step": 5035 }, { "epoch": 0.23861644160151624, "grad_norm": 0.1728515625, "learning_rate": 0.0001732709304123539, "loss": 0.0309, "step": 5036 }, { "epoch": 0.2386638237384506, "grad_norm": 0.55078125, "learning_rate": 0.00017326079456801298, "loss": 0.9946, "step": 5037 }, { "epoch": 0.23871120587538497, "grad_norm": 0.5078125, "learning_rate": 0.00017325065709882567, "loss": 0.3698, "step": 5038 }, { "epoch": 0.23875858801231936, "grad_norm": 0.474609375, "learning_rate": 0.00017324051800501677, "loss": 0.6622, "step": 5039 }, { "epoch": 0.23880597014925373, "grad_norm": 0.90234375, "learning_rate": 0.00017323037728681122, "loss": 0.2755, "step": 5040 }, { "epoch": 0.23885335228618812, "grad_norm": 0.9453125, "learning_rate": 0.00017322023494443386, "loss": 1.3305, "step": 5041 }, { "epoch": 0.23890073442312248, "grad_norm": 0.89453125, "learning_rate": 0.0001732100909781097, "loss": 1.0599, "step": 5042 }, { "epoch": 0.23894811656005685, "grad_norm": 0.365234375, "learning_rate": 0.00017319994538806372, "loss": 0.2098, "step": 5043 }, { "epoch": 0.23899549869699124, "grad_norm": 0.01708984375, "learning_rate": 0.00017318979817452091, "loss": 0.0013, "step": 5044 }, { "epoch": 0.2390428808339256, "grad_norm": 1.1953125, "learning_rate": 0.00017317964933770633, "loss": 0.483, "step": 5045 }, { "epoch": 0.23909026297085997, "grad_norm": 0.2265625, "learning_rate": 0.0001731694988778451, "loss": 0.0344, "step": 5046 }, { "epoch": 0.23913764510779437, "grad_norm": 0.67578125, "learning_rate": 0.0001731593467951623, "loss": 1.0277, "step": 5047 }, { "epoch": 0.23918502724472873, "grad_norm": 0.76953125, "learning_rate": 0.0001731491930898831, "loss": 1.0551, "step": 5048 }, { "epoch": 0.23923240938166312, "grad_norm": 0.498046875, "learning_rate": 0.00017313903776223274, "loss": 0.8756, "step": 5049 }, { "epoch": 0.2392797915185975, "grad_norm": 0.263671875, "learning_rate": 0.0001731288808124364, "loss": 0.1851, "step": 5050 }, { "epoch": 0.23932717365553186, "grad_norm": 0.2353515625, "learning_rate": 0.00017311872224071942, "loss": 0.1446, "step": 5051 }, { "epoch": 0.23937455579246625, "grad_norm": 0.486328125, "learning_rate": 0.00017310856204730705, "loss": 0.6632, "step": 5052 }, { "epoch": 0.2394219379294006, "grad_norm": 0.142578125, "learning_rate": 0.0001730984002324246, "loss": 0.022, "step": 5053 }, { "epoch": 0.23946932006633498, "grad_norm": 0.80078125, "learning_rate": 0.00017308823679629756, "loss": 1.0228, "step": 5054 }, { "epoch": 0.23951670220326937, "grad_norm": 0.55078125, "learning_rate": 0.00017307807173915123, "loss": 0.9571, "step": 5055 }, { "epoch": 0.23956408434020374, "grad_norm": 0.69140625, "learning_rate": 0.00017306790506121114, "loss": 1.0204, "step": 5056 }, { "epoch": 0.23961146647713813, "grad_norm": 0.2353515625, "learning_rate": 0.0001730577367627027, "loss": 0.0199, "step": 5057 }, { "epoch": 0.2396588486140725, "grad_norm": 0.5234375, "learning_rate": 0.00017304756684385152, "loss": 1.0773, "step": 5058 }, { "epoch": 0.23970623075100686, "grad_norm": 0.578125, "learning_rate": 0.00017303739530488308, "loss": 1.0701, "step": 5059 }, { "epoch": 0.23975361288794125, "grad_norm": 0.59765625, "learning_rate": 0.00017302722214602303, "loss": 0.961, "step": 5060 }, { "epoch": 0.23980099502487562, "grad_norm": 0.609375, "learning_rate": 0.00017301704736749697, "loss": 0.0564, "step": 5061 }, { "epoch": 0.23984837716181, "grad_norm": 0.01483154296875, "learning_rate": 0.0001730068709695306, "loss": 0.001, "step": 5062 }, { "epoch": 0.23989575929874438, "grad_norm": 0.671875, "learning_rate": 0.00017299669295234955, "loss": 1.3446, "step": 5063 }, { "epoch": 0.23994314143567874, "grad_norm": 0.80859375, "learning_rate": 0.0001729865133161796, "loss": 1.3723, "step": 5064 }, { "epoch": 0.23999052357261313, "grad_norm": 0.4140625, "learning_rate": 0.00017297633206124656, "loss": 0.1451, "step": 5065 }, { "epoch": 0.2400379057095475, "grad_norm": 0.828125, "learning_rate": 0.0001729661491877762, "loss": 1.1708, "step": 5066 }, { "epoch": 0.24008528784648187, "grad_norm": 0.64453125, "learning_rate": 0.00017295596469599437, "loss": 1.1451, "step": 5067 }, { "epoch": 0.24013266998341626, "grad_norm": 0.5078125, "learning_rate": 0.00017294577858612695, "loss": 0.6699, "step": 5068 }, { "epoch": 0.24018005212035062, "grad_norm": 0.7734375, "learning_rate": 0.00017293559085839987, "loss": 1.2246, "step": 5069 }, { "epoch": 0.24022743425728502, "grad_norm": 0.462890625, "learning_rate": 0.00017292540151303903, "loss": 0.2145, "step": 5070 }, { "epoch": 0.24027481639421938, "grad_norm": 0.37890625, "learning_rate": 0.00017291521055027052, "loss": 0.1655, "step": 5071 }, { "epoch": 0.24032219853115375, "grad_norm": 0.64453125, "learning_rate": 0.00017290501797032027, "loss": 1.3479, "step": 5072 }, { "epoch": 0.24036958066808814, "grad_norm": 0.19921875, "learning_rate": 0.0001728948237734144, "loss": 0.1186, "step": 5073 }, { "epoch": 0.2404169628050225, "grad_norm": 0.58203125, "learning_rate": 0.00017288462795977895, "loss": 0.6707, "step": 5074 }, { "epoch": 0.24046434494195687, "grad_norm": 0.87109375, "learning_rate": 0.0001728744305296401, "loss": 0.7204, "step": 5075 }, { "epoch": 0.24051172707889126, "grad_norm": 0.62109375, "learning_rate": 0.000172864231483224, "loss": 0.9086, "step": 5076 }, { "epoch": 0.24055910921582563, "grad_norm": 0.7734375, "learning_rate": 0.00017285403082075687, "loss": 1.2283, "step": 5077 }, { "epoch": 0.24060649135276002, "grad_norm": 0.61328125, "learning_rate": 0.00017284382854246494, "loss": 0.8928, "step": 5078 }, { "epoch": 0.2406538734896944, "grad_norm": 0.88671875, "learning_rate": 0.00017283362464857446, "loss": 0.3822, "step": 5079 }, { "epoch": 0.24070125562662875, "grad_norm": 0.59375, "learning_rate": 0.00017282341913931178, "loss": 0.8967, "step": 5080 }, { "epoch": 0.24074863776356314, "grad_norm": 0.7421875, "learning_rate": 0.0001728132120149032, "loss": 1.743, "step": 5081 }, { "epoch": 0.2407960199004975, "grad_norm": 0.6875, "learning_rate": 0.0001728030032755752, "loss": 1.162, "step": 5082 }, { "epoch": 0.24084340203743188, "grad_norm": 0.6875, "learning_rate": 0.00017279279292155408, "loss": 1.3576, "step": 5083 }, { "epoch": 0.24089078417436627, "grad_norm": 0.1455078125, "learning_rate": 0.00017278258095306637, "loss": 0.0138, "step": 5084 }, { "epoch": 0.24093816631130063, "grad_norm": 0.2578125, "learning_rate": 0.00017277236737033854, "loss": 0.1336, "step": 5085 }, { "epoch": 0.24098554844823503, "grad_norm": 0.54296875, "learning_rate": 0.0001727621521735971, "loss": 0.7657, "step": 5086 }, { "epoch": 0.2410329305851694, "grad_norm": 0.1826171875, "learning_rate": 0.00017275193536306864, "loss": 0.1299, "step": 5087 }, { "epoch": 0.24108031272210376, "grad_norm": 0.6328125, "learning_rate": 0.00017274171693897975, "loss": 0.9155, "step": 5088 }, { "epoch": 0.24112769485903815, "grad_norm": 0.484375, "learning_rate": 0.00017273149690155703, "loss": 0.6494, "step": 5089 }, { "epoch": 0.24117507699597251, "grad_norm": 0.59765625, "learning_rate": 0.00017272127525102721, "loss": 0.7286, "step": 5090 }, { "epoch": 0.2412224591329069, "grad_norm": 0.5625, "learning_rate": 0.00017271105198761694, "loss": 1.1988, "step": 5091 }, { "epoch": 0.24126984126984127, "grad_norm": 0.234375, "learning_rate": 0.00017270082711155302, "loss": 0.1147, "step": 5092 }, { "epoch": 0.24131722340677564, "grad_norm": 0.75390625, "learning_rate": 0.00017269060062306214, "loss": 1.0916, "step": 5093 }, { "epoch": 0.24136460554371003, "grad_norm": 0.1474609375, "learning_rate": 0.00017268037252237122, "loss": 0.0054, "step": 5094 }, { "epoch": 0.2414119876806444, "grad_norm": 0.85546875, "learning_rate": 0.00017267014280970702, "loss": 0.4695, "step": 5095 }, { "epoch": 0.24145936981757876, "grad_norm": 0.9375, "learning_rate": 0.00017265991148529648, "loss": 1.0174, "step": 5096 }, { "epoch": 0.24150675195451315, "grad_norm": 0.287109375, "learning_rate": 0.0001726496785493665, "loss": 0.1565, "step": 5097 }, { "epoch": 0.24155413409144752, "grad_norm": 0.54296875, "learning_rate": 0.000172639444002144, "loss": 1.2032, "step": 5098 }, { "epoch": 0.2416015162283819, "grad_norm": 0.474609375, "learning_rate": 0.00017262920784385602, "loss": 1.0722, "step": 5099 }, { "epoch": 0.24164889836531628, "grad_norm": 0.31640625, "learning_rate": 0.00017261897007472956, "loss": 0.0169, "step": 5100 }, { "epoch": 0.24169628050225064, "grad_norm": 0.671875, "learning_rate": 0.00017260873069499172, "loss": 1.778, "step": 5101 }, { "epoch": 0.24174366263918504, "grad_norm": 0.65625, "learning_rate": 0.00017259848970486955, "loss": 0.4495, "step": 5102 }, { "epoch": 0.2417910447761194, "grad_norm": 0.640625, "learning_rate": 0.00017258824710459023, "loss": 0.9077, "step": 5103 }, { "epoch": 0.24183842691305377, "grad_norm": 0.1796875, "learning_rate": 0.0001725780028943809, "loss": 0.0096, "step": 5104 }, { "epoch": 0.24188580904998816, "grad_norm": 0.1767578125, "learning_rate": 0.00017256775707446875, "loss": 0.0437, "step": 5105 }, { "epoch": 0.24193319118692252, "grad_norm": 0.6171875, "learning_rate": 0.00017255750964508107, "loss": 1.0936, "step": 5106 }, { "epoch": 0.24198057332385692, "grad_norm": 0.56640625, "learning_rate": 0.00017254726060644512, "loss": 0.0998, "step": 5107 }, { "epoch": 0.24202795546079128, "grad_norm": 0.7265625, "learning_rate": 0.00017253700995878814, "loss": 1.4343, "step": 5108 }, { "epoch": 0.24207533759772565, "grad_norm": 0.7578125, "learning_rate": 0.00017252675770233758, "loss": 1.1585, "step": 5109 }, { "epoch": 0.24212271973466004, "grad_norm": 0.51171875, "learning_rate": 0.0001725165038373208, "loss": 0.9669, "step": 5110 }, { "epoch": 0.2421701018715944, "grad_norm": 0.466796875, "learning_rate": 0.0001725062483639652, "loss": 0.1645, "step": 5111 }, { "epoch": 0.24221748400852877, "grad_norm": 0.462890625, "learning_rate": 0.00017249599128249825, "loss": 0.8636, "step": 5112 }, { "epoch": 0.24226486614546316, "grad_norm": 0.2177734375, "learning_rate": 0.00017248573259314739, "loss": 0.1532, "step": 5113 }, { "epoch": 0.24231224828239753, "grad_norm": 0.72265625, "learning_rate": 0.00017247547229614022, "loss": 1.4756, "step": 5114 }, { "epoch": 0.24235963041933192, "grad_norm": 0.5, "learning_rate": 0.00017246521039170429, "loss": 0.7939, "step": 5115 }, { "epoch": 0.2424070125562663, "grad_norm": 0.59375, "learning_rate": 0.00017245494688006716, "loss": 0.8726, "step": 5116 }, { "epoch": 0.24245439469320065, "grad_norm": 0.54296875, "learning_rate": 0.00017244468176145648, "loss": 1.7517, "step": 5117 }, { "epoch": 0.24250177683013505, "grad_norm": 0.259765625, "learning_rate": 0.00017243441503609993, "loss": 0.0335, "step": 5118 }, { "epoch": 0.2425491589670694, "grad_norm": 0.54296875, "learning_rate": 0.00017242414670422523, "loss": 0.7218, "step": 5119 }, { "epoch": 0.2425965411040038, "grad_norm": 0.6953125, "learning_rate": 0.00017241387676606004, "loss": 0.6616, "step": 5120 }, { "epoch": 0.24264392324093817, "grad_norm": 0.150390625, "learning_rate": 0.00017240360522183224, "loss": 0.0204, "step": 5121 }, { "epoch": 0.24269130537787253, "grad_norm": 0.53515625, "learning_rate": 0.0001723933320717696, "loss": 0.7932, "step": 5122 }, { "epoch": 0.24273868751480693, "grad_norm": 0.19140625, "learning_rate": 0.00017238305731609997, "loss": 0.1196, "step": 5123 }, { "epoch": 0.2427860696517413, "grad_norm": 0.546875, "learning_rate": 0.00017237278095505118, "loss": 0.6768, "step": 5124 }, { "epoch": 0.24283345178867566, "grad_norm": 0.6484375, "learning_rate": 0.00017236250298885124, "loss": 0.7022, "step": 5125 }, { "epoch": 0.24288083392561005, "grad_norm": 0.71875, "learning_rate": 0.00017235222341772802, "loss": 1.0216, "step": 5126 }, { "epoch": 0.24292821606254442, "grad_norm": 0.65234375, "learning_rate": 0.00017234194224190961, "loss": 0.9276, "step": 5127 }, { "epoch": 0.2429755981994788, "grad_norm": 0.59375, "learning_rate": 0.00017233165946162394, "loss": 1.2332, "step": 5128 }, { "epoch": 0.24302298033641317, "grad_norm": 0.1328125, "learning_rate": 0.00017232137507709912, "loss": 0.0201, "step": 5129 }, { "epoch": 0.24307036247334754, "grad_norm": 0.6953125, "learning_rate": 0.0001723110890885632, "loss": 0.7718, "step": 5130 }, { "epoch": 0.24311774461028193, "grad_norm": 0.71484375, "learning_rate": 0.0001723008014962444, "loss": 1.0057, "step": 5131 }, { "epoch": 0.2431651267472163, "grad_norm": 0.515625, "learning_rate": 0.00017229051230037082, "loss": 0.9161, "step": 5132 }, { "epoch": 0.24321250888415066, "grad_norm": 0.76953125, "learning_rate": 0.00017228022150117065, "loss": 1.0776, "step": 5133 }, { "epoch": 0.24325989102108506, "grad_norm": 0.53125, "learning_rate": 0.00017226992909887215, "loss": 0.0449, "step": 5134 }, { "epoch": 0.24330727315801942, "grad_norm": 0.859375, "learning_rate": 0.0001722596350937036, "loss": 1.0285, "step": 5135 }, { "epoch": 0.24335465529495381, "grad_norm": 0.1630859375, "learning_rate": 0.00017224933948589336, "loss": 0.0313, "step": 5136 }, { "epoch": 0.24340203743188818, "grad_norm": 0.64453125, "learning_rate": 0.0001722390422756697, "loss": 1.1853, "step": 5137 }, { "epoch": 0.24344941956882254, "grad_norm": 0.87890625, "learning_rate": 0.00017222874346326103, "loss": 0.7152, "step": 5138 }, { "epoch": 0.24349680170575694, "grad_norm": 0.4453125, "learning_rate": 0.00017221844304889577, "loss": 0.1681, "step": 5139 }, { "epoch": 0.2435441838426913, "grad_norm": 0.5390625, "learning_rate": 0.00017220814103280233, "loss": 0.7481, "step": 5140 }, { "epoch": 0.24359156597962567, "grad_norm": 0.4140625, "learning_rate": 0.0001721978374152093, "loss": 0.026, "step": 5141 }, { "epoch": 0.24363894811656006, "grad_norm": 0.68359375, "learning_rate": 0.0001721875321963451, "loss": 1.3398, "step": 5142 }, { "epoch": 0.24368633025349443, "grad_norm": 1.6796875, "learning_rate": 0.0001721772253764383, "loss": 0.5379, "step": 5143 }, { "epoch": 0.24373371239042882, "grad_norm": 0.89453125, "learning_rate": 0.00017216691695571756, "loss": 1.3066, "step": 5144 }, { "epoch": 0.24378109452736318, "grad_norm": 0.494140625, "learning_rate": 0.00017215660693441147, "loss": 0.4534, "step": 5145 }, { "epoch": 0.24382847666429755, "grad_norm": 0.392578125, "learning_rate": 0.00017214629531274865, "loss": 0.2493, "step": 5146 }, { "epoch": 0.24387585880123194, "grad_norm": 0.447265625, "learning_rate": 0.00017213598209095792, "loss": 0.9154, "step": 5147 }, { "epoch": 0.2439232409381663, "grad_norm": 0.71484375, "learning_rate": 0.00017212566726926789, "loss": 0.4233, "step": 5148 }, { "epoch": 0.24397062307510067, "grad_norm": 0.11474609375, "learning_rate": 0.0001721153508479074, "loss": 0.0072, "step": 5149 }, { "epoch": 0.24401800521203507, "grad_norm": 0.76171875, "learning_rate": 0.00017210503282710527, "loss": 0.8346, "step": 5150 }, { "epoch": 0.24406538734896943, "grad_norm": 0.49609375, "learning_rate": 0.00017209471320709025, "loss": 0.2293, "step": 5151 }, { "epoch": 0.24411276948590382, "grad_norm": 0.6796875, "learning_rate": 0.00017208439198809132, "loss": 0.8109, "step": 5152 }, { "epoch": 0.2441601516228382, "grad_norm": 0.06982421875, "learning_rate": 0.00017207406917033738, "loss": 0.003, "step": 5153 }, { "epoch": 0.24420753375977255, "grad_norm": 0.73828125, "learning_rate": 0.0001720637447540573, "loss": 1.1695, "step": 5154 }, { "epoch": 0.24425491589670695, "grad_norm": 0.64453125, "learning_rate": 0.00017205341873948018, "loss": 0.9309, "step": 5155 }, { "epoch": 0.2443022980336413, "grad_norm": 0.81640625, "learning_rate": 0.00017204309112683493, "loss": 0.9097, "step": 5156 }, { "epoch": 0.2443496801705757, "grad_norm": 0.365234375, "learning_rate": 0.0001720327619163507, "loss": 0.0052, "step": 5157 }, { "epoch": 0.24439706230751007, "grad_norm": 0.1923828125, "learning_rate": 0.00017202243110825652, "loss": 0.0227, "step": 5158 }, { "epoch": 0.24444444444444444, "grad_norm": 0.49609375, "learning_rate": 0.00017201209870278152, "loss": 0.726, "step": 5159 }, { "epoch": 0.24449182658137883, "grad_norm": 0.2392578125, "learning_rate": 0.00017200176470015486, "loss": 0.0155, "step": 5160 }, { "epoch": 0.2445392087183132, "grad_norm": 0.60546875, "learning_rate": 0.0001719914291006058, "loss": 1.3043, "step": 5161 }, { "epoch": 0.24458659085524756, "grad_norm": 0.671875, "learning_rate": 0.0001719810919043635, "loss": 0.9903, "step": 5162 }, { "epoch": 0.24463397299218195, "grad_norm": 1.453125, "learning_rate": 0.00017197075311165723, "loss": 0.0655, "step": 5163 }, { "epoch": 0.24468135512911632, "grad_norm": 0.6328125, "learning_rate": 0.00017196041272271635, "loss": 0.936, "step": 5164 }, { "epoch": 0.2447287372660507, "grad_norm": 0.470703125, "learning_rate": 0.00017195007073777014, "loss": 0.0852, "step": 5165 }, { "epoch": 0.24477611940298508, "grad_norm": 0.53125, "learning_rate": 0.000171939727157048, "loss": 0.6782, "step": 5166 }, { "epoch": 0.24482350153991944, "grad_norm": 0.75, "learning_rate": 0.00017192938198077936, "loss": 1.1449, "step": 5167 }, { "epoch": 0.24487088367685383, "grad_norm": 0.7734375, "learning_rate": 0.00017191903520919364, "loss": 1.3613, "step": 5168 }, { "epoch": 0.2449182658137882, "grad_norm": 0.6796875, "learning_rate": 0.0001719086868425203, "loss": 1.1635, "step": 5169 }, { "epoch": 0.24496564795072256, "grad_norm": 0.6171875, "learning_rate": 0.0001718983368809889, "loss": 0.7599, "step": 5170 }, { "epoch": 0.24501303008765696, "grad_norm": 0.33203125, "learning_rate": 0.00017188798532482896, "loss": 0.0246, "step": 5171 }, { "epoch": 0.24506041222459132, "grad_norm": 0.6796875, "learning_rate": 0.0001718776321742701, "loss": 1.2764, "step": 5172 }, { "epoch": 0.24510779436152572, "grad_norm": 0.75, "learning_rate": 0.00017186727742954188, "loss": 0.1184, "step": 5173 }, { "epoch": 0.24515517649846008, "grad_norm": 0.6484375, "learning_rate": 0.00017185692109087403, "loss": 0.9347, "step": 5174 }, { "epoch": 0.24520255863539445, "grad_norm": 0.453125, "learning_rate": 0.00017184656315849618, "loss": 0.4884, "step": 5175 }, { "epoch": 0.24524994077232884, "grad_norm": 0.255859375, "learning_rate": 0.0001718362036326381, "loss": 0.1475, "step": 5176 }, { "epoch": 0.2452973229092632, "grad_norm": 0.5, "learning_rate": 0.00017182584251352955, "loss": 1.0809, "step": 5177 }, { "epoch": 0.24534470504619757, "grad_norm": 0.0037689208984375, "learning_rate": 0.00017181547980140032, "loss": 0.0003, "step": 5178 }, { "epoch": 0.24539208718313196, "grad_norm": 0.73046875, "learning_rate": 0.00017180511549648024, "loss": 1.2188, "step": 5179 }, { "epoch": 0.24543946932006633, "grad_norm": 0.341796875, "learning_rate": 0.00017179474959899918, "loss": 0.0038, "step": 5180 }, { "epoch": 0.24548685145700072, "grad_norm": 0.310546875, "learning_rate": 0.00017178438210918703, "loss": 0.0334, "step": 5181 }, { "epoch": 0.2455342335939351, "grad_norm": 0.765625, "learning_rate": 0.00017177401302727376, "loss": 0.0359, "step": 5182 }, { "epoch": 0.24558161573086945, "grad_norm": 0.70703125, "learning_rate": 0.00017176364235348932, "loss": 1.4391, "step": 5183 }, { "epoch": 0.24562899786780384, "grad_norm": 0.498046875, "learning_rate": 0.00017175327008806375, "loss": 0.6934, "step": 5184 }, { "epoch": 0.2456763800047382, "grad_norm": 0.390625, "learning_rate": 0.00017174289623122705, "loss": 0.0347, "step": 5185 }, { "epoch": 0.2457237621416726, "grad_norm": 0.64453125, "learning_rate": 0.00017173252078320935, "loss": 1.4223, "step": 5186 }, { "epoch": 0.24577114427860697, "grad_norm": 0.5546875, "learning_rate": 0.00017172214374424076, "loss": 0.9328, "step": 5187 }, { "epoch": 0.24581852641554133, "grad_norm": 0.26953125, "learning_rate": 0.0001717117651145514, "loss": 0.0053, "step": 5188 }, { "epoch": 0.24586590855247573, "grad_norm": 0.498046875, "learning_rate": 0.00017170138489437146, "loss": 0.4809, "step": 5189 }, { "epoch": 0.2459132906894101, "grad_norm": 0.70703125, "learning_rate": 0.0001716910030839312, "loss": 0.1269, "step": 5190 }, { "epoch": 0.24596067282634446, "grad_norm": 0.546875, "learning_rate": 0.00017168061968346083, "loss": 0.6677, "step": 5191 }, { "epoch": 0.24600805496327885, "grad_norm": 0.09912109375, "learning_rate": 0.0001716702346931907, "loss": 0.0111, "step": 5192 }, { "epoch": 0.24605543710021321, "grad_norm": 0.78515625, "learning_rate": 0.00017165984811335106, "loss": 1.6476, "step": 5193 }, { "epoch": 0.2461028192371476, "grad_norm": 0.7265625, "learning_rate": 0.00017164945994417233, "loss": 1.1426, "step": 5194 }, { "epoch": 0.24615020137408197, "grad_norm": 0.83984375, "learning_rate": 0.00017163907018588492, "loss": 0.6152, "step": 5195 }, { "epoch": 0.24619758351101634, "grad_norm": 0.65625, "learning_rate": 0.00017162867883871924, "loss": 0.9099, "step": 5196 }, { "epoch": 0.24624496564795073, "grad_norm": 0.388671875, "learning_rate": 0.00017161828590290572, "loss": 0.0948, "step": 5197 }, { "epoch": 0.2462923477848851, "grad_norm": 0.478515625, "learning_rate": 0.00017160789137867495, "loss": 0.5108, "step": 5198 }, { "epoch": 0.24633972992181946, "grad_norm": 0.361328125, "learning_rate": 0.0001715974952662574, "loss": 0.1785, "step": 5199 }, { "epoch": 0.24638711205875385, "grad_norm": 0.1083984375, "learning_rate": 0.00017158709756588366, "loss": 0.0049, "step": 5200 }, { "epoch": 0.24643449419568822, "grad_norm": 0.76953125, "learning_rate": 0.00017157669827778436, "loss": 0.2206, "step": 5201 }, { "epoch": 0.2464818763326226, "grad_norm": 0.01275634765625, "learning_rate": 0.0001715662974021901, "loss": 0.0009, "step": 5202 }, { "epoch": 0.24652925846955698, "grad_norm": 0.98828125, "learning_rate": 0.00017155589493933162, "loss": 0.2739, "step": 5203 }, { "epoch": 0.24657664060649134, "grad_norm": 0.56640625, "learning_rate": 0.0001715454908894396, "loss": 0.7662, "step": 5204 }, { "epoch": 0.24662402274342574, "grad_norm": 0.69921875, "learning_rate": 0.0001715350852527448, "loss": 0.7587, "step": 5205 }, { "epoch": 0.2466714048803601, "grad_norm": 0.50390625, "learning_rate": 0.00017152467802947804, "loss": 1.1468, "step": 5206 }, { "epoch": 0.24671878701729447, "grad_norm": 0.27734375, "learning_rate": 0.00017151426921987008, "loss": 0.1373, "step": 5207 }, { "epoch": 0.24676616915422886, "grad_norm": 0.0732421875, "learning_rate": 0.0001715038588241518, "loss": 0.0061, "step": 5208 }, { "epoch": 0.24681355129116322, "grad_norm": 0.65625, "learning_rate": 0.0001714934468425541, "loss": 0.9072, "step": 5209 }, { "epoch": 0.24686093342809762, "grad_norm": 0.54296875, "learning_rate": 0.00017148303327530788, "loss": 0.7494, "step": 5210 }, { "epoch": 0.24690831556503198, "grad_norm": 0.515625, "learning_rate": 0.00017147261812264412, "loss": 0.5658, "step": 5211 }, { "epoch": 0.24695569770196635, "grad_norm": 0.6875, "learning_rate": 0.00017146220138479384, "loss": 1.052, "step": 5212 }, { "epoch": 0.24700307983890074, "grad_norm": 0.6640625, "learning_rate": 0.00017145178306198806, "loss": 0.1219, "step": 5213 }, { "epoch": 0.2470504619758351, "grad_norm": 0.6953125, "learning_rate": 0.00017144136315445783, "loss": 0.7649, "step": 5214 }, { "epoch": 0.2470978441127695, "grad_norm": 0.78125, "learning_rate": 0.00017143094166243423, "loss": 1.2744, "step": 5215 }, { "epoch": 0.24714522624970386, "grad_norm": 0.298828125, "learning_rate": 0.00017142051858614848, "loss": 0.0192, "step": 5216 }, { "epoch": 0.24719260838663823, "grad_norm": 0.60546875, "learning_rate": 0.00017141009392583167, "loss": 1.2459, "step": 5217 }, { "epoch": 0.24723999052357262, "grad_norm": 0.59375, "learning_rate": 0.00017139966768171504, "loss": 0.5501, "step": 5218 }, { "epoch": 0.247287372660507, "grad_norm": 0.359375, "learning_rate": 0.00017138923985402985, "loss": 0.2089, "step": 5219 }, { "epoch": 0.24733475479744135, "grad_norm": 0.59375, "learning_rate": 0.00017137881044300735, "loss": 0.794, "step": 5220 }, { "epoch": 0.24738213693437575, "grad_norm": 0.65234375, "learning_rate": 0.00017136837944887887, "loss": 0.9825, "step": 5221 }, { "epoch": 0.2474295190713101, "grad_norm": 0.6171875, "learning_rate": 0.00017135794687187574, "loss": 0.9264, "step": 5222 }, { "epoch": 0.2474769012082445, "grad_norm": 0.66015625, "learning_rate": 0.00017134751271222936, "loss": 1.1857, "step": 5223 }, { "epoch": 0.24752428334517887, "grad_norm": 0.396484375, "learning_rate": 0.00017133707697017115, "loss": 0.0222, "step": 5224 }, { "epoch": 0.24757166548211323, "grad_norm": 0.2392578125, "learning_rate": 0.00017132663964593254, "loss": 0.0491, "step": 5225 }, { "epoch": 0.24761904761904763, "grad_norm": 0.67578125, "learning_rate": 0.00017131620073974503, "loss": 1.0718, "step": 5226 }, { "epoch": 0.247666429755982, "grad_norm": 0.412109375, "learning_rate": 0.0001713057602518402, "loss": 0.4596, "step": 5227 }, { "epoch": 0.24771381189291636, "grad_norm": 0.609375, "learning_rate": 0.00017129531818244954, "loss": 0.0546, "step": 5228 }, { "epoch": 0.24776119402985075, "grad_norm": 0.75390625, "learning_rate": 0.00017128487453180462, "loss": 0.8905, "step": 5229 }, { "epoch": 0.24780857616678512, "grad_norm": 0.50390625, "learning_rate": 0.00017127442930013715, "loss": 0.538, "step": 5230 }, { "epoch": 0.2478559583037195, "grad_norm": 0.46875, "learning_rate": 0.00017126398248767875, "loss": 0.144, "step": 5231 }, { "epoch": 0.24790334044065387, "grad_norm": 0.7265625, "learning_rate": 0.0001712535340946611, "loss": 1.2883, "step": 5232 }, { "epoch": 0.24795072257758824, "grad_norm": 0.671875, "learning_rate": 0.000171243084121316, "loss": 1.1133, "step": 5233 }, { "epoch": 0.24799810471452263, "grad_norm": 0.21484375, "learning_rate": 0.00017123263256787517, "loss": 0.0404, "step": 5234 }, { "epoch": 0.248045486851457, "grad_norm": 0.189453125, "learning_rate": 0.0001712221794345704, "loss": 0.1245, "step": 5235 }, { "epoch": 0.24809286898839136, "grad_norm": 0.328125, "learning_rate": 0.00017121172472163356, "loss": 0.042, "step": 5236 }, { "epoch": 0.24814025112532576, "grad_norm": 0.875, "learning_rate": 0.00017120126842929656, "loss": 0.4715, "step": 5237 }, { "epoch": 0.24818763326226012, "grad_norm": 0.59765625, "learning_rate": 0.0001711908105577912, "loss": 1.1695, "step": 5238 }, { "epoch": 0.24823501539919451, "grad_norm": 0.83984375, "learning_rate": 0.00017118035110734954, "loss": 0.9905, "step": 5239 }, { "epoch": 0.24828239753612888, "grad_norm": 0.051025390625, "learning_rate": 0.0001711698900782035, "loss": 0.0019, "step": 5240 }, { "epoch": 0.24832977967306324, "grad_norm": 0.55859375, "learning_rate": 0.0001711594274705851, "loss": 0.9222, "step": 5241 }, { "epoch": 0.24837716180999764, "grad_norm": 0.703125, "learning_rate": 0.00017114896328472638, "loss": 0.2917, "step": 5242 }, { "epoch": 0.248424543946932, "grad_norm": 0.439453125, "learning_rate": 0.00017113849752085946, "loss": 0.5447, "step": 5243 }, { "epoch": 0.2484719260838664, "grad_norm": 0.58984375, "learning_rate": 0.0001711280301792164, "loss": 1.1684, "step": 5244 }, { "epoch": 0.24851930822080076, "grad_norm": 1.046875, "learning_rate": 0.00017111756126002945, "loss": 1.2022, "step": 5245 }, { "epoch": 0.24856669035773513, "grad_norm": 0.70703125, "learning_rate": 0.00017110709076353068, "loss": 0.9585, "step": 5246 }, { "epoch": 0.24861407249466952, "grad_norm": 0.73828125, "learning_rate": 0.00017109661868995244, "loss": 0.932, "step": 5247 }, { "epoch": 0.24866145463160388, "grad_norm": 0.70703125, "learning_rate": 0.0001710861450395269, "loss": 1.2129, "step": 5248 }, { "epoch": 0.24870883676853825, "grad_norm": 0.3046875, "learning_rate": 0.00017107566981248637, "loss": 0.0886, "step": 5249 }, { "epoch": 0.24875621890547264, "grad_norm": 0.2373046875, "learning_rate": 0.0001710651930090632, "loss": 0.0481, "step": 5250 }, { "epoch": 0.248803601042407, "grad_norm": 0.63671875, "learning_rate": 0.00017105471462948975, "loss": 0.1141, "step": 5251 }, { "epoch": 0.2488509831793414, "grad_norm": 0.76953125, "learning_rate": 0.00017104423467399838, "loss": 0.9654, "step": 5252 }, { "epoch": 0.24889836531627577, "grad_norm": 1.6328125, "learning_rate": 0.0001710337531428216, "loss": 0.7203, "step": 5253 }, { "epoch": 0.24894574745321013, "grad_norm": 0.69921875, "learning_rate": 0.00017102327003619183, "loss": 1.1123, "step": 5254 }, { "epoch": 0.24899312959014452, "grad_norm": 0.57421875, "learning_rate": 0.00017101278535434155, "loss": 0.857, "step": 5255 }, { "epoch": 0.2490405117270789, "grad_norm": 0.76171875, "learning_rate": 0.00017100229909750337, "loss": 1.2906, "step": 5256 }, { "epoch": 0.24908789386401325, "grad_norm": 0.515625, "learning_rate": 0.0001709918112659098, "loss": 0.7463, "step": 5257 }, { "epoch": 0.24913527600094765, "grad_norm": 0.0341796875, "learning_rate": 0.00017098132185979346, "loss": 0.0013, "step": 5258 }, { "epoch": 0.249182658137882, "grad_norm": 0.68359375, "learning_rate": 0.00017097083087938705, "loss": 1.2393, "step": 5259 }, { "epoch": 0.2492300402748164, "grad_norm": 0.38671875, "learning_rate": 0.00017096033832492317, "loss": 0.1059, "step": 5260 }, { "epoch": 0.24927742241175077, "grad_norm": 0.54296875, "learning_rate": 0.00017094984419663457, "loss": 1.0911, "step": 5261 }, { "epoch": 0.24932480454868514, "grad_norm": 0.279296875, "learning_rate": 0.00017093934849475405, "loss": 0.1458, "step": 5262 }, { "epoch": 0.24937218668561953, "grad_norm": 0.640625, "learning_rate": 0.00017092885121951427, "loss": 0.7728, "step": 5263 }, { "epoch": 0.2494195688225539, "grad_norm": 0.7578125, "learning_rate": 0.00017091835237114818, "loss": 1.1178, "step": 5264 }, { "epoch": 0.24946695095948826, "grad_norm": 0.58203125, "learning_rate": 0.00017090785194988852, "loss": 0.9226, "step": 5265 }, { "epoch": 0.24951433309642265, "grad_norm": 0.546875, "learning_rate": 0.00017089734995596824, "loss": 1.1267, "step": 5266 }, { "epoch": 0.24956171523335702, "grad_norm": 0.55859375, "learning_rate": 0.00017088684638962029, "loss": 0.9419, "step": 5267 }, { "epoch": 0.2496090973702914, "grad_norm": 0.478515625, "learning_rate": 0.00017087634125107756, "loss": 0.5949, "step": 5268 }, { "epoch": 0.24965647950722578, "grad_norm": 0.359375, "learning_rate": 0.0001708658345405731, "loss": 0.0654, "step": 5269 }, { "epoch": 0.24970386164416014, "grad_norm": 0.640625, "learning_rate": 0.0001708553262583399, "loss": 1.1039, "step": 5270 }, { "epoch": 0.24975124378109453, "grad_norm": 0.46484375, "learning_rate": 0.00017084481640461104, "loss": 0.5722, "step": 5271 }, { "epoch": 0.2497986259180289, "grad_norm": 0.5546875, "learning_rate": 0.0001708343049796196, "loss": 1.0248, "step": 5272 }, { "epoch": 0.2498460080549633, "grad_norm": 0.890625, "learning_rate": 0.00017082379198359875, "loss": 0.7751, "step": 5273 }, { "epoch": 0.24989339019189766, "grad_norm": 0.828125, "learning_rate": 0.00017081327741678162, "loss": 0.1469, "step": 5274 }, { "epoch": 0.24994077232883202, "grad_norm": 0.7265625, "learning_rate": 0.0001708027612794014, "loss": 0.9905, "step": 5275 }, { "epoch": 0.24998815446576642, "grad_norm": 0.73828125, "learning_rate": 0.00017079224357169137, "loss": 0.7281, "step": 5276 }, { "epoch": 0.2500355366027008, "grad_norm": 0.70703125, "learning_rate": 0.00017078172429388477, "loss": 0.9772, "step": 5277 }, { "epoch": 0.2500829187396352, "grad_norm": 0.34375, "learning_rate": 0.00017077120344621496, "loss": 0.0234, "step": 5278 }, { "epoch": 0.2501303008765695, "grad_norm": 0.03759765625, "learning_rate": 0.0001707606810289152, "loss": 0.001, "step": 5279 }, { "epoch": 0.2501776830135039, "grad_norm": 0.6796875, "learning_rate": 0.0001707501570422189, "loss": 1.0387, "step": 5280 }, { "epoch": 0.2502250651504383, "grad_norm": 0.59375, "learning_rate": 0.0001707396314863595, "loss": 0.0456, "step": 5281 }, { "epoch": 0.25027244728737263, "grad_norm": 0.5546875, "learning_rate": 0.0001707291043615704, "loss": 0.9844, "step": 5282 }, { "epoch": 0.250319829424307, "grad_norm": 0.33984375, "learning_rate": 0.00017071857566808513, "loss": 0.1872, "step": 5283 }, { "epoch": 0.2503672115612414, "grad_norm": 0.458984375, "learning_rate": 0.00017070804540613718, "loss": 0.1904, "step": 5284 }, { "epoch": 0.2504145936981758, "grad_norm": 0.671875, "learning_rate": 0.0001706975135759601, "loss": 1.0268, "step": 5285 }, { "epoch": 0.25046197583511015, "grad_norm": 0.58984375, "learning_rate": 0.0001706869801777874, "loss": 1.2751, "step": 5286 }, { "epoch": 0.25050935797204454, "grad_norm": 0.78515625, "learning_rate": 0.00017067644521185288, "loss": 1.1897, "step": 5287 }, { "epoch": 0.25055674010897894, "grad_norm": 0.6640625, "learning_rate": 0.00017066590867838999, "loss": 1.3925, "step": 5288 }, { "epoch": 0.2506041222459133, "grad_norm": 0.4921875, "learning_rate": 0.00017065537057763257, "loss": 0.1312, "step": 5289 }, { "epoch": 0.25065150438284767, "grad_norm": 0.1953125, "learning_rate": 0.00017064483090981428, "loss": 0.0314, "step": 5290 }, { "epoch": 0.25069888651978206, "grad_norm": 0.6640625, "learning_rate": 0.00017063428967516888, "loss": 1.44, "step": 5291 }, { "epoch": 0.2507462686567164, "grad_norm": 0.78125, "learning_rate": 0.0001706237468739302, "loss": 0.7273, "step": 5292 }, { "epoch": 0.2507936507936508, "grad_norm": 0.298828125, "learning_rate": 0.000170613202506332, "loss": 0.0644, "step": 5293 }, { "epoch": 0.2508410329305852, "grad_norm": 0.546875, "learning_rate": 0.00017060265657260822, "loss": 1.2131, "step": 5294 }, { "epoch": 0.2508884150675195, "grad_norm": 0.86328125, "learning_rate": 0.00017059210907299267, "loss": 0.408, "step": 5295 }, { "epoch": 0.2509357972044539, "grad_norm": 0.76171875, "learning_rate": 0.00017058156000771937, "loss": 1.3831, "step": 5296 }, { "epoch": 0.2509831793413883, "grad_norm": 0.267578125, "learning_rate": 0.00017057100937702222, "loss": 0.134, "step": 5297 }, { "epoch": 0.25103056147832264, "grad_norm": 0.61328125, "learning_rate": 0.00017056045718113528, "loss": 1.0522, "step": 5298 }, { "epoch": 0.25107794361525704, "grad_norm": 0.69921875, "learning_rate": 0.00017054990342029255, "loss": 1.4229, "step": 5299 }, { "epoch": 0.25112532575219143, "grad_norm": 0.006072998046875, "learning_rate": 0.0001705393480947281, "loss": 0.0003, "step": 5300 }, { "epoch": 0.2511727078891258, "grad_norm": 0.18359375, "learning_rate": 0.00017052879120467605, "loss": 0.0204, "step": 5301 }, { "epoch": 0.25122009002606016, "grad_norm": 0.5859375, "learning_rate": 0.00017051823275037053, "loss": 1.1679, "step": 5302 }, { "epoch": 0.25126747216299455, "grad_norm": 0.84375, "learning_rate": 0.00017050767273204574, "loss": 0.9325, "step": 5303 }, { "epoch": 0.25131485429992895, "grad_norm": 0.447265625, "learning_rate": 0.00017049711114993588, "loss": 0.5116, "step": 5304 }, { "epoch": 0.2513622364368633, "grad_norm": 0.06689453125, "learning_rate": 0.00017048654800427512, "loss": 0.0021, "step": 5305 }, { "epoch": 0.2514096185737977, "grad_norm": 0.11376953125, "learning_rate": 0.00017047598329529787, "loss": 0.0169, "step": 5306 }, { "epoch": 0.25145700071073207, "grad_norm": 0.07177734375, "learning_rate": 0.00017046541702323836, "loss": 0.0057, "step": 5307 }, { "epoch": 0.2515043828476664, "grad_norm": 0.640625, "learning_rate": 0.00017045484918833093, "loss": 1.2136, "step": 5308 }, { "epoch": 0.2515517649846008, "grad_norm": 0.47265625, "learning_rate": 0.00017044427979081002, "loss": 0.206, "step": 5309 }, { "epoch": 0.2515991471215352, "grad_norm": 0.78125, "learning_rate": 0.00017043370883091, "loss": 0.695, "step": 5310 }, { "epoch": 0.25164652925846953, "grad_norm": 0.197265625, "learning_rate": 0.00017042313630886535, "loss": 0.1355, "step": 5311 }, { "epoch": 0.2516939113954039, "grad_norm": 0.56640625, "learning_rate": 0.00017041256222491056, "loss": 0.4462, "step": 5312 }, { "epoch": 0.2517412935323383, "grad_norm": 0.234375, "learning_rate": 0.00017040198657928011, "loss": 0.1635, "step": 5313 }, { "epoch": 0.2517886756692727, "grad_norm": 0.75, "learning_rate": 0.00017039140937220862, "loss": 1.4779, "step": 5314 }, { "epoch": 0.25183605780620705, "grad_norm": 0.640625, "learning_rate": 0.0001703808306039306, "loss": 1.2994, "step": 5315 }, { "epoch": 0.25188343994314144, "grad_norm": 0.59375, "learning_rate": 0.0001703702502746808, "loss": 1.1394, "step": 5316 }, { "epoch": 0.25193082208007583, "grad_norm": 0.78125, "learning_rate": 0.00017035966838469376, "loss": 0.9651, "step": 5317 }, { "epoch": 0.25197820421701017, "grad_norm": 0.79296875, "learning_rate": 0.0001703490849342042, "loss": 1.0763, "step": 5318 }, { "epoch": 0.25202558635394456, "grad_norm": 0.64453125, "learning_rate": 0.00017033849992344687, "loss": 0.9423, "step": 5319 }, { "epoch": 0.25207296849087896, "grad_norm": 0.65234375, "learning_rate": 0.00017032791335265657, "loss": 0.0622, "step": 5320 }, { "epoch": 0.2521203506278133, "grad_norm": 0.91796875, "learning_rate": 0.00017031732522206804, "loss": 0.1328, "step": 5321 }, { "epoch": 0.2521677327647477, "grad_norm": 0.5546875, "learning_rate": 0.00017030673553191611, "loss": 0.5537, "step": 5322 }, { "epoch": 0.2522151149016821, "grad_norm": 0.56640625, "learning_rate": 0.0001702961442824357, "loss": 1.0053, "step": 5323 }, { "epoch": 0.2522624970386164, "grad_norm": 0.26171875, "learning_rate": 0.00017028555147386172, "loss": 0.1488, "step": 5324 }, { "epoch": 0.2523098791755508, "grad_norm": 0.65234375, "learning_rate": 0.000170274957106429, "loss": 0.6858, "step": 5325 }, { "epoch": 0.2523572613124852, "grad_norm": 0.68359375, "learning_rate": 0.00017026436118037266, "loss": 1.3075, "step": 5326 }, { "epoch": 0.25240464344941954, "grad_norm": 0.80078125, "learning_rate": 0.00017025376369592758, "loss": 1.209, "step": 5327 }, { "epoch": 0.25245202558635393, "grad_norm": 0.50390625, "learning_rate": 0.00017024316465332886, "loss": 0.5749, "step": 5328 }, { "epoch": 0.2524994077232883, "grad_norm": 0.2373046875, "learning_rate": 0.00017023256405281157, "loss": 0.128, "step": 5329 }, { "epoch": 0.2525467898602227, "grad_norm": 0.26953125, "learning_rate": 0.0001702219618946108, "loss": 0.0648, "step": 5330 }, { "epoch": 0.25259417199715706, "grad_norm": 0.3046875, "learning_rate": 0.00017021135817896178, "loss": 0.0228, "step": 5331 }, { "epoch": 0.25264155413409145, "grad_norm": 0.58203125, "learning_rate": 0.00017020075290609957, "loss": 1.1479, "step": 5332 }, { "epoch": 0.25268893627102584, "grad_norm": 0.74609375, "learning_rate": 0.00017019014607625943, "loss": 1.4094, "step": 5333 }, { "epoch": 0.2527363184079602, "grad_norm": 0.53515625, "learning_rate": 0.00017017953768967662, "loss": 1.2996, "step": 5334 }, { "epoch": 0.2527837005448946, "grad_norm": 0.69140625, "learning_rate": 0.00017016892774658642, "loss": 0.0563, "step": 5335 }, { "epoch": 0.25283108268182897, "grad_norm": 0.1943359375, "learning_rate": 0.00017015831624722413, "loss": 0.0816, "step": 5336 }, { "epoch": 0.2528784648187633, "grad_norm": 0.2412109375, "learning_rate": 0.00017014770319182513, "loss": 0.0362, "step": 5337 }, { "epoch": 0.2529258469556977, "grad_norm": 0.71875, "learning_rate": 0.0001701370885806248, "loss": 0.7215, "step": 5338 }, { "epoch": 0.2529732290926321, "grad_norm": 0.435546875, "learning_rate": 0.00017012647241385856, "loss": 0.0291, "step": 5339 }, { "epoch": 0.25302061122956643, "grad_norm": 0.58984375, "learning_rate": 0.00017011585469176184, "loss": 1.0139, "step": 5340 }, { "epoch": 0.2530679933665008, "grad_norm": 0.7421875, "learning_rate": 0.00017010523541457015, "loss": 0.8763, "step": 5341 }, { "epoch": 0.2531153755034352, "grad_norm": 0.6484375, "learning_rate": 0.000170094614582519, "loss": 1.1539, "step": 5342 }, { "epoch": 0.2531627576403696, "grad_norm": 0.90625, "learning_rate": 0.00017008399219584398, "loss": 0.7403, "step": 5343 }, { "epoch": 0.25321013977730394, "grad_norm": 0.66015625, "learning_rate": 0.00017007336825478064, "loss": 0.9521, "step": 5344 }, { "epoch": 0.25325752191423834, "grad_norm": 0.26953125, "learning_rate": 0.00017006274275956461, "loss": 0.0444, "step": 5345 }, { "epoch": 0.25330490405117273, "grad_norm": 0.66015625, "learning_rate": 0.0001700521157104316, "loss": 0.6661, "step": 5346 }, { "epoch": 0.25335228618810707, "grad_norm": 0.60546875, "learning_rate": 0.00017004148710761732, "loss": 1.1629, "step": 5347 }, { "epoch": 0.25339966832504146, "grad_norm": 0.640625, "learning_rate": 0.00017003085695135742, "loss": 1.4518, "step": 5348 }, { "epoch": 0.25344705046197585, "grad_norm": 0.71875, "learning_rate": 0.0001700202252418877, "loss": 1.1735, "step": 5349 }, { "epoch": 0.2534944325989102, "grad_norm": 0.69140625, "learning_rate": 0.000170009591979444, "loss": 1.2322, "step": 5350 }, { "epoch": 0.2535418147358446, "grad_norm": 0.50390625, "learning_rate": 0.00016999895716426208, "loss": 0.4126, "step": 5351 }, { "epoch": 0.253589196872779, "grad_norm": 0.51171875, "learning_rate": 0.00016998832079657787, "loss": 0.0597, "step": 5352 }, { "epoch": 0.2536365790097133, "grad_norm": 1.125, "learning_rate": 0.00016997768287662724, "loss": 0.3836, "step": 5353 }, { "epoch": 0.2536839611466477, "grad_norm": 0.7578125, "learning_rate": 0.00016996704340464612, "loss": 1.1632, "step": 5354 }, { "epoch": 0.2537313432835821, "grad_norm": 0.55859375, "learning_rate": 0.0001699564023808706, "loss": 1.2416, "step": 5355 }, { "epoch": 0.25377872542051644, "grad_norm": 0.6328125, "learning_rate": 0.0001699457598055365, "loss": 0.913, "step": 5356 }, { "epoch": 0.25382610755745083, "grad_norm": 0.65625, "learning_rate": 0.00016993511567887996, "loss": 1.0357, "step": 5357 }, { "epoch": 0.2538734896943852, "grad_norm": 0.171875, "learning_rate": 0.00016992447000113706, "loss": 0.0206, "step": 5358 }, { "epoch": 0.2539208718313196, "grad_norm": 0.71875, "learning_rate": 0.00016991382277254391, "loss": 0.8985, "step": 5359 }, { "epoch": 0.25396825396825395, "grad_norm": 0.8046875, "learning_rate": 0.0001699031739933366, "loss": 1.1099, "step": 5360 }, { "epoch": 0.25401563610518835, "grad_norm": 0.8046875, "learning_rate": 0.00016989252366375138, "loss": 0.32, "step": 5361 }, { "epoch": 0.25406301824212274, "grad_norm": 1.3125, "learning_rate": 0.00016988187178402443, "loss": 0.3791, "step": 5362 }, { "epoch": 0.2541104003790571, "grad_norm": 0.72265625, "learning_rate": 0.00016987121835439198, "loss": 1.2845, "step": 5363 }, { "epoch": 0.25415778251599147, "grad_norm": 0.62109375, "learning_rate": 0.00016986056337509034, "loss": 0.893, "step": 5364 }, { "epoch": 0.25420516465292586, "grad_norm": 0.6171875, "learning_rate": 0.00016984990684635584, "loss": 0.888, "step": 5365 }, { "epoch": 0.2542525467898602, "grad_norm": 0.63671875, "learning_rate": 0.00016983924876842478, "loss": 1.352, "step": 5366 }, { "epoch": 0.2542999289267946, "grad_norm": 0.53515625, "learning_rate": 0.00016982858914153356, "loss": 0.3745, "step": 5367 }, { "epoch": 0.254347311063729, "grad_norm": 0.6484375, "learning_rate": 0.00016981792796591866, "loss": 1.3169, "step": 5368 }, { "epoch": 0.2543946932006633, "grad_norm": 0.58203125, "learning_rate": 0.00016980726524181642, "loss": 1.0266, "step": 5369 }, { "epoch": 0.2544420753375977, "grad_norm": 0.64453125, "learning_rate": 0.00016979660096946343, "loss": 0.8766, "step": 5370 }, { "epoch": 0.2544894574745321, "grad_norm": 0.640625, "learning_rate": 0.0001697859351490962, "loss": 1.0002, "step": 5371 }, { "epoch": 0.2545368396114665, "grad_norm": 0.337890625, "learning_rate": 0.00016977526778095121, "loss": 0.0449, "step": 5372 }, { "epoch": 0.25458422174840084, "grad_norm": 0.703125, "learning_rate": 0.00016976459886526514, "loss": 1.2232, "step": 5373 }, { "epoch": 0.25463160388533523, "grad_norm": 0.5078125, "learning_rate": 0.00016975392840227455, "loss": 0.6692, "step": 5374 }, { "epoch": 0.2546789860222696, "grad_norm": 1.1640625, "learning_rate": 0.00016974325639221616, "loss": 1.3018, "step": 5375 }, { "epoch": 0.25472636815920396, "grad_norm": 1.640625, "learning_rate": 0.00016973258283532657, "loss": 0.1251, "step": 5376 }, { "epoch": 0.25477375029613836, "grad_norm": 0.69921875, "learning_rate": 0.0001697219077318426, "loss": 0.9578, "step": 5377 }, { "epoch": 0.25482113243307275, "grad_norm": 0.515625, "learning_rate": 0.00016971123108200102, "loss": 0.7342, "step": 5378 }, { "epoch": 0.2548685145700071, "grad_norm": 0.58203125, "learning_rate": 0.00016970055288603858, "loss": 1.1069, "step": 5379 }, { "epoch": 0.2549158967069415, "grad_norm": 0.380859375, "learning_rate": 0.00016968987314419203, "loss": 0.0442, "step": 5380 }, { "epoch": 0.2549632788438759, "grad_norm": 0.61328125, "learning_rate": 0.00016967919185669842, "loss": 1.1472, "step": 5381 }, { "epoch": 0.2550106609808102, "grad_norm": 0.80859375, "learning_rate": 0.0001696685090237945, "loss": 0.598, "step": 5382 }, { "epoch": 0.2550580431177446, "grad_norm": 0.70703125, "learning_rate": 0.00016965782464571728, "loss": 0.3313, "step": 5383 }, { "epoch": 0.255105425254679, "grad_norm": 0.2490234375, "learning_rate": 0.0001696471387227037, "loss": 0.1466, "step": 5384 }, { "epoch": 0.25515280739161333, "grad_norm": 0.6484375, "learning_rate": 0.00016963645125499079, "loss": 0.8998, "step": 5385 }, { "epoch": 0.2552001895285477, "grad_norm": 0.73046875, "learning_rate": 0.00016962576224281554, "loss": 0.1095, "step": 5386 }, { "epoch": 0.2552475716654821, "grad_norm": 0.3125, "learning_rate": 0.00016961507168641507, "loss": 0.0474, "step": 5387 }, { "epoch": 0.2552949538024165, "grad_norm": 0.6796875, "learning_rate": 0.00016960437958602644, "loss": 0.7683, "step": 5388 }, { "epoch": 0.25534233593935085, "grad_norm": 0.640625, "learning_rate": 0.00016959368594188681, "loss": 1.4062, "step": 5389 }, { "epoch": 0.25538971807628524, "grad_norm": 0.68359375, "learning_rate": 0.00016958299075423336, "loss": 0.9186, "step": 5390 }, { "epoch": 0.25543710021321964, "grad_norm": 0.26953125, "learning_rate": 0.00016957229402330329, "loss": 0.0289, "step": 5391 }, { "epoch": 0.255484482350154, "grad_norm": 0.1298828125, "learning_rate": 0.0001695615957493338, "loss": 0.0273, "step": 5392 }, { "epoch": 0.25553186448708837, "grad_norm": 0.515625, "learning_rate": 0.00016955089593256227, "loss": 0.5225, "step": 5393 }, { "epoch": 0.25557924662402276, "grad_norm": 0.64453125, "learning_rate": 0.00016954019457322595, "loss": 1.5962, "step": 5394 }, { "epoch": 0.2556266287609571, "grad_norm": 0.5234375, "learning_rate": 0.00016952949167156216, "loss": 0.8025, "step": 5395 }, { "epoch": 0.2556740108978915, "grad_norm": 0.73828125, "learning_rate": 0.00016951878722780832, "loss": 0.8912, "step": 5396 }, { "epoch": 0.2557213930348259, "grad_norm": 0.46875, "learning_rate": 0.0001695080812422018, "loss": 0.4736, "step": 5397 }, { "epoch": 0.2557687751717602, "grad_norm": 1.2421875, "learning_rate": 0.00016949737371498008, "loss": 0.803, "step": 5398 }, { "epoch": 0.2558161573086946, "grad_norm": 0.640625, "learning_rate": 0.00016948666464638068, "loss": 1.0973, "step": 5399 }, { "epoch": 0.255863539445629, "grad_norm": 0.451171875, "learning_rate": 0.00016947595403664101, "loss": 0.0214, "step": 5400 }, { "epoch": 0.2559109215825634, "grad_norm": 1.0390625, "learning_rate": 0.00016946524188599872, "loss": 1.081, "step": 5401 }, { "epoch": 0.25595830371949774, "grad_norm": 1.0546875, "learning_rate": 0.00016945452819469136, "loss": 0.5477, "step": 5402 }, { "epoch": 0.25600568585643213, "grad_norm": 0.3203125, "learning_rate": 0.00016944381296295653, "loss": 0.1643, "step": 5403 }, { "epoch": 0.2560530679933665, "grad_norm": 0.443359375, "learning_rate": 0.00016943309619103192, "loss": 0.2134, "step": 5404 }, { "epoch": 0.25610045013030086, "grad_norm": 1.046875, "learning_rate": 0.00016942237787915515, "loss": 0.1236, "step": 5405 }, { "epoch": 0.25614783226723525, "grad_norm": 0.4609375, "learning_rate": 0.00016941165802756403, "loss": 0.0374, "step": 5406 }, { "epoch": 0.25619521440416965, "grad_norm": 0.5859375, "learning_rate": 0.00016940093663649622, "loss": 0.8897, "step": 5407 }, { "epoch": 0.256242596541104, "grad_norm": 0.53125, "learning_rate": 0.0001693902137061896, "loss": 1.0275, "step": 5408 }, { "epoch": 0.2562899786780384, "grad_norm": 0.337890625, "learning_rate": 0.00016937948923688193, "loss": 0.089, "step": 5409 }, { "epoch": 0.25633736081497277, "grad_norm": 0.8359375, "learning_rate": 0.0001693687632288111, "loss": 1.2504, "step": 5410 }, { "epoch": 0.2563847429519071, "grad_norm": 0.1962890625, "learning_rate": 0.000169358035682215, "loss": 0.1348, "step": 5411 }, { "epoch": 0.2564321250888415, "grad_norm": 0.2412109375, "learning_rate": 0.00016934730659733154, "loss": 0.1577, "step": 5412 }, { "epoch": 0.2564795072257759, "grad_norm": 0.65234375, "learning_rate": 0.00016933657597439865, "loss": 1.201, "step": 5413 }, { "epoch": 0.25652688936271023, "grad_norm": 0.5390625, "learning_rate": 0.00016932584381365438, "loss": 0.5596, "step": 5414 }, { "epoch": 0.2565742714996446, "grad_norm": 0.48828125, "learning_rate": 0.00016931511011533673, "loss": 0.4878, "step": 5415 }, { "epoch": 0.256621653636579, "grad_norm": 0.5546875, "learning_rate": 0.00016930437487968378, "loss": 0.0254, "step": 5416 }, { "epoch": 0.2566690357735134, "grad_norm": 0.71484375, "learning_rate": 0.0001692936381069336, "loss": 0.7329, "step": 5417 }, { "epoch": 0.25671641791044775, "grad_norm": 0.36328125, "learning_rate": 0.00016928289979732436, "loss": 0.0926, "step": 5418 }, { "epoch": 0.25676380004738214, "grad_norm": 0.671875, "learning_rate": 0.0001692721599510942, "loss": 0.4945, "step": 5419 }, { "epoch": 0.25681118218431653, "grad_norm": 0.42578125, "learning_rate": 0.0001692614185684813, "loss": 0.0455, "step": 5420 }, { "epoch": 0.25685856432125087, "grad_norm": 0.70703125, "learning_rate": 0.00016925067564972393, "loss": 1.5666, "step": 5421 }, { "epoch": 0.25690594645818526, "grad_norm": 0.1953125, "learning_rate": 0.0001692399311950603, "loss": 0.1474, "step": 5422 }, { "epoch": 0.25695332859511966, "grad_norm": 0.63671875, "learning_rate": 0.0001692291852047288, "loss": 1.1678, "step": 5423 }, { "epoch": 0.257000710732054, "grad_norm": 0.6328125, "learning_rate": 0.00016921843767896765, "loss": 0.8908, "step": 5424 }, { "epoch": 0.2570480928689884, "grad_norm": 0.58984375, "learning_rate": 0.00016920768861801533, "loss": 0.6911, "step": 5425 }, { "epoch": 0.2570954750059228, "grad_norm": 0.216796875, "learning_rate": 0.0001691969380221102, "loss": 0.1496, "step": 5426 }, { "epoch": 0.2571428571428571, "grad_norm": 0.7890625, "learning_rate": 0.00016918618589149064, "loss": 0.8787, "step": 5427 }, { "epoch": 0.2571902392797915, "grad_norm": 0.671875, "learning_rate": 0.0001691754322263952, "loss": 1.2739, "step": 5428 }, { "epoch": 0.2572376214167259, "grad_norm": 0.6796875, "learning_rate": 0.00016916467702706236, "loss": 0.8478, "step": 5429 }, { "epoch": 0.2572850035536603, "grad_norm": 0.55078125, "learning_rate": 0.00016915392029373064, "loss": 0.8881, "step": 5430 }, { "epoch": 0.25733238569059463, "grad_norm": 0.703125, "learning_rate": 0.00016914316202663862, "loss": 1.5181, "step": 5431 }, { "epoch": 0.257379767827529, "grad_norm": 0.578125, "learning_rate": 0.00016913240222602493, "loss": 0.5271, "step": 5432 }, { "epoch": 0.2574271499644634, "grad_norm": 0.5, "learning_rate": 0.0001691216408921282, "loss": 0.6417, "step": 5433 }, { "epoch": 0.25747453210139776, "grad_norm": 0.314453125, "learning_rate": 0.0001691108780251871, "loss": 0.1801, "step": 5434 }, { "epoch": 0.25752191423833215, "grad_norm": 0.5859375, "learning_rate": 0.00016910011362544035, "loss": 0.7382, "step": 5435 }, { "epoch": 0.25756929637526654, "grad_norm": 0.59765625, "learning_rate": 0.00016908934769312666, "loss": 1.7052, "step": 5436 }, { "epoch": 0.2576166785122009, "grad_norm": 0.921875, "learning_rate": 0.00016907858022848483, "loss": 1.3022, "step": 5437 }, { "epoch": 0.2576640606491353, "grad_norm": 0.56640625, "learning_rate": 0.00016906781123175366, "loss": 0.5217, "step": 5438 }, { "epoch": 0.25771144278606967, "grad_norm": 0.7421875, "learning_rate": 0.000169057040703172, "loss": 0.6668, "step": 5439 }, { "epoch": 0.257758824923004, "grad_norm": 0.6484375, "learning_rate": 0.00016904626864297875, "loss": 1.2352, "step": 5440 }, { "epoch": 0.2578062070599384, "grad_norm": 0.8359375, "learning_rate": 0.00016903549505141284, "loss": 0.9384, "step": 5441 }, { "epoch": 0.2578535891968728, "grad_norm": 0.62890625, "learning_rate": 0.00016902471992871315, "loss": 1.3833, "step": 5442 }, { "epoch": 0.2579009713338071, "grad_norm": 0.55078125, "learning_rate": 0.0001690139432751187, "loss": 0.7121, "step": 5443 }, { "epoch": 0.2579483534707415, "grad_norm": 0.625, "learning_rate": 0.00016900316509086847, "loss": 1.2244, "step": 5444 }, { "epoch": 0.2579957356076759, "grad_norm": 0.10791015625, "learning_rate": 0.00016899238537620154, "loss": 0.0141, "step": 5445 }, { "epoch": 0.2580431177446103, "grad_norm": 0.73828125, "learning_rate": 0.00016898160413135701, "loss": 1.0926, "step": 5446 }, { "epoch": 0.25809049988154464, "grad_norm": 0.6328125, "learning_rate": 0.00016897082135657399, "loss": 1.0492, "step": 5447 }, { "epoch": 0.25813788201847904, "grad_norm": 0.38671875, "learning_rate": 0.00016896003705209157, "loss": 0.1845, "step": 5448 }, { "epoch": 0.25818526415541343, "grad_norm": 0.62890625, "learning_rate": 0.00016894925121814906, "loss": 1.3174, "step": 5449 }, { "epoch": 0.25823264629234777, "grad_norm": 0.296875, "learning_rate": 0.00016893846385498552, "loss": 0.0335, "step": 5450 }, { "epoch": 0.25828002842928216, "grad_norm": 0.67578125, "learning_rate": 0.00016892767496284034, "loss": 1.2171, "step": 5451 }, { "epoch": 0.25832741056621655, "grad_norm": 0.70703125, "learning_rate": 0.00016891688454195273, "loss": 1.2409, "step": 5452 }, { "epoch": 0.2583747927031509, "grad_norm": 0.48046875, "learning_rate": 0.000168906092592562, "loss": 0.5802, "step": 5453 }, { "epoch": 0.2584221748400853, "grad_norm": 0.63671875, "learning_rate": 0.00016889529911490753, "loss": 0.9941, "step": 5454 }, { "epoch": 0.2584695569770197, "grad_norm": 0.002838134765625, "learning_rate": 0.00016888450410922876, "loss": 0.0003, "step": 5455 }, { "epoch": 0.258516939113954, "grad_norm": 0.7890625, "learning_rate": 0.000168873707575765, "loss": 0.2259, "step": 5456 }, { "epoch": 0.2585643212508884, "grad_norm": 0.6015625, "learning_rate": 0.00016886290951475584, "loss": 0.9563, "step": 5457 }, { "epoch": 0.2586117033878228, "grad_norm": 0.51171875, "learning_rate": 0.00016885210992644066, "loss": 0.0681, "step": 5458 }, { "epoch": 0.2586590855247572, "grad_norm": 0.7734375, "learning_rate": 0.000168841308811059, "loss": 1.1659, "step": 5459 }, { "epoch": 0.25870646766169153, "grad_norm": 0.640625, "learning_rate": 0.00016883050616885043, "loss": 1.6042, "step": 5460 }, { "epoch": 0.2587538497986259, "grad_norm": 0.87109375, "learning_rate": 0.0001688197020000546, "loss": 0.6201, "step": 5461 }, { "epoch": 0.2588012319355603, "grad_norm": 0.6328125, "learning_rate": 0.00016880889630491104, "loss": 1.339, "step": 5462 }, { "epoch": 0.25884861407249465, "grad_norm": 0.48046875, "learning_rate": 0.00016879808908365945, "loss": 1.2221, "step": 5463 }, { "epoch": 0.25889599620942905, "grad_norm": 0.72265625, "learning_rate": 0.00016878728033653957, "loss": 1.1975, "step": 5464 }, { "epoch": 0.25894337834636344, "grad_norm": 0.66015625, "learning_rate": 0.00016877647006379104, "loss": 0.8915, "step": 5465 }, { "epoch": 0.2589907604832978, "grad_norm": 0.498046875, "learning_rate": 0.00016876565826565366, "loss": 0.2214, "step": 5466 }, { "epoch": 0.25903814262023217, "grad_norm": 0.53515625, "learning_rate": 0.00016875484494236726, "loss": 0.0409, "step": 5467 }, { "epoch": 0.25908552475716656, "grad_norm": 0.74609375, "learning_rate": 0.00016874403009417162, "loss": 1.0784, "step": 5468 }, { "epoch": 0.2591329068941009, "grad_norm": 0.1640625, "learning_rate": 0.0001687332137213066, "loss": 0.0318, "step": 5469 }, { "epoch": 0.2591802890310353, "grad_norm": 0.6640625, "learning_rate": 0.0001687223958240121, "loss": 1.1415, "step": 5470 }, { "epoch": 0.2592276711679697, "grad_norm": 0.60546875, "learning_rate": 0.00016871157640252807, "loss": 0.7716, "step": 5471 }, { "epoch": 0.259275053304904, "grad_norm": 0.404296875, "learning_rate": 0.00016870075545709449, "loss": 0.0613, "step": 5472 }, { "epoch": 0.2593224354418384, "grad_norm": 0.462890625, "learning_rate": 0.0001686899329879513, "loss": 0.0502, "step": 5473 }, { "epoch": 0.2593698175787728, "grad_norm": 0.283203125, "learning_rate": 0.00016867910899533858, "loss": 0.0127, "step": 5474 }, { "epoch": 0.2594171997157072, "grad_norm": 0.58203125, "learning_rate": 0.00016866828347949638, "loss": 0.7669, "step": 5475 }, { "epoch": 0.25946458185264154, "grad_norm": 0.6875, "learning_rate": 0.0001686574564406648, "loss": 0.7168, "step": 5476 }, { "epoch": 0.25951196398957593, "grad_norm": 0.6640625, "learning_rate": 0.00016864662787908393, "loss": 0.9622, "step": 5477 }, { "epoch": 0.2595593461265103, "grad_norm": 0.7578125, "learning_rate": 0.00016863579779499398, "loss": 1.2405, "step": 5478 }, { "epoch": 0.25960672826344466, "grad_norm": 0.71484375, "learning_rate": 0.00016862496618863518, "loss": 0.9796, "step": 5479 }, { "epoch": 0.25965411040037906, "grad_norm": 0.546875, "learning_rate": 0.0001686141330602477, "loss": 1.1535, "step": 5480 }, { "epoch": 0.25970149253731345, "grad_norm": 0.498046875, "learning_rate": 0.00016860329841007182, "loss": 1.0441, "step": 5481 }, { "epoch": 0.2597488746742478, "grad_norm": 0.030029296875, "learning_rate": 0.0001685924622383479, "loss": 0.0022, "step": 5482 }, { "epoch": 0.2597962568111822, "grad_norm": 0.7109375, "learning_rate": 0.00016858162454531618, "loss": 1.1805, "step": 5483 }, { "epoch": 0.2598436389481166, "grad_norm": 0.103515625, "learning_rate": 0.0001685707853312171, "loss": 0.0136, "step": 5484 }, { "epoch": 0.2598910210850509, "grad_norm": 0.6328125, "learning_rate": 0.00016855994459629106, "loss": 0.9291, "step": 5485 }, { "epoch": 0.2599384032219853, "grad_norm": 0.80078125, "learning_rate": 0.00016854910234077842, "loss": 1.4879, "step": 5486 }, { "epoch": 0.2599857853589197, "grad_norm": 0.8828125, "learning_rate": 0.00016853825856491972, "loss": 1.1442, "step": 5487 }, { "epoch": 0.2600331674958541, "grad_norm": 0.2119140625, "learning_rate": 0.0001685274132689555, "loss": 0.1475, "step": 5488 }, { "epoch": 0.2600805496327884, "grad_norm": 0.84765625, "learning_rate": 0.00016851656645312622, "loss": 1.3088, "step": 5489 }, { "epoch": 0.2601279317697228, "grad_norm": 0.66015625, "learning_rate": 0.00016850571811767245, "loss": 0.2319, "step": 5490 }, { "epoch": 0.2601753139066572, "grad_norm": 0.6640625, "learning_rate": 0.00016849486826283486, "loss": 1.084, "step": 5491 }, { "epoch": 0.26022269604359155, "grad_norm": 0.69921875, "learning_rate": 0.000168484016888854, "loss": 1.3469, "step": 5492 }, { "epoch": 0.26027007818052594, "grad_norm": 0.5859375, "learning_rate": 0.00016847316399597065, "loss": 1.4763, "step": 5493 }, { "epoch": 0.26031746031746034, "grad_norm": 0.431640625, "learning_rate": 0.00016846230958442542, "loss": 0.6448, "step": 5494 }, { "epoch": 0.2603648424543947, "grad_norm": 0.75, "learning_rate": 0.0001684514536544591, "loss": 1.0415, "step": 5495 }, { "epoch": 0.26041222459132907, "grad_norm": 0.85546875, "learning_rate": 0.00016844059620631244, "loss": 1.5532, "step": 5496 }, { "epoch": 0.26045960672826346, "grad_norm": 0.74609375, "learning_rate": 0.00016842973724022625, "loss": 0.0721, "step": 5497 }, { "epoch": 0.2605069888651978, "grad_norm": 0.70703125, "learning_rate": 0.0001684188767564414, "loss": 0.976, "step": 5498 }, { "epoch": 0.2605543710021322, "grad_norm": 0.058837890625, "learning_rate": 0.0001684080147551987, "loss": 0.0077, "step": 5499 }, { "epoch": 0.2606017531390666, "grad_norm": 0.69921875, "learning_rate": 0.00016839715123673915, "loss": 1.5189, "step": 5500 }, { "epoch": 0.2606491352760009, "grad_norm": 0.66015625, "learning_rate": 0.00016838628620130362, "loss": 0.9867, "step": 5501 }, { "epoch": 0.2606965174129353, "grad_norm": 0.349609375, "learning_rate": 0.00016837541964913306, "loss": 0.4231, "step": 5502 }, { "epoch": 0.2607438995498697, "grad_norm": 1.09375, "learning_rate": 0.00016836455158046856, "loss": 1.5931, "step": 5503 }, { "epoch": 0.2607912816868041, "grad_norm": 0.5546875, "learning_rate": 0.00016835368199555112, "loss": 1.2388, "step": 5504 }, { "epoch": 0.26083866382373844, "grad_norm": 0.29296875, "learning_rate": 0.00016834281089462186, "loss": 0.0145, "step": 5505 }, { "epoch": 0.26088604596067283, "grad_norm": 0.74609375, "learning_rate": 0.00016833193827792177, "loss": 0.621, "step": 5506 }, { "epoch": 0.2609334280976072, "grad_norm": 0.51171875, "learning_rate": 0.00016832106414569213, "loss": 0.7694, "step": 5507 }, { "epoch": 0.26098081023454156, "grad_norm": 0.69921875, "learning_rate": 0.00016831018849817407, "loss": 0.7878, "step": 5508 }, { "epoch": 0.26102819237147595, "grad_norm": 1.6015625, "learning_rate": 0.00016829931133560875, "loss": 0.0324, "step": 5509 }, { "epoch": 0.26107557450841035, "grad_norm": 0.16015625, "learning_rate": 0.00016828843265823747, "loss": 0.0147, "step": 5510 }, { "epoch": 0.2611229566453447, "grad_norm": 0.671875, "learning_rate": 0.00016827755246630148, "loss": 0.7277, "step": 5511 }, { "epoch": 0.2611703387822791, "grad_norm": 0.5625, "learning_rate": 0.00016826667076004213, "loss": 0.5882, "step": 5512 }, { "epoch": 0.26121772091921347, "grad_norm": 0.84765625, "learning_rate": 0.0001682557875397007, "loss": 0.8579, "step": 5513 }, { "epoch": 0.2612651030561478, "grad_norm": 0.470703125, "learning_rate": 0.00016824490280551864, "loss": 0.2415, "step": 5514 }, { "epoch": 0.2613124851930822, "grad_norm": 0.640625, "learning_rate": 0.0001682340165577373, "loss": 0.7873, "step": 5515 }, { "epoch": 0.2613598673300166, "grad_norm": 0.67578125, "learning_rate": 0.00016822312879659817, "loss": 1.012, "step": 5516 }, { "epoch": 0.261407249466951, "grad_norm": 0.50390625, "learning_rate": 0.0001682122395223427, "loss": 0.0454, "step": 5517 }, { "epoch": 0.2614546316038853, "grad_norm": 0.8046875, "learning_rate": 0.0001682013487352124, "loss": 0.176, "step": 5518 }, { "epoch": 0.2615020137408197, "grad_norm": 0.96875, "learning_rate": 0.00016819045643544885, "loss": 0.254, "step": 5519 }, { "epoch": 0.2615493958777541, "grad_norm": 0.6171875, "learning_rate": 0.00016817956262329362, "loss": 0.8728, "step": 5520 }, { "epoch": 0.26159677801468845, "grad_norm": 0.94921875, "learning_rate": 0.0001681686672989883, "loss": 1.3349, "step": 5521 }, { "epoch": 0.26164416015162284, "grad_norm": 0.76171875, "learning_rate": 0.00016815777046277455, "loss": 0.5688, "step": 5522 }, { "epoch": 0.26169154228855723, "grad_norm": 0.70703125, "learning_rate": 0.00016814687211489404, "loss": 1.051, "step": 5523 }, { "epoch": 0.26173892442549157, "grad_norm": 0.57421875, "learning_rate": 0.0001681359722555885, "loss": 0.9618, "step": 5524 }, { "epoch": 0.26178630656242596, "grad_norm": 0.408203125, "learning_rate": 0.0001681250708850997, "loss": 0.7285, "step": 5525 }, { "epoch": 0.26183368869936036, "grad_norm": 0.52734375, "learning_rate": 0.00016811416800366935, "loss": 1.0602, "step": 5526 }, { "epoch": 0.2618810708362947, "grad_norm": 0.2041015625, "learning_rate": 0.0001681032636115393, "loss": 0.0395, "step": 5527 }, { "epoch": 0.2619284529732291, "grad_norm": 0.296875, "learning_rate": 0.00016809235770895144, "loss": 0.0628, "step": 5528 }, { "epoch": 0.2619758351101635, "grad_norm": 0.7578125, "learning_rate": 0.00016808145029614758, "loss": 0.8732, "step": 5529 }, { "epoch": 0.2620232172470978, "grad_norm": 0.8359375, "learning_rate": 0.00016807054137336973, "loss": 1.06, "step": 5530 }, { "epoch": 0.2620705993840322, "grad_norm": 0.048583984375, "learning_rate": 0.0001680596309408597, "loss": 0.0016, "step": 5531 }, { "epoch": 0.2621179815209666, "grad_norm": 0.025634765625, "learning_rate": 0.0001680487189988596, "loss": 0.0016, "step": 5532 }, { "epoch": 0.262165363657901, "grad_norm": 0.78125, "learning_rate": 0.00016803780554761137, "loss": 1.2372, "step": 5533 }, { "epoch": 0.26221274579483533, "grad_norm": 0.6328125, "learning_rate": 0.0001680268905873571, "loss": 0.8734, "step": 5534 }, { "epoch": 0.2622601279317697, "grad_norm": 0.0810546875, "learning_rate": 0.00016801597411833885, "loss": 0.0055, "step": 5535 }, { "epoch": 0.2623075100687041, "grad_norm": 0.7109375, "learning_rate": 0.00016800505614079876, "loss": 1.2974, "step": 5536 }, { "epoch": 0.26235489220563846, "grad_norm": 0.59375, "learning_rate": 0.00016799413665497892, "loss": 1.3831, "step": 5537 }, { "epoch": 0.26240227434257285, "grad_norm": 0.59765625, "learning_rate": 0.00016798321566112158, "loss": 1.1895, "step": 5538 }, { "epoch": 0.26244965647950724, "grad_norm": 0.65234375, "learning_rate": 0.0001679722931594689, "loss": 1.0775, "step": 5539 }, { "epoch": 0.2624970386164416, "grad_norm": 0.640625, "learning_rate": 0.0001679613691502632, "loss": 0.8848, "step": 5540 }, { "epoch": 0.262544420753376, "grad_norm": 0.6484375, "learning_rate": 0.00016795044363374673, "loss": 0.5862, "step": 5541 }, { "epoch": 0.26259180289031037, "grad_norm": 0.59375, "learning_rate": 0.00016793951661016175, "loss": 0.9577, "step": 5542 }, { "epoch": 0.2626391850272447, "grad_norm": 0.765625, "learning_rate": 0.00016792858807975066, "loss": 1.3822, "step": 5543 }, { "epoch": 0.2626865671641791, "grad_norm": 0.640625, "learning_rate": 0.0001679176580427559, "loss": 0.962, "step": 5544 }, { "epoch": 0.2627339493011135, "grad_norm": 0.69140625, "learning_rate": 0.00016790672649941976, "loss": 1.5334, "step": 5545 }, { "epoch": 0.2627813314380479, "grad_norm": 0.6640625, "learning_rate": 0.0001678957934499848, "loss": 0.9279, "step": 5546 }, { "epoch": 0.2628287135749822, "grad_norm": 0.466796875, "learning_rate": 0.00016788485889469344, "loss": 0.4792, "step": 5547 }, { "epoch": 0.2628760957119166, "grad_norm": 0.7421875, "learning_rate": 0.00016787392283378822, "loss": 1.0632, "step": 5548 }, { "epoch": 0.262923477848851, "grad_norm": 0.1533203125, "learning_rate": 0.00016786298526751166, "loss": 0.0135, "step": 5549 }, { "epoch": 0.26297085998578534, "grad_norm": 0.64453125, "learning_rate": 0.00016785204619610642, "loss": 0.8517, "step": 5550 }, { "epoch": 0.26301824212271974, "grad_norm": 0.400390625, "learning_rate": 0.00016784110561981507, "loss": 0.0653, "step": 5551 }, { "epoch": 0.26306562425965413, "grad_norm": 0.63671875, "learning_rate": 0.00016783016353888024, "loss": 1.2957, "step": 5552 }, { "epoch": 0.26311300639658847, "grad_norm": 0.57421875, "learning_rate": 0.00016781921995354462, "loss": 0.7068, "step": 5553 }, { "epoch": 0.26316038853352286, "grad_norm": 0.64453125, "learning_rate": 0.00016780827486405096, "loss": 1.0027, "step": 5554 }, { "epoch": 0.26320777067045725, "grad_norm": 0.65625, "learning_rate": 0.00016779732827064197, "loss": 0.104, "step": 5555 }, { "epoch": 0.2632551528073916, "grad_norm": 0.84765625, "learning_rate": 0.00016778638017356045, "loss": 0.1015, "step": 5556 }, { "epoch": 0.263302534944326, "grad_norm": 0.78125, "learning_rate": 0.00016777543057304922, "loss": 0.1114, "step": 5557 }, { "epoch": 0.2633499170812604, "grad_norm": 0.58984375, "learning_rate": 0.00016776447946935115, "loss": 0.0839, "step": 5558 }, { "epoch": 0.2633972992181947, "grad_norm": 0.765625, "learning_rate": 0.00016775352686270912, "loss": 0.3187, "step": 5559 }, { "epoch": 0.2634446813551291, "grad_norm": 0.4765625, "learning_rate": 0.000167742572753366, "loss": 0.5824, "step": 5560 }, { "epoch": 0.2634920634920635, "grad_norm": 0.306640625, "learning_rate": 0.00016773161714156478, "loss": 0.1804, "step": 5561 }, { "epoch": 0.2635394456289979, "grad_norm": 0.3984375, "learning_rate": 0.00016772066002754846, "loss": 0.0095, "step": 5562 }, { "epoch": 0.26358682776593223, "grad_norm": 0.76171875, "learning_rate": 0.00016770970141155998, "loss": 1.1152, "step": 5563 }, { "epoch": 0.2636342099028666, "grad_norm": 0.61328125, "learning_rate": 0.00016769874129384248, "loss": 1.1267, "step": 5564 }, { "epoch": 0.263681592039801, "grad_norm": 0.65234375, "learning_rate": 0.000167687779674639, "loss": 1.0091, "step": 5565 }, { "epoch": 0.26372897417673535, "grad_norm": 0.7109375, "learning_rate": 0.00016767681655419268, "loss": 0.7512, "step": 5566 }, { "epoch": 0.26377635631366975, "grad_norm": 0.1064453125, "learning_rate": 0.00016766585193274665, "loss": 0.0189, "step": 5567 }, { "epoch": 0.26382373845060414, "grad_norm": 0.484375, "learning_rate": 0.0001676548858105441, "loss": 0.4407, "step": 5568 }, { "epoch": 0.2638711205875385, "grad_norm": 0.703125, "learning_rate": 0.00016764391818782824, "loss": 0.8798, "step": 5569 }, { "epoch": 0.26391850272447287, "grad_norm": 0.70703125, "learning_rate": 0.00016763294906484234, "loss": 1.0353, "step": 5570 }, { "epoch": 0.26396588486140726, "grad_norm": 0.6015625, "learning_rate": 0.00016762197844182963, "loss": 1.4533, "step": 5571 }, { "epoch": 0.2640132669983416, "grad_norm": 0.6328125, "learning_rate": 0.00016761100631903348, "loss": 0.1956, "step": 5572 }, { "epoch": 0.264060649135276, "grad_norm": 0.326171875, "learning_rate": 0.00016760003269669725, "loss": 0.0535, "step": 5573 }, { "epoch": 0.2641080312722104, "grad_norm": 0.7109375, "learning_rate": 0.00016758905757506426, "loss": 1.2123, "step": 5574 }, { "epoch": 0.2641554134091448, "grad_norm": 0.408203125, "learning_rate": 0.000167578080954378, "loss": 0.3087, "step": 5575 }, { "epoch": 0.2642027955460791, "grad_norm": 0.470703125, "learning_rate": 0.0001675671028348819, "loss": 1.1576, "step": 5576 }, { "epoch": 0.2642501776830135, "grad_norm": 0.076171875, "learning_rate": 0.00016755612321681936, "loss": 0.002, "step": 5577 }, { "epoch": 0.2642975598199479, "grad_norm": 0.57421875, "learning_rate": 0.00016754514210043402, "loss": 0.1876, "step": 5578 }, { "epoch": 0.26434494195688224, "grad_norm": 0.76953125, "learning_rate": 0.00016753415948596935, "loss": 1.0717, "step": 5579 }, { "epoch": 0.26439232409381663, "grad_norm": 0.7421875, "learning_rate": 0.00016752317537366897, "loss": 1.1072, "step": 5580 }, { "epoch": 0.264439706230751, "grad_norm": 0.1376953125, "learning_rate": 0.0001675121897637765, "loss": 0.0101, "step": 5581 }, { "epoch": 0.26448708836768536, "grad_norm": 0.671875, "learning_rate": 0.00016750120265653554, "loss": 1.4156, "step": 5582 }, { "epoch": 0.26453447050461976, "grad_norm": 0.13671875, "learning_rate": 0.0001674902140521898, "loss": 0.0341, "step": 5583 }, { "epoch": 0.26458185264155415, "grad_norm": 0.0108642578125, "learning_rate": 0.00016747922395098303, "loss": 0.0006, "step": 5584 }, { "epoch": 0.2646292347784885, "grad_norm": 0.78125, "learning_rate": 0.00016746823235315895, "loss": 1.0785, "step": 5585 }, { "epoch": 0.2646766169154229, "grad_norm": 0.17578125, "learning_rate": 0.0001674572392589613, "loss": 0.1082, "step": 5586 }, { "epoch": 0.2647239990523573, "grad_norm": 0.62890625, "learning_rate": 0.00016744624466863395, "loss": 0.9585, "step": 5587 }, { "epoch": 0.2647713811892916, "grad_norm": 0.79296875, "learning_rate": 0.00016743524858242075, "loss": 0.853, "step": 5588 }, { "epoch": 0.264818763326226, "grad_norm": 0.1083984375, "learning_rate": 0.00016742425100056555, "loss": 0.0078, "step": 5589 }, { "epoch": 0.2648661454631604, "grad_norm": 1.0625, "learning_rate": 0.0001674132519233123, "loss": 1.1826, "step": 5590 }, { "epoch": 0.2649135276000948, "grad_norm": 0.2314453125, "learning_rate": 0.00016740225135090491, "loss": 0.1609, "step": 5591 }, { "epoch": 0.2649609097370291, "grad_norm": 0.63671875, "learning_rate": 0.0001673912492835874, "loss": 0.9908, "step": 5592 }, { "epoch": 0.2650082918739635, "grad_norm": 0.984375, "learning_rate": 0.00016738024572160375, "loss": 0.7295, "step": 5593 }, { "epoch": 0.2650556740108979, "grad_norm": 0.65234375, "learning_rate": 0.000167369240665198, "loss": 0.8419, "step": 5594 }, { "epoch": 0.26510305614783225, "grad_norm": 0.62890625, "learning_rate": 0.0001673582341146143, "loss": 0.5265, "step": 5595 }, { "epoch": 0.26515043828476664, "grad_norm": 0.55078125, "learning_rate": 0.00016734722607009665, "loss": 0.8965, "step": 5596 }, { "epoch": 0.26519782042170104, "grad_norm": 0.46484375, "learning_rate": 0.00016733621653188931, "loss": 0.2007, "step": 5597 }, { "epoch": 0.2652452025586354, "grad_norm": 0.66015625, "learning_rate": 0.00016732520550023642, "loss": 0.685, "step": 5598 }, { "epoch": 0.26529258469556977, "grad_norm": 0.5078125, "learning_rate": 0.00016731419297538212, "loss": 0.5059, "step": 5599 }, { "epoch": 0.26533996683250416, "grad_norm": 0.88671875, "learning_rate": 0.0001673031789575708, "loss": 1.1605, "step": 5600 }, { "epoch": 0.2653873489694385, "grad_norm": 0.54296875, "learning_rate": 0.00016729216344704662, "loss": 0.6541, "step": 5601 }, { "epoch": 0.2654347311063729, "grad_norm": 0.5625, "learning_rate": 0.00016728114644405388, "loss": 1.5109, "step": 5602 }, { "epoch": 0.2654821132433073, "grad_norm": 0.5625, "learning_rate": 0.00016727012794883703, "loss": 0.6519, "step": 5603 }, { "epoch": 0.2655294953802417, "grad_norm": 0.55078125, "learning_rate": 0.0001672591079616404, "loss": 1.0167, "step": 5604 }, { "epoch": 0.265576877517176, "grad_norm": 0.72265625, "learning_rate": 0.0001672480864827084, "loss": 0.6678, "step": 5605 }, { "epoch": 0.2656242596541104, "grad_norm": 0.47265625, "learning_rate": 0.00016723706351228545, "loss": 0.6186, "step": 5606 }, { "epoch": 0.2656716417910448, "grad_norm": 0.703125, "learning_rate": 0.00016722603905061604, "loss": 0.7193, "step": 5607 }, { "epoch": 0.26571902392797914, "grad_norm": 0.7421875, "learning_rate": 0.00016721501309794473, "loss": 0.7566, "step": 5608 }, { "epoch": 0.26576640606491353, "grad_norm": 0.478515625, "learning_rate": 0.000167203985654516, "loss": 0.7573, "step": 5609 }, { "epoch": 0.2658137882018479, "grad_norm": 0.45703125, "learning_rate": 0.00016719295672057443, "loss": 0.5047, "step": 5610 }, { "epoch": 0.26586117033878226, "grad_norm": 0.83203125, "learning_rate": 0.00016718192629636467, "loss": 0.5751, "step": 5611 }, { "epoch": 0.26590855247571665, "grad_norm": 0.79296875, "learning_rate": 0.00016717089438213133, "loss": 0.9762, "step": 5612 }, { "epoch": 0.26595593461265105, "grad_norm": 0.55859375, "learning_rate": 0.00016715986097811912, "loss": 1.1714, "step": 5613 }, { "epoch": 0.2660033167495854, "grad_norm": 0.87109375, "learning_rate": 0.00016714882608457273, "loss": 1.4798, "step": 5614 }, { "epoch": 0.2660506988865198, "grad_norm": 0.8984375, "learning_rate": 0.0001671377897017369, "loss": 1.0433, "step": 5615 }, { "epoch": 0.26609808102345417, "grad_norm": 0.703125, "learning_rate": 0.0001671267518298564, "loss": 0.8132, "step": 5616 }, { "epoch": 0.2661454631603885, "grad_norm": 0.7734375, "learning_rate": 0.00016711571246917607, "loss": 1.2701, "step": 5617 }, { "epoch": 0.2661928452973229, "grad_norm": 0.52734375, "learning_rate": 0.0001671046716199407, "loss": 0.7974, "step": 5618 }, { "epoch": 0.2662402274342573, "grad_norm": 0.95703125, "learning_rate": 0.00016709362928239515, "loss": 1.0338, "step": 5619 }, { "epoch": 0.2662876095711917, "grad_norm": 0.7421875, "learning_rate": 0.00016708258545678444, "loss": 0.6452, "step": 5620 }, { "epoch": 0.266334991708126, "grad_norm": 0.1865234375, "learning_rate": 0.0001670715401433534, "loss": 0.0214, "step": 5621 }, { "epoch": 0.2663823738450604, "grad_norm": 0.77734375, "learning_rate": 0.00016706049334234705, "loss": 1.0348, "step": 5622 }, { "epoch": 0.2664297559819948, "grad_norm": 1.1484375, "learning_rate": 0.0001670494450540104, "loss": 0.6407, "step": 5623 }, { "epoch": 0.26647713811892915, "grad_norm": 0.68359375, "learning_rate": 0.00016703839527858848, "loss": 1.1821, "step": 5624 }, { "epoch": 0.26652452025586354, "grad_norm": 1.2890625, "learning_rate": 0.00016702734401632632, "loss": 0.3451, "step": 5625 }, { "epoch": 0.26657190239279793, "grad_norm": 0.2578125, "learning_rate": 0.00016701629126746908, "loss": 0.1856, "step": 5626 }, { "epoch": 0.26661928452973227, "grad_norm": 0.8515625, "learning_rate": 0.0001670052370322619, "loss": 0.0511, "step": 5627 }, { "epoch": 0.26666666666666666, "grad_norm": 0.65625, "learning_rate": 0.0001669941813109499, "loss": 1.1102, "step": 5628 }, { "epoch": 0.26671404880360106, "grad_norm": 0.2255859375, "learning_rate": 0.00016698312410377833, "loss": 0.1564, "step": 5629 }, { "epoch": 0.2667614309405354, "grad_norm": 0.302734375, "learning_rate": 0.00016697206541099245, "loss": 0.1795, "step": 5630 }, { "epoch": 0.2668088130774698, "grad_norm": 0.84765625, "learning_rate": 0.00016696100523283744, "loss": 1.0263, "step": 5631 }, { "epoch": 0.2668561952144042, "grad_norm": 0.765625, "learning_rate": 0.0001669499435695587, "loss": 0.5158, "step": 5632 }, { "epoch": 0.2669035773513386, "grad_norm": 0.51171875, "learning_rate": 0.00016693888042140152, "loss": 1.1575, "step": 5633 }, { "epoch": 0.2669509594882729, "grad_norm": 0.83203125, "learning_rate": 0.00016692781578861126, "loss": 1.0503, "step": 5634 }, { "epoch": 0.2669983416252073, "grad_norm": 0.5078125, "learning_rate": 0.0001669167496714333, "loss": 0.9357, "step": 5635 }, { "epoch": 0.2670457237621417, "grad_norm": 0.6640625, "learning_rate": 0.00016690568207011313, "loss": 0.9347, "step": 5636 }, { "epoch": 0.26709310589907603, "grad_norm": 0.765625, "learning_rate": 0.00016689461298489625, "loss": 1.0529, "step": 5637 }, { "epoch": 0.2671404880360104, "grad_norm": 0.64453125, "learning_rate": 0.00016688354241602805, "loss": 0.8088, "step": 5638 }, { "epoch": 0.2671878701729448, "grad_norm": 0.71484375, "learning_rate": 0.00016687247036375414, "loss": 1.321, "step": 5639 }, { "epoch": 0.26723525230987916, "grad_norm": 0.81640625, "learning_rate": 0.00016686139682832004, "loss": 0.8933, "step": 5640 }, { "epoch": 0.26728263444681355, "grad_norm": 0.546875, "learning_rate": 0.00016685032180997143, "loss": 0.8912, "step": 5641 }, { "epoch": 0.26733001658374794, "grad_norm": 0.1435546875, "learning_rate": 0.00016683924530895385, "loss": 0.0123, "step": 5642 }, { "epoch": 0.2673773987206823, "grad_norm": 0.7734375, "learning_rate": 0.000166828167325513, "loss": 1.3312, "step": 5643 }, { "epoch": 0.2674247808576167, "grad_norm": 0.166015625, "learning_rate": 0.0001668170878598946, "loss": 0.0041, "step": 5644 }, { "epoch": 0.26747216299455107, "grad_norm": 0.69921875, "learning_rate": 0.00016680600691234437, "loss": 0.8034, "step": 5645 }, { "epoch": 0.2675195451314854, "grad_norm": 0.875, "learning_rate": 0.00016679492448310804, "loss": 0.4828, "step": 5646 }, { "epoch": 0.2675669272684198, "grad_norm": 0.27734375, "learning_rate": 0.00016678384057243145, "loss": 0.0189, "step": 5647 }, { "epoch": 0.2676143094053542, "grad_norm": 0.69140625, "learning_rate": 0.00016677275518056037, "loss": 1.4275, "step": 5648 }, { "epoch": 0.2676616915422886, "grad_norm": 0.57421875, "learning_rate": 0.00016676166830774074, "loss": 0.9209, "step": 5649 }, { "epoch": 0.2677090736792229, "grad_norm": 0.59375, "learning_rate": 0.00016675057995421843, "loss": 0.7652, "step": 5650 }, { "epoch": 0.2677564558161573, "grad_norm": 0.51953125, "learning_rate": 0.00016673949012023932, "loss": 0.5517, "step": 5651 }, { "epoch": 0.2678038379530917, "grad_norm": 0.71875, "learning_rate": 0.00016672839880604943, "loss": 0.9392, "step": 5652 }, { "epoch": 0.26785122009002604, "grad_norm": 0.75390625, "learning_rate": 0.00016671730601189473, "loss": 1.086, "step": 5653 }, { "epoch": 0.26789860222696044, "grad_norm": 0.64453125, "learning_rate": 0.00016670621173802124, "loss": 1.0273, "step": 5654 }, { "epoch": 0.26794598436389483, "grad_norm": 0.11767578125, "learning_rate": 0.00016669511598467503, "loss": 0.0101, "step": 5655 }, { "epoch": 0.26799336650082917, "grad_norm": 0.69140625, "learning_rate": 0.0001666840187521022, "loss": 1.0378, "step": 5656 }, { "epoch": 0.26804074863776356, "grad_norm": 0.73828125, "learning_rate": 0.00016667292004054885, "loss": 0.6257, "step": 5657 }, { "epoch": 0.26808813077469795, "grad_norm": 0.60546875, "learning_rate": 0.00016666181985026113, "loss": 0.6897, "step": 5658 }, { "epoch": 0.2681355129116323, "grad_norm": 0.6484375, "learning_rate": 0.00016665071818148527, "loss": 0.8365, "step": 5659 }, { "epoch": 0.2681828950485667, "grad_norm": 0.68359375, "learning_rate": 0.00016663961503446748, "loss": 0.2519, "step": 5660 }, { "epoch": 0.2682302771855011, "grad_norm": 1.3828125, "learning_rate": 0.000166628510409454, "loss": 0.4015, "step": 5661 }, { "epoch": 0.26827765932243547, "grad_norm": 0.72265625, "learning_rate": 0.00016661740430669116, "loss": 0.8856, "step": 5662 }, { "epoch": 0.2683250414593698, "grad_norm": 0.54296875, "learning_rate": 0.0001666062967264252, "loss": 1.2056, "step": 5663 }, { "epoch": 0.2683724235963042, "grad_norm": 0.734375, "learning_rate": 0.00016659518766890257, "loss": 1.3711, "step": 5664 }, { "epoch": 0.2684198057332386, "grad_norm": 0.671875, "learning_rate": 0.00016658407713436956, "loss": 0.8598, "step": 5665 }, { "epoch": 0.26846718787017293, "grad_norm": 0.83203125, "learning_rate": 0.00016657296512307266, "loss": 1.4123, "step": 5666 }, { "epoch": 0.2685145700071073, "grad_norm": 0.63671875, "learning_rate": 0.00016656185163525832, "loss": 0.8251, "step": 5667 }, { "epoch": 0.2685619521440417, "grad_norm": 0.91015625, "learning_rate": 0.000166550736671173, "loss": 0.1422, "step": 5668 }, { "epoch": 0.26860933428097605, "grad_norm": 0.67578125, "learning_rate": 0.00016653962023106323, "loss": 1.0764, "step": 5669 }, { "epoch": 0.26865671641791045, "grad_norm": 1.0, "learning_rate": 0.00016652850231517558, "loss": 1.195, "step": 5670 }, { "epoch": 0.26870409855484484, "grad_norm": 0.6328125, "learning_rate": 0.0001665173829237566, "loss": 0.7752, "step": 5671 }, { "epoch": 0.2687514806917792, "grad_norm": 0.5625, "learning_rate": 0.0001665062620570529, "loss": 0.8655, "step": 5672 }, { "epoch": 0.26879886282871357, "grad_norm": 0.81640625, "learning_rate": 0.00016649513971531114, "loss": 1.2131, "step": 5673 }, { "epoch": 0.26884624496564796, "grad_norm": 0.80859375, "learning_rate": 0.00016648401589877804, "loss": 1.2834, "step": 5674 }, { "epoch": 0.2688936271025823, "grad_norm": 0.2578125, "learning_rate": 0.00016647289060770027, "loss": 0.1421, "step": 5675 }, { "epoch": 0.2689410092395167, "grad_norm": 0.66015625, "learning_rate": 0.00016646176384232456, "loss": 1.0725, "step": 5676 }, { "epoch": 0.2689883913764511, "grad_norm": 0.671875, "learning_rate": 0.00016645063560289777, "loss": 1.1293, "step": 5677 }, { "epoch": 0.2690357735133855, "grad_norm": 0.75, "learning_rate": 0.00016643950588966663, "loss": 1.3876, "step": 5678 }, { "epoch": 0.2690831556503198, "grad_norm": 0.69921875, "learning_rate": 0.00016642837470287803, "loss": 0.9904, "step": 5679 }, { "epoch": 0.2691305377872542, "grad_norm": 0.6328125, "learning_rate": 0.0001664172420427788, "loss": 0.8577, "step": 5680 }, { "epoch": 0.2691779199241886, "grad_norm": 1.375, "learning_rate": 0.00016640610790961591, "loss": 0.2961, "step": 5681 }, { "epoch": 0.26922530206112294, "grad_norm": 0.7109375, "learning_rate": 0.0001663949723036363, "loss": 1.6089, "step": 5682 }, { "epoch": 0.26927268419805733, "grad_norm": 0.62890625, "learning_rate": 0.0001663838352250869, "loss": 0.948, "step": 5683 }, { "epoch": 0.2693200663349917, "grad_norm": 0.6171875, "learning_rate": 0.0001663726966742148, "loss": 0.9098, "step": 5684 }, { "epoch": 0.26936744847192606, "grad_norm": 0.5625, "learning_rate": 0.00016636155665126693, "loss": 0.6937, "step": 5685 }, { "epoch": 0.26941483060886046, "grad_norm": 0.0673828125, "learning_rate": 0.00016635041515649043, "loss": 0.0027, "step": 5686 }, { "epoch": 0.26946221274579485, "grad_norm": 0.68359375, "learning_rate": 0.00016633927219013238, "loss": 0.2111, "step": 5687 }, { "epoch": 0.2695095948827292, "grad_norm": 1.09375, "learning_rate": 0.00016632812775243996, "loss": 0.6147, "step": 5688 }, { "epoch": 0.2695569770196636, "grad_norm": 0.7578125, "learning_rate": 0.0001663169818436603, "loss": 0.7323, "step": 5689 }, { "epoch": 0.269604359156598, "grad_norm": 0.55078125, "learning_rate": 0.00016630583446404062, "loss": 0.4665, "step": 5690 }, { "epoch": 0.26965174129353237, "grad_norm": 0.91796875, "learning_rate": 0.00016629468561382815, "loss": 0.795, "step": 5691 }, { "epoch": 0.2696991234304667, "grad_norm": 0.54296875, "learning_rate": 0.00016628353529327022, "loss": 0.5587, "step": 5692 }, { "epoch": 0.2697465055674011, "grad_norm": 0.7109375, "learning_rate": 0.00016627238350261402, "loss": 1.3128, "step": 5693 }, { "epoch": 0.2697938877043355, "grad_norm": 0.6953125, "learning_rate": 0.00016626123024210697, "loss": 1.0178, "step": 5694 }, { "epoch": 0.2698412698412698, "grad_norm": 0.58203125, "learning_rate": 0.0001662500755119964, "loss": 0.2749, "step": 5695 }, { "epoch": 0.2698886519782042, "grad_norm": 0.28515625, "learning_rate": 0.00016623891931252972, "loss": 0.1534, "step": 5696 }, { "epoch": 0.2699360341151386, "grad_norm": 0.9765625, "learning_rate": 0.00016622776164395436, "loss": 1.0676, "step": 5697 }, { "epoch": 0.26998341625207295, "grad_norm": 0.1337890625, "learning_rate": 0.0001662166025065178, "loss": 0.0204, "step": 5698 }, { "epoch": 0.27003079838900734, "grad_norm": 0.59375, "learning_rate": 0.00016620544190046752, "loss": 1.3422, "step": 5699 }, { "epoch": 0.27007818052594174, "grad_norm": 0.83203125, "learning_rate": 0.0001661942798260511, "loss": 0.9876, "step": 5700 }, { "epoch": 0.2701255626628761, "grad_norm": 0.1904296875, "learning_rate": 0.000166183116283516, "loss": 0.018, "step": 5701 }, { "epoch": 0.27017294479981047, "grad_norm": 0.7109375, "learning_rate": 0.0001661719512731099, "loss": 1.3744, "step": 5702 }, { "epoch": 0.27022032693674486, "grad_norm": 0.671875, "learning_rate": 0.0001661607847950804, "loss": 0.0785, "step": 5703 }, { "epoch": 0.2702677090736792, "grad_norm": 0.0869140625, "learning_rate": 0.0001661496168496752, "loss": 0.0065, "step": 5704 }, { "epoch": 0.2703150912106136, "grad_norm": 0.69140625, "learning_rate": 0.0001661384474371419, "loss": 1.119, "step": 5705 }, { "epoch": 0.270362473347548, "grad_norm": 0.78125, "learning_rate": 0.00016612727655772831, "loss": 1.0204, "step": 5706 }, { "epoch": 0.2704098554844824, "grad_norm": 0.640625, "learning_rate": 0.00016611610421168215, "loss": 0.8329, "step": 5707 }, { "epoch": 0.2704572376214167, "grad_norm": 0.59375, "learning_rate": 0.00016610493039925125, "loss": 0.9431, "step": 5708 }, { "epoch": 0.2705046197583511, "grad_norm": 0.0849609375, "learning_rate": 0.00016609375512068337, "loss": 0.0081, "step": 5709 }, { "epoch": 0.2705520018952855, "grad_norm": 1.078125, "learning_rate": 0.00016608257837622646, "loss": 0.2527, "step": 5710 }, { "epoch": 0.27059938403221984, "grad_norm": 0.59375, "learning_rate": 0.00016607140016612826, "loss": 1.0873, "step": 5711 }, { "epoch": 0.27064676616915423, "grad_norm": 0.7265625, "learning_rate": 0.00016606022049063686, "loss": 1.42, "step": 5712 }, { "epoch": 0.2706941483060886, "grad_norm": 0.5703125, "learning_rate": 0.0001660490393500001, "loss": 0.0884, "step": 5713 }, { "epoch": 0.27074153044302296, "grad_norm": 0.6015625, "learning_rate": 0.000166037856744466, "loss": 1.1958, "step": 5714 }, { "epoch": 0.27078891257995735, "grad_norm": 0.2001953125, "learning_rate": 0.00016602667267428257, "loss": 0.1684, "step": 5715 }, { "epoch": 0.27083629471689175, "grad_norm": 0.578125, "learning_rate": 0.0001660154871396979, "loss": 0.5702, "step": 5716 }, { "epoch": 0.2708836768538261, "grad_norm": 0.7109375, "learning_rate": 0.00016600430014096, "loss": 0.872, "step": 5717 }, { "epoch": 0.2709310589907605, "grad_norm": 0.58984375, "learning_rate": 0.00016599311167831706, "loss": 1.018, "step": 5718 }, { "epoch": 0.27097844112769487, "grad_norm": 0.484375, "learning_rate": 0.00016598192175201718, "loss": 0.3869, "step": 5719 }, { "epoch": 0.27102582326462926, "grad_norm": 0.6484375, "learning_rate": 0.00016597073036230854, "loss": 0.7992, "step": 5720 }, { "epoch": 0.2710732054015636, "grad_norm": 0.09033203125, "learning_rate": 0.0001659595375094394, "loss": 0.0172, "step": 5721 }, { "epoch": 0.271120587538498, "grad_norm": 0.466796875, "learning_rate": 0.00016594834319365797, "loss": 0.5832, "step": 5722 }, { "epoch": 0.2711679696754324, "grad_norm": 0.57421875, "learning_rate": 0.00016593714741521253, "loss": 0.6701, "step": 5723 }, { "epoch": 0.2712153518123667, "grad_norm": 0.6015625, "learning_rate": 0.0001659259501743514, "loss": 0.8962, "step": 5724 }, { "epoch": 0.2712627339493011, "grad_norm": 0.59765625, "learning_rate": 0.0001659147514713229, "loss": 0.8608, "step": 5725 }, { "epoch": 0.2713101160862355, "grad_norm": 0.7265625, "learning_rate": 0.00016590355130637546, "loss": 0.9534, "step": 5726 }, { "epoch": 0.27135749822316985, "grad_norm": 0.0220947265625, "learning_rate": 0.0001658923496797574, "loss": 0.001, "step": 5727 }, { "epoch": 0.27140488036010424, "grad_norm": 0.22265625, "learning_rate": 0.00016588114659171722, "loss": 0.0697, "step": 5728 }, { "epoch": 0.27145226249703863, "grad_norm": 0.5703125, "learning_rate": 0.00016586994204250338, "loss": 1.0821, "step": 5729 }, { "epoch": 0.27149964463397297, "grad_norm": 0.62890625, "learning_rate": 0.0001658587360323644, "loss": 0.9673, "step": 5730 }, { "epoch": 0.27154702677090736, "grad_norm": 0.890625, "learning_rate": 0.0001658475285615488, "loss": 0.4256, "step": 5731 }, { "epoch": 0.27159440890784176, "grad_norm": 0.53515625, "learning_rate": 0.00016583631963030518, "loss": 0.9804, "step": 5732 }, { "epoch": 0.2716417910447761, "grad_norm": 0.65234375, "learning_rate": 0.0001658251092388821, "loss": 0.9756, "step": 5733 }, { "epoch": 0.2716891731817105, "grad_norm": 1.1015625, "learning_rate": 0.0001658138973875282, "loss": 1.2051, "step": 5734 }, { "epoch": 0.2717365553186449, "grad_norm": 0.59765625, "learning_rate": 0.00016580268407649214, "loss": 0.1696, "step": 5735 }, { "epoch": 0.27178393745557927, "grad_norm": 0.60546875, "learning_rate": 0.00016579146930602266, "loss": 0.1459, "step": 5736 }, { "epoch": 0.2718313195925136, "grad_norm": 0.6328125, "learning_rate": 0.00016578025307636846, "loss": 0.9619, "step": 5737 }, { "epoch": 0.271878701729448, "grad_norm": 0.640625, "learning_rate": 0.00016576903538777834, "loss": 1.0552, "step": 5738 }, { "epoch": 0.2719260838663824, "grad_norm": 0.6328125, "learning_rate": 0.00016575781624050104, "loss": 0.7923, "step": 5739 }, { "epoch": 0.27197346600331673, "grad_norm": 0.416015625, "learning_rate": 0.00016574659563478543, "loss": 0.1956, "step": 5740 }, { "epoch": 0.2720208481402511, "grad_norm": 0.8046875, "learning_rate": 0.00016573537357088036, "loss": 0.6846, "step": 5741 }, { "epoch": 0.2720682302771855, "grad_norm": 0.70703125, "learning_rate": 0.0001657241500490347, "loss": 1.2539, "step": 5742 }, { "epoch": 0.27211561241411986, "grad_norm": 0.443359375, "learning_rate": 0.00016571292506949742, "loss": 0.1037, "step": 5743 }, { "epoch": 0.27216299455105425, "grad_norm": 0.5390625, "learning_rate": 0.00016570169863251746, "loss": 0.7723, "step": 5744 }, { "epoch": 0.27221037668798864, "grad_norm": 0.8046875, "learning_rate": 0.00016569047073834378, "loss": 1.033, "step": 5745 }, { "epoch": 0.272257758824923, "grad_norm": 0.54296875, "learning_rate": 0.00016567924138722546, "loss": 1.2771, "step": 5746 }, { "epoch": 0.2723051409618574, "grad_norm": 0.390625, "learning_rate": 0.0001656680105794115, "loss": 0.0527, "step": 5747 }, { "epoch": 0.27235252309879177, "grad_norm": 0.71875, "learning_rate": 0.00016565677831515104, "loss": 1.0547, "step": 5748 }, { "epoch": 0.27239990523572616, "grad_norm": 0.7734375, "learning_rate": 0.00016564554459469317, "loss": 0.8906, "step": 5749 }, { "epoch": 0.2724472873726605, "grad_norm": 0.67578125, "learning_rate": 0.000165634309418287, "loss": 1.0512, "step": 5750 }, { "epoch": 0.2724946695095949, "grad_norm": 0.72265625, "learning_rate": 0.00016562307278618178, "loss": 1.1705, "step": 5751 }, { "epoch": 0.2725420516465293, "grad_norm": 0.7734375, "learning_rate": 0.00016561183469862673, "loss": 1.1822, "step": 5752 }, { "epoch": 0.2725894337834636, "grad_norm": 0.61328125, "learning_rate": 0.00016560059515587105, "loss": 0.7487, "step": 5753 }, { "epoch": 0.272636815920398, "grad_norm": 0.51171875, "learning_rate": 0.00016558935415816403, "loss": 0.6775, "step": 5754 }, { "epoch": 0.2726841980573324, "grad_norm": 0.84375, "learning_rate": 0.00016557811170575504, "loss": 0.9706, "step": 5755 }, { "epoch": 0.27273158019426674, "grad_norm": 0.703125, "learning_rate": 0.00016556686779889337, "loss": 0.9756, "step": 5756 }, { "epoch": 0.27277896233120114, "grad_norm": 0.056396484375, "learning_rate": 0.0001655556224378284, "loss": 0.0017, "step": 5757 }, { "epoch": 0.27282634446813553, "grad_norm": 0.68359375, "learning_rate": 0.00016554437562280952, "loss": 1.2672, "step": 5758 }, { "epoch": 0.27287372660506987, "grad_norm": 0.82421875, "learning_rate": 0.00016553312735408625, "loss": 0.1707, "step": 5759 }, { "epoch": 0.27292110874200426, "grad_norm": 0.0133056640625, "learning_rate": 0.00016552187763190797, "loss": 0.0005, "step": 5760 }, { "epoch": 0.27296849087893865, "grad_norm": 0.96875, "learning_rate": 0.0001655106264565243, "loss": 1.0667, "step": 5761 }, { "epoch": 0.273015873015873, "grad_norm": 0.01031494140625, "learning_rate": 0.00016549937382818466, "loss": 0.0005, "step": 5762 }, { "epoch": 0.2730632551528074, "grad_norm": 0.76171875, "learning_rate": 0.0001654881197471387, "loss": 0.9846, "step": 5763 }, { "epoch": 0.2731106372897418, "grad_norm": 0.8125, "learning_rate": 0.00016547686421363602, "loss": 1.5557, "step": 5764 }, { "epoch": 0.27315801942667617, "grad_norm": 1.2421875, "learning_rate": 0.0001654656072279262, "loss": 1.2228, "step": 5765 }, { "epoch": 0.2732054015636105, "grad_norm": 0.62109375, "learning_rate": 0.00016545434879025893, "loss": 0.0552, "step": 5766 }, { "epoch": 0.2732527837005449, "grad_norm": 0.283203125, "learning_rate": 0.00016544308890088395, "loss": 0.1073, "step": 5767 }, { "epoch": 0.2733001658374793, "grad_norm": 1.015625, "learning_rate": 0.00016543182756005096, "loss": 1.0415, "step": 5768 }, { "epoch": 0.27334754797441363, "grad_norm": 0.259765625, "learning_rate": 0.00016542056476800973, "loss": 0.0164, "step": 5769 }, { "epoch": 0.273394930111348, "grad_norm": 0.74609375, "learning_rate": 0.00016540930052501006, "loss": 0.9602, "step": 5770 }, { "epoch": 0.2734423122482824, "grad_norm": 0.609375, "learning_rate": 0.00016539803483130177, "loss": 1.3939, "step": 5771 }, { "epoch": 0.27348969438521675, "grad_norm": 0.6328125, "learning_rate": 0.00016538676768713476, "loss": 1.0973, "step": 5772 }, { "epoch": 0.27353707652215115, "grad_norm": 0.46484375, "learning_rate": 0.00016537549909275881, "loss": 0.0422, "step": 5773 }, { "epoch": 0.27358445865908554, "grad_norm": 0.203125, "learning_rate": 0.00016536422904842398, "loss": 0.0074, "step": 5774 }, { "epoch": 0.2736318407960199, "grad_norm": 0.53515625, "learning_rate": 0.00016535295755438017, "loss": 0.8604, "step": 5775 }, { "epoch": 0.27367922293295427, "grad_norm": 0.53515625, "learning_rate": 0.00016534168461087738, "loss": 0.2881, "step": 5776 }, { "epoch": 0.27372660506988866, "grad_norm": 0.61328125, "learning_rate": 0.0001653304102181656, "loss": 0.9498, "step": 5777 }, { "epoch": 0.27377398720682306, "grad_norm": 0.734375, "learning_rate": 0.0001653191343764949, "loss": 1.4866, "step": 5778 }, { "epoch": 0.2738213693437574, "grad_norm": 0.96484375, "learning_rate": 0.0001653078570861154, "loss": 1.2301, "step": 5779 }, { "epoch": 0.2738687514806918, "grad_norm": 0.455078125, "learning_rate": 0.0001652965783472772, "loss": 0.0687, "step": 5780 }, { "epoch": 0.2739161336176262, "grad_norm": 0.57421875, "learning_rate": 0.0001652852981602304, "loss": 1.2015, "step": 5781 }, { "epoch": 0.2739635157545605, "grad_norm": 0.53515625, "learning_rate": 0.00016527401652522528, "loss": 0.6943, "step": 5782 }, { "epoch": 0.2740108978914949, "grad_norm": 0.373046875, "learning_rate": 0.00016526273344251197, "loss": 0.0311, "step": 5783 }, { "epoch": 0.2740582800284293, "grad_norm": 0.8359375, "learning_rate": 0.00016525144891234078, "loss": 1.2077, "step": 5784 }, { "epoch": 0.27410566216536364, "grad_norm": 0.0498046875, "learning_rate": 0.0001652401629349619, "loss": 0.0035, "step": 5785 }, { "epoch": 0.27415304430229803, "grad_norm": 0.7421875, "learning_rate": 0.0001652288755106257, "loss": 0.5082, "step": 5786 }, { "epoch": 0.2742004264392324, "grad_norm": 1.2421875, "learning_rate": 0.0001652175866395826, "loss": 0.9112, "step": 5787 }, { "epoch": 0.27424780857616676, "grad_norm": 0.62890625, "learning_rate": 0.0001652062963220828, "loss": 1.5539, "step": 5788 }, { "epoch": 0.27429519071310116, "grad_norm": 0.7109375, "learning_rate": 0.00016519500455837688, "loss": 1.0714, "step": 5789 }, { "epoch": 0.27434257285003555, "grad_norm": 0.5390625, "learning_rate": 0.00016518371134871513, "loss": 0.6494, "step": 5790 }, { "epoch": 0.2743899549869699, "grad_norm": 0.2353515625, "learning_rate": 0.00016517241669334812, "loss": 0.151, "step": 5791 }, { "epoch": 0.2744373371239043, "grad_norm": 0.50390625, "learning_rate": 0.00016516112059252635, "loss": 0.3159, "step": 5792 }, { "epoch": 0.27448471926083867, "grad_norm": 0.85546875, "learning_rate": 0.0001651498230465003, "loss": 1.2159, "step": 5793 }, { "epoch": 0.27453210139777307, "grad_norm": 0.458984375, "learning_rate": 0.00016513852405552058, "loss": 0.1881, "step": 5794 }, { "epoch": 0.2745794835347074, "grad_norm": 0.7109375, "learning_rate": 0.00016512722361983778, "loss": 0.9019, "step": 5795 }, { "epoch": 0.2746268656716418, "grad_norm": 0.7578125, "learning_rate": 0.00016511592173970257, "loss": 1.2556, "step": 5796 }, { "epoch": 0.2746742478085762, "grad_norm": 0.609375, "learning_rate": 0.0001651046184153655, "loss": 1.1222, "step": 5797 }, { "epoch": 0.2747216299455105, "grad_norm": 0.07421875, "learning_rate": 0.00016509331364707739, "loss": 0.0055, "step": 5798 }, { "epoch": 0.2747690120824449, "grad_norm": 0.53515625, "learning_rate": 0.0001650820074350889, "loss": 0.8872, "step": 5799 }, { "epoch": 0.2748163942193793, "grad_norm": 0.01165771484375, "learning_rate": 0.00016507069977965083, "loss": 0.0009, "step": 5800 }, { "epoch": 0.27486377635631365, "grad_norm": 0.2216796875, "learning_rate": 0.0001650593906810139, "loss": 0.1333, "step": 5801 }, { "epoch": 0.27491115849324804, "grad_norm": 0.09521484375, "learning_rate": 0.000165048080139429, "loss": 0.0043, "step": 5802 }, { "epoch": 0.27495854063018244, "grad_norm": 0.58984375, "learning_rate": 0.00016503676815514702, "loss": 1.1665, "step": 5803 }, { "epoch": 0.2750059227671168, "grad_norm": 0.83984375, "learning_rate": 0.00016502545472841875, "loss": 1.2289, "step": 5804 }, { "epoch": 0.27505330490405117, "grad_norm": 0.8125, "learning_rate": 0.00016501413985949514, "loss": 0.8153, "step": 5805 }, { "epoch": 0.27510068704098556, "grad_norm": 0.51171875, "learning_rate": 0.0001650028235486272, "loss": 0.3612, "step": 5806 }, { "epoch": 0.27514806917791995, "grad_norm": 0.322265625, "learning_rate": 0.00016499150579606586, "loss": 0.1876, "step": 5807 }, { "epoch": 0.2751954513148543, "grad_norm": 0.52734375, "learning_rate": 0.00016498018660206213, "loss": 0.4883, "step": 5808 }, { "epoch": 0.2752428334517887, "grad_norm": 0.048095703125, "learning_rate": 0.00016496886596686707, "loss": 0.0051, "step": 5809 }, { "epoch": 0.2752902155887231, "grad_norm": 0.03662109375, "learning_rate": 0.00016495754389073182, "loss": 0.0022, "step": 5810 }, { "epoch": 0.2753375977256574, "grad_norm": 0.7265625, "learning_rate": 0.0001649462203739074, "loss": 0.7888, "step": 5811 }, { "epoch": 0.2753849798625918, "grad_norm": 0.5625, "learning_rate": 0.00016493489541664498, "loss": 0.7594, "step": 5812 }, { "epoch": 0.2754323619995262, "grad_norm": 0.63671875, "learning_rate": 0.00016492356901919575, "loss": 0.8629, "step": 5813 }, { "epoch": 0.27547974413646054, "grad_norm": 0.9609375, "learning_rate": 0.0001649122411818109, "loss": 1.3489, "step": 5814 }, { "epoch": 0.27552712627339493, "grad_norm": 0.55078125, "learning_rate": 0.00016490091190474168, "loss": 0.9438, "step": 5815 }, { "epoch": 0.2755745084103293, "grad_norm": 0.56640625, "learning_rate": 0.0001648895811882394, "loss": 0.8693, "step": 5816 }, { "epoch": 0.27562189054726366, "grad_norm": 0.7890625, "learning_rate": 0.0001648782490325553, "loss": 0.7939, "step": 5817 }, { "epoch": 0.27566927268419805, "grad_norm": 0.314453125, "learning_rate": 0.00016486691543794076, "loss": 0.1437, "step": 5818 }, { "epoch": 0.27571665482113245, "grad_norm": 0.5390625, "learning_rate": 0.00016485558040464713, "loss": 0.6232, "step": 5819 }, { "epoch": 0.2757640369580668, "grad_norm": 0.53515625, "learning_rate": 0.00016484424393292576, "loss": 0.5876, "step": 5820 }, { "epoch": 0.2758114190950012, "grad_norm": 0.390625, "learning_rate": 0.00016483290602302818, "loss": 0.0837, "step": 5821 }, { "epoch": 0.27585880123193557, "grad_norm": 0.76171875, "learning_rate": 0.00016482156667520576, "loss": 0.8046, "step": 5822 }, { "epoch": 0.27590618336886996, "grad_norm": 0.640625, "learning_rate": 0.00016481022588971006, "loss": 0.9897, "step": 5823 }, { "epoch": 0.2759535655058043, "grad_norm": 0.80078125, "learning_rate": 0.0001647988836667926, "loss": 1.3133, "step": 5824 }, { "epoch": 0.2760009476427387, "grad_norm": 0.82421875, "learning_rate": 0.00016478754000670489, "loss": 1.4105, "step": 5825 }, { "epoch": 0.2760483297796731, "grad_norm": 0.5078125, "learning_rate": 0.00016477619490969857, "loss": 0.7777, "step": 5826 }, { "epoch": 0.2760957119166074, "grad_norm": 0.7421875, "learning_rate": 0.00016476484837602527, "loss": 0.9432, "step": 5827 }, { "epoch": 0.2761430940535418, "grad_norm": 0.69140625, "learning_rate": 0.00016475350040593656, "loss": 1.1919, "step": 5828 }, { "epoch": 0.2761904761904762, "grad_norm": 1.03125, "learning_rate": 0.00016474215099968422, "loss": 0.1892, "step": 5829 }, { "epoch": 0.27623785832741055, "grad_norm": 0.84375, "learning_rate": 0.0001647308001575199, "loss": 0.1605, "step": 5830 }, { "epoch": 0.27628524046434494, "grad_norm": 0.341796875, "learning_rate": 0.0001647194478796954, "loss": 0.0698, "step": 5831 }, { "epoch": 0.27633262260127933, "grad_norm": 0.6796875, "learning_rate": 0.00016470809416646248, "loss": 0.8024, "step": 5832 }, { "epoch": 0.27638000473821367, "grad_norm": 0.6328125, "learning_rate": 0.00016469673901807296, "loss": 1.1148, "step": 5833 }, { "epoch": 0.27642738687514806, "grad_norm": 0.0986328125, "learning_rate": 0.0001646853824347787, "loss": 0.0053, "step": 5834 }, { "epoch": 0.27647476901208246, "grad_norm": 0.07275390625, "learning_rate": 0.00016467402441683153, "loss": 0.0064, "step": 5835 }, { "epoch": 0.27652215114901685, "grad_norm": 1.140625, "learning_rate": 0.0001646626649644834, "loss": 1.199, "step": 5836 }, { "epoch": 0.2765695332859512, "grad_norm": 0.34765625, "learning_rate": 0.0001646513040779862, "loss": 0.1704, "step": 5837 }, { "epoch": 0.2766169154228856, "grad_norm": 0.1357421875, "learning_rate": 0.00016463994175759197, "loss": 0.0157, "step": 5838 }, { "epoch": 0.27666429755981997, "grad_norm": 0.62890625, "learning_rate": 0.00016462857800355268, "loss": 0.0994, "step": 5839 }, { "epoch": 0.2767116796967543, "grad_norm": 0.59765625, "learning_rate": 0.00016461721281612037, "loss": 0.7554, "step": 5840 }, { "epoch": 0.2767590618336887, "grad_norm": 0.306640625, "learning_rate": 0.0001646058461955471, "loss": 0.0174, "step": 5841 }, { "epoch": 0.2768064439706231, "grad_norm": 0.45703125, "learning_rate": 0.00016459447814208498, "loss": 0.5143, "step": 5842 }, { "epoch": 0.27685382610755743, "grad_norm": 0.5546875, "learning_rate": 0.00016458310865598617, "loss": 0.8134, "step": 5843 }, { "epoch": 0.2769012082444918, "grad_norm": 0.62890625, "learning_rate": 0.00016457173773750275, "loss": 0.7829, "step": 5844 }, { "epoch": 0.2769485903814262, "grad_norm": 0.042236328125, "learning_rate": 0.000164560365386887, "loss": 0.0016, "step": 5845 }, { "epoch": 0.27699597251836056, "grad_norm": 1.6015625, "learning_rate": 0.0001645489916043911, "loss": 0.7386, "step": 5846 }, { "epoch": 0.27704335465529495, "grad_norm": 0.52734375, "learning_rate": 0.0001645376163902673, "loss": 1.1325, "step": 5847 }, { "epoch": 0.27709073679222934, "grad_norm": 0.9375, "learning_rate": 0.00016452623974476795, "loss": 0.4153, "step": 5848 }, { "epoch": 0.2771381189291637, "grad_norm": 0.5703125, "learning_rate": 0.0001645148616681453, "loss": 0.8578, "step": 5849 }, { "epoch": 0.2771855010660981, "grad_norm": 0.77734375, "learning_rate": 0.00016450348216065177, "loss": 0.9116, "step": 5850 }, { "epoch": 0.27723288320303247, "grad_norm": 0.6953125, "learning_rate": 0.00016449210122253968, "loss": 1.5575, "step": 5851 }, { "epoch": 0.27728026533996686, "grad_norm": 0.203125, "learning_rate": 0.00016448071885406148, "loss": 0.1502, "step": 5852 }, { "epoch": 0.2773276474769012, "grad_norm": 0.70703125, "learning_rate": 0.00016446933505546963, "loss": 1.5635, "step": 5853 }, { "epoch": 0.2773750296138356, "grad_norm": 0.6640625, "learning_rate": 0.00016445794982701661, "loss": 0.912, "step": 5854 }, { "epoch": 0.27742241175077, "grad_norm": 0.76953125, "learning_rate": 0.00016444656316895491, "loss": 1.082, "step": 5855 }, { "epoch": 0.2774697938877043, "grad_norm": 0.58984375, "learning_rate": 0.00016443517508153707, "loss": 0.7714, "step": 5856 }, { "epoch": 0.2775171760246387, "grad_norm": 0.68359375, "learning_rate": 0.0001644237855650157, "loss": 0.6867, "step": 5857 }, { "epoch": 0.2775645581615731, "grad_norm": 0.65234375, "learning_rate": 0.00016441239461964337, "loss": 1.5592, "step": 5858 }, { "epoch": 0.27761194029850744, "grad_norm": 0.62890625, "learning_rate": 0.00016440100224567275, "loss": 0.0292, "step": 5859 }, { "epoch": 0.27765932243544184, "grad_norm": 0.072265625, "learning_rate": 0.0001643896084433565, "loss": 0.0025, "step": 5860 }, { "epoch": 0.27770670457237623, "grad_norm": 0.69921875, "learning_rate": 0.00016437821321294732, "loss": 1.1657, "step": 5861 }, { "epoch": 0.27775408670931057, "grad_norm": 0.60546875, "learning_rate": 0.00016436681655469793, "loss": 0.7367, "step": 5862 }, { "epoch": 0.27780146884624496, "grad_norm": 0.828125, "learning_rate": 0.0001643554184688611, "loss": 1.1748, "step": 5863 }, { "epoch": 0.27784885098317935, "grad_norm": 0.85546875, "learning_rate": 0.00016434401895568966, "loss": 1.0877, "step": 5864 }, { "epoch": 0.27789623312011374, "grad_norm": 0.1748046875, "learning_rate": 0.0001643326180154364, "loss": 0.0143, "step": 5865 }, { "epoch": 0.2779436152570481, "grad_norm": 0.5390625, "learning_rate": 0.00016432121564835422, "loss": 0.7058, "step": 5866 }, { "epoch": 0.2779909973939825, "grad_norm": 0.76953125, "learning_rate": 0.00016430981185469595, "loss": 1.2879, "step": 5867 }, { "epoch": 0.27803837953091687, "grad_norm": 0.333984375, "learning_rate": 0.0001642984066347146, "loss": 0.1743, "step": 5868 }, { "epoch": 0.2780857616678512, "grad_norm": 0.6015625, "learning_rate": 0.00016428699998866303, "loss": 0.734, "step": 5869 }, { "epoch": 0.2781331438047856, "grad_norm": 0.53515625, "learning_rate": 0.0001642755919167943, "loss": 1.0882, "step": 5870 }, { "epoch": 0.27818052594172, "grad_norm": 0.35546875, "learning_rate": 0.00016426418241936142, "loss": 0.0577, "step": 5871 }, { "epoch": 0.27822790807865433, "grad_norm": 0.76171875, "learning_rate": 0.00016425277149661736, "loss": 1.1437, "step": 5872 }, { "epoch": 0.2782752902155887, "grad_norm": 0.67578125, "learning_rate": 0.00016424135914881534, "loss": 1.2461, "step": 5873 }, { "epoch": 0.2783226723525231, "grad_norm": 0.65234375, "learning_rate": 0.0001642299453762084, "loss": 1.051, "step": 5874 }, { "epoch": 0.27837005448945745, "grad_norm": 0.5859375, "learning_rate": 0.00016421853017904963, "loss": 0.5847, "step": 5875 }, { "epoch": 0.27841743662639185, "grad_norm": 0.181640625, "learning_rate": 0.00016420711355759232, "loss": 0.1426, "step": 5876 }, { "epoch": 0.27846481876332624, "grad_norm": 0.53515625, "learning_rate": 0.00016419569551208958, "loss": 0.0234, "step": 5877 }, { "epoch": 0.2785122009002606, "grad_norm": 0.0107421875, "learning_rate": 0.00016418427604279471, "loss": 0.0005, "step": 5878 }, { "epoch": 0.27855958303719497, "grad_norm": 1.2890625, "learning_rate": 0.00016417285514996096, "loss": 1.1206, "step": 5879 }, { "epoch": 0.27860696517412936, "grad_norm": 0.0498046875, "learning_rate": 0.00016416143283384165, "loss": 0.0053, "step": 5880 }, { "epoch": 0.27865434731106375, "grad_norm": 0.59765625, "learning_rate": 0.0001641500090946901, "loss": 1.0242, "step": 5881 }, { "epoch": 0.2787017294479981, "grad_norm": 0.6328125, "learning_rate": 0.00016413858393275968, "loss": 0.9941, "step": 5882 }, { "epoch": 0.2787491115849325, "grad_norm": 0.74609375, "learning_rate": 0.00016412715734830376, "loss": 0.2387, "step": 5883 }, { "epoch": 0.2787964937218669, "grad_norm": 0.310546875, "learning_rate": 0.00016411572934157582, "loss": 0.0203, "step": 5884 }, { "epoch": 0.2788438758588012, "grad_norm": 0.76171875, "learning_rate": 0.00016410429991282928, "loss": 0.8312, "step": 5885 }, { "epoch": 0.2788912579957356, "grad_norm": 0.63671875, "learning_rate": 0.00016409286906231765, "loss": 0.8291, "step": 5886 }, { "epoch": 0.27893864013267, "grad_norm": 0.75, "learning_rate": 0.00016408143679029445, "loss": 1.2022, "step": 5887 }, { "epoch": 0.27898602226960434, "grad_norm": 0.59765625, "learning_rate": 0.00016407000309701323, "loss": 0.826, "step": 5888 }, { "epoch": 0.27903340440653873, "grad_norm": 0.0247802734375, "learning_rate": 0.0001640585679827276, "loss": 0.0015, "step": 5889 }, { "epoch": 0.2790807865434731, "grad_norm": 0.80859375, "learning_rate": 0.0001640471314476912, "loss": 0.9617, "step": 5890 }, { "epoch": 0.27912816868040746, "grad_norm": 0.2470703125, "learning_rate": 0.0001640356934921576, "loss": 0.0169, "step": 5891 }, { "epoch": 0.27917555081734186, "grad_norm": 0.79296875, "learning_rate": 0.00016402425411638053, "loss": 1.1381, "step": 5892 }, { "epoch": 0.27922293295427625, "grad_norm": 0.78125, "learning_rate": 0.0001640128133206137, "loss": 1.4161, "step": 5893 }, { "epoch": 0.27927031509121064, "grad_norm": 0.345703125, "learning_rate": 0.00016400137110511085, "loss": 0.0422, "step": 5894 }, { "epoch": 0.279317697228145, "grad_norm": 0.57421875, "learning_rate": 0.00016398992747012578, "loss": 1.1908, "step": 5895 }, { "epoch": 0.27936507936507937, "grad_norm": 0.298828125, "learning_rate": 0.00016397848241591224, "loss": 0.0074, "step": 5896 }, { "epoch": 0.27941246150201376, "grad_norm": 0.703125, "learning_rate": 0.00016396703594272416, "loss": 1.2606, "step": 5897 }, { "epoch": 0.2794598436389481, "grad_norm": 0.6328125, "learning_rate": 0.00016395558805081532, "loss": 0.5287, "step": 5898 }, { "epoch": 0.2795072257758825, "grad_norm": 0.71875, "learning_rate": 0.00016394413874043967, "loss": 0.6665, "step": 5899 }, { "epoch": 0.2795546079128169, "grad_norm": 1.4765625, "learning_rate": 0.00016393268801185115, "loss": 0.8474, "step": 5900 }, { "epoch": 0.2796019900497512, "grad_norm": 0.6171875, "learning_rate": 0.0001639212358653037, "loss": 1.1306, "step": 5901 }, { "epoch": 0.2796493721866856, "grad_norm": 0.322265625, "learning_rate": 0.00016390978230105128, "loss": 0.0694, "step": 5902 }, { "epoch": 0.27969675432362, "grad_norm": 0.65625, "learning_rate": 0.00016389832731934803, "loss": 0.1758, "step": 5903 }, { "epoch": 0.27974413646055435, "grad_norm": 0.51953125, "learning_rate": 0.00016388687092044787, "loss": 0.1082, "step": 5904 }, { "epoch": 0.27979151859748874, "grad_norm": 0.5859375, "learning_rate": 0.00016387541310460506, "loss": 0.6653, "step": 5905 }, { "epoch": 0.27983890073442313, "grad_norm": 0.5234375, "learning_rate": 0.00016386395387207355, "loss": 0.0071, "step": 5906 }, { "epoch": 0.2798862828713575, "grad_norm": 0.2392578125, "learning_rate": 0.0001638524932231076, "loss": 0.1529, "step": 5907 }, { "epoch": 0.27993366500829187, "grad_norm": 0.34375, "learning_rate": 0.00016384103115796132, "loss": 0.0179, "step": 5908 }, { "epoch": 0.27998104714522626, "grad_norm": 0.6484375, "learning_rate": 0.000163829567676889, "loss": 1.1626, "step": 5909 }, { "epoch": 0.28002842928216065, "grad_norm": 0.66015625, "learning_rate": 0.00016381810278014484, "loss": 1.0607, "step": 5910 }, { "epoch": 0.280075811419095, "grad_norm": 0.55078125, "learning_rate": 0.00016380663646798315, "loss": 1.2028, "step": 5911 }, { "epoch": 0.2801231935560294, "grad_norm": 0.5078125, "learning_rate": 0.00016379516874065825, "loss": 0.6336, "step": 5912 }, { "epoch": 0.2801705756929638, "grad_norm": 0.640625, "learning_rate": 0.00016378369959842444, "loss": 0.0943, "step": 5913 }, { "epoch": 0.2802179578298981, "grad_norm": 0.890625, "learning_rate": 0.00016377222904153608, "loss": 1.0451, "step": 5914 }, { "epoch": 0.2802653399668325, "grad_norm": 0.78125, "learning_rate": 0.00016376075707024766, "loss": 1.0668, "step": 5915 }, { "epoch": 0.2803127221037669, "grad_norm": 0.84375, "learning_rate": 0.00016374928368481352, "loss": 1.0897, "step": 5916 }, { "epoch": 0.28036010424070124, "grad_norm": 0.65234375, "learning_rate": 0.00016373780888548817, "loss": 0.1122, "step": 5917 }, { "epoch": 0.28040748637763563, "grad_norm": 0.515625, "learning_rate": 0.00016372633267252616, "loss": 0.9279, "step": 5918 }, { "epoch": 0.28045486851457, "grad_norm": 0.7109375, "learning_rate": 0.00016371485504618191, "loss": 1.1632, "step": 5919 }, { "epoch": 0.28050225065150436, "grad_norm": 0.51953125, "learning_rate": 0.00016370337600671008, "loss": 0.6464, "step": 5920 }, { "epoch": 0.28054963278843875, "grad_norm": 0.055419921875, "learning_rate": 0.00016369189555436525, "loss": 0.0027, "step": 5921 }, { "epoch": 0.28059701492537314, "grad_norm": 0.73828125, "learning_rate": 0.00016368041368940194, "loss": 0.9308, "step": 5922 }, { "epoch": 0.2806443970623075, "grad_norm": 0.609375, "learning_rate": 0.00016366893041207492, "loss": 1.0763, "step": 5923 }, { "epoch": 0.2806917791992419, "grad_norm": 0.318359375, "learning_rate": 0.00016365744572263886, "loss": 0.135, "step": 5924 }, { "epoch": 0.28073916133617627, "grad_norm": 0.62109375, "learning_rate": 0.00016364595962134844, "loss": 1.0708, "step": 5925 }, { "epoch": 0.28078654347311066, "grad_norm": 1.109375, "learning_rate": 0.00016363447210845843, "loss": 0.6604, "step": 5926 }, { "epoch": 0.280833925610045, "grad_norm": 0.66015625, "learning_rate": 0.0001636229831842236, "loss": 1.2479, "step": 5927 }, { "epoch": 0.2808813077469794, "grad_norm": 0.6953125, "learning_rate": 0.0001636114928488988, "loss": 1.0728, "step": 5928 }, { "epoch": 0.2809286898839138, "grad_norm": 0.6796875, "learning_rate": 0.0001636000011027388, "loss": 0.856, "step": 5929 }, { "epoch": 0.2809760720208481, "grad_norm": 0.412109375, "learning_rate": 0.00016358850794599857, "loss": 0.0754, "step": 5930 }, { "epoch": 0.2810234541577825, "grad_norm": 0.57421875, "learning_rate": 0.00016357701337893295, "loss": 0.7193, "step": 5931 }, { "epoch": 0.2810708362947169, "grad_norm": 0.54296875, "learning_rate": 0.00016356551740179687, "loss": 0.6159, "step": 5932 }, { "epoch": 0.28111821843165125, "grad_norm": 0.36328125, "learning_rate": 0.00016355402001484533, "loss": 0.0552, "step": 5933 }, { "epoch": 0.28116560056858564, "grad_norm": 0.69140625, "learning_rate": 0.00016354252121833333, "loss": 1.3275, "step": 5934 }, { "epoch": 0.28121298270552003, "grad_norm": 2.84375, "learning_rate": 0.00016353102101251588, "loss": 0.8097, "step": 5935 }, { "epoch": 0.28126036484245437, "grad_norm": 0.51953125, "learning_rate": 0.00016351951939764806, "loss": 0.7637, "step": 5936 }, { "epoch": 0.28130774697938876, "grad_norm": 0.59765625, "learning_rate": 0.00016350801637398497, "loss": 0.5522, "step": 5937 }, { "epoch": 0.28135512911632315, "grad_norm": 0.61328125, "learning_rate": 0.00016349651194178173, "loss": 1.2141, "step": 5938 }, { "epoch": 0.28140251125325755, "grad_norm": 0.6015625, "learning_rate": 0.00016348500610129346, "loss": 1.445, "step": 5939 }, { "epoch": 0.2814498933901919, "grad_norm": 0.6640625, "learning_rate": 0.00016347349885277538, "loss": 1.3921, "step": 5940 }, { "epoch": 0.2814972755271263, "grad_norm": 0.00628662109375, "learning_rate": 0.00016346199019648272, "loss": 0.0003, "step": 5941 }, { "epoch": 0.28154465766406067, "grad_norm": 0.48046875, "learning_rate": 0.00016345048013267067, "loss": 0.0337, "step": 5942 }, { "epoch": 0.281592039800995, "grad_norm": 0.03515625, "learning_rate": 0.0001634389686615946, "loss": 0.0011, "step": 5943 }, { "epoch": 0.2816394219379294, "grad_norm": 0.73046875, "learning_rate": 0.00016342745578350976, "loss": 1.8758, "step": 5944 }, { "epoch": 0.2816868040748638, "grad_norm": 0.69140625, "learning_rate": 0.00016341594149867154, "loss": 0.994, "step": 5945 }, { "epoch": 0.28173418621179813, "grad_norm": 0.00262451171875, "learning_rate": 0.00016340442580733527, "loss": 0.0002, "step": 5946 }, { "epoch": 0.2817815683487325, "grad_norm": 0.7265625, "learning_rate": 0.00016339290870975636, "loss": 1.3652, "step": 5947 }, { "epoch": 0.2818289504856669, "grad_norm": 0.15625, "learning_rate": 0.00016338139020619026, "loss": 0.0128, "step": 5948 }, { "epoch": 0.28187633262260126, "grad_norm": 0.77734375, "learning_rate": 0.00016336987029689243, "loss": 1.0002, "step": 5949 }, { "epoch": 0.28192371475953565, "grad_norm": 1.453125, "learning_rate": 0.00016335834898211838, "loss": 1.2971, "step": 5950 }, { "epoch": 0.28197109689647004, "grad_norm": 0.67578125, "learning_rate": 0.00016334682626212364, "loss": 1.3942, "step": 5951 }, { "epoch": 0.2820184790334044, "grad_norm": 0.734375, "learning_rate": 0.00016333530213716378, "loss": 1.3696, "step": 5952 }, { "epoch": 0.28206586117033877, "grad_norm": 0.8828125, "learning_rate": 0.00016332377660749437, "loss": 1.3783, "step": 5953 }, { "epoch": 0.28211324330727316, "grad_norm": 0.71484375, "learning_rate": 0.00016331224967337102, "loss": 1.2957, "step": 5954 }, { "epoch": 0.28216062544420756, "grad_norm": 0.341796875, "learning_rate": 0.00016330072133504943, "loss": 0.134, "step": 5955 }, { "epoch": 0.2822080075811419, "grad_norm": 0.30078125, "learning_rate": 0.00016328919159278526, "loss": 0.0179, "step": 5956 }, { "epoch": 0.2822553897180763, "grad_norm": 0.53125, "learning_rate": 0.00016327766044683424, "loss": 0.594, "step": 5957 }, { "epoch": 0.2823027718550107, "grad_norm": 0.7265625, "learning_rate": 0.00016326612789745212, "loss": 0.7436, "step": 5958 }, { "epoch": 0.282350153991945, "grad_norm": 0.6015625, "learning_rate": 0.00016325459394489467, "loss": 1.401, "step": 5959 }, { "epoch": 0.2823975361288794, "grad_norm": 0.2099609375, "learning_rate": 0.0001632430585894177, "loss": 0.1428, "step": 5960 }, { "epoch": 0.2824449182658138, "grad_norm": 0.578125, "learning_rate": 0.00016323152183127704, "loss": 0.8121, "step": 5961 }, { "epoch": 0.28249230040274814, "grad_norm": 0.734375, "learning_rate": 0.00016321998367072857, "loss": 1.1734, "step": 5962 }, { "epoch": 0.28253968253968254, "grad_norm": 0.68359375, "learning_rate": 0.0001632084441080282, "loss": 1.1679, "step": 5963 }, { "epoch": 0.28258706467661693, "grad_norm": 0.796875, "learning_rate": 0.0001631969031434319, "loss": 0.1729, "step": 5964 }, { "epoch": 0.28263444681355127, "grad_norm": 0.53515625, "learning_rate": 0.0001631853607771956, "loss": 1.3251, "step": 5965 }, { "epoch": 0.28268182895048566, "grad_norm": 0.90234375, "learning_rate": 0.00016317381700957525, "loss": 0.8001, "step": 5966 }, { "epoch": 0.28272921108742005, "grad_norm": 0.62109375, "learning_rate": 0.00016316227184082698, "loss": 0.8081, "step": 5967 }, { "epoch": 0.28277659322435444, "grad_norm": 0.71484375, "learning_rate": 0.0001631507252712068, "loss": 1.072, "step": 5968 }, { "epoch": 0.2828239753612888, "grad_norm": 0.3828125, "learning_rate": 0.00016313917730097078, "loss": 0.3934, "step": 5969 }, { "epoch": 0.2828713574982232, "grad_norm": 0.50390625, "learning_rate": 0.00016312762793037507, "loss": 1.1343, "step": 5970 }, { "epoch": 0.28291873963515757, "grad_norm": 0.259765625, "learning_rate": 0.00016311607715967577, "loss": 0.0241, "step": 5971 }, { "epoch": 0.2829661217720919, "grad_norm": 0.66796875, "learning_rate": 0.00016310452498912917, "loss": 1.0106, "step": 5972 }, { "epoch": 0.2830135039090263, "grad_norm": 0.70703125, "learning_rate": 0.00016309297141899135, "loss": 1.1859, "step": 5973 }, { "epoch": 0.2830608860459607, "grad_norm": 0.7890625, "learning_rate": 0.00016308141644951867, "loss": 1.3627, "step": 5974 }, { "epoch": 0.28310826818289503, "grad_norm": 0.78125, "learning_rate": 0.0001630698600809674, "loss": 0.8214, "step": 5975 }, { "epoch": 0.2831556503198294, "grad_norm": 0.55859375, "learning_rate": 0.00016305830231359378, "loss": 1.4405, "step": 5976 }, { "epoch": 0.2832030324567638, "grad_norm": 0.6015625, "learning_rate": 0.00016304674314765417, "loss": 1.1925, "step": 5977 }, { "epoch": 0.28325041459369815, "grad_norm": 0.01171875, "learning_rate": 0.00016303518258340492, "loss": 0.0006, "step": 5978 }, { "epoch": 0.28329779673063255, "grad_norm": 0.65234375, "learning_rate": 0.0001630236206211025, "loss": 0.6789, "step": 5979 }, { "epoch": 0.28334517886756694, "grad_norm": 0.7109375, "learning_rate": 0.00016301205726100332, "loss": 1.5498, "step": 5980 }, { "epoch": 0.2833925610045013, "grad_norm": 0.6171875, "learning_rate": 0.00016300049250336383, "loss": 0.7349, "step": 5981 }, { "epoch": 0.28343994314143567, "grad_norm": 0.1494140625, "learning_rate": 0.0001629889263484405, "loss": 0.0111, "step": 5982 }, { "epoch": 0.28348732527837006, "grad_norm": 0.9609375, "learning_rate": 0.00016297735879648988, "loss": 0.3184, "step": 5983 }, { "epoch": 0.28353470741530445, "grad_norm": 0.703125, "learning_rate": 0.0001629657898477685, "loss": 0.8369, "step": 5984 }, { "epoch": 0.2835820895522388, "grad_norm": 0.00897216796875, "learning_rate": 0.00016295421950253302, "loss": 0.0005, "step": 5985 }, { "epoch": 0.2836294716891732, "grad_norm": 1.03125, "learning_rate": 0.00016294264776103997, "loss": 0.7341, "step": 5986 }, { "epoch": 0.2836768538261076, "grad_norm": 0.8515625, "learning_rate": 0.00016293107462354603, "loss": 0.6819, "step": 5987 }, { "epoch": 0.2837242359630419, "grad_norm": 0.5546875, "learning_rate": 0.00016291950009030793, "loss": 0.9467, "step": 5988 }, { "epoch": 0.2837716180999763, "grad_norm": 0.474609375, "learning_rate": 0.00016290792416158228, "loss": 0.7839, "step": 5989 }, { "epoch": 0.2838190002369107, "grad_norm": 0.58203125, "learning_rate": 0.00016289634683762591, "loss": 0.1468, "step": 5990 }, { "epoch": 0.28386638237384504, "grad_norm": 0.546875, "learning_rate": 0.00016288476811869557, "loss": 0.7732, "step": 5991 }, { "epoch": 0.28391376451077943, "grad_norm": 1.0, "learning_rate": 0.00016287318800504804, "loss": 0.0987, "step": 5992 }, { "epoch": 0.2839611466477138, "grad_norm": 0.83984375, "learning_rate": 0.0001628616064969402, "loss": 0.8688, "step": 5993 }, { "epoch": 0.28400852878464816, "grad_norm": 0.66015625, "learning_rate": 0.00016285002359462883, "loss": 1.3546, "step": 5994 }, { "epoch": 0.28405591092158256, "grad_norm": 0.78125, "learning_rate": 0.00016283843929837093, "loss": 0.8868, "step": 5995 }, { "epoch": 0.28410329305851695, "grad_norm": 0.58203125, "learning_rate": 0.00016282685360842334, "loss": 0.8705, "step": 5996 }, { "epoch": 0.28415067519545134, "grad_norm": 0.6640625, "learning_rate": 0.00016281526652504308, "loss": 1.1959, "step": 5997 }, { "epoch": 0.2841980573323857, "grad_norm": 0.640625, "learning_rate": 0.00016280367804848711, "loss": 1.3464, "step": 5998 }, { "epoch": 0.28424543946932007, "grad_norm": 0.1533203125, "learning_rate": 0.00016279208817901247, "loss": 0.0082, "step": 5999 }, { "epoch": 0.28429282160625446, "grad_norm": 0.67578125, "learning_rate": 0.00016278049691687617, "loss": 1.8539, "step": 6000 }, { "epoch": 0.2843402037431888, "grad_norm": 0.99609375, "learning_rate": 0.00016276890426233535, "loss": 1.017, "step": 6001 }, { "epoch": 0.2843875858801232, "grad_norm": 0.43359375, "learning_rate": 0.00016275731021564706, "loss": 0.1131, "step": 6002 }, { "epoch": 0.2844349680170576, "grad_norm": 0.71875, "learning_rate": 0.00016274571477706848, "loss": 1.4311, "step": 6003 }, { "epoch": 0.2844823501539919, "grad_norm": 0.4921875, "learning_rate": 0.0001627341179468568, "loss": 1.0981, "step": 6004 }, { "epoch": 0.2845297322909263, "grad_norm": 0.427734375, "learning_rate": 0.00016272251972526921, "loss": 0.5771, "step": 6005 }, { "epoch": 0.2845771144278607, "grad_norm": 0.75, "learning_rate": 0.00016271092011256293, "loss": 0.0699, "step": 6006 }, { "epoch": 0.28462449656479505, "grad_norm": 0.38671875, "learning_rate": 0.00016269931910899526, "loss": 0.1502, "step": 6007 }, { "epoch": 0.28467187870172944, "grad_norm": 0.57421875, "learning_rate": 0.00016268771671482343, "loss": 1.0168, "step": 6008 }, { "epoch": 0.28471926083866383, "grad_norm": 0.78125, "learning_rate": 0.00016267611293030484, "loss": 0.2216, "step": 6009 }, { "epoch": 0.28476664297559817, "grad_norm": 0.51171875, "learning_rate": 0.00016266450775569684, "loss": 0.639, "step": 6010 }, { "epoch": 0.28481402511253257, "grad_norm": 0.9453125, "learning_rate": 0.00016265290119125677, "loss": 1.3192, "step": 6011 }, { "epoch": 0.28486140724946696, "grad_norm": 0.216796875, "learning_rate": 0.0001626412932372421, "loss": 0.1411, "step": 6012 }, { "epoch": 0.28490878938640135, "grad_norm": 0.6328125, "learning_rate": 0.00016262968389391027, "loss": 1.1672, "step": 6013 }, { "epoch": 0.2849561715233357, "grad_norm": 0.5234375, "learning_rate": 0.0001626180731615188, "loss": 0.4654, "step": 6014 }, { "epoch": 0.2850035536602701, "grad_norm": 0.53515625, "learning_rate": 0.00016260646104032512, "loss": 0.8847, "step": 6015 }, { "epoch": 0.2850509357972045, "grad_norm": 0.26171875, "learning_rate": 0.00016259484753058682, "loss": 0.1109, "step": 6016 }, { "epoch": 0.2850983179341388, "grad_norm": 0.68359375, "learning_rate": 0.0001625832326325615, "loss": 1.344, "step": 6017 }, { "epoch": 0.2851457000710732, "grad_norm": 0.390625, "learning_rate": 0.0001625716163465067, "loss": 0.1425, "step": 6018 }, { "epoch": 0.2851930822080076, "grad_norm": 0.162109375, "learning_rate": 0.00016255999867268012, "loss": 0.0183, "step": 6019 }, { "epoch": 0.28524046434494194, "grad_norm": 0.66796875, "learning_rate": 0.00016254837961133942, "loss": 0.718, "step": 6020 }, { "epoch": 0.28528784648187633, "grad_norm": 0.62109375, "learning_rate": 0.00016253675916274226, "loss": 1.0985, "step": 6021 }, { "epoch": 0.2853352286188107, "grad_norm": 0.00787353515625, "learning_rate": 0.00016252513732714643, "loss": 0.0006, "step": 6022 }, { "epoch": 0.28538261075574506, "grad_norm": 0.004425048828125, "learning_rate": 0.00016251351410480962, "loss": 0.0002, "step": 6023 }, { "epoch": 0.28542999289267945, "grad_norm": 0.65625, "learning_rate": 0.00016250188949598965, "loss": 1.2493, "step": 6024 }, { "epoch": 0.28547737502961384, "grad_norm": 0.16796875, "learning_rate": 0.00016249026350094437, "loss": 0.1221, "step": 6025 }, { "epoch": 0.28552475716654824, "grad_norm": 0.76953125, "learning_rate": 0.00016247863611993158, "loss": 0.8118, "step": 6026 }, { "epoch": 0.2855721393034826, "grad_norm": 0.80078125, "learning_rate": 0.0001624670073532092, "loss": 1.1523, "step": 6027 }, { "epoch": 0.28561952144041697, "grad_norm": 0.23046875, "learning_rate": 0.00016245537720103515, "loss": 0.0994, "step": 6028 }, { "epoch": 0.28566690357735136, "grad_norm": 0.470703125, "learning_rate": 0.00016244374566366733, "loss": 0.9692, "step": 6029 }, { "epoch": 0.2857142857142857, "grad_norm": 0.50390625, "learning_rate": 0.00016243211274136378, "loss": 0.5362, "step": 6030 }, { "epoch": 0.2857616678512201, "grad_norm": 0.0086669921875, "learning_rate": 0.00016242047843438245, "loss": 0.0004, "step": 6031 }, { "epoch": 0.2858090499881545, "grad_norm": 0.7265625, "learning_rate": 0.0001624088427429814, "loss": 1.7455, "step": 6032 }, { "epoch": 0.2858564321250888, "grad_norm": 0.62890625, "learning_rate": 0.00016239720566741867, "loss": 0.9899, "step": 6033 }, { "epoch": 0.2859038142620232, "grad_norm": 0.1357421875, "learning_rate": 0.0001623855672079524, "loss": 0.0159, "step": 6034 }, { "epoch": 0.2859511963989576, "grad_norm": 0.7890625, "learning_rate": 0.0001623739273648407, "loss": 0.4276, "step": 6035 }, { "epoch": 0.28599857853589195, "grad_norm": 0.61328125, "learning_rate": 0.00016236228613834171, "loss": 0.9734, "step": 6036 }, { "epoch": 0.28604596067282634, "grad_norm": 0.033203125, "learning_rate": 0.00016235064352871365, "loss": 0.0035, "step": 6037 }, { "epoch": 0.28609334280976073, "grad_norm": 0.9296875, "learning_rate": 0.00016233899953621475, "loss": 1.3255, "step": 6038 }, { "epoch": 0.28614072494669507, "grad_norm": 0.9140625, "learning_rate": 0.00016232735416110323, "loss": 2.0154, "step": 6039 }, { "epoch": 0.28618810708362946, "grad_norm": 0.58984375, "learning_rate": 0.00016231570740363738, "loss": 1.0311, "step": 6040 }, { "epoch": 0.28623548922056385, "grad_norm": 0.66015625, "learning_rate": 0.00016230405926407552, "loss": 0.7651, "step": 6041 }, { "epoch": 0.28628287135749825, "grad_norm": 0.69921875, "learning_rate": 0.00016229240974267596, "loss": 0.9131, "step": 6042 }, { "epoch": 0.2863302534944326, "grad_norm": 0.287109375, "learning_rate": 0.00016228075883969713, "loss": 0.0416, "step": 6043 }, { "epoch": 0.286377635631367, "grad_norm": 0.75, "learning_rate": 0.0001622691065553974, "loss": 1.0857, "step": 6044 }, { "epoch": 0.28642501776830137, "grad_norm": 1.1875, "learning_rate": 0.00016225745289003522, "loss": 0.7592, "step": 6045 }, { "epoch": 0.2864723999052357, "grad_norm": 0.65234375, "learning_rate": 0.00016224579784386903, "loss": 1.2407, "step": 6046 }, { "epoch": 0.2865197820421701, "grad_norm": 0.58203125, "learning_rate": 0.00016223414141715737, "loss": 1.2729, "step": 6047 }, { "epoch": 0.2865671641791045, "grad_norm": 0.69921875, "learning_rate": 0.00016222248361015873, "loss": 1.2148, "step": 6048 }, { "epoch": 0.28661454631603883, "grad_norm": 0.8515625, "learning_rate": 0.00016221082442313168, "loss": 0.9701, "step": 6049 }, { "epoch": 0.2866619284529732, "grad_norm": 0.54296875, "learning_rate": 0.00016219916385633483, "loss": 0.9508, "step": 6050 }, { "epoch": 0.2867093105899076, "grad_norm": 0.185546875, "learning_rate": 0.00016218750191002675, "loss": 0.0976, "step": 6051 }, { "epoch": 0.28675669272684196, "grad_norm": 0.263671875, "learning_rate": 0.0001621758385844661, "loss": 0.0691, "step": 6052 }, { "epoch": 0.28680407486377635, "grad_norm": 0.69140625, "learning_rate": 0.0001621641738799116, "loss": 1.346, "step": 6053 }, { "epoch": 0.28685145700071074, "grad_norm": 0.96484375, "learning_rate": 0.00016215250779662193, "loss": 0.1656, "step": 6054 }, { "epoch": 0.28689883913764513, "grad_norm": 0.55859375, "learning_rate": 0.0001621408403348559, "loss": 0.9364, "step": 6055 }, { "epoch": 0.28694622127457947, "grad_norm": 0.0032501220703125, "learning_rate": 0.00016212917149487216, "loss": 0.0002, "step": 6056 }, { "epoch": 0.28699360341151386, "grad_norm": 0.0036468505859375, "learning_rate": 0.00016211750127692956, "loss": 0.0003, "step": 6057 }, { "epoch": 0.28704098554844826, "grad_norm": 0.57421875, "learning_rate": 0.00016210582968128695, "loss": 0.9902, "step": 6058 }, { "epoch": 0.2870883676853826, "grad_norm": 0.7890625, "learning_rate": 0.00016209415670820321, "loss": 0.6357, "step": 6059 }, { "epoch": 0.287135749822317, "grad_norm": 0.6796875, "learning_rate": 0.00016208248235793723, "loss": 1.3255, "step": 6060 }, { "epoch": 0.2871831319592514, "grad_norm": 0.73828125, "learning_rate": 0.00016207080663074787, "loss": 0.289, "step": 6061 }, { "epoch": 0.2872305140961857, "grad_norm": 0.65234375, "learning_rate": 0.0001620591295268942, "loss": 0.5338, "step": 6062 }, { "epoch": 0.2872778962331201, "grad_norm": 0.68359375, "learning_rate": 0.0001620474510466351, "loss": 0.089, "step": 6063 }, { "epoch": 0.2873252783700545, "grad_norm": 0.193359375, "learning_rate": 0.0001620357711902296, "loss": 0.1438, "step": 6064 }, { "epoch": 0.28737266050698884, "grad_norm": 0.69140625, "learning_rate": 0.0001620240899579368, "loss": 1.2508, "step": 6065 }, { "epoch": 0.28742004264392323, "grad_norm": 0.2236328125, "learning_rate": 0.00016201240735001576, "loss": 0.0149, "step": 6066 }, { "epoch": 0.2874674247808576, "grad_norm": 0.058837890625, "learning_rate": 0.00016200072336672555, "loss": 0.0066, "step": 6067 }, { "epoch": 0.28751480691779197, "grad_norm": 0.68359375, "learning_rate": 0.00016198903800832538, "loss": 1.5147, "step": 6068 }, { "epoch": 0.28756218905472636, "grad_norm": 0.640625, "learning_rate": 0.00016197735127507435, "loss": 0.776, "step": 6069 }, { "epoch": 0.28760957119166075, "grad_norm": 0.953125, "learning_rate": 0.00016196566316723169, "loss": 0.5439, "step": 6070 }, { "epoch": 0.28765695332859514, "grad_norm": 0.52734375, "learning_rate": 0.0001619539736850566, "loss": 0.7401, "step": 6071 }, { "epoch": 0.2877043354655295, "grad_norm": 0.6953125, "learning_rate": 0.00016194228282880838, "loss": 1.199, "step": 6072 }, { "epoch": 0.2877517176024639, "grad_norm": 0.671875, "learning_rate": 0.0001619305905987463, "loss": 1.0443, "step": 6073 }, { "epoch": 0.28779909973939827, "grad_norm": 0.70703125, "learning_rate": 0.0001619188969951297, "loss": 1.0907, "step": 6074 }, { "epoch": 0.2878464818763326, "grad_norm": 0.5625, "learning_rate": 0.00016190720201821792, "loss": 0.7844, "step": 6075 }, { "epoch": 0.287893864013267, "grad_norm": 0.63671875, "learning_rate": 0.0001618955056682703, "loss": 1.1599, "step": 6076 }, { "epoch": 0.2879412461502014, "grad_norm": 0.7109375, "learning_rate": 0.00016188380794554633, "loss": 1.07, "step": 6077 }, { "epoch": 0.28798862828713573, "grad_norm": 0.6953125, "learning_rate": 0.00016187210885030542, "loss": 0.3129, "step": 6078 }, { "epoch": 0.2880360104240701, "grad_norm": 0.46484375, "learning_rate": 0.00016186040838280703, "loss": 1.0747, "step": 6079 }, { "epoch": 0.2880833925610045, "grad_norm": 0.65625, "learning_rate": 0.00016184870654331065, "loss": 0.9774, "step": 6080 }, { "epoch": 0.28813077469793885, "grad_norm": 0.8125, "learning_rate": 0.00016183700333207587, "loss": 0.7071, "step": 6081 }, { "epoch": 0.28817815683487324, "grad_norm": 0.462890625, "learning_rate": 0.0001618252987493622, "loss": 0.4575, "step": 6082 }, { "epoch": 0.28822553897180764, "grad_norm": 0.46484375, "learning_rate": 0.00016181359279542927, "loss": 0.6988, "step": 6083 }, { "epoch": 0.28827292110874203, "grad_norm": 0.703125, "learning_rate": 0.0001618018854705367, "loss": 1.4512, "step": 6084 }, { "epoch": 0.28832030324567637, "grad_norm": 0.76953125, "learning_rate": 0.00016179017677494414, "loss": 0.4298, "step": 6085 }, { "epoch": 0.28836768538261076, "grad_norm": 0.63671875, "learning_rate": 0.00016177846670891125, "loss": 0.8053, "step": 6086 }, { "epoch": 0.28841506751954515, "grad_norm": 0.53515625, "learning_rate": 0.00016176675527269777, "loss": 1.2651, "step": 6087 }, { "epoch": 0.2884624496564795, "grad_norm": 0.048828125, "learning_rate": 0.00016175504246656347, "loss": 0.004, "step": 6088 }, { "epoch": 0.2885098317934139, "grad_norm": 0.62109375, "learning_rate": 0.0001617433282907681, "loss": 0.7184, "step": 6089 }, { "epoch": 0.2885572139303483, "grad_norm": 0.287109375, "learning_rate": 0.00016173161274557146, "loss": 0.0291, "step": 6090 }, { "epoch": 0.2886045960672826, "grad_norm": 0.81640625, "learning_rate": 0.00016171989583123341, "loss": 1.0737, "step": 6091 }, { "epoch": 0.288651978204217, "grad_norm": 0.5390625, "learning_rate": 0.00016170817754801383, "loss": 0.7138, "step": 6092 }, { "epoch": 0.2886993603411514, "grad_norm": 0.55859375, "learning_rate": 0.0001616964578961726, "loss": 0.7767, "step": 6093 }, { "epoch": 0.28874674247808574, "grad_norm": 0.0257568359375, "learning_rate": 0.00016168473687596963, "loss": 0.0022, "step": 6094 }, { "epoch": 0.28879412461502013, "grad_norm": 0.6796875, "learning_rate": 0.0001616730144876649, "loss": 0.6575, "step": 6095 }, { "epoch": 0.2888415067519545, "grad_norm": 0.546875, "learning_rate": 0.00016166129073151843, "loss": 0.1054, "step": 6096 }, { "epoch": 0.28888888888888886, "grad_norm": 0.70703125, "learning_rate": 0.0001616495656077902, "loss": 0.9698, "step": 6097 }, { "epoch": 0.28893627102582325, "grad_norm": 0.66796875, "learning_rate": 0.00016163783911674021, "loss": 1.0205, "step": 6098 }, { "epoch": 0.28898365316275765, "grad_norm": 0.59375, "learning_rate": 0.00016162611125862867, "loss": 0.8748, "step": 6099 }, { "epoch": 0.28903103529969204, "grad_norm": 0.78515625, "learning_rate": 0.00016161438203371562, "loss": 1.3839, "step": 6100 }, { "epoch": 0.2890784174366264, "grad_norm": 0.8203125, "learning_rate": 0.0001616026514422612, "loss": 0.9761, "step": 6101 }, { "epoch": 0.28912579957356077, "grad_norm": 0.61328125, "learning_rate": 0.00016159091948452555, "loss": 1.2139, "step": 6102 }, { "epoch": 0.28917318171049516, "grad_norm": 0.94140625, "learning_rate": 0.00016157918616076895, "loss": 1.4222, "step": 6103 }, { "epoch": 0.2892205638474295, "grad_norm": 0.28125, "learning_rate": 0.00016156745147125157, "loss": 0.0964, "step": 6104 }, { "epoch": 0.2892679459843639, "grad_norm": 0.73046875, "learning_rate": 0.0001615557154162337, "loss": 1.0896, "step": 6105 }, { "epoch": 0.2893153281212983, "grad_norm": 0.41796875, "learning_rate": 0.00016154397799597562, "loss": 0.0228, "step": 6106 }, { "epoch": 0.2893627102582326, "grad_norm": 0.63671875, "learning_rate": 0.00016153223921073768, "loss": 1.5017, "step": 6107 }, { "epoch": 0.289410092395167, "grad_norm": 0.67578125, "learning_rate": 0.0001615204990607802, "loss": 0.9594, "step": 6108 }, { "epoch": 0.2894574745321014, "grad_norm": 0.51171875, "learning_rate": 0.0001615087575463636, "loss": 0.7924, "step": 6109 }, { "epoch": 0.28950485666903575, "grad_norm": 0.55078125, "learning_rate": 0.00016149701466774827, "loss": 0.9055, "step": 6110 }, { "epoch": 0.28955223880597014, "grad_norm": 0.5390625, "learning_rate": 0.00016148527042519466, "loss": 0.648, "step": 6111 }, { "epoch": 0.28959962094290453, "grad_norm": 0.68359375, "learning_rate": 0.00016147352481896322, "loss": 1.648, "step": 6112 }, { "epoch": 0.2896470030798389, "grad_norm": 0.7109375, "learning_rate": 0.0001614617778493145, "loss": 1.1806, "step": 6113 }, { "epoch": 0.28969438521677326, "grad_norm": 0.52734375, "learning_rate": 0.000161450029516509, "loss": 0.5153, "step": 6114 }, { "epoch": 0.28974176735370766, "grad_norm": 1.03125, "learning_rate": 0.0001614382798208073, "loss": 1.104, "step": 6115 }, { "epoch": 0.28978914949064205, "grad_norm": 0.6328125, "learning_rate": 0.00016142652876247, "loss": 0.916, "step": 6116 }, { "epoch": 0.2898365316275764, "grad_norm": 0.66015625, "learning_rate": 0.00016141477634175772, "loss": 1.2782, "step": 6117 }, { "epoch": 0.2898839137645108, "grad_norm": 0.67578125, "learning_rate": 0.00016140302255893114, "loss": 1.3098, "step": 6118 }, { "epoch": 0.2899312959014452, "grad_norm": 0.1142578125, "learning_rate": 0.0001613912674142509, "loss": 0.0161, "step": 6119 }, { "epoch": 0.2899786780383795, "grad_norm": 0.6484375, "learning_rate": 0.00016137951090797775, "loss": 1.2315, "step": 6120 }, { "epoch": 0.2900260601753139, "grad_norm": 0.671875, "learning_rate": 0.0001613677530403724, "loss": 0.7916, "step": 6121 }, { "epoch": 0.2900734423122483, "grad_norm": 0.7109375, "learning_rate": 0.00016135599381169566, "loss": 1.5569, "step": 6122 }, { "epoch": 0.29012082444918263, "grad_norm": 0.5546875, "learning_rate": 0.00016134423322220834, "loss": 0.2116, "step": 6123 }, { "epoch": 0.29016820658611703, "grad_norm": 0.63671875, "learning_rate": 0.00016133247127217125, "loss": 0.8461, "step": 6124 }, { "epoch": 0.2902155887230514, "grad_norm": 0.66796875, "learning_rate": 0.00016132070796184532, "loss": 0.9446, "step": 6125 }, { "epoch": 0.29026297085998576, "grad_norm": 0.61328125, "learning_rate": 0.00016130894329149137, "loss": 1.0857, "step": 6126 }, { "epoch": 0.29031035299692015, "grad_norm": 0.55078125, "learning_rate": 0.00016129717726137037, "loss": 0.6333, "step": 6127 }, { "epoch": 0.29035773513385454, "grad_norm": 0.1552734375, "learning_rate": 0.00016128540987174326, "loss": 0.0076, "step": 6128 }, { "epoch": 0.29040511727078894, "grad_norm": 0.734375, "learning_rate": 0.00016127364112287107, "loss": 1.0762, "step": 6129 }, { "epoch": 0.2904524994077233, "grad_norm": 0.58203125, "learning_rate": 0.00016126187101501475, "loss": 1.1474, "step": 6130 }, { "epoch": 0.29049988154465767, "grad_norm": 0.61328125, "learning_rate": 0.0001612500995484354, "loss": 1.068, "step": 6131 }, { "epoch": 0.29054726368159206, "grad_norm": 0.46875, "learning_rate": 0.00016123832672339407, "loss": 0.1338, "step": 6132 }, { "epoch": 0.2905946458185264, "grad_norm": 0.201171875, "learning_rate": 0.00016122655254015192, "loss": 0.0309, "step": 6133 }, { "epoch": 0.2906420279554608, "grad_norm": 0.23046875, "learning_rate": 0.00016121477699896998, "loss": 0.0521, "step": 6134 }, { "epoch": 0.2906894100923952, "grad_norm": 0.65625, "learning_rate": 0.00016120300010010954, "loss": 1.0097, "step": 6135 }, { "epoch": 0.2907367922293295, "grad_norm": 0.640625, "learning_rate": 0.00016119122184383175, "loss": 0.8926, "step": 6136 }, { "epoch": 0.2907841743662639, "grad_norm": 0.61328125, "learning_rate": 0.0001611794422303978, "loss": 1.2017, "step": 6137 }, { "epoch": 0.2908315565031983, "grad_norm": 0.58984375, "learning_rate": 0.000161167661260069, "loss": 1.012, "step": 6138 }, { "epoch": 0.29087893864013264, "grad_norm": 0.345703125, "learning_rate": 0.00016115587893310665, "loss": 0.0874, "step": 6139 }, { "epoch": 0.29092632077706704, "grad_norm": 0.9296875, "learning_rate": 0.00016114409524977202, "loss": 1.116, "step": 6140 }, { "epoch": 0.29097370291400143, "grad_norm": 0.6875, "learning_rate": 0.0001611323102103265, "loss": 0.6605, "step": 6141 }, { "epoch": 0.2910210850509358, "grad_norm": 0.6015625, "learning_rate": 0.00016112052381503147, "loss": 1.1295, "step": 6142 }, { "epoch": 0.29106846718787016, "grad_norm": 0.77734375, "learning_rate": 0.0001611087360641483, "loss": 0.9839, "step": 6143 }, { "epoch": 0.29111584932480455, "grad_norm": 0.6875, "learning_rate": 0.00016109694695793847, "loss": 0.8536, "step": 6144 }, { "epoch": 0.29116323146173895, "grad_norm": 0.349609375, "learning_rate": 0.0001610851564966634, "loss": 0.048, "step": 6145 }, { "epoch": 0.2912106135986733, "grad_norm": 0.10888671875, "learning_rate": 0.00016107336468058466, "loss": 0.015, "step": 6146 }, { "epoch": 0.2912579957356077, "grad_norm": 0.609375, "learning_rate": 0.00016106157150996375, "loss": 0.9039, "step": 6147 }, { "epoch": 0.29130537787254207, "grad_norm": 0.9375, "learning_rate": 0.0001610497769850622, "loss": 0.2915, "step": 6148 }, { "epoch": 0.2913527600094764, "grad_norm": 0.5703125, "learning_rate": 0.00016103798110614164, "loss": 0.757, "step": 6149 }, { "epoch": 0.2914001421464108, "grad_norm": 0.68359375, "learning_rate": 0.00016102618387346367, "loss": 1.1858, "step": 6150 }, { "epoch": 0.2914475242833452, "grad_norm": 0.71484375, "learning_rate": 0.00016101438528728993, "loss": 0.0603, "step": 6151 }, { "epoch": 0.29149490642027953, "grad_norm": 0.61328125, "learning_rate": 0.00016100258534788213, "loss": 0.8641, "step": 6152 }, { "epoch": 0.2915422885572139, "grad_norm": 0.50390625, "learning_rate": 0.00016099078405550194, "loss": 0.7625, "step": 6153 }, { "epoch": 0.2915896706941483, "grad_norm": 0.59375, "learning_rate": 0.00016097898141041115, "loss": 1.667, "step": 6154 }, { "epoch": 0.29163705283108265, "grad_norm": 0.5859375, "learning_rate": 0.00016096717741287148, "loss": 0.0808, "step": 6155 }, { "epoch": 0.29168443496801705, "grad_norm": 0.115234375, "learning_rate": 0.00016095537206314479, "loss": 0.0055, "step": 6156 }, { "epoch": 0.29173181710495144, "grad_norm": 0.50390625, "learning_rate": 0.00016094356536149284, "loss": 0.2689, "step": 6157 }, { "epoch": 0.29177919924188583, "grad_norm": 0.60546875, "learning_rate": 0.00016093175730817752, "loss": 0.8178, "step": 6158 }, { "epoch": 0.29182658137882017, "grad_norm": 0.2294921875, "learning_rate": 0.00016091994790346076, "loss": 0.0233, "step": 6159 }, { "epoch": 0.29187396351575456, "grad_norm": 0.53515625, "learning_rate": 0.00016090813714760442, "loss": 0.1928, "step": 6160 }, { "epoch": 0.29192134565268896, "grad_norm": 0.69921875, "learning_rate": 0.0001608963250408705, "loss": 0.1937, "step": 6161 }, { "epoch": 0.2919687277896233, "grad_norm": 1.125, "learning_rate": 0.0001608845115835209, "loss": 0.5616, "step": 6162 }, { "epoch": 0.2920161099265577, "grad_norm": 0.095703125, "learning_rate": 0.00016087269677581774, "loss": 0.0031, "step": 6163 }, { "epoch": 0.2920634920634921, "grad_norm": 0.53125, "learning_rate": 0.000160860880618023, "loss": 1.07, "step": 6164 }, { "epoch": 0.2921108742004264, "grad_norm": 0.6953125, "learning_rate": 0.00016084906311039873, "loss": 1.5735, "step": 6165 }, { "epoch": 0.2921582563373608, "grad_norm": 0.75, "learning_rate": 0.00016083724425320706, "loss": 1.0827, "step": 6166 }, { "epoch": 0.2922056384742952, "grad_norm": 0.10302734375, "learning_rate": 0.0001608254240467101, "loss": 0.0123, "step": 6167 }, { "epoch": 0.29225302061122954, "grad_norm": 0.7890625, "learning_rate": 0.00016081360249117004, "loss": 0.8328, "step": 6168 }, { "epoch": 0.29230040274816393, "grad_norm": 0.74609375, "learning_rate": 0.00016080177958684906, "loss": 0.9292, "step": 6169 }, { "epoch": 0.2923477848850983, "grad_norm": 0.30859375, "learning_rate": 0.00016078995533400935, "loss": 0.1389, "step": 6170 }, { "epoch": 0.2923951670220327, "grad_norm": 0.88671875, "learning_rate": 0.0001607781297329132, "loss": 0.8424, "step": 6171 }, { "epoch": 0.29244254915896706, "grad_norm": 0.8828125, "learning_rate": 0.00016076630278382287, "loss": 0.8404, "step": 6172 }, { "epoch": 0.29248993129590145, "grad_norm": 0.6328125, "learning_rate": 0.00016075447448700067, "loss": 1.0984, "step": 6173 }, { "epoch": 0.29253731343283584, "grad_norm": 0.400390625, "learning_rate": 0.00016074264484270892, "loss": 0.494, "step": 6174 }, { "epoch": 0.2925846955697702, "grad_norm": 0.2578125, "learning_rate": 0.00016073081385121002, "loss": 0.0795, "step": 6175 }, { "epoch": 0.2926320777067046, "grad_norm": 0.6875, "learning_rate": 0.00016071898151276637, "loss": 0.9714, "step": 6176 }, { "epoch": 0.29267945984363897, "grad_norm": 0.46484375, "learning_rate": 0.00016070714782764035, "loss": 0.9679, "step": 6177 }, { "epoch": 0.2927268419805733, "grad_norm": 0.66015625, "learning_rate": 0.00016069531279609448, "loss": 1.077, "step": 6178 }, { "epoch": 0.2927742241175077, "grad_norm": 0.6796875, "learning_rate": 0.0001606834764183912, "loss": 0.9103, "step": 6179 }, { "epoch": 0.2928216062544421, "grad_norm": 0.55078125, "learning_rate": 0.00016067163869479308, "loss": 1.4553, "step": 6180 }, { "epoch": 0.29286898839137643, "grad_norm": 0.62109375, "learning_rate": 0.00016065979962556263, "loss": 1.0385, "step": 6181 }, { "epoch": 0.2929163705283108, "grad_norm": 0.50390625, "learning_rate": 0.00016064795921096243, "loss": 0.9758, "step": 6182 }, { "epoch": 0.2929637526652452, "grad_norm": 0.431640625, "learning_rate": 0.00016063611745125507, "loss": 0.4767, "step": 6183 }, { "epoch": 0.29301113480217955, "grad_norm": 0.5859375, "learning_rate": 0.00016062427434670329, "loss": 1.1962, "step": 6184 }, { "epoch": 0.29305851693911394, "grad_norm": 0.8359375, "learning_rate": 0.0001606124298975696, "loss": 1.4655, "step": 6185 }, { "epoch": 0.29310589907604834, "grad_norm": 0.515625, "learning_rate": 0.0001606005841041168, "loss": 0.8719, "step": 6186 }, { "epoch": 0.29315328121298273, "grad_norm": 0.142578125, "learning_rate": 0.00016058873696660761, "loss": 0.0059, "step": 6187 }, { "epoch": 0.29320066334991707, "grad_norm": 0.65234375, "learning_rate": 0.00016057688848530475, "loss": 0.9908, "step": 6188 }, { "epoch": 0.29324804548685146, "grad_norm": 0.5859375, "learning_rate": 0.00016056503866047103, "loss": 1.0236, "step": 6189 }, { "epoch": 0.29329542762378585, "grad_norm": 0.185546875, "learning_rate": 0.00016055318749236928, "loss": 0.1252, "step": 6190 }, { "epoch": 0.2933428097607202, "grad_norm": 0.76171875, "learning_rate": 0.00016054133498126229, "loss": 1.286, "step": 6191 }, { "epoch": 0.2933901918976546, "grad_norm": 0.1904296875, "learning_rate": 0.00016052948112741302, "loss": 0.0102, "step": 6192 }, { "epoch": 0.293437574034589, "grad_norm": 0.69921875, "learning_rate": 0.0001605176259310843, "loss": 0.135, "step": 6193 }, { "epoch": 0.2934849561715233, "grad_norm": 0.78515625, "learning_rate": 0.00016050576939253912, "loss": 1.089, "step": 6194 }, { "epoch": 0.2935323383084577, "grad_norm": 0.6875, "learning_rate": 0.00016049391151204043, "loss": 0.9853, "step": 6195 }, { "epoch": 0.2935797204453921, "grad_norm": 0.90625, "learning_rate": 0.00016048205228985117, "loss": 1.1275, "step": 6196 }, { "epoch": 0.29362710258232644, "grad_norm": 0.0732421875, "learning_rate": 0.00016047019172623448, "loss": 0.0027, "step": 6197 }, { "epoch": 0.29367448471926083, "grad_norm": 0.71484375, "learning_rate": 0.0001604583298214533, "loss": 0.9025, "step": 6198 }, { "epoch": 0.2937218668561952, "grad_norm": 0.193359375, "learning_rate": 0.0001604464665757708, "loss": 0.1409, "step": 6199 }, { "epoch": 0.2937692489931296, "grad_norm": 0.53515625, "learning_rate": 0.00016043460198945, "loss": 0.8453, "step": 6200 }, { "epoch": 0.29381663113006395, "grad_norm": 0.75390625, "learning_rate": 0.00016042273606275414, "loss": 1.5202, "step": 6201 }, { "epoch": 0.29386401326699835, "grad_norm": 0.66015625, "learning_rate": 0.00016041086879594634, "loss": 1.264, "step": 6202 }, { "epoch": 0.29391139540393274, "grad_norm": 1.25, "learning_rate": 0.0001603990001892898, "loss": 0.3838, "step": 6203 }, { "epoch": 0.2939587775408671, "grad_norm": 0.5703125, "learning_rate": 0.0001603871302430478, "loss": 0.1965, "step": 6204 }, { "epoch": 0.29400615967780147, "grad_norm": 0.2353515625, "learning_rate": 0.00016037525895748358, "loss": 0.051, "step": 6205 }, { "epoch": 0.29405354181473586, "grad_norm": 0.51953125, "learning_rate": 0.0001603633863328604, "loss": 0.1407, "step": 6206 }, { "epoch": 0.2941009239516702, "grad_norm": 2.21875, "learning_rate": 0.00016035151236944162, "loss": 0.6282, "step": 6207 }, { "epoch": 0.2941483060886046, "grad_norm": 0.76953125, "learning_rate": 0.00016033963706749057, "loss": 0.3471, "step": 6208 }, { "epoch": 0.294195688225539, "grad_norm": 0.70703125, "learning_rate": 0.00016032776042727064, "loss": 0.8517, "step": 6209 }, { "epoch": 0.2942430703624733, "grad_norm": 0.2373046875, "learning_rate": 0.00016031588244904525, "loss": 0.0247, "step": 6210 }, { "epoch": 0.2942904524994077, "grad_norm": 0.234375, "learning_rate": 0.00016030400313307784, "loss": 0.0238, "step": 6211 }, { "epoch": 0.2943378346363421, "grad_norm": 0.72265625, "learning_rate": 0.00016029212247963185, "loss": 0.9779, "step": 6212 }, { "epoch": 0.29438521677327645, "grad_norm": 0.5234375, "learning_rate": 0.0001602802404889708, "loss": 0.1089, "step": 6213 }, { "epoch": 0.29443259891021084, "grad_norm": 0.625, "learning_rate": 0.00016026835716135827, "loss": 1.0606, "step": 6214 }, { "epoch": 0.29447998104714523, "grad_norm": 0.63671875, "learning_rate": 0.00016025647249705776, "loss": 0.7342, "step": 6215 }, { "epoch": 0.2945273631840796, "grad_norm": 0.60546875, "learning_rate": 0.00016024458649633282, "loss": 1.044, "step": 6216 }, { "epoch": 0.29457474532101396, "grad_norm": 0.7265625, "learning_rate": 0.00016023269915944717, "loss": 1.1679, "step": 6217 }, { "epoch": 0.29462212745794836, "grad_norm": 0.0245361328125, "learning_rate": 0.0001602208104866644, "loss": 0.0018, "step": 6218 }, { "epoch": 0.29466950959488275, "grad_norm": 0.37109375, "learning_rate": 0.0001602089204782482, "loss": 0.1495, "step": 6219 }, { "epoch": 0.2947168917318171, "grad_norm": 0.640625, "learning_rate": 0.00016019702913446226, "loss": 1.0678, "step": 6220 }, { "epoch": 0.2947642738687515, "grad_norm": 0.53125, "learning_rate": 0.00016018513645557034, "loss": 0.6447, "step": 6221 }, { "epoch": 0.2948116560056859, "grad_norm": 0.79296875, "learning_rate": 0.0001601732424418362, "loss": 0.8159, "step": 6222 }, { "epoch": 0.2948590381426202, "grad_norm": 0.64453125, "learning_rate": 0.00016016134709352365, "loss": 0.7239, "step": 6223 }, { "epoch": 0.2949064202795546, "grad_norm": 0.6328125, "learning_rate": 0.0001601494504108965, "loss": 1.0982, "step": 6224 }, { "epoch": 0.294953802416489, "grad_norm": 0.5859375, "learning_rate": 0.00016013755239421856, "loss": 1.1708, "step": 6225 }, { "epoch": 0.29500118455342333, "grad_norm": 0.265625, "learning_rate": 0.00016012565304375381, "loss": 0.0258, "step": 6226 }, { "epoch": 0.2950485666903577, "grad_norm": 0.30859375, "learning_rate": 0.00016011375235976612, "loss": 0.0109, "step": 6227 }, { "epoch": 0.2950959488272921, "grad_norm": 0.58203125, "learning_rate": 0.00016010185034251944, "loss": 0.8148, "step": 6228 }, { "epoch": 0.2951433309642265, "grad_norm": 0.5859375, "learning_rate": 0.00016008994699227773, "loss": 0.4518, "step": 6229 }, { "epoch": 0.29519071310116085, "grad_norm": 0.625, "learning_rate": 0.00016007804230930498, "loss": 0.6498, "step": 6230 }, { "epoch": 0.29523809523809524, "grad_norm": 0.671875, "learning_rate": 0.00016006613629386527, "loss": 0.9114, "step": 6231 }, { "epoch": 0.29528547737502964, "grad_norm": 0.58984375, "learning_rate": 0.00016005422894622263, "loss": 0.6616, "step": 6232 }, { "epoch": 0.295332859511964, "grad_norm": 0.70703125, "learning_rate": 0.0001600423202666411, "loss": 1.1665, "step": 6233 }, { "epoch": 0.29538024164889837, "grad_norm": 0.400390625, "learning_rate": 0.00016003041025538495, "loss": 0.0348, "step": 6234 }, { "epoch": 0.29542762378583276, "grad_norm": 0.51953125, "learning_rate": 0.0001600184989127182, "loss": 0.5651, "step": 6235 }, { "epoch": 0.2954750059227671, "grad_norm": 0.19921875, "learning_rate": 0.00016000658623890508, "loss": 0.1426, "step": 6236 }, { "epoch": 0.2955223880597015, "grad_norm": 0.6640625, "learning_rate": 0.00015999467223420979, "loss": 1.3802, "step": 6237 }, { "epoch": 0.2955697701966359, "grad_norm": 0.66015625, "learning_rate": 0.00015998275689889656, "loss": 0.0291, "step": 6238 }, { "epoch": 0.2956171523335702, "grad_norm": 0.640625, "learning_rate": 0.0001599708402332297, "loss": 0.9275, "step": 6239 }, { "epoch": 0.2956645344705046, "grad_norm": 0.33203125, "learning_rate": 0.00015995892223747346, "loss": 0.1245, "step": 6240 }, { "epoch": 0.295711916607439, "grad_norm": 0.31640625, "learning_rate": 0.0001599470029118922, "loss": 0.1849, "step": 6241 }, { "epoch": 0.29575929874437334, "grad_norm": 0.55078125, "learning_rate": 0.00015993508225675029, "loss": 0.4715, "step": 6242 }, { "epoch": 0.29580668088130774, "grad_norm": 0.55859375, "learning_rate": 0.00015992316027231204, "loss": 0.1159, "step": 6243 }, { "epoch": 0.29585406301824213, "grad_norm": 0.1533203125, "learning_rate": 0.00015991123695884197, "loss": 0.0317, "step": 6244 }, { "epoch": 0.2959014451551765, "grad_norm": 0.63671875, "learning_rate": 0.00015989931231660446, "loss": 0.5017, "step": 6245 }, { "epoch": 0.29594882729211086, "grad_norm": 0.61328125, "learning_rate": 0.000159887386345864, "loss": 0.939, "step": 6246 }, { "epoch": 0.29599620942904525, "grad_norm": 1.46875, "learning_rate": 0.00015987545904688514, "loss": 1.6365, "step": 6247 }, { "epoch": 0.29604359156597965, "grad_norm": 0.3046875, "learning_rate": 0.0001598635304199323, "loss": 0.0365, "step": 6248 }, { "epoch": 0.296090973702914, "grad_norm": 0.193359375, "learning_rate": 0.0001598516004652702, "loss": 0.0459, "step": 6249 }, { "epoch": 0.2961383558398484, "grad_norm": 0.283203125, "learning_rate": 0.0001598396691831633, "loss": 0.1533, "step": 6250 }, { "epoch": 0.29618573797678277, "grad_norm": 0.6875, "learning_rate": 0.00015982773657387627, "loss": 0.6514, "step": 6251 }, { "epoch": 0.2962331201137171, "grad_norm": 0.74609375, "learning_rate": 0.00015981580263767383, "loss": 1.3279, "step": 6252 }, { "epoch": 0.2962805022506515, "grad_norm": 0.65625, "learning_rate": 0.00015980386737482057, "loss": 1.3035, "step": 6253 }, { "epoch": 0.2963278843875859, "grad_norm": 0.56640625, "learning_rate": 0.00015979193078558118, "loss": 0.7709, "step": 6254 }, { "epoch": 0.29637526652452023, "grad_norm": 0.68359375, "learning_rate": 0.00015977999287022053, "loss": 0.9822, "step": 6255 }, { "epoch": 0.2964226486614546, "grad_norm": 0.42578125, "learning_rate": 0.00015976805362900325, "loss": 0.2037, "step": 6256 }, { "epoch": 0.296470030798389, "grad_norm": 1.1328125, "learning_rate": 0.00015975611306219423, "loss": 1.2106, "step": 6257 }, { "epoch": 0.2965174129353234, "grad_norm": 0.62890625, "learning_rate": 0.0001597441711700583, "loss": 0.7787, "step": 6258 }, { "epoch": 0.29656479507225775, "grad_norm": 0.78515625, "learning_rate": 0.00015973222795286025, "loss": 1.0039, "step": 6259 }, { "epoch": 0.29661217720919214, "grad_norm": 0.59765625, "learning_rate": 0.000159720283410865, "loss": 0.9096, "step": 6260 }, { "epoch": 0.29665955934612653, "grad_norm": 0.267578125, "learning_rate": 0.00015970833754433753, "loss": 0.1478, "step": 6261 }, { "epoch": 0.29670694148306087, "grad_norm": 0.291015625, "learning_rate": 0.00015969639035354267, "loss": 0.0838, "step": 6262 }, { "epoch": 0.29675432361999526, "grad_norm": 0.76953125, "learning_rate": 0.0001596844418387455, "loss": 1.3852, "step": 6263 }, { "epoch": 0.29680170575692966, "grad_norm": 0.5078125, "learning_rate": 0.00015967249200021094, "loss": 0.9298, "step": 6264 }, { "epoch": 0.296849087893864, "grad_norm": 0.1591796875, "learning_rate": 0.0001596605408382041, "loss": 0.022, "step": 6265 }, { "epoch": 0.2968964700307984, "grad_norm": 0.4609375, "learning_rate": 0.00015964858835299003, "loss": 0.2375, "step": 6266 }, { "epoch": 0.2969438521677328, "grad_norm": 0.8359375, "learning_rate": 0.00015963663454483378, "loss": 1.151, "step": 6267 }, { "epoch": 0.2969912343046671, "grad_norm": 0.02978515625, "learning_rate": 0.00015962467941400048, "loss": 0.0012, "step": 6268 }, { "epoch": 0.2970386164416015, "grad_norm": 0.65234375, "learning_rate": 0.0001596127229607553, "loss": 1.225, "step": 6269 }, { "epoch": 0.2970859985785359, "grad_norm": 0.5390625, "learning_rate": 0.00015960076518536345, "loss": 0.9748, "step": 6270 }, { "epoch": 0.29713338071547024, "grad_norm": 0.56640625, "learning_rate": 0.0001595888060880901, "loss": 1.0452, "step": 6271 }, { "epoch": 0.29718076285240463, "grad_norm": 0.58203125, "learning_rate": 0.00015957684566920052, "loss": 0.8657, "step": 6272 }, { "epoch": 0.297228144989339, "grad_norm": 0.6484375, "learning_rate": 0.00015956488392895995, "loss": 0.9778, "step": 6273 }, { "epoch": 0.2972755271262734, "grad_norm": 0.71875, "learning_rate": 0.00015955292086763373, "loss": 1.4371, "step": 6274 }, { "epoch": 0.29732290926320776, "grad_norm": 0.58203125, "learning_rate": 0.0001595409564854871, "loss": 0.6994, "step": 6275 }, { "epoch": 0.29737029140014215, "grad_norm": 0.65625, "learning_rate": 0.0001595289907827855, "loss": 0.788, "step": 6276 }, { "epoch": 0.29741767353707654, "grad_norm": 0.376953125, "learning_rate": 0.00015951702375979426, "loss": 0.1758, "step": 6277 }, { "epoch": 0.2974650556740109, "grad_norm": 0.69140625, "learning_rate": 0.00015950505541677886, "loss": 0.1653, "step": 6278 }, { "epoch": 0.2975124378109453, "grad_norm": 0.828125, "learning_rate": 0.00015949308575400473, "loss": 1.3668, "step": 6279 }, { "epoch": 0.29755981994787967, "grad_norm": 0.0205078125, "learning_rate": 0.0001594811147717373, "loss": 0.0009, "step": 6280 }, { "epoch": 0.297607202084814, "grad_norm": 0.546875, "learning_rate": 0.00015946914247024212, "loss": 0.826, "step": 6281 }, { "epoch": 0.2976545842217484, "grad_norm": 0.2890625, "learning_rate": 0.0001594571688497847, "loss": 0.1798, "step": 6282 }, { "epoch": 0.2977019663586828, "grad_norm": 0.60546875, "learning_rate": 0.0001594451939106306, "loss": 1.074, "step": 6283 }, { "epoch": 0.2977493484956171, "grad_norm": 0.6640625, "learning_rate": 0.00015943321765304538, "loss": 0.8172, "step": 6284 }, { "epoch": 0.2977967306325515, "grad_norm": 0.703125, "learning_rate": 0.00015942124007729475, "loss": 0.1754, "step": 6285 }, { "epoch": 0.2978441127694859, "grad_norm": 0.7890625, "learning_rate": 0.0001594092611836443, "loss": 1.321, "step": 6286 }, { "epoch": 0.2978914949064203, "grad_norm": 0.8984375, "learning_rate": 0.0001593972809723597, "loss": 1.0648, "step": 6287 }, { "epoch": 0.29793887704335464, "grad_norm": 0.1943359375, "learning_rate": 0.0001593852994437067, "loss": 0.0259, "step": 6288 }, { "epoch": 0.29798625918028904, "grad_norm": 0.64453125, "learning_rate": 0.00015937331659795101, "loss": 1.3141, "step": 6289 }, { "epoch": 0.29803364131722343, "grad_norm": 0.435546875, "learning_rate": 0.00015936133243535838, "loss": 0.2187, "step": 6290 }, { "epoch": 0.29808102345415777, "grad_norm": 0.6796875, "learning_rate": 0.00015934934695619463, "loss": 0.1718, "step": 6291 }, { "epoch": 0.29812840559109216, "grad_norm": 1.0078125, "learning_rate": 0.00015933736016072558, "loss": 1.254, "step": 6292 }, { "epoch": 0.29817578772802655, "grad_norm": 1.1015625, "learning_rate": 0.0001593253720492171, "loss": 0.7333, "step": 6293 }, { "epoch": 0.2982231698649609, "grad_norm": 0.9453125, "learning_rate": 0.00015931338262193501, "loss": 0.3758, "step": 6294 }, { "epoch": 0.2982705520018953, "grad_norm": 0.7734375, "learning_rate": 0.00015930139187914533, "loss": 0.9309, "step": 6295 }, { "epoch": 0.2983179341388297, "grad_norm": 0.578125, "learning_rate": 0.0001592893998211139, "loss": 1.0219, "step": 6296 }, { "epoch": 0.298365316275764, "grad_norm": 0.609375, "learning_rate": 0.00015927740644810677, "loss": 1.0214, "step": 6297 }, { "epoch": 0.2984126984126984, "grad_norm": 0.1962890625, "learning_rate": 0.0001592654117603899, "loss": 0.1297, "step": 6298 }, { "epoch": 0.2984600805496328, "grad_norm": 0.50390625, "learning_rate": 0.00015925341575822932, "loss": 0.0671, "step": 6299 }, { "epoch": 0.29850746268656714, "grad_norm": 0.6328125, "learning_rate": 0.00015924141844189107, "loss": 0.7515, "step": 6300 }, { "epoch": 0.29855484482350153, "grad_norm": 0.6875, "learning_rate": 0.00015922941981164128, "loss": 1.2515, "step": 6301 }, { "epoch": 0.2986022269604359, "grad_norm": 0.60546875, "learning_rate": 0.00015921741986774603, "loss": 0.9278, "step": 6302 }, { "epoch": 0.2986496090973703, "grad_norm": 0.54296875, "learning_rate": 0.0001592054186104715, "loss": 1.3154, "step": 6303 }, { "epoch": 0.29869699123430465, "grad_norm": 0.640625, "learning_rate": 0.00015919341604008383, "loss": 0.8406, "step": 6304 }, { "epoch": 0.29874437337123905, "grad_norm": 0.65625, "learning_rate": 0.0001591814121568493, "loss": 1.1043, "step": 6305 }, { "epoch": 0.29879175550817344, "grad_norm": 0.1455078125, "learning_rate": 0.000159169406961034, "loss": 0.0178, "step": 6306 }, { "epoch": 0.2988391376451078, "grad_norm": 0.99609375, "learning_rate": 0.00015915740045290434, "loss": 0.0587, "step": 6307 }, { "epoch": 0.29888651978204217, "grad_norm": 0.625, "learning_rate": 0.0001591453926327265, "loss": 0.9912, "step": 6308 }, { "epoch": 0.29893390191897656, "grad_norm": 0.1640625, "learning_rate": 0.0001591333835007669, "loss": 0.0063, "step": 6309 }, { "epoch": 0.2989812840559109, "grad_norm": 0.6796875, "learning_rate": 0.00015912137305729181, "loss": 1.1696, "step": 6310 }, { "epoch": 0.2990286661928453, "grad_norm": 0.65625, "learning_rate": 0.00015910936130256764, "loss": 0.1296, "step": 6311 }, { "epoch": 0.2990760483297797, "grad_norm": 0.7109375, "learning_rate": 0.00015909734823686081, "loss": 1.0156, "step": 6312 }, { "epoch": 0.299123430466714, "grad_norm": 0.24609375, "learning_rate": 0.00015908533386043775, "loss": 0.1437, "step": 6313 }, { "epoch": 0.2991708126036484, "grad_norm": 0.62109375, "learning_rate": 0.0001590733181735649, "loss": 1.0903, "step": 6314 }, { "epoch": 0.2992181947405828, "grad_norm": 0.68359375, "learning_rate": 0.00015906130117650878, "loss": 1.1543, "step": 6315 }, { "epoch": 0.2992655768775172, "grad_norm": 0.06982421875, "learning_rate": 0.00015904928286953593, "loss": 0.0095, "step": 6316 }, { "epoch": 0.29931295901445154, "grad_norm": 0.69140625, "learning_rate": 0.00015903726325291285, "loss": 0.9702, "step": 6317 }, { "epoch": 0.29936034115138593, "grad_norm": 0.734375, "learning_rate": 0.00015902524232690616, "loss": 0.9361, "step": 6318 }, { "epoch": 0.2994077232883203, "grad_norm": 0.90234375, "learning_rate": 0.00015901322009178248, "loss": 1.4427, "step": 6319 }, { "epoch": 0.29945510542525466, "grad_norm": 0.5234375, "learning_rate": 0.00015900119654780842, "loss": 1.0989, "step": 6320 }, { "epoch": 0.29950248756218906, "grad_norm": 0.220703125, "learning_rate": 0.0001589891716952507, "loss": 0.1506, "step": 6321 }, { "epoch": 0.29954986969912345, "grad_norm": 0.435546875, "learning_rate": 0.00015897714553437598, "loss": 1.2756, "step": 6322 }, { "epoch": 0.2995972518360578, "grad_norm": 0.93359375, "learning_rate": 0.00015896511806545095, "loss": 0.8838, "step": 6323 }, { "epoch": 0.2996446339729922, "grad_norm": 0.212890625, "learning_rate": 0.00015895308928874245, "loss": 0.1703, "step": 6324 }, { "epoch": 0.2996920161099266, "grad_norm": 0.6328125, "learning_rate": 0.0001589410592045172, "loss": 1.1891, "step": 6325 }, { "epoch": 0.2997393982468609, "grad_norm": 0.1328125, "learning_rate": 0.00015892902781304203, "loss": 0.0167, "step": 6326 }, { "epoch": 0.2997867803837953, "grad_norm": 0.54296875, "learning_rate": 0.00015891699511458383, "loss": 1.7041, "step": 6327 }, { "epoch": 0.2998341625207297, "grad_norm": 0.7734375, "learning_rate": 0.00015890496110940937, "loss": 1.2175, "step": 6328 }, { "epoch": 0.29988154465766403, "grad_norm": 0.8359375, "learning_rate": 0.00015889292579778568, "loss": 0.6999, "step": 6329 }, { "epoch": 0.2999289267945984, "grad_norm": 0.65625, "learning_rate": 0.00015888088917997962, "loss": 0.7467, "step": 6330 }, { "epoch": 0.2999763089315328, "grad_norm": 0.04931640625, "learning_rate": 0.00015886885125625813, "loss": 0.0019, "step": 6331 }, { "epoch": 0.3000236910684672, "grad_norm": 0.71484375, "learning_rate": 0.0001588568120268882, "loss": 1.3148, "step": 6332 }, { "epoch": 0.30007107320540155, "grad_norm": 0.57421875, "learning_rate": 0.0001588447714921369, "loss": 0.9488, "step": 6333 }, { "epoch": 0.30011845534233594, "grad_norm": 0.47265625, "learning_rate": 0.00015883272965227125, "loss": 0.1925, "step": 6334 }, { "epoch": 0.30016583747927034, "grad_norm": 0.73828125, "learning_rate": 0.00015882068650755832, "loss": 0.9735, "step": 6335 }, { "epoch": 0.3002132196162047, "grad_norm": 0.4765625, "learning_rate": 0.0001588086420582652, "loss": 0.1459, "step": 6336 }, { "epoch": 0.30026060175313907, "grad_norm": 0.55859375, "learning_rate": 0.00015879659630465905, "loss": 1.1262, "step": 6337 }, { "epoch": 0.30030798389007346, "grad_norm": 0.91015625, "learning_rate": 0.00015878454924700706, "loss": 1.0842, "step": 6338 }, { "epoch": 0.3003553660270078, "grad_norm": 1.03125, "learning_rate": 0.00015877250088557635, "loss": 0.9827, "step": 6339 }, { "epoch": 0.3004027481639422, "grad_norm": 0.66015625, "learning_rate": 0.00015876045122063416, "loss": 1.1656, "step": 6340 }, { "epoch": 0.3004501303008766, "grad_norm": 0.1298828125, "learning_rate": 0.00015874840025244775, "loss": 0.013, "step": 6341 }, { "epoch": 0.3004975124378109, "grad_norm": 0.87109375, "learning_rate": 0.00015873634798128442, "loss": 0.7893, "step": 6342 }, { "epoch": 0.3005448945747453, "grad_norm": 0.66796875, "learning_rate": 0.00015872429440741143, "loss": 0.7876, "step": 6343 }, { "epoch": 0.3005922767116797, "grad_norm": 2.078125, "learning_rate": 0.00015871223953109616, "loss": 0.8104, "step": 6344 }, { "epoch": 0.3006396588486141, "grad_norm": 1.6328125, "learning_rate": 0.00015870018335260597, "loss": 0.6066, "step": 6345 }, { "epoch": 0.30068704098554844, "grad_norm": 0.57421875, "learning_rate": 0.0001586881258722082, "loss": 0.7675, "step": 6346 }, { "epoch": 0.30073442312248283, "grad_norm": 0.58984375, "learning_rate": 0.00015867606709017032, "loss": 0.0975, "step": 6347 }, { "epoch": 0.3007818052594172, "grad_norm": 0.353515625, "learning_rate": 0.00015866400700675977, "loss": 0.0579, "step": 6348 }, { "epoch": 0.30082918739635156, "grad_norm": 0.1826171875, "learning_rate": 0.00015865194562224402, "loss": 0.0256, "step": 6349 }, { "epoch": 0.30087656953328595, "grad_norm": 0.61328125, "learning_rate": 0.00015863988293689062, "loss": 0.5278, "step": 6350 }, { "epoch": 0.30092395167022035, "grad_norm": 0.60546875, "learning_rate": 0.00015862781895096707, "loss": 1.3326, "step": 6351 }, { "epoch": 0.3009713338071547, "grad_norm": 0.53515625, "learning_rate": 0.00015861575366474094, "loss": 1.1228, "step": 6352 }, { "epoch": 0.3010187159440891, "grad_norm": 0.51953125, "learning_rate": 0.00015860368707847983, "loss": 0.8816, "step": 6353 }, { "epoch": 0.30106609808102347, "grad_norm": 0.55859375, "learning_rate": 0.00015859161919245133, "loss": 0.7351, "step": 6354 }, { "epoch": 0.3011134802179578, "grad_norm": 0.064453125, "learning_rate": 0.00015857955000692317, "loss": 0.0034, "step": 6355 }, { "epoch": 0.3011608623548922, "grad_norm": 0.58984375, "learning_rate": 0.00015856747952216297, "loss": 0.9956, "step": 6356 }, { "epoch": 0.3012082444918266, "grad_norm": 0.61328125, "learning_rate": 0.00015855540773843845, "loss": 0.1596, "step": 6357 }, { "epoch": 0.30125562662876093, "grad_norm": 0.228515625, "learning_rate": 0.00015854333465601736, "loss": 0.1357, "step": 6358 }, { "epoch": 0.3013030087656953, "grad_norm": 0.484375, "learning_rate": 0.0001585312602751675, "loss": 0.7673, "step": 6359 }, { "epoch": 0.3013503909026297, "grad_norm": 0.81640625, "learning_rate": 0.0001585191845961566, "loss": 1.2014, "step": 6360 }, { "epoch": 0.3013977730395641, "grad_norm": 0.515625, "learning_rate": 0.00015850710761925252, "loss": 1.1184, "step": 6361 }, { "epoch": 0.30144515517649845, "grad_norm": 0.62109375, "learning_rate": 0.0001584950293447231, "loss": 0.7689, "step": 6362 }, { "epoch": 0.30149253731343284, "grad_norm": 0.96484375, "learning_rate": 0.00015848294977283624, "loss": 1.2459, "step": 6363 }, { "epoch": 0.30153991945036723, "grad_norm": 0.58984375, "learning_rate": 0.00015847086890385988, "loss": 0.8124, "step": 6364 }, { "epoch": 0.30158730158730157, "grad_norm": 0.1884765625, "learning_rate": 0.00015845878673806187, "loss": 0.1512, "step": 6365 }, { "epoch": 0.30163468372423596, "grad_norm": 0.00567626953125, "learning_rate": 0.00015844670327571029, "loss": 0.0003, "step": 6366 }, { "epoch": 0.30168206586117036, "grad_norm": 0.32421875, "learning_rate": 0.00015843461851707303, "loss": 0.0213, "step": 6367 }, { "epoch": 0.3017294479981047, "grad_norm": 0.80859375, "learning_rate": 0.00015842253246241823, "loss": 1.3032, "step": 6368 }, { "epoch": 0.3017768301350391, "grad_norm": 0.66796875, "learning_rate": 0.00015841044511201387, "loss": 1.1587, "step": 6369 }, { "epoch": 0.3018242122719735, "grad_norm": 0.3203125, "learning_rate": 0.00015839835646612804, "loss": 0.1073, "step": 6370 }, { "epoch": 0.3018715944089078, "grad_norm": 0.416015625, "learning_rate": 0.00015838626652502888, "loss": 0.0134, "step": 6371 }, { "epoch": 0.3019189765458422, "grad_norm": 0.474609375, "learning_rate": 0.0001583741752889845, "loss": 0.6428, "step": 6372 }, { "epoch": 0.3019663586827766, "grad_norm": 1.265625, "learning_rate": 0.00015836208275826313, "loss": 0.8704, "step": 6373 }, { "epoch": 0.302013740819711, "grad_norm": 0.322265625, "learning_rate": 0.00015834998893313288, "loss": 0.1387, "step": 6374 }, { "epoch": 0.30206112295664533, "grad_norm": 0.53515625, "learning_rate": 0.00015833789381386205, "loss": 0.673, "step": 6375 }, { "epoch": 0.3021085050935797, "grad_norm": 0.73046875, "learning_rate": 0.0001583257974007189, "loss": 0.9997, "step": 6376 }, { "epoch": 0.3021558872305141, "grad_norm": 0.734375, "learning_rate": 0.00015831369969397164, "loss": 1.2411, "step": 6377 }, { "epoch": 0.30220326936744846, "grad_norm": 0.52734375, "learning_rate": 0.00015830160069388866, "loss": 0.7677, "step": 6378 }, { "epoch": 0.30225065150438285, "grad_norm": 0.0257568359375, "learning_rate": 0.00015828950040073828, "loss": 0.0027, "step": 6379 }, { "epoch": 0.30229803364131724, "grad_norm": 0.43359375, "learning_rate": 0.00015827739881478888, "loss": 0.0299, "step": 6380 }, { "epoch": 0.3023454157782516, "grad_norm": 0.8359375, "learning_rate": 0.00015826529593630881, "loss": 1.3019, "step": 6381 }, { "epoch": 0.302392797915186, "grad_norm": 0.609375, "learning_rate": 0.00015825319176556658, "loss": 0.8761, "step": 6382 }, { "epoch": 0.30244018005212037, "grad_norm": 0.609375, "learning_rate": 0.00015824108630283057, "loss": 1.1102, "step": 6383 }, { "epoch": 0.3024875621890547, "grad_norm": 0.6015625, "learning_rate": 0.00015822897954836931, "loss": 1.1638, "step": 6384 }, { "epoch": 0.3025349443259891, "grad_norm": 0.53125, "learning_rate": 0.00015821687150245132, "loss": 0.7149, "step": 6385 }, { "epoch": 0.3025823264629235, "grad_norm": 0.61328125, "learning_rate": 0.0001582047621653451, "loss": 1.0962, "step": 6386 }, { "epoch": 0.3026297085998578, "grad_norm": 0.5859375, "learning_rate": 0.0001581926515373193, "loss": 1.4139, "step": 6387 }, { "epoch": 0.3026770907367922, "grad_norm": 0.068359375, "learning_rate": 0.00015818053961864242, "loss": 0.0104, "step": 6388 }, { "epoch": 0.3027244728737266, "grad_norm": 1.3984375, "learning_rate": 0.00015816842640958321, "loss": 0.7952, "step": 6389 }, { "epoch": 0.302771855010661, "grad_norm": 0.59375, "learning_rate": 0.0001581563119104102, "loss": 1.1018, "step": 6390 }, { "epoch": 0.30281923714759534, "grad_norm": 0.240234375, "learning_rate": 0.00015814419612139214, "loss": 0.1604, "step": 6391 }, { "epoch": 0.30286661928452974, "grad_norm": 0.70703125, "learning_rate": 0.0001581320790427978, "loss": 0.9539, "step": 6392 }, { "epoch": 0.30291400142146413, "grad_norm": 0.66796875, "learning_rate": 0.0001581199606748958, "loss": 0.7522, "step": 6393 }, { "epoch": 0.30296138355839847, "grad_norm": 0.6171875, "learning_rate": 0.000158107841017955, "loss": 0.8823, "step": 6394 }, { "epoch": 0.30300876569533286, "grad_norm": 0.48046875, "learning_rate": 0.0001580957200722442, "loss": 0.034, "step": 6395 }, { "epoch": 0.30305614783226725, "grad_norm": 0.2109375, "learning_rate": 0.0001580835978380322, "loss": 0.15, "step": 6396 }, { "epoch": 0.3031035299692016, "grad_norm": 0.302734375, "learning_rate": 0.00015807147431558786, "loss": 0.1194, "step": 6397 }, { "epoch": 0.303150912106136, "grad_norm": 0.625, "learning_rate": 0.00015805934950518006, "loss": 1.0244, "step": 6398 }, { "epoch": 0.3031982942430704, "grad_norm": 0.12890625, "learning_rate": 0.00015804722340707778, "loss": 0.0147, "step": 6399 }, { "epoch": 0.3032456763800047, "grad_norm": 0.7890625, "learning_rate": 0.00015803509602154984, "loss": 0.7737, "step": 6400 }, { "epoch": 0.3032930585169391, "grad_norm": 0.478515625, "learning_rate": 0.00015802296734886536, "loss": 0.5766, "step": 6401 }, { "epoch": 0.3033404406538735, "grad_norm": 0.56640625, "learning_rate": 0.00015801083738929323, "loss": 0.854, "step": 6402 }, { "epoch": 0.3033878227908079, "grad_norm": 0.2041015625, "learning_rate": 0.00015799870614310253, "loss": 0.1264, "step": 6403 }, { "epoch": 0.30343520492774223, "grad_norm": 0.6015625, "learning_rate": 0.00015798657361056228, "loss": 1.1678, "step": 6404 }, { "epoch": 0.3034825870646766, "grad_norm": 0.2333984375, "learning_rate": 0.0001579744397919416, "loss": 0.1216, "step": 6405 }, { "epoch": 0.303529969201611, "grad_norm": 0.7734375, "learning_rate": 0.00015796230468750961, "loss": 1.2662, "step": 6406 }, { "epoch": 0.30357735133854535, "grad_norm": 0.4140625, "learning_rate": 0.00015795016829753546, "loss": 0.1355, "step": 6407 }, { "epoch": 0.30362473347547975, "grad_norm": 0.90234375, "learning_rate": 0.00015793803062228828, "loss": 0.0243, "step": 6408 }, { "epoch": 0.30367211561241414, "grad_norm": 0.59765625, "learning_rate": 0.00015792589166203729, "loss": 1.1711, "step": 6409 }, { "epoch": 0.3037194977493485, "grad_norm": 0.7578125, "learning_rate": 0.0001579137514170517, "loss": 1.1089, "step": 6410 }, { "epoch": 0.30376687988628287, "grad_norm": 0.53515625, "learning_rate": 0.00015790160988760082, "loss": 0.8376, "step": 6411 }, { "epoch": 0.30381426202321726, "grad_norm": 0.6640625, "learning_rate": 0.0001578894670739539, "loss": 1.0766, "step": 6412 }, { "epoch": 0.3038616441601516, "grad_norm": 0.8984375, "learning_rate": 0.00015787732297638027, "loss": 1.198, "step": 6413 }, { "epoch": 0.303909026297086, "grad_norm": 0.57421875, "learning_rate": 0.00015786517759514926, "loss": 0.8732, "step": 6414 }, { "epoch": 0.3039564084340204, "grad_norm": 0.54296875, "learning_rate": 0.00015785303093053025, "loss": 0.0758, "step": 6415 }, { "epoch": 0.3040037905709547, "grad_norm": 0.89453125, "learning_rate": 0.00015784088298279264, "loss": 0.0796, "step": 6416 }, { "epoch": 0.3040511727078891, "grad_norm": 0.6953125, "learning_rate": 0.00015782873375220583, "loss": 1.0285, "step": 6417 }, { "epoch": 0.3040985548448235, "grad_norm": 0.1923828125, "learning_rate": 0.0001578165832390393, "loss": 0.0299, "step": 6418 }, { "epoch": 0.3041459369817579, "grad_norm": 0.57421875, "learning_rate": 0.0001578044314435626, "loss": 1.039, "step": 6419 }, { "epoch": 0.30419331911869224, "grad_norm": 0.58203125, "learning_rate": 0.0001577922783660451, "loss": 1.2203, "step": 6420 }, { "epoch": 0.30424070125562663, "grad_norm": 0.69921875, "learning_rate": 0.00015778012400675647, "loss": 1.0667, "step": 6421 }, { "epoch": 0.304288083392561, "grad_norm": 0.61328125, "learning_rate": 0.0001577679683659662, "loss": 0.1458, "step": 6422 }, { "epoch": 0.30433546552949536, "grad_norm": 0.2578125, "learning_rate": 0.00015775581144394395, "loss": 0.0251, "step": 6423 }, { "epoch": 0.30438284766642976, "grad_norm": 0.4609375, "learning_rate": 0.00015774365324095936, "loss": 1.1773, "step": 6424 }, { "epoch": 0.30443022980336415, "grad_norm": 0.55859375, "learning_rate": 0.00015773149375728198, "loss": 0.9736, "step": 6425 }, { "epoch": 0.3044776119402985, "grad_norm": 0.234375, "learning_rate": 0.0001577193329931816, "loss": 0.0459, "step": 6426 }, { "epoch": 0.3045249940772329, "grad_norm": 0.8671875, "learning_rate": 0.00015770717094892785, "loss": 0.9453, "step": 6427 }, { "epoch": 0.3045723762141673, "grad_norm": 0.703125, "learning_rate": 0.00015769500762479054, "loss": 0.7941, "step": 6428 }, { "epoch": 0.3046197583511016, "grad_norm": 0.1943359375, "learning_rate": 0.00015768284302103945, "loss": 0.1327, "step": 6429 }, { "epoch": 0.304667140488036, "grad_norm": 0.421875, "learning_rate": 0.0001576706771379443, "loss": 0.1174, "step": 6430 }, { "epoch": 0.3047145226249704, "grad_norm": 0.6875, "learning_rate": 0.000157658509975775, "loss": 1.1515, "step": 6431 }, { "epoch": 0.3047619047619048, "grad_norm": 0.05517578125, "learning_rate": 0.00015764634153480134, "loss": 0.0068, "step": 6432 }, { "epoch": 0.3048092868988391, "grad_norm": 0.68359375, "learning_rate": 0.0001576341718152932, "loss": 0.9539, "step": 6433 }, { "epoch": 0.3048566690357735, "grad_norm": 0.61328125, "learning_rate": 0.00015762200081752053, "loss": 0.8338, "step": 6434 }, { "epoch": 0.3049040511727079, "grad_norm": 0.73828125, "learning_rate": 0.0001576098285417533, "loss": 0.8848, "step": 6435 }, { "epoch": 0.30495143330964225, "grad_norm": 0.53515625, "learning_rate": 0.00015759765498826137, "loss": 1.0667, "step": 6436 }, { "epoch": 0.30499881544657664, "grad_norm": 0.6875, "learning_rate": 0.00015758548015731486, "loss": 0.9937, "step": 6437 }, { "epoch": 0.30504619758351104, "grad_norm": 0.5546875, "learning_rate": 0.00015757330404918372, "loss": 1.1364, "step": 6438 }, { "epoch": 0.3050935797204454, "grad_norm": 0.7265625, "learning_rate": 0.000157561126664138, "loss": 1.0133, "step": 6439 }, { "epoch": 0.30514096185737977, "grad_norm": 0.6171875, "learning_rate": 0.0001575489480024478, "loss": 0.562, "step": 6440 }, { "epoch": 0.30518834399431416, "grad_norm": 0.46484375, "learning_rate": 0.00015753676806438328, "loss": 0.02, "step": 6441 }, { "epoch": 0.3052357261312485, "grad_norm": 0.240234375, "learning_rate": 0.00015752458685021448, "loss": 0.138, "step": 6442 }, { "epoch": 0.3052831082681829, "grad_norm": 0.578125, "learning_rate": 0.00015751240436021163, "loss": 0.7693, "step": 6443 }, { "epoch": 0.3053304904051173, "grad_norm": 0.078125, "learning_rate": 0.00015750022059464493, "loss": 0.0042, "step": 6444 }, { "epoch": 0.3053778725420516, "grad_norm": 0.05078125, "learning_rate": 0.0001574880355537846, "loss": 0.003, "step": 6445 }, { "epoch": 0.305425254678986, "grad_norm": 0.515625, "learning_rate": 0.0001574758492379008, "loss": 0.8774, "step": 6446 }, { "epoch": 0.3054726368159204, "grad_norm": 0.69921875, "learning_rate": 0.00015746366164726396, "loss": 1.1418, "step": 6447 }, { "epoch": 0.3055200189528548, "grad_norm": 0.62890625, "learning_rate": 0.00015745147278214427, "loss": 1.227, "step": 6448 }, { "epoch": 0.30556740108978914, "grad_norm": 0.1923828125, "learning_rate": 0.00015743928264281212, "loss": 0.0743, "step": 6449 }, { "epoch": 0.30561478322672353, "grad_norm": 0.55859375, "learning_rate": 0.00015742709122953786, "loss": 0.9859, "step": 6450 }, { "epoch": 0.3056621653636579, "grad_norm": 0.81640625, "learning_rate": 0.00015741489854259187, "loss": 1.0521, "step": 6451 }, { "epoch": 0.30570954750059226, "grad_norm": 0.474609375, "learning_rate": 0.0001574027045822446, "loss": 0.7161, "step": 6452 }, { "epoch": 0.30575692963752665, "grad_norm": 0.61328125, "learning_rate": 0.00015739050934876652, "loss": 0.6875, "step": 6453 }, { "epoch": 0.30580431177446105, "grad_norm": 0.5234375, "learning_rate": 0.00015737831284242803, "loss": 0.7807, "step": 6454 }, { "epoch": 0.3058516939113954, "grad_norm": 0.765625, "learning_rate": 0.0001573661150634997, "loss": 1.3163, "step": 6455 }, { "epoch": 0.3058990760483298, "grad_norm": 0.609375, "learning_rate": 0.00015735391601225202, "loss": 0.319, "step": 6456 }, { "epoch": 0.30594645818526417, "grad_norm": 0.034912109375, "learning_rate": 0.00015734171568895558, "loss": 0.0014, "step": 6457 }, { "epoch": 0.3059938403221985, "grad_norm": 0.57421875, "learning_rate": 0.00015732951409388096, "loss": 1.143, "step": 6458 }, { "epoch": 0.3060412224591329, "grad_norm": 0.59375, "learning_rate": 0.00015731731122729881, "loss": 0.2536, "step": 6459 }, { "epoch": 0.3060886045960673, "grad_norm": 0.2470703125, "learning_rate": 0.00015730510708947973, "loss": 0.1477, "step": 6460 }, { "epoch": 0.3061359867330017, "grad_norm": 0.7109375, "learning_rate": 0.0001572929016806944, "loss": 1.3176, "step": 6461 }, { "epoch": 0.306183368869936, "grad_norm": 0.65234375, "learning_rate": 0.00015728069500121357, "loss": 0.9559, "step": 6462 }, { "epoch": 0.3062307510068704, "grad_norm": 0.5390625, "learning_rate": 0.00015726848705130788, "loss": 0.9945, "step": 6463 }, { "epoch": 0.3062781331438048, "grad_norm": 1.1640625, "learning_rate": 0.0001572562778312482, "loss": 0.1301, "step": 6464 }, { "epoch": 0.30632551528073915, "grad_norm": 0.048095703125, "learning_rate": 0.00015724406734130524, "loss": 0.0058, "step": 6465 }, { "epoch": 0.30637289741767354, "grad_norm": 0.244140625, "learning_rate": 0.00015723185558174984, "loss": 0.1685, "step": 6466 }, { "epoch": 0.30642027955460793, "grad_norm": 0.796875, "learning_rate": 0.00015721964255285283, "loss": 0.7259, "step": 6467 }, { "epoch": 0.30646766169154227, "grad_norm": 0.6328125, "learning_rate": 0.00015720742825488511, "loss": 1.2139, "step": 6468 }, { "epoch": 0.30651504382847666, "grad_norm": 0.73828125, "learning_rate": 0.00015719521268811758, "loss": 0.6002, "step": 6469 }, { "epoch": 0.30656242596541106, "grad_norm": 0.53125, "learning_rate": 0.00015718299585282117, "loss": 0.5995, "step": 6470 }, { "epoch": 0.3066098081023454, "grad_norm": 0.08349609375, "learning_rate": 0.0001571707777492668, "loss": 0.0112, "step": 6471 }, { "epoch": 0.3066571902392798, "grad_norm": 0.478515625, "learning_rate": 0.00015715855837772547, "loss": 0.9161, "step": 6472 }, { "epoch": 0.3067045723762142, "grad_norm": 0.6640625, "learning_rate": 0.00015714633773846818, "loss": 0.8095, "step": 6473 }, { "epoch": 0.3067519545131485, "grad_norm": 0.6953125, "learning_rate": 0.00015713411583176602, "loss": 0.1901, "step": 6474 }, { "epoch": 0.3067993366500829, "grad_norm": 0.7890625, "learning_rate": 0.00015712189265789002, "loss": 1.2767, "step": 6475 }, { "epoch": 0.3068467187870173, "grad_norm": 0.66796875, "learning_rate": 0.00015710966821711129, "loss": 1.4565, "step": 6476 }, { "epoch": 0.3068941009239517, "grad_norm": 0.703125, "learning_rate": 0.00015709744250970092, "loss": 1.0918, "step": 6477 }, { "epoch": 0.30694148306088603, "grad_norm": 0.69140625, "learning_rate": 0.00015708521553593012, "loss": 1.4035, "step": 6478 }, { "epoch": 0.3069888651978204, "grad_norm": 0.6875, "learning_rate": 0.00015707298729607003, "loss": 0.253, "step": 6479 }, { "epoch": 0.3070362473347548, "grad_norm": 0.55859375, "learning_rate": 0.0001570607577903919, "loss": 0.7883, "step": 6480 }, { "epoch": 0.30708362947168916, "grad_norm": 0.5703125, "learning_rate": 0.0001570485270191669, "loss": 0.7736, "step": 6481 }, { "epoch": 0.30713101160862355, "grad_norm": 0.07177734375, "learning_rate": 0.00015703629498266635, "loss": 0.0134, "step": 6482 }, { "epoch": 0.30717839374555794, "grad_norm": 0.001922607421875, "learning_rate": 0.00015702406168116153, "loss": 0.0002, "step": 6483 }, { "epoch": 0.3072257758824923, "grad_norm": 0.61328125, "learning_rate": 0.00015701182711492378, "loss": 1.0878, "step": 6484 }, { "epoch": 0.3072731580194267, "grad_norm": 0.2255859375, "learning_rate": 0.00015699959128422442, "loss": 0.109, "step": 6485 }, { "epoch": 0.30732054015636107, "grad_norm": 0.95703125, "learning_rate": 0.00015698735418933482, "loss": 1.3013, "step": 6486 }, { "epoch": 0.3073679222932954, "grad_norm": 0.62109375, "learning_rate": 0.0001569751158305264, "loss": 1.0957, "step": 6487 }, { "epoch": 0.3074153044302298, "grad_norm": 0.7265625, "learning_rate": 0.00015696287620807064, "loss": 1.2234, "step": 6488 }, { "epoch": 0.3074626865671642, "grad_norm": 0.49609375, "learning_rate": 0.00015695063532223896, "loss": 0.9033, "step": 6489 }, { "epoch": 0.3075100687040986, "grad_norm": 0.8125, "learning_rate": 0.0001569383931733028, "loss": 0.7429, "step": 6490 }, { "epoch": 0.3075574508410329, "grad_norm": 0.625, "learning_rate": 0.00015692614976153374, "loss": 1.1825, "step": 6491 }, { "epoch": 0.3076048329779673, "grad_norm": 0.65234375, "learning_rate": 0.00015691390508720335, "loss": 0.678, "step": 6492 }, { "epoch": 0.3076522151149017, "grad_norm": 0.71484375, "learning_rate": 0.00015690165915058314, "loss": 1.0472, "step": 6493 }, { "epoch": 0.30769959725183604, "grad_norm": 0.5234375, "learning_rate": 0.00015688941195194476, "loss": 0.2872, "step": 6494 }, { "epoch": 0.30774697938877044, "grad_norm": 0.578125, "learning_rate": 0.0001568771634915598, "loss": 1.4657, "step": 6495 }, { "epoch": 0.30779436152570483, "grad_norm": 0.46484375, "learning_rate": 0.00015686491376969993, "loss": 0.021, "step": 6496 }, { "epoch": 0.30784174366263917, "grad_norm": 0.6640625, "learning_rate": 0.00015685266278663685, "loss": 1.0232, "step": 6497 }, { "epoch": 0.30788912579957356, "grad_norm": 0.83984375, "learning_rate": 0.0001568404105426423, "loss": 1.1927, "step": 6498 }, { "epoch": 0.30793650793650795, "grad_norm": 0.8828125, "learning_rate": 0.00015682815703798792, "loss": 0.3838, "step": 6499 }, { "epoch": 0.3079838900734423, "grad_norm": 0.265625, "learning_rate": 0.00015681590227294558, "loss": 0.0158, "step": 6500 }, { "epoch": 0.3080312722103767, "grad_norm": 0.56640625, "learning_rate": 0.00015680364624778704, "loss": 1.0193, "step": 6501 }, { "epoch": 0.3080786543473111, "grad_norm": 0.6953125, "learning_rate": 0.00015679138896278417, "loss": 1.3357, "step": 6502 }, { "epoch": 0.3081260364842454, "grad_norm": 0.65234375, "learning_rate": 0.00015677913041820874, "loss": 0.8807, "step": 6503 }, { "epoch": 0.3081734186211798, "grad_norm": 0.6640625, "learning_rate": 0.00015676687061433268, "loss": 1.2676, "step": 6504 }, { "epoch": 0.3082208007581142, "grad_norm": 0.54296875, "learning_rate": 0.0001567546095514279, "loss": 0.62, "step": 6505 }, { "epoch": 0.3082681828950486, "grad_norm": 0.55859375, "learning_rate": 0.00015674234722976634, "loss": 1.2959, "step": 6506 }, { "epoch": 0.30831556503198293, "grad_norm": 0.6328125, "learning_rate": 0.00015673008364962, "loss": 0.6548, "step": 6507 }, { "epoch": 0.3083629471689173, "grad_norm": 1.0546875, "learning_rate": 0.00015671781881126075, "loss": 0.1654, "step": 6508 }, { "epoch": 0.3084103293058517, "grad_norm": 0.18359375, "learning_rate": 0.00015670555271496075, "loss": 0.1376, "step": 6509 }, { "epoch": 0.30845771144278605, "grad_norm": 0.765625, "learning_rate": 0.00015669328536099198, "loss": 0.6347, "step": 6510 }, { "epoch": 0.30850509357972045, "grad_norm": 0.62109375, "learning_rate": 0.00015668101674962647, "loss": 0.9348, "step": 6511 }, { "epoch": 0.30855247571665484, "grad_norm": 0.2412109375, "learning_rate": 0.00015666874688113644, "loss": 0.1464, "step": 6512 }, { "epoch": 0.3085998578535892, "grad_norm": 0.5, "learning_rate": 0.00015665647575579397, "loss": 0.6497, "step": 6513 }, { "epoch": 0.30864723999052357, "grad_norm": 0.609375, "learning_rate": 0.00015664420337387118, "loss": 0.9218, "step": 6514 }, { "epoch": 0.30869462212745796, "grad_norm": 0.60546875, "learning_rate": 0.00015663192973564032, "loss": 0.7704, "step": 6515 }, { "epoch": 0.3087420042643923, "grad_norm": 0.5546875, "learning_rate": 0.00015661965484137355, "loss": 0.7489, "step": 6516 }, { "epoch": 0.3087893864013267, "grad_norm": 0.59375, "learning_rate": 0.00015660737869134322, "loss": 0.5298, "step": 6517 }, { "epoch": 0.3088367685382611, "grad_norm": 0.8046875, "learning_rate": 0.00015659510128582143, "loss": 0.77, "step": 6518 }, { "epoch": 0.3088841506751954, "grad_norm": 0.640625, "learning_rate": 0.00015658282262508063, "loss": 1.2064, "step": 6519 }, { "epoch": 0.3089315328121298, "grad_norm": 0.59765625, "learning_rate": 0.00015657054270939308, "loss": 0.5916, "step": 6520 }, { "epoch": 0.3089789149490642, "grad_norm": 0.036376953125, "learning_rate": 0.00015655826153903118, "loss": 0.002, "step": 6521 }, { "epoch": 0.3090262970859986, "grad_norm": 0.61328125, "learning_rate": 0.00015654597911426722, "loss": 0.9681, "step": 6522 }, { "epoch": 0.30907367922293294, "grad_norm": 0.5234375, "learning_rate": 0.00015653369543537372, "loss": 0.4623, "step": 6523 }, { "epoch": 0.30912106135986733, "grad_norm": 0.66796875, "learning_rate": 0.00015652141050262307, "loss": 0.4973, "step": 6524 }, { "epoch": 0.3091684434968017, "grad_norm": 1.3515625, "learning_rate": 0.00015650912431628772, "loss": 0.2051, "step": 6525 }, { "epoch": 0.30921582563373606, "grad_norm": 0.60546875, "learning_rate": 0.00015649683687664017, "loss": 1.2065, "step": 6526 }, { "epoch": 0.30926320777067046, "grad_norm": 0.380859375, "learning_rate": 0.00015648454818395298, "loss": 0.0663, "step": 6527 }, { "epoch": 0.30931058990760485, "grad_norm": 0.486328125, "learning_rate": 0.00015647225823849868, "loss": 0.8263, "step": 6528 }, { "epoch": 0.3093579720445392, "grad_norm": 0.59765625, "learning_rate": 0.00015645996704054984, "loss": 1.0052, "step": 6529 }, { "epoch": 0.3094053541814736, "grad_norm": 0.431640625, "learning_rate": 0.00015644767459037907, "loss": 0.8184, "step": 6530 }, { "epoch": 0.309452736318408, "grad_norm": 0.47265625, "learning_rate": 0.000156435380888259, "loss": 0.9012, "step": 6531 }, { "epoch": 0.3095001184553423, "grad_norm": 0.4609375, "learning_rate": 0.00015642308593446228, "loss": 0.3063, "step": 6532 }, { "epoch": 0.3095475005922767, "grad_norm": 0.95703125, "learning_rate": 0.00015641078972926164, "loss": 0.1363, "step": 6533 }, { "epoch": 0.3095948827292111, "grad_norm": 0.99609375, "learning_rate": 0.00015639849227292975, "loss": 0.5949, "step": 6534 }, { "epoch": 0.3096422648661455, "grad_norm": 0.51171875, "learning_rate": 0.0001563861935657394, "loss": 0.7603, "step": 6535 }, { "epoch": 0.3096896470030798, "grad_norm": 0.63671875, "learning_rate": 0.0001563738936079633, "loss": 0.9269, "step": 6536 }, { "epoch": 0.3097370291400142, "grad_norm": 0.6484375, "learning_rate": 0.0001563615923998743, "loss": 0.9251, "step": 6537 }, { "epoch": 0.3097844112769486, "grad_norm": 0.140625, "learning_rate": 0.00015634928994174522, "loss": 0.0117, "step": 6538 }, { "epoch": 0.30983179341388295, "grad_norm": 0.70703125, "learning_rate": 0.0001563369862338489, "loss": 0.9585, "step": 6539 }, { "epoch": 0.30987917555081734, "grad_norm": 0.66796875, "learning_rate": 0.00015632468127645826, "loss": 1.1298, "step": 6540 }, { "epoch": 0.30992655768775174, "grad_norm": 0.75, "learning_rate": 0.00015631237506984617, "loss": 1.4214, "step": 6541 }, { "epoch": 0.3099739398246861, "grad_norm": 0.6015625, "learning_rate": 0.00015630006761428557, "loss": 1.0516, "step": 6542 }, { "epoch": 0.31002132196162047, "grad_norm": 0.67578125, "learning_rate": 0.00015628775891004946, "loss": 0.86, "step": 6543 }, { "epoch": 0.31006870409855486, "grad_norm": 0.55859375, "learning_rate": 0.00015627544895741076, "loss": 0.4261, "step": 6544 }, { "epoch": 0.3101160862354892, "grad_norm": 0.65625, "learning_rate": 0.0001562631377566426, "loss": 1.2106, "step": 6545 }, { "epoch": 0.3101634683724236, "grad_norm": 0.69140625, "learning_rate": 0.00015625082530801795, "loss": 1.0549, "step": 6546 }, { "epoch": 0.310210850509358, "grad_norm": 0.84375, "learning_rate": 0.00015623851161180991, "loss": 0.257, "step": 6547 }, { "epoch": 0.3102582326462923, "grad_norm": 0.6171875, "learning_rate": 0.0001562261966682916, "loss": 0.5688, "step": 6548 }, { "epoch": 0.3103056147832267, "grad_norm": 0.33984375, "learning_rate": 0.00015621388047773612, "loss": 0.0285, "step": 6549 }, { "epoch": 0.3103529969201611, "grad_norm": 0.53125, "learning_rate": 0.00015620156304041666, "loss": 0.621, "step": 6550 }, { "epoch": 0.3104003790570955, "grad_norm": 0.68359375, "learning_rate": 0.0001561892443566064, "loss": 0.998, "step": 6551 }, { "epoch": 0.31044776119402984, "grad_norm": 0.267578125, "learning_rate": 0.00015617692442657853, "loss": 0.03, "step": 6552 }, { "epoch": 0.31049514333096423, "grad_norm": 0.84765625, "learning_rate": 0.00015616460325060635, "loss": 0.7481, "step": 6553 }, { "epoch": 0.3105425254678986, "grad_norm": 0.490234375, "learning_rate": 0.00015615228082896306, "loss": 0.8258, "step": 6554 }, { "epoch": 0.31058990760483296, "grad_norm": 0.5546875, "learning_rate": 0.00015613995716192198, "loss": 0.974, "step": 6555 }, { "epoch": 0.31063728974176735, "grad_norm": 0.54296875, "learning_rate": 0.00015612763224975647, "loss": 0.8936, "step": 6556 }, { "epoch": 0.31068467187870175, "grad_norm": 0.031982421875, "learning_rate": 0.00015611530609273985, "loss": 0.0016, "step": 6557 }, { "epoch": 0.3107320540156361, "grad_norm": 0.66796875, "learning_rate": 0.00015610297869114552, "loss": 0.7345, "step": 6558 }, { "epoch": 0.3107794361525705, "grad_norm": 0.2890625, "learning_rate": 0.0001560906500452469, "loss": 0.0393, "step": 6559 }, { "epoch": 0.31082681828950487, "grad_norm": 0.84375, "learning_rate": 0.00015607832015531736, "loss": 0.1445, "step": 6560 }, { "epoch": 0.3108742004264392, "grad_norm": 1.3046875, "learning_rate": 0.00015606598902163045, "loss": 0.6681, "step": 6561 }, { "epoch": 0.3109215825633736, "grad_norm": 0.546875, "learning_rate": 0.0001560536566444596, "loss": 0.5334, "step": 6562 }, { "epoch": 0.310968964700308, "grad_norm": 0.75, "learning_rate": 0.00015604132302407837, "loss": 1.5135, "step": 6563 }, { "epoch": 0.3110163468372424, "grad_norm": 0.470703125, "learning_rate": 0.0001560289881607603, "loss": 0.4288, "step": 6564 }, { "epoch": 0.3110637289741767, "grad_norm": 0.546875, "learning_rate": 0.0001560166520547789, "loss": 0.4492, "step": 6565 }, { "epoch": 0.3111111111111111, "grad_norm": 1.0, "learning_rate": 0.00015600431470640786, "loss": 1.3553, "step": 6566 }, { "epoch": 0.3111584932480455, "grad_norm": 0.5234375, "learning_rate": 0.0001559919761159208, "loss": 0.6518, "step": 6567 }, { "epoch": 0.31120587538497985, "grad_norm": 0.66015625, "learning_rate": 0.0001559796362835913, "loss": 1.0372, "step": 6568 }, { "epoch": 0.31125325752191424, "grad_norm": 0.236328125, "learning_rate": 0.0001559672952096931, "loss": 0.0377, "step": 6569 }, { "epoch": 0.31130063965884863, "grad_norm": 0.7578125, "learning_rate": 0.00015595495289449994, "loss": 0.8932, "step": 6570 }, { "epoch": 0.31134802179578297, "grad_norm": 0.8359375, "learning_rate": 0.0001559426093382855, "loss": 1.586, "step": 6571 }, { "epoch": 0.31139540393271736, "grad_norm": 0.71484375, "learning_rate": 0.00015593026454132359, "loss": 0.9798, "step": 6572 }, { "epoch": 0.31144278606965176, "grad_norm": 0.6015625, "learning_rate": 0.00015591791850388798, "loss": 0.7761, "step": 6573 }, { "epoch": 0.3114901682065861, "grad_norm": 0.61328125, "learning_rate": 0.00015590557122625246, "loss": 0.9733, "step": 6574 }, { "epoch": 0.3115375503435205, "grad_norm": 0.640625, "learning_rate": 0.00015589322270869095, "loss": 0.6146, "step": 6575 }, { "epoch": 0.3115849324804549, "grad_norm": 0.7578125, "learning_rate": 0.0001558808729514773, "loss": 1.2899, "step": 6576 }, { "epoch": 0.3116323146173892, "grad_norm": 0.66796875, "learning_rate": 0.0001558685219548854, "loss": 1.2373, "step": 6577 }, { "epoch": 0.3116796967543236, "grad_norm": 0.734375, "learning_rate": 0.0001558561697191892, "loss": 1.3932, "step": 6578 }, { "epoch": 0.311727078891258, "grad_norm": 0.6484375, "learning_rate": 0.00015584381624466265, "loss": 0.5996, "step": 6579 }, { "epoch": 0.3117744610281924, "grad_norm": 0.71484375, "learning_rate": 0.00015583146153157974, "loss": 0.6589, "step": 6580 }, { "epoch": 0.31182184316512673, "grad_norm": 0.69140625, "learning_rate": 0.00015581910558021446, "loss": 1.0168, "step": 6581 }, { "epoch": 0.3118692253020611, "grad_norm": 0.255859375, "learning_rate": 0.0001558067483908409, "loss": 0.1408, "step": 6582 }, { "epoch": 0.3119166074389955, "grad_norm": 0.6171875, "learning_rate": 0.0001557943899637331, "loss": 0.093, "step": 6583 }, { "epoch": 0.31196398957592986, "grad_norm": 0.5703125, "learning_rate": 0.00015578203029916515, "loss": 0.722, "step": 6584 }, { "epoch": 0.31201137171286425, "grad_norm": 0.625, "learning_rate": 0.0001557696693974112, "loss": 0.7425, "step": 6585 }, { "epoch": 0.31205875384979864, "grad_norm": 0.671875, "learning_rate": 0.00015575730725874535, "loss": 1.0831, "step": 6586 }, { "epoch": 0.312106135986733, "grad_norm": 0.671875, "learning_rate": 0.00015574494388344182, "loss": 1.0582, "step": 6587 }, { "epoch": 0.3121535181236674, "grad_norm": 0.5703125, "learning_rate": 0.0001557325792717749, "loss": 1.2879, "step": 6588 }, { "epoch": 0.31220090026060177, "grad_norm": 0.25, "learning_rate": 0.00015572021342401864, "loss": 0.1396, "step": 6589 }, { "epoch": 0.3122482823975361, "grad_norm": 0.875, "learning_rate": 0.00015570784634044742, "loss": 0.2146, "step": 6590 }, { "epoch": 0.3122956645344705, "grad_norm": 0.126953125, "learning_rate": 0.0001556954780213355, "loss": 0.0209, "step": 6591 }, { "epoch": 0.3123430466714049, "grad_norm": 1.0, "learning_rate": 0.00015568310846695722, "loss": 0.9349, "step": 6592 }, { "epoch": 0.3123904288083393, "grad_norm": 0.65625, "learning_rate": 0.0001556707376775869, "loss": 1.2064, "step": 6593 }, { "epoch": 0.3124378109452736, "grad_norm": 0.73046875, "learning_rate": 0.00015565836565349889, "loss": 0.8765, "step": 6594 }, { "epoch": 0.312485193082208, "grad_norm": 0.73046875, "learning_rate": 0.00015564599239496765, "loss": 1.4647, "step": 6595 }, { "epoch": 0.3125325752191424, "grad_norm": 0.6015625, "learning_rate": 0.0001556336179022676, "loss": 0.0322, "step": 6596 }, { "epoch": 0.31257995735607674, "grad_norm": 0.6328125, "learning_rate": 0.0001556212421756731, "loss": 0.7906, "step": 6597 }, { "epoch": 0.31262733949301114, "grad_norm": 0.6796875, "learning_rate": 0.00015560886521545866, "loss": 1.4391, "step": 6598 }, { "epoch": 0.31267472162994553, "grad_norm": 0.5859375, "learning_rate": 0.00015559648702189892, "loss": 0.1864, "step": 6599 }, { "epoch": 0.31272210376687987, "grad_norm": 0.212890625, "learning_rate": 0.00015558410759526826, "loss": 0.0391, "step": 6600 }, { "epoch": 0.31276948590381426, "grad_norm": 0.64453125, "learning_rate": 0.0001555717269358413, "loss": 0.9505, "step": 6601 }, { "epoch": 0.31281686804074865, "grad_norm": 0.4296875, "learning_rate": 0.00015555934504389262, "loss": 0.1565, "step": 6602 }, { "epoch": 0.312864250177683, "grad_norm": 0.6875, "learning_rate": 0.0001555469619196969, "loss": 0.616, "step": 6603 }, { "epoch": 0.3129116323146174, "grad_norm": 0.6015625, "learning_rate": 0.0001555345775635287, "loss": 1.1719, "step": 6604 }, { "epoch": 0.3129590144515518, "grad_norm": 1.0625, "learning_rate": 0.00015552219197566272, "loss": 2.274, "step": 6605 }, { "epoch": 0.3130063965884861, "grad_norm": 0.5703125, "learning_rate": 0.00015550980515637367, "loss": 1.172, "step": 6606 }, { "epoch": 0.3130537787254205, "grad_norm": 0.10302734375, "learning_rate": 0.00015549741710593624, "loss": 0.0183, "step": 6607 }, { "epoch": 0.3131011608623549, "grad_norm": 0.435546875, "learning_rate": 0.00015548502782462522, "loss": 0.0319, "step": 6608 }, { "epoch": 0.3131485429992893, "grad_norm": 0.8515625, "learning_rate": 0.0001554726373127154, "loss": 1.1681, "step": 6609 }, { "epoch": 0.31319592513622363, "grad_norm": 0.83984375, "learning_rate": 0.00015546024557048154, "loss": 1.3657, "step": 6610 }, { "epoch": 0.313243307273158, "grad_norm": 0.6484375, "learning_rate": 0.00015544785259819855, "loss": 1.2749, "step": 6611 }, { "epoch": 0.3132906894100924, "grad_norm": 0.59765625, "learning_rate": 0.00015543545839614121, "loss": 0.6629, "step": 6612 }, { "epoch": 0.31333807154702675, "grad_norm": 0.63671875, "learning_rate": 0.0001554230629645845, "loss": 1.1962, "step": 6613 }, { "epoch": 0.31338545368396115, "grad_norm": 0.240234375, "learning_rate": 0.00015541066630380326, "loss": 0.0403, "step": 6614 }, { "epoch": 0.31343283582089554, "grad_norm": 0.6171875, "learning_rate": 0.00015539826841407247, "loss": 0.1714, "step": 6615 }, { "epoch": 0.3134802179578299, "grad_norm": 0.63671875, "learning_rate": 0.00015538586929566707, "loss": 0.9104, "step": 6616 }, { "epoch": 0.31352760009476427, "grad_norm": 0.6171875, "learning_rate": 0.0001553734689488621, "loss": 0.7639, "step": 6617 }, { "epoch": 0.31357498223169866, "grad_norm": 0.083984375, "learning_rate": 0.00015536106737393257, "loss": 0.0042, "step": 6618 }, { "epoch": 0.313622364368633, "grad_norm": 0.4921875, "learning_rate": 0.00015534866457115354, "loss": 0.4751, "step": 6619 }, { "epoch": 0.3136697465055674, "grad_norm": 0.8984375, "learning_rate": 0.00015533626054080007, "loss": 0.7211, "step": 6620 }, { "epoch": 0.3137171286425018, "grad_norm": 0.6171875, "learning_rate": 0.00015532385528314727, "loss": 1.0287, "step": 6621 }, { "epoch": 0.3137645107794362, "grad_norm": 0.75390625, "learning_rate": 0.00015531144879847033, "loss": 1.6725, "step": 6622 }, { "epoch": 0.3138118929163705, "grad_norm": 0.59765625, "learning_rate": 0.00015529904108704435, "loss": 1.1494, "step": 6623 }, { "epoch": 0.3138592750533049, "grad_norm": 0.68359375, "learning_rate": 0.00015528663214914453, "loss": 0.7874, "step": 6624 }, { "epoch": 0.3139066571902393, "grad_norm": 0.59375, "learning_rate": 0.0001552742219850461, "loss": 0.9382, "step": 6625 }, { "epoch": 0.31395403932717364, "grad_norm": 0.53515625, "learning_rate": 0.00015526181059502428, "loss": 0.8744, "step": 6626 }, { "epoch": 0.31400142146410803, "grad_norm": 0.1826171875, "learning_rate": 0.00015524939797935443, "loss": 0.1217, "step": 6627 }, { "epoch": 0.3140488036010424, "grad_norm": 0.1923828125, "learning_rate": 0.00015523698413831173, "loss": 0.1361, "step": 6628 }, { "epoch": 0.31409618573797676, "grad_norm": 0.71484375, "learning_rate": 0.00015522456907217155, "loss": 0.8062, "step": 6629 }, { "epoch": 0.31414356787491116, "grad_norm": 0.67578125, "learning_rate": 0.0001552121527812093, "loss": 0.9175, "step": 6630 }, { "epoch": 0.31419095001184555, "grad_norm": 0.6328125, "learning_rate": 0.00015519973526570023, "loss": 0.813, "step": 6631 }, { "epoch": 0.3142383321487799, "grad_norm": 0.6640625, "learning_rate": 0.0001551873165259199, "loss": 1.2004, "step": 6632 }, { "epoch": 0.3142857142857143, "grad_norm": 0.412109375, "learning_rate": 0.00015517489656214365, "loss": 0.0825, "step": 6633 }, { "epoch": 0.3143330964226487, "grad_norm": 0.2890625, "learning_rate": 0.00015516247537464696, "loss": 0.1309, "step": 6634 }, { "epoch": 0.314380478559583, "grad_norm": 0.74609375, "learning_rate": 0.00015515005296370534, "loss": 1.126, "step": 6635 }, { "epoch": 0.3144278606965174, "grad_norm": 0.66015625, "learning_rate": 0.0001551376293295943, "loss": 0.8408, "step": 6636 }, { "epoch": 0.3144752428334518, "grad_norm": 0.6796875, "learning_rate": 0.00015512520447258932, "loss": 1.3584, "step": 6637 }, { "epoch": 0.3145226249703862, "grad_norm": 1.0859375, "learning_rate": 0.00015511277839296605, "loss": 1.3213, "step": 6638 }, { "epoch": 0.3145700071073205, "grad_norm": 0.6171875, "learning_rate": 0.00015510035109100008, "loss": 0.9437, "step": 6639 }, { "epoch": 0.3146173892442549, "grad_norm": 0.310546875, "learning_rate": 0.00015508792256696698, "loss": 0.0295, "step": 6640 }, { "epoch": 0.3146647713811893, "grad_norm": 0.47265625, "learning_rate": 0.00015507549282114244, "loss": 0.3888, "step": 6641 }, { "epoch": 0.31471215351812365, "grad_norm": 0.8125, "learning_rate": 0.00015506306185380213, "loss": 1.0023, "step": 6642 }, { "epoch": 0.31475953565505804, "grad_norm": 0.98828125, "learning_rate": 0.0001550506296652218, "loss": 0.4771, "step": 6643 }, { "epoch": 0.31480691779199244, "grad_norm": 0.5859375, "learning_rate": 0.0001550381962556771, "loss": 0.7989, "step": 6644 }, { "epoch": 0.3148542999289268, "grad_norm": 0.37109375, "learning_rate": 0.00015502576162544385, "loss": 0.0758, "step": 6645 }, { "epoch": 0.31490168206586117, "grad_norm": 0.6953125, "learning_rate": 0.00015501332577479778, "loss": 1.0518, "step": 6646 }, { "epoch": 0.31494906420279556, "grad_norm": 0.31640625, "learning_rate": 0.00015500088870401476, "loss": 0.0091, "step": 6647 }, { "epoch": 0.3149964463397299, "grad_norm": 0.703125, "learning_rate": 0.00015498845041337064, "loss": 1.0809, "step": 6648 }, { "epoch": 0.3150438284766643, "grad_norm": 0.3671875, "learning_rate": 0.00015497601090314124, "loss": 0.2188, "step": 6649 }, { "epoch": 0.3150912106135987, "grad_norm": 0.765625, "learning_rate": 0.0001549635701736025, "loss": 1.1145, "step": 6650 }, { "epoch": 0.3151385927505331, "grad_norm": 0.546875, "learning_rate": 0.00015495112822503027, "loss": 0.5646, "step": 6651 }, { "epoch": 0.3151859748874674, "grad_norm": 0.66015625, "learning_rate": 0.0001549386850577006, "loss": 1.0729, "step": 6652 }, { "epoch": 0.3152333570244018, "grad_norm": 0.6171875, "learning_rate": 0.0001549262406718894, "loss": 1.0544, "step": 6653 }, { "epoch": 0.3152807391613362, "grad_norm": 0.546875, "learning_rate": 0.00015491379506787264, "loss": 0.781, "step": 6654 }, { "epoch": 0.31532812129827054, "grad_norm": 0.5859375, "learning_rate": 0.00015490134824592644, "loss": 0.9315, "step": 6655 }, { "epoch": 0.31537550343520493, "grad_norm": 0.68359375, "learning_rate": 0.00015488890020632677, "loss": 1.2667, "step": 6656 }, { "epoch": 0.3154228855721393, "grad_norm": 0.11865234375, "learning_rate": 0.00015487645094934983, "loss": 0.0167, "step": 6657 }, { "epoch": 0.31547026770907366, "grad_norm": 0.158203125, "learning_rate": 0.00015486400047527162, "loss": 0.0131, "step": 6658 }, { "epoch": 0.31551764984600805, "grad_norm": 0.55078125, "learning_rate": 0.00015485154878436833, "loss": 0.6654, "step": 6659 }, { "epoch": 0.31556503198294245, "grad_norm": 0.53515625, "learning_rate": 0.0001548390958769161, "loss": 0.4158, "step": 6660 }, { "epoch": 0.3156124141198768, "grad_norm": 0.7109375, "learning_rate": 0.00015482664175319111, "loss": 1.1609, "step": 6661 }, { "epoch": 0.3156597962568112, "grad_norm": 0.62109375, "learning_rate": 0.00015481418641346965, "loss": 1.0335, "step": 6662 }, { "epoch": 0.31570717839374557, "grad_norm": 0.11962890625, "learning_rate": 0.0001548017298580279, "loss": 0.0021, "step": 6663 }, { "epoch": 0.3157545605306799, "grad_norm": 0.490234375, "learning_rate": 0.0001547892720871422, "loss": 0.7076, "step": 6664 }, { "epoch": 0.3158019426676143, "grad_norm": 0.54296875, "learning_rate": 0.00015477681310108873, "loss": 0.7821, "step": 6665 }, { "epoch": 0.3158493248045487, "grad_norm": 0.11865234375, "learning_rate": 0.00015476435290014395, "loss": 0.0123, "step": 6666 }, { "epoch": 0.3158967069414831, "grad_norm": 0.146484375, "learning_rate": 0.0001547518914845841, "loss": 0.025, "step": 6667 }, { "epoch": 0.3159440890784174, "grad_norm": 0.5703125, "learning_rate": 0.0001547394288546857, "loss": 1.0853, "step": 6668 }, { "epoch": 0.3159914712153518, "grad_norm": 0.31640625, "learning_rate": 0.000154726965010725, "loss": 0.0582, "step": 6669 }, { "epoch": 0.3160388533522862, "grad_norm": 0.54296875, "learning_rate": 0.0001547144999529785, "loss": 0.0365, "step": 6670 }, { "epoch": 0.31608623548922055, "grad_norm": 0.2138671875, "learning_rate": 0.0001547020336817227, "loss": 0.0178, "step": 6671 }, { "epoch": 0.31613361762615494, "grad_norm": 0.75, "learning_rate": 0.00015468956619723407, "loss": 0.7368, "step": 6672 }, { "epoch": 0.31618099976308933, "grad_norm": 0.201171875, "learning_rate": 0.0001546770974997891, "loss": 0.0375, "step": 6673 }, { "epoch": 0.31622838190002367, "grad_norm": 0.7109375, "learning_rate": 0.00015466462758966437, "loss": 0.7151, "step": 6674 }, { "epoch": 0.31627576403695806, "grad_norm": 1.359375, "learning_rate": 0.0001546521564671364, "loss": 0.5066, "step": 6675 }, { "epoch": 0.31632314617389246, "grad_norm": 0.23046875, "learning_rate": 0.0001546396841324818, "loss": 0.011, "step": 6676 }, { "epoch": 0.3163705283108268, "grad_norm": 1.5234375, "learning_rate": 0.0001546272105859772, "loss": 1.2534, "step": 6677 }, { "epoch": 0.3164179104477612, "grad_norm": 0.58984375, "learning_rate": 0.0001546147358278993, "loss": 0.9658, "step": 6678 }, { "epoch": 0.3164652925846956, "grad_norm": 0.00341796875, "learning_rate": 0.0001546022598585247, "loss": 0.0003, "step": 6679 }, { "epoch": 0.31651267472162997, "grad_norm": 0.734375, "learning_rate": 0.00015458978267813016, "loss": 0.9939, "step": 6680 }, { "epoch": 0.3165600568585643, "grad_norm": 0.671875, "learning_rate": 0.00015457730428699237, "loss": 0.5775, "step": 6681 }, { "epoch": 0.3166074389954987, "grad_norm": 0.5625, "learning_rate": 0.00015456482468538814, "loss": 0.9927, "step": 6682 }, { "epoch": 0.3166548211324331, "grad_norm": 0.58984375, "learning_rate": 0.00015455234387359417, "loss": 0.867, "step": 6683 }, { "epoch": 0.31670220326936743, "grad_norm": 0.07470703125, "learning_rate": 0.0001545398618518873, "loss": 0.009, "step": 6684 }, { "epoch": 0.3167495854063018, "grad_norm": 0.71484375, "learning_rate": 0.00015452737862054446, "loss": 1.0679, "step": 6685 }, { "epoch": 0.3167969675432362, "grad_norm": 0.1494140625, "learning_rate": 0.00015451489417984238, "loss": 0.0163, "step": 6686 }, { "epoch": 0.31684434968017056, "grad_norm": 0.703125, "learning_rate": 0.00015450240853005805, "loss": 0.973, "step": 6687 }, { "epoch": 0.31689173181710495, "grad_norm": 0.68359375, "learning_rate": 0.00015448992167146834, "loss": 0.3367, "step": 6688 }, { "epoch": 0.31693911395403934, "grad_norm": 0.216796875, "learning_rate": 0.0001544774336043502, "loss": 0.1513, "step": 6689 }, { "epoch": 0.3169864960909737, "grad_norm": 0.59765625, "learning_rate": 0.00015446494432898064, "loss": 1.4223, "step": 6690 }, { "epoch": 0.3170338782279081, "grad_norm": 0.50390625, "learning_rate": 0.0001544524538456366, "loss": 1.0795, "step": 6691 }, { "epoch": 0.31708126036484247, "grad_norm": 0.53515625, "learning_rate": 0.00015443996215459515, "loss": 0.4657, "step": 6692 }, { "epoch": 0.3171286425017768, "grad_norm": 0.66015625, "learning_rate": 0.0001544274692561333, "loss": 0.9374, "step": 6693 }, { "epoch": 0.3171760246387112, "grad_norm": 0.734375, "learning_rate": 0.00015441497515052816, "loss": 0.9042, "step": 6694 }, { "epoch": 0.3172234067756456, "grad_norm": 0.1875, "learning_rate": 0.00015440247983805686, "loss": 0.0815, "step": 6695 }, { "epoch": 0.31727078891258, "grad_norm": 0.65625, "learning_rate": 0.0001543899833189965, "loss": 1.362, "step": 6696 }, { "epoch": 0.3173181710495143, "grad_norm": 0.56640625, "learning_rate": 0.0001543774855936242, "loss": 0.6509, "step": 6697 }, { "epoch": 0.3173655531864487, "grad_norm": 0.30859375, "learning_rate": 0.00015436498666221725, "loss": 0.014, "step": 6698 }, { "epoch": 0.3174129353233831, "grad_norm": 0.796875, "learning_rate": 0.00015435248652505276, "loss": 0.8763, "step": 6699 }, { "epoch": 0.31746031746031744, "grad_norm": 0.6015625, "learning_rate": 0.000154339985182408, "loss": 0.7114, "step": 6700 }, { "epoch": 0.31750769959725184, "grad_norm": 0.0159912109375, "learning_rate": 0.0001543274826345603, "loss": 0.001, "step": 6701 }, { "epoch": 0.31755508173418623, "grad_norm": 0.228515625, "learning_rate": 0.00015431497888178687, "loss": 0.0264, "step": 6702 }, { "epoch": 0.31760246387112057, "grad_norm": 0.8984375, "learning_rate": 0.00015430247392436507, "loss": 1.4535, "step": 6703 }, { "epoch": 0.31764984600805496, "grad_norm": 0.94140625, "learning_rate": 0.00015428996776257225, "loss": 0.6312, "step": 6704 }, { "epoch": 0.31769722814498935, "grad_norm": 0.2451171875, "learning_rate": 0.00015427746039668575, "loss": 0.1916, "step": 6705 }, { "epoch": 0.3177446102819237, "grad_norm": 0.75, "learning_rate": 0.00015426495182698302, "loss": 0.7129, "step": 6706 }, { "epoch": 0.3177919924188581, "grad_norm": 0.369140625, "learning_rate": 0.00015425244205374144, "loss": 0.1759, "step": 6707 }, { "epoch": 0.3178393745557925, "grad_norm": 0.7890625, "learning_rate": 0.00015423993107723847, "loss": 1.465, "step": 6708 }, { "epoch": 0.31788675669272687, "grad_norm": 0.65625, "learning_rate": 0.0001542274188977516, "loss": 0.9762, "step": 6709 }, { "epoch": 0.3179341388296612, "grad_norm": 0.35546875, "learning_rate": 0.00015421490551555838, "loss": 0.1516, "step": 6710 }, { "epoch": 0.3179815209665956, "grad_norm": 0.79296875, "learning_rate": 0.00015420239093093623, "loss": 1.2048, "step": 6711 }, { "epoch": 0.31802890310353, "grad_norm": 0.6875, "learning_rate": 0.00015418987514416286, "loss": 1.065, "step": 6712 }, { "epoch": 0.31807628524046433, "grad_norm": 1.2578125, "learning_rate": 0.00015417735815551574, "loss": 0.2488, "step": 6713 }, { "epoch": 0.3181236673773987, "grad_norm": 1.1171875, "learning_rate": 0.00015416483996527256, "loss": 1.0395, "step": 6714 }, { "epoch": 0.3181710495143331, "grad_norm": 0.55078125, "learning_rate": 0.0001541523205737109, "loss": 0.411, "step": 6715 }, { "epoch": 0.31821843165126745, "grad_norm": 0.60546875, "learning_rate": 0.00015413979998110845, "loss": 0.7368, "step": 6716 }, { "epoch": 0.31826581378820185, "grad_norm": 0.671875, "learning_rate": 0.00015412727818774288, "loss": 0.4258, "step": 6717 }, { "epoch": 0.31831319592513624, "grad_norm": 0.58984375, "learning_rate": 0.00015411475519389197, "loss": 0.8731, "step": 6718 }, { "epoch": 0.3183605780620706, "grad_norm": 0.83203125, "learning_rate": 0.00015410223099983342, "loss": 1.1324, "step": 6719 }, { "epoch": 0.31840796019900497, "grad_norm": 1.0703125, "learning_rate": 0.000154089705605845, "loss": 0.0377, "step": 6720 }, { "epoch": 0.31845534233593936, "grad_norm": 0.486328125, "learning_rate": 0.00015407717901220452, "loss": 0.0648, "step": 6721 }, { "epoch": 0.3185027244728737, "grad_norm": 0.71484375, "learning_rate": 0.00015406465121918985, "loss": 1.4768, "step": 6722 }, { "epoch": 0.3185501066098081, "grad_norm": 0.498046875, "learning_rate": 0.00015405212222707875, "loss": 0.0798, "step": 6723 }, { "epoch": 0.3185974887467425, "grad_norm": 0.6640625, "learning_rate": 0.00015403959203614918, "loss": 0.9283, "step": 6724 }, { "epoch": 0.3186448708836769, "grad_norm": 0.5703125, "learning_rate": 0.00015402706064667902, "loss": 1.3497, "step": 6725 }, { "epoch": 0.3186922530206112, "grad_norm": 0.453125, "learning_rate": 0.0001540145280589462, "loss": 0.6482, "step": 6726 }, { "epoch": 0.3187396351575456, "grad_norm": 0.671875, "learning_rate": 0.00015400199427322868, "loss": 1.0577, "step": 6727 }, { "epoch": 0.31878701729448, "grad_norm": 0.6171875, "learning_rate": 0.00015398945928980445, "loss": 1.4132, "step": 6728 }, { "epoch": 0.31883439943141434, "grad_norm": 0.515625, "learning_rate": 0.0001539769231089515, "loss": 0.7271, "step": 6729 }, { "epoch": 0.31888178156834873, "grad_norm": 0.66796875, "learning_rate": 0.00015396438573094792, "loss": 0.0687, "step": 6730 }, { "epoch": 0.3189291637052831, "grad_norm": 0.314453125, "learning_rate": 0.00015395184715607174, "loss": 0.0163, "step": 6731 }, { "epoch": 0.31897654584221746, "grad_norm": 0.5390625, "learning_rate": 0.00015393930738460103, "loss": 0.6262, "step": 6732 }, { "epoch": 0.31902392797915186, "grad_norm": 1.1796875, "learning_rate": 0.00015392676641681394, "loss": 0.6423, "step": 6733 }, { "epoch": 0.31907131011608625, "grad_norm": 0.5, "learning_rate": 0.00015391422425298866, "loss": 0.9327, "step": 6734 }, { "epoch": 0.3191186922530206, "grad_norm": 0.734375, "learning_rate": 0.00015390168089340325, "loss": 0.3877, "step": 6735 }, { "epoch": 0.319166074389955, "grad_norm": 0.70703125, "learning_rate": 0.00015388913633833598, "loss": 0.8933, "step": 6736 }, { "epoch": 0.31921345652688937, "grad_norm": 0.796875, "learning_rate": 0.0001538765905880651, "loss": 0.6484, "step": 6737 }, { "epoch": 0.31926083866382376, "grad_norm": 0.59765625, "learning_rate": 0.00015386404364286884, "loss": 0.1907, "step": 6738 }, { "epoch": 0.3193082208007581, "grad_norm": 0.6640625, "learning_rate": 0.00015385149550302543, "loss": 1.6144, "step": 6739 }, { "epoch": 0.3193556029376925, "grad_norm": 0.4921875, "learning_rate": 0.00015383894616881322, "loss": 0.8437, "step": 6740 }, { "epoch": 0.3194029850746269, "grad_norm": 0.70703125, "learning_rate": 0.0001538263956405105, "loss": 1.4197, "step": 6741 }, { "epoch": 0.3194503672115612, "grad_norm": 0.7890625, "learning_rate": 0.0001538138439183957, "loss": 1.0557, "step": 6742 }, { "epoch": 0.3194977493484956, "grad_norm": 0.51953125, "learning_rate": 0.00015380129100274714, "loss": 0.5517, "step": 6743 }, { "epoch": 0.31954513148543, "grad_norm": 0.8046875, "learning_rate": 0.00015378873689384328, "loss": 1.4355, "step": 6744 }, { "epoch": 0.31959251362236435, "grad_norm": 0.2265625, "learning_rate": 0.0001537761815919625, "loss": 0.1077, "step": 6745 }, { "epoch": 0.31963989575929874, "grad_norm": 0.98828125, "learning_rate": 0.00015376362509738332, "loss": 0.5584, "step": 6746 }, { "epoch": 0.31968727789623314, "grad_norm": 0.69140625, "learning_rate": 0.0001537510674103842, "loss": 0.9827, "step": 6747 }, { "epoch": 0.3197346600331675, "grad_norm": 0.69140625, "learning_rate": 0.00015373850853124362, "loss": 0.7614, "step": 6748 }, { "epoch": 0.31978204217010187, "grad_norm": 0.48046875, "learning_rate": 0.00015372594846024018, "loss": 0.6042, "step": 6749 }, { "epoch": 0.31982942430703626, "grad_norm": 0.6953125, "learning_rate": 0.00015371338719765242, "loss": 0.6404, "step": 6750 }, { "epoch": 0.3198768064439706, "grad_norm": 0.58203125, "learning_rate": 0.00015370082474375896, "loss": 0.7974, "step": 6751 }, { "epoch": 0.319924188580905, "grad_norm": 0.119140625, "learning_rate": 0.00015368826109883837, "loss": 0.0118, "step": 6752 }, { "epoch": 0.3199715707178394, "grad_norm": 0.6796875, "learning_rate": 0.0001536756962631694, "loss": 0.9106, "step": 6753 }, { "epoch": 0.3200189528547738, "grad_norm": 0.376953125, "learning_rate": 0.00015366313023703061, "loss": 0.0626, "step": 6754 }, { "epoch": 0.3200663349917081, "grad_norm": 0.76953125, "learning_rate": 0.00015365056302070077, "loss": 0.9871, "step": 6755 }, { "epoch": 0.3201137171286425, "grad_norm": 0.62109375, "learning_rate": 0.0001536379946144586, "loss": 0.7532, "step": 6756 }, { "epoch": 0.3201610992655769, "grad_norm": 0.056640625, "learning_rate": 0.0001536254250185828, "loss": 0.0051, "step": 6757 }, { "epoch": 0.32020848140251124, "grad_norm": 0.9921875, "learning_rate": 0.00015361285423335223, "loss": 1.1125, "step": 6758 }, { "epoch": 0.32025586353944563, "grad_norm": 0.78125, "learning_rate": 0.00015360028225904567, "loss": 0.8966, "step": 6759 }, { "epoch": 0.32030324567638, "grad_norm": 0.62890625, "learning_rate": 0.00015358770909594188, "loss": 0.189, "step": 6760 }, { "epoch": 0.32035062781331436, "grad_norm": 0.59765625, "learning_rate": 0.00015357513474431985, "loss": 0.7883, "step": 6761 }, { "epoch": 0.32039800995024875, "grad_norm": 0.69921875, "learning_rate": 0.0001535625592044584, "loss": 1.0491, "step": 6762 }, { "epoch": 0.32044539208718315, "grad_norm": 0.58203125, "learning_rate": 0.0001535499824766364, "loss": 0.8326, "step": 6763 }, { "epoch": 0.3204927742241175, "grad_norm": 0.59375, "learning_rate": 0.0001535374045611328, "loss": 1.5672, "step": 6764 }, { "epoch": 0.3205401563610519, "grad_norm": 0.6328125, "learning_rate": 0.00015352482545822666, "loss": 0.678, "step": 6765 }, { "epoch": 0.32058753849798627, "grad_norm": 0.17578125, "learning_rate": 0.00015351224516819688, "loss": 0.1243, "step": 6766 }, { "epoch": 0.32063492063492066, "grad_norm": 0.9921875, "learning_rate": 0.00015349966369132247, "loss": 1.1682, "step": 6767 }, { "epoch": 0.320682302771855, "grad_norm": 0.7890625, "learning_rate": 0.00015348708102788254, "loss": 1.3193, "step": 6768 }, { "epoch": 0.3207296849087894, "grad_norm": 0.0240478515625, "learning_rate": 0.0001534744971781561, "loss": 0.0012, "step": 6769 }, { "epoch": 0.3207770670457238, "grad_norm": 0.64453125, "learning_rate": 0.0001534619121424223, "loss": 1.0788, "step": 6770 }, { "epoch": 0.3208244491826581, "grad_norm": 0.30078125, "learning_rate": 0.00015344932592096022, "loss": 0.1021, "step": 6771 }, { "epoch": 0.3208718313195925, "grad_norm": 0.78125, "learning_rate": 0.00015343673851404903, "loss": 1.2014, "step": 6772 }, { "epoch": 0.3209192134565269, "grad_norm": 0.640625, "learning_rate": 0.00015342414992196787, "loss": 0.716, "step": 6773 }, { "epoch": 0.32096659559346125, "grad_norm": 0.84375, "learning_rate": 0.00015341156014499598, "loss": 1.0394, "step": 6774 }, { "epoch": 0.32101397773039564, "grad_norm": 0.59765625, "learning_rate": 0.00015339896918341257, "loss": 0.9286, "step": 6775 }, { "epoch": 0.32106135986733003, "grad_norm": 0.8046875, "learning_rate": 0.00015338637703749694, "loss": 1.0344, "step": 6776 }, { "epoch": 0.32110874200426437, "grad_norm": 0.609375, "learning_rate": 0.00015337378370752831, "loss": 0.751, "step": 6777 }, { "epoch": 0.32115612414119876, "grad_norm": 0.61328125, "learning_rate": 0.00015336118919378601, "loss": 1.0953, "step": 6778 }, { "epoch": 0.32120350627813316, "grad_norm": 0.6171875, "learning_rate": 0.00015334859349654936, "loss": 0.9351, "step": 6779 }, { "epoch": 0.3212508884150675, "grad_norm": 0.2353515625, "learning_rate": 0.00015333599661609775, "loss": 0.0137, "step": 6780 }, { "epoch": 0.3212982705520019, "grad_norm": 0.09912109375, "learning_rate": 0.00015332339855271052, "loss": 0.0023, "step": 6781 }, { "epoch": 0.3213456526889363, "grad_norm": 0.54296875, "learning_rate": 0.0001533107993066671, "loss": 1.0246, "step": 6782 }, { "epoch": 0.32139303482587067, "grad_norm": 0.703125, "learning_rate": 0.00015329819887824697, "loss": 0.8105, "step": 6783 }, { "epoch": 0.321440416962805, "grad_norm": 0.162109375, "learning_rate": 0.00015328559726772957, "loss": 0.0319, "step": 6784 }, { "epoch": 0.3214877990997394, "grad_norm": 0.193359375, "learning_rate": 0.00015327299447539435, "loss": 0.1445, "step": 6785 }, { "epoch": 0.3215351812366738, "grad_norm": 0.5703125, "learning_rate": 0.00015326039050152086, "loss": 0.6866, "step": 6786 }, { "epoch": 0.32158256337360813, "grad_norm": 0.79296875, "learning_rate": 0.00015324778534638863, "loss": 1.0162, "step": 6787 }, { "epoch": 0.3216299455105425, "grad_norm": 0.91796875, "learning_rate": 0.00015323517901027727, "loss": 1.3541, "step": 6788 }, { "epoch": 0.3216773276474769, "grad_norm": 0.7578125, "learning_rate": 0.0001532225714934663, "loss": 0.7535, "step": 6789 }, { "epoch": 0.32172470978441126, "grad_norm": 0.248046875, "learning_rate": 0.00015320996279623544, "loss": 0.1705, "step": 6790 }, { "epoch": 0.32177209192134565, "grad_norm": 0.765625, "learning_rate": 0.00015319735291886423, "loss": 1.1241, "step": 6791 }, { "epoch": 0.32181947405828004, "grad_norm": 0.73046875, "learning_rate": 0.00015318474186163243, "loss": 1.0389, "step": 6792 }, { "epoch": 0.3218668561952144, "grad_norm": 0.7734375, "learning_rate": 0.00015317212962481967, "loss": 1.0484, "step": 6793 }, { "epoch": 0.32191423833214877, "grad_norm": 0.6328125, "learning_rate": 0.00015315951620870573, "loss": 1.4084, "step": 6794 }, { "epoch": 0.32196162046908317, "grad_norm": 0.6328125, "learning_rate": 0.00015314690161357037, "loss": 0.9202, "step": 6795 }, { "epoch": 0.32200900260601756, "grad_norm": 0.74609375, "learning_rate": 0.0001531342858396933, "loss": 0.7585, "step": 6796 }, { "epoch": 0.3220563847429519, "grad_norm": 0.58984375, "learning_rate": 0.00015312166888735437, "loss": 1.2135, "step": 6797 }, { "epoch": 0.3221037668798863, "grad_norm": 0.330078125, "learning_rate": 0.00015310905075683343, "loss": 0.1361, "step": 6798 }, { "epoch": 0.3221511490168207, "grad_norm": 0.68359375, "learning_rate": 0.00015309643144841034, "loss": 1.1331, "step": 6799 }, { "epoch": 0.322198531153755, "grad_norm": 0.1416015625, "learning_rate": 0.0001530838109623649, "loss": 0.0115, "step": 6800 }, { "epoch": 0.3222459132906894, "grad_norm": 0.5078125, "learning_rate": 0.0001530711892989771, "loss": 0.2296, "step": 6801 }, { "epoch": 0.3222932954276238, "grad_norm": 0.56640625, "learning_rate": 0.00015305856645852686, "loss": 1.0299, "step": 6802 }, { "epoch": 0.32234067756455814, "grad_norm": 0.4296875, "learning_rate": 0.00015304594244129412, "loss": 0.1335, "step": 6803 }, { "epoch": 0.32238805970149254, "grad_norm": 0.004669189453125, "learning_rate": 0.00015303331724755885, "loss": 0.0002, "step": 6804 }, { "epoch": 0.32243544183842693, "grad_norm": 0.625, "learning_rate": 0.0001530206908776011, "loss": 1.3244, "step": 6805 }, { "epoch": 0.32248282397536127, "grad_norm": 0.33984375, "learning_rate": 0.00015300806333170094, "loss": 0.1052, "step": 6806 }, { "epoch": 0.32253020611229566, "grad_norm": 0.703125, "learning_rate": 0.00015299543461013836, "loss": 0.8976, "step": 6807 }, { "epoch": 0.32257758824923005, "grad_norm": 0.5703125, "learning_rate": 0.00015298280471319353, "loss": 0.1655, "step": 6808 }, { "epoch": 0.3226249703861644, "grad_norm": 0.10107421875, "learning_rate": 0.00015297017364114649, "loss": 0.0132, "step": 6809 }, { "epoch": 0.3226723525230988, "grad_norm": 0.7421875, "learning_rate": 0.00015295754139427742, "loss": 0.7851, "step": 6810 }, { "epoch": 0.3227197346600332, "grad_norm": 0.6875, "learning_rate": 0.0001529449079728665, "loss": 1.1728, "step": 6811 }, { "epoch": 0.32276711679696757, "grad_norm": 0.53515625, "learning_rate": 0.00015293227337719395, "loss": 0.7953, "step": 6812 }, { "epoch": 0.3228144989339019, "grad_norm": 0.443359375, "learning_rate": 0.00015291963760753988, "loss": 0.4337, "step": 6813 }, { "epoch": 0.3228618810708363, "grad_norm": 0.177734375, "learning_rate": 0.0001529070006641847, "loss": 0.0239, "step": 6814 }, { "epoch": 0.3229092632077707, "grad_norm": 0.6015625, "learning_rate": 0.00015289436254740853, "loss": 1.0518, "step": 6815 }, { "epoch": 0.32295664534470503, "grad_norm": 0.7109375, "learning_rate": 0.00015288172325749177, "loss": 0.6293, "step": 6816 }, { "epoch": 0.3230040274816394, "grad_norm": 0.5390625, "learning_rate": 0.00015286908279471472, "loss": 0.7351, "step": 6817 }, { "epoch": 0.3230514096185738, "grad_norm": 0.59375, "learning_rate": 0.00015285644115935773, "loss": 0.9188, "step": 6818 }, { "epoch": 0.32309879175550815, "grad_norm": 0.3671875, "learning_rate": 0.00015284379835170118, "loss": 0.0971, "step": 6819 }, { "epoch": 0.32314617389244255, "grad_norm": 1.0390625, "learning_rate": 0.00015283115437202542, "loss": 0.5676, "step": 6820 }, { "epoch": 0.32319355602937694, "grad_norm": 0.57421875, "learning_rate": 0.00015281850922061095, "loss": 0.9162, "step": 6821 }, { "epoch": 0.3232409381663113, "grad_norm": 0.7109375, "learning_rate": 0.00015280586289773823, "loss": 1.3716, "step": 6822 }, { "epoch": 0.32328832030324567, "grad_norm": 0.62890625, "learning_rate": 0.0001527932154036877, "loss": 0.3771, "step": 6823 }, { "epoch": 0.32333570244018006, "grad_norm": 0.62109375, "learning_rate": 0.0001527805667387399, "loss": 0.4614, "step": 6824 }, { "epoch": 0.32338308457711445, "grad_norm": 0.75, "learning_rate": 0.00015276791690317534, "loss": 0.5965, "step": 6825 }, { "epoch": 0.3234304667140488, "grad_norm": 0.78125, "learning_rate": 0.00015275526589727463, "loss": 1.106, "step": 6826 }, { "epoch": 0.3234778488509832, "grad_norm": 0.59375, "learning_rate": 0.00015274261372131824, "loss": 1.1501, "step": 6827 }, { "epoch": 0.3235252309879176, "grad_norm": 0.2197265625, "learning_rate": 0.00015272996037558688, "loss": 0.0227, "step": 6828 }, { "epoch": 0.3235726131248519, "grad_norm": 0.53515625, "learning_rate": 0.00015271730586036118, "loss": 0.9243, "step": 6829 }, { "epoch": 0.3236199952617863, "grad_norm": 0.73828125, "learning_rate": 0.0001527046501759218, "loss": 0.1782, "step": 6830 }, { "epoch": 0.3236673773987207, "grad_norm": 0.57421875, "learning_rate": 0.0001526919933225494, "loss": 0.1422, "step": 6831 }, { "epoch": 0.32371475953565504, "grad_norm": 0.72265625, "learning_rate": 0.0001526793353005247, "loss": 1.4339, "step": 6832 }, { "epoch": 0.32376214167258943, "grad_norm": 0.50390625, "learning_rate": 0.00015266667611012847, "loss": 0.9213, "step": 6833 }, { "epoch": 0.3238095238095238, "grad_norm": 0.5234375, "learning_rate": 0.00015265401575164148, "loss": 0.6695, "step": 6834 }, { "epoch": 0.32385690594645816, "grad_norm": 0.51171875, "learning_rate": 0.00015264135422534447, "loss": 1.099, "step": 6835 }, { "epoch": 0.32390428808339256, "grad_norm": 1.40625, "learning_rate": 0.0001526286915315183, "loss": 1.2299, "step": 6836 }, { "epoch": 0.32395167022032695, "grad_norm": 0.609375, "learning_rate": 0.00015261602767044382, "loss": 0.6742, "step": 6837 }, { "epoch": 0.3239990523572613, "grad_norm": 0.65625, "learning_rate": 0.00015260336264240188, "loss": 1.0091, "step": 6838 }, { "epoch": 0.3240464344941957, "grad_norm": 0.21875, "learning_rate": 0.0001525906964476734, "loss": 0.1714, "step": 6839 }, { "epoch": 0.32409381663113007, "grad_norm": 0.7578125, "learning_rate": 0.00015257802908653928, "loss": 0.1054, "step": 6840 }, { "epoch": 0.32414119876806446, "grad_norm": 0.00323486328125, "learning_rate": 0.0001525653605592805, "loss": 0.0003, "step": 6841 }, { "epoch": 0.3241885809049988, "grad_norm": 0.169921875, "learning_rate": 0.000152552690866178, "loss": 0.1259, "step": 6842 }, { "epoch": 0.3242359630419332, "grad_norm": 0.0028228759765625, "learning_rate": 0.0001525400200075128, "loss": 0.0002, "step": 6843 }, { "epoch": 0.3242833451788676, "grad_norm": 0.96484375, "learning_rate": 0.00015252734798356588, "loss": 1.4238, "step": 6844 }, { "epoch": 0.3243307273158019, "grad_norm": 0.0400390625, "learning_rate": 0.00015251467479461837, "loss": 0.0015, "step": 6845 }, { "epoch": 0.3243781094527363, "grad_norm": 0.7734375, "learning_rate": 0.0001525020004409513, "loss": 0.3023, "step": 6846 }, { "epoch": 0.3244254915896707, "grad_norm": 0.66796875, "learning_rate": 0.0001524893249228458, "loss": 1.0292, "step": 6847 }, { "epoch": 0.32447287372660505, "grad_norm": 0.71875, "learning_rate": 0.00015247664824058295, "loss": 0.2902, "step": 6848 }, { "epoch": 0.32452025586353944, "grad_norm": 0.765625, "learning_rate": 0.00015246397039444398, "loss": 0.9553, "step": 6849 }, { "epoch": 0.32456763800047383, "grad_norm": 0.66015625, "learning_rate": 0.00015245129138471, "loss": 0.8417, "step": 6850 }, { "epoch": 0.3246150201374082, "grad_norm": 0.671875, "learning_rate": 0.00015243861121166222, "loss": 0.9548, "step": 6851 }, { "epoch": 0.32466240227434257, "grad_norm": 0.5703125, "learning_rate": 0.00015242592987558192, "loss": 0.9847, "step": 6852 }, { "epoch": 0.32470978441127696, "grad_norm": 0.7890625, "learning_rate": 0.00015241324737675038, "loss": 1.4285, "step": 6853 }, { "epoch": 0.32475716654821135, "grad_norm": 0.59765625, "learning_rate": 0.00015240056371544877, "loss": 1.125, "step": 6854 }, { "epoch": 0.3248045486851457, "grad_norm": 0.5546875, "learning_rate": 0.0001523878788919585, "loss": 0.7167, "step": 6855 }, { "epoch": 0.3248519308220801, "grad_norm": 0.45703125, "learning_rate": 0.0001523751929065609, "loss": 0.7513, "step": 6856 }, { "epoch": 0.3248993129590145, "grad_norm": 1.0, "learning_rate": 0.0001523625057595373, "loss": 1.1208, "step": 6857 }, { "epoch": 0.3249466950959488, "grad_norm": 0.71875, "learning_rate": 0.0001523498174511691, "loss": 0.7778, "step": 6858 }, { "epoch": 0.3249940772328832, "grad_norm": 0.0537109375, "learning_rate": 0.0001523371279817377, "loss": 0.0056, "step": 6859 }, { "epoch": 0.3250414593698176, "grad_norm": 0.890625, "learning_rate": 0.00015232443735152456, "loss": 0.9183, "step": 6860 }, { "epoch": 0.32508884150675194, "grad_norm": 0.625, "learning_rate": 0.00015231174556081109, "loss": 0.7987, "step": 6861 }, { "epoch": 0.32513622364368633, "grad_norm": 0.482421875, "learning_rate": 0.00015229905260987886, "loss": 0.1747, "step": 6862 }, { "epoch": 0.3251836057806207, "grad_norm": 0.640625, "learning_rate": 0.00015228635849900935, "loss": 1.0784, "step": 6863 }, { "epoch": 0.32523098791755506, "grad_norm": 0.6015625, "learning_rate": 0.00015227366322848407, "loss": 0.0737, "step": 6864 }, { "epoch": 0.32527837005448945, "grad_norm": 0.380859375, "learning_rate": 0.00015226096679858467, "loss": 0.0539, "step": 6865 }, { "epoch": 0.32532575219142384, "grad_norm": 0.74609375, "learning_rate": 0.00015224826920959264, "loss": 1.331, "step": 6866 }, { "epoch": 0.3253731343283582, "grad_norm": 0.53515625, "learning_rate": 0.00015223557046178967, "loss": 0.8021, "step": 6867 }, { "epoch": 0.3254205164652926, "grad_norm": 0.6171875, "learning_rate": 0.0001522228705554574, "loss": 0.6568, "step": 6868 }, { "epoch": 0.32546789860222697, "grad_norm": 0.92578125, "learning_rate": 0.00015221016949087748, "loss": 1.5958, "step": 6869 }, { "epoch": 0.32551528073916136, "grad_norm": 0.88671875, "learning_rate": 0.0001521974672683316, "loss": 1.0054, "step": 6870 }, { "epoch": 0.3255626628760957, "grad_norm": 0.19921875, "learning_rate": 0.00015218476388810151, "loss": 0.1415, "step": 6871 }, { "epoch": 0.3256100450130301, "grad_norm": 0.65234375, "learning_rate": 0.00015217205935046896, "loss": 0.9692, "step": 6872 }, { "epoch": 0.3256574271499645, "grad_norm": 0.515625, "learning_rate": 0.00015215935365571568, "loss": 0.6272, "step": 6873 }, { "epoch": 0.3257048092868988, "grad_norm": 0.81640625, "learning_rate": 0.0001521466468041235, "loss": 1.0321, "step": 6874 }, { "epoch": 0.3257521914238332, "grad_norm": 0.61328125, "learning_rate": 0.0001521339387959742, "loss": 1.1456, "step": 6875 }, { "epoch": 0.3257995735607676, "grad_norm": 0.97265625, "learning_rate": 0.00015212122963154974, "loss": 0.2301, "step": 6876 }, { "epoch": 0.32584695569770195, "grad_norm": 0.138671875, "learning_rate": 0.00015210851931113186, "loss": 0.0103, "step": 6877 }, { "epoch": 0.32589433783463634, "grad_norm": 0.5859375, "learning_rate": 0.00015209580783500255, "loss": 1.1929, "step": 6878 }, { "epoch": 0.32594171997157073, "grad_norm": 0.6796875, "learning_rate": 0.00015208309520344372, "loss": 1.2416, "step": 6879 }, { "epoch": 0.32598910210850507, "grad_norm": 0.73828125, "learning_rate": 0.0001520703814167373, "loss": 1.2499, "step": 6880 }, { "epoch": 0.32603648424543946, "grad_norm": 0.734375, "learning_rate": 0.0001520576664751653, "loss": 0.8114, "step": 6881 }, { "epoch": 0.32608386638237385, "grad_norm": 0.2060546875, "learning_rate": 0.0001520449503790097, "loss": 0.1669, "step": 6882 }, { "epoch": 0.32613124851930825, "grad_norm": 0.51171875, "learning_rate": 0.00015203223312855254, "loss": 0.5363, "step": 6883 }, { "epoch": 0.3261786306562426, "grad_norm": 0.7109375, "learning_rate": 0.00015201951472407584, "loss": 1.1362, "step": 6884 }, { "epoch": 0.326226012793177, "grad_norm": 0.1767578125, "learning_rate": 0.00015200679516586173, "loss": 0.0285, "step": 6885 }, { "epoch": 0.32627339493011137, "grad_norm": 0.62109375, "learning_rate": 0.00015199407445419235, "loss": 1.0288, "step": 6886 }, { "epoch": 0.3263207770670457, "grad_norm": 0.546875, "learning_rate": 0.00015198135258934968, "loss": 1.0881, "step": 6887 }, { "epoch": 0.3263681592039801, "grad_norm": 0.455078125, "learning_rate": 0.00015196862957161605, "loss": 0.0375, "step": 6888 }, { "epoch": 0.3264155413409145, "grad_norm": 0.0458984375, "learning_rate": 0.00015195590540127357, "loss": 0.0011, "step": 6889 }, { "epoch": 0.32646292347784883, "grad_norm": 0.703125, "learning_rate": 0.00015194318007860443, "loss": 0.862, "step": 6890 }, { "epoch": 0.3265103056147832, "grad_norm": 0.66015625, "learning_rate": 0.00015193045360389088, "loss": 1.0593, "step": 6891 }, { "epoch": 0.3265576877517176, "grad_norm": 0.390625, "learning_rate": 0.0001519177259774152, "loss": 0.0673, "step": 6892 }, { "epoch": 0.32660506988865196, "grad_norm": 0.474609375, "learning_rate": 0.00015190499719945963, "loss": 0.0542, "step": 6893 }, { "epoch": 0.32665245202558635, "grad_norm": 0.64453125, "learning_rate": 0.00015189226727030652, "loss": 1.0707, "step": 6894 }, { "epoch": 0.32669983416252074, "grad_norm": 0.59375, "learning_rate": 0.00015187953619023823, "loss": 1.0912, "step": 6895 }, { "epoch": 0.3267472162994551, "grad_norm": 0.0966796875, "learning_rate": 0.00015186680395953706, "loss": 0.0081, "step": 6896 }, { "epoch": 0.32679459843638947, "grad_norm": 1.2109375, "learning_rate": 0.0001518540705784854, "loss": 1.0111, "step": 6897 }, { "epoch": 0.32684198057332386, "grad_norm": 0.56640625, "learning_rate": 0.00015184133604736571, "loss": 1.0445, "step": 6898 }, { "epoch": 0.32688936271025826, "grad_norm": 0.6953125, "learning_rate": 0.00015182860036646041, "loss": 0.9122, "step": 6899 }, { "epoch": 0.3269367448471926, "grad_norm": 0.7265625, "learning_rate": 0.00015181586353605196, "loss": 1.3648, "step": 6900 }, { "epoch": 0.326984126984127, "grad_norm": 0.447265625, "learning_rate": 0.0001518031255564228, "loss": 0.7088, "step": 6901 }, { "epoch": 0.3270315091210614, "grad_norm": 0.65234375, "learning_rate": 0.00015179038642785557, "loss": 1.4338, "step": 6902 }, { "epoch": 0.3270788912579957, "grad_norm": 0.625, "learning_rate": 0.0001517776461506327, "loss": 0.8348, "step": 6903 }, { "epoch": 0.3271262733949301, "grad_norm": 0.578125, "learning_rate": 0.00015176490472503678, "loss": 0.9255, "step": 6904 }, { "epoch": 0.3271736555318645, "grad_norm": 0.349609375, "learning_rate": 0.00015175216215135043, "loss": 0.035, "step": 6905 }, { "epoch": 0.32722103766879884, "grad_norm": 0.66015625, "learning_rate": 0.00015173941842985627, "loss": 1.0767, "step": 6906 }, { "epoch": 0.32726841980573323, "grad_norm": 0.54296875, "learning_rate": 0.00015172667356083686, "loss": 0.8763, "step": 6907 }, { "epoch": 0.32731580194266763, "grad_norm": 0.64453125, "learning_rate": 0.00015171392754457494, "loss": 0.6437, "step": 6908 }, { "epoch": 0.32736318407960197, "grad_norm": 0.19921875, "learning_rate": 0.00015170118038135318, "loss": 0.1531, "step": 6909 }, { "epoch": 0.32741056621653636, "grad_norm": 0.458984375, "learning_rate": 0.00015168843207145435, "loss": 0.0325, "step": 6910 }, { "epoch": 0.32745794835347075, "grad_norm": 0.66015625, "learning_rate": 0.00015167568261516116, "loss": 1.0991, "step": 6911 }, { "epoch": 0.32750533049040514, "grad_norm": 0.73828125, "learning_rate": 0.00015166293201275633, "loss": 0.8243, "step": 6912 }, { "epoch": 0.3275527126273395, "grad_norm": 0.83203125, "learning_rate": 0.0001516501802645227, "loss": 1.4578, "step": 6913 }, { "epoch": 0.3276000947642739, "grad_norm": 0.55859375, "learning_rate": 0.00015163742737074314, "loss": 0.8581, "step": 6914 }, { "epoch": 0.32764747690120827, "grad_norm": 0.734375, "learning_rate": 0.0001516246733317004, "loss": 0.9694, "step": 6915 }, { "epoch": 0.3276948590381426, "grad_norm": 0.7421875, "learning_rate": 0.0001516119181476774, "loss": 0.5541, "step": 6916 }, { "epoch": 0.327742241175077, "grad_norm": 0.73828125, "learning_rate": 0.000151599161818957, "loss": 1.2393, "step": 6917 }, { "epoch": 0.3277896233120114, "grad_norm": 0.66015625, "learning_rate": 0.0001515864043458222, "loss": 1.134, "step": 6918 }, { "epoch": 0.32783700544894573, "grad_norm": 0.55078125, "learning_rate": 0.0001515736457285559, "loss": 0.6624, "step": 6919 }, { "epoch": 0.3278843875858801, "grad_norm": 0.69921875, "learning_rate": 0.00015156088596744103, "loss": 0.6075, "step": 6920 }, { "epoch": 0.3279317697228145, "grad_norm": 0.6640625, "learning_rate": 0.00015154812506276065, "loss": 1.0984, "step": 6921 }, { "epoch": 0.32797915185974885, "grad_norm": 0.67578125, "learning_rate": 0.00015153536301479775, "loss": 0.9776, "step": 6922 }, { "epoch": 0.32802653399668324, "grad_norm": 0.703125, "learning_rate": 0.00015152259982383542, "loss": 1.2544, "step": 6923 }, { "epoch": 0.32807391613361764, "grad_norm": 0.1396484375, "learning_rate": 0.00015150983549015666, "loss": 0.0135, "step": 6924 }, { "epoch": 0.328121298270552, "grad_norm": 0.5859375, "learning_rate": 0.00015149707001404464, "loss": 1.0113, "step": 6925 }, { "epoch": 0.32816868040748637, "grad_norm": 1.125, "learning_rate": 0.0001514843033957825, "loss": 1.1702, "step": 6926 }, { "epoch": 0.32821606254442076, "grad_norm": 0.11865234375, "learning_rate": 0.00015147153563565332, "loss": 0.0092, "step": 6927 }, { "epoch": 0.32826344468135515, "grad_norm": 0.10009765625, "learning_rate": 0.00015145876673394027, "loss": 0.017, "step": 6928 }, { "epoch": 0.3283108268182895, "grad_norm": 0.8828125, "learning_rate": 0.00015144599669092662, "loss": 1.0963, "step": 6929 }, { "epoch": 0.3283582089552239, "grad_norm": 0.703125, "learning_rate": 0.00015143322550689554, "loss": 1.3804, "step": 6930 }, { "epoch": 0.3284055910921583, "grad_norm": 0.57421875, "learning_rate": 0.00015142045318213031, "loss": 0.2335, "step": 6931 }, { "epoch": 0.3284529732290926, "grad_norm": 0.55859375, "learning_rate": 0.0001514076797169142, "loss": 0.9878, "step": 6932 }, { "epoch": 0.328500355366027, "grad_norm": 0.46484375, "learning_rate": 0.00015139490511153055, "loss": 0.0259, "step": 6933 }, { "epoch": 0.3285477375029614, "grad_norm": 1.1171875, "learning_rate": 0.00015138212936626258, "loss": 0.8695, "step": 6934 }, { "epoch": 0.32859511963989574, "grad_norm": 0.859375, "learning_rate": 0.00015136935248139376, "loss": 1.6176, "step": 6935 }, { "epoch": 0.32864250177683013, "grad_norm": 0.201171875, "learning_rate": 0.00015135657445720742, "loss": 0.122, "step": 6936 }, { "epoch": 0.3286898839137645, "grad_norm": 0.54296875, "learning_rate": 0.00015134379529398693, "loss": 0.8374, "step": 6937 }, { "epoch": 0.32873726605069886, "grad_norm": 0.55859375, "learning_rate": 0.00015133101499201576, "loss": 0.9414, "step": 6938 }, { "epoch": 0.32878464818763325, "grad_norm": 0.5859375, "learning_rate": 0.00015131823355157736, "loss": 0.7317, "step": 6939 }, { "epoch": 0.32883203032456765, "grad_norm": 0.625, "learning_rate": 0.00015130545097295518, "loss": 1.0873, "step": 6940 }, { "epoch": 0.32887941246150204, "grad_norm": 0.306640625, "learning_rate": 0.0001512926672564328, "loss": 0.0427, "step": 6941 }, { "epoch": 0.3289267945984364, "grad_norm": 0.5546875, "learning_rate": 0.00015127988240229364, "loss": 0.9747, "step": 6942 }, { "epoch": 0.32897417673537077, "grad_norm": 0.77734375, "learning_rate": 0.00015126709641082132, "loss": 1.2964, "step": 6943 }, { "epoch": 0.32902155887230516, "grad_norm": 0.0018157958984375, "learning_rate": 0.00015125430928229942, "loss": 0.0002, "step": 6944 }, { "epoch": 0.3290689410092395, "grad_norm": 0.64453125, "learning_rate": 0.00015124152101701155, "loss": 1.0356, "step": 6945 }, { "epoch": 0.3291163231461739, "grad_norm": 0.73828125, "learning_rate": 0.00015122873161524126, "loss": 1.1814, "step": 6946 }, { "epoch": 0.3291637052831083, "grad_norm": 0.486328125, "learning_rate": 0.0001512159410772723, "loss": 0.0803, "step": 6947 }, { "epoch": 0.3292110874200426, "grad_norm": 0.76953125, "learning_rate": 0.00015120314940338835, "loss": 0.8302, "step": 6948 }, { "epoch": 0.329258469556977, "grad_norm": 0.76953125, "learning_rate": 0.00015119035659387308, "loss": 1.1037, "step": 6949 }, { "epoch": 0.3293058516939114, "grad_norm": 0.5, "learning_rate": 0.0001511775626490102, "loss": 0.5201, "step": 6950 }, { "epoch": 0.32935323383084575, "grad_norm": 0.15234375, "learning_rate": 0.00015116476756908353, "loss": 0.1235, "step": 6951 }, { "epoch": 0.32940061596778014, "grad_norm": 0.0849609375, "learning_rate": 0.0001511519713543768, "loss": 0.0051, "step": 6952 }, { "epoch": 0.32944799810471453, "grad_norm": 0.08203125, "learning_rate": 0.0001511391740051738, "loss": 0.0045, "step": 6953 }, { "epoch": 0.32949538024164887, "grad_norm": 0.59375, "learning_rate": 0.0001511263755217584, "loss": 0.3869, "step": 6954 }, { "epoch": 0.32954276237858326, "grad_norm": 0.71484375, "learning_rate": 0.00015111357590441444, "loss": 1.262, "step": 6955 }, { "epoch": 0.32959014451551766, "grad_norm": 0.25390625, "learning_rate": 0.00015110077515342586, "loss": 0.0431, "step": 6956 }, { "epoch": 0.32963752665245205, "grad_norm": 0.7421875, "learning_rate": 0.0001510879732690765, "loss": 1.046, "step": 6957 }, { "epoch": 0.3296849087893864, "grad_norm": 0.2177734375, "learning_rate": 0.0001510751702516503, "loss": 0.0331, "step": 6958 }, { "epoch": 0.3297322909263208, "grad_norm": 0.77734375, "learning_rate": 0.00015106236610143122, "loss": 0.7115, "step": 6959 }, { "epoch": 0.3297796730632552, "grad_norm": 0.60546875, "learning_rate": 0.00015104956081870324, "loss": 0.5726, "step": 6960 }, { "epoch": 0.3298270552001895, "grad_norm": 0.734375, "learning_rate": 0.0001510367544037504, "loss": 0.2715, "step": 6961 }, { "epoch": 0.3298744373371239, "grad_norm": 0.62109375, "learning_rate": 0.00015102394685685667, "loss": 0.0544, "step": 6962 }, { "epoch": 0.3299218194740583, "grad_norm": 0.25390625, "learning_rate": 0.0001510111381783062, "loss": 0.0501, "step": 6963 }, { "epoch": 0.32996920161099264, "grad_norm": 0.98828125, "learning_rate": 0.000150998328368383, "loss": 0.9406, "step": 6964 }, { "epoch": 0.33001658374792703, "grad_norm": 0.62109375, "learning_rate": 0.0001509855174273712, "loss": 0.8347, "step": 6965 }, { "epoch": 0.3300639658848614, "grad_norm": 0.609375, "learning_rate": 0.00015097270535555487, "loss": 0.9301, "step": 6966 }, { "epoch": 0.33011134802179576, "grad_norm": 0.46484375, "learning_rate": 0.00015095989215321832, "loss": 0.7732, "step": 6967 }, { "epoch": 0.33015873015873015, "grad_norm": 0.6640625, "learning_rate": 0.0001509470778206456, "loss": 1.3104, "step": 6968 }, { "epoch": 0.33020611229566454, "grad_norm": 0.26171875, "learning_rate": 0.00015093426235812096, "loss": 0.162, "step": 6969 }, { "epoch": 0.33025349443259894, "grad_norm": 0.91796875, "learning_rate": 0.0001509214457659286, "loss": 1.7801, "step": 6970 }, { "epoch": 0.3303008765695333, "grad_norm": 0.6484375, "learning_rate": 0.00015090862804435288, "loss": 1.7799, "step": 6971 }, { "epoch": 0.33034825870646767, "grad_norm": 0.52734375, "learning_rate": 0.00015089580919367798, "loss": 0.642, "step": 6972 }, { "epoch": 0.33039564084340206, "grad_norm": 0.80859375, "learning_rate": 0.00015088298921418826, "loss": 1.085, "step": 6973 }, { "epoch": 0.3304430229803364, "grad_norm": 0.84765625, "learning_rate": 0.00015087016810616803, "loss": 1.5359, "step": 6974 }, { "epoch": 0.3304904051172708, "grad_norm": 1.421875, "learning_rate": 0.00015085734586990166, "loss": 1.1087, "step": 6975 }, { "epoch": 0.3305377872542052, "grad_norm": 0.10205078125, "learning_rate": 0.00015084452250567352, "loss": 0.0126, "step": 6976 }, { "epoch": 0.3305851693911395, "grad_norm": 0.2216796875, "learning_rate": 0.00015083169801376802, "loss": 0.0923, "step": 6977 }, { "epoch": 0.3306325515280739, "grad_norm": 0.6171875, "learning_rate": 0.0001508188723944696, "loss": 1.0601, "step": 6978 }, { "epoch": 0.3306799336650083, "grad_norm": 0.578125, "learning_rate": 0.00015080604564806274, "loss": 1.2822, "step": 6979 }, { "epoch": 0.33072731580194265, "grad_norm": 0.57421875, "learning_rate": 0.0001507932177748319, "loss": 0.8857, "step": 6980 }, { "epoch": 0.33077469793887704, "grad_norm": 0.60546875, "learning_rate": 0.00015078038877506159, "loss": 0.0921, "step": 6981 }, { "epoch": 0.33082208007581143, "grad_norm": 0.71484375, "learning_rate": 0.00015076755864903632, "loss": 1.1313, "step": 6982 }, { "epoch": 0.33086946221274577, "grad_norm": 0.69140625, "learning_rate": 0.0001507547273970407, "loss": 1.3182, "step": 6983 }, { "epoch": 0.33091684434968016, "grad_norm": 0.39453125, "learning_rate": 0.0001507418950193593, "loss": 0.1749, "step": 6984 }, { "epoch": 0.33096422648661455, "grad_norm": 0.55078125, "learning_rate": 0.0001507290615162767, "loss": 0.5609, "step": 6985 }, { "epoch": 0.33101160862354895, "grad_norm": 0.353515625, "learning_rate": 0.00015071622688807757, "loss": 0.0124, "step": 6986 }, { "epoch": 0.3310589907604833, "grad_norm": 1.3359375, "learning_rate": 0.0001507033911350465, "loss": 0.259, "step": 6987 }, { "epoch": 0.3311063728974177, "grad_norm": 0.625, "learning_rate": 0.00015069055425746827, "loss": 0.8591, "step": 6988 }, { "epoch": 0.33115375503435207, "grad_norm": 0.765625, "learning_rate": 0.00015067771625562753, "loss": 1.0551, "step": 6989 }, { "epoch": 0.3312011371712864, "grad_norm": 0.54296875, "learning_rate": 0.00015066487712980898, "loss": 0.8592, "step": 6990 }, { "epoch": 0.3312485193082208, "grad_norm": 0.54296875, "learning_rate": 0.0001506520368802975, "loss": 1.0344, "step": 6991 }, { "epoch": 0.3312959014451552, "grad_norm": 0.2177734375, "learning_rate": 0.00015063919550737772, "loss": 0.0456, "step": 6992 }, { "epoch": 0.33134328358208953, "grad_norm": 0.60546875, "learning_rate": 0.00015062635301133455, "loss": 0.9304, "step": 6993 }, { "epoch": 0.3313906657190239, "grad_norm": 0.365234375, "learning_rate": 0.00015061350939245277, "loss": 0.0861, "step": 6994 }, { "epoch": 0.3314380478559583, "grad_norm": 0.435546875, "learning_rate": 0.00015060066465101733, "loss": 0.0486, "step": 6995 }, { "epoch": 0.33148542999289266, "grad_norm": 0.5625, "learning_rate": 0.000150587818787313, "loss": 0.8601, "step": 6996 }, { "epoch": 0.33153281212982705, "grad_norm": 0.00244140625, "learning_rate": 0.0001505749718016247, "loss": 0.0002, "step": 6997 }, { "epoch": 0.33158019426676144, "grad_norm": 0.66796875, "learning_rate": 0.00015056212369423747, "loss": 1.1144, "step": 6998 }, { "epoch": 0.33162757640369583, "grad_norm": 0.5078125, "learning_rate": 0.00015054927446543615, "loss": 0.8029, "step": 6999 }, { "epoch": 0.33167495854063017, "grad_norm": 0.11376953125, "learning_rate": 0.00015053642411550576, "loss": 0.0089, "step": 7000 }, { "epoch": 0.33172234067756456, "grad_norm": 0.96875, "learning_rate": 0.0001505235726447313, "loss": 0.497, "step": 7001 }, { "epoch": 0.33176972281449896, "grad_norm": 0.953125, "learning_rate": 0.00015051072005339787, "loss": 1.1711, "step": 7002 }, { "epoch": 0.3318171049514333, "grad_norm": 0.57421875, "learning_rate": 0.0001504978663417904, "loss": 0.8463, "step": 7003 }, { "epoch": 0.3318644870883677, "grad_norm": 0.6484375, "learning_rate": 0.00015048501151019412, "loss": 0.9828, "step": 7004 }, { "epoch": 0.3319118692253021, "grad_norm": 0.69140625, "learning_rate": 0.000150472155558894, "loss": 1.1101, "step": 7005 }, { "epoch": 0.3319592513622364, "grad_norm": 0.1611328125, "learning_rate": 0.00015045929848817526, "loss": 0.008, "step": 7006 }, { "epoch": 0.3320066334991708, "grad_norm": 0.703125, "learning_rate": 0.000150446440298323, "loss": 1.2718, "step": 7007 }, { "epoch": 0.3320540156361052, "grad_norm": 0.34765625, "learning_rate": 0.00015043358098962246, "loss": 0.179, "step": 7008 }, { "epoch": 0.33210139777303954, "grad_norm": 0.61328125, "learning_rate": 0.00015042072056235876, "loss": 1.006, "step": 7009 }, { "epoch": 0.33214877990997393, "grad_norm": 0.05517578125, "learning_rate": 0.00015040785901681723, "loss": 0.0034, "step": 7010 }, { "epoch": 0.3321961620469083, "grad_norm": 0.486328125, "learning_rate": 0.00015039499635328306, "loss": 0.2602, "step": 7011 }, { "epoch": 0.33224354418384267, "grad_norm": 0.01025390625, "learning_rate": 0.0001503821325720416, "loss": 0.0003, "step": 7012 }, { "epoch": 0.33229092632077706, "grad_norm": 0.251953125, "learning_rate": 0.00015036926767337803, "loss": 0.096, "step": 7013 }, { "epoch": 0.33233830845771145, "grad_norm": 0.63671875, "learning_rate": 0.0001503564016575778, "loss": 0.8911, "step": 7014 }, { "epoch": 0.33238569059464584, "grad_norm": 0.62109375, "learning_rate": 0.0001503435345249262, "loss": 0.9765, "step": 7015 }, { "epoch": 0.3324330727315802, "grad_norm": 0.7578125, "learning_rate": 0.00015033066627570863, "loss": 0.9499, "step": 7016 }, { "epoch": 0.3324804548685146, "grad_norm": 0.61328125, "learning_rate": 0.00015031779691021047, "loss": 0.7292, "step": 7017 }, { "epoch": 0.33252783700544897, "grad_norm": 0.66015625, "learning_rate": 0.00015030492642871722, "loss": 1.1517, "step": 7018 }, { "epoch": 0.3325752191423833, "grad_norm": 0.9765625, "learning_rate": 0.00015029205483151425, "loss": 1.2147, "step": 7019 }, { "epoch": 0.3326226012793177, "grad_norm": 0.58984375, "learning_rate": 0.0001502791821188871, "loss": 0.977, "step": 7020 }, { "epoch": 0.3326699834162521, "grad_norm": 0.78125, "learning_rate": 0.00015026630829112123, "loss": 1.0967, "step": 7021 }, { "epoch": 0.33271736555318643, "grad_norm": 0.5625, "learning_rate": 0.00015025343334850217, "loss": 0.9472, "step": 7022 }, { "epoch": 0.3327647476901208, "grad_norm": 0.58203125, "learning_rate": 0.00015024055729131547, "loss": 0.5209, "step": 7023 }, { "epoch": 0.3328121298270552, "grad_norm": 0.6171875, "learning_rate": 0.00015022768011984676, "loss": 1.2798, "step": 7024 }, { "epoch": 0.33285951196398955, "grad_norm": 0.125, "learning_rate": 0.0001502148018343816, "loss": 0.0117, "step": 7025 }, { "epoch": 0.33290689410092394, "grad_norm": 0.79296875, "learning_rate": 0.00015020192243520562, "loss": 0.8227, "step": 7026 }, { "epoch": 0.33295427623785834, "grad_norm": 0.173828125, "learning_rate": 0.00015018904192260445, "loss": 0.1, "step": 7027 }, { "epoch": 0.33300165837479273, "grad_norm": 0.890625, "learning_rate": 0.00015017616029686383, "loss": 0.7774, "step": 7028 }, { "epoch": 0.33304904051172707, "grad_norm": 0.515625, "learning_rate": 0.0001501632775582694, "loss": 1.027, "step": 7029 }, { "epoch": 0.33309642264866146, "grad_norm": 0.80078125, "learning_rate": 0.00015015039370710692, "loss": 1.0763, "step": 7030 }, { "epoch": 0.33314380478559585, "grad_norm": 0.70703125, "learning_rate": 0.00015013750874366208, "loss": 0.3201, "step": 7031 }, { "epoch": 0.3331911869225302, "grad_norm": 0.49609375, "learning_rate": 0.00015012462266822074, "loss": 0.0487, "step": 7032 }, { "epoch": 0.3332385690594646, "grad_norm": 0.271484375, "learning_rate": 0.00015011173548106865, "loss": 0.0261, "step": 7033 }, { "epoch": 0.333285951196399, "grad_norm": 0.484375, "learning_rate": 0.00015009884718249162, "loss": 0.4067, "step": 7034 }, { "epoch": 0.3333333333333333, "grad_norm": 0.408203125, "learning_rate": 0.00015008595777277556, "loss": 0.0487, "step": 7035 }, { "epoch": 0.3333807154702677, "grad_norm": 0.294921875, "learning_rate": 0.0001500730672522063, "loss": 0.0181, "step": 7036 }, { "epoch": 0.3334280976072021, "grad_norm": 0.71484375, "learning_rate": 0.00015006017562106973, "loss": 0.772, "step": 7037 }, { "epoch": 0.33347547974413644, "grad_norm": 0.5234375, "learning_rate": 0.0001500472828796518, "loss": 0.8465, "step": 7038 }, { "epoch": 0.33352286188107083, "grad_norm": 0.7109375, "learning_rate": 0.0001500343890282384, "loss": 0.7087, "step": 7039 }, { "epoch": 0.3335702440180052, "grad_norm": 0.2001953125, "learning_rate": 0.00015002149406711558, "loss": 0.0318, "step": 7040 }, { "epoch": 0.33361762615493956, "grad_norm": 0.2119140625, "learning_rate": 0.0001500085979965693, "loss": 0.1506, "step": 7041 }, { "epoch": 0.33366500829187395, "grad_norm": 0.76171875, "learning_rate": 0.00014999570081688558, "loss": 1.1756, "step": 7042 }, { "epoch": 0.33371239042880835, "grad_norm": 0.65234375, "learning_rate": 0.0001499828025283505, "loss": 1.231, "step": 7043 }, { "epoch": 0.33375977256574274, "grad_norm": 0.396484375, "learning_rate": 0.00014996990313125008, "loss": 0.0667, "step": 7044 }, { "epoch": 0.3338071547026771, "grad_norm": 0.6171875, "learning_rate": 0.00014995700262587043, "loss": 0.8659, "step": 7045 }, { "epoch": 0.33385453683961147, "grad_norm": 0.71484375, "learning_rate": 0.00014994410101249766, "loss": 0.8055, "step": 7046 }, { "epoch": 0.33390191897654586, "grad_norm": 0.62109375, "learning_rate": 0.00014993119829141794, "loss": 0.7001, "step": 7047 }, { "epoch": 0.3339493011134802, "grad_norm": 0.5390625, "learning_rate": 0.00014991829446291743, "loss": 0.0552, "step": 7048 }, { "epoch": 0.3339966832504146, "grad_norm": 0.8359375, "learning_rate": 0.0001499053895272823, "loss": 0.6531, "step": 7049 }, { "epoch": 0.334044065387349, "grad_norm": 0.51171875, "learning_rate": 0.0001498924834847988, "loss": 0.1737, "step": 7050 }, { "epoch": 0.3340914475242833, "grad_norm": 0.7109375, "learning_rate": 0.00014987957633575316, "loss": 0.5999, "step": 7051 }, { "epoch": 0.3341388296612177, "grad_norm": 0.765625, "learning_rate": 0.00014986666808043165, "loss": 0.149, "step": 7052 }, { "epoch": 0.3341862117981521, "grad_norm": 0.72265625, "learning_rate": 0.00014985375871912055, "loss": 0.7358, "step": 7053 }, { "epoch": 0.33423359393508645, "grad_norm": 0.515625, "learning_rate": 0.00014984084825210619, "loss": 0.6134, "step": 7054 }, { "epoch": 0.33428097607202084, "grad_norm": 0.71484375, "learning_rate": 0.0001498279366796749, "loss": 0.8201, "step": 7055 }, { "epoch": 0.33432835820895523, "grad_norm": 0.1328125, "learning_rate": 0.00014981502400211304, "loss": 0.0103, "step": 7056 }, { "epoch": 0.3343757403458896, "grad_norm": 0.69140625, "learning_rate": 0.00014980211021970702, "loss": 1.4085, "step": 7057 }, { "epoch": 0.33442312248282396, "grad_norm": 0.734375, "learning_rate": 0.00014978919533274322, "loss": 1.0654, "step": 7058 }, { "epoch": 0.33447050461975836, "grad_norm": 0.8203125, "learning_rate": 0.00014977627934150816, "loss": 0.9682, "step": 7059 }, { "epoch": 0.33451788675669275, "grad_norm": 0.7109375, "learning_rate": 0.00014976336224628822, "loss": 0.8443, "step": 7060 }, { "epoch": 0.3345652688936271, "grad_norm": 0.400390625, "learning_rate": 0.00014975044404736987, "loss": 0.4145, "step": 7061 }, { "epoch": 0.3346126510305615, "grad_norm": 1.1640625, "learning_rate": 0.00014973752474503968, "loss": 1.3228, "step": 7062 }, { "epoch": 0.3346600331674959, "grad_norm": 0.6640625, "learning_rate": 0.00014972460433958419, "loss": 0.8524, "step": 7063 }, { "epoch": 0.3347074153044302, "grad_norm": 0.703125, "learning_rate": 0.00014971168283128993, "loss": 0.8595, "step": 7064 }, { "epoch": 0.3347547974413646, "grad_norm": 0.7890625, "learning_rate": 0.00014969876022044346, "loss": 0.6905, "step": 7065 }, { "epoch": 0.334802179578299, "grad_norm": 0.48828125, "learning_rate": 0.0001496858365073315, "loss": 0.9029, "step": 7066 }, { "epoch": 0.33484956171523333, "grad_norm": 0.8046875, "learning_rate": 0.00014967291169224058, "loss": 0.9739, "step": 7067 }, { "epoch": 0.33489694385216773, "grad_norm": 0.06689453125, "learning_rate": 0.00014965998577545734, "loss": 0.0078, "step": 7068 }, { "epoch": 0.3349443259891021, "grad_norm": 0.443359375, "learning_rate": 0.00014964705875726857, "loss": 0.0475, "step": 7069 }, { "epoch": 0.33499170812603646, "grad_norm": 0.06396484375, "learning_rate": 0.00014963413063796087, "loss": 0.0081, "step": 7070 }, { "epoch": 0.33503909026297085, "grad_norm": 0.859375, "learning_rate": 0.00014962120141782104, "loss": 0.3946, "step": 7071 }, { "epoch": 0.33508647239990524, "grad_norm": 0.54296875, "learning_rate": 0.0001496082710971358, "loss": 1.0272, "step": 7072 }, { "epoch": 0.33513385453683964, "grad_norm": 0.08154296875, "learning_rate": 0.000149595339676192, "loss": 0.0063, "step": 7073 }, { "epoch": 0.335181236673774, "grad_norm": 0.68359375, "learning_rate": 0.00014958240715527636, "loss": 1.2715, "step": 7074 }, { "epoch": 0.33522861881070837, "grad_norm": 0.0517578125, "learning_rate": 0.00014956947353467578, "loss": 0.0058, "step": 7075 }, { "epoch": 0.33527600094764276, "grad_norm": 0.79296875, "learning_rate": 0.00014955653881467703, "loss": 1.415, "step": 7076 }, { "epoch": 0.3353233830845771, "grad_norm": 0.0634765625, "learning_rate": 0.00014954360299556703, "loss": 0.0029, "step": 7077 }, { "epoch": 0.3353707652215115, "grad_norm": 0.455078125, "learning_rate": 0.00014953066607763268, "loss": 0.0616, "step": 7078 }, { "epoch": 0.3354181473584459, "grad_norm": 0.86328125, "learning_rate": 0.00014951772806116095, "loss": 1.2487, "step": 7079 }, { "epoch": 0.3354655294953802, "grad_norm": 0.5703125, "learning_rate": 0.00014950478894643873, "loss": 1.2876, "step": 7080 }, { "epoch": 0.3355129116323146, "grad_norm": 0.6015625, "learning_rate": 0.000149491848733753, "loss": 0.9256, "step": 7081 }, { "epoch": 0.335560293769249, "grad_norm": 0.5, "learning_rate": 0.00014947890742339086, "loss": 0.678, "step": 7082 }, { "epoch": 0.33560767590618334, "grad_norm": 0.64453125, "learning_rate": 0.0001494659650156392, "loss": 1.1603, "step": 7083 }, { "epoch": 0.33565505804311774, "grad_norm": 0.60546875, "learning_rate": 0.00014945302151078512, "loss": 0.7622, "step": 7084 }, { "epoch": 0.33570244018005213, "grad_norm": 0.6953125, "learning_rate": 0.00014944007690911572, "loss": 0.8894, "step": 7085 }, { "epoch": 0.3357498223169865, "grad_norm": 0.04052734375, "learning_rate": 0.0001494271312109181, "loss": 0.001, "step": 7086 }, { "epoch": 0.33579720445392086, "grad_norm": 0.859375, "learning_rate": 0.0001494141844164793, "loss": 1.2383, "step": 7087 }, { "epoch": 0.33584458659085525, "grad_norm": 0.73046875, "learning_rate": 0.00014940123652608652, "loss": 0.1373, "step": 7088 }, { "epoch": 0.33589196872778965, "grad_norm": 0.59375, "learning_rate": 0.00014938828754002697, "loss": 0.508, "step": 7089 }, { "epoch": 0.335939350864724, "grad_norm": 0.68359375, "learning_rate": 0.00014937533745858783, "loss": 1.205, "step": 7090 }, { "epoch": 0.3359867330016584, "grad_norm": 0.625, "learning_rate": 0.00014936238628205625, "loss": 1.1114, "step": 7091 }, { "epoch": 0.33603411513859277, "grad_norm": 0.6640625, "learning_rate": 0.00014934943401071954, "loss": 0.8703, "step": 7092 }, { "epoch": 0.3360814972755271, "grad_norm": 0.18359375, "learning_rate": 0.00014933648064486494, "loss": 0.1389, "step": 7093 }, { "epoch": 0.3361288794124615, "grad_norm": 0.88671875, "learning_rate": 0.00014932352618477976, "loss": 1.1597, "step": 7094 }, { "epoch": 0.3361762615493959, "grad_norm": 0.2470703125, "learning_rate": 0.0001493105706307513, "loss": 0.1503, "step": 7095 }, { "epoch": 0.33622364368633023, "grad_norm": 0.9296875, "learning_rate": 0.0001492976139830669, "loss": 0.5618, "step": 7096 }, { "epoch": 0.3362710258232646, "grad_norm": 0.322265625, "learning_rate": 0.00014928465624201395, "loss": 0.1363, "step": 7097 }, { "epoch": 0.336318407960199, "grad_norm": 0.1533203125, "learning_rate": 0.00014927169740787982, "loss": 0.0229, "step": 7098 }, { "epoch": 0.33636579009713335, "grad_norm": 0.6953125, "learning_rate": 0.00014925873748095192, "loss": 1.0774, "step": 7099 }, { "epoch": 0.33641317223406775, "grad_norm": 1.1484375, "learning_rate": 0.00014924577646151767, "loss": 0.7773, "step": 7100 }, { "epoch": 0.33646055437100214, "grad_norm": 0.578125, "learning_rate": 0.00014923281434986457, "loss": 0.811, "step": 7101 }, { "epoch": 0.33650793650793653, "grad_norm": 0.047607421875, "learning_rate": 0.0001492198511462801, "loss": 0.0017, "step": 7102 }, { "epoch": 0.33655531864487087, "grad_norm": 0.09619140625, "learning_rate": 0.00014920688685105172, "loss": 0.0042, "step": 7103 }, { "epoch": 0.33660270078180526, "grad_norm": 0.5625, "learning_rate": 0.00014919392146446703, "loss": 1.039, "step": 7104 }, { "epoch": 0.33665008291873966, "grad_norm": 0.65234375, "learning_rate": 0.00014918095498681356, "loss": 1.2575, "step": 7105 }, { "epoch": 0.336697465055674, "grad_norm": 0.5703125, "learning_rate": 0.0001491679874183789, "loss": 0.6664, "step": 7106 }, { "epoch": 0.3367448471926084, "grad_norm": 0.73828125, "learning_rate": 0.00014915501875945064, "loss": 0.9582, "step": 7107 }, { "epoch": 0.3367922293295428, "grad_norm": 0.365234375, "learning_rate": 0.0001491420490103164, "loss": 0.6622, "step": 7108 }, { "epoch": 0.3368396114664771, "grad_norm": 0.68359375, "learning_rate": 0.0001491290781712639, "loss": 1.2484, "step": 7109 }, { "epoch": 0.3368869936034115, "grad_norm": 0.57421875, "learning_rate": 0.00014911610624258076, "loss": 1.2539, "step": 7110 }, { "epoch": 0.3369343757403459, "grad_norm": 0.51171875, "learning_rate": 0.00014910313322455466, "loss": 0.5703, "step": 7111 }, { "epoch": 0.33698175787728024, "grad_norm": 0.11376953125, "learning_rate": 0.00014909015911747343, "loss": 0.0237, "step": 7112 }, { "epoch": 0.33702914001421463, "grad_norm": 0.6171875, "learning_rate": 0.00014907718392162474, "loss": 1.2957, "step": 7113 }, { "epoch": 0.337076522151149, "grad_norm": 0.58203125, "learning_rate": 0.00014906420763729638, "loss": 0.5867, "step": 7114 }, { "epoch": 0.3371239042880834, "grad_norm": 0.341796875, "learning_rate": 0.00014905123026477614, "loss": 0.0653, "step": 7115 }, { "epoch": 0.33717128642501776, "grad_norm": 0.6640625, "learning_rate": 0.00014903825180435186, "loss": 1.0758, "step": 7116 }, { "epoch": 0.33721866856195215, "grad_norm": 0.59765625, "learning_rate": 0.0001490252722563114, "loss": 0.4744, "step": 7117 }, { "epoch": 0.33726605069888654, "grad_norm": 0.376953125, "learning_rate": 0.00014901229162094263, "loss": 0.1012, "step": 7118 }, { "epoch": 0.3373134328358209, "grad_norm": 0.79296875, "learning_rate": 0.00014899930989853343, "loss": 0.8182, "step": 7119 }, { "epoch": 0.3373608149727553, "grad_norm": 0.6953125, "learning_rate": 0.00014898632708937171, "loss": 1.108, "step": 7120 }, { "epoch": 0.33740819710968967, "grad_norm": 0.5234375, "learning_rate": 0.00014897334319374545, "loss": 0.9846, "step": 7121 }, { "epoch": 0.337455579246624, "grad_norm": 0.26171875, "learning_rate": 0.00014896035821194262, "loss": 0.0365, "step": 7122 }, { "epoch": 0.3375029613835584, "grad_norm": 0.69921875, "learning_rate": 0.00014894737214425117, "loss": 0.9856, "step": 7123 }, { "epoch": 0.3375503435204928, "grad_norm": 0.52734375, "learning_rate": 0.00014893438499095915, "loss": 0.0867, "step": 7124 }, { "epoch": 0.33759772565742713, "grad_norm": 0.52734375, "learning_rate": 0.00014892139675235462, "loss": 0.9966, "step": 7125 }, { "epoch": 0.3376451077943615, "grad_norm": 0.06982421875, "learning_rate": 0.00014890840742872555, "loss": 0.0097, "step": 7126 }, { "epoch": 0.3376924899312959, "grad_norm": 0.69921875, "learning_rate": 0.00014889541702036013, "loss": 1.1499, "step": 7127 }, { "epoch": 0.33773987206823025, "grad_norm": 0.62890625, "learning_rate": 0.00014888242552754647, "loss": 0.6494, "step": 7128 }, { "epoch": 0.33778725420516464, "grad_norm": 0.04296875, "learning_rate": 0.00014886943295057265, "loss": 0.0051, "step": 7129 }, { "epoch": 0.33783463634209904, "grad_norm": 0.2001953125, "learning_rate": 0.0001488564392897269, "loss": 0.1142, "step": 7130 }, { "epoch": 0.33788201847903343, "grad_norm": 0.671875, "learning_rate": 0.00014884344454529734, "loss": 0.6924, "step": 7131 }, { "epoch": 0.33792940061596777, "grad_norm": 0.6640625, "learning_rate": 0.00014883044871757218, "loss": 0.8983, "step": 7132 }, { "epoch": 0.33797678275290216, "grad_norm": 0.5703125, "learning_rate": 0.0001488174518068397, "loss": 0.622, "step": 7133 }, { "epoch": 0.33802416488983655, "grad_norm": 0.68359375, "learning_rate": 0.00014880445381338815, "loss": 1.1508, "step": 7134 }, { "epoch": 0.3380715470267709, "grad_norm": 0.259765625, "learning_rate": 0.00014879145473750577, "loss": 0.0232, "step": 7135 }, { "epoch": 0.3381189291637053, "grad_norm": 0.6640625, "learning_rate": 0.00014877845457948093, "loss": 0.7931, "step": 7136 }, { "epoch": 0.3381663113006397, "grad_norm": 0.62890625, "learning_rate": 0.0001487654533396019, "loss": 1.0485, "step": 7137 }, { "epoch": 0.338213693437574, "grad_norm": 0.5859375, "learning_rate": 0.00014875245101815708, "loss": 0.4988, "step": 7138 }, { "epoch": 0.3382610755745084, "grad_norm": 0.5703125, "learning_rate": 0.0001487394476154348, "loss": 0.9374, "step": 7139 }, { "epoch": 0.3383084577114428, "grad_norm": 0.53515625, "learning_rate": 0.0001487264431317235, "loss": 1.1444, "step": 7140 }, { "epoch": 0.33835583984837714, "grad_norm": 0.703125, "learning_rate": 0.00014871343756731156, "loss": 0.5691, "step": 7141 }, { "epoch": 0.33840322198531153, "grad_norm": 0.6796875, "learning_rate": 0.00014870043092248748, "loss": 1.2617, "step": 7142 }, { "epoch": 0.3384506041222459, "grad_norm": 0.5546875, "learning_rate": 0.00014868742319753975, "loss": 1.0233, "step": 7143 }, { "epoch": 0.33849798625918026, "grad_norm": 0.61328125, "learning_rate": 0.0001486744143927568, "loss": 1.2125, "step": 7144 }, { "epoch": 0.33854536839611465, "grad_norm": 0.62890625, "learning_rate": 0.00014866140450842718, "loss": 1.0453, "step": 7145 }, { "epoch": 0.33859275053304905, "grad_norm": 0.5859375, "learning_rate": 0.00014864839354483946, "loss": 0.8343, "step": 7146 }, { "epoch": 0.33864013266998344, "grad_norm": 0.41796875, "learning_rate": 0.00014863538150228217, "loss": 0.6355, "step": 7147 }, { "epoch": 0.3386875148069178, "grad_norm": 0.62890625, "learning_rate": 0.00014862236838104396, "loss": 1.1156, "step": 7148 }, { "epoch": 0.33873489694385217, "grad_norm": 0.59375, "learning_rate": 0.00014860935418141338, "loss": 1.0201, "step": 7149 }, { "epoch": 0.33878227908078656, "grad_norm": 0.6328125, "learning_rate": 0.00014859633890367907, "loss": 0.5542, "step": 7150 }, { "epoch": 0.3388296612177209, "grad_norm": 0.042236328125, "learning_rate": 0.0001485833225481298, "loss": 0.0045, "step": 7151 }, { "epoch": 0.3388770433546553, "grad_norm": 0.60546875, "learning_rate": 0.00014857030511505412, "loss": 1.0131, "step": 7152 }, { "epoch": 0.3389244254915897, "grad_norm": 0.220703125, "learning_rate": 0.00014855728660474084, "loss": 0.0299, "step": 7153 }, { "epoch": 0.338971807628524, "grad_norm": 0.1767578125, "learning_rate": 0.00014854426701747865, "loss": 0.1357, "step": 7154 }, { "epoch": 0.3390191897654584, "grad_norm": 0.031494140625, "learning_rate": 0.00014853124635355632, "loss": 0.0032, "step": 7155 }, { "epoch": 0.3390665719023928, "grad_norm": 0.470703125, "learning_rate": 0.00014851822461326266, "loss": 0.3549, "step": 7156 }, { "epoch": 0.33911395403932715, "grad_norm": 0.388671875, "learning_rate": 0.00014850520179688644, "loss": 0.4451, "step": 7157 }, { "epoch": 0.33916133617626154, "grad_norm": 0.44140625, "learning_rate": 0.0001484921779047165, "loss": 0.0741, "step": 7158 }, { "epoch": 0.33920871831319593, "grad_norm": 0.54296875, "learning_rate": 0.00014847915293704172, "loss": 1.5142, "step": 7159 }, { "epoch": 0.3392561004501303, "grad_norm": 0.039306640625, "learning_rate": 0.000148466126894151, "loss": 0.0014, "step": 7160 }, { "epoch": 0.33930348258706466, "grad_norm": 0.67578125, "learning_rate": 0.00014845309977633316, "loss": 1.29, "step": 7161 }, { "epoch": 0.33935086472399906, "grad_norm": 0.1982421875, "learning_rate": 0.00014844007158387718, "loss": 0.0123, "step": 7162 }, { "epoch": 0.33939824686093345, "grad_norm": 0.6328125, "learning_rate": 0.00014842704231707204, "loss": 0.9652, "step": 7163 }, { "epoch": 0.3394456289978678, "grad_norm": 0.5234375, "learning_rate": 0.00014841401197620665, "loss": 1.042, "step": 7164 }, { "epoch": 0.3394930111348022, "grad_norm": 0.494140625, "learning_rate": 0.00014840098056157007, "loss": 0.9504, "step": 7165 }, { "epoch": 0.3395403932717366, "grad_norm": 0.7421875, "learning_rate": 0.00014838794807345128, "loss": 1.1345, "step": 7166 }, { "epoch": 0.3395877754086709, "grad_norm": 0.400390625, "learning_rate": 0.00014837491451213933, "loss": 0.0718, "step": 7167 }, { "epoch": 0.3396351575456053, "grad_norm": 0.58203125, "learning_rate": 0.00014836187987792333, "loss": 0.5195, "step": 7168 }, { "epoch": 0.3396825396825397, "grad_norm": 0.58203125, "learning_rate": 0.00014834884417109235, "loss": 0.9032, "step": 7169 }, { "epoch": 0.33972992181947403, "grad_norm": 0.6484375, "learning_rate": 0.00014833580739193546, "loss": 1.1558, "step": 7170 }, { "epoch": 0.3397773039564084, "grad_norm": 0.004119873046875, "learning_rate": 0.00014832276954074191, "loss": 0.0002, "step": 7171 }, { "epoch": 0.3398246860933428, "grad_norm": 0.671875, "learning_rate": 0.00014830973061780075, "loss": 0.5184, "step": 7172 }, { "epoch": 0.33987206823027716, "grad_norm": 0.73828125, "learning_rate": 0.00014829669062340123, "loss": 0.9264, "step": 7173 }, { "epoch": 0.33991945036721155, "grad_norm": 0.65625, "learning_rate": 0.00014828364955783257, "loss": 0.8504, "step": 7174 }, { "epoch": 0.33996683250414594, "grad_norm": 0.64453125, "learning_rate": 0.00014827060742138399, "loss": 1.2405, "step": 7175 }, { "epoch": 0.34001421464108034, "grad_norm": 0.05859375, "learning_rate": 0.00014825756421434476, "loss": 0.007, "step": 7176 }, { "epoch": 0.3400615967780147, "grad_norm": 0.6640625, "learning_rate": 0.00014824451993700416, "loss": 0.8505, "step": 7177 }, { "epoch": 0.34010897891494907, "grad_norm": 0.89453125, "learning_rate": 0.0001482314745896515, "loss": 0.6473, "step": 7178 }, { "epoch": 0.34015636105188346, "grad_norm": 0.8203125, "learning_rate": 0.00014821842817257607, "loss": 1.3722, "step": 7179 }, { "epoch": 0.3402037431888178, "grad_norm": 0.65234375, "learning_rate": 0.00014820538068606727, "loss": 0.9416, "step": 7180 }, { "epoch": 0.3402511253257522, "grad_norm": 0.40234375, "learning_rate": 0.00014819233213041451, "loss": 0.0131, "step": 7181 }, { "epoch": 0.3402985074626866, "grad_norm": 0.46875, "learning_rate": 0.00014817928250590714, "loss": 0.0351, "step": 7182 }, { "epoch": 0.3403458895996209, "grad_norm": 0.58984375, "learning_rate": 0.00014816623181283458, "loss": 1.075, "step": 7183 }, { "epoch": 0.3403932717365553, "grad_norm": 0.7421875, "learning_rate": 0.0001481531800514863, "loss": 1.0367, "step": 7184 }, { "epoch": 0.3404406538734897, "grad_norm": 0.54296875, "learning_rate": 0.00014814012722215181, "loss": 0.8159, "step": 7185 }, { "epoch": 0.34048803601042404, "grad_norm": 0.5703125, "learning_rate": 0.0001481270733251206, "loss": 0.8916, "step": 7186 }, { "epoch": 0.34053541814735844, "grad_norm": 0.90234375, "learning_rate": 0.00014811401836068211, "loss": 0.7287, "step": 7187 }, { "epoch": 0.34058280028429283, "grad_norm": 0.392578125, "learning_rate": 0.00014810096232912594, "loss": 0.1533, "step": 7188 }, { "epoch": 0.3406301824212272, "grad_norm": 0.53125, "learning_rate": 0.0001480879052307417, "loss": 0.0744, "step": 7189 }, { "epoch": 0.34067756455816156, "grad_norm": 0.66015625, "learning_rate": 0.0001480748470658189, "loss": 0.9296, "step": 7190 }, { "epoch": 0.34072494669509595, "grad_norm": 0.51953125, "learning_rate": 0.00014806178783464722, "loss": 0.6243, "step": 7191 }, { "epoch": 0.34077232883203035, "grad_norm": 0.470703125, "learning_rate": 0.00014804872753751625, "loss": 0.5635, "step": 7192 }, { "epoch": 0.3408197109689647, "grad_norm": 0.2451171875, "learning_rate": 0.00014803566617471573, "loss": 0.1587, "step": 7193 }, { "epoch": 0.3408670931058991, "grad_norm": 0.169921875, "learning_rate": 0.0001480226037465353, "loss": 0.0075, "step": 7194 }, { "epoch": 0.34091447524283347, "grad_norm": 0.328125, "learning_rate": 0.00014800954025326465, "loss": 0.1212, "step": 7195 }, { "epoch": 0.3409618573797678, "grad_norm": 0.7890625, "learning_rate": 0.00014799647569519353, "loss": 1.1877, "step": 7196 }, { "epoch": 0.3410092395167022, "grad_norm": 0.71484375, "learning_rate": 0.00014798341007261171, "loss": 1.0534, "step": 7197 }, { "epoch": 0.3410566216536366, "grad_norm": 0.67578125, "learning_rate": 0.00014797034338580897, "loss": 1.1359, "step": 7198 }, { "epoch": 0.34110400379057093, "grad_norm": 1.2890625, "learning_rate": 0.0001479572756350751, "loss": 0.2054, "step": 7199 }, { "epoch": 0.3411513859275053, "grad_norm": 0.453125, "learning_rate": 0.00014794420682069995, "loss": 0.3547, "step": 7200 }, { "epoch": 0.3411987680644397, "grad_norm": 0.0225830078125, "learning_rate": 0.00014793113694297336, "loss": 0.0009, "step": 7201 }, { "epoch": 0.34124615020137405, "grad_norm": 0.66796875, "learning_rate": 0.0001479180660021852, "loss": 1.2018, "step": 7202 }, { "epoch": 0.34129353233830845, "grad_norm": 0.1591796875, "learning_rate": 0.00014790499399862535, "loss": 0.0145, "step": 7203 }, { "epoch": 0.34134091447524284, "grad_norm": 0.765625, "learning_rate": 0.00014789192093258378, "loss": 0.9923, "step": 7204 }, { "epoch": 0.34138829661217723, "grad_norm": 0.58203125, "learning_rate": 0.00014787884680435043, "loss": 0.5752, "step": 7205 }, { "epoch": 0.34143567874911157, "grad_norm": 0.671875, "learning_rate": 0.00014786577161421524, "loss": 1.3934, "step": 7206 }, { "epoch": 0.34148306088604596, "grad_norm": 0.640625, "learning_rate": 0.00014785269536246823, "loss": 1.1021, "step": 7207 }, { "epoch": 0.34153044302298036, "grad_norm": 0.6875, "learning_rate": 0.0001478396180493994, "loss": 1.4513, "step": 7208 }, { "epoch": 0.3415778251599147, "grad_norm": 0.65234375, "learning_rate": 0.00014782653967529882, "loss": 0.8498, "step": 7209 }, { "epoch": 0.3416252072968491, "grad_norm": 0.859375, "learning_rate": 0.0001478134602404565, "loss": 0.5001, "step": 7210 }, { "epoch": 0.3416725894337835, "grad_norm": 0.8359375, "learning_rate": 0.00014780037974516258, "loss": 0.8522, "step": 7211 }, { "epoch": 0.3417199715707178, "grad_norm": 0.1650390625, "learning_rate": 0.00014778729818970714, "loss": 0.0263, "step": 7212 }, { "epoch": 0.3417673537076522, "grad_norm": 0.66015625, "learning_rate": 0.00014777421557438033, "loss": 1.4596, "step": 7213 }, { "epoch": 0.3418147358445866, "grad_norm": 0.6484375, "learning_rate": 0.0001477611318994723, "loss": 0.9247, "step": 7214 }, { "epoch": 0.34186211798152094, "grad_norm": 0.640625, "learning_rate": 0.00014774804716527324, "loss": 1.1011, "step": 7215 }, { "epoch": 0.34190950011845533, "grad_norm": 0.72265625, "learning_rate": 0.00014773496137207337, "loss": 0.4848, "step": 7216 }, { "epoch": 0.3419568822553897, "grad_norm": 0.19921875, "learning_rate": 0.0001477218745201629, "loss": 0.1409, "step": 7217 }, { "epoch": 0.3420042643923241, "grad_norm": 0.6484375, "learning_rate": 0.00014770878660983207, "loss": 1.2408, "step": 7218 }, { "epoch": 0.34205164652925846, "grad_norm": 0.26171875, "learning_rate": 0.00014769569764137117, "loss": 0.1452, "step": 7219 }, { "epoch": 0.34209902866619285, "grad_norm": 0.5390625, "learning_rate": 0.0001476826076150705, "loss": 0.0404, "step": 7220 }, { "epoch": 0.34214641080312724, "grad_norm": 0.87890625, "learning_rate": 0.0001476695165312204, "loss": 1.304, "step": 7221 }, { "epoch": 0.3421937929400616, "grad_norm": 0.73828125, "learning_rate": 0.00014765642439011116, "loss": 1.162, "step": 7222 }, { "epoch": 0.342241175076996, "grad_norm": 0.76953125, "learning_rate": 0.0001476433311920332, "loss": 1.0679, "step": 7223 }, { "epoch": 0.34228855721393037, "grad_norm": 0.78515625, "learning_rate": 0.00014763023693727695, "loss": 0.1273, "step": 7224 }, { "epoch": 0.3423359393508647, "grad_norm": 0.671875, "learning_rate": 0.00014761714162613273, "loss": 1.2374, "step": 7225 }, { "epoch": 0.3423833214877991, "grad_norm": 0.6640625, "learning_rate": 0.00014760404525889105, "loss": 0.7444, "step": 7226 }, { "epoch": 0.3424307036247335, "grad_norm": 0.59765625, "learning_rate": 0.00014759094783584233, "loss": 0.9537, "step": 7227 }, { "epoch": 0.3424780857616678, "grad_norm": 0.55078125, "learning_rate": 0.0001475778493572771, "loss": 0.3954, "step": 7228 }, { "epoch": 0.3425254678986022, "grad_norm": 1.046875, "learning_rate": 0.00014756474982348584, "loss": 0.3159, "step": 7229 }, { "epoch": 0.3425728500355366, "grad_norm": 0.67578125, "learning_rate": 0.00014755164923475908, "loss": 1.084, "step": 7230 }, { "epoch": 0.34262023217247095, "grad_norm": 0.62890625, "learning_rate": 0.00014753854759138742, "loss": 0.9867, "step": 7231 }, { "epoch": 0.34266761430940534, "grad_norm": 0.41796875, "learning_rate": 0.0001475254448936614, "loss": 0.089, "step": 7232 }, { "epoch": 0.34271499644633974, "grad_norm": 0.59375, "learning_rate": 0.0001475123411418716, "loss": 0.9966, "step": 7233 }, { "epoch": 0.34276237858327413, "grad_norm": 0.169921875, "learning_rate": 0.00014749923633630872, "loss": 0.0149, "step": 7234 }, { "epoch": 0.34280976072020847, "grad_norm": 1.0703125, "learning_rate": 0.00014748613047726336, "loss": 1.4929, "step": 7235 }, { "epoch": 0.34285714285714286, "grad_norm": 0.75390625, "learning_rate": 0.00014747302356502622, "loss": 0.3428, "step": 7236 }, { "epoch": 0.34290452499407725, "grad_norm": 0.0027923583984375, "learning_rate": 0.00014745991559988794, "loss": 0.0002, "step": 7237 }, { "epoch": 0.3429519071310116, "grad_norm": 0.69140625, "learning_rate": 0.00014744680658213932, "loss": 1.5044, "step": 7238 }, { "epoch": 0.342999289267946, "grad_norm": 0.62109375, "learning_rate": 0.00014743369651207106, "loss": 1.1535, "step": 7239 }, { "epoch": 0.3430466714048804, "grad_norm": 0.68359375, "learning_rate": 0.00014742058538997393, "loss": 1.421, "step": 7240 }, { "epoch": 0.3430940535418147, "grad_norm": 0.7265625, "learning_rate": 0.00014740747321613875, "loss": 1.2337, "step": 7241 }, { "epoch": 0.3431414356787491, "grad_norm": 0.59375, "learning_rate": 0.0001473943599908563, "loss": 1.1612, "step": 7242 }, { "epoch": 0.3431888178156835, "grad_norm": 0.06298828125, "learning_rate": 0.00014738124571441743, "loss": 0.0057, "step": 7243 }, { "epoch": 0.34323619995261784, "grad_norm": 0.86328125, "learning_rate": 0.00014736813038711296, "loss": 0.3686, "step": 7244 }, { "epoch": 0.34328358208955223, "grad_norm": 0.2431640625, "learning_rate": 0.00014735501400923385, "loss": 0.0485, "step": 7245 }, { "epoch": 0.3433309642264866, "grad_norm": 0.7421875, "learning_rate": 0.00014734189658107094, "loss": 1.0313, "step": 7246 }, { "epoch": 0.343378346363421, "grad_norm": 0.6171875, "learning_rate": 0.00014732877810291523, "loss": 0.9282, "step": 7247 }, { "epoch": 0.34342572850035535, "grad_norm": 0.6328125, "learning_rate": 0.00014731565857505763, "loss": 0.6842, "step": 7248 }, { "epoch": 0.34347311063728975, "grad_norm": 0.384765625, "learning_rate": 0.0001473025379977891, "loss": 0.2441, "step": 7249 }, { "epoch": 0.34352049277422414, "grad_norm": 0.6796875, "learning_rate": 0.00014728941637140065, "loss": 0.7064, "step": 7250 }, { "epoch": 0.3435678749111585, "grad_norm": 0.5390625, "learning_rate": 0.00014727629369618334, "loss": 0.5426, "step": 7251 }, { "epoch": 0.34361525704809287, "grad_norm": 0.6875, "learning_rate": 0.00014726316997242818, "loss": 1.2039, "step": 7252 }, { "epoch": 0.34366263918502726, "grad_norm": 0.01708984375, "learning_rate": 0.00014725004520042628, "loss": 0.0011, "step": 7253 }, { "epoch": 0.3437100213219616, "grad_norm": 0.57421875, "learning_rate": 0.00014723691938046867, "loss": 1.3481, "step": 7254 }, { "epoch": 0.343757403458896, "grad_norm": 0.73828125, "learning_rate": 0.00014722379251284654, "loss": 1.3633, "step": 7255 }, { "epoch": 0.3438047855958304, "grad_norm": 0.5, "learning_rate": 0.00014721066459785095, "loss": 0.1503, "step": 7256 }, { "epoch": 0.3438521677327647, "grad_norm": 0.69921875, "learning_rate": 0.00014719753563577313, "loss": 1.1006, "step": 7257 }, { "epoch": 0.3438995498696991, "grad_norm": 0.66015625, "learning_rate": 0.00014718440562690424, "loss": 0.5793, "step": 7258 }, { "epoch": 0.3439469320066335, "grad_norm": 0.87109375, "learning_rate": 0.0001471712745715355, "loss": 1.1158, "step": 7259 }, { "epoch": 0.34399431414356785, "grad_norm": 0.47265625, "learning_rate": 0.0001471581424699581, "loss": 0.9373, "step": 7260 }, { "epoch": 0.34404169628050224, "grad_norm": 0.251953125, "learning_rate": 0.00014714500932246332, "loss": 0.1639, "step": 7261 }, { "epoch": 0.34408907841743663, "grad_norm": 0.201171875, "learning_rate": 0.00014713187512934253, "loss": 0.1281, "step": 7262 }, { "epoch": 0.344136460554371, "grad_norm": 0.65625, "learning_rate": 0.0001471187398908869, "loss": 1.3557, "step": 7263 }, { "epoch": 0.34418384269130536, "grad_norm": 0.734375, "learning_rate": 0.0001471056036073878, "loss": 1.4961, "step": 7264 }, { "epoch": 0.34423122482823976, "grad_norm": 0.6171875, "learning_rate": 0.0001470924662791366, "loss": 0.7998, "step": 7265 }, { "epoch": 0.34427860696517415, "grad_norm": 0.5390625, "learning_rate": 0.0001470793279064247, "loss": 0.7616, "step": 7266 }, { "epoch": 0.3443259891021085, "grad_norm": 0.416015625, "learning_rate": 0.0001470661884895434, "loss": 0.5415, "step": 7267 }, { "epoch": 0.3443733712390429, "grad_norm": 0.57421875, "learning_rate": 0.00014705304802878417, "loss": 0.7378, "step": 7268 }, { "epoch": 0.3444207533759773, "grad_norm": 0.73828125, "learning_rate": 0.00014703990652443845, "loss": 0.9451, "step": 7269 }, { "epoch": 0.3444681355129116, "grad_norm": 1.0703125, "learning_rate": 0.00014702676397679776, "loss": 0.3388, "step": 7270 }, { "epoch": 0.344515517649846, "grad_norm": 0.0196533203125, "learning_rate": 0.00014701362038615348, "loss": 0.0011, "step": 7271 }, { "epoch": 0.3445628997867804, "grad_norm": 0.7890625, "learning_rate": 0.0001470004757527972, "loss": 1.2691, "step": 7272 }, { "epoch": 0.34461028192371473, "grad_norm": 0.62890625, "learning_rate": 0.00014698733007702044, "loss": 0.8237, "step": 7273 }, { "epoch": 0.3446576640606491, "grad_norm": 0.2197265625, "learning_rate": 0.00014697418335911472, "loss": 0.065, "step": 7274 }, { "epoch": 0.3447050461975835, "grad_norm": 0.68359375, "learning_rate": 0.00014696103559937165, "loss": 1.1218, "step": 7275 }, { "epoch": 0.3447524283345179, "grad_norm": 0.2109375, "learning_rate": 0.00014694788679808286, "loss": 0.1589, "step": 7276 }, { "epoch": 0.34479981047145225, "grad_norm": 0.228515625, "learning_rate": 0.00014693473695553995, "loss": 0.1621, "step": 7277 }, { "epoch": 0.34484719260838664, "grad_norm": 0.68359375, "learning_rate": 0.00014692158607203454, "loss": 0.9023, "step": 7278 }, { "epoch": 0.34489457474532104, "grad_norm": 0.58203125, "learning_rate": 0.00014690843414785835, "loss": 0.8748, "step": 7279 }, { "epoch": 0.3449419568822554, "grad_norm": 0.76171875, "learning_rate": 0.00014689528118330304, "loss": 0.1103, "step": 7280 }, { "epoch": 0.34498933901918977, "grad_norm": 0.34765625, "learning_rate": 0.00014688212717866038, "loss": 0.0525, "step": 7281 }, { "epoch": 0.34503672115612416, "grad_norm": 0.06494140625, "learning_rate": 0.000146868972134222, "loss": 0.008, "step": 7282 }, { "epoch": 0.3450841032930585, "grad_norm": 0.65625, "learning_rate": 0.00014685581605027978, "loss": 0.9089, "step": 7283 }, { "epoch": 0.3451314854299929, "grad_norm": 0.470703125, "learning_rate": 0.00014684265892712548, "loss": 0.2723, "step": 7284 }, { "epoch": 0.3451788675669273, "grad_norm": 0.7734375, "learning_rate": 0.0001468295007650509, "loss": 1.4206, "step": 7285 }, { "epoch": 0.3452262497038616, "grad_norm": 0.59375, "learning_rate": 0.00014681634156434785, "loss": 1.3776, "step": 7286 }, { "epoch": 0.345273631840796, "grad_norm": 0.53125, "learning_rate": 0.00014680318132530827, "loss": 1.148, "step": 7287 }, { "epoch": 0.3453210139777304, "grad_norm": 0.427734375, "learning_rate": 0.0001467900200482239, "loss": 0.6705, "step": 7288 }, { "epoch": 0.34536839611466474, "grad_norm": 0.5234375, "learning_rate": 0.00014677685773338678, "loss": 0.72, "step": 7289 }, { "epoch": 0.34541577825159914, "grad_norm": 0.25390625, "learning_rate": 0.00014676369438108874, "loss": 0.0097, "step": 7290 }, { "epoch": 0.34546316038853353, "grad_norm": 0.6796875, "learning_rate": 0.00014675052999162179, "loss": 1.5282, "step": 7291 }, { "epoch": 0.3455105425254679, "grad_norm": 0.87890625, "learning_rate": 0.00014673736456527787, "loss": 1.3205, "step": 7292 }, { "epoch": 0.34555792466240226, "grad_norm": 0.62109375, "learning_rate": 0.00014672419810234902, "loss": 1.3132, "step": 7293 }, { "epoch": 0.34560530679933665, "grad_norm": 0.490234375, "learning_rate": 0.00014671103060312718, "loss": 0.0965, "step": 7294 }, { "epoch": 0.34565268893627105, "grad_norm": 0.255859375, "learning_rate": 0.00014669786206790447, "loss": 0.1393, "step": 7295 }, { "epoch": 0.3457000710732054, "grad_norm": 0.6328125, "learning_rate": 0.0001466846924969729, "loss": 1.0978, "step": 7296 }, { "epoch": 0.3457474532101398, "grad_norm": 0.9296875, "learning_rate": 0.00014667152189062462, "loss": 1.045, "step": 7297 }, { "epoch": 0.34579483534707417, "grad_norm": 0.734375, "learning_rate": 0.00014665835024915165, "loss": 0.7312, "step": 7298 }, { "epoch": 0.3458422174840085, "grad_norm": 0.55078125, "learning_rate": 0.00014664517757284617, "loss": 0.732, "step": 7299 }, { "epoch": 0.3458895996209429, "grad_norm": 0.99609375, "learning_rate": 0.00014663200386200035, "loss": 0.8799, "step": 7300 }, { "epoch": 0.3459369817578773, "grad_norm": 0.5703125, "learning_rate": 0.00014661882911690634, "loss": 0.5094, "step": 7301 }, { "epoch": 0.34598436389481163, "grad_norm": 0.7421875, "learning_rate": 0.00014660565333785637, "loss": 0.969, "step": 7302 }, { "epoch": 0.346031746031746, "grad_norm": 0.27734375, "learning_rate": 0.00014659247652514266, "loss": 0.02, "step": 7303 }, { "epoch": 0.3460791281686804, "grad_norm": 0.466796875, "learning_rate": 0.00014657929867905746, "loss": 0.5696, "step": 7304 }, { "epoch": 0.3461265103056148, "grad_norm": 0.1767578125, "learning_rate": 0.00014656611979989298, "loss": 0.1315, "step": 7305 }, { "epoch": 0.34617389244254915, "grad_norm": 0.0283203125, "learning_rate": 0.00014655293988794158, "loss": 0.0009, "step": 7306 }, { "epoch": 0.34622127457948354, "grad_norm": 0.5546875, "learning_rate": 0.0001465397589434956, "loss": 0.9577, "step": 7307 }, { "epoch": 0.34626865671641793, "grad_norm": 0.77734375, "learning_rate": 0.0001465265769668473, "loss": 1.1871, "step": 7308 }, { "epoch": 0.34631603885335227, "grad_norm": 0.1875, "learning_rate": 0.00014651339395828906, "loss": 0.1278, "step": 7309 }, { "epoch": 0.34636342099028666, "grad_norm": 0.82421875, "learning_rate": 0.00014650020991811334, "loss": 1.3712, "step": 7310 }, { "epoch": 0.34641080312722106, "grad_norm": 0.5390625, "learning_rate": 0.00014648702484661245, "loss": 1.0471, "step": 7311 }, { "epoch": 0.3464581852641554, "grad_norm": 0.1357421875, "learning_rate": 0.0001464738387440789, "loss": 0.0317, "step": 7312 }, { "epoch": 0.3465055674010898, "grad_norm": 0.291015625, "learning_rate": 0.00014646065161080509, "loss": 0.14, "step": 7313 }, { "epoch": 0.3465529495380242, "grad_norm": 0.96484375, "learning_rate": 0.00014644746344708351, "loss": 0.913, "step": 7314 }, { "epoch": 0.3466003316749585, "grad_norm": 0.51171875, "learning_rate": 0.00014643427425320665, "loss": 0.3591, "step": 7315 }, { "epoch": 0.3466477138118929, "grad_norm": 0.76171875, "learning_rate": 0.00014642108402946707, "loss": 1.342, "step": 7316 }, { "epoch": 0.3466950959488273, "grad_norm": 0.05859375, "learning_rate": 0.0001464078927761573, "loss": 0.0029, "step": 7317 }, { "epoch": 0.34674247808576164, "grad_norm": 0.07861328125, "learning_rate": 0.00014639470049356988, "loss": 0.0056, "step": 7318 }, { "epoch": 0.34678986022269603, "grad_norm": 0.52734375, "learning_rate": 0.00014638150718199744, "loss": 0.0129, "step": 7319 }, { "epoch": 0.3468372423596304, "grad_norm": 0.52734375, "learning_rate": 0.00014636831284173257, "loss": 0.406, "step": 7320 }, { "epoch": 0.3468846244965648, "grad_norm": 0.66796875, "learning_rate": 0.0001463551174730679, "loss": 0.7431, "step": 7321 }, { "epoch": 0.34693200663349916, "grad_norm": 0.59375, "learning_rate": 0.0001463419210762961, "loss": 0.4843, "step": 7322 }, { "epoch": 0.34697938877043355, "grad_norm": 0.69140625, "learning_rate": 0.00014632872365170986, "loss": 1.1616, "step": 7323 }, { "epoch": 0.34702677090736794, "grad_norm": 0.71484375, "learning_rate": 0.00014631552519960185, "loss": 0.9166, "step": 7324 }, { "epoch": 0.3470741530443023, "grad_norm": 0.81640625, "learning_rate": 0.00014630232572026484, "loss": 1.215, "step": 7325 }, { "epoch": 0.3471215351812367, "grad_norm": 0.388671875, "learning_rate": 0.0001462891252139916, "loss": 0.1048, "step": 7326 }, { "epoch": 0.34716891731817107, "grad_norm": 0.5703125, "learning_rate": 0.00014627592368107484, "loss": 0.485, "step": 7327 }, { "epoch": 0.3472162994551054, "grad_norm": 0.91015625, "learning_rate": 0.00014626272112180737, "loss": 1.0416, "step": 7328 }, { "epoch": 0.3472636815920398, "grad_norm": 0.58984375, "learning_rate": 0.00014624951753648203, "loss": 0.5534, "step": 7329 }, { "epoch": 0.3473110637289742, "grad_norm": 0.0162353515625, "learning_rate": 0.00014623631292539163, "loss": 0.0008, "step": 7330 }, { "epoch": 0.3473584458659085, "grad_norm": 1.0703125, "learning_rate": 0.00014622310728882912, "loss": 1.2545, "step": 7331 }, { "epoch": 0.3474058280028429, "grad_norm": 0.423828125, "learning_rate": 0.0001462099006270873, "loss": 0.1492, "step": 7332 }, { "epoch": 0.3474532101397773, "grad_norm": 0.91015625, "learning_rate": 0.00014619669294045905, "loss": 0.9416, "step": 7333 }, { "epoch": 0.3475005922767117, "grad_norm": 0.51171875, "learning_rate": 0.00014618348422923742, "loss": 1.1713, "step": 7334 }, { "epoch": 0.34754797441364604, "grad_norm": 0.279296875, "learning_rate": 0.00014617027449371532, "loss": 0.0498, "step": 7335 }, { "epoch": 0.34759535655058044, "grad_norm": 0.640625, "learning_rate": 0.00014615706373418566, "loss": 0.1106, "step": 7336 }, { "epoch": 0.34764273868751483, "grad_norm": 0.65625, "learning_rate": 0.0001461438519509415, "loss": 1.1389, "step": 7337 }, { "epoch": 0.34769012082444917, "grad_norm": 0.58984375, "learning_rate": 0.00014613063914427585, "loss": 0.9827, "step": 7338 }, { "epoch": 0.34773750296138356, "grad_norm": 0.58203125, "learning_rate": 0.0001461174253144818, "loss": 0.7366, "step": 7339 }, { "epoch": 0.34778488509831795, "grad_norm": 0.470703125, "learning_rate": 0.00014610421046185233, "loss": 0.0852, "step": 7340 }, { "epoch": 0.3478322672352523, "grad_norm": 0.9453125, "learning_rate": 0.0001460909945866806, "loss": 0.8307, "step": 7341 }, { "epoch": 0.3478796493721867, "grad_norm": 0.7265625, "learning_rate": 0.0001460777776892597, "loss": 1.0301, "step": 7342 }, { "epoch": 0.3479270315091211, "grad_norm": 0.68359375, "learning_rate": 0.0001460645597698828, "loss": 1.4619, "step": 7343 }, { "epoch": 0.3479744136460554, "grad_norm": 0.7265625, "learning_rate": 0.00014605134082884295, "loss": 1.3222, "step": 7344 }, { "epoch": 0.3480217957829898, "grad_norm": 0.83984375, "learning_rate": 0.00014603812086643348, "loss": 1.4361, "step": 7345 }, { "epoch": 0.3480691779199242, "grad_norm": 0.56640625, "learning_rate": 0.0001460248998829475, "loss": 0.4261, "step": 7346 }, { "epoch": 0.34811656005685854, "grad_norm": 0.61328125, "learning_rate": 0.00014601167787867827, "loss": 0.9054, "step": 7347 }, { "epoch": 0.34816394219379293, "grad_norm": 0.7890625, "learning_rate": 0.00014599845485391906, "loss": 0.8762, "step": 7348 }, { "epoch": 0.3482113243307273, "grad_norm": 0.1748046875, "learning_rate": 0.00014598523080896307, "loss": 0.1223, "step": 7349 }, { "epoch": 0.3482587064676617, "grad_norm": 0.53125, "learning_rate": 0.0001459720057441037, "loss": 0.5596, "step": 7350 }, { "epoch": 0.34830608860459605, "grad_norm": 0.74609375, "learning_rate": 0.00014595877965963418, "loss": 1.2321, "step": 7351 }, { "epoch": 0.34835347074153045, "grad_norm": 1.1328125, "learning_rate": 0.00014594555255584786, "loss": 1.2072, "step": 7352 }, { "epoch": 0.34840085287846484, "grad_norm": 0.84375, "learning_rate": 0.00014593232443303812, "loss": 1.08, "step": 7353 }, { "epoch": 0.3484482350153992, "grad_norm": 0.59375, "learning_rate": 0.00014591909529149838, "loss": 0.8205, "step": 7354 }, { "epoch": 0.34849561715233357, "grad_norm": 0.703125, "learning_rate": 0.00014590586513152202, "loss": 1.0821, "step": 7355 }, { "epoch": 0.34854299928926796, "grad_norm": 0.68359375, "learning_rate": 0.00014589263395340245, "loss": 0.8623, "step": 7356 }, { "epoch": 0.3485903814262023, "grad_norm": 0.48828125, "learning_rate": 0.00014587940175743317, "loss": 0.7111, "step": 7357 }, { "epoch": 0.3486377635631367, "grad_norm": 0.478515625, "learning_rate": 0.0001458661685439076, "loss": 0.1724, "step": 7358 }, { "epoch": 0.3486851457000711, "grad_norm": 0.625, "learning_rate": 0.00014585293431311925, "loss": 0.7656, "step": 7359 }, { "epoch": 0.3487325278370054, "grad_norm": 0.69921875, "learning_rate": 0.00014583969906536168, "loss": 0.4699, "step": 7360 }, { "epoch": 0.3487799099739398, "grad_norm": 0.81640625, "learning_rate": 0.0001458264628009284, "loss": 1.0327, "step": 7361 }, { "epoch": 0.3488272921108742, "grad_norm": 0.7265625, "learning_rate": 0.000145813225520113, "loss": 1.2035, "step": 7362 }, { "epoch": 0.3488746742478086, "grad_norm": 0.82421875, "learning_rate": 0.00014579998722320906, "loss": 0.2131, "step": 7363 }, { "epoch": 0.34892205638474294, "grad_norm": 0.70703125, "learning_rate": 0.00014578674791051018, "loss": 0.8787, "step": 7364 }, { "epoch": 0.34896943852167733, "grad_norm": 0.58984375, "learning_rate": 0.00014577350758231, "loss": 0.5334, "step": 7365 }, { "epoch": 0.3490168206586117, "grad_norm": 0.609375, "learning_rate": 0.0001457602662389022, "loss": 0.7806, "step": 7366 }, { "epoch": 0.34906420279554606, "grad_norm": 0.62890625, "learning_rate": 0.0001457470238805804, "loss": 0.957, "step": 7367 }, { "epoch": 0.34911158493248046, "grad_norm": 0.57421875, "learning_rate": 0.00014573378050763836, "loss": 0.6307, "step": 7368 }, { "epoch": 0.34915896706941485, "grad_norm": 0.11572265625, "learning_rate": 0.00014572053612036979, "loss": 0.0118, "step": 7369 }, { "epoch": 0.3492063492063492, "grad_norm": 0.8515625, "learning_rate": 0.00014570729071906839, "loss": 1.1567, "step": 7370 }, { "epoch": 0.3492537313432836, "grad_norm": 0.6875, "learning_rate": 0.00014569404430402798, "loss": 0.8284, "step": 7371 }, { "epoch": 0.349301113480218, "grad_norm": 0.60546875, "learning_rate": 0.00014568079687554236, "loss": 0.6115, "step": 7372 }, { "epoch": 0.3493484956171523, "grad_norm": 0.65234375, "learning_rate": 0.00014566754843390535, "loss": 1.1986, "step": 7373 }, { "epoch": 0.3493958777540867, "grad_norm": 0.734375, "learning_rate": 0.0001456542989794107, "loss": 0.9807, "step": 7374 }, { "epoch": 0.3494432598910211, "grad_norm": 0.6796875, "learning_rate": 0.00014564104851235232, "loss": 1.0133, "step": 7375 }, { "epoch": 0.34949064202795543, "grad_norm": 0.59765625, "learning_rate": 0.00014562779703302411, "loss": 1.1259, "step": 7376 }, { "epoch": 0.3495380241648898, "grad_norm": 0.71875, "learning_rate": 0.00014561454454172, "loss": 1.0618, "step": 7377 }, { "epoch": 0.3495854063018242, "grad_norm": 0.625, "learning_rate": 0.00014560129103873386, "loss": 0.7027, "step": 7378 }, { "epoch": 0.3496327884387586, "grad_norm": 0.90625, "learning_rate": 0.00014558803652435964, "loss": 0.2671, "step": 7379 }, { "epoch": 0.34968017057569295, "grad_norm": 0.53125, "learning_rate": 0.00014557478099889136, "loss": 0.7036, "step": 7380 }, { "epoch": 0.34972755271262734, "grad_norm": 0.75390625, "learning_rate": 0.00014556152446262296, "loss": 0.6956, "step": 7381 }, { "epoch": 0.34977493484956174, "grad_norm": 0.71484375, "learning_rate": 0.00014554826691584846, "loss": 1.3122, "step": 7382 }, { "epoch": 0.3498223169864961, "grad_norm": 0.640625, "learning_rate": 0.00014553500835886194, "loss": 1.0056, "step": 7383 }, { "epoch": 0.34986969912343047, "grad_norm": 0.64453125, "learning_rate": 0.00014552174879195744, "loss": 1.1146, "step": 7384 }, { "epoch": 0.34991708126036486, "grad_norm": 0.84375, "learning_rate": 0.00014550848821542905, "loss": 0.7516, "step": 7385 }, { "epoch": 0.3499644633972992, "grad_norm": 0.65234375, "learning_rate": 0.0001454952266295708, "loss": 1.2665, "step": 7386 }, { "epoch": 0.3500118455342336, "grad_norm": 1.21875, "learning_rate": 0.00014548196403467694, "loss": 0.3745, "step": 7387 }, { "epoch": 0.350059227671168, "grad_norm": 0.396484375, "learning_rate": 0.00014546870043104156, "loss": 0.5764, "step": 7388 }, { "epoch": 0.3501066098081023, "grad_norm": 0.70703125, "learning_rate": 0.00014545543581895884, "loss": 0.6194, "step": 7389 }, { "epoch": 0.3501539919450367, "grad_norm": 0.00154876708984375, "learning_rate": 0.00014544217019872295, "loss": 0.0001, "step": 7390 }, { "epoch": 0.3502013740819711, "grad_norm": 0.85546875, "learning_rate": 0.00014542890357062814, "loss": 0.9624, "step": 7391 }, { "epoch": 0.3502487562189055, "grad_norm": 0.6640625, "learning_rate": 0.00014541563593496864, "loss": 1.2012, "step": 7392 }, { "epoch": 0.35029613835583984, "grad_norm": 0.1630859375, "learning_rate": 0.00014540236729203868, "loss": 0.0248, "step": 7393 }, { "epoch": 0.35034352049277423, "grad_norm": 0.7265625, "learning_rate": 0.0001453890976421326, "loss": 0.8003, "step": 7394 }, { "epoch": 0.3503909026297086, "grad_norm": 1.4453125, "learning_rate": 0.00014537582698554466, "loss": 0.9812, "step": 7395 }, { "epoch": 0.35043828476664296, "grad_norm": 0.625, "learning_rate": 0.00014536255532256927, "loss": 0.8983, "step": 7396 }, { "epoch": 0.35048566690357735, "grad_norm": 0.154296875, "learning_rate": 0.00014534928265350067, "loss": 0.0213, "step": 7397 }, { "epoch": 0.35053304904051175, "grad_norm": 0.5390625, "learning_rate": 0.0001453360089786333, "loss": 0.8947, "step": 7398 }, { "epoch": 0.3505804311774461, "grad_norm": 0.625, "learning_rate": 0.00014532273429826152, "loss": 0.8732, "step": 7399 }, { "epoch": 0.3506278133143805, "grad_norm": 0.185546875, "learning_rate": 0.0001453094586126798, "loss": 0.1235, "step": 7400 }, { "epoch": 0.35067519545131487, "grad_norm": 0.04443359375, "learning_rate": 0.0001452961819221825, "loss": 0.0053, "step": 7401 }, { "epoch": 0.3507225775882492, "grad_norm": 0.189453125, "learning_rate": 0.00014528290422706418, "loss": 0.132, "step": 7402 }, { "epoch": 0.3507699597251836, "grad_norm": 0.64453125, "learning_rate": 0.00014526962552761927, "loss": 0.8855, "step": 7403 }, { "epoch": 0.350817341862118, "grad_norm": 0.796875, "learning_rate": 0.00014525634582414226, "loss": 0.7257, "step": 7404 }, { "epoch": 0.35086472399905233, "grad_norm": 0.5234375, "learning_rate": 0.00014524306511692772, "loss": 1.1252, "step": 7405 }, { "epoch": 0.3509121061359867, "grad_norm": 0.6484375, "learning_rate": 0.00014522978340627017, "loss": 1.1197, "step": 7406 }, { "epoch": 0.3509594882729211, "grad_norm": 0.671875, "learning_rate": 0.00014521650069246423, "loss": 1.0832, "step": 7407 }, { "epoch": 0.3510068704098555, "grad_norm": 0.6640625, "learning_rate": 0.00014520321697580446, "loss": 1.3954, "step": 7408 }, { "epoch": 0.35105425254678985, "grad_norm": 0.48828125, "learning_rate": 0.00014518993225658548, "loss": 0.1518, "step": 7409 }, { "epoch": 0.35110163468372424, "grad_norm": 0.68359375, "learning_rate": 0.0001451766465351019, "loss": 0.9196, "step": 7410 }, { "epoch": 0.35114901682065863, "grad_norm": 0.63671875, "learning_rate": 0.0001451633598116485, "loss": 1.2294, "step": 7411 }, { "epoch": 0.35119639895759297, "grad_norm": 0.8515625, "learning_rate": 0.00014515007208651984, "loss": 0.9131, "step": 7412 }, { "epoch": 0.35124378109452736, "grad_norm": 0.8046875, "learning_rate": 0.00014513678336001068, "loss": 1.2582, "step": 7413 }, { "epoch": 0.35129116323146176, "grad_norm": 0.7265625, "learning_rate": 0.00014512349363241572, "loss": 0.7336, "step": 7414 }, { "epoch": 0.3513385453683961, "grad_norm": 0.006500244140625, "learning_rate": 0.00014511020290402976, "loss": 0.0003, "step": 7415 }, { "epoch": 0.3513859275053305, "grad_norm": 1.1015625, "learning_rate": 0.00014509691117514753, "loss": 0.5614, "step": 7416 }, { "epoch": 0.3514333096422649, "grad_norm": 0.6953125, "learning_rate": 0.00014508361844606387, "loss": 0.8816, "step": 7417 }, { "epoch": 0.3514806917791992, "grad_norm": 0.322265625, "learning_rate": 0.00014507032471707353, "loss": 0.1625, "step": 7418 }, { "epoch": 0.3515280739161336, "grad_norm": 0.1865234375, "learning_rate": 0.00014505702998847145, "loss": 0.1259, "step": 7419 }, { "epoch": 0.351575456053068, "grad_norm": 0.62109375, "learning_rate": 0.00014504373426055243, "loss": 0.9052, "step": 7420 }, { "epoch": 0.3516228381900024, "grad_norm": 0.55078125, "learning_rate": 0.00014503043753361135, "loss": 1.4965, "step": 7421 }, { "epoch": 0.35167022032693673, "grad_norm": 0.734375, "learning_rate": 0.00014501713980794308, "loss": 1.3937, "step": 7422 }, { "epoch": 0.3517176024638711, "grad_norm": 0.1162109375, "learning_rate": 0.00014500384108384268, "loss": 0.0065, "step": 7423 }, { "epoch": 0.3517649846008055, "grad_norm": 0.640625, "learning_rate": 0.00014499054136160496, "loss": 1.1131, "step": 7424 }, { "epoch": 0.35181236673773986, "grad_norm": 0.03857421875, "learning_rate": 0.00014497724064152498, "loss": 0.0025, "step": 7425 }, { "epoch": 0.35185974887467425, "grad_norm": 0.9765625, "learning_rate": 0.0001449639389238977, "loss": 1.2031, "step": 7426 }, { "epoch": 0.35190713101160864, "grad_norm": 0.53125, "learning_rate": 0.0001449506362090182, "loss": 0.9041, "step": 7427 }, { "epoch": 0.351954513148543, "grad_norm": 0.6875, "learning_rate": 0.0001449373324971814, "loss": 1.0974, "step": 7428 }, { "epoch": 0.3520018952854774, "grad_norm": 0.0673828125, "learning_rate": 0.00014492402778868246, "loss": 0.0086, "step": 7429 }, { "epoch": 0.35204927742241177, "grad_norm": 0.388671875, "learning_rate": 0.00014491072208381643, "loss": 0.0784, "step": 7430 }, { "epoch": 0.3520966595593461, "grad_norm": 0.58203125, "learning_rate": 0.0001448974153828784, "loss": 1.2892, "step": 7431 }, { "epoch": 0.3521440416962805, "grad_norm": 0.40625, "learning_rate": 0.00014488410768616355, "loss": 0.4134, "step": 7432 }, { "epoch": 0.3521914238332149, "grad_norm": 0.54296875, "learning_rate": 0.00014487079899396698, "loss": 0.5826, "step": 7433 }, { "epoch": 0.3522388059701492, "grad_norm": 0.6484375, "learning_rate": 0.0001448574893065839, "loss": 0.6569, "step": 7434 }, { "epoch": 0.3522861881070836, "grad_norm": 0.59375, "learning_rate": 0.00014484417862430947, "loss": 1.0911, "step": 7435 }, { "epoch": 0.352333570244018, "grad_norm": 0.65625, "learning_rate": 0.00014483086694743898, "loss": 0.7696, "step": 7436 }, { "epoch": 0.3523809523809524, "grad_norm": 0.9609375, "learning_rate": 0.00014481755427626754, "loss": 1.152, "step": 7437 }, { "epoch": 0.35242833451788674, "grad_norm": 0.765625, "learning_rate": 0.00014480424061109052, "loss": 1.2271, "step": 7438 }, { "epoch": 0.35247571665482114, "grad_norm": 1.7265625, "learning_rate": 0.00014479092595220315, "loss": 0.4939, "step": 7439 }, { "epoch": 0.35252309879175553, "grad_norm": 0.75390625, "learning_rate": 0.00014477761029990074, "loss": 0.8439, "step": 7440 }, { "epoch": 0.35257048092868987, "grad_norm": 0.81640625, "learning_rate": 0.00014476429365447866, "loss": 1.2563, "step": 7441 }, { "epoch": 0.35261786306562426, "grad_norm": 0.59765625, "learning_rate": 0.0001447509760162322, "loss": 1.0431, "step": 7442 }, { "epoch": 0.35266524520255865, "grad_norm": 0.63671875, "learning_rate": 0.00014473765738545676, "loss": 1.1327, "step": 7443 }, { "epoch": 0.352712627339493, "grad_norm": 0.59765625, "learning_rate": 0.00014472433776244778, "loss": 1.1597, "step": 7444 }, { "epoch": 0.3527600094764274, "grad_norm": 0.78125, "learning_rate": 0.00014471101714750057, "loss": 1.1022, "step": 7445 }, { "epoch": 0.3528073916133618, "grad_norm": 1.03125, "learning_rate": 0.00014469769554091067, "loss": 1.1637, "step": 7446 }, { "epoch": 0.3528547737502961, "grad_norm": 0.5859375, "learning_rate": 0.00014468437294297345, "loss": 0.52, "step": 7447 }, { "epoch": 0.3529021558872305, "grad_norm": 0.59765625, "learning_rate": 0.00014467104935398447, "loss": 0.8924, "step": 7448 }, { "epoch": 0.3529495380241649, "grad_norm": 0.123046875, "learning_rate": 0.00014465772477423918, "loss": 0.0154, "step": 7449 }, { "epoch": 0.3529969201610993, "grad_norm": 0.6015625, "learning_rate": 0.00014464439920403312, "loss": 0.5742, "step": 7450 }, { "epoch": 0.35304430229803363, "grad_norm": 0.671875, "learning_rate": 0.00014463107264366183, "loss": 0.9505, "step": 7451 }, { "epoch": 0.353091684434968, "grad_norm": 0.84765625, "learning_rate": 0.0001446177450934209, "loss": 0.8262, "step": 7452 }, { "epoch": 0.3531390665719024, "grad_norm": 0.86328125, "learning_rate": 0.00014460441655360587, "loss": 1.1557, "step": 7453 }, { "epoch": 0.35318644870883675, "grad_norm": 0.11865234375, "learning_rate": 0.00014459108702451245, "loss": 0.0148, "step": 7454 }, { "epoch": 0.35323383084577115, "grad_norm": 0.62109375, "learning_rate": 0.0001445777565064362, "loss": 1.3203, "step": 7455 }, { "epoch": 0.35328121298270554, "grad_norm": 0.60546875, "learning_rate": 0.00014456442499967276, "loss": 1.0322, "step": 7456 }, { "epoch": 0.3533285951196399, "grad_norm": 0.53515625, "learning_rate": 0.00014455109250451789, "loss": 0.0646, "step": 7457 }, { "epoch": 0.35337597725657427, "grad_norm": 0.6328125, "learning_rate": 0.00014453775902126723, "loss": 0.8735, "step": 7458 }, { "epoch": 0.35342335939350866, "grad_norm": 0.5859375, "learning_rate": 0.00014452442455021648, "loss": 0.0772, "step": 7459 }, { "epoch": 0.353470741530443, "grad_norm": 0.66015625, "learning_rate": 0.00014451108909166146, "loss": 1.2499, "step": 7460 }, { "epoch": 0.3535181236673774, "grad_norm": 0.54296875, "learning_rate": 0.00014449775264589789, "loss": 0.7132, "step": 7461 }, { "epoch": 0.3535655058043118, "grad_norm": 0.62890625, "learning_rate": 0.00014448441521322153, "loss": 1.0795, "step": 7462 }, { "epoch": 0.3536128879412461, "grad_norm": 0.4375, "learning_rate": 0.00014447107679392825, "loss": 0.1977, "step": 7463 }, { "epoch": 0.3536602700781805, "grad_norm": 0.6171875, "learning_rate": 0.00014445773738831384, "loss": 0.74, "step": 7464 }, { "epoch": 0.3537076522151149, "grad_norm": 0.71875, "learning_rate": 0.00014444439699667417, "loss": 1.2651, "step": 7465 }, { "epoch": 0.3537550343520493, "grad_norm": 0.73046875, "learning_rate": 0.00014443105561930513, "loss": 0.7995, "step": 7466 }, { "epoch": 0.35380241648898364, "grad_norm": 0.33984375, "learning_rate": 0.00014441771325650256, "loss": 0.1529, "step": 7467 }, { "epoch": 0.35384979862591803, "grad_norm": 0.80859375, "learning_rate": 0.0001444043699085625, "loss": 0.9945, "step": 7468 }, { "epoch": 0.3538971807628524, "grad_norm": 0.11181640625, "learning_rate": 0.00014439102557578076, "loss": 0.0181, "step": 7469 }, { "epoch": 0.35394456289978676, "grad_norm": 0.333984375, "learning_rate": 0.00014437768025845338, "loss": 0.0129, "step": 7470 }, { "epoch": 0.35399194503672116, "grad_norm": 0.30859375, "learning_rate": 0.00014436433395687627, "loss": 0.0466, "step": 7471 }, { "epoch": 0.35403932717365555, "grad_norm": 0.69140625, "learning_rate": 0.00014435098667134555, "loss": 1.275, "step": 7472 }, { "epoch": 0.3540867093105899, "grad_norm": 0.294921875, "learning_rate": 0.00014433763840215714, "loss": 0.122, "step": 7473 }, { "epoch": 0.3541340914475243, "grad_norm": 0.79296875, "learning_rate": 0.00014432428914960715, "loss": 1.2653, "step": 7474 }, { "epoch": 0.3541814735844587, "grad_norm": 0.59375, "learning_rate": 0.00014431093891399165, "loss": 1.1488, "step": 7475 }, { "epoch": 0.354228855721393, "grad_norm": 0.52734375, "learning_rate": 0.00014429758769560672, "loss": 1.43, "step": 7476 }, { "epoch": 0.3542762378583274, "grad_norm": 0.60546875, "learning_rate": 0.0001442842354947485, "loss": 0.586, "step": 7477 }, { "epoch": 0.3543236199952618, "grad_norm": 0.6328125, "learning_rate": 0.00014427088231171306, "loss": 1.0131, "step": 7478 }, { "epoch": 0.3543710021321962, "grad_norm": 0.6171875, "learning_rate": 0.00014425752814679663, "loss": 0.8696, "step": 7479 }, { "epoch": 0.3544183842691305, "grad_norm": 0.734375, "learning_rate": 0.00014424417300029537, "loss": 1.1594, "step": 7480 }, { "epoch": 0.3544657664060649, "grad_norm": 0.83984375, "learning_rate": 0.00014423081687250545, "loss": 1.1847, "step": 7481 }, { "epoch": 0.3545131485429993, "grad_norm": 0.515625, "learning_rate": 0.00014421745976372316, "loss": 0.8928, "step": 7482 }, { "epoch": 0.35456053067993365, "grad_norm": 0.67578125, "learning_rate": 0.00014420410167424472, "loss": 0.9059, "step": 7483 }, { "epoch": 0.35460791281686804, "grad_norm": 0.2109375, "learning_rate": 0.00014419074260436636, "loss": 0.1536, "step": 7484 }, { "epoch": 0.35465529495380244, "grad_norm": 0.22265625, "learning_rate": 0.0001441773825543844, "loss": 0.0605, "step": 7485 }, { "epoch": 0.3547026770907368, "grad_norm": 0.2578125, "learning_rate": 0.00014416402152459516, "loss": 0.0113, "step": 7486 }, { "epoch": 0.35475005922767117, "grad_norm": 0.6875, "learning_rate": 0.00014415065951529495, "loss": 1.0058, "step": 7487 }, { "epoch": 0.35479744136460556, "grad_norm": 0.6953125, "learning_rate": 0.00014413729652678017, "loss": 0.7031, "step": 7488 }, { "epoch": 0.3548448235015399, "grad_norm": 0.17578125, "learning_rate": 0.00014412393255934714, "loss": 0.1247, "step": 7489 }, { "epoch": 0.3548922056384743, "grad_norm": 0.181640625, "learning_rate": 0.00014411056761329228, "loss": 0.1318, "step": 7490 }, { "epoch": 0.3549395877754087, "grad_norm": 0.57421875, "learning_rate": 0.00014409720168891203, "loss": 1.499, "step": 7491 }, { "epoch": 0.354986969912343, "grad_norm": 0.83203125, "learning_rate": 0.00014408383478650282, "loss": 0.6738, "step": 7492 }, { "epoch": 0.3550343520492774, "grad_norm": 0.5546875, "learning_rate": 0.0001440704669063611, "loss": 0.9576, "step": 7493 }, { "epoch": 0.3550817341862118, "grad_norm": 0.71484375, "learning_rate": 0.00014405709804878335, "loss": 1.0804, "step": 7494 }, { "epoch": 0.3551291163231462, "grad_norm": 0.05908203125, "learning_rate": 0.00014404372821406612, "loss": 0.0015, "step": 7495 }, { "epoch": 0.35517649846008054, "grad_norm": 0.69140625, "learning_rate": 0.00014403035740250593, "loss": 0.7805, "step": 7496 }, { "epoch": 0.35522388059701493, "grad_norm": 0.54296875, "learning_rate": 0.00014401698561439927, "loss": 0.833, "step": 7497 }, { "epoch": 0.3552712627339493, "grad_norm": 0.71875, "learning_rate": 0.00014400361285004276, "loss": 1.1491, "step": 7498 }, { "epoch": 0.35531864487088366, "grad_norm": 0.1591796875, "learning_rate": 0.000143990239109733, "loss": 0.1052, "step": 7499 }, { "epoch": 0.35536602700781805, "grad_norm": 0.72265625, "learning_rate": 0.00014397686439376658, "loss": 1.2958, "step": 7500 }, { "epoch": 0.35541340914475245, "grad_norm": 0.130859375, "learning_rate": 0.00014396348870244016, "loss": 0.0168, "step": 7501 }, { "epoch": 0.3554607912816868, "grad_norm": 0.357421875, "learning_rate": 0.0001439501120360504, "loss": 0.2103, "step": 7502 }, { "epoch": 0.3555081734186212, "grad_norm": 0.7265625, "learning_rate": 0.00014393673439489394, "loss": 0.854, "step": 7503 }, { "epoch": 0.35555555555555557, "grad_norm": 0.65625, "learning_rate": 0.00014392335577926755, "loss": 1.0615, "step": 7504 }, { "epoch": 0.3556029376924899, "grad_norm": 0.265625, "learning_rate": 0.00014390997618946786, "loss": 0.0217, "step": 7505 }, { "epoch": 0.3556503198294243, "grad_norm": 0.2119140625, "learning_rate": 0.0001438965956257917, "loss": 0.0417, "step": 7506 }, { "epoch": 0.3556977019663587, "grad_norm": 0.60546875, "learning_rate": 0.00014388321408853586, "loss": 0.8922, "step": 7507 }, { "epoch": 0.3557450841032931, "grad_norm": 0.59765625, "learning_rate": 0.00014386983157799697, "loss": 0.872, "step": 7508 }, { "epoch": 0.3557924662402274, "grad_norm": 0.51953125, "learning_rate": 0.000143856448094472, "loss": 1.1858, "step": 7509 }, { "epoch": 0.3558398483771618, "grad_norm": 0.5234375, "learning_rate": 0.00014384306363825772, "loss": 0.6, "step": 7510 }, { "epoch": 0.3558872305140962, "grad_norm": 0.6796875, "learning_rate": 0.00014382967820965098, "loss": 0.9507, "step": 7511 }, { "epoch": 0.35593461265103055, "grad_norm": 0.4453125, "learning_rate": 0.00014381629180894867, "loss": 0.6646, "step": 7512 }, { "epoch": 0.35598199478796494, "grad_norm": 0.703125, "learning_rate": 0.00014380290443644767, "loss": 1.2201, "step": 7513 }, { "epoch": 0.35602937692489933, "grad_norm": 0.765625, "learning_rate": 0.00014378951609244492, "loss": 0.9526, "step": 7514 }, { "epoch": 0.35607675906183367, "grad_norm": 0.19921875, "learning_rate": 0.00014377612677723736, "loss": 0.1276, "step": 7515 }, { "epoch": 0.35612414119876806, "grad_norm": 0.80078125, "learning_rate": 0.0001437627364911219, "loss": 0.7398, "step": 7516 }, { "epoch": 0.35617152333570246, "grad_norm": 0.65234375, "learning_rate": 0.00014374934523439555, "loss": 0.8415, "step": 7517 }, { "epoch": 0.3562189054726368, "grad_norm": 0.486328125, "learning_rate": 0.00014373595300735538, "loss": 0.7519, "step": 7518 }, { "epoch": 0.3562662876095712, "grad_norm": 0.26953125, "learning_rate": 0.0001437225598102983, "loss": 0.139, "step": 7519 }, { "epoch": 0.3563136697465056, "grad_norm": 0.73828125, "learning_rate": 0.00014370916564352144, "loss": 1.1972, "step": 7520 }, { "epoch": 0.3563610518834399, "grad_norm": 0.82421875, "learning_rate": 0.00014369577050732184, "loss": 1.1084, "step": 7521 }, { "epoch": 0.3564084340203743, "grad_norm": 0.5546875, "learning_rate": 0.0001436823744019966, "loss": 0.8126, "step": 7522 }, { "epoch": 0.3564558161573087, "grad_norm": 0.69921875, "learning_rate": 0.00014366897732784285, "loss": 1.1367, "step": 7523 }, { "epoch": 0.3565031982942431, "grad_norm": 0.2255859375, "learning_rate": 0.00014365557928515762, "loss": 0.1515, "step": 7524 }, { "epoch": 0.35655058043117743, "grad_norm": 0.71875, "learning_rate": 0.0001436421802742382, "loss": 0.6548, "step": 7525 }, { "epoch": 0.3565979625681118, "grad_norm": 0.578125, "learning_rate": 0.0001436287802953817, "loss": 0.6433, "step": 7526 }, { "epoch": 0.3566453447050462, "grad_norm": 0.73046875, "learning_rate": 0.00014361537934888533, "loss": 0.1723, "step": 7527 }, { "epoch": 0.35669272684198056, "grad_norm": 0.62890625, "learning_rate": 0.00014360197743504627, "loss": 0.9152, "step": 7528 }, { "epoch": 0.35674010897891495, "grad_norm": 0.59765625, "learning_rate": 0.00014358857455416178, "loss": 1.0276, "step": 7529 }, { "epoch": 0.35678749111584934, "grad_norm": 0.56640625, "learning_rate": 0.00014357517070652921, "loss": 0.7897, "step": 7530 }, { "epoch": 0.3568348732527837, "grad_norm": 0.61328125, "learning_rate": 0.0001435617658924457, "loss": 0.995, "step": 7531 }, { "epoch": 0.3568822553897181, "grad_norm": 0.5703125, "learning_rate": 0.00014354836011220868, "loss": 1.0152, "step": 7532 }, { "epoch": 0.35692963752665247, "grad_norm": 0.6484375, "learning_rate": 0.00014353495336611537, "loss": 0.6246, "step": 7533 }, { "epoch": 0.3569770196635868, "grad_norm": 0.052001953125, "learning_rate": 0.00014352154565446318, "loss": 0.0018, "step": 7534 }, { "epoch": 0.3570244018005212, "grad_norm": 0.2021484375, "learning_rate": 0.00014350813697754948, "loss": 0.0143, "step": 7535 }, { "epoch": 0.3570717839374556, "grad_norm": 0.040283203125, "learning_rate": 0.00014349472733567162, "loss": 0.0042, "step": 7536 }, { "epoch": 0.35711916607439, "grad_norm": 0.6015625, "learning_rate": 0.00014348131672912705, "loss": 0.9628, "step": 7537 }, { "epoch": 0.3571665482113243, "grad_norm": 0.60546875, "learning_rate": 0.0001434679051582132, "loss": 0.9001, "step": 7538 }, { "epoch": 0.3572139303482587, "grad_norm": 0.83984375, "learning_rate": 0.0001434544926232275, "loss": 0.2132, "step": 7539 }, { "epoch": 0.3572613124851931, "grad_norm": 0.49609375, "learning_rate": 0.00014344107912446743, "loss": 0.7011, "step": 7540 }, { "epoch": 0.35730869462212744, "grad_norm": 0.78125, "learning_rate": 0.0001434276646622305, "loss": 1.2016, "step": 7541 }, { "epoch": 0.35735607675906184, "grad_norm": 1.375, "learning_rate": 0.00014341424923681423, "loss": 0.2444, "step": 7542 }, { "epoch": 0.35740345889599623, "grad_norm": 0.765625, "learning_rate": 0.0001434008328485162, "loss": 1.0004, "step": 7543 }, { "epoch": 0.35745084103293057, "grad_norm": 0.73046875, "learning_rate": 0.00014338741549763383, "loss": 0.9323, "step": 7544 }, { "epoch": 0.35749822316986496, "grad_norm": 0.79296875, "learning_rate": 0.00014337399718446488, "loss": 0.9443, "step": 7545 }, { "epoch": 0.35754560530679935, "grad_norm": 0.6640625, "learning_rate": 0.00014336057790930684, "loss": 1.1554, "step": 7546 }, { "epoch": 0.3575929874437337, "grad_norm": 0.19140625, "learning_rate": 0.00014334715767245736, "loss": 0.1453, "step": 7547 }, { "epoch": 0.3576403695806681, "grad_norm": 0.65625, "learning_rate": 0.0001433337364742141, "loss": 1.6827, "step": 7548 }, { "epoch": 0.3576877517176025, "grad_norm": 0.53125, "learning_rate": 0.00014332031431487475, "loss": 0.4558, "step": 7549 }, { "epoch": 0.3577351338545368, "grad_norm": 0.349609375, "learning_rate": 0.00014330689119473693, "loss": 0.1129, "step": 7550 }, { "epoch": 0.3577825159914712, "grad_norm": 0.9609375, "learning_rate": 0.00014329346711409842, "loss": 1.1667, "step": 7551 }, { "epoch": 0.3578298981284056, "grad_norm": 0.08349609375, "learning_rate": 0.0001432800420732569, "loss": 0.0043, "step": 7552 }, { "epoch": 0.35787728026534, "grad_norm": 0.64453125, "learning_rate": 0.00014326661607251014, "loss": 1.2576, "step": 7553 }, { "epoch": 0.35792466240227433, "grad_norm": 0.62890625, "learning_rate": 0.00014325318911215598, "loss": 0.7217, "step": 7554 }, { "epoch": 0.3579720445392087, "grad_norm": 0.4609375, "learning_rate": 0.0001432397611924921, "loss": 0.5296, "step": 7555 }, { "epoch": 0.3580194266761431, "grad_norm": 0.004425048828125, "learning_rate": 0.0001432263323138164, "loss": 0.0003, "step": 7556 }, { "epoch": 0.35806680881307745, "grad_norm": 0.212890625, "learning_rate": 0.0001432129024764267, "loss": 0.1384, "step": 7557 }, { "epoch": 0.35811419095001185, "grad_norm": 0.68359375, "learning_rate": 0.00014319947168062083, "loss": 0.9144, "step": 7558 }, { "epoch": 0.35816157308694624, "grad_norm": 0.25390625, "learning_rate": 0.00014318603992669667, "loss": 0.0499, "step": 7559 }, { "epoch": 0.3582089552238806, "grad_norm": 0.5859375, "learning_rate": 0.00014317260721495218, "loss": 0.0449, "step": 7560 }, { "epoch": 0.35825633736081497, "grad_norm": 0.388671875, "learning_rate": 0.00014315917354568524, "loss": 0.0961, "step": 7561 }, { "epoch": 0.35830371949774936, "grad_norm": 0.703125, "learning_rate": 0.0001431457389191938, "loss": 0.6478, "step": 7562 }, { "epoch": 0.3583511016346837, "grad_norm": 0.625, "learning_rate": 0.00014313230333577582, "loss": 1.1945, "step": 7563 }, { "epoch": 0.3583984837716181, "grad_norm": 0.91015625, "learning_rate": 0.0001431188667957293, "loss": 1.1632, "step": 7564 }, { "epoch": 0.3584458659085525, "grad_norm": 0.80078125, "learning_rate": 0.00014310542929935226, "loss": 1.02, "step": 7565 }, { "epoch": 0.3584932480454869, "grad_norm": 0.6875, "learning_rate": 0.0001430919908469427, "loss": 1.3646, "step": 7566 }, { "epoch": 0.3585406301824212, "grad_norm": 0.73046875, "learning_rate": 0.00014307855143879866, "loss": 0.8545, "step": 7567 }, { "epoch": 0.3585880123193556, "grad_norm": 0.7734375, "learning_rate": 0.00014306511107521828, "loss": 0.9253, "step": 7568 }, { "epoch": 0.35863539445629, "grad_norm": 0.8515625, "learning_rate": 0.00014305166975649955, "loss": 0.972, "step": 7569 }, { "epoch": 0.35868277659322434, "grad_norm": 0.7265625, "learning_rate": 0.0001430382274829407, "loss": 0.142, "step": 7570 }, { "epoch": 0.35873015873015873, "grad_norm": 0.65234375, "learning_rate": 0.00014302478425483976, "loss": 0.3351, "step": 7571 }, { "epoch": 0.3587775408670931, "grad_norm": 0.67578125, "learning_rate": 0.00014301134007249498, "loss": 1.3903, "step": 7572 }, { "epoch": 0.35882492300402746, "grad_norm": 0.150390625, "learning_rate": 0.00014299789493620445, "loss": 0.014, "step": 7573 }, { "epoch": 0.35887230514096186, "grad_norm": 0.640625, "learning_rate": 0.00014298444884626641, "loss": 0.7912, "step": 7574 }, { "epoch": 0.35891968727789625, "grad_norm": 0.64453125, "learning_rate": 0.00014297100180297905, "loss": 0.9977, "step": 7575 }, { "epoch": 0.3589670694148306, "grad_norm": 1.03125, "learning_rate": 0.00014295755380664074, "loss": 1.0591, "step": 7576 }, { "epoch": 0.359014451551765, "grad_norm": 0.9296875, "learning_rate": 0.00014294410485754956, "loss": 1.1827, "step": 7577 }, { "epoch": 0.35906183368869937, "grad_norm": 0.54296875, "learning_rate": 0.00014293065495600392, "loss": 0.8982, "step": 7578 }, { "epoch": 0.3591092158256337, "grad_norm": 0.7890625, "learning_rate": 0.00014291720410230204, "loss": 1.2666, "step": 7579 }, { "epoch": 0.3591565979625681, "grad_norm": 0.796875, "learning_rate": 0.00014290375229674234, "loss": 0.9742, "step": 7580 }, { "epoch": 0.3592039800995025, "grad_norm": 0.7109375, "learning_rate": 0.00014289029953962307, "loss": 1.266, "step": 7581 }, { "epoch": 0.3592513622364369, "grad_norm": 0.009765625, "learning_rate": 0.00014287684583124264, "loss": 0.0004, "step": 7582 }, { "epoch": 0.3592987443733712, "grad_norm": 0.4296875, "learning_rate": 0.00014286339117189947, "loss": 0.0734, "step": 7583 }, { "epoch": 0.3593461265103056, "grad_norm": 0.173828125, "learning_rate": 0.00014284993556189191, "loss": 0.1343, "step": 7584 }, { "epoch": 0.35939350864724, "grad_norm": 1.0, "learning_rate": 0.00014283647900151843, "loss": 0.0427, "step": 7585 }, { "epoch": 0.35944089078417435, "grad_norm": 0.03173828125, "learning_rate": 0.00014282302149107748, "loss": 0.0012, "step": 7586 }, { "epoch": 0.35948827292110874, "grad_norm": 0.53515625, "learning_rate": 0.00014280956303086751, "loss": 0.5595, "step": 7587 }, { "epoch": 0.35953565505804314, "grad_norm": 0.546875, "learning_rate": 0.00014279610362118706, "loss": 0.6958, "step": 7588 }, { "epoch": 0.3595830371949775, "grad_norm": 0.86328125, "learning_rate": 0.00014278264326233458, "loss": 0.9134, "step": 7589 }, { "epoch": 0.35963041933191187, "grad_norm": 0.515625, "learning_rate": 0.00014276918195460866, "loss": 0.7247, "step": 7590 }, { "epoch": 0.35967780146884626, "grad_norm": 0.609375, "learning_rate": 0.00014275571969830786, "loss": 0.7464, "step": 7591 }, { "epoch": 0.3597251836057806, "grad_norm": 0.5859375, "learning_rate": 0.00014274225649373072, "loss": 0.4297, "step": 7592 }, { "epoch": 0.359772565742715, "grad_norm": 0.1845703125, "learning_rate": 0.00014272879234117586, "loss": 0.0135, "step": 7593 }, { "epoch": 0.3598199478796494, "grad_norm": 0.69140625, "learning_rate": 0.0001427153272409419, "loss": 0.9053, "step": 7594 }, { "epoch": 0.3598673300165838, "grad_norm": 0.5703125, "learning_rate": 0.0001427018611933275, "loss": 0.8336, "step": 7595 }, { "epoch": 0.3599147121535181, "grad_norm": 0.353515625, "learning_rate": 0.00014268839419863126, "loss": 0.0803, "step": 7596 }, { "epoch": 0.3599620942904525, "grad_norm": 0.10888671875, "learning_rate": 0.00014267492625715192, "loss": 0.0176, "step": 7597 }, { "epoch": 0.3600094764273869, "grad_norm": 0.5390625, "learning_rate": 0.00014266145736918816, "loss": 0.7939, "step": 7598 }, { "epoch": 0.36005685856432124, "grad_norm": 0.75, "learning_rate": 0.00014264798753503875, "loss": 1.0958, "step": 7599 }, { "epoch": 0.36010424070125563, "grad_norm": 1.859375, "learning_rate": 0.00014263451675500236, "loss": 0.0532, "step": 7600 }, { "epoch": 0.36015162283819, "grad_norm": 0.640625, "learning_rate": 0.00014262104502937785, "loss": 0.9797, "step": 7601 }, { "epoch": 0.36019900497512436, "grad_norm": 0.1630859375, "learning_rate": 0.00014260757235846393, "loss": 0.1309, "step": 7602 }, { "epoch": 0.36024638711205875, "grad_norm": 0.7734375, "learning_rate": 0.00014259409874255947, "loss": 1.236, "step": 7603 }, { "epoch": 0.36029376924899315, "grad_norm": 0.63671875, "learning_rate": 0.00014258062418196323, "loss": 0.8716, "step": 7604 }, { "epoch": 0.3603411513859275, "grad_norm": 1.1484375, "learning_rate": 0.00014256714867697413, "loss": 1.2937, "step": 7605 }, { "epoch": 0.3603885335228619, "grad_norm": 0.62890625, "learning_rate": 0.000142553672227891, "loss": 1.005, "step": 7606 }, { "epoch": 0.36043591565979627, "grad_norm": 0.6328125, "learning_rate": 0.00014254019483501273, "loss": 0.7123, "step": 7607 }, { "epoch": 0.3604832977967306, "grad_norm": 0.2470703125, "learning_rate": 0.00014252671649863825, "loss": 0.059, "step": 7608 }, { "epoch": 0.360530679933665, "grad_norm": 1.2578125, "learning_rate": 0.0001425132372190665, "loss": 1.1571, "step": 7609 }, { "epoch": 0.3605780620705994, "grad_norm": 0.244140625, "learning_rate": 0.00014249975699659646, "loss": 0.0491, "step": 7610 }, { "epoch": 0.3606254442075338, "grad_norm": 0.625, "learning_rate": 0.00014248627583152707, "loss": 1.0308, "step": 7611 }, { "epoch": 0.3606728263444681, "grad_norm": 0.9453125, "learning_rate": 0.00014247279372415732, "loss": 0.5094, "step": 7612 }, { "epoch": 0.3607202084814025, "grad_norm": 0.376953125, "learning_rate": 0.00014245931067478624, "loss": 0.179, "step": 7613 }, { "epoch": 0.3607675906183369, "grad_norm": 0.7109375, "learning_rate": 0.00014244582668371288, "loss": 1.4096, "step": 7614 }, { "epoch": 0.36081497275527125, "grad_norm": 0.83203125, "learning_rate": 0.0001424323417512363, "loss": 1.5485, "step": 7615 }, { "epoch": 0.36086235489220564, "grad_norm": 0.65625, "learning_rate": 0.0001424188558776556, "loss": 1.1223, "step": 7616 }, { "epoch": 0.36090973702914003, "grad_norm": 0.5078125, "learning_rate": 0.00014240536906326982, "loss": 0.6456, "step": 7617 }, { "epoch": 0.36095711916607437, "grad_norm": 0.443359375, "learning_rate": 0.00014239188130837818, "loss": 0.3114, "step": 7618 }, { "epoch": 0.36100450130300876, "grad_norm": 0.2353515625, "learning_rate": 0.0001423783926132797, "loss": 0.0378, "step": 7619 }, { "epoch": 0.36105188343994316, "grad_norm": 0.76953125, "learning_rate": 0.00014236490297827364, "loss": 1.0091, "step": 7620 }, { "epoch": 0.3610992655768775, "grad_norm": 0.267578125, "learning_rate": 0.00014235141240365913, "loss": 0.1251, "step": 7621 }, { "epoch": 0.3611466477138119, "grad_norm": 0.7890625, "learning_rate": 0.00014233792088973543, "loss": 0.9898, "step": 7622 }, { "epoch": 0.3611940298507463, "grad_norm": 0.388671875, "learning_rate": 0.00014232442843680176, "loss": 0.0914, "step": 7623 }, { "epoch": 0.36124141198768067, "grad_norm": 0.216796875, "learning_rate": 0.0001423109350451573, "loss": 0.0264, "step": 7624 }, { "epoch": 0.361288794124615, "grad_norm": 0.6015625, "learning_rate": 0.00014229744071510143, "loss": 0.3978, "step": 7625 }, { "epoch": 0.3613361762615494, "grad_norm": 0.71875, "learning_rate": 0.00014228394544693335, "loss": 0.9037, "step": 7626 }, { "epoch": 0.3613835583984838, "grad_norm": 0.67578125, "learning_rate": 0.00014227044924095239, "loss": 0.744, "step": 7627 }, { "epoch": 0.36143094053541813, "grad_norm": 0.478515625, "learning_rate": 0.0001422569520974579, "loss": 0.1844, "step": 7628 }, { "epoch": 0.3614783226723525, "grad_norm": 0.7578125, "learning_rate": 0.0001422434540167492, "loss": 0.5736, "step": 7629 }, { "epoch": 0.3615257048092869, "grad_norm": 0.72265625, "learning_rate": 0.00014222995499912572, "loss": 1.3629, "step": 7630 }, { "epoch": 0.36157308694622126, "grad_norm": 0.333984375, "learning_rate": 0.00014221645504488678, "loss": 0.1722, "step": 7631 }, { "epoch": 0.36162046908315565, "grad_norm": 0.5703125, "learning_rate": 0.00014220295415433184, "loss": 0.7315, "step": 7632 }, { "epoch": 0.36166785122009004, "grad_norm": 0.7109375, "learning_rate": 0.00014218945232776035, "loss": 0.507, "step": 7633 }, { "epoch": 0.3617152333570244, "grad_norm": 0.51953125, "learning_rate": 0.00014217594956547174, "loss": 0.3093, "step": 7634 }, { "epoch": 0.3617626154939588, "grad_norm": 0.74609375, "learning_rate": 0.0001421624458677655, "loss": 1.0157, "step": 7635 }, { "epoch": 0.36180999763089317, "grad_norm": 0.8046875, "learning_rate": 0.00014214894123494112, "loss": 0.9068, "step": 7636 }, { "epoch": 0.3618573797678275, "grad_norm": 0.7265625, "learning_rate": 0.0001421354356672981, "loss": 1.2128, "step": 7637 }, { "epoch": 0.3619047619047619, "grad_norm": 0.060791015625, "learning_rate": 0.000142121929165136, "loss": 0.0029, "step": 7638 }, { "epoch": 0.3619521440416963, "grad_norm": 0.578125, "learning_rate": 0.00014210842172875438, "loss": 1.0106, "step": 7639 }, { "epoch": 0.3619995261786307, "grad_norm": 0.75, "learning_rate": 0.00014209491335845283, "loss": 1.3891, "step": 7640 }, { "epoch": 0.362046908315565, "grad_norm": 0.6640625, "learning_rate": 0.00014208140405453097, "loss": 1.4021, "step": 7641 }, { "epoch": 0.3620942904524994, "grad_norm": 0.51953125, "learning_rate": 0.00014206789381728834, "loss": 0.6981, "step": 7642 }, { "epoch": 0.3621416725894338, "grad_norm": 0.41015625, "learning_rate": 0.00014205438264702465, "loss": 0.121, "step": 7643 }, { "epoch": 0.36218905472636814, "grad_norm": 0.72265625, "learning_rate": 0.0001420408705440395, "loss": 1.0905, "step": 7644 }, { "epoch": 0.36223643686330254, "grad_norm": 0.8671875, "learning_rate": 0.0001420273575086327, "loss": 0.9875, "step": 7645 }, { "epoch": 0.36228381900023693, "grad_norm": 0.73828125, "learning_rate": 0.00014201384354110385, "loss": 1.352, "step": 7646 }, { "epoch": 0.36233120113717127, "grad_norm": 0.6015625, "learning_rate": 0.00014200032864175268, "loss": 0.1415, "step": 7647 }, { "epoch": 0.36237858327410566, "grad_norm": 0.64453125, "learning_rate": 0.00014198681281087897, "loss": 0.8686, "step": 7648 }, { "epoch": 0.36242596541104005, "grad_norm": 0.59765625, "learning_rate": 0.00014197329604878248, "loss": 1.5066, "step": 7649 }, { "epoch": 0.3624733475479744, "grad_norm": 0.359375, "learning_rate": 0.000141959778355763, "loss": 0.0331, "step": 7650 }, { "epoch": 0.3625207296849088, "grad_norm": 0.66015625, "learning_rate": 0.0001419462597321203, "loss": 1.1088, "step": 7651 }, { "epoch": 0.3625681118218432, "grad_norm": 0.64453125, "learning_rate": 0.00014193274017815428, "loss": 0.6578, "step": 7652 }, { "epoch": 0.36261549395877757, "grad_norm": 0.59375, "learning_rate": 0.0001419192196941647, "loss": 0.7129, "step": 7653 }, { "epoch": 0.3626628760957119, "grad_norm": 0.50390625, "learning_rate": 0.0001419056982804515, "loss": 0.5904, "step": 7654 }, { "epoch": 0.3627102582326463, "grad_norm": 0.2314453125, "learning_rate": 0.00014189217593731454, "loss": 0.1664, "step": 7655 }, { "epoch": 0.3627576403695807, "grad_norm": 0.1240234375, "learning_rate": 0.00014187865266505377, "loss": 0.0093, "step": 7656 }, { "epoch": 0.36280502250651503, "grad_norm": 0.5078125, "learning_rate": 0.00014186512846396906, "loss": 0.5595, "step": 7657 }, { "epoch": 0.3628524046434494, "grad_norm": 0.6640625, "learning_rate": 0.0001418516033343604, "loss": 1.0324, "step": 7658 }, { "epoch": 0.3628997867803838, "grad_norm": 0.0016326904296875, "learning_rate": 0.00014183807727652777, "loss": 0.0001, "step": 7659 }, { "epoch": 0.36294716891731815, "grad_norm": 0.66015625, "learning_rate": 0.00014182455029077112, "loss": 1.1369, "step": 7660 }, { "epoch": 0.36299455105425255, "grad_norm": 0.58203125, "learning_rate": 0.0001418110223773905, "loss": 0.402, "step": 7661 }, { "epoch": 0.36304193319118694, "grad_norm": 0.44921875, "learning_rate": 0.00014179749353668598, "loss": 0.7577, "step": 7662 }, { "epoch": 0.3630893153281213, "grad_norm": 0.65234375, "learning_rate": 0.00014178396376895755, "loss": 0.7793, "step": 7663 }, { "epoch": 0.36313669746505567, "grad_norm": 0.84375, "learning_rate": 0.0001417704330745053, "loss": 0.7439, "step": 7664 }, { "epoch": 0.36318407960199006, "grad_norm": 0.53125, "learning_rate": 0.00014175690145362934, "loss": 1.0099, "step": 7665 }, { "epoch": 0.3632314617389244, "grad_norm": 0.64453125, "learning_rate": 0.00014174336890662977, "loss": 1.1489, "step": 7666 }, { "epoch": 0.3632788438758588, "grad_norm": 0.185546875, "learning_rate": 0.00014172983543380677, "loss": 0.1427, "step": 7667 }, { "epoch": 0.3633262260127932, "grad_norm": 0.78125, "learning_rate": 0.00014171630103546046, "loss": 1.0486, "step": 7668 }, { "epoch": 0.3633736081497276, "grad_norm": 0.189453125, "learning_rate": 0.000141702765711891, "loss": 0.1382, "step": 7669 }, { "epoch": 0.3634209902866619, "grad_norm": 0.220703125, "learning_rate": 0.00014168922946339863, "loss": 0.0904, "step": 7670 }, { "epoch": 0.3634683724235963, "grad_norm": 0.5, "learning_rate": 0.0001416756922902836, "loss": 0.0271, "step": 7671 }, { "epoch": 0.3635157545605307, "grad_norm": 0.5078125, "learning_rate": 0.00014166215419284605, "loss": 0.584, "step": 7672 }, { "epoch": 0.36356313669746504, "grad_norm": 0.57421875, "learning_rate": 0.00014164861517138632, "loss": 0.5146, "step": 7673 }, { "epoch": 0.36361051883439943, "grad_norm": 0.25390625, "learning_rate": 0.00014163507522620464, "loss": 0.0787, "step": 7674 }, { "epoch": 0.3636579009713338, "grad_norm": 0.1435546875, "learning_rate": 0.00014162153435760139, "loss": 0.0122, "step": 7675 }, { "epoch": 0.36370528310826816, "grad_norm": 0.3203125, "learning_rate": 0.00014160799256587682, "loss": 0.0143, "step": 7676 }, { "epoch": 0.36375266524520256, "grad_norm": 0.0810546875, "learning_rate": 0.00014159444985133128, "loss": 0.002, "step": 7677 }, { "epoch": 0.36380004738213695, "grad_norm": 0.56640625, "learning_rate": 0.00014158090621426515, "loss": 0.3689, "step": 7678 }, { "epoch": 0.3638474295190713, "grad_norm": 0.0027923583984375, "learning_rate": 0.00014156736165497883, "loss": 0.0002, "step": 7679 }, { "epoch": 0.3638948116560057, "grad_norm": 0.60546875, "learning_rate": 0.00014155381617377268, "loss": 0.8971, "step": 7680 }, { "epoch": 0.36394219379294007, "grad_norm": 0.057373046875, "learning_rate": 0.00014154026977094715, "loss": 0.0047, "step": 7681 }, { "epoch": 0.36398957592987446, "grad_norm": 0.734375, "learning_rate": 0.00014152672244680268, "loss": 1.345, "step": 7682 }, { "epoch": 0.3640369580668088, "grad_norm": 0.75390625, "learning_rate": 0.00014151317420163975, "loss": 1.2625, "step": 7683 }, { "epoch": 0.3640843402037432, "grad_norm": 0.4765625, "learning_rate": 0.00014149962503575884, "loss": 0.01, "step": 7684 }, { "epoch": 0.3641317223406776, "grad_norm": 0.96875, "learning_rate": 0.00014148607494946045, "loss": 0.8278, "step": 7685 }, { "epoch": 0.3641791044776119, "grad_norm": 0.48046875, "learning_rate": 0.00014147252394304508, "loss": 0.2542, "step": 7686 }, { "epoch": 0.3642264866145463, "grad_norm": 0.1787109375, "learning_rate": 0.00014145897201681335, "loss": 0.1399, "step": 7687 }, { "epoch": 0.3642738687514807, "grad_norm": 0.0135498046875, "learning_rate": 0.00014144541917106575, "loss": 0.0009, "step": 7688 }, { "epoch": 0.36432125088841505, "grad_norm": 0.78125, "learning_rate": 0.0001414318654061029, "loss": 1.0653, "step": 7689 }, { "epoch": 0.36436863302534944, "grad_norm": 0.73046875, "learning_rate": 0.0001414183107222254, "loss": 1.1532, "step": 7690 }, { "epoch": 0.36441601516228384, "grad_norm": 0.259765625, "learning_rate": 0.0001414047551197339, "loss": 0.0235, "step": 7691 }, { "epoch": 0.3644633972992182, "grad_norm": 0.75390625, "learning_rate": 0.000141391198598929, "loss": 1.3046, "step": 7692 }, { "epoch": 0.36451077943615257, "grad_norm": 0.189453125, "learning_rate": 0.00014137764116011145, "loss": 0.0336, "step": 7693 }, { "epoch": 0.36455816157308696, "grad_norm": 0.1728515625, "learning_rate": 0.00014136408280358186, "loss": 0.0187, "step": 7694 }, { "epoch": 0.3646055437100213, "grad_norm": 0.6953125, "learning_rate": 0.00014135052352964102, "loss": 0.8419, "step": 7695 }, { "epoch": 0.3646529258469557, "grad_norm": 0.05078125, "learning_rate": 0.00014133696333858958, "loss": 0.0062, "step": 7696 }, { "epoch": 0.3647003079838901, "grad_norm": 0.59765625, "learning_rate": 0.0001413234022307283, "loss": 1.0701, "step": 7697 }, { "epoch": 0.3647476901208245, "grad_norm": 0.65625, "learning_rate": 0.000141309840206358, "loss": 0.8737, "step": 7698 }, { "epoch": 0.3647950722577588, "grad_norm": 0.7265625, "learning_rate": 0.00014129627726577947, "loss": 0.1132, "step": 7699 }, { "epoch": 0.3648424543946932, "grad_norm": 0.53515625, "learning_rate": 0.00014128271340929343, "loss": 0.6267, "step": 7700 }, { "epoch": 0.3648898365316276, "grad_norm": 0.83984375, "learning_rate": 0.00014126914863720082, "loss": 0.9094, "step": 7701 }, { "epoch": 0.36493721866856194, "grad_norm": 0.79296875, "learning_rate": 0.00014125558294980246, "loss": 1.1937, "step": 7702 }, { "epoch": 0.36498460080549633, "grad_norm": 0.51953125, "learning_rate": 0.0001412420163473992, "loss": 0.98, "step": 7703 }, { "epoch": 0.3650319829424307, "grad_norm": 0.77734375, "learning_rate": 0.00014122844883029197, "loss": 1.0339, "step": 7704 }, { "epoch": 0.36507936507936506, "grad_norm": 0.70703125, "learning_rate": 0.00014121488039878162, "loss": 1.2399, "step": 7705 }, { "epoch": 0.36512674721629945, "grad_norm": 0.66796875, "learning_rate": 0.00014120131105316915, "loss": 0.5477, "step": 7706 }, { "epoch": 0.36517412935323385, "grad_norm": 0.90625, "learning_rate": 0.00014118774079375544, "loss": 0.1137, "step": 7707 }, { "epoch": 0.3652215114901682, "grad_norm": 0.62890625, "learning_rate": 0.00014117416962084156, "loss": 1.4176, "step": 7708 }, { "epoch": 0.3652688936271026, "grad_norm": 0.5859375, "learning_rate": 0.00014116059753472844, "loss": 0.9718, "step": 7709 }, { "epoch": 0.36531627576403697, "grad_norm": 0.486328125, "learning_rate": 0.00014114702453571712, "loss": 0.6634, "step": 7710 }, { "epoch": 0.36536365790097136, "grad_norm": 0.8046875, "learning_rate": 0.00014113345062410863, "loss": 0.8321, "step": 7711 }, { "epoch": 0.3654110400379057, "grad_norm": 0.1318359375, "learning_rate": 0.000141119875800204, "loss": 0.0025, "step": 7712 }, { "epoch": 0.3654584221748401, "grad_norm": 0.310546875, "learning_rate": 0.0001411063000643043, "loss": 0.0578, "step": 7713 }, { "epoch": 0.3655058043117745, "grad_norm": 0.57421875, "learning_rate": 0.0001410927234167107, "loss": 0.6011, "step": 7714 }, { "epoch": 0.3655531864487088, "grad_norm": 0.56640625, "learning_rate": 0.00014107914585772424, "loss": 0.8508, "step": 7715 }, { "epoch": 0.3656005685856432, "grad_norm": 0.54296875, "learning_rate": 0.00014106556738764607, "loss": 0.0356, "step": 7716 }, { "epoch": 0.3656479507225776, "grad_norm": 0.6796875, "learning_rate": 0.00014105198800677736, "loss": 1.1596, "step": 7717 }, { "epoch": 0.36569533285951195, "grad_norm": 0.375, "learning_rate": 0.0001410384077154193, "loss": 0.111, "step": 7718 }, { "epoch": 0.36574271499644634, "grad_norm": 0.6484375, "learning_rate": 0.00014102482651387309, "loss": 0.9378, "step": 7719 }, { "epoch": 0.36579009713338073, "grad_norm": 0.734375, "learning_rate": 0.00014101124440243988, "loss": 1.1642, "step": 7720 }, { "epoch": 0.36583747927031507, "grad_norm": 0.60546875, "learning_rate": 0.00014099766138142098, "loss": 0.5318, "step": 7721 }, { "epoch": 0.36588486140724946, "grad_norm": 0.60546875, "learning_rate": 0.00014098407745111757, "loss": 0.8979, "step": 7722 }, { "epoch": 0.36593224354418386, "grad_norm": 0.70703125, "learning_rate": 0.00014097049261183102, "loss": 0.9741, "step": 7723 }, { "epoch": 0.3659796256811182, "grad_norm": 0.7109375, "learning_rate": 0.00014095690686386257, "loss": 0.9232, "step": 7724 }, { "epoch": 0.3660270078180526, "grad_norm": 0.6484375, "learning_rate": 0.00014094332020751356, "loss": 0.8781, "step": 7725 }, { "epoch": 0.366074389954987, "grad_norm": 0.5625, "learning_rate": 0.0001409297326430853, "loss": 0.6715, "step": 7726 }, { "epoch": 0.36612177209192137, "grad_norm": 0.59765625, "learning_rate": 0.00014091614417087922, "loss": 1.0083, "step": 7727 }, { "epoch": 0.3661691542288557, "grad_norm": 0.024658203125, "learning_rate": 0.00014090255479119659, "loss": 0.0023, "step": 7728 }, { "epoch": 0.3662165363657901, "grad_norm": 0.09912109375, "learning_rate": 0.00014088896450433887, "loss": 0.0067, "step": 7729 }, { "epoch": 0.3662639185027245, "grad_norm": 0.5703125, "learning_rate": 0.0001408753733106075, "loss": 1.0664, "step": 7730 }, { "epoch": 0.36631130063965883, "grad_norm": 0.51953125, "learning_rate": 0.00014086178121030385, "loss": 0.7424, "step": 7731 }, { "epoch": 0.3663586827765932, "grad_norm": 0.59375, "learning_rate": 0.00014084818820372943, "loss": 0.5998, "step": 7732 }, { "epoch": 0.3664060649135276, "grad_norm": 0.6875, "learning_rate": 0.00014083459429118573, "loss": 1.0081, "step": 7733 }, { "epoch": 0.36645344705046196, "grad_norm": 0.52734375, "learning_rate": 0.0001408209994729742, "loss": 0.2045, "step": 7734 }, { "epoch": 0.36650082918739635, "grad_norm": 0.140625, "learning_rate": 0.0001408074037493964, "loss": 0.0289, "step": 7735 }, { "epoch": 0.36654821132433074, "grad_norm": 0.8515625, "learning_rate": 0.0001407938071207538, "loss": 0.5849, "step": 7736 }, { "epoch": 0.3665955934612651, "grad_norm": 0.5546875, "learning_rate": 0.0001407802095873481, "loss": 0.0738, "step": 7737 }, { "epoch": 0.36664297559819947, "grad_norm": 0.80078125, "learning_rate": 0.0001407666111494807, "loss": 1.0406, "step": 7738 }, { "epoch": 0.36669035773513387, "grad_norm": 0.77734375, "learning_rate": 0.00014075301180745332, "loss": 1.2881, "step": 7739 }, { "epoch": 0.36673773987206826, "grad_norm": 0.04541015625, "learning_rate": 0.00014073941156156756, "loss": 0.0032, "step": 7740 }, { "epoch": 0.3667851220090026, "grad_norm": 0.98046875, "learning_rate": 0.000140725810412125, "loss": 1.5008, "step": 7741 }, { "epoch": 0.366832504145937, "grad_norm": 1.765625, "learning_rate": 0.0001407122083594274, "loss": 0.498, "step": 7742 }, { "epoch": 0.3668798862828714, "grad_norm": 0.5234375, "learning_rate": 0.00014069860540377635, "loss": 0.7711, "step": 7743 }, { "epoch": 0.3669272684198057, "grad_norm": 0.625, "learning_rate": 0.00014068500154547361, "loss": 1.1254, "step": 7744 }, { "epoch": 0.3669746505567401, "grad_norm": 0.65234375, "learning_rate": 0.00014067139678482086, "loss": 0.6638, "step": 7745 }, { "epoch": 0.3670220326936745, "grad_norm": 0.60546875, "learning_rate": 0.00014065779112211983, "loss": 0.7345, "step": 7746 }, { "epoch": 0.36706941483060884, "grad_norm": 1.171875, "learning_rate": 0.0001406441845576723, "loss": 0.6039, "step": 7747 }, { "epoch": 0.36711679696754324, "grad_norm": 0.91015625, "learning_rate": 0.00014063057709178008, "loss": 0.9442, "step": 7748 }, { "epoch": 0.36716417910447763, "grad_norm": 0.56640625, "learning_rate": 0.00014061696872474494, "loss": 0.4191, "step": 7749 }, { "epoch": 0.36721156124141197, "grad_norm": 0.466796875, "learning_rate": 0.00014060335945686867, "loss": 0.5002, "step": 7750 }, { "epoch": 0.36725894337834636, "grad_norm": 0.6875, "learning_rate": 0.00014058974928845313, "loss": 0.6164, "step": 7751 }, { "epoch": 0.36730632551528075, "grad_norm": 0.34375, "learning_rate": 0.00014057613821980022, "loss": 0.1459, "step": 7752 }, { "epoch": 0.3673537076522151, "grad_norm": 0.52734375, "learning_rate": 0.00014056252625121177, "loss": 1.2057, "step": 7753 }, { "epoch": 0.3674010897891495, "grad_norm": 0.58984375, "learning_rate": 0.00014054891338298968, "loss": 0.0651, "step": 7754 }, { "epoch": 0.3674484719260839, "grad_norm": 0.60546875, "learning_rate": 0.00014053529961543588, "loss": 1.1087, "step": 7755 }, { "epoch": 0.36749585406301827, "grad_norm": 0.3671875, "learning_rate": 0.0001405216849488523, "loss": 0.1513, "step": 7756 }, { "epoch": 0.3675432361999526, "grad_norm": 0.70703125, "learning_rate": 0.00014050806938354092, "loss": 1.0219, "step": 7757 }, { "epoch": 0.367590618336887, "grad_norm": 0.63671875, "learning_rate": 0.00014049445291980375, "loss": 0.9232, "step": 7758 }, { "epoch": 0.3676380004738214, "grad_norm": 0.046630859375, "learning_rate": 0.0001404808355579427, "loss": 0.0035, "step": 7759 }, { "epoch": 0.36768538261075573, "grad_norm": 0.455078125, "learning_rate": 0.00014046721729825987, "loss": 0.513, "step": 7760 }, { "epoch": 0.3677327647476901, "grad_norm": 0.87109375, "learning_rate": 0.00014045359814105724, "loss": 0.2758, "step": 7761 }, { "epoch": 0.3677801468846245, "grad_norm": 0.8046875, "learning_rate": 0.00014043997808663687, "loss": 1.0791, "step": 7762 }, { "epoch": 0.36782752902155885, "grad_norm": 0.74609375, "learning_rate": 0.00014042635713530093, "loss": 0.9252, "step": 7763 }, { "epoch": 0.36787491115849325, "grad_norm": 0.60546875, "learning_rate": 0.00014041273528735137, "loss": 1.0741, "step": 7764 }, { "epoch": 0.36792229329542764, "grad_norm": 0.46484375, "learning_rate": 0.00014039911254309044, "loss": 0.3521, "step": 7765 }, { "epoch": 0.367969675432362, "grad_norm": 0.68359375, "learning_rate": 0.00014038548890282022, "loss": 0.8648, "step": 7766 }, { "epoch": 0.36801705756929637, "grad_norm": 0.7421875, "learning_rate": 0.00014037186436684286, "loss": 0.751, "step": 7767 }, { "epoch": 0.36806443970623076, "grad_norm": 0.0703125, "learning_rate": 0.00014035823893546054, "loss": 0.0032, "step": 7768 }, { "epoch": 0.3681118218431651, "grad_norm": 0.8984375, "learning_rate": 0.00014034461260897547, "loss": 0.5844, "step": 7769 }, { "epoch": 0.3681592039800995, "grad_norm": 0.0205078125, "learning_rate": 0.00014033098538768988, "loss": 0.0019, "step": 7770 }, { "epoch": 0.3682065861170339, "grad_norm": 0.482421875, "learning_rate": 0.000140317357271906, "loss": 0.5582, "step": 7771 }, { "epoch": 0.3682539682539683, "grad_norm": 0.224609375, "learning_rate": 0.00014030372826192607, "loss": 0.0237, "step": 7772 }, { "epoch": 0.3683013503909026, "grad_norm": 0.59765625, "learning_rate": 0.00014029009835805236, "loss": 1.1245, "step": 7773 }, { "epoch": 0.368348732527837, "grad_norm": 0.671875, "learning_rate": 0.00014027646756058722, "loss": 0.0236, "step": 7774 }, { "epoch": 0.3683961146647714, "grad_norm": 0.53515625, "learning_rate": 0.0001402628358698329, "loss": 0.8308, "step": 7775 }, { "epoch": 0.36844349680170574, "grad_norm": 0.59375, "learning_rate": 0.00014024920328609177, "loss": 0.5748, "step": 7776 }, { "epoch": 0.36849087893864013, "grad_norm": 0.81640625, "learning_rate": 0.00014023556980966617, "loss": 1.1517, "step": 7777 }, { "epoch": 0.3685382610755745, "grad_norm": 1.4609375, "learning_rate": 0.0001402219354408585, "loss": 0.2758, "step": 7778 }, { "epoch": 0.36858564321250886, "grad_norm": 0.59375, "learning_rate": 0.00014020830017997117, "loss": 1.1275, "step": 7779 }, { "epoch": 0.36863302534944326, "grad_norm": 0.68359375, "learning_rate": 0.00014019466402730654, "loss": 1.3826, "step": 7780 }, { "epoch": 0.36868040748637765, "grad_norm": 0.49609375, "learning_rate": 0.00014018102698316708, "loss": 1.2337, "step": 7781 }, { "epoch": 0.368727789623312, "grad_norm": 0.7734375, "learning_rate": 0.00014016738904785525, "loss": 1.1935, "step": 7782 }, { "epoch": 0.3687751717602464, "grad_norm": 0.62890625, "learning_rate": 0.0001401537502216735, "loss": 1.1739, "step": 7783 }, { "epoch": 0.36882255389718077, "grad_norm": 0.1806640625, "learning_rate": 0.00014014011050492438, "loss": 0.1418, "step": 7784 }, { "epoch": 0.36886993603411516, "grad_norm": 0.7265625, "learning_rate": 0.00014012646989791032, "loss": 1.3438, "step": 7785 }, { "epoch": 0.3689173181710495, "grad_norm": 0.609375, "learning_rate": 0.0001401128284009339, "loss": 1.1215, "step": 7786 }, { "epoch": 0.3689647003079839, "grad_norm": 0.640625, "learning_rate": 0.0001400991860142977, "loss": 0.2389, "step": 7787 }, { "epoch": 0.3690120824449183, "grad_norm": 0.55859375, "learning_rate": 0.00014008554273830422, "loss": 1.1212, "step": 7788 }, { "epoch": 0.3690594645818526, "grad_norm": 0.87109375, "learning_rate": 0.00014007189857325616, "loss": 1.3939, "step": 7789 }, { "epoch": 0.369106846718787, "grad_norm": 0.6796875, "learning_rate": 0.00014005825351945609, "loss": 1.1574, "step": 7790 }, { "epoch": 0.3691542288557214, "grad_norm": 0.447265625, "learning_rate": 0.00014004460757720654, "loss": 0.633, "step": 7791 }, { "epoch": 0.36920161099265575, "grad_norm": 0.734375, "learning_rate": 0.0001400309607468103, "loss": 1.1132, "step": 7792 }, { "epoch": 0.36924899312959014, "grad_norm": 0.396484375, "learning_rate": 0.00014001731302856995, "loss": 0.0308, "step": 7793 }, { "epoch": 0.36929637526652453, "grad_norm": 0.62109375, "learning_rate": 0.00014000366442278828, "loss": 0.5809, "step": 7794 }, { "epoch": 0.36934375740345887, "grad_norm": 0.60546875, "learning_rate": 0.0001399900149297679, "loss": 0.8216, "step": 7795 }, { "epoch": 0.36939113954039327, "grad_norm": 0.82421875, "learning_rate": 0.00013997636454981158, "loss": 0.9698, "step": 7796 }, { "epoch": 0.36943852167732766, "grad_norm": 0.64453125, "learning_rate": 0.0001399627132832221, "loss": 1.0882, "step": 7797 }, { "epoch": 0.369485903814262, "grad_norm": 0.62109375, "learning_rate": 0.0001399490611303022, "loss": 0.7544, "step": 7798 }, { "epoch": 0.3695332859511964, "grad_norm": 0.63671875, "learning_rate": 0.00013993540809135468, "loss": 0.711, "step": 7799 }, { "epoch": 0.3695806680881308, "grad_norm": 0.53515625, "learning_rate": 0.00013992175416668233, "loss": 0.3963, "step": 7800 }, { "epoch": 0.3696280502250652, "grad_norm": 0.5234375, "learning_rate": 0.00013990809935658798, "loss": 0.0312, "step": 7801 }, { "epoch": 0.3696754323619995, "grad_norm": 0.162109375, "learning_rate": 0.00013989444366137454, "loss": 0.0184, "step": 7802 }, { "epoch": 0.3697228144989339, "grad_norm": 0.228515625, "learning_rate": 0.0001398807870813448, "loss": 0.1373, "step": 7803 }, { "epoch": 0.3697701966358683, "grad_norm": 0.92578125, "learning_rate": 0.00013986712961680167, "loss": 1.1736, "step": 7804 }, { "epoch": 0.36981757877280264, "grad_norm": 0.796875, "learning_rate": 0.0001398534712680481, "loss": 0.973, "step": 7805 }, { "epoch": 0.36986496090973703, "grad_norm": 0.6171875, "learning_rate": 0.000139839812035387, "loss": 0.8525, "step": 7806 }, { "epoch": 0.3699123430466714, "grad_norm": 0.00148773193359375, "learning_rate": 0.00013982615191912128, "loss": 0.0001, "step": 7807 }, { "epoch": 0.36995972518360576, "grad_norm": 1.28125, "learning_rate": 0.00013981249091955393, "loss": 0.2925, "step": 7808 }, { "epoch": 0.37000710732054015, "grad_norm": 0.1650390625, "learning_rate": 0.00013979882903698796, "loss": 0.021, "step": 7809 }, { "epoch": 0.37005448945747454, "grad_norm": 1.7890625, "learning_rate": 0.0001397851662717263, "loss": 0.3092, "step": 7810 }, { "epoch": 0.3701018715944089, "grad_norm": 0.6171875, "learning_rate": 0.00013977150262407207, "loss": 1.2958, "step": 7811 }, { "epoch": 0.3701492537313433, "grad_norm": 0.26953125, "learning_rate": 0.00013975783809432827, "loss": 0.0816, "step": 7812 }, { "epoch": 0.37019663586827767, "grad_norm": 0.71875, "learning_rate": 0.00013974417268279802, "loss": 0.9872, "step": 7813 }, { "epoch": 0.37024401800521206, "grad_norm": 0.55859375, "learning_rate": 0.0001397305063897843, "loss": 0.4005, "step": 7814 }, { "epoch": 0.3702914001421464, "grad_norm": 0.6640625, "learning_rate": 0.00013971683921559024, "loss": 1.3248, "step": 7815 }, { "epoch": 0.3703387822790808, "grad_norm": 0.52734375, "learning_rate": 0.000139703171160519, "loss": 0.5001, "step": 7816 }, { "epoch": 0.3703861644160152, "grad_norm": 0.039306640625, "learning_rate": 0.00013968950222487375, "loss": 0.0041, "step": 7817 }, { "epoch": 0.3704335465529495, "grad_norm": 0.578125, "learning_rate": 0.00013967583240895762, "loss": 0.1899, "step": 7818 }, { "epoch": 0.3704809286898839, "grad_norm": 0.56640625, "learning_rate": 0.00013966216171307374, "loss": 0.7107, "step": 7819 }, { "epoch": 0.3705283108268183, "grad_norm": 0.6953125, "learning_rate": 0.0001396484901375254, "loss": 1.0735, "step": 7820 }, { "epoch": 0.37057569296375265, "grad_norm": 0.5078125, "learning_rate": 0.0001396348176826158, "loss": 0.914, "step": 7821 }, { "epoch": 0.37062307510068704, "grad_norm": 0.5859375, "learning_rate": 0.0001396211443486481, "loss": 0.687, "step": 7822 }, { "epoch": 0.37067045723762143, "grad_norm": 0.62109375, "learning_rate": 0.00013960747013592567, "loss": 0.3023, "step": 7823 }, { "epoch": 0.37071783937455577, "grad_norm": 0.54296875, "learning_rate": 0.0001395937950447517, "loss": 0.8713, "step": 7824 }, { "epoch": 0.37076522151149016, "grad_norm": 0.6328125, "learning_rate": 0.00013958011907542957, "loss": 0.7141, "step": 7825 }, { "epoch": 0.37081260364842455, "grad_norm": 0.498046875, "learning_rate": 0.0001395664422282625, "loss": 0.8826, "step": 7826 }, { "epoch": 0.3708599857853589, "grad_norm": 0.79296875, "learning_rate": 0.00013955276450355392, "loss": 1.0949, "step": 7827 }, { "epoch": 0.3709073679222933, "grad_norm": 0.69921875, "learning_rate": 0.0001395390859016072, "loss": 1.1836, "step": 7828 }, { "epoch": 0.3709547500592277, "grad_norm": 0.9140625, "learning_rate": 0.00013952540642272562, "loss": 0.7109, "step": 7829 }, { "epoch": 0.37100213219616207, "grad_norm": 0.37890625, "learning_rate": 0.00013951172606721263, "loss": 0.065, "step": 7830 }, { "epoch": 0.3710495143330964, "grad_norm": 0.17578125, "learning_rate": 0.00013949804483537163, "loss": 0.1274, "step": 7831 }, { "epoch": 0.3710968964700308, "grad_norm": 1.3984375, "learning_rate": 0.0001394843627275061, "loss": 0.7424, "step": 7832 }, { "epoch": 0.3711442786069652, "grad_norm": 0.8046875, "learning_rate": 0.00013947067974391943, "loss": 0.9609, "step": 7833 }, { "epoch": 0.37119166074389953, "grad_norm": 0.1513671875, "learning_rate": 0.00013945699588491513, "loss": 0.026, "step": 7834 }, { "epoch": 0.3712390428808339, "grad_norm": 0.22265625, "learning_rate": 0.0001394433111507967, "loss": 0.1169, "step": 7835 }, { "epoch": 0.3712864250177683, "grad_norm": 0.69921875, "learning_rate": 0.00013942962554186765, "loss": 1.5699, "step": 7836 }, { "epoch": 0.37133380715470266, "grad_norm": 0.79296875, "learning_rate": 0.00013941593905843148, "loss": 1.2091, "step": 7837 }, { "epoch": 0.37138118929163705, "grad_norm": 0.7890625, "learning_rate": 0.0001394022517007918, "loss": 0.9868, "step": 7838 }, { "epoch": 0.37142857142857144, "grad_norm": 0.423828125, "learning_rate": 0.0001393885634692521, "loss": 0.0908, "step": 7839 }, { "epoch": 0.3714759535655058, "grad_norm": 0.6171875, "learning_rate": 0.00013937487436411607, "loss": 0.877, "step": 7840 }, { "epoch": 0.37152333570244017, "grad_norm": 0.6875, "learning_rate": 0.0001393611843856872, "loss": 0.9356, "step": 7841 }, { "epoch": 0.37157071783937456, "grad_norm": 0.671875, "learning_rate": 0.00013934749353426923, "loss": 1.2068, "step": 7842 }, { "epoch": 0.37161809997630896, "grad_norm": 0.74609375, "learning_rate": 0.00013933380181016576, "loss": 1.2205, "step": 7843 }, { "epoch": 0.3716654821132433, "grad_norm": 0.00173187255859375, "learning_rate": 0.00013932010921368049, "loss": 0.0001, "step": 7844 }, { "epoch": 0.3717128642501777, "grad_norm": 0.875, "learning_rate": 0.00013930641574511705, "loss": 1.0164, "step": 7845 }, { "epoch": 0.3717602463871121, "grad_norm": 0.033203125, "learning_rate": 0.00013929272140477918, "loss": 0.0033, "step": 7846 }, { "epoch": 0.3718076285240464, "grad_norm": 0.5859375, "learning_rate": 0.00013927902619297058, "loss": 0.6085, "step": 7847 }, { "epoch": 0.3718550106609808, "grad_norm": 0.7109375, "learning_rate": 0.00013926533010999505, "loss": 1.1653, "step": 7848 }, { "epoch": 0.3719023927979152, "grad_norm": 0.640625, "learning_rate": 0.00013925163315615631, "loss": 1.0358, "step": 7849 }, { "epoch": 0.37194977493484954, "grad_norm": 0.6328125, "learning_rate": 0.00013923793533175815, "loss": 1.3688, "step": 7850 }, { "epoch": 0.37199715707178393, "grad_norm": 0.6640625, "learning_rate": 0.00013922423663710438, "loss": 0.641, "step": 7851 }, { "epoch": 0.37204453920871833, "grad_norm": 0.55859375, "learning_rate": 0.00013921053707249885, "loss": 0.7746, "step": 7852 }, { "epoch": 0.37209192134565267, "grad_norm": 0.50390625, "learning_rate": 0.00013919683663824532, "loss": 0.0762, "step": 7853 }, { "epoch": 0.37213930348258706, "grad_norm": 0.71875, "learning_rate": 0.00013918313533464773, "loss": 0.0893, "step": 7854 }, { "epoch": 0.37218668561952145, "grad_norm": 0.64453125, "learning_rate": 0.00013916943316200995, "loss": 0.939, "step": 7855 }, { "epoch": 0.3722340677564558, "grad_norm": 0.60546875, "learning_rate": 0.00013915573012063582, "loss": 1.1455, "step": 7856 }, { "epoch": 0.3722814498933902, "grad_norm": 0.5, "learning_rate": 0.00013914202621082935, "loss": 0.502, "step": 7857 }, { "epoch": 0.3723288320303246, "grad_norm": 0.70703125, "learning_rate": 0.0001391283214328944, "loss": 1.1999, "step": 7858 }, { "epoch": 0.37237621416725897, "grad_norm": 0.70703125, "learning_rate": 0.00013911461578713498, "loss": 0.9362, "step": 7859 }, { "epoch": 0.3724235963041933, "grad_norm": 0.69140625, "learning_rate": 0.00013910090927385507, "loss": 1.2651, "step": 7860 }, { "epoch": 0.3724709784411277, "grad_norm": 0.2421875, "learning_rate": 0.00013908720189335856, "loss": 0.0403, "step": 7861 }, { "epoch": 0.3725183605780621, "grad_norm": 0.60546875, "learning_rate": 0.00013907349364594959, "loss": 1.3027, "step": 7862 }, { "epoch": 0.37256574271499643, "grad_norm": 0.703125, "learning_rate": 0.00013905978453193217, "loss": 0.1854, "step": 7863 }, { "epoch": 0.3726131248519308, "grad_norm": 0.7890625, "learning_rate": 0.0001390460745516103, "loss": 1.1814, "step": 7864 }, { "epoch": 0.3726605069888652, "grad_norm": 0.8671875, "learning_rate": 0.00013903236370528812, "loss": 1.1386, "step": 7865 }, { "epoch": 0.37270788912579955, "grad_norm": 0.76171875, "learning_rate": 0.00013901865199326968, "loss": 1.2676, "step": 7866 }, { "epoch": 0.37275527126273394, "grad_norm": 1.1015625, "learning_rate": 0.0001390049394158591, "loss": 1.6905, "step": 7867 }, { "epoch": 0.37280265339966834, "grad_norm": 0.69140625, "learning_rate": 0.0001389912259733605, "loss": 1.3225, "step": 7868 }, { "epoch": 0.3728500355366027, "grad_norm": 0.77734375, "learning_rate": 0.00013897751166607803, "loss": 0.8471, "step": 7869 }, { "epoch": 0.37289741767353707, "grad_norm": 0.68359375, "learning_rate": 0.00013896379649431587, "loss": 1.0288, "step": 7870 }, { "epoch": 0.37294479981047146, "grad_norm": 0.6484375, "learning_rate": 0.00013895008045837823, "loss": 0.7012, "step": 7871 }, { "epoch": 0.37299218194740585, "grad_norm": 0.5703125, "learning_rate": 0.00013893636355856925, "loss": 0.4255, "step": 7872 }, { "epoch": 0.3730395640843402, "grad_norm": 0.2373046875, "learning_rate": 0.00013892264579519324, "loss": 0.1423, "step": 7873 }, { "epoch": 0.3730869462212746, "grad_norm": 0.3359375, "learning_rate": 0.0001389089271685544, "loss": 0.1286, "step": 7874 }, { "epoch": 0.373134328358209, "grad_norm": 0.5859375, "learning_rate": 0.00013889520767895698, "loss": 0.8213, "step": 7875 }, { "epoch": 0.3731817104951433, "grad_norm": 0.255859375, "learning_rate": 0.0001388814873267053, "loss": 0.0258, "step": 7876 }, { "epoch": 0.3732290926320777, "grad_norm": 0.234375, "learning_rate": 0.00013886776611210364, "loss": 0.0293, "step": 7877 }, { "epoch": 0.3732764747690121, "grad_norm": 0.53515625, "learning_rate": 0.00013885404403545635, "loss": 0.4202, "step": 7878 }, { "epoch": 0.37332385690594644, "grad_norm": 0.64453125, "learning_rate": 0.0001388403210970677, "loss": 0.8329, "step": 7879 }, { "epoch": 0.37337123904288083, "grad_norm": 0.6953125, "learning_rate": 0.00013882659729724212, "loss": 0.7784, "step": 7880 }, { "epoch": 0.3734186211798152, "grad_norm": 0.2294921875, "learning_rate": 0.00013881287263628396, "loss": 0.1253, "step": 7881 }, { "epoch": 0.37346600331674956, "grad_norm": 0.080078125, "learning_rate": 0.00013879914711449766, "loss": 0.0111, "step": 7882 }, { "epoch": 0.37351338545368395, "grad_norm": 0.05908203125, "learning_rate": 0.00013878542073218755, "loss": 0.0033, "step": 7883 }, { "epoch": 0.37356076759061835, "grad_norm": 0.69921875, "learning_rate": 0.00013877169348965819, "loss": 1.2487, "step": 7884 }, { "epoch": 0.3736081497275527, "grad_norm": 0.64453125, "learning_rate": 0.00013875796538721392, "loss": 0.2762, "step": 7885 }, { "epoch": 0.3736555318644871, "grad_norm": 0.51953125, "learning_rate": 0.00013874423642515932, "loss": 1.1925, "step": 7886 }, { "epoch": 0.37370291400142147, "grad_norm": 0.62890625, "learning_rate": 0.00013873050660379873, "loss": 1.1001, "step": 7887 }, { "epoch": 0.37375029613835586, "grad_norm": 0.7265625, "learning_rate": 0.00013871677592343682, "loss": 0.7461, "step": 7888 }, { "epoch": 0.3737976782752902, "grad_norm": 1.1171875, "learning_rate": 0.00013870304438437804, "loss": 0.6083, "step": 7889 }, { "epoch": 0.3738450604122246, "grad_norm": 0.51171875, "learning_rate": 0.00013868931198692696, "loss": 0.517, "step": 7890 }, { "epoch": 0.373892442549159, "grad_norm": 0.5859375, "learning_rate": 0.00013867557873138814, "loss": 0.9896, "step": 7891 }, { "epoch": 0.3739398246860933, "grad_norm": 0.466796875, "learning_rate": 0.0001386618446180662, "loss": 0.0736, "step": 7892 }, { "epoch": 0.3739872068230277, "grad_norm": 0.5859375, "learning_rate": 0.00013864810964726572, "loss": 0.8012, "step": 7893 }, { "epoch": 0.3740345889599621, "grad_norm": 0.494140625, "learning_rate": 0.00013863437381929133, "loss": 0.2274, "step": 7894 }, { "epoch": 0.37408197109689645, "grad_norm": 1.2734375, "learning_rate": 0.00013862063713444768, "loss": 0.2137, "step": 7895 }, { "epoch": 0.37412935323383084, "grad_norm": 0.6875, "learning_rate": 0.00013860689959303946, "loss": 1.0898, "step": 7896 }, { "epoch": 0.37417673537076523, "grad_norm": 0.2021484375, "learning_rate": 0.0001385931611953713, "loss": 0.0916, "step": 7897 }, { "epoch": 0.37422411750769957, "grad_norm": 1.046875, "learning_rate": 0.00013857942194174793, "loss": 0.2023, "step": 7898 }, { "epoch": 0.37427149964463396, "grad_norm": 0.154296875, "learning_rate": 0.00013856568183247408, "loss": 0.1192, "step": 7899 }, { "epoch": 0.37431888178156836, "grad_norm": 0.474609375, "learning_rate": 0.00013855194086785451, "loss": 0.411, "step": 7900 }, { "epoch": 0.37436626391850275, "grad_norm": 0.50390625, "learning_rate": 0.00013853819904819395, "loss": 0.5893, "step": 7901 }, { "epoch": 0.3744136460554371, "grad_norm": 0.240234375, "learning_rate": 0.00013852445637379716, "loss": 0.1442, "step": 7902 }, { "epoch": 0.3744610281923715, "grad_norm": 0.80859375, "learning_rate": 0.00013851071284496898, "loss": 0.8285, "step": 7903 }, { "epoch": 0.3745084103293059, "grad_norm": 0.119140625, "learning_rate": 0.00013849696846201417, "loss": 0.0176, "step": 7904 }, { "epoch": 0.3745557924662402, "grad_norm": 0.294921875, "learning_rate": 0.00013848322322523765, "loss": 0.112, "step": 7905 }, { "epoch": 0.3746031746031746, "grad_norm": 0.2197265625, "learning_rate": 0.0001384694771349442, "loss": 0.1176, "step": 7906 }, { "epoch": 0.374650556740109, "grad_norm": 0.7421875, "learning_rate": 0.00013845573019143876, "loss": 0.793, "step": 7907 }, { "epoch": 0.37469793887704334, "grad_norm": 0.66796875, "learning_rate": 0.00013844198239502614, "loss": 1.3484, "step": 7908 }, { "epoch": 0.37474532101397773, "grad_norm": 0.796875, "learning_rate": 0.00013842823374601135, "loss": 1.2427, "step": 7909 }, { "epoch": 0.3747927031509121, "grad_norm": 0.49609375, "learning_rate": 0.00013841448424469922, "loss": 1.1234, "step": 7910 }, { "epoch": 0.37484008528784646, "grad_norm": 0.8671875, "learning_rate": 0.00013840073389139476, "loss": 0.0352, "step": 7911 }, { "epoch": 0.37488746742478085, "grad_norm": 0.578125, "learning_rate": 0.00013838698268640293, "loss": 0.8207, "step": 7912 }, { "epoch": 0.37493484956171524, "grad_norm": 0.54296875, "learning_rate": 0.0001383732306300287, "loss": 0.5687, "step": 7913 }, { "epoch": 0.3749822316986496, "grad_norm": 0.0791015625, "learning_rate": 0.00013835947772257708, "loss": 0.0053, "step": 7914 }, { "epoch": 0.375029613835584, "grad_norm": 0.23046875, "learning_rate": 0.00013834572396435318, "loss": 0.1277, "step": 7915 }, { "epoch": 0.37507699597251837, "grad_norm": 0.83203125, "learning_rate": 0.00013833196935566188, "loss": 1.2075, "step": 7916 }, { "epoch": 0.37512437810945276, "grad_norm": 0.7109375, "learning_rate": 0.00013831821389680837, "loss": 0.0653, "step": 7917 }, { "epoch": 0.3751717602463871, "grad_norm": 0.671875, "learning_rate": 0.00013830445758809766, "loss": 1.2123, "step": 7918 }, { "epoch": 0.3752191423833215, "grad_norm": 0.78125, "learning_rate": 0.0001382907004298349, "loss": 1.2025, "step": 7919 }, { "epoch": 0.3752665245202559, "grad_norm": 0.48828125, "learning_rate": 0.00013827694242232519, "loss": 0.7279, "step": 7920 }, { "epoch": 0.3753139066571902, "grad_norm": 0.9375, "learning_rate": 0.00013826318356587367, "loss": 0.2879, "step": 7921 }, { "epoch": 0.3753612887941246, "grad_norm": 0.55859375, "learning_rate": 0.00013824942386078548, "loss": 0.8827, "step": 7922 }, { "epoch": 0.375408670931059, "grad_norm": 0.6171875, "learning_rate": 0.00013823566330736583, "loss": 0.9701, "step": 7923 }, { "epoch": 0.37545605306799334, "grad_norm": 0.82421875, "learning_rate": 0.0001382219019059199, "loss": 0.8685, "step": 7924 }, { "epoch": 0.37550343520492774, "grad_norm": 1.4140625, "learning_rate": 0.00013820813965675287, "loss": 0.6137, "step": 7925 }, { "epoch": 0.37555081734186213, "grad_norm": 0.65625, "learning_rate": 0.00013819437656017, "loss": 0.6669, "step": 7926 }, { "epoch": 0.37559819947879647, "grad_norm": 0.578125, "learning_rate": 0.00013818061261647654, "loss": 1.0922, "step": 7927 }, { "epoch": 0.37564558161573086, "grad_norm": 0.62109375, "learning_rate": 0.0001381668478259778, "loss": 0.8572, "step": 7928 }, { "epoch": 0.37569296375266525, "grad_norm": 0.81640625, "learning_rate": 0.000138153082188979, "loss": 0.2273, "step": 7929 }, { "epoch": 0.37574034588959965, "grad_norm": 0.8125, "learning_rate": 0.00013813931570578548, "loss": 1.2525, "step": 7930 }, { "epoch": 0.375787728026534, "grad_norm": 0.283203125, "learning_rate": 0.00013812554837670256, "loss": 0.0903, "step": 7931 }, { "epoch": 0.3758351101634684, "grad_norm": 0.51171875, "learning_rate": 0.0001381117802020356, "loss": 0.0948, "step": 7932 }, { "epoch": 0.37588249230040277, "grad_norm": 0.6796875, "learning_rate": 0.00013809801118208994, "loss": 0.852, "step": 7933 }, { "epoch": 0.3759298744373371, "grad_norm": 0.6875, "learning_rate": 0.00013808424131717096, "loss": 0.9516, "step": 7934 }, { "epoch": 0.3759772565742715, "grad_norm": 0.60546875, "learning_rate": 0.00013807047060758408, "loss": 0.7198, "step": 7935 }, { "epoch": 0.3760246387112059, "grad_norm": 0.107421875, "learning_rate": 0.00013805669905363473, "loss": 0.0043, "step": 7936 }, { "epoch": 0.37607202084814023, "grad_norm": 0.1513671875, "learning_rate": 0.0001380429266556283, "loss": 0.0083, "step": 7937 }, { "epoch": 0.3761194029850746, "grad_norm": 0.4375, "learning_rate": 0.00013802915341387027, "loss": 0.4724, "step": 7938 }, { "epoch": 0.376166785122009, "grad_norm": 0.6015625, "learning_rate": 0.00013801537932866617, "loss": 1.1248, "step": 7939 }, { "epoch": 0.37621416725894335, "grad_norm": 0.197265625, "learning_rate": 0.00013800160440032144, "loss": 0.0161, "step": 7940 }, { "epoch": 0.37626154939587775, "grad_norm": 0.94140625, "learning_rate": 0.0001379878286291416, "loss": 0.6025, "step": 7941 }, { "epoch": 0.37630893153281214, "grad_norm": 0.55859375, "learning_rate": 0.00013797405201543215, "loss": 0.0961, "step": 7942 }, { "epoch": 0.3763563136697465, "grad_norm": 0.1220703125, "learning_rate": 0.0001379602745594987, "loss": 0.0171, "step": 7943 }, { "epoch": 0.37640369580668087, "grad_norm": 0.173828125, "learning_rate": 0.0001379464962616468, "loss": 0.1276, "step": 7944 }, { "epoch": 0.37645107794361526, "grad_norm": 0.80859375, "learning_rate": 0.000137932717122182, "loss": 1.0654, "step": 7945 }, { "epoch": 0.37649846008054966, "grad_norm": 0.54296875, "learning_rate": 0.00013791893714140997, "loss": 1.248, "step": 7946 }, { "epoch": 0.376545842217484, "grad_norm": 0.19140625, "learning_rate": 0.00013790515631963631, "loss": 0.012, "step": 7947 }, { "epoch": 0.3765932243544184, "grad_norm": 0.0189208984375, "learning_rate": 0.00013789137465716666, "loss": 0.0009, "step": 7948 }, { "epoch": 0.3766406064913528, "grad_norm": 0.61328125, "learning_rate": 0.00013787759215430664, "loss": 0.8535, "step": 7949 }, { "epoch": 0.3766879886282871, "grad_norm": 0.197265625, "learning_rate": 0.000137863808811362, "loss": 0.1482, "step": 7950 }, { "epoch": 0.3767353707652215, "grad_norm": 0.1953125, "learning_rate": 0.00013785002462863842, "loss": 0.0033, "step": 7951 }, { "epoch": 0.3767827529021559, "grad_norm": 0.64453125, "learning_rate": 0.00013783623960644155, "loss": 1.3051, "step": 7952 }, { "epoch": 0.37683013503909024, "grad_norm": 0.90625, "learning_rate": 0.00013782245374507725, "loss": 0.4477, "step": 7953 }, { "epoch": 0.37687751717602463, "grad_norm": 0.22265625, "learning_rate": 0.0001378086670448512, "loss": 0.0992, "step": 7954 }, { "epoch": 0.376924899312959, "grad_norm": 0.5859375, "learning_rate": 0.00013779487950606919, "loss": 0.7131, "step": 7955 }, { "epoch": 0.37697228144989336, "grad_norm": 0.69921875, "learning_rate": 0.000137781091129037, "loss": 1.0882, "step": 7956 }, { "epoch": 0.37701966358682776, "grad_norm": 0.71875, "learning_rate": 0.00013776730191406044, "loss": 0.9286, "step": 7957 }, { "epoch": 0.37706704572376215, "grad_norm": 0.66796875, "learning_rate": 0.00013775351186144538, "loss": 1.1329, "step": 7958 }, { "epoch": 0.37711442786069654, "grad_norm": 0.671875, "learning_rate": 0.00013773972097149762, "loss": 0.8817, "step": 7959 }, { "epoch": 0.3771618099976309, "grad_norm": 0.63671875, "learning_rate": 0.00013772592924452304, "loss": 1.0865, "step": 7960 }, { "epoch": 0.3772091921345653, "grad_norm": 0.062255859375, "learning_rate": 0.00013771213668082753, "loss": 0.0069, "step": 7961 }, { "epoch": 0.37725657427149967, "grad_norm": 1.359375, "learning_rate": 0.00013769834328071704, "loss": 0.8624, "step": 7962 }, { "epoch": 0.377303956408434, "grad_norm": 0.58203125, "learning_rate": 0.00013768454904449743, "loss": 0.7787, "step": 7963 }, { "epoch": 0.3773513385453684, "grad_norm": 0.1943359375, "learning_rate": 0.00013767075397247465, "loss": 0.0185, "step": 7964 }, { "epoch": 0.3773987206823028, "grad_norm": 0.91015625, "learning_rate": 0.00013765695806495468, "loss": 0.9429, "step": 7965 }, { "epoch": 0.37744610281923713, "grad_norm": 0.7578125, "learning_rate": 0.0001376431613222435, "loss": 1.2558, "step": 7966 }, { "epoch": 0.3774934849561715, "grad_norm": 0.87890625, "learning_rate": 0.00013762936374464707, "loss": 0.9674, "step": 7967 }, { "epoch": 0.3775408670931059, "grad_norm": 0.74609375, "learning_rate": 0.00013761556533247143, "loss": 0.7816, "step": 7968 }, { "epoch": 0.37758824923004025, "grad_norm": 1.0234375, "learning_rate": 0.00013760176608602263, "loss": 0.4469, "step": 7969 }, { "epoch": 0.37763563136697464, "grad_norm": 0.47265625, "learning_rate": 0.00013758796600560675, "loss": 0.1748, "step": 7970 }, { "epoch": 0.37768301350390904, "grad_norm": 0.64453125, "learning_rate": 0.00013757416509152978, "loss": 0.0476, "step": 7971 }, { "epoch": 0.3777303956408434, "grad_norm": 0.2734375, "learning_rate": 0.00013756036334409784, "loss": 0.1644, "step": 7972 }, { "epoch": 0.37777777777777777, "grad_norm": 0.60546875, "learning_rate": 0.00013754656076361707, "loss": 0.8887, "step": 7973 }, { "epoch": 0.37782515991471216, "grad_norm": 0.21484375, "learning_rate": 0.0001375327573503936, "loss": 0.0274, "step": 7974 }, { "epoch": 0.37787254205164655, "grad_norm": 0.76953125, "learning_rate": 0.0001375189531047335, "loss": 1.3528, "step": 7975 }, { "epoch": 0.3779199241885809, "grad_norm": 0.06640625, "learning_rate": 0.000137505148026943, "loss": 0.0035, "step": 7976 }, { "epoch": 0.3779673063255153, "grad_norm": 0.52734375, "learning_rate": 0.00013749134211732828, "loss": 0.9132, "step": 7977 }, { "epoch": 0.3780146884624497, "grad_norm": 0.703125, "learning_rate": 0.00013747753537619552, "loss": 0.7628, "step": 7978 }, { "epoch": 0.378062070599384, "grad_norm": 0.1796875, "learning_rate": 0.00013746372780385095, "loss": 0.0163, "step": 7979 }, { "epoch": 0.3781094527363184, "grad_norm": 0.625, "learning_rate": 0.00013744991940060078, "loss": 1.0008, "step": 7980 }, { "epoch": 0.3781568348732528, "grad_norm": 0.62109375, "learning_rate": 0.00013743611016675132, "loss": 0.8128, "step": 7981 }, { "epoch": 0.37820421701018714, "grad_norm": 0.70703125, "learning_rate": 0.00013742230010260878, "loss": 0.9581, "step": 7982 }, { "epoch": 0.37825159914712153, "grad_norm": 0.77734375, "learning_rate": 0.0001374084892084795, "loss": 0.9201, "step": 7983 }, { "epoch": 0.3782989812840559, "grad_norm": 0.7109375, "learning_rate": 0.00013739467748466973, "loss": 1.4496, "step": 7984 }, { "epoch": 0.37834636342099026, "grad_norm": 0.6484375, "learning_rate": 0.0001373808649314859, "loss": 1.2382, "step": 7985 }, { "epoch": 0.37839374555792465, "grad_norm": 0.4609375, "learning_rate": 0.0001373670515492343, "loss": 0.1384, "step": 7986 }, { "epoch": 0.37844112769485905, "grad_norm": 0.00262451171875, "learning_rate": 0.00013735323733822129, "loss": 0.0002, "step": 7987 }, { "epoch": 0.37848850983179344, "grad_norm": 0.98828125, "learning_rate": 0.00013733942229875323, "loss": 1.2091, "step": 7988 }, { "epoch": 0.3785358919687278, "grad_norm": 0.62890625, "learning_rate": 0.0001373256064311366, "loss": 0.8466, "step": 7989 }, { "epoch": 0.37858327410566217, "grad_norm": 0.68359375, "learning_rate": 0.00013731178973567775, "loss": 0.9977, "step": 7990 }, { "epoch": 0.37863065624259656, "grad_norm": 0.57421875, "learning_rate": 0.00013729797221268317, "loss": 0.1717, "step": 7991 }, { "epoch": 0.3786780383795309, "grad_norm": 0.48828125, "learning_rate": 0.00013728415386245928, "loss": 0.5609, "step": 7992 }, { "epoch": 0.3787254205164653, "grad_norm": 0.8359375, "learning_rate": 0.00013727033468531255, "loss": 0.844, "step": 7993 }, { "epoch": 0.3787728026533997, "grad_norm": 0.91015625, "learning_rate": 0.00013725651468154954, "loss": 0.3045, "step": 7994 }, { "epoch": 0.378820184790334, "grad_norm": 0.7578125, "learning_rate": 0.0001372426938514767, "loss": 0.8474, "step": 7995 }, { "epoch": 0.3788675669272684, "grad_norm": 0.59765625, "learning_rate": 0.00013722887219540057, "loss": 0.6619, "step": 7996 }, { "epoch": 0.3789149490642028, "grad_norm": 0.416015625, "learning_rate": 0.0001372150497136277, "loss": 0.0171, "step": 7997 }, { "epoch": 0.37896233120113715, "grad_norm": 0.2451171875, "learning_rate": 0.0001372012264064647, "loss": 0.0394, "step": 7998 }, { "epoch": 0.37900971333807154, "grad_norm": 0.484375, "learning_rate": 0.0001371874022742181, "loss": 0.7911, "step": 7999 }, { "epoch": 0.37905709547500593, "grad_norm": 0.734375, "learning_rate": 0.00013717357731719455, "loss": 1.3489, "step": 8000 }, { "epoch": 0.37910447761194027, "grad_norm": 0.94921875, "learning_rate": 0.0001371597515357006, "loss": 0.2617, "step": 8001 }, { "epoch": 0.37915185974887466, "grad_norm": 0.640625, "learning_rate": 0.00013714592493004299, "loss": 0.9439, "step": 8002 }, { "epoch": 0.37919924188580906, "grad_norm": 0.6953125, "learning_rate": 0.0001371320975005283, "loss": 1.1759, "step": 8003 }, { "epoch": 0.37924662402274345, "grad_norm": 0.62109375, "learning_rate": 0.00013711826924746328, "loss": 0.4806, "step": 8004 }, { "epoch": 0.3792940061596778, "grad_norm": 0.859375, "learning_rate": 0.00013710444017115452, "loss": 0.7866, "step": 8005 }, { "epoch": 0.3793413882966122, "grad_norm": 0.58984375, "learning_rate": 0.00013709061027190886, "loss": 0.8067, "step": 8006 }, { "epoch": 0.3793887704335466, "grad_norm": 0.76953125, "learning_rate": 0.00013707677955003292, "loss": 1.244, "step": 8007 }, { "epoch": 0.3794361525704809, "grad_norm": 0.78125, "learning_rate": 0.00013706294800583355, "loss": 1.1158, "step": 8008 }, { "epoch": 0.3794835347074153, "grad_norm": 0.66015625, "learning_rate": 0.00013704911563961747, "loss": 1.2878, "step": 8009 }, { "epoch": 0.3795309168443497, "grad_norm": 0.92578125, "learning_rate": 0.00013703528245169144, "loss": 0.9559, "step": 8010 }, { "epoch": 0.37957829898128403, "grad_norm": 0.61328125, "learning_rate": 0.0001370214484423623, "loss": 0.9671, "step": 8011 }, { "epoch": 0.3796256811182184, "grad_norm": 0.267578125, "learning_rate": 0.00013700761361193687, "loss": 0.1168, "step": 8012 }, { "epoch": 0.3796730632551528, "grad_norm": 0.051025390625, "learning_rate": 0.000136993777960722, "loss": 0.0051, "step": 8013 }, { "epoch": 0.37972044539208716, "grad_norm": 0.5234375, "learning_rate": 0.0001369799414890245, "loss": 0.9547, "step": 8014 }, { "epoch": 0.37976782752902155, "grad_norm": 0.1884765625, "learning_rate": 0.00013696610419715132, "loss": 0.0117, "step": 8015 }, { "epoch": 0.37981520966595594, "grad_norm": 0.84765625, "learning_rate": 0.00013695226608540932, "loss": 1.1277, "step": 8016 }, { "epoch": 0.37986259180289034, "grad_norm": 0.65625, "learning_rate": 0.00013693842715410543, "loss": 1.2176, "step": 8017 }, { "epoch": 0.3799099739398247, "grad_norm": 0.72265625, "learning_rate": 0.00013692458740354656, "loss": 1.3435, "step": 8018 }, { "epoch": 0.37995735607675907, "grad_norm": 0.71875, "learning_rate": 0.00013691074683403967, "loss": 0.7874, "step": 8019 }, { "epoch": 0.38000473821369346, "grad_norm": 0.474609375, "learning_rate": 0.00013689690544589172, "loss": 0.0587, "step": 8020 }, { "epoch": 0.3800521203506278, "grad_norm": 0.6796875, "learning_rate": 0.00013688306323940972, "loss": 0.8233, "step": 8021 }, { "epoch": 0.3800995024875622, "grad_norm": 0.70703125, "learning_rate": 0.00013686922021490064, "loss": 1.1287, "step": 8022 }, { "epoch": 0.3801468846244966, "grad_norm": 0.76171875, "learning_rate": 0.00013685537637267157, "loss": 1.0793, "step": 8023 }, { "epoch": 0.3801942667614309, "grad_norm": 0.57421875, "learning_rate": 0.0001368415317130295, "loss": 0.697, "step": 8024 }, { "epoch": 0.3802416488983653, "grad_norm": 0.26953125, "learning_rate": 0.0001368276862362815, "loss": 0.0584, "step": 8025 }, { "epoch": 0.3802890310352997, "grad_norm": 0.57421875, "learning_rate": 0.0001368138399427346, "loss": 0.984, "step": 8026 }, { "epoch": 0.38033641317223404, "grad_norm": 0.8515625, "learning_rate": 0.000136799992832696, "loss": 0.554, "step": 8027 }, { "epoch": 0.38038379530916844, "grad_norm": 0.5546875, "learning_rate": 0.00013678614490647271, "loss": 1.06, "step": 8028 }, { "epoch": 0.38043117744610283, "grad_norm": 0.921875, "learning_rate": 0.00013677229616437193, "loss": 0.3189, "step": 8029 }, { "epoch": 0.38047855958303717, "grad_norm": 0.7109375, "learning_rate": 0.0001367584466067008, "loss": 0.7537, "step": 8030 }, { "epoch": 0.38052594171997156, "grad_norm": 0.609375, "learning_rate": 0.00013674459623376646, "loss": 0.9905, "step": 8031 }, { "epoch": 0.38057332385690595, "grad_norm": 0.62890625, "learning_rate": 0.0001367307450458761, "loss": 0.979, "step": 8032 }, { "epoch": 0.38062070599384035, "grad_norm": 0.294921875, "learning_rate": 0.000136716893043337, "loss": 0.0686, "step": 8033 }, { "epoch": 0.3806680881307747, "grad_norm": 1.1484375, "learning_rate": 0.00013670304022645626, "loss": 0.0448, "step": 8034 }, { "epoch": 0.3807154702677091, "grad_norm": 0.75, "learning_rate": 0.00013668918659554122, "loss": 0.9802, "step": 8035 }, { "epoch": 0.38076285240464347, "grad_norm": 0.82421875, "learning_rate": 0.00013667533215089907, "loss": 0.9204, "step": 8036 }, { "epoch": 0.3808102345415778, "grad_norm": 0.91015625, "learning_rate": 0.00013666147689283712, "loss": 1.2561, "step": 8037 }, { "epoch": 0.3808576166785122, "grad_norm": 0.39453125, "learning_rate": 0.00013664762082166268, "loss": 0.0745, "step": 8038 }, { "epoch": 0.3809049988154466, "grad_norm": 0.70703125, "learning_rate": 0.00013663376393768304, "loss": 1.1572, "step": 8039 }, { "epoch": 0.38095238095238093, "grad_norm": 0.58203125, "learning_rate": 0.00013661990624120552, "loss": 0.8301, "step": 8040 }, { "epoch": 0.3809997630893153, "grad_norm": 0.86328125, "learning_rate": 0.0001366060477325375, "loss": 1.0107, "step": 8041 }, { "epoch": 0.3810471452262497, "grad_norm": 0.56640625, "learning_rate": 0.00013659218841198634, "loss": 0.5433, "step": 8042 }, { "epoch": 0.38109452736318405, "grad_norm": 0.625, "learning_rate": 0.00013657832827985942, "loss": 1.0698, "step": 8043 }, { "epoch": 0.38114190950011845, "grad_norm": 0.2421875, "learning_rate": 0.00013656446733646414, "loss": 0.0785, "step": 8044 }, { "epoch": 0.38118929163705284, "grad_norm": 0.56640625, "learning_rate": 0.00013655060558210788, "loss": 1.1157, "step": 8045 }, { "epoch": 0.38123667377398723, "grad_norm": 0.8125, "learning_rate": 0.0001365367430170982, "loss": 1.1293, "step": 8046 }, { "epoch": 0.38128405591092157, "grad_norm": 0.1318359375, "learning_rate": 0.0001365228796417424, "loss": 0.0096, "step": 8047 }, { "epoch": 0.38133143804785596, "grad_norm": 0.63671875, "learning_rate": 0.00013650901545634805, "loss": 0.27, "step": 8048 }, { "epoch": 0.38137882018479036, "grad_norm": 0.234375, "learning_rate": 0.00013649515046122265, "loss": 0.0726, "step": 8049 }, { "epoch": 0.3814262023217247, "grad_norm": 0.055908203125, "learning_rate": 0.0001364812846566737, "loss": 0.0058, "step": 8050 }, { "epoch": 0.3814735844586591, "grad_norm": 0.51953125, "learning_rate": 0.00013646741804300866, "loss": 1.2114, "step": 8051 }, { "epoch": 0.3815209665955935, "grad_norm": 0.703125, "learning_rate": 0.00013645355062053515, "loss": 1.4249, "step": 8052 }, { "epoch": 0.3815683487325278, "grad_norm": 0.44140625, "learning_rate": 0.0001364396823895607, "loss": 0.911, "step": 8053 }, { "epoch": 0.3816157308694622, "grad_norm": 0.63671875, "learning_rate": 0.00013642581335039295, "loss": 0.7898, "step": 8054 }, { "epoch": 0.3816631130063966, "grad_norm": 0.146484375, "learning_rate": 0.00013641194350333944, "loss": 0.0948, "step": 8055 }, { "epoch": 0.38171049514333094, "grad_norm": 0.7890625, "learning_rate": 0.0001363980728487078, "loss": 1.5226, "step": 8056 }, { "epoch": 0.38175787728026533, "grad_norm": 0.5546875, "learning_rate": 0.0001363842013868057, "loss": 0.9883, "step": 8057 }, { "epoch": 0.3818052594171997, "grad_norm": 0.58984375, "learning_rate": 0.00013637032911794074, "loss": 0.7916, "step": 8058 }, { "epoch": 0.38185264155413406, "grad_norm": 0.63671875, "learning_rate": 0.00013635645604242064, "loss": 1.3916, "step": 8059 }, { "epoch": 0.38190002369106846, "grad_norm": 0.97265625, "learning_rate": 0.00013634258216055304, "loss": 0.1157, "step": 8060 }, { "epoch": 0.38194740582800285, "grad_norm": 0.98828125, "learning_rate": 0.0001363287074726457, "loss": 0.939, "step": 8061 }, { "epoch": 0.38199478796493724, "grad_norm": 0.61328125, "learning_rate": 0.00013631483197900631, "loss": 0.6919, "step": 8062 }, { "epoch": 0.3820421701018716, "grad_norm": 0.69140625, "learning_rate": 0.00013630095567994266, "loss": 0.7033, "step": 8063 }, { "epoch": 0.382089552238806, "grad_norm": 0.51171875, "learning_rate": 0.00013628707857576245, "loss": 0.672, "step": 8064 }, { "epoch": 0.38213693437574037, "grad_norm": 0.69921875, "learning_rate": 0.0001362732006667735, "loss": 1.0353, "step": 8065 }, { "epoch": 0.3821843165126747, "grad_norm": 0.74609375, "learning_rate": 0.0001362593219532836, "loss": 0.8868, "step": 8066 }, { "epoch": 0.3822316986496091, "grad_norm": 0.65625, "learning_rate": 0.00013624544243560056, "loss": 0.1084, "step": 8067 }, { "epoch": 0.3822790807865435, "grad_norm": 0.79296875, "learning_rate": 0.0001362315621140322, "loss": 0.9786, "step": 8068 }, { "epoch": 0.38232646292347783, "grad_norm": 0.71875, "learning_rate": 0.00013621768098888638, "loss": 0.8478, "step": 8069 }, { "epoch": 0.3823738450604122, "grad_norm": 0.80859375, "learning_rate": 0.00013620379906047097, "loss": 1.2938, "step": 8070 }, { "epoch": 0.3824212271973466, "grad_norm": 0.25390625, "learning_rate": 0.00013618991632909387, "loss": 0.1151, "step": 8071 }, { "epoch": 0.38246860933428095, "grad_norm": 0.73828125, "learning_rate": 0.000136176032795063, "loss": 0.6844, "step": 8072 }, { "epoch": 0.38251599147121534, "grad_norm": 0.6171875, "learning_rate": 0.00013616214845868624, "loss": 0.9966, "step": 8073 }, { "epoch": 0.38256337360814974, "grad_norm": 0.69140625, "learning_rate": 0.00013614826332027154, "loss": 1.0518, "step": 8074 }, { "epoch": 0.38261075574508413, "grad_norm": 0.6328125, "learning_rate": 0.00013613437738012684, "loss": 1.0863, "step": 8075 }, { "epoch": 0.38265813788201847, "grad_norm": 0.0037689208984375, "learning_rate": 0.00013612049063856015, "loss": 0.0003, "step": 8076 }, { "epoch": 0.38270552001895286, "grad_norm": 0.67578125, "learning_rate": 0.00013610660309587948, "loss": 1.4448, "step": 8077 }, { "epoch": 0.38275290215588725, "grad_norm": 0.79296875, "learning_rate": 0.0001360927147523928, "loss": 1.0429, "step": 8078 }, { "epoch": 0.3828002842928216, "grad_norm": 0.59765625, "learning_rate": 0.00013607882560840812, "loss": 0.922, "step": 8079 }, { "epoch": 0.382847666429756, "grad_norm": 0.16796875, "learning_rate": 0.00013606493566423357, "loss": 0.0116, "step": 8080 }, { "epoch": 0.3828950485666904, "grad_norm": 0.6171875, "learning_rate": 0.00013605104492017713, "loss": 1.015, "step": 8081 }, { "epoch": 0.3829424307036247, "grad_norm": 0.6015625, "learning_rate": 0.00013603715337654694, "loss": 0.0668, "step": 8082 }, { "epoch": 0.3829898128405591, "grad_norm": 0.640625, "learning_rate": 0.00013602326103365103, "loss": 0.4468, "step": 8083 }, { "epoch": 0.3830371949774935, "grad_norm": 0.58984375, "learning_rate": 0.00013600936789179757, "loss": 1.3271, "step": 8084 }, { "epoch": 0.38308457711442784, "grad_norm": 0.72265625, "learning_rate": 0.00013599547395129472, "loss": 1.0323, "step": 8085 }, { "epoch": 0.38313195925136223, "grad_norm": 0.9765625, "learning_rate": 0.00013598157921245054, "loss": 0.4701, "step": 8086 }, { "epoch": 0.3831793413882966, "grad_norm": 0.671875, "learning_rate": 0.0001359676836755733, "loss": 0.132, "step": 8087 }, { "epoch": 0.38322672352523096, "grad_norm": 0.859375, "learning_rate": 0.00013595378734097114, "loss": 1.1943, "step": 8088 }, { "epoch": 0.38327410566216535, "grad_norm": 0.263671875, "learning_rate": 0.00013593989020895228, "loss": 0.0516, "step": 8089 }, { "epoch": 0.38332148779909975, "grad_norm": 0.78125, "learning_rate": 0.00013592599227982491, "loss": 0.1347, "step": 8090 }, { "epoch": 0.38336886993603414, "grad_norm": 0.67578125, "learning_rate": 0.00013591209355389734, "loss": 1.0963, "step": 8091 }, { "epoch": 0.3834162520729685, "grad_norm": 0.8203125, "learning_rate": 0.00013589819403147775, "loss": 0.8227, "step": 8092 }, { "epoch": 0.38346363420990287, "grad_norm": 0.609375, "learning_rate": 0.00013588429371287449, "loss": 1.1687, "step": 8093 }, { "epoch": 0.38351101634683726, "grad_norm": 0.7734375, "learning_rate": 0.00013587039259839578, "loss": 1.2914, "step": 8094 }, { "epoch": 0.3835583984837716, "grad_norm": 0.63671875, "learning_rate": 0.00013585649068835, "loss": 0.8823, "step": 8095 }, { "epoch": 0.383605780620706, "grad_norm": 0.9921875, "learning_rate": 0.00013584258798304542, "loss": 0.7378, "step": 8096 }, { "epoch": 0.3836531627576404, "grad_norm": 0.73046875, "learning_rate": 0.00013582868448279044, "loss": 1.3109, "step": 8097 }, { "epoch": 0.3837005448945747, "grad_norm": 0.5859375, "learning_rate": 0.00013581478018789337, "loss": 0.4641, "step": 8098 }, { "epoch": 0.3837479270315091, "grad_norm": 0.578125, "learning_rate": 0.00013580087509866266, "loss": 0.8215, "step": 8099 }, { "epoch": 0.3837953091684435, "grad_norm": 0.035400390625, "learning_rate": 0.00013578696921540665, "loss": 0.0013, "step": 8100 }, { "epoch": 0.38384269130537785, "grad_norm": 0.419921875, "learning_rate": 0.0001357730625384338, "loss": 0.022, "step": 8101 }, { "epoch": 0.38389007344231224, "grad_norm": 0.33984375, "learning_rate": 0.0001357591550680525, "loss": 0.0354, "step": 8102 }, { "epoch": 0.38393745557924663, "grad_norm": 0.62109375, "learning_rate": 0.00013574524680457127, "loss": 0.6458, "step": 8103 }, { "epoch": 0.383984837716181, "grad_norm": 0.65234375, "learning_rate": 0.00013573133774829853, "loss": 1.2551, "step": 8104 }, { "epoch": 0.38403221985311536, "grad_norm": 0.765625, "learning_rate": 0.00013571742789954277, "loss": 1.129, "step": 8105 }, { "epoch": 0.38407960199004976, "grad_norm": 0.6484375, "learning_rate": 0.00013570351725861247, "loss": 1.4974, "step": 8106 }, { "epoch": 0.38412698412698415, "grad_norm": 0.333984375, "learning_rate": 0.00013568960582581623, "loss": 0.1634, "step": 8107 }, { "epoch": 0.3841743662639185, "grad_norm": 0.1552734375, "learning_rate": 0.00013567569360146254, "loss": 0.0321, "step": 8108 }, { "epoch": 0.3842217484008529, "grad_norm": 0.859375, "learning_rate": 0.00013566178058585995, "loss": 0.16, "step": 8109 }, { "epoch": 0.3842691305377873, "grad_norm": 0.484375, "learning_rate": 0.00013564786677931705, "loss": 0.9219, "step": 8110 }, { "epoch": 0.3843165126747216, "grad_norm": 0.609375, "learning_rate": 0.00013563395218214245, "loss": 0.9605, "step": 8111 }, { "epoch": 0.384363894811656, "grad_norm": 0.08251953125, "learning_rate": 0.00013562003679464476, "loss": 0.0044, "step": 8112 }, { "epoch": 0.3844112769485904, "grad_norm": 0.09326171875, "learning_rate": 0.00013560612061713255, "loss": 0.0093, "step": 8113 }, { "epoch": 0.38445865908552473, "grad_norm": 0.310546875, "learning_rate": 0.00013559220364991453, "loss": 0.1572, "step": 8114 }, { "epoch": 0.3845060412224591, "grad_norm": 0.1865234375, "learning_rate": 0.00013557828589329937, "loss": 0.1326, "step": 8115 }, { "epoch": 0.3845534233593935, "grad_norm": 0.71484375, "learning_rate": 0.0001355643673475957, "loss": 0.883, "step": 8116 }, { "epoch": 0.38460080549632786, "grad_norm": 0.78515625, "learning_rate": 0.00013555044801311225, "loss": 1.2444, "step": 8117 }, { "epoch": 0.38464818763326225, "grad_norm": 0.75390625, "learning_rate": 0.0001355365278901577, "loss": 0.9022, "step": 8118 }, { "epoch": 0.38469556977019664, "grad_norm": 0.859375, "learning_rate": 0.00013552260697904088, "loss": 0.7201, "step": 8119 }, { "epoch": 0.38474295190713104, "grad_norm": 2.265625, "learning_rate": 0.00013550868528007043, "loss": 1.5274, "step": 8120 }, { "epoch": 0.3847903340440654, "grad_norm": 0.8046875, "learning_rate": 0.00013549476279355518, "loss": 0.8623, "step": 8121 }, { "epoch": 0.38483771618099977, "grad_norm": 0.67578125, "learning_rate": 0.00013548083951980388, "loss": 1.3328, "step": 8122 }, { "epoch": 0.38488509831793416, "grad_norm": 0.1962890625, "learning_rate": 0.00013546691545912538, "loss": 0.1407, "step": 8123 }, { "epoch": 0.3849324804548685, "grad_norm": 0.66015625, "learning_rate": 0.00013545299061182842, "loss": 1.1061, "step": 8124 }, { "epoch": 0.3849798625918029, "grad_norm": 0.796875, "learning_rate": 0.00013543906497822194, "loss": 1.2707, "step": 8125 }, { "epoch": 0.3850272447287373, "grad_norm": 0.2158203125, "learning_rate": 0.00013542513855861475, "loss": 0.1502, "step": 8126 }, { "epoch": 0.3850746268656716, "grad_norm": 0.58203125, "learning_rate": 0.0001354112113533157, "loss": 0.1422, "step": 8127 }, { "epoch": 0.385122009002606, "grad_norm": 0.6328125, "learning_rate": 0.0001353972833626337, "loss": 0.8849, "step": 8128 }, { "epoch": 0.3851693911395404, "grad_norm": 0.431640625, "learning_rate": 0.00013538335458687764, "loss": 0.0291, "step": 8129 }, { "epoch": 0.38521677327647474, "grad_norm": 0.7421875, "learning_rate": 0.0001353694250263565, "loss": 0.7615, "step": 8130 }, { "epoch": 0.38526415541340914, "grad_norm": 0.6484375, "learning_rate": 0.00013535549468137917, "loss": 0.8799, "step": 8131 }, { "epoch": 0.38531153755034353, "grad_norm": 0.70703125, "learning_rate": 0.00013534156355225462, "loss": 1.2742, "step": 8132 }, { "epoch": 0.3853589196872779, "grad_norm": 0.287109375, "learning_rate": 0.00013532763163929184, "loss": 0.1134, "step": 8133 }, { "epoch": 0.38540630182421226, "grad_norm": 0.53515625, "learning_rate": 0.00013531369894279985, "loss": 0.9775, "step": 8134 }, { "epoch": 0.38545368396114665, "grad_norm": 0.6640625, "learning_rate": 0.0001352997654630876, "loss": 1.1179, "step": 8135 }, { "epoch": 0.38550106609808105, "grad_norm": 0.54296875, "learning_rate": 0.00013528583120046413, "loss": 0.415, "step": 8136 }, { "epoch": 0.3855484482350154, "grad_norm": 0.87890625, "learning_rate": 0.00013527189615523854, "loss": 1.18, "step": 8137 }, { "epoch": 0.3855958303719498, "grad_norm": 0.54296875, "learning_rate": 0.00013525796032771986, "loss": 0.4005, "step": 8138 }, { "epoch": 0.38564321250888417, "grad_norm": 0.5546875, "learning_rate": 0.0001352440237182172, "loss": 0.387, "step": 8139 }, { "epoch": 0.3856905946458185, "grad_norm": 0.61328125, "learning_rate": 0.00013523008632703958, "loss": 0.237, "step": 8140 }, { "epoch": 0.3857379767827529, "grad_norm": 0.31640625, "learning_rate": 0.0001352161481544962, "loss": 0.0283, "step": 8141 }, { "epoch": 0.3857853589196873, "grad_norm": 0.1318359375, "learning_rate": 0.00013520220920089617, "loss": 0.0253, "step": 8142 }, { "epoch": 0.38583274105662163, "grad_norm": 0.6484375, "learning_rate": 0.00013518826946654864, "loss": 1.2714, "step": 8143 }, { "epoch": 0.385880123193556, "grad_norm": 0.205078125, "learning_rate": 0.00013517432895176275, "loss": 0.0174, "step": 8144 }, { "epoch": 0.3859275053304904, "grad_norm": 0.56640625, "learning_rate": 0.0001351603876568477, "loss": 1.0646, "step": 8145 }, { "epoch": 0.38597488746742475, "grad_norm": 0.8359375, "learning_rate": 0.00013514644558211276, "loss": 0.5291, "step": 8146 }, { "epoch": 0.38602226960435915, "grad_norm": 0.62109375, "learning_rate": 0.00013513250272786703, "loss": 0.7843, "step": 8147 }, { "epoch": 0.38606965174129354, "grad_norm": 0.8984375, "learning_rate": 0.00013511855909441984, "loss": 0.804, "step": 8148 }, { "epoch": 0.38611703387822793, "grad_norm": 0.71484375, "learning_rate": 0.00013510461468208042, "loss": 1.0318, "step": 8149 }, { "epoch": 0.38616441601516227, "grad_norm": 0.69140625, "learning_rate": 0.00013509066949115802, "loss": 1.0913, "step": 8150 }, { "epoch": 0.38621179815209666, "grad_norm": 0.341796875, "learning_rate": 0.00013507672352196197, "loss": 0.0499, "step": 8151 }, { "epoch": 0.38625918028903106, "grad_norm": 0.71875, "learning_rate": 0.00013506277677480155, "loss": 0.948, "step": 8152 }, { "epoch": 0.3863065624259654, "grad_norm": 0.80859375, "learning_rate": 0.0001350488292499861, "loss": 0.6322, "step": 8153 }, { "epoch": 0.3863539445628998, "grad_norm": 0.7265625, "learning_rate": 0.00013503488094782494, "loss": 0.7711, "step": 8154 }, { "epoch": 0.3864013266998342, "grad_norm": 0.484375, "learning_rate": 0.0001350209318686274, "loss": 0.8854, "step": 8155 }, { "epoch": 0.3864487088367685, "grad_norm": 0.59765625, "learning_rate": 0.0001350069820127029, "loss": 1.0564, "step": 8156 }, { "epoch": 0.3864960909737029, "grad_norm": 0.671875, "learning_rate": 0.00013499303138036087, "loss": 0.8249, "step": 8157 }, { "epoch": 0.3865434731106373, "grad_norm": 0.51171875, "learning_rate": 0.00013497907997191065, "loss": 0.7084, "step": 8158 }, { "epoch": 0.38659085524757164, "grad_norm": 0.3046875, "learning_rate": 0.00013496512778766174, "loss": 0.0291, "step": 8159 }, { "epoch": 0.38663823738450603, "grad_norm": 0.5703125, "learning_rate": 0.00013495117482792348, "loss": 0.7449, "step": 8160 }, { "epoch": 0.3866856195214404, "grad_norm": 0.7109375, "learning_rate": 0.0001349372210930054, "loss": 1.0236, "step": 8161 }, { "epoch": 0.3867330016583748, "grad_norm": 0.87109375, "learning_rate": 0.000134923266583217, "loss": 0.2611, "step": 8162 }, { "epoch": 0.38678038379530916, "grad_norm": 0.1669921875, "learning_rate": 0.0001349093112988677, "loss": 0.0238, "step": 8163 }, { "epoch": 0.38682776593224355, "grad_norm": 0.703125, "learning_rate": 0.0001348953552402671, "loss": 0.9971, "step": 8164 }, { "epoch": 0.38687514806917794, "grad_norm": 0.69921875, "learning_rate": 0.0001348813984077247, "loss": 1.1173, "step": 8165 }, { "epoch": 0.3869225302061123, "grad_norm": 0.69140625, "learning_rate": 0.00013486744080155, "loss": 1.029, "step": 8166 }, { "epoch": 0.3869699123430467, "grad_norm": 0.63671875, "learning_rate": 0.00013485348242205263, "loss": 1.1005, "step": 8167 }, { "epoch": 0.38701729447998107, "grad_norm": 0.671875, "learning_rate": 0.0001348395232695421, "loss": 0.6973, "step": 8168 }, { "epoch": 0.3870646766169154, "grad_norm": 0.578125, "learning_rate": 0.0001348255633443281, "loss": 1.0075, "step": 8169 }, { "epoch": 0.3871120587538498, "grad_norm": 0.5390625, "learning_rate": 0.00013481160264672016, "loss": 0.0349, "step": 8170 }, { "epoch": 0.3871594408907842, "grad_norm": 0.6328125, "learning_rate": 0.00013479764117702798, "loss": 1.034, "step": 8171 }, { "epoch": 0.3872068230277185, "grad_norm": 0.62890625, "learning_rate": 0.0001347836789355612, "loss": 0.5948, "step": 8172 }, { "epoch": 0.3872542051646529, "grad_norm": 0.0791015625, "learning_rate": 0.00013476971592262944, "loss": 0.006, "step": 8173 }, { "epoch": 0.3873015873015873, "grad_norm": 0.57421875, "learning_rate": 0.00013475575213854244, "loss": 0.8148, "step": 8174 }, { "epoch": 0.38734896943852165, "grad_norm": 0.69921875, "learning_rate": 0.00013474178758360984, "loss": 0.9542, "step": 8175 }, { "epoch": 0.38739635157545604, "grad_norm": 0.62109375, "learning_rate": 0.00013472782225814142, "loss": 0.7074, "step": 8176 }, { "epoch": 0.38744373371239044, "grad_norm": 0.5, "learning_rate": 0.0001347138561624469, "loss": 1.0238, "step": 8177 }, { "epoch": 0.38749111584932483, "grad_norm": 0.62890625, "learning_rate": 0.00013469988929683602, "loss": 0.2586, "step": 8178 }, { "epoch": 0.38753849798625917, "grad_norm": 0.64453125, "learning_rate": 0.00013468592166161855, "loss": 0.8672, "step": 8179 }, { "epoch": 0.38758588012319356, "grad_norm": 0.69921875, "learning_rate": 0.0001346719532571043, "loss": 1.4888, "step": 8180 }, { "epoch": 0.38763326226012795, "grad_norm": 0.765625, "learning_rate": 0.00013465798408360304, "loss": 0.8679, "step": 8181 }, { "epoch": 0.3876806443970623, "grad_norm": 0.703125, "learning_rate": 0.0001346440141414246, "loss": 1.1073, "step": 8182 }, { "epoch": 0.3877280265339967, "grad_norm": 0.5859375, "learning_rate": 0.00013463004343087885, "loss": 0.7844, "step": 8183 }, { "epoch": 0.3877754086709311, "grad_norm": 0.6953125, "learning_rate": 0.00013461607195227563, "loss": 0.7868, "step": 8184 }, { "epoch": 0.3878227908078654, "grad_norm": 0.21484375, "learning_rate": 0.0001346020997059248, "loss": 0.0389, "step": 8185 }, { "epoch": 0.3878701729447998, "grad_norm": 0.275390625, "learning_rate": 0.00013458812669213624, "loss": 0.1952, "step": 8186 }, { "epoch": 0.3879175550817342, "grad_norm": 0.6640625, "learning_rate": 0.00013457415291121984, "loss": 1.0734, "step": 8187 }, { "epoch": 0.38796493721866854, "grad_norm": 0.63671875, "learning_rate": 0.00013456017836348561, "loss": 0.6853, "step": 8188 }, { "epoch": 0.38801231935560293, "grad_norm": 0.201171875, "learning_rate": 0.00013454620304924342, "loss": 0.1069, "step": 8189 }, { "epoch": 0.3880597014925373, "grad_norm": 0.65625, "learning_rate": 0.00013453222696880324, "loss": 0.9593, "step": 8190 }, { "epoch": 0.3881070836294717, "grad_norm": 0.63671875, "learning_rate": 0.00013451825012247506, "loss": 1.3341, "step": 8191 }, { "epoch": 0.38815446576640605, "grad_norm": 0.17578125, "learning_rate": 0.00013450427251056885, "loss": 0.0085, "step": 8192 }, { "epoch": 0.38820184790334045, "grad_norm": 0.78515625, "learning_rate": 0.0001344902941333946, "loss": 0.3653, "step": 8193 }, { "epoch": 0.38824923004027484, "grad_norm": 0.6640625, "learning_rate": 0.00013447631499126242, "loss": 1.182, "step": 8194 }, { "epoch": 0.3882966121772092, "grad_norm": 0.41796875, "learning_rate": 0.00013446233508448228, "loss": 0.2139, "step": 8195 }, { "epoch": 0.38834399431414357, "grad_norm": 0.84765625, "learning_rate": 0.00013444835441336423, "loss": 0.9673, "step": 8196 }, { "epoch": 0.38839137645107796, "grad_norm": 0.66015625, "learning_rate": 0.00013443437297821836, "loss": 1.0825, "step": 8197 }, { "epoch": 0.3884387585880123, "grad_norm": 0.84765625, "learning_rate": 0.00013442039077935482, "loss": 1.0597, "step": 8198 }, { "epoch": 0.3884861407249467, "grad_norm": 0.71875, "learning_rate": 0.00013440640781708365, "loss": 0.9482, "step": 8199 }, { "epoch": 0.3885335228618811, "grad_norm": 0.79296875, "learning_rate": 0.00013439242409171503, "loss": 0.6872, "step": 8200 }, { "epoch": 0.3885809049988154, "grad_norm": 0.021484375, "learning_rate": 0.00013437843960355903, "loss": 0.0011, "step": 8201 }, { "epoch": 0.3886282871357498, "grad_norm": 0.283203125, "learning_rate": 0.00013436445435292588, "loss": 0.1094, "step": 8202 }, { "epoch": 0.3886756692726842, "grad_norm": 0.5, "learning_rate": 0.00013435046834012575, "loss": 0.1463, "step": 8203 }, { "epoch": 0.38872305140961855, "grad_norm": 0.67578125, "learning_rate": 0.0001343364815654688, "loss": 0.9555, "step": 8204 }, { "epoch": 0.38877043354655294, "grad_norm": 0.73828125, "learning_rate": 0.00013432249402926526, "loss": 0.7445, "step": 8205 }, { "epoch": 0.38881781568348733, "grad_norm": 0.46484375, "learning_rate": 0.0001343085057318254, "loss": 0.3253, "step": 8206 }, { "epoch": 0.3888651978204217, "grad_norm": 0.08935546875, "learning_rate": 0.0001342945166734594, "loss": 0.0077, "step": 8207 }, { "epoch": 0.38891257995735606, "grad_norm": 0.302734375, "learning_rate": 0.00013428052685447755, "loss": 0.0575, "step": 8208 }, { "epoch": 0.38895996209429046, "grad_norm": 0.9296875, "learning_rate": 0.0001342665362751901, "loss": 0.2605, "step": 8209 }, { "epoch": 0.38900734423122485, "grad_norm": 0.5859375, "learning_rate": 0.0001342525449359074, "loss": 0.997, "step": 8210 }, { "epoch": 0.3890547263681592, "grad_norm": 0.74609375, "learning_rate": 0.00013423855283693973, "loss": 1.1376, "step": 8211 }, { "epoch": 0.3891021085050936, "grad_norm": 0.5234375, "learning_rate": 0.00013422455997859744, "loss": 0.828, "step": 8212 }, { "epoch": 0.389149490642028, "grad_norm": 0.7578125, "learning_rate": 0.00013421056636119086, "loss": 1.1193, "step": 8213 }, { "epoch": 0.3891968727789623, "grad_norm": 0.34375, "learning_rate": 0.00013419657198503039, "loss": 0.1448, "step": 8214 }, { "epoch": 0.3892442549158967, "grad_norm": 0.0059814453125, "learning_rate": 0.00013418257685042634, "loss": 0.0004, "step": 8215 }, { "epoch": 0.3892916370528311, "grad_norm": 0.56640625, "learning_rate": 0.00013416858095768915, "loss": 0.1043, "step": 8216 }, { "epoch": 0.38933901918976543, "grad_norm": 0.6640625, "learning_rate": 0.0001341545843071292, "loss": 1.0105, "step": 8217 }, { "epoch": 0.3893864013266998, "grad_norm": 0.7421875, "learning_rate": 0.000134140586899057, "loss": 1.4491, "step": 8218 }, { "epoch": 0.3894337834636342, "grad_norm": 0.58203125, "learning_rate": 0.00013412658873378293, "loss": 1.2056, "step": 8219 }, { "epoch": 0.3894811656005686, "grad_norm": 0.57421875, "learning_rate": 0.00013411258981161744, "loss": 0.9109, "step": 8220 }, { "epoch": 0.38952854773750295, "grad_norm": 0.52734375, "learning_rate": 0.00013409859013287107, "loss": 0.8034, "step": 8221 }, { "epoch": 0.38957592987443734, "grad_norm": 0.71484375, "learning_rate": 0.0001340845896978543, "loss": 1.1814, "step": 8222 }, { "epoch": 0.38962331201137174, "grad_norm": 0.8671875, "learning_rate": 0.00013407058850687764, "loss": 1.0716, "step": 8223 }, { "epoch": 0.3896706941483061, "grad_norm": 0.56640625, "learning_rate": 0.0001340565865602516, "loss": 0.7023, "step": 8224 }, { "epoch": 0.38971807628524047, "grad_norm": 0.2158203125, "learning_rate": 0.00013404258385828674, "loss": 0.1406, "step": 8225 }, { "epoch": 0.38976545842217486, "grad_norm": 0.7421875, "learning_rate": 0.00013402858040129363, "loss": 1.024, "step": 8226 }, { "epoch": 0.3898128405591092, "grad_norm": 0.6171875, "learning_rate": 0.0001340145761895829, "loss": 0.55, "step": 8227 }, { "epoch": 0.3898602226960436, "grad_norm": 0.63671875, "learning_rate": 0.00013400057122346505, "loss": 0.8254, "step": 8228 }, { "epoch": 0.389907604832978, "grad_norm": 0.6015625, "learning_rate": 0.00013398656550325078, "loss": 0.1839, "step": 8229 }, { "epoch": 0.3899549869699123, "grad_norm": 0.78125, "learning_rate": 0.00013397255902925065, "loss": 0.7558, "step": 8230 }, { "epoch": 0.3900023691068467, "grad_norm": 0.68359375, "learning_rate": 0.00013395855180177535, "loss": 0.9242, "step": 8231 }, { "epoch": 0.3900497512437811, "grad_norm": 0.0245361328125, "learning_rate": 0.00013394454382113557, "loss": 0.0006, "step": 8232 }, { "epoch": 0.39009713338071544, "grad_norm": 0.62890625, "learning_rate": 0.00013393053508764196, "loss": 0.9738, "step": 8233 }, { "epoch": 0.39014451551764984, "grad_norm": 0.052734375, "learning_rate": 0.00013391652560160522, "loss": 0.0041, "step": 8234 }, { "epoch": 0.39019189765458423, "grad_norm": 0.0034027099609375, "learning_rate": 0.00013390251536333605, "loss": 0.0002, "step": 8235 }, { "epoch": 0.3902392797915186, "grad_norm": 0.6796875, "learning_rate": 0.00013388850437314522, "loss": 0.9698, "step": 8236 }, { "epoch": 0.39028666192845296, "grad_norm": 0.87109375, "learning_rate": 0.00013387449263134346, "loss": 0.0449, "step": 8237 }, { "epoch": 0.39033404406538735, "grad_norm": 0.66015625, "learning_rate": 0.00013386048013824153, "loss": 1.0048, "step": 8238 }, { "epoch": 0.39038142620232175, "grad_norm": 0.65625, "learning_rate": 0.0001338464668941502, "loss": 1.4565, "step": 8239 }, { "epoch": 0.3904288083392561, "grad_norm": 1.1015625, "learning_rate": 0.00013383245289938031, "loss": 0.6494, "step": 8240 }, { "epoch": 0.3904761904761905, "grad_norm": 0.10546875, "learning_rate": 0.00013381843815424264, "loss": 0.0027, "step": 8241 }, { "epoch": 0.39052357261312487, "grad_norm": 0.546875, "learning_rate": 0.00013380442265904801, "loss": 0.8223, "step": 8242 }, { "epoch": 0.3905709547500592, "grad_norm": 0.78125, "learning_rate": 0.00013379040641410734, "loss": 0.9571, "step": 8243 }, { "epoch": 0.3906183368869936, "grad_norm": 1.1015625, "learning_rate": 0.0001337763894197314, "loss": 1.1093, "step": 8244 }, { "epoch": 0.390665719023928, "grad_norm": 0.6875, "learning_rate": 0.00013376237167623116, "loss": 1.0675, "step": 8245 }, { "epoch": 0.39071310116086233, "grad_norm": 0.75, "learning_rate": 0.00013374835318391746, "loss": 0.8834, "step": 8246 }, { "epoch": 0.3907604832977967, "grad_norm": 0.546875, "learning_rate": 0.00013373433394310124, "loss": 0.9922, "step": 8247 }, { "epoch": 0.3908078654347311, "grad_norm": 1.0078125, "learning_rate": 0.00013372031395409342, "loss": 0.3326, "step": 8248 }, { "epoch": 0.3908552475716655, "grad_norm": 0.671875, "learning_rate": 0.00013370629321720498, "loss": 0.9127, "step": 8249 }, { "epoch": 0.39090262970859985, "grad_norm": 0.38671875, "learning_rate": 0.00013369227173274682, "loss": 0.0458, "step": 8250 }, { "epoch": 0.39095001184553424, "grad_norm": 0.234375, "learning_rate": 0.00013367824950102997, "loss": 0.0229, "step": 8251 }, { "epoch": 0.39099739398246863, "grad_norm": 0.1865234375, "learning_rate": 0.0001336642265223654, "loss": 0.1301, "step": 8252 }, { "epoch": 0.39104477611940297, "grad_norm": 0.5625, "learning_rate": 0.0001336502027970642, "loss": 0.134, "step": 8253 }, { "epoch": 0.39109215825633736, "grad_norm": 0.7265625, "learning_rate": 0.0001336361783254373, "loss": 0.9979, "step": 8254 }, { "epoch": 0.39113954039327176, "grad_norm": 0.55859375, "learning_rate": 0.00013362215310779583, "loss": 1.2613, "step": 8255 }, { "epoch": 0.3911869225302061, "grad_norm": 0.125, "learning_rate": 0.00013360812714445077, "loss": 0.0034, "step": 8256 }, { "epoch": 0.3912343046671405, "grad_norm": 0.271484375, "learning_rate": 0.00013359410043571328, "loss": 0.1307, "step": 8257 }, { "epoch": 0.3912816868040749, "grad_norm": 0.56640625, "learning_rate": 0.0001335800729818944, "loss": 0.5249, "step": 8258 }, { "epoch": 0.3913290689410092, "grad_norm": 0.11767578125, "learning_rate": 0.00013356604478330527, "loss": 0.0075, "step": 8259 }, { "epoch": 0.3913764510779436, "grad_norm": 0.66796875, "learning_rate": 0.00013355201584025706, "loss": 1.3444, "step": 8260 }, { "epoch": 0.391423833214878, "grad_norm": 0.023193359375, "learning_rate": 0.00013353798615306086, "loss": 0.0017, "step": 8261 }, { "epoch": 0.39147121535181234, "grad_norm": 0.6484375, "learning_rate": 0.00013352395572202783, "loss": 1.1571, "step": 8262 }, { "epoch": 0.39151859748874673, "grad_norm": 0.462890625, "learning_rate": 0.00013350992454746918, "loss": 0.082, "step": 8263 }, { "epoch": 0.3915659796256811, "grad_norm": 0.78125, "learning_rate": 0.0001334958926296961, "loss": 1.4404, "step": 8264 }, { "epoch": 0.3916133617626155, "grad_norm": 0.98046875, "learning_rate": 0.0001334818599690198, "loss": 0.0723, "step": 8265 }, { "epoch": 0.39166074389954986, "grad_norm": 0.423828125, "learning_rate": 0.0001334678265657515, "loss": 0.2663, "step": 8266 }, { "epoch": 0.39170812603648425, "grad_norm": 0.2001953125, "learning_rate": 0.00013345379242020244, "loss": 0.1553, "step": 8267 }, { "epoch": 0.39175550817341864, "grad_norm": 0.1748046875, "learning_rate": 0.00013343975753268394, "loss": 0.0766, "step": 8268 }, { "epoch": 0.391802890310353, "grad_norm": 0.7265625, "learning_rate": 0.00013342572190350722, "loss": 0.6747, "step": 8269 }, { "epoch": 0.3918502724472874, "grad_norm": 0.68359375, "learning_rate": 0.00013341168553298357, "loss": 0.9877, "step": 8270 }, { "epoch": 0.39189765458422177, "grad_norm": 0.70703125, "learning_rate": 0.00013339764842142433, "loss": 1.0344, "step": 8271 }, { "epoch": 0.3919450367211561, "grad_norm": 0.05712890625, "learning_rate": 0.00013338361056914084, "loss": 0.0048, "step": 8272 }, { "epoch": 0.3919924188580905, "grad_norm": 0.6953125, "learning_rate": 0.00013336957197644441, "loss": 1.2426, "step": 8273 }, { "epoch": 0.3920398009950249, "grad_norm": 0.62890625, "learning_rate": 0.0001333555326436464, "loss": 0.9186, "step": 8274 }, { "epoch": 0.3920871831319592, "grad_norm": 0.58984375, "learning_rate": 0.00013334149257105822, "loss": 0.8823, "step": 8275 }, { "epoch": 0.3921345652688936, "grad_norm": 0.9296875, "learning_rate": 0.0001333274517589913, "loss": 1.0725, "step": 8276 }, { "epoch": 0.392181947405828, "grad_norm": 0.62890625, "learning_rate": 0.00013331341020775695, "loss": 0.9247, "step": 8277 }, { "epoch": 0.3922293295427624, "grad_norm": 0.69921875, "learning_rate": 0.00013329936791766665, "loss": 0.9583, "step": 8278 }, { "epoch": 0.39227671167969674, "grad_norm": 0.0037994384765625, "learning_rate": 0.00013328532488903185, "loss": 0.0002, "step": 8279 }, { "epoch": 0.39232409381663114, "grad_norm": 0.5078125, "learning_rate": 0.000133271281122164, "loss": 0.6199, "step": 8280 }, { "epoch": 0.39237147595356553, "grad_norm": 0.248046875, "learning_rate": 0.00013325723661737455, "loss": 0.0224, "step": 8281 }, { "epoch": 0.39241885809049987, "grad_norm": 0.76171875, "learning_rate": 0.00013324319137497504, "loss": 1.1767, "step": 8282 }, { "epoch": 0.39246624022743426, "grad_norm": 0.64453125, "learning_rate": 0.00013322914539527694, "loss": 0.775, "step": 8283 }, { "epoch": 0.39251362236436865, "grad_norm": 0.1767578125, "learning_rate": 0.00013321509867859178, "loss": 0.0219, "step": 8284 }, { "epoch": 0.392561004501303, "grad_norm": 0.640625, "learning_rate": 0.00013320105122523112, "loss": 0.9422, "step": 8285 }, { "epoch": 0.3926083866382374, "grad_norm": 0.62890625, "learning_rate": 0.0001331870030355065, "loss": 1.0467, "step": 8286 }, { "epoch": 0.3926557687751718, "grad_norm": 1.0234375, "learning_rate": 0.00013317295410972951, "loss": 0.3509, "step": 8287 }, { "epoch": 0.3927031509121061, "grad_norm": 0.765625, "learning_rate": 0.00013315890444821175, "loss": 1.0666, "step": 8288 }, { "epoch": 0.3927505330490405, "grad_norm": 0.314453125, "learning_rate": 0.00013314485405126477, "loss": 0.1586, "step": 8289 }, { "epoch": 0.3927979151859749, "grad_norm": 1.0703125, "learning_rate": 0.00013313080291920025, "loss": 0.3858, "step": 8290 }, { "epoch": 0.39284529732290924, "grad_norm": 0.89453125, "learning_rate": 0.00013311675105232983, "loss": 0.2896, "step": 8291 }, { "epoch": 0.39289267945984363, "grad_norm": 1.0625, "learning_rate": 0.00013310269845096516, "loss": 0.6466, "step": 8292 }, { "epoch": 0.392940061596778, "grad_norm": 0.48046875, "learning_rate": 0.00013308864511541786, "loss": 0.6098, "step": 8293 }, { "epoch": 0.3929874437337124, "grad_norm": 0.76171875, "learning_rate": 0.00013307459104599964, "loss": 1.1771, "step": 8294 }, { "epoch": 0.39303482587064675, "grad_norm": 0.67578125, "learning_rate": 0.00013306053624302227, "loss": 0.7478, "step": 8295 }, { "epoch": 0.39308220800758115, "grad_norm": 0.462890625, "learning_rate": 0.00013304648070679737, "loss": 0.0336, "step": 8296 }, { "epoch": 0.39312959014451554, "grad_norm": 0.62890625, "learning_rate": 0.00013303242443763675, "loss": 0.872, "step": 8297 }, { "epoch": 0.3931769722814499, "grad_norm": 0.60546875, "learning_rate": 0.00013301836743585214, "loss": 1.2149, "step": 8298 }, { "epoch": 0.39322435441838427, "grad_norm": 0.9140625, "learning_rate": 0.00013300430970175533, "loss": 1.2844, "step": 8299 }, { "epoch": 0.39327173655531866, "grad_norm": 0.74609375, "learning_rate": 0.00013299025123565808, "loss": 1.2886, "step": 8300 }, { "epoch": 0.393319118692253, "grad_norm": 0.25, "learning_rate": 0.00013297619203787216, "loss": 0.0843, "step": 8301 }, { "epoch": 0.3933665008291874, "grad_norm": 0.0289306640625, "learning_rate": 0.00013296213210870945, "loss": 0.0025, "step": 8302 }, { "epoch": 0.3934138829661218, "grad_norm": 0.8203125, "learning_rate": 0.0001329480714484818, "loss": 1.1182, "step": 8303 }, { "epoch": 0.3934612651030561, "grad_norm": 0.73828125, "learning_rate": 0.00013293401005750095, "loss": 1.0311, "step": 8304 }, { "epoch": 0.3935086472399905, "grad_norm": 0.1376953125, "learning_rate": 0.00013291994793607884, "loss": 0.0156, "step": 8305 }, { "epoch": 0.3935560293769249, "grad_norm": 0.1923828125, "learning_rate": 0.00013290588508452743, "loss": 0.1274, "step": 8306 }, { "epoch": 0.3936034115138593, "grad_norm": 0.59375, "learning_rate": 0.00013289182150315844, "loss": 0.3793, "step": 8307 }, { "epoch": 0.39365079365079364, "grad_norm": 0.65234375, "learning_rate": 0.00013287775719228394, "loss": 0.9794, "step": 8308 }, { "epoch": 0.39369817578772803, "grad_norm": 0.26171875, "learning_rate": 0.0001328636921522158, "loss": 0.1713, "step": 8309 }, { "epoch": 0.3937455579246624, "grad_norm": 0.77734375, "learning_rate": 0.00013284962638326596, "loss": 1.7904, "step": 8310 }, { "epoch": 0.39379294006159676, "grad_norm": 0.59375, "learning_rate": 0.00013283555988574642, "loss": 0.7938, "step": 8311 }, { "epoch": 0.39384032219853116, "grad_norm": 0.8125, "learning_rate": 0.00013282149265996912, "loss": 0.818, "step": 8312 }, { "epoch": 0.39388770433546555, "grad_norm": 0.1669921875, "learning_rate": 0.00013280742470624606, "loss": 0.1211, "step": 8313 }, { "epoch": 0.3939350864723999, "grad_norm": 0.361328125, "learning_rate": 0.00013279335602488926, "loss": 0.1617, "step": 8314 }, { "epoch": 0.3939824686093343, "grad_norm": 0.06640625, "learning_rate": 0.00013277928661621077, "loss": 0.0098, "step": 8315 }, { "epoch": 0.3940298507462687, "grad_norm": 0.578125, "learning_rate": 0.00013276521648052266, "loss": 0.802, "step": 8316 }, { "epoch": 0.394077232883203, "grad_norm": 0.4921875, "learning_rate": 0.0001327511456181369, "loss": 0.1753, "step": 8317 }, { "epoch": 0.3941246150201374, "grad_norm": 0.5390625, "learning_rate": 0.0001327370740293656, "loss": 1.001, "step": 8318 }, { "epoch": 0.3941719971570718, "grad_norm": 0.1962890625, "learning_rate": 0.0001327230017145209, "loss": 0.0278, "step": 8319 }, { "epoch": 0.39421937929400613, "grad_norm": 0.78125, "learning_rate": 0.00013270892867391486, "loss": 1.0463, "step": 8320 }, { "epoch": 0.3942667614309405, "grad_norm": 0.2314453125, "learning_rate": 0.00013269485490785963, "loss": 0.177, "step": 8321 }, { "epoch": 0.3943141435678749, "grad_norm": 0.5625, "learning_rate": 0.00013268078041666734, "loss": 0.2885, "step": 8322 }, { "epoch": 0.3943615257048093, "grad_norm": 0.70703125, "learning_rate": 0.00013266670520065013, "loss": 1.1419, "step": 8323 }, { "epoch": 0.39440890784174365, "grad_norm": 0.70703125, "learning_rate": 0.00013265262926012025, "loss": 1.1623, "step": 8324 }, { "epoch": 0.39445628997867804, "grad_norm": 0.1611328125, "learning_rate": 0.0001326385525953898, "loss": 0.0795, "step": 8325 }, { "epoch": 0.39450367211561244, "grad_norm": 0.734375, "learning_rate": 0.000132624475206771, "loss": 0.8408, "step": 8326 }, { "epoch": 0.3945510542525468, "grad_norm": 0.625, "learning_rate": 0.00013261039709457613, "loss": 0.3733, "step": 8327 }, { "epoch": 0.39459843638948117, "grad_norm": 0.244140625, "learning_rate": 0.00013259631825911735, "loss": 0.1416, "step": 8328 }, { "epoch": 0.39464581852641556, "grad_norm": 0.65234375, "learning_rate": 0.00013258223870070697, "loss": 0.8125, "step": 8329 }, { "epoch": 0.3946932006633499, "grad_norm": 0.47265625, "learning_rate": 0.00013256815841965723, "loss": 0.4684, "step": 8330 }, { "epoch": 0.3947405828002843, "grad_norm": 0.59375, "learning_rate": 0.00013255407741628045, "loss": 1.0509, "step": 8331 }, { "epoch": 0.3947879649372187, "grad_norm": 0.8671875, "learning_rate": 0.00013253999569088888, "loss": 0.8827, "step": 8332 }, { "epoch": 0.394835347074153, "grad_norm": 0.984375, "learning_rate": 0.00013252591324379486, "loss": 0.098, "step": 8333 }, { "epoch": 0.3948827292110874, "grad_norm": 0.154296875, "learning_rate": 0.00013251183007531077, "loss": 0.0248, "step": 8334 }, { "epoch": 0.3949301113480218, "grad_norm": 0.7734375, "learning_rate": 0.00013249774618574888, "loss": 0.1278, "step": 8335 }, { "epoch": 0.3949774934849562, "grad_norm": 0.48828125, "learning_rate": 0.00013248366157542161, "loss": 0.1062, "step": 8336 }, { "epoch": 0.39502487562189054, "grad_norm": 0.78515625, "learning_rate": 0.00013246957624464133, "loss": 0.9185, "step": 8337 }, { "epoch": 0.39507225775882493, "grad_norm": 0.6875, "learning_rate": 0.00013245549019372043, "loss": 1.1236, "step": 8338 }, { "epoch": 0.3951196398957593, "grad_norm": 0.2490234375, "learning_rate": 0.00013244140342297134, "loss": 0.0376, "step": 8339 }, { "epoch": 0.39516702203269366, "grad_norm": 0.66796875, "learning_rate": 0.00013242731593270644, "loss": 0.994, "step": 8340 }, { "epoch": 0.39521440416962805, "grad_norm": 0.60546875, "learning_rate": 0.00013241322772323825, "loss": 0.9374, "step": 8341 }, { "epoch": 0.39526178630656245, "grad_norm": 0.6875, "learning_rate": 0.00013239913879487919, "loss": 1.3045, "step": 8342 }, { "epoch": 0.3953091684434968, "grad_norm": 1.0859375, "learning_rate": 0.0001323850491479417, "loss": 1.4344, "step": 8343 }, { "epoch": 0.3953565505804312, "grad_norm": 0.578125, "learning_rate": 0.00013237095878273835, "loss": 0.5707, "step": 8344 }, { "epoch": 0.39540393271736557, "grad_norm": 0.0595703125, "learning_rate": 0.00013235686769958162, "loss": 0.0062, "step": 8345 }, { "epoch": 0.3954513148542999, "grad_norm": 0.349609375, "learning_rate": 0.00013234277589878403, "loss": 0.0154, "step": 8346 }, { "epoch": 0.3954986969912343, "grad_norm": 0.6171875, "learning_rate": 0.0001323286833806581, "loss": 1.0657, "step": 8347 }, { "epoch": 0.3955460791281687, "grad_norm": 0.5, "learning_rate": 0.00013231459014551643, "loss": 0.4864, "step": 8348 }, { "epoch": 0.39559346126510303, "grad_norm": 0.5, "learning_rate": 0.00013230049619367156, "loss": 0.7659, "step": 8349 }, { "epoch": 0.3956408434020374, "grad_norm": 0.70703125, "learning_rate": 0.0001322864015254361, "loss": 1.1576, "step": 8350 }, { "epoch": 0.3956882255389718, "grad_norm": 0.70703125, "learning_rate": 0.00013227230614112264, "loss": 0.923, "step": 8351 }, { "epoch": 0.3957356076759062, "grad_norm": 0.8828125, "learning_rate": 0.00013225821004104378, "loss": 1.0582, "step": 8352 }, { "epoch": 0.39578298981284055, "grad_norm": 0.8359375, "learning_rate": 0.0001322441132255122, "loss": 0.5032, "step": 8353 }, { "epoch": 0.39583037194977494, "grad_norm": 0.546875, "learning_rate": 0.00013223001569484053, "loss": 0.884, "step": 8354 }, { "epoch": 0.39587775408670933, "grad_norm": 0.6953125, "learning_rate": 0.00013221591744934144, "loss": 0.9107, "step": 8355 }, { "epoch": 0.39592513622364367, "grad_norm": 0.017822265625, "learning_rate": 0.00013220181848932761, "loss": 0.0011, "step": 8356 }, { "epoch": 0.39597251836057806, "grad_norm": 0.251953125, "learning_rate": 0.0001321877188151118, "loss": 0.0106, "step": 8357 }, { "epoch": 0.39601990049751246, "grad_norm": 0.61328125, "learning_rate": 0.0001321736184270066, "loss": 0.7856, "step": 8358 }, { "epoch": 0.3960672826344468, "grad_norm": 0.12353515625, "learning_rate": 0.00013215951732532482, "loss": 0.0062, "step": 8359 }, { "epoch": 0.3961146647713812, "grad_norm": 0.6875, "learning_rate": 0.00013214541551037927, "loss": 0.9263, "step": 8360 }, { "epoch": 0.3961620469083156, "grad_norm": 0.5390625, "learning_rate": 0.00013213131298248255, "loss": 0.8682, "step": 8361 }, { "epoch": 0.3962094290452499, "grad_norm": 0.1962890625, "learning_rate": 0.0001321172097419476, "loss": 0.1484, "step": 8362 }, { "epoch": 0.3962568111821843, "grad_norm": 0.8125, "learning_rate": 0.00013210310578908713, "loss": 1.3438, "step": 8363 }, { "epoch": 0.3963041933191187, "grad_norm": 0.5, "learning_rate": 0.00013208900112421395, "loss": 1.0264, "step": 8364 }, { "epoch": 0.3963515754560531, "grad_norm": 0.09326171875, "learning_rate": 0.00013207489574764095, "loss": 0.0142, "step": 8365 }, { "epoch": 0.39639895759298743, "grad_norm": 0.63671875, "learning_rate": 0.00013206078965968089, "loss": 1.2929, "step": 8366 }, { "epoch": 0.3964463397299218, "grad_norm": 0.8359375, "learning_rate": 0.00013204668286064664, "loss": 0.9476, "step": 8367 }, { "epoch": 0.3964937218668562, "grad_norm": 0.380859375, "learning_rate": 0.00013203257535085114, "loss": 0.2027, "step": 8368 }, { "epoch": 0.39654110400379056, "grad_norm": 0.6171875, "learning_rate": 0.00013201846713060725, "loss": 0.9504, "step": 8369 }, { "epoch": 0.39658848614072495, "grad_norm": 0.68359375, "learning_rate": 0.0001320043582002278, "loss": 0.1219, "step": 8370 }, { "epoch": 0.39663586827765934, "grad_norm": 0.58984375, "learning_rate": 0.00013199024856002585, "loss": 0.7931, "step": 8371 }, { "epoch": 0.3966832504145937, "grad_norm": 0.625, "learning_rate": 0.00013197613821031422, "loss": 1.1369, "step": 8372 }, { "epoch": 0.3967306325515281, "grad_norm": 0.57421875, "learning_rate": 0.00013196202715140591, "loss": 0.7452, "step": 8373 }, { "epoch": 0.39677801468846247, "grad_norm": 0.7421875, "learning_rate": 0.0001319479153836139, "loss": 0.8202, "step": 8374 }, { "epoch": 0.3968253968253968, "grad_norm": 0.71484375, "learning_rate": 0.00013193380290725111, "loss": 1.3312, "step": 8375 }, { "epoch": 0.3968727789623312, "grad_norm": 0.6640625, "learning_rate": 0.00013191968972263063, "loss": 0.9073, "step": 8376 }, { "epoch": 0.3969201610992656, "grad_norm": 0.6640625, "learning_rate": 0.00013190557583006538, "loss": 1.0335, "step": 8377 }, { "epoch": 0.3969675432361999, "grad_norm": 0.7109375, "learning_rate": 0.00013189146122986848, "loss": 0.2046, "step": 8378 }, { "epoch": 0.3970149253731343, "grad_norm": 0.4453125, "learning_rate": 0.00013187734592235296, "loss": 0.1444, "step": 8379 }, { "epoch": 0.3970623075100687, "grad_norm": 1.1484375, "learning_rate": 0.00013186322990783186, "loss": 1.1527, "step": 8380 }, { "epoch": 0.3971096896470031, "grad_norm": 0.78515625, "learning_rate": 0.00013184911318661824, "loss": 1.0669, "step": 8381 }, { "epoch": 0.39715707178393744, "grad_norm": 0.00848388671875, "learning_rate": 0.0001318349957590252, "loss": 0.0004, "step": 8382 }, { "epoch": 0.39720445392087184, "grad_norm": 0.4609375, "learning_rate": 0.00013182087762536588, "loss": 0.2528, "step": 8383 }, { "epoch": 0.39725183605780623, "grad_norm": 0.1748046875, "learning_rate": 0.00013180675878595336, "loss": 0.0269, "step": 8384 }, { "epoch": 0.39729921819474057, "grad_norm": 0.7109375, "learning_rate": 0.00013179263924110085, "loss": 1.1373, "step": 8385 }, { "epoch": 0.39734660033167496, "grad_norm": 0.66015625, "learning_rate": 0.00013177851899112147, "loss": 0.3504, "step": 8386 }, { "epoch": 0.39739398246860935, "grad_norm": 0.8046875, "learning_rate": 0.00013176439803632835, "loss": 1.5232, "step": 8387 }, { "epoch": 0.3974413646055437, "grad_norm": 0.65625, "learning_rate": 0.00013175027637703473, "loss": 0.8524, "step": 8388 }, { "epoch": 0.3974887467424781, "grad_norm": 0.671875, "learning_rate": 0.0001317361540135538, "loss": 0.6525, "step": 8389 }, { "epoch": 0.3975361288794125, "grad_norm": 0.67578125, "learning_rate": 0.00013172203094619878, "loss": 0.2698, "step": 8390 }, { "epoch": 0.3975835110163468, "grad_norm": 0.55859375, "learning_rate": 0.00013170790717528292, "loss": 1.1265, "step": 8391 }, { "epoch": 0.3976308931532812, "grad_norm": 0.171875, "learning_rate": 0.0001316937827011194, "loss": 0.0248, "step": 8392 }, { "epoch": 0.3976782752902156, "grad_norm": 0.671875, "learning_rate": 0.00013167965752402158, "loss": 0.6908, "step": 8393 }, { "epoch": 0.39772565742714994, "grad_norm": 0.63671875, "learning_rate": 0.0001316655316443027, "loss": 1.1508, "step": 8394 }, { "epoch": 0.39777303956408433, "grad_norm": 0.76171875, "learning_rate": 0.00013165140506227606, "loss": 1.5209, "step": 8395 }, { "epoch": 0.3978204217010187, "grad_norm": 0.62890625, "learning_rate": 0.00013163727777825497, "loss": 1.3025, "step": 8396 }, { "epoch": 0.3978678038379531, "grad_norm": 0.69921875, "learning_rate": 0.00013162314979255272, "loss": 0.9712, "step": 8397 }, { "epoch": 0.39791518597488745, "grad_norm": 0.95703125, "learning_rate": 0.00013160902110548274, "loss": 0.5788, "step": 8398 }, { "epoch": 0.39796256811182185, "grad_norm": 0.12353515625, "learning_rate": 0.0001315948917173583, "loss": 0.0088, "step": 8399 }, { "epoch": 0.39800995024875624, "grad_norm": 0.5546875, "learning_rate": 0.00013158076162849281, "loss": 1.0409, "step": 8400 }, { "epoch": 0.3980573323856906, "grad_norm": 0.10546875, "learning_rate": 0.0001315666308391997, "loss": 0.0045, "step": 8401 }, { "epoch": 0.39810471452262497, "grad_norm": 0.58203125, "learning_rate": 0.00013155249934979234, "loss": 0.9082, "step": 8402 }, { "epoch": 0.39815209665955936, "grad_norm": 0.70703125, "learning_rate": 0.00013153836716058414, "loss": 1.0735, "step": 8403 }, { "epoch": 0.3981994787964937, "grad_norm": 0.2470703125, "learning_rate": 0.00013152423427188856, "loss": 0.0275, "step": 8404 }, { "epoch": 0.3982468609334281, "grad_norm": 0.5859375, "learning_rate": 0.000131510100684019, "loss": 0.9725, "step": 8405 }, { "epoch": 0.3982942430703625, "grad_norm": 0.56640625, "learning_rate": 0.00013149596639728904, "loss": 0.4258, "step": 8406 }, { "epoch": 0.3983416252072968, "grad_norm": 0.70703125, "learning_rate": 0.00013148183141201204, "loss": 1.2988, "step": 8407 }, { "epoch": 0.3983890073442312, "grad_norm": 0.267578125, "learning_rate": 0.00013146769572850158, "loss": 0.0233, "step": 8408 }, { "epoch": 0.3984363894811656, "grad_norm": 0.5078125, "learning_rate": 0.00013145355934707112, "loss": 0.9486, "step": 8409 }, { "epoch": 0.3984837716181, "grad_norm": 0.515625, "learning_rate": 0.00013143942226803427, "loss": 0.1009, "step": 8410 }, { "epoch": 0.39853115375503434, "grad_norm": 0.51171875, "learning_rate": 0.00013142528449170446, "loss": 0.6194, "step": 8411 }, { "epoch": 0.39857853589196873, "grad_norm": 0.6640625, "learning_rate": 0.00013141114601839532, "loss": 1.2989, "step": 8412 }, { "epoch": 0.3986259180289031, "grad_norm": 0.6171875, "learning_rate": 0.00013139700684842043, "loss": 0.5218, "step": 8413 }, { "epoch": 0.39867330016583746, "grad_norm": 0.69140625, "learning_rate": 0.0001313828669820934, "loss": 1.2017, "step": 8414 }, { "epoch": 0.39872068230277186, "grad_norm": 0.8046875, "learning_rate": 0.00013136872641972776, "loss": 0.1648, "step": 8415 }, { "epoch": 0.39876806443970625, "grad_norm": 0.6484375, "learning_rate": 0.00013135458516163722, "loss": 0.2398, "step": 8416 }, { "epoch": 0.3988154465766406, "grad_norm": 0.65234375, "learning_rate": 0.00013134044320813537, "loss": 1.25, "step": 8417 }, { "epoch": 0.398862828713575, "grad_norm": 0.6640625, "learning_rate": 0.0001313263005595359, "loss": 0.9613, "step": 8418 }, { "epoch": 0.3989102108505094, "grad_norm": 0.74609375, "learning_rate": 0.0001313121572161524, "loss": 1.1156, "step": 8419 }, { "epoch": 0.3989575929874437, "grad_norm": 1.25, "learning_rate": 0.00013129801317829858, "loss": 0.5553, "step": 8420 }, { "epoch": 0.3990049751243781, "grad_norm": 0.671875, "learning_rate": 0.00013128386844628824, "loss": 1.3128, "step": 8421 }, { "epoch": 0.3990523572613125, "grad_norm": 0.84375, "learning_rate": 0.00013126972302043497, "loss": 1.3793, "step": 8422 }, { "epoch": 0.39909973939824683, "grad_norm": 0.6796875, "learning_rate": 0.00013125557690105258, "loss": 1.0913, "step": 8423 }, { "epoch": 0.3991471215351812, "grad_norm": 0.08984375, "learning_rate": 0.00013124143008845474, "loss": 0.0059, "step": 8424 }, { "epoch": 0.3991945036721156, "grad_norm": 0.87890625, "learning_rate": 0.00013122728258295528, "loss": 1.249, "step": 8425 }, { "epoch": 0.39924188580905, "grad_norm": 0.69921875, "learning_rate": 0.00013121313438486799, "loss": 1.2313, "step": 8426 }, { "epoch": 0.39928926794598435, "grad_norm": 0.318359375, "learning_rate": 0.0001311989854945066, "loss": 0.0372, "step": 8427 }, { "epoch": 0.39933665008291874, "grad_norm": 0.03076171875, "learning_rate": 0.00013118483591218494, "loss": 0.0015, "step": 8428 }, { "epoch": 0.39938403221985314, "grad_norm": 0.671875, "learning_rate": 0.00013117068563821684, "loss": 1.2636, "step": 8429 }, { "epoch": 0.3994314143567875, "grad_norm": 0.65625, "learning_rate": 0.00013115653467291613, "loss": 1.565, "step": 8430 }, { "epoch": 0.39947879649372187, "grad_norm": 0.2265625, "learning_rate": 0.00013114238301659668, "loss": 0.0358, "step": 8431 }, { "epoch": 0.39952617863065626, "grad_norm": 0.66015625, "learning_rate": 0.0001311282306695723, "loss": 0.8851, "step": 8432 }, { "epoch": 0.3995735607675906, "grad_norm": 0.55078125, "learning_rate": 0.00013111407763215696, "loss": 0.1173, "step": 8433 }, { "epoch": 0.399620942904525, "grad_norm": 0.54296875, "learning_rate": 0.00013109992390466455, "loss": 0.9858, "step": 8434 }, { "epoch": 0.3996683250414594, "grad_norm": 0.1728515625, "learning_rate": 0.00013108576948740893, "loss": 0.1175, "step": 8435 }, { "epoch": 0.3997157071783937, "grad_norm": 0.81640625, "learning_rate": 0.00013107161438070405, "loss": 0.6959, "step": 8436 }, { "epoch": 0.3997630893153281, "grad_norm": 0.61328125, "learning_rate": 0.00013105745858486384, "loss": 0.694, "step": 8437 }, { "epoch": 0.3998104714522625, "grad_norm": 0.216796875, "learning_rate": 0.0001310433021002023, "loss": 0.0261, "step": 8438 }, { "epoch": 0.3998578535891969, "grad_norm": 0.73046875, "learning_rate": 0.00013102914492703335, "loss": 0.6536, "step": 8439 }, { "epoch": 0.39990523572613124, "grad_norm": 0.62890625, "learning_rate": 0.00013101498706567105, "loss": 1.1436, "step": 8440 }, { "epoch": 0.39995261786306563, "grad_norm": 0.76171875, "learning_rate": 0.00013100082851642939, "loss": 1.562, "step": 8441 }, { "epoch": 0.4, "grad_norm": 0.287109375, "learning_rate": 0.00013098666927962235, "loss": 0.1947, "step": 8442 }, { "epoch": 0.40004738213693436, "grad_norm": 0.2041015625, "learning_rate": 0.00013097250935556397, "loss": 0.1247, "step": 8443 }, { "epoch": 0.40009476427386875, "grad_norm": 0.53125, "learning_rate": 0.00013095834874456835, "loss": 0.6569, "step": 8444 }, { "epoch": 0.40014214641080315, "grad_norm": 0.1728515625, "learning_rate": 0.0001309441874469495, "loss": 0.1322, "step": 8445 }, { "epoch": 0.4001895285477375, "grad_norm": 0.1708984375, "learning_rate": 0.00013093002546302158, "loss": 0.1332, "step": 8446 }, { "epoch": 0.4002369106846719, "grad_norm": 0.66796875, "learning_rate": 0.0001309158627930986, "loss": 1.1967, "step": 8447 }, { "epoch": 0.40028429282160627, "grad_norm": 0.51953125, "learning_rate": 0.00013090169943749476, "loss": 0.4765, "step": 8448 }, { "epoch": 0.4003316749585406, "grad_norm": 0.609375, "learning_rate": 0.00013088753539652412, "loss": 0.6844, "step": 8449 }, { "epoch": 0.400379057095475, "grad_norm": 0.71484375, "learning_rate": 0.00013087337067050082, "loss": 0.9418, "step": 8450 }, { "epoch": 0.4004264392324094, "grad_norm": 1.171875, "learning_rate": 0.00013085920525973904, "loss": 0.6466, "step": 8451 }, { "epoch": 0.40047382136934373, "grad_norm": 0.61328125, "learning_rate": 0.00013084503916455301, "loss": 0.9358, "step": 8452 }, { "epoch": 0.4005212035062781, "grad_norm": 0.71875, "learning_rate": 0.00013083087238525685, "loss": 1.5236, "step": 8453 }, { "epoch": 0.4005685856432125, "grad_norm": 0.94140625, "learning_rate": 0.00013081670492216474, "loss": 0.8968, "step": 8454 }, { "epoch": 0.4006159677801469, "grad_norm": 0.7109375, "learning_rate": 0.00013080253677559095, "loss": 0.7483, "step": 8455 }, { "epoch": 0.40066334991708125, "grad_norm": 0.30078125, "learning_rate": 0.00013078836794584971, "loss": 0.0287, "step": 8456 }, { "epoch": 0.40071073205401564, "grad_norm": 0.5859375, "learning_rate": 0.0001307741984332553, "loss": 0.7295, "step": 8457 }, { "epoch": 0.40075811419095003, "grad_norm": 0.6328125, "learning_rate": 0.0001307600282381219, "loss": 0.9218, "step": 8458 }, { "epoch": 0.40080549632788437, "grad_norm": 0.68359375, "learning_rate": 0.00013074585736076386, "loss": 0.6534, "step": 8459 }, { "epoch": 0.40085287846481876, "grad_norm": 0.73828125, "learning_rate": 0.00013073168580149546, "loss": 1.2929, "step": 8460 }, { "epoch": 0.40090026060175316, "grad_norm": 0.07470703125, "learning_rate": 0.00013071751356063098, "loss": 0.0068, "step": 8461 }, { "epoch": 0.4009476427386875, "grad_norm": 0.8046875, "learning_rate": 0.00013070334063848478, "loss": 1.1136, "step": 8462 }, { "epoch": 0.4009950248756219, "grad_norm": 0.62890625, "learning_rate": 0.0001306891670353712, "loss": 0.0706, "step": 8463 }, { "epoch": 0.4010424070125563, "grad_norm": 0.16015625, "learning_rate": 0.00013067499275160459, "loss": 0.129, "step": 8464 }, { "epoch": 0.4010897891494906, "grad_norm": 0.2578125, "learning_rate": 0.00013066081778749928, "loss": 0.1445, "step": 8465 }, { "epoch": 0.401137171286425, "grad_norm": 0.55859375, "learning_rate": 0.0001306466421433697, "loss": 0.8519, "step": 8466 }, { "epoch": 0.4011845534233594, "grad_norm": 0.5546875, "learning_rate": 0.00013063246581953027, "loss": 0.8091, "step": 8467 }, { "epoch": 0.4012319355602938, "grad_norm": 0.279296875, "learning_rate": 0.00013061828881629533, "loss": 0.0299, "step": 8468 }, { "epoch": 0.40127931769722813, "grad_norm": 0.55078125, "learning_rate": 0.00013060411113397936, "loss": 0.8453, "step": 8469 }, { "epoch": 0.4013266998341625, "grad_norm": 0.73046875, "learning_rate": 0.00013058993277289684, "loss": 1.1901, "step": 8470 }, { "epoch": 0.4013740819710969, "grad_norm": 1.1875, "learning_rate": 0.0001305757537333622, "loss": 1.4622, "step": 8471 }, { "epoch": 0.40142146410803126, "grad_norm": 0.455078125, "learning_rate": 0.00013056157401568984, "loss": 0.3346, "step": 8472 }, { "epoch": 0.40146884624496565, "grad_norm": 0.65625, "learning_rate": 0.00013054739362019442, "loss": 0.9678, "step": 8473 }, { "epoch": 0.40151622838190004, "grad_norm": 0.56640625, "learning_rate": 0.00013053321254719028, "loss": 0.4233, "step": 8474 }, { "epoch": 0.4015636105188344, "grad_norm": 0.52734375, "learning_rate": 0.00013051903079699205, "loss": 0.9604, "step": 8475 }, { "epoch": 0.4016109926557688, "grad_norm": 0.625, "learning_rate": 0.00013050484836991418, "loss": 0.8562, "step": 8476 }, { "epoch": 0.40165837479270317, "grad_norm": 0.5703125, "learning_rate": 0.00013049066526627128, "loss": 1.1919, "step": 8477 }, { "epoch": 0.4017057569296375, "grad_norm": 0.56640625, "learning_rate": 0.00013047648148637787, "loss": 0.9606, "step": 8478 }, { "epoch": 0.4017531390665719, "grad_norm": 0.7890625, "learning_rate": 0.00013046229703054862, "loss": 1.0702, "step": 8479 }, { "epoch": 0.4018005212035063, "grad_norm": 0.6171875, "learning_rate": 0.00013044811189909803, "loss": 1.1904, "step": 8480 }, { "epoch": 0.4018479033404406, "grad_norm": 1.0546875, "learning_rate": 0.00013043392609234078, "loss": 0.7271, "step": 8481 }, { "epoch": 0.401895285477375, "grad_norm": 0.86328125, "learning_rate": 0.00013041973961059147, "loss": 1.1301, "step": 8482 }, { "epoch": 0.4019426676143094, "grad_norm": 0.39453125, "learning_rate": 0.0001304055524541647, "loss": 0.0194, "step": 8483 }, { "epoch": 0.4019900497512438, "grad_norm": 0.67578125, "learning_rate": 0.0001303913646233752, "loss": 0.6856, "step": 8484 }, { "epoch": 0.40203743188817814, "grad_norm": 0.55859375, "learning_rate": 0.00013037717611853758, "loss": 1.0049, "step": 8485 }, { "epoch": 0.40208481402511254, "grad_norm": 0.67578125, "learning_rate": 0.00013036298693996657, "loss": 0.7963, "step": 8486 }, { "epoch": 0.40213219616204693, "grad_norm": 0.546875, "learning_rate": 0.00013034879708797683, "loss": 0.5421, "step": 8487 }, { "epoch": 0.40217957829898127, "grad_norm": 0.65234375, "learning_rate": 0.00013033460656288308, "loss": 1.0176, "step": 8488 }, { "epoch": 0.40222696043591566, "grad_norm": 0.60546875, "learning_rate": 0.0001303204153650001, "loss": 0.8604, "step": 8489 }, { "epoch": 0.40227434257285005, "grad_norm": 0.71484375, "learning_rate": 0.00013030622349464262, "loss": 0.9674, "step": 8490 }, { "epoch": 0.4023217247097844, "grad_norm": 0.6171875, "learning_rate": 0.00013029203095212534, "loss": 0.9742, "step": 8491 }, { "epoch": 0.4023691068467188, "grad_norm": 0.22265625, "learning_rate": 0.0001302778377377631, "loss": 0.034, "step": 8492 }, { "epoch": 0.4024164889836532, "grad_norm": 0.66796875, "learning_rate": 0.00013026364385187065, "loss": 1.0015, "step": 8493 }, { "epoch": 0.4024638711205875, "grad_norm": 0.86328125, "learning_rate": 0.00013024944929476283, "loss": 1.2935, "step": 8494 }, { "epoch": 0.4025112532575219, "grad_norm": 0.6875, "learning_rate": 0.00013023525406675445, "loss": 1.0523, "step": 8495 }, { "epoch": 0.4025586353944563, "grad_norm": 0.59765625, "learning_rate": 0.00013022105816816034, "loss": 1.1254, "step": 8496 }, { "epoch": 0.4026060175313907, "grad_norm": 0.6328125, "learning_rate": 0.00013020686159929536, "loss": 0.8375, "step": 8497 }, { "epoch": 0.40265339966832503, "grad_norm": 0.71484375, "learning_rate": 0.00013019266436047438, "loss": 1.2827, "step": 8498 }, { "epoch": 0.4027007818052594, "grad_norm": 0.47265625, "learning_rate": 0.00013017846645201221, "loss": 0.343, "step": 8499 }, { "epoch": 0.4027481639421938, "grad_norm": 0.7421875, "learning_rate": 0.00013016426787422383, "loss": 1.2327, "step": 8500 }, { "epoch": 0.40279554607912815, "grad_norm": 0.82421875, "learning_rate": 0.0001301500686274241, "loss": 0.943, "step": 8501 }, { "epoch": 0.40284292821606255, "grad_norm": 0.6640625, "learning_rate": 0.00013013586871192797, "loss": 0.0642, "step": 8502 }, { "epoch": 0.40289031035299694, "grad_norm": 0.1884765625, "learning_rate": 0.00013012166812805039, "loss": 0.1377, "step": 8503 }, { "epoch": 0.4029376924899313, "grad_norm": 0.1650390625, "learning_rate": 0.0001301074668761063, "loss": 0.0106, "step": 8504 }, { "epoch": 0.40298507462686567, "grad_norm": 0.08056640625, "learning_rate": 0.00013009326495641061, "loss": 0.0096, "step": 8505 }, { "epoch": 0.40303245676380006, "grad_norm": 0.7109375, "learning_rate": 0.0001300790623692784, "loss": 1.1677, "step": 8506 }, { "epoch": 0.4030798389007344, "grad_norm": 0.61328125, "learning_rate": 0.0001300648591150246, "loss": 0.7628, "step": 8507 }, { "epoch": 0.4031272210376688, "grad_norm": 0.1865234375, "learning_rate": 0.00013005065519396429, "loss": 0.0236, "step": 8508 }, { "epoch": 0.4031746031746032, "grad_norm": 0.6328125, "learning_rate": 0.00013003645060641244, "loss": 1.0059, "step": 8509 }, { "epoch": 0.4032219853115375, "grad_norm": 0.298828125, "learning_rate": 0.0001300222453526841, "loss": 0.0604, "step": 8510 }, { "epoch": 0.4032693674484719, "grad_norm": 0.58984375, "learning_rate": 0.00013000803943309433, "loss": 0.9209, "step": 8511 }, { "epoch": 0.4033167495854063, "grad_norm": 0.64453125, "learning_rate": 0.0001299938328479582, "loss": 1.0169, "step": 8512 }, { "epoch": 0.4033641317223407, "grad_norm": 0.66796875, "learning_rate": 0.00012997962559759086, "loss": 1.153, "step": 8513 }, { "epoch": 0.40341151385927504, "grad_norm": 0.15625, "learning_rate": 0.0001299654176823073, "loss": 0.0071, "step": 8514 }, { "epoch": 0.40345889599620943, "grad_norm": 0.765625, "learning_rate": 0.0001299512091024227, "loss": 1.1245, "step": 8515 }, { "epoch": 0.4035062781331438, "grad_norm": 0.72265625, "learning_rate": 0.0001299369998582522, "loss": 1.1065, "step": 8516 }, { "epoch": 0.40355366027007816, "grad_norm": 0.69140625, "learning_rate": 0.00012992278995011095, "loss": 1.4431, "step": 8517 }, { "epoch": 0.40360104240701256, "grad_norm": 0.609375, "learning_rate": 0.00012990857937831405, "loss": 0.6911, "step": 8518 }, { "epoch": 0.40364842454394695, "grad_norm": 0.052734375, "learning_rate": 0.0001298943681431767, "loss": 0.0012, "step": 8519 }, { "epoch": 0.4036958066808813, "grad_norm": 0.9453125, "learning_rate": 0.00012988015624501418, "loss": 0.0266, "step": 8520 }, { "epoch": 0.4037431888178157, "grad_norm": 0.6796875, "learning_rate": 0.00012986594368414158, "loss": 1.0844, "step": 8521 }, { "epoch": 0.40379057095475007, "grad_norm": 0.2021484375, "learning_rate": 0.00012985173046087416, "loss": 0.0193, "step": 8522 }, { "epoch": 0.4038379530916844, "grad_norm": 0.11962890625, "learning_rate": 0.00012983751657552714, "loss": 0.0063, "step": 8523 }, { "epoch": 0.4038853352286188, "grad_norm": 0.546875, "learning_rate": 0.0001298233020284158, "loss": 0.4996, "step": 8524 }, { "epoch": 0.4039327173655532, "grad_norm": 0.19140625, "learning_rate": 0.0001298090868198554, "loss": 0.1321, "step": 8525 }, { "epoch": 0.4039800995024876, "grad_norm": 0.53515625, "learning_rate": 0.00012979487095016122, "loss": 0.8057, "step": 8526 }, { "epoch": 0.4040274816394219, "grad_norm": 0.83203125, "learning_rate": 0.00012978065441964848, "loss": 0.8067, "step": 8527 }, { "epoch": 0.4040748637763563, "grad_norm": 0.12890625, "learning_rate": 0.0001297664372286326, "loss": 0.0177, "step": 8528 }, { "epoch": 0.4041222459132907, "grad_norm": 0.6484375, "learning_rate": 0.00012975221937742886, "loss": 0.9545, "step": 8529 }, { "epoch": 0.40416962805022505, "grad_norm": 0.1298828125, "learning_rate": 0.00012973800086635252, "loss": 0.016, "step": 8530 }, { "epoch": 0.40421701018715944, "grad_norm": 0.353515625, "learning_rate": 0.00012972378169571905, "loss": 0.0937, "step": 8531 }, { "epoch": 0.40426439232409384, "grad_norm": 0.328125, "learning_rate": 0.00012970956186584374, "loss": 0.0335, "step": 8532 }, { "epoch": 0.4043117744610282, "grad_norm": 0.053466796875, "learning_rate": 0.000129695341377042, "loss": 0.0048, "step": 8533 }, { "epoch": 0.40435915659796257, "grad_norm": 0.578125, "learning_rate": 0.00012968112022962918, "loss": 0.8922, "step": 8534 }, { "epoch": 0.40440653873489696, "grad_norm": 0.162109375, "learning_rate": 0.00012966689842392075, "loss": 0.1109, "step": 8535 }, { "epoch": 0.4044539208718313, "grad_norm": 0.22265625, "learning_rate": 0.00012965267596023216, "loss": 0.1531, "step": 8536 }, { "epoch": 0.4045013030087657, "grad_norm": 0.8125, "learning_rate": 0.00012963845283887876, "loss": 1.0037, "step": 8537 }, { "epoch": 0.4045486851457001, "grad_norm": 0.18359375, "learning_rate": 0.00012962422906017605, "loss": 0.0206, "step": 8538 }, { "epoch": 0.4045960672826344, "grad_norm": 0.69140625, "learning_rate": 0.00012961000462443948, "loss": 1.3797, "step": 8539 }, { "epoch": 0.4046434494195688, "grad_norm": 0.7890625, "learning_rate": 0.00012959577953198457, "loss": 0.9688, "step": 8540 }, { "epoch": 0.4046908315565032, "grad_norm": 0.71484375, "learning_rate": 0.00012958155378312678, "loss": 1.0048, "step": 8541 }, { "epoch": 0.4047382136934376, "grad_norm": 0.59375, "learning_rate": 0.0001295673273781816, "loss": 0.6154, "step": 8542 }, { "epoch": 0.40478559583037194, "grad_norm": 0.53515625, "learning_rate": 0.00012955310031746467, "loss": 0.7247, "step": 8543 }, { "epoch": 0.40483297796730633, "grad_norm": 0.5390625, "learning_rate": 0.00012953887260129144, "loss": 0.9636, "step": 8544 }, { "epoch": 0.4048803601042407, "grad_norm": 0.6875, "learning_rate": 0.00012952464422997744, "loss": 0.9512, "step": 8545 }, { "epoch": 0.40492774224117506, "grad_norm": 0.8046875, "learning_rate": 0.0001295104152038383, "loss": 1.5034, "step": 8546 }, { "epoch": 0.40497512437810945, "grad_norm": 0.03857421875, "learning_rate": 0.00012949618552318955, "loss": 0.0033, "step": 8547 }, { "epoch": 0.40502250651504385, "grad_norm": 0.73828125, "learning_rate": 0.00012948195518834688, "loss": 1.1164, "step": 8548 }, { "epoch": 0.4050698886519782, "grad_norm": 0.57421875, "learning_rate": 0.0001294677241996258, "loss": 0.9889, "step": 8549 }, { "epoch": 0.4051172707889126, "grad_norm": 1.0, "learning_rate": 0.000129453492557342, "loss": 1.658, "step": 8550 }, { "epoch": 0.40516465292584697, "grad_norm": 0.76953125, "learning_rate": 0.00012943926026181112, "loss": 1.1179, "step": 8551 }, { "epoch": 0.4052120350627813, "grad_norm": 0.76171875, "learning_rate": 0.00012942502731334884, "loss": 0.413, "step": 8552 }, { "epoch": 0.4052594171997157, "grad_norm": 0.671875, "learning_rate": 0.00012941079371227074, "loss": 0.9343, "step": 8553 }, { "epoch": 0.4053067993366501, "grad_norm": 0.64453125, "learning_rate": 0.00012939655945889256, "loss": 0.7487, "step": 8554 }, { "epoch": 0.4053541814735845, "grad_norm": 0.296875, "learning_rate": 0.00012938232455353004, "loss": 0.0043, "step": 8555 }, { "epoch": 0.4054015636105188, "grad_norm": 0.78125, "learning_rate": 0.00012936808899649882, "loss": 1.2496, "step": 8556 }, { "epoch": 0.4054489457474532, "grad_norm": 0.8203125, "learning_rate": 0.00012935385278811467, "loss": 1.3019, "step": 8557 }, { "epoch": 0.4054963278843876, "grad_norm": 0.6953125, "learning_rate": 0.00012933961592869335, "loss": 0.9618, "step": 8558 }, { "epoch": 0.40554371002132195, "grad_norm": 0.7421875, "learning_rate": 0.0001293253784185506, "loss": 1.1096, "step": 8559 }, { "epoch": 0.40559109215825634, "grad_norm": 0.84765625, "learning_rate": 0.0001293111402580022, "loss": 0.8938, "step": 8560 }, { "epoch": 0.40563847429519073, "grad_norm": 0.62890625, "learning_rate": 0.0001292969014473639, "loss": 1.2005, "step": 8561 }, { "epoch": 0.40568585643212507, "grad_norm": 0.8125, "learning_rate": 0.00012928266198695154, "loss": 1.3525, "step": 8562 }, { "epoch": 0.40573323856905946, "grad_norm": 0.546875, "learning_rate": 0.00012926842187708094, "loss": 0.9494, "step": 8563 }, { "epoch": 0.40578062070599386, "grad_norm": 0.6328125, "learning_rate": 0.0001292541811180679, "loss": 0.7537, "step": 8564 }, { "epoch": 0.4058280028429282, "grad_norm": 0.734375, "learning_rate": 0.00012923993971022832, "loss": 1.1526, "step": 8565 }, { "epoch": 0.4058753849798626, "grad_norm": 0.53125, "learning_rate": 0.000129225697653878, "loss": 0.9039, "step": 8566 }, { "epoch": 0.405922767116797, "grad_norm": 0.71484375, "learning_rate": 0.00012921145494933285, "loss": 1.4967, "step": 8567 }, { "epoch": 0.4059701492537313, "grad_norm": 0.75, "learning_rate": 0.00012919721159690873, "loss": 1.018, "step": 8568 }, { "epoch": 0.4060175313906657, "grad_norm": 0.423828125, "learning_rate": 0.00012918296759692154, "loss": 0.1771, "step": 8569 }, { "epoch": 0.4060649135276001, "grad_norm": 0.01397705078125, "learning_rate": 0.00012916872294968724, "loss": 0.0008, "step": 8570 }, { "epoch": 0.4061122956645345, "grad_norm": 0.055908203125, "learning_rate": 0.00012915447765552172, "loss": 0.0083, "step": 8571 }, { "epoch": 0.40615967780146883, "grad_norm": 1.6328125, "learning_rate": 0.00012914023171474096, "loss": 0.0438, "step": 8572 }, { "epoch": 0.4062070599384032, "grad_norm": 0.56640625, "learning_rate": 0.00012912598512766085, "loss": 0.7081, "step": 8573 }, { "epoch": 0.4062544420753376, "grad_norm": 0.287109375, "learning_rate": 0.00012911173789459745, "loss": 0.3742, "step": 8574 }, { "epoch": 0.40630182421227196, "grad_norm": 0.58203125, "learning_rate": 0.00012909749001586672, "loss": 1.0059, "step": 8575 }, { "epoch": 0.40634920634920635, "grad_norm": 0.197265625, "learning_rate": 0.00012908324149178463, "loss": 0.0195, "step": 8576 }, { "epoch": 0.40639658848614074, "grad_norm": 0.1728515625, "learning_rate": 0.00012906899232266724, "loss": 0.0254, "step": 8577 }, { "epoch": 0.4064439706230751, "grad_norm": 0.294921875, "learning_rate": 0.00012905474250883057, "loss": 0.0615, "step": 8578 }, { "epoch": 0.4064913527600095, "grad_norm": 0.58984375, "learning_rate": 0.00012904049205059065, "loss": 0.5947, "step": 8579 }, { "epoch": 0.40653873489694387, "grad_norm": 0.68359375, "learning_rate": 0.00012902624094826353, "loss": 0.644, "step": 8580 }, { "epoch": 0.4065861170338782, "grad_norm": 0.55859375, "learning_rate": 0.00012901198920216528, "loss": 1.1777, "step": 8581 }, { "epoch": 0.4066334991708126, "grad_norm": 0.65234375, "learning_rate": 0.00012899773681261206, "loss": 0.5417, "step": 8582 }, { "epoch": 0.406680881307747, "grad_norm": 0.5546875, "learning_rate": 0.00012898348377991993, "loss": 0.8528, "step": 8583 }, { "epoch": 0.4067282634446814, "grad_norm": 0.115234375, "learning_rate": 0.00012896923010440498, "loss": 0.0175, "step": 8584 }, { "epoch": 0.4067756455816157, "grad_norm": 0.6640625, "learning_rate": 0.0001289549757863834, "loss": 0.9764, "step": 8585 }, { "epoch": 0.4068230277185501, "grad_norm": 0.53125, "learning_rate": 0.00012894072082617126, "loss": 0.6498, "step": 8586 }, { "epoch": 0.4068704098554845, "grad_norm": 0.7109375, "learning_rate": 0.00012892646522408475, "loss": 0.3805, "step": 8587 }, { "epoch": 0.40691779199241884, "grad_norm": 0.1767578125, "learning_rate": 0.00012891220898044006, "loss": 0.1385, "step": 8588 }, { "epoch": 0.40696517412935324, "grad_norm": 0.76953125, "learning_rate": 0.0001288979520955534, "loss": 0.8701, "step": 8589 }, { "epoch": 0.40701255626628763, "grad_norm": 0.359375, "learning_rate": 0.0001288836945697409, "loss": 0.2025, "step": 8590 }, { "epoch": 0.40705993840322197, "grad_norm": 0.57421875, "learning_rate": 0.00012886943640331885, "loss": 0.7638, "step": 8591 }, { "epoch": 0.40710732054015636, "grad_norm": 0.62890625, "learning_rate": 0.00012885517759660345, "loss": 1.0234, "step": 8592 }, { "epoch": 0.40715470267709075, "grad_norm": 0.65625, "learning_rate": 0.00012884091814991094, "loss": 1.3021, "step": 8593 }, { "epoch": 0.4072020848140251, "grad_norm": 0.6953125, "learning_rate": 0.00012882665806355757, "loss": 0.7696, "step": 8594 }, { "epoch": 0.4072494669509595, "grad_norm": 0.5078125, "learning_rate": 0.00012881239733785965, "loss": 0.7608, "step": 8595 }, { "epoch": 0.4072968490878939, "grad_norm": 0.4921875, "learning_rate": 0.00012879813597313345, "loss": 0.7023, "step": 8596 }, { "epoch": 0.4073442312248282, "grad_norm": 0.80078125, "learning_rate": 0.00012878387396969524, "loss": 0.8593, "step": 8597 }, { "epoch": 0.4073916133617626, "grad_norm": 0.69921875, "learning_rate": 0.0001287696113278614, "loss": 1.1883, "step": 8598 }, { "epoch": 0.407438995498697, "grad_norm": 0.515625, "learning_rate": 0.0001287553480479482, "loss": 0.5168, "step": 8599 }, { "epoch": 0.4074863776356314, "grad_norm": 0.5078125, "learning_rate": 0.000128741084130272, "loss": 0.5142, "step": 8600 }, { "epoch": 0.40753375977256573, "grad_norm": 0.67578125, "learning_rate": 0.00012872681957514922, "loss": 0.1935, "step": 8601 }, { "epoch": 0.4075811419095001, "grad_norm": 0.357421875, "learning_rate": 0.00012871255438289613, "loss": 0.0267, "step": 8602 }, { "epoch": 0.4076285240464345, "grad_norm": 0.58984375, "learning_rate": 0.00012869828855382917, "loss": 1.2798, "step": 8603 }, { "epoch": 0.40767590618336885, "grad_norm": 0.59765625, "learning_rate": 0.00012868402208826473, "loss": 1.0788, "step": 8604 }, { "epoch": 0.40772328832030325, "grad_norm": 0.41015625, "learning_rate": 0.00012866975498651928, "loss": 0.1732, "step": 8605 }, { "epoch": 0.40777067045723764, "grad_norm": 1.0, "learning_rate": 0.0001286554872489092, "loss": 1.135, "step": 8606 }, { "epoch": 0.407818052594172, "grad_norm": 0.5546875, "learning_rate": 0.0001286412188757509, "loss": 0.6741, "step": 8607 }, { "epoch": 0.40786543473110637, "grad_norm": 0.63671875, "learning_rate": 0.00012862694986736086, "loss": 0.8214, "step": 8608 }, { "epoch": 0.40791281686804076, "grad_norm": 0.54296875, "learning_rate": 0.00012861268022405558, "loss": 0.7118, "step": 8609 }, { "epoch": 0.4079601990049751, "grad_norm": 0.7890625, "learning_rate": 0.00012859840994615155, "loss": 0.7582, "step": 8610 }, { "epoch": 0.4080075811419095, "grad_norm": 0.703125, "learning_rate": 0.00012858413903396522, "loss": 0.7089, "step": 8611 }, { "epoch": 0.4080549632788439, "grad_norm": 0.8046875, "learning_rate": 0.00012856986748781317, "loss": 0.9185, "step": 8612 }, { "epoch": 0.4081023454157783, "grad_norm": 0.69921875, "learning_rate": 0.00012855559530801185, "loss": 0.6825, "step": 8613 }, { "epoch": 0.4081497275527126, "grad_norm": 1.03125, "learning_rate": 0.00012854132249487786, "loss": 0.7458, "step": 8614 }, { "epoch": 0.408197109689647, "grad_norm": 0.0084228515625, "learning_rate": 0.00012852704904872775, "loss": 0.0001, "step": 8615 }, { "epoch": 0.4082444918265814, "grad_norm": 0.306640625, "learning_rate": 0.00012851277496987807, "loss": 0.033, "step": 8616 }, { "epoch": 0.40829187396351574, "grad_norm": 0.5703125, "learning_rate": 0.0001284985002586454, "loss": 0.7717, "step": 8617 }, { "epoch": 0.40833925610045013, "grad_norm": 0.52734375, "learning_rate": 0.00012848422491534636, "loss": 0.5768, "step": 8618 }, { "epoch": 0.4083866382373845, "grad_norm": 0.73828125, "learning_rate": 0.00012846994894029755, "loss": 1.0762, "step": 8619 }, { "epoch": 0.40843402037431886, "grad_norm": 0.9296875, "learning_rate": 0.0001284556723338156, "loss": 1.1826, "step": 8620 }, { "epoch": 0.40848140251125326, "grad_norm": 0.38671875, "learning_rate": 0.00012844139509621714, "loss": 0.0071, "step": 8621 }, { "epoch": 0.40852878464818765, "grad_norm": 0.71875, "learning_rate": 0.00012842711722781884, "loss": 1.2741, "step": 8622 }, { "epoch": 0.408576166785122, "grad_norm": 0.447265625, "learning_rate": 0.00012841283872893735, "loss": 0.7631, "step": 8623 }, { "epoch": 0.4086235489220564, "grad_norm": 0.64453125, "learning_rate": 0.00012839855959988935, "loss": 0.9233, "step": 8624 }, { "epoch": 0.40867093105899077, "grad_norm": 0.7109375, "learning_rate": 0.00012838427984099158, "loss": 1.0251, "step": 8625 }, { "epoch": 0.4087183131959251, "grad_norm": 0.51171875, "learning_rate": 0.00012836999945256067, "loss": 0.7825, "step": 8626 }, { "epoch": 0.4087656953328595, "grad_norm": 0.54296875, "learning_rate": 0.00012835571843491342, "loss": 0.4097, "step": 8627 }, { "epoch": 0.4088130774697939, "grad_norm": 0.55859375, "learning_rate": 0.00012834143678836654, "loss": 0.5478, "step": 8628 }, { "epoch": 0.4088604596067283, "grad_norm": 0.72265625, "learning_rate": 0.00012832715451323678, "loss": 1.1211, "step": 8629 }, { "epoch": 0.4089078417436626, "grad_norm": 0.67578125, "learning_rate": 0.00012831287160984092, "loss": 0.9444, "step": 8630 }, { "epoch": 0.408955223880597, "grad_norm": 0.427734375, "learning_rate": 0.00012829858807849567, "loss": 0.1511, "step": 8631 }, { "epoch": 0.4090026060175314, "grad_norm": 0.66015625, "learning_rate": 0.00012828430391951794, "loss": 1.0221, "step": 8632 }, { "epoch": 0.40904998815446575, "grad_norm": 0.6796875, "learning_rate": 0.00012827001913322445, "loss": 1.001, "step": 8633 }, { "epoch": 0.40909737029140014, "grad_norm": 0.75390625, "learning_rate": 0.00012825573371993206, "loss": 0.9136, "step": 8634 }, { "epoch": 0.40914475242833453, "grad_norm": 0.25390625, "learning_rate": 0.00012824144767995758, "loss": 0.1502, "step": 8635 }, { "epoch": 0.4091921345652689, "grad_norm": 0.16015625, "learning_rate": 0.00012822716101361788, "loss": 0.1261, "step": 8636 }, { "epoch": 0.40923951670220327, "grad_norm": 0.5, "learning_rate": 0.00012821287372122978, "loss": 0.3275, "step": 8637 }, { "epoch": 0.40928689883913766, "grad_norm": 0.6484375, "learning_rate": 0.00012819858580311022, "loss": 1.0782, "step": 8638 }, { "epoch": 0.409334280976072, "grad_norm": 0.5625, "learning_rate": 0.00012818429725957605, "loss": 0.9956, "step": 8639 }, { "epoch": 0.4093816631130064, "grad_norm": 0.482421875, "learning_rate": 0.00012817000809094424, "loss": 0.0998, "step": 8640 }, { "epoch": 0.4094290452499408, "grad_norm": 0.67578125, "learning_rate": 0.0001281557182975316, "loss": 0.9675, "step": 8641 }, { "epoch": 0.4094764273868752, "grad_norm": 0.85546875, "learning_rate": 0.00012814142787965512, "loss": 1.1673, "step": 8642 }, { "epoch": 0.4095238095238095, "grad_norm": 0.16796875, "learning_rate": 0.0001281271368376318, "loss": 0.1159, "step": 8643 }, { "epoch": 0.4095711916607439, "grad_norm": 0.8203125, "learning_rate": 0.00012811284517177848, "loss": 1.1577, "step": 8644 }, { "epoch": 0.4096185737976783, "grad_norm": 0.62890625, "learning_rate": 0.0001280985528824122, "loss": 0.7769, "step": 8645 }, { "epoch": 0.40966595593461264, "grad_norm": 0.1923828125, "learning_rate": 0.00012808425996985, "loss": 0.0316, "step": 8646 }, { "epoch": 0.40971333807154703, "grad_norm": 0.416015625, "learning_rate": 0.00012806996643440881, "loss": 0.0188, "step": 8647 }, { "epoch": 0.4097607202084814, "grad_norm": 0.66796875, "learning_rate": 0.00012805567227640565, "loss": 0.8488, "step": 8648 }, { "epoch": 0.40980810234541576, "grad_norm": 0.1875, "learning_rate": 0.00012804137749615753, "loss": 0.0344, "step": 8649 }, { "epoch": 0.40985548448235015, "grad_norm": 0.6640625, "learning_rate": 0.00012802708209398155, "loss": 0.7397, "step": 8650 }, { "epoch": 0.40990286661928454, "grad_norm": 0.4921875, "learning_rate": 0.00012801278607019477, "loss": 0.8155, "step": 8651 }, { "epoch": 0.4099502487562189, "grad_norm": 0.423828125, "learning_rate": 0.00012799848942511422, "loss": 0.1896, "step": 8652 }, { "epoch": 0.4099976308931533, "grad_norm": 0.50390625, "learning_rate": 0.000127984192159057, "loss": 1.0832, "step": 8653 }, { "epoch": 0.41004501303008767, "grad_norm": 0.671875, "learning_rate": 0.00012796989427234017, "loss": 1.272, "step": 8654 }, { "epoch": 0.410092395167022, "grad_norm": 0.6171875, "learning_rate": 0.00012795559576528093, "loss": 0.0558, "step": 8655 }, { "epoch": 0.4101397773039564, "grad_norm": 0.57421875, "learning_rate": 0.0001279412966381963, "loss": 1.2894, "step": 8656 }, { "epoch": 0.4101871594408908, "grad_norm": 0.1748046875, "learning_rate": 0.00012792699689140351, "loss": 0.0748, "step": 8657 }, { "epoch": 0.4102345415778252, "grad_norm": 0.625, "learning_rate": 0.00012791269652521965, "loss": 0.7179, "step": 8658 }, { "epoch": 0.4102819237147595, "grad_norm": 0.62109375, "learning_rate": 0.00012789839553996194, "loss": 1.427, "step": 8659 }, { "epoch": 0.4103293058516939, "grad_norm": 0.52734375, "learning_rate": 0.00012788409393594752, "loss": 0.7396, "step": 8660 }, { "epoch": 0.4103766879886283, "grad_norm": 0.578125, "learning_rate": 0.0001278697917134936, "loss": 0.7206, "step": 8661 }, { "epoch": 0.41042407012556265, "grad_norm": 0.6796875, "learning_rate": 0.00012785548887291737, "loss": 0.8337, "step": 8662 }, { "epoch": 0.41047145226249704, "grad_norm": 0.0908203125, "learning_rate": 0.00012784118541453609, "loss": 0.0057, "step": 8663 }, { "epoch": 0.41051883439943143, "grad_norm": 0.671875, "learning_rate": 0.00012782688133866697, "loss": 1.4189, "step": 8664 }, { "epoch": 0.41056621653636577, "grad_norm": 0.455078125, "learning_rate": 0.00012781257664562726, "loss": 0.252, "step": 8665 }, { "epoch": 0.41061359867330016, "grad_norm": 0.5234375, "learning_rate": 0.00012779827133573422, "loss": 1.1228, "step": 8666 }, { "epoch": 0.41066098081023455, "grad_norm": 0.5546875, "learning_rate": 0.00012778396540930512, "loss": 0.5086, "step": 8667 }, { "epoch": 0.4107083629471689, "grad_norm": 1.0625, "learning_rate": 0.00012776965886665727, "loss": 0.7849, "step": 8668 }, { "epoch": 0.4107557450841033, "grad_norm": 0.703125, "learning_rate": 0.00012775535170810796, "loss": 0.8292, "step": 8669 }, { "epoch": 0.4108031272210377, "grad_norm": 0.0498046875, "learning_rate": 0.00012774104393397452, "loss": 0.0033, "step": 8670 }, { "epoch": 0.41085050935797207, "grad_norm": 0.0120849609375, "learning_rate": 0.0001277267355445743, "loss": 0.0006, "step": 8671 }, { "epoch": 0.4108978914949064, "grad_norm": 0.671875, "learning_rate": 0.00012771242654022454, "loss": 1.0628, "step": 8672 }, { "epoch": 0.4109452736318408, "grad_norm": 2.046875, "learning_rate": 0.00012769811692124274, "loss": 0.4934, "step": 8673 }, { "epoch": 0.4109926557687752, "grad_norm": 0.7265625, "learning_rate": 0.00012768380668794622, "loss": 1.1352, "step": 8674 }, { "epoch": 0.41104003790570953, "grad_norm": 0.83984375, "learning_rate": 0.0001276694958406523, "loss": 0.6742, "step": 8675 }, { "epoch": 0.4110874200426439, "grad_norm": 0.6640625, "learning_rate": 0.00012765518437967847, "loss": 1.153, "step": 8676 }, { "epoch": 0.4111348021795783, "grad_norm": 0.70703125, "learning_rate": 0.00012764087230534213, "loss": 1.3431, "step": 8677 }, { "epoch": 0.41118218431651266, "grad_norm": 0.154296875, "learning_rate": 0.00012762655961796066, "loss": 0.0087, "step": 8678 }, { "epoch": 0.41122956645344705, "grad_norm": 0.50390625, "learning_rate": 0.00012761224631785152, "loss": 0.7768, "step": 8679 }, { "epoch": 0.41127694859038144, "grad_norm": 0.65625, "learning_rate": 0.00012759793240533216, "loss": 1.138, "step": 8680 }, { "epoch": 0.4113243307273158, "grad_norm": 0.609375, "learning_rate": 0.00012758361788072007, "loss": 1.1168, "step": 8681 }, { "epoch": 0.41137171286425017, "grad_norm": 0.515625, "learning_rate": 0.0001275693027443327, "loss": 0.6417, "step": 8682 }, { "epoch": 0.41141909500118456, "grad_norm": 0.49609375, "learning_rate": 0.00012755498699648757, "loss": 0.1818, "step": 8683 }, { "epoch": 0.4114664771381189, "grad_norm": 0.828125, "learning_rate": 0.0001275406706375022, "loss": 1.044, "step": 8684 }, { "epoch": 0.4115138592750533, "grad_norm": 0.703125, "learning_rate": 0.00012752635366769405, "loss": 1.1786, "step": 8685 }, { "epoch": 0.4115612414119877, "grad_norm": 0.68359375, "learning_rate": 0.00012751203608738073, "loss": 0.9176, "step": 8686 }, { "epoch": 0.4116086235489221, "grad_norm": 0.7734375, "learning_rate": 0.00012749771789687974, "loss": 0.9885, "step": 8687 }, { "epoch": 0.4116560056858564, "grad_norm": 0.58203125, "learning_rate": 0.00012748339909650865, "loss": 0.6357, "step": 8688 }, { "epoch": 0.4117033878227908, "grad_norm": 0.28515625, "learning_rate": 0.00012746907968658508, "loss": 0.0454, "step": 8689 }, { "epoch": 0.4117507699597252, "grad_norm": 0.08154296875, "learning_rate": 0.00012745475966742653, "loss": 0.0118, "step": 8690 }, { "epoch": 0.41179815209665954, "grad_norm": 0.003387451171875, "learning_rate": 0.00012744043903935065, "loss": 0.0001, "step": 8691 }, { "epoch": 0.41184553423359394, "grad_norm": 0.72265625, "learning_rate": 0.0001274261178026751, "loss": 1.1407, "step": 8692 }, { "epoch": 0.41189291637052833, "grad_norm": 0.62109375, "learning_rate": 0.00012741179595771748, "loss": 1.2763, "step": 8693 }, { "epoch": 0.41194029850746267, "grad_norm": 0.83984375, "learning_rate": 0.00012739747350479542, "loss": 1.1881, "step": 8694 }, { "epoch": 0.41198768064439706, "grad_norm": 1.0078125, "learning_rate": 0.00012738315044422653, "loss": 1.1318, "step": 8695 }, { "epoch": 0.41203506278133145, "grad_norm": 0.6484375, "learning_rate": 0.00012736882677632854, "loss": 1.301, "step": 8696 }, { "epoch": 0.4120824449182658, "grad_norm": 0.51171875, "learning_rate": 0.00012735450250141915, "loss": 0.0163, "step": 8697 }, { "epoch": 0.4121298270552002, "grad_norm": 0.7734375, "learning_rate": 0.00012734017761981603, "loss": 1.1192, "step": 8698 }, { "epoch": 0.4121772091921346, "grad_norm": 0.6796875, "learning_rate": 0.00012732585213183687, "loss": 0.7565, "step": 8699 }, { "epoch": 0.41222459132906897, "grad_norm": 0.75390625, "learning_rate": 0.00012731152603779946, "loss": 0.9723, "step": 8700 }, { "epoch": 0.4122719734660033, "grad_norm": 0.9765625, "learning_rate": 0.00012729719933802147, "loss": 1.3824, "step": 8701 }, { "epoch": 0.4123193556029377, "grad_norm": 0.859375, "learning_rate": 0.00012728287203282065, "loss": 0.8237, "step": 8702 }, { "epoch": 0.4123667377398721, "grad_norm": 0.87890625, "learning_rate": 0.0001272685441225148, "loss": 0.891, "step": 8703 }, { "epoch": 0.41241411987680643, "grad_norm": 0.7578125, "learning_rate": 0.0001272542156074217, "loss": 0.9442, "step": 8704 }, { "epoch": 0.4124615020137408, "grad_norm": 0.66796875, "learning_rate": 0.00012723988648785913, "loss": 0.7798, "step": 8705 }, { "epoch": 0.4125088841506752, "grad_norm": 0.90625, "learning_rate": 0.0001272255567641449, "loss": 0.8677, "step": 8706 }, { "epoch": 0.41255626628760955, "grad_norm": 0.7265625, "learning_rate": 0.00012721122643659678, "loss": 0.8635, "step": 8707 }, { "epoch": 0.41260364842454395, "grad_norm": 0.29296875, "learning_rate": 0.00012719689550553268, "loss": 0.0828, "step": 8708 }, { "epoch": 0.41265103056147834, "grad_norm": 0.6875, "learning_rate": 0.00012718256397127042, "loss": 1.1555, "step": 8709 }, { "epoch": 0.4126984126984127, "grad_norm": 0.73828125, "learning_rate": 0.0001271682318341278, "loss": 1.2882, "step": 8710 }, { "epoch": 0.41274579483534707, "grad_norm": 0.6015625, "learning_rate": 0.00012715389909442274, "loss": 1.1442, "step": 8711 }, { "epoch": 0.41279317697228146, "grad_norm": 0.67578125, "learning_rate": 0.00012713956575247318, "loss": 1.3723, "step": 8712 }, { "epoch": 0.4128405591092158, "grad_norm": 0.494140625, "learning_rate": 0.00012712523180859688, "loss": 0.5592, "step": 8713 }, { "epoch": 0.4128879412461502, "grad_norm": 0.6640625, "learning_rate": 0.00012711089726311187, "loss": 0.7329, "step": 8714 }, { "epoch": 0.4129353233830846, "grad_norm": 0.671875, "learning_rate": 0.00012709656211633603, "loss": 0.2984, "step": 8715 }, { "epoch": 0.412982705520019, "grad_norm": 0.6484375, "learning_rate": 0.00012708222636858733, "loss": 1.2764, "step": 8716 }, { "epoch": 0.4130300876569533, "grad_norm": 0.6015625, "learning_rate": 0.00012706789002018366, "loss": 0.9975, "step": 8717 }, { "epoch": 0.4130774697938877, "grad_norm": 0.263671875, "learning_rate": 0.00012705355307144305, "loss": 0.0301, "step": 8718 }, { "epoch": 0.4131248519308221, "grad_norm": 0.9296875, "learning_rate": 0.0001270392155226834, "loss": 1.0671, "step": 8719 }, { "epoch": 0.41317223406775644, "grad_norm": 0.5390625, "learning_rate": 0.0001270248773742228, "loss": 0.6159, "step": 8720 }, { "epoch": 0.41321961620469083, "grad_norm": 0.251953125, "learning_rate": 0.00012701053862637917, "loss": 0.0311, "step": 8721 }, { "epoch": 0.4132669983416252, "grad_norm": 0.87890625, "learning_rate": 0.0001269961992794706, "loss": 0.9573, "step": 8722 }, { "epoch": 0.41331438047855956, "grad_norm": 0.765625, "learning_rate": 0.00012698185933381507, "loss": 1.1541, "step": 8723 }, { "epoch": 0.41336176261549396, "grad_norm": 0.73828125, "learning_rate": 0.00012696751878973065, "loss": 1.4427, "step": 8724 }, { "epoch": 0.41340914475242835, "grad_norm": 0.61328125, "learning_rate": 0.00012695317764753537, "loss": 0.9246, "step": 8725 }, { "epoch": 0.4134565268893627, "grad_norm": 0.72265625, "learning_rate": 0.0001269388359075473, "loss": 0.7327, "step": 8726 }, { "epoch": 0.4135039090262971, "grad_norm": 0.8203125, "learning_rate": 0.0001269244935700846, "loss": 0.9051, "step": 8727 }, { "epoch": 0.41355129116323147, "grad_norm": 0.453125, "learning_rate": 0.00012691015063546525, "loss": 0.1719, "step": 8728 }, { "epoch": 0.41359867330016586, "grad_norm": 0.578125, "learning_rate": 0.00012689580710400746, "loss": 0.9895, "step": 8729 }, { "epoch": 0.4136460554371002, "grad_norm": 0.361328125, "learning_rate": 0.00012688146297602928, "loss": 0.0378, "step": 8730 }, { "epoch": 0.4136934375740346, "grad_norm": 0.7890625, "learning_rate": 0.00012686711825184895, "loss": 0.9236, "step": 8731 }, { "epoch": 0.413740819710969, "grad_norm": 1.1875, "learning_rate": 0.0001268527729317845, "loss": 1.1369, "step": 8732 }, { "epoch": 0.4137882018479033, "grad_norm": 0.2080078125, "learning_rate": 0.00012683842701615417, "loss": 0.0278, "step": 8733 }, { "epoch": 0.4138355839848377, "grad_norm": 0.609375, "learning_rate": 0.0001268240805052761, "loss": 0.6815, "step": 8734 }, { "epoch": 0.4138829661217721, "grad_norm": 0.12060546875, "learning_rate": 0.00012680973339946854, "loss": 0.0055, "step": 8735 }, { "epoch": 0.41393034825870645, "grad_norm": 0.67578125, "learning_rate": 0.0001267953856990496, "loss": 1.0057, "step": 8736 }, { "epoch": 0.41397773039564084, "grad_norm": 0.61328125, "learning_rate": 0.00012678103740433754, "loss": 0.7069, "step": 8737 }, { "epoch": 0.41402511253257523, "grad_norm": 0.53515625, "learning_rate": 0.0001267666885156506, "loss": 0.0898, "step": 8738 }, { "epoch": 0.41407249466950957, "grad_norm": 0.74609375, "learning_rate": 0.00012675233903330707, "loss": 0.9589, "step": 8739 }, { "epoch": 0.41411987680644397, "grad_norm": 0.66796875, "learning_rate": 0.00012673798895762513, "loss": 1.1259, "step": 8740 }, { "epoch": 0.41416725894337836, "grad_norm": 0.5390625, "learning_rate": 0.00012672363828892307, "loss": 0.5192, "step": 8741 }, { "epoch": 0.4142146410803127, "grad_norm": 0.09228515625, "learning_rate": 0.00012670928702751915, "loss": 0.0111, "step": 8742 }, { "epoch": 0.4142620232172471, "grad_norm": 0.68359375, "learning_rate": 0.00012669493517373175, "loss": 0.9285, "step": 8743 }, { "epoch": 0.4143094053541815, "grad_norm": 0.1015625, "learning_rate": 0.00012668058272787908, "loss": 0.013, "step": 8744 }, { "epoch": 0.4143567874911159, "grad_norm": 0.54296875, "learning_rate": 0.00012666622969027952, "loss": 0.6148, "step": 8745 }, { "epoch": 0.4144041696280502, "grad_norm": 0.6875, "learning_rate": 0.00012665187606125138, "loss": 0.7689, "step": 8746 }, { "epoch": 0.4144515517649846, "grad_norm": 0.62890625, "learning_rate": 0.00012663752184111302, "loss": 1.0613, "step": 8747 }, { "epoch": 0.414498933901919, "grad_norm": 0.74609375, "learning_rate": 0.00012662316703018279, "loss": 1.1997, "step": 8748 }, { "epoch": 0.41454631603885334, "grad_norm": 0.6796875, "learning_rate": 0.00012660881162877908, "loss": 0.5236, "step": 8749 }, { "epoch": 0.41459369817578773, "grad_norm": 1.046875, "learning_rate": 0.0001265944556372203, "loss": 0.0557, "step": 8750 }, { "epoch": 0.4146410803127221, "grad_norm": 0.435546875, "learning_rate": 0.0001265800990558248, "loss": 0.132, "step": 8751 }, { "epoch": 0.41468846244965646, "grad_norm": 0.53515625, "learning_rate": 0.00012656574188491099, "loss": 0.4266, "step": 8752 }, { "epoch": 0.41473584458659085, "grad_norm": 0.6171875, "learning_rate": 0.00012655138412479732, "loss": 1.1053, "step": 8753 }, { "epoch": 0.41478322672352524, "grad_norm": 0.6796875, "learning_rate": 0.00012653702577580228, "loss": 1.6178, "step": 8754 }, { "epoch": 0.4148306088604596, "grad_norm": 0.447265625, "learning_rate": 0.00012652266683824424, "loss": 0.2094, "step": 8755 }, { "epoch": 0.414877990997394, "grad_norm": 0.94140625, "learning_rate": 0.0001265083073124417, "loss": 0.754, "step": 8756 }, { "epoch": 0.41492537313432837, "grad_norm": 0.055419921875, "learning_rate": 0.00012649394719871314, "loss": 0.0055, "step": 8757 }, { "epoch": 0.41497275527126276, "grad_norm": 0.59765625, "learning_rate": 0.00012647958649737707, "loss": 0.97, "step": 8758 }, { "epoch": 0.4150201374081971, "grad_norm": 0.81640625, "learning_rate": 0.00012646522520875196, "loss": 0.0633, "step": 8759 }, { "epoch": 0.4150675195451315, "grad_norm": 0.765625, "learning_rate": 0.00012645086333315633, "loss": 1.0843, "step": 8760 }, { "epoch": 0.4151149016820659, "grad_norm": 0.5546875, "learning_rate": 0.00012643650087090875, "loss": 0.9516, "step": 8761 }, { "epoch": 0.4151622838190002, "grad_norm": 0.5703125, "learning_rate": 0.00012642213782232775, "loss": 1.0484, "step": 8762 }, { "epoch": 0.4152096659559346, "grad_norm": 0.74609375, "learning_rate": 0.00012640777418773186, "loss": 1.0012, "step": 8763 }, { "epoch": 0.415257048092869, "grad_norm": 0.73828125, "learning_rate": 0.00012639340996743967, "loss": 0.5978, "step": 8764 }, { "epoch": 0.41530443022980335, "grad_norm": 0.80078125, "learning_rate": 0.00012637904516176975, "loss": 0.4532, "step": 8765 }, { "epoch": 0.41535181236673774, "grad_norm": 0.85546875, "learning_rate": 0.0001263646797710407, "loss": 0.25, "step": 8766 }, { "epoch": 0.41539919450367213, "grad_norm": 0.55859375, "learning_rate": 0.00012635031379557116, "loss": 1.0736, "step": 8767 }, { "epoch": 0.41544657664060647, "grad_norm": 0.625, "learning_rate": 0.00012633594723567972, "loss": 1.2801, "step": 8768 }, { "epoch": 0.41549395877754086, "grad_norm": 0.39453125, "learning_rate": 0.000126321580091685, "loss": 0.3909, "step": 8769 }, { "epoch": 0.41554134091447525, "grad_norm": 0.31640625, "learning_rate": 0.0001263072123639057, "loss": 0.0057, "step": 8770 }, { "epoch": 0.4155887230514096, "grad_norm": 0.609375, "learning_rate": 0.00012629284405266044, "loss": 0.879, "step": 8771 }, { "epoch": 0.415636105188344, "grad_norm": 0.06005859375, "learning_rate": 0.0001262784751582679, "loss": 0.0036, "step": 8772 }, { "epoch": 0.4156834873252784, "grad_norm": 0.490234375, "learning_rate": 0.0001262641056810468, "loss": 0.0827, "step": 8773 }, { "epoch": 0.41573086946221277, "grad_norm": 0.5390625, "learning_rate": 0.00012624973562131578, "loss": 0.5373, "step": 8774 }, { "epoch": 0.4157782515991471, "grad_norm": 0.267578125, "learning_rate": 0.00012623536497939356, "loss": 0.0488, "step": 8775 }, { "epoch": 0.4158256337360815, "grad_norm": 0.64453125, "learning_rate": 0.00012622099375559894, "loss": 0.0914, "step": 8776 }, { "epoch": 0.4158730158730159, "grad_norm": 0.55078125, "learning_rate": 0.00012620662195025057, "loss": 0.032, "step": 8777 }, { "epoch": 0.41592039800995023, "grad_norm": 0.62109375, "learning_rate": 0.00012619224956366724, "loss": 1.1273, "step": 8778 }, { "epoch": 0.4159677801468846, "grad_norm": 0.609375, "learning_rate": 0.00012617787659616775, "loss": 0.6303, "step": 8779 }, { "epoch": 0.416015162283819, "grad_norm": 0.84375, "learning_rate": 0.00012616350304807083, "loss": 1.3105, "step": 8780 }, { "epoch": 0.41606254442075336, "grad_norm": 0.828125, "learning_rate": 0.00012614912891969525, "loss": 1.2002, "step": 8781 }, { "epoch": 0.41610992655768775, "grad_norm": 0.62109375, "learning_rate": 0.00012613475421135987, "loss": 1.2593, "step": 8782 }, { "epoch": 0.41615730869462214, "grad_norm": 1.0859375, "learning_rate": 0.00012612037892338347, "loss": 1.1892, "step": 8783 }, { "epoch": 0.4162046908315565, "grad_norm": 0.53515625, "learning_rate": 0.0001261060030560849, "loss": 0.271, "step": 8784 }, { "epoch": 0.41625207296849087, "grad_norm": 0.337890625, "learning_rate": 0.00012609162660978296, "loss": 0.1717, "step": 8785 }, { "epoch": 0.41629945510542526, "grad_norm": 0.640625, "learning_rate": 0.00012607724958479656, "loss": 0.7417, "step": 8786 }, { "epoch": 0.41634683724235966, "grad_norm": 0.6953125, "learning_rate": 0.00012606287198144454, "loss": 0.5609, "step": 8787 }, { "epoch": 0.416394219379294, "grad_norm": 0.55078125, "learning_rate": 0.00012604849380004577, "loss": 0.7522, "step": 8788 }, { "epoch": 0.4164416015162284, "grad_norm": 0.419921875, "learning_rate": 0.00012603411504091917, "loss": 0.8829, "step": 8789 }, { "epoch": 0.4164889836531628, "grad_norm": 1.0078125, "learning_rate": 0.00012601973570438362, "loss": 0.49, "step": 8790 }, { "epoch": 0.4165363657900971, "grad_norm": 0.59765625, "learning_rate": 0.00012600535579075802, "loss": 1.0319, "step": 8791 }, { "epoch": 0.4165837479270315, "grad_norm": 0.6796875, "learning_rate": 0.00012599097530036136, "loss": 0.6672, "step": 8792 }, { "epoch": 0.4166311300639659, "grad_norm": 0.7109375, "learning_rate": 0.00012597659423351252, "loss": 1.1843, "step": 8793 }, { "epoch": 0.41667851220090024, "grad_norm": 0.55859375, "learning_rate": 0.00012596221259053052, "loss": 0.6169, "step": 8794 }, { "epoch": 0.41672589433783463, "grad_norm": 0.7109375, "learning_rate": 0.0001259478303717343, "loss": 1.0632, "step": 8795 }, { "epoch": 0.416773276474769, "grad_norm": 0.67578125, "learning_rate": 0.00012593344757744286, "loss": 0.8213, "step": 8796 }, { "epoch": 0.41682065861170337, "grad_norm": 0.875, "learning_rate": 0.00012591906420797512, "loss": 1.3638, "step": 8797 }, { "epoch": 0.41686804074863776, "grad_norm": 0.51953125, "learning_rate": 0.00012590468026365016, "loss": 0.0186, "step": 8798 }, { "epoch": 0.41691542288557215, "grad_norm": 0.166015625, "learning_rate": 0.00012589029574478698, "loss": 0.0075, "step": 8799 }, { "epoch": 0.4169628050225065, "grad_norm": 0.06396484375, "learning_rate": 0.00012587591065170466, "loss": 0.0046, "step": 8800 }, { "epoch": 0.4170101871594409, "grad_norm": 1.015625, "learning_rate": 0.00012586152498472218, "loss": 1.0377, "step": 8801 }, { "epoch": 0.4170575692963753, "grad_norm": 0.640625, "learning_rate": 0.0001258471387441586, "loss": 1.0316, "step": 8802 }, { "epoch": 0.41710495143330967, "grad_norm": 0.005462646484375, "learning_rate": 0.00012583275193033303, "loss": 0.0002, "step": 8803 }, { "epoch": 0.417152333570244, "grad_norm": 0.251953125, "learning_rate": 0.00012581836454356456, "loss": 0.0459, "step": 8804 }, { "epoch": 0.4171997157071784, "grad_norm": 0.71484375, "learning_rate": 0.00012580397658417224, "loss": 1.2865, "step": 8805 }, { "epoch": 0.4172470978441128, "grad_norm": 0.85546875, "learning_rate": 0.00012578958805247522, "loss": 0.7229, "step": 8806 }, { "epoch": 0.41729447998104713, "grad_norm": 0.1689453125, "learning_rate": 0.00012577519894879256, "loss": 0.0235, "step": 8807 }, { "epoch": 0.4173418621179815, "grad_norm": 0.12158203125, "learning_rate": 0.00012576080927344352, "loss": 0.0176, "step": 8808 }, { "epoch": 0.4173892442549159, "grad_norm": 0.65234375, "learning_rate": 0.0001257464190267471, "loss": 0.6251, "step": 8809 }, { "epoch": 0.41743662639185025, "grad_norm": 0.625, "learning_rate": 0.00012573202820902256, "loss": 1.1001, "step": 8810 }, { "epoch": 0.41748400852878464, "grad_norm": 0.451171875, "learning_rate": 0.00012571763682058904, "loss": 0.5405, "step": 8811 }, { "epoch": 0.41753139066571904, "grad_norm": 0.796875, "learning_rate": 0.00012570324486176575, "loss": 0.988, "step": 8812 }, { "epoch": 0.4175787728026534, "grad_norm": 0.4921875, "learning_rate": 0.0001256888523328718, "loss": 0.0729, "step": 8813 }, { "epoch": 0.41762615493958777, "grad_norm": 0.50390625, "learning_rate": 0.00012567445923422654, "loss": 0.8022, "step": 8814 }, { "epoch": 0.41767353707652216, "grad_norm": 0.390625, "learning_rate": 0.00012566006556614912, "loss": 0.0961, "step": 8815 }, { "epoch": 0.41772091921345655, "grad_norm": 0.53515625, "learning_rate": 0.00012564567132895873, "loss": 0.932, "step": 8816 }, { "epoch": 0.4177683013503909, "grad_norm": 0.1748046875, "learning_rate": 0.00012563127652297467, "loss": 0.0159, "step": 8817 }, { "epoch": 0.4178156834873253, "grad_norm": 0.52734375, "learning_rate": 0.00012561688114851624, "loss": 0.6744, "step": 8818 }, { "epoch": 0.4178630656242597, "grad_norm": 0.498046875, "learning_rate": 0.00012560248520590263, "loss": 0.556, "step": 8819 }, { "epoch": 0.417910447761194, "grad_norm": 0.70703125, "learning_rate": 0.0001255880886954532, "loss": 1.4377, "step": 8820 }, { "epoch": 0.4179578298981284, "grad_norm": 0.72265625, "learning_rate": 0.0001255736916174872, "loss": 1.3856, "step": 8821 }, { "epoch": 0.4180052120350628, "grad_norm": 0.52734375, "learning_rate": 0.00012555929397232396, "loss": 0.9241, "step": 8822 }, { "epoch": 0.41805259417199714, "grad_norm": 0.70703125, "learning_rate": 0.00012554489576028282, "loss": 0.9361, "step": 8823 }, { "epoch": 0.41809997630893153, "grad_norm": 0.89453125, "learning_rate": 0.0001255304969816831, "loss": 0.0571, "step": 8824 }, { "epoch": 0.4181473584458659, "grad_norm": 0.0196533203125, "learning_rate": 0.0001255160976368441, "loss": 0.0014, "step": 8825 }, { "epoch": 0.41819474058280026, "grad_norm": 0.267578125, "learning_rate": 0.0001255016977260853, "loss": 0.017, "step": 8826 }, { "epoch": 0.41824212271973465, "grad_norm": 0.54296875, "learning_rate": 0.000125487297249726, "loss": 0.5335, "step": 8827 }, { "epoch": 0.41828950485666905, "grad_norm": 0.8046875, "learning_rate": 0.00012547289620808556, "loss": 0.751, "step": 8828 }, { "epoch": 0.4183368869936034, "grad_norm": 0.9765625, "learning_rate": 0.00012545849460148342, "loss": 0.5795, "step": 8829 }, { "epoch": 0.4183842691305378, "grad_norm": 0.14453125, "learning_rate": 0.000125444092430239, "loss": 0.018, "step": 8830 }, { "epoch": 0.41843165126747217, "grad_norm": 0.6484375, "learning_rate": 0.00012542968969467172, "loss": 1.1114, "step": 8831 }, { "epoch": 0.41847903340440656, "grad_norm": 0.76171875, "learning_rate": 0.00012541528639510097, "loss": 1.3415, "step": 8832 }, { "epoch": 0.4185264155413409, "grad_norm": 0.64453125, "learning_rate": 0.00012540088253184628, "loss": 0.1147, "step": 8833 }, { "epoch": 0.4185737976782753, "grad_norm": 0.0196533203125, "learning_rate": 0.00012538647810522708, "loss": 0.0015, "step": 8834 }, { "epoch": 0.4186211798152097, "grad_norm": 0.2392578125, "learning_rate": 0.00012537207311556282, "loss": 0.0165, "step": 8835 }, { "epoch": 0.418668561952144, "grad_norm": 0.431640625, "learning_rate": 0.000125357667563173, "loss": 0.0302, "step": 8836 }, { "epoch": 0.4187159440890784, "grad_norm": 0.625, "learning_rate": 0.00012534326144837712, "loss": 0.0651, "step": 8837 }, { "epoch": 0.4187633262260128, "grad_norm": 0.8125, "learning_rate": 0.0001253288547714947, "loss": 1.0661, "step": 8838 }, { "epoch": 0.41881070836294715, "grad_norm": 0.6796875, "learning_rate": 0.00012531444753284524, "loss": 0.9368, "step": 8839 }, { "epoch": 0.41885809049988154, "grad_norm": 0.58203125, "learning_rate": 0.0001253000397327483, "loss": 1.0135, "step": 8840 }, { "epoch": 0.41890547263681593, "grad_norm": 0.6484375, "learning_rate": 0.00012528563137152344, "loss": 0.989, "step": 8841 }, { "epoch": 0.41895285477375027, "grad_norm": 0.55078125, "learning_rate": 0.00012527122244949024, "loss": 0.571, "step": 8842 }, { "epoch": 0.41900023691068466, "grad_norm": 0.7890625, "learning_rate": 0.0001252568129669682, "loss": 0.7921, "step": 8843 }, { "epoch": 0.41904761904761906, "grad_norm": 0.62109375, "learning_rate": 0.00012524240292427697, "loss": 0.8786, "step": 8844 }, { "epoch": 0.41909500118455345, "grad_norm": 0.5625, "learning_rate": 0.00012522799232173612, "loss": 0.8598, "step": 8845 }, { "epoch": 0.4191423833214878, "grad_norm": 0.609375, "learning_rate": 0.00012521358115966528, "loss": 0.6554, "step": 8846 }, { "epoch": 0.4191897654584222, "grad_norm": 0.9453125, "learning_rate": 0.00012519916943838405, "loss": 0.698, "step": 8847 }, { "epoch": 0.4192371475953566, "grad_norm": 0.375, "learning_rate": 0.0001251847571582121, "loss": 0.1573, "step": 8848 }, { "epoch": 0.4192845297322909, "grad_norm": 0.796875, "learning_rate": 0.0001251703443194691, "loss": 1.1251, "step": 8849 }, { "epoch": 0.4193319118692253, "grad_norm": 0.59765625, "learning_rate": 0.0001251559309224746, "loss": 1.0505, "step": 8850 }, { "epoch": 0.4193792940061597, "grad_norm": 0.58984375, "learning_rate": 0.0001251415169675484, "loss": 0.6131, "step": 8851 }, { "epoch": 0.41942667614309403, "grad_norm": 0.80078125, "learning_rate": 0.0001251271024550101, "loss": 0.8077, "step": 8852 }, { "epoch": 0.41947405828002843, "grad_norm": 0.73046875, "learning_rate": 0.00012511268738517943, "loss": 1.0084, "step": 8853 }, { "epoch": 0.4195214404169628, "grad_norm": 1.0078125, "learning_rate": 0.00012509827175837614, "loss": 0.3676, "step": 8854 }, { "epoch": 0.41956882255389716, "grad_norm": 0.5859375, "learning_rate": 0.00012508385557491987, "loss": 0.7035, "step": 8855 }, { "epoch": 0.41961620469083155, "grad_norm": 0.53125, "learning_rate": 0.00012506943883513043, "loss": 1.079, "step": 8856 }, { "epoch": 0.41966358682776594, "grad_norm": 0.59765625, "learning_rate": 0.00012505502153932753, "loss": 1.1291, "step": 8857 }, { "epoch": 0.4197109689647003, "grad_norm": 0.671875, "learning_rate": 0.00012504060368783096, "loss": 1.1054, "step": 8858 }, { "epoch": 0.4197583511016347, "grad_norm": 0.6640625, "learning_rate": 0.00012502618528096045, "loss": 1.0571, "step": 8859 }, { "epoch": 0.41980573323856907, "grad_norm": 0.81640625, "learning_rate": 0.00012501176631903583, "loss": 0.9061, "step": 8860 }, { "epoch": 0.41985311537550346, "grad_norm": 0.6953125, "learning_rate": 0.00012499734680237684, "loss": 1.1701, "step": 8861 }, { "epoch": 0.4199004975124378, "grad_norm": 0.62890625, "learning_rate": 0.00012498292673130334, "loss": 0.6589, "step": 8862 }, { "epoch": 0.4199478796493722, "grad_norm": 0.7890625, "learning_rate": 0.00012496850610613512, "loss": 1.3303, "step": 8863 }, { "epoch": 0.4199952617863066, "grad_norm": 0.55078125, "learning_rate": 0.00012495408492719205, "loss": 0.1845, "step": 8864 }, { "epoch": 0.4200426439232409, "grad_norm": 0.25390625, "learning_rate": 0.00012493966319479397, "loss": 0.0253, "step": 8865 }, { "epoch": 0.4200900260601753, "grad_norm": 1.109375, "learning_rate": 0.0001249252409092607, "loss": 1.3075, "step": 8866 }, { "epoch": 0.4201374081971097, "grad_norm": 0.60546875, "learning_rate": 0.00012491081807091214, "loss": 0.6048, "step": 8867 }, { "epoch": 0.42018479033404404, "grad_norm": 0.6171875, "learning_rate": 0.0001248963946800682, "loss": 0.9905, "step": 8868 }, { "epoch": 0.42023217247097844, "grad_norm": 0.54296875, "learning_rate": 0.0001248819707370487, "loss": 1.0346, "step": 8869 }, { "epoch": 0.42027955460791283, "grad_norm": 0.2138671875, "learning_rate": 0.00012486754624217361, "loss": 0.0315, "step": 8870 }, { "epoch": 0.42032693674484717, "grad_norm": 0.63671875, "learning_rate": 0.0001248531211957628, "loss": 0.9402, "step": 8871 }, { "epoch": 0.42037431888178156, "grad_norm": 0.65234375, "learning_rate": 0.00012483869559813627, "loss": 0.748, "step": 8872 }, { "epoch": 0.42042170101871595, "grad_norm": 0.57421875, "learning_rate": 0.00012482426944961396, "loss": 1.1636, "step": 8873 }, { "epoch": 0.42046908315565035, "grad_norm": 0.255859375, "learning_rate": 0.00012480984275051575, "loss": 0.0295, "step": 8874 }, { "epoch": 0.4205164652925847, "grad_norm": 0.291015625, "learning_rate": 0.00012479541550116166, "loss": 0.026, "step": 8875 }, { "epoch": 0.4205638474295191, "grad_norm": 0.640625, "learning_rate": 0.00012478098770187166, "loss": 0.3643, "step": 8876 }, { "epoch": 0.42061122956645347, "grad_norm": 0.7734375, "learning_rate": 0.00012476655935296575, "loss": 1.3416, "step": 8877 }, { "epoch": 0.4206586117033878, "grad_norm": 0.6640625, "learning_rate": 0.00012475213045476394, "loss": 1.367, "step": 8878 }, { "epoch": 0.4207059938403222, "grad_norm": 0.345703125, "learning_rate": 0.00012473770100758624, "loss": 0.0348, "step": 8879 }, { "epoch": 0.4207533759772566, "grad_norm": 0.53515625, "learning_rate": 0.0001247232710117527, "loss": 0.4657, "step": 8880 }, { "epoch": 0.42080075811419093, "grad_norm": 0.56640625, "learning_rate": 0.00012470884046758332, "loss": 0.5723, "step": 8881 }, { "epoch": 0.4208481402511253, "grad_norm": 0.671875, "learning_rate": 0.00012469440937539817, "loss": 0.9915, "step": 8882 }, { "epoch": 0.4208955223880597, "grad_norm": 0.58203125, "learning_rate": 0.00012467997773551735, "loss": 1.0745, "step": 8883 }, { "epoch": 0.42094290452499405, "grad_norm": 0.65234375, "learning_rate": 0.00012466554554826088, "loss": 1.0999, "step": 8884 }, { "epoch": 0.42099028666192845, "grad_norm": 0.68359375, "learning_rate": 0.0001246511128139489, "loss": 1.0777, "step": 8885 }, { "epoch": 0.42103766879886284, "grad_norm": 0.6640625, "learning_rate": 0.0001246366795329015, "loss": 1.0084, "step": 8886 }, { "epoch": 0.4210850509357972, "grad_norm": 0.55859375, "learning_rate": 0.00012462224570543876, "loss": 0.646, "step": 8887 }, { "epoch": 0.42113243307273157, "grad_norm": 0.1943359375, "learning_rate": 0.00012460781133188088, "loss": 0.1525, "step": 8888 }, { "epoch": 0.42117981520966596, "grad_norm": 0.59765625, "learning_rate": 0.00012459337641254796, "loss": 0.1222, "step": 8889 }, { "epoch": 0.42122719734660036, "grad_norm": 0.78515625, "learning_rate": 0.0001245789409477601, "loss": 0.7078, "step": 8890 }, { "epoch": 0.4212745794835347, "grad_norm": 0.73046875, "learning_rate": 0.00012456450493783752, "loss": 0.5646, "step": 8891 }, { "epoch": 0.4213219616204691, "grad_norm": 0.765625, "learning_rate": 0.00012455006838310042, "loss": 0.8092, "step": 8892 }, { "epoch": 0.4213693437574035, "grad_norm": 0.8125, "learning_rate": 0.00012453563128386893, "loss": 0.9258, "step": 8893 }, { "epoch": 0.4214167258943378, "grad_norm": 0.70703125, "learning_rate": 0.00012452119364046325, "loss": 1.4296, "step": 8894 }, { "epoch": 0.4214641080312722, "grad_norm": 0.7578125, "learning_rate": 0.00012450675545320366, "loss": 1.3191, "step": 8895 }, { "epoch": 0.4215114901682066, "grad_norm": 0.2177734375, "learning_rate": 0.00012449231672241032, "loss": 0.0116, "step": 8896 }, { "epoch": 0.42155887230514094, "grad_norm": 0.002197265625, "learning_rate": 0.0001244778774484035, "loss": 0.0001, "step": 8897 }, { "epoch": 0.42160625444207533, "grad_norm": 0.42578125, "learning_rate": 0.0001244634376315034, "loss": 0.0157, "step": 8898 }, { "epoch": 0.4216536365790097, "grad_norm": 0.6796875, "learning_rate": 0.0001244489972720303, "loss": 1.2617, "step": 8899 }, { "epoch": 0.42170101871594406, "grad_norm": 0.54296875, "learning_rate": 0.00012443455637030452, "loss": 1.3783, "step": 8900 }, { "epoch": 0.42174840085287846, "grad_norm": 0.95703125, "learning_rate": 0.00012442011492664628, "loss": 1.0486, "step": 8901 }, { "epoch": 0.42179578298981285, "grad_norm": 0.62890625, "learning_rate": 0.0001244056729413759, "loss": 1.0188, "step": 8902 }, { "epoch": 0.42184316512674724, "grad_norm": 0.73828125, "learning_rate": 0.0001243912304148137, "loss": 1.1449, "step": 8903 }, { "epoch": 0.4218905472636816, "grad_norm": 0.515625, "learning_rate": 0.00012437678734728, "loss": 0.5396, "step": 8904 }, { "epoch": 0.421937929400616, "grad_norm": 0.91796875, "learning_rate": 0.00012436234373909512, "loss": 0.8482, "step": 8905 }, { "epoch": 0.42198531153755037, "grad_norm": 0.140625, "learning_rate": 0.0001243478995905794, "loss": 0.0132, "step": 8906 }, { "epoch": 0.4220326936744847, "grad_norm": 0.5859375, "learning_rate": 0.00012433345490205322, "loss": 0.5625, "step": 8907 }, { "epoch": 0.4220800758114191, "grad_norm": 0.2490234375, "learning_rate": 0.00012431900967383686, "loss": 0.0742, "step": 8908 }, { "epoch": 0.4221274579483535, "grad_norm": 0.56640625, "learning_rate": 0.00012430456390625082, "loss": 0.529, "step": 8909 }, { "epoch": 0.42217484008528783, "grad_norm": 0.80078125, "learning_rate": 0.00012429011759961545, "loss": 0.6306, "step": 8910 }, { "epoch": 0.4222222222222222, "grad_norm": 0.56640625, "learning_rate": 0.00012427567075425113, "loss": 0.1279, "step": 8911 }, { "epoch": 0.4222696043591566, "grad_norm": 0.65625, "learning_rate": 0.0001242612233704783, "loss": 1.2095, "step": 8912 }, { "epoch": 0.42231698649609095, "grad_norm": 0.78125, "learning_rate": 0.00012424677544861738, "loss": 0.959, "step": 8913 }, { "epoch": 0.42236436863302534, "grad_norm": 0.79296875, "learning_rate": 0.00012423232698898878, "loss": 0.9641, "step": 8914 }, { "epoch": 0.42241175076995974, "grad_norm": 0.2021484375, "learning_rate": 0.000124217877991913, "loss": 0.016, "step": 8915 }, { "epoch": 0.4224591329068941, "grad_norm": 0.74609375, "learning_rate": 0.00012420342845771048, "loss": 0.5453, "step": 8916 }, { "epoch": 0.42250651504382847, "grad_norm": 0.6484375, "learning_rate": 0.00012418897838670166, "loss": 0.8954, "step": 8917 }, { "epoch": 0.42255389718076286, "grad_norm": 0.6796875, "learning_rate": 0.00012417452777920712, "loss": 1.2048, "step": 8918 }, { "epoch": 0.42260127931769725, "grad_norm": 0.158203125, "learning_rate": 0.0001241600766355473, "loss": 0.01, "step": 8919 }, { "epoch": 0.4226486614546316, "grad_norm": 0.65625, "learning_rate": 0.00012414562495604268, "loss": 1.2205, "step": 8920 }, { "epoch": 0.422696043591566, "grad_norm": 0.94921875, "learning_rate": 0.00012413117274101386, "loss": 1.1835, "step": 8921 }, { "epoch": 0.4227434257285004, "grad_norm": 0.0703125, "learning_rate": 0.00012411671999078128, "loss": 0.0057, "step": 8922 }, { "epoch": 0.4227908078654347, "grad_norm": 0.67578125, "learning_rate": 0.0001241022667056656, "loss": 0.8422, "step": 8923 }, { "epoch": 0.4228381900023691, "grad_norm": 0.4140625, "learning_rate": 0.00012408781288598725, "loss": 0.0834, "step": 8924 }, { "epoch": 0.4228855721393035, "grad_norm": 0.63671875, "learning_rate": 0.0001240733585320669, "loss": 0.8948, "step": 8925 }, { "epoch": 0.42293295427623784, "grad_norm": 0.61328125, "learning_rate": 0.0001240589036442251, "loss": 1.1781, "step": 8926 }, { "epoch": 0.42298033641317223, "grad_norm": 0.67578125, "learning_rate": 0.00012404444822278242, "loss": 0.6403, "step": 8927 }, { "epoch": 0.4230277185501066, "grad_norm": 0.703125, "learning_rate": 0.00012402999226805955, "loss": 0.6801, "step": 8928 }, { "epoch": 0.42307510068704096, "grad_norm": 0.19921875, "learning_rate": 0.00012401553578037698, "loss": 0.0082, "step": 8929 }, { "epoch": 0.42312248282397535, "grad_norm": 0.66796875, "learning_rate": 0.00012400107876005544, "loss": 0.8599, "step": 8930 }, { "epoch": 0.42316986496090975, "grad_norm": 0.80859375, "learning_rate": 0.00012398662120741553, "loss": 0.2746, "step": 8931 }, { "epoch": 0.42321724709784414, "grad_norm": 0.625, "learning_rate": 0.0001239721631227779, "loss": 1.0153, "step": 8932 }, { "epoch": 0.4232646292347785, "grad_norm": 0.63671875, "learning_rate": 0.00012395770450646324, "loss": 0.7281, "step": 8933 }, { "epoch": 0.42331201137171287, "grad_norm": 0.6015625, "learning_rate": 0.00012394324535879223, "loss": 0.9411, "step": 8934 }, { "epoch": 0.42335939350864726, "grad_norm": 0.11767578125, "learning_rate": 0.00012392878568008554, "loss": 0.0147, "step": 8935 }, { "epoch": 0.4234067756455816, "grad_norm": 0.62109375, "learning_rate": 0.00012391432547066383, "loss": 1.3564, "step": 8936 }, { "epoch": 0.423454157782516, "grad_norm": 0.578125, "learning_rate": 0.0001238998647308479, "loss": 1.171, "step": 8937 }, { "epoch": 0.4235015399194504, "grad_norm": 0.71484375, "learning_rate": 0.0001238854034609584, "loss": 1.0655, "step": 8938 }, { "epoch": 0.4235489220563847, "grad_norm": 0.734375, "learning_rate": 0.0001238709416613161, "loss": 0.6354, "step": 8939 }, { "epoch": 0.4235963041933191, "grad_norm": 0.8359375, "learning_rate": 0.00012385647933224172, "loss": 1.0352, "step": 8940 }, { "epoch": 0.4236436863302535, "grad_norm": 0.55078125, "learning_rate": 0.00012384201647405607, "loss": 0.0452, "step": 8941 }, { "epoch": 0.42369106846718785, "grad_norm": 0.8203125, "learning_rate": 0.00012382755308707987, "loss": 0.2959, "step": 8942 }, { "epoch": 0.42373845060412224, "grad_norm": 0.6796875, "learning_rate": 0.00012381308917163393, "loss": 0.7554, "step": 8943 }, { "epoch": 0.42378583274105663, "grad_norm": 0.55859375, "learning_rate": 0.00012379862472803904, "loss": 0.5578, "step": 8944 }, { "epoch": 0.42383321487799097, "grad_norm": 0.57421875, "learning_rate": 0.000123784159756616, "loss": 0.9, "step": 8945 }, { "epoch": 0.42388059701492536, "grad_norm": 0.6953125, "learning_rate": 0.00012376969425768563, "loss": 1.1052, "step": 8946 }, { "epoch": 0.42392797915185976, "grad_norm": 0.7890625, "learning_rate": 0.00012375522823156876, "loss": 1.1587, "step": 8947 }, { "epoch": 0.42397536128879415, "grad_norm": 0.6484375, "learning_rate": 0.0001237407616785862, "loss": 1.0681, "step": 8948 }, { "epoch": 0.4240227434257285, "grad_norm": 0.1015625, "learning_rate": 0.00012372629459905888, "loss": 0.0124, "step": 8949 }, { "epoch": 0.4240701255626629, "grad_norm": 0.484375, "learning_rate": 0.0001237118269933076, "loss": 0.7137, "step": 8950 }, { "epoch": 0.4241175076995973, "grad_norm": 0.6015625, "learning_rate": 0.00012369735886165326, "loss": 0.7155, "step": 8951 }, { "epoch": 0.4241648898365316, "grad_norm": 0.283203125, "learning_rate": 0.00012368289020441676, "loss": 0.0344, "step": 8952 }, { "epoch": 0.424212271973466, "grad_norm": 0.6171875, "learning_rate": 0.00012366842102191894, "loss": 0.7535, "step": 8953 }, { "epoch": 0.4242596541104004, "grad_norm": 0.7109375, "learning_rate": 0.0001236539513144808, "loss": 0.624, "step": 8954 }, { "epoch": 0.42430703624733473, "grad_norm": 0.671875, "learning_rate": 0.00012363948108242315, "loss": 0.4886, "step": 8955 }, { "epoch": 0.4243544183842691, "grad_norm": 0.6640625, "learning_rate": 0.00012362501032606702, "loss": 0.7854, "step": 8956 }, { "epoch": 0.4244018005212035, "grad_norm": 0.69921875, "learning_rate": 0.00012361053904573333, "loss": 1.3908, "step": 8957 }, { "epoch": 0.42444918265813786, "grad_norm": 0.71875, "learning_rate": 0.00012359606724174303, "loss": 0.9789, "step": 8958 }, { "epoch": 0.42449656479507225, "grad_norm": 0.40625, "learning_rate": 0.0001235815949144171, "loss": 0.03, "step": 8959 }, { "epoch": 0.42454394693200664, "grad_norm": 0.89453125, "learning_rate": 0.00012356712206407653, "loss": 0.806, "step": 8960 }, { "epoch": 0.42459132906894104, "grad_norm": 0.58984375, "learning_rate": 0.0001235526486910423, "loss": 0.1725, "step": 8961 }, { "epoch": 0.4246387112058754, "grad_norm": 0.921875, "learning_rate": 0.00012353817479563535, "loss": 0.9963, "step": 8962 }, { "epoch": 0.42468609334280977, "grad_norm": 0.08056640625, "learning_rate": 0.0001235237003781768, "loss": 0.0063, "step": 8963 }, { "epoch": 0.42473347547974416, "grad_norm": 0.0986328125, "learning_rate": 0.00012350922543898757, "loss": 0.0165, "step": 8964 }, { "epoch": 0.4247808576166785, "grad_norm": 0.74609375, "learning_rate": 0.00012349474997838883, "loss": 1.1218, "step": 8965 }, { "epoch": 0.4248282397536129, "grad_norm": 0.62890625, "learning_rate": 0.00012348027399670155, "loss": 1.1432, "step": 8966 }, { "epoch": 0.4248756218905473, "grad_norm": 0.06640625, "learning_rate": 0.00012346579749424679, "loss": 0.0073, "step": 8967 }, { "epoch": 0.4249230040274816, "grad_norm": 0.6015625, "learning_rate": 0.00012345132047134565, "loss": 1.2107, "step": 8968 }, { "epoch": 0.424970386164416, "grad_norm": 0.236328125, "learning_rate": 0.00012343684292831917, "loss": 0.0673, "step": 8969 }, { "epoch": 0.4250177683013504, "grad_norm": 0.765625, "learning_rate": 0.0001234223648654885, "loss": 1.0352, "step": 8970 }, { "epoch": 0.42506515043828474, "grad_norm": 0.65625, "learning_rate": 0.00012340788628317472, "loss": 0.9669, "step": 8971 }, { "epoch": 0.42511253257521914, "grad_norm": 0.7265625, "learning_rate": 0.000123393407181699, "loss": 0.526, "step": 8972 }, { "epoch": 0.42515991471215353, "grad_norm": 0.2080078125, "learning_rate": 0.00012337892756138235, "loss": 0.1482, "step": 8973 }, { "epoch": 0.42520729684908787, "grad_norm": 0.8828125, "learning_rate": 0.00012336444742254603, "loss": 0.4275, "step": 8974 }, { "epoch": 0.42525467898602226, "grad_norm": 0.66796875, "learning_rate": 0.00012334996676551115, "loss": 0.082, "step": 8975 }, { "epoch": 0.42530206112295665, "grad_norm": 0.369140625, "learning_rate": 0.0001233354855905989, "loss": 0.0929, "step": 8976 }, { "epoch": 0.42534944325989105, "grad_norm": 0.67578125, "learning_rate": 0.0001233210038981304, "loss": 0.9906, "step": 8977 }, { "epoch": 0.4253968253968254, "grad_norm": 0.75390625, "learning_rate": 0.00012330652168842686, "loss": 0.8739, "step": 8978 }, { "epoch": 0.4254442075337598, "grad_norm": 0.90625, "learning_rate": 0.00012329203896180953, "loss": 0.9353, "step": 8979 }, { "epoch": 0.42549158967069417, "grad_norm": 0.5859375, "learning_rate": 0.0001232775557185996, "loss": 0.1222, "step": 8980 }, { "epoch": 0.4255389718076285, "grad_norm": 0.8828125, "learning_rate": 0.00012326307195911822, "loss": 1.0315, "step": 8981 }, { "epoch": 0.4255863539445629, "grad_norm": 0.60546875, "learning_rate": 0.0001232485876836867, "loss": 0.897, "step": 8982 }, { "epoch": 0.4256337360814973, "grad_norm": 0.73828125, "learning_rate": 0.0001232341028926263, "loss": 1.1748, "step": 8983 }, { "epoch": 0.42568111821843163, "grad_norm": 0.1865234375, "learning_rate": 0.00012321961758625824, "loss": 0.0113, "step": 8984 }, { "epoch": 0.425728500355366, "grad_norm": 0.6015625, "learning_rate": 0.00012320513176490377, "loss": 0.4843, "step": 8985 }, { "epoch": 0.4257758824923004, "grad_norm": 0.68359375, "learning_rate": 0.0001231906454288842, "loss": 1.2123, "step": 8986 }, { "epoch": 0.42582326462923475, "grad_norm": 0.84765625, "learning_rate": 0.00012317615857852083, "loss": 0.4092, "step": 8987 }, { "epoch": 0.42587064676616915, "grad_norm": 1.265625, "learning_rate": 0.00012316167121413497, "loss": 1.2504, "step": 8988 }, { "epoch": 0.42591802890310354, "grad_norm": 0.74609375, "learning_rate": 0.00012314718333604786, "loss": 0.5383, "step": 8989 }, { "epoch": 0.42596541104003793, "grad_norm": 0.0031585693359375, "learning_rate": 0.00012313269494458088, "loss": 0.0002, "step": 8990 }, { "epoch": 0.42601279317697227, "grad_norm": 0.2353515625, "learning_rate": 0.00012311820604005543, "loss": 0.0202, "step": 8991 }, { "epoch": 0.42606017531390666, "grad_norm": 0.6953125, "learning_rate": 0.00012310371662279277, "loss": 1.3952, "step": 8992 }, { "epoch": 0.42610755745084106, "grad_norm": 0.447265625, "learning_rate": 0.00012308922669311426, "loss": 0.7924, "step": 8993 }, { "epoch": 0.4261549395877754, "grad_norm": 0.57421875, "learning_rate": 0.0001230747362513413, "loss": 1.1176, "step": 8994 }, { "epoch": 0.4262023217247098, "grad_norm": 0.8515625, "learning_rate": 0.00012306024529779526, "loss": 1.4522, "step": 8995 }, { "epoch": 0.4262497038616442, "grad_norm": 0.42578125, "learning_rate": 0.00012304575383279755, "loss": 0.1515, "step": 8996 }, { "epoch": 0.4262970859985785, "grad_norm": 0.07470703125, "learning_rate": 0.00012303126185666958, "loss": 0.0047, "step": 8997 }, { "epoch": 0.4263444681355129, "grad_norm": 0.466796875, "learning_rate": 0.00012301676936973273, "loss": 0.2999, "step": 8998 }, { "epoch": 0.4263918502724473, "grad_norm": 0.5234375, "learning_rate": 0.0001230022763723085, "loss": 1.2043, "step": 8999 }, { "epoch": 0.42643923240938164, "grad_norm": 0.76171875, "learning_rate": 0.00012298778286471825, "loss": 1.2351, "step": 9000 }, { "epoch": 0.42648661454631603, "grad_norm": 0.193359375, "learning_rate": 0.00012297328884728346, "loss": 0.1498, "step": 9001 }, { "epoch": 0.4265339966832504, "grad_norm": 0.15625, "learning_rate": 0.00012295879432032558, "loss": 0.043, "step": 9002 }, { "epoch": 0.42658137882018476, "grad_norm": 0.77734375, "learning_rate": 0.0001229442992841661, "loss": 1.1026, "step": 9003 }, { "epoch": 0.42662876095711916, "grad_norm": 0.78125, "learning_rate": 0.00012292980373912652, "loss": 0.0639, "step": 9004 }, { "epoch": 0.42667614309405355, "grad_norm": 0.765625, "learning_rate": 0.0001229153076855283, "loss": 1.012, "step": 9005 }, { "epoch": 0.42672352523098794, "grad_norm": 0.56640625, "learning_rate": 0.00012290081112369298, "loss": 1.2843, "step": 9006 }, { "epoch": 0.4267709073679223, "grad_norm": 0.859375, "learning_rate": 0.00012288631405394206, "loss": 0.7494, "step": 9007 }, { "epoch": 0.4268182895048567, "grad_norm": 0.69140625, "learning_rate": 0.00012287181647659706, "loss": 0.8909, "step": 9008 }, { "epoch": 0.42686567164179107, "grad_norm": 0.6796875, "learning_rate": 0.00012285731839197954, "loss": 1.3663, "step": 9009 }, { "epoch": 0.4269130537787254, "grad_norm": 0.75390625, "learning_rate": 0.00012284281980041103, "loss": 1.3305, "step": 9010 }, { "epoch": 0.4269604359156598, "grad_norm": 0.59765625, "learning_rate": 0.00012282832070221314, "loss": 1.2073, "step": 9011 }, { "epoch": 0.4270078180525942, "grad_norm": 0.73046875, "learning_rate": 0.0001228138210977074, "loss": 0.1763, "step": 9012 }, { "epoch": 0.4270552001895285, "grad_norm": 0.62109375, "learning_rate": 0.0001227993209872154, "loss": 1.3054, "step": 9013 }, { "epoch": 0.4271025823264629, "grad_norm": 0.67578125, "learning_rate": 0.00012278482037105873, "loss": 1.2087, "step": 9014 }, { "epoch": 0.4271499644633973, "grad_norm": 0.51171875, "learning_rate": 0.00012277031924955905, "loss": 0.0055, "step": 9015 }, { "epoch": 0.42719734660033165, "grad_norm": 0.8984375, "learning_rate": 0.0001227558176230379, "loss": 0.9413, "step": 9016 }, { "epoch": 0.42724472873726604, "grad_norm": 0.2158203125, "learning_rate": 0.00012274131549181697, "loss": 0.0318, "step": 9017 }, { "epoch": 0.42729211087420044, "grad_norm": 0.88671875, "learning_rate": 0.0001227268128562179, "loss": 0.8257, "step": 9018 }, { "epoch": 0.4273394930111348, "grad_norm": 0.625, "learning_rate": 0.00012271230971656235, "loss": 1.3141, "step": 9019 }, { "epoch": 0.42738687514806917, "grad_norm": 0.9765625, "learning_rate": 0.0001226978060731719, "loss": 1.0274, "step": 9020 }, { "epoch": 0.42743425728500356, "grad_norm": 0.435546875, "learning_rate": 0.00012268330192636832, "loss": 0.0155, "step": 9021 }, { "epoch": 0.42748163942193795, "grad_norm": 0.58984375, "learning_rate": 0.00012266879727647326, "loss": 0.8564, "step": 9022 }, { "epoch": 0.4275290215588723, "grad_norm": 0.7734375, "learning_rate": 0.00012265429212380847, "loss": 1.0449, "step": 9023 }, { "epoch": 0.4275764036958067, "grad_norm": 0.58984375, "learning_rate": 0.00012263978646869555, "loss": 1.0867, "step": 9024 }, { "epoch": 0.4276237858327411, "grad_norm": 0.8984375, "learning_rate": 0.00012262528031145627, "loss": 1.1932, "step": 9025 }, { "epoch": 0.4276711679696754, "grad_norm": 0.625, "learning_rate": 0.00012261077365241242, "loss": 0.8738, "step": 9026 }, { "epoch": 0.4277185501066098, "grad_norm": 0.65625, "learning_rate": 0.00012259626649188568, "loss": 0.9239, "step": 9027 }, { "epoch": 0.4277659322435442, "grad_norm": 0.65234375, "learning_rate": 0.0001225817588301978, "loss": 0.7741, "step": 9028 }, { "epoch": 0.42781331438047854, "grad_norm": 0.73046875, "learning_rate": 0.0001225672506676706, "loss": 1.0425, "step": 9029 }, { "epoch": 0.42786069651741293, "grad_norm": 0.55859375, "learning_rate": 0.0001225527420046258, "loss": 0.6123, "step": 9030 }, { "epoch": 0.4279080786543473, "grad_norm": 0.021240234375, "learning_rate": 0.0001225382328413852, "loss": 0.0008, "step": 9031 }, { "epoch": 0.42795546079128166, "grad_norm": 0.6328125, "learning_rate": 0.00012252372317827056, "loss": 0.8573, "step": 9032 }, { "epoch": 0.42800284292821605, "grad_norm": 0.703125, "learning_rate": 0.00012250921301560377, "loss": 1.1215, "step": 9033 }, { "epoch": 0.42805022506515045, "grad_norm": 0.17578125, "learning_rate": 0.00012249470235370664, "loss": 0.0241, "step": 9034 }, { "epoch": 0.42809760720208484, "grad_norm": 0.63671875, "learning_rate": 0.00012248019119290093, "loss": 1.162, "step": 9035 }, { "epoch": 0.4281449893390192, "grad_norm": 0.6328125, "learning_rate": 0.00012246567953350852, "loss": 1.4999, "step": 9036 }, { "epoch": 0.42819237147595357, "grad_norm": 0.1943359375, "learning_rate": 0.0001224511673758513, "loss": 0.0294, "step": 9037 }, { "epoch": 0.42823975361288796, "grad_norm": 0.0301513671875, "learning_rate": 0.00012243665472025112, "loss": 0.0009, "step": 9038 }, { "epoch": 0.4282871357498223, "grad_norm": 0.71484375, "learning_rate": 0.0001224221415670298, "loss": 1.0573, "step": 9039 }, { "epoch": 0.4283345178867567, "grad_norm": 0.4765625, "learning_rate": 0.00012240762791650923, "loss": 0.7919, "step": 9040 }, { "epoch": 0.4283819000236911, "grad_norm": 0.28515625, "learning_rate": 0.0001223931137690114, "loss": 0.0177, "step": 9041 }, { "epoch": 0.4284292821606254, "grad_norm": 0.7578125, "learning_rate": 0.00012237859912485815, "loss": 1.1608, "step": 9042 }, { "epoch": 0.4284766642975598, "grad_norm": 0.6953125, "learning_rate": 0.00012236408398437135, "loss": 0.5714, "step": 9043 }, { "epoch": 0.4285240464344942, "grad_norm": 0.416015625, "learning_rate": 0.00012234956834787303, "loss": 0.0568, "step": 9044 }, { "epoch": 0.42857142857142855, "grad_norm": 0.8359375, "learning_rate": 0.00012233505221568512, "loss": 1.2182, "step": 9045 }, { "epoch": 0.42861881070836294, "grad_norm": 0.67578125, "learning_rate": 0.00012232053558812952, "loss": 0.1509, "step": 9046 }, { "epoch": 0.42866619284529733, "grad_norm": 0.76953125, "learning_rate": 0.0001223060184655282, "loss": 1.013, "step": 9047 }, { "epoch": 0.42871357498223167, "grad_norm": 0.76171875, "learning_rate": 0.00012229150084820315, "loss": 1.0359, "step": 9048 }, { "epoch": 0.42876095711916606, "grad_norm": 0.52734375, "learning_rate": 0.0001222769827364764, "loss": 1.0239, "step": 9049 }, { "epoch": 0.42880833925610046, "grad_norm": 0.00162506103515625, "learning_rate": 0.00012226246413066984, "loss": 0.0001, "step": 9050 }, { "epoch": 0.42885572139303485, "grad_norm": 0.6171875, "learning_rate": 0.00012224794503110556, "loss": 1.1826, "step": 9051 }, { "epoch": 0.4289031035299692, "grad_norm": 0.462890625, "learning_rate": 0.00012223342543810556, "loss": 0.7507, "step": 9052 }, { "epoch": 0.4289504856669036, "grad_norm": 0.546875, "learning_rate": 0.00012221890535199186, "loss": 0.4868, "step": 9053 }, { "epoch": 0.428997867803838, "grad_norm": 0.5546875, "learning_rate": 0.00012220438477308655, "loss": 0.8132, "step": 9054 }, { "epoch": 0.4290452499407723, "grad_norm": 0.62109375, "learning_rate": 0.00012218986370171158, "loss": 0.6508, "step": 9055 }, { "epoch": 0.4290926320777067, "grad_norm": 0.1044921875, "learning_rate": 0.00012217534213818907, "loss": 0.0103, "step": 9056 }, { "epoch": 0.4291400142146411, "grad_norm": 0.333984375, "learning_rate": 0.00012216082008284114, "loss": 0.0368, "step": 9057 }, { "epoch": 0.42918739635157543, "grad_norm": 0.66015625, "learning_rate": 0.00012214629753598978, "loss": 0.1443, "step": 9058 }, { "epoch": 0.4292347784885098, "grad_norm": 1.0546875, "learning_rate": 0.00012213177449795713, "loss": 0.7987, "step": 9059 }, { "epoch": 0.4292821606254442, "grad_norm": 0.5546875, "learning_rate": 0.0001221172509690653, "loss": 0.7702, "step": 9060 }, { "epoch": 0.42932954276237856, "grad_norm": 1.0078125, "learning_rate": 0.00012210272694963644, "loss": 0.7252, "step": 9061 }, { "epoch": 0.42937692489931295, "grad_norm": 0.6328125, "learning_rate": 0.00012208820243999263, "loss": 0.817, "step": 9062 }, { "epoch": 0.42942430703624734, "grad_norm": 0.859375, "learning_rate": 0.00012207367744045597, "loss": 0.2632, "step": 9063 }, { "epoch": 0.42947168917318174, "grad_norm": 0.87109375, "learning_rate": 0.0001220591519513487, "loss": 1.2632, "step": 9064 }, { "epoch": 0.4295190713101161, "grad_norm": 0.7109375, "learning_rate": 0.00012204462597299291, "loss": 1.1329, "step": 9065 }, { "epoch": 0.42956645344705047, "grad_norm": 0.6484375, "learning_rate": 0.00012203009950571078, "loss": 1.0932, "step": 9066 }, { "epoch": 0.42961383558398486, "grad_norm": 0.5, "learning_rate": 0.00012201557254982454, "loss": 0.6318, "step": 9067 }, { "epoch": 0.4296612177209192, "grad_norm": 0.244140625, "learning_rate": 0.00012200104510565635, "loss": 0.05, "step": 9068 }, { "epoch": 0.4297085998578536, "grad_norm": 0.68359375, "learning_rate": 0.0001219865171735284, "loss": 1.1741, "step": 9069 }, { "epoch": 0.429755981994788, "grad_norm": 0.6953125, "learning_rate": 0.00012197198875376295, "loss": 1.007, "step": 9070 }, { "epoch": 0.4298033641317223, "grad_norm": 0.734375, "learning_rate": 0.00012195745984668216, "loss": 0.9968, "step": 9071 }, { "epoch": 0.4298507462686567, "grad_norm": 0.578125, "learning_rate": 0.00012194293045260832, "loss": 1.262, "step": 9072 }, { "epoch": 0.4298981284055911, "grad_norm": 0.74609375, "learning_rate": 0.00012192840057186363, "loss": 1.5263, "step": 9073 }, { "epoch": 0.42994551054252544, "grad_norm": 0.8984375, "learning_rate": 0.00012191387020477038, "loss": 0.0267, "step": 9074 }, { "epoch": 0.42999289267945984, "grad_norm": 0.2119140625, "learning_rate": 0.00012189933935165086, "loss": 0.0294, "step": 9075 }, { "epoch": 0.43004027481639423, "grad_norm": 0.19140625, "learning_rate": 0.00012188480801282729, "loss": 0.0222, "step": 9076 }, { "epoch": 0.43008765695332857, "grad_norm": 0.54296875, "learning_rate": 0.00012187027618862199, "loss": 1.0799, "step": 9077 }, { "epoch": 0.43013503909026296, "grad_norm": 0.1650390625, "learning_rate": 0.00012185574387935727, "loss": 0.014, "step": 9078 }, { "epoch": 0.43018242122719735, "grad_norm": 0.58984375, "learning_rate": 0.00012184121108535543, "loss": 0.6301, "step": 9079 }, { "epoch": 0.43022980336413175, "grad_norm": 0.93359375, "learning_rate": 0.0001218266778069388, "loss": 0.2887, "step": 9080 }, { "epoch": 0.4302771855010661, "grad_norm": 0.265625, "learning_rate": 0.00012181214404442967, "loss": 0.0134, "step": 9081 }, { "epoch": 0.4303245676380005, "grad_norm": 0.640625, "learning_rate": 0.00012179760979815045, "loss": 0.9382, "step": 9082 }, { "epoch": 0.43037194977493487, "grad_norm": 0.546875, "learning_rate": 0.00012178307506842345, "loss": 0.7161, "step": 9083 }, { "epoch": 0.4304193319118692, "grad_norm": 0.1279296875, "learning_rate": 0.00012176853985557104, "loss": 0.0159, "step": 9084 }, { "epoch": 0.4304667140488036, "grad_norm": 0.291015625, "learning_rate": 0.00012175400415991563, "loss": 0.1446, "step": 9085 }, { "epoch": 0.430514096185738, "grad_norm": 0.8515625, "learning_rate": 0.00012173946798177956, "loss": 0.7958, "step": 9086 }, { "epoch": 0.43056147832267233, "grad_norm": 0.09375, "learning_rate": 0.00012172493132148525, "loss": 0.0021, "step": 9087 }, { "epoch": 0.4306088604596067, "grad_norm": 0.7890625, "learning_rate": 0.0001217103941793551, "loss": 0.4472, "step": 9088 }, { "epoch": 0.4306562425965411, "grad_norm": 0.73828125, "learning_rate": 0.00012169585655571154, "loss": 0.5643, "step": 9089 }, { "epoch": 0.43070362473347545, "grad_norm": 0.453125, "learning_rate": 0.00012168131845087699, "loss": 0.5358, "step": 9090 }, { "epoch": 0.43075100687040985, "grad_norm": 0.578125, "learning_rate": 0.00012166677986517387, "loss": 0.8892, "step": 9091 }, { "epoch": 0.43079838900734424, "grad_norm": 0.69921875, "learning_rate": 0.00012165224079892467, "loss": 1.2856, "step": 9092 }, { "epoch": 0.43084577114427863, "grad_norm": 0.68359375, "learning_rate": 0.00012163770125245185, "loss": 1.1084, "step": 9093 }, { "epoch": 0.43089315328121297, "grad_norm": 0.90625, "learning_rate": 0.00012162316122607781, "loss": 0.9945, "step": 9094 }, { "epoch": 0.43094053541814736, "grad_norm": 0.8125, "learning_rate": 0.00012160862072012515, "loss": 1.2901, "step": 9095 }, { "epoch": 0.43098791755508176, "grad_norm": 0.0400390625, "learning_rate": 0.00012159407973491625, "loss": 0.0036, "step": 9096 }, { "epoch": 0.4310352996920161, "grad_norm": 0.6171875, "learning_rate": 0.00012157953827077367, "loss": 1.0957, "step": 9097 }, { "epoch": 0.4310826818289505, "grad_norm": 0.6953125, "learning_rate": 0.00012156499632801994, "loss": 1.5313, "step": 9098 }, { "epoch": 0.4311300639658849, "grad_norm": 0.002532958984375, "learning_rate": 0.00012155045390697754, "loss": 0.0002, "step": 9099 }, { "epoch": 0.4311774461028192, "grad_norm": 0.6875, "learning_rate": 0.00012153591100796903, "loss": 1.1189, "step": 9100 }, { "epoch": 0.4312248282397536, "grad_norm": 0.70703125, "learning_rate": 0.00012152136763131697, "loss": 0.5701, "step": 9101 }, { "epoch": 0.431272210376688, "grad_norm": 0.0283203125, "learning_rate": 0.00012150682377734388, "loss": 0.0027, "step": 9102 }, { "epoch": 0.43131959251362234, "grad_norm": 0.62109375, "learning_rate": 0.00012149227944637235, "loss": 0.9697, "step": 9103 }, { "epoch": 0.43136697465055673, "grad_norm": 0.169921875, "learning_rate": 0.00012147773463872496, "loss": 0.0147, "step": 9104 }, { "epoch": 0.4314143567874911, "grad_norm": 0.73828125, "learning_rate": 0.0001214631893547243, "loss": 1.3039, "step": 9105 }, { "epoch": 0.43146173892442546, "grad_norm": 0.69140625, "learning_rate": 0.00012144864359469294, "loss": 0.8633, "step": 9106 }, { "epoch": 0.43150912106135986, "grad_norm": 0.64453125, "learning_rate": 0.00012143409735895353, "loss": 0.8172, "step": 9107 }, { "epoch": 0.43155650319829425, "grad_norm": 0.1259765625, "learning_rate": 0.00012141955064782866, "loss": 0.0044, "step": 9108 }, { "epoch": 0.43160388533522864, "grad_norm": 0.69140625, "learning_rate": 0.00012140500346164099, "loss": 1.3199, "step": 9109 }, { "epoch": 0.431651267472163, "grad_norm": 0.71484375, "learning_rate": 0.00012139045580071313, "loss": 1.3542, "step": 9110 }, { "epoch": 0.4316986496090974, "grad_norm": 0.58984375, "learning_rate": 0.00012137590766536775, "loss": 0.7236, "step": 9111 }, { "epoch": 0.43174603174603177, "grad_norm": 0.078125, "learning_rate": 0.00012136135905592749, "loss": 0.0086, "step": 9112 }, { "epoch": 0.4317934138829661, "grad_norm": 0.65625, "learning_rate": 0.00012134680997271504, "loss": 1.334, "step": 9113 }, { "epoch": 0.4318407960199005, "grad_norm": 0.515625, "learning_rate": 0.0001213322604160531, "loss": 0.8218, "step": 9114 }, { "epoch": 0.4318881781568349, "grad_norm": 0.80859375, "learning_rate": 0.00012131771038626434, "loss": 0.8312, "step": 9115 }, { "epoch": 0.4319355602937692, "grad_norm": 0.5859375, "learning_rate": 0.00012130315988367145, "loss": 0.7438, "step": 9116 }, { "epoch": 0.4319829424307036, "grad_norm": 0.025634765625, "learning_rate": 0.00012128860890859722, "loss": 0.0011, "step": 9117 }, { "epoch": 0.432030324567638, "grad_norm": 0.7890625, "learning_rate": 0.00012127405746136427, "loss": 1.1243, "step": 9118 }, { "epoch": 0.43207770670457235, "grad_norm": 0.78125, "learning_rate": 0.00012125950554229539, "loss": 1.2505, "step": 9119 }, { "epoch": 0.43212508884150674, "grad_norm": 0.6015625, "learning_rate": 0.00012124495315171334, "loss": 1.7117, "step": 9120 }, { "epoch": 0.43217247097844114, "grad_norm": 0.1513671875, "learning_rate": 0.00012123040028994086, "loss": 0.0113, "step": 9121 }, { "epoch": 0.43221985311537553, "grad_norm": 0.67578125, "learning_rate": 0.00012121584695730068, "loss": 1.0387, "step": 9122 }, { "epoch": 0.43226723525230987, "grad_norm": 0.46875, "learning_rate": 0.00012120129315411564, "loss": 0.7958, "step": 9123 }, { "epoch": 0.43231461738924426, "grad_norm": 0.1025390625, "learning_rate": 0.00012118673888070848, "loss": 0.0095, "step": 9124 }, { "epoch": 0.43236199952617865, "grad_norm": 0.337890625, "learning_rate": 0.00012117218413740206, "loss": 0.1888, "step": 9125 }, { "epoch": 0.432409381663113, "grad_norm": 0.1162109375, "learning_rate": 0.00012115762892451915, "loss": 0.0096, "step": 9126 }, { "epoch": 0.4324567638000474, "grad_norm": 0.70703125, "learning_rate": 0.00012114307324238253, "loss": 1.0692, "step": 9127 }, { "epoch": 0.4325041459369818, "grad_norm": 0.53125, "learning_rate": 0.00012112851709131505, "loss": 0.5198, "step": 9128 }, { "epoch": 0.4325515280739161, "grad_norm": 0.703125, "learning_rate": 0.0001211139604716396, "loss": 0.9162, "step": 9129 }, { "epoch": 0.4325989102108505, "grad_norm": 0.059814453125, "learning_rate": 0.00012109940338367897, "loss": 0.004, "step": 9130 }, { "epoch": 0.4326462923477849, "grad_norm": 0.69921875, "learning_rate": 0.00012108484582775608, "loss": 1.0419, "step": 9131 }, { "epoch": 0.43269367448471924, "grad_norm": 0.953125, "learning_rate": 0.00012107028780419374, "loss": 0.6108, "step": 9132 }, { "epoch": 0.43274105662165363, "grad_norm": 0.419921875, "learning_rate": 0.0001210557293133149, "loss": 0.0552, "step": 9133 }, { "epoch": 0.432788438758588, "grad_norm": 0.52734375, "learning_rate": 0.00012104117035544238, "loss": 1.0201, "step": 9134 }, { "epoch": 0.43283582089552236, "grad_norm": 0.353515625, "learning_rate": 0.00012102661093089911, "loss": 0.4299, "step": 9135 }, { "epoch": 0.43288320303245675, "grad_norm": 0.044921875, "learning_rate": 0.00012101205104000801, "loss": 0.0036, "step": 9136 }, { "epoch": 0.43293058516939115, "grad_norm": 0.66015625, "learning_rate": 0.00012099749068309201, "loss": 0.5321, "step": 9137 }, { "epoch": 0.43297796730632554, "grad_norm": 0.298828125, "learning_rate": 0.00012098292986047402, "loss": 0.017, "step": 9138 }, { "epoch": 0.4330253494432599, "grad_norm": 0.251953125, "learning_rate": 0.000120968368572477, "loss": 0.0328, "step": 9139 }, { "epoch": 0.43307273158019427, "grad_norm": 0.703125, "learning_rate": 0.0001209538068194239, "loss": 0.8218, "step": 9140 }, { "epoch": 0.43312011371712866, "grad_norm": 0.62109375, "learning_rate": 0.0001209392446016377, "loss": 1.2307, "step": 9141 }, { "epoch": 0.433167495854063, "grad_norm": 0.578125, "learning_rate": 0.00012092468191944133, "loss": 0.5895, "step": 9142 }, { "epoch": 0.4332148779909974, "grad_norm": 0.73828125, "learning_rate": 0.0001209101187731578, "loss": 0.6921, "step": 9143 }, { "epoch": 0.4332622601279318, "grad_norm": 0.64453125, "learning_rate": 0.00012089555516311016, "loss": 1.3407, "step": 9144 }, { "epoch": 0.4333096422648661, "grad_norm": 0.56640625, "learning_rate": 0.0001208809910896213, "loss": 0.6376, "step": 9145 }, { "epoch": 0.4333570244018005, "grad_norm": 0.09521484375, "learning_rate": 0.00012086642655301432, "loss": 0.0064, "step": 9146 }, { "epoch": 0.4334044065387349, "grad_norm": 0.09912109375, "learning_rate": 0.00012085186155361224, "loss": 0.0053, "step": 9147 }, { "epoch": 0.43345178867566925, "grad_norm": 0.703125, "learning_rate": 0.00012083729609173806, "loss": 1.034, "step": 9148 }, { "epoch": 0.43349917081260364, "grad_norm": 0.51953125, "learning_rate": 0.00012082273016771488, "loss": 0.196, "step": 9149 }, { "epoch": 0.43354655294953803, "grad_norm": 0.62109375, "learning_rate": 0.0001208081637818657, "loss": 1.3181, "step": 9150 }, { "epoch": 0.4335939350864724, "grad_norm": 0.80078125, "learning_rate": 0.0001207935969345136, "loss": 1.2652, "step": 9151 }, { "epoch": 0.43364131722340676, "grad_norm": 0.244140625, "learning_rate": 0.00012077902962598171, "loss": 0.0348, "step": 9152 }, { "epoch": 0.43368869936034116, "grad_norm": 0.31640625, "learning_rate": 0.00012076446185659302, "loss": 0.0056, "step": 9153 }, { "epoch": 0.43373608149727555, "grad_norm": 0.6875, "learning_rate": 0.0001207498936266707, "loss": 0.772, "step": 9154 }, { "epoch": 0.4337834636342099, "grad_norm": 0.68359375, "learning_rate": 0.00012073532493653786, "loss": 1.2163, "step": 9155 }, { "epoch": 0.4338308457711443, "grad_norm": 0.6953125, "learning_rate": 0.00012072075578651762, "loss": 1.0997, "step": 9156 }, { "epoch": 0.4338782279080787, "grad_norm": 0.75390625, "learning_rate": 0.00012070618617693306, "loss": 1.0526, "step": 9157 }, { "epoch": 0.433925610045013, "grad_norm": 0.55859375, "learning_rate": 0.00012069161610810734, "loss": 0.8277, "step": 9158 }, { "epoch": 0.4339729921819474, "grad_norm": 0.6484375, "learning_rate": 0.00012067704558036362, "loss": 1.0736, "step": 9159 }, { "epoch": 0.4340203743188818, "grad_norm": 0.70703125, "learning_rate": 0.00012066247459402507, "loss": 0.7963, "step": 9160 }, { "epoch": 0.43406775645581613, "grad_norm": 0.67578125, "learning_rate": 0.0001206479031494148, "loss": 1.3514, "step": 9161 }, { "epoch": 0.4341151385927505, "grad_norm": 0.2431640625, "learning_rate": 0.00012063333124685606, "loss": 0.0038, "step": 9162 }, { "epoch": 0.4341625207296849, "grad_norm": 0.08935546875, "learning_rate": 0.00012061875888667203, "loss": 0.009, "step": 9163 }, { "epoch": 0.43420990286661926, "grad_norm": 0.59375, "learning_rate": 0.00012060418606918587, "loss": 0.7726, "step": 9164 }, { "epoch": 0.43425728500355365, "grad_norm": 0.6796875, "learning_rate": 0.00012058961279472079, "loss": 0.8138, "step": 9165 }, { "epoch": 0.43430466714048804, "grad_norm": 0.4921875, "learning_rate": 0.00012057503906360004, "loss": 0.5046, "step": 9166 }, { "epoch": 0.43435204927742244, "grad_norm": 0.58984375, "learning_rate": 0.00012056046487614687, "loss": 1.3172, "step": 9167 }, { "epoch": 0.4343994314143568, "grad_norm": 0.1953125, "learning_rate": 0.00012054589023268445, "loss": 0.1465, "step": 9168 }, { "epoch": 0.43444681355129117, "grad_norm": 0.466796875, "learning_rate": 0.00012053131513353608, "loss": 0.811, "step": 9169 }, { "epoch": 0.43449419568822556, "grad_norm": 0.68359375, "learning_rate": 0.00012051673957902501, "loss": 1.2989, "step": 9170 }, { "epoch": 0.4345415778251599, "grad_norm": 0.63671875, "learning_rate": 0.00012050216356947453, "loss": 1.5614, "step": 9171 }, { "epoch": 0.4345889599620943, "grad_norm": 0.5859375, "learning_rate": 0.00012048758710520789, "loss": 0.9579, "step": 9172 }, { "epoch": 0.4346363420990287, "grad_norm": 0.97265625, "learning_rate": 0.00012047301018654838, "loss": 1.2662, "step": 9173 }, { "epoch": 0.434683724235963, "grad_norm": 0.69921875, "learning_rate": 0.0001204584328138193, "loss": 1.0081, "step": 9174 }, { "epoch": 0.4347311063728974, "grad_norm": 0.6171875, "learning_rate": 0.00012044385498734398, "loss": 0.835, "step": 9175 }, { "epoch": 0.4347784885098318, "grad_norm": 0.7734375, "learning_rate": 0.00012042927670744574, "loss": 1.023, "step": 9176 }, { "epoch": 0.43482587064676614, "grad_norm": 0.65625, "learning_rate": 0.00012041469797444788, "loss": 0.7017, "step": 9177 }, { "epoch": 0.43487325278370054, "grad_norm": 0.703125, "learning_rate": 0.00012040011878867379, "loss": 0.9192, "step": 9178 }, { "epoch": 0.43492063492063493, "grad_norm": 0.59765625, "learning_rate": 0.00012038553915044679, "loss": 0.8073, "step": 9179 }, { "epoch": 0.4349680170575693, "grad_norm": 0.1875, "learning_rate": 0.00012037095906009026, "loss": 0.0218, "step": 9180 }, { "epoch": 0.43501539919450366, "grad_norm": 0.84765625, "learning_rate": 0.00012035637851792754, "loss": 0.7896, "step": 9181 }, { "epoch": 0.43506278133143805, "grad_norm": 0.75390625, "learning_rate": 0.00012034179752428203, "loss": 0.7577, "step": 9182 }, { "epoch": 0.43511016346837245, "grad_norm": 0.4140625, "learning_rate": 0.00012032721607947712, "loss": 0.0411, "step": 9183 }, { "epoch": 0.4351575456053068, "grad_norm": 0.6484375, "learning_rate": 0.00012031263418383618, "loss": 1.0111, "step": 9184 }, { "epoch": 0.4352049277422412, "grad_norm": 0.7578125, "learning_rate": 0.00012029805183768268, "loss": 1.3509, "step": 9185 }, { "epoch": 0.43525230987917557, "grad_norm": 0.61328125, "learning_rate": 0.00012028346904134003, "loss": 1.2804, "step": 9186 }, { "epoch": 0.4352996920161099, "grad_norm": 0.58203125, "learning_rate": 0.0001202688857951316, "loss": 0.8546, "step": 9187 }, { "epoch": 0.4353470741530443, "grad_norm": 0.28125, "learning_rate": 0.00012025430209938089, "loss": 0.116, "step": 9188 }, { "epoch": 0.4353944562899787, "grad_norm": 0.625, "learning_rate": 0.00012023971795441133, "loss": 0.5732, "step": 9189 }, { "epoch": 0.43544183842691303, "grad_norm": 0.0306396484375, "learning_rate": 0.0001202251333605464, "loss": 0.0006, "step": 9190 }, { "epoch": 0.4354892205638474, "grad_norm": 0.185546875, "learning_rate": 0.00012021054831810953, "loss": 0.0279, "step": 9191 }, { "epoch": 0.4355366027007818, "grad_norm": 0.072265625, "learning_rate": 0.00012019596282742424, "loss": 0.0075, "step": 9192 }, { "epoch": 0.43558398483771615, "grad_norm": 0.7734375, "learning_rate": 0.00012018137688881399, "loss": 1.0802, "step": 9193 }, { "epoch": 0.43563136697465055, "grad_norm": 0.6640625, "learning_rate": 0.00012016679050260232, "loss": 1.4969, "step": 9194 }, { "epoch": 0.43567874911158494, "grad_norm": 0.84375, "learning_rate": 0.00012015220366911273, "loss": 0.711, "step": 9195 }, { "epoch": 0.43572613124851933, "grad_norm": 0.0016021728515625, "learning_rate": 0.0001201376163886687, "loss": 0.0001, "step": 9196 }, { "epoch": 0.43577351338545367, "grad_norm": 0.416015625, "learning_rate": 0.00012012302866159377, "loss": 0.0915, "step": 9197 }, { "epoch": 0.43582089552238806, "grad_norm": 0.2353515625, "learning_rate": 0.00012010844048821155, "loss": 0.0159, "step": 9198 }, { "epoch": 0.43586827765932246, "grad_norm": 0.57421875, "learning_rate": 0.0001200938518688455, "loss": 0.3029, "step": 9199 }, { "epoch": 0.4359156597962568, "grad_norm": 0.6796875, "learning_rate": 0.00012007926280381921, "loss": 0.9484, "step": 9200 }, { "epoch": 0.4359630419331912, "grad_norm": 0.62109375, "learning_rate": 0.00012006467329345627, "loss": 0.8423, "step": 9201 }, { "epoch": 0.4360104240701256, "grad_norm": 0.0302734375, "learning_rate": 0.00012005008333808028, "loss": 0.0023, "step": 9202 }, { "epoch": 0.4360578062070599, "grad_norm": 0.640625, "learning_rate": 0.00012003549293801477, "loss": 0.8052, "step": 9203 }, { "epoch": 0.4361051883439943, "grad_norm": 0.1875, "learning_rate": 0.00012002090209358336, "loss": 0.1232, "step": 9204 }, { "epoch": 0.4361525704809287, "grad_norm": 0.369140625, "learning_rate": 0.00012000631080510969, "loss": 0.3767, "step": 9205 }, { "epoch": 0.43619995261786304, "grad_norm": 0.64453125, "learning_rate": 0.00011999171907291735, "loss": 0.7324, "step": 9206 }, { "epoch": 0.43624733475479743, "grad_norm": 0.64453125, "learning_rate": 0.00011997712689732996, "loss": 1.2838, "step": 9207 }, { "epoch": 0.4362947168917318, "grad_norm": 0.6328125, "learning_rate": 0.00011996253427867116, "loss": 0.8453, "step": 9208 }, { "epoch": 0.4363420990286662, "grad_norm": 0.6328125, "learning_rate": 0.00011994794121726467, "loss": 1.2646, "step": 9209 }, { "epoch": 0.43638948116560056, "grad_norm": 0.64453125, "learning_rate": 0.00011993334771343405, "loss": 0.8129, "step": 9210 }, { "epoch": 0.43643686330253495, "grad_norm": 0.59765625, "learning_rate": 0.00011991875376750303, "loss": 0.8692, "step": 9211 }, { "epoch": 0.43648424543946934, "grad_norm": 0.96484375, "learning_rate": 0.00011990415937979525, "loss": 1.3055, "step": 9212 }, { "epoch": 0.4365316275764037, "grad_norm": 0.6328125, "learning_rate": 0.00011988956455063442, "loss": 1.3004, "step": 9213 }, { "epoch": 0.4365790097133381, "grad_norm": 0.7421875, "learning_rate": 0.00011987496928034423, "loss": 0.9849, "step": 9214 }, { "epoch": 0.43662639185027247, "grad_norm": 0.0791015625, "learning_rate": 0.00011986037356924839, "loss": 0.0081, "step": 9215 }, { "epoch": 0.4366737739872068, "grad_norm": 0.74609375, "learning_rate": 0.00011984577741767062, "loss": 0.7798, "step": 9216 }, { "epoch": 0.4367211561241412, "grad_norm": 0.5078125, "learning_rate": 0.00011983118082593466, "loss": 0.1623, "step": 9217 }, { "epoch": 0.4367685382610756, "grad_norm": 0.73828125, "learning_rate": 0.00011981658379436423, "loss": 1.3366, "step": 9218 }, { "epoch": 0.4368159203980099, "grad_norm": 0.69140625, "learning_rate": 0.00011980198632328307, "loss": 1.4171, "step": 9219 }, { "epoch": 0.4368633025349443, "grad_norm": 0.061279296875, "learning_rate": 0.00011978738841301494, "loss": 0.0048, "step": 9220 }, { "epoch": 0.4369106846718787, "grad_norm": 0.1875, "learning_rate": 0.00011977279006388365, "loss": 0.0173, "step": 9221 }, { "epoch": 0.43695806680881305, "grad_norm": 0.5546875, "learning_rate": 0.00011975819127621289, "loss": 0.6574, "step": 9222 }, { "epoch": 0.43700544894574744, "grad_norm": 0.6875, "learning_rate": 0.00011974359205032648, "loss": 1.1759, "step": 9223 }, { "epoch": 0.43705283108268184, "grad_norm": 0.12451171875, "learning_rate": 0.00011972899238654827, "loss": 0.0073, "step": 9224 }, { "epoch": 0.43710021321961623, "grad_norm": 0.49609375, "learning_rate": 0.00011971439228520203, "loss": 0.3249, "step": 9225 }, { "epoch": 0.43714759535655057, "grad_norm": 0.8046875, "learning_rate": 0.00011969979174661152, "loss": 0.088, "step": 9226 }, { "epoch": 0.43719497749348496, "grad_norm": 0.62109375, "learning_rate": 0.00011968519077110065, "loss": 1.2362, "step": 9227 }, { "epoch": 0.43724235963041935, "grad_norm": 0.69921875, "learning_rate": 0.0001196705893589932, "loss": 1.108, "step": 9228 }, { "epoch": 0.4372897417673537, "grad_norm": 0.16015625, "learning_rate": 0.00011965598751061307, "loss": 0.0033, "step": 9229 }, { "epoch": 0.4373371239042881, "grad_norm": 0.2578125, "learning_rate": 0.00011964138522628403, "loss": 0.0473, "step": 9230 }, { "epoch": 0.4373845060412225, "grad_norm": 0.59375, "learning_rate": 0.00011962678250632999, "loss": 1.0528, "step": 9231 }, { "epoch": 0.4374318881781568, "grad_norm": 0.62109375, "learning_rate": 0.00011961217935107482, "loss": 0.8092, "step": 9232 }, { "epoch": 0.4374792703150912, "grad_norm": 0.67578125, "learning_rate": 0.00011959757576084243, "loss": 1.3905, "step": 9233 }, { "epoch": 0.4375266524520256, "grad_norm": 0.68359375, "learning_rate": 0.00011958297173595666, "loss": 1.2362, "step": 9234 }, { "epoch": 0.43757403458895994, "grad_norm": 0.71875, "learning_rate": 0.00011956836727674143, "loss": 1.259, "step": 9235 }, { "epoch": 0.43762141672589433, "grad_norm": 0.50390625, "learning_rate": 0.00011955376238352069, "loss": 0.779, "step": 9236 }, { "epoch": 0.4376687988628287, "grad_norm": 0.046630859375, "learning_rate": 0.00011953915705661829, "loss": 0.0048, "step": 9237 }, { "epoch": 0.4377161809997631, "grad_norm": 0.5625, "learning_rate": 0.0001195245512963582, "loss": 0.0321, "step": 9238 }, { "epoch": 0.43776356313669745, "grad_norm": 0.66796875, "learning_rate": 0.00011950994510306437, "loss": 0.7033, "step": 9239 }, { "epoch": 0.43781094527363185, "grad_norm": 0.9375, "learning_rate": 0.00011949533847706076, "loss": 1.2161, "step": 9240 }, { "epoch": 0.43785832741056624, "grad_norm": 0.5859375, "learning_rate": 0.00011948073141867126, "loss": 0.9879, "step": 9241 }, { "epoch": 0.4379057095475006, "grad_norm": 0.58984375, "learning_rate": 0.00011946612392821992, "loss": 0.4406, "step": 9242 }, { "epoch": 0.43795309168443497, "grad_norm": 0.71875, "learning_rate": 0.0001194515160060307, "loss": 1.2926, "step": 9243 }, { "epoch": 0.43800047382136936, "grad_norm": 0.6171875, "learning_rate": 0.00011943690765242757, "loss": 0.8098, "step": 9244 }, { "epoch": 0.4380478559583037, "grad_norm": 0.7890625, "learning_rate": 0.00011942229886773451, "loss": 0.9731, "step": 9245 }, { "epoch": 0.4380952380952381, "grad_norm": 0.95703125, "learning_rate": 0.00011940768965227553, "loss": 0.9692, "step": 9246 }, { "epoch": 0.4381426202321725, "grad_norm": 0.5703125, "learning_rate": 0.00011939308000637471, "loss": 0.6649, "step": 9247 }, { "epoch": 0.4381900023691068, "grad_norm": 0.65625, "learning_rate": 0.00011937846993035602, "loss": 1.4241, "step": 9248 }, { "epoch": 0.4382373845060412, "grad_norm": 0.61328125, "learning_rate": 0.00011936385942454348, "loss": 0.6976, "step": 9249 }, { "epoch": 0.4382847666429756, "grad_norm": 0.53515625, "learning_rate": 0.00011934924848926122, "loss": 0.0258, "step": 9250 }, { "epoch": 0.43833214877990995, "grad_norm": 0.8125, "learning_rate": 0.00011933463712483318, "loss": 0.5183, "step": 9251 }, { "epoch": 0.43837953091684434, "grad_norm": 0.345703125, "learning_rate": 0.00011932002533158351, "loss": 0.0113, "step": 9252 }, { "epoch": 0.43842691305377873, "grad_norm": 0.6875, "learning_rate": 0.00011930541310983625, "loss": 0.9669, "step": 9253 }, { "epoch": 0.4384742951907131, "grad_norm": 0.380859375, "learning_rate": 0.00011929080045991549, "loss": 0.2126, "step": 9254 }, { "epoch": 0.43852167732764746, "grad_norm": 0.2099609375, "learning_rate": 0.00011927618738214534, "loss": 0.1515, "step": 9255 }, { "epoch": 0.43856905946458186, "grad_norm": 0.6171875, "learning_rate": 0.00011926157387684986, "loss": 1.0796, "step": 9256 }, { "epoch": 0.43861644160151625, "grad_norm": 0.67578125, "learning_rate": 0.0001192469599443532, "loss": 1.2017, "step": 9257 }, { "epoch": 0.4386638237384506, "grad_norm": 0.5546875, "learning_rate": 0.00011923234558497948, "loss": 0.6035, "step": 9258 }, { "epoch": 0.438711205875385, "grad_norm": 0.64453125, "learning_rate": 0.00011921773079905283, "loss": 0.9509, "step": 9259 }, { "epoch": 0.4387585880123194, "grad_norm": 0.474609375, "learning_rate": 0.00011920311558689734, "loss": 0.2578, "step": 9260 }, { "epoch": 0.4388059701492537, "grad_norm": 0.61328125, "learning_rate": 0.00011918849994883721, "loss": 0.9338, "step": 9261 }, { "epoch": 0.4388533522861881, "grad_norm": 0.75, "learning_rate": 0.00011917388388519661, "loss": 1.1853, "step": 9262 }, { "epoch": 0.4389007344231225, "grad_norm": 0.7578125, "learning_rate": 0.00011915926739629968, "loss": 0.7985, "step": 9263 }, { "epoch": 0.43894811656005683, "grad_norm": 0.5078125, "learning_rate": 0.0001191446504824706, "loss": 0.7152, "step": 9264 }, { "epoch": 0.4389954986969912, "grad_norm": 0.58203125, "learning_rate": 0.00011913003314403357, "loss": 0.4354, "step": 9265 }, { "epoch": 0.4390428808339256, "grad_norm": 0.57421875, "learning_rate": 0.00011911541538131279, "loss": 0.8526, "step": 9266 }, { "epoch": 0.43909026297086, "grad_norm": 0.474609375, "learning_rate": 0.00011910079719463247, "loss": 0.1765, "step": 9267 }, { "epoch": 0.43913764510779435, "grad_norm": 0.482421875, "learning_rate": 0.00011908617858431679, "loss": 0.8484, "step": 9268 }, { "epoch": 0.43918502724472874, "grad_norm": 0.1845703125, "learning_rate": 0.00011907155955068999, "loss": 0.1277, "step": 9269 }, { "epoch": 0.43923240938166314, "grad_norm": 0.404296875, "learning_rate": 0.00011905694009407631, "loss": 0.1151, "step": 9270 }, { "epoch": 0.4392797915185975, "grad_norm": 0.67578125, "learning_rate": 0.00011904232021480002, "loss": 0.9624, "step": 9271 }, { "epoch": 0.43932717365553187, "grad_norm": 1.9375, "learning_rate": 0.00011902769991318534, "loss": 0.7456, "step": 9272 }, { "epoch": 0.43937455579246626, "grad_norm": 0.796875, "learning_rate": 0.00011901307918955653, "loss": 0.6132, "step": 9273 }, { "epoch": 0.4394219379294006, "grad_norm": 0.040771484375, "learning_rate": 0.0001189984580442379, "loss": 0.003, "step": 9274 }, { "epoch": 0.439469320066335, "grad_norm": 0.5, "learning_rate": 0.00011898383647755374, "loss": 0.9202, "step": 9275 }, { "epoch": 0.4395167022032694, "grad_norm": 0.7109375, "learning_rate": 0.00011896921448982825, "loss": 1.2101, "step": 9276 }, { "epoch": 0.4395640843402037, "grad_norm": 0.63671875, "learning_rate": 0.00011895459208138579, "loss": 0.7341, "step": 9277 }, { "epoch": 0.4396114664771381, "grad_norm": 0.248046875, "learning_rate": 0.00011893996925255069, "loss": 0.0061, "step": 9278 }, { "epoch": 0.4396588486140725, "grad_norm": 0.05615234375, "learning_rate": 0.00011892534600364725, "loss": 0.003, "step": 9279 }, { "epoch": 0.43970623075100684, "grad_norm": 0.7734375, "learning_rate": 0.00011891072233499976, "loss": 0.8399, "step": 9280 }, { "epoch": 0.43975361288794124, "grad_norm": 0.60546875, "learning_rate": 0.0001188960982469326, "loss": 0.874, "step": 9281 }, { "epoch": 0.43980099502487563, "grad_norm": 0.43359375, "learning_rate": 0.00011888147373977014, "loss": 0.9002, "step": 9282 }, { "epoch": 0.43984837716181, "grad_norm": 0.89453125, "learning_rate": 0.00011886684881383669, "loss": 1.1015, "step": 9283 }, { "epoch": 0.43989575929874436, "grad_norm": 0.546875, "learning_rate": 0.00011885222346945661, "loss": 0.5635, "step": 9284 }, { "epoch": 0.43994314143567875, "grad_norm": 0.66015625, "learning_rate": 0.00011883759770695431, "loss": 0.8537, "step": 9285 }, { "epoch": 0.43999052357261315, "grad_norm": 0.5078125, "learning_rate": 0.00011882297152665416, "loss": 0.7777, "step": 9286 }, { "epoch": 0.4400379057095475, "grad_norm": 0.6796875, "learning_rate": 0.00011880834492888056, "loss": 1.4362, "step": 9287 }, { "epoch": 0.4400852878464819, "grad_norm": 1.1484375, "learning_rate": 0.00011879371791395785, "loss": 0.007, "step": 9288 }, { "epoch": 0.44013266998341627, "grad_norm": 0.921875, "learning_rate": 0.00011877909048221056, "loss": 1.1929, "step": 9289 }, { "epoch": 0.4401800521203506, "grad_norm": 0.1396484375, "learning_rate": 0.00011876446263396303, "loss": 0.0123, "step": 9290 }, { "epoch": 0.440227434257285, "grad_norm": 0.26953125, "learning_rate": 0.0001187498343695397, "loss": 0.0907, "step": 9291 }, { "epoch": 0.4402748163942194, "grad_norm": 0.73046875, "learning_rate": 0.000118735205689265, "loss": 1.025, "step": 9292 }, { "epoch": 0.44032219853115373, "grad_norm": 0.1796875, "learning_rate": 0.0001187205765934634, "loss": 0.1393, "step": 9293 }, { "epoch": 0.4403695806680881, "grad_norm": 0.53515625, "learning_rate": 0.00011870594708245937, "loss": 0.9188, "step": 9294 }, { "epoch": 0.4404169628050225, "grad_norm": 0.7578125, "learning_rate": 0.00011869131715657733, "loss": 0.0155, "step": 9295 }, { "epoch": 0.4404643449419569, "grad_norm": 0.62109375, "learning_rate": 0.00011867668681614179, "loss": 0.6337, "step": 9296 }, { "epoch": 0.44051172707889125, "grad_norm": 0.84765625, "learning_rate": 0.00011866205606147725, "loss": 0.5353, "step": 9297 }, { "epoch": 0.44055910921582564, "grad_norm": 0.90234375, "learning_rate": 0.00011864742489290819, "loss": 1.0244, "step": 9298 }, { "epoch": 0.44060649135276003, "grad_norm": 0.052001953125, "learning_rate": 0.0001186327933107591, "loss": 0.0026, "step": 9299 }, { "epoch": 0.44065387348969437, "grad_norm": 0.7109375, "learning_rate": 0.00011861816131535447, "loss": 1.4961, "step": 9300 }, { "epoch": 0.44070125562662876, "grad_norm": 0.78125, "learning_rate": 0.00011860352890701889, "loss": 1.1084, "step": 9301 }, { "epoch": 0.44074863776356316, "grad_norm": 0.27734375, "learning_rate": 0.00011858889608607684, "loss": 0.1376, "step": 9302 }, { "epoch": 0.4407960199004975, "grad_norm": 0.64453125, "learning_rate": 0.00011857426285285287, "loss": 1.4545, "step": 9303 }, { "epoch": 0.4408434020374319, "grad_norm": 0.201171875, "learning_rate": 0.00011855962920767152, "loss": 0.0084, "step": 9304 }, { "epoch": 0.4408907841743663, "grad_norm": 0.427734375, "learning_rate": 0.00011854499515085741, "loss": 0.9402, "step": 9305 }, { "epoch": 0.4409381663113006, "grad_norm": 0.828125, "learning_rate": 0.00011853036068273504, "loss": 0.8362, "step": 9306 }, { "epoch": 0.440985548448235, "grad_norm": 0.1083984375, "learning_rate": 0.000118515725803629, "loss": 0.0034, "step": 9307 }, { "epoch": 0.4410329305851694, "grad_norm": 0.7890625, "learning_rate": 0.00011850109051386387, "loss": 0.9941, "step": 9308 }, { "epoch": 0.44108031272210374, "grad_norm": 0.7265625, "learning_rate": 0.00011848645481376428, "loss": 1.1298, "step": 9309 }, { "epoch": 0.44112769485903813, "grad_norm": 0.7109375, "learning_rate": 0.00011847181870365481, "loss": 0.8825, "step": 9310 }, { "epoch": 0.4411750769959725, "grad_norm": 0.62109375, "learning_rate": 0.00011845718218386006, "loss": 0.8857, "step": 9311 }, { "epoch": 0.4412224591329069, "grad_norm": 0.48828125, "learning_rate": 0.00011844254525470468, "loss": 0.6584, "step": 9312 }, { "epoch": 0.44126984126984126, "grad_norm": 0.92578125, "learning_rate": 0.00011842790791651333, "loss": 0.9399, "step": 9313 }, { "epoch": 0.44131722340677565, "grad_norm": 0.5234375, "learning_rate": 0.00011841327016961055, "loss": 1.0528, "step": 9314 }, { "epoch": 0.44136460554371004, "grad_norm": 0.263671875, "learning_rate": 0.00011839863201432106, "loss": 0.0161, "step": 9315 }, { "epoch": 0.4414119876806444, "grad_norm": 0.70703125, "learning_rate": 0.00011838399345096953, "loss": 0.6023, "step": 9316 }, { "epoch": 0.4414593698175788, "grad_norm": 0.048095703125, "learning_rate": 0.00011836935447988061, "loss": 0.005, "step": 9317 }, { "epoch": 0.44150675195451317, "grad_norm": 0.1513671875, "learning_rate": 0.00011835471510137895, "loss": 0.0147, "step": 9318 }, { "epoch": 0.4415541340914475, "grad_norm": 0.5703125, "learning_rate": 0.00011834007531578927, "loss": 0.6816, "step": 9319 }, { "epoch": 0.4416015162283819, "grad_norm": 0.54296875, "learning_rate": 0.00011832543512343628, "loss": 0.6251, "step": 9320 }, { "epoch": 0.4416488983653163, "grad_norm": 0.66796875, "learning_rate": 0.00011831079452464464, "loss": 1.2253, "step": 9321 }, { "epoch": 0.4416962805022506, "grad_norm": 0.82421875, "learning_rate": 0.00011829615351973906, "loss": 1.0539, "step": 9322 }, { "epoch": 0.441743662639185, "grad_norm": 0.62109375, "learning_rate": 0.0001182815121090443, "loss": 1.1218, "step": 9323 }, { "epoch": 0.4417910447761194, "grad_norm": 0.58203125, "learning_rate": 0.0001182668702928851, "loss": 0.6269, "step": 9324 }, { "epoch": 0.4418384269130538, "grad_norm": 0.9609375, "learning_rate": 0.00011825222807158613, "loss": 1.0764, "step": 9325 }, { "epoch": 0.44188580904998814, "grad_norm": 0.17578125, "learning_rate": 0.00011823758544547221, "loss": 0.1038, "step": 9326 }, { "epoch": 0.44193319118692254, "grad_norm": 0.451171875, "learning_rate": 0.00011822294241486805, "loss": 0.1845, "step": 9327 }, { "epoch": 0.44198057332385693, "grad_norm": 0.02978515625, "learning_rate": 0.00011820829898009847, "loss": 0.0009, "step": 9328 }, { "epoch": 0.44202795546079127, "grad_norm": 0.69140625, "learning_rate": 0.00011819365514148819, "loss": 1.1411, "step": 9329 }, { "epoch": 0.44207533759772566, "grad_norm": 0.3125, "learning_rate": 0.00011817901089936201, "loss": 0.0144, "step": 9330 }, { "epoch": 0.44212271973466005, "grad_norm": 0.57421875, "learning_rate": 0.00011816436625404474, "loss": 0.0671, "step": 9331 }, { "epoch": 0.4421701018715944, "grad_norm": 0.69921875, "learning_rate": 0.00011814972120586118, "loss": 1.2879, "step": 9332 }, { "epoch": 0.4422174840085288, "grad_norm": 0.70703125, "learning_rate": 0.00011813507575513614, "loss": 0.9976, "step": 9333 }, { "epoch": 0.4422648661454632, "grad_norm": 0.65625, "learning_rate": 0.00011812042990219441, "loss": 0.8429, "step": 9334 }, { "epoch": 0.4423122482823975, "grad_norm": 0.78125, "learning_rate": 0.00011810578364736089, "loss": 1.0488, "step": 9335 }, { "epoch": 0.4423596304193319, "grad_norm": 0.63671875, "learning_rate": 0.00011809113699096033, "loss": 1.1678, "step": 9336 }, { "epoch": 0.4424070125562663, "grad_norm": 0.70703125, "learning_rate": 0.00011807648993331766, "loss": 0.9482, "step": 9337 }, { "epoch": 0.44245439469320064, "grad_norm": 0.69140625, "learning_rate": 0.00011806184247475766, "loss": 0.8572, "step": 9338 }, { "epoch": 0.44250177683013503, "grad_norm": 0.65234375, "learning_rate": 0.00011804719461560525, "loss": 0.8175, "step": 9339 }, { "epoch": 0.4425491589670694, "grad_norm": 0.56640625, "learning_rate": 0.00011803254635618531, "loss": 1.0617, "step": 9340 }, { "epoch": 0.4425965411040038, "grad_norm": 0.6796875, "learning_rate": 0.00011801789769682266, "loss": 0.6566, "step": 9341 }, { "epoch": 0.44264392324093815, "grad_norm": 0.60546875, "learning_rate": 0.00011800324863784225, "loss": 1.1249, "step": 9342 }, { "epoch": 0.44269130537787255, "grad_norm": 0.63671875, "learning_rate": 0.00011798859917956897, "loss": 0.5442, "step": 9343 }, { "epoch": 0.44273868751480694, "grad_norm": 0.55859375, "learning_rate": 0.00011797394932232769, "loss": 0.6662, "step": 9344 }, { "epoch": 0.4427860696517413, "grad_norm": 0.83984375, "learning_rate": 0.00011795929906644339, "loss": 1.1757, "step": 9345 }, { "epoch": 0.44283345178867567, "grad_norm": 0.85546875, "learning_rate": 0.00011794464841224095, "loss": 0.2145, "step": 9346 }, { "epoch": 0.44288083392561006, "grad_norm": 0.53515625, "learning_rate": 0.00011792999736004533, "loss": 0.75, "step": 9347 }, { "epoch": 0.4429282160625444, "grad_norm": 0.71875, "learning_rate": 0.00011791534591018148, "loss": 1.4978, "step": 9348 }, { "epoch": 0.4429755981994788, "grad_norm": 0.6953125, "learning_rate": 0.00011790069406297431, "loss": 0.9863, "step": 9349 }, { "epoch": 0.4430229803364132, "grad_norm": 0.63671875, "learning_rate": 0.00011788604181874881, "loss": 0.8307, "step": 9350 }, { "epoch": 0.4430703624733475, "grad_norm": 0.765625, "learning_rate": 0.00011787138917782999, "loss": 1.3348, "step": 9351 }, { "epoch": 0.4431177446102819, "grad_norm": 0.004852294921875, "learning_rate": 0.00011785673614054277, "loss": 0.0002, "step": 9352 }, { "epoch": 0.4431651267472163, "grad_norm": 0.255859375, "learning_rate": 0.00011784208270721216, "loss": 0.1688, "step": 9353 }, { "epoch": 0.4432125088841507, "grad_norm": 0.6328125, "learning_rate": 0.00011782742887816316, "loss": 1.3078, "step": 9354 }, { "epoch": 0.44325989102108504, "grad_norm": 0.6328125, "learning_rate": 0.0001178127746537208, "loss": 1.1072, "step": 9355 }, { "epoch": 0.44330727315801943, "grad_norm": 0.66015625, "learning_rate": 0.00011779812003421004, "loss": 0.9367, "step": 9356 }, { "epoch": 0.4433546552949538, "grad_norm": 0.69921875, "learning_rate": 0.00011778346501995594, "loss": 0.7521, "step": 9357 }, { "epoch": 0.44340203743188816, "grad_norm": 0.68359375, "learning_rate": 0.00011776880961128356, "loss": 0.9882, "step": 9358 }, { "epoch": 0.44344941956882256, "grad_norm": 0.66796875, "learning_rate": 0.00011775415380851787, "loss": 1.1479, "step": 9359 }, { "epoch": 0.44349680170575695, "grad_norm": 0.244140625, "learning_rate": 0.000117739497611984, "loss": 0.1426, "step": 9360 }, { "epoch": 0.4435441838426913, "grad_norm": 0.66796875, "learning_rate": 0.00011772484102200693, "loss": 1.0151, "step": 9361 }, { "epoch": 0.4435915659796257, "grad_norm": 0.765625, "learning_rate": 0.00011771018403891176, "loss": 1.0357, "step": 9362 }, { "epoch": 0.4436389481165601, "grad_norm": 0.62890625, "learning_rate": 0.00011769552666302361, "loss": 1.2855, "step": 9363 }, { "epoch": 0.4436863302534944, "grad_norm": 0.6796875, "learning_rate": 0.00011768086889466752, "loss": 1.3557, "step": 9364 }, { "epoch": 0.4437337123904288, "grad_norm": 0.185546875, "learning_rate": 0.00011766621073416857, "loss": 0.1146, "step": 9365 }, { "epoch": 0.4437810945273632, "grad_norm": 0.5390625, "learning_rate": 0.00011765155218185188, "loss": 0.685, "step": 9366 }, { "epoch": 0.44382847666429753, "grad_norm": 0.640625, "learning_rate": 0.00011763689323804258, "loss": 0.8738, "step": 9367 }, { "epoch": 0.4438758588012319, "grad_norm": 0.76171875, "learning_rate": 0.00011762223390306579, "loss": 0.9329, "step": 9368 }, { "epoch": 0.4439232409381663, "grad_norm": 0.1806640625, "learning_rate": 0.00011760757417724659, "loss": 0.0706, "step": 9369 }, { "epoch": 0.4439706230751007, "grad_norm": 0.0693359375, "learning_rate": 0.00011759291406091018, "loss": 0.0074, "step": 9370 }, { "epoch": 0.44401800521203505, "grad_norm": 0.73046875, "learning_rate": 0.00011757825355438165, "loss": 1.3688, "step": 9371 }, { "epoch": 0.44406538734896944, "grad_norm": 0.44921875, "learning_rate": 0.00011756359265798618, "loss": 0.2759, "step": 9372 }, { "epoch": 0.44411276948590384, "grad_norm": 0.032470703125, "learning_rate": 0.00011754893137204895, "loss": 0.0032, "step": 9373 }, { "epoch": 0.4441601516228382, "grad_norm": 0.54296875, "learning_rate": 0.0001175342696968951, "loss": 0.7585, "step": 9374 }, { "epoch": 0.44420753375977257, "grad_norm": 0.072265625, "learning_rate": 0.00011751960763284981, "loss": 0.0044, "step": 9375 }, { "epoch": 0.44425491589670696, "grad_norm": 0.5390625, "learning_rate": 0.00011750494518023834, "loss": 0.7121, "step": 9376 }, { "epoch": 0.4443022980336413, "grad_norm": 0.6484375, "learning_rate": 0.00011749028233938577, "loss": 0.8757, "step": 9377 }, { "epoch": 0.4443496801705757, "grad_norm": 0.212890625, "learning_rate": 0.00011747561911061742, "loss": 0.1564, "step": 9378 }, { "epoch": 0.4443970623075101, "grad_norm": 0.5, "learning_rate": 0.00011746095549425841, "loss": 1.078, "step": 9379 }, { "epoch": 0.4444444444444444, "grad_norm": 0.6484375, "learning_rate": 0.00011744629149063403, "loss": 0.6784, "step": 9380 }, { "epoch": 0.4444918265813788, "grad_norm": 0.625, "learning_rate": 0.00011743162710006947, "loss": 0.051, "step": 9381 }, { "epoch": 0.4445392087183132, "grad_norm": 0.72265625, "learning_rate": 0.00011741696232288998, "loss": 0.983, "step": 9382 }, { "epoch": 0.4445865908552476, "grad_norm": 0.625, "learning_rate": 0.00011740229715942083, "loss": 0.6347, "step": 9383 }, { "epoch": 0.44463397299218194, "grad_norm": 0.5859375, "learning_rate": 0.00011738763160998729, "loss": 0.9181, "step": 9384 }, { "epoch": 0.44468135512911633, "grad_norm": 0.0023345947265625, "learning_rate": 0.00011737296567491459, "loss": 0.0001, "step": 9385 }, { "epoch": 0.4447287372660507, "grad_norm": 0.46875, "learning_rate": 0.000117358299354528, "loss": 0.0823, "step": 9386 }, { "epoch": 0.44477611940298506, "grad_norm": 0.51171875, "learning_rate": 0.00011734363264915282, "loss": 0.7144, "step": 9387 }, { "epoch": 0.44482350153991945, "grad_norm": 0.2314453125, "learning_rate": 0.00011732896555911435, "loss": 0.1086, "step": 9388 }, { "epoch": 0.44487088367685385, "grad_norm": 0.185546875, "learning_rate": 0.00011731429808473788, "loss": 0.1366, "step": 9389 }, { "epoch": 0.4449182658137882, "grad_norm": 0.73046875, "learning_rate": 0.00011729963022634872, "loss": 1.0817, "step": 9390 }, { "epoch": 0.4449656479507226, "grad_norm": 0.435546875, "learning_rate": 0.00011728496198427217, "loss": 0.7142, "step": 9391 }, { "epoch": 0.44501303008765697, "grad_norm": 0.6171875, "learning_rate": 0.00011727029335883361, "loss": 0.649, "step": 9392 }, { "epoch": 0.4450604122245913, "grad_norm": 0.703125, "learning_rate": 0.00011725562435035833, "loss": 1.0864, "step": 9393 }, { "epoch": 0.4451077943615257, "grad_norm": 0.4609375, "learning_rate": 0.00011724095495917167, "loss": 0.5156, "step": 9394 }, { "epoch": 0.4451551764984601, "grad_norm": 1.703125, "learning_rate": 0.00011722628518559899, "loss": 0.0558, "step": 9395 }, { "epoch": 0.44520255863539443, "grad_norm": 0.765625, "learning_rate": 0.00011721161502996565, "loss": 0.5618, "step": 9396 }, { "epoch": 0.4452499407723288, "grad_norm": 0.48828125, "learning_rate": 0.00011719694449259706, "loss": 0.7703, "step": 9397 }, { "epoch": 0.4452973229092632, "grad_norm": 0.87109375, "learning_rate": 0.0001171822735738185, "loss": 0.9965, "step": 9398 }, { "epoch": 0.4453447050461976, "grad_norm": 0.64453125, "learning_rate": 0.00011716760227395545, "loss": 1.0266, "step": 9399 }, { "epoch": 0.44539208718313195, "grad_norm": 0.06005859375, "learning_rate": 0.00011715293059333327, "loss": 0.0017, "step": 9400 }, { "epoch": 0.44543946932006634, "grad_norm": 0.6328125, "learning_rate": 0.00011713825853227738, "loss": 1.2956, "step": 9401 }, { "epoch": 0.44548685145700073, "grad_norm": 0.64453125, "learning_rate": 0.00011712358609111311, "loss": 1.0393, "step": 9402 }, { "epoch": 0.44553423359393507, "grad_norm": 0.79296875, "learning_rate": 0.00011710891327016597, "loss": 1.3742, "step": 9403 }, { "epoch": 0.44558161573086946, "grad_norm": 1.171875, "learning_rate": 0.00011709424006976138, "loss": 1.0638, "step": 9404 }, { "epoch": 0.44562899786780386, "grad_norm": 0.62890625, "learning_rate": 0.0001170795664902247, "loss": 0.94, "step": 9405 }, { "epoch": 0.4456763800047382, "grad_norm": 0.470703125, "learning_rate": 0.00011706489253188147, "loss": 0.5283, "step": 9406 }, { "epoch": 0.4457237621416726, "grad_norm": 0.68359375, "learning_rate": 0.0001170502181950571, "loss": 1.1417, "step": 9407 }, { "epoch": 0.445771144278607, "grad_norm": 0.8046875, "learning_rate": 0.00011703554348007704, "loss": 0.9421, "step": 9408 }, { "epoch": 0.4458185264155413, "grad_norm": 0.6953125, "learning_rate": 0.00011702086838726679, "loss": 0.7954, "step": 9409 }, { "epoch": 0.4458659085524757, "grad_norm": 0.64453125, "learning_rate": 0.00011700619291695178, "loss": 1.0548, "step": 9410 }, { "epoch": 0.4459132906894101, "grad_norm": 0.447265625, "learning_rate": 0.00011699151706945753, "loss": 0.0486, "step": 9411 }, { "epoch": 0.4459606728263445, "grad_norm": 0.4921875, "learning_rate": 0.00011697684084510956, "loss": 0.8602, "step": 9412 }, { "epoch": 0.44600805496327883, "grad_norm": 1.203125, "learning_rate": 0.0001169621642442333, "loss": 0.7969, "step": 9413 }, { "epoch": 0.4460554371002132, "grad_norm": 0.1767578125, "learning_rate": 0.00011694748726715432, "loss": 0.1151, "step": 9414 }, { "epoch": 0.4461028192371476, "grad_norm": 0.119140625, "learning_rate": 0.00011693280991419815, "loss": 0.0798, "step": 9415 }, { "epoch": 0.44615020137408196, "grad_norm": 0.5625, "learning_rate": 0.00011691813218569027, "loss": 0.7561, "step": 9416 }, { "epoch": 0.44619758351101635, "grad_norm": 0.703125, "learning_rate": 0.00011690345408195624, "loss": 1.4186, "step": 9417 }, { "epoch": 0.44624496564795074, "grad_norm": 0.609375, "learning_rate": 0.00011688877560332158, "loss": 0.9857, "step": 9418 }, { "epoch": 0.4462923477848851, "grad_norm": 0.625, "learning_rate": 0.00011687409675011189, "loss": 0.8451, "step": 9419 }, { "epoch": 0.4463397299218195, "grad_norm": 0.55859375, "learning_rate": 0.00011685941752265272, "loss": 0.5844, "step": 9420 }, { "epoch": 0.44638711205875387, "grad_norm": 0.69140625, "learning_rate": 0.0001168447379212696, "loss": 0.0114, "step": 9421 }, { "epoch": 0.4464344941956882, "grad_norm": 0.13671875, "learning_rate": 0.00011683005794628814, "loss": 0.017, "step": 9422 }, { "epoch": 0.4464818763326226, "grad_norm": 0.234375, "learning_rate": 0.00011681537759803393, "loss": 0.0175, "step": 9423 }, { "epoch": 0.446529258469557, "grad_norm": 0.66015625, "learning_rate": 0.0001168006968768326, "loss": 0.9998, "step": 9424 }, { "epoch": 0.4465766406064913, "grad_norm": 0.38671875, "learning_rate": 0.00011678601578300965, "loss": 0.4831, "step": 9425 }, { "epoch": 0.4466240227434257, "grad_norm": 0.67578125, "learning_rate": 0.00011677133431689075, "loss": 0.8602, "step": 9426 }, { "epoch": 0.4466714048803601, "grad_norm": 0.59765625, "learning_rate": 0.00011675665247880154, "loss": 0.6564, "step": 9427 }, { "epoch": 0.4467187870172945, "grad_norm": 0.5078125, "learning_rate": 0.00011674197026906762, "loss": 0.7945, "step": 9428 }, { "epoch": 0.44676616915422884, "grad_norm": 0.94140625, "learning_rate": 0.00011672728768801461, "loss": 0.4851, "step": 9429 }, { "epoch": 0.44681355129116324, "grad_norm": 0.828125, "learning_rate": 0.00011671260473596819, "loss": 0.5367, "step": 9430 }, { "epoch": 0.44686093342809763, "grad_norm": 0.69140625, "learning_rate": 0.00011669792141325402, "loss": 0.5629, "step": 9431 }, { "epoch": 0.44690831556503197, "grad_norm": 0.69921875, "learning_rate": 0.00011668323772019774, "loss": 0.0414, "step": 9432 }, { "epoch": 0.44695569770196636, "grad_norm": 0.68359375, "learning_rate": 0.00011666855365712499, "loss": 1.2052, "step": 9433 }, { "epoch": 0.44700307983890075, "grad_norm": 0.69140625, "learning_rate": 0.00011665386922436148, "loss": 0.7529, "step": 9434 }, { "epoch": 0.4470504619758351, "grad_norm": 0.54296875, "learning_rate": 0.00011663918442223292, "loss": 1.1898, "step": 9435 }, { "epoch": 0.4470978441127695, "grad_norm": 0.2138671875, "learning_rate": 0.00011662449925106493, "loss": 0.0194, "step": 9436 }, { "epoch": 0.4471452262497039, "grad_norm": 0.80859375, "learning_rate": 0.00011660981371118327, "loss": 0.4117, "step": 9437 }, { "epoch": 0.4471926083866382, "grad_norm": 0.69140625, "learning_rate": 0.00011659512780291364, "loss": 1.0694, "step": 9438 }, { "epoch": 0.4472399905235726, "grad_norm": 0.16015625, "learning_rate": 0.00011658044152658179, "loss": 0.128, "step": 9439 }, { "epoch": 0.447287372660507, "grad_norm": 0.59765625, "learning_rate": 0.00011656575488251333, "loss": 0.9505, "step": 9440 }, { "epoch": 0.4473347547974414, "grad_norm": 0.126953125, "learning_rate": 0.00011655106787103411, "loss": 0.016, "step": 9441 }, { "epoch": 0.44738213693437573, "grad_norm": 0.6328125, "learning_rate": 0.00011653638049246982, "loss": 0.5005, "step": 9442 }, { "epoch": 0.4474295190713101, "grad_norm": 0.6640625, "learning_rate": 0.00011652169274714626, "loss": 1.0562, "step": 9443 }, { "epoch": 0.4474769012082445, "grad_norm": 0.54296875, "learning_rate": 0.0001165070046353891, "loss": 0.2267, "step": 9444 }, { "epoch": 0.44752428334517885, "grad_norm": 0.75390625, "learning_rate": 0.0001164923161575242, "loss": 1.0425, "step": 9445 }, { "epoch": 0.44757166548211325, "grad_norm": 0.58984375, "learning_rate": 0.0001164776273138773, "loss": 0.9655, "step": 9446 }, { "epoch": 0.44761904761904764, "grad_norm": 0.60546875, "learning_rate": 0.00011646293810477418, "loss": 1.1615, "step": 9447 }, { "epoch": 0.447666429755982, "grad_norm": 0.85546875, "learning_rate": 0.0001164482485305406, "loss": 1.0926, "step": 9448 }, { "epoch": 0.44771381189291637, "grad_norm": 0.37109375, "learning_rate": 0.00011643355859150237, "loss": 0.1574, "step": 9449 }, { "epoch": 0.44776119402985076, "grad_norm": 0.67578125, "learning_rate": 0.00011641886828798536, "loss": 0.8106, "step": 9450 }, { "epoch": 0.4478085761667851, "grad_norm": 0.16796875, "learning_rate": 0.00011640417762031533, "loss": 0.1354, "step": 9451 }, { "epoch": 0.4478559583037195, "grad_norm": 0.578125, "learning_rate": 0.00011638948658881808, "loss": 0.5431, "step": 9452 }, { "epoch": 0.4479033404406539, "grad_norm": 0.66796875, "learning_rate": 0.00011637479519381949, "loss": 1.068, "step": 9453 }, { "epoch": 0.4479507225775882, "grad_norm": 0.55859375, "learning_rate": 0.0001163601034356454, "loss": 1.0824, "step": 9454 }, { "epoch": 0.4479981047145226, "grad_norm": 0.546875, "learning_rate": 0.00011634541131462167, "loss": 0.9222, "step": 9455 }, { "epoch": 0.448045486851457, "grad_norm": 0.39453125, "learning_rate": 0.00011633071883107407, "loss": 0.1449, "step": 9456 }, { "epoch": 0.4480928689883914, "grad_norm": 0.7109375, "learning_rate": 0.00011631602598532854, "loss": 0.99, "step": 9457 }, { "epoch": 0.44814025112532574, "grad_norm": 0.4453125, "learning_rate": 0.00011630133277771096, "loss": 0.6691, "step": 9458 }, { "epoch": 0.44818763326226013, "grad_norm": 0.5859375, "learning_rate": 0.00011628663920854714, "loss": 0.7797, "step": 9459 }, { "epoch": 0.4482350153991945, "grad_norm": 0.93359375, "learning_rate": 0.00011627194527816304, "loss": 0.6866, "step": 9460 }, { "epoch": 0.44828239753612886, "grad_norm": 0.6875, "learning_rate": 0.0001162572509868845, "loss": 1.1991, "step": 9461 }, { "epoch": 0.44832977967306326, "grad_norm": 0.62109375, "learning_rate": 0.00011624255633503749, "loss": 1.1431, "step": 9462 }, { "epoch": 0.44837716180999765, "grad_norm": 0.671875, "learning_rate": 0.00011622786132294784, "loss": 1.2445, "step": 9463 }, { "epoch": 0.448424543946932, "grad_norm": 0.69140625, "learning_rate": 0.00011621316595094151, "loss": 0.8336, "step": 9464 }, { "epoch": 0.4484719260838664, "grad_norm": 0.74609375, "learning_rate": 0.00011619847021934445, "loss": 1.1235, "step": 9465 }, { "epoch": 0.44851930822080077, "grad_norm": 0.703125, "learning_rate": 0.00011618377412848258, "loss": 1.0955, "step": 9466 }, { "epoch": 0.4485666903577351, "grad_norm": 0.9375, "learning_rate": 0.00011616907767868181, "loss": 0.2217, "step": 9467 }, { "epoch": 0.4486140724946695, "grad_norm": 0.6171875, "learning_rate": 0.00011615438087026816, "loss": 0.8414, "step": 9468 }, { "epoch": 0.4486614546316039, "grad_norm": 0.79296875, "learning_rate": 0.00011613968370356754, "loss": 1.3303, "step": 9469 }, { "epoch": 0.4487088367685383, "grad_norm": 0.6484375, "learning_rate": 0.0001161249861789059, "loss": 0.9061, "step": 9470 }, { "epoch": 0.4487562189054726, "grad_norm": 1.171875, "learning_rate": 0.00011611028829660925, "loss": 0.924, "step": 9471 }, { "epoch": 0.448803601042407, "grad_norm": 0.62890625, "learning_rate": 0.00011609559005700356, "loss": 1.097, "step": 9472 }, { "epoch": 0.4488509831793414, "grad_norm": 0.484375, "learning_rate": 0.00011608089146041487, "loss": 0.8251, "step": 9473 }, { "epoch": 0.44889836531627575, "grad_norm": 0.13671875, "learning_rate": 0.0001160661925071691, "loss": 0.0071, "step": 9474 }, { "epoch": 0.44894574745321014, "grad_norm": 0.828125, "learning_rate": 0.00011605149319759228, "loss": 1.0481, "step": 9475 }, { "epoch": 0.44899312959014454, "grad_norm": 1.0625, "learning_rate": 0.00011603679353201044, "loss": 1.073, "step": 9476 }, { "epoch": 0.4490405117270789, "grad_norm": 0.7421875, "learning_rate": 0.00011602209351074963, "loss": 0.9888, "step": 9477 }, { "epoch": 0.44908789386401327, "grad_norm": 0.58984375, "learning_rate": 0.00011600739313413587, "loss": 0.9214, "step": 9478 }, { "epoch": 0.44913527600094766, "grad_norm": 0.66796875, "learning_rate": 0.00011599269240249513, "loss": 0.9524, "step": 9479 }, { "epoch": 0.449182658137882, "grad_norm": 0.314453125, "learning_rate": 0.00011597799131615352, "loss": 0.1399, "step": 9480 }, { "epoch": 0.4492300402748164, "grad_norm": 0.04541015625, "learning_rate": 0.0001159632898754371, "loss": 0.0011, "step": 9481 }, { "epoch": 0.4492774224117508, "grad_norm": 0.546875, "learning_rate": 0.00011594858808067188, "loss": 0.6639, "step": 9482 }, { "epoch": 0.4493248045486851, "grad_norm": 0.03857421875, "learning_rate": 0.000115933885932184, "loss": 0.0029, "step": 9483 }, { "epoch": 0.4493721866856195, "grad_norm": 0.63671875, "learning_rate": 0.00011591918343029946, "loss": 0.8705, "step": 9484 }, { "epoch": 0.4494195688225539, "grad_norm": 0.08642578125, "learning_rate": 0.00011590448057534445, "loss": 0.007, "step": 9485 }, { "epoch": 0.4494669509594883, "grad_norm": 0.1337890625, "learning_rate": 0.00011588977736764497, "loss": 0.0142, "step": 9486 }, { "epoch": 0.44951433309642264, "grad_norm": 0.62109375, "learning_rate": 0.00011587507380752713, "loss": 0.7774, "step": 9487 }, { "epoch": 0.44956171523335703, "grad_norm": 0.578125, "learning_rate": 0.00011586036989531707, "loss": 0.823, "step": 9488 }, { "epoch": 0.4496090973702914, "grad_norm": 0.703125, "learning_rate": 0.00011584566563134094, "loss": 0.9035, "step": 9489 }, { "epoch": 0.44965647950722576, "grad_norm": 0.6796875, "learning_rate": 0.00011583096101592479, "loss": 1.0848, "step": 9490 }, { "epoch": 0.44970386164416015, "grad_norm": 0.65234375, "learning_rate": 0.00011581625604939477, "loss": 1.6401, "step": 9491 }, { "epoch": 0.44975124378109455, "grad_norm": 0.59375, "learning_rate": 0.00011580155073207707, "loss": 1.2194, "step": 9492 }, { "epoch": 0.4497986259180289, "grad_norm": 0.216796875, "learning_rate": 0.0001157868450642978, "loss": 0.0073, "step": 9493 }, { "epoch": 0.4498460080549633, "grad_norm": 0.796875, "learning_rate": 0.00011577213904638313, "loss": 1.1233, "step": 9494 }, { "epoch": 0.44989339019189767, "grad_norm": 0.2109375, "learning_rate": 0.0001157574326786592, "loss": 0.1325, "step": 9495 }, { "epoch": 0.449940772328832, "grad_norm": 0.0274658203125, "learning_rate": 0.00011574272596145221, "loss": 0.0009, "step": 9496 }, { "epoch": 0.4499881544657664, "grad_norm": 0.166015625, "learning_rate": 0.00011572801889508831, "loss": 0.1232, "step": 9497 }, { "epoch": 0.4500355366027008, "grad_norm": 0.2470703125, "learning_rate": 0.0001157133114798937, "loss": 0.1474, "step": 9498 }, { "epoch": 0.4500829187396352, "grad_norm": 0.5546875, "learning_rate": 0.00011569860371619459, "loss": 0.1166, "step": 9499 }, { "epoch": 0.4501303008765695, "grad_norm": 0.75390625, "learning_rate": 0.00011568389560431719, "loss": 0.7246, "step": 9500 }, { "epoch": 0.4501776830135039, "grad_norm": 0.5703125, "learning_rate": 0.00011566918714458765, "loss": 0.4216, "step": 9501 }, { "epoch": 0.4502250651504383, "grad_norm": 0.95703125, "learning_rate": 0.00011565447833733227, "loss": 0.0335, "step": 9502 }, { "epoch": 0.45027244728737265, "grad_norm": 0.095703125, "learning_rate": 0.0001156397691828772, "loss": 0.0089, "step": 9503 }, { "epoch": 0.45031982942430704, "grad_norm": 0.455078125, "learning_rate": 0.00011562505968154876, "loss": 0.4767, "step": 9504 }, { "epoch": 0.45036721156124143, "grad_norm": 0.578125, "learning_rate": 0.00011561034983367307, "loss": 0.5419, "step": 9505 }, { "epoch": 0.45041459369817577, "grad_norm": 0.435546875, "learning_rate": 0.00011559563963957649, "loss": 0.4571, "step": 9506 }, { "epoch": 0.45046197583511016, "grad_norm": 0.7421875, "learning_rate": 0.0001155809290995852, "loss": 0.8394, "step": 9507 }, { "epoch": 0.45050935797204456, "grad_norm": 0.1416015625, "learning_rate": 0.00011556621821402556, "loss": 0.008, "step": 9508 }, { "epoch": 0.4505567401089789, "grad_norm": 0.7734375, "learning_rate": 0.00011555150698322372, "loss": 1.2913, "step": 9509 }, { "epoch": 0.4506041222459133, "grad_norm": 0.10693359375, "learning_rate": 0.00011553679540750605, "loss": 0.0034, "step": 9510 }, { "epoch": 0.4506515043828477, "grad_norm": 0.2255859375, "learning_rate": 0.0001155220834871988, "loss": 0.0453, "step": 9511 }, { "epoch": 0.450698886519782, "grad_norm": 0.162109375, "learning_rate": 0.0001155073712226283, "loss": 0.1167, "step": 9512 }, { "epoch": 0.4507462686567164, "grad_norm": 0.62109375, "learning_rate": 0.00011549265861412077, "loss": 1.2963, "step": 9513 }, { "epoch": 0.4507936507936508, "grad_norm": 0.63671875, "learning_rate": 0.00011547794566200261, "loss": 1.3112, "step": 9514 }, { "epoch": 0.4508410329305852, "grad_norm": 0.263671875, "learning_rate": 0.0001154632323666001, "loss": 0.1733, "step": 9515 }, { "epoch": 0.45088841506751953, "grad_norm": 0.5703125, "learning_rate": 0.00011544851872823956, "loss": 0.8656, "step": 9516 }, { "epoch": 0.4509357972044539, "grad_norm": 0.8203125, "learning_rate": 0.00011543380474724735, "loss": 1.2624, "step": 9517 }, { "epoch": 0.4509831793413883, "grad_norm": 0.57421875, "learning_rate": 0.00011541909042394974, "loss": 1.0939, "step": 9518 }, { "epoch": 0.45103056147832266, "grad_norm": 0.75390625, "learning_rate": 0.00011540437575867315, "loss": 0.6567, "step": 9519 }, { "epoch": 0.45107794361525705, "grad_norm": 0.47265625, "learning_rate": 0.00011538966075174396, "loss": 0.0171, "step": 9520 }, { "epoch": 0.45112532575219144, "grad_norm": 0.59375, "learning_rate": 0.00011537494540348843, "loss": 0.7626, "step": 9521 }, { "epoch": 0.4511727078891258, "grad_norm": 0.7109375, "learning_rate": 0.000115360229714233, "loss": 1.0279, "step": 9522 }, { "epoch": 0.45122009002606017, "grad_norm": 0.78125, "learning_rate": 0.00011534551368430404, "loss": 0.9124, "step": 9523 }, { "epoch": 0.45126747216299457, "grad_norm": 0.6328125, "learning_rate": 0.00011533079731402793, "loss": 0.5132, "step": 9524 }, { "epoch": 0.4513148542999289, "grad_norm": 0.65234375, "learning_rate": 0.00011531608060373109, "loss": 1.246, "step": 9525 }, { "epoch": 0.4513622364368633, "grad_norm": 0.63671875, "learning_rate": 0.00011530136355373986, "loss": 1.0366, "step": 9526 }, { "epoch": 0.4514096185737977, "grad_norm": 0.01202392578125, "learning_rate": 0.00011528664616438072, "loss": 0.0007, "step": 9527 }, { "epoch": 0.4514570007107321, "grad_norm": 0.8671875, "learning_rate": 0.00011527192843598002, "loss": 1.3601, "step": 9528 }, { "epoch": 0.4515043828476664, "grad_norm": 0.416015625, "learning_rate": 0.00011525721036886421, "loss": 0.1206, "step": 9529 }, { "epoch": 0.4515517649846008, "grad_norm": 0.28515625, "learning_rate": 0.00011524249196335974, "loss": 0.0013, "step": 9530 }, { "epoch": 0.4515991471215352, "grad_norm": 0.65234375, "learning_rate": 0.00011522777321979303, "loss": 0.4378, "step": 9531 }, { "epoch": 0.45164652925846954, "grad_norm": 0.80859375, "learning_rate": 0.00011521305413849055, "loss": 0.9599, "step": 9532 }, { "epoch": 0.45169391139540394, "grad_norm": 0.51953125, "learning_rate": 0.00011519833471977873, "loss": 1.0324, "step": 9533 }, { "epoch": 0.45174129353233833, "grad_norm": 0.296875, "learning_rate": 0.000115183614963984, "loss": 0.0441, "step": 9534 }, { "epoch": 0.45178867566927267, "grad_norm": 0.79296875, "learning_rate": 0.0001151688948714329, "loss": 1.2256, "step": 9535 }, { "epoch": 0.45183605780620706, "grad_norm": 0.484375, "learning_rate": 0.00011515417444245185, "loss": 1.162, "step": 9536 }, { "epoch": 0.45188343994314145, "grad_norm": 0.55859375, "learning_rate": 0.00011513945367736733, "loss": 0.1426, "step": 9537 }, { "epoch": 0.4519308220800758, "grad_norm": 0.038818359375, "learning_rate": 0.00011512473257650592, "loss": 0.0041, "step": 9538 }, { "epoch": 0.4519782042170102, "grad_norm": 0.0277099609375, "learning_rate": 0.00011511001114019399, "loss": 0.002, "step": 9539 }, { "epoch": 0.4520255863539446, "grad_norm": 0.1943359375, "learning_rate": 0.0001150952893687581, "loss": 0.0135, "step": 9540 }, { "epoch": 0.4520729684908789, "grad_norm": 0.5703125, "learning_rate": 0.00011508056726252482, "loss": 1.2194, "step": 9541 }, { "epoch": 0.4521203506278133, "grad_norm": 0.74609375, "learning_rate": 0.00011506584482182061, "loss": 0.9896, "step": 9542 }, { "epoch": 0.4521677327647477, "grad_norm": 0.734375, "learning_rate": 0.000115051122046972, "loss": 0.9118, "step": 9543 }, { "epoch": 0.4522151149016821, "grad_norm": 0.7265625, "learning_rate": 0.00011503639893830553, "loss": 1.2259, "step": 9544 }, { "epoch": 0.45226249703861643, "grad_norm": 0.34765625, "learning_rate": 0.00011502167549614773, "loss": 0.0313, "step": 9545 }, { "epoch": 0.4523098791755508, "grad_norm": 0.291015625, "learning_rate": 0.00011500695172082521, "loss": 0.1573, "step": 9546 }, { "epoch": 0.4523572613124852, "grad_norm": 0.6640625, "learning_rate": 0.00011499222761266446, "loss": 0.8081, "step": 9547 }, { "epoch": 0.45240464344941955, "grad_norm": 0.6640625, "learning_rate": 0.00011497750317199208, "loss": 0.8415, "step": 9548 }, { "epoch": 0.45245202558635395, "grad_norm": 0.81640625, "learning_rate": 0.00011496277839913463, "loss": 0.1733, "step": 9549 }, { "epoch": 0.45249940772328834, "grad_norm": 0.71484375, "learning_rate": 0.00011494805329441871, "loss": 1.1652, "step": 9550 }, { "epoch": 0.4525467898602227, "grad_norm": 0.70703125, "learning_rate": 0.00011493332785817086, "loss": 1.0033, "step": 9551 }, { "epoch": 0.45259417199715707, "grad_norm": 0.78515625, "learning_rate": 0.0001149186020907177, "loss": 1.1452, "step": 9552 }, { "epoch": 0.45264155413409146, "grad_norm": 0.126953125, "learning_rate": 0.00011490387599238586, "loss": 0.017, "step": 9553 }, { "epoch": 0.4526889362710258, "grad_norm": 0.7734375, "learning_rate": 0.00011488914956350191, "loss": 1.0636, "step": 9554 }, { "epoch": 0.4527363184079602, "grad_norm": 0.63671875, "learning_rate": 0.00011487442280439249, "loss": 0.8769, "step": 9555 }, { "epoch": 0.4527837005448946, "grad_norm": 0.75, "learning_rate": 0.0001148596957153842, "loss": 0.8713, "step": 9556 }, { "epoch": 0.452831082681829, "grad_norm": 0.6328125, "learning_rate": 0.00011484496829680371, "loss": 1.2281, "step": 9557 }, { "epoch": 0.4528784648187633, "grad_norm": 0.043701171875, "learning_rate": 0.00011483024054897764, "loss": 0.0025, "step": 9558 }, { "epoch": 0.4529258469556977, "grad_norm": 0.345703125, "learning_rate": 0.00011481551247223261, "loss": 0.0514, "step": 9559 }, { "epoch": 0.4529732290926321, "grad_norm": 0.625, "learning_rate": 0.0001148007840668953, "loss": 0.5776, "step": 9560 }, { "epoch": 0.45302061122956644, "grad_norm": 0.251953125, "learning_rate": 0.00011478605533329239, "loss": 0.1592, "step": 9561 }, { "epoch": 0.45306799336650083, "grad_norm": 0.9296875, "learning_rate": 0.00011477132627175046, "loss": 0.8947, "step": 9562 }, { "epoch": 0.4531153755034352, "grad_norm": 0.21484375, "learning_rate": 0.0001147565968825963, "loss": 0.0262, "step": 9563 }, { "epoch": 0.45316275764036956, "grad_norm": 0.703125, "learning_rate": 0.00011474186716615651, "loss": 0.9152, "step": 9564 }, { "epoch": 0.45321013977730396, "grad_norm": 0.60546875, "learning_rate": 0.00011472713712275784, "loss": 0.8025, "step": 9565 }, { "epoch": 0.45325752191423835, "grad_norm": 0.6328125, "learning_rate": 0.00011471240675272694, "loss": 0.4409, "step": 9566 }, { "epoch": 0.4533049040511727, "grad_norm": 1.0546875, "learning_rate": 0.00011469767605639052, "loss": 1.0586, "step": 9567 }, { "epoch": 0.4533522861881071, "grad_norm": 0.154296875, "learning_rate": 0.00011468294503407531, "loss": 0.1058, "step": 9568 }, { "epoch": 0.45339966832504147, "grad_norm": 0.1630859375, "learning_rate": 0.00011466821368610803, "loss": 0.0348, "step": 9569 }, { "epoch": 0.4534470504619758, "grad_norm": 0.0303955078125, "learning_rate": 0.00011465348201281538, "loss": 0.0017, "step": 9570 }, { "epoch": 0.4534944325989102, "grad_norm": 0.609375, "learning_rate": 0.00011463875001452409, "loss": 0.9173, "step": 9571 }, { "epoch": 0.4535418147358446, "grad_norm": 0.31640625, "learning_rate": 0.00011462401769156095, "loss": 0.0327, "step": 9572 }, { "epoch": 0.453589196872779, "grad_norm": 0.67578125, "learning_rate": 0.00011460928504425267, "loss": 1.0341, "step": 9573 }, { "epoch": 0.4536365790097133, "grad_norm": 0.86328125, "learning_rate": 0.000114594552072926, "loss": 0.8655, "step": 9574 }, { "epoch": 0.4536839611466477, "grad_norm": 0.51953125, "learning_rate": 0.00011457981877790769, "loss": 1.1325, "step": 9575 }, { "epoch": 0.4537313432835821, "grad_norm": 0.66796875, "learning_rate": 0.00011456508515952456, "loss": 1.2329, "step": 9576 }, { "epoch": 0.45377872542051645, "grad_norm": 0.478515625, "learning_rate": 0.00011455035121810334, "loss": 0.3296, "step": 9577 }, { "epoch": 0.45382610755745084, "grad_norm": 0.443359375, "learning_rate": 0.00011453561695397081, "loss": 0.0984, "step": 9578 }, { "epoch": 0.45387348969438523, "grad_norm": 0.734375, "learning_rate": 0.00011452088236745382, "loss": 1.1948, "step": 9579 }, { "epoch": 0.4539208718313196, "grad_norm": 0.77734375, "learning_rate": 0.00011450614745887911, "loss": 1.0574, "step": 9580 }, { "epoch": 0.45396825396825397, "grad_norm": 0.02001953125, "learning_rate": 0.00011449141222857351, "loss": 0.001, "step": 9581 }, { "epoch": 0.45401563610518836, "grad_norm": 0.5703125, "learning_rate": 0.00011447667667686379, "loss": 0.9499, "step": 9582 }, { "epoch": 0.4540630182421227, "grad_norm": 0.640625, "learning_rate": 0.00011446194080407682, "loss": 1.3364, "step": 9583 }, { "epoch": 0.4541104003790571, "grad_norm": 0.52734375, "learning_rate": 0.00011444720461053942, "loss": 0.6499, "step": 9584 }, { "epoch": 0.4541577825159915, "grad_norm": 0.671875, "learning_rate": 0.00011443246809657839, "loss": 0.2543, "step": 9585 }, { "epoch": 0.4542051646529259, "grad_norm": 0.00191497802734375, "learning_rate": 0.0001144177312625206, "loss": 0.0001, "step": 9586 }, { "epoch": 0.4542525467898602, "grad_norm": 0.2197265625, "learning_rate": 0.00011440299410869286, "loss": 0.147, "step": 9587 }, { "epoch": 0.4542999289267946, "grad_norm": 0.494140625, "learning_rate": 0.00011438825663542209, "loss": 0.0698, "step": 9588 }, { "epoch": 0.454347311063729, "grad_norm": 0.6484375, "learning_rate": 0.00011437351884303513, "loss": 0.9221, "step": 9589 }, { "epoch": 0.45439469320066334, "grad_norm": 0.73828125, "learning_rate": 0.00011435878073185879, "loss": 0.6449, "step": 9590 }, { "epoch": 0.45444207533759773, "grad_norm": 0.73828125, "learning_rate": 0.00011434404230222, "loss": 1.0036, "step": 9591 }, { "epoch": 0.4544894574745321, "grad_norm": 0.87890625, "learning_rate": 0.00011432930355444564, "loss": 1.5581, "step": 9592 }, { "epoch": 0.45453683961146646, "grad_norm": 0.1455078125, "learning_rate": 0.00011431456448886257, "loss": 0.0236, "step": 9593 }, { "epoch": 0.45458422174840085, "grad_norm": 0.171875, "learning_rate": 0.0001142998251057977, "loss": 0.1308, "step": 9594 }, { "epoch": 0.45463160388533524, "grad_norm": 0.71484375, "learning_rate": 0.00011428508540557799, "loss": 1.1786, "step": 9595 }, { "epoch": 0.4546789860222696, "grad_norm": 0.7421875, "learning_rate": 0.00011427034538853028, "loss": 0.9578, "step": 9596 }, { "epoch": 0.454726368159204, "grad_norm": 0.62109375, "learning_rate": 0.0001142556050549815, "loss": 0.8847, "step": 9597 }, { "epoch": 0.45477375029613837, "grad_norm": 0.55859375, "learning_rate": 0.00011424086440525856, "loss": 1.0814, "step": 9598 }, { "epoch": 0.4548211324330727, "grad_norm": 0.671875, "learning_rate": 0.00011422612343968844, "loss": 1.1423, "step": 9599 }, { "epoch": 0.4548685145700071, "grad_norm": 0.197265625, "learning_rate": 0.00011421138215859806, "loss": 0.0239, "step": 9600 }, { "epoch": 0.4549158967069415, "grad_norm": 0.578125, "learning_rate": 0.00011419664056231436, "loss": 0.4021, "step": 9601 }, { "epoch": 0.4549632788438759, "grad_norm": 0.9765625, "learning_rate": 0.00011418189865116429, "loss": 1.1078, "step": 9602 }, { "epoch": 0.4550106609808102, "grad_norm": 0.208984375, "learning_rate": 0.00011416715642547482, "loss": 0.1314, "step": 9603 }, { "epoch": 0.4550580431177446, "grad_norm": 1.0078125, "learning_rate": 0.00011415241388557292, "loss": 0.8406, "step": 9604 }, { "epoch": 0.455105425254679, "grad_norm": 0.7109375, "learning_rate": 0.00011413767103178553, "loss": 1.1282, "step": 9605 }, { "epoch": 0.45515280739161335, "grad_norm": 0.63671875, "learning_rate": 0.00011412292786443965, "loss": 0.4674, "step": 9606 }, { "epoch": 0.45520018952854774, "grad_norm": 0.765625, "learning_rate": 0.0001141081843838623, "loss": 1.5249, "step": 9607 }, { "epoch": 0.45524757166548213, "grad_norm": 0.5625, "learning_rate": 0.00011409344059038043, "loss": 1.1895, "step": 9608 }, { "epoch": 0.45529495380241647, "grad_norm": 0.6171875, "learning_rate": 0.00011407869648432105, "loss": 0.8841, "step": 9609 }, { "epoch": 0.45534233593935086, "grad_norm": 0.71484375, "learning_rate": 0.00011406395206601119, "loss": 0.8649, "step": 9610 }, { "epoch": 0.45538971807628525, "grad_norm": 0.421875, "learning_rate": 0.00011404920733577786, "loss": 0.3384, "step": 9611 }, { "epoch": 0.4554371002132196, "grad_norm": 0.671875, "learning_rate": 0.00011403446229394809, "loss": 0.8103, "step": 9612 }, { "epoch": 0.455484482350154, "grad_norm": 0.65234375, "learning_rate": 0.00011401971694084887, "loss": 1.1253, "step": 9613 }, { "epoch": 0.4555318644870884, "grad_norm": 0.6796875, "learning_rate": 0.00011400497127680727, "loss": 0.8887, "step": 9614 }, { "epoch": 0.45557924662402277, "grad_norm": 0.2578125, "learning_rate": 0.00011399022530215033, "loss": 0.0379, "step": 9615 }, { "epoch": 0.4556266287609571, "grad_norm": 0.07763671875, "learning_rate": 0.00011397547901720508, "loss": 0.0031, "step": 9616 }, { "epoch": 0.4556740108978915, "grad_norm": 0.369140625, "learning_rate": 0.00011396073242229859, "loss": 0.0252, "step": 9617 }, { "epoch": 0.4557213930348259, "grad_norm": 0.671875, "learning_rate": 0.00011394598551775794, "loss": 0.8843, "step": 9618 }, { "epoch": 0.45576877517176023, "grad_norm": 0.5, "learning_rate": 0.00011393123830391019, "loss": 0.6872, "step": 9619 }, { "epoch": 0.4558161573086946, "grad_norm": 0.166015625, "learning_rate": 0.0001139164907810824, "loss": 0.0144, "step": 9620 }, { "epoch": 0.455863539445629, "grad_norm": 0.66015625, "learning_rate": 0.00011390174294960165, "loss": 1.0954, "step": 9621 }, { "epoch": 0.45591092158256336, "grad_norm": 0.02392578125, "learning_rate": 0.00011388699480979507, "loss": 0.0009, "step": 9622 }, { "epoch": 0.45595830371949775, "grad_norm": 0.66796875, "learning_rate": 0.00011387224636198977, "loss": 0.9338, "step": 9623 }, { "epoch": 0.45600568585643214, "grad_norm": 0.734375, "learning_rate": 0.00011385749760651276, "loss": 0.8699, "step": 9624 }, { "epoch": 0.4560530679933665, "grad_norm": 0.515625, "learning_rate": 0.00011384274854369124, "loss": 1.0061, "step": 9625 }, { "epoch": 0.45610045013030087, "grad_norm": 0.609375, "learning_rate": 0.00011382799917385232, "loss": 0.9334, "step": 9626 }, { "epoch": 0.45614783226723526, "grad_norm": 0.7578125, "learning_rate": 0.0001138132494973231, "loss": 1.0398, "step": 9627 }, { "epoch": 0.4561952144041696, "grad_norm": 0.4921875, "learning_rate": 0.00011379849951443071, "loss": 0.8752, "step": 9628 }, { "epoch": 0.456242596541104, "grad_norm": 0.50390625, "learning_rate": 0.00011378374922550228, "loss": 0.0754, "step": 9629 }, { "epoch": 0.4562899786780384, "grad_norm": 0.314453125, "learning_rate": 0.00011376899863086501, "loss": 0.0124, "step": 9630 }, { "epoch": 0.4563373608149728, "grad_norm": 0.62109375, "learning_rate": 0.00011375424773084599, "loss": 0.8615, "step": 9631 }, { "epoch": 0.4563847429519071, "grad_norm": 0.58203125, "learning_rate": 0.0001137394965257724, "loss": 0.9877, "step": 9632 }, { "epoch": 0.4564321250888415, "grad_norm": 0.7578125, "learning_rate": 0.00011372474501597144, "loss": 0.7045, "step": 9633 }, { "epoch": 0.4564795072257759, "grad_norm": 0.1572265625, "learning_rate": 0.00011370999320177022, "loss": 0.0186, "step": 9634 }, { "epoch": 0.45652688936271024, "grad_norm": 0.2060546875, "learning_rate": 0.00011369524108349601, "loss": 0.1419, "step": 9635 }, { "epoch": 0.45657427149964463, "grad_norm": 0.75, "learning_rate": 0.00011368048866147589, "loss": 0.9008, "step": 9636 }, { "epoch": 0.45662165363657903, "grad_norm": 0.5703125, "learning_rate": 0.0001136657359360371, "loss": 0.913, "step": 9637 }, { "epoch": 0.45666903577351337, "grad_norm": 0.61328125, "learning_rate": 0.00011365098290750689, "loss": 1.543, "step": 9638 }, { "epoch": 0.45671641791044776, "grad_norm": 0.7109375, "learning_rate": 0.00011363622957621238, "loss": 0.655, "step": 9639 }, { "epoch": 0.45676380004738215, "grad_norm": 0.46484375, "learning_rate": 0.00011362147594248082, "loss": 0.0694, "step": 9640 }, { "epoch": 0.4568111821843165, "grad_norm": 0.57421875, "learning_rate": 0.00011360672200663946, "loss": 0.7387, "step": 9641 }, { "epoch": 0.4568585643212509, "grad_norm": 0.50390625, "learning_rate": 0.00011359196776901548, "loss": 0.4316, "step": 9642 }, { "epoch": 0.4569059464581853, "grad_norm": 0.40625, "learning_rate": 0.00011357721322993615, "loss": 0.5524, "step": 9643 }, { "epoch": 0.4569533285951196, "grad_norm": 0.71484375, "learning_rate": 0.00011356245838972868, "loss": 1.012, "step": 9644 }, { "epoch": 0.457000710732054, "grad_norm": 0.484375, "learning_rate": 0.0001135477032487203, "loss": 0.9261, "step": 9645 }, { "epoch": 0.4570480928689884, "grad_norm": 0.5625, "learning_rate": 0.00011353294780723836, "loss": 0.5779, "step": 9646 }, { "epoch": 0.4570954750059228, "grad_norm": 0.09375, "learning_rate": 0.00011351819206561, "loss": 0.0075, "step": 9647 }, { "epoch": 0.45714285714285713, "grad_norm": 0.5546875, "learning_rate": 0.00011350343602416254, "loss": 0.8601, "step": 9648 }, { "epoch": 0.4571902392797915, "grad_norm": 0.94921875, "learning_rate": 0.00011348867968322327, "loss": 0.982, "step": 9649 }, { "epoch": 0.4572376214167259, "grad_norm": 0.48828125, "learning_rate": 0.00011347392304311944, "loss": 0.8338, "step": 9650 }, { "epoch": 0.45728500355366025, "grad_norm": 0.169921875, "learning_rate": 0.00011345916610417837, "loss": 0.1226, "step": 9651 }, { "epoch": 0.45733238569059464, "grad_norm": 1.125, "learning_rate": 0.00011344440886672732, "loss": 0.7333, "step": 9652 }, { "epoch": 0.45737976782752904, "grad_norm": 0.279296875, "learning_rate": 0.00011342965133109361, "loss": 0.08, "step": 9653 }, { "epoch": 0.4574271499644634, "grad_norm": 0.734375, "learning_rate": 0.00011341489349760453, "loss": 1.1604, "step": 9654 }, { "epoch": 0.45747453210139777, "grad_norm": 0.53515625, "learning_rate": 0.00011340013536658738, "loss": 0.7503, "step": 9655 }, { "epoch": 0.45752191423833216, "grad_norm": 0.734375, "learning_rate": 0.0001133853769383695, "loss": 0.3503, "step": 9656 }, { "epoch": 0.4575692963752665, "grad_norm": 0.37109375, "learning_rate": 0.00011337061821327825, "loss": 0.1039, "step": 9657 }, { "epoch": 0.4576166785122009, "grad_norm": 0.5078125, "learning_rate": 0.00011335585919164092, "loss": 0.5785, "step": 9658 }, { "epoch": 0.4576640606491353, "grad_norm": 0.7265625, "learning_rate": 0.00011334109987378485, "loss": 1.3708, "step": 9659 }, { "epoch": 0.4577114427860697, "grad_norm": 0.85546875, "learning_rate": 0.00011332634026003741, "loss": 1.051, "step": 9660 }, { "epoch": 0.457758824923004, "grad_norm": 0.71875, "learning_rate": 0.00011331158035072593, "loss": 0.8012, "step": 9661 }, { "epoch": 0.4578062070599384, "grad_norm": 0.76953125, "learning_rate": 0.00011329682014617777, "loss": 0.8043, "step": 9662 }, { "epoch": 0.4578535891968728, "grad_norm": 0.1298828125, "learning_rate": 0.00011328205964672029, "loss": 0.0104, "step": 9663 }, { "epoch": 0.45790097133380714, "grad_norm": 0.212890625, "learning_rate": 0.00011326729885268088, "loss": 0.0549, "step": 9664 }, { "epoch": 0.45794835347074153, "grad_norm": 0.578125, "learning_rate": 0.00011325253776438695, "loss": 0.7109, "step": 9665 }, { "epoch": 0.4579957356076759, "grad_norm": 0.13671875, "learning_rate": 0.00011323777638216582, "loss": 0.0102, "step": 9666 }, { "epoch": 0.45804311774461026, "grad_norm": 0.267578125, "learning_rate": 0.0001132230147063449, "loss": 0.0063, "step": 9667 }, { "epoch": 0.45809049988154465, "grad_norm": 0.37109375, "learning_rate": 0.00011320825273725162, "loss": 0.0473, "step": 9668 }, { "epoch": 0.45813788201847905, "grad_norm": 0.64453125, "learning_rate": 0.00011319349047521337, "loss": 1.3497, "step": 9669 }, { "epoch": 0.4581852641554134, "grad_norm": 0.66015625, "learning_rate": 0.00011317872792055752, "loss": 1.1318, "step": 9670 }, { "epoch": 0.4582326462923478, "grad_norm": 0.578125, "learning_rate": 0.00011316396507361153, "loss": 1.4103, "step": 9671 }, { "epoch": 0.45828002842928217, "grad_norm": 0.158203125, "learning_rate": 0.00011314920193470284, "loss": 0.015, "step": 9672 }, { "epoch": 0.4583274105662165, "grad_norm": 0.70703125, "learning_rate": 0.00011313443850415884, "loss": 1.2142, "step": 9673 }, { "epoch": 0.4583747927031509, "grad_norm": 0.96484375, "learning_rate": 0.000113119674782307, "loss": 0.119, "step": 9674 }, { "epoch": 0.4584221748400853, "grad_norm": 0.703125, "learning_rate": 0.00011310491076947474, "loss": 0.6329, "step": 9675 }, { "epoch": 0.4584695569770197, "grad_norm": 0.71484375, "learning_rate": 0.00011309014646598953, "loss": 0.9227, "step": 9676 }, { "epoch": 0.458516939113954, "grad_norm": 0.001556396484375, "learning_rate": 0.00011307538187217879, "loss": 0.0001, "step": 9677 }, { "epoch": 0.4585643212508884, "grad_norm": 0.5234375, "learning_rate": 0.00011306061698837, "loss": 1.0653, "step": 9678 }, { "epoch": 0.4586117033878228, "grad_norm": 0.453125, "learning_rate": 0.00011304585181489065, "loss": 0.3294, "step": 9679 }, { "epoch": 0.45865908552475715, "grad_norm": 0.5390625, "learning_rate": 0.0001130310863520682, "loss": 0.9124, "step": 9680 }, { "epoch": 0.45870646766169154, "grad_norm": 0.83203125, "learning_rate": 0.00011301632060023015, "loss": 1.1995, "step": 9681 }, { "epoch": 0.45875384979862593, "grad_norm": 0.66015625, "learning_rate": 0.00011300155455970396, "loss": 1.0626, "step": 9682 }, { "epoch": 0.45880123193556027, "grad_norm": 0.9765625, "learning_rate": 0.00011298678823081714, "loss": 1.5004, "step": 9683 }, { "epoch": 0.45884861407249466, "grad_norm": 0.1572265625, "learning_rate": 0.0001129720216138972, "loss": 0.0103, "step": 9684 }, { "epoch": 0.45889599620942906, "grad_norm": 0.578125, "learning_rate": 0.00011295725470927163, "loss": 0.5508, "step": 9685 }, { "epoch": 0.4589433783463634, "grad_norm": 0.1865234375, "learning_rate": 0.00011294248751726795, "loss": 0.1358, "step": 9686 }, { "epoch": 0.4589907604832978, "grad_norm": 1.1015625, "learning_rate": 0.00011292772003821366, "loss": 0.8689, "step": 9687 }, { "epoch": 0.4590381426202322, "grad_norm": 0.703125, "learning_rate": 0.00011291295227243634, "loss": 1.5178, "step": 9688 }, { "epoch": 0.4590855247571666, "grad_norm": 0.796875, "learning_rate": 0.00011289818422026348, "loss": 0.5054, "step": 9689 }, { "epoch": 0.4591329068941009, "grad_norm": 0.296875, "learning_rate": 0.00011288341588202266, "loss": 0.0367, "step": 9690 }, { "epoch": 0.4591802890310353, "grad_norm": 0.5703125, "learning_rate": 0.00011286864725804136, "loss": 0.5464, "step": 9691 }, { "epoch": 0.4592276711679697, "grad_norm": 0.5859375, "learning_rate": 0.0001128538783486472, "loss": 1.1001, "step": 9692 }, { "epoch": 0.45927505330490404, "grad_norm": 0.1982421875, "learning_rate": 0.00011283910915416771, "loss": 0.1493, "step": 9693 }, { "epoch": 0.45932243544183843, "grad_norm": 0.53515625, "learning_rate": 0.00011282433967493042, "loss": 0.6702, "step": 9694 }, { "epoch": 0.4593698175787728, "grad_norm": 0.08251953125, "learning_rate": 0.00011280956991126297, "loss": 0.0083, "step": 9695 }, { "epoch": 0.45941719971570716, "grad_norm": 0.2314453125, "learning_rate": 0.00011279479986349288, "loss": 0.1184, "step": 9696 }, { "epoch": 0.45946458185264155, "grad_norm": 0.054931640625, "learning_rate": 0.00011278002953194777, "loss": 0.003, "step": 9697 }, { "epoch": 0.45951196398957594, "grad_norm": 0.259765625, "learning_rate": 0.00011276525891695521, "loss": 0.1499, "step": 9698 }, { "epoch": 0.4595593461265103, "grad_norm": 0.76171875, "learning_rate": 0.00011275048801884284, "loss": 0.8919, "step": 9699 }, { "epoch": 0.4596067282634447, "grad_norm": 0.859375, "learning_rate": 0.0001127357168379382, "loss": 2.1475, "step": 9700 }, { "epoch": 0.45965411040037907, "grad_norm": 0.1220703125, "learning_rate": 0.0001127209453745689, "loss": 0.0036, "step": 9701 }, { "epoch": 0.4597014925373134, "grad_norm": 0.00628662109375, "learning_rate": 0.0001127061736290626, "loss": 0.0002, "step": 9702 }, { "epoch": 0.4597488746742478, "grad_norm": 0.107421875, "learning_rate": 0.00011269140160174694, "loss": 0.0031, "step": 9703 }, { "epoch": 0.4597962568111822, "grad_norm": 0.50390625, "learning_rate": 0.00011267662929294946, "loss": 0.5549, "step": 9704 }, { "epoch": 0.4598436389481166, "grad_norm": 0.6171875, "learning_rate": 0.00011266185670299785, "loss": 0.8531, "step": 9705 }, { "epoch": 0.4598910210850509, "grad_norm": 0.1279296875, "learning_rate": 0.00011264708383221978, "loss": 0.0104, "step": 9706 }, { "epoch": 0.4599384032219853, "grad_norm": 0.734375, "learning_rate": 0.00011263231068094285, "loss": 1.5082, "step": 9707 }, { "epoch": 0.4599857853589197, "grad_norm": 0.625, "learning_rate": 0.00011261753724949471, "loss": 0.7843, "step": 9708 }, { "epoch": 0.46003316749585405, "grad_norm": 0.66796875, "learning_rate": 0.00011260276353820303, "loss": 0.9899, "step": 9709 }, { "epoch": 0.46008054963278844, "grad_norm": 0.51953125, "learning_rate": 0.00011258798954739547, "loss": 0.4392, "step": 9710 }, { "epoch": 0.46012793176972283, "grad_norm": 0.6171875, "learning_rate": 0.00011257321527739974, "loss": 0.7733, "step": 9711 }, { "epoch": 0.46017531390665717, "grad_norm": 0.62890625, "learning_rate": 0.00011255844072854347, "loss": 0.9897, "step": 9712 }, { "epoch": 0.46022269604359156, "grad_norm": 0.7890625, "learning_rate": 0.00011254366590115435, "loss": 0.3876, "step": 9713 }, { "epoch": 0.46027007818052595, "grad_norm": 0.71484375, "learning_rate": 0.00011252889079556011, "loss": 1.0065, "step": 9714 }, { "epoch": 0.4603174603174603, "grad_norm": 0.470703125, "learning_rate": 0.00011251411541208843, "loss": 0.8447, "step": 9715 }, { "epoch": 0.4603648424543947, "grad_norm": 0.70703125, "learning_rate": 0.00011249933975106697, "loss": 0.7846, "step": 9716 }, { "epoch": 0.4604122245913291, "grad_norm": 0.78125, "learning_rate": 0.00011248456381282344, "loss": 1.0592, "step": 9717 }, { "epoch": 0.46045960672826347, "grad_norm": 0.625, "learning_rate": 0.00011246978759768563, "loss": 1.2718, "step": 9718 }, { "epoch": 0.4605069888651978, "grad_norm": 0.796875, "learning_rate": 0.0001124550111059812, "loss": 0.0379, "step": 9719 }, { "epoch": 0.4605543710021322, "grad_norm": 0.298828125, "learning_rate": 0.00011244023433803788, "loss": 0.1691, "step": 9720 }, { "epoch": 0.4606017531390666, "grad_norm": 0.357421875, "learning_rate": 0.00011242545729418342, "loss": 0.0345, "step": 9721 }, { "epoch": 0.46064913527600093, "grad_norm": 0.65234375, "learning_rate": 0.00011241067997474557, "loss": 1.0231, "step": 9722 }, { "epoch": 0.4606965174129353, "grad_norm": 0.69140625, "learning_rate": 0.00011239590238005204, "loss": 1.3441, "step": 9723 }, { "epoch": 0.4607438995498697, "grad_norm": 0.67578125, "learning_rate": 0.00011238112451043059, "loss": 1.337, "step": 9724 }, { "epoch": 0.46079128168680406, "grad_norm": 0.033203125, "learning_rate": 0.00011236634636620899, "loss": 0.0008, "step": 9725 }, { "epoch": 0.46083866382373845, "grad_norm": 0.515625, "learning_rate": 0.00011235156794771502, "loss": 0.7237, "step": 9726 }, { "epoch": 0.46088604596067284, "grad_norm": 0.10107421875, "learning_rate": 0.0001123367892552764, "loss": 0.0114, "step": 9727 }, { "epoch": 0.4609334280976072, "grad_norm": 0.98046875, "learning_rate": 0.00011232201028922093, "loss": 1.2869, "step": 9728 }, { "epoch": 0.46098081023454157, "grad_norm": 0.1083984375, "learning_rate": 0.00011230723104987644, "loss": 0.0132, "step": 9729 }, { "epoch": 0.46102819237147596, "grad_norm": 0.81640625, "learning_rate": 0.00011229245153757067, "loss": 0.8768, "step": 9730 }, { "epoch": 0.4610755745084103, "grad_norm": 1.0546875, "learning_rate": 0.00011227767175263138, "loss": 2.0491, "step": 9731 }, { "epoch": 0.4611229566453447, "grad_norm": 0.69921875, "learning_rate": 0.00011226289169538642, "loss": 0.728, "step": 9732 }, { "epoch": 0.4611703387822791, "grad_norm": 0.033447265625, "learning_rate": 0.00011224811136616358, "loss": 0.0014, "step": 9733 }, { "epoch": 0.4612177209192135, "grad_norm": 0.69921875, "learning_rate": 0.00011223333076529071, "loss": 1.6146, "step": 9734 }, { "epoch": 0.4612651030561478, "grad_norm": 0.33984375, "learning_rate": 0.00011221854989309555, "loss": 0.0202, "step": 9735 }, { "epoch": 0.4613124851930822, "grad_norm": 0.65234375, "learning_rate": 0.00011220376874990599, "loss": 0.8589, "step": 9736 }, { "epoch": 0.4613598673300166, "grad_norm": 0.65234375, "learning_rate": 0.00011218898733604985, "loss": 1.0107, "step": 9737 }, { "epoch": 0.46140724946695094, "grad_norm": 0.06005859375, "learning_rate": 0.00011217420565185495, "loss": 0.004, "step": 9738 }, { "epoch": 0.46145463160388533, "grad_norm": 0.9140625, "learning_rate": 0.00011215942369764912, "loss": 0.7273, "step": 9739 }, { "epoch": 0.4615020137408197, "grad_norm": 0.02587890625, "learning_rate": 0.00011214464147376022, "loss": 0.001, "step": 9740 }, { "epoch": 0.46154939587775407, "grad_norm": 0.65234375, "learning_rate": 0.00011212985898051613, "loss": 0.9893, "step": 9741 }, { "epoch": 0.46159677801468846, "grad_norm": 0.66015625, "learning_rate": 0.00011211507621824467, "loss": 0.6534, "step": 9742 }, { "epoch": 0.46164416015162285, "grad_norm": 0.73046875, "learning_rate": 0.00011210029318727374, "loss": 1.0455, "step": 9743 }, { "epoch": 0.4616915422885572, "grad_norm": 0.5859375, "learning_rate": 0.00011208550988793116, "loss": 1.008, "step": 9744 }, { "epoch": 0.4617389244254916, "grad_norm": 0.05908203125, "learning_rate": 0.00011207072632054489, "loss": 0.0047, "step": 9745 }, { "epoch": 0.461786306562426, "grad_norm": 0.248046875, "learning_rate": 0.00011205594248544275, "loss": 0.013, "step": 9746 }, { "epoch": 0.46183368869936037, "grad_norm": 0.26171875, "learning_rate": 0.00011204115838295263, "loss": 0.021, "step": 9747 }, { "epoch": 0.4618810708362947, "grad_norm": 0.6640625, "learning_rate": 0.00011202637401340246, "loss": 0.5427, "step": 9748 }, { "epoch": 0.4619284529732291, "grad_norm": 0.6796875, "learning_rate": 0.00011201158937712013, "loss": 0.1054, "step": 9749 }, { "epoch": 0.4619758351101635, "grad_norm": 0.2255859375, "learning_rate": 0.00011199680447443352, "loss": 0.0799, "step": 9750 }, { "epoch": 0.46202321724709783, "grad_norm": 0.65234375, "learning_rate": 0.00011198201930567059, "loss": 0.6151, "step": 9751 }, { "epoch": 0.4620705993840322, "grad_norm": 0.73828125, "learning_rate": 0.00011196723387115922, "loss": 1.362, "step": 9752 }, { "epoch": 0.4621179815209666, "grad_norm": 0.87109375, "learning_rate": 0.00011195244817122736, "loss": 0.6715, "step": 9753 }, { "epoch": 0.46216536365790095, "grad_norm": 0.69140625, "learning_rate": 0.00011193766220620291, "loss": 1.1283, "step": 9754 }, { "epoch": 0.46221274579483534, "grad_norm": 0.83203125, "learning_rate": 0.00011192287597641385, "loss": 1.0834, "step": 9755 }, { "epoch": 0.46226012793176974, "grad_norm": 0.3046875, "learning_rate": 0.00011190808948218807, "loss": 0.0404, "step": 9756 }, { "epoch": 0.4623075100687041, "grad_norm": 0.84375, "learning_rate": 0.00011189330272385359, "loss": 0.5915, "step": 9757 }, { "epoch": 0.46235489220563847, "grad_norm": 0.49609375, "learning_rate": 0.00011187851570173831, "loss": 0.1652, "step": 9758 }, { "epoch": 0.46240227434257286, "grad_norm": 0.6953125, "learning_rate": 0.00011186372841617019, "loss": 0.0703, "step": 9759 }, { "epoch": 0.4624496564795072, "grad_norm": 0.73046875, "learning_rate": 0.00011184894086747722, "loss": 1.0824, "step": 9760 }, { "epoch": 0.4624970386164416, "grad_norm": 0.59765625, "learning_rate": 0.00011183415305598737, "loss": 0.9035, "step": 9761 }, { "epoch": 0.462544420753376, "grad_norm": 0.6796875, "learning_rate": 0.0001118193649820286, "loss": 1.2383, "step": 9762 }, { "epoch": 0.4625918028903104, "grad_norm": 1.0703125, "learning_rate": 0.0001118045766459289, "loss": 1.1803, "step": 9763 }, { "epoch": 0.4626391850272447, "grad_norm": 0.71484375, "learning_rate": 0.00011178978804801627, "loss": 0.7214, "step": 9764 }, { "epoch": 0.4626865671641791, "grad_norm": 0.1591796875, "learning_rate": 0.00011177499918861869, "loss": 0.0074, "step": 9765 }, { "epoch": 0.4627339493011135, "grad_norm": 0.6171875, "learning_rate": 0.00011176021006806418, "loss": 0.9706, "step": 9766 }, { "epoch": 0.46278133143804784, "grad_norm": 0.8203125, "learning_rate": 0.00011174542068668073, "loss": 1.4274, "step": 9767 }, { "epoch": 0.46282871357498223, "grad_norm": 0.828125, "learning_rate": 0.00011173063104479638, "loss": 1.291, "step": 9768 }, { "epoch": 0.4628760957119166, "grad_norm": 0.57421875, "learning_rate": 0.00011171584114273912, "loss": 0.6011, "step": 9769 }, { "epoch": 0.46292347784885096, "grad_norm": 0.70703125, "learning_rate": 0.00011170105098083696, "loss": 1.3595, "step": 9770 }, { "epoch": 0.46297085998578535, "grad_norm": 0.59765625, "learning_rate": 0.00011168626055941798, "loss": 0.6471, "step": 9771 }, { "epoch": 0.46301824212271975, "grad_norm": 0.69140625, "learning_rate": 0.00011167146987881019, "loss": 0.9433, "step": 9772 }, { "epoch": 0.4630656242596541, "grad_norm": 0.2265625, "learning_rate": 0.0001116566789393416, "loss": 0.0392, "step": 9773 }, { "epoch": 0.4631130063965885, "grad_norm": 0.1904296875, "learning_rate": 0.0001116418877413403, "loss": 0.1455, "step": 9774 }, { "epoch": 0.46316038853352287, "grad_norm": 0.515625, "learning_rate": 0.00011162709628513435, "loss": 0.5217, "step": 9775 }, { "epoch": 0.46320777067045726, "grad_norm": 0.89453125, "learning_rate": 0.00011161230457105176, "loss": 0.6512, "step": 9776 }, { "epoch": 0.4632551528073916, "grad_norm": 0.64453125, "learning_rate": 0.00011159751259942066, "loss": 1.0447, "step": 9777 }, { "epoch": 0.463302534944326, "grad_norm": 0.625, "learning_rate": 0.00011158272037056905, "loss": 0.8032, "step": 9778 }, { "epoch": 0.4633499170812604, "grad_norm": 0.58203125, "learning_rate": 0.00011156792788482504, "loss": 1.1034, "step": 9779 }, { "epoch": 0.4633972992181947, "grad_norm": 0.63671875, "learning_rate": 0.00011155313514251673, "loss": 1.1353, "step": 9780 }, { "epoch": 0.4634446813551291, "grad_norm": 0.46875, "learning_rate": 0.00011153834214397219, "loss": 0.8526, "step": 9781 }, { "epoch": 0.4634920634920635, "grad_norm": 0.62109375, "learning_rate": 0.00011152354888951948, "loss": 0.4919, "step": 9782 }, { "epoch": 0.46353944562899785, "grad_norm": 0.7265625, "learning_rate": 0.00011150875537948677, "loss": 1.0076, "step": 9783 }, { "epoch": 0.46358682776593224, "grad_norm": 0.68359375, "learning_rate": 0.00011149396161420211, "loss": 1.3346, "step": 9784 }, { "epoch": 0.46363420990286663, "grad_norm": 0.734375, "learning_rate": 0.00011147916759399362, "loss": 1.3384, "step": 9785 }, { "epoch": 0.46368159203980097, "grad_norm": 0.9296875, "learning_rate": 0.00011146437331918939, "loss": 1.0324, "step": 9786 }, { "epoch": 0.46372897417673536, "grad_norm": 0.6953125, "learning_rate": 0.00011144957879011764, "loss": 0.759, "step": 9787 }, { "epoch": 0.46377635631366976, "grad_norm": 0.7421875, "learning_rate": 0.00011143478400710636, "loss": 1.0396, "step": 9788 }, { "epoch": 0.4638237384506041, "grad_norm": 0.1943359375, "learning_rate": 0.00011141998897048378, "loss": 0.1246, "step": 9789 }, { "epoch": 0.4638711205875385, "grad_norm": 0.6953125, "learning_rate": 0.000111405193680578, "loss": 0.8911, "step": 9790 }, { "epoch": 0.4639185027244729, "grad_norm": 0.7109375, "learning_rate": 0.00011139039813771719, "loss": 1.1938, "step": 9791 }, { "epoch": 0.4639658848614073, "grad_norm": 0.8828125, "learning_rate": 0.0001113756023422295, "loss": 0.6653, "step": 9792 }, { "epoch": 0.4640132669983416, "grad_norm": 0.2353515625, "learning_rate": 0.00011136080629444303, "loss": 0.0119, "step": 9793 }, { "epoch": 0.464060649135276, "grad_norm": 0.7421875, "learning_rate": 0.00011134600999468596, "loss": 0.8786, "step": 9794 }, { "epoch": 0.4641080312722104, "grad_norm": 0.77734375, "learning_rate": 0.00011133121344328652, "loss": 0.7408, "step": 9795 }, { "epoch": 0.46415541340914473, "grad_norm": 0.1591796875, "learning_rate": 0.00011131641664057282, "loss": 0.0056, "step": 9796 }, { "epoch": 0.4642027955460791, "grad_norm": 0.765625, "learning_rate": 0.00011130161958687304, "loss": 1.5517, "step": 9797 }, { "epoch": 0.4642501776830135, "grad_norm": 0.671875, "learning_rate": 0.00011128682228251538, "loss": 1.2288, "step": 9798 }, { "epoch": 0.46429755981994786, "grad_norm": 0.65625, "learning_rate": 0.00011127202472782802, "loss": 1.3046, "step": 9799 }, { "epoch": 0.46434494195688225, "grad_norm": 0.828125, "learning_rate": 0.00011125722692313918, "loss": 0.8764, "step": 9800 }, { "epoch": 0.46439232409381664, "grad_norm": 0.61328125, "learning_rate": 0.00011124242886877703, "loss": 0.7067, "step": 9801 }, { "epoch": 0.464439706230751, "grad_norm": 0.208984375, "learning_rate": 0.00011122763056506975, "loss": 0.0462, "step": 9802 }, { "epoch": 0.4644870883676854, "grad_norm": 0.83203125, "learning_rate": 0.00011121283201234562, "loss": 1.4981, "step": 9803 }, { "epoch": 0.46453447050461977, "grad_norm": 0.6875, "learning_rate": 0.00011119803321093281, "loss": 1.1497, "step": 9804 }, { "epoch": 0.46458185264155416, "grad_norm": 0.439453125, "learning_rate": 0.00011118323416115955, "loss": 0.4918, "step": 9805 }, { "epoch": 0.4646292347784885, "grad_norm": 0.5703125, "learning_rate": 0.00011116843486335407, "loss": 0.8862, "step": 9806 }, { "epoch": 0.4646766169154229, "grad_norm": 0.69140625, "learning_rate": 0.0001111536353178446, "loss": 1.2946, "step": 9807 }, { "epoch": 0.4647239990523573, "grad_norm": 0.83203125, "learning_rate": 0.00011113883552495938, "loss": 1.6293, "step": 9808 }, { "epoch": 0.4647713811892916, "grad_norm": 1.125, "learning_rate": 0.00011112403548502664, "loss": 0.7951, "step": 9809 }, { "epoch": 0.464818763326226, "grad_norm": 0.625, "learning_rate": 0.00011110923519837466, "loss": 0.8851, "step": 9810 }, { "epoch": 0.4648661454631604, "grad_norm": 0.09033203125, "learning_rate": 0.00011109443466533167, "loss": 0.0103, "step": 9811 }, { "epoch": 0.46491352760009474, "grad_norm": 1.40625, "learning_rate": 0.00011107963388622594, "loss": 0.8671, "step": 9812 }, { "epoch": 0.46496090973702914, "grad_norm": 0.6875, "learning_rate": 0.0001110648328613857, "loss": 0.6951, "step": 9813 }, { "epoch": 0.46500829187396353, "grad_norm": 0.671875, "learning_rate": 0.0001110500315911393, "loss": 1.1671, "step": 9814 }, { "epoch": 0.46505567401089787, "grad_norm": 0.65625, "learning_rate": 0.00011103523007581494, "loss": 1.1655, "step": 9815 }, { "epoch": 0.46510305614783226, "grad_norm": 0.30078125, "learning_rate": 0.00011102042831574095, "loss": 0.0534, "step": 9816 }, { "epoch": 0.46515043828476665, "grad_norm": 0.55078125, "learning_rate": 0.00011100562631124558, "loss": 1.0242, "step": 9817 }, { "epoch": 0.465197820421701, "grad_norm": 0.60546875, "learning_rate": 0.00011099082406265715, "loss": 0.5828, "step": 9818 }, { "epoch": 0.4652452025586354, "grad_norm": 0.7265625, "learning_rate": 0.00011097602157030394, "loss": 1.1612, "step": 9819 }, { "epoch": 0.4652925846955698, "grad_norm": 0.65234375, "learning_rate": 0.00011096121883451427, "loss": 0.5682, "step": 9820 }, { "epoch": 0.46533996683250417, "grad_norm": 0.64453125, "learning_rate": 0.00011094641585561645, "loss": 0.653, "step": 9821 }, { "epoch": 0.4653873489694385, "grad_norm": 0.63671875, "learning_rate": 0.00011093161263393876, "loss": 1.2252, "step": 9822 }, { "epoch": 0.4654347311063729, "grad_norm": 0.027099609375, "learning_rate": 0.00011091680916980957, "loss": 0.0018, "step": 9823 }, { "epoch": 0.4654821132433073, "grad_norm": 0.62890625, "learning_rate": 0.00011090200546355718, "loss": 0.9737, "step": 9824 }, { "epoch": 0.46552949538024163, "grad_norm": 0.95703125, "learning_rate": 0.00011088720151550991, "loss": 0.8436, "step": 9825 }, { "epoch": 0.465576877517176, "grad_norm": 0.072265625, "learning_rate": 0.00011087239732599612, "loss": 0.0059, "step": 9826 }, { "epoch": 0.4656242596541104, "grad_norm": 0.1328125, "learning_rate": 0.00011085759289534412, "loss": 0.0125, "step": 9827 }, { "epoch": 0.46567164179104475, "grad_norm": 0.6875, "learning_rate": 0.00011084278822388228, "loss": 1.3405, "step": 9828 }, { "epoch": 0.46571902392797915, "grad_norm": 0.7265625, "learning_rate": 0.00011082798331193898, "loss": 1.1513, "step": 9829 }, { "epoch": 0.46576640606491354, "grad_norm": 0.7890625, "learning_rate": 0.0001108131781598425, "loss": 1.184, "step": 9830 }, { "epoch": 0.4658137882018479, "grad_norm": 0.56640625, "learning_rate": 0.00011079837276792125, "loss": 0.7758, "step": 9831 }, { "epoch": 0.46586117033878227, "grad_norm": 0.1201171875, "learning_rate": 0.00011078356713650361, "loss": 0.0137, "step": 9832 }, { "epoch": 0.46590855247571666, "grad_norm": 0.7890625, "learning_rate": 0.00011076876126591796, "loss": 1.1446, "step": 9833 }, { "epoch": 0.46595593461265106, "grad_norm": 0.240234375, "learning_rate": 0.00011075395515649261, "loss": 0.0185, "step": 9834 }, { "epoch": 0.4660033167495854, "grad_norm": 0.7890625, "learning_rate": 0.000110739148808556, "loss": 0.614, "step": 9835 }, { "epoch": 0.4660506988865198, "grad_norm": 0.69140625, "learning_rate": 0.00011072434222243651, "loss": 0.7432, "step": 9836 }, { "epoch": 0.4660980810234542, "grad_norm": 0.63671875, "learning_rate": 0.00011070953539846254, "loss": 0.9163, "step": 9837 }, { "epoch": 0.4661454631603885, "grad_norm": 0.66015625, "learning_rate": 0.00011069472833696249, "loss": 0.8119, "step": 9838 }, { "epoch": 0.4661928452973229, "grad_norm": 0.2890625, "learning_rate": 0.00011067992103826474, "loss": 0.0054, "step": 9839 }, { "epoch": 0.4662402274342573, "grad_norm": 0.53515625, "learning_rate": 0.00011066511350269773, "loss": 0.6951, "step": 9840 }, { "epoch": 0.46628760957119164, "grad_norm": 0.609375, "learning_rate": 0.00011065030573058987, "loss": 0.88, "step": 9841 }, { "epoch": 0.46633499170812603, "grad_norm": 1.03125, "learning_rate": 0.00011063549772226955, "loss": 1.2468, "step": 9842 }, { "epoch": 0.4663823738450604, "grad_norm": 0.2216796875, "learning_rate": 0.00011062068947806521, "loss": 0.1288, "step": 9843 }, { "epoch": 0.46642975598199476, "grad_norm": 0.6328125, "learning_rate": 0.00011060588099830533, "loss": 1.4737, "step": 9844 }, { "epoch": 0.46647713811892916, "grad_norm": 0.67578125, "learning_rate": 0.00011059107228331829, "loss": 1.4271, "step": 9845 }, { "epoch": 0.46652452025586355, "grad_norm": 0.5078125, "learning_rate": 0.00011057626333343251, "loss": 0.6395, "step": 9846 }, { "epoch": 0.4665719023927979, "grad_norm": 0.58984375, "learning_rate": 0.00011056145414897654, "loss": 0.7846, "step": 9847 }, { "epoch": 0.4666192845297323, "grad_norm": 0.76171875, "learning_rate": 0.00011054664473027871, "loss": 0.8299, "step": 9848 }, { "epoch": 0.4666666666666667, "grad_norm": 0.578125, "learning_rate": 0.00011053183507766758, "loss": 0.9766, "step": 9849 }, { "epoch": 0.46671404880360107, "grad_norm": 0.1708984375, "learning_rate": 0.00011051702519147153, "loss": 0.1263, "step": 9850 }, { "epoch": 0.4667614309405354, "grad_norm": 0.15234375, "learning_rate": 0.00011050221507201908, "loss": 0.0947, "step": 9851 }, { "epoch": 0.4668088130774698, "grad_norm": 0.453125, "learning_rate": 0.00011048740471963868, "loss": 0.1709, "step": 9852 }, { "epoch": 0.4668561952144042, "grad_norm": 0.56640625, "learning_rate": 0.00011047259413465882, "loss": 0.7236, "step": 9853 }, { "epoch": 0.46690357735133853, "grad_norm": 0.6796875, "learning_rate": 0.00011045778331740797, "loss": 1.0616, "step": 9854 }, { "epoch": 0.4669509594882729, "grad_norm": 0.63671875, "learning_rate": 0.00011044297226821463, "loss": 1.0608, "step": 9855 }, { "epoch": 0.4669983416252073, "grad_norm": 0.65234375, "learning_rate": 0.00011042816098740732, "loss": 0.7149, "step": 9856 }, { "epoch": 0.46704572376214165, "grad_norm": 0.69921875, "learning_rate": 0.00011041334947531445, "loss": 1.1244, "step": 9857 }, { "epoch": 0.46709310589907604, "grad_norm": 0.25, "learning_rate": 0.00011039853773226461, "loss": 0.0236, "step": 9858 }, { "epoch": 0.46714048803601044, "grad_norm": 0.6953125, "learning_rate": 0.00011038372575858625, "loss": 0.9527, "step": 9859 }, { "epoch": 0.4671878701729448, "grad_norm": 0.80859375, "learning_rate": 0.00011036891355460795, "loss": 0.8516, "step": 9860 }, { "epoch": 0.46723525230987917, "grad_norm": 0.6875, "learning_rate": 0.00011035410112065819, "loss": 0.7951, "step": 9861 }, { "epoch": 0.46728263444681356, "grad_norm": 0.62109375, "learning_rate": 0.00011033928845706545, "loss": 0.8273, "step": 9862 }, { "epoch": 0.46733001658374795, "grad_norm": 0.69921875, "learning_rate": 0.00011032447556415838, "loss": 1.5249, "step": 9863 }, { "epoch": 0.4673773987206823, "grad_norm": 0.765625, "learning_rate": 0.0001103096624422654, "loss": 1.0421, "step": 9864 }, { "epoch": 0.4674247808576167, "grad_norm": 0.59375, "learning_rate": 0.00011029484909171508, "loss": 1.0365, "step": 9865 }, { "epoch": 0.4674721629945511, "grad_norm": 0.97265625, "learning_rate": 0.00011028003551283597, "loss": 1.1412, "step": 9866 }, { "epoch": 0.4675195451314854, "grad_norm": 0.90625, "learning_rate": 0.00011026522170595663, "loss": 1.4166, "step": 9867 }, { "epoch": 0.4675669272684198, "grad_norm": 0.61328125, "learning_rate": 0.00011025040767140562, "loss": 0.9339, "step": 9868 }, { "epoch": 0.4676143094053542, "grad_norm": 0.2734375, "learning_rate": 0.00011023559340951146, "loss": 0.0557, "step": 9869 }, { "epoch": 0.46766169154228854, "grad_norm": 0.703125, "learning_rate": 0.00011022077892060274, "loss": 0.7653, "step": 9870 }, { "epoch": 0.46770907367922293, "grad_norm": 0.6015625, "learning_rate": 0.00011020596420500807, "loss": 0.6284, "step": 9871 }, { "epoch": 0.4677564558161573, "grad_norm": 0.59765625, "learning_rate": 0.00011019114926305597, "loss": 0.6233, "step": 9872 }, { "epoch": 0.46780383795309166, "grad_norm": 0.55859375, "learning_rate": 0.00011017633409507502, "loss": 0.7435, "step": 9873 }, { "epoch": 0.46785122009002605, "grad_norm": 0.66015625, "learning_rate": 0.0001101615187013938, "loss": 1.2383, "step": 9874 }, { "epoch": 0.46789860222696045, "grad_norm": 0.68359375, "learning_rate": 0.00011014670308234096, "loss": 1.1249, "step": 9875 }, { "epoch": 0.4679459843638948, "grad_norm": 0.7109375, "learning_rate": 0.00011013188723824504, "loss": 0.1879, "step": 9876 }, { "epoch": 0.4679933665008292, "grad_norm": 0.6875, "learning_rate": 0.00011011707116943463, "loss": 1.1596, "step": 9877 }, { "epoch": 0.46804074863776357, "grad_norm": 0.5390625, "learning_rate": 0.00011010225487623837, "loss": 0.6584, "step": 9878 }, { "epoch": 0.46808813077469796, "grad_norm": 0.69140625, "learning_rate": 0.0001100874383589849, "loss": 1.255, "step": 9879 }, { "epoch": 0.4681355129116323, "grad_norm": 0.72265625, "learning_rate": 0.00011007262161800276, "loss": 0.9886, "step": 9880 }, { "epoch": 0.4681828950485667, "grad_norm": 0.0458984375, "learning_rate": 0.00011005780465362057, "loss": 0.0037, "step": 9881 }, { "epoch": 0.4682302771855011, "grad_norm": 0.6328125, "learning_rate": 0.00011004298746616701, "loss": 1.0849, "step": 9882 }, { "epoch": 0.4682776593224354, "grad_norm": 0.61328125, "learning_rate": 0.0001100281700559707, "loss": 0.7927, "step": 9883 }, { "epoch": 0.4683250414593698, "grad_norm": 0.51953125, "learning_rate": 0.00011001335242336023, "loss": 0.0405, "step": 9884 }, { "epoch": 0.4683724235963042, "grad_norm": 0.62109375, "learning_rate": 0.00010999853456866429, "loss": 0.0701, "step": 9885 }, { "epoch": 0.46841980573323855, "grad_norm": 0.625, "learning_rate": 0.0001099837164922115, "loss": 1.1635, "step": 9886 }, { "epoch": 0.46846718787017294, "grad_norm": 0.57421875, "learning_rate": 0.00010996889819433053, "loss": 1.3223, "step": 9887 }, { "epoch": 0.46851457000710733, "grad_norm": 0.53515625, "learning_rate": 0.00010995407967535, "loss": 0.6921, "step": 9888 }, { "epoch": 0.46856195214404167, "grad_norm": 0.248046875, "learning_rate": 0.00010993926093559859, "loss": 0.0444, "step": 9889 }, { "epoch": 0.46860933428097606, "grad_norm": 0.1455078125, "learning_rate": 0.00010992444197540495, "loss": 0.0119, "step": 9890 }, { "epoch": 0.46865671641791046, "grad_norm": 0.9140625, "learning_rate": 0.00010990962279509775, "loss": 0.0671, "step": 9891 }, { "epoch": 0.46870409855484485, "grad_norm": 0.65625, "learning_rate": 0.00010989480339500569, "loss": 1.021, "step": 9892 }, { "epoch": 0.4687514806917792, "grad_norm": 0.57421875, "learning_rate": 0.0001098799837754574, "loss": 0.3846, "step": 9893 }, { "epoch": 0.4687988628287136, "grad_norm": 0.56640625, "learning_rate": 0.00010986516393678164, "loss": 1.0384, "step": 9894 }, { "epoch": 0.468846244965648, "grad_norm": 0.76171875, "learning_rate": 0.00010985034387930704, "loss": 0.3264, "step": 9895 }, { "epoch": 0.4688936271025823, "grad_norm": 0.326171875, "learning_rate": 0.0001098355236033623, "loss": 0.0473, "step": 9896 }, { "epoch": 0.4689410092395167, "grad_norm": 0.59765625, "learning_rate": 0.0001098207031092761, "loss": 1.0076, "step": 9897 }, { "epoch": 0.4689883913764511, "grad_norm": 0.030517578125, "learning_rate": 0.0001098058823973772, "loss": 0.0017, "step": 9898 }, { "epoch": 0.46903577351338543, "grad_norm": 0.7890625, "learning_rate": 0.00010979106146799425, "loss": 0.8646, "step": 9899 }, { "epoch": 0.4690831556503198, "grad_norm": 0.71484375, "learning_rate": 0.00010977624032145597, "loss": 1.15, "step": 9900 }, { "epoch": 0.4691305377872542, "grad_norm": 0.59375, "learning_rate": 0.00010976141895809111, "loss": 0.8969, "step": 9901 }, { "epoch": 0.46917791992418856, "grad_norm": 1.375, "learning_rate": 0.00010974659737822842, "loss": 0.3929, "step": 9902 }, { "epoch": 0.46922530206112295, "grad_norm": 0.7578125, "learning_rate": 0.00010973177558219651, "loss": 1.0758, "step": 9903 }, { "epoch": 0.46927268419805734, "grad_norm": 0.70703125, "learning_rate": 0.00010971695357032423, "loss": 0.696, "step": 9904 }, { "epoch": 0.4693200663349917, "grad_norm": 0.1669921875, "learning_rate": 0.00010970213134294023, "loss": 0.1151, "step": 9905 }, { "epoch": 0.4693674484719261, "grad_norm": 0.71484375, "learning_rate": 0.00010968730890037333, "loss": 0.7491, "step": 9906 }, { "epoch": 0.46941483060886047, "grad_norm": 0.55859375, "learning_rate": 0.00010967248624295221, "loss": 0.4829, "step": 9907 }, { "epoch": 0.46946221274579486, "grad_norm": 0.71484375, "learning_rate": 0.00010965766337100567, "loss": 1.2206, "step": 9908 }, { "epoch": 0.4695095948827292, "grad_norm": 0.5546875, "learning_rate": 0.00010964284028486245, "loss": 0.8439, "step": 9909 }, { "epoch": 0.4695569770196636, "grad_norm": 1.5625, "learning_rate": 0.00010962801698485128, "loss": 0.3366, "step": 9910 }, { "epoch": 0.469604359156598, "grad_norm": 0.00140380859375, "learning_rate": 0.00010961319347130095, "loss": 0.0001, "step": 9911 }, { "epoch": 0.4696517412935323, "grad_norm": 0.51171875, "learning_rate": 0.00010959836974454023, "loss": 0.5039, "step": 9912 }, { "epoch": 0.4696991234304667, "grad_norm": 0.62890625, "learning_rate": 0.00010958354580489791, "loss": 0.8668, "step": 9913 }, { "epoch": 0.4697465055674011, "grad_norm": 0.0888671875, "learning_rate": 0.00010956872165270273, "loss": 0.0019, "step": 9914 }, { "epoch": 0.46979388770433544, "grad_norm": 0.004180908203125, "learning_rate": 0.0001095538972882835, "loss": 0.0002, "step": 9915 }, { "epoch": 0.46984126984126984, "grad_norm": 0.6640625, "learning_rate": 0.000109539072711969, "loss": 0.6317, "step": 9916 }, { "epoch": 0.46988865197820423, "grad_norm": 0.5703125, "learning_rate": 0.00010952424792408804, "loss": 0.9663, "step": 9917 }, { "epoch": 0.46993603411513857, "grad_norm": 0.1318359375, "learning_rate": 0.00010950942292496942, "loss": 0.026, "step": 9918 }, { "epoch": 0.46998341625207296, "grad_norm": 0.0208740234375, "learning_rate": 0.0001094945977149419, "loss": 0.0009, "step": 9919 }, { "epoch": 0.47003079838900735, "grad_norm": 0.6484375, "learning_rate": 0.00010947977229433433, "loss": 1.3223, "step": 9920 }, { "epoch": 0.47007818052594175, "grad_norm": 0.7265625, "learning_rate": 0.00010946494666347551, "loss": 0.8865, "step": 9921 }, { "epoch": 0.4701255626628761, "grad_norm": 0.75390625, "learning_rate": 0.00010945012082269423, "loss": 1.0039, "step": 9922 }, { "epoch": 0.4701729447998105, "grad_norm": 0.71484375, "learning_rate": 0.00010943529477231936, "loss": 1.3037, "step": 9923 }, { "epoch": 0.47022032693674487, "grad_norm": 0.1484375, "learning_rate": 0.00010942046851267968, "loss": 0.0193, "step": 9924 }, { "epoch": 0.4702677090736792, "grad_norm": 0.60546875, "learning_rate": 0.00010940564204410408, "loss": 0.7356, "step": 9925 }, { "epoch": 0.4703150912106136, "grad_norm": 0.625, "learning_rate": 0.00010939081536692135, "loss": 0.8879, "step": 9926 }, { "epoch": 0.470362473347548, "grad_norm": 0.341796875, "learning_rate": 0.00010937598848146032, "loss": 0.0262, "step": 9927 }, { "epoch": 0.47040985548448233, "grad_norm": 0.640625, "learning_rate": 0.00010936116138804985, "loss": 0.7974, "step": 9928 }, { "epoch": 0.4704572376214167, "grad_norm": 0.5234375, "learning_rate": 0.00010934633408701883, "loss": 0.715, "step": 9929 }, { "epoch": 0.4705046197583511, "grad_norm": 0.6796875, "learning_rate": 0.00010933150657869602, "loss": 1.0631, "step": 9930 }, { "epoch": 0.47055200189528545, "grad_norm": 0.1806640625, "learning_rate": 0.00010931667886341035, "loss": 0.0301, "step": 9931 }, { "epoch": 0.47059938403221985, "grad_norm": 0.69140625, "learning_rate": 0.00010930185094149068, "loss": 0.9386, "step": 9932 }, { "epoch": 0.47064676616915424, "grad_norm": 0.71875, "learning_rate": 0.00010928702281326586, "loss": 1.4621, "step": 9933 }, { "epoch": 0.4706941483060886, "grad_norm": 0.71875, "learning_rate": 0.00010927219447906478, "loss": 0.9693, "step": 9934 }, { "epoch": 0.47074153044302297, "grad_norm": 0.173828125, "learning_rate": 0.00010925736593921627, "loss": 0.1278, "step": 9935 }, { "epoch": 0.47078891257995736, "grad_norm": 0.30859375, "learning_rate": 0.00010924253719404929, "loss": 0.1093, "step": 9936 }, { "epoch": 0.47083629471689176, "grad_norm": 0.8671875, "learning_rate": 0.00010922770824389264, "loss": 0.8521, "step": 9937 }, { "epoch": 0.4708836768538261, "grad_norm": 0.1845703125, "learning_rate": 0.00010921287908907525, "loss": 0.1137, "step": 9938 }, { "epoch": 0.4709310589907605, "grad_norm": 0.1357421875, "learning_rate": 0.000109198049729926, "loss": 0.0876, "step": 9939 }, { "epoch": 0.4709784411276949, "grad_norm": 0.62109375, "learning_rate": 0.00010918322016677385, "loss": 1.2775, "step": 9940 }, { "epoch": 0.4710258232646292, "grad_norm": 0.46875, "learning_rate": 0.00010916839039994766, "loss": 0.374, "step": 9941 }, { "epoch": 0.4710732054015636, "grad_norm": 0.279296875, "learning_rate": 0.00010915356042977632, "loss": 0.096, "step": 9942 }, { "epoch": 0.471120587538498, "grad_norm": 0.494140625, "learning_rate": 0.00010913873025658874, "loss": 1.0659, "step": 9943 }, { "epoch": 0.47116796967543234, "grad_norm": 0.66796875, "learning_rate": 0.00010912389988071388, "loss": 0.206, "step": 9944 }, { "epoch": 0.47121535181236673, "grad_norm": 0.55078125, "learning_rate": 0.00010910906930248061, "loss": 0.7639, "step": 9945 }, { "epoch": 0.4712627339493011, "grad_norm": 0.75, "learning_rate": 0.00010909423852221792, "loss": 0.8366, "step": 9946 }, { "epoch": 0.47131011608623546, "grad_norm": 0.1357421875, "learning_rate": 0.00010907940754025468, "loss": 0.0766, "step": 9947 }, { "epoch": 0.47135749822316986, "grad_norm": 0.6484375, "learning_rate": 0.00010906457635691987, "loss": 0.1732, "step": 9948 }, { "epoch": 0.47140488036010425, "grad_norm": 0.21484375, "learning_rate": 0.00010904974497254241, "loss": 0.0058, "step": 9949 }, { "epoch": 0.47145226249703864, "grad_norm": 0.0019683837890625, "learning_rate": 0.00010903491338745124, "loss": 0.0001, "step": 9950 }, { "epoch": 0.471499644633973, "grad_norm": 0.4453125, "learning_rate": 0.0001090200816019753, "loss": 0.5352, "step": 9951 }, { "epoch": 0.4715470267709074, "grad_norm": 0.94921875, "learning_rate": 0.00010900524961644361, "loss": 0.7032, "step": 9952 }, { "epoch": 0.47159440890784177, "grad_norm": 0.6875, "learning_rate": 0.00010899041743118501, "loss": 0.8182, "step": 9953 }, { "epoch": 0.4716417910447761, "grad_norm": 0.8359375, "learning_rate": 0.00010897558504652856, "loss": 1.6718, "step": 9954 }, { "epoch": 0.4716891731817105, "grad_norm": 0.1298828125, "learning_rate": 0.0001089607524628032, "loss": 0.0083, "step": 9955 }, { "epoch": 0.4717365553186449, "grad_norm": 0.142578125, "learning_rate": 0.00010894591968033787, "loss": 0.019, "step": 9956 }, { "epoch": 0.4717839374555792, "grad_norm": 0.451171875, "learning_rate": 0.00010893108669946162, "loss": 0.0577, "step": 9957 }, { "epoch": 0.4718313195925136, "grad_norm": 0.68359375, "learning_rate": 0.00010891625352050332, "loss": 0.9744, "step": 9958 }, { "epoch": 0.471878701729448, "grad_norm": 0.8359375, "learning_rate": 0.00010890142014379205, "loss": 1.0241, "step": 9959 }, { "epoch": 0.47192608386638235, "grad_norm": 0.69140625, "learning_rate": 0.00010888658656965675, "loss": 0.6482, "step": 9960 }, { "epoch": 0.47197346600331674, "grad_norm": 0.3828125, "learning_rate": 0.00010887175279842643, "loss": 0.1113, "step": 9961 }, { "epoch": 0.47202084814025114, "grad_norm": 0.7421875, "learning_rate": 0.00010885691883043008, "loss": 1.0869, "step": 9962 }, { "epoch": 0.4720682302771855, "grad_norm": 0.7265625, "learning_rate": 0.0001088420846659967, "loss": 0.9253, "step": 9963 }, { "epoch": 0.47211561241411987, "grad_norm": 0.4921875, "learning_rate": 0.00010882725030545531, "loss": 0.3006, "step": 9964 }, { "epoch": 0.47216299455105426, "grad_norm": 0.56640625, "learning_rate": 0.00010881241574913492, "loss": 0.8257, "step": 9965 }, { "epoch": 0.47221037668798865, "grad_norm": 0.000728607177734375, "learning_rate": 0.00010879758099736453, "loss": 0.0001, "step": 9966 }, { "epoch": 0.472257758824923, "grad_norm": 0.4609375, "learning_rate": 0.00010878274605047317, "loss": 0.0298, "step": 9967 }, { "epoch": 0.4723051409618574, "grad_norm": 0.07177734375, "learning_rate": 0.00010876791090878981, "loss": 0.008, "step": 9968 }, { "epoch": 0.4723525230987918, "grad_norm": 0.46484375, "learning_rate": 0.00010875307557264356, "loss": 0.4073, "step": 9969 }, { "epoch": 0.4723999052357261, "grad_norm": 0.79296875, "learning_rate": 0.00010873824004236342, "loss": 1.0676, "step": 9970 }, { "epoch": 0.4724472873726605, "grad_norm": 0.70703125, "learning_rate": 0.00010872340431827841, "loss": 1.1719, "step": 9971 }, { "epoch": 0.4724946695095949, "grad_norm": 0.55859375, "learning_rate": 0.0001087085684007176, "loss": 1.0475, "step": 9972 }, { "epoch": 0.47254205164652924, "grad_norm": 0.765625, "learning_rate": 0.00010869373229001001, "loss": 1.3784, "step": 9973 }, { "epoch": 0.47258943378346363, "grad_norm": 0.462890625, "learning_rate": 0.0001086788959864847, "loss": 0.148, "step": 9974 }, { "epoch": 0.472636815920398, "grad_norm": 0.6328125, "learning_rate": 0.00010866405949047074, "loss": 1.0741, "step": 9975 }, { "epoch": 0.47268419805733236, "grad_norm": 0.90625, "learning_rate": 0.00010864922280229714, "loss": 0.9483, "step": 9976 }, { "epoch": 0.47273158019426675, "grad_norm": 0.65625, "learning_rate": 0.00010863438592229299, "loss": 1.1607, "step": 9977 }, { "epoch": 0.47277896233120115, "grad_norm": 0.8984375, "learning_rate": 0.00010861954885078738, "loss": 1.3892, "step": 9978 }, { "epoch": 0.47282634446813554, "grad_norm": 0.82421875, "learning_rate": 0.00010860471158810934, "loss": 1.0734, "step": 9979 }, { "epoch": 0.4728737266050699, "grad_norm": 0.68359375, "learning_rate": 0.00010858987413458797, "loss": 1.1007, "step": 9980 }, { "epoch": 0.47292110874200427, "grad_norm": 0.74609375, "learning_rate": 0.00010857503649055234, "loss": 1.2357, "step": 9981 }, { "epoch": 0.47296849087893866, "grad_norm": 0.19921875, "learning_rate": 0.00010856019865633156, "loss": 0.1384, "step": 9982 }, { "epoch": 0.473015873015873, "grad_norm": 0.609375, "learning_rate": 0.00010854536063225465, "loss": 0.9593, "step": 9983 }, { "epoch": 0.4730632551528074, "grad_norm": 0.68359375, "learning_rate": 0.00010853052241865076, "loss": 1.2983, "step": 9984 }, { "epoch": 0.4731106372897418, "grad_norm": 0.96484375, "learning_rate": 0.00010851568401584895, "loss": 0.8646, "step": 9985 }, { "epoch": 0.4731580194266761, "grad_norm": 0.5625, "learning_rate": 0.00010850084542417837, "loss": 1.2175, "step": 9986 }, { "epoch": 0.4732054015636105, "grad_norm": 0.71875, "learning_rate": 0.00010848600664396807, "loss": 1.2886, "step": 9987 }, { "epoch": 0.4732527837005449, "grad_norm": 0.58203125, "learning_rate": 0.00010847116767554719, "loss": 0.8181, "step": 9988 }, { "epoch": 0.47330016583747925, "grad_norm": 0.73046875, "learning_rate": 0.00010845632851924485, "loss": 1.0204, "step": 9989 }, { "epoch": 0.47334754797441364, "grad_norm": 0.01513671875, "learning_rate": 0.00010844148917539014, "loss": 0.0006, "step": 9990 }, { "epoch": 0.47339493011134803, "grad_norm": 0.193359375, "learning_rate": 0.00010842664964431219, "loss": 0.1346, "step": 9991 }, { "epoch": 0.47344231224828237, "grad_norm": 0.59765625, "learning_rate": 0.0001084118099263401, "loss": 0.9228, "step": 9992 }, { "epoch": 0.47348969438521676, "grad_norm": 0.64453125, "learning_rate": 0.00010839697002180305, "loss": 0.9449, "step": 9993 }, { "epoch": 0.47353707652215116, "grad_norm": 0.81640625, "learning_rate": 0.00010838212993103016, "loss": 0.7914, "step": 9994 }, { "epoch": 0.47358445865908555, "grad_norm": 0.1318359375, "learning_rate": 0.00010836728965435054, "loss": 0.0168, "step": 9995 }, { "epoch": 0.4736318407960199, "grad_norm": 0.58984375, "learning_rate": 0.00010835244919209337, "loss": 0.7113, "step": 9996 }, { "epoch": 0.4736792229329543, "grad_norm": 0.578125, "learning_rate": 0.00010833760854458774, "loss": 0.6393, "step": 9997 }, { "epoch": 0.4737266050698887, "grad_norm": 0.47265625, "learning_rate": 0.00010832276771216288, "loss": 0.2326, "step": 9998 }, { "epoch": 0.473773987206823, "grad_norm": 0.74609375, "learning_rate": 0.00010830792669514784, "loss": 1.2877, "step": 9999 }, { "epoch": 0.4738213693437574, "grad_norm": 0.5703125, "learning_rate": 0.00010829308549387187, "loss": 0.5168, "step": 10000 }, { "epoch": 0.4738687514806918, "grad_norm": 0.71875, "learning_rate": 0.00010827824410866409, "loss": 0.8626, "step": 10001 }, { "epoch": 0.47391613361762613, "grad_norm": 0.57421875, "learning_rate": 0.00010826340253985368, "loss": 0.9617, "step": 10002 }, { "epoch": 0.4739635157545605, "grad_norm": 0.578125, "learning_rate": 0.00010824856078776979, "loss": 0.7941, "step": 10003 }, { "epoch": 0.4740108978914949, "grad_norm": 0.734375, "learning_rate": 0.00010823371885274163, "loss": 1.194, "step": 10004 }, { "epoch": 0.47405828002842926, "grad_norm": 0.007568359375, "learning_rate": 0.00010821887673509834, "loss": 0.0004, "step": 10005 }, { "epoch": 0.47410566216536365, "grad_norm": 0.7265625, "learning_rate": 0.00010820403443516911, "loss": 1.1485, "step": 10006 }, { "epoch": 0.47415304430229804, "grad_norm": 0.18359375, "learning_rate": 0.00010818919195328315, "loss": 0.0852, "step": 10007 }, { "epoch": 0.47420042643923244, "grad_norm": 0.69140625, "learning_rate": 0.00010817434928976962, "loss": 0.7768, "step": 10008 }, { "epoch": 0.4742478085761668, "grad_norm": 0.5234375, "learning_rate": 0.00010815950644495774, "loss": 0.8561, "step": 10009 }, { "epoch": 0.47429519071310117, "grad_norm": 0.189453125, "learning_rate": 0.00010814466341917669, "loss": 0.0363, "step": 10010 }, { "epoch": 0.47434257285003556, "grad_norm": 0.76953125, "learning_rate": 0.00010812982021275569, "loss": 0.5881, "step": 10011 }, { "epoch": 0.4743899549869699, "grad_norm": 1.34375, "learning_rate": 0.00010811497682602393, "loss": 0.626, "step": 10012 }, { "epoch": 0.4744373371239043, "grad_norm": 0.57421875, "learning_rate": 0.00010810013325931065, "loss": 0.7399, "step": 10013 }, { "epoch": 0.4744847192608387, "grad_norm": 0.59765625, "learning_rate": 0.00010808528951294504, "loss": 0.7295, "step": 10014 }, { "epoch": 0.474532101397773, "grad_norm": 0.515625, "learning_rate": 0.00010807044558725627, "loss": 0.5209, "step": 10015 }, { "epoch": 0.4745794835347074, "grad_norm": 0.60546875, "learning_rate": 0.00010805560148257365, "loss": 0.6022, "step": 10016 }, { "epoch": 0.4746268656716418, "grad_norm": 0.71875, "learning_rate": 0.00010804075719922638, "loss": 1.0504, "step": 10017 }, { "epoch": 0.47467424780857614, "grad_norm": 0.54296875, "learning_rate": 0.00010802591273754365, "loss": 0.1208, "step": 10018 }, { "epoch": 0.47472162994551054, "grad_norm": 0.59375, "learning_rate": 0.00010801106809785475, "loss": 0.6259, "step": 10019 }, { "epoch": 0.47476901208244493, "grad_norm": 0.6015625, "learning_rate": 0.00010799622328048888, "loss": 1.0677, "step": 10020 }, { "epoch": 0.47481639421937927, "grad_norm": 0.95703125, "learning_rate": 0.00010798137828577533, "loss": 1.0154, "step": 10021 }, { "epoch": 0.47486377635631366, "grad_norm": 1.1015625, "learning_rate": 0.00010796653311404326, "loss": 0.2195, "step": 10022 }, { "epoch": 0.47491115849324805, "grad_norm": 0.68359375, "learning_rate": 0.00010795168776562198, "loss": 0.2982, "step": 10023 }, { "epoch": 0.47495854063018245, "grad_norm": 0.69140625, "learning_rate": 0.00010793684224084077, "loss": 1.2982, "step": 10024 }, { "epoch": 0.4750059227671168, "grad_norm": 0.796875, "learning_rate": 0.00010792199654002881, "loss": 1.3599, "step": 10025 }, { "epoch": 0.4750533049040512, "grad_norm": 0.59375, "learning_rate": 0.00010790715066351542, "loss": 0.8199, "step": 10026 }, { "epoch": 0.47510068704098557, "grad_norm": 0.703125, "learning_rate": 0.00010789230461162984, "loss": 0.9086, "step": 10027 }, { "epoch": 0.4751480691779199, "grad_norm": 0.78515625, "learning_rate": 0.0001078774583847014, "loss": 1.1019, "step": 10028 }, { "epoch": 0.4751954513148543, "grad_norm": 0.62890625, "learning_rate": 0.00010786261198305929, "loss": 0.8015, "step": 10029 }, { "epoch": 0.4752428334517887, "grad_norm": 0.65625, "learning_rate": 0.00010784776540703281, "loss": 1.0677, "step": 10030 }, { "epoch": 0.47529021558872303, "grad_norm": 0.6796875, "learning_rate": 0.00010783291865695127, "loss": 1.1803, "step": 10031 }, { "epoch": 0.4753375977256574, "grad_norm": 0.68359375, "learning_rate": 0.00010781807173314394, "loss": 0.953, "step": 10032 }, { "epoch": 0.4753849798625918, "grad_norm": 0.1787109375, "learning_rate": 0.00010780322463594008, "loss": 0.0117, "step": 10033 }, { "epoch": 0.47543236199952615, "grad_norm": 0.67578125, "learning_rate": 0.00010778837736566902, "loss": 1.2231, "step": 10034 }, { "epoch": 0.47547974413646055, "grad_norm": 0.154296875, "learning_rate": 0.00010777352992266006, "loss": 0.1227, "step": 10035 }, { "epoch": 0.47552712627339494, "grad_norm": 0.640625, "learning_rate": 0.00010775868230724249, "loss": 0.9482, "step": 10036 }, { "epoch": 0.47557450841032933, "grad_norm": 0.67578125, "learning_rate": 0.00010774383451974559, "loss": 1.0058, "step": 10037 }, { "epoch": 0.47562189054726367, "grad_norm": 0.2197265625, "learning_rate": 0.0001077289865604987, "loss": 0.145, "step": 10038 }, { "epoch": 0.47566927268419806, "grad_norm": 0.95703125, "learning_rate": 0.00010771413842983111, "loss": 1.3951, "step": 10039 }, { "epoch": 0.47571665482113246, "grad_norm": 0.765625, "learning_rate": 0.00010769929012807216, "loss": 0.9192, "step": 10040 }, { "epoch": 0.4757640369580668, "grad_norm": 0.703125, "learning_rate": 0.00010768444165555114, "loss": 0.8916, "step": 10041 }, { "epoch": 0.4758114190950012, "grad_norm": 0.90625, "learning_rate": 0.0001076695930125974, "loss": 1.0961, "step": 10042 }, { "epoch": 0.4758588012319356, "grad_norm": 0.671875, "learning_rate": 0.00010765474419954027, "loss": 0.5015, "step": 10043 }, { "epoch": 0.4759061833688699, "grad_norm": 0.91796875, "learning_rate": 0.00010763989521670905, "loss": 1.1383, "step": 10044 }, { "epoch": 0.4759535655058043, "grad_norm": 0.71875, "learning_rate": 0.00010762504606443308, "loss": 1.0595, "step": 10045 }, { "epoch": 0.4760009476427387, "grad_norm": 0.76953125, "learning_rate": 0.00010761019674304173, "loss": 1.2456, "step": 10046 }, { "epoch": 0.47604832977967304, "grad_norm": 1.0703125, "learning_rate": 0.00010759534725286434, "loss": 0.9614, "step": 10047 }, { "epoch": 0.47609571191660743, "grad_norm": 0.21875, "learning_rate": 0.0001075804975942302, "loss": 0.1372, "step": 10048 }, { "epoch": 0.4761430940535418, "grad_norm": 0.76953125, "learning_rate": 0.0001075656477674687, "loss": 1.1255, "step": 10049 }, { "epoch": 0.47619047619047616, "grad_norm": 0.609375, "learning_rate": 0.0001075507977729092, "loss": 0.3984, "step": 10050 }, { "epoch": 0.47623785832741056, "grad_norm": 0.66796875, "learning_rate": 0.00010753594761088109, "loss": 0.7424, "step": 10051 }, { "epoch": 0.47628524046434495, "grad_norm": 0.7890625, "learning_rate": 0.00010752109728171363, "loss": 0.8824, "step": 10052 }, { "epoch": 0.47633262260127934, "grad_norm": 0.87890625, "learning_rate": 0.00010750624678573627, "loss": 0.1074, "step": 10053 }, { "epoch": 0.4763800047382137, "grad_norm": 0.75390625, "learning_rate": 0.00010749139612327832, "loss": 0.9001, "step": 10054 }, { "epoch": 0.4764273868751481, "grad_norm": 0.8359375, "learning_rate": 0.00010747654529466921, "loss": 0.4796, "step": 10055 }, { "epoch": 0.47647476901208247, "grad_norm": 0.9296875, "learning_rate": 0.0001074616943002383, "loss": 0.7125, "step": 10056 }, { "epoch": 0.4765221511490168, "grad_norm": 0.73046875, "learning_rate": 0.00010744684314031492, "loss": 1.3499, "step": 10057 }, { "epoch": 0.4765695332859512, "grad_norm": 0.65625, "learning_rate": 0.00010743199181522853, "loss": 0.8206, "step": 10058 }, { "epoch": 0.4766169154228856, "grad_norm": 0.228515625, "learning_rate": 0.0001074171403253085, "loss": 0.0325, "step": 10059 }, { "epoch": 0.4766642975598199, "grad_norm": 0.81640625, "learning_rate": 0.00010740228867088415, "loss": 0.8872, "step": 10060 }, { "epoch": 0.4767116796967543, "grad_norm": 0.5234375, "learning_rate": 0.00010738743685228492, "loss": 0.4424, "step": 10061 }, { "epoch": 0.4767590618336887, "grad_norm": 0.1943359375, "learning_rate": 0.00010737258486984024, "loss": 0.1372, "step": 10062 }, { "epoch": 0.47680644397062305, "grad_norm": 0.396484375, "learning_rate": 0.00010735773272387945, "loss": 0.0906, "step": 10063 }, { "epoch": 0.47685382610755744, "grad_norm": 0.63671875, "learning_rate": 0.000107342880414732, "loss": 0.7328, "step": 10064 }, { "epoch": 0.47690120824449184, "grad_norm": 0.26953125, "learning_rate": 0.00010732802794272727, "loss": 0.0246, "step": 10065 }, { "epoch": 0.47694859038142623, "grad_norm": 0.2158203125, "learning_rate": 0.00010731317530819471, "loss": 0.0452, "step": 10066 }, { "epoch": 0.47699597251836057, "grad_norm": 0.19921875, "learning_rate": 0.00010729832251146372, "loss": 0.0289, "step": 10067 }, { "epoch": 0.47704335465529496, "grad_norm": 0.6171875, "learning_rate": 0.00010728346955286368, "loss": 0.9414, "step": 10068 }, { "epoch": 0.47709073679222935, "grad_norm": 0.05322265625, "learning_rate": 0.00010726861643272406, "loss": 0.0023, "step": 10069 }, { "epoch": 0.4771381189291637, "grad_norm": 0.62109375, "learning_rate": 0.00010725376315137427, "loss": 1.0377, "step": 10070 }, { "epoch": 0.4771855010660981, "grad_norm": 0.5078125, "learning_rate": 0.00010723890970914371, "loss": 0.5354, "step": 10071 }, { "epoch": 0.4772328832030325, "grad_norm": 0.54296875, "learning_rate": 0.00010722405610636188, "loss": 0.5258, "step": 10072 }, { "epoch": 0.4772802653399668, "grad_norm": 0.578125, "learning_rate": 0.00010720920234335816, "loss": 0.9203, "step": 10073 }, { "epoch": 0.4773276474769012, "grad_norm": 0.72265625, "learning_rate": 0.00010719434842046203, "loss": 0.1152, "step": 10074 }, { "epoch": 0.4773750296138356, "grad_norm": 0.97265625, "learning_rate": 0.00010717949433800292, "loss": 1.1106, "step": 10075 }, { "epoch": 0.47742241175076994, "grad_norm": 0.734375, "learning_rate": 0.00010716464009631024, "loss": 1.0645, "step": 10076 }, { "epoch": 0.47746979388770433, "grad_norm": 0.5625, "learning_rate": 0.00010714978569571347, "loss": 0.8776, "step": 10077 }, { "epoch": 0.4775171760246387, "grad_norm": 0.61328125, "learning_rate": 0.00010713493113654212, "loss": 0.5713, "step": 10078 }, { "epoch": 0.47756455816157306, "grad_norm": 0.54296875, "learning_rate": 0.00010712007641912556, "loss": 1.1019, "step": 10079 }, { "epoch": 0.47761194029850745, "grad_norm": 0.66015625, "learning_rate": 0.00010710522154379328, "loss": 1.1372, "step": 10080 }, { "epoch": 0.47765932243544185, "grad_norm": 0.46875, "learning_rate": 0.00010709036651087478, "loss": 0.7831, "step": 10081 }, { "epoch": 0.47770670457237624, "grad_norm": 0.7578125, "learning_rate": 0.00010707551132069949, "loss": 0.8754, "step": 10082 }, { "epoch": 0.4777540867093106, "grad_norm": 0.53515625, "learning_rate": 0.00010706065597359692, "loss": 1.0916, "step": 10083 }, { "epoch": 0.47780146884624497, "grad_norm": 0.73046875, "learning_rate": 0.00010704580046989648, "loss": 1.004, "step": 10084 }, { "epoch": 0.47784885098317936, "grad_norm": 0.455078125, "learning_rate": 0.0001070309448099277, "loss": 0.3348, "step": 10085 }, { "epoch": 0.4778962331201137, "grad_norm": 0.484375, "learning_rate": 0.00010701608899402009, "loss": 0.8956, "step": 10086 }, { "epoch": 0.4779436152570481, "grad_norm": 0.427734375, "learning_rate": 0.00010700123302250307, "loss": 0.1105, "step": 10087 }, { "epoch": 0.4779909973939825, "grad_norm": 0.56640625, "learning_rate": 0.00010698637689570614, "loss": 0.8867, "step": 10088 }, { "epoch": 0.4780383795309168, "grad_norm": 0.40625, "learning_rate": 0.00010697152061395885, "loss": 0.0139, "step": 10089 }, { "epoch": 0.4780857616678512, "grad_norm": 0.61328125, "learning_rate": 0.00010695666417759062, "loss": 1.2305, "step": 10090 }, { "epoch": 0.4781331438047856, "grad_norm": 0.181640625, "learning_rate": 0.00010694180758693104, "loss": 0.1164, "step": 10091 }, { "epoch": 0.47818052594171995, "grad_norm": 0.54296875, "learning_rate": 0.00010692695084230952, "loss": 0.5308, "step": 10092 }, { "epoch": 0.47822790807865434, "grad_norm": 0.71875, "learning_rate": 0.00010691209394405564, "loss": 1.223, "step": 10093 }, { "epoch": 0.47827529021558873, "grad_norm": 0.66015625, "learning_rate": 0.00010689723689249884, "loss": 1.0544, "step": 10094 }, { "epoch": 0.4783226723525231, "grad_norm": 0.64453125, "learning_rate": 0.00010688237968796869, "loss": 1.0804, "step": 10095 }, { "epoch": 0.47837005448945746, "grad_norm": 0.6484375, "learning_rate": 0.00010686752233079467, "loss": 1.0493, "step": 10096 }, { "epoch": 0.47841743662639186, "grad_norm": 0.6796875, "learning_rate": 0.00010685266482130636, "loss": 0.8879, "step": 10097 }, { "epoch": 0.47846481876332625, "grad_norm": 0.2138671875, "learning_rate": 0.00010683780715983323, "loss": 0.1609, "step": 10098 }, { "epoch": 0.4785122009002606, "grad_norm": 0.609375, "learning_rate": 0.00010682294934670482, "loss": 0.3973, "step": 10099 }, { "epoch": 0.478559583037195, "grad_norm": 0.7109375, "learning_rate": 0.00010680809138225065, "loss": 1.0785, "step": 10100 }, { "epoch": 0.4786069651741294, "grad_norm": 0.7265625, "learning_rate": 0.00010679323326680029, "loss": 0.8985, "step": 10101 }, { "epoch": 0.4786543473110637, "grad_norm": 0.609375, "learning_rate": 0.00010677837500068322, "loss": 0.8989, "step": 10102 }, { "epoch": 0.4787017294479981, "grad_norm": 0.3515625, "learning_rate": 0.000106763516584229, "loss": 0.0173, "step": 10103 }, { "epoch": 0.4787491115849325, "grad_norm": 0.11328125, "learning_rate": 0.00010674865801776723, "loss": 0.0018, "step": 10104 }, { "epoch": 0.47879649372186683, "grad_norm": 0.55859375, "learning_rate": 0.00010673379930162739, "loss": 1.0238, "step": 10105 }, { "epoch": 0.4788438758588012, "grad_norm": 0.73828125, "learning_rate": 0.00010671894043613908, "loss": 0.7204, "step": 10106 }, { "epoch": 0.4788912579957356, "grad_norm": 0.5078125, "learning_rate": 0.0001067040814216318, "loss": 0.3304, "step": 10107 }, { "epoch": 0.47893864013266996, "grad_norm": 0.2177734375, "learning_rate": 0.00010668922225843512, "loss": 0.129, "step": 10108 }, { "epoch": 0.47898602226960435, "grad_norm": 0.6484375, "learning_rate": 0.00010667436294687865, "loss": 1.1233, "step": 10109 }, { "epoch": 0.47903340440653874, "grad_norm": 0.49609375, "learning_rate": 0.0001066595034872919, "loss": 1.0682, "step": 10110 }, { "epoch": 0.47908078654347314, "grad_norm": 0.80078125, "learning_rate": 0.00010664464388000445, "loss": 1.2712, "step": 10111 }, { "epoch": 0.4791281686804075, "grad_norm": 0.609375, "learning_rate": 0.0001066297841253459, "loss": 0.521, "step": 10112 }, { "epoch": 0.47917555081734187, "grad_norm": 0.72265625, "learning_rate": 0.00010661492422364578, "loss": 0.5304, "step": 10113 }, { "epoch": 0.47922293295427626, "grad_norm": 0.51953125, "learning_rate": 0.00010660006417523372, "loss": 0.8213, "step": 10114 }, { "epoch": 0.4792703150912106, "grad_norm": 0.61328125, "learning_rate": 0.00010658520398043923, "loss": 0.638, "step": 10115 }, { "epoch": 0.479317697228145, "grad_norm": 0.64453125, "learning_rate": 0.00010657034363959195, "loss": 0.9732, "step": 10116 }, { "epoch": 0.4793650793650794, "grad_norm": 0.625, "learning_rate": 0.00010655548315302144, "loss": 0.8203, "step": 10117 }, { "epoch": 0.4794124615020137, "grad_norm": 0.1103515625, "learning_rate": 0.00010654062252105728, "loss": 0.0159, "step": 10118 }, { "epoch": 0.4794598436389481, "grad_norm": 0.69140625, "learning_rate": 0.00010652576174402909, "loss": 1.1486, "step": 10119 }, { "epoch": 0.4795072257758825, "grad_norm": 0.26171875, "learning_rate": 0.00010651090082226647, "loss": 0.0382, "step": 10120 }, { "epoch": 0.47955460791281684, "grad_norm": 0.62890625, "learning_rate": 0.00010649603975609898, "loss": 1.292, "step": 10121 }, { "epoch": 0.47960199004975124, "grad_norm": 0.48046875, "learning_rate": 0.00010648117854585627, "loss": 0.0801, "step": 10122 }, { "epoch": 0.47964937218668563, "grad_norm": 0.75390625, "learning_rate": 0.0001064663171918679, "loss": 0.7903, "step": 10123 }, { "epoch": 0.47969675432362, "grad_norm": 0.52734375, "learning_rate": 0.00010645145569446353, "loss": 0.3721, "step": 10124 }, { "epoch": 0.47974413646055436, "grad_norm": 0.7421875, "learning_rate": 0.00010643659405397273, "loss": 0.8507, "step": 10125 }, { "epoch": 0.47979151859748875, "grad_norm": 0.58984375, "learning_rate": 0.00010642173227072511, "loss": 0.1726, "step": 10126 }, { "epoch": 0.47983890073442315, "grad_norm": 0.68359375, "learning_rate": 0.00010640687034505034, "loss": 0.3973, "step": 10127 }, { "epoch": 0.4798862828713575, "grad_norm": 0.7578125, "learning_rate": 0.000106392008277278, "loss": 1.1665, "step": 10128 }, { "epoch": 0.4799336650082919, "grad_norm": 0.08740234375, "learning_rate": 0.00010637714606773773, "loss": 0.0156, "step": 10129 }, { "epoch": 0.47998104714522627, "grad_norm": 0.07421875, "learning_rate": 0.00010636228371675916, "loss": 0.0057, "step": 10130 }, { "epoch": 0.4800284292821606, "grad_norm": 0.765625, "learning_rate": 0.0001063474212246719, "loss": 0.8653, "step": 10131 }, { "epoch": 0.480075811419095, "grad_norm": 0.240234375, "learning_rate": 0.0001063325585918056, "loss": 0.1549, "step": 10132 }, { "epoch": 0.4801231935560294, "grad_norm": 0.78125, "learning_rate": 0.0001063176958184899, "loss": 1.2178, "step": 10133 }, { "epoch": 0.48017057569296373, "grad_norm": 0.640625, "learning_rate": 0.0001063028329050544, "loss": 0.6577, "step": 10134 }, { "epoch": 0.4802179578298981, "grad_norm": 0.62109375, "learning_rate": 0.00010628796985182883, "loss": 0.7462, "step": 10135 }, { "epoch": 0.4802653399668325, "grad_norm": 0.8046875, "learning_rate": 0.00010627310665914276, "loss": 0.9332, "step": 10136 }, { "epoch": 0.48031272210376685, "grad_norm": 0.51171875, "learning_rate": 0.00010625824332732586, "loss": 0.5907, "step": 10137 }, { "epoch": 0.48036010424070125, "grad_norm": 0.89453125, "learning_rate": 0.00010624337985670782, "loss": 0.2605, "step": 10138 }, { "epoch": 0.48040748637763564, "grad_norm": 0.51953125, "learning_rate": 0.00010622851624761828, "loss": 1.0526, "step": 10139 }, { "epoch": 0.48045486851457003, "grad_norm": 0.75, "learning_rate": 0.00010621365250038682, "loss": 1.29, "step": 10140 }, { "epoch": 0.48050225065150437, "grad_norm": 0.703125, "learning_rate": 0.00010619878861534318, "loss": 1.2478, "step": 10141 }, { "epoch": 0.48054963278843876, "grad_norm": 0.08203125, "learning_rate": 0.00010618392459281703, "loss": 0.0014, "step": 10142 }, { "epoch": 0.48059701492537316, "grad_norm": 0.71875, "learning_rate": 0.00010616906043313803, "loss": 0.7464, "step": 10143 }, { "epoch": 0.4806443970623075, "grad_norm": 0.81640625, "learning_rate": 0.00010615419613663582, "loss": 0.859, "step": 10144 }, { "epoch": 0.4806917791992419, "grad_norm": 0.6171875, "learning_rate": 0.0001061393317036401, "loss": 1.2051, "step": 10145 }, { "epoch": 0.4807391613361763, "grad_norm": 0.74609375, "learning_rate": 0.00010612446713448054, "loss": 0.9745, "step": 10146 }, { "epoch": 0.4807865434731106, "grad_norm": 0.59765625, "learning_rate": 0.00010610960242948687, "loss": 1.1956, "step": 10147 }, { "epoch": 0.480833925610045, "grad_norm": 0.65625, "learning_rate": 0.00010609473758898867, "loss": 1.363, "step": 10148 }, { "epoch": 0.4808813077469794, "grad_norm": 0.6796875, "learning_rate": 0.00010607987261331567, "loss": 1.1725, "step": 10149 }, { "epoch": 0.48092868988391374, "grad_norm": 0.7265625, "learning_rate": 0.00010606500750279761, "loss": 1.0699, "step": 10150 }, { "epoch": 0.48097607202084813, "grad_norm": 0.578125, "learning_rate": 0.00010605014225776412, "loss": 0.0789, "step": 10151 }, { "epoch": 0.4810234541577825, "grad_norm": 0.76171875, "learning_rate": 0.00010603527687854494, "loss": 1.2713, "step": 10152 }, { "epoch": 0.4810708362947169, "grad_norm": 0.71875, "learning_rate": 0.00010602041136546971, "loss": 1.409, "step": 10153 }, { "epoch": 0.48111821843165126, "grad_norm": 0.78125, "learning_rate": 0.00010600554571886823, "loss": 1.0618, "step": 10154 }, { "epoch": 0.48116560056858565, "grad_norm": 0.72265625, "learning_rate": 0.0001059906799390701, "loss": 1.3923, "step": 10155 }, { "epoch": 0.48121298270552004, "grad_norm": 0.2041015625, "learning_rate": 0.00010597581402640508, "loss": 0.1376, "step": 10156 }, { "epoch": 0.4812603648424544, "grad_norm": 0.62109375, "learning_rate": 0.00010596094798120286, "loss": 0.9505, "step": 10157 }, { "epoch": 0.4813077469793888, "grad_norm": 0.41015625, "learning_rate": 0.00010594608180379317, "loss": 0.0368, "step": 10158 }, { "epoch": 0.48135512911632317, "grad_norm": 0.59765625, "learning_rate": 0.00010593121549450573, "loss": 0.9188, "step": 10159 }, { "epoch": 0.4814025112532575, "grad_norm": 0.859375, "learning_rate": 0.00010591634905367023, "loss": 1.0409, "step": 10160 }, { "epoch": 0.4814498933901919, "grad_norm": 0.61328125, "learning_rate": 0.00010590148248161641, "loss": 1.4114, "step": 10161 }, { "epoch": 0.4814972755271263, "grad_norm": 0.50390625, "learning_rate": 0.00010588661577867403, "loss": 0.8748, "step": 10162 }, { "epoch": 0.4815446576640606, "grad_norm": 0.2041015625, "learning_rate": 0.00010587174894517276, "loss": 0.0263, "step": 10163 }, { "epoch": 0.481592039800995, "grad_norm": 0.7265625, "learning_rate": 0.00010585688198144232, "loss": 1.2197, "step": 10164 }, { "epoch": 0.4816394219379294, "grad_norm": 0.6328125, "learning_rate": 0.00010584201488781251, "loss": 1.2011, "step": 10165 }, { "epoch": 0.48168680407486375, "grad_norm": 0.5078125, "learning_rate": 0.00010582714766461304, "loss": 0.7421, "step": 10166 }, { "epoch": 0.48173418621179814, "grad_norm": 0.08203125, "learning_rate": 0.0001058122803121736, "loss": 0.0014, "step": 10167 }, { "epoch": 0.48178156834873254, "grad_norm": 0.625, "learning_rate": 0.000105797412830824, "loss": 0.9324, "step": 10168 }, { "epoch": 0.48182895048566693, "grad_norm": 0.049072265625, "learning_rate": 0.00010578254522089397, "loss": 0.0013, "step": 10169 }, { "epoch": 0.48187633262260127, "grad_norm": 0.609375, "learning_rate": 0.00010576767748271326, "loss": 1.0638, "step": 10170 }, { "epoch": 0.48192371475953566, "grad_norm": 1.0234375, "learning_rate": 0.00010575280961661159, "loss": 1.0468, "step": 10171 }, { "epoch": 0.48197109689647005, "grad_norm": 0.07275390625, "learning_rate": 0.0001057379416229187, "loss": 0.0099, "step": 10172 }, { "epoch": 0.4820184790334044, "grad_norm": 0.78125, "learning_rate": 0.00010572307350196439, "loss": 1.5398, "step": 10173 }, { "epoch": 0.4820658611703388, "grad_norm": 0.1259765625, "learning_rate": 0.00010570820525407843, "loss": 0.0073, "step": 10174 }, { "epoch": 0.4821132433072732, "grad_norm": 0.61328125, "learning_rate": 0.00010569333687959053, "loss": 1.2509, "step": 10175 }, { "epoch": 0.4821606254442075, "grad_norm": 0.71484375, "learning_rate": 0.00010567846837883049, "loss": 1.298, "step": 10176 }, { "epoch": 0.4822080075811419, "grad_norm": 0.60546875, "learning_rate": 0.0001056635997521281, "loss": 0.7765, "step": 10177 }, { "epoch": 0.4822553897180763, "grad_norm": 0.6640625, "learning_rate": 0.00010564873099981308, "loss": 1.2411, "step": 10178 }, { "epoch": 0.48230277185501064, "grad_norm": 0.8125, "learning_rate": 0.0001056338621222152, "loss": 0.0974, "step": 10179 }, { "epoch": 0.48235015399194503, "grad_norm": 0.69921875, "learning_rate": 0.00010561899311966428, "loss": 0.0739, "step": 10180 }, { "epoch": 0.4823975361288794, "grad_norm": 0.042724609375, "learning_rate": 0.00010560412399249009, "loss": 0.0032, "step": 10181 }, { "epoch": 0.4824449182658138, "grad_norm": 0.65234375, "learning_rate": 0.00010558925474102237, "loss": 0.6145, "step": 10182 }, { "epoch": 0.48249230040274815, "grad_norm": 0.019775390625, "learning_rate": 0.00010557438536559095, "loss": 0.0009, "step": 10183 }, { "epoch": 0.48253968253968255, "grad_norm": 0.6015625, "learning_rate": 0.00010555951586652557, "loss": 1.0975, "step": 10184 }, { "epoch": 0.48258706467661694, "grad_norm": 0.09423828125, "learning_rate": 0.00010554464624415612, "loss": 0.0134, "step": 10185 }, { "epoch": 0.4826344468135513, "grad_norm": 0.65625, "learning_rate": 0.00010552977649881226, "loss": 0.8918, "step": 10186 }, { "epoch": 0.48268182895048567, "grad_norm": 0.5546875, "learning_rate": 0.00010551490663082386, "loss": 1.1871, "step": 10187 }, { "epoch": 0.48272921108742006, "grad_norm": 0.71875, "learning_rate": 0.0001055000366405207, "loss": 0.7529, "step": 10188 }, { "epoch": 0.4827765932243544, "grad_norm": 0.1826171875, "learning_rate": 0.00010548516652823262, "loss": 0.1328, "step": 10189 }, { "epoch": 0.4828239753612888, "grad_norm": 0.75390625, "learning_rate": 0.00010547029629428937, "loss": 1.2239, "step": 10190 }, { "epoch": 0.4828713574982232, "grad_norm": 0.54296875, "learning_rate": 0.00010545542593902076, "loss": 0.712, "step": 10191 }, { "epoch": 0.4829187396351575, "grad_norm": 0.65234375, "learning_rate": 0.00010544055546275664, "loss": 0.5598, "step": 10192 }, { "epoch": 0.4829661217720919, "grad_norm": 0.734375, "learning_rate": 0.00010542568486582678, "loss": 0.9865, "step": 10193 }, { "epoch": 0.4830135039090263, "grad_norm": 0.478515625, "learning_rate": 0.000105410814148561, "loss": 0.9094, "step": 10194 }, { "epoch": 0.48306088604596065, "grad_norm": 0.703125, "learning_rate": 0.00010539594331128912, "loss": 1.296, "step": 10195 }, { "epoch": 0.48310826818289504, "grad_norm": 0.119140625, "learning_rate": 0.00010538107235434101, "loss": 0.005, "step": 10196 }, { "epoch": 0.48315565031982943, "grad_norm": 0.10986328125, "learning_rate": 0.00010536620127804639, "loss": 0.0074, "step": 10197 }, { "epoch": 0.4832030324567638, "grad_norm": 0.5625, "learning_rate": 0.00010535133008273517, "loss": 0.886, "step": 10198 }, { "epoch": 0.48325041459369816, "grad_norm": 0.78125, "learning_rate": 0.00010533645876873715, "loss": 1.3604, "step": 10199 }, { "epoch": 0.48329779673063256, "grad_norm": 0.5390625, "learning_rate": 0.00010532158733638216, "loss": 0.2285, "step": 10200 }, { "epoch": 0.48334517886756695, "grad_norm": 0.22265625, "learning_rate": 0.00010530671578600005, "loss": 0.1313, "step": 10201 }, { "epoch": 0.4833925610045013, "grad_norm": 0.64453125, "learning_rate": 0.00010529184411792059, "loss": 1.0717, "step": 10202 }, { "epoch": 0.4834399431414357, "grad_norm": 0.44921875, "learning_rate": 0.00010527697233247369, "loss": 0.6953, "step": 10203 }, { "epoch": 0.4834873252783701, "grad_norm": 0.57421875, "learning_rate": 0.00010526210042998916, "loss": 0.8257, "step": 10204 }, { "epoch": 0.4835347074153044, "grad_norm": 0.625, "learning_rate": 0.00010524722841079684, "loss": 0.245, "step": 10205 }, { "epoch": 0.4835820895522388, "grad_norm": 0.65625, "learning_rate": 0.00010523235627522659, "loss": 0.9412, "step": 10206 }, { "epoch": 0.4836294716891732, "grad_norm": 0.59765625, "learning_rate": 0.00010521748402360825, "loss": 0.9764, "step": 10207 }, { "epoch": 0.48367685382610753, "grad_norm": 0.43359375, "learning_rate": 0.00010520261165627168, "loss": 0.1322, "step": 10208 }, { "epoch": 0.4837242359630419, "grad_norm": 0.1689453125, "learning_rate": 0.00010518773917354673, "loss": 0.034, "step": 10209 }, { "epoch": 0.4837716180999763, "grad_norm": 0.6171875, "learning_rate": 0.00010517286657576324, "loss": 0.8706, "step": 10210 }, { "epoch": 0.4838190002369107, "grad_norm": 0.57421875, "learning_rate": 0.00010515799386325107, "loss": 0.4869, "step": 10211 }, { "epoch": 0.48386638237384505, "grad_norm": 0.609375, "learning_rate": 0.00010514312103634012, "loss": 0.9291, "step": 10212 }, { "epoch": 0.48391376451077944, "grad_norm": 0.7421875, "learning_rate": 0.00010512824809536019, "loss": 1.3326, "step": 10213 }, { "epoch": 0.48396114664771384, "grad_norm": 0.63671875, "learning_rate": 0.00010511337504064118, "loss": 0.8072, "step": 10214 }, { "epoch": 0.4840085287846482, "grad_norm": 1.2578125, "learning_rate": 0.00010509850187251298, "loss": 0.2092, "step": 10215 }, { "epoch": 0.48405591092158257, "grad_norm": 0.78125, "learning_rate": 0.00010508362859130546, "loss": 0.8456, "step": 10216 }, { "epoch": 0.48410329305851696, "grad_norm": 0.6015625, "learning_rate": 0.00010506875519734843, "loss": 0.2392, "step": 10217 }, { "epoch": 0.4841506751954513, "grad_norm": 0.1953125, "learning_rate": 0.00010505388169097182, "loss": 0.1449, "step": 10218 }, { "epoch": 0.4841980573323857, "grad_norm": 0.341796875, "learning_rate": 0.00010503900807250548, "loss": 0.128, "step": 10219 }, { "epoch": 0.4842454394693201, "grad_norm": 0.8515625, "learning_rate": 0.00010502413434227933, "loss": 1.0062, "step": 10220 }, { "epoch": 0.4842928216062544, "grad_norm": 0.58984375, "learning_rate": 0.00010500926050062323, "loss": 0.8574, "step": 10221 }, { "epoch": 0.4843402037431888, "grad_norm": 0.58984375, "learning_rate": 0.00010499438654786706, "loss": 0.9706, "step": 10222 }, { "epoch": 0.4843875858801232, "grad_norm": 0.478515625, "learning_rate": 0.00010497951248434073, "loss": 0.1635, "step": 10223 }, { "epoch": 0.48443496801705754, "grad_norm": 0.1806640625, "learning_rate": 0.00010496463831037413, "loss": 0.1218, "step": 10224 }, { "epoch": 0.48448235015399194, "grad_norm": 0.330078125, "learning_rate": 0.0001049497640262971, "loss": 0.0917, "step": 10225 }, { "epoch": 0.48452973229092633, "grad_norm": 0.9296875, "learning_rate": 0.00010493488963243958, "loss": 0.0567, "step": 10226 }, { "epoch": 0.4845771144278607, "grad_norm": 0.640625, "learning_rate": 0.0001049200151291315, "loss": 1.2936, "step": 10227 }, { "epoch": 0.48462449656479506, "grad_norm": 0.1884765625, "learning_rate": 0.00010490514051670271, "loss": 0.1276, "step": 10228 }, { "epoch": 0.48467187870172945, "grad_norm": 0.765625, "learning_rate": 0.00010489026579548311, "loss": 1.432, "step": 10229 }, { "epoch": 0.48471926083866385, "grad_norm": 0.212890625, "learning_rate": 0.00010487539096580263, "loss": 0.1523, "step": 10230 }, { "epoch": 0.4847666429755982, "grad_norm": 0.546875, "learning_rate": 0.00010486051602799118, "loss": 1.1239, "step": 10231 }, { "epoch": 0.4848140251125326, "grad_norm": 0.34765625, "learning_rate": 0.00010484564098237868, "loss": 0.0408, "step": 10232 }, { "epoch": 0.48486140724946697, "grad_norm": 0.17578125, "learning_rate": 0.00010483076582929501, "loss": 0.1257, "step": 10233 }, { "epoch": 0.4849087893864013, "grad_norm": 0.54296875, "learning_rate": 0.00010481589056907006, "loss": 0.8312, "step": 10234 }, { "epoch": 0.4849561715233357, "grad_norm": 0.02490234375, "learning_rate": 0.00010480101520203385, "loss": 0.0009, "step": 10235 }, { "epoch": 0.4850035536602701, "grad_norm": 0.93359375, "learning_rate": 0.00010478613972851619, "loss": 0.7291, "step": 10236 }, { "epoch": 0.48505093579720443, "grad_norm": 0.8125, "learning_rate": 0.00010477126414884706, "loss": 1.1389, "step": 10237 }, { "epoch": 0.4850983179341388, "grad_norm": 0.54296875, "learning_rate": 0.0001047563884633564, "loss": 0.4756, "step": 10238 }, { "epoch": 0.4851457000710732, "grad_norm": 0.65234375, "learning_rate": 0.00010474151267237408, "loss": 0.8407, "step": 10239 }, { "epoch": 0.4851930822080076, "grad_norm": 0.76953125, "learning_rate": 0.00010472663677623009, "loss": 0.8124, "step": 10240 }, { "epoch": 0.48524046434494195, "grad_norm": 0.220703125, "learning_rate": 0.00010471176077525428, "loss": 0.1401, "step": 10241 }, { "epoch": 0.48528784648187634, "grad_norm": 0.69140625, "learning_rate": 0.00010469688466977667, "loss": 0.8301, "step": 10242 }, { "epoch": 0.48533522861881073, "grad_norm": 0.62109375, "learning_rate": 0.00010468200846012717, "loss": 0.8308, "step": 10243 }, { "epoch": 0.48538261075574507, "grad_norm": 0.6484375, "learning_rate": 0.00010466713214663568, "loss": 1.3149, "step": 10244 }, { "epoch": 0.48542999289267946, "grad_norm": 0.5859375, "learning_rate": 0.00010465225572963217, "loss": 0.7159, "step": 10245 }, { "epoch": 0.48547737502961386, "grad_norm": 0.91015625, "learning_rate": 0.00010463737920944663, "loss": 1.0653, "step": 10246 }, { "epoch": 0.4855247571665482, "grad_norm": 0.8671875, "learning_rate": 0.00010462250258640891, "loss": 0.9131, "step": 10247 }, { "epoch": 0.4855721393034826, "grad_norm": 0.78125, "learning_rate": 0.00010460762586084905, "loss": 0.8464, "step": 10248 }, { "epoch": 0.485619521440417, "grad_norm": 0.466796875, "learning_rate": 0.0001045927490330969, "loss": 0.634, "step": 10249 }, { "epoch": 0.4856669035773513, "grad_norm": 0.953125, "learning_rate": 0.00010457787210348251, "loss": 1.1345, "step": 10250 }, { "epoch": 0.4857142857142857, "grad_norm": 0.55078125, "learning_rate": 0.00010456299507233577, "loss": 0.3389, "step": 10251 }, { "epoch": 0.4857616678512201, "grad_norm": 0.640625, "learning_rate": 0.00010454811793998666, "loss": 0.445, "step": 10252 }, { "epoch": 0.48580904998815444, "grad_norm": 0.6875, "learning_rate": 0.00010453324070676516, "loss": 0.9121, "step": 10253 }, { "epoch": 0.48585643212508883, "grad_norm": 0.5625, "learning_rate": 0.00010451836337300121, "loss": 0.838, "step": 10254 }, { "epoch": 0.4859038142620232, "grad_norm": 0.82421875, "learning_rate": 0.00010450348593902475, "loss": 0.995, "step": 10255 }, { "epoch": 0.4859511963989576, "grad_norm": 0.5390625, "learning_rate": 0.00010448860840516579, "loss": 0.5999, "step": 10256 }, { "epoch": 0.48599857853589196, "grad_norm": 1.1796875, "learning_rate": 0.00010447373077175427, "loss": 0.3551, "step": 10257 }, { "epoch": 0.48604596067282635, "grad_norm": 0.0269775390625, "learning_rate": 0.00010445885303912017, "loss": 0.0014, "step": 10258 }, { "epoch": 0.48609334280976074, "grad_norm": 0.51171875, "learning_rate": 0.00010444397520759343, "loss": 0.9227, "step": 10259 }, { "epoch": 0.4861407249466951, "grad_norm": 0.52734375, "learning_rate": 0.00010442909727750407, "loss": 1.0303, "step": 10260 }, { "epoch": 0.4861881070836295, "grad_norm": 0.08056640625, "learning_rate": 0.00010441421924918209, "loss": 0.0103, "step": 10261 }, { "epoch": 0.48623548922056387, "grad_norm": 0.1630859375, "learning_rate": 0.00010439934112295737, "loss": 0.0176, "step": 10262 }, { "epoch": 0.4862828713574982, "grad_norm": 0.020263671875, "learning_rate": 0.00010438446289916, "loss": 0.0008, "step": 10263 }, { "epoch": 0.4863302534944326, "grad_norm": 0.57421875, "learning_rate": 0.00010436958457811987, "loss": 0.5984, "step": 10264 }, { "epoch": 0.486377635631367, "grad_norm": 0.80078125, "learning_rate": 0.00010435470616016703, "loss": 1.3061, "step": 10265 }, { "epoch": 0.4864250177683013, "grad_norm": 0.59375, "learning_rate": 0.00010433982764563146, "loss": 0.8373, "step": 10266 }, { "epoch": 0.4864723999052357, "grad_norm": 0.67578125, "learning_rate": 0.00010432494903484312, "loss": 0.8359, "step": 10267 }, { "epoch": 0.4865197820421701, "grad_norm": 0.4140625, "learning_rate": 0.00010431007032813199, "loss": 0.0201, "step": 10268 }, { "epoch": 0.48656716417910445, "grad_norm": 0.5390625, "learning_rate": 0.00010429519152582812, "loss": 0.7844, "step": 10269 }, { "epoch": 0.48661454631603884, "grad_norm": 0.5859375, "learning_rate": 0.00010428031262826148, "loss": 1.022, "step": 10270 }, { "epoch": 0.48666192845297324, "grad_norm": 0.65234375, "learning_rate": 0.00010426543363576207, "loss": 0.8896, "step": 10271 }, { "epoch": 0.48670931058990763, "grad_norm": 0.53125, "learning_rate": 0.00010425055454865987, "loss": 1.1148, "step": 10272 }, { "epoch": 0.48675669272684197, "grad_norm": 0.86328125, "learning_rate": 0.00010423567536728489, "loss": 0.8805, "step": 10273 }, { "epoch": 0.48680407486377636, "grad_norm": 0.55859375, "learning_rate": 0.00010422079609196716, "loss": 0.0896, "step": 10274 }, { "epoch": 0.48685145700071075, "grad_norm": 0.68359375, "learning_rate": 0.00010420591672303666, "loss": 1.0757, "step": 10275 }, { "epoch": 0.4868988391376451, "grad_norm": 0.78125, "learning_rate": 0.00010419103726082339, "loss": 1.2818, "step": 10276 }, { "epoch": 0.4869462212745795, "grad_norm": 0.546875, "learning_rate": 0.00010417615770565741, "loss": 0.677, "step": 10277 }, { "epoch": 0.4869936034115139, "grad_norm": 0.322265625, "learning_rate": 0.00010416127805786869, "loss": 0.0657, "step": 10278 }, { "epoch": 0.4870409855484482, "grad_norm": 0.515625, "learning_rate": 0.00010414639831778726, "loss": 0.7278, "step": 10279 }, { "epoch": 0.4870883676853826, "grad_norm": 0.57421875, "learning_rate": 0.00010413151848574309, "loss": 0.5555, "step": 10280 }, { "epoch": 0.487135749822317, "grad_norm": 0.74609375, "learning_rate": 0.0001041166385620663, "loss": 1.2327, "step": 10281 }, { "epoch": 0.48718313195925134, "grad_norm": 0.1279296875, "learning_rate": 0.00010410175854708681, "loss": 0.0819, "step": 10282 }, { "epoch": 0.48723051409618573, "grad_norm": 0.85546875, "learning_rate": 0.0001040868784411347, "loss": 0.9043, "step": 10283 }, { "epoch": 0.4872778962331201, "grad_norm": 0.55078125, "learning_rate": 0.00010407199824454, "loss": 0.2155, "step": 10284 }, { "epoch": 0.4873252783700545, "grad_norm": 0.0986328125, "learning_rate": 0.00010405711795763269, "loss": 0.0108, "step": 10285 }, { "epoch": 0.48737266050698885, "grad_norm": 0.2216796875, "learning_rate": 0.0001040422375807428, "loss": 0.0133, "step": 10286 }, { "epoch": 0.48742004264392325, "grad_norm": 0.67578125, "learning_rate": 0.00010402735711420045, "loss": 0.7902, "step": 10287 }, { "epoch": 0.48746742478085764, "grad_norm": 0.68359375, "learning_rate": 0.00010401247655833555, "loss": 1.0957, "step": 10288 }, { "epoch": 0.487514806917792, "grad_norm": 0.05810546875, "learning_rate": 0.00010399759591347825, "loss": 0.0049, "step": 10289 }, { "epoch": 0.48756218905472637, "grad_norm": 0.376953125, "learning_rate": 0.00010398271517995848, "loss": 0.0964, "step": 10290 }, { "epoch": 0.48760957119166076, "grad_norm": 0.77734375, "learning_rate": 0.00010396783435810636, "loss": 0.9047, "step": 10291 }, { "epoch": 0.4876569533285951, "grad_norm": 0.173828125, "learning_rate": 0.00010395295344825189, "loss": 0.0149, "step": 10292 }, { "epoch": 0.4877043354655295, "grad_norm": 0.62109375, "learning_rate": 0.00010393807245072511, "loss": 1.1522, "step": 10293 }, { "epoch": 0.4877517176024639, "grad_norm": 0.09619140625, "learning_rate": 0.00010392319136585609, "loss": 0.0133, "step": 10294 }, { "epoch": 0.4877990997393982, "grad_norm": 0.1474609375, "learning_rate": 0.00010390831019397487, "loss": 0.0323, "step": 10295 }, { "epoch": 0.4878464818763326, "grad_norm": 0.63671875, "learning_rate": 0.0001038934289354115, "loss": 0.9189, "step": 10296 }, { "epoch": 0.487893864013267, "grad_norm": 0.70703125, "learning_rate": 0.000103878547590496, "loss": 1.0949, "step": 10297 }, { "epoch": 0.48794124615020135, "grad_norm": 0.609375, "learning_rate": 0.00010386366615955844, "loss": 1.1003, "step": 10298 }, { "epoch": 0.48798862828713574, "grad_norm": 0.2119140625, "learning_rate": 0.0001038487846429289, "loss": 0.0741, "step": 10299 }, { "epoch": 0.48803601042407013, "grad_norm": 0.546875, "learning_rate": 0.00010383390304093743, "loss": 1.0395, "step": 10300 }, { "epoch": 0.4880833925610045, "grad_norm": 0.2392578125, "learning_rate": 0.00010381902135391404, "loss": 0.0056, "step": 10301 }, { "epoch": 0.48813077469793886, "grad_norm": 0.54296875, "learning_rate": 0.00010380413958218884, "loss": 1.0296, "step": 10302 }, { "epoch": 0.48817815683487326, "grad_norm": 0.53515625, "learning_rate": 0.0001037892577260919, "loss": 0.4365, "step": 10303 }, { "epoch": 0.48822553897180765, "grad_norm": 0.6953125, "learning_rate": 0.00010377437578595323, "loss": 1.3547, "step": 10304 }, { "epoch": 0.488272921108742, "grad_norm": 0.6875, "learning_rate": 0.00010375949376210295, "loss": 0.6985, "step": 10305 }, { "epoch": 0.4883203032456764, "grad_norm": 0.95703125, "learning_rate": 0.00010374461165487105, "loss": 0.8957, "step": 10306 }, { "epoch": 0.48836768538261077, "grad_norm": 0.7421875, "learning_rate": 0.00010372972946458772, "loss": 1.1189, "step": 10307 }, { "epoch": 0.4884150675195451, "grad_norm": 0.166015625, "learning_rate": 0.00010371484719158291, "loss": 0.007, "step": 10308 }, { "epoch": 0.4884624496564795, "grad_norm": 0.60546875, "learning_rate": 0.00010369996483618676, "loss": 0.8763, "step": 10309 }, { "epoch": 0.4885098317934139, "grad_norm": 0.58203125, "learning_rate": 0.00010368508239872934, "loss": 0.9605, "step": 10310 }, { "epoch": 0.48855721393034823, "grad_norm": 0.000507354736328125, "learning_rate": 0.00010367019987954072, "loss": 0.0, "step": 10311 }, { "epoch": 0.4886045960672826, "grad_norm": 0.703125, "learning_rate": 0.00010365531727895099, "loss": 1.1541, "step": 10312 }, { "epoch": 0.488651978204217, "grad_norm": 0.60546875, "learning_rate": 0.00010364043459729018, "loss": 0.5847, "step": 10313 }, { "epoch": 0.4886993603411514, "grad_norm": 0.87109375, "learning_rate": 0.00010362555183488842, "loss": 0.7224, "step": 10314 }, { "epoch": 0.48874674247808575, "grad_norm": 0.56640625, "learning_rate": 0.00010361066899207581, "loss": 0.724, "step": 10315 }, { "epoch": 0.48879412461502014, "grad_norm": 0.046142578125, "learning_rate": 0.00010359578606918236, "loss": 0.0045, "step": 10316 }, { "epoch": 0.48884150675195454, "grad_norm": 0.154296875, "learning_rate": 0.00010358090306653822, "loss": 0.1146, "step": 10317 }, { "epoch": 0.4888888888888889, "grad_norm": 0.625, "learning_rate": 0.00010356601998447351, "loss": 1.2059, "step": 10318 }, { "epoch": 0.48893627102582327, "grad_norm": 0.4296875, "learning_rate": 0.00010355113682331824, "loss": 0.433, "step": 10319 }, { "epoch": 0.48898365316275766, "grad_norm": 0.6484375, "learning_rate": 0.00010353625358340253, "loss": 0.9884, "step": 10320 }, { "epoch": 0.489031035299692, "grad_norm": 0.61328125, "learning_rate": 0.00010352137026505649, "loss": 0.9016, "step": 10321 }, { "epoch": 0.4890784174366264, "grad_norm": 0.052978515625, "learning_rate": 0.0001035064868686102, "loss": 0.0052, "step": 10322 }, { "epoch": 0.4891257995735608, "grad_norm": 0.8203125, "learning_rate": 0.00010349160339439379, "loss": 1.0137, "step": 10323 }, { "epoch": 0.4891731817104951, "grad_norm": 0.59765625, "learning_rate": 0.00010347671984273731, "loss": 1.1278, "step": 10324 }, { "epoch": 0.4892205638474295, "grad_norm": 0.734375, "learning_rate": 0.00010346183621397087, "loss": 0.0816, "step": 10325 }, { "epoch": 0.4892679459843639, "grad_norm": 0.53515625, "learning_rate": 0.00010344695250842464, "loss": 0.5045, "step": 10326 }, { "epoch": 0.48931532812129824, "grad_norm": 0.01312255859375, "learning_rate": 0.00010343206872642865, "loss": 0.0006, "step": 10327 }, { "epoch": 0.48936271025823264, "grad_norm": 0.458984375, "learning_rate": 0.00010341718486831304, "loss": 0.6119, "step": 10328 }, { "epoch": 0.48941009239516703, "grad_norm": 0.61328125, "learning_rate": 0.0001034023009344079, "loss": 1.2241, "step": 10329 }, { "epoch": 0.4894574745321014, "grad_norm": 0.65625, "learning_rate": 0.00010338741692504336, "loss": 0.8122, "step": 10330 }, { "epoch": 0.48950485666903576, "grad_norm": 0.5703125, "learning_rate": 0.00010337253284054951, "loss": 1.0274, "step": 10331 }, { "epoch": 0.48955223880597015, "grad_norm": 0.41796875, "learning_rate": 0.00010335764868125646, "loss": 0.1195, "step": 10332 }, { "epoch": 0.48959962094290455, "grad_norm": 0.640625, "learning_rate": 0.00010334276444749436, "loss": 1.2966, "step": 10333 }, { "epoch": 0.4896470030798389, "grad_norm": 0.7265625, "learning_rate": 0.00010332788013959331, "loss": 0.8606, "step": 10334 }, { "epoch": 0.4896943852167733, "grad_norm": 0.1416015625, "learning_rate": 0.00010331299575788342, "loss": 0.0228, "step": 10335 }, { "epoch": 0.48974176735370767, "grad_norm": 0.41796875, "learning_rate": 0.0001032981113026948, "loss": 0.0618, "step": 10336 }, { "epoch": 0.489789149490642, "grad_norm": 0.19921875, "learning_rate": 0.00010328322677435757, "loss": 0.024, "step": 10337 }, { "epoch": 0.4898365316275764, "grad_norm": 0.6328125, "learning_rate": 0.00010326834217320191, "loss": 1.2858, "step": 10338 }, { "epoch": 0.4898839137645108, "grad_norm": 0.173828125, "learning_rate": 0.00010325345749955789, "loss": 0.1277, "step": 10339 }, { "epoch": 0.48993129590144513, "grad_norm": 0.1796875, "learning_rate": 0.00010323857275375561, "loss": 0.1292, "step": 10340 }, { "epoch": 0.4899786780383795, "grad_norm": 0.57421875, "learning_rate": 0.00010322368793612529, "loss": 0.6946, "step": 10341 }, { "epoch": 0.4900260601753139, "grad_norm": 0.4765625, "learning_rate": 0.00010320880304699699, "loss": 0.8295, "step": 10342 }, { "epoch": 0.4900734423122483, "grad_norm": 0.75390625, "learning_rate": 0.00010319391808670082, "loss": 0.8974, "step": 10343 }, { "epoch": 0.49012082444918265, "grad_norm": 0.224609375, "learning_rate": 0.00010317903305556697, "loss": 0.0449, "step": 10344 }, { "epoch": 0.49016820658611704, "grad_norm": 0.515625, "learning_rate": 0.00010316414795392555, "loss": 0.8658, "step": 10345 }, { "epoch": 0.49021558872305143, "grad_norm": 0.00701904296875, "learning_rate": 0.00010314926278210673, "loss": 0.0005, "step": 10346 }, { "epoch": 0.49026297085998577, "grad_norm": 0.7265625, "learning_rate": 0.00010313437754044058, "loss": 0.6892, "step": 10347 }, { "epoch": 0.49031035299692016, "grad_norm": 0.68359375, "learning_rate": 0.00010311949222925727, "loss": 0.3972, "step": 10348 }, { "epoch": 0.49035773513385456, "grad_norm": 0.66796875, "learning_rate": 0.00010310460684888699, "loss": 1.0022, "step": 10349 }, { "epoch": 0.4904051172707889, "grad_norm": 0.66015625, "learning_rate": 0.00010308972139965982, "loss": 1.0487, "step": 10350 }, { "epoch": 0.4904524994077233, "grad_norm": 0.71875, "learning_rate": 0.0001030748358819059, "loss": 0.9667, "step": 10351 }, { "epoch": 0.4904998815446577, "grad_norm": 0.1103515625, "learning_rate": 0.00010305995029595539, "loss": 0.0107, "step": 10352 }, { "epoch": 0.490547263681592, "grad_norm": 0.68359375, "learning_rate": 0.00010304506464213848, "loss": 1.1267, "step": 10353 }, { "epoch": 0.4905946458185264, "grad_norm": 0.2060546875, "learning_rate": 0.00010303017892078523, "loss": 0.0282, "step": 10354 }, { "epoch": 0.4906420279554608, "grad_norm": 0.6328125, "learning_rate": 0.00010301529313222587, "loss": 0.6471, "step": 10355 }, { "epoch": 0.49068941009239514, "grad_norm": 0.5625, "learning_rate": 0.0001030004072767905, "loss": 0.9969, "step": 10356 }, { "epoch": 0.49073679222932953, "grad_norm": 0.087890625, "learning_rate": 0.00010298552135480932, "loss": 0.0024, "step": 10357 }, { "epoch": 0.4907841743662639, "grad_norm": 0.498046875, "learning_rate": 0.00010297063536661247, "loss": 0.7482, "step": 10358 }, { "epoch": 0.4908315565031983, "grad_norm": 1.015625, "learning_rate": 0.00010295574931253005, "loss": 0.8796, "step": 10359 }, { "epoch": 0.49087893864013266, "grad_norm": 0.439453125, "learning_rate": 0.00010294086319289227, "loss": 0.4574, "step": 10360 }, { "epoch": 0.49092632077706705, "grad_norm": 0.57421875, "learning_rate": 0.00010292597700802928, "loss": 1.0798, "step": 10361 }, { "epoch": 0.49097370291400144, "grad_norm": 0.875, "learning_rate": 0.00010291109075827124, "loss": 1.0982, "step": 10362 }, { "epoch": 0.4910210850509358, "grad_norm": 0.490234375, "learning_rate": 0.0001028962044439483, "loss": 0.7837, "step": 10363 }, { "epoch": 0.4910684671878702, "grad_norm": 0.80078125, "learning_rate": 0.00010288131806539063, "loss": 0.7936, "step": 10364 }, { "epoch": 0.49111584932480457, "grad_norm": 0.1572265625, "learning_rate": 0.00010286643162292841, "loss": 0.0107, "step": 10365 }, { "epoch": 0.4911632314617389, "grad_norm": 0.52734375, "learning_rate": 0.00010285154511689179, "loss": 0.0806, "step": 10366 }, { "epoch": 0.4912106135986733, "grad_norm": 0.75390625, "learning_rate": 0.00010283665854761091, "loss": 1.3351, "step": 10367 }, { "epoch": 0.4912579957356077, "grad_norm": 0.796875, "learning_rate": 0.00010282177191541597, "loss": 0.9226, "step": 10368 }, { "epoch": 0.491305377872542, "grad_norm": 0.0238037109375, "learning_rate": 0.00010280688522063717, "loss": 0.001, "step": 10369 }, { "epoch": 0.4913527600094764, "grad_norm": 0.60546875, "learning_rate": 0.00010279199846360461, "loss": 1.0816, "step": 10370 }, { "epoch": 0.4914001421464108, "grad_norm": 0.62890625, "learning_rate": 0.00010277711164464852, "loss": 1.2541, "step": 10371 }, { "epoch": 0.4914475242833452, "grad_norm": 0.859375, "learning_rate": 0.00010276222476409905, "loss": 0.6041, "step": 10372 }, { "epoch": 0.49149490642027954, "grad_norm": 0.75390625, "learning_rate": 0.00010274733782228638, "loss": 1.2356, "step": 10373 }, { "epoch": 0.49154228855721394, "grad_norm": 0.734375, "learning_rate": 0.00010273245081954067, "loss": 1.1031, "step": 10374 }, { "epoch": 0.49158967069414833, "grad_norm": 0.50390625, "learning_rate": 0.00010271756375619208, "loss": 0.7845, "step": 10375 }, { "epoch": 0.49163705283108267, "grad_norm": 0.51953125, "learning_rate": 0.00010270267663257088, "loss": 0.7275, "step": 10376 }, { "epoch": 0.49168443496801706, "grad_norm": 0.73828125, "learning_rate": 0.00010268778944900714, "loss": 0.8636, "step": 10377 }, { "epoch": 0.49173181710495145, "grad_norm": 0.625, "learning_rate": 0.00010267290220583112, "loss": 0.8971, "step": 10378 }, { "epoch": 0.4917791992418858, "grad_norm": 0.052490234375, "learning_rate": 0.00010265801490337296, "loss": 0.0027, "step": 10379 }, { "epoch": 0.4918265813788202, "grad_norm": 0.318359375, "learning_rate": 0.0001026431275419629, "loss": 0.0894, "step": 10380 }, { "epoch": 0.4918739635157546, "grad_norm": 0.57421875, "learning_rate": 0.00010262824012193107, "loss": 0.9239, "step": 10381 }, { "epoch": 0.4919213456526889, "grad_norm": 0.91796875, "learning_rate": 0.00010261335264360765, "loss": 1.1283, "step": 10382 }, { "epoch": 0.4919687277896233, "grad_norm": 0.75, "learning_rate": 0.00010259846510732288, "loss": 1.5949, "step": 10383 }, { "epoch": 0.4920161099265577, "grad_norm": 0.65234375, "learning_rate": 0.0001025835775134069, "loss": 1.0749, "step": 10384 }, { "epoch": 0.49206349206349204, "grad_norm": 0.318359375, "learning_rate": 0.00010256868986218993, "loss": 0.1287, "step": 10385 }, { "epoch": 0.49211087420042643, "grad_norm": 0.81640625, "learning_rate": 0.00010255380215400215, "loss": 0.9982, "step": 10386 }, { "epoch": 0.4921582563373608, "grad_norm": 0.68359375, "learning_rate": 0.00010253891438917375, "loss": 1.1091, "step": 10387 }, { "epoch": 0.4922056384742952, "grad_norm": 0.77734375, "learning_rate": 0.00010252402656803497, "loss": 0.9044, "step": 10388 }, { "epoch": 0.49225302061122955, "grad_norm": 0.70703125, "learning_rate": 0.00010250913869091593, "loss": 1.4099, "step": 10389 }, { "epoch": 0.49230040274816395, "grad_norm": 0.6953125, "learning_rate": 0.0001024942507581469, "loss": 1.1162, "step": 10390 }, { "epoch": 0.49234778488509834, "grad_norm": 0.6328125, "learning_rate": 0.000102479362770058, "loss": 0.086, "step": 10391 }, { "epoch": 0.4923951670220327, "grad_norm": 0.05859375, "learning_rate": 0.00010246447472697953, "loss": 0.0024, "step": 10392 }, { "epoch": 0.49244254915896707, "grad_norm": 0.5625, "learning_rate": 0.0001024495866292416, "loss": 1.1615, "step": 10393 }, { "epoch": 0.49248993129590146, "grad_norm": 0.69140625, "learning_rate": 0.00010243469847717444, "loss": 0.6211, "step": 10394 }, { "epoch": 0.4925373134328358, "grad_norm": 0.65234375, "learning_rate": 0.0001024198102711083, "loss": 1.0741, "step": 10395 }, { "epoch": 0.4925846955697702, "grad_norm": 0.72265625, "learning_rate": 0.0001024049220113733, "loss": 1.2978, "step": 10396 }, { "epoch": 0.4926320777067046, "grad_norm": 0.047607421875, "learning_rate": 0.00010239003369829974, "loss": 0.0049, "step": 10397 }, { "epoch": 0.4926794598436389, "grad_norm": 0.6640625, "learning_rate": 0.00010237514533221774, "loss": 0.6888, "step": 10398 }, { "epoch": 0.4927268419805733, "grad_norm": 0.265625, "learning_rate": 0.00010236025691345757, "loss": 0.1765, "step": 10399 }, { "epoch": 0.4927742241175077, "grad_norm": 0.67578125, "learning_rate": 0.00010234536844234941, "loss": 1.2865, "step": 10400 }, { "epoch": 0.4928216062544421, "grad_norm": 0.7109375, "learning_rate": 0.00010233047991922344, "loss": 0.7992, "step": 10401 }, { "epoch": 0.49286898839137644, "grad_norm": 0.671875, "learning_rate": 0.00010231559134440993, "loss": 1.3444, "step": 10402 }, { "epoch": 0.49291637052831083, "grad_norm": 0.84765625, "learning_rate": 0.0001023007027182391, "loss": 1.0896, "step": 10403 }, { "epoch": 0.4929637526652452, "grad_norm": 0.296875, "learning_rate": 0.00010228581404104112, "loss": 0.0345, "step": 10404 }, { "epoch": 0.49301113480217956, "grad_norm": 0.251953125, "learning_rate": 0.00010227092531314621, "loss": 0.0355, "step": 10405 }, { "epoch": 0.49305851693911396, "grad_norm": 0.6015625, "learning_rate": 0.0001022560365348846, "loss": 1.055, "step": 10406 }, { "epoch": 0.49310589907604835, "grad_norm": 0.8046875, "learning_rate": 0.0001022411477065865, "loss": 1.2288, "step": 10407 }, { "epoch": 0.4931532812129827, "grad_norm": 0.7265625, "learning_rate": 0.00010222625882858212, "loss": 0.9305, "step": 10408 }, { "epoch": 0.4932006633499171, "grad_norm": 0.00274658203125, "learning_rate": 0.0001022113699012017, "loss": 0.0002, "step": 10409 }, { "epoch": 0.49324804548685147, "grad_norm": 0.59765625, "learning_rate": 0.00010219648092477545, "loss": 0.8266, "step": 10410 }, { "epoch": 0.4932954276237858, "grad_norm": 0.56640625, "learning_rate": 0.00010218159189963361, "loss": 0.9723, "step": 10411 }, { "epoch": 0.4933428097607202, "grad_norm": 0.4765625, "learning_rate": 0.00010216670282610637, "loss": 0.2313, "step": 10412 }, { "epoch": 0.4933901918976546, "grad_norm": 0.546875, "learning_rate": 0.00010215181370452399, "loss": 0.5898, "step": 10413 }, { "epoch": 0.49343757403458893, "grad_norm": 0.57421875, "learning_rate": 0.00010213692453521661, "loss": 0.8182, "step": 10414 }, { "epoch": 0.4934849561715233, "grad_norm": 0.435546875, "learning_rate": 0.00010212203531851459, "loss": 0.0812, "step": 10415 }, { "epoch": 0.4935323383084577, "grad_norm": 0.76953125, "learning_rate": 0.00010210714605474805, "loss": 0.2392, "step": 10416 }, { "epoch": 0.4935797204453921, "grad_norm": 1.03125, "learning_rate": 0.00010209225674424727, "loss": 0.1132, "step": 10417 }, { "epoch": 0.49362710258232645, "grad_norm": 0.6953125, "learning_rate": 0.00010207736738734246, "loss": 0.7996, "step": 10418 }, { "epoch": 0.49367448471926084, "grad_norm": 0.17578125, "learning_rate": 0.00010206247798436385, "loss": 0.0155, "step": 10419 }, { "epoch": 0.49372186685619524, "grad_norm": 1.203125, "learning_rate": 0.00010204758853564167, "loss": 0.3139, "step": 10420 }, { "epoch": 0.4937692489931296, "grad_norm": 0.71875, "learning_rate": 0.00010203269904150619, "loss": 0.546, "step": 10421 }, { "epoch": 0.49381663113006397, "grad_norm": 2.359375, "learning_rate": 0.00010201780950228759, "loss": 1.3112, "step": 10422 }, { "epoch": 0.49386401326699836, "grad_norm": 0.55078125, "learning_rate": 0.00010200291991831611, "loss": 0.8465, "step": 10423 }, { "epoch": 0.4939113954039327, "grad_norm": 0.462890625, "learning_rate": 0.000101988030289922, "loss": 0.4812, "step": 10424 }, { "epoch": 0.4939587775408671, "grad_norm": 0.76953125, "learning_rate": 0.00010197314061743551, "loss": 1.0264, "step": 10425 }, { "epoch": 0.4940061596778015, "grad_norm": 0.83203125, "learning_rate": 0.00010195825090118687, "loss": 0.8086, "step": 10426 }, { "epoch": 0.4940535418147358, "grad_norm": 2.34375, "learning_rate": 0.00010194336114150629, "loss": 2.0573, "step": 10427 }, { "epoch": 0.4941009239516702, "grad_norm": 0.0830078125, "learning_rate": 0.00010192847133872405, "loss": 0.0087, "step": 10428 }, { "epoch": 0.4941483060886046, "grad_norm": 0.61328125, "learning_rate": 0.00010191358149317036, "loss": 0.6567, "step": 10429 }, { "epoch": 0.494195688225539, "grad_norm": 0.578125, "learning_rate": 0.00010189869160517549, "loss": 0.5986, "step": 10430 }, { "epoch": 0.49424307036247334, "grad_norm": 0.216796875, "learning_rate": 0.00010188380167506963, "loss": 0.0313, "step": 10431 }, { "epoch": 0.49429045249940773, "grad_norm": 0.5078125, "learning_rate": 0.00010186891170318306, "loss": 0.4554, "step": 10432 }, { "epoch": 0.4943378346363421, "grad_norm": 0.037353515625, "learning_rate": 0.00010185402168984601, "loss": 0.0037, "step": 10433 }, { "epoch": 0.49438521677327646, "grad_norm": 0.76171875, "learning_rate": 0.00010183913163538876, "loss": 0.8794, "step": 10434 }, { "epoch": 0.49443259891021085, "grad_norm": 0.8046875, "learning_rate": 0.00010182424154014152, "loss": 1.1408, "step": 10435 }, { "epoch": 0.49447998104714525, "grad_norm": 0.380859375, "learning_rate": 0.00010180935140443458, "loss": 0.1155, "step": 10436 }, { "epoch": 0.4945273631840796, "grad_norm": 0.65625, "learning_rate": 0.0001017944612285981, "loss": 1.2385, "step": 10437 }, { "epoch": 0.494574745321014, "grad_norm": 0.58203125, "learning_rate": 0.00010177957101296241, "loss": 0.7973, "step": 10438 }, { "epoch": 0.49462212745794837, "grad_norm": 0.59375, "learning_rate": 0.00010176468075785773, "loss": 1.2734, "step": 10439 }, { "epoch": 0.4946695095948827, "grad_norm": 0.77734375, "learning_rate": 0.00010174979046361429, "loss": 1.0335, "step": 10440 }, { "epoch": 0.4947168917318171, "grad_norm": 0.6640625, "learning_rate": 0.00010173490013056237, "loss": 0.7154, "step": 10441 }, { "epoch": 0.4947642738687515, "grad_norm": 0.828125, "learning_rate": 0.00010172000975903221, "loss": 1.1064, "step": 10442 }, { "epoch": 0.49481165600568583, "grad_norm": 0.1455078125, "learning_rate": 0.00010170511934935408, "loss": 0.0372, "step": 10443 }, { "epoch": 0.4948590381426202, "grad_norm": 0.1611328125, "learning_rate": 0.0001016902289018582, "loss": 0.1204, "step": 10444 }, { "epoch": 0.4949064202795546, "grad_norm": 0.75390625, "learning_rate": 0.00010167533841687486, "loss": 0.984, "step": 10445 }, { "epoch": 0.494953802416489, "grad_norm": 0.09619140625, "learning_rate": 0.00010166044789473431, "loss": 0.0056, "step": 10446 }, { "epoch": 0.49500118455342335, "grad_norm": 0.62890625, "learning_rate": 0.00010164555733576675, "loss": 0.6282, "step": 10447 }, { "epoch": 0.49504856669035774, "grad_norm": 0.60546875, "learning_rate": 0.00010163066674030251, "loss": 1.0109, "step": 10448 }, { "epoch": 0.49509594882729213, "grad_norm": 0.63671875, "learning_rate": 0.00010161577610867183, "loss": 1.0961, "step": 10449 }, { "epoch": 0.49514333096422647, "grad_norm": 0.7734375, "learning_rate": 0.00010160088544120494, "loss": 0.9837, "step": 10450 }, { "epoch": 0.49519071310116086, "grad_norm": 0.71875, "learning_rate": 0.00010158599473823209, "loss": 1.0436, "step": 10451 }, { "epoch": 0.49523809523809526, "grad_norm": 0.4921875, "learning_rate": 0.00010157110400008363, "loss": 0.9952, "step": 10452 }, { "epoch": 0.4952854773750296, "grad_norm": 0.62890625, "learning_rate": 0.00010155621322708973, "loss": 1.1686, "step": 10453 }, { "epoch": 0.495332859511964, "grad_norm": 0.64453125, "learning_rate": 0.00010154132241958065, "loss": 1.1098, "step": 10454 }, { "epoch": 0.4953802416488984, "grad_norm": 0.6328125, "learning_rate": 0.0001015264315778867, "loss": 0.4043, "step": 10455 }, { "epoch": 0.4954276237858327, "grad_norm": 1.171875, "learning_rate": 0.00010151154070233813, "loss": 1.1451, "step": 10456 }, { "epoch": 0.4954750059227671, "grad_norm": 0.65625, "learning_rate": 0.00010149664979326518, "loss": 0.969, "step": 10457 }, { "epoch": 0.4955223880597015, "grad_norm": 0.69140625, "learning_rate": 0.00010148175885099815, "loss": 0.9108, "step": 10458 }, { "epoch": 0.4955697701966359, "grad_norm": 0.734375, "learning_rate": 0.00010146686787586728, "loss": 0.5957, "step": 10459 }, { "epoch": 0.49561715233357023, "grad_norm": 0.625, "learning_rate": 0.00010145197686820285, "loss": 1.1212, "step": 10460 }, { "epoch": 0.4956645344705046, "grad_norm": 0.64453125, "learning_rate": 0.00010143708582833513, "loss": 0.6518, "step": 10461 }, { "epoch": 0.495711916607439, "grad_norm": 0.515625, "learning_rate": 0.00010142219475659436, "loss": 0.4941, "step": 10462 }, { "epoch": 0.49575929874437336, "grad_norm": 1.625, "learning_rate": 0.00010140730365331082, "loss": 1.5096, "step": 10463 }, { "epoch": 0.49580668088130775, "grad_norm": 0.28515625, "learning_rate": 0.0001013924125188148, "loss": 0.0465, "step": 10464 }, { "epoch": 0.49585406301824214, "grad_norm": 0.169921875, "learning_rate": 0.00010137752135343653, "loss": 0.0188, "step": 10465 }, { "epoch": 0.4959014451551765, "grad_norm": 0.609375, "learning_rate": 0.0001013626301575063, "loss": 1.0475, "step": 10466 }, { "epoch": 0.49594882729211087, "grad_norm": 0.6015625, "learning_rate": 0.00010134773893135438, "loss": 0.5929, "step": 10467 }, { "epoch": 0.49599620942904526, "grad_norm": 0.54296875, "learning_rate": 0.0001013328476753111, "loss": 0.4285, "step": 10468 }, { "epoch": 0.4960435915659796, "grad_norm": 0.333984375, "learning_rate": 0.00010131795638970665, "loss": 0.1665, "step": 10469 }, { "epoch": 0.496090973702914, "grad_norm": 0.68359375, "learning_rate": 0.00010130306507487133, "loss": 0.8403, "step": 10470 }, { "epoch": 0.4961383558398484, "grad_norm": 0.61328125, "learning_rate": 0.00010128817373113539, "loss": 0.9496, "step": 10471 }, { "epoch": 0.4961857379767827, "grad_norm": 0.50390625, "learning_rate": 0.00010127328235882915, "loss": 0.7994, "step": 10472 }, { "epoch": 0.4962331201137171, "grad_norm": 0.21484375, "learning_rate": 0.00010125839095828286, "loss": 0.0093, "step": 10473 }, { "epoch": 0.4962805022506515, "grad_norm": 0.53515625, "learning_rate": 0.00010124349952982681, "loss": 0.5962, "step": 10474 }, { "epoch": 0.4963278843875859, "grad_norm": 0.0986328125, "learning_rate": 0.00010122860807379127, "loss": 0.0048, "step": 10475 }, { "epoch": 0.49637526652452024, "grad_norm": 0.2021484375, "learning_rate": 0.00010121371659050652, "loss": 0.0295, "step": 10476 }, { "epoch": 0.49642264866145464, "grad_norm": 0.72265625, "learning_rate": 0.00010119882508030279, "loss": 0.7112, "step": 10477 }, { "epoch": 0.49647003079838903, "grad_norm": 0.6328125, "learning_rate": 0.0001011839335435104, "loss": 0.5813, "step": 10478 }, { "epoch": 0.49651741293532337, "grad_norm": 0.259765625, "learning_rate": 0.00010116904198045963, "loss": 0.0494, "step": 10479 }, { "epoch": 0.49656479507225776, "grad_norm": 0.044677734375, "learning_rate": 0.00010115415039148078, "loss": 0.0034, "step": 10480 }, { "epoch": 0.49661217720919215, "grad_norm": 0.5390625, "learning_rate": 0.00010113925877690406, "loss": 0.2992, "step": 10481 }, { "epoch": 0.4966595593461265, "grad_norm": 0.51953125, "learning_rate": 0.00010112436713705984, "loss": 0.7335, "step": 10482 }, { "epoch": 0.4967069414830609, "grad_norm": 0.8828125, "learning_rate": 0.00010110947547227834, "loss": 1.1745, "step": 10483 }, { "epoch": 0.4967543236199953, "grad_norm": 0.54296875, "learning_rate": 0.00010109458378288987, "loss": 0.8676, "step": 10484 }, { "epoch": 0.4968017057569296, "grad_norm": 0.1376953125, "learning_rate": 0.00010107969206922465, "loss": 0.0138, "step": 10485 }, { "epoch": 0.496849087893864, "grad_norm": 0.07958984375, "learning_rate": 0.00010106480033161306, "loss": 0.0081, "step": 10486 }, { "epoch": 0.4968964700307984, "grad_norm": 0.68359375, "learning_rate": 0.0001010499085703853, "loss": 1.1629, "step": 10487 }, { "epoch": 0.4969438521677328, "grad_norm": 0.5546875, "learning_rate": 0.00010103501678587171, "loss": 0.7232, "step": 10488 }, { "epoch": 0.49699123430466713, "grad_norm": 0.74609375, "learning_rate": 0.00010102012497840252, "loss": 1.1733, "step": 10489 }, { "epoch": 0.4970386164416015, "grad_norm": 0.578125, "learning_rate": 0.00010100523314830806, "loss": 0.9117, "step": 10490 }, { "epoch": 0.4970859985785359, "grad_norm": 0.6328125, "learning_rate": 0.00010099034129591862, "loss": 0.9889, "step": 10491 }, { "epoch": 0.49713338071547025, "grad_norm": 0.6328125, "learning_rate": 0.00010097544942156445, "loss": 1.1874, "step": 10492 }, { "epoch": 0.49718076285240465, "grad_norm": 0.6484375, "learning_rate": 0.00010096055752557583, "loss": 0.947, "step": 10493 }, { "epoch": 0.49722814498933904, "grad_norm": 0.328125, "learning_rate": 0.0001009456656082831, "loss": 0.0447, "step": 10494 }, { "epoch": 0.4972755271262734, "grad_norm": 0.28515625, "learning_rate": 0.00010093077367001649, "loss": 0.0465, "step": 10495 }, { "epoch": 0.49732290926320777, "grad_norm": 0.70703125, "learning_rate": 0.00010091588171110631, "loss": 1.3243, "step": 10496 }, { "epoch": 0.49737029140014216, "grad_norm": 0.193359375, "learning_rate": 0.00010090098973188286, "loss": 0.1363, "step": 10497 }, { "epoch": 0.4974176735370765, "grad_norm": 0.765625, "learning_rate": 0.00010088609773267643, "loss": 0.1587, "step": 10498 }, { "epoch": 0.4974650556740109, "grad_norm": 0.1884765625, "learning_rate": 0.0001008712057138173, "loss": 0.0312, "step": 10499 }, { "epoch": 0.4975124378109453, "grad_norm": 1.0859375, "learning_rate": 0.00010085631367563573, "loss": 0.3897, "step": 10500 }, { "epoch": 0.4975598199478796, "grad_norm": 0.1044921875, "learning_rate": 0.00010084142161846202, "loss": 0.0112, "step": 10501 }, { "epoch": 0.497607202084814, "grad_norm": 0.423828125, "learning_rate": 0.0001008265295426265, "loss": 0.7306, "step": 10502 }, { "epoch": 0.4976545842217484, "grad_norm": 0.65234375, "learning_rate": 0.00010081163744845945, "loss": 0.805, "step": 10503 }, { "epoch": 0.4977019663586828, "grad_norm": 0.58984375, "learning_rate": 0.00010079674533629111, "loss": 0.584, "step": 10504 }, { "epoch": 0.49774934849561714, "grad_norm": 0.81640625, "learning_rate": 0.00010078185320645182, "loss": 0.773, "step": 10505 }, { "epoch": 0.49779673063255153, "grad_norm": 0.032470703125, "learning_rate": 0.00010076696105927188, "loss": 0.0028, "step": 10506 }, { "epoch": 0.4978441127694859, "grad_norm": 0.10009765625, "learning_rate": 0.00010075206889508155, "loss": 0.0104, "step": 10507 }, { "epoch": 0.49789149490642026, "grad_norm": 0.1943359375, "learning_rate": 0.00010073717671421113, "loss": 0.1281, "step": 10508 }, { "epoch": 0.49793887704335466, "grad_norm": 1.125, "learning_rate": 0.0001007222845169909, "loss": 0.1277, "step": 10509 }, { "epoch": 0.49798625918028905, "grad_norm": 0.70703125, "learning_rate": 0.0001007073923037512, "loss": 1.2249, "step": 10510 }, { "epoch": 0.4980336413172234, "grad_norm": 0.0257568359375, "learning_rate": 0.00010069250007482227, "loss": 0.002, "step": 10511 }, { "epoch": 0.4980810234541578, "grad_norm": 0.625, "learning_rate": 0.00010067760783053441, "loss": 1.2454, "step": 10512 }, { "epoch": 0.49812840559109217, "grad_norm": 0.6015625, "learning_rate": 0.00010066271557121795, "loss": 1.1108, "step": 10513 }, { "epoch": 0.4981757877280265, "grad_norm": 0.76953125, "learning_rate": 0.00010064782329720317, "loss": 1.4221, "step": 10514 }, { "epoch": 0.4982231698649609, "grad_norm": 0.435546875, "learning_rate": 0.00010063293100882038, "loss": 0.5729, "step": 10515 }, { "epoch": 0.4982705520018953, "grad_norm": 0.21875, "learning_rate": 0.00010061803870639981, "loss": 0.0268, "step": 10516 }, { "epoch": 0.4983179341388297, "grad_norm": 0.392578125, "learning_rate": 0.00010060314639027182, "loss": 0.2148, "step": 10517 }, { "epoch": 0.498365316275764, "grad_norm": 0.349609375, "learning_rate": 0.00010058825406076671, "loss": 0.1574, "step": 10518 }, { "epoch": 0.4984126984126984, "grad_norm": 0.72265625, "learning_rate": 0.0001005733617182147, "loss": 0.1309, "step": 10519 }, { "epoch": 0.4984600805496328, "grad_norm": 0.5, "learning_rate": 0.00010055846936294616, "loss": 0.2592, "step": 10520 }, { "epoch": 0.49850746268656715, "grad_norm": 0.306640625, "learning_rate": 0.00010054357699529138, "loss": 0.1505, "step": 10521 }, { "epoch": 0.49855484482350154, "grad_norm": 0.77734375, "learning_rate": 0.00010052868461558063, "loss": 0.7928, "step": 10522 }, { "epoch": 0.49860222696043593, "grad_norm": 0.23828125, "learning_rate": 0.00010051379222414423, "loss": 0.0357, "step": 10523 }, { "epoch": 0.49864960909737027, "grad_norm": 0.6796875, "learning_rate": 0.00010049889982131244, "loss": 1.3586, "step": 10524 }, { "epoch": 0.49869699123430467, "grad_norm": 0.095703125, "learning_rate": 0.00010048400740741559, "loss": 0.0079, "step": 10525 }, { "epoch": 0.49874437337123906, "grad_norm": 0.734375, "learning_rate": 0.000100469114982784, "loss": 0.4986, "step": 10526 }, { "epoch": 0.4987917555081734, "grad_norm": 0.640625, "learning_rate": 0.0001004542225477479, "loss": 0.6466, "step": 10527 }, { "epoch": 0.4988391376451078, "grad_norm": 0.09033203125, "learning_rate": 0.00010043933010263764, "loss": 0.0078, "step": 10528 }, { "epoch": 0.4988865197820422, "grad_norm": 0.75390625, "learning_rate": 0.0001004244376477835, "loss": 1.187, "step": 10529 }, { "epoch": 0.4989339019189765, "grad_norm": 0.03515625, "learning_rate": 0.0001004095451835158, "loss": 0.0023, "step": 10530 }, { "epoch": 0.4989812840559109, "grad_norm": 0.48046875, "learning_rate": 0.00010039465271016484, "loss": 0.1877, "step": 10531 }, { "epoch": 0.4990286661928453, "grad_norm": 0.67578125, "learning_rate": 0.00010037976022806088, "loss": 1.3169, "step": 10532 }, { "epoch": 0.4990760483297797, "grad_norm": 0.12890625, "learning_rate": 0.00010036486773753427, "loss": 0.0134, "step": 10533 }, { "epoch": 0.49912343046671404, "grad_norm": 0.49609375, "learning_rate": 0.00010034997523891523, "loss": 1.1287, "step": 10534 }, { "epoch": 0.49917081260364843, "grad_norm": 0.83203125, "learning_rate": 0.00010033508273253414, "loss": 1.1797, "step": 10535 }, { "epoch": 0.4992181947405828, "grad_norm": 0.1689453125, "learning_rate": 0.00010032019021872126, "loss": 0.028, "step": 10536 }, { "epoch": 0.49926557687751716, "grad_norm": 0.53125, "learning_rate": 0.00010030529769780693, "loss": 0.7847, "step": 10537 }, { "epoch": 0.49931295901445155, "grad_norm": 0.166015625, "learning_rate": 0.00010029040517012143, "loss": 0.0235, "step": 10538 }, { "epoch": 0.49936034115138594, "grad_norm": 0.022705078125, "learning_rate": 0.00010027551263599501, "loss": 0.0018, "step": 10539 }, { "epoch": 0.4994077232883203, "grad_norm": 0.734375, "learning_rate": 0.00010026062009575803, "loss": 0.8512, "step": 10540 }, { "epoch": 0.4994551054252547, "grad_norm": 1.0, "learning_rate": 0.00010024572754974078, "loss": 0.8281, "step": 10541 }, { "epoch": 0.49950248756218907, "grad_norm": 0.87109375, "learning_rate": 0.00010023083499827355, "loss": 1.0409, "step": 10542 }, { "epoch": 0.4995498696991234, "grad_norm": 0.212890625, "learning_rate": 0.00010021594244168664, "loss": 0.1338, "step": 10543 }, { "epoch": 0.4995972518360578, "grad_norm": 0.478515625, "learning_rate": 0.00010020104988031038, "loss": 0.6374, "step": 10544 }, { "epoch": 0.4996446339729922, "grad_norm": 0.1982421875, "learning_rate": 0.00010018615731447502, "loss": 0.1551, "step": 10545 }, { "epoch": 0.4996920161099266, "grad_norm": 0.8359375, "learning_rate": 0.0001001712647445109, "loss": 1.5011, "step": 10546 }, { "epoch": 0.4997393982468609, "grad_norm": 0.69921875, "learning_rate": 0.00010015637217074832, "loss": 1.4456, "step": 10547 }, { "epoch": 0.4997867803837953, "grad_norm": 0.09375, "learning_rate": 0.00010014147959351754, "loss": 0.0076, "step": 10548 }, { "epoch": 0.4998341625207297, "grad_norm": 0.474609375, "learning_rate": 0.00010012658701314894, "loss": 0.3427, "step": 10549 }, { "epoch": 0.49988154465766405, "grad_norm": 0.6953125, "learning_rate": 0.00010011169442997274, "loss": 0.7347, "step": 10550 }, { "epoch": 0.49992892679459844, "grad_norm": 0.48046875, "learning_rate": 0.00010009680184431927, "loss": 0.1643, "step": 10551 }, { "epoch": 0.49997630893153283, "grad_norm": 0.177734375, "learning_rate": 0.00010008190925651888, "loss": 0.1251, "step": 10552 }, { "epoch": 0.5000236910684672, "grad_norm": 0.65234375, "learning_rate": 0.00010006701666690179, "loss": 1.2011, "step": 10553 }, { "epoch": 0.5000236910684672, "eval_loss": 0.6681665182113647, "eval_runtime": 1301.5041, "eval_samples_per_second": 1.803, "eval_steps_per_second": 1.803, "step": 10553 } ], "logging_steps": 1, "max_steps": 21105, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10553, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.436552471964549e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }