{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "eval_steps": 500, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002, "grad_norm": 100.7913589477539, "learning_rate": 2e-05, "loss": 3.10091186, "step": 1 }, { "epoch": 0.004, "grad_norm": 40.7878532409668, "learning_rate": 2e-05, "loss": 1.36577916, "step": 2 }, { "epoch": 0.006, "grad_norm": 17.934667587280273, "learning_rate": 2e-05, "loss": 0.80921072, "step": 3 }, { "epoch": 0.008, "grad_norm": 29.937301635742188, "learning_rate": 2e-05, "loss": 0.85672188, "step": 4 }, { "epoch": 0.01, "grad_norm": 8.57142448425293, "learning_rate": 2e-05, "loss": 0.69120318, "step": 5 }, { "epoch": 0.012, "grad_norm": 7.896178245544434, "learning_rate": 2e-05, "loss": 0.47293058, "step": 6 }, { "epoch": 0.014, "grad_norm": 8.591035842895508, "learning_rate": 2e-05, "loss": 0.50240922, "step": 7 }, { "epoch": 0.016, "grad_norm": 10.887709617614746, "learning_rate": 2e-05, "loss": 0.51509768, "step": 8 }, { "epoch": 0.018, "grad_norm": 134.73768615722656, "learning_rate": 2e-05, "loss": 0.49554652, "step": 9 }, { "epoch": 0.02, "grad_norm": 55.33390426635742, "learning_rate": 2e-05, "loss": 0.62111574, "step": 10 }, { "epoch": 0.022, "grad_norm": 12.508940696716309, "learning_rate": 2e-05, "loss": 0.43727028, "step": 11 }, { "epoch": 0.024, "grad_norm": 8.327451705932617, "learning_rate": 2e-05, "loss": 0.4894059, "step": 12 }, { "epoch": 0.026, "grad_norm": 4.562747001647949, "learning_rate": 2e-05, "loss": 0.42299265, "step": 13 }, { "epoch": 0.028, "grad_norm": 5.968645095825195, "learning_rate": 2e-05, "loss": 0.3810643, "step": 14 }, { "epoch": 0.03, "grad_norm": 4.443109035491943, "learning_rate": 2e-05, "loss": 0.41381907, "step": 15 }, { "epoch": 0.032, "grad_norm": 3.4299163818359375, "learning_rate": 2e-05, "loss": 0.42420715, "step": 16 }, { "epoch": 0.034, "grad_norm": 4.944918155670166, "learning_rate": 2e-05, "loss": 0.46888059, "step": 17 }, { "epoch": 0.036, "grad_norm": 4.6365180015563965, "learning_rate": 2e-05, "loss": 0.45326883, "step": 18 }, { "epoch": 0.038, "grad_norm": 2.8536622524261475, "learning_rate": 2e-05, "loss": 0.44977522, "step": 19 }, { "epoch": 0.04, "grad_norm": 5.375406265258789, "learning_rate": 2e-05, "loss": 0.44011864, "step": 20 }, { "epoch": 0.042, "grad_norm": 3.37801456451416, "learning_rate": 2e-05, "loss": 0.40136099, "step": 21 }, { "epoch": 0.044, "grad_norm": 3.705557346343994, "learning_rate": 2e-05, "loss": 0.37761164, "step": 22 }, { "epoch": 0.046, "grad_norm": 3.8013367652893066, "learning_rate": 2e-05, "loss": 0.44508934, "step": 23 }, { "epoch": 0.048, "grad_norm": 3.566617250442505, "learning_rate": 2e-05, "loss": 0.38218161, "step": 24 }, { "epoch": 0.05, "grad_norm": 3.859208345413208, "learning_rate": 2e-05, "loss": 0.3878693, "step": 25 }, { "epoch": 0.052, "grad_norm": 3.2768349647521973, "learning_rate": 2e-05, "loss": 0.37782881, "step": 26 }, { "epoch": 0.054, "grad_norm": 2.3881168365478516, "learning_rate": 2e-05, "loss": 0.42464909, "step": 27 }, { "epoch": 0.056, "grad_norm": 2.54347562789917, "learning_rate": 2e-05, "loss": 0.39481583, "step": 28 }, { "epoch": 0.058, "grad_norm": 7.55833101272583, "learning_rate": 2e-05, "loss": 0.42484623, "step": 29 }, { "epoch": 0.06, "grad_norm": 2.715081214904785, "learning_rate": 2e-05, "loss": 0.46655717, "step": 30 }, { "epoch": 0.062, "grad_norm": 2.705334424972534, "learning_rate": 2e-05, "loss": 0.38881761, "step": 31 }, { "epoch": 0.064, "grad_norm": 3.3475492000579834, "learning_rate": 2e-05, "loss": 0.42681402, "step": 32 }, { "epoch": 0.066, "grad_norm": 3.141242504119873, "learning_rate": 2e-05, "loss": 0.42609936, "step": 33 }, { "epoch": 0.068, "grad_norm": 2.4876580238342285, "learning_rate": 2e-05, "loss": 0.37850282, "step": 34 }, { "epoch": 0.07, "grad_norm": 2.4182939529418945, "learning_rate": 2e-05, "loss": 0.377267, "step": 35 }, { "epoch": 0.072, "grad_norm": 3.404860258102417, "learning_rate": 2e-05, "loss": 0.41409212, "step": 36 }, { "epoch": 0.074, "grad_norm": 4.462337970733643, "learning_rate": 2e-05, "loss": 0.41622919, "step": 37 }, { "epoch": 0.076, "grad_norm": 3.895780563354492, "learning_rate": 2e-05, "loss": 0.40852085, "step": 38 }, { "epoch": 0.078, "grad_norm": 4.350463390350342, "learning_rate": 2e-05, "loss": 0.44786674, "step": 39 }, { "epoch": 0.08, "grad_norm": 2.5350425243377686, "learning_rate": 2e-05, "loss": 0.40965152, "step": 40 }, { "epoch": 0.082, "grad_norm": 5.017084121704102, "learning_rate": 2e-05, "loss": 0.45135465, "step": 41 }, { "epoch": 0.084, "grad_norm": 3.472752332687378, "learning_rate": 2e-05, "loss": 0.42319882, "step": 42 }, { "epoch": 0.086, "grad_norm": 3.5228331089019775, "learning_rate": 2e-05, "loss": 0.31802225, "step": 43 }, { "epoch": 0.088, "grad_norm": 4.688898086547852, "learning_rate": 2e-05, "loss": 0.46733904, "step": 44 }, { "epoch": 0.09, "grad_norm": 2.554978370666504, "learning_rate": 2e-05, "loss": 0.35162151, "step": 45 }, { "epoch": 0.092, "grad_norm": 3.005284070968628, "learning_rate": 2e-05, "loss": 0.40723544, "step": 46 }, { "epoch": 0.094, "grad_norm": 3.2923007011413574, "learning_rate": 2e-05, "loss": 0.35407171, "step": 47 }, { "epoch": 0.096, "grad_norm": 7.087942600250244, "learning_rate": 2e-05, "loss": 0.43687314, "step": 48 }, { "epoch": 0.098, "grad_norm": 3.748598098754883, "learning_rate": 2e-05, "loss": 0.40476981, "step": 49 }, { "epoch": 0.1, "grad_norm": 3.1440889835357666, "learning_rate": 2e-05, "loss": 0.34471345, "step": 50 }, { "epoch": 0.102, "grad_norm": 3.0694971084594727, "learning_rate": 2e-05, "loss": 0.39447641, "step": 51 }, { "epoch": 0.104, "grad_norm": 2.8853402137756348, "learning_rate": 2e-05, "loss": 0.4363516, "step": 52 }, { "epoch": 0.106, "grad_norm": 2.3027663230895996, "learning_rate": 2e-05, "loss": 0.42112815, "step": 53 }, { "epoch": 0.108, "grad_norm": 3.492051124572754, "learning_rate": 2e-05, "loss": 0.36403522, "step": 54 }, { "epoch": 0.11, "grad_norm": 3.2932403087615967, "learning_rate": 2e-05, "loss": 0.37265083, "step": 55 }, { "epoch": 0.112, "grad_norm": 2.5150811672210693, "learning_rate": 2e-05, "loss": 0.35803241, "step": 56 }, { "epoch": 0.114, "grad_norm": 2.370377540588379, "learning_rate": 2e-05, "loss": 0.4209469, "step": 57 }, { "epoch": 0.116, "grad_norm": 2.392200469970703, "learning_rate": 2e-05, "loss": 0.44664249, "step": 58 }, { "epoch": 0.118, "grad_norm": 2.7458958625793457, "learning_rate": 2e-05, "loss": 0.41989598, "step": 59 }, { "epoch": 0.12, "grad_norm": 2.5858850479125977, "learning_rate": 2e-05, "loss": 0.4336924, "step": 60 }, { "epoch": 0.122, "grad_norm": 3.472402334213257, "learning_rate": 2e-05, "loss": 0.45059329, "step": 61 }, { "epoch": 0.124, "grad_norm": 2.9056155681610107, "learning_rate": 2e-05, "loss": 0.43756476, "step": 62 }, { "epoch": 0.126, "grad_norm": 2.1384313106536865, "learning_rate": 2e-05, "loss": 0.37061867, "step": 63 }, { "epoch": 0.128, "grad_norm": 2.247955560684204, "learning_rate": 2e-05, "loss": 0.4254021, "step": 64 }, { "epoch": 0.13, "grad_norm": 3.1384289264678955, "learning_rate": 2e-05, "loss": 0.48214421, "step": 65 }, { "epoch": 0.132, "grad_norm": 3.4709699153900146, "learning_rate": 2e-05, "loss": 0.42113096, "step": 66 }, { "epoch": 0.134, "grad_norm": 2.4829514026641846, "learning_rate": 2e-05, "loss": 0.3783536, "step": 67 }, { "epoch": 0.136, "grad_norm": 3.276494264602661, "learning_rate": 2e-05, "loss": 0.42203033, "step": 68 }, { "epoch": 0.138, "grad_norm": 2.3716046810150146, "learning_rate": 2e-05, "loss": 0.37840182, "step": 69 }, { "epoch": 0.14, "grad_norm": 2.6660404205322266, "learning_rate": 2e-05, "loss": 0.3909995, "step": 70 }, { "epoch": 0.142, "grad_norm": 2.4495251178741455, "learning_rate": 2e-05, "loss": 0.39900306, "step": 71 }, { "epoch": 0.144, "grad_norm": 2.6454856395721436, "learning_rate": 2e-05, "loss": 0.40227687, "step": 72 }, { "epoch": 0.146, "grad_norm": 3.020829916000366, "learning_rate": 2e-05, "loss": 0.43715149, "step": 73 }, { "epoch": 0.148, "grad_norm": 2.465714454650879, "learning_rate": 2e-05, "loss": 0.43334347, "step": 74 }, { "epoch": 0.15, "grad_norm": 3.1749234199523926, "learning_rate": 2e-05, "loss": 0.39349914, "step": 75 }, { "epoch": 0.152, "grad_norm": 2.522357225418091, "learning_rate": 2e-05, "loss": 0.40576932, "step": 76 }, { "epoch": 0.154, "grad_norm": 2.7155542373657227, "learning_rate": 2e-05, "loss": 0.45305899, "step": 77 }, { "epoch": 0.156, "grad_norm": 2.727293014526367, "learning_rate": 2e-05, "loss": 0.44109389, "step": 78 }, { "epoch": 0.158, "grad_norm": 2.575566291809082, "learning_rate": 2e-05, "loss": 0.44445091, "step": 79 }, { "epoch": 0.16, "grad_norm": 2.941967487335205, "learning_rate": 2e-05, "loss": 0.41291177, "step": 80 }, { "epoch": 0.162, "grad_norm": 2.702101707458496, "learning_rate": 2e-05, "loss": 0.3560887, "step": 81 }, { "epoch": 0.164, "grad_norm": 2.959989547729492, "learning_rate": 2e-05, "loss": 0.3968443, "step": 82 }, { "epoch": 0.166, "grad_norm": 3.733569860458374, "learning_rate": 2e-05, "loss": 0.4232977, "step": 83 }, { "epoch": 0.168, "grad_norm": 2.6571216583251953, "learning_rate": 2e-05, "loss": 0.40444511, "step": 84 }, { "epoch": 0.17, "grad_norm": 2.2582576274871826, "learning_rate": 2e-05, "loss": 0.40064648, "step": 85 }, { "epoch": 0.172, "grad_norm": 2.748850107192993, "learning_rate": 2e-05, "loss": 0.38729197, "step": 86 }, { "epoch": 0.174, "grad_norm": 2.8563621044158936, "learning_rate": 2e-05, "loss": 0.41891837, "step": 87 }, { "epoch": 0.176, "grad_norm": 2.7641937732696533, "learning_rate": 2e-05, "loss": 0.36975527, "step": 88 }, { "epoch": 0.178, "grad_norm": 3.3558907508850098, "learning_rate": 2e-05, "loss": 0.40827322, "step": 89 }, { "epoch": 0.18, "grad_norm": 2.4098260402679443, "learning_rate": 2e-05, "loss": 0.3943249, "step": 90 }, { "epoch": 0.182, "grad_norm": 2.5855796337127686, "learning_rate": 2e-05, "loss": 0.43265003, "step": 91 }, { "epoch": 0.184, "grad_norm": 3.2219607830047607, "learning_rate": 2e-05, "loss": 0.44435838, "step": 92 }, { "epoch": 0.186, "grad_norm": 2.994154453277588, "learning_rate": 2e-05, "loss": 0.43490282, "step": 93 }, { "epoch": 0.188, "grad_norm": 3.057532548904419, "learning_rate": 2e-05, "loss": 0.42345771, "step": 94 }, { "epoch": 0.19, "grad_norm": 2.1149864196777344, "learning_rate": 2e-05, "loss": 0.40705955, "step": 95 }, { "epoch": 0.192, "grad_norm": 3.158325672149658, "learning_rate": 2e-05, "loss": 0.46287918, "step": 96 }, { "epoch": 0.194, "grad_norm": 2.7815473079681396, "learning_rate": 2e-05, "loss": 0.39676839, "step": 97 }, { "epoch": 0.196, "grad_norm": 2.1724812984466553, "learning_rate": 2e-05, "loss": 0.38517368, "step": 98 }, { "epoch": 0.198, "grad_norm": 2.216989040374756, "learning_rate": 2e-05, "loss": 0.41994014, "step": 99 }, { "epoch": 0.2, "grad_norm": 2.4955761432647705, "learning_rate": 2e-05, "loss": 0.44885725, "step": 100 }, { "epoch": 0.202, "grad_norm": 2.3539469242095947, "learning_rate": 2e-05, "loss": 0.39006793, "step": 101 }, { "epoch": 0.204, "grad_norm": 2.2714924812316895, "learning_rate": 2e-05, "loss": 0.34156144, "step": 102 }, { "epoch": 0.206, "grad_norm": 2.3460693359375, "learning_rate": 2e-05, "loss": 0.43049014, "step": 103 }, { "epoch": 0.208, "grad_norm": 3.0970299243927, "learning_rate": 2e-05, "loss": 0.42005956, "step": 104 }, { "epoch": 0.21, "grad_norm": 2.211514711380005, "learning_rate": 2e-05, "loss": 0.38698167, "step": 105 }, { "epoch": 0.212, "grad_norm": 2.8566031455993652, "learning_rate": 2e-05, "loss": 0.39388683, "step": 106 }, { "epoch": 0.214, "grad_norm": 4.965332508087158, "learning_rate": 2e-05, "loss": 0.32786798, "step": 107 }, { "epoch": 0.216, "grad_norm": 2.8506662845611572, "learning_rate": 2e-05, "loss": 0.39046088, "step": 108 }, { "epoch": 0.218, "grad_norm": 2.1420042514801025, "learning_rate": 2e-05, "loss": 0.39321035, "step": 109 }, { "epoch": 0.22, "grad_norm": 2.7388463020324707, "learning_rate": 2e-05, "loss": 0.39163101, "step": 110 }, { "epoch": 0.222, "grad_norm": 2.2771174907684326, "learning_rate": 2e-05, "loss": 0.41349089, "step": 111 }, { "epoch": 0.224, "grad_norm": 4.137855529785156, "learning_rate": 2e-05, "loss": 0.37277463, "step": 112 }, { "epoch": 0.226, "grad_norm": 2.8623104095458984, "learning_rate": 2e-05, "loss": 0.38881728, "step": 113 }, { "epoch": 0.228, "grad_norm": 2.295194387435913, "learning_rate": 2e-05, "loss": 0.3589763, "step": 114 }, { "epoch": 0.23, "grad_norm": 2.1582581996917725, "learning_rate": 2e-05, "loss": 0.42957473, "step": 115 }, { "epoch": 0.232, "grad_norm": 3.635145664215088, "learning_rate": 2e-05, "loss": 0.39942896, "step": 116 }, { "epoch": 0.234, "grad_norm": 3.435088872909546, "learning_rate": 2e-05, "loss": 0.41030282, "step": 117 }, { "epoch": 0.236, "grad_norm": 2.8914260864257812, "learning_rate": 2e-05, "loss": 0.38662139, "step": 118 }, { "epoch": 0.238, "grad_norm": 3.000392198562622, "learning_rate": 2e-05, "loss": 0.42550755, "step": 119 }, { "epoch": 0.24, "grad_norm": 2.0846173763275146, "learning_rate": 2e-05, "loss": 0.3794331, "step": 120 }, { "epoch": 0.242, "grad_norm": 2.7421340942382812, "learning_rate": 2e-05, "loss": 0.40051925, "step": 121 }, { "epoch": 0.244, "grad_norm": 2.206716537475586, "learning_rate": 2e-05, "loss": 0.41522628, "step": 122 }, { "epoch": 0.246, "grad_norm": 2.345612049102783, "learning_rate": 2e-05, "loss": 0.39287353, "step": 123 }, { "epoch": 0.248, "grad_norm": 2.5863916873931885, "learning_rate": 2e-05, "loss": 0.44706243, "step": 124 }, { "epoch": 0.25, "grad_norm": 2.4030227661132812, "learning_rate": 2e-05, "loss": 0.39214543, "step": 125 }, { "epoch": 0.252, "grad_norm": 2.58503794670105, "learning_rate": 2e-05, "loss": 0.40951967, "step": 126 }, { "epoch": 0.254, "grad_norm": 2.058553457260132, "learning_rate": 2e-05, "loss": 0.40562445, "step": 127 }, { "epoch": 0.256, "grad_norm": 2.324352979660034, "learning_rate": 2e-05, "loss": 0.39885789, "step": 128 }, { "epoch": 0.258, "grad_norm": 2.4435112476348877, "learning_rate": 2e-05, "loss": 0.4411369, "step": 129 }, { "epoch": 0.26, "grad_norm": 2.4042422771453857, "learning_rate": 2e-05, "loss": 0.42098755, "step": 130 }, { "epoch": 0.262, "grad_norm": 2.854393482208252, "learning_rate": 2e-05, "loss": 0.40045485, "step": 131 }, { "epoch": 0.264, "grad_norm": 2.2807977199554443, "learning_rate": 2e-05, "loss": 0.44947705, "step": 132 }, { "epoch": 0.266, "grad_norm": 2.3839499950408936, "learning_rate": 2e-05, "loss": 0.3765958, "step": 133 }, { "epoch": 0.268, "grad_norm": 2.462369203567505, "learning_rate": 2e-05, "loss": 0.34250346, "step": 134 }, { "epoch": 0.27, "grad_norm": 2.4267525672912598, "learning_rate": 2e-05, "loss": 0.4025071, "step": 135 }, { "epoch": 0.272, "grad_norm": 2.5603864192962646, "learning_rate": 2e-05, "loss": 0.35927606, "step": 136 }, { "epoch": 0.274, "grad_norm": 3.1467013359069824, "learning_rate": 2e-05, "loss": 0.42534155, "step": 137 }, { "epoch": 0.276, "grad_norm": 2.4989264011383057, "learning_rate": 2e-05, "loss": 0.38834065, "step": 138 }, { "epoch": 0.278, "grad_norm": 5.77341890335083, "learning_rate": 2e-05, "loss": 0.3826952, "step": 139 }, { "epoch": 0.28, "grad_norm": 2.720933675765991, "learning_rate": 2e-05, "loss": 0.39199528, "step": 140 }, { "epoch": 0.282, "grad_norm": 6.706570625305176, "learning_rate": 2e-05, "loss": 0.43876618, "step": 141 }, { "epoch": 0.284, "grad_norm": 22.753585815429688, "learning_rate": 2e-05, "loss": 0.58754456, "step": 142 }, { "epoch": 0.286, "grad_norm": 951.1945190429688, "learning_rate": 2e-05, "loss": 0.81678319, "step": 143 }, { "epoch": 0.288, "grad_norm": 52.611961364746094, "learning_rate": 2e-05, "loss": 0.69098788, "step": 144 }, { "epoch": 0.29, "grad_norm": 45.12958526611328, "learning_rate": 2e-05, "loss": 0.76437581, "step": 145 }, { "epoch": 0.292, "grad_norm": 33.77717971801758, "learning_rate": 2e-05, "loss": 0.75502336, "step": 146 }, { "epoch": 0.294, "grad_norm": 78.94849395751953, "learning_rate": 2e-05, "loss": 0.98386419, "step": 147 }, { "epoch": 0.296, "grad_norm": 99.15904998779297, "learning_rate": 2e-05, "loss": 0.7885493, "step": 148 }, { "epoch": 0.298, "grad_norm": 95.97003173828125, "learning_rate": 2e-05, "loss": 1.16757083, "step": 149 }, { "epoch": 0.3, "grad_norm": 395.0550537109375, "learning_rate": 2e-05, "loss": 0.89327443, "step": 150 }, { "epoch": 0.302, "grad_norm": 34.62553787231445, "learning_rate": 2e-05, "loss": 0.80242836, "step": 151 }, { "epoch": 0.304, "grad_norm": 49.44704818725586, "learning_rate": 2e-05, "loss": 0.63106126, "step": 152 }, { "epoch": 0.306, "grad_norm": 171.4626007080078, "learning_rate": 2e-05, "loss": 0.57824063, "step": 153 }, { "epoch": 0.308, "grad_norm": 93.4944839477539, "learning_rate": 2e-05, "loss": 0.52180779, "step": 154 }, { "epoch": 0.31, "grad_norm": 112.07191467285156, "learning_rate": 2e-05, "loss": 0.57057035, "step": 155 }, { "epoch": 0.312, "grad_norm": 23.541261672973633, "learning_rate": 2e-05, "loss": 0.6415171, "step": 156 }, { "epoch": 0.314, "grad_norm": 121.93998718261719, "learning_rate": 2e-05, "loss": 0.81909418, "step": 157 }, { "epoch": 0.316, "grad_norm": 34.9930305480957, "learning_rate": 2e-05, "loss": 0.68764293, "step": 158 }, { "epoch": 0.318, "grad_norm": 38.409847259521484, "learning_rate": 2e-05, "loss": 0.52681422, "step": 159 }, { "epoch": 0.32, "grad_norm": 23.83429718017578, "learning_rate": 2e-05, "loss": 0.57993436, "step": 160 }, { "epoch": 0.322, "grad_norm": 23.568622589111328, "learning_rate": 2e-05, "loss": 0.51327848, "step": 161 }, { "epoch": 0.324, "grad_norm": 22.358970642089844, "learning_rate": 2e-05, "loss": 0.4694868, "step": 162 }, { "epoch": 0.326, "grad_norm": 37.23964309692383, "learning_rate": 2e-05, "loss": 0.52104098, "step": 163 }, { "epoch": 0.328, "grad_norm": 27.70103645324707, "learning_rate": 2e-05, "loss": 0.5051856, "step": 164 }, { "epoch": 0.33, "grad_norm": 19.599693298339844, "learning_rate": 2e-05, "loss": 0.48360169, "step": 165 }, { "epoch": 0.332, "grad_norm": 17.194791793823242, "learning_rate": 2e-05, "loss": 0.50848162, "step": 166 }, { "epoch": 0.334, "grad_norm": 27.718290328979492, "learning_rate": 2e-05, "loss": 0.46776086, "step": 167 }, { "epoch": 0.336, "grad_norm": 29.439424514770508, "learning_rate": 2e-05, "loss": 0.48417586, "step": 168 }, { "epoch": 0.338, "grad_norm": 13.250664710998535, "learning_rate": 2e-05, "loss": 0.47358498, "step": 169 }, { "epoch": 0.34, "grad_norm": 33.282222747802734, "learning_rate": 2e-05, "loss": 0.52936995, "step": 170 }, { "epoch": 0.342, "grad_norm": 30.323564529418945, "learning_rate": 2e-05, "loss": 0.50996685, "step": 171 }, { "epoch": 0.344, "grad_norm": 26.364585876464844, "learning_rate": 2e-05, "loss": 0.45880085, "step": 172 }, { "epoch": 0.346, "grad_norm": 16.831697463989258, "learning_rate": 2e-05, "loss": 0.44258234, "step": 173 }, { "epoch": 0.348, "grad_norm": 15.406957626342773, "learning_rate": 2e-05, "loss": 0.42777783, "step": 174 }, { "epoch": 0.35, "grad_norm": 27.91541862487793, "learning_rate": 2e-05, "loss": 0.44150254, "step": 175 }, { "epoch": 0.352, "grad_norm": 11.729520797729492, "learning_rate": 2e-05, "loss": 0.45409214, "step": 176 }, { "epoch": 0.354, "grad_norm": 10.498335838317871, "learning_rate": 2e-05, "loss": 0.45166379, "step": 177 }, { "epoch": 0.356, "grad_norm": 13.207727432250977, "learning_rate": 2e-05, "loss": 0.48104641, "step": 178 }, { "epoch": 0.358, "grad_norm": 38.07108688354492, "learning_rate": 2e-05, "loss": 0.4513554, "step": 179 }, { "epoch": 0.36, "grad_norm": 9.624302864074707, "learning_rate": 2e-05, "loss": 0.41277581, "step": 180 }, { "epoch": 0.362, "grad_norm": 8.47309398651123, "learning_rate": 2e-05, "loss": 0.42749909, "step": 181 }, { "epoch": 0.364, "grad_norm": 5.230127811431885, "learning_rate": 2e-05, "loss": 0.39681259, "step": 182 }, { "epoch": 0.366, "grad_norm": 11.410614013671875, "learning_rate": 2e-05, "loss": 0.39442861, "step": 183 }, { "epoch": 0.368, "grad_norm": 12.072883605957031, "learning_rate": 2e-05, "loss": 0.40660879, "step": 184 }, { "epoch": 0.37, "grad_norm": 13.550542831420898, "learning_rate": 2e-05, "loss": 0.40361917, "step": 185 }, { "epoch": 0.372, "grad_norm": 6.943983554840088, "learning_rate": 2e-05, "loss": 0.45435604, "step": 186 }, { "epoch": 0.374, "grad_norm": 7.458695888519287, "learning_rate": 2e-05, "loss": 0.36000335, "step": 187 }, { "epoch": 0.376, "grad_norm": 8.648634910583496, "learning_rate": 2e-05, "loss": 0.43435773, "step": 188 }, { "epoch": 0.378, "grad_norm": 5.483082294464111, "learning_rate": 2e-05, "loss": 0.40732422, "step": 189 }, { "epoch": 0.38, "grad_norm": 6.177685737609863, "learning_rate": 2e-05, "loss": 0.42347455, "step": 190 }, { "epoch": 0.382, "grad_norm": 3.8768444061279297, "learning_rate": 2e-05, "loss": 0.38617033, "step": 191 }, { "epoch": 0.384, "grad_norm": 4.045779705047607, "learning_rate": 2e-05, "loss": 0.3738341, "step": 192 }, { "epoch": 0.386, "grad_norm": 4.49653434753418, "learning_rate": 2e-05, "loss": 0.41778257, "step": 193 }, { "epoch": 0.388, "grad_norm": 3.0218472480773926, "learning_rate": 2e-05, "loss": 0.37529367, "step": 194 }, { "epoch": 0.39, "grad_norm": 4.7495503425598145, "learning_rate": 2e-05, "loss": 0.39258975, "step": 195 }, { "epoch": 0.392, "grad_norm": 4.811206817626953, "learning_rate": 2e-05, "loss": 0.39241675, "step": 196 }, { "epoch": 0.394, "grad_norm": 2.8001279830932617, "learning_rate": 2e-05, "loss": 0.42924637, "step": 197 }, { "epoch": 0.396, "grad_norm": 4.613635063171387, "learning_rate": 2e-05, "loss": 0.40108818, "step": 198 }, { "epoch": 0.398, "grad_norm": 6.350893497467041, "learning_rate": 2e-05, "loss": 0.45137745, "step": 199 }, { "epoch": 0.4, "grad_norm": 4.332032203674316, "learning_rate": 2e-05, "loss": 0.3409031, "step": 200 }, { "epoch": 0.402, "grad_norm": 3.7277584075927734, "learning_rate": 2e-05, "loss": 0.4174028, "step": 201 }, { "epoch": 0.404, "grad_norm": 2.3304224014282227, "learning_rate": 2e-05, "loss": 0.36742717, "step": 202 }, { "epoch": 0.406, "grad_norm": 2.5752127170562744, "learning_rate": 2e-05, "loss": 0.36992675, "step": 203 }, { "epoch": 0.408, "grad_norm": 2.8590283393859863, "learning_rate": 2e-05, "loss": 0.37675118, "step": 204 }, { "epoch": 0.41, "grad_norm": 3.1647579669952393, "learning_rate": 2e-05, "loss": 0.3679235, "step": 205 }, { "epoch": 0.412, "grad_norm": 2.6260170936584473, "learning_rate": 2e-05, "loss": 0.4498952, "step": 206 }, { "epoch": 0.414, "grad_norm": 2.4622349739074707, "learning_rate": 2e-05, "loss": 0.35507852, "step": 207 }, { "epoch": 0.416, "grad_norm": 2.9872968196868896, "learning_rate": 2e-05, "loss": 0.3549549, "step": 208 }, { "epoch": 0.418, "grad_norm": 3.04270601272583, "learning_rate": 2e-05, "loss": 0.41885364, "step": 209 }, { "epoch": 0.42, "grad_norm": 2.9570157527923584, "learning_rate": 2e-05, "loss": 0.43402624, "step": 210 }, { "epoch": 0.422, "grad_norm": 2.0857791900634766, "learning_rate": 2e-05, "loss": 0.40431345, "step": 211 }, { "epoch": 0.424, "grad_norm": 2.574413537979126, "learning_rate": 2e-05, "loss": 0.40234259, "step": 212 }, { "epoch": 0.426, "grad_norm": 2.3002917766571045, "learning_rate": 2e-05, "loss": 0.4213936, "step": 213 }, { "epoch": 0.428, "grad_norm": 2.598877429962158, "learning_rate": 2e-05, "loss": 0.42018652, "step": 214 }, { "epoch": 0.43, "grad_norm": 2.410801887512207, "learning_rate": 2e-05, "loss": 0.41117987, "step": 215 }, { "epoch": 0.432, "grad_norm": 2.146622657775879, "learning_rate": 2e-05, "loss": 0.37472421, "step": 216 }, { "epoch": 0.434, "grad_norm": 2.1747183799743652, "learning_rate": 2e-05, "loss": 0.42943805, "step": 217 }, { "epoch": 0.436, "grad_norm": 2.395761013031006, "learning_rate": 2e-05, "loss": 0.39487824, "step": 218 }, { "epoch": 0.438, "grad_norm": 2.3509767055511475, "learning_rate": 2e-05, "loss": 0.33290654, "step": 219 }, { "epoch": 0.44, "grad_norm": 3.0364584922790527, "learning_rate": 2e-05, "loss": 0.40460128, "step": 220 }, { "epoch": 0.442, "grad_norm": 2.566840648651123, "learning_rate": 2e-05, "loss": 0.41224802, "step": 221 }, { "epoch": 0.444, "grad_norm": 3.774826765060425, "learning_rate": 2e-05, "loss": 0.43019855, "step": 222 }, { "epoch": 0.446, "grad_norm": 2.1061086654663086, "learning_rate": 2e-05, "loss": 0.4085083, "step": 223 }, { "epoch": 0.448, "grad_norm": 2.4577176570892334, "learning_rate": 2e-05, "loss": 0.39225131, "step": 224 }, { "epoch": 0.45, "grad_norm": 4.246354579925537, "learning_rate": 2e-05, "loss": 0.43242174, "step": 225 }, { "epoch": 0.452, "grad_norm": 2.795057773590088, "learning_rate": 2e-05, "loss": 0.34006116, "step": 226 }, { "epoch": 0.454, "grad_norm": 2.231876850128174, "learning_rate": 2e-05, "loss": 0.39721149, "step": 227 }, { "epoch": 0.456, "grad_norm": 3.058765172958374, "learning_rate": 2e-05, "loss": 0.41030541, "step": 228 }, { "epoch": 0.458, "grad_norm": 2.9272921085357666, "learning_rate": 2e-05, "loss": 0.40642214, "step": 229 }, { "epoch": 0.46, "grad_norm": 2.118684768676758, "learning_rate": 2e-05, "loss": 0.40627092, "step": 230 }, { "epoch": 0.462, "grad_norm": 4.35416841506958, "learning_rate": 2e-05, "loss": 0.41149405, "step": 231 }, { "epoch": 0.464, "grad_norm": 3.272524356842041, "learning_rate": 2e-05, "loss": 0.40240556, "step": 232 }, { "epoch": 0.466, "grad_norm": 2.587944746017456, "learning_rate": 2e-05, "loss": 0.374861, "step": 233 }, { "epoch": 0.468, "grad_norm": 2.573385238647461, "learning_rate": 2e-05, "loss": 0.42833674, "step": 234 }, { "epoch": 0.47, "grad_norm": 2.941406726837158, "learning_rate": 2e-05, "loss": 0.44332218, "step": 235 }, { "epoch": 0.472, "grad_norm": 2.2706453800201416, "learning_rate": 2e-05, "loss": 0.38916123, "step": 236 }, { "epoch": 0.474, "grad_norm": 2.572369337081909, "learning_rate": 2e-05, "loss": 0.38100535, "step": 237 }, { "epoch": 0.476, "grad_norm": 2.6199276447296143, "learning_rate": 2e-05, "loss": 0.39440346, "step": 238 }, { "epoch": 0.478, "grad_norm": 2.281733512878418, "learning_rate": 2e-05, "loss": 0.3807506, "step": 239 }, { "epoch": 0.48, "grad_norm": 2.7786262035369873, "learning_rate": 2e-05, "loss": 0.34910616, "step": 240 }, { "epoch": 0.482, "grad_norm": 2.339578866958618, "learning_rate": 2e-05, "loss": 0.40123379, "step": 241 }, { "epoch": 0.484, "grad_norm": 2.5612998008728027, "learning_rate": 2e-05, "loss": 0.34685874, "step": 242 }, { "epoch": 0.486, "grad_norm": 2.650993824005127, "learning_rate": 2e-05, "loss": 0.37038237, "step": 243 }, { "epoch": 0.488, "grad_norm": 2.3199408054351807, "learning_rate": 2e-05, "loss": 0.35883853, "step": 244 }, { "epoch": 0.49, "grad_norm": 3.2961573600769043, "learning_rate": 2e-05, "loss": 0.4362112, "step": 245 }, { "epoch": 0.492, "grad_norm": 2.3096096515655518, "learning_rate": 2e-05, "loss": 0.3606168, "step": 246 }, { "epoch": 0.494, "grad_norm": 2.3645904064178467, "learning_rate": 2e-05, "loss": 0.42863727, "step": 247 }, { "epoch": 0.496, "grad_norm": 2.2203831672668457, "learning_rate": 2e-05, "loss": 0.40489531, "step": 248 }, { "epoch": 0.498, "grad_norm": 2.45977783203125, "learning_rate": 2e-05, "loss": 0.39940965, "step": 249 }, { "epoch": 0.5, "grad_norm": 2.9246718883514404, "learning_rate": 2e-05, "loss": 0.39140785, "step": 250 }, { "epoch": 0.502, "grad_norm": 1.8212493658065796, "learning_rate": 2e-05, "loss": 0.36203432, "step": 251 }, { "epoch": 0.504, "grad_norm": 2.2326316833496094, "learning_rate": 2e-05, "loss": 0.44940746, "step": 252 }, { "epoch": 0.506, "grad_norm": 2.1888906955718994, "learning_rate": 2e-05, "loss": 0.42821601, "step": 253 }, { "epoch": 0.508, "grad_norm": 3.0814597606658936, "learning_rate": 2e-05, "loss": 0.36414945, "step": 254 }, { "epoch": 0.51, "grad_norm": 2.7043652534484863, "learning_rate": 2e-05, "loss": 0.39156815, "step": 255 }, { "epoch": 0.512, "grad_norm": 2.3231751918792725, "learning_rate": 2e-05, "loss": 0.37385294, "step": 256 }, { "epoch": 0.514, "grad_norm": 2.275028705596924, "learning_rate": 2e-05, "loss": 0.38484564, "step": 257 }, { "epoch": 0.516, "grad_norm": 2.472454071044922, "learning_rate": 2e-05, "loss": 0.36479205, "step": 258 }, { "epoch": 0.518, "grad_norm": 2.479954481124878, "learning_rate": 2e-05, "loss": 0.38072658, "step": 259 }, { "epoch": 0.52, "grad_norm": 2.311441659927368, "learning_rate": 2e-05, "loss": 0.33453172, "step": 260 }, { "epoch": 0.522, "grad_norm": 3.8335723876953125, "learning_rate": 2e-05, "loss": 0.40032426, "step": 261 }, { "epoch": 0.524, "grad_norm": 2.4983038902282715, "learning_rate": 2e-05, "loss": 0.37842286, "step": 262 }, { "epoch": 0.526, "grad_norm": 2.8506362438201904, "learning_rate": 2e-05, "loss": 0.45623779, "step": 263 }, { "epoch": 0.528, "grad_norm": 2.5922443866729736, "learning_rate": 2e-05, "loss": 0.42091221, "step": 264 }, { "epoch": 0.53, "grad_norm": 2.4378294944763184, "learning_rate": 2e-05, "loss": 0.43036759, "step": 265 }, { "epoch": 0.532, "grad_norm": 2.5051655769348145, "learning_rate": 2e-05, "loss": 0.36193722, "step": 266 }, { "epoch": 0.534, "grad_norm": 2.4639880657196045, "learning_rate": 2e-05, "loss": 0.42388517, "step": 267 }, { "epoch": 0.536, "grad_norm": 2.43228816986084, "learning_rate": 2e-05, "loss": 0.37180698, "step": 268 }, { "epoch": 0.538, "grad_norm": 2.8706986904144287, "learning_rate": 2e-05, "loss": 0.3466678, "step": 269 }, { "epoch": 0.54, "grad_norm": 2.8321847915649414, "learning_rate": 2e-05, "loss": 0.36745179, "step": 270 }, { "epoch": 0.542, "grad_norm": 2.8276920318603516, "learning_rate": 2e-05, "loss": 0.38181525, "step": 271 }, { "epoch": 0.544, "grad_norm": 2.267299175262451, "learning_rate": 2e-05, "loss": 0.36299157, "step": 272 }, { "epoch": 0.546, "grad_norm": 2.176030158996582, "learning_rate": 2e-05, "loss": 0.35883152, "step": 273 }, { "epoch": 0.548, "grad_norm": 2.4151179790496826, "learning_rate": 2e-05, "loss": 0.35068169, "step": 274 }, { "epoch": 0.55, "grad_norm": 2.5002832412719727, "learning_rate": 2e-05, "loss": 0.4389441, "step": 275 }, { "epoch": 0.552, "grad_norm": 3.062483072280884, "learning_rate": 2e-05, "loss": 0.38645649, "step": 276 }, { "epoch": 0.554, "grad_norm": 1.8937181234359741, "learning_rate": 2e-05, "loss": 0.32340169, "step": 277 }, { "epoch": 0.556, "grad_norm": 2.4653971195220947, "learning_rate": 2e-05, "loss": 0.44503152, "step": 278 }, { "epoch": 0.558, "grad_norm": 2.399894952774048, "learning_rate": 2e-05, "loss": 0.3736915, "step": 279 }, { "epoch": 0.56, "grad_norm": 3.2674710750579834, "learning_rate": 2e-05, "loss": 0.37925249, "step": 280 }, { "epoch": 0.562, "grad_norm": 2.1981282234191895, "learning_rate": 2e-05, "loss": 0.379641, "step": 281 }, { "epoch": 0.564, "grad_norm": 2.4101972579956055, "learning_rate": 2e-05, "loss": 0.32225233, "step": 282 }, { "epoch": 0.566, "grad_norm": 3.3641390800476074, "learning_rate": 2e-05, "loss": 0.40978536, "step": 283 }, { "epoch": 0.568, "grad_norm": 2.453429937362671, "learning_rate": 2e-05, "loss": 0.42054862, "step": 284 }, { "epoch": 0.57, "grad_norm": 2.228393793106079, "learning_rate": 2e-05, "loss": 0.36940455, "step": 285 }, { "epoch": 0.572, "grad_norm": 1.8759377002716064, "learning_rate": 2e-05, "loss": 0.3323741, "step": 286 }, { "epoch": 0.574, "grad_norm": 2.5758562088012695, "learning_rate": 2e-05, "loss": 0.40134084, "step": 287 }, { "epoch": 0.576, "grad_norm": 2.2973790168762207, "learning_rate": 2e-05, "loss": 0.42725891, "step": 288 }, { "epoch": 0.578, "grad_norm": 2.4652743339538574, "learning_rate": 2e-05, "loss": 0.35494244, "step": 289 }, { "epoch": 0.58, "grad_norm": 2.2309014797210693, "learning_rate": 2e-05, "loss": 0.3866846, "step": 290 }, { "epoch": 0.582, "grad_norm": 2.242001533508301, "learning_rate": 2e-05, "loss": 0.4193109, "step": 291 }, { "epoch": 0.584, "grad_norm": 2.306638479232788, "learning_rate": 2e-05, "loss": 0.38435796, "step": 292 }, { "epoch": 0.586, "grad_norm": 2.3204331398010254, "learning_rate": 2e-05, "loss": 0.32084846, "step": 293 }, { "epoch": 0.588, "grad_norm": 2.4214060306549072, "learning_rate": 2e-05, "loss": 0.36136317, "step": 294 }, { "epoch": 0.59, "grad_norm": 1.7441887855529785, "learning_rate": 2e-05, "loss": 0.38891983, "step": 295 }, { "epoch": 0.592, "grad_norm": 2.1641783714294434, "learning_rate": 2e-05, "loss": 0.3675043, "step": 296 }, { "epoch": 0.594, "grad_norm": 2.537519931793213, "learning_rate": 2e-05, "loss": 0.39061338, "step": 297 }, { "epoch": 0.596, "grad_norm": 2.3807148933410645, "learning_rate": 2e-05, "loss": 0.38095194, "step": 298 }, { "epoch": 0.598, "grad_norm": 3.760620594024658, "learning_rate": 2e-05, "loss": 0.38023266, "step": 299 }, { "epoch": 0.6, "grad_norm": 2.8382818698883057, "learning_rate": 2e-05, "loss": 0.42183822, "step": 300 }, { "epoch": 0.602, "grad_norm": 2.5104598999023438, "learning_rate": 2e-05, "loss": 0.38760763, "step": 301 }, { "epoch": 0.604, "grad_norm": 3.4479098320007324, "learning_rate": 2e-05, "loss": 0.38792706, "step": 302 }, { "epoch": 0.606, "grad_norm": 2.2696542739868164, "learning_rate": 2e-05, "loss": 0.43671882, "step": 303 }, { "epoch": 0.608, "grad_norm": 2.4829628467559814, "learning_rate": 2e-05, "loss": 0.38020101, "step": 304 }, { "epoch": 0.61, "grad_norm": 3.053079843521118, "learning_rate": 2e-05, "loss": 0.42556402, "step": 305 }, { "epoch": 0.612, "grad_norm": 2.130146026611328, "learning_rate": 2e-05, "loss": 0.36177719, "step": 306 }, { "epoch": 0.614, "grad_norm": 5.296567440032959, "learning_rate": 2e-05, "loss": 0.40961123, "step": 307 }, { "epoch": 0.616, "grad_norm": 2.4534523487091064, "learning_rate": 2e-05, "loss": 0.34303677, "step": 308 }, { "epoch": 0.618, "grad_norm": 1.9672064781188965, "learning_rate": 2e-05, "loss": 0.42068005, "step": 309 }, { "epoch": 0.62, "grad_norm": 3.3204848766326904, "learning_rate": 2e-05, "loss": 0.37758303, "step": 310 }, { "epoch": 0.622, "grad_norm": 2.4542860984802246, "learning_rate": 2e-05, "loss": 0.41424572, "step": 311 }, { "epoch": 0.624, "grad_norm": 1.9260770082473755, "learning_rate": 2e-05, "loss": 0.34970552, "step": 312 }, { "epoch": 0.626, "grad_norm": 2.6891448497772217, "learning_rate": 2e-05, "loss": 0.30088019, "step": 313 }, { "epoch": 0.628, "grad_norm": 2.968409538269043, "learning_rate": 2e-05, "loss": 0.42999855, "step": 314 }, { "epoch": 0.63, "grad_norm": 2.5964770317077637, "learning_rate": 2e-05, "loss": 0.41451907, "step": 315 }, { "epoch": 0.632, "grad_norm": 2.4311039447784424, "learning_rate": 2e-05, "loss": 0.35747087, "step": 316 }, { "epoch": 0.634, "grad_norm": 2.2850985527038574, "learning_rate": 2e-05, "loss": 0.45336699, "step": 317 }, { "epoch": 0.636, "grad_norm": 2.128378391265869, "learning_rate": 2e-05, "loss": 0.40923813, "step": 318 }, { "epoch": 0.638, "grad_norm": 2.9572834968566895, "learning_rate": 2e-05, "loss": 0.40831214, "step": 319 }, { "epoch": 0.64, "grad_norm": 2.1294198036193848, "learning_rate": 2e-05, "loss": 0.352382, "step": 320 }, { "epoch": 0.642, "grad_norm": 2.146904706954956, "learning_rate": 2e-05, "loss": 0.36986923, "step": 321 }, { "epoch": 0.644, "grad_norm": 2.226712465286255, "learning_rate": 2e-05, "loss": 0.37009352, "step": 322 }, { "epoch": 0.646, "grad_norm": 2.8748672008514404, "learning_rate": 2e-05, "loss": 0.39577782, "step": 323 }, { "epoch": 0.648, "grad_norm": 2.0993311405181885, "learning_rate": 2e-05, "loss": 0.34025472, "step": 324 }, { "epoch": 0.65, "grad_norm": 2.553697347640991, "learning_rate": 2e-05, "loss": 0.35979444, "step": 325 }, { "epoch": 0.652, "grad_norm": 3.0230658054351807, "learning_rate": 2e-05, "loss": 0.38101768, "step": 326 }, { "epoch": 0.654, "grad_norm": 2.227163076400757, "learning_rate": 2e-05, "loss": 0.36153871, "step": 327 }, { "epoch": 0.656, "grad_norm": 2.366971731185913, "learning_rate": 2e-05, "loss": 0.40203893, "step": 328 }, { "epoch": 0.658, "grad_norm": 4.39686393737793, "learning_rate": 2e-05, "loss": 0.34971178, "step": 329 }, { "epoch": 0.66, "grad_norm": 3.1364448070526123, "learning_rate": 2e-05, "loss": 0.38524896, "step": 330 }, { "epoch": 0.662, "grad_norm": 2.4799602031707764, "learning_rate": 2e-05, "loss": 0.3510325, "step": 331 }, { "epoch": 0.664, "grad_norm": 3.8624725341796875, "learning_rate": 2e-05, "loss": 0.40343612, "step": 332 }, { "epoch": 0.666, "grad_norm": 2.8651304244995117, "learning_rate": 2e-05, "loss": 0.3543312, "step": 333 }, { "epoch": 0.668, "grad_norm": 2.507993221282959, "learning_rate": 2e-05, "loss": 0.4071638, "step": 334 }, { "epoch": 0.67, "grad_norm": 2.307523488998413, "learning_rate": 2e-05, "loss": 0.34446114, "step": 335 }, { "epoch": 0.672, "grad_norm": 2.8034613132476807, "learning_rate": 2e-05, "loss": 0.36103964, "step": 336 }, { "epoch": 0.674, "grad_norm": 2.804739236831665, "learning_rate": 2e-05, "loss": 0.34058389, "step": 337 }, { "epoch": 0.676, "grad_norm": 2.3788864612579346, "learning_rate": 2e-05, "loss": 0.3461957, "step": 338 }, { "epoch": 0.678, "grad_norm": 2.2598109245300293, "learning_rate": 2e-05, "loss": 0.34844282, "step": 339 }, { "epoch": 0.68, "grad_norm": 2.7016749382019043, "learning_rate": 2e-05, "loss": 0.37224805, "step": 340 }, { "epoch": 0.682, "grad_norm": 2.102294445037842, "learning_rate": 2e-05, "loss": 0.34660944, "step": 341 }, { "epoch": 0.684, "grad_norm": 2.256808280944824, "learning_rate": 2e-05, "loss": 0.36872256, "step": 342 }, { "epoch": 0.686, "grad_norm": 2.219933032989502, "learning_rate": 2e-05, "loss": 0.41074491, "step": 343 }, { "epoch": 0.688, "grad_norm": 38.59788131713867, "learning_rate": 2e-05, "loss": 0.42532402, "step": 344 }, { "epoch": 0.69, "grad_norm": 2.8445427417755127, "learning_rate": 2e-05, "loss": 0.40610889, "step": 345 }, { "epoch": 0.692, "grad_norm": 3.2422025203704834, "learning_rate": 2e-05, "loss": 0.34015438, "step": 346 }, { "epoch": 0.694, "grad_norm": 2.0103511810302734, "learning_rate": 2e-05, "loss": 0.33063939, "step": 347 }, { "epoch": 0.696, "grad_norm": 2.821288585662842, "learning_rate": 2e-05, "loss": 0.40264255, "step": 348 }, { "epoch": 0.698, "grad_norm": 3.3211405277252197, "learning_rate": 2e-05, "loss": 0.33633679, "step": 349 }, { "epoch": 0.7, "grad_norm": 2.1281816959381104, "learning_rate": 2e-05, "loss": 0.36737266, "step": 350 }, { "epoch": 0.702, "grad_norm": 2.546674966812134, "learning_rate": 2e-05, "loss": 0.39834434, "step": 351 }, { "epoch": 0.704, "grad_norm": 1.9590989351272583, "learning_rate": 2e-05, "loss": 0.376955, "step": 352 }, { "epoch": 0.706, "grad_norm": 2.5581297874450684, "learning_rate": 2e-05, "loss": 0.39899454, "step": 353 }, { "epoch": 0.708, "grad_norm": 2.3324179649353027, "learning_rate": 2e-05, "loss": 0.37603098, "step": 354 }, { "epoch": 0.71, "grad_norm": 2.334880828857422, "learning_rate": 2e-05, "loss": 0.42339247, "step": 355 }, { "epoch": 0.712, "grad_norm": 3.104797601699829, "learning_rate": 2e-05, "loss": 0.36481744, "step": 356 }, { "epoch": 0.714, "grad_norm": 6.291801929473877, "learning_rate": 2e-05, "loss": 0.40678573, "step": 357 }, { "epoch": 0.716, "grad_norm": 4.675421237945557, "learning_rate": 2e-05, "loss": 0.39416665, "step": 358 }, { "epoch": 0.718, "grad_norm": 2.531383752822876, "learning_rate": 2e-05, "loss": 0.35706043, "step": 359 }, { "epoch": 0.72, "grad_norm": 5.778651237487793, "learning_rate": 2e-05, "loss": 0.41085088, "step": 360 }, { "epoch": 0.722, "grad_norm": 2.785738945007324, "learning_rate": 2e-05, "loss": 0.31312498, "step": 361 }, { "epoch": 0.724, "grad_norm": 4.2192063331604, "learning_rate": 2e-05, "loss": 0.43665931, "step": 362 }, { "epoch": 0.726, "grad_norm": 2.268357515335083, "learning_rate": 2e-05, "loss": 0.41825864, "step": 363 }, { "epoch": 0.728, "grad_norm": 2.246572494506836, "learning_rate": 2e-05, "loss": 0.36334115, "step": 364 }, { "epoch": 0.73, "grad_norm": 2.2913174629211426, "learning_rate": 2e-05, "loss": 0.40367627, "step": 365 }, { "epoch": 0.732, "grad_norm": 1.9627586603164673, "learning_rate": 2e-05, "loss": 0.36439764, "step": 366 }, { "epoch": 0.734, "grad_norm": 2.0793967247009277, "learning_rate": 2e-05, "loss": 0.3633121, "step": 367 }, { "epoch": 0.736, "grad_norm": 2.039069175720215, "learning_rate": 2e-05, "loss": 0.37875551, "step": 368 }, { "epoch": 0.738, "grad_norm": 2.103626251220703, "learning_rate": 2e-05, "loss": 0.34807205, "step": 369 }, { "epoch": 0.74, "grad_norm": 1.898775339126587, "learning_rate": 2e-05, "loss": 0.3396126, "step": 370 }, { "epoch": 0.742, "grad_norm": 2.118276596069336, "learning_rate": 2e-05, "loss": 0.38108289, "step": 371 }, { "epoch": 0.744, "grad_norm": 2.3276379108428955, "learning_rate": 2e-05, "loss": 0.35690206, "step": 372 }, { "epoch": 0.746, "grad_norm": 2.173872232437134, "learning_rate": 2e-05, "loss": 0.35335249, "step": 373 }, { "epoch": 0.748, "grad_norm": 1.8903744220733643, "learning_rate": 2e-05, "loss": 0.37995228, "step": 374 }, { "epoch": 0.75, "grad_norm": 2.2106189727783203, "learning_rate": 2e-05, "loss": 0.34882003, "step": 375 }, { "epoch": 0.752, "grad_norm": 2.048971652984619, "learning_rate": 2e-05, "loss": 0.43542331, "step": 376 }, { "epoch": 0.754, "grad_norm": 2.003777503967285, "learning_rate": 2e-05, "loss": 0.3117795, "step": 377 }, { "epoch": 0.756, "grad_norm": 2.0449531078338623, "learning_rate": 2e-05, "loss": 0.33903271, "step": 378 }, { "epoch": 0.758, "grad_norm": 2.3183586597442627, "learning_rate": 2e-05, "loss": 0.33581644, "step": 379 }, { "epoch": 0.76, "grad_norm": 1.9782938957214355, "learning_rate": 2e-05, "loss": 0.31719434, "step": 380 }, { "epoch": 0.762, "grad_norm": 2.3408687114715576, "learning_rate": 2e-05, "loss": 0.38454038, "step": 381 }, { "epoch": 0.764, "grad_norm": 2.642451524734497, "learning_rate": 2e-05, "loss": 0.36350057, "step": 382 }, { "epoch": 0.766, "grad_norm": 2.2716588973999023, "learning_rate": 2e-05, "loss": 0.3672176, "step": 383 }, { "epoch": 0.768, "grad_norm": 2.143385410308838, "learning_rate": 2e-05, "loss": 0.35499257, "step": 384 }, { "epoch": 0.77, "grad_norm": 2.251404285430908, "learning_rate": 2e-05, "loss": 0.33705157, "step": 385 }, { "epoch": 0.772, "grad_norm": 2.599787712097168, "learning_rate": 2e-05, "loss": 0.40010357, "step": 386 }, { "epoch": 0.774, "grad_norm": 2.932671308517456, "learning_rate": 2e-05, "loss": 0.41798162, "step": 387 }, { "epoch": 0.776, "grad_norm": 3.084031343460083, "learning_rate": 2e-05, "loss": 0.40057978, "step": 388 }, { "epoch": 0.778, "grad_norm": 2.370199680328369, "learning_rate": 2e-05, "loss": 0.37256229, "step": 389 }, { "epoch": 0.78, "grad_norm": 2.715414524078369, "learning_rate": 2e-05, "loss": 0.3550342, "step": 390 }, { "epoch": 0.782, "grad_norm": 2.370290756225586, "learning_rate": 2e-05, "loss": 0.40933335, "step": 391 }, { "epoch": 0.784, "grad_norm": 2.753520965576172, "learning_rate": 2e-05, "loss": 0.37647349, "step": 392 }, { "epoch": 0.786, "grad_norm": 3.428513526916504, "learning_rate": 2e-05, "loss": 0.37809077, "step": 393 }, { "epoch": 0.788, "grad_norm": 3.0542285442352295, "learning_rate": 2e-05, "loss": 0.3635264, "step": 394 }, { "epoch": 0.79, "grad_norm": 2.631666421890259, "learning_rate": 2e-05, "loss": 0.37626997, "step": 395 }, { "epoch": 0.792, "grad_norm": 2.9206936359405518, "learning_rate": 2e-05, "loss": 0.3362987, "step": 396 }, { "epoch": 0.794, "grad_norm": 2.600062608718872, "learning_rate": 2e-05, "loss": 0.36531377, "step": 397 }, { "epoch": 0.796, "grad_norm": 2.959347724914551, "learning_rate": 2e-05, "loss": 0.39553091, "step": 398 }, { "epoch": 0.798, "grad_norm": 2.826603651046753, "learning_rate": 2e-05, "loss": 0.35759783, "step": 399 }, { "epoch": 0.8, "grad_norm": 4.756749153137207, "learning_rate": 2e-05, "loss": 0.3761026, "step": 400 }, { "epoch": 0.802, "grad_norm": 2.015024185180664, "learning_rate": 2e-05, "loss": 0.37341845, "step": 401 }, { "epoch": 0.804, "grad_norm": 2.473043203353882, "learning_rate": 2e-05, "loss": 0.38090366, "step": 402 }, { "epoch": 0.806, "grad_norm": 3.260671854019165, "learning_rate": 2e-05, "loss": 0.40409699, "step": 403 }, { "epoch": 0.808, "grad_norm": 1.9450372457504272, "learning_rate": 2e-05, "loss": 0.38324416, "step": 404 }, { "epoch": 0.81, "grad_norm": 2.213120460510254, "learning_rate": 2e-05, "loss": 0.32400131, "step": 405 }, { "epoch": 0.812, "grad_norm": 2.4603614807128906, "learning_rate": 2e-05, "loss": 0.38184336, "step": 406 }, { "epoch": 0.814, "grad_norm": 2.590217351913452, "learning_rate": 2e-05, "loss": 0.31220454, "step": 407 }, { "epoch": 0.816, "grad_norm": 2.3522443771362305, "learning_rate": 2e-05, "loss": 0.37040508, "step": 408 }, { "epoch": 0.818, "grad_norm": 3.8462178707122803, "learning_rate": 2e-05, "loss": 0.31150436, "step": 409 }, { "epoch": 0.82, "grad_norm": 2.3633556365966797, "learning_rate": 2e-05, "loss": 0.33486691, "step": 410 }, { "epoch": 0.822, "grad_norm": 2.797116756439209, "learning_rate": 2e-05, "loss": 0.35024345, "step": 411 }, { "epoch": 0.824, "grad_norm": 2.787532329559326, "learning_rate": 2e-05, "loss": 0.34601441, "step": 412 }, { "epoch": 0.826, "grad_norm": 2.7806966304779053, "learning_rate": 2e-05, "loss": 0.37785235, "step": 413 }, { "epoch": 0.828, "grad_norm": 3.3908019065856934, "learning_rate": 2e-05, "loss": 0.40772134, "step": 414 }, { "epoch": 0.83, "grad_norm": 2.3474032878875732, "learning_rate": 2e-05, "loss": 0.31730652, "step": 415 }, { "epoch": 0.832, "grad_norm": 3.695180892944336, "learning_rate": 2e-05, "loss": 0.38893342, "step": 416 }, { "epoch": 0.834, "grad_norm": 2.969644784927368, "learning_rate": 2e-05, "loss": 0.37266415, "step": 417 }, { "epoch": 0.836, "grad_norm": 3.3439037799835205, "learning_rate": 2e-05, "loss": 0.36170414, "step": 418 }, { "epoch": 0.838, "grad_norm": 2.669370651245117, "learning_rate": 2e-05, "loss": 0.41537088, "step": 419 }, { "epoch": 0.84, "grad_norm": 3.2042794227600098, "learning_rate": 2e-05, "loss": 0.34706631, "step": 420 }, { "epoch": 0.842, "grad_norm": 3.0707359313964844, "learning_rate": 2e-05, "loss": 0.36334276, "step": 421 }, { "epoch": 0.844, "grad_norm": 3.0460245609283447, "learning_rate": 2e-05, "loss": 0.3588101, "step": 422 }, { "epoch": 0.846, "grad_norm": 3.4480361938476562, "learning_rate": 2e-05, "loss": 0.35593536, "step": 423 }, { "epoch": 0.848, "grad_norm": 3.668825626373291, "learning_rate": 2e-05, "loss": 0.36293906, "step": 424 }, { "epoch": 0.85, "grad_norm": 3.644979953765869, "learning_rate": 2e-05, "loss": 0.33053726, "step": 425 }, { "epoch": 0.852, "grad_norm": 3.7507691383361816, "learning_rate": 2e-05, "loss": 0.33477077, "step": 426 }, { "epoch": 0.854, "grad_norm": 3.2668840885162354, "learning_rate": 2e-05, "loss": 0.35934401, "step": 427 }, { "epoch": 0.856, "grad_norm": 3.24027681350708, "learning_rate": 2e-05, "loss": 0.28396821, "step": 428 }, { "epoch": 0.858, "grad_norm": 12.503198623657227, "learning_rate": 2e-05, "loss": 0.32771713, "step": 429 }, { "epoch": 0.86, "grad_norm": 4.297608852386475, "learning_rate": 2e-05, "loss": 0.33745548, "step": 430 }, { "epoch": 0.862, "grad_norm": 4.472741603851318, "learning_rate": 2e-05, "loss": 0.27140242, "step": 431 }, { "epoch": 0.864, "grad_norm": 3.206968069076538, "learning_rate": 2e-05, "loss": 0.28080299, "step": 432 }, { "epoch": 0.866, "grad_norm": 3.3867924213409424, "learning_rate": 2e-05, "loss": 0.3420102, "step": 433 }, { "epoch": 0.868, "grad_norm": 3.833103656768799, "learning_rate": 2e-05, "loss": 0.27758431, "step": 434 }, { "epoch": 0.87, "grad_norm": 3.7450990676879883, "learning_rate": 2e-05, "loss": 0.27860394, "step": 435 }, { "epoch": 0.872, "grad_norm": 4.227891445159912, "learning_rate": 2e-05, "loss": 0.31056997, "step": 436 }, { "epoch": 0.874, "grad_norm": 3.7188467979431152, "learning_rate": 2e-05, "loss": 0.26568019, "step": 437 }, { "epoch": 0.876, "grad_norm": 3.5237154960632324, "learning_rate": 2e-05, "loss": 0.25259641, "step": 438 }, { "epoch": 0.878, "grad_norm": 5.686617851257324, "learning_rate": 2e-05, "loss": 0.2755993, "step": 439 }, { "epoch": 0.88, "grad_norm": 3.4197587966918945, "learning_rate": 2e-05, "loss": 0.23646541, "step": 440 }, { "epoch": 0.882, "grad_norm": 4.551022052764893, "learning_rate": 2e-05, "loss": 0.22122362, "step": 441 }, { "epoch": 0.884, "grad_norm": 4.930042266845703, "learning_rate": 2e-05, "loss": 0.27152666, "step": 442 }, { "epoch": 0.886, "grad_norm": 3.824270725250244, "learning_rate": 2e-05, "loss": 0.2230306, "step": 443 }, { "epoch": 0.888, "grad_norm": 3.917961597442627, "learning_rate": 2e-05, "loss": 0.27966443, "step": 444 }, { "epoch": 0.89, "grad_norm": 3.9343419075012207, "learning_rate": 2e-05, "loss": 0.20023456, "step": 445 }, { "epoch": 0.892, "grad_norm": 4.3064775466918945, "learning_rate": 2e-05, "loss": 0.19676761, "step": 446 }, { "epoch": 0.894, "grad_norm": 3.1968557834625244, "learning_rate": 2e-05, "loss": 0.19466686, "step": 447 }, { "epoch": 0.896, "grad_norm": 4.086765289306641, "learning_rate": 2e-05, "loss": 0.22548294, "step": 448 }, { "epoch": 0.898, "grad_norm": 4.0280256271362305, "learning_rate": 2e-05, "loss": 0.22720584, "step": 449 }, { "epoch": 0.9, "grad_norm": 4.620728492736816, "learning_rate": 2e-05, "loss": 0.17157443, "step": 450 }, { "epoch": 0.902, "grad_norm": 3.6249382495880127, "learning_rate": 2e-05, "loss": 0.23641428, "step": 451 }, { "epoch": 0.904, "grad_norm": 4.136668682098389, "learning_rate": 2e-05, "loss": 0.21479097, "step": 452 }, { "epoch": 0.906, "grad_norm": 7.2505879402160645, "learning_rate": 2e-05, "loss": 0.27446824, "step": 453 }, { "epoch": 0.908, "grad_norm": 5.988163471221924, "learning_rate": 2e-05, "loss": 0.21940503, "step": 454 }, { "epoch": 0.91, "grad_norm": 6.0386505126953125, "learning_rate": 2e-05, "loss": 0.19830205, "step": 455 }, { "epoch": 0.912, "grad_norm": 3.1702799797058105, "learning_rate": 2e-05, "loss": 0.1882799, "step": 456 }, { "epoch": 0.914, "grad_norm": 4.0222015380859375, "learning_rate": 2e-05, "loss": 0.23622099, "step": 457 }, { "epoch": 0.916, "grad_norm": 3.4282891750335693, "learning_rate": 2e-05, "loss": 0.18106145, "step": 458 }, { "epoch": 0.918, "grad_norm": 4.8152337074279785, "learning_rate": 2e-05, "loss": 0.17646313, "step": 459 }, { "epoch": 0.92, "grad_norm": 3.1678340435028076, "learning_rate": 2e-05, "loss": 0.20366624, "step": 460 }, { "epoch": 0.922, "grad_norm": 2.8468918800354004, "learning_rate": 2e-05, "loss": 0.20546392, "step": 461 }, { "epoch": 0.924, "grad_norm": 3.2070858478546143, "learning_rate": 2e-05, "loss": 0.16359358, "step": 462 }, { "epoch": 0.926, "grad_norm": 3.970043420791626, "learning_rate": 2e-05, "loss": 0.21810345, "step": 463 }, { "epoch": 0.928, "grad_norm": 2.7538678646087646, "learning_rate": 2e-05, "loss": 0.15927938, "step": 464 }, { "epoch": 0.93, "grad_norm": 3.887653112411499, "learning_rate": 2e-05, "loss": 0.17965358, "step": 465 }, { "epoch": 0.932, "grad_norm": 2.4924097061157227, "learning_rate": 2e-05, "loss": 0.14478508, "step": 466 }, { "epoch": 0.934, "grad_norm": 2.4270923137664795, "learning_rate": 2e-05, "loss": 0.1299592, "step": 467 }, { "epoch": 0.936, "grad_norm": 2.980048179626465, "learning_rate": 2e-05, "loss": 0.16589662, "step": 468 }, { "epoch": 0.938, "grad_norm": 2.9414331912994385, "learning_rate": 2e-05, "loss": 0.15968111, "step": 469 }, { "epoch": 0.94, "grad_norm": 3.0330123901367188, "learning_rate": 2e-05, "loss": 0.17940134, "step": 470 }, { "epoch": 0.942, "grad_norm": 4.506875514984131, "learning_rate": 2e-05, "loss": 0.17710808, "step": 471 }, { "epoch": 0.944, "grad_norm": 4.033360481262207, "learning_rate": 2e-05, "loss": 0.16440117, "step": 472 }, { "epoch": 0.946, "grad_norm": 3.041499376296997, "learning_rate": 2e-05, "loss": 0.18891403, "step": 473 }, { "epoch": 0.948, "grad_norm": 3.819559335708618, "learning_rate": 2e-05, "loss": 0.17200641, "step": 474 }, { "epoch": 0.95, "grad_norm": 2.71626615524292, "learning_rate": 2e-05, "loss": 0.14698125, "step": 475 }, { "epoch": 0.952, "grad_norm": 4.742694854736328, "learning_rate": 2e-05, "loss": 0.18361038, "step": 476 }, { "epoch": 0.954, "grad_norm": 3.403785228729248, "learning_rate": 2e-05, "loss": 0.15806127, "step": 477 }, { "epoch": 0.956, "grad_norm": 3.088829278945923, "learning_rate": 2e-05, "loss": 0.19003233, "step": 478 }, { "epoch": 0.958, "grad_norm": 4.706967353820801, "learning_rate": 2e-05, "loss": 0.19501597, "step": 479 }, { "epoch": 0.96, "grad_norm": 2.4652099609375, "learning_rate": 2e-05, "loss": 0.16612351, "step": 480 }, { "epoch": 0.962, "grad_norm": 4.185473918914795, "learning_rate": 2e-05, "loss": 0.14217728, "step": 481 }, { "epoch": 0.964, "grad_norm": 3.6370861530303955, "learning_rate": 2e-05, "loss": 0.18404439, "step": 482 }, { "epoch": 0.966, "grad_norm": 4.554826736450195, "learning_rate": 2e-05, "loss": 0.16499752, "step": 483 }, { "epoch": 0.968, "grad_norm": 2.758190393447876, "learning_rate": 2e-05, "loss": 0.13831472, "step": 484 }, { "epoch": 0.97, "grad_norm": 2.261568784713745, "learning_rate": 2e-05, "loss": 0.13929909, "step": 485 }, { "epoch": 0.972, "grad_norm": 2.8820412158966064, "learning_rate": 2e-05, "loss": 0.14241502, "step": 486 }, { "epoch": 0.974, "grad_norm": 3.352541208267212, "learning_rate": 2e-05, "loss": 0.16133608, "step": 487 }, { "epoch": 0.976, "grad_norm": 3.834998846054077, "learning_rate": 2e-05, "loss": 0.17684533, "step": 488 }, { "epoch": 0.978, "grad_norm": 2.977851152420044, "learning_rate": 2e-05, "loss": 0.12988587, "step": 489 }, { "epoch": 0.98, "grad_norm": 6.011771202087402, "learning_rate": 2e-05, "loss": 0.15765052, "step": 490 }, { "epoch": 0.982, "grad_norm": 2.6912081241607666, "learning_rate": 2e-05, "loss": 0.12373734, "step": 491 }, { "epoch": 0.984, "grad_norm": 2.2780370712280273, "learning_rate": 2e-05, "loss": 0.15556982, "step": 492 }, { "epoch": 0.986, "grad_norm": 2.763603448867798, "learning_rate": 2e-05, "loss": 0.14312422, "step": 493 }, { "epoch": 0.988, "grad_norm": 2.51889705657959, "learning_rate": 2e-05, "loss": 0.14164892, "step": 494 }, { "epoch": 0.99, "grad_norm": 2.4747464656829834, "learning_rate": 2e-05, "loss": 0.18990183, "step": 495 }, { "epoch": 0.992, "grad_norm": 2.276155471801758, "learning_rate": 2e-05, "loss": 0.13020995, "step": 496 }, { "epoch": 0.994, "grad_norm": 2.6515796184539795, "learning_rate": 2e-05, "loss": 0.15413743, "step": 497 }, { "epoch": 0.996, "grad_norm": 2.347593069076538, "learning_rate": 2e-05, "loss": 0.15986988, "step": 498 }, { "epoch": 0.998, "grad_norm": 2.6618576049804688, "learning_rate": 2e-05, "loss": 0.15566903, "step": 499 }, { "epoch": 1.0, "grad_norm": 2.4387569427490234, "learning_rate": 2e-05, "loss": 0.15638649, "step": 500 }, { "epoch": 1.0, "eval_performance": { "AngleClassification_1": 0.976, "AngleClassification_2": 0.634, "AngleClassification_3": 0.499001996007984, "Equal_1": 0.13, "Equal_2": 0.08383233532934131, "Equal_3": 0.14770459081836326, "LineComparison_1": 0.498, "LineComparison_2": 0.47305389221556887, "LineComparison_3": 0.5069860279441117, "Parallel_1": 0.40480961923847697, "Parallel_2": 0.9038076152304609, "Parallel_3": 0.254, "Perpendicular_1": 0.522, "Perpendicular_2": 0.16, "Perpendicular_3": 0.1092184368737475, "PointLiesOnCircle_1": 0.8086172344689379, "PointLiesOnCircle_2": 0.5923333333333334, "PointLiesOnCircle_3": 0.29560000000000003, "PointLiesOnLine_1": 0.4529058116232465, "PointLiesOnLine_2": 0.342685370741483, "PointLiesOnLine_3": 0.2315369261477046 }, "eval_runtime": 324.0838, "eval_samples_per_second": 32.399, "eval_steps_per_second": 0.648, "step": 500 }, { "epoch": 1.002, "grad_norm": 4.821013450622559, "learning_rate": 2e-05, "loss": 0.1556485, "step": 501 }, { "epoch": 1.004, "grad_norm": 1.8802967071533203, "learning_rate": 2e-05, "loss": 0.11685692, "step": 502 }, { "epoch": 1.006, "grad_norm": 1.7924933433532715, "learning_rate": 2e-05, "loss": 0.11074463, "step": 503 }, { "epoch": 1.008, "grad_norm": 3.829883337020874, "learning_rate": 2e-05, "loss": 0.1522513, "step": 504 }, { "epoch": 1.01, "grad_norm": 4.246246337890625, "learning_rate": 2e-05, "loss": 0.15618658, "step": 505 }, { "epoch": 1.012, "grad_norm": 4.451237678527832, "learning_rate": 2e-05, "loss": 0.23163846, "step": 506 }, { "epoch": 1.014, "grad_norm": 2.5041894912719727, "learning_rate": 2e-05, "loss": 0.13576819, "step": 507 }, { "epoch": 1.016, "grad_norm": 2.6320042610168457, "learning_rate": 2e-05, "loss": 0.16223896, "step": 508 }, { "epoch": 1.018, "grad_norm": 1.8667449951171875, "learning_rate": 2e-05, "loss": 0.12035898, "step": 509 }, { "epoch": 1.02, "grad_norm": 2.622072458267212, "learning_rate": 2e-05, "loss": 0.13647205, "step": 510 }, { "epoch": 1.022, "grad_norm": 4.481700420379639, "learning_rate": 2e-05, "loss": 0.15954015, "step": 511 }, { "epoch": 1.024, "grad_norm": 3.5658326148986816, "learning_rate": 2e-05, "loss": 0.12799534, "step": 512 }, { "epoch": 1.026, "grad_norm": 6.7309370040893555, "learning_rate": 2e-05, "loss": 0.13949288, "step": 513 }, { "epoch": 1.028, "grad_norm": 3.3907320499420166, "learning_rate": 2e-05, "loss": 0.1515884, "step": 514 }, { "epoch": 1.03, "grad_norm": 2.2120602130889893, "learning_rate": 2e-05, "loss": 0.11528069, "step": 515 }, { "epoch": 1.032, "grad_norm": 1.858370065689087, "learning_rate": 2e-05, "loss": 0.10049677, "step": 516 }, { "epoch": 1.034, "grad_norm": 3.5174007415771484, "learning_rate": 2e-05, "loss": 0.16639367, "step": 517 }, { "epoch": 1.036, "grad_norm": 2.5854098796844482, "learning_rate": 2e-05, "loss": 0.16602588, "step": 518 }, { "epoch": 1.038, "grad_norm": 2.3209636211395264, "learning_rate": 2e-05, "loss": 0.11949471, "step": 519 }, { "epoch": 1.04, "grad_norm": 2.098727226257324, "learning_rate": 2e-05, "loss": 0.1452582, "step": 520 }, { "epoch": 1.042, "grad_norm": 4.866150379180908, "learning_rate": 2e-05, "loss": 0.12613255, "step": 521 }, { "epoch": 1.044, "grad_norm": 2.6759192943573, "learning_rate": 2e-05, "loss": 0.16882387, "step": 522 }, { "epoch": 1.046, "grad_norm": 2.8629543781280518, "learning_rate": 2e-05, "loss": 0.17408426, "step": 523 }, { "epoch": 1.048, "grad_norm": 3.283302068710327, "learning_rate": 2e-05, "loss": 0.13273655, "step": 524 }, { "epoch": 1.05, "grad_norm": 2.690016984939575, "learning_rate": 2e-05, "loss": 0.16916898, "step": 525 }, { "epoch": 1.052, "grad_norm": 3.075875997543335, "learning_rate": 2e-05, "loss": 0.15706718, "step": 526 }, { "epoch": 1.054, "grad_norm": 4.240052223205566, "learning_rate": 2e-05, "loss": 0.12036274, "step": 527 }, { "epoch": 1.056, "grad_norm": 2.4694156646728516, "learning_rate": 2e-05, "loss": 0.16254291, "step": 528 }, { "epoch": 1.058, "grad_norm": 2.702230930328369, "learning_rate": 2e-05, "loss": 0.14166299, "step": 529 }, { "epoch": 1.06, "grad_norm": 2.753516912460327, "learning_rate": 2e-05, "loss": 0.15594101, "step": 530 }, { "epoch": 1.062, "grad_norm": 3.5839767456054688, "learning_rate": 2e-05, "loss": 0.12621114, "step": 531 }, { "epoch": 1.064, "grad_norm": 2.400475263595581, "learning_rate": 2e-05, "loss": 0.11684637, "step": 532 }, { "epoch": 1.066, "grad_norm": 2.5850939750671387, "learning_rate": 2e-05, "loss": 0.16528159, "step": 533 }, { "epoch": 1.068, "grad_norm": 2.623412609100342, "learning_rate": 2e-05, "loss": 0.11953619, "step": 534 }, { "epoch": 1.07, "grad_norm": 2.529339075088501, "learning_rate": 2e-05, "loss": 0.14916751, "step": 535 }, { "epoch": 1.072, "grad_norm": 3.2517716884613037, "learning_rate": 2e-05, "loss": 0.15048769, "step": 536 }, { "epoch": 1.074, "grad_norm": 1.9640837907791138, "learning_rate": 2e-05, "loss": 0.13277957, "step": 537 }, { "epoch": 1.076, "grad_norm": 2.368446111679077, "learning_rate": 2e-05, "loss": 0.14852183, "step": 538 }, { "epoch": 1.078, "grad_norm": 2.9297749996185303, "learning_rate": 2e-05, "loss": 0.15348649, "step": 539 }, { "epoch": 1.08, "grad_norm": 5.759192943572998, "learning_rate": 2e-05, "loss": 0.13191144, "step": 540 }, { "epoch": 1.082, "grad_norm": 2.021772623062134, "learning_rate": 2e-05, "loss": 0.1124662, "step": 541 }, { "epoch": 1.084, "grad_norm": 2.8374252319335938, "learning_rate": 2e-05, "loss": 0.1071616, "step": 542 }, { "epoch": 1.086, "grad_norm": 2.1634042263031006, "learning_rate": 2e-05, "loss": 0.11521625, "step": 543 }, { "epoch": 1.088, "grad_norm": 2.589329719543457, "learning_rate": 2e-05, "loss": 0.13090719, "step": 544 }, { "epoch": 1.09, "grad_norm": 2.291887044906616, "learning_rate": 2e-05, "loss": 0.09445122, "step": 545 }, { "epoch": 1.092, "grad_norm": 2.3259284496307373, "learning_rate": 2e-05, "loss": 0.12547138, "step": 546 }, { "epoch": 1.094, "grad_norm": 2.191845417022705, "learning_rate": 2e-05, "loss": 0.12708214, "step": 547 }, { "epoch": 1.096, "grad_norm": 3.3253958225250244, "learning_rate": 2e-05, "loss": 0.14231913, "step": 548 }, { "epoch": 1.098, "grad_norm": 2.9672670364379883, "learning_rate": 2e-05, "loss": 0.09965955, "step": 549 }, { "epoch": 1.1, "grad_norm": 3.215210199356079, "learning_rate": 2e-05, "loss": 0.13290197, "step": 550 }, { "epoch": 1.102, "grad_norm": 3.2850470542907715, "learning_rate": 2e-05, "loss": 0.10610727, "step": 551 }, { "epoch": 1.104, "grad_norm": 2.7427141666412354, "learning_rate": 2e-05, "loss": 0.12695783, "step": 552 }, { "epoch": 1.106, "grad_norm": 3.2531254291534424, "learning_rate": 2e-05, "loss": 0.12038823, "step": 553 }, { "epoch": 1.108, "grad_norm": 2.5022897720336914, "learning_rate": 2e-05, "loss": 0.13614482, "step": 554 }, { "epoch": 1.11, "grad_norm": 4.687258720397949, "learning_rate": 2e-05, "loss": 0.11169507, "step": 555 }, { "epoch": 1.112, "grad_norm": 2.6494035720825195, "learning_rate": 2e-05, "loss": 0.11928535, "step": 556 }, { "epoch": 1.114, "grad_norm": 2.6407148838043213, "learning_rate": 2e-05, "loss": 0.12640992, "step": 557 }, { "epoch": 1.116, "grad_norm": 4.039032936096191, "learning_rate": 2e-05, "loss": 0.16989604, "step": 558 }, { "epoch": 1.1179999999999999, "grad_norm": 3.1691837310791016, "learning_rate": 2e-05, "loss": 0.1739091, "step": 559 }, { "epoch": 1.12, "grad_norm": 2.1099650859832764, "learning_rate": 2e-05, "loss": 0.13166931, "step": 560 }, { "epoch": 1.1219999999999999, "grad_norm": 2.5065221786499023, "learning_rate": 2e-05, "loss": 0.14683142, "step": 561 }, { "epoch": 1.124, "grad_norm": 2.24849534034729, "learning_rate": 2e-05, "loss": 0.10228881, "step": 562 }, { "epoch": 1.126, "grad_norm": 3.08331298828125, "learning_rate": 2e-05, "loss": 0.15210506, "step": 563 }, { "epoch": 1.1280000000000001, "grad_norm": 2.5049123764038086, "learning_rate": 2e-05, "loss": 0.12219332, "step": 564 }, { "epoch": 1.13, "grad_norm": 2.139686346054077, "learning_rate": 2e-05, "loss": 0.11474234, "step": 565 }, { "epoch": 1.1320000000000001, "grad_norm": 1.576886534690857, "learning_rate": 2e-05, "loss": 0.09722944, "step": 566 }, { "epoch": 1.134, "grad_norm": 2.270977735519409, "learning_rate": 2e-05, "loss": 0.13902695, "step": 567 }, { "epoch": 1.1360000000000001, "grad_norm": 3.4217686653137207, "learning_rate": 2e-05, "loss": 0.13287134, "step": 568 }, { "epoch": 1.138, "grad_norm": 2.5737199783325195, "learning_rate": 2e-05, "loss": 0.14081581, "step": 569 }, { "epoch": 1.1400000000000001, "grad_norm": 1.9033153057098389, "learning_rate": 2e-05, "loss": 0.13062626, "step": 570 }, { "epoch": 1.142, "grad_norm": 1.9676927328109741, "learning_rate": 2e-05, "loss": 0.10712667, "step": 571 }, { "epoch": 1.144, "grad_norm": 2.840505599975586, "learning_rate": 2e-05, "loss": 0.12923452, "step": 572 }, { "epoch": 1.146, "grad_norm": 2.5398242473602295, "learning_rate": 2e-05, "loss": 0.14771613, "step": 573 }, { "epoch": 1.148, "grad_norm": 2.549650192260742, "learning_rate": 2e-05, "loss": 0.13140582, "step": 574 }, { "epoch": 1.15, "grad_norm": 2.1299757957458496, "learning_rate": 2e-05, "loss": 0.12547702, "step": 575 }, { "epoch": 1.152, "grad_norm": 2.358029842376709, "learning_rate": 2e-05, "loss": 0.12179442, "step": 576 }, { "epoch": 1.154, "grad_norm": 1.966770052909851, "learning_rate": 2e-05, "loss": 0.10394298, "step": 577 }, { "epoch": 1.156, "grad_norm": 2.0004608631134033, "learning_rate": 2e-05, "loss": 0.14016056, "step": 578 }, { "epoch": 1.158, "grad_norm": 3.5331180095672607, "learning_rate": 2e-05, "loss": 0.12845185, "step": 579 }, { "epoch": 1.16, "grad_norm": 2.299360513687134, "learning_rate": 2e-05, "loss": 0.12573363, "step": 580 }, { "epoch": 1.162, "grad_norm": 2.8737940788269043, "learning_rate": 2e-05, "loss": 0.12317209, "step": 581 }, { "epoch": 1.164, "grad_norm": 4.00046968460083, "learning_rate": 2e-05, "loss": 0.09280093, "step": 582 }, { "epoch": 1.166, "grad_norm": 2.854579210281372, "learning_rate": 2e-05, "loss": 0.12138534, "step": 583 }, { "epoch": 1.168, "grad_norm": 2.668538808822632, "learning_rate": 2e-05, "loss": 0.10895318, "step": 584 }, { "epoch": 1.17, "grad_norm": 1.9555225372314453, "learning_rate": 2e-05, "loss": 0.10446753, "step": 585 }, { "epoch": 1.172, "grad_norm": 2.943380117416382, "learning_rate": 2e-05, "loss": 0.09996414, "step": 586 }, { "epoch": 1.174, "grad_norm": 2.4844489097595215, "learning_rate": 2e-05, "loss": 0.11357398, "step": 587 }, { "epoch": 1.176, "grad_norm": 4.776824474334717, "learning_rate": 2e-05, "loss": 0.09684882, "step": 588 }, { "epoch": 1.178, "grad_norm": 2.3888590335845947, "learning_rate": 2e-05, "loss": 0.10829578, "step": 589 }, { "epoch": 1.18, "grad_norm": 3.8649983406066895, "learning_rate": 2e-05, "loss": 0.10984306, "step": 590 }, { "epoch": 1.182, "grad_norm": 2.9575302600860596, "learning_rate": 2e-05, "loss": 0.08193485, "step": 591 }, { "epoch": 1.184, "grad_norm": 3.222970724105835, "learning_rate": 2e-05, "loss": 0.12279116, "step": 592 }, { "epoch": 1.186, "grad_norm": 2.6744375228881836, "learning_rate": 2e-05, "loss": 0.12870033, "step": 593 }, { "epoch": 1.188, "grad_norm": 4.028379917144775, "learning_rate": 2e-05, "loss": 0.13311404, "step": 594 }, { "epoch": 1.19, "grad_norm": 6.1691460609436035, "learning_rate": 2e-05, "loss": 0.12187042, "step": 595 }, { "epoch": 1.192, "grad_norm": 2.7343945503234863, "learning_rate": 2e-05, "loss": 0.15135875, "step": 596 }, { "epoch": 1.194, "grad_norm": 2.8838765621185303, "learning_rate": 2e-05, "loss": 0.13852769, "step": 597 }, { "epoch": 1.196, "grad_norm": 2.592514991760254, "learning_rate": 2e-05, "loss": 0.12420864, "step": 598 }, { "epoch": 1.198, "grad_norm": 3.1650428771972656, "learning_rate": 2e-05, "loss": 0.1350922, "step": 599 }, { "epoch": 1.2, "grad_norm": 2.812872886657715, "learning_rate": 2e-05, "loss": 0.11503953, "step": 600 }, { "epoch": 1.202, "grad_norm": 2.564276933670044, "learning_rate": 2e-05, "loss": 0.13502818, "step": 601 }, { "epoch": 1.204, "grad_norm": 2.3174049854278564, "learning_rate": 2e-05, "loss": 0.1046789, "step": 602 }, { "epoch": 1.206, "grad_norm": 5.605673789978027, "learning_rate": 2e-05, "loss": 0.14738208, "step": 603 }, { "epoch": 1.208, "grad_norm": 3.615462303161621, "learning_rate": 2e-05, "loss": 0.15257767, "step": 604 }, { "epoch": 1.21, "grad_norm": 2.023568630218506, "learning_rate": 2e-05, "loss": 0.1027983, "step": 605 }, { "epoch": 1.212, "grad_norm": 2.8860561847686768, "learning_rate": 2e-05, "loss": 0.14651015, "step": 606 }, { "epoch": 1.214, "grad_norm": 2.644528388977051, "learning_rate": 2e-05, "loss": 0.13001838, "step": 607 }, { "epoch": 1.216, "grad_norm": 1.84329092502594, "learning_rate": 2e-05, "loss": 0.09636261, "step": 608 }, { "epoch": 1.218, "grad_norm": 2.386714220046997, "learning_rate": 2e-05, "loss": 0.11445861, "step": 609 }, { "epoch": 1.22, "grad_norm": 2.4820950031280518, "learning_rate": 2e-05, "loss": 0.14000396, "step": 610 }, { "epoch": 1.222, "grad_norm": 2.412814140319824, "learning_rate": 2e-05, "loss": 0.09793176, "step": 611 }, { "epoch": 1.224, "grad_norm": 2.0066609382629395, "learning_rate": 2e-05, "loss": 0.11375158, "step": 612 }, { "epoch": 1.226, "grad_norm": 2.4800400733947754, "learning_rate": 2e-05, "loss": 0.11607377, "step": 613 }, { "epoch": 1.228, "grad_norm": 2.5596139430999756, "learning_rate": 2e-05, "loss": 0.11540417, "step": 614 }, { "epoch": 1.23, "grad_norm": 1.8948832750320435, "learning_rate": 2e-05, "loss": 0.10477018, "step": 615 }, { "epoch": 1.232, "grad_norm": 2.5590243339538574, "learning_rate": 2e-05, "loss": 0.11847038, "step": 616 }, { "epoch": 1.234, "grad_norm": 2.4262442588806152, "learning_rate": 2e-05, "loss": 0.1339002, "step": 617 }, { "epoch": 1.236, "grad_norm": 3.6982271671295166, "learning_rate": 2e-05, "loss": 0.15697673, "step": 618 }, { "epoch": 1.238, "grad_norm": 2.4189836978912354, "learning_rate": 2e-05, "loss": 0.09398519, "step": 619 }, { "epoch": 1.24, "grad_norm": 2.4480879306793213, "learning_rate": 2e-05, "loss": 0.10857891, "step": 620 }, { "epoch": 1.242, "grad_norm": 2.9036366939544678, "learning_rate": 2e-05, "loss": 0.1304615, "step": 621 }, { "epoch": 1.244, "grad_norm": 2.4365622997283936, "learning_rate": 2e-05, "loss": 0.09261293, "step": 622 }, { "epoch": 1.246, "grad_norm": 3.2014307975769043, "learning_rate": 2e-05, "loss": 0.13648328, "step": 623 }, { "epoch": 1.248, "grad_norm": 3.503472089767456, "learning_rate": 2e-05, "loss": 0.1255216, "step": 624 }, { "epoch": 1.25, "grad_norm": 3.31048583984375, "learning_rate": 2e-05, "loss": 0.09449267, "step": 625 }, { "epoch": 1.252, "grad_norm": 3.5570435523986816, "learning_rate": 2e-05, "loss": 0.12387832, "step": 626 }, { "epoch": 1.254, "grad_norm": 2.5230705738067627, "learning_rate": 2e-05, "loss": 0.11507116, "step": 627 }, { "epoch": 1.256, "grad_norm": 2.516810655593872, "learning_rate": 2e-05, "loss": 0.09768143, "step": 628 }, { "epoch": 1.258, "grad_norm": 2.518480062484741, "learning_rate": 2e-05, "loss": 0.10322925, "step": 629 }, { "epoch": 1.26, "grad_norm": 3.009742259979248, "learning_rate": 2e-05, "loss": 0.13511106, "step": 630 }, { "epoch": 1.262, "grad_norm": 2.5133047103881836, "learning_rate": 2e-05, "loss": 0.09286143, "step": 631 }, { "epoch": 1.264, "grad_norm": 1.873865008354187, "learning_rate": 2e-05, "loss": 0.0830982, "step": 632 }, { "epoch": 1.266, "grad_norm": 2.617849826812744, "learning_rate": 2e-05, "loss": 0.14133078, "step": 633 }, { "epoch": 1.268, "grad_norm": 2.539656162261963, "learning_rate": 2e-05, "loss": 0.1445739, "step": 634 }, { "epoch": 1.27, "grad_norm": 2.631986379623413, "learning_rate": 2e-05, "loss": 0.1087111, "step": 635 }, { "epoch": 1.272, "grad_norm": 3.4899914264678955, "learning_rate": 2e-05, "loss": 0.10924721, "step": 636 }, { "epoch": 1.274, "grad_norm": 2.475717067718506, "learning_rate": 2e-05, "loss": 0.11710069, "step": 637 }, { "epoch": 1.276, "grad_norm": 2.376640796661377, "learning_rate": 2e-05, "loss": 0.12215403, "step": 638 }, { "epoch": 1.278, "grad_norm": 3.3351945877075195, "learning_rate": 2e-05, "loss": 0.1109551, "step": 639 }, { "epoch": 1.28, "grad_norm": 4.320706844329834, "learning_rate": 2e-05, "loss": 0.15159556, "step": 640 }, { "epoch": 1.282, "grad_norm": 2.7680718898773193, "learning_rate": 2e-05, "loss": 0.1090942, "step": 641 }, { "epoch": 1.284, "grad_norm": 3.066657304763794, "learning_rate": 2e-05, "loss": 0.12701407, "step": 642 }, { "epoch": 1.286, "grad_norm": 2.19394588470459, "learning_rate": 2e-05, "loss": 0.1153975, "step": 643 }, { "epoch": 1.288, "grad_norm": 3.781139612197876, "learning_rate": 2e-05, "loss": 0.13794988, "step": 644 }, { "epoch": 1.29, "grad_norm": 3.143144369125366, "learning_rate": 2e-05, "loss": 0.10998823, "step": 645 }, { "epoch": 1.292, "grad_norm": 2.4902939796447754, "learning_rate": 2e-05, "loss": 0.10579651, "step": 646 }, { "epoch": 1.294, "grad_norm": 3.307321310043335, "learning_rate": 2e-05, "loss": 0.1062723, "step": 647 }, { "epoch": 1.296, "grad_norm": 2.1090455055236816, "learning_rate": 2e-05, "loss": 0.09155425, "step": 648 }, { "epoch": 1.298, "grad_norm": 2.296339988708496, "learning_rate": 2e-05, "loss": 0.09818932, "step": 649 }, { "epoch": 1.3, "grad_norm": 3.05061936378479, "learning_rate": 2e-05, "loss": 0.10809691, "step": 650 }, { "epoch": 1.302, "grad_norm": 3.311122417449951, "learning_rate": 2e-05, "loss": 0.16015843, "step": 651 }, { "epoch": 1.304, "grad_norm": 3.0931217670440674, "learning_rate": 2e-05, "loss": 0.10492094, "step": 652 }, { "epoch": 1.306, "grad_norm": 2.87888765335083, "learning_rate": 2e-05, "loss": 0.10102548, "step": 653 }, { "epoch": 1.308, "grad_norm": 2.918794631958008, "learning_rate": 2e-05, "loss": 0.12097271, "step": 654 }, { "epoch": 1.31, "grad_norm": 2.5513689517974854, "learning_rate": 2e-05, "loss": 0.10996251, "step": 655 }, { "epoch": 1.312, "grad_norm": 2.10673189163208, "learning_rate": 2e-05, "loss": 0.13955916, "step": 656 }, { "epoch": 1.314, "grad_norm": 2.614741563796997, "learning_rate": 2e-05, "loss": 0.09799536, "step": 657 }, { "epoch": 1.316, "grad_norm": 3.142801523208618, "learning_rate": 2e-05, "loss": 0.09701319, "step": 658 }, { "epoch": 1.318, "grad_norm": 4.541778564453125, "learning_rate": 2e-05, "loss": 0.13997841, "step": 659 }, { "epoch": 1.32, "grad_norm": 3.7341322898864746, "learning_rate": 2e-05, "loss": 0.11460865, "step": 660 }, { "epoch": 1.322, "grad_norm": 2.7065863609313965, "learning_rate": 2e-05, "loss": 0.11451581, "step": 661 }, { "epoch": 1.324, "grad_norm": 2.814460515975952, "learning_rate": 2e-05, "loss": 0.11479338, "step": 662 }, { "epoch": 1.326, "grad_norm": 2.700387477874756, "learning_rate": 2e-05, "loss": 0.11010472, "step": 663 }, { "epoch": 1.328, "grad_norm": 2.559100866317749, "learning_rate": 2e-05, "loss": 0.11664652, "step": 664 }, { "epoch": 1.33, "grad_norm": 2.2762906551361084, "learning_rate": 2e-05, "loss": 0.09327792, "step": 665 }, { "epoch": 1.332, "grad_norm": 2.627418279647827, "learning_rate": 2e-05, "loss": 0.08959809, "step": 666 }, { "epoch": 1.334, "grad_norm": 2.182978868484497, "learning_rate": 2e-05, "loss": 0.09787555, "step": 667 }, { "epoch": 1.336, "grad_norm": 2.3923563957214355, "learning_rate": 2e-05, "loss": 0.10572146, "step": 668 }, { "epoch": 1.338, "grad_norm": 2.8589816093444824, "learning_rate": 2e-05, "loss": 0.13722643, "step": 669 }, { "epoch": 1.34, "grad_norm": 2.460916757583618, "learning_rate": 2e-05, "loss": 0.08740199, "step": 670 }, { "epoch": 1.342, "grad_norm": 2.4553704261779785, "learning_rate": 2e-05, "loss": 0.10862092, "step": 671 }, { "epoch": 1.3439999999999999, "grad_norm": 1.6441093683242798, "learning_rate": 2e-05, "loss": 0.06703743, "step": 672 }, { "epoch": 1.346, "grad_norm": 2.292106866836548, "learning_rate": 2e-05, "loss": 0.10088767, "step": 673 }, { "epoch": 1.3479999999999999, "grad_norm": 2.2479658126831055, "learning_rate": 2e-05, "loss": 0.09034087, "step": 674 }, { "epoch": 1.35, "grad_norm": 2.2497737407684326, "learning_rate": 2e-05, "loss": 0.11896604, "step": 675 }, { "epoch": 1.3519999999999999, "grad_norm": 2.4198803901672363, "learning_rate": 2e-05, "loss": 0.09351024, "step": 676 }, { "epoch": 1.354, "grad_norm": 2.0135796070098877, "learning_rate": 2e-05, "loss": 0.08445922, "step": 677 }, { "epoch": 1.3559999999999999, "grad_norm": 3.221177339553833, "learning_rate": 2e-05, "loss": 0.11014754, "step": 678 }, { "epoch": 1.358, "grad_norm": 2.270320415496826, "learning_rate": 2e-05, "loss": 0.12037845, "step": 679 }, { "epoch": 1.3599999999999999, "grad_norm": 2.6250479221343994, "learning_rate": 2e-05, "loss": 0.11658848, "step": 680 }, { "epoch": 1.362, "grad_norm": 2.4321134090423584, "learning_rate": 2e-05, "loss": 0.09802853, "step": 681 }, { "epoch": 1.3639999999999999, "grad_norm": 2.4564032554626465, "learning_rate": 2e-05, "loss": 0.10415519, "step": 682 }, { "epoch": 1.366, "grad_norm": 1.970099925994873, "learning_rate": 2e-05, "loss": 0.08627912, "step": 683 }, { "epoch": 1.3679999999999999, "grad_norm": 2.5896036624908447, "learning_rate": 2e-05, "loss": 0.0935052, "step": 684 }, { "epoch": 1.37, "grad_norm": 2.6092915534973145, "learning_rate": 2e-05, "loss": 0.09677845, "step": 685 }, { "epoch": 1.3719999999999999, "grad_norm": 3.095132827758789, "learning_rate": 2e-05, "loss": 0.11584838, "step": 686 }, { "epoch": 1.374, "grad_norm": 2.705355167388916, "learning_rate": 2e-05, "loss": 0.10224438, "step": 687 }, { "epoch": 1.376, "grad_norm": 1.8498618602752686, "learning_rate": 2e-05, "loss": 0.07257505, "step": 688 }, { "epoch": 1.3780000000000001, "grad_norm": 2.658275842666626, "learning_rate": 2e-05, "loss": 0.095241, "step": 689 }, { "epoch": 1.38, "grad_norm": 2.1657066345214844, "learning_rate": 2e-05, "loss": 0.08493382, "step": 690 }, { "epoch": 1.3820000000000001, "grad_norm": 2.649157762527466, "learning_rate": 2e-05, "loss": 0.10965092, "step": 691 }, { "epoch": 1.384, "grad_norm": 2.6509430408477783, "learning_rate": 2e-05, "loss": 0.10417461, "step": 692 }, { "epoch": 1.3860000000000001, "grad_norm": 2.7123641967773438, "learning_rate": 2e-05, "loss": 0.0952239, "step": 693 }, { "epoch": 1.388, "grad_norm": 3.1654226779937744, "learning_rate": 2e-05, "loss": 0.1278252, "step": 694 }, { "epoch": 1.3900000000000001, "grad_norm": 3.4855122566223145, "learning_rate": 2e-05, "loss": 0.10266776, "step": 695 }, { "epoch": 1.392, "grad_norm": 1.523314118385315, "learning_rate": 2e-05, "loss": 0.05538701, "step": 696 }, { "epoch": 1.3940000000000001, "grad_norm": 2.838423490524292, "learning_rate": 2e-05, "loss": 0.11393388, "step": 697 }, { "epoch": 1.396, "grad_norm": 3.0423550605773926, "learning_rate": 2e-05, "loss": 0.11277989, "step": 698 }, { "epoch": 1.3980000000000001, "grad_norm": 2.595038414001465, "learning_rate": 2e-05, "loss": 0.1143982, "step": 699 }, { "epoch": 1.4, "grad_norm": 2.106015682220459, "learning_rate": 2e-05, "loss": 0.08432181, "step": 700 }, { "epoch": 1.4020000000000001, "grad_norm": 2.072103977203369, "learning_rate": 2e-05, "loss": 0.10295324, "step": 701 }, { "epoch": 1.404, "grad_norm": 5.35106086730957, "learning_rate": 2e-05, "loss": 0.12211221, "step": 702 }, { "epoch": 1.4060000000000001, "grad_norm": 2.595242738723755, "learning_rate": 2e-05, "loss": 0.11621975, "step": 703 }, { "epoch": 1.408, "grad_norm": 2.3813533782958984, "learning_rate": 2e-05, "loss": 0.07120501, "step": 704 }, { "epoch": 1.41, "grad_norm": 2.7448511123657227, "learning_rate": 2e-05, "loss": 0.08237515, "step": 705 }, { "epoch": 1.412, "grad_norm": 2.4283218383789062, "learning_rate": 2e-05, "loss": 0.07894477, "step": 706 }, { "epoch": 1.414, "grad_norm": 2.722013235092163, "learning_rate": 2e-05, "loss": 0.09057313, "step": 707 }, { "epoch": 1.416, "grad_norm": 2.1725480556488037, "learning_rate": 2e-05, "loss": 0.07726441, "step": 708 }, { "epoch": 1.418, "grad_norm": 2.412940502166748, "learning_rate": 2e-05, "loss": 0.07639, "step": 709 }, { "epoch": 1.42, "grad_norm": 1.8505913019180298, "learning_rate": 2e-05, "loss": 0.09286863, "step": 710 }, { "epoch": 1.422, "grad_norm": 2.121731996536255, "learning_rate": 2e-05, "loss": 0.07919374, "step": 711 }, { "epoch": 1.424, "grad_norm": 2.7666337490081787, "learning_rate": 2e-05, "loss": 0.10220705, "step": 712 }, { "epoch": 1.426, "grad_norm": 2.482006072998047, "learning_rate": 2e-05, "loss": 0.08181632, "step": 713 }, { "epoch": 1.428, "grad_norm": 2.693687915802002, "learning_rate": 2e-05, "loss": 0.06798096, "step": 714 }, { "epoch": 1.43, "grad_norm": 2.5155346393585205, "learning_rate": 2e-05, "loss": 0.08410256, "step": 715 }, { "epoch": 1.432, "grad_norm": 3.095309257507324, "learning_rate": 2e-05, "loss": 0.1147206, "step": 716 }, { "epoch": 1.434, "grad_norm": 2.1130447387695312, "learning_rate": 2e-05, "loss": 0.08827046, "step": 717 }, { "epoch": 1.436, "grad_norm": 3.6553380489349365, "learning_rate": 2e-05, "loss": 0.09300891, "step": 718 }, { "epoch": 1.438, "grad_norm": 2.1224894523620605, "learning_rate": 2e-05, "loss": 0.09277943, "step": 719 }, { "epoch": 1.44, "grad_norm": 1.9273908138275146, "learning_rate": 2e-05, "loss": 0.08951817, "step": 720 }, { "epoch": 1.442, "grad_norm": 2.018853187561035, "learning_rate": 2e-05, "loss": 0.07283738, "step": 721 }, { "epoch": 1.444, "grad_norm": 2.079155206680298, "learning_rate": 2e-05, "loss": 0.08982038, "step": 722 }, { "epoch": 1.446, "grad_norm": 2.438000440597534, "learning_rate": 2e-05, "loss": 0.10790306, "step": 723 }, { "epoch": 1.448, "grad_norm": 2.1911613941192627, "learning_rate": 2e-05, "loss": 0.07532462, "step": 724 }, { "epoch": 1.45, "grad_norm": 1.8854244947433472, "learning_rate": 2e-05, "loss": 0.05531625, "step": 725 }, { "epoch": 1.452, "grad_norm": 2.0435564517974854, "learning_rate": 2e-05, "loss": 0.08569458, "step": 726 }, { "epoch": 1.454, "grad_norm": 4.157101631164551, "learning_rate": 2e-05, "loss": 0.10588066, "step": 727 }, { "epoch": 1.456, "grad_norm": 1.762305498123169, "learning_rate": 2e-05, "loss": 0.08237082, "step": 728 }, { "epoch": 1.458, "grad_norm": 1.7560533285140991, "learning_rate": 2e-05, "loss": 0.08737503, "step": 729 }, { "epoch": 1.46, "grad_norm": 1.803896427154541, "learning_rate": 2e-05, "loss": 0.07071456, "step": 730 }, { "epoch": 1.462, "grad_norm": 2.671704053878784, "learning_rate": 2e-05, "loss": 0.08183609, "step": 731 }, { "epoch": 1.464, "grad_norm": 2.725677728652954, "learning_rate": 2e-05, "loss": 0.09709771, "step": 732 }, { "epoch": 1.466, "grad_norm": 2.230529308319092, "learning_rate": 2e-05, "loss": 0.093163, "step": 733 }, { "epoch": 1.468, "grad_norm": 2.1992108821868896, "learning_rate": 2e-05, "loss": 0.08508515, "step": 734 }, { "epoch": 1.47, "grad_norm": 1.9853242635726929, "learning_rate": 2e-05, "loss": 0.07874966, "step": 735 }, { "epoch": 1.472, "grad_norm": 3.195524215698242, "learning_rate": 2e-05, "loss": 0.07465832, "step": 736 }, { "epoch": 1.474, "grad_norm": 2.578688383102417, "learning_rate": 2e-05, "loss": 0.0810234, "step": 737 }, { "epoch": 1.476, "grad_norm": 2.219371795654297, "learning_rate": 2e-05, "loss": 0.06290153, "step": 738 }, { "epoch": 1.478, "grad_norm": 3.0493226051330566, "learning_rate": 2e-05, "loss": 0.10852329, "step": 739 }, { "epoch": 1.48, "grad_norm": 1.894547700881958, "learning_rate": 2e-05, "loss": 0.07198999, "step": 740 }, { "epoch": 1.482, "grad_norm": 1.9765349626541138, "learning_rate": 2e-05, "loss": 0.07682022, "step": 741 }, { "epoch": 1.484, "grad_norm": 2.758671522140503, "learning_rate": 2e-05, "loss": 0.08025845, "step": 742 }, { "epoch": 1.486, "grad_norm": 5.092621803283691, "learning_rate": 2e-05, "loss": 0.11716142, "step": 743 }, { "epoch": 1.488, "grad_norm": 1.7337188720703125, "learning_rate": 2e-05, "loss": 0.06889806, "step": 744 }, { "epoch": 1.49, "grad_norm": 1.6227397918701172, "learning_rate": 2e-05, "loss": 0.06564939, "step": 745 }, { "epoch": 1.492, "grad_norm": 1.8530882596969604, "learning_rate": 2e-05, "loss": 0.08244382, "step": 746 }, { "epoch": 1.494, "grad_norm": 2.1974449157714844, "learning_rate": 2e-05, "loss": 0.10057726, "step": 747 }, { "epoch": 1.496, "grad_norm": 1.9001625776290894, "learning_rate": 2e-05, "loss": 0.07622299, "step": 748 }, { "epoch": 1.498, "grad_norm": 2.7725989818573, "learning_rate": 2e-05, "loss": 0.09877149, "step": 749 }, { "epoch": 1.5, "grad_norm": 2.439131498336792, "learning_rate": 2e-05, "loss": 0.08865526, "step": 750 }, { "epoch": 1.502, "grad_norm": 1.9627726078033447, "learning_rate": 2e-05, "loss": 0.0812942, "step": 751 }, { "epoch": 1.504, "grad_norm": 2.461146831512451, "learning_rate": 2e-05, "loss": 0.08872318, "step": 752 }, { "epoch": 1.506, "grad_norm": 2.2327353954315186, "learning_rate": 2e-05, "loss": 0.08371209, "step": 753 }, { "epoch": 1.508, "grad_norm": 1.7057092189788818, "learning_rate": 2e-05, "loss": 0.08295096, "step": 754 }, { "epoch": 1.51, "grad_norm": 1.592014193534851, "learning_rate": 2e-05, "loss": 0.06427182, "step": 755 }, { "epoch": 1.512, "grad_norm": 2.1517910957336426, "learning_rate": 2e-05, "loss": 0.07752314, "step": 756 }, { "epoch": 1.514, "grad_norm": 3.956221580505371, "learning_rate": 2e-05, "loss": 0.08947916, "step": 757 }, { "epoch": 1.516, "grad_norm": 2.7721030712127686, "learning_rate": 2e-05, "loss": 0.0844971, "step": 758 }, { "epoch": 1.518, "grad_norm": 1.7479230165481567, "learning_rate": 2e-05, "loss": 0.07433778, "step": 759 }, { "epoch": 1.52, "grad_norm": 2.2564239501953125, "learning_rate": 2e-05, "loss": 0.08172427, "step": 760 }, { "epoch": 1.522, "grad_norm": 2.6283817291259766, "learning_rate": 2e-05, "loss": 0.07976112, "step": 761 }, { "epoch": 1.524, "grad_norm": 2.783998489379883, "learning_rate": 2e-05, "loss": 0.07664889, "step": 762 }, { "epoch": 1.526, "grad_norm": 3.343437671661377, "learning_rate": 2e-05, "loss": 0.07145402, "step": 763 }, { "epoch": 1.528, "grad_norm": 2.8911867141723633, "learning_rate": 2e-05, "loss": 0.06805974, "step": 764 }, { "epoch": 1.53, "grad_norm": 2.998328685760498, "learning_rate": 2e-05, "loss": 0.07499643, "step": 765 }, { "epoch": 1.532, "grad_norm": 2.484273672103882, "learning_rate": 2e-05, "loss": 0.07433078, "step": 766 }, { "epoch": 1.534, "grad_norm": 2.329362392425537, "learning_rate": 2e-05, "loss": 0.08358801, "step": 767 }, { "epoch": 1.536, "grad_norm": 2.202545642852783, "learning_rate": 2e-05, "loss": 0.06542914, "step": 768 }, { "epoch": 1.538, "grad_norm": 2.0014524459838867, "learning_rate": 2e-05, "loss": 0.08993404, "step": 769 }, { "epoch": 1.54, "grad_norm": 2.2723772525787354, "learning_rate": 2e-05, "loss": 0.10550876, "step": 770 }, { "epoch": 1.542, "grad_norm": 2.0692672729492188, "learning_rate": 2e-05, "loss": 0.07269676, "step": 771 }, { "epoch": 1.544, "grad_norm": 2.0737030506134033, "learning_rate": 2e-05, "loss": 0.06703752, "step": 772 }, { "epoch": 1.546, "grad_norm": 2.1744225025177, "learning_rate": 2e-05, "loss": 0.08306466, "step": 773 }, { "epoch": 1.548, "grad_norm": 17.611337661743164, "learning_rate": 2e-05, "loss": 0.064059, "step": 774 }, { "epoch": 1.55, "grad_norm": 1.8644952774047852, "learning_rate": 2e-05, "loss": 0.07994568, "step": 775 }, { "epoch": 1.552, "grad_norm": 1.5328989028930664, "learning_rate": 2e-05, "loss": 0.07587386, "step": 776 }, { "epoch": 1.554, "grad_norm": 2.235229015350342, "learning_rate": 2e-05, "loss": 0.08034088, "step": 777 }, { "epoch": 1.556, "grad_norm": 2.2400894165039062, "learning_rate": 2e-05, "loss": 0.07154867, "step": 778 }, { "epoch": 1.558, "grad_norm": 2.6439497470855713, "learning_rate": 2e-05, "loss": 0.08810341, "step": 779 }, { "epoch": 1.56, "grad_norm": 2.811596632003784, "learning_rate": 2e-05, "loss": 0.09437238, "step": 780 }, { "epoch": 1.562, "grad_norm": 2.309126138687134, "learning_rate": 2e-05, "loss": 0.07691263, "step": 781 }, { "epoch": 1.564, "grad_norm": 2.3875679969787598, "learning_rate": 2e-05, "loss": 0.07415307, "step": 782 }, { "epoch": 1.5659999999999998, "grad_norm": 1.5229976177215576, "learning_rate": 2e-05, "loss": 0.0736395, "step": 783 }, { "epoch": 1.568, "grad_norm": 3.096762180328369, "learning_rate": 2e-05, "loss": 0.09484315, "step": 784 }, { "epoch": 1.5699999999999998, "grad_norm": 1.823845386505127, "learning_rate": 2e-05, "loss": 0.08399117, "step": 785 }, { "epoch": 1.572, "grad_norm": 2.103595018386841, "learning_rate": 2e-05, "loss": 0.060688, "step": 786 }, { "epoch": 1.5739999999999998, "grad_norm": 2.618736505508423, "learning_rate": 2e-05, "loss": 0.08902743, "step": 787 }, { "epoch": 1.576, "grad_norm": 2.025015115737915, "learning_rate": 2e-05, "loss": 0.08033721, "step": 788 }, { "epoch": 1.5779999999999998, "grad_norm": 9.556914329528809, "learning_rate": 2e-05, "loss": 0.11010626, "step": 789 }, { "epoch": 1.58, "grad_norm": 2.4782986640930176, "learning_rate": 2e-05, "loss": 0.12129696, "step": 790 }, { "epoch": 1.5819999999999999, "grad_norm": 2.86425518989563, "learning_rate": 2e-05, "loss": 0.08272199, "step": 791 }, { "epoch": 1.584, "grad_norm": 1.8563334941864014, "learning_rate": 2e-05, "loss": 0.06831618, "step": 792 }, { "epoch": 1.5859999999999999, "grad_norm": 1.709964394569397, "learning_rate": 2e-05, "loss": 0.06362506, "step": 793 }, { "epoch": 1.588, "grad_norm": 2.137883186340332, "learning_rate": 2e-05, "loss": 0.07587803, "step": 794 }, { "epoch": 1.5899999999999999, "grad_norm": 2.363016128540039, "learning_rate": 2e-05, "loss": 0.10132524, "step": 795 }, { "epoch": 1.592, "grad_norm": 2.9902260303497314, "learning_rate": 2e-05, "loss": 0.12351868, "step": 796 }, { "epoch": 1.5939999999999999, "grad_norm": 1.7475019693374634, "learning_rate": 2e-05, "loss": 0.0698937, "step": 797 }, { "epoch": 1.596, "grad_norm": 1.7452584505081177, "learning_rate": 2e-05, "loss": 0.07310887, "step": 798 }, { "epoch": 1.5979999999999999, "grad_norm": 2.2125442028045654, "learning_rate": 2e-05, "loss": 0.08515593, "step": 799 }, { "epoch": 1.6, "grad_norm": 1.9570682048797607, "learning_rate": 2e-05, "loss": 0.09576114, "step": 800 }, { "epoch": 1.6019999999999999, "grad_norm": 2.4968690872192383, "learning_rate": 2e-05, "loss": 0.08693783, "step": 801 }, { "epoch": 1.604, "grad_norm": 2.26326847076416, "learning_rate": 2e-05, "loss": 0.08654219, "step": 802 }, { "epoch": 1.6059999999999999, "grad_norm": 2.3274145126342773, "learning_rate": 2e-05, "loss": 0.10750651, "step": 803 }, { "epoch": 1.608, "grad_norm": 1.52241849899292, "learning_rate": 2e-05, "loss": 0.07020888, "step": 804 }, { "epoch": 1.6099999999999999, "grad_norm": 2.112908363342285, "learning_rate": 2e-05, "loss": 0.07239047, "step": 805 }, { "epoch": 1.612, "grad_norm": 2.029871702194214, "learning_rate": 2e-05, "loss": 0.08264256, "step": 806 }, { "epoch": 1.6139999999999999, "grad_norm": 1.7689646482467651, "learning_rate": 2e-05, "loss": 0.08204137, "step": 807 }, { "epoch": 1.616, "grad_norm": 2.38230562210083, "learning_rate": 2e-05, "loss": 0.07087668, "step": 808 }, { "epoch": 1.6179999999999999, "grad_norm": 1.8665469884872437, "learning_rate": 2e-05, "loss": 0.0778988, "step": 809 }, { "epoch": 1.62, "grad_norm": 2.146146774291992, "learning_rate": 2e-05, "loss": 0.08147563, "step": 810 }, { "epoch": 1.6219999999999999, "grad_norm": 1.901824951171875, "learning_rate": 2e-05, "loss": 0.07722423, "step": 811 }, { "epoch": 1.624, "grad_norm": 1.654176950454712, "learning_rate": 2e-05, "loss": 0.07397038, "step": 812 }, { "epoch": 1.626, "grad_norm": 2.2095117568969727, "learning_rate": 2e-05, "loss": 0.09214038, "step": 813 }, { "epoch": 1.6280000000000001, "grad_norm": 2.471964120864868, "learning_rate": 2e-05, "loss": 0.08440986, "step": 814 }, { "epoch": 1.63, "grad_norm": 2.14119815826416, "learning_rate": 2e-05, "loss": 0.08491719, "step": 815 }, { "epoch": 1.6320000000000001, "grad_norm": 1.472819209098816, "learning_rate": 2e-05, "loss": 0.06080973, "step": 816 }, { "epoch": 1.634, "grad_norm": 4.0974297523498535, "learning_rate": 2e-05, "loss": 0.11212557, "step": 817 }, { "epoch": 1.6360000000000001, "grad_norm": 1.893890142440796, "learning_rate": 2e-05, "loss": 0.06430957, "step": 818 }, { "epoch": 1.638, "grad_norm": 1.8362897634506226, "learning_rate": 2e-05, "loss": 0.07269567, "step": 819 }, { "epoch": 1.6400000000000001, "grad_norm": 2.3635354042053223, "learning_rate": 2e-05, "loss": 0.07982106, "step": 820 }, { "epoch": 1.642, "grad_norm": 2.827873706817627, "learning_rate": 2e-05, "loss": 0.06442684, "step": 821 }, { "epoch": 1.6440000000000001, "grad_norm": 1.9763258695602417, "learning_rate": 2e-05, "loss": 0.0594982, "step": 822 }, { "epoch": 1.646, "grad_norm": 1.7666466236114502, "learning_rate": 2e-05, "loss": 0.06877147, "step": 823 }, { "epoch": 1.6480000000000001, "grad_norm": 3.103104829788208, "learning_rate": 2e-05, "loss": 0.07696585, "step": 824 }, { "epoch": 1.65, "grad_norm": 3.5277068614959717, "learning_rate": 2e-05, "loss": 0.08776857, "step": 825 }, { "epoch": 1.6520000000000001, "grad_norm": 2.6822338104248047, "learning_rate": 2e-05, "loss": 0.06033723, "step": 826 }, { "epoch": 1.654, "grad_norm": 1.966208577156067, "learning_rate": 2e-05, "loss": 0.0782545, "step": 827 }, { "epoch": 1.6560000000000001, "grad_norm": 2.1555910110473633, "learning_rate": 2e-05, "loss": 0.08536707, "step": 828 }, { "epoch": 1.658, "grad_norm": 2.096773624420166, "learning_rate": 2e-05, "loss": 0.06903753, "step": 829 }, { "epoch": 1.6600000000000001, "grad_norm": 1.9013687372207642, "learning_rate": 2e-05, "loss": 0.08171882, "step": 830 }, { "epoch": 1.662, "grad_norm": 1.719090461730957, "learning_rate": 2e-05, "loss": 0.06542725, "step": 831 }, { "epoch": 1.6640000000000001, "grad_norm": 2.0266594886779785, "learning_rate": 2e-05, "loss": 0.08807792, "step": 832 }, { "epoch": 1.666, "grad_norm": 2.070847511291504, "learning_rate": 2e-05, "loss": 0.07473135, "step": 833 }, { "epoch": 1.6680000000000001, "grad_norm": 2.2031726837158203, "learning_rate": 2e-05, "loss": 0.05795466, "step": 834 }, { "epoch": 1.67, "grad_norm": 1.9450498819351196, "learning_rate": 2e-05, "loss": 0.05321917, "step": 835 }, { "epoch": 1.6720000000000002, "grad_norm": 2.383620023727417, "learning_rate": 2e-05, "loss": 0.09668325, "step": 836 }, { "epoch": 1.674, "grad_norm": 1.911126732826233, "learning_rate": 2e-05, "loss": 0.0709563, "step": 837 }, { "epoch": 1.6760000000000002, "grad_norm": 1.8981558084487915, "learning_rate": 2e-05, "loss": 0.05752856, "step": 838 }, { "epoch": 1.678, "grad_norm": 1.5697596073150635, "learning_rate": 2e-05, "loss": 0.06154868, "step": 839 }, { "epoch": 1.6800000000000002, "grad_norm": 2.2556140422821045, "learning_rate": 2e-05, "loss": 0.06484008, "step": 840 }, { "epoch": 1.682, "grad_norm": 2.297083616256714, "learning_rate": 2e-05, "loss": 0.06691624, "step": 841 }, { "epoch": 1.6840000000000002, "grad_norm": 2.831334114074707, "learning_rate": 2e-05, "loss": 0.0824251, "step": 842 }, { "epoch": 1.686, "grad_norm": 2.1341090202331543, "learning_rate": 2e-05, "loss": 0.08404815, "step": 843 }, { "epoch": 1.688, "grad_norm": 1.4432538747787476, "learning_rate": 2e-05, "loss": 0.05486818, "step": 844 }, { "epoch": 1.69, "grad_norm": 2.392119884490967, "learning_rate": 2e-05, "loss": 0.08042577, "step": 845 }, { "epoch": 1.692, "grad_norm": 2.140087604522705, "learning_rate": 2e-05, "loss": 0.07316266, "step": 846 }, { "epoch": 1.694, "grad_norm": 2.8727564811706543, "learning_rate": 2e-05, "loss": 0.09117547, "step": 847 }, { "epoch": 1.696, "grad_norm": 2.5479319095611572, "learning_rate": 2e-05, "loss": 0.0947205, "step": 848 }, { "epoch": 1.698, "grad_norm": 2.120222806930542, "learning_rate": 2e-05, "loss": 0.06924388, "step": 849 }, { "epoch": 1.7, "grad_norm": 2.4992620944976807, "learning_rate": 2e-05, "loss": 0.09288818, "step": 850 }, { "epoch": 1.702, "grad_norm": 1.9825425148010254, "learning_rate": 2e-05, "loss": 0.07824534, "step": 851 }, { "epoch": 1.704, "grad_norm": 1.519212245941162, "learning_rate": 2e-05, "loss": 0.05538861, "step": 852 }, { "epoch": 1.706, "grad_norm": 2.399399518966675, "learning_rate": 2e-05, "loss": 0.0779898, "step": 853 }, { "epoch": 1.708, "grad_norm": 2.637857437133789, "learning_rate": 2e-05, "loss": 0.10369846, "step": 854 }, { "epoch": 1.71, "grad_norm": 2.903981924057007, "learning_rate": 2e-05, "loss": 0.09455149, "step": 855 }, { "epoch": 1.712, "grad_norm": 1.9127029180526733, "learning_rate": 2e-05, "loss": 0.06748534, "step": 856 }, { "epoch": 1.714, "grad_norm": 1.633865237236023, "learning_rate": 2e-05, "loss": 0.07176484, "step": 857 }, { "epoch": 1.716, "grad_norm": 2.8449621200561523, "learning_rate": 2e-05, "loss": 0.07032914, "step": 858 }, { "epoch": 1.718, "grad_norm": 3.111711263656616, "learning_rate": 2e-05, "loss": 0.07360119, "step": 859 }, { "epoch": 1.72, "grad_norm": 2.459895610809326, "learning_rate": 2e-05, "loss": 0.10556261, "step": 860 }, { "epoch": 1.722, "grad_norm": 1.9931344985961914, "learning_rate": 2e-05, "loss": 0.06967217, "step": 861 }, { "epoch": 1.724, "grad_norm": 1.429177165031433, "learning_rate": 2e-05, "loss": 0.04311872, "step": 862 }, { "epoch": 1.726, "grad_norm": 1.8553142547607422, "learning_rate": 2e-05, "loss": 0.07448836, "step": 863 }, { "epoch": 1.728, "grad_norm": 1.4926773309707642, "learning_rate": 2e-05, "loss": 0.0673956, "step": 864 }, { "epoch": 1.73, "grad_norm": 2.5575432777404785, "learning_rate": 2e-05, "loss": 0.07588162, "step": 865 }, { "epoch": 1.732, "grad_norm": 2.1007280349731445, "learning_rate": 2e-05, "loss": 0.05418327, "step": 866 }, { "epoch": 1.734, "grad_norm": 3.0805535316467285, "learning_rate": 2e-05, "loss": 0.08815256, "step": 867 }, { "epoch": 1.736, "grad_norm": 2.14973783493042, "learning_rate": 2e-05, "loss": 0.07602916, "step": 868 }, { "epoch": 1.738, "grad_norm": 2.6056084632873535, "learning_rate": 2e-05, "loss": 0.08369519, "step": 869 }, { "epoch": 1.74, "grad_norm": 1.980968952178955, "learning_rate": 2e-05, "loss": 0.06892893, "step": 870 }, { "epoch": 1.742, "grad_norm": 2.932478427886963, "learning_rate": 2e-05, "loss": 0.08124112, "step": 871 }, { "epoch": 1.744, "grad_norm": 1.6852787733078003, "learning_rate": 2e-05, "loss": 0.06830005, "step": 872 }, { "epoch": 1.746, "grad_norm": 1.881437063217163, "learning_rate": 2e-05, "loss": 0.04541746, "step": 873 }, { "epoch": 1.748, "grad_norm": 2.5663695335388184, "learning_rate": 2e-05, "loss": 0.07354845, "step": 874 }, { "epoch": 1.75, "grad_norm": 2.884338617324829, "learning_rate": 2e-05, "loss": 0.07085122, "step": 875 }, { "epoch": 1.752, "grad_norm": 2.173144578933716, "learning_rate": 2e-05, "loss": 0.06505815, "step": 876 }, { "epoch": 1.754, "grad_norm": 2.744832754135132, "learning_rate": 2e-05, "loss": 0.10300224, "step": 877 }, { "epoch": 1.756, "grad_norm": 2.6979944705963135, "learning_rate": 2e-05, "loss": 0.08862556, "step": 878 }, { "epoch": 1.758, "grad_norm": 2.6769134998321533, "learning_rate": 2e-05, "loss": 0.07417379, "step": 879 }, { "epoch": 1.76, "grad_norm": 2.0921900272369385, "learning_rate": 2e-05, "loss": 0.08049569, "step": 880 }, { "epoch": 1.762, "grad_norm": 3.668351411819458, "learning_rate": 2e-05, "loss": 0.07346732, "step": 881 }, { "epoch": 1.764, "grad_norm": 1.955716609954834, "learning_rate": 2e-05, "loss": 0.0588823, "step": 882 }, { "epoch": 1.766, "grad_norm": 2.055715560913086, "learning_rate": 2e-05, "loss": 0.06786981, "step": 883 }, { "epoch": 1.768, "grad_norm": 1.8874297142028809, "learning_rate": 2e-05, "loss": 0.06704556, "step": 884 }, { "epoch": 1.77, "grad_norm": 2.452850103378296, "learning_rate": 2e-05, "loss": 0.0776285, "step": 885 }, { "epoch": 1.772, "grad_norm": 1.6478520631790161, "learning_rate": 2e-05, "loss": 0.0682406, "step": 886 }, { "epoch": 1.774, "grad_norm": 1.441072940826416, "learning_rate": 2e-05, "loss": 0.05489372, "step": 887 }, { "epoch": 1.776, "grad_norm": 2.24617862701416, "learning_rate": 2e-05, "loss": 0.05962018, "step": 888 }, { "epoch": 1.778, "grad_norm": 2.570594072341919, "learning_rate": 2e-05, "loss": 0.07772797, "step": 889 }, { "epoch": 1.78, "grad_norm": 2.316793441772461, "learning_rate": 2e-05, "loss": 0.07414915, "step": 890 }, { "epoch": 1.782, "grad_norm": 2.2875797748565674, "learning_rate": 2e-05, "loss": 0.06844224, "step": 891 }, { "epoch": 1.784, "grad_norm": 3.2194788455963135, "learning_rate": 2e-05, "loss": 0.08942357, "step": 892 }, { "epoch": 1.786, "grad_norm": 3.111416816711426, "learning_rate": 2e-05, "loss": 0.08543706, "step": 893 }, { "epoch": 1.788, "grad_norm": 2.2496681213378906, "learning_rate": 2e-05, "loss": 0.06796233, "step": 894 }, { "epoch": 1.79, "grad_norm": 2.393826723098755, "learning_rate": 2e-05, "loss": 0.06307848, "step": 895 }, { "epoch": 1.792, "grad_norm": 1.8092612028121948, "learning_rate": 2e-05, "loss": 0.08083902, "step": 896 }, { "epoch": 1.794, "grad_norm": 2.327549934387207, "learning_rate": 2e-05, "loss": 0.07006175, "step": 897 }, { "epoch": 1.796, "grad_norm": 2.3291375637054443, "learning_rate": 2e-05, "loss": 0.07644182, "step": 898 }, { "epoch": 1.798, "grad_norm": 1.8122745752334595, "learning_rate": 2e-05, "loss": 0.05956836, "step": 899 }, { "epoch": 1.8, "grad_norm": 1.7686699628829956, "learning_rate": 2e-05, "loss": 0.0774643, "step": 900 }, { "epoch": 1.802, "grad_norm": 3.383394956588745, "learning_rate": 2e-05, "loss": 0.09732038, "step": 901 }, { "epoch": 1.804, "grad_norm": 1.4468183517456055, "learning_rate": 2e-05, "loss": 0.05280242, "step": 902 }, { "epoch": 1.806, "grad_norm": 3.099715232849121, "learning_rate": 2e-05, "loss": 0.08549714, "step": 903 }, { "epoch": 1.808, "grad_norm": 2.2685530185699463, "learning_rate": 2e-05, "loss": 0.08650636, "step": 904 }, { "epoch": 1.81, "grad_norm": 2.1206886768341064, "learning_rate": 2e-05, "loss": 0.06690793, "step": 905 }, { "epoch": 1.812, "grad_norm": 2.361654281616211, "learning_rate": 2e-05, "loss": 0.07197607, "step": 906 }, { "epoch": 1.814, "grad_norm": 2.3998827934265137, "learning_rate": 2e-05, "loss": 0.06678525, "step": 907 }, { "epoch": 1.8159999999999998, "grad_norm": 1.8265810012817383, "learning_rate": 2e-05, "loss": 0.06269243, "step": 908 }, { "epoch": 1.818, "grad_norm": 2.0034234523773193, "learning_rate": 2e-05, "loss": 0.07594454, "step": 909 }, { "epoch": 1.8199999999999998, "grad_norm": 1.8148810863494873, "learning_rate": 2e-05, "loss": 0.06071473, "step": 910 }, { "epoch": 1.822, "grad_norm": 1.751679539680481, "learning_rate": 2e-05, "loss": 0.06768817, "step": 911 }, { "epoch": 1.8239999999999998, "grad_norm": 2.00803279876709, "learning_rate": 2e-05, "loss": 0.07907202, "step": 912 }, { "epoch": 1.826, "grad_norm": 2.013383388519287, "learning_rate": 2e-05, "loss": 0.06719398, "step": 913 }, { "epoch": 1.8279999999999998, "grad_norm": 2.5518107414245605, "learning_rate": 2e-05, "loss": 0.07807682, "step": 914 }, { "epoch": 1.83, "grad_norm": 2.0092356204986572, "learning_rate": 2e-05, "loss": 0.05918414, "step": 915 }, { "epoch": 1.8319999999999999, "grad_norm": 1.9702318906784058, "learning_rate": 2e-05, "loss": 0.05898762, "step": 916 }, { "epoch": 1.834, "grad_norm": 2.974558115005493, "learning_rate": 2e-05, "loss": 0.09832944, "step": 917 }, { "epoch": 1.8359999999999999, "grad_norm": 3.1490416526794434, "learning_rate": 2e-05, "loss": 0.08504972, "step": 918 }, { "epoch": 1.838, "grad_norm": 3.151014804840088, "learning_rate": 2e-05, "loss": 0.05736318, "step": 919 }, { "epoch": 1.8399999999999999, "grad_norm": 2.9925057888031006, "learning_rate": 2e-05, "loss": 0.07320988, "step": 920 }, { "epoch": 1.842, "grad_norm": 2.636167287826538, "learning_rate": 2e-05, "loss": 0.09282872, "step": 921 }, { "epoch": 1.8439999999999999, "grad_norm": 1.984940528869629, "learning_rate": 2e-05, "loss": 0.05574302, "step": 922 }, { "epoch": 1.846, "grad_norm": 2.508833169937134, "learning_rate": 2e-05, "loss": 0.0758808, "step": 923 }, { "epoch": 1.8479999999999999, "grad_norm": 2.5532920360565186, "learning_rate": 2e-05, "loss": 0.0828255, "step": 924 }, { "epoch": 1.85, "grad_norm": 2.2903261184692383, "learning_rate": 2e-05, "loss": 0.07507607, "step": 925 }, { "epoch": 1.8519999999999999, "grad_norm": 2.095050096511841, "learning_rate": 2e-05, "loss": 0.05968579, "step": 926 }, { "epoch": 1.854, "grad_norm": 1.5813626050949097, "learning_rate": 2e-05, "loss": 0.06536861, "step": 927 }, { "epoch": 1.8559999999999999, "grad_norm": 1.9922809600830078, "learning_rate": 2e-05, "loss": 0.06198128, "step": 928 }, { "epoch": 1.858, "grad_norm": 2.0554776191711426, "learning_rate": 2e-05, "loss": 0.07751165, "step": 929 }, { "epoch": 1.8599999999999999, "grad_norm": 1.9760997295379639, "learning_rate": 2e-05, "loss": 0.0767539, "step": 930 }, { "epoch": 1.862, "grad_norm": 1.6336450576782227, "learning_rate": 2e-05, "loss": 0.06495491, "step": 931 }, { "epoch": 1.8639999999999999, "grad_norm": 1.796708583831787, "learning_rate": 2e-05, "loss": 0.0650342, "step": 932 }, { "epoch": 1.866, "grad_norm": 1.6166343688964844, "learning_rate": 2e-05, "loss": 0.06242783, "step": 933 }, { "epoch": 1.8679999999999999, "grad_norm": 2.2365167140960693, "learning_rate": 2e-05, "loss": 0.08445454, "step": 934 }, { "epoch": 1.87, "grad_norm": 1.795891284942627, "learning_rate": 2e-05, "loss": 0.0526274, "step": 935 }, { "epoch": 1.8719999999999999, "grad_norm": 2.8587825298309326, "learning_rate": 2e-05, "loss": 0.06400409, "step": 936 }, { "epoch": 1.874, "grad_norm": 2.213303565979004, "learning_rate": 2e-05, "loss": 0.06138328, "step": 937 }, { "epoch": 1.876, "grad_norm": 2.25402569770813, "learning_rate": 2e-05, "loss": 0.07633629, "step": 938 }, { "epoch": 1.8780000000000001, "grad_norm": 2.096250057220459, "learning_rate": 2e-05, "loss": 0.07336065, "step": 939 }, { "epoch": 1.88, "grad_norm": 1.6095550060272217, "learning_rate": 2e-05, "loss": 0.057545, "step": 940 }, { "epoch": 1.8820000000000001, "grad_norm": 2.503938913345337, "learning_rate": 2e-05, "loss": 0.07715003, "step": 941 }, { "epoch": 1.884, "grad_norm": 1.7810205221176147, "learning_rate": 2e-05, "loss": 0.06386582, "step": 942 }, { "epoch": 1.8860000000000001, "grad_norm": 3.030251979827881, "learning_rate": 2e-05, "loss": 0.09021285, "step": 943 }, { "epoch": 1.888, "grad_norm": 2.048762798309326, "learning_rate": 2e-05, "loss": 0.07349489, "step": 944 }, { "epoch": 1.8900000000000001, "grad_norm": 2.1592016220092773, "learning_rate": 2e-05, "loss": 0.06426996, "step": 945 }, { "epoch": 1.892, "grad_norm": 1.1694790124893188, "learning_rate": 2e-05, "loss": 0.05598904, "step": 946 }, { "epoch": 1.8940000000000001, "grad_norm": 2.5215678215026855, "learning_rate": 2e-05, "loss": 0.05859364, "step": 947 }, { "epoch": 1.896, "grad_norm": 1.6138578653335571, "learning_rate": 2e-05, "loss": 0.05201582, "step": 948 }, { "epoch": 1.8980000000000001, "grad_norm": 2.5577175617218018, "learning_rate": 2e-05, "loss": 0.07010947, "step": 949 }, { "epoch": 1.9, "grad_norm": 3.392971992492676, "learning_rate": 2e-05, "loss": 0.07514925, "step": 950 }, { "epoch": 1.9020000000000001, "grad_norm": 1.6294666528701782, "learning_rate": 2e-05, "loss": 0.06291882, "step": 951 }, { "epoch": 1.904, "grad_norm": 2.229604482650757, "learning_rate": 2e-05, "loss": 0.06559543, "step": 952 }, { "epoch": 1.9060000000000001, "grad_norm": 1.924761414527893, "learning_rate": 2e-05, "loss": 0.0862481, "step": 953 }, { "epoch": 1.908, "grad_norm": 1.9866812229156494, "learning_rate": 2e-05, "loss": 0.06686676, "step": 954 }, { "epoch": 1.9100000000000001, "grad_norm": 1.7814255952835083, "learning_rate": 2e-05, "loss": 0.05599992, "step": 955 }, { "epoch": 1.912, "grad_norm": 2.1436808109283447, "learning_rate": 2e-05, "loss": 0.07030681, "step": 956 }, { "epoch": 1.9140000000000001, "grad_norm": 1.3862833976745605, "learning_rate": 2e-05, "loss": 0.04867277, "step": 957 }, { "epoch": 1.916, "grad_norm": 1.781009316444397, "learning_rate": 2e-05, "loss": 0.05244367, "step": 958 }, { "epoch": 1.9180000000000001, "grad_norm": 1.6906291246414185, "learning_rate": 2e-05, "loss": 0.06690039, "step": 959 }, { "epoch": 1.92, "grad_norm": 3.1098814010620117, "learning_rate": 2e-05, "loss": 0.11306722, "step": 960 }, { "epoch": 1.9220000000000002, "grad_norm": 1.7126940488815308, "learning_rate": 2e-05, "loss": 0.05899595, "step": 961 }, { "epoch": 1.924, "grad_norm": 2.6065030097961426, "learning_rate": 2e-05, "loss": 0.06164584, "step": 962 }, { "epoch": 1.9260000000000002, "grad_norm": 1.9014359712600708, "learning_rate": 2e-05, "loss": 0.07033944, "step": 963 }, { "epoch": 1.928, "grad_norm": 1.3948487043380737, "learning_rate": 2e-05, "loss": 0.05189591, "step": 964 }, { "epoch": 1.9300000000000002, "grad_norm": 2.2040138244628906, "learning_rate": 2e-05, "loss": 0.05501474, "step": 965 }, { "epoch": 1.932, "grad_norm": 1.710676908493042, "learning_rate": 2e-05, "loss": 0.07464767, "step": 966 }, { "epoch": 1.9340000000000002, "grad_norm": 2.508552312850952, "learning_rate": 2e-05, "loss": 0.07813706, "step": 967 }, { "epoch": 1.936, "grad_norm": 1.4470033645629883, "learning_rate": 2e-05, "loss": 0.0598635, "step": 968 }, { "epoch": 1.938, "grad_norm": 2.1353957653045654, "learning_rate": 2e-05, "loss": 0.05462658, "step": 969 }, { "epoch": 1.94, "grad_norm": 2.136827230453491, "learning_rate": 2e-05, "loss": 0.0630064, "step": 970 }, { "epoch": 1.942, "grad_norm": 2.935197591781616, "learning_rate": 2e-05, "loss": 0.09128846, "step": 971 }, { "epoch": 1.944, "grad_norm": 2.3456857204437256, "learning_rate": 2e-05, "loss": 0.08022948, "step": 972 }, { "epoch": 1.946, "grad_norm": 1.7174320220947266, "learning_rate": 2e-05, "loss": 0.06144142, "step": 973 }, { "epoch": 1.948, "grad_norm": 1.8288911581039429, "learning_rate": 2e-05, "loss": 0.04687043, "step": 974 }, { "epoch": 1.95, "grad_norm": 1.6098743677139282, "learning_rate": 2e-05, "loss": 0.06447228, "step": 975 }, { "epoch": 1.952, "grad_norm": 2.4248886108398438, "learning_rate": 2e-05, "loss": 0.11559688, "step": 976 }, { "epoch": 1.954, "grad_norm": 2.1920695304870605, "learning_rate": 2e-05, "loss": 0.0743776, "step": 977 }, { "epoch": 1.956, "grad_norm": 1.9038764238357544, "learning_rate": 2e-05, "loss": 0.07296964, "step": 978 }, { "epoch": 1.958, "grad_norm": 1.6800254583358765, "learning_rate": 2e-05, "loss": 0.06484336, "step": 979 }, { "epoch": 1.96, "grad_norm": 3.069260597229004, "learning_rate": 2e-05, "loss": 0.07390512, "step": 980 }, { "epoch": 1.962, "grad_norm": 2.405409336090088, "learning_rate": 2e-05, "loss": 0.09900182, "step": 981 }, { "epoch": 1.964, "grad_norm": 1.6991225481033325, "learning_rate": 2e-05, "loss": 0.05558601, "step": 982 }, { "epoch": 1.966, "grad_norm": 1.5244240760803223, "learning_rate": 2e-05, "loss": 0.06104971, "step": 983 }, { "epoch": 1.968, "grad_norm": 1.8483918905258179, "learning_rate": 2e-05, "loss": 0.0812193, "step": 984 }, { "epoch": 1.97, "grad_norm": 2.1218955516815186, "learning_rate": 2e-05, "loss": 0.08611986, "step": 985 }, { "epoch": 1.972, "grad_norm": 1.5318834781646729, "learning_rate": 2e-05, "loss": 0.06183597, "step": 986 }, { "epoch": 1.974, "grad_norm": 1.713119387626648, "learning_rate": 2e-05, "loss": 0.07985615, "step": 987 }, { "epoch": 1.976, "grad_norm": 1.4061270952224731, "learning_rate": 2e-05, "loss": 0.04497521, "step": 988 }, { "epoch": 1.978, "grad_norm": 2.804215431213379, "learning_rate": 2e-05, "loss": 0.06925857, "step": 989 }, { "epoch": 1.98, "grad_norm": 1.997461199760437, "learning_rate": 2e-05, "loss": 0.06266791, "step": 990 }, { "epoch": 1.982, "grad_norm": 1.849177598953247, "learning_rate": 2e-05, "loss": 0.08790208, "step": 991 }, { "epoch": 1.984, "grad_norm": 1.7157963514328003, "learning_rate": 2e-05, "loss": 0.06307608, "step": 992 }, { "epoch": 1.986, "grad_norm": 1.68929123878479, "learning_rate": 2e-05, "loss": 0.06697696, "step": 993 }, { "epoch": 1.988, "grad_norm": 1.83262300491333, "learning_rate": 2e-05, "loss": 0.06532292, "step": 994 }, { "epoch": 1.99, "grad_norm": 3.3675966262817383, "learning_rate": 2e-05, "loss": 0.09480947, "step": 995 }, { "epoch": 1.992, "grad_norm": 2.172999143600464, "learning_rate": 2e-05, "loss": 0.06149952, "step": 996 }, { "epoch": 1.994, "grad_norm": 1.6202707290649414, "learning_rate": 2e-05, "loss": 0.05982627, "step": 997 }, { "epoch": 1.996, "grad_norm": 1.9917645454406738, "learning_rate": 2e-05, "loss": 0.07037735, "step": 998 }, { "epoch": 1.998, "grad_norm": 2.8166284561157227, "learning_rate": 2e-05, "loss": 0.06254306, "step": 999 }, { "epoch": 2.0, "grad_norm": 1.6974472999572754, "learning_rate": 2e-05, "loss": 0.07782187, "step": 1000 }, { "epoch": 2.0, "eval_performance": { "AngleClassification_1": 0.986, "AngleClassification_2": 0.714, "AngleClassification_3": 0.5269461077844312, "Equal_1": 0.846, "Equal_2": 0.6007984031936128, "Equal_3": 0.6926147704590818, "LineComparison_1": 0.966, "LineComparison_2": 0.9600798403193613, "LineComparison_3": 0.8363273453093812, "Parallel_1": 0.8577154308617234, "Parallel_2": 0.9438877755511023, "Parallel_3": 0.444, "Perpendicular_1": 0.888, "Perpendicular_2": 0.342, "Perpendicular_3": 0.08517034068136273, "PointLiesOnCircle_1": 0.995691382765531, "PointLiesOnCircle_2": 0.9887, "PointLiesOnCircle_3": 0.7656, "PointLiesOnLine_1": 0.811623246492986, "PointLiesOnLine_2": 0.48096192384769537, "PointLiesOnLine_3": 0.24750499001996007 }, "eval_runtime": 320.1345, "eval_samples_per_second": 32.799, "eval_steps_per_second": 0.656, "step": 1000 }, { "epoch": 2.002, "grad_norm": 1.6161152124404907, "learning_rate": 2e-05, "loss": 0.0708351, "step": 1001 }, { "epoch": 2.004, "grad_norm": 1.5361851453781128, "learning_rate": 2e-05, "loss": 0.0666085, "step": 1002 }, { "epoch": 2.006, "grad_norm": 2.824312925338745, "learning_rate": 2e-05, "loss": 0.09088291, "step": 1003 }, { "epoch": 2.008, "grad_norm": 1.716564416885376, "learning_rate": 2e-05, "loss": 0.0687227, "step": 1004 }, { "epoch": 2.01, "grad_norm": 3.0519144535064697, "learning_rate": 2e-05, "loss": 0.07319454, "step": 1005 }, { "epoch": 2.012, "grad_norm": 2.324007511138916, "learning_rate": 2e-05, "loss": 0.09257935, "step": 1006 }, { "epoch": 2.014, "grad_norm": 4.496967792510986, "learning_rate": 2e-05, "loss": 0.08681659, "step": 1007 }, { "epoch": 2.016, "grad_norm": 1.5570460557937622, "learning_rate": 2e-05, "loss": 0.04536861, "step": 1008 }, { "epoch": 2.018, "grad_norm": 1.8645915985107422, "learning_rate": 2e-05, "loss": 0.05488719, "step": 1009 }, { "epoch": 2.02, "grad_norm": 3.016777515411377, "learning_rate": 2e-05, "loss": 0.08436164, "step": 1010 }, { "epoch": 2.022, "grad_norm": 1.9473544359207153, "learning_rate": 2e-05, "loss": 0.06556801, "step": 1011 }, { "epoch": 2.024, "grad_norm": 4.963746547698975, "learning_rate": 2e-05, "loss": 0.08209211, "step": 1012 }, { "epoch": 2.026, "grad_norm": 2.4789822101593018, "learning_rate": 2e-05, "loss": 0.0703003, "step": 1013 }, { "epoch": 2.028, "grad_norm": 2.126289129257202, "learning_rate": 2e-05, "loss": 0.03831969, "step": 1014 }, { "epoch": 2.03, "grad_norm": 2.6576788425445557, "learning_rate": 2e-05, "loss": 0.07527678, "step": 1015 }, { "epoch": 2.032, "grad_norm": 1.8639591932296753, "learning_rate": 2e-05, "loss": 0.05922488, "step": 1016 }, { "epoch": 2.034, "grad_norm": 1.5314511060714722, "learning_rate": 2e-05, "loss": 0.04786742, "step": 1017 }, { "epoch": 2.036, "grad_norm": 2.3438453674316406, "learning_rate": 2e-05, "loss": 0.07262143, "step": 1018 }, { "epoch": 2.038, "grad_norm": 1.8147228956222534, "learning_rate": 2e-05, "loss": 0.04851551, "step": 1019 }, { "epoch": 2.04, "grad_norm": 2.4879653453826904, "learning_rate": 2e-05, "loss": 0.0645643, "step": 1020 }, { "epoch": 2.042, "grad_norm": 2.240687608718872, "learning_rate": 2e-05, "loss": 0.07334122, "step": 1021 }, { "epoch": 2.044, "grad_norm": 2.220008611679077, "learning_rate": 2e-05, "loss": 0.06366929, "step": 1022 }, { "epoch": 2.046, "grad_norm": 2.523574113845825, "learning_rate": 2e-05, "loss": 0.07826661, "step": 1023 }, { "epoch": 2.048, "grad_norm": 2.2099592685699463, "learning_rate": 2e-05, "loss": 0.09013459, "step": 1024 }, { "epoch": 2.05, "grad_norm": 2.7173805236816406, "learning_rate": 2e-05, "loss": 0.08330725, "step": 1025 }, { "epoch": 2.052, "grad_norm": 1.9037878513336182, "learning_rate": 2e-05, "loss": 0.04698469, "step": 1026 }, { "epoch": 2.054, "grad_norm": 1.8339701890945435, "learning_rate": 2e-05, "loss": 0.07493852, "step": 1027 }, { "epoch": 2.056, "grad_norm": 1.7632319927215576, "learning_rate": 2e-05, "loss": 0.05750106, "step": 1028 }, { "epoch": 2.058, "grad_norm": 1.5772873163223267, "learning_rate": 2e-05, "loss": 0.05176328, "step": 1029 }, { "epoch": 2.06, "grad_norm": 1.7405070066452026, "learning_rate": 2e-05, "loss": 0.05858688, "step": 1030 }, { "epoch": 2.062, "grad_norm": 2.498608112335205, "learning_rate": 2e-05, "loss": 0.06766784, "step": 1031 }, { "epoch": 2.064, "grad_norm": 1.690112590789795, "learning_rate": 2e-05, "loss": 0.06187538, "step": 1032 }, { "epoch": 2.066, "grad_norm": 2.2642834186553955, "learning_rate": 2e-05, "loss": 0.07190007, "step": 1033 }, { "epoch": 2.068, "grad_norm": 1.882744550704956, "learning_rate": 2e-05, "loss": 0.05131867, "step": 1034 }, { "epoch": 2.07, "grad_norm": 1.5934783220291138, "learning_rate": 2e-05, "loss": 0.03696432, "step": 1035 }, { "epoch": 2.072, "grad_norm": 2.2791152000427246, "learning_rate": 2e-05, "loss": 0.08653103, "step": 1036 }, { "epoch": 2.074, "grad_norm": 1.565434217453003, "learning_rate": 2e-05, "loss": 0.05390278, "step": 1037 }, { "epoch": 2.076, "grad_norm": 2.557328939437866, "learning_rate": 2e-05, "loss": 0.07149288, "step": 1038 }, { "epoch": 2.078, "grad_norm": 2.0665178298950195, "learning_rate": 2e-05, "loss": 0.04702565, "step": 1039 }, { "epoch": 2.08, "grad_norm": 1.5441120862960815, "learning_rate": 2e-05, "loss": 0.04806751, "step": 1040 }, { "epoch": 2.082, "grad_norm": 1.4570614099502563, "learning_rate": 2e-05, "loss": 0.05136944, "step": 1041 }, { "epoch": 2.084, "grad_norm": 5.4056243896484375, "learning_rate": 2e-05, "loss": 0.07949159, "step": 1042 }, { "epoch": 2.086, "grad_norm": 2.241541624069214, "learning_rate": 2e-05, "loss": 0.08863392, "step": 1043 }, { "epoch": 2.088, "grad_norm": 1.510636568069458, "learning_rate": 2e-05, "loss": 0.05880911, "step": 1044 }, { "epoch": 2.09, "grad_norm": 2.41506028175354, "learning_rate": 2e-05, "loss": 0.05853722, "step": 1045 }, { "epoch": 2.092, "grad_norm": 1.5590910911560059, "learning_rate": 2e-05, "loss": 0.04493964, "step": 1046 }, { "epoch": 2.094, "grad_norm": 2.0853965282440186, "learning_rate": 2e-05, "loss": 0.08284545, "step": 1047 }, { "epoch": 2.096, "grad_norm": 1.5066843032836914, "learning_rate": 2e-05, "loss": 0.05551049, "step": 1048 }, { "epoch": 2.098, "grad_norm": 2.1711227893829346, "learning_rate": 2e-05, "loss": 0.07818221, "step": 1049 }, { "epoch": 2.1, "grad_norm": 1.308896541595459, "learning_rate": 2e-05, "loss": 0.04287009, "step": 1050 }, { "epoch": 2.102, "grad_norm": 2.321077346801758, "learning_rate": 2e-05, "loss": 0.0904954, "step": 1051 }, { "epoch": 2.104, "grad_norm": 1.5421305894851685, "learning_rate": 2e-05, "loss": 0.06068816, "step": 1052 }, { "epoch": 2.106, "grad_norm": 1.8072997331619263, "learning_rate": 2e-05, "loss": 0.0773791, "step": 1053 }, { "epoch": 2.108, "grad_norm": 2.3196661472320557, "learning_rate": 2e-05, "loss": 0.06575853, "step": 1054 }, { "epoch": 2.11, "grad_norm": 1.4077317714691162, "learning_rate": 2e-05, "loss": 0.05838005, "step": 1055 }, { "epoch": 2.112, "grad_norm": 1.5628201961517334, "learning_rate": 2e-05, "loss": 0.05414519, "step": 1056 }, { "epoch": 2.114, "grad_norm": 1.9730654954910278, "learning_rate": 2e-05, "loss": 0.063483, "step": 1057 }, { "epoch": 2.116, "grad_norm": 1.9468834400177002, "learning_rate": 2e-05, "loss": 0.0622715, "step": 1058 }, { "epoch": 2.118, "grad_norm": 2.9658288955688477, "learning_rate": 2e-05, "loss": 0.09098145, "step": 1059 }, { "epoch": 2.12, "grad_norm": 2.6334218978881836, "learning_rate": 2e-05, "loss": 0.06693131, "step": 1060 }, { "epoch": 2.122, "grad_norm": 3.37898850440979, "learning_rate": 2e-05, "loss": 0.09015776, "step": 1061 }, { "epoch": 2.124, "grad_norm": 2.2235124111175537, "learning_rate": 2e-05, "loss": 0.06049117, "step": 1062 }, { "epoch": 2.126, "grad_norm": 1.7589530944824219, "learning_rate": 2e-05, "loss": 0.05549654, "step": 1063 }, { "epoch": 2.128, "grad_norm": 2.0518884658813477, "learning_rate": 2e-05, "loss": 0.08317362, "step": 1064 }, { "epoch": 2.13, "grad_norm": 1.3999139070510864, "learning_rate": 2e-05, "loss": 0.04877549, "step": 1065 }, { "epoch": 2.132, "grad_norm": 2.0118892192840576, "learning_rate": 2e-05, "loss": 0.07196333, "step": 1066 }, { "epoch": 2.134, "grad_norm": 1.9917757511138916, "learning_rate": 2e-05, "loss": 0.05575627, "step": 1067 }, { "epoch": 2.136, "grad_norm": 1.17472505569458, "learning_rate": 2e-05, "loss": 0.04250437, "step": 1068 }, { "epoch": 2.138, "grad_norm": 1.4256974458694458, "learning_rate": 2e-05, "loss": 0.0531082, "step": 1069 }, { "epoch": 2.14, "grad_norm": 2.143972396850586, "learning_rate": 2e-05, "loss": 0.0886039, "step": 1070 }, { "epoch": 2.142, "grad_norm": 1.9579774141311646, "learning_rate": 2e-05, "loss": 0.06291603, "step": 1071 }, { "epoch": 2.144, "grad_norm": 1.78086519241333, "learning_rate": 2e-05, "loss": 0.05680231, "step": 1072 }, { "epoch": 2.146, "grad_norm": 1.8430737257003784, "learning_rate": 2e-05, "loss": 0.08492243, "step": 1073 }, { "epoch": 2.148, "grad_norm": 1.4731837511062622, "learning_rate": 2e-05, "loss": 0.05535863, "step": 1074 }, { "epoch": 2.15, "grad_norm": 1.6965607404708862, "learning_rate": 2e-05, "loss": 0.05869256, "step": 1075 }, { "epoch": 2.152, "grad_norm": 1.960614800453186, "learning_rate": 2e-05, "loss": 0.0604171, "step": 1076 }, { "epoch": 2.154, "grad_norm": 1.3592660427093506, "learning_rate": 2e-05, "loss": 0.04966343, "step": 1077 }, { "epoch": 2.156, "grad_norm": 1.4302160739898682, "learning_rate": 2e-05, "loss": 0.05037902, "step": 1078 }, { "epoch": 2.158, "grad_norm": 1.5716873407363892, "learning_rate": 2e-05, "loss": 0.06660844, "step": 1079 }, { "epoch": 2.16, "grad_norm": 1.2582805156707764, "learning_rate": 2e-05, "loss": 0.04393882, "step": 1080 }, { "epoch": 2.162, "grad_norm": 2.170337200164795, "learning_rate": 2e-05, "loss": 0.0725324, "step": 1081 }, { "epoch": 2.164, "grad_norm": 3.0252792835235596, "learning_rate": 2e-05, "loss": 0.08293726, "step": 1082 }, { "epoch": 2.166, "grad_norm": 2.281851053237915, "learning_rate": 2e-05, "loss": 0.09101221, "step": 1083 }, { "epoch": 2.168, "grad_norm": 2.3761801719665527, "learning_rate": 2e-05, "loss": 0.06198543, "step": 1084 }, { "epoch": 2.17, "grad_norm": 2.4593560695648193, "learning_rate": 2e-05, "loss": 0.08639136, "step": 1085 }, { "epoch": 2.172, "grad_norm": 2.1723086833953857, "learning_rate": 2e-05, "loss": 0.06393465, "step": 1086 }, { "epoch": 2.174, "grad_norm": 2.035813570022583, "learning_rate": 2e-05, "loss": 0.07111633, "step": 1087 }, { "epoch": 2.176, "grad_norm": 2.0545802116394043, "learning_rate": 2e-05, "loss": 0.06639965, "step": 1088 }, { "epoch": 2.178, "grad_norm": 2.862257957458496, "learning_rate": 2e-05, "loss": 0.08525625, "step": 1089 }, { "epoch": 2.18, "grad_norm": 1.8956446647644043, "learning_rate": 2e-05, "loss": 0.06492867, "step": 1090 }, { "epoch": 2.182, "grad_norm": 1.6777955293655396, "learning_rate": 2e-05, "loss": 0.06760434, "step": 1091 }, { "epoch": 2.184, "grad_norm": 1.5188251733779907, "learning_rate": 2e-05, "loss": 0.06885065, "step": 1092 }, { "epoch": 2.186, "grad_norm": 1.5363314151763916, "learning_rate": 2e-05, "loss": 0.06016498, "step": 1093 }, { "epoch": 2.188, "grad_norm": 1.5654257535934448, "learning_rate": 2e-05, "loss": 0.0583441, "step": 1094 }, { "epoch": 2.19, "grad_norm": 2.322394371032715, "learning_rate": 2e-05, "loss": 0.07740622, "step": 1095 }, { "epoch": 2.192, "grad_norm": 1.8494588136672974, "learning_rate": 2e-05, "loss": 0.06826154, "step": 1096 }, { "epoch": 2.194, "grad_norm": 1.8143563270568848, "learning_rate": 2e-05, "loss": 0.07409322, "step": 1097 }, { "epoch": 2.196, "grad_norm": 2.239018440246582, "learning_rate": 2e-05, "loss": 0.06441543, "step": 1098 }, { "epoch": 2.198, "grad_norm": 1.3814536333084106, "learning_rate": 2e-05, "loss": 0.06695756, "step": 1099 }, { "epoch": 2.2, "grad_norm": 1.9067416191101074, "learning_rate": 2e-05, "loss": 0.0664328, "step": 1100 }, { "epoch": 2.202, "grad_norm": 2.319135904312134, "learning_rate": 2e-05, "loss": 0.07544907, "step": 1101 }, { "epoch": 2.204, "grad_norm": 1.6520764827728271, "learning_rate": 2e-05, "loss": 0.04925615, "step": 1102 }, { "epoch": 2.206, "grad_norm": 2.0650670528411865, "learning_rate": 2e-05, "loss": 0.08087505, "step": 1103 }, { "epoch": 2.208, "grad_norm": 2.419358730316162, "learning_rate": 2e-05, "loss": 0.08194488, "step": 1104 }, { "epoch": 2.21, "grad_norm": 2.1177220344543457, "learning_rate": 2e-05, "loss": 0.06622218, "step": 1105 }, { "epoch": 2.212, "grad_norm": 1.8905670642852783, "learning_rate": 2e-05, "loss": 0.05285282, "step": 1106 }, { "epoch": 2.214, "grad_norm": 1.7442760467529297, "learning_rate": 2e-05, "loss": 0.07012412, "step": 1107 }, { "epoch": 2.216, "grad_norm": 2.560086965560913, "learning_rate": 2e-05, "loss": 0.07056236, "step": 1108 }, { "epoch": 2.218, "grad_norm": 1.3487662076950073, "learning_rate": 2e-05, "loss": 0.04359243, "step": 1109 }, { "epoch": 2.22, "grad_norm": 2.0357956886291504, "learning_rate": 2e-05, "loss": 0.06766598, "step": 1110 }, { "epoch": 2.222, "grad_norm": 1.6275434494018555, "learning_rate": 2e-05, "loss": 0.06916457, "step": 1111 }, { "epoch": 2.224, "grad_norm": 1.9597185850143433, "learning_rate": 2e-05, "loss": 0.07099155, "step": 1112 }, { "epoch": 2.226, "grad_norm": 1.4252787828445435, "learning_rate": 2e-05, "loss": 0.04932921, "step": 1113 }, { "epoch": 2.228, "grad_norm": 1.5756709575653076, "learning_rate": 2e-05, "loss": 0.05278656, "step": 1114 }, { "epoch": 2.23, "grad_norm": 2.331275701522827, "learning_rate": 2e-05, "loss": 0.07562015, "step": 1115 }, { "epoch": 2.232, "grad_norm": 1.697662115097046, "learning_rate": 2e-05, "loss": 0.06566669, "step": 1116 }, { "epoch": 2.234, "grad_norm": 1.760780692100525, "learning_rate": 2e-05, "loss": 0.04810528, "step": 1117 }, { "epoch": 2.2359999999999998, "grad_norm": 2.7786638736724854, "learning_rate": 2e-05, "loss": 0.05388356, "step": 1118 }, { "epoch": 2.238, "grad_norm": 2.2772700786590576, "learning_rate": 2e-05, "loss": 0.07156706, "step": 1119 }, { "epoch": 2.24, "grad_norm": 1.3025233745574951, "learning_rate": 2e-05, "loss": 0.04904636, "step": 1120 }, { "epoch": 2.242, "grad_norm": 2.5209176540374756, "learning_rate": 2e-05, "loss": 0.0652664, "step": 1121 }, { "epoch": 2.2439999999999998, "grad_norm": 1.7902709245681763, "learning_rate": 2e-05, "loss": 0.061213, "step": 1122 }, { "epoch": 2.246, "grad_norm": 1.2348299026489258, "learning_rate": 2e-05, "loss": 0.03768958, "step": 1123 }, { "epoch": 2.248, "grad_norm": 1.4644815921783447, "learning_rate": 2e-05, "loss": 0.05811838, "step": 1124 }, { "epoch": 2.25, "grad_norm": 1.8537580966949463, "learning_rate": 2e-05, "loss": 0.06129285, "step": 1125 }, { "epoch": 2.252, "grad_norm": 1.9889532327651978, "learning_rate": 2e-05, "loss": 0.05362317, "step": 1126 }, { "epoch": 2.254, "grad_norm": 1.2981350421905518, "learning_rate": 2e-05, "loss": 0.03556494, "step": 1127 }, { "epoch": 2.2560000000000002, "grad_norm": 2.6438069343566895, "learning_rate": 2e-05, "loss": 0.07400218, "step": 1128 }, { "epoch": 2.258, "grad_norm": 2.0551059246063232, "learning_rate": 2e-05, "loss": 0.06481735, "step": 1129 }, { "epoch": 2.26, "grad_norm": 1.9357151985168457, "learning_rate": 2e-05, "loss": 0.07889327, "step": 1130 }, { "epoch": 2.262, "grad_norm": 1.4854387044906616, "learning_rate": 2e-05, "loss": 0.04724164, "step": 1131 }, { "epoch": 2.2640000000000002, "grad_norm": 3.739743947982788, "learning_rate": 2e-05, "loss": 0.07142067, "step": 1132 }, { "epoch": 2.266, "grad_norm": 2.309699773788452, "learning_rate": 2e-05, "loss": 0.05937993, "step": 1133 }, { "epoch": 2.268, "grad_norm": 3.0048446655273438, "learning_rate": 2e-05, "loss": 0.08490255, "step": 1134 }, { "epoch": 2.27, "grad_norm": 1.9927393198013306, "learning_rate": 2e-05, "loss": 0.05297955, "step": 1135 }, { "epoch": 2.2720000000000002, "grad_norm": 2.1314926147460938, "learning_rate": 2e-05, "loss": 0.05550206, "step": 1136 }, { "epoch": 2.274, "grad_norm": 1.467367172241211, "learning_rate": 2e-05, "loss": 0.06153769, "step": 1137 }, { "epoch": 2.276, "grad_norm": 2.6547863483428955, "learning_rate": 2e-05, "loss": 0.06249413, "step": 1138 }, { "epoch": 2.278, "grad_norm": 2.3911125659942627, "learning_rate": 2e-05, "loss": 0.04478404, "step": 1139 }, { "epoch": 2.2800000000000002, "grad_norm": 2.295572280883789, "learning_rate": 2e-05, "loss": 0.05000008, "step": 1140 }, { "epoch": 2.282, "grad_norm": 1.823384165763855, "learning_rate": 2e-05, "loss": 0.05099558, "step": 1141 }, { "epoch": 2.284, "grad_norm": 2.4514079093933105, "learning_rate": 2e-05, "loss": 0.07286972, "step": 1142 }, { "epoch": 2.286, "grad_norm": 1.5815463066101074, "learning_rate": 2e-05, "loss": 0.06633377, "step": 1143 }, { "epoch": 2.288, "grad_norm": 1.3394488096237183, "learning_rate": 2e-05, "loss": 0.05137707, "step": 1144 }, { "epoch": 2.29, "grad_norm": 1.6430034637451172, "learning_rate": 2e-05, "loss": 0.03971995, "step": 1145 }, { "epoch": 2.292, "grad_norm": 2.691917657852173, "learning_rate": 2e-05, "loss": 0.06252594, "step": 1146 }, { "epoch": 2.294, "grad_norm": 1.7687281370162964, "learning_rate": 2e-05, "loss": 0.05269225, "step": 1147 }, { "epoch": 2.296, "grad_norm": 1.8105900287628174, "learning_rate": 2e-05, "loss": 0.07036023, "step": 1148 }, { "epoch": 2.298, "grad_norm": 1.4458359479904175, "learning_rate": 2e-05, "loss": 0.05376993, "step": 1149 }, { "epoch": 2.3, "grad_norm": 1.6199760437011719, "learning_rate": 2e-05, "loss": 0.06054214, "step": 1150 }, { "epoch": 2.302, "grad_norm": 1.98055100440979, "learning_rate": 2e-05, "loss": 0.07045031, "step": 1151 }, { "epoch": 2.304, "grad_norm": 1.7091319561004639, "learning_rate": 2e-05, "loss": 0.04949348, "step": 1152 }, { "epoch": 2.306, "grad_norm": 2.5169119834899902, "learning_rate": 2e-05, "loss": 0.06985944, "step": 1153 }, { "epoch": 2.308, "grad_norm": 1.5572229623794556, "learning_rate": 2e-05, "loss": 0.0572869, "step": 1154 }, { "epoch": 2.31, "grad_norm": 1.3380517959594727, "learning_rate": 2e-05, "loss": 0.04210208, "step": 1155 }, { "epoch": 2.312, "grad_norm": 2.2991576194763184, "learning_rate": 2e-05, "loss": 0.06161058, "step": 1156 }, { "epoch": 2.314, "grad_norm": 1.362855076789856, "learning_rate": 2e-05, "loss": 0.05341607, "step": 1157 }, { "epoch": 2.316, "grad_norm": 1.6171643733978271, "learning_rate": 2e-05, "loss": 0.05781926, "step": 1158 }, { "epoch": 2.318, "grad_norm": 2.0822646617889404, "learning_rate": 2e-05, "loss": 0.05547912, "step": 1159 }, { "epoch": 2.32, "grad_norm": 2.1096858978271484, "learning_rate": 2e-05, "loss": 0.06335936, "step": 1160 }, { "epoch": 2.322, "grad_norm": 1.6990431547164917, "learning_rate": 2e-05, "loss": 0.06178097, "step": 1161 }, { "epoch": 2.324, "grad_norm": 1.849997878074646, "learning_rate": 2e-05, "loss": 0.06346186, "step": 1162 }, { "epoch": 2.326, "grad_norm": 1.6597445011138916, "learning_rate": 2e-05, "loss": 0.05579802, "step": 1163 }, { "epoch": 2.328, "grad_norm": 1.805808663368225, "learning_rate": 2e-05, "loss": 0.08137181, "step": 1164 }, { "epoch": 2.33, "grad_norm": 1.5715245008468628, "learning_rate": 2e-05, "loss": 0.07559416, "step": 1165 }, { "epoch": 2.332, "grad_norm": 1.6368138790130615, "learning_rate": 2e-05, "loss": 0.06028969, "step": 1166 }, { "epoch": 2.334, "grad_norm": 2.369986057281494, "learning_rate": 2e-05, "loss": 0.05975366, "step": 1167 }, { "epoch": 2.336, "grad_norm": 1.567192792892456, "learning_rate": 2e-05, "loss": 0.05366561, "step": 1168 }, { "epoch": 2.338, "grad_norm": 1.529199242591858, "learning_rate": 2e-05, "loss": 0.05094039, "step": 1169 }, { "epoch": 2.34, "grad_norm": 2.299103260040283, "learning_rate": 2e-05, "loss": 0.05972379, "step": 1170 }, { "epoch": 2.342, "grad_norm": 2.0813450813293457, "learning_rate": 2e-05, "loss": 0.06179533, "step": 1171 }, { "epoch": 2.344, "grad_norm": 1.8993735313415527, "learning_rate": 2e-05, "loss": 0.05901102, "step": 1172 }, { "epoch": 2.346, "grad_norm": 1.505812644958496, "learning_rate": 2e-05, "loss": 0.05369399, "step": 1173 }, { "epoch": 2.348, "grad_norm": 42.221553802490234, "learning_rate": 2e-05, "loss": 0.1112899, "step": 1174 }, { "epoch": 2.35, "grad_norm": 1.6444224119186401, "learning_rate": 2e-05, "loss": 0.05893474, "step": 1175 }, { "epoch": 2.352, "grad_norm": 1.4802619218826294, "learning_rate": 2e-05, "loss": 0.05816791, "step": 1176 }, { "epoch": 2.354, "grad_norm": 1.5738928318023682, "learning_rate": 2e-05, "loss": 0.07226036, "step": 1177 }, { "epoch": 2.356, "grad_norm": 1.077085018157959, "learning_rate": 2e-05, "loss": 0.03205248, "step": 1178 }, { "epoch": 2.358, "grad_norm": 48.9713134765625, "learning_rate": 2e-05, "loss": 0.17173615, "step": 1179 }, { "epoch": 2.36, "grad_norm": 2.1683356761932373, "learning_rate": 2e-05, "loss": 0.04456889, "step": 1180 }, { "epoch": 2.362, "grad_norm": 8.09849739074707, "learning_rate": 2e-05, "loss": 0.29480407, "step": 1181 }, { "epoch": 2.364, "grad_norm": 1.4625880718231201, "learning_rate": 2e-05, "loss": 0.04769106, "step": 1182 }, { "epoch": 2.366, "grad_norm": 1.876531720161438, "learning_rate": 2e-05, "loss": 0.0816292, "step": 1183 }, { "epoch": 2.368, "grad_norm": 1.9798403978347778, "learning_rate": 2e-05, "loss": 0.06011814, "step": 1184 }, { "epoch": 2.37, "grad_norm": 1.5905330181121826, "learning_rate": 2e-05, "loss": 0.05314508, "step": 1185 }, { "epoch": 2.372, "grad_norm": 1.7121469974517822, "learning_rate": 2e-05, "loss": 0.04185162, "step": 1186 }, { "epoch": 2.374, "grad_norm": 1.7982616424560547, "learning_rate": 2e-05, "loss": 0.04177887, "step": 1187 }, { "epoch": 2.376, "grad_norm": 2.124553680419922, "learning_rate": 2e-05, "loss": 0.06069058, "step": 1188 }, { "epoch": 2.378, "grad_norm": 1.8068010807037354, "learning_rate": 2e-05, "loss": 0.05539012, "step": 1189 }, { "epoch": 2.38, "grad_norm": 2.0612502098083496, "learning_rate": 2e-05, "loss": 0.07466509, "step": 1190 }, { "epoch": 2.382, "grad_norm": 1.1964912414550781, "learning_rate": 2e-05, "loss": 0.04185656, "step": 1191 }, { "epoch": 2.384, "grad_norm": 2.028778314590454, "learning_rate": 2e-05, "loss": 0.05173714, "step": 1192 }, { "epoch": 2.386, "grad_norm": 1.7989981174468994, "learning_rate": 2e-05, "loss": 0.05134147, "step": 1193 }, { "epoch": 2.388, "grad_norm": 1.6559616327285767, "learning_rate": 2e-05, "loss": 0.06251637, "step": 1194 }, { "epoch": 2.39, "grad_norm": 1.4277832508087158, "learning_rate": 2e-05, "loss": 0.05279864, "step": 1195 }, { "epoch": 2.392, "grad_norm": 1.6468921899795532, "learning_rate": 2e-05, "loss": 0.06037339, "step": 1196 }, { "epoch": 2.394, "grad_norm": 3.1690855026245117, "learning_rate": 2e-05, "loss": 0.08198987, "step": 1197 }, { "epoch": 2.396, "grad_norm": 1.2297303676605225, "learning_rate": 2e-05, "loss": 0.04745869, "step": 1198 }, { "epoch": 2.398, "grad_norm": 2.2883074283599854, "learning_rate": 2e-05, "loss": 0.04119899, "step": 1199 }, { "epoch": 2.4, "grad_norm": 2.0673367977142334, "learning_rate": 2e-05, "loss": 0.05730217, "step": 1200 }, { "epoch": 2.402, "grad_norm": 1.2708475589752197, "learning_rate": 2e-05, "loss": 0.04456803, "step": 1201 }, { "epoch": 2.404, "grad_norm": 1.3558706045150757, "learning_rate": 2e-05, "loss": 0.05208869, "step": 1202 }, { "epoch": 2.406, "grad_norm": 1.2289046049118042, "learning_rate": 2e-05, "loss": 0.04607398, "step": 1203 }, { "epoch": 2.408, "grad_norm": 3.603008985519409, "learning_rate": 2e-05, "loss": 0.08612712, "step": 1204 }, { "epoch": 2.41, "grad_norm": 4.123850345611572, "learning_rate": 2e-05, "loss": 0.06038028, "step": 1205 }, { "epoch": 2.412, "grad_norm": 2.2089900970458984, "learning_rate": 2e-05, "loss": 0.06190875, "step": 1206 }, { "epoch": 2.414, "grad_norm": 1.7213412523269653, "learning_rate": 2e-05, "loss": 0.05619193, "step": 1207 }, { "epoch": 2.416, "grad_norm": 2.1176223754882812, "learning_rate": 2e-05, "loss": 0.06305473, "step": 1208 }, { "epoch": 2.418, "grad_norm": 2.206848621368408, "learning_rate": 2e-05, "loss": 0.06026538, "step": 1209 }, { "epoch": 2.42, "grad_norm": 1.4014763832092285, "learning_rate": 2e-05, "loss": 0.04447113, "step": 1210 }, { "epoch": 2.422, "grad_norm": 1.3986562490463257, "learning_rate": 2e-05, "loss": 0.04321969, "step": 1211 }, { "epoch": 2.424, "grad_norm": 1.840933084487915, "learning_rate": 2e-05, "loss": 0.05479154, "step": 1212 }, { "epoch": 2.426, "grad_norm": 2.6934752464294434, "learning_rate": 2e-05, "loss": 0.05985066, "step": 1213 }, { "epoch": 2.428, "grad_norm": 1.4198068380355835, "learning_rate": 2e-05, "loss": 0.04191133, "step": 1214 }, { "epoch": 2.43, "grad_norm": 2.204730272293091, "learning_rate": 2e-05, "loss": 0.04986127, "step": 1215 }, { "epoch": 2.432, "grad_norm": 1.7930086851119995, "learning_rate": 2e-05, "loss": 0.03822114, "step": 1216 }, { "epoch": 2.434, "grad_norm": 1.9965940713882446, "learning_rate": 2e-05, "loss": 0.07922506, "step": 1217 }, { "epoch": 2.436, "grad_norm": 1.8581851720809937, "learning_rate": 2e-05, "loss": 0.06270727, "step": 1218 }, { "epoch": 2.438, "grad_norm": 2.940757989883423, "learning_rate": 2e-05, "loss": 0.05123382, "step": 1219 }, { "epoch": 2.44, "grad_norm": 1.8128314018249512, "learning_rate": 2e-05, "loss": 0.05950858, "step": 1220 }, { "epoch": 2.442, "grad_norm": 2.7984111309051514, "learning_rate": 2e-05, "loss": 0.04888112, "step": 1221 }, { "epoch": 2.444, "grad_norm": 1.7929303646087646, "learning_rate": 2e-05, "loss": 0.05642758, "step": 1222 }, { "epoch": 2.446, "grad_norm": 3.5627307891845703, "learning_rate": 2e-05, "loss": 0.06712979, "step": 1223 }, { "epoch": 2.448, "grad_norm": 2.1680853366851807, "learning_rate": 2e-05, "loss": 0.04695696, "step": 1224 }, { "epoch": 2.45, "grad_norm": 1.543848991394043, "learning_rate": 2e-05, "loss": 0.0471656, "step": 1225 }, { "epoch": 2.452, "grad_norm": 1.377380609512329, "learning_rate": 2e-05, "loss": 0.04254293, "step": 1226 }, { "epoch": 2.454, "grad_norm": 2.116429090499878, "learning_rate": 2e-05, "loss": 0.0888541, "step": 1227 }, { "epoch": 2.456, "grad_norm": 1.714355707168579, "learning_rate": 2e-05, "loss": 0.04009545, "step": 1228 }, { "epoch": 2.458, "grad_norm": 2.4635934829711914, "learning_rate": 2e-05, "loss": 0.06196552, "step": 1229 }, { "epoch": 2.46, "grad_norm": 1.6283334493637085, "learning_rate": 2e-05, "loss": 0.05298021, "step": 1230 }, { "epoch": 2.462, "grad_norm": 2.1310596466064453, "learning_rate": 2e-05, "loss": 0.0582485, "step": 1231 }, { "epoch": 2.464, "grad_norm": 2.8013837337493896, "learning_rate": 2e-05, "loss": 0.07070208, "step": 1232 }, { "epoch": 2.466, "grad_norm": 2.706620693206787, "learning_rate": 2e-05, "loss": 0.05937808, "step": 1233 }, { "epoch": 2.468, "grad_norm": 2.1183664798736572, "learning_rate": 2e-05, "loss": 0.07405435, "step": 1234 }, { "epoch": 2.4699999999999998, "grad_norm": 1.8695333003997803, "learning_rate": 2e-05, "loss": 0.04990563, "step": 1235 }, { "epoch": 2.472, "grad_norm": 1.3877243995666504, "learning_rate": 2e-05, "loss": 0.05714301, "step": 1236 }, { "epoch": 2.474, "grad_norm": 2.160632371902466, "learning_rate": 2e-05, "loss": 0.05262284, "step": 1237 }, { "epoch": 2.476, "grad_norm": 3.242225408554077, "learning_rate": 2e-05, "loss": 0.07168475, "step": 1238 }, { "epoch": 2.4779999999999998, "grad_norm": 1.5651336908340454, "learning_rate": 2e-05, "loss": 0.03872868, "step": 1239 }, { "epoch": 2.48, "grad_norm": 2.6729652881622314, "learning_rate": 2e-05, "loss": 0.06602624, "step": 1240 }, { "epoch": 2.482, "grad_norm": 1.9314351081848145, "learning_rate": 2e-05, "loss": 0.06864213, "step": 1241 }, { "epoch": 2.484, "grad_norm": 2.9383625984191895, "learning_rate": 2e-05, "loss": 0.07590427, "step": 1242 }, { "epoch": 2.4859999999999998, "grad_norm": 1.5178899765014648, "learning_rate": 2e-05, "loss": 0.04809076, "step": 1243 }, { "epoch": 2.488, "grad_norm": 1.3784013986587524, "learning_rate": 2e-05, "loss": 0.04864044, "step": 1244 }, { "epoch": 2.49, "grad_norm": 1.29901123046875, "learning_rate": 2e-05, "loss": 0.03983694, "step": 1245 }, { "epoch": 2.492, "grad_norm": 2.0041773319244385, "learning_rate": 2e-05, "loss": 0.06119365, "step": 1246 }, { "epoch": 2.4939999999999998, "grad_norm": 1.760473608970642, "learning_rate": 2e-05, "loss": 0.05547423, "step": 1247 }, { "epoch": 2.496, "grad_norm": 1.782887578010559, "learning_rate": 2e-05, "loss": 0.05595577, "step": 1248 }, { "epoch": 2.498, "grad_norm": 1.431018590927124, "learning_rate": 2e-05, "loss": 0.05510469, "step": 1249 }, { "epoch": 2.5, "grad_norm": 2.7681326866149902, "learning_rate": 2e-05, "loss": 0.07547972, "step": 1250 }, { "epoch": 2.502, "grad_norm": 3.4799344539642334, "learning_rate": 2e-05, "loss": 0.08792815, "step": 1251 }, { "epoch": 2.504, "grad_norm": 1.4463987350463867, "learning_rate": 2e-05, "loss": 0.04625604, "step": 1252 }, { "epoch": 2.5060000000000002, "grad_norm": 7.698814392089844, "learning_rate": 2e-05, "loss": 0.07450949, "step": 1253 }, { "epoch": 2.508, "grad_norm": 2.0825157165527344, "learning_rate": 2e-05, "loss": 0.06824566, "step": 1254 }, { "epoch": 2.51, "grad_norm": 2.138805389404297, "learning_rate": 2e-05, "loss": 0.06627047, "step": 1255 }, { "epoch": 2.512, "grad_norm": 2.9669065475463867, "learning_rate": 2e-05, "loss": 0.07500726, "step": 1256 }, { "epoch": 2.5140000000000002, "grad_norm": 1.8249704837799072, "learning_rate": 2e-05, "loss": 0.05518293, "step": 1257 }, { "epoch": 2.516, "grad_norm": 1.9127455949783325, "learning_rate": 2e-05, "loss": 0.05548128, "step": 1258 }, { "epoch": 2.518, "grad_norm": 2.1432528495788574, "learning_rate": 2e-05, "loss": 0.0882006, "step": 1259 }, { "epoch": 2.52, "grad_norm": 1.9188241958618164, "learning_rate": 2e-05, "loss": 0.05358675, "step": 1260 }, { "epoch": 2.5220000000000002, "grad_norm": 1.2570300102233887, "learning_rate": 2e-05, "loss": 0.04172009, "step": 1261 }, { "epoch": 2.524, "grad_norm": 1.4593505859375, "learning_rate": 2e-05, "loss": 0.05119102, "step": 1262 }, { "epoch": 2.526, "grad_norm": 1.6832845211029053, "learning_rate": 2e-05, "loss": 0.04923711, "step": 1263 }, { "epoch": 2.528, "grad_norm": 2.0682425498962402, "learning_rate": 2e-05, "loss": 0.05748929, "step": 1264 }, { "epoch": 2.5300000000000002, "grad_norm": 2.118595838546753, "learning_rate": 2e-05, "loss": 0.07105608, "step": 1265 }, { "epoch": 2.532, "grad_norm": 1.294793725013733, "learning_rate": 2e-05, "loss": 0.05542181, "step": 1266 }, { "epoch": 2.534, "grad_norm": 2.6284120082855225, "learning_rate": 2e-05, "loss": 0.04738271, "step": 1267 }, { "epoch": 2.536, "grad_norm": 1.572341799736023, "learning_rate": 2e-05, "loss": 0.04801615, "step": 1268 }, { "epoch": 2.5380000000000003, "grad_norm": 1.7317556142807007, "learning_rate": 2e-05, "loss": 0.04552308, "step": 1269 }, { "epoch": 2.54, "grad_norm": 1.7648011445999146, "learning_rate": 2e-05, "loss": 0.05321316, "step": 1270 }, { "epoch": 2.542, "grad_norm": 3.3624472618103027, "learning_rate": 2e-05, "loss": 0.04517394, "step": 1271 }, { "epoch": 2.544, "grad_norm": 1.198052167892456, "learning_rate": 2e-05, "loss": 0.03292371, "step": 1272 }, { "epoch": 2.5460000000000003, "grad_norm": 1.9982932806015015, "learning_rate": 2e-05, "loss": 0.04539865, "step": 1273 }, { "epoch": 2.548, "grad_norm": 2.1996402740478516, "learning_rate": 2e-05, "loss": 0.06276131, "step": 1274 }, { "epoch": 2.55, "grad_norm": 2.3174142837524414, "learning_rate": 2e-05, "loss": 0.07670379, "step": 1275 }, { "epoch": 2.552, "grad_norm": 2.5731277465820312, "learning_rate": 2e-05, "loss": 0.05952568, "step": 1276 }, { "epoch": 2.5540000000000003, "grad_norm": 2.444291591644287, "learning_rate": 2e-05, "loss": 0.06525612, "step": 1277 }, { "epoch": 2.556, "grad_norm": 2.2760965824127197, "learning_rate": 2e-05, "loss": 0.06015107, "step": 1278 }, { "epoch": 2.558, "grad_norm": 1.7860162258148193, "learning_rate": 2e-05, "loss": 0.06085454, "step": 1279 }, { "epoch": 2.56, "grad_norm": 1.5104821920394897, "learning_rate": 2e-05, "loss": 0.04777806, "step": 1280 }, { "epoch": 2.5620000000000003, "grad_norm": 2.0118963718414307, "learning_rate": 2e-05, "loss": 0.06037318, "step": 1281 }, { "epoch": 2.564, "grad_norm": 1.3859965801239014, "learning_rate": 2e-05, "loss": 0.05690242, "step": 1282 }, { "epoch": 2.566, "grad_norm": 1.6642067432403564, "learning_rate": 2e-05, "loss": 0.06878574, "step": 1283 }, { "epoch": 2.568, "grad_norm": 1.7605303525924683, "learning_rate": 2e-05, "loss": 0.04636247, "step": 1284 }, { "epoch": 2.57, "grad_norm": 1.2372325658798218, "learning_rate": 2e-05, "loss": 0.0318442, "step": 1285 }, { "epoch": 2.572, "grad_norm": 1.015973687171936, "learning_rate": 2e-05, "loss": 0.02155654, "step": 1286 }, { "epoch": 2.574, "grad_norm": 1.6201320886611938, "learning_rate": 2e-05, "loss": 0.03975923, "step": 1287 }, { "epoch": 2.576, "grad_norm": 3.8816092014312744, "learning_rate": 2e-05, "loss": 0.08194003, "step": 1288 }, { "epoch": 2.578, "grad_norm": 1.7358951568603516, "learning_rate": 2e-05, "loss": 0.04974889, "step": 1289 }, { "epoch": 2.58, "grad_norm": 2.392040252685547, "learning_rate": 2e-05, "loss": 0.06020691, "step": 1290 }, { "epoch": 2.582, "grad_norm": 1.1086469888687134, "learning_rate": 2e-05, "loss": 0.04583845, "step": 1291 }, { "epoch": 2.584, "grad_norm": 1.5086019039154053, "learning_rate": 2e-05, "loss": 0.04313815, "step": 1292 }, { "epoch": 2.586, "grad_norm": 1.7809884548187256, "learning_rate": 2e-05, "loss": 0.05329616, "step": 1293 }, { "epoch": 2.588, "grad_norm": 1.52059006690979, "learning_rate": 2e-05, "loss": 0.04525972, "step": 1294 }, { "epoch": 2.59, "grad_norm": 2.2042572498321533, "learning_rate": 2e-05, "loss": 0.0593795, "step": 1295 }, { "epoch": 2.592, "grad_norm": 1.675024151802063, "learning_rate": 2e-05, "loss": 0.06052756, "step": 1296 }, { "epoch": 2.594, "grad_norm": 2.8984549045562744, "learning_rate": 2e-05, "loss": 0.0687076, "step": 1297 }, { "epoch": 2.596, "grad_norm": 2.2647969722747803, "learning_rate": 2e-05, "loss": 0.05009675, "step": 1298 }, { "epoch": 2.598, "grad_norm": 1.8025486469268799, "learning_rate": 2e-05, "loss": 0.05575639, "step": 1299 }, { "epoch": 2.6, "grad_norm": 1.8778318166732788, "learning_rate": 2e-05, "loss": 0.04730181, "step": 1300 }, { "epoch": 2.602, "grad_norm": 1.6435505151748657, "learning_rate": 2e-05, "loss": 0.04583211, "step": 1301 }, { "epoch": 2.604, "grad_norm": 1.7641148567199707, "learning_rate": 2e-05, "loss": 0.04589385, "step": 1302 }, { "epoch": 2.606, "grad_norm": 2.1896462440490723, "learning_rate": 2e-05, "loss": 0.05265369, "step": 1303 }, { "epoch": 2.608, "grad_norm": 2.137312173843384, "learning_rate": 2e-05, "loss": 0.05405066, "step": 1304 }, { "epoch": 2.61, "grad_norm": 1.550561547279358, "learning_rate": 2e-05, "loss": 0.05104889, "step": 1305 }, { "epoch": 2.612, "grad_norm": 1.8577524423599243, "learning_rate": 2e-05, "loss": 0.06039685, "step": 1306 }, { "epoch": 2.614, "grad_norm": 2.0149245262145996, "learning_rate": 2e-05, "loss": 0.05752065, "step": 1307 }, { "epoch": 2.616, "grad_norm": 1.2771774530410767, "learning_rate": 2e-05, "loss": 0.04627162, "step": 1308 }, { "epoch": 2.618, "grad_norm": 1.2289670705795288, "learning_rate": 2e-05, "loss": 0.04631304, "step": 1309 }, { "epoch": 2.62, "grad_norm": 1.5598198175430298, "learning_rate": 2e-05, "loss": 0.04497156, "step": 1310 }, { "epoch": 2.622, "grad_norm": 2.00862717628479, "learning_rate": 2e-05, "loss": 0.0671872, "step": 1311 }, { "epoch": 2.624, "grad_norm": 1.549716591835022, "learning_rate": 2e-05, "loss": 0.04912385, "step": 1312 }, { "epoch": 2.626, "grad_norm": 1.7805136442184448, "learning_rate": 2e-05, "loss": 0.04953271, "step": 1313 }, { "epoch": 2.628, "grad_norm": 1.4479440450668335, "learning_rate": 2e-05, "loss": 0.03930682, "step": 1314 }, { "epoch": 2.63, "grad_norm": 2.026073932647705, "learning_rate": 2e-05, "loss": 0.0562569, "step": 1315 }, { "epoch": 2.632, "grad_norm": 1.291940450668335, "learning_rate": 2e-05, "loss": 0.05585942, "step": 1316 }, { "epoch": 2.634, "grad_norm": 1.6085025072097778, "learning_rate": 2e-05, "loss": 0.05520656, "step": 1317 }, { "epoch": 2.636, "grad_norm": 1.7852100133895874, "learning_rate": 2e-05, "loss": 0.03830192, "step": 1318 }, { "epoch": 2.638, "grad_norm": 2.1503891944885254, "learning_rate": 2e-05, "loss": 0.05418783, "step": 1319 }, { "epoch": 2.64, "grad_norm": 1.5858957767486572, "learning_rate": 2e-05, "loss": 0.05161075, "step": 1320 }, { "epoch": 2.642, "grad_norm": 1.517919659614563, "learning_rate": 2e-05, "loss": 0.04599277, "step": 1321 }, { "epoch": 2.644, "grad_norm": 2.923825979232788, "learning_rate": 2e-05, "loss": 0.05314268, "step": 1322 }, { "epoch": 2.646, "grad_norm": 1.6294347047805786, "learning_rate": 2e-05, "loss": 0.04354763, "step": 1323 }, { "epoch": 2.648, "grad_norm": 2.0495193004608154, "learning_rate": 2e-05, "loss": 0.05836939, "step": 1324 }, { "epoch": 2.65, "grad_norm": 1.94076669216156, "learning_rate": 2e-05, "loss": 0.04274346, "step": 1325 }, { "epoch": 2.652, "grad_norm": 2.360132932662964, "learning_rate": 2e-05, "loss": 0.0647784, "step": 1326 }, { "epoch": 2.654, "grad_norm": 1.8153984546661377, "learning_rate": 2e-05, "loss": 0.04720091, "step": 1327 }, { "epoch": 2.656, "grad_norm": 1.725906252861023, "learning_rate": 2e-05, "loss": 0.04768829, "step": 1328 }, { "epoch": 2.658, "grad_norm": 3.0785069465637207, "learning_rate": 2e-05, "loss": 0.05972423, "step": 1329 }, { "epoch": 2.66, "grad_norm": 2.830115556716919, "learning_rate": 2e-05, "loss": 0.07905363, "step": 1330 }, { "epoch": 2.662, "grad_norm": 1.4237239360809326, "learning_rate": 2e-05, "loss": 0.05330458, "step": 1331 }, { "epoch": 2.664, "grad_norm": 1.5219199657440186, "learning_rate": 2e-05, "loss": 0.05649319, "step": 1332 }, { "epoch": 2.666, "grad_norm": 1.4314558506011963, "learning_rate": 2e-05, "loss": 0.03354269, "step": 1333 }, { "epoch": 2.668, "grad_norm": 2.2276740074157715, "learning_rate": 2e-05, "loss": 0.07491125, "step": 1334 }, { "epoch": 2.67, "grad_norm": 2.2556209564208984, "learning_rate": 2e-05, "loss": 0.0776452, "step": 1335 }, { "epoch": 2.672, "grad_norm": 1.8496445417404175, "learning_rate": 2e-05, "loss": 0.04727801, "step": 1336 }, { "epoch": 2.674, "grad_norm": 2.769913673400879, "learning_rate": 2e-05, "loss": 0.07160389, "step": 1337 }, { "epoch": 2.676, "grad_norm": 1.8476206064224243, "learning_rate": 2e-05, "loss": 0.04567086, "step": 1338 }, { "epoch": 2.678, "grad_norm": 1.8353848457336426, "learning_rate": 2e-05, "loss": 0.05308025, "step": 1339 }, { "epoch": 2.68, "grad_norm": 1.6821805238723755, "learning_rate": 2e-05, "loss": 0.04728777, "step": 1340 }, { "epoch": 2.682, "grad_norm": 2.2069616317749023, "learning_rate": 2e-05, "loss": 0.05139998, "step": 1341 }, { "epoch": 2.684, "grad_norm": 3.2691590785980225, "learning_rate": 2e-05, "loss": 0.03943311, "step": 1342 }, { "epoch": 2.686, "grad_norm": 1.286525845527649, "learning_rate": 2e-05, "loss": 0.04956998, "step": 1343 }, { "epoch": 2.6879999999999997, "grad_norm": 1.5744187831878662, "learning_rate": 2e-05, "loss": 0.04305596, "step": 1344 }, { "epoch": 2.69, "grad_norm": 2.1982498168945312, "learning_rate": 2e-05, "loss": 0.06531291, "step": 1345 }, { "epoch": 2.692, "grad_norm": 1.5973800420761108, "learning_rate": 2e-05, "loss": 0.06064547, "step": 1346 }, { "epoch": 2.694, "grad_norm": 1.2219719886779785, "learning_rate": 2e-05, "loss": 0.04524098, "step": 1347 }, { "epoch": 2.6959999999999997, "grad_norm": 2.1111559867858887, "learning_rate": 2e-05, "loss": 0.07144919, "step": 1348 }, { "epoch": 2.698, "grad_norm": 1.7122925519943237, "learning_rate": 2e-05, "loss": 0.04555336, "step": 1349 }, { "epoch": 2.7, "grad_norm": 1.607475757598877, "learning_rate": 2e-05, "loss": 0.05808245, "step": 1350 }, { "epoch": 2.702, "grad_norm": 1.6828935146331787, "learning_rate": 2e-05, "loss": 0.05782773, "step": 1351 }, { "epoch": 2.7039999999999997, "grad_norm": 1.7249581813812256, "learning_rate": 2e-05, "loss": 0.0577189, "step": 1352 }, { "epoch": 2.706, "grad_norm": 1.476163387298584, "learning_rate": 2e-05, "loss": 0.0563928, "step": 1353 }, { "epoch": 2.708, "grad_norm": 1.3817315101623535, "learning_rate": 2e-05, "loss": 0.04125504, "step": 1354 }, { "epoch": 2.71, "grad_norm": 1.9194447994232178, "learning_rate": 2e-05, "loss": 0.06330554, "step": 1355 }, { "epoch": 2.7119999999999997, "grad_norm": 1.357925534248352, "learning_rate": 2e-05, "loss": 0.04602174, "step": 1356 }, { "epoch": 2.714, "grad_norm": 2.3565990924835205, "learning_rate": 2e-05, "loss": 0.05163093, "step": 1357 }, { "epoch": 2.716, "grad_norm": 1.6682748794555664, "learning_rate": 2e-05, "loss": 0.03820909, "step": 1358 }, { "epoch": 2.718, "grad_norm": 2.4361672401428223, "learning_rate": 2e-05, "loss": 0.05794044, "step": 1359 }, { "epoch": 2.7199999999999998, "grad_norm": 2.8087456226348877, "learning_rate": 2e-05, "loss": 0.0474041, "step": 1360 }, { "epoch": 2.722, "grad_norm": 2.0830671787261963, "learning_rate": 2e-05, "loss": 0.0617595, "step": 1361 }, { "epoch": 2.724, "grad_norm": 2.5234179496765137, "learning_rate": 2e-05, "loss": 0.07854919, "step": 1362 }, { "epoch": 2.726, "grad_norm": 2.3360183238983154, "learning_rate": 2e-05, "loss": 0.0680959, "step": 1363 }, { "epoch": 2.7279999999999998, "grad_norm": 1.46207594871521, "learning_rate": 2e-05, "loss": 0.05441956, "step": 1364 }, { "epoch": 2.73, "grad_norm": 1.2000892162322998, "learning_rate": 2e-05, "loss": 0.03472488, "step": 1365 }, { "epoch": 2.732, "grad_norm": 1.1089409589767456, "learning_rate": 2e-05, "loss": 0.03588956, "step": 1366 }, { "epoch": 2.734, "grad_norm": 1.484050989151001, "learning_rate": 2e-05, "loss": 0.0420528, "step": 1367 }, { "epoch": 2.7359999999999998, "grad_norm": 2.1856143474578857, "learning_rate": 2e-05, "loss": 0.07063307, "step": 1368 }, { "epoch": 2.738, "grad_norm": 1.3538694381713867, "learning_rate": 2e-05, "loss": 0.04442645, "step": 1369 }, { "epoch": 2.74, "grad_norm": 1.1728174686431885, "learning_rate": 2e-05, "loss": 0.03758407, "step": 1370 }, { "epoch": 2.742, "grad_norm": 1.5417742729187012, "learning_rate": 2e-05, "loss": 0.05347462, "step": 1371 }, { "epoch": 2.7439999999999998, "grad_norm": 1.2521121501922607, "learning_rate": 2e-05, "loss": 0.03596249, "step": 1372 }, { "epoch": 2.746, "grad_norm": 1.7495527267456055, "learning_rate": 2e-05, "loss": 0.05753302, "step": 1373 }, { "epoch": 2.748, "grad_norm": 1.933049201965332, "learning_rate": 2e-05, "loss": 0.04527674, "step": 1374 }, { "epoch": 2.75, "grad_norm": 2.8900275230407715, "learning_rate": 2e-05, "loss": 0.06932513, "step": 1375 }, { "epoch": 2.752, "grad_norm": 1.453918695449829, "learning_rate": 2e-05, "loss": 0.04357269, "step": 1376 }, { "epoch": 2.754, "grad_norm": 1.4563038349151611, "learning_rate": 2e-05, "loss": 0.05056226, "step": 1377 }, { "epoch": 2.7560000000000002, "grad_norm": 1.504270076751709, "learning_rate": 2e-05, "loss": 0.05600676, "step": 1378 }, { "epoch": 2.758, "grad_norm": 1.9855608940124512, "learning_rate": 2e-05, "loss": 0.05795199, "step": 1379 }, { "epoch": 2.76, "grad_norm": 1.656665563583374, "learning_rate": 2e-05, "loss": 0.05140248, "step": 1380 }, { "epoch": 2.762, "grad_norm": 1.7435170412063599, "learning_rate": 2e-05, "loss": 0.04258007, "step": 1381 }, { "epoch": 2.7640000000000002, "grad_norm": 1.655081033706665, "learning_rate": 2e-05, "loss": 0.0503007, "step": 1382 }, { "epoch": 2.766, "grad_norm": 1.523956298828125, "learning_rate": 2e-05, "loss": 0.0550363, "step": 1383 }, { "epoch": 2.768, "grad_norm": 1.4899818897247314, "learning_rate": 2e-05, "loss": 0.04842805, "step": 1384 }, { "epoch": 2.77, "grad_norm": 2.0813798904418945, "learning_rate": 2e-05, "loss": 0.05239079, "step": 1385 }, { "epoch": 2.7720000000000002, "grad_norm": 1.5431971549987793, "learning_rate": 2e-05, "loss": 0.05558814, "step": 1386 }, { "epoch": 2.774, "grad_norm": 1.7558424472808838, "learning_rate": 2e-05, "loss": 0.04031524, "step": 1387 }, { "epoch": 2.776, "grad_norm": 0.9710761308670044, "learning_rate": 2e-05, "loss": 0.02542916, "step": 1388 }, { "epoch": 2.778, "grad_norm": 1.3783509731292725, "learning_rate": 2e-05, "loss": 0.05585878, "step": 1389 }, { "epoch": 2.7800000000000002, "grad_norm": 1.7113022804260254, "learning_rate": 2e-05, "loss": 0.06848748, "step": 1390 }, { "epoch": 2.782, "grad_norm": 1.9026986360549927, "learning_rate": 2e-05, "loss": 0.04627277, "step": 1391 }, { "epoch": 2.784, "grad_norm": 2.1564736366271973, "learning_rate": 2e-05, "loss": 0.04546306, "step": 1392 }, { "epoch": 2.786, "grad_norm": 1.8871279954910278, "learning_rate": 2e-05, "loss": 0.05558582, "step": 1393 }, { "epoch": 2.7880000000000003, "grad_norm": 1.3954287767410278, "learning_rate": 2e-05, "loss": 0.05387101, "step": 1394 }, { "epoch": 2.79, "grad_norm": 1.9269911050796509, "learning_rate": 2e-05, "loss": 0.0572519, "step": 1395 }, { "epoch": 2.792, "grad_norm": 1.7022475004196167, "learning_rate": 2e-05, "loss": 0.04831417, "step": 1396 }, { "epoch": 2.794, "grad_norm": 1.7882295846939087, "learning_rate": 2e-05, "loss": 0.03874616, "step": 1397 }, { "epoch": 2.7960000000000003, "grad_norm": 1.4796777963638306, "learning_rate": 2e-05, "loss": 0.03719701, "step": 1398 }, { "epoch": 2.798, "grad_norm": 1.5420184135437012, "learning_rate": 2e-05, "loss": 0.05557998, "step": 1399 }, { "epoch": 2.8, "grad_norm": 1.8424732685089111, "learning_rate": 2e-05, "loss": 0.05429619, "step": 1400 }, { "epoch": 2.802, "grad_norm": 1.4261318445205688, "learning_rate": 2e-05, "loss": 0.05679541, "step": 1401 }, { "epoch": 2.8040000000000003, "grad_norm": 1.93724524974823, "learning_rate": 2e-05, "loss": 0.04712646, "step": 1402 }, { "epoch": 2.806, "grad_norm": 1.8298678398132324, "learning_rate": 2e-05, "loss": 0.0460495, "step": 1403 }, { "epoch": 2.808, "grad_norm": 2.4823086261749268, "learning_rate": 2e-05, "loss": 0.05720939, "step": 1404 }, { "epoch": 2.81, "grad_norm": 2.05409574508667, "learning_rate": 2e-05, "loss": 0.06287205, "step": 1405 }, { "epoch": 2.8120000000000003, "grad_norm": 1.9300044775009155, "learning_rate": 2e-05, "loss": 0.05220521, "step": 1406 }, { "epoch": 2.814, "grad_norm": 1.6382372379302979, "learning_rate": 2e-05, "loss": 0.04585889, "step": 1407 }, { "epoch": 2.816, "grad_norm": 1.6744272708892822, "learning_rate": 2e-05, "loss": 0.05738734, "step": 1408 }, { "epoch": 2.818, "grad_norm": 1.097986102104187, "learning_rate": 2e-05, "loss": 0.03808478, "step": 1409 }, { "epoch": 2.82, "grad_norm": 1.8194561004638672, "learning_rate": 2e-05, "loss": 0.05379384, "step": 1410 }, { "epoch": 2.822, "grad_norm": 1.1608667373657227, "learning_rate": 2e-05, "loss": 0.04018103, "step": 1411 }, { "epoch": 2.824, "grad_norm": 1.5536198616027832, "learning_rate": 2e-05, "loss": 0.0568902, "step": 1412 }, { "epoch": 2.826, "grad_norm": 1.306771159172058, "learning_rate": 2e-05, "loss": 0.04700731, "step": 1413 }, { "epoch": 2.828, "grad_norm": 1.4206809997558594, "learning_rate": 2e-05, "loss": 0.03569669, "step": 1414 }, { "epoch": 2.83, "grad_norm": 2.00671124458313, "learning_rate": 2e-05, "loss": 0.0654131, "step": 1415 }, { "epoch": 2.832, "grad_norm": 1.3739982843399048, "learning_rate": 2e-05, "loss": 0.05260873, "step": 1416 }, { "epoch": 2.834, "grad_norm": 1.3874986171722412, "learning_rate": 2e-05, "loss": 0.038274, "step": 1417 }, { "epoch": 2.836, "grad_norm": 1.056638479232788, "learning_rate": 2e-05, "loss": 0.03164873, "step": 1418 }, { "epoch": 2.838, "grad_norm": 2.646418809890747, "learning_rate": 2e-05, "loss": 0.0538192, "step": 1419 }, { "epoch": 2.84, "grad_norm": 1.1970181465148926, "learning_rate": 2e-05, "loss": 0.03613362, "step": 1420 }, { "epoch": 2.842, "grad_norm": 1.7380998134613037, "learning_rate": 2e-05, "loss": 0.05502573, "step": 1421 }, { "epoch": 2.844, "grad_norm": 1.64398193359375, "learning_rate": 2e-05, "loss": 0.03875776, "step": 1422 }, { "epoch": 2.846, "grad_norm": 2.1740400791168213, "learning_rate": 2e-05, "loss": 0.05134468, "step": 1423 }, { "epoch": 2.848, "grad_norm": 2.8908863067626953, "learning_rate": 2e-05, "loss": 0.08046682, "step": 1424 }, { "epoch": 2.85, "grad_norm": 1.89336359500885, "learning_rate": 2e-05, "loss": 0.05308978, "step": 1425 }, { "epoch": 2.852, "grad_norm": 2.1659324169158936, "learning_rate": 2e-05, "loss": 0.0364816, "step": 1426 }, { "epoch": 2.854, "grad_norm": 1.402146339416504, "learning_rate": 2e-05, "loss": 0.04808577, "step": 1427 }, { "epoch": 2.856, "grad_norm": 1.9512687921524048, "learning_rate": 2e-05, "loss": 0.05041477, "step": 1428 }, { "epoch": 2.858, "grad_norm": 1.3872478008270264, "learning_rate": 2e-05, "loss": 0.04761492, "step": 1429 }, { "epoch": 2.86, "grad_norm": 1.659584403038025, "learning_rate": 2e-05, "loss": 0.06967552, "step": 1430 }, { "epoch": 2.862, "grad_norm": 1.8425308465957642, "learning_rate": 2e-05, "loss": 0.04844275, "step": 1431 }, { "epoch": 2.864, "grad_norm": 1.3185906410217285, "learning_rate": 2e-05, "loss": 0.04250592, "step": 1432 }, { "epoch": 2.866, "grad_norm": 1.1444284915924072, "learning_rate": 2e-05, "loss": 0.03224308, "step": 1433 }, { "epoch": 2.868, "grad_norm": 1.4065706729888916, "learning_rate": 2e-05, "loss": 0.05243749, "step": 1434 }, { "epoch": 2.87, "grad_norm": 2.1064388751983643, "learning_rate": 2e-05, "loss": 0.02927379, "step": 1435 }, { "epoch": 2.872, "grad_norm": 1.6056631803512573, "learning_rate": 2e-05, "loss": 0.03909209, "step": 1436 }, { "epoch": 2.874, "grad_norm": 1.3546314239501953, "learning_rate": 2e-05, "loss": 0.0485357, "step": 1437 }, { "epoch": 2.876, "grad_norm": 1.5769225358963013, "learning_rate": 2e-05, "loss": 0.05911487, "step": 1438 }, { "epoch": 2.878, "grad_norm": 1.589120864868164, "learning_rate": 2e-05, "loss": 0.04771549, "step": 1439 }, { "epoch": 2.88, "grad_norm": 2.1113979816436768, "learning_rate": 2e-05, "loss": 0.07277437, "step": 1440 }, { "epoch": 2.882, "grad_norm": 2.021388530731201, "learning_rate": 2e-05, "loss": 0.04649474, "step": 1441 }, { "epoch": 2.884, "grad_norm": 1.8449045419692993, "learning_rate": 2e-05, "loss": 0.04096931, "step": 1442 }, { "epoch": 2.886, "grad_norm": 2.4654793739318848, "learning_rate": 2e-05, "loss": 0.06707292, "step": 1443 }, { "epoch": 2.888, "grad_norm": 1.8038156032562256, "learning_rate": 2e-05, "loss": 0.05822627, "step": 1444 }, { "epoch": 2.89, "grad_norm": 1.6336687803268433, "learning_rate": 2e-05, "loss": 0.04999952, "step": 1445 }, { "epoch": 2.892, "grad_norm": 1.4008294343948364, "learning_rate": 2e-05, "loss": 0.04774325, "step": 1446 }, { "epoch": 2.894, "grad_norm": 1.8084869384765625, "learning_rate": 2e-05, "loss": 0.07069649, "step": 1447 }, { "epoch": 2.896, "grad_norm": 3.3638651371002197, "learning_rate": 2e-05, "loss": 0.09050111, "step": 1448 }, { "epoch": 2.898, "grad_norm": 1.1725375652313232, "learning_rate": 2e-05, "loss": 0.04985755, "step": 1449 }, { "epoch": 2.9, "grad_norm": 1.0440391302108765, "learning_rate": 2e-05, "loss": 0.05144562, "step": 1450 }, { "epoch": 2.902, "grad_norm": 2.280170440673828, "learning_rate": 2e-05, "loss": 0.04932537, "step": 1451 }, { "epoch": 2.904, "grad_norm": 1.461732268333435, "learning_rate": 2e-05, "loss": 0.04785193, "step": 1452 }, { "epoch": 2.906, "grad_norm": 1.9254556894302368, "learning_rate": 2e-05, "loss": 0.05030049, "step": 1453 }, { "epoch": 2.908, "grad_norm": 1.182039737701416, "learning_rate": 2e-05, "loss": 0.03006881, "step": 1454 }, { "epoch": 2.91, "grad_norm": 2.5947253704071045, "learning_rate": 2e-05, "loss": 0.04492339, "step": 1455 }, { "epoch": 2.912, "grad_norm": 1.543723225593567, "learning_rate": 2e-05, "loss": 0.05002248, "step": 1456 }, { "epoch": 2.914, "grad_norm": 1.1024924516677856, "learning_rate": 2e-05, "loss": 0.041133, "step": 1457 }, { "epoch": 2.916, "grad_norm": 1.7310285568237305, "learning_rate": 2e-05, "loss": 0.05967389, "step": 1458 }, { "epoch": 2.918, "grad_norm": 2.281721830368042, "learning_rate": 2e-05, "loss": 0.06781383, "step": 1459 }, { "epoch": 2.92, "grad_norm": 1.6971452236175537, "learning_rate": 2e-05, "loss": 0.05646857, "step": 1460 }, { "epoch": 2.922, "grad_norm": 1.741514801979065, "learning_rate": 2e-05, "loss": 0.07292978, "step": 1461 }, { "epoch": 2.924, "grad_norm": 1.4831644296646118, "learning_rate": 2e-05, "loss": 0.03878976, "step": 1462 }, { "epoch": 2.926, "grad_norm": 1.676571011543274, "learning_rate": 2e-05, "loss": 0.05792604, "step": 1463 }, { "epoch": 2.928, "grad_norm": 1.8864375352859497, "learning_rate": 2e-05, "loss": 0.06544764, "step": 1464 }, { "epoch": 2.93, "grad_norm": 2.198659658432007, "learning_rate": 2e-05, "loss": 0.06713847, "step": 1465 }, { "epoch": 2.932, "grad_norm": 1.498012900352478, "learning_rate": 2e-05, "loss": 0.03821286, "step": 1466 }, { "epoch": 2.934, "grad_norm": 1.4330593347549438, "learning_rate": 2e-05, "loss": 0.06282357, "step": 1467 }, { "epoch": 2.936, "grad_norm": 1.3292213678359985, "learning_rate": 2e-05, "loss": 0.06121788, "step": 1468 }, { "epoch": 2.9379999999999997, "grad_norm": 1.5773099660873413, "learning_rate": 2e-05, "loss": 0.03123096, "step": 1469 }, { "epoch": 2.94, "grad_norm": 1.3884388208389282, "learning_rate": 2e-05, "loss": 0.04133505, "step": 1470 }, { "epoch": 2.942, "grad_norm": 1.1826488971710205, "learning_rate": 2e-05, "loss": 0.03776342, "step": 1471 }, { "epoch": 2.944, "grad_norm": 1.5211957693099976, "learning_rate": 2e-05, "loss": 0.04243449, "step": 1472 }, { "epoch": 2.9459999999999997, "grad_norm": 2.148106336593628, "learning_rate": 2e-05, "loss": 0.06043831, "step": 1473 }, { "epoch": 2.948, "grad_norm": 1.1765345335006714, "learning_rate": 2e-05, "loss": 0.04386732, "step": 1474 }, { "epoch": 2.95, "grad_norm": 1.502057433128357, "learning_rate": 2e-05, "loss": 0.0411534, "step": 1475 }, { "epoch": 2.952, "grad_norm": 1.6430046558380127, "learning_rate": 2e-05, "loss": 0.037657, "step": 1476 }, { "epoch": 2.9539999999999997, "grad_norm": 1.535294771194458, "learning_rate": 2e-05, "loss": 0.05130748, "step": 1477 }, { "epoch": 2.956, "grad_norm": 1.9392225742340088, "learning_rate": 2e-05, "loss": 0.05215064, "step": 1478 }, { "epoch": 2.958, "grad_norm": 1.1696999073028564, "learning_rate": 2e-05, "loss": 0.02946492, "step": 1479 }, { "epoch": 2.96, "grad_norm": 1.414081335067749, "learning_rate": 2e-05, "loss": 0.04422535, "step": 1480 }, { "epoch": 2.9619999999999997, "grad_norm": 1.719420075416565, "learning_rate": 2e-05, "loss": 0.05473565, "step": 1481 }, { "epoch": 2.964, "grad_norm": 2.0351169109344482, "learning_rate": 2e-05, "loss": 0.04414459, "step": 1482 }, { "epoch": 2.966, "grad_norm": 1.012802004814148, "learning_rate": 2e-05, "loss": 0.03308199, "step": 1483 }, { "epoch": 2.968, "grad_norm": 2.026688814163208, "learning_rate": 2e-05, "loss": 0.04603078, "step": 1484 }, { "epoch": 2.9699999999999998, "grad_norm": 2.1403253078460693, "learning_rate": 2e-05, "loss": 0.04239823, "step": 1485 }, { "epoch": 2.972, "grad_norm": 2.087897539138794, "learning_rate": 2e-05, "loss": 0.05140383, "step": 1486 }, { "epoch": 2.974, "grad_norm": 3.251877546310425, "learning_rate": 2e-05, "loss": 0.05295721, "step": 1487 }, { "epoch": 2.976, "grad_norm": 2.2770659923553467, "learning_rate": 2e-05, "loss": 0.06836365, "step": 1488 }, { "epoch": 2.9779999999999998, "grad_norm": 1.4098083972930908, "learning_rate": 2e-05, "loss": 0.03099969, "step": 1489 }, { "epoch": 2.98, "grad_norm": 1.4418541193008423, "learning_rate": 2e-05, "loss": 0.0420429, "step": 1490 }, { "epoch": 2.982, "grad_norm": 1.7272592782974243, "learning_rate": 2e-05, "loss": 0.05751514, "step": 1491 }, { "epoch": 2.984, "grad_norm": 1.1021864414215088, "learning_rate": 2e-05, "loss": 0.03865705, "step": 1492 }, { "epoch": 2.9859999999999998, "grad_norm": 1.4109399318695068, "learning_rate": 2e-05, "loss": 0.05035442, "step": 1493 }, { "epoch": 2.988, "grad_norm": 1.5310124158859253, "learning_rate": 2e-05, "loss": 0.04429567, "step": 1494 }, { "epoch": 2.99, "grad_norm": 1.1835428476333618, "learning_rate": 2e-05, "loss": 0.04300354, "step": 1495 }, { "epoch": 2.992, "grad_norm": 1.6832672357559204, "learning_rate": 2e-05, "loss": 0.06267562, "step": 1496 }, { "epoch": 2.9939999999999998, "grad_norm": 1.7960203886032104, "learning_rate": 2e-05, "loss": 0.04719928, "step": 1497 }, { "epoch": 2.996, "grad_norm": 1.2733722925186157, "learning_rate": 2e-05, "loss": 0.04313634, "step": 1498 }, { "epoch": 2.998, "grad_norm": 2.289060592651367, "learning_rate": 2e-05, "loss": 0.04968451, "step": 1499 }, { "epoch": 3.0, "grad_norm": 1.4316084384918213, "learning_rate": 2e-05, "loss": 0.04869864, "step": 1500 }, { "epoch": 3.0, "eval_performance": { "AngleClassification_1": 0.992, "AngleClassification_2": 0.52, "AngleClassification_3": 0.49101796407185627, "Equal_1": 0.97, "Equal_2": 0.8323353293413174, "Equal_3": 0.720558882235529, "LineComparison_1": 0.992, "LineComparison_2": 0.9740518962075848, "LineComparison_3": 0.8582834331337326, "Parallel_1": 0.9458917835671342, "Parallel_2": 0.9438877755511023, "Parallel_3": 0.744, "Perpendicular_1": 0.934, "Perpendicular_2": 0.41, "Perpendicular_3": 0.19939879759519039, "PointLiesOnCircle_1": 0.9859719438877755, "PointLiesOnCircle_2": 0.9889333333333334, "PointLiesOnCircle_3": 0.7892666666666667, "PointLiesOnLine_1": 0.9799599198396793, "PointLiesOnLine_2": 0.9238476953907816, "PointLiesOnLine_3": 0.49101796407185627 }, "eval_runtime": 321.7153, "eval_samples_per_second": 32.638, "eval_steps_per_second": 0.653, "step": 1500 }, { "epoch": 3.002, "grad_norm": 2.2160494327545166, "learning_rate": 2e-05, "loss": 0.04842204, "step": 1501 }, { "epoch": 3.004, "grad_norm": 2.940279006958008, "learning_rate": 2e-05, "loss": 0.06389765, "step": 1502 }, { "epoch": 3.006, "grad_norm": 1.4351305961608887, "learning_rate": 2e-05, "loss": 0.03972311, "step": 1503 }, { "epoch": 3.008, "grad_norm": 1.8173421621322632, "learning_rate": 2e-05, "loss": 0.06714956, "step": 1504 }, { "epoch": 3.01, "grad_norm": 1.6168220043182373, "learning_rate": 2e-05, "loss": 0.05728213, "step": 1505 }, { "epoch": 3.012, "grad_norm": 2.713430166244507, "learning_rate": 2e-05, "loss": 0.08517258, "step": 1506 }, { "epoch": 3.014, "grad_norm": 1.6535557508468628, "learning_rate": 2e-05, "loss": 0.05091209, "step": 1507 }, { "epoch": 3.016, "grad_norm": 1.5731172561645508, "learning_rate": 2e-05, "loss": 0.04936452, "step": 1508 }, { "epoch": 3.018, "grad_norm": 2.121694326400757, "learning_rate": 2e-05, "loss": 0.05214013, "step": 1509 }, { "epoch": 3.02, "grad_norm": 1.7850323915481567, "learning_rate": 2e-05, "loss": 0.04803599, "step": 1510 }, { "epoch": 3.022, "grad_norm": 1.0655592679977417, "learning_rate": 2e-05, "loss": 0.04061773, "step": 1511 }, { "epoch": 3.024, "grad_norm": 1.7327584028244019, "learning_rate": 2e-05, "loss": 0.05965272, "step": 1512 }, { "epoch": 3.026, "grad_norm": 1.4035505056381226, "learning_rate": 2e-05, "loss": 0.04205831, "step": 1513 }, { "epoch": 3.028, "grad_norm": 1.2730896472930908, "learning_rate": 2e-05, "loss": 0.04025223, "step": 1514 }, { "epoch": 3.03, "grad_norm": 1.6347522735595703, "learning_rate": 2e-05, "loss": 0.06551866, "step": 1515 }, { "epoch": 3.032, "grad_norm": 1.24717116355896, "learning_rate": 2e-05, "loss": 0.04352552, "step": 1516 }, { "epoch": 3.034, "grad_norm": 2.378849744796753, "learning_rate": 2e-05, "loss": 0.05578585, "step": 1517 }, { "epoch": 3.036, "grad_norm": 1.5153645277023315, "learning_rate": 2e-05, "loss": 0.06007598, "step": 1518 }, { "epoch": 3.038, "grad_norm": 1.4317395687103271, "learning_rate": 2e-05, "loss": 0.04686758, "step": 1519 }, { "epoch": 3.04, "grad_norm": 1.150730013847351, "learning_rate": 2e-05, "loss": 0.03575693, "step": 1520 }, { "epoch": 3.042, "grad_norm": 1.221483588218689, "learning_rate": 2e-05, "loss": 0.03753293, "step": 1521 }, { "epoch": 3.044, "grad_norm": 1.595436453819275, "learning_rate": 2e-05, "loss": 0.04876781, "step": 1522 }, { "epoch": 3.046, "grad_norm": 1.5910844802856445, "learning_rate": 2e-05, "loss": 0.04243572, "step": 1523 }, { "epoch": 3.048, "grad_norm": 1.333764672279358, "learning_rate": 2e-05, "loss": 0.04689807, "step": 1524 }, { "epoch": 3.05, "grad_norm": 1.244879961013794, "learning_rate": 2e-05, "loss": 0.05209486, "step": 1525 }, { "epoch": 3.052, "grad_norm": 2.523118734359741, "learning_rate": 2e-05, "loss": 0.05441052, "step": 1526 }, { "epoch": 3.054, "grad_norm": 1.1830792427062988, "learning_rate": 2e-05, "loss": 0.03513453, "step": 1527 }, { "epoch": 3.056, "grad_norm": 1.4500566720962524, "learning_rate": 2e-05, "loss": 0.03788603, "step": 1528 }, { "epoch": 3.058, "grad_norm": 1.9590797424316406, "learning_rate": 2e-05, "loss": 0.07833979, "step": 1529 }, { "epoch": 3.06, "grad_norm": 2.1240177154541016, "learning_rate": 2e-05, "loss": 0.06781566, "step": 1530 }, { "epoch": 3.062, "grad_norm": 2.877790689468384, "learning_rate": 2e-05, "loss": 0.05031575, "step": 1531 }, { "epoch": 3.064, "grad_norm": 1.827868938446045, "learning_rate": 2e-05, "loss": 0.05153899, "step": 1532 }, { "epoch": 3.066, "grad_norm": 1.4657886028289795, "learning_rate": 2e-05, "loss": 0.05448882, "step": 1533 }, { "epoch": 3.068, "grad_norm": 2.052285671234131, "learning_rate": 2e-05, "loss": 0.06243248, "step": 1534 }, { "epoch": 3.07, "grad_norm": 0.7502951622009277, "learning_rate": 2e-05, "loss": 0.02465053, "step": 1535 }, { "epoch": 3.072, "grad_norm": 1.1672446727752686, "learning_rate": 2e-05, "loss": 0.03131329, "step": 1536 }, { "epoch": 3.074, "grad_norm": 6.168121337890625, "learning_rate": 2e-05, "loss": 0.05717981, "step": 1537 }, { "epoch": 3.076, "grad_norm": 1.6621938943862915, "learning_rate": 2e-05, "loss": 0.04681021, "step": 1538 }, { "epoch": 3.078, "grad_norm": 1.9949926137924194, "learning_rate": 2e-05, "loss": 0.05344428, "step": 1539 }, { "epoch": 3.08, "grad_norm": 1.7297910451889038, "learning_rate": 2e-05, "loss": 0.06000186, "step": 1540 }, { "epoch": 3.082, "grad_norm": 1.9841177463531494, "learning_rate": 2e-05, "loss": 0.05356345, "step": 1541 }, { "epoch": 3.084, "grad_norm": 1.619390606880188, "learning_rate": 2e-05, "loss": 0.05409247, "step": 1542 }, { "epoch": 3.086, "grad_norm": 2.220137357711792, "learning_rate": 2e-05, "loss": 0.06488518, "step": 1543 }, { "epoch": 3.088, "grad_norm": 1.2586517333984375, "learning_rate": 2e-05, "loss": 0.0334148, "step": 1544 }, { "epoch": 3.09, "grad_norm": 2.6507513523101807, "learning_rate": 2e-05, "loss": 0.05396133, "step": 1545 }, { "epoch": 3.092, "grad_norm": 2.484825372695923, "learning_rate": 2e-05, "loss": 0.05893756, "step": 1546 }, { "epoch": 3.094, "grad_norm": 1.530316710472107, "learning_rate": 2e-05, "loss": 0.04786216, "step": 1547 }, { "epoch": 3.096, "grad_norm": 1.5662510395050049, "learning_rate": 2e-05, "loss": 0.05811158, "step": 1548 }, { "epoch": 3.098, "grad_norm": 1.7030950784683228, "learning_rate": 2e-05, "loss": 0.06066509, "step": 1549 }, { "epoch": 3.1, "grad_norm": 1.5851967334747314, "learning_rate": 2e-05, "loss": 0.06345069, "step": 1550 }, { "epoch": 3.102, "grad_norm": 2.7735044956207275, "learning_rate": 2e-05, "loss": 0.0733242, "step": 1551 }, { "epoch": 3.104, "grad_norm": 1.4052716493606567, "learning_rate": 2e-05, "loss": 0.03768426, "step": 1552 }, { "epoch": 3.106, "grad_norm": 2.072314977645874, "learning_rate": 2e-05, "loss": 0.06333002, "step": 1553 }, { "epoch": 3.108, "grad_norm": 1.3217315673828125, "learning_rate": 2e-05, "loss": 0.04815859, "step": 1554 }, { "epoch": 3.11, "grad_norm": 1.2543447017669678, "learning_rate": 2e-05, "loss": 0.04736142, "step": 1555 }, { "epoch": 3.112, "grad_norm": 1.8522039651870728, "learning_rate": 2e-05, "loss": 0.04406268, "step": 1556 }, { "epoch": 3.114, "grad_norm": 1.7204567193984985, "learning_rate": 2e-05, "loss": 0.04844861, "step": 1557 }, { "epoch": 3.116, "grad_norm": 1.9749001264572144, "learning_rate": 2e-05, "loss": 0.05746056, "step": 1558 }, { "epoch": 3.118, "grad_norm": 1.9995803833007812, "learning_rate": 2e-05, "loss": 0.05737771, "step": 1559 }, { "epoch": 3.12, "grad_norm": 2.1211681365966797, "learning_rate": 2e-05, "loss": 0.04288347, "step": 1560 }, { "epoch": 3.122, "grad_norm": 1.4146476984024048, "learning_rate": 2e-05, "loss": 0.05071546, "step": 1561 }, { "epoch": 3.124, "grad_norm": 1.660503625869751, "learning_rate": 2e-05, "loss": 0.05359786, "step": 1562 }, { "epoch": 3.126, "grad_norm": 2.073803186416626, "learning_rate": 2e-05, "loss": 0.0541298, "step": 1563 }, { "epoch": 3.128, "grad_norm": 2.1681854724884033, "learning_rate": 2e-05, "loss": 0.03845184, "step": 1564 }, { "epoch": 3.13, "grad_norm": 1.6317102909088135, "learning_rate": 2e-05, "loss": 0.04459539, "step": 1565 }, { "epoch": 3.132, "grad_norm": 1.2800451517105103, "learning_rate": 2e-05, "loss": 0.05038659, "step": 1566 }, { "epoch": 3.134, "grad_norm": 1.627054214477539, "learning_rate": 2e-05, "loss": 0.05846292, "step": 1567 }, { "epoch": 3.136, "grad_norm": 1.5014915466308594, "learning_rate": 2e-05, "loss": 0.0461411, "step": 1568 }, { "epoch": 3.138, "grad_norm": 1.4676257371902466, "learning_rate": 2e-05, "loss": 0.04794473, "step": 1569 }, { "epoch": 3.14, "grad_norm": 0.8870722055435181, "learning_rate": 2e-05, "loss": 0.0262022, "step": 1570 }, { "epoch": 3.142, "grad_norm": 1.5116006135940552, "learning_rate": 2e-05, "loss": 0.04896776, "step": 1571 }, { "epoch": 3.144, "grad_norm": 2.2803430557250977, "learning_rate": 2e-05, "loss": 0.03939671, "step": 1572 }, { "epoch": 3.146, "grad_norm": 1.6452845335006714, "learning_rate": 2e-05, "loss": 0.06173242, "step": 1573 }, { "epoch": 3.148, "grad_norm": 1.3423179388046265, "learning_rate": 2e-05, "loss": 0.03313474, "step": 1574 }, { "epoch": 3.15, "grad_norm": 1.491223692893982, "learning_rate": 2e-05, "loss": 0.04076514, "step": 1575 }, { "epoch": 3.152, "grad_norm": 1.7405002117156982, "learning_rate": 2e-05, "loss": 0.06292631, "step": 1576 }, { "epoch": 3.154, "grad_norm": 2.301616907119751, "learning_rate": 2e-05, "loss": 0.04174115, "step": 1577 }, { "epoch": 3.156, "grad_norm": 1.4608789682388306, "learning_rate": 2e-05, "loss": 0.03542221, "step": 1578 }, { "epoch": 3.158, "grad_norm": 1.6150435209274292, "learning_rate": 2e-05, "loss": 0.0463053, "step": 1579 }, { "epoch": 3.16, "grad_norm": 1.156436562538147, "learning_rate": 2e-05, "loss": 0.03821928, "step": 1580 }, { "epoch": 3.162, "grad_norm": 1.483613133430481, "learning_rate": 2e-05, "loss": 0.06052083, "step": 1581 }, { "epoch": 3.164, "grad_norm": 3.3407390117645264, "learning_rate": 2e-05, "loss": 0.05487797, "step": 1582 }, { "epoch": 3.166, "grad_norm": 1.3785268068313599, "learning_rate": 2e-05, "loss": 0.04208247, "step": 1583 }, { "epoch": 3.168, "grad_norm": 2.0314083099365234, "learning_rate": 2e-05, "loss": 0.03377452, "step": 1584 }, { "epoch": 3.17, "grad_norm": 2.2946577072143555, "learning_rate": 2e-05, "loss": 0.03864856, "step": 1585 }, { "epoch": 3.172, "grad_norm": 1.5391055345535278, "learning_rate": 2e-05, "loss": 0.04968462, "step": 1586 }, { "epoch": 3.174, "grad_norm": 2.3214282989501953, "learning_rate": 2e-05, "loss": 0.0490016, "step": 1587 }, { "epoch": 3.176, "grad_norm": 2.729959487915039, "learning_rate": 2e-05, "loss": 0.06228638, "step": 1588 }, { "epoch": 3.178, "grad_norm": 1.3159915208816528, "learning_rate": 2e-05, "loss": 0.04129592, "step": 1589 }, { "epoch": 3.18, "grad_norm": 2.102053642272949, "learning_rate": 2e-05, "loss": 0.06420384, "step": 1590 }, { "epoch": 3.182, "grad_norm": 1.6806936264038086, "learning_rate": 2e-05, "loss": 0.04690688, "step": 1591 }, { "epoch": 3.184, "grad_norm": 1.5850759744644165, "learning_rate": 2e-05, "loss": 0.05270191, "step": 1592 }, { "epoch": 3.186, "grad_norm": 1.6031419038772583, "learning_rate": 2e-05, "loss": 0.04544835, "step": 1593 }, { "epoch": 3.188, "grad_norm": 1.542733907699585, "learning_rate": 2e-05, "loss": 0.05052012, "step": 1594 }, { "epoch": 3.19, "grad_norm": 1.3358275890350342, "learning_rate": 2e-05, "loss": 0.04404087, "step": 1595 }, { "epoch": 3.192, "grad_norm": 3.1650984287261963, "learning_rate": 2e-05, "loss": 0.0582697, "step": 1596 }, { "epoch": 3.194, "grad_norm": 2.426821231842041, "learning_rate": 2e-05, "loss": 0.06109107, "step": 1597 }, { "epoch": 3.196, "grad_norm": 1.221462607383728, "learning_rate": 2e-05, "loss": 0.03468029, "step": 1598 }, { "epoch": 3.198, "grad_norm": 1.832042932510376, "learning_rate": 2e-05, "loss": 0.04503892, "step": 1599 }, { "epoch": 3.2, "grad_norm": 1.6637980937957764, "learning_rate": 2e-05, "loss": 0.05590947, "step": 1600 }, { "epoch": 3.202, "grad_norm": 1.707131266593933, "learning_rate": 2e-05, "loss": 0.04391931, "step": 1601 }, { "epoch": 3.204, "grad_norm": 1.2049839496612549, "learning_rate": 2e-05, "loss": 0.03543879, "step": 1602 }, { "epoch": 3.206, "grad_norm": 1.2202122211456299, "learning_rate": 2e-05, "loss": 0.0452034, "step": 1603 }, { "epoch": 3.208, "grad_norm": 1.4739149808883667, "learning_rate": 2e-05, "loss": 0.05101965, "step": 1604 }, { "epoch": 3.21, "grad_norm": 1.7410937547683716, "learning_rate": 2e-05, "loss": 0.05315409, "step": 1605 }, { "epoch": 3.212, "grad_norm": 1.943514108657837, "learning_rate": 2e-05, "loss": 0.05091185, "step": 1606 }, { "epoch": 3.214, "grad_norm": 2.420563220977783, "learning_rate": 2e-05, "loss": 0.04633863, "step": 1607 }, { "epoch": 3.216, "grad_norm": 1.5087945461273193, "learning_rate": 2e-05, "loss": 0.0458011, "step": 1608 }, { "epoch": 3.218, "grad_norm": 1.2308595180511475, "learning_rate": 2e-05, "loss": 0.04334211, "step": 1609 }, { "epoch": 3.22, "grad_norm": 1.7495505809783936, "learning_rate": 2e-05, "loss": 0.05809302, "step": 1610 }, { "epoch": 3.222, "grad_norm": 1.3433486223220825, "learning_rate": 2e-05, "loss": 0.0264309, "step": 1611 }, { "epoch": 3.224, "grad_norm": 2.1560001373291016, "learning_rate": 2e-05, "loss": 0.05610461, "step": 1612 }, { "epoch": 3.226, "grad_norm": 1.4553375244140625, "learning_rate": 2e-05, "loss": 0.06240574, "step": 1613 }, { "epoch": 3.228, "grad_norm": 1.6326318979263306, "learning_rate": 2e-05, "loss": 0.04741699, "step": 1614 }, { "epoch": 3.23, "grad_norm": 2.0145437717437744, "learning_rate": 2e-05, "loss": 0.06130683, "step": 1615 }, { "epoch": 3.232, "grad_norm": 2.395770311355591, "learning_rate": 2e-05, "loss": 0.06174326, "step": 1616 }, { "epoch": 3.234, "grad_norm": 1.4244645833969116, "learning_rate": 2e-05, "loss": 0.04702844, "step": 1617 }, { "epoch": 3.2359999999999998, "grad_norm": 1.611910104751587, "learning_rate": 2e-05, "loss": 0.05492343, "step": 1618 }, { "epoch": 3.238, "grad_norm": 1.3452534675598145, "learning_rate": 2e-05, "loss": 0.04002935, "step": 1619 }, { "epoch": 3.24, "grad_norm": 1.4256281852722168, "learning_rate": 2e-05, "loss": 0.06138282, "step": 1620 }, { "epoch": 3.242, "grad_norm": 2.094010353088379, "learning_rate": 2e-05, "loss": 0.05636217, "step": 1621 }, { "epoch": 3.2439999999999998, "grad_norm": 1.8228808641433716, "learning_rate": 2e-05, "loss": 0.05640348, "step": 1622 }, { "epoch": 3.246, "grad_norm": 0.991633415222168, "learning_rate": 2e-05, "loss": 0.03676727, "step": 1623 }, { "epoch": 3.248, "grad_norm": 1.5511411428451538, "learning_rate": 2e-05, "loss": 0.03195164, "step": 1624 }, { "epoch": 3.25, "grad_norm": 1.3886339664459229, "learning_rate": 2e-05, "loss": 0.06085005, "step": 1625 }, { "epoch": 3.252, "grad_norm": 1.6809405088424683, "learning_rate": 2e-05, "loss": 0.06114616, "step": 1626 }, { "epoch": 3.254, "grad_norm": 1.2058086395263672, "learning_rate": 2e-05, "loss": 0.03140325, "step": 1627 }, { "epoch": 3.2560000000000002, "grad_norm": 1.2712700366973877, "learning_rate": 2e-05, "loss": 0.05181075, "step": 1628 }, { "epoch": 3.258, "grad_norm": 1.6511715650558472, "learning_rate": 2e-05, "loss": 0.03143429, "step": 1629 }, { "epoch": 3.26, "grad_norm": 1.5783659219741821, "learning_rate": 2e-05, "loss": 0.04343471, "step": 1630 }, { "epoch": 3.262, "grad_norm": 0.9034122824668884, "learning_rate": 2e-05, "loss": 0.02218392, "step": 1631 }, { "epoch": 3.2640000000000002, "grad_norm": 1.5698509216308594, "learning_rate": 2e-05, "loss": 0.05552916, "step": 1632 }, { "epoch": 3.266, "grad_norm": 1.4005275964736938, "learning_rate": 2e-05, "loss": 0.03984913, "step": 1633 }, { "epoch": 3.268, "grad_norm": 1.3338640928268433, "learning_rate": 2e-05, "loss": 0.03644662, "step": 1634 }, { "epoch": 3.27, "grad_norm": 1.7735025882720947, "learning_rate": 2e-05, "loss": 0.03694466, "step": 1635 }, { "epoch": 3.2720000000000002, "grad_norm": 2.160663604736328, "learning_rate": 2e-05, "loss": 0.05129207, "step": 1636 }, { "epoch": 3.274, "grad_norm": 2.0547683238983154, "learning_rate": 2e-05, "loss": 0.03293342, "step": 1637 }, { "epoch": 3.276, "grad_norm": 1.309456467628479, "learning_rate": 2e-05, "loss": 0.04493797, "step": 1638 }, { "epoch": 3.278, "grad_norm": 2.039910316467285, "learning_rate": 2e-05, "loss": 0.06677075, "step": 1639 }, { "epoch": 3.2800000000000002, "grad_norm": 2.196495771408081, "learning_rate": 2e-05, "loss": 0.05029328, "step": 1640 }, { "epoch": 3.282, "grad_norm": 2.049243927001953, "learning_rate": 2e-05, "loss": 0.05796874, "step": 1641 }, { "epoch": 3.284, "grad_norm": 2.323168992996216, "learning_rate": 2e-05, "loss": 0.04754086, "step": 1642 }, { "epoch": 3.286, "grad_norm": 1.9049650430679321, "learning_rate": 2e-05, "loss": 0.05514568, "step": 1643 }, { "epoch": 3.288, "grad_norm": 1.898848295211792, "learning_rate": 2e-05, "loss": 0.03569252, "step": 1644 }, { "epoch": 3.29, "grad_norm": 2.345487356185913, "learning_rate": 2e-05, "loss": 0.0490138, "step": 1645 }, { "epoch": 3.292, "grad_norm": 2.001127004623413, "learning_rate": 2e-05, "loss": 0.05215742, "step": 1646 }, { "epoch": 3.294, "grad_norm": 1.4057964086532593, "learning_rate": 2e-05, "loss": 0.03418482, "step": 1647 }, { "epoch": 3.296, "grad_norm": 1.412276029586792, "learning_rate": 2e-05, "loss": 0.03291162, "step": 1648 }, { "epoch": 3.298, "grad_norm": 1.5926165580749512, "learning_rate": 2e-05, "loss": 0.04363141, "step": 1649 }, { "epoch": 3.3, "grad_norm": 1.9674831628799438, "learning_rate": 2e-05, "loss": 0.06002123, "step": 1650 }, { "epoch": 3.302, "grad_norm": 1.581549048423767, "learning_rate": 2e-05, "loss": 0.07400438, "step": 1651 }, { "epoch": 3.304, "grad_norm": 1.1746406555175781, "learning_rate": 2e-05, "loss": 0.03233917, "step": 1652 }, { "epoch": 3.306, "grad_norm": 1.3594640493392944, "learning_rate": 2e-05, "loss": 0.06445151, "step": 1653 }, { "epoch": 3.308, "grad_norm": 1.3005573749542236, "learning_rate": 2e-05, "loss": 0.03622051, "step": 1654 }, { "epoch": 3.31, "grad_norm": 0.8649536371231079, "learning_rate": 2e-05, "loss": 0.03161732, "step": 1655 }, { "epoch": 3.312, "grad_norm": 1.1120051145553589, "learning_rate": 2e-05, "loss": 0.03670903, "step": 1656 }, { "epoch": 3.314, "grad_norm": 2.0635063648223877, "learning_rate": 2e-05, "loss": 0.03296005, "step": 1657 }, { "epoch": 3.316, "grad_norm": 1.942264437675476, "learning_rate": 2e-05, "loss": 0.04086999, "step": 1658 }, { "epoch": 3.318, "grad_norm": 1.2889881134033203, "learning_rate": 2e-05, "loss": 0.03653413, "step": 1659 }, { "epoch": 3.32, "grad_norm": 1.5247739553451538, "learning_rate": 2e-05, "loss": 0.05022556, "step": 1660 }, { "epoch": 3.322, "grad_norm": 1.208240270614624, "learning_rate": 2e-05, "loss": 0.04346113, "step": 1661 }, { "epoch": 3.324, "grad_norm": 1.5146716833114624, "learning_rate": 2e-05, "loss": 0.04634988, "step": 1662 }, { "epoch": 3.326, "grad_norm": 1.9538823366165161, "learning_rate": 2e-05, "loss": 0.04254812, "step": 1663 }, { "epoch": 3.328, "grad_norm": 2.021803379058838, "learning_rate": 2e-05, "loss": 0.05505613, "step": 1664 }, { "epoch": 3.33, "grad_norm": 1.4671275615692139, "learning_rate": 2e-05, "loss": 0.04185315, "step": 1665 }, { "epoch": 3.332, "grad_norm": 1.3730452060699463, "learning_rate": 2e-05, "loss": 0.04696137, "step": 1666 }, { "epoch": 3.334, "grad_norm": 1.9206724166870117, "learning_rate": 2e-05, "loss": 0.05052917, "step": 1667 }, { "epoch": 3.336, "grad_norm": 1.4539846181869507, "learning_rate": 2e-05, "loss": 0.04785593, "step": 1668 }, { "epoch": 3.338, "grad_norm": 1.1836252212524414, "learning_rate": 2e-05, "loss": 0.03441792, "step": 1669 }, { "epoch": 3.34, "grad_norm": 1.3856287002563477, "learning_rate": 2e-05, "loss": 0.03965211, "step": 1670 }, { "epoch": 3.342, "grad_norm": 1.7527414560317993, "learning_rate": 2e-05, "loss": 0.05044956, "step": 1671 }, { "epoch": 3.344, "grad_norm": 1.460963249206543, "learning_rate": 2e-05, "loss": 0.0464677, "step": 1672 }, { "epoch": 3.346, "grad_norm": 2.3444771766662598, "learning_rate": 2e-05, "loss": 0.05217274, "step": 1673 }, { "epoch": 3.348, "grad_norm": 1.5559501647949219, "learning_rate": 2e-05, "loss": 0.04852623, "step": 1674 }, { "epoch": 3.35, "grad_norm": 1.9685094356536865, "learning_rate": 2e-05, "loss": 0.07367245, "step": 1675 }, { "epoch": 3.352, "grad_norm": 1.3340469598770142, "learning_rate": 2e-05, "loss": 0.03031318, "step": 1676 }, { "epoch": 3.354, "grad_norm": 1.5422812700271606, "learning_rate": 2e-05, "loss": 0.04720259, "step": 1677 }, { "epoch": 3.356, "grad_norm": 1.6444870233535767, "learning_rate": 2e-05, "loss": 0.05606821, "step": 1678 }, { "epoch": 3.358, "grad_norm": 2.2023096084594727, "learning_rate": 2e-05, "loss": 0.06454101, "step": 1679 }, { "epoch": 3.36, "grad_norm": 1.5785565376281738, "learning_rate": 2e-05, "loss": 0.04989371, "step": 1680 }, { "epoch": 3.362, "grad_norm": 1.6977547407150269, "learning_rate": 2e-05, "loss": 0.06437075, "step": 1681 }, { "epoch": 3.364, "grad_norm": 2.207683801651001, "learning_rate": 2e-05, "loss": 0.05562439, "step": 1682 }, { "epoch": 3.366, "grad_norm": 1.5128238201141357, "learning_rate": 2e-05, "loss": 0.05253971, "step": 1683 }, { "epoch": 3.368, "grad_norm": 1.5503803491592407, "learning_rate": 2e-05, "loss": 0.03294653, "step": 1684 }, { "epoch": 3.37, "grad_norm": 2.744611978530884, "learning_rate": 2e-05, "loss": 0.0665991, "step": 1685 }, { "epoch": 3.372, "grad_norm": 1.9426320791244507, "learning_rate": 2e-05, "loss": 0.03777114, "step": 1686 }, { "epoch": 3.374, "grad_norm": 1.7858504056930542, "learning_rate": 2e-05, "loss": 0.06296871, "step": 1687 }, { "epoch": 3.376, "grad_norm": 1.6751807928085327, "learning_rate": 2e-05, "loss": 0.0362752, "step": 1688 }, { "epoch": 3.378, "grad_norm": 1.8033543825149536, "learning_rate": 2e-05, "loss": 0.05158364, "step": 1689 }, { "epoch": 3.38, "grad_norm": 1.0224000215530396, "learning_rate": 2e-05, "loss": 0.03424178, "step": 1690 }, { "epoch": 3.382, "grad_norm": 1.2834762334823608, "learning_rate": 2e-05, "loss": 0.04011747, "step": 1691 }, { "epoch": 3.384, "grad_norm": 1.573851466178894, "learning_rate": 2e-05, "loss": 0.04472176, "step": 1692 }, { "epoch": 3.386, "grad_norm": 1.0216726064682007, "learning_rate": 2e-05, "loss": 0.03067708, "step": 1693 }, { "epoch": 3.388, "grad_norm": 1.6406019926071167, "learning_rate": 2e-05, "loss": 0.06715198, "step": 1694 }, { "epoch": 3.39, "grad_norm": 1.9565666913986206, "learning_rate": 2e-05, "loss": 0.04441306, "step": 1695 }, { "epoch": 3.392, "grad_norm": 1.312374234199524, "learning_rate": 2e-05, "loss": 0.04021595, "step": 1696 }, { "epoch": 3.394, "grad_norm": 1.5732117891311646, "learning_rate": 2e-05, "loss": 0.05825553, "step": 1697 }, { "epoch": 3.396, "grad_norm": 1.7435842752456665, "learning_rate": 2e-05, "loss": 0.03737545, "step": 1698 }, { "epoch": 3.398, "grad_norm": 1.3936766386032104, "learning_rate": 2e-05, "loss": 0.04649488, "step": 1699 }, { "epoch": 3.4, "grad_norm": 1.222420573234558, "learning_rate": 2e-05, "loss": 0.04838939, "step": 1700 }, { "epoch": 3.402, "grad_norm": 1.7847157716751099, "learning_rate": 2e-05, "loss": 0.04798005, "step": 1701 }, { "epoch": 3.404, "grad_norm": 1.8903214931488037, "learning_rate": 2e-05, "loss": 0.04042132, "step": 1702 }, { "epoch": 3.406, "grad_norm": 1.9824771881103516, "learning_rate": 2e-05, "loss": 0.04405872, "step": 1703 }, { "epoch": 3.408, "grad_norm": 2.978909730911255, "learning_rate": 2e-05, "loss": 0.06235117, "step": 1704 }, { "epoch": 3.41, "grad_norm": 2.1281321048736572, "learning_rate": 2e-05, "loss": 0.0631174, "step": 1705 }, { "epoch": 3.412, "grad_norm": 1.207214117050171, "learning_rate": 2e-05, "loss": 0.04360072, "step": 1706 }, { "epoch": 3.414, "grad_norm": 2.4614744186401367, "learning_rate": 2e-05, "loss": 0.05545459, "step": 1707 }, { "epoch": 3.416, "grad_norm": 2.2295455932617188, "learning_rate": 2e-05, "loss": 0.05138841, "step": 1708 }, { "epoch": 3.418, "grad_norm": 1.6426289081573486, "learning_rate": 2e-05, "loss": 0.03976878, "step": 1709 }, { "epoch": 3.42, "grad_norm": 2.6790168285369873, "learning_rate": 2e-05, "loss": 0.06980205, "step": 1710 }, { "epoch": 3.422, "grad_norm": 1.8094228506088257, "learning_rate": 2e-05, "loss": 0.05368996, "step": 1711 }, { "epoch": 3.424, "grad_norm": 2.0319266319274902, "learning_rate": 2e-05, "loss": 0.0543413, "step": 1712 }, { "epoch": 3.426, "grad_norm": 1.3606593608856201, "learning_rate": 2e-05, "loss": 0.03617115, "step": 1713 }, { "epoch": 3.428, "grad_norm": 4.077547550201416, "learning_rate": 2e-05, "loss": 0.0491549, "step": 1714 }, { "epoch": 3.43, "grad_norm": 2.273555040359497, "learning_rate": 2e-05, "loss": 0.06512973, "step": 1715 }, { "epoch": 3.432, "grad_norm": 1.533727765083313, "learning_rate": 2e-05, "loss": 0.05353583, "step": 1716 }, { "epoch": 3.434, "grad_norm": 1.5889472961425781, "learning_rate": 2e-05, "loss": 0.05391718, "step": 1717 }, { "epoch": 3.436, "grad_norm": 1.513043761253357, "learning_rate": 2e-05, "loss": 0.05352363, "step": 1718 }, { "epoch": 3.438, "grad_norm": 1.708634853363037, "learning_rate": 2e-05, "loss": 0.042217, "step": 1719 }, { "epoch": 3.44, "grad_norm": 1.5580065250396729, "learning_rate": 2e-05, "loss": 0.05138306, "step": 1720 }, { "epoch": 3.442, "grad_norm": 1.3362562656402588, "learning_rate": 2e-05, "loss": 0.04820231, "step": 1721 }, { "epoch": 3.444, "grad_norm": 1.8119944334030151, "learning_rate": 2e-05, "loss": 0.03760799, "step": 1722 }, { "epoch": 3.446, "grad_norm": 1.7421449422836304, "learning_rate": 2e-05, "loss": 0.04001708, "step": 1723 }, { "epoch": 3.448, "grad_norm": 1.6809686422348022, "learning_rate": 2e-05, "loss": 0.05122007, "step": 1724 }, { "epoch": 3.45, "grad_norm": 3.9546453952789307, "learning_rate": 2e-05, "loss": 0.04566801, "step": 1725 }, { "epoch": 3.452, "grad_norm": 1.462601661682129, "learning_rate": 2e-05, "loss": 0.04439667, "step": 1726 }, { "epoch": 3.454, "grad_norm": 2.2390847206115723, "learning_rate": 2e-05, "loss": 0.03910905, "step": 1727 }, { "epoch": 3.456, "grad_norm": 1.7394695281982422, "learning_rate": 2e-05, "loss": 0.05194352, "step": 1728 }, { "epoch": 3.458, "grad_norm": 2.4958033561706543, "learning_rate": 2e-05, "loss": 0.05337561, "step": 1729 }, { "epoch": 3.46, "grad_norm": 2.3025898933410645, "learning_rate": 2e-05, "loss": 0.04093119, "step": 1730 }, { "epoch": 3.462, "grad_norm": 1.6952155828475952, "learning_rate": 2e-05, "loss": 0.0470874, "step": 1731 }, { "epoch": 3.464, "grad_norm": 1.3717252016067505, "learning_rate": 2e-05, "loss": 0.02857566, "step": 1732 }, { "epoch": 3.466, "grad_norm": 2.161407232284546, "learning_rate": 2e-05, "loss": 0.06686375, "step": 1733 }, { "epoch": 3.468, "grad_norm": 1.595219373703003, "learning_rate": 2e-05, "loss": 0.05054267, "step": 1734 }, { "epoch": 3.4699999999999998, "grad_norm": 1.613440752029419, "learning_rate": 2e-05, "loss": 0.02639636, "step": 1735 }, { "epoch": 3.472, "grad_norm": 2.263852596282959, "learning_rate": 2e-05, "loss": 0.05445278, "step": 1736 }, { "epoch": 3.474, "grad_norm": 2.566851854324341, "learning_rate": 2e-05, "loss": 0.05587868, "step": 1737 }, { "epoch": 3.476, "grad_norm": 2.8093175888061523, "learning_rate": 2e-05, "loss": 0.05327387, "step": 1738 }, { "epoch": 3.4779999999999998, "grad_norm": 1.6908395290374756, "learning_rate": 2e-05, "loss": 0.04662281, "step": 1739 }, { "epoch": 3.48, "grad_norm": 3.4398069381713867, "learning_rate": 2e-05, "loss": 0.06926405, "step": 1740 }, { "epoch": 3.482, "grad_norm": 1.7976182699203491, "learning_rate": 2e-05, "loss": 0.05067788, "step": 1741 }, { "epoch": 3.484, "grad_norm": 1.8646904230117798, "learning_rate": 2e-05, "loss": 0.04594427, "step": 1742 }, { "epoch": 3.4859999999999998, "grad_norm": 1.8577203750610352, "learning_rate": 2e-05, "loss": 0.05025358, "step": 1743 }, { "epoch": 3.488, "grad_norm": 1.3906282186508179, "learning_rate": 2e-05, "loss": 0.04262529, "step": 1744 }, { "epoch": 3.49, "grad_norm": 1.8370797634124756, "learning_rate": 2e-05, "loss": 0.05269311, "step": 1745 }, { "epoch": 3.492, "grad_norm": 2.2525057792663574, "learning_rate": 2e-05, "loss": 0.04886598, "step": 1746 }, { "epoch": 3.4939999999999998, "grad_norm": 1.8896589279174805, "learning_rate": 2e-05, "loss": 0.05545288, "step": 1747 }, { "epoch": 3.496, "grad_norm": 1.3138065338134766, "learning_rate": 2e-05, "loss": 0.04557213, "step": 1748 }, { "epoch": 3.498, "grad_norm": 2.4323270320892334, "learning_rate": 2e-05, "loss": 0.05506615, "step": 1749 }, { "epoch": 3.5, "grad_norm": 1.2671258449554443, "learning_rate": 2e-05, "loss": 0.03962918, "step": 1750 }, { "epoch": 3.502, "grad_norm": 1.2083110809326172, "learning_rate": 2e-05, "loss": 0.04052875, "step": 1751 }, { "epoch": 3.504, "grad_norm": 2.204509735107422, "learning_rate": 2e-05, "loss": 0.05543607, "step": 1752 }, { "epoch": 3.5060000000000002, "grad_norm": 2.0916733741760254, "learning_rate": 2e-05, "loss": 0.05030297, "step": 1753 }, { "epoch": 3.508, "grad_norm": 1.2749862670898438, "learning_rate": 2e-05, "loss": 0.05265682, "step": 1754 }, { "epoch": 3.51, "grad_norm": 1.2697100639343262, "learning_rate": 2e-05, "loss": 0.04578009, "step": 1755 }, { "epoch": 3.512, "grad_norm": 2.575673818588257, "learning_rate": 2e-05, "loss": 0.05128983, "step": 1756 }, { "epoch": 3.5140000000000002, "grad_norm": 1.548751711845398, "learning_rate": 2e-05, "loss": 0.04093613, "step": 1757 }, { "epoch": 3.516, "grad_norm": 1.305117130279541, "learning_rate": 2e-05, "loss": 0.04004899, "step": 1758 }, { "epoch": 3.518, "grad_norm": 1.4659463167190552, "learning_rate": 2e-05, "loss": 0.04068562, "step": 1759 }, { "epoch": 3.52, "grad_norm": 2.3224215507507324, "learning_rate": 2e-05, "loss": 0.03831296, "step": 1760 }, { "epoch": 3.5220000000000002, "grad_norm": 1.6149941682815552, "learning_rate": 2e-05, "loss": 0.0297474, "step": 1761 }, { "epoch": 3.524, "grad_norm": 1.7875992059707642, "learning_rate": 2e-05, "loss": 0.04237871, "step": 1762 }, { "epoch": 3.526, "grad_norm": 1.6973800659179688, "learning_rate": 2e-05, "loss": 0.0459117, "step": 1763 }, { "epoch": 3.528, "grad_norm": 2.5080761909484863, "learning_rate": 2e-05, "loss": 0.05171615, "step": 1764 }, { "epoch": 3.5300000000000002, "grad_norm": 1.5514436960220337, "learning_rate": 2e-05, "loss": 0.03801546, "step": 1765 }, { "epoch": 3.532, "grad_norm": 1.5856919288635254, "learning_rate": 2e-05, "loss": 0.03361356, "step": 1766 }, { "epoch": 3.534, "grad_norm": 1.5322203636169434, "learning_rate": 2e-05, "loss": 0.04025748, "step": 1767 }, { "epoch": 3.536, "grad_norm": 2.3193933963775635, "learning_rate": 2e-05, "loss": 0.06852245, "step": 1768 }, { "epoch": 3.5380000000000003, "grad_norm": 2.101360559463501, "learning_rate": 2e-05, "loss": 0.06704597, "step": 1769 }, { "epoch": 3.54, "grad_norm": 2.1348562240600586, "learning_rate": 2e-05, "loss": 0.03175708, "step": 1770 }, { "epoch": 3.542, "grad_norm": 1.613877296447754, "learning_rate": 2e-05, "loss": 0.05038162, "step": 1771 }, { "epoch": 3.544, "grad_norm": 1.5137550830841064, "learning_rate": 2e-05, "loss": 0.04188209, "step": 1772 }, { "epoch": 3.5460000000000003, "grad_norm": 1.4603201150894165, "learning_rate": 2e-05, "loss": 0.03608978, "step": 1773 }, { "epoch": 3.548, "grad_norm": 1.6376720666885376, "learning_rate": 2e-05, "loss": 0.05980166, "step": 1774 }, { "epoch": 3.55, "grad_norm": 2.329392671585083, "learning_rate": 2e-05, "loss": 0.04924781, "step": 1775 }, { "epoch": 3.552, "grad_norm": 2.361161470413208, "learning_rate": 2e-05, "loss": 0.05286585, "step": 1776 }, { "epoch": 3.5540000000000003, "grad_norm": 1.530290126800537, "learning_rate": 2e-05, "loss": 0.03231184, "step": 1777 }, { "epoch": 3.556, "grad_norm": 1.1076228618621826, "learning_rate": 2e-05, "loss": 0.02891509, "step": 1778 }, { "epoch": 3.558, "grad_norm": 1.950429916381836, "learning_rate": 2e-05, "loss": 0.06025671, "step": 1779 }, { "epoch": 3.56, "grad_norm": 1.4713891744613647, "learning_rate": 2e-05, "loss": 0.04263363, "step": 1780 }, { "epoch": 3.5620000000000003, "grad_norm": 1.9598925113677979, "learning_rate": 2e-05, "loss": 0.0472781, "step": 1781 }, { "epoch": 3.564, "grad_norm": 1.9756190776824951, "learning_rate": 2e-05, "loss": 0.05379218, "step": 1782 }, { "epoch": 3.566, "grad_norm": 1.9871381521224976, "learning_rate": 2e-05, "loss": 0.04629632, "step": 1783 }, { "epoch": 3.568, "grad_norm": 2.374814510345459, "learning_rate": 2e-05, "loss": 0.05010781, "step": 1784 }, { "epoch": 3.57, "grad_norm": 1.6160145998001099, "learning_rate": 2e-05, "loss": 0.04810391, "step": 1785 }, { "epoch": 3.572, "grad_norm": 1.333565354347229, "learning_rate": 2e-05, "loss": 0.04096656, "step": 1786 }, { "epoch": 3.574, "grad_norm": 1.2630254030227661, "learning_rate": 2e-05, "loss": 0.04580157, "step": 1787 }, { "epoch": 3.576, "grad_norm": 2.068432569503784, "learning_rate": 2e-05, "loss": 0.06533036, "step": 1788 }, { "epoch": 3.578, "grad_norm": 1.422972321510315, "learning_rate": 2e-05, "loss": 0.04293206, "step": 1789 }, { "epoch": 3.58, "grad_norm": 1.5883013010025024, "learning_rate": 2e-05, "loss": 0.03554819, "step": 1790 }, { "epoch": 3.582, "grad_norm": 1.2352162599563599, "learning_rate": 2e-05, "loss": 0.03722744, "step": 1791 }, { "epoch": 3.584, "grad_norm": 1.440484642982483, "learning_rate": 2e-05, "loss": 0.03654459, "step": 1792 }, { "epoch": 3.586, "grad_norm": 2.2191922664642334, "learning_rate": 2e-05, "loss": 0.05253022, "step": 1793 }, { "epoch": 3.588, "grad_norm": 1.7797856330871582, "learning_rate": 2e-05, "loss": 0.04316117, "step": 1794 }, { "epoch": 3.59, "grad_norm": 1.476678729057312, "learning_rate": 2e-05, "loss": 0.04034651, "step": 1795 }, { "epoch": 3.592, "grad_norm": 1.2704788446426392, "learning_rate": 2e-05, "loss": 0.03967745, "step": 1796 }, { "epoch": 3.594, "grad_norm": 2.31673002243042, "learning_rate": 2e-05, "loss": 0.0395114, "step": 1797 }, { "epoch": 3.596, "grad_norm": 1.2823116779327393, "learning_rate": 2e-05, "loss": 0.04466548, "step": 1798 }, { "epoch": 3.598, "grad_norm": 1.1849486827850342, "learning_rate": 2e-05, "loss": 0.04506912, "step": 1799 }, { "epoch": 3.6, "grad_norm": 1.558552622795105, "learning_rate": 2e-05, "loss": 0.05311072, "step": 1800 }, { "epoch": 3.602, "grad_norm": 1.5442121028900146, "learning_rate": 2e-05, "loss": 0.0526191, "step": 1801 }, { "epoch": 3.604, "grad_norm": 1.8791892528533936, "learning_rate": 2e-05, "loss": 0.06307591, "step": 1802 }, { "epoch": 3.606, "grad_norm": 1.6081328392028809, "learning_rate": 2e-05, "loss": 0.03651448, "step": 1803 }, { "epoch": 3.608, "grad_norm": 1.4104983806610107, "learning_rate": 2e-05, "loss": 0.04336455, "step": 1804 }, { "epoch": 3.61, "grad_norm": 2.381561756134033, "learning_rate": 2e-05, "loss": 0.05898841, "step": 1805 }, { "epoch": 3.612, "grad_norm": 1.3324648141860962, "learning_rate": 2e-05, "loss": 0.03454057, "step": 1806 }, { "epoch": 3.614, "grad_norm": 1.5297226905822754, "learning_rate": 2e-05, "loss": 0.05038363, "step": 1807 }, { "epoch": 3.616, "grad_norm": 1.5506995916366577, "learning_rate": 2e-05, "loss": 0.05092872, "step": 1808 }, { "epoch": 3.618, "grad_norm": 1.181553602218628, "learning_rate": 2e-05, "loss": 0.03173397, "step": 1809 }, { "epoch": 3.62, "grad_norm": 1.6461181640625, "learning_rate": 2e-05, "loss": 0.05175463, "step": 1810 }, { "epoch": 3.622, "grad_norm": 1.9184272289276123, "learning_rate": 2e-05, "loss": 0.06124686, "step": 1811 }, { "epoch": 3.624, "grad_norm": 1.6065723896026611, "learning_rate": 2e-05, "loss": 0.04484671, "step": 1812 }, { "epoch": 3.626, "grad_norm": 1.443337082862854, "learning_rate": 2e-05, "loss": 0.04267926, "step": 1813 }, { "epoch": 3.628, "grad_norm": 1.178184151649475, "learning_rate": 2e-05, "loss": 0.02920187, "step": 1814 }, { "epoch": 3.63, "grad_norm": 1.146735429763794, "learning_rate": 2e-05, "loss": 0.02786646, "step": 1815 }, { "epoch": 3.632, "grad_norm": 1.1989175081253052, "learning_rate": 2e-05, "loss": 0.03249002, "step": 1816 }, { "epoch": 3.634, "grad_norm": 1.540666103363037, "learning_rate": 2e-05, "loss": 0.03965627, "step": 1817 }, { "epoch": 3.636, "grad_norm": 1.4439377784729004, "learning_rate": 2e-05, "loss": 0.04156953, "step": 1818 }, { "epoch": 3.638, "grad_norm": 1.2657922506332397, "learning_rate": 2e-05, "loss": 0.03509516, "step": 1819 }, { "epoch": 3.64, "grad_norm": 2.9393463134765625, "learning_rate": 2e-05, "loss": 0.04102836, "step": 1820 }, { "epoch": 3.642, "grad_norm": 1.263395071029663, "learning_rate": 2e-05, "loss": 0.03908949, "step": 1821 }, { "epoch": 3.644, "grad_norm": 1.1321371793746948, "learning_rate": 2e-05, "loss": 0.03896394, "step": 1822 }, { "epoch": 3.646, "grad_norm": 1.1515556573867798, "learning_rate": 2e-05, "loss": 0.0317642, "step": 1823 }, { "epoch": 3.648, "grad_norm": 1.4203664064407349, "learning_rate": 2e-05, "loss": 0.05269869, "step": 1824 }, { "epoch": 3.65, "grad_norm": 1.6567198038101196, "learning_rate": 2e-05, "loss": 0.03793368, "step": 1825 }, { "epoch": 3.652, "grad_norm": 2.2994821071624756, "learning_rate": 2e-05, "loss": 0.0429872, "step": 1826 }, { "epoch": 3.654, "grad_norm": 1.6607061624526978, "learning_rate": 2e-05, "loss": 0.03379823, "step": 1827 }, { "epoch": 3.656, "grad_norm": 1.7107027769088745, "learning_rate": 2e-05, "loss": 0.04675277, "step": 1828 }, { "epoch": 3.658, "grad_norm": 1.8303852081298828, "learning_rate": 2e-05, "loss": 0.04841805, "step": 1829 }, { "epoch": 3.66, "grad_norm": 1.8825572729110718, "learning_rate": 2e-05, "loss": 0.04874373, "step": 1830 }, { "epoch": 3.662, "grad_norm": 1.7029788494110107, "learning_rate": 2e-05, "loss": 0.05050893, "step": 1831 }, { "epoch": 3.664, "grad_norm": 1.1050704717636108, "learning_rate": 2e-05, "loss": 0.03281447, "step": 1832 }, { "epoch": 3.666, "grad_norm": 1.4161999225616455, "learning_rate": 2e-05, "loss": 0.0544338, "step": 1833 }, { "epoch": 3.668, "grad_norm": 1.964059829711914, "learning_rate": 2e-05, "loss": 0.03872099, "step": 1834 }, { "epoch": 3.67, "grad_norm": 2.016484022140503, "learning_rate": 2e-05, "loss": 0.05875476, "step": 1835 }, { "epoch": 3.672, "grad_norm": 1.473386287689209, "learning_rate": 2e-05, "loss": 0.03022585, "step": 1836 }, { "epoch": 3.674, "grad_norm": 1.348415493965149, "learning_rate": 2e-05, "loss": 0.04690105, "step": 1837 }, { "epoch": 3.676, "grad_norm": 0.9426001310348511, "learning_rate": 2e-05, "loss": 0.02384089, "step": 1838 }, { "epoch": 3.678, "grad_norm": 3.1298086643218994, "learning_rate": 2e-05, "loss": 0.06468902, "step": 1839 }, { "epoch": 3.68, "grad_norm": 1.0675461292266846, "learning_rate": 2e-05, "loss": 0.03523882, "step": 1840 }, { "epoch": 3.682, "grad_norm": 0.8646126985549927, "learning_rate": 2e-05, "loss": 0.01838133, "step": 1841 }, { "epoch": 3.684, "grad_norm": 1.141176700592041, "learning_rate": 2e-05, "loss": 0.03865944, "step": 1842 }, { "epoch": 3.686, "grad_norm": 2.4700798988342285, "learning_rate": 2e-05, "loss": 0.04170835, "step": 1843 }, { "epoch": 3.6879999999999997, "grad_norm": 1.2326807975769043, "learning_rate": 2e-05, "loss": 0.03476126, "step": 1844 }, { "epoch": 3.69, "grad_norm": 1.714464545249939, "learning_rate": 2e-05, "loss": 0.03549024, "step": 1845 }, { "epoch": 3.692, "grad_norm": 1.038811206817627, "learning_rate": 2e-05, "loss": 0.02916805, "step": 1846 }, { "epoch": 3.694, "grad_norm": 2.0106890201568604, "learning_rate": 2e-05, "loss": 0.04282944, "step": 1847 }, { "epoch": 3.6959999999999997, "grad_norm": 1.4869582653045654, "learning_rate": 2e-05, "loss": 0.0293393, "step": 1848 }, { "epoch": 3.698, "grad_norm": 1.699388861656189, "learning_rate": 2e-05, "loss": 0.03086464, "step": 1849 }, { "epoch": 3.7, "grad_norm": 1.3598302602767944, "learning_rate": 2e-05, "loss": 0.03127223, "step": 1850 }, { "epoch": 3.702, "grad_norm": 1.3136847019195557, "learning_rate": 2e-05, "loss": 0.03167934, "step": 1851 }, { "epoch": 3.7039999999999997, "grad_norm": 1.9143792390823364, "learning_rate": 2e-05, "loss": 0.03092597, "step": 1852 }, { "epoch": 3.706, "grad_norm": 1.9676464796066284, "learning_rate": 2e-05, "loss": 0.04451537, "step": 1853 }, { "epoch": 3.708, "grad_norm": 0.9787660241127014, "learning_rate": 2e-05, "loss": 0.02394712, "step": 1854 }, { "epoch": 3.71, "grad_norm": 2.257038116455078, "learning_rate": 2e-05, "loss": 0.04062683, "step": 1855 }, { "epoch": 3.7119999999999997, "grad_norm": 1.5607311725616455, "learning_rate": 2e-05, "loss": 0.02945242, "step": 1856 }, { "epoch": 3.714, "grad_norm": 1.4506772756576538, "learning_rate": 2e-05, "loss": 0.04430741, "step": 1857 }, { "epoch": 3.716, "grad_norm": 2.7266697883605957, "learning_rate": 2e-05, "loss": 0.03880604, "step": 1858 }, { "epoch": 3.718, "grad_norm": 1.774509072303772, "learning_rate": 2e-05, "loss": 0.04546557, "step": 1859 }, { "epoch": 3.7199999999999998, "grad_norm": 2.044501543045044, "learning_rate": 2e-05, "loss": 0.05731916, "step": 1860 }, { "epoch": 3.722, "grad_norm": 1.1676855087280273, "learning_rate": 2e-05, "loss": 0.03595718, "step": 1861 }, { "epoch": 3.724, "grad_norm": 1.1329604387283325, "learning_rate": 2e-05, "loss": 0.03459671, "step": 1862 }, { "epoch": 3.726, "grad_norm": 1.5749149322509766, "learning_rate": 2e-05, "loss": 0.03534314, "step": 1863 }, { "epoch": 3.7279999999999998, "grad_norm": 2.428170680999756, "learning_rate": 2e-05, "loss": 0.08179246, "step": 1864 }, { "epoch": 3.73, "grad_norm": 1.7956899404525757, "learning_rate": 2e-05, "loss": 0.06140782, "step": 1865 }, { "epoch": 3.732, "grad_norm": 1.7499301433563232, "learning_rate": 2e-05, "loss": 0.04886729, "step": 1866 }, { "epoch": 3.734, "grad_norm": 1.4074490070343018, "learning_rate": 2e-05, "loss": 0.04775254, "step": 1867 }, { "epoch": 3.7359999999999998, "grad_norm": 1.8201322555541992, "learning_rate": 2e-05, "loss": 0.04804891, "step": 1868 }, { "epoch": 3.738, "grad_norm": 1.9848655462265015, "learning_rate": 2e-05, "loss": 0.05379272, "step": 1869 }, { "epoch": 3.74, "grad_norm": 1.2629104852676392, "learning_rate": 2e-05, "loss": 0.03760821, "step": 1870 }, { "epoch": 3.742, "grad_norm": 1.1420437097549438, "learning_rate": 2e-05, "loss": 0.02983224, "step": 1871 }, { "epoch": 3.7439999999999998, "grad_norm": 1.4850512742996216, "learning_rate": 2e-05, "loss": 0.04077002, "step": 1872 }, { "epoch": 3.746, "grad_norm": 1.3481850624084473, "learning_rate": 2e-05, "loss": 0.04353747, "step": 1873 }, { "epoch": 3.748, "grad_norm": 1.613179326057434, "learning_rate": 2e-05, "loss": 0.04657446, "step": 1874 }, { "epoch": 3.75, "grad_norm": 1.436995029449463, "learning_rate": 2e-05, "loss": 0.04443494, "step": 1875 }, { "epoch": 3.752, "grad_norm": 2.0734074115753174, "learning_rate": 2e-05, "loss": 0.05218915, "step": 1876 }, { "epoch": 3.754, "grad_norm": 1.7502717971801758, "learning_rate": 2e-05, "loss": 0.0336446, "step": 1877 }, { "epoch": 3.7560000000000002, "grad_norm": 1.8787667751312256, "learning_rate": 2e-05, "loss": 0.04104609, "step": 1878 }, { "epoch": 3.758, "grad_norm": 2.121859550476074, "learning_rate": 2e-05, "loss": 0.04827151, "step": 1879 }, { "epoch": 3.76, "grad_norm": 2.4621684551239014, "learning_rate": 2e-05, "loss": 0.04723193, "step": 1880 }, { "epoch": 3.762, "grad_norm": 2.063389778137207, "learning_rate": 2e-05, "loss": 0.04516068, "step": 1881 }, { "epoch": 3.7640000000000002, "grad_norm": 1.6342523097991943, "learning_rate": 2e-05, "loss": 0.05464017, "step": 1882 }, { "epoch": 3.766, "grad_norm": 1.649129867553711, "learning_rate": 2e-05, "loss": 0.06015805, "step": 1883 }, { "epoch": 3.768, "grad_norm": 1.4318526983261108, "learning_rate": 2e-05, "loss": 0.030782, "step": 1884 }, { "epoch": 3.77, "grad_norm": 1.4619206190109253, "learning_rate": 2e-05, "loss": 0.04468732, "step": 1885 }, { "epoch": 3.7720000000000002, "grad_norm": 2.3498082160949707, "learning_rate": 2e-05, "loss": 0.03591853, "step": 1886 }, { "epoch": 3.774, "grad_norm": 2.933544397354126, "learning_rate": 2e-05, "loss": 0.04159092, "step": 1887 }, { "epoch": 3.776, "grad_norm": 3.0242841243743896, "learning_rate": 2e-05, "loss": 0.07077288, "step": 1888 }, { "epoch": 3.778, "grad_norm": 0.9896936416625977, "learning_rate": 2e-05, "loss": 0.0333928, "step": 1889 }, { "epoch": 3.7800000000000002, "grad_norm": 2.85379958152771, "learning_rate": 2e-05, "loss": 0.04417305, "step": 1890 }, { "epoch": 3.782, "grad_norm": 1.0257110595703125, "learning_rate": 2e-05, "loss": 0.03142691, "step": 1891 }, { "epoch": 3.784, "grad_norm": 1.1791033744812012, "learning_rate": 2e-05, "loss": 0.03500428, "step": 1892 }, { "epoch": 3.786, "grad_norm": 1.6889530420303345, "learning_rate": 2e-05, "loss": 0.05240297, "step": 1893 }, { "epoch": 3.7880000000000003, "grad_norm": 1.7314518690109253, "learning_rate": 2e-05, "loss": 0.04307431, "step": 1894 }, { "epoch": 3.79, "grad_norm": 1.1842002868652344, "learning_rate": 2e-05, "loss": 0.04247685, "step": 1895 }, { "epoch": 3.792, "grad_norm": 2.100853681564331, "learning_rate": 2e-05, "loss": 0.05021702, "step": 1896 }, { "epoch": 3.794, "grad_norm": 1.5216481685638428, "learning_rate": 2e-05, "loss": 0.03309655, "step": 1897 }, { "epoch": 3.7960000000000003, "grad_norm": 2.276388168334961, "learning_rate": 2e-05, "loss": 0.05299261, "step": 1898 }, { "epoch": 3.798, "grad_norm": 2.0013959407806396, "learning_rate": 2e-05, "loss": 0.04387742, "step": 1899 }, { "epoch": 3.8, "grad_norm": 1.5426726341247559, "learning_rate": 2e-05, "loss": 0.04475781, "step": 1900 }, { "epoch": 3.802, "grad_norm": 1.730944037437439, "learning_rate": 2e-05, "loss": 0.04797804, "step": 1901 }, { "epoch": 3.8040000000000003, "grad_norm": 3.512629747390747, "learning_rate": 2e-05, "loss": 0.06400887, "step": 1902 }, { "epoch": 3.806, "grad_norm": 1.5255076885223389, "learning_rate": 2e-05, "loss": 0.03512551, "step": 1903 }, { "epoch": 3.808, "grad_norm": 1.6803914308547974, "learning_rate": 2e-05, "loss": 0.04707751, "step": 1904 }, { "epoch": 3.81, "grad_norm": 1.6464742422103882, "learning_rate": 2e-05, "loss": 0.04364287, "step": 1905 }, { "epoch": 3.8120000000000003, "grad_norm": 1.5913269519805908, "learning_rate": 2e-05, "loss": 0.05464795, "step": 1906 }, { "epoch": 3.814, "grad_norm": 1.8374617099761963, "learning_rate": 2e-05, "loss": 0.04795897, "step": 1907 }, { "epoch": 3.816, "grad_norm": 2.12955641746521, "learning_rate": 2e-05, "loss": 0.06177934, "step": 1908 }, { "epoch": 3.818, "grad_norm": 1.9309619665145874, "learning_rate": 2e-05, "loss": 0.02248725, "step": 1909 }, { "epoch": 3.82, "grad_norm": 1.7270091772079468, "learning_rate": 2e-05, "loss": 0.03895885, "step": 1910 }, { "epoch": 3.822, "grad_norm": 1.6948938369750977, "learning_rate": 2e-05, "loss": 0.06202386, "step": 1911 }, { "epoch": 3.824, "grad_norm": 1.287054181098938, "learning_rate": 2e-05, "loss": 0.03878157, "step": 1912 }, { "epoch": 3.826, "grad_norm": 1.2202260494232178, "learning_rate": 2e-05, "loss": 0.03554882, "step": 1913 }, { "epoch": 3.828, "grad_norm": 0.9808116555213928, "learning_rate": 2e-05, "loss": 0.02788639, "step": 1914 }, { "epoch": 3.83, "grad_norm": 1.3717739582061768, "learning_rate": 2e-05, "loss": 0.04405915, "step": 1915 }, { "epoch": 3.832, "grad_norm": 1.2487013339996338, "learning_rate": 2e-05, "loss": 0.03764036, "step": 1916 }, { "epoch": 3.834, "grad_norm": 1.122071623802185, "learning_rate": 2e-05, "loss": 0.0333156, "step": 1917 }, { "epoch": 3.836, "grad_norm": 1.3125051259994507, "learning_rate": 2e-05, "loss": 0.04306629, "step": 1918 }, { "epoch": 3.838, "grad_norm": 1.7904739379882812, "learning_rate": 2e-05, "loss": 0.0361197, "step": 1919 }, { "epoch": 3.84, "grad_norm": 1.1583505868911743, "learning_rate": 2e-05, "loss": 0.03510105, "step": 1920 }, { "epoch": 3.842, "grad_norm": 1.6849479675292969, "learning_rate": 2e-05, "loss": 0.04817548, "step": 1921 }, { "epoch": 3.844, "grad_norm": 2.617076873779297, "learning_rate": 2e-05, "loss": 0.06620353, "step": 1922 }, { "epoch": 3.846, "grad_norm": 2.478874683380127, "learning_rate": 2e-05, "loss": 0.04455588, "step": 1923 }, { "epoch": 3.848, "grad_norm": 1.146331548690796, "learning_rate": 2e-05, "loss": 0.04490707, "step": 1924 }, { "epoch": 3.85, "grad_norm": 1.0759429931640625, "learning_rate": 2e-05, "loss": 0.03621227, "step": 1925 }, { "epoch": 3.852, "grad_norm": 1.6324303150177002, "learning_rate": 2e-05, "loss": 0.04010741, "step": 1926 }, { "epoch": 3.854, "grad_norm": 2.0158345699310303, "learning_rate": 2e-05, "loss": 0.05399124, "step": 1927 }, { "epoch": 3.856, "grad_norm": 1.5364727973937988, "learning_rate": 2e-05, "loss": 0.04785627, "step": 1928 }, { "epoch": 3.858, "grad_norm": 1.6611193418502808, "learning_rate": 2e-05, "loss": 0.03403072, "step": 1929 }, { "epoch": 3.86, "grad_norm": 2.6205263137817383, "learning_rate": 2e-05, "loss": 0.04760019, "step": 1930 }, { "epoch": 3.862, "grad_norm": 1.1166229248046875, "learning_rate": 2e-05, "loss": 0.03396893, "step": 1931 }, { "epoch": 3.864, "grad_norm": 2.4609854221343994, "learning_rate": 2e-05, "loss": 0.05028281, "step": 1932 }, { "epoch": 3.866, "grad_norm": 1.2173758745193481, "learning_rate": 2e-05, "loss": 0.03039585, "step": 1933 }, { "epoch": 3.868, "grad_norm": 1.3882139921188354, "learning_rate": 2e-05, "loss": 0.04112496, "step": 1934 }, { "epoch": 3.87, "grad_norm": 1.486579418182373, "learning_rate": 2e-05, "loss": 0.03768492, "step": 1935 }, { "epoch": 3.872, "grad_norm": 1.4759503602981567, "learning_rate": 2e-05, "loss": 0.03338876, "step": 1936 }, { "epoch": 3.874, "grad_norm": 1.290807843208313, "learning_rate": 2e-05, "loss": 0.03643284, "step": 1937 }, { "epoch": 3.876, "grad_norm": 1.520533800125122, "learning_rate": 2e-05, "loss": 0.04213966, "step": 1938 }, { "epoch": 3.878, "grad_norm": 1.262356162071228, "learning_rate": 2e-05, "loss": 0.04538347, "step": 1939 }, { "epoch": 3.88, "grad_norm": 2.381612777709961, "learning_rate": 2e-05, "loss": 0.03353626, "step": 1940 }, { "epoch": 3.882, "grad_norm": 1.6339051723480225, "learning_rate": 2e-05, "loss": 0.04302477, "step": 1941 }, { "epoch": 3.884, "grad_norm": 1.4285898208618164, "learning_rate": 2e-05, "loss": 0.05180392, "step": 1942 }, { "epoch": 3.886, "grad_norm": 1.1873326301574707, "learning_rate": 2e-05, "loss": 0.03653756, "step": 1943 }, { "epoch": 3.888, "grad_norm": 1.3497165441513062, "learning_rate": 2e-05, "loss": 0.03856751, "step": 1944 }, { "epoch": 3.89, "grad_norm": 1.258852481842041, "learning_rate": 2e-05, "loss": 0.04039215, "step": 1945 }, { "epoch": 3.892, "grad_norm": 1.983088731765747, "learning_rate": 2e-05, "loss": 0.04817604, "step": 1946 }, { "epoch": 3.894, "grad_norm": 2.92964768409729, "learning_rate": 2e-05, "loss": 0.053841, "step": 1947 }, { "epoch": 3.896, "grad_norm": 1.1992443799972534, "learning_rate": 2e-05, "loss": 0.04172537, "step": 1948 }, { "epoch": 3.898, "grad_norm": 1.1795976161956787, "learning_rate": 2e-05, "loss": 0.03664827, "step": 1949 }, { "epoch": 3.9, "grad_norm": 1.2616045475006104, "learning_rate": 2e-05, "loss": 0.04682916, "step": 1950 }, { "epoch": 3.902, "grad_norm": 3.0420405864715576, "learning_rate": 2e-05, "loss": 0.0575822, "step": 1951 }, { "epoch": 3.904, "grad_norm": 1.0600016117095947, "learning_rate": 2e-05, "loss": 0.03584382, "step": 1952 }, { "epoch": 3.906, "grad_norm": 1.396815299987793, "learning_rate": 2e-05, "loss": 0.03886579, "step": 1953 }, { "epoch": 3.908, "grad_norm": 2.188819408416748, "learning_rate": 2e-05, "loss": 0.06812514, "step": 1954 }, { "epoch": 3.91, "grad_norm": 1.5482600927352905, "learning_rate": 2e-05, "loss": 0.03375176, "step": 1955 }, { "epoch": 3.912, "grad_norm": 1.212693691253662, "learning_rate": 2e-05, "loss": 0.0395262, "step": 1956 }, { "epoch": 3.914, "grad_norm": 2.2088680267333984, "learning_rate": 2e-05, "loss": 0.03797888, "step": 1957 }, { "epoch": 3.916, "grad_norm": 1.1913862228393555, "learning_rate": 2e-05, "loss": 0.02954198, "step": 1958 }, { "epoch": 3.918, "grad_norm": 1.874579906463623, "learning_rate": 2e-05, "loss": 0.05844161, "step": 1959 }, { "epoch": 3.92, "grad_norm": 1.4673166275024414, "learning_rate": 2e-05, "loss": 0.05150342, "step": 1960 }, { "epoch": 3.922, "grad_norm": 2.861666679382324, "learning_rate": 2e-05, "loss": 0.05179626, "step": 1961 }, { "epoch": 3.924, "grad_norm": 1.771929144859314, "learning_rate": 2e-05, "loss": 0.04365551, "step": 1962 }, { "epoch": 3.926, "grad_norm": 1.3273606300354004, "learning_rate": 2e-05, "loss": 0.05284184, "step": 1963 }, { "epoch": 3.928, "grad_norm": 1.9596598148345947, "learning_rate": 2e-05, "loss": 0.05003749, "step": 1964 }, { "epoch": 3.93, "grad_norm": 2.103850841522217, "learning_rate": 2e-05, "loss": 0.04942531, "step": 1965 }, { "epoch": 3.932, "grad_norm": 1.1391785144805908, "learning_rate": 2e-05, "loss": 0.02786987, "step": 1966 }, { "epoch": 3.934, "grad_norm": 1.7148336172103882, "learning_rate": 2e-05, "loss": 0.04135071, "step": 1967 }, { "epoch": 3.936, "grad_norm": 1.2991632223129272, "learning_rate": 2e-05, "loss": 0.06019251, "step": 1968 }, { "epoch": 3.9379999999999997, "grad_norm": 1.5509945154190063, "learning_rate": 2e-05, "loss": 0.03836875, "step": 1969 }, { "epoch": 3.94, "grad_norm": 1.69589364528656, "learning_rate": 2e-05, "loss": 0.05219426, "step": 1970 }, { "epoch": 3.942, "grad_norm": 1.6247663497924805, "learning_rate": 2e-05, "loss": 0.05570246, "step": 1971 }, { "epoch": 3.944, "grad_norm": 1.2264633178710938, "learning_rate": 2e-05, "loss": 0.04570191, "step": 1972 }, { "epoch": 3.9459999999999997, "grad_norm": 2.1913163661956787, "learning_rate": 2e-05, "loss": 0.05165469, "step": 1973 }, { "epoch": 3.948, "grad_norm": 1.0560282468795776, "learning_rate": 2e-05, "loss": 0.02876104, "step": 1974 }, { "epoch": 3.95, "grad_norm": 1.5731909275054932, "learning_rate": 2e-05, "loss": 0.04167511, "step": 1975 }, { "epoch": 3.952, "grad_norm": 1.399192452430725, "learning_rate": 2e-05, "loss": 0.04265817, "step": 1976 }, { "epoch": 3.9539999999999997, "grad_norm": 1.8346514701843262, "learning_rate": 2e-05, "loss": 0.04521923, "step": 1977 }, { "epoch": 3.956, "grad_norm": 1.6072944402694702, "learning_rate": 2e-05, "loss": 0.04234926, "step": 1978 }, { "epoch": 3.958, "grad_norm": 1.2165111303329468, "learning_rate": 2e-05, "loss": 0.03402507, "step": 1979 }, { "epoch": 3.96, "grad_norm": 1.2799257040023804, "learning_rate": 2e-05, "loss": 0.0419311, "step": 1980 }, { "epoch": 3.9619999999999997, "grad_norm": 1.89944589138031, "learning_rate": 2e-05, "loss": 0.06260209, "step": 1981 }, { "epoch": 3.964, "grad_norm": 1.6802691221237183, "learning_rate": 2e-05, "loss": 0.06025416, "step": 1982 }, { "epoch": 3.966, "grad_norm": 1.2507396936416626, "learning_rate": 2e-05, "loss": 0.03038237, "step": 1983 }, { "epoch": 3.968, "grad_norm": 1.3977298736572266, "learning_rate": 2e-05, "loss": 0.04241063, "step": 1984 }, { "epoch": 3.9699999999999998, "grad_norm": 1.399631381034851, "learning_rate": 2e-05, "loss": 0.03736674, "step": 1985 }, { "epoch": 3.972, "grad_norm": 1.1037839651107788, "learning_rate": 2e-05, "loss": 0.0263209, "step": 1986 }, { "epoch": 3.974, "grad_norm": 1.7092723846435547, "learning_rate": 2e-05, "loss": 0.04478872, "step": 1987 }, { "epoch": 3.976, "grad_norm": 2.23612117767334, "learning_rate": 2e-05, "loss": 0.03863956, "step": 1988 }, { "epoch": 3.9779999999999998, "grad_norm": 1.2527269124984741, "learning_rate": 2e-05, "loss": 0.04319048, "step": 1989 }, { "epoch": 3.98, "grad_norm": 1.2724004983901978, "learning_rate": 2e-05, "loss": 0.04712515, "step": 1990 }, { "epoch": 3.982, "grad_norm": 1.1430199146270752, "learning_rate": 2e-05, "loss": 0.03822499, "step": 1991 }, { "epoch": 3.984, "grad_norm": 1.8308535814285278, "learning_rate": 2e-05, "loss": 0.0425014, "step": 1992 }, { "epoch": 3.9859999999999998, "grad_norm": 1.5834559202194214, "learning_rate": 2e-05, "loss": 0.04654464, "step": 1993 }, { "epoch": 3.988, "grad_norm": 1.4839645624160767, "learning_rate": 2e-05, "loss": 0.06130801, "step": 1994 }, { "epoch": 3.99, "grad_norm": 1.290731430053711, "learning_rate": 2e-05, "loss": 0.03394639, "step": 1995 }, { "epoch": 3.992, "grad_norm": 1.468553066253662, "learning_rate": 2e-05, "loss": 0.05200931, "step": 1996 }, { "epoch": 3.9939999999999998, "grad_norm": 2.330862522125244, "learning_rate": 2e-05, "loss": 0.04680311, "step": 1997 }, { "epoch": 3.996, "grad_norm": 1.8820016384124756, "learning_rate": 2e-05, "loss": 0.06257437, "step": 1998 }, { "epoch": 3.998, "grad_norm": 1.4825798273086548, "learning_rate": 2e-05, "loss": 0.03755412, "step": 1999 }, { "epoch": 4.0, "grad_norm": 1.8379441499710083, "learning_rate": 2e-05, "loss": 0.04851647, "step": 2000 }, { "epoch": 4.0, "eval_performance": { "AngleClassification_1": 0.986, "AngleClassification_2": 0.978, "AngleClassification_3": 0.7784431137724551, "Equal_1": 0.972, "Equal_2": 0.874251497005988, "Equal_3": 0.7445109780439122, "LineComparison_1": 0.988, "LineComparison_2": 0.9860279441117764, "LineComparison_3": 0.9481037924151696, "Parallel_1": 0.9919839679358717, "Parallel_2": 0.9779559118236473, "Parallel_3": 0.752, "Perpendicular_1": 0.974, "Perpendicular_2": 0.512, "Perpendicular_3": 0.2565130260521042, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9976666666666667, "PointLiesOnCircle_3": 0.9385333333333333, "PointLiesOnLine_1": 0.9819639278557114, "PointLiesOnLine_2": 0.9579158316633266, "PointLiesOnLine_3": 0.5249500998003992 }, "eval_runtime": 321.6565, "eval_samples_per_second": 32.644, "eval_steps_per_second": 0.653, "step": 2000 }, { "epoch": 4.002, "grad_norm": 2.689896583557129, "learning_rate": 2e-05, "loss": 0.06689534, "step": 2001 }, { "epoch": 4.004, "grad_norm": 2.3726253509521484, "learning_rate": 2e-05, "loss": 0.06310491, "step": 2002 }, { "epoch": 4.006, "grad_norm": 1.7566965818405151, "learning_rate": 2e-05, "loss": 0.05225249, "step": 2003 }, { "epoch": 4.008, "grad_norm": 1.3011726140975952, "learning_rate": 2e-05, "loss": 0.03929686, "step": 2004 }, { "epoch": 4.01, "grad_norm": 1.2295416593551636, "learning_rate": 2e-05, "loss": 0.03298599, "step": 2005 }, { "epoch": 4.012, "grad_norm": 1.9119962453842163, "learning_rate": 2e-05, "loss": 0.04884803, "step": 2006 }, { "epoch": 4.014, "grad_norm": 1.5606404542922974, "learning_rate": 2e-05, "loss": 0.04282958, "step": 2007 }, { "epoch": 4.016, "grad_norm": 2.755392074584961, "learning_rate": 2e-05, "loss": 0.05961449, "step": 2008 }, { "epoch": 4.018, "grad_norm": 1.6373087167739868, "learning_rate": 2e-05, "loss": 0.05367161, "step": 2009 }, { "epoch": 4.02, "grad_norm": 2.529672861099243, "learning_rate": 2e-05, "loss": 0.04300452, "step": 2010 }, { "epoch": 4.022, "grad_norm": 1.8200832605361938, "learning_rate": 2e-05, "loss": 0.05623111, "step": 2011 }, { "epoch": 4.024, "grad_norm": 1.3841335773468018, "learning_rate": 2e-05, "loss": 0.03216402, "step": 2012 }, { "epoch": 4.026, "grad_norm": 2.039104700088501, "learning_rate": 2e-05, "loss": 0.0455704, "step": 2013 }, { "epoch": 4.028, "grad_norm": 2.0491440296173096, "learning_rate": 2e-05, "loss": 0.03618041, "step": 2014 }, { "epoch": 4.03, "grad_norm": 1.554465889930725, "learning_rate": 2e-05, "loss": 0.02181048, "step": 2015 }, { "epoch": 4.032, "grad_norm": 1.2771095037460327, "learning_rate": 2e-05, "loss": 0.03514237, "step": 2016 }, { "epoch": 4.034, "grad_norm": 1.587876558303833, "learning_rate": 2e-05, "loss": 0.04905714, "step": 2017 }, { "epoch": 4.036, "grad_norm": 2.0158097743988037, "learning_rate": 2e-05, "loss": 0.05387116, "step": 2018 }, { "epoch": 4.038, "grad_norm": 2.1845550537109375, "learning_rate": 2e-05, "loss": 0.06457246, "step": 2019 }, { "epoch": 4.04, "grad_norm": 1.155949354171753, "learning_rate": 2e-05, "loss": 0.02948808, "step": 2020 }, { "epoch": 4.042, "grad_norm": 1.5860084295272827, "learning_rate": 2e-05, "loss": 0.05309289, "step": 2021 }, { "epoch": 4.044, "grad_norm": 1.1510523557662964, "learning_rate": 2e-05, "loss": 0.04007091, "step": 2022 }, { "epoch": 4.046, "grad_norm": 2.096463680267334, "learning_rate": 2e-05, "loss": 0.05365406, "step": 2023 }, { "epoch": 4.048, "grad_norm": 1.4027256965637207, "learning_rate": 2e-05, "loss": 0.03679424, "step": 2024 }, { "epoch": 4.05, "grad_norm": 1.503724455833435, "learning_rate": 2e-05, "loss": 0.03995445, "step": 2025 }, { "epoch": 4.052, "grad_norm": 3.1506829261779785, "learning_rate": 2e-05, "loss": 0.07259705, "step": 2026 }, { "epoch": 4.054, "grad_norm": 1.8490493297576904, "learning_rate": 2e-05, "loss": 0.04446945, "step": 2027 }, { "epoch": 4.056, "grad_norm": 1.203890323638916, "learning_rate": 2e-05, "loss": 0.04504851, "step": 2028 }, { "epoch": 4.058, "grad_norm": 2.1894469261169434, "learning_rate": 2e-05, "loss": 0.03918433, "step": 2029 }, { "epoch": 4.06, "grad_norm": 1.990956425666809, "learning_rate": 2e-05, "loss": 0.03764991, "step": 2030 }, { "epoch": 4.062, "grad_norm": 1.7570072412490845, "learning_rate": 2e-05, "loss": 0.04656107, "step": 2031 }, { "epoch": 4.064, "grad_norm": 1.1472625732421875, "learning_rate": 2e-05, "loss": 0.02630271, "step": 2032 }, { "epoch": 4.066, "grad_norm": 2.1167778968811035, "learning_rate": 2e-05, "loss": 0.03036687, "step": 2033 }, { "epoch": 4.068, "grad_norm": 1.1994069814682007, "learning_rate": 2e-05, "loss": 0.03812218, "step": 2034 }, { "epoch": 4.07, "grad_norm": 2.758052349090576, "learning_rate": 2e-05, "loss": 0.0450132, "step": 2035 }, { "epoch": 4.072, "grad_norm": 1.4564862251281738, "learning_rate": 2e-05, "loss": 0.03845803, "step": 2036 }, { "epoch": 4.074, "grad_norm": 1.1046653985977173, "learning_rate": 2e-05, "loss": 0.0331301, "step": 2037 }, { "epoch": 4.076, "grad_norm": 2.1612513065338135, "learning_rate": 2e-05, "loss": 0.05518108, "step": 2038 }, { "epoch": 4.078, "grad_norm": 1.5514698028564453, "learning_rate": 2e-05, "loss": 0.03647344, "step": 2039 }, { "epoch": 4.08, "grad_norm": 2.8072474002838135, "learning_rate": 2e-05, "loss": 0.04705288, "step": 2040 }, { "epoch": 4.082, "grad_norm": 1.4394696950912476, "learning_rate": 2e-05, "loss": 0.03800115, "step": 2041 }, { "epoch": 4.084, "grad_norm": 1.580580711364746, "learning_rate": 2e-05, "loss": 0.03440983, "step": 2042 }, { "epoch": 4.086, "grad_norm": 2.7896625995635986, "learning_rate": 2e-05, "loss": 0.0448301, "step": 2043 }, { "epoch": 4.088, "grad_norm": 1.8632712364196777, "learning_rate": 2e-05, "loss": 0.03617346, "step": 2044 }, { "epoch": 4.09, "grad_norm": 1.8190380334854126, "learning_rate": 2e-05, "loss": 0.0343765, "step": 2045 }, { "epoch": 4.092, "grad_norm": 1.6877806186676025, "learning_rate": 2e-05, "loss": 0.04055114, "step": 2046 }, { "epoch": 4.094, "grad_norm": 2.117882013320923, "learning_rate": 2e-05, "loss": 0.048363, "step": 2047 }, { "epoch": 4.096, "grad_norm": 2.1963632106781006, "learning_rate": 2e-05, "loss": 0.05769034, "step": 2048 }, { "epoch": 4.098, "grad_norm": 2.3755226135253906, "learning_rate": 2e-05, "loss": 0.07200582, "step": 2049 }, { "epoch": 4.1, "grad_norm": 1.4905271530151367, "learning_rate": 2e-05, "loss": 0.0467202, "step": 2050 }, { "epoch": 4.102, "grad_norm": 1.6176154613494873, "learning_rate": 2e-05, "loss": 0.05290875, "step": 2051 }, { "epoch": 4.104, "grad_norm": 1.9842934608459473, "learning_rate": 2e-05, "loss": 0.0542838, "step": 2052 }, { "epoch": 4.106, "grad_norm": 1.1524161100387573, "learning_rate": 2e-05, "loss": 0.03312632, "step": 2053 }, { "epoch": 4.108, "grad_norm": 1.7908684015274048, "learning_rate": 2e-05, "loss": 0.03798482, "step": 2054 }, { "epoch": 4.11, "grad_norm": 1.8829431533813477, "learning_rate": 2e-05, "loss": 0.03899117, "step": 2055 }, { "epoch": 4.112, "grad_norm": 1.7515336275100708, "learning_rate": 2e-05, "loss": 0.04303076, "step": 2056 }, { "epoch": 4.114, "grad_norm": 1.6121231317520142, "learning_rate": 2e-05, "loss": 0.04086188, "step": 2057 }, { "epoch": 4.116, "grad_norm": 1.3123854398727417, "learning_rate": 2e-05, "loss": 0.04525847, "step": 2058 }, { "epoch": 4.118, "grad_norm": 2.75618314743042, "learning_rate": 2e-05, "loss": 0.05385438, "step": 2059 }, { "epoch": 4.12, "grad_norm": 1.0119373798370361, "learning_rate": 2e-05, "loss": 0.02559848, "step": 2060 }, { "epoch": 4.122, "grad_norm": 1.6642717123031616, "learning_rate": 2e-05, "loss": 0.0500881, "step": 2061 }, { "epoch": 4.124, "grad_norm": 1.3954622745513916, "learning_rate": 2e-05, "loss": 0.03746237, "step": 2062 }, { "epoch": 4.126, "grad_norm": 1.5439870357513428, "learning_rate": 2e-05, "loss": 0.05281479, "step": 2063 }, { "epoch": 4.128, "grad_norm": 1.3099309206008911, "learning_rate": 2e-05, "loss": 0.03846513, "step": 2064 }, { "epoch": 4.13, "grad_norm": 1.7383625507354736, "learning_rate": 2e-05, "loss": 0.04843453, "step": 2065 }, { "epoch": 4.132, "grad_norm": 1.8208372592926025, "learning_rate": 2e-05, "loss": 0.05107617, "step": 2066 }, { "epoch": 4.134, "grad_norm": 2.1992876529693604, "learning_rate": 2e-05, "loss": 0.05615732, "step": 2067 }, { "epoch": 4.136, "grad_norm": 1.8214548826217651, "learning_rate": 2e-05, "loss": 0.05642605, "step": 2068 }, { "epoch": 4.138, "grad_norm": 1.3719658851623535, "learning_rate": 2e-05, "loss": 0.04263125, "step": 2069 }, { "epoch": 4.14, "grad_norm": 1.8242491483688354, "learning_rate": 2e-05, "loss": 0.0747356, "step": 2070 }, { "epoch": 4.142, "grad_norm": 2.162917375564575, "learning_rate": 2e-05, "loss": 0.05902673, "step": 2071 }, { "epoch": 4.144, "grad_norm": 1.3159345388412476, "learning_rate": 2e-05, "loss": 0.04596569, "step": 2072 }, { "epoch": 4.146, "grad_norm": 1.8809359073638916, "learning_rate": 2e-05, "loss": 0.05340003, "step": 2073 }, { "epoch": 4.148, "grad_norm": 1.67131769657135, "learning_rate": 2e-05, "loss": 0.03571043, "step": 2074 }, { "epoch": 4.15, "grad_norm": 1.7250995635986328, "learning_rate": 2e-05, "loss": 0.0564668, "step": 2075 }, { "epoch": 4.152, "grad_norm": 1.3855060338974, "learning_rate": 2e-05, "loss": 0.04370652, "step": 2076 }, { "epoch": 4.154, "grad_norm": 1.1010857820510864, "learning_rate": 2e-05, "loss": 0.03384936, "step": 2077 }, { "epoch": 4.156, "grad_norm": 2.0639870166778564, "learning_rate": 2e-05, "loss": 0.03384199, "step": 2078 }, { "epoch": 4.158, "grad_norm": 1.4212632179260254, "learning_rate": 2e-05, "loss": 0.03396594, "step": 2079 }, { "epoch": 4.16, "grad_norm": 2.1525704860687256, "learning_rate": 2e-05, "loss": 0.05491355, "step": 2080 }, { "epoch": 4.162, "grad_norm": 1.7113659381866455, "learning_rate": 2e-05, "loss": 0.04376032, "step": 2081 }, { "epoch": 4.164, "grad_norm": 1.223374366760254, "learning_rate": 2e-05, "loss": 0.03837118, "step": 2082 }, { "epoch": 4.166, "grad_norm": 1.7024589776992798, "learning_rate": 2e-05, "loss": 0.04713647, "step": 2083 }, { "epoch": 4.168, "grad_norm": 1.7157230377197266, "learning_rate": 2e-05, "loss": 0.05556125, "step": 2084 }, { "epoch": 4.17, "grad_norm": 1.5272091627120972, "learning_rate": 2e-05, "loss": 0.04492594, "step": 2085 }, { "epoch": 4.172, "grad_norm": 1.2956258058547974, "learning_rate": 2e-05, "loss": 0.04288995, "step": 2086 }, { "epoch": 4.174, "grad_norm": 1.3768620491027832, "learning_rate": 2e-05, "loss": 0.02881315, "step": 2087 }, { "epoch": 4.176, "grad_norm": 1.5189354419708252, "learning_rate": 2e-05, "loss": 0.05045771, "step": 2088 }, { "epoch": 4.178, "grad_norm": 1.8950650691986084, "learning_rate": 2e-05, "loss": 0.05032203, "step": 2089 }, { "epoch": 4.18, "grad_norm": 1.9955432415008545, "learning_rate": 2e-05, "loss": 0.02519334, "step": 2090 }, { "epoch": 4.182, "grad_norm": 1.4326856136322021, "learning_rate": 2e-05, "loss": 0.03771212, "step": 2091 }, { "epoch": 4.184, "grad_norm": 1.1596758365631104, "learning_rate": 2e-05, "loss": 0.02768318, "step": 2092 }, { "epoch": 4.186, "grad_norm": 1.1794545650482178, "learning_rate": 2e-05, "loss": 0.03789561, "step": 2093 }, { "epoch": 4.188, "grad_norm": 0.9308399558067322, "learning_rate": 2e-05, "loss": 0.02805153, "step": 2094 }, { "epoch": 4.19, "grad_norm": 1.1102815866470337, "learning_rate": 2e-05, "loss": 0.02839583, "step": 2095 }, { "epoch": 4.192, "grad_norm": 1.020172357559204, "learning_rate": 2e-05, "loss": 0.03357933, "step": 2096 }, { "epoch": 4.194, "grad_norm": 1.7360633611679077, "learning_rate": 2e-05, "loss": 0.05581947, "step": 2097 }, { "epoch": 4.196, "grad_norm": 1.2498151063919067, "learning_rate": 2e-05, "loss": 0.03497965, "step": 2098 }, { "epoch": 4.198, "grad_norm": 1.4616661071777344, "learning_rate": 2e-05, "loss": 0.04537179, "step": 2099 }, { "epoch": 4.2, "grad_norm": 1.6234368085861206, "learning_rate": 2e-05, "loss": 0.0405514, "step": 2100 }, { "epoch": 4.202, "grad_norm": 1.50863516330719, "learning_rate": 2e-05, "loss": 0.03994677, "step": 2101 }, { "epoch": 4.204, "grad_norm": 2.2599105834960938, "learning_rate": 2e-05, "loss": 0.04103535, "step": 2102 }, { "epoch": 4.206, "grad_norm": 1.642195224761963, "learning_rate": 2e-05, "loss": 0.02920062, "step": 2103 }, { "epoch": 4.208, "grad_norm": 2.0362472534179688, "learning_rate": 2e-05, "loss": 0.03564665, "step": 2104 }, { "epoch": 4.21, "grad_norm": 2.113086223602295, "learning_rate": 2e-05, "loss": 0.04320297, "step": 2105 }, { "epoch": 4.212, "grad_norm": 1.1269563436508179, "learning_rate": 2e-05, "loss": 0.02520046, "step": 2106 }, { "epoch": 4.214, "grad_norm": 2.32094144821167, "learning_rate": 2e-05, "loss": 0.06330884, "step": 2107 }, { "epoch": 4.216, "grad_norm": 1.5615673065185547, "learning_rate": 2e-05, "loss": 0.0331628, "step": 2108 }, { "epoch": 4.218, "grad_norm": 0.9397850632667542, "learning_rate": 2e-05, "loss": 0.02459784, "step": 2109 }, { "epoch": 4.22, "grad_norm": 1.9889795780181885, "learning_rate": 2e-05, "loss": 0.03781548, "step": 2110 }, { "epoch": 4.222, "grad_norm": 1.8364139795303345, "learning_rate": 2e-05, "loss": 0.03902372, "step": 2111 }, { "epoch": 4.224, "grad_norm": 1.2772095203399658, "learning_rate": 2e-05, "loss": 0.0316204, "step": 2112 }, { "epoch": 4.226, "grad_norm": 2.7459285259246826, "learning_rate": 2e-05, "loss": 0.06582908, "step": 2113 }, { "epoch": 4.228, "grad_norm": 1.5086251497268677, "learning_rate": 2e-05, "loss": 0.04670456, "step": 2114 }, { "epoch": 4.23, "grad_norm": 2.2771124839782715, "learning_rate": 2e-05, "loss": 0.07231095, "step": 2115 }, { "epoch": 4.232, "grad_norm": 1.7897669076919556, "learning_rate": 2e-05, "loss": 0.04256113, "step": 2116 }, { "epoch": 4.234, "grad_norm": 1.2214994430541992, "learning_rate": 2e-05, "loss": 0.03983855, "step": 2117 }, { "epoch": 4.236, "grad_norm": 1.4263696670532227, "learning_rate": 2e-05, "loss": 0.03172107, "step": 2118 }, { "epoch": 4.2379999999999995, "grad_norm": 2.4489498138427734, "learning_rate": 2e-05, "loss": 0.05162663, "step": 2119 }, { "epoch": 4.24, "grad_norm": 1.7401384115219116, "learning_rate": 2e-05, "loss": 0.0567259, "step": 2120 }, { "epoch": 4.242, "grad_norm": 2.355661153793335, "learning_rate": 2e-05, "loss": 0.04312699, "step": 2121 }, { "epoch": 4.244, "grad_norm": 0.8899902701377869, "learning_rate": 2e-05, "loss": 0.02231044, "step": 2122 }, { "epoch": 4.246, "grad_norm": 1.3299999237060547, "learning_rate": 2e-05, "loss": 0.03867707, "step": 2123 }, { "epoch": 4.248, "grad_norm": 2.1780574321746826, "learning_rate": 2e-05, "loss": 0.05338889, "step": 2124 }, { "epoch": 4.25, "grad_norm": 1.6817718744277954, "learning_rate": 2e-05, "loss": 0.04446897, "step": 2125 }, { "epoch": 4.252, "grad_norm": 1.7029000520706177, "learning_rate": 2e-05, "loss": 0.03678191, "step": 2126 }, { "epoch": 4.254, "grad_norm": 1.601068139076233, "learning_rate": 2e-05, "loss": 0.04554681, "step": 2127 }, { "epoch": 4.256, "grad_norm": 2.417534828186035, "learning_rate": 2e-05, "loss": 0.06136102, "step": 2128 }, { "epoch": 4.258, "grad_norm": 2.018188953399658, "learning_rate": 2e-05, "loss": 0.06119014, "step": 2129 }, { "epoch": 4.26, "grad_norm": 2.963576078414917, "learning_rate": 2e-05, "loss": 0.06493448, "step": 2130 }, { "epoch": 4.2620000000000005, "grad_norm": 1.6151946783065796, "learning_rate": 2e-05, "loss": 0.05280596, "step": 2131 }, { "epoch": 4.264, "grad_norm": 1.7862824201583862, "learning_rate": 2e-05, "loss": 0.03816576, "step": 2132 }, { "epoch": 4.266, "grad_norm": 2.704598903656006, "learning_rate": 2e-05, "loss": 0.06200828, "step": 2133 }, { "epoch": 4.268, "grad_norm": 1.2484264373779297, "learning_rate": 2e-05, "loss": 0.02782817, "step": 2134 }, { "epoch": 4.27, "grad_norm": 1.6894198656082153, "learning_rate": 2e-05, "loss": 0.0363794, "step": 2135 }, { "epoch": 4.272, "grad_norm": 1.383814811706543, "learning_rate": 2e-05, "loss": 0.0340181, "step": 2136 }, { "epoch": 4.274, "grad_norm": 1.5126454830169678, "learning_rate": 2e-05, "loss": 0.04073911, "step": 2137 }, { "epoch": 4.276, "grad_norm": 1.4518946409225464, "learning_rate": 2e-05, "loss": 0.04448681, "step": 2138 }, { "epoch": 4.2780000000000005, "grad_norm": 1.3890987634658813, "learning_rate": 2e-05, "loss": 0.02615726, "step": 2139 }, { "epoch": 4.28, "grad_norm": 1.6702042818069458, "learning_rate": 2e-05, "loss": 0.05457231, "step": 2140 }, { "epoch": 4.282, "grad_norm": 2.1039538383483887, "learning_rate": 2e-05, "loss": 0.04261757, "step": 2141 }, { "epoch": 4.284, "grad_norm": 1.6950783729553223, "learning_rate": 2e-05, "loss": 0.05137327, "step": 2142 }, { "epoch": 4.286, "grad_norm": 1.2793657779693604, "learning_rate": 2e-05, "loss": 0.03890596, "step": 2143 }, { "epoch": 4.288, "grad_norm": 1.0241049528121948, "learning_rate": 2e-05, "loss": 0.02962255, "step": 2144 }, { "epoch": 4.29, "grad_norm": 1.3482152223587036, "learning_rate": 2e-05, "loss": 0.04371033, "step": 2145 }, { "epoch": 4.292, "grad_norm": 1.825649619102478, "learning_rate": 2e-05, "loss": 0.05308119, "step": 2146 }, { "epoch": 4.294, "grad_norm": 1.4800101518630981, "learning_rate": 2e-05, "loss": 0.0398606, "step": 2147 }, { "epoch": 4.296, "grad_norm": 1.812041163444519, "learning_rate": 2e-05, "loss": 0.04485359, "step": 2148 }, { "epoch": 4.298, "grad_norm": 1.0440144538879395, "learning_rate": 2e-05, "loss": 0.04142414, "step": 2149 }, { "epoch": 4.3, "grad_norm": 1.4665697813034058, "learning_rate": 2e-05, "loss": 0.03760947, "step": 2150 }, { "epoch": 4.302, "grad_norm": 1.5210576057434082, "learning_rate": 2e-05, "loss": 0.03952883, "step": 2151 }, { "epoch": 4.304, "grad_norm": 1.5270603895187378, "learning_rate": 2e-05, "loss": 0.04734674, "step": 2152 }, { "epoch": 4.306, "grad_norm": 2.6761133670806885, "learning_rate": 2e-05, "loss": 0.05432082, "step": 2153 }, { "epoch": 4.308, "grad_norm": 1.2086443901062012, "learning_rate": 2e-05, "loss": 0.03031913, "step": 2154 }, { "epoch": 4.31, "grad_norm": 1.5387802124023438, "learning_rate": 2e-05, "loss": 0.0282026, "step": 2155 }, { "epoch": 4.312, "grad_norm": 1.694658875465393, "learning_rate": 2e-05, "loss": 0.05619892, "step": 2156 }, { "epoch": 4.314, "grad_norm": 1.5027580261230469, "learning_rate": 2e-05, "loss": 0.04229368, "step": 2157 }, { "epoch": 4.316, "grad_norm": 1.3247681856155396, "learning_rate": 2e-05, "loss": 0.0459209, "step": 2158 }, { "epoch": 4.318, "grad_norm": 0.9879952669143677, "learning_rate": 2e-05, "loss": 0.03440651, "step": 2159 }, { "epoch": 4.32, "grad_norm": 1.2517679929733276, "learning_rate": 2e-05, "loss": 0.0451726, "step": 2160 }, { "epoch": 4.322, "grad_norm": 1.853196620941162, "learning_rate": 2e-05, "loss": 0.03265027, "step": 2161 }, { "epoch": 4.324, "grad_norm": 1.710205316543579, "learning_rate": 2e-05, "loss": 0.04459624, "step": 2162 }, { "epoch": 4.326, "grad_norm": 1.6105256080627441, "learning_rate": 2e-05, "loss": 0.06459425, "step": 2163 }, { "epoch": 4.328, "grad_norm": 1.3017997741699219, "learning_rate": 2e-05, "loss": 0.05446584, "step": 2164 }, { "epoch": 4.33, "grad_norm": 1.122078776359558, "learning_rate": 2e-05, "loss": 0.0316535, "step": 2165 }, { "epoch": 4.332, "grad_norm": 1.4984065294265747, "learning_rate": 2e-05, "loss": 0.03722906, "step": 2166 }, { "epoch": 4.334, "grad_norm": 1.2994331121444702, "learning_rate": 2e-05, "loss": 0.0416957, "step": 2167 }, { "epoch": 4.336, "grad_norm": 2.6297388076782227, "learning_rate": 2e-05, "loss": 0.05040634, "step": 2168 }, { "epoch": 4.338, "grad_norm": 1.6323825120925903, "learning_rate": 2e-05, "loss": 0.03700216, "step": 2169 }, { "epoch": 4.34, "grad_norm": 1.054328203201294, "learning_rate": 2e-05, "loss": 0.03690439, "step": 2170 }, { "epoch": 4.342, "grad_norm": 1.3024274110794067, "learning_rate": 2e-05, "loss": 0.04472798, "step": 2171 }, { "epoch": 4.344, "grad_norm": 1.3375612497329712, "learning_rate": 2e-05, "loss": 0.03631008, "step": 2172 }, { "epoch": 4.346, "grad_norm": 1.0233983993530273, "learning_rate": 2e-05, "loss": 0.03837822, "step": 2173 }, { "epoch": 4.348, "grad_norm": 2.863868474960327, "learning_rate": 2e-05, "loss": 0.04723253, "step": 2174 }, { "epoch": 4.35, "grad_norm": 1.1577509641647339, "learning_rate": 2e-05, "loss": 0.04589339, "step": 2175 }, { "epoch": 4.352, "grad_norm": 1.5174366235733032, "learning_rate": 2e-05, "loss": 0.03971744, "step": 2176 }, { "epoch": 4.354, "grad_norm": 1.822798490524292, "learning_rate": 2e-05, "loss": 0.04629911, "step": 2177 }, { "epoch": 4.356, "grad_norm": 2.334824562072754, "learning_rate": 2e-05, "loss": 0.04560362, "step": 2178 }, { "epoch": 4.358, "grad_norm": 1.554010033607483, "learning_rate": 2e-05, "loss": 0.04555508, "step": 2179 }, { "epoch": 4.36, "grad_norm": 1.4686816930770874, "learning_rate": 2e-05, "loss": 0.0364374, "step": 2180 }, { "epoch": 4.362, "grad_norm": 1.435239315032959, "learning_rate": 2e-05, "loss": 0.03441803, "step": 2181 }, { "epoch": 4.364, "grad_norm": 1.2505661249160767, "learning_rate": 2e-05, "loss": 0.03555541, "step": 2182 }, { "epoch": 4.366, "grad_norm": 2.002845048904419, "learning_rate": 2e-05, "loss": 0.07516761, "step": 2183 }, { "epoch": 4.368, "grad_norm": 2.682558059692383, "learning_rate": 2e-05, "loss": 0.0619045, "step": 2184 }, { "epoch": 4.37, "grad_norm": 1.4598617553710938, "learning_rate": 2e-05, "loss": 0.02849967, "step": 2185 }, { "epoch": 4.372, "grad_norm": 2.2034389972686768, "learning_rate": 2e-05, "loss": 0.05034188, "step": 2186 }, { "epoch": 4.374, "grad_norm": 1.2549974918365479, "learning_rate": 2e-05, "loss": 0.03955576, "step": 2187 }, { "epoch": 4.376, "grad_norm": 1.4328988790512085, "learning_rate": 2e-05, "loss": 0.04043427, "step": 2188 }, { "epoch": 4.378, "grad_norm": 1.1671024560928345, "learning_rate": 2e-05, "loss": 0.03745596, "step": 2189 }, { "epoch": 4.38, "grad_norm": 1.1555709838867188, "learning_rate": 2e-05, "loss": 0.03046387, "step": 2190 }, { "epoch": 4.382, "grad_norm": 2.62123703956604, "learning_rate": 2e-05, "loss": 0.03433674, "step": 2191 }, { "epoch": 4.384, "grad_norm": 1.5227280855178833, "learning_rate": 2e-05, "loss": 0.02560518, "step": 2192 }, { "epoch": 4.386, "grad_norm": 1.8068288564682007, "learning_rate": 2e-05, "loss": 0.0400132, "step": 2193 }, { "epoch": 4.388, "grad_norm": 1.201744794845581, "learning_rate": 2e-05, "loss": 0.04389308, "step": 2194 }, { "epoch": 4.39, "grad_norm": 1.1598666906356812, "learning_rate": 2e-05, "loss": 0.04563789, "step": 2195 }, { "epoch": 4.392, "grad_norm": 1.885544776916504, "learning_rate": 2e-05, "loss": 0.03769413, "step": 2196 }, { "epoch": 4.394, "grad_norm": 1.3363451957702637, "learning_rate": 2e-05, "loss": 0.03677461, "step": 2197 }, { "epoch": 4.396, "grad_norm": 1.197109341621399, "learning_rate": 2e-05, "loss": 0.03512957, "step": 2198 }, { "epoch": 4.398, "grad_norm": 1.189475417137146, "learning_rate": 2e-05, "loss": 0.05237271, "step": 2199 }, { "epoch": 4.4, "grad_norm": 0.9987121224403381, "learning_rate": 2e-05, "loss": 0.01947338, "step": 2200 }, { "epoch": 4.402, "grad_norm": 1.0477992296218872, "learning_rate": 2e-05, "loss": 0.0338387, "step": 2201 }, { "epoch": 4.404, "grad_norm": 2.6130008697509766, "learning_rate": 2e-05, "loss": 0.04995614, "step": 2202 }, { "epoch": 4.406, "grad_norm": 1.40412437915802, "learning_rate": 2e-05, "loss": 0.04779441, "step": 2203 }, { "epoch": 4.408, "grad_norm": 2.32883358001709, "learning_rate": 2e-05, "loss": 0.04897148, "step": 2204 }, { "epoch": 4.41, "grad_norm": 3.4168405532836914, "learning_rate": 2e-05, "loss": 0.05897108, "step": 2205 }, { "epoch": 4.412, "grad_norm": 1.6130071878433228, "learning_rate": 2e-05, "loss": 0.03696927, "step": 2206 }, { "epoch": 4.414, "grad_norm": 1.5120912790298462, "learning_rate": 2e-05, "loss": 0.036088, "step": 2207 }, { "epoch": 4.416, "grad_norm": 1.9834599494934082, "learning_rate": 2e-05, "loss": 0.06459089, "step": 2208 }, { "epoch": 4.418, "grad_norm": 2.0346577167510986, "learning_rate": 2e-05, "loss": 0.05516463, "step": 2209 }, { "epoch": 4.42, "grad_norm": 1.2212589979171753, "learning_rate": 2e-05, "loss": 0.0262365, "step": 2210 }, { "epoch": 4.422, "grad_norm": 1.5008301734924316, "learning_rate": 2e-05, "loss": 0.04188941, "step": 2211 }, { "epoch": 4.424, "grad_norm": 0.9288143515586853, "learning_rate": 2e-05, "loss": 0.02780385, "step": 2212 }, { "epoch": 4.426, "grad_norm": 1.2318435907363892, "learning_rate": 2e-05, "loss": 0.02683347, "step": 2213 }, { "epoch": 4.428, "grad_norm": 1.711323618888855, "learning_rate": 2e-05, "loss": 0.04659836, "step": 2214 }, { "epoch": 4.43, "grad_norm": 1.3409011363983154, "learning_rate": 2e-05, "loss": 0.04741023, "step": 2215 }, { "epoch": 4.432, "grad_norm": 1.6755443811416626, "learning_rate": 2e-05, "loss": 0.05947503, "step": 2216 }, { "epoch": 4.434, "grad_norm": 2.3621582984924316, "learning_rate": 2e-05, "loss": 0.06439509, "step": 2217 }, { "epoch": 4.436, "grad_norm": 1.113885760307312, "learning_rate": 2e-05, "loss": 0.03710642, "step": 2218 }, { "epoch": 4.438, "grad_norm": 0.9438255429267883, "learning_rate": 2e-05, "loss": 0.02726935, "step": 2219 }, { "epoch": 4.44, "grad_norm": 1.442383050918579, "learning_rate": 2e-05, "loss": 0.04566254, "step": 2220 }, { "epoch": 4.442, "grad_norm": 0.9339209794998169, "learning_rate": 2e-05, "loss": 0.02968401, "step": 2221 }, { "epoch": 4.444, "grad_norm": 1.852805256843567, "learning_rate": 2e-05, "loss": 0.05942626, "step": 2222 }, { "epoch": 4.446, "grad_norm": 2.1776721477508545, "learning_rate": 2e-05, "loss": 0.06122681, "step": 2223 }, { "epoch": 4.448, "grad_norm": 1.7097100019454956, "learning_rate": 2e-05, "loss": 0.02926885, "step": 2224 }, { "epoch": 4.45, "grad_norm": 1.3947697877883911, "learning_rate": 2e-05, "loss": 0.03958159, "step": 2225 }, { "epoch": 4.452, "grad_norm": 1.4915926456451416, "learning_rate": 2e-05, "loss": 0.03779658, "step": 2226 }, { "epoch": 4.454, "grad_norm": 1.4534882307052612, "learning_rate": 2e-05, "loss": 0.02921861, "step": 2227 }, { "epoch": 4.456, "grad_norm": 1.5068776607513428, "learning_rate": 2e-05, "loss": 0.03718809, "step": 2228 }, { "epoch": 4.458, "grad_norm": 1.521750569343567, "learning_rate": 2e-05, "loss": 0.04906987, "step": 2229 }, { "epoch": 4.46, "grad_norm": 1.9581565856933594, "learning_rate": 2e-05, "loss": 0.04215892, "step": 2230 }, { "epoch": 4.462, "grad_norm": 1.3763012886047363, "learning_rate": 2e-05, "loss": 0.03585676, "step": 2231 }, { "epoch": 4.464, "grad_norm": 1.682054042816162, "learning_rate": 2e-05, "loss": 0.03892795, "step": 2232 }, { "epoch": 4.466, "grad_norm": 1.1888952255249023, "learning_rate": 2e-05, "loss": 0.02804256, "step": 2233 }, { "epoch": 4.468, "grad_norm": 1.0312952995300293, "learning_rate": 2e-05, "loss": 0.04241008, "step": 2234 }, { "epoch": 4.47, "grad_norm": 0.9027299284934998, "learning_rate": 2e-05, "loss": 0.02175211, "step": 2235 }, { "epoch": 4.4719999999999995, "grad_norm": 1.6123967170715332, "learning_rate": 2e-05, "loss": 0.04372674, "step": 2236 }, { "epoch": 4.474, "grad_norm": 1.2008719444274902, "learning_rate": 2e-05, "loss": 0.03309236, "step": 2237 }, { "epoch": 4.476, "grad_norm": 1.4866458177566528, "learning_rate": 2e-05, "loss": 0.04360201, "step": 2238 }, { "epoch": 4.478, "grad_norm": 1.139384150505066, "learning_rate": 2e-05, "loss": 0.02492278, "step": 2239 }, { "epoch": 4.48, "grad_norm": 1.314674973487854, "learning_rate": 2e-05, "loss": 0.03167442, "step": 2240 }, { "epoch": 4.482, "grad_norm": 1.1558375358581543, "learning_rate": 2e-05, "loss": 0.03370375, "step": 2241 }, { "epoch": 4.484, "grad_norm": 0.98810875415802, "learning_rate": 2e-05, "loss": 0.03298265, "step": 2242 }, { "epoch": 4.486, "grad_norm": 1.692529559135437, "learning_rate": 2e-05, "loss": 0.043402, "step": 2243 }, { "epoch": 4.4879999999999995, "grad_norm": 2.226050853729248, "learning_rate": 2e-05, "loss": 0.05941268, "step": 2244 }, { "epoch": 4.49, "grad_norm": 1.0783953666687012, "learning_rate": 2e-05, "loss": 0.02816807, "step": 2245 }, { "epoch": 4.492, "grad_norm": 1.9201335906982422, "learning_rate": 2e-05, "loss": 0.04565334, "step": 2246 }, { "epoch": 4.494, "grad_norm": 1.2199456691741943, "learning_rate": 2e-05, "loss": 0.03948256, "step": 2247 }, { "epoch": 4.496, "grad_norm": 2.3383970260620117, "learning_rate": 2e-05, "loss": 0.0555183, "step": 2248 }, { "epoch": 4.498, "grad_norm": 1.1793116331100464, "learning_rate": 2e-05, "loss": 0.03439267, "step": 2249 }, { "epoch": 4.5, "grad_norm": 1.0222777128219604, "learning_rate": 2e-05, "loss": 0.02550181, "step": 2250 }, { "epoch": 4.502, "grad_norm": 1.751338005065918, "learning_rate": 2e-05, "loss": 0.03421024, "step": 2251 }, { "epoch": 4.504, "grad_norm": 1.8420368432998657, "learning_rate": 2e-05, "loss": 0.04032079, "step": 2252 }, { "epoch": 4.506, "grad_norm": 1.5478731393814087, "learning_rate": 2e-05, "loss": 0.04113689, "step": 2253 }, { "epoch": 4.508, "grad_norm": 1.9176809787750244, "learning_rate": 2e-05, "loss": 0.03211041, "step": 2254 }, { "epoch": 4.51, "grad_norm": 1.356545090675354, "learning_rate": 2e-05, "loss": 0.03811408, "step": 2255 }, { "epoch": 4.5120000000000005, "grad_norm": 1.470837950706482, "learning_rate": 2e-05, "loss": 0.04642755, "step": 2256 }, { "epoch": 4.514, "grad_norm": 2.1132113933563232, "learning_rate": 2e-05, "loss": 0.06317526, "step": 2257 }, { "epoch": 4.516, "grad_norm": 1.2848460674285889, "learning_rate": 2e-05, "loss": 0.02209956, "step": 2258 }, { "epoch": 4.518, "grad_norm": 2.304513454437256, "learning_rate": 2e-05, "loss": 0.05873321, "step": 2259 }, { "epoch": 4.52, "grad_norm": 1.1547423601150513, "learning_rate": 2e-05, "loss": 0.03009026, "step": 2260 }, { "epoch": 4.522, "grad_norm": 2.2654247283935547, "learning_rate": 2e-05, "loss": 0.05305985, "step": 2261 }, { "epoch": 4.524, "grad_norm": 1.3709189891815186, "learning_rate": 2e-05, "loss": 0.0413008, "step": 2262 }, { "epoch": 4.526, "grad_norm": 1.2186251878738403, "learning_rate": 2e-05, "loss": 0.04457299, "step": 2263 }, { "epoch": 4.5280000000000005, "grad_norm": 1.8440145254135132, "learning_rate": 2e-05, "loss": 0.04306265, "step": 2264 }, { "epoch": 4.53, "grad_norm": 1.8175358772277832, "learning_rate": 2e-05, "loss": 0.04188886, "step": 2265 }, { "epoch": 4.532, "grad_norm": 1.7477566003799438, "learning_rate": 2e-05, "loss": 0.03470108, "step": 2266 }, { "epoch": 4.534, "grad_norm": 1.1122045516967773, "learning_rate": 2e-05, "loss": 0.03090529, "step": 2267 }, { "epoch": 4.536, "grad_norm": 1.6068907976150513, "learning_rate": 2e-05, "loss": 0.04337908, "step": 2268 }, { "epoch": 4.538, "grad_norm": 1.5025036334991455, "learning_rate": 2e-05, "loss": 0.04078036, "step": 2269 }, { "epoch": 4.54, "grad_norm": 1.6896750926971436, "learning_rate": 2e-05, "loss": 0.05480632, "step": 2270 }, { "epoch": 4.542, "grad_norm": 1.4953817129135132, "learning_rate": 2e-05, "loss": 0.02734038, "step": 2271 }, { "epoch": 4.5440000000000005, "grad_norm": 1.010339617729187, "learning_rate": 2e-05, "loss": 0.03597734, "step": 2272 }, { "epoch": 4.546, "grad_norm": 1.4324558973312378, "learning_rate": 2e-05, "loss": 0.03734388, "step": 2273 }, { "epoch": 4.548, "grad_norm": 1.7131267786026, "learning_rate": 2e-05, "loss": 0.050556, "step": 2274 }, { "epoch": 4.55, "grad_norm": 1.5647190809249878, "learning_rate": 2e-05, "loss": 0.03882205, "step": 2275 }, { "epoch": 4.552, "grad_norm": 1.6061151027679443, "learning_rate": 2e-05, "loss": 0.05493605, "step": 2276 }, { "epoch": 4.554, "grad_norm": 1.275571346282959, "learning_rate": 2e-05, "loss": 0.03493945, "step": 2277 }, { "epoch": 4.556, "grad_norm": 1.767574429512024, "learning_rate": 2e-05, "loss": 0.03032027, "step": 2278 }, { "epoch": 4.558, "grad_norm": 1.9915244579315186, "learning_rate": 2e-05, "loss": 0.04645093, "step": 2279 }, { "epoch": 4.5600000000000005, "grad_norm": 1.441829800605774, "learning_rate": 2e-05, "loss": 0.03383998, "step": 2280 }, { "epoch": 4.562, "grad_norm": 1.1743775606155396, "learning_rate": 2e-05, "loss": 0.03763331, "step": 2281 }, { "epoch": 4.564, "grad_norm": 1.7172895669937134, "learning_rate": 2e-05, "loss": 0.0662686, "step": 2282 }, { "epoch": 4.566, "grad_norm": 1.7259050607681274, "learning_rate": 2e-05, "loss": 0.03643933, "step": 2283 }, { "epoch": 4.568, "grad_norm": 1.2674235105514526, "learning_rate": 2e-05, "loss": 0.03521616, "step": 2284 }, { "epoch": 4.57, "grad_norm": 1.095694661140442, "learning_rate": 2e-05, "loss": 0.04045701, "step": 2285 }, { "epoch": 4.572, "grad_norm": 1.3978180885314941, "learning_rate": 2e-05, "loss": 0.03319689, "step": 2286 }, { "epoch": 4.574, "grad_norm": 1.4615137577056885, "learning_rate": 2e-05, "loss": 0.04723348, "step": 2287 }, { "epoch": 4.576, "grad_norm": 2.235830783843994, "learning_rate": 2e-05, "loss": 0.02766045, "step": 2288 }, { "epoch": 4.578, "grad_norm": 1.0262019634246826, "learning_rate": 2e-05, "loss": 0.02492511, "step": 2289 }, { "epoch": 4.58, "grad_norm": 2.410766363143921, "learning_rate": 2e-05, "loss": 0.03837668, "step": 2290 }, { "epoch": 4.582, "grad_norm": 1.3832848072052002, "learning_rate": 2e-05, "loss": 0.04708455, "step": 2291 }, { "epoch": 4.584, "grad_norm": 1.5111563205718994, "learning_rate": 2e-05, "loss": 0.03212856, "step": 2292 }, { "epoch": 4.586, "grad_norm": 1.8234524726867676, "learning_rate": 2e-05, "loss": 0.04155425, "step": 2293 }, { "epoch": 4.588, "grad_norm": 1.8563976287841797, "learning_rate": 2e-05, "loss": 0.05284409, "step": 2294 }, { "epoch": 4.59, "grad_norm": 1.0448572635650635, "learning_rate": 2e-05, "loss": 0.02735084, "step": 2295 }, { "epoch": 4.592, "grad_norm": 0.9727917909622192, "learning_rate": 2e-05, "loss": 0.03186521, "step": 2296 }, { "epoch": 4.594, "grad_norm": 1.7344650030136108, "learning_rate": 2e-05, "loss": 0.05732023, "step": 2297 }, { "epoch": 4.596, "grad_norm": 1.630303144454956, "learning_rate": 2e-05, "loss": 0.03979348, "step": 2298 }, { "epoch": 4.598, "grad_norm": 1.5773340463638306, "learning_rate": 2e-05, "loss": 0.04727698, "step": 2299 }, { "epoch": 4.6, "grad_norm": 1.4864786863327026, "learning_rate": 2e-05, "loss": 0.0243044, "step": 2300 }, { "epoch": 4.602, "grad_norm": 1.6385360956192017, "learning_rate": 2e-05, "loss": 0.04001735, "step": 2301 }, { "epoch": 4.604, "grad_norm": 0.9057992696762085, "learning_rate": 2e-05, "loss": 0.02840018, "step": 2302 }, { "epoch": 4.606, "grad_norm": 2.1752712726593018, "learning_rate": 2e-05, "loss": 0.02728005, "step": 2303 }, { "epoch": 4.608, "grad_norm": 1.4201445579528809, "learning_rate": 2e-05, "loss": 0.03847256, "step": 2304 }, { "epoch": 4.61, "grad_norm": 1.6967096328735352, "learning_rate": 2e-05, "loss": 0.03130513, "step": 2305 }, { "epoch": 4.612, "grad_norm": 2.1925299167633057, "learning_rate": 2e-05, "loss": 0.04984703, "step": 2306 }, { "epoch": 4.614, "grad_norm": 1.6784030199050903, "learning_rate": 2e-05, "loss": 0.03006441, "step": 2307 }, { "epoch": 4.616, "grad_norm": 1.3622230291366577, "learning_rate": 2e-05, "loss": 0.03182603, "step": 2308 }, { "epoch": 4.618, "grad_norm": 1.5717957019805908, "learning_rate": 2e-05, "loss": 0.03701077, "step": 2309 }, { "epoch": 4.62, "grad_norm": 1.0062084197998047, "learning_rate": 2e-05, "loss": 0.0248526, "step": 2310 }, { "epoch": 4.622, "grad_norm": 2.1822187900543213, "learning_rate": 2e-05, "loss": 0.03146263, "step": 2311 }, { "epoch": 4.624, "grad_norm": 3.6029553413391113, "learning_rate": 2e-05, "loss": 0.06397329, "step": 2312 }, { "epoch": 4.626, "grad_norm": 2.1496500968933105, "learning_rate": 2e-05, "loss": 0.03725152, "step": 2313 }, { "epoch": 4.628, "grad_norm": 1.8497250080108643, "learning_rate": 2e-05, "loss": 0.03161447, "step": 2314 }, { "epoch": 4.63, "grad_norm": 1.3184853792190552, "learning_rate": 2e-05, "loss": 0.03692345, "step": 2315 }, { "epoch": 4.632, "grad_norm": 1.4427586793899536, "learning_rate": 2e-05, "loss": 0.02692084, "step": 2316 }, { "epoch": 4.634, "grad_norm": 1.8580230474472046, "learning_rate": 2e-05, "loss": 0.0654839, "step": 2317 }, { "epoch": 4.636, "grad_norm": 1.1471973657608032, "learning_rate": 2e-05, "loss": 0.03153278, "step": 2318 }, { "epoch": 4.638, "grad_norm": 1.2644822597503662, "learning_rate": 2e-05, "loss": 0.04941096, "step": 2319 }, { "epoch": 4.64, "grad_norm": 2.7720656394958496, "learning_rate": 2e-05, "loss": 0.05895547, "step": 2320 }, { "epoch": 4.642, "grad_norm": 1.2557971477508545, "learning_rate": 2e-05, "loss": 0.04365244, "step": 2321 }, { "epoch": 4.644, "grad_norm": 1.6720325946807861, "learning_rate": 2e-05, "loss": 0.05442552, "step": 2322 }, { "epoch": 4.646, "grad_norm": 1.0071033239364624, "learning_rate": 2e-05, "loss": 0.02480928, "step": 2323 }, { "epoch": 4.648, "grad_norm": 1.183469295501709, "learning_rate": 2e-05, "loss": 0.03799593, "step": 2324 }, { "epoch": 4.65, "grad_norm": 1.3475804328918457, "learning_rate": 2e-05, "loss": 0.05035153, "step": 2325 }, { "epoch": 4.652, "grad_norm": 2.5962164402008057, "learning_rate": 2e-05, "loss": 0.04414929, "step": 2326 }, { "epoch": 4.654, "grad_norm": 1.1905382871627808, "learning_rate": 2e-05, "loss": 0.04276285, "step": 2327 }, { "epoch": 4.656, "grad_norm": 1.5951045751571655, "learning_rate": 2e-05, "loss": 0.03594982, "step": 2328 }, { "epoch": 4.658, "grad_norm": 1.6772533655166626, "learning_rate": 2e-05, "loss": 0.05392138, "step": 2329 }, { "epoch": 4.66, "grad_norm": 1.5733336210250854, "learning_rate": 2e-05, "loss": 0.03919678, "step": 2330 }, { "epoch": 4.662, "grad_norm": 1.1291429996490479, "learning_rate": 2e-05, "loss": 0.02948717, "step": 2331 }, { "epoch": 4.664, "grad_norm": 1.38038969039917, "learning_rate": 2e-05, "loss": 0.04529754, "step": 2332 }, { "epoch": 4.666, "grad_norm": 1.4430439472198486, "learning_rate": 2e-05, "loss": 0.02968345, "step": 2333 }, { "epoch": 4.668, "grad_norm": 1.0805678367614746, "learning_rate": 2e-05, "loss": 0.03496836, "step": 2334 }, { "epoch": 4.67, "grad_norm": 1.4857618808746338, "learning_rate": 2e-05, "loss": 0.04253389, "step": 2335 }, { "epoch": 4.672, "grad_norm": 1.8443632125854492, "learning_rate": 2e-05, "loss": 0.03913691, "step": 2336 }, { "epoch": 4.674, "grad_norm": 1.596656084060669, "learning_rate": 2e-05, "loss": 0.03494718, "step": 2337 }, { "epoch": 4.676, "grad_norm": 2.0523974895477295, "learning_rate": 2e-05, "loss": 0.05468136, "step": 2338 }, { "epoch": 4.678, "grad_norm": 1.5666359663009644, "learning_rate": 2e-05, "loss": 0.0429353, "step": 2339 }, { "epoch": 4.68, "grad_norm": 1.299415946006775, "learning_rate": 2e-05, "loss": 0.02959892, "step": 2340 }, { "epoch": 4.682, "grad_norm": 1.6929570436477661, "learning_rate": 2e-05, "loss": 0.04222934, "step": 2341 }, { "epoch": 4.684, "grad_norm": 2.128430128097534, "learning_rate": 2e-05, "loss": 0.04607785, "step": 2342 }, { "epoch": 4.686, "grad_norm": 2.121476650238037, "learning_rate": 2e-05, "loss": 0.04864143, "step": 2343 }, { "epoch": 4.688, "grad_norm": 2.3001673221588135, "learning_rate": 2e-05, "loss": 0.04368895, "step": 2344 }, { "epoch": 4.6899999999999995, "grad_norm": 1.804023265838623, "learning_rate": 2e-05, "loss": 0.03963099, "step": 2345 }, { "epoch": 4.692, "grad_norm": 1.1703206300735474, "learning_rate": 2e-05, "loss": 0.03038155, "step": 2346 }, { "epoch": 4.694, "grad_norm": 1.123602032661438, "learning_rate": 2e-05, "loss": 0.04947871, "step": 2347 }, { "epoch": 4.696, "grad_norm": 1.3002231121063232, "learning_rate": 2e-05, "loss": 0.03265772, "step": 2348 }, { "epoch": 4.698, "grad_norm": 1.3386176824569702, "learning_rate": 2e-05, "loss": 0.0319346, "step": 2349 }, { "epoch": 4.7, "grad_norm": 1.3578490018844604, "learning_rate": 2e-05, "loss": 0.0321762, "step": 2350 }, { "epoch": 4.702, "grad_norm": 2.0441887378692627, "learning_rate": 2e-05, "loss": 0.0398183, "step": 2351 }, { "epoch": 4.704, "grad_norm": 1.819500207901001, "learning_rate": 2e-05, "loss": 0.03708476, "step": 2352 }, { "epoch": 4.7059999999999995, "grad_norm": 1.4998730421066284, "learning_rate": 2e-05, "loss": 0.03444158, "step": 2353 }, { "epoch": 4.708, "grad_norm": 1.3147387504577637, "learning_rate": 2e-05, "loss": 0.02567411, "step": 2354 }, { "epoch": 4.71, "grad_norm": 1.0613209009170532, "learning_rate": 2e-05, "loss": 0.02812085, "step": 2355 }, { "epoch": 4.712, "grad_norm": 1.6744329929351807, "learning_rate": 2e-05, "loss": 0.04626666, "step": 2356 }, { "epoch": 4.714, "grad_norm": 1.8703510761260986, "learning_rate": 2e-05, "loss": 0.05195956, "step": 2357 }, { "epoch": 4.716, "grad_norm": 1.7580175399780273, "learning_rate": 2e-05, "loss": 0.03270845, "step": 2358 }, { "epoch": 4.718, "grad_norm": 1.9400629997253418, "learning_rate": 2e-05, "loss": 0.05601585, "step": 2359 }, { "epoch": 4.72, "grad_norm": 1.4302650690078735, "learning_rate": 2e-05, "loss": 0.03492381, "step": 2360 }, { "epoch": 4.7219999999999995, "grad_norm": 2.32159161567688, "learning_rate": 2e-05, "loss": 0.04457737, "step": 2361 }, { "epoch": 4.724, "grad_norm": 3.4112353324890137, "learning_rate": 2e-05, "loss": 0.08344968, "step": 2362 }, { "epoch": 4.726, "grad_norm": 1.6217650175094604, "learning_rate": 2e-05, "loss": 0.02905689, "step": 2363 }, { "epoch": 4.728, "grad_norm": 1.6228758096694946, "learning_rate": 2e-05, "loss": 0.04028218, "step": 2364 }, { "epoch": 4.73, "grad_norm": 1.1981812715530396, "learning_rate": 2e-05, "loss": 0.04487208, "step": 2365 }, { "epoch": 4.732, "grad_norm": 1.7020169496536255, "learning_rate": 2e-05, "loss": 0.0399951, "step": 2366 }, { "epoch": 4.734, "grad_norm": 1.3388174772262573, "learning_rate": 2e-05, "loss": 0.04234082, "step": 2367 }, { "epoch": 4.736, "grad_norm": 1.5462074279785156, "learning_rate": 2e-05, "loss": 0.0329011, "step": 2368 }, { "epoch": 4.7379999999999995, "grad_norm": 1.1013768911361694, "learning_rate": 2e-05, "loss": 0.03339402, "step": 2369 }, { "epoch": 4.74, "grad_norm": 1.4851659536361694, "learning_rate": 2e-05, "loss": 0.03826153, "step": 2370 }, { "epoch": 4.742, "grad_norm": 1.2620880603790283, "learning_rate": 2e-05, "loss": 0.03477462, "step": 2371 }, { "epoch": 4.744, "grad_norm": 1.2920024394989014, "learning_rate": 2e-05, "loss": 0.04607169, "step": 2372 }, { "epoch": 4.746, "grad_norm": 1.521671175956726, "learning_rate": 2e-05, "loss": 0.04394786, "step": 2373 }, { "epoch": 4.748, "grad_norm": 1.4648962020874023, "learning_rate": 2e-05, "loss": 0.04500762, "step": 2374 }, { "epoch": 4.75, "grad_norm": 1.8178099393844604, "learning_rate": 2e-05, "loss": 0.03826898, "step": 2375 }, { "epoch": 4.752, "grad_norm": 0.9506421685218811, "learning_rate": 2e-05, "loss": 0.03018279, "step": 2376 }, { "epoch": 4.754, "grad_norm": 1.1572425365447998, "learning_rate": 2e-05, "loss": 0.03146629, "step": 2377 }, { "epoch": 4.756, "grad_norm": 0.9127270579338074, "learning_rate": 2e-05, "loss": 0.0217318, "step": 2378 }, { "epoch": 4.758, "grad_norm": 1.051289677619934, "learning_rate": 2e-05, "loss": 0.03464397, "step": 2379 }, { "epoch": 4.76, "grad_norm": 2.1047472953796387, "learning_rate": 2e-05, "loss": 0.05494668, "step": 2380 }, { "epoch": 4.7620000000000005, "grad_norm": 1.4690008163452148, "learning_rate": 2e-05, "loss": 0.02600802, "step": 2381 }, { "epoch": 4.764, "grad_norm": 1.9520142078399658, "learning_rate": 2e-05, "loss": 0.03388739, "step": 2382 }, { "epoch": 4.766, "grad_norm": 2.3827433586120605, "learning_rate": 2e-05, "loss": 0.0476454, "step": 2383 }, { "epoch": 4.768, "grad_norm": 1.4814623594284058, "learning_rate": 2e-05, "loss": 0.03822245, "step": 2384 }, { "epoch": 4.77, "grad_norm": 2.171255350112915, "learning_rate": 2e-05, "loss": 0.06004865, "step": 2385 }, { "epoch": 4.772, "grad_norm": 1.3165963888168335, "learning_rate": 2e-05, "loss": 0.03820898, "step": 2386 }, { "epoch": 4.774, "grad_norm": 1.2138549089431763, "learning_rate": 2e-05, "loss": 0.03760416, "step": 2387 }, { "epoch": 4.776, "grad_norm": 1.4797013998031616, "learning_rate": 2e-05, "loss": 0.03296691, "step": 2388 }, { "epoch": 4.7780000000000005, "grad_norm": 1.994217038154602, "learning_rate": 2e-05, "loss": 0.05951263, "step": 2389 }, { "epoch": 4.78, "grad_norm": 1.6225087642669678, "learning_rate": 2e-05, "loss": 0.03843221, "step": 2390 }, { "epoch": 4.782, "grad_norm": 1.264367938041687, "learning_rate": 2e-05, "loss": 0.03937426, "step": 2391 }, { "epoch": 4.784, "grad_norm": 2.4877493381500244, "learning_rate": 2e-05, "loss": 0.04250683, "step": 2392 }, { "epoch": 4.786, "grad_norm": 1.6016268730163574, "learning_rate": 2e-05, "loss": 0.03200974, "step": 2393 }, { "epoch": 4.788, "grad_norm": 1.2733501195907593, "learning_rate": 2e-05, "loss": 0.02490179, "step": 2394 }, { "epoch": 4.79, "grad_norm": 2.5966386795043945, "learning_rate": 2e-05, "loss": 0.04996984, "step": 2395 }, { "epoch": 4.792, "grad_norm": 1.5662956237792969, "learning_rate": 2e-05, "loss": 0.05331601, "step": 2396 }, { "epoch": 4.7940000000000005, "grad_norm": 1.7879033088684082, "learning_rate": 2e-05, "loss": 0.04639386, "step": 2397 }, { "epoch": 4.796, "grad_norm": 1.5951218605041504, "learning_rate": 2e-05, "loss": 0.04075851, "step": 2398 }, { "epoch": 4.798, "grad_norm": 1.3353394269943237, "learning_rate": 2e-05, "loss": 0.03567629, "step": 2399 }, { "epoch": 4.8, "grad_norm": 1.5784703493118286, "learning_rate": 2e-05, "loss": 0.0398753, "step": 2400 }, { "epoch": 4.802, "grad_norm": 1.7636374235153198, "learning_rate": 2e-05, "loss": 0.03577316, "step": 2401 }, { "epoch": 4.804, "grad_norm": 2.104825496673584, "learning_rate": 2e-05, "loss": 0.05970087, "step": 2402 }, { "epoch": 4.806, "grad_norm": 1.0066531896591187, "learning_rate": 2e-05, "loss": 0.03378828, "step": 2403 }, { "epoch": 4.808, "grad_norm": 1.1586098670959473, "learning_rate": 2e-05, "loss": 0.03477071, "step": 2404 }, { "epoch": 4.8100000000000005, "grad_norm": 1.9183125495910645, "learning_rate": 2e-05, "loss": 0.04798802, "step": 2405 }, { "epoch": 4.812, "grad_norm": 1.2498745918273926, "learning_rate": 2e-05, "loss": 0.04589451, "step": 2406 }, { "epoch": 4.814, "grad_norm": 2.4411637783050537, "learning_rate": 2e-05, "loss": 0.06537707, "step": 2407 }, { "epoch": 4.816, "grad_norm": 1.692878007888794, "learning_rate": 2e-05, "loss": 0.02775835, "step": 2408 }, { "epoch": 4.818, "grad_norm": 1.5488640069961548, "learning_rate": 2e-05, "loss": 0.04157056, "step": 2409 }, { "epoch": 4.82, "grad_norm": 1.456628680229187, "learning_rate": 2e-05, "loss": 0.04453836, "step": 2410 }, { "epoch": 4.822, "grad_norm": 1.2620590925216675, "learning_rate": 2e-05, "loss": 0.04159302, "step": 2411 }, { "epoch": 4.824, "grad_norm": 1.6489261388778687, "learning_rate": 2e-05, "loss": 0.03650364, "step": 2412 }, { "epoch": 4.826, "grad_norm": 1.3085451126098633, "learning_rate": 2e-05, "loss": 0.03413782, "step": 2413 }, { "epoch": 4.828, "grad_norm": 1.924852967262268, "learning_rate": 2e-05, "loss": 0.06830163, "step": 2414 }, { "epoch": 4.83, "grad_norm": 1.020704746246338, "learning_rate": 2e-05, "loss": 0.02394637, "step": 2415 }, { "epoch": 4.832, "grad_norm": 1.132530689239502, "learning_rate": 2e-05, "loss": 0.0420993, "step": 2416 }, { "epoch": 4.834, "grad_norm": 0.9570915102958679, "learning_rate": 2e-05, "loss": 0.0311062, "step": 2417 }, { "epoch": 4.836, "grad_norm": 1.0534124374389648, "learning_rate": 2e-05, "loss": 0.02990963, "step": 2418 }, { "epoch": 4.838, "grad_norm": 1.1117393970489502, "learning_rate": 2e-05, "loss": 0.03667744, "step": 2419 }, { "epoch": 4.84, "grad_norm": 1.32503342628479, "learning_rate": 2e-05, "loss": 0.03586256, "step": 2420 }, { "epoch": 4.842, "grad_norm": 1.5087800025939941, "learning_rate": 2e-05, "loss": 0.03501411, "step": 2421 }, { "epoch": 4.844, "grad_norm": 1.7611730098724365, "learning_rate": 2e-05, "loss": 0.03907187, "step": 2422 }, { "epoch": 4.846, "grad_norm": 1.3522123098373413, "learning_rate": 2e-05, "loss": 0.04923314, "step": 2423 }, { "epoch": 4.848, "grad_norm": 1.3680315017700195, "learning_rate": 2e-05, "loss": 0.03453797, "step": 2424 }, { "epoch": 4.85, "grad_norm": 1.0860910415649414, "learning_rate": 2e-05, "loss": 0.03305909, "step": 2425 }, { "epoch": 4.852, "grad_norm": 1.2761231660842896, "learning_rate": 2e-05, "loss": 0.03872238, "step": 2426 }, { "epoch": 4.854, "grad_norm": 1.7196176052093506, "learning_rate": 2e-05, "loss": 0.03280783, "step": 2427 }, { "epoch": 4.856, "grad_norm": 1.5164096355438232, "learning_rate": 2e-05, "loss": 0.03744507, "step": 2428 }, { "epoch": 4.858, "grad_norm": 1.294891119003296, "learning_rate": 2e-05, "loss": 0.03750362, "step": 2429 }, { "epoch": 4.86, "grad_norm": 1.5655124187469482, "learning_rate": 2e-05, "loss": 0.03739174, "step": 2430 }, { "epoch": 4.862, "grad_norm": 1.806402325630188, "learning_rate": 2e-05, "loss": 0.04592914, "step": 2431 }, { "epoch": 4.864, "grad_norm": 2.5879619121551514, "learning_rate": 2e-05, "loss": 0.03984898, "step": 2432 }, { "epoch": 4.866, "grad_norm": 2.590233564376831, "learning_rate": 2e-05, "loss": 0.03624843, "step": 2433 }, { "epoch": 4.868, "grad_norm": 1.4250051975250244, "learning_rate": 2e-05, "loss": 0.06627299, "step": 2434 }, { "epoch": 4.87, "grad_norm": 2.142298936843872, "learning_rate": 2e-05, "loss": 0.04694505, "step": 2435 }, { "epoch": 4.872, "grad_norm": 1.6512330770492554, "learning_rate": 2e-05, "loss": 0.04675486, "step": 2436 }, { "epoch": 4.874, "grad_norm": 1.1466740369796753, "learning_rate": 2e-05, "loss": 0.0404415, "step": 2437 }, { "epoch": 4.876, "grad_norm": 1.163206934928894, "learning_rate": 2e-05, "loss": 0.04484642, "step": 2438 }, { "epoch": 4.878, "grad_norm": 1.4623184204101562, "learning_rate": 2e-05, "loss": 0.0501948, "step": 2439 }, { "epoch": 4.88, "grad_norm": 1.4037455320358276, "learning_rate": 2e-05, "loss": 0.03190558, "step": 2440 }, { "epoch": 4.882, "grad_norm": 1.6567379236221313, "learning_rate": 2e-05, "loss": 0.03677463, "step": 2441 }, { "epoch": 4.884, "grad_norm": 1.5126616954803467, "learning_rate": 2e-05, "loss": 0.03623777, "step": 2442 }, { "epoch": 4.886, "grad_norm": 1.434916615486145, "learning_rate": 2e-05, "loss": 0.04461944, "step": 2443 }, { "epoch": 4.888, "grad_norm": 1.0425275564193726, "learning_rate": 2e-05, "loss": 0.03600231, "step": 2444 }, { "epoch": 4.89, "grad_norm": 1.3526657819747925, "learning_rate": 2e-05, "loss": 0.05250793, "step": 2445 }, { "epoch": 4.892, "grad_norm": 2.3579368591308594, "learning_rate": 2e-05, "loss": 0.04716105, "step": 2446 }, { "epoch": 4.894, "grad_norm": 2.2220516204833984, "learning_rate": 2e-05, "loss": 0.04456067, "step": 2447 }, { "epoch": 4.896, "grad_norm": 1.631173014640808, "learning_rate": 2e-05, "loss": 0.04533452, "step": 2448 }, { "epoch": 4.898, "grad_norm": 1.5379027128219604, "learning_rate": 2e-05, "loss": 0.03291882, "step": 2449 }, { "epoch": 4.9, "grad_norm": 1.6118535995483398, "learning_rate": 2e-05, "loss": 0.05840709, "step": 2450 }, { "epoch": 4.902, "grad_norm": 1.0765273571014404, "learning_rate": 2e-05, "loss": 0.03540622, "step": 2451 }, { "epoch": 4.904, "grad_norm": 1.0575835704803467, "learning_rate": 2e-05, "loss": 0.02790409, "step": 2452 }, { "epoch": 4.906, "grad_norm": 0.8009814023971558, "learning_rate": 2e-05, "loss": 0.02813254, "step": 2453 }, { "epoch": 4.908, "grad_norm": 1.1870671510696411, "learning_rate": 2e-05, "loss": 0.0324744, "step": 2454 }, { "epoch": 4.91, "grad_norm": 1.583187460899353, "learning_rate": 2e-05, "loss": 0.0449949, "step": 2455 }, { "epoch": 4.912, "grad_norm": 0.8084712028503418, "learning_rate": 2e-05, "loss": 0.02643416, "step": 2456 }, { "epoch": 4.914, "grad_norm": 1.0200517177581787, "learning_rate": 2e-05, "loss": 0.02963091, "step": 2457 }, { "epoch": 4.916, "grad_norm": 1.270112156867981, "learning_rate": 2e-05, "loss": 0.04167578, "step": 2458 }, { "epoch": 4.918, "grad_norm": 1.7084966897964478, "learning_rate": 2e-05, "loss": 0.0530615, "step": 2459 }, { "epoch": 4.92, "grad_norm": 0.9616237282752991, "learning_rate": 2e-05, "loss": 0.03146474, "step": 2460 }, { "epoch": 4.922, "grad_norm": 1.698949933052063, "learning_rate": 2e-05, "loss": 0.04134315, "step": 2461 }, { "epoch": 4.924, "grad_norm": 1.3575299978256226, "learning_rate": 2e-05, "loss": 0.04107556, "step": 2462 }, { "epoch": 4.926, "grad_norm": 1.2565886974334717, "learning_rate": 2e-05, "loss": 0.02181477, "step": 2463 }, { "epoch": 4.928, "grad_norm": 0.7854299545288086, "learning_rate": 2e-05, "loss": 0.0208516, "step": 2464 }, { "epoch": 4.93, "grad_norm": 1.0150772333145142, "learning_rate": 2e-05, "loss": 0.03809316, "step": 2465 }, { "epoch": 4.932, "grad_norm": 1.4074971675872803, "learning_rate": 2e-05, "loss": 0.04282606, "step": 2466 }, { "epoch": 4.934, "grad_norm": 1.378995418548584, "learning_rate": 2e-05, "loss": 0.0498643, "step": 2467 }, { "epoch": 4.936, "grad_norm": 1.3473279476165771, "learning_rate": 2e-05, "loss": 0.04881741, "step": 2468 }, { "epoch": 4.938, "grad_norm": 1.5778177976608276, "learning_rate": 2e-05, "loss": 0.05556468, "step": 2469 }, { "epoch": 4.9399999999999995, "grad_norm": 1.3268343210220337, "learning_rate": 2e-05, "loss": 0.04513773, "step": 2470 }, { "epoch": 4.942, "grad_norm": 1.551581621170044, "learning_rate": 2e-05, "loss": 0.04969638, "step": 2471 }, { "epoch": 4.944, "grad_norm": 1.280708909034729, "learning_rate": 2e-05, "loss": 0.04666021, "step": 2472 }, { "epoch": 4.946, "grad_norm": 1.6760709285736084, "learning_rate": 2e-05, "loss": 0.04832494, "step": 2473 }, { "epoch": 4.948, "grad_norm": 1.4590950012207031, "learning_rate": 2e-05, "loss": 0.05018194, "step": 2474 }, { "epoch": 4.95, "grad_norm": 1.3153926134109497, "learning_rate": 2e-05, "loss": 0.03609195, "step": 2475 }, { "epoch": 4.952, "grad_norm": 2.7595255374908447, "learning_rate": 2e-05, "loss": 0.02830896, "step": 2476 }, { "epoch": 4.954, "grad_norm": 1.0296415090560913, "learning_rate": 2e-05, "loss": 0.02910339, "step": 2477 }, { "epoch": 4.9559999999999995, "grad_norm": 1.205686330795288, "learning_rate": 2e-05, "loss": 0.02493732, "step": 2478 }, { "epoch": 4.958, "grad_norm": 1.3554532527923584, "learning_rate": 2e-05, "loss": 0.03051371, "step": 2479 }, { "epoch": 4.96, "grad_norm": 1.8102985620498657, "learning_rate": 2e-05, "loss": 0.05402344, "step": 2480 }, { "epoch": 4.962, "grad_norm": 1.090096116065979, "learning_rate": 2e-05, "loss": 0.04083726, "step": 2481 }, { "epoch": 4.964, "grad_norm": 1.3355975151062012, "learning_rate": 2e-05, "loss": 0.0298448, "step": 2482 }, { "epoch": 4.966, "grad_norm": 1.1990944147109985, "learning_rate": 2e-05, "loss": 0.04024744, "step": 2483 }, { "epoch": 4.968, "grad_norm": 1.599819540977478, "learning_rate": 2e-05, "loss": 0.04165243, "step": 2484 }, { "epoch": 4.97, "grad_norm": 1.154665470123291, "learning_rate": 2e-05, "loss": 0.03814416, "step": 2485 }, { "epoch": 4.9719999999999995, "grad_norm": 1.7538753747940063, "learning_rate": 2e-05, "loss": 0.04323652, "step": 2486 }, { "epoch": 4.974, "grad_norm": 1.2068040370941162, "learning_rate": 2e-05, "loss": 0.02646199, "step": 2487 }, { "epoch": 4.976, "grad_norm": 1.9645920991897583, "learning_rate": 2e-05, "loss": 0.04496079, "step": 2488 }, { "epoch": 4.978, "grad_norm": 1.4738874435424805, "learning_rate": 2e-05, "loss": 0.04259995, "step": 2489 }, { "epoch": 4.98, "grad_norm": 1.1883742809295654, "learning_rate": 2e-05, "loss": 0.02651443, "step": 2490 }, { "epoch": 4.982, "grad_norm": 1.4614838361740112, "learning_rate": 2e-05, "loss": 0.0321265, "step": 2491 }, { "epoch": 4.984, "grad_norm": 2.184630870819092, "learning_rate": 2e-05, "loss": 0.03757413, "step": 2492 }, { "epoch": 4.986, "grad_norm": 1.9842206239700317, "learning_rate": 2e-05, "loss": 0.05184003, "step": 2493 }, { "epoch": 4.9879999999999995, "grad_norm": 3.535095453262329, "learning_rate": 2e-05, "loss": 0.04131281, "step": 2494 }, { "epoch": 4.99, "grad_norm": 2.6788275241851807, "learning_rate": 2e-05, "loss": 0.04076681, "step": 2495 }, { "epoch": 4.992, "grad_norm": 1.9538054466247559, "learning_rate": 2e-05, "loss": 0.03534822, "step": 2496 }, { "epoch": 4.994, "grad_norm": 2.29728364944458, "learning_rate": 2e-05, "loss": 0.0576341, "step": 2497 }, { "epoch": 4.996, "grad_norm": 1.8153451681137085, "learning_rate": 2e-05, "loss": 0.04025466, "step": 2498 }, { "epoch": 4.998, "grad_norm": 2.0112109184265137, "learning_rate": 2e-05, "loss": 0.05562697, "step": 2499 }, { "epoch": 5.0, "grad_norm": 1.1833168268203735, "learning_rate": 2e-05, "loss": 0.04619126, "step": 2500 }, { "epoch": 5.0, "eval_performance": { "AngleClassification_1": 0.988, "AngleClassification_2": 0.99, "AngleClassification_3": 0.7265469061876247, "Equal_1": 0.956, "Equal_2": 0.8602794411177644, "Equal_3": 0.7544910179640718, "LineComparison_1": 1.0, "LineComparison_2": 0.9900199600798403, "LineComparison_3": 0.9481037924151696, "Parallel_1": 0.9859719438877755, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.9, "Perpendicular_1": 0.974, "Perpendicular_2": 0.638, "Perpendicular_3": 0.38577154308617234, "PointLiesOnCircle_1": 0.9966599866399466, "PointLiesOnCircle_2": 0.9953333333333334, "PointLiesOnCircle_3": 0.9768, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9819639278557114, "PointLiesOnLine_3": 0.6207584830339321 }, "eval_runtime": 320.4044, "eval_samples_per_second": 32.771, "eval_steps_per_second": 0.655, "step": 2500 }, { "epoch": 5.002, "grad_norm": 2.2324397563934326, "learning_rate": 2e-05, "loss": 0.05695269, "step": 2501 }, { "epoch": 5.004, "grad_norm": 1.6174829006195068, "learning_rate": 2e-05, "loss": 0.05285802, "step": 2502 }, { "epoch": 5.006, "grad_norm": 2.227994680404663, "learning_rate": 2e-05, "loss": 0.08210929, "step": 2503 }, { "epoch": 5.008, "grad_norm": 2.022165298461914, "learning_rate": 2e-05, "loss": 0.0481383, "step": 2504 }, { "epoch": 5.01, "grad_norm": 1.2331695556640625, "learning_rate": 2e-05, "loss": 0.03883846, "step": 2505 }, { "epoch": 5.012, "grad_norm": 2.4110569953918457, "learning_rate": 2e-05, "loss": 0.0675078, "step": 2506 }, { "epoch": 5.014, "grad_norm": 1.4140784740447998, "learning_rate": 2e-05, "loss": 0.02794993, "step": 2507 }, { "epoch": 5.016, "grad_norm": 1.0707343816757202, "learning_rate": 2e-05, "loss": 0.02114813, "step": 2508 }, { "epoch": 5.018, "grad_norm": 1.7338379621505737, "learning_rate": 2e-05, "loss": 0.04929041, "step": 2509 }, { "epoch": 5.02, "grad_norm": 2.7071118354797363, "learning_rate": 2e-05, "loss": 0.06345211, "step": 2510 }, { "epoch": 5.022, "grad_norm": 1.2723110914230347, "learning_rate": 2e-05, "loss": 0.04290574, "step": 2511 }, { "epoch": 5.024, "grad_norm": 2.29909348487854, "learning_rate": 2e-05, "loss": 0.05433185, "step": 2512 }, { "epoch": 5.026, "grad_norm": 1.5173856019973755, "learning_rate": 2e-05, "loss": 0.03184156, "step": 2513 }, { "epoch": 5.028, "grad_norm": 1.4544750452041626, "learning_rate": 2e-05, "loss": 0.03113187, "step": 2514 }, { "epoch": 5.03, "grad_norm": 1.0467116832733154, "learning_rate": 2e-05, "loss": 0.02658311, "step": 2515 }, { "epoch": 5.032, "grad_norm": 1.4478943347930908, "learning_rate": 2e-05, "loss": 0.04346019, "step": 2516 }, { "epoch": 5.034, "grad_norm": 1.587401270866394, "learning_rate": 2e-05, "loss": 0.04132868, "step": 2517 }, { "epoch": 5.036, "grad_norm": 1.247644305229187, "learning_rate": 2e-05, "loss": 0.03109686, "step": 2518 }, { "epoch": 5.038, "grad_norm": 1.2669824361801147, "learning_rate": 2e-05, "loss": 0.02979152, "step": 2519 }, { "epoch": 5.04, "grad_norm": 2.6262893676757812, "learning_rate": 2e-05, "loss": 0.06778932, "step": 2520 }, { "epoch": 5.042, "grad_norm": 1.6838678121566772, "learning_rate": 2e-05, "loss": 0.06396025, "step": 2521 }, { "epoch": 5.044, "grad_norm": 1.3168069124221802, "learning_rate": 2e-05, "loss": 0.02535046, "step": 2522 }, { "epoch": 5.046, "grad_norm": 2.7136900424957275, "learning_rate": 2e-05, "loss": 0.04555402, "step": 2523 }, { "epoch": 5.048, "grad_norm": 1.461851716041565, "learning_rate": 2e-05, "loss": 0.03833253, "step": 2524 }, { "epoch": 5.05, "grad_norm": 1.8288956880569458, "learning_rate": 2e-05, "loss": 0.03891191, "step": 2525 }, { "epoch": 5.052, "grad_norm": 1.1910473108291626, "learning_rate": 2e-05, "loss": 0.03411533, "step": 2526 }, { "epoch": 5.054, "grad_norm": 1.5257526636123657, "learning_rate": 2e-05, "loss": 0.04626542, "step": 2527 }, { "epoch": 5.056, "grad_norm": 1.2953051328659058, "learning_rate": 2e-05, "loss": 0.02808543, "step": 2528 }, { "epoch": 5.058, "grad_norm": 1.4993410110473633, "learning_rate": 2e-05, "loss": 0.04547715, "step": 2529 }, { "epoch": 5.06, "grad_norm": 2.6157491207122803, "learning_rate": 2e-05, "loss": 0.09757001, "step": 2530 }, { "epoch": 5.062, "grad_norm": 2.16626238822937, "learning_rate": 2e-05, "loss": 0.05185439, "step": 2531 }, { "epoch": 5.064, "grad_norm": 5.067115783691406, "learning_rate": 2e-05, "loss": 0.04093373, "step": 2532 }, { "epoch": 5.066, "grad_norm": 3.5406131744384766, "learning_rate": 2e-05, "loss": 0.05016257, "step": 2533 }, { "epoch": 5.068, "grad_norm": 1.9250370264053345, "learning_rate": 2e-05, "loss": 0.04495588, "step": 2534 }, { "epoch": 5.07, "grad_norm": 1.3364894390106201, "learning_rate": 2e-05, "loss": 0.05048854, "step": 2535 }, { "epoch": 5.072, "grad_norm": 1.1228950023651123, "learning_rate": 2e-05, "loss": 0.03420167, "step": 2536 }, { "epoch": 5.074, "grad_norm": 2.442943572998047, "learning_rate": 2e-05, "loss": 0.04751071, "step": 2537 }, { "epoch": 5.076, "grad_norm": 1.1549925804138184, "learning_rate": 2e-05, "loss": 0.03477682, "step": 2538 }, { "epoch": 5.078, "grad_norm": 1.890607237815857, "learning_rate": 2e-05, "loss": 0.04445625, "step": 2539 }, { "epoch": 5.08, "grad_norm": 1.524712324142456, "learning_rate": 2e-05, "loss": 0.04532345, "step": 2540 }, { "epoch": 5.082, "grad_norm": 1.9938740730285645, "learning_rate": 2e-05, "loss": 0.04844673, "step": 2541 }, { "epoch": 5.084, "grad_norm": 2.3578040599823, "learning_rate": 2e-05, "loss": 0.06914338, "step": 2542 }, { "epoch": 5.086, "grad_norm": 1.6852574348449707, "learning_rate": 2e-05, "loss": 0.03590863, "step": 2543 }, { "epoch": 5.088, "grad_norm": 1.804057240486145, "learning_rate": 2e-05, "loss": 0.0503867, "step": 2544 }, { "epoch": 5.09, "grad_norm": 1.832448959350586, "learning_rate": 2e-05, "loss": 0.05448397, "step": 2545 }, { "epoch": 5.092, "grad_norm": 1.6885632276535034, "learning_rate": 2e-05, "loss": 0.0470449, "step": 2546 }, { "epoch": 5.094, "grad_norm": 0.9903779625892639, "learning_rate": 2e-05, "loss": 0.03127839, "step": 2547 }, { "epoch": 5.096, "grad_norm": 1.0127002000808716, "learning_rate": 2e-05, "loss": 0.03280874, "step": 2548 }, { "epoch": 5.098, "grad_norm": 1.8620246648788452, "learning_rate": 2e-05, "loss": 0.06030571, "step": 2549 }, { "epoch": 5.1, "grad_norm": 1.3962090015411377, "learning_rate": 2e-05, "loss": 0.04562517, "step": 2550 }, { "epoch": 5.102, "grad_norm": 1.8518095016479492, "learning_rate": 2e-05, "loss": 0.06386492, "step": 2551 }, { "epoch": 5.104, "grad_norm": 1.2915924787521362, "learning_rate": 2e-05, "loss": 0.0317604, "step": 2552 }, { "epoch": 5.106, "grad_norm": 1.6023142337799072, "learning_rate": 2e-05, "loss": 0.03814955, "step": 2553 }, { "epoch": 5.108, "grad_norm": 1.1027803421020508, "learning_rate": 2e-05, "loss": 0.02706053, "step": 2554 }, { "epoch": 5.11, "grad_norm": 2.314110517501831, "learning_rate": 2e-05, "loss": 0.0609056, "step": 2555 }, { "epoch": 5.112, "grad_norm": 1.028228759765625, "learning_rate": 2e-05, "loss": 0.02673756, "step": 2556 }, { "epoch": 5.114, "grad_norm": 1.5030735731124878, "learning_rate": 2e-05, "loss": 0.0449588, "step": 2557 }, { "epoch": 5.116, "grad_norm": 1.3208764791488647, "learning_rate": 2e-05, "loss": 0.04690504, "step": 2558 }, { "epoch": 5.118, "grad_norm": 2.08263897895813, "learning_rate": 2e-05, "loss": 0.04718242, "step": 2559 }, { "epoch": 5.12, "grad_norm": 1.488623857498169, "learning_rate": 2e-05, "loss": 0.04909895, "step": 2560 }, { "epoch": 5.122, "grad_norm": 1.8604114055633545, "learning_rate": 2e-05, "loss": 0.04605662, "step": 2561 }, { "epoch": 5.124, "grad_norm": 1.0743006467819214, "learning_rate": 2e-05, "loss": 0.02543287, "step": 2562 }, { "epoch": 5.126, "grad_norm": 1.3508437871932983, "learning_rate": 2e-05, "loss": 0.03909838, "step": 2563 }, { "epoch": 5.128, "grad_norm": 1.4261709451675415, "learning_rate": 2e-05, "loss": 0.03505434, "step": 2564 }, { "epoch": 5.13, "grad_norm": 2.1459856033325195, "learning_rate": 2e-05, "loss": 0.05590709, "step": 2565 }, { "epoch": 5.132, "grad_norm": 2.010373115539551, "learning_rate": 2e-05, "loss": 0.03756604, "step": 2566 }, { "epoch": 5.134, "grad_norm": 1.6585365533828735, "learning_rate": 2e-05, "loss": 0.03042065, "step": 2567 }, { "epoch": 5.136, "grad_norm": 2.4542722702026367, "learning_rate": 2e-05, "loss": 0.05523026, "step": 2568 }, { "epoch": 5.138, "grad_norm": 1.9174667596817017, "learning_rate": 2e-05, "loss": 0.06206827, "step": 2569 }, { "epoch": 5.14, "grad_norm": 1.7819212675094604, "learning_rate": 2e-05, "loss": 0.0496488, "step": 2570 }, { "epoch": 5.142, "grad_norm": 2.3288769721984863, "learning_rate": 2e-05, "loss": 0.05924601, "step": 2571 }, { "epoch": 5.144, "grad_norm": 4.057807922363281, "learning_rate": 2e-05, "loss": 0.0507498, "step": 2572 }, { "epoch": 5.146, "grad_norm": 1.5712443590164185, "learning_rate": 2e-05, "loss": 0.05386955, "step": 2573 }, { "epoch": 5.148, "grad_norm": 1.6841580867767334, "learning_rate": 2e-05, "loss": 0.0409927, "step": 2574 }, { "epoch": 5.15, "grad_norm": 2.0877368450164795, "learning_rate": 2e-05, "loss": 0.03837425, "step": 2575 }, { "epoch": 5.152, "grad_norm": 1.4437499046325684, "learning_rate": 2e-05, "loss": 0.04897504, "step": 2576 }, { "epoch": 5.154, "grad_norm": 1.4186006784439087, "learning_rate": 2e-05, "loss": 0.05277944, "step": 2577 }, { "epoch": 5.156, "grad_norm": 6.804627418518066, "learning_rate": 2e-05, "loss": 0.05949213, "step": 2578 }, { "epoch": 5.158, "grad_norm": 1.6603080034255981, "learning_rate": 2e-05, "loss": 0.05066625, "step": 2579 }, { "epoch": 5.16, "grad_norm": 1.7450742721557617, "learning_rate": 2e-05, "loss": 0.05555181, "step": 2580 }, { "epoch": 5.162, "grad_norm": 2.0960774421691895, "learning_rate": 2e-05, "loss": 0.05067931, "step": 2581 }, { "epoch": 5.164, "grad_norm": 1.5471669435501099, "learning_rate": 2e-05, "loss": 0.04380035, "step": 2582 }, { "epoch": 5.166, "grad_norm": 1.5839829444885254, "learning_rate": 2e-05, "loss": 0.0428129, "step": 2583 }, { "epoch": 5.168, "grad_norm": 1.1979819536209106, "learning_rate": 2e-05, "loss": 0.03231291, "step": 2584 }, { "epoch": 5.17, "grad_norm": 0.9489485621452332, "learning_rate": 2e-05, "loss": 0.03189579, "step": 2585 }, { "epoch": 5.172, "grad_norm": 1.3143651485443115, "learning_rate": 2e-05, "loss": 0.03099883, "step": 2586 }, { "epoch": 5.174, "grad_norm": 1.7608791589736938, "learning_rate": 2e-05, "loss": 0.06527974, "step": 2587 }, { "epoch": 5.176, "grad_norm": 1.6358402967453003, "learning_rate": 2e-05, "loss": 0.05212991, "step": 2588 }, { "epoch": 5.178, "grad_norm": 1.2819510698318481, "learning_rate": 2e-05, "loss": 0.03158101, "step": 2589 }, { "epoch": 5.18, "grad_norm": 1.5717135667800903, "learning_rate": 2e-05, "loss": 0.04475843, "step": 2590 }, { "epoch": 5.182, "grad_norm": 2.0249385833740234, "learning_rate": 2e-05, "loss": 0.05886701, "step": 2591 }, { "epoch": 5.184, "grad_norm": 1.782658338546753, "learning_rate": 2e-05, "loss": 0.04498136, "step": 2592 }, { "epoch": 5.186, "grad_norm": 1.6862472295761108, "learning_rate": 2e-05, "loss": 0.03234654, "step": 2593 }, { "epoch": 5.188, "grad_norm": 1.2082955837249756, "learning_rate": 2e-05, "loss": 0.04066747, "step": 2594 }, { "epoch": 5.19, "grad_norm": 1.7842987775802612, "learning_rate": 2e-05, "loss": 0.06287039, "step": 2595 }, { "epoch": 5.192, "grad_norm": 1.250356674194336, "learning_rate": 2e-05, "loss": 0.04755474, "step": 2596 }, { "epoch": 5.194, "grad_norm": 2.9072906970977783, "learning_rate": 2e-05, "loss": 0.05345254, "step": 2597 }, { "epoch": 5.196, "grad_norm": 1.3460503816604614, "learning_rate": 2e-05, "loss": 0.05015785, "step": 2598 }, { "epoch": 5.198, "grad_norm": 1.5144968032836914, "learning_rate": 2e-05, "loss": 0.05026857, "step": 2599 }, { "epoch": 5.2, "grad_norm": 1.0161470174789429, "learning_rate": 2e-05, "loss": 0.03177109, "step": 2600 }, { "epoch": 5.202, "grad_norm": 1.379644513130188, "learning_rate": 2e-05, "loss": 0.0431831, "step": 2601 }, { "epoch": 5.204, "grad_norm": 1.5978702306747437, "learning_rate": 2e-05, "loss": 0.04161422, "step": 2602 }, { "epoch": 5.206, "grad_norm": 1.7875207662582397, "learning_rate": 2e-05, "loss": 0.05222338, "step": 2603 }, { "epoch": 5.208, "grad_norm": 1.0086873769760132, "learning_rate": 2e-05, "loss": 0.02815355, "step": 2604 }, { "epoch": 5.21, "grad_norm": 1.4395867586135864, "learning_rate": 2e-05, "loss": 0.03475678, "step": 2605 }, { "epoch": 5.212, "grad_norm": 1.7755112648010254, "learning_rate": 2e-05, "loss": 0.05450004, "step": 2606 }, { "epoch": 5.214, "grad_norm": 1.830944299697876, "learning_rate": 2e-05, "loss": 0.04738111, "step": 2607 }, { "epoch": 5.216, "grad_norm": 1.3517119884490967, "learning_rate": 2e-05, "loss": 0.0596354, "step": 2608 }, { "epoch": 5.218, "grad_norm": 1.0986253023147583, "learning_rate": 2e-05, "loss": 0.0300137, "step": 2609 }, { "epoch": 5.22, "grad_norm": 2.0288379192352295, "learning_rate": 2e-05, "loss": 0.03250675, "step": 2610 }, { "epoch": 5.222, "grad_norm": 2.0758872032165527, "learning_rate": 2e-05, "loss": 0.05346858, "step": 2611 }, { "epoch": 5.224, "grad_norm": 1.4884560108184814, "learning_rate": 2e-05, "loss": 0.04885757, "step": 2612 }, { "epoch": 5.226, "grad_norm": 2.067258596420288, "learning_rate": 2e-05, "loss": 0.04791706, "step": 2613 }, { "epoch": 5.228, "grad_norm": 1.7092328071594238, "learning_rate": 2e-05, "loss": 0.06562018, "step": 2614 }, { "epoch": 5.23, "grad_norm": 2.985393762588501, "learning_rate": 2e-05, "loss": 0.05072825, "step": 2615 }, { "epoch": 5.232, "grad_norm": 1.6960763931274414, "learning_rate": 2e-05, "loss": 0.05636974, "step": 2616 }, { "epoch": 5.234, "grad_norm": 1.1226341724395752, "learning_rate": 2e-05, "loss": 0.03822513, "step": 2617 }, { "epoch": 5.236, "grad_norm": 1.404388427734375, "learning_rate": 2e-05, "loss": 0.03551162, "step": 2618 }, { "epoch": 5.2379999999999995, "grad_norm": 1.3654124736785889, "learning_rate": 2e-05, "loss": 0.04120996, "step": 2619 }, { "epoch": 5.24, "grad_norm": 2.9756903648376465, "learning_rate": 2e-05, "loss": 0.05003164, "step": 2620 }, { "epoch": 5.242, "grad_norm": 1.9504529237747192, "learning_rate": 2e-05, "loss": 0.06165954, "step": 2621 }, { "epoch": 5.244, "grad_norm": 1.5840363502502441, "learning_rate": 2e-05, "loss": 0.0396319, "step": 2622 }, { "epoch": 5.246, "grad_norm": 1.6869369745254517, "learning_rate": 2e-05, "loss": 0.04542349, "step": 2623 }, { "epoch": 5.248, "grad_norm": 1.6327499151229858, "learning_rate": 2e-05, "loss": 0.05939788, "step": 2624 }, { "epoch": 5.25, "grad_norm": 2.854781150817871, "learning_rate": 2e-05, "loss": 0.03455864, "step": 2625 }, { "epoch": 5.252, "grad_norm": 1.3008240461349487, "learning_rate": 2e-05, "loss": 0.02595445, "step": 2626 }, { "epoch": 5.254, "grad_norm": 1.2028790712356567, "learning_rate": 2e-05, "loss": 0.02978789, "step": 2627 }, { "epoch": 5.256, "grad_norm": 1.6047648191452026, "learning_rate": 2e-05, "loss": 0.05063823, "step": 2628 }, { "epoch": 5.258, "grad_norm": 2.121851682662964, "learning_rate": 2e-05, "loss": 0.03812759, "step": 2629 }, { "epoch": 5.26, "grad_norm": 1.4679371118545532, "learning_rate": 2e-05, "loss": 0.04398443, "step": 2630 }, { "epoch": 5.2620000000000005, "grad_norm": 1.7496758699417114, "learning_rate": 2e-05, "loss": 0.04756953, "step": 2631 }, { "epoch": 5.264, "grad_norm": 2.85517954826355, "learning_rate": 2e-05, "loss": 0.04580143, "step": 2632 }, { "epoch": 5.266, "grad_norm": 1.5564442873001099, "learning_rate": 2e-05, "loss": 0.05306519, "step": 2633 }, { "epoch": 5.268, "grad_norm": 1.9785518646240234, "learning_rate": 2e-05, "loss": 0.0391811, "step": 2634 }, { "epoch": 5.27, "grad_norm": 2.4866366386413574, "learning_rate": 2e-05, "loss": 0.05209032, "step": 2635 }, { "epoch": 5.272, "grad_norm": 1.0321913957595825, "learning_rate": 2e-05, "loss": 0.02281884, "step": 2636 }, { "epoch": 5.274, "grad_norm": 0.978119432926178, "learning_rate": 2e-05, "loss": 0.02463586, "step": 2637 }, { "epoch": 5.276, "grad_norm": 1.338571548461914, "learning_rate": 2e-05, "loss": 0.05107386, "step": 2638 }, { "epoch": 5.2780000000000005, "grad_norm": 1.172133207321167, "learning_rate": 2e-05, "loss": 0.03545574, "step": 2639 }, { "epoch": 5.28, "grad_norm": 1.7161072492599487, "learning_rate": 2e-05, "loss": 0.0608919, "step": 2640 }, { "epoch": 5.282, "grad_norm": 2.2566652297973633, "learning_rate": 2e-05, "loss": 0.06986575, "step": 2641 }, { "epoch": 5.284, "grad_norm": 1.5361062288284302, "learning_rate": 2e-05, "loss": 0.0354271, "step": 2642 }, { "epoch": 5.286, "grad_norm": 1.7138904333114624, "learning_rate": 2e-05, "loss": 0.05330596, "step": 2643 }, { "epoch": 5.288, "grad_norm": 1.9515053033828735, "learning_rate": 2e-05, "loss": 0.05626545, "step": 2644 }, { "epoch": 5.29, "grad_norm": 1.527269721031189, "learning_rate": 2e-05, "loss": 0.04371868, "step": 2645 }, { "epoch": 5.292, "grad_norm": 1.7075636386871338, "learning_rate": 2e-05, "loss": 0.04032372, "step": 2646 }, { "epoch": 5.294, "grad_norm": 1.0868964195251465, "learning_rate": 2e-05, "loss": 0.03899564, "step": 2647 }, { "epoch": 5.296, "grad_norm": 3.068068504333496, "learning_rate": 2e-05, "loss": 0.04235051, "step": 2648 }, { "epoch": 5.298, "grad_norm": 1.6693544387817383, "learning_rate": 2e-05, "loss": 0.05411904, "step": 2649 }, { "epoch": 5.3, "grad_norm": 1.2736790180206299, "learning_rate": 2e-05, "loss": 0.0501396, "step": 2650 }, { "epoch": 5.302, "grad_norm": 1.770841121673584, "learning_rate": 2e-05, "loss": 0.03934953, "step": 2651 }, { "epoch": 5.304, "grad_norm": 1.1789344549179077, "learning_rate": 2e-05, "loss": 0.03890145, "step": 2652 }, { "epoch": 5.306, "grad_norm": 1.8938695192337036, "learning_rate": 2e-05, "loss": 0.05737014, "step": 2653 }, { "epoch": 5.308, "grad_norm": 1.4466159343719482, "learning_rate": 2e-05, "loss": 0.04105823, "step": 2654 }, { "epoch": 5.31, "grad_norm": 1.9113630056381226, "learning_rate": 2e-05, "loss": 0.06063426, "step": 2655 }, { "epoch": 5.312, "grad_norm": 1.341367483139038, "learning_rate": 2e-05, "loss": 0.03131312, "step": 2656 }, { "epoch": 5.314, "grad_norm": 1.2381536960601807, "learning_rate": 2e-05, "loss": 0.03998591, "step": 2657 }, { "epoch": 5.316, "grad_norm": 1.210856318473816, "learning_rate": 2e-05, "loss": 0.03550825, "step": 2658 }, { "epoch": 5.318, "grad_norm": 1.4786920547485352, "learning_rate": 2e-05, "loss": 0.03781786, "step": 2659 }, { "epoch": 5.32, "grad_norm": 1.7087515592575073, "learning_rate": 2e-05, "loss": 0.04967047, "step": 2660 }, { "epoch": 5.322, "grad_norm": 1.5567851066589355, "learning_rate": 2e-05, "loss": 0.03445258, "step": 2661 }, { "epoch": 5.324, "grad_norm": 0.9272580146789551, "learning_rate": 2e-05, "loss": 0.02637572, "step": 2662 }, { "epoch": 5.326, "grad_norm": 2.146501302719116, "learning_rate": 2e-05, "loss": 0.03992863, "step": 2663 }, { "epoch": 5.328, "grad_norm": 2.161367654800415, "learning_rate": 2e-05, "loss": 0.05867213, "step": 2664 }, { "epoch": 5.33, "grad_norm": 1.6468945741653442, "learning_rate": 2e-05, "loss": 0.03345583, "step": 2665 }, { "epoch": 5.332, "grad_norm": 1.8014037609100342, "learning_rate": 2e-05, "loss": 0.04314873, "step": 2666 }, { "epoch": 5.334, "grad_norm": 1.926483392715454, "learning_rate": 2e-05, "loss": 0.05389591, "step": 2667 }, { "epoch": 5.336, "grad_norm": 1.567809820175171, "learning_rate": 2e-05, "loss": 0.05018042, "step": 2668 }, { "epoch": 5.338, "grad_norm": 0.9763085842132568, "learning_rate": 2e-05, "loss": 0.02220888, "step": 2669 }, { "epoch": 5.34, "grad_norm": 1.6590309143066406, "learning_rate": 2e-05, "loss": 0.03101541, "step": 2670 }, { "epoch": 5.342, "grad_norm": 1.1251074075698853, "learning_rate": 2e-05, "loss": 0.03375977, "step": 2671 }, { "epoch": 5.344, "grad_norm": 1.7373671531677246, "learning_rate": 2e-05, "loss": 0.02912542, "step": 2672 }, { "epoch": 5.346, "grad_norm": 1.122557282447815, "learning_rate": 2e-05, "loss": 0.02940472, "step": 2673 }, { "epoch": 5.348, "grad_norm": 1.277514100074768, "learning_rate": 2e-05, "loss": 0.03232025, "step": 2674 }, { "epoch": 5.35, "grad_norm": 1.4601491689682007, "learning_rate": 2e-05, "loss": 0.03994402, "step": 2675 }, { "epoch": 5.352, "grad_norm": 0.8786239624023438, "learning_rate": 2e-05, "loss": 0.02341381, "step": 2676 }, { "epoch": 5.354, "grad_norm": 1.1858524084091187, "learning_rate": 2e-05, "loss": 0.04786807, "step": 2677 }, { "epoch": 5.356, "grad_norm": 1.1991043090820312, "learning_rate": 2e-05, "loss": 0.0410894, "step": 2678 }, { "epoch": 5.358, "grad_norm": 0.9990038871765137, "learning_rate": 2e-05, "loss": 0.03214614, "step": 2679 }, { "epoch": 5.36, "grad_norm": 1.499480128288269, "learning_rate": 2e-05, "loss": 0.04654964, "step": 2680 }, { "epoch": 5.362, "grad_norm": 1.3104462623596191, "learning_rate": 2e-05, "loss": 0.03467711, "step": 2681 }, { "epoch": 5.364, "grad_norm": 2.457524538040161, "learning_rate": 2e-05, "loss": 0.05111074, "step": 2682 }, { "epoch": 5.366, "grad_norm": 1.4979830980300903, "learning_rate": 2e-05, "loss": 0.04499523, "step": 2683 }, { "epoch": 5.368, "grad_norm": 4.056197166442871, "learning_rate": 2e-05, "loss": 0.02836193, "step": 2684 }, { "epoch": 5.37, "grad_norm": 3.501704692840576, "learning_rate": 2e-05, "loss": 0.08720149, "step": 2685 }, { "epoch": 5.372, "grad_norm": 1.552233099937439, "learning_rate": 2e-05, "loss": 0.04281237, "step": 2686 }, { "epoch": 5.374, "grad_norm": 1.1963515281677246, "learning_rate": 2e-05, "loss": 0.02805083, "step": 2687 }, { "epoch": 5.376, "grad_norm": 1.2588318586349487, "learning_rate": 2e-05, "loss": 0.03842116, "step": 2688 }, { "epoch": 5.378, "grad_norm": 2.016977310180664, "learning_rate": 2e-05, "loss": 0.04068321, "step": 2689 }, { "epoch": 5.38, "grad_norm": 1.2053779363632202, "learning_rate": 2e-05, "loss": 0.04211229, "step": 2690 }, { "epoch": 5.382, "grad_norm": 1.485421895980835, "learning_rate": 2e-05, "loss": 0.05219296, "step": 2691 }, { "epoch": 5.384, "grad_norm": 1.2090396881103516, "learning_rate": 2e-05, "loss": 0.04025387, "step": 2692 }, { "epoch": 5.386, "grad_norm": 1.3661246299743652, "learning_rate": 2e-05, "loss": 0.03243088, "step": 2693 }, { "epoch": 5.388, "grad_norm": 1.7728279829025269, "learning_rate": 2e-05, "loss": 0.04520981, "step": 2694 }, { "epoch": 5.39, "grad_norm": 1.839638590812683, "learning_rate": 2e-05, "loss": 0.04922792, "step": 2695 }, { "epoch": 5.392, "grad_norm": 1.2756119966506958, "learning_rate": 2e-05, "loss": 0.03673655, "step": 2696 }, { "epoch": 5.394, "grad_norm": 1.812853217124939, "learning_rate": 2e-05, "loss": 0.0451444, "step": 2697 }, { "epoch": 5.396, "grad_norm": 1.732804298400879, "learning_rate": 2e-05, "loss": 0.0696558, "step": 2698 }, { "epoch": 5.398, "grad_norm": 2.259817123413086, "learning_rate": 2e-05, "loss": 0.05728294, "step": 2699 }, { "epoch": 5.4, "grad_norm": 3.6404404640197754, "learning_rate": 2e-05, "loss": 0.05975555, "step": 2700 }, { "epoch": 5.402, "grad_norm": 1.1913822889328003, "learning_rate": 2e-05, "loss": 0.04311104, "step": 2701 }, { "epoch": 5.404, "grad_norm": 1.7628343105316162, "learning_rate": 2e-05, "loss": 0.04324729, "step": 2702 }, { "epoch": 5.406, "grad_norm": 2.0190365314483643, "learning_rate": 2e-05, "loss": 0.05802897, "step": 2703 }, { "epoch": 5.408, "grad_norm": 1.1721633672714233, "learning_rate": 2e-05, "loss": 0.03633162, "step": 2704 }, { "epoch": 5.41, "grad_norm": 1.600894808769226, "learning_rate": 2e-05, "loss": 0.05752605, "step": 2705 }, { "epoch": 5.412, "grad_norm": 1.0390570163726807, "learning_rate": 2e-05, "loss": 0.03858446, "step": 2706 }, { "epoch": 5.414, "grad_norm": 1.4907500743865967, "learning_rate": 2e-05, "loss": 0.02707991, "step": 2707 }, { "epoch": 5.416, "grad_norm": 2.0031204223632812, "learning_rate": 2e-05, "loss": 0.03021158, "step": 2708 }, { "epoch": 5.418, "grad_norm": 4.072817325592041, "learning_rate": 2e-05, "loss": 0.05053132, "step": 2709 }, { "epoch": 5.42, "grad_norm": 1.7337241172790527, "learning_rate": 2e-05, "loss": 0.03692292, "step": 2710 }, { "epoch": 5.422, "grad_norm": 1.754112720489502, "learning_rate": 2e-05, "loss": 0.03906107, "step": 2711 }, { "epoch": 5.424, "grad_norm": 4.156593322753906, "learning_rate": 2e-05, "loss": 0.03578386, "step": 2712 }, { "epoch": 5.426, "grad_norm": 1.2016631364822388, "learning_rate": 2e-05, "loss": 0.03455806, "step": 2713 }, { "epoch": 5.428, "grad_norm": 0.9195474982261658, "learning_rate": 2e-05, "loss": 0.02441694, "step": 2714 }, { "epoch": 5.43, "grad_norm": 1.091452717781067, "learning_rate": 2e-05, "loss": 0.0270972, "step": 2715 }, { "epoch": 5.432, "grad_norm": 1.6297458410263062, "learning_rate": 2e-05, "loss": 0.04799194, "step": 2716 }, { "epoch": 5.434, "grad_norm": 1.3862227201461792, "learning_rate": 2e-05, "loss": 0.04216187, "step": 2717 }, { "epoch": 5.436, "grad_norm": 1.8939886093139648, "learning_rate": 2e-05, "loss": 0.05403071, "step": 2718 }, { "epoch": 5.438, "grad_norm": 2.1752545833587646, "learning_rate": 2e-05, "loss": 0.05855702, "step": 2719 }, { "epoch": 5.44, "grad_norm": 2.9975874423980713, "learning_rate": 2e-05, "loss": 0.0767413, "step": 2720 }, { "epoch": 5.442, "grad_norm": 1.3926620483398438, "learning_rate": 2e-05, "loss": 0.03643538, "step": 2721 }, { "epoch": 5.444, "grad_norm": 1.96034574508667, "learning_rate": 2e-05, "loss": 0.04681049, "step": 2722 }, { "epoch": 5.446, "grad_norm": 1.1249243021011353, "learning_rate": 2e-05, "loss": 0.02556986, "step": 2723 }, { "epoch": 5.448, "grad_norm": 1.8713010549545288, "learning_rate": 2e-05, "loss": 0.05697018, "step": 2724 }, { "epoch": 5.45, "grad_norm": 1.1329511404037476, "learning_rate": 2e-05, "loss": 0.02677982, "step": 2725 }, { "epoch": 5.452, "grad_norm": 2.1637654304504395, "learning_rate": 2e-05, "loss": 0.05468876, "step": 2726 }, { "epoch": 5.454, "grad_norm": 1.6031872034072876, "learning_rate": 2e-05, "loss": 0.04942069, "step": 2727 }, { "epoch": 5.456, "grad_norm": 1.4238561391830444, "learning_rate": 2e-05, "loss": 0.04586676, "step": 2728 }, { "epoch": 5.458, "grad_norm": 0.7893091440200806, "learning_rate": 2e-05, "loss": 0.02090956, "step": 2729 }, { "epoch": 5.46, "grad_norm": 1.485260248184204, "learning_rate": 2e-05, "loss": 0.04206961, "step": 2730 }, { "epoch": 5.462, "grad_norm": 0.9618636965751648, "learning_rate": 2e-05, "loss": 0.02777426, "step": 2731 }, { "epoch": 5.464, "grad_norm": 1.663711428642273, "learning_rate": 2e-05, "loss": 0.04290383, "step": 2732 }, { "epoch": 5.466, "grad_norm": 2.107558250427246, "learning_rate": 2e-05, "loss": 0.05302426, "step": 2733 }, { "epoch": 5.468, "grad_norm": 1.3562395572662354, "learning_rate": 2e-05, "loss": 0.04122175, "step": 2734 }, { "epoch": 5.47, "grad_norm": 1.3446310758590698, "learning_rate": 2e-05, "loss": 0.04664951, "step": 2735 }, { "epoch": 5.4719999999999995, "grad_norm": 1.7537676095962524, "learning_rate": 2e-05, "loss": 0.05597606, "step": 2736 }, { "epoch": 5.474, "grad_norm": 1.2579759359359741, "learning_rate": 2e-05, "loss": 0.04209308, "step": 2737 }, { "epoch": 5.476, "grad_norm": 1.2744957208633423, "learning_rate": 2e-05, "loss": 0.03771916, "step": 2738 }, { "epoch": 5.478, "grad_norm": 1.1946172714233398, "learning_rate": 2e-05, "loss": 0.04514795, "step": 2739 }, { "epoch": 5.48, "grad_norm": 1.2385889291763306, "learning_rate": 2e-05, "loss": 0.03430807, "step": 2740 }, { "epoch": 5.482, "grad_norm": 1.1070294380187988, "learning_rate": 2e-05, "loss": 0.02781132, "step": 2741 }, { "epoch": 5.484, "grad_norm": 2.086735248565674, "learning_rate": 2e-05, "loss": 0.04818981, "step": 2742 }, { "epoch": 5.486, "grad_norm": 1.2726064920425415, "learning_rate": 2e-05, "loss": 0.04557905, "step": 2743 }, { "epoch": 5.4879999999999995, "grad_norm": 1.2682517766952515, "learning_rate": 2e-05, "loss": 0.03911246, "step": 2744 }, { "epoch": 5.49, "grad_norm": 1.2447410821914673, "learning_rate": 2e-05, "loss": 0.04097291, "step": 2745 }, { "epoch": 5.492, "grad_norm": 1.2082518339157104, "learning_rate": 2e-05, "loss": 0.04021576, "step": 2746 }, { "epoch": 5.494, "grad_norm": 1.4685015678405762, "learning_rate": 2e-05, "loss": 0.04541424, "step": 2747 }, { "epoch": 5.496, "grad_norm": 1.4241282939910889, "learning_rate": 2e-05, "loss": 0.03883777, "step": 2748 }, { "epoch": 5.498, "grad_norm": 1.5642436742782593, "learning_rate": 2e-05, "loss": 0.05445066, "step": 2749 }, { "epoch": 5.5, "grad_norm": 1.600178837776184, "learning_rate": 2e-05, "loss": 0.04767666, "step": 2750 }, { "epoch": 5.502, "grad_norm": 1.0440183877944946, "learning_rate": 2e-05, "loss": 0.03446297, "step": 2751 }, { "epoch": 5.504, "grad_norm": 1.2513372898101807, "learning_rate": 2e-05, "loss": 0.03565361, "step": 2752 }, { "epoch": 5.506, "grad_norm": 1.2894079685211182, "learning_rate": 2e-05, "loss": 0.04235047, "step": 2753 }, { "epoch": 5.508, "grad_norm": 1.8253742456436157, "learning_rate": 2e-05, "loss": 0.05126983, "step": 2754 }, { "epoch": 5.51, "grad_norm": 1.881759762763977, "learning_rate": 2e-05, "loss": 0.05438966, "step": 2755 }, { "epoch": 5.5120000000000005, "grad_norm": 0.9773890972137451, "learning_rate": 2e-05, "loss": 0.02625747, "step": 2756 }, { "epoch": 5.514, "grad_norm": 1.1596041917800903, "learning_rate": 2e-05, "loss": 0.03821389, "step": 2757 }, { "epoch": 5.516, "grad_norm": 1.3653874397277832, "learning_rate": 2e-05, "loss": 0.04838298, "step": 2758 }, { "epoch": 5.518, "grad_norm": 1.4374557733535767, "learning_rate": 2e-05, "loss": 0.04565241, "step": 2759 }, { "epoch": 5.52, "grad_norm": 1.6063563823699951, "learning_rate": 2e-05, "loss": 0.053575, "step": 2760 }, { "epoch": 5.522, "grad_norm": 1.2797068357467651, "learning_rate": 2e-05, "loss": 0.03941024, "step": 2761 }, { "epoch": 5.524, "grad_norm": 1.0527799129486084, "learning_rate": 2e-05, "loss": 0.01956801, "step": 2762 }, { "epoch": 5.526, "grad_norm": 1.218973994255066, "learning_rate": 2e-05, "loss": 0.03965396, "step": 2763 }, { "epoch": 5.5280000000000005, "grad_norm": 2.3557660579681396, "learning_rate": 2e-05, "loss": 0.03057607, "step": 2764 }, { "epoch": 5.53, "grad_norm": 1.2339812517166138, "learning_rate": 2e-05, "loss": 0.0315393, "step": 2765 }, { "epoch": 5.532, "grad_norm": 1.698705792427063, "learning_rate": 2e-05, "loss": 0.03224694, "step": 2766 }, { "epoch": 5.534, "grad_norm": 1.9057179689407349, "learning_rate": 2e-05, "loss": 0.05384257, "step": 2767 }, { "epoch": 5.536, "grad_norm": 1.3799241781234741, "learning_rate": 2e-05, "loss": 0.04452458, "step": 2768 }, { "epoch": 5.538, "grad_norm": 1.2078452110290527, "learning_rate": 2e-05, "loss": 0.04307923, "step": 2769 }, { "epoch": 5.54, "grad_norm": 1.1021215915679932, "learning_rate": 2e-05, "loss": 0.03390051, "step": 2770 }, { "epoch": 5.542, "grad_norm": 0.9905539751052856, "learning_rate": 2e-05, "loss": 0.03263092, "step": 2771 }, { "epoch": 5.5440000000000005, "grad_norm": 2.1359174251556396, "learning_rate": 2e-05, "loss": 0.05425924, "step": 2772 }, { "epoch": 5.546, "grad_norm": 1.9481074810028076, "learning_rate": 2e-05, "loss": 0.05403948, "step": 2773 }, { "epoch": 5.548, "grad_norm": 2.03695011138916, "learning_rate": 2e-05, "loss": 0.03124078, "step": 2774 }, { "epoch": 5.55, "grad_norm": 2.420414447784424, "learning_rate": 2e-05, "loss": 0.05495863, "step": 2775 }, { "epoch": 5.552, "grad_norm": 2.0198957920074463, "learning_rate": 2e-05, "loss": 0.05129372, "step": 2776 }, { "epoch": 5.554, "grad_norm": 1.7094142436981201, "learning_rate": 2e-05, "loss": 0.0504851, "step": 2777 }, { "epoch": 5.556, "grad_norm": 3.827301502227783, "learning_rate": 2e-05, "loss": 0.05337991, "step": 2778 }, { "epoch": 5.558, "grad_norm": 1.3300044536590576, "learning_rate": 2e-05, "loss": 0.03548397, "step": 2779 }, { "epoch": 5.5600000000000005, "grad_norm": 1.937286376953125, "learning_rate": 2e-05, "loss": 0.04464417, "step": 2780 }, { "epoch": 5.562, "grad_norm": 2.370962142944336, "learning_rate": 2e-05, "loss": 0.05774143, "step": 2781 }, { "epoch": 5.564, "grad_norm": 1.7345980405807495, "learning_rate": 2e-05, "loss": 0.04462703, "step": 2782 }, { "epoch": 5.566, "grad_norm": 1.5614328384399414, "learning_rate": 2e-05, "loss": 0.04742298, "step": 2783 }, { "epoch": 5.568, "grad_norm": 1.480557918548584, "learning_rate": 2e-05, "loss": 0.04577837, "step": 2784 }, { "epoch": 5.57, "grad_norm": 1.257659912109375, "learning_rate": 2e-05, "loss": 0.03243734, "step": 2785 }, { "epoch": 5.572, "grad_norm": 2.447787046432495, "learning_rate": 2e-05, "loss": 0.01604201, "step": 2786 }, { "epoch": 5.574, "grad_norm": 1.1269577741622925, "learning_rate": 2e-05, "loss": 0.02630051, "step": 2787 }, { "epoch": 5.576, "grad_norm": 1.0902881622314453, "learning_rate": 2e-05, "loss": 0.02891617, "step": 2788 }, { "epoch": 5.578, "grad_norm": 2.013070821762085, "learning_rate": 2e-05, "loss": 0.04982309, "step": 2789 }, { "epoch": 5.58, "grad_norm": 1.520807147026062, "learning_rate": 2e-05, "loss": 0.06273451, "step": 2790 }, { "epoch": 5.582, "grad_norm": 1.0118508338928223, "learning_rate": 2e-05, "loss": 0.0323143, "step": 2791 }, { "epoch": 5.584, "grad_norm": 1.589319109916687, "learning_rate": 2e-05, "loss": 0.04532817, "step": 2792 }, { "epoch": 5.586, "grad_norm": 1.4283770322799683, "learning_rate": 2e-05, "loss": 0.03695945, "step": 2793 }, { "epoch": 5.588, "grad_norm": 1.5165592432022095, "learning_rate": 2e-05, "loss": 0.04063965, "step": 2794 }, { "epoch": 5.59, "grad_norm": 2.1748976707458496, "learning_rate": 2e-05, "loss": 0.0483332, "step": 2795 }, { "epoch": 5.592, "grad_norm": 1.4957975149154663, "learning_rate": 2e-05, "loss": 0.03734215, "step": 2796 }, { "epoch": 5.594, "grad_norm": 1.5057545900344849, "learning_rate": 2e-05, "loss": 0.0448417, "step": 2797 }, { "epoch": 5.596, "grad_norm": 1.6289266347885132, "learning_rate": 2e-05, "loss": 0.04035756, "step": 2798 }, { "epoch": 5.598, "grad_norm": 1.945152759552002, "learning_rate": 2e-05, "loss": 0.04505145, "step": 2799 }, { "epoch": 5.6, "grad_norm": 3.9745194911956787, "learning_rate": 2e-05, "loss": 0.05695663, "step": 2800 }, { "epoch": 5.602, "grad_norm": 1.1209356784820557, "learning_rate": 2e-05, "loss": 0.04382028, "step": 2801 }, { "epoch": 5.604, "grad_norm": 1.7319954633712769, "learning_rate": 2e-05, "loss": 0.07861836, "step": 2802 }, { "epoch": 5.606, "grad_norm": 1.2670855522155762, "learning_rate": 2e-05, "loss": 0.03723117, "step": 2803 }, { "epoch": 5.608, "grad_norm": 1.5532779693603516, "learning_rate": 2e-05, "loss": 0.03009872, "step": 2804 }, { "epoch": 5.61, "grad_norm": 1.9622262716293335, "learning_rate": 2e-05, "loss": 0.06740649, "step": 2805 }, { "epoch": 5.612, "grad_norm": 1.1806871891021729, "learning_rate": 2e-05, "loss": 0.03466694, "step": 2806 }, { "epoch": 5.614, "grad_norm": 2.1496856212615967, "learning_rate": 2e-05, "loss": 0.060385, "step": 2807 }, { "epoch": 5.616, "grad_norm": 1.7937800884246826, "learning_rate": 2e-05, "loss": 0.04280568, "step": 2808 }, { "epoch": 5.618, "grad_norm": 1.664449691772461, "learning_rate": 2e-05, "loss": 0.0573493, "step": 2809 }, { "epoch": 5.62, "grad_norm": 2.113910436630249, "learning_rate": 2e-05, "loss": 0.05896275, "step": 2810 }, { "epoch": 5.622, "grad_norm": 1.4774582386016846, "learning_rate": 2e-05, "loss": 0.05064362, "step": 2811 }, { "epoch": 5.624, "grad_norm": 1.077660083770752, "learning_rate": 2e-05, "loss": 0.03990199, "step": 2812 }, { "epoch": 5.626, "grad_norm": 2.0504021644592285, "learning_rate": 2e-05, "loss": 0.04368483, "step": 2813 }, { "epoch": 5.628, "grad_norm": 1.3828272819519043, "learning_rate": 2e-05, "loss": 0.0452172, "step": 2814 }, { "epoch": 5.63, "grad_norm": 1.3679569959640503, "learning_rate": 2e-05, "loss": 0.04204818, "step": 2815 }, { "epoch": 5.632, "grad_norm": 1.904268503189087, "learning_rate": 2e-05, "loss": 0.03786581, "step": 2816 }, { "epoch": 5.634, "grad_norm": 1.5074198246002197, "learning_rate": 2e-05, "loss": 0.05239385, "step": 2817 }, { "epoch": 5.636, "grad_norm": 1.603095531463623, "learning_rate": 2e-05, "loss": 0.04265279, "step": 2818 }, { "epoch": 5.638, "grad_norm": 1.2010900974273682, "learning_rate": 2e-05, "loss": 0.0357244, "step": 2819 }, { "epoch": 5.64, "grad_norm": 1.8262673616409302, "learning_rate": 2e-05, "loss": 0.05815496, "step": 2820 }, { "epoch": 5.642, "grad_norm": 1.6634687185287476, "learning_rate": 2e-05, "loss": 0.04178239, "step": 2821 }, { "epoch": 5.644, "grad_norm": 1.9540555477142334, "learning_rate": 2e-05, "loss": 0.0514649, "step": 2822 }, { "epoch": 5.646, "grad_norm": 1.463693618774414, "learning_rate": 2e-05, "loss": 0.0404924, "step": 2823 }, { "epoch": 5.648, "grad_norm": 2.447908639907837, "learning_rate": 2e-05, "loss": 0.03566689, "step": 2824 }, { "epoch": 5.65, "grad_norm": 1.290624737739563, "learning_rate": 2e-05, "loss": 0.03801401, "step": 2825 }, { "epoch": 5.652, "grad_norm": 1.4527415037155151, "learning_rate": 2e-05, "loss": 0.03635446, "step": 2826 }, { "epoch": 5.654, "grad_norm": 1.6195863485336304, "learning_rate": 2e-05, "loss": 0.05585508, "step": 2827 }, { "epoch": 5.656, "grad_norm": 1.6211501359939575, "learning_rate": 2e-05, "loss": 0.06746172, "step": 2828 }, { "epoch": 5.658, "grad_norm": 1.346248745918274, "learning_rate": 2e-05, "loss": 0.04963518, "step": 2829 }, { "epoch": 5.66, "grad_norm": 1.062984585762024, "learning_rate": 2e-05, "loss": 0.03912539, "step": 2830 }, { "epoch": 5.662, "grad_norm": 1.6748669147491455, "learning_rate": 2e-05, "loss": 0.06159768, "step": 2831 }, { "epoch": 5.664, "grad_norm": 1.3549000024795532, "learning_rate": 2e-05, "loss": 0.04779611, "step": 2832 }, { "epoch": 5.666, "grad_norm": 1.2861626148223877, "learning_rate": 2e-05, "loss": 0.03312145, "step": 2833 }, { "epoch": 5.668, "grad_norm": 2.1982734203338623, "learning_rate": 2e-05, "loss": 0.05529946, "step": 2834 }, { "epoch": 5.67, "grad_norm": 1.2915890216827393, "learning_rate": 2e-05, "loss": 0.03705323, "step": 2835 }, { "epoch": 5.672, "grad_norm": 1.5575082302093506, "learning_rate": 2e-05, "loss": 0.03686603, "step": 2836 }, { "epoch": 5.674, "grad_norm": 1.038826823234558, "learning_rate": 2e-05, "loss": 0.03793515, "step": 2837 }, { "epoch": 5.676, "grad_norm": 1.6118172407150269, "learning_rate": 2e-05, "loss": 0.03095808, "step": 2838 }, { "epoch": 5.678, "grad_norm": 1.6042510271072388, "learning_rate": 2e-05, "loss": 0.05740377, "step": 2839 }, { "epoch": 5.68, "grad_norm": 1.7100886106491089, "learning_rate": 2e-05, "loss": 0.05264761, "step": 2840 }, { "epoch": 5.682, "grad_norm": 1.575619101524353, "learning_rate": 2e-05, "loss": 0.03263854, "step": 2841 }, { "epoch": 5.684, "grad_norm": 1.4796384572982788, "learning_rate": 2e-05, "loss": 0.04719574, "step": 2842 }, { "epoch": 5.686, "grad_norm": 1.3210619688034058, "learning_rate": 2e-05, "loss": 0.0271394, "step": 2843 }, { "epoch": 5.688, "grad_norm": 1.0833356380462646, "learning_rate": 2e-05, "loss": 0.03883443, "step": 2844 }, { "epoch": 5.6899999999999995, "grad_norm": 2.5778627395629883, "learning_rate": 2e-05, "loss": 0.05159726, "step": 2845 }, { "epoch": 5.692, "grad_norm": 2.201190948486328, "learning_rate": 2e-05, "loss": 0.04031905, "step": 2846 }, { "epoch": 5.694, "grad_norm": 1.1688532829284668, "learning_rate": 2e-05, "loss": 0.03291119, "step": 2847 }, { "epoch": 5.696, "grad_norm": 1.796081304550171, "learning_rate": 2e-05, "loss": 0.05050949, "step": 2848 }, { "epoch": 5.698, "grad_norm": 1.016455888748169, "learning_rate": 2e-05, "loss": 0.03628581, "step": 2849 }, { "epoch": 5.7, "grad_norm": 1.9447333812713623, "learning_rate": 2e-05, "loss": 0.04152183, "step": 2850 }, { "epoch": 5.702, "grad_norm": 1.3478142023086548, "learning_rate": 2e-05, "loss": 0.03443387, "step": 2851 }, { "epoch": 5.704, "grad_norm": 1.6059876680374146, "learning_rate": 2e-05, "loss": 0.05241629, "step": 2852 }, { "epoch": 5.7059999999999995, "grad_norm": 1.2685903310775757, "learning_rate": 2e-05, "loss": 0.03086682, "step": 2853 }, { "epoch": 5.708, "grad_norm": 2.984708547592163, "learning_rate": 2e-05, "loss": 0.05078296, "step": 2854 }, { "epoch": 5.71, "grad_norm": 3.1346423625946045, "learning_rate": 2e-05, "loss": 0.04925366, "step": 2855 }, { "epoch": 5.712, "grad_norm": 1.1678012609481812, "learning_rate": 2e-05, "loss": 0.03549577, "step": 2856 }, { "epoch": 5.714, "grad_norm": 1.408694863319397, "learning_rate": 2e-05, "loss": 0.03885608, "step": 2857 }, { "epoch": 5.716, "grad_norm": 0.9981144070625305, "learning_rate": 2e-05, "loss": 0.02968807, "step": 2858 }, { "epoch": 5.718, "grad_norm": 1.307047724723816, "learning_rate": 2e-05, "loss": 0.03957972, "step": 2859 }, { "epoch": 5.72, "grad_norm": 2.804670572280884, "learning_rate": 2e-05, "loss": 0.04513305, "step": 2860 }, { "epoch": 5.7219999999999995, "grad_norm": 1.1364901065826416, "learning_rate": 2e-05, "loss": 0.03480239, "step": 2861 }, { "epoch": 5.724, "grad_norm": 1.2232093811035156, "learning_rate": 2e-05, "loss": 0.03162998, "step": 2862 }, { "epoch": 5.726, "grad_norm": 1.298130989074707, "learning_rate": 2e-05, "loss": 0.04124122, "step": 2863 }, { "epoch": 5.728, "grad_norm": 1.2249360084533691, "learning_rate": 2e-05, "loss": 0.04093556, "step": 2864 }, { "epoch": 5.73, "grad_norm": 1.261306881904602, "learning_rate": 2e-05, "loss": 0.04419369, "step": 2865 }, { "epoch": 5.732, "grad_norm": 1.0616569519042969, "learning_rate": 2e-05, "loss": 0.02921419, "step": 2866 }, { "epoch": 5.734, "grad_norm": 1.7513487339019775, "learning_rate": 2e-05, "loss": 0.03050586, "step": 2867 }, { "epoch": 5.736, "grad_norm": 1.4477100372314453, "learning_rate": 2e-05, "loss": 0.0365976, "step": 2868 }, { "epoch": 5.7379999999999995, "grad_norm": 2.1464011669158936, "learning_rate": 2e-05, "loss": 0.05555936, "step": 2869 }, { "epoch": 5.74, "grad_norm": 1.7182115316390991, "learning_rate": 2e-05, "loss": 0.04245908, "step": 2870 }, { "epoch": 5.742, "grad_norm": 1.0961130857467651, "learning_rate": 2e-05, "loss": 0.03913336, "step": 2871 }, { "epoch": 5.744, "grad_norm": 1.87662935256958, "learning_rate": 2e-05, "loss": 0.04560525, "step": 2872 }, { "epoch": 5.746, "grad_norm": 1.343624234199524, "learning_rate": 2e-05, "loss": 0.03066914, "step": 2873 }, { "epoch": 5.748, "grad_norm": 2.7794783115386963, "learning_rate": 2e-05, "loss": 0.05809136, "step": 2874 }, { "epoch": 5.75, "grad_norm": 1.3323760032653809, "learning_rate": 2e-05, "loss": 0.05136766, "step": 2875 }, { "epoch": 5.752, "grad_norm": 1.9432049989700317, "learning_rate": 2e-05, "loss": 0.05197339, "step": 2876 }, { "epoch": 5.754, "grad_norm": 1.1205425262451172, "learning_rate": 2e-05, "loss": 0.03465851, "step": 2877 }, { "epoch": 5.756, "grad_norm": 1.6224393844604492, "learning_rate": 2e-05, "loss": 0.03670148, "step": 2878 }, { "epoch": 5.758, "grad_norm": 1.3828800916671753, "learning_rate": 2e-05, "loss": 0.04724912, "step": 2879 }, { "epoch": 5.76, "grad_norm": 1.6124157905578613, "learning_rate": 2e-05, "loss": 0.03899434, "step": 2880 }, { "epoch": 5.7620000000000005, "grad_norm": 1.2006202936172485, "learning_rate": 2e-05, "loss": 0.04298317, "step": 2881 }, { "epoch": 5.764, "grad_norm": 1.8390953540802002, "learning_rate": 2e-05, "loss": 0.04603451, "step": 2882 }, { "epoch": 5.766, "grad_norm": 3.418121099472046, "learning_rate": 2e-05, "loss": 0.03081621, "step": 2883 }, { "epoch": 5.768, "grad_norm": 0.9251627922058105, "learning_rate": 2e-05, "loss": 0.02699123, "step": 2884 }, { "epoch": 5.77, "grad_norm": 1.5069046020507812, "learning_rate": 2e-05, "loss": 0.03988301, "step": 2885 }, { "epoch": 5.772, "grad_norm": 2.365957498550415, "learning_rate": 2e-05, "loss": 0.03983999, "step": 2886 }, { "epoch": 5.774, "grad_norm": 1.5599316358566284, "learning_rate": 2e-05, "loss": 0.04989076, "step": 2887 }, { "epoch": 5.776, "grad_norm": 1.1228059530258179, "learning_rate": 2e-05, "loss": 0.03197101, "step": 2888 }, { "epoch": 5.7780000000000005, "grad_norm": 1.517126202583313, "learning_rate": 2e-05, "loss": 0.03176156, "step": 2889 }, { "epoch": 5.78, "grad_norm": 1.023742914199829, "learning_rate": 2e-05, "loss": 0.035194, "step": 2890 }, { "epoch": 5.782, "grad_norm": 1.5021580457687378, "learning_rate": 2e-05, "loss": 0.0346195, "step": 2891 }, { "epoch": 5.784, "grad_norm": 1.5000685453414917, "learning_rate": 2e-05, "loss": 0.03589319, "step": 2892 }, { "epoch": 5.786, "grad_norm": 1.2418376207351685, "learning_rate": 2e-05, "loss": 0.03027872, "step": 2893 }, { "epoch": 5.788, "grad_norm": 1.425329566001892, "learning_rate": 2e-05, "loss": 0.03400642, "step": 2894 }, { "epoch": 5.79, "grad_norm": 1.4583309888839722, "learning_rate": 2e-05, "loss": 0.03597549, "step": 2895 }, { "epoch": 5.792, "grad_norm": 1.5168836116790771, "learning_rate": 2e-05, "loss": 0.02261955, "step": 2896 }, { "epoch": 5.7940000000000005, "grad_norm": 1.7382346391677856, "learning_rate": 2e-05, "loss": 0.03887838, "step": 2897 }, { "epoch": 5.796, "grad_norm": 3.2048113346099854, "learning_rate": 2e-05, "loss": 0.07382256, "step": 2898 }, { "epoch": 5.798, "grad_norm": 1.1313360929489136, "learning_rate": 2e-05, "loss": 0.02457133, "step": 2899 }, { "epoch": 5.8, "grad_norm": 2.6181559562683105, "learning_rate": 2e-05, "loss": 0.04528946, "step": 2900 }, { "epoch": 5.802, "grad_norm": 2.0275521278381348, "learning_rate": 2e-05, "loss": 0.04249582, "step": 2901 }, { "epoch": 5.804, "grad_norm": 3.3684051036834717, "learning_rate": 2e-05, "loss": 0.04949026, "step": 2902 }, { "epoch": 5.806, "grad_norm": 1.1052809953689575, "learning_rate": 2e-05, "loss": 0.03748845, "step": 2903 }, { "epoch": 5.808, "grad_norm": 1.147633671760559, "learning_rate": 2e-05, "loss": 0.02890555, "step": 2904 }, { "epoch": 5.8100000000000005, "grad_norm": 2.285583257675171, "learning_rate": 2e-05, "loss": 0.04941707, "step": 2905 }, { "epoch": 5.812, "grad_norm": 2.404172658920288, "learning_rate": 2e-05, "loss": 0.02909764, "step": 2906 }, { "epoch": 5.814, "grad_norm": 2.2711870670318604, "learning_rate": 2e-05, "loss": 0.06077723, "step": 2907 }, { "epoch": 5.816, "grad_norm": 1.342374563217163, "learning_rate": 2e-05, "loss": 0.03685233, "step": 2908 }, { "epoch": 5.818, "grad_norm": 1.1346458196640015, "learning_rate": 2e-05, "loss": 0.02703713, "step": 2909 }, { "epoch": 5.82, "grad_norm": 0.9739170670509338, "learning_rate": 2e-05, "loss": 0.0288388, "step": 2910 }, { "epoch": 5.822, "grad_norm": 1.538444995880127, "learning_rate": 2e-05, "loss": 0.0610106, "step": 2911 }, { "epoch": 5.824, "grad_norm": 1.5088497400283813, "learning_rate": 2e-05, "loss": 0.03801049, "step": 2912 }, { "epoch": 5.826, "grad_norm": 2.691092014312744, "learning_rate": 2e-05, "loss": 0.05319805, "step": 2913 }, { "epoch": 5.828, "grad_norm": 1.7784383296966553, "learning_rate": 2e-05, "loss": 0.03665651, "step": 2914 }, { "epoch": 5.83, "grad_norm": 2.699039936065674, "learning_rate": 2e-05, "loss": 0.06110234, "step": 2915 }, { "epoch": 5.832, "grad_norm": 1.0186821222305298, "learning_rate": 2e-05, "loss": 0.0389097, "step": 2916 }, { "epoch": 5.834, "grad_norm": 1.5855401754379272, "learning_rate": 2e-05, "loss": 0.05706736, "step": 2917 }, { "epoch": 5.836, "grad_norm": 1.5186421871185303, "learning_rate": 2e-05, "loss": 0.04286093, "step": 2918 }, { "epoch": 5.838, "grad_norm": 1.8473025560379028, "learning_rate": 2e-05, "loss": 0.04118644, "step": 2919 }, { "epoch": 5.84, "grad_norm": 1.2848973274230957, "learning_rate": 2e-05, "loss": 0.03644439, "step": 2920 }, { "epoch": 5.842, "grad_norm": 1.198118805885315, "learning_rate": 2e-05, "loss": 0.04760751, "step": 2921 }, { "epoch": 5.844, "grad_norm": 1.7948402166366577, "learning_rate": 2e-05, "loss": 0.05257875, "step": 2922 }, { "epoch": 5.846, "grad_norm": 1.2989088296890259, "learning_rate": 2e-05, "loss": 0.0287516, "step": 2923 }, { "epoch": 5.848, "grad_norm": 0.9115301966667175, "learning_rate": 2e-05, "loss": 0.0334456, "step": 2924 }, { "epoch": 5.85, "grad_norm": 11.285009384155273, "learning_rate": 2e-05, "loss": 0.03400384, "step": 2925 }, { "epoch": 5.852, "grad_norm": 5.87296199798584, "learning_rate": 2e-05, "loss": 0.10033739, "step": 2926 }, { "epoch": 5.854, "grad_norm": 1.882878065109253, "learning_rate": 2e-05, "loss": 0.04880483, "step": 2927 }, { "epoch": 5.856, "grad_norm": 11.981039047241211, "learning_rate": 2e-05, "loss": 0.166832, "step": 2928 }, { "epoch": 5.858, "grad_norm": 1.1964349746704102, "learning_rate": 2e-05, "loss": 0.02282231, "step": 2929 }, { "epoch": 5.86, "grad_norm": 3.9903624057769775, "learning_rate": 2e-05, "loss": 0.06060695, "step": 2930 }, { "epoch": 5.862, "grad_norm": 1.5105197429656982, "learning_rate": 2e-05, "loss": 0.04793704, "step": 2931 }, { "epoch": 5.864, "grad_norm": 1.41489839553833, "learning_rate": 2e-05, "loss": 0.03938361, "step": 2932 }, { "epoch": 5.866, "grad_norm": 2.26653790473938, "learning_rate": 2e-05, "loss": 0.0598973, "step": 2933 }, { "epoch": 5.868, "grad_norm": 2.4137723445892334, "learning_rate": 2e-05, "loss": 0.05444659, "step": 2934 }, { "epoch": 5.87, "grad_norm": 1.2001371383666992, "learning_rate": 2e-05, "loss": 0.03985805, "step": 2935 }, { "epoch": 5.872, "grad_norm": 1.5971794128417969, "learning_rate": 2e-05, "loss": 0.06079929, "step": 2936 }, { "epoch": 5.874, "grad_norm": 1.318945050239563, "learning_rate": 2e-05, "loss": 0.04854849, "step": 2937 }, { "epoch": 5.876, "grad_norm": 1.246225118637085, "learning_rate": 2e-05, "loss": 0.03397983, "step": 2938 }, { "epoch": 5.878, "grad_norm": 2.4939327239990234, "learning_rate": 2e-05, "loss": 0.04911096, "step": 2939 }, { "epoch": 5.88, "grad_norm": 1.0084093809127808, "learning_rate": 2e-05, "loss": 0.03702591, "step": 2940 }, { "epoch": 5.882, "grad_norm": 1.044545292854309, "learning_rate": 2e-05, "loss": 0.03148817, "step": 2941 }, { "epoch": 5.884, "grad_norm": 1.6608963012695312, "learning_rate": 2e-05, "loss": 0.05824044, "step": 2942 }, { "epoch": 5.886, "grad_norm": 2.168658494949341, "learning_rate": 2e-05, "loss": 0.05980438, "step": 2943 }, { "epoch": 5.888, "grad_norm": 1.3249305486679077, "learning_rate": 2e-05, "loss": 0.04346288, "step": 2944 }, { "epoch": 5.89, "grad_norm": 1.1912715435028076, "learning_rate": 2e-05, "loss": 0.03074624, "step": 2945 }, { "epoch": 5.892, "grad_norm": 2.0439677238464355, "learning_rate": 2e-05, "loss": 0.06351136, "step": 2946 }, { "epoch": 5.894, "grad_norm": 1.2318497896194458, "learning_rate": 2e-05, "loss": 0.03561502, "step": 2947 }, { "epoch": 5.896, "grad_norm": 1.001007318496704, "learning_rate": 2e-05, "loss": 0.0264318, "step": 2948 }, { "epoch": 5.898, "grad_norm": 1.6429765224456787, "learning_rate": 2e-05, "loss": 0.05054122, "step": 2949 }, { "epoch": 5.9, "grad_norm": 1.8154584169387817, "learning_rate": 2e-05, "loss": 0.05611707, "step": 2950 }, { "epoch": 5.902, "grad_norm": 1.8020012378692627, "learning_rate": 2e-05, "loss": 0.0445966, "step": 2951 }, { "epoch": 5.904, "grad_norm": 1.1561640501022339, "learning_rate": 2e-05, "loss": 0.042308, "step": 2952 }, { "epoch": 5.906, "grad_norm": 1.3049297332763672, "learning_rate": 2e-05, "loss": 0.02226608, "step": 2953 }, { "epoch": 5.908, "grad_norm": 1.9397492408752441, "learning_rate": 2e-05, "loss": 0.04040186, "step": 2954 }, { "epoch": 5.91, "grad_norm": 2.0794615745544434, "learning_rate": 2e-05, "loss": 0.04332725, "step": 2955 }, { "epoch": 5.912, "grad_norm": 1.1256343126296997, "learning_rate": 2e-05, "loss": 0.03192374, "step": 2956 }, { "epoch": 5.914, "grad_norm": 1.629341959953308, "learning_rate": 2e-05, "loss": 0.03662695, "step": 2957 }, { "epoch": 5.916, "grad_norm": 1.8990249633789062, "learning_rate": 2e-05, "loss": 0.04180484, "step": 2958 }, { "epoch": 5.918, "grad_norm": 1.3357110023498535, "learning_rate": 2e-05, "loss": 0.05098827, "step": 2959 }, { "epoch": 5.92, "grad_norm": 2.420962333679199, "learning_rate": 2e-05, "loss": 0.04422121, "step": 2960 }, { "epoch": 5.922, "grad_norm": 1.0313847064971924, "learning_rate": 2e-05, "loss": 0.02736597, "step": 2961 }, { "epoch": 5.924, "grad_norm": 2.0497448444366455, "learning_rate": 2e-05, "loss": 0.04987185, "step": 2962 }, { "epoch": 5.926, "grad_norm": 1.508475661277771, "learning_rate": 2e-05, "loss": 0.02963543, "step": 2963 }, { "epoch": 5.928, "grad_norm": 2.0732882022857666, "learning_rate": 2e-05, "loss": 0.04629382, "step": 2964 }, { "epoch": 5.93, "grad_norm": 1.1313819885253906, "learning_rate": 2e-05, "loss": 0.02837754, "step": 2965 }, { "epoch": 5.932, "grad_norm": 1.3886733055114746, "learning_rate": 2e-05, "loss": 0.03276101, "step": 2966 }, { "epoch": 5.934, "grad_norm": 1.8589578866958618, "learning_rate": 2e-05, "loss": 0.05601101, "step": 2967 }, { "epoch": 5.936, "grad_norm": 1.4239290952682495, "learning_rate": 2e-05, "loss": 0.03772479, "step": 2968 }, { "epoch": 5.938, "grad_norm": 1.3066622018814087, "learning_rate": 2e-05, "loss": 0.03739661, "step": 2969 }, { "epoch": 5.9399999999999995, "grad_norm": 1.04325270652771, "learning_rate": 2e-05, "loss": 0.03995933, "step": 2970 }, { "epoch": 5.942, "grad_norm": 1.5223791599273682, "learning_rate": 2e-05, "loss": 0.04225756, "step": 2971 }, { "epoch": 5.944, "grad_norm": 2.6733899116516113, "learning_rate": 2e-05, "loss": 0.04441323, "step": 2972 }, { "epoch": 5.946, "grad_norm": 1.6275644302368164, "learning_rate": 2e-05, "loss": 0.04762278, "step": 2973 }, { "epoch": 5.948, "grad_norm": 1.2216103076934814, "learning_rate": 2e-05, "loss": 0.03107932, "step": 2974 }, { "epoch": 5.95, "grad_norm": 1.3730415105819702, "learning_rate": 2e-05, "loss": 0.02952298, "step": 2975 }, { "epoch": 5.952, "grad_norm": 1.223415493965149, "learning_rate": 2e-05, "loss": 0.03430998, "step": 2976 }, { "epoch": 5.954, "grad_norm": 1.5398921966552734, "learning_rate": 2e-05, "loss": 0.05110143, "step": 2977 }, { "epoch": 5.9559999999999995, "grad_norm": 1.7357475757598877, "learning_rate": 2e-05, "loss": 0.04328895, "step": 2978 }, { "epoch": 5.958, "grad_norm": 1.7618504762649536, "learning_rate": 2e-05, "loss": 0.05958952, "step": 2979 }, { "epoch": 5.96, "grad_norm": 2.2981998920440674, "learning_rate": 2e-05, "loss": 0.06046209, "step": 2980 }, { "epoch": 5.962, "grad_norm": 1.7709860801696777, "learning_rate": 2e-05, "loss": 0.05310146, "step": 2981 }, { "epoch": 5.964, "grad_norm": 1.4711432456970215, "learning_rate": 2e-05, "loss": 0.03057422, "step": 2982 }, { "epoch": 5.966, "grad_norm": 2.1963133811950684, "learning_rate": 2e-05, "loss": 0.07748948, "step": 2983 }, { "epoch": 5.968, "grad_norm": 1.2445276975631714, "learning_rate": 2e-05, "loss": 0.03456353, "step": 2984 }, { "epoch": 5.97, "grad_norm": 1.2059154510498047, "learning_rate": 2e-05, "loss": 0.03177342, "step": 2985 }, { "epoch": 5.9719999999999995, "grad_norm": 2.007486581802368, "learning_rate": 2e-05, "loss": 0.03973828, "step": 2986 }, { "epoch": 5.974, "grad_norm": 1.5894452333450317, "learning_rate": 2e-05, "loss": 0.04387932, "step": 2987 }, { "epoch": 5.976, "grad_norm": 1.9093830585479736, "learning_rate": 2e-05, "loss": 0.04532828, "step": 2988 }, { "epoch": 5.978, "grad_norm": 1.9382355213165283, "learning_rate": 2e-05, "loss": 0.05527761, "step": 2989 }, { "epoch": 5.98, "grad_norm": 1.3034459352493286, "learning_rate": 2e-05, "loss": 0.05401599, "step": 2990 }, { "epoch": 5.982, "grad_norm": 1.4245580434799194, "learning_rate": 2e-05, "loss": 0.04059825, "step": 2991 }, { "epoch": 5.984, "grad_norm": 2.453721523284912, "learning_rate": 2e-05, "loss": 0.04975076, "step": 2992 }, { "epoch": 5.986, "grad_norm": 1.6775089502334595, "learning_rate": 2e-05, "loss": 0.03426765, "step": 2993 }, { "epoch": 5.9879999999999995, "grad_norm": 1.31791090965271, "learning_rate": 2e-05, "loss": 0.0269012, "step": 2994 }, { "epoch": 5.99, "grad_norm": 1.2786681652069092, "learning_rate": 2e-05, "loss": 0.04798415, "step": 2995 }, { "epoch": 5.992, "grad_norm": 1.375746488571167, "learning_rate": 2e-05, "loss": 0.03704346, "step": 2996 }, { "epoch": 5.994, "grad_norm": 2.2422120571136475, "learning_rate": 2e-05, "loss": 0.03538365, "step": 2997 }, { "epoch": 5.996, "grad_norm": 1.6498358249664307, "learning_rate": 2e-05, "loss": 0.05676982, "step": 2998 }, { "epoch": 5.998, "grad_norm": 0.9439936280250549, "learning_rate": 2e-05, "loss": 0.03124115, "step": 2999 }, { "epoch": 6.0, "grad_norm": 1.866321086883545, "learning_rate": 2e-05, "loss": 0.04968661, "step": 3000 }, { "epoch": 6.0, "eval_performance": { "AngleClassification_1": 0.996, "AngleClassification_2": 0.996, "AngleClassification_3": 0.7944111776447106, "Equal_1": 0.99, "Equal_2": 0.9121756487025948, "Equal_3": 0.7964071856287425, "LineComparison_1": 0.986, "LineComparison_2": 0.9960079840319361, "LineComparison_3": 0.9720558882235529, "Parallel_1": 0.9819639278557114, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.986, "Perpendicular_1": 0.984, "Perpendicular_2": 0.726, "Perpendicular_3": 0.3717434869739479, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9976666666666667, "PointLiesOnCircle_3": 0.9796, "PointLiesOnLine_1": 0.9879759519038076, "PointLiesOnLine_2": 0.9839679358717435, "PointLiesOnLine_3": 0.7724550898203593 }, "eval_runtime": 319.7917, "eval_samples_per_second": 32.834, "eval_steps_per_second": 0.657, "step": 3000 }, { "epoch": 6.002, "grad_norm": 1.5802181959152222, "learning_rate": 2e-05, "loss": 0.02955408, "step": 3001 }, { "epoch": 6.004, "grad_norm": 1.1106842756271362, "learning_rate": 2e-05, "loss": 0.03170551, "step": 3002 }, { "epoch": 6.006, "grad_norm": 1.0765694379806519, "learning_rate": 2e-05, "loss": 0.032282, "step": 3003 }, { "epoch": 6.008, "grad_norm": 1.2182323932647705, "learning_rate": 2e-05, "loss": 0.04121379, "step": 3004 }, { "epoch": 6.01, "grad_norm": 1.2825361490249634, "learning_rate": 2e-05, "loss": 0.04134614, "step": 3005 }, { "epoch": 6.012, "grad_norm": 2.202105760574341, "learning_rate": 2e-05, "loss": 0.04897365, "step": 3006 }, { "epoch": 6.014, "grad_norm": 2.587782382965088, "learning_rate": 2e-05, "loss": 0.06023916, "step": 3007 }, { "epoch": 6.016, "grad_norm": 2.555541753768921, "learning_rate": 2e-05, "loss": 0.03511619, "step": 3008 }, { "epoch": 6.018, "grad_norm": 1.417189598083496, "learning_rate": 2e-05, "loss": 0.05029716, "step": 3009 }, { "epoch": 6.02, "grad_norm": 1.7279114723205566, "learning_rate": 2e-05, "loss": 0.0623187, "step": 3010 }, { "epoch": 6.022, "grad_norm": 1.6738758087158203, "learning_rate": 2e-05, "loss": 0.03575193, "step": 3011 }, { "epoch": 6.024, "grad_norm": 1.7566310167312622, "learning_rate": 2e-05, "loss": 0.05152703, "step": 3012 }, { "epoch": 6.026, "grad_norm": 1.1162376403808594, "learning_rate": 2e-05, "loss": 0.03532304, "step": 3013 }, { "epoch": 6.028, "grad_norm": 1.4076039791107178, "learning_rate": 2e-05, "loss": 0.04677567, "step": 3014 }, { "epoch": 6.03, "grad_norm": 1.133365273475647, "learning_rate": 2e-05, "loss": 0.02753913, "step": 3015 }, { "epoch": 6.032, "grad_norm": 1.4458855390548706, "learning_rate": 2e-05, "loss": 0.03868459, "step": 3016 }, { "epoch": 6.034, "grad_norm": 2.3904032707214355, "learning_rate": 2e-05, "loss": 0.03503593, "step": 3017 }, { "epoch": 6.036, "grad_norm": 1.949507713317871, "learning_rate": 2e-05, "loss": 0.04720395, "step": 3018 }, { "epoch": 6.038, "grad_norm": 1.8610894680023193, "learning_rate": 2e-05, "loss": 0.05176724, "step": 3019 }, { "epoch": 6.04, "grad_norm": 1.7843246459960938, "learning_rate": 2e-05, "loss": 0.0451156, "step": 3020 }, { "epoch": 6.042, "grad_norm": 1.3640440702438354, "learning_rate": 2e-05, "loss": 0.04446036, "step": 3021 }, { "epoch": 6.044, "grad_norm": 1.3423798084259033, "learning_rate": 2e-05, "loss": 0.04165779, "step": 3022 }, { "epoch": 6.046, "grad_norm": 1.4849580526351929, "learning_rate": 2e-05, "loss": 0.03914479, "step": 3023 }, { "epoch": 6.048, "grad_norm": 1.4148266315460205, "learning_rate": 2e-05, "loss": 0.03579354, "step": 3024 }, { "epoch": 6.05, "grad_norm": 1.197055459022522, "learning_rate": 2e-05, "loss": 0.04568943, "step": 3025 }, { "epoch": 6.052, "grad_norm": 0.9507676959037781, "learning_rate": 2e-05, "loss": 0.03244974, "step": 3026 }, { "epoch": 6.054, "grad_norm": 1.1743727922439575, "learning_rate": 2e-05, "loss": 0.04001627, "step": 3027 }, { "epoch": 6.056, "grad_norm": 1.06550931930542, "learning_rate": 2e-05, "loss": 0.0358919, "step": 3028 }, { "epoch": 6.058, "grad_norm": 1.3072738647460938, "learning_rate": 2e-05, "loss": 0.04308274, "step": 3029 }, { "epoch": 6.06, "grad_norm": 1.368091106414795, "learning_rate": 2e-05, "loss": 0.0464918, "step": 3030 }, { "epoch": 6.062, "grad_norm": 1.4518086910247803, "learning_rate": 2e-05, "loss": 0.04207553, "step": 3031 }, { "epoch": 6.064, "grad_norm": 0.9633432030677795, "learning_rate": 2e-05, "loss": 0.02657059, "step": 3032 }, { "epoch": 6.066, "grad_norm": 2.7227838039398193, "learning_rate": 2e-05, "loss": 0.04998293, "step": 3033 }, { "epoch": 6.068, "grad_norm": 3.5516669750213623, "learning_rate": 2e-05, "loss": 0.03489901, "step": 3034 }, { "epoch": 6.07, "grad_norm": 1.5370341539382935, "learning_rate": 2e-05, "loss": 0.04453697, "step": 3035 }, { "epoch": 6.072, "grad_norm": 1.7417138814926147, "learning_rate": 2e-05, "loss": 0.03877822, "step": 3036 }, { "epoch": 6.074, "grad_norm": 1.3443036079406738, "learning_rate": 2e-05, "loss": 0.03986344, "step": 3037 }, { "epoch": 6.076, "grad_norm": 1.4446654319763184, "learning_rate": 2e-05, "loss": 0.03199197, "step": 3038 }, { "epoch": 6.078, "grad_norm": 1.7269781827926636, "learning_rate": 2e-05, "loss": 0.04078665, "step": 3039 }, { "epoch": 6.08, "grad_norm": 1.3139877319335938, "learning_rate": 2e-05, "loss": 0.04576064, "step": 3040 }, { "epoch": 6.082, "grad_norm": 1.6032078266143799, "learning_rate": 2e-05, "loss": 0.03815749, "step": 3041 }, { "epoch": 6.084, "grad_norm": 2.1997110843658447, "learning_rate": 2e-05, "loss": 0.04005154, "step": 3042 }, { "epoch": 6.086, "grad_norm": 1.4301249980926514, "learning_rate": 2e-05, "loss": 0.03681426, "step": 3043 }, { "epoch": 6.088, "grad_norm": 0.9475594758987427, "learning_rate": 2e-05, "loss": 0.03235501, "step": 3044 }, { "epoch": 6.09, "grad_norm": 1.1631615161895752, "learning_rate": 2e-05, "loss": 0.04111622, "step": 3045 }, { "epoch": 6.092, "grad_norm": 1.8243446350097656, "learning_rate": 2e-05, "loss": 0.03791495, "step": 3046 }, { "epoch": 6.094, "grad_norm": 1.5762276649475098, "learning_rate": 2e-05, "loss": 0.05622587, "step": 3047 }, { "epoch": 6.096, "grad_norm": 1.136772632598877, "learning_rate": 2e-05, "loss": 0.03879281, "step": 3048 }, { "epoch": 6.098, "grad_norm": 1.1505334377288818, "learning_rate": 2e-05, "loss": 0.03384344, "step": 3049 }, { "epoch": 6.1, "grad_norm": 1.9480074644088745, "learning_rate": 2e-05, "loss": 0.03820986, "step": 3050 }, { "epoch": 6.102, "grad_norm": 1.3107194900512695, "learning_rate": 2e-05, "loss": 0.03161674, "step": 3051 }, { "epoch": 6.104, "grad_norm": 1.3566876649856567, "learning_rate": 2e-05, "loss": 0.0397422, "step": 3052 }, { "epoch": 6.106, "grad_norm": 2.022648811340332, "learning_rate": 2e-05, "loss": 0.05766981, "step": 3053 }, { "epoch": 6.108, "grad_norm": 1.7005928754806519, "learning_rate": 2e-05, "loss": 0.03167719, "step": 3054 }, { "epoch": 6.11, "grad_norm": 3.042736768722534, "learning_rate": 2e-05, "loss": 0.04883799, "step": 3055 }, { "epoch": 6.112, "grad_norm": 1.171810269355774, "learning_rate": 2e-05, "loss": 0.03449272, "step": 3056 }, { "epoch": 6.114, "grad_norm": 1.3196911811828613, "learning_rate": 2e-05, "loss": 0.03630933, "step": 3057 }, { "epoch": 6.116, "grad_norm": 1.3632961511611938, "learning_rate": 2e-05, "loss": 0.02563158, "step": 3058 }, { "epoch": 6.118, "grad_norm": 1.1939222812652588, "learning_rate": 2e-05, "loss": 0.03741849, "step": 3059 }, { "epoch": 6.12, "grad_norm": 1.714534044265747, "learning_rate": 2e-05, "loss": 0.05540673, "step": 3060 }, { "epoch": 6.122, "grad_norm": 1.329563021659851, "learning_rate": 2e-05, "loss": 0.04123599, "step": 3061 }, { "epoch": 6.124, "grad_norm": 1.8710254430770874, "learning_rate": 2e-05, "loss": 0.04224695, "step": 3062 }, { "epoch": 6.126, "grad_norm": 1.2661737203598022, "learning_rate": 2e-05, "loss": 0.03803711, "step": 3063 }, { "epoch": 6.128, "grad_norm": 1.2511358261108398, "learning_rate": 2e-05, "loss": 0.04244345, "step": 3064 }, { "epoch": 6.13, "grad_norm": 0.9187764525413513, "learning_rate": 2e-05, "loss": 0.03632679, "step": 3065 }, { "epoch": 6.132, "grad_norm": 2.078164577484131, "learning_rate": 2e-05, "loss": 0.04006804, "step": 3066 }, { "epoch": 6.134, "grad_norm": 1.1497598886489868, "learning_rate": 2e-05, "loss": 0.03985286, "step": 3067 }, { "epoch": 6.136, "grad_norm": 1.7147870063781738, "learning_rate": 2e-05, "loss": 0.03666584, "step": 3068 }, { "epoch": 6.138, "grad_norm": 1.467666506767273, "learning_rate": 2e-05, "loss": 0.03018375, "step": 3069 }, { "epoch": 6.14, "grad_norm": 2.5385053157806396, "learning_rate": 2e-05, "loss": 0.05757154, "step": 3070 }, { "epoch": 6.142, "grad_norm": 2.1830251216888428, "learning_rate": 2e-05, "loss": 0.0400609, "step": 3071 }, { "epoch": 6.144, "grad_norm": 2.2541887760162354, "learning_rate": 2e-05, "loss": 0.04700291, "step": 3072 }, { "epoch": 6.146, "grad_norm": 1.2078704833984375, "learning_rate": 2e-05, "loss": 0.03664304, "step": 3073 }, { "epoch": 6.148, "grad_norm": 1.6265748739242554, "learning_rate": 2e-05, "loss": 0.03525054, "step": 3074 }, { "epoch": 6.15, "grad_norm": 1.9453312158584595, "learning_rate": 2e-05, "loss": 0.05236892, "step": 3075 }, { "epoch": 6.152, "grad_norm": 1.7163499593734741, "learning_rate": 2e-05, "loss": 0.03309094, "step": 3076 }, { "epoch": 6.154, "grad_norm": 1.738598108291626, "learning_rate": 2e-05, "loss": 0.03554305, "step": 3077 }, { "epoch": 6.156, "grad_norm": 1.6464965343475342, "learning_rate": 2e-05, "loss": 0.05227431, "step": 3078 }, { "epoch": 6.158, "grad_norm": 2.7797718048095703, "learning_rate": 2e-05, "loss": 0.04286774, "step": 3079 }, { "epoch": 6.16, "grad_norm": 1.9953299760818481, "learning_rate": 2e-05, "loss": 0.05062944, "step": 3080 }, { "epoch": 6.162, "grad_norm": 1.4432344436645508, "learning_rate": 2e-05, "loss": 0.03490534, "step": 3081 }, { "epoch": 6.164, "grad_norm": 1.4732416868209839, "learning_rate": 2e-05, "loss": 0.04236253, "step": 3082 }, { "epoch": 6.166, "grad_norm": 2.069814682006836, "learning_rate": 2e-05, "loss": 0.04514054, "step": 3083 }, { "epoch": 6.168, "grad_norm": 1.8395639657974243, "learning_rate": 2e-05, "loss": 0.04167663, "step": 3084 }, { "epoch": 6.17, "grad_norm": 1.5798648595809937, "learning_rate": 2e-05, "loss": 0.05062833, "step": 3085 }, { "epoch": 6.172, "grad_norm": 1.718724012374878, "learning_rate": 2e-05, "loss": 0.03674094, "step": 3086 }, { "epoch": 6.174, "grad_norm": 1.8063639402389526, "learning_rate": 2e-05, "loss": 0.05062528, "step": 3087 }, { "epoch": 6.176, "grad_norm": 1.3898078203201294, "learning_rate": 2e-05, "loss": 0.04505751, "step": 3088 }, { "epoch": 6.178, "grad_norm": 1.5374902486801147, "learning_rate": 2e-05, "loss": 0.05387596, "step": 3089 }, { "epoch": 6.18, "grad_norm": 1.6485440731048584, "learning_rate": 2e-05, "loss": 0.05111238, "step": 3090 }, { "epoch": 6.182, "grad_norm": 1.6274720430374146, "learning_rate": 2e-05, "loss": 0.03595363, "step": 3091 }, { "epoch": 6.184, "grad_norm": 1.5677777528762817, "learning_rate": 2e-05, "loss": 0.03086212, "step": 3092 }, { "epoch": 6.186, "grad_norm": 2.0702250003814697, "learning_rate": 2e-05, "loss": 0.03619816, "step": 3093 }, { "epoch": 6.188, "grad_norm": 1.2100623846054077, "learning_rate": 2e-05, "loss": 0.02968932, "step": 3094 }, { "epoch": 6.19, "grad_norm": 1.6770201921463013, "learning_rate": 2e-05, "loss": 0.05100476, "step": 3095 }, { "epoch": 6.192, "grad_norm": 1.4166276454925537, "learning_rate": 2e-05, "loss": 0.04519867, "step": 3096 }, { "epoch": 6.194, "grad_norm": 1.882489562034607, "learning_rate": 2e-05, "loss": 0.04186785, "step": 3097 }, { "epoch": 6.196, "grad_norm": 1.4164977073669434, "learning_rate": 2e-05, "loss": 0.04563984, "step": 3098 }, { "epoch": 6.198, "grad_norm": 1.6581916809082031, "learning_rate": 2e-05, "loss": 0.04879126, "step": 3099 }, { "epoch": 6.2, "grad_norm": 1.4858304262161255, "learning_rate": 2e-05, "loss": 0.03413234, "step": 3100 }, { "epoch": 6.202, "grad_norm": 1.121500849723816, "learning_rate": 2e-05, "loss": 0.02813966, "step": 3101 }, { "epoch": 6.204, "grad_norm": 1.5754960775375366, "learning_rate": 2e-05, "loss": 0.04899345, "step": 3102 }, { "epoch": 6.206, "grad_norm": 1.356394648551941, "learning_rate": 2e-05, "loss": 0.03853102, "step": 3103 }, { "epoch": 6.208, "grad_norm": 1.0517022609710693, "learning_rate": 2e-05, "loss": 0.03538657, "step": 3104 }, { "epoch": 6.21, "grad_norm": 1.3921403884887695, "learning_rate": 2e-05, "loss": 0.03444414, "step": 3105 }, { "epoch": 6.212, "grad_norm": 1.133500099182129, "learning_rate": 2e-05, "loss": 0.03595487, "step": 3106 }, { "epoch": 6.214, "grad_norm": 1.1165953874588013, "learning_rate": 2e-05, "loss": 0.02767484, "step": 3107 }, { "epoch": 6.216, "grad_norm": 2.7160420417785645, "learning_rate": 2e-05, "loss": 0.03668471, "step": 3108 }, { "epoch": 6.218, "grad_norm": 1.8742766380310059, "learning_rate": 2e-05, "loss": 0.04762148, "step": 3109 }, { "epoch": 6.22, "grad_norm": 1.108489751815796, "learning_rate": 2e-05, "loss": 0.03803735, "step": 3110 }, { "epoch": 6.222, "grad_norm": 1.4708930253982544, "learning_rate": 2e-05, "loss": 0.04263173, "step": 3111 }, { "epoch": 6.224, "grad_norm": 2.706648349761963, "learning_rate": 2e-05, "loss": 0.05671699, "step": 3112 }, { "epoch": 6.226, "grad_norm": 1.8484437465667725, "learning_rate": 2e-05, "loss": 0.02719938, "step": 3113 }, { "epoch": 6.228, "grad_norm": 1.539387822151184, "learning_rate": 2e-05, "loss": 0.03141466, "step": 3114 }, { "epoch": 6.23, "grad_norm": 1.7777189016342163, "learning_rate": 2e-05, "loss": 0.0490169, "step": 3115 }, { "epoch": 6.232, "grad_norm": 1.6402760744094849, "learning_rate": 2e-05, "loss": 0.0498919, "step": 3116 }, { "epoch": 6.234, "grad_norm": 0.8348276019096375, "learning_rate": 2e-05, "loss": 0.01968635, "step": 3117 }, { "epoch": 6.236, "grad_norm": 1.0284675359725952, "learning_rate": 2e-05, "loss": 0.02943014, "step": 3118 }, { "epoch": 6.2379999999999995, "grad_norm": 1.6384459733963013, "learning_rate": 2e-05, "loss": 0.05340105, "step": 3119 }, { "epoch": 6.24, "grad_norm": 1.3227370977401733, "learning_rate": 2e-05, "loss": 0.03396504, "step": 3120 }, { "epoch": 6.242, "grad_norm": 1.28560471534729, "learning_rate": 2e-05, "loss": 0.04309334, "step": 3121 }, { "epoch": 6.244, "grad_norm": 1.1973211765289307, "learning_rate": 2e-05, "loss": 0.03165175, "step": 3122 }, { "epoch": 6.246, "grad_norm": 1.492358922958374, "learning_rate": 2e-05, "loss": 0.04523762, "step": 3123 }, { "epoch": 6.248, "grad_norm": 1.4751956462860107, "learning_rate": 2e-05, "loss": 0.04451723, "step": 3124 }, { "epoch": 6.25, "grad_norm": 1.1065033674240112, "learning_rate": 2e-05, "loss": 0.04365182, "step": 3125 }, { "epoch": 6.252, "grad_norm": 1.341362714767456, "learning_rate": 2e-05, "loss": 0.02606768, "step": 3126 }, { "epoch": 6.254, "grad_norm": 1.7384096384048462, "learning_rate": 2e-05, "loss": 0.03969992, "step": 3127 }, { "epoch": 6.256, "grad_norm": 1.5155085325241089, "learning_rate": 2e-05, "loss": 0.03229823, "step": 3128 }, { "epoch": 6.258, "grad_norm": 1.0733712911605835, "learning_rate": 2e-05, "loss": 0.0306533, "step": 3129 }, { "epoch": 6.26, "grad_norm": 2.0977604389190674, "learning_rate": 2e-05, "loss": 0.05317961, "step": 3130 }, { "epoch": 6.2620000000000005, "grad_norm": 1.4438533782958984, "learning_rate": 2e-05, "loss": 0.05355418, "step": 3131 }, { "epoch": 6.264, "grad_norm": 2.2710464000701904, "learning_rate": 2e-05, "loss": 0.04868567, "step": 3132 }, { "epoch": 6.266, "grad_norm": 1.7031066417694092, "learning_rate": 2e-05, "loss": 0.04030811, "step": 3133 }, { "epoch": 6.268, "grad_norm": 1.0892564058303833, "learning_rate": 2e-05, "loss": 0.0400371, "step": 3134 }, { "epoch": 6.27, "grad_norm": 1.327901840209961, "learning_rate": 2e-05, "loss": 0.05018859, "step": 3135 }, { "epoch": 6.272, "grad_norm": 1.1799272298812866, "learning_rate": 2e-05, "loss": 0.03606693, "step": 3136 }, { "epoch": 6.274, "grad_norm": 1.553155541419983, "learning_rate": 2e-05, "loss": 0.06281506, "step": 3137 }, { "epoch": 6.276, "grad_norm": 1.075716257095337, "learning_rate": 2e-05, "loss": 0.02753101, "step": 3138 }, { "epoch": 6.2780000000000005, "grad_norm": 2.481788396835327, "learning_rate": 2e-05, "loss": 0.03427035, "step": 3139 }, { "epoch": 6.28, "grad_norm": 1.606927752494812, "learning_rate": 2e-05, "loss": 0.03269802, "step": 3140 }, { "epoch": 6.282, "grad_norm": 1.5751926898956299, "learning_rate": 2e-05, "loss": 0.03891, "step": 3141 }, { "epoch": 6.284, "grad_norm": 2.1597113609313965, "learning_rate": 2e-05, "loss": 0.04709848, "step": 3142 }, { "epoch": 6.286, "grad_norm": 1.4305531978607178, "learning_rate": 2e-05, "loss": 0.04866112, "step": 3143 }, { "epoch": 6.288, "grad_norm": 1.1876941919326782, "learning_rate": 2e-05, "loss": 0.03890382, "step": 3144 }, { "epoch": 6.29, "grad_norm": 2.109853744506836, "learning_rate": 2e-05, "loss": 0.04726175, "step": 3145 }, { "epoch": 6.292, "grad_norm": 1.5774122476577759, "learning_rate": 2e-05, "loss": 0.04510861, "step": 3146 }, { "epoch": 6.294, "grad_norm": 2.0001204013824463, "learning_rate": 2e-05, "loss": 0.03729511, "step": 3147 }, { "epoch": 6.296, "grad_norm": 1.9148621559143066, "learning_rate": 2e-05, "loss": 0.05043909, "step": 3148 }, { "epoch": 6.298, "grad_norm": 1.77232825756073, "learning_rate": 2e-05, "loss": 0.05203857, "step": 3149 }, { "epoch": 6.3, "grad_norm": 1.7277973890304565, "learning_rate": 2e-05, "loss": 0.0506317, "step": 3150 }, { "epoch": 6.302, "grad_norm": 1.567769169807434, "learning_rate": 2e-05, "loss": 0.04894867, "step": 3151 }, { "epoch": 6.304, "grad_norm": 1.5426815748214722, "learning_rate": 2e-05, "loss": 0.04491549, "step": 3152 }, { "epoch": 6.306, "grad_norm": 2.3619165420532227, "learning_rate": 2e-05, "loss": 0.04702245, "step": 3153 }, { "epoch": 6.308, "grad_norm": 1.696313500404358, "learning_rate": 2e-05, "loss": 0.05087637, "step": 3154 }, { "epoch": 6.31, "grad_norm": 1.100569248199463, "learning_rate": 2e-05, "loss": 0.03571167, "step": 3155 }, { "epoch": 6.312, "grad_norm": 1.4361978769302368, "learning_rate": 2e-05, "loss": 0.03643346, "step": 3156 }, { "epoch": 6.314, "grad_norm": 1.2051647901535034, "learning_rate": 2e-05, "loss": 0.05133547, "step": 3157 }, { "epoch": 6.316, "grad_norm": 1.9132866859436035, "learning_rate": 2e-05, "loss": 0.03743671, "step": 3158 }, { "epoch": 6.318, "grad_norm": 2.0840513706207275, "learning_rate": 2e-05, "loss": 0.05485996, "step": 3159 }, { "epoch": 6.32, "grad_norm": 1.9084033966064453, "learning_rate": 2e-05, "loss": 0.05698278, "step": 3160 }, { "epoch": 6.322, "grad_norm": 1.0253159999847412, "learning_rate": 2e-05, "loss": 0.02836904, "step": 3161 }, { "epoch": 6.324, "grad_norm": 1.5270940065383911, "learning_rate": 2e-05, "loss": 0.0500167, "step": 3162 }, { "epoch": 6.326, "grad_norm": 1.3842896223068237, "learning_rate": 2e-05, "loss": 0.03779293, "step": 3163 }, { "epoch": 6.328, "grad_norm": 1.4352202415466309, "learning_rate": 2e-05, "loss": 0.03724783, "step": 3164 }, { "epoch": 6.33, "grad_norm": 1.2283622026443481, "learning_rate": 2e-05, "loss": 0.04922156, "step": 3165 }, { "epoch": 6.332, "grad_norm": 1.2028950452804565, "learning_rate": 2e-05, "loss": 0.03399231, "step": 3166 }, { "epoch": 6.334, "grad_norm": 1.8364872932434082, "learning_rate": 2e-05, "loss": 0.05715834, "step": 3167 }, { "epoch": 6.336, "grad_norm": 1.926184058189392, "learning_rate": 2e-05, "loss": 0.052489, "step": 3168 }, { "epoch": 6.338, "grad_norm": 2.94455623626709, "learning_rate": 2e-05, "loss": 0.05718009, "step": 3169 }, { "epoch": 6.34, "grad_norm": 1.064375877380371, "learning_rate": 2e-05, "loss": 0.03787267, "step": 3170 }, { "epoch": 6.342, "grad_norm": 1.227028250694275, "learning_rate": 2e-05, "loss": 0.0388754, "step": 3171 }, { "epoch": 6.344, "grad_norm": 1.999516487121582, "learning_rate": 2e-05, "loss": 0.02867444, "step": 3172 }, { "epoch": 6.346, "grad_norm": 1.4541538953781128, "learning_rate": 2e-05, "loss": 0.04546948, "step": 3173 }, { "epoch": 6.348, "grad_norm": 1.3081698417663574, "learning_rate": 2e-05, "loss": 0.05439358, "step": 3174 }, { "epoch": 6.35, "grad_norm": 1.7099684476852417, "learning_rate": 2e-05, "loss": 0.04584994, "step": 3175 }, { "epoch": 6.352, "grad_norm": 1.3694244623184204, "learning_rate": 2e-05, "loss": 0.05263059, "step": 3176 }, { "epoch": 6.354, "grad_norm": 2.0050711631774902, "learning_rate": 2e-05, "loss": 0.06513149, "step": 3177 }, { "epoch": 6.356, "grad_norm": 2.2543318271636963, "learning_rate": 2e-05, "loss": 0.07143942, "step": 3178 }, { "epoch": 6.358, "grad_norm": 0.9358059167861938, "learning_rate": 2e-05, "loss": 0.02930567, "step": 3179 }, { "epoch": 6.36, "grad_norm": 1.4434269666671753, "learning_rate": 2e-05, "loss": 0.02489997, "step": 3180 }, { "epoch": 6.362, "grad_norm": 1.233762264251709, "learning_rate": 2e-05, "loss": 0.0335849, "step": 3181 }, { "epoch": 6.364, "grad_norm": 1.5462418794631958, "learning_rate": 2e-05, "loss": 0.03041675, "step": 3182 }, { "epoch": 6.366, "grad_norm": 2.0180463790893555, "learning_rate": 2e-05, "loss": 0.05690164, "step": 3183 }, { "epoch": 6.368, "grad_norm": 1.6697211265563965, "learning_rate": 2e-05, "loss": 0.04623037, "step": 3184 }, { "epoch": 6.37, "grad_norm": 1.815030574798584, "learning_rate": 2e-05, "loss": 0.03556263, "step": 3185 }, { "epoch": 6.372, "grad_norm": 1.826423168182373, "learning_rate": 2e-05, "loss": 0.04994491, "step": 3186 }, { "epoch": 6.374, "grad_norm": 1.6917457580566406, "learning_rate": 2e-05, "loss": 0.03865033, "step": 3187 }, { "epoch": 6.376, "grad_norm": 1.7738280296325684, "learning_rate": 2e-05, "loss": 0.04079893, "step": 3188 }, { "epoch": 6.378, "grad_norm": 1.0954397916793823, "learning_rate": 2e-05, "loss": 0.03440213, "step": 3189 }, { "epoch": 6.38, "grad_norm": 1.043359637260437, "learning_rate": 2e-05, "loss": 0.03818577, "step": 3190 }, { "epoch": 6.382, "grad_norm": 0.9938315153121948, "learning_rate": 2e-05, "loss": 0.03229547, "step": 3191 }, { "epoch": 6.384, "grad_norm": 2.5036749839782715, "learning_rate": 2e-05, "loss": 0.04423948, "step": 3192 }, { "epoch": 6.386, "grad_norm": 1.164203405380249, "learning_rate": 2e-05, "loss": 0.03556975, "step": 3193 }, { "epoch": 6.388, "grad_norm": 1.4395684003829956, "learning_rate": 2e-05, "loss": 0.04390024, "step": 3194 }, { "epoch": 6.39, "grad_norm": 1.4073199033737183, "learning_rate": 2e-05, "loss": 0.02870702, "step": 3195 }, { "epoch": 6.392, "grad_norm": 1.905430793762207, "learning_rate": 2e-05, "loss": 0.05451259, "step": 3196 }, { "epoch": 6.394, "grad_norm": 2.2426390647888184, "learning_rate": 2e-05, "loss": 0.04760137, "step": 3197 }, { "epoch": 6.396, "grad_norm": 1.6508835554122925, "learning_rate": 2e-05, "loss": 0.03574256, "step": 3198 }, { "epoch": 6.398, "grad_norm": 1.2062972784042358, "learning_rate": 2e-05, "loss": 0.03633127, "step": 3199 }, { "epoch": 6.4, "grad_norm": 1.1305536031723022, "learning_rate": 2e-05, "loss": 0.03830415, "step": 3200 }, { "epoch": 6.402, "grad_norm": 1.6477299928665161, "learning_rate": 2e-05, "loss": 0.04004838, "step": 3201 }, { "epoch": 6.404, "grad_norm": 1.4459377527236938, "learning_rate": 2e-05, "loss": 0.0500464, "step": 3202 }, { "epoch": 6.406, "grad_norm": 1.643071174621582, "learning_rate": 2e-05, "loss": 0.03596194, "step": 3203 }, { "epoch": 6.408, "grad_norm": 1.4056624174118042, "learning_rate": 2e-05, "loss": 0.0451113, "step": 3204 }, { "epoch": 6.41, "grad_norm": 1.7545050382614136, "learning_rate": 2e-05, "loss": 0.04658743, "step": 3205 }, { "epoch": 6.412, "grad_norm": 1.7296162843704224, "learning_rate": 2e-05, "loss": 0.04593526, "step": 3206 }, { "epoch": 6.414, "grad_norm": 1.6219792366027832, "learning_rate": 2e-05, "loss": 0.0362427, "step": 3207 }, { "epoch": 6.416, "grad_norm": 1.0432740449905396, "learning_rate": 2e-05, "loss": 0.03033496, "step": 3208 }, { "epoch": 6.418, "grad_norm": 1.3447455167770386, "learning_rate": 2e-05, "loss": 0.04312557, "step": 3209 }, { "epoch": 6.42, "grad_norm": 1.140795350074768, "learning_rate": 2e-05, "loss": 0.02616213, "step": 3210 }, { "epoch": 6.422, "grad_norm": 1.3150559663772583, "learning_rate": 2e-05, "loss": 0.03334409, "step": 3211 }, { "epoch": 6.424, "grad_norm": 1.9361340999603271, "learning_rate": 2e-05, "loss": 0.06632831, "step": 3212 }, { "epoch": 6.426, "grad_norm": 2.0635571479797363, "learning_rate": 2e-05, "loss": 0.04785598, "step": 3213 }, { "epoch": 6.428, "grad_norm": 1.896443247795105, "learning_rate": 2e-05, "loss": 0.04902334, "step": 3214 }, { "epoch": 6.43, "grad_norm": 1.3984146118164062, "learning_rate": 2e-05, "loss": 0.03529963, "step": 3215 }, { "epoch": 6.432, "grad_norm": 1.1594336032867432, "learning_rate": 2e-05, "loss": 0.03067735, "step": 3216 }, { "epoch": 6.434, "grad_norm": 1.5906693935394287, "learning_rate": 2e-05, "loss": 0.04803975, "step": 3217 }, { "epoch": 6.436, "grad_norm": 1.211396336555481, "learning_rate": 2e-05, "loss": 0.02445906, "step": 3218 }, { "epoch": 6.438, "grad_norm": 1.4170849323272705, "learning_rate": 2e-05, "loss": 0.03652653, "step": 3219 }, { "epoch": 6.44, "grad_norm": 2.7853238582611084, "learning_rate": 2e-05, "loss": 0.05708388, "step": 3220 }, { "epoch": 6.442, "grad_norm": 1.129001498222351, "learning_rate": 2e-05, "loss": 0.03645102, "step": 3221 }, { "epoch": 6.444, "grad_norm": 1.4778531789779663, "learning_rate": 2e-05, "loss": 0.02655194, "step": 3222 }, { "epoch": 6.446, "grad_norm": 1.6517877578735352, "learning_rate": 2e-05, "loss": 0.03924521, "step": 3223 }, { "epoch": 6.448, "grad_norm": 2.0939931869506836, "learning_rate": 2e-05, "loss": 0.03322685, "step": 3224 }, { "epoch": 6.45, "grad_norm": 1.4540835618972778, "learning_rate": 2e-05, "loss": 0.04464151, "step": 3225 }, { "epoch": 6.452, "grad_norm": 1.3042082786560059, "learning_rate": 2e-05, "loss": 0.04663192, "step": 3226 }, { "epoch": 6.454, "grad_norm": 1.1038719415664673, "learning_rate": 2e-05, "loss": 0.03338126, "step": 3227 }, { "epoch": 6.456, "grad_norm": 1.4457974433898926, "learning_rate": 2e-05, "loss": 0.03782085, "step": 3228 }, { "epoch": 6.458, "grad_norm": 1.439251184463501, "learning_rate": 2e-05, "loss": 0.04930679, "step": 3229 }, { "epoch": 6.46, "grad_norm": 1.6377997398376465, "learning_rate": 2e-05, "loss": 0.04300382, "step": 3230 }, { "epoch": 6.462, "grad_norm": 3.27187442779541, "learning_rate": 2e-05, "loss": 0.03758849, "step": 3231 }, { "epoch": 6.464, "grad_norm": 1.257215976715088, "learning_rate": 2e-05, "loss": 0.03490548, "step": 3232 }, { "epoch": 6.466, "grad_norm": 1.3594287633895874, "learning_rate": 2e-05, "loss": 0.03412262, "step": 3233 }, { "epoch": 6.468, "grad_norm": 1.1700828075408936, "learning_rate": 2e-05, "loss": 0.03058089, "step": 3234 }, { "epoch": 6.47, "grad_norm": 0.9974095225334167, "learning_rate": 2e-05, "loss": 0.03268985, "step": 3235 }, { "epoch": 6.4719999999999995, "grad_norm": 1.6861094236373901, "learning_rate": 2e-05, "loss": 0.04692501, "step": 3236 }, { "epoch": 6.474, "grad_norm": 1.220467209815979, "learning_rate": 2e-05, "loss": 0.04217067, "step": 3237 }, { "epoch": 6.476, "grad_norm": 1.1765286922454834, "learning_rate": 2e-05, "loss": 0.03455502, "step": 3238 }, { "epoch": 6.478, "grad_norm": 1.4939866065979004, "learning_rate": 2e-05, "loss": 0.05220221, "step": 3239 }, { "epoch": 6.48, "grad_norm": 1.1564301252365112, "learning_rate": 2e-05, "loss": 0.03657191, "step": 3240 }, { "epoch": 6.482, "grad_norm": 1.5989612340927124, "learning_rate": 2e-05, "loss": 0.04166878, "step": 3241 }, { "epoch": 6.484, "grad_norm": 1.5818308591842651, "learning_rate": 2e-05, "loss": 0.04151236, "step": 3242 }, { "epoch": 6.486, "grad_norm": 1.6136821508407593, "learning_rate": 2e-05, "loss": 0.03971895, "step": 3243 }, { "epoch": 6.4879999999999995, "grad_norm": 1.5467586517333984, "learning_rate": 2e-05, "loss": 0.04718712, "step": 3244 }, { "epoch": 6.49, "grad_norm": 0.9672493934631348, "learning_rate": 2e-05, "loss": 0.02750623, "step": 3245 }, { "epoch": 6.492, "grad_norm": 1.5886257886886597, "learning_rate": 2e-05, "loss": 0.043421, "step": 3246 }, { "epoch": 6.494, "grad_norm": 1.9581189155578613, "learning_rate": 2e-05, "loss": 0.04324682, "step": 3247 }, { "epoch": 6.496, "grad_norm": 1.1795440912246704, "learning_rate": 2e-05, "loss": 0.04421395, "step": 3248 }, { "epoch": 6.498, "grad_norm": 1.8300138711929321, "learning_rate": 2e-05, "loss": 0.05202936, "step": 3249 }, { "epoch": 6.5, "grad_norm": 1.1608667373657227, "learning_rate": 2e-05, "loss": 0.03892054, "step": 3250 }, { "epoch": 6.502, "grad_norm": 1.8854470252990723, "learning_rate": 2e-05, "loss": 0.05796061, "step": 3251 }, { "epoch": 6.504, "grad_norm": 5.84080696105957, "learning_rate": 2e-05, "loss": 0.04915443, "step": 3252 }, { "epoch": 6.506, "grad_norm": 1.2369056940078735, "learning_rate": 2e-05, "loss": 0.04068791, "step": 3253 }, { "epoch": 6.508, "grad_norm": 2.1469545364379883, "learning_rate": 2e-05, "loss": 0.0260004, "step": 3254 }, { "epoch": 6.51, "grad_norm": 1.370782494544983, "learning_rate": 2e-05, "loss": 0.03866827, "step": 3255 }, { "epoch": 6.5120000000000005, "grad_norm": 1.2568261623382568, "learning_rate": 2e-05, "loss": 0.038529, "step": 3256 }, { "epoch": 6.514, "grad_norm": 1.186782717704773, "learning_rate": 2e-05, "loss": 0.04556369, "step": 3257 }, { "epoch": 6.516, "grad_norm": 0.8793965578079224, "learning_rate": 2e-05, "loss": 0.02259767, "step": 3258 }, { "epoch": 6.518, "grad_norm": 1.2058748006820679, "learning_rate": 2e-05, "loss": 0.03813112, "step": 3259 }, { "epoch": 6.52, "grad_norm": 1.2766646146774292, "learning_rate": 2e-05, "loss": 0.04175682, "step": 3260 }, { "epoch": 6.522, "grad_norm": 1.3827567100524902, "learning_rate": 2e-05, "loss": 0.03931576, "step": 3261 }, { "epoch": 6.524, "grad_norm": 1.741408109664917, "learning_rate": 2e-05, "loss": 0.03503319, "step": 3262 }, { "epoch": 6.526, "grad_norm": 1.5188173055648804, "learning_rate": 2e-05, "loss": 0.03888669, "step": 3263 }, { "epoch": 6.5280000000000005, "grad_norm": 1.9496771097183228, "learning_rate": 2e-05, "loss": 0.04322708, "step": 3264 }, { "epoch": 6.53, "grad_norm": 2.4623851776123047, "learning_rate": 2e-05, "loss": 0.04068457, "step": 3265 }, { "epoch": 6.532, "grad_norm": 0.9864256381988525, "learning_rate": 2e-05, "loss": 0.03565934, "step": 3266 }, { "epoch": 6.534, "grad_norm": 0.9321268796920776, "learning_rate": 2e-05, "loss": 0.02948448, "step": 3267 }, { "epoch": 6.536, "grad_norm": 1.1365487575531006, "learning_rate": 2e-05, "loss": 0.03684638, "step": 3268 }, { "epoch": 6.538, "grad_norm": 1.8974064588546753, "learning_rate": 2e-05, "loss": 0.03136589, "step": 3269 }, { "epoch": 6.54, "grad_norm": 1.6937148571014404, "learning_rate": 2e-05, "loss": 0.04182105, "step": 3270 }, { "epoch": 6.542, "grad_norm": 1.4892710447311401, "learning_rate": 2e-05, "loss": 0.05436049, "step": 3271 }, { "epoch": 6.5440000000000005, "grad_norm": 2.192354917526245, "learning_rate": 2e-05, "loss": 0.03655919, "step": 3272 }, { "epoch": 6.546, "grad_norm": 1.3368217945098877, "learning_rate": 2e-05, "loss": 0.02830843, "step": 3273 }, { "epoch": 6.548, "grad_norm": 1.422207236289978, "learning_rate": 2e-05, "loss": 0.02914708, "step": 3274 }, { "epoch": 6.55, "grad_norm": 3.405113935470581, "learning_rate": 2e-05, "loss": 0.04792117, "step": 3275 }, { "epoch": 6.552, "grad_norm": 2.973292112350464, "learning_rate": 2e-05, "loss": 0.050552, "step": 3276 }, { "epoch": 6.554, "grad_norm": 1.7082622051239014, "learning_rate": 2e-05, "loss": 0.03328468, "step": 3277 }, { "epoch": 6.556, "grad_norm": 1.2330087423324585, "learning_rate": 2e-05, "loss": 0.0288753, "step": 3278 }, { "epoch": 6.558, "grad_norm": 1.4605454206466675, "learning_rate": 2e-05, "loss": 0.03361526, "step": 3279 }, { "epoch": 6.5600000000000005, "grad_norm": 1.288313627243042, "learning_rate": 2e-05, "loss": 0.04549043, "step": 3280 }, { "epoch": 6.562, "grad_norm": 2.2067997455596924, "learning_rate": 2e-05, "loss": 0.0399694, "step": 3281 }, { "epoch": 6.564, "grad_norm": 1.6156538724899292, "learning_rate": 2e-05, "loss": 0.07278688, "step": 3282 }, { "epoch": 6.566, "grad_norm": 2.2792131900787354, "learning_rate": 2e-05, "loss": 0.05353578, "step": 3283 }, { "epoch": 6.568, "grad_norm": 1.9242584705352783, "learning_rate": 2e-05, "loss": 0.0594984, "step": 3284 }, { "epoch": 6.57, "grad_norm": 1.4676686525344849, "learning_rate": 2e-05, "loss": 0.04927154, "step": 3285 }, { "epoch": 6.572, "grad_norm": 1.2734441757202148, "learning_rate": 2e-05, "loss": 0.04294922, "step": 3286 }, { "epoch": 6.574, "grad_norm": 1.739187240600586, "learning_rate": 2e-05, "loss": 0.04884008, "step": 3287 }, { "epoch": 6.576, "grad_norm": 1.88695228099823, "learning_rate": 2e-05, "loss": 0.05116354, "step": 3288 }, { "epoch": 6.578, "grad_norm": 1.28707754611969, "learning_rate": 2e-05, "loss": 0.04390291, "step": 3289 }, { "epoch": 6.58, "grad_norm": 1.3685258626937866, "learning_rate": 2e-05, "loss": 0.03953566, "step": 3290 }, { "epoch": 6.582, "grad_norm": 1.109430193901062, "learning_rate": 2e-05, "loss": 0.04132858, "step": 3291 }, { "epoch": 6.584, "grad_norm": 1.8510801792144775, "learning_rate": 2e-05, "loss": 0.05430602, "step": 3292 }, { "epoch": 6.586, "grad_norm": 1.2035598754882812, "learning_rate": 2e-05, "loss": 0.03927475, "step": 3293 }, { "epoch": 6.588, "grad_norm": 2.5072784423828125, "learning_rate": 2e-05, "loss": 0.04595596, "step": 3294 }, { "epoch": 6.59, "grad_norm": 1.213221549987793, "learning_rate": 2e-05, "loss": 0.04527979, "step": 3295 }, { "epoch": 6.592, "grad_norm": 1.2507351636886597, "learning_rate": 2e-05, "loss": 0.03918802, "step": 3296 }, { "epoch": 6.594, "grad_norm": 1.0590146780014038, "learning_rate": 2e-05, "loss": 0.03508047, "step": 3297 }, { "epoch": 6.596, "grad_norm": 1.703151822090149, "learning_rate": 2e-05, "loss": 0.04405899, "step": 3298 }, { "epoch": 6.598, "grad_norm": 1.1094061136245728, "learning_rate": 2e-05, "loss": 0.03539534, "step": 3299 }, { "epoch": 6.6, "grad_norm": 1.1324583292007446, "learning_rate": 2e-05, "loss": 0.04019186, "step": 3300 }, { "epoch": 6.602, "grad_norm": 3.1161935329437256, "learning_rate": 2e-05, "loss": 0.04159467, "step": 3301 }, { "epoch": 6.604, "grad_norm": 1.558793306350708, "learning_rate": 2e-05, "loss": 0.04492222, "step": 3302 }, { "epoch": 6.606, "grad_norm": 1.1711833477020264, "learning_rate": 2e-05, "loss": 0.03407395, "step": 3303 }, { "epoch": 6.608, "grad_norm": 1.5858479738235474, "learning_rate": 2e-05, "loss": 0.06137519, "step": 3304 }, { "epoch": 6.61, "grad_norm": 1.5749893188476562, "learning_rate": 2e-05, "loss": 0.06341276, "step": 3305 }, { "epoch": 6.612, "grad_norm": 1.4319220781326294, "learning_rate": 2e-05, "loss": 0.03183598, "step": 3306 }, { "epoch": 6.614, "grad_norm": 1.9067035913467407, "learning_rate": 2e-05, "loss": 0.03418519, "step": 3307 }, { "epoch": 6.616, "grad_norm": 1.5378934144973755, "learning_rate": 2e-05, "loss": 0.05683891, "step": 3308 }, { "epoch": 6.618, "grad_norm": 1.4987341165542603, "learning_rate": 2e-05, "loss": 0.0428179, "step": 3309 }, { "epoch": 6.62, "grad_norm": 0.9848178625106812, "learning_rate": 2e-05, "loss": 0.03192403, "step": 3310 }, { "epoch": 6.622, "grad_norm": 1.1798021793365479, "learning_rate": 2e-05, "loss": 0.04244391, "step": 3311 }, { "epoch": 6.624, "grad_norm": 1.2790571451187134, "learning_rate": 2e-05, "loss": 0.05533645, "step": 3312 }, { "epoch": 6.626, "grad_norm": 1.6922457218170166, "learning_rate": 2e-05, "loss": 0.04536498, "step": 3313 }, { "epoch": 6.628, "grad_norm": 1.0179157257080078, "learning_rate": 2e-05, "loss": 0.03572208, "step": 3314 }, { "epoch": 6.63, "grad_norm": 1.8201943635940552, "learning_rate": 2e-05, "loss": 0.03994879, "step": 3315 }, { "epoch": 6.632, "grad_norm": 1.4098727703094482, "learning_rate": 2e-05, "loss": 0.04173999, "step": 3316 }, { "epoch": 6.634, "grad_norm": 1.0485551357269287, "learning_rate": 2e-05, "loss": 0.03973451, "step": 3317 }, { "epoch": 6.636, "grad_norm": 1.4638235569000244, "learning_rate": 2e-05, "loss": 0.04760775, "step": 3318 }, { "epoch": 6.638, "grad_norm": 1.5323985815048218, "learning_rate": 2e-05, "loss": 0.04406105, "step": 3319 }, { "epoch": 6.64, "grad_norm": 1.126651644706726, "learning_rate": 2e-05, "loss": 0.03304296, "step": 3320 }, { "epoch": 6.642, "grad_norm": 1.278250813484192, "learning_rate": 2e-05, "loss": 0.02966734, "step": 3321 }, { "epoch": 6.644, "grad_norm": 1.018739938735962, "learning_rate": 2e-05, "loss": 0.03991082, "step": 3322 }, { "epoch": 6.646, "grad_norm": 1.2465643882751465, "learning_rate": 2e-05, "loss": 0.0458714, "step": 3323 }, { "epoch": 6.648, "grad_norm": 29.821657180786133, "learning_rate": 2e-05, "loss": 0.0503132, "step": 3324 }, { "epoch": 6.65, "grad_norm": 3.7819323539733887, "learning_rate": 2e-05, "loss": 0.04644343, "step": 3325 }, { "epoch": 6.652, "grad_norm": 0.8932387232780457, "learning_rate": 2e-05, "loss": 0.02574427, "step": 3326 }, { "epoch": 6.654, "grad_norm": 1.2899101972579956, "learning_rate": 2e-05, "loss": 0.0327214, "step": 3327 }, { "epoch": 6.656, "grad_norm": 2.6038079261779785, "learning_rate": 2e-05, "loss": 0.05023351, "step": 3328 }, { "epoch": 6.658, "grad_norm": 1.7134565114974976, "learning_rate": 2e-05, "loss": 0.04856233, "step": 3329 }, { "epoch": 6.66, "grad_norm": 1.8361741304397583, "learning_rate": 2e-05, "loss": 0.06476304, "step": 3330 }, { "epoch": 6.662, "grad_norm": 1.2037334442138672, "learning_rate": 2e-05, "loss": 0.04044564, "step": 3331 }, { "epoch": 6.664, "grad_norm": 1.0349493026733398, "learning_rate": 2e-05, "loss": 0.03656833, "step": 3332 }, { "epoch": 6.666, "grad_norm": 2.0848727226257324, "learning_rate": 2e-05, "loss": 0.03080843, "step": 3333 }, { "epoch": 6.668, "grad_norm": 1.4211640357971191, "learning_rate": 2e-05, "loss": 0.04820876, "step": 3334 }, { "epoch": 6.67, "grad_norm": 0.9767024517059326, "learning_rate": 2e-05, "loss": 0.03227487, "step": 3335 }, { "epoch": 6.672, "grad_norm": 1.7927852869033813, "learning_rate": 2e-05, "loss": 0.03349268, "step": 3336 }, { "epoch": 6.674, "grad_norm": 1.3334259986877441, "learning_rate": 2e-05, "loss": 0.03758476, "step": 3337 }, { "epoch": 6.676, "grad_norm": 1.1497528553009033, "learning_rate": 2e-05, "loss": 0.0366753, "step": 3338 }, { "epoch": 6.678, "grad_norm": 1.3280630111694336, "learning_rate": 2e-05, "loss": 0.04031434, "step": 3339 }, { "epoch": 6.68, "grad_norm": 1.5619535446166992, "learning_rate": 2e-05, "loss": 0.02004341, "step": 3340 }, { "epoch": 6.682, "grad_norm": 1.5152435302734375, "learning_rate": 2e-05, "loss": 0.04714475, "step": 3341 }, { "epoch": 6.684, "grad_norm": 1.263704776763916, "learning_rate": 2e-05, "loss": 0.04945374, "step": 3342 }, { "epoch": 6.686, "grad_norm": 1.4276520013809204, "learning_rate": 2e-05, "loss": 0.03332176, "step": 3343 }, { "epoch": 6.688, "grad_norm": 0.9606978297233582, "learning_rate": 2e-05, "loss": 0.03038597, "step": 3344 }, { "epoch": 6.6899999999999995, "grad_norm": 1.5653752088546753, "learning_rate": 2e-05, "loss": 0.04423832, "step": 3345 }, { "epoch": 6.692, "grad_norm": 1.6132287979125977, "learning_rate": 2e-05, "loss": 0.04920972, "step": 3346 }, { "epoch": 6.694, "grad_norm": 2.4168407917022705, "learning_rate": 2e-05, "loss": 0.05681578, "step": 3347 }, { "epoch": 6.696, "grad_norm": 1.8760751485824585, "learning_rate": 2e-05, "loss": 0.0534036, "step": 3348 }, { "epoch": 6.698, "grad_norm": 1.2305333614349365, "learning_rate": 2e-05, "loss": 0.02969261, "step": 3349 }, { "epoch": 6.7, "grad_norm": 1.4671058654785156, "learning_rate": 2e-05, "loss": 0.02868845, "step": 3350 }, { "epoch": 6.702, "grad_norm": 1.1077961921691895, "learning_rate": 2e-05, "loss": 0.0313329, "step": 3351 }, { "epoch": 6.704, "grad_norm": 1.2289212942123413, "learning_rate": 2e-05, "loss": 0.04141407, "step": 3352 }, { "epoch": 6.7059999999999995, "grad_norm": 1.0401378870010376, "learning_rate": 2e-05, "loss": 0.02844977, "step": 3353 }, { "epoch": 6.708, "grad_norm": 1.6821035146713257, "learning_rate": 2e-05, "loss": 0.033038, "step": 3354 }, { "epoch": 6.71, "grad_norm": 1.4683276414871216, "learning_rate": 2e-05, "loss": 0.03478734, "step": 3355 }, { "epoch": 6.712, "grad_norm": 0.9681516885757446, "learning_rate": 2e-05, "loss": 0.02334734, "step": 3356 }, { "epoch": 6.714, "grad_norm": 1.9194122552871704, "learning_rate": 2e-05, "loss": 0.05143356, "step": 3357 }, { "epoch": 6.716, "grad_norm": 1.9872151613235474, "learning_rate": 2e-05, "loss": 0.0525632, "step": 3358 }, { "epoch": 6.718, "grad_norm": 1.8832703828811646, "learning_rate": 2e-05, "loss": 0.04416494, "step": 3359 }, { "epoch": 6.72, "grad_norm": 1.498202919960022, "learning_rate": 2e-05, "loss": 0.04000438, "step": 3360 }, { "epoch": 6.7219999999999995, "grad_norm": 1.6321178674697876, "learning_rate": 2e-05, "loss": 0.05430613, "step": 3361 }, { "epoch": 6.724, "grad_norm": 2.441689968109131, "learning_rate": 2e-05, "loss": 0.0355437, "step": 3362 }, { "epoch": 6.726, "grad_norm": 1.513107419013977, "learning_rate": 2e-05, "loss": 0.03850288, "step": 3363 }, { "epoch": 6.728, "grad_norm": 0.9349620938301086, "learning_rate": 2e-05, "loss": 0.02798152, "step": 3364 }, { "epoch": 6.73, "grad_norm": 1.3889509439468384, "learning_rate": 2e-05, "loss": 0.04935129, "step": 3365 }, { "epoch": 6.732, "grad_norm": 1.4558771848678589, "learning_rate": 2e-05, "loss": 0.04684127, "step": 3366 }, { "epoch": 6.734, "grad_norm": 1.614352822303772, "learning_rate": 2e-05, "loss": 0.05351394, "step": 3367 }, { "epoch": 6.736, "grad_norm": 1.7101666927337646, "learning_rate": 2e-05, "loss": 0.05229837, "step": 3368 }, { "epoch": 6.7379999999999995, "grad_norm": 1.225295901298523, "learning_rate": 2e-05, "loss": 0.04323405, "step": 3369 }, { "epoch": 6.74, "grad_norm": 1.0662435293197632, "learning_rate": 2e-05, "loss": 0.03803292, "step": 3370 }, { "epoch": 6.742, "grad_norm": 1.4984309673309326, "learning_rate": 2e-05, "loss": 0.043239, "step": 3371 }, { "epoch": 6.744, "grad_norm": 1.9732307195663452, "learning_rate": 2e-05, "loss": 0.06063057, "step": 3372 }, { "epoch": 6.746, "grad_norm": 0.8619161248207092, "learning_rate": 2e-05, "loss": 0.02019028, "step": 3373 }, { "epoch": 6.748, "grad_norm": 1.5696371793746948, "learning_rate": 2e-05, "loss": 0.04602835, "step": 3374 }, { "epoch": 6.75, "grad_norm": 1.1177750825881958, "learning_rate": 2e-05, "loss": 0.03560708, "step": 3375 }, { "epoch": 6.752, "grad_norm": 0.9110558032989502, "learning_rate": 2e-05, "loss": 0.01933064, "step": 3376 }, { "epoch": 6.754, "grad_norm": 1.2915785312652588, "learning_rate": 2e-05, "loss": 0.04963629, "step": 3377 }, { "epoch": 6.756, "grad_norm": 1.2118951082229614, "learning_rate": 2e-05, "loss": 0.03098101, "step": 3378 }, { "epoch": 6.758, "grad_norm": 1.0286513566970825, "learning_rate": 2e-05, "loss": 0.02711356, "step": 3379 }, { "epoch": 6.76, "grad_norm": 1.4874508380889893, "learning_rate": 2e-05, "loss": 0.03703025, "step": 3380 }, { "epoch": 6.7620000000000005, "grad_norm": 1.0302430391311646, "learning_rate": 2e-05, "loss": 0.02999306, "step": 3381 }, { "epoch": 6.764, "grad_norm": 1.1743347644805908, "learning_rate": 2e-05, "loss": 0.03301746, "step": 3382 }, { "epoch": 6.766, "grad_norm": 1.2912507057189941, "learning_rate": 2e-05, "loss": 0.02964731, "step": 3383 }, { "epoch": 6.768, "grad_norm": 1.8387466669082642, "learning_rate": 2e-05, "loss": 0.03587981, "step": 3384 }, { "epoch": 6.77, "grad_norm": 1.2493430376052856, "learning_rate": 2e-05, "loss": 0.03021685, "step": 3385 }, { "epoch": 6.772, "grad_norm": 1.8405768871307373, "learning_rate": 2e-05, "loss": 0.03573044, "step": 3386 }, { "epoch": 6.774, "grad_norm": 1.5832135677337646, "learning_rate": 2e-05, "loss": 0.05767541, "step": 3387 }, { "epoch": 6.776, "grad_norm": 1.0562320947647095, "learning_rate": 2e-05, "loss": 0.02797987, "step": 3388 }, { "epoch": 6.7780000000000005, "grad_norm": 1.513688325881958, "learning_rate": 2e-05, "loss": 0.04148609, "step": 3389 }, { "epoch": 6.78, "grad_norm": 1.0690497159957886, "learning_rate": 2e-05, "loss": 0.03502911, "step": 3390 }, { "epoch": 6.782, "grad_norm": 1.8190356492996216, "learning_rate": 2e-05, "loss": 0.03984679, "step": 3391 }, { "epoch": 6.784, "grad_norm": 1.2146891355514526, "learning_rate": 2e-05, "loss": 0.02811867, "step": 3392 }, { "epoch": 6.786, "grad_norm": 2.19791579246521, "learning_rate": 2e-05, "loss": 0.02688298, "step": 3393 }, { "epoch": 6.788, "grad_norm": 2.66430926322937, "learning_rate": 2e-05, "loss": 0.05289562, "step": 3394 }, { "epoch": 6.79, "grad_norm": 1.111362338066101, "learning_rate": 2e-05, "loss": 0.03009887, "step": 3395 }, { "epoch": 6.792, "grad_norm": 2.333982229232788, "learning_rate": 2e-05, "loss": 0.0382598, "step": 3396 }, { "epoch": 6.7940000000000005, "grad_norm": 1.1318389177322388, "learning_rate": 2e-05, "loss": 0.03336748, "step": 3397 }, { "epoch": 6.796, "grad_norm": 1.1174737215042114, "learning_rate": 2e-05, "loss": 0.03350708, "step": 3398 }, { "epoch": 6.798, "grad_norm": 1.5082918405532837, "learning_rate": 2e-05, "loss": 0.03707235, "step": 3399 }, { "epoch": 6.8, "grad_norm": 2.6029469966888428, "learning_rate": 2e-05, "loss": 0.0542256, "step": 3400 }, { "epoch": 6.802, "grad_norm": 1.5858310461044312, "learning_rate": 2e-05, "loss": 0.03691325, "step": 3401 }, { "epoch": 6.804, "grad_norm": 1.3726555109024048, "learning_rate": 2e-05, "loss": 0.04375734, "step": 3402 }, { "epoch": 6.806, "grad_norm": 1.2137669324874878, "learning_rate": 2e-05, "loss": 0.03287685, "step": 3403 }, { "epoch": 6.808, "grad_norm": 1.421502709388733, "learning_rate": 2e-05, "loss": 0.04229927, "step": 3404 }, { "epoch": 6.8100000000000005, "grad_norm": 0.9239016771316528, "learning_rate": 2e-05, "loss": 0.02439195, "step": 3405 }, { "epoch": 6.812, "grad_norm": 1.5839835405349731, "learning_rate": 2e-05, "loss": 0.04999159, "step": 3406 }, { "epoch": 6.814, "grad_norm": 1.8794617652893066, "learning_rate": 2e-05, "loss": 0.04069415, "step": 3407 }, { "epoch": 6.816, "grad_norm": 1.6794978380203247, "learning_rate": 2e-05, "loss": 0.042319, "step": 3408 }, { "epoch": 6.818, "grad_norm": 4.976267337799072, "learning_rate": 2e-05, "loss": 0.03227082, "step": 3409 }, { "epoch": 6.82, "grad_norm": 1.4886870384216309, "learning_rate": 2e-05, "loss": 0.04275644, "step": 3410 }, { "epoch": 6.822, "grad_norm": 7.621867656707764, "learning_rate": 2e-05, "loss": 0.04230917, "step": 3411 }, { "epoch": 6.824, "grad_norm": 1.1310099363327026, "learning_rate": 2e-05, "loss": 0.038154, "step": 3412 }, { "epoch": 6.826, "grad_norm": 0.9763595461845398, "learning_rate": 2e-05, "loss": 0.0353835, "step": 3413 }, { "epoch": 6.828, "grad_norm": 1.6774576902389526, "learning_rate": 2e-05, "loss": 0.04004671, "step": 3414 }, { "epoch": 6.83, "grad_norm": 1.4653470516204834, "learning_rate": 2e-05, "loss": 0.04310292, "step": 3415 }, { "epoch": 6.832, "grad_norm": 1.3005256652832031, "learning_rate": 2e-05, "loss": 0.04270222, "step": 3416 }, { "epoch": 6.834, "grad_norm": 1.2623956203460693, "learning_rate": 2e-05, "loss": 0.04610048, "step": 3417 }, { "epoch": 6.836, "grad_norm": 3.5397067070007324, "learning_rate": 2e-05, "loss": 0.03764736, "step": 3418 }, { "epoch": 6.838, "grad_norm": 1.3481019735336304, "learning_rate": 2e-05, "loss": 0.0253664, "step": 3419 }, { "epoch": 6.84, "grad_norm": 1.1361665725708008, "learning_rate": 2e-05, "loss": 0.03342405, "step": 3420 }, { "epoch": 6.842, "grad_norm": 1.141860842704773, "learning_rate": 2e-05, "loss": 0.03326521, "step": 3421 }, { "epoch": 6.844, "grad_norm": 1.170899748802185, "learning_rate": 2e-05, "loss": 0.03952732, "step": 3422 }, { "epoch": 6.846, "grad_norm": 1.3554216623306274, "learning_rate": 2e-05, "loss": 0.04645754, "step": 3423 }, { "epoch": 6.848, "grad_norm": 1.1913604736328125, "learning_rate": 2e-05, "loss": 0.02899063, "step": 3424 }, { "epoch": 6.85, "grad_norm": 0.9958671927452087, "learning_rate": 2e-05, "loss": 0.03145525, "step": 3425 }, { "epoch": 6.852, "grad_norm": 1.9133530855178833, "learning_rate": 2e-05, "loss": 0.05315136, "step": 3426 }, { "epoch": 6.854, "grad_norm": 1.4105165004730225, "learning_rate": 2e-05, "loss": 0.03489037, "step": 3427 }, { "epoch": 6.856, "grad_norm": 2.0334339141845703, "learning_rate": 2e-05, "loss": 0.03431166, "step": 3428 }, { "epoch": 6.858, "grad_norm": 1.2631068229675293, "learning_rate": 2e-05, "loss": 0.04041663, "step": 3429 }, { "epoch": 6.86, "grad_norm": 0.889681875705719, "learning_rate": 2e-05, "loss": 0.02558031, "step": 3430 }, { "epoch": 6.862, "grad_norm": 1.6193792819976807, "learning_rate": 2e-05, "loss": 0.04380007, "step": 3431 }, { "epoch": 6.864, "grad_norm": 1.2849256992340088, "learning_rate": 2e-05, "loss": 0.03113081, "step": 3432 }, { "epoch": 6.866, "grad_norm": 1.6333054304122925, "learning_rate": 2e-05, "loss": 0.02930366, "step": 3433 }, { "epoch": 6.868, "grad_norm": 1.2340630292892456, "learning_rate": 2e-05, "loss": 0.03456418, "step": 3434 }, { "epoch": 6.87, "grad_norm": 1.9538198709487915, "learning_rate": 2e-05, "loss": 0.037586, "step": 3435 }, { "epoch": 6.872, "grad_norm": 1.0432629585266113, "learning_rate": 2e-05, "loss": 0.02851261, "step": 3436 }, { "epoch": 6.874, "grad_norm": 1.1603375673294067, "learning_rate": 2e-05, "loss": 0.034657, "step": 3437 }, { "epoch": 6.876, "grad_norm": 1.1851812601089478, "learning_rate": 2e-05, "loss": 0.03753354, "step": 3438 }, { "epoch": 6.878, "grad_norm": 1.6592788696289062, "learning_rate": 2e-05, "loss": 0.04491906, "step": 3439 }, { "epoch": 6.88, "grad_norm": 1.2274245023727417, "learning_rate": 2e-05, "loss": 0.0340904, "step": 3440 }, { "epoch": 6.882, "grad_norm": 1.3614306449890137, "learning_rate": 2e-05, "loss": 0.0444744, "step": 3441 }, { "epoch": 6.884, "grad_norm": 1.531777262687683, "learning_rate": 2e-05, "loss": 0.04471203, "step": 3442 }, { "epoch": 6.886, "grad_norm": 1.5285937786102295, "learning_rate": 2e-05, "loss": 0.05250137, "step": 3443 }, { "epoch": 6.888, "grad_norm": 1.1851426362991333, "learning_rate": 2e-05, "loss": 0.02804793, "step": 3444 }, { "epoch": 6.89, "grad_norm": 1.1896464824676514, "learning_rate": 2e-05, "loss": 0.03605233, "step": 3445 }, { "epoch": 6.892, "grad_norm": 1.2750282287597656, "learning_rate": 2e-05, "loss": 0.03346939, "step": 3446 }, { "epoch": 6.894, "grad_norm": 1.1519888639450073, "learning_rate": 2e-05, "loss": 0.0394849, "step": 3447 }, { "epoch": 6.896, "grad_norm": 1.385860800743103, "learning_rate": 2e-05, "loss": 0.03840117, "step": 3448 }, { "epoch": 6.898, "grad_norm": 1.8041616678237915, "learning_rate": 2e-05, "loss": 0.04675615, "step": 3449 }, { "epoch": 6.9, "grad_norm": 2.1062817573547363, "learning_rate": 2e-05, "loss": 0.06331439, "step": 3450 }, { "epoch": 6.902, "grad_norm": 1.432571530342102, "learning_rate": 2e-05, "loss": 0.04114764, "step": 3451 }, { "epoch": 6.904, "grad_norm": 1.6515777111053467, "learning_rate": 2e-05, "loss": 0.03684805, "step": 3452 }, { "epoch": 6.906, "grad_norm": 1.2222999334335327, "learning_rate": 2e-05, "loss": 0.03101808, "step": 3453 }, { "epoch": 6.908, "grad_norm": 1.390168309211731, "learning_rate": 2e-05, "loss": 0.0516277, "step": 3454 }, { "epoch": 6.91, "grad_norm": 0.9776650667190552, "learning_rate": 2e-05, "loss": 0.02990855, "step": 3455 }, { "epoch": 6.912, "grad_norm": 2.2317774295806885, "learning_rate": 2e-05, "loss": 0.03915323, "step": 3456 }, { "epoch": 6.914, "grad_norm": 1.5829832553863525, "learning_rate": 2e-05, "loss": 0.04086168, "step": 3457 }, { "epoch": 6.916, "grad_norm": 1.0172293186187744, "learning_rate": 2e-05, "loss": 0.02923257, "step": 3458 }, { "epoch": 6.918, "grad_norm": 1.2105185985565186, "learning_rate": 2e-05, "loss": 0.02814864, "step": 3459 }, { "epoch": 6.92, "grad_norm": 2.631154775619507, "learning_rate": 2e-05, "loss": 0.05200697, "step": 3460 }, { "epoch": 6.922, "grad_norm": 1.0045074224472046, "learning_rate": 2e-05, "loss": 0.03085414, "step": 3461 }, { "epoch": 6.924, "grad_norm": 1.0970081090927124, "learning_rate": 2e-05, "loss": 0.03664451, "step": 3462 }, { "epoch": 6.926, "grad_norm": 1.9260581731796265, "learning_rate": 2e-05, "loss": 0.05451979, "step": 3463 }, { "epoch": 6.928, "grad_norm": 1.4260892868041992, "learning_rate": 2e-05, "loss": 0.03202819, "step": 3464 }, { "epoch": 6.93, "grad_norm": 1.867712378501892, "learning_rate": 2e-05, "loss": 0.0472446, "step": 3465 }, { "epoch": 6.932, "grad_norm": 1.2931119203567505, "learning_rate": 2e-05, "loss": 0.03627722, "step": 3466 }, { "epoch": 6.934, "grad_norm": 1.267653226852417, "learning_rate": 2e-05, "loss": 0.04178291, "step": 3467 }, { "epoch": 6.936, "grad_norm": 1.8989514112472534, "learning_rate": 2e-05, "loss": 0.06069873, "step": 3468 }, { "epoch": 6.938, "grad_norm": 1.369830846786499, "learning_rate": 2e-05, "loss": 0.03865648, "step": 3469 }, { "epoch": 6.9399999999999995, "grad_norm": 2.1145782470703125, "learning_rate": 2e-05, "loss": 0.03145633, "step": 3470 }, { "epoch": 6.942, "grad_norm": 1.4967544078826904, "learning_rate": 2e-05, "loss": 0.04484332, "step": 3471 }, { "epoch": 6.944, "grad_norm": 0.9439011216163635, "learning_rate": 2e-05, "loss": 0.0252577, "step": 3472 }, { "epoch": 6.946, "grad_norm": 1.3101210594177246, "learning_rate": 2e-05, "loss": 0.02739098, "step": 3473 }, { "epoch": 6.948, "grad_norm": 1.3791102170944214, "learning_rate": 2e-05, "loss": 0.03050887, "step": 3474 }, { "epoch": 6.95, "grad_norm": 1.8151313066482544, "learning_rate": 2e-05, "loss": 0.04466762, "step": 3475 }, { "epoch": 6.952, "grad_norm": 1.7713474035263062, "learning_rate": 2e-05, "loss": 0.03592313, "step": 3476 }, { "epoch": 6.954, "grad_norm": 1.4001665115356445, "learning_rate": 2e-05, "loss": 0.04347411, "step": 3477 }, { "epoch": 6.9559999999999995, "grad_norm": 2.1821348667144775, "learning_rate": 2e-05, "loss": 0.04121158, "step": 3478 }, { "epoch": 6.958, "grad_norm": 1.3329628705978394, "learning_rate": 2e-05, "loss": 0.0454905, "step": 3479 }, { "epoch": 6.96, "grad_norm": 2.2498857975006104, "learning_rate": 2e-05, "loss": 0.05597805, "step": 3480 }, { "epoch": 6.962, "grad_norm": 1.236721396446228, "learning_rate": 2e-05, "loss": 0.03585602, "step": 3481 }, { "epoch": 6.964, "grad_norm": 2.087568521499634, "learning_rate": 2e-05, "loss": 0.03135363, "step": 3482 }, { "epoch": 6.966, "grad_norm": 2.696734666824341, "learning_rate": 2e-05, "loss": 0.05923796, "step": 3483 }, { "epoch": 6.968, "grad_norm": 1.5622137784957886, "learning_rate": 2e-05, "loss": 0.04081835, "step": 3484 }, { "epoch": 6.97, "grad_norm": 1.6300699710845947, "learning_rate": 2e-05, "loss": 0.0384453, "step": 3485 }, { "epoch": 6.9719999999999995, "grad_norm": 1.4294748306274414, "learning_rate": 2e-05, "loss": 0.05283351, "step": 3486 }, { "epoch": 6.974, "grad_norm": 5.5012335777282715, "learning_rate": 2e-05, "loss": 0.04624298, "step": 3487 }, { "epoch": 6.976, "grad_norm": 0.8324853181838989, "learning_rate": 2e-05, "loss": 0.0236811, "step": 3488 }, { "epoch": 6.978, "grad_norm": 1.4482553005218506, "learning_rate": 2e-05, "loss": 0.04167052, "step": 3489 }, { "epoch": 6.98, "grad_norm": 1.1689647436141968, "learning_rate": 2e-05, "loss": 0.03899954, "step": 3490 }, { "epoch": 6.982, "grad_norm": 1.237749695777893, "learning_rate": 2e-05, "loss": 0.03976803, "step": 3491 }, { "epoch": 6.984, "grad_norm": 0.9023482799530029, "learning_rate": 2e-05, "loss": 0.02601608, "step": 3492 }, { "epoch": 6.986, "grad_norm": 1.958407998085022, "learning_rate": 2e-05, "loss": 0.0403572, "step": 3493 }, { "epoch": 6.9879999999999995, "grad_norm": 1.4088008403778076, "learning_rate": 2e-05, "loss": 0.03802837, "step": 3494 }, { "epoch": 6.99, "grad_norm": 1.656472086906433, "learning_rate": 2e-05, "loss": 0.04578206, "step": 3495 }, { "epoch": 6.992, "grad_norm": 2.143359422683716, "learning_rate": 2e-05, "loss": 0.04628511, "step": 3496 }, { "epoch": 6.994, "grad_norm": 1.4736688137054443, "learning_rate": 2e-05, "loss": 0.02740022, "step": 3497 }, { "epoch": 6.996, "grad_norm": 1.2310930490493774, "learning_rate": 2e-05, "loss": 0.04610129, "step": 3498 }, { "epoch": 6.998, "grad_norm": 1.3057410717010498, "learning_rate": 2e-05, "loss": 0.03868414, "step": 3499 }, { "epoch": 7.0, "grad_norm": 1.592368483543396, "learning_rate": 2e-05, "loss": 0.0320503, "step": 3500 }, { "epoch": 7.0, "eval_performance": { "AngleClassification_1": 0.986, "AngleClassification_2": 0.996, "AngleClassification_3": 0.9021956087824351, "Equal_1": 0.994, "Equal_2": 0.9301397205588823, "Equal_3": 0.7924151696606786, "LineComparison_1": 0.998, "LineComparison_2": 0.9960079840319361, "LineComparison_3": 0.9920159680638723, "Parallel_1": 0.9719438877755511, "Parallel_2": 0.9919839679358717, "Parallel_3": 0.988, "Perpendicular_1": 0.982, "Perpendicular_2": 0.74, "Perpendicular_3": 0.3316633266533066, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9972666666666666, "PointLiesOnCircle_3": 0.9936, "PointLiesOnLine_1": 1.0, "PointLiesOnLine_2": 0.9899799599198397, "PointLiesOnLine_3": 0.8223552894211577 }, "eval_runtime": 319.1391, "eval_samples_per_second": 32.901, "eval_steps_per_second": 0.658, "step": 3500 }, { "epoch": 7.002, "grad_norm": 2.186814069747925, "learning_rate": 2e-05, "loss": 0.06875495, "step": 3501 }, { "epoch": 7.004, "grad_norm": 2.7851738929748535, "learning_rate": 2e-05, "loss": 0.07352186, "step": 3502 }, { "epoch": 7.006, "grad_norm": 1.6021937131881714, "learning_rate": 2e-05, "loss": 0.04718731, "step": 3503 }, { "epoch": 7.008, "grad_norm": 1.708878755569458, "learning_rate": 2e-05, "loss": 0.05183144, "step": 3504 }, { "epoch": 7.01, "grad_norm": 1.2704074382781982, "learning_rate": 2e-05, "loss": 0.04325004, "step": 3505 }, { "epoch": 7.012, "grad_norm": 1.7846604585647583, "learning_rate": 2e-05, "loss": 0.04165163, "step": 3506 }, { "epoch": 7.014, "grad_norm": 2.000779390335083, "learning_rate": 2e-05, "loss": 0.05959515, "step": 3507 }, { "epoch": 7.016, "grad_norm": 1.2613239288330078, "learning_rate": 2e-05, "loss": 0.04999759, "step": 3508 }, { "epoch": 7.018, "grad_norm": 1.419448971748352, "learning_rate": 2e-05, "loss": 0.05279196, "step": 3509 }, { "epoch": 7.02, "grad_norm": 1.1730986833572388, "learning_rate": 2e-05, "loss": 0.04814022, "step": 3510 }, { "epoch": 7.022, "grad_norm": 1.3364038467407227, "learning_rate": 2e-05, "loss": 0.0524789, "step": 3511 }, { "epoch": 7.024, "grad_norm": 1.4599989652633667, "learning_rate": 2e-05, "loss": 0.06457356, "step": 3512 }, { "epoch": 7.026, "grad_norm": 1.2211647033691406, "learning_rate": 2e-05, "loss": 0.03781735, "step": 3513 }, { "epoch": 7.028, "grad_norm": 1.916827917098999, "learning_rate": 2e-05, "loss": 0.04609166, "step": 3514 }, { "epoch": 7.03, "grad_norm": 1.3852880001068115, "learning_rate": 2e-05, "loss": 0.05185224, "step": 3515 }, { "epoch": 7.032, "grad_norm": 1.9086310863494873, "learning_rate": 2e-05, "loss": 0.06317137, "step": 3516 }, { "epoch": 7.034, "grad_norm": 1.3954685926437378, "learning_rate": 2e-05, "loss": 0.04755458, "step": 3517 }, { "epoch": 7.036, "grad_norm": 1.2691612243652344, "learning_rate": 2e-05, "loss": 0.04614236, "step": 3518 }, { "epoch": 7.038, "grad_norm": 1.438931941986084, "learning_rate": 2e-05, "loss": 0.06192727, "step": 3519 }, { "epoch": 7.04, "grad_norm": 1.505270004272461, "learning_rate": 2e-05, "loss": 0.04731307, "step": 3520 }, { "epoch": 7.042, "grad_norm": 1.2275315523147583, "learning_rate": 2e-05, "loss": 0.05152331, "step": 3521 }, { "epoch": 7.044, "grad_norm": 1.6780346632003784, "learning_rate": 2e-05, "loss": 0.04752352, "step": 3522 }, { "epoch": 7.046, "grad_norm": 1.3278478384017944, "learning_rate": 2e-05, "loss": 0.03956543, "step": 3523 }, { "epoch": 7.048, "grad_norm": 1.9214937686920166, "learning_rate": 2e-05, "loss": 0.05849922, "step": 3524 }, { "epoch": 7.05, "grad_norm": 2.2144150733947754, "learning_rate": 2e-05, "loss": 0.04467774, "step": 3525 }, { "epoch": 7.052, "grad_norm": 1.5400785207748413, "learning_rate": 2e-05, "loss": 0.05098802, "step": 3526 }, { "epoch": 7.054, "grad_norm": 2.3327882289886475, "learning_rate": 2e-05, "loss": 0.04657032, "step": 3527 }, { "epoch": 7.056, "grad_norm": 1.3429843187332153, "learning_rate": 2e-05, "loss": 0.04105748, "step": 3528 }, { "epoch": 7.058, "grad_norm": 1.845400094985962, "learning_rate": 2e-05, "loss": 0.04504231, "step": 3529 }, { "epoch": 7.06, "grad_norm": 2.3795065879821777, "learning_rate": 2e-05, "loss": 0.0596103, "step": 3530 }, { "epoch": 7.062, "grad_norm": 1.7181967496871948, "learning_rate": 2e-05, "loss": 0.05508241, "step": 3531 }, { "epoch": 7.064, "grad_norm": 2.2078285217285156, "learning_rate": 2e-05, "loss": 0.07361597, "step": 3532 }, { "epoch": 7.066, "grad_norm": 1.3147023916244507, "learning_rate": 2e-05, "loss": 0.04743645, "step": 3533 }, { "epoch": 7.068, "grad_norm": 1.294423222541809, "learning_rate": 2e-05, "loss": 0.03428509, "step": 3534 }, { "epoch": 7.07, "grad_norm": 1.5514196157455444, "learning_rate": 2e-05, "loss": 0.05675917, "step": 3535 }, { "epoch": 7.072, "grad_norm": 2.4513447284698486, "learning_rate": 2e-05, "loss": 0.05515049, "step": 3536 }, { "epoch": 7.074, "grad_norm": 1.598224401473999, "learning_rate": 2e-05, "loss": 0.07278877, "step": 3537 }, { "epoch": 7.076, "grad_norm": 1.0842782258987427, "learning_rate": 2e-05, "loss": 0.04107867, "step": 3538 }, { "epoch": 7.078, "grad_norm": 1.510959267616272, "learning_rate": 2e-05, "loss": 0.06507348, "step": 3539 }, { "epoch": 7.08, "grad_norm": 1.5507309436798096, "learning_rate": 2e-05, "loss": 0.03261951, "step": 3540 }, { "epoch": 7.082, "grad_norm": 1.7355281114578247, "learning_rate": 2e-05, "loss": 0.06146067, "step": 3541 }, { "epoch": 7.084, "grad_norm": 4.931440830230713, "learning_rate": 2e-05, "loss": 0.04650098, "step": 3542 }, { "epoch": 7.086, "grad_norm": 2.3309950828552246, "learning_rate": 2e-05, "loss": 0.09169232, "step": 3543 }, { "epoch": 7.088, "grad_norm": 2.2715747356414795, "learning_rate": 2e-05, "loss": 0.05420232, "step": 3544 }, { "epoch": 7.09, "grad_norm": 2.207563877105713, "learning_rate": 2e-05, "loss": 0.03945183, "step": 3545 }, { "epoch": 7.092, "grad_norm": 1.6729447841644287, "learning_rate": 2e-05, "loss": 0.04845568, "step": 3546 }, { "epoch": 7.094, "grad_norm": 1.796739101409912, "learning_rate": 2e-05, "loss": 0.06503285, "step": 3547 }, { "epoch": 7.096, "grad_norm": 1.2983309030532837, "learning_rate": 2e-05, "loss": 0.04906647, "step": 3548 }, { "epoch": 7.098, "grad_norm": 1.4331189393997192, "learning_rate": 2e-05, "loss": 0.05561874, "step": 3549 }, { "epoch": 7.1, "grad_norm": 1.5238709449768066, "learning_rate": 2e-05, "loss": 0.05148921, "step": 3550 }, { "epoch": 7.102, "grad_norm": 1.2250235080718994, "learning_rate": 2e-05, "loss": 0.04760787, "step": 3551 }, { "epoch": 7.104, "grad_norm": 1.3822916746139526, "learning_rate": 2e-05, "loss": 0.0511881, "step": 3552 }, { "epoch": 7.106, "grad_norm": 2.5087249279022217, "learning_rate": 2e-05, "loss": 0.06286559, "step": 3553 }, { "epoch": 7.108, "grad_norm": 2.8455286026000977, "learning_rate": 2e-05, "loss": 0.05309931, "step": 3554 }, { "epoch": 7.11, "grad_norm": 1.4749555587768555, "learning_rate": 2e-05, "loss": 0.05005356, "step": 3555 }, { "epoch": 7.112, "grad_norm": 2.161928415298462, "learning_rate": 2e-05, "loss": 0.04809496, "step": 3556 }, { "epoch": 7.114, "grad_norm": 1.3201868534088135, "learning_rate": 2e-05, "loss": 0.05066467, "step": 3557 }, { "epoch": 7.116, "grad_norm": 1.2147846221923828, "learning_rate": 2e-05, "loss": 0.05296815, "step": 3558 }, { "epoch": 7.118, "grad_norm": 1.3991172313690186, "learning_rate": 2e-05, "loss": 0.04677524, "step": 3559 }, { "epoch": 7.12, "grad_norm": 6.09710693359375, "learning_rate": 2e-05, "loss": 0.04200501, "step": 3560 }, { "epoch": 7.122, "grad_norm": 1.4460575580596924, "learning_rate": 2e-05, "loss": 0.0484569, "step": 3561 }, { "epoch": 7.124, "grad_norm": 1.2892142534255981, "learning_rate": 2e-05, "loss": 0.04489253, "step": 3562 }, { "epoch": 7.126, "grad_norm": 2.244136333465576, "learning_rate": 2e-05, "loss": 0.05174838, "step": 3563 }, { "epoch": 7.128, "grad_norm": 0.9121345281600952, "learning_rate": 2e-05, "loss": 0.02768136, "step": 3564 }, { "epoch": 7.13, "grad_norm": 1.626583218574524, "learning_rate": 2e-05, "loss": 0.05284789, "step": 3565 }, { "epoch": 7.132, "grad_norm": 1.1587470769882202, "learning_rate": 2e-05, "loss": 0.04420138, "step": 3566 }, { "epoch": 7.134, "grad_norm": 1.1360605955123901, "learning_rate": 2e-05, "loss": 0.03466136, "step": 3567 }, { "epoch": 7.136, "grad_norm": 1.6288130283355713, "learning_rate": 2e-05, "loss": 0.05938084, "step": 3568 }, { "epoch": 7.138, "grad_norm": 1.7451311349868774, "learning_rate": 2e-05, "loss": 0.07126134, "step": 3569 }, { "epoch": 7.14, "grad_norm": 1.0442672967910767, "learning_rate": 2e-05, "loss": 0.0229485, "step": 3570 }, { "epoch": 7.142, "grad_norm": 0.9555102586746216, "learning_rate": 2e-05, "loss": 0.03555539, "step": 3571 }, { "epoch": 7.144, "grad_norm": 1.9347809553146362, "learning_rate": 2e-05, "loss": 0.065361, "step": 3572 }, { "epoch": 7.146, "grad_norm": 1.5342516899108887, "learning_rate": 2e-05, "loss": 0.05312759, "step": 3573 }, { "epoch": 7.148, "grad_norm": 2.1047842502593994, "learning_rate": 2e-05, "loss": 0.04418172, "step": 3574 }, { "epoch": 7.15, "grad_norm": 1.7148213386535645, "learning_rate": 2e-05, "loss": 0.05144256, "step": 3575 }, { "epoch": 7.152, "grad_norm": 2.749667167663574, "learning_rate": 2e-05, "loss": 0.07798346, "step": 3576 }, { "epoch": 7.154, "grad_norm": 2.0609371662139893, "learning_rate": 2e-05, "loss": 0.04355539, "step": 3577 }, { "epoch": 7.156, "grad_norm": 1.6791129112243652, "learning_rate": 2e-05, "loss": 0.05896265, "step": 3578 }, { "epoch": 7.158, "grad_norm": 2.215697765350342, "learning_rate": 2e-05, "loss": 0.0662262, "step": 3579 }, { "epoch": 7.16, "grad_norm": 1.0952160358428955, "learning_rate": 2e-05, "loss": 0.03965465, "step": 3580 }, { "epoch": 7.162, "grad_norm": 1.4443844556808472, "learning_rate": 2e-05, "loss": 0.041635, "step": 3581 }, { "epoch": 7.164, "grad_norm": 3.1026885509490967, "learning_rate": 2e-05, "loss": 0.06438316, "step": 3582 }, { "epoch": 7.166, "grad_norm": 1.489847183227539, "learning_rate": 2e-05, "loss": 0.04829738, "step": 3583 }, { "epoch": 7.168, "grad_norm": 1.4437965154647827, "learning_rate": 2e-05, "loss": 0.05083038, "step": 3584 }, { "epoch": 7.17, "grad_norm": 1.0720726251602173, "learning_rate": 2e-05, "loss": 0.04749148, "step": 3585 }, { "epoch": 7.172, "grad_norm": 1.5895586013793945, "learning_rate": 2e-05, "loss": 0.06001811, "step": 3586 }, { "epoch": 7.174, "grad_norm": 1.490312933921814, "learning_rate": 2e-05, "loss": 0.03755351, "step": 3587 }, { "epoch": 7.176, "grad_norm": 1.5626713037490845, "learning_rate": 2e-05, "loss": 0.05684235, "step": 3588 }, { "epoch": 7.178, "grad_norm": 1.5374075174331665, "learning_rate": 2e-05, "loss": 0.05959694, "step": 3589 }, { "epoch": 7.18, "grad_norm": 1.7578250169754028, "learning_rate": 2e-05, "loss": 0.05503277, "step": 3590 }, { "epoch": 7.182, "grad_norm": 2.2269232273101807, "learning_rate": 2e-05, "loss": 0.05974087, "step": 3591 }, { "epoch": 7.184, "grad_norm": 2.0154407024383545, "learning_rate": 2e-05, "loss": 0.064426, "step": 3592 }, { "epoch": 7.186, "grad_norm": 4.343257427215576, "learning_rate": 2e-05, "loss": 0.06492545, "step": 3593 }, { "epoch": 7.188, "grad_norm": 1.5611995458602905, "learning_rate": 2e-05, "loss": 0.05464255, "step": 3594 }, { "epoch": 7.19, "grad_norm": 2.143812656402588, "learning_rate": 2e-05, "loss": 0.06049203, "step": 3595 }, { "epoch": 7.192, "grad_norm": 1.4210190773010254, "learning_rate": 2e-05, "loss": 0.05442027, "step": 3596 }, { "epoch": 7.194, "grad_norm": 1.7201104164123535, "learning_rate": 2e-05, "loss": 0.07043049, "step": 3597 }, { "epoch": 7.196, "grad_norm": 1.361403226852417, "learning_rate": 2e-05, "loss": 0.04256427, "step": 3598 }, { "epoch": 7.198, "grad_norm": 1.2954514026641846, "learning_rate": 2e-05, "loss": 0.04150357, "step": 3599 }, { "epoch": 7.2, "grad_norm": 1.6332778930664062, "learning_rate": 2e-05, "loss": 0.04461187, "step": 3600 }, { "epoch": 7.202, "grad_norm": 1.5767091512680054, "learning_rate": 2e-05, "loss": 0.05819118, "step": 3601 }, { "epoch": 7.204, "grad_norm": 1.3004631996154785, "learning_rate": 2e-05, "loss": 0.03637975, "step": 3602 }, { "epoch": 7.206, "grad_norm": 1.3462961912155151, "learning_rate": 2e-05, "loss": 0.03405475, "step": 3603 }, { "epoch": 7.208, "grad_norm": 1.1177797317504883, "learning_rate": 2e-05, "loss": 0.03819214, "step": 3604 }, { "epoch": 7.21, "grad_norm": 2.921354293823242, "learning_rate": 2e-05, "loss": 0.06285311, "step": 3605 }, { "epoch": 7.212, "grad_norm": 1.4188235998153687, "learning_rate": 2e-05, "loss": 0.05607903, "step": 3606 }, { "epoch": 7.214, "grad_norm": 2.9079253673553467, "learning_rate": 2e-05, "loss": 0.06680961, "step": 3607 }, { "epoch": 7.216, "grad_norm": 1.4562344551086426, "learning_rate": 2e-05, "loss": 0.05460964, "step": 3608 }, { "epoch": 7.218, "grad_norm": 1.5972447395324707, "learning_rate": 2e-05, "loss": 0.04806063, "step": 3609 }, { "epoch": 7.22, "grad_norm": 1.8971936702728271, "learning_rate": 2e-05, "loss": 0.05641398, "step": 3610 }, { "epoch": 7.222, "grad_norm": 1.4135507345199585, "learning_rate": 2e-05, "loss": 0.04675736, "step": 3611 }, { "epoch": 7.224, "grad_norm": 1.3104100227355957, "learning_rate": 2e-05, "loss": 0.04616308, "step": 3612 }, { "epoch": 7.226, "grad_norm": 1.6768646240234375, "learning_rate": 2e-05, "loss": 0.05637186, "step": 3613 }, { "epoch": 7.228, "grad_norm": 1.6318180561065674, "learning_rate": 2e-05, "loss": 0.06836526, "step": 3614 }, { "epoch": 7.23, "grad_norm": 1.0449095964431763, "learning_rate": 2e-05, "loss": 0.03408219, "step": 3615 }, { "epoch": 7.232, "grad_norm": 1.6176615953445435, "learning_rate": 2e-05, "loss": 0.06785093, "step": 3616 }, { "epoch": 7.234, "grad_norm": 1.3260332345962524, "learning_rate": 2e-05, "loss": 0.04694654, "step": 3617 }, { "epoch": 7.236, "grad_norm": 1.478721261024475, "learning_rate": 2e-05, "loss": 0.04473628, "step": 3618 }, { "epoch": 7.2379999999999995, "grad_norm": 1.9737378358840942, "learning_rate": 2e-05, "loss": 0.06412191, "step": 3619 }, { "epoch": 7.24, "grad_norm": 1.4525940418243408, "learning_rate": 2e-05, "loss": 0.04360288, "step": 3620 }, { "epoch": 7.242, "grad_norm": 2.22672963142395, "learning_rate": 2e-05, "loss": 0.06661811, "step": 3621 }, { "epoch": 7.244, "grad_norm": 1.2235952615737915, "learning_rate": 2e-05, "loss": 0.0464549, "step": 3622 }, { "epoch": 7.246, "grad_norm": 1.7983734607696533, "learning_rate": 2e-05, "loss": 0.04726644, "step": 3623 }, { "epoch": 7.248, "grad_norm": 1.316263198852539, "learning_rate": 2e-05, "loss": 0.0438324, "step": 3624 }, { "epoch": 7.25, "grad_norm": 1.3271807432174683, "learning_rate": 2e-05, "loss": 0.04459179, "step": 3625 }, { "epoch": 7.252, "grad_norm": 1.7631975412368774, "learning_rate": 2e-05, "loss": 0.04965796, "step": 3626 }, { "epoch": 7.254, "grad_norm": 2.7661640644073486, "learning_rate": 2e-05, "loss": 0.06493346, "step": 3627 }, { "epoch": 7.256, "grad_norm": 1.1280755996704102, "learning_rate": 2e-05, "loss": 0.03161623, "step": 3628 }, { "epoch": 7.258, "grad_norm": 1.7222224473953247, "learning_rate": 2e-05, "loss": 0.0412074, "step": 3629 }, { "epoch": 7.26, "grad_norm": 1.4843904972076416, "learning_rate": 2e-05, "loss": 0.04300556, "step": 3630 }, { "epoch": 7.2620000000000005, "grad_norm": 1.4301111698150635, "learning_rate": 2e-05, "loss": 0.05478829, "step": 3631 }, { "epoch": 7.264, "grad_norm": 1.6331700086593628, "learning_rate": 2e-05, "loss": 0.04211323, "step": 3632 }, { "epoch": 7.266, "grad_norm": 1.3613964319229126, "learning_rate": 2e-05, "loss": 0.04794502, "step": 3633 }, { "epoch": 7.268, "grad_norm": 1.2767889499664307, "learning_rate": 2e-05, "loss": 0.04734127, "step": 3634 }, { "epoch": 7.27, "grad_norm": 1.6509748697280884, "learning_rate": 2e-05, "loss": 0.05143131, "step": 3635 }, { "epoch": 7.272, "grad_norm": 1.3792498111724854, "learning_rate": 2e-05, "loss": 0.04537631, "step": 3636 }, { "epoch": 7.274, "grad_norm": 1.7637391090393066, "learning_rate": 2e-05, "loss": 0.04865746, "step": 3637 }, { "epoch": 7.276, "grad_norm": 1.275904655456543, "learning_rate": 2e-05, "loss": 0.03691653, "step": 3638 }, { "epoch": 7.2780000000000005, "grad_norm": 1.575995922088623, "learning_rate": 2e-05, "loss": 0.05559878, "step": 3639 }, { "epoch": 7.28, "grad_norm": 2.00274920463562, "learning_rate": 2e-05, "loss": 0.06520915, "step": 3640 }, { "epoch": 7.282, "grad_norm": 1.6603018045425415, "learning_rate": 2e-05, "loss": 0.04692579, "step": 3641 }, { "epoch": 7.284, "grad_norm": 1.4534879922866821, "learning_rate": 2e-05, "loss": 0.05134547, "step": 3642 }, { "epoch": 7.286, "grad_norm": 1.8674389123916626, "learning_rate": 2e-05, "loss": 0.06286973, "step": 3643 }, { "epoch": 7.288, "grad_norm": 1.2336664199829102, "learning_rate": 2e-05, "loss": 0.04018402, "step": 3644 }, { "epoch": 7.29, "grad_norm": 1.0961309671401978, "learning_rate": 2e-05, "loss": 0.03630316, "step": 3645 }, { "epoch": 7.292, "grad_norm": 1.4161972999572754, "learning_rate": 2e-05, "loss": 0.04280121, "step": 3646 }, { "epoch": 7.294, "grad_norm": 1.8841363191604614, "learning_rate": 2e-05, "loss": 0.06651107, "step": 3647 }, { "epoch": 7.296, "grad_norm": 1.3848674297332764, "learning_rate": 2e-05, "loss": 0.05075488, "step": 3648 }, { "epoch": 7.298, "grad_norm": 2.675525665283203, "learning_rate": 2e-05, "loss": 0.0590144, "step": 3649 }, { "epoch": 7.3, "grad_norm": 2.8043618202209473, "learning_rate": 2e-05, "loss": 0.05341516, "step": 3650 }, { "epoch": 7.302, "grad_norm": 1.16990065574646, "learning_rate": 2e-05, "loss": 0.03649558, "step": 3651 }, { "epoch": 7.304, "grad_norm": 1.3103454113006592, "learning_rate": 2e-05, "loss": 0.04651901, "step": 3652 }, { "epoch": 7.306, "grad_norm": 1.39406156539917, "learning_rate": 2e-05, "loss": 0.04839561, "step": 3653 }, { "epoch": 7.308, "grad_norm": 1.6239453554153442, "learning_rate": 2e-05, "loss": 0.05581132, "step": 3654 }, { "epoch": 7.31, "grad_norm": 1.9287298917770386, "learning_rate": 2e-05, "loss": 0.05162586, "step": 3655 }, { "epoch": 7.312, "grad_norm": 1.1683954000473022, "learning_rate": 2e-05, "loss": 0.04523386, "step": 3656 }, { "epoch": 7.314, "grad_norm": 1.9765461683273315, "learning_rate": 2e-05, "loss": 0.05748572, "step": 3657 }, { "epoch": 7.316, "grad_norm": 1.5653101205825806, "learning_rate": 2e-05, "loss": 0.05275439, "step": 3658 }, { "epoch": 7.318, "grad_norm": 1.9025769233703613, "learning_rate": 2e-05, "loss": 0.0513374, "step": 3659 }, { "epoch": 7.32, "grad_norm": 0.9924256801605225, "learning_rate": 2e-05, "loss": 0.02910082, "step": 3660 }, { "epoch": 7.322, "grad_norm": 1.058456301689148, "learning_rate": 2e-05, "loss": 0.03298526, "step": 3661 }, { "epoch": 7.324, "grad_norm": 1.0151259899139404, "learning_rate": 2e-05, "loss": 0.03118435, "step": 3662 }, { "epoch": 7.326, "grad_norm": 2.0813844203948975, "learning_rate": 2e-05, "loss": 0.05353644, "step": 3663 }, { "epoch": 7.328, "grad_norm": 2.497664213180542, "learning_rate": 2e-05, "loss": 0.07998416, "step": 3664 }, { "epoch": 7.33, "grad_norm": 1.1892470121383667, "learning_rate": 2e-05, "loss": 0.04133311, "step": 3665 }, { "epoch": 7.332, "grad_norm": 1.0578644275665283, "learning_rate": 2e-05, "loss": 0.03139057, "step": 3666 }, { "epoch": 7.334, "grad_norm": 1.7718684673309326, "learning_rate": 2e-05, "loss": 0.06689329, "step": 3667 }, { "epoch": 7.336, "grad_norm": 2.739551305770874, "learning_rate": 2e-05, "loss": 0.03666856, "step": 3668 }, { "epoch": 7.338, "grad_norm": 1.403628945350647, "learning_rate": 2e-05, "loss": 0.0506552, "step": 3669 }, { "epoch": 7.34, "grad_norm": 1.8418684005737305, "learning_rate": 2e-05, "loss": 0.05253516, "step": 3670 }, { "epoch": 7.342, "grad_norm": 1.2462759017944336, "learning_rate": 2e-05, "loss": 0.04325264, "step": 3671 }, { "epoch": 7.344, "grad_norm": 3.195005178451538, "learning_rate": 2e-05, "loss": 0.05133444, "step": 3672 }, { "epoch": 7.346, "grad_norm": 1.543524146080017, "learning_rate": 2e-05, "loss": 0.04906403, "step": 3673 }, { "epoch": 7.348, "grad_norm": 1.4386686086654663, "learning_rate": 2e-05, "loss": 0.04781891, "step": 3674 }, { "epoch": 7.35, "grad_norm": 1.7783071994781494, "learning_rate": 2e-05, "loss": 0.0641686, "step": 3675 }, { "epoch": 7.352, "grad_norm": 1.090715765953064, "learning_rate": 2e-05, "loss": 0.03242114, "step": 3676 }, { "epoch": 7.354, "grad_norm": 1.3116369247436523, "learning_rate": 2e-05, "loss": 0.03984781, "step": 3677 }, { "epoch": 7.356, "grad_norm": 1.894168734550476, "learning_rate": 2e-05, "loss": 0.03056693, "step": 3678 }, { "epoch": 7.358, "grad_norm": 1.7085542678833008, "learning_rate": 2e-05, "loss": 0.05443097, "step": 3679 }, { "epoch": 7.36, "grad_norm": 1.401723027229309, "learning_rate": 2e-05, "loss": 0.04774553, "step": 3680 }, { "epoch": 7.362, "grad_norm": 1.4581307172775269, "learning_rate": 2e-05, "loss": 0.04450407, "step": 3681 }, { "epoch": 7.364, "grad_norm": 1.6069996356964111, "learning_rate": 2e-05, "loss": 0.06170261, "step": 3682 }, { "epoch": 7.366, "grad_norm": 1.324460506439209, "learning_rate": 2e-05, "loss": 0.04895044, "step": 3683 }, { "epoch": 7.368, "grad_norm": 1.3643289804458618, "learning_rate": 2e-05, "loss": 0.04545024, "step": 3684 }, { "epoch": 7.37, "grad_norm": 1.6585203409194946, "learning_rate": 2e-05, "loss": 0.04801652, "step": 3685 }, { "epoch": 7.372, "grad_norm": 1.4287261962890625, "learning_rate": 2e-05, "loss": 0.0614093, "step": 3686 }, { "epoch": 7.374, "grad_norm": 1.5790996551513672, "learning_rate": 2e-05, "loss": 0.05381008, "step": 3687 }, { "epoch": 7.376, "grad_norm": 2.0287680625915527, "learning_rate": 2e-05, "loss": 0.06601522, "step": 3688 }, { "epoch": 7.378, "grad_norm": 1.171452283859253, "learning_rate": 2e-05, "loss": 0.03029294, "step": 3689 }, { "epoch": 7.38, "grad_norm": 1.8826758861541748, "learning_rate": 2e-05, "loss": 0.0547618, "step": 3690 }, { "epoch": 7.382, "grad_norm": 1.565005898475647, "learning_rate": 2e-05, "loss": 0.0449411, "step": 3691 }, { "epoch": 7.384, "grad_norm": 1.5869972705841064, "learning_rate": 2e-05, "loss": 0.04129549, "step": 3692 }, { "epoch": 7.386, "grad_norm": 1.52235746383667, "learning_rate": 2e-05, "loss": 0.05102809, "step": 3693 }, { "epoch": 7.388, "grad_norm": 1.656127691268921, "learning_rate": 2e-05, "loss": 0.04343271, "step": 3694 }, { "epoch": 7.39, "grad_norm": 1.1580047607421875, "learning_rate": 2e-05, "loss": 0.04137683, "step": 3695 }, { "epoch": 7.392, "grad_norm": 1.3034822940826416, "learning_rate": 2e-05, "loss": 0.04663299, "step": 3696 }, { "epoch": 7.394, "grad_norm": 1.391483187675476, "learning_rate": 2e-05, "loss": 0.04537113, "step": 3697 }, { "epoch": 7.396, "grad_norm": 1.4313862323760986, "learning_rate": 2e-05, "loss": 0.04555268, "step": 3698 }, { "epoch": 7.398, "grad_norm": 3.1390175819396973, "learning_rate": 2e-05, "loss": 0.06643859, "step": 3699 }, { "epoch": 7.4, "grad_norm": 1.6369024515151978, "learning_rate": 2e-05, "loss": 0.05245568, "step": 3700 }, { "epoch": 7.402, "grad_norm": 1.2686548233032227, "learning_rate": 2e-05, "loss": 0.04974839, "step": 3701 }, { "epoch": 7.404, "grad_norm": 1.2958829402923584, "learning_rate": 2e-05, "loss": 0.04743432, "step": 3702 }, { "epoch": 7.406, "grad_norm": 1.9894742965698242, "learning_rate": 2e-05, "loss": 0.04571943, "step": 3703 }, { "epoch": 7.408, "grad_norm": 1.0856343507766724, "learning_rate": 2e-05, "loss": 0.03500437, "step": 3704 }, { "epoch": 7.41, "grad_norm": 1.949939250946045, "learning_rate": 2e-05, "loss": 0.05323801, "step": 3705 }, { "epoch": 7.412, "grad_norm": 1.4476184844970703, "learning_rate": 2e-05, "loss": 0.04785695, "step": 3706 }, { "epoch": 7.414, "grad_norm": 3.4324965476989746, "learning_rate": 2e-05, "loss": 0.06573795, "step": 3707 }, { "epoch": 7.416, "grad_norm": 1.3931694030761719, "learning_rate": 2e-05, "loss": 0.04984997, "step": 3708 }, { "epoch": 7.418, "grad_norm": 1.56455659866333, "learning_rate": 2e-05, "loss": 0.05046746, "step": 3709 }, { "epoch": 7.42, "grad_norm": 1.200805425643921, "learning_rate": 2e-05, "loss": 0.03915904, "step": 3710 }, { "epoch": 7.422, "grad_norm": 1.366477608680725, "learning_rate": 2e-05, "loss": 0.04656648, "step": 3711 }, { "epoch": 7.424, "grad_norm": 1.8359090089797974, "learning_rate": 2e-05, "loss": 0.05103552, "step": 3712 }, { "epoch": 7.426, "grad_norm": 1.4380766153335571, "learning_rate": 2e-05, "loss": 0.04777982, "step": 3713 }, { "epoch": 7.428, "grad_norm": 1.0222777128219604, "learning_rate": 2e-05, "loss": 0.03150903, "step": 3714 }, { "epoch": 7.43, "grad_norm": 2.749195098876953, "learning_rate": 2e-05, "loss": 0.05602058, "step": 3715 }, { "epoch": 7.432, "grad_norm": 1.7111470699310303, "learning_rate": 2e-05, "loss": 0.04583771, "step": 3716 }, { "epoch": 7.434, "grad_norm": 1.3404757976531982, "learning_rate": 2e-05, "loss": 0.04275534, "step": 3717 }, { "epoch": 7.436, "grad_norm": 1.210221529006958, "learning_rate": 2e-05, "loss": 0.04058505, "step": 3718 }, { "epoch": 7.438, "grad_norm": 2.068676471710205, "learning_rate": 2e-05, "loss": 0.04987239, "step": 3719 }, { "epoch": 7.44, "grad_norm": 1.6463974714279175, "learning_rate": 2e-05, "loss": 0.0489587, "step": 3720 }, { "epoch": 7.442, "grad_norm": 0.9529186487197876, "learning_rate": 2e-05, "loss": 0.02814676, "step": 3721 }, { "epoch": 7.444, "grad_norm": 1.097665548324585, "learning_rate": 2e-05, "loss": 0.0485144, "step": 3722 }, { "epoch": 7.446, "grad_norm": 1.5147879123687744, "learning_rate": 2e-05, "loss": 0.04698938, "step": 3723 }, { "epoch": 7.448, "grad_norm": 1.2705267667770386, "learning_rate": 2e-05, "loss": 0.03390349, "step": 3724 }, { "epoch": 7.45, "grad_norm": 2.0715646743774414, "learning_rate": 2e-05, "loss": 0.07898147, "step": 3725 }, { "epoch": 7.452, "grad_norm": 1.3505516052246094, "learning_rate": 2e-05, "loss": 0.05181169, "step": 3726 }, { "epoch": 7.454, "grad_norm": 1.6015242338180542, "learning_rate": 2e-05, "loss": 0.05964314, "step": 3727 }, { "epoch": 7.456, "grad_norm": 1.358168601989746, "learning_rate": 2e-05, "loss": 0.04774554, "step": 3728 }, { "epoch": 7.458, "grad_norm": 1.53998863697052, "learning_rate": 2e-05, "loss": 0.04986886, "step": 3729 }, { "epoch": 7.46, "grad_norm": 1.2373335361480713, "learning_rate": 2e-05, "loss": 0.05145576, "step": 3730 }, { "epoch": 7.462, "grad_norm": 1.2821953296661377, "learning_rate": 2e-05, "loss": 0.05637913, "step": 3731 }, { "epoch": 7.464, "grad_norm": 1.3105919361114502, "learning_rate": 2e-05, "loss": 0.04199533, "step": 3732 }, { "epoch": 7.466, "grad_norm": 1.5130268335342407, "learning_rate": 2e-05, "loss": 0.04699155, "step": 3733 }, { "epoch": 7.468, "grad_norm": 1.737265706062317, "learning_rate": 2e-05, "loss": 0.03969675, "step": 3734 }, { "epoch": 7.47, "grad_norm": 1.4570841789245605, "learning_rate": 2e-05, "loss": 0.04642572, "step": 3735 }, { "epoch": 7.4719999999999995, "grad_norm": 0.9005123376846313, "learning_rate": 2e-05, "loss": 0.03132126, "step": 3736 }, { "epoch": 7.474, "grad_norm": 1.973960041999817, "learning_rate": 2e-05, "loss": 0.05362177, "step": 3737 }, { "epoch": 7.476, "grad_norm": 2.2350363731384277, "learning_rate": 2e-05, "loss": 0.06357549, "step": 3738 }, { "epoch": 7.478, "grad_norm": 1.4672166109085083, "learning_rate": 2e-05, "loss": 0.04843113, "step": 3739 }, { "epoch": 7.48, "grad_norm": 1.9889647960662842, "learning_rate": 2e-05, "loss": 0.04717405, "step": 3740 }, { "epoch": 7.482, "grad_norm": 1.1756142377853394, "learning_rate": 2e-05, "loss": 0.04274727, "step": 3741 }, { "epoch": 7.484, "grad_norm": 1.004916787147522, "learning_rate": 2e-05, "loss": 0.03753413, "step": 3742 }, { "epoch": 7.486, "grad_norm": 1.277570128440857, "learning_rate": 2e-05, "loss": 0.04001706, "step": 3743 }, { "epoch": 7.4879999999999995, "grad_norm": 1.0165624618530273, "learning_rate": 2e-05, "loss": 0.03563813, "step": 3744 }, { "epoch": 7.49, "grad_norm": 1.3447015285491943, "learning_rate": 2e-05, "loss": 0.04556485, "step": 3745 }, { "epoch": 7.492, "grad_norm": 1.9407835006713867, "learning_rate": 2e-05, "loss": 0.05292122, "step": 3746 }, { "epoch": 7.494, "grad_norm": 4.274045467376709, "learning_rate": 2e-05, "loss": 0.04551305, "step": 3747 }, { "epoch": 7.496, "grad_norm": 1.2199167013168335, "learning_rate": 2e-05, "loss": 0.04236592, "step": 3748 }, { "epoch": 7.498, "grad_norm": 1.2471674680709839, "learning_rate": 2e-05, "loss": 0.03323042, "step": 3749 }, { "epoch": 7.5, "grad_norm": 1.1750595569610596, "learning_rate": 2e-05, "loss": 0.0397836, "step": 3750 }, { "epoch": 7.502, "grad_norm": 1.7069101333618164, "learning_rate": 2e-05, "loss": 0.04767216, "step": 3751 }, { "epoch": 7.504, "grad_norm": 1.3713217973709106, "learning_rate": 2e-05, "loss": 0.04113061, "step": 3752 }, { "epoch": 7.506, "grad_norm": 1.2698354721069336, "learning_rate": 2e-05, "loss": 0.04726923, "step": 3753 }, { "epoch": 7.508, "grad_norm": 1.3195385932922363, "learning_rate": 2e-05, "loss": 0.04144061, "step": 3754 }, { "epoch": 7.51, "grad_norm": 1.405106782913208, "learning_rate": 2e-05, "loss": 0.04036184, "step": 3755 }, { "epoch": 7.5120000000000005, "grad_norm": 1.3161296844482422, "learning_rate": 2e-05, "loss": 0.03146423, "step": 3756 }, { "epoch": 7.514, "grad_norm": 1.8844631910324097, "learning_rate": 2e-05, "loss": 0.06133197, "step": 3757 }, { "epoch": 7.516, "grad_norm": 2.0802526473999023, "learning_rate": 2e-05, "loss": 0.04892286, "step": 3758 }, { "epoch": 7.518, "grad_norm": 3.128467082977295, "learning_rate": 2e-05, "loss": 0.06032344, "step": 3759 }, { "epoch": 7.52, "grad_norm": 2.1159846782684326, "learning_rate": 2e-05, "loss": 0.07681625, "step": 3760 }, { "epoch": 7.522, "grad_norm": 1.6027671098709106, "learning_rate": 2e-05, "loss": 0.05662168, "step": 3761 }, { "epoch": 7.524, "grad_norm": 2.217183828353882, "learning_rate": 2e-05, "loss": 0.06822868, "step": 3762 }, { "epoch": 7.526, "grad_norm": 1.5272791385650635, "learning_rate": 2e-05, "loss": 0.04064973, "step": 3763 }, { "epoch": 7.5280000000000005, "grad_norm": 1.4808307886123657, "learning_rate": 2e-05, "loss": 0.04885202, "step": 3764 }, { "epoch": 7.53, "grad_norm": 1.6249582767486572, "learning_rate": 2e-05, "loss": 0.05506381, "step": 3765 }, { "epoch": 7.532, "grad_norm": 1.6478004455566406, "learning_rate": 2e-05, "loss": 0.04652187, "step": 3766 }, { "epoch": 7.534, "grad_norm": 2.208526134490967, "learning_rate": 2e-05, "loss": 0.049362, "step": 3767 }, { "epoch": 7.536, "grad_norm": 1.3688409328460693, "learning_rate": 2e-05, "loss": 0.04084139, "step": 3768 }, { "epoch": 7.538, "grad_norm": 1.5067732334136963, "learning_rate": 2e-05, "loss": 0.04833493, "step": 3769 }, { "epoch": 7.54, "grad_norm": 1.175787091255188, "learning_rate": 2e-05, "loss": 0.04301163, "step": 3770 }, { "epoch": 7.542, "grad_norm": 2.0285022258758545, "learning_rate": 2e-05, "loss": 0.06193438, "step": 3771 }, { "epoch": 7.5440000000000005, "grad_norm": 1.2851672172546387, "learning_rate": 2e-05, "loss": 0.05366874, "step": 3772 }, { "epoch": 7.546, "grad_norm": 1.5538355112075806, "learning_rate": 2e-05, "loss": 0.06452001, "step": 3773 }, { "epoch": 7.548, "grad_norm": 1.1925280094146729, "learning_rate": 2e-05, "loss": 0.04862928, "step": 3774 }, { "epoch": 7.55, "grad_norm": 1.3164457082748413, "learning_rate": 2e-05, "loss": 0.04869618, "step": 3775 }, { "epoch": 7.552, "grad_norm": 1.8876863718032837, "learning_rate": 2e-05, "loss": 0.04987069, "step": 3776 }, { "epoch": 7.554, "grad_norm": 1.348827600479126, "learning_rate": 2e-05, "loss": 0.05645898, "step": 3777 }, { "epoch": 7.556, "grad_norm": 1.2659227848052979, "learning_rate": 2e-05, "loss": 0.04560403, "step": 3778 }, { "epoch": 7.558, "grad_norm": 1.553835391998291, "learning_rate": 2e-05, "loss": 0.05577955, "step": 3779 }, { "epoch": 7.5600000000000005, "grad_norm": 2.5734472274780273, "learning_rate": 2e-05, "loss": 0.05894909, "step": 3780 }, { "epoch": 7.562, "grad_norm": 1.9130885601043701, "learning_rate": 2e-05, "loss": 0.04794927, "step": 3781 }, { "epoch": 7.564, "grad_norm": 1.092248797416687, "learning_rate": 2e-05, "loss": 0.03762339, "step": 3782 }, { "epoch": 7.566, "grad_norm": 1.438640832901001, "learning_rate": 2e-05, "loss": 0.03907945, "step": 3783 }, { "epoch": 7.568, "grad_norm": 1.047423005104065, "learning_rate": 2e-05, "loss": 0.0316382, "step": 3784 }, { "epoch": 7.57, "grad_norm": 1.7063649892807007, "learning_rate": 2e-05, "loss": 0.04632139, "step": 3785 }, { "epoch": 7.572, "grad_norm": 1.954330325126648, "learning_rate": 2e-05, "loss": 0.05917243, "step": 3786 }, { "epoch": 7.574, "grad_norm": 1.586054801940918, "learning_rate": 2e-05, "loss": 0.06359352, "step": 3787 }, { "epoch": 7.576, "grad_norm": 1.6239789724349976, "learning_rate": 2e-05, "loss": 0.04376218, "step": 3788 }, { "epoch": 7.578, "grad_norm": 1.7396554946899414, "learning_rate": 2e-05, "loss": 0.04864378, "step": 3789 }, { "epoch": 7.58, "grad_norm": 1.1192086935043335, "learning_rate": 2e-05, "loss": 0.03764372, "step": 3790 }, { "epoch": 7.582, "grad_norm": 2.047725200653076, "learning_rate": 2e-05, "loss": 0.05432349, "step": 3791 }, { "epoch": 7.584, "grad_norm": 1.4690004587173462, "learning_rate": 2e-05, "loss": 0.05431195, "step": 3792 }, { "epoch": 7.586, "grad_norm": 1.6998225450515747, "learning_rate": 2e-05, "loss": 0.05252321, "step": 3793 }, { "epoch": 7.588, "grad_norm": 1.0173852443695068, "learning_rate": 2e-05, "loss": 0.03292936, "step": 3794 }, { "epoch": 7.59, "grad_norm": 1.2898446321487427, "learning_rate": 2e-05, "loss": 0.04756307, "step": 3795 }, { "epoch": 7.592, "grad_norm": 1.3239659070968628, "learning_rate": 2e-05, "loss": 0.03519905, "step": 3796 }, { "epoch": 7.594, "grad_norm": 0.9740890860557556, "learning_rate": 2e-05, "loss": 0.03270172, "step": 3797 }, { "epoch": 7.596, "grad_norm": 1.0018112659454346, "learning_rate": 2e-05, "loss": 0.03196118, "step": 3798 }, { "epoch": 7.598, "grad_norm": 1.5058925151824951, "learning_rate": 2e-05, "loss": 0.04523263, "step": 3799 }, { "epoch": 7.6, "grad_norm": 1.1637154817581177, "learning_rate": 2e-05, "loss": 0.03923118, "step": 3800 }, { "epoch": 7.602, "grad_norm": 1.1870934963226318, "learning_rate": 2e-05, "loss": 0.04084285, "step": 3801 }, { "epoch": 7.604, "grad_norm": 2.0652079582214355, "learning_rate": 2e-05, "loss": 0.05630117, "step": 3802 }, { "epoch": 7.606, "grad_norm": 2.6154839992523193, "learning_rate": 2e-05, "loss": 0.06760383, "step": 3803 }, { "epoch": 7.608, "grad_norm": 1.4074623584747314, "learning_rate": 2e-05, "loss": 0.04475513, "step": 3804 }, { "epoch": 7.61, "grad_norm": 1.3313047885894775, "learning_rate": 2e-05, "loss": 0.04672603, "step": 3805 }, { "epoch": 7.612, "grad_norm": 1.1976416110992432, "learning_rate": 2e-05, "loss": 0.04390628, "step": 3806 }, { "epoch": 7.614, "grad_norm": 2.146451950073242, "learning_rate": 2e-05, "loss": 0.05690853, "step": 3807 }, { "epoch": 7.616, "grad_norm": 1.3076081275939941, "learning_rate": 2e-05, "loss": 0.05246904, "step": 3808 }, { "epoch": 7.618, "grad_norm": 1.1059523820877075, "learning_rate": 2e-05, "loss": 0.02809783, "step": 3809 }, { "epoch": 7.62, "grad_norm": 1.2201027870178223, "learning_rate": 2e-05, "loss": 0.03628596, "step": 3810 }, { "epoch": 7.622, "grad_norm": 1.1609594821929932, "learning_rate": 2e-05, "loss": 0.03147845, "step": 3811 }, { "epoch": 7.624, "grad_norm": 1.2720181941986084, "learning_rate": 2e-05, "loss": 0.04290503, "step": 3812 }, { "epoch": 7.626, "grad_norm": 1.812412142753601, "learning_rate": 2e-05, "loss": 0.07085785, "step": 3813 }, { "epoch": 7.628, "grad_norm": 1.4819214344024658, "learning_rate": 2e-05, "loss": 0.05656476, "step": 3814 }, { "epoch": 7.63, "grad_norm": 1.9143245220184326, "learning_rate": 2e-05, "loss": 0.08178715, "step": 3815 }, { "epoch": 7.632, "grad_norm": 1.3047770261764526, "learning_rate": 2e-05, "loss": 0.05004913, "step": 3816 }, { "epoch": 7.634, "grad_norm": 1.793925404548645, "learning_rate": 2e-05, "loss": 0.04365033, "step": 3817 }, { "epoch": 7.636, "grad_norm": 1.9158854484558105, "learning_rate": 2e-05, "loss": 0.05776561, "step": 3818 }, { "epoch": 7.638, "grad_norm": 1.4316856861114502, "learning_rate": 2e-05, "loss": 0.04811051, "step": 3819 }, { "epoch": 7.64, "grad_norm": 1.6033645868301392, "learning_rate": 2e-05, "loss": 0.04948676, "step": 3820 }, { "epoch": 7.642, "grad_norm": 1.9721709489822388, "learning_rate": 2e-05, "loss": 0.07113369, "step": 3821 }, { "epoch": 7.644, "grad_norm": 2.0091919898986816, "learning_rate": 2e-05, "loss": 0.04521521, "step": 3822 }, { "epoch": 7.646, "grad_norm": 2.2790145874023438, "learning_rate": 2e-05, "loss": 0.06836452, "step": 3823 }, { "epoch": 7.648, "grad_norm": 1.6919300556182861, "learning_rate": 2e-05, "loss": 0.04315101, "step": 3824 }, { "epoch": 7.65, "grad_norm": 1.6368211507797241, "learning_rate": 2e-05, "loss": 0.06151176, "step": 3825 }, { "epoch": 7.652, "grad_norm": 1.644728660583496, "learning_rate": 2e-05, "loss": 0.05243739, "step": 3826 }, { "epoch": 7.654, "grad_norm": 2.1954734325408936, "learning_rate": 2e-05, "loss": 0.04884458, "step": 3827 }, { "epoch": 7.656, "grad_norm": 1.4791172742843628, "learning_rate": 2e-05, "loss": 0.04870691, "step": 3828 }, { "epoch": 7.658, "grad_norm": 1.6743173599243164, "learning_rate": 2e-05, "loss": 0.06012721, "step": 3829 }, { "epoch": 7.66, "grad_norm": 1.8005884885787964, "learning_rate": 2e-05, "loss": 0.05182242, "step": 3830 }, { "epoch": 7.662, "grad_norm": 1.710728645324707, "learning_rate": 2e-05, "loss": 0.05671708, "step": 3831 }, { "epoch": 7.664, "grad_norm": 1.4567813873291016, "learning_rate": 2e-05, "loss": 0.05406286, "step": 3832 }, { "epoch": 7.666, "grad_norm": 1.37957763671875, "learning_rate": 2e-05, "loss": 0.05166141, "step": 3833 }, { "epoch": 7.668, "grad_norm": 1.3298771381378174, "learning_rate": 2e-05, "loss": 0.04657075, "step": 3834 }, { "epoch": 7.67, "grad_norm": 1.678098440170288, "learning_rate": 2e-05, "loss": 0.06359571, "step": 3835 }, { "epoch": 7.672, "grad_norm": 1.3091044425964355, "learning_rate": 2e-05, "loss": 0.04521374, "step": 3836 }, { "epoch": 7.674, "grad_norm": 1.1188842058181763, "learning_rate": 2e-05, "loss": 0.05468755, "step": 3837 }, { "epoch": 7.676, "grad_norm": 1.4298003911972046, "learning_rate": 2e-05, "loss": 0.05605362, "step": 3838 }, { "epoch": 7.678, "grad_norm": 2.1561481952667236, "learning_rate": 2e-05, "loss": 0.05341341, "step": 3839 }, { "epoch": 7.68, "grad_norm": 1.3226423263549805, "learning_rate": 2e-05, "loss": 0.04852953, "step": 3840 }, { "epoch": 7.682, "grad_norm": 1.267012596130371, "learning_rate": 2e-05, "loss": 0.05608751, "step": 3841 }, { "epoch": 7.684, "grad_norm": 1.670479416847229, "learning_rate": 2e-05, "loss": 0.06202754, "step": 3842 }, { "epoch": 7.686, "grad_norm": 1.5169838666915894, "learning_rate": 2e-05, "loss": 0.04942085, "step": 3843 }, { "epoch": 7.688, "grad_norm": 1.5922930240631104, "learning_rate": 2e-05, "loss": 0.03477084, "step": 3844 }, { "epoch": 7.6899999999999995, "grad_norm": 1.939310908317566, "learning_rate": 2e-05, "loss": 0.03845644, "step": 3845 }, { "epoch": 7.692, "grad_norm": 1.3163906335830688, "learning_rate": 2e-05, "loss": 0.04649363, "step": 3846 }, { "epoch": 7.694, "grad_norm": 2.3533008098602295, "learning_rate": 2e-05, "loss": 0.05440918, "step": 3847 }, { "epoch": 7.696, "grad_norm": 2.0161502361297607, "learning_rate": 2e-05, "loss": 0.04734223, "step": 3848 }, { "epoch": 7.698, "grad_norm": 1.932399868965149, "learning_rate": 2e-05, "loss": 0.04660235, "step": 3849 }, { "epoch": 7.7, "grad_norm": 1.0067873001098633, "learning_rate": 2e-05, "loss": 0.03094147, "step": 3850 }, { "epoch": 7.702, "grad_norm": 1.2816112041473389, "learning_rate": 2e-05, "loss": 0.04377981, "step": 3851 }, { "epoch": 7.704, "grad_norm": 1.6109353303909302, "learning_rate": 2e-05, "loss": 0.04601968, "step": 3852 }, { "epoch": 7.7059999999999995, "grad_norm": 1.514350175857544, "learning_rate": 2e-05, "loss": 0.06082528, "step": 3853 }, { "epoch": 7.708, "grad_norm": 1.3705843687057495, "learning_rate": 2e-05, "loss": 0.04245031, "step": 3854 }, { "epoch": 7.71, "grad_norm": 1.340019941329956, "learning_rate": 2e-05, "loss": 0.03780312, "step": 3855 }, { "epoch": 7.712, "grad_norm": 1.9258254766464233, "learning_rate": 2e-05, "loss": 0.04252723, "step": 3856 }, { "epoch": 7.714, "grad_norm": 2.0864787101745605, "learning_rate": 2e-05, "loss": 0.05551865, "step": 3857 }, { "epoch": 7.716, "grad_norm": 1.5121456384658813, "learning_rate": 2e-05, "loss": 0.04484479, "step": 3858 }, { "epoch": 7.718, "grad_norm": 2.2497289180755615, "learning_rate": 2e-05, "loss": 0.06071467, "step": 3859 }, { "epoch": 7.72, "grad_norm": 2.1290595531463623, "learning_rate": 2e-05, "loss": 0.05557271, "step": 3860 }, { "epoch": 7.7219999999999995, "grad_norm": 1.1759511232376099, "learning_rate": 2e-05, "loss": 0.03121856, "step": 3861 }, { "epoch": 7.724, "grad_norm": 1.8583542108535767, "learning_rate": 2e-05, "loss": 0.04486027, "step": 3862 }, { "epoch": 7.726, "grad_norm": 1.753166913986206, "learning_rate": 2e-05, "loss": 0.05814328, "step": 3863 }, { "epoch": 7.728, "grad_norm": 1.2085702419281006, "learning_rate": 2e-05, "loss": 0.04606169, "step": 3864 }, { "epoch": 7.73, "grad_norm": 0.9439897537231445, "learning_rate": 2e-05, "loss": 0.03185405, "step": 3865 }, { "epoch": 7.732, "grad_norm": 1.1782246828079224, "learning_rate": 2e-05, "loss": 0.038643, "step": 3866 }, { "epoch": 7.734, "grad_norm": 2.3399665355682373, "learning_rate": 2e-05, "loss": 0.04473191, "step": 3867 }, { "epoch": 7.736, "grad_norm": 1.0290220975875854, "learning_rate": 2e-05, "loss": 0.03318851, "step": 3868 }, { "epoch": 7.7379999999999995, "grad_norm": 2.829364061355591, "learning_rate": 2e-05, "loss": 0.05109379, "step": 3869 }, { "epoch": 7.74, "grad_norm": 1.3606927394866943, "learning_rate": 2e-05, "loss": 0.05341338, "step": 3870 }, { "epoch": 7.742, "grad_norm": 1.5313520431518555, "learning_rate": 2e-05, "loss": 0.04662801, "step": 3871 }, { "epoch": 7.744, "grad_norm": 1.2733118534088135, "learning_rate": 2e-05, "loss": 0.04062459, "step": 3872 }, { "epoch": 7.746, "grad_norm": 1.3704501390457153, "learning_rate": 2e-05, "loss": 0.05422856, "step": 3873 }, { "epoch": 7.748, "grad_norm": 1.1714882850646973, "learning_rate": 2e-05, "loss": 0.04571648, "step": 3874 }, { "epoch": 7.75, "grad_norm": 1.7015575170516968, "learning_rate": 2e-05, "loss": 0.05158704, "step": 3875 }, { "epoch": 7.752, "grad_norm": 1.004583716392517, "learning_rate": 2e-05, "loss": 0.0300518, "step": 3876 }, { "epoch": 7.754, "grad_norm": 1.118895173072815, "learning_rate": 2e-05, "loss": 0.03498417, "step": 3877 }, { "epoch": 7.756, "grad_norm": 1.5967800617218018, "learning_rate": 2e-05, "loss": 0.05754827, "step": 3878 }, { "epoch": 7.758, "grad_norm": 2.8003337383270264, "learning_rate": 2e-05, "loss": 0.06484474, "step": 3879 }, { "epoch": 7.76, "grad_norm": 1.5037521123886108, "learning_rate": 2e-05, "loss": 0.05595599, "step": 3880 }, { "epoch": 7.7620000000000005, "grad_norm": 1.328269362449646, "learning_rate": 2e-05, "loss": 0.03779745, "step": 3881 }, { "epoch": 7.764, "grad_norm": 1.3670969009399414, "learning_rate": 2e-05, "loss": 0.04492682, "step": 3882 }, { "epoch": 7.766, "grad_norm": 1.441186785697937, "learning_rate": 2e-05, "loss": 0.04883204, "step": 3883 }, { "epoch": 7.768, "grad_norm": 1.1069203615188599, "learning_rate": 2e-05, "loss": 0.03243296, "step": 3884 }, { "epoch": 7.77, "grad_norm": 1.1207300424575806, "learning_rate": 2e-05, "loss": 0.0498887, "step": 3885 }, { "epoch": 7.772, "grad_norm": 1.2130826711654663, "learning_rate": 2e-05, "loss": 0.03474583, "step": 3886 }, { "epoch": 7.774, "grad_norm": 1.099841833114624, "learning_rate": 2e-05, "loss": 0.04760575, "step": 3887 }, { "epoch": 7.776, "grad_norm": 1.401013731956482, "learning_rate": 2e-05, "loss": 0.04593216, "step": 3888 }, { "epoch": 7.7780000000000005, "grad_norm": 0.9707529544830322, "learning_rate": 2e-05, "loss": 0.03742143, "step": 3889 }, { "epoch": 7.78, "grad_norm": 1.4640308618545532, "learning_rate": 2e-05, "loss": 0.05783337, "step": 3890 }, { "epoch": 7.782, "grad_norm": 1.4882322549819946, "learning_rate": 2e-05, "loss": 0.0567808, "step": 3891 }, { "epoch": 7.784, "grad_norm": 1.206899642944336, "learning_rate": 2e-05, "loss": 0.0442479, "step": 3892 }, { "epoch": 7.786, "grad_norm": 1.2967495918273926, "learning_rate": 2e-05, "loss": 0.04167457, "step": 3893 }, { "epoch": 7.788, "grad_norm": 1.8896660804748535, "learning_rate": 2e-05, "loss": 0.04580644, "step": 3894 }, { "epoch": 7.79, "grad_norm": 2.820158004760742, "learning_rate": 2e-05, "loss": 0.0647872, "step": 3895 }, { "epoch": 7.792, "grad_norm": 2.5546226501464844, "learning_rate": 2e-05, "loss": 0.07351266, "step": 3896 }, { "epoch": 7.7940000000000005, "grad_norm": 2.217388868331909, "learning_rate": 2e-05, "loss": 0.03897737, "step": 3897 }, { "epoch": 7.796, "grad_norm": 1.7939376831054688, "learning_rate": 2e-05, "loss": 0.06104586, "step": 3898 }, { "epoch": 7.798, "grad_norm": 1.0939199924468994, "learning_rate": 2e-05, "loss": 0.03686985, "step": 3899 }, { "epoch": 7.8, "grad_norm": 1.504327416419983, "learning_rate": 2e-05, "loss": 0.05772873, "step": 3900 }, { "epoch": 7.802, "grad_norm": 1.402275562286377, "learning_rate": 2e-05, "loss": 0.05212471, "step": 3901 }, { "epoch": 7.804, "grad_norm": 1.3643393516540527, "learning_rate": 2e-05, "loss": 0.04227594, "step": 3902 }, { "epoch": 7.806, "grad_norm": 1.3468226194381714, "learning_rate": 2e-05, "loss": 0.04262753, "step": 3903 }, { "epoch": 7.808, "grad_norm": 1.6918001174926758, "learning_rate": 2e-05, "loss": 0.04145945, "step": 3904 }, { "epoch": 7.8100000000000005, "grad_norm": 1.0832053422927856, "learning_rate": 2e-05, "loss": 0.03417937, "step": 3905 }, { "epoch": 7.812, "grad_norm": 2.516186237335205, "learning_rate": 2e-05, "loss": 0.07497337, "step": 3906 }, { "epoch": 7.814, "grad_norm": 3.0091376304626465, "learning_rate": 2e-05, "loss": 0.05103327, "step": 3907 }, { "epoch": 7.816, "grad_norm": 1.4199695587158203, "learning_rate": 2e-05, "loss": 0.04106236, "step": 3908 }, { "epoch": 7.818, "grad_norm": 2.151015281677246, "learning_rate": 2e-05, "loss": 0.05017766, "step": 3909 }, { "epoch": 7.82, "grad_norm": 1.086867094039917, "learning_rate": 2e-05, "loss": 0.04102978, "step": 3910 }, { "epoch": 7.822, "grad_norm": 1.1980141401290894, "learning_rate": 2e-05, "loss": 0.04687583, "step": 3911 }, { "epoch": 7.824, "grad_norm": 1.7949175834655762, "learning_rate": 2e-05, "loss": 0.04523139, "step": 3912 }, { "epoch": 7.826, "grad_norm": 1.5633015632629395, "learning_rate": 2e-05, "loss": 0.0565387, "step": 3913 }, { "epoch": 7.828, "grad_norm": 1.9414931535720825, "learning_rate": 2e-05, "loss": 0.03930401, "step": 3914 }, { "epoch": 7.83, "grad_norm": 1.1001695394515991, "learning_rate": 2e-05, "loss": 0.03971751, "step": 3915 }, { "epoch": 7.832, "grad_norm": 1.6626832485198975, "learning_rate": 2e-05, "loss": 0.06389378, "step": 3916 }, { "epoch": 7.834, "grad_norm": 1.166463017463684, "learning_rate": 2e-05, "loss": 0.03996446, "step": 3917 }, { "epoch": 7.836, "grad_norm": 1.091837763786316, "learning_rate": 2e-05, "loss": 0.03501406, "step": 3918 }, { "epoch": 7.838, "grad_norm": 1.5070854425430298, "learning_rate": 2e-05, "loss": 0.04527948, "step": 3919 }, { "epoch": 7.84, "grad_norm": 2.192519187927246, "learning_rate": 2e-05, "loss": 0.05735584, "step": 3920 }, { "epoch": 7.842, "grad_norm": 3.073923349380493, "learning_rate": 2e-05, "loss": 0.06089948, "step": 3921 }, { "epoch": 7.844, "grad_norm": 2.4988508224487305, "learning_rate": 2e-05, "loss": 0.06159104, "step": 3922 }, { "epoch": 7.846, "grad_norm": 1.6986947059631348, "learning_rate": 2e-05, "loss": 0.06097404, "step": 3923 }, { "epoch": 7.848, "grad_norm": 1.2336899042129517, "learning_rate": 2e-05, "loss": 0.0403936, "step": 3924 }, { "epoch": 7.85, "grad_norm": 3.7979280948638916, "learning_rate": 2e-05, "loss": 0.05501074, "step": 3925 }, { "epoch": 7.852, "grad_norm": 1.620132565498352, "learning_rate": 2e-05, "loss": 0.05394637, "step": 3926 }, { "epoch": 7.854, "grad_norm": 1.056098222732544, "learning_rate": 2e-05, "loss": 0.02777421, "step": 3927 }, { "epoch": 7.856, "grad_norm": 1.5885734558105469, "learning_rate": 2e-05, "loss": 0.05239529, "step": 3928 }, { "epoch": 7.858, "grad_norm": 1.4689480066299438, "learning_rate": 2e-05, "loss": 0.05046121, "step": 3929 }, { "epoch": 7.86, "grad_norm": 2.288593053817749, "learning_rate": 2e-05, "loss": 0.04710715, "step": 3930 }, { "epoch": 7.862, "grad_norm": 1.4412630796432495, "learning_rate": 2e-05, "loss": 0.04159814, "step": 3931 }, { "epoch": 7.864, "grad_norm": 1.4466181993484497, "learning_rate": 2e-05, "loss": 0.03699116, "step": 3932 }, { "epoch": 7.866, "grad_norm": 3.294027328491211, "learning_rate": 2e-05, "loss": 0.05937823, "step": 3933 }, { "epoch": 7.868, "grad_norm": 2.1902272701263428, "learning_rate": 2e-05, "loss": 0.06694526, "step": 3934 }, { "epoch": 7.87, "grad_norm": 2.0902066230773926, "learning_rate": 2e-05, "loss": 0.04954188, "step": 3935 }, { "epoch": 7.872, "grad_norm": 1.534563422203064, "learning_rate": 2e-05, "loss": 0.05196944, "step": 3936 }, { "epoch": 7.874, "grad_norm": 1.241812825202942, "learning_rate": 2e-05, "loss": 0.04576861, "step": 3937 }, { "epoch": 7.876, "grad_norm": 2.344264507293701, "learning_rate": 2e-05, "loss": 0.04945996, "step": 3938 }, { "epoch": 7.878, "grad_norm": 3.0257856845855713, "learning_rate": 2e-05, "loss": 0.05597422, "step": 3939 }, { "epoch": 7.88, "grad_norm": 1.6022652387619019, "learning_rate": 2e-05, "loss": 0.05853411, "step": 3940 }, { "epoch": 7.882, "grad_norm": 1.321745753288269, "learning_rate": 2e-05, "loss": 0.05200193, "step": 3941 }, { "epoch": 7.884, "grad_norm": 1.6581352949142456, "learning_rate": 2e-05, "loss": 0.05210207, "step": 3942 }, { "epoch": 7.886, "grad_norm": 2.1290030479431152, "learning_rate": 2e-05, "loss": 0.04898274, "step": 3943 }, { "epoch": 7.888, "grad_norm": 1.2903791666030884, "learning_rate": 2e-05, "loss": 0.03680386, "step": 3944 }, { "epoch": 7.89, "grad_norm": 1.1676714420318604, "learning_rate": 2e-05, "loss": 0.03573669, "step": 3945 }, { "epoch": 7.892, "grad_norm": 3.1287758350372314, "learning_rate": 2e-05, "loss": 0.05708075, "step": 3946 }, { "epoch": 7.894, "grad_norm": 1.6967209577560425, "learning_rate": 2e-05, "loss": 0.0523774, "step": 3947 }, { "epoch": 7.896, "grad_norm": 1.3352283239364624, "learning_rate": 2e-05, "loss": 0.05093125, "step": 3948 }, { "epoch": 7.898, "grad_norm": 1.4991921186447144, "learning_rate": 2e-05, "loss": 0.04498458, "step": 3949 }, { "epoch": 7.9, "grad_norm": 1.7871036529541016, "learning_rate": 2e-05, "loss": 0.05441068, "step": 3950 }, { "epoch": 7.902, "grad_norm": 1.8313108682632446, "learning_rate": 2e-05, "loss": 0.04300974, "step": 3951 }, { "epoch": 7.904, "grad_norm": 2.545856475830078, "learning_rate": 2e-05, "loss": 0.0370265, "step": 3952 }, { "epoch": 7.906, "grad_norm": 1.4351707696914673, "learning_rate": 2e-05, "loss": 0.04847009, "step": 3953 }, { "epoch": 7.908, "grad_norm": 1.5962963104248047, "learning_rate": 2e-05, "loss": 0.05077235, "step": 3954 }, { "epoch": 7.91, "grad_norm": 1.3633484840393066, "learning_rate": 2e-05, "loss": 0.04732682, "step": 3955 }, { "epoch": 7.912, "grad_norm": 1.209276556968689, "learning_rate": 2e-05, "loss": 0.03878414, "step": 3956 }, { "epoch": 7.914, "grad_norm": 1.957251787185669, "learning_rate": 2e-05, "loss": 0.07924426, "step": 3957 }, { "epoch": 7.916, "grad_norm": 3.1658568382263184, "learning_rate": 2e-05, "loss": 0.05959687, "step": 3958 }, { "epoch": 7.918, "grad_norm": 0.9807082414627075, "learning_rate": 2e-05, "loss": 0.03472539, "step": 3959 }, { "epoch": 7.92, "grad_norm": 2.003063917160034, "learning_rate": 2e-05, "loss": 0.05765334, "step": 3960 }, { "epoch": 7.922, "grad_norm": 1.3743501901626587, "learning_rate": 2e-05, "loss": 0.048442, "step": 3961 }, { "epoch": 7.924, "grad_norm": 1.9723905324935913, "learning_rate": 2e-05, "loss": 0.06652151, "step": 3962 }, { "epoch": 7.926, "grad_norm": 1.4909707307815552, "learning_rate": 2e-05, "loss": 0.03205038, "step": 3963 }, { "epoch": 7.928, "grad_norm": 1.1835213899612427, "learning_rate": 2e-05, "loss": 0.03661142, "step": 3964 }, { "epoch": 7.93, "grad_norm": 1.435006856918335, "learning_rate": 2e-05, "loss": 0.05666805, "step": 3965 }, { "epoch": 7.932, "grad_norm": 2.22232723236084, "learning_rate": 2e-05, "loss": 0.03182552, "step": 3966 }, { "epoch": 7.934, "grad_norm": 1.0161634683609009, "learning_rate": 2e-05, "loss": 0.03687178, "step": 3967 }, { "epoch": 7.936, "grad_norm": 1.145479679107666, "learning_rate": 2e-05, "loss": 0.04139556, "step": 3968 }, { "epoch": 7.938, "grad_norm": 1.3257615566253662, "learning_rate": 2e-05, "loss": 0.03712116, "step": 3969 }, { "epoch": 7.9399999999999995, "grad_norm": 1.5132187604904175, "learning_rate": 2e-05, "loss": 0.0525584, "step": 3970 }, { "epoch": 7.942, "grad_norm": 1.7809244394302368, "learning_rate": 2e-05, "loss": 0.05465652, "step": 3971 }, { "epoch": 7.944, "grad_norm": 1.6416901350021362, "learning_rate": 2e-05, "loss": 0.05641666, "step": 3972 }, { "epoch": 7.946, "grad_norm": 1.5238450765609741, "learning_rate": 2e-05, "loss": 0.03642607, "step": 3973 }, { "epoch": 7.948, "grad_norm": 2.732297420501709, "learning_rate": 2e-05, "loss": 0.05012231, "step": 3974 }, { "epoch": 7.95, "grad_norm": 1.3529413938522339, "learning_rate": 2e-05, "loss": 0.03961353, "step": 3975 }, { "epoch": 7.952, "grad_norm": 3.372781753540039, "learning_rate": 2e-05, "loss": 0.04853031, "step": 3976 }, { "epoch": 7.954, "grad_norm": 2.362414836883545, "learning_rate": 2e-05, "loss": 0.0558218, "step": 3977 }, { "epoch": 7.9559999999999995, "grad_norm": 1.9768744707107544, "learning_rate": 2e-05, "loss": 0.04110438, "step": 3978 }, { "epoch": 7.958, "grad_norm": 1.477582335472107, "learning_rate": 2e-05, "loss": 0.04463062, "step": 3979 }, { "epoch": 7.96, "grad_norm": 1.62968111038208, "learning_rate": 2e-05, "loss": 0.04137983, "step": 3980 }, { "epoch": 7.962, "grad_norm": 2.0086047649383545, "learning_rate": 2e-05, "loss": 0.05147458, "step": 3981 }, { "epoch": 7.964, "grad_norm": 2.941561222076416, "learning_rate": 2e-05, "loss": 0.05373877, "step": 3982 }, { "epoch": 7.966, "grad_norm": 1.2433993816375732, "learning_rate": 2e-05, "loss": 0.03581764, "step": 3983 }, { "epoch": 7.968, "grad_norm": 1.5138049125671387, "learning_rate": 2e-05, "loss": 0.04050705, "step": 3984 }, { "epoch": 7.97, "grad_norm": 1.289192795753479, "learning_rate": 2e-05, "loss": 0.04422689, "step": 3985 }, { "epoch": 7.9719999999999995, "grad_norm": 1.3445667028427124, "learning_rate": 2e-05, "loss": 0.03943577, "step": 3986 }, { "epoch": 7.974, "grad_norm": 1.3681025505065918, "learning_rate": 2e-05, "loss": 0.04766712, "step": 3987 }, { "epoch": 7.976, "grad_norm": 1.488904356956482, "learning_rate": 2e-05, "loss": 0.04435727, "step": 3988 }, { "epoch": 7.978, "grad_norm": 1.3543517589569092, "learning_rate": 2e-05, "loss": 0.05017151, "step": 3989 }, { "epoch": 7.98, "grad_norm": 1.1544119119644165, "learning_rate": 2e-05, "loss": 0.04686942, "step": 3990 }, { "epoch": 7.982, "grad_norm": 1.188092589378357, "learning_rate": 2e-05, "loss": 0.0426802, "step": 3991 }, { "epoch": 7.984, "grad_norm": 1.4243437051773071, "learning_rate": 2e-05, "loss": 0.06280725, "step": 3992 }, { "epoch": 7.986, "grad_norm": 1.6010160446166992, "learning_rate": 2e-05, "loss": 0.0427033, "step": 3993 }, { "epoch": 7.9879999999999995, "grad_norm": 1.1920783519744873, "learning_rate": 2e-05, "loss": 0.04067779, "step": 3994 }, { "epoch": 7.99, "grad_norm": 1.6925147771835327, "learning_rate": 2e-05, "loss": 0.05137225, "step": 3995 }, { "epoch": 7.992, "grad_norm": 1.1469167470932007, "learning_rate": 2e-05, "loss": 0.0329942, "step": 3996 }, { "epoch": 7.994, "grad_norm": 1.2153685092926025, "learning_rate": 2e-05, "loss": 0.0425185, "step": 3997 }, { "epoch": 7.996, "grad_norm": 1.2573164701461792, "learning_rate": 2e-05, "loss": 0.0418978, "step": 3998 }, { "epoch": 7.998, "grad_norm": 1.892161250114441, "learning_rate": 2e-05, "loss": 0.04587626, "step": 3999 }, { "epoch": 8.0, "grad_norm": 1.3999619483947754, "learning_rate": 2e-05, "loss": 0.0356279, "step": 4000 }, { "epoch": 8.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 0.994, "AngleClassification_3": 0.9241516966067864, "Equal_1": 0.992, "Equal_2": 0.9421157684630739, "Equal_3": 0.8323353293413174, "LineComparison_1": 0.996, "LineComparison_2": 0.9960079840319361, "LineComparison_3": 0.9760479041916168, "Parallel_1": 0.9839679358717435, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.988, "Perpendicular_1": 0.98, "Perpendicular_2": 0.838, "Perpendicular_3": 0.5160320641282565, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9996666666666667, "PointLiesOnCircle_3": 0.9848666666666667, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 0.9899799599198397, "PointLiesOnLine_3": 0.8702594810379242 }, "eval_runtime": 320.1064, "eval_samples_per_second": 32.802, "eval_steps_per_second": 0.656, "step": 4000 }, { "epoch": 8.002, "grad_norm": 1.324432611465454, "learning_rate": 2e-05, "loss": 0.05207051, "step": 4001 }, { "epoch": 8.004, "grad_norm": 1.0732799768447876, "learning_rate": 2e-05, "loss": 0.0246641, "step": 4002 }, { "epoch": 8.006, "grad_norm": 2.0701658725738525, "learning_rate": 2e-05, "loss": 0.04946818, "step": 4003 }, { "epoch": 8.008, "grad_norm": 1.8197208642959595, "learning_rate": 2e-05, "loss": 0.04853348, "step": 4004 }, { "epoch": 8.01, "grad_norm": 1.2051986455917358, "learning_rate": 2e-05, "loss": 0.03389344, "step": 4005 }, { "epoch": 8.012, "grad_norm": 1.289626121520996, "learning_rate": 2e-05, "loss": 0.04067516, "step": 4006 }, { "epoch": 8.014, "grad_norm": 3.4996466636657715, "learning_rate": 2e-05, "loss": 0.07795639, "step": 4007 }, { "epoch": 8.016, "grad_norm": 1.8532557487487793, "learning_rate": 2e-05, "loss": 0.04180763, "step": 4008 }, { "epoch": 8.018, "grad_norm": 1.4671931266784668, "learning_rate": 2e-05, "loss": 0.04495455, "step": 4009 }, { "epoch": 8.02, "grad_norm": 1.7721261978149414, "learning_rate": 2e-05, "loss": 0.04306563, "step": 4010 }, { "epoch": 8.022, "grad_norm": 1.5724709033966064, "learning_rate": 2e-05, "loss": 0.03561451, "step": 4011 }, { "epoch": 8.024, "grad_norm": 1.0541595220565796, "learning_rate": 2e-05, "loss": 0.03057979, "step": 4012 }, { "epoch": 8.026, "grad_norm": 1.8422211408615112, "learning_rate": 2e-05, "loss": 0.05177515, "step": 4013 }, { "epoch": 8.028, "grad_norm": 1.4579343795776367, "learning_rate": 2e-05, "loss": 0.04849966, "step": 4014 }, { "epoch": 8.03, "grad_norm": 1.6735895872116089, "learning_rate": 2e-05, "loss": 0.03308116, "step": 4015 }, { "epoch": 8.032, "grad_norm": 1.21261465549469, "learning_rate": 2e-05, "loss": 0.03874951, "step": 4016 }, { "epoch": 8.034, "grad_norm": 3.218160390853882, "learning_rate": 2e-05, "loss": 0.03598247, "step": 4017 }, { "epoch": 8.036, "grad_norm": 1.264754056930542, "learning_rate": 2e-05, "loss": 0.03381562, "step": 4018 }, { "epoch": 8.038, "grad_norm": 2.2951626777648926, "learning_rate": 2e-05, "loss": 0.05828879, "step": 4019 }, { "epoch": 8.04, "grad_norm": 1.1480064392089844, "learning_rate": 2e-05, "loss": 0.03656226, "step": 4020 }, { "epoch": 8.042, "grad_norm": 1.7028883695602417, "learning_rate": 2e-05, "loss": 0.0495864, "step": 4021 }, { "epoch": 8.044, "grad_norm": 2.4957919120788574, "learning_rate": 2e-05, "loss": 0.06445777, "step": 4022 }, { "epoch": 8.046, "grad_norm": 2.0943565368652344, "learning_rate": 2e-05, "loss": 0.05261732, "step": 4023 }, { "epoch": 8.048, "grad_norm": 2.1373465061187744, "learning_rate": 2e-05, "loss": 0.06218139, "step": 4024 }, { "epoch": 8.05, "grad_norm": 1.4479154348373413, "learning_rate": 2e-05, "loss": 0.05500597, "step": 4025 }, { "epoch": 8.052, "grad_norm": 1.107658863067627, "learning_rate": 2e-05, "loss": 0.04318401, "step": 4026 }, { "epoch": 8.054, "grad_norm": 1.5696206092834473, "learning_rate": 2e-05, "loss": 0.04423702, "step": 4027 }, { "epoch": 8.056, "grad_norm": 2.1380436420440674, "learning_rate": 2e-05, "loss": 0.04484853, "step": 4028 }, { "epoch": 8.058, "grad_norm": 1.4376975297927856, "learning_rate": 2e-05, "loss": 0.03389053, "step": 4029 }, { "epoch": 8.06, "grad_norm": 1.9726535081863403, "learning_rate": 2e-05, "loss": 0.05158387, "step": 4030 }, { "epoch": 8.062, "grad_norm": 1.9487265348434448, "learning_rate": 2e-05, "loss": 0.05221076, "step": 4031 }, { "epoch": 8.064, "grad_norm": 1.4403632879257202, "learning_rate": 2e-05, "loss": 0.04184157, "step": 4032 }, { "epoch": 8.066, "grad_norm": 1.2656970024108887, "learning_rate": 2e-05, "loss": 0.04085121, "step": 4033 }, { "epoch": 8.068, "grad_norm": 1.3084996938705444, "learning_rate": 2e-05, "loss": 0.03328889, "step": 4034 }, { "epoch": 8.07, "grad_norm": 1.760668396949768, "learning_rate": 2e-05, "loss": 0.04869121, "step": 4035 }, { "epoch": 8.072, "grad_norm": 1.6372207403182983, "learning_rate": 2e-05, "loss": 0.04539852, "step": 4036 }, { "epoch": 8.074, "grad_norm": 1.2221434116363525, "learning_rate": 2e-05, "loss": 0.03623684, "step": 4037 }, { "epoch": 8.076, "grad_norm": 1.080573558807373, "learning_rate": 2e-05, "loss": 0.03736069, "step": 4038 }, { "epoch": 8.078, "grad_norm": 1.135930061340332, "learning_rate": 2e-05, "loss": 0.02960359, "step": 4039 }, { "epoch": 8.08, "grad_norm": 1.2680519819259644, "learning_rate": 2e-05, "loss": 0.04433823, "step": 4040 }, { "epoch": 8.082, "grad_norm": 1.3253871202468872, "learning_rate": 2e-05, "loss": 0.03741252, "step": 4041 }, { "epoch": 8.084, "grad_norm": 1.1967227458953857, "learning_rate": 2e-05, "loss": 0.03461279, "step": 4042 }, { "epoch": 8.086, "grad_norm": 1.4370568990707397, "learning_rate": 2e-05, "loss": 0.05240814, "step": 4043 }, { "epoch": 8.088, "grad_norm": 2.1426353454589844, "learning_rate": 2e-05, "loss": 0.04719769, "step": 4044 }, { "epoch": 8.09, "grad_norm": 1.9252382516860962, "learning_rate": 2e-05, "loss": 0.05830365, "step": 4045 }, { "epoch": 8.092, "grad_norm": 1.4431771039962769, "learning_rate": 2e-05, "loss": 0.05209182, "step": 4046 }, { "epoch": 8.094, "grad_norm": 1.4242539405822754, "learning_rate": 2e-05, "loss": 0.04281589, "step": 4047 }, { "epoch": 8.096, "grad_norm": 2.547609567642212, "learning_rate": 2e-05, "loss": 0.04535336, "step": 4048 }, { "epoch": 8.098, "grad_norm": 5.5258965492248535, "learning_rate": 2e-05, "loss": 0.06601989, "step": 4049 }, { "epoch": 8.1, "grad_norm": 1.6703463792800903, "learning_rate": 2e-05, "loss": 0.05124013, "step": 4050 }, { "epoch": 8.102, "grad_norm": 1.6066875457763672, "learning_rate": 2e-05, "loss": 0.05647477, "step": 4051 }, { "epoch": 8.104, "grad_norm": 1.0216000080108643, "learning_rate": 2e-05, "loss": 0.03139941, "step": 4052 }, { "epoch": 8.106, "grad_norm": 1.099047303199768, "learning_rate": 2e-05, "loss": 0.0338769, "step": 4053 }, { "epoch": 8.108, "grad_norm": 1.3956516981124878, "learning_rate": 2e-05, "loss": 0.04579592, "step": 4054 }, { "epoch": 8.11, "grad_norm": 1.361737608909607, "learning_rate": 2e-05, "loss": 0.04546849, "step": 4055 }, { "epoch": 8.112, "grad_norm": 1.3461675643920898, "learning_rate": 2e-05, "loss": 0.05312871, "step": 4056 }, { "epoch": 8.114, "grad_norm": 1.6900047063827515, "learning_rate": 2e-05, "loss": 0.03799757, "step": 4057 }, { "epoch": 8.116, "grad_norm": 1.488451361656189, "learning_rate": 2e-05, "loss": 0.04809422, "step": 4058 }, { "epoch": 8.118, "grad_norm": 1.6791332960128784, "learning_rate": 2e-05, "loss": 0.05416831, "step": 4059 }, { "epoch": 8.12, "grad_norm": 1.0899131298065186, "learning_rate": 2e-05, "loss": 0.03874314, "step": 4060 }, { "epoch": 8.122, "grad_norm": 1.2762742042541504, "learning_rate": 2e-05, "loss": 0.04447027, "step": 4061 }, { "epoch": 8.124, "grad_norm": 1.5424968004226685, "learning_rate": 2e-05, "loss": 0.04579141, "step": 4062 }, { "epoch": 8.126, "grad_norm": 1.8162665367126465, "learning_rate": 2e-05, "loss": 0.04598666, "step": 4063 }, { "epoch": 8.128, "grad_norm": 1.428122639656067, "learning_rate": 2e-05, "loss": 0.05174505, "step": 4064 }, { "epoch": 8.13, "grad_norm": 1.538621187210083, "learning_rate": 2e-05, "loss": 0.03250303, "step": 4065 }, { "epoch": 8.132, "grad_norm": 1.3809401988983154, "learning_rate": 2e-05, "loss": 0.03839519, "step": 4066 }, { "epoch": 8.134, "grad_norm": 1.4454106092453003, "learning_rate": 2e-05, "loss": 0.0449599, "step": 4067 }, { "epoch": 8.136, "grad_norm": 1.3159528970718384, "learning_rate": 2e-05, "loss": 0.04408343, "step": 4068 }, { "epoch": 8.138, "grad_norm": 1.5110152959823608, "learning_rate": 2e-05, "loss": 0.03739053, "step": 4069 }, { "epoch": 8.14, "grad_norm": 1.500827431678772, "learning_rate": 2e-05, "loss": 0.04702216, "step": 4070 }, { "epoch": 8.142, "grad_norm": 1.5902290344238281, "learning_rate": 2e-05, "loss": 0.04143539, "step": 4071 }, { "epoch": 8.144, "grad_norm": 1.9934486150741577, "learning_rate": 2e-05, "loss": 0.03529008, "step": 4072 }, { "epoch": 8.146, "grad_norm": 1.8927077054977417, "learning_rate": 2e-05, "loss": 0.04800845, "step": 4073 }, { "epoch": 8.148, "grad_norm": 1.6216754913330078, "learning_rate": 2e-05, "loss": 0.05250569, "step": 4074 }, { "epoch": 8.15, "grad_norm": 1.7913944721221924, "learning_rate": 2e-05, "loss": 0.04616375, "step": 4075 }, { "epoch": 8.152, "grad_norm": 1.5655014514923096, "learning_rate": 2e-05, "loss": 0.05108138, "step": 4076 }, { "epoch": 8.154, "grad_norm": 1.6830389499664307, "learning_rate": 2e-05, "loss": 0.05072855, "step": 4077 }, { "epoch": 8.156, "grad_norm": 1.5033944845199585, "learning_rate": 2e-05, "loss": 0.04459769, "step": 4078 }, { "epoch": 8.158, "grad_norm": 1.5763124227523804, "learning_rate": 2e-05, "loss": 0.05702509, "step": 4079 }, { "epoch": 8.16, "grad_norm": 1.5542982816696167, "learning_rate": 2e-05, "loss": 0.05888487, "step": 4080 }, { "epoch": 8.162, "grad_norm": 1.8763879537582397, "learning_rate": 2e-05, "loss": 0.03832364, "step": 4081 }, { "epoch": 8.164, "grad_norm": 2.169970989227295, "learning_rate": 2e-05, "loss": 0.06408481, "step": 4082 }, { "epoch": 8.166, "grad_norm": 1.4158964157104492, "learning_rate": 2e-05, "loss": 0.0494574, "step": 4083 }, { "epoch": 8.168, "grad_norm": 1.2305322885513306, "learning_rate": 2e-05, "loss": 0.04329508, "step": 4084 }, { "epoch": 8.17, "grad_norm": 2.0390779972076416, "learning_rate": 2e-05, "loss": 0.03159539, "step": 4085 }, { "epoch": 8.172, "grad_norm": 1.616227149963379, "learning_rate": 2e-05, "loss": 0.08061463, "step": 4086 }, { "epoch": 8.174, "grad_norm": 2.116055965423584, "learning_rate": 2e-05, "loss": 0.05509023, "step": 4087 }, { "epoch": 8.176, "grad_norm": 1.04578697681427, "learning_rate": 2e-05, "loss": 0.03397291, "step": 4088 }, { "epoch": 8.178, "grad_norm": 1.4719500541687012, "learning_rate": 2e-05, "loss": 0.05373418, "step": 4089 }, { "epoch": 8.18, "grad_norm": 1.9806996583938599, "learning_rate": 2e-05, "loss": 0.07111174, "step": 4090 }, { "epoch": 8.182, "grad_norm": 1.30508291721344, "learning_rate": 2e-05, "loss": 0.04480116, "step": 4091 }, { "epoch": 8.184, "grad_norm": 0.9421553015708923, "learning_rate": 2e-05, "loss": 0.03321157, "step": 4092 }, { "epoch": 8.186, "grad_norm": 1.0021604299545288, "learning_rate": 2e-05, "loss": 0.04082393, "step": 4093 }, { "epoch": 8.188, "grad_norm": 1.6946673393249512, "learning_rate": 2e-05, "loss": 0.05384293, "step": 4094 }, { "epoch": 8.19, "grad_norm": 1.0426790714263916, "learning_rate": 2e-05, "loss": 0.03759111, "step": 4095 }, { "epoch": 8.192, "grad_norm": 2.0568313598632812, "learning_rate": 2e-05, "loss": 0.05517369, "step": 4096 }, { "epoch": 8.194, "grad_norm": 1.5960932970046997, "learning_rate": 2e-05, "loss": 0.06000062, "step": 4097 }, { "epoch": 8.196, "grad_norm": 1.7500308752059937, "learning_rate": 2e-05, "loss": 0.05646428, "step": 4098 }, { "epoch": 8.198, "grad_norm": 1.4026926755905151, "learning_rate": 2e-05, "loss": 0.03786077, "step": 4099 }, { "epoch": 8.2, "grad_norm": 1.0574769973754883, "learning_rate": 2e-05, "loss": 0.03918152, "step": 4100 }, { "epoch": 8.202, "grad_norm": 1.5338131189346313, "learning_rate": 2e-05, "loss": 0.04909813, "step": 4101 }, { "epoch": 8.204, "grad_norm": 1.2247587442398071, "learning_rate": 2e-05, "loss": 0.03550359, "step": 4102 }, { "epoch": 8.206, "grad_norm": 0.9067367315292358, "learning_rate": 2e-05, "loss": 0.02538646, "step": 4103 }, { "epoch": 8.208, "grad_norm": 1.0645413398742676, "learning_rate": 2e-05, "loss": 0.02908769, "step": 4104 }, { "epoch": 8.21, "grad_norm": 1.0075726509094238, "learning_rate": 2e-05, "loss": 0.03003458, "step": 4105 }, { "epoch": 8.212, "grad_norm": 1.2275352478027344, "learning_rate": 2e-05, "loss": 0.051549, "step": 4106 }, { "epoch": 8.214, "grad_norm": 1.427625298500061, "learning_rate": 2e-05, "loss": 0.04538945, "step": 4107 }, { "epoch": 8.216, "grad_norm": 1.29180908203125, "learning_rate": 2e-05, "loss": 0.04527157, "step": 4108 }, { "epoch": 8.218, "grad_norm": 1.678622841835022, "learning_rate": 2e-05, "loss": 0.03252411, "step": 4109 }, { "epoch": 8.22, "grad_norm": 2.289098024368286, "learning_rate": 2e-05, "loss": 0.0732346, "step": 4110 }, { "epoch": 8.222, "grad_norm": 1.325034499168396, "learning_rate": 2e-05, "loss": 0.05234051, "step": 4111 }, { "epoch": 8.224, "grad_norm": 1.978644847869873, "learning_rate": 2e-05, "loss": 0.05532134, "step": 4112 }, { "epoch": 8.226, "grad_norm": 1.7182114124298096, "learning_rate": 2e-05, "loss": 0.0578098, "step": 4113 }, { "epoch": 8.228, "grad_norm": 1.4846113920211792, "learning_rate": 2e-05, "loss": 0.04360331, "step": 4114 }, { "epoch": 8.23, "grad_norm": 1.358889102935791, "learning_rate": 2e-05, "loss": 0.05047805, "step": 4115 }, { "epoch": 8.232, "grad_norm": 1.1359084844589233, "learning_rate": 2e-05, "loss": 0.04568867, "step": 4116 }, { "epoch": 8.234, "grad_norm": 1.419180154800415, "learning_rate": 2e-05, "loss": 0.03961842, "step": 4117 }, { "epoch": 8.236, "grad_norm": 1.3813687562942505, "learning_rate": 2e-05, "loss": 0.05298204, "step": 4118 }, { "epoch": 8.238, "grad_norm": 1.8170478343963623, "learning_rate": 2e-05, "loss": 0.04838009, "step": 4119 }, { "epoch": 8.24, "grad_norm": 2.8637313842773438, "learning_rate": 2e-05, "loss": 0.06887215, "step": 4120 }, { "epoch": 8.242, "grad_norm": 1.631001591682434, "learning_rate": 2e-05, "loss": 0.04441953, "step": 4121 }, { "epoch": 8.244, "grad_norm": 1.4597910642623901, "learning_rate": 2e-05, "loss": 0.04484698, "step": 4122 }, { "epoch": 8.246, "grad_norm": 0.9799979329109192, "learning_rate": 2e-05, "loss": 0.02647844, "step": 4123 }, { "epoch": 8.248, "grad_norm": 1.7152873277664185, "learning_rate": 2e-05, "loss": 0.06567133, "step": 4124 }, { "epoch": 8.25, "grad_norm": 1.425673007965088, "learning_rate": 2e-05, "loss": 0.04011939, "step": 4125 }, { "epoch": 8.252, "grad_norm": 2.331421375274658, "learning_rate": 2e-05, "loss": 0.06116243, "step": 4126 }, { "epoch": 8.254, "grad_norm": 3.033942461013794, "learning_rate": 2e-05, "loss": 0.04823023, "step": 4127 }, { "epoch": 8.256, "grad_norm": 1.2400274276733398, "learning_rate": 2e-05, "loss": 0.03877744, "step": 4128 }, { "epoch": 8.258, "grad_norm": 1.145862340927124, "learning_rate": 2e-05, "loss": 0.04107977, "step": 4129 }, { "epoch": 8.26, "grad_norm": 1.4702391624450684, "learning_rate": 2e-05, "loss": 0.06288863, "step": 4130 }, { "epoch": 8.262, "grad_norm": 1.4819300174713135, "learning_rate": 2e-05, "loss": 0.05648782, "step": 4131 }, { "epoch": 8.264, "grad_norm": 1.4082889556884766, "learning_rate": 2e-05, "loss": 0.05002226, "step": 4132 }, { "epoch": 8.266, "grad_norm": 1.2530863285064697, "learning_rate": 2e-05, "loss": 0.04050343, "step": 4133 }, { "epoch": 8.268, "grad_norm": 1.1162209510803223, "learning_rate": 2e-05, "loss": 0.03798089, "step": 4134 }, { "epoch": 8.27, "grad_norm": 1.6518158912658691, "learning_rate": 2e-05, "loss": 0.04391007, "step": 4135 }, { "epoch": 8.272, "grad_norm": 1.1009466648101807, "learning_rate": 2e-05, "loss": 0.04458437, "step": 4136 }, { "epoch": 8.274000000000001, "grad_norm": 1.3596596717834473, "learning_rate": 2e-05, "loss": 0.0591647, "step": 4137 }, { "epoch": 8.276, "grad_norm": 1.1786128282546997, "learning_rate": 2e-05, "loss": 0.03626633, "step": 4138 }, { "epoch": 8.278, "grad_norm": 1.3034263849258423, "learning_rate": 2e-05, "loss": 0.03502483, "step": 4139 }, { "epoch": 8.28, "grad_norm": 1.229113221168518, "learning_rate": 2e-05, "loss": 0.05113623, "step": 4140 }, { "epoch": 8.282, "grad_norm": 1.813858985900879, "learning_rate": 2e-05, "loss": 0.04342901, "step": 4141 }, { "epoch": 8.284, "grad_norm": 1.5317081212997437, "learning_rate": 2e-05, "loss": 0.05725561, "step": 4142 }, { "epoch": 8.286, "grad_norm": 1.4534281492233276, "learning_rate": 2e-05, "loss": 0.04506959, "step": 4143 }, { "epoch": 8.288, "grad_norm": 1.4005870819091797, "learning_rate": 2e-05, "loss": 0.05307934, "step": 4144 }, { "epoch": 8.29, "grad_norm": 1.480741262435913, "learning_rate": 2e-05, "loss": 0.0603156, "step": 4145 }, { "epoch": 8.292, "grad_norm": 1.7137929201126099, "learning_rate": 2e-05, "loss": 0.06312002, "step": 4146 }, { "epoch": 8.294, "grad_norm": 1.4770913124084473, "learning_rate": 2e-05, "loss": 0.05373389, "step": 4147 }, { "epoch": 8.296, "grad_norm": 1.1751221418380737, "learning_rate": 2e-05, "loss": 0.03505269, "step": 4148 }, { "epoch": 8.298, "grad_norm": 1.420811653137207, "learning_rate": 2e-05, "loss": 0.03696169, "step": 4149 }, { "epoch": 8.3, "grad_norm": 1.4480655193328857, "learning_rate": 2e-05, "loss": 0.04662473, "step": 4150 }, { "epoch": 8.302, "grad_norm": 2.5621485710144043, "learning_rate": 2e-05, "loss": 0.04032537, "step": 4151 }, { "epoch": 8.304, "grad_norm": 1.8456149101257324, "learning_rate": 2e-05, "loss": 0.05323803, "step": 4152 }, { "epoch": 8.306, "grad_norm": 1.150632619857788, "learning_rate": 2e-05, "loss": 0.03429832, "step": 4153 }, { "epoch": 8.308, "grad_norm": 1.3768160343170166, "learning_rate": 2e-05, "loss": 0.04700703, "step": 4154 }, { "epoch": 8.31, "grad_norm": 3.1976876258850098, "learning_rate": 2e-05, "loss": 0.0506537, "step": 4155 }, { "epoch": 8.312, "grad_norm": 1.144446611404419, "learning_rate": 2e-05, "loss": 0.04074134, "step": 4156 }, { "epoch": 8.314, "grad_norm": 1.0326695442199707, "learning_rate": 2e-05, "loss": 0.03758462, "step": 4157 }, { "epoch": 8.316, "grad_norm": 1.5255597829818726, "learning_rate": 2e-05, "loss": 0.05389597, "step": 4158 }, { "epoch": 8.318, "grad_norm": 1.6066386699676514, "learning_rate": 2e-05, "loss": 0.04561181, "step": 4159 }, { "epoch": 8.32, "grad_norm": 1.2478289604187012, "learning_rate": 2e-05, "loss": 0.05142663, "step": 4160 }, { "epoch": 8.322, "grad_norm": 1.8503996133804321, "learning_rate": 2e-05, "loss": 0.04205081, "step": 4161 }, { "epoch": 8.324, "grad_norm": 2.421180486679077, "learning_rate": 2e-05, "loss": 0.03882204, "step": 4162 }, { "epoch": 8.326, "grad_norm": 1.386807918548584, "learning_rate": 2e-05, "loss": 0.05854031, "step": 4163 }, { "epoch": 8.328, "grad_norm": 2.2929842472076416, "learning_rate": 2e-05, "loss": 0.04718957, "step": 4164 }, { "epoch": 8.33, "grad_norm": 0.9168434739112854, "learning_rate": 2e-05, "loss": 0.02453806, "step": 4165 }, { "epoch": 8.332, "grad_norm": 6.381651401519775, "learning_rate": 2e-05, "loss": 0.06762838, "step": 4166 }, { "epoch": 8.334, "grad_norm": 1.249552607536316, "learning_rate": 2e-05, "loss": 0.0523265, "step": 4167 }, { "epoch": 8.336, "grad_norm": 1.3933881521224976, "learning_rate": 2e-05, "loss": 0.04636804, "step": 4168 }, { "epoch": 8.338, "grad_norm": 1.6516380310058594, "learning_rate": 2e-05, "loss": 0.04308794, "step": 4169 }, { "epoch": 8.34, "grad_norm": 1.5074326992034912, "learning_rate": 2e-05, "loss": 0.03881112, "step": 4170 }, { "epoch": 8.342, "grad_norm": 1.099740743637085, "learning_rate": 2e-05, "loss": 0.04978018, "step": 4171 }, { "epoch": 8.344, "grad_norm": 1.2606548070907593, "learning_rate": 2e-05, "loss": 0.04098994, "step": 4172 }, { "epoch": 8.346, "grad_norm": 1.293941617012024, "learning_rate": 2e-05, "loss": 0.04166168, "step": 4173 }, { "epoch": 8.348, "grad_norm": 1.2298349142074585, "learning_rate": 2e-05, "loss": 0.04704496, "step": 4174 }, { "epoch": 8.35, "grad_norm": 1.1148796081542969, "learning_rate": 2e-05, "loss": 0.04025009, "step": 4175 }, { "epoch": 8.352, "grad_norm": 1.6456156969070435, "learning_rate": 2e-05, "loss": 0.04465715, "step": 4176 }, { "epoch": 8.354, "grad_norm": 1.3357329368591309, "learning_rate": 2e-05, "loss": 0.04234711, "step": 4177 }, { "epoch": 8.356, "grad_norm": 1.0840578079223633, "learning_rate": 2e-05, "loss": 0.0413487, "step": 4178 }, { "epoch": 8.358, "grad_norm": 1.1986706256866455, "learning_rate": 2e-05, "loss": 0.04364356, "step": 4179 }, { "epoch": 8.36, "grad_norm": 1.2560433149337769, "learning_rate": 2e-05, "loss": 0.04892932, "step": 4180 }, { "epoch": 8.362, "grad_norm": 1.2587891817092896, "learning_rate": 2e-05, "loss": 0.04414427, "step": 4181 }, { "epoch": 8.364, "grad_norm": 1.3355039358139038, "learning_rate": 2e-05, "loss": 0.05022869, "step": 4182 }, { "epoch": 8.366, "grad_norm": 1.277706503868103, "learning_rate": 2e-05, "loss": 0.04096631, "step": 4183 }, { "epoch": 8.368, "grad_norm": 1.781320333480835, "learning_rate": 2e-05, "loss": 0.0590826, "step": 4184 }, { "epoch": 8.37, "grad_norm": 1.9013862609863281, "learning_rate": 2e-05, "loss": 0.04787229, "step": 4185 }, { "epoch": 8.372, "grad_norm": 1.6217079162597656, "learning_rate": 2e-05, "loss": 0.05203458, "step": 4186 }, { "epoch": 8.374, "grad_norm": 1.394450068473816, "learning_rate": 2e-05, "loss": 0.05752437, "step": 4187 }, { "epoch": 8.376, "grad_norm": 0.8604865670204163, "learning_rate": 2e-05, "loss": 0.02740677, "step": 4188 }, { "epoch": 8.378, "grad_norm": 1.4956645965576172, "learning_rate": 2e-05, "loss": 0.0441338, "step": 4189 }, { "epoch": 8.38, "grad_norm": 1.9489855766296387, "learning_rate": 2e-05, "loss": 0.07236366, "step": 4190 }, { "epoch": 8.382, "grad_norm": 1.3766798973083496, "learning_rate": 2e-05, "loss": 0.04797775, "step": 4191 }, { "epoch": 8.384, "grad_norm": 1.082892894744873, "learning_rate": 2e-05, "loss": 0.04361612, "step": 4192 }, { "epoch": 8.386, "grad_norm": 1.0579158067703247, "learning_rate": 2e-05, "loss": 0.04247576, "step": 4193 }, { "epoch": 8.388, "grad_norm": 2.016787052154541, "learning_rate": 2e-05, "loss": 0.05003262, "step": 4194 }, { "epoch": 8.39, "grad_norm": 1.2439075708389282, "learning_rate": 2e-05, "loss": 0.04027151, "step": 4195 }, { "epoch": 8.392, "grad_norm": 1.218614935874939, "learning_rate": 2e-05, "loss": 0.04031168, "step": 4196 }, { "epoch": 8.394, "grad_norm": 1.2276456356048584, "learning_rate": 2e-05, "loss": 0.04959612, "step": 4197 }, { "epoch": 8.396, "grad_norm": 1.965421199798584, "learning_rate": 2e-05, "loss": 0.04630712, "step": 4198 }, { "epoch": 8.398, "grad_norm": 1.103851318359375, "learning_rate": 2e-05, "loss": 0.03305928, "step": 4199 }, { "epoch": 8.4, "grad_norm": 1.183467149734497, "learning_rate": 2e-05, "loss": 0.04819242, "step": 4200 }, { "epoch": 8.402, "grad_norm": 1.2708202600479126, "learning_rate": 2e-05, "loss": 0.04293144, "step": 4201 }, { "epoch": 8.404, "grad_norm": 1.534544587135315, "learning_rate": 2e-05, "loss": 0.04298072, "step": 4202 }, { "epoch": 8.406, "grad_norm": 1.9671140909194946, "learning_rate": 2e-05, "loss": 0.05531413, "step": 4203 }, { "epoch": 8.408, "grad_norm": 1.3386366367340088, "learning_rate": 2e-05, "loss": 0.03748547, "step": 4204 }, { "epoch": 8.41, "grad_norm": 2.237844467163086, "learning_rate": 2e-05, "loss": 0.05199295, "step": 4205 }, { "epoch": 8.412, "grad_norm": 1.5903010368347168, "learning_rate": 2e-05, "loss": 0.04232503, "step": 4206 }, { "epoch": 8.414, "grad_norm": 1.1368825435638428, "learning_rate": 2e-05, "loss": 0.03277536, "step": 4207 }, { "epoch": 8.416, "grad_norm": 1.1275688409805298, "learning_rate": 2e-05, "loss": 0.04298157, "step": 4208 }, { "epoch": 8.418, "grad_norm": 1.7428966760635376, "learning_rate": 2e-05, "loss": 0.05383879, "step": 4209 }, { "epoch": 8.42, "grad_norm": 1.735756516456604, "learning_rate": 2e-05, "loss": 0.06661352, "step": 4210 }, { "epoch": 8.422, "grad_norm": 1.376869797706604, "learning_rate": 2e-05, "loss": 0.04623829, "step": 4211 }, { "epoch": 8.424, "grad_norm": 2.886859178543091, "learning_rate": 2e-05, "loss": 0.06156854, "step": 4212 }, { "epoch": 8.426, "grad_norm": 1.3723516464233398, "learning_rate": 2e-05, "loss": 0.04914469, "step": 4213 }, { "epoch": 8.428, "grad_norm": 1.2038379907608032, "learning_rate": 2e-05, "loss": 0.0332229, "step": 4214 }, { "epoch": 8.43, "grad_norm": 1.1281368732452393, "learning_rate": 2e-05, "loss": 0.03839248, "step": 4215 }, { "epoch": 8.432, "grad_norm": 1.2697054147720337, "learning_rate": 2e-05, "loss": 0.03925816, "step": 4216 }, { "epoch": 8.434, "grad_norm": 1.1074013710021973, "learning_rate": 2e-05, "loss": 0.04592735, "step": 4217 }, { "epoch": 8.436, "grad_norm": 1.722346305847168, "learning_rate": 2e-05, "loss": 0.05641073, "step": 4218 }, { "epoch": 8.438, "grad_norm": 1.3256508111953735, "learning_rate": 2e-05, "loss": 0.04843621, "step": 4219 }, { "epoch": 8.44, "grad_norm": 1.3698394298553467, "learning_rate": 2e-05, "loss": 0.04274657, "step": 4220 }, { "epoch": 8.442, "grad_norm": 1.7315605878829956, "learning_rate": 2e-05, "loss": 0.05456549, "step": 4221 }, { "epoch": 8.444, "grad_norm": 1.2529323101043701, "learning_rate": 2e-05, "loss": 0.02203387, "step": 4222 }, { "epoch": 8.446, "grad_norm": 2.7959108352661133, "learning_rate": 2e-05, "loss": 0.05525438, "step": 4223 }, { "epoch": 8.448, "grad_norm": 1.3126109838485718, "learning_rate": 2e-05, "loss": 0.0481948, "step": 4224 }, { "epoch": 8.45, "grad_norm": 1.4437806606292725, "learning_rate": 2e-05, "loss": 0.05020737, "step": 4225 }, { "epoch": 8.452, "grad_norm": 1.2649277448654175, "learning_rate": 2e-05, "loss": 0.04435945, "step": 4226 }, { "epoch": 8.454, "grad_norm": 1.9862298965454102, "learning_rate": 2e-05, "loss": 0.04354966, "step": 4227 }, { "epoch": 8.456, "grad_norm": 1.2959977388381958, "learning_rate": 2e-05, "loss": 0.04303737, "step": 4228 }, { "epoch": 8.458, "grad_norm": 1.9561591148376465, "learning_rate": 2e-05, "loss": 0.04295287, "step": 4229 }, { "epoch": 8.46, "grad_norm": 1.8894590139389038, "learning_rate": 2e-05, "loss": 0.04403112, "step": 4230 }, { "epoch": 8.462, "grad_norm": 2.1889703273773193, "learning_rate": 2e-05, "loss": 0.06404384, "step": 4231 }, { "epoch": 8.464, "grad_norm": 1.6557297706604004, "learning_rate": 2e-05, "loss": 0.05207235, "step": 4232 }, { "epoch": 8.466, "grad_norm": 1.6295475959777832, "learning_rate": 2e-05, "loss": 0.04500223, "step": 4233 }, { "epoch": 8.468, "grad_norm": 1.411564588546753, "learning_rate": 2e-05, "loss": 0.04487611, "step": 4234 }, { "epoch": 8.47, "grad_norm": 1.886080026626587, "learning_rate": 2e-05, "loss": 0.03916345, "step": 4235 }, { "epoch": 8.472, "grad_norm": 1.2690297365188599, "learning_rate": 2e-05, "loss": 0.03984193, "step": 4236 }, { "epoch": 8.474, "grad_norm": 1.3404802083969116, "learning_rate": 2e-05, "loss": 0.05036929, "step": 4237 }, { "epoch": 8.475999999999999, "grad_norm": 1.5249813795089722, "learning_rate": 2e-05, "loss": 0.04390936, "step": 4238 }, { "epoch": 8.478, "grad_norm": 1.550538182258606, "learning_rate": 2e-05, "loss": 0.06574559, "step": 4239 }, { "epoch": 8.48, "grad_norm": 1.3451995849609375, "learning_rate": 2e-05, "loss": 0.04779359, "step": 4240 }, { "epoch": 8.482, "grad_norm": 1.6042060852050781, "learning_rate": 2e-05, "loss": 0.04354705, "step": 4241 }, { "epoch": 8.484, "grad_norm": 1.3204594850540161, "learning_rate": 2e-05, "loss": 0.03058997, "step": 4242 }, { "epoch": 8.486, "grad_norm": 1.1289502382278442, "learning_rate": 2e-05, "loss": 0.04202674, "step": 4243 }, { "epoch": 8.488, "grad_norm": 1.6240679025650024, "learning_rate": 2e-05, "loss": 0.05241815, "step": 4244 }, { "epoch": 8.49, "grad_norm": 1.0163280963897705, "learning_rate": 2e-05, "loss": 0.03246521, "step": 4245 }, { "epoch": 8.492, "grad_norm": 1.3230700492858887, "learning_rate": 2e-05, "loss": 0.04476965, "step": 4246 }, { "epoch": 8.494, "grad_norm": 1.686160683631897, "learning_rate": 2e-05, "loss": 0.05470574, "step": 4247 }, { "epoch": 8.496, "grad_norm": 1.419310450553894, "learning_rate": 2e-05, "loss": 0.04859785, "step": 4248 }, { "epoch": 8.498, "grad_norm": 1.431846261024475, "learning_rate": 2e-05, "loss": 0.04167376, "step": 4249 }, { "epoch": 8.5, "grad_norm": 1.4465125799179077, "learning_rate": 2e-05, "loss": 0.06550825, "step": 4250 }, { "epoch": 8.502, "grad_norm": 2.1822566986083984, "learning_rate": 2e-05, "loss": 0.05803905, "step": 4251 }, { "epoch": 8.504, "grad_norm": 1.2648488283157349, "learning_rate": 2e-05, "loss": 0.05157655, "step": 4252 }, { "epoch": 8.506, "grad_norm": 1.1298472881317139, "learning_rate": 2e-05, "loss": 0.04487451, "step": 4253 }, { "epoch": 8.508, "grad_norm": 1.696395754814148, "learning_rate": 2e-05, "loss": 0.04066026, "step": 4254 }, { "epoch": 8.51, "grad_norm": 1.068587064743042, "learning_rate": 2e-05, "loss": 0.02926126, "step": 4255 }, { "epoch": 8.512, "grad_norm": 1.2515802383422852, "learning_rate": 2e-05, "loss": 0.04963365, "step": 4256 }, { "epoch": 8.514, "grad_norm": 1.1776715517044067, "learning_rate": 2e-05, "loss": 0.03471005, "step": 4257 }, { "epoch": 8.516, "grad_norm": 1.47025728225708, "learning_rate": 2e-05, "loss": 0.04580873, "step": 4258 }, { "epoch": 8.518, "grad_norm": 1.3387584686279297, "learning_rate": 2e-05, "loss": 0.0392997, "step": 4259 }, { "epoch": 8.52, "grad_norm": 1.7431012392044067, "learning_rate": 2e-05, "loss": 0.04956745, "step": 4260 }, { "epoch": 8.522, "grad_norm": 2.1029186248779297, "learning_rate": 2e-05, "loss": 0.0650689, "step": 4261 }, { "epoch": 8.524000000000001, "grad_norm": 1.218745470046997, "learning_rate": 2e-05, "loss": 0.03940692, "step": 4262 }, { "epoch": 8.526, "grad_norm": 1.8055042028427124, "learning_rate": 2e-05, "loss": 0.04726121, "step": 4263 }, { "epoch": 8.528, "grad_norm": 1.2838687896728516, "learning_rate": 2e-05, "loss": 0.04223323, "step": 4264 }, { "epoch": 8.53, "grad_norm": 1.1817346811294556, "learning_rate": 2e-05, "loss": 0.03910241, "step": 4265 }, { "epoch": 8.532, "grad_norm": 2.022214651107788, "learning_rate": 2e-05, "loss": 0.05597316, "step": 4266 }, { "epoch": 8.534, "grad_norm": 2.186800479888916, "learning_rate": 2e-05, "loss": 0.03829915, "step": 4267 }, { "epoch": 8.536, "grad_norm": 1.6917003393173218, "learning_rate": 2e-05, "loss": 0.05600104, "step": 4268 }, { "epoch": 8.538, "grad_norm": 1.4837727546691895, "learning_rate": 2e-05, "loss": 0.04096905, "step": 4269 }, { "epoch": 8.54, "grad_norm": 1.8344814777374268, "learning_rate": 2e-05, "loss": 0.0464284, "step": 4270 }, { "epoch": 8.542, "grad_norm": 1.7132585048675537, "learning_rate": 2e-05, "loss": 0.04327628, "step": 4271 }, { "epoch": 8.544, "grad_norm": 1.674782395362854, "learning_rate": 2e-05, "loss": 0.04533742, "step": 4272 }, { "epoch": 8.546, "grad_norm": 1.6211055517196655, "learning_rate": 2e-05, "loss": 0.04555016, "step": 4273 }, { "epoch": 8.548, "grad_norm": 2.098546266555786, "learning_rate": 2e-05, "loss": 0.06424834, "step": 4274 }, { "epoch": 8.55, "grad_norm": 1.812697172164917, "learning_rate": 2e-05, "loss": 0.05120952, "step": 4275 }, { "epoch": 8.552, "grad_norm": 1.0520689487457275, "learning_rate": 2e-05, "loss": 0.03506008, "step": 4276 }, { "epoch": 8.554, "grad_norm": 1.5413808822631836, "learning_rate": 2e-05, "loss": 0.03506286, "step": 4277 }, { "epoch": 8.556000000000001, "grad_norm": 0.9895492196083069, "learning_rate": 2e-05, "loss": 0.03194591, "step": 4278 }, { "epoch": 8.558, "grad_norm": 2.4871461391448975, "learning_rate": 2e-05, "loss": 0.04312088, "step": 4279 }, { "epoch": 8.56, "grad_norm": 2.135133743286133, "learning_rate": 2e-05, "loss": 0.05813865, "step": 4280 }, { "epoch": 8.562, "grad_norm": 1.7616969347000122, "learning_rate": 2e-05, "loss": 0.05905205, "step": 4281 }, { "epoch": 8.564, "grad_norm": 1.1986794471740723, "learning_rate": 2e-05, "loss": 0.04172123, "step": 4282 }, { "epoch": 8.566, "grad_norm": 1.0787440538406372, "learning_rate": 2e-05, "loss": 0.03688566, "step": 4283 }, { "epoch": 8.568, "grad_norm": 1.3387644290924072, "learning_rate": 2e-05, "loss": 0.04537062, "step": 4284 }, { "epoch": 8.57, "grad_norm": 2.0228538513183594, "learning_rate": 2e-05, "loss": 0.06465685, "step": 4285 }, { "epoch": 8.572, "grad_norm": 1.7300091981887817, "learning_rate": 2e-05, "loss": 0.05473095, "step": 4286 }, { "epoch": 8.574, "grad_norm": 1.032915711402893, "learning_rate": 2e-05, "loss": 0.0316976, "step": 4287 }, { "epoch": 8.576, "grad_norm": 1.168629765510559, "learning_rate": 2e-05, "loss": 0.0534174, "step": 4288 }, { "epoch": 8.578, "grad_norm": 1.6948527097702026, "learning_rate": 2e-05, "loss": 0.05644149, "step": 4289 }, { "epoch": 8.58, "grad_norm": 1.5725301504135132, "learning_rate": 2e-05, "loss": 0.04966125, "step": 4290 }, { "epoch": 8.582, "grad_norm": 1.6310228109359741, "learning_rate": 2e-05, "loss": 0.04760915, "step": 4291 }, { "epoch": 8.584, "grad_norm": 1.232686996459961, "learning_rate": 2e-05, "loss": 0.03924007, "step": 4292 }, { "epoch": 8.586, "grad_norm": 1.2287647724151611, "learning_rate": 2e-05, "loss": 0.03555786, "step": 4293 }, { "epoch": 8.588, "grad_norm": 2.1337671279907227, "learning_rate": 2e-05, "loss": 0.03892121, "step": 4294 }, { "epoch": 8.59, "grad_norm": 0.8796424865722656, "learning_rate": 2e-05, "loss": 0.02620396, "step": 4295 }, { "epoch": 8.592, "grad_norm": 1.5610501766204834, "learning_rate": 2e-05, "loss": 0.05278179, "step": 4296 }, { "epoch": 8.594, "grad_norm": 1.3259083032608032, "learning_rate": 2e-05, "loss": 0.05059385, "step": 4297 }, { "epoch": 8.596, "grad_norm": 1.2965830564498901, "learning_rate": 2e-05, "loss": 0.04437011, "step": 4298 }, { "epoch": 8.598, "grad_norm": 1.28528892993927, "learning_rate": 2e-05, "loss": 0.04673284, "step": 4299 }, { "epoch": 8.6, "grad_norm": 1.567898154258728, "learning_rate": 2e-05, "loss": 0.05069918, "step": 4300 }, { "epoch": 8.602, "grad_norm": 1.5278816223144531, "learning_rate": 2e-05, "loss": 0.04407776, "step": 4301 }, { "epoch": 8.604, "grad_norm": 2.5078423023223877, "learning_rate": 2e-05, "loss": 0.04832874, "step": 4302 }, { "epoch": 8.606, "grad_norm": 3.015632390975952, "learning_rate": 2e-05, "loss": 0.05268471, "step": 4303 }, { "epoch": 8.608, "grad_norm": 1.8364429473876953, "learning_rate": 2e-05, "loss": 0.04438996, "step": 4304 }, { "epoch": 8.61, "grad_norm": 2.421682119369507, "learning_rate": 2e-05, "loss": 0.05156955, "step": 4305 }, { "epoch": 8.612, "grad_norm": 1.4038487672805786, "learning_rate": 2e-05, "loss": 0.03900445, "step": 4306 }, { "epoch": 8.614, "grad_norm": 1.6436742544174194, "learning_rate": 2e-05, "loss": 0.05502493, "step": 4307 }, { "epoch": 8.616, "grad_norm": 1.551708459854126, "learning_rate": 2e-05, "loss": 0.052692, "step": 4308 }, { "epoch": 8.618, "grad_norm": 1.148949384689331, "learning_rate": 2e-05, "loss": 0.04152766, "step": 4309 }, { "epoch": 8.62, "grad_norm": 1.9535950422286987, "learning_rate": 2e-05, "loss": 0.04875577, "step": 4310 }, { "epoch": 8.622, "grad_norm": 2.445028066635132, "learning_rate": 2e-05, "loss": 0.05650291, "step": 4311 }, { "epoch": 8.624, "grad_norm": 1.3438160419464111, "learning_rate": 2e-05, "loss": 0.04274925, "step": 4312 }, { "epoch": 8.626, "grad_norm": 1.1872248649597168, "learning_rate": 2e-05, "loss": 0.05045023, "step": 4313 }, { "epoch": 8.628, "grad_norm": 0.948291540145874, "learning_rate": 2e-05, "loss": 0.03525672, "step": 4314 }, { "epoch": 8.63, "grad_norm": 1.2028381824493408, "learning_rate": 2e-05, "loss": 0.03516944, "step": 4315 }, { "epoch": 8.632, "grad_norm": 1.463492512702942, "learning_rate": 2e-05, "loss": 0.05510323, "step": 4316 }, { "epoch": 8.634, "grad_norm": 1.7272472381591797, "learning_rate": 2e-05, "loss": 0.04611889, "step": 4317 }, { "epoch": 8.636, "grad_norm": 1.8609179258346558, "learning_rate": 2e-05, "loss": 0.05178494, "step": 4318 }, { "epoch": 8.638, "grad_norm": 1.6433526277542114, "learning_rate": 2e-05, "loss": 0.05495635, "step": 4319 }, { "epoch": 8.64, "grad_norm": 2.0367109775543213, "learning_rate": 2e-05, "loss": 0.05452634, "step": 4320 }, { "epoch": 8.642, "grad_norm": 1.3078205585479736, "learning_rate": 2e-05, "loss": 0.04912869, "step": 4321 }, { "epoch": 8.644, "grad_norm": 1.7824225425720215, "learning_rate": 2e-05, "loss": 0.06498539, "step": 4322 }, { "epoch": 8.646, "grad_norm": 1.2698032855987549, "learning_rate": 2e-05, "loss": 0.03695104, "step": 4323 }, { "epoch": 8.648, "grad_norm": 1.456852912902832, "learning_rate": 2e-05, "loss": 0.04406907, "step": 4324 }, { "epoch": 8.65, "grad_norm": 1.1602133512496948, "learning_rate": 2e-05, "loss": 0.03222954, "step": 4325 }, { "epoch": 8.652, "grad_norm": 1.945662021636963, "learning_rate": 2e-05, "loss": 0.05972052, "step": 4326 }, { "epoch": 8.654, "grad_norm": 1.0064737796783447, "learning_rate": 2e-05, "loss": 0.03110239, "step": 4327 }, { "epoch": 8.656, "grad_norm": 1.220845103263855, "learning_rate": 2e-05, "loss": 0.04152022, "step": 4328 }, { "epoch": 8.658, "grad_norm": 1.2207328081130981, "learning_rate": 2e-05, "loss": 0.0476977, "step": 4329 }, { "epoch": 8.66, "grad_norm": 2.3454084396362305, "learning_rate": 2e-05, "loss": 0.05494661, "step": 4330 }, { "epoch": 8.662, "grad_norm": 1.1374601125717163, "learning_rate": 2e-05, "loss": 0.04281973, "step": 4331 }, { "epoch": 8.664, "grad_norm": 1.3726214170455933, "learning_rate": 2e-05, "loss": 0.0457747, "step": 4332 }, { "epoch": 8.666, "grad_norm": 2.3275725841522217, "learning_rate": 2e-05, "loss": 0.0412515, "step": 4333 }, { "epoch": 8.668, "grad_norm": 1.3664522171020508, "learning_rate": 2e-05, "loss": 0.04096291, "step": 4334 }, { "epoch": 8.67, "grad_norm": 1.359914779663086, "learning_rate": 2e-05, "loss": 0.04645447, "step": 4335 }, { "epoch": 8.672, "grad_norm": 1.2743496894836426, "learning_rate": 2e-05, "loss": 0.03728715, "step": 4336 }, { "epoch": 8.674, "grad_norm": 1.1057415008544922, "learning_rate": 2e-05, "loss": 0.03883886, "step": 4337 }, { "epoch": 8.676, "grad_norm": 2.738429546356201, "learning_rate": 2e-05, "loss": 0.05373093, "step": 4338 }, { "epoch": 8.678, "grad_norm": 1.3785386085510254, "learning_rate": 2e-05, "loss": 0.04660262, "step": 4339 }, { "epoch": 8.68, "grad_norm": 1.4276515245437622, "learning_rate": 2e-05, "loss": 0.04423345, "step": 4340 }, { "epoch": 8.682, "grad_norm": 1.684853196144104, "learning_rate": 2e-05, "loss": 0.05505417, "step": 4341 }, { "epoch": 8.684, "grad_norm": 1.370588779449463, "learning_rate": 2e-05, "loss": 0.04044452, "step": 4342 }, { "epoch": 8.686, "grad_norm": 1.3224636316299438, "learning_rate": 2e-05, "loss": 0.06086275, "step": 4343 }, { "epoch": 8.688, "grad_norm": 1.181963324546814, "learning_rate": 2e-05, "loss": 0.04808854, "step": 4344 }, { "epoch": 8.69, "grad_norm": 1.409883975982666, "learning_rate": 2e-05, "loss": 0.05268676, "step": 4345 }, { "epoch": 8.692, "grad_norm": 5.802072525024414, "learning_rate": 2e-05, "loss": 0.05748282, "step": 4346 }, { "epoch": 8.693999999999999, "grad_norm": 1.2217000722885132, "learning_rate": 2e-05, "loss": 0.03751257, "step": 4347 }, { "epoch": 8.696, "grad_norm": 1.3667843341827393, "learning_rate": 2e-05, "loss": 0.04032121, "step": 4348 }, { "epoch": 8.698, "grad_norm": 1.1752148866653442, "learning_rate": 2e-05, "loss": 0.03708064, "step": 4349 }, { "epoch": 8.7, "grad_norm": 0.9877530336380005, "learning_rate": 2e-05, "loss": 0.02924716, "step": 4350 }, { "epoch": 8.702, "grad_norm": 1.4895782470703125, "learning_rate": 2e-05, "loss": 0.04259634, "step": 4351 }, { "epoch": 8.704, "grad_norm": 1.0186421871185303, "learning_rate": 2e-05, "loss": 0.04007004, "step": 4352 }, { "epoch": 8.706, "grad_norm": 1.8200626373291016, "learning_rate": 2e-05, "loss": 0.05589747, "step": 4353 }, { "epoch": 8.708, "grad_norm": 1.385703444480896, "learning_rate": 2e-05, "loss": 0.04419235, "step": 4354 }, { "epoch": 8.71, "grad_norm": 1.624614953994751, "learning_rate": 2e-05, "loss": 0.03844646, "step": 4355 }, { "epoch": 8.712, "grad_norm": 1.4218086004257202, "learning_rate": 2e-05, "loss": 0.05011544, "step": 4356 }, { "epoch": 8.714, "grad_norm": 1.4951738119125366, "learning_rate": 2e-05, "loss": 0.05101977, "step": 4357 }, { "epoch": 8.716, "grad_norm": 1.324513554573059, "learning_rate": 2e-05, "loss": 0.04258473, "step": 4358 }, { "epoch": 8.718, "grad_norm": 1.4158241748809814, "learning_rate": 2e-05, "loss": 0.06014551, "step": 4359 }, { "epoch": 8.72, "grad_norm": 1.1224169731140137, "learning_rate": 2e-05, "loss": 0.04492978, "step": 4360 }, { "epoch": 8.722, "grad_norm": 1.1736798286437988, "learning_rate": 2e-05, "loss": 0.04360911, "step": 4361 }, { "epoch": 8.724, "grad_norm": 1.0621728897094727, "learning_rate": 2e-05, "loss": 0.03657208, "step": 4362 }, { "epoch": 8.725999999999999, "grad_norm": 1.2689882516860962, "learning_rate": 2e-05, "loss": 0.05324752, "step": 4363 }, { "epoch": 8.728, "grad_norm": 1.0777446031570435, "learning_rate": 2e-05, "loss": 0.0410941, "step": 4364 }, { "epoch": 8.73, "grad_norm": 1.4247167110443115, "learning_rate": 2e-05, "loss": 0.04764949, "step": 4365 }, { "epoch": 8.732, "grad_norm": 1.272257685661316, "learning_rate": 2e-05, "loss": 0.03613374, "step": 4366 }, { "epoch": 8.734, "grad_norm": 0.9077991843223572, "learning_rate": 2e-05, "loss": 0.03062226, "step": 4367 }, { "epoch": 8.736, "grad_norm": 1.2713913917541504, "learning_rate": 2e-05, "loss": 0.05430157, "step": 4368 }, { "epoch": 8.738, "grad_norm": 1.4644341468811035, "learning_rate": 2e-05, "loss": 0.05187561, "step": 4369 }, { "epoch": 8.74, "grad_norm": 1.0588116645812988, "learning_rate": 2e-05, "loss": 0.03304412, "step": 4370 }, { "epoch": 8.742, "grad_norm": 2.1608660221099854, "learning_rate": 2e-05, "loss": 0.05963966, "step": 4371 }, { "epoch": 8.744, "grad_norm": 1.0907118320465088, "learning_rate": 2e-05, "loss": 0.03733838, "step": 4372 }, { "epoch": 8.746, "grad_norm": 2.0541343688964844, "learning_rate": 2e-05, "loss": 0.04198984, "step": 4373 }, { "epoch": 8.748, "grad_norm": 1.6834027767181396, "learning_rate": 2e-05, "loss": 0.02988002, "step": 4374 }, { "epoch": 8.75, "grad_norm": 1.2698630094528198, "learning_rate": 2e-05, "loss": 0.04798315, "step": 4375 }, { "epoch": 8.752, "grad_norm": 2.5312836170196533, "learning_rate": 2e-05, "loss": 0.0558813, "step": 4376 }, { "epoch": 8.754, "grad_norm": 1.4217891693115234, "learning_rate": 2e-05, "loss": 0.05483246, "step": 4377 }, { "epoch": 8.756, "grad_norm": 1.6704591512680054, "learning_rate": 2e-05, "loss": 0.0550387, "step": 4378 }, { "epoch": 8.758, "grad_norm": 1.5293974876403809, "learning_rate": 2e-05, "loss": 0.05219758, "step": 4379 }, { "epoch": 8.76, "grad_norm": 1.3517143726348877, "learning_rate": 2e-05, "loss": 0.04489984, "step": 4380 }, { "epoch": 8.762, "grad_norm": 1.3553781509399414, "learning_rate": 2e-05, "loss": 0.04696585, "step": 4381 }, { "epoch": 8.764, "grad_norm": 1.3556764125823975, "learning_rate": 2e-05, "loss": 0.03715477, "step": 4382 }, { "epoch": 8.766, "grad_norm": 2.0850911140441895, "learning_rate": 2e-05, "loss": 0.05471366, "step": 4383 }, { "epoch": 8.768, "grad_norm": 1.2767047882080078, "learning_rate": 2e-05, "loss": 0.03740011, "step": 4384 }, { "epoch": 8.77, "grad_norm": 1.2590597867965698, "learning_rate": 2e-05, "loss": 0.0351716, "step": 4385 }, { "epoch": 8.772, "grad_norm": 1.1498363018035889, "learning_rate": 2e-05, "loss": 0.04321297, "step": 4386 }, { "epoch": 8.774000000000001, "grad_norm": 1.0147833824157715, "learning_rate": 2e-05, "loss": 0.03920402, "step": 4387 }, { "epoch": 8.776, "grad_norm": 1.6818196773529053, "learning_rate": 2e-05, "loss": 0.05098328, "step": 4388 }, { "epoch": 8.778, "grad_norm": 1.4138433933258057, "learning_rate": 2e-05, "loss": 0.03823352, "step": 4389 }, { "epoch": 8.78, "grad_norm": 1.0351346731185913, "learning_rate": 2e-05, "loss": 0.03335971, "step": 4390 }, { "epoch": 8.782, "grad_norm": 1.7980780601501465, "learning_rate": 2e-05, "loss": 0.07383931, "step": 4391 }, { "epoch": 8.784, "grad_norm": 1.1801741123199463, "learning_rate": 2e-05, "loss": 0.04325704, "step": 4392 }, { "epoch": 8.786, "grad_norm": 0.9786828756332397, "learning_rate": 2e-05, "loss": 0.03433499, "step": 4393 }, { "epoch": 8.788, "grad_norm": 1.6378285884857178, "learning_rate": 2e-05, "loss": 0.03292269, "step": 4394 }, { "epoch": 8.79, "grad_norm": 1.5976024866104126, "learning_rate": 2e-05, "loss": 0.05588251, "step": 4395 }, { "epoch": 8.792, "grad_norm": 4.473025798797607, "learning_rate": 2e-05, "loss": 0.07918499, "step": 4396 }, { "epoch": 8.794, "grad_norm": 1.5586071014404297, "learning_rate": 2e-05, "loss": 0.05898192, "step": 4397 }, { "epoch": 8.796, "grad_norm": 2.6317224502563477, "learning_rate": 2e-05, "loss": 0.05829602, "step": 4398 }, { "epoch": 8.798, "grad_norm": 1.3767268657684326, "learning_rate": 2e-05, "loss": 0.05414031, "step": 4399 }, { "epoch": 8.8, "grad_norm": 0.9226973652839661, "learning_rate": 2e-05, "loss": 0.02986227, "step": 4400 }, { "epoch": 8.802, "grad_norm": 2.223020315170288, "learning_rate": 2e-05, "loss": 0.05309587, "step": 4401 }, { "epoch": 8.804, "grad_norm": 1.2311885356903076, "learning_rate": 2e-05, "loss": 0.04153631, "step": 4402 }, { "epoch": 8.806000000000001, "grad_norm": 1.4580053091049194, "learning_rate": 2e-05, "loss": 0.0463781, "step": 4403 }, { "epoch": 8.808, "grad_norm": 1.253326416015625, "learning_rate": 2e-05, "loss": 0.03931756, "step": 4404 }, { "epoch": 8.81, "grad_norm": 1.2515385150909424, "learning_rate": 2e-05, "loss": 0.04841346, "step": 4405 }, { "epoch": 8.812, "grad_norm": 1.8925061225891113, "learning_rate": 2e-05, "loss": 0.04482786, "step": 4406 }, { "epoch": 8.814, "grad_norm": 1.552807331085205, "learning_rate": 2e-05, "loss": 0.05065498, "step": 4407 }, { "epoch": 8.816, "grad_norm": 1.1485422849655151, "learning_rate": 2e-05, "loss": 0.04279656, "step": 4408 }, { "epoch": 8.818, "grad_norm": 1.3679656982421875, "learning_rate": 2e-05, "loss": 0.04369261, "step": 4409 }, { "epoch": 8.82, "grad_norm": 1.7776272296905518, "learning_rate": 2e-05, "loss": 0.03488866, "step": 4410 }, { "epoch": 8.822, "grad_norm": 1.3123103380203247, "learning_rate": 2e-05, "loss": 0.0491695, "step": 4411 }, { "epoch": 8.824, "grad_norm": 1.5892049074172974, "learning_rate": 2e-05, "loss": 0.04135371, "step": 4412 }, { "epoch": 8.826, "grad_norm": 1.5765873193740845, "learning_rate": 2e-05, "loss": 0.04748004, "step": 4413 }, { "epoch": 8.828, "grad_norm": 1.5640965700149536, "learning_rate": 2e-05, "loss": 0.06020439, "step": 4414 }, { "epoch": 8.83, "grad_norm": 1.4658604860305786, "learning_rate": 2e-05, "loss": 0.04439153, "step": 4415 }, { "epoch": 8.832, "grad_norm": 1.7687060832977295, "learning_rate": 2e-05, "loss": 0.05248412, "step": 4416 }, { "epoch": 8.834, "grad_norm": 1.102990746498108, "learning_rate": 2e-05, "loss": 0.03225242, "step": 4417 }, { "epoch": 8.836, "grad_norm": 1.2133313417434692, "learning_rate": 2e-05, "loss": 0.03089228, "step": 4418 }, { "epoch": 8.838, "grad_norm": 2.1788408756256104, "learning_rate": 2e-05, "loss": 0.05756471, "step": 4419 }, { "epoch": 8.84, "grad_norm": 1.5459779500961304, "learning_rate": 2e-05, "loss": 0.03825875, "step": 4420 }, { "epoch": 8.842, "grad_norm": 2.9150943756103516, "learning_rate": 2e-05, "loss": 0.05558386, "step": 4421 }, { "epoch": 8.844, "grad_norm": 1.6710635423660278, "learning_rate": 2e-05, "loss": 0.06062218, "step": 4422 }, { "epoch": 8.846, "grad_norm": 1.0483447313308716, "learning_rate": 2e-05, "loss": 0.03988266, "step": 4423 }, { "epoch": 8.848, "grad_norm": 1.760643720626831, "learning_rate": 2e-05, "loss": 0.04801723, "step": 4424 }, { "epoch": 8.85, "grad_norm": 1.5282479524612427, "learning_rate": 2e-05, "loss": 0.05213196, "step": 4425 }, { "epoch": 8.852, "grad_norm": 1.7912678718566895, "learning_rate": 2e-05, "loss": 0.04636016, "step": 4426 }, { "epoch": 8.854, "grad_norm": 0.8681804537773132, "learning_rate": 2e-05, "loss": 0.03162609, "step": 4427 }, { "epoch": 8.856, "grad_norm": 1.3080195188522339, "learning_rate": 2e-05, "loss": 0.05422079, "step": 4428 }, { "epoch": 8.858, "grad_norm": 1.337620735168457, "learning_rate": 2e-05, "loss": 0.05549601, "step": 4429 }, { "epoch": 8.86, "grad_norm": 1.0535590648651123, "learning_rate": 2e-05, "loss": 0.04026527, "step": 4430 }, { "epoch": 8.862, "grad_norm": 1.1052578687667847, "learning_rate": 2e-05, "loss": 0.03884507, "step": 4431 }, { "epoch": 8.864, "grad_norm": 1.7922765016555786, "learning_rate": 2e-05, "loss": 0.05645222, "step": 4432 }, { "epoch": 8.866, "grad_norm": 1.553610920906067, "learning_rate": 2e-05, "loss": 0.055147, "step": 4433 }, { "epoch": 8.868, "grad_norm": 1.6815420389175415, "learning_rate": 2e-05, "loss": 0.0513879, "step": 4434 }, { "epoch": 8.87, "grad_norm": 2.2995853424072266, "learning_rate": 2e-05, "loss": 0.0562899, "step": 4435 }, { "epoch": 8.872, "grad_norm": 1.9725992679595947, "learning_rate": 2e-05, "loss": 0.057873, "step": 4436 }, { "epoch": 8.874, "grad_norm": 2.180466890335083, "learning_rate": 2e-05, "loss": 0.04819129, "step": 4437 }, { "epoch": 8.876, "grad_norm": 1.4554449319839478, "learning_rate": 2e-05, "loss": 0.04344202, "step": 4438 }, { "epoch": 8.878, "grad_norm": 1.6162256002426147, "learning_rate": 2e-05, "loss": 0.04701218, "step": 4439 }, { "epoch": 8.88, "grad_norm": 0.8536230325698853, "learning_rate": 2e-05, "loss": 0.02937111, "step": 4440 }, { "epoch": 8.882, "grad_norm": 1.6734143495559692, "learning_rate": 2e-05, "loss": 0.05779783, "step": 4441 }, { "epoch": 8.884, "grad_norm": 1.528039574623108, "learning_rate": 2e-05, "loss": 0.0378891, "step": 4442 }, { "epoch": 8.886, "grad_norm": 1.4532936811447144, "learning_rate": 2e-05, "loss": 0.04875976, "step": 4443 }, { "epoch": 8.888, "grad_norm": 1.1509127616882324, "learning_rate": 2e-05, "loss": 0.04518646, "step": 4444 }, { "epoch": 8.89, "grad_norm": 1.0691490173339844, "learning_rate": 2e-05, "loss": 0.0388993, "step": 4445 }, { "epoch": 8.892, "grad_norm": 1.3075459003448486, "learning_rate": 2e-05, "loss": 0.0404879, "step": 4446 }, { "epoch": 8.894, "grad_norm": 1.5355843305587769, "learning_rate": 2e-05, "loss": 0.06093806, "step": 4447 }, { "epoch": 8.896, "grad_norm": 1.2035893201828003, "learning_rate": 2e-05, "loss": 0.03944387, "step": 4448 }, { "epoch": 8.898, "grad_norm": 1.2087056636810303, "learning_rate": 2e-05, "loss": 0.04656249, "step": 4449 }, { "epoch": 8.9, "grad_norm": 1.3271777629852295, "learning_rate": 2e-05, "loss": 0.04627533, "step": 4450 }, { "epoch": 8.902, "grad_norm": 1.2811769247055054, "learning_rate": 2e-05, "loss": 0.04390157, "step": 4451 }, { "epoch": 8.904, "grad_norm": 1.3496356010437012, "learning_rate": 2e-05, "loss": 0.05148339, "step": 4452 }, { "epoch": 8.906, "grad_norm": 2.2644078731536865, "learning_rate": 2e-05, "loss": 0.06438026, "step": 4453 }, { "epoch": 8.908, "grad_norm": 0.9715155363082886, "learning_rate": 2e-05, "loss": 0.02947888, "step": 4454 }, { "epoch": 8.91, "grad_norm": 1.787411093711853, "learning_rate": 2e-05, "loss": 0.04107827, "step": 4455 }, { "epoch": 8.912, "grad_norm": 1.1115169525146484, "learning_rate": 2e-05, "loss": 0.0399262, "step": 4456 }, { "epoch": 8.914, "grad_norm": 1.2536200284957886, "learning_rate": 2e-05, "loss": 0.03909591, "step": 4457 }, { "epoch": 8.916, "grad_norm": 1.3224055767059326, "learning_rate": 2e-05, "loss": 0.04420022, "step": 4458 }, { "epoch": 8.918, "grad_norm": 1.4246692657470703, "learning_rate": 2e-05, "loss": 0.03702656, "step": 4459 }, { "epoch": 8.92, "grad_norm": 1.9952268600463867, "learning_rate": 2e-05, "loss": 0.05900434, "step": 4460 }, { "epoch": 8.922, "grad_norm": 2.0388376712799072, "learning_rate": 2e-05, "loss": 0.05166218, "step": 4461 }, { "epoch": 8.924, "grad_norm": 1.1385499238967896, "learning_rate": 2e-05, "loss": 0.04556513, "step": 4462 }, { "epoch": 8.926, "grad_norm": 1.5945521593093872, "learning_rate": 2e-05, "loss": 0.0496194, "step": 4463 }, { "epoch": 8.928, "grad_norm": 1.6476668119430542, "learning_rate": 2e-05, "loss": 0.04806423, "step": 4464 }, { "epoch": 8.93, "grad_norm": 1.9072840213775635, "learning_rate": 2e-05, "loss": 0.05876537, "step": 4465 }, { "epoch": 8.932, "grad_norm": 1.7775014638900757, "learning_rate": 2e-05, "loss": 0.05251986, "step": 4466 }, { "epoch": 8.934, "grad_norm": 1.2985152006149292, "learning_rate": 2e-05, "loss": 0.0463018, "step": 4467 }, { "epoch": 8.936, "grad_norm": 1.8547874689102173, "learning_rate": 2e-05, "loss": 0.06041159, "step": 4468 }, { "epoch": 8.938, "grad_norm": 2.1976423263549805, "learning_rate": 2e-05, "loss": 0.03414884, "step": 4469 }, { "epoch": 8.94, "grad_norm": 0.9419854283332825, "learning_rate": 2e-05, "loss": 0.02789684, "step": 4470 }, { "epoch": 8.942, "grad_norm": 1.1904888153076172, "learning_rate": 2e-05, "loss": 0.04499068, "step": 4471 }, { "epoch": 8.943999999999999, "grad_norm": 1.1681838035583496, "learning_rate": 2e-05, "loss": 0.04271141, "step": 4472 }, { "epoch": 8.946, "grad_norm": 1.388636827468872, "learning_rate": 2e-05, "loss": 0.04021178, "step": 4473 }, { "epoch": 8.948, "grad_norm": 1.6245975494384766, "learning_rate": 2e-05, "loss": 0.05697055, "step": 4474 }, { "epoch": 8.95, "grad_norm": 1.1198829412460327, "learning_rate": 2e-05, "loss": 0.03857917, "step": 4475 }, { "epoch": 8.952, "grad_norm": 1.9190189838409424, "learning_rate": 2e-05, "loss": 0.04297666, "step": 4476 }, { "epoch": 8.954, "grad_norm": 1.584479808807373, "learning_rate": 2e-05, "loss": 0.06143427, "step": 4477 }, { "epoch": 8.956, "grad_norm": 1.6050777435302734, "learning_rate": 2e-05, "loss": 0.04966013, "step": 4478 }, { "epoch": 8.958, "grad_norm": 2.1203346252441406, "learning_rate": 2e-05, "loss": 0.06286226, "step": 4479 }, { "epoch": 8.96, "grad_norm": 1.5613123178482056, "learning_rate": 2e-05, "loss": 0.04335446, "step": 4480 }, { "epoch": 8.962, "grad_norm": 1.7688831090927124, "learning_rate": 2e-05, "loss": 0.05140468, "step": 4481 }, { "epoch": 8.964, "grad_norm": 1.1225483417510986, "learning_rate": 2e-05, "loss": 0.03839725, "step": 4482 }, { "epoch": 8.966, "grad_norm": 1.546858549118042, "learning_rate": 2e-05, "loss": 0.04927346, "step": 4483 }, { "epoch": 8.968, "grad_norm": 1.4144601821899414, "learning_rate": 2e-05, "loss": 0.0489133, "step": 4484 }, { "epoch": 8.97, "grad_norm": 1.2596808671951294, "learning_rate": 2e-05, "loss": 0.04895072, "step": 4485 }, { "epoch": 8.972, "grad_norm": 1.3473176956176758, "learning_rate": 2e-05, "loss": 0.04276847, "step": 4486 }, { "epoch": 8.974, "grad_norm": 1.0893629789352417, "learning_rate": 2e-05, "loss": 0.02666626, "step": 4487 }, { "epoch": 8.975999999999999, "grad_norm": 1.3393633365631104, "learning_rate": 2e-05, "loss": 0.04725213, "step": 4488 }, { "epoch": 8.978, "grad_norm": 1.2494399547576904, "learning_rate": 2e-05, "loss": 0.04335026, "step": 4489 }, { "epoch": 8.98, "grad_norm": 1.394800066947937, "learning_rate": 2e-05, "loss": 0.05108353, "step": 4490 }, { "epoch": 8.982, "grad_norm": 2.5832700729370117, "learning_rate": 2e-05, "loss": 0.03944442, "step": 4491 }, { "epoch": 8.984, "grad_norm": 2.0006890296936035, "learning_rate": 2e-05, "loss": 0.06387276, "step": 4492 }, { "epoch": 8.986, "grad_norm": 1.664270043373108, "learning_rate": 2e-05, "loss": 0.0435832, "step": 4493 }, { "epoch": 8.988, "grad_norm": 1.6541657447814941, "learning_rate": 2e-05, "loss": 0.05543558, "step": 4494 }, { "epoch": 8.99, "grad_norm": 1.3456977605819702, "learning_rate": 2e-05, "loss": 0.03495708, "step": 4495 }, { "epoch": 8.992, "grad_norm": 1.4035969972610474, "learning_rate": 2e-05, "loss": 0.04944149, "step": 4496 }, { "epoch": 8.994, "grad_norm": 2.1596810817718506, "learning_rate": 2e-05, "loss": 0.0587335, "step": 4497 }, { "epoch": 8.996, "grad_norm": 1.4159624576568604, "learning_rate": 2e-05, "loss": 0.04697796, "step": 4498 }, { "epoch": 8.998, "grad_norm": 2.2335352897644043, "learning_rate": 2e-05, "loss": 0.05628205, "step": 4499 }, { "epoch": 9.0, "grad_norm": 1.6927722692489624, "learning_rate": 2e-05, "loss": 0.04296227, "step": 4500 }, { "epoch": 9.0, "eval_performance": { "AngleClassification_1": 0.992, "AngleClassification_2": 0.99, "AngleClassification_3": 0.8922155688622755, "Equal_1": 0.99, "Equal_2": 0.9600798403193613, "Equal_3": 0.8662674650698603, "LineComparison_1": 0.998, "LineComparison_2": 1.0, "LineComparison_3": 0.9700598802395209, "Parallel_1": 0.969939879759519, "Parallel_2": 0.9919839679358717, "Parallel_3": 0.988, "Perpendicular_1": 0.992, "Perpendicular_2": 0.828, "Perpendicular_3": 0.533066132264529, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9896666666666667, "PointLiesOnCircle_3": 0.9916, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9041916167664671 }, "eval_runtime": 320.19, "eval_samples_per_second": 32.793, "eval_steps_per_second": 0.656, "step": 4500 }, { "epoch": 9.002, "grad_norm": 2.0088605880737305, "learning_rate": 2e-05, "loss": 0.05657805, "step": 4501 }, { "epoch": 9.004, "grad_norm": 3.5889363288879395, "learning_rate": 2e-05, "loss": 0.08259207, "step": 4502 }, { "epoch": 9.006, "grad_norm": 2.149068593978882, "learning_rate": 2e-05, "loss": 0.05930202, "step": 4503 }, { "epoch": 9.008, "grad_norm": 4.037459373474121, "learning_rate": 2e-05, "loss": 0.0556262, "step": 4504 }, { "epoch": 9.01, "grad_norm": 1.781816840171814, "learning_rate": 2e-05, "loss": 0.05204342, "step": 4505 }, { "epoch": 9.012, "grad_norm": 2.256213903427124, "learning_rate": 2e-05, "loss": 0.07549164, "step": 4506 }, { "epoch": 9.014, "grad_norm": 2.2270400524139404, "learning_rate": 2e-05, "loss": 0.10077457, "step": 4507 }, { "epoch": 9.016, "grad_norm": 1.9802483320236206, "learning_rate": 2e-05, "loss": 0.0692708, "step": 4508 }, { "epoch": 9.018, "grad_norm": 1.4657119512557983, "learning_rate": 2e-05, "loss": 0.05361843, "step": 4509 }, { "epoch": 9.02, "grad_norm": 1.6600518226623535, "learning_rate": 2e-05, "loss": 0.05417612, "step": 4510 }, { "epoch": 9.022, "grad_norm": 1.6829038858413696, "learning_rate": 2e-05, "loss": 0.05836887, "step": 4511 }, { "epoch": 9.024, "grad_norm": 1.7397302389144897, "learning_rate": 2e-05, "loss": 0.06527451, "step": 4512 }, { "epoch": 9.026, "grad_norm": 2.1521365642547607, "learning_rate": 2e-05, "loss": 0.0745489, "step": 4513 }, { "epoch": 9.028, "grad_norm": 1.5954670906066895, "learning_rate": 2e-05, "loss": 0.04902204, "step": 4514 }, { "epoch": 9.03, "grad_norm": 1.6460261344909668, "learning_rate": 2e-05, "loss": 0.05216154, "step": 4515 }, { "epoch": 9.032, "grad_norm": 1.4781848192214966, "learning_rate": 2e-05, "loss": 0.05158208, "step": 4516 }, { "epoch": 9.034, "grad_norm": 1.3276011943817139, "learning_rate": 2e-05, "loss": 0.04840251, "step": 4517 }, { "epoch": 9.036, "grad_norm": 1.4326094388961792, "learning_rate": 2e-05, "loss": 0.06191067, "step": 4518 }, { "epoch": 9.038, "grad_norm": 1.330862045288086, "learning_rate": 2e-05, "loss": 0.05839656, "step": 4519 }, { "epoch": 9.04, "grad_norm": 2.0986196994781494, "learning_rate": 2e-05, "loss": 0.06784381, "step": 4520 }, { "epoch": 9.042, "grad_norm": 2.1825203895568848, "learning_rate": 2e-05, "loss": 0.04452966, "step": 4521 }, { "epoch": 9.044, "grad_norm": 1.90194571018219, "learning_rate": 2e-05, "loss": 0.06857677, "step": 4522 }, { "epoch": 9.046, "grad_norm": 2.0310020446777344, "learning_rate": 2e-05, "loss": 0.04652988, "step": 4523 }, { "epoch": 9.048, "grad_norm": 1.5352383852005005, "learning_rate": 2e-05, "loss": 0.04297383, "step": 4524 }, { "epoch": 9.05, "grad_norm": 2.4602177143096924, "learning_rate": 2e-05, "loss": 0.04376163, "step": 4525 }, { "epoch": 9.052, "grad_norm": 1.6497063636779785, "learning_rate": 2e-05, "loss": 0.0518771, "step": 4526 }, { "epoch": 9.054, "grad_norm": 1.3094106912612915, "learning_rate": 2e-05, "loss": 0.04263393, "step": 4527 }, { "epoch": 9.056, "grad_norm": 2.1495842933654785, "learning_rate": 2e-05, "loss": 0.06488663, "step": 4528 }, { "epoch": 9.058, "grad_norm": 1.4335927963256836, "learning_rate": 2e-05, "loss": 0.04027612, "step": 4529 }, { "epoch": 9.06, "grad_norm": 1.6587024927139282, "learning_rate": 2e-05, "loss": 0.06693022, "step": 4530 }, { "epoch": 9.062, "grad_norm": 3.3605124950408936, "learning_rate": 2e-05, "loss": 0.07780167, "step": 4531 }, { "epoch": 9.064, "grad_norm": 1.3746273517608643, "learning_rate": 2e-05, "loss": 0.06263963, "step": 4532 }, { "epoch": 9.066, "grad_norm": 1.8702830076217651, "learning_rate": 2e-05, "loss": 0.04445141, "step": 4533 }, { "epoch": 9.068, "grad_norm": 1.272550344467163, "learning_rate": 2e-05, "loss": 0.04481889, "step": 4534 }, { "epoch": 9.07, "grad_norm": 1.7748891115188599, "learning_rate": 2e-05, "loss": 0.04557388, "step": 4535 }, { "epoch": 9.072, "grad_norm": 6.206629276275635, "learning_rate": 2e-05, "loss": 0.07212655, "step": 4536 }, { "epoch": 9.074, "grad_norm": 1.981056809425354, "learning_rate": 2e-05, "loss": 0.05645072, "step": 4537 }, { "epoch": 9.076, "grad_norm": 2.8909831047058105, "learning_rate": 2e-05, "loss": 0.05474135, "step": 4538 }, { "epoch": 9.078, "grad_norm": 1.981854796409607, "learning_rate": 2e-05, "loss": 0.05070494, "step": 4539 }, { "epoch": 9.08, "grad_norm": 1.315761923789978, "learning_rate": 2e-05, "loss": 0.04502026, "step": 4540 }, { "epoch": 9.082, "grad_norm": 1.2324833869934082, "learning_rate": 2e-05, "loss": 0.04134127, "step": 4541 }, { "epoch": 9.084, "grad_norm": 2.135284185409546, "learning_rate": 2e-05, "loss": 0.05175925, "step": 4542 }, { "epoch": 9.086, "grad_norm": 4.033361434936523, "learning_rate": 2e-05, "loss": 0.07532454, "step": 4543 }, { "epoch": 9.088, "grad_norm": 1.9662163257598877, "learning_rate": 2e-05, "loss": 0.05289062, "step": 4544 }, { "epoch": 9.09, "grad_norm": 1.4949467182159424, "learning_rate": 2e-05, "loss": 0.05135939, "step": 4545 }, { "epoch": 9.092, "grad_norm": 1.5621041059494019, "learning_rate": 2e-05, "loss": 0.05257202, "step": 4546 }, { "epoch": 9.094, "grad_norm": 2.7926948070526123, "learning_rate": 2e-05, "loss": 0.05770051, "step": 4547 }, { "epoch": 9.096, "grad_norm": 1.2026070356369019, "learning_rate": 2e-05, "loss": 0.03686951, "step": 4548 }, { "epoch": 9.098, "grad_norm": 1.5503953695297241, "learning_rate": 2e-05, "loss": 0.06764029, "step": 4549 }, { "epoch": 9.1, "grad_norm": 1.4695518016815186, "learning_rate": 2e-05, "loss": 0.040582, "step": 4550 }, { "epoch": 9.102, "grad_norm": 1.5409952402114868, "learning_rate": 2e-05, "loss": 0.04492978, "step": 4551 }, { "epoch": 9.104, "grad_norm": 1.1860859394073486, "learning_rate": 2e-05, "loss": 0.03985904, "step": 4552 }, { "epoch": 9.106, "grad_norm": 1.1103501319885254, "learning_rate": 2e-05, "loss": 0.03564502, "step": 4553 }, { "epoch": 9.108, "grad_norm": 3.4660491943359375, "learning_rate": 2e-05, "loss": 0.07822317, "step": 4554 }, { "epoch": 9.11, "grad_norm": 2.640744686126709, "learning_rate": 2e-05, "loss": 0.05545685, "step": 4555 }, { "epoch": 9.112, "grad_norm": 2.2998898029327393, "learning_rate": 2e-05, "loss": 0.06051971, "step": 4556 }, { "epoch": 9.114, "grad_norm": 2.3359665870666504, "learning_rate": 2e-05, "loss": 0.06999737, "step": 4557 }, { "epoch": 9.116, "grad_norm": 1.607606291770935, "learning_rate": 2e-05, "loss": 0.05824485, "step": 4558 }, { "epoch": 9.118, "grad_norm": 1.0786031484603882, "learning_rate": 2e-05, "loss": 0.04007483, "step": 4559 }, { "epoch": 9.12, "grad_norm": 2.296783924102783, "learning_rate": 2e-05, "loss": 0.04448222, "step": 4560 }, { "epoch": 9.122, "grad_norm": 1.5085766315460205, "learning_rate": 2e-05, "loss": 0.03726997, "step": 4561 }, { "epoch": 9.124, "grad_norm": 2.368339776992798, "learning_rate": 2e-05, "loss": 0.05795577, "step": 4562 }, { "epoch": 9.126, "grad_norm": 1.6994584798812866, "learning_rate": 2e-05, "loss": 0.04269854, "step": 4563 }, { "epoch": 9.128, "grad_norm": 1.7187951803207397, "learning_rate": 2e-05, "loss": 0.05323853, "step": 4564 }, { "epoch": 9.13, "grad_norm": 2.053581953048706, "learning_rate": 2e-05, "loss": 0.0678404, "step": 4565 }, { "epoch": 9.132, "grad_norm": 1.663622260093689, "learning_rate": 2e-05, "loss": 0.04938076, "step": 4566 }, { "epoch": 9.134, "grad_norm": 2.431302785873413, "learning_rate": 2e-05, "loss": 0.08686221, "step": 4567 }, { "epoch": 9.136, "grad_norm": 1.7747234106063843, "learning_rate": 2e-05, "loss": 0.06363907, "step": 4568 }, { "epoch": 9.138, "grad_norm": 2.0806329250335693, "learning_rate": 2e-05, "loss": 0.06777623, "step": 4569 }, { "epoch": 9.14, "grad_norm": 1.9430458545684814, "learning_rate": 2e-05, "loss": 0.05345959, "step": 4570 }, { "epoch": 9.142, "grad_norm": 1.6341060400009155, "learning_rate": 2e-05, "loss": 0.06348822, "step": 4571 }, { "epoch": 9.144, "grad_norm": 2.909865140914917, "learning_rate": 2e-05, "loss": 0.06166807, "step": 4572 }, { "epoch": 9.146, "grad_norm": 2.0202066898345947, "learning_rate": 2e-05, "loss": 0.06222503, "step": 4573 }, { "epoch": 9.148, "grad_norm": 1.8997862339019775, "learning_rate": 2e-05, "loss": 0.04913793, "step": 4574 }, { "epoch": 9.15, "grad_norm": 1.336957573890686, "learning_rate": 2e-05, "loss": 0.07197899, "step": 4575 }, { "epoch": 9.152, "grad_norm": 1.426227331161499, "learning_rate": 2e-05, "loss": 0.05871036, "step": 4576 }, { "epoch": 9.154, "grad_norm": 1.507077693939209, "learning_rate": 2e-05, "loss": 0.04596441, "step": 4577 }, { "epoch": 9.156, "grad_norm": 1.3802088499069214, "learning_rate": 2e-05, "loss": 0.04962367, "step": 4578 }, { "epoch": 9.158, "grad_norm": 1.2211660146713257, "learning_rate": 2e-05, "loss": 0.04041466, "step": 4579 }, { "epoch": 9.16, "grad_norm": 1.5546852350234985, "learning_rate": 2e-05, "loss": 0.06333128, "step": 4580 }, { "epoch": 9.162, "grad_norm": 1.5927609205245972, "learning_rate": 2e-05, "loss": 0.05327647, "step": 4581 }, { "epoch": 9.164, "grad_norm": 1.427751898765564, "learning_rate": 2e-05, "loss": 0.04664875, "step": 4582 }, { "epoch": 9.166, "grad_norm": 1.6107282638549805, "learning_rate": 2e-05, "loss": 0.06387123, "step": 4583 }, { "epoch": 9.168, "grad_norm": 2.082274913787842, "learning_rate": 2e-05, "loss": 0.07043574, "step": 4584 }, { "epoch": 9.17, "grad_norm": 1.6113282442092896, "learning_rate": 2e-05, "loss": 0.05279049, "step": 4585 }, { "epoch": 9.172, "grad_norm": 1.2764414548873901, "learning_rate": 2e-05, "loss": 0.05512537, "step": 4586 }, { "epoch": 9.174, "grad_norm": 1.731431007385254, "learning_rate": 2e-05, "loss": 0.05489038, "step": 4587 }, { "epoch": 9.176, "grad_norm": 2.2795941829681396, "learning_rate": 2e-05, "loss": 0.05099697, "step": 4588 }, { "epoch": 9.178, "grad_norm": 1.9231106042861938, "learning_rate": 2e-05, "loss": 0.0609756, "step": 4589 }, { "epoch": 9.18, "grad_norm": 1.3888475894927979, "learning_rate": 2e-05, "loss": 0.04252925, "step": 4590 }, { "epoch": 9.182, "grad_norm": 1.7997218370437622, "learning_rate": 2e-05, "loss": 0.0603526, "step": 4591 }, { "epoch": 9.184, "grad_norm": 2.605591297149658, "learning_rate": 2e-05, "loss": 0.05147631, "step": 4592 }, { "epoch": 9.186, "grad_norm": 1.1323707103729248, "learning_rate": 2e-05, "loss": 0.03506859, "step": 4593 }, { "epoch": 9.188, "grad_norm": 1.9055380821228027, "learning_rate": 2e-05, "loss": 0.05622992, "step": 4594 }, { "epoch": 9.19, "grad_norm": 1.66822350025177, "learning_rate": 2e-05, "loss": 0.047398, "step": 4595 }, { "epoch": 9.192, "grad_norm": 1.536523461341858, "learning_rate": 2e-05, "loss": 0.05335706, "step": 4596 }, { "epoch": 9.194, "grad_norm": 2.4968795776367188, "learning_rate": 2e-05, "loss": 0.04762807, "step": 4597 }, { "epoch": 9.196, "grad_norm": 1.6647475957870483, "learning_rate": 2e-05, "loss": 0.06161051, "step": 4598 }, { "epoch": 9.198, "grad_norm": 2.187264919281006, "learning_rate": 2e-05, "loss": 0.08325572, "step": 4599 }, { "epoch": 9.2, "grad_norm": 1.3294416666030884, "learning_rate": 2e-05, "loss": 0.05907375, "step": 4600 }, { "epoch": 9.202, "grad_norm": 1.3682829141616821, "learning_rate": 2e-05, "loss": 0.0561332, "step": 4601 }, { "epoch": 9.204, "grad_norm": 3.3193435668945312, "learning_rate": 2e-05, "loss": 0.06492063, "step": 4602 }, { "epoch": 9.206, "grad_norm": 2.0114078521728516, "learning_rate": 2e-05, "loss": 0.06080581, "step": 4603 }, { "epoch": 9.208, "grad_norm": 1.9517841339111328, "learning_rate": 2e-05, "loss": 0.06941114, "step": 4604 }, { "epoch": 9.21, "grad_norm": 1.9020707607269287, "learning_rate": 2e-05, "loss": 0.06470014, "step": 4605 }, { "epoch": 9.212, "grad_norm": 1.7281121015548706, "learning_rate": 2e-05, "loss": 0.05396245, "step": 4606 }, { "epoch": 9.214, "grad_norm": 3.0623199939727783, "learning_rate": 2e-05, "loss": 0.08490207, "step": 4607 }, { "epoch": 9.216, "grad_norm": 2.61993670463562, "learning_rate": 2e-05, "loss": 0.06834012, "step": 4608 }, { "epoch": 9.218, "grad_norm": 1.3250088691711426, "learning_rate": 2e-05, "loss": 0.04134344, "step": 4609 }, { "epoch": 9.22, "grad_norm": 2.761884927749634, "learning_rate": 2e-05, "loss": 0.06493714, "step": 4610 }, { "epoch": 9.222, "grad_norm": 1.54371178150177, "learning_rate": 2e-05, "loss": 0.05743696, "step": 4611 }, { "epoch": 9.224, "grad_norm": 5.61605167388916, "learning_rate": 2e-05, "loss": 0.06084044, "step": 4612 }, { "epoch": 9.226, "grad_norm": 1.3761494159698486, "learning_rate": 2e-05, "loss": 0.0450317, "step": 4613 }, { "epoch": 9.228, "grad_norm": 1.8002984523773193, "learning_rate": 2e-05, "loss": 0.0667226, "step": 4614 }, { "epoch": 9.23, "grad_norm": 1.532179355621338, "learning_rate": 2e-05, "loss": 0.0425299, "step": 4615 }, { "epoch": 9.232, "grad_norm": 1.6311419010162354, "learning_rate": 2e-05, "loss": 0.04938279, "step": 4616 }, { "epoch": 9.234, "grad_norm": 1.4770641326904297, "learning_rate": 2e-05, "loss": 0.06095012, "step": 4617 }, { "epoch": 9.236, "grad_norm": 1.8100465536117554, "learning_rate": 2e-05, "loss": 0.06146353, "step": 4618 }, { "epoch": 9.238, "grad_norm": 2.3167026042938232, "learning_rate": 2e-05, "loss": 0.05474924, "step": 4619 }, { "epoch": 9.24, "grad_norm": 1.2826061248779297, "learning_rate": 2e-05, "loss": 0.04267112, "step": 4620 }, { "epoch": 9.242, "grad_norm": 1.6880052089691162, "learning_rate": 2e-05, "loss": 0.05456819, "step": 4621 }, { "epoch": 9.244, "grad_norm": 1.711029052734375, "learning_rate": 2e-05, "loss": 0.0621344, "step": 4622 }, { "epoch": 9.246, "grad_norm": 2.999143600463867, "learning_rate": 2e-05, "loss": 0.08942266, "step": 4623 }, { "epoch": 9.248, "grad_norm": 3.74527645111084, "learning_rate": 2e-05, "loss": 0.05644033, "step": 4624 }, { "epoch": 9.25, "grad_norm": 1.9168704748153687, "learning_rate": 2e-05, "loss": 0.03920052, "step": 4625 }, { "epoch": 9.252, "grad_norm": 1.4764798879623413, "learning_rate": 2e-05, "loss": 0.06123003, "step": 4626 }, { "epoch": 9.254, "grad_norm": 1.1448794603347778, "learning_rate": 2e-05, "loss": 0.04291178, "step": 4627 }, { "epoch": 9.256, "grad_norm": 1.2735017538070679, "learning_rate": 2e-05, "loss": 0.0421702, "step": 4628 }, { "epoch": 9.258, "grad_norm": 1.3639416694641113, "learning_rate": 2e-05, "loss": 0.04493828, "step": 4629 }, { "epoch": 9.26, "grad_norm": 1.3818625211715698, "learning_rate": 2e-05, "loss": 0.04133046, "step": 4630 }, { "epoch": 9.262, "grad_norm": 1.436888575553894, "learning_rate": 2e-05, "loss": 0.05594487, "step": 4631 }, { "epoch": 9.264, "grad_norm": 1.6045076847076416, "learning_rate": 2e-05, "loss": 0.07328442, "step": 4632 }, { "epoch": 9.266, "grad_norm": 1.4170843362808228, "learning_rate": 2e-05, "loss": 0.04571041, "step": 4633 }, { "epoch": 9.268, "grad_norm": 1.7131657600402832, "learning_rate": 2e-05, "loss": 0.06080949, "step": 4634 }, { "epoch": 9.27, "grad_norm": 1.8091500997543335, "learning_rate": 2e-05, "loss": 0.0525728, "step": 4635 }, { "epoch": 9.272, "grad_norm": 1.8526278734207153, "learning_rate": 2e-05, "loss": 0.06190364, "step": 4636 }, { "epoch": 9.274000000000001, "grad_norm": 1.4877046346664429, "learning_rate": 2e-05, "loss": 0.06286225, "step": 4637 }, { "epoch": 9.276, "grad_norm": 1.772066354751587, "learning_rate": 2e-05, "loss": 0.04873686, "step": 4638 }, { "epoch": 9.278, "grad_norm": 2.045987606048584, "learning_rate": 2e-05, "loss": 0.06090255, "step": 4639 }, { "epoch": 9.28, "grad_norm": 1.1641405820846558, "learning_rate": 2e-05, "loss": 0.03027676, "step": 4640 }, { "epoch": 9.282, "grad_norm": 1.6448951959609985, "learning_rate": 2e-05, "loss": 0.0530159, "step": 4641 }, { "epoch": 9.284, "grad_norm": 1.8884637355804443, "learning_rate": 2e-05, "loss": 0.0639272, "step": 4642 }, { "epoch": 9.286, "grad_norm": 1.355390191078186, "learning_rate": 2e-05, "loss": 0.04228202, "step": 4643 }, { "epoch": 9.288, "grad_norm": 2.2434418201446533, "learning_rate": 2e-05, "loss": 0.07046621, "step": 4644 }, { "epoch": 9.29, "grad_norm": 2.844688892364502, "learning_rate": 2e-05, "loss": 0.08373692, "step": 4645 }, { "epoch": 9.292, "grad_norm": 1.803123950958252, "learning_rate": 2e-05, "loss": 0.05633461, "step": 4646 }, { "epoch": 9.294, "grad_norm": 2.8948237895965576, "learning_rate": 2e-05, "loss": 0.05847919, "step": 4647 }, { "epoch": 9.296, "grad_norm": 2.4830639362335205, "learning_rate": 2e-05, "loss": 0.05488868, "step": 4648 }, { "epoch": 9.298, "grad_norm": 1.4770337343215942, "learning_rate": 2e-05, "loss": 0.04579071, "step": 4649 }, { "epoch": 9.3, "grad_norm": 2.6344194412231445, "learning_rate": 2e-05, "loss": 0.05781864, "step": 4650 }, { "epoch": 9.302, "grad_norm": 1.8816792964935303, "learning_rate": 2e-05, "loss": 0.05238046, "step": 4651 }, { "epoch": 9.304, "grad_norm": 1.3099342584609985, "learning_rate": 2e-05, "loss": 0.04636721, "step": 4652 }, { "epoch": 9.306, "grad_norm": 2.569141149520874, "learning_rate": 2e-05, "loss": 0.05769484, "step": 4653 }, { "epoch": 9.308, "grad_norm": 1.5715465545654297, "learning_rate": 2e-05, "loss": 0.05822673, "step": 4654 }, { "epoch": 9.31, "grad_norm": 2.693352222442627, "learning_rate": 2e-05, "loss": 0.08047836, "step": 4655 }, { "epoch": 9.312, "grad_norm": 2.2986159324645996, "learning_rate": 2e-05, "loss": 0.06082643, "step": 4656 }, { "epoch": 9.314, "grad_norm": 1.2851147651672363, "learning_rate": 2e-05, "loss": 0.04421428, "step": 4657 }, { "epoch": 9.316, "grad_norm": 2.6191282272338867, "learning_rate": 2e-05, "loss": 0.05954355, "step": 4658 }, { "epoch": 9.318, "grad_norm": 1.7637144327163696, "learning_rate": 2e-05, "loss": 0.06339554, "step": 4659 }, { "epoch": 9.32, "grad_norm": 2.729525089263916, "learning_rate": 2e-05, "loss": 0.07117513, "step": 4660 }, { "epoch": 9.322, "grad_norm": 1.4302338361740112, "learning_rate": 2e-05, "loss": 0.04973509, "step": 4661 }, { "epoch": 9.324, "grad_norm": 1.4821285009384155, "learning_rate": 2e-05, "loss": 0.05108538, "step": 4662 }, { "epoch": 9.326, "grad_norm": 1.5328965187072754, "learning_rate": 2e-05, "loss": 0.05789268, "step": 4663 }, { "epoch": 9.328, "grad_norm": 1.9559437036514282, "learning_rate": 2e-05, "loss": 0.06492808, "step": 4664 }, { "epoch": 9.33, "grad_norm": 1.2809813022613525, "learning_rate": 2e-05, "loss": 0.04128128, "step": 4665 }, { "epoch": 9.332, "grad_norm": 3.450357675552368, "learning_rate": 2e-05, "loss": 0.05096133, "step": 4666 }, { "epoch": 9.334, "grad_norm": 2.293715238571167, "learning_rate": 2e-05, "loss": 0.03788307, "step": 4667 }, { "epoch": 9.336, "grad_norm": 1.2826162576675415, "learning_rate": 2e-05, "loss": 0.05124827, "step": 4668 }, { "epoch": 9.338, "grad_norm": 1.501015305519104, "learning_rate": 2e-05, "loss": 0.04789577, "step": 4669 }, { "epoch": 9.34, "grad_norm": 2.1823089122772217, "learning_rate": 2e-05, "loss": 0.06318521, "step": 4670 }, { "epoch": 9.342, "grad_norm": 1.5042678117752075, "learning_rate": 2e-05, "loss": 0.0364994, "step": 4671 }, { "epoch": 9.344, "grad_norm": 1.1942931413650513, "learning_rate": 2e-05, "loss": 0.04444024, "step": 4672 }, { "epoch": 9.346, "grad_norm": 1.6051136255264282, "learning_rate": 2e-05, "loss": 0.06005847, "step": 4673 }, { "epoch": 9.348, "grad_norm": 1.2597368955612183, "learning_rate": 2e-05, "loss": 0.04411948, "step": 4674 }, { "epoch": 9.35, "grad_norm": 1.3099509477615356, "learning_rate": 2e-05, "loss": 0.05771332, "step": 4675 }, { "epoch": 9.352, "grad_norm": 2.081918954849243, "learning_rate": 2e-05, "loss": 0.04082836, "step": 4676 }, { "epoch": 9.354, "grad_norm": 1.5217939615249634, "learning_rate": 2e-05, "loss": 0.05541312, "step": 4677 }, { "epoch": 9.356, "grad_norm": 1.5885696411132812, "learning_rate": 2e-05, "loss": 0.06058114, "step": 4678 }, { "epoch": 9.358, "grad_norm": 1.2582283020019531, "learning_rate": 2e-05, "loss": 0.02798801, "step": 4679 }, { "epoch": 9.36, "grad_norm": 1.9092825651168823, "learning_rate": 2e-05, "loss": 0.06019748, "step": 4680 }, { "epoch": 9.362, "grad_norm": 3.1406259536743164, "learning_rate": 2e-05, "loss": 0.04014409, "step": 4681 }, { "epoch": 9.364, "grad_norm": 1.404429316520691, "learning_rate": 2e-05, "loss": 0.04144133, "step": 4682 }, { "epoch": 9.366, "grad_norm": 3.4135115146636963, "learning_rate": 2e-05, "loss": 0.06071433, "step": 4683 }, { "epoch": 9.368, "grad_norm": 2.7143144607543945, "learning_rate": 2e-05, "loss": 0.06220663, "step": 4684 }, { "epoch": 9.37, "grad_norm": 1.7465615272521973, "learning_rate": 2e-05, "loss": 0.05392168, "step": 4685 }, { "epoch": 9.372, "grad_norm": 2.8406643867492676, "learning_rate": 2e-05, "loss": 0.09703434, "step": 4686 }, { "epoch": 9.374, "grad_norm": 1.889456868171692, "learning_rate": 2e-05, "loss": 0.04023182, "step": 4687 }, { "epoch": 9.376, "grad_norm": 1.769338846206665, "learning_rate": 2e-05, "loss": 0.04535776, "step": 4688 }, { "epoch": 9.378, "grad_norm": 1.3787952661514282, "learning_rate": 2e-05, "loss": 0.05142323, "step": 4689 }, { "epoch": 9.38, "grad_norm": 1.353203296661377, "learning_rate": 2e-05, "loss": 0.04485345, "step": 4690 }, { "epoch": 9.382, "grad_norm": 1.3988724946975708, "learning_rate": 2e-05, "loss": 0.03410086, "step": 4691 }, { "epoch": 9.384, "grad_norm": 1.2784860134124756, "learning_rate": 2e-05, "loss": 0.05209333, "step": 4692 }, { "epoch": 9.386, "grad_norm": 1.9616296291351318, "learning_rate": 2e-05, "loss": 0.0617491, "step": 4693 }, { "epoch": 9.388, "grad_norm": 2.1018028259277344, "learning_rate": 2e-05, "loss": 0.06020827, "step": 4694 }, { "epoch": 9.39, "grad_norm": 1.3357975482940674, "learning_rate": 2e-05, "loss": 0.03462115, "step": 4695 }, { "epoch": 9.392, "grad_norm": 2.2010691165924072, "learning_rate": 2e-05, "loss": 0.05162614, "step": 4696 }, { "epoch": 9.394, "grad_norm": 1.4281353950500488, "learning_rate": 2e-05, "loss": 0.04901909, "step": 4697 }, { "epoch": 9.396, "grad_norm": 1.978698492050171, "learning_rate": 2e-05, "loss": 0.06718802, "step": 4698 }, { "epoch": 9.398, "grad_norm": 1.465959906578064, "learning_rate": 2e-05, "loss": 0.03534799, "step": 4699 }, { "epoch": 9.4, "grad_norm": 1.7106237411499023, "learning_rate": 2e-05, "loss": 0.06189927, "step": 4700 }, { "epoch": 9.402, "grad_norm": 1.349387526512146, "learning_rate": 2e-05, "loss": 0.04897711, "step": 4701 }, { "epoch": 9.404, "grad_norm": 1.9171662330627441, "learning_rate": 2e-05, "loss": 0.04699158, "step": 4702 }, { "epoch": 9.406, "grad_norm": 2.517526388168335, "learning_rate": 2e-05, "loss": 0.07169571, "step": 4703 }, { "epoch": 9.408, "grad_norm": 1.851466417312622, "learning_rate": 2e-05, "loss": 0.04784168, "step": 4704 }, { "epoch": 9.41, "grad_norm": 2.378621816635132, "learning_rate": 2e-05, "loss": 0.04365069, "step": 4705 }, { "epoch": 9.412, "grad_norm": 1.615919828414917, "learning_rate": 2e-05, "loss": 0.05020372, "step": 4706 }, { "epoch": 9.414, "grad_norm": 1.691772222518921, "learning_rate": 2e-05, "loss": 0.04145588, "step": 4707 }, { "epoch": 9.416, "grad_norm": 3.0600521564483643, "learning_rate": 2e-05, "loss": 0.04704102, "step": 4708 }, { "epoch": 9.418, "grad_norm": 1.2809059619903564, "learning_rate": 2e-05, "loss": 0.04573686, "step": 4709 }, { "epoch": 9.42, "grad_norm": 1.2681528329849243, "learning_rate": 2e-05, "loss": 0.04140832, "step": 4710 }, { "epoch": 9.422, "grad_norm": 1.5011893510818481, "learning_rate": 2e-05, "loss": 0.04901879, "step": 4711 }, { "epoch": 9.424, "grad_norm": 1.7330307960510254, "learning_rate": 2e-05, "loss": 0.05393284, "step": 4712 }, { "epoch": 9.426, "grad_norm": 1.1501388549804688, "learning_rate": 2e-05, "loss": 0.04429302, "step": 4713 }, { "epoch": 9.428, "grad_norm": 1.5243293046951294, "learning_rate": 2e-05, "loss": 0.06208751, "step": 4714 }, { "epoch": 9.43, "grad_norm": 1.6041468381881714, "learning_rate": 2e-05, "loss": 0.05365137, "step": 4715 }, { "epoch": 9.432, "grad_norm": 1.411880612373352, "learning_rate": 2e-05, "loss": 0.04755591, "step": 4716 }, { "epoch": 9.434, "grad_norm": 1.7034417390823364, "learning_rate": 2e-05, "loss": 0.05169332, "step": 4717 }, { "epoch": 9.436, "grad_norm": 1.398370623588562, "learning_rate": 2e-05, "loss": 0.04505457, "step": 4718 }, { "epoch": 9.438, "grad_norm": 1.618485927581787, "learning_rate": 2e-05, "loss": 0.05048877, "step": 4719 }, { "epoch": 9.44, "grad_norm": 2.2976009845733643, "learning_rate": 2e-05, "loss": 0.05732608, "step": 4720 }, { "epoch": 9.442, "grad_norm": 1.4977824687957764, "learning_rate": 2e-05, "loss": 0.05055287, "step": 4721 }, { "epoch": 9.444, "grad_norm": 2.0112006664276123, "learning_rate": 2e-05, "loss": 0.05900105, "step": 4722 }, { "epoch": 9.446, "grad_norm": 1.2168583869934082, "learning_rate": 2e-05, "loss": 0.04659557, "step": 4723 }, { "epoch": 9.448, "grad_norm": 1.8147743940353394, "learning_rate": 2e-05, "loss": 0.04836474, "step": 4724 }, { "epoch": 9.45, "grad_norm": 2.1160666942596436, "learning_rate": 2e-05, "loss": 0.06309254, "step": 4725 }, { "epoch": 9.452, "grad_norm": 1.0232397317886353, "learning_rate": 2e-05, "loss": 0.03620723, "step": 4726 }, { "epoch": 9.454, "grad_norm": 1.4367437362670898, "learning_rate": 2e-05, "loss": 0.04998949, "step": 4727 }, { "epoch": 9.456, "grad_norm": 1.9640501737594604, "learning_rate": 2e-05, "loss": 0.07069734, "step": 4728 }, { "epoch": 9.458, "grad_norm": 4.114424228668213, "learning_rate": 2e-05, "loss": 0.09708965, "step": 4729 }, { "epoch": 9.46, "grad_norm": 1.8628021478652954, "learning_rate": 2e-05, "loss": 0.0480114, "step": 4730 }, { "epoch": 9.462, "grad_norm": 1.896959662437439, "learning_rate": 2e-05, "loss": 0.057604, "step": 4731 }, { "epoch": 9.464, "grad_norm": 1.6538161039352417, "learning_rate": 2e-05, "loss": 0.05080595, "step": 4732 }, { "epoch": 9.466, "grad_norm": 1.931830883026123, "learning_rate": 2e-05, "loss": 0.053108, "step": 4733 }, { "epoch": 9.468, "grad_norm": 1.452933430671692, "learning_rate": 2e-05, "loss": 0.041306, "step": 4734 }, { "epoch": 9.47, "grad_norm": 1.8941861391067505, "learning_rate": 2e-05, "loss": 0.05748565, "step": 4735 }, { "epoch": 9.472, "grad_norm": 1.3065882921218872, "learning_rate": 2e-05, "loss": 0.05725362, "step": 4736 }, { "epoch": 9.474, "grad_norm": 1.3110909461975098, "learning_rate": 2e-05, "loss": 0.04384652, "step": 4737 }, { "epoch": 9.475999999999999, "grad_norm": 1.1904391050338745, "learning_rate": 2e-05, "loss": 0.0409494, "step": 4738 }, { "epoch": 9.478, "grad_norm": 1.359976053237915, "learning_rate": 2e-05, "loss": 0.05123883, "step": 4739 }, { "epoch": 9.48, "grad_norm": 1.628815770149231, "learning_rate": 2e-05, "loss": 0.0685708, "step": 4740 }, { "epoch": 9.482, "grad_norm": 1.5649138689041138, "learning_rate": 2e-05, "loss": 0.0603338, "step": 4741 }, { "epoch": 9.484, "grad_norm": 1.9411360025405884, "learning_rate": 2e-05, "loss": 0.05299631, "step": 4742 }, { "epoch": 9.486, "grad_norm": 2.5453522205352783, "learning_rate": 2e-05, "loss": 0.04924603, "step": 4743 }, { "epoch": 9.488, "grad_norm": 1.5162389278411865, "learning_rate": 2e-05, "loss": 0.04518431, "step": 4744 }, { "epoch": 9.49, "grad_norm": 1.0650982856750488, "learning_rate": 2e-05, "loss": 0.03788474, "step": 4745 }, { "epoch": 9.492, "grad_norm": 1.4776169061660767, "learning_rate": 2e-05, "loss": 0.04502506, "step": 4746 }, { "epoch": 9.494, "grad_norm": 4.540150165557861, "learning_rate": 2e-05, "loss": 0.06305524, "step": 4747 }, { "epoch": 9.496, "grad_norm": 1.8420084714889526, "learning_rate": 2e-05, "loss": 0.05973725, "step": 4748 }, { "epoch": 9.498, "grad_norm": 1.177133321762085, "learning_rate": 2e-05, "loss": 0.04210076, "step": 4749 }, { "epoch": 9.5, "grad_norm": 1.4064905643463135, "learning_rate": 2e-05, "loss": 0.04571437, "step": 4750 }, { "epoch": 9.502, "grad_norm": 1.7198388576507568, "learning_rate": 2e-05, "loss": 0.04831848, "step": 4751 }, { "epoch": 9.504, "grad_norm": 1.373218297958374, "learning_rate": 2e-05, "loss": 0.04200636, "step": 4752 }, { "epoch": 9.506, "grad_norm": 2.0869367122650146, "learning_rate": 2e-05, "loss": 0.05860278, "step": 4753 }, { "epoch": 9.508, "grad_norm": 2.03802227973938, "learning_rate": 2e-05, "loss": 0.04599741, "step": 4754 }, { "epoch": 9.51, "grad_norm": 2.718126058578491, "learning_rate": 2e-05, "loss": 0.05679199, "step": 4755 }, { "epoch": 9.512, "grad_norm": 2.1345672607421875, "learning_rate": 2e-05, "loss": 0.07050245, "step": 4756 }, { "epoch": 9.514, "grad_norm": 3.7088184356689453, "learning_rate": 2e-05, "loss": 0.07732576, "step": 4757 }, { "epoch": 9.516, "grad_norm": 1.6699403524398804, "learning_rate": 2e-05, "loss": 0.04383042, "step": 4758 }, { "epoch": 9.518, "grad_norm": 1.5457981824874878, "learning_rate": 2e-05, "loss": 0.03807002, "step": 4759 }, { "epoch": 9.52, "grad_norm": 1.9819368124008179, "learning_rate": 2e-05, "loss": 0.07731517, "step": 4760 }, { "epoch": 9.522, "grad_norm": 2.0579514503479004, "learning_rate": 2e-05, "loss": 0.06062069, "step": 4761 }, { "epoch": 9.524000000000001, "grad_norm": 1.8381377458572388, "learning_rate": 2e-05, "loss": 0.04691198, "step": 4762 }, { "epoch": 9.526, "grad_norm": 2.709359645843506, "learning_rate": 2e-05, "loss": 0.05731576, "step": 4763 }, { "epoch": 9.528, "grad_norm": 1.4428166151046753, "learning_rate": 2e-05, "loss": 0.04605105, "step": 4764 }, { "epoch": 9.53, "grad_norm": 1.5184147357940674, "learning_rate": 2e-05, "loss": 0.05696104, "step": 4765 }, { "epoch": 9.532, "grad_norm": 1.3034058809280396, "learning_rate": 2e-05, "loss": 0.03874236, "step": 4766 }, { "epoch": 9.534, "grad_norm": 1.7730201482772827, "learning_rate": 2e-05, "loss": 0.05022582, "step": 4767 }, { "epoch": 9.536, "grad_norm": 1.5613415241241455, "learning_rate": 2e-05, "loss": 0.06682578, "step": 4768 }, { "epoch": 9.538, "grad_norm": 1.9399430751800537, "learning_rate": 2e-05, "loss": 0.05712492, "step": 4769 }, { "epoch": 9.54, "grad_norm": 2.3643603324890137, "learning_rate": 2e-05, "loss": 0.0664971, "step": 4770 }, { "epoch": 9.542, "grad_norm": 1.1537812948226929, "learning_rate": 2e-05, "loss": 0.04907678, "step": 4771 }, { "epoch": 9.544, "grad_norm": 1.476564884185791, "learning_rate": 2e-05, "loss": 0.05081899, "step": 4772 }, { "epoch": 9.546, "grad_norm": 1.345429539680481, "learning_rate": 2e-05, "loss": 0.0485204, "step": 4773 }, { "epoch": 9.548, "grad_norm": 1.3945995569229126, "learning_rate": 2e-05, "loss": 0.05414914, "step": 4774 }, { "epoch": 9.55, "grad_norm": 1.2747801542282104, "learning_rate": 2e-05, "loss": 0.04904943, "step": 4775 }, { "epoch": 9.552, "grad_norm": 1.364349603652954, "learning_rate": 2e-05, "loss": 0.05541695, "step": 4776 }, { "epoch": 9.554, "grad_norm": 1.40326726436615, "learning_rate": 2e-05, "loss": 0.0485206, "step": 4777 }, { "epoch": 9.556000000000001, "grad_norm": 2.4218220710754395, "learning_rate": 2e-05, "loss": 0.05936872, "step": 4778 }, { "epoch": 9.558, "grad_norm": 1.5374068021774292, "learning_rate": 2e-05, "loss": 0.03438132, "step": 4779 }, { "epoch": 9.56, "grad_norm": 2.0976953506469727, "learning_rate": 2e-05, "loss": 0.04670816, "step": 4780 }, { "epoch": 9.562, "grad_norm": 1.6651930809020996, "learning_rate": 2e-05, "loss": 0.04262748, "step": 4781 }, { "epoch": 9.564, "grad_norm": 1.433442234992981, "learning_rate": 2e-05, "loss": 0.05769576, "step": 4782 }, { "epoch": 9.566, "grad_norm": 2.727137565612793, "learning_rate": 2e-05, "loss": 0.05369073, "step": 4783 }, { "epoch": 9.568, "grad_norm": 2.403437852859497, "learning_rate": 2e-05, "loss": 0.05518568, "step": 4784 }, { "epoch": 9.57, "grad_norm": 1.5393919944763184, "learning_rate": 2e-05, "loss": 0.05311519, "step": 4785 }, { "epoch": 9.572, "grad_norm": 1.650131344795227, "learning_rate": 2e-05, "loss": 0.05369199, "step": 4786 }, { "epoch": 9.574, "grad_norm": 1.6887848377227783, "learning_rate": 2e-05, "loss": 0.04836053, "step": 4787 }, { "epoch": 9.576, "grad_norm": 1.3404792547225952, "learning_rate": 2e-05, "loss": 0.05586416, "step": 4788 }, { "epoch": 9.578, "grad_norm": 1.5387517213821411, "learning_rate": 2e-05, "loss": 0.05821295, "step": 4789 }, { "epoch": 9.58, "grad_norm": 1.3446617126464844, "learning_rate": 2e-05, "loss": 0.04860961, "step": 4790 }, { "epoch": 9.582, "grad_norm": 1.9128057956695557, "learning_rate": 2e-05, "loss": 0.06139975, "step": 4791 }, { "epoch": 9.584, "grad_norm": 1.8623543977737427, "learning_rate": 2e-05, "loss": 0.04838707, "step": 4792 }, { "epoch": 9.586, "grad_norm": 2.0884079933166504, "learning_rate": 2e-05, "loss": 0.05497058, "step": 4793 }, { "epoch": 9.588, "grad_norm": 1.26129949092865, "learning_rate": 2e-05, "loss": 0.05032674, "step": 4794 }, { "epoch": 9.59, "grad_norm": 1.5007047653198242, "learning_rate": 2e-05, "loss": 0.0500331, "step": 4795 }, { "epoch": 9.592, "grad_norm": 2.1904873847961426, "learning_rate": 2e-05, "loss": 0.05238812, "step": 4796 }, { "epoch": 9.594, "grad_norm": 2.0764124393463135, "learning_rate": 2e-05, "loss": 0.05428065, "step": 4797 }, { "epoch": 9.596, "grad_norm": 1.8335858583450317, "learning_rate": 2e-05, "loss": 0.05815455, "step": 4798 }, { "epoch": 9.598, "grad_norm": 1.186977744102478, "learning_rate": 2e-05, "loss": 0.04371549, "step": 4799 }, { "epoch": 9.6, "grad_norm": 6.732235908508301, "learning_rate": 2e-05, "loss": 0.06942256, "step": 4800 }, { "epoch": 9.602, "grad_norm": 2.0716724395751953, "learning_rate": 2e-05, "loss": 0.0744697, "step": 4801 }, { "epoch": 9.604, "grad_norm": 1.558214783668518, "learning_rate": 2e-05, "loss": 0.06264573, "step": 4802 }, { "epoch": 9.606, "grad_norm": 4.206948280334473, "learning_rate": 2e-05, "loss": 0.055962, "step": 4803 }, { "epoch": 9.608, "grad_norm": 1.3140082359313965, "learning_rate": 2e-05, "loss": 0.04739345, "step": 4804 }, { "epoch": 9.61, "grad_norm": 1.58623468875885, "learning_rate": 2e-05, "loss": 0.0550376, "step": 4805 }, { "epoch": 9.612, "grad_norm": 1.9726959466934204, "learning_rate": 2e-05, "loss": 0.07566231, "step": 4806 }, { "epoch": 9.614, "grad_norm": 1.6119412183761597, "learning_rate": 2e-05, "loss": 0.05679318, "step": 4807 }, { "epoch": 9.616, "grad_norm": 1.3407459259033203, "learning_rate": 2e-05, "loss": 0.04322591, "step": 4808 }, { "epoch": 9.618, "grad_norm": 1.3803037405014038, "learning_rate": 2e-05, "loss": 0.04514508, "step": 4809 }, { "epoch": 9.62, "grad_norm": 1.5106648206710815, "learning_rate": 2e-05, "loss": 0.06446012, "step": 4810 }, { "epoch": 9.622, "grad_norm": 1.2000592947006226, "learning_rate": 2e-05, "loss": 0.04472655, "step": 4811 }, { "epoch": 9.624, "grad_norm": 3.254822015762329, "learning_rate": 2e-05, "loss": 0.07974194, "step": 4812 }, { "epoch": 9.626, "grad_norm": 1.7189629077911377, "learning_rate": 2e-05, "loss": 0.05568657, "step": 4813 }, { "epoch": 9.628, "grad_norm": 1.4042153358459473, "learning_rate": 2e-05, "loss": 0.04132761, "step": 4814 }, { "epoch": 9.63, "grad_norm": 2.279066562652588, "learning_rate": 2e-05, "loss": 0.04985248, "step": 4815 }, { "epoch": 9.632, "grad_norm": 1.4138849973678589, "learning_rate": 2e-05, "loss": 0.04461198, "step": 4816 }, { "epoch": 9.634, "grad_norm": 2.0648868083953857, "learning_rate": 2e-05, "loss": 0.04070833, "step": 4817 }, { "epoch": 9.636, "grad_norm": 3.6020028591156006, "learning_rate": 2e-05, "loss": 0.05208477, "step": 4818 }, { "epoch": 9.638, "grad_norm": 1.8781304359436035, "learning_rate": 2e-05, "loss": 0.05021086, "step": 4819 }, { "epoch": 9.64, "grad_norm": 1.0614352226257324, "learning_rate": 2e-05, "loss": 0.03605043, "step": 4820 }, { "epoch": 9.642, "grad_norm": 1.910072684288025, "learning_rate": 2e-05, "loss": 0.06666766, "step": 4821 }, { "epoch": 9.644, "grad_norm": 1.2410168647766113, "learning_rate": 2e-05, "loss": 0.04226237, "step": 4822 }, { "epoch": 9.646, "grad_norm": 1.6553242206573486, "learning_rate": 2e-05, "loss": 0.05368885, "step": 4823 }, { "epoch": 9.648, "grad_norm": 1.3068299293518066, "learning_rate": 2e-05, "loss": 0.03962949, "step": 4824 }, { "epoch": 9.65, "grad_norm": 2.591618537902832, "learning_rate": 2e-05, "loss": 0.06662943, "step": 4825 }, { "epoch": 9.652, "grad_norm": 2.577542781829834, "learning_rate": 2e-05, "loss": 0.06383407, "step": 4826 }, { "epoch": 9.654, "grad_norm": 1.6264952421188354, "learning_rate": 2e-05, "loss": 0.0523547, "step": 4827 }, { "epoch": 9.656, "grad_norm": 2.4551913738250732, "learning_rate": 2e-05, "loss": 0.04400642, "step": 4828 }, { "epoch": 9.658, "grad_norm": 3.1886560916900635, "learning_rate": 2e-05, "loss": 0.07130433, "step": 4829 }, { "epoch": 9.66, "grad_norm": 1.5255032777786255, "learning_rate": 2e-05, "loss": 0.04827879, "step": 4830 }, { "epoch": 9.662, "grad_norm": 2.4944541454315186, "learning_rate": 2e-05, "loss": 0.07604385, "step": 4831 }, { "epoch": 9.664, "grad_norm": 1.9444361925125122, "learning_rate": 2e-05, "loss": 0.04895295, "step": 4832 }, { "epoch": 9.666, "grad_norm": 1.206829309463501, "learning_rate": 2e-05, "loss": 0.04027706, "step": 4833 }, { "epoch": 9.668, "grad_norm": 1.0655403137207031, "learning_rate": 2e-05, "loss": 0.03361625, "step": 4834 }, { "epoch": 9.67, "grad_norm": 1.6680309772491455, "learning_rate": 2e-05, "loss": 0.05498701, "step": 4835 }, { "epoch": 9.672, "grad_norm": 1.6686928272247314, "learning_rate": 2e-05, "loss": 0.05405208, "step": 4836 }, { "epoch": 9.674, "grad_norm": 1.5973371267318726, "learning_rate": 2e-05, "loss": 0.03787687, "step": 4837 }, { "epoch": 9.676, "grad_norm": 1.5057041645050049, "learning_rate": 2e-05, "loss": 0.05292775, "step": 4838 }, { "epoch": 9.678, "grad_norm": 2.048788547515869, "learning_rate": 2e-05, "loss": 0.05188146, "step": 4839 }, { "epoch": 9.68, "grad_norm": 1.5165146589279175, "learning_rate": 2e-05, "loss": 0.05076149, "step": 4840 }, { "epoch": 9.682, "grad_norm": 1.3067723512649536, "learning_rate": 2e-05, "loss": 0.04745375, "step": 4841 }, { "epoch": 9.684, "grad_norm": 2.40364670753479, "learning_rate": 2e-05, "loss": 0.06933152, "step": 4842 }, { "epoch": 9.686, "grad_norm": 2.187824010848999, "learning_rate": 2e-05, "loss": 0.06234965, "step": 4843 }, { "epoch": 9.688, "grad_norm": 2.3751494884490967, "learning_rate": 2e-05, "loss": 0.05942409, "step": 4844 }, { "epoch": 9.69, "grad_norm": 1.3556272983551025, "learning_rate": 2e-05, "loss": 0.0271932, "step": 4845 }, { "epoch": 9.692, "grad_norm": 2.0702574253082275, "learning_rate": 2e-05, "loss": 0.04416346, "step": 4846 }, { "epoch": 9.693999999999999, "grad_norm": 2.776580572128296, "learning_rate": 2e-05, "loss": 0.06459181, "step": 4847 }, { "epoch": 9.696, "grad_norm": 1.423103928565979, "learning_rate": 2e-05, "loss": 0.04712578, "step": 4848 }, { "epoch": 9.698, "grad_norm": 1.3185054063796997, "learning_rate": 2e-05, "loss": 0.03745867, "step": 4849 }, { "epoch": 9.7, "grad_norm": 1.4054712057113647, "learning_rate": 2e-05, "loss": 0.05372046, "step": 4850 }, { "epoch": 9.702, "grad_norm": 1.5396513938903809, "learning_rate": 2e-05, "loss": 0.04799499, "step": 4851 }, { "epoch": 9.704, "grad_norm": 1.3242496252059937, "learning_rate": 2e-05, "loss": 0.04319259, "step": 4852 }, { "epoch": 9.706, "grad_norm": 1.6607216596603394, "learning_rate": 2e-05, "loss": 0.04303396, "step": 4853 }, { "epoch": 9.708, "grad_norm": 1.570757508277893, "learning_rate": 2e-05, "loss": 0.06974973, "step": 4854 }, { "epoch": 9.71, "grad_norm": 1.5978914499282837, "learning_rate": 2e-05, "loss": 0.05531528, "step": 4855 }, { "epoch": 9.712, "grad_norm": 2.2332658767700195, "learning_rate": 2e-05, "loss": 0.05066529, "step": 4856 }, { "epoch": 9.714, "grad_norm": 1.695459246635437, "learning_rate": 2e-05, "loss": 0.04276437, "step": 4857 }, { "epoch": 9.716, "grad_norm": 1.4969645738601685, "learning_rate": 2e-05, "loss": 0.0565182, "step": 4858 }, { "epoch": 9.718, "grad_norm": 2.46620512008667, "learning_rate": 2e-05, "loss": 0.04657774, "step": 4859 }, { "epoch": 9.72, "grad_norm": 2.0684173107147217, "learning_rate": 2e-05, "loss": 0.05510051, "step": 4860 }, { "epoch": 9.722, "grad_norm": 1.5146620273590088, "learning_rate": 2e-05, "loss": 0.04686711, "step": 4861 }, { "epoch": 9.724, "grad_norm": 2.6633617877960205, "learning_rate": 2e-05, "loss": 0.05086003, "step": 4862 }, { "epoch": 9.725999999999999, "grad_norm": 1.2176555395126343, "learning_rate": 2e-05, "loss": 0.0497752, "step": 4863 }, { "epoch": 9.728, "grad_norm": 1.2809525728225708, "learning_rate": 2e-05, "loss": 0.04838467, "step": 4864 }, { "epoch": 9.73, "grad_norm": 1.3932509422302246, "learning_rate": 2e-05, "loss": 0.0427762, "step": 4865 }, { "epoch": 9.732, "grad_norm": 3.854522466659546, "learning_rate": 2e-05, "loss": 0.0666661, "step": 4866 }, { "epoch": 9.734, "grad_norm": 1.537145972251892, "learning_rate": 2e-05, "loss": 0.03788938, "step": 4867 }, { "epoch": 9.736, "grad_norm": 2.0760679244995117, "learning_rate": 2e-05, "loss": 0.05522866, "step": 4868 }, { "epoch": 9.738, "grad_norm": 1.693951964378357, "learning_rate": 2e-05, "loss": 0.06659015, "step": 4869 }, { "epoch": 9.74, "grad_norm": 1.2150261402130127, "learning_rate": 2e-05, "loss": 0.04565568, "step": 4870 }, { "epoch": 9.742, "grad_norm": 1.8635746240615845, "learning_rate": 2e-05, "loss": 0.0635218, "step": 4871 }, { "epoch": 9.744, "grad_norm": 1.8649309873580933, "learning_rate": 2e-05, "loss": 0.06209478, "step": 4872 }, { "epoch": 9.746, "grad_norm": 1.2454828023910522, "learning_rate": 2e-05, "loss": 0.04050411, "step": 4873 }, { "epoch": 9.748, "grad_norm": 1.7118374109268188, "learning_rate": 2e-05, "loss": 0.0501698, "step": 4874 }, { "epoch": 9.75, "grad_norm": 1.9825464487075806, "learning_rate": 2e-05, "loss": 0.0551829, "step": 4875 }, { "epoch": 9.752, "grad_norm": 1.6307551860809326, "learning_rate": 2e-05, "loss": 0.06142005, "step": 4876 }, { "epoch": 9.754, "grad_norm": 2.2640645503997803, "learning_rate": 2e-05, "loss": 0.06031884, "step": 4877 }, { "epoch": 9.756, "grad_norm": 1.0484297275543213, "learning_rate": 2e-05, "loss": 0.05024759, "step": 4878 }, { "epoch": 9.758, "grad_norm": 1.469162940979004, "learning_rate": 2e-05, "loss": 0.04584752, "step": 4879 }, { "epoch": 9.76, "grad_norm": 1.5308761596679688, "learning_rate": 2e-05, "loss": 0.03732128, "step": 4880 }, { "epoch": 9.762, "grad_norm": 1.904474139213562, "learning_rate": 2e-05, "loss": 0.05384236, "step": 4881 }, { "epoch": 9.764, "grad_norm": 6.474231243133545, "learning_rate": 2e-05, "loss": 0.07180381, "step": 4882 }, { "epoch": 9.766, "grad_norm": 1.5289386510849, "learning_rate": 2e-05, "loss": 0.05518559, "step": 4883 }, { "epoch": 9.768, "grad_norm": 1.3522764444351196, "learning_rate": 2e-05, "loss": 0.05258585, "step": 4884 }, { "epoch": 9.77, "grad_norm": 1.4351115226745605, "learning_rate": 2e-05, "loss": 0.03858275, "step": 4885 }, { "epoch": 9.772, "grad_norm": 2.1494507789611816, "learning_rate": 2e-05, "loss": 0.05100446, "step": 4886 }, { "epoch": 9.774000000000001, "grad_norm": 2.7354249954223633, "learning_rate": 2e-05, "loss": 0.05425896, "step": 4887 }, { "epoch": 9.776, "grad_norm": 1.6631423234939575, "learning_rate": 2e-05, "loss": 0.04931759, "step": 4888 }, { "epoch": 9.778, "grad_norm": 2.6517434120178223, "learning_rate": 2e-05, "loss": 0.06315567, "step": 4889 }, { "epoch": 9.78, "grad_norm": 2.4273080825805664, "learning_rate": 2e-05, "loss": 0.07165055, "step": 4890 }, { "epoch": 9.782, "grad_norm": 1.5773106813430786, "learning_rate": 2e-05, "loss": 0.06337696, "step": 4891 }, { "epoch": 9.784, "grad_norm": 1.3168998956680298, "learning_rate": 2e-05, "loss": 0.04947937, "step": 4892 }, { "epoch": 9.786, "grad_norm": 1.380509614944458, "learning_rate": 2e-05, "loss": 0.04712594, "step": 4893 }, { "epoch": 9.788, "grad_norm": 1.479285717010498, "learning_rate": 2e-05, "loss": 0.04698952, "step": 4894 }, { "epoch": 9.79, "grad_norm": 1.8223832845687866, "learning_rate": 2e-05, "loss": 0.04973854, "step": 4895 }, { "epoch": 9.792, "grad_norm": 1.8875789642333984, "learning_rate": 2e-05, "loss": 0.05310801, "step": 4896 }, { "epoch": 9.794, "grad_norm": 2.261950969696045, "learning_rate": 2e-05, "loss": 0.05974211, "step": 4897 }, { "epoch": 9.796, "grad_norm": 1.628933072090149, "learning_rate": 2e-05, "loss": 0.04541996, "step": 4898 }, { "epoch": 9.798, "grad_norm": 2.749540328979492, "learning_rate": 2e-05, "loss": 0.06158432, "step": 4899 }, { "epoch": 9.8, "grad_norm": 2.566983938217163, "learning_rate": 2e-05, "loss": 0.05563569, "step": 4900 }, { "epoch": 9.802, "grad_norm": 1.902557134628296, "learning_rate": 2e-05, "loss": 0.04794583, "step": 4901 }, { "epoch": 9.804, "grad_norm": 3.0628104209899902, "learning_rate": 2e-05, "loss": 0.06039095, "step": 4902 }, { "epoch": 9.806000000000001, "grad_norm": 1.1863480806350708, "learning_rate": 2e-05, "loss": 0.03568749, "step": 4903 }, { "epoch": 9.808, "grad_norm": 2.010585308074951, "learning_rate": 2e-05, "loss": 0.04390045, "step": 4904 }, { "epoch": 9.81, "grad_norm": 1.8428614139556885, "learning_rate": 2e-05, "loss": 0.06692166, "step": 4905 }, { "epoch": 9.812, "grad_norm": 1.32927668094635, "learning_rate": 2e-05, "loss": 0.04278745, "step": 4906 }, { "epoch": 9.814, "grad_norm": 1.290330410003662, "learning_rate": 2e-05, "loss": 0.0594513, "step": 4907 }, { "epoch": 9.816, "grad_norm": 0.9869194030761719, "learning_rate": 2e-05, "loss": 0.0311925, "step": 4908 }, { "epoch": 9.818, "grad_norm": 1.442022681236267, "learning_rate": 2e-05, "loss": 0.04770211, "step": 4909 }, { "epoch": 9.82, "grad_norm": 1.677892804145813, "learning_rate": 2e-05, "loss": 0.04223461, "step": 4910 }, { "epoch": 9.822, "grad_norm": 1.2613219022750854, "learning_rate": 2e-05, "loss": 0.04329883, "step": 4911 }, { "epoch": 9.824, "grad_norm": 2.5619208812713623, "learning_rate": 2e-05, "loss": 0.04533758, "step": 4912 }, { "epoch": 9.826, "grad_norm": 1.5010818243026733, "learning_rate": 2e-05, "loss": 0.0549286, "step": 4913 }, { "epoch": 9.828, "grad_norm": 1.5867867469787598, "learning_rate": 2e-05, "loss": 0.05643577, "step": 4914 }, { "epoch": 9.83, "grad_norm": 2.7611711025238037, "learning_rate": 2e-05, "loss": 0.04502559, "step": 4915 }, { "epoch": 9.832, "grad_norm": 1.104813575744629, "learning_rate": 2e-05, "loss": 0.03817623, "step": 4916 }, { "epoch": 9.834, "grad_norm": 1.927043080329895, "learning_rate": 2e-05, "loss": 0.05190308, "step": 4917 }, { "epoch": 9.836, "grad_norm": 1.5192525386810303, "learning_rate": 2e-05, "loss": 0.03937745, "step": 4918 }, { "epoch": 9.838, "grad_norm": 1.665751576423645, "learning_rate": 2e-05, "loss": 0.05758657, "step": 4919 }, { "epoch": 9.84, "grad_norm": 2.303105354309082, "learning_rate": 2e-05, "loss": 0.05672823, "step": 4920 }, { "epoch": 9.842, "grad_norm": 1.7795699834823608, "learning_rate": 2e-05, "loss": 0.05460572, "step": 4921 }, { "epoch": 9.844, "grad_norm": 1.8190152645111084, "learning_rate": 2e-05, "loss": 0.04848269, "step": 4922 }, { "epoch": 9.846, "grad_norm": 1.7064250707626343, "learning_rate": 2e-05, "loss": 0.05619481, "step": 4923 }, { "epoch": 9.848, "grad_norm": 1.413002610206604, "learning_rate": 2e-05, "loss": 0.04609952, "step": 4924 }, { "epoch": 9.85, "grad_norm": 1.523747444152832, "learning_rate": 2e-05, "loss": 0.06341836, "step": 4925 }, { "epoch": 9.852, "grad_norm": 1.346929669380188, "learning_rate": 2e-05, "loss": 0.03450187, "step": 4926 }, { "epoch": 9.854, "grad_norm": 1.8626230955123901, "learning_rate": 2e-05, "loss": 0.04854961, "step": 4927 }, { "epoch": 9.856, "grad_norm": 1.1077322959899902, "learning_rate": 2e-05, "loss": 0.03540801, "step": 4928 }, { "epoch": 9.858, "grad_norm": 1.6448397636413574, "learning_rate": 2e-05, "loss": 0.04669225, "step": 4929 }, { "epoch": 9.86, "grad_norm": 2.1358819007873535, "learning_rate": 2e-05, "loss": 0.05960096, "step": 4930 }, { "epoch": 9.862, "grad_norm": 4.25274658203125, "learning_rate": 2e-05, "loss": 0.08006147, "step": 4931 }, { "epoch": 9.864, "grad_norm": 1.9246102571487427, "learning_rate": 2e-05, "loss": 0.07823011, "step": 4932 }, { "epoch": 9.866, "grad_norm": 1.5427088737487793, "learning_rate": 2e-05, "loss": 0.04289813, "step": 4933 }, { "epoch": 9.868, "grad_norm": 1.953423261642456, "learning_rate": 2e-05, "loss": 0.05337017, "step": 4934 }, { "epoch": 9.87, "grad_norm": 1.5664440393447876, "learning_rate": 2e-05, "loss": 0.05721167, "step": 4935 }, { "epoch": 9.872, "grad_norm": 2.196546792984009, "learning_rate": 2e-05, "loss": 0.05689994, "step": 4936 }, { "epoch": 9.874, "grad_norm": 1.7827051877975464, "learning_rate": 2e-05, "loss": 0.05442892, "step": 4937 }, { "epoch": 9.876, "grad_norm": 1.2307844161987305, "learning_rate": 2e-05, "loss": 0.04137726, "step": 4938 }, { "epoch": 9.878, "grad_norm": 1.3965389728546143, "learning_rate": 2e-05, "loss": 0.04177804, "step": 4939 }, { "epoch": 9.88, "grad_norm": 1.301183819770813, "learning_rate": 2e-05, "loss": 0.04382346, "step": 4940 }, { "epoch": 9.882, "grad_norm": 2.091052293777466, "learning_rate": 2e-05, "loss": 0.05116628, "step": 4941 }, { "epoch": 9.884, "grad_norm": 1.581408143043518, "learning_rate": 2e-05, "loss": 0.05110285, "step": 4942 }, { "epoch": 9.886, "grad_norm": 2.7291102409362793, "learning_rate": 2e-05, "loss": 0.06126875, "step": 4943 }, { "epoch": 9.888, "grad_norm": 2.2793638706207275, "learning_rate": 2e-05, "loss": 0.05009011, "step": 4944 }, { "epoch": 9.89, "grad_norm": 3.306272506713867, "learning_rate": 2e-05, "loss": 0.06589854, "step": 4945 }, { "epoch": 9.892, "grad_norm": 1.4898868799209595, "learning_rate": 2e-05, "loss": 0.05594926, "step": 4946 }, { "epoch": 9.894, "grad_norm": 2.2353460788726807, "learning_rate": 2e-05, "loss": 0.06084714, "step": 4947 }, { "epoch": 9.896, "grad_norm": 2.330730676651001, "learning_rate": 2e-05, "loss": 0.05106577, "step": 4948 }, { "epoch": 9.898, "grad_norm": 1.3714518547058105, "learning_rate": 2e-05, "loss": 0.04674903, "step": 4949 }, { "epoch": 9.9, "grad_norm": 1.4012672901153564, "learning_rate": 2e-05, "loss": 0.05167598, "step": 4950 }, { "epoch": 9.902, "grad_norm": 1.0671659708023071, "learning_rate": 2e-05, "loss": 0.02830281, "step": 4951 }, { "epoch": 9.904, "grad_norm": 1.8455958366394043, "learning_rate": 2e-05, "loss": 0.05752491, "step": 4952 }, { "epoch": 9.906, "grad_norm": 1.4932456016540527, "learning_rate": 2e-05, "loss": 0.04646926, "step": 4953 }, { "epoch": 9.908, "grad_norm": 1.6366294622421265, "learning_rate": 2e-05, "loss": 0.0468614, "step": 4954 }, { "epoch": 9.91, "grad_norm": 2.687469720840454, "learning_rate": 2e-05, "loss": 0.0775534, "step": 4955 }, { "epoch": 9.912, "grad_norm": 3.0272841453552246, "learning_rate": 2e-05, "loss": 0.06905685, "step": 4956 }, { "epoch": 9.914, "grad_norm": 2.0984997749328613, "learning_rate": 2e-05, "loss": 0.07175948, "step": 4957 }, { "epoch": 9.916, "grad_norm": 2.3406221866607666, "learning_rate": 2e-05, "loss": 0.06852732, "step": 4958 }, { "epoch": 9.918, "grad_norm": 2.1772301197052, "learning_rate": 2e-05, "loss": 0.0689801, "step": 4959 }, { "epoch": 9.92, "grad_norm": 1.6257960796356201, "learning_rate": 2e-05, "loss": 0.05941542, "step": 4960 }, { "epoch": 9.922, "grad_norm": 2.138094663619995, "learning_rate": 2e-05, "loss": 0.04775422, "step": 4961 }, { "epoch": 9.924, "grad_norm": 2.711124897003174, "learning_rate": 2e-05, "loss": 0.06104578, "step": 4962 }, { "epoch": 9.926, "grad_norm": 2.045482873916626, "learning_rate": 2e-05, "loss": 0.06203432, "step": 4963 }, { "epoch": 9.928, "grad_norm": 1.3155605792999268, "learning_rate": 2e-05, "loss": 0.05150554, "step": 4964 }, { "epoch": 9.93, "grad_norm": 1.79209566116333, "learning_rate": 2e-05, "loss": 0.04305602, "step": 4965 }, { "epoch": 9.932, "grad_norm": 2.037360191345215, "learning_rate": 2e-05, "loss": 0.05929771, "step": 4966 }, { "epoch": 9.934, "grad_norm": 1.8728766441345215, "learning_rate": 2e-05, "loss": 0.05034747, "step": 4967 }, { "epoch": 9.936, "grad_norm": 1.58304762840271, "learning_rate": 2e-05, "loss": 0.04624342, "step": 4968 }, { "epoch": 9.938, "grad_norm": 1.070703387260437, "learning_rate": 2e-05, "loss": 0.04769868, "step": 4969 }, { "epoch": 9.94, "grad_norm": 1.5392509698867798, "learning_rate": 2e-05, "loss": 0.04278378, "step": 4970 }, { "epoch": 9.942, "grad_norm": 1.853967308998108, "learning_rate": 2e-05, "loss": 0.06206827, "step": 4971 }, { "epoch": 9.943999999999999, "grad_norm": 1.6598646640777588, "learning_rate": 2e-05, "loss": 0.05348482, "step": 4972 }, { "epoch": 9.946, "grad_norm": 1.3386852741241455, "learning_rate": 2e-05, "loss": 0.05379867, "step": 4973 }, { "epoch": 9.948, "grad_norm": 1.0884183645248413, "learning_rate": 2e-05, "loss": 0.04097927, "step": 4974 }, { "epoch": 9.95, "grad_norm": 2.1080918312072754, "learning_rate": 2e-05, "loss": 0.05956004, "step": 4975 }, { "epoch": 9.952, "grad_norm": 3.454415798187256, "learning_rate": 2e-05, "loss": 0.05866547, "step": 4976 }, { "epoch": 9.954, "grad_norm": 1.8443541526794434, "learning_rate": 2e-05, "loss": 0.05498111, "step": 4977 }, { "epoch": 9.956, "grad_norm": 1.2437260150909424, "learning_rate": 2e-05, "loss": 0.04896142, "step": 4978 }, { "epoch": 9.958, "grad_norm": 2.130713701248169, "learning_rate": 2e-05, "loss": 0.05966695, "step": 4979 }, { "epoch": 9.96, "grad_norm": 2.0753307342529297, "learning_rate": 2e-05, "loss": 0.0618369, "step": 4980 }, { "epoch": 9.962, "grad_norm": 2.517958879470825, "learning_rate": 2e-05, "loss": 0.06341064, "step": 4981 }, { "epoch": 9.964, "grad_norm": 1.4736145734786987, "learning_rate": 2e-05, "loss": 0.05388065, "step": 4982 }, { "epoch": 9.966, "grad_norm": 2.1390750408172607, "learning_rate": 2e-05, "loss": 0.0556998, "step": 4983 }, { "epoch": 9.968, "grad_norm": 1.417447566986084, "learning_rate": 2e-05, "loss": 0.04931127, "step": 4984 }, { "epoch": 9.97, "grad_norm": 1.8981767892837524, "learning_rate": 2e-05, "loss": 0.04368909, "step": 4985 }, { "epoch": 9.972, "grad_norm": 2.0923216342926025, "learning_rate": 2e-05, "loss": 0.0481521, "step": 4986 }, { "epoch": 9.974, "grad_norm": 2.3560774326324463, "learning_rate": 2e-05, "loss": 0.04617333, "step": 4987 }, { "epoch": 9.975999999999999, "grad_norm": 2.1225790977478027, "learning_rate": 2e-05, "loss": 0.06341647, "step": 4988 }, { "epoch": 9.978, "grad_norm": 1.4702603816986084, "learning_rate": 2e-05, "loss": 0.06608592, "step": 4989 }, { "epoch": 9.98, "grad_norm": 1.437772512435913, "learning_rate": 2e-05, "loss": 0.0440442, "step": 4990 }, { "epoch": 9.982, "grad_norm": 1.7781963348388672, "learning_rate": 2e-05, "loss": 0.04864323, "step": 4991 }, { "epoch": 9.984, "grad_norm": 2.0641047954559326, "learning_rate": 2e-05, "loss": 0.05038781, "step": 4992 }, { "epoch": 9.986, "grad_norm": 1.471038818359375, "learning_rate": 2e-05, "loss": 0.05805187, "step": 4993 }, { "epoch": 9.988, "grad_norm": 1.7074260711669922, "learning_rate": 2e-05, "loss": 0.04913766, "step": 4994 }, { "epoch": 9.99, "grad_norm": 1.5524414777755737, "learning_rate": 2e-05, "loss": 0.04417773, "step": 4995 }, { "epoch": 9.992, "grad_norm": 2.4501919746398926, "learning_rate": 2e-05, "loss": 0.05671235, "step": 4996 }, { "epoch": 9.994, "grad_norm": 1.801255226135254, "learning_rate": 2e-05, "loss": 0.06374493, "step": 4997 }, { "epoch": 9.996, "grad_norm": 1.989261507987976, "learning_rate": 2e-05, "loss": 0.04868555, "step": 4998 }, { "epoch": 9.998, "grad_norm": 1.389630675315857, "learning_rate": 2e-05, "loss": 0.0455512, "step": 4999 }, { "epoch": 10.0, "grad_norm": 1.467817783355713, "learning_rate": 2e-05, "loss": 0.06919758, "step": 5000 }, { "epoch": 10.0, "eval_performance": { "AngleClassification_1": 0.988, "AngleClassification_2": 0.994, "AngleClassification_3": 0.9560878243512974, "Equal_1": 0.998, "Equal_2": 0.9600798403193613, "Equal_3": 0.8542914171656687, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9900199600798403, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.988, "Perpendicular_1": 0.988, "Perpendicular_2": 0.916, "Perpendicular_3": 0.5741482965931863, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.994, "PointLiesOnCircle_3": 0.996, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9899799599198397, "PointLiesOnLine_3": 0.9580838323353293 }, "eval_runtime": 320.6792, "eval_samples_per_second": 32.743, "eval_steps_per_second": 0.655, "step": 5000 }, { "epoch": 10.002, "grad_norm": 1.6074891090393066, "learning_rate": 2e-05, "loss": 0.05639043, "step": 5001 }, { "epoch": 10.004, "grad_norm": 1.6433279514312744, "learning_rate": 2e-05, "loss": 0.06391891, "step": 5002 }, { "epoch": 10.006, "grad_norm": 1.0623618364334106, "learning_rate": 2e-05, "loss": 0.0363953, "step": 5003 }, { "epoch": 10.008, "grad_norm": 2.0282094478607178, "learning_rate": 2e-05, "loss": 0.05439749, "step": 5004 }, { "epoch": 10.01, "grad_norm": 2.1043431758880615, "learning_rate": 2e-05, "loss": 0.05599514, "step": 5005 }, { "epoch": 10.012, "grad_norm": 1.4989700317382812, "learning_rate": 2e-05, "loss": 0.04970724, "step": 5006 }, { "epoch": 10.014, "grad_norm": 1.6386247873306274, "learning_rate": 2e-05, "loss": 0.04516533, "step": 5007 }, { "epoch": 10.016, "grad_norm": 1.3612985610961914, "learning_rate": 2e-05, "loss": 0.05006377, "step": 5008 }, { "epoch": 10.018, "grad_norm": 1.6199952363967896, "learning_rate": 2e-05, "loss": 0.0529905, "step": 5009 }, { "epoch": 10.02, "grad_norm": 1.690873384475708, "learning_rate": 2e-05, "loss": 0.06474019, "step": 5010 }, { "epoch": 10.022, "grad_norm": 1.0925859212875366, "learning_rate": 2e-05, "loss": 0.04544307, "step": 5011 }, { "epoch": 10.024, "grad_norm": 2.489445447921753, "learning_rate": 2e-05, "loss": 0.04316767, "step": 5012 }, { "epoch": 10.026, "grad_norm": 1.0583429336547852, "learning_rate": 2e-05, "loss": 0.03954223, "step": 5013 }, { "epoch": 10.028, "grad_norm": 2.259428024291992, "learning_rate": 2e-05, "loss": 0.05867039, "step": 5014 }, { "epoch": 10.03, "grad_norm": 1.1833878755569458, "learning_rate": 2e-05, "loss": 0.04461697, "step": 5015 }, { "epoch": 10.032, "grad_norm": 1.7030174732208252, "learning_rate": 2e-05, "loss": 0.06960414, "step": 5016 }, { "epoch": 10.034, "grad_norm": 2.5388693809509277, "learning_rate": 2e-05, "loss": 0.04828655, "step": 5017 }, { "epoch": 10.036, "grad_norm": 1.4048974514007568, "learning_rate": 2e-05, "loss": 0.03989777, "step": 5018 }, { "epoch": 10.038, "grad_norm": 1.6983534097671509, "learning_rate": 2e-05, "loss": 0.04251143, "step": 5019 }, { "epoch": 10.04, "grad_norm": 2.109666347503662, "learning_rate": 2e-05, "loss": 0.04322487, "step": 5020 }, { "epoch": 10.042, "grad_norm": 1.6029553413391113, "learning_rate": 2e-05, "loss": 0.06342378, "step": 5021 }, { "epoch": 10.044, "grad_norm": 1.3456430435180664, "learning_rate": 2e-05, "loss": 0.04814564, "step": 5022 }, { "epoch": 10.046, "grad_norm": 1.7737743854522705, "learning_rate": 2e-05, "loss": 0.0458786, "step": 5023 }, { "epoch": 10.048, "grad_norm": 1.2273495197296143, "learning_rate": 2e-05, "loss": 0.03703401, "step": 5024 }, { "epoch": 10.05, "grad_norm": 2.4264330863952637, "learning_rate": 2e-05, "loss": 0.07618839, "step": 5025 }, { "epoch": 10.052, "grad_norm": 1.851207971572876, "learning_rate": 2e-05, "loss": 0.07478073, "step": 5026 }, { "epoch": 10.054, "grad_norm": 1.3361573219299316, "learning_rate": 2e-05, "loss": 0.05228508, "step": 5027 }, { "epoch": 10.056, "grad_norm": 2.115432024002075, "learning_rate": 2e-05, "loss": 0.06497653, "step": 5028 }, { "epoch": 10.058, "grad_norm": 1.5752816200256348, "learning_rate": 2e-05, "loss": 0.04064302, "step": 5029 }, { "epoch": 10.06, "grad_norm": 1.224853754043579, "learning_rate": 2e-05, "loss": 0.04076491, "step": 5030 }, { "epoch": 10.062, "grad_norm": 1.0174431800842285, "learning_rate": 2e-05, "loss": 0.02776841, "step": 5031 }, { "epoch": 10.064, "grad_norm": 1.4870493412017822, "learning_rate": 2e-05, "loss": 0.05205495, "step": 5032 }, { "epoch": 10.066, "grad_norm": 1.233358383178711, "learning_rate": 2e-05, "loss": 0.04476997, "step": 5033 }, { "epoch": 10.068, "grad_norm": 1.4965168237686157, "learning_rate": 2e-05, "loss": 0.06173176, "step": 5034 }, { "epoch": 10.07, "grad_norm": 1.2926126718521118, "learning_rate": 2e-05, "loss": 0.05273168, "step": 5035 }, { "epoch": 10.072, "grad_norm": 1.9668138027191162, "learning_rate": 2e-05, "loss": 0.05729368, "step": 5036 }, { "epoch": 10.074, "grad_norm": 1.9118866920471191, "learning_rate": 2e-05, "loss": 0.07089907, "step": 5037 }, { "epoch": 10.076, "grad_norm": 1.567081093788147, "learning_rate": 2e-05, "loss": 0.04848254, "step": 5038 }, { "epoch": 10.078, "grad_norm": 1.3504266738891602, "learning_rate": 2e-05, "loss": 0.04428206, "step": 5039 }, { "epoch": 10.08, "grad_norm": 1.5226638317108154, "learning_rate": 2e-05, "loss": 0.05267453, "step": 5040 }, { "epoch": 10.082, "grad_norm": 2.2543962001800537, "learning_rate": 2e-05, "loss": 0.05228227, "step": 5041 }, { "epoch": 10.084, "grad_norm": 1.4806350469589233, "learning_rate": 2e-05, "loss": 0.04181156, "step": 5042 }, { "epoch": 10.086, "grad_norm": 1.2908692359924316, "learning_rate": 2e-05, "loss": 0.04454116, "step": 5043 }, { "epoch": 10.088, "grad_norm": 2.006575584411621, "learning_rate": 2e-05, "loss": 0.05524797, "step": 5044 }, { "epoch": 10.09, "grad_norm": 1.949083685874939, "learning_rate": 2e-05, "loss": 0.04127013, "step": 5045 }, { "epoch": 10.092, "grad_norm": 1.6486314535140991, "learning_rate": 2e-05, "loss": 0.06116631, "step": 5046 }, { "epoch": 10.094, "grad_norm": 1.8648558855056763, "learning_rate": 2e-05, "loss": 0.04682179, "step": 5047 }, { "epoch": 10.096, "grad_norm": 2.304664134979248, "learning_rate": 2e-05, "loss": 0.05680639, "step": 5048 }, { "epoch": 10.098, "grad_norm": 1.4848788976669312, "learning_rate": 2e-05, "loss": 0.03981915, "step": 5049 }, { "epoch": 10.1, "grad_norm": 1.9800957441329956, "learning_rate": 2e-05, "loss": 0.03908926, "step": 5050 }, { "epoch": 10.102, "grad_norm": 1.5394706726074219, "learning_rate": 2e-05, "loss": 0.05664773, "step": 5051 }, { "epoch": 10.104, "grad_norm": 2.171658992767334, "learning_rate": 2e-05, "loss": 0.05971353, "step": 5052 }, { "epoch": 10.106, "grad_norm": 2.1404213905334473, "learning_rate": 2e-05, "loss": 0.05590626, "step": 5053 }, { "epoch": 10.108, "grad_norm": 3.454113483428955, "learning_rate": 2e-05, "loss": 0.0404954, "step": 5054 }, { "epoch": 10.11, "grad_norm": 4.064882278442383, "learning_rate": 2e-05, "loss": 0.0535365, "step": 5055 }, { "epoch": 10.112, "grad_norm": 1.671109914779663, "learning_rate": 2e-05, "loss": 0.05773462, "step": 5056 }, { "epoch": 10.114, "grad_norm": 2.3881421089172363, "learning_rate": 2e-05, "loss": 0.05546438, "step": 5057 }, { "epoch": 10.116, "grad_norm": 2.157351493835449, "learning_rate": 2e-05, "loss": 0.05277005, "step": 5058 }, { "epoch": 10.118, "grad_norm": 2.0532100200653076, "learning_rate": 2e-05, "loss": 0.05053755, "step": 5059 }, { "epoch": 10.12, "grad_norm": 1.3669548034667969, "learning_rate": 2e-05, "loss": 0.05167579, "step": 5060 }, { "epoch": 10.122, "grad_norm": 1.5119102001190186, "learning_rate": 2e-05, "loss": 0.05561122, "step": 5061 }, { "epoch": 10.124, "grad_norm": 1.244271159172058, "learning_rate": 2e-05, "loss": 0.03419596, "step": 5062 }, { "epoch": 10.126, "grad_norm": 1.6181620359420776, "learning_rate": 2e-05, "loss": 0.06645474, "step": 5063 }, { "epoch": 10.128, "grad_norm": 2.0490105152130127, "learning_rate": 2e-05, "loss": 0.05366251, "step": 5064 }, { "epoch": 10.13, "grad_norm": 1.2846488952636719, "learning_rate": 2e-05, "loss": 0.04805609, "step": 5065 }, { "epoch": 10.132, "grad_norm": 1.5411098003387451, "learning_rate": 2e-05, "loss": 0.04068913, "step": 5066 }, { "epoch": 10.134, "grad_norm": 1.4525972604751587, "learning_rate": 2e-05, "loss": 0.04148395, "step": 5067 }, { "epoch": 10.136, "grad_norm": 1.9913413524627686, "learning_rate": 2e-05, "loss": 0.05251299, "step": 5068 }, { "epoch": 10.138, "grad_norm": 1.329590082168579, "learning_rate": 2e-05, "loss": 0.03482792, "step": 5069 }, { "epoch": 10.14, "grad_norm": 1.917649507522583, "learning_rate": 2e-05, "loss": 0.0389032, "step": 5070 }, { "epoch": 10.142, "grad_norm": 1.8143230676651, "learning_rate": 2e-05, "loss": 0.06296816, "step": 5071 }, { "epoch": 10.144, "grad_norm": 1.9044404029846191, "learning_rate": 2e-05, "loss": 0.07797446, "step": 5072 }, { "epoch": 10.146, "grad_norm": 1.3988782167434692, "learning_rate": 2e-05, "loss": 0.03789392, "step": 5073 }, { "epoch": 10.148, "grad_norm": 2.06538987159729, "learning_rate": 2e-05, "loss": 0.05376375, "step": 5074 }, { "epoch": 10.15, "grad_norm": 1.908994436264038, "learning_rate": 2e-05, "loss": 0.05527605, "step": 5075 }, { "epoch": 10.152, "grad_norm": 1.5503228902816772, "learning_rate": 2e-05, "loss": 0.04854936, "step": 5076 }, { "epoch": 10.154, "grad_norm": 1.1571024656295776, "learning_rate": 2e-05, "loss": 0.03305773, "step": 5077 }, { "epoch": 10.156, "grad_norm": 2.0785417556762695, "learning_rate": 2e-05, "loss": 0.06657989, "step": 5078 }, { "epoch": 10.158, "grad_norm": 2.85941743850708, "learning_rate": 2e-05, "loss": 0.05014766, "step": 5079 }, { "epoch": 10.16, "grad_norm": 2.1629788875579834, "learning_rate": 2e-05, "loss": 0.05455408, "step": 5080 }, { "epoch": 10.162, "grad_norm": 3.536186933517456, "learning_rate": 2e-05, "loss": 0.04600221, "step": 5081 }, { "epoch": 10.164, "grad_norm": 1.7894084453582764, "learning_rate": 2e-05, "loss": 0.03903076, "step": 5082 }, { "epoch": 10.166, "grad_norm": 1.6507161855697632, "learning_rate": 2e-05, "loss": 0.03541517, "step": 5083 }, { "epoch": 10.168, "grad_norm": 1.4610087871551514, "learning_rate": 2e-05, "loss": 0.05326772, "step": 5084 }, { "epoch": 10.17, "grad_norm": 1.1885812282562256, "learning_rate": 2e-05, "loss": 0.04884129, "step": 5085 }, { "epoch": 10.172, "grad_norm": 1.5193572044372559, "learning_rate": 2e-05, "loss": 0.05214652, "step": 5086 }, { "epoch": 10.174, "grad_norm": 2.79036808013916, "learning_rate": 2e-05, "loss": 0.07177282, "step": 5087 }, { "epoch": 10.176, "grad_norm": 2.4795050621032715, "learning_rate": 2e-05, "loss": 0.05598135, "step": 5088 }, { "epoch": 10.178, "grad_norm": 1.8323618173599243, "learning_rate": 2e-05, "loss": 0.04974123, "step": 5089 }, { "epoch": 10.18, "grad_norm": 1.8436342477798462, "learning_rate": 2e-05, "loss": 0.05738843, "step": 5090 }, { "epoch": 10.182, "grad_norm": 1.8771424293518066, "learning_rate": 2e-05, "loss": 0.06057268, "step": 5091 }, { "epoch": 10.184, "grad_norm": 1.8508983850479126, "learning_rate": 2e-05, "loss": 0.05036641, "step": 5092 }, { "epoch": 10.186, "grad_norm": 1.3412444591522217, "learning_rate": 2e-05, "loss": 0.03264994, "step": 5093 }, { "epoch": 10.188, "grad_norm": 1.8948533535003662, "learning_rate": 2e-05, "loss": 0.04661043, "step": 5094 }, { "epoch": 10.19, "grad_norm": 1.1658676862716675, "learning_rate": 2e-05, "loss": 0.0359153, "step": 5095 }, { "epoch": 10.192, "grad_norm": 2.3413681983947754, "learning_rate": 2e-05, "loss": 0.06752145, "step": 5096 }, { "epoch": 10.194, "grad_norm": 2.0791563987731934, "learning_rate": 2e-05, "loss": 0.03742487, "step": 5097 }, { "epoch": 10.196, "grad_norm": 2.3035924434661865, "learning_rate": 2e-05, "loss": 0.05462205, "step": 5098 }, { "epoch": 10.198, "grad_norm": 1.5185017585754395, "learning_rate": 2e-05, "loss": 0.0364113, "step": 5099 }, { "epoch": 10.2, "grad_norm": 2.8155453205108643, "learning_rate": 2e-05, "loss": 0.06460752, "step": 5100 }, { "epoch": 10.202, "grad_norm": 2.565056800842285, "learning_rate": 2e-05, "loss": 0.04938511, "step": 5101 }, { "epoch": 10.204, "grad_norm": 2.4186198711395264, "learning_rate": 2e-05, "loss": 0.05762579, "step": 5102 }, { "epoch": 10.206, "grad_norm": 1.7323216199874878, "learning_rate": 2e-05, "loss": 0.04478491, "step": 5103 }, { "epoch": 10.208, "grad_norm": 2.4618570804595947, "learning_rate": 2e-05, "loss": 0.0601164, "step": 5104 }, { "epoch": 10.21, "grad_norm": 1.4251370429992676, "learning_rate": 2e-05, "loss": 0.05556741, "step": 5105 }, { "epoch": 10.212, "grad_norm": 2.33821177482605, "learning_rate": 2e-05, "loss": 0.07309254, "step": 5106 }, { "epoch": 10.214, "grad_norm": 1.3154981136322021, "learning_rate": 2e-05, "loss": 0.04135425, "step": 5107 }, { "epoch": 10.216, "grad_norm": 2.5417816638946533, "learning_rate": 2e-05, "loss": 0.06982885, "step": 5108 }, { "epoch": 10.218, "grad_norm": 2.1002843379974365, "learning_rate": 2e-05, "loss": 0.05075446, "step": 5109 }, { "epoch": 10.22, "grad_norm": 1.574785828590393, "learning_rate": 2e-05, "loss": 0.04077484, "step": 5110 }, { "epoch": 10.222, "grad_norm": 1.7429002523422241, "learning_rate": 2e-05, "loss": 0.04333501, "step": 5111 }, { "epoch": 10.224, "grad_norm": 1.4146769046783447, "learning_rate": 2e-05, "loss": 0.04594901, "step": 5112 }, { "epoch": 10.226, "grad_norm": 2.0971226692199707, "learning_rate": 2e-05, "loss": 0.06643675, "step": 5113 }, { "epoch": 10.228, "grad_norm": 1.6116442680358887, "learning_rate": 2e-05, "loss": 0.04431507, "step": 5114 }, { "epoch": 10.23, "grad_norm": 3.321749210357666, "learning_rate": 2e-05, "loss": 0.06666962, "step": 5115 }, { "epoch": 10.232, "grad_norm": 1.248750925064087, "learning_rate": 2e-05, "loss": 0.05188263, "step": 5116 }, { "epoch": 10.234, "grad_norm": 1.4745240211486816, "learning_rate": 2e-05, "loss": 0.0565898, "step": 5117 }, { "epoch": 10.236, "grad_norm": 1.7704201936721802, "learning_rate": 2e-05, "loss": 0.04397082, "step": 5118 }, { "epoch": 10.238, "grad_norm": 1.7633501291275024, "learning_rate": 2e-05, "loss": 0.0520197, "step": 5119 }, { "epoch": 10.24, "grad_norm": 1.8023216724395752, "learning_rate": 2e-05, "loss": 0.06178421, "step": 5120 }, { "epoch": 10.242, "grad_norm": 1.9784626960754395, "learning_rate": 2e-05, "loss": 0.05770921, "step": 5121 }, { "epoch": 10.244, "grad_norm": 1.6166801452636719, "learning_rate": 2e-05, "loss": 0.05028439, "step": 5122 }, { "epoch": 10.246, "grad_norm": 1.2488207817077637, "learning_rate": 2e-05, "loss": 0.05257055, "step": 5123 }, { "epoch": 10.248, "grad_norm": 1.3418329954147339, "learning_rate": 2e-05, "loss": 0.03734386, "step": 5124 }, { "epoch": 10.25, "grad_norm": 1.5167311429977417, "learning_rate": 2e-05, "loss": 0.0557439, "step": 5125 }, { "epoch": 10.252, "grad_norm": 1.087599515914917, "learning_rate": 2e-05, "loss": 0.02959022, "step": 5126 }, { "epoch": 10.254, "grad_norm": 1.6808693408966064, "learning_rate": 2e-05, "loss": 0.04390074, "step": 5127 }, { "epoch": 10.256, "grad_norm": 1.909277319908142, "learning_rate": 2e-05, "loss": 0.05320806, "step": 5128 }, { "epoch": 10.258, "grad_norm": 2.03977370262146, "learning_rate": 2e-05, "loss": 0.05649532, "step": 5129 }, { "epoch": 10.26, "grad_norm": 1.8067188262939453, "learning_rate": 2e-05, "loss": 0.06265157, "step": 5130 }, { "epoch": 10.262, "grad_norm": 2.781466007232666, "learning_rate": 2e-05, "loss": 0.06059079, "step": 5131 }, { "epoch": 10.264, "grad_norm": 2.2374839782714844, "learning_rate": 2e-05, "loss": 0.04488031, "step": 5132 }, { "epoch": 10.266, "grad_norm": 1.5937252044677734, "learning_rate": 2e-05, "loss": 0.04937337, "step": 5133 }, { "epoch": 10.268, "grad_norm": 1.5034781694412231, "learning_rate": 2e-05, "loss": 0.04895452, "step": 5134 }, { "epoch": 10.27, "grad_norm": 2.400557279586792, "learning_rate": 2e-05, "loss": 0.06906497, "step": 5135 }, { "epoch": 10.272, "grad_norm": 1.635102391242981, "learning_rate": 2e-05, "loss": 0.03693788, "step": 5136 }, { "epoch": 10.274000000000001, "grad_norm": 1.3570059537887573, "learning_rate": 2e-05, "loss": 0.05358264, "step": 5137 }, { "epoch": 10.276, "grad_norm": 1.6576945781707764, "learning_rate": 2e-05, "loss": 0.05347702, "step": 5138 }, { "epoch": 10.278, "grad_norm": 1.7237348556518555, "learning_rate": 2e-05, "loss": 0.04945024, "step": 5139 }, { "epoch": 10.28, "grad_norm": 1.9865562915802002, "learning_rate": 2e-05, "loss": 0.05404085, "step": 5140 }, { "epoch": 10.282, "grad_norm": 2.0699520111083984, "learning_rate": 2e-05, "loss": 0.04398727, "step": 5141 }, { "epoch": 10.284, "grad_norm": 1.870590329170227, "learning_rate": 2e-05, "loss": 0.05769954, "step": 5142 }, { "epoch": 10.286, "grad_norm": 1.6009098291397095, "learning_rate": 2e-05, "loss": 0.05270899, "step": 5143 }, { "epoch": 10.288, "grad_norm": 1.0542221069335938, "learning_rate": 2e-05, "loss": 0.03725312, "step": 5144 }, { "epoch": 10.29, "grad_norm": 1.4165904521942139, "learning_rate": 2e-05, "loss": 0.04188281, "step": 5145 }, { "epoch": 10.292, "grad_norm": 3.5521252155303955, "learning_rate": 2e-05, "loss": 0.06759865, "step": 5146 }, { "epoch": 10.294, "grad_norm": 1.433131217956543, "learning_rate": 2e-05, "loss": 0.05106829, "step": 5147 }, { "epoch": 10.296, "grad_norm": 1.4577850103378296, "learning_rate": 2e-05, "loss": 0.05083516, "step": 5148 }, { "epoch": 10.298, "grad_norm": 1.3248356580734253, "learning_rate": 2e-05, "loss": 0.04911773, "step": 5149 }, { "epoch": 10.3, "grad_norm": 1.37599778175354, "learning_rate": 2e-05, "loss": 0.04473313, "step": 5150 }, { "epoch": 10.302, "grad_norm": 1.5513116121292114, "learning_rate": 2e-05, "loss": 0.04096273, "step": 5151 }, { "epoch": 10.304, "grad_norm": 1.411054015159607, "learning_rate": 2e-05, "loss": 0.04515607, "step": 5152 }, { "epoch": 10.306, "grad_norm": 2.094132661819458, "learning_rate": 2e-05, "loss": 0.04543452, "step": 5153 }, { "epoch": 10.308, "grad_norm": 1.480191707611084, "learning_rate": 2e-05, "loss": 0.04611487, "step": 5154 }, { "epoch": 10.31, "grad_norm": 1.5969020128250122, "learning_rate": 2e-05, "loss": 0.05584262, "step": 5155 }, { "epoch": 10.312, "grad_norm": 1.505147099494934, "learning_rate": 2e-05, "loss": 0.05068387, "step": 5156 }, { "epoch": 10.314, "grad_norm": 1.7500067949295044, "learning_rate": 2e-05, "loss": 0.04701269, "step": 5157 }, { "epoch": 10.316, "grad_norm": 1.2172950506210327, "learning_rate": 2e-05, "loss": 0.05006229, "step": 5158 }, { "epoch": 10.318, "grad_norm": 1.3415688276290894, "learning_rate": 2e-05, "loss": 0.04002773, "step": 5159 }, { "epoch": 10.32, "grad_norm": 1.4847948551177979, "learning_rate": 2e-05, "loss": 0.05143999, "step": 5160 }, { "epoch": 10.322, "grad_norm": 1.2036330699920654, "learning_rate": 2e-05, "loss": 0.05749707, "step": 5161 }, { "epoch": 10.324, "grad_norm": 1.8553268909454346, "learning_rate": 2e-05, "loss": 0.06461729, "step": 5162 }, { "epoch": 10.326, "grad_norm": 1.2511320114135742, "learning_rate": 2e-05, "loss": 0.04641117, "step": 5163 }, { "epoch": 10.328, "grad_norm": 3.30887770652771, "learning_rate": 2e-05, "loss": 0.0428102, "step": 5164 }, { "epoch": 10.33, "grad_norm": 3.343966007232666, "learning_rate": 2e-05, "loss": 0.06479934, "step": 5165 }, { "epoch": 10.332, "grad_norm": 1.185491919517517, "learning_rate": 2e-05, "loss": 0.0378601, "step": 5166 }, { "epoch": 10.334, "grad_norm": 1.7531931400299072, "learning_rate": 2e-05, "loss": 0.04356651, "step": 5167 }, { "epoch": 10.336, "grad_norm": 1.5299067497253418, "learning_rate": 2e-05, "loss": 0.0489331, "step": 5168 }, { "epoch": 10.338, "grad_norm": 1.7191680669784546, "learning_rate": 2e-05, "loss": 0.05703234, "step": 5169 }, { "epoch": 10.34, "grad_norm": 2.2998485565185547, "learning_rate": 2e-05, "loss": 0.05554244, "step": 5170 }, { "epoch": 10.342, "grad_norm": 1.4917652606964111, "learning_rate": 2e-05, "loss": 0.0459786, "step": 5171 }, { "epoch": 10.344, "grad_norm": 2.3985610008239746, "learning_rate": 2e-05, "loss": 0.07849705, "step": 5172 }, { "epoch": 10.346, "grad_norm": 1.3253332376480103, "learning_rate": 2e-05, "loss": 0.03654364, "step": 5173 }, { "epoch": 10.348, "grad_norm": 1.9052636623382568, "learning_rate": 2e-05, "loss": 0.06166274, "step": 5174 }, { "epoch": 10.35, "grad_norm": 1.6939311027526855, "learning_rate": 2e-05, "loss": 0.04354223, "step": 5175 }, { "epoch": 10.352, "grad_norm": 2.325695753097534, "learning_rate": 2e-05, "loss": 0.06419559, "step": 5176 }, { "epoch": 10.354, "grad_norm": 2.277292013168335, "learning_rate": 2e-05, "loss": 0.05193496, "step": 5177 }, { "epoch": 10.356, "grad_norm": 1.4175512790679932, "learning_rate": 2e-05, "loss": 0.04061021, "step": 5178 }, { "epoch": 10.358, "grad_norm": 2.847827434539795, "learning_rate": 2e-05, "loss": 0.05605824, "step": 5179 }, { "epoch": 10.36, "grad_norm": 1.4422760009765625, "learning_rate": 2e-05, "loss": 0.05320713, "step": 5180 }, { "epoch": 10.362, "grad_norm": 1.720396637916565, "learning_rate": 2e-05, "loss": 0.05881133, "step": 5181 }, { "epoch": 10.364, "grad_norm": 1.4462811946868896, "learning_rate": 2e-05, "loss": 0.05044965, "step": 5182 }, { "epoch": 10.366, "grad_norm": 1.1778342723846436, "learning_rate": 2e-05, "loss": 0.03916883, "step": 5183 }, { "epoch": 10.368, "grad_norm": 0.9606398344039917, "learning_rate": 2e-05, "loss": 0.0260303, "step": 5184 }, { "epoch": 10.37, "grad_norm": 3.025618553161621, "learning_rate": 2e-05, "loss": 0.07479943, "step": 5185 }, { "epoch": 10.372, "grad_norm": 1.7541829347610474, "learning_rate": 2e-05, "loss": 0.05436843, "step": 5186 }, { "epoch": 10.374, "grad_norm": 1.5163887739181519, "learning_rate": 2e-05, "loss": 0.06724942, "step": 5187 }, { "epoch": 10.376, "grad_norm": 1.2254157066345215, "learning_rate": 2e-05, "loss": 0.03878136, "step": 5188 }, { "epoch": 10.378, "grad_norm": 1.7440659999847412, "learning_rate": 2e-05, "loss": 0.04667617, "step": 5189 }, { "epoch": 10.38, "grad_norm": 1.921148657798767, "learning_rate": 2e-05, "loss": 0.0511181, "step": 5190 }, { "epoch": 10.382, "grad_norm": 2.249518871307373, "learning_rate": 2e-05, "loss": 0.05295843, "step": 5191 }, { "epoch": 10.384, "grad_norm": 1.9301743507385254, "learning_rate": 2e-05, "loss": 0.05415469, "step": 5192 }, { "epoch": 10.386, "grad_norm": 2.933361291885376, "learning_rate": 2e-05, "loss": 0.05823458, "step": 5193 }, { "epoch": 10.388, "grad_norm": 1.8775449991226196, "learning_rate": 2e-05, "loss": 0.03668153, "step": 5194 }, { "epoch": 10.39, "grad_norm": 1.2913427352905273, "learning_rate": 2e-05, "loss": 0.03676874, "step": 5195 }, { "epoch": 10.392, "grad_norm": 3.2641937732696533, "learning_rate": 2e-05, "loss": 0.03394835, "step": 5196 }, { "epoch": 10.394, "grad_norm": 1.5311733484268188, "learning_rate": 2e-05, "loss": 0.05097717, "step": 5197 }, { "epoch": 10.396, "grad_norm": 1.488006830215454, "learning_rate": 2e-05, "loss": 0.06110777, "step": 5198 }, { "epoch": 10.398, "grad_norm": 1.3041068315505981, "learning_rate": 2e-05, "loss": 0.04042555, "step": 5199 }, { "epoch": 10.4, "grad_norm": 1.8183988332748413, "learning_rate": 2e-05, "loss": 0.04754098, "step": 5200 }, { "epoch": 10.402, "grad_norm": 2.3569672107696533, "learning_rate": 2e-05, "loss": 0.04439374, "step": 5201 }, { "epoch": 10.404, "grad_norm": 1.4490054845809937, "learning_rate": 2e-05, "loss": 0.0474748, "step": 5202 }, { "epoch": 10.406, "grad_norm": 1.513960838317871, "learning_rate": 2e-05, "loss": 0.05048071, "step": 5203 }, { "epoch": 10.408, "grad_norm": 1.6078133583068848, "learning_rate": 2e-05, "loss": 0.06171016, "step": 5204 }, { "epoch": 10.41, "grad_norm": 2.1041479110717773, "learning_rate": 2e-05, "loss": 0.07458056, "step": 5205 }, { "epoch": 10.412, "grad_norm": 1.2768281698226929, "learning_rate": 2e-05, "loss": 0.05074968, "step": 5206 }, { "epoch": 10.414, "grad_norm": 1.2827049493789673, "learning_rate": 2e-05, "loss": 0.03884158, "step": 5207 }, { "epoch": 10.416, "grad_norm": 1.3199437856674194, "learning_rate": 2e-05, "loss": 0.04091244, "step": 5208 }, { "epoch": 10.418, "grad_norm": 1.4176160097122192, "learning_rate": 2e-05, "loss": 0.05431768, "step": 5209 }, { "epoch": 10.42, "grad_norm": 1.8141944408416748, "learning_rate": 2e-05, "loss": 0.04813956, "step": 5210 }, { "epoch": 10.422, "grad_norm": 1.3609751462936401, "learning_rate": 2e-05, "loss": 0.05530354, "step": 5211 }, { "epoch": 10.424, "grad_norm": 1.780599594116211, "learning_rate": 2e-05, "loss": 0.0568007, "step": 5212 }, { "epoch": 10.426, "grad_norm": 1.6819827556610107, "learning_rate": 2e-05, "loss": 0.03991935, "step": 5213 }, { "epoch": 10.428, "grad_norm": 2.557753801345825, "learning_rate": 2e-05, "loss": 0.0696613, "step": 5214 }, { "epoch": 10.43, "grad_norm": 1.8446578979492188, "learning_rate": 2e-05, "loss": 0.04753678, "step": 5215 }, { "epoch": 10.432, "grad_norm": 0.939564049243927, "learning_rate": 2e-05, "loss": 0.02770604, "step": 5216 }, { "epoch": 10.434, "grad_norm": 1.930539846420288, "learning_rate": 2e-05, "loss": 0.05070189, "step": 5217 }, { "epoch": 10.436, "grad_norm": 1.480115532875061, "learning_rate": 2e-05, "loss": 0.03808409, "step": 5218 }, { "epoch": 10.438, "grad_norm": 1.2075250148773193, "learning_rate": 2e-05, "loss": 0.03466361, "step": 5219 }, { "epoch": 10.44, "grad_norm": 2.400930166244507, "learning_rate": 2e-05, "loss": 0.05821132, "step": 5220 }, { "epoch": 10.442, "grad_norm": 1.5590664148330688, "learning_rate": 2e-05, "loss": 0.04925883, "step": 5221 }, { "epoch": 10.444, "grad_norm": 1.9377199411392212, "learning_rate": 2e-05, "loss": 0.05342076, "step": 5222 }, { "epoch": 10.446, "grad_norm": 1.9182746410369873, "learning_rate": 2e-05, "loss": 0.06361227, "step": 5223 }, { "epoch": 10.448, "grad_norm": 1.3133718967437744, "learning_rate": 2e-05, "loss": 0.04293478, "step": 5224 }, { "epoch": 10.45, "grad_norm": 1.6044734716415405, "learning_rate": 2e-05, "loss": 0.06643161, "step": 5225 }, { "epoch": 10.452, "grad_norm": 1.3510301113128662, "learning_rate": 2e-05, "loss": 0.0424097, "step": 5226 }, { "epoch": 10.454, "grad_norm": 2.926586389541626, "learning_rate": 2e-05, "loss": 0.05616639, "step": 5227 }, { "epoch": 10.456, "grad_norm": 1.7717212438583374, "learning_rate": 2e-05, "loss": 0.04580856, "step": 5228 }, { "epoch": 10.458, "grad_norm": 1.2072011232376099, "learning_rate": 2e-05, "loss": 0.05067045, "step": 5229 }, { "epoch": 10.46, "grad_norm": 2.7207653522491455, "learning_rate": 2e-05, "loss": 0.05808522, "step": 5230 }, { "epoch": 10.462, "grad_norm": 1.8373502492904663, "learning_rate": 2e-05, "loss": 0.0528776, "step": 5231 }, { "epoch": 10.464, "grad_norm": 1.2759414911270142, "learning_rate": 2e-05, "loss": 0.04170848, "step": 5232 }, { "epoch": 10.466, "grad_norm": 1.9643059968948364, "learning_rate": 2e-05, "loss": 0.04919844, "step": 5233 }, { "epoch": 10.468, "grad_norm": 3.1205170154571533, "learning_rate": 2e-05, "loss": 0.06371839, "step": 5234 }, { "epoch": 10.47, "grad_norm": 13.818527221679688, "learning_rate": 2e-05, "loss": 0.0568444, "step": 5235 }, { "epoch": 10.472, "grad_norm": 5.399960517883301, "learning_rate": 2e-05, "loss": 0.04257676, "step": 5236 }, { "epoch": 10.474, "grad_norm": 1.6575089693069458, "learning_rate": 2e-05, "loss": 0.05032737, "step": 5237 }, { "epoch": 10.475999999999999, "grad_norm": 1.2673888206481934, "learning_rate": 2e-05, "loss": 0.04592997, "step": 5238 }, { "epoch": 10.478, "grad_norm": 1.8720520734786987, "learning_rate": 2e-05, "loss": 0.05344952, "step": 5239 }, { "epoch": 10.48, "grad_norm": 5.716723918914795, "learning_rate": 2e-05, "loss": 0.05336998, "step": 5240 }, { "epoch": 10.482, "grad_norm": 1.8104397058486938, "learning_rate": 2e-05, "loss": 0.05812651, "step": 5241 }, { "epoch": 10.484, "grad_norm": 1.6072144508361816, "learning_rate": 2e-05, "loss": 0.05022549, "step": 5242 }, { "epoch": 10.486, "grad_norm": 1.7834798097610474, "learning_rate": 2e-05, "loss": 0.0548949, "step": 5243 }, { "epoch": 10.488, "grad_norm": 1.4747883081436157, "learning_rate": 2e-05, "loss": 0.07916712, "step": 5244 }, { "epoch": 10.49, "grad_norm": 1.4801729917526245, "learning_rate": 2e-05, "loss": 0.0434589, "step": 5245 }, { "epoch": 10.492, "grad_norm": 1.3738034963607788, "learning_rate": 2e-05, "loss": 0.04868829, "step": 5246 }, { "epoch": 10.494, "grad_norm": 1.1676543951034546, "learning_rate": 2e-05, "loss": 0.03874259, "step": 5247 }, { "epoch": 10.496, "grad_norm": 1.4973816871643066, "learning_rate": 2e-05, "loss": 0.04599489, "step": 5248 }, { "epoch": 10.498, "grad_norm": 1.5680755376815796, "learning_rate": 2e-05, "loss": 0.04904544, "step": 5249 }, { "epoch": 10.5, "grad_norm": 1.9003678560256958, "learning_rate": 2e-05, "loss": 0.07093397, "step": 5250 }, { "epoch": 10.502, "grad_norm": 1.6360976696014404, "learning_rate": 2e-05, "loss": 0.04844421, "step": 5251 }, { "epoch": 10.504, "grad_norm": 1.8371424674987793, "learning_rate": 2e-05, "loss": 0.0682577, "step": 5252 }, { "epoch": 10.506, "grad_norm": 2.010197401046753, "learning_rate": 2e-05, "loss": 0.04919277, "step": 5253 }, { "epoch": 10.508, "grad_norm": 1.8417555093765259, "learning_rate": 2e-05, "loss": 0.04783285, "step": 5254 }, { "epoch": 10.51, "grad_norm": 1.6858887672424316, "learning_rate": 2e-05, "loss": 0.05456799, "step": 5255 }, { "epoch": 10.512, "grad_norm": 3.253695011138916, "learning_rate": 2e-05, "loss": 0.03823632, "step": 5256 }, { "epoch": 10.514, "grad_norm": 1.8431847095489502, "learning_rate": 2e-05, "loss": 0.06339058, "step": 5257 }, { "epoch": 10.516, "grad_norm": 1.3688884973526, "learning_rate": 2e-05, "loss": 0.06101316, "step": 5258 }, { "epoch": 10.518, "grad_norm": 0.9589589834213257, "learning_rate": 2e-05, "loss": 0.03332622, "step": 5259 }, { "epoch": 10.52, "grad_norm": 1.5747522115707397, "learning_rate": 2e-05, "loss": 0.05914874, "step": 5260 }, { "epoch": 10.522, "grad_norm": 1.884902834892273, "learning_rate": 2e-05, "loss": 0.07614301, "step": 5261 }, { "epoch": 10.524000000000001, "grad_norm": 1.4136667251586914, "learning_rate": 2e-05, "loss": 0.05749642, "step": 5262 }, { "epoch": 10.526, "grad_norm": 1.4973257780075073, "learning_rate": 2e-05, "loss": 0.05710443, "step": 5263 }, { "epoch": 10.528, "grad_norm": 2.1103341579437256, "learning_rate": 2e-05, "loss": 0.06917971, "step": 5264 }, { "epoch": 10.53, "grad_norm": 1.6362884044647217, "learning_rate": 2e-05, "loss": 0.04992104, "step": 5265 }, { "epoch": 10.532, "grad_norm": 2.8185596466064453, "learning_rate": 2e-05, "loss": 0.05004818, "step": 5266 }, { "epoch": 10.534, "grad_norm": 1.6403809785842896, "learning_rate": 2e-05, "loss": 0.05555485, "step": 5267 }, { "epoch": 10.536, "grad_norm": 2.137666702270508, "learning_rate": 2e-05, "loss": 0.05246295, "step": 5268 }, { "epoch": 10.538, "grad_norm": 1.6086887121200562, "learning_rate": 2e-05, "loss": 0.04962714, "step": 5269 }, { "epoch": 10.54, "grad_norm": 1.4475809335708618, "learning_rate": 2e-05, "loss": 0.0453802, "step": 5270 }, { "epoch": 10.542, "grad_norm": 1.6052064895629883, "learning_rate": 2e-05, "loss": 0.04843373, "step": 5271 }, { "epoch": 10.544, "grad_norm": 1.8348846435546875, "learning_rate": 2e-05, "loss": 0.05354032, "step": 5272 }, { "epoch": 10.546, "grad_norm": 1.5598926544189453, "learning_rate": 2e-05, "loss": 0.04522219, "step": 5273 }, { "epoch": 10.548, "grad_norm": 1.7176995277404785, "learning_rate": 2e-05, "loss": 0.06097797, "step": 5274 }, { "epoch": 10.55, "grad_norm": 1.6066391468048096, "learning_rate": 2e-05, "loss": 0.04459933, "step": 5275 }, { "epoch": 10.552, "grad_norm": 1.2835681438446045, "learning_rate": 2e-05, "loss": 0.04766644, "step": 5276 }, { "epoch": 10.554, "grad_norm": 1.5211418867111206, "learning_rate": 2e-05, "loss": 0.05268958, "step": 5277 }, { "epoch": 10.556000000000001, "grad_norm": 1.3323153257369995, "learning_rate": 2e-05, "loss": 0.04630678, "step": 5278 }, { "epoch": 10.558, "grad_norm": 2.196124315261841, "learning_rate": 2e-05, "loss": 0.04796411, "step": 5279 }, { "epoch": 10.56, "grad_norm": 6.703586101531982, "learning_rate": 2e-05, "loss": 0.08236083, "step": 5280 }, { "epoch": 10.562, "grad_norm": 2.39101505279541, "learning_rate": 2e-05, "loss": 0.0583214, "step": 5281 }, { "epoch": 10.564, "grad_norm": 1.674933671951294, "learning_rate": 2e-05, "loss": 0.05167703, "step": 5282 }, { "epoch": 10.566, "grad_norm": 1.468446135520935, "learning_rate": 2e-05, "loss": 0.04566929, "step": 5283 }, { "epoch": 10.568, "grad_norm": 1.4748491048812866, "learning_rate": 2e-05, "loss": 0.04062827, "step": 5284 }, { "epoch": 10.57, "grad_norm": 2.5267021656036377, "learning_rate": 2e-05, "loss": 0.05230495, "step": 5285 }, { "epoch": 10.572, "grad_norm": 2.1810038089752197, "learning_rate": 2e-05, "loss": 0.04218719, "step": 5286 }, { "epoch": 10.574, "grad_norm": 1.3321385383605957, "learning_rate": 2e-05, "loss": 0.04871097, "step": 5287 }, { "epoch": 10.576, "grad_norm": 1.7109707593917847, "learning_rate": 2e-05, "loss": 0.04826786, "step": 5288 }, { "epoch": 10.578, "grad_norm": 1.9152201414108276, "learning_rate": 2e-05, "loss": 0.04686288, "step": 5289 }, { "epoch": 10.58, "grad_norm": 5.031193256378174, "learning_rate": 2e-05, "loss": 0.04337011, "step": 5290 }, { "epoch": 10.582, "grad_norm": 3.426795482635498, "learning_rate": 2e-05, "loss": 0.03990654, "step": 5291 }, { "epoch": 10.584, "grad_norm": 2.406569719314575, "learning_rate": 2e-05, "loss": 0.03668533, "step": 5292 }, { "epoch": 10.586, "grad_norm": 3.1804933547973633, "learning_rate": 2e-05, "loss": 0.06070712, "step": 5293 }, { "epoch": 10.588, "grad_norm": 2.061739206314087, "learning_rate": 2e-05, "loss": 0.02787476, "step": 5294 }, { "epoch": 10.59, "grad_norm": 1.6928627490997314, "learning_rate": 2e-05, "loss": 0.04576159, "step": 5295 }, { "epoch": 10.592, "grad_norm": 2.2229623794555664, "learning_rate": 2e-05, "loss": 0.05602458, "step": 5296 }, { "epoch": 10.594, "grad_norm": 1.289467692375183, "learning_rate": 2e-05, "loss": 0.05179123, "step": 5297 }, { "epoch": 10.596, "grad_norm": 1.5103563070297241, "learning_rate": 2e-05, "loss": 0.05836169, "step": 5298 }, { "epoch": 10.598, "grad_norm": 1.6638998985290527, "learning_rate": 2e-05, "loss": 0.04362776, "step": 5299 }, { "epoch": 10.6, "grad_norm": 1.3501155376434326, "learning_rate": 2e-05, "loss": 0.04407869, "step": 5300 }, { "epoch": 10.602, "grad_norm": 1.708865761756897, "learning_rate": 2e-05, "loss": 0.05075856, "step": 5301 }, { "epoch": 10.604, "grad_norm": 10.003942489624023, "learning_rate": 2e-05, "loss": 0.07709937, "step": 5302 }, { "epoch": 10.606, "grad_norm": 1.8898708820343018, "learning_rate": 2e-05, "loss": 0.03913818, "step": 5303 }, { "epoch": 10.608, "grad_norm": 1.95207679271698, "learning_rate": 2e-05, "loss": 0.05876492, "step": 5304 }, { "epoch": 10.61, "grad_norm": 1.6811554431915283, "learning_rate": 2e-05, "loss": 0.03337777, "step": 5305 }, { "epoch": 10.612, "grad_norm": 2.222196102142334, "learning_rate": 2e-05, "loss": 0.0575141, "step": 5306 }, { "epoch": 10.614, "grad_norm": 2.2834959030151367, "learning_rate": 2e-05, "loss": 0.05859252, "step": 5307 }, { "epoch": 10.616, "grad_norm": 2.21087646484375, "learning_rate": 2e-05, "loss": 0.05571397, "step": 5308 }, { "epoch": 10.618, "grad_norm": 3.0877723693847656, "learning_rate": 2e-05, "loss": 0.10596378, "step": 5309 }, { "epoch": 10.62, "grad_norm": 3.603444814682007, "learning_rate": 2e-05, "loss": 0.06696412, "step": 5310 }, { "epoch": 10.622, "grad_norm": 1.8475040197372437, "learning_rate": 2e-05, "loss": 0.04399508, "step": 5311 }, { "epoch": 10.624, "grad_norm": 1.4669568538665771, "learning_rate": 2e-05, "loss": 0.0431044, "step": 5312 }, { "epoch": 10.626, "grad_norm": 1.3643040657043457, "learning_rate": 2e-05, "loss": 0.0470515, "step": 5313 }, { "epoch": 10.628, "grad_norm": 1.2287578582763672, "learning_rate": 2e-05, "loss": 0.05811661, "step": 5314 }, { "epoch": 10.63, "grad_norm": 1.0902774333953857, "learning_rate": 2e-05, "loss": 0.03484159, "step": 5315 }, { "epoch": 10.632, "grad_norm": 1.6369050741195679, "learning_rate": 2e-05, "loss": 0.04110289, "step": 5316 }, { "epoch": 10.634, "grad_norm": 1.275800108909607, "learning_rate": 2e-05, "loss": 0.05027898, "step": 5317 }, { "epoch": 10.636, "grad_norm": 1.4583206176757812, "learning_rate": 2e-05, "loss": 0.03882696, "step": 5318 }, { "epoch": 10.638, "grad_norm": 1.3193122148513794, "learning_rate": 2e-05, "loss": 0.03158198, "step": 5319 }, { "epoch": 10.64, "grad_norm": 1.4229142665863037, "learning_rate": 2e-05, "loss": 0.04588968, "step": 5320 }, { "epoch": 10.642, "grad_norm": 1.5470317602157593, "learning_rate": 2e-05, "loss": 0.04177276, "step": 5321 }, { "epoch": 10.644, "grad_norm": 1.322806477546692, "learning_rate": 2e-05, "loss": 0.04808362, "step": 5322 }, { "epoch": 10.646, "grad_norm": 1.7866078615188599, "learning_rate": 2e-05, "loss": 0.04570613, "step": 5323 }, { "epoch": 10.648, "grad_norm": 1.8436026573181152, "learning_rate": 2e-05, "loss": 0.06046174, "step": 5324 }, { "epoch": 10.65, "grad_norm": 1.6099350452423096, "learning_rate": 2e-05, "loss": 0.05201571, "step": 5325 }, { "epoch": 10.652, "grad_norm": 1.4118268489837646, "learning_rate": 2e-05, "loss": 0.04571307, "step": 5326 }, { "epoch": 10.654, "grad_norm": 1.7007311582565308, "learning_rate": 2e-05, "loss": 0.05410545, "step": 5327 }, { "epoch": 10.656, "grad_norm": 1.3747484683990479, "learning_rate": 2e-05, "loss": 0.04396842, "step": 5328 }, { "epoch": 10.658, "grad_norm": 1.5627105236053467, "learning_rate": 2e-05, "loss": 0.05439685, "step": 5329 }, { "epoch": 10.66, "grad_norm": 1.6469447612762451, "learning_rate": 2e-05, "loss": 0.05709826, "step": 5330 }, { "epoch": 10.662, "grad_norm": 3.3534231185913086, "learning_rate": 2e-05, "loss": 0.05348397, "step": 5331 }, { "epoch": 10.664, "grad_norm": 1.3663334846496582, "learning_rate": 2e-05, "loss": 0.04078261, "step": 5332 }, { "epoch": 10.666, "grad_norm": 1.6048120260238647, "learning_rate": 2e-05, "loss": 0.05477417, "step": 5333 }, { "epoch": 10.668, "grad_norm": 1.6299818754196167, "learning_rate": 2e-05, "loss": 0.05402359, "step": 5334 }, { "epoch": 10.67, "grad_norm": 1.9940632581710815, "learning_rate": 2e-05, "loss": 0.05533578, "step": 5335 }, { "epoch": 10.672, "grad_norm": 1.1183878183364868, "learning_rate": 2e-05, "loss": 0.03753264, "step": 5336 }, { "epoch": 10.674, "grad_norm": 1.5522422790527344, "learning_rate": 2e-05, "loss": 0.05828452, "step": 5337 }, { "epoch": 10.676, "grad_norm": 1.3103322982788086, "learning_rate": 2e-05, "loss": 0.04702647, "step": 5338 }, { "epoch": 10.678, "grad_norm": 1.2099779844284058, "learning_rate": 2e-05, "loss": 0.04113594, "step": 5339 }, { "epoch": 10.68, "grad_norm": 3.985682964324951, "learning_rate": 2e-05, "loss": 0.07098438, "step": 5340 }, { "epoch": 10.682, "grad_norm": 1.8771806955337524, "learning_rate": 2e-05, "loss": 0.04530764, "step": 5341 }, { "epoch": 10.684, "grad_norm": 2.1567442417144775, "learning_rate": 2e-05, "loss": 0.04229917, "step": 5342 }, { "epoch": 10.686, "grad_norm": 1.6238269805908203, "learning_rate": 2e-05, "loss": 0.04225535, "step": 5343 }, { "epoch": 10.688, "grad_norm": 1.869265079498291, "learning_rate": 2e-05, "loss": 0.05382503, "step": 5344 }, { "epoch": 10.69, "grad_norm": 1.8355045318603516, "learning_rate": 2e-05, "loss": 0.0423015, "step": 5345 }, { "epoch": 10.692, "grad_norm": 1.3736673593521118, "learning_rate": 2e-05, "loss": 0.04386007, "step": 5346 }, { "epoch": 10.693999999999999, "grad_norm": 2.0322253704071045, "learning_rate": 2e-05, "loss": 0.04041972, "step": 5347 }, { "epoch": 10.696, "grad_norm": 2.336951732635498, "learning_rate": 2e-05, "loss": 0.05086974, "step": 5348 }, { "epoch": 10.698, "grad_norm": 2.0457851886749268, "learning_rate": 2e-05, "loss": 0.05006742, "step": 5349 }, { "epoch": 10.7, "grad_norm": 1.992970585823059, "learning_rate": 2e-05, "loss": 0.06358136, "step": 5350 }, { "epoch": 10.702, "grad_norm": 1.2050204277038574, "learning_rate": 2e-05, "loss": 0.04032322, "step": 5351 }, { "epoch": 10.704, "grad_norm": 1.4412949085235596, "learning_rate": 2e-05, "loss": 0.04633742, "step": 5352 }, { "epoch": 10.706, "grad_norm": 1.5254440307617188, "learning_rate": 2e-05, "loss": 0.05168087, "step": 5353 }, { "epoch": 10.708, "grad_norm": 1.4087555408477783, "learning_rate": 2e-05, "loss": 0.05477347, "step": 5354 }, { "epoch": 10.71, "grad_norm": 1.431321382522583, "learning_rate": 2e-05, "loss": 0.03768836, "step": 5355 }, { "epoch": 10.712, "grad_norm": 1.1506340503692627, "learning_rate": 2e-05, "loss": 0.04004525, "step": 5356 }, { "epoch": 10.714, "grad_norm": 2.427537441253662, "learning_rate": 2e-05, "loss": 0.06314078, "step": 5357 }, { "epoch": 10.716, "grad_norm": 1.5225038528442383, "learning_rate": 2e-05, "loss": 0.05141336, "step": 5358 }, { "epoch": 10.718, "grad_norm": 3.697816848754883, "learning_rate": 2e-05, "loss": 0.05468082, "step": 5359 }, { "epoch": 10.72, "grad_norm": 1.5800566673278809, "learning_rate": 2e-05, "loss": 0.058851, "step": 5360 }, { "epoch": 10.722, "grad_norm": 0.8254619240760803, "learning_rate": 2e-05, "loss": 0.02738049, "step": 5361 }, { "epoch": 10.724, "grad_norm": 1.369780421257019, "learning_rate": 2e-05, "loss": 0.04381582, "step": 5362 }, { "epoch": 10.725999999999999, "grad_norm": 1.2137796878814697, "learning_rate": 2e-05, "loss": 0.0480298, "step": 5363 }, { "epoch": 10.728, "grad_norm": 1.4857935905456543, "learning_rate": 2e-05, "loss": 0.03931804, "step": 5364 }, { "epoch": 10.73, "grad_norm": 1.4152106046676636, "learning_rate": 2e-05, "loss": 0.03949886, "step": 5365 }, { "epoch": 10.732, "grad_norm": 1.3397774696350098, "learning_rate": 2e-05, "loss": 0.05219603, "step": 5366 }, { "epoch": 10.734, "grad_norm": 1.2872146368026733, "learning_rate": 2e-05, "loss": 0.042691, "step": 5367 }, { "epoch": 10.736, "grad_norm": 2.1003661155700684, "learning_rate": 2e-05, "loss": 0.05165257, "step": 5368 }, { "epoch": 10.738, "grad_norm": 2.007760524749756, "learning_rate": 2e-05, "loss": 0.06440516, "step": 5369 }, { "epoch": 10.74, "grad_norm": 1.6404235363006592, "learning_rate": 2e-05, "loss": 0.05111123, "step": 5370 }, { "epoch": 10.742, "grad_norm": 2.8288562297821045, "learning_rate": 2e-05, "loss": 0.05282591, "step": 5371 }, { "epoch": 10.744, "grad_norm": 2.6334850788116455, "learning_rate": 2e-05, "loss": 0.06181185, "step": 5372 }, { "epoch": 10.746, "grad_norm": 1.4054573774337769, "learning_rate": 2e-05, "loss": 0.05052005, "step": 5373 }, { "epoch": 10.748, "grad_norm": 1.4174977540969849, "learning_rate": 2e-05, "loss": 0.04603645, "step": 5374 }, { "epoch": 10.75, "grad_norm": 1.3553341627120972, "learning_rate": 2e-05, "loss": 0.04983129, "step": 5375 }, { "epoch": 10.752, "grad_norm": 2.03845477104187, "learning_rate": 2e-05, "loss": 0.06038283, "step": 5376 }, { "epoch": 10.754, "grad_norm": 2.294628858566284, "learning_rate": 2e-05, "loss": 0.04681225, "step": 5377 }, { "epoch": 10.756, "grad_norm": 1.303605079650879, "learning_rate": 2e-05, "loss": 0.04528525, "step": 5378 }, { "epoch": 10.758, "grad_norm": 1.6137866973876953, "learning_rate": 2e-05, "loss": 0.0557767, "step": 5379 }, { "epoch": 10.76, "grad_norm": 1.5829466581344604, "learning_rate": 2e-05, "loss": 0.04809469, "step": 5380 }, { "epoch": 10.762, "grad_norm": 1.2748074531555176, "learning_rate": 2e-05, "loss": 0.04331703, "step": 5381 }, { "epoch": 10.764, "grad_norm": 1.5433510541915894, "learning_rate": 2e-05, "loss": 0.06725293, "step": 5382 }, { "epoch": 10.766, "grad_norm": 1.6846797466278076, "learning_rate": 2e-05, "loss": 0.04823402, "step": 5383 }, { "epoch": 10.768, "grad_norm": 2.141554355621338, "learning_rate": 2e-05, "loss": 0.06744844, "step": 5384 }, { "epoch": 10.77, "grad_norm": 1.4289065599441528, "learning_rate": 2e-05, "loss": 0.05341191, "step": 5385 }, { "epoch": 10.772, "grad_norm": 1.3509269952774048, "learning_rate": 2e-05, "loss": 0.05020216, "step": 5386 }, { "epoch": 10.774000000000001, "grad_norm": 2.071425676345825, "learning_rate": 2e-05, "loss": 0.06101374, "step": 5387 }, { "epoch": 10.776, "grad_norm": 1.1163301467895508, "learning_rate": 2e-05, "loss": 0.04933468, "step": 5388 }, { "epoch": 10.778, "grad_norm": 1.2288962602615356, "learning_rate": 2e-05, "loss": 0.04980216, "step": 5389 }, { "epoch": 10.78, "grad_norm": 1.9011704921722412, "learning_rate": 2e-05, "loss": 0.05300172, "step": 5390 }, { "epoch": 10.782, "grad_norm": 1.5152117013931274, "learning_rate": 2e-05, "loss": 0.04199405, "step": 5391 }, { "epoch": 10.784, "grad_norm": 1.2738306522369385, "learning_rate": 2e-05, "loss": 0.04918165, "step": 5392 }, { "epoch": 10.786, "grad_norm": 1.598197102546692, "learning_rate": 2e-05, "loss": 0.05998203, "step": 5393 }, { "epoch": 10.788, "grad_norm": 1.4320733547210693, "learning_rate": 2e-05, "loss": 0.04948084, "step": 5394 }, { "epoch": 10.79, "grad_norm": 1.6892844438552856, "learning_rate": 2e-05, "loss": 0.06663223, "step": 5395 }, { "epoch": 10.792, "grad_norm": 1.3951140642166138, "learning_rate": 2e-05, "loss": 0.05481715, "step": 5396 }, { "epoch": 10.794, "grad_norm": 1.895914077758789, "learning_rate": 2e-05, "loss": 0.050419, "step": 5397 }, { "epoch": 10.796, "grad_norm": 1.6895439624786377, "learning_rate": 2e-05, "loss": 0.05903822, "step": 5398 }, { "epoch": 10.798, "grad_norm": 1.3120181560516357, "learning_rate": 2e-05, "loss": 0.03543907, "step": 5399 }, { "epoch": 10.8, "grad_norm": 1.598132610321045, "learning_rate": 2e-05, "loss": 0.03839812, "step": 5400 }, { "epoch": 10.802, "grad_norm": 1.5897235870361328, "learning_rate": 2e-05, "loss": 0.04522756, "step": 5401 }, { "epoch": 10.804, "grad_norm": 1.8060530424118042, "learning_rate": 2e-05, "loss": 0.06058746, "step": 5402 }, { "epoch": 10.806000000000001, "grad_norm": 1.4628931283950806, "learning_rate": 2e-05, "loss": 0.03918256, "step": 5403 }, { "epoch": 10.808, "grad_norm": 1.6146814823150635, "learning_rate": 2e-05, "loss": 0.05647987, "step": 5404 }, { "epoch": 10.81, "grad_norm": 2.082378387451172, "learning_rate": 2e-05, "loss": 0.04440005, "step": 5405 }, { "epoch": 10.812, "grad_norm": 2.013496160507202, "learning_rate": 2e-05, "loss": 0.06099815, "step": 5406 }, { "epoch": 10.814, "grad_norm": 2.267378807067871, "learning_rate": 2e-05, "loss": 0.06538484, "step": 5407 }, { "epoch": 10.816, "grad_norm": 1.9403990507125854, "learning_rate": 2e-05, "loss": 0.05472123, "step": 5408 }, { "epoch": 10.818, "grad_norm": 1.1750283241271973, "learning_rate": 2e-05, "loss": 0.03978173, "step": 5409 }, { "epoch": 10.82, "grad_norm": 1.5307939052581787, "learning_rate": 2e-05, "loss": 0.06439765, "step": 5410 }, { "epoch": 10.822, "grad_norm": 1.0061407089233398, "learning_rate": 2e-05, "loss": 0.03479624, "step": 5411 }, { "epoch": 10.824, "grad_norm": 1.3248248100280762, "learning_rate": 2e-05, "loss": 0.04627767, "step": 5412 }, { "epoch": 10.826, "grad_norm": 2.050506114959717, "learning_rate": 2e-05, "loss": 0.03434724, "step": 5413 }, { "epoch": 10.828, "grad_norm": 1.4149460792541504, "learning_rate": 2e-05, "loss": 0.05991215, "step": 5414 }, { "epoch": 10.83, "grad_norm": 1.5017156600952148, "learning_rate": 2e-05, "loss": 0.05083961, "step": 5415 }, { "epoch": 10.832, "grad_norm": 1.0457175970077515, "learning_rate": 2e-05, "loss": 0.03952884, "step": 5416 }, { "epoch": 10.834, "grad_norm": 1.5834999084472656, "learning_rate": 2e-05, "loss": 0.05933148, "step": 5417 }, { "epoch": 10.836, "grad_norm": 1.1834666728973389, "learning_rate": 2e-05, "loss": 0.03205921, "step": 5418 }, { "epoch": 10.838, "grad_norm": 1.2450237274169922, "learning_rate": 2e-05, "loss": 0.04426446, "step": 5419 }, { "epoch": 10.84, "grad_norm": 1.2155392169952393, "learning_rate": 2e-05, "loss": 0.03819549, "step": 5420 }, { "epoch": 10.842, "grad_norm": 1.4611936807632446, "learning_rate": 2e-05, "loss": 0.04256415, "step": 5421 }, { "epoch": 10.844, "grad_norm": 0.9903550744056702, "learning_rate": 2e-05, "loss": 0.03737554, "step": 5422 }, { "epoch": 10.846, "grad_norm": 1.4878544807434082, "learning_rate": 2e-05, "loss": 0.04571939, "step": 5423 }, { "epoch": 10.848, "grad_norm": 3.3160300254821777, "learning_rate": 2e-05, "loss": 0.06830984, "step": 5424 }, { "epoch": 10.85, "grad_norm": 1.3133734464645386, "learning_rate": 2e-05, "loss": 0.05649231, "step": 5425 }, { "epoch": 10.852, "grad_norm": 1.1060919761657715, "learning_rate": 2e-05, "loss": 0.03377821, "step": 5426 }, { "epoch": 10.854, "grad_norm": 1.686246395111084, "learning_rate": 2e-05, "loss": 0.04799533, "step": 5427 }, { "epoch": 10.856, "grad_norm": 2.3602640628814697, "learning_rate": 2e-05, "loss": 0.03780733, "step": 5428 }, { "epoch": 10.858, "grad_norm": 1.5096981525421143, "learning_rate": 2e-05, "loss": 0.04364054, "step": 5429 }, { "epoch": 10.86, "grad_norm": 3.4624457359313965, "learning_rate": 2e-05, "loss": 0.07765651, "step": 5430 }, { "epoch": 10.862, "grad_norm": 1.9215983152389526, "learning_rate": 2e-05, "loss": 0.0564341, "step": 5431 }, { "epoch": 10.864, "grad_norm": 2.2769806385040283, "learning_rate": 2e-05, "loss": 0.0673317, "step": 5432 }, { "epoch": 10.866, "grad_norm": 1.4568067789077759, "learning_rate": 2e-05, "loss": 0.04599699, "step": 5433 }, { "epoch": 10.868, "grad_norm": 1.3727821111679077, "learning_rate": 2e-05, "loss": 0.05360325, "step": 5434 }, { "epoch": 10.87, "grad_norm": 2.1646981239318848, "learning_rate": 2e-05, "loss": 0.05432039, "step": 5435 }, { "epoch": 10.872, "grad_norm": 1.466217041015625, "learning_rate": 2e-05, "loss": 0.05962711, "step": 5436 }, { "epoch": 10.874, "grad_norm": 1.4684398174285889, "learning_rate": 2e-05, "loss": 0.03964592, "step": 5437 }, { "epoch": 10.876, "grad_norm": 1.3081820011138916, "learning_rate": 2e-05, "loss": 0.03898221, "step": 5438 }, { "epoch": 10.878, "grad_norm": 1.5860309600830078, "learning_rate": 2e-05, "loss": 0.03976344, "step": 5439 }, { "epoch": 10.88, "grad_norm": 1.7701555490493774, "learning_rate": 2e-05, "loss": 0.04799116, "step": 5440 }, { "epoch": 10.882, "grad_norm": 2.2064380645751953, "learning_rate": 2e-05, "loss": 0.0660604, "step": 5441 }, { "epoch": 10.884, "grad_norm": 1.732148289680481, "learning_rate": 2e-05, "loss": 0.05492194, "step": 5442 }, { "epoch": 10.886, "grad_norm": 1.9355525970458984, "learning_rate": 2e-05, "loss": 0.04734231, "step": 5443 }, { "epoch": 10.888, "grad_norm": 1.2587823867797852, "learning_rate": 2e-05, "loss": 0.03634328, "step": 5444 }, { "epoch": 10.89, "grad_norm": 1.5590167045593262, "learning_rate": 2e-05, "loss": 0.06726646, "step": 5445 }, { "epoch": 10.892, "grad_norm": 1.3101967573165894, "learning_rate": 2e-05, "loss": 0.04764952, "step": 5446 }, { "epoch": 10.894, "grad_norm": 2.4456593990325928, "learning_rate": 2e-05, "loss": 0.05624555, "step": 5447 }, { "epoch": 10.896, "grad_norm": 2.824155330657959, "learning_rate": 2e-05, "loss": 0.05465455, "step": 5448 }, { "epoch": 10.898, "grad_norm": 1.4769179821014404, "learning_rate": 2e-05, "loss": 0.05461916, "step": 5449 }, { "epoch": 10.9, "grad_norm": 1.4648699760437012, "learning_rate": 2e-05, "loss": 0.05058068, "step": 5450 }, { "epoch": 10.902, "grad_norm": 1.2384024858474731, "learning_rate": 2e-05, "loss": 0.04764497, "step": 5451 }, { "epoch": 10.904, "grad_norm": 1.6182745695114136, "learning_rate": 2e-05, "loss": 0.05357751, "step": 5452 }, { "epoch": 10.906, "grad_norm": 1.5405651330947876, "learning_rate": 2e-05, "loss": 0.0363545, "step": 5453 }, { "epoch": 10.908, "grad_norm": 1.2049387693405151, "learning_rate": 2e-05, "loss": 0.0472638, "step": 5454 }, { "epoch": 10.91, "grad_norm": 2.5308351516723633, "learning_rate": 2e-05, "loss": 0.04407152, "step": 5455 }, { "epoch": 10.912, "grad_norm": 1.3933305740356445, "learning_rate": 2e-05, "loss": 0.04460661, "step": 5456 }, { "epoch": 10.914, "grad_norm": 1.4004764556884766, "learning_rate": 2e-05, "loss": 0.04893076, "step": 5457 }, { "epoch": 10.916, "grad_norm": 1.8205981254577637, "learning_rate": 2e-05, "loss": 0.04703292, "step": 5458 }, { "epoch": 10.918, "grad_norm": 1.8013707399368286, "learning_rate": 2e-05, "loss": 0.055125, "step": 5459 }, { "epoch": 10.92, "grad_norm": 1.451092004776001, "learning_rate": 2e-05, "loss": 0.04543486, "step": 5460 }, { "epoch": 10.922, "grad_norm": 1.1553832292556763, "learning_rate": 2e-05, "loss": 0.04265869, "step": 5461 }, { "epoch": 10.924, "grad_norm": 1.960945725440979, "learning_rate": 2e-05, "loss": 0.05424058, "step": 5462 }, { "epoch": 10.926, "grad_norm": 1.4518814086914062, "learning_rate": 2e-05, "loss": 0.04993463, "step": 5463 }, { "epoch": 10.928, "grad_norm": 2.5878188610076904, "learning_rate": 2e-05, "loss": 0.04935042, "step": 5464 }, { "epoch": 10.93, "grad_norm": 1.3575770854949951, "learning_rate": 2e-05, "loss": 0.05218004, "step": 5465 }, { "epoch": 10.932, "grad_norm": 1.4629638195037842, "learning_rate": 2e-05, "loss": 0.05007394, "step": 5466 }, { "epoch": 10.934, "grad_norm": 1.2704107761383057, "learning_rate": 2e-05, "loss": 0.04607791, "step": 5467 }, { "epoch": 10.936, "grad_norm": 1.5008957386016846, "learning_rate": 2e-05, "loss": 0.04755886, "step": 5468 }, { "epoch": 10.938, "grad_norm": 1.6490647792816162, "learning_rate": 2e-05, "loss": 0.04356069, "step": 5469 }, { "epoch": 10.94, "grad_norm": 1.9687342643737793, "learning_rate": 2e-05, "loss": 0.04906989, "step": 5470 }, { "epoch": 10.942, "grad_norm": 2.3474905490875244, "learning_rate": 2e-05, "loss": 0.04697709, "step": 5471 }, { "epoch": 10.943999999999999, "grad_norm": 1.7721339464187622, "learning_rate": 2e-05, "loss": 0.06682569, "step": 5472 }, { "epoch": 10.946, "grad_norm": 1.4311152696609497, "learning_rate": 2e-05, "loss": 0.04959798, "step": 5473 }, { "epoch": 10.948, "grad_norm": 1.3908685445785522, "learning_rate": 2e-05, "loss": 0.04007864, "step": 5474 }, { "epoch": 10.95, "grad_norm": 1.1253409385681152, "learning_rate": 2e-05, "loss": 0.0392089, "step": 5475 }, { "epoch": 10.952, "grad_norm": 1.5628206729888916, "learning_rate": 2e-05, "loss": 0.03705291, "step": 5476 }, { "epoch": 10.954, "grad_norm": 2.015963077545166, "learning_rate": 2e-05, "loss": 0.05837575, "step": 5477 }, { "epoch": 10.956, "grad_norm": 1.1806350946426392, "learning_rate": 2e-05, "loss": 0.04036895, "step": 5478 }, { "epoch": 10.958, "grad_norm": 1.8110302686691284, "learning_rate": 2e-05, "loss": 0.05655763, "step": 5479 }, { "epoch": 10.96, "grad_norm": 1.6625659465789795, "learning_rate": 2e-05, "loss": 0.04185361, "step": 5480 }, { "epoch": 10.962, "grad_norm": 1.8857502937316895, "learning_rate": 2e-05, "loss": 0.05384925, "step": 5481 }, { "epoch": 10.964, "grad_norm": 1.3210104703903198, "learning_rate": 2e-05, "loss": 0.0597494, "step": 5482 }, { "epoch": 10.966, "grad_norm": 1.587018609046936, "learning_rate": 2e-05, "loss": 0.05472574, "step": 5483 }, { "epoch": 10.968, "grad_norm": 1.3318067789077759, "learning_rate": 2e-05, "loss": 0.0541456, "step": 5484 }, { "epoch": 10.97, "grad_norm": 1.4794989824295044, "learning_rate": 2e-05, "loss": 0.05960969, "step": 5485 }, { "epoch": 10.972, "grad_norm": 3.501803398132324, "learning_rate": 2e-05, "loss": 0.0588432, "step": 5486 }, { "epoch": 10.974, "grad_norm": 1.6838804483413696, "learning_rate": 2e-05, "loss": 0.05511768, "step": 5487 }, { "epoch": 10.975999999999999, "grad_norm": 1.2759146690368652, "learning_rate": 2e-05, "loss": 0.02767239, "step": 5488 }, { "epoch": 10.978, "grad_norm": 1.4586148262023926, "learning_rate": 2e-05, "loss": 0.05381429, "step": 5489 }, { "epoch": 10.98, "grad_norm": 1.5755432844161987, "learning_rate": 2e-05, "loss": 0.03358914, "step": 5490 }, { "epoch": 10.982, "grad_norm": 1.175610899925232, "learning_rate": 2e-05, "loss": 0.04267032, "step": 5491 }, { "epoch": 10.984, "grad_norm": 1.1766899824142456, "learning_rate": 2e-05, "loss": 0.03875332, "step": 5492 }, { "epoch": 10.986, "grad_norm": 2.118785858154297, "learning_rate": 2e-05, "loss": 0.05230087, "step": 5493 }, { "epoch": 10.988, "grad_norm": 1.6943100690841675, "learning_rate": 2e-05, "loss": 0.05274594, "step": 5494 }, { "epoch": 10.99, "grad_norm": 1.087948203086853, "learning_rate": 2e-05, "loss": 0.03773314, "step": 5495 }, { "epoch": 10.992, "grad_norm": 1.6124333143234253, "learning_rate": 2e-05, "loss": 0.04908092, "step": 5496 }, { "epoch": 10.994, "grad_norm": 1.851080298423767, "learning_rate": 2e-05, "loss": 0.05457485, "step": 5497 }, { "epoch": 10.996, "grad_norm": 1.4189625978469849, "learning_rate": 2e-05, "loss": 0.04965939, "step": 5498 }, { "epoch": 10.998, "grad_norm": 1.9393243789672852, "learning_rate": 2e-05, "loss": 0.05298079, "step": 5499 }, { "epoch": 11.0, "grad_norm": 1.709896445274353, "learning_rate": 2e-05, "loss": 0.05041164, "step": 5500 }, { "epoch": 11.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 0.996, "AngleClassification_3": 0.9600798403193613, "Equal_1": 0.994, "Equal_2": 0.9660678642714571, "Equal_3": 0.874251497005988, "LineComparison_1": 0.996, "LineComparison_2": 1.0, "LineComparison_3": 0.9740518962075848, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.986, "Perpendicular_1": 0.988, "Perpendicular_2": 0.948, "Perpendicular_3": 0.6513026052104208, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9956666666666667, "PointLiesOnCircle_3": 0.9892666666666666, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9859719438877755, "PointLiesOnLine_3": 0.9740518962075848 }, "eval_runtime": 319.8654, "eval_samples_per_second": 32.826, "eval_steps_per_second": 0.657, "step": 5500 }, { "epoch": 11.002, "grad_norm": 1.7501477003097534, "learning_rate": 2e-05, "loss": 0.04067567, "step": 5501 }, { "epoch": 11.004, "grad_norm": 2.942864179611206, "learning_rate": 2e-05, "loss": 0.05952626, "step": 5502 }, { "epoch": 11.006, "grad_norm": 1.5498440265655518, "learning_rate": 2e-05, "loss": 0.03488794, "step": 5503 }, { "epoch": 11.008, "grad_norm": 1.4755291938781738, "learning_rate": 2e-05, "loss": 0.05729527, "step": 5504 }, { "epoch": 11.01, "grad_norm": 1.4485598802566528, "learning_rate": 2e-05, "loss": 0.05340921, "step": 5505 }, { "epoch": 11.012, "grad_norm": 1.3793925046920776, "learning_rate": 2e-05, "loss": 0.04637571, "step": 5506 }, { "epoch": 11.014, "grad_norm": 1.4103147983551025, "learning_rate": 2e-05, "loss": 0.04511734, "step": 5507 }, { "epoch": 11.016, "grad_norm": 1.3534992933273315, "learning_rate": 2e-05, "loss": 0.04359635, "step": 5508 }, { "epoch": 11.018, "grad_norm": 1.5268181562423706, "learning_rate": 2e-05, "loss": 0.07277807, "step": 5509 }, { "epoch": 11.02, "grad_norm": 1.0915262699127197, "learning_rate": 2e-05, "loss": 0.05696312, "step": 5510 }, { "epoch": 11.022, "grad_norm": 1.7027065753936768, "learning_rate": 2e-05, "loss": 0.05292184, "step": 5511 }, { "epoch": 11.024, "grad_norm": 1.5341792106628418, "learning_rate": 2e-05, "loss": 0.05033635, "step": 5512 }, { "epoch": 11.026, "grad_norm": 1.545507788658142, "learning_rate": 2e-05, "loss": 0.06413613, "step": 5513 }, { "epoch": 11.028, "grad_norm": 1.227196455001831, "learning_rate": 2e-05, "loss": 0.04655745, "step": 5514 }, { "epoch": 11.03, "grad_norm": 1.5401439666748047, "learning_rate": 2e-05, "loss": 0.05112302, "step": 5515 }, { "epoch": 11.032, "grad_norm": 0.9862892627716064, "learning_rate": 2e-05, "loss": 0.03179028, "step": 5516 }, { "epoch": 11.034, "grad_norm": 1.793843388557434, "learning_rate": 2e-05, "loss": 0.06312604, "step": 5517 }, { "epoch": 11.036, "grad_norm": 2.1920554637908936, "learning_rate": 2e-05, "loss": 0.06224691, "step": 5518 }, { "epoch": 11.038, "grad_norm": 1.6671439409255981, "learning_rate": 2e-05, "loss": 0.05866244, "step": 5519 }, { "epoch": 11.04, "grad_norm": 1.550345540046692, "learning_rate": 2e-05, "loss": 0.04544277, "step": 5520 }, { "epoch": 11.042, "grad_norm": 1.3246674537658691, "learning_rate": 2e-05, "loss": 0.03805883, "step": 5521 }, { "epoch": 11.044, "grad_norm": 1.9008183479309082, "learning_rate": 2e-05, "loss": 0.04575905, "step": 5522 }, { "epoch": 11.046, "grad_norm": 1.9028370380401611, "learning_rate": 2e-05, "loss": 0.05989815, "step": 5523 }, { "epoch": 11.048, "grad_norm": 1.2603625059127808, "learning_rate": 2e-05, "loss": 0.03706544, "step": 5524 }, { "epoch": 11.05, "grad_norm": 1.408327579498291, "learning_rate": 2e-05, "loss": 0.05105807, "step": 5525 }, { "epoch": 11.052, "grad_norm": 1.762861728668213, "learning_rate": 2e-05, "loss": 0.03200347, "step": 5526 }, { "epoch": 11.054, "grad_norm": 1.0934480428695679, "learning_rate": 2e-05, "loss": 0.04322592, "step": 5527 }, { "epoch": 11.056, "grad_norm": 2.1491646766662598, "learning_rate": 2e-05, "loss": 0.04375915, "step": 5528 }, { "epoch": 11.058, "grad_norm": 1.0617128610610962, "learning_rate": 2e-05, "loss": 0.03229889, "step": 5529 }, { "epoch": 11.06, "grad_norm": 1.9952315092086792, "learning_rate": 2e-05, "loss": 0.0592179, "step": 5530 }, { "epoch": 11.062, "grad_norm": 2.140627861022949, "learning_rate": 2e-05, "loss": 0.04644826, "step": 5531 }, { "epoch": 11.064, "grad_norm": 1.437983512878418, "learning_rate": 2e-05, "loss": 0.03985, "step": 5532 }, { "epoch": 11.066, "grad_norm": 2.316026210784912, "learning_rate": 2e-05, "loss": 0.05902199, "step": 5533 }, { "epoch": 11.068, "grad_norm": 1.4913086891174316, "learning_rate": 2e-05, "loss": 0.03914908, "step": 5534 }, { "epoch": 11.07, "grad_norm": 1.6400469541549683, "learning_rate": 2e-05, "loss": 0.04192843, "step": 5535 }, { "epoch": 11.072, "grad_norm": 1.754105567932129, "learning_rate": 2e-05, "loss": 0.04989998, "step": 5536 }, { "epoch": 11.074, "grad_norm": 2.085247039794922, "learning_rate": 2e-05, "loss": 0.04821778, "step": 5537 }, { "epoch": 11.076, "grad_norm": 1.0525646209716797, "learning_rate": 2e-05, "loss": 0.03182067, "step": 5538 }, { "epoch": 11.078, "grad_norm": 1.7224996089935303, "learning_rate": 2e-05, "loss": 0.04517365, "step": 5539 }, { "epoch": 11.08, "grad_norm": 1.285914659500122, "learning_rate": 2e-05, "loss": 0.0277714, "step": 5540 }, { "epoch": 11.082, "grad_norm": 1.9249039888381958, "learning_rate": 2e-05, "loss": 0.03731865, "step": 5541 }, { "epoch": 11.084, "grad_norm": 3.5124082565307617, "learning_rate": 2e-05, "loss": 0.04889687, "step": 5542 }, { "epoch": 11.086, "grad_norm": 1.3310890197753906, "learning_rate": 2e-05, "loss": 0.04091962, "step": 5543 }, { "epoch": 11.088, "grad_norm": 1.5621048212051392, "learning_rate": 2e-05, "loss": 0.05734289, "step": 5544 }, { "epoch": 11.09, "grad_norm": 2.3096556663513184, "learning_rate": 2e-05, "loss": 0.0544033, "step": 5545 }, { "epoch": 11.092, "grad_norm": 2.0304203033447266, "learning_rate": 2e-05, "loss": 0.03412958, "step": 5546 }, { "epoch": 11.094, "grad_norm": 3.0657801628112793, "learning_rate": 2e-05, "loss": 0.05514826, "step": 5547 }, { "epoch": 11.096, "grad_norm": 1.4915958642959595, "learning_rate": 2e-05, "loss": 0.04357203, "step": 5548 }, { "epoch": 11.098, "grad_norm": 2.101444959640503, "learning_rate": 2e-05, "loss": 0.05817562, "step": 5549 }, { "epoch": 11.1, "grad_norm": 1.9840679168701172, "learning_rate": 2e-05, "loss": 0.05163846, "step": 5550 }, { "epoch": 11.102, "grad_norm": 1.5036526918411255, "learning_rate": 2e-05, "loss": 0.03977584, "step": 5551 }, { "epoch": 11.104, "grad_norm": 3.665069103240967, "learning_rate": 2e-05, "loss": 0.05804493, "step": 5552 }, { "epoch": 11.106, "grad_norm": 6.136903762817383, "learning_rate": 2e-05, "loss": 0.05603513, "step": 5553 }, { "epoch": 11.108, "grad_norm": 2.01459002494812, "learning_rate": 2e-05, "loss": 0.04759218, "step": 5554 }, { "epoch": 11.11, "grad_norm": 1.221979022026062, "learning_rate": 2e-05, "loss": 0.04913111, "step": 5555 }, { "epoch": 11.112, "grad_norm": 2.235368490219116, "learning_rate": 2e-05, "loss": 0.06046454, "step": 5556 }, { "epoch": 11.114, "grad_norm": 2.291501998901367, "learning_rate": 2e-05, "loss": 0.05759356, "step": 5557 }, { "epoch": 11.116, "grad_norm": 1.4968833923339844, "learning_rate": 2e-05, "loss": 0.06014612, "step": 5558 }, { "epoch": 11.118, "grad_norm": 1.9257349967956543, "learning_rate": 2e-05, "loss": 0.0344676, "step": 5559 }, { "epoch": 11.12, "grad_norm": 1.1876585483551025, "learning_rate": 2e-05, "loss": 0.03832501, "step": 5560 }, { "epoch": 11.122, "grad_norm": 1.0107924938201904, "learning_rate": 2e-05, "loss": 0.03613226, "step": 5561 }, { "epoch": 11.124, "grad_norm": 1.3432567119598389, "learning_rate": 2e-05, "loss": 0.0471199, "step": 5562 }, { "epoch": 11.126, "grad_norm": 1.2003259658813477, "learning_rate": 2e-05, "loss": 0.04361069, "step": 5563 }, { "epoch": 11.128, "grad_norm": 0.9868248701095581, "learning_rate": 2e-05, "loss": 0.03666285, "step": 5564 }, { "epoch": 11.13, "grad_norm": 1.2956124544143677, "learning_rate": 2e-05, "loss": 0.04453753, "step": 5565 }, { "epoch": 11.132, "grad_norm": 1.5581343173980713, "learning_rate": 2e-05, "loss": 0.04929563, "step": 5566 }, { "epoch": 11.134, "grad_norm": 2.0106360912323, "learning_rate": 2e-05, "loss": 0.04062722, "step": 5567 }, { "epoch": 11.136, "grad_norm": 1.4985320568084717, "learning_rate": 2e-05, "loss": 0.05832936, "step": 5568 }, { "epoch": 11.138, "grad_norm": 1.6845251321792603, "learning_rate": 2e-05, "loss": 0.03393908, "step": 5569 }, { "epoch": 11.14, "grad_norm": 1.7386165857315063, "learning_rate": 2e-05, "loss": 0.05278086, "step": 5570 }, { "epoch": 11.142, "grad_norm": 1.629763126373291, "learning_rate": 2e-05, "loss": 0.05002792, "step": 5571 }, { "epoch": 11.144, "grad_norm": 1.2016594409942627, "learning_rate": 2e-05, "loss": 0.05499132, "step": 5572 }, { "epoch": 11.146, "grad_norm": 1.2156981229782104, "learning_rate": 2e-05, "loss": 0.04719136, "step": 5573 }, { "epoch": 11.148, "grad_norm": 1.1108115911483765, "learning_rate": 2e-05, "loss": 0.04154814, "step": 5574 }, { "epoch": 11.15, "grad_norm": 2.7240407466888428, "learning_rate": 2e-05, "loss": 0.07720132, "step": 5575 }, { "epoch": 11.152, "grad_norm": 1.6736434698104858, "learning_rate": 2e-05, "loss": 0.0617914, "step": 5576 }, { "epoch": 11.154, "grad_norm": 1.8674448728561401, "learning_rate": 2e-05, "loss": 0.05250496, "step": 5577 }, { "epoch": 11.156, "grad_norm": 2.2997653484344482, "learning_rate": 2e-05, "loss": 0.08813807, "step": 5578 }, { "epoch": 11.158, "grad_norm": 1.312296748161316, "learning_rate": 2e-05, "loss": 0.03612276, "step": 5579 }, { "epoch": 11.16, "grad_norm": 1.4704864025115967, "learning_rate": 2e-05, "loss": 0.05287594, "step": 5580 }, { "epoch": 11.162, "grad_norm": 0.9963863492012024, "learning_rate": 2e-05, "loss": 0.03753687, "step": 5581 }, { "epoch": 11.164, "grad_norm": 2.919243097305298, "learning_rate": 2e-05, "loss": 0.05586375, "step": 5582 }, { "epoch": 11.166, "grad_norm": 2.9407522678375244, "learning_rate": 2e-05, "loss": 0.04070038, "step": 5583 }, { "epoch": 11.168, "grad_norm": 1.1414661407470703, "learning_rate": 2e-05, "loss": 0.03115014, "step": 5584 }, { "epoch": 11.17, "grad_norm": 3.4734015464782715, "learning_rate": 2e-05, "loss": 0.05622058, "step": 5585 }, { "epoch": 11.172, "grad_norm": 1.3070592880249023, "learning_rate": 2e-05, "loss": 0.05956008, "step": 5586 }, { "epoch": 11.174, "grad_norm": 1.8945657014846802, "learning_rate": 2e-05, "loss": 0.05505826, "step": 5587 }, { "epoch": 11.176, "grad_norm": 2.134795665740967, "learning_rate": 2e-05, "loss": 0.05037409, "step": 5588 }, { "epoch": 11.178, "grad_norm": 1.618909239768982, "learning_rate": 2e-05, "loss": 0.04687338, "step": 5589 }, { "epoch": 11.18, "grad_norm": 1.6525713205337524, "learning_rate": 2e-05, "loss": 0.03109693, "step": 5590 }, { "epoch": 11.182, "grad_norm": 1.4128676652908325, "learning_rate": 2e-05, "loss": 0.04869646, "step": 5591 }, { "epoch": 11.184, "grad_norm": 1.8516077995300293, "learning_rate": 2e-05, "loss": 0.05194153, "step": 5592 }, { "epoch": 11.186, "grad_norm": 0.9598081111907959, "learning_rate": 2e-05, "loss": 0.03175935, "step": 5593 }, { "epoch": 11.188, "grad_norm": 2.366396903991699, "learning_rate": 2e-05, "loss": 0.04697275, "step": 5594 }, { "epoch": 11.19, "grad_norm": 1.4049228429794312, "learning_rate": 2e-05, "loss": 0.04712133, "step": 5595 }, { "epoch": 11.192, "grad_norm": 1.392557978630066, "learning_rate": 2e-05, "loss": 0.04036619, "step": 5596 }, { "epoch": 11.194, "grad_norm": 2.7289698123931885, "learning_rate": 2e-05, "loss": 0.05585587, "step": 5597 }, { "epoch": 11.196, "grad_norm": 1.3307260274887085, "learning_rate": 2e-05, "loss": 0.04757424, "step": 5598 }, { "epoch": 11.198, "grad_norm": 1.4445418119430542, "learning_rate": 2e-05, "loss": 0.04362855, "step": 5599 }, { "epoch": 11.2, "grad_norm": 1.3054895401000977, "learning_rate": 2e-05, "loss": 0.0476041, "step": 5600 }, { "epoch": 11.202, "grad_norm": 1.1574424505233765, "learning_rate": 2e-05, "loss": 0.04523313, "step": 5601 }, { "epoch": 11.204, "grad_norm": 1.6531107425689697, "learning_rate": 2e-05, "loss": 0.07256036, "step": 5602 }, { "epoch": 11.206, "grad_norm": 2.6283984184265137, "learning_rate": 2e-05, "loss": 0.05259442, "step": 5603 }, { "epoch": 11.208, "grad_norm": 1.1295219659805298, "learning_rate": 2e-05, "loss": 0.02722023, "step": 5604 }, { "epoch": 11.21, "grad_norm": 1.7580029964447021, "learning_rate": 2e-05, "loss": 0.0378816, "step": 5605 }, { "epoch": 11.212, "grad_norm": 1.1564671993255615, "learning_rate": 2e-05, "loss": 0.04453285, "step": 5606 }, { "epoch": 11.214, "grad_norm": 2.0429930686950684, "learning_rate": 2e-05, "loss": 0.06690899, "step": 5607 }, { "epoch": 11.216, "grad_norm": 3.2511401176452637, "learning_rate": 2e-05, "loss": 0.07473037, "step": 5608 }, { "epoch": 11.218, "grad_norm": 1.5251747369766235, "learning_rate": 2e-05, "loss": 0.06034661, "step": 5609 }, { "epoch": 11.22, "grad_norm": 1.3258527517318726, "learning_rate": 2e-05, "loss": 0.04761764, "step": 5610 }, { "epoch": 11.222, "grad_norm": 1.9736108779907227, "learning_rate": 2e-05, "loss": 0.04002544, "step": 5611 }, { "epoch": 11.224, "grad_norm": 1.7155126333236694, "learning_rate": 2e-05, "loss": 0.05041002, "step": 5612 }, { "epoch": 11.226, "grad_norm": 1.2253735065460205, "learning_rate": 2e-05, "loss": 0.04136164, "step": 5613 }, { "epoch": 11.228, "grad_norm": 1.5986520051956177, "learning_rate": 2e-05, "loss": 0.04651842, "step": 5614 }, { "epoch": 11.23, "grad_norm": 1.277672529220581, "learning_rate": 2e-05, "loss": 0.04242517, "step": 5615 }, { "epoch": 11.232, "grad_norm": 1.531924843788147, "learning_rate": 2e-05, "loss": 0.0391548, "step": 5616 }, { "epoch": 11.234, "grad_norm": 1.1350847482681274, "learning_rate": 2e-05, "loss": 0.04663714, "step": 5617 }, { "epoch": 11.236, "grad_norm": 1.4929993152618408, "learning_rate": 2e-05, "loss": 0.04786985, "step": 5618 }, { "epoch": 11.238, "grad_norm": 1.8032375574111938, "learning_rate": 2e-05, "loss": 0.0391562, "step": 5619 }, { "epoch": 11.24, "grad_norm": 1.310897946357727, "learning_rate": 2e-05, "loss": 0.0419879, "step": 5620 }, { "epoch": 11.242, "grad_norm": 1.8344627618789673, "learning_rate": 2e-05, "loss": 0.05171813, "step": 5621 }, { "epoch": 11.244, "grad_norm": 1.4439165592193604, "learning_rate": 2e-05, "loss": 0.04475713, "step": 5622 }, { "epoch": 11.246, "grad_norm": 1.3868870735168457, "learning_rate": 2e-05, "loss": 0.0524424, "step": 5623 }, { "epoch": 11.248, "grad_norm": 1.4693684577941895, "learning_rate": 2e-05, "loss": 0.03726972, "step": 5624 }, { "epoch": 11.25, "grad_norm": 1.3319610357284546, "learning_rate": 2e-05, "loss": 0.0376013, "step": 5625 }, { "epoch": 11.252, "grad_norm": 1.1958826780319214, "learning_rate": 2e-05, "loss": 0.03406876, "step": 5626 }, { "epoch": 11.254, "grad_norm": 1.5152924060821533, "learning_rate": 2e-05, "loss": 0.05023153, "step": 5627 }, { "epoch": 11.256, "grad_norm": 1.8399534225463867, "learning_rate": 2e-05, "loss": 0.05513574, "step": 5628 }, { "epoch": 11.258, "grad_norm": 1.1258716583251953, "learning_rate": 2e-05, "loss": 0.02846676, "step": 5629 }, { "epoch": 11.26, "grad_norm": 1.4945526123046875, "learning_rate": 2e-05, "loss": 0.04922044, "step": 5630 }, { "epoch": 11.262, "grad_norm": 1.420878291130066, "learning_rate": 2e-05, "loss": 0.05598453, "step": 5631 }, { "epoch": 11.264, "grad_norm": 1.74343740940094, "learning_rate": 2e-05, "loss": 0.05555171, "step": 5632 }, { "epoch": 11.266, "grad_norm": 1.4211103916168213, "learning_rate": 2e-05, "loss": 0.04510403, "step": 5633 }, { "epoch": 11.268, "grad_norm": 1.2378079891204834, "learning_rate": 2e-05, "loss": 0.04460875, "step": 5634 }, { "epoch": 11.27, "grad_norm": 2.1717655658721924, "learning_rate": 2e-05, "loss": 0.04952989, "step": 5635 }, { "epoch": 11.272, "grad_norm": 1.3453963994979858, "learning_rate": 2e-05, "loss": 0.04727048, "step": 5636 }, { "epoch": 11.274000000000001, "grad_norm": 1.2908873558044434, "learning_rate": 2e-05, "loss": 0.04669511, "step": 5637 }, { "epoch": 11.276, "grad_norm": 2.5825376510620117, "learning_rate": 2e-05, "loss": 0.07910918, "step": 5638 }, { "epoch": 11.278, "grad_norm": 1.3286540508270264, "learning_rate": 2e-05, "loss": 0.05208978, "step": 5639 }, { "epoch": 11.28, "grad_norm": 2.3290231227874756, "learning_rate": 2e-05, "loss": 0.0465233, "step": 5640 }, { "epoch": 11.282, "grad_norm": 3.7450168132781982, "learning_rate": 2e-05, "loss": 0.04494961, "step": 5641 }, { "epoch": 11.284, "grad_norm": 2.037266492843628, "learning_rate": 2e-05, "loss": 0.05497518, "step": 5642 }, { "epoch": 11.286, "grad_norm": 1.5916911363601685, "learning_rate": 2e-05, "loss": 0.04094953, "step": 5643 }, { "epoch": 11.288, "grad_norm": 1.6948169469833374, "learning_rate": 2e-05, "loss": 0.0476352, "step": 5644 }, { "epoch": 11.29, "grad_norm": 2.1372830867767334, "learning_rate": 2e-05, "loss": 0.05398251, "step": 5645 }, { "epoch": 11.292, "grad_norm": 1.3128230571746826, "learning_rate": 2e-05, "loss": 0.04471845, "step": 5646 }, { "epoch": 11.294, "grad_norm": 1.647568941116333, "learning_rate": 2e-05, "loss": 0.04331946, "step": 5647 }, { "epoch": 11.296, "grad_norm": 1.923680067062378, "learning_rate": 2e-05, "loss": 0.05029312, "step": 5648 }, { "epoch": 11.298, "grad_norm": 1.4542899131774902, "learning_rate": 2e-05, "loss": 0.05089693, "step": 5649 }, { "epoch": 11.3, "grad_norm": 1.5298106670379639, "learning_rate": 2e-05, "loss": 0.05881848, "step": 5650 }, { "epoch": 11.302, "grad_norm": 1.7057805061340332, "learning_rate": 2e-05, "loss": 0.03789561, "step": 5651 }, { "epoch": 11.304, "grad_norm": 1.3690357208251953, "learning_rate": 2e-05, "loss": 0.04559196, "step": 5652 }, { "epoch": 11.306, "grad_norm": 1.7711060047149658, "learning_rate": 2e-05, "loss": 0.04960579, "step": 5653 }, { "epoch": 11.308, "grad_norm": 2.154360294342041, "learning_rate": 2e-05, "loss": 0.04761767, "step": 5654 }, { "epoch": 11.31, "grad_norm": 1.1826550960540771, "learning_rate": 2e-05, "loss": 0.04088816, "step": 5655 }, { "epoch": 11.312, "grad_norm": 2.5155763626098633, "learning_rate": 2e-05, "loss": 0.04958711, "step": 5656 }, { "epoch": 11.314, "grad_norm": 1.2392568588256836, "learning_rate": 2e-05, "loss": 0.05496053, "step": 5657 }, { "epoch": 11.316, "grad_norm": 1.723213791847229, "learning_rate": 2e-05, "loss": 0.04695657, "step": 5658 }, { "epoch": 11.318, "grad_norm": 1.6298481225967407, "learning_rate": 2e-05, "loss": 0.03813414, "step": 5659 }, { "epoch": 11.32, "grad_norm": 1.4247621297836304, "learning_rate": 2e-05, "loss": 0.04710161, "step": 5660 }, { "epoch": 11.322, "grad_norm": 1.5706455707550049, "learning_rate": 2e-05, "loss": 0.0463535, "step": 5661 }, { "epoch": 11.324, "grad_norm": 2.462221622467041, "learning_rate": 2e-05, "loss": 0.05929623, "step": 5662 }, { "epoch": 11.326, "grad_norm": 1.4576634168624878, "learning_rate": 2e-05, "loss": 0.05897439, "step": 5663 }, { "epoch": 11.328, "grad_norm": 1.6032121181488037, "learning_rate": 2e-05, "loss": 0.04866658, "step": 5664 }, { "epoch": 11.33, "grad_norm": 1.4402192831039429, "learning_rate": 2e-05, "loss": 0.03332867, "step": 5665 }, { "epoch": 11.332, "grad_norm": 2.8734233379364014, "learning_rate": 2e-05, "loss": 0.06615589, "step": 5666 }, { "epoch": 11.334, "grad_norm": 1.652347445487976, "learning_rate": 2e-05, "loss": 0.07197806, "step": 5667 }, { "epoch": 11.336, "grad_norm": 1.8642592430114746, "learning_rate": 2e-05, "loss": 0.0569929, "step": 5668 }, { "epoch": 11.338, "grad_norm": 1.2723792791366577, "learning_rate": 2e-05, "loss": 0.04522455, "step": 5669 }, { "epoch": 11.34, "grad_norm": 1.5746574401855469, "learning_rate": 2e-05, "loss": 0.05542893, "step": 5670 }, { "epoch": 11.342, "grad_norm": 1.4517905712127686, "learning_rate": 2e-05, "loss": 0.04733806, "step": 5671 }, { "epoch": 11.344, "grad_norm": 1.3056635856628418, "learning_rate": 2e-05, "loss": 0.04615809, "step": 5672 }, { "epoch": 11.346, "grad_norm": 2.0126426219940186, "learning_rate": 2e-05, "loss": 0.04903982, "step": 5673 }, { "epoch": 11.348, "grad_norm": 2.094006299972534, "learning_rate": 2e-05, "loss": 0.0404023, "step": 5674 }, { "epoch": 11.35, "grad_norm": 2.433847188949585, "learning_rate": 2e-05, "loss": 0.03881406, "step": 5675 }, { "epoch": 11.352, "grad_norm": 1.2057197093963623, "learning_rate": 2e-05, "loss": 0.0405013, "step": 5676 }, { "epoch": 11.354, "grad_norm": 1.8133881092071533, "learning_rate": 2e-05, "loss": 0.04236561, "step": 5677 }, { "epoch": 11.356, "grad_norm": 2.568739891052246, "learning_rate": 2e-05, "loss": 0.05626953, "step": 5678 }, { "epoch": 11.358, "grad_norm": 1.2331525087356567, "learning_rate": 2e-05, "loss": 0.05501791, "step": 5679 }, { "epoch": 11.36, "grad_norm": 1.5900580883026123, "learning_rate": 2e-05, "loss": 0.03576569, "step": 5680 }, { "epoch": 11.362, "grad_norm": 1.9113343954086304, "learning_rate": 2e-05, "loss": 0.06783809, "step": 5681 }, { "epoch": 11.364, "grad_norm": 2.187023639678955, "learning_rate": 2e-05, "loss": 0.04357416, "step": 5682 }, { "epoch": 11.366, "grad_norm": 1.2699072360992432, "learning_rate": 2e-05, "loss": 0.03943901, "step": 5683 }, { "epoch": 11.368, "grad_norm": 1.9693132638931274, "learning_rate": 2e-05, "loss": 0.05115809, "step": 5684 }, { "epoch": 11.37, "grad_norm": 1.3677825927734375, "learning_rate": 2e-05, "loss": 0.04716478, "step": 5685 }, { "epoch": 11.372, "grad_norm": 2.4023549556732178, "learning_rate": 2e-05, "loss": 0.04504751, "step": 5686 }, { "epoch": 11.374, "grad_norm": 2.511744737625122, "learning_rate": 2e-05, "loss": 0.04527961, "step": 5687 }, { "epoch": 11.376, "grad_norm": 3.562417984008789, "learning_rate": 2e-05, "loss": 0.05556261, "step": 5688 }, { "epoch": 11.378, "grad_norm": 1.733673095703125, "learning_rate": 2e-05, "loss": 0.0592088, "step": 5689 }, { "epoch": 11.38, "grad_norm": 1.9149365425109863, "learning_rate": 2e-05, "loss": 0.04439528, "step": 5690 }, { "epoch": 11.382, "grad_norm": 2.3991384506225586, "learning_rate": 2e-05, "loss": 0.06331401, "step": 5691 }, { "epoch": 11.384, "grad_norm": 1.122333288192749, "learning_rate": 2e-05, "loss": 0.04532647, "step": 5692 }, { "epoch": 11.386, "grad_norm": 1.6301887035369873, "learning_rate": 2e-05, "loss": 0.05703401, "step": 5693 }, { "epoch": 11.388, "grad_norm": 1.636753797531128, "learning_rate": 2e-05, "loss": 0.05419588, "step": 5694 }, { "epoch": 11.39, "grad_norm": 1.2946252822875977, "learning_rate": 2e-05, "loss": 0.05132918, "step": 5695 }, { "epoch": 11.392, "grad_norm": 2.3236074447631836, "learning_rate": 2e-05, "loss": 0.05070593, "step": 5696 }, { "epoch": 11.394, "grad_norm": 1.109092116355896, "learning_rate": 2e-05, "loss": 0.0335158, "step": 5697 }, { "epoch": 11.396, "grad_norm": 1.7937777042388916, "learning_rate": 2e-05, "loss": 0.06048029, "step": 5698 }, { "epoch": 11.398, "grad_norm": 1.0988600254058838, "learning_rate": 2e-05, "loss": 0.04137257, "step": 5699 }, { "epoch": 11.4, "grad_norm": 1.4302674531936646, "learning_rate": 2e-05, "loss": 0.04906035, "step": 5700 }, { "epoch": 11.402, "grad_norm": 1.5210566520690918, "learning_rate": 2e-05, "loss": 0.03527754, "step": 5701 }, { "epoch": 11.404, "grad_norm": 2.1926465034484863, "learning_rate": 2e-05, "loss": 0.04686191, "step": 5702 }, { "epoch": 11.406, "grad_norm": 1.6266396045684814, "learning_rate": 2e-05, "loss": 0.05651092, "step": 5703 }, { "epoch": 11.408, "grad_norm": 2.796839952468872, "learning_rate": 2e-05, "loss": 0.07000058, "step": 5704 }, { "epoch": 11.41, "grad_norm": 1.4611526727676392, "learning_rate": 2e-05, "loss": 0.05321472, "step": 5705 }, { "epoch": 11.412, "grad_norm": 1.8903465270996094, "learning_rate": 2e-05, "loss": 0.05226947, "step": 5706 }, { "epoch": 11.414, "grad_norm": 1.4791998863220215, "learning_rate": 2e-05, "loss": 0.05257621, "step": 5707 }, { "epoch": 11.416, "grad_norm": 1.5878609418869019, "learning_rate": 2e-05, "loss": 0.06273852, "step": 5708 }, { "epoch": 11.418, "grad_norm": 2.1732583045959473, "learning_rate": 2e-05, "loss": 0.05149956, "step": 5709 }, { "epoch": 11.42, "grad_norm": 1.677105188369751, "learning_rate": 2e-05, "loss": 0.0615867, "step": 5710 }, { "epoch": 11.422, "grad_norm": 1.29386305809021, "learning_rate": 2e-05, "loss": 0.04953182, "step": 5711 }, { "epoch": 11.424, "grad_norm": 1.7477467060089111, "learning_rate": 2e-05, "loss": 0.06783712, "step": 5712 }, { "epoch": 11.426, "grad_norm": 3.544480085372925, "learning_rate": 2e-05, "loss": 0.04404292, "step": 5713 }, { "epoch": 11.428, "grad_norm": 2.753350019454956, "learning_rate": 2e-05, "loss": 0.07701689, "step": 5714 }, { "epoch": 11.43, "grad_norm": 1.8815979957580566, "learning_rate": 2e-05, "loss": 0.05695759, "step": 5715 }, { "epoch": 11.432, "grad_norm": 1.5264737606048584, "learning_rate": 2e-05, "loss": 0.05297385, "step": 5716 }, { "epoch": 11.434, "grad_norm": 1.047830581665039, "learning_rate": 2e-05, "loss": 0.03878511, "step": 5717 }, { "epoch": 11.436, "grad_norm": 1.0375819206237793, "learning_rate": 2e-05, "loss": 0.03545886, "step": 5718 }, { "epoch": 11.438, "grad_norm": 1.4977059364318848, "learning_rate": 2e-05, "loss": 0.0493038, "step": 5719 }, { "epoch": 11.44, "grad_norm": 1.3288768529891968, "learning_rate": 2e-05, "loss": 0.0505427, "step": 5720 }, { "epoch": 11.442, "grad_norm": 1.954521894454956, "learning_rate": 2e-05, "loss": 0.06295772, "step": 5721 }, { "epoch": 11.444, "grad_norm": 1.0402206182479858, "learning_rate": 2e-05, "loss": 0.03475888, "step": 5722 }, { "epoch": 11.446, "grad_norm": 2.404123544692993, "learning_rate": 2e-05, "loss": 0.0562352, "step": 5723 }, { "epoch": 11.448, "grad_norm": 1.7195630073547363, "learning_rate": 2e-05, "loss": 0.05285126, "step": 5724 }, { "epoch": 11.45, "grad_norm": 1.2412029504776, "learning_rate": 2e-05, "loss": 0.05088343, "step": 5725 }, { "epoch": 11.452, "grad_norm": 2.3141374588012695, "learning_rate": 2e-05, "loss": 0.0701945, "step": 5726 }, { "epoch": 11.454, "grad_norm": 1.1392027139663696, "learning_rate": 2e-05, "loss": 0.03937323, "step": 5727 }, { "epoch": 11.456, "grad_norm": 1.5960547924041748, "learning_rate": 2e-05, "loss": 0.04383022, "step": 5728 }, { "epoch": 11.458, "grad_norm": 1.1272916793823242, "learning_rate": 2e-05, "loss": 0.04322441, "step": 5729 }, { "epoch": 11.46, "grad_norm": 1.3566938638687134, "learning_rate": 2e-05, "loss": 0.04182547, "step": 5730 }, { "epoch": 11.462, "grad_norm": 1.7132365703582764, "learning_rate": 2e-05, "loss": 0.04979304, "step": 5731 }, { "epoch": 11.464, "grad_norm": 1.125922679901123, "learning_rate": 2e-05, "loss": 0.04207063, "step": 5732 }, { "epoch": 11.466, "grad_norm": 1.6913458108901978, "learning_rate": 2e-05, "loss": 0.04294857, "step": 5733 }, { "epoch": 11.468, "grad_norm": 1.0827990770339966, "learning_rate": 2e-05, "loss": 0.05028411, "step": 5734 }, { "epoch": 11.47, "grad_norm": 1.2500609159469604, "learning_rate": 2e-05, "loss": 0.04735212, "step": 5735 }, { "epoch": 11.472, "grad_norm": 1.5820868015289307, "learning_rate": 2e-05, "loss": 0.04795214, "step": 5736 }, { "epoch": 11.474, "grad_norm": 4.20005464553833, "learning_rate": 2e-05, "loss": 0.05945217, "step": 5737 }, { "epoch": 11.475999999999999, "grad_norm": 1.4440017938613892, "learning_rate": 2e-05, "loss": 0.0505971, "step": 5738 }, { "epoch": 11.478, "grad_norm": 1.6714645624160767, "learning_rate": 2e-05, "loss": 0.04327673, "step": 5739 }, { "epoch": 11.48, "grad_norm": 1.5207058191299438, "learning_rate": 2e-05, "loss": 0.03629073, "step": 5740 }, { "epoch": 11.482, "grad_norm": 2.2845335006713867, "learning_rate": 2e-05, "loss": 0.04524094, "step": 5741 }, { "epoch": 11.484, "grad_norm": 1.6629916429519653, "learning_rate": 2e-05, "loss": 0.03859144, "step": 5742 }, { "epoch": 11.486, "grad_norm": 1.1069337129592896, "learning_rate": 2e-05, "loss": 0.03897121, "step": 5743 }, { "epoch": 11.488, "grad_norm": 1.555224895477295, "learning_rate": 2e-05, "loss": 0.03899939, "step": 5744 }, { "epoch": 11.49, "grad_norm": 2.02616548538208, "learning_rate": 2e-05, "loss": 0.05024801, "step": 5745 }, { "epoch": 11.492, "grad_norm": 1.102475881576538, "learning_rate": 2e-05, "loss": 0.03961539, "step": 5746 }, { "epoch": 11.494, "grad_norm": 1.6318930387496948, "learning_rate": 2e-05, "loss": 0.04914928, "step": 5747 }, { "epoch": 11.496, "grad_norm": 1.7205697298049927, "learning_rate": 2e-05, "loss": 0.04520516, "step": 5748 }, { "epoch": 11.498, "grad_norm": 2.896815538406372, "learning_rate": 2e-05, "loss": 0.06124, "step": 5749 }, { "epoch": 11.5, "grad_norm": 1.4367793798446655, "learning_rate": 2e-05, "loss": 0.03653619, "step": 5750 }, { "epoch": 11.502, "grad_norm": 1.8099347352981567, "learning_rate": 2e-05, "loss": 0.04635201, "step": 5751 }, { "epoch": 11.504, "grad_norm": 1.852498173713684, "learning_rate": 2e-05, "loss": 0.05065921, "step": 5752 }, { "epoch": 11.506, "grad_norm": 1.1768702268600464, "learning_rate": 2e-05, "loss": 0.03448585, "step": 5753 }, { "epoch": 11.508, "grad_norm": 1.5099678039550781, "learning_rate": 2e-05, "loss": 0.03958462, "step": 5754 }, { "epoch": 11.51, "grad_norm": 3.429736375808716, "learning_rate": 2e-05, "loss": 0.04971122, "step": 5755 }, { "epoch": 11.512, "grad_norm": 1.356461524963379, "learning_rate": 2e-05, "loss": 0.04755508, "step": 5756 }, { "epoch": 11.514, "grad_norm": 2.2165658473968506, "learning_rate": 2e-05, "loss": 0.0588036, "step": 5757 }, { "epoch": 11.516, "grad_norm": 1.2915750741958618, "learning_rate": 2e-05, "loss": 0.04737089, "step": 5758 }, { "epoch": 11.518, "grad_norm": 1.303098440170288, "learning_rate": 2e-05, "loss": 0.04432829, "step": 5759 }, { "epoch": 11.52, "grad_norm": 1.4214109182357788, "learning_rate": 2e-05, "loss": 0.04704283, "step": 5760 }, { "epoch": 11.522, "grad_norm": 1.403090476989746, "learning_rate": 2e-05, "loss": 0.03694802, "step": 5761 }, { "epoch": 11.524000000000001, "grad_norm": 4.832217216491699, "learning_rate": 2e-05, "loss": 0.05118045, "step": 5762 }, { "epoch": 11.526, "grad_norm": 1.1842046976089478, "learning_rate": 2e-05, "loss": 0.0342038, "step": 5763 }, { "epoch": 11.528, "grad_norm": 1.0605454444885254, "learning_rate": 2e-05, "loss": 0.03051936, "step": 5764 }, { "epoch": 11.53, "grad_norm": 1.4966093301773071, "learning_rate": 2e-05, "loss": 0.05437325, "step": 5765 }, { "epoch": 11.532, "grad_norm": 1.8924329280853271, "learning_rate": 2e-05, "loss": 0.0331119, "step": 5766 }, { "epoch": 11.534, "grad_norm": 1.384433388710022, "learning_rate": 2e-05, "loss": 0.05945661, "step": 5767 }, { "epoch": 11.536, "grad_norm": 1.6517621278762817, "learning_rate": 2e-05, "loss": 0.04912163, "step": 5768 }, { "epoch": 11.538, "grad_norm": 1.414260983467102, "learning_rate": 2e-05, "loss": 0.05474976, "step": 5769 }, { "epoch": 11.54, "grad_norm": 1.017541527748108, "learning_rate": 2e-05, "loss": 0.0367628, "step": 5770 }, { "epoch": 11.542, "grad_norm": 1.0602508783340454, "learning_rate": 2e-05, "loss": 0.03636168, "step": 5771 }, { "epoch": 11.544, "grad_norm": 1.6005361080169678, "learning_rate": 2e-05, "loss": 0.06516649, "step": 5772 }, { "epoch": 11.546, "grad_norm": 1.2333847284317017, "learning_rate": 2e-05, "loss": 0.0381854, "step": 5773 }, { "epoch": 11.548, "grad_norm": 1.438815951347351, "learning_rate": 2e-05, "loss": 0.04053817, "step": 5774 }, { "epoch": 11.55, "grad_norm": 1.6449198722839355, "learning_rate": 2e-05, "loss": 0.04813613, "step": 5775 }, { "epoch": 11.552, "grad_norm": 2.104741096496582, "learning_rate": 2e-05, "loss": 0.04231735, "step": 5776 }, { "epoch": 11.554, "grad_norm": 1.2205615043640137, "learning_rate": 2e-05, "loss": 0.05345644, "step": 5777 }, { "epoch": 11.556000000000001, "grad_norm": 1.7869209051132202, "learning_rate": 2e-05, "loss": 0.04566547, "step": 5778 }, { "epoch": 11.558, "grad_norm": 2.4200775623321533, "learning_rate": 2e-05, "loss": 0.0442078, "step": 5779 }, { "epoch": 11.56, "grad_norm": 1.3662333488464355, "learning_rate": 2e-05, "loss": 0.0457233, "step": 5780 }, { "epoch": 11.562, "grad_norm": 1.3299016952514648, "learning_rate": 2e-05, "loss": 0.04756445, "step": 5781 }, { "epoch": 11.564, "grad_norm": 1.2130399942398071, "learning_rate": 2e-05, "loss": 0.0316097, "step": 5782 }, { "epoch": 11.566, "grad_norm": 1.805086612701416, "learning_rate": 2e-05, "loss": 0.060819, "step": 5783 }, { "epoch": 11.568, "grad_norm": 1.9846824407577515, "learning_rate": 2e-05, "loss": 0.04482648, "step": 5784 }, { "epoch": 11.57, "grad_norm": 2.4053966999053955, "learning_rate": 2e-05, "loss": 0.04749952, "step": 5785 }, { "epoch": 11.572, "grad_norm": 1.3613585233688354, "learning_rate": 2e-05, "loss": 0.04724826, "step": 5786 }, { "epoch": 11.574, "grad_norm": 2.5251240730285645, "learning_rate": 2e-05, "loss": 0.03613253, "step": 5787 }, { "epoch": 11.576, "grad_norm": 2.14925217628479, "learning_rate": 2e-05, "loss": 0.04898756, "step": 5788 }, { "epoch": 11.578, "grad_norm": 1.379772424697876, "learning_rate": 2e-05, "loss": 0.05590723, "step": 5789 }, { "epoch": 11.58, "grad_norm": 2.1570491790771484, "learning_rate": 2e-05, "loss": 0.06367472, "step": 5790 }, { "epoch": 11.582, "grad_norm": 1.7565891742706299, "learning_rate": 2e-05, "loss": 0.05175413, "step": 5791 }, { "epoch": 11.584, "grad_norm": 1.842274785041809, "learning_rate": 2e-05, "loss": 0.05350547, "step": 5792 }, { "epoch": 11.586, "grad_norm": 2.831634283065796, "learning_rate": 2e-05, "loss": 0.06426252, "step": 5793 }, { "epoch": 11.588, "grad_norm": 1.5474140644073486, "learning_rate": 2e-05, "loss": 0.04553394, "step": 5794 }, { "epoch": 11.59, "grad_norm": 2.2714931964874268, "learning_rate": 2e-05, "loss": 0.02121895, "step": 5795 }, { "epoch": 11.592, "grad_norm": 1.8448201417922974, "learning_rate": 2e-05, "loss": 0.0600131, "step": 5796 }, { "epoch": 11.594, "grad_norm": 1.1388860940933228, "learning_rate": 2e-05, "loss": 0.03663793, "step": 5797 }, { "epoch": 11.596, "grad_norm": 1.9692730903625488, "learning_rate": 2e-05, "loss": 0.04725102, "step": 5798 }, { "epoch": 11.598, "grad_norm": 1.9636507034301758, "learning_rate": 2e-05, "loss": 0.04308451, "step": 5799 }, { "epoch": 11.6, "grad_norm": 1.0663782358169556, "learning_rate": 2e-05, "loss": 0.0288805, "step": 5800 }, { "epoch": 11.602, "grad_norm": 1.4374663829803467, "learning_rate": 2e-05, "loss": 0.04004197, "step": 5801 }, { "epoch": 11.604, "grad_norm": 1.7417012453079224, "learning_rate": 2e-05, "loss": 0.05123825, "step": 5802 }, { "epoch": 11.606, "grad_norm": 2.1125712394714355, "learning_rate": 2e-05, "loss": 0.0459488, "step": 5803 }, { "epoch": 11.608, "grad_norm": 1.3598778247833252, "learning_rate": 2e-05, "loss": 0.03898705, "step": 5804 }, { "epoch": 11.61, "grad_norm": 1.2977287769317627, "learning_rate": 2e-05, "loss": 0.05425401, "step": 5805 }, { "epoch": 11.612, "grad_norm": 1.863133430480957, "learning_rate": 2e-05, "loss": 0.05389511, "step": 5806 }, { "epoch": 11.614, "grad_norm": 2.131502389907837, "learning_rate": 2e-05, "loss": 0.05331743, "step": 5807 }, { "epoch": 11.616, "grad_norm": 2.4704604148864746, "learning_rate": 2e-05, "loss": 0.05750574, "step": 5808 }, { "epoch": 11.618, "grad_norm": 1.9358444213867188, "learning_rate": 2e-05, "loss": 0.04222788, "step": 5809 }, { "epoch": 11.62, "grad_norm": 1.9284040927886963, "learning_rate": 2e-05, "loss": 0.04325092, "step": 5810 }, { "epoch": 11.622, "grad_norm": 2.1048789024353027, "learning_rate": 2e-05, "loss": 0.05008987, "step": 5811 }, { "epoch": 11.624, "grad_norm": 2.60477614402771, "learning_rate": 2e-05, "loss": 0.04310272, "step": 5812 }, { "epoch": 11.626, "grad_norm": 1.3271856307983398, "learning_rate": 2e-05, "loss": 0.03580116, "step": 5813 }, { "epoch": 11.628, "grad_norm": 1.6767809391021729, "learning_rate": 2e-05, "loss": 0.03502236, "step": 5814 }, { "epoch": 11.63, "grad_norm": 1.5544588565826416, "learning_rate": 2e-05, "loss": 0.05321189, "step": 5815 }, { "epoch": 11.632, "grad_norm": 1.3371565341949463, "learning_rate": 2e-05, "loss": 0.04384248, "step": 5816 }, { "epoch": 11.634, "grad_norm": 1.6395615339279175, "learning_rate": 2e-05, "loss": 0.04614739, "step": 5817 }, { "epoch": 11.636, "grad_norm": 2.506648302078247, "learning_rate": 2e-05, "loss": 0.06705838, "step": 5818 }, { "epoch": 11.638, "grad_norm": 1.640137791633606, "learning_rate": 2e-05, "loss": 0.05612247, "step": 5819 }, { "epoch": 11.64, "grad_norm": 2.4301540851593018, "learning_rate": 2e-05, "loss": 0.05863875, "step": 5820 }, { "epoch": 11.642, "grad_norm": 1.5058404207229614, "learning_rate": 2e-05, "loss": 0.04215011, "step": 5821 }, { "epoch": 11.644, "grad_norm": 1.2542659044265747, "learning_rate": 2e-05, "loss": 0.04177867, "step": 5822 }, { "epoch": 11.646, "grad_norm": 1.107443928718567, "learning_rate": 2e-05, "loss": 0.04239887, "step": 5823 }, { "epoch": 11.648, "grad_norm": 1.2233611345291138, "learning_rate": 2e-05, "loss": 0.04678968, "step": 5824 }, { "epoch": 11.65, "grad_norm": 1.8364397287368774, "learning_rate": 2e-05, "loss": 0.04483078, "step": 5825 }, { "epoch": 11.652, "grad_norm": 1.9486418962478638, "learning_rate": 2e-05, "loss": 0.04975449, "step": 5826 }, { "epoch": 11.654, "grad_norm": 1.555195689201355, "learning_rate": 2e-05, "loss": 0.04872251, "step": 5827 }, { "epoch": 11.656, "grad_norm": 1.7315301895141602, "learning_rate": 2e-05, "loss": 0.04223534, "step": 5828 }, { "epoch": 11.658, "grad_norm": 1.342836856842041, "learning_rate": 2e-05, "loss": 0.06062792, "step": 5829 }, { "epoch": 11.66, "grad_norm": 1.3641505241394043, "learning_rate": 2e-05, "loss": 0.05880754, "step": 5830 }, { "epoch": 11.662, "grad_norm": 1.6247472763061523, "learning_rate": 2e-05, "loss": 0.05718165, "step": 5831 }, { "epoch": 11.664, "grad_norm": 1.6310220956802368, "learning_rate": 2e-05, "loss": 0.0669639, "step": 5832 }, { "epoch": 11.666, "grad_norm": 1.7226531505584717, "learning_rate": 2e-05, "loss": 0.04655602, "step": 5833 }, { "epoch": 11.668, "grad_norm": 1.3371641635894775, "learning_rate": 2e-05, "loss": 0.05352715, "step": 5834 }, { "epoch": 11.67, "grad_norm": 1.9057135581970215, "learning_rate": 2e-05, "loss": 0.06148591, "step": 5835 }, { "epoch": 11.672, "grad_norm": 2.0866620540618896, "learning_rate": 2e-05, "loss": 0.04995357, "step": 5836 }, { "epoch": 11.674, "grad_norm": 2.5532031059265137, "learning_rate": 2e-05, "loss": 0.07622005, "step": 5837 }, { "epoch": 11.676, "grad_norm": 2.778372287750244, "learning_rate": 2e-05, "loss": 0.06307095, "step": 5838 }, { "epoch": 11.678, "grad_norm": 1.43252432346344, "learning_rate": 2e-05, "loss": 0.04999834, "step": 5839 }, { "epoch": 11.68, "grad_norm": 1.3844120502471924, "learning_rate": 2e-05, "loss": 0.05112729, "step": 5840 }, { "epoch": 11.682, "grad_norm": 1.2430758476257324, "learning_rate": 2e-05, "loss": 0.04392055, "step": 5841 }, { "epoch": 11.684, "grad_norm": 1.273295521736145, "learning_rate": 2e-05, "loss": 0.04273405, "step": 5842 }, { "epoch": 11.686, "grad_norm": 1.3100574016571045, "learning_rate": 2e-05, "loss": 0.05339495, "step": 5843 }, { "epoch": 11.688, "grad_norm": 1.267080545425415, "learning_rate": 2e-05, "loss": 0.0397922, "step": 5844 }, { "epoch": 11.69, "grad_norm": 1.1280689239501953, "learning_rate": 2e-05, "loss": 0.04549348, "step": 5845 }, { "epoch": 11.692, "grad_norm": 1.0839488506317139, "learning_rate": 2e-05, "loss": 0.03936727, "step": 5846 }, { "epoch": 11.693999999999999, "grad_norm": 1.552826166152954, "learning_rate": 2e-05, "loss": 0.04919448, "step": 5847 }, { "epoch": 11.696, "grad_norm": 1.6215623617172241, "learning_rate": 2e-05, "loss": 0.03126223, "step": 5848 }, { "epoch": 11.698, "grad_norm": 1.1549961566925049, "learning_rate": 2e-05, "loss": 0.03589753, "step": 5849 }, { "epoch": 11.7, "grad_norm": 1.782952070236206, "learning_rate": 2e-05, "loss": 0.0509676, "step": 5850 }, { "epoch": 11.702, "grad_norm": 1.7008768320083618, "learning_rate": 2e-05, "loss": 0.07416837, "step": 5851 }, { "epoch": 11.704, "grad_norm": 1.7450941801071167, "learning_rate": 2e-05, "loss": 0.04925626, "step": 5852 }, { "epoch": 11.706, "grad_norm": 2.7231221199035645, "learning_rate": 2e-05, "loss": 0.06783025, "step": 5853 }, { "epoch": 11.708, "grad_norm": 1.7462410926818848, "learning_rate": 2e-05, "loss": 0.05293039, "step": 5854 }, { "epoch": 11.71, "grad_norm": 1.803440809249878, "learning_rate": 2e-05, "loss": 0.05988405, "step": 5855 }, { "epoch": 11.712, "grad_norm": 1.4763165712356567, "learning_rate": 2e-05, "loss": 0.04566108, "step": 5856 }, { "epoch": 11.714, "grad_norm": 1.44134521484375, "learning_rate": 2e-05, "loss": 0.04696288, "step": 5857 }, { "epoch": 11.716, "grad_norm": 1.0796514749526978, "learning_rate": 2e-05, "loss": 0.03282233, "step": 5858 }, { "epoch": 11.718, "grad_norm": 1.3339581489562988, "learning_rate": 2e-05, "loss": 0.04751251, "step": 5859 }, { "epoch": 11.72, "grad_norm": 1.997147560119629, "learning_rate": 2e-05, "loss": 0.04504245, "step": 5860 }, { "epoch": 11.722, "grad_norm": 1.335020899772644, "learning_rate": 2e-05, "loss": 0.04243895, "step": 5861 }, { "epoch": 11.724, "grad_norm": 2.0907578468322754, "learning_rate": 2e-05, "loss": 0.04816663, "step": 5862 }, { "epoch": 11.725999999999999, "grad_norm": 1.7296607494354248, "learning_rate": 2e-05, "loss": 0.04865206, "step": 5863 }, { "epoch": 11.728, "grad_norm": 1.610846996307373, "learning_rate": 2e-05, "loss": 0.0545282, "step": 5864 }, { "epoch": 11.73, "grad_norm": 1.4066860675811768, "learning_rate": 2e-05, "loss": 0.047715, "step": 5865 }, { "epoch": 11.732, "grad_norm": 1.3445024490356445, "learning_rate": 2e-05, "loss": 0.04617265, "step": 5866 }, { "epoch": 11.734, "grad_norm": 2.7535033226013184, "learning_rate": 2e-05, "loss": 0.04723052, "step": 5867 }, { "epoch": 11.736, "grad_norm": 1.5304971933364868, "learning_rate": 2e-05, "loss": 0.03638636, "step": 5868 }, { "epoch": 11.738, "grad_norm": 1.0276997089385986, "learning_rate": 2e-05, "loss": 0.03411288, "step": 5869 }, { "epoch": 11.74, "grad_norm": 1.4574671983718872, "learning_rate": 2e-05, "loss": 0.05382932, "step": 5870 }, { "epoch": 11.742, "grad_norm": 3.5009210109710693, "learning_rate": 2e-05, "loss": 0.06978729, "step": 5871 }, { "epoch": 11.744, "grad_norm": 2.0347979068756104, "learning_rate": 2e-05, "loss": 0.0829041, "step": 5872 }, { "epoch": 11.746, "grad_norm": 2.443451404571533, "learning_rate": 2e-05, "loss": 0.06375959, "step": 5873 }, { "epoch": 11.748, "grad_norm": 2.023491859436035, "learning_rate": 2e-05, "loss": 0.06856895, "step": 5874 }, { "epoch": 11.75, "grad_norm": 1.3537729978561401, "learning_rate": 2e-05, "loss": 0.04351505, "step": 5875 }, { "epoch": 11.752, "grad_norm": 1.664186716079712, "learning_rate": 2e-05, "loss": 0.03763783, "step": 5876 }, { "epoch": 11.754, "grad_norm": 1.5077100992202759, "learning_rate": 2e-05, "loss": 0.03997686, "step": 5877 }, { "epoch": 11.756, "grad_norm": 2.3517305850982666, "learning_rate": 2e-05, "loss": 0.03420842, "step": 5878 }, { "epoch": 11.758, "grad_norm": 1.4957901239395142, "learning_rate": 2e-05, "loss": 0.04347527, "step": 5879 }, { "epoch": 11.76, "grad_norm": 1.5121381282806396, "learning_rate": 2e-05, "loss": 0.05459063, "step": 5880 }, { "epoch": 11.762, "grad_norm": 1.957458734512329, "learning_rate": 2e-05, "loss": 0.05225588, "step": 5881 }, { "epoch": 11.764, "grad_norm": 1.498754858970642, "learning_rate": 2e-05, "loss": 0.03588563, "step": 5882 }, { "epoch": 11.766, "grad_norm": 1.5608701705932617, "learning_rate": 2e-05, "loss": 0.04653958, "step": 5883 }, { "epoch": 11.768, "grad_norm": 2.1723520755767822, "learning_rate": 2e-05, "loss": 0.05516068, "step": 5884 }, { "epoch": 11.77, "grad_norm": 2.607795000076294, "learning_rate": 2e-05, "loss": 0.05331718, "step": 5885 }, { "epoch": 11.772, "grad_norm": 1.2783042192459106, "learning_rate": 2e-05, "loss": 0.04722802, "step": 5886 }, { "epoch": 11.774000000000001, "grad_norm": 1.131067156791687, "learning_rate": 2e-05, "loss": 0.04217054, "step": 5887 }, { "epoch": 11.776, "grad_norm": 2.056450366973877, "learning_rate": 2e-05, "loss": 0.04825941, "step": 5888 }, { "epoch": 11.778, "grad_norm": 1.3246713876724243, "learning_rate": 2e-05, "loss": 0.0532944, "step": 5889 }, { "epoch": 11.78, "grad_norm": 1.6754448413848877, "learning_rate": 2e-05, "loss": 0.05571581, "step": 5890 }, { "epoch": 11.782, "grad_norm": 1.6864081621170044, "learning_rate": 2e-05, "loss": 0.04703726, "step": 5891 }, { "epoch": 11.784, "grad_norm": 1.1856135129928589, "learning_rate": 2e-05, "loss": 0.03456947, "step": 5892 }, { "epoch": 11.786, "grad_norm": 1.1907005310058594, "learning_rate": 2e-05, "loss": 0.03893632, "step": 5893 }, { "epoch": 11.788, "grad_norm": 1.570899248123169, "learning_rate": 2e-05, "loss": 0.05421094, "step": 5894 }, { "epoch": 11.79, "grad_norm": 2.6174049377441406, "learning_rate": 2e-05, "loss": 0.05405217, "step": 5895 }, { "epoch": 11.792, "grad_norm": 1.9722440242767334, "learning_rate": 2e-05, "loss": 0.04381296, "step": 5896 }, { "epoch": 11.794, "grad_norm": 1.0976735353469849, "learning_rate": 2e-05, "loss": 0.02918898, "step": 5897 }, { "epoch": 11.796, "grad_norm": 1.4373865127563477, "learning_rate": 2e-05, "loss": 0.05200865, "step": 5898 }, { "epoch": 11.798, "grad_norm": 1.3867244720458984, "learning_rate": 2e-05, "loss": 0.03751809, "step": 5899 }, { "epoch": 11.8, "grad_norm": 1.449845314025879, "learning_rate": 2e-05, "loss": 0.04667427, "step": 5900 }, { "epoch": 11.802, "grad_norm": 1.7023156881332397, "learning_rate": 2e-05, "loss": 0.03977741, "step": 5901 }, { "epoch": 11.804, "grad_norm": 1.2221662998199463, "learning_rate": 2e-05, "loss": 0.04769532, "step": 5902 }, { "epoch": 11.806000000000001, "grad_norm": 3.443532943725586, "learning_rate": 2e-05, "loss": 0.06289086, "step": 5903 }, { "epoch": 11.808, "grad_norm": 1.2141717672348022, "learning_rate": 2e-05, "loss": 0.0353791, "step": 5904 }, { "epoch": 11.81, "grad_norm": 1.126047134399414, "learning_rate": 2e-05, "loss": 0.03271578, "step": 5905 }, { "epoch": 11.812, "grad_norm": 1.419371485710144, "learning_rate": 2e-05, "loss": 0.0543963, "step": 5906 }, { "epoch": 11.814, "grad_norm": 2.132117748260498, "learning_rate": 2e-05, "loss": 0.06309062, "step": 5907 }, { "epoch": 11.816, "grad_norm": 1.5774179697036743, "learning_rate": 2e-05, "loss": 0.05101613, "step": 5908 }, { "epoch": 11.818, "grad_norm": 1.1032048463821411, "learning_rate": 2e-05, "loss": 0.03164658, "step": 5909 }, { "epoch": 11.82, "grad_norm": 1.8389184474945068, "learning_rate": 2e-05, "loss": 0.05535435, "step": 5910 }, { "epoch": 11.822, "grad_norm": 1.3901278972625732, "learning_rate": 2e-05, "loss": 0.04581359, "step": 5911 }, { "epoch": 11.824, "grad_norm": 1.5085663795471191, "learning_rate": 2e-05, "loss": 0.04446094, "step": 5912 }, { "epoch": 11.826, "grad_norm": 1.282586932182312, "learning_rate": 2e-05, "loss": 0.04857932, "step": 5913 }, { "epoch": 11.828, "grad_norm": 1.1354501247406006, "learning_rate": 2e-05, "loss": 0.04103622, "step": 5914 }, { "epoch": 11.83, "grad_norm": 1.4101886749267578, "learning_rate": 2e-05, "loss": 0.03275158, "step": 5915 }, { "epoch": 11.832, "grad_norm": 1.4309121370315552, "learning_rate": 2e-05, "loss": 0.04146826, "step": 5916 }, { "epoch": 11.834, "grad_norm": 1.0470571517944336, "learning_rate": 2e-05, "loss": 0.03013843, "step": 5917 }, { "epoch": 11.836, "grad_norm": 2.17950177192688, "learning_rate": 2e-05, "loss": 0.06400843, "step": 5918 }, { "epoch": 11.838, "grad_norm": 1.5850987434387207, "learning_rate": 2e-05, "loss": 0.04890703, "step": 5919 }, { "epoch": 11.84, "grad_norm": 2.1424400806427, "learning_rate": 2e-05, "loss": 0.04672211, "step": 5920 }, { "epoch": 11.842, "grad_norm": 1.6447819471359253, "learning_rate": 2e-05, "loss": 0.05776293, "step": 5921 }, { "epoch": 11.844, "grad_norm": 1.2918273210525513, "learning_rate": 2e-05, "loss": 0.04792162, "step": 5922 }, { "epoch": 11.846, "grad_norm": 1.787546157836914, "learning_rate": 2e-05, "loss": 0.04592175, "step": 5923 }, { "epoch": 11.848, "grad_norm": 2.0610523223876953, "learning_rate": 2e-05, "loss": 0.0371214, "step": 5924 }, { "epoch": 11.85, "grad_norm": 1.302527666091919, "learning_rate": 2e-05, "loss": 0.0393346, "step": 5925 }, { "epoch": 11.852, "grad_norm": 1.6477103233337402, "learning_rate": 2e-05, "loss": 0.05504544, "step": 5926 }, { "epoch": 11.854, "grad_norm": 1.8336002826690674, "learning_rate": 2e-05, "loss": 0.05120177, "step": 5927 }, { "epoch": 11.856, "grad_norm": 1.318718671798706, "learning_rate": 2e-05, "loss": 0.05148219, "step": 5928 }, { "epoch": 11.858, "grad_norm": 1.6090713739395142, "learning_rate": 2e-05, "loss": 0.05758989, "step": 5929 }, { "epoch": 11.86, "grad_norm": 1.4209413528442383, "learning_rate": 2e-05, "loss": 0.03771012, "step": 5930 }, { "epoch": 11.862, "grad_norm": 1.147481083869934, "learning_rate": 2e-05, "loss": 0.05305842, "step": 5931 }, { "epoch": 11.864, "grad_norm": 5.9689130783081055, "learning_rate": 2e-05, "loss": 0.03842753, "step": 5932 }, { "epoch": 11.866, "grad_norm": 2.704475164413452, "learning_rate": 2e-05, "loss": 0.04317836, "step": 5933 }, { "epoch": 11.868, "grad_norm": 13.987378120422363, "learning_rate": 2e-05, "loss": 0.04295823, "step": 5934 }, { "epoch": 11.87, "grad_norm": 2.0759031772613525, "learning_rate": 2e-05, "loss": 0.05019844, "step": 5935 }, { "epoch": 11.872, "grad_norm": 2.631631851196289, "learning_rate": 2e-05, "loss": 0.04314917, "step": 5936 }, { "epoch": 11.874, "grad_norm": 1.2975753545761108, "learning_rate": 2e-05, "loss": 0.0499692, "step": 5937 }, { "epoch": 11.876, "grad_norm": 1.5606701374053955, "learning_rate": 2e-05, "loss": 0.04614861, "step": 5938 }, { "epoch": 11.878, "grad_norm": 1.3647533655166626, "learning_rate": 2e-05, "loss": 0.03900645, "step": 5939 }, { "epoch": 11.88, "grad_norm": 1.462010145187378, "learning_rate": 2e-05, "loss": 0.05590525, "step": 5940 }, { "epoch": 11.882, "grad_norm": 2.939069986343384, "learning_rate": 2e-05, "loss": 0.06512713, "step": 5941 }, { "epoch": 11.884, "grad_norm": 2.307407855987549, "learning_rate": 2e-05, "loss": 0.04771993, "step": 5942 }, { "epoch": 11.886, "grad_norm": 1.784456491470337, "learning_rate": 2e-05, "loss": 0.05009575, "step": 5943 }, { "epoch": 11.888, "grad_norm": 1.886611819267273, "learning_rate": 2e-05, "loss": 0.05641106, "step": 5944 }, { "epoch": 11.89, "grad_norm": 1.1477409601211548, "learning_rate": 2e-05, "loss": 0.0363906, "step": 5945 }, { "epoch": 11.892, "grad_norm": 1.2578458786010742, "learning_rate": 2e-05, "loss": 0.04756481, "step": 5946 }, { "epoch": 11.894, "grad_norm": 1.938241958618164, "learning_rate": 2e-05, "loss": 0.0651107, "step": 5947 }, { "epoch": 11.896, "grad_norm": 2.048572063446045, "learning_rate": 2e-05, "loss": 0.05753088, "step": 5948 }, { "epoch": 11.898, "grad_norm": 1.759401559829712, "learning_rate": 2e-05, "loss": 0.0439103, "step": 5949 }, { "epoch": 11.9, "grad_norm": 1.6436753273010254, "learning_rate": 2e-05, "loss": 0.04948621, "step": 5950 }, { "epoch": 11.902, "grad_norm": 1.4181915521621704, "learning_rate": 2e-05, "loss": 0.0447426, "step": 5951 }, { "epoch": 11.904, "grad_norm": 1.2882474660873413, "learning_rate": 2e-05, "loss": 0.05199945, "step": 5952 }, { "epoch": 11.906, "grad_norm": 1.0682871341705322, "learning_rate": 2e-05, "loss": 0.03927372, "step": 5953 }, { "epoch": 11.908, "grad_norm": 1.2409359216690063, "learning_rate": 2e-05, "loss": 0.05247876, "step": 5954 }, { "epoch": 11.91, "grad_norm": 1.4313156604766846, "learning_rate": 2e-05, "loss": 0.04420187, "step": 5955 }, { "epoch": 11.912, "grad_norm": 1.0682390928268433, "learning_rate": 2e-05, "loss": 0.03606921, "step": 5956 }, { "epoch": 11.914, "grad_norm": 1.4402589797973633, "learning_rate": 2e-05, "loss": 0.0467343, "step": 5957 }, { "epoch": 11.916, "grad_norm": 1.4133102893829346, "learning_rate": 2e-05, "loss": 0.04049528, "step": 5958 }, { "epoch": 11.918, "grad_norm": 1.3268779516220093, "learning_rate": 2e-05, "loss": 0.06124356, "step": 5959 }, { "epoch": 11.92, "grad_norm": 1.5016638040542603, "learning_rate": 2e-05, "loss": 0.04653203, "step": 5960 }, { "epoch": 11.922, "grad_norm": 1.280086874961853, "learning_rate": 2e-05, "loss": 0.04292293, "step": 5961 }, { "epoch": 11.924, "grad_norm": 1.5201990604400635, "learning_rate": 2e-05, "loss": 0.05881717, "step": 5962 }, { "epoch": 11.926, "grad_norm": 2.3044865131378174, "learning_rate": 2e-05, "loss": 0.06573568, "step": 5963 }, { "epoch": 11.928, "grad_norm": 1.8041393756866455, "learning_rate": 2e-05, "loss": 0.05998692, "step": 5964 }, { "epoch": 11.93, "grad_norm": 1.9705407619476318, "learning_rate": 2e-05, "loss": 0.05393011, "step": 5965 }, { "epoch": 11.932, "grad_norm": 1.5527033805847168, "learning_rate": 2e-05, "loss": 0.05403456, "step": 5966 }, { "epoch": 11.934, "grad_norm": 1.115352749824524, "learning_rate": 2e-05, "loss": 0.03984626, "step": 5967 }, { "epoch": 11.936, "grad_norm": 2.1990714073181152, "learning_rate": 2e-05, "loss": 0.05849539, "step": 5968 }, { "epoch": 11.938, "grad_norm": 2.753941535949707, "learning_rate": 2e-05, "loss": 0.05485374, "step": 5969 }, { "epoch": 11.94, "grad_norm": 1.1634223461151123, "learning_rate": 2e-05, "loss": 0.04611646, "step": 5970 }, { "epoch": 11.942, "grad_norm": 1.3184393644332886, "learning_rate": 2e-05, "loss": 0.04198894, "step": 5971 }, { "epoch": 11.943999999999999, "grad_norm": 1.2988426685333252, "learning_rate": 2e-05, "loss": 0.05265917, "step": 5972 }, { "epoch": 11.946, "grad_norm": 1.7585318088531494, "learning_rate": 2e-05, "loss": 0.05654293, "step": 5973 }, { "epoch": 11.948, "grad_norm": 1.6478968858718872, "learning_rate": 2e-05, "loss": 0.03530508, "step": 5974 }, { "epoch": 11.95, "grad_norm": 2.3206682205200195, "learning_rate": 2e-05, "loss": 0.04617747, "step": 5975 }, { "epoch": 11.952, "grad_norm": 1.2929635047912598, "learning_rate": 2e-05, "loss": 0.03368628, "step": 5976 }, { "epoch": 11.954, "grad_norm": 1.047960877418518, "learning_rate": 2e-05, "loss": 0.03170536, "step": 5977 }, { "epoch": 11.956, "grad_norm": 1.0071247816085815, "learning_rate": 2e-05, "loss": 0.02869906, "step": 5978 }, { "epoch": 11.958, "grad_norm": 2.0578083992004395, "learning_rate": 2e-05, "loss": 0.04879463, "step": 5979 }, { "epoch": 11.96, "grad_norm": 1.6822428703308105, "learning_rate": 2e-05, "loss": 0.0515471, "step": 5980 }, { "epoch": 11.962, "grad_norm": 1.1344988346099854, "learning_rate": 2e-05, "loss": 0.03721001, "step": 5981 }, { "epoch": 11.964, "grad_norm": 1.0933568477630615, "learning_rate": 2e-05, "loss": 0.03774923, "step": 5982 }, { "epoch": 11.966, "grad_norm": 1.6839399337768555, "learning_rate": 2e-05, "loss": 0.03457904, "step": 5983 }, { "epoch": 11.968, "grad_norm": 1.0689489841461182, "learning_rate": 2e-05, "loss": 0.0464288, "step": 5984 }, { "epoch": 11.97, "grad_norm": 1.0576542615890503, "learning_rate": 2e-05, "loss": 0.03478286, "step": 5985 }, { "epoch": 11.972, "grad_norm": 1.7853341102600098, "learning_rate": 2e-05, "loss": 0.05685054, "step": 5986 }, { "epoch": 11.974, "grad_norm": 1.4036171436309814, "learning_rate": 2e-05, "loss": 0.04735323, "step": 5987 }, { "epoch": 11.975999999999999, "grad_norm": 2.2815001010894775, "learning_rate": 2e-05, "loss": 0.06094185, "step": 5988 }, { "epoch": 11.978, "grad_norm": 1.4660301208496094, "learning_rate": 2e-05, "loss": 0.04432792, "step": 5989 }, { "epoch": 11.98, "grad_norm": 2.011078119277954, "learning_rate": 2e-05, "loss": 0.05223164, "step": 5990 }, { "epoch": 11.982, "grad_norm": 1.2670975923538208, "learning_rate": 2e-05, "loss": 0.03780036, "step": 5991 }, { "epoch": 11.984, "grad_norm": 1.5381466150283813, "learning_rate": 2e-05, "loss": 0.04062825, "step": 5992 }, { "epoch": 11.986, "grad_norm": 1.1946245431900024, "learning_rate": 2e-05, "loss": 0.03976649, "step": 5993 }, { "epoch": 11.988, "grad_norm": 2.6480839252471924, "learning_rate": 2e-05, "loss": 0.04942157, "step": 5994 }, { "epoch": 11.99, "grad_norm": 1.3954662084579468, "learning_rate": 2e-05, "loss": 0.04232785, "step": 5995 }, { "epoch": 11.992, "grad_norm": 1.0573922395706177, "learning_rate": 2e-05, "loss": 0.03846195, "step": 5996 }, { "epoch": 11.994, "grad_norm": 1.5346850156784058, "learning_rate": 2e-05, "loss": 0.04501946, "step": 5997 }, { "epoch": 11.996, "grad_norm": 1.567658543586731, "learning_rate": 2e-05, "loss": 0.04125125, "step": 5998 }, { "epoch": 11.998, "grad_norm": 1.4279097318649292, "learning_rate": 2e-05, "loss": 0.03719701, "step": 5999 }, { "epoch": 12.0, "grad_norm": 1.4664994478225708, "learning_rate": 2e-05, "loss": 0.06088963, "step": 6000 }, { "epoch": 12.0, "eval_performance": { "AngleClassification_1": 0.978, "AngleClassification_2": 0.992, "AngleClassification_3": 0.9540918163672655, "Equal_1": 0.998, "Equal_2": 0.9800399201596807, "Equal_3": 0.906187624750499, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.9920159680638723, "Parallel_1": 0.9919839679358717, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 0.986, "Perpendicular_2": 0.946, "Perpendicular_3": 0.6663326653306614, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.9976666666666667, "PointLiesOnCircle_3": 0.9856666666666667, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9680638722554891 }, "eval_runtime": 319.0536, "eval_samples_per_second": 32.91, "eval_steps_per_second": 0.658, "step": 6000 }, { "epoch": 12.002, "grad_norm": 2.38565731048584, "learning_rate": 2e-05, "loss": 0.0569401, "step": 6001 }, { "epoch": 12.004, "grad_norm": 2.440852403640747, "learning_rate": 2e-05, "loss": 0.05189714, "step": 6002 }, { "epoch": 12.006, "grad_norm": 1.0071848630905151, "learning_rate": 2e-05, "loss": 0.03481928, "step": 6003 }, { "epoch": 12.008, "grad_norm": 1.0588632822036743, "learning_rate": 2e-05, "loss": 0.03610472, "step": 6004 }, { "epoch": 12.01, "grad_norm": 1.5201109647750854, "learning_rate": 2e-05, "loss": 0.04367625, "step": 6005 }, { "epoch": 12.012, "grad_norm": 1.7423279285430908, "learning_rate": 2e-05, "loss": 0.03819691, "step": 6006 }, { "epoch": 12.014, "grad_norm": 1.3542839288711548, "learning_rate": 2e-05, "loss": 0.0486775, "step": 6007 }, { "epoch": 12.016, "grad_norm": 1.939989447593689, "learning_rate": 2e-05, "loss": 0.05491658, "step": 6008 }, { "epoch": 12.018, "grad_norm": 2.302846670150757, "learning_rate": 2e-05, "loss": 0.04849474, "step": 6009 }, { "epoch": 12.02, "grad_norm": 1.8097832202911377, "learning_rate": 2e-05, "loss": 0.04880159, "step": 6010 }, { "epoch": 12.022, "grad_norm": 0.9699602723121643, "learning_rate": 2e-05, "loss": 0.03032275, "step": 6011 }, { "epoch": 12.024, "grad_norm": 1.2547293901443481, "learning_rate": 2e-05, "loss": 0.03023205, "step": 6012 }, { "epoch": 12.026, "grad_norm": 1.7490140199661255, "learning_rate": 2e-05, "loss": 0.03436543, "step": 6013 }, { "epoch": 12.028, "grad_norm": 1.5913435220718384, "learning_rate": 2e-05, "loss": 0.03803439, "step": 6014 }, { "epoch": 12.03, "grad_norm": 1.5961428880691528, "learning_rate": 2e-05, "loss": 0.04362968, "step": 6015 }, { "epoch": 12.032, "grad_norm": 1.9875961542129517, "learning_rate": 2e-05, "loss": 0.0427319, "step": 6016 }, { "epoch": 12.034, "grad_norm": 1.6006630659103394, "learning_rate": 2e-05, "loss": 0.04221214, "step": 6017 }, { "epoch": 12.036, "grad_norm": 1.985170602798462, "learning_rate": 2e-05, "loss": 0.04991886, "step": 6018 }, { "epoch": 12.038, "grad_norm": 1.3990066051483154, "learning_rate": 2e-05, "loss": 0.0360059, "step": 6019 }, { "epoch": 12.04, "grad_norm": 1.732647180557251, "learning_rate": 2e-05, "loss": 0.03687097, "step": 6020 }, { "epoch": 12.042, "grad_norm": 1.6655960083007812, "learning_rate": 2e-05, "loss": 0.05822805, "step": 6021 }, { "epoch": 12.044, "grad_norm": 1.6695098876953125, "learning_rate": 2e-05, "loss": 0.04767611, "step": 6022 }, { "epoch": 12.046, "grad_norm": 1.7412924766540527, "learning_rate": 2e-05, "loss": 0.04930716, "step": 6023 }, { "epoch": 12.048, "grad_norm": 1.7823171615600586, "learning_rate": 2e-05, "loss": 0.03755423, "step": 6024 }, { "epoch": 12.05, "grad_norm": 1.2226248979568481, "learning_rate": 2e-05, "loss": 0.04179034, "step": 6025 }, { "epoch": 12.052, "grad_norm": 1.5548951625823975, "learning_rate": 2e-05, "loss": 0.03161469, "step": 6026 }, { "epoch": 12.054, "grad_norm": 1.9556974172592163, "learning_rate": 2e-05, "loss": 0.04450768, "step": 6027 }, { "epoch": 12.056, "grad_norm": 1.177003026008606, "learning_rate": 2e-05, "loss": 0.04270407, "step": 6028 }, { "epoch": 12.058, "grad_norm": 1.3655465841293335, "learning_rate": 2e-05, "loss": 0.05215304, "step": 6029 }, { "epoch": 12.06, "grad_norm": 3.6705446243286133, "learning_rate": 2e-05, "loss": 0.05514611, "step": 6030 }, { "epoch": 12.062, "grad_norm": 7.220978260040283, "learning_rate": 2e-05, "loss": 0.05021281, "step": 6031 }, { "epoch": 12.064, "grad_norm": 1.2202095985412598, "learning_rate": 2e-05, "loss": 0.03365953, "step": 6032 }, { "epoch": 12.066, "grad_norm": 1.2675375938415527, "learning_rate": 2e-05, "loss": 0.03978067, "step": 6033 }, { "epoch": 12.068, "grad_norm": 1.05901300907135, "learning_rate": 2e-05, "loss": 0.03192661, "step": 6034 }, { "epoch": 12.07, "grad_norm": 1.8929624557495117, "learning_rate": 2e-05, "loss": 0.03948138, "step": 6035 }, { "epoch": 12.072, "grad_norm": 1.3291373252868652, "learning_rate": 2e-05, "loss": 0.04005025, "step": 6036 }, { "epoch": 12.074, "grad_norm": 2.3758695125579834, "learning_rate": 2e-05, "loss": 0.04173733, "step": 6037 }, { "epoch": 12.076, "grad_norm": 2.500800848007202, "learning_rate": 2e-05, "loss": 0.05645321, "step": 6038 }, { "epoch": 12.078, "grad_norm": 1.4360809326171875, "learning_rate": 2e-05, "loss": 0.04782884, "step": 6039 }, { "epoch": 12.08, "grad_norm": 2.3848273754119873, "learning_rate": 2e-05, "loss": 0.04527626, "step": 6040 }, { "epoch": 12.082, "grad_norm": 1.747944951057434, "learning_rate": 2e-05, "loss": 0.07185665, "step": 6041 }, { "epoch": 12.084, "grad_norm": 3.546881675720215, "learning_rate": 2e-05, "loss": 0.05210949, "step": 6042 }, { "epoch": 12.086, "grad_norm": 2.6596226692199707, "learning_rate": 2e-05, "loss": 0.04689426, "step": 6043 }, { "epoch": 12.088, "grad_norm": 2.573843479156494, "learning_rate": 2e-05, "loss": 0.05877293, "step": 6044 }, { "epoch": 12.09, "grad_norm": 2.237309694290161, "learning_rate": 2e-05, "loss": 0.03652829, "step": 6045 }, { "epoch": 12.092, "grad_norm": 1.193651795387268, "learning_rate": 2e-05, "loss": 0.03118789, "step": 6046 }, { "epoch": 12.094, "grad_norm": 2.136948823928833, "learning_rate": 2e-05, "loss": 0.03762619, "step": 6047 }, { "epoch": 12.096, "grad_norm": 1.9296658039093018, "learning_rate": 2e-05, "loss": 0.05173202, "step": 6048 }, { "epoch": 12.098, "grad_norm": 1.0394606590270996, "learning_rate": 2e-05, "loss": 0.03266693, "step": 6049 }, { "epoch": 12.1, "grad_norm": 1.249806523323059, "learning_rate": 2e-05, "loss": 0.03473232, "step": 6050 }, { "epoch": 12.102, "grad_norm": 1.4066314697265625, "learning_rate": 2e-05, "loss": 0.04402903, "step": 6051 }, { "epoch": 12.104, "grad_norm": 1.833040475845337, "learning_rate": 2e-05, "loss": 0.03956625, "step": 6052 }, { "epoch": 12.106, "grad_norm": 1.421608805656433, "learning_rate": 2e-05, "loss": 0.04748692, "step": 6053 }, { "epoch": 12.108, "grad_norm": 2.0671706199645996, "learning_rate": 2e-05, "loss": 0.04925867, "step": 6054 }, { "epoch": 12.11, "grad_norm": 1.106614589691162, "learning_rate": 2e-05, "loss": 0.04091745, "step": 6055 }, { "epoch": 12.112, "grad_norm": 1.2263094186782837, "learning_rate": 2e-05, "loss": 0.04056298, "step": 6056 }, { "epoch": 12.114, "grad_norm": 3.7029409408569336, "learning_rate": 2e-05, "loss": 0.05836046, "step": 6057 }, { "epoch": 12.116, "grad_norm": 1.413335919380188, "learning_rate": 2e-05, "loss": 0.05062287, "step": 6058 }, { "epoch": 12.118, "grad_norm": 3.4559438228607178, "learning_rate": 2e-05, "loss": 0.0437443, "step": 6059 }, { "epoch": 12.12, "grad_norm": 1.7302478551864624, "learning_rate": 2e-05, "loss": 0.05316646, "step": 6060 }, { "epoch": 12.122, "grad_norm": 1.5175634622573853, "learning_rate": 2e-05, "loss": 0.04142622, "step": 6061 }, { "epoch": 12.124, "grad_norm": 1.6398677825927734, "learning_rate": 2e-05, "loss": 0.05954774, "step": 6062 }, { "epoch": 12.126, "grad_norm": 1.5120245218276978, "learning_rate": 2e-05, "loss": 0.05301241, "step": 6063 }, { "epoch": 12.128, "grad_norm": 1.1674778461456299, "learning_rate": 2e-05, "loss": 0.04489746, "step": 6064 }, { "epoch": 12.13, "grad_norm": 1.299045205116272, "learning_rate": 2e-05, "loss": 0.03792151, "step": 6065 }, { "epoch": 12.132, "grad_norm": 0.9443352222442627, "learning_rate": 2e-05, "loss": 0.02998193, "step": 6066 }, { "epoch": 12.134, "grad_norm": 1.6018176078796387, "learning_rate": 2e-05, "loss": 0.04062822, "step": 6067 }, { "epoch": 12.136, "grad_norm": 2.1333489418029785, "learning_rate": 2e-05, "loss": 0.03400786, "step": 6068 }, { "epoch": 12.138, "grad_norm": 1.8581465482711792, "learning_rate": 2e-05, "loss": 0.05270678, "step": 6069 }, { "epoch": 12.14, "grad_norm": 1.1669179201126099, "learning_rate": 2e-05, "loss": 0.04782745, "step": 6070 }, { "epoch": 12.142, "grad_norm": 2.276296615600586, "learning_rate": 2e-05, "loss": 0.03568403, "step": 6071 }, { "epoch": 12.144, "grad_norm": 1.1378567218780518, "learning_rate": 2e-05, "loss": 0.03819509, "step": 6072 }, { "epoch": 12.146, "grad_norm": 2.1486968994140625, "learning_rate": 2e-05, "loss": 0.04405282, "step": 6073 }, { "epoch": 12.148, "grad_norm": 1.2601561546325684, "learning_rate": 2e-05, "loss": 0.03810599, "step": 6074 }, { "epoch": 12.15, "grad_norm": 1.7045177221298218, "learning_rate": 2e-05, "loss": 0.04431679, "step": 6075 }, { "epoch": 12.152, "grad_norm": 1.4039207696914673, "learning_rate": 2e-05, "loss": 0.05113788, "step": 6076 }, { "epoch": 12.154, "grad_norm": 1.8916300535202026, "learning_rate": 2e-05, "loss": 0.0379601, "step": 6077 }, { "epoch": 12.156, "grad_norm": 1.337270736694336, "learning_rate": 2e-05, "loss": 0.04206997, "step": 6078 }, { "epoch": 12.158, "grad_norm": 1.3554550409317017, "learning_rate": 2e-05, "loss": 0.04288577, "step": 6079 }, { "epoch": 12.16, "grad_norm": 1.9474183320999146, "learning_rate": 2e-05, "loss": 0.04348049, "step": 6080 }, { "epoch": 12.162, "grad_norm": 0.8301889300346375, "learning_rate": 2e-05, "loss": 0.02466136, "step": 6081 }, { "epoch": 12.164, "grad_norm": 1.5788853168487549, "learning_rate": 2e-05, "loss": 0.05005291, "step": 6082 }, { "epoch": 12.166, "grad_norm": 2.1936469078063965, "learning_rate": 2e-05, "loss": 0.05456689, "step": 6083 }, { "epoch": 12.168, "grad_norm": 1.8480530977249146, "learning_rate": 2e-05, "loss": 0.05015424, "step": 6084 }, { "epoch": 12.17, "grad_norm": 1.3784325122833252, "learning_rate": 2e-05, "loss": 0.03511147, "step": 6085 }, { "epoch": 12.172, "grad_norm": 1.4679827690124512, "learning_rate": 2e-05, "loss": 0.028723, "step": 6086 }, { "epoch": 12.174, "grad_norm": 1.677220344543457, "learning_rate": 2e-05, "loss": 0.04889953, "step": 6087 }, { "epoch": 12.176, "grad_norm": 1.5468508005142212, "learning_rate": 2e-05, "loss": 0.04408753, "step": 6088 }, { "epoch": 12.178, "grad_norm": 1.36641526222229, "learning_rate": 2e-05, "loss": 0.03904226, "step": 6089 }, { "epoch": 12.18, "grad_norm": 2.0277597904205322, "learning_rate": 2e-05, "loss": 0.03122667, "step": 6090 }, { "epoch": 12.182, "grad_norm": 2.095195770263672, "learning_rate": 2e-05, "loss": 0.04639265, "step": 6091 }, { "epoch": 12.184, "grad_norm": 1.4929248094558716, "learning_rate": 2e-05, "loss": 0.04378875, "step": 6092 }, { "epoch": 12.186, "grad_norm": 1.040937066078186, "learning_rate": 2e-05, "loss": 0.02706816, "step": 6093 }, { "epoch": 12.188, "grad_norm": 1.5594635009765625, "learning_rate": 2e-05, "loss": 0.02982716, "step": 6094 }, { "epoch": 12.19, "grad_norm": 1.0690845251083374, "learning_rate": 2e-05, "loss": 0.02932034, "step": 6095 }, { "epoch": 12.192, "grad_norm": 1.3992670774459839, "learning_rate": 2e-05, "loss": 0.04999903, "step": 6096 }, { "epoch": 12.194, "grad_norm": 1.2843531370162964, "learning_rate": 2e-05, "loss": 0.02511716, "step": 6097 }, { "epoch": 12.196, "grad_norm": 1.8418145179748535, "learning_rate": 2e-05, "loss": 0.04045618, "step": 6098 }, { "epoch": 12.198, "grad_norm": 1.3255176544189453, "learning_rate": 2e-05, "loss": 0.04162053, "step": 6099 }, { "epoch": 12.2, "grad_norm": 1.1222180128097534, "learning_rate": 2e-05, "loss": 0.02524855, "step": 6100 }, { "epoch": 12.202, "grad_norm": 1.5370876789093018, "learning_rate": 2e-05, "loss": 0.04771005, "step": 6101 }, { "epoch": 12.204, "grad_norm": 1.1293209791183472, "learning_rate": 2e-05, "loss": 0.03034871, "step": 6102 }, { "epoch": 12.206, "grad_norm": 1.13023042678833, "learning_rate": 2e-05, "loss": 0.02627375, "step": 6103 }, { "epoch": 12.208, "grad_norm": 1.3612079620361328, "learning_rate": 2e-05, "loss": 0.04360874, "step": 6104 }, { "epoch": 12.21, "grad_norm": 3.503488779067993, "learning_rate": 2e-05, "loss": 0.03669972, "step": 6105 }, { "epoch": 12.212, "grad_norm": 2.4305996894836426, "learning_rate": 2e-05, "loss": 0.05017614, "step": 6106 }, { "epoch": 12.214, "grad_norm": 1.4109909534454346, "learning_rate": 2e-05, "loss": 0.03262326, "step": 6107 }, { "epoch": 12.216, "grad_norm": 1.8456965684890747, "learning_rate": 2e-05, "loss": 0.0493413, "step": 6108 }, { "epoch": 12.218, "grad_norm": 1.6295368671417236, "learning_rate": 2e-05, "loss": 0.04406694, "step": 6109 }, { "epoch": 12.22, "grad_norm": 1.2703592777252197, "learning_rate": 2e-05, "loss": 0.04533841, "step": 6110 }, { "epoch": 12.222, "grad_norm": 3.6067559719085693, "learning_rate": 2e-05, "loss": 0.05607805, "step": 6111 }, { "epoch": 12.224, "grad_norm": 1.2701184749603271, "learning_rate": 2e-05, "loss": 0.04785171, "step": 6112 }, { "epoch": 12.226, "grad_norm": 1.1288963556289673, "learning_rate": 2e-05, "loss": 0.04206613, "step": 6113 }, { "epoch": 12.228, "grad_norm": 1.6207784414291382, "learning_rate": 2e-05, "loss": 0.04109996, "step": 6114 }, { "epoch": 12.23, "grad_norm": 1.5829488039016724, "learning_rate": 2e-05, "loss": 0.03511421, "step": 6115 }, { "epoch": 12.232, "grad_norm": 1.4869654178619385, "learning_rate": 2e-05, "loss": 0.04815879, "step": 6116 }, { "epoch": 12.234, "grad_norm": 1.412255048751831, "learning_rate": 2e-05, "loss": 0.0408961, "step": 6117 }, { "epoch": 12.236, "grad_norm": 0.9086294174194336, "learning_rate": 2e-05, "loss": 0.03307983, "step": 6118 }, { "epoch": 12.238, "grad_norm": 1.4410475492477417, "learning_rate": 2e-05, "loss": 0.04874083, "step": 6119 }, { "epoch": 12.24, "grad_norm": 2.1542203426361084, "learning_rate": 2e-05, "loss": 0.03492495, "step": 6120 }, { "epoch": 12.242, "grad_norm": 1.4290752410888672, "learning_rate": 2e-05, "loss": 0.03959789, "step": 6121 }, { "epoch": 12.244, "grad_norm": 3.0469701290130615, "learning_rate": 2e-05, "loss": 0.07812107, "step": 6122 }, { "epoch": 12.246, "grad_norm": 1.6953679323196411, "learning_rate": 2e-05, "loss": 0.02141916, "step": 6123 }, { "epoch": 12.248, "grad_norm": 1.8195632696151733, "learning_rate": 2e-05, "loss": 0.0405052, "step": 6124 }, { "epoch": 12.25, "grad_norm": 2.004796266555786, "learning_rate": 2e-05, "loss": 0.03984334, "step": 6125 }, { "epoch": 12.252, "grad_norm": 0.8493677973747253, "learning_rate": 2e-05, "loss": 0.03030949, "step": 6126 }, { "epoch": 12.254, "grad_norm": 1.603154182434082, "learning_rate": 2e-05, "loss": 0.03452389, "step": 6127 }, { "epoch": 12.256, "grad_norm": 1.4318041801452637, "learning_rate": 2e-05, "loss": 0.05320135, "step": 6128 }, { "epoch": 12.258, "grad_norm": 1.305381178855896, "learning_rate": 2e-05, "loss": 0.04058621, "step": 6129 }, { "epoch": 12.26, "grad_norm": 1.0062555074691772, "learning_rate": 2e-05, "loss": 0.03081638, "step": 6130 }, { "epoch": 12.262, "grad_norm": 1.51310133934021, "learning_rate": 2e-05, "loss": 0.03336924, "step": 6131 }, { "epoch": 12.264, "grad_norm": 1.28446626663208, "learning_rate": 2e-05, "loss": 0.03799494, "step": 6132 }, { "epoch": 12.266, "grad_norm": 2.550501823425293, "learning_rate": 2e-05, "loss": 0.05581835, "step": 6133 }, { "epoch": 12.268, "grad_norm": 1.476683259010315, "learning_rate": 2e-05, "loss": 0.04637709, "step": 6134 }, { "epoch": 12.27, "grad_norm": 2.046898126602173, "learning_rate": 2e-05, "loss": 0.04047813, "step": 6135 }, { "epoch": 12.272, "grad_norm": 1.6879256963729858, "learning_rate": 2e-05, "loss": 0.050128, "step": 6136 }, { "epoch": 12.274000000000001, "grad_norm": 1.1848565340042114, "learning_rate": 2e-05, "loss": 0.03365932, "step": 6137 }, { "epoch": 12.276, "grad_norm": 1.4317147731781006, "learning_rate": 2e-05, "loss": 0.04208836, "step": 6138 }, { "epoch": 12.278, "grad_norm": 0.8532785773277283, "learning_rate": 2e-05, "loss": 0.02701728, "step": 6139 }, { "epoch": 12.28, "grad_norm": 2.347813129425049, "learning_rate": 2e-05, "loss": 0.05657571, "step": 6140 }, { "epoch": 12.282, "grad_norm": 1.616212248802185, "learning_rate": 2e-05, "loss": 0.0320894, "step": 6141 }, { "epoch": 12.284, "grad_norm": 1.4436291456222534, "learning_rate": 2e-05, "loss": 0.03790341, "step": 6142 }, { "epoch": 12.286, "grad_norm": 1.915105938911438, "learning_rate": 2e-05, "loss": 0.04711257, "step": 6143 }, { "epoch": 12.288, "grad_norm": 1.6643471717834473, "learning_rate": 2e-05, "loss": 0.02922626, "step": 6144 }, { "epoch": 12.29, "grad_norm": 1.9651648998260498, "learning_rate": 2e-05, "loss": 0.06149007, "step": 6145 }, { "epoch": 12.292, "grad_norm": 1.5634087324142456, "learning_rate": 2e-05, "loss": 0.03441588, "step": 6146 }, { "epoch": 12.294, "grad_norm": 3.6139161586761475, "learning_rate": 2e-05, "loss": 0.0543981, "step": 6147 }, { "epoch": 12.296, "grad_norm": 1.771533489227295, "learning_rate": 2e-05, "loss": 0.04886418, "step": 6148 }, { "epoch": 12.298, "grad_norm": 1.4337700605392456, "learning_rate": 2e-05, "loss": 0.02946185, "step": 6149 }, { "epoch": 12.3, "grad_norm": 1.214970350265503, "learning_rate": 2e-05, "loss": 0.03541949, "step": 6150 }, { "epoch": 12.302, "grad_norm": 1.8696826696395874, "learning_rate": 2e-05, "loss": 0.04219283, "step": 6151 }, { "epoch": 12.304, "grad_norm": 1.1375106573104858, "learning_rate": 2e-05, "loss": 0.02643434, "step": 6152 }, { "epoch": 12.306, "grad_norm": 1.9617277383804321, "learning_rate": 2e-05, "loss": 0.04668794, "step": 6153 }, { "epoch": 12.308, "grad_norm": 1.219461441040039, "learning_rate": 2e-05, "loss": 0.02614435, "step": 6154 }, { "epoch": 12.31, "grad_norm": 2.0026473999023438, "learning_rate": 2e-05, "loss": 0.05046513, "step": 6155 }, { "epoch": 12.312, "grad_norm": 2.2148213386535645, "learning_rate": 2e-05, "loss": 0.03219437, "step": 6156 }, { "epoch": 12.314, "grad_norm": 1.3021752834320068, "learning_rate": 2e-05, "loss": 0.03959469, "step": 6157 }, { "epoch": 12.316, "grad_norm": 2.0185019969940186, "learning_rate": 2e-05, "loss": 0.04784639, "step": 6158 }, { "epoch": 12.318, "grad_norm": 1.835060954093933, "learning_rate": 2e-05, "loss": 0.04800402, "step": 6159 }, { "epoch": 12.32, "grad_norm": 1.1471303701400757, "learning_rate": 2e-05, "loss": 0.04062325, "step": 6160 }, { "epoch": 12.322, "grad_norm": 1.5545427799224854, "learning_rate": 2e-05, "loss": 0.05014087, "step": 6161 }, { "epoch": 12.324, "grad_norm": 1.3972580432891846, "learning_rate": 2e-05, "loss": 0.03715761, "step": 6162 }, { "epoch": 12.326, "grad_norm": 1.1503256559371948, "learning_rate": 2e-05, "loss": 0.03644323, "step": 6163 }, { "epoch": 12.328, "grad_norm": 2.5401759147644043, "learning_rate": 2e-05, "loss": 0.04062868, "step": 6164 }, { "epoch": 12.33, "grad_norm": 1.3734387159347534, "learning_rate": 2e-05, "loss": 0.04200601, "step": 6165 }, { "epoch": 12.332, "grad_norm": 1.1165435314178467, "learning_rate": 2e-05, "loss": 0.03473271, "step": 6166 }, { "epoch": 12.334, "grad_norm": 1.3993453979492188, "learning_rate": 2e-05, "loss": 0.0432322, "step": 6167 }, { "epoch": 12.336, "grad_norm": 1.486961841583252, "learning_rate": 2e-05, "loss": 0.04133502, "step": 6168 }, { "epoch": 12.338, "grad_norm": 1.8763487339019775, "learning_rate": 2e-05, "loss": 0.04380414, "step": 6169 }, { "epoch": 12.34, "grad_norm": 3.696859121322632, "learning_rate": 2e-05, "loss": 0.05829802, "step": 6170 }, { "epoch": 12.342, "grad_norm": 1.087334156036377, "learning_rate": 2e-05, "loss": 0.03167205, "step": 6171 }, { "epoch": 12.344, "grad_norm": 1.7672123908996582, "learning_rate": 2e-05, "loss": 0.0549523, "step": 6172 }, { "epoch": 12.346, "grad_norm": 1.4898287057876587, "learning_rate": 2e-05, "loss": 0.05321936, "step": 6173 }, { "epoch": 12.348, "grad_norm": 1.1989002227783203, "learning_rate": 2e-05, "loss": 0.03441871, "step": 6174 }, { "epoch": 12.35, "grad_norm": 1.5749406814575195, "learning_rate": 2e-05, "loss": 0.04303573, "step": 6175 }, { "epoch": 12.352, "grad_norm": 1.9614742994308472, "learning_rate": 2e-05, "loss": 0.06837121, "step": 6176 }, { "epoch": 12.354, "grad_norm": 2.225933313369751, "learning_rate": 2e-05, "loss": 0.04865203, "step": 6177 }, { "epoch": 12.356, "grad_norm": 2.307185173034668, "learning_rate": 2e-05, "loss": 0.05228138, "step": 6178 }, { "epoch": 12.358, "grad_norm": 1.664549708366394, "learning_rate": 2e-05, "loss": 0.04851029, "step": 6179 }, { "epoch": 12.36, "grad_norm": 1.9192323684692383, "learning_rate": 2e-05, "loss": 0.03343064, "step": 6180 }, { "epoch": 12.362, "grad_norm": 1.0249700546264648, "learning_rate": 2e-05, "loss": 0.02511407, "step": 6181 }, { "epoch": 12.364, "grad_norm": 1.6150821447372437, "learning_rate": 2e-05, "loss": 0.0319114, "step": 6182 }, { "epoch": 12.366, "grad_norm": 1.0298298597335815, "learning_rate": 2e-05, "loss": 0.02553884, "step": 6183 }, { "epoch": 12.368, "grad_norm": 2.103635787963867, "learning_rate": 2e-05, "loss": 0.05173234, "step": 6184 }, { "epoch": 12.37, "grad_norm": 3.9606614112854004, "learning_rate": 2e-05, "loss": 0.04179762, "step": 6185 }, { "epoch": 12.372, "grad_norm": 1.0706768035888672, "learning_rate": 2e-05, "loss": 0.03646227, "step": 6186 }, { "epoch": 12.374, "grad_norm": 1.6147788763046265, "learning_rate": 2e-05, "loss": 0.03327171, "step": 6187 }, { "epoch": 12.376, "grad_norm": 2.25844407081604, "learning_rate": 2e-05, "loss": 0.05061604, "step": 6188 }, { "epoch": 12.378, "grad_norm": 1.574654459953308, "learning_rate": 2e-05, "loss": 0.04572042, "step": 6189 }, { "epoch": 12.38, "grad_norm": 1.6320792436599731, "learning_rate": 2e-05, "loss": 0.04378324, "step": 6190 }, { "epoch": 12.382, "grad_norm": 2.3261327743530273, "learning_rate": 2e-05, "loss": 0.04833006, "step": 6191 }, { "epoch": 12.384, "grad_norm": 1.5708943605422974, "learning_rate": 2e-05, "loss": 0.03576447, "step": 6192 }, { "epoch": 12.386, "grad_norm": 1.543707251548767, "learning_rate": 2e-05, "loss": 0.03951041, "step": 6193 }, { "epoch": 12.388, "grad_norm": 1.1679742336273193, "learning_rate": 2e-05, "loss": 0.03864168, "step": 6194 }, { "epoch": 12.39, "grad_norm": 2.291689157485962, "learning_rate": 2e-05, "loss": 0.04182493, "step": 6195 }, { "epoch": 12.392, "grad_norm": 1.7392657995224, "learning_rate": 2e-05, "loss": 0.04270639, "step": 6196 }, { "epoch": 12.394, "grad_norm": 1.5574685335159302, "learning_rate": 2e-05, "loss": 0.03201068, "step": 6197 }, { "epoch": 12.396, "grad_norm": 1.4459373950958252, "learning_rate": 2e-05, "loss": 0.05233443, "step": 6198 }, { "epoch": 12.398, "grad_norm": 1.459169626235962, "learning_rate": 2e-05, "loss": 0.0575183, "step": 6199 }, { "epoch": 12.4, "grad_norm": 1.6888347864151, "learning_rate": 2e-05, "loss": 0.03846366, "step": 6200 }, { "epoch": 12.402, "grad_norm": 2.6593565940856934, "learning_rate": 2e-05, "loss": 0.03823034, "step": 6201 }, { "epoch": 12.404, "grad_norm": 1.6569043397903442, "learning_rate": 2e-05, "loss": 0.04339511, "step": 6202 }, { "epoch": 12.406, "grad_norm": 0.9647795557975769, "learning_rate": 2e-05, "loss": 0.03080498, "step": 6203 }, { "epoch": 12.408, "grad_norm": 2.6168878078460693, "learning_rate": 2e-05, "loss": 0.04123443, "step": 6204 }, { "epoch": 12.41, "grad_norm": 1.4847103357315063, "learning_rate": 2e-05, "loss": 0.04044367, "step": 6205 }, { "epoch": 12.412, "grad_norm": 2.002779483795166, "learning_rate": 2e-05, "loss": 0.06478892, "step": 6206 }, { "epoch": 12.414, "grad_norm": 1.692628026008606, "learning_rate": 2e-05, "loss": 0.04533452, "step": 6207 }, { "epoch": 12.416, "grad_norm": 2.04227876663208, "learning_rate": 2e-05, "loss": 0.05172842, "step": 6208 }, { "epoch": 12.418, "grad_norm": 1.5471951961517334, "learning_rate": 2e-05, "loss": 0.04024774, "step": 6209 }, { "epoch": 12.42, "grad_norm": 3.0287656784057617, "learning_rate": 2e-05, "loss": 0.07643662, "step": 6210 }, { "epoch": 12.422, "grad_norm": 1.17807137966156, "learning_rate": 2e-05, "loss": 0.03349002, "step": 6211 }, { "epoch": 12.424, "grad_norm": 1.3760405778884888, "learning_rate": 2e-05, "loss": 0.04548774, "step": 6212 }, { "epoch": 12.426, "grad_norm": 1.2264763116836548, "learning_rate": 2e-05, "loss": 0.03662616, "step": 6213 }, { "epoch": 12.428, "grad_norm": 1.1853759288787842, "learning_rate": 2e-05, "loss": 0.03420371, "step": 6214 }, { "epoch": 12.43, "grad_norm": 1.136852502822876, "learning_rate": 2e-05, "loss": 0.04038435, "step": 6215 }, { "epoch": 12.432, "grad_norm": 1.1933867931365967, "learning_rate": 2e-05, "loss": 0.04216532, "step": 6216 }, { "epoch": 12.434, "grad_norm": 4.264523506164551, "learning_rate": 2e-05, "loss": 0.0426877, "step": 6217 }, { "epoch": 12.436, "grad_norm": 1.2900351285934448, "learning_rate": 2e-05, "loss": 0.03366522, "step": 6218 }, { "epoch": 12.438, "grad_norm": 2.727051258087158, "learning_rate": 2e-05, "loss": 0.05351751, "step": 6219 }, { "epoch": 12.44, "grad_norm": 1.5386077165603638, "learning_rate": 2e-05, "loss": 0.05369819, "step": 6220 }, { "epoch": 12.442, "grad_norm": 1.445359468460083, "learning_rate": 2e-05, "loss": 0.04263123, "step": 6221 }, { "epoch": 12.444, "grad_norm": 1.123322606086731, "learning_rate": 2e-05, "loss": 0.04013848, "step": 6222 }, { "epoch": 12.446, "grad_norm": 0.9369562268257141, "learning_rate": 2e-05, "loss": 0.02055129, "step": 6223 }, { "epoch": 12.448, "grad_norm": 1.31740140914917, "learning_rate": 2e-05, "loss": 0.04068302, "step": 6224 }, { "epoch": 12.45, "grad_norm": 1.307610034942627, "learning_rate": 2e-05, "loss": 0.05438361, "step": 6225 }, { "epoch": 12.452, "grad_norm": 1.2957713603973389, "learning_rate": 2e-05, "loss": 0.02663429, "step": 6226 }, { "epoch": 12.454, "grad_norm": 1.6599071025848389, "learning_rate": 2e-05, "loss": 0.04203624, "step": 6227 }, { "epoch": 12.456, "grad_norm": 1.427505373954773, "learning_rate": 2e-05, "loss": 0.04064513, "step": 6228 }, { "epoch": 12.458, "grad_norm": 1.9157131910324097, "learning_rate": 2e-05, "loss": 0.05884474, "step": 6229 }, { "epoch": 12.46, "grad_norm": 1.3565418720245361, "learning_rate": 2e-05, "loss": 0.04177081, "step": 6230 }, { "epoch": 12.462, "grad_norm": 1.1073006391525269, "learning_rate": 2e-05, "loss": 0.03636095, "step": 6231 }, { "epoch": 12.464, "grad_norm": 1.0298948287963867, "learning_rate": 2e-05, "loss": 0.03898697, "step": 6232 }, { "epoch": 12.466, "grad_norm": 1.1140865087509155, "learning_rate": 2e-05, "loss": 0.03568536, "step": 6233 }, { "epoch": 12.468, "grad_norm": 1.7985575199127197, "learning_rate": 2e-05, "loss": 0.04029547, "step": 6234 }, { "epoch": 12.47, "grad_norm": 1.2943682670593262, "learning_rate": 2e-05, "loss": 0.03818323, "step": 6235 }, { "epoch": 12.472, "grad_norm": 1.2726134061813354, "learning_rate": 2e-05, "loss": 0.04815301, "step": 6236 }, { "epoch": 12.474, "grad_norm": 1.8308722972869873, "learning_rate": 2e-05, "loss": 0.05556687, "step": 6237 }, { "epoch": 12.475999999999999, "grad_norm": 1.4734604358673096, "learning_rate": 2e-05, "loss": 0.05400109, "step": 6238 }, { "epoch": 12.478, "grad_norm": 1.909253478050232, "learning_rate": 2e-05, "loss": 0.04208795, "step": 6239 }, { "epoch": 12.48, "grad_norm": 1.3197088241577148, "learning_rate": 2e-05, "loss": 0.03566179, "step": 6240 }, { "epoch": 12.482, "grad_norm": 1.8218048810958862, "learning_rate": 2e-05, "loss": 0.05417576, "step": 6241 }, { "epoch": 12.484, "grad_norm": 1.8446364402770996, "learning_rate": 2e-05, "loss": 0.04411314, "step": 6242 }, { "epoch": 12.486, "grad_norm": 1.5696487426757812, "learning_rate": 2e-05, "loss": 0.04941075, "step": 6243 }, { "epoch": 12.488, "grad_norm": 3.357449531555176, "learning_rate": 2e-05, "loss": 0.05497446, "step": 6244 }, { "epoch": 12.49, "grad_norm": 3.2609894275665283, "learning_rate": 2e-05, "loss": 0.06280646, "step": 6245 }, { "epoch": 12.492, "grad_norm": 1.8737690448760986, "learning_rate": 2e-05, "loss": 0.03448436, "step": 6246 }, { "epoch": 12.494, "grad_norm": 1.4557076692581177, "learning_rate": 2e-05, "loss": 0.05713212, "step": 6247 }, { "epoch": 12.496, "grad_norm": 1.4441951513290405, "learning_rate": 2e-05, "loss": 0.04061989, "step": 6248 }, { "epoch": 12.498, "grad_norm": 1.1646778583526611, "learning_rate": 2e-05, "loss": 0.02681767, "step": 6249 }, { "epoch": 12.5, "grad_norm": 1.1207839250564575, "learning_rate": 2e-05, "loss": 0.03733048, "step": 6250 }, { "epoch": 12.502, "grad_norm": 2.0258734226226807, "learning_rate": 2e-05, "loss": 0.05226999, "step": 6251 }, { "epoch": 12.504, "grad_norm": 2.436753034591675, "learning_rate": 2e-05, "loss": 0.06572986, "step": 6252 }, { "epoch": 12.506, "grad_norm": 1.5565096139907837, "learning_rate": 2e-05, "loss": 0.04375243, "step": 6253 }, { "epoch": 12.508, "grad_norm": 2.353769302368164, "learning_rate": 2e-05, "loss": 0.06041701, "step": 6254 }, { "epoch": 12.51, "grad_norm": 1.1774601936340332, "learning_rate": 2e-05, "loss": 0.04639146, "step": 6255 }, { "epoch": 12.512, "grad_norm": 2.4573864936828613, "learning_rate": 2e-05, "loss": 0.04883677, "step": 6256 }, { "epoch": 12.514, "grad_norm": 2.6108806133270264, "learning_rate": 2e-05, "loss": 0.06272373, "step": 6257 }, { "epoch": 12.516, "grad_norm": 1.03660249710083, "learning_rate": 2e-05, "loss": 0.02872313, "step": 6258 }, { "epoch": 12.518, "grad_norm": 2.0671160221099854, "learning_rate": 2e-05, "loss": 0.03815708, "step": 6259 }, { "epoch": 12.52, "grad_norm": 2.072272777557373, "learning_rate": 2e-05, "loss": 0.03585609, "step": 6260 }, { "epoch": 12.522, "grad_norm": 1.6502577066421509, "learning_rate": 2e-05, "loss": 0.05801409, "step": 6261 }, { "epoch": 12.524000000000001, "grad_norm": 4.274342060089111, "learning_rate": 2e-05, "loss": 0.04877973, "step": 6262 }, { "epoch": 12.526, "grad_norm": 1.2209831476211548, "learning_rate": 2e-05, "loss": 0.03942978, "step": 6263 }, { "epoch": 12.528, "grad_norm": 1.6523079872131348, "learning_rate": 2e-05, "loss": 0.0488749, "step": 6264 }, { "epoch": 12.53, "grad_norm": 2.2305943965911865, "learning_rate": 2e-05, "loss": 0.05138541, "step": 6265 }, { "epoch": 12.532, "grad_norm": 1.3322969675064087, "learning_rate": 2e-05, "loss": 0.04032963, "step": 6266 }, { "epoch": 12.534, "grad_norm": 2.0621213912963867, "learning_rate": 2e-05, "loss": 0.05739632, "step": 6267 }, { "epoch": 12.536, "grad_norm": 0.9136289358139038, "learning_rate": 2e-05, "loss": 0.03144271, "step": 6268 }, { "epoch": 12.538, "grad_norm": 1.2017995119094849, "learning_rate": 2e-05, "loss": 0.0261726, "step": 6269 }, { "epoch": 12.54, "grad_norm": 1.861670732498169, "learning_rate": 2e-05, "loss": 0.05213568, "step": 6270 }, { "epoch": 12.542, "grad_norm": 3.512301206588745, "learning_rate": 2e-05, "loss": 0.10029002, "step": 6271 }, { "epoch": 12.544, "grad_norm": 1.3964629173278809, "learning_rate": 2e-05, "loss": 0.0487773, "step": 6272 }, { "epoch": 12.546, "grad_norm": 1.554492473602295, "learning_rate": 2e-05, "loss": 0.04269033, "step": 6273 }, { "epoch": 12.548, "grad_norm": 1.0274790525436401, "learning_rate": 2e-05, "loss": 0.02957164, "step": 6274 }, { "epoch": 12.55, "grad_norm": 1.4311920404434204, "learning_rate": 2e-05, "loss": 0.03769149, "step": 6275 }, { "epoch": 12.552, "grad_norm": 1.9928914308547974, "learning_rate": 2e-05, "loss": 0.04190875, "step": 6276 }, { "epoch": 12.554, "grad_norm": 1.5183537006378174, "learning_rate": 2e-05, "loss": 0.04576535, "step": 6277 }, { "epoch": 12.556000000000001, "grad_norm": 2.0962085723876953, "learning_rate": 2e-05, "loss": 0.04964176, "step": 6278 }, { "epoch": 12.558, "grad_norm": 2.3136391639709473, "learning_rate": 2e-05, "loss": 0.03684264, "step": 6279 }, { "epoch": 12.56, "grad_norm": 2.110243558883667, "learning_rate": 2e-05, "loss": 0.05677021, "step": 6280 }, { "epoch": 12.562, "grad_norm": 3.119753360748291, "learning_rate": 2e-05, "loss": 0.04740483, "step": 6281 }, { "epoch": 12.564, "grad_norm": 1.0421844720840454, "learning_rate": 2e-05, "loss": 0.03042347, "step": 6282 }, { "epoch": 12.566, "grad_norm": 1.7684757709503174, "learning_rate": 2e-05, "loss": 0.04349629, "step": 6283 }, { "epoch": 12.568, "grad_norm": 1.600565791130066, "learning_rate": 2e-05, "loss": 0.04671545, "step": 6284 }, { "epoch": 12.57, "grad_norm": 1.4540395736694336, "learning_rate": 2e-05, "loss": 0.04411473, "step": 6285 }, { "epoch": 12.572, "grad_norm": 1.092216968536377, "learning_rate": 2e-05, "loss": 0.03701637, "step": 6286 }, { "epoch": 12.574, "grad_norm": 1.4555920362472534, "learning_rate": 2e-05, "loss": 0.05183559, "step": 6287 }, { "epoch": 12.576, "grad_norm": 2.1019439697265625, "learning_rate": 2e-05, "loss": 0.06192996, "step": 6288 }, { "epoch": 12.578, "grad_norm": 1.7618634700775146, "learning_rate": 2e-05, "loss": 0.03472544, "step": 6289 }, { "epoch": 12.58, "grad_norm": 1.7783581018447876, "learning_rate": 2e-05, "loss": 0.04450977, "step": 6290 }, { "epoch": 12.582, "grad_norm": 1.32155179977417, "learning_rate": 2e-05, "loss": 0.04913367, "step": 6291 }, { "epoch": 12.584, "grad_norm": 1.7167394161224365, "learning_rate": 2e-05, "loss": 0.03525726, "step": 6292 }, { "epoch": 12.586, "grad_norm": 1.5468295812606812, "learning_rate": 2e-05, "loss": 0.04274698, "step": 6293 }, { "epoch": 12.588, "grad_norm": 1.0729691982269287, "learning_rate": 2e-05, "loss": 0.04672535, "step": 6294 }, { "epoch": 12.59, "grad_norm": 1.9018189907073975, "learning_rate": 2e-05, "loss": 0.04148772, "step": 6295 }, { "epoch": 12.592, "grad_norm": 1.4016549587249756, "learning_rate": 2e-05, "loss": 0.04215005, "step": 6296 }, { "epoch": 12.594, "grad_norm": 1.5528912544250488, "learning_rate": 2e-05, "loss": 0.03888253, "step": 6297 }, { "epoch": 12.596, "grad_norm": 1.093119740486145, "learning_rate": 2e-05, "loss": 0.03328623, "step": 6298 }, { "epoch": 12.598, "grad_norm": 1.691413402557373, "learning_rate": 2e-05, "loss": 0.04086442, "step": 6299 }, { "epoch": 12.6, "grad_norm": 1.854809284210205, "learning_rate": 2e-05, "loss": 0.03869913, "step": 6300 }, { "epoch": 12.602, "grad_norm": 3.5889766216278076, "learning_rate": 2e-05, "loss": 0.03339052, "step": 6301 }, { "epoch": 12.604, "grad_norm": 1.0106459856033325, "learning_rate": 2e-05, "loss": 0.03362305, "step": 6302 }, { "epoch": 12.606, "grad_norm": 1.3339776992797852, "learning_rate": 2e-05, "loss": 0.03432526, "step": 6303 }, { "epoch": 12.608, "grad_norm": 1.3809486627578735, "learning_rate": 2e-05, "loss": 0.0381794, "step": 6304 }, { "epoch": 12.61, "grad_norm": 4.405636787414551, "learning_rate": 2e-05, "loss": 0.05852031, "step": 6305 }, { "epoch": 12.612, "grad_norm": 2.022235870361328, "learning_rate": 2e-05, "loss": 0.04848814, "step": 6306 }, { "epoch": 12.614, "grad_norm": 2.3099355697631836, "learning_rate": 2e-05, "loss": 0.05684424, "step": 6307 }, { "epoch": 12.616, "grad_norm": 1.1495301723480225, "learning_rate": 2e-05, "loss": 0.02855151, "step": 6308 }, { "epoch": 12.618, "grad_norm": 1.6715397834777832, "learning_rate": 2e-05, "loss": 0.03850598, "step": 6309 }, { "epoch": 12.62, "grad_norm": 1.773354172706604, "learning_rate": 2e-05, "loss": 0.04509133, "step": 6310 }, { "epoch": 12.622, "grad_norm": 1.419111967086792, "learning_rate": 2e-05, "loss": 0.03800103, "step": 6311 }, { "epoch": 12.624, "grad_norm": 1.9277119636535645, "learning_rate": 2e-05, "loss": 0.05758407, "step": 6312 }, { "epoch": 12.626, "grad_norm": 2.3455944061279297, "learning_rate": 2e-05, "loss": 0.05955437, "step": 6313 }, { "epoch": 12.628, "grad_norm": 1.5641530752182007, "learning_rate": 2e-05, "loss": 0.05085967, "step": 6314 }, { "epoch": 12.63, "grad_norm": 4.214975833892822, "learning_rate": 2e-05, "loss": 0.04824074, "step": 6315 }, { "epoch": 12.632, "grad_norm": 2.380524158477783, "learning_rate": 2e-05, "loss": 0.0585499, "step": 6316 }, { "epoch": 12.634, "grad_norm": 1.3643995523452759, "learning_rate": 2e-05, "loss": 0.04846994, "step": 6317 }, { "epoch": 12.636, "grad_norm": 1.191190242767334, "learning_rate": 2e-05, "loss": 0.03240199, "step": 6318 }, { "epoch": 12.638, "grad_norm": 2.9533886909484863, "learning_rate": 2e-05, "loss": 0.0543364, "step": 6319 }, { "epoch": 12.64, "grad_norm": 1.2633097171783447, "learning_rate": 2e-05, "loss": 0.03655366, "step": 6320 }, { "epoch": 12.642, "grad_norm": 1.2036083936691284, "learning_rate": 2e-05, "loss": 0.03070158, "step": 6321 }, { "epoch": 12.644, "grad_norm": 2.5942792892456055, "learning_rate": 2e-05, "loss": 0.04942968, "step": 6322 }, { "epoch": 12.646, "grad_norm": 1.0475519895553589, "learning_rate": 2e-05, "loss": 0.03110964, "step": 6323 }, { "epoch": 12.648, "grad_norm": 1.921798825263977, "learning_rate": 2e-05, "loss": 0.06349019, "step": 6324 }, { "epoch": 12.65, "grad_norm": 1.2762218713760376, "learning_rate": 2e-05, "loss": 0.04748666, "step": 6325 }, { "epoch": 12.652, "grad_norm": 1.2782567739486694, "learning_rate": 2e-05, "loss": 0.04201008, "step": 6326 }, { "epoch": 12.654, "grad_norm": 0.7562774419784546, "learning_rate": 2e-05, "loss": 0.02028156, "step": 6327 }, { "epoch": 12.656, "grad_norm": 1.9511414766311646, "learning_rate": 2e-05, "loss": 0.04364496, "step": 6328 }, { "epoch": 12.658, "grad_norm": 1.7642337083816528, "learning_rate": 2e-05, "loss": 0.05323292, "step": 6329 }, { "epoch": 12.66, "grad_norm": 2.2341959476470947, "learning_rate": 2e-05, "loss": 0.03851413, "step": 6330 }, { "epoch": 12.662, "grad_norm": 1.5995410680770874, "learning_rate": 2e-05, "loss": 0.0516777, "step": 6331 }, { "epoch": 12.664, "grad_norm": 1.8847028017044067, "learning_rate": 2e-05, "loss": 0.04388159, "step": 6332 }, { "epoch": 12.666, "grad_norm": 2.132364511489868, "learning_rate": 2e-05, "loss": 0.04297321, "step": 6333 }, { "epoch": 12.668, "grad_norm": 2.32963228225708, "learning_rate": 2e-05, "loss": 0.03326223, "step": 6334 }, { "epoch": 12.67, "grad_norm": 1.9717942476272583, "learning_rate": 2e-05, "loss": 0.04060198, "step": 6335 }, { "epoch": 12.672, "grad_norm": 1.5272952318191528, "learning_rate": 2e-05, "loss": 0.05175653, "step": 6336 }, { "epoch": 12.674, "grad_norm": 1.090294361114502, "learning_rate": 2e-05, "loss": 0.05121554, "step": 6337 }, { "epoch": 12.676, "grad_norm": 0.9349586367607117, "learning_rate": 2e-05, "loss": 0.03004047, "step": 6338 }, { "epoch": 12.678, "grad_norm": 3.6723544597625732, "learning_rate": 2e-05, "loss": 0.05083375, "step": 6339 }, { "epoch": 12.68, "grad_norm": 1.8542966842651367, "learning_rate": 2e-05, "loss": 0.05500766, "step": 6340 }, { "epoch": 12.682, "grad_norm": 1.6407077312469482, "learning_rate": 2e-05, "loss": 0.0521637, "step": 6341 }, { "epoch": 12.684, "grad_norm": 1.1183030605316162, "learning_rate": 2e-05, "loss": 0.03896394, "step": 6342 }, { "epoch": 12.686, "grad_norm": 1.987824559211731, "learning_rate": 2e-05, "loss": 0.05758013, "step": 6343 }, { "epoch": 12.688, "grad_norm": 1.398348331451416, "learning_rate": 2e-05, "loss": 0.04054468, "step": 6344 }, { "epoch": 12.69, "grad_norm": 1.3957326412200928, "learning_rate": 2e-05, "loss": 0.03729579, "step": 6345 }, { "epoch": 12.692, "grad_norm": 1.2762089967727661, "learning_rate": 2e-05, "loss": 0.03696339, "step": 6346 }, { "epoch": 12.693999999999999, "grad_norm": 2.0938408374786377, "learning_rate": 2e-05, "loss": 0.03400442, "step": 6347 }, { "epoch": 12.696, "grad_norm": 1.4813979864120483, "learning_rate": 2e-05, "loss": 0.03928123, "step": 6348 }, { "epoch": 12.698, "grad_norm": 1.0480759143829346, "learning_rate": 2e-05, "loss": 0.03268198, "step": 6349 }, { "epoch": 12.7, "grad_norm": 1.0556561946868896, "learning_rate": 2e-05, "loss": 0.03014071, "step": 6350 }, { "epoch": 12.702, "grad_norm": 1.721622347831726, "learning_rate": 2e-05, "loss": 0.04010597, "step": 6351 }, { "epoch": 12.704, "grad_norm": 1.3392608165740967, "learning_rate": 2e-05, "loss": 0.04958211, "step": 6352 }, { "epoch": 12.706, "grad_norm": 2.3841845989227295, "learning_rate": 2e-05, "loss": 0.06484979, "step": 6353 }, { "epoch": 12.708, "grad_norm": 1.1281391382217407, "learning_rate": 2e-05, "loss": 0.03068016, "step": 6354 }, { "epoch": 12.71, "grad_norm": 1.1351035833358765, "learning_rate": 2e-05, "loss": 0.03658585, "step": 6355 }, { "epoch": 12.712, "grad_norm": 2.5402114391326904, "learning_rate": 2e-05, "loss": 0.04548267, "step": 6356 }, { "epoch": 12.714, "grad_norm": 2.3668107986450195, "learning_rate": 2e-05, "loss": 0.03807458, "step": 6357 }, { "epoch": 12.716, "grad_norm": 1.4179434776306152, "learning_rate": 2e-05, "loss": 0.03555714, "step": 6358 }, { "epoch": 12.718, "grad_norm": 1.4804911613464355, "learning_rate": 2e-05, "loss": 0.04498214, "step": 6359 }, { "epoch": 12.72, "grad_norm": 2.23551344871521, "learning_rate": 2e-05, "loss": 0.05175463, "step": 6360 }, { "epoch": 12.722, "grad_norm": 1.7548456192016602, "learning_rate": 2e-05, "loss": 0.06676908, "step": 6361 }, { "epoch": 12.724, "grad_norm": 1.852115511894226, "learning_rate": 2e-05, "loss": 0.05606053, "step": 6362 }, { "epoch": 12.725999999999999, "grad_norm": 1.3961422443389893, "learning_rate": 2e-05, "loss": 0.04076095, "step": 6363 }, { "epoch": 12.728, "grad_norm": 2.0832810401916504, "learning_rate": 2e-05, "loss": 0.06329206, "step": 6364 }, { "epoch": 12.73, "grad_norm": 2.148437976837158, "learning_rate": 2e-05, "loss": 0.06662284, "step": 6365 }, { "epoch": 12.732, "grad_norm": 1.9320807456970215, "learning_rate": 2e-05, "loss": 0.04591805, "step": 6366 }, { "epoch": 12.734, "grad_norm": 1.0681511163711548, "learning_rate": 2e-05, "loss": 0.03428921, "step": 6367 }, { "epoch": 12.736, "grad_norm": 1.1943943500518799, "learning_rate": 2e-05, "loss": 0.0293221, "step": 6368 }, { "epoch": 12.738, "grad_norm": 1.1118965148925781, "learning_rate": 2e-05, "loss": 0.03196052, "step": 6369 }, { "epoch": 12.74, "grad_norm": 1.5805182456970215, "learning_rate": 2e-05, "loss": 0.04865031, "step": 6370 }, { "epoch": 12.742, "grad_norm": 3.262758255004883, "learning_rate": 2e-05, "loss": 0.05576367, "step": 6371 }, { "epoch": 12.744, "grad_norm": 1.1225249767303467, "learning_rate": 2e-05, "loss": 0.03483301, "step": 6372 }, { "epoch": 12.746, "grad_norm": 1.2867493629455566, "learning_rate": 2e-05, "loss": 0.04086279, "step": 6373 }, { "epoch": 12.748, "grad_norm": 1.298018217086792, "learning_rate": 2e-05, "loss": 0.04258086, "step": 6374 }, { "epoch": 12.75, "grad_norm": 1.216958999633789, "learning_rate": 2e-05, "loss": 0.04485769, "step": 6375 }, { "epoch": 12.752, "grad_norm": 1.4479326009750366, "learning_rate": 2e-05, "loss": 0.04460638, "step": 6376 }, { "epoch": 12.754, "grad_norm": 1.5058059692382812, "learning_rate": 2e-05, "loss": 0.0504184, "step": 6377 }, { "epoch": 12.756, "grad_norm": 1.393099308013916, "learning_rate": 2e-05, "loss": 0.03610604, "step": 6378 }, { "epoch": 12.758, "grad_norm": 1.2349895238876343, "learning_rate": 2e-05, "loss": 0.04152373, "step": 6379 }, { "epoch": 12.76, "grad_norm": 2.2473106384277344, "learning_rate": 2e-05, "loss": 0.06257766, "step": 6380 }, { "epoch": 12.762, "grad_norm": 4.311264514923096, "learning_rate": 2e-05, "loss": 0.04196393, "step": 6381 }, { "epoch": 12.764, "grad_norm": 1.3708561658859253, "learning_rate": 2e-05, "loss": 0.03996716, "step": 6382 }, { "epoch": 12.766, "grad_norm": 1.4658493995666504, "learning_rate": 2e-05, "loss": 0.03841384, "step": 6383 }, { "epoch": 12.768, "grad_norm": 1.5609458684921265, "learning_rate": 2e-05, "loss": 0.04608671, "step": 6384 }, { "epoch": 12.77, "grad_norm": 1.023798942565918, "learning_rate": 2e-05, "loss": 0.03047959, "step": 6385 }, { "epoch": 12.772, "grad_norm": 2.012105703353882, "learning_rate": 2e-05, "loss": 0.04641195, "step": 6386 }, { "epoch": 12.774000000000001, "grad_norm": 1.3531718254089355, "learning_rate": 2e-05, "loss": 0.04071771, "step": 6387 }, { "epoch": 12.776, "grad_norm": 1.2917602062225342, "learning_rate": 2e-05, "loss": 0.03840836, "step": 6388 }, { "epoch": 12.778, "grad_norm": 1.412302851676941, "learning_rate": 2e-05, "loss": 0.0444256, "step": 6389 }, { "epoch": 12.78, "grad_norm": 2.208261489868164, "learning_rate": 2e-05, "loss": 0.03917177, "step": 6390 }, { "epoch": 12.782, "grad_norm": 2.6841533184051514, "learning_rate": 2e-05, "loss": 0.04568644, "step": 6391 }, { "epoch": 12.784, "grad_norm": 2.0989646911621094, "learning_rate": 2e-05, "loss": 0.04213623, "step": 6392 }, { "epoch": 12.786, "grad_norm": 1.1875020265579224, "learning_rate": 2e-05, "loss": 0.03316896, "step": 6393 }, { "epoch": 12.788, "grad_norm": 1.4427107572555542, "learning_rate": 2e-05, "loss": 0.04718685, "step": 6394 }, { "epoch": 12.79, "grad_norm": 2.2982850074768066, "learning_rate": 2e-05, "loss": 0.06501245, "step": 6395 }, { "epoch": 12.792, "grad_norm": 1.4120808839797974, "learning_rate": 2e-05, "loss": 0.04840774, "step": 6396 }, { "epoch": 12.794, "grad_norm": 1.1496741771697998, "learning_rate": 2e-05, "loss": 0.02569684, "step": 6397 }, { "epoch": 12.796, "grad_norm": 1.4502733945846558, "learning_rate": 2e-05, "loss": 0.0588194, "step": 6398 }, { "epoch": 12.798, "grad_norm": 1.6323798894882202, "learning_rate": 2e-05, "loss": 0.04662085, "step": 6399 }, { "epoch": 12.8, "grad_norm": 1.459981083869934, "learning_rate": 2e-05, "loss": 0.0337071, "step": 6400 }, { "epoch": 12.802, "grad_norm": 1.2611836194992065, "learning_rate": 2e-05, "loss": 0.03413685, "step": 6401 }, { "epoch": 12.804, "grad_norm": 1.0556434392929077, "learning_rate": 2e-05, "loss": 0.03022856, "step": 6402 }, { "epoch": 12.806000000000001, "grad_norm": 1.1885935068130493, "learning_rate": 2e-05, "loss": 0.03547438, "step": 6403 }, { "epoch": 12.808, "grad_norm": 1.4913263320922852, "learning_rate": 2e-05, "loss": 0.04301264, "step": 6404 }, { "epoch": 12.81, "grad_norm": 1.678415298461914, "learning_rate": 2e-05, "loss": 0.0440227, "step": 6405 }, { "epoch": 12.812, "grad_norm": 2.0774550437927246, "learning_rate": 2e-05, "loss": 0.03051781, "step": 6406 }, { "epoch": 12.814, "grad_norm": 1.5015497207641602, "learning_rate": 2e-05, "loss": 0.04394282, "step": 6407 }, { "epoch": 12.816, "grad_norm": 1.5424638986587524, "learning_rate": 2e-05, "loss": 0.0474889, "step": 6408 }, { "epoch": 12.818, "grad_norm": 1.2423845529556274, "learning_rate": 2e-05, "loss": 0.03328826, "step": 6409 }, { "epoch": 12.82, "grad_norm": 1.3648911714553833, "learning_rate": 2e-05, "loss": 0.05593203, "step": 6410 }, { "epoch": 12.822, "grad_norm": 1.6377047300338745, "learning_rate": 2e-05, "loss": 0.04586127, "step": 6411 }, { "epoch": 12.824, "grad_norm": 1.2745712995529175, "learning_rate": 2e-05, "loss": 0.02756302, "step": 6412 }, { "epoch": 12.826, "grad_norm": 1.0557851791381836, "learning_rate": 2e-05, "loss": 0.02719074, "step": 6413 }, { "epoch": 12.828, "grad_norm": 1.3740254640579224, "learning_rate": 2e-05, "loss": 0.03186225, "step": 6414 }, { "epoch": 12.83, "grad_norm": 2.600973129272461, "learning_rate": 2e-05, "loss": 0.04746537, "step": 6415 }, { "epoch": 12.832, "grad_norm": 1.1148313283920288, "learning_rate": 2e-05, "loss": 0.02911303, "step": 6416 }, { "epoch": 12.834, "grad_norm": 1.085671067237854, "learning_rate": 2e-05, "loss": 0.03521315, "step": 6417 }, { "epoch": 12.836, "grad_norm": 2.3671929836273193, "learning_rate": 2e-05, "loss": 0.04562847, "step": 6418 }, { "epoch": 12.838, "grad_norm": 2.1059203147888184, "learning_rate": 2e-05, "loss": 0.05967949, "step": 6419 }, { "epoch": 12.84, "grad_norm": 2.0058083534240723, "learning_rate": 2e-05, "loss": 0.03334425, "step": 6420 }, { "epoch": 12.842, "grad_norm": 1.4104883670806885, "learning_rate": 2e-05, "loss": 0.03960668, "step": 6421 }, { "epoch": 12.844, "grad_norm": 1.3945311307907104, "learning_rate": 2e-05, "loss": 0.05327337, "step": 6422 }, { "epoch": 12.846, "grad_norm": 1.1917275190353394, "learning_rate": 2e-05, "loss": 0.03146118, "step": 6423 }, { "epoch": 12.848, "grad_norm": 1.3188499212265015, "learning_rate": 2e-05, "loss": 0.03480065, "step": 6424 }, { "epoch": 12.85, "grad_norm": 1.2820628881454468, "learning_rate": 2e-05, "loss": 0.02856812, "step": 6425 }, { "epoch": 12.852, "grad_norm": 1.3507776260375977, "learning_rate": 2e-05, "loss": 0.0419952, "step": 6426 }, { "epoch": 12.854, "grad_norm": 1.5551594495773315, "learning_rate": 2e-05, "loss": 0.06390089, "step": 6427 }, { "epoch": 12.856, "grad_norm": 2.567291259765625, "learning_rate": 2e-05, "loss": 0.04246731, "step": 6428 }, { "epoch": 12.858, "grad_norm": 1.707859754562378, "learning_rate": 2e-05, "loss": 0.05553305, "step": 6429 }, { "epoch": 12.86, "grad_norm": 1.1673024892807007, "learning_rate": 2e-05, "loss": 0.04302656, "step": 6430 }, { "epoch": 12.862, "grad_norm": 1.1114625930786133, "learning_rate": 2e-05, "loss": 0.03706741, "step": 6431 }, { "epoch": 12.864, "grad_norm": 1.044904112815857, "learning_rate": 2e-05, "loss": 0.03192709, "step": 6432 }, { "epoch": 12.866, "grad_norm": 1.69417142868042, "learning_rate": 2e-05, "loss": 0.05506884, "step": 6433 }, { "epoch": 12.868, "grad_norm": 1.2619975805282593, "learning_rate": 2e-05, "loss": 0.03792794, "step": 6434 }, { "epoch": 12.87, "grad_norm": 2.1460132598876953, "learning_rate": 2e-05, "loss": 0.05722627, "step": 6435 }, { "epoch": 12.872, "grad_norm": 1.2090710401535034, "learning_rate": 2e-05, "loss": 0.05044911, "step": 6436 }, { "epoch": 12.874, "grad_norm": 1.7237330675125122, "learning_rate": 2e-05, "loss": 0.0452129, "step": 6437 }, { "epoch": 12.876, "grad_norm": 1.0395435094833374, "learning_rate": 2e-05, "loss": 0.02874579, "step": 6438 }, { "epoch": 12.878, "grad_norm": 1.4696030616760254, "learning_rate": 2e-05, "loss": 0.04806498, "step": 6439 }, { "epoch": 12.88, "grad_norm": 1.6383066177368164, "learning_rate": 2e-05, "loss": 0.03882147, "step": 6440 }, { "epoch": 12.882, "grad_norm": 1.9339408874511719, "learning_rate": 2e-05, "loss": 0.06159985, "step": 6441 }, { "epoch": 12.884, "grad_norm": 3.267814874649048, "learning_rate": 2e-05, "loss": 0.05857627, "step": 6442 }, { "epoch": 12.886, "grad_norm": 2.2561628818511963, "learning_rate": 2e-05, "loss": 0.05107491, "step": 6443 }, { "epoch": 12.888, "grad_norm": 1.9300895929336548, "learning_rate": 2e-05, "loss": 0.05386838, "step": 6444 }, { "epoch": 12.89, "grad_norm": 3.2582273483276367, "learning_rate": 2e-05, "loss": 0.04117388, "step": 6445 }, { "epoch": 12.892, "grad_norm": 1.428175449371338, "learning_rate": 2e-05, "loss": 0.03108019, "step": 6446 }, { "epoch": 12.894, "grad_norm": 2.0222373008728027, "learning_rate": 2e-05, "loss": 0.03365955, "step": 6447 }, { "epoch": 12.896, "grad_norm": 2.43520188331604, "learning_rate": 2e-05, "loss": 0.04152467, "step": 6448 }, { "epoch": 12.898, "grad_norm": 1.3054020404815674, "learning_rate": 2e-05, "loss": 0.03886139, "step": 6449 }, { "epoch": 12.9, "grad_norm": 1.3858981132507324, "learning_rate": 2e-05, "loss": 0.03895619, "step": 6450 }, { "epoch": 12.902, "grad_norm": 0.9143564105033875, "learning_rate": 2e-05, "loss": 0.01918302, "step": 6451 }, { "epoch": 12.904, "grad_norm": 1.4373931884765625, "learning_rate": 2e-05, "loss": 0.04344804, "step": 6452 }, { "epoch": 12.906, "grad_norm": 1.0674983263015747, "learning_rate": 2e-05, "loss": 0.03236082, "step": 6453 }, { "epoch": 12.908, "grad_norm": 1.2760518789291382, "learning_rate": 2e-05, "loss": 0.04398597, "step": 6454 }, { "epoch": 12.91, "grad_norm": 2.119486093521118, "learning_rate": 2e-05, "loss": 0.03598646, "step": 6455 }, { "epoch": 12.912, "grad_norm": 1.0327049493789673, "learning_rate": 2e-05, "loss": 0.02299444, "step": 6456 }, { "epoch": 12.914, "grad_norm": 1.3598545789718628, "learning_rate": 2e-05, "loss": 0.04965741, "step": 6457 }, { "epoch": 12.916, "grad_norm": 1.109059453010559, "learning_rate": 2e-05, "loss": 0.0339277, "step": 6458 }, { "epoch": 12.918, "grad_norm": 1.2943098545074463, "learning_rate": 2e-05, "loss": 0.03731314, "step": 6459 }, { "epoch": 12.92, "grad_norm": 2.1976876258850098, "learning_rate": 2e-05, "loss": 0.0418474, "step": 6460 }, { "epoch": 12.922, "grad_norm": 0.9695811867713928, "learning_rate": 2e-05, "loss": 0.02404466, "step": 6461 }, { "epoch": 12.924, "grad_norm": 1.4028820991516113, "learning_rate": 2e-05, "loss": 0.04700348, "step": 6462 }, { "epoch": 12.926, "grad_norm": 2.074523448944092, "learning_rate": 2e-05, "loss": 0.04677882, "step": 6463 }, { "epoch": 12.928, "grad_norm": 0.876520574092865, "learning_rate": 2e-05, "loss": 0.02563859, "step": 6464 }, { "epoch": 12.93, "grad_norm": 1.8341282606124878, "learning_rate": 2e-05, "loss": 0.04422235, "step": 6465 }, { "epoch": 12.932, "grad_norm": 1.3097070455551147, "learning_rate": 2e-05, "loss": 0.03325322, "step": 6466 }, { "epoch": 12.934, "grad_norm": 1.4689263105392456, "learning_rate": 2e-05, "loss": 0.04611597, "step": 6467 }, { "epoch": 12.936, "grad_norm": 1.5380563735961914, "learning_rate": 2e-05, "loss": 0.05122569, "step": 6468 }, { "epoch": 12.938, "grad_norm": 0.8768236637115479, "learning_rate": 2e-05, "loss": 0.01557497, "step": 6469 }, { "epoch": 12.94, "grad_norm": 3.0059239864349365, "learning_rate": 2e-05, "loss": 0.03941962, "step": 6470 }, { "epoch": 12.942, "grad_norm": 4.760193347930908, "learning_rate": 2e-05, "loss": 0.03318344, "step": 6471 }, { "epoch": 12.943999999999999, "grad_norm": 2.0453317165374756, "learning_rate": 2e-05, "loss": 0.04006668, "step": 6472 }, { "epoch": 12.946, "grad_norm": 2.809612512588501, "learning_rate": 2e-05, "loss": 0.04919286, "step": 6473 }, { "epoch": 12.948, "grad_norm": 1.04146146774292, "learning_rate": 2e-05, "loss": 0.0355149, "step": 6474 }, { "epoch": 12.95, "grad_norm": 1.4496123790740967, "learning_rate": 2e-05, "loss": 0.04766847, "step": 6475 }, { "epoch": 12.952, "grad_norm": 1.7231749296188354, "learning_rate": 2e-05, "loss": 0.03629901, "step": 6476 }, { "epoch": 12.954, "grad_norm": 1.4813939332962036, "learning_rate": 2e-05, "loss": 0.05657486, "step": 6477 }, { "epoch": 12.956, "grad_norm": 1.8923465013504028, "learning_rate": 2e-05, "loss": 0.03107157, "step": 6478 }, { "epoch": 12.958, "grad_norm": 1.1124436855316162, "learning_rate": 2e-05, "loss": 0.03300238, "step": 6479 }, { "epoch": 12.96, "grad_norm": 1.609631061553955, "learning_rate": 2e-05, "loss": 0.05744763, "step": 6480 }, { "epoch": 12.962, "grad_norm": 1.092146396636963, "learning_rate": 2e-05, "loss": 0.0354538, "step": 6481 }, { "epoch": 12.964, "grad_norm": 1.0714373588562012, "learning_rate": 2e-05, "loss": 0.03718103, "step": 6482 }, { "epoch": 12.966, "grad_norm": 1.5786356925964355, "learning_rate": 2e-05, "loss": 0.04812677, "step": 6483 }, { "epoch": 12.968, "grad_norm": 2.6054508686065674, "learning_rate": 2e-05, "loss": 0.0359851, "step": 6484 }, { "epoch": 12.97, "grad_norm": 0.9874547123908997, "learning_rate": 2e-05, "loss": 0.03105699, "step": 6485 }, { "epoch": 12.972, "grad_norm": 2.4184865951538086, "learning_rate": 2e-05, "loss": 0.05367502, "step": 6486 }, { "epoch": 12.974, "grad_norm": 3.007775068283081, "learning_rate": 2e-05, "loss": 0.0452963, "step": 6487 }, { "epoch": 12.975999999999999, "grad_norm": 1.4173858165740967, "learning_rate": 2e-05, "loss": 0.05711803, "step": 6488 }, { "epoch": 12.978, "grad_norm": 1.6504642963409424, "learning_rate": 2e-05, "loss": 0.04591904, "step": 6489 }, { "epoch": 12.98, "grad_norm": 1.0074936151504517, "learning_rate": 2e-05, "loss": 0.02326766, "step": 6490 }, { "epoch": 12.982, "grad_norm": 1.0605522394180298, "learning_rate": 2e-05, "loss": 0.02780467, "step": 6491 }, { "epoch": 12.984, "grad_norm": 2.823035717010498, "learning_rate": 2e-05, "loss": 0.0511883, "step": 6492 }, { "epoch": 12.986, "grad_norm": 1.2774420976638794, "learning_rate": 2e-05, "loss": 0.0274408, "step": 6493 }, { "epoch": 12.988, "grad_norm": 0.9702324867248535, "learning_rate": 2e-05, "loss": 0.02415428, "step": 6494 }, { "epoch": 12.99, "grad_norm": 1.212349534034729, "learning_rate": 2e-05, "loss": 0.04195923, "step": 6495 }, { "epoch": 12.992, "grad_norm": 1.8738391399383545, "learning_rate": 2e-05, "loss": 0.03428925, "step": 6496 }, { "epoch": 12.994, "grad_norm": 1.183615803718567, "learning_rate": 2e-05, "loss": 0.03930584, "step": 6497 }, { "epoch": 12.996, "grad_norm": 3.319544553756714, "learning_rate": 2e-05, "loss": 0.0459759, "step": 6498 }, { "epoch": 12.998, "grad_norm": 1.1456108093261719, "learning_rate": 2e-05, "loss": 0.03367121, "step": 6499 }, { "epoch": 13.0, "grad_norm": 1.9047518968582153, "learning_rate": 2e-05, "loss": 0.04684749, "step": 6500 }, { "epoch": 13.0, "eval_performance": { "AngleClassification_1": 0.996, "AngleClassification_2": 0.996, "AngleClassification_3": 0.9580838323353293, "Equal_1": 0.996, "Equal_2": 0.9700598802395209, "Equal_3": 0.8882235528942116, "LineComparison_1": 0.998, "LineComparison_2": 1.0, "LineComparison_3": 0.9860279441117764, "Parallel_1": 0.9939879759519038, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 0.984, "Perpendicular_2": 0.966, "Perpendicular_3": 0.7144288577154309, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9916666666666667, "PointLiesOnCircle_3": 0.9896666666666667, "PointLiesOnLine_1": 0.9819639278557114, "PointLiesOnLine_2": 0.9919839679358717, "PointLiesOnLine_3": 0.9680638722554891 }, "eval_runtime": 319.563, "eval_samples_per_second": 32.857, "eval_steps_per_second": 0.657, "step": 6500 }, { "epoch": 13.002, "grad_norm": 2.025676727294922, "learning_rate": 2e-05, "loss": 0.05510117, "step": 6501 }, { "epoch": 13.004, "grad_norm": 1.67191481590271, "learning_rate": 2e-05, "loss": 0.05629362, "step": 6502 }, { "epoch": 13.006, "grad_norm": 1.4045917987823486, "learning_rate": 2e-05, "loss": 0.0437433, "step": 6503 }, { "epoch": 13.008, "grad_norm": 2.3563649654388428, "learning_rate": 2e-05, "loss": 0.04698532, "step": 6504 }, { "epoch": 13.01, "grad_norm": 1.7476520538330078, "learning_rate": 2e-05, "loss": 0.05161941, "step": 6505 }, { "epoch": 13.012, "grad_norm": 1.3390698432922363, "learning_rate": 2e-05, "loss": 0.03999062, "step": 6506 }, { "epoch": 13.014, "grad_norm": 1.905305027961731, "learning_rate": 2e-05, "loss": 0.04302887, "step": 6507 }, { "epoch": 13.016, "grad_norm": 2.7333922386169434, "learning_rate": 2e-05, "loss": 0.04309512, "step": 6508 }, { "epoch": 13.018, "grad_norm": 1.2732406854629517, "learning_rate": 2e-05, "loss": 0.04579434, "step": 6509 }, { "epoch": 13.02, "grad_norm": 2.010542631149292, "learning_rate": 2e-05, "loss": 0.04244982, "step": 6510 }, { "epoch": 13.022, "grad_norm": 1.6808205842971802, "learning_rate": 2e-05, "loss": 0.04541232, "step": 6511 }, { "epoch": 13.024, "grad_norm": 1.3999242782592773, "learning_rate": 2e-05, "loss": 0.05283143, "step": 6512 }, { "epoch": 13.026, "grad_norm": 1.0839227437973022, "learning_rate": 2e-05, "loss": 0.03332079, "step": 6513 }, { "epoch": 13.028, "grad_norm": 1.7581884860992432, "learning_rate": 2e-05, "loss": 0.03697137, "step": 6514 }, { "epoch": 13.03, "grad_norm": 2.3362925052642822, "learning_rate": 2e-05, "loss": 0.04086022, "step": 6515 }, { "epoch": 13.032, "grad_norm": 1.2541543245315552, "learning_rate": 2e-05, "loss": 0.04349049, "step": 6516 }, { "epoch": 13.034, "grad_norm": 3.33878231048584, "learning_rate": 2e-05, "loss": 0.04347111, "step": 6517 }, { "epoch": 13.036, "grad_norm": 1.553660273551941, "learning_rate": 2e-05, "loss": 0.03859802, "step": 6518 }, { "epoch": 13.038, "grad_norm": 1.1606173515319824, "learning_rate": 2e-05, "loss": 0.04000216, "step": 6519 }, { "epoch": 13.04, "grad_norm": 1.0634113550186157, "learning_rate": 2e-05, "loss": 0.02386756, "step": 6520 }, { "epoch": 13.042, "grad_norm": 1.356263518333435, "learning_rate": 2e-05, "loss": 0.03155567, "step": 6521 }, { "epoch": 13.044, "grad_norm": 2.3392069339752197, "learning_rate": 2e-05, "loss": 0.06134815, "step": 6522 }, { "epoch": 13.046, "grad_norm": 4.142246723175049, "learning_rate": 2e-05, "loss": 0.06078862, "step": 6523 }, { "epoch": 13.048, "grad_norm": 2.212329387664795, "learning_rate": 2e-05, "loss": 0.04393127, "step": 6524 }, { "epoch": 13.05, "grad_norm": 1.69950270652771, "learning_rate": 2e-05, "loss": 0.04409553, "step": 6525 }, { "epoch": 13.052, "grad_norm": 1.850864052772522, "learning_rate": 2e-05, "loss": 0.03946983, "step": 6526 }, { "epoch": 13.054, "grad_norm": 1.1603494882583618, "learning_rate": 2e-05, "loss": 0.02313782, "step": 6527 }, { "epoch": 13.056, "grad_norm": 2.081894636154175, "learning_rate": 2e-05, "loss": 0.02774069, "step": 6528 }, { "epoch": 13.058, "grad_norm": 1.8797224760055542, "learning_rate": 2e-05, "loss": 0.05306084, "step": 6529 }, { "epoch": 13.06, "grad_norm": 1.903994083404541, "learning_rate": 2e-05, "loss": 0.05133477, "step": 6530 }, { "epoch": 13.062, "grad_norm": 1.7151387929916382, "learning_rate": 2e-05, "loss": 0.04850292, "step": 6531 }, { "epoch": 13.064, "grad_norm": 2.1803505420684814, "learning_rate": 2e-05, "loss": 0.03637154, "step": 6532 }, { "epoch": 13.066, "grad_norm": 1.204275369644165, "learning_rate": 2e-05, "loss": 0.04634327, "step": 6533 }, { "epoch": 13.068, "grad_norm": 1.7479034662246704, "learning_rate": 2e-05, "loss": 0.04111974, "step": 6534 }, { "epoch": 13.07, "grad_norm": 1.4405919313430786, "learning_rate": 2e-05, "loss": 0.03936667, "step": 6535 }, { "epoch": 13.072, "grad_norm": 2.053112030029297, "learning_rate": 2e-05, "loss": 0.05977479, "step": 6536 }, { "epoch": 13.074, "grad_norm": 3.923091173171997, "learning_rate": 2e-05, "loss": 0.03060072, "step": 6537 }, { "epoch": 13.076, "grad_norm": 1.1463172435760498, "learning_rate": 2e-05, "loss": 0.03833826, "step": 6538 }, { "epoch": 13.078, "grad_norm": 1.4521420001983643, "learning_rate": 2e-05, "loss": 0.02565883, "step": 6539 }, { "epoch": 13.08, "grad_norm": 1.5646151304244995, "learning_rate": 2e-05, "loss": 0.04049197, "step": 6540 }, { "epoch": 13.082, "grad_norm": 1.1040815114974976, "learning_rate": 2e-05, "loss": 0.04397867, "step": 6541 }, { "epoch": 13.084, "grad_norm": 0.9532331228256226, "learning_rate": 2e-05, "loss": 0.02361744, "step": 6542 }, { "epoch": 13.086, "grad_norm": 3.3594508171081543, "learning_rate": 2e-05, "loss": 0.06016827, "step": 6543 }, { "epoch": 13.088, "grad_norm": 1.7429322004318237, "learning_rate": 2e-05, "loss": 0.04198964, "step": 6544 }, { "epoch": 13.09, "grad_norm": 1.6168110370635986, "learning_rate": 2e-05, "loss": 0.02668218, "step": 6545 }, { "epoch": 13.092, "grad_norm": 1.9579505920410156, "learning_rate": 2e-05, "loss": 0.0469409, "step": 6546 }, { "epoch": 13.094, "grad_norm": 1.6008516550064087, "learning_rate": 2e-05, "loss": 0.04403362, "step": 6547 }, { "epoch": 13.096, "grad_norm": 2.0377395153045654, "learning_rate": 2e-05, "loss": 0.04835338, "step": 6548 }, { "epoch": 13.098, "grad_norm": 2.0079846382141113, "learning_rate": 2e-05, "loss": 0.04270446, "step": 6549 }, { "epoch": 13.1, "grad_norm": 1.2587766647338867, "learning_rate": 2e-05, "loss": 0.04245213, "step": 6550 }, { "epoch": 13.102, "grad_norm": 1.3419069051742554, "learning_rate": 2e-05, "loss": 0.03273055, "step": 6551 }, { "epoch": 13.104, "grad_norm": 1.0695240497589111, "learning_rate": 2e-05, "loss": 0.02955955, "step": 6552 }, { "epoch": 13.106, "grad_norm": 1.2960338592529297, "learning_rate": 2e-05, "loss": 0.03386617, "step": 6553 }, { "epoch": 13.108, "grad_norm": 1.3421612977981567, "learning_rate": 2e-05, "loss": 0.05039021, "step": 6554 }, { "epoch": 13.11, "grad_norm": 2.230473518371582, "learning_rate": 2e-05, "loss": 0.05388996, "step": 6555 }, { "epoch": 13.112, "grad_norm": 1.5305428504943848, "learning_rate": 2e-05, "loss": 0.03787341, "step": 6556 }, { "epoch": 13.114, "grad_norm": 1.2190325260162354, "learning_rate": 2e-05, "loss": 0.04556718, "step": 6557 }, { "epoch": 13.116, "grad_norm": 1.5937390327453613, "learning_rate": 2e-05, "loss": 0.04280414, "step": 6558 }, { "epoch": 13.118, "grad_norm": 1.9277565479278564, "learning_rate": 2e-05, "loss": 0.04627956, "step": 6559 }, { "epoch": 13.12, "grad_norm": 1.2560091018676758, "learning_rate": 2e-05, "loss": 0.04250285, "step": 6560 }, { "epoch": 13.122, "grad_norm": 1.1613867282867432, "learning_rate": 2e-05, "loss": 0.0368138, "step": 6561 }, { "epoch": 13.124, "grad_norm": 1.8732789754867554, "learning_rate": 2e-05, "loss": 0.04468385, "step": 6562 }, { "epoch": 13.126, "grad_norm": 1.9750572443008423, "learning_rate": 2e-05, "loss": 0.03603955, "step": 6563 }, { "epoch": 13.128, "grad_norm": 1.3057758808135986, "learning_rate": 2e-05, "loss": 0.03955033, "step": 6564 }, { "epoch": 13.13, "grad_norm": 2.027482032775879, "learning_rate": 2e-05, "loss": 0.05527831, "step": 6565 }, { "epoch": 13.132, "grad_norm": 1.953943133354187, "learning_rate": 2e-05, "loss": 0.03958747, "step": 6566 }, { "epoch": 13.134, "grad_norm": 1.0840398073196411, "learning_rate": 2e-05, "loss": 0.03809543, "step": 6567 }, { "epoch": 13.136, "grad_norm": 2.0900790691375732, "learning_rate": 2e-05, "loss": 0.04004861, "step": 6568 }, { "epoch": 13.138, "grad_norm": 1.2721304893493652, "learning_rate": 2e-05, "loss": 0.0365794, "step": 6569 }, { "epoch": 13.14, "grad_norm": 1.4358141422271729, "learning_rate": 2e-05, "loss": 0.04888467, "step": 6570 }, { "epoch": 13.142, "grad_norm": 1.0475200414657593, "learning_rate": 2e-05, "loss": 0.03867028, "step": 6571 }, { "epoch": 13.144, "grad_norm": 1.4733918905258179, "learning_rate": 2e-05, "loss": 0.03798759, "step": 6572 }, { "epoch": 13.146, "grad_norm": 1.2983602285385132, "learning_rate": 2e-05, "loss": 0.04571147, "step": 6573 }, { "epoch": 13.148, "grad_norm": 1.9528359174728394, "learning_rate": 2e-05, "loss": 0.06328858, "step": 6574 }, { "epoch": 13.15, "grad_norm": 2.274611234664917, "learning_rate": 2e-05, "loss": 0.05201408, "step": 6575 }, { "epoch": 13.152, "grad_norm": 1.6580023765563965, "learning_rate": 2e-05, "loss": 0.04882085, "step": 6576 }, { "epoch": 13.154, "grad_norm": 1.701705813407898, "learning_rate": 2e-05, "loss": 0.06361747, "step": 6577 }, { "epoch": 13.156, "grad_norm": 1.3338755369186401, "learning_rate": 2e-05, "loss": 0.03825006, "step": 6578 }, { "epoch": 13.158, "grad_norm": 1.2740284204483032, "learning_rate": 2e-05, "loss": 0.04740117, "step": 6579 }, { "epoch": 13.16, "grad_norm": 0.855344831943512, "learning_rate": 2e-05, "loss": 0.02979115, "step": 6580 }, { "epoch": 13.162, "grad_norm": 1.0931096076965332, "learning_rate": 2e-05, "loss": 0.03413231, "step": 6581 }, { "epoch": 13.164, "grad_norm": 1.7539303302764893, "learning_rate": 2e-05, "loss": 0.03713946, "step": 6582 }, { "epoch": 13.166, "grad_norm": 1.2781082391738892, "learning_rate": 2e-05, "loss": 0.0324952, "step": 6583 }, { "epoch": 13.168, "grad_norm": 1.100340723991394, "learning_rate": 2e-05, "loss": 0.04220153, "step": 6584 }, { "epoch": 13.17, "grad_norm": 1.4666935205459595, "learning_rate": 2e-05, "loss": 0.04995844, "step": 6585 }, { "epoch": 13.172, "grad_norm": 0.8306145668029785, "learning_rate": 2e-05, "loss": 0.0289952, "step": 6586 }, { "epoch": 13.174, "grad_norm": 1.7973495721817017, "learning_rate": 2e-05, "loss": 0.03114134, "step": 6587 }, { "epoch": 13.176, "grad_norm": 1.2895548343658447, "learning_rate": 2e-05, "loss": 0.04376809, "step": 6588 }, { "epoch": 13.178, "grad_norm": 4.245728015899658, "learning_rate": 2e-05, "loss": 0.07614356, "step": 6589 }, { "epoch": 13.18, "grad_norm": 1.9204843044281006, "learning_rate": 2e-05, "loss": 0.05193898, "step": 6590 }, { "epoch": 13.182, "grad_norm": 1.8683027029037476, "learning_rate": 2e-05, "loss": 0.02822795, "step": 6591 }, { "epoch": 13.184, "grad_norm": 0.9471633434295654, "learning_rate": 2e-05, "loss": 0.02922757, "step": 6592 }, { "epoch": 13.186, "grad_norm": 1.7378665208816528, "learning_rate": 2e-05, "loss": 0.05698539, "step": 6593 }, { "epoch": 13.188, "grad_norm": 1.6019706726074219, "learning_rate": 2e-05, "loss": 0.05355989, "step": 6594 }, { "epoch": 13.19, "grad_norm": 1.856338381767273, "learning_rate": 2e-05, "loss": 0.04503391, "step": 6595 }, { "epoch": 13.192, "grad_norm": 1.4183052778244019, "learning_rate": 2e-05, "loss": 0.05698609, "step": 6596 }, { "epoch": 13.194, "grad_norm": 2.9402050971984863, "learning_rate": 2e-05, "loss": 0.05300699, "step": 6597 }, { "epoch": 13.196, "grad_norm": 1.2691911458969116, "learning_rate": 2e-05, "loss": 0.04289351, "step": 6598 }, { "epoch": 13.198, "grad_norm": 1.9946510791778564, "learning_rate": 2e-05, "loss": 0.03787369, "step": 6599 }, { "epoch": 13.2, "grad_norm": 1.5389318466186523, "learning_rate": 2e-05, "loss": 0.04896133, "step": 6600 }, { "epoch": 13.202, "grad_norm": 1.1009681224822998, "learning_rate": 2e-05, "loss": 0.04099545, "step": 6601 }, { "epoch": 13.204, "grad_norm": 1.298155665397644, "learning_rate": 2e-05, "loss": 0.04521285, "step": 6602 }, { "epoch": 13.206, "grad_norm": 1.194268822669983, "learning_rate": 2e-05, "loss": 0.02646183, "step": 6603 }, { "epoch": 13.208, "grad_norm": 1.166735053062439, "learning_rate": 2e-05, "loss": 0.03302169, "step": 6604 }, { "epoch": 13.21, "grad_norm": 1.1501606702804565, "learning_rate": 2e-05, "loss": 0.03696949, "step": 6605 }, { "epoch": 13.212, "grad_norm": 1.2692323923110962, "learning_rate": 2e-05, "loss": 0.04398295, "step": 6606 }, { "epoch": 13.214, "grad_norm": 1.6035974025726318, "learning_rate": 2e-05, "loss": 0.03624269, "step": 6607 }, { "epoch": 13.216, "grad_norm": 1.2560043334960938, "learning_rate": 2e-05, "loss": 0.03458342, "step": 6608 }, { "epoch": 13.218, "grad_norm": 2.6903185844421387, "learning_rate": 2e-05, "loss": 0.03339992, "step": 6609 }, { "epoch": 13.22, "grad_norm": 1.8403470516204834, "learning_rate": 2e-05, "loss": 0.03888453, "step": 6610 }, { "epoch": 13.222, "grad_norm": 1.1549571752548218, "learning_rate": 2e-05, "loss": 0.03554495, "step": 6611 }, { "epoch": 13.224, "grad_norm": 2.7406699657440186, "learning_rate": 2e-05, "loss": 0.04288154, "step": 6612 }, { "epoch": 13.226, "grad_norm": 1.6124176979064941, "learning_rate": 2e-05, "loss": 0.04322715, "step": 6613 }, { "epoch": 13.228, "grad_norm": 1.271803617477417, "learning_rate": 2e-05, "loss": 0.04564525, "step": 6614 }, { "epoch": 13.23, "grad_norm": 2.073120594024658, "learning_rate": 2e-05, "loss": 0.04652428, "step": 6615 }, { "epoch": 13.232, "grad_norm": 1.5669819116592407, "learning_rate": 2e-05, "loss": 0.04134148, "step": 6616 }, { "epoch": 13.234, "grad_norm": 0.8931474685668945, "learning_rate": 2e-05, "loss": 0.03444675, "step": 6617 }, { "epoch": 13.236, "grad_norm": 2.4369468688964844, "learning_rate": 2e-05, "loss": 0.04590879, "step": 6618 }, { "epoch": 13.238, "grad_norm": 2.894359588623047, "learning_rate": 2e-05, "loss": 0.04611806, "step": 6619 }, { "epoch": 13.24, "grad_norm": 1.5769118070602417, "learning_rate": 2e-05, "loss": 0.03493973, "step": 6620 }, { "epoch": 13.242, "grad_norm": 1.3417714834213257, "learning_rate": 2e-05, "loss": 0.04988127, "step": 6621 }, { "epoch": 13.244, "grad_norm": 1.8980623483657837, "learning_rate": 2e-05, "loss": 0.05576314, "step": 6622 }, { "epoch": 13.246, "grad_norm": 1.0693109035491943, "learning_rate": 2e-05, "loss": 0.03685558, "step": 6623 }, { "epoch": 13.248, "grad_norm": 1.5265095233917236, "learning_rate": 2e-05, "loss": 0.05537786, "step": 6624 }, { "epoch": 13.25, "grad_norm": 2.378041982650757, "learning_rate": 2e-05, "loss": 0.03925188, "step": 6625 }, { "epoch": 13.252, "grad_norm": 1.2043821811676025, "learning_rate": 2e-05, "loss": 0.030987, "step": 6626 }, { "epoch": 13.254, "grad_norm": 1.6501712799072266, "learning_rate": 2e-05, "loss": 0.04117061, "step": 6627 }, { "epoch": 13.256, "grad_norm": 2.1596996784210205, "learning_rate": 2e-05, "loss": 0.05108644, "step": 6628 }, { "epoch": 13.258, "grad_norm": 2.158721446990967, "learning_rate": 2e-05, "loss": 0.04933317, "step": 6629 }, { "epoch": 13.26, "grad_norm": 1.9402283430099487, "learning_rate": 2e-05, "loss": 0.04095349, "step": 6630 }, { "epoch": 13.262, "grad_norm": 1.688955307006836, "learning_rate": 2e-05, "loss": 0.03976238, "step": 6631 }, { "epoch": 13.264, "grad_norm": 1.8904914855957031, "learning_rate": 2e-05, "loss": 0.05259514, "step": 6632 }, { "epoch": 13.266, "grad_norm": 1.3631291389465332, "learning_rate": 2e-05, "loss": 0.04219753, "step": 6633 }, { "epoch": 13.268, "grad_norm": 1.141372799873352, "learning_rate": 2e-05, "loss": 0.03875969, "step": 6634 }, { "epoch": 13.27, "grad_norm": 1.0180898904800415, "learning_rate": 2e-05, "loss": 0.02585149, "step": 6635 }, { "epoch": 13.272, "grad_norm": 1.6760014295578003, "learning_rate": 2e-05, "loss": 0.04044281, "step": 6636 }, { "epoch": 13.274000000000001, "grad_norm": 2.0628297328948975, "learning_rate": 2e-05, "loss": 0.05332665, "step": 6637 }, { "epoch": 13.276, "grad_norm": 1.05376398563385, "learning_rate": 2e-05, "loss": 0.03772529, "step": 6638 }, { "epoch": 13.278, "grad_norm": 1.2965894937515259, "learning_rate": 2e-05, "loss": 0.04613126, "step": 6639 }, { "epoch": 13.28, "grad_norm": 1.0610442161560059, "learning_rate": 2e-05, "loss": 0.02312069, "step": 6640 }, { "epoch": 13.282, "grad_norm": 1.1080660820007324, "learning_rate": 2e-05, "loss": 0.03545141, "step": 6641 }, { "epoch": 13.284, "grad_norm": 2.201037883758545, "learning_rate": 2e-05, "loss": 0.03849367, "step": 6642 }, { "epoch": 13.286, "grad_norm": 1.9222381114959717, "learning_rate": 2e-05, "loss": 0.0556365, "step": 6643 }, { "epoch": 13.288, "grad_norm": 1.5982733964920044, "learning_rate": 2e-05, "loss": 0.03892791, "step": 6644 }, { "epoch": 13.29, "grad_norm": 1.0715982913970947, "learning_rate": 2e-05, "loss": 0.02838083, "step": 6645 }, { "epoch": 13.292, "grad_norm": 1.6198405027389526, "learning_rate": 2e-05, "loss": 0.05888985, "step": 6646 }, { "epoch": 13.294, "grad_norm": 1.8655623197555542, "learning_rate": 2e-05, "loss": 0.0356196, "step": 6647 }, { "epoch": 13.296, "grad_norm": 1.737970232963562, "learning_rate": 2e-05, "loss": 0.02180437, "step": 6648 }, { "epoch": 13.298, "grad_norm": 0.9337108135223389, "learning_rate": 2e-05, "loss": 0.02707865, "step": 6649 }, { "epoch": 13.3, "grad_norm": 0.9037566781044006, "learning_rate": 2e-05, "loss": 0.02494653, "step": 6650 }, { "epoch": 13.302, "grad_norm": 0.9231191277503967, "learning_rate": 2e-05, "loss": 0.03142556, "step": 6651 }, { "epoch": 13.304, "grad_norm": 3.1863584518432617, "learning_rate": 2e-05, "loss": 0.05755781, "step": 6652 }, { "epoch": 13.306, "grad_norm": 0.9532139897346497, "learning_rate": 2e-05, "loss": 0.03102466, "step": 6653 }, { "epoch": 13.308, "grad_norm": 1.7882435321807861, "learning_rate": 2e-05, "loss": 0.04434324, "step": 6654 }, { "epoch": 13.31, "grad_norm": 1.4374666213989258, "learning_rate": 2e-05, "loss": 0.04442374, "step": 6655 }, { "epoch": 13.312, "grad_norm": 1.6483453512191772, "learning_rate": 2e-05, "loss": 0.04483134, "step": 6656 }, { "epoch": 13.314, "grad_norm": 1.6020697355270386, "learning_rate": 2e-05, "loss": 0.03660455, "step": 6657 }, { "epoch": 13.316, "grad_norm": 2.8730368614196777, "learning_rate": 2e-05, "loss": 0.04435084, "step": 6658 }, { "epoch": 13.318, "grad_norm": 1.9684791564941406, "learning_rate": 2e-05, "loss": 0.04246651, "step": 6659 }, { "epoch": 13.32, "grad_norm": 1.097242832183838, "learning_rate": 2e-05, "loss": 0.03018874, "step": 6660 }, { "epoch": 13.322, "grad_norm": 0.9127419590950012, "learning_rate": 2e-05, "loss": 0.03180817, "step": 6661 }, { "epoch": 13.324, "grad_norm": 1.5429625511169434, "learning_rate": 2e-05, "loss": 0.05303571, "step": 6662 }, { "epoch": 13.326, "grad_norm": 1.4380091428756714, "learning_rate": 2e-05, "loss": 0.05016965, "step": 6663 }, { "epoch": 13.328, "grad_norm": 1.411498785018921, "learning_rate": 2e-05, "loss": 0.05673229, "step": 6664 }, { "epoch": 13.33, "grad_norm": 1.6350773572921753, "learning_rate": 2e-05, "loss": 0.04110835, "step": 6665 }, { "epoch": 13.332, "grad_norm": 1.2463691234588623, "learning_rate": 2e-05, "loss": 0.05402338, "step": 6666 }, { "epoch": 13.334, "grad_norm": 2.4059319496154785, "learning_rate": 2e-05, "loss": 0.044544, "step": 6667 }, { "epoch": 13.336, "grad_norm": 1.8783429861068726, "learning_rate": 2e-05, "loss": 0.04299849, "step": 6668 }, { "epoch": 13.338, "grad_norm": 2.927933692932129, "learning_rate": 2e-05, "loss": 0.04565507, "step": 6669 }, { "epoch": 13.34, "grad_norm": 1.7480547428131104, "learning_rate": 2e-05, "loss": 0.03766192, "step": 6670 }, { "epoch": 13.342, "grad_norm": 2.074599266052246, "learning_rate": 2e-05, "loss": 0.04872315, "step": 6671 }, { "epoch": 13.344, "grad_norm": 2.1086673736572266, "learning_rate": 2e-05, "loss": 0.06374644, "step": 6672 }, { "epoch": 13.346, "grad_norm": 2.058213949203491, "learning_rate": 2e-05, "loss": 0.03495604, "step": 6673 }, { "epoch": 13.348, "grad_norm": 1.0207751989364624, "learning_rate": 2e-05, "loss": 0.03142983, "step": 6674 }, { "epoch": 13.35, "grad_norm": 2.3387722969055176, "learning_rate": 2e-05, "loss": 0.04296496, "step": 6675 }, { "epoch": 13.352, "grad_norm": 1.1725760698318481, "learning_rate": 2e-05, "loss": 0.03329622, "step": 6676 }, { "epoch": 13.354, "grad_norm": 1.225332498550415, "learning_rate": 2e-05, "loss": 0.03967672, "step": 6677 }, { "epoch": 13.356, "grad_norm": 1.1926147937774658, "learning_rate": 2e-05, "loss": 0.03046719, "step": 6678 }, { "epoch": 13.358, "grad_norm": 1.0180567502975464, "learning_rate": 2e-05, "loss": 0.03923386, "step": 6679 }, { "epoch": 13.36, "grad_norm": 1.3657896518707275, "learning_rate": 2e-05, "loss": 0.03993597, "step": 6680 }, { "epoch": 13.362, "grad_norm": 1.367834448814392, "learning_rate": 2e-05, "loss": 0.0454083, "step": 6681 }, { "epoch": 13.364, "grad_norm": 1.1526918411254883, "learning_rate": 2e-05, "loss": 0.03554086, "step": 6682 }, { "epoch": 13.366, "grad_norm": 1.4416218996047974, "learning_rate": 2e-05, "loss": 0.04877956, "step": 6683 }, { "epoch": 13.368, "grad_norm": 1.735988736152649, "learning_rate": 2e-05, "loss": 0.0392142, "step": 6684 }, { "epoch": 13.37, "grad_norm": 1.4720538854599, "learning_rate": 2e-05, "loss": 0.04314131, "step": 6685 }, { "epoch": 13.372, "grad_norm": 1.0763517618179321, "learning_rate": 2e-05, "loss": 0.03109873, "step": 6686 }, { "epoch": 13.374, "grad_norm": 1.1306136846542358, "learning_rate": 2e-05, "loss": 0.04201383, "step": 6687 }, { "epoch": 13.376, "grad_norm": 1.4741770029067993, "learning_rate": 2e-05, "loss": 0.04738778, "step": 6688 }, { "epoch": 13.378, "grad_norm": 1.487601399421692, "learning_rate": 2e-05, "loss": 0.03376631, "step": 6689 }, { "epoch": 13.38, "grad_norm": 1.2999999523162842, "learning_rate": 2e-05, "loss": 0.04101618, "step": 6690 }, { "epoch": 13.382, "grad_norm": 1.1709288358688354, "learning_rate": 2e-05, "loss": 0.03829162, "step": 6691 }, { "epoch": 13.384, "grad_norm": 1.452234148979187, "learning_rate": 2e-05, "loss": 0.03547079, "step": 6692 }, { "epoch": 13.386, "grad_norm": 0.8371902108192444, "learning_rate": 2e-05, "loss": 0.02435498, "step": 6693 }, { "epoch": 13.388, "grad_norm": 1.7057963609695435, "learning_rate": 2e-05, "loss": 0.0380133, "step": 6694 }, { "epoch": 13.39, "grad_norm": 1.3966196775436401, "learning_rate": 2e-05, "loss": 0.04224955, "step": 6695 }, { "epoch": 13.392, "grad_norm": 1.1923191547393799, "learning_rate": 2e-05, "loss": 0.03705193, "step": 6696 }, { "epoch": 13.394, "grad_norm": 0.9897952079772949, "learning_rate": 2e-05, "loss": 0.03031838, "step": 6697 }, { "epoch": 13.396, "grad_norm": 1.9399019479751587, "learning_rate": 2e-05, "loss": 0.05891667, "step": 6698 }, { "epoch": 13.398, "grad_norm": 1.0432417392730713, "learning_rate": 2e-05, "loss": 0.03055418, "step": 6699 }, { "epoch": 13.4, "grad_norm": 0.9006956219673157, "learning_rate": 2e-05, "loss": 0.02497819, "step": 6700 }, { "epoch": 13.402, "grad_norm": 1.248022198677063, "learning_rate": 2e-05, "loss": 0.02955998, "step": 6701 }, { "epoch": 13.404, "grad_norm": 1.5496799945831299, "learning_rate": 2e-05, "loss": 0.06074677, "step": 6702 }, { "epoch": 13.406, "grad_norm": 1.4711843729019165, "learning_rate": 2e-05, "loss": 0.04381853, "step": 6703 }, { "epoch": 13.408, "grad_norm": 1.325208067893982, "learning_rate": 2e-05, "loss": 0.04054024, "step": 6704 }, { "epoch": 13.41, "grad_norm": 1.5213847160339355, "learning_rate": 2e-05, "loss": 0.03479857, "step": 6705 }, { "epoch": 13.412, "grad_norm": 1.5465024709701538, "learning_rate": 2e-05, "loss": 0.03222397, "step": 6706 }, { "epoch": 13.414, "grad_norm": 1.4514657258987427, "learning_rate": 2e-05, "loss": 0.03864145, "step": 6707 }, { "epoch": 13.416, "grad_norm": 1.5519639253616333, "learning_rate": 2e-05, "loss": 0.04974742, "step": 6708 }, { "epoch": 13.418, "grad_norm": 1.543533205986023, "learning_rate": 2e-05, "loss": 0.04505913, "step": 6709 }, { "epoch": 13.42, "grad_norm": 1.244042158126831, "learning_rate": 2e-05, "loss": 0.03692468, "step": 6710 }, { "epoch": 13.422, "grad_norm": 1.8627500534057617, "learning_rate": 2e-05, "loss": 0.04253852, "step": 6711 }, { "epoch": 13.424, "grad_norm": 1.4597536325454712, "learning_rate": 2e-05, "loss": 0.03926862, "step": 6712 }, { "epoch": 13.426, "grad_norm": 1.841884732246399, "learning_rate": 2e-05, "loss": 0.0296302, "step": 6713 }, { "epoch": 13.428, "grad_norm": 2.594111204147339, "learning_rate": 2e-05, "loss": 0.05455346, "step": 6714 }, { "epoch": 13.43, "grad_norm": 1.466094732284546, "learning_rate": 2e-05, "loss": 0.03086076, "step": 6715 }, { "epoch": 13.432, "grad_norm": 2.093371868133545, "learning_rate": 2e-05, "loss": 0.05538841, "step": 6716 }, { "epoch": 13.434, "grad_norm": 1.1023695468902588, "learning_rate": 2e-05, "loss": 0.04121345, "step": 6717 }, { "epoch": 13.436, "grad_norm": 2.032099485397339, "learning_rate": 2e-05, "loss": 0.05646689, "step": 6718 }, { "epoch": 13.438, "grad_norm": 1.150732398033142, "learning_rate": 2e-05, "loss": 0.03391449, "step": 6719 }, { "epoch": 13.44, "grad_norm": 2.225139617919922, "learning_rate": 2e-05, "loss": 0.05393933, "step": 6720 }, { "epoch": 13.442, "grad_norm": 3.746210813522339, "learning_rate": 2e-05, "loss": 0.04924157, "step": 6721 }, { "epoch": 13.444, "grad_norm": 1.8030920028686523, "learning_rate": 2e-05, "loss": 0.04365423, "step": 6722 }, { "epoch": 13.446, "grad_norm": 0.9142201542854309, "learning_rate": 2e-05, "loss": 0.02320842, "step": 6723 }, { "epoch": 13.448, "grad_norm": 1.0032247304916382, "learning_rate": 2e-05, "loss": 0.03920649, "step": 6724 }, { "epoch": 13.45, "grad_norm": 1.1446279287338257, "learning_rate": 2e-05, "loss": 0.0443483, "step": 6725 }, { "epoch": 13.452, "grad_norm": 1.371544599533081, "learning_rate": 2e-05, "loss": 0.05964202, "step": 6726 }, { "epoch": 13.454, "grad_norm": 1.4129619598388672, "learning_rate": 2e-05, "loss": 0.0317092, "step": 6727 }, { "epoch": 13.456, "grad_norm": 2.262296438217163, "learning_rate": 2e-05, "loss": 0.04459076, "step": 6728 }, { "epoch": 13.458, "grad_norm": 1.2286860942840576, "learning_rate": 2e-05, "loss": 0.04396465, "step": 6729 }, { "epoch": 13.46, "grad_norm": 1.2002842426300049, "learning_rate": 2e-05, "loss": 0.02897931, "step": 6730 }, { "epoch": 13.462, "grad_norm": 1.1000972986221313, "learning_rate": 2e-05, "loss": 0.04501866, "step": 6731 }, { "epoch": 13.464, "grad_norm": 1.1960804462432861, "learning_rate": 2e-05, "loss": 0.02948781, "step": 6732 }, { "epoch": 13.466, "grad_norm": 1.0015963315963745, "learning_rate": 2e-05, "loss": 0.03117497, "step": 6733 }, { "epoch": 13.468, "grad_norm": 1.2673149108886719, "learning_rate": 2e-05, "loss": 0.03281594, "step": 6734 }, { "epoch": 13.47, "grad_norm": 1.048366665840149, "learning_rate": 2e-05, "loss": 0.02636184, "step": 6735 }, { "epoch": 13.472, "grad_norm": 2.1534323692321777, "learning_rate": 2e-05, "loss": 0.06946588, "step": 6736 }, { "epoch": 13.474, "grad_norm": 1.2543253898620605, "learning_rate": 2e-05, "loss": 0.03551532, "step": 6737 }, { "epoch": 13.475999999999999, "grad_norm": 2.5918846130371094, "learning_rate": 2e-05, "loss": 0.04691193, "step": 6738 }, { "epoch": 13.478, "grad_norm": 0.7969970107078552, "learning_rate": 2e-05, "loss": 0.01839239, "step": 6739 }, { "epoch": 13.48, "grad_norm": 2.0504300594329834, "learning_rate": 2e-05, "loss": 0.06359866, "step": 6740 }, { "epoch": 13.482, "grad_norm": 0.8780750036239624, "learning_rate": 2e-05, "loss": 0.0225176, "step": 6741 }, { "epoch": 13.484, "grad_norm": 1.1083356142044067, "learning_rate": 2e-05, "loss": 0.0324852, "step": 6742 }, { "epoch": 13.486, "grad_norm": 2.280796527862549, "learning_rate": 2e-05, "loss": 0.0467263, "step": 6743 }, { "epoch": 13.488, "grad_norm": 1.2050200700759888, "learning_rate": 2e-05, "loss": 0.02860709, "step": 6744 }, { "epoch": 13.49, "grad_norm": 1.8593145608901978, "learning_rate": 2e-05, "loss": 0.03531153, "step": 6745 }, { "epoch": 13.492, "grad_norm": 1.798421025276184, "learning_rate": 2e-05, "loss": 0.0394549, "step": 6746 }, { "epoch": 13.494, "grad_norm": 1.5639235973358154, "learning_rate": 2e-05, "loss": 0.05283806, "step": 6747 }, { "epoch": 13.496, "grad_norm": 1.026511549949646, "learning_rate": 2e-05, "loss": 0.02420056, "step": 6748 }, { "epoch": 13.498, "grad_norm": 2.5854506492614746, "learning_rate": 2e-05, "loss": 0.06519131, "step": 6749 }, { "epoch": 13.5, "grad_norm": 1.8973256349563599, "learning_rate": 2e-05, "loss": 0.04225761, "step": 6750 }, { "epoch": 13.502, "grad_norm": 2.533794403076172, "learning_rate": 2e-05, "loss": 0.02219105, "step": 6751 }, { "epoch": 13.504, "grad_norm": 1.8227717876434326, "learning_rate": 2e-05, "loss": 0.04808221, "step": 6752 }, { "epoch": 13.506, "grad_norm": 1.0413390398025513, "learning_rate": 2e-05, "loss": 0.03295096, "step": 6753 }, { "epoch": 13.508, "grad_norm": 1.9891551733016968, "learning_rate": 2e-05, "loss": 0.03309003, "step": 6754 }, { "epoch": 13.51, "grad_norm": 1.173137903213501, "learning_rate": 2e-05, "loss": 0.03191693, "step": 6755 }, { "epoch": 13.512, "grad_norm": 1.9114857912063599, "learning_rate": 2e-05, "loss": 0.05413975, "step": 6756 }, { "epoch": 13.514, "grad_norm": 1.889272689819336, "learning_rate": 2e-05, "loss": 0.0451472, "step": 6757 }, { "epoch": 13.516, "grad_norm": 2.4712700843811035, "learning_rate": 2e-05, "loss": 0.07723338, "step": 6758 }, { "epoch": 13.518, "grad_norm": 2.2369396686553955, "learning_rate": 2e-05, "loss": 0.03549297, "step": 6759 }, { "epoch": 13.52, "grad_norm": 2.845893144607544, "learning_rate": 2e-05, "loss": 0.05792838, "step": 6760 }, { "epoch": 13.522, "grad_norm": 1.513317346572876, "learning_rate": 2e-05, "loss": 0.04458487, "step": 6761 }, { "epoch": 13.524000000000001, "grad_norm": 1.106540560722351, "learning_rate": 2e-05, "loss": 0.03790012, "step": 6762 }, { "epoch": 13.526, "grad_norm": 1.4870750904083252, "learning_rate": 2e-05, "loss": 0.02564323, "step": 6763 }, { "epoch": 13.528, "grad_norm": 1.8651138544082642, "learning_rate": 2e-05, "loss": 0.05272502, "step": 6764 }, { "epoch": 13.53, "grad_norm": 1.0633503198623657, "learning_rate": 2e-05, "loss": 0.04007112, "step": 6765 }, { "epoch": 13.532, "grad_norm": 1.0299681425094604, "learning_rate": 2e-05, "loss": 0.03321412, "step": 6766 }, { "epoch": 13.534, "grad_norm": 2.307370185852051, "learning_rate": 2e-05, "loss": 0.04456577, "step": 6767 }, { "epoch": 13.536, "grad_norm": 0.9137228727340698, "learning_rate": 2e-05, "loss": 0.02563262, "step": 6768 }, { "epoch": 13.538, "grad_norm": 1.7050751447677612, "learning_rate": 2e-05, "loss": 0.05230839, "step": 6769 }, { "epoch": 13.54, "grad_norm": 1.070886492729187, "learning_rate": 2e-05, "loss": 0.02357959, "step": 6770 }, { "epoch": 13.542, "grad_norm": 1.695765495300293, "learning_rate": 2e-05, "loss": 0.04813949, "step": 6771 }, { "epoch": 13.544, "grad_norm": 1.1727943420410156, "learning_rate": 2e-05, "loss": 0.04054714, "step": 6772 }, { "epoch": 13.546, "grad_norm": 0.9893969297409058, "learning_rate": 2e-05, "loss": 0.02479752, "step": 6773 }, { "epoch": 13.548, "grad_norm": 1.2786779403686523, "learning_rate": 2e-05, "loss": 0.04855724, "step": 6774 }, { "epoch": 13.55, "grad_norm": 2.4117813110351562, "learning_rate": 2e-05, "loss": 0.06347778, "step": 6775 }, { "epoch": 13.552, "grad_norm": 1.0595612525939941, "learning_rate": 2e-05, "loss": 0.04373883, "step": 6776 }, { "epoch": 13.554, "grad_norm": 1.1950441598892212, "learning_rate": 2e-05, "loss": 0.04355627, "step": 6777 }, { "epoch": 13.556000000000001, "grad_norm": 1.147063136100769, "learning_rate": 2e-05, "loss": 0.03419853, "step": 6778 }, { "epoch": 13.558, "grad_norm": 1.4784750938415527, "learning_rate": 2e-05, "loss": 0.04642799, "step": 6779 }, { "epoch": 13.56, "grad_norm": 1.2722127437591553, "learning_rate": 2e-05, "loss": 0.03892024, "step": 6780 }, { "epoch": 13.562, "grad_norm": 1.6152026653289795, "learning_rate": 2e-05, "loss": 0.04028412, "step": 6781 }, { "epoch": 13.564, "grad_norm": 1.1551095247268677, "learning_rate": 2e-05, "loss": 0.03943279, "step": 6782 }, { "epoch": 13.566, "grad_norm": 2.295308828353882, "learning_rate": 2e-05, "loss": 0.054483, "step": 6783 }, { "epoch": 13.568, "grad_norm": 1.0823659896850586, "learning_rate": 2e-05, "loss": 0.0385061, "step": 6784 }, { "epoch": 13.57, "grad_norm": 1.4659216403961182, "learning_rate": 2e-05, "loss": 0.0353567, "step": 6785 }, { "epoch": 13.572, "grad_norm": 1.425752878189087, "learning_rate": 2e-05, "loss": 0.03669987, "step": 6786 }, { "epoch": 13.574, "grad_norm": 2.6012632846832275, "learning_rate": 2e-05, "loss": 0.04362625, "step": 6787 }, { "epoch": 13.576, "grad_norm": 1.162837266921997, "learning_rate": 2e-05, "loss": 0.03585674, "step": 6788 }, { "epoch": 13.578, "grad_norm": 1.107102394104004, "learning_rate": 2e-05, "loss": 0.03959781, "step": 6789 }, { "epoch": 13.58, "grad_norm": 1.6900997161865234, "learning_rate": 2e-05, "loss": 0.06017102, "step": 6790 }, { "epoch": 13.582, "grad_norm": 1.4423255920410156, "learning_rate": 2e-05, "loss": 0.04416816, "step": 6791 }, { "epoch": 13.584, "grad_norm": 1.5928566455841064, "learning_rate": 2e-05, "loss": 0.03386574, "step": 6792 }, { "epoch": 13.586, "grad_norm": 1.8104335069656372, "learning_rate": 2e-05, "loss": 0.0400222, "step": 6793 }, { "epoch": 13.588, "grad_norm": 1.451529860496521, "learning_rate": 2e-05, "loss": 0.04577955, "step": 6794 }, { "epoch": 13.59, "grad_norm": 1.7088810205459595, "learning_rate": 2e-05, "loss": 0.03847235, "step": 6795 }, { "epoch": 13.592, "grad_norm": 1.926962971687317, "learning_rate": 2e-05, "loss": 0.03429279, "step": 6796 }, { "epoch": 13.594, "grad_norm": 2.025388479232788, "learning_rate": 2e-05, "loss": 0.04720273, "step": 6797 }, { "epoch": 13.596, "grad_norm": 1.016129732131958, "learning_rate": 2e-05, "loss": 0.03385385, "step": 6798 }, { "epoch": 13.598, "grad_norm": 1.0439811944961548, "learning_rate": 2e-05, "loss": 0.02241655, "step": 6799 }, { "epoch": 13.6, "grad_norm": 0.7543059587478638, "learning_rate": 2e-05, "loss": 0.01822535, "step": 6800 }, { "epoch": 13.602, "grad_norm": 1.9524015188217163, "learning_rate": 2e-05, "loss": 0.04379565, "step": 6801 }, { "epoch": 13.604, "grad_norm": 1.0359333753585815, "learning_rate": 2e-05, "loss": 0.0270812, "step": 6802 }, { "epoch": 13.606, "grad_norm": 1.362512469291687, "learning_rate": 2e-05, "loss": 0.03527067, "step": 6803 }, { "epoch": 13.608, "grad_norm": 2.679394245147705, "learning_rate": 2e-05, "loss": 0.06183048, "step": 6804 }, { "epoch": 13.61, "grad_norm": 1.175911784172058, "learning_rate": 2e-05, "loss": 0.03599912, "step": 6805 }, { "epoch": 13.612, "grad_norm": 2.0313289165496826, "learning_rate": 2e-05, "loss": 0.06214502, "step": 6806 }, { "epoch": 13.614, "grad_norm": 1.8251034021377563, "learning_rate": 2e-05, "loss": 0.04615673, "step": 6807 }, { "epoch": 13.616, "grad_norm": 1.214990258216858, "learning_rate": 2e-05, "loss": 0.03257399, "step": 6808 }, { "epoch": 13.618, "grad_norm": 3.3506529331207275, "learning_rate": 2e-05, "loss": 0.04304178, "step": 6809 }, { "epoch": 13.62, "grad_norm": 1.2393721342086792, "learning_rate": 2e-05, "loss": 0.04934491, "step": 6810 }, { "epoch": 13.622, "grad_norm": 2.080261468887329, "learning_rate": 2e-05, "loss": 0.04631698, "step": 6811 }, { "epoch": 13.624, "grad_norm": 1.2759827375411987, "learning_rate": 2e-05, "loss": 0.03095847, "step": 6812 }, { "epoch": 13.626, "grad_norm": 1.7272143363952637, "learning_rate": 2e-05, "loss": 0.03796447, "step": 6813 }, { "epoch": 13.628, "grad_norm": 1.4961469173431396, "learning_rate": 2e-05, "loss": 0.04623634, "step": 6814 }, { "epoch": 13.63, "grad_norm": 1.548699140548706, "learning_rate": 2e-05, "loss": 0.0516596, "step": 6815 }, { "epoch": 13.632, "grad_norm": 1.285744309425354, "learning_rate": 2e-05, "loss": 0.03057302, "step": 6816 }, { "epoch": 13.634, "grad_norm": 1.525428295135498, "learning_rate": 2e-05, "loss": 0.05115666, "step": 6817 }, { "epoch": 13.636, "grad_norm": 1.7536441087722778, "learning_rate": 2e-05, "loss": 0.04631253, "step": 6818 }, { "epoch": 13.638, "grad_norm": 1.1306259632110596, "learning_rate": 2e-05, "loss": 0.04088272, "step": 6819 }, { "epoch": 13.64, "grad_norm": 1.1280131340026855, "learning_rate": 2e-05, "loss": 0.04373218, "step": 6820 }, { "epoch": 13.642, "grad_norm": 3.139237642288208, "learning_rate": 2e-05, "loss": 0.04641888, "step": 6821 }, { "epoch": 13.644, "grad_norm": 1.6292080879211426, "learning_rate": 2e-05, "loss": 0.04293904, "step": 6822 }, { "epoch": 13.646, "grad_norm": 1.7208157777786255, "learning_rate": 2e-05, "loss": 0.04404939, "step": 6823 }, { "epoch": 13.648, "grad_norm": 1.2277165651321411, "learning_rate": 2e-05, "loss": 0.03341801, "step": 6824 }, { "epoch": 13.65, "grad_norm": 1.2899742126464844, "learning_rate": 2e-05, "loss": 0.03240266, "step": 6825 }, { "epoch": 13.652, "grad_norm": 1.4660921096801758, "learning_rate": 2e-05, "loss": 0.03453447, "step": 6826 }, { "epoch": 13.654, "grad_norm": 1.2889078855514526, "learning_rate": 2e-05, "loss": 0.03558958, "step": 6827 }, { "epoch": 13.656, "grad_norm": 1.38223397731781, "learning_rate": 2e-05, "loss": 0.05393769, "step": 6828 }, { "epoch": 13.658, "grad_norm": 1.3276984691619873, "learning_rate": 2e-05, "loss": 0.03997407, "step": 6829 }, { "epoch": 13.66, "grad_norm": 1.929145097732544, "learning_rate": 2e-05, "loss": 0.0430852, "step": 6830 }, { "epoch": 13.662, "grad_norm": 1.0870046615600586, "learning_rate": 2e-05, "loss": 0.03382558, "step": 6831 }, { "epoch": 13.664, "grad_norm": 1.1914732456207275, "learning_rate": 2e-05, "loss": 0.0535154, "step": 6832 }, { "epoch": 13.666, "grad_norm": 2.2056117057800293, "learning_rate": 2e-05, "loss": 0.05700465, "step": 6833 }, { "epoch": 13.668, "grad_norm": 1.1149606704711914, "learning_rate": 2e-05, "loss": 0.03322265, "step": 6834 }, { "epoch": 13.67, "grad_norm": 2.3128271102905273, "learning_rate": 2e-05, "loss": 0.07779928, "step": 6835 }, { "epoch": 13.672, "grad_norm": 1.0935770273208618, "learning_rate": 2e-05, "loss": 0.03507974, "step": 6836 }, { "epoch": 13.674, "grad_norm": 1.0744456052780151, "learning_rate": 2e-05, "loss": 0.04397395, "step": 6837 }, { "epoch": 13.676, "grad_norm": 1.3249156475067139, "learning_rate": 2e-05, "loss": 0.04180006, "step": 6838 }, { "epoch": 13.678, "grad_norm": 1.1021473407745361, "learning_rate": 2e-05, "loss": 0.03645816, "step": 6839 }, { "epoch": 13.68, "grad_norm": 1.929029941558838, "learning_rate": 2e-05, "loss": 0.02640364, "step": 6840 }, { "epoch": 13.682, "grad_norm": 0.9546433687210083, "learning_rate": 2e-05, "loss": 0.02705884, "step": 6841 }, { "epoch": 13.684, "grad_norm": 1.073384165763855, "learning_rate": 2e-05, "loss": 0.04102583, "step": 6842 }, { "epoch": 13.686, "grad_norm": 1.257896900177002, "learning_rate": 2e-05, "loss": 0.04304846, "step": 6843 }, { "epoch": 13.688, "grad_norm": 1.318122148513794, "learning_rate": 2e-05, "loss": 0.04488578, "step": 6844 }, { "epoch": 13.69, "grad_norm": 0.9590141773223877, "learning_rate": 2e-05, "loss": 0.02748418, "step": 6845 }, { "epoch": 13.692, "grad_norm": 1.7860455513000488, "learning_rate": 2e-05, "loss": 0.05183525, "step": 6846 }, { "epoch": 13.693999999999999, "grad_norm": 0.9630289673805237, "learning_rate": 2e-05, "loss": 0.03061388, "step": 6847 }, { "epoch": 13.696, "grad_norm": 1.2916054725646973, "learning_rate": 2e-05, "loss": 0.03421018, "step": 6848 }, { "epoch": 13.698, "grad_norm": 2.0008859634399414, "learning_rate": 2e-05, "loss": 0.04936755, "step": 6849 }, { "epoch": 13.7, "grad_norm": 2.6435530185699463, "learning_rate": 2e-05, "loss": 0.06780395, "step": 6850 }, { "epoch": 13.702, "grad_norm": 0.9989877939224243, "learning_rate": 2e-05, "loss": 0.03243184, "step": 6851 }, { "epoch": 13.704, "grad_norm": 1.6317846775054932, "learning_rate": 2e-05, "loss": 0.0594992, "step": 6852 }, { "epoch": 13.706, "grad_norm": 1.2833094596862793, "learning_rate": 2e-05, "loss": 0.03055147, "step": 6853 }, { "epoch": 13.708, "grad_norm": 0.9552610516548157, "learning_rate": 2e-05, "loss": 0.0297171, "step": 6854 }, { "epoch": 13.71, "grad_norm": 1.4244675636291504, "learning_rate": 2e-05, "loss": 0.04444023, "step": 6855 }, { "epoch": 13.712, "grad_norm": 1.2428985834121704, "learning_rate": 2e-05, "loss": 0.04104583, "step": 6856 }, { "epoch": 13.714, "grad_norm": 1.4677211046218872, "learning_rate": 2e-05, "loss": 0.05508178, "step": 6857 }, { "epoch": 13.716, "grad_norm": 1.8807919025421143, "learning_rate": 2e-05, "loss": 0.0591247, "step": 6858 }, { "epoch": 13.718, "grad_norm": 1.9581363201141357, "learning_rate": 2e-05, "loss": 0.04779023, "step": 6859 }, { "epoch": 13.72, "grad_norm": 2.0055696964263916, "learning_rate": 2e-05, "loss": 0.0408286, "step": 6860 }, { "epoch": 13.722, "grad_norm": 1.830039143562317, "learning_rate": 2e-05, "loss": 0.04613936, "step": 6861 }, { "epoch": 13.724, "grad_norm": 2.8258206844329834, "learning_rate": 2e-05, "loss": 0.04065733, "step": 6862 }, { "epoch": 13.725999999999999, "grad_norm": 1.871401309967041, "learning_rate": 2e-05, "loss": 0.04201019, "step": 6863 }, { "epoch": 13.728, "grad_norm": 1.449409008026123, "learning_rate": 2e-05, "loss": 0.03992969, "step": 6864 }, { "epoch": 13.73, "grad_norm": 0.9711210131645203, "learning_rate": 2e-05, "loss": 0.0266035, "step": 6865 }, { "epoch": 13.732, "grad_norm": 3.511932849884033, "learning_rate": 2e-05, "loss": 0.04639168, "step": 6866 }, { "epoch": 13.734, "grad_norm": 1.1094722747802734, "learning_rate": 2e-05, "loss": 0.0350898, "step": 6867 }, { "epoch": 13.736, "grad_norm": 1.4972585439682007, "learning_rate": 2e-05, "loss": 0.04061377, "step": 6868 }, { "epoch": 13.738, "grad_norm": 1.1617933511734009, "learning_rate": 2e-05, "loss": 0.0347375, "step": 6869 }, { "epoch": 13.74, "grad_norm": 1.5653934478759766, "learning_rate": 2e-05, "loss": 0.04277583, "step": 6870 }, { "epoch": 13.742, "grad_norm": 1.7562743425369263, "learning_rate": 2e-05, "loss": 0.04624029, "step": 6871 }, { "epoch": 13.744, "grad_norm": 1.6248326301574707, "learning_rate": 2e-05, "loss": 0.05260355, "step": 6872 }, { "epoch": 13.746, "grad_norm": 1.7137155532836914, "learning_rate": 2e-05, "loss": 0.05001482, "step": 6873 }, { "epoch": 13.748, "grad_norm": 1.3740524053573608, "learning_rate": 2e-05, "loss": 0.04304221, "step": 6874 }, { "epoch": 13.75, "grad_norm": 1.070404291152954, "learning_rate": 2e-05, "loss": 0.02320381, "step": 6875 }, { "epoch": 13.752, "grad_norm": 2.6879231929779053, "learning_rate": 2e-05, "loss": 0.04556799, "step": 6876 }, { "epoch": 13.754, "grad_norm": 2.933372974395752, "learning_rate": 2e-05, "loss": 0.04154959, "step": 6877 }, { "epoch": 13.756, "grad_norm": 1.3636802434921265, "learning_rate": 2e-05, "loss": 0.03800491, "step": 6878 }, { "epoch": 13.758, "grad_norm": 1.157073736190796, "learning_rate": 2e-05, "loss": 0.03626024, "step": 6879 }, { "epoch": 13.76, "grad_norm": 1.7253233194351196, "learning_rate": 2e-05, "loss": 0.04370718, "step": 6880 }, { "epoch": 13.762, "grad_norm": 1.3973212242126465, "learning_rate": 2e-05, "loss": 0.03621276, "step": 6881 }, { "epoch": 13.764, "grad_norm": 1.7019752264022827, "learning_rate": 2e-05, "loss": 0.03458362, "step": 6882 }, { "epoch": 13.766, "grad_norm": 1.227394700050354, "learning_rate": 2e-05, "loss": 0.04752816, "step": 6883 }, { "epoch": 13.768, "grad_norm": 1.493351697921753, "learning_rate": 2e-05, "loss": 0.03362212, "step": 6884 }, { "epoch": 13.77, "grad_norm": 1.694974422454834, "learning_rate": 2e-05, "loss": 0.06347952, "step": 6885 }, { "epoch": 13.772, "grad_norm": 1.4916173219680786, "learning_rate": 2e-05, "loss": 0.04393812, "step": 6886 }, { "epoch": 13.774000000000001, "grad_norm": 1.1646591424942017, "learning_rate": 2e-05, "loss": 0.03405294, "step": 6887 }, { "epoch": 13.776, "grad_norm": 1.1886578798294067, "learning_rate": 2e-05, "loss": 0.02821852, "step": 6888 }, { "epoch": 13.778, "grad_norm": 1.2024027109146118, "learning_rate": 2e-05, "loss": 0.0455913, "step": 6889 }, { "epoch": 13.78, "grad_norm": 1.684903621673584, "learning_rate": 2e-05, "loss": 0.04784643, "step": 6890 }, { "epoch": 13.782, "grad_norm": 0.9186260104179382, "learning_rate": 2e-05, "loss": 0.02574374, "step": 6891 }, { "epoch": 13.784, "grad_norm": 1.2807894945144653, "learning_rate": 2e-05, "loss": 0.03100124, "step": 6892 }, { "epoch": 13.786, "grad_norm": 2.1691181659698486, "learning_rate": 2e-05, "loss": 0.02056332, "step": 6893 }, { "epoch": 13.788, "grad_norm": 1.8777505159378052, "learning_rate": 2e-05, "loss": 0.04661623, "step": 6894 }, { "epoch": 13.79, "grad_norm": 1.4597963094711304, "learning_rate": 2e-05, "loss": 0.05256153, "step": 6895 }, { "epoch": 13.792, "grad_norm": 1.8397506475448608, "learning_rate": 2e-05, "loss": 0.05605321, "step": 6896 }, { "epoch": 13.794, "grad_norm": 1.1106833219528198, "learning_rate": 2e-05, "loss": 0.03157957, "step": 6897 }, { "epoch": 13.796, "grad_norm": 1.118843674659729, "learning_rate": 2e-05, "loss": 0.0361166, "step": 6898 }, { "epoch": 13.798, "grad_norm": 1.4842638969421387, "learning_rate": 2e-05, "loss": 0.04007118, "step": 6899 }, { "epoch": 13.8, "grad_norm": 1.4503836631774902, "learning_rate": 2e-05, "loss": 0.04612395, "step": 6900 }, { "epoch": 13.802, "grad_norm": 1.2816123962402344, "learning_rate": 2e-05, "loss": 0.04101133, "step": 6901 }, { "epoch": 13.804, "grad_norm": 1.273935317993164, "learning_rate": 2e-05, "loss": 0.03615731, "step": 6902 }, { "epoch": 13.806000000000001, "grad_norm": 1.308467984199524, "learning_rate": 2e-05, "loss": 0.04421499, "step": 6903 }, { "epoch": 13.808, "grad_norm": 1.472285509109497, "learning_rate": 2e-05, "loss": 0.04577631, "step": 6904 }, { "epoch": 13.81, "grad_norm": 1.998026728630066, "learning_rate": 2e-05, "loss": 0.0379182, "step": 6905 }, { "epoch": 13.812, "grad_norm": 1.1190993785858154, "learning_rate": 2e-05, "loss": 0.03913017, "step": 6906 }, { "epoch": 13.814, "grad_norm": 3.0139517784118652, "learning_rate": 2e-05, "loss": 0.0456089, "step": 6907 }, { "epoch": 13.816, "grad_norm": 1.1194919347763062, "learning_rate": 2e-05, "loss": 0.04423294, "step": 6908 }, { "epoch": 13.818, "grad_norm": 1.2570995092391968, "learning_rate": 2e-05, "loss": 0.03813727, "step": 6909 }, { "epoch": 13.82, "grad_norm": 1.317740797996521, "learning_rate": 2e-05, "loss": 0.04127891, "step": 6910 }, { "epoch": 13.822, "grad_norm": 1.1890889406204224, "learning_rate": 2e-05, "loss": 0.0370289, "step": 6911 }, { "epoch": 13.824, "grad_norm": 3.053663730621338, "learning_rate": 2e-05, "loss": 0.06167036, "step": 6912 }, { "epoch": 13.826, "grad_norm": 1.2635729312896729, "learning_rate": 2e-05, "loss": 0.05142916, "step": 6913 }, { "epoch": 13.828, "grad_norm": 2.6170880794525146, "learning_rate": 2e-05, "loss": 0.0427838, "step": 6914 }, { "epoch": 13.83, "grad_norm": 1.7016270160675049, "learning_rate": 2e-05, "loss": 0.02756583, "step": 6915 }, { "epoch": 13.832, "grad_norm": 1.1118059158325195, "learning_rate": 2e-05, "loss": 0.04283454, "step": 6916 }, { "epoch": 13.834, "grad_norm": 1.7809730768203735, "learning_rate": 2e-05, "loss": 0.04107636, "step": 6917 }, { "epoch": 13.836, "grad_norm": 1.776057243347168, "learning_rate": 2e-05, "loss": 0.06522802, "step": 6918 }, { "epoch": 13.838, "grad_norm": 2.162116765975952, "learning_rate": 2e-05, "loss": 0.0393892, "step": 6919 }, { "epoch": 13.84, "grad_norm": 1.280896544456482, "learning_rate": 2e-05, "loss": 0.05017384, "step": 6920 }, { "epoch": 13.842, "grad_norm": 1.4492366313934326, "learning_rate": 2e-05, "loss": 0.04526861, "step": 6921 }, { "epoch": 13.844, "grad_norm": 1.8244743347167969, "learning_rate": 2e-05, "loss": 0.06141206, "step": 6922 }, { "epoch": 13.846, "grad_norm": 1.2145180702209473, "learning_rate": 2e-05, "loss": 0.03708153, "step": 6923 }, { "epoch": 13.848, "grad_norm": 1.1694141626358032, "learning_rate": 2e-05, "loss": 0.03655352, "step": 6924 }, { "epoch": 13.85, "grad_norm": 2.419154644012451, "learning_rate": 2e-05, "loss": 0.06464878, "step": 6925 }, { "epoch": 13.852, "grad_norm": 1.4023338556289673, "learning_rate": 2e-05, "loss": 0.03684681, "step": 6926 }, { "epoch": 13.854, "grad_norm": 1.3352280855178833, "learning_rate": 2e-05, "loss": 0.03953333, "step": 6927 }, { "epoch": 13.856, "grad_norm": 0.8786143660545349, "learning_rate": 2e-05, "loss": 0.0229125, "step": 6928 }, { "epoch": 13.858, "grad_norm": 1.3116097450256348, "learning_rate": 2e-05, "loss": 0.04031592, "step": 6929 }, { "epoch": 13.86, "grad_norm": 1.9184274673461914, "learning_rate": 2e-05, "loss": 0.0427547, "step": 6930 }, { "epoch": 13.862, "grad_norm": 1.721928596496582, "learning_rate": 2e-05, "loss": 0.06148085, "step": 6931 }, { "epoch": 13.864, "grad_norm": 1.0398845672607422, "learning_rate": 2e-05, "loss": 0.0353225, "step": 6932 }, { "epoch": 13.866, "grad_norm": 0.929514467716217, "learning_rate": 2e-05, "loss": 0.02477574, "step": 6933 }, { "epoch": 13.868, "grad_norm": 1.3540958166122437, "learning_rate": 2e-05, "loss": 0.03898114, "step": 6934 }, { "epoch": 13.87, "grad_norm": 1.7967790365219116, "learning_rate": 2e-05, "loss": 0.04478693, "step": 6935 }, { "epoch": 13.872, "grad_norm": 1.5192323923110962, "learning_rate": 2e-05, "loss": 0.04696514, "step": 6936 }, { "epoch": 13.874, "grad_norm": 1.3892743587493896, "learning_rate": 2e-05, "loss": 0.04166014, "step": 6937 }, { "epoch": 13.876, "grad_norm": 1.4207967519760132, "learning_rate": 2e-05, "loss": 0.03712838, "step": 6938 }, { "epoch": 13.878, "grad_norm": 1.5932114124298096, "learning_rate": 2e-05, "loss": 0.03432607, "step": 6939 }, { "epoch": 13.88, "grad_norm": 1.9691510200500488, "learning_rate": 2e-05, "loss": 0.03547703, "step": 6940 }, { "epoch": 13.882, "grad_norm": 1.0378472805023193, "learning_rate": 2e-05, "loss": 0.02458699, "step": 6941 }, { "epoch": 13.884, "grad_norm": 2.503615140914917, "learning_rate": 2e-05, "loss": 0.04526497, "step": 6942 }, { "epoch": 13.886, "grad_norm": 1.3788117170333862, "learning_rate": 2e-05, "loss": 0.04195864, "step": 6943 }, { "epoch": 13.888, "grad_norm": 1.6336097717285156, "learning_rate": 2e-05, "loss": 0.03892026, "step": 6944 }, { "epoch": 13.89, "grad_norm": 1.337511420249939, "learning_rate": 2e-05, "loss": 0.03862485, "step": 6945 }, { "epoch": 13.892, "grad_norm": 1.2629504203796387, "learning_rate": 2e-05, "loss": 0.03670635, "step": 6946 }, { "epoch": 13.894, "grad_norm": 1.307521104812622, "learning_rate": 2e-05, "loss": 0.04121678, "step": 6947 }, { "epoch": 13.896, "grad_norm": 1.2313232421875, "learning_rate": 2e-05, "loss": 0.02896846, "step": 6948 }, { "epoch": 13.898, "grad_norm": 1.50730299949646, "learning_rate": 2e-05, "loss": 0.04983806, "step": 6949 }, { "epoch": 13.9, "grad_norm": 1.2840025424957275, "learning_rate": 2e-05, "loss": 0.03766822, "step": 6950 }, { "epoch": 13.902, "grad_norm": 1.5729337930679321, "learning_rate": 2e-05, "loss": 0.04883525, "step": 6951 }, { "epoch": 13.904, "grad_norm": 1.3678689002990723, "learning_rate": 2e-05, "loss": 0.02985792, "step": 6952 }, { "epoch": 13.906, "grad_norm": 1.845995545387268, "learning_rate": 2e-05, "loss": 0.03586386, "step": 6953 }, { "epoch": 13.908, "grad_norm": 1.7878332138061523, "learning_rate": 2e-05, "loss": 0.03654428, "step": 6954 }, { "epoch": 13.91, "grad_norm": 3.085512161254883, "learning_rate": 2e-05, "loss": 0.05120496, "step": 6955 }, { "epoch": 13.912, "grad_norm": 3.0082015991210938, "learning_rate": 2e-05, "loss": 0.05480274, "step": 6956 }, { "epoch": 13.914, "grad_norm": 1.2339752912521362, "learning_rate": 2e-05, "loss": 0.03606438, "step": 6957 }, { "epoch": 13.916, "grad_norm": 1.9148188829421997, "learning_rate": 2e-05, "loss": 0.04195002, "step": 6958 }, { "epoch": 13.918, "grad_norm": 1.1235597133636475, "learning_rate": 2e-05, "loss": 0.0217904, "step": 6959 }, { "epoch": 13.92, "grad_norm": 2.466542959213257, "learning_rate": 2e-05, "loss": 0.06078681, "step": 6960 }, { "epoch": 13.922, "grad_norm": 1.385965347290039, "learning_rate": 2e-05, "loss": 0.02699665, "step": 6961 }, { "epoch": 13.924, "grad_norm": 1.464664340019226, "learning_rate": 2e-05, "loss": 0.03695335, "step": 6962 }, { "epoch": 13.926, "grad_norm": 1.523497462272644, "learning_rate": 2e-05, "loss": 0.04739627, "step": 6963 }, { "epoch": 13.928, "grad_norm": 2.7052083015441895, "learning_rate": 2e-05, "loss": 0.05714166, "step": 6964 }, { "epoch": 13.93, "grad_norm": 1.4660812616348267, "learning_rate": 2e-05, "loss": 0.03287406, "step": 6965 }, { "epoch": 13.932, "grad_norm": 1.166961669921875, "learning_rate": 2e-05, "loss": 0.03073209, "step": 6966 }, { "epoch": 13.934, "grad_norm": 2.519378423690796, "learning_rate": 2e-05, "loss": 0.06768637, "step": 6967 }, { "epoch": 13.936, "grad_norm": 1.424590826034546, "learning_rate": 2e-05, "loss": 0.03274088, "step": 6968 }, { "epoch": 13.938, "grad_norm": 1.7873649597167969, "learning_rate": 2e-05, "loss": 0.0431348, "step": 6969 }, { "epoch": 13.94, "grad_norm": 1.6397364139556885, "learning_rate": 2e-05, "loss": 0.05216152, "step": 6970 }, { "epoch": 13.942, "grad_norm": 1.9869099855422974, "learning_rate": 2e-05, "loss": 0.042454, "step": 6971 }, { "epoch": 13.943999999999999, "grad_norm": 1.582697868347168, "learning_rate": 2e-05, "loss": 0.04447088, "step": 6972 }, { "epoch": 13.946, "grad_norm": 1.1062511205673218, "learning_rate": 2e-05, "loss": 0.03642577, "step": 6973 }, { "epoch": 13.948, "grad_norm": 1.2900327444076538, "learning_rate": 2e-05, "loss": 0.04951657, "step": 6974 }, { "epoch": 13.95, "grad_norm": 1.1730434894561768, "learning_rate": 2e-05, "loss": 0.03196269, "step": 6975 }, { "epoch": 13.952, "grad_norm": 1.5335745811462402, "learning_rate": 2e-05, "loss": 0.05588776, "step": 6976 }, { "epoch": 13.954, "grad_norm": 2.73171329498291, "learning_rate": 2e-05, "loss": 0.0453019, "step": 6977 }, { "epoch": 13.956, "grad_norm": 1.3756558895111084, "learning_rate": 2e-05, "loss": 0.04042321, "step": 6978 }, { "epoch": 13.958, "grad_norm": 1.025495171546936, "learning_rate": 2e-05, "loss": 0.03254474, "step": 6979 }, { "epoch": 13.96, "grad_norm": 0.8832976818084717, "learning_rate": 2e-05, "loss": 0.0262373, "step": 6980 }, { "epoch": 13.962, "grad_norm": 1.3270471096038818, "learning_rate": 2e-05, "loss": 0.03066928, "step": 6981 }, { "epoch": 13.964, "grad_norm": 1.8798532485961914, "learning_rate": 2e-05, "loss": 0.03866819, "step": 6982 }, { "epoch": 13.966, "grad_norm": 1.69099760055542, "learning_rate": 2e-05, "loss": 0.05059724, "step": 6983 }, { "epoch": 13.968, "grad_norm": 1.9903326034545898, "learning_rate": 2e-05, "loss": 0.05271498, "step": 6984 }, { "epoch": 13.97, "grad_norm": 1.4454317092895508, "learning_rate": 2e-05, "loss": 0.03909637, "step": 6985 }, { "epoch": 13.972, "grad_norm": 1.4461326599121094, "learning_rate": 2e-05, "loss": 0.04804809, "step": 6986 }, { "epoch": 13.974, "grad_norm": 1.59605872631073, "learning_rate": 2e-05, "loss": 0.04185, "step": 6987 }, { "epoch": 13.975999999999999, "grad_norm": 1.0969065427780151, "learning_rate": 2e-05, "loss": 0.03130201, "step": 6988 }, { "epoch": 13.978, "grad_norm": 1.317733645439148, "learning_rate": 2e-05, "loss": 0.03786597, "step": 6989 }, { "epoch": 13.98, "grad_norm": 1.1903131008148193, "learning_rate": 2e-05, "loss": 0.03440776, "step": 6990 }, { "epoch": 13.982, "grad_norm": 1.5716062784194946, "learning_rate": 2e-05, "loss": 0.03851765, "step": 6991 }, { "epoch": 13.984, "grad_norm": 1.9664435386657715, "learning_rate": 2e-05, "loss": 0.05616831, "step": 6992 }, { "epoch": 13.986, "grad_norm": 2.0219123363494873, "learning_rate": 2e-05, "loss": 0.04097972, "step": 6993 }, { "epoch": 13.988, "grad_norm": 1.4964755773544312, "learning_rate": 2e-05, "loss": 0.05085189, "step": 6994 }, { "epoch": 13.99, "grad_norm": 1.520508885383606, "learning_rate": 2e-05, "loss": 0.03831444, "step": 6995 }, { "epoch": 13.992, "grad_norm": 1.3518579006195068, "learning_rate": 2e-05, "loss": 0.0365081, "step": 6996 }, { "epoch": 13.994, "grad_norm": 2.745910882949829, "learning_rate": 2e-05, "loss": 0.03853264, "step": 6997 }, { "epoch": 13.996, "grad_norm": 1.2294520139694214, "learning_rate": 2e-05, "loss": 0.0548241, "step": 6998 }, { "epoch": 13.998, "grad_norm": 1.276281714439392, "learning_rate": 2e-05, "loss": 0.04351184, "step": 6999 }, { "epoch": 14.0, "grad_norm": 1.2956624031066895, "learning_rate": 2e-05, "loss": 0.04201851, "step": 7000 }, { "epoch": 14.0, "eval_performance": { "AngleClassification_1": 0.99, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9600798403193613, "Equal_1": 0.996, "Equal_2": 0.9720558882235529, "Equal_3": 0.9001996007984032, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9900199600798403, "Parallel_1": 0.9939879759519038, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.994, "Perpendicular_1": 0.994, "Perpendicular_2": 0.976, "Perpendicular_3": 0.7324649298597194, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.9976666666666667, "PointLiesOnCircle_3": 0.9916, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9859719438877755, "PointLiesOnLine_3": 0.9720558882235529 }, "eval_runtime": 318.9857, "eval_samples_per_second": 32.917, "eval_steps_per_second": 0.658, "step": 7000 }, { "epoch": 14.002, "grad_norm": 1.2843459844589233, "learning_rate": 2e-05, "loss": 0.03288523, "step": 7001 }, { "epoch": 14.004, "grad_norm": 1.4108099937438965, "learning_rate": 2e-05, "loss": 0.04203776, "step": 7002 }, { "epoch": 14.006, "grad_norm": 1.3905504941940308, "learning_rate": 2e-05, "loss": 0.03418431, "step": 7003 }, { "epoch": 14.008, "grad_norm": 1.3126283884048462, "learning_rate": 2e-05, "loss": 0.03478189, "step": 7004 }, { "epoch": 14.01, "grad_norm": 1.513098120689392, "learning_rate": 2e-05, "loss": 0.02606986, "step": 7005 }, { "epoch": 14.012, "grad_norm": 0.7904066443443298, "learning_rate": 2e-05, "loss": 0.01787382, "step": 7006 }, { "epoch": 14.014, "grad_norm": 3.054617166519165, "learning_rate": 2e-05, "loss": 0.04634954, "step": 7007 }, { "epoch": 14.016, "grad_norm": 1.0060993432998657, "learning_rate": 2e-05, "loss": 0.03051211, "step": 7008 }, { "epoch": 14.018, "grad_norm": 1.1262900829315186, "learning_rate": 2e-05, "loss": 0.04555785, "step": 7009 }, { "epoch": 14.02, "grad_norm": 1.0693005323410034, "learning_rate": 2e-05, "loss": 0.02980948, "step": 7010 }, { "epoch": 14.022, "grad_norm": 2.0782830715179443, "learning_rate": 2e-05, "loss": 0.04862558, "step": 7011 }, { "epoch": 14.024, "grad_norm": 1.079984188079834, "learning_rate": 2e-05, "loss": 0.03432882, "step": 7012 }, { "epoch": 14.026, "grad_norm": 1.2360132932662964, "learning_rate": 2e-05, "loss": 0.02996394, "step": 7013 }, { "epoch": 14.028, "grad_norm": 1.3257410526275635, "learning_rate": 2e-05, "loss": 0.04203, "step": 7014 }, { "epoch": 14.03, "grad_norm": 1.318507194519043, "learning_rate": 2e-05, "loss": 0.04230007, "step": 7015 }, { "epoch": 14.032, "grad_norm": 1.3276084661483765, "learning_rate": 2e-05, "loss": 0.03894487, "step": 7016 }, { "epoch": 14.034, "grad_norm": 3.99560809135437, "learning_rate": 2e-05, "loss": 0.04019061, "step": 7017 }, { "epoch": 14.036, "grad_norm": 0.97989422082901, "learning_rate": 2e-05, "loss": 0.03579887, "step": 7018 }, { "epoch": 14.038, "grad_norm": 3.5206971168518066, "learning_rate": 2e-05, "loss": 0.04952036, "step": 7019 }, { "epoch": 14.04, "grad_norm": 1.5767422914505005, "learning_rate": 2e-05, "loss": 0.04102598, "step": 7020 }, { "epoch": 14.042, "grad_norm": 2.001638412475586, "learning_rate": 2e-05, "loss": 0.04028576, "step": 7021 }, { "epoch": 14.044, "grad_norm": 1.0773245096206665, "learning_rate": 2e-05, "loss": 0.04221611, "step": 7022 }, { "epoch": 14.046, "grad_norm": 1.02744460105896, "learning_rate": 2e-05, "loss": 0.02617998, "step": 7023 }, { "epoch": 14.048, "grad_norm": 1.0974265336990356, "learning_rate": 2e-05, "loss": 0.02758575, "step": 7024 }, { "epoch": 14.05, "grad_norm": 1.1479986906051636, "learning_rate": 2e-05, "loss": 0.03531769, "step": 7025 }, { "epoch": 14.052, "grad_norm": 1.6043879985809326, "learning_rate": 2e-05, "loss": 0.04604035, "step": 7026 }, { "epoch": 14.054, "grad_norm": 1.2420005798339844, "learning_rate": 2e-05, "loss": 0.04582789, "step": 7027 }, { "epoch": 14.056, "grad_norm": 2.2049918174743652, "learning_rate": 2e-05, "loss": 0.05222199, "step": 7028 }, { "epoch": 14.058, "grad_norm": 2.92496395111084, "learning_rate": 2e-05, "loss": 0.04773784, "step": 7029 }, { "epoch": 14.06, "grad_norm": 1.3038673400878906, "learning_rate": 2e-05, "loss": 0.0424346, "step": 7030 }, { "epoch": 14.062, "grad_norm": 1.3948389291763306, "learning_rate": 2e-05, "loss": 0.0277782, "step": 7031 }, { "epoch": 14.064, "grad_norm": 1.42351233959198, "learning_rate": 2e-05, "loss": 0.04358821, "step": 7032 }, { "epoch": 14.066, "grad_norm": 1.726454257965088, "learning_rate": 2e-05, "loss": 0.050067, "step": 7033 }, { "epoch": 14.068, "grad_norm": 1.3726768493652344, "learning_rate": 2e-05, "loss": 0.04407897, "step": 7034 }, { "epoch": 14.07, "grad_norm": 1.9279018640518188, "learning_rate": 2e-05, "loss": 0.0469468, "step": 7035 }, { "epoch": 14.072, "grad_norm": 1.3610163927078247, "learning_rate": 2e-05, "loss": 0.0371495, "step": 7036 }, { "epoch": 14.074, "grad_norm": 2.023139238357544, "learning_rate": 2e-05, "loss": 0.05333347, "step": 7037 }, { "epoch": 14.076, "grad_norm": 1.167094349861145, "learning_rate": 2e-05, "loss": 0.03193357, "step": 7038 }, { "epoch": 14.078, "grad_norm": 1.3578746318817139, "learning_rate": 2e-05, "loss": 0.04010125, "step": 7039 }, { "epoch": 14.08, "grad_norm": 2.4089791774749756, "learning_rate": 2e-05, "loss": 0.05072846, "step": 7040 }, { "epoch": 14.082, "grad_norm": 1.022179365158081, "learning_rate": 2e-05, "loss": 0.03089194, "step": 7041 }, { "epoch": 14.084, "grad_norm": 1.3668341636657715, "learning_rate": 2e-05, "loss": 0.03569974, "step": 7042 }, { "epoch": 14.086, "grad_norm": 1.1925535202026367, "learning_rate": 2e-05, "loss": 0.03366784, "step": 7043 }, { "epoch": 14.088, "grad_norm": 1.3144326210021973, "learning_rate": 2e-05, "loss": 0.03443348, "step": 7044 }, { "epoch": 14.09, "grad_norm": 1.794022560119629, "learning_rate": 2e-05, "loss": 0.04681649, "step": 7045 }, { "epoch": 14.092, "grad_norm": 1.2918729782104492, "learning_rate": 2e-05, "loss": 0.04749175, "step": 7046 }, { "epoch": 14.094, "grad_norm": 1.18765389919281, "learning_rate": 2e-05, "loss": 0.03726712, "step": 7047 }, { "epoch": 14.096, "grad_norm": 1.0554254055023193, "learning_rate": 2e-05, "loss": 0.03222162, "step": 7048 }, { "epoch": 14.098, "grad_norm": 0.9862664937973022, "learning_rate": 2e-05, "loss": 0.02374917, "step": 7049 }, { "epoch": 14.1, "grad_norm": 1.021124005317688, "learning_rate": 2e-05, "loss": 0.02776304, "step": 7050 }, { "epoch": 14.102, "grad_norm": 1.7811987400054932, "learning_rate": 2e-05, "loss": 0.04543906, "step": 7051 }, { "epoch": 14.104, "grad_norm": 1.463122844696045, "learning_rate": 2e-05, "loss": 0.02648041, "step": 7052 }, { "epoch": 14.106, "grad_norm": 0.7668428421020508, "learning_rate": 2e-05, "loss": 0.02454238, "step": 7053 }, { "epoch": 14.108, "grad_norm": 1.272748589515686, "learning_rate": 2e-05, "loss": 0.0323597, "step": 7054 }, { "epoch": 14.11, "grad_norm": 1.0245872735977173, "learning_rate": 2e-05, "loss": 0.03127266, "step": 7055 }, { "epoch": 14.112, "grad_norm": 1.9159295558929443, "learning_rate": 2e-05, "loss": 0.04461393, "step": 7056 }, { "epoch": 14.114, "grad_norm": 1.5680136680603027, "learning_rate": 2e-05, "loss": 0.04500045, "step": 7057 }, { "epoch": 14.116, "grad_norm": 1.5329288244247437, "learning_rate": 2e-05, "loss": 0.03792113, "step": 7058 }, { "epoch": 14.118, "grad_norm": 1.3381918668746948, "learning_rate": 2e-05, "loss": 0.04293994, "step": 7059 }, { "epoch": 14.12, "grad_norm": 1.2890726327896118, "learning_rate": 2e-05, "loss": 0.04811954, "step": 7060 }, { "epoch": 14.122, "grad_norm": 1.3316044807434082, "learning_rate": 2e-05, "loss": 0.04357966, "step": 7061 }, { "epoch": 14.124, "grad_norm": 1.3575929403305054, "learning_rate": 2e-05, "loss": 0.03795036, "step": 7062 }, { "epoch": 14.126, "grad_norm": 1.5020532608032227, "learning_rate": 2e-05, "loss": 0.04436047, "step": 7063 }, { "epoch": 14.128, "grad_norm": 1.5422673225402832, "learning_rate": 2e-05, "loss": 0.04654428, "step": 7064 }, { "epoch": 14.13, "grad_norm": 2.029569149017334, "learning_rate": 2e-05, "loss": 0.04016326, "step": 7065 }, { "epoch": 14.132, "grad_norm": 1.814108967781067, "learning_rate": 2e-05, "loss": 0.04536037, "step": 7066 }, { "epoch": 14.134, "grad_norm": 1.3232728242874146, "learning_rate": 2e-05, "loss": 0.02865727, "step": 7067 }, { "epoch": 14.136, "grad_norm": 3.152207136154175, "learning_rate": 2e-05, "loss": 0.04350835, "step": 7068 }, { "epoch": 14.138, "grad_norm": 1.4238955974578857, "learning_rate": 2e-05, "loss": 0.03182974, "step": 7069 }, { "epoch": 14.14, "grad_norm": 1.6935443878173828, "learning_rate": 2e-05, "loss": 0.05719973, "step": 7070 }, { "epoch": 14.142, "grad_norm": 1.262613296508789, "learning_rate": 2e-05, "loss": 0.04014334, "step": 7071 }, { "epoch": 14.144, "grad_norm": 1.5854763984680176, "learning_rate": 2e-05, "loss": 0.05335325, "step": 7072 }, { "epoch": 14.146, "grad_norm": 0.964347779750824, "learning_rate": 2e-05, "loss": 0.02348195, "step": 7073 }, { "epoch": 14.148, "grad_norm": 1.284382700920105, "learning_rate": 2e-05, "loss": 0.04065283, "step": 7074 }, { "epoch": 14.15, "grad_norm": 1.5653077363967896, "learning_rate": 2e-05, "loss": 0.06096688, "step": 7075 }, { "epoch": 14.152, "grad_norm": 1.2976857423782349, "learning_rate": 2e-05, "loss": 0.04557998, "step": 7076 }, { "epoch": 14.154, "grad_norm": 1.6498374938964844, "learning_rate": 2e-05, "loss": 0.04869898, "step": 7077 }, { "epoch": 14.156, "grad_norm": 1.2892625331878662, "learning_rate": 2e-05, "loss": 0.03722683, "step": 7078 }, { "epoch": 14.158, "grad_norm": 1.2497565746307373, "learning_rate": 2e-05, "loss": 0.04251852, "step": 7079 }, { "epoch": 14.16, "grad_norm": 1.8106011152267456, "learning_rate": 2e-05, "loss": 0.02925546, "step": 7080 }, { "epoch": 14.162, "grad_norm": 1.6762701272964478, "learning_rate": 2e-05, "loss": 0.06018007, "step": 7081 }, { "epoch": 14.164, "grad_norm": 1.2910103797912598, "learning_rate": 2e-05, "loss": 0.04167475, "step": 7082 }, { "epoch": 14.166, "grad_norm": 1.5248066186904907, "learning_rate": 2e-05, "loss": 0.04084751, "step": 7083 }, { "epoch": 14.168, "grad_norm": 1.5304296016693115, "learning_rate": 2e-05, "loss": 0.04385792, "step": 7084 }, { "epoch": 14.17, "grad_norm": 1.8973009586334229, "learning_rate": 2e-05, "loss": 0.04529943, "step": 7085 }, { "epoch": 14.172, "grad_norm": 1.4018219709396362, "learning_rate": 2e-05, "loss": 0.03316261, "step": 7086 }, { "epoch": 14.174, "grad_norm": 2.388532876968384, "learning_rate": 2e-05, "loss": 0.04198411, "step": 7087 }, { "epoch": 14.176, "grad_norm": 1.7253050804138184, "learning_rate": 2e-05, "loss": 0.03549905, "step": 7088 }, { "epoch": 14.178, "grad_norm": 1.5690933465957642, "learning_rate": 2e-05, "loss": 0.043695, "step": 7089 }, { "epoch": 14.18, "grad_norm": 1.1658984422683716, "learning_rate": 2e-05, "loss": 0.03999247, "step": 7090 }, { "epoch": 14.182, "grad_norm": 1.1882442235946655, "learning_rate": 2e-05, "loss": 0.04268787, "step": 7091 }, { "epoch": 14.184, "grad_norm": 1.894171118736267, "learning_rate": 2e-05, "loss": 0.05581329, "step": 7092 }, { "epoch": 14.186, "grad_norm": 1.7583495378494263, "learning_rate": 2e-05, "loss": 0.05542745, "step": 7093 }, { "epoch": 14.188, "grad_norm": 1.0371955633163452, "learning_rate": 2e-05, "loss": 0.03195368, "step": 7094 }, { "epoch": 14.19, "grad_norm": 1.2478240728378296, "learning_rate": 2e-05, "loss": 0.0411337, "step": 7095 }, { "epoch": 14.192, "grad_norm": 1.3688163757324219, "learning_rate": 2e-05, "loss": 0.04424831, "step": 7096 }, { "epoch": 14.194, "grad_norm": 1.3419855833053589, "learning_rate": 2e-05, "loss": 0.03607039, "step": 7097 }, { "epoch": 14.196, "grad_norm": 1.9281373023986816, "learning_rate": 2e-05, "loss": 0.05275191, "step": 7098 }, { "epoch": 14.198, "grad_norm": 1.3586539030075073, "learning_rate": 2e-05, "loss": 0.03638598, "step": 7099 }, { "epoch": 14.2, "grad_norm": 1.104002833366394, "learning_rate": 2e-05, "loss": 0.03526532, "step": 7100 }, { "epoch": 14.202, "grad_norm": 1.1839067935943604, "learning_rate": 2e-05, "loss": 0.03894392, "step": 7101 }, { "epoch": 14.204, "grad_norm": 1.7897748947143555, "learning_rate": 2e-05, "loss": 0.0426039, "step": 7102 }, { "epoch": 14.206, "grad_norm": 0.8262537717819214, "learning_rate": 2e-05, "loss": 0.02584026, "step": 7103 }, { "epoch": 14.208, "grad_norm": 1.3617100715637207, "learning_rate": 2e-05, "loss": 0.03045266, "step": 7104 }, { "epoch": 14.21, "grad_norm": 1.8911073207855225, "learning_rate": 2e-05, "loss": 0.04614431, "step": 7105 }, { "epoch": 14.212, "grad_norm": 2.970533847808838, "learning_rate": 2e-05, "loss": 0.05797109, "step": 7106 }, { "epoch": 14.214, "grad_norm": 1.425166368484497, "learning_rate": 2e-05, "loss": 0.03592297, "step": 7107 }, { "epoch": 14.216, "grad_norm": 1.0085773468017578, "learning_rate": 2e-05, "loss": 0.03424015, "step": 7108 }, { "epoch": 14.218, "grad_norm": 1.4457972049713135, "learning_rate": 2e-05, "loss": 0.0365461, "step": 7109 }, { "epoch": 14.22, "grad_norm": 1.1994272470474243, "learning_rate": 2e-05, "loss": 0.02875265, "step": 7110 }, { "epoch": 14.222, "grad_norm": 0.986005425453186, "learning_rate": 2e-05, "loss": 0.03896586, "step": 7111 }, { "epoch": 14.224, "grad_norm": 1.2924270629882812, "learning_rate": 2e-05, "loss": 0.03565193, "step": 7112 }, { "epoch": 14.226, "grad_norm": 1.6049319505691528, "learning_rate": 2e-05, "loss": 0.0383412, "step": 7113 }, { "epoch": 14.228, "grad_norm": 1.1894758939743042, "learning_rate": 2e-05, "loss": 0.03838333, "step": 7114 }, { "epoch": 14.23, "grad_norm": 2.1524245738983154, "learning_rate": 2e-05, "loss": 0.05140578, "step": 7115 }, { "epoch": 14.232, "grad_norm": 1.6528891324996948, "learning_rate": 2e-05, "loss": 0.05099215, "step": 7116 }, { "epoch": 14.234, "grad_norm": 1.2502809762954712, "learning_rate": 2e-05, "loss": 0.04991131, "step": 7117 }, { "epoch": 14.236, "grad_norm": 2.5262179374694824, "learning_rate": 2e-05, "loss": 0.03753746, "step": 7118 }, { "epoch": 14.238, "grad_norm": 1.5351922512054443, "learning_rate": 2e-05, "loss": 0.02967945, "step": 7119 }, { "epoch": 14.24, "grad_norm": 0.9251724481582642, "learning_rate": 2e-05, "loss": 0.02450234, "step": 7120 }, { "epoch": 14.242, "grad_norm": 1.6973142623901367, "learning_rate": 2e-05, "loss": 0.0343626, "step": 7121 }, { "epoch": 14.244, "grad_norm": 0.960382878780365, "learning_rate": 2e-05, "loss": 0.02384898, "step": 7122 }, { "epoch": 14.246, "grad_norm": 4.224244117736816, "learning_rate": 2e-05, "loss": 0.03810407, "step": 7123 }, { "epoch": 14.248, "grad_norm": 1.0069541931152344, "learning_rate": 2e-05, "loss": 0.03257271, "step": 7124 }, { "epoch": 14.25, "grad_norm": 1.2396907806396484, "learning_rate": 2e-05, "loss": 0.03927642, "step": 7125 }, { "epoch": 14.252, "grad_norm": 1.199365258216858, "learning_rate": 2e-05, "loss": 0.04600928, "step": 7126 }, { "epoch": 14.254, "grad_norm": 1.4900243282318115, "learning_rate": 2e-05, "loss": 0.03931838, "step": 7127 }, { "epoch": 14.256, "grad_norm": 1.528289556503296, "learning_rate": 2e-05, "loss": 0.02803968, "step": 7128 }, { "epoch": 14.258, "grad_norm": 1.347772240638733, "learning_rate": 2e-05, "loss": 0.03262594, "step": 7129 }, { "epoch": 14.26, "grad_norm": 2.532207489013672, "learning_rate": 2e-05, "loss": 0.05416807, "step": 7130 }, { "epoch": 14.262, "grad_norm": 1.2191015481948853, "learning_rate": 2e-05, "loss": 0.03117691, "step": 7131 }, { "epoch": 14.264, "grad_norm": 2.1217501163482666, "learning_rate": 2e-05, "loss": 0.03810145, "step": 7132 }, { "epoch": 14.266, "grad_norm": 1.506582498550415, "learning_rate": 2e-05, "loss": 0.0460251, "step": 7133 }, { "epoch": 14.268, "grad_norm": 1.2872868776321411, "learning_rate": 2e-05, "loss": 0.04550285, "step": 7134 }, { "epoch": 14.27, "grad_norm": 1.8632010221481323, "learning_rate": 2e-05, "loss": 0.04599176, "step": 7135 }, { "epoch": 14.272, "grad_norm": 1.4420409202575684, "learning_rate": 2e-05, "loss": 0.04499146, "step": 7136 }, { "epoch": 14.274000000000001, "grad_norm": 2.0591557025909424, "learning_rate": 2e-05, "loss": 0.04166877, "step": 7137 }, { "epoch": 14.276, "grad_norm": 2.0912609100341797, "learning_rate": 2e-05, "loss": 0.05226006, "step": 7138 }, { "epoch": 14.278, "grad_norm": 1.5423698425292969, "learning_rate": 2e-05, "loss": 0.03722187, "step": 7139 }, { "epoch": 14.28, "grad_norm": 2.2733895778656006, "learning_rate": 2e-05, "loss": 0.04837454, "step": 7140 }, { "epoch": 14.282, "grad_norm": 1.218680739402771, "learning_rate": 2e-05, "loss": 0.03407823, "step": 7141 }, { "epoch": 14.284, "grad_norm": 1.3564642667770386, "learning_rate": 2e-05, "loss": 0.04712793, "step": 7142 }, { "epoch": 14.286, "grad_norm": 1.7670422792434692, "learning_rate": 2e-05, "loss": 0.04199456, "step": 7143 }, { "epoch": 14.288, "grad_norm": 1.2026097774505615, "learning_rate": 2e-05, "loss": 0.03444853, "step": 7144 }, { "epoch": 14.29, "grad_norm": 1.443743348121643, "learning_rate": 2e-05, "loss": 0.04106067, "step": 7145 }, { "epoch": 14.292, "grad_norm": 1.2027446031570435, "learning_rate": 2e-05, "loss": 0.03707718, "step": 7146 }, { "epoch": 14.294, "grad_norm": 1.0716592073440552, "learning_rate": 2e-05, "loss": 0.04112831, "step": 7147 }, { "epoch": 14.296, "grad_norm": 1.7344224452972412, "learning_rate": 2e-05, "loss": 0.03029624, "step": 7148 }, { "epoch": 14.298, "grad_norm": 2.744713068008423, "learning_rate": 2e-05, "loss": 0.03705482, "step": 7149 }, { "epoch": 14.3, "grad_norm": 1.1636744737625122, "learning_rate": 2e-05, "loss": 0.0361629, "step": 7150 }, { "epoch": 14.302, "grad_norm": 1.6198397874832153, "learning_rate": 2e-05, "loss": 0.05550705, "step": 7151 }, { "epoch": 14.304, "grad_norm": 1.4462108612060547, "learning_rate": 2e-05, "loss": 0.03110327, "step": 7152 }, { "epoch": 14.306, "grad_norm": 2.7012014389038086, "learning_rate": 2e-05, "loss": 0.05303191, "step": 7153 }, { "epoch": 14.308, "grad_norm": 0.8201614022254944, "learning_rate": 2e-05, "loss": 0.02300132, "step": 7154 }, { "epoch": 14.31, "grad_norm": 1.4760310649871826, "learning_rate": 2e-05, "loss": 0.04489978, "step": 7155 }, { "epoch": 14.312, "grad_norm": 1.3183153867721558, "learning_rate": 2e-05, "loss": 0.04012213, "step": 7156 }, { "epoch": 14.314, "grad_norm": 1.5775775909423828, "learning_rate": 2e-05, "loss": 0.04109515, "step": 7157 }, { "epoch": 14.316, "grad_norm": 1.6039639711380005, "learning_rate": 2e-05, "loss": 0.04469662, "step": 7158 }, { "epoch": 14.318, "grad_norm": 1.8237558603286743, "learning_rate": 2e-05, "loss": 0.05006624, "step": 7159 }, { "epoch": 14.32, "grad_norm": 1.929965615272522, "learning_rate": 2e-05, "loss": 0.05642921, "step": 7160 }, { "epoch": 14.322, "grad_norm": 1.932225227355957, "learning_rate": 2e-05, "loss": 0.05633236, "step": 7161 }, { "epoch": 14.324, "grad_norm": 1.1771067380905151, "learning_rate": 2e-05, "loss": 0.03297538, "step": 7162 }, { "epoch": 14.326, "grad_norm": 1.7193629741668701, "learning_rate": 2e-05, "loss": 0.04758242, "step": 7163 }, { "epoch": 14.328, "grad_norm": 1.122786521911621, "learning_rate": 2e-05, "loss": 0.04002685, "step": 7164 }, { "epoch": 14.33, "grad_norm": 1.0502139329910278, "learning_rate": 2e-05, "loss": 0.03614987, "step": 7165 }, { "epoch": 14.332, "grad_norm": 1.1357371807098389, "learning_rate": 2e-05, "loss": 0.03864967, "step": 7166 }, { "epoch": 14.334, "grad_norm": 1.074387788772583, "learning_rate": 2e-05, "loss": 0.03561043, "step": 7167 }, { "epoch": 14.336, "grad_norm": 1.5098060369491577, "learning_rate": 2e-05, "loss": 0.04273804, "step": 7168 }, { "epoch": 14.338, "grad_norm": 1.3217259645462036, "learning_rate": 2e-05, "loss": 0.05451931, "step": 7169 }, { "epoch": 14.34, "grad_norm": 1.2757598161697388, "learning_rate": 2e-05, "loss": 0.03788436, "step": 7170 }, { "epoch": 14.342, "grad_norm": 0.9583383798599243, "learning_rate": 2e-05, "loss": 0.03389092, "step": 7171 }, { "epoch": 14.344, "grad_norm": 1.8409156799316406, "learning_rate": 2e-05, "loss": 0.05510804, "step": 7172 }, { "epoch": 14.346, "grad_norm": 1.3510578870773315, "learning_rate": 2e-05, "loss": 0.03992224, "step": 7173 }, { "epoch": 14.348, "grad_norm": 1.52592933177948, "learning_rate": 2e-05, "loss": 0.04563883, "step": 7174 }, { "epoch": 14.35, "grad_norm": 1.946403980255127, "learning_rate": 2e-05, "loss": 0.03222995, "step": 7175 }, { "epoch": 14.352, "grad_norm": 1.9366340637207031, "learning_rate": 2e-05, "loss": 0.05663209, "step": 7176 }, { "epoch": 14.354, "grad_norm": 0.7667877078056335, "learning_rate": 2e-05, "loss": 0.01848971, "step": 7177 }, { "epoch": 14.356, "grad_norm": 2.2338192462921143, "learning_rate": 2e-05, "loss": 0.0433307, "step": 7178 }, { "epoch": 14.358, "grad_norm": 3.6205711364746094, "learning_rate": 2e-05, "loss": 0.0502264, "step": 7179 }, { "epoch": 14.36, "grad_norm": 1.8776448965072632, "learning_rate": 2e-05, "loss": 0.05654108, "step": 7180 }, { "epoch": 14.362, "grad_norm": 1.7132173776626587, "learning_rate": 2e-05, "loss": 0.03880975, "step": 7181 }, { "epoch": 14.364, "grad_norm": 1.5955792665481567, "learning_rate": 2e-05, "loss": 0.04336097, "step": 7182 }, { "epoch": 14.366, "grad_norm": 1.046798825263977, "learning_rate": 2e-05, "loss": 0.03609883, "step": 7183 }, { "epoch": 14.368, "grad_norm": 1.6806223392486572, "learning_rate": 2e-05, "loss": 0.04325953, "step": 7184 }, { "epoch": 14.37, "grad_norm": 1.8983646631240845, "learning_rate": 2e-05, "loss": 0.04521517, "step": 7185 }, { "epoch": 14.372, "grad_norm": 2.0524489879608154, "learning_rate": 2e-05, "loss": 0.03215559, "step": 7186 }, { "epoch": 14.374, "grad_norm": 1.1298598051071167, "learning_rate": 2e-05, "loss": 0.03601304, "step": 7187 }, { "epoch": 14.376, "grad_norm": 1.4891972541809082, "learning_rate": 2e-05, "loss": 0.03756123, "step": 7188 }, { "epoch": 14.378, "grad_norm": 1.3420482873916626, "learning_rate": 2e-05, "loss": 0.04583802, "step": 7189 }, { "epoch": 14.38, "grad_norm": 5.2002716064453125, "learning_rate": 2e-05, "loss": 0.04100659, "step": 7190 }, { "epoch": 14.382, "grad_norm": 1.079121470451355, "learning_rate": 2e-05, "loss": 0.03196656, "step": 7191 }, { "epoch": 14.384, "grad_norm": 2.1130754947662354, "learning_rate": 2e-05, "loss": 0.05068072, "step": 7192 }, { "epoch": 14.386, "grad_norm": 1.6315432786941528, "learning_rate": 2e-05, "loss": 0.03901272, "step": 7193 }, { "epoch": 14.388, "grad_norm": 1.7703206539154053, "learning_rate": 2e-05, "loss": 0.03575943, "step": 7194 }, { "epoch": 14.39, "grad_norm": 1.4890453815460205, "learning_rate": 2e-05, "loss": 0.03911097, "step": 7195 }, { "epoch": 14.392, "grad_norm": 1.2721261978149414, "learning_rate": 2e-05, "loss": 0.04248876, "step": 7196 }, { "epoch": 14.394, "grad_norm": 1.8303420543670654, "learning_rate": 2e-05, "loss": 0.05177449, "step": 7197 }, { "epoch": 14.396, "grad_norm": 1.322717308998108, "learning_rate": 2e-05, "loss": 0.02881128, "step": 7198 }, { "epoch": 14.398, "grad_norm": 1.5059839487075806, "learning_rate": 2e-05, "loss": 0.04383879, "step": 7199 }, { "epoch": 14.4, "grad_norm": 1.64779794216156, "learning_rate": 2e-05, "loss": 0.0525568, "step": 7200 }, { "epoch": 14.402, "grad_norm": 2.455418586730957, "learning_rate": 2e-05, "loss": 0.04642213, "step": 7201 }, { "epoch": 14.404, "grad_norm": 1.2725389003753662, "learning_rate": 2e-05, "loss": 0.03739651, "step": 7202 }, { "epoch": 14.406, "grad_norm": 1.4967567920684814, "learning_rate": 2e-05, "loss": 0.04378046, "step": 7203 }, { "epoch": 14.408, "grad_norm": 1.1510059833526611, "learning_rate": 2e-05, "loss": 0.03842362, "step": 7204 }, { "epoch": 14.41, "grad_norm": 1.4995372295379639, "learning_rate": 2e-05, "loss": 0.04412612, "step": 7205 }, { "epoch": 14.412, "grad_norm": 0.94809490442276, "learning_rate": 2e-05, "loss": 0.03201088, "step": 7206 }, { "epoch": 14.414, "grad_norm": 1.5775501728057861, "learning_rate": 2e-05, "loss": 0.03421389, "step": 7207 }, { "epoch": 14.416, "grad_norm": 3.60548996925354, "learning_rate": 2e-05, "loss": 0.0545315, "step": 7208 }, { "epoch": 14.418, "grad_norm": 1.0393273830413818, "learning_rate": 2e-05, "loss": 0.03813236, "step": 7209 }, { "epoch": 14.42, "grad_norm": 1.0074549913406372, "learning_rate": 2e-05, "loss": 0.02983594, "step": 7210 }, { "epoch": 14.422, "grad_norm": 1.203730583190918, "learning_rate": 2e-05, "loss": 0.04196813, "step": 7211 }, { "epoch": 14.424, "grad_norm": 1.2036176919937134, "learning_rate": 2e-05, "loss": 0.02699343, "step": 7212 }, { "epoch": 14.426, "grad_norm": 1.3977973461151123, "learning_rate": 2e-05, "loss": 0.02824752, "step": 7213 }, { "epoch": 14.428, "grad_norm": 1.019425392150879, "learning_rate": 2e-05, "loss": 0.03191173, "step": 7214 }, { "epoch": 14.43, "grad_norm": 0.8422673940658569, "learning_rate": 2e-05, "loss": 0.02386178, "step": 7215 }, { "epoch": 14.432, "grad_norm": 1.4867335557937622, "learning_rate": 2e-05, "loss": 0.03482441, "step": 7216 }, { "epoch": 14.434, "grad_norm": 2.1186928749084473, "learning_rate": 2e-05, "loss": 0.05948957, "step": 7217 }, { "epoch": 14.436, "grad_norm": 1.7550668716430664, "learning_rate": 2e-05, "loss": 0.04442699, "step": 7218 }, { "epoch": 14.438, "grad_norm": 1.0397560596466064, "learning_rate": 2e-05, "loss": 0.0290125, "step": 7219 }, { "epoch": 14.44, "grad_norm": 1.3454571962356567, "learning_rate": 2e-05, "loss": 0.03583448, "step": 7220 }, { "epoch": 14.442, "grad_norm": 1.2962967157363892, "learning_rate": 2e-05, "loss": 0.0369962, "step": 7221 }, { "epoch": 14.444, "grad_norm": 1.420045256614685, "learning_rate": 2e-05, "loss": 0.03899933, "step": 7222 }, { "epoch": 14.446, "grad_norm": 2.546292781829834, "learning_rate": 2e-05, "loss": 0.05628195, "step": 7223 }, { "epoch": 14.448, "grad_norm": 1.8872132301330566, "learning_rate": 2e-05, "loss": 0.03206664, "step": 7224 }, { "epoch": 14.45, "grad_norm": 0.9982779622077942, "learning_rate": 2e-05, "loss": 0.01746661, "step": 7225 }, { "epoch": 14.452, "grad_norm": 2.069234609603882, "learning_rate": 2e-05, "loss": 0.04197174, "step": 7226 }, { "epoch": 14.454, "grad_norm": 1.5738203525543213, "learning_rate": 2e-05, "loss": 0.04109071, "step": 7227 }, { "epoch": 14.456, "grad_norm": 1.1500931978225708, "learning_rate": 2e-05, "loss": 0.03633848, "step": 7228 }, { "epoch": 14.458, "grad_norm": 1.2482191324234009, "learning_rate": 2e-05, "loss": 0.04544804, "step": 7229 }, { "epoch": 14.46, "grad_norm": 1.5206077098846436, "learning_rate": 2e-05, "loss": 0.03966599, "step": 7230 }, { "epoch": 14.462, "grad_norm": 1.76610267162323, "learning_rate": 2e-05, "loss": 0.04585, "step": 7231 }, { "epoch": 14.464, "grad_norm": 1.5877853631973267, "learning_rate": 2e-05, "loss": 0.03790272, "step": 7232 }, { "epoch": 14.466, "grad_norm": 1.1064239740371704, "learning_rate": 2e-05, "loss": 0.03053769, "step": 7233 }, { "epoch": 14.468, "grad_norm": 1.0071042776107788, "learning_rate": 2e-05, "loss": 0.03742989, "step": 7234 }, { "epoch": 14.47, "grad_norm": 2.6944468021392822, "learning_rate": 2e-05, "loss": 0.05254611, "step": 7235 }, { "epoch": 14.472, "grad_norm": 1.4075301885604858, "learning_rate": 2e-05, "loss": 0.05554926, "step": 7236 }, { "epoch": 14.474, "grad_norm": 1.048970103263855, "learning_rate": 2e-05, "loss": 0.0344836, "step": 7237 }, { "epoch": 14.475999999999999, "grad_norm": 0.9587178826332092, "learning_rate": 2e-05, "loss": 0.03106603, "step": 7238 }, { "epoch": 14.478, "grad_norm": 1.0842715501785278, "learning_rate": 2e-05, "loss": 0.03258051, "step": 7239 }, { "epoch": 14.48, "grad_norm": 1.1892338991165161, "learning_rate": 2e-05, "loss": 0.04680737, "step": 7240 }, { "epoch": 14.482, "grad_norm": 1.856372594833374, "learning_rate": 2e-05, "loss": 0.05747771, "step": 7241 }, { "epoch": 14.484, "grad_norm": 1.1554561853408813, "learning_rate": 2e-05, "loss": 0.02923879, "step": 7242 }, { "epoch": 14.486, "grad_norm": 1.0455029010772705, "learning_rate": 2e-05, "loss": 0.03191773, "step": 7243 }, { "epoch": 14.488, "grad_norm": 2.2019166946411133, "learning_rate": 2e-05, "loss": 0.04987948, "step": 7244 }, { "epoch": 14.49, "grad_norm": 1.404489278793335, "learning_rate": 2e-05, "loss": 0.03941087, "step": 7245 }, { "epoch": 14.492, "grad_norm": 1.0887120962142944, "learning_rate": 2e-05, "loss": 0.03620033, "step": 7246 }, { "epoch": 14.494, "grad_norm": 1.727051019668579, "learning_rate": 2e-05, "loss": 0.03951932, "step": 7247 }, { "epoch": 14.496, "grad_norm": 1.1405435800552368, "learning_rate": 2e-05, "loss": 0.04180471, "step": 7248 }, { "epoch": 14.498, "grad_norm": 1.45590341091156, "learning_rate": 2e-05, "loss": 0.035485, "step": 7249 }, { "epoch": 14.5, "grad_norm": 1.9948844909667969, "learning_rate": 2e-05, "loss": 0.04214467, "step": 7250 }, { "epoch": 14.502, "grad_norm": 1.238334059715271, "learning_rate": 2e-05, "loss": 0.03087533, "step": 7251 }, { "epoch": 14.504, "grad_norm": 1.136520504951477, "learning_rate": 2e-05, "loss": 0.03111343, "step": 7252 }, { "epoch": 14.506, "grad_norm": 1.5107890367507935, "learning_rate": 2e-05, "loss": 0.03291022, "step": 7253 }, { "epoch": 14.508, "grad_norm": 1.3895598649978638, "learning_rate": 2e-05, "loss": 0.04216557, "step": 7254 }, { "epoch": 14.51, "grad_norm": 2.2805137634277344, "learning_rate": 2e-05, "loss": 0.04727894, "step": 7255 }, { "epoch": 14.512, "grad_norm": 1.5463753938674927, "learning_rate": 2e-05, "loss": 0.03055894, "step": 7256 }, { "epoch": 14.514, "grad_norm": 0.9866666793823242, "learning_rate": 2e-05, "loss": 0.02884018, "step": 7257 }, { "epoch": 14.516, "grad_norm": 1.2003675699234009, "learning_rate": 2e-05, "loss": 0.03621934, "step": 7258 }, { "epoch": 14.518, "grad_norm": 2.279313802719116, "learning_rate": 2e-05, "loss": 0.08503312, "step": 7259 }, { "epoch": 14.52, "grad_norm": 2.878683090209961, "learning_rate": 2e-05, "loss": 0.03383691, "step": 7260 }, { "epoch": 14.522, "grad_norm": 1.457414150238037, "learning_rate": 2e-05, "loss": 0.04102724, "step": 7261 }, { "epoch": 14.524000000000001, "grad_norm": 1.2874579429626465, "learning_rate": 2e-05, "loss": 0.02973592, "step": 7262 }, { "epoch": 14.526, "grad_norm": 1.9015241861343384, "learning_rate": 2e-05, "loss": 0.04436665, "step": 7263 }, { "epoch": 14.528, "grad_norm": 1.2383368015289307, "learning_rate": 2e-05, "loss": 0.03040004, "step": 7264 }, { "epoch": 14.53, "grad_norm": 1.5358835458755493, "learning_rate": 2e-05, "loss": 0.05725804, "step": 7265 }, { "epoch": 14.532, "grad_norm": 2.0177321434020996, "learning_rate": 2e-05, "loss": 0.04995024, "step": 7266 }, { "epoch": 14.534, "grad_norm": 1.6162405014038086, "learning_rate": 2e-05, "loss": 0.03423398, "step": 7267 }, { "epoch": 14.536, "grad_norm": 1.3776979446411133, "learning_rate": 2e-05, "loss": 0.03925561, "step": 7268 }, { "epoch": 14.538, "grad_norm": 1.2735576629638672, "learning_rate": 2e-05, "loss": 0.03249297, "step": 7269 }, { "epoch": 14.54, "grad_norm": 1.3519257307052612, "learning_rate": 2e-05, "loss": 0.03134355, "step": 7270 }, { "epoch": 14.542, "grad_norm": 1.2011692523956299, "learning_rate": 2e-05, "loss": 0.03818227, "step": 7271 }, { "epoch": 14.544, "grad_norm": 1.6174978017807007, "learning_rate": 2e-05, "loss": 0.04904899, "step": 7272 }, { "epoch": 14.546, "grad_norm": 1.3549933433532715, "learning_rate": 2e-05, "loss": 0.04195243, "step": 7273 }, { "epoch": 14.548, "grad_norm": 1.4086804389953613, "learning_rate": 2e-05, "loss": 0.03927261, "step": 7274 }, { "epoch": 14.55, "grad_norm": 1.495468258857727, "learning_rate": 2e-05, "loss": 0.03486994, "step": 7275 }, { "epoch": 14.552, "grad_norm": 1.4550012350082397, "learning_rate": 2e-05, "loss": 0.04446776, "step": 7276 }, { "epoch": 14.554, "grad_norm": 1.0507636070251465, "learning_rate": 2e-05, "loss": 0.02539073, "step": 7277 }, { "epoch": 14.556000000000001, "grad_norm": 6.91660213470459, "learning_rate": 2e-05, "loss": 0.03748736, "step": 7278 }, { "epoch": 14.558, "grad_norm": 2.021453857421875, "learning_rate": 2e-05, "loss": 0.05216369, "step": 7279 }, { "epoch": 14.56, "grad_norm": 1.6024717092514038, "learning_rate": 2e-05, "loss": 0.05282374, "step": 7280 }, { "epoch": 14.562, "grad_norm": 1.2320939302444458, "learning_rate": 2e-05, "loss": 0.03876211, "step": 7281 }, { "epoch": 14.564, "grad_norm": 1.198306679725647, "learning_rate": 2e-05, "loss": 0.03282074, "step": 7282 }, { "epoch": 14.566, "grad_norm": 1.5773409605026245, "learning_rate": 2e-05, "loss": 0.03945959, "step": 7283 }, { "epoch": 14.568, "grad_norm": 1.21599543094635, "learning_rate": 2e-05, "loss": 0.04125897, "step": 7284 }, { "epoch": 14.57, "grad_norm": 2.8407862186431885, "learning_rate": 2e-05, "loss": 0.04945897, "step": 7285 }, { "epoch": 14.572, "grad_norm": 1.447121500968933, "learning_rate": 2e-05, "loss": 0.03155572, "step": 7286 }, { "epoch": 14.574, "grad_norm": 1.1770319938659668, "learning_rate": 2e-05, "loss": 0.03424954, "step": 7287 }, { "epoch": 14.576, "grad_norm": 1.8243449926376343, "learning_rate": 2e-05, "loss": 0.04884764, "step": 7288 }, { "epoch": 14.578, "grad_norm": 0.871478259563446, "learning_rate": 2e-05, "loss": 0.02348526, "step": 7289 }, { "epoch": 14.58, "grad_norm": 1.1215943098068237, "learning_rate": 2e-05, "loss": 0.03072283, "step": 7290 }, { "epoch": 14.582, "grad_norm": 1.7615373134613037, "learning_rate": 2e-05, "loss": 0.05309131, "step": 7291 }, { "epoch": 14.584, "grad_norm": 2.3434910774230957, "learning_rate": 2e-05, "loss": 0.03328982, "step": 7292 }, { "epoch": 14.586, "grad_norm": 2.394543409347534, "learning_rate": 2e-05, "loss": 0.03665608, "step": 7293 }, { "epoch": 14.588, "grad_norm": 1.7310210466384888, "learning_rate": 2e-05, "loss": 0.04254977, "step": 7294 }, { "epoch": 14.59, "grad_norm": 1.5410484075546265, "learning_rate": 2e-05, "loss": 0.03172578, "step": 7295 }, { "epoch": 14.592, "grad_norm": 1.7711609601974487, "learning_rate": 2e-05, "loss": 0.04883514, "step": 7296 }, { "epoch": 14.594, "grad_norm": 1.606502652168274, "learning_rate": 2e-05, "loss": 0.03369958, "step": 7297 }, { "epoch": 14.596, "grad_norm": 1.9675401449203491, "learning_rate": 2e-05, "loss": 0.03461675, "step": 7298 }, { "epoch": 14.598, "grad_norm": 1.6831592321395874, "learning_rate": 2e-05, "loss": 0.04358295, "step": 7299 }, { "epoch": 14.6, "grad_norm": 1.846706748008728, "learning_rate": 2e-05, "loss": 0.04121499, "step": 7300 }, { "epoch": 14.602, "grad_norm": 1.6495356559753418, "learning_rate": 2e-05, "loss": 0.04087245, "step": 7301 }, { "epoch": 14.604, "grad_norm": 1.3375145196914673, "learning_rate": 2e-05, "loss": 0.02481322, "step": 7302 }, { "epoch": 14.606, "grad_norm": 1.3421443700790405, "learning_rate": 2e-05, "loss": 0.04807069, "step": 7303 }, { "epoch": 14.608, "grad_norm": 1.5804858207702637, "learning_rate": 2e-05, "loss": 0.03445378, "step": 7304 }, { "epoch": 14.61, "grad_norm": 3.078519344329834, "learning_rate": 2e-05, "loss": 0.05874301, "step": 7305 }, { "epoch": 14.612, "grad_norm": 1.269014596939087, "learning_rate": 2e-05, "loss": 0.03813095, "step": 7306 }, { "epoch": 14.614, "grad_norm": 1.667083978652954, "learning_rate": 2e-05, "loss": 0.05303952, "step": 7307 }, { "epoch": 14.616, "grad_norm": 1.014502763748169, "learning_rate": 2e-05, "loss": 0.0317478, "step": 7308 }, { "epoch": 14.618, "grad_norm": 1.066616177558899, "learning_rate": 2e-05, "loss": 0.02247791, "step": 7309 }, { "epoch": 14.62, "grad_norm": 1.208592176437378, "learning_rate": 2e-05, "loss": 0.02994377, "step": 7310 }, { "epoch": 14.622, "grad_norm": 0.8381512761116028, "learning_rate": 2e-05, "loss": 0.02272216, "step": 7311 }, { "epoch": 14.624, "grad_norm": 1.4301813840866089, "learning_rate": 2e-05, "loss": 0.04580442, "step": 7312 }, { "epoch": 14.626, "grad_norm": 1.006054401397705, "learning_rate": 2e-05, "loss": 0.02406761, "step": 7313 }, { "epoch": 14.628, "grad_norm": 2.2879605293273926, "learning_rate": 2e-05, "loss": 0.05421574, "step": 7314 }, { "epoch": 14.63, "grad_norm": 1.347216010093689, "learning_rate": 2e-05, "loss": 0.03223877, "step": 7315 }, { "epoch": 14.632, "grad_norm": 1.3925851583480835, "learning_rate": 2e-05, "loss": 0.04525027, "step": 7316 }, { "epoch": 14.634, "grad_norm": 1.5058016777038574, "learning_rate": 2e-05, "loss": 0.05671232, "step": 7317 }, { "epoch": 14.636, "grad_norm": 1.2585381269454956, "learning_rate": 2e-05, "loss": 0.04648329, "step": 7318 }, { "epoch": 14.638, "grad_norm": 2.345365047454834, "learning_rate": 2e-05, "loss": 0.02963935, "step": 7319 }, { "epoch": 14.64, "grad_norm": 1.1654967069625854, "learning_rate": 2e-05, "loss": 0.0446506, "step": 7320 }, { "epoch": 14.642, "grad_norm": 2.1558237075805664, "learning_rate": 2e-05, "loss": 0.04308884, "step": 7321 }, { "epoch": 14.644, "grad_norm": 1.271641492843628, "learning_rate": 2e-05, "loss": 0.0442902, "step": 7322 }, { "epoch": 14.646, "grad_norm": 1.7214982509613037, "learning_rate": 2e-05, "loss": 0.0474602, "step": 7323 }, { "epoch": 14.648, "grad_norm": 1.1854116916656494, "learning_rate": 2e-05, "loss": 0.04027828, "step": 7324 }, { "epoch": 14.65, "grad_norm": 1.9680629968643188, "learning_rate": 2e-05, "loss": 0.04050666, "step": 7325 }, { "epoch": 14.652, "grad_norm": 1.021872878074646, "learning_rate": 2e-05, "loss": 0.03418166, "step": 7326 }, { "epoch": 14.654, "grad_norm": 1.3861567974090576, "learning_rate": 2e-05, "loss": 0.04764885, "step": 7327 }, { "epoch": 14.656, "grad_norm": 0.8934645056724548, "learning_rate": 2e-05, "loss": 0.02765589, "step": 7328 }, { "epoch": 14.658, "grad_norm": 1.2780135869979858, "learning_rate": 2e-05, "loss": 0.03148212, "step": 7329 }, { "epoch": 14.66, "grad_norm": 1.3805195093154907, "learning_rate": 2e-05, "loss": 0.04608523, "step": 7330 }, { "epoch": 14.662, "grad_norm": 1.4747601747512817, "learning_rate": 2e-05, "loss": 0.0399397, "step": 7331 }, { "epoch": 14.664, "grad_norm": 2.1372389793395996, "learning_rate": 2e-05, "loss": 0.05007102, "step": 7332 }, { "epoch": 14.666, "grad_norm": 1.1365556716918945, "learning_rate": 2e-05, "loss": 0.0315646, "step": 7333 }, { "epoch": 14.668, "grad_norm": 1.1190814971923828, "learning_rate": 2e-05, "loss": 0.03757735, "step": 7334 }, { "epoch": 14.67, "grad_norm": 1.0525307655334473, "learning_rate": 2e-05, "loss": 0.02962991, "step": 7335 }, { "epoch": 14.672, "grad_norm": 1.0830663442611694, "learning_rate": 2e-05, "loss": 0.03256768, "step": 7336 }, { "epoch": 14.674, "grad_norm": 1.400888204574585, "learning_rate": 2e-05, "loss": 0.03843899, "step": 7337 }, { "epoch": 14.676, "grad_norm": 1.4905816316604614, "learning_rate": 2e-05, "loss": 0.0448906, "step": 7338 }, { "epoch": 14.678, "grad_norm": 1.2425509691238403, "learning_rate": 2e-05, "loss": 0.0399595, "step": 7339 }, { "epoch": 14.68, "grad_norm": 1.1389684677124023, "learning_rate": 2e-05, "loss": 0.03919307, "step": 7340 }, { "epoch": 14.682, "grad_norm": 1.3145670890808105, "learning_rate": 2e-05, "loss": 0.03573439, "step": 7341 }, { "epoch": 14.684, "grad_norm": 1.6601872444152832, "learning_rate": 2e-05, "loss": 0.04692688, "step": 7342 }, { "epoch": 14.686, "grad_norm": 1.6752896308898926, "learning_rate": 2e-05, "loss": 0.02998948, "step": 7343 }, { "epoch": 14.688, "grad_norm": 1.4124311208724976, "learning_rate": 2e-05, "loss": 0.0381167, "step": 7344 }, { "epoch": 14.69, "grad_norm": 1.7222316265106201, "learning_rate": 2e-05, "loss": 0.04596563, "step": 7345 }, { "epoch": 14.692, "grad_norm": 1.5030279159545898, "learning_rate": 2e-05, "loss": 0.03170253, "step": 7346 }, { "epoch": 14.693999999999999, "grad_norm": 1.9802442789077759, "learning_rate": 2e-05, "loss": 0.05240085, "step": 7347 }, { "epoch": 14.696, "grad_norm": 1.9767365455627441, "learning_rate": 2e-05, "loss": 0.0302583, "step": 7348 }, { "epoch": 14.698, "grad_norm": 1.5814634561538696, "learning_rate": 2e-05, "loss": 0.03863395, "step": 7349 }, { "epoch": 14.7, "grad_norm": 0.9202812910079956, "learning_rate": 2e-05, "loss": 0.02484436, "step": 7350 }, { "epoch": 14.702, "grad_norm": 1.5224653482437134, "learning_rate": 2e-05, "loss": 0.04415604, "step": 7351 }, { "epoch": 14.704, "grad_norm": 1.4382812976837158, "learning_rate": 2e-05, "loss": 0.04574153, "step": 7352 }, { "epoch": 14.706, "grad_norm": 1.1333105564117432, "learning_rate": 2e-05, "loss": 0.03003317, "step": 7353 }, { "epoch": 14.708, "grad_norm": 1.1706465482711792, "learning_rate": 2e-05, "loss": 0.03788555, "step": 7354 }, { "epoch": 14.71, "grad_norm": 2.07646107673645, "learning_rate": 2e-05, "loss": 0.04297365, "step": 7355 }, { "epoch": 14.712, "grad_norm": 1.1796342134475708, "learning_rate": 2e-05, "loss": 0.02585716, "step": 7356 }, { "epoch": 14.714, "grad_norm": 1.6022311449050903, "learning_rate": 2e-05, "loss": 0.03057653, "step": 7357 }, { "epoch": 14.716, "grad_norm": 1.4186314344406128, "learning_rate": 2e-05, "loss": 0.04783071, "step": 7358 }, { "epoch": 14.718, "grad_norm": 1.163080096244812, "learning_rate": 2e-05, "loss": 0.04012655, "step": 7359 }, { "epoch": 14.72, "grad_norm": 1.7531554698944092, "learning_rate": 2e-05, "loss": 0.06195234, "step": 7360 }, { "epoch": 14.722, "grad_norm": 0.8606005311012268, "learning_rate": 2e-05, "loss": 0.02377318, "step": 7361 }, { "epoch": 14.724, "grad_norm": 2.0389902591705322, "learning_rate": 2e-05, "loss": 0.03832042, "step": 7362 }, { "epoch": 14.725999999999999, "grad_norm": 2.013206720352173, "learning_rate": 2e-05, "loss": 0.03251911, "step": 7363 }, { "epoch": 14.728, "grad_norm": 1.606460690498352, "learning_rate": 2e-05, "loss": 0.03653719, "step": 7364 }, { "epoch": 14.73, "grad_norm": 0.8811898827552795, "learning_rate": 2e-05, "loss": 0.03039603, "step": 7365 }, { "epoch": 14.732, "grad_norm": 1.03880774974823, "learning_rate": 2e-05, "loss": 0.0278089, "step": 7366 }, { "epoch": 14.734, "grad_norm": 1.3265225887298584, "learning_rate": 2e-05, "loss": 0.03921036, "step": 7367 }, { "epoch": 14.736, "grad_norm": 1.2806612253189087, "learning_rate": 2e-05, "loss": 0.04968328, "step": 7368 }, { "epoch": 14.738, "grad_norm": 1.5343149900436401, "learning_rate": 2e-05, "loss": 0.04367102, "step": 7369 }, { "epoch": 14.74, "grad_norm": 1.47340726852417, "learning_rate": 2e-05, "loss": 0.03140695, "step": 7370 }, { "epoch": 14.742, "grad_norm": 1.4524576663970947, "learning_rate": 2e-05, "loss": 0.03347445, "step": 7371 }, { "epoch": 14.744, "grad_norm": 2.033576250076294, "learning_rate": 2e-05, "loss": 0.04897091, "step": 7372 }, { "epoch": 14.746, "grad_norm": 0.9620274305343628, "learning_rate": 2e-05, "loss": 0.02743071, "step": 7373 }, { "epoch": 14.748, "grad_norm": 1.0374791622161865, "learning_rate": 2e-05, "loss": 0.04549268, "step": 7374 }, { "epoch": 14.75, "grad_norm": 1.0183595418930054, "learning_rate": 2e-05, "loss": 0.02789947, "step": 7375 }, { "epoch": 14.752, "grad_norm": 0.9441385269165039, "learning_rate": 2e-05, "loss": 0.03046933, "step": 7376 }, { "epoch": 14.754, "grad_norm": 1.2050881385803223, "learning_rate": 2e-05, "loss": 0.04037102, "step": 7377 }, { "epoch": 14.756, "grad_norm": 1.4334502220153809, "learning_rate": 2e-05, "loss": 0.04295122, "step": 7378 }, { "epoch": 14.758, "grad_norm": 1.652840495109558, "learning_rate": 2e-05, "loss": 0.03737336, "step": 7379 }, { "epoch": 14.76, "grad_norm": 1.331262469291687, "learning_rate": 2e-05, "loss": 0.04912826, "step": 7380 }, { "epoch": 14.762, "grad_norm": 1.5204346179962158, "learning_rate": 2e-05, "loss": 0.04136185, "step": 7381 }, { "epoch": 14.764, "grad_norm": 1.3094161748886108, "learning_rate": 2e-05, "loss": 0.04010442, "step": 7382 }, { "epoch": 14.766, "grad_norm": 1.716664433479309, "learning_rate": 2e-05, "loss": 0.04056903, "step": 7383 }, { "epoch": 14.768, "grad_norm": 2.7664754390716553, "learning_rate": 2e-05, "loss": 0.04453249, "step": 7384 }, { "epoch": 14.77, "grad_norm": 1.1787066459655762, "learning_rate": 2e-05, "loss": 0.04342939, "step": 7385 }, { "epoch": 14.772, "grad_norm": 1.3815988302230835, "learning_rate": 2e-05, "loss": 0.04383612, "step": 7386 }, { "epoch": 14.774000000000001, "grad_norm": 1.0485109090805054, "learning_rate": 2e-05, "loss": 0.03212638, "step": 7387 }, { "epoch": 14.776, "grad_norm": 1.444206714630127, "learning_rate": 2e-05, "loss": 0.04447151, "step": 7388 }, { "epoch": 14.778, "grad_norm": 0.9816635847091675, "learning_rate": 2e-05, "loss": 0.03149556, "step": 7389 }, { "epoch": 14.78, "grad_norm": 1.0881997346878052, "learning_rate": 2e-05, "loss": 0.03420866, "step": 7390 }, { "epoch": 14.782, "grad_norm": 1.6856642961502075, "learning_rate": 2e-05, "loss": 0.04961979, "step": 7391 }, { "epoch": 14.784, "grad_norm": 4.939731597900391, "learning_rate": 2e-05, "loss": 0.05440149, "step": 7392 }, { "epoch": 14.786, "grad_norm": 1.1305186748504639, "learning_rate": 2e-05, "loss": 0.03616676, "step": 7393 }, { "epoch": 14.788, "grad_norm": 1.8306798934936523, "learning_rate": 2e-05, "loss": 0.04979054, "step": 7394 }, { "epoch": 14.79, "grad_norm": 1.601699948310852, "learning_rate": 2e-05, "loss": 0.0460626, "step": 7395 }, { "epoch": 14.792, "grad_norm": 1.8894065618515015, "learning_rate": 2e-05, "loss": 0.05282786, "step": 7396 }, { "epoch": 14.794, "grad_norm": 1.5035409927368164, "learning_rate": 2e-05, "loss": 0.03724657, "step": 7397 }, { "epoch": 14.796, "grad_norm": 2.4653127193450928, "learning_rate": 2e-05, "loss": 0.03604883, "step": 7398 }, { "epoch": 14.798, "grad_norm": 1.3967053890228271, "learning_rate": 2e-05, "loss": 0.04081305, "step": 7399 }, { "epoch": 14.8, "grad_norm": 1.280266523361206, "learning_rate": 2e-05, "loss": 0.03488631, "step": 7400 }, { "epoch": 14.802, "grad_norm": 1.1659793853759766, "learning_rate": 2e-05, "loss": 0.03118264, "step": 7401 }, { "epoch": 14.804, "grad_norm": 1.2721163034439087, "learning_rate": 2e-05, "loss": 0.0195914, "step": 7402 }, { "epoch": 14.806000000000001, "grad_norm": 0.9028842449188232, "learning_rate": 2e-05, "loss": 0.02684617, "step": 7403 }, { "epoch": 14.808, "grad_norm": 1.3119491338729858, "learning_rate": 2e-05, "loss": 0.0373154, "step": 7404 }, { "epoch": 14.81, "grad_norm": 2.559068202972412, "learning_rate": 2e-05, "loss": 0.03095316, "step": 7405 }, { "epoch": 14.812, "grad_norm": 1.241502046585083, "learning_rate": 2e-05, "loss": 0.02786666, "step": 7406 }, { "epoch": 14.814, "grad_norm": 1.2434546947479248, "learning_rate": 2e-05, "loss": 0.03359643, "step": 7407 }, { "epoch": 14.816, "grad_norm": 1.3161190748214722, "learning_rate": 2e-05, "loss": 0.03395464, "step": 7408 }, { "epoch": 14.818, "grad_norm": 1.1681431531906128, "learning_rate": 2e-05, "loss": 0.03358738, "step": 7409 }, { "epoch": 14.82, "grad_norm": 2.086354970932007, "learning_rate": 2e-05, "loss": 0.0546408, "step": 7410 }, { "epoch": 14.822, "grad_norm": 1.3456135988235474, "learning_rate": 2e-05, "loss": 0.04479208, "step": 7411 }, { "epoch": 14.824, "grad_norm": 1.053308367729187, "learning_rate": 2e-05, "loss": 0.02650757, "step": 7412 }, { "epoch": 14.826, "grad_norm": 1.4386178255081177, "learning_rate": 2e-05, "loss": 0.03358426, "step": 7413 }, { "epoch": 14.828, "grad_norm": 1.846922755241394, "learning_rate": 2e-05, "loss": 0.037901, "step": 7414 }, { "epoch": 14.83, "grad_norm": 1.393807053565979, "learning_rate": 2e-05, "loss": 0.05257457, "step": 7415 }, { "epoch": 14.832, "grad_norm": 1.5299506187438965, "learning_rate": 2e-05, "loss": 0.04238684, "step": 7416 }, { "epoch": 14.834, "grad_norm": 1.1853361129760742, "learning_rate": 2e-05, "loss": 0.04620481, "step": 7417 }, { "epoch": 14.836, "grad_norm": 1.9609429836273193, "learning_rate": 2e-05, "loss": 0.05831449, "step": 7418 }, { "epoch": 14.838, "grad_norm": 0.9670793414115906, "learning_rate": 2e-05, "loss": 0.0317945, "step": 7419 }, { "epoch": 14.84, "grad_norm": 2.3657424449920654, "learning_rate": 2e-05, "loss": 0.06117197, "step": 7420 }, { "epoch": 14.842, "grad_norm": 1.2410660982131958, "learning_rate": 2e-05, "loss": 0.03587608, "step": 7421 }, { "epoch": 14.844, "grad_norm": 1.5402605533599854, "learning_rate": 2e-05, "loss": 0.03572353, "step": 7422 }, { "epoch": 14.846, "grad_norm": 1.4933197498321533, "learning_rate": 2e-05, "loss": 0.03981678, "step": 7423 }, { "epoch": 14.848, "grad_norm": 1.4539066553115845, "learning_rate": 2e-05, "loss": 0.05267172, "step": 7424 }, { "epoch": 14.85, "grad_norm": 1.239725947380066, "learning_rate": 2e-05, "loss": 0.04814597, "step": 7425 }, { "epoch": 14.852, "grad_norm": 1.062360405921936, "learning_rate": 2e-05, "loss": 0.03394141, "step": 7426 }, { "epoch": 14.854, "grad_norm": 1.3617041110992432, "learning_rate": 2e-05, "loss": 0.04762523, "step": 7427 }, { "epoch": 14.856, "grad_norm": 1.218767762184143, "learning_rate": 2e-05, "loss": 0.0463365, "step": 7428 }, { "epoch": 14.858, "grad_norm": 1.1264668703079224, "learning_rate": 2e-05, "loss": 0.02673528, "step": 7429 }, { "epoch": 14.86, "grad_norm": 1.9460324048995972, "learning_rate": 2e-05, "loss": 0.03690345, "step": 7430 }, { "epoch": 14.862, "grad_norm": 1.3760199546813965, "learning_rate": 2e-05, "loss": 0.04101796, "step": 7431 }, { "epoch": 14.864, "grad_norm": 1.8361059427261353, "learning_rate": 2e-05, "loss": 0.05019148, "step": 7432 }, { "epoch": 14.866, "grad_norm": 2.861609697341919, "learning_rate": 2e-05, "loss": 0.04384901, "step": 7433 }, { "epoch": 14.868, "grad_norm": 2.696000814437866, "learning_rate": 2e-05, "loss": 0.04344347, "step": 7434 }, { "epoch": 14.87, "grad_norm": 1.0894790887832642, "learning_rate": 2e-05, "loss": 0.03905128, "step": 7435 }, { "epoch": 14.872, "grad_norm": 1.1012567281723022, "learning_rate": 2e-05, "loss": 0.0368652, "step": 7436 }, { "epoch": 14.874, "grad_norm": 1.441910982131958, "learning_rate": 2e-05, "loss": 0.04437996, "step": 7437 }, { "epoch": 14.876, "grad_norm": 1.305418610572815, "learning_rate": 2e-05, "loss": 0.03403091, "step": 7438 }, { "epoch": 14.878, "grad_norm": 0.936972975730896, "learning_rate": 2e-05, "loss": 0.03193845, "step": 7439 }, { "epoch": 14.88, "grad_norm": 1.2693448066711426, "learning_rate": 2e-05, "loss": 0.03593332, "step": 7440 }, { "epoch": 14.882, "grad_norm": 0.9230713844299316, "learning_rate": 2e-05, "loss": 0.02694279, "step": 7441 }, { "epoch": 14.884, "grad_norm": 1.4311070442199707, "learning_rate": 2e-05, "loss": 0.03495328, "step": 7442 }, { "epoch": 14.886, "grad_norm": 2.3351352214813232, "learning_rate": 2e-05, "loss": 0.04533555, "step": 7443 }, { "epoch": 14.888, "grad_norm": 1.6613550186157227, "learning_rate": 2e-05, "loss": 0.0545169, "step": 7444 }, { "epoch": 14.89, "grad_norm": 3.134610414505005, "learning_rate": 2e-05, "loss": 0.06269054, "step": 7445 }, { "epoch": 14.892, "grad_norm": 1.2900604009628296, "learning_rate": 2e-05, "loss": 0.0403505, "step": 7446 }, { "epoch": 14.894, "grad_norm": 1.0350453853607178, "learning_rate": 2e-05, "loss": 0.02884517, "step": 7447 }, { "epoch": 14.896, "grad_norm": 2.1216492652893066, "learning_rate": 2e-05, "loss": 0.05400493, "step": 7448 }, { "epoch": 14.898, "grad_norm": 1.5325112342834473, "learning_rate": 2e-05, "loss": 0.03483828, "step": 7449 }, { "epoch": 14.9, "grad_norm": 1.1589800119400024, "learning_rate": 2e-05, "loss": 0.03094343, "step": 7450 }, { "epoch": 14.902, "grad_norm": 1.6227322816848755, "learning_rate": 2e-05, "loss": 0.0469828, "step": 7451 }, { "epoch": 14.904, "grad_norm": 1.6878936290740967, "learning_rate": 2e-05, "loss": 0.04225905, "step": 7452 }, { "epoch": 14.906, "grad_norm": 1.1886628866195679, "learning_rate": 2e-05, "loss": 0.03824829, "step": 7453 }, { "epoch": 14.908, "grad_norm": 1.4336858987808228, "learning_rate": 2e-05, "loss": 0.04599863, "step": 7454 }, { "epoch": 14.91, "grad_norm": 1.2423670291900635, "learning_rate": 2e-05, "loss": 0.05265063, "step": 7455 }, { "epoch": 14.912, "grad_norm": 1.2401492595672607, "learning_rate": 2e-05, "loss": 0.03636886, "step": 7456 }, { "epoch": 14.914, "grad_norm": 0.7528156638145447, "learning_rate": 2e-05, "loss": 0.02554782, "step": 7457 }, { "epoch": 14.916, "grad_norm": 1.5916630029678345, "learning_rate": 2e-05, "loss": 0.03105753, "step": 7458 }, { "epoch": 14.918, "grad_norm": 1.8795676231384277, "learning_rate": 2e-05, "loss": 0.03935018, "step": 7459 }, { "epoch": 14.92, "grad_norm": 1.8167335987091064, "learning_rate": 2e-05, "loss": 0.04380289, "step": 7460 }, { "epoch": 14.922, "grad_norm": 1.1691914796829224, "learning_rate": 2e-05, "loss": 0.03274843, "step": 7461 }, { "epoch": 14.924, "grad_norm": 1.3650342226028442, "learning_rate": 2e-05, "loss": 0.04208003, "step": 7462 }, { "epoch": 14.926, "grad_norm": 1.24591863155365, "learning_rate": 2e-05, "loss": 0.04226547, "step": 7463 }, { "epoch": 14.928, "grad_norm": 1.3983882665634155, "learning_rate": 2e-05, "loss": 0.05762651, "step": 7464 }, { "epoch": 14.93, "grad_norm": 1.1452008485794067, "learning_rate": 2e-05, "loss": 0.02998473, "step": 7465 }, { "epoch": 14.932, "grad_norm": 0.9939238429069519, "learning_rate": 2e-05, "loss": 0.02803001, "step": 7466 }, { "epoch": 14.934, "grad_norm": 1.5362166166305542, "learning_rate": 2e-05, "loss": 0.05231043, "step": 7467 }, { "epoch": 14.936, "grad_norm": 1.7269541025161743, "learning_rate": 2e-05, "loss": 0.04780792, "step": 7468 }, { "epoch": 14.938, "grad_norm": 1.1512809991836548, "learning_rate": 2e-05, "loss": 0.03671274, "step": 7469 }, { "epoch": 14.94, "grad_norm": 1.2893776893615723, "learning_rate": 2e-05, "loss": 0.03543502, "step": 7470 }, { "epoch": 14.942, "grad_norm": 2.2255067825317383, "learning_rate": 2e-05, "loss": 0.05238653, "step": 7471 }, { "epoch": 14.943999999999999, "grad_norm": 0.9970213770866394, "learning_rate": 2e-05, "loss": 0.02268671, "step": 7472 }, { "epoch": 14.946, "grad_norm": 1.0443769693374634, "learning_rate": 2e-05, "loss": 0.03668858, "step": 7473 }, { "epoch": 14.948, "grad_norm": 0.9335085153579712, "learning_rate": 2e-05, "loss": 0.03486544, "step": 7474 }, { "epoch": 14.95, "grad_norm": 2.1096580028533936, "learning_rate": 2e-05, "loss": 0.06607223, "step": 7475 }, { "epoch": 14.952, "grad_norm": 2.1149537563323975, "learning_rate": 2e-05, "loss": 0.03609411, "step": 7476 }, { "epoch": 14.954, "grad_norm": 1.5304067134857178, "learning_rate": 2e-05, "loss": 0.03244342, "step": 7477 }, { "epoch": 14.956, "grad_norm": 1.4157954454421997, "learning_rate": 2e-05, "loss": 0.04437434, "step": 7478 }, { "epoch": 14.958, "grad_norm": 1.3668354749679565, "learning_rate": 2e-05, "loss": 0.03368328, "step": 7479 }, { "epoch": 14.96, "grad_norm": 1.1357492208480835, "learning_rate": 2e-05, "loss": 0.03183534, "step": 7480 }, { "epoch": 14.962, "grad_norm": 1.2593474388122559, "learning_rate": 2e-05, "loss": 0.03054814, "step": 7481 }, { "epoch": 14.964, "grad_norm": 1.0651172399520874, "learning_rate": 2e-05, "loss": 0.03272149, "step": 7482 }, { "epoch": 14.966, "grad_norm": 1.3296829462051392, "learning_rate": 2e-05, "loss": 0.02990423, "step": 7483 }, { "epoch": 14.968, "grad_norm": 1.153295874595642, "learning_rate": 2e-05, "loss": 0.04132929, "step": 7484 }, { "epoch": 14.97, "grad_norm": 1.8791780471801758, "learning_rate": 2e-05, "loss": 0.0307649, "step": 7485 }, { "epoch": 14.972, "grad_norm": 1.850902795791626, "learning_rate": 2e-05, "loss": 0.05484937, "step": 7486 }, { "epoch": 14.974, "grad_norm": 1.170993685722351, "learning_rate": 2e-05, "loss": 0.03880947, "step": 7487 }, { "epoch": 14.975999999999999, "grad_norm": 1.0112059116363525, "learning_rate": 2e-05, "loss": 0.05152441, "step": 7488 }, { "epoch": 14.978, "grad_norm": 0.9202816486358643, "learning_rate": 2e-05, "loss": 0.03082973, "step": 7489 }, { "epoch": 14.98, "grad_norm": 1.5370155572891235, "learning_rate": 2e-05, "loss": 0.03976063, "step": 7490 }, { "epoch": 14.982, "grad_norm": 1.6214145421981812, "learning_rate": 2e-05, "loss": 0.03335922, "step": 7491 }, { "epoch": 14.984, "grad_norm": 1.5763835906982422, "learning_rate": 2e-05, "loss": 0.05680116, "step": 7492 }, { "epoch": 14.986, "grad_norm": 1.2244529724121094, "learning_rate": 2e-05, "loss": 0.03010012, "step": 7493 }, { "epoch": 14.988, "grad_norm": 1.0402621030807495, "learning_rate": 2e-05, "loss": 0.03202302, "step": 7494 }, { "epoch": 14.99, "grad_norm": 1.1672792434692383, "learning_rate": 2e-05, "loss": 0.0357578, "step": 7495 }, { "epoch": 14.992, "grad_norm": 1.5200659036636353, "learning_rate": 2e-05, "loss": 0.04126337, "step": 7496 }, { "epoch": 14.994, "grad_norm": 3.4498698711395264, "learning_rate": 2e-05, "loss": 0.06399542, "step": 7497 }, { "epoch": 14.996, "grad_norm": 2.6905369758605957, "learning_rate": 2e-05, "loss": 0.05302805, "step": 7498 }, { "epoch": 14.998, "grad_norm": 1.3441663980484009, "learning_rate": 2e-05, "loss": 0.03965209, "step": 7499 }, { "epoch": 15.0, "grad_norm": 1.0610202550888062, "learning_rate": 2e-05, "loss": 0.03335898, "step": 7500 }, { "epoch": 15.0, "eval_performance": { "AngleClassification_1": 0.982, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9600798403193613, "Equal_1": 0.994, "Equal_2": 0.9640718562874252, "Equal_3": 0.8982035928143712, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9939879759519038, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.99, "Perpendicular_1": 0.992, "Perpendicular_2": 0.972, "Perpendicular_3": 0.7114228456913828, "PointLiesOnCircle_1": 0.9939879759519038, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.99, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9860279441117764 }, "eval_runtime": 320.0741, "eval_samples_per_second": 32.805, "eval_steps_per_second": 0.656, "step": 7500 }, { "epoch": 15.002, "grad_norm": 1.5323970317840576, "learning_rate": 2e-05, "loss": 0.03357163, "step": 7501 }, { "epoch": 15.004, "grad_norm": 1.106204628944397, "learning_rate": 2e-05, "loss": 0.02693918, "step": 7502 }, { "epoch": 15.006, "grad_norm": 2.7281370162963867, "learning_rate": 2e-05, "loss": 0.07801263, "step": 7503 }, { "epoch": 15.008, "grad_norm": 0.9546897411346436, "learning_rate": 2e-05, "loss": 0.0307308, "step": 7504 }, { "epoch": 15.01, "grad_norm": 1.819274663925171, "learning_rate": 2e-05, "loss": 0.02562927, "step": 7505 }, { "epoch": 15.012, "grad_norm": 2.0995659828186035, "learning_rate": 2e-05, "loss": 0.05087785, "step": 7506 }, { "epoch": 15.014, "grad_norm": 1.5401643514633179, "learning_rate": 2e-05, "loss": 0.05163966, "step": 7507 }, { "epoch": 15.016, "grad_norm": 3.4083080291748047, "learning_rate": 2e-05, "loss": 0.04428158, "step": 7508 }, { "epoch": 15.018, "grad_norm": 3.521087646484375, "learning_rate": 2e-05, "loss": 0.04372428, "step": 7509 }, { "epoch": 15.02, "grad_norm": 1.4367711544036865, "learning_rate": 2e-05, "loss": 0.0497531, "step": 7510 }, { "epoch": 15.022, "grad_norm": 1.270221471786499, "learning_rate": 2e-05, "loss": 0.03810829, "step": 7511 }, { "epoch": 15.024, "grad_norm": 1.317637324333191, "learning_rate": 2e-05, "loss": 0.0325052, "step": 7512 }, { "epoch": 15.026, "grad_norm": 1.2471699714660645, "learning_rate": 2e-05, "loss": 0.02949313, "step": 7513 }, { "epoch": 15.028, "grad_norm": 1.197861671447754, "learning_rate": 2e-05, "loss": 0.03684509, "step": 7514 }, { "epoch": 15.03, "grad_norm": 1.8183475732803345, "learning_rate": 2e-05, "loss": 0.06737492, "step": 7515 }, { "epoch": 15.032, "grad_norm": 1.03377366065979, "learning_rate": 2e-05, "loss": 0.03917066, "step": 7516 }, { "epoch": 15.034, "grad_norm": 2.038936138153076, "learning_rate": 2e-05, "loss": 0.03911228, "step": 7517 }, { "epoch": 15.036, "grad_norm": 0.945249617099762, "learning_rate": 2e-05, "loss": 0.02501803, "step": 7518 }, { "epoch": 15.038, "grad_norm": 3.087181568145752, "learning_rate": 2e-05, "loss": 0.0438556, "step": 7519 }, { "epoch": 15.04, "grad_norm": 1.3081676959991455, "learning_rate": 2e-05, "loss": 0.04287355, "step": 7520 }, { "epoch": 15.042, "grad_norm": 1.7691004276275635, "learning_rate": 2e-05, "loss": 0.03438368, "step": 7521 }, { "epoch": 15.044, "grad_norm": 1.5667086839675903, "learning_rate": 2e-05, "loss": 0.02922621, "step": 7522 }, { "epoch": 15.046, "grad_norm": 2.0105979442596436, "learning_rate": 2e-05, "loss": 0.04000949, "step": 7523 }, { "epoch": 15.048, "grad_norm": 1.2400439977645874, "learning_rate": 2e-05, "loss": 0.0464882, "step": 7524 }, { "epoch": 15.05, "grad_norm": 1.4981132745742798, "learning_rate": 2e-05, "loss": 0.0353303, "step": 7525 }, { "epoch": 15.052, "grad_norm": 1.7976100444793701, "learning_rate": 2e-05, "loss": 0.04217862, "step": 7526 }, { "epoch": 15.054, "grad_norm": 1.2558151483535767, "learning_rate": 2e-05, "loss": 0.03909191, "step": 7527 }, { "epoch": 15.056, "grad_norm": 0.9325153231620789, "learning_rate": 2e-05, "loss": 0.03274619, "step": 7528 }, { "epoch": 15.058, "grad_norm": 1.0450353622436523, "learning_rate": 2e-05, "loss": 0.03170678, "step": 7529 }, { "epoch": 15.06, "grad_norm": 2.4308180809020996, "learning_rate": 2e-05, "loss": 0.04601308, "step": 7530 }, { "epoch": 15.062, "grad_norm": 1.7169846296310425, "learning_rate": 2e-05, "loss": 0.03847077, "step": 7531 }, { "epoch": 15.064, "grad_norm": 1.9664361476898193, "learning_rate": 2e-05, "loss": 0.06695174, "step": 7532 }, { "epoch": 15.066, "grad_norm": 1.3882360458374023, "learning_rate": 2e-05, "loss": 0.03631517, "step": 7533 }, { "epoch": 15.068, "grad_norm": 1.6631497144699097, "learning_rate": 2e-05, "loss": 0.03166562, "step": 7534 }, { "epoch": 15.07, "grad_norm": 1.215599536895752, "learning_rate": 2e-05, "loss": 0.04256611, "step": 7535 }, { "epoch": 15.072, "grad_norm": 1.148917555809021, "learning_rate": 2e-05, "loss": 0.04094999, "step": 7536 }, { "epoch": 15.074, "grad_norm": 1.262528419494629, "learning_rate": 2e-05, "loss": 0.02669748, "step": 7537 }, { "epoch": 15.076, "grad_norm": 1.4280608892440796, "learning_rate": 2e-05, "loss": 0.03824629, "step": 7538 }, { "epoch": 15.078, "grad_norm": 1.1914440393447876, "learning_rate": 2e-05, "loss": 0.03420604, "step": 7539 }, { "epoch": 15.08, "grad_norm": 1.5418035984039307, "learning_rate": 2e-05, "loss": 0.04447289, "step": 7540 }, { "epoch": 15.082, "grad_norm": 1.7850476503372192, "learning_rate": 2e-05, "loss": 0.05204378, "step": 7541 }, { "epoch": 15.084, "grad_norm": 1.7155487537384033, "learning_rate": 2e-05, "loss": 0.04036555, "step": 7542 }, { "epoch": 15.086, "grad_norm": 1.1043587923049927, "learning_rate": 2e-05, "loss": 0.03954956, "step": 7543 }, { "epoch": 15.088, "grad_norm": 1.4283198118209839, "learning_rate": 2e-05, "loss": 0.04688943, "step": 7544 }, { "epoch": 15.09, "grad_norm": 1.2153453826904297, "learning_rate": 2e-05, "loss": 0.03587115, "step": 7545 }, { "epoch": 15.092, "grad_norm": 1.3699722290039062, "learning_rate": 2e-05, "loss": 0.04313603, "step": 7546 }, { "epoch": 15.094, "grad_norm": 1.0918267965316772, "learning_rate": 2e-05, "loss": 0.03959726, "step": 7547 }, { "epoch": 15.096, "grad_norm": 1.2050516605377197, "learning_rate": 2e-05, "loss": 0.03665368, "step": 7548 }, { "epoch": 15.098, "grad_norm": 1.611580729484558, "learning_rate": 2e-05, "loss": 0.04823206, "step": 7549 }, { "epoch": 15.1, "grad_norm": 1.3868486881256104, "learning_rate": 2e-05, "loss": 0.04774088, "step": 7550 }, { "epoch": 15.102, "grad_norm": 2.077451229095459, "learning_rate": 2e-05, "loss": 0.03221234, "step": 7551 }, { "epoch": 15.104, "grad_norm": 1.458793044090271, "learning_rate": 2e-05, "loss": 0.02752698, "step": 7552 }, { "epoch": 15.106, "grad_norm": 1.0230228900909424, "learning_rate": 2e-05, "loss": 0.03414164, "step": 7553 }, { "epoch": 15.108, "grad_norm": 1.8648970127105713, "learning_rate": 2e-05, "loss": 0.04431803, "step": 7554 }, { "epoch": 15.11, "grad_norm": 1.213136076927185, "learning_rate": 2e-05, "loss": 0.03528218, "step": 7555 }, { "epoch": 15.112, "grad_norm": 1.1067938804626465, "learning_rate": 2e-05, "loss": 0.03371948, "step": 7556 }, { "epoch": 15.114, "grad_norm": 1.099187970161438, "learning_rate": 2e-05, "loss": 0.04380881, "step": 7557 }, { "epoch": 15.116, "grad_norm": 1.4904146194458008, "learning_rate": 2e-05, "loss": 0.04857348, "step": 7558 }, { "epoch": 15.118, "grad_norm": 1.9208067655563354, "learning_rate": 2e-05, "loss": 0.02925631, "step": 7559 }, { "epoch": 15.12, "grad_norm": 1.3808106184005737, "learning_rate": 2e-05, "loss": 0.05164766, "step": 7560 }, { "epoch": 15.122, "grad_norm": 1.7450789213180542, "learning_rate": 2e-05, "loss": 0.05939767, "step": 7561 }, { "epoch": 15.124, "grad_norm": 1.3187388181686401, "learning_rate": 2e-05, "loss": 0.04064866, "step": 7562 }, { "epoch": 15.126, "grad_norm": 2.158761501312256, "learning_rate": 2e-05, "loss": 0.03640309, "step": 7563 }, { "epoch": 15.128, "grad_norm": 1.668043613433838, "learning_rate": 2e-05, "loss": 0.03924328, "step": 7564 }, { "epoch": 15.13, "grad_norm": 1.2158105373382568, "learning_rate": 2e-05, "loss": 0.03478661, "step": 7565 }, { "epoch": 15.132, "grad_norm": 1.681146264076233, "learning_rate": 2e-05, "loss": 0.05600887, "step": 7566 }, { "epoch": 15.134, "grad_norm": 0.9708520770072937, "learning_rate": 2e-05, "loss": 0.01993345, "step": 7567 }, { "epoch": 15.136, "grad_norm": 0.926152765750885, "learning_rate": 2e-05, "loss": 0.03074206, "step": 7568 }, { "epoch": 15.138, "grad_norm": 1.0853475332260132, "learning_rate": 2e-05, "loss": 0.03965416, "step": 7569 }, { "epoch": 15.14, "grad_norm": 1.4762166738510132, "learning_rate": 2e-05, "loss": 0.0375259, "step": 7570 }, { "epoch": 15.142, "grad_norm": 1.307599425315857, "learning_rate": 2e-05, "loss": 0.04117063, "step": 7571 }, { "epoch": 15.144, "grad_norm": 1.364780068397522, "learning_rate": 2e-05, "loss": 0.03955507, "step": 7572 }, { "epoch": 15.146, "grad_norm": 1.8140677213668823, "learning_rate": 2e-05, "loss": 0.04545042, "step": 7573 }, { "epoch": 15.148, "grad_norm": 1.234650731086731, "learning_rate": 2e-05, "loss": 0.03562243, "step": 7574 }, { "epoch": 15.15, "grad_norm": 1.4778765439987183, "learning_rate": 2e-05, "loss": 0.05000856, "step": 7575 }, { "epoch": 15.152, "grad_norm": 1.323702096939087, "learning_rate": 2e-05, "loss": 0.02919011, "step": 7576 }, { "epoch": 15.154, "grad_norm": 0.9931415319442749, "learning_rate": 2e-05, "loss": 0.03235832, "step": 7577 }, { "epoch": 15.156, "grad_norm": 1.086990237236023, "learning_rate": 2e-05, "loss": 0.02611094, "step": 7578 }, { "epoch": 15.158, "grad_norm": 1.284932255744934, "learning_rate": 2e-05, "loss": 0.04641385, "step": 7579 }, { "epoch": 15.16, "grad_norm": 0.9622644782066345, "learning_rate": 2e-05, "loss": 0.02520053, "step": 7580 }, { "epoch": 15.162, "grad_norm": 1.0503311157226562, "learning_rate": 2e-05, "loss": 0.03852223, "step": 7581 }, { "epoch": 15.164, "grad_norm": 1.5426743030548096, "learning_rate": 2e-05, "loss": 0.04453558, "step": 7582 }, { "epoch": 15.166, "grad_norm": 1.0924078226089478, "learning_rate": 2e-05, "loss": 0.03900549, "step": 7583 }, { "epoch": 15.168, "grad_norm": 1.5701236724853516, "learning_rate": 2e-05, "loss": 0.03460903, "step": 7584 }, { "epoch": 15.17, "grad_norm": 1.7909417152404785, "learning_rate": 2e-05, "loss": 0.02750588, "step": 7585 }, { "epoch": 15.172, "grad_norm": 3.5445761680603027, "learning_rate": 2e-05, "loss": 0.05145767, "step": 7586 }, { "epoch": 15.174, "grad_norm": 1.7344753742218018, "learning_rate": 2e-05, "loss": 0.04307804, "step": 7587 }, { "epoch": 15.176, "grad_norm": 1.1254594326019287, "learning_rate": 2e-05, "loss": 0.03898653, "step": 7588 }, { "epoch": 15.178, "grad_norm": 1.2491363286972046, "learning_rate": 2e-05, "loss": 0.0345161, "step": 7589 }, { "epoch": 15.18, "grad_norm": 1.5165410041809082, "learning_rate": 2e-05, "loss": 0.04044965, "step": 7590 }, { "epoch": 15.182, "grad_norm": 1.306868553161621, "learning_rate": 2e-05, "loss": 0.04725231, "step": 7591 }, { "epoch": 15.184, "grad_norm": 1.8153162002563477, "learning_rate": 2e-05, "loss": 0.0434998, "step": 7592 }, { "epoch": 15.186, "grad_norm": 2.0734148025512695, "learning_rate": 2e-05, "loss": 0.04822708, "step": 7593 }, { "epoch": 15.188, "grad_norm": 1.193581223487854, "learning_rate": 2e-05, "loss": 0.04005368, "step": 7594 }, { "epoch": 15.19, "grad_norm": 1.9884793758392334, "learning_rate": 2e-05, "loss": 0.03887493, "step": 7595 }, { "epoch": 15.192, "grad_norm": 2.539429187774658, "learning_rate": 2e-05, "loss": 0.03816453, "step": 7596 }, { "epoch": 15.194, "grad_norm": 1.7256981134414673, "learning_rate": 2e-05, "loss": 0.02610845, "step": 7597 }, { "epoch": 15.196, "grad_norm": 0.9873558282852173, "learning_rate": 2e-05, "loss": 0.02836427, "step": 7598 }, { "epoch": 15.198, "grad_norm": 3.33852481842041, "learning_rate": 2e-05, "loss": 0.06002682, "step": 7599 }, { "epoch": 15.2, "grad_norm": 2.537189483642578, "learning_rate": 2e-05, "loss": 0.07417429, "step": 7600 }, { "epoch": 15.202, "grad_norm": 0.9124858379364014, "learning_rate": 2e-05, "loss": 0.03152982, "step": 7601 }, { "epoch": 15.204, "grad_norm": 1.0165115594863892, "learning_rate": 2e-05, "loss": 0.03425337, "step": 7602 }, { "epoch": 15.206, "grad_norm": 1.362256407737732, "learning_rate": 2e-05, "loss": 0.04571758, "step": 7603 }, { "epoch": 15.208, "grad_norm": 1.2816169261932373, "learning_rate": 2e-05, "loss": 0.04699683, "step": 7604 }, { "epoch": 15.21, "grad_norm": 1.6668884754180908, "learning_rate": 2e-05, "loss": 0.03962279, "step": 7605 }, { "epoch": 15.212, "grad_norm": 1.2966806888580322, "learning_rate": 2e-05, "loss": 0.04509297, "step": 7606 }, { "epoch": 15.214, "grad_norm": 1.191419243812561, "learning_rate": 2e-05, "loss": 0.04549539, "step": 7607 }, { "epoch": 15.216, "grad_norm": 0.941321611404419, "learning_rate": 2e-05, "loss": 0.02918254, "step": 7608 }, { "epoch": 15.218, "grad_norm": 1.7886515855789185, "learning_rate": 2e-05, "loss": 0.04620847, "step": 7609 }, { "epoch": 15.22, "grad_norm": 1.8236616849899292, "learning_rate": 2e-05, "loss": 0.04852483, "step": 7610 }, { "epoch": 15.222, "grad_norm": 1.224780797958374, "learning_rate": 2e-05, "loss": 0.0375507, "step": 7611 }, { "epoch": 15.224, "grad_norm": 1.335411548614502, "learning_rate": 2e-05, "loss": 0.04444863, "step": 7612 }, { "epoch": 15.226, "grad_norm": 1.0695840120315552, "learning_rate": 2e-05, "loss": 0.03443304, "step": 7613 }, { "epoch": 15.228, "grad_norm": 1.1740689277648926, "learning_rate": 2e-05, "loss": 0.03335565, "step": 7614 }, { "epoch": 15.23, "grad_norm": 2.6707682609558105, "learning_rate": 2e-05, "loss": 0.05265085, "step": 7615 }, { "epoch": 15.232, "grad_norm": 4.135421276092529, "learning_rate": 2e-05, "loss": 0.0563576, "step": 7616 }, { "epoch": 15.234, "grad_norm": 1.716046690940857, "learning_rate": 2e-05, "loss": 0.04460949, "step": 7617 }, { "epoch": 15.236, "grad_norm": 1.721646785736084, "learning_rate": 2e-05, "loss": 0.04767875, "step": 7618 }, { "epoch": 15.238, "grad_norm": 1.284906029701233, "learning_rate": 2e-05, "loss": 0.01877031, "step": 7619 }, { "epoch": 15.24, "grad_norm": 1.517042636871338, "learning_rate": 2e-05, "loss": 0.04870673, "step": 7620 }, { "epoch": 15.242, "grad_norm": 0.9522220492362976, "learning_rate": 2e-05, "loss": 0.02781921, "step": 7621 }, { "epoch": 15.244, "grad_norm": 1.4575544595718384, "learning_rate": 2e-05, "loss": 0.04900608, "step": 7622 }, { "epoch": 15.246, "grad_norm": 2.664926052093506, "learning_rate": 2e-05, "loss": 0.05986939, "step": 7623 }, { "epoch": 15.248, "grad_norm": 1.0192816257476807, "learning_rate": 2e-05, "loss": 0.03001048, "step": 7624 }, { "epoch": 15.25, "grad_norm": 2.431614637374878, "learning_rate": 2e-05, "loss": 0.05669653, "step": 7625 }, { "epoch": 15.252, "grad_norm": 1.4480475187301636, "learning_rate": 2e-05, "loss": 0.0410952, "step": 7626 }, { "epoch": 15.254, "grad_norm": 1.0749616622924805, "learning_rate": 2e-05, "loss": 0.03243171, "step": 7627 }, { "epoch": 15.256, "grad_norm": 1.4723212718963623, "learning_rate": 2e-05, "loss": 0.04396664, "step": 7628 }, { "epoch": 15.258, "grad_norm": 1.4612725973129272, "learning_rate": 2e-05, "loss": 0.05846308, "step": 7629 }, { "epoch": 15.26, "grad_norm": 1.0961060523986816, "learning_rate": 2e-05, "loss": 0.03180789, "step": 7630 }, { "epoch": 15.262, "grad_norm": 1.0098501443862915, "learning_rate": 2e-05, "loss": 0.02932501, "step": 7631 }, { "epoch": 15.264, "grad_norm": 1.500989317893982, "learning_rate": 2e-05, "loss": 0.05016597, "step": 7632 }, { "epoch": 15.266, "grad_norm": 1.462710976600647, "learning_rate": 2e-05, "loss": 0.04754984, "step": 7633 }, { "epoch": 15.268, "grad_norm": 1.3882156610488892, "learning_rate": 2e-05, "loss": 0.04774838, "step": 7634 }, { "epoch": 15.27, "grad_norm": 1.2340351343154907, "learning_rate": 2e-05, "loss": 0.05167802, "step": 7635 }, { "epoch": 15.272, "grad_norm": 1.213295817375183, "learning_rate": 2e-05, "loss": 0.04757159, "step": 7636 }, { "epoch": 15.274000000000001, "grad_norm": 1.2619282007217407, "learning_rate": 2e-05, "loss": 0.05256531, "step": 7637 }, { "epoch": 15.276, "grad_norm": 1.552124261856079, "learning_rate": 2e-05, "loss": 0.04303239, "step": 7638 }, { "epoch": 15.278, "grad_norm": 1.615976333618164, "learning_rate": 2e-05, "loss": 0.05144659, "step": 7639 }, { "epoch": 15.28, "grad_norm": 1.2104065418243408, "learning_rate": 2e-05, "loss": 0.02734979, "step": 7640 }, { "epoch": 15.282, "grad_norm": 2.24019455909729, "learning_rate": 2e-05, "loss": 0.04673993, "step": 7641 }, { "epoch": 15.284, "grad_norm": 1.7466453313827515, "learning_rate": 2e-05, "loss": 0.03623344, "step": 7642 }, { "epoch": 15.286, "grad_norm": 1.443344235420227, "learning_rate": 2e-05, "loss": 0.06160107, "step": 7643 }, { "epoch": 15.288, "grad_norm": 0.9850043654441833, "learning_rate": 2e-05, "loss": 0.03402284, "step": 7644 }, { "epoch": 15.29, "grad_norm": 1.4699482917785645, "learning_rate": 2e-05, "loss": 0.04960431, "step": 7645 }, { "epoch": 15.292, "grad_norm": 0.8527708053588867, "learning_rate": 2e-05, "loss": 0.02158963, "step": 7646 }, { "epoch": 15.294, "grad_norm": 0.7715446352958679, "learning_rate": 2e-05, "loss": 0.02164961, "step": 7647 }, { "epoch": 15.296, "grad_norm": 1.1812517642974854, "learning_rate": 2e-05, "loss": 0.04358544, "step": 7648 }, { "epoch": 15.298, "grad_norm": 1.6417049169540405, "learning_rate": 2e-05, "loss": 0.05505785, "step": 7649 }, { "epoch": 15.3, "grad_norm": 0.9022408723831177, "learning_rate": 2e-05, "loss": 0.02869208, "step": 7650 }, { "epoch": 15.302, "grad_norm": 1.2955352067947388, "learning_rate": 2e-05, "loss": 0.05353476, "step": 7651 }, { "epoch": 15.304, "grad_norm": 2.33793044090271, "learning_rate": 2e-05, "loss": 0.02959348, "step": 7652 }, { "epoch": 15.306, "grad_norm": 1.1095025539398193, "learning_rate": 2e-05, "loss": 0.03602049, "step": 7653 }, { "epoch": 15.308, "grad_norm": 1.6043916940689087, "learning_rate": 2e-05, "loss": 0.03583729, "step": 7654 }, { "epoch": 15.31, "grad_norm": 1.0492905378341675, "learning_rate": 2e-05, "loss": 0.03110175, "step": 7655 }, { "epoch": 15.312, "grad_norm": 1.7085450887680054, "learning_rate": 2e-05, "loss": 0.04954446, "step": 7656 }, { "epoch": 15.314, "grad_norm": 1.9505372047424316, "learning_rate": 2e-05, "loss": 0.03658181, "step": 7657 }, { "epoch": 15.316, "grad_norm": 0.9936202168464661, "learning_rate": 2e-05, "loss": 0.02535901, "step": 7658 }, { "epoch": 15.318, "grad_norm": 1.0207105875015259, "learning_rate": 2e-05, "loss": 0.03539135, "step": 7659 }, { "epoch": 15.32, "grad_norm": 1.5546672344207764, "learning_rate": 2e-05, "loss": 0.04938481, "step": 7660 }, { "epoch": 15.322, "grad_norm": 1.247316837310791, "learning_rate": 2e-05, "loss": 0.03931482, "step": 7661 }, { "epoch": 15.324, "grad_norm": 1.2438799142837524, "learning_rate": 2e-05, "loss": 0.02923568, "step": 7662 }, { "epoch": 15.326, "grad_norm": 1.87889564037323, "learning_rate": 2e-05, "loss": 0.04272472, "step": 7663 }, { "epoch": 15.328, "grad_norm": 1.0760419368743896, "learning_rate": 2e-05, "loss": 0.03573871, "step": 7664 }, { "epoch": 15.33, "grad_norm": 2.030569314956665, "learning_rate": 2e-05, "loss": 0.04339175, "step": 7665 }, { "epoch": 15.332, "grad_norm": 1.7053855657577515, "learning_rate": 2e-05, "loss": 0.04916566, "step": 7666 }, { "epoch": 15.334, "grad_norm": 1.4660038948059082, "learning_rate": 2e-05, "loss": 0.03246354, "step": 7667 }, { "epoch": 15.336, "grad_norm": 1.257335901260376, "learning_rate": 2e-05, "loss": 0.04967248, "step": 7668 }, { "epoch": 15.338, "grad_norm": 1.4573662281036377, "learning_rate": 2e-05, "loss": 0.04021806, "step": 7669 }, { "epoch": 15.34, "grad_norm": 1.271093487739563, "learning_rate": 2e-05, "loss": 0.04290817, "step": 7670 }, { "epoch": 15.342, "grad_norm": 0.9368219971656799, "learning_rate": 2e-05, "loss": 0.0271676, "step": 7671 }, { "epoch": 15.344, "grad_norm": 1.1794713735580444, "learning_rate": 2e-05, "loss": 0.03208122, "step": 7672 }, { "epoch": 15.346, "grad_norm": 0.9926052689552307, "learning_rate": 2e-05, "loss": 0.02915346, "step": 7673 }, { "epoch": 15.348, "grad_norm": 1.1521005630493164, "learning_rate": 2e-05, "loss": 0.03530318, "step": 7674 }, { "epoch": 15.35, "grad_norm": 1.2285836935043335, "learning_rate": 2e-05, "loss": 0.03318744, "step": 7675 }, { "epoch": 15.352, "grad_norm": 2.876605987548828, "learning_rate": 2e-05, "loss": 0.02903116, "step": 7676 }, { "epoch": 15.354, "grad_norm": 2.023113489151001, "learning_rate": 2e-05, "loss": 0.04902362, "step": 7677 }, { "epoch": 15.356, "grad_norm": 1.7338629961013794, "learning_rate": 2e-05, "loss": 0.03628127, "step": 7678 }, { "epoch": 15.358, "grad_norm": 1.0066721439361572, "learning_rate": 2e-05, "loss": 0.0289756, "step": 7679 }, { "epoch": 15.36, "grad_norm": 1.7639458179473877, "learning_rate": 2e-05, "loss": 0.04403928, "step": 7680 }, { "epoch": 15.362, "grad_norm": 1.4575209617614746, "learning_rate": 2e-05, "loss": 0.0353655, "step": 7681 }, { "epoch": 15.364, "grad_norm": 1.1432892084121704, "learning_rate": 2e-05, "loss": 0.03814424, "step": 7682 }, { "epoch": 15.366, "grad_norm": 1.755494236946106, "learning_rate": 2e-05, "loss": 0.0562299, "step": 7683 }, { "epoch": 15.368, "grad_norm": 1.7338649034500122, "learning_rate": 2e-05, "loss": 0.03804263, "step": 7684 }, { "epoch": 15.37, "grad_norm": 2.1905031204223633, "learning_rate": 2e-05, "loss": 0.03826035, "step": 7685 }, { "epoch": 15.372, "grad_norm": 3.7067763805389404, "learning_rate": 2e-05, "loss": 0.05445283, "step": 7686 }, { "epoch": 15.374, "grad_norm": 2.2767043113708496, "learning_rate": 2e-05, "loss": 0.06008461, "step": 7687 }, { "epoch": 15.376, "grad_norm": 1.2579028606414795, "learning_rate": 2e-05, "loss": 0.03845136, "step": 7688 }, { "epoch": 15.378, "grad_norm": 1.5080291032791138, "learning_rate": 2e-05, "loss": 0.04014179, "step": 7689 }, { "epoch": 15.38, "grad_norm": 1.7374647855758667, "learning_rate": 2e-05, "loss": 0.03181672, "step": 7690 }, { "epoch": 15.382, "grad_norm": 1.6790274381637573, "learning_rate": 2e-05, "loss": 0.03847949, "step": 7691 }, { "epoch": 15.384, "grad_norm": 2.135026216506958, "learning_rate": 2e-05, "loss": 0.04469639, "step": 7692 }, { "epoch": 15.386, "grad_norm": 1.4211525917053223, "learning_rate": 2e-05, "loss": 0.04342383, "step": 7693 }, { "epoch": 15.388, "grad_norm": 1.0718973875045776, "learning_rate": 2e-05, "loss": 0.03107191, "step": 7694 }, { "epoch": 15.39, "grad_norm": 0.8555018901824951, "learning_rate": 2e-05, "loss": 0.02420723, "step": 7695 }, { "epoch": 15.392, "grad_norm": 1.2442560195922852, "learning_rate": 2e-05, "loss": 0.04132719, "step": 7696 }, { "epoch": 15.394, "grad_norm": 1.0570979118347168, "learning_rate": 2e-05, "loss": 0.03066903, "step": 7697 }, { "epoch": 15.396, "grad_norm": 1.171759009361267, "learning_rate": 2e-05, "loss": 0.03119994, "step": 7698 }, { "epoch": 15.398, "grad_norm": 1.6701782941818237, "learning_rate": 2e-05, "loss": 0.03852317, "step": 7699 }, { "epoch": 15.4, "grad_norm": 1.8172271251678467, "learning_rate": 2e-05, "loss": 0.02544076, "step": 7700 }, { "epoch": 15.402, "grad_norm": 1.2106478214263916, "learning_rate": 2e-05, "loss": 0.03819906, "step": 7701 }, { "epoch": 15.404, "grad_norm": 2.4221479892730713, "learning_rate": 2e-05, "loss": 0.04959261, "step": 7702 }, { "epoch": 15.406, "grad_norm": 1.6806089878082275, "learning_rate": 2e-05, "loss": 0.04468667, "step": 7703 }, { "epoch": 15.408, "grad_norm": 1.3105989694595337, "learning_rate": 2e-05, "loss": 0.03792799, "step": 7704 }, { "epoch": 15.41, "grad_norm": 1.3647056818008423, "learning_rate": 2e-05, "loss": 0.04311409, "step": 7705 }, { "epoch": 15.412, "grad_norm": 1.6955467462539673, "learning_rate": 2e-05, "loss": 0.04090759, "step": 7706 }, { "epoch": 15.414, "grad_norm": 1.6445177793502808, "learning_rate": 2e-05, "loss": 0.050005, "step": 7707 }, { "epoch": 15.416, "grad_norm": 2.115290403366089, "learning_rate": 2e-05, "loss": 0.04707139, "step": 7708 }, { "epoch": 15.418, "grad_norm": 1.0016669034957886, "learning_rate": 2e-05, "loss": 0.02786621, "step": 7709 }, { "epoch": 15.42, "grad_norm": 1.5075523853302002, "learning_rate": 2e-05, "loss": 0.04682674, "step": 7710 }, { "epoch": 15.422, "grad_norm": 1.5943173170089722, "learning_rate": 2e-05, "loss": 0.0265224, "step": 7711 }, { "epoch": 15.424, "grad_norm": 1.5050104856491089, "learning_rate": 2e-05, "loss": 0.04416554, "step": 7712 }, { "epoch": 15.426, "grad_norm": 0.9919576644897461, "learning_rate": 2e-05, "loss": 0.02424739, "step": 7713 }, { "epoch": 15.428, "grad_norm": 1.0542445182800293, "learning_rate": 2e-05, "loss": 0.03701505, "step": 7714 }, { "epoch": 15.43, "grad_norm": 0.9411125779151917, "learning_rate": 2e-05, "loss": 0.03089548, "step": 7715 }, { "epoch": 15.432, "grad_norm": 1.2335981130599976, "learning_rate": 2e-05, "loss": 0.04131325, "step": 7716 }, { "epoch": 15.434, "grad_norm": 1.460959553718567, "learning_rate": 2e-05, "loss": 0.03338287, "step": 7717 }, { "epoch": 15.436, "grad_norm": 0.9841430187225342, "learning_rate": 2e-05, "loss": 0.03597587, "step": 7718 }, { "epoch": 15.438, "grad_norm": 1.1647802591323853, "learning_rate": 2e-05, "loss": 0.03573506, "step": 7719 }, { "epoch": 15.44, "grad_norm": 1.4322996139526367, "learning_rate": 2e-05, "loss": 0.04496816, "step": 7720 }, { "epoch": 15.442, "grad_norm": 3.0110630989074707, "learning_rate": 2e-05, "loss": 0.04119972, "step": 7721 }, { "epoch": 15.444, "grad_norm": 1.2038313150405884, "learning_rate": 2e-05, "loss": 0.03304677, "step": 7722 }, { "epoch": 15.446, "grad_norm": 1.3562027215957642, "learning_rate": 2e-05, "loss": 0.04422673, "step": 7723 }, { "epoch": 15.448, "grad_norm": 1.345652461051941, "learning_rate": 2e-05, "loss": 0.04346151, "step": 7724 }, { "epoch": 15.45, "grad_norm": 0.9705491662025452, "learning_rate": 2e-05, "loss": 0.02673662, "step": 7725 }, { "epoch": 15.452, "grad_norm": 1.6109822988510132, "learning_rate": 2e-05, "loss": 0.04332538, "step": 7726 }, { "epoch": 15.454, "grad_norm": 1.4140931367874146, "learning_rate": 2e-05, "loss": 0.04497489, "step": 7727 }, { "epoch": 15.456, "grad_norm": 1.2737836837768555, "learning_rate": 2e-05, "loss": 0.03585126, "step": 7728 }, { "epoch": 15.458, "grad_norm": 1.0965650081634521, "learning_rate": 2e-05, "loss": 0.02634549, "step": 7729 }, { "epoch": 15.46, "grad_norm": 1.3999693393707275, "learning_rate": 2e-05, "loss": 0.04551007, "step": 7730 }, { "epoch": 15.462, "grad_norm": 1.013411045074463, "learning_rate": 2e-05, "loss": 0.02215614, "step": 7731 }, { "epoch": 15.464, "grad_norm": 1.3173936605453491, "learning_rate": 2e-05, "loss": 0.04206047, "step": 7732 }, { "epoch": 15.466, "grad_norm": 1.0382493734359741, "learning_rate": 2e-05, "loss": 0.02827413, "step": 7733 }, { "epoch": 15.468, "grad_norm": 2.1286673545837402, "learning_rate": 2e-05, "loss": 0.04167566, "step": 7734 }, { "epoch": 15.47, "grad_norm": 2.1657168865203857, "learning_rate": 2e-05, "loss": 0.04439145, "step": 7735 }, { "epoch": 15.472, "grad_norm": 1.6205779314041138, "learning_rate": 2e-05, "loss": 0.04393222, "step": 7736 }, { "epoch": 15.474, "grad_norm": 1.3515499830245972, "learning_rate": 2e-05, "loss": 0.04231175, "step": 7737 }, { "epoch": 15.475999999999999, "grad_norm": 1.089202880859375, "learning_rate": 2e-05, "loss": 0.02547337, "step": 7738 }, { "epoch": 15.478, "grad_norm": 1.4004868268966675, "learning_rate": 2e-05, "loss": 0.0451882, "step": 7739 }, { "epoch": 15.48, "grad_norm": 1.272910714149475, "learning_rate": 2e-05, "loss": 0.01924961, "step": 7740 }, { "epoch": 15.482, "grad_norm": 1.2643826007843018, "learning_rate": 2e-05, "loss": 0.04131962, "step": 7741 }, { "epoch": 15.484, "grad_norm": 2.1027934551239014, "learning_rate": 2e-05, "loss": 0.06375703, "step": 7742 }, { "epoch": 15.486, "grad_norm": 1.2265628576278687, "learning_rate": 2e-05, "loss": 0.02747299, "step": 7743 }, { "epoch": 15.488, "grad_norm": 2.7004458904266357, "learning_rate": 2e-05, "loss": 0.06089799, "step": 7744 }, { "epoch": 15.49, "grad_norm": 0.9138990044593811, "learning_rate": 2e-05, "loss": 0.02951707, "step": 7745 }, { "epoch": 15.492, "grad_norm": 1.3887770175933838, "learning_rate": 2e-05, "loss": 0.03592655, "step": 7746 }, { "epoch": 15.494, "grad_norm": 1.399113655090332, "learning_rate": 2e-05, "loss": 0.03822273, "step": 7747 }, { "epoch": 15.496, "grad_norm": 1.7515449523925781, "learning_rate": 2e-05, "loss": 0.04199392, "step": 7748 }, { "epoch": 15.498, "grad_norm": 1.4153516292572021, "learning_rate": 2e-05, "loss": 0.04716617, "step": 7749 }, { "epoch": 15.5, "grad_norm": 2.602876901626587, "learning_rate": 2e-05, "loss": 0.04844777, "step": 7750 }, { "epoch": 15.502, "grad_norm": 1.2918318510055542, "learning_rate": 2e-05, "loss": 0.04199758, "step": 7751 }, { "epoch": 15.504, "grad_norm": 0.7387645244598389, "learning_rate": 2e-05, "loss": 0.01926706, "step": 7752 }, { "epoch": 15.506, "grad_norm": 1.028178095817566, "learning_rate": 2e-05, "loss": 0.02961706, "step": 7753 }, { "epoch": 15.508, "grad_norm": 1.2695460319519043, "learning_rate": 2e-05, "loss": 0.03514853, "step": 7754 }, { "epoch": 15.51, "grad_norm": 2.6682016849517822, "learning_rate": 2e-05, "loss": 0.06611276, "step": 7755 }, { "epoch": 15.512, "grad_norm": 1.2907907962799072, "learning_rate": 2e-05, "loss": 0.03901846, "step": 7756 }, { "epoch": 15.514, "grad_norm": 1.2247368097305298, "learning_rate": 2e-05, "loss": 0.03462955, "step": 7757 }, { "epoch": 15.516, "grad_norm": 1.1045002937316895, "learning_rate": 2e-05, "loss": 0.02914516, "step": 7758 }, { "epoch": 15.518, "grad_norm": 1.3329474925994873, "learning_rate": 2e-05, "loss": 0.03299643, "step": 7759 }, { "epoch": 15.52, "grad_norm": 0.9705315828323364, "learning_rate": 2e-05, "loss": 0.0249468, "step": 7760 }, { "epoch": 15.522, "grad_norm": 1.3083375692367554, "learning_rate": 2e-05, "loss": 0.04079135, "step": 7761 }, { "epoch": 15.524000000000001, "grad_norm": 1.0932661294937134, "learning_rate": 2e-05, "loss": 0.03963382, "step": 7762 }, { "epoch": 15.526, "grad_norm": 1.1822131872177124, "learning_rate": 2e-05, "loss": 0.03383718, "step": 7763 }, { "epoch": 15.528, "grad_norm": 1.1285481452941895, "learning_rate": 2e-05, "loss": 0.0485891, "step": 7764 }, { "epoch": 15.53, "grad_norm": 2.6022520065307617, "learning_rate": 2e-05, "loss": 0.04467677, "step": 7765 }, { "epoch": 15.532, "grad_norm": 2.1171436309814453, "learning_rate": 2e-05, "loss": 0.05000626, "step": 7766 }, { "epoch": 15.534, "grad_norm": 0.948603093624115, "learning_rate": 2e-05, "loss": 0.02811343, "step": 7767 }, { "epoch": 15.536, "grad_norm": 3.1038739681243896, "learning_rate": 2e-05, "loss": 0.04482667, "step": 7768 }, { "epoch": 15.538, "grad_norm": 1.6356004476547241, "learning_rate": 2e-05, "loss": 0.05364137, "step": 7769 }, { "epoch": 15.54, "grad_norm": 1.1389317512512207, "learning_rate": 2e-05, "loss": 0.03385459, "step": 7770 }, { "epoch": 15.542, "grad_norm": 1.53994619846344, "learning_rate": 2e-05, "loss": 0.04406751, "step": 7771 }, { "epoch": 15.544, "grad_norm": 1.5611374378204346, "learning_rate": 2e-05, "loss": 0.0473385, "step": 7772 }, { "epoch": 15.546, "grad_norm": 1.0507432222366333, "learning_rate": 2e-05, "loss": 0.02707745, "step": 7773 }, { "epoch": 15.548, "grad_norm": 1.1123173236846924, "learning_rate": 2e-05, "loss": 0.03449173, "step": 7774 }, { "epoch": 15.55, "grad_norm": 1.3029831647872925, "learning_rate": 2e-05, "loss": 0.03552601, "step": 7775 }, { "epoch": 15.552, "grad_norm": 1.6921583414077759, "learning_rate": 2e-05, "loss": 0.05067451, "step": 7776 }, { "epoch": 15.554, "grad_norm": 1.275075078010559, "learning_rate": 2e-05, "loss": 0.02784466, "step": 7777 }, { "epoch": 15.556000000000001, "grad_norm": 1.4142465591430664, "learning_rate": 2e-05, "loss": 0.04801814, "step": 7778 }, { "epoch": 15.558, "grad_norm": 1.0732700824737549, "learning_rate": 2e-05, "loss": 0.03766344, "step": 7779 }, { "epoch": 15.56, "grad_norm": 1.280580997467041, "learning_rate": 2e-05, "loss": 0.03040556, "step": 7780 }, { "epoch": 15.562, "grad_norm": 1.8353358507156372, "learning_rate": 2e-05, "loss": 0.05565303, "step": 7781 }, { "epoch": 15.564, "grad_norm": 1.4312429428100586, "learning_rate": 2e-05, "loss": 0.03630222, "step": 7782 }, { "epoch": 15.566, "grad_norm": 0.9180557131767273, "learning_rate": 2e-05, "loss": 0.03424197, "step": 7783 }, { "epoch": 15.568, "grad_norm": 1.8518563508987427, "learning_rate": 2e-05, "loss": 0.05018555, "step": 7784 }, { "epoch": 15.57, "grad_norm": 1.6587504148483276, "learning_rate": 2e-05, "loss": 0.05404566, "step": 7785 }, { "epoch": 15.572, "grad_norm": 1.2998889684677124, "learning_rate": 2e-05, "loss": 0.03402172, "step": 7786 }, { "epoch": 15.574, "grad_norm": 1.43665611743927, "learning_rate": 2e-05, "loss": 0.03349872, "step": 7787 }, { "epoch": 15.576, "grad_norm": 1.3342171907424927, "learning_rate": 2e-05, "loss": 0.04050234, "step": 7788 }, { "epoch": 15.578, "grad_norm": 1.462536096572876, "learning_rate": 2e-05, "loss": 0.03564973, "step": 7789 }, { "epoch": 15.58, "grad_norm": 1.421918511390686, "learning_rate": 2e-05, "loss": 0.02840041, "step": 7790 }, { "epoch": 15.582, "grad_norm": 2.16733980178833, "learning_rate": 2e-05, "loss": 0.04823796, "step": 7791 }, { "epoch": 15.584, "grad_norm": 1.9504356384277344, "learning_rate": 2e-05, "loss": 0.04157623, "step": 7792 }, { "epoch": 15.586, "grad_norm": 2.9325337409973145, "learning_rate": 2e-05, "loss": 0.03892434, "step": 7793 }, { "epoch": 15.588, "grad_norm": 1.5955098867416382, "learning_rate": 2e-05, "loss": 0.04307658, "step": 7794 }, { "epoch": 15.59, "grad_norm": 1.1761583089828491, "learning_rate": 2e-05, "loss": 0.03487573, "step": 7795 }, { "epoch": 15.592, "grad_norm": 1.8221702575683594, "learning_rate": 2e-05, "loss": 0.05856226, "step": 7796 }, { "epoch": 15.594, "grad_norm": 1.3372985124588013, "learning_rate": 2e-05, "loss": 0.04135559, "step": 7797 }, { "epoch": 15.596, "grad_norm": 1.5400853157043457, "learning_rate": 2e-05, "loss": 0.04417802, "step": 7798 }, { "epoch": 15.598, "grad_norm": 1.4287244081497192, "learning_rate": 2e-05, "loss": 0.03800631, "step": 7799 }, { "epoch": 15.6, "grad_norm": 1.5493749380111694, "learning_rate": 2e-05, "loss": 0.03243243, "step": 7800 }, { "epoch": 15.602, "grad_norm": 1.8770220279693604, "learning_rate": 2e-05, "loss": 0.04838708, "step": 7801 }, { "epoch": 15.604, "grad_norm": 1.8443878889083862, "learning_rate": 2e-05, "loss": 0.04210856, "step": 7802 }, { "epoch": 15.606, "grad_norm": 1.1599087715148926, "learning_rate": 2e-05, "loss": 0.03727244, "step": 7803 }, { "epoch": 15.608, "grad_norm": 0.8143905997276306, "learning_rate": 2e-05, "loss": 0.02723217, "step": 7804 }, { "epoch": 15.61, "grad_norm": 1.514428734779358, "learning_rate": 2e-05, "loss": 0.04333169, "step": 7805 }, { "epoch": 15.612, "grad_norm": 1.0530762672424316, "learning_rate": 2e-05, "loss": 0.02669099, "step": 7806 }, { "epoch": 15.614, "grad_norm": 1.2812447547912598, "learning_rate": 2e-05, "loss": 0.03541653, "step": 7807 }, { "epoch": 15.616, "grad_norm": 0.8917434215545654, "learning_rate": 2e-05, "loss": 0.02033772, "step": 7808 }, { "epoch": 15.618, "grad_norm": 1.388106346130371, "learning_rate": 2e-05, "loss": 0.0356602, "step": 7809 }, { "epoch": 15.62, "grad_norm": 1.629955530166626, "learning_rate": 2e-05, "loss": 0.04595727, "step": 7810 }, { "epoch": 15.622, "grad_norm": 1.964969277381897, "learning_rate": 2e-05, "loss": 0.04757115, "step": 7811 }, { "epoch": 15.624, "grad_norm": 1.00034761428833, "learning_rate": 2e-05, "loss": 0.02988995, "step": 7812 }, { "epoch": 15.626, "grad_norm": 1.4420907497406006, "learning_rate": 2e-05, "loss": 0.03713204, "step": 7813 }, { "epoch": 15.628, "grad_norm": 1.7254669666290283, "learning_rate": 2e-05, "loss": 0.05012753, "step": 7814 }, { "epoch": 15.63, "grad_norm": 1.617090106010437, "learning_rate": 2e-05, "loss": 0.04258318, "step": 7815 }, { "epoch": 15.632, "grad_norm": 1.8159074783325195, "learning_rate": 2e-05, "loss": 0.0533081, "step": 7816 }, { "epoch": 15.634, "grad_norm": 1.0557678937911987, "learning_rate": 2e-05, "loss": 0.03816307, "step": 7817 }, { "epoch": 15.636, "grad_norm": 1.1911511421203613, "learning_rate": 2e-05, "loss": 0.02825535, "step": 7818 }, { "epoch": 15.638, "grad_norm": 1.1290611028671265, "learning_rate": 2e-05, "loss": 0.03365977, "step": 7819 }, { "epoch": 15.64, "grad_norm": 1.4087905883789062, "learning_rate": 2e-05, "loss": 0.03113106, "step": 7820 }, { "epoch": 15.642, "grad_norm": 1.3932369947433472, "learning_rate": 2e-05, "loss": 0.03929411, "step": 7821 }, { "epoch": 15.644, "grad_norm": 1.611405611038208, "learning_rate": 2e-05, "loss": 0.05581418, "step": 7822 }, { "epoch": 15.646, "grad_norm": 1.394471526145935, "learning_rate": 2e-05, "loss": 0.03460547, "step": 7823 }, { "epoch": 15.648, "grad_norm": 1.114952564239502, "learning_rate": 2e-05, "loss": 0.03128414, "step": 7824 }, { "epoch": 15.65, "grad_norm": 1.2153035402297974, "learning_rate": 2e-05, "loss": 0.03026392, "step": 7825 }, { "epoch": 15.652, "grad_norm": 1.245772361755371, "learning_rate": 2e-05, "loss": 0.04067977, "step": 7826 }, { "epoch": 15.654, "grad_norm": 1.2498624324798584, "learning_rate": 2e-05, "loss": 0.0399544, "step": 7827 }, { "epoch": 15.656, "grad_norm": 1.4734687805175781, "learning_rate": 2e-05, "loss": 0.0403645, "step": 7828 }, { "epoch": 15.658, "grad_norm": 1.184058666229248, "learning_rate": 2e-05, "loss": 0.03515352, "step": 7829 }, { "epoch": 15.66, "grad_norm": 1.6777468919754028, "learning_rate": 2e-05, "loss": 0.04460485, "step": 7830 }, { "epoch": 15.662, "grad_norm": 1.869699478149414, "learning_rate": 2e-05, "loss": 0.03454949, "step": 7831 }, { "epoch": 15.664, "grad_norm": 1.1511545181274414, "learning_rate": 2e-05, "loss": 0.03649542, "step": 7832 }, { "epoch": 15.666, "grad_norm": 1.5351507663726807, "learning_rate": 2e-05, "loss": 0.03351032, "step": 7833 }, { "epoch": 15.668, "grad_norm": 1.0590790510177612, "learning_rate": 2e-05, "loss": 0.04073928, "step": 7834 }, { "epoch": 15.67, "grad_norm": 1.9279193878173828, "learning_rate": 2e-05, "loss": 0.05023941, "step": 7835 }, { "epoch": 15.672, "grad_norm": 1.389474630355835, "learning_rate": 2e-05, "loss": 0.0375604, "step": 7836 }, { "epoch": 15.674, "grad_norm": 1.5739963054656982, "learning_rate": 2e-05, "loss": 0.05597922, "step": 7837 }, { "epoch": 15.676, "grad_norm": 2.872504472732544, "learning_rate": 2e-05, "loss": 0.03245638, "step": 7838 }, { "epoch": 15.678, "grad_norm": 1.1751714944839478, "learning_rate": 2e-05, "loss": 0.04733478, "step": 7839 }, { "epoch": 15.68, "grad_norm": 1.265234351158142, "learning_rate": 2e-05, "loss": 0.04258277, "step": 7840 }, { "epoch": 15.682, "grad_norm": 1.1896003484725952, "learning_rate": 2e-05, "loss": 0.03169346, "step": 7841 }, { "epoch": 15.684, "grad_norm": 1.7013142108917236, "learning_rate": 2e-05, "loss": 0.05300517, "step": 7842 }, { "epoch": 15.686, "grad_norm": 1.666005253791809, "learning_rate": 2e-05, "loss": 0.04027229, "step": 7843 }, { "epoch": 15.688, "grad_norm": 1.2857798337936401, "learning_rate": 2e-05, "loss": 0.02931785, "step": 7844 }, { "epoch": 15.69, "grad_norm": 1.989986777305603, "learning_rate": 2e-05, "loss": 0.06056058, "step": 7845 }, { "epoch": 15.692, "grad_norm": 1.0686649084091187, "learning_rate": 2e-05, "loss": 0.03009918, "step": 7846 }, { "epoch": 15.693999999999999, "grad_norm": 1.2238051891326904, "learning_rate": 2e-05, "loss": 0.03506152, "step": 7847 }, { "epoch": 15.696, "grad_norm": 1.7078624963760376, "learning_rate": 2e-05, "loss": 0.04438978, "step": 7848 }, { "epoch": 15.698, "grad_norm": 2.4543190002441406, "learning_rate": 2e-05, "loss": 0.03001017, "step": 7849 }, { "epoch": 15.7, "grad_norm": 1.3647072315216064, "learning_rate": 2e-05, "loss": 0.04168668, "step": 7850 }, { "epoch": 15.702, "grad_norm": 1.3066121339797974, "learning_rate": 2e-05, "loss": 0.04153648, "step": 7851 }, { "epoch": 15.704, "grad_norm": 1.0811482667922974, "learning_rate": 2e-05, "loss": 0.03107862, "step": 7852 }, { "epoch": 15.706, "grad_norm": 3.0416204929351807, "learning_rate": 2e-05, "loss": 0.05520962, "step": 7853 }, { "epoch": 15.708, "grad_norm": 2.0119881629943848, "learning_rate": 2e-05, "loss": 0.03293385, "step": 7854 }, { "epoch": 15.71, "grad_norm": 1.3695378303527832, "learning_rate": 2e-05, "loss": 0.03456626, "step": 7855 }, { "epoch": 15.712, "grad_norm": 1.410203218460083, "learning_rate": 2e-05, "loss": 0.0467374, "step": 7856 }, { "epoch": 15.714, "grad_norm": 0.9683268666267395, "learning_rate": 2e-05, "loss": 0.02218588, "step": 7857 }, { "epoch": 15.716, "grad_norm": 1.1725350618362427, "learning_rate": 2e-05, "loss": 0.0399307, "step": 7858 }, { "epoch": 15.718, "grad_norm": 1.156903862953186, "learning_rate": 2e-05, "loss": 0.04084864, "step": 7859 }, { "epoch": 15.72, "grad_norm": 1.099974274635315, "learning_rate": 2e-05, "loss": 0.03805087, "step": 7860 }, { "epoch": 15.722, "grad_norm": 0.9585137367248535, "learning_rate": 2e-05, "loss": 0.02989551, "step": 7861 }, { "epoch": 15.724, "grad_norm": 0.9844270348548889, "learning_rate": 2e-05, "loss": 0.02908732, "step": 7862 }, { "epoch": 15.725999999999999, "grad_norm": 1.854518175125122, "learning_rate": 2e-05, "loss": 0.04460219, "step": 7863 }, { "epoch": 15.728, "grad_norm": 1.2902519702911377, "learning_rate": 2e-05, "loss": 0.0421297, "step": 7864 }, { "epoch": 15.73, "grad_norm": 0.9230936169624329, "learning_rate": 2e-05, "loss": 0.02882043, "step": 7865 }, { "epoch": 15.732, "grad_norm": 1.5599287748336792, "learning_rate": 2e-05, "loss": 0.03928513, "step": 7866 }, { "epoch": 15.734, "grad_norm": 1.1548335552215576, "learning_rate": 2e-05, "loss": 0.03820485, "step": 7867 }, { "epoch": 15.736, "grad_norm": 1.2006397247314453, "learning_rate": 2e-05, "loss": 0.02806983, "step": 7868 }, { "epoch": 15.738, "grad_norm": 2.920689582824707, "learning_rate": 2e-05, "loss": 0.0410319, "step": 7869 }, { "epoch": 15.74, "grad_norm": 1.2876091003417969, "learning_rate": 2e-05, "loss": 0.03893904, "step": 7870 }, { "epoch": 15.742, "grad_norm": 1.2860698699951172, "learning_rate": 2e-05, "loss": 0.03412844, "step": 7871 }, { "epoch": 15.744, "grad_norm": 1.402205467224121, "learning_rate": 2e-05, "loss": 0.04057181, "step": 7872 }, { "epoch": 15.746, "grad_norm": 1.643515706062317, "learning_rate": 2e-05, "loss": 0.03522312, "step": 7873 }, { "epoch": 15.748, "grad_norm": 1.7808668613433838, "learning_rate": 2e-05, "loss": 0.051851, "step": 7874 }, { "epoch": 15.75, "grad_norm": 1.259395956993103, "learning_rate": 2e-05, "loss": 0.03302209, "step": 7875 }, { "epoch": 15.752, "grad_norm": 1.9360871315002441, "learning_rate": 2e-05, "loss": 0.03717834, "step": 7876 }, { "epoch": 15.754, "grad_norm": 1.372341513633728, "learning_rate": 2e-05, "loss": 0.04391986, "step": 7877 }, { "epoch": 15.756, "grad_norm": 2.2007229328155518, "learning_rate": 2e-05, "loss": 0.03659112, "step": 7878 }, { "epoch": 15.758, "grad_norm": 2.7030446529388428, "learning_rate": 2e-05, "loss": 0.05342212, "step": 7879 }, { "epoch": 15.76, "grad_norm": 1.5437792539596558, "learning_rate": 2e-05, "loss": 0.03884444, "step": 7880 }, { "epoch": 15.762, "grad_norm": 0.9608680605888367, "learning_rate": 2e-05, "loss": 0.0303247, "step": 7881 }, { "epoch": 15.764, "grad_norm": 1.3708651065826416, "learning_rate": 2e-05, "loss": 0.03259217, "step": 7882 }, { "epoch": 15.766, "grad_norm": 1.9985793828964233, "learning_rate": 2e-05, "loss": 0.04025662, "step": 7883 }, { "epoch": 15.768, "grad_norm": 1.3265514373779297, "learning_rate": 2e-05, "loss": 0.04500604, "step": 7884 }, { "epoch": 15.77, "grad_norm": 1.3119704723358154, "learning_rate": 2e-05, "loss": 0.03770127, "step": 7885 }, { "epoch": 15.772, "grad_norm": 1.0361597537994385, "learning_rate": 2e-05, "loss": 0.02482234, "step": 7886 }, { "epoch": 15.774000000000001, "grad_norm": 1.0766968727111816, "learning_rate": 2e-05, "loss": 0.03262966, "step": 7887 }, { "epoch": 15.776, "grad_norm": 1.372813105583191, "learning_rate": 2e-05, "loss": 0.04390799, "step": 7888 }, { "epoch": 15.778, "grad_norm": 1.996690273284912, "learning_rate": 2e-05, "loss": 0.04705422, "step": 7889 }, { "epoch": 15.78, "grad_norm": 1.3125027418136597, "learning_rate": 2e-05, "loss": 0.05340045, "step": 7890 }, { "epoch": 15.782, "grad_norm": 1.0259758234024048, "learning_rate": 2e-05, "loss": 0.03187435, "step": 7891 }, { "epoch": 15.784, "grad_norm": 1.176693320274353, "learning_rate": 2e-05, "loss": 0.03257523, "step": 7892 }, { "epoch": 15.786, "grad_norm": 0.9599425196647644, "learning_rate": 2e-05, "loss": 0.02640433, "step": 7893 }, { "epoch": 15.788, "grad_norm": 1.4598140716552734, "learning_rate": 2e-05, "loss": 0.03303267, "step": 7894 }, { "epoch": 15.79, "grad_norm": 1.3222160339355469, "learning_rate": 2e-05, "loss": 0.03670667, "step": 7895 }, { "epoch": 15.792, "grad_norm": 0.9463991522789001, "learning_rate": 2e-05, "loss": 0.02554088, "step": 7896 }, { "epoch": 15.794, "grad_norm": 1.3123785257339478, "learning_rate": 2e-05, "loss": 0.0461954, "step": 7897 }, { "epoch": 15.796, "grad_norm": 1.9543800354003906, "learning_rate": 2e-05, "loss": 0.0520162, "step": 7898 }, { "epoch": 15.798, "grad_norm": 1.3964611291885376, "learning_rate": 2e-05, "loss": 0.03359364, "step": 7899 }, { "epoch": 15.8, "grad_norm": 1.7428091764450073, "learning_rate": 2e-05, "loss": 0.04074088, "step": 7900 }, { "epoch": 15.802, "grad_norm": 1.9242103099822998, "learning_rate": 2e-05, "loss": 0.02629791, "step": 7901 }, { "epoch": 15.804, "grad_norm": 2.1996188163757324, "learning_rate": 2e-05, "loss": 0.04311199, "step": 7902 }, { "epoch": 15.806000000000001, "grad_norm": 1.1592961549758911, "learning_rate": 2e-05, "loss": 0.0427461, "step": 7903 }, { "epoch": 15.808, "grad_norm": 1.3940585851669312, "learning_rate": 2e-05, "loss": 0.04456202, "step": 7904 }, { "epoch": 15.81, "grad_norm": 1.175157070159912, "learning_rate": 2e-05, "loss": 0.02939461, "step": 7905 }, { "epoch": 15.812, "grad_norm": 1.855078935623169, "learning_rate": 2e-05, "loss": 0.06233601, "step": 7906 }, { "epoch": 15.814, "grad_norm": 1.4420384168624878, "learning_rate": 2e-05, "loss": 0.04433277, "step": 7907 }, { "epoch": 15.816, "grad_norm": 1.6261996030807495, "learning_rate": 2e-05, "loss": 0.05150939, "step": 7908 }, { "epoch": 15.818, "grad_norm": 0.9924352765083313, "learning_rate": 2e-05, "loss": 0.03078244, "step": 7909 }, { "epoch": 15.82, "grad_norm": 1.1630253791809082, "learning_rate": 2e-05, "loss": 0.02466009, "step": 7910 }, { "epoch": 15.822, "grad_norm": 1.1106728315353394, "learning_rate": 2e-05, "loss": 0.04763328, "step": 7911 }, { "epoch": 15.824, "grad_norm": 1.9522145986557007, "learning_rate": 2e-05, "loss": 0.04942587, "step": 7912 }, { "epoch": 15.826, "grad_norm": 0.9739556312561035, "learning_rate": 2e-05, "loss": 0.03213318, "step": 7913 }, { "epoch": 15.828, "grad_norm": 2.2910735607147217, "learning_rate": 2e-05, "loss": 0.04557914, "step": 7914 }, { "epoch": 15.83, "grad_norm": 2.2116222381591797, "learning_rate": 2e-05, "loss": 0.04482364, "step": 7915 }, { "epoch": 15.832, "grad_norm": 1.637697696685791, "learning_rate": 2e-05, "loss": 0.03125627, "step": 7916 }, { "epoch": 15.834, "grad_norm": 1.0995755195617676, "learning_rate": 2e-05, "loss": 0.03378412, "step": 7917 }, { "epoch": 15.836, "grad_norm": 2.1459877490997314, "learning_rate": 2e-05, "loss": 0.03629637, "step": 7918 }, { "epoch": 15.838, "grad_norm": 1.2889842987060547, "learning_rate": 2e-05, "loss": 0.04306239, "step": 7919 }, { "epoch": 15.84, "grad_norm": 4.523320198059082, "learning_rate": 2e-05, "loss": 0.04296661, "step": 7920 }, { "epoch": 15.842, "grad_norm": 1.7894271612167358, "learning_rate": 2e-05, "loss": 0.03566499, "step": 7921 }, { "epoch": 15.844, "grad_norm": 1.401320219039917, "learning_rate": 2e-05, "loss": 0.02614049, "step": 7922 }, { "epoch": 15.846, "grad_norm": 1.4759875535964966, "learning_rate": 2e-05, "loss": 0.02819363, "step": 7923 }, { "epoch": 15.848, "grad_norm": 1.3998092412948608, "learning_rate": 2e-05, "loss": 0.03906362, "step": 7924 }, { "epoch": 15.85, "grad_norm": 1.1265228986740112, "learning_rate": 2e-05, "loss": 0.03560297, "step": 7925 }, { "epoch": 15.852, "grad_norm": 2.067176342010498, "learning_rate": 2e-05, "loss": 0.04355159, "step": 7926 }, { "epoch": 15.854, "grad_norm": 1.7233660221099854, "learning_rate": 2e-05, "loss": 0.029684, "step": 7927 }, { "epoch": 15.856, "grad_norm": 2.1673214435577393, "learning_rate": 2e-05, "loss": 0.04684749, "step": 7928 }, { "epoch": 15.858, "grad_norm": 3.047663450241089, "learning_rate": 2e-05, "loss": 0.04030456, "step": 7929 }, { "epoch": 15.86, "grad_norm": 1.677051305770874, "learning_rate": 2e-05, "loss": 0.03150005, "step": 7930 }, { "epoch": 15.862, "grad_norm": 1.5520917177200317, "learning_rate": 2e-05, "loss": 0.03935989, "step": 7931 }, { "epoch": 15.864, "grad_norm": 2.1706817150115967, "learning_rate": 2e-05, "loss": 0.03663088, "step": 7932 }, { "epoch": 15.866, "grad_norm": 1.029218316078186, "learning_rate": 2e-05, "loss": 0.03442633, "step": 7933 }, { "epoch": 15.868, "grad_norm": 0.9123724699020386, "learning_rate": 2e-05, "loss": 0.02511028, "step": 7934 }, { "epoch": 15.87, "grad_norm": 1.1108697652816772, "learning_rate": 2e-05, "loss": 0.04436678, "step": 7935 }, { "epoch": 15.872, "grad_norm": 1.1441642045974731, "learning_rate": 2e-05, "loss": 0.03152635, "step": 7936 }, { "epoch": 15.874, "grad_norm": 0.9864327907562256, "learning_rate": 2e-05, "loss": 0.02202858, "step": 7937 }, { "epoch": 15.876, "grad_norm": 0.9307705760002136, "learning_rate": 2e-05, "loss": 0.02411098, "step": 7938 }, { "epoch": 15.878, "grad_norm": 1.2462923526763916, "learning_rate": 2e-05, "loss": 0.03807527, "step": 7939 }, { "epoch": 15.88, "grad_norm": 0.9074599146842957, "learning_rate": 2e-05, "loss": 0.02559845, "step": 7940 }, { "epoch": 15.882, "grad_norm": 1.3017256259918213, "learning_rate": 2e-05, "loss": 0.04016859, "step": 7941 }, { "epoch": 15.884, "grad_norm": 2.846317768096924, "learning_rate": 2e-05, "loss": 0.0447731, "step": 7942 }, { "epoch": 15.886, "grad_norm": 2.893179416656494, "learning_rate": 2e-05, "loss": 0.05213889, "step": 7943 }, { "epoch": 15.888, "grad_norm": 1.6494814157485962, "learning_rate": 2e-05, "loss": 0.0391501, "step": 7944 }, { "epoch": 15.89, "grad_norm": 1.257524847984314, "learning_rate": 2e-05, "loss": 0.05023734, "step": 7945 }, { "epoch": 15.892, "grad_norm": 1.13717782497406, "learning_rate": 2e-05, "loss": 0.0257063, "step": 7946 }, { "epoch": 15.894, "grad_norm": 1.18727707862854, "learning_rate": 2e-05, "loss": 0.03686336, "step": 7947 }, { "epoch": 15.896, "grad_norm": 1.3855704069137573, "learning_rate": 2e-05, "loss": 0.03288057, "step": 7948 }, { "epoch": 15.898, "grad_norm": 1.4399868249893188, "learning_rate": 2e-05, "loss": 0.04080542, "step": 7949 }, { "epoch": 15.9, "grad_norm": 1.5103329420089722, "learning_rate": 2e-05, "loss": 0.04284345, "step": 7950 }, { "epoch": 15.902, "grad_norm": 1.8133221864700317, "learning_rate": 2e-05, "loss": 0.04338891, "step": 7951 }, { "epoch": 15.904, "grad_norm": 1.0757697820663452, "learning_rate": 2e-05, "loss": 0.03184106, "step": 7952 }, { "epoch": 15.906, "grad_norm": 1.2824019193649292, "learning_rate": 2e-05, "loss": 0.03566642, "step": 7953 }, { "epoch": 15.908, "grad_norm": 1.087025761604309, "learning_rate": 2e-05, "loss": 0.02856821, "step": 7954 }, { "epoch": 15.91, "grad_norm": 3.879120349884033, "learning_rate": 2e-05, "loss": 0.0441326, "step": 7955 }, { "epoch": 15.912, "grad_norm": 1.081153392791748, "learning_rate": 2e-05, "loss": 0.04057573, "step": 7956 }, { "epoch": 15.914, "grad_norm": 2.346388578414917, "learning_rate": 2e-05, "loss": 0.04326249, "step": 7957 }, { "epoch": 15.916, "grad_norm": 1.8399007320404053, "learning_rate": 2e-05, "loss": 0.05309239, "step": 7958 }, { "epoch": 15.918, "grad_norm": 1.1926047801971436, "learning_rate": 2e-05, "loss": 0.02422775, "step": 7959 }, { "epoch": 15.92, "grad_norm": 2.166907787322998, "learning_rate": 2e-05, "loss": 0.03924531, "step": 7960 }, { "epoch": 15.922, "grad_norm": 2.182678461074829, "learning_rate": 2e-05, "loss": 0.03237635, "step": 7961 }, { "epoch": 15.924, "grad_norm": 1.284359335899353, "learning_rate": 2e-05, "loss": 0.03156743, "step": 7962 }, { "epoch": 15.926, "grad_norm": 2.093848705291748, "learning_rate": 2e-05, "loss": 0.05261733, "step": 7963 }, { "epoch": 15.928, "grad_norm": 2.00311541557312, "learning_rate": 2e-05, "loss": 0.03726339, "step": 7964 }, { "epoch": 15.93, "grad_norm": 1.8436921834945679, "learning_rate": 2e-05, "loss": 0.05885259, "step": 7965 }, { "epoch": 15.932, "grad_norm": 1.5446103811264038, "learning_rate": 2e-05, "loss": 0.0387977, "step": 7966 }, { "epoch": 15.934, "grad_norm": 1.519590973854065, "learning_rate": 2e-05, "loss": 0.03424424, "step": 7967 }, { "epoch": 15.936, "grad_norm": 1.6442177295684814, "learning_rate": 2e-05, "loss": 0.05894388, "step": 7968 }, { "epoch": 15.938, "grad_norm": 2.041292905807495, "learning_rate": 2e-05, "loss": 0.04844762, "step": 7969 }, { "epoch": 15.94, "grad_norm": 1.1668561697006226, "learning_rate": 2e-05, "loss": 0.03171788, "step": 7970 }, { "epoch": 15.942, "grad_norm": 2.4591891765594482, "learning_rate": 2e-05, "loss": 0.05597981, "step": 7971 }, { "epoch": 15.943999999999999, "grad_norm": 1.7161046266555786, "learning_rate": 2e-05, "loss": 0.04831922, "step": 7972 }, { "epoch": 15.946, "grad_norm": 1.3476585149765015, "learning_rate": 2e-05, "loss": 0.03861203, "step": 7973 }, { "epoch": 15.948, "grad_norm": 1.5570892095565796, "learning_rate": 2e-05, "loss": 0.04717612, "step": 7974 }, { "epoch": 15.95, "grad_norm": 2.5573222637176514, "learning_rate": 2e-05, "loss": 0.04429121, "step": 7975 }, { "epoch": 15.952, "grad_norm": 1.9069948196411133, "learning_rate": 2e-05, "loss": 0.04974307, "step": 7976 }, { "epoch": 15.954, "grad_norm": 3.160170316696167, "learning_rate": 2e-05, "loss": 0.03945491, "step": 7977 }, { "epoch": 15.956, "grad_norm": 1.0503807067871094, "learning_rate": 2e-05, "loss": 0.03511816, "step": 7978 }, { "epoch": 15.958, "grad_norm": 1.0775737762451172, "learning_rate": 2e-05, "loss": 0.03104691, "step": 7979 }, { "epoch": 15.96, "grad_norm": 1.1653412580490112, "learning_rate": 2e-05, "loss": 0.04440337, "step": 7980 }, { "epoch": 15.962, "grad_norm": 1.088532567024231, "learning_rate": 2e-05, "loss": 0.04312788, "step": 7981 }, { "epoch": 15.964, "grad_norm": 1.1584064960479736, "learning_rate": 2e-05, "loss": 0.02283434, "step": 7982 }, { "epoch": 15.966, "grad_norm": 1.043766975402832, "learning_rate": 2e-05, "loss": 0.02896805, "step": 7983 }, { "epoch": 15.968, "grad_norm": 0.9442967176437378, "learning_rate": 2e-05, "loss": 0.02886542, "step": 7984 }, { "epoch": 15.97, "grad_norm": 1.3705523014068604, "learning_rate": 2e-05, "loss": 0.04226013, "step": 7985 }, { "epoch": 15.972, "grad_norm": 3.2384965419769287, "learning_rate": 2e-05, "loss": 0.05349871, "step": 7986 }, { "epoch": 15.974, "grad_norm": 1.393133282661438, "learning_rate": 2e-05, "loss": 0.02859312, "step": 7987 }, { "epoch": 15.975999999999999, "grad_norm": 1.0506641864776611, "learning_rate": 2e-05, "loss": 0.03882669, "step": 7988 }, { "epoch": 15.978, "grad_norm": 1.3604516983032227, "learning_rate": 2e-05, "loss": 0.04398809, "step": 7989 }, { "epoch": 15.98, "grad_norm": 1.3526203632354736, "learning_rate": 2e-05, "loss": 0.03669821, "step": 7990 }, { "epoch": 15.982, "grad_norm": 1.1345425844192505, "learning_rate": 2e-05, "loss": 0.03266115, "step": 7991 }, { "epoch": 15.984, "grad_norm": 1.23661470413208, "learning_rate": 2e-05, "loss": 0.02663805, "step": 7992 }, { "epoch": 15.986, "grad_norm": 2.231520652770996, "learning_rate": 2e-05, "loss": 0.034388, "step": 7993 }, { "epoch": 15.988, "grad_norm": 1.1724952459335327, "learning_rate": 2e-05, "loss": 0.0357694, "step": 7994 }, { "epoch": 15.99, "grad_norm": 1.698375940322876, "learning_rate": 2e-05, "loss": 0.04616644, "step": 7995 }, { "epoch": 15.992, "grad_norm": 1.5685293674468994, "learning_rate": 2e-05, "loss": 0.03160156, "step": 7996 }, { "epoch": 15.994, "grad_norm": 2.4769506454467773, "learning_rate": 2e-05, "loss": 0.04571313, "step": 7997 }, { "epoch": 15.996, "grad_norm": 1.7058403491973877, "learning_rate": 2e-05, "loss": 0.03262362, "step": 7998 }, { "epoch": 15.998, "grad_norm": 1.0402840375900269, "learning_rate": 2e-05, "loss": 0.02683425, "step": 7999 }, { "epoch": 16.0, "grad_norm": 1.4686156511306763, "learning_rate": 2e-05, "loss": 0.03669425, "step": 8000 }, { "epoch": 16.0, "eval_performance": { "AngleClassification_1": 0.996, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9481037924151696, "Equal_1": 0.994, "Equal_2": 0.9660678642714571, "Equal_3": 0.8902195608782435, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.986, "Perpendicular_1": 0.998, "Perpendicular_2": 0.982, "Perpendicular_3": 0.7274549098196392, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9996666666666667, "PointLiesOnCircle_3": 0.9912666666666666, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9780439121756487 }, "eval_runtime": 319.9457, "eval_samples_per_second": 32.818, "eval_steps_per_second": 0.656, "step": 8000 }, { "epoch": 16.002, "grad_norm": 1.1055775880813599, "learning_rate": 2e-05, "loss": 0.0335177, "step": 8001 }, { "epoch": 16.004, "grad_norm": 1.2921119928359985, "learning_rate": 2e-05, "loss": 0.02823064, "step": 8002 }, { "epoch": 16.006, "grad_norm": 2.824982166290283, "learning_rate": 2e-05, "loss": 0.02834114, "step": 8003 }, { "epoch": 16.008, "grad_norm": 2.268012523651123, "learning_rate": 2e-05, "loss": 0.03194273, "step": 8004 }, { "epoch": 16.01, "grad_norm": 1.4545838832855225, "learning_rate": 2e-05, "loss": 0.02953038, "step": 8005 }, { "epoch": 16.012, "grad_norm": 1.845914363861084, "learning_rate": 2e-05, "loss": 0.03039683, "step": 8006 }, { "epoch": 16.014, "grad_norm": 1.2593659162521362, "learning_rate": 2e-05, "loss": 0.03985323, "step": 8007 }, { "epoch": 16.016, "grad_norm": 1.1578806638717651, "learning_rate": 2e-05, "loss": 0.04368738, "step": 8008 }, { "epoch": 16.018, "grad_norm": 2.670903444290161, "learning_rate": 2e-05, "loss": 0.05551417, "step": 8009 }, { "epoch": 16.02, "grad_norm": 1.4139609336853027, "learning_rate": 2e-05, "loss": 0.03750867, "step": 8010 }, { "epoch": 16.022, "grad_norm": 1.3667073249816895, "learning_rate": 2e-05, "loss": 0.05370542, "step": 8011 }, { "epoch": 16.024, "grad_norm": 2.2714273929595947, "learning_rate": 2e-05, "loss": 0.0414988, "step": 8012 }, { "epoch": 16.026, "grad_norm": 2.840731382369995, "learning_rate": 2e-05, "loss": 0.03340321, "step": 8013 }, { "epoch": 16.028, "grad_norm": 2.810915946960449, "learning_rate": 2e-05, "loss": 0.05696411, "step": 8014 }, { "epoch": 16.03, "grad_norm": 1.45564866065979, "learning_rate": 2e-05, "loss": 0.04313691, "step": 8015 }, { "epoch": 16.032, "grad_norm": 1.4301260709762573, "learning_rate": 2e-05, "loss": 0.04525199, "step": 8016 }, { "epoch": 16.034, "grad_norm": 1.1844078302383423, "learning_rate": 2e-05, "loss": 0.0316691, "step": 8017 }, { "epoch": 16.036, "grad_norm": 1.4068323373794556, "learning_rate": 2e-05, "loss": 0.03633066, "step": 8018 }, { "epoch": 16.038, "grad_norm": 1.5108731985092163, "learning_rate": 2e-05, "loss": 0.04725716, "step": 8019 }, { "epoch": 16.04, "grad_norm": 1.654616355895996, "learning_rate": 2e-05, "loss": 0.04446092, "step": 8020 }, { "epoch": 16.042, "grad_norm": 1.4129464626312256, "learning_rate": 2e-05, "loss": 0.03359355, "step": 8021 }, { "epoch": 16.044, "grad_norm": 1.0702934265136719, "learning_rate": 2e-05, "loss": 0.02224928, "step": 8022 }, { "epoch": 16.046, "grad_norm": 1.9897451400756836, "learning_rate": 2e-05, "loss": 0.04445896, "step": 8023 }, { "epoch": 16.048, "grad_norm": 1.2325469255447388, "learning_rate": 2e-05, "loss": 0.03780691, "step": 8024 }, { "epoch": 16.05, "grad_norm": 1.2062650918960571, "learning_rate": 2e-05, "loss": 0.03396706, "step": 8025 }, { "epoch": 16.052, "grad_norm": 1.6414982080459595, "learning_rate": 2e-05, "loss": 0.04513337, "step": 8026 }, { "epoch": 16.054, "grad_norm": 3.0704195499420166, "learning_rate": 2e-05, "loss": 0.05739868, "step": 8027 }, { "epoch": 16.056, "grad_norm": 1.746216893196106, "learning_rate": 2e-05, "loss": 0.04020358, "step": 8028 }, { "epoch": 16.058, "grad_norm": 1.0398463010787964, "learning_rate": 2e-05, "loss": 0.02914593, "step": 8029 }, { "epoch": 16.06, "grad_norm": 1.3823471069335938, "learning_rate": 2e-05, "loss": 0.0427549, "step": 8030 }, { "epoch": 16.062, "grad_norm": 0.9713700413703918, "learning_rate": 2e-05, "loss": 0.02133579, "step": 8031 }, { "epoch": 16.064, "grad_norm": 2.3745758533477783, "learning_rate": 2e-05, "loss": 0.03649534, "step": 8032 }, { "epoch": 16.066, "grad_norm": 1.386365294456482, "learning_rate": 2e-05, "loss": 0.04500858, "step": 8033 }, { "epoch": 16.068, "grad_norm": 2.186955690383911, "learning_rate": 2e-05, "loss": 0.05879531, "step": 8034 }, { "epoch": 16.07, "grad_norm": 2.0882043838500977, "learning_rate": 2e-05, "loss": 0.04632348, "step": 8035 }, { "epoch": 16.072, "grad_norm": 1.3914663791656494, "learning_rate": 2e-05, "loss": 0.03216641, "step": 8036 }, { "epoch": 16.074, "grad_norm": 1.4008915424346924, "learning_rate": 2e-05, "loss": 0.03341067, "step": 8037 }, { "epoch": 16.076, "grad_norm": 1.3791346549987793, "learning_rate": 2e-05, "loss": 0.0505546, "step": 8038 }, { "epoch": 16.078, "grad_norm": 1.302667498588562, "learning_rate": 2e-05, "loss": 0.04582535, "step": 8039 }, { "epoch": 16.08, "grad_norm": 1.6824592351913452, "learning_rate": 2e-05, "loss": 0.0420664, "step": 8040 }, { "epoch": 16.082, "grad_norm": 1.354354977607727, "learning_rate": 2e-05, "loss": 0.02892293, "step": 8041 }, { "epoch": 16.084, "grad_norm": 1.0839284658432007, "learning_rate": 2e-05, "loss": 0.03057658, "step": 8042 }, { "epoch": 16.086, "grad_norm": 1.1779735088348389, "learning_rate": 2e-05, "loss": 0.02183773, "step": 8043 }, { "epoch": 16.088, "grad_norm": 1.1066539287567139, "learning_rate": 2e-05, "loss": 0.03328647, "step": 8044 }, { "epoch": 16.09, "grad_norm": 1.5935084819793701, "learning_rate": 2e-05, "loss": 0.04813398, "step": 8045 }, { "epoch": 16.092, "grad_norm": 1.7510586977005005, "learning_rate": 2e-05, "loss": 0.03419684, "step": 8046 }, { "epoch": 16.094, "grad_norm": 1.2361801862716675, "learning_rate": 2e-05, "loss": 0.03396645, "step": 8047 }, { "epoch": 16.096, "grad_norm": 1.069996953010559, "learning_rate": 2e-05, "loss": 0.02971686, "step": 8048 }, { "epoch": 16.098, "grad_norm": 1.4288445711135864, "learning_rate": 2e-05, "loss": 0.03799113, "step": 8049 }, { "epoch": 16.1, "grad_norm": 0.9517697691917419, "learning_rate": 2e-05, "loss": 0.02629692, "step": 8050 }, { "epoch": 16.102, "grad_norm": 1.0445160865783691, "learning_rate": 2e-05, "loss": 0.03541003, "step": 8051 }, { "epoch": 16.104, "grad_norm": 1.2751644849777222, "learning_rate": 2e-05, "loss": 0.03903734, "step": 8052 }, { "epoch": 16.106, "grad_norm": 1.3192799091339111, "learning_rate": 2e-05, "loss": 0.04088945, "step": 8053 }, { "epoch": 16.108, "grad_norm": 0.8523271679878235, "learning_rate": 2e-05, "loss": 0.02510538, "step": 8054 }, { "epoch": 16.11, "grad_norm": 1.4209495782852173, "learning_rate": 2e-05, "loss": 0.03651667, "step": 8055 }, { "epoch": 16.112, "grad_norm": 2.66572904586792, "learning_rate": 2e-05, "loss": 0.02939776, "step": 8056 }, { "epoch": 16.114, "grad_norm": 1.7730138301849365, "learning_rate": 2e-05, "loss": 0.05316716, "step": 8057 }, { "epoch": 16.116, "grad_norm": 1.0424127578735352, "learning_rate": 2e-05, "loss": 0.03889843, "step": 8058 }, { "epoch": 16.118, "grad_norm": 1.0265769958496094, "learning_rate": 2e-05, "loss": 0.03288617, "step": 8059 }, { "epoch": 16.12, "grad_norm": 1.0434588193893433, "learning_rate": 2e-05, "loss": 0.04037582, "step": 8060 }, { "epoch": 16.122, "grad_norm": 1.2070543766021729, "learning_rate": 2e-05, "loss": 0.02952406, "step": 8061 }, { "epoch": 16.124, "grad_norm": 1.59628427028656, "learning_rate": 2e-05, "loss": 0.04046272, "step": 8062 }, { "epoch": 16.126, "grad_norm": 1.1951675415039062, "learning_rate": 2e-05, "loss": 0.03478213, "step": 8063 }, { "epoch": 16.128, "grad_norm": 1.0106141567230225, "learning_rate": 2e-05, "loss": 0.03325225, "step": 8064 }, { "epoch": 16.13, "grad_norm": 0.993449330329895, "learning_rate": 2e-05, "loss": 0.01964446, "step": 8065 }, { "epoch": 16.132, "grad_norm": 1.4265450239181519, "learning_rate": 2e-05, "loss": 0.03322139, "step": 8066 }, { "epoch": 16.134, "grad_norm": 1.0770517587661743, "learning_rate": 2e-05, "loss": 0.03661602, "step": 8067 }, { "epoch": 16.136, "grad_norm": 1.4938647747039795, "learning_rate": 2e-05, "loss": 0.03427796, "step": 8068 }, { "epoch": 16.138, "grad_norm": 1.4040374755859375, "learning_rate": 2e-05, "loss": 0.03253736, "step": 8069 }, { "epoch": 16.14, "grad_norm": 1.381060242652893, "learning_rate": 2e-05, "loss": 0.03683833, "step": 8070 }, { "epoch": 16.142, "grad_norm": 1.1228476762771606, "learning_rate": 2e-05, "loss": 0.04204592, "step": 8071 }, { "epoch": 16.144, "grad_norm": 1.1399726867675781, "learning_rate": 2e-05, "loss": 0.0393802, "step": 8072 }, { "epoch": 16.146, "grad_norm": 1.6664113998413086, "learning_rate": 2e-05, "loss": 0.03525341, "step": 8073 }, { "epoch": 16.148, "grad_norm": 1.0361666679382324, "learning_rate": 2e-05, "loss": 0.03461679, "step": 8074 }, { "epoch": 16.15, "grad_norm": 1.3984792232513428, "learning_rate": 2e-05, "loss": 0.04337152, "step": 8075 }, { "epoch": 16.152, "grad_norm": 1.3185367584228516, "learning_rate": 2e-05, "loss": 0.04654099, "step": 8076 }, { "epoch": 16.154, "grad_norm": 2.2266247272491455, "learning_rate": 2e-05, "loss": 0.05011526, "step": 8077 }, { "epoch": 16.156, "grad_norm": 1.3377346992492676, "learning_rate": 2e-05, "loss": 0.04082262, "step": 8078 }, { "epoch": 16.158, "grad_norm": 3.1692075729370117, "learning_rate": 2e-05, "loss": 0.05622632, "step": 8079 }, { "epoch": 16.16, "grad_norm": 1.0942643880844116, "learning_rate": 2e-05, "loss": 0.02746512, "step": 8080 }, { "epoch": 16.162, "grad_norm": 1.217752456665039, "learning_rate": 2e-05, "loss": 0.03617308, "step": 8081 }, { "epoch": 16.164, "grad_norm": 1.8734045028686523, "learning_rate": 2e-05, "loss": 0.04952653, "step": 8082 }, { "epoch": 16.166, "grad_norm": 1.50890052318573, "learning_rate": 2e-05, "loss": 0.04156789, "step": 8083 }, { "epoch": 16.168, "grad_norm": 1.4338157176971436, "learning_rate": 2e-05, "loss": 0.05392047, "step": 8084 }, { "epoch": 16.17, "grad_norm": 1.7338756322860718, "learning_rate": 2e-05, "loss": 0.04553423, "step": 8085 }, { "epoch": 16.172, "grad_norm": 1.5352826118469238, "learning_rate": 2e-05, "loss": 0.04915183, "step": 8086 }, { "epoch": 16.174, "grad_norm": 1.7722485065460205, "learning_rate": 2e-05, "loss": 0.03585491, "step": 8087 }, { "epoch": 16.176, "grad_norm": 0.9967431426048279, "learning_rate": 2e-05, "loss": 0.02942163, "step": 8088 }, { "epoch": 16.178, "grad_norm": 1.2177917957305908, "learning_rate": 2e-05, "loss": 0.04380299, "step": 8089 }, { "epoch": 16.18, "grad_norm": 1.2559581995010376, "learning_rate": 2e-05, "loss": 0.02480926, "step": 8090 }, { "epoch": 16.182, "grad_norm": 1.5222054719924927, "learning_rate": 2e-05, "loss": 0.04941765, "step": 8091 }, { "epoch": 16.184, "grad_norm": 1.0515002012252808, "learning_rate": 2e-05, "loss": 0.04209872, "step": 8092 }, { "epoch": 16.186, "grad_norm": 1.8783875703811646, "learning_rate": 2e-05, "loss": 0.03835857, "step": 8093 }, { "epoch": 16.188, "grad_norm": 1.2935268878936768, "learning_rate": 2e-05, "loss": 0.05065227, "step": 8094 }, { "epoch": 16.19, "grad_norm": 1.6219897270202637, "learning_rate": 2e-05, "loss": 0.02984177, "step": 8095 }, { "epoch": 16.192, "grad_norm": 1.3317407369613647, "learning_rate": 2e-05, "loss": 0.0331154, "step": 8096 }, { "epoch": 16.194, "grad_norm": 1.2899963855743408, "learning_rate": 2e-05, "loss": 0.03418003, "step": 8097 }, { "epoch": 16.196, "grad_norm": 1.1565256118774414, "learning_rate": 2e-05, "loss": 0.03147539, "step": 8098 }, { "epoch": 16.198, "grad_norm": 1.2315301895141602, "learning_rate": 2e-05, "loss": 0.0391296, "step": 8099 }, { "epoch": 16.2, "grad_norm": 1.1125531196594238, "learning_rate": 2e-05, "loss": 0.03473473, "step": 8100 }, { "epoch": 16.202, "grad_norm": 1.5574398040771484, "learning_rate": 2e-05, "loss": 0.0310582, "step": 8101 }, { "epoch": 16.204, "grad_norm": 1.4571691751480103, "learning_rate": 2e-05, "loss": 0.03549339, "step": 8102 }, { "epoch": 16.206, "grad_norm": 1.2197107076644897, "learning_rate": 2e-05, "loss": 0.04195347, "step": 8103 }, { "epoch": 16.208, "grad_norm": 2.789921760559082, "learning_rate": 2e-05, "loss": 0.03652881, "step": 8104 }, { "epoch": 16.21, "grad_norm": 4.851873397827148, "learning_rate": 2e-05, "loss": 0.03688388, "step": 8105 }, { "epoch": 16.212, "grad_norm": 1.457202434539795, "learning_rate": 2e-05, "loss": 0.03209008, "step": 8106 }, { "epoch": 16.214, "grad_norm": 1.0909388065338135, "learning_rate": 2e-05, "loss": 0.02918109, "step": 8107 }, { "epoch": 16.216, "grad_norm": 2.0140578746795654, "learning_rate": 2e-05, "loss": 0.05105082, "step": 8108 }, { "epoch": 16.218, "grad_norm": 1.9580172300338745, "learning_rate": 2e-05, "loss": 0.04800195, "step": 8109 }, { "epoch": 16.22, "grad_norm": 3.5140187740325928, "learning_rate": 2e-05, "loss": 0.04388273, "step": 8110 }, { "epoch": 16.222, "grad_norm": 1.116141676902771, "learning_rate": 2e-05, "loss": 0.03350087, "step": 8111 }, { "epoch": 16.224, "grad_norm": 2.4265329837799072, "learning_rate": 2e-05, "loss": 0.04413512, "step": 8112 }, { "epoch": 16.226, "grad_norm": 1.9018596410751343, "learning_rate": 2e-05, "loss": 0.0421547, "step": 8113 }, { "epoch": 16.228, "grad_norm": 1.3331454992294312, "learning_rate": 2e-05, "loss": 0.04760406, "step": 8114 }, { "epoch": 16.23, "grad_norm": 0.9366099834442139, "learning_rate": 2e-05, "loss": 0.02583786, "step": 8115 }, { "epoch": 16.232, "grad_norm": 2.820237636566162, "learning_rate": 2e-05, "loss": 0.02830683, "step": 8116 }, { "epoch": 16.234, "grad_norm": 1.1773334741592407, "learning_rate": 2e-05, "loss": 0.03467422, "step": 8117 }, { "epoch": 16.236, "grad_norm": 1.685434103012085, "learning_rate": 2e-05, "loss": 0.04194804, "step": 8118 }, { "epoch": 16.238, "grad_norm": 1.8741134405136108, "learning_rate": 2e-05, "loss": 0.0288431, "step": 8119 }, { "epoch": 16.24, "grad_norm": 1.3268035650253296, "learning_rate": 2e-05, "loss": 0.0287002, "step": 8120 }, { "epoch": 16.242, "grad_norm": 1.3486356735229492, "learning_rate": 2e-05, "loss": 0.03664606, "step": 8121 }, { "epoch": 16.244, "grad_norm": 1.631987452507019, "learning_rate": 2e-05, "loss": 0.05182088, "step": 8122 }, { "epoch": 16.246, "grad_norm": 2.88786244392395, "learning_rate": 2e-05, "loss": 0.05489088, "step": 8123 }, { "epoch": 16.248, "grad_norm": 2.0187666416168213, "learning_rate": 2e-05, "loss": 0.04556795, "step": 8124 }, { "epoch": 16.25, "grad_norm": 1.089003562927246, "learning_rate": 2e-05, "loss": 0.03417504, "step": 8125 }, { "epoch": 16.252, "grad_norm": 1.238042950630188, "learning_rate": 2e-05, "loss": 0.03643361, "step": 8126 }, { "epoch": 16.254, "grad_norm": 1.347186803817749, "learning_rate": 2e-05, "loss": 0.0338303, "step": 8127 }, { "epoch": 16.256, "grad_norm": 1.772897720336914, "learning_rate": 2e-05, "loss": 0.05571172, "step": 8128 }, { "epoch": 16.258, "grad_norm": 0.9345927834510803, "learning_rate": 2e-05, "loss": 0.02508531, "step": 8129 }, { "epoch": 16.26, "grad_norm": 1.491607904434204, "learning_rate": 2e-05, "loss": 0.03616347, "step": 8130 }, { "epoch": 16.262, "grad_norm": 1.141614317893982, "learning_rate": 2e-05, "loss": 0.03462991, "step": 8131 }, { "epoch": 16.264, "grad_norm": 1.2147083282470703, "learning_rate": 2e-05, "loss": 0.03725596, "step": 8132 }, { "epoch": 16.266, "grad_norm": 1.7106729745864868, "learning_rate": 2e-05, "loss": 0.03632404, "step": 8133 }, { "epoch": 16.268, "grad_norm": 1.0277926921844482, "learning_rate": 2e-05, "loss": 0.0347311, "step": 8134 }, { "epoch": 16.27, "grad_norm": 1.2781480550765991, "learning_rate": 2e-05, "loss": 0.03461632, "step": 8135 }, { "epoch": 16.272, "grad_norm": 1.013649582862854, "learning_rate": 2e-05, "loss": 0.03419242, "step": 8136 }, { "epoch": 16.274, "grad_norm": 1.7054558992385864, "learning_rate": 2e-05, "loss": 0.04766083, "step": 8137 }, { "epoch": 16.276, "grad_norm": 1.1981877088546753, "learning_rate": 2e-05, "loss": 0.03875602, "step": 8138 }, { "epoch": 16.278, "grad_norm": 1.4614924192428589, "learning_rate": 2e-05, "loss": 0.04386155, "step": 8139 }, { "epoch": 16.28, "grad_norm": 0.8466894626617432, "learning_rate": 2e-05, "loss": 0.01991762, "step": 8140 }, { "epoch": 16.282, "grad_norm": 2.6902008056640625, "learning_rate": 2e-05, "loss": 0.04461161, "step": 8141 }, { "epoch": 16.284, "grad_norm": 1.712175965309143, "learning_rate": 2e-05, "loss": 0.04943609, "step": 8142 }, { "epoch": 16.286, "grad_norm": 1.3116623163223267, "learning_rate": 2e-05, "loss": 0.03155245, "step": 8143 }, { "epoch": 16.288, "grad_norm": 2.068293809890747, "learning_rate": 2e-05, "loss": 0.04026989, "step": 8144 }, { "epoch": 16.29, "grad_norm": 2.457705020904541, "learning_rate": 2e-05, "loss": 0.05160169, "step": 8145 }, { "epoch": 16.292, "grad_norm": 1.1721792221069336, "learning_rate": 2e-05, "loss": 0.03861858, "step": 8146 }, { "epoch": 16.294, "grad_norm": 1.3003913164138794, "learning_rate": 2e-05, "loss": 0.03377434, "step": 8147 }, { "epoch": 16.296, "grad_norm": 1.6648625135421753, "learning_rate": 2e-05, "loss": 0.05673098, "step": 8148 }, { "epoch": 16.298, "grad_norm": 1.6256797313690186, "learning_rate": 2e-05, "loss": 0.03393706, "step": 8149 }, { "epoch": 16.3, "grad_norm": 1.4245861768722534, "learning_rate": 2e-05, "loss": 0.03638397, "step": 8150 }, { "epoch": 16.302, "grad_norm": 3.2517616748809814, "learning_rate": 2e-05, "loss": 0.04278335, "step": 8151 }, { "epoch": 16.304, "grad_norm": 0.8710333108901978, "learning_rate": 2e-05, "loss": 0.02735967, "step": 8152 }, { "epoch": 16.306, "grad_norm": 3.8048529624938965, "learning_rate": 2e-05, "loss": 0.04639032, "step": 8153 }, { "epoch": 16.308, "grad_norm": 1.7357240915298462, "learning_rate": 2e-05, "loss": 0.04244914, "step": 8154 }, { "epoch": 16.31, "grad_norm": 1.8791494369506836, "learning_rate": 2e-05, "loss": 0.04231622, "step": 8155 }, { "epoch": 16.312, "grad_norm": 1.1524310111999512, "learning_rate": 2e-05, "loss": 0.03676322, "step": 8156 }, { "epoch": 16.314, "grad_norm": 2.3864612579345703, "learning_rate": 2e-05, "loss": 0.04578413, "step": 8157 }, { "epoch": 16.316, "grad_norm": 1.7564839124679565, "learning_rate": 2e-05, "loss": 0.04500315, "step": 8158 }, { "epoch": 16.318, "grad_norm": 2.045438766479492, "learning_rate": 2e-05, "loss": 0.0306298, "step": 8159 }, { "epoch": 16.32, "grad_norm": 0.8264663815498352, "learning_rate": 2e-05, "loss": 0.02070304, "step": 8160 }, { "epoch": 16.322, "grad_norm": 1.1368653774261475, "learning_rate": 2e-05, "loss": 0.03928949, "step": 8161 }, { "epoch": 16.324, "grad_norm": 0.9940206408500671, "learning_rate": 2e-05, "loss": 0.034535, "step": 8162 }, { "epoch": 16.326, "grad_norm": 1.2441214323043823, "learning_rate": 2e-05, "loss": 0.0337163, "step": 8163 }, { "epoch": 16.328, "grad_norm": 1.5334255695343018, "learning_rate": 2e-05, "loss": 0.04570875, "step": 8164 }, { "epoch": 16.33, "grad_norm": 1.4286037683486938, "learning_rate": 2e-05, "loss": 0.05645541, "step": 8165 }, { "epoch": 16.332, "grad_norm": 2.280270576477051, "learning_rate": 2e-05, "loss": 0.0485545, "step": 8166 }, { "epoch": 16.334, "grad_norm": 1.9893189668655396, "learning_rate": 2e-05, "loss": 0.03430265, "step": 8167 }, { "epoch": 16.336, "grad_norm": 1.4144779443740845, "learning_rate": 2e-05, "loss": 0.04236887, "step": 8168 }, { "epoch": 16.338, "grad_norm": 1.0118204355239868, "learning_rate": 2e-05, "loss": 0.02984769, "step": 8169 }, { "epoch": 16.34, "grad_norm": 1.6643238067626953, "learning_rate": 2e-05, "loss": 0.04789416, "step": 8170 }, { "epoch": 16.342, "grad_norm": 0.8409438729286194, "learning_rate": 2e-05, "loss": 0.01973891, "step": 8171 }, { "epoch": 16.344, "grad_norm": 1.0004379749298096, "learning_rate": 2e-05, "loss": 0.03158588, "step": 8172 }, { "epoch": 16.346, "grad_norm": 2.194171667098999, "learning_rate": 2e-05, "loss": 0.06659742, "step": 8173 }, { "epoch": 16.348, "grad_norm": 1.4098418951034546, "learning_rate": 2e-05, "loss": 0.03775009, "step": 8174 }, { "epoch": 16.35, "grad_norm": 1.5424364805221558, "learning_rate": 2e-05, "loss": 0.04169576, "step": 8175 }, { "epoch": 16.352, "grad_norm": 1.8581490516662598, "learning_rate": 2e-05, "loss": 0.07275459, "step": 8176 }, { "epoch": 16.354, "grad_norm": 1.3709255456924438, "learning_rate": 2e-05, "loss": 0.03472374, "step": 8177 }, { "epoch": 16.356, "grad_norm": 1.9072877168655396, "learning_rate": 2e-05, "loss": 0.0435157, "step": 8178 }, { "epoch": 16.358, "grad_norm": 2.198082208633423, "learning_rate": 2e-05, "loss": 0.02928953, "step": 8179 }, { "epoch": 16.36, "grad_norm": 0.9633461833000183, "learning_rate": 2e-05, "loss": 0.03266443, "step": 8180 }, { "epoch": 16.362, "grad_norm": 2.41123366355896, "learning_rate": 2e-05, "loss": 0.04052667, "step": 8181 }, { "epoch": 16.364, "grad_norm": 1.4949405193328857, "learning_rate": 2e-05, "loss": 0.05271856, "step": 8182 }, { "epoch": 16.366, "grad_norm": 1.5633461475372314, "learning_rate": 2e-05, "loss": 0.04020818, "step": 8183 }, { "epoch": 16.368, "grad_norm": 1.0727436542510986, "learning_rate": 2e-05, "loss": 0.03464281, "step": 8184 }, { "epoch": 16.37, "grad_norm": 1.2527438402175903, "learning_rate": 2e-05, "loss": 0.04834267, "step": 8185 }, { "epoch": 16.372, "grad_norm": 1.9610435962677002, "learning_rate": 2e-05, "loss": 0.05433511, "step": 8186 }, { "epoch": 16.374, "grad_norm": 1.6253420114517212, "learning_rate": 2e-05, "loss": 0.04721621, "step": 8187 }, { "epoch": 16.376, "grad_norm": 2.0063889026641846, "learning_rate": 2e-05, "loss": 0.04768866, "step": 8188 }, { "epoch": 16.378, "grad_norm": 1.5373892784118652, "learning_rate": 2e-05, "loss": 0.04332329, "step": 8189 }, { "epoch": 16.38, "grad_norm": 1.7891055345535278, "learning_rate": 2e-05, "loss": 0.0316738, "step": 8190 }, { "epoch": 16.382, "grad_norm": 1.626381516456604, "learning_rate": 2e-05, "loss": 0.0384635, "step": 8191 }, { "epoch": 16.384, "grad_norm": 1.2403956651687622, "learning_rate": 2e-05, "loss": 0.03793911, "step": 8192 }, { "epoch": 16.386, "grad_norm": 1.388485074043274, "learning_rate": 2e-05, "loss": 0.05434792, "step": 8193 }, { "epoch": 16.388, "grad_norm": 0.903146505355835, "learning_rate": 2e-05, "loss": 0.02984499, "step": 8194 }, { "epoch": 16.39, "grad_norm": 1.14999258518219, "learning_rate": 2e-05, "loss": 0.0324681, "step": 8195 }, { "epoch": 16.392, "grad_norm": 0.8718903660774231, "learning_rate": 2e-05, "loss": 0.03075159, "step": 8196 }, { "epoch": 16.394, "grad_norm": 0.9332501888275146, "learning_rate": 2e-05, "loss": 0.03167652, "step": 8197 }, { "epoch": 16.396, "grad_norm": 1.2964439392089844, "learning_rate": 2e-05, "loss": 0.05365559, "step": 8198 }, { "epoch": 16.398, "grad_norm": 1.2190895080566406, "learning_rate": 2e-05, "loss": 0.04543667, "step": 8199 }, { "epoch": 16.4, "grad_norm": 1.297627568244934, "learning_rate": 2e-05, "loss": 0.03421879, "step": 8200 }, { "epoch": 16.402, "grad_norm": 1.3913570642471313, "learning_rate": 2e-05, "loss": 0.03622192, "step": 8201 }, { "epoch": 16.404, "grad_norm": 0.967580258846283, "learning_rate": 2e-05, "loss": 0.03260601, "step": 8202 }, { "epoch": 16.406, "grad_norm": 1.093234896659851, "learning_rate": 2e-05, "loss": 0.04168686, "step": 8203 }, { "epoch": 16.408, "grad_norm": 1.2416380643844604, "learning_rate": 2e-05, "loss": 0.04545043, "step": 8204 }, { "epoch": 16.41, "grad_norm": 2.081451892852783, "learning_rate": 2e-05, "loss": 0.03653099, "step": 8205 }, { "epoch": 16.412, "grad_norm": 1.4831323623657227, "learning_rate": 2e-05, "loss": 0.04202024, "step": 8206 }, { "epoch": 16.414, "grad_norm": 0.8882843852043152, "learning_rate": 2e-05, "loss": 0.02640081, "step": 8207 }, { "epoch": 16.416, "grad_norm": 1.2519268989562988, "learning_rate": 2e-05, "loss": 0.03396625, "step": 8208 }, { "epoch": 16.418, "grad_norm": 1.0374751091003418, "learning_rate": 2e-05, "loss": 0.03408733, "step": 8209 }, { "epoch": 16.42, "grad_norm": 1.2353500127792358, "learning_rate": 2e-05, "loss": 0.04257344, "step": 8210 }, { "epoch": 16.422, "grad_norm": 1.310669183731079, "learning_rate": 2e-05, "loss": 0.03693325, "step": 8211 }, { "epoch": 16.424, "grad_norm": 2.0331413745880127, "learning_rate": 2e-05, "loss": 0.03563491, "step": 8212 }, { "epoch": 16.426, "grad_norm": 1.2886682748794556, "learning_rate": 2e-05, "loss": 0.03784215, "step": 8213 }, { "epoch": 16.428, "grad_norm": 1.1843023300170898, "learning_rate": 2e-05, "loss": 0.04167389, "step": 8214 }, { "epoch": 16.43, "grad_norm": 2.5486996173858643, "learning_rate": 2e-05, "loss": 0.05189614, "step": 8215 }, { "epoch": 16.432, "grad_norm": 1.908674955368042, "learning_rate": 2e-05, "loss": 0.02384158, "step": 8216 }, { "epoch": 16.434, "grad_norm": 1.7661586999893188, "learning_rate": 2e-05, "loss": 0.03849943, "step": 8217 }, { "epoch": 16.436, "grad_norm": 1.0904186964035034, "learning_rate": 2e-05, "loss": 0.04790416, "step": 8218 }, { "epoch": 16.438, "grad_norm": 1.0966798067092896, "learning_rate": 2e-05, "loss": 0.03781455, "step": 8219 }, { "epoch": 16.44, "grad_norm": 0.9001188278198242, "learning_rate": 2e-05, "loss": 0.02561498, "step": 8220 }, { "epoch": 16.442, "grad_norm": 0.9903857111930847, "learning_rate": 2e-05, "loss": 0.0389318, "step": 8221 }, { "epoch": 16.444, "grad_norm": 1.5551356077194214, "learning_rate": 2e-05, "loss": 0.05141037, "step": 8222 }, { "epoch": 16.446, "grad_norm": 1.2812175750732422, "learning_rate": 2e-05, "loss": 0.03643136, "step": 8223 }, { "epoch": 16.448, "grad_norm": 1.0215216875076294, "learning_rate": 2e-05, "loss": 0.02926203, "step": 8224 }, { "epoch": 16.45, "grad_norm": 1.4955259561538696, "learning_rate": 2e-05, "loss": 0.03899873, "step": 8225 }, { "epoch": 16.452, "grad_norm": 1.4788180589675903, "learning_rate": 2e-05, "loss": 0.05681079, "step": 8226 }, { "epoch": 16.454, "grad_norm": 1.0436245203018188, "learning_rate": 2e-05, "loss": 0.03019481, "step": 8227 }, { "epoch": 16.456, "grad_norm": 0.839566707611084, "learning_rate": 2e-05, "loss": 0.02787675, "step": 8228 }, { "epoch": 16.458, "grad_norm": 1.0782071352005005, "learning_rate": 2e-05, "loss": 0.0280141, "step": 8229 }, { "epoch": 16.46, "grad_norm": 1.0634349584579468, "learning_rate": 2e-05, "loss": 0.03917925, "step": 8230 }, { "epoch": 16.462, "grad_norm": 1.0668877363204956, "learning_rate": 2e-05, "loss": 0.03235231, "step": 8231 }, { "epoch": 16.464, "grad_norm": 1.0869591236114502, "learning_rate": 2e-05, "loss": 0.03207518, "step": 8232 }, { "epoch": 16.466, "grad_norm": 1.7971811294555664, "learning_rate": 2e-05, "loss": 0.04344584, "step": 8233 }, { "epoch": 16.468, "grad_norm": 2.0052719116210938, "learning_rate": 2e-05, "loss": 0.03197655, "step": 8234 }, { "epoch": 16.47, "grad_norm": 1.3378233909606934, "learning_rate": 2e-05, "loss": 0.03234541, "step": 8235 }, { "epoch": 16.472, "grad_norm": 2.523751735687256, "learning_rate": 2e-05, "loss": 0.04990536, "step": 8236 }, { "epoch": 16.474, "grad_norm": 1.0454021692276, "learning_rate": 2e-05, "loss": 0.03331465, "step": 8237 }, { "epoch": 16.476, "grad_norm": 1.1137912273406982, "learning_rate": 2e-05, "loss": 0.03970309, "step": 8238 }, { "epoch": 16.478, "grad_norm": 1.585533618927002, "learning_rate": 2e-05, "loss": 0.0488726, "step": 8239 }, { "epoch": 16.48, "grad_norm": 3.0131523609161377, "learning_rate": 2e-05, "loss": 0.04734076, "step": 8240 }, { "epoch": 16.482, "grad_norm": 0.9826610684394836, "learning_rate": 2e-05, "loss": 0.0252513, "step": 8241 }, { "epoch": 16.484, "grad_norm": 1.9169968366622925, "learning_rate": 2e-05, "loss": 0.03856509, "step": 8242 }, { "epoch": 16.486, "grad_norm": 1.338022232055664, "learning_rate": 2e-05, "loss": 0.03761451, "step": 8243 }, { "epoch": 16.488, "grad_norm": 1.4171205759048462, "learning_rate": 2e-05, "loss": 0.04478035, "step": 8244 }, { "epoch": 16.49, "grad_norm": 1.2033617496490479, "learning_rate": 2e-05, "loss": 0.03369937, "step": 8245 }, { "epoch": 16.492, "grad_norm": 1.676818609237671, "learning_rate": 2e-05, "loss": 0.03760742, "step": 8246 }, { "epoch": 16.494, "grad_norm": 1.0126457214355469, "learning_rate": 2e-05, "loss": 0.03277501, "step": 8247 }, { "epoch": 16.496, "grad_norm": 1.122615933418274, "learning_rate": 2e-05, "loss": 0.03475213, "step": 8248 }, { "epoch": 16.498, "grad_norm": 1.2609951496124268, "learning_rate": 2e-05, "loss": 0.03638831, "step": 8249 }, { "epoch": 16.5, "grad_norm": 1.3109709024429321, "learning_rate": 2e-05, "loss": 0.04073593, "step": 8250 }, { "epoch": 16.502, "grad_norm": 1.6363704204559326, "learning_rate": 2e-05, "loss": 0.04546898, "step": 8251 }, { "epoch": 16.504, "grad_norm": 1.0490771532058716, "learning_rate": 2e-05, "loss": 0.03132774, "step": 8252 }, { "epoch": 16.506, "grad_norm": 1.9119375944137573, "learning_rate": 2e-05, "loss": 0.04283144, "step": 8253 }, { "epoch": 16.508, "grad_norm": 1.7922325134277344, "learning_rate": 2e-05, "loss": 0.04706988, "step": 8254 }, { "epoch": 16.51, "grad_norm": 2.0722665786743164, "learning_rate": 2e-05, "loss": 0.04967377, "step": 8255 }, { "epoch": 16.512, "grad_norm": 1.6956374645233154, "learning_rate": 2e-05, "loss": 0.03436553, "step": 8256 }, { "epoch": 16.514, "grad_norm": 1.2904715538024902, "learning_rate": 2e-05, "loss": 0.03789183, "step": 8257 }, { "epoch": 16.516, "grad_norm": 1.4763702154159546, "learning_rate": 2e-05, "loss": 0.04574104, "step": 8258 }, { "epoch": 16.518, "grad_norm": 1.6451772451400757, "learning_rate": 2e-05, "loss": 0.05261879, "step": 8259 }, { "epoch": 16.52, "grad_norm": 1.7381635904312134, "learning_rate": 2e-05, "loss": 0.03526191, "step": 8260 }, { "epoch": 16.522, "grad_norm": 1.263259768486023, "learning_rate": 2e-05, "loss": 0.03281062, "step": 8261 }, { "epoch": 16.524, "grad_norm": 1.220733642578125, "learning_rate": 2e-05, "loss": 0.03505442, "step": 8262 }, { "epoch": 16.526, "grad_norm": 1.2341818809509277, "learning_rate": 2e-05, "loss": 0.04311585, "step": 8263 }, { "epoch": 16.528, "grad_norm": 0.9565370678901672, "learning_rate": 2e-05, "loss": 0.02928256, "step": 8264 }, { "epoch": 16.53, "grad_norm": 0.8718287944793701, "learning_rate": 2e-05, "loss": 0.02539262, "step": 8265 }, { "epoch": 16.532, "grad_norm": 1.1354038715362549, "learning_rate": 2e-05, "loss": 0.03417098, "step": 8266 }, { "epoch": 16.534, "grad_norm": 1.1788674592971802, "learning_rate": 2e-05, "loss": 0.03965496, "step": 8267 }, { "epoch": 16.536, "grad_norm": 1.3101824522018433, "learning_rate": 2e-05, "loss": 0.03684475, "step": 8268 }, { "epoch": 16.538, "grad_norm": 1.3392512798309326, "learning_rate": 2e-05, "loss": 0.04036241, "step": 8269 }, { "epoch": 16.54, "grad_norm": 2.521176338195801, "learning_rate": 2e-05, "loss": 0.04675987, "step": 8270 }, { "epoch": 16.542, "grad_norm": 1.11541748046875, "learning_rate": 2e-05, "loss": 0.03730694, "step": 8271 }, { "epoch": 16.544, "grad_norm": 1.528278112411499, "learning_rate": 2e-05, "loss": 0.06005888, "step": 8272 }, { "epoch": 16.546, "grad_norm": 1.2424397468566895, "learning_rate": 2e-05, "loss": 0.05591384, "step": 8273 }, { "epoch": 16.548000000000002, "grad_norm": 1.0814480781555176, "learning_rate": 2e-05, "loss": 0.03427697, "step": 8274 }, { "epoch": 16.55, "grad_norm": 2.6472551822662354, "learning_rate": 2e-05, "loss": 0.0521223, "step": 8275 }, { "epoch": 16.552, "grad_norm": 1.3621563911437988, "learning_rate": 2e-05, "loss": 0.04933754, "step": 8276 }, { "epoch": 16.554, "grad_norm": 2.2583024501800537, "learning_rate": 2e-05, "loss": 0.05284938, "step": 8277 }, { "epoch": 16.556, "grad_norm": 1.0987781286239624, "learning_rate": 2e-05, "loss": 0.02926936, "step": 8278 }, { "epoch": 16.558, "grad_norm": 1.283735752105713, "learning_rate": 2e-05, "loss": 0.03353404, "step": 8279 }, { "epoch": 16.56, "grad_norm": 1.0234546661376953, "learning_rate": 2e-05, "loss": 0.03110966, "step": 8280 }, { "epoch": 16.562, "grad_norm": 1.325693130493164, "learning_rate": 2e-05, "loss": 0.04735526, "step": 8281 }, { "epoch": 16.564, "grad_norm": 1.5526974201202393, "learning_rate": 2e-05, "loss": 0.04662727, "step": 8282 }, { "epoch": 16.566, "grad_norm": 2.4452273845672607, "learning_rate": 2e-05, "loss": 0.03830102, "step": 8283 }, { "epoch": 16.568, "grad_norm": 1.2660468816757202, "learning_rate": 2e-05, "loss": 0.04160655, "step": 8284 }, { "epoch": 16.57, "grad_norm": 1.1161909103393555, "learning_rate": 2e-05, "loss": 0.04231364, "step": 8285 }, { "epoch": 16.572, "grad_norm": 1.012315273284912, "learning_rate": 2e-05, "loss": 0.03184524, "step": 8286 }, { "epoch": 16.574, "grad_norm": 1.1147305965423584, "learning_rate": 2e-05, "loss": 0.03896227, "step": 8287 }, { "epoch": 16.576, "grad_norm": 1.0445970296859741, "learning_rate": 2e-05, "loss": 0.03200065, "step": 8288 }, { "epoch": 16.578, "grad_norm": 1.7669461965560913, "learning_rate": 2e-05, "loss": 0.04407949, "step": 8289 }, { "epoch": 16.58, "grad_norm": 1.550229549407959, "learning_rate": 2e-05, "loss": 0.02869645, "step": 8290 }, { "epoch": 16.582, "grad_norm": 1.0623831748962402, "learning_rate": 2e-05, "loss": 0.0305838, "step": 8291 }, { "epoch": 16.584, "grad_norm": 1.5914502143859863, "learning_rate": 2e-05, "loss": 0.04625592, "step": 8292 }, { "epoch": 16.586, "grad_norm": 1.5053743124008179, "learning_rate": 2e-05, "loss": 0.03940097, "step": 8293 }, { "epoch": 16.588, "grad_norm": 1.9061319828033447, "learning_rate": 2e-05, "loss": 0.05159453, "step": 8294 }, { "epoch": 16.59, "grad_norm": 1.4841290712356567, "learning_rate": 2e-05, "loss": 0.05897345, "step": 8295 }, { "epoch": 16.592, "grad_norm": 2.136908769607544, "learning_rate": 2e-05, "loss": 0.05323712, "step": 8296 }, { "epoch": 16.594, "grad_norm": 1.3379402160644531, "learning_rate": 2e-05, "loss": 0.03460371, "step": 8297 }, { "epoch": 16.596, "grad_norm": 1.020004391670227, "learning_rate": 2e-05, "loss": 0.03321714, "step": 8298 }, { "epoch": 16.598, "grad_norm": 2.1319894790649414, "learning_rate": 2e-05, "loss": 0.04367604, "step": 8299 }, { "epoch": 16.6, "grad_norm": 1.1898785829544067, "learning_rate": 2e-05, "loss": 0.03668055, "step": 8300 }, { "epoch": 16.602, "grad_norm": 1.096915364265442, "learning_rate": 2e-05, "loss": 0.03318947, "step": 8301 }, { "epoch": 16.604, "grad_norm": 1.0413960218429565, "learning_rate": 2e-05, "loss": 0.03442251, "step": 8302 }, { "epoch": 16.606, "grad_norm": 1.2012441158294678, "learning_rate": 2e-05, "loss": 0.02705516, "step": 8303 }, { "epoch": 16.608, "grad_norm": 1.2894916534423828, "learning_rate": 2e-05, "loss": 0.03271394, "step": 8304 }, { "epoch": 16.61, "grad_norm": 1.2636003494262695, "learning_rate": 2e-05, "loss": 0.04137167, "step": 8305 }, { "epoch": 16.612, "grad_norm": 1.2958347797393799, "learning_rate": 2e-05, "loss": 0.04770758, "step": 8306 }, { "epoch": 16.614, "grad_norm": 2.002460479736328, "learning_rate": 2e-05, "loss": 0.0475505, "step": 8307 }, { "epoch": 16.616, "grad_norm": 0.9963358640670776, "learning_rate": 2e-05, "loss": 0.03287155, "step": 8308 }, { "epoch": 16.618, "grad_norm": 1.1301119327545166, "learning_rate": 2e-05, "loss": 0.04497526, "step": 8309 }, { "epoch": 16.62, "grad_norm": 1.0189284086227417, "learning_rate": 2e-05, "loss": 0.0293178, "step": 8310 }, { "epoch": 16.622, "grad_norm": 0.8906993269920349, "learning_rate": 2e-05, "loss": 0.01907645, "step": 8311 }, { "epoch": 16.624, "grad_norm": 1.2180598974227905, "learning_rate": 2e-05, "loss": 0.03251302, "step": 8312 }, { "epoch": 16.626, "grad_norm": 1.7638144493103027, "learning_rate": 2e-05, "loss": 0.03417107, "step": 8313 }, { "epoch": 16.628, "grad_norm": 1.3163055181503296, "learning_rate": 2e-05, "loss": 0.03712931, "step": 8314 }, { "epoch": 16.63, "grad_norm": 0.903484046459198, "learning_rate": 2e-05, "loss": 0.02302654, "step": 8315 }, { "epoch": 16.632, "grad_norm": 2.890394926071167, "learning_rate": 2e-05, "loss": 0.05785254, "step": 8316 }, { "epoch": 16.634, "grad_norm": 1.6316351890563965, "learning_rate": 2e-05, "loss": 0.05490664, "step": 8317 }, { "epoch": 16.636, "grad_norm": 1.3253639936447144, "learning_rate": 2e-05, "loss": 0.03378769, "step": 8318 }, { "epoch": 16.638, "grad_norm": 0.8841352462768555, "learning_rate": 2e-05, "loss": 0.02580001, "step": 8319 }, { "epoch": 16.64, "grad_norm": 1.5405997037887573, "learning_rate": 2e-05, "loss": 0.0418197, "step": 8320 }, { "epoch": 16.642, "grad_norm": 1.6014611721038818, "learning_rate": 2e-05, "loss": 0.05297592, "step": 8321 }, { "epoch": 16.644, "grad_norm": 0.9699270129203796, "learning_rate": 2e-05, "loss": 0.03074204, "step": 8322 }, { "epoch": 16.646, "grad_norm": 1.0691285133361816, "learning_rate": 2e-05, "loss": 0.02828666, "step": 8323 }, { "epoch": 16.648, "grad_norm": 1.4978876113891602, "learning_rate": 2e-05, "loss": 0.04264086, "step": 8324 }, { "epoch": 16.65, "grad_norm": 1.067944884300232, "learning_rate": 2e-05, "loss": 0.04064353, "step": 8325 }, { "epoch": 16.652, "grad_norm": 1.609089732170105, "learning_rate": 2e-05, "loss": 0.03844542, "step": 8326 }, { "epoch": 16.654, "grad_norm": 1.3730612993240356, "learning_rate": 2e-05, "loss": 0.03860483, "step": 8327 }, { "epoch": 16.656, "grad_norm": 0.989010214805603, "learning_rate": 2e-05, "loss": 0.02395894, "step": 8328 }, { "epoch": 16.658, "grad_norm": 0.89674311876297, "learning_rate": 2e-05, "loss": 0.02376273, "step": 8329 }, { "epoch": 16.66, "grad_norm": 1.1137092113494873, "learning_rate": 2e-05, "loss": 0.01789567, "step": 8330 }, { "epoch": 16.662, "grad_norm": 1.0320215225219727, "learning_rate": 2e-05, "loss": 0.0323988, "step": 8331 }, { "epoch": 16.664, "grad_norm": 2.489551067352295, "learning_rate": 2e-05, "loss": 0.05527273, "step": 8332 }, { "epoch": 16.666, "grad_norm": 1.2354681491851807, "learning_rate": 2e-05, "loss": 0.03027255, "step": 8333 }, { "epoch": 16.668, "grad_norm": 1.5036636590957642, "learning_rate": 2e-05, "loss": 0.03653241, "step": 8334 }, { "epoch": 16.67, "grad_norm": 1.5149270296096802, "learning_rate": 2e-05, "loss": 0.04814313, "step": 8335 }, { "epoch": 16.672, "grad_norm": 1.3045963048934937, "learning_rate": 2e-05, "loss": 0.03031799, "step": 8336 }, { "epoch": 16.674, "grad_norm": 1.2806516885757446, "learning_rate": 2e-05, "loss": 0.04692192, "step": 8337 }, { "epoch": 16.676, "grad_norm": 1.1512733697891235, "learning_rate": 2e-05, "loss": 0.04004252, "step": 8338 }, { "epoch": 16.678, "grad_norm": 3.1383249759674072, "learning_rate": 2e-05, "loss": 0.0324553, "step": 8339 }, { "epoch": 16.68, "grad_norm": 1.2203539609909058, "learning_rate": 2e-05, "loss": 0.03611985, "step": 8340 }, { "epoch": 16.682, "grad_norm": 1.2353651523590088, "learning_rate": 2e-05, "loss": 0.03453249, "step": 8341 }, { "epoch": 16.684, "grad_norm": 1.2120214700698853, "learning_rate": 2e-05, "loss": 0.04656072, "step": 8342 }, { "epoch": 16.686, "grad_norm": 1.1942620277404785, "learning_rate": 2e-05, "loss": 0.04031057, "step": 8343 }, { "epoch": 16.688, "grad_norm": 1.174004077911377, "learning_rate": 2e-05, "loss": 0.03165752, "step": 8344 }, { "epoch": 16.69, "grad_norm": 2.1396734714508057, "learning_rate": 2e-05, "loss": 0.04839662, "step": 8345 }, { "epoch": 16.692, "grad_norm": 1.1642301082611084, "learning_rate": 2e-05, "loss": 0.0288971, "step": 8346 }, { "epoch": 16.694, "grad_norm": 1.6050286293029785, "learning_rate": 2e-05, "loss": 0.03548468, "step": 8347 }, { "epoch": 16.696, "grad_norm": 1.416585922241211, "learning_rate": 2e-05, "loss": 0.03986949, "step": 8348 }, { "epoch": 16.698, "grad_norm": 1.3496794700622559, "learning_rate": 2e-05, "loss": 0.04378842, "step": 8349 }, { "epoch": 16.7, "grad_norm": 3.526726722717285, "learning_rate": 2e-05, "loss": 0.04224211, "step": 8350 }, { "epoch": 16.701999999999998, "grad_norm": 2.085320234298706, "learning_rate": 2e-05, "loss": 0.02257501, "step": 8351 }, { "epoch": 16.704, "grad_norm": 1.418104648590088, "learning_rate": 2e-05, "loss": 0.03896306, "step": 8352 }, { "epoch": 16.706, "grad_norm": 1.8346154689788818, "learning_rate": 2e-05, "loss": 0.04356946, "step": 8353 }, { "epoch": 16.708, "grad_norm": 1.2784098386764526, "learning_rate": 2e-05, "loss": 0.04127508, "step": 8354 }, { "epoch": 16.71, "grad_norm": 1.3677000999450684, "learning_rate": 2e-05, "loss": 0.03775769, "step": 8355 }, { "epoch": 16.712, "grad_norm": 1.7542495727539062, "learning_rate": 2e-05, "loss": 0.05272632, "step": 8356 }, { "epoch": 16.714, "grad_norm": 1.9329322576522827, "learning_rate": 2e-05, "loss": 0.041315, "step": 8357 }, { "epoch": 16.716, "grad_norm": 1.4365530014038086, "learning_rate": 2e-05, "loss": 0.03562573, "step": 8358 }, { "epoch": 16.718, "grad_norm": 1.7461820840835571, "learning_rate": 2e-05, "loss": 0.04044671, "step": 8359 }, { "epoch": 16.72, "grad_norm": 5.9637556076049805, "learning_rate": 2e-05, "loss": 0.04565287, "step": 8360 }, { "epoch": 16.722, "grad_norm": 1.428763747215271, "learning_rate": 2e-05, "loss": 0.03354582, "step": 8361 }, { "epoch": 16.724, "grad_norm": 1.4038552045822144, "learning_rate": 2e-05, "loss": 0.0424789, "step": 8362 }, { "epoch": 16.726, "grad_norm": 1.2637503147125244, "learning_rate": 2e-05, "loss": 0.03532393, "step": 8363 }, { "epoch": 16.728, "grad_norm": 1.817488670349121, "learning_rate": 2e-05, "loss": 0.04140956, "step": 8364 }, { "epoch": 16.73, "grad_norm": 1.4110076427459717, "learning_rate": 2e-05, "loss": 0.03238542, "step": 8365 }, { "epoch": 16.732, "grad_norm": 1.412273645401001, "learning_rate": 2e-05, "loss": 0.0464592, "step": 8366 }, { "epoch": 16.734, "grad_norm": 1.112044334411621, "learning_rate": 2e-05, "loss": 0.0404929, "step": 8367 }, { "epoch": 16.736, "grad_norm": 1.867051362991333, "learning_rate": 2e-05, "loss": 0.06586152, "step": 8368 }, { "epoch": 16.738, "grad_norm": 6.789832592010498, "learning_rate": 2e-05, "loss": 0.03430699, "step": 8369 }, { "epoch": 16.74, "grad_norm": 1.4201772212982178, "learning_rate": 2e-05, "loss": 0.03015824, "step": 8370 }, { "epoch": 16.742, "grad_norm": 1.1724557876586914, "learning_rate": 2e-05, "loss": 0.03560322, "step": 8371 }, { "epoch": 16.744, "grad_norm": 1.1797523498535156, "learning_rate": 2e-05, "loss": 0.02636663, "step": 8372 }, { "epoch": 16.746, "grad_norm": 1.3166332244873047, "learning_rate": 2e-05, "loss": 0.03184573, "step": 8373 }, { "epoch": 16.748, "grad_norm": 1.034335970878601, "learning_rate": 2e-05, "loss": 0.02896601, "step": 8374 }, { "epoch": 16.75, "grad_norm": 1.0973848104476929, "learning_rate": 2e-05, "loss": 0.03897912, "step": 8375 }, { "epoch": 16.752, "grad_norm": 1.1487135887145996, "learning_rate": 2e-05, "loss": 0.03742709, "step": 8376 }, { "epoch": 16.754, "grad_norm": 1.0410828590393066, "learning_rate": 2e-05, "loss": 0.03710461, "step": 8377 }, { "epoch": 16.756, "grad_norm": 1.2444871664047241, "learning_rate": 2e-05, "loss": 0.03904767, "step": 8378 }, { "epoch": 16.758, "grad_norm": 1.5975366830825806, "learning_rate": 2e-05, "loss": 0.04068123, "step": 8379 }, { "epoch": 16.76, "grad_norm": 1.1661255359649658, "learning_rate": 2e-05, "loss": 0.02516171, "step": 8380 }, { "epoch": 16.762, "grad_norm": 2.5861120223999023, "learning_rate": 2e-05, "loss": 0.04131169, "step": 8381 }, { "epoch": 16.764, "grad_norm": 2.497673749923706, "learning_rate": 2e-05, "loss": 0.04161269, "step": 8382 }, { "epoch": 16.766, "grad_norm": 0.9345721006393433, "learning_rate": 2e-05, "loss": 0.02630493, "step": 8383 }, { "epoch": 16.768, "grad_norm": 1.4174412488937378, "learning_rate": 2e-05, "loss": 0.03265426, "step": 8384 }, { "epoch": 16.77, "grad_norm": 1.2070684432983398, "learning_rate": 2e-05, "loss": 0.03450275, "step": 8385 }, { "epoch": 16.772, "grad_norm": 1.3117280006408691, "learning_rate": 2e-05, "loss": 0.04178444, "step": 8386 }, { "epoch": 16.774, "grad_norm": 1.5097566843032837, "learning_rate": 2e-05, "loss": 0.04680046, "step": 8387 }, { "epoch": 16.776, "grad_norm": 1.4381500482559204, "learning_rate": 2e-05, "loss": 0.04752283, "step": 8388 }, { "epoch": 16.778, "grad_norm": 1.1138615608215332, "learning_rate": 2e-05, "loss": 0.03561661, "step": 8389 }, { "epoch": 16.78, "grad_norm": 1.2291978597640991, "learning_rate": 2e-05, "loss": 0.03155664, "step": 8390 }, { "epoch": 16.782, "grad_norm": 1.3546652793884277, "learning_rate": 2e-05, "loss": 0.03348469, "step": 8391 }, { "epoch": 16.784, "grad_norm": 1.1350245475769043, "learning_rate": 2e-05, "loss": 0.04207388, "step": 8392 }, { "epoch": 16.786, "grad_norm": 1.6995071172714233, "learning_rate": 2e-05, "loss": 0.05210853, "step": 8393 }, { "epoch": 16.788, "grad_norm": 2.271347761154175, "learning_rate": 2e-05, "loss": 0.05933637, "step": 8394 }, { "epoch": 16.79, "grad_norm": 0.8941410779953003, "learning_rate": 2e-05, "loss": 0.02386924, "step": 8395 }, { "epoch": 16.792, "grad_norm": 1.6003661155700684, "learning_rate": 2e-05, "loss": 0.05291647, "step": 8396 }, { "epoch": 16.794, "grad_norm": 1.3317174911499023, "learning_rate": 2e-05, "loss": 0.03087507, "step": 8397 }, { "epoch": 16.796, "grad_norm": 0.9487977623939514, "learning_rate": 2e-05, "loss": 0.02690414, "step": 8398 }, { "epoch": 16.798000000000002, "grad_norm": 1.009123682975769, "learning_rate": 2e-05, "loss": 0.02644867, "step": 8399 }, { "epoch": 16.8, "grad_norm": 1.2167582511901855, "learning_rate": 2e-05, "loss": 0.03899329, "step": 8400 }, { "epoch": 16.802, "grad_norm": 1.482376217842102, "learning_rate": 2e-05, "loss": 0.02873115, "step": 8401 }, { "epoch": 16.804, "grad_norm": 1.1082066297531128, "learning_rate": 2e-05, "loss": 0.03039532, "step": 8402 }, { "epoch": 16.806, "grad_norm": 1.2599750757217407, "learning_rate": 2e-05, "loss": 0.04293299, "step": 8403 }, { "epoch": 16.808, "grad_norm": 0.9543814063072205, "learning_rate": 2e-05, "loss": 0.03469695, "step": 8404 }, { "epoch": 16.81, "grad_norm": 1.0824414491653442, "learning_rate": 2e-05, "loss": 0.03982303, "step": 8405 }, { "epoch": 16.812, "grad_norm": 1.099599838256836, "learning_rate": 2e-05, "loss": 0.02591001, "step": 8406 }, { "epoch": 16.814, "grad_norm": 1.7804385423660278, "learning_rate": 2e-05, "loss": 0.0323559, "step": 8407 }, { "epoch": 16.816, "grad_norm": 1.8087066411972046, "learning_rate": 2e-05, "loss": 0.04229058, "step": 8408 }, { "epoch": 16.818, "grad_norm": 1.2992652654647827, "learning_rate": 2e-05, "loss": 0.0409187, "step": 8409 }, { "epoch": 16.82, "grad_norm": 1.1120244264602661, "learning_rate": 2e-05, "loss": 0.03508178, "step": 8410 }, { "epoch": 16.822, "grad_norm": 0.9184687733650208, "learning_rate": 2e-05, "loss": 0.02861979, "step": 8411 }, { "epoch": 16.824, "grad_norm": 1.2980962991714478, "learning_rate": 2e-05, "loss": 0.02535828, "step": 8412 }, { "epoch": 16.826, "grad_norm": 1.9372985363006592, "learning_rate": 2e-05, "loss": 0.03568355, "step": 8413 }, { "epoch": 16.828, "grad_norm": 1.514122486114502, "learning_rate": 2e-05, "loss": 0.03849922, "step": 8414 }, { "epoch": 16.83, "grad_norm": 1.285515308380127, "learning_rate": 2e-05, "loss": 0.03315242, "step": 8415 }, { "epoch": 16.832, "grad_norm": 0.9878953099250793, "learning_rate": 2e-05, "loss": 0.02930378, "step": 8416 }, { "epoch": 16.834, "grad_norm": 1.765859842300415, "learning_rate": 2e-05, "loss": 0.05877309, "step": 8417 }, { "epoch": 16.836, "grad_norm": 1.3087562322616577, "learning_rate": 2e-05, "loss": 0.04466981, "step": 8418 }, { "epoch": 16.838, "grad_norm": 3.0149006843566895, "learning_rate": 2e-05, "loss": 0.05240371, "step": 8419 }, { "epoch": 16.84, "grad_norm": 1.1996604204177856, "learning_rate": 2e-05, "loss": 0.04400566, "step": 8420 }, { "epoch": 16.842, "grad_norm": 1.4025583267211914, "learning_rate": 2e-05, "loss": 0.04432599, "step": 8421 }, { "epoch": 16.844, "grad_norm": 2.9091427326202393, "learning_rate": 2e-05, "loss": 0.04849138, "step": 8422 }, { "epoch": 16.846, "grad_norm": 2.77856707572937, "learning_rate": 2e-05, "loss": 0.05462526, "step": 8423 }, { "epoch": 16.848, "grad_norm": 1.108500361442566, "learning_rate": 2e-05, "loss": 0.03238573, "step": 8424 }, { "epoch": 16.85, "grad_norm": 1.0004764795303345, "learning_rate": 2e-05, "loss": 0.02566298, "step": 8425 }, { "epoch": 16.852, "grad_norm": 6.166355133056641, "learning_rate": 2e-05, "loss": 0.05295902, "step": 8426 }, { "epoch": 16.854, "grad_norm": 1.1335303783416748, "learning_rate": 2e-05, "loss": 0.0286171, "step": 8427 }, { "epoch": 16.856, "grad_norm": 1.1951411962509155, "learning_rate": 2e-05, "loss": 0.03944296, "step": 8428 }, { "epoch": 16.858, "grad_norm": 1.430793046951294, "learning_rate": 2e-05, "loss": 0.03612977, "step": 8429 }, { "epoch": 16.86, "grad_norm": 1.3680472373962402, "learning_rate": 2e-05, "loss": 0.03450888, "step": 8430 }, { "epoch": 16.862, "grad_norm": 1.0445443391799927, "learning_rate": 2e-05, "loss": 0.03357092, "step": 8431 }, { "epoch": 16.864, "grad_norm": 1.1048623323440552, "learning_rate": 2e-05, "loss": 0.02782473, "step": 8432 }, { "epoch": 16.866, "grad_norm": 1.264748215675354, "learning_rate": 2e-05, "loss": 0.05136178, "step": 8433 }, { "epoch": 16.868, "grad_norm": 3.1316561698913574, "learning_rate": 2e-05, "loss": 0.0381931, "step": 8434 }, { "epoch": 16.87, "grad_norm": 0.8829813599586487, "learning_rate": 2e-05, "loss": 0.02602118, "step": 8435 }, { "epoch": 16.872, "grad_norm": 1.240877389907837, "learning_rate": 2e-05, "loss": 0.0411177, "step": 8436 }, { "epoch": 16.874, "grad_norm": 0.9366046786308289, "learning_rate": 2e-05, "loss": 0.02752039, "step": 8437 }, { "epoch": 16.876, "grad_norm": 1.1578431129455566, "learning_rate": 2e-05, "loss": 0.03548286, "step": 8438 }, { "epoch": 16.878, "grad_norm": 1.1822301149368286, "learning_rate": 2e-05, "loss": 0.03064628, "step": 8439 }, { "epoch": 16.88, "grad_norm": 2.111692428588867, "learning_rate": 2e-05, "loss": 0.05242971, "step": 8440 }, { "epoch": 16.882, "grad_norm": 1.7009152173995972, "learning_rate": 2e-05, "loss": 0.03527777, "step": 8441 }, { "epoch": 16.884, "grad_norm": 1.7795860767364502, "learning_rate": 2e-05, "loss": 0.04584951, "step": 8442 }, { "epoch": 16.886, "grad_norm": 1.7482233047485352, "learning_rate": 2e-05, "loss": 0.04331917, "step": 8443 }, { "epoch": 16.888, "grad_norm": 1.1555484533309937, "learning_rate": 2e-05, "loss": 0.03320149, "step": 8444 }, { "epoch": 16.89, "grad_norm": 1.5613912343978882, "learning_rate": 2e-05, "loss": 0.03231899, "step": 8445 }, { "epoch": 16.892, "grad_norm": 1.4290865659713745, "learning_rate": 2e-05, "loss": 0.03894955, "step": 8446 }, { "epoch": 16.894, "grad_norm": 1.1222296953201294, "learning_rate": 2e-05, "loss": 0.03836842, "step": 8447 }, { "epoch": 16.896, "grad_norm": 2.30586838722229, "learning_rate": 2e-05, "loss": 0.04976833, "step": 8448 }, { "epoch": 16.898, "grad_norm": 1.0983035564422607, "learning_rate": 2e-05, "loss": 0.03981961, "step": 8449 }, { "epoch": 16.9, "grad_norm": 1.6317425966262817, "learning_rate": 2e-05, "loss": 0.05492754, "step": 8450 }, { "epoch": 16.902, "grad_norm": 1.9192370176315308, "learning_rate": 2e-05, "loss": 0.03952098, "step": 8451 }, { "epoch": 16.904, "grad_norm": 1.1875005960464478, "learning_rate": 2e-05, "loss": 0.03277217, "step": 8452 }, { "epoch": 16.906, "grad_norm": 1.4212887287139893, "learning_rate": 2e-05, "loss": 0.02984127, "step": 8453 }, { "epoch": 16.908, "grad_norm": 1.5091922283172607, "learning_rate": 2e-05, "loss": 0.03316611, "step": 8454 }, { "epoch": 16.91, "grad_norm": 2.024031400680542, "learning_rate": 2e-05, "loss": 0.04787176, "step": 8455 }, { "epoch": 16.912, "grad_norm": 2.3478596210479736, "learning_rate": 2e-05, "loss": 0.0493004, "step": 8456 }, { "epoch": 16.914, "grad_norm": 1.2963180541992188, "learning_rate": 2e-05, "loss": 0.02935142, "step": 8457 }, { "epoch": 16.916, "grad_norm": 1.0010120868682861, "learning_rate": 2e-05, "loss": 0.02706702, "step": 8458 }, { "epoch": 16.918, "grad_norm": 1.579359531402588, "learning_rate": 2e-05, "loss": 0.03226943, "step": 8459 }, { "epoch": 16.92, "grad_norm": 1.0996006727218628, "learning_rate": 2e-05, "loss": 0.03097832, "step": 8460 }, { "epoch": 16.922, "grad_norm": 1.2066538333892822, "learning_rate": 2e-05, "loss": 0.03681121, "step": 8461 }, { "epoch": 16.924, "grad_norm": 1.255723476409912, "learning_rate": 2e-05, "loss": 0.03210659, "step": 8462 }, { "epoch": 16.926, "grad_norm": 1.2862356901168823, "learning_rate": 2e-05, "loss": 0.03344401, "step": 8463 }, { "epoch": 16.928, "grad_norm": 1.046955943107605, "learning_rate": 2e-05, "loss": 0.02581613, "step": 8464 }, { "epoch": 16.93, "grad_norm": 1.5476090908050537, "learning_rate": 2e-05, "loss": 0.04508192, "step": 8465 }, { "epoch": 16.932, "grad_norm": 1.1604325771331787, "learning_rate": 2e-05, "loss": 0.03390781, "step": 8466 }, { "epoch": 16.934, "grad_norm": 1.613224983215332, "learning_rate": 2e-05, "loss": 0.05440042, "step": 8467 }, { "epoch": 16.936, "grad_norm": 1.0437114238739014, "learning_rate": 2e-05, "loss": 0.03564305, "step": 8468 }, { "epoch": 16.938, "grad_norm": 1.7123421430587769, "learning_rate": 2e-05, "loss": 0.03162618, "step": 8469 }, { "epoch": 16.94, "grad_norm": 1.2123863697052002, "learning_rate": 2e-05, "loss": 0.03680079, "step": 8470 }, { "epoch": 16.942, "grad_norm": 1.7789182662963867, "learning_rate": 2e-05, "loss": 0.05218393, "step": 8471 }, { "epoch": 16.944, "grad_norm": 1.4410932064056396, "learning_rate": 2e-05, "loss": 0.03325985, "step": 8472 }, { "epoch": 16.946, "grad_norm": 0.8621723055839539, "learning_rate": 2e-05, "loss": 0.0242934, "step": 8473 }, { "epoch": 16.948, "grad_norm": 1.3893159627914429, "learning_rate": 2e-05, "loss": 0.05090608, "step": 8474 }, { "epoch": 16.95, "grad_norm": 1.0170800685882568, "learning_rate": 2e-05, "loss": 0.03791419, "step": 8475 }, { "epoch": 16.951999999999998, "grad_norm": 1.146407127380371, "learning_rate": 2e-05, "loss": 0.04361141, "step": 8476 }, { "epoch": 16.954, "grad_norm": 1.6457027196884155, "learning_rate": 2e-05, "loss": 0.04759943, "step": 8477 }, { "epoch": 16.956, "grad_norm": 1.1141819953918457, "learning_rate": 2e-05, "loss": 0.02985225, "step": 8478 }, { "epoch": 16.958, "grad_norm": 1.0538251399993896, "learning_rate": 2e-05, "loss": 0.02504754, "step": 8479 }, { "epoch": 16.96, "grad_norm": 1.3906612396240234, "learning_rate": 2e-05, "loss": 0.04034654, "step": 8480 }, { "epoch": 16.962, "grad_norm": 1.228425145149231, "learning_rate": 2e-05, "loss": 0.05102978, "step": 8481 }, { "epoch": 16.964, "grad_norm": 1.1037037372589111, "learning_rate": 2e-05, "loss": 0.02688233, "step": 8482 }, { "epoch": 16.966, "grad_norm": 1.2161399126052856, "learning_rate": 2e-05, "loss": 0.03387916, "step": 8483 }, { "epoch": 16.968, "grad_norm": 2.475914716720581, "learning_rate": 2e-05, "loss": 0.04503019, "step": 8484 }, { "epoch": 16.97, "grad_norm": 2.1681854724884033, "learning_rate": 2e-05, "loss": 0.03996513, "step": 8485 }, { "epoch": 16.972, "grad_norm": 2.7576403617858887, "learning_rate": 2e-05, "loss": 0.06164122, "step": 8486 }, { "epoch": 16.974, "grad_norm": 1.2548010349273682, "learning_rate": 2e-05, "loss": 0.0354467, "step": 8487 }, { "epoch": 16.976, "grad_norm": 1.4516505002975464, "learning_rate": 2e-05, "loss": 0.03556127, "step": 8488 }, { "epoch": 16.978, "grad_norm": 2.353919744491577, "learning_rate": 2e-05, "loss": 0.03970582, "step": 8489 }, { "epoch": 16.98, "grad_norm": 1.0244505405426025, "learning_rate": 2e-05, "loss": 0.03089332, "step": 8490 }, { "epoch": 16.982, "grad_norm": 1.5581693649291992, "learning_rate": 2e-05, "loss": 0.03672716, "step": 8491 }, { "epoch": 16.984, "grad_norm": 1.7653076648712158, "learning_rate": 2e-05, "loss": 0.04039037, "step": 8492 }, { "epoch": 16.986, "grad_norm": 1.0333586931228638, "learning_rate": 2e-05, "loss": 0.03089204, "step": 8493 }, { "epoch": 16.988, "grad_norm": 1.0320618152618408, "learning_rate": 2e-05, "loss": 0.0225993, "step": 8494 }, { "epoch": 16.99, "grad_norm": 1.119234561920166, "learning_rate": 2e-05, "loss": 0.03324748, "step": 8495 }, { "epoch": 16.992, "grad_norm": 1.9788683652877808, "learning_rate": 2e-05, "loss": 0.03121308, "step": 8496 }, { "epoch": 16.994, "grad_norm": 1.3806347846984863, "learning_rate": 2e-05, "loss": 0.04654794, "step": 8497 }, { "epoch": 16.996, "grad_norm": 1.1207362413406372, "learning_rate": 2e-05, "loss": 0.04140043, "step": 8498 }, { "epoch": 16.998, "grad_norm": 1.039371371269226, "learning_rate": 2e-05, "loss": 0.03301085, "step": 8499 }, { "epoch": 17.0, "grad_norm": 2.4387052059173584, "learning_rate": 2e-05, "loss": 0.04641984, "step": 8500 }, { "epoch": 17.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9760479041916168, "Equal_1": 0.996, "Equal_2": 0.9680638722554891, "Equal_3": 0.9181636726546906, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9939879759519038, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.99, "Perpendicular_1": 0.998, "Perpendicular_2": 0.986, "Perpendicular_3": 0.7755511022044088, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9936666666666667, "PointLiesOnCircle_3": 0.99, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9919839679358717, "PointLiesOnLine_3": 0.9820359281437125 }, "eval_runtime": 320.1697, "eval_samples_per_second": 32.795, "eval_steps_per_second": 0.656, "step": 8500 }, { "epoch": 17.002, "grad_norm": 1.4980146884918213, "learning_rate": 2e-05, "loss": 0.04539901, "step": 8501 }, { "epoch": 17.004, "grad_norm": 1.0780891180038452, "learning_rate": 2e-05, "loss": 0.03377684, "step": 8502 }, { "epoch": 17.006, "grad_norm": 1.0425775051116943, "learning_rate": 2e-05, "loss": 0.02465243, "step": 8503 }, { "epoch": 17.008, "grad_norm": 1.0172406435012817, "learning_rate": 2e-05, "loss": 0.02660679, "step": 8504 }, { "epoch": 17.01, "grad_norm": 1.2166515588760376, "learning_rate": 2e-05, "loss": 0.04529484, "step": 8505 }, { "epoch": 17.012, "grad_norm": 1.4045565128326416, "learning_rate": 2e-05, "loss": 0.04070731, "step": 8506 }, { "epoch": 17.014, "grad_norm": 1.53168785572052, "learning_rate": 2e-05, "loss": 0.04683904, "step": 8507 }, { "epoch": 17.016, "grad_norm": 1.4374054670333862, "learning_rate": 2e-05, "loss": 0.05705344, "step": 8508 }, { "epoch": 17.018, "grad_norm": 1.0057858228683472, "learning_rate": 2e-05, "loss": 0.03421285, "step": 8509 }, { "epoch": 17.02, "grad_norm": 1.1364305019378662, "learning_rate": 2e-05, "loss": 0.03909264, "step": 8510 }, { "epoch": 17.022, "grad_norm": 1.0018776655197144, "learning_rate": 2e-05, "loss": 0.03444162, "step": 8511 }, { "epoch": 17.024, "grad_norm": 1.4022345542907715, "learning_rate": 2e-05, "loss": 0.05768448, "step": 8512 }, { "epoch": 17.026, "grad_norm": 2.5111844539642334, "learning_rate": 2e-05, "loss": 0.0339235, "step": 8513 }, { "epoch": 17.028, "grad_norm": 1.921712040901184, "learning_rate": 2e-05, "loss": 0.03785212, "step": 8514 }, { "epoch": 17.03, "grad_norm": 1.1148395538330078, "learning_rate": 2e-05, "loss": 0.02421642, "step": 8515 }, { "epoch": 17.032, "grad_norm": 1.0764633417129517, "learning_rate": 2e-05, "loss": 0.03142115, "step": 8516 }, { "epoch": 17.034, "grad_norm": 1.4113231897354126, "learning_rate": 2e-05, "loss": 0.03779317, "step": 8517 }, { "epoch": 17.036, "grad_norm": 1.0931544303894043, "learning_rate": 2e-05, "loss": 0.02576648, "step": 8518 }, { "epoch": 17.038, "grad_norm": 1.8094255924224854, "learning_rate": 2e-05, "loss": 0.04606591, "step": 8519 }, { "epoch": 17.04, "grad_norm": 1.4644229412078857, "learning_rate": 2e-05, "loss": 0.043492, "step": 8520 }, { "epoch": 17.042, "grad_norm": 1.4266523122787476, "learning_rate": 2e-05, "loss": 0.03921729, "step": 8521 }, { "epoch": 17.044, "grad_norm": 1.3539372682571411, "learning_rate": 2e-05, "loss": 0.03920222, "step": 8522 }, { "epoch": 17.046, "grad_norm": 1.028294563293457, "learning_rate": 2e-05, "loss": 0.03639862, "step": 8523 }, { "epoch": 17.048, "grad_norm": 1.2716553211212158, "learning_rate": 2e-05, "loss": 0.03918849, "step": 8524 }, { "epoch": 17.05, "grad_norm": 0.8702855706214905, "learning_rate": 2e-05, "loss": 0.02787583, "step": 8525 }, { "epoch": 17.052, "grad_norm": 1.2607735395431519, "learning_rate": 2e-05, "loss": 0.04798802, "step": 8526 }, { "epoch": 17.054, "grad_norm": 1.8052009344100952, "learning_rate": 2e-05, "loss": 0.04934546, "step": 8527 }, { "epoch": 17.056, "grad_norm": 1.3691691160202026, "learning_rate": 2e-05, "loss": 0.02806879, "step": 8528 }, { "epoch": 17.058, "grad_norm": 2.2015974521636963, "learning_rate": 2e-05, "loss": 0.03393783, "step": 8529 }, { "epoch": 17.06, "grad_norm": 1.7185814380645752, "learning_rate": 2e-05, "loss": 0.04044708, "step": 8530 }, { "epoch": 17.062, "grad_norm": 1.6913961172103882, "learning_rate": 2e-05, "loss": 0.04489831, "step": 8531 }, { "epoch": 17.064, "grad_norm": 1.6114678382873535, "learning_rate": 2e-05, "loss": 0.06014886, "step": 8532 }, { "epoch": 17.066, "grad_norm": 1.866416335105896, "learning_rate": 2e-05, "loss": 0.05753294, "step": 8533 }, { "epoch": 17.068, "grad_norm": 2.823669910430908, "learning_rate": 2e-05, "loss": 0.0364582, "step": 8534 }, { "epoch": 17.07, "grad_norm": 1.3200300931930542, "learning_rate": 2e-05, "loss": 0.02338468, "step": 8535 }, { "epoch": 17.072, "grad_norm": 1.4321677684783936, "learning_rate": 2e-05, "loss": 0.03681827, "step": 8536 }, { "epoch": 17.074, "grad_norm": 1.1837363243103027, "learning_rate": 2e-05, "loss": 0.03107641, "step": 8537 }, { "epoch": 17.076, "grad_norm": 1.5008882284164429, "learning_rate": 2e-05, "loss": 0.04763446, "step": 8538 }, { "epoch": 17.078, "grad_norm": 0.9194531440734863, "learning_rate": 2e-05, "loss": 0.03133226, "step": 8539 }, { "epoch": 17.08, "grad_norm": 1.7025022506713867, "learning_rate": 2e-05, "loss": 0.0493839, "step": 8540 }, { "epoch": 17.082, "grad_norm": 1.8602598905563354, "learning_rate": 2e-05, "loss": 0.0486247, "step": 8541 }, { "epoch": 17.084, "grad_norm": 6.84015417098999, "learning_rate": 2e-05, "loss": 0.02411661, "step": 8542 }, { "epoch": 17.086, "grad_norm": 1.2023508548736572, "learning_rate": 2e-05, "loss": 0.03706846, "step": 8543 }, { "epoch": 17.088, "grad_norm": 1.4911589622497559, "learning_rate": 2e-05, "loss": 0.05094575, "step": 8544 }, { "epoch": 17.09, "grad_norm": 0.8137642741203308, "learning_rate": 2e-05, "loss": 0.02076185, "step": 8545 }, { "epoch": 17.092, "grad_norm": 1.6235312223434448, "learning_rate": 2e-05, "loss": 0.04601521, "step": 8546 }, { "epoch": 17.094, "grad_norm": 2.2012600898742676, "learning_rate": 2e-05, "loss": 0.04425218, "step": 8547 }, { "epoch": 17.096, "grad_norm": 1.039987564086914, "learning_rate": 2e-05, "loss": 0.02848525, "step": 8548 }, { "epoch": 17.098, "grad_norm": 1.7109042406082153, "learning_rate": 2e-05, "loss": 0.06454369, "step": 8549 }, { "epoch": 17.1, "grad_norm": 1.8760957717895508, "learning_rate": 2e-05, "loss": 0.02889769, "step": 8550 }, { "epoch": 17.102, "grad_norm": 1.0814695358276367, "learning_rate": 2e-05, "loss": 0.0295868, "step": 8551 }, { "epoch": 17.104, "grad_norm": 1.5878161191940308, "learning_rate": 2e-05, "loss": 0.03735021, "step": 8552 }, { "epoch": 17.106, "grad_norm": 1.4773187637329102, "learning_rate": 2e-05, "loss": 0.04390514, "step": 8553 }, { "epoch": 17.108, "grad_norm": 1.8776631355285645, "learning_rate": 2e-05, "loss": 0.04321947, "step": 8554 }, { "epoch": 17.11, "grad_norm": 1.309813141822815, "learning_rate": 2e-05, "loss": 0.03866652, "step": 8555 }, { "epoch": 17.112, "grad_norm": 0.9536056518554688, "learning_rate": 2e-05, "loss": 0.02671996, "step": 8556 }, { "epoch": 17.114, "grad_norm": 1.3391567468643188, "learning_rate": 2e-05, "loss": 0.02907237, "step": 8557 }, { "epoch": 17.116, "grad_norm": 1.0060744285583496, "learning_rate": 2e-05, "loss": 0.03439762, "step": 8558 }, { "epoch": 17.118, "grad_norm": 0.9535341262817383, "learning_rate": 2e-05, "loss": 0.02899779, "step": 8559 }, { "epoch": 17.12, "grad_norm": 0.7711392045021057, "learning_rate": 2e-05, "loss": 0.02512438, "step": 8560 }, { "epoch": 17.122, "grad_norm": 1.0597705841064453, "learning_rate": 2e-05, "loss": 0.03412605, "step": 8561 }, { "epoch": 17.124, "grad_norm": 1.8042422533035278, "learning_rate": 2e-05, "loss": 0.05712911, "step": 8562 }, { "epoch": 17.126, "grad_norm": 1.9867520332336426, "learning_rate": 2e-05, "loss": 0.04097403, "step": 8563 }, { "epoch": 17.128, "grad_norm": 1.8169209957122803, "learning_rate": 2e-05, "loss": 0.05447955, "step": 8564 }, { "epoch": 17.13, "grad_norm": 1.8252878189086914, "learning_rate": 2e-05, "loss": 0.03339032, "step": 8565 }, { "epoch": 17.132, "grad_norm": 2.2705142498016357, "learning_rate": 2e-05, "loss": 0.04022797, "step": 8566 }, { "epoch": 17.134, "grad_norm": 1.9331085681915283, "learning_rate": 2e-05, "loss": 0.03970163, "step": 8567 }, { "epoch": 17.136, "grad_norm": 1.5278258323669434, "learning_rate": 2e-05, "loss": 0.03794595, "step": 8568 }, { "epoch": 17.138, "grad_norm": 2.026817560195923, "learning_rate": 2e-05, "loss": 0.0471566, "step": 8569 }, { "epoch": 17.14, "grad_norm": 1.5275219678878784, "learning_rate": 2e-05, "loss": 0.04161655, "step": 8570 }, { "epoch": 17.142, "grad_norm": 1.6281226873397827, "learning_rate": 2e-05, "loss": 0.03525121, "step": 8571 }, { "epoch": 17.144, "grad_norm": 2.1644468307495117, "learning_rate": 2e-05, "loss": 0.05372618, "step": 8572 }, { "epoch": 17.146, "grad_norm": 1.3852871656417847, "learning_rate": 2e-05, "loss": 0.04645745, "step": 8573 }, { "epoch": 17.148, "grad_norm": 1.2143161296844482, "learning_rate": 2e-05, "loss": 0.02777028, "step": 8574 }, { "epoch": 17.15, "grad_norm": 1.1853028535842896, "learning_rate": 2e-05, "loss": 0.03904351, "step": 8575 }, { "epoch": 17.152, "grad_norm": 1.9432642459869385, "learning_rate": 2e-05, "loss": 0.04074139, "step": 8576 }, { "epoch": 17.154, "grad_norm": 1.6851083040237427, "learning_rate": 2e-05, "loss": 0.05192745, "step": 8577 }, { "epoch": 17.156, "grad_norm": 1.3564956188201904, "learning_rate": 2e-05, "loss": 0.03743386, "step": 8578 }, { "epoch": 17.158, "grad_norm": 1.7471429109573364, "learning_rate": 2e-05, "loss": 0.0443368, "step": 8579 }, { "epoch": 17.16, "grad_norm": 1.41841459274292, "learning_rate": 2e-05, "loss": 0.04000156, "step": 8580 }, { "epoch": 17.162, "grad_norm": 0.9492871761322021, "learning_rate": 2e-05, "loss": 0.0318904, "step": 8581 }, { "epoch": 17.164, "grad_norm": 1.0863397121429443, "learning_rate": 2e-05, "loss": 0.02380545, "step": 8582 }, { "epoch": 17.166, "grad_norm": 3.9473729133605957, "learning_rate": 2e-05, "loss": 0.03920302, "step": 8583 }, { "epoch": 17.168, "grad_norm": 1.176370620727539, "learning_rate": 2e-05, "loss": 0.04776629, "step": 8584 }, { "epoch": 17.17, "grad_norm": 1.3243099451065063, "learning_rate": 2e-05, "loss": 0.03770189, "step": 8585 }, { "epoch": 17.172, "grad_norm": 1.1418389081954956, "learning_rate": 2e-05, "loss": 0.02544946, "step": 8586 }, { "epoch": 17.174, "grad_norm": 1.9030910730361938, "learning_rate": 2e-05, "loss": 0.03700698, "step": 8587 }, { "epoch": 17.176, "grad_norm": 1.3709733486175537, "learning_rate": 2e-05, "loss": 0.04343133, "step": 8588 }, { "epoch": 17.178, "grad_norm": 1.4506902694702148, "learning_rate": 2e-05, "loss": 0.03628192, "step": 8589 }, { "epoch": 17.18, "grad_norm": 1.3070485591888428, "learning_rate": 2e-05, "loss": 0.04146688, "step": 8590 }, { "epoch": 17.182, "grad_norm": 1.234598994255066, "learning_rate": 2e-05, "loss": 0.03088631, "step": 8591 }, { "epoch": 17.184, "grad_norm": 2.7125167846679688, "learning_rate": 2e-05, "loss": 0.03890502, "step": 8592 }, { "epoch": 17.186, "grad_norm": 1.535182237625122, "learning_rate": 2e-05, "loss": 0.04280488, "step": 8593 }, { "epoch": 17.188, "grad_norm": 1.0927315950393677, "learning_rate": 2e-05, "loss": 0.0398199, "step": 8594 }, { "epoch": 17.19, "grad_norm": 2.3022119998931885, "learning_rate": 2e-05, "loss": 0.04995221, "step": 8595 }, { "epoch": 17.192, "grad_norm": 1.2546242475509644, "learning_rate": 2e-05, "loss": 0.03015491, "step": 8596 }, { "epoch": 17.194, "grad_norm": 1.2892979383468628, "learning_rate": 2e-05, "loss": 0.02786731, "step": 8597 }, { "epoch": 17.196, "grad_norm": 1.2983757257461548, "learning_rate": 2e-05, "loss": 0.03557572, "step": 8598 }, { "epoch": 17.198, "grad_norm": 1.1861157417297363, "learning_rate": 2e-05, "loss": 0.03920743, "step": 8599 }, { "epoch": 17.2, "grad_norm": 1.5425999164581299, "learning_rate": 2e-05, "loss": 0.03938346, "step": 8600 }, { "epoch": 17.202, "grad_norm": 1.861615538597107, "learning_rate": 2e-05, "loss": 0.03425018, "step": 8601 }, { "epoch": 17.204, "grad_norm": 1.7203572988510132, "learning_rate": 2e-05, "loss": 0.04107557, "step": 8602 }, { "epoch": 17.206, "grad_norm": 1.869676113128662, "learning_rate": 2e-05, "loss": 0.02804127, "step": 8603 }, { "epoch": 17.208, "grad_norm": 1.1111414432525635, "learning_rate": 2e-05, "loss": 0.03163296, "step": 8604 }, { "epoch": 17.21, "grad_norm": 1.1464769840240479, "learning_rate": 2e-05, "loss": 0.03462142, "step": 8605 }, { "epoch": 17.212, "grad_norm": 1.6060582399368286, "learning_rate": 2e-05, "loss": 0.06490205, "step": 8606 }, { "epoch": 17.214, "grad_norm": 1.082814335823059, "learning_rate": 2e-05, "loss": 0.02265838, "step": 8607 }, { "epoch": 17.216, "grad_norm": 1.1353710889816284, "learning_rate": 2e-05, "loss": 0.03856842, "step": 8608 }, { "epoch": 17.218, "grad_norm": 1.3957148790359497, "learning_rate": 2e-05, "loss": 0.03138808, "step": 8609 }, { "epoch": 17.22, "grad_norm": 1.2920960187911987, "learning_rate": 2e-05, "loss": 0.0430844, "step": 8610 }, { "epoch": 17.222, "grad_norm": 1.2752081155776978, "learning_rate": 2e-05, "loss": 0.04843235, "step": 8611 }, { "epoch": 17.224, "grad_norm": 1.4055542945861816, "learning_rate": 2e-05, "loss": 0.03793164, "step": 8612 }, { "epoch": 17.226, "grad_norm": 1.3925482034683228, "learning_rate": 2e-05, "loss": 0.03343806, "step": 8613 }, { "epoch": 17.228, "grad_norm": 1.2577686309814453, "learning_rate": 2e-05, "loss": 0.03357682, "step": 8614 }, { "epoch": 17.23, "grad_norm": 1.5118682384490967, "learning_rate": 2e-05, "loss": 0.05048358, "step": 8615 }, { "epoch": 17.232, "grad_norm": 1.094104528427124, "learning_rate": 2e-05, "loss": 0.02639047, "step": 8616 }, { "epoch": 17.234, "grad_norm": 1.337664008140564, "learning_rate": 2e-05, "loss": 0.03451654, "step": 8617 }, { "epoch": 17.236, "grad_norm": 1.4902300834655762, "learning_rate": 2e-05, "loss": 0.03678834, "step": 8618 }, { "epoch": 17.238, "grad_norm": 0.9843270778656006, "learning_rate": 2e-05, "loss": 0.02527812, "step": 8619 }, { "epoch": 17.24, "grad_norm": 1.1602085828781128, "learning_rate": 2e-05, "loss": 0.02774549, "step": 8620 }, { "epoch": 17.242, "grad_norm": 1.4860682487487793, "learning_rate": 2e-05, "loss": 0.04466415, "step": 8621 }, { "epoch": 17.244, "grad_norm": 1.3373606204986572, "learning_rate": 2e-05, "loss": 0.03324435, "step": 8622 }, { "epoch": 17.246, "grad_norm": 1.4892737865447998, "learning_rate": 2e-05, "loss": 0.03874163, "step": 8623 }, { "epoch": 17.248, "grad_norm": 1.1000738143920898, "learning_rate": 2e-05, "loss": 0.03442953, "step": 8624 }, { "epoch": 17.25, "grad_norm": 1.588263988494873, "learning_rate": 2e-05, "loss": 0.05064608, "step": 8625 }, { "epoch": 17.252, "grad_norm": 2.7729272842407227, "learning_rate": 2e-05, "loss": 0.03416018, "step": 8626 }, { "epoch": 17.254, "grad_norm": 1.785430669784546, "learning_rate": 2e-05, "loss": 0.04151814, "step": 8627 }, { "epoch": 17.256, "grad_norm": 1.0420557260513306, "learning_rate": 2e-05, "loss": 0.03002853, "step": 8628 }, { "epoch": 17.258, "grad_norm": 1.5617362260818481, "learning_rate": 2e-05, "loss": 0.0326589, "step": 8629 }, { "epoch": 17.26, "grad_norm": 1.3772300481796265, "learning_rate": 2e-05, "loss": 0.03914788, "step": 8630 }, { "epoch": 17.262, "grad_norm": 1.2935206890106201, "learning_rate": 2e-05, "loss": 0.03735418, "step": 8631 }, { "epoch": 17.264, "grad_norm": 1.2990622520446777, "learning_rate": 2e-05, "loss": 0.03097917, "step": 8632 }, { "epoch": 17.266, "grad_norm": 1.4346680641174316, "learning_rate": 2e-05, "loss": 0.03221875, "step": 8633 }, { "epoch": 17.268, "grad_norm": 1.139218807220459, "learning_rate": 2e-05, "loss": 0.03914971, "step": 8634 }, { "epoch": 17.27, "grad_norm": 1.8043378591537476, "learning_rate": 2e-05, "loss": 0.05375669, "step": 8635 }, { "epoch": 17.272, "grad_norm": 1.302628993988037, "learning_rate": 2e-05, "loss": 0.03872466, "step": 8636 }, { "epoch": 17.274, "grad_norm": 1.0800704956054688, "learning_rate": 2e-05, "loss": 0.03145566, "step": 8637 }, { "epoch": 17.276, "grad_norm": 1.0463593006134033, "learning_rate": 2e-05, "loss": 0.03460503, "step": 8638 }, { "epoch": 17.278, "grad_norm": 1.3378190994262695, "learning_rate": 2e-05, "loss": 0.05418106, "step": 8639 }, { "epoch": 17.28, "grad_norm": 1.3952049016952515, "learning_rate": 2e-05, "loss": 0.04103397, "step": 8640 }, { "epoch": 17.282, "grad_norm": 1.4025013446807861, "learning_rate": 2e-05, "loss": 0.03793554, "step": 8641 }, { "epoch": 17.284, "grad_norm": 1.229583978652954, "learning_rate": 2e-05, "loss": 0.05052347, "step": 8642 }, { "epoch": 17.286, "grad_norm": 1.4108766317367554, "learning_rate": 2e-05, "loss": 0.02928887, "step": 8643 }, { "epoch": 17.288, "grad_norm": 1.744924783706665, "learning_rate": 2e-05, "loss": 0.04452213, "step": 8644 }, { "epoch": 17.29, "grad_norm": 1.015102744102478, "learning_rate": 2e-05, "loss": 0.0326981, "step": 8645 }, { "epoch": 17.292, "grad_norm": 2.1634633541107178, "learning_rate": 2e-05, "loss": 0.04052316, "step": 8646 }, { "epoch": 17.294, "grad_norm": 0.9109699130058289, "learning_rate": 2e-05, "loss": 0.02265767, "step": 8647 }, { "epoch": 17.296, "grad_norm": 1.964977502822876, "learning_rate": 2e-05, "loss": 0.07206996, "step": 8648 }, { "epoch": 17.298, "grad_norm": 1.3658807277679443, "learning_rate": 2e-05, "loss": 0.04043622, "step": 8649 }, { "epoch": 17.3, "grad_norm": 1.3623096942901611, "learning_rate": 2e-05, "loss": 0.02930884, "step": 8650 }, { "epoch": 17.302, "grad_norm": 1.2337470054626465, "learning_rate": 2e-05, "loss": 0.03874534, "step": 8651 }, { "epoch": 17.304, "grad_norm": 0.892915666103363, "learning_rate": 2e-05, "loss": 0.03198083, "step": 8652 }, { "epoch": 17.306, "grad_norm": 0.9995888471603394, "learning_rate": 2e-05, "loss": 0.03072457, "step": 8653 }, { "epoch": 17.308, "grad_norm": 1.4063588380813599, "learning_rate": 2e-05, "loss": 0.04875659, "step": 8654 }, { "epoch": 17.31, "grad_norm": 1.2465662956237793, "learning_rate": 2e-05, "loss": 0.03184005, "step": 8655 }, { "epoch": 17.312, "grad_norm": 1.2158682346343994, "learning_rate": 2e-05, "loss": 0.03214359, "step": 8656 }, { "epoch": 17.314, "grad_norm": 0.8416528701782227, "learning_rate": 2e-05, "loss": 0.02059457, "step": 8657 }, { "epoch": 17.316, "grad_norm": 1.3326908349990845, "learning_rate": 2e-05, "loss": 0.04002319, "step": 8658 }, { "epoch": 17.318, "grad_norm": 1.201299786567688, "learning_rate": 2e-05, "loss": 0.03972005, "step": 8659 }, { "epoch": 17.32, "grad_norm": 0.8614779114723206, "learning_rate": 2e-05, "loss": 0.02517816, "step": 8660 }, { "epoch": 17.322, "grad_norm": 1.0429071187973022, "learning_rate": 2e-05, "loss": 0.0332981, "step": 8661 }, { "epoch": 17.324, "grad_norm": 1.0974829196929932, "learning_rate": 2e-05, "loss": 0.0395813, "step": 8662 }, { "epoch": 17.326, "grad_norm": 1.7790201902389526, "learning_rate": 2e-05, "loss": 0.04459688, "step": 8663 }, { "epoch": 17.328, "grad_norm": 2.299643039703369, "learning_rate": 2e-05, "loss": 0.04014507, "step": 8664 }, { "epoch": 17.33, "grad_norm": 1.0453588962554932, "learning_rate": 2e-05, "loss": 0.02698977, "step": 8665 }, { "epoch": 17.332, "grad_norm": 0.8015127778053284, "learning_rate": 2e-05, "loss": 0.02366449, "step": 8666 }, { "epoch": 17.334, "grad_norm": 1.241165280342102, "learning_rate": 2e-05, "loss": 0.03793594, "step": 8667 }, { "epoch": 17.336, "grad_norm": 3.191179037094116, "learning_rate": 2e-05, "loss": 0.05078402, "step": 8668 }, { "epoch": 17.338, "grad_norm": 1.3432191610336304, "learning_rate": 2e-05, "loss": 0.03950462, "step": 8669 }, { "epoch": 17.34, "grad_norm": 1.2250608205795288, "learning_rate": 2e-05, "loss": 0.04277131, "step": 8670 }, { "epoch": 17.342, "grad_norm": 1.4782729148864746, "learning_rate": 2e-05, "loss": 0.03562623, "step": 8671 }, { "epoch": 17.344, "grad_norm": 1.5145196914672852, "learning_rate": 2e-05, "loss": 0.03654065, "step": 8672 }, { "epoch": 17.346, "grad_norm": 1.0246665477752686, "learning_rate": 2e-05, "loss": 0.03273585, "step": 8673 }, { "epoch": 17.348, "grad_norm": 0.9382162690162659, "learning_rate": 2e-05, "loss": 0.03068813, "step": 8674 }, { "epoch": 17.35, "grad_norm": 1.0336867570877075, "learning_rate": 2e-05, "loss": 0.03111012, "step": 8675 }, { "epoch": 17.352, "grad_norm": 1.593807339668274, "learning_rate": 2e-05, "loss": 0.0379719, "step": 8676 }, { "epoch": 17.354, "grad_norm": 1.819577693939209, "learning_rate": 2e-05, "loss": 0.03360122, "step": 8677 }, { "epoch": 17.356, "grad_norm": 1.5745559930801392, "learning_rate": 2e-05, "loss": 0.05366817, "step": 8678 }, { "epoch": 17.358, "grad_norm": 1.227429986000061, "learning_rate": 2e-05, "loss": 0.04186604, "step": 8679 }, { "epoch": 17.36, "grad_norm": 1.5559841394424438, "learning_rate": 2e-05, "loss": 0.0397791, "step": 8680 }, { "epoch": 17.362, "grad_norm": 1.0777933597564697, "learning_rate": 2e-05, "loss": 0.02661498, "step": 8681 }, { "epoch": 17.364, "grad_norm": 1.2830750942230225, "learning_rate": 2e-05, "loss": 0.03172344, "step": 8682 }, { "epoch": 17.366, "grad_norm": 0.9228249788284302, "learning_rate": 2e-05, "loss": 0.03080669, "step": 8683 }, { "epoch": 17.368, "grad_norm": 1.545691967010498, "learning_rate": 2e-05, "loss": 0.04729048, "step": 8684 }, { "epoch": 17.37, "grad_norm": 2.842621326446533, "learning_rate": 2e-05, "loss": 0.03637499, "step": 8685 }, { "epoch": 17.372, "grad_norm": 2.176117181777954, "learning_rate": 2e-05, "loss": 0.04287196, "step": 8686 }, { "epoch": 17.374, "grad_norm": 2.0805728435516357, "learning_rate": 2e-05, "loss": 0.03709119, "step": 8687 }, { "epoch": 17.376, "grad_norm": 1.0376954078674316, "learning_rate": 2e-05, "loss": 0.02746601, "step": 8688 }, { "epoch": 17.378, "grad_norm": 4.563030242919922, "learning_rate": 2e-05, "loss": 0.0574781, "step": 8689 }, { "epoch": 17.38, "grad_norm": 1.4875022172927856, "learning_rate": 2e-05, "loss": 0.0384874, "step": 8690 }, { "epoch": 17.382, "grad_norm": 1.295417070388794, "learning_rate": 2e-05, "loss": 0.04185217, "step": 8691 }, { "epoch": 17.384, "grad_norm": 1.428702712059021, "learning_rate": 2e-05, "loss": 0.04110858, "step": 8692 }, { "epoch": 17.386, "grad_norm": 1.7342407703399658, "learning_rate": 2e-05, "loss": 0.04000304, "step": 8693 }, { "epoch": 17.388, "grad_norm": 1.2365176677703857, "learning_rate": 2e-05, "loss": 0.0342959, "step": 8694 }, { "epoch": 17.39, "grad_norm": 2.3056838512420654, "learning_rate": 2e-05, "loss": 0.04070599, "step": 8695 }, { "epoch": 17.392, "grad_norm": 1.948683261871338, "learning_rate": 2e-05, "loss": 0.03871055, "step": 8696 }, { "epoch": 17.394, "grad_norm": 3.8803670406341553, "learning_rate": 2e-05, "loss": 0.03387075, "step": 8697 }, { "epoch": 17.396, "grad_norm": 1.0368173122406006, "learning_rate": 2e-05, "loss": 0.03241309, "step": 8698 }, { "epoch": 17.398, "grad_norm": 3.3204972743988037, "learning_rate": 2e-05, "loss": 0.06573039, "step": 8699 }, { "epoch": 17.4, "grad_norm": 1.3388797044754028, "learning_rate": 2e-05, "loss": 0.03789306, "step": 8700 }, { "epoch": 17.402, "grad_norm": 2.9801526069641113, "learning_rate": 2e-05, "loss": 0.03678794, "step": 8701 }, { "epoch": 17.404, "grad_norm": 1.5335742235183716, "learning_rate": 2e-05, "loss": 0.0323316, "step": 8702 }, { "epoch": 17.406, "grad_norm": 1.2214679718017578, "learning_rate": 2e-05, "loss": 0.02849279, "step": 8703 }, { "epoch": 17.408, "grad_norm": 1.08639395236969, "learning_rate": 2e-05, "loss": 0.04005399, "step": 8704 }, { "epoch": 17.41, "grad_norm": 1.7272155284881592, "learning_rate": 2e-05, "loss": 0.04677524, "step": 8705 }, { "epoch": 17.412, "grad_norm": 1.4193323850631714, "learning_rate": 2e-05, "loss": 0.04003004, "step": 8706 }, { "epoch": 17.414, "grad_norm": 1.1759463548660278, "learning_rate": 2e-05, "loss": 0.0381237, "step": 8707 }, { "epoch": 17.416, "grad_norm": 2.2209203243255615, "learning_rate": 2e-05, "loss": 0.04388089, "step": 8708 }, { "epoch": 17.418, "grad_norm": 1.5851181745529175, "learning_rate": 2e-05, "loss": 0.02392046, "step": 8709 }, { "epoch": 17.42, "grad_norm": 0.9647265076637268, "learning_rate": 2e-05, "loss": 0.02776775, "step": 8710 }, { "epoch": 17.422, "grad_norm": 1.6217008829116821, "learning_rate": 2e-05, "loss": 0.04731276, "step": 8711 }, { "epoch": 17.424, "grad_norm": 0.8883227109909058, "learning_rate": 2e-05, "loss": 0.02925518, "step": 8712 }, { "epoch": 17.426, "grad_norm": 1.7081272602081299, "learning_rate": 2e-05, "loss": 0.03178982, "step": 8713 }, { "epoch": 17.428, "grad_norm": 1.0577318668365479, "learning_rate": 2e-05, "loss": 0.02738732, "step": 8714 }, { "epoch": 17.43, "grad_norm": 1.5216280221939087, "learning_rate": 2e-05, "loss": 0.0358897, "step": 8715 }, { "epoch": 17.432, "grad_norm": 1.733811855316162, "learning_rate": 2e-05, "loss": 0.05327031, "step": 8716 }, { "epoch": 17.434, "grad_norm": 1.0504800081253052, "learning_rate": 2e-05, "loss": 0.02647152, "step": 8717 }, { "epoch": 17.436, "grad_norm": 1.4641132354736328, "learning_rate": 2e-05, "loss": 0.03930076, "step": 8718 }, { "epoch": 17.438, "grad_norm": 0.9525814652442932, "learning_rate": 2e-05, "loss": 0.03154223, "step": 8719 }, { "epoch": 17.44, "grad_norm": 1.2682076692581177, "learning_rate": 2e-05, "loss": 0.04108154, "step": 8720 }, { "epoch": 17.442, "grad_norm": 0.9335964918136597, "learning_rate": 2e-05, "loss": 0.0300378, "step": 8721 }, { "epoch": 17.444, "grad_norm": 2.1530227661132812, "learning_rate": 2e-05, "loss": 0.0590164, "step": 8722 }, { "epoch": 17.446, "grad_norm": 1.495179295539856, "learning_rate": 2e-05, "loss": 0.04352273, "step": 8723 }, { "epoch": 17.448, "grad_norm": 1.4746037721633911, "learning_rate": 2e-05, "loss": 0.04175236, "step": 8724 }, { "epoch": 17.45, "grad_norm": 1.263691782951355, "learning_rate": 2e-05, "loss": 0.03465231, "step": 8725 }, { "epoch": 17.452, "grad_norm": 1.277445912361145, "learning_rate": 2e-05, "loss": 0.0429166, "step": 8726 }, { "epoch": 17.454, "grad_norm": 0.9855414628982544, "learning_rate": 2e-05, "loss": 0.02733266, "step": 8727 }, { "epoch": 17.456, "grad_norm": 1.5489836931228638, "learning_rate": 2e-05, "loss": 0.04620903, "step": 8728 }, { "epoch": 17.458, "grad_norm": 1.0529900789260864, "learning_rate": 2e-05, "loss": 0.03364294, "step": 8729 }, { "epoch": 17.46, "grad_norm": 1.1789546012878418, "learning_rate": 2e-05, "loss": 0.03275872, "step": 8730 }, { "epoch": 17.462, "grad_norm": 0.9303401112556458, "learning_rate": 2e-05, "loss": 0.0350357, "step": 8731 }, { "epoch": 17.464, "grad_norm": 0.8787309527397156, "learning_rate": 2e-05, "loss": 0.02384308, "step": 8732 }, { "epoch": 17.466, "grad_norm": 1.6333367824554443, "learning_rate": 2e-05, "loss": 0.03150726, "step": 8733 }, { "epoch": 17.468, "grad_norm": 1.1511269807815552, "learning_rate": 2e-05, "loss": 0.03846743, "step": 8734 }, { "epoch": 17.47, "grad_norm": 1.0783816576004028, "learning_rate": 2e-05, "loss": 0.02969732, "step": 8735 }, { "epoch": 17.472, "grad_norm": 1.126899003982544, "learning_rate": 2e-05, "loss": 0.03826762, "step": 8736 }, { "epoch": 17.474, "grad_norm": 0.972001314163208, "learning_rate": 2e-05, "loss": 0.0226456, "step": 8737 }, { "epoch": 17.476, "grad_norm": 2.0911567211151123, "learning_rate": 2e-05, "loss": 0.04673334, "step": 8738 }, { "epoch": 17.478, "grad_norm": 1.2967848777770996, "learning_rate": 2e-05, "loss": 0.03325557, "step": 8739 }, { "epoch": 17.48, "grad_norm": 1.4275823831558228, "learning_rate": 2e-05, "loss": 0.03418783, "step": 8740 }, { "epoch": 17.482, "grad_norm": 1.6219322681427002, "learning_rate": 2e-05, "loss": 0.03391339, "step": 8741 }, { "epoch": 17.484, "grad_norm": 2.5044708251953125, "learning_rate": 2e-05, "loss": 0.04077835, "step": 8742 }, { "epoch": 17.486, "grad_norm": 1.286624789237976, "learning_rate": 2e-05, "loss": 0.03713045, "step": 8743 }, { "epoch": 17.488, "grad_norm": 1.0048493146896362, "learning_rate": 2e-05, "loss": 0.02878037, "step": 8744 }, { "epoch": 17.49, "grad_norm": 1.4934651851654053, "learning_rate": 2e-05, "loss": 0.03150274, "step": 8745 }, { "epoch": 17.492, "grad_norm": 1.1032167673110962, "learning_rate": 2e-05, "loss": 0.03681361, "step": 8746 }, { "epoch": 17.494, "grad_norm": 1.4902517795562744, "learning_rate": 2e-05, "loss": 0.04430451, "step": 8747 }, { "epoch": 17.496, "grad_norm": 2.1936278343200684, "learning_rate": 2e-05, "loss": 0.05962869, "step": 8748 }, { "epoch": 17.498, "grad_norm": 1.479569673538208, "learning_rate": 2e-05, "loss": 0.04059211, "step": 8749 }, { "epoch": 17.5, "grad_norm": 1.139105200767517, "learning_rate": 2e-05, "loss": 0.03791492, "step": 8750 }, { "epoch": 17.502, "grad_norm": 1.4118876457214355, "learning_rate": 2e-05, "loss": 0.03690355, "step": 8751 }, { "epoch": 17.504, "grad_norm": 1.1906758546829224, "learning_rate": 2e-05, "loss": 0.03069144, "step": 8752 }, { "epoch": 17.506, "grad_norm": 1.3194007873535156, "learning_rate": 2e-05, "loss": 0.04116385, "step": 8753 }, { "epoch": 17.508, "grad_norm": 2.3798933029174805, "learning_rate": 2e-05, "loss": 0.03834019, "step": 8754 }, { "epoch": 17.51, "grad_norm": 1.7592543363571167, "learning_rate": 2e-05, "loss": 0.05264074, "step": 8755 }, { "epoch": 17.512, "grad_norm": 1.1170209646224976, "learning_rate": 2e-05, "loss": 0.03822346, "step": 8756 }, { "epoch": 17.514, "grad_norm": 1.0373886823654175, "learning_rate": 2e-05, "loss": 0.03700188, "step": 8757 }, { "epoch": 17.516, "grad_norm": 1.619546890258789, "learning_rate": 2e-05, "loss": 0.03848478, "step": 8758 }, { "epoch": 17.518, "grad_norm": 0.9552158117294312, "learning_rate": 2e-05, "loss": 0.0324291, "step": 8759 }, { "epoch": 17.52, "grad_norm": 1.3211631774902344, "learning_rate": 2e-05, "loss": 0.04355022, "step": 8760 }, { "epoch": 17.522, "grad_norm": 1.0997872352600098, "learning_rate": 2e-05, "loss": 0.03464405, "step": 8761 }, { "epoch": 17.524, "grad_norm": 1.546210527420044, "learning_rate": 2e-05, "loss": 0.03780903, "step": 8762 }, { "epoch": 17.526, "grad_norm": 1.3221256732940674, "learning_rate": 2e-05, "loss": 0.03689417, "step": 8763 }, { "epoch": 17.528, "grad_norm": 1.8055446147918701, "learning_rate": 2e-05, "loss": 0.04009835, "step": 8764 }, { "epoch": 17.53, "grad_norm": 1.3256739377975464, "learning_rate": 2e-05, "loss": 0.02718241, "step": 8765 }, { "epoch": 17.532, "grad_norm": 1.0728418827056885, "learning_rate": 2e-05, "loss": 0.02585372, "step": 8766 }, { "epoch": 17.534, "grad_norm": 2.8010523319244385, "learning_rate": 2e-05, "loss": 0.03689571, "step": 8767 }, { "epoch": 17.536, "grad_norm": 1.8129520416259766, "learning_rate": 2e-05, "loss": 0.04276496, "step": 8768 }, { "epoch": 17.538, "grad_norm": 1.5692745447158813, "learning_rate": 2e-05, "loss": 0.03856734, "step": 8769 }, { "epoch": 17.54, "grad_norm": 1.0610696077346802, "learning_rate": 2e-05, "loss": 0.02780731, "step": 8770 }, { "epoch": 17.542, "grad_norm": 2.569103479385376, "learning_rate": 2e-05, "loss": 0.04206748, "step": 8771 }, { "epoch": 17.544, "grad_norm": 2.11444354057312, "learning_rate": 2e-05, "loss": 0.03847713, "step": 8772 }, { "epoch": 17.546, "grad_norm": 1.2605483531951904, "learning_rate": 2e-05, "loss": 0.03328384, "step": 8773 }, { "epoch": 17.548000000000002, "grad_norm": 2.3000173568725586, "learning_rate": 2e-05, "loss": 0.0356028, "step": 8774 }, { "epoch": 17.55, "grad_norm": 1.8785638809204102, "learning_rate": 2e-05, "loss": 0.04467007, "step": 8775 }, { "epoch": 17.552, "grad_norm": 1.260977864265442, "learning_rate": 2e-05, "loss": 0.02562046, "step": 8776 }, { "epoch": 17.554, "grad_norm": 9.660441398620605, "learning_rate": 2e-05, "loss": 0.04144814, "step": 8777 }, { "epoch": 17.556, "grad_norm": 1.8776023387908936, "learning_rate": 2e-05, "loss": 0.04732294, "step": 8778 }, { "epoch": 17.558, "grad_norm": 1.1368457078933716, "learning_rate": 2e-05, "loss": 0.04169676, "step": 8779 }, { "epoch": 17.56, "grad_norm": 1.5295716524124146, "learning_rate": 2e-05, "loss": 0.05010715, "step": 8780 }, { "epoch": 17.562, "grad_norm": 0.9344932436943054, "learning_rate": 2e-05, "loss": 0.02512026, "step": 8781 }, { "epoch": 17.564, "grad_norm": 1.0945249795913696, "learning_rate": 2e-05, "loss": 0.03761927, "step": 8782 }, { "epoch": 17.566, "grad_norm": 1.4906353950500488, "learning_rate": 2e-05, "loss": 0.04850148, "step": 8783 }, { "epoch": 17.568, "grad_norm": 1.0239425897598267, "learning_rate": 2e-05, "loss": 0.0349384, "step": 8784 }, { "epoch": 17.57, "grad_norm": 1.3439842462539673, "learning_rate": 2e-05, "loss": 0.02740002, "step": 8785 }, { "epoch": 17.572, "grad_norm": 1.8573307991027832, "learning_rate": 2e-05, "loss": 0.03505997, "step": 8786 }, { "epoch": 17.574, "grad_norm": 1.1892509460449219, "learning_rate": 2e-05, "loss": 0.03444887, "step": 8787 }, { "epoch": 17.576, "grad_norm": 1.6804590225219727, "learning_rate": 2e-05, "loss": 0.03725114, "step": 8788 }, { "epoch": 17.578, "grad_norm": 1.5094798803329468, "learning_rate": 2e-05, "loss": 0.03691319, "step": 8789 }, { "epoch": 17.58, "grad_norm": 1.433280110359192, "learning_rate": 2e-05, "loss": 0.03601858, "step": 8790 }, { "epoch": 17.582, "grad_norm": 0.9142979383468628, "learning_rate": 2e-05, "loss": 0.03045445, "step": 8791 }, { "epoch": 17.584, "grad_norm": 0.9239221811294556, "learning_rate": 2e-05, "loss": 0.02360635, "step": 8792 }, { "epoch": 17.586, "grad_norm": 2.742828369140625, "learning_rate": 2e-05, "loss": 0.04096455, "step": 8793 }, { "epoch": 17.588, "grad_norm": 1.469237208366394, "learning_rate": 2e-05, "loss": 0.03894402, "step": 8794 }, { "epoch": 17.59, "grad_norm": 1.224602222442627, "learning_rate": 2e-05, "loss": 0.03480183, "step": 8795 }, { "epoch": 17.592, "grad_norm": 2.1132779121398926, "learning_rate": 2e-05, "loss": 0.04112736, "step": 8796 }, { "epoch": 17.594, "grad_norm": 1.4963428974151611, "learning_rate": 2e-05, "loss": 0.03678083, "step": 8797 }, { "epoch": 17.596, "grad_norm": 1.1321431398391724, "learning_rate": 2e-05, "loss": 0.02224684, "step": 8798 }, { "epoch": 17.598, "grad_norm": 1.7322660684585571, "learning_rate": 2e-05, "loss": 0.04057566, "step": 8799 }, { "epoch": 17.6, "grad_norm": 1.437656283378601, "learning_rate": 2e-05, "loss": 0.03213751, "step": 8800 }, { "epoch": 17.602, "grad_norm": 1.932984471321106, "learning_rate": 2e-05, "loss": 0.04008548, "step": 8801 }, { "epoch": 17.604, "grad_norm": 1.0793958902359009, "learning_rate": 2e-05, "loss": 0.02873497, "step": 8802 }, { "epoch": 17.606, "grad_norm": 1.9617685079574585, "learning_rate": 2e-05, "loss": 0.04457264, "step": 8803 }, { "epoch": 17.608, "grad_norm": 1.7142366170883179, "learning_rate": 2e-05, "loss": 0.03925028, "step": 8804 }, { "epoch": 17.61, "grad_norm": 1.638058066368103, "learning_rate": 2e-05, "loss": 0.04914237, "step": 8805 }, { "epoch": 17.612, "grad_norm": 1.3433510065078735, "learning_rate": 2e-05, "loss": 0.03332677, "step": 8806 }, { "epoch": 17.614, "grad_norm": 0.9329344034194946, "learning_rate": 2e-05, "loss": 0.03115493, "step": 8807 }, { "epoch": 17.616, "grad_norm": 0.9696981310844421, "learning_rate": 2e-05, "loss": 0.0317462, "step": 8808 }, { "epoch": 17.618, "grad_norm": 2.5762224197387695, "learning_rate": 2e-05, "loss": 0.04256003, "step": 8809 }, { "epoch": 17.62, "grad_norm": 1.8832440376281738, "learning_rate": 2e-05, "loss": 0.03043187, "step": 8810 }, { "epoch": 17.622, "grad_norm": 2.1648950576782227, "learning_rate": 2e-05, "loss": 0.04386858, "step": 8811 }, { "epoch": 17.624, "grad_norm": 1.578770637512207, "learning_rate": 2e-05, "loss": 0.03511237, "step": 8812 }, { "epoch": 17.626, "grad_norm": 1.0861693620681763, "learning_rate": 2e-05, "loss": 0.04000392, "step": 8813 }, { "epoch": 17.628, "grad_norm": 1.1005021333694458, "learning_rate": 2e-05, "loss": 0.03653765, "step": 8814 }, { "epoch": 17.63, "grad_norm": 1.5705111026763916, "learning_rate": 2e-05, "loss": 0.04100737, "step": 8815 }, { "epoch": 17.632, "grad_norm": 0.9939731359481812, "learning_rate": 2e-05, "loss": 0.02468416, "step": 8816 }, { "epoch": 17.634, "grad_norm": 1.6492228507995605, "learning_rate": 2e-05, "loss": 0.03122869, "step": 8817 }, { "epoch": 17.636, "grad_norm": 1.1242082118988037, "learning_rate": 2e-05, "loss": 0.03778775, "step": 8818 }, { "epoch": 17.638, "grad_norm": 1.0076687335968018, "learning_rate": 2e-05, "loss": 0.03823631, "step": 8819 }, { "epoch": 17.64, "grad_norm": 1.2714289426803589, "learning_rate": 2e-05, "loss": 0.03733125, "step": 8820 }, { "epoch": 17.642, "grad_norm": 1.4574763774871826, "learning_rate": 2e-05, "loss": 0.04100376, "step": 8821 }, { "epoch": 17.644, "grad_norm": 1.4745090007781982, "learning_rate": 2e-05, "loss": 0.04590669, "step": 8822 }, { "epoch": 17.646, "grad_norm": 1.2290929555892944, "learning_rate": 2e-05, "loss": 0.02955338, "step": 8823 }, { "epoch": 17.648, "grad_norm": 1.5989232063293457, "learning_rate": 2e-05, "loss": 0.04057942, "step": 8824 }, { "epoch": 17.65, "grad_norm": 1.0911918878555298, "learning_rate": 2e-05, "loss": 0.0314585, "step": 8825 }, { "epoch": 17.652, "grad_norm": 1.1875574588775635, "learning_rate": 2e-05, "loss": 0.02796382, "step": 8826 }, { "epoch": 17.654, "grad_norm": 0.8497743010520935, "learning_rate": 2e-05, "loss": 0.02476349, "step": 8827 }, { "epoch": 17.656, "grad_norm": 1.1606796979904175, "learning_rate": 2e-05, "loss": 0.03331729, "step": 8828 }, { "epoch": 17.658, "grad_norm": 1.929567575454712, "learning_rate": 2e-05, "loss": 0.04759387, "step": 8829 }, { "epoch": 17.66, "grad_norm": 1.10890531539917, "learning_rate": 2e-05, "loss": 0.03875232, "step": 8830 }, { "epoch": 17.662, "grad_norm": 1.1237422227859497, "learning_rate": 2e-05, "loss": 0.02639496, "step": 8831 }, { "epoch": 17.664, "grad_norm": 1.3612616062164307, "learning_rate": 2e-05, "loss": 0.04055498, "step": 8832 }, { "epoch": 17.666, "grad_norm": 1.0668309926986694, "learning_rate": 2e-05, "loss": 0.02782569, "step": 8833 }, { "epoch": 17.668, "grad_norm": 1.186408519744873, "learning_rate": 2e-05, "loss": 0.03598, "step": 8834 }, { "epoch": 17.67, "grad_norm": 2.71864914894104, "learning_rate": 2e-05, "loss": 0.05295214, "step": 8835 }, { "epoch": 17.672, "grad_norm": 2.222414016723633, "learning_rate": 2e-05, "loss": 0.04588395, "step": 8836 }, { "epoch": 17.674, "grad_norm": 1.272736668586731, "learning_rate": 2e-05, "loss": 0.03839596, "step": 8837 }, { "epoch": 17.676, "grad_norm": 1.5483832359313965, "learning_rate": 2e-05, "loss": 0.0436643, "step": 8838 }, { "epoch": 17.678, "grad_norm": 3.551170587539673, "learning_rate": 2e-05, "loss": 0.03059383, "step": 8839 }, { "epoch": 17.68, "grad_norm": 1.3416359424591064, "learning_rate": 2e-05, "loss": 0.03237988, "step": 8840 }, { "epoch": 17.682, "grad_norm": 2.128422975540161, "learning_rate": 2e-05, "loss": 0.04019484, "step": 8841 }, { "epoch": 17.684, "grad_norm": 1.4956728219985962, "learning_rate": 2e-05, "loss": 0.03178658, "step": 8842 }, { "epoch": 17.686, "grad_norm": 1.644662618637085, "learning_rate": 2e-05, "loss": 0.05027148, "step": 8843 }, { "epoch": 17.688, "grad_norm": 3.315823554992676, "learning_rate": 2e-05, "loss": 0.03754386, "step": 8844 }, { "epoch": 17.69, "grad_norm": 2.4120471477508545, "learning_rate": 2e-05, "loss": 0.03160707, "step": 8845 }, { "epoch": 17.692, "grad_norm": 3.103656053543091, "learning_rate": 2e-05, "loss": 0.04504135, "step": 8846 }, { "epoch": 17.694, "grad_norm": 1.1503428220748901, "learning_rate": 2e-05, "loss": 0.03259352, "step": 8847 }, { "epoch": 17.696, "grad_norm": 1.2756330966949463, "learning_rate": 2e-05, "loss": 0.03456133, "step": 8848 }, { "epoch": 17.698, "grad_norm": 1.1104017496109009, "learning_rate": 2e-05, "loss": 0.02761788, "step": 8849 }, { "epoch": 17.7, "grad_norm": 0.9424893856048584, "learning_rate": 2e-05, "loss": 0.02640409, "step": 8850 }, { "epoch": 17.701999999999998, "grad_norm": 1.0416557788848877, "learning_rate": 2e-05, "loss": 0.0349526, "step": 8851 }, { "epoch": 17.704, "grad_norm": 1.6049892902374268, "learning_rate": 2e-05, "loss": 0.02749394, "step": 8852 }, { "epoch": 17.706, "grad_norm": 1.6006180047988892, "learning_rate": 2e-05, "loss": 0.03950756, "step": 8853 }, { "epoch": 17.708, "grad_norm": 1.0300740003585815, "learning_rate": 2e-05, "loss": 0.03133941, "step": 8854 }, { "epoch": 17.71, "grad_norm": 3.491128444671631, "learning_rate": 2e-05, "loss": 0.04869714, "step": 8855 }, { "epoch": 17.712, "grad_norm": 1.389901041984558, "learning_rate": 2e-05, "loss": 0.04399059, "step": 8856 }, { "epoch": 17.714, "grad_norm": 1.09212327003479, "learning_rate": 2e-05, "loss": 0.03761019, "step": 8857 }, { "epoch": 17.716, "grad_norm": 1.3363914489746094, "learning_rate": 2e-05, "loss": 0.0412604, "step": 8858 }, { "epoch": 17.718, "grad_norm": 1.353605031967163, "learning_rate": 2e-05, "loss": 0.04291609, "step": 8859 }, { "epoch": 17.72, "grad_norm": 1.0849765539169312, "learning_rate": 2e-05, "loss": 0.03004822, "step": 8860 }, { "epoch": 17.722, "grad_norm": 1.2996422052383423, "learning_rate": 2e-05, "loss": 0.04499619, "step": 8861 }, { "epoch": 17.724, "grad_norm": 1.1636970043182373, "learning_rate": 2e-05, "loss": 0.03615176, "step": 8862 }, { "epoch": 17.726, "grad_norm": 0.9753794074058533, "learning_rate": 2e-05, "loss": 0.03914835, "step": 8863 }, { "epoch": 17.728, "grad_norm": 1.3837984800338745, "learning_rate": 2e-05, "loss": 0.05555439, "step": 8864 }, { "epoch": 17.73, "grad_norm": 1.9436976909637451, "learning_rate": 2e-05, "loss": 0.05068126, "step": 8865 }, { "epoch": 17.732, "grad_norm": 1.1782814264297485, "learning_rate": 2e-05, "loss": 0.03269964, "step": 8866 }, { "epoch": 17.734, "grad_norm": 1.342545747756958, "learning_rate": 2e-05, "loss": 0.051434, "step": 8867 }, { "epoch": 17.736, "grad_norm": 1.1777751445770264, "learning_rate": 2e-05, "loss": 0.03173701, "step": 8868 }, { "epoch": 17.738, "grad_norm": 1.666817307472229, "learning_rate": 2e-05, "loss": 0.03970077, "step": 8869 }, { "epoch": 17.74, "grad_norm": 1.2977221012115479, "learning_rate": 2e-05, "loss": 0.04394735, "step": 8870 }, { "epoch": 17.742, "grad_norm": 1.076141357421875, "learning_rate": 2e-05, "loss": 0.03446401, "step": 8871 }, { "epoch": 17.744, "grad_norm": 1.037126898765564, "learning_rate": 2e-05, "loss": 0.03374257, "step": 8872 }, { "epoch": 17.746, "grad_norm": 1.9888211488723755, "learning_rate": 2e-05, "loss": 0.05602897, "step": 8873 }, { "epoch": 17.748, "grad_norm": 1.2631540298461914, "learning_rate": 2e-05, "loss": 0.03305461, "step": 8874 }, { "epoch": 17.75, "grad_norm": 1.383660078048706, "learning_rate": 2e-05, "loss": 0.03023957, "step": 8875 }, { "epoch": 17.752, "grad_norm": 1.6274633407592773, "learning_rate": 2e-05, "loss": 0.02720205, "step": 8876 }, { "epoch": 17.754, "grad_norm": 2.328664541244507, "learning_rate": 2e-05, "loss": 0.04531138, "step": 8877 }, { "epoch": 17.756, "grad_norm": 1.0562291145324707, "learning_rate": 2e-05, "loss": 0.02581457, "step": 8878 }, { "epoch": 17.758, "grad_norm": 1.2635678052902222, "learning_rate": 2e-05, "loss": 0.03207887, "step": 8879 }, { "epoch": 17.76, "grad_norm": 1.1033519506454468, "learning_rate": 2e-05, "loss": 0.03914376, "step": 8880 }, { "epoch": 17.762, "grad_norm": 2.1870508193969727, "learning_rate": 2e-05, "loss": 0.04750444, "step": 8881 }, { "epoch": 17.764, "grad_norm": 1.3173456192016602, "learning_rate": 2e-05, "loss": 0.04112824, "step": 8882 }, { "epoch": 17.766, "grad_norm": 1.5373280048370361, "learning_rate": 2e-05, "loss": 0.03573046, "step": 8883 }, { "epoch": 17.768, "grad_norm": 1.249330759048462, "learning_rate": 2e-05, "loss": 0.03755806, "step": 8884 }, { "epoch": 17.77, "grad_norm": 1.2358646392822266, "learning_rate": 2e-05, "loss": 0.02392829, "step": 8885 }, { "epoch": 17.772, "grad_norm": 0.6719095706939697, "learning_rate": 2e-05, "loss": 0.01649454, "step": 8886 }, { "epoch": 17.774, "grad_norm": 1.3092347383499146, "learning_rate": 2e-05, "loss": 0.04142372, "step": 8887 }, { "epoch": 17.776, "grad_norm": 1.4076433181762695, "learning_rate": 2e-05, "loss": 0.03731369, "step": 8888 }, { "epoch": 17.778, "grad_norm": 1.1862058639526367, "learning_rate": 2e-05, "loss": 0.02469937, "step": 8889 }, { "epoch": 17.78, "grad_norm": 1.0884705781936646, "learning_rate": 2e-05, "loss": 0.03976347, "step": 8890 }, { "epoch": 17.782, "grad_norm": 1.3288317918777466, "learning_rate": 2e-05, "loss": 0.03909069, "step": 8891 }, { "epoch": 17.784, "grad_norm": 1.437835693359375, "learning_rate": 2e-05, "loss": 0.04604793, "step": 8892 }, { "epoch": 17.786, "grad_norm": 1.2429791688919067, "learning_rate": 2e-05, "loss": 0.03789811, "step": 8893 }, { "epoch": 17.788, "grad_norm": 0.8929753303527832, "learning_rate": 2e-05, "loss": 0.02409481, "step": 8894 }, { "epoch": 17.79, "grad_norm": 1.292466163635254, "learning_rate": 2e-05, "loss": 0.04581561, "step": 8895 }, { "epoch": 17.792, "grad_norm": 1.1110408306121826, "learning_rate": 2e-05, "loss": 0.03448091, "step": 8896 }, { "epoch": 17.794, "grad_norm": 1.2385011911392212, "learning_rate": 2e-05, "loss": 0.03106187, "step": 8897 }, { "epoch": 17.796, "grad_norm": 2.670729637145996, "learning_rate": 2e-05, "loss": 0.04714116, "step": 8898 }, { "epoch": 17.798000000000002, "grad_norm": 0.9738274216651917, "learning_rate": 2e-05, "loss": 0.02809829, "step": 8899 }, { "epoch": 17.8, "grad_norm": 1.753575325012207, "learning_rate": 2e-05, "loss": 0.04472911, "step": 8900 }, { "epoch": 17.802, "grad_norm": 1.6126110553741455, "learning_rate": 2e-05, "loss": 0.0548849, "step": 8901 }, { "epoch": 17.804, "grad_norm": 1.9280973672866821, "learning_rate": 2e-05, "loss": 0.0414143, "step": 8902 }, { "epoch": 17.806, "grad_norm": 0.8208984136581421, "learning_rate": 2e-05, "loss": 0.01933501, "step": 8903 }, { "epoch": 17.808, "grad_norm": 2.380815029144287, "learning_rate": 2e-05, "loss": 0.03992394, "step": 8904 }, { "epoch": 17.81, "grad_norm": 2.3603808879852295, "learning_rate": 2e-05, "loss": 0.06256126, "step": 8905 }, { "epoch": 17.812, "grad_norm": 1.170880675315857, "learning_rate": 2e-05, "loss": 0.0337522, "step": 8906 }, { "epoch": 17.814, "grad_norm": 1.7238221168518066, "learning_rate": 2e-05, "loss": 0.0442452, "step": 8907 }, { "epoch": 17.816, "grad_norm": 1.3815233707427979, "learning_rate": 2e-05, "loss": 0.03570531, "step": 8908 }, { "epoch": 17.818, "grad_norm": 1.077028751373291, "learning_rate": 2e-05, "loss": 0.02792567, "step": 8909 }, { "epoch": 17.82, "grad_norm": 1.8009798526763916, "learning_rate": 2e-05, "loss": 0.03502089, "step": 8910 }, { "epoch": 17.822, "grad_norm": 1.4753614664077759, "learning_rate": 2e-05, "loss": 0.04613937, "step": 8911 }, { "epoch": 17.824, "grad_norm": 1.8474282026290894, "learning_rate": 2e-05, "loss": 0.05605477, "step": 8912 }, { "epoch": 17.826, "grad_norm": 1.3313781023025513, "learning_rate": 2e-05, "loss": 0.04386368, "step": 8913 }, { "epoch": 17.828, "grad_norm": 1.3753377199172974, "learning_rate": 2e-05, "loss": 0.0385115, "step": 8914 }, { "epoch": 17.83, "grad_norm": 0.8378182053565979, "learning_rate": 2e-05, "loss": 0.02854342, "step": 8915 }, { "epoch": 17.832, "grad_norm": 1.0748872756958008, "learning_rate": 2e-05, "loss": 0.02578593, "step": 8916 }, { "epoch": 17.834, "grad_norm": 0.881872832775116, "learning_rate": 2e-05, "loss": 0.02822345, "step": 8917 }, { "epoch": 17.836, "grad_norm": 2.561361074447632, "learning_rate": 2e-05, "loss": 0.04112708, "step": 8918 }, { "epoch": 17.838, "grad_norm": 1.0299837589263916, "learning_rate": 2e-05, "loss": 0.030092, "step": 8919 }, { "epoch": 17.84, "grad_norm": 1.040921688079834, "learning_rate": 2e-05, "loss": 0.02578293, "step": 8920 }, { "epoch": 17.842, "grad_norm": 1.1591360569000244, "learning_rate": 2e-05, "loss": 0.04461095, "step": 8921 }, { "epoch": 17.844, "grad_norm": 0.929568886756897, "learning_rate": 2e-05, "loss": 0.03212486, "step": 8922 }, { "epoch": 17.846, "grad_norm": 1.0860724449157715, "learning_rate": 2e-05, "loss": 0.02192564, "step": 8923 }, { "epoch": 17.848, "grad_norm": 1.2266968488693237, "learning_rate": 2e-05, "loss": 0.0363798, "step": 8924 }, { "epoch": 17.85, "grad_norm": 1.622202754020691, "learning_rate": 2e-05, "loss": 0.03073082, "step": 8925 }, { "epoch": 17.852, "grad_norm": 1.0483282804489136, "learning_rate": 2e-05, "loss": 0.03715392, "step": 8926 }, { "epoch": 17.854, "grad_norm": 1.1092106103897095, "learning_rate": 2e-05, "loss": 0.03876258, "step": 8927 }, { "epoch": 17.856, "grad_norm": 0.9673099517822266, "learning_rate": 2e-05, "loss": 0.02583649, "step": 8928 }, { "epoch": 17.858, "grad_norm": 1.635252833366394, "learning_rate": 2e-05, "loss": 0.05052019, "step": 8929 }, { "epoch": 17.86, "grad_norm": 1.153358817100525, "learning_rate": 2e-05, "loss": 0.03466346, "step": 8930 }, { "epoch": 17.862, "grad_norm": 2.039275884628296, "learning_rate": 2e-05, "loss": 0.05241006, "step": 8931 }, { "epoch": 17.864, "grad_norm": 1.3057268857955933, "learning_rate": 2e-05, "loss": 0.04647853, "step": 8932 }, { "epoch": 17.866, "grad_norm": 1.776149868965149, "learning_rate": 2e-05, "loss": 0.03174097, "step": 8933 }, { "epoch": 17.868, "grad_norm": 1.7656913995742798, "learning_rate": 2e-05, "loss": 0.03435258, "step": 8934 }, { "epoch": 17.87, "grad_norm": 1.3331656455993652, "learning_rate": 2e-05, "loss": 0.04026199, "step": 8935 }, { "epoch": 17.872, "grad_norm": 1.0311089754104614, "learning_rate": 2e-05, "loss": 0.02966706, "step": 8936 }, { "epoch": 17.874, "grad_norm": 1.0116089582443237, "learning_rate": 2e-05, "loss": 0.03720299, "step": 8937 }, { "epoch": 17.876, "grad_norm": 1.1523536443710327, "learning_rate": 2e-05, "loss": 0.0370546, "step": 8938 }, { "epoch": 17.878, "grad_norm": 1.1194231510162354, "learning_rate": 2e-05, "loss": 0.03367734, "step": 8939 }, { "epoch": 17.88, "grad_norm": 1.4645055532455444, "learning_rate": 2e-05, "loss": 0.04109664, "step": 8940 }, { "epoch": 17.882, "grad_norm": 1.3934130668640137, "learning_rate": 2e-05, "loss": 0.04116001, "step": 8941 }, { "epoch": 17.884, "grad_norm": 1.2432390451431274, "learning_rate": 2e-05, "loss": 0.0318905, "step": 8942 }, { "epoch": 17.886, "grad_norm": 1.9031294584274292, "learning_rate": 2e-05, "loss": 0.04899106, "step": 8943 }, { "epoch": 17.888, "grad_norm": 1.3559234142303467, "learning_rate": 2e-05, "loss": 0.04302301, "step": 8944 }, { "epoch": 17.89, "grad_norm": 1.1823681592941284, "learning_rate": 2e-05, "loss": 0.03750154, "step": 8945 }, { "epoch": 17.892, "grad_norm": 2.3975961208343506, "learning_rate": 2e-05, "loss": 0.04103041, "step": 8946 }, { "epoch": 17.894, "grad_norm": 1.2826277017593384, "learning_rate": 2e-05, "loss": 0.03688804, "step": 8947 }, { "epoch": 17.896, "grad_norm": 2.822355270385742, "learning_rate": 2e-05, "loss": 0.05060627, "step": 8948 }, { "epoch": 17.898, "grad_norm": 1.0254136323928833, "learning_rate": 2e-05, "loss": 0.03011078, "step": 8949 }, { "epoch": 17.9, "grad_norm": 1.1992510557174683, "learning_rate": 2e-05, "loss": 0.04118895, "step": 8950 }, { "epoch": 17.902, "grad_norm": 1.697759747505188, "learning_rate": 2e-05, "loss": 0.04727785, "step": 8951 }, { "epoch": 17.904, "grad_norm": 1.5532244443893433, "learning_rate": 2e-05, "loss": 0.05363618, "step": 8952 }, { "epoch": 17.906, "grad_norm": 1.0093700885772705, "learning_rate": 2e-05, "loss": 0.03118528, "step": 8953 }, { "epoch": 17.908, "grad_norm": 1.895651936531067, "learning_rate": 2e-05, "loss": 0.06020194, "step": 8954 }, { "epoch": 17.91, "grad_norm": 1.0814530849456787, "learning_rate": 2e-05, "loss": 0.0355964, "step": 8955 }, { "epoch": 17.912, "grad_norm": 1.521867275238037, "learning_rate": 2e-05, "loss": 0.04056902, "step": 8956 }, { "epoch": 17.914, "grad_norm": 1.2673386335372925, "learning_rate": 2e-05, "loss": 0.03534425, "step": 8957 }, { "epoch": 17.916, "grad_norm": 2.1670548915863037, "learning_rate": 2e-05, "loss": 0.03837797, "step": 8958 }, { "epoch": 17.918, "grad_norm": 0.9772452116012573, "learning_rate": 2e-05, "loss": 0.02949987, "step": 8959 }, { "epoch": 17.92, "grad_norm": 1.940874695777893, "learning_rate": 2e-05, "loss": 0.06990305, "step": 8960 }, { "epoch": 17.922, "grad_norm": 2.148874044418335, "learning_rate": 2e-05, "loss": 0.03925563, "step": 8961 }, { "epoch": 17.924, "grad_norm": 1.2279850244522095, "learning_rate": 2e-05, "loss": 0.03267869, "step": 8962 }, { "epoch": 17.926, "grad_norm": 1.2713209390640259, "learning_rate": 2e-05, "loss": 0.03438141, "step": 8963 }, { "epoch": 17.928, "grad_norm": 1.0285924673080444, "learning_rate": 2e-05, "loss": 0.03503043, "step": 8964 }, { "epoch": 17.93, "grad_norm": 1.2333204746246338, "learning_rate": 2e-05, "loss": 0.03405837, "step": 8965 }, { "epoch": 17.932, "grad_norm": 2.2821717262268066, "learning_rate": 2e-05, "loss": 0.03725312, "step": 8966 }, { "epoch": 17.934, "grad_norm": 3.1810193061828613, "learning_rate": 2e-05, "loss": 0.0413457, "step": 8967 }, { "epoch": 17.936, "grad_norm": 1.8636484146118164, "learning_rate": 2e-05, "loss": 0.0562732, "step": 8968 }, { "epoch": 17.938, "grad_norm": 2.1253907680511475, "learning_rate": 2e-05, "loss": 0.04173985, "step": 8969 }, { "epoch": 17.94, "grad_norm": 1.4649451971054077, "learning_rate": 2e-05, "loss": 0.04346614, "step": 8970 }, { "epoch": 17.942, "grad_norm": 1.5256109237670898, "learning_rate": 2e-05, "loss": 0.05348255, "step": 8971 }, { "epoch": 17.944, "grad_norm": 1.0634779930114746, "learning_rate": 2e-05, "loss": 0.04019362, "step": 8972 }, { "epoch": 17.946, "grad_norm": 1.4219046831130981, "learning_rate": 2e-05, "loss": 0.03420949, "step": 8973 }, { "epoch": 17.948, "grad_norm": 1.1320511102676392, "learning_rate": 2e-05, "loss": 0.03867266, "step": 8974 }, { "epoch": 17.95, "grad_norm": 1.6765214204788208, "learning_rate": 2e-05, "loss": 0.04724558, "step": 8975 }, { "epoch": 17.951999999999998, "grad_norm": 1.8259295225143433, "learning_rate": 2e-05, "loss": 0.03461309, "step": 8976 }, { "epoch": 17.954, "grad_norm": 1.2042784690856934, "learning_rate": 2e-05, "loss": 0.04839892, "step": 8977 }, { "epoch": 17.956, "grad_norm": 1.5858327150344849, "learning_rate": 2e-05, "loss": 0.0500534, "step": 8978 }, { "epoch": 17.958, "grad_norm": 1.3547513484954834, "learning_rate": 2e-05, "loss": 0.02594247, "step": 8979 }, { "epoch": 17.96, "grad_norm": 1.0181814432144165, "learning_rate": 2e-05, "loss": 0.02916558, "step": 8980 }, { "epoch": 17.962, "grad_norm": 1.466145634651184, "learning_rate": 2e-05, "loss": 0.04672647, "step": 8981 }, { "epoch": 17.964, "grad_norm": 1.3053315877914429, "learning_rate": 2e-05, "loss": 0.03587544, "step": 8982 }, { "epoch": 17.966, "grad_norm": 1.0494365692138672, "learning_rate": 2e-05, "loss": 0.02380068, "step": 8983 }, { "epoch": 17.968, "grad_norm": 1.8024916648864746, "learning_rate": 2e-05, "loss": 0.03359849, "step": 8984 }, { "epoch": 17.97, "grad_norm": 0.9630800485610962, "learning_rate": 2e-05, "loss": 0.03356433, "step": 8985 }, { "epoch": 17.972, "grad_norm": 1.1515108346939087, "learning_rate": 2e-05, "loss": 0.03505271, "step": 8986 }, { "epoch": 17.974, "grad_norm": 1.3020286560058594, "learning_rate": 2e-05, "loss": 0.05472892, "step": 8987 }, { "epoch": 17.976, "grad_norm": 1.7581522464752197, "learning_rate": 2e-05, "loss": 0.0482644, "step": 8988 }, { "epoch": 17.978, "grad_norm": 1.5518864393234253, "learning_rate": 2e-05, "loss": 0.04105756, "step": 8989 }, { "epoch": 17.98, "grad_norm": 0.9530649781227112, "learning_rate": 2e-05, "loss": 0.02671498, "step": 8990 }, { "epoch": 17.982, "grad_norm": 1.1680177450180054, "learning_rate": 2e-05, "loss": 0.02665569, "step": 8991 }, { "epoch": 17.984, "grad_norm": 1.260867714881897, "learning_rate": 2e-05, "loss": 0.02883239, "step": 8992 }, { "epoch": 17.986, "grad_norm": 1.3985310792922974, "learning_rate": 2e-05, "loss": 0.03463531, "step": 8993 }, { "epoch": 17.988, "grad_norm": 1.3712165355682373, "learning_rate": 2e-05, "loss": 0.04792679, "step": 8994 }, { "epoch": 17.99, "grad_norm": 1.8040891885757446, "learning_rate": 2e-05, "loss": 0.04169717, "step": 8995 }, { "epoch": 17.992, "grad_norm": 2.077030897140503, "learning_rate": 2e-05, "loss": 0.04134944, "step": 8996 }, { "epoch": 17.994, "grad_norm": 0.8118720054626465, "learning_rate": 2e-05, "loss": 0.01849389, "step": 8997 }, { "epoch": 17.996, "grad_norm": 0.983228862285614, "learning_rate": 2e-05, "loss": 0.02795386, "step": 8998 }, { "epoch": 17.998, "grad_norm": 1.1231356859207153, "learning_rate": 2e-05, "loss": 0.02436145, "step": 8999 }, { "epoch": 18.0, "grad_norm": 1.8219008445739746, "learning_rate": 2e-05, "loss": 0.03983248, "step": 9000 }, { "epoch": 18.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9740518962075848, "Equal_1": 0.996, "Equal_2": 0.9700598802395209, "Equal_3": 0.93812375249501, "LineComparison_1": 0.998, "LineComparison_2": 1.0, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 0.996, "Perpendicular_2": 0.978, "Perpendicular_3": 0.7464929859719439, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9973333333333333, "PointLiesOnCircle_3": 0.9896666666666666, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9740518962075848 }, "eval_runtime": 320.1045, "eval_samples_per_second": 32.802, "eval_steps_per_second": 0.656, "step": 9000 }, { "epoch": 18.002, "grad_norm": 1.8216357231140137, "learning_rate": 2e-05, "loss": 0.03083301, "step": 9001 }, { "epoch": 18.004, "grad_norm": 0.8909358978271484, "learning_rate": 2e-05, "loss": 0.02501141, "step": 9002 }, { "epoch": 18.006, "grad_norm": 1.4257256984710693, "learning_rate": 2e-05, "loss": 0.03546458, "step": 9003 }, { "epoch": 18.008, "grad_norm": 1.231162667274475, "learning_rate": 2e-05, "loss": 0.04436444, "step": 9004 }, { "epoch": 18.01, "grad_norm": 2.071355104446411, "learning_rate": 2e-05, "loss": 0.05257452, "step": 9005 }, { "epoch": 18.012, "grad_norm": 2.303741455078125, "learning_rate": 2e-05, "loss": 0.04662026, "step": 9006 }, { "epoch": 18.014, "grad_norm": 1.0220463275909424, "learning_rate": 2e-05, "loss": 0.03330886, "step": 9007 }, { "epoch": 18.016, "grad_norm": 2.040062665939331, "learning_rate": 2e-05, "loss": 0.04663801, "step": 9008 }, { "epoch": 18.018, "grad_norm": 2.4291489124298096, "learning_rate": 2e-05, "loss": 0.05256157, "step": 9009 }, { "epoch": 18.02, "grad_norm": 2.1016221046447754, "learning_rate": 2e-05, "loss": 0.04151364, "step": 9010 }, { "epoch": 18.022, "grad_norm": 1.0601978302001953, "learning_rate": 2e-05, "loss": 0.04119922, "step": 9011 }, { "epoch": 18.024, "grad_norm": 1.0514633655548096, "learning_rate": 2e-05, "loss": 0.03135135, "step": 9012 }, { "epoch": 18.026, "grad_norm": 1.4121065139770508, "learning_rate": 2e-05, "loss": 0.04619808, "step": 9013 }, { "epoch": 18.028, "grad_norm": 0.9003342986106873, "learning_rate": 2e-05, "loss": 0.02542493, "step": 9014 }, { "epoch": 18.03, "grad_norm": 2.492168664932251, "learning_rate": 2e-05, "loss": 0.04310292, "step": 9015 }, { "epoch": 18.032, "grad_norm": 1.2616229057312012, "learning_rate": 2e-05, "loss": 0.03716831, "step": 9016 }, { "epoch": 18.034, "grad_norm": 1.5388479232788086, "learning_rate": 2e-05, "loss": 0.05547727, "step": 9017 }, { "epoch": 18.036, "grad_norm": 2.8383405208587646, "learning_rate": 2e-05, "loss": 0.04588626, "step": 9018 }, { "epoch": 18.038, "grad_norm": 1.0203205347061157, "learning_rate": 2e-05, "loss": 0.03126208, "step": 9019 }, { "epoch": 18.04, "grad_norm": 1.4815860986709595, "learning_rate": 2e-05, "loss": 0.04387635, "step": 9020 }, { "epoch": 18.042, "grad_norm": 1.163259744644165, "learning_rate": 2e-05, "loss": 0.0341371, "step": 9021 }, { "epoch": 18.044, "grad_norm": 1.398336410522461, "learning_rate": 2e-05, "loss": 0.03288767, "step": 9022 }, { "epoch": 18.046, "grad_norm": 1.4146158695220947, "learning_rate": 2e-05, "loss": 0.03783367, "step": 9023 }, { "epoch": 18.048, "grad_norm": 1.2341772317886353, "learning_rate": 2e-05, "loss": 0.0348506, "step": 9024 }, { "epoch": 18.05, "grad_norm": 1.4742604494094849, "learning_rate": 2e-05, "loss": 0.04025548, "step": 9025 }, { "epoch": 18.052, "grad_norm": 1.2109090089797974, "learning_rate": 2e-05, "loss": 0.02520112, "step": 9026 }, { "epoch": 18.054, "grad_norm": 0.9972143173217773, "learning_rate": 2e-05, "loss": 0.03415427, "step": 9027 }, { "epoch": 18.056, "grad_norm": 1.110215425491333, "learning_rate": 2e-05, "loss": 0.03701129, "step": 9028 }, { "epoch": 18.058, "grad_norm": 1.2750699520111084, "learning_rate": 2e-05, "loss": 0.03478452, "step": 9029 }, { "epoch": 18.06, "grad_norm": 0.906257152557373, "learning_rate": 2e-05, "loss": 0.02598533, "step": 9030 }, { "epoch": 18.062, "grad_norm": 2.5207362174987793, "learning_rate": 2e-05, "loss": 0.04807776, "step": 9031 }, { "epoch": 18.064, "grad_norm": 1.246948480606079, "learning_rate": 2e-05, "loss": 0.03488917, "step": 9032 }, { "epoch": 18.066, "grad_norm": 2.9162485599517822, "learning_rate": 2e-05, "loss": 0.03648629, "step": 9033 }, { "epoch": 18.068, "grad_norm": 1.169908046722412, "learning_rate": 2e-05, "loss": 0.02641434, "step": 9034 }, { "epoch": 18.07, "grad_norm": 1.152937889099121, "learning_rate": 2e-05, "loss": 0.03226584, "step": 9035 }, { "epoch": 18.072, "grad_norm": 1.7191685438156128, "learning_rate": 2e-05, "loss": 0.05071743, "step": 9036 }, { "epoch": 18.074, "grad_norm": 1.6077672243118286, "learning_rate": 2e-05, "loss": 0.02946869, "step": 9037 }, { "epoch": 18.076, "grad_norm": 1.787205457687378, "learning_rate": 2e-05, "loss": 0.02539303, "step": 9038 }, { "epoch": 18.078, "grad_norm": 1.3960199356079102, "learning_rate": 2e-05, "loss": 0.03590056, "step": 9039 }, { "epoch": 18.08, "grad_norm": 1.8743473291397095, "learning_rate": 2e-05, "loss": 0.03464347, "step": 9040 }, { "epoch": 18.082, "grad_norm": 1.2113933563232422, "learning_rate": 2e-05, "loss": 0.03156437, "step": 9041 }, { "epoch": 18.084, "grad_norm": 1.8189163208007812, "learning_rate": 2e-05, "loss": 0.04444864, "step": 9042 }, { "epoch": 18.086, "grad_norm": 1.960848331451416, "learning_rate": 2e-05, "loss": 0.03387512, "step": 9043 }, { "epoch": 18.088, "grad_norm": 1.3409546613693237, "learning_rate": 2e-05, "loss": 0.03057274, "step": 9044 }, { "epoch": 18.09, "grad_norm": 1.4044934511184692, "learning_rate": 2e-05, "loss": 0.03606112, "step": 9045 }, { "epoch": 18.092, "grad_norm": 1.1396024227142334, "learning_rate": 2e-05, "loss": 0.03115062, "step": 9046 }, { "epoch": 18.094, "grad_norm": 1.4510269165039062, "learning_rate": 2e-05, "loss": 0.02674197, "step": 9047 }, { "epoch": 18.096, "grad_norm": 1.1940547227859497, "learning_rate": 2e-05, "loss": 0.03157366, "step": 9048 }, { "epoch": 18.098, "grad_norm": 1.4505969285964966, "learning_rate": 2e-05, "loss": 0.04264753, "step": 9049 }, { "epoch": 18.1, "grad_norm": 1.8302192687988281, "learning_rate": 2e-05, "loss": 0.04496709, "step": 9050 }, { "epoch": 18.102, "grad_norm": 1.3621551990509033, "learning_rate": 2e-05, "loss": 0.03655332, "step": 9051 }, { "epoch": 18.104, "grad_norm": 0.982716977596283, "learning_rate": 2e-05, "loss": 0.03123835, "step": 9052 }, { "epoch": 18.106, "grad_norm": 1.5590128898620605, "learning_rate": 2e-05, "loss": 0.04147382, "step": 9053 }, { "epoch": 18.108, "grad_norm": 1.2122939825057983, "learning_rate": 2e-05, "loss": 0.03553297, "step": 9054 }, { "epoch": 18.11, "grad_norm": 1.2034052610397339, "learning_rate": 2e-05, "loss": 0.04050352, "step": 9055 }, { "epoch": 18.112, "grad_norm": 1.3900257349014282, "learning_rate": 2e-05, "loss": 0.05008593, "step": 9056 }, { "epoch": 18.114, "grad_norm": 1.8129817247390747, "learning_rate": 2e-05, "loss": 0.03101974, "step": 9057 }, { "epoch": 18.116, "grad_norm": 1.3509774208068848, "learning_rate": 2e-05, "loss": 0.03191446, "step": 9058 }, { "epoch": 18.118, "grad_norm": 1.6840113401412964, "learning_rate": 2e-05, "loss": 0.03167528, "step": 9059 }, { "epoch": 18.12, "grad_norm": 1.132699966430664, "learning_rate": 2e-05, "loss": 0.03625121, "step": 9060 }, { "epoch": 18.122, "grad_norm": 1.4251673221588135, "learning_rate": 2e-05, "loss": 0.04312788, "step": 9061 }, { "epoch": 18.124, "grad_norm": 1.160861611366272, "learning_rate": 2e-05, "loss": 0.03482513, "step": 9062 }, { "epoch": 18.126, "grad_norm": 1.3063910007476807, "learning_rate": 2e-05, "loss": 0.03584398, "step": 9063 }, { "epoch": 18.128, "grad_norm": 1.410151481628418, "learning_rate": 2e-05, "loss": 0.04368694, "step": 9064 }, { "epoch": 18.13, "grad_norm": 2.592433452606201, "learning_rate": 2e-05, "loss": 0.03493256, "step": 9065 }, { "epoch": 18.132, "grad_norm": 1.192308783531189, "learning_rate": 2e-05, "loss": 0.03010586, "step": 9066 }, { "epoch": 18.134, "grad_norm": 1.1783413887023926, "learning_rate": 2e-05, "loss": 0.04549911, "step": 9067 }, { "epoch": 18.136, "grad_norm": 0.9659681916236877, "learning_rate": 2e-05, "loss": 0.02878298, "step": 9068 }, { "epoch": 18.138, "grad_norm": 1.4795477390289307, "learning_rate": 2e-05, "loss": 0.03338488, "step": 9069 }, { "epoch": 18.14, "grad_norm": 1.3997915983200073, "learning_rate": 2e-05, "loss": 0.04527793, "step": 9070 }, { "epoch": 18.142, "grad_norm": 0.974898099899292, "learning_rate": 2e-05, "loss": 0.02718613, "step": 9071 }, { "epoch": 18.144, "grad_norm": 1.643044114112854, "learning_rate": 2e-05, "loss": 0.04503238, "step": 9072 }, { "epoch": 18.146, "grad_norm": 2.4663209915161133, "learning_rate": 2e-05, "loss": 0.05017319, "step": 9073 }, { "epoch": 18.148, "grad_norm": 1.3137898445129395, "learning_rate": 2e-05, "loss": 0.03679071, "step": 9074 }, { "epoch": 18.15, "grad_norm": 1.2270176410675049, "learning_rate": 2e-05, "loss": 0.02902906, "step": 9075 }, { "epoch": 18.152, "grad_norm": 1.5809822082519531, "learning_rate": 2e-05, "loss": 0.02031413, "step": 9076 }, { "epoch": 18.154, "grad_norm": 1.235666275024414, "learning_rate": 2e-05, "loss": 0.0429434, "step": 9077 }, { "epoch": 18.156, "grad_norm": 1.3485252857208252, "learning_rate": 2e-05, "loss": 0.04158372, "step": 9078 }, { "epoch": 18.158, "grad_norm": 1.5450528860092163, "learning_rate": 2e-05, "loss": 0.03034053, "step": 9079 }, { "epoch": 18.16, "grad_norm": 1.102346420288086, "learning_rate": 2e-05, "loss": 0.03850158, "step": 9080 }, { "epoch": 18.162, "grad_norm": 1.7495108842849731, "learning_rate": 2e-05, "loss": 0.04332937, "step": 9081 }, { "epoch": 18.164, "grad_norm": 1.059772253036499, "learning_rate": 2e-05, "loss": 0.04180691, "step": 9082 }, { "epoch": 18.166, "grad_norm": 2.6385281085968018, "learning_rate": 2e-05, "loss": 0.04071419, "step": 9083 }, { "epoch": 18.168, "grad_norm": 1.432405948638916, "learning_rate": 2e-05, "loss": 0.02765241, "step": 9084 }, { "epoch": 18.17, "grad_norm": 1.8066904544830322, "learning_rate": 2e-05, "loss": 0.04745378, "step": 9085 }, { "epoch": 18.172, "grad_norm": 0.885301411151886, "learning_rate": 2e-05, "loss": 0.02941084, "step": 9086 }, { "epoch": 18.174, "grad_norm": 1.0743720531463623, "learning_rate": 2e-05, "loss": 0.03165472, "step": 9087 }, { "epoch": 18.176, "grad_norm": 1.8421827554702759, "learning_rate": 2e-05, "loss": 0.03389416, "step": 9088 }, { "epoch": 18.178, "grad_norm": 1.3630011081695557, "learning_rate": 2e-05, "loss": 0.03734118, "step": 9089 }, { "epoch": 18.18, "grad_norm": 1.5092341899871826, "learning_rate": 2e-05, "loss": 0.04258218, "step": 9090 }, { "epoch": 18.182, "grad_norm": 2.4805521965026855, "learning_rate": 2e-05, "loss": 0.05131678, "step": 9091 }, { "epoch": 18.184, "grad_norm": 1.2178826332092285, "learning_rate": 2e-05, "loss": 0.03363734, "step": 9092 }, { "epoch": 18.186, "grad_norm": 1.2770280838012695, "learning_rate": 2e-05, "loss": 0.04184774, "step": 9093 }, { "epoch": 18.188, "grad_norm": 2.7290360927581787, "learning_rate": 2e-05, "loss": 0.04432489, "step": 9094 }, { "epoch": 18.19, "grad_norm": 1.8310285806655884, "learning_rate": 2e-05, "loss": 0.03530759, "step": 9095 }, { "epoch": 18.192, "grad_norm": 1.0748549699783325, "learning_rate": 2e-05, "loss": 0.03645511, "step": 9096 }, { "epoch": 18.194, "grad_norm": 1.8210999965667725, "learning_rate": 2e-05, "loss": 0.04495174, "step": 9097 }, { "epoch": 18.196, "grad_norm": 1.0958075523376465, "learning_rate": 2e-05, "loss": 0.02407749, "step": 9098 }, { "epoch": 18.198, "grad_norm": 1.8292564153671265, "learning_rate": 2e-05, "loss": 0.0362148, "step": 9099 }, { "epoch": 18.2, "grad_norm": 1.0899569988250732, "learning_rate": 2e-05, "loss": 0.03374222, "step": 9100 }, { "epoch": 18.202, "grad_norm": 1.0096633434295654, "learning_rate": 2e-05, "loss": 0.03209589, "step": 9101 }, { "epoch": 18.204, "grad_norm": 1.1050734519958496, "learning_rate": 2e-05, "loss": 0.03580528, "step": 9102 }, { "epoch": 18.206, "grad_norm": 2.117532253265381, "learning_rate": 2e-05, "loss": 0.04968563, "step": 9103 }, { "epoch": 18.208, "grad_norm": 2.2677221298217773, "learning_rate": 2e-05, "loss": 0.06845517, "step": 9104 }, { "epoch": 18.21, "grad_norm": 0.8368435502052307, "learning_rate": 2e-05, "loss": 0.02537123, "step": 9105 }, { "epoch": 18.212, "grad_norm": 1.149839162826538, "learning_rate": 2e-05, "loss": 0.03183338, "step": 9106 }, { "epoch": 18.214, "grad_norm": 1.2164126634597778, "learning_rate": 2e-05, "loss": 0.0268788, "step": 9107 }, { "epoch": 18.216, "grad_norm": 1.1083381175994873, "learning_rate": 2e-05, "loss": 0.0347072, "step": 9108 }, { "epoch": 18.218, "grad_norm": 1.0997363328933716, "learning_rate": 2e-05, "loss": 0.03136883, "step": 9109 }, { "epoch": 18.22, "grad_norm": 1.3243005275726318, "learning_rate": 2e-05, "loss": 0.04105141, "step": 9110 }, { "epoch": 18.222, "grad_norm": 1.123883843421936, "learning_rate": 2e-05, "loss": 0.04037739, "step": 9111 }, { "epoch": 18.224, "grad_norm": 1.3631707429885864, "learning_rate": 2e-05, "loss": 0.02595056, "step": 9112 }, { "epoch": 18.226, "grad_norm": 1.5019488334655762, "learning_rate": 2e-05, "loss": 0.03916592, "step": 9113 }, { "epoch": 18.228, "grad_norm": 1.153467059135437, "learning_rate": 2e-05, "loss": 0.03865172, "step": 9114 }, { "epoch": 18.23, "grad_norm": 1.4573817253112793, "learning_rate": 2e-05, "loss": 0.02908707, "step": 9115 }, { "epoch": 18.232, "grad_norm": 1.7540217638015747, "learning_rate": 2e-05, "loss": 0.03963351, "step": 9116 }, { "epoch": 18.234, "grad_norm": 2.5012366771698, "learning_rate": 2e-05, "loss": 0.04361957, "step": 9117 }, { "epoch": 18.236, "grad_norm": 1.4801292419433594, "learning_rate": 2e-05, "loss": 0.03021835, "step": 9118 }, { "epoch": 18.238, "grad_norm": 1.3169306516647339, "learning_rate": 2e-05, "loss": 0.03268102, "step": 9119 }, { "epoch": 18.24, "grad_norm": 1.0948139429092407, "learning_rate": 2e-05, "loss": 0.02789821, "step": 9120 }, { "epoch": 18.242, "grad_norm": 1.2871372699737549, "learning_rate": 2e-05, "loss": 0.03492365, "step": 9121 }, { "epoch": 18.244, "grad_norm": 1.7145776748657227, "learning_rate": 2e-05, "loss": 0.03364137, "step": 9122 }, { "epoch": 18.246, "grad_norm": 1.5056641101837158, "learning_rate": 2e-05, "loss": 0.03888101, "step": 9123 }, { "epoch": 18.248, "grad_norm": 2.125732660293579, "learning_rate": 2e-05, "loss": 0.04138645, "step": 9124 }, { "epoch": 18.25, "grad_norm": 2.0914463996887207, "learning_rate": 2e-05, "loss": 0.04323914, "step": 9125 }, { "epoch": 18.252, "grad_norm": 1.3840261697769165, "learning_rate": 2e-05, "loss": 0.04744683, "step": 9126 }, { "epoch": 18.254, "grad_norm": 1.3877594470977783, "learning_rate": 2e-05, "loss": 0.03428207, "step": 9127 }, { "epoch": 18.256, "grad_norm": 1.9282671213150024, "learning_rate": 2e-05, "loss": 0.03370817, "step": 9128 }, { "epoch": 18.258, "grad_norm": 1.2625566720962524, "learning_rate": 2e-05, "loss": 0.03845701, "step": 9129 }, { "epoch": 18.26, "grad_norm": 1.497373104095459, "learning_rate": 2e-05, "loss": 0.02765183, "step": 9130 }, { "epoch": 18.262, "grad_norm": 0.938334584236145, "learning_rate": 2e-05, "loss": 0.0283683, "step": 9131 }, { "epoch": 18.264, "grad_norm": 0.8075963258743286, "learning_rate": 2e-05, "loss": 0.0257271, "step": 9132 }, { "epoch": 18.266, "grad_norm": 2.6727170944213867, "learning_rate": 2e-05, "loss": 0.03528313, "step": 9133 }, { "epoch": 18.268, "grad_norm": 1.1714603900909424, "learning_rate": 2e-05, "loss": 0.03839287, "step": 9134 }, { "epoch": 18.27, "grad_norm": 1.3799819946289062, "learning_rate": 2e-05, "loss": 0.04040816, "step": 9135 }, { "epoch": 18.272, "grad_norm": 0.8756110668182373, "learning_rate": 2e-05, "loss": 0.02800023, "step": 9136 }, { "epoch": 18.274, "grad_norm": 1.278294563293457, "learning_rate": 2e-05, "loss": 0.03553412, "step": 9137 }, { "epoch": 18.276, "grad_norm": 1.649017333984375, "learning_rate": 2e-05, "loss": 0.0240197, "step": 9138 }, { "epoch": 18.278, "grad_norm": 1.2307323217391968, "learning_rate": 2e-05, "loss": 0.03196525, "step": 9139 }, { "epoch": 18.28, "grad_norm": 1.2029914855957031, "learning_rate": 2e-05, "loss": 0.04263403, "step": 9140 }, { "epoch": 18.282, "grad_norm": 1.1589373350143433, "learning_rate": 2e-05, "loss": 0.04057025, "step": 9141 }, { "epoch": 18.284, "grad_norm": 1.0348948240280151, "learning_rate": 2e-05, "loss": 0.04266765, "step": 9142 }, { "epoch": 18.286, "grad_norm": 0.8373733162879944, "learning_rate": 2e-05, "loss": 0.01983244, "step": 9143 }, { "epoch": 18.288, "grad_norm": 2.752629041671753, "learning_rate": 2e-05, "loss": 0.04166656, "step": 9144 }, { "epoch": 18.29, "grad_norm": 1.1935979127883911, "learning_rate": 2e-05, "loss": 0.04558745, "step": 9145 }, { "epoch": 18.292, "grad_norm": 0.9616577625274658, "learning_rate": 2e-05, "loss": 0.02630688, "step": 9146 }, { "epoch": 18.294, "grad_norm": 1.3004382848739624, "learning_rate": 2e-05, "loss": 0.04482454, "step": 9147 }, { "epoch": 18.296, "grad_norm": 2.1716208457946777, "learning_rate": 2e-05, "loss": 0.03779503, "step": 9148 }, { "epoch": 18.298, "grad_norm": 1.1647388935089111, "learning_rate": 2e-05, "loss": 0.03616937, "step": 9149 }, { "epoch": 18.3, "grad_norm": 1.6994318962097168, "learning_rate": 2e-05, "loss": 0.03434645, "step": 9150 }, { "epoch": 18.302, "grad_norm": 1.055916666984558, "learning_rate": 2e-05, "loss": 0.04265877, "step": 9151 }, { "epoch": 18.304, "grad_norm": 1.1713130474090576, "learning_rate": 2e-05, "loss": 0.03589529, "step": 9152 }, { "epoch": 18.306, "grad_norm": 1.2549328804016113, "learning_rate": 2e-05, "loss": 0.03371196, "step": 9153 }, { "epoch": 18.308, "grad_norm": 0.9361233115196228, "learning_rate": 2e-05, "loss": 0.02483992, "step": 9154 }, { "epoch": 18.31, "grad_norm": 1.0725373029708862, "learning_rate": 2e-05, "loss": 0.03467751, "step": 9155 }, { "epoch": 18.312, "grad_norm": 1.8279783725738525, "learning_rate": 2e-05, "loss": 0.05167603, "step": 9156 }, { "epoch": 18.314, "grad_norm": 0.995499312877655, "learning_rate": 2e-05, "loss": 0.02847271, "step": 9157 }, { "epoch": 18.316, "grad_norm": 1.0232125520706177, "learning_rate": 2e-05, "loss": 0.0327347, "step": 9158 }, { "epoch": 18.318, "grad_norm": 1.25034761428833, "learning_rate": 2e-05, "loss": 0.04012694, "step": 9159 }, { "epoch": 18.32, "grad_norm": 2.3386640548706055, "learning_rate": 2e-05, "loss": 0.03319935, "step": 9160 }, { "epoch": 18.322, "grad_norm": 1.7142624855041504, "learning_rate": 2e-05, "loss": 0.03917816, "step": 9161 }, { "epoch": 18.324, "grad_norm": 1.209825038909912, "learning_rate": 2e-05, "loss": 0.03275396, "step": 9162 }, { "epoch": 18.326, "grad_norm": 1.3008837699890137, "learning_rate": 2e-05, "loss": 0.05405723, "step": 9163 }, { "epoch": 18.328, "grad_norm": 1.4746818542480469, "learning_rate": 2e-05, "loss": 0.05087238, "step": 9164 }, { "epoch": 18.33, "grad_norm": 1.1503615379333496, "learning_rate": 2e-05, "loss": 0.03378589, "step": 9165 }, { "epoch": 18.332, "grad_norm": 1.8128838539123535, "learning_rate": 2e-05, "loss": 0.0437137, "step": 9166 }, { "epoch": 18.334, "grad_norm": 0.9782925248146057, "learning_rate": 2e-05, "loss": 0.02864728, "step": 9167 }, { "epoch": 18.336, "grad_norm": 1.0823737382888794, "learning_rate": 2e-05, "loss": 0.03686111, "step": 9168 }, { "epoch": 18.338, "grad_norm": 1.7901039123535156, "learning_rate": 2e-05, "loss": 0.05045727, "step": 9169 }, { "epoch": 18.34, "grad_norm": 1.2802319526672363, "learning_rate": 2e-05, "loss": 0.031836, "step": 9170 }, { "epoch": 18.342, "grad_norm": 1.067213535308838, "learning_rate": 2e-05, "loss": 0.04307377, "step": 9171 }, { "epoch": 18.344, "grad_norm": 2.6261112689971924, "learning_rate": 2e-05, "loss": 0.03852256, "step": 9172 }, { "epoch": 18.346, "grad_norm": 2.581172466278076, "learning_rate": 2e-05, "loss": 0.04666492, "step": 9173 }, { "epoch": 18.348, "grad_norm": 1.2471551895141602, "learning_rate": 2e-05, "loss": 0.03403299, "step": 9174 }, { "epoch": 18.35, "grad_norm": 0.8967499732971191, "learning_rate": 2e-05, "loss": 0.02881518, "step": 9175 }, { "epoch": 18.352, "grad_norm": 1.7108482122421265, "learning_rate": 2e-05, "loss": 0.03858823, "step": 9176 }, { "epoch": 18.354, "grad_norm": 1.9164936542510986, "learning_rate": 2e-05, "loss": 0.02809501, "step": 9177 }, { "epoch": 18.356, "grad_norm": 1.0232477188110352, "learning_rate": 2e-05, "loss": 0.02891929, "step": 9178 }, { "epoch": 18.358, "grad_norm": 1.1295270919799805, "learning_rate": 2e-05, "loss": 0.0412226, "step": 9179 }, { "epoch": 18.36, "grad_norm": 1.951155662536621, "learning_rate": 2e-05, "loss": 0.05396903, "step": 9180 }, { "epoch": 18.362, "grad_norm": 1.2782940864562988, "learning_rate": 2e-05, "loss": 0.03953922, "step": 9181 }, { "epoch": 18.364, "grad_norm": 1.5052493810653687, "learning_rate": 2e-05, "loss": 0.04199969, "step": 9182 }, { "epoch": 18.366, "grad_norm": 2.334589958190918, "learning_rate": 2e-05, "loss": 0.03334442, "step": 9183 }, { "epoch": 18.368, "grad_norm": 1.534926176071167, "learning_rate": 2e-05, "loss": 0.03135791, "step": 9184 }, { "epoch": 18.37, "grad_norm": 2.0098283290863037, "learning_rate": 2e-05, "loss": 0.04160352, "step": 9185 }, { "epoch": 18.372, "grad_norm": 0.8825958967208862, "learning_rate": 2e-05, "loss": 0.02310374, "step": 9186 }, { "epoch": 18.374, "grad_norm": 1.5516126155853271, "learning_rate": 2e-05, "loss": 0.03449169, "step": 9187 }, { "epoch": 18.376, "grad_norm": 1.0211329460144043, "learning_rate": 2e-05, "loss": 0.03518451, "step": 9188 }, { "epoch": 18.378, "grad_norm": 1.4555100202560425, "learning_rate": 2e-05, "loss": 0.03537372, "step": 9189 }, { "epoch": 18.38, "grad_norm": 1.6372886896133423, "learning_rate": 2e-05, "loss": 0.03058396, "step": 9190 }, { "epoch": 18.382, "grad_norm": 1.5627180337905884, "learning_rate": 2e-05, "loss": 0.02930931, "step": 9191 }, { "epoch": 18.384, "grad_norm": 1.7449994087219238, "learning_rate": 2e-05, "loss": 0.03934868, "step": 9192 }, { "epoch": 18.386, "grad_norm": 0.9360613226890564, "learning_rate": 2e-05, "loss": 0.02767797, "step": 9193 }, { "epoch": 18.388, "grad_norm": 1.376953125, "learning_rate": 2e-05, "loss": 0.02869485, "step": 9194 }, { "epoch": 18.39, "grad_norm": 0.9833387732505798, "learning_rate": 2e-05, "loss": 0.02881843, "step": 9195 }, { "epoch": 18.392, "grad_norm": 1.3985074758529663, "learning_rate": 2e-05, "loss": 0.03320839, "step": 9196 }, { "epoch": 18.394, "grad_norm": 1.116469144821167, "learning_rate": 2e-05, "loss": 0.031327, "step": 9197 }, { "epoch": 18.396, "grad_norm": 1.913405418395996, "learning_rate": 2e-05, "loss": 0.03109054, "step": 9198 }, { "epoch": 18.398, "grad_norm": 0.9129771590232849, "learning_rate": 2e-05, "loss": 0.02565073, "step": 9199 }, { "epoch": 18.4, "grad_norm": 0.8751816749572754, "learning_rate": 2e-05, "loss": 0.02509234, "step": 9200 }, { "epoch": 18.402, "grad_norm": 1.0799328088760376, "learning_rate": 2e-05, "loss": 0.02929278, "step": 9201 }, { "epoch": 18.404, "grad_norm": 1.4381532669067383, "learning_rate": 2e-05, "loss": 0.03288494, "step": 9202 }, { "epoch": 18.406, "grad_norm": 1.2862842082977295, "learning_rate": 2e-05, "loss": 0.04411345, "step": 9203 }, { "epoch": 18.408, "grad_norm": 1.2750723361968994, "learning_rate": 2e-05, "loss": 0.04329745, "step": 9204 }, { "epoch": 18.41, "grad_norm": 1.3029229640960693, "learning_rate": 2e-05, "loss": 0.03379948, "step": 9205 }, { "epoch": 18.412, "grad_norm": 1.3032692670822144, "learning_rate": 2e-05, "loss": 0.04243733, "step": 9206 }, { "epoch": 18.414, "grad_norm": 1.4686411619186401, "learning_rate": 2e-05, "loss": 0.04486408, "step": 9207 }, { "epoch": 18.416, "grad_norm": 2.9766223430633545, "learning_rate": 2e-05, "loss": 0.04959375, "step": 9208 }, { "epoch": 18.418, "grad_norm": 1.8033760786056519, "learning_rate": 2e-05, "loss": 0.04121406, "step": 9209 }, { "epoch": 18.42, "grad_norm": 1.320888876914978, "learning_rate": 2e-05, "loss": 0.03473954, "step": 9210 }, { "epoch": 18.422, "grad_norm": 1.6498913764953613, "learning_rate": 2e-05, "loss": 0.04184615, "step": 9211 }, { "epoch": 18.424, "grad_norm": 1.0657768249511719, "learning_rate": 2e-05, "loss": 0.03823342, "step": 9212 }, { "epoch": 18.426, "grad_norm": 1.5349171161651611, "learning_rate": 2e-05, "loss": 0.04052274, "step": 9213 }, { "epoch": 18.428, "grad_norm": 1.3626680374145508, "learning_rate": 2e-05, "loss": 0.03621456, "step": 9214 }, { "epoch": 18.43, "grad_norm": 2.154515027999878, "learning_rate": 2e-05, "loss": 0.05104381, "step": 9215 }, { "epoch": 18.432, "grad_norm": 2.7905545234680176, "learning_rate": 2e-05, "loss": 0.04180931, "step": 9216 }, { "epoch": 18.434, "grad_norm": 1.2199686765670776, "learning_rate": 2e-05, "loss": 0.0278045, "step": 9217 }, { "epoch": 18.436, "grad_norm": 1.7791190147399902, "learning_rate": 2e-05, "loss": 0.03621379, "step": 9218 }, { "epoch": 18.438, "grad_norm": 2.6824915409088135, "learning_rate": 2e-05, "loss": 0.04269816, "step": 9219 }, { "epoch": 18.44, "grad_norm": 1.4576973915100098, "learning_rate": 2e-05, "loss": 0.04696831, "step": 9220 }, { "epoch": 18.442, "grad_norm": 1.0133570432662964, "learning_rate": 2e-05, "loss": 0.03169259, "step": 9221 }, { "epoch": 18.444, "grad_norm": 1.0140568017959595, "learning_rate": 2e-05, "loss": 0.02977751, "step": 9222 }, { "epoch": 18.446, "grad_norm": 1.2628326416015625, "learning_rate": 2e-05, "loss": 0.0529662, "step": 9223 }, { "epoch": 18.448, "grad_norm": 1.7104601860046387, "learning_rate": 2e-05, "loss": 0.05581909, "step": 9224 }, { "epoch": 18.45, "grad_norm": 0.982793390750885, "learning_rate": 2e-05, "loss": 0.03106131, "step": 9225 }, { "epoch": 18.452, "grad_norm": 1.2446315288543701, "learning_rate": 2e-05, "loss": 0.04647019, "step": 9226 }, { "epoch": 18.454, "grad_norm": 1.1005967855453491, "learning_rate": 2e-05, "loss": 0.02941687, "step": 9227 }, { "epoch": 18.456, "grad_norm": 1.3427083492279053, "learning_rate": 2e-05, "loss": 0.03301994, "step": 9228 }, { "epoch": 18.458, "grad_norm": 0.9936813712120056, "learning_rate": 2e-05, "loss": 0.03047378, "step": 9229 }, { "epoch": 18.46, "grad_norm": 1.9145174026489258, "learning_rate": 2e-05, "loss": 0.03703724, "step": 9230 }, { "epoch": 18.462, "grad_norm": 1.308974027633667, "learning_rate": 2e-05, "loss": 0.04389992, "step": 9231 }, { "epoch": 18.464, "grad_norm": 0.7866896390914917, "learning_rate": 2e-05, "loss": 0.0197884, "step": 9232 }, { "epoch": 18.466, "grad_norm": 1.48411226272583, "learning_rate": 2e-05, "loss": 0.03440548, "step": 9233 }, { "epoch": 18.468, "grad_norm": 1.2252159118652344, "learning_rate": 2e-05, "loss": 0.03730087, "step": 9234 }, { "epoch": 18.47, "grad_norm": 2.652885675430298, "learning_rate": 2e-05, "loss": 0.04888747, "step": 9235 }, { "epoch": 18.472, "grad_norm": 0.9633825421333313, "learning_rate": 2e-05, "loss": 0.03162334, "step": 9236 }, { "epoch": 18.474, "grad_norm": 1.207777500152588, "learning_rate": 2e-05, "loss": 0.03481243, "step": 9237 }, { "epoch": 18.476, "grad_norm": 2.7908196449279785, "learning_rate": 2e-05, "loss": 0.06109419, "step": 9238 }, { "epoch": 18.478, "grad_norm": 1.291577935218811, "learning_rate": 2e-05, "loss": 0.04738308, "step": 9239 }, { "epoch": 18.48, "grad_norm": 1.2779804468154907, "learning_rate": 2e-05, "loss": 0.03293496, "step": 9240 }, { "epoch": 18.482, "grad_norm": 1.388176679611206, "learning_rate": 2e-05, "loss": 0.03266312, "step": 9241 }, { "epoch": 18.484, "grad_norm": 1.3886147737503052, "learning_rate": 2e-05, "loss": 0.02937507, "step": 9242 }, { "epoch": 18.486, "grad_norm": 1.400505781173706, "learning_rate": 2e-05, "loss": 0.04522613, "step": 9243 }, { "epoch": 18.488, "grad_norm": 1.2880887985229492, "learning_rate": 2e-05, "loss": 0.03753714, "step": 9244 }, { "epoch": 18.49, "grad_norm": 1.0358575582504272, "learning_rate": 2e-05, "loss": 0.03539129, "step": 9245 }, { "epoch": 18.492, "grad_norm": 3.62886905670166, "learning_rate": 2e-05, "loss": 0.03471145, "step": 9246 }, { "epoch": 18.494, "grad_norm": 1.1280561685562134, "learning_rate": 2e-05, "loss": 0.03343772, "step": 9247 }, { "epoch": 18.496, "grad_norm": 1.5975375175476074, "learning_rate": 2e-05, "loss": 0.0507987, "step": 9248 }, { "epoch": 18.498, "grad_norm": 1.5075079202651978, "learning_rate": 2e-05, "loss": 0.05328429, "step": 9249 }, { "epoch": 18.5, "grad_norm": 0.8867833614349365, "learning_rate": 2e-05, "loss": 0.02282133, "step": 9250 }, { "epoch": 18.502, "grad_norm": 0.9173626899719238, "learning_rate": 2e-05, "loss": 0.02405676, "step": 9251 }, { "epoch": 18.504, "grad_norm": 0.9300764799118042, "learning_rate": 2e-05, "loss": 0.03014572, "step": 9252 }, { "epoch": 18.506, "grad_norm": 0.6862570643424988, "learning_rate": 2e-05, "loss": 0.01409816, "step": 9253 }, { "epoch": 18.508, "grad_norm": 1.5550457239151, "learning_rate": 2e-05, "loss": 0.04886139, "step": 9254 }, { "epoch": 18.51, "grad_norm": 1.2698708772659302, "learning_rate": 2e-05, "loss": 0.03988787, "step": 9255 }, { "epoch": 18.512, "grad_norm": 1.4811738729476929, "learning_rate": 2e-05, "loss": 0.03565231, "step": 9256 }, { "epoch": 18.514, "grad_norm": 2.2947709560394287, "learning_rate": 2e-05, "loss": 0.03940463, "step": 9257 }, { "epoch": 18.516, "grad_norm": 1.0649924278259277, "learning_rate": 2e-05, "loss": 0.02794782, "step": 9258 }, { "epoch": 18.518, "grad_norm": 1.2015745639801025, "learning_rate": 2e-05, "loss": 0.03615477, "step": 9259 }, { "epoch": 18.52, "grad_norm": 1.1461511850357056, "learning_rate": 2e-05, "loss": 0.03559923, "step": 9260 }, { "epoch": 18.522, "grad_norm": 1.8255705833435059, "learning_rate": 2e-05, "loss": 0.04908207, "step": 9261 }, { "epoch": 18.524, "grad_norm": 1.0880002975463867, "learning_rate": 2e-05, "loss": 0.02855574, "step": 9262 }, { "epoch": 18.526, "grad_norm": 2.4838008880615234, "learning_rate": 2e-05, "loss": 0.03444799, "step": 9263 }, { "epoch": 18.528, "grad_norm": 1.1246633529663086, "learning_rate": 2e-05, "loss": 0.02719912, "step": 9264 }, { "epoch": 18.53, "grad_norm": 0.9572923183441162, "learning_rate": 2e-05, "loss": 0.02941136, "step": 9265 }, { "epoch": 18.532, "grad_norm": 1.3162039518356323, "learning_rate": 2e-05, "loss": 0.0248658, "step": 9266 }, { "epoch": 18.534, "grad_norm": 1.1983442306518555, "learning_rate": 2e-05, "loss": 0.04123002, "step": 9267 }, { "epoch": 18.536, "grad_norm": 1.4255578517913818, "learning_rate": 2e-05, "loss": 0.03819581, "step": 9268 }, { "epoch": 18.538, "grad_norm": 1.2587826251983643, "learning_rate": 2e-05, "loss": 0.03482603, "step": 9269 }, { "epoch": 18.54, "grad_norm": 1.2358354330062866, "learning_rate": 2e-05, "loss": 0.03826098, "step": 9270 }, { "epoch": 18.542, "grad_norm": 1.1539580821990967, "learning_rate": 2e-05, "loss": 0.03745293, "step": 9271 }, { "epoch": 18.544, "grad_norm": 1.8408349752426147, "learning_rate": 2e-05, "loss": 0.04294346, "step": 9272 }, { "epoch": 18.546, "grad_norm": 2.7005791664123535, "learning_rate": 2e-05, "loss": 0.03372211, "step": 9273 }, { "epoch": 18.548000000000002, "grad_norm": 2.7007863521575928, "learning_rate": 2e-05, "loss": 0.0421975, "step": 9274 }, { "epoch": 18.55, "grad_norm": 1.045002818107605, "learning_rate": 2e-05, "loss": 0.03461481, "step": 9275 }, { "epoch": 18.552, "grad_norm": 1.1427747011184692, "learning_rate": 2e-05, "loss": 0.0413534, "step": 9276 }, { "epoch": 18.554, "grad_norm": 1.6876646280288696, "learning_rate": 2e-05, "loss": 0.04642463, "step": 9277 }, { "epoch": 18.556, "grad_norm": 0.9396681189537048, "learning_rate": 2e-05, "loss": 0.02529829, "step": 9278 }, { "epoch": 18.558, "grad_norm": 1.2221046686172485, "learning_rate": 2e-05, "loss": 0.03684171, "step": 9279 }, { "epoch": 18.56, "grad_norm": 1.505953073501587, "learning_rate": 2e-05, "loss": 0.04012023, "step": 9280 }, { "epoch": 18.562, "grad_norm": 1.1318151950836182, "learning_rate": 2e-05, "loss": 0.03027987, "step": 9281 }, { "epoch": 18.564, "grad_norm": 2.6939613819122314, "learning_rate": 2e-05, "loss": 0.0387833, "step": 9282 }, { "epoch": 18.566, "grad_norm": 1.3286248445510864, "learning_rate": 2e-05, "loss": 0.03856936, "step": 9283 }, { "epoch": 18.568, "grad_norm": 2.2467446327209473, "learning_rate": 2e-05, "loss": 0.03416197, "step": 9284 }, { "epoch": 18.57, "grad_norm": 0.8738142848014832, "learning_rate": 2e-05, "loss": 0.030899, "step": 9285 }, { "epoch": 18.572, "grad_norm": 1.4438823461532593, "learning_rate": 2e-05, "loss": 0.03023734, "step": 9286 }, { "epoch": 18.574, "grad_norm": 1.0707905292510986, "learning_rate": 2e-05, "loss": 0.03663265, "step": 9287 }, { "epoch": 18.576, "grad_norm": 3.671177864074707, "learning_rate": 2e-05, "loss": 0.03615709, "step": 9288 }, { "epoch": 18.578, "grad_norm": 1.270842432975769, "learning_rate": 2e-05, "loss": 0.03984865, "step": 9289 }, { "epoch": 18.58, "grad_norm": 1.2456284761428833, "learning_rate": 2e-05, "loss": 0.03165886, "step": 9290 }, { "epoch": 18.582, "grad_norm": 1.7004910707473755, "learning_rate": 2e-05, "loss": 0.0358137, "step": 9291 }, { "epoch": 18.584, "grad_norm": 1.0085270404815674, "learning_rate": 2e-05, "loss": 0.03215361, "step": 9292 }, { "epoch": 18.586, "grad_norm": 1.081712245941162, "learning_rate": 2e-05, "loss": 0.02386444, "step": 9293 }, { "epoch": 18.588, "grad_norm": 1.1721680164337158, "learning_rate": 2e-05, "loss": 0.02527738, "step": 9294 }, { "epoch": 18.59, "grad_norm": 0.7640558481216431, "learning_rate": 2e-05, "loss": 0.02131698, "step": 9295 }, { "epoch": 18.592, "grad_norm": 1.278498888015747, "learning_rate": 2e-05, "loss": 0.03705659, "step": 9296 }, { "epoch": 18.594, "grad_norm": 1.4128645658493042, "learning_rate": 2e-05, "loss": 0.02741608, "step": 9297 }, { "epoch": 18.596, "grad_norm": 0.9089370369911194, "learning_rate": 2e-05, "loss": 0.02473187, "step": 9298 }, { "epoch": 18.598, "grad_norm": 1.2044442892074585, "learning_rate": 2e-05, "loss": 0.03941024, "step": 9299 }, { "epoch": 18.6, "grad_norm": 1.4648476839065552, "learning_rate": 2e-05, "loss": 0.02977973, "step": 9300 }, { "epoch": 18.602, "grad_norm": 2.9288134574890137, "learning_rate": 2e-05, "loss": 0.05101031, "step": 9301 }, { "epoch": 18.604, "grad_norm": 1.2922507524490356, "learning_rate": 2e-05, "loss": 0.03375695, "step": 9302 }, { "epoch": 18.606, "grad_norm": 3.628523588180542, "learning_rate": 2e-05, "loss": 0.03046746, "step": 9303 }, { "epoch": 18.608, "grad_norm": 1.5554486513137817, "learning_rate": 2e-05, "loss": 0.03474073, "step": 9304 }, { "epoch": 18.61, "grad_norm": 1.2011704444885254, "learning_rate": 2e-05, "loss": 0.03348228, "step": 9305 }, { "epoch": 18.612, "grad_norm": 1.0347377061843872, "learning_rate": 2e-05, "loss": 0.03440222, "step": 9306 }, { "epoch": 18.614, "grad_norm": 1.410341739654541, "learning_rate": 2e-05, "loss": 0.04505805, "step": 9307 }, { "epoch": 18.616, "grad_norm": 1.230013370513916, "learning_rate": 2e-05, "loss": 0.04353497, "step": 9308 }, { "epoch": 18.618, "grad_norm": 0.8737069964408875, "learning_rate": 2e-05, "loss": 0.02157835, "step": 9309 }, { "epoch": 18.62, "grad_norm": 1.7111151218414307, "learning_rate": 2e-05, "loss": 0.0389607, "step": 9310 }, { "epoch": 18.622, "grad_norm": 1.636101245880127, "learning_rate": 2e-05, "loss": 0.04191142, "step": 9311 }, { "epoch": 18.624, "grad_norm": 2.3487284183502197, "learning_rate": 2e-05, "loss": 0.04457171, "step": 9312 }, { "epoch": 18.626, "grad_norm": 1.1017884016036987, "learning_rate": 2e-05, "loss": 0.03230072, "step": 9313 }, { "epoch": 18.628, "grad_norm": 1.428139567375183, "learning_rate": 2e-05, "loss": 0.03711209, "step": 9314 }, { "epoch": 18.63, "grad_norm": 1.0108582973480225, "learning_rate": 2e-05, "loss": 0.03145947, "step": 9315 }, { "epoch": 18.632, "grad_norm": 1.5939596891403198, "learning_rate": 2e-05, "loss": 0.03615252, "step": 9316 }, { "epoch": 18.634, "grad_norm": 1.2237117290496826, "learning_rate": 2e-05, "loss": 0.03127633, "step": 9317 }, { "epoch": 18.636, "grad_norm": 0.8785711526870728, "learning_rate": 2e-05, "loss": 0.01814137, "step": 9318 }, { "epoch": 18.638, "grad_norm": 1.659043550491333, "learning_rate": 2e-05, "loss": 0.03371783, "step": 9319 }, { "epoch": 18.64, "grad_norm": 1.5230258703231812, "learning_rate": 2e-05, "loss": 0.04253473, "step": 9320 }, { "epoch": 18.642, "grad_norm": 1.0703048706054688, "learning_rate": 2e-05, "loss": 0.02780952, "step": 9321 }, { "epoch": 18.644, "grad_norm": 2.55739688873291, "learning_rate": 2e-05, "loss": 0.05347919, "step": 9322 }, { "epoch": 18.646, "grad_norm": 1.2097032070159912, "learning_rate": 2e-05, "loss": 0.03133424, "step": 9323 }, { "epoch": 18.648, "grad_norm": 1.7412601709365845, "learning_rate": 2e-05, "loss": 0.04346561, "step": 9324 }, { "epoch": 18.65, "grad_norm": 0.9796632528305054, "learning_rate": 2e-05, "loss": 0.02903066, "step": 9325 }, { "epoch": 18.652, "grad_norm": 1.5429102182388306, "learning_rate": 2e-05, "loss": 0.03083556, "step": 9326 }, { "epoch": 18.654, "grad_norm": 3.025692939758301, "learning_rate": 2e-05, "loss": 0.04267042, "step": 9327 }, { "epoch": 18.656, "grad_norm": 1.8448047637939453, "learning_rate": 2e-05, "loss": 0.04758323, "step": 9328 }, { "epoch": 18.658, "grad_norm": 1.5989882946014404, "learning_rate": 2e-05, "loss": 0.04394848, "step": 9329 }, { "epoch": 18.66, "grad_norm": 1.270090103149414, "learning_rate": 2e-05, "loss": 0.03506234, "step": 9330 }, { "epoch": 18.662, "grad_norm": 1.0124268531799316, "learning_rate": 2e-05, "loss": 0.02979084, "step": 9331 }, { "epoch": 18.664, "grad_norm": 1.4778650999069214, "learning_rate": 2e-05, "loss": 0.03793935, "step": 9332 }, { "epoch": 18.666, "grad_norm": 1.5023845434188843, "learning_rate": 2e-05, "loss": 0.0416206, "step": 9333 }, { "epoch": 18.668, "grad_norm": 0.8940832614898682, "learning_rate": 2e-05, "loss": 0.01142349, "step": 9334 }, { "epoch": 18.67, "grad_norm": 1.727463960647583, "learning_rate": 2e-05, "loss": 0.03897662, "step": 9335 }, { "epoch": 18.672, "grad_norm": 1.1958836317062378, "learning_rate": 2e-05, "loss": 0.04261857, "step": 9336 }, { "epoch": 18.674, "grad_norm": 1.020642638206482, "learning_rate": 2e-05, "loss": 0.02642402, "step": 9337 }, { "epoch": 18.676, "grad_norm": 2.0219318866729736, "learning_rate": 2e-05, "loss": 0.0680473, "step": 9338 }, { "epoch": 18.678, "grad_norm": 1.318962574005127, "learning_rate": 2e-05, "loss": 0.02908128, "step": 9339 }, { "epoch": 18.68, "grad_norm": 1.432193636894226, "learning_rate": 2e-05, "loss": 0.03741326, "step": 9340 }, { "epoch": 18.682, "grad_norm": 1.0566816329956055, "learning_rate": 2e-05, "loss": 0.03124825, "step": 9341 }, { "epoch": 18.684, "grad_norm": 1.3076931238174438, "learning_rate": 2e-05, "loss": 0.04089387, "step": 9342 }, { "epoch": 18.686, "grad_norm": 1.0010727643966675, "learning_rate": 2e-05, "loss": 0.03195993, "step": 9343 }, { "epoch": 18.688, "grad_norm": 1.1595393419265747, "learning_rate": 2e-05, "loss": 0.03774239, "step": 9344 }, { "epoch": 18.69, "grad_norm": 0.8064015507698059, "learning_rate": 2e-05, "loss": 0.02562628, "step": 9345 }, { "epoch": 18.692, "grad_norm": 1.1677731275558472, "learning_rate": 2e-05, "loss": 0.02810512, "step": 9346 }, { "epoch": 18.694, "grad_norm": 2.073591709136963, "learning_rate": 2e-05, "loss": 0.04026121, "step": 9347 }, { "epoch": 18.696, "grad_norm": 0.8951675891876221, "learning_rate": 2e-05, "loss": 0.02346078, "step": 9348 }, { "epoch": 18.698, "grad_norm": 1.4167735576629639, "learning_rate": 2e-05, "loss": 0.04481205, "step": 9349 }, { "epoch": 18.7, "grad_norm": 1.254514217376709, "learning_rate": 2e-05, "loss": 0.02620357, "step": 9350 }, { "epoch": 18.701999999999998, "grad_norm": 2.4272637367248535, "learning_rate": 2e-05, "loss": 0.04456335, "step": 9351 }, { "epoch": 18.704, "grad_norm": 1.1000620126724243, "learning_rate": 2e-05, "loss": 0.04522509, "step": 9352 }, { "epoch": 18.706, "grad_norm": 1.0736738443374634, "learning_rate": 2e-05, "loss": 0.04045687, "step": 9353 }, { "epoch": 18.708, "grad_norm": 2.3483619689941406, "learning_rate": 2e-05, "loss": 0.05076428, "step": 9354 }, { "epoch": 18.71, "grad_norm": 0.972253680229187, "learning_rate": 2e-05, "loss": 0.02645556, "step": 9355 }, { "epoch": 18.712, "grad_norm": 0.9229649305343628, "learning_rate": 2e-05, "loss": 0.02200065, "step": 9356 }, { "epoch": 18.714, "grad_norm": 1.2092173099517822, "learning_rate": 2e-05, "loss": 0.03871221, "step": 9357 }, { "epoch": 18.716, "grad_norm": 1.9008631706237793, "learning_rate": 2e-05, "loss": 0.04458017, "step": 9358 }, { "epoch": 18.718, "grad_norm": 1.0636965036392212, "learning_rate": 2e-05, "loss": 0.02840264, "step": 9359 }, { "epoch": 18.72, "grad_norm": 1.2430663108825684, "learning_rate": 2e-05, "loss": 0.03148837, "step": 9360 }, { "epoch": 18.722, "grad_norm": 1.123664140701294, "learning_rate": 2e-05, "loss": 0.03394098, "step": 9361 }, { "epoch": 18.724, "grad_norm": 1.0054221153259277, "learning_rate": 2e-05, "loss": 0.03370092, "step": 9362 }, { "epoch": 18.726, "grad_norm": 1.5903475284576416, "learning_rate": 2e-05, "loss": 0.04029074, "step": 9363 }, { "epoch": 18.728, "grad_norm": 1.9477814435958862, "learning_rate": 2e-05, "loss": 0.06019651, "step": 9364 }, { "epoch": 18.73, "grad_norm": 2.084622383117676, "learning_rate": 2e-05, "loss": 0.02988496, "step": 9365 }, { "epoch": 18.732, "grad_norm": 1.7150975465774536, "learning_rate": 2e-05, "loss": 0.05392132, "step": 9366 }, { "epoch": 18.734, "grad_norm": 0.9419837594032288, "learning_rate": 2e-05, "loss": 0.02924297, "step": 9367 }, { "epoch": 18.736, "grad_norm": 1.4422987699508667, "learning_rate": 2e-05, "loss": 0.03685901, "step": 9368 }, { "epoch": 18.738, "grad_norm": 0.9840115904808044, "learning_rate": 2e-05, "loss": 0.02506851, "step": 9369 }, { "epoch": 18.74, "grad_norm": 1.052154302597046, "learning_rate": 2e-05, "loss": 0.03087404, "step": 9370 }, { "epoch": 18.742, "grad_norm": 1.2569382190704346, "learning_rate": 2e-05, "loss": 0.04886915, "step": 9371 }, { "epoch": 18.744, "grad_norm": 1.5744709968566895, "learning_rate": 2e-05, "loss": 0.04542425, "step": 9372 }, { "epoch": 18.746, "grad_norm": 2.963470458984375, "learning_rate": 2e-05, "loss": 0.03224288, "step": 9373 }, { "epoch": 18.748, "grad_norm": 1.8991296291351318, "learning_rate": 2e-05, "loss": 0.046722, "step": 9374 }, { "epoch": 18.75, "grad_norm": 0.9656346440315247, "learning_rate": 2e-05, "loss": 0.03440123, "step": 9375 }, { "epoch": 18.752, "grad_norm": 1.0450048446655273, "learning_rate": 2e-05, "loss": 0.03775831, "step": 9376 }, { "epoch": 18.754, "grad_norm": 1.7124624252319336, "learning_rate": 2e-05, "loss": 0.05066157, "step": 9377 }, { "epoch": 18.756, "grad_norm": 1.501320242881775, "learning_rate": 2e-05, "loss": 0.03876662, "step": 9378 }, { "epoch": 18.758, "grad_norm": 1.2065678834915161, "learning_rate": 2e-05, "loss": 0.03976551, "step": 9379 }, { "epoch": 18.76, "grad_norm": 1.4827100038528442, "learning_rate": 2e-05, "loss": 0.03961009, "step": 9380 }, { "epoch": 18.762, "grad_norm": 1.3919841051101685, "learning_rate": 2e-05, "loss": 0.0386041, "step": 9381 }, { "epoch": 18.764, "grad_norm": 1.1867378950119019, "learning_rate": 2e-05, "loss": 0.03478377, "step": 9382 }, { "epoch": 18.766, "grad_norm": 1.0938994884490967, "learning_rate": 2e-05, "loss": 0.03122077, "step": 9383 }, { "epoch": 18.768, "grad_norm": 2.3887717723846436, "learning_rate": 2e-05, "loss": 0.04409286, "step": 9384 }, { "epoch": 18.77, "grad_norm": 1.0273598432540894, "learning_rate": 2e-05, "loss": 0.02485752, "step": 9385 }, { "epoch": 18.772, "grad_norm": 1.7304757833480835, "learning_rate": 2e-05, "loss": 0.03049651, "step": 9386 }, { "epoch": 18.774, "grad_norm": 1.9166194200515747, "learning_rate": 2e-05, "loss": 0.04044746, "step": 9387 }, { "epoch": 18.776, "grad_norm": 1.7198108434677124, "learning_rate": 2e-05, "loss": 0.0405063, "step": 9388 }, { "epoch": 18.778, "grad_norm": 1.8082114458084106, "learning_rate": 2e-05, "loss": 0.03769893, "step": 9389 }, { "epoch": 18.78, "grad_norm": 0.9580773115158081, "learning_rate": 2e-05, "loss": 0.03357112, "step": 9390 }, { "epoch": 18.782, "grad_norm": 1.059820532798767, "learning_rate": 2e-05, "loss": 0.02640706, "step": 9391 }, { "epoch": 18.784, "grad_norm": 4.033216953277588, "learning_rate": 2e-05, "loss": 0.03864349, "step": 9392 }, { "epoch": 18.786, "grad_norm": 1.1333658695220947, "learning_rate": 2e-05, "loss": 0.03008582, "step": 9393 }, { "epoch": 18.788, "grad_norm": 2.1095573902130127, "learning_rate": 2e-05, "loss": 0.03836686, "step": 9394 }, { "epoch": 18.79, "grad_norm": 0.7854803204536438, "learning_rate": 2e-05, "loss": 0.01916845, "step": 9395 }, { "epoch": 18.792, "grad_norm": 1.424535870552063, "learning_rate": 2e-05, "loss": 0.03236981, "step": 9396 }, { "epoch": 18.794, "grad_norm": 1.234863042831421, "learning_rate": 2e-05, "loss": 0.0439956, "step": 9397 }, { "epoch": 18.796, "grad_norm": 1.0828542709350586, "learning_rate": 2e-05, "loss": 0.03070717, "step": 9398 }, { "epoch": 18.798000000000002, "grad_norm": 1.125207781791687, "learning_rate": 2e-05, "loss": 0.03891785, "step": 9399 }, { "epoch": 18.8, "grad_norm": 1.4716780185699463, "learning_rate": 2e-05, "loss": 0.04283679, "step": 9400 }, { "epoch": 18.802, "grad_norm": 2.2038402557373047, "learning_rate": 2e-05, "loss": 0.04184093, "step": 9401 }, { "epoch": 18.804, "grad_norm": 2.594099283218384, "learning_rate": 2e-05, "loss": 0.0254508, "step": 9402 }, { "epoch": 18.806, "grad_norm": 0.977802038192749, "learning_rate": 2e-05, "loss": 0.02622373, "step": 9403 }, { "epoch": 18.808, "grad_norm": 1.5012508630752563, "learning_rate": 2e-05, "loss": 0.05293182, "step": 9404 }, { "epoch": 18.81, "grad_norm": 1.1779042482376099, "learning_rate": 2e-05, "loss": 0.03067445, "step": 9405 }, { "epoch": 18.812, "grad_norm": 1.9061391353607178, "learning_rate": 2e-05, "loss": 0.04364157, "step": 9406 }, { "epoch": 18.814, "grad_norm": 1.0848644971847534, "learning_rate": 2e-05, "loss": 0.03666593, "step": 9407 }, { "epoch": 18.816, "grad_norm": 4.1378960609436035, "learning_rate": 2e-05, "loss": 0.03240601, "step": 9408 }, { "epoch": 18.818, "grad_norm": 1.168457269668579, "learning_rate": 2e-05, "loss": 0.03614372, "step": 9409 }, { "epoch": 18.82, "grad_norm": 1.1519936323165894, "learning_rate": 2e-05, "loss": 0.04695103, "step": 9410 }, { "epoch": 18.822, "grad_norm": 1.9238035678863525, "learning_rate": 2e-05, "loss": 0.04975787, "step": 9411 }, { "epoch": 18.824, "grad_norm": 1.304675579071045, "learning_rate": 2e-05, "loss": 0.04074424, "step": 9412 }, { "epoch": 18.826, "grad_norm": 2.158198833465576, "learning_rate": 2e-05, "loss": 0.0502829, "step": 9413 }, { "epoch": 18.828, "grad_norm": 1.3099114894866943, "learning_rate": 2e-05, "loss": 0.04483989, "step": 9414 }, { "epoch": 18.83, "grad_norm": 1.9079896211624146, "learning_rate": 2e-05, "loss": 0.04286634, "step": 9415 }, { "epoch": 18.832, "grad_norm": 1.7033705711364746, "learning_rate": 2e-05, "loss": 0.03799369, "step": 9416 }, { "epoch": 18.834, "grad_norm": 0.991951584815979, "learning_rate": 2e-05, "loss": 0.03253823, "step": 9417 }, { "epoch": 18.836, "grad_norm": 1.4874738454818726, "learning_rate": 2e-05, "loss": 0.05053546, "step": 9418 }, { "epoch": 18.838, "grad_norm": 1.4518730640411377, "learning_rate": 2e-05, "loss": 0.03553774, "step": 9419 }, { "epoch": 18.84, "grad_norm": 1.0209287405014038, "learning_rate": 2e-05, "loss": 0.03443102, "step": 9420 }, { "epoch": 18.842, "grad_norm": 1.0967309474945068, "learning_rate": 2e-05, "loss": 0.02932144, "step": 9421 }, { "epoch": 18.844, "grad_norm": 2.3560574054718018, "learning_rate": 2e-05, "loss": 0.04702324, "step": 9422 }, { "epoch": 18.846, "grad_norm": 1.2899200916290283, "learning_rate": 2e-05, "loss": 0.0305502, "step": 9423 }, { "epoch": 18.848, "grad_norm": 1.8223811388015747, "learning_rate": 2e-05, "loss": 0.05424388, "step": 9424 }, { "epoch": 18.85, "grad_norm": 1.6738855838775635, "learning_rate": 2e-05, "loss": 0.04101273, "step": 9425 }, { "epoch": 18.852, "grad_norm": 1.2631632089614868, "learning_rate": 2e-05, "loss": 0.03289703, "step": 9426 }, { "epoch": 18.854, "grad_norm": 1.1822715997695923, "learning_rate": 2e-05, "loss": 0.03998087, "step": 9427 }, { "epoch": 18.856, "grad_norm": 1.7935429811477661, "learning_rate": 2e-05, "loss": 0.04708499, "step": 9428 }, { "epoch": 18.858, "grad_norm": 1.6989531517028809, "learning_rate": 2e-05, "loss": 0.03702974, "step": 9429 }, { "epoch": 18.86, "grad_norm": 1.411005973815918, "learning_rate": 2e-05, "loss": 0.03301138, "step": 9430 }, { "epoch": 18.862, "grad_norm": 1.575526475906372, "learning_rate": 2e-05, "loss": 0.0518712, "step": 9431 }, { "epoch": 18.864, "grad_norm": 0.8966975808143616, "learning_rate": 2e-05, "loss": 0.02268358, "step": 9432 }, { "epoch": 18.866, "grad_norm": 1.3550505638122559, "learning_rate": 2e-05, "loss": 0.03749962, "step": 9433 }, { "epoch": 18.868, "grad_norm": 1.207885980606079, "learning_rate": 2e-05, "loss": 0.0400594, "step": 9434 }, { "epoch": 18.87, "grad_norm": 1.0403681993484497, "learning_rate": 2e-05, "loss": 0.0228872, "step": 9435 }, { "epoch": 18.872, "grad_norm": 2.362938404083252, "learning_rate": 2e-05, "loss": 0.06088825, "step": 9436 }, { "epoch": 18.874, "grad_norm": 0.9768615365028381, "learning_rate": 2e-05, "loss": 0.03623533, "step": 9437 }, { "epoch": 18.876, "grad_norm": 0.7810379862785339, "learning_rate": 2e-05, "loss": 0.01865301, "step": 9438 }, { "epoch": 18.878, "grad_norm": 1.4565064907073975, "learning_rate": 2e-05, "loss": 0.04857381, "step": 9439 }, { "epoch": 18.88, "grad_norm": 1.5316739082336426, "learning_rate": 2e-05, "loss": 0.04568139, "step": 9440 }, { "epoch": 18.882, "grad_norm": 1.5420665740966797, "learning_rate": 2e-05, "loss": 0.03343572, "step": 9441 }, { "epoch": 18.884, "grad_norm": 1.7624716758728027, "learning_rate": 2e-05, "loss": 0.03562377, "step": 9442 }, { "epoch": 18.886, "grad_norm": 1.3358241319656372, "learning_rate": 2e-05, "loss": 0.06009006, "step": 9443 }, { "epoch": 18.888, "grad_norm": 1.3039106130599976, "learning_rate": 2e-05, "loss": 0.04013005, "step": 9444 }, { "epoch": 18.89, "grad_norm": 0.8214094638824463, "learning_rate": 2e-05, "loss": 0.02453827, "step": 9445 }, { "epoch": 18.892, "grad_norm": 2.3487486839294434, "learning_rate": 2e-05, "loss": 0.04817913, "step": 9446 }, { "epoch": 18.894, "grad_norm": 2.1647088527679443, "learning_rate": 2e-05, "loss": 0.03035383, "step": 9447 }, { "epoch": 18.896, "grad_norm": 1.254131555557251, "learning_rate": 2e-05, "loss": 0.03613605, "step": 9448 }, { "epoch": 18.898, "grad_norm": 1.1320441961288452, "learning_rate": 2e-05, "loss": 0.03620258, "step": 9449 }, { "epoch": 18.9, "grad_norm": 1.6133835315704346, "learning_rate": 2e-05, "loss": 0.03855583, "step": 9450 }, { "epoch": 18.902, "grad_norm": 0.9080222845077515, "learning_rate": 2e-05, "loss": 0.02581986, "step": 9451 }, { "epoch": 18.904, "grad_norm": 1.1648834943771362, "learning_rate": 2e-05, "loss": 0.04252192, "step": 9452 }, { "epoch": 18.906, "grad_norm": 1.1894398927688599, "learning_rate": 2e-05, "loss": 0.03881192, "step": 9453 }, { "epoch": 18.908, "grad_norm": 2.000978469848633, "learning_rate": 2e-05, "loss": 0.03810741, "step": 9454 }, { "epoch": 18.91, "grad_norm": 1.288516879081726, "learning_rate": 2e-05, "loss": 0.0437531, "step": 9455 }, { "epoch": 18.912, "grad_norm": 1.3083631992340088, "learning_rate": 2e-05, "loss": 0.03637118, "step": 9456 }, { "epoch": 18.914, "grad_norm": 1.1857115030288696, "learning_rate": 2e-05, "loss": 0.04493545, "step": 9457 }, { "epoch": 18.916, "grad_norm": 1.9343324899673462, "learning_rate": 2e-05, "loss": 0.04948426, "step": 9458 }, { "epoch": 18.918, "grad_norm": 1.187826156616211, "learning_rate": 2e-05, "loss": 0.03746122, "step": 9459 }, { "epoch": 18.92, "grad_norm": 2.1284165382385254, "learning_rate": 2e-05, "loss": 0.04689957, "step": 9460 }, { "epoch": 18.922, "grad_norm": 1.2563556432724, "learning_rate": 2e-05, "loss": 0.03775541, "step": 9461 }, { "epoch": 18.924, "grad_norm": 1.2920955419540405, "learning_rate": 2e-05, "loss": 0.02915121, "step": 9462 }, { "epoch": 18.926, "grad_norm": 1.910418152809143, "learning_rate": 2e-05, "loss": 0.04141461, "step": 9463 }, { "epoch": 18.928, "grad_norm": 0.9791347980499268, "learning_rate": 2e-05, "loss": 0.03308304, "step": 9464 }, { "epoch": 18.93, "grad_norm": 2.0668258666992188, "learning_rate": 2e-05, "loss": 0.04485352, "step": 9465 }, { "epoch": 18.932, "grad_norm": 0.8224574327468872, "learning_rate": 2e-05, "loss": 0.02453919, "step": 9466 }, { "epoch": 18.934, "grad_norm": 0.975167453289032, "learning_rate": 2e-05, "loss": 0.03143321, "step": 9467 }, { "epoch": 18.936, "grad_norm": 1.0642677545547485, "learning_rate": 2e-05, "loss": 0.03415136, "step": 9468 }, { "epoch": 18.938, "grad_norm": 0.8453977704048157, "learning_rate": 2e-05, "loss": 0.02789571, "step": 9469 }, { "epoch": 18.94, "grad_norm": 2.6224365234375, "learning_rate": 2e-05, "loss": 0.03373963, "step": 9470 }, { "epoch": 18.942, "grad_norm": 1.6013566255569458, "learning_rate": 2e-05, "loss": 0.02791347, "step": 9471 }, { "epoch": 18.944, "grad_norm": 1.6802016496658325, "learning_rate": 2e-05, "loss": 0.04182333, "step": 9472 }, { "epoch": 18.946, "grad_norm": 1.069710373878479, "learning_rate": 2e-05, "loss": 0.03297665, "step": 9473 }, { "epoch": 18.948, "grad_norm": 1.6262844800949097, "learning_rate": 2e-05, "loss": 0.05678248, "step": 9474 }, { "epoch": 18.95, "grad_norm": 1.6136468648910522, "learning_rate": 2e-05, "loss": 0.03860488, "step": 9475 }, { "epoch": 18.951999999999998, "grad_norm": 1.6454910039901733, "learning_rate": 2e-05, "loss": 0.0360266, "step": 9476 }, { "epoch": 18.954, "grad_norm": 1.3030211925506592, "learning_rate": 2e-05, "loss": 0.03737117, "step": 9477 }, { "epoch": 18.956, "grad_norm": 1.0022649765014648, "learning_rate": 2e-05, "loss": 0.03304816, "step": 9478 }, { "epoch": 18.958, "grad_norm": 2.1013195514678955, "learning_rate": 2e-05, "loss": 0.04374126, "step": 9479 }, { "epoch": 18.96, "grad_norm": 0.9510260224342346, "learning_rate": 2e-05, "loss": 0.03056635, "step": 9480 }, { "epoch": 18.962, "grad_norm": 1.7025002241134644, "learning_rate": 2e-05, "loss": 0.04591848, "step": 9481 }, { "epoch": 18.964, "grad_norm": 2.784985303878784, "learning_rate": 2e-05, "loss": 0.04702611, "step": 9482 }, { "epoch": 18.966, "grad_norm": 1.0321415662765503, "learning_rate": 2e-05, "loss": 0.03965008, "step": 9483 }, { "epoch": 18.968, "grad_norm": 1.3998851776123047, "learning_rate": 2e-05, "loss": 0.04656575, "step": 9484 }, { "epoch": 18.97, "grad_norm": 1.053924322128296, "learning_rate": 2e-05, "loss": 0.04150043, "step": 9485 }, { "epoch": 18.972, "grad_norm": 1.0211212635040283, "learning_rate": 2e-05, "loss": 0.03201551, "step": 9486 }, { "epoch": 18.974, "grad_norm": 1.109754204750061, "learning_rate": 2e-05, "loss": 0.03495897, "step": 9487 }, { "epoch": 18.976, "grad_norm": 0.9835900664329529, "learning_rate": 2e-05, "loss": 0.03508302, "step": 9488 }, { "epoch": 18.978, "grad_norm": 1.2372853755950928, "learning_rate": 2e-05, "loss": 0.02763392, "step": 9489 }, { "epoch": 18.98, "grad_norm": 1.1497222185134888, "learning_rate": 2e-05, "loss": 0.04323982, "step": 9490 }, { "epoch": 18.982, "grad_norm": 0.9017625451087952, "learning_rate": 2e-05, "loss": 0.03254882, "step": 9491 }, { "epoch": 18.984, "grad_norm": 1.0615708827972412, "learning_rate": 2e-05, "loss": 0.02894014, "step": 9492 }, { "epoch": 18.986, "grad_norm": 1.5303292274475098, "learning_rate": 2e-05, "loss": 0.0392815, "step": 9493 }, { "epoch": 18.988, "grad_norm": 1.4052002429962158, "learning_rate": 2e-05, "loss": 0.0530249, "step": 9494 }, { "epoch": 18.99, "grad_norm": 1.3222413063049316, "learning_rate": 2e-05, "loss": 0.03406934, "step": 9495 }, { "epoch": 18.992, "grad_norm": 1.365356683731079, "learning_rate": 2e-05, "loss": 0.03618102, "step": 9496 }, { "epoch": 18.994, "grad_norm": 1.526374101638794, "learning_rate": 2e-05, "loss": 0.03938029, "step": 9497 }, { "epoch": 18.996, "grad_norm": 1.2375332117080688, "learning_rate": 2e-05, "loss": 0.04605676, "step": 9498 }, { "epoch": 18.998, "grad_norm": 1.3788480758666992, "learning_rate": 2e-05, "loss": 0.04089382, "step": 9499 }, { "epoch": 19.0, "grad_norm": 1.1874690055847168, "learning_rate": 2e-05, "loss": 0.03390002, "step": 9500 }, { "epoch": 19.0, "eval_performance": { "AngleClassification_1": 0.992, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9720558882235529, "Equal_1": 0.994, "Equal_2": 0.9720558882235529, "Equal_3": 0.9620758483033932, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 0.994, "Perpendicular_2": 0.986, "Perpendicular_3": 0.7935871743486974, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9973333333333333, "PointLiesOnCircle_3": 0.9933333333333333, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9780439121756487 }, "eval_runtime": 319.3618, "eval_samples_per_second": 32.878, "eval_steps_per_second": 0.658, "step": 9500 }, { "epoch": 19.002, "grad_norm": 0.7328594923019409, "learning_rate": 2e-05, "loss": 0.02319442, "step": 9501 }, { "epoch": 19.004, "grad_norm": 1.1689836978912354, "learning_rate": 2e-05, "loss": 0.03362505, "step": 9502 }, { "epoch": 19.006, "grad_norm": 1.2494908571243286, "learning_rate": 2e-05, "loss": 0.04948422, "step": 9503 }, { "epoch": 19.008, "grad_norm": 1.5237233638763428, "learning_rate": 2e-05, "loss": 0.03897671, "step": 9504 }, { "epoch": 19.01, "grad_norm": 1.9613434076309204, "learning_rate": 2e-05, "loss": 0.0261043, "step": 9505 }, { "epoch": 19.012, "grad_norm": 0.9122869372367859, "learning_rate": 2e-05, "loss": 0.0235957, "step": 9506 }, { "epoch": 19.014, "grad_norm": 2.309724807739258, "learning_rate": 2e-05, "loss": 0.04885814, "step": 9507 }, { "epoch": 19.016, "grad_norm": 1.0690571069717407, "learning_rate": 2e-05, "loss": 0.0407089, "step": 9508 }, { "epoch": 19.018, "grad_norm": 1.2194948196411133, "learning_rate": 2e-05, "loss": 0.03530276, "step": 9509 }, { "epoch": 19.02, "grad_norm": 1.3204545974731445, "learning_rate": 2e-05, "loss": 0.03693368, "step": 9510 }, { "epoch": 19.022, "grad_norm": 1.1874244213104248, "learning_rate": 2e-05, "loss": 0.03080766, "step": 9511 }, { "epoch": 19.024, "grad_norm": 2.5260133743286133, "learning_rate": 2e-05, "loss": 0.04506269, "step": 9512 }, { "epoch": 19.026, "grad_norm": 0.8363938331604004, "learning_rate": 2e-05, "loss": 0.02124131, "step": 9513 }, { "epoch": 19.028, "grad_norm": 2.5137112140655518, "learning_rate": 2e-05, "loss": 0.03363831, "step": 9514 }, { "epoch": 19.03, "grad_norm": 1.983594536781311, "learning_rate": 2e-05, "loss": 0.04035079, "step": 9515 }, { "epoch": 19.032, "grad_norm": 2.5143167972564697, "learning_rate": 2e-05, "loss": 0.05136511, "step": 9516 }, { "epoch": 19.034, "grad_norm": 1.130037784576416, "learning_rate": 2e-05, "loss": 0.03966995, "step": 9517 }, { "epoch": 19.036, "grad_norm": 1.764394760131836, "learning_rate": 2e-05, "loss": 0.04287095, "step": 9518 }, { "epoch": 19.038, "grad_norm": 1.359204888343811, "learning_rate": 2e-05, "loss": 0.03334723, "step": 9519 }, { "epoch": 19.04, "grad_norm": 1.1975325345993042, "learning_rate": 2e-05, "loss": 0.03555122, "step": 9520 }, { "epoch": 19.042, "grad_norm": 0.9960451126098633, "learning_rate": 2e-05, "loss": 0.02776124, "step": 9521 }, { "epoch": 19.044, "grad_norm": 1.2696477174758911, "learning_rate": 2e-05, "loss": 0.02704079, "step": 9522 }, { "epoch": 19.046, "grad_norm": 1.127091646194458, "learning_rate": 2e-05, "loss": 0.03138428, "step": 9523 }, { "epoch": 19.048, "grad_norm": 2.801011800765991, "learning_rate": 2e-05, "loss": 0.04420168, "step": 9524 }, { "epoch": 19.05, "grad_norm": 1.672225832939148, "learning_rate": 2e-05, "loss": 0.03045794, "step": 9525 }, { "epoch": 19.052, "grad_norm": 1.3001759052276611, "learning_rate": 2e-05, "loss": 0.03507664, "step": 9526 }, { "epoch": 19.054, "grad_norm": 1.307578444480896, "learning_rate": 2e-05, "loss": 0.03051586, "step": 9527 }, { "epoch": 19.056, "grad_norm": 2.060009479522705, "learning_rate": 2e-05, "loss": 0.04484456, "step": 9528 }, { "epoch": 19.058, "grad_norm": 1.9255805015563965, "learning_rate": 2e-05, "loss": 0.03917917, "step": 9529 }, { "epoch": 19.06, "grad_norm": 0.9870061874389648, "learning_rate": 2e-05, "loss": 0.02856109, "step": 9530 }, { "epoch": 19.062, "grad_norm": 1.398775577545166, "learning_rate": 2e-05, "loss": 0.04870712, "step": 9531 }, { "epoch": 19.064, "grad_norm": 1.2352436780929565, "learning_rate": 2e-05, "loss": 0.04471589, "step": 9532 }, { "epoch": 19.066, "grad_norm": 1.8325438499450684, "learning_rate": 2e-05, "loss": 0.04382673, "step": 9533 }, { "epoch": 19.068, "grad_norm": 1.0861464738845825, "learning_rate": 2e-05, "loss": 0.02810282, "step": 9534 }, { "epoch": 19.07, "grad_norm": 3.1042890548706055, "learning_rate": 2e-05, "loss": 0.04670685, "step": 9535 }, { "epoch": 19.072, "grad_norm": 1.2983977794647217, "learning_rate": 2e-05, "loss": 0.03450656, "step": 9536 }, { "epoch": 19.074, "grad_norm": 2.436805248260498, "learning_rate": 2e-05, "loss": 0.03273325, "step": 9537 }, { "epoch": 19.076, "grad_norm": 1.8270798921585083, "learning_rate": 2e-05, "loss": 0.05142706, "step": 9538 }, { "epoch": 19.078, "grad_norm": 1.7410194873809814, "learning_rate": 2e-05, "loss": 0.04839787, "step": 9539 }, { "epoch": 19.08, "grad_norm": 0.7884871959686279, "learning_rate": 2e-05, "loss": 0.02119577, "step": 9540 }, { "epoch": 19.082, "grad_norm": 1.4480359554290771, "learning_rate": 2e-05, "loss": 0.03697205, "step": 9541 }, { "epoch": 19.084, "grad_norm": 0.9422207474708557, "learning_rate": 2e-05, "loss": 0.02769065, "step": 9542 }, { "epoch": 19.086, "grad_norm": 0.791770339012146, "learning_rate": 2e-05, "loss": 0.01657294, "step": 9543 }, { "epoch": 19.088, "grad_norm": 1.4777929782867432, "learning_rate": 2e-05, "loss": 0.04559475, "step": 9544 }, { "epoch": 19.09, "grad_norm": 1.4669833183288574, "learning_rate": 2e-05, "loss": 0.03167301, "step": 9545 }, { "epoch": 19.092, "grad_norm": 1.2533551454544067, "learning_rate": 2e-05, "loss": 0.02627946, "step": 9546 }, { "epoch": 19.094, "grad_norm": 1.4709750413894653, "learning_rate": 2e-05, "loss": 0.04461734, "step": 9547 }, { "epoch": 19.096, "grad_norm": 1.149090051651001, "learning_rate": 2e-05, "loss": 0.03871008, "step": 9548 }, { "epoch": 19.098, "grad_norm": 1.1180267333984375, "learning_rate": 2e-05, "loss": 0.03474029, "step": 9549 }, { "epoch": 19.1, "grad_norm": 1.3500217199325562, "learning_rate": 2e-05, "loss": 0.02843937, "step": 9550 }, { "epoch": 19.102, "grad_norm": 1.7513254880905151, "learning_rate": 2e-05, "loss": 0.0540585, "step": 9551 }, { "epoch": 19.104, "grad_norm": 1.781827688217163, "learning_rate": 2e-05, "loss": 0.04126269, "step": 9552 }, { "epoch": 19.106, "grad_norm": 1.1269136667251587, "learning_rate": 2e-05, "loss": 0.03399577, "step": 9553 }, { "epoch": 19.108, "grad_norm": 1.8535258769989014, "learning_rate": 2e-05, "loss": 0.05306114, "step": 9554 }, { "epoch": 19.11, "grad_norm": 2.0714800357818604, "learning_rate": 2e-05, "loss": 0.0468117, "step": 9555 }, { "epoch": 19.112, "grad_norm": 1.0497852563858032, "learning_rate": 2e-05, "loss": 0.02673732, "step": 9556 }, { "epoch": 19.114, "grad_norm": 0.9615012407302856, "learning_rate": 2e-05, "loss": 0.02658118, "step": 9557 }, { "epoch": 19.116, "grad_norm": 3.7830493450164795, "learning_rate": 2e-05, "loss": 0.04481242, "step": 9558 }, { "epoch": 19.118, "grad_norm": 1.9422591924667358, "learning_rate": 2e-05, "loss": 0.05550155, "step": 9559 }, { "epoch": 19.12, "grad_norm": 1.294343113899231, "learning_rate": 2e-05, "loss": 0.03953799, "step": 9560 }, { "epoch": 19.122, "grad_norm": 1.5340063571929932, "learning_rate": 2e-05, "loss": 0.03284139, "step": 9561 }, { "epoch": 19.124, "grad_norm": 1.1982046365737915, "learning_rate": 2e-05, "loss": 0.04478461, "step": 9562 }, { "epoch": 19.126, "grad_norm": 1.1274042129516602, "learning_rate": 2e-05, "loss": 0.03823815, "step": 9563 }, { "epoch": 19.128, "grad_norm": 1.3324651718139648, "learning_rate": 2e-05, "loss": 0.04417039, "step": 9564 }, { "epoch": 19.13, "grad_norm": 3.788226366043091, "learning_rate": 2e-05, "loss": 0.04432316, "step": 9565 }, { "epoch": 19.132, "grad_norm": 1.0010766983032227, "learning_rate": 2e-05, "loss": 0.02808233, "step": 9566 }, { "epoch": 19.134, "grad_norm": 1.5182632207870483, "learning_rate": 2e-05, "loss": 0.04026213, "step": 9567 }, { "epoch": 19.136, "grad_norm": 1.1371148824691772, "learning_rate": 2e-05, "loss": 0.03047445, "step": 9568 }, { "epoch": 19.138, "grad_norm": 1.003008246421814, "learning_rate": 2e-05, "loss": 0.03161223, "step": 9569 }, { "epoch": 19.14, "grad_norm": 1.9005447626113892, "learning_rate": 2e-05, "loss": 0.04390565, "step": 9570 }, { "epoch": 19.142, "grad_norm": 1.1964303255081177, "learning_rate": 2e-05, "loss": 0.03318737, "step": 9571 }, { "epoch": 19.144, "grad_norm": 1.471183180809021, "learning_rate": 2e-05, "loss": 0.04245757, "step": 9572 }, { "epoch": 19.146, "grad_norm": 1.0873215198516846, "learning_rate": 2e-05, "loss": 0.03344093, "step": 9573 }, { "epoch": 19.148, "grad_norm": 1.1384050846099854, "learning_rate": 2e-05, "loss": 0.03804214, "step": 9574 }, { "epoch": 19.15, "grad_norm": 1.8956702947616577, "learning_rate": 2e-05, "loss": 0.04579899, "step": 9575 }, { "epoch": 19.152, "grad_norm": 0.9021958112716675, "learning_rate": 2e-05, "loss": 0.02364812, "step": 9576 }, { "epoch": 19.154, "grad_norm": 0.9730253219604492, "learning_rate": 2e-05, "loss": 0.0266248, "step": 9577 }, { "epoch": 19.156, "grad_norm": 1.2630407810211182, "learning_rate": 2e-05, "loss": 0.03151676, "step": 9578 }, { "epoch": 19.158, "grad_norm": 1.0007412433624268, "learning_rate": 2e-05, "loss": 0.03561787, "step": 9579 }, { "epoch": 19.16, "grad_norm": 1.62027907371521, "learning_rate": 2e-05, "loss": 0.05210748, "step": 9580 }, { "epoch": 19.162, "grad_norm": 0.950829029083252, "learning_rate": 2e-05, "loss": 0.03114957, "step": 9581 }, { "epoch": 19.164, "grad_norm": 1.3806723356246948, "learning_rate": 2e-05, "loss": 0.04665224, "step": 9582 }, { "epoch": 19.166, "grad_norm": 4.24110221862793, "learning_rate": 2e-05, "loss": 0.03417417, "step": 9583 }, { "epoch": 19.168, "grad_norm": 1.3316015005111694, "learning_rate": 2e-05, "loss": 0.04237635, "step": 9584 }, { "epoch": 19.17, "grad_norm": 1.7277883291244507, "learning_rate": 2e-05, "loss": 0.03498271, "step": 9585 }, { "epoch": 19.172, "grad_norm": 1.2385425567626953, "learning_rate": 2e-05, "loss": 0.03356936, "step": 9586 }, { "epoch": 19.174, "grad_norm": 0.8457931280136108, "learning_rate": 2e-05, "loss": 0.01994565, "step": 9587 }, { "epoch": 19.176, "grad_norm": 1.2455838918685913, "learning_rate": 2e-05, "loss": 0.03502472, "step": 9588 }, { "epoch": 19.178, "grad_norm": 1.9572054147720337, "learning_rate": 2e-05, "loss": 0.03634415, "step": 9589 }, { "epoch": 19.18, "grad_norm": 1.2908679246902466, "learning_rate": 2e-05, "loss": 0.03044868, "step": 9590 }, { "epoch": 19.182, "grad_norm": 2.5042431354522705, "learning_rate": 2e-05, "loss": 0.04156593, "step": 9591 }, { "epoch": 19.184, "grad_norm": 1.0194517374038696, "learning_rate": 2e-05, "loss": 0.02526481, "step": 9592 }, { "epoch": 19.186, "grad_norm": 1.0619885921478271, "learning_rate": 2e-05, "loss": 0.0298758, "step": 9593 }, { "epoch": 19.188, "grad_norm": 1.263814926147461, "learning_rate": 2e-05, "loss": 0.03626692, "step": 9594 }, { "epoch": 19.19, "grad_norm": 2.5439209938049316, "learning_rate": 2e-05, "loss": 0.05030162, "step": 9595 }, { "epoch": 19.192, "grad_norm": 1.6095026731491089, "learning_rate": 2e-05, "loss": 0.04484971, "step": 9596 }, { "epoch": 19.194, "grad_norm": 1.921481728553772, "learning_rate": 2e-05, "loss": 0.05408298, "step": 9597 }, { "epoch": 19.196, "grad_norm": 2.8024349212646484, "learning_rate": 2e-05, "loss": 0.05455287, "step": 9598 }, { "epoch": 19.198, "grad_norm": 0.9602892398834229, "learning_rate": 2e-05, "loss": 0.02859609, "step": 9599 }, { "epoch": 19.2, "grad_norm": 1.6992692947387695, "learning_rate": 2e-05, "loss": 0.03378526, "step": 9600 }, { "epoch": 19.202, "grad_norm": 2.3561947345733643, "learning_rate": 2e-05, "loss": 0.03298889, "step": 9601 }, { "epoch": 19.204, "grad_norm": 1.4221720695495605, "learning_rate": 2e-05, "loss": 0.04421243, "step": 9602 }, { "epoch": 19.206, "grad_norm": 1.0201853513717651, "learning_rate": 2e-05, "loss": 0.02438156, "step": 9603 }, { "epoch": 19.208, "grad_norm": 0.9587017893791199, "learning_rate": 2e-05, "loss": 0.02957552, "step": 9604 }, { "epoch": 19.21, "grad_norm": 1.138653039932251, "learning_rate": 2e-05, "loss": 0.02991896, "step": 9605 }, { "epoch": 19.212, "grad_norm": 2.0702502727508545, "learning_rate": 2e-05, "loss": 0.03139401, "step": 9606 }, { "epoch": 19.214, "grad_norm": 1.0454638004302979, "learning_rate": 2e-05, "loss": 0.02659562, "step": 9607 }, { "epoch": 19.216, "grad_norm": 2.350715160369873, "learning_rate": 2e-05, "loss": 0.05148138, "step": 9608 }, { "epoch": 19.218, "grad_norm": 0.7986869812011719, "learning_rate": 2e-05, "loss": 0.02145108, "step": 9609 }, { "epoch": 19.22, "grad_norm": 1.9818875789642334, "learning_rate": 2e-05, "loss": 0.04348661, "step": 9610 }, { "epoch": 19.222, "grad_norm": 1.72350013256073, "learning_rate": 2e-05, "loss": 0.03537084, "step": 9611 }, { "epoch": 19.224, "grad_norm": 0.917288601398468, "learning_rate": 2e-05, "loss": 0.02850287, "step": 9612 }, { "epoch": 19.226, "grad_norm": 2.157785177230835, "learning_rate": 2e-05, "loss": 0.04212007, "step": 9613 }, { "epoch": 19.228, "grad_norm": 1.6661065816879272, "learning_rate": 2e-05, "loss": 0.04836546, "step": 9614 }, { "epoch": 19.23, "grad_norm": 1.0794106721878052, "learning_rate": 2e-05, "loss": 0.0291114, "step": 9615 }, { "epoch": 19.232, "grad_norm": 1.1897625923156738, "learning_rate": 2e-05, "loss": 0.04130687, "step": 9616 }, { "epoch": 19.234, "grad_norm": 1.4216676950454712, "learning_rate": 2e-05, "loss": 0.03022526, "step": 9617 }, { "epoch": 19.236, "grad_norm": 1.3475738763809204, "learning_rate": 2e-05, "loss": 0.04719062, "step": 9618 }, { "epoch": 19.238, "grad_norm": 1.4195927381515503, "learning_rate": 2e-05, "loss": 0.03779185, "step": 9619 }, { "epoch": 19.24, "grad_norm": 1.1476095914840698, "learning_rate": 2e-05, "loss": 0.0434536, "step": 9620 }, { "epoch": 19.242, "grad_norm": 1.682695984840393, "learning_rate": 2e-05, "loss": 0.05591667, "step": 9621 }, { "epoch": 19.244, "grad_norm": 1.1905351877212524, "learning_rate": 2e-05, "loss": 0.0391013, "step": 9622 }, { "epoch": 19.246, "grad_norm": 1.3827733993530273, "learning_rate": 2e-05, "loss": 0.02527856, "step": 9623 }, { "epoch": 19.248, "grad_norm": 1.235648512840271, "learning_rate": 2e-05, "loss": 0.03677778, "step": 9624 }, { "epoch": 19.25, "grad_norm": 2.980347156524658, "learning_rate": 2e-05, "loss": 0.06639814, "step": 9625 }, { "epoch": 19.252, "grad_norm": 1.079358458518982, "learning_rate": 2e-05, "loss": 0.0342607, "step": 9626 }, { "epoch": 19.254, "grad_norm": 1.3070192337036133, "learning_rate": 2e-05, "loss": 0.03877965, "step": 9627 }, { "epoch": 19.256, "grad_norm": 1.132627010345459, "learning_rate": 2e-05, "loss": 0.03127551, "step": 9628 }, { "epoch": 19.258, "grad_norm": 1.1576032638549805, "learning_rate": 2e-05, "loss": 0.03139985, "step": 9629 }, { "epoch": 19.26, "grad_norm": 1.2397043704986572, "learning_rate": 2e-05, "loss": 0.04228866, "step": 9630 }, { "epoch": 19.262, "grad_norm": 1.0662872791290283, "learning_rate": 2e-05, "loss": 0.03128354, "step": 9631 }, { "epoch": 19.264, "grad_norm": 3.369060754776001, "learning_rate": 2e-05, "loss": 0.03271961, "step": 9632 }, { "epoch": 19.266, "grad_norm": 1.7384709119796753, "learning_rate": 2e-05, "loss": 0.04713779, "step": 9633 }, { "epoch": 19.268, "grad_norm": 1.8644956350326538, "learning_rate": 2e-05, "loss": 0.03739468, "step": 9634 }, { "epoch": 19.27, "grad_norm": 1.260280966758728, "learning_rate": 2e-05, "loss": 0.0264209, "step": 9635 }, { "epoch": 19.272, "grad_norm": 1.2313249111175537, "learning_rate": 2e-05, "loss": 0.03959552, "step": 9636 }, { "epoch": 19.274, "grad_norm": 1.3005576133728027, "learning_rate": 2e-05, "loss": 0.03051363, "step": 9637 }, { "epoch": 19.276, "grad_norm": 1.6750763654708862, "learning_rate": 2e-05, "loss": 0.05048709, "step": 9638 }, { "epoch": 19.278, "grad_norm": 1.6315701007843018, "learning_rate": 2e-05, "loss": 0.02604922, "step": 9639 }, { "epoch": 19.28, "grad_norm": 1.7863506078720093, "learning_rate": 2e-05, "loss": 0.03610089, "step": 9640 }, { "epoch": 19.282, "grad_norm": 1.3061033487319946, "learning_rate": 2e-05, "loss": 0.04164078, "step": 9641 }, { "epoch": 19.284, "grad_norm": 1.4289368391036987, "learning_rate": 2e-05, "loss": 0.02338367, "step": 9642 }, { "epoch": 19.286, "grad_norm": 1.3021084070205688, "learning_rate": 2e-05, "loss": 0.04391333, "step": 9643 }, { "epoch": 19.288, "grad_norm": 1.178717851638794, "learning_rate": 2e-05, "loss": 0.03647342, "step": 9644 }, { "epoch": 19.29, "grad_norm": 0.8449578881263733, "learning_rate": 2e-05, "loss": 0.02405243, "step": 9645 }, { "epoch": 19.292, "grad_norm": 1.4764927625656128, "learning_rate": 2e-05, "loss": 0.05233892, "step": 9646 }, { "epoch": 19.294, "grad_norm": 1.8029993772506714, "learning_rate": 2e-05, "loss": 0.03435803, "step": 9647 }, { "epoch": 19.296, "grad_norm": 1.4311555624008179, "learning_rate": 2e-05, "loss": 0.03782001, "step": 9648 }, { "epoch": 19.298, "grad_norm": 1.300187587738037, "learning_rate": 2e-05, "loss": 0.05648835, "step": 9649 }, { "epoch": 19.3, "grad_norm": 1.4080634117126465, "learning_rate": 2e-05, "loss": 0.03893708, "step": 9650 }, { "epoch": 19.302, "grad_norm": 2.201977014541626, "learning_rate": 2e-05, "loss": 0.04349912, "step": 9651 }, { "epoch": 19.304, "grad_norm": 0.9720236659049988, "learning_rate": 2e-05, "loss": 0.02697369, "step": 9652 }, { "epoch": 19.306, "grad_norm": 1.6441378593444824, "learning_rate": 2e-05, "loss": 0.05696252, "step": 9653 }, { "epoch": 19.308, "grad_norm": 1.6382756233215332, "learning_rate": 2e-05, "loss": 0.03626692, "step": 9654 }, { "epoch": 19.31, "grad_norm": 1.034172534942627, "learning_rate": 2e-05, "loss": 0.03107112, "step": 9655 }, { "epoch": 19.312, "grad_norm": 1.183516263961792, "learning_rate": 2e-05, "loss": 0.03426999, "step": 9656 }, { "epoch": 19.314, "grad_norm": 1.6998095512390137, "learning_rate": 2e-05, "loss": 0.03958786, "step": 9657 }, { "epoch": 19.316, "grad_norm": 1.598720669746399, "learning_rate": 2e-05, "loss": 0.03835572, "step": 9658 }, { "epoch": 19.318, "grad_norm": 1.1745725870132446, "learning_rate": 2e-05, "loss": 0.03925322, "step": 9659 }, { "epoch": 19.32, "grad_norm": 1.0180588960647583, "learning_rate": 2e-05, "loss": 0.03550194, "step": 9660 }, { "epoch": 19.322, "grad_norm": 0.9355264902114868, "learning_rate": 2e-05, "loss": 0.03325229, "step": 9661 }, { "epoch": 19.324, "grad_norm": 1.2007991075515747, "learning_rate": 2e-05, "loss": 0.04493001, "step": 9662 }, { "epoch": 19.326, "grad_norm": 0.86708664894104, "learning_rate": 2e-05, "loss": 0.02545637, "step": 9663 }, { "epoch": 19.328, "grad_norm": 1.0623421669006348, "learning_rate": 2e-05, "loss": 0.03543958, "step": 9664 }, { "epoch": 19.33, "grad_norm": 1.6305174827575684, "learning_rate": 2e-05, "loss": 0.04481956, "step": 9665 }, { "epoch": 19.332, "grad_norm": 2.0141186714172363, "learning_rate": 2e-05, "loss": 0.04622241, "step": 9666 }, { "epoch": 19.334, "grad_norm": 1.1043932437896729, "learning_rate": 2e-05, "loss": 0.03127149, "step": 9667 }, { "epoch": 19.336, "grad_norm": 1.262362003326416, "learning_rate": 2e-05, "loss": 0.02693222, "step": 9668 }, { "epoch": 19.338, "grad_norm": 2.6822168827056885, "learning_rate": 2e-05, "loss": 0.03670801, "step": 9669 }, { "epoch": 19.34, "grad_norm": 1.137508749961853, "learning_rate": 2e-05, "loss": 0.03024655, "step": 9670 }, { "epoch": 19.342, "grad_norm": 1.265586018562317, "learning_rate": 2e-05, "loss": 0.03883039, "step": 9671 }, { "epoch": 19.344, "grad_norm": 1.6028434038162231, "learning_rate": 2e-05, "loss": 0.04389777, "step": 9672 }, { "epoch": 19.346, "grad_norm": 2.5654678344726562, "learning_rate": 2e-05, "loss": 0.049094, "step": 9673 }, { "epoch": 19.348, "grad_norm": 1.247218370437622, "learning_rate": 2e-05, "loss": 0.04227669, "step": 9674 }, { "epoch": 19.35, "grad_norm": 0.9476075768470764, "learning_rate": 2e-05, "loss": 0.02782707, "step": 9675 }, { "epoch": 19.352, "grad_norm": 1.2325526475906372, "learning_rate": 2e-05, "loss": 0.02918253, "step": 9676 }, { "epoch": 19.354, "grad_norm": 1.1856415271759033, "learning_rate": 2e-05, "loss": 0.0287297, "step": 9677 }, { "epoch": 19.356, "grad_norm": 0.832682728767395, "learning_rate": 2e-05, "loss": 0.02484965, "step": 9678 }, { "epoch": 19.358, "grad_norm": 2.037794828414917, "learning_rate": 2e-05, "loss": 0.0416809, "step": 9679 }, { "epoch": 19.36, "grad_norm": 1.2113049030303955, "learning_rate": 2e-05, "loss": 0.03574871, "step": 9680 }, { "epoch": 19.362, "grad_norm": 1.0858124494552612, "learning_rate": 2e-05, "loss": 0.03182752, "step": 9681 }, { "epoch": 19.364, "grad_norm": 1.3244341611862183, "learning_rate": 2e-05, "loss": 0.0379288, "step": 9682 }, { "epoch": 19.366, "grad_norm": 0.96357661485672, "learning_rate": 2e-05, "loss": 0.03285439, "step": 9683 }, { "epoch": 19.368, "grad_norm": 0.9765337109565735, "learning_rate": 2e-05, "loss": 0.02620794, "step": 9684 }, { "epoch": 19.37, "grad_norm": 0.8831155300140381, "learning_rate": 2e-05, "loss": 0.02626178, "step": 9685 }, { "epoch": 19.372, "grad_norm": 1.1981676816940308, "learning_rate": 2e-05, "loss": 0.02839826, "step": 9686 }, { "epoch": 19.374, "grad_norm": 3.4404029846191406, "learning_rate": 2e-05, "loss": 0.04103406, "step": 9687 }, { "epoch": 19.376, "grad_norm": 1.0752460956573486, "learning_rate": 2e-05, "loss": 0.02952642, "step": 9688 }, { "epoch": 19.378, "grad_norm": 1.3563824892044067, "learning_rate": 2e-05, "loss": 0.0425565, "step": 9689 }, { "epoch": 19.38, "grad_norm": 1.359243631362915, "learning_rate": 2e-05, "loss": 0.02972773, "step": 9690 }, { "epoch": 19.382, "grad_norm": 1.3436927795410156, "learning_rate": 2e-05, "loss": 0.04154352, "step": 9691 }, { "epoch": 19.384, "grad_norm": 1.4160605669021606, "learning_rate": 2e-05, "loss": 0.04017557, "step": 9692 }, { "epoch": 19.386, "grad_norm": 2.5221076011657715, "learning_rate": 2e-05, "loss": 0.04263007, "step": 9693 }, { "epoch": 19.388, "grad_norm": 1.3207120895385742, "learning_rate": 2e-05, "loss": 0.04798321, "step": 9694 }, { "epoch": 19.39, "grad_norm": 1.1256715059280396, "learning_rate": 2e-05, "loss": 0.03117888, "step": 9695 }, { "epoch": 19.392, "grad_norm": 1.1344276666641235, "learning_rate": 2e-05, "loss": 0.03253318, "step": 9696 }, { "epoch": 19.394, "grad_norm": 1.3823455572128296, "learning_rate": 2e-05, "loss": 0.03994993, "step": 9697 }, { "epoch": 19.396, "grad_norm": 2.88213849067688, "learning_rate": 2e-05, "loss": 0.03269661, "step": 9698 }, { "epoch": 19.398, "grad_norm": 1.369133472442627, "learning_rate": 2e-05, "loss": 0.04676585, "step": 9699 }, { "epoch": 19.4, "grad_norm": 1.3740583658218384, "learning_rate": 2e-05, "loss": 0.03353403, "step": 9700 }, { "epoch": 19.402, "grad_norm": 1.107534646987915, "learning_rate": 2e-05, "loss": 0.03524677, "step": 9701 }, { "epoch": 19.404, "grad_norm": 1.0297796726226807, "learning_rate": 2e-05, "loss": 0.03444082, "step": 9702 }, { "epoch": 19.406, "grad_norm": 1.664433240890503, "learning_rate": 2e-05, "loss": 0.04659187, "step": 9703 }, { "epoch": 19.408, "grad_norm": 1.3409240245819092, "learning_rate": 2e-05, "loss": 0.03735002, "step": 9704 }, { "epoch": 19.41, "grad_norm": 2.016294479370117, "learning_rate": 2e-05, "loss": 0.03130295, "step": 9705 }, { "epoch": 19.412, "grad_norm": 2.1538240909576416, "learning_rate": 2e-05, "loss": 0.05228285, "step": 9706 }, { "epoch": 19.414, "grad_norm": 1.5620522499084473, "learning_rate": 2e-05, "loss": 0.03496569, "step": 9707 }, { "epoch": 19.416, "grad_norm": 2.113233804702759, "learning_rate": 2e-05, "loss": 0.03931812, "step": 9708 }, { "epoch": 19.418, "grad_norm": 1.4287680387496948, "learning_rate": 2e-05, "loss": 0.02812807, "step": 9709 }, { "epoch": 19.42, "grad_norm": 1.4849001169204712, "learning_rate": 2e-05, "loss": 0.03003873, "step": 9710 }, { "epoch": 19.422, "grad_norm": 2.0336673259735107, "learning_rate": 2e-05, "loss": 0.05090691, "step": 9711 }, { "epoch": 19.424, "grad_norm": 1.0571931600570679, "learning_rate": 2e-05, "loss": 0.0383536, "step": 9712 }, { "epoch": 19.426, "grad_norm": 0.924149751663208, "learning_rate": 2e-05, "loss": 0.02016873, "step": 9713 }, { "epoch": 19.428, "grad_norm": 1.2180832624435425, "learning_rate": 2e-05, "loss": 0.02738358, "step": 9714 }, { "epoch": 19.43, "grad_norm": 3.771353006362915, "learning_rate": 2e-05, "loss": 0.03492292, "step": 9715 }, { "epoch": 19.432, "grad_norm": 0.9769635796546936, "learning_rate": 2e-05, "loss": 0.0286561, "step": 9716 }, { "epoch": 19.434, "grad_norm": 1.5093841552734375, "learning_rate": 2e-05, "loss": 0.03438292, "step": 9717 }, { "epoch": 19.436, "grad_norm": 1.6384273767471313, "learning_rate": 2e-05, "loss": 0.03842306, "step": 9718 }, { "epoch": 19.438, "grad_norm": 1.3384544849395752, "learning_rate": 2e-05, "loss": 0.03926391, "step": 9719 }, { "epoch": 19.44, "grad_norm": 1.1322165727615356, "learning_rate": 2e-05, "loss": 0.04284387, "step": 9720 }, { "epoch": 19.442, "grad_norm": 1.9500755071640015, "learning_rate": 2e-05, "loss": 0.03185847, "step": 9721 }, { "epoch": 19.444, "grad_norm": 2.0812883377075195, "learning_rate": 2e-05, "loss": 0.05557717, "step": 9722 }, { "epoch": 19.446, "grad_norm": 0.8484706878662109, "learning_rate": 2e-05, "loss": 0.01811722, "step": 9723 }, { "epoch": 19.448, "grad_norm": 1.0412821769714355, "learning_rate": 2e-05, "loss": 0.0315608, "step": 9724 }, { "epoch": 19.45, "grad_norm": 1.6890451908111572, "learning_rate": 2e-05, "loss": 0.0433359, "step": 9725 }, { "epoch": 19.452, "grad_norm": 1.1269593238830566, "learning_rate": 2e-05, "loss": 0.0354099, "step": 9726 }, { "epoch": 19.454, "grad_norm": 1.0882933139801025, "learning_rate": 2e-05, "loss": 0.02488111, "step": 9727 }, { "epoch": 19.456, "grad_norm": 2.212448835372925, "learning_rate": 2e-05, "loss": 0.0372538, "step": 9728 }, { "epoch": 19.458, "grad_norm": 1.6880683898925781, "learning_rate": 2e-05, "loss": 0.03757115, "step": 9729 }, { "epoch": 19.46, "grad_norm": 1.3081451654434204, "learning_rate": 2e-05, "loss": 0.04177973, "step": 9730 }, { "epoch": 19.462, "grad_norm": 1.5396312475204468, "learning_rate": 2e-05, "loss": 0.0427272, "step": 9731 }, { "epoch": 19.464, "grad_norm": 1.6483396291732788, "learning_rate": 2e-05, "loss": 0.04573702, "step": 9732 }, { "epoch": 19.466, "grad_norm": 2.9675261974334717, "learning_rate": 2e-05, "loss": 0.05595337, "step": 9733 }, { "epoch": 19.468, "grad_norm": 1.0395759344100952, "learning_rate": 2e-05, "loss": 0.03035801, "step": 9734 }, { "epoch": 19.47, "grad_norm": 1.7170274257659912, "learning_rate": 2e-05, "loss": 0.04833669, "step": 9735 }, { "epoch": 19.472, "grad_norm": 0.9310649633407593, "learning_rate": 2e-05, "loss": 0.03122351, "step": 9736 }, { "epoch": 19.474, "grad_norm": 1.8986892700195312, "learning_rate": 2e-05, "loss": 0.0391847, "step": 9737 }, { "epoch": 19.476, "grad_norm": 1.0801862478256226, "learning_rate": 2e-05, "loss": 0.04195695, "step": 9738 }, { "epoch": 19.478, "grad_norm": 2.033712148666382, "learning_rate": 2e-05, "loss": 0.03731752, "step": 9739 }, { "epoch": 19.48, "grad_norm": 1.0139687061309814, "learning_rate": 2e-05, "loss": 0.03789478, "step": 9740 }, { "epoch": 19.482, "grad_norm": 1.770564317703247, "learning_rate": 2e-05, "loss": 0.04420698, "step": 9741 }, { "epoch": 19.484, "grad_norm": 1.256616234779358, "learning_rate": 2e-05, "loss": 0.03891748, "step": 9742 }, { "epoch": 19.486, "grad_norm": 1.4175989627838135, "learning_rate": 2e-05, "loss": 0.03451204, "step": 9743 }, { "epoch": 19.488, "grad_norm": 1.5900920629501343, "learning_rate": 2e-05, "loss": 0.05277488, "step": 9744 }, { "epoch": 19.49, "grad_norm": 1.8523023128509521, "learning_rate": 2e-05, "loss": 0.04177346, "step": 9745 }, { "epoch": 19.492, "grad_norm": 1.2272021770477295, "learning_rate": 2e-05, "loss": 0.02621971, "step": 9746 }, { "epoch": 19.494, "grad_norm": 2.3542540073394775, "learning_rate": 2e-05, "loss": 0.04458754, "step": 9747 }, { "epoch": 19.496, "grad_norm": 1.0178302526474, "learning_rate": 2e-05, "loss": 0.02959592, "step": 9748 }, { "epoch": 19.498, "grad_norm": 1.0469385385513306, "learning_rate": 2e-05, "loss": 0.03299112, "step": 9749 }, { "epoch": 19.5, "grad_norm": 1.7686867713928223, "learning_rate": 2e-05, "loss": 0.03709473, "step": 9750 }, { "epoch": 19.502, "grad_norm": 1.1636873483657837, "learning_rate": 2e-05, "loss": 0.0360143, "step": 9751 }, { "epoch": 19.504, "grad_norm": 1.5657356977462769, "learning_rate": 2e-05, "loss": 0.02885246, "step": 9752 }, { "epoch": 19.506, "grad_norm": 1.2220760583877563, "learning_rate": 2e-05, "loss": 0.03826221, "step": 9753 }, { "epoch": 19.508, "grad_norm": 1.124636173248291, "learning_rate": 2e-05, "loss": 0.03203499, "step": 9754 }, { "epoch": 19.51, "grad_norm": 1.681024193763733, "learning_rate": 2e-05, "loss": 0.03180341, "step": 9755 }, { "epoch": 19.512, "grad_norm": 2.359029531478882, "learning_rate": 2e-05, "loss": 0.04386077, "step": 9756 }, { "epoch": 19.514, "grad_norm": 1.4641876220703125, "learning_rate": 2e-05, "loss": 0.03449754, "step": 9757 }, { "epoch": 19.516, "grad_norm": 1.5766197443008423, "learning_rate": 2e-05, "loss": 0.02979608, "step": 9758 }, { "epoch": 19.518, "grad_norm": 2.0075721740722656, "learning_rate": 2e-05, "loss": 0.0430277, "step": 9759 }, { "epoch": 19.52, "grad_norm": 1.359182596206665, "learning_rate": 2e-05, "loss": 0.03539016, "step": 9760 }, { "epoch": 19.522, "grad_norm": 1.0913418531417847, "learning_rate": 2e-05, "loss": 0.03243357, "step": 9761 }, { "epoch": 19.524, "grad_norm": 1.3150811195373535, "learning_rate": 2e-05, "loss": 0.03395069, "step": 9762 }, { "epoch": 19.526, "grad_norm": 1.6575753688812256, "learning_rate": 2e-05, "loss": 0.03828706, "step": 9763 }, { "epoch": 19.528, "grad_norm": 1.700451374053955, "learning_rate": 2e-05, "loss": 0.05022026, "step": 9764 }, { "epoch": 19.53, "grad_norm": 1.0677083730697632, "learning_rate": 2e-05, "loss": 0.03512496, "step": 9765 }, { "epoch": 19.532, "grad_norm": 1.0207821130752563, "learning_rate": 2e-05, "loss": 0.03350664, "step": 9766 }, { "epoch": 19.534, "grad_norm": 1.220189094543457, "learning_rate": 2e-05, "loss": 0.03028592, "step": 9767 }, { "epoch": 19.536, "grad_norm": 1.0478119850158691, "learning_rate": 2e-05, "loss": 0.02295764, "step": 9768 }, { "epoch": 19.538, "grad_norm": 1.4206011295318604, "learning_rate": 2e-05, "loss": 0.04312293, "step": 9769 }, { "epoch": 19.54, "grad_norm": 2.566202163696289, "learning_rate": 2e-05, "loss": 0.04163282, "step": 9770 }, { "epoch": 19.542, "grad_norm": 1.8683357238769531, "learning_rate": 2e-05, "loss": 0.02420815, "step": 9771 }, { "epoch": 19.544, "grad_norm": 1.4970320463180542, "learning_rate": 2e-05, "loss": 0.04130397, "step": 9772 }, { "epoch": 19.546, "grad_norm": 1.3153479099273682, "learning_rate": 2e-05, "loss": 0.04295473, "step": 9773 }, { "epoch": 19.548000000000002, "grad_norm": 1.5751969814300537, "learning_rate": 2e-05, "loss": 0.05208491, "step": 9774 }, { "epoch": 19.55, "grad_norm": 1.0567619800567627, "learning_rate": 2e-05, "loss": 0.02717445, "step": 9775 }, { "epoch": 19.552, "grad_norm": 1.5886588096618652, "learning_rate": 2e-05, "loss": 0.05369675, "step": 9776 }, { "epoch": 19.554, "grad_norm": 1.6979105472564697, "learning_rate": 2e-05, "loss": 0.04253804, "step": 9777 }, { "epoch": 19.556, "grad_norm": 1.3951151371002197, "learning_rate": 2e-05, "loss": 0.04301263, "step": 9778 }, { "epoch": 19.558, "grad_norm": 3.395491361618042, "learning_rate": 2e-05, "loss": 0.03041153, "step": 9779 }, { "epoch": 19.56, "grad_norm": 1.3777649402618408, "learning_rate": 2e-05, "loss": 0.04786427, "step": 9780 }, { "epoch": 19.562, "grad_norm": 1.122922420501709, "learning_rate": 2e-05, "loss": 0.03683935, "step": 9781 }, { "epoch": 19.564, "grad_norm": 2.4130606651306152, "learning_rate": 2e-05, "loss": 0.04994403, "step": 9782 }, { "epoch": 19.566, "grad_norm": 2.533428907394409, "learning_rate": 2e-05, "loss": 0.03640559, "step": 9783 }, { "epoch": 19.568, "grad_norm": 1.6431173086166382, "learning_rate": 2e-05, "loss": 0.03542903, "step": 9784 }, { "epoch": 19.57, "grad_norm": 0.8823850750923157, "learning_rate": 2e-05, "loss": 0.02428854, "step": 9785 }, { "epoch": 19.572, "grad_norm": 1.7447413206100464, "learning_rate": 2e-05, "loss": 0.05290275, "step": 9786 }, { "epoch": 19.574, "grad_norm": 1.2419053316116333, "learning_rate": 2e-05, "loss": 0.04086098, "step": 9787 }, { "epoch": 19.576, "grad_norm": 2.450349807739258, "learning_rate": 2e-05, "loss": 0.0346169, "step": 9788 }, { "epoch": 19.578, "grad_norm": 1.867328405380249, "learning_rate": 2e-05, "loss": 0.04954457, "step": 9789 }, { "epoch": 19.58, "grad_norm": 1.358357310295105, "learning_rate": 2e-05, "loss": 0.04382439, "step": 9790 }, { "epoch": 19.582, "grad_norm": 1.9822919368743896, "learning_rate": 2e-05, "loss": 0.03268323, "step": 9791 }, { "epoch": 19.584, "grad_norm": 1.2838702201843262, "learning_rate": 2e-05, "loss": 0.02432247, "step": 9792 }, { "epoch": 19.586, "grad_norm": 1.2836079597473145, "learning_rate": 2e-05, "loss": 0.03453143, "step": 9793 }, { "epoch": 19.588, "grad_norm": 1.7768176794052124, "learning_rate": 2e-05, "loss": 0.03774252, "step": 9794 }, { "epoch": 19.59, "grad_norm": 1.0157089233398438, "learning_rate": 2e-05, "loss": 0.03381804, "step": 9795 }, { "epoch": 19.592, "grad_norm": 2.1426620483398438, "learning_rate": 2e-05, "loss": 0.04200416, "step": 9796 }, { "epoch": 19.594, "grad_norm": 1.004624605178833, "learning_rate": 2e-05, "loss": 0.02966635, "step": 9797 }, { "epoch": 19.596, "grad_norm": 1.0430629253387451, "learning_rate": 2e-05, "loss": 0.03210926, "step": 9798 }, { "epoch": 19.598, "grad_norm": 0.9733067750930786, "learning_rate": 2e-05, "loss": 0.02974671, "step": 9799 }, { "epoch": 19.6, "grad_norm": 2.1266605854034424, "learning_rate": 2e-05, "loss": 0.04483049, "step": 9800 }, { "epoch": 19.602, "grad_norm": 1.2150872945785522, "learning_rate": 2e-05, "loss": 0.04423954, "step": 9801 }, { "epoch": 19.604, "grad_norm": 1.1437227725982666, "learning_rate": 2e-05, "loss": 0.03814924, "step": 9802 }, { "epoch": 19.606, "grad_norm": 1.234365701675415, "learning_rate": 2e-05, "loss": 0.04211161, "step": 9803 }, { "epoch": 19.608, "grad_norm": 1.2904542684555054, "learning_rate": 2e-05, "loss": 0.039315, "step": 9804 }, { "epoch": 19.61, "grad_norm": 1.0852253437042236, "learning_rate": 2e-05, "loss": 0.03015836, "step": 9805 }, { "epoch": 19.612, "grad_norm": 2.3996999263763428, "learning_rate": 2e-05, "loss": 0.04053651, "step": 9806 }, { "epoch": 19.614, "grad_norm": 1.0746995210647583, "learning_rate": 2e-05, "loss": 0.03122765, "step": 9807 }, { "epoch": 19.616, "grad_norm": 1.0592286586761475, "learning_rate": 2e-05, "loss": 0.03569851, "step": 9808 }, { "epoch": 19.618, "grad_norm": 1.2815927267074585, "learning_rate": 2e-05, "loss": 0.03232668, "step": 9809 }, { "epoch": 19.62, "grad_norm": 1.2451739311218262, "learning_rate": 2e-05, "loss": 0.02683158, "step": 9810 }, { "epoch": 19.622, "grad_norm": 0.9535860419273376, "learning_rate": 2e-05, "loss": 0.02343744, "step": 9811 }, { "epoch": 19.624, "grad_norm": 2.1150941848754883, "learning_rate": 2e-05, "loss": 0.03646648, "step": 9812 }, { "epoch": 19.626, "grad_norm": 1.4279240369796753, "learning_rate": 2e-05, "loss": 0.03931186, "step": 9813 }, { "epoch": 19.628, "grad_norm": 1.9593764543533325, "learning_rate": 2e-05, "loss": 0.04083652, "step": 9814 }, { "epoch": 19.63, "grad_norm": 1.289267659187317, "learning_rate": 2e-05, "loss": 0.03806636, "step": 9815 }, { "epoch": 19.632, "grad_norm": 2.0705997943878174, "learning_rate": 2e-05, "loss": 0.0471487, "step": 9816 }, { "epoch": 19.634, "grad_norm": 1.577562928199768, "learning_rate": 2e-05, "loss": 0.02332294, "step": 9817 }, { "epoch": 19.636, "grad_norm": 1.0667752027511597, "learning_rate": 2e-05, "loss": 0.03316651, "step": 9818 }, { "epoch": 19.638, "grad_norm": 1.272627592086792, "learning_rate": 2e-05, "loss": 0.0328108, "step": 9819 }, { "epoch": 19.64, "grad_norm": 1.1442145109176636, "learning_rate": 2e-05, "loss": 0.03424655, "step": 9820 }, { "epoch": 19.642, "grad_norm": 0.977202832698822, "learning_rate": 2e-05, "loss": 0.02730237, "step": 9821 }, { "epoch": 19.644, "grad_norm": 1.2419651746749878, "learning_rate": 2e-05, "loss": 0.03938533, "step": 9822 }, { "epoch": 19.646, "grad_norm": 1.5671879053115845, "learning_rate": 2e-05, "loss": 0.03334674, "step": 9823 }, { "epoch": 19.648, "grad_norm": 1.2008931636810303, "learning_rate": 2e-05, "loss": 0.03198599, "step": 9824 }, { "epoch": 19.65, "grad_norm": 0.8605839610099792, "learning_rate": 2e-05, "loss": 0.02071075, "step": 9825 }, { "epoch": 19.652, "grad_norm": 1.1936160326004028, "learning_rate": 2e-05, "loss": 0.041041, "step": 9826 }, { "epoch": 19.654, "grad_norm": 0.7941514253616333, "learning_rate": 2e-05, "loss": 0.01732956, "step": 9827 }, { "epoch": 19.656, "grad_norm": 0.9034144282341003, "learning_rate": 2e-05, "loss": 0.02284335, "step": 9828 }, { "epoch": 19.658, "grad_norm": 2.9886972904205322, "learning_rate": 2e-05, "loss": 0.05936865, "step": 9829 }, { "epoch": 19.66, "grad_norm": 1.7433563470840454, "learning_rate": 2e-05, "loss": 0.04660515, "step": 9830 }, { "epoch": 19.662, "grad_norm": 1.4639521837234497, "learning_rate": 2e-05, "loss": 0.03722664, "step": 9831 }, { "epoch": 19.664, "grad_norm": 1.6266924142837524, "learning_rate": 2e-05, "loss": 0.04071777, "step": 9832 }, { "epoch": 19.666, "grad_norm": 1.52934730052948, "learning_rate": 2e-05, "loss": 0.03618839, "step": 9833 }, { "epoch": 19.668, "grad_norm": 1.321384310722351, "learning_rate": 2e-05, "loss": 0.04987688, "step": 9834 }, { "epoch": 19.67, "grad_norm": 1.1635421514511108, "learning_rate": 2e-05, "loss": 0.03412395, "step": 9835 }, { "epoch": 19.672, "grad_norm": 2.087972402572632, "learning_rate": 2e-05, "loss": 0.02932746, "step": 9836 }, { "epoch": 19.674, "grad_norm": 1.8116962909698486, "learning_rate": 2e-05, "loss": 0.05201422, "step": 9837 }, { "epoch": 19.676, "grad_norm": 1.7317609786987305, "learning_rate": 2e-05, "loss": 0.03608788, "step": 9838 }, { "epoch": 19.678, "grad_norm": 1.6292537450790405, "learning_rate": 2e-05, "loss": 0.03296152, "step": 9839 }, { "epoch": 19.68, "grad_norm": 1.4448356628417969, "learning_rate": 2e-05, "loss": 0.04338595, "step": 9840 }, { "epoch": 19.682, "grad_norm": 1.1970990896224976, "learning_rate": 2e-05, "loss": 0.02565513, "step": 9841 }, { "epoch": 19.684, "grad_norm": 1.4799765348434448, "learning_rate": 2e-05, "loss": 0.04556567, "step": 9842 }, { "epoch": 19.686, "grad_norm": 1.1363059282302856, "learning_rate": 2e-05, "loss": 0.03603154, "step": 9843 }, { "epoch": 19.688, "grad_norm": 2.854661703109741, "learning_rate": 2e-05, "loss": 0.05047087, "step": 9844 }, { "epoch": 19.69, "grad_norm": 0.8889420032501221, "learning_rate": 2e-05, "loss": 0.02277106, "step": 9845 }, { "epoch": 19.692, "grad_norm": 1.6678924560546875, "learning_rate": 2e-05, "loss": 0.03258391, "step": 9846 }, { "epoch": 19.694, "grad_norm": 3.123685598373413, "learning_rate": 2e-05, "loss": 0.03615994, "step": 9847 }, { "epoch": 19.696, "grad_norm": 1.1917728185653687, "learning_rate": 2e-05, "loss": 0.0365363, "step": 9848 }, { "epoch": 19.698, "grad_norm": 1.2014756202697754, "learning_rate": 2e-05, "loss": 0.03089717, "step": 9849 }, { "epoch": 19.7, "grad_norm": 2.1376969814300537, "learning_rate": 2e-05, "loss": 0.04704689, "step": 9850 }, { "epoch": 19.701999999999998, "grad_norm": 1.4255774021148682, "learning_rate": 2e-05, "loss": 0.02486222, "step": 9851 }, { "epoch": 19.704, "grad_norm": 1.1981037855148315, "learning_rate": 2e-05, "loss": 0.04301386, "step": 9852 }, { "epoch": 19.706, "grad_norm": 1.5258287191390991, "learning_rate": 2e-05, "loss": 0.05485792, "step": 9853 }, { "epoch": 19.708, "grad_norm": 1.6070016622543335, "learning_rate": 2e-05, "loss": 0.03380585, "step": 9854 }, { "epoch": 19.71, "grad_norm": 1.0304168462753296, "learning_rate": 2e-05, "loss": 0.02755873, "step": 9855 }, { "epoch": 19.712, "grad_norm": 2.085618257522583, "learning_rate": 2e-05, "loss": 0.04198353, "step": 9856 }, { "epoch": 19.714, "grad_norm": 1.2231016159057617, "learning_rate": 2e-05, "loss": 0.03908198, "step": 9857 }, { "epoch": 19.716, "grad_norm": 2.0941994190216064, "learning_rate": 2e-05, "loss": 0.04153078, "step": 9858 }, { "epoch": 19.718, "grad_norm": 1.3140674829483032, "learning_rate": 2e-05, "loss": 0.03461843, "step": 9859 }, { "epoch": 19.72, "grad_norm": 2.5073509216308594, "learning_rate": 2e-05, "loss": 0.05611764, "step": 9860 }, { "epoch": 19.722, "grad_norm": 1.2403875589370728, "learning_rate": 2e-05, "loss": 0.04040193, "step": 9861 }, { "epoch": 19.724, "grad_norm": 1.2236120700836182, "learning_rate": 2e-05, "loss": 0.04124648, "step": 9862 }, { "epoch": 19.726, "grad_norm": 1.410003662109375, "learning_rate": 2e-05, "loss": 0.0384649, "step": 9863 }, { "epoch": 19.728, "grad_norm": 1.1842060089111328, "learning_rate": 2e-05, "loss": 0.03703492, "step": 9864 }, { "epoch": 19.73, "grad_norm": 1.596753478050232, "learning_rate": 2e-05, "loss": 0.03340092, "step": 9865 }, { "epoch": 19.732, "grad_norm": 1.6975880861282349, "learning_rate": 2e-05, "loss": 0.04212178, "step": 9866 }, { "epoch": 19.734, "grad_norm": 2.210099458694458, "learning_rate": 2e-05, "loss": 0.03261279, "step": 9867 }, { "epoch": 19.736, "grad_norm": 1.1055867671966553, "learning_rate": 2e-05, "loss": 0.03092555, "step": 9868 }, { "epoch": 19.738, "grad_norm": 1.1590442657470703, "learning_rate": 2e-05, "loss": 0.02638486, "step": 9869 }, { "epoch": 19.74, "grad_norm": 1.1977529525756836, "learning_rate": 2e-05, "loss": 0.03209082, "step": 9870 }, { "epoch": 19.742, "grad_norm": 1.3903778791427612, "learning_rate": 2e-05, "loss": 0.03176203, "step": 9871 }, { "epoch": 19.744, "grad_norm": 1.7422199249267578, "learning_rate": 2e-05, "loss": 0.03671364, "step": 9872 }, { "epoch": 19.746, "grad_norm": 1.0997118949890137, "learning_rate": 2e-05, "loss": 0.03464361, "step": 9873 }, { "epoch": 19.748, "grad_norm": 1.4066072702407837, "learning_rate": 2e-05, "loss": 0.03441656, "step": 9874 }, { "epoch": 19.75, "grad_norm": 1.0262517929077148, "learning_rate": 2e-05, "loss": 0.03322373, "step": 9875 }, { "epoch": 19.752, "grad_norm": 1.3730692863464355, "learning_rate": 2e-05, "loss": 0.04439168, "step": 9876 }, { "epoch": 19.754, "grad_norm": 1.2978415489196777, "learning_rate": 2e-05, "loss": 0.04093964, "step": 9877 }, { "epoch": 19.756, "grad_norm": 1.0929715633392334, "learning_rate": 2e-05, "loss": 0.0319971, "step": 9878 }, { "epoch": 19.758, "grad_norm": 1.8838789463043213, "learning_rate": 2e-05, "loss": 0.0498081, "step": 9879 }, { "epoch": 19.76, "grad_norm": 1.343527913093567, "learning_rate": 2e-05, "loss": 0.03659369, "step": 9880 }, { "epoch": 19.762, "grad_norm": 2.9458236694335938, "learning_rate": 2e-05, "loss": 0.05308362, "step": 9881 }, { "epoch": 19.764, "grad_norm": 1.1302599906921387, "learning_rate": 2e-05, "loss": 0.03225252, "step": 9882 }, { "epoch": 19.766, "grad_norm": 1.005309820175171, "learning_rate": 2e-05, "loss": 0.02554948, "step": 9883 }, { "epoch": 19.768, "grad_norm": 1.148569107055664, "learning_rate": 2e-05, "loss": 0.0364852, "step": 9884 }, { "epoch": 19.77, "grad_norm": 1.1067266464233398, "learning_rate": 2e-05, "loss": 0.03569014, "step": 9885 }, { "epoch": 19.772, "grad_norm": 1.2811511754989624, "learning_rate": 2e-05, "loss": 0.02505126, "step": 9886 }, { "epoch": 19.774, "grad_norm": 1.8366814851760864, "learning_rate": 2e-05, "loss": 0.04801732, "step": 9887 }, { "epoch": 19.776, "grad_norm": 1.0430152416229248, "learning_rate": 2e-05, "loss": 0.02715734, "step": 9888 }, { "epoch": 19.778, "grad_norm": 1.6979142427444458, "learning_rate": 2e-05, "loss": 0.02943612, "step": 9889 }, { "epoch": 19.78, "grad_norm": 2.1606314182281494, "learning_rate": 2e-05, "loss": 0.05470781, "step": 9890 }, { "epoch": 19.782, "grad_norm": 1.1862667798995972, "learning_rate": 2e-05, "loss": 0.0372127, "step": 9891 }, { "epoch": 19.784, "grad_norm": 1.5462552309036255, "learning_rate": 2e-05, "loss": 0.04436889, "step": 9892 }, { "epoch": 19.786, "grad_norm": 1.1272681951522827, "learning_rate": 2e-05, "loss": 0.02969773, "step": 9893 }, { "epoch": 19.788, "grad_norm": 1.8199870586395264, "learning_rate": 2e-05, "loss": 0.04257311, "step": 9894 }, { "epoch": 19.79, "grad_norm": 1.1364978551864624, "learning_rate": 2e-05, "loss": 0.03960226, "step": 9895 }, { "epoch": 19.792, "grad_norm": 5.6248908042907715, "learning_rate": 2e-05, "loss": 0.05272412, "step": 9896 }, { "epoch": 19.794, "grad_norm": 1.9296268224716187, "learning_rate": 2e-05, "loss": 0.0427717, "step": 9897 }, { "epoch": 19.796, "grad_norm": 1.5250190496444702, "learning_rate": 2e-05, "loss": 0.04301464, "step": 9898 }, { "epoch": 19.798000000000002, "grad_norm": 3.7405781745910645, "learning_rate": 2e-05, "loss": 0.03769503, "step": 9899 }, { "epoch": 19.8, "grad_norm": 1.0620787143707275, "learning_rate": 2e-05, "loss": 0.02631702, "step": 9900 }, { "epoch": 19.802, "grad_norm": 1.9227813482284546, "learning_rate": 2e-05, "loss": 0.03936617, "step": 9901 }, { "epoch": 19.804, "grad_norm": 1.4326848983764648, "learning_rate": 2e-05, "loss": 0.03811381, "step": 9902 }, { "epoch": 19.806, "grad_norm": 0.9656258821487427, "learning_rate": 2e-05, "loss": 0.02461326, "step": 9903 }, { "epoch": 19.808, "grad_norm": 1.468076229095459, "learning_rate": 2e-05, "loss": 0.04942961, "step": 9904 }, { "epoch": 19.81, "grad_norm": 1.0729457139968872, "learning_rate": 2e-05, "loss": 0.03775813, "step": 9905 }, { "epoch": 19.812, "grad_norm": 2.371782064437866, "learning_rate": 2e-05, "loss": 0.04209847, "step": 9906 }, { "epoch": 19.814, "grad_norm": 1.1211527585983276, "learning_rate": 2e-05, "loss": 0.03618193, "step": 9907 }, { "epoch": 19.816, "grad_norm": 1.2822222709655762, "learning_rate": 2e-05, "loss": 0.02676849, "step": 9908 }, { "epoch": 19.818, "grad_norm": 1.2275272607803345, "learning_rate": 2e-05, "loss": 0.03364244, "step": 9909 }, { "epoch": 19.82, "grad_norm": 1.4807791709899902, "learning_rate": 2e-05, "loss": 0.03341034, "step": 9910 }, { "epoch": 19.822, "grad_norm": 0.8294656872749329, "learning_rate": 2e-05, "loss": 0.02359098, "step": 9911 }, { "epoch": 19.824, "grad_norm": 1.1592644453048706, "learning_rate": 2e-05, "loss": 0.0353507, "step": 9912 }, { "epoch": 19.826, "grad_norm": 1.078616976737976, "learning_rate": 2e-05, "loss": 0.02933605, "step": 9913 }, { "epoch": 19.828, "grad_norm": 1.74703848361969, "learning_rate": 2e-05, "loss": 0.03395237, "step": 9914 }, { "epoch": 19.83, "grad_norm": 0.9906914830207825, "learning_rate": 2e-05, "loss": 0.03834459, "step": 9915 }, { "epoch": 19.832, "grad_norm": 1.5938894748687744, "learning_rate": 2e-05, "loss": 0.04390221, "step": 9916 }, { "epoch": 19.834, "grad_norm": 0.8304893374443054, "learning_rate": 2e-05, "loss": 0.02339101, "step": 9917 }, { "epoch": 19.836, "grad_norm": 2.1542248725891113, "learning_rate": 2e-05, "loss": 0.04835535, "step": 9918 }, { "epoch": 19.838, "grad_norm": 1.7245725393295288, "learning_rate": 2e-05, "loss": 0.04382911, "step": 9919 }, { "epoch": 19.84, "grad_norm": 1.0681260824203491, "learning_rate": 2e-05, "loss": 0.0333902, "step": 9920 }, { "epoch": 19.842, "grad_norm": 1.5578216314315796, "learning_rate": 2e-05, "loss": 0.03641013, "step": 9921 }, { "epoch": 19.844, "grad_norm": 0.9164597392082214, "learning_rate": 2e-05, "loss": 0.02537297, "step": 9922 }, { "epoch": 19.846, "grad_norm": 2.094348192214966, "learning_rate": 2e-05, "loss": 0.04775488, "step": 9923 }, { "epoch": 19.848, "grad_norm": 2.1640682220458984, "learning_rate": 2e-05, "loss": 0.03262997, "step": 9924 }, { "epoch": 19.85, "grad_norm": 1.3162791728973389, "learning_rate": 2e-05, "loss": 0.03451164, "step": 9925 }, { "epoch": 19.852, "grad_norm": 1.047416090965271, "learning_rate": 2e-05, "loss": 0.02686568, "step": 9926 }, { "epoch": 19.854, "grad_norm": 1.4626446962356567, "learning_rate": 2e-05, "loss": 0.05131056, "step": 9927 }, { "epoch": 19.856, "grad_norm": 1.1286470890045166, "learning_rate": 2e-05, "loss": 0.03642206, "step": 9928 }, { "epoch": 19.858, "grad_norm": 1.7644599676132202, "learning_rate": 2e-05, "loss": 0.04343008, "step": 9929 }, { "epoch": 19.86, "grad_norm": 0.8793272972106934, "learning_rate": 2e-05, "loss": 0.02633665, "step": 9930 }, { "epoch": 19.862, "grad_norm": 1.3826559782028198, "learning_rate": 2e-05, "loss": 0.03667331, "step": 9931 }, { "epoch": 19.864, "grad_norm": 1.5260701179504395, "learning_rate": 2e-05, "loss": 0.03625732, "step": 9932 }, { "epoch": 19.866, "grad_norm": 1.5434454679489136, "learning_rate": 2e-05, "loss": 0.04190221, "step": 9933 }, { "epoch": 19.868, "grad_norm": 1.4295403957366943, "learning_rate": 2e-05, "loss": 0.03398123, "step": 9934 }, { "epoch": 19.87, "grad_norm": 1.359604835510254, "learning_rate": 2e-05, "loss": 0.04142676, "step": 9935 }, { "epoch": 19.872, "grad_norm": 2.0198137760162354, "learning_rate": 2e-05, "loss": 0.05195599, "step": 9936 }, { "epoch": 19.874, "grad_norm": 0.9484997987747192, "learning_rate": 2e-05, "loss": 0.0277728, "step": 9937 }, { "epoch": 19.876, "grad_norm": 1.3963159322738647, "learning_rate": 2e-05, "loss": 0.05602345, "step": 9938 }, { "epoch": 19.878, "grad_norm": 1.419621229171753, "learning_rate": 2e-05, "loss": 0.02756134, "step": 9939 }, { "epoch": 19.88, "grad_norm": 2.4479565620422363, "learning_rate": 2e-05, "loss": 0.0412005, "step": 9940 }, { "epoch": 19.882, "grad_norm": 1.392535924911499, "learning_rate": 2e-05, "loss": 0.03324911, "step": 9941 }, { "epoch": 19.884, "grad_norm": 0.9907611608505249, "learning_rate": 2e-05, "loss": 0.03080793, "step": 9942 }, { "epoch": 19.886, "grad_norm": 1.6985739469528198, "learning_rate": 2e-05, "loss": 0.04235946, "step": 9943 }, { "epoch": 19.888, "grad_norm": 1.201528787612915, "learning_rate": 2e-05, "loss": 0.02874477, "step": 9944 }, { "epoch": 19.89, "grad_norm": 1.272756814956665, "learning_rate": 2e-05, "loss": 0.03114101, "step": 9945 }, { "epoch": 19.892, "grad_norm": 2.536895513534546, "learning_rate": 2e-05, "loss": 0.04412911, "step": 9946 }, { "epoch": 19.894, "grad_norm": 1.345763087272644, "learning_rate": 2e-05, "loss": 0.03955846, "step": 9947 }, { "epoch": 19.896, "grad_norm": 1.703199863433838, "learning_rate": 2e-05, "loss": 0.04491033, "step": 9948 }, { "epoch": 19.898, "grad_norm": 2.1777453422546387, "learning_rate": 2e-05, "loss": 0.0519994, "step": 9949 }, { "epoch": 19.9, "grad_norm": 1.9769400358200073, "learning_rate": 2e-05, "loss": 0.04786195, "step": 9950 }, { "epoch": 19.902, "grad_norm": 0.9934007525444031, "learning_rate": 2e-05, "loss": 0.02457796, "step": 9951 }, { "epoch": 19.904, "grad_norm": 1.0393855571746826, "learning_rate": 2e-05, "loss": 0.03693616, "step": 9952 }, { "epoch": 19.906, "grad_norm": 1.4611011743545532, "learning_rate": 2e-05, "loss": 0.03181311, "step": 9953 }, { "epoch": 19.908, "grad_norm": 1.9323896169662476, "learning_rate": 2e-05, "loss": 0.04420885, "step": 9954 }, { "epoch": 19.91, "grad_norm": 1.2806192636489868, "learning_rate": 2e-05, "loss": 0.0216569, "step": 9955 }, { "epoch": 19.912, "grad_norm": 2.6190237998962402, "learning_rate": 2e-05, "loss": 0.04326738, "step": 9956 }, { "epoch": 19.914, "grad_norm": 1.5687636137008667, "learning_rate": 2e-05, "loss": 0.03183849, "step": 9957 }, { "epoch": 19.916, "grad_norm": 1.1878702640533447, "learning_rate": 2e-05, "loss": 0.03523634, "step": 9958 }, { "epoch": 19.918, "grad_norm": 1.3091981410980225, "learning_rate": 2e-05, "loss": 0.03142114, "step": 9959 }, { "epoch": 19.92, "grad_norm": 2.1375269889831543, "learning_rate": 2e-05, "loss": 0.04402493, "step": 9960 }, { "epoch": 19.922, "grad_norm": 1.4389981031417847, "learning_rate": 2e-05, "loss": 0.03660287, "step": 9961 }, { "epoch": 19.924, "grad_norm": 1.4258970022201538, "learning_rate": 2e-05, "loss": 0.03248418, "step": 9962 }, { "epoch": 19.926, "grad_norm": 1.94060480594635, "learning_rate": 2e-05, "loss": 0.05550197, "step": 9963 }, { "epoch": 19.928, "grad_norm": 2.0639617443084717, "learning_rate": 2e-05, "loss": 0.04555191, "step": 9964 }, { "epoch": 19.93, "grad_norm": 1.4543213844299316, "learning_rate": 2e-05, "loss": 0.02980587, "step": 9965 }, { "epoch": 19.932, "grad_norm": 1.0913479328155518, "learning_rate": 2e-05, "loss": 0.03366387, "step": 9966 }, { "epoch": 19.934, "grad_norm": 1.459293246269226, "learning_rate": 2e-05, "loss": 0.03716648, "step": 9967 }, { "epoch": 19.936, "grad_norm": 1.5800535678863525, "learning_rate": 2e-05, "loss": 0.04051695, "step": 9968 }, { "epoch": 19.938, "grad_norm": 1.6184855699539185, "learning_rate": 2e-05, "loss": 0.0413382, "step": 9969 }, { "epoch": 19.94, "grad_norm": 1.287768840789795, "learning_rate": 2e-05, "loss": 0.02909556, "step": 9970 }, { "epoch": 19.942, "grad_norm": 2.3062894344329834, "learning_rate": 2e-05, "loss": 0.04965471, "step": 9971 }, { "epoch": 19.944, "grad_norm": 1.639174222946167, "learning_rate": 2e-05, "loss": 0.03276775, "step": 9972 }, { "epoch": 19.946, "grad_norm": 1.1336901187896729, "learning_rate": 2e-05, "loss": 0.03398212, "step": 9973 }, { "epoch": 19.948, "grad_norm": 1.7506204843521118, "learning_rate": 2e-05, "loss": 0.05709363, "step": 9974 }, { "epoch": 19.95, "grad_norm": 1.0985453128814697, "learning_rate": 2e-05, "loss": 0.03749266, "step": 9975 }, { "epoch": 19.951999999999998, "grad_norm": 1.533744215965271, "learning_rate": 2e-05, "loss": 0.04838315, "step": 9976 }, { "epoch": 19.954, "grad_norm": 1.545249104499817, "learning_rate": 2e-05, "loss": 0.03769622, "step": 9977 }, { "epoch": 19.956, "grad_norm": 1.0320427417755127, "learning_rate": 2e-05, "loss": 0.02421537, "step": 9978 }, { "epoch": 19.958, "grad_norm": 1.2649873495101929, "learning_rate": 2e-05, "loss": 0.03971934, "step": 9979 }, { "epoch": 19.96, "grad_norm": 1.2545596361160278, "learning_rate": 2e-05, "loss": 0.04188591, "step": 9980 }, { "epoch": 19.962, "grad_norm": 1.0163325071334839, "learning_rate": 2e-05, "loss": 0.02098066, "step": 9981 }, { "epoch": 19.964, "grad_norm": 0.8341543674468994, "learning_rate": 2e-05, "loss": 0.01667216, "step": 9982 }, { "epoch": 19.966, "grad_norm": 1.1888387203216553, "learning_rate": 2e-05, "loss": 0.0333435, "step": 9983 }, { "epoch": 19.968, "grad_norm": 1.6849191188812256, "learning_rate": 2e-05, "loss": 0.04021031, "step": 9984 }, { "epoch": 19.97, "grad_norm": 1.1306986808776855, "learning_rate": 2e-05, "loss": 0.03524192, "step": 9985 }, { "epoch": 19.972, "grad_norm": 1.2705979347229004, "learning_rate": 2e-05, "loss": 0.045315, "step": 9986 }, { "epoch": 19.974, "grad_norm": 3.2684991359710693, "learning_rate": 2e-05, "loss": 0.05361638, "step": 9987 }, { "epoch": 19.976, "grad_norm": 2.3001787662506104, "learning_rate": 2e-05, "loss": 0.0321941, "step": 9988 }, { "epoch": 19.978, "grad_norm": 1.2926708459854126, "learning_rate": 2e-05, "loss": 0.03842488, "step": 9989 }, { "epoch": 19.98, "grad_norm": 1.6358253955841064, "learning_rate": 2e-05, "loss": 0.04522712, "step": 9990 }, { "epoch": 19.982, "grad_norm": 1.0329595804214478, "learning_rate": 2e-05, "loss": 0.02501506, "step": 9991 }, { "epoch": 19.984, "grad_norm": 1.3090020418167114, "learning_rate": 2e-05, "loss": 0.03367433, "step": 9992 }, { "epoch": 19.986, "grad_norm": 1.0016107559204102, "learning_rate": 2e-05, "loss": 0.02978981, "step": 9993 }, { "epoch": 19.988, "grad_norm": 1.311272382736206, "learning_rate": 2e-05, "loss": 0.04261601, "step": 9994 }, { "epoch": 19.99, "grad_norm": 0.9918799996376038, "learning_rate": 2e-05, "loss": 0.02678192, "step": 9995 }, { "epoch": 19.992, "grad_norm": 1.2438549995422363, "learning_rate": 2e-05, "loss": 0.04806288, "step": 9996 }, { "epoch": 19.994, "grad_norm": 1.1533282995224, "learning_rate": 2e-05, "loss": 0.03929444, "step": 9997 }, { "epoch": 19.996, "grad_norm": 1.9555519819259644, "learning_rate": 2e-05, "loss": 0.04684892, "step": 9998 }, { "epoch": 19.998, "grad_norm": 1.2001152038574219, "learning_rate": 2e-05, "loss": 0.02799315, "step": 9999 }, { "epoch": 20.0, "grad_norm": 1.3295609951019287, "learning_rate": 2e-05, "loss": 0.04259039, "step": 10000 }, { "epoch": 20.0, "eval_performance": { "AngleClassification_1": 0.99, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9740518962075848, "Equal_1": 0.996, "Equal_2": 0.9820359281437125, "Equal_3": 0.9520958083832335, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.99, "Perpendicular_1": 0.998, "Perpendicular_2": 0.986, "Perpendicular_3": 0.7975951903807615, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.994, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9919839679358717, "PointLiesOnLine_3": 0.9800399201596807 }, "eval_runtime": 319.7877, "eval_samples_per_second": 32.834, "eval_steps_per_second": 0.657, "step": 10000 }, { "epoch": 20.002, "grad_norm": 1.157810926437378, "learning_rate": 2e-05, "loss": 0.02725174, "step": 10001 }, { "epoch": 20.004, "grad_norm": 2.2255258560180664, "learning_rate": 2e-05, "loss": 0.04750681, "step": 10002 }, { "epoch": 20.006, "grad_norm": 0.977530300617218, "learning_rate": 2e-05, "loss": 0.03366182, "step": 10003 }, { "epoch": 20.008, "grad_norm": 2.368126392364502, "learning_rate": 2e-05, "loss": 0.03093569, "step": 10004 }, { "epoch": 20.01, "grad_norm": 1.1914955377578735, "learning_rate": 2e-05, "loss": 0.04483454, "step": 10005 }, { "epoch": 20.012, "grad_norm": 1.2124160528182983, "learning_rate": 2e-05, "loss": 0.03203357, "step": 10006 }, { "epoch": 20.014, "grad_norm": 2.622993230819702, "learning_rate": 2e-05, "loss": 0.0496589, "step": 10007 }, { "epoch": 20.016, "grad_norm": 1.2373409271240234, "learning_rate": 2e-05, "loss": 0.04058324, "step": 10008 }, { "epoch": 20.018, "grad_norm": 1.6478288173675537, "learning_rate": 2e-05, "loss": 0.04132287, "step": 10009 }, { "epoch": 20.02, "grad_norm": 1.0980284214019775, "learning_rate": 2e-05, "loss": 0.03450535, "step": 10010 }, { "epoch": 20.022, "grad_norm": 0.9690496921539307, "learning_rate": 2e-05, "loss": 0.02480958, "step": 10011 }, { "epoch": 20.024, "grad_norm": 3.7256979942321777, "learning_rate": 2e-05, "loss": 0.03763371, "step": 10012 }, { "epoch": 20.026, "grad_norm": 0.9773795008659363, "learning_rate": 2e-05, "loss": 0.02543466, "step": 10013 }, { "epoch": 20.028, "grad_norm": 1.7975311279296875, "learning_rate": 2e-05, "loss": 0.0339796, "step": 10014 }, { "epoch": 20.03, "grad_norm": 1.9980261325836182, "learning_rate": 2e-05, "loss": 0.03168187, "step": 10015 }, { "epoch": 20.032, "grad_norm": 1.4407600164413452, "learning_rate": 2e-05, "loss": 0.05185389, "step": 10016 }, { "epoch": 20.034, "grad_norm": 1.0228748321533203, "learning_rate": 2e-05, "loss": 0.02575734, "step": 10017 }, { "epoch": 20.036, "grad_norm": 1.1408131122589111, "learning_rate": 2e-05, "loss": 0.03414143, "step": 10018 }, { "epoch": 20.038, "grad_norm": 1.2371625900268555, "learning_rate": 2e-05, "loss": 0.03162018, "step": 10019 }, { "epoch": 20.04, "grad_norm": 0.982409656047821, "learning_rate": 2e-05, "loss": 0.0248183, "step": 10020 }, { "epoch": 20.042, "grad_norm": 1.5073639154434204, "learning_rate": 2e-05, "loss": 0.04884845, "step": 10021 }, { "epoch": 20.044, "grad_norm": 1.28328537940979, "learning_rate": 2e-05, "loss": 0.04167043, "step": 10022 }, { "epoch": 20.046, "grad_norm": 1.0359996557235718, "learning_rate": 2e-05, "loss": 0.0376031, "step": 10023 }, { "epoch": 20.048, "grad_norm": 1.150727391242981, "learning_rate": 2e-05, "loss": 0.02745938, "step": 10024 }, { "epoch": 20.05, "grad_norm": 1.7619673013687134, "learning_rate": 2e-05, "loss": 0.04664058, "step": 10025 }, { "epoch": 20.052, "grad_norm": 1.849218726158142, "learning_rate": 2e-05, "loss": 0.0393936, "step": 10026 }, { "epoch": 20.054, "grad_norm": 1.425082802772522, "learning_rate": 2e-05, "loss": 0.03768519, "step": 10027 }, { "epoch": 20.056, "grad_norm": 2.417065143585205, "learning_rate": 2e-05, "loss": 0.04438198, "step": 10028 }, { "epoch": 20.058, "grad_norm": 1.103333592414856, "learning_rate": 2e-05, "loss": 0.0304841, "step": 10029 }, { "epoch": 20.06, "grad_norm": 1.0776103734970093, "learning_rate": 2e-05, "loss": 0.03893217, "step": 10030 }, { "epoch": 20.062, "grad_norm": 1.4815536737442017, "learning_rate": 2e-05, "loss": 0.05196786, "step": 10031 }, { "epoch": 20.064, "grad_norm": 1.3739426136016846, "learning_rate": 2e-05, "loss": 0.03645986, "step": 10032 }, { "epoch": 20.066, "grad_norm": 1.1692328453063965, "learning_rate": 2e-05, "loss": 0.03630241, "step": 10033 }, { "epoch": 20.068, "grad_norm": 1.0185339450836182, "learning_rate": 2e-05, "loss": 0.0344325, "step": 10034 }, { "epoch": 20.07, "grad_norm": 0.9241943359375, "learning_rate": 2e-05, "loss": 0.02990804, "step": 10035 }, { "epoch": 20.072, "grad_norm": 2.4085822105407715, "learning_rate": 2e-05, "loss": 0.04084285, "step": 10036 }, { "epoch": 20.074, "grad_norm": 1.6510875225067139, "learning_rate": 2e-05, "loss": 0.03414775, "step": 10037 }, { "epoch": 20.076, "grad_norm": 1.0024648904800415, "learning_rate": 2e-05, "loss": 0.02915655, "step": 10038 }, { "epoch": 20.078, "grad_norm": 1.1941813230514526, "learning_rate": 2e-05, "loss": 0.03444272, "step": 10039 }, { "epoch": 20.08, "grad_norm": 1.5588897466659546, "learning_rate": 2e-05, "loss": 0.04292718, "step": 10040 }, { "epoch": 20.082, "grad_norm": 1.1992404460906982, "learning_rate": 2e-05, "loss": 0.03661583, "step": 10041 }, { "epoch": 20.084, "grad_norm": 1.6118459701538086, "learning_rate": 2e-05, "loss": 0.03280291, "step": 10042 }, { "epoch": 20.086, "grad_norm": 1.184962272644043, "learning_rate": 2e-05, "loss": 0.03350543, "step": 10043 }, { "epoch": 20.088, "grad_norm": 1.2698707580566406, "learning_rate": 2e-05, "loss": 0.03131055, "step": 10044 }, { "epoch": 20.09, "grad_norm": 1.2608489990234375, "learning_rate": 2e-05, "loss": 0.04219927, "step": 10045 }, { "epoch": 20.092, "grad_norm": 1.3150699138641357, "learning_rate": 2e-05, "loss": 0.04263498, "step": 10046 }, { "epoch": 20.094, "grad_norm": 0.7644698619842529, "learning_rate": 2e-05, "loss": 0.01624379, "step": 10047 }, { "epoch": 20.096, "grad_norm": 1.202438235282898, "learning_rate": 2e-05, "loss": 0.04117297, "step": 10048 }, { "epoch": 20.098, "grad_norm": 1.1358537673950195, "learning_rate": 2e-05, "loss": 0.0320413, "step": 10049 }, { "epoch": 20.1, "grad_norm": 1.1852686405181885, "learning_rate": 2e-05, "loss": 0.0407727, "step": 10050 }, { "epoch": 20.102, "grad_norm": 1.051586389541626, "learning_rate": 2e-05, "loss": 0.03409174, "step": 10051 }, { "epoch": 20.104, "grad_norm": 0.8840859532356262, "learning_rate": 2e-05, "loss": 0.02449731, "step": 10052 }, { "epoch": 20.106, "grad_norm": 0.8056784868240356, "learning_rate": 2e-05, "loss": 0.02186309, "step": 10053 }, { "epoch": 20.108, "grad_norm": 1.1816960573196411, "learning_rate": 2e-05, "loss": 0.03838727, "step": 10054 }, { "epoch": 20.11, "grad_norm": 1.3361520767211914, "learning_rate": 2e-05, "loss": 0.046068, "step": 10055 }, { "epoch": 20.112, "grad_norm": 1.7416718006134033, "learning_rate": 2e-05, "loss": 0.02897919, "step": 10056 }, { "epoch": 20.114, "grad_norm": 0.8946969509124756, "learning_rate": 2e-05, "loss": 0.02574443, "step": 10057 }, { "epoch": 20.116, "grad_norm": 0.9283491969108582, "learning_rate": 2e-05, "loss": 0.02700006, "step": 10058 }, { "epoch": 20.118, "grad_norm": 1.316556692123413, "learning_rate": 2e-05, "loss": 0.03163679, "step": 10059 }, { "epoch": 20.12, "grad_norm": 1.7087239027023315, "learning_rate": 2e-05, "loss": 0.05993263, "step": 10060 }, { "epoch": 20.122, "grad_norm": 1.1801674365997314, "learning_rate": 2e-05, "loss": 0.03400156, "step": 10061 }, { "epoch": 20.124, "grad_norm": 1.5751726627349854, "learning_rate": 2e-05, "loss": 0.03422957, "step": 10062 }, { "epoch": 20.126, "grad_norm": 0.9739691019058228, "learning_rate": 2e-05, "loss": 0.02737168, "step": 10063 }, { "epoch": 20.128, "grad_norm": 1.8991624116897583, "learning_rate": 2e-05, "loss": 0.05057021, "step": 10064 }, { "epoch": 20.13, "grad_norm": 1.7493529319763184, "learning_rate": 2e-05, "loss": 0.03247079, "step": 10065 }, { "epoch": 20.132, "grad_norm": 0.9996808767318726, "learning_rate": 2e-05, "loss": 0.03386039, "step": 10066 }, { "epoch": 20.134, "grad_norm": 1.3784937858581543, "learning_rate": 2e-05, "loss": 0.04235096, "step": 10067 }, { "epoch": 20.136, "grad_norm": 0.9882324934005737, "learning_rate": 2e-05, "loss": 0.03374637, "step": 10068 }, { "epoch": 20.138, "grad_norm": 1.1563467979431152, "learning_rate": 2e-05, "loss": 0.03350766, "step": 10069 }, { "epoch": 20.14, "grad_norm": 1.2875149250030518, "learning_rate": 2e-05, "loss": 0.03958068, "step": 10070 }, { "epoch": 20.142, "grad_norm": 1.2082184553146362, "learning_rate": 2e-05, "loss": 0.03617564, "step": 10071 }, { "epoch": 20.144, "grad_norm": 1.5155640840530396, "learning_rate": 2e-05, "loss": 0.03431886, "step": 10072 }, { "epoch": 20.146, "grad_norm": 1.241598129272461, "learning_rate": 2e-05, "loss": 0.05081156, "step": 10073 }, { "epoch": 20.148, "grad_norm": 1.3844859600067139, "learning_rate": 2e-05, "loss": 0.04880771, "step": 10074 }, { "epoch": 20.15, "grad_norm": 1.1351920366287231, "learning_rate": 2e-05, "loss": 0.04456853, "step": 10075 }, { "epoch": 20.152, "grad_norm": 1.7876904010772705, "learning_rate": 2e-05, "loss": 0.04733547, "step": 10076 }, { "epoch": 20.154, "grad_norm": 1.676299810409546, "learning_rate": 2e-05, "loss": 0.04053468, "step": 10077 }, { "epoch": 20.156, "grad_norm": 0.9991481304168701, "learning_rate": 2e-05, "loss": 0.03337016, "step": 10078 }, { "epoch": 20.158, "grad_norm": 1.0327903032302856, "learning_rate": 2e-05, "loss": 0.03195174, "step": 10079 }, { "epoch": 20.16, "grad_norm": 1.0681126117706299, "learning_rate": 2e-05, "loss": 0.03106238, "step": 10080 }, { "epoch": 20.162, "grad_norm": 2.461606979370117, "learning_rate": 2e-05, "loss": 0.04076942, "step": 10081 }, { "epoch": 20.164, "grad_norm": 1.4891592264175415, "learning_rate": 2e-05, "loss": 0.0378016, "step": 10082 }, { "epoch": 20.166, "grad_norm": 2.7966361045837402, "learning_rate": 2e-05, "loss": 0.04116115, "step": 10083 }, { "epoch": 20.168, "grad_norm": 1.0875968933105469, "learning_rate": 2e-05, "loss": 0.03374017, "step": 10084 }, { "epoch": 20.17, "grad_norm": 1.6224273443222046, "learning_rate": 2e-05, "loss": 0.04455825, "step": 10085 }, { "epoch": 20.172, "grad_norm": 1.2282185554504395, "learning_rate": 2e-05, "loss": 0.03824051, "step": 10086 }, { "epoch": 20.174, "grad_norm": 1.5995073318481445, "learning_rate": 2e-05, "loss": 0.03549742, "step": 10087 }, { "epoch": 20.176, "grad_norm": 1.2183595895767212, "learning_rate": 2e-05, "loss": 0.03944673, "step": 10088 }, { "epoch": 20.178, "grad_norm": 1.4532480239868164, "learning_rate": 2e-05, "loss": 0.03099249, "step": 10089 }, { "epoch": 20.18, "grad_norm": 1.6714351177215576, "learning_rate": 2e-05, "loss": 0.05370768, "step": 10090 }, { "epoch": 20.182, "grad_norm": 1.2066564559936523, "learning_rate": 2e-05, "loss": 0.03310648, "step": 10091 }, { "epoch": 20.184, "grad_norm": 1.9016071557998657, "learning_rate": 2e-05, "loss": 0.06958458, "step": 10092 }, { "epoch": 20.186, "grad_norm": 1.0576119422912598, "learning_rate": 2e-05, "loss": 0.03138105, "step": 10093 }, { "epoch": 20.188, "grad_norm": 1.632576823234558, "learning_rate": 2e-05, "loss": 0.05661856, "step": 10094 }, { "epoch": 20.19, "grad_norm": 1.3058475255966187, "learning_rate": 2e-05, "loss": 0.02934664, "step": 10095 }, { "epoch": 20.192, "grad_norm": 1.5141475200653076, "learning_rate": 2e-05, "loss": 0.04773264, "step": 10096 }, { "epoch": 20.194, "grad_norm": 1.2892210483551025, "learning_rate": 2e-05, "loss": 0.0409392, "step": 10097 }, { "epoch": 20.196, "grad_norm": 1.0145940780639648, "learning_rate": 2e-05, "loss": 0.03348306, "step": 10098 }, { "epoch": 20.198, "grad_norm": 1.3150386810302734, "learning_rate": 2e-05, "loss": 0.04228361, "step": 10099 }, { "epoch": 20.2, "grad_norm": 1.4747895002365112, "learning_rate": 2e-05, "loss": 0.04473066, "step": 10100 }, { "epoch": 20.202, "grad_norm": 1.3429152965545654, "learning_rate": 2e-05, "loss": 0.03698963, "step": 10101 }, { "epoch": 20.204, "grad_norm": 1.5666682720184326, "learning_rate": 2e-05, "loss": 0.03814546, "step": 10102 }, { "epoch": 20.206, "grad_norm": 1.1224846839904785, "learning_rate": 2e-05, "loss": 0.03632937, "step": 10103 }, { "epoch": 20.208, "grad_norm": 1.2551809549331665, "learning_rate": 2e-05, "loss": 0.05948218, "step": 10104 }, { "epoch": 20.21, "grad_norm": 1.0920709371566772, "learning_rate": 2e-05, "loss": 0.03180977, "step": 10105 }, { "epoch": 20.212, "grad_norm": 1.1358767747879028, "learning_rate": 2e-05, "loss": 0.05127998, "step": 10106 }, { "epoch": 20.214, "grad_norm": 1.1630680561065674, "learning_rate": 2e-05, "loss": 0.03373503, "step": 10107 }, { "epoch": 20.216, "grad_norm": 1.0587674379348755, "learning_rate": 2e-05, "loss": 0.03114433, "step": 10108 }, { "epoch": 20.218, "grad_norm": 2.387716054916382, "learning_rate": 2e-05, "loss": 0.05491668, "step": 10109 }, { "epoch": 20.22, "grad_norm": 1.344147801399231, "learning_rate": 2e-05, "loss": 0.03537369, "step": 10110 }, { "epoch": 20.222, "grad_norm": 1.4900367259979248, "learning_rate": 2e-05, "loss": 0.04168044, "step": 10111 }, { "epoch": 20.224, "grad_norm": 1.3198118209838867, "learning_rate": 2e-05, "loss": 0.03236744, "step": 10112 }, { "epoch": 20.226, "grad_norm": 1.05833101272583, "learning_rate": 2e-05, "loss": 0.03506115, "step": 10113 }, { "epoch": 20.228, "grad_norm": 3.1193032264709473, "learning_rate": 2e-05, "loss": 0.06916459, "step": 10114 }, { "epoch": 20.23, "grad_norm": 0.9400885105133057, "learning_rate": 2e-05, "loss": 0.02644516, "step": 10115 }, { "epoch": 20.232, "grad_norm": 1.744911551475525, "learning_rate": 2e-05, "loss": 0.03334912, "step": 10116 }, { "epoch": 20.234, "grad_norm": 1.1233267784118652, "learning_rate": 2e-05, "loss": 0.02539495, "step": 10117 }, { "epoch": 20.236, "grad_norm": 0.8864102959632874, "learning_rate": 2e-05, "loss": 0.02736823, "step": 10118 }, { "epoch": 20.238, "grad_norm": 0.9077542424201965, "learning_rate": 2e-05, "loss": 0.02917507, "step": 10119 }, { "epoch": 20.24, "grad_norm": 1.7056787014007568, "learning_rate": 2e-05, "loss": 0.04501066, "step": 10120 }, { "epoch": 20.242, "grad_norm": 1.0653247833251953, "learning_rate": 2e-05, "loss": 0.03258765, "step": 10121 }, { "epoch": 20.244, "grad_norm": 1.5916606187820435, "learning_rate": 2e-05, "loss": 0.03004246, "step": 10122 }, { "epoch": 20.246, "grad_norm": 0.992498517036438, "learning_rate": 2e-05, "loss": 0.03313632, "step": 10123 }, { "epoch": 20.248, "grad_norm": 1.408166527748108, "learning_rate": 2e-05, "loss": 0.03982632, "step": 10124 }, { "epoch": 20.25, "grad_norm": 2.0703201293945312, "learning_rate": 2e-05, "loss": 0.03419951, "step": 10125 }, { "epoch": 20.252, "grad_norm": 2.027787446975708, "learning_rate": 2e-05, "loss": 0.04078956, "step": 10126 }, { "epoch": 20.254, "grad_norm": 1.7033048868179321, "learning_rate": 2e-05, "loss": 0.04830636, "step": 10127 }, { "epoch": 20.256, "grad_norm": 1.3836185932159424, "learning_rate": 2e-05, "loss": 0.05766873, "step": 10128 }, { "epoch": 20.258, "grad_norm": 0.874599039554596, "learning_rate": 2e-05, "loss": 0.02754955, "step": 10129 }, { "epoch": 20.26, "grad_norm": 1.280648946762085, "learning_rate": 2e-05, "loss": 0.03249458, "step": 10130 }, { "epoch": 20.262, "grad_norm": 3.8084380626678467, "learning_rate": 2e-05, "loss": 0.03238885, "step": 10131 }, { "epoch": 20.264, "grad_norm": 1.1068285703659058, "learning_rate": 2e-05, "loss": 0.03581792, "step": 10132 }, { "epoch": 20.266, "grad_norm": 1.4236106872558594, "learning_rate": 2e-05, "loss": 0.03960142, "step": 10133 }, { "epoch": 20.268, "grad_norm": 1.674912452697754, "learning_rate": 2e-05, "loss": 0.04210643, "step": 10134 }, { "epoch": 20.27, "grad_norm": 1.6190119981765747, "learning_rate": 2e-05, "loss": 0.03533464, "step": 10135 }, { "epoch": 20.272, "grad_norm": 1.0870835781097412, "learning_rate": 2e-05, "loss": 0.03944698, "step": 10136 }, { "epoch": 20.274, "grad_norm": 1.3776017427444458, "learning_rate": 2e-05, "loss": 0.03751071, "step": 10137 }, { "epoch": 20.276, "grad_norm": 1.1693474054336548, "learning_rate": 2e-05, "loss": 0.03089845, "step": 10138 }, { "epoch": 20.278, "grad_norm": 1.5422534942626953, "learning_rate": 2e-05, "loss": 0.0364728, "step": 10139 }, { "epoch": 20.28, "grad_norm": 1.130757212638855, "learning_rate": 2e-05, "loss": 0.03968713, "step": 10140 }, { "epoch": 20.282, "grad_norm": 1.601479172706604, "learning_rate": 2e-05, "loss": 0.0454113, "step": 10141 }, { "epoch": 20.284, "grad_norm": 1.0961538553237915, "learning_rate": 2e-05, "loss": 0.03847424, "step": 10142 }, { "epoch": 20.286, "grad_norm": 1.3003602027893066, "learning_rate": 2e-05, "loss": 0.03098981, "step": 10143 }, { "epoch": 20.288, "grad_norm": 1.9535208940505981, "learning_rate": 2e-05, "loss": 0.03620074, "step": 10144 }, { "epoch": 20.29, "grad_norm": 1.0742915868759155, "learning_rate": 2e-05, "loss": 0.03768191, "step": 10145 }, { "epoch": 20.292, "grad_norm": 0.8740436434745789, "learning_rate": 2e-05, "loss": 0.0226828, "step": 10146 }, { "epoch": 20.294, "grad_norm": 0.9478847980499268, "learning_rate": 2e-05, "loss": 0.03354997, "step": 10147 }, { "epoch": 20.296, "grad_norm": 1.195736289024353, "learning_rate": 2e-05, "loss": 0.03266739, "step": 10148 }, { "epoch": 20.298, "grad_norm": 1.1402510404586792, "learning_rate": 2e-05, "loss": 0.03164797, "step": 10149 }, { "epoch": 20.3, "grad_norm": 0.9935315847396851, "learning_rate": 2e-05, "loss": 0.0263091, "step": 10150 }, { "epoch": 20.302, "grad_norm": 1.2046669721603394, "learning_rate": 2e-05, "loss": 0.03377987, "step": 10151 }, { "epoch": 20.304, "grad_norm": 1.2782913446426392, "learning_rate": 2e-05, "loss": 0.03555286, "step": 10152 }, { "epoch": 20.306, "grad_norm": 1.155943512916565, "learning_rate": 2e-05, "loss": 0.03053265, "step": 10153 }, { "epoch": 20.308, "grad_norm": 1.3335613012313843, "learning_rate": 2e-05, "loss": 0.02797567, "step": 10154 }, { "epoch": 20.31, "grad_norm": 1.3252575397491455, "learning_rate": 2e-05, "loss": 0.04167391, "step": 10155 }, { "epoch": 20.312, "grad_norm": 1.257401704788208, "learning_rate": 2e-05, "loss": 0.04301036, "step": 10156 }, { "epoch": 20.314, "grad_norm": 1.2897112369537354, "learning_rate": 2e-05, "loss": 0.02559963, "step": 10157 }, { "epoch": 20.316, "grad_norm": 1.0978411436080933, "learning_rate": 2e-05, "loss": 0.03113418, "step": 10158 }, { "epoch": 20.318, "grad_norm": 0.9940638542175293, "learning_rate": 2e-05, "loss": 0.02741957, "step": 10159 }, { "epoch": 20.32, "grad_norm": 1.1258987188339233, "learning_rate": 2e-05, "loss": 0.03458284, "step": 10160 }, { "epoch": 20.322, "grad_norm": 1.3880150318145752, "learning_rate": 2e-05, "loss": 0.02557427, "step": 10161 }, { "epoch": 20.324, "grad_norm": 0.89255690574646, "learning_rate": 2e-05, "loss": 0.02698753, "step": 10162 }, { "epoch": 20.326, "grad_norm": 0.9891429543495178, "learning_rate": 2e-05, "loss": 0.03858247, "step": 10163 }, { "epoch": 20.328, "grad_norm": 3.275198459625244, "learning_rate": 2e-05, "loss": 0.04408592, "step": 10164 }, { "epoch": 20.33, "grad_norm": 2.566772699356079, "learning_rate": 2e-05, "loss": 0.04836412, "step": 10165 }, { "epoch": 20.332, "grad_norm": 1.2465749979019165, "learning_rate": 2e-05, "loss": 0.03609654, "step": 10166 }, { "epoch": 20.334, "grad_norm": 0.8518936634063721, "learning_rate": 2e-05, "loss": 0.02846691, "step": 10167 }, { "epoch": 20.336, "grad_norm": 0.8058186769485474, "learning_rate": 2e-05, "loss": 0.02809364, "step": 10168 }, { "epoch": 20.338, "grad_norm": 1.2480473518371582, "learning_rate": 2e-05, "loss": 0.03842864, "step": 10169 }, { "epoch": 20.34, "grad_norm": 1.6874932050704956, "learning_rate": 2e-05, "loss": 0.02990905, "step": 10170 }, { "epoch": 20.342, "grad_norm": 1.2645959854125977, "learning_rate": 2e-05, "loss": 0.02560621, "step": 10171 }, { "epoch": 20.344, "grad_norm": 1.209860920906067, "learning_rate": 2e-05, "loss": 0.04289609, "step": 10172 }, { "epoch": 20.346, "grad_norm": 0.9277303814888, "learning_rate": 2e-05, "loss": 0.02414119, "step": 10173 }, { "epoch": 20.348, "grad_norm": 1.4776111841201782, "learning_rate": 2e-05, "loss": 0.03888512, "step": 10174 }, { "epoch": 20.35, "grad_norm": 10.23653507232666, "learning_rate": 2e-05, "loss": 0.04151426, "step": 10175 }, { "epoch": 20.352, "grad_norm": 1.4618253707885742, "learning_rate": 2e-05, "loss": 0.04169954, "step": 10176 }, { "epoch": 20.354, "grad_norm": 1.517190933227539, "learning_rate": 2e-05, "loss": 0.03129986, "step": 10177 }, { "epoch": 20.356, "grad_norm": 1.265041470527649, "learning_rate": 2e-05, "loss": 0.04625013, "step": 10178 }, { "epoch": 20.358, "grad_norm": 0.9929906725883484, "learning_rate": 2e-05, "loss": 0.03318879, "step": 10179 }, { "epoch": 20.36, "grad_norm": 1.1843352317810059, "learning_rate": 2e-05, "loss": 0.02937111, "step": 10180 }, { "epoch": 20.362, "grad_norm": 3.79577374458313, "learning_rate": 2e-05, "loss": 0.04123313, "step": 10181 }, { "epoch": 20.364, "grad_norm": 1.2206071615219116, "learning_rate": 2e-05, "loss": 0.03874102, "step": 10182 }, { "epoch": 20.366, "grad_norm": 1.6392779350280762, "learning_rate": 2e-05, "loss": 0.03447268, "step": 10183 }, { "epoch": 20.368, "grad_norm": 1.166060447692871, "learning_rate": 2e-05, "loss": 0.04190687, "step": 10184 }, { "epoch": 20.37, "grad_norm": 1.3748960494995117, "learning_rate": 2e-05, "loss": 0.05042437, "step": 10185 }, { "epoch": 20.372, "grad_norm": 1.016833782196045, "learning_rate": 2e-05, "loss": 0.0323528, "step": 10186 }, { "epoch": 20.374, "grad_norm": 1.248270869255066, "learning_rate": 2e-05, "loss": 0.03355975, "step": 10187 }, { "epoch": 20.376, "grad_norm": 2.965949058532715, "learning_rate": 2e-05, "loss": 0.03507435, "step": 10188 }, { "epoch": 20.378, "grad_norm": 0.9657095074653625, "learning_rate": 2e-05, "loss": 0.03347619, "step": 10189 }, { "epoch": 20.38, "grad_norm": 1.3561325073242188, "learning_rate": 2e-05, "loss": 0.03161833, "step": 10190 }, { "epoch": 20.382, "grad_norm": 1.271353006362915, "learning_rate": 2e-05, "loss": 0.04460255, "step": 10191 }, { "epoch": 20.384, "grad_norm": 1.1262338161468506, "learning_rate": 2e-05, "loss": 0.03606068, "step": 10192 }, { "epoch": 20.386, "grad_norm": 1.0850212574005127, "learning_rate": 2e-05, "loss": 0.03550104, "step": 10193 }, { "epoch": 20.388, "grad_norm": 1.1879736185073853, "learning_rate": 2e-05, "loss": 0.03706096, "step": 10194 }, { "epoch": 20.39, "grad_norm": 0.8170276284217834, "learning_rate": 2e-05, "loss": 0.02326233, "step": 10195 }, { "epoch": 20.392, "grad_norm": 1.2504932880401611, "learning_rate": 2e-05, "loss": 0.0375908, "step": 10196 }, { "epoch": 20.394, "grad_norm": 2.0169646739959717, "learning_rate": 2e-05, "loss": 0.03000656, "step": 10197 }, { "epoch": 20.396, "grad_norm": 1.223647952079773, "learning_rate": 2e-05, "loss": 0.03923958, "step": 10198 }, { "epoch": 20.398, "grad_norm": 1.207067847251892, "learning_rate": 2e-05, "loss": 0.03308685, "step": 10199 }, { "epoch": 20.4, "grad_norm": 1.9212299585342407, "learning_rate": 2e-05, "loss": 0.03529272, "step": 10200 }, { "epoch": 20.402, "grad_norm": 2.4769113063812256, "learning_rate": 2e-05, "loss": 0.03606252, "step": 10201 }, { "epoch": 20.404, "grad_norm": 1.2097878456115723, "learning_rate": 2e-05, "loss": 0.03837938, "step": 10202 }, { "epoch": 20.406, "grad_norm": 1.7725907564163208, "learning_rate": 2e-05, "loss": 0.03187569, "step": 10203 }, { "epoch": 20.408, "grad_norm": 1.7797393798828125, "learning_rate": 2e-05, "loss": 0.04215318, "step": 10204 }, { "epoch": 20.41, "grad_norm": 1.4638700485229492, "learning_rate": 2e-05, "loss": 0.04161576, "step": 10205 }, { "epoch": 20.412, "grad_norm": 1.297315001487732, "learning_rate": 2e-05, "loss": 0.03515932, "step": 10206 }, { "epoch": 20.414, "grad_norm": 1.8025538921356201, "learning_rate": 2e-05, "loss": 0.03362041, "step": 10207 }, { "epoch": 20.416, "grad_norm": 1.6176317930221558, "learning_rate": 2e-05, "loss": 0.040805, "step": 10208 }, { "epoch": 20.418, "grad_norm": 0.9706657528877258, "learning_rate": 2e-05, "loss": 0.02987682, "step": 10209 }, { "epoch": 20.42, "grad_norm": 1.238422155380249, "learning_rate": 2e-05, "loss": 0.03280898, "step": 10210 }, { "epoch": 20.422, "grad_norm": 1.4637218713760376, "learning_rate": 2e-05, "loss": 0.03068679, "step": 10211 }, { "epoch": 20.424, "grad_norm": 2.2752456665039062, "learning_rate": 2e-05, "loss": 0.04319573, "step": 10212 }, { "epoch": 20.426, "grad_norm": 0.9194338917732239, "learning_rate": 2e-05, "loss": 0.03249656, "step": 10213 }, { "epoch": 20.428, "grad_norm": 1.2353630065917969, "learning_rate": 2e-05, "loss": 0.02999087, "step": 10214 }, { "epoch": 20.43, "grad_norm": 1.2627156972885132, "learning_rate": 2e-05, "loss": 0.04242412, "step": 10215 }, { "epoch": 20.432, "grad_norm": 1.1771798133850098, "learning_rate": 2e-05, "loss": 0.04588514, "step": 10216 }, { "epoch": 20.434, "grad_norm": 1.0947905778884888, "learning_rate": 2e-05, "loss": 0.03065883, "step": 10217 }, { "epoch": 20.436, "grad_norm": 1.1154054403305054, "learning_rate": 2e-05, "loss": 0.02652307, "step": 10218 }, { "epoch": 20.438, "grad_norm": 1.1405092477798462, "learning_rate": 2e-05, "loss": 0.01361668, "step": 10219 }, { "epoch": 20.44, "grad_norm": 1.1158807277679443, "learning_rate": 2e-05, "loss": 0.02150419, "step": 10220 }, { "epoch": 20.442, "grad_norm": 1.2909730672836304, "learning_rate": 2e-05, "loss": 0.03332622, "step": 10221 }, { "epoch": 20.444, "grad_norm": 1.8945097923278809, "learning_rate": 2e-05, "loss": 0.05206165, "step": 10222 }, { "epoch": 20.446, "grad_norm": 1.1864949464797974, "learning_rate": 2e-05, "loss": 0.02879405, "step": 10223 }, { "epoch": 20.448, "grad_norm": 1.5208016633987427, "learning_rate": 2e-05, "loss": 0.04240531, "step": 10224 }, { "epoch": 20.45, "grad_norm": 1.8025298118591309, "learning_rate": 2e-05, "loss": 0.04271449, "step": 10225 }, { "epoch": 20.452, "grad_norm": 1.7837144136428833, "learning_rate": 2e-05, "loss": 0.0441253, "step": 10226 }, { "epoch": 20.454, "grad_norm": 1.146601915359497, "learning_rate": 2e-05, "loss": 0.03485624, "step": 10227 }, { "epoch": 20.456, "grad_norm": 0.8954287171363831, "learning_rate": 2e-05, "loss": 0.02285667, "step": 10228 }, { "epoch": 20.458, "grad_norm": 2.9269511699676514, "learning_rate": 2e-05, "loss": 0.03113767, "step": 10229 }, { "epoch": 20.46, "grad_norm": 0.9090030193328857, "learning_rate": 2e-05, "loss": 0.02447719, "step": 10230 }, { "epoch": 20.462, "grad_norm": 1.004408836364746, "learning_rate": 2e-05, "loss": 0.03344005, "step": 10231 }, { "epoch": 20.464, "grad_norm": 1.294810175895691, "learning_rate": 2e-05, "loss": 0.02832571, "step": 10232 }, { "epoch": 20.466, "grad_norm": 1.3520246744155884, "learning_rate": 2e-05, "loss": 0.05982265, "step": 10233 }, { "epoch": 20.468, "grad_norm": 1.4492565393447876, "learning_rate": 2e-05, "loss": 0.04218536, "step": 10234 }, { "epoch": 20.47, "grad_norm": 2.6897168159484863, "learning_rate": 2e-05, "loss": 0.04501551, "step": 10235 }, { "epoch": 20.472, "grad_norm": 1.8297394514083862, "learning_rate": 2e-05, "loss": 0.03225635, "step": 10236 }, { "epoch": 20.474, "grad_norm": 1.2002131938934326, "learning_rate": 2e-05, "loss": 0.03257868, "step": 10237 }, { "epoch": 20.476, "grad_norm": 1.413238525390625, "learning_rate": 2e-05, "loss": 0.04716073, "step": 10238 }, { "epoch": 20.478, "grad_norm": 1.317274808883667, "learning_rate": 2e-05, "loss": 0.04736836, "step": 10239 }, { "epoch": 20.48, "grad_norm": 1.25471031665802, "learning_rate": 2e-05, "loss": 0.03034903, "step": 10240 }, { "epoch": 20.482, "grad_norm": 0.9913167357444763, "learning_rate": 2e-05, "loss": 0.02995886, "step": 10241 }, { "epoch": 20.484, "grad_norm": 1.0410956144332886, "learning_rate": 2e-05, "loss": 0.03229685, "step": 10242 }, { "epoch": 20.486, "grad_norm": 1.0938940048217773, "learning_rate": 2e-05, "loss": 0.02566743, "step": 10243 }, { "epoch": 20.488, "grad_norm": 1.5702996253967285, "learning_rate": 2e-05, "loss": 0.03801944, "step": 10244 }, { "epoch": 20.49, "grad_norm": 1.0842231512069702, "learning_rate": 2e-05, "loss": 0.03229133, "step": 10245 }, { "epoch": 20.492, "grad_norm": 4.291932106018066, "learning_rate": 2e-05, "loss": 0.03782872, "step": 10246 }, { "epoch": 20.494, "grad_norm": 1.3879879713058472, "learning_rate": 2e-05, "loss": 0.05402232, "step": 10247 }, { "epoch": 20.496, "grad_norm": 1.004504919052124, "learning_rate": 2e-05, "loss": 0.03432404, "step": 10248 }, { "epoch": 20.498, "grad_norm": 0.948047399520874, "learning_rate": 2e-05, "loss": 0.02306466, "step": 10249 }, { "epoch": 20.5, "grad_norm": 1.270635724067688, "learning_rate": 2e-05, "loss": 0.03679778, "step": 10250 }, { "epoch": 20.502, "grad_norm": 1.9827309846878052, "learning_rate": 2e-05, "loss": 0.04181923, "step": 10251 }, { "epoch": 20.504, "grad_norm": 1.788630485534668, "learning_rate": 2e-05, "loss": 0.03067524, "step": 10252 }, { "epoch": 20.506, "grad_norm": 1.5751676559448242, "learning_rate": 2e-05, "loss": 0.04409302, "step": 10253 }, { "epoch": 20.508, "grad_norm": 1.0470032691955566, "learning_rate": 2e-05, "loss": 0.02919145, "step": 10254 }, { "epoch": 20.51, "grad_norm": 1.5532153844833374, "learning_rate": 2e-05, "loss": 0.0342193, "step": 10255 }, { "epoch": 20.512, "grad_norm": 1.2492958307266235, "learning_rate": 2e-05, "loss": 0.03348667, "step": 10256 }, { "epoch": 20.514, "grad_norm": 1.015598177909851, "learning_rate": 2e-05, "loss": 0.02103408, "step": 10257 }, { "epoch": 20.516, "grad_norm": 1.0364876985549927, "learning_rate": 2e-05, "loss": 0.03669195, "step": 10258 }, { "epoch": 20.518, "grad_norm": 1.7574011087417603, "learning_rate": 2e-05, "loss": 0.02889457, "step": 10259 }, { "epoch": 20.52, "grad_norm": 2.7406435012817383, "learning_rate": 2e-05, "loss": 0.03528633, "step": 10260 }, { "epoch": 20.522, "grad_norm": 1.9497075080871582, "learning_rate": 2e-05, "loss": 0.03676779, "step": 10261 }, { "epoch": 20.524, "grad_norm": 1.2621787786483765, "learning_rate": 2e-05, "loss": 0.03754564, "step": 10262 }, { "epoch": 20.526, "grad_norm": 1.5117218494415283, "learning_rate": 2e-05, "loss": 0.03292262, "step": 10263 }, { "epoch": 20.528, "grad_norm": 0.9753475785255432, "learning_rate": 2e-05, "loss": 0.02898685, "step": 10264 }, { "epoch": 20.53, "grad_norm": 1.2703803777694702, "learning_rate": 2e-05, "loss": 0.03447834, "step": 10265 }, { "epoch": 20.532, "grad_norm": 1.037750244140625, "learning_rate": 2e-05, "loss": 0.02343212, "step": 10266 }, { "epoch": 20.534, "grad_norm": 1.8119739294052124, "learning_rate": 2e-05, "loss": 0.0345272, "step": 10267 }, { "epoch": 20.536, "grad_norm": 0.9879088401794434, "learning_rate": 2e-05, "loss": 0.03324077, "step": 10268 }, { "epoch": 20.538, "grad_norm": 1.1989351511001587, "learning_rate": 2e-05, "loss": 0.04151801, "step": 10269 }, { "epoch": 20.54, "grad_norm": 1.0397365093231201, "learning_rate": 2e-05, "loss": 0.03603576, "step": 10270 }, { "epoch": 20.542, "grad_norm": 3.7377114295959473, "learning_rate": 2e-05, "loss": 0.03584925, "step": 10271 }, { "epoch": 20.544, "grad_norm": 0.9478249549865723, "learning_rate": 2e-05, "loss": 0.02031682, "step": 10272 }, { "epoch": 20.546, "grad_norm": 1.2334407567977905, "learning_rate": 2e-05, "loss": 0.04748309, "step": 10273 }, { "epoch": 20.548000000000002, "grad_norm": 2.86969256401062, "learning_rate": 2e-05, "loss": 0.04576512, "step": 10274 }, { "epoch": 20.55, "grad_norm": 1.3003041744232178, "learning_rate": 2e-05, "loss": 0.03236845, "step": 10275 }, { "epoch": 20.552, "grad_norm": 2.296776533126831, "learning_rate": 2e-05, "loss": 0.04013178, "step": 10276 }, { "epoch": 20.554, "grad_norm": 1.155997395515442, "learning_rate": 2e-05, "loss": 0.03783529, "step": 10277 }, { "epoch": 20.556, "grad_norm": 1.140368938446045, "learning_rate": 2e-05, "loss": 0.03774282, "step": 10278 }, { "epoch": 20.558, "grad_norm": 1.4872280359268188, "learning_rate": 2e-05, "loss": 0.03347996, "step": 10279 }, { "epoch": 20.56, "grad_norm": 2.4217143058776855, "learning_rate": 2e-05, "loss": 0.0531043, "step": 10280 }, { "epoch": 20.562, "grad_norm": 1.4787334203720093, "learning_rate": 2e-05, "loss": 0.03938647, "step": 10281 }, { "epoch": 20.564, "grad_norm": 1.2541735172271729, "learning_rate": 2e-05, "loss": 0.03302208, "step": 10282 }, { "epoch": 20.566, "grad_norm": 1.1605701446533203, "learning_rate": 2e-05, "loss": 0.03414555, "step": 10283 }, { "epoch": 20.568, "grad_norm": 2.4995267391204834, "learning_rate": 2e-05, "loss": 0.03342891, "step": 10284 }, { "epoch": 20.57, "grad_norm": 1.6872836351394653, "learning_rate": 2e-05, "loss": 0.0238953, "step": 10285 }, { "epoch": 20.572, "grad_norm": 1.2707072496414185, "learning_rate": 2e-05, "loss": 0.03610934, "step": 10286 }, { "epoch": 20.574, "grad_norm": 1.908408761024475, "learning_rate": 2e-05, "loss": 0.02757995, "step": 10287 }, { "epoch": 20.576, "grad_norm": 0.9678316712379456, "learning_rate": 2e-05, "loss": 0.02639127, "step": 10288 }, { "epoch": 20.578, "grad_norm": 1.0835262537002563, "learning_rate": 2e-05, "loss": 0.03382458, "step": 10289 }, { "epoch": 20.58, "grad_norm": 1.21681547164917, "learning_rate": 2e-05, "loss": 0.02967013, "step": 10290 }, { "epoch": 20.582, "grad_norm": 1.723157286643982, "learning_rate": 2e-05, "loss": 0.03698352, "step": 10291 }, { "epoch": 20.584, "grad_norm": 1.0680166482925415, "learning_rate": 2e-05, "loss": 0.02927051, "step": 10292 }, { "epoch": 20.586, "grad_norm": 0.9486793279647827, "learning_rate": 2e-05, "loss": 0.0270779, "step": 10293 }, { "epoch": 20.588, "grad_norm": 2.921025037765503, "learning_rate": 2e-05, "loss": 0.04973118, "step": 10294 }, { "epoch": 20.59, "grad_norm": 1.2367773056030273, "learning_rate": 2e-05, "loss": 0.04252058, "step": 10295 }, { "epoch": 20.592, "grad_norm": 1.1579737663269043, "learning_rate": 2e-05, "loss": 0.03316213, "step": 10296 }, { "epoch": 20.594, "grad_norm": 2.2136647701263428, "learning_rate": 2e-05, "loss": 0.04563174, "step": 10297 }, { "epoch": 20.596, "grad_norm": 1.8893747329711914, "learning_rate": 2e-05, "loss": 0.03102884, "step": 10298 }, { "epoch": 20.598, "grad_norm": 2.0692083835601807, "learning_rate": 2e-05, "loss": 0.05330367, "step": 10299 }, { "epoch": 20.6, "grad_norm": 0.9594127535820007, "learning_rate": 2e-05, "loss": 0.02779751, "step": 10300 }, { "epoch": 20.602, "grad_norm": 2.0204217433929443, "learning_rate": 2e-05, "loss": 0.05240842, "step": 10301 }, { "epoch": 20.604, "grad_norm": 1.9438049793243408, "learning_rate": 2e-05, "loss": 0.0536239, "step": 10302 }, { "epoch": 20.606, "grad_norm": 1.0696160793304443, "learning_rate": 2e-05, "loss": 0.02945369, "step": 10303 }, { "epoch": 20.608, "grad_norm": 1.0507266521453857, "learning_rate": 2e-05, "loss": 0.0354307, "step": 10304 }, { "epoch": 20.61, "grad_norm": 1.37172269821167, "learning_rate": 2e-05, "loss": 0.04699483, "step": 10305 }, { "epoch": 20.612, "grad_norm": 1.324902057647705, "learning_rate": 2e-05, "loss": 0.02831972, "step": 10306 }, { "epoch": 20.614, "grad_norm": 1.9394724369049072, "learning_rate": 2e-05, "loss": 0.03686791, "step": 10307 }, { "epoch": 20.616, "grad_norm": 1.221810221672058, "learning_rate": 2e-05, "loss": 0.04077676, "step": 10308 }, { "epoch": 20.618, "grad_norm": 1.411195993423462, "learning_rate": 2e-05, "loss": 0.02411193, "step": 10309 }, { "epoch": 20.62, "grad_norm": 1.5662122964859009, "learning_rate": 2e-05, "loss": 0.04722676, "step": 10310 }, { "epoch": 20.622, "grad_norm": 0.8571231365203857, "learning_rate": 2e-05, "loss": 0.03123869, "step": 10311 }, { "epoch": 20.624, "grad_norm": 0.9552671313285828, "learning_rate": 2e-05, "loss": 0.02956947, "step": 10312 }, { "epoch": 20.626, "grad_norm": 1.1970704793930054, "learning_rate": 2e-05, "loss": 0.03908085, "step": 10313 }, { "epoch": 20.628, "grad_norm": 1.5940030813217163, "learning_rate": 2e-05, "loss": 0.03680157, "step": 10314 }, { "epoch": 20.63, "grad_norm": 1.1858786344528198, "learning_rate": 2e-05, "loss": 0.0258833, "step": 10315 }, { "epoch": 20.632, "grad_norm": 1.1859554052352905, "learning_rate": 2e-05, "loss": 0.0370981, "step": 10316 }, { "epoch": 20.634, "grad_norm": 0.9557812809944153, "learning_rate": 2e-05, "loss": 0.03161231, "step": 10317 }, { "epoch": 20.636, "grad_norm": 1.269998550415039, "learning_rate": 2e-05, "loss": 0.03466748, "step": 10318 }, { "epoch": 20.638, "grad_norm": 1.5743147134780884, "learning_rate": 2e-05, "loss": 0.03005154, "step": 10319 }, { "epoch": 20.64, "grad_norm": 1.3234727382659912, "learning_rate": 2e-05, "loss": 0.03330294, "step": 10320 }, { "epoch": 20.642, "grad_norm": 1.6464325189590454, "learning_rate": 2e-05, "loss": 0.04935025, "step": 10321 }, { "epoch": 20.644, "grad_norm": 1.5875946283340454, "learning_rate": 2e-05, "loss": 0.03549227, "step": 10322 }, { "epoch": 20.646, "grad_norm": 1.4373325109481812, "learning_rate": 2e-05, "loss": 0.03930869, "step": 10323 }, { "epoch": 20.648, "grad_norm": 0.8817594647407532, "learning_rate": 2e-05, "loss": 0.0271128, "step": 10324 }, { "epoch": 20.65, "grad_norm": 1.6336750984191895, "learning_rate": 2e-05, "loss": 0.03263681, "step": 10325 }, { "epoch": 20.652, "grad_norm": 1.517414927482605, "learning_rate": 2e-05, "loss": 0.04617567, "step": 10326 }, { "epoch": 20.654, "grad_norm": 4.604348182678223, "learning_rate": 2e-05, "loss": 0.02881363, "step": 10327 }, { "epoch": 20.656, "grad_norm": 1.4577995538711548, "learning_rate": 2e-05, "loss": 0.03543629, "step": 10328 }, { "epoch": 20.658, "grad_norm": 0.9887872338294983, "learning_rate": 2e-05, "loss": 0.02208835, "step": 10329 }, { "epoch": 20.66, "grad_norm": 1.5389630794525146, "learning_rate": 2e-05, "loss": 0.02897654, "step": 10330 }, { "epoch": 20.662, "grad_norm": 1.308394432067871, "learning_rate": 2e-05, "loss": 0.03866008, "step": 10331 }, { "epoch": 20.664, "grad_norm": 0.8264133334159851, "learning_rate": 2e-05, "loss": 0.02847798, "step": 10332 }, { "epoch": 20.666, "grad_norm": 1.7936815023422241, "learning_rate": 2e-05, "loss": 0.03791363, "step": 10333 }, { "epoch": 20.668, "grad_norm": 1.3060663938522339, "learning_rate": 2e-05, "loss": 0.03301517, "step": 10334 }, { "epoch": 20.67, "grad_norm": 1.709163784980774, "learning_rate": 2e-05, "loss": 0.03455461, "step": 10335 }, { "epoch": 20.672, "grad_norm": 1.8301975727081299, "learning_rate": 2e-05, "loss": 0.02920924, "step": 10336 }, { "epoch": 20.674, "grad_norm": 1.4292941093444824, "learning_rate": 2e-05, "loss": 0.03628965, "step": 10337 }, { "epoch": 20.676, "grad_norm": 1.2578301429748535, "learning_rate": 2e-05, "loss": 0.02768565, "step": 10338 }, { "epoch": 20.678, "grad_norm": 1.4743505716323853, "learning_rate": 2e-05, "loss": 0.03460295, "step": 10339 }, { "epoch": 20.68, "grad_norm": 0.9008990526199341, "learning_rate": 2e-05, "loss": 0.02599203, "step": 10340 }, { "epoch": 20.682, "grad_norm": 2.693477153778076, "learning_rate": 2e-05, "loss": 0.02892848, "step": 10341 }, { "epoch": 20.684, "grad_norm": 1.2961777448654175, "learning_rate": 2e-05, "loss": 0.03386326, "step": 10342 }, { "epoch": 20.686, "grad_norm": 1.1865119934082031, "learning_rate": 2e-05, "loss": 0.02271389, "step": 10343 }, { "epoch": 20.688, "grad_norm": 1.4448591470718384, "learning_rate": 2e-05, "loss": 0.03761415, "step": 10344 }, { "epoch": 20.69, "grad_norm": 1.2941032648086548, "learning_rate": 2e-05, "loss": 0.04060592, "step": 10345 }, { "epoch": 20.692, "grad_norm": 1.4364266395568848, "learning_rate": 2e-05, "loss": 0.0526364, "step": 10346 }, { "epoch": 20.694, "grad_norm": 1.228283405303955, "learning_rate": 2e-05, "loss": 0.03359667, "step": 10347 }, { "epoch": 20.696, "grad_norm": 1.265254259109497, "learning_rate": 2e-05, "loss": 0.04058044, "step": 10348 }, { "epoch": 20.698, "grad_norm": 1.298516035079956, "learning_rate": 2e-05, "loss": 0.04035057, "step": 10349 }, { "epoch": 20.7, "grad_norm": 1.272953748703003, "learning_rate": 2e-05, "loss": 0.03609696, "step": 10350 }, { "epoch": 20.701999999999998, "grad_norm": 1.0742324590682983, "learning_rate": 2e-05, "loss": 0.03620451, "step": 10351 }, { "epoch": 20.704, "grad_norm": 1.4301596879959106, "learning_rate": 2e-05, "loss": 0.04321437, "step": 10352 }, { "epoch": 20.706, "grad_norm": 1.9156485795974731, "learning_rate": 2e-05, "loss": 0.03270895, "step": 10353 }, { "epoch": 20.708, "grad_norm": 1.2552930116653442, "learning_rate": 2e-05, "loss": 0.02602671, "step": 10354 }, { "epoch": 20.71, "grad_norm": 1.193481683731079, "learning_rate": 2e-05, "loss": 0.03754304, "step": 10355 }, { "epoch": 20.712, "grad_norm": 1.4162143468856812, "learning_rate": 2e-05, "loss": 0.04919789, "step": 10356 }, { "epoch": 20.714, "grad_norm": 1.1660418510437012, "learning_rate": 2e-05, "loss": 0.03052988, "step": 10357 }, { "epoch": 20.716, "grad_norm": 0.9906470775604248, "learning_rate": 2e-05, "loss": 0.0268365, "step": 10358 }, { "epoch": 20.718, "grad_norm": 1.7477552890777588, "learning_rate": 2e-05, "loss": 0.05105488, "step": 10359 }, { "epoch": 20.72, "grad_norm": 1.1876134872436523, "learning_rate": 2e-05, "loss": 0.02859118, "step": 10360 }, { "epoch": 20.722, "grad_norm": 1.3102295398712158, "learning_rate": 2e-05, "loss": 0.03058487, "step": 10361 }, { "epoch": 20.724, "grad_norm": 1.7961902618408203, "learning_rate": 2e-05, "loss": 0.04488046, "step": 10362 }, { "epoch": 20.726, "grad_norm": 1.058826208114624, "learning_rate": 2e-05, "loss": 0.03246313, "step": 10363 }, { "epoch": 20.728, "grad_norm": 1.0731538534164429, "learning_rate": 2e-05, "loss": 0.02816553, "step": 10364 }, { "epoch": 20.73, "grad_norm": 2.4187989234924316, "learning_rate": 2e-05, "loss": 0.04112451, "step": 10365 }, { "epoch": 20.732, "grad_norm": 1.7762290239334106, "learning_rate": 2e-05, "loss": 0.04738872, "step": 10366 }, { "epoch": 20.734, "grad_norm": 1.1634255647659302, "learning_rate": 2e-05, "loss": 0.03303593, "step": 10367 }, { "epoch": 20.736, "grad_norm": 1.1194519996643066, "learning_rate": 2e-05, "loss": 0.03090698, "step": 10368 }, { "epoch": 20.738, "grad_norm": 1.9328291416168213, "learning_rate": 2e-05, "loss": 0.03827959, "step": 10369 }, { "epoch": 20.74, "grad_norm": 0.9472059607505798, "learning_rate": 2e-05, "loss": 0.02610377, "step": 10370 }, { "epoch": 20.742, "grad_norm": 1.0899560451507568, "learning_rate": 2e-05, "loss": 0.04154932, "step": 10371 }, { "epoch": 20.744, "grad_norm": 1.071056604385376, "learning_rate": 2e-05, "loss": 0.02690003, "step": 10372 }, { "epoch": 20.746, "grad_norm": 1.2740556001663208, "learning_rate": 2e-05, "loss": 0.03140601, "step": 10373 }, { "epoch": 20.748, "grad_norm": 1.2829887866973877, "learning_rate": 2e-05, "loss": 0.0347194, "step": 10374 }, { "epoch": 20.75, "grad_norm": 0.9616041779518127, "learning_rate": 2e-05, "loss": 0.03521022, "step": 10375 }, { "epoch": 20.752, "grad_norm": 1.2607543468475342, "learning_rate": 2e-05, "loss": 0.03321379, "step": 10376 }, { "epoch": 20.754, "grad_norm": 1.4216960668563843, "learning_rate": 2e-05, "loss": 0.03590911, "step": 10377 }, { "epoch": 20.756, "grad_norm": 1.5020769834518433, "learning_rate": 2e-05, "loss": 0.04069631, "step": 10378 }, { "epoch": 20.758, "grad_norm": 1.044640302658081, "learning_rate": 2e-05, "loss": 0.02541364, "step": 10379 }, { "epoch": 20.76, "grad_norm": 1.2528002262115479, "learning_rate": 2e-05, "loss": 0.03949961, "step": 10380 }, { "epoch": 20.762, "grad_norm": 1.23416006565094, "learning_rate": 2e-05, "loss": 0.02047839, "step": 10381 }, { "epoch": 20.764, "grad_norm": 0.9270185232162476, "learning_rate": 2e-05, "loss": 0.03086235, "step": 10382 }, { "epoch": 20.766, "grad_norm": 1.3459302186965942, "learning_rate": 2e-05, "loss": 0.04397811, "step": 10383 }, { "epoch": 20.768, "grad_norm": 1.8959941864013672, "learning_rate": 2e-05, "loss": 0.02847361, "step": 10384 }, { "epoch": 20.77, "grad_norm": 1.0249385833740234, "learning_rate": 2e-05, "loss": 0.02996654, "step": 10385 }, { "epoch": 20.772, "grad_norm": 1.1461448669433594, "learning_rate": 2e-05, "loss": 0.04238908, "step": 10386 }, { "epoch": 20.774, "grad_norm": 0.896342933177948, "learning_rate": 2e-05, "loss": 0.02898027, "step": 10387 }, { "epoch": 20.776, "grad_norm": 1.2837777137756348, "learning_rate": 2e-05, "loss": 0.03371213, "step": 10388 }, { "epoch": 20.778, "grad_norm": 1.4851652383804321, "learning_rate": 2e-05, "loss": 0.04785133, "step": 10389 }, { "epoch": 20.78, "grad_norm": 1.1204262971878052, "learning_rate": 2e-05, "loss": 0.03940021, "step": 10390 }, { "epoch": 20.782, "grad_norm": 1.6720068454742432, "learning_rate": 2e-05, "loss": 0.03579511, "step": 10391 }, { "epoch": 20.784, "grad_norm": 0.9738611578941345, "learning_rate": 2e-05, "loss": 0.0249313, "step": 10392 }, { "epoch": 20.786, "grad_norm": 1.8201998472213745, "learning_rate": 2e-05, "loss": 0.048961, "step": 10393 }, { "epoch": 20.788, "grad_norm": 1.2513413429260254, "learning_rate": 2e-05, "loss": 0.02755917, "step": 10394 }, { "epoch": 20.79, "grad_norm": 1.3315002918243408, "learning_rate": 2e-05, "loss": 0.04382675, "step": 10395 }, { "epoch": 20.792, "grad_norm": 1.3850572109222412, "learning_rate": 2e-05, "loss": 0.04453446, "step": 10396 }, { "epoch": 20.794, "grad_norm": 1.5482556819915771, "learning_rate": 2e-05, "loss": 0.04031963, "step": 10397 }, { "epoch": 20.796, "grad_norm": 1.8607449531555176, "learning_rate": 2e-05, "loss": 0.05087737, "step": 10398 }, { "epoch": 20.798000000000002, "grad_norm": 1.6714609861373901, "learning_rate": 2e-05, "loss": 0.04965856, "step": 10399 }, { "epoch": 20.8, "grad_norm": 1.9360687732696533, "learning_rate": 2e-05, "loss": 0.04098749, "step": 10400 }, { "epoch": 20.802, "grad_norm": 0.939067542552948, "learning_rate": 2e-05, "loss": 0.02232962, "step": 10401 }, { "epoch": 20.804, "grad_norm": 1.1615712642669678, "learning_rate": 2e-05, "loss": 0.03574735, "step": 10402 }, { "epoch": 20.806, "grad_norm": 1.2097338438034058, "learning_rate": 2e-05, "loss": 0.03018945, "step": 10403 }, { "epoch": 20.808, "grad_norm": 1.0195422172546387, "learning_rate": 2e-05, "loss": 0.03126357, "step": 10404 }, { "epoch": 20.81, "grad_norm": 1.9747889041900635, "learning_rate": 2e-05, "loss": 0.04224984, "step": 10405 }, { "epoch": 20.812, "grad_norm": 1.3474302291870117, "learning_rate": 2e-05, "loss": 0.04074406, "step": 10406 }, { "epoch": 20.814, "grad_norm": 1.5049179792404175, "learning_rate": 2e-05, "loss": 0.04507799, "step": 10407 }, { "epoch": 20.816, "grad_norm": 1.5321961641311646, "learning_rate": 2e-05, "loss": 0.04021778, "step": 10408 }, { "epoch": 20.818, "grad_norm": 0.8743321895599365, "learning_rate": 2e-05, "loss": 0.0247119, "step": 10409 }, { "epoch": 20.82, "grad_norm": 1.6426585912704468, "learning_rate": 2e-05, "loss": 0.04337841, "step": 10410 }, { "epoch": 20.822, "grad_norm": 2.048464059829712, "learning_rate": 2e-05, "loss": 0.04811566, "step": 10411 }, { "epoch": 20.824, "grad_norm": 1.0955586433410645, "learning_rate": 2e-05, "loss": 0.02928637, "step": 10412 }, { "epoch": 20.826, "grad_norm": 3.2851905822753906, "learning_rate": 2e-05, "loss": 0.03792008, "step": 10413 }, { "epoch": 20.828, "grad_norm": 1.1193740367889404, "learning_rate": 2e-05, "loss": 0.03241657, "step": 10414 }, { "epoch": 20.83, "grad_norm": 2.217038631439209, "learning_rate": 2e-05, "loss": 0.04027008, "step": 10415 }, { "epoch": 20.832, "grad_norm": 1.1971979141235352, "learning_rate": 2e-05, "loss": 0.0313881, "step": 10416 }, { "epoch": 20.834, "grad_norm": 1.2874393463134766, "learning_rate": 2e-05, "loss": 0.03874963, "step": 10417 }, { "epoch": 20.836, "grad_norm": 1.7825161218643188, "learning_rate": 2e-05, "loss": 0.0345277, "step": 10418 }, { "epoch": 20.838, "grad_norm": 2.392350196838379, "learning_rate": 2e-05, "loss": 0.05566229, "step": 10419 }, { "epoch": 20.84, "grad_norm": 1.6109018325805664, "learning_rate": 2e-05, "loss": 0.04091034, "step": 10420 }, { "epoch": 20.842, "grad_norm": 1.0898905992507935, "learning_rate": 2e-05, "loss": 0.03517729, "step": 10421 }, { "epoch": 20.844, "grad_norm": 1.2754164934158325, "learning_rate": 2e-05, "loss": 0.03659736, "step": 10422 }, { "epoch": 20.846, "grad_norm": 1.520416259765625, "learning_rate": 2e-05, "loss": 0.03716481, "step": 10423 }, { "epoch": 20.848, "grad_norm": 2.5298376083374023, "learning_rate": 2e-05, "loss": 0.04783663, "step": 10424 }, { "epoch": 20.85, "grad_norm": 1.6789484024047852, "learning_rate": 2e-05, "loss": 0.03685528, "step": 10425 }, { "epoch": 20.852, "grad_norm": 1.5136722326278687, "learning_rate": 2e-05, "loss": 0.03236245, "step": 10426 }, { "epoch": 20.854, "grad_norm": 0.9097597002983093, "learning_rate": 2e-05, "loss": 0.02598057, "step": 10427 }, { "epoch": 20.856, "grad_norm": 1.2258105278015137, "learning_rate": 2e-05, "loss": 0.02003474, "step": 10428 }, { "epoch": 20.858, "grad_norm": 1.2743656635284424, "learning_rate": 2e-05, "loss": 0.04647747, "step": 10429 }, { "epoch": 20.86, "grad_norm": 1.199223279953003, "learning_rate": 2e-05, "loss": 0.03247061, "step": 10430 }, { "epoch": 20.862, "grad_norm": 1.2215161323547363, "learning_rate": 2e-05, "loss": 0.03422792, "step": 10431 }, { "epoch": 20.864, "grad_norm": 1.02229642868042, "learning_rate": 2e-05, "loss": 0.02587418, "step": 10432 }, { "epoch": 20.866, "grad_norm": 1.7659459114074707, "learning_rate": 2e-05, "loss": 0.03618574, "step": 10433 }, { "epoch": 20.868, "grad_norm": 1.2492812871932983, "learning_rate": 2e-05, "loss": 0.03435018, "step": 10434 }, { "epoch": 20.87, "grad_norm": 1.9097106456756592, "learning_rate": 2e-05, "loss": 0.05099519, "step": 10435 }, { "epoch": 20.872, "grad_norm": 2.3891992568969727, "learning_rate": 2e-05, "loss": 0.03493572, "step": 10436 }, { "epoch": 20.874, "grad_norm": 6.847945690155029, "learning_rate": 2e-05, "loss": 0.04543907, "step": 10437 }, { "epoch": 20.876, "grad_norm": 0.8078843951225281, "learning_rate": 2e-05, "loss": 0.01693185, "step": 10438 }, { "epoch": 20.878, "grad_norm": 1.1145824193954468, "learning_rate": 2e-05, "loss": 0.03014626, "step": 10439 }, { "epoch": 20.88, "grad_norm": 1.4058823585510254, "learning_rate": 2e-05, "loss": 0.03669397, "step": 10440 }, { "epoch": 20.882, "grad_norm": 1.749841570854187, "learning_rate": 2e-05, "loss": 0.03852821, "step": 10441 }, { "epoch": 20.884, "grad_norm": 1.228166937828064, "learning_rate": 2e-05, "loss": 0.04459646, "step": 10442 }, { "epoch": 20.886, "grad_norm": 1.1540074348449707, "learning_rate": 2e-05, "loss": 0.02661585, "step": 10443 }, { "epoch": 20.888, "grad_norm": 2.69791841506958, "learning_rate": 2e-05, "loss": 0.03573243, "step": 10444 }, { "epoch": 20.89, "grad_norm": 1.062644124031067, "learning_rate": 2e-05, "loss": 0.02639816, "step": 10445 }, { "epoch": 20.892, "grad_norm": 1.1434491872787476, "learning_rate": 2e-05, "loss": 0.03068436, "step": 10446 }, { "epoch": 20.894, "grad_norm": 1.0184530019760132, "learning_rate": 2e-05, "loss": 0.02732141, "step": 10447 }, { "epoch": 20.896, "grad_norm": 1.670699954032898, "learning_rate": 2e-05, "loss": 0.02273174, "step": 10448 }, { "epoch": 20.898, "grad_norm": 1.2134953737258911, "learning_rate": 2e-05, "loss": 0.03250109, "step": 10449 }, { "epoch": 20.9, "grad_norm": 1.2033954858779907, "learning_rate": 2e-05, "loss": 0.04112671, "step": 10450 }, { "epoch": 20.902, "grad_norm": 1.2063027620315552, "learning_rate": 2e-05, "loss": 0.03256904, "step": 10451 }, { "epoch": 20.904, "grad_norm": 1.2613540887832642, "learning_rate": 2e-05, "loss": 0.02923998, "step": 10452 }, { "epoch": 20.906, "grad_norm": 1.8170700073242188, "learning_rate": 2e-05, "loss": 0.04404435, "step": 10453 }, { "epoch": 20.908, "grad_norm": 2.074605703353882, "learning_rate": 2e-05, "loss": 0.06511399, "step": 10454 }, { "epoch": 20.91, "grad_norm": 1.1869606971740723, "learning_rate": 2e-05, "loss": 0.03975774, "step": 10455 }, { "epoch": 20.912, "grad_norm": 1.0519496202468872, "learning_rate": 2e-05, "loss": 0.03061169, "step": 10456 }, { "epoch": 20.914, "grad_norm": 1.1407067775726318, "learning_rate": 2e-05, "loss": 0.02600056, "step": 10457 }, { "epoch": 20.916, "grad_norm": 1.2726540565490723, "learning_rate": 2e-05, "loss": 0.03579668, "step": 10458 }, { "epoch": 20.918, "grad_norm": 1.0577654838562012, "learning_rate": 2e-05, "loss": 0.0273657, "step": 10459 }, { "epoch": 20.92, "grad_norm": 3.370119333267212, "learning_rate": 2e-05, "loss": 0.04482879, "step": 10460 }, { "epoch": 20.922, "grad_norm": 0.9270197153091431, "learning_rate": 2e-05, "loss": 0.02642347, "step": 10461 }, { "epoch": 20.924, "grad_norm": 2.1031179428100586, "learning_rate": 2e-05, "loss": 0.03457503, "step": 10462 }, { "epoch": 20.926, "grad_norm": 1.47755765914917, "learning_rate": 2e-05, "loss": 0.04129061, "step": 10463 }, { "epoch": 20.928, "grad_norm": 1.221456527709961, "learning_rate": 2e-05, "loss": 0.0394924, "step": 10464 }, { "epoch": 20.93, "grad_norm": 1.5422698259353638, "learning_rate": 2e-05, "loss": 0.04629602, "step": 10465 }, { "epoch": 20.932, "grad_norm": 1.2598649263381958, "learning_rate": 2e-05, "loss": 0.03605943, "step": 10466 }, { "epoch": 20.934, "grad_norm": 1.0877323150634766, "learning_rate": 2e-05, "loss": 0.03485626, "step": 10467 }, { "epoch": 20.936, "grad_norm": 0.8516746163368225, "learning_rate": 2e-05, "loss": 0.01967622, "step": 10468 }, { "epoch": 20.938, "grad_norm": 1.01011061668396, "learning_rate": 2e-05, "loss": 0.0255489, "step": 10469 }, { "epoch": 20.94, "grad_norm": 1.1266335248947144, "learning_rate": 2e-05, "loss": 0.02532486, "step": 10470 }, { "epoch": 20.942, "grad_norm": 1.5323753356933594, "learning_rate": 2e-05, "loss": 0.03835236, "step": 10471 }, { "epoch": 20.944, "grad_norm": 1.58487069606781, "learning_rate": 2e-05, "loss": 0.03736504, "step": 10472 }, { "epoch": 20.946, "grad_norm": 1.2376452684402466, "learning_rate": 2e-05, "loss": 0.03332981, "step": 10473 }, { "epoch": 20.948, "grad_norm": 1.3151975870132446, "learning_rate": 2e-05, "loss": 0.03374566, "step": 10474 }, { "epoch": 20.95, "grad_norm": 1.410962700843811, "learning_rate": 2e-05, "loss": 0.04390511, "step": 10475 }, { "epoch": 20.951999999999998, "grad_norm": 3.6582796573638916, "learning_rate": 2e-05, "loss": 0.04068881, "step": 10476 }, { "epoch": 20.954, "grad_norm": 1.183980941772461, "learning_rate": 2e-05, "loss": 0.03768382, "step": 10477 }, { "epoch": 20.956, "grad_norm": 1.9583686590194702, "learning_rate": 2e-05, "loss": 0.04036102, "step": 10478 }, { "epoch": 20.958, "grad_norm": 1.1289726495742798, "learning_rate": 2e-05, "loss": 0.02689121, "step": 10479 }, { "epoch": 20.96, "grad_norm": 1.1652100086212158, "learning_rate": 2e-05, "loss": 0.03687986, "step": 10480 }, { "epoch": 20.962, "grad_norm": 1.4179235696792603, "learning_rate": 2e-05, "loss": 0.03052982, "step": 10481 }, { "epoch": 20.964, "grad_norm": 0.9714958071708679, "learning_rate": 2e-05, "loss": 0.02744197, "step": 10482 }, { "epoch": 20.966, "grad_norm": 1.1505253314971924, "learning_rate": 2e-05, "loss": 0.03075511, "step": 10483 }, { "epoch": 20.968, "grad_norm": 1.3006511926651, "learning_rate": 2e-05, "loss": 0.04219224, "step": 10484 }, { "epoch": 20.97, "grad_norm": 1.2317531108856201, "learning_rate": 2e-05, "loss": 0.0362198, "step": 10485 }, { "epoch": 20.972, "grad_norm": 1.2480534315109253, "learning_rate": 2e-05, "loss": 0.04598153, "step": 10486 }, { "epoch": 20.974, "grad_norm": 1.9260978698730469, "learning_rate": 2e-05, "loss": 0.05104748, "step": 10487 }, { "epoch": 20.976, "grad_norm": 1.0552128553390503, "learning_rate": 2e-05, "loss": 0.02821925, "step": 10488 }, { "epoch": 20.978, "grad_norm": 1.0819281339645386, "learning_rate": 2e-05, "loss": 0.03735295, "step": 10489 }, { "epoch": 20.98, "grad_norm": 0.9959445595741272, "learning_rate": 2e-05, "loss": 0.03469016, "step": 10490 }, { "epoch": 20.982, "grad_norm": 1.0732587575912476, "learning_rate": 2e-05, "loss": 0.02674491, "step": 10491 }, { "epoch": 20.984, "grad_norm": 1.1409895420074463, "learning_rate": 2e-05, "loss": 0.02786115, "step": 10492 }, { "epoch": 20.986, "grad_norm": 0.8526608943939209, "learning_rate": 2e-05, "loss": 0.02098497, "step": 10493 }, { "epoch": 20.988, "grad_norm": 1.0358623266220093, "learning_rate": 2e-05, "loss": 0.02189691, "step": 10494 }, { "epoch": 20.99, "grad_norm": 1.4046947956085205, "learning_rate": 2e-05, "loss": 0.0338182, "step": 10495 }, { "epoch": 20.992, "grad_norm": 1.1783146858215332, "learning_rate": 2e-05, "loss": 0.03898209, "step": 10496 }, { "epoch": 20.994, "grad_norm": 0.9742079973220825, "learning_rate": 2e-05, "loss": 0.02756248, "step": 10497 }, { "epoch": 20.996, "grad_norm": 2.197392463684082, "learning_rate": 2e-05, "loss": 0.04526626, "step": 10498 }, { "epoch": 20.998, "grad_norm": 1.057904601097107, "learning_rate": 2e-05, "loss": 0.04241625, "step": 10499 }, { "epoch": 21.0, "grad_norm": 1.4337189197540283, "learning_rate": 2e-05, "loss": 0.03345023, "step": 10500 }, { "epoch": 21.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9740518962075848, "Equal_1": 0.998, "Equal_2": 0.9820359281437125, "Equal_3": 0.9680638722554891, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9899799599198397, "Parallel_3": 0.986, "Perpendicular_1": 0.998, "Perpendicular_2": 0.984, "Perpendicular_3": 0.8186372745490982, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.994, "PointLiesOnCircle_3": 0.988, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9899799599198397, "PointLiesOnLine_3": 0.9840319361277445 }, "eval_runtime": 319.6633, "eval_samples_per_second": 32.847, "eval_steps_per_second": 0.657, "step": 10500 }, { "epoch": 21.002, "grad_norm": 1.4172569513320923, "learning_rate": 2e-05, "loss": 0.04958025, "step": 10501 }, { "epoch": 21.004, "grad_norm": 1.2641007900238037, "learning_rate": 2e-05, "loss": 0.04138844, "step": 10502 }, { "epoch": 21.006, "grad_norm": 1.5297917127609253, "learning_rate": 2e-05, "loss": 0.05362614, "step": 10503 }, { "epoch": 21.008, "grad_norm": 1.4040725231170654, "learning_rate": 2e-05, "loss": 0.02881721, "step": 10504 }, { "epoch": 21.01, "grad_norm": 2.7434298992156982, "learning_rate": 2e-05, "loss": 0.05710435, "step": 10505 }, { "epoch": 21.012, "grad_norm": 0.9089720845222473, "learning_rate": 2e-05, "loss": 0.02749285, "step": 10506 }, { "epoch": 21.014, "grad_norm": 1.093634009361267, "learning_rate": 2e-05, "loss": 0.02694854, "step": 10507 }, { "epoch": 21.016, "grad_norm": 0.8842881917953491, "learning_rate": 2e-05, "loss": 0.02866191, "step": 10508 }, { "epoch": 21.018, "grad_norm": 1.1293853521347046, "learning_rate": 2e-05, "loss": 0.03138885, "step": 10509 }, { "epoch": 21.02, "grad_norm": 2.5073678493499756, "learning_rate": 2e-05, "loss": 0.036281, "step": 10510 }, { "epoch": 21.022, "grad_norm": 0.8775820136070251, "learning_rate": 2e-05, "loss": 0.0302633, "step": 10511 }, { "epoch": 21.024, "grad_norm": 1.844066858291626, "learning_rate": 2e-05, "loss": 0.05106075, "step": 10512 }, { "epoch": 21.026, "grad_norm": 1.2659056186676025, "learning_rate": 2e-05, "loss": 0.03020029, "step": 10513 }, { "epoch": 21.028, "grad_norm": 1.132651686668396, "learning_rate": 2e-05, "loss": 0.02988936, "step": 10514 }, { "epoch": 21.03, "grad_norm": 0.9240502119064331, "learning_rate": 2e-05, "loss": 0.02960332, "step": 10515 }, { "epoch": 21.032, "grad_norm": 3.4578001499176025, "learning_rate": 2e-05, "loss": 0.03723548, "step": 10516 }, { "epoch": 21.034, "grad_norm": 1.327440619468689, "learning_rate": 2e-05, "loss": 0.04209972, "step": 10517 }, { "epoch": 21.036, "grad_norm": 0.7104504704475403, "learning_rate": 2e-05, "loss": 0.01660552, "step": 10518 }, { "epoch": 21.038, "grad_norm": 1.9451689720153809, "learning_rate": 2e-05, "loss": 0.04870243, "step": 10519 }, { "epoch": 21.04, "grad_norm": 1.5693484544754028, "learning_rate": 2e-05, "loss": 0.05037589, "step": 10520 }, { "epoch": 21.042, "grad_norm": 1.0665810108184814, "learning_rate": 2e-05, "loss": 0.03087834, "step": 10521 }, { "epoch": 21.044, "grad_norm": 1.309026837348938, "learning_rate": 2e-05, "loss": 0.03562339, "step": 10522 }, { "epoch": 21.046, "grad_norm": 1.5604883432388306, "learning_rate": 2e-05, "loss": 0.044502, "step": 10523 }, { "epoch": 21.048, "grad_norm": 0.9193902015686035, "learning_rate": 2e-05, "loss": 0.02742101, "step": 10524 }, { "epoch": 21.05, "grad_norm": 1.5470359325408936, "learning_rate": 2e-05, "loss": 0.03719456, "step": 10525 }, { "epoch": 21.052, "grad_norm": 0.9354279637336731, "learning_rate": 2e-05, "loss": 0.0307904, "step": 10526 }, { "epoch": 21.054, "grad_norm": 0.9876614212989807, "learning_rate": 2e-05, "loss": 0.03531288, "step": 10527 }, { "epoch": 21.056, "grad_norm": 1.2255337238311768, "learning_rate": 2e-05, "loss": 0.03369109, "step": 10528 }, { "epoch": 21.058, "grad_norm": 1.4609296321868896, "learning_rate": 2e-05, "loss": 0.04581051, "step": 10529 }, { "epoch": 21.06, "grad_norm": 1.1844290494918823, "learning_rate": 2e-05, "loss": 0.02935641, "step": 10530 }, { "epoch": 21.062, "grad_norm": 1.3473209142684937, "learning_rate": 2e-05, "loss": 0.03874001, "step": 10531 }, { "epoch": 21.064, "grad_norm": 1.2861710786819458, "learning_rate": 2e-05, "loss": 0.03785869, "step": 10532 }, { "epoch": 21.066, "grad_norm": 1.2455552816390991, "learning_rate": 2e-05, "loss": 0.02974376, "step": 10533 }, { "epoch": 21.068, "grad_norm": 1.0268170833587646, "learning_rate": 2e-05, "loss": 0.0280002, "step": 10534 }, { "epoch": 21.07, "grad_norm": 2.149333953857422, "learning_rate": 2e-05, "loss": 0.0475785, "step": 10535 }, { "epoch": 21.072, "grad_norm": 0.8871065378189087, "learning_rate": 2e-05, "loss": 0.02665732, "step": 10536 }, { "epoch": 21.074, "grad_norm": 1.2057782411575317, "learning_rate": 2e-05, "loss": 0.04148353, "step": 10537 }, { "epoch": 21.076, "grad_norm": 2.8635036945343018, "learning_rate": 2e-05, "loss": 0.04504614, "step": 10538 }, { "epoch": 21.078, "grad_norm": 1.0987458229064941, "learning_rate": 2e-05, "loss": 0.02471746, "step": 10539 }, { "epoch": 21.08, "grad_norm": 1.5803258419036865, "learning_rate": 2e-05, "loss": 0.04167273, "step": 10540 }, { "epoch": 21.082, "grad_norm": 0.9525123834609985, "learning_rate": 2e-05, "loss": 0.02323587, "step": 10541 }, { "epoch": 21.084, "grad_norm": 1.051931619644165, "learning_rate": 2e-05, "loss": 0.03668146, "step": 10542 }, { "epoch": 21.086, "grad_norm": 1.020364761352539, "learning_rate": 2e-05, "loss": 0.03155644, "step": 10543 }, { "epoch": 21.088, "grad_norm": 1.9535802602767944, "learning_rate": 2e-05, "loss": 0.04253891, "step": 10544 }, { "epoch": 21.09, "grad_norm": 1.563684105873108, "learning_rate": 2e-05, "loss": 0.03040604, "step": 10545 }, { "epoch": 21.092, "grad_norm": 1.5148497819900513, "learning_rate": 2e-05, "loss": 0.04719204, "step": 10546 }, { "epoch": 21.094, "grad_norm": 1.0597929954528809, "learning_rate": 2e-05, "loss": 0.03394167, "step": 10547 }, { "epoch": 21.096, "grad_norm": 0.9946009516716003, "learning_rate": 2e-05, "loss": 0.02729562, "step": 10548 }, { "epoch": 21.098, "grad_norm": 1.0386313199996948, "learning_rate": 2e-05, "loss": 0.02707431, "step": 10549 }, { "epoch": 21.1, "grad_norm": 1.930969476699829, "learning_rate": 2e-05, "loss": 0.03724827, "step": 10550 }, { "epoch": 21.102, "grad_norm": 0.8346301913261414, "learning_rate": 2e-05, "loss": 0.02298054, "step": 10551 }, { "epoch": 21.104, "grad_norm": 1.1870561838150024, "learning_rate": 2e-05, "loss": 0.03454594, "step": 10552 }, { "epoch": 21.106, "grad_norm": 1.3278943300247192, "learning_rate": 2e-05, "loss": 0.03413796, "step": 10553 }, { "epoch": 21.108, "grad_norm": 0.9806317090988159, "learning_rate": 2e-05, "loss": 0.03050506, "step": 10554 }, { "epoch": 21.11, "grad_norm": 1.6823358535766602, "learning_rate": 2e-05, "loss": 0.04869309, "step": 10555 }, { "epoch": 21.112, "grad_norm": 0.9092576503753662, "learning_rate": 2e-05, "loss": 0.02638655, "step": 10556 }, { "epoch": 21.114, "grad_norm": 1.2272701263427734, "learning_rate": 2e-05, "loss": 0.03748032, "step": 10557 }, { "epoch": 21.116, "grad_norm": 1.1986057758331299, "learning_rate": 2e-05, "loss": 0.02931621, "step": 10558 }, { "epoch": 21.118, "grad_norm": 1.966601014137268, "learning_rate": 2e-05, "loss": 0.04338966, "step": 10559 }, { "epoch": 21.12, "grad_norm": 2.9418132305145264, "learning_rate": 2e-05, "loss": 0.05720629, "step": 10560 }, { "epoch": 21.122, "grad_norm": 1.3184374570846558, "learning_rate": 2e-05, "loss": 0.03051766, "step": 10561 }, { "epoch": 21.124, "grad_norm": 1.2324519157409668, "learning_rate": 2e-05, "loss": 0.04240867, "step": 10562 }, { "epoch": 21.126, "grad_norm": 1.0354188680648804, "learning_rate": 2e-05, "loss": 0.03184131, "step": 10563 }, { "epoch": 21.128, "grad_norm": 1.3084588050842285, "learning_rate": 2e-05, "loss": 0.04317082, "step": 10564 }, { "epoch": 21.13, "grad_norm": 1.621947169303894, "learning_rate": 2e-05, "loss": 0.04106213, "step": 10565 }, { "epoch": 21.132, "grad_norm": 2.01584529876709, "learning_rate": 2e-05, "loss": 0.04236465, "step": 10566 }, { "epoch": 21.134, "grad_norm": 1.481381893157959, "learning_rate": 2e-05, "loss": 0.02457852, "step": 10567 }, { "epoch": 21.136, "grad_norm": 1.3118445873260498, "learning_rate": 2e-05, "loss": 0.04216508, "step": 10568 }, { "epoch": 21.138, "grad_norm": 1.3927664756774902, "learning_rate": 2e-05, "loss": 0.04639062, "step": 10569 }, { "epoch": 21.14, "grad_norm": 1.2951942682266235, "learning_rate": 2e-05, "loss": 0.03051125, "step": 10570 }, { "epoch": 21.142, "grad_norm": 3.7828381061553955, "learning_rate": 2e-05, "loss": 0.03892427, "step": 10571 }, { "epoch": 21.144, "grad_norm": 1.8915671110153198, "learning_rate": 2e-05, "loss": 0.03912808, "step": 10572 }, { "epoch": 21.146, "grad_norm": 1.7816556692123413, "learning_rate": 2e-05, "loss": 0.03675058, "step": 10573 }, { "epoch": 21.148, "grad_norm": 2.443760395050049, "learning_rate": 2e-05, "loss": 0.03171434, "step": 10574 }, { "epoch": 21.15, "grad_norm": 1.0887539386749268, "learning_rate": 2e-05, "loss": 0.03385027, "step": 10575 }, { "epoch": 21.152, "grad_norm": 1.2952345609664917, "learning_rate": 2e-05, "loss": 0.04195005, "step": 10576 }, { "epoch": 21.154, "grad_norm": 1.1885733604431152, "learning_rate": 2e-05, "loss": 0.02599936, "step": 10577 }, { "epoch": 21.156, "grad_norm": 1.1241782903671265, "learning_rate": 2e-05, "loss": 0.03315149, "step": 10578 }, { "epoch": 21.158, "grad_norm": 1.3803486824035645, "learning_rate": 2e-05, "loss": 0.03571745, "step": 10579 }, { "epoch": 21.16, "grad_norm": 1.2156238555908203, "learning_rate": 2e-05, "loss": 0.04436778, "step": 10580 }, { "epoch": 21.162, "grad_norm": 1.586046576499939, "learning_rate": 2e-05, "loss": 0.04987106, "step": 10581 }, { "epoch": 21.164, "grad_norm": 1.261434555053711, "learning_rate": 2e-05, "loss": 0.03705215, "step": 10582 }, { "epoch": 21.166, "grad_norm": 0.886199414730072, "learning_rate": 2e-05, "loss": 0.02416335, "step": 10583 }, { "epoch": 21.168, "grad_norm": 1.2906107902526855, "learning_rate": 2e-05, "loss": 0.04419378, "step": 10584 }, { "epoch": 21.17, "grad_norm": 1.1108649969100952, "learning_rate": 2e-05, "loss": 0.03079815, "step": 10585 }, { "epoch": 21.172, "grad_norm": 2.070798635482788, "learning_rate": 2e-05, "loss": 0.0433712, "step": 10586 }, { "epoch": 21.174, "grad_norm": 1.1178715229034424, "learning_rate": 2e-05, "loss": 0.03338214, "step": 10587 }, { "epoch": 21.176, "grad_norm": 1.2780183553695679, "learning_rate": 2e-05, "loss": 0.04754562, "step": 10588 }, { "epoch": 21.178, "grad_norm": 1.154023289680481, "learning_rate": 2e-05, "loss": 0.03899136, "step": 10589 }, { "epoch": 21.18, "grad_norm": 0.9963793158531189, "learning_rate": 2e-05, "loss": 0.02338295, "step": 10590 }, { "epoch": 21.182, "grad_norm": 1.49871027469635, "learning_rate": 2e-05, "loss": 0.04136465, "step": 10591 }, { "epoch": 21.184, "grad_norm": 1.006020426750183, "learning_rate": 2e-05, "loss": 0.02428103, "step": 10592 }, { "epoch": 21.186, "grad_norm": 1.1841108798980713, "learning_rate": 2e-05, "loss": 0.03439215, "step": 10593 }, { "epoch": 21.188, "grad_norm": 1.2510030269622803, "learning_rate": 2e-05, "loss": 0.03146035, "step": 10594 }, { "epoch": 21.19, "grad_norm": 0.7315729260444641, "learning_rate": 2e-05, "loss": 0.01793394, "step": 10595 }, { "epoch": 21.192, "grad_norm": 1.2097729444503784, "learning_rate": 2e-05, "loss": 0.03397623, "step": 10596 }, { "epoch": 21.194, "grad_norm": 1.0274741649627686, "learning_rate": 2e-05, "loss": 0.02825071, "step": 10597 }, { "epoch": 21.196, "grad_norm": 0.9847630262374878, "learning_rate": 2e-05, "loss": 0.03145049, "step": 10598 }, { "epoch": 21.198, "grad_norm": 1.3303278684616089, "learning_rate": 2e-05, "loss": 0.04809329, "step": 10599 }, { "epoch": 21.2, "grad_norm": 1.0973892211914062, "learning_rate": 2e-05, "loss": 0.03610747, "step": 10600 }, { "epoch": 21.202, "grad_norm": 1.8867298364639282, "learning_rate": 2e-05, "loss": 0.04427704, "step": 10601 }, { "epoch": 21.204, "grad_norm": 1.922042965888977, "learning_rate": 2e-05, "loss": 0.0379353, "step": 10602 }, { "epoch": 21.206, "grad_norm": 1.1012451648712158, "learning_rate": 2e-05, "loss": 0.03171213, "step": 10603 }, { "epoch": 21.208, "grad_norm": 1.0954500436782837, "learning_rate": 2e-05, "loss": 0.02412405, "step": 10604 }, { "epoch": 21.21, "grad_norm": 1.3452584743499756, "learning_rate": 2e-05, "loss": 0.0347417, "step": 10605 }, { "epoch": 21.212, "grad_norm": 1.3375858068466187, "learning_rate": 2e-05, "loss": 0.03575499, "step": 10606 }, { "epoch": 21.214, "grad_norm": 1.139094352722168, "learning_rate": 2e-05, "loss": 0.0376694, "step": 10607 }, { "epoch": 21.216, "grad_norm": 1.5566028356552124, "learning_rate": 2e-05, "loss": 0.03238266, "step": 10608 }, { "epoch": 21.218, "grad_norm": 1.9407174587249756, "learning_rate": 2e-05, "loss": 0.04087084, "step": 10609 }, { "epoch": 21.22, "grad_norm": 5.404819011688232, "learning_rate": 2e-05, "loss": 0.04306193, "step": 10610 }, { "epoch": 21.222, "grad_norm": 1.1310769319534302, "learning_rate": 2e-05, "loss": 0.03149523, "step": 10611 }, { "epoch": 21.224, "grad_norm": 2.3743486404418945, "learning_rate": 2e-05, "loss": 0.0347545, "step": 10612 }, { "epoch": 21.226, "grad_norm": 1.9811265468597412, "learning_rate": 2e-05, "loss": 0.04471551, "step": 10613 }, { "epoch": 21.228, "grad_norm": 1.034940242767334, "learning_rate": 2e-05, "loss": 0.02823063, "step": 10614 }, { "epoch": 21.23, "grad_norm": 1.0800728797912598, "learning_rate": 2e-05, "loss": 0.04498807, "step": 10615 }, { "epoch": 21.232, "grad_norm": 1.2216445207595825, "learning_rate": 2e-05, "loss": 0.03705755, "step": 10616 }, { "epoch": 21.234, "grad_norm": 1.5470830202102661, "learning_rate": 2e-05, "loss": 0.03225365, "step": 10617 }, { "epoch": 21.236, "grad_norm": 2.094444990158081, "learning_rate": 2e-05, "loss": 0.04915155, "step": 10618 }, { "epoch": 21.238, "grad_norm": 1.5187581777572632, "learning_rate": 2e-05, "loss": 0.04350504, "step": 10619 }, { "epoch": 21.24, "grad_norm": 1.3666698932647705, "learning_rate": 2e-05, "loss": 0.05134146, "step": 10620 }, { "epoch": 21.242, "grad_norm": 0.9730545282363892, "learning_rate": 2e-05, "loss": 0.0246509, "step": 10621 }, { "epoch": 21.244, "grad_norm": 1.3245638608932495, "learning_rate": 2e-05, "loss": 0.03410356, "step": 10622 }, { "epoch": 21.246, "grad_norm": 1.0870567560195923, "learning_rate": 2e-05, "loss": 0.03783543, "step": 10623 }, { "epoch": 21.248, "grad_norm": 1.4121298789978027, "learning_rate": 2e-05, "loss": 0.0571961, "step": 10624 }, { "epoch": 21.25, "grad_norm": 1.5266447067260742, "learning_rate": 2e-05, "loss": 0.04245551, "step": 10625 }, { "epoch": 21.252, "grad_norm": 1.5656712055206299, "learning_rate": 2e-05, "loss": 0.04033789, "step": 10626 }, { "epoch": 21.254, "grad_norm": 1.1030497550964355, "learning_rate": 2e-05, "loss": 0.03589434, "step": 10627 }, { "epoch": 21.256, "grad_norm": 1.4850778579711914, "learning_rate": 2e-05, "loss": 0.03722943, "step": 10628 }, { "epoch": 21.258, "grad_norm": 1.0993595123291016, "learning_rate": 2e-05, "loss": 0.02807938, "step": 10629 }, { "epoch": 21.26, "grad_norm": 1.2334455251693726, "learning_rate": 2e-05, "loss": 0.03748228, "step": 10630 }, { "epoch": 21.262, "grad_norm": 1.5018088817596436, "learning_rate": 2e-05, "loss": 0.04651701, "step": 10631 }, { "epoch": 21.264, "grad_norm": 1.5770760774612427, "learning_rate": 2e-05, "loss": 0.03737814, "step": 10632 }, { "epoch": 21.266, "grad_norm": 1.3698883056640625, "learning_rate": 2e-05, "loss": 0.03702818, "step": 10633 }, { "epoch": 21.268, "grad_norm": 1.316947340965271, "learning_rate": 2e-05, "loss": 0.0374598, "step": 10634 }, { "epoch": 21.27, "grad_norm": 1.1345027685165405, "learning_rate": 2e-05, "loss": 0.03323581, "step": 10635 }, { "epoch": 21.272, "grad_norm": 1.6421146392822266, "learning_rate": 2e-05, "loss": 0.03895211, "step": 10636 }, { "epoch": 21.274, "grad_norm": 0.812617301940918, "learning_rate": 2e-05, "loss": 0.02046642, "step": 10637 }, { "epoch": 21.276, "grad_norm": 1.6946412324905396, "learning_rate": 2e-05, "loss": 0.03659395, "step": 10638 }, { "epoch": 21.278, "grad_norm": 2.288081645965576, "learning_rate": 2e-05, "loss": 0.05296513, "step": 10639 }, { "epoch": 21.28, "grad_norm": 1.681496500968933, "learning_rate": 2e-05, "loss": 0.03126854, "step": 10640 }, { "epoch": 21.282, "grad_norm": 3.044177532196045, "learning_rate": 2e-05, "loss": 0.0360253, "step": 10641 }, { "epoch": 21.284, "grad_norm": 1.5990822315216064, "learning_rate": 2e-05, "loss": 0.03093606, "step": 10642 }, { "epoch": 21.286, "grad_norm": 1.1280889511108398, "learning_rate": 2e-05, "loss": 0.0305376, "step": 10643 }, { "epoch": 21.288, "grad_norm": 0.7883005738258362, "learning_rate": 2e-05, "loss": 0.02066907, "step": 10644 }, { "epoch": 21.29, "grad_norm": 0.9310852885246277, "learning_rate": 2e-05, "loss": 0.02677006, "step": 10645 }, { "epoch": 21.292, "grad_norm": 1.0146135091781616, "learning_rate": 2e-05, "loss": 0.03050572, "step": 10646 }, { "epoch": 21.294, "grad_norm": 1.7839759588241577, "learning_rate": 2e-05, "loss": 0.03637395, "step": 10647 }, { "epoch": 21.296, "grad_norm": 1.1365673542022705, "learning_rate": 2e-05, "loss": 0.03653955, "step": 10648 }, { "epoch": 21.298, "grad_norm": 1.2921481132507324, "learning_rate": 2e-05, "loss": 0.04410624, "step": 10649 }, { "epoch": 21.3, "grad_norm": 1.155529499053955, "learning_rate": 2e-05, "loss": 0.03705712, "step": 10650 }, { "epoch": 21.302, "grad_norm": 1.44944167137146, "learning_rate": 2e-05, "loss": 0.04845714, "step": 10651 }, { "epoch": 21.304, "grad_norm": 1.276952862739563, "learning_rate": 2e-05, "loss": 0.04431798, "step": 10652 }, { "epoch": 21.306, "grad_norm": 1.0401476621627808, "learning_rate": 2e-05, "loss": 0.02794194, "step": 10653 }, { "epoch": 21.308, "grad_norm": 1.4874932765960693, "learning_rate": 2e-05, "loss": 0.04844416, "step": 10654 }, { "epoch": 21.31, "grad_norm": 1.2578763961791992, "learning_rate": 2e-05, "loss": 0.02141695, "step": 10655 }, { "epoch": 21.312, "grad_norm": 1.0979886054992676, "learning_rate": 2e-05, "loss": 0.03747806, "step": 10656 }, { "epoch": 21.314, "grad_norm": 1.5013819932937622, "learning_rate": 2e-05, "loss": 0.03525694, "step": 10657 }, { "epoch": 21.316, "grad_norm": 2.042410373687744, "learning_rate": 2e-05, "loss": 0.04237705, "step": 10658 }, { "epoch": 21.318, "grad_norm": 0.8237473964691162, "learning_rate": 2e-05, "loss": 0.02557302, "step": 10659 }, { "epoch": 21.32, "grad_norm": 1.2069071531295776, "learning_rate": 2e-05, "loss": 0.03492039, "step": 10660 }, { "epoch": 21.322, "grad_norm": 2.445582866668701, "learning_rate": 2e-05, "loss": 0.04500083, "step": 10661 }, { "epoch": 21.324, "grad_norm": 1.7285526990890503, "learning_rate": 2e-05, "loss": 0.0372141, "step": 10662 }, { "epoch": 21.326, "grad_norm": 1.2738267183303833, "learning_rate": 2e-05, "loss": 0.04208554, "step": 10663 }, { "epoch": 21.328, "grad_norm": 1.3018332719802856, "learning_rate": 2e-05, "loss": 0.02245201, "step": 10664 }, { "epoch": 21.33, "grad_norm": 1.3849666118621826, "learning_rate": 2e-05, "loss": 0.04083522, "step": 10665 }, { "epoch": 21.332, "grad_norm": 1.0650460720062256, "learning_rate": 2e-05, "loss": 0.03396599, "step": 10666 }, { "epoch": 21.334, "grad_norm": 1.0239198207855225, "learning_rate": 2e-05, "loss": 0.02632653, "step": 10667 }, { "epoch": 21.336, "grad_norm": 1.1510930061340332, "learning_rate": 2e-05, "loss": 0.03786356, "step": 10668 }, { "epoch": 21.338, "grad_norm": 1.134979486465454, "learning_rate": 2e-05, "loss": 0.03500597, "step": 10669 }, { "epoch": 21.34, "grad_norm": 1.7041795253753662, "learning_rate": 2e-05, "loss": 0.04310299, "step": 10670 }, { "epoch": 21.342, "grad_norm": 1.080082893371582, "learning_rate": 2e-05, "loss": 0.02889851, "step": 10671 }, { "epoch": 21.344, "grad_norm": 1.2668462991714478, "learning_rate": 2e-05, "loss": 0.06208187, "step": 10672 }, { "epoch": 21.346, "grad_norm": 0.9316691160202026, "learning_rate": 2e-05, "loss": 0.02679442, "step": 10673 }, { "epoch": 21.348, "grad_norm": 1.3193854093551636, "learning_rate": 2e-05, "loss": 0.03310707, "step": 10674 }, { "epoch": 21.35, "grad_norm": 1.5377140045166016, "learning_rate": 2e-05, "loss": 0.03435023, "step": 10675 }, { "epoch": 21.352, "grad_norm": 1.0346852540969849, "learning_rate": 2e-05, "loss": 0.02915202, "step": 10676 }, { "epoch": 21.354, "grad_norm": 1.0699576139450073, "learning_rate": 2e-05, "loss": 0.03234834, "step": 10677 }, { "epoch": 21.356, "grad_norm": 4.232156753540039, "learning_rate": 2e-05, "loss": 0.04847697, "step": 10678 }, { "epoch": 21.358, "grad_norm": 0.8950043320655823, "learning_rate": 2e-05, "loss": 0.02786786, "step": 10679 }, { "epoch": 21.36, "grad_norm": 1.1412599086761475, "learning_rate": 2e-05, "loss": 0.02532648, "step": 10680 }, { "epoch": 21.362, "grad_norm": 1.9713459014892578, "learning_rate": 2e-05, "loss": 0.04944416, "step": 10681 }, { "epoch": 21.364, "grad_norm": 1.2647299766540527, "learning_rate": 2e-05, "loss": 0.04223265, "step": 10682 }, { "epoch": 21.366, "grad_norm": 2.940858840942383, "learning_rate": 2e-05, "loss": 0.03530517, "step": 10683 }, { "epoch": 21.368, "grad_norm": 0.9534826874732971, "learning_rate": 2e-05, "loss": 0.02479281, "step": 10684 }, { "epoch": 21.37, "grad_norm": 1.3853439092636108, "learning_rate": 2e-05, "loss": 0.03295173, "step": 10685 }, { "epoch": 21.372, "grad_norm": 1.2453714609146118, "learning_rate": 2e-05, "loss": 0.02070951, "step": 10686 }, { "epoch": 21.374, "grad_norm": 1.1567037105560303, "learning_rate": 2e-05, "loss": 0.03087223, "step": 10687 }, { "epoch": 21.376, "grad_norm": 0.9362542629241943, "learning_rate": 2e-05, "loss": 0.01926921, "step": 10688 }, { "epoch": 21.378, "grad_norm": 0.9018503427505493, "learning_rate": 2e-05, "loss": 0.02120415, "step": 10689 }, { "epoch": 21.38, "grad_norm": 1.236549735069275, "learning_rate": 2e-05, "loss": 0.04043602, "step": 10690 }, { "epoch": 21.382, "grad_norm": 1.1474121809005737, "learning_rate": 2e-05, "loss": 0.03565197, "step": 10691 }, { "epoch": 21.384, "grad_norm": 1.0585695505142212, "learning_rate": 2e-05, "loss": 0.03018987, "step": 10692 }, { "epoch": 21.386, "grad_norm": 1.0328543186187744, "learning_rate": 2e-05, "loss": 0.03397262, "step": 10693 }, { "epoch": 21.388, "grad_norm": 1.446506381034851, "learning_rate": 2e-05, "loss": 0.02783697, "step": 10694 }, { "epoch": 21.39, "grad_norm": 1.6255260705947876, "learning_rate": 2e-05, "loss": 0.03847382, "step": 10695 }, { "epoch": 21.392, "grad_norm": 1.0737837553024292, "learning_rate": 2e-05, "loss": 0.02603565, "step": 10696 }, { "epoch": 21.394, "grad_norm": 1.392006754875183, "learning_rate": 2e-05, "loss": 0.03012003, "step": 10697 }, { "epoch": 21.396, "grad_norm": 2.5857255458831787, "learning_rate": 2e-05, "loss": 0.03107409, "step": 10698 }, { "epoch": 21.398, "grad_norm": 1.2561590671539307, "learning_rate": 2e-05, "loss": 0.03176247, "step": 10699 }, { "epoch": 21.4, "grad_norm": 1.0437184572219849, "learning_rate": 2e-05, "loss": 0.03001492, "step": 10700 }, { "epoch": 21.402, "grad_norm": 1.9724184274673462, "learning_rate": 2e-05, "loss": 0.02830181, "step": 10701 }, { "epoch": 21.404, "grad_norm": 1.181110143661499, "learning_rate": 2e-05, "loss": 0.0300355, "step": 10702 }, { "epoch": 21.406, "grad_norm": 2.5362696647644043, "learning_rate": 2e-05, "loss": 0.04556922, "step": 10703 }, { "epoch": 21.408, "grad_norm": 1.9228975772857666, "learning_rate": 2e-05, "loss": 0.0374077, "step": 10704 }, { "epoch": 21.41, "grad_norm": 1.4400721788406372, "learning_rate": 2e-05, "loss": 0.0367013, "step": 10705 }, { "epoch": 21.412, "grad_norm": 1.2807708978652954, "learning_rate": 2e-05, "loss": 0.045549, "step": 10706 }, { "epoch": 21.414, "grad_norm": 2.0344345569610596, "learning_rate": 2e-05, "loss": 0.04431529, "step": 10707 }, { "epoch": 21.416, "grad_norm": 3.9442005157470703, "learning_rate": 2e-05, "loss": 0.04201176, "step": 10708 }, { "epoch": 21.418, "grad_norm": 0.9622934460639954, "learning_rate": 2e-05, "loss": 0.0284987, "step": 10709 }, { "epoch": 21.42, "grad_norm": 1.0971020460128784, "learning_rate": 2e-05, "loss": 0.036263, "step": 10710 }, { "epoch": 21.422, "grad_norm": 0.9987945556640625, "learning_rate": 2e-05, "loss": 0.02871657, "step": 10711 }, { "epoch": 21.424, "grad_norm": 0.8595823645591736, "learning_rate": 2e-05, "loss": 0.02600442, "step": 10712 }, { "epoch": 21.426, "grad_norm": 0.8434016108512878, "learning_rate": 2e-05, "loss": 0.02094415, "step": 10713 }, { "epoch": 21.428, "grad_norm": 1.313782811164856, "learning_rate": 2e-05, "loss": 0.04309317, "step": 10714 }, { "epoch": 21.43, "grad_norm": 1.0133094787597656, "learning_rate": 2e-05, "loss": 0.03012585, "step": 10715 }, { "epoch": 21.432, "grad_norm": 1.157460331916809, "learning_rate": 2e-05, "loss": 0.03752935, "step": 10716 }, { "epoch": 21.434, "grad_norm": 1.142626166343689, "learning_rate": 2e-05, "loss": 0.02614125, "step": 10717 }, { "epoch": 21.436, "grad_norm": 1.3068962097167969, "learning_rate": 2e-05, "loss": 0.04685502, "step": 10718 }, { "epoch": 21.438, "grad_norm": 1.4647998809814453, "learning_rate": 2e-05, "loss": 0.04809128, "step": 10719 }, { "epoch": 21.44, "grad_norm": 0.9506215453147888, "learning_rate": 2e-05, "loss": 0.02731987, "step": 10720 }, { "epoch": 21.442, "grad_norm": 1.687732458114624, "learning_rate": 2e-05, "loss": 0.0411804, "step": 10721 }, { "epoch": 21.444, "grad_norm": 1.8168407678604126, "learning_rate": 2e-05, "loss": 0.04425385, "step": 10722 }, { "epoch": 21.446, "grad_norm": 1.2137858867645264, "learning_rate": 2e-05, "loss": 0.03381842, "step": 10723 }, { "epoch": 21.448, "grad_norm": 1.0868464708328247, "learning_rate": 2e-05, "loss": 0.0301272, "step": 10724 }, { "epoch": 21.45, "grad_norm": 2.293076753616333, "learning_rate": 2e-05, "loss": 0.05272197, "step": 10725 }, { "epoch": 21.452, "grad_norm": 1.2076009511947632, "learning_rate": 2e-05, "loss": 0.04387645, "step": 10726 }, { "epoch": 21.454, "grad_norm": 1.757698655128479, "learning_rate": 2e-05, "loss": 0.02590645, "step": 10727 }, { "epoch": 21.456, "grad_norm": 1.1657187938690186, "learning_rate": 2e-05, "loss": 0.03529991, "step": 10728 }, { "epoch": 21.458, "grad_norm": 1.177914023399353, "learning_rate": 2e-05, "loss": 0.03198649, "step": 10729 }, { "epoch": 21.46, "grad_norm": 2.1333088874816895, "learning_rate": 2e-05, "loss": 0.05930211, "step": 10730 }, { "epoch": 21.462, "grad_norm": 1.0726081132888794, "learning_rate": 2e-05, "loss": 0.02519402, "step": 10731 }, { "epoch": 21.464, "grad_norm": 1.8570704460144043, "learning_rate": 2e-05, "loss": 0.04541021, "step": 10732 }, { "epoch": 21.466, "grad_norm": 1.4362432956695557, "learning_rate": 2e-05, "loss": 0.03639318, "step": 10733 }, { "epoch": 21.468, "grad_norm": 1.3196659088134766, "learning_rate": 2e-05, "loss": 0.03501175, "step": 10734 }, { "epoch": 21.47, "grad_norm": 1.788077712059021, "learning_rate": 2e-05, "loss": 0.04476755, "step": 10735 }, { "epoch": 21.472, "grad_norm": 2.1154396533966064, "learning_rate": 2e-05, "loss": 0.04601542, "step": 10736 }, { "epoch": 21.474, "grad_norm": 0.9764267206192017, "learning_rate": 2e-05, "loss": 0.03156991, "step": 10737 }, { "epoch": 21.476, "grad_norm": 0.862126886844635, "learning_rate": 2e-05, "loss": 0.02627644, "step": 10738 }, { "epoch": 21.478, "grad_norm": 2.1665496826171875, "learning_rate": 2e-05, "loss": 0.03621161, "step": 10739 }, { "epoch": 21.48, "grad_norm": 0.9720172882080078, "learning_rate": 2e-05, "loss": 0.02424107, "step": 10740 }, { "epoch": 21.482, "grad_norm": 1.1352647542953491, "learning_rate": 2e-05, "loss": 0.0395835, "step": 10741 }, { "epoch": 21.484, "grad_norm": 1.2119773626327515, "learning_rate": 2e-05, "loss": 0.03099651, "step": 10742 }, { "epoch": 21.486, "grad_norm": 1.091362476348877, "learning_rate": 2e-05, "loss": 0.03427457, "step": 10743 }, { "epoch": 21.488, "grad_norm": 1.5212750434875488, "learning_rate": 2e-05, "loss": 0.03149851, "step": 10744 }, { "epoch": 21.49, "grad_norm": 2.698265552520752, "learning_rate": 2e-05, "loss": 0.03275203, "step": 10745 }, { "epoch": 21.492, "grad_norm": 0.9697499871253967, "learning_rate": 2e-05, "loss": 0.02803354, "step": 10746 }, { "epoch": 21.494, "grad_norm": 1.4407355785369873, "learning_rate": 2e-05, "loss": 0.04102314, "step": 10747 }, { "epoch": 21.496, "grad_norm": 0.9825798869132996, "learning_rate": 2e-05, "loss": 0.02902812, "step": 10748 }, { "epoch": 21.498, "grad_norm": 0.9173822999000549, "learning_rate": 2e-05, "loss": 0.02705132, "step": 10749 }, { "epoch": 21.5, "grad_norm": 1.5008710622787476, "learning_rate": 2e-05, "loss": 0.03890024, "step": 10750 }, { "epoch": 21.502, "grad_norm": 2.323859214782715, "learning_rate": 2e-05, "loss": 0.03871877, "step": 10751 }, { "epoch": 21.504, "grad_norm": 0.9519139528274536, "learning_rate": 2e-05, "loss": 0.02205279, "step": 10752 }, { "epoch": 21.506, "grad_norm": 1.150467038154602, "learning_rate": 2e-05, "loss": 0.03497121, "step": 10753 }, { "epoch": 21.508, "grad_norm": 1.5614557266235352, "learning_rate": 2e-05, "loss": 0.03725933, "step": 10754 }, { "epoch": 21.51, "grad_norm": 0.8480432629585266, "learning_rate": 2e-05, "loss": 0.02463213, "step": 10755 }, { "epoch": 21.512, "grad_norm": 1.0168507099151611, "learning_rate": 2e-05, "loss": 0.03348152, "step": 10756 }, { "epoch": 21.514, "grad_norm": 1.9600995779037476, "learning_rate": 2e-05, "loss": 0.04116196, "step": 10757 }, { "epoch": 21.516, "grad_norm": 2.4078519344329834, "learning_rate": 2e-05, "loss": 0.05193283, "step": 10758 }, { "epoch": 21.518, "grad_norm": 1.9011753797531128, "learning_rate": 2e-05, "loss": 0.04391605, "step": 10759 }, { "epoch": 21.52, "grad_norm": 1.0992064476013184, "learning_rate": 2e-05, "loss": 0.03505319, "step": 10760 }, { "epoch": 21.522, "grad_norm": 1.1341238021850586, "learning_rate": 2e-05, "loss": 0.04327236, "step": 10761 }, { "epoch": 21.524, "grad_norm": 0.8820840716362, "learning_rate": 2e-05, "loss": 0.02557, "step": 10762 }, { "epoch": 21.526, "grad_norm": 1.7110241651535034, "learning_rate": 2e-05, "loss": 0.02929918, "step": 10763 }, { "epoch": 21.528, "grad_norm": 2.175755262374878, "learning_rate": 2e-05, "loss": 0.04092379, "step": 10764 }, { "epoch": 21.53, "grad_norm": 0.9964827299118042, "learning_rate": 2e-05, "loss": 0.02928205, "step": 10765 }, { "epoch": 21.532, "grad_norm": 1.0591028928756714, "learning_rate": 2e-05, "loss": 0.03802252, "step": 10766 }, { "epoch": 21.534, "grad_norm": 1.0037879943847656, "learning_rate": 2e-05, "loss": 0.02184133, "step": 10767 }, { "epoch": 21.536, "grad_norm": 1.0294376611709595, "learning_rate": 2e-05, "loss": 0.03275803, "step": 10768 }, { "epoch": 21.538, "grad_norm": 1.2927500009536743, "learning_rate": 2e-05, "loss": 0.05052555, "step": 10769 }, { "epoch": 21.54, "grad_norm": 1.269002079963684, "learning_rate": 2e-05, "loss": 0.04041898, "step": 10770 }, { "epoch": 21.542, "grad_norm": 1.3406201601028442, "learning_rate": 2e-05, "loss": 0.0340461, "step": 10771 }, { "epoch": 21.544, "grad_norm": 1.2689399719238281, "learning_rate": 2e-05, "loss": 0.02636706, "step": 10772 }, { "epoch": 21.546, "grad_norm": 1.7005540132522583, "learning_rate": 2e-05, "loss": 0.03229015, "step": 10773 }, { "epoch": 21.548000000000002, "grad_norm": 1.281423568725586, "learning_rate": 2e-05, "loss": 0.03840701, "step": 10774 }, { "epoch": 21.55, "grad_norm": 1.9321891069412231, "learning_rate": 2e-05, "loss": 0.03856434, "step": 10775 }, { "epoch": 21.552, "grad_norm": 2.3012235164642334, "learning_rate": 2e-05, "loss": 0.04851568, "step": 10776 }, { "epoch": 21.554, "grad_norm": 0.8743276000022888, "learning_rate": 2e-05, "loss": 0.0196914, "step": 10777 }, { "epoch": 21.556, "grad_norm": 1.0524275302886963, "learning_rate": 2e-05, "loss": 0.038031, "step": 10778 }, { "epoch": 21.558, "grad_norm": 1.160980463027954, "learning_rate": 2e-05, "loss": 0.04083832, "step": 10779 }, { "epoch": 21.56, "grad_norm": 3.0021450519561768, "learning_rate": 2e-05, "loss": 0.03252212, "step": 10780 }, { "epoch": 21.562, "grad_norm": 2.0682501792907715, "learning_rate": 2e-05, "loss": 0.03913051, "step": 10781 }, { "epoch": 21.564, "grad_norm": 1.703480839729309, "learning_rate": 2e-05, "loss": 0.04668419, "step": 10782 }, { "epoch": 21.566, "grad_norm": 1.4658693075180054, "learning_rate": 2e-05, "loss": 0.03443877, "step": 10783 }, { "epoch": 21.568, "grad_norm": 1.1543418169021606, "learning_rate": 2e-05, "loss": 0.02747395, "step": 10784 }, { "epoch": 21.57, "grad_norm": 3.2979228496551514, "learning_rate": 2e-05, "loss": 0.05042007, "step": 10785 }, { "epoch": 21.572, "grad_norm": 1.8336671590805054, "learning_rate": 2e-05, "loss": 0.02918666, "step": 10786 }, { "epoch": 21.574, "grad_norm": 1.3803110122680664, "learning_rate": 2e-05, "loss": 0.03612125, "step": 10787 }, { "epoch": 21.576, "grad_norm": 1.217238426208496, "learning_rate": 2e-05, "loss": 0.03045898, "step": 10788 }, { "epoch": 21.578, "grad_norm": 0.8724953532218933, "learning_rate": 2e-05, "loss": 0.02683663, "step": 10789 }, { "epoch": 21.58, "grad_norm": 1.0224673748016357, "learning_rate": 2e-05, "loss": 0.03132909, "step": 10790 }, { "epoch": 21.582, "grad_norm": 1.0978913307189941, "learning_rate": 2e-05, "loss": 0.03923561, "step": 10791 }, { "epoch": 21.584, "grad_norm": 1.336134672164917, "learning_rate": 2e-05, "loss": 0.03779566, "step": 10792 }, { "epoch": 21.586, "grad_norm": 2.5332863330841064, "learning_rate": 2e-05, "loss": 0.04706411, "step": 10793 }, { "epoch": 21.588, "grad_norm": 0.9782661199569702, "learning_rate": 2e-05, "loss": 0.02816963, "step": 10794 }, { "epoch": 21.59, "grad_norm": 0.963988184928894, "learning_rate": 2e-05, "loss": 0.03094437, "step": 10795 }, { "epoch": 21.592, "grad_norm": 2.6186649799346924, "learning_rate": 2e-05, "loss": 0.02780627, "step": 10796 }, { "epoch": 21.594, "grad_norm": 1.1740295886993408, "learning_rate": 2e-05, "loss": 0.03014258, "step": 10797 }, { "epoch": 21.596, "grad_norm": 1.2682572603225708, "learning_rate": 2e-05, "loss": 0.03773146, "step": 10798 }, { "epoch": 21.598, "grad_norm": 1.0841542482376099, "learning_rate": 2e-05, "loss": 0.02988249, "step": 10799 }, { "epoch": 21.6, "grad_norm": 1.3508737087249756, "learning_rate": 2e-05, "loss": 0.0372295, "step": 10800 }, { "epoch": 21.602, "grad_norm": 1.229504942893982, "learning_rate": 2e-05, "loss": 0.03721376, "step": 10801 }, { "epoch": 21.604, "grad_norm": 1.0140235424041748, "learning_rate": 2e-05, "loss": 0.02006028, "step": 10802 }, { "epoch": 21.606, "grad_norm": 2.93935489654541, "learning_rate": 2e-05, "loss": 0.04623206, "step": 10803 }, { "epoch": 21.608, "grad_norm": 1.6117465496063232, "learning_rate": 2e-05, "loss": 0.02327195, "step": 10804 }, { "epoch": 21.61, "grad_norm": 1.7175722122192383, "learning_rate": 2e-05, "loss": 0.03024486, "step": 10805 }, { "epoch": 21.612, "grad_norm": 2.5179755687713623, "learning_rate": 2e-05, "loss": 0.03515901, "step": 10806 }, { "epoch": 21.614, "grad_norm": 1.4241502285003662, "learning_rate": 2e-05, "loss": 0.03859766, "step": 10807 }, { "epoch": 21.616, "grad_norm": 2.0011253356933594, "learning_rate": 2e-05, "loss": 0.02655087, "step": 10808 }, { "epoch": 21.618, "grad_norm": 1.9893662929534912, "learning_rate": 2e-05, "loss": 0.03004796, "step": 10809 }, { "epoch": 21.62, "grad_norm": 1.3463841676712036, "learning_rate": 2e-05, "loss": 0.02972662, "step": 10810 }, { "epoch": 21.622, "grad_norm": 1.1787477731704712, "learning_rate": 2e-05, "loss": 0.0288261, "step": 10811 }, { "epoch": 21.624, "grad_norm": 1.237833857536316, "learning_rate": 2e-05, "loss": 0.03204871, "step": 10812 }, { "epoch": 21.626, "grad_norm": 3.5159149169921875, "learning_rate": 2e-05, "loss": 0.04790648, "step": 10813 }, { "epoch": 21.628, "grad_norm": 0.845460057258606, "learning_rate": 2e-05, "loss": 0.02701693, "step": 10814 }, { "epoch": 21.63, "grad_norm": 1.7467832565307617, "learning_rate": 2e-05, "loss": 0.02598082, "step": 10815 }, { "epoch": 21.632, "grad_norm": 2.1213173866271973, "learning_rate": 2e-05, "loss": 0.04864348, "step": 10816 }, { "epoch": 21.634, "grad_norm": 1.6620383262634277, "learning_rate": 2e-05, "loss": 0.05002079, "step": 10817 }, { "epoch": 21.636, "grad_norm": 1.2031606435775757, "learning_rate": 2e-05, "loss": 0.02948435, "step": 10818 }, { "epoch": 21.638, "grad_norm": 3.743027687072754, "learning_rate": 2e-05, "loss": 0.05274049, "step": 10819 }, { "epoch": 21.64, "grad_norm": 2.317620038986206, "learning_rate": 2e-05, "loss": 0.03110951, "step": 10820 }, { "epoch": 21.642, "grad_norm": 1.2688599824905396, "learning_rate": 2e-05, "loss": 0.03901623, "step": 10821 }, { "epoch": 21.644, "grad_norm": 1.1904910802841187, "learning_rate": 2e-05, "loss": 0.03599124, "step": 10822 }, { "epoch": 21.646, "grad_norm": 1.423413634300232, "learning_rate": 2e-05, "loss": 0.04643358, "step": 10823 }, { "epoch": 21.648, "grad_norm": 1.1683127880096436, "learning_rate": 2e-05, "loss": 0.03427366, "step": 10824 }, { "epoch": 21.65, "grad_norm": 0.9204855561256409, "learning_rate": 2e-05, "loss": 0.03122223, "step": 10825 }, { "epoch": 21.652, "grad_norm": 1.1113882064819336, "learning_rate": 2e-05, "loss": 0.0381987, "step": 10826 }, { "epoch": 21.654, "grad_norm": 1.2636103630065918, "learning_rate": 2e-05, "loss": 0.03268664, "step": 10827 }, { "epoch": 21.656, "grad_norm": 0.9222037196159363, "learning_rate": 2e-05, "loss": 0.03147195, "step": 10828 }, { "epoch": 21.658, "grad_norm": 1.5140608549118042, "learning_rate": 2e-05, "loss": 0.03292232, "step": 10829 }, { "epoch": 21.66, "grad_norm": 1.063954472541809, "learning_rate": 2e-05, "loss": 0.03509793, "step": 10830 }, { "epoch": 21.662, "grad_norm": 1.110836148262024, "learning_rate": 2e-05, "loss": 0.03393824, "step": 10831 }, { "epoch": 21.664, "grad_norm": 0.9153269529342651, "learning_rate": 2e-05, "loss": 0.02850673, "step": 10832 }, { "epoch": 21.666, "grad_norm": 1.3513257503509521, "learning_rate": 2e-05, "loss": 0.03400831, "step": 10833 }, { "epoch": 21.668, "grad_norm": 0.828287661075592, "learning_rate": 2e-05, "loss": 0.02091564, "step": 10834 }, { "epoch": 21.67, "grad_norm": 1.5664379596710205, "learning_rate": 2e-05, "loss": 0.03538562, "step": 10835 }, { "epoch": 21.672, "grad_norm": 2.088440179824829, "learning_rate": 2e-05, "loss": 0.03902008, "step": 10836 }, { "epoch": 21.674, "grad_norm": 0.9757646322250366, "learning_rate": 2e-05, "loss": 0.03331909, "step": 10837 }, { "epoch": 21.676, "grad_norm": 1.3544541597366333, "learning_rate": 2e-05, "loss": 0.03964958, "step": 10838 }, { "epoch": 21.678, "grad_norm": 1.8273234367370605, "learning_rate": 2e-05, "loss": 0.03560327, "step": 10839 }, { "epoch": 21.68, "grad_norm": 0.8851908445358276, "learning_rate": 2e-05, "loss": 0.02724171, "step": 10840 }, { "epoch": 21.682, "grad_norm": 1.3723584413528442, "learning_rate": 2e-05, "loss": 0.03260377, "step": 10841 }, { "epoch": 21.684, "grad_norm": 1.7293577194213867, "learning_rate": 2e-05, "loss": 0.04087131, "step": 10842 }, { "epoch": 21.686, "grad_norm": 1.4376215934753418, "learning_rate": 2e-05, "loss": 0.03094893, "step": 10843 }, { "epoch": 21.688, "grad_norm": 2.080545663833618, "learning_rate": 2e-05, "loss": 0.04254334, "step": 10844 }, { "epoch": 21.69, "grad_norm": 1.221475601196289, "learning_rate": 2e-05, "loss": 0.042444, "step": 10845 }, { "epoch": 21.692, "grad_norm": 1.0191986560821533, "learning_rate": 2e-05, "loss": 0.03012396, "step": 10846 }, { "epoch": 21.694, "grad_norm": 1.5199073553085327, "learning_rate": 2e-05, "loss": 0.04867528, "step": 10847 }, { "epoch": 21.696, "grad_norm": 1.0538051128387451, "learning_rate": 2e-05, "loss": 0.02168122, "step": 10848 }, { "epoch": 21.698, "grad_norm": 1.2353911399841309, "learning_rate": 2e-05, "loss": 0.04300802, "step": 10849 }, { "epoch": 21.7, "grad_norm": 1.1052453517913818, "learning_rate": 2e-05, "loss": 0.01482047, "step": 10850 }, { "epoch": 21.701999999999998, "grad_norm": 1.7022722959518433, "learning_rate": 2e-05, "loss": 0.04935133, "step": 10851 }, { "epoch": 21.704, "grad_norm": 1.097477674484253, "learning_rate": 2e-05, "loss": 0.03660508, "step": 10852 }, { "epoch": 21.706, "grad_norm": 2.1557564735412598, "learning_rate": 2e-05, "loss": 0.03016577, "step": 10853 }, { "epoch": 21.708, "grad_norm": 2.0594379901885986, "learning_rate": 2e-05, "loss": 0.02772827, "step": 10854 }, { "epoch": 21.71, "grad_norm": 1.2176257371902466, "learning_rate": 2e-05, "loss": 0.04331783, "step": 10855 }, { "epoch": 21.712, "grad_norm": 1.0477436780929565, "learning_rate": 2e-05, "loss": 0.03022527, "step": 10856 }, { "epoch": 21.714, "grad_norm": 0.903574526309967, "learning_rate": 2e-05, "loss": 0.02697276, "step": 10857 }, { "epoch": 21.716, "grad_norm": 1.0498381853103638, "learning_rate": 2e-05, "loss": 0.03695409, "step": 10858 }, { "epoch": 21.718, "grad_norm": 1.2936769723892212, "learning_rate": 2e-05, "loss": 0.02531984, "step": 10859 }, { "epoch": 21.72, "grad_norm": 1.3947609663009644, "learning_rate": 2e-05, "loss": 0.04383749, "step": 10860 }, { "epoch": 21.722, "grad_norm": 1.824804425239563, "learning_rate": 2e-05, "loss": 0.05057254, "step": 10861 }, { "epoch": 21.724, "grad_norm": 0.9313336610794067, "learning_rate": 2e-05, "loss": 0.02528257, "step": 10862 }, { "epoch": 21.726, "grad_norm": 1.0199240446090698, "learning_rate": 2e-05, "loss": 0.02904977, "step": 10863 }, { "epoch": 21.728, "grad_norm": 1.1817435026168823, "learning_rate": 2e-05, "loss": 0.04280265, "step": 10864 }, { "epoch": 21.73, "grad_norm": 1.3964251279830933, "learning_rate": 2e-05, "loss": 0.03219439, "step": 10865 }, { "epoch": 21.732, "grad_norm": 1.4096508026123047, "learning_rate": 2e-05, "loss": 0.03554904, "step": 10866 }, { "epoch": 21.734, "grad_norm": 1.8143075704574585, "learning_rate": 2e-05, "loss": 0.03615258, "step": 10867 }, { "epoch": 21.736, "grad_norm": 0.7444595694541931, "learning_rate": 2e-05, "loss": 0.01764659, "step": 10868 }, { "epoch": 21.738, "grad_norm": 1.0963743925094604, "learning_rate": 2e-05, "loss": 0.03289765, "step": 10869 }, { "epoch": 21.74, "grad_norm": 1.7623260021209717, "learning_rate": 2e-05, "loss": 0.03723545, "step": 10870 }, { "epoch": 21.742, "grad_norm": 1.765259861946106, "learning_rate": 2e-05, "loss": 0.03898823, "step": 10871 }, { "epoch": 21.744, "grad_norm": 1.039548635482788, "learning_rate": 2e-05, "loss": 0.02444161, "step": 10872 }, { "epoch": 21.746, "grad_norm": 1.2688932418823242, "learning_rate": 2e-05, "loss": 0.0317545, "step": 10873 }, { "epoch": 21.748, "grad_norm": 1.3791906833648682, "learning_rate": 2e-05, "loss": 0.03312441, "step": 10874 }, { "epoch": 21.75, "grad_norm": 0.9484188556671143, "learning_rate": 2e-05, "loss": 0.0266212, "step": 10875 }, { "epoch": 21.752, "grad_norm": 0.8790581226348877, "learning_rate": 2e-05, "loss": 0.0297839, "step": 10876 }, { "epoch": 21.754, "grad_norm": 1.943943738937378, "learning_rate": 2e-05, "loss": 0.04509932, "step": 10877 }, { "epoch": 21.756, "grad_norm": 1.0038764476776123, "learning_rate": 2e-05, "loss": 0.03279506, "step": 10878 }, { "epoch": 21.758, "grad_norm": 1.6507289409637451, "learning_rate": 2e-05, "loss": 0.04125448, "step": 10879 }, { "epoch": 21.76, "grad_norm": 1.6259219646453857, "learning_rate": 2e-05, "loss": 0.04601936, "step": 10880 }, { "epoch": 21.762, "grad_norm": 1.209537386894226, "learning_rate": 2e-05, "loss": 0.03919362, "step": 10881 }, { "epoch": 21.764, "grad_norm": 1.0259144306182861, "learning_rate": 2e-05, "loss": 0.03256726, "step": 10882 }, { "epoch": 21.766, "grad_norm": 1.5114238262176514, "learning_rate": 2e-05, "loss": 0.04673877, "step": 10883 }, { "epoch": 21.768, "grad_norm": 1.0645627975463867, "learning_rate": 2e-05, "loss": 0.03577953, "step": 10884 }, { "epoch": 21.77, "grad_norm": 2.1890196800231934, "learning_rate": 2e-05, "loss": 0.0552833, "step": 10885 }, { "epoch": 21.772, "grad_norm": 1.5644853115081787, "learning_rate": 2e-05, "loss": 0.05350343, "step": 10886 }, { "epoch": 21.774, "grad_norm": 0.8159758448600769, "learning_rate": 2e-05, "loss": 0.0221364, "step": 10887 }, { "epoch": 21.776, "grad_norm": 1.310706615447998, "learning_rate": 2e-05, "loss": 0.02867308, "step": 10888 }, { "epoch": 21.778, "grad_norm": 1.3240361213684082, "learning_rate": 2e-05, "loss": 0.04336674, "step": 10889 }, { "epoch": 21.78, "grad_norm": 1.7382214069366455, "learning_rate": 2e-05, "loss": 0.04056187, "step": 10890 }, { "epoch": 21.782, "grad_norm": 1.1201266050338745, "learning_rate": 2e-05, "loss": 0.0285767, "step": 10891 }, { "epoch": 21.784, "grad_norm": 1.0340373516082764, "learning_rate": 2e-05, "loss": 0.0394372, "step": 10892 }, { "epoch": 21.786, "grad_norm": 1.3631528615951538, "learning_rate": 2e-05, "loss": 0.04386802, "step": 10893 }, { "epoch": 21.788, "grad_norm": 1.7087925672531128, "learning_rate": 2e-05, "loss": 0.05740018, "step": 10894 }, { "epoch": 21.79, "grad_norm": 1.2946619987487793, "learning_rate": 2e-05, "loss": 0.04351215, "step": 10895 }, { "epoch": 21.792, "grad_norm": 1.9575139284133911, "learning_rate": 2e-05, "loss": 0.04059194, "step": 10896 }, { "epoch": 21.794, "grad_norm": 1.6252760887145996, "learning_rate": 2e-05, "loss": 0.03451142, "step": 10897 }, { "epoch": 21.796, "grad_norm": 1.5331840515136719, "learning_rate": 2e-05, "loss": 0.03251875, "step": 10898 }, { "epoch": 21.798000000000002, "grad_norm": 1.0864592790603638, "learning_rate": 2e-05, "loss": 0.03452892, "step": 10899 }, { "epoch": 21.8, "grad_norm": 0.8250343799591064, "learning_rate": 2e-05, "loss": 0.02099156, "step": 10900 }, { "epoch": 21.802, "grad_norm": 1.166359782218933, "learning_rate": 2e-05, "loss": 0.04394403, "step": 10901 }, { "epoch": 21.804, "grad_norm": 1.5257127285003662, "learning_rate": 2e-05, "loss": 0.03211277, "step": 10902 }, { "epoch": 21.806, "grad_norm": 1.0145310163497925, "learning_rate": 2e-05, "loss": 0.02621819, "step": 10903 }, { "epoch": 21.808, "grad_norm": 2.11439847946167, "learning_rate": 2e-05, "loss": 0.04509841, "step": 10904 }, { "epoch": 21.81, "grad_norm": 1.0672552585601807, "learning_rate": 2e-05, "loss": 0.03072799, "step": 10905 }, { "epoch": 21.812, "grad_norm": 1.1568739414215088, "learning_rate": 2e-05, "loss": 0.04023495, "step": 10906 }, { "epoch": 21.814, "grad_norm": 1.9787969589233398, "learning_rate": 2e-05, "loss": 0.04461158, "step": 10907 }, { "epoch": 21.816, "grad_norm": 1.268618106842041, "learning_rate": 2e-05, "loss": 0.04039661, "step": 10908 }, { "epoch": 21.818, "grad_norm": 1.030121088027954, "learning_rate": 2e-05, "loss": 0.03256563, "step": 10909 }, { "epoch": 21.82, "grad_norm": 1.3051402568817139, "learning_rate": 2e-05, "loss": 0.03315262, "step": 10910 }, { "epoch": 21.822, "grad_norm": 1.599260687828064, "learning_rate": 2e-05, "loss": 0.05178364, "step": 10911 }, { "epoch": 21.824, "grad_norm": 1.3491737842559814, "learning_rate": 2e-05, "loss": 0.03285315, "step": 10912 }, { "epoch": 21.826, "grad_norm": 0.9857081770896912, "learning_rate": 2e-05, "loss": 0.02820598, "step": 10913 }, { "epoch": 21.828, "grad_norm": 1.2068244218826294, "learning_rate": 2e-05, "loss": 0.02220411, "step": 10914 }, { "epoch": 21.83, "grad_norm": 1.127752661705017, "learning_rate": 2e-05, "loss": 0.03646821, "step": 10915 }, { "epoch": 21.832, "grad_norm": 1.1618894338607788, "learning_rate": 2e-05, "loss": 0.03601151, "step": 10916 }, { "epoch": 21.834, "grad_norm": 2.112398624420166, "learning_rate": 2e-05, "loss": 0.03499029, "step": 10917 }, { "epoch": 21.836, "grad_norm": 1.555198073387146, "learning_rate": 2e-05, "loss": 0.04573829, "step": 10918 }, { "epoch": 21.838, "grad_norm": 0.9329383373260498, "learning_rate": 2e-05, "loss": 0.03361698, "step": 10919 }, { "epoch": 21.84, "grad_norm": 1.7770673036575317, "learning_rate": 2e-05, "loss": 0.03091755, "step": 10920 }, { "epoch": 21.842, "grad_norm": 1.157621145248413, "learning_rate": 2e-05, "loss": 0.03168878, "step": 10921 }, { "epoch": 21.844, "grad_norm": 1.2026602029800415, "learning_rate": 2e-05, "loss": 0.02261108, "step": 10922 }, { "epoch": 21.846, "grad_norm": 1.4362621307373047, "learning_rate": 2e-05, "loss": 0.02808312, "step": 10923 }, { "epoch": 21.848, "grad_norm": 1.054166555404663, "learning_rate": 2e-05, "loss": 0.03865466, "step": 10924 }, { "epoch": 21.85, "grad_norm": 1.0605528354644775, "learning_rate": 2e-05, "loss": 0.03541258, "step": 10925 }, { "epoch": 21.852, "grad_norm": 1.4870340824127197, "learning_rate": 2e-05, "loss": 0.04259035, "step": 10926 }, { "epoch": 21.854, "grad_norm": 1.6365232467651367, "learning_rate": 2e-05, "loss": 0.03734278, "step": 10927 }, { "epoch": 21.856, "grad_norm": 0.7869289517402649, "learning_rate": 2e-05, "loss": 0.02219597, "step": 10928 }, { "epoch": 21.858, "grad_norm": 1.1174979209899902, "learning_rate": 2e-05, "loss": 0.03317832, "step": 10929 }, { "epoch": 21.86, "grad_norm": 1.7156922817230225, "learning_rate": 2e-05, "loss": 0.03578085, "step": 10930 }, { "epoch": 21.862, "grad_norm": 1.5693000555038452, "learning_rate": 2e-05, "loss": 0.03383234, "step": 10931 }, { "epoch": 21.864, "grad_norm": 2.124469041824341, "learning_rate": 2e-05, "loss": 0.029794, "step": 10932 }, { "epoch": 21.866, "grad_norm": 1.151176929473877, "learning_rate": 2e-05, "loss": 0.04017279, "step": 10933 }, { "epoch": 21.868, "grad_norm": 0.8996794819831848, "learning_rate": 2e-05, "loss": 0.02869819, "step": 10934 }, { "epoch": 21.87, "grad_norm": 0.9696109294891357, "learning_rate": 2e-05, "loss": 0.032364, "step": 10935 }, { "epoch": 21.872, "grad_norm": 1.4245364665985107, "learning_rate": 2e-05, "loss": 0.03265813, "step": 10936 }, { "epoch": 21.874, "grad_norm": 1.5670595169067383, "learning_rate": 2e-05, "loss": 0.04186102, "step": 10937 }, { "epoch": 21.876, "grad_norm": 1.4556249380111694, "learning_rate": 2e-05, "loss": 0.04338999, "step": 10938 }, { "epoch": 21.878, "grad_norm": 1.3597520589828491, "learning_rate": 2e-05, "loss": 0.03609984, "step": 10939 }, { "epoch": 21.88, "grad_norm": 1.7528231143951416, "learning_rate": 2e-05, "loss": 0.03558186, "step": 10940 }, { "epoch": 21.882, "grad_norm": 1.0914018154144287, "learning_rate": 2e-05, "loss": 0.0334373, "step": 10941 }, { "epoch": 21.884, "grad_norm": 1.0560919046401978, "learning_rate": 2e-05, "loss": 0.02701947, "step": 10942 }, { "epoch": 21.886, "grad_norm": 1.0461790561676025, "learning_rate": 2e-05, "loss": 0.02477691, "step": 10943 }, { "epoch": 21.888, "grad_norm": 1.0176409482955933, "learning_rate": 2e-05, "loss": 0.02632477, "step": 10944 }, { "epoch": 21.89, "grad_norm": 1.188032627105713, "learning_rate": 2e-05, "loss": 0.03136832, "step": 10945 }, { "epoch": 21.892, "grad_norm": 1.8192259073257446, "learning_rate": 2e-05, "loss": 0.03940712, "step": 10946 }, { "epoch": 21.894, "grad_norm": 1.1689777374267578, "learning_rate": 2e-05, "loss": 0.04128121, "step": 10947 }, { "epoch": 21.896, "grad_norm": 0.9614786505699158, "learning_rate": 2e-05, "loss": 0.03097624, "step": 10948 }, { "epoch": 21.898, "grad_norm": 2.1277706623077393, "learning_rate": 2e-05, "loss": 0.04416885, "step": 10949 }, { "epoch": 21.9, "grad_norm": 1.0847748517990112, "learning_rate": 2e-05, "loss": 0.047647, "step": 10950 }, { "epoch": 21.902, "grad_norm": 1.4761464595794678, "learning_rate": 2e-05, "loss": 0.03700073, "step": 10951 }, { "epoch": 21.904, "grad_norm": 1.335006833076477, "learning_rate": 2e-05, "loss": 0.02501204, "step": 10952 }, { "epoch": 21.906, "grad_norm": 1.1310573816299438, "learning_rate": 2e-05, "loss": 0.03214348, "step": 10953 }, { "epoch": 21.908, "grad_norm": 2.758718490600586, "learning_rate": 2e-05, "loss": 0.0428556, "step": 10954 }, { "epoch": 21.91, "grad_norm": 1.922041654586792, "learning_rate": 2e-05, "loss": 0.04771857, "step": 10955 }, { "epoch": 21.912, "grad_norm": 1.7065603733062744, "learning_rate": 2e-05, "loss": 0.0471417, "step": 10956 }, { "epoch": 21.914, "grad_norm": 1.5573147535324097, "learning_rate": 2e-05, "loss": 0.03855333, "step": 10957 }, { "epoch": 21.916, "grad_norm": 1.1030230522155762, "learning_rate": 2e-05, "loss": 0.03686146, "step": 10958 }, { "epoch": 21.918, "grad_norm": 1.0365067720413208, "learning_rate": 2e-05, "loss": 0.02389233, "step": 10959 }, { "epoch": 21.92, "grad_norm": 1.0167555809020996, "learning_rate": 2e-05, "loss": 0.03511351, "step": 10960 }, { "epoch": 21.922, "grad_norm": 1.0623586177825928, "learning_rate": 2e-05, "loss": 0.0339056, "step": 10961 }, { "epoch": 21.924, "grad_norm": 2.31209397315979, "learning_rate": 2e-05, "loss": 0.04047135, "step": 10962 }, { "epoch": 21.926, "grad_norm": 1.1770862340927124, "learning_rate": 2e-05, "loss": 0.04121822, "step": 10963 }, { "epoch": 21.928, "grad_norm": 0.8528270125389099, "learning_rate": 2e-05, "loss": 0.02435702, "step": 10964 }, { "epoch": 21.93, "grad_norm": 1.0807077884674072, "learning_rate": 2e-05, "loss": 0.03478127, "step": 10965 }, { "epoch": 21.932, "grad_norm": 1.1846781969070435, "learning_rate": 2e-05, "loss": 0.04354676, "step": 10966 }, { "epoch": 21.934, "grad_norm": 1.334719181060791, "learning_rate": 2e-05, "loss": 0.02577735, "step": 10967 }, { "epoch": 21.936, "grad_norm": 1.0082858800888062, "learning_rate": 2e-05, "loss": 0.0299839, "step": 10968 }, { "epoch": 21.938, "grad_norm": 1.5717273950576782, "learning_rate": 2e-05, "loss": 0.02820227, "step": 10969 }, { "epoch": 21.94, "grad_norm": 1.0491491556167603, "learning_rate": 2e-05, "loss": 0.02673104, "step": 10970 }, { "epoch": 21.942, "grad_norm": 1.0635792016983032, "learning_rate": 2e-05, "loss": 0.02951121, "step": 10971 }, { "epoch": 21.944, "grad_norm": 1.3966928720474243, "learning_rate": 2e-05, "loss": 0.04850317, "step": 10972 }, { "epoch": 21.946, "grad_norm": 1.1007291078567505, "learning_rate": 2e-05, "loss": 0.02945408, "step": 10973 }, { "epoch": 21.948, "grad_norm": 0.8906104564666748, "learning_rate": 2e-05, "loss": 0.03067725, "step": 10974 }, { "epoch": 21.95, "grad_norm": 1.0863364934921265, "learning_rate": 2e-05, "loss": 0.02871832, "step": 10975 }, { "epoch": 21.951999999999998, "grad_norm": 1.6591525077819824, "learning_rate": 2e-05, "loss": 0.04460109, "step": 10976 }, { "epoch": 21.954, "grad_norm": 1.431624174118042, "learning_rate": 2e-05, "loss": 0.03387917, "step": 10977 }, { "epoch": 21.956, "grad_norm": 1.215907335281372, "learning_rate": 2e-05, "loss": 0.05533048, "step": 10978 }, { "epoch": 21.958, "grad_norm": 1.0936074256896973, "learning_rate": 2e-05, "loss": 0.02711234, "step": 10979 }, { "epoch": 21.96, "grad_norm": 1.123211145401001, "learning_rate": 2e-05, "loss": 0.04708305, "step": 10980 }, { "epoch": 21.962, "grad_norm": 1.28691828250885, "learning_rate": 2e-05, "loss": 0.03268384, "step": 10981 }, { "epoch": 21.964, "grad_norm": 1.013840913772583, "learning_rate": 2e-05, "loss": 0.03659943, "step": 10982 }, { "epoch": 21.966, "grad_norm": 1.2497045993804932, "learning_rate": 2e-05, "loss": 0.05112303, "step": 10983 }, { "epoch": 21.968, "grad_norm": 0.9134369492530823, "learning_rate": 2e-05, "loss": 0.02654456, "step": 10984 }, { "epoch": 21.97, "grad_norm": 1.2738003730773926, "learning_rate": 2e-05, "loss": 0.03234825, "step": 10985 }, { "epoch": 21.972, "grad_norm": 0.8131256699562073, "learning_rate": 2e-05, "loss": 0.0200008, "step": 10986 }, { "epoch": 21.974, "grad_norm": 1.4570480585098267, "learning_rate": 2e-05, "loss": 0.02533815, "step": 10987 }, { "epoch": 21.976, "grad_norm": 0.9933975338935852, "learning_rate": 2e-05, "loss": 0.03682224, "step": 10988 }, { "epoch": 21.978, "grad_norm": 2.5660200119018555, "learning_rate": 2e-05, "loss": 0.04893076, "step": 10989 }, { "epoch": 21.98, "grad_norm": 1.247011661529541, "learning_rate": 2e-05, "loss": 0.03728838, "step": 10990 }, { "epoch": 21.982, "grad_norm": 2.838491201400757, "learning_rate": 2e-05, "loss": 0.04673835, "step": 10991 }, { "epoch": 21.984, "grad_norm": 1.4233250617980957, "learning_rate": 2e-05, "loss": 0.04645887, "step": 10992 }, { "epoch": 21.986, "grad_norm": 1.8115314245224, "learning_rate": 2e-05, "loss": 0.03267002, "step": 10993 }, { "epoch": 21.988, "grad_norm": 0.8721919655799866, "learning_rate": 2e-05, "loss": 0.03281524, "step": 10994 }, { "epoch": 21.99, "grad_norm": 1.70564866065979, "learning_rate": 2e-05, "loss": 0.04003553, "step": 10995 }, { "epoch": 21.992, "grad_norm": 0.9615834951400757, "learning_rate": 2e-05, "loss": 0.03332902, "step": 10996 }, { "epoch": 21.994, "grad_norm": 1.3987194299697876, "learning_rate": 2e-05, "loss": 0.03447945, "step": 10997 }, { "epoch": 21.996, "grad_norm": 0.9507851600646973, "learning_rate": 2e-05, "loss": 0.02853581, "step": 10998 }, { "epoch": 21.998, "grad_norm": 0.9905051589012146, "learning_rate": 2e-05, "loss": 0.02947031, "step": 10999 }, { "epoch": 22.0, "grad_norm": 1.9134247303009033, "learning_rate": 2e-05, "loss": 0.03030916, "step": 11000 }, { "epoch": 22.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9760479041916168, "Equal_1": 0.998, "Equal_2": 0.9800399201596807, "Equal_3": 0.9740518962075848, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9919839679358717, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.994, "Perpendicular_1": 1.0, "Perpendicular_2": 0.988, "Perpendicular_3": 0.8296593186372746, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9956666666666667, "PointLiesOnCircle_3": 0.988, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9800399201596807 }, "eval_runtime": 319.6303, "eval_samples_per_second": 32.85, "eval_steps_per_second": 0.657, "step": 11000 }, { "epoch": 22.002, "grad_norm": 1.5013846158981323, "learning_rate": 2e-05, "loss": 0.04865558, "step": 11001 }, { "epoch": 22.004, "grad_norm": 0.9657471179962158, "learning_rate": 2e-05, "loss": 0.03410444, "step": 11002 }, { "epoch": 22.006, "grad_norm": 1.055569052696228, "learning_rate": 2e-05, "loss": 0.03405615, "step": 11003 }, { "epoch": 22.008, "grad_norm": 1.843996524810791, "learning_rate": 2e-05, "loss": 0.05212854, "step": 11004 }, { "epoch": 22.01, "grad_norm": 1.2965750694274902, "learning_rate": 2e-05, "loss": 0.04010913, "step": 11005 }, { "epoch": 22.012, "grad_norm": 0.9425175189971924, "learning_rate": 2e-05, "loss": 0.02818494, "step": 11006 }, { "epoch": 22.014, "grad_norm": 1.5576436519622803, "learning_rate": 2e-05, "loss": 0.05102572, "step": 11007 }, { "epoch": 22.016, "grad_norm": 0.9978459477424622, "learning_rate": 2e-05, "loss": 0.03394839, "step": 11008 }, { "epoch": 22.018, "grad_norm": 1.1211631298065186, "learning_rate": 2e-05, "loss": 0.04935118, "step": 11009 }, { "epoch": 22.02, "grad_norm": 2.0036747455596924, "learning_rate": 2e-05, "loss": 0.04285323, "step": 11010 }, { "epoch": 22.022, "grad_norm": 1.1582741737365723, "learning_rate": 2e-05, "loss": 0.04079919, "step": 11011 }, { "epoch": 22.024, "grad_norm": 1.5606409311294556, "learning_rate": 2e-05, "loss": 0.03285683, "step": 11012 }, { "epoch": 22.026, "grad_norm": 2.8261466026306152, "learning_rate": 2e-05, "loss": 0.04419506, "step": 11013 }, { "epoch": 22.028, "grad_norm": 1.250126838684082, "learning_rate": 2e-05, "loss": 0.03550693, "step": 11014 }, { "epoch": 22.03, "grad_norm": 2.362985372543335, "learning_rate": 2e-05, "loss": 0.03660453, "step": 11015 }, { "epoch": 22.032, "grad_norm": 1.1419157981872559, "learning_rate": 2e-05, "loss": 0.03367894, "step": 11016 }, { "epoch": 22.034, "grad_norm": 1.2348449230194092, "learning_rate": 2e-05, "loss": 0.03856073, "step": 11017 }, { "epoch": 22.036, "grad_norm": 0.8628196120262146, "learning_rate": 2e-05, "loss": 0.02549962, "step": 11018 }, { "epoch": 22.038, "grad_norm": 1.2292640209197998, "learning_rate": 2e-05, "loss": 0.03300656, "step": 11019 }, { "epoch": 22.04, "grad_norm": 1.549729824066162, "learning_rate": 2e-05, "loss": 0.03245661, "step": 11020 }, { "epoch": 22.042, "grad_norm": 1.387235164642334, "learning_rate": 2e-05, "loss": 0.03017138, "step": 11021 }, { "epoch": 22.044, "grad_norm": 1.231819987297058, "learning_rate": 2e-05, "loss": 0.03833802, "step": 11022 }, { "epoch": 22.046, "grad_norm": 0.9583532214164734, "learning_rate": 2e-05, "loss": 0.0348784, "step": 11023 }, { "epoch": 22.048, "grad_norm": 2.0372843742370605, "learning_rate": 2e-05, "loss": 0.0497199, "step": 11024 }, { "epoch": 22.05, "grad_norm": 1.5443356037139893, "learning_rate": 2e-05, "loss": 0.04319365, "step": 11025 }, { "epoch": 22.052, "grad_norm": 0.9617211818695068, "learning_rate": 2e-05, "loss": 0.02863034, "step": 11026 }, { "epoch": 22.054, "grad_norm": 1.008378028869629, "learning_rate": 2e-05, "loss": 0.03430276, "step": 11027 }, { "epoch": 22.056, "grad_norm": 0.8539184927940369, "learning_rate": 2e-05, "loss": 0.02441457, "step": 11028 }, { "epoch": 22.058, "grad_norm": 0.9425522685050964, "learning_rate": 2e-05, "loss": 0.03636635, "step": 11029 }, { "epoch": 22.06, "grad_norm": 1.6154694557189941, "learning_rate": 2e-05, "loss": 0.03873938, "step": 11030 }, { "epoch": 22.062, "grad_norm": 0.9321594834327698, "learning_rate": 2e-05, "loss": 0.02615494, "step": 11031 }, { "epoch": 22.064, "grad_norm": 1.0639960765838623, "learning_rate": 2e-05, "loss": 0.03128612, "step": 11032 }, { "epoch": 22.066, "grad_norm": 2.2421159744262695, "learning_rate": 2e-05, "loss": 0.04077518, "step": 11033 }, { "epoch": 22.068, "grad_norm": 1.059031367301941, "learning_rate": 2e-05, "loss": 0.03726078, "step": 11034 }, { "epoch": 22.07, "grad_norm": 1.1438366174697876, "learning_rate": 2e-05, "loss": 0.03756997, "step": 11035 }, { "epoch": 22.072, "grad_norm": 1.1501072645187378, "learning_rate": 2e-05, "loss": 0.02969356, "step": 11036 }, { "epoch": 22.074, "grad_norm": 1.339308738708496, "learning_rate": 2e-05, "loss": 0.02925887, "step": 11037 }, { "epoch": 22.076, "grad_norm": 1.7447677850723267, "learning_rate": 2e-05, "loss": 0.04287693, "step": 11038 }, { "epoch": 22.078, "grad_norm": 1.0596781969070435, "learning_rate": 2e-05, "loss": 0.02210912, "step": 11039 }, { "epoch": 22.08, "grad_norm": 1.0472910404205322, "learning_rate": 2e-05, "loss": 0.03132582, "step": 11040 }, { "epoch": 22.082, "grad_norm": 1.045034408569336, "learning_rate": 2e-05, "loss": 0.03018015, "step": 11041 }, { "epoch": 22.084, "grad_norm": 0.9737959504127502, "learning_rate": 2e-05, "loss": 0.0249765, "step": 11042 }, { "epoch": 22.086, "grad_norm": 1.3689839839935303, "learning_rate": 2e-05, "loss": 0.03519817, "step": 11043 }, { "epoch": 22.088, "grad_norm": 1.2619421482086182, "learning_rate": 2e-05, "loss": 0.03652804, "step": 11044 }, { "epoch": 22.09, "grad_norm": 1.0860421657562256, "learning_rate": 2e-05, "loss": 0.02551399, "step": 11045 }, { "epoch": 22.092, "grad_norm": 0.9912066459655762, "learning_rate": 2e-05, "loss": 0.03179378, "step": 11046 }, { "epoch": 22.094, "grad_norm": 1.0879795551300049, "learning_rate": 2e-05, "loss": 0.03158774, "step": 11047 }, { "epoch": 22.096, "grad_norm": 0.8615609407424927, "learning_rate": 2e-05, "loss": 0.02629388, "step": 11048 }, { "epoch": 22.098, "grad_norm": 1.7215999364852905, "learning_rate": 2e-05, "loss": 0.03771915, "step": 11049 }, { "epoch": 22.1, "grad_norm": 0.9128824472427368, "learning_rate": 2e-05, "loss": 0.02178533, "step": 11050 }, { "epoch": 22.102, "grad_norm": 1.2859452962875366, "learning_rate": 2e-05, "loss": 0.04280837, "step": 11051 }, { "epoch": 22.104, "grad_norm": 1.0833191871643066, "learning_rate": 2e-05, "loss": 0.03400262, "step": 11052 }, { "epoch": 22.106, "grad_norm": 0.8177392482757568, "learning_rate": 2e-05, "loss": 0.02097586, "step": 11053 }, { "epoch": 22.108, "grad_norm": 3.141331195831299, "learning_rate": 2e-05, "loss": 0.02519385, "step": 11054 }, { "epoch": 22.11, "grad_norm": 1.0663577318191528, "learning_rate": 2e-05, "loss": 0.03236621, "step": 11055 }, { "epoch": 22.112, "grad_norm": 3.876330614089966, "learning_rate": 2e-05, "loss": 0.05157751, "step": 11056 }, { "epoch": 22.114, "grad_norm": 1.0990570783615112, "learning_rate": 2e-05, "loss": 0.03127418, "step": 11057 }, { "epoch": 22.116, "grad_norm": 2.1724092960357666, "learning_rate": 2e-05, "loss": 0.05966609, "step": 11058 }, { "epoch": 22.118, "grad_norm": 2.2459487915039062, "learning_rate": 2e-05, "loss": 0.04710309, "step": 11059 }, { "epoch": 22.12, "grad_norm": 1.9107338190078735, "learning_rate": 2e-05, "loss": 0.04102355, "step": 11060 }, { "epoch": 22.122, "grad_norm": 1.542749285697937, "learning_rate": 2e-05, "loss": 0.04050046, "step": 11061 }, { "epoch": 22.124, "grad_norm": 1.8505321741104126, "learning_rate": 2e-05, "loss": 0.04142891, "step": 11062 }, { "epoch": 22.126, "grad_norm": 1.4331146478652954, "learning_rate": 2e-05, "loss": 0.03895901, "step": 11063 }, { "epoch": 22.128, "grad_norm": 1.0269427299499512, "learning_rate": 2e-05, "loss": 0.03234866, "step": 11064 }, { "epoch": 22.13, "grad_norm": 1.1322760581970215, "learning_rate": 2e-05, "loss": 0.03056391, "step": 11065 }, { "epoch": 22.132, "grad_norm": 0.8751665949821472, "learning_rate": 2e-05, "loss": 0.01894474, "step": 11066 }, { "epoch": 22.134, "grad_norm": 1.1677522659301758, "learning_rate": 2e-05, "loss": 0.03264951, "step": 11067 }, { "epoch": 22.136, "grad_norm": 1.8770594596862793, "learning_rate": 2e-05, "loss": 0.04727862, "step": 11068 }, { "epoch": 22.138, "grad_norm": 0.9931758642196655, "learning_rate": 2e-05, "loss": 0.02625104, "step": 11069 }, { "epoch": 22.14, "grad_norm": 1.8161505460739136, "learning_rate": 2e-05, "loss": 0.0335126, "step": 11070 }, { "epoch": 22.142, "grad_norm": 1.2563273906707764, "learning_rate": 2e-05, "loss": 0.03935399, "step": 11071 }, { "epoch": 22.144, "grad_norm": 1.6449320316314697, "learning_rate": 2e-05, "loss": 0.03845852, "step": 11072 }, { "epoch": 22.146, "grad_norm": 1.3035930395126343, "learning_rate": 2e-05, "loss": 0.03262253, "step": 11073 }, { "epoch": 22.148, "grad_norm": 1.5313411951065063, "learning_rate": 2e-05, "loss": 0.03509212, "step": 11074 }, { "epoch": 22.15, "grad_norm": 2.219724178314209, "learning_rate": 2e-05, "loss": 0.05217472, "step": 11075 }, { "epoch": 22.152, "grad_norm": 1.2089931964874268, "learning_rate": 2e-05, "loss": 0.03936118, "step": 11076 }, { "epoch": 22.154, "grad_norm": 1.1982935667037964, "learning_rate": 2e-05, "loss": 0.03750388, "step": 11077 }, { "epoch": 22.156, "grad_norm": 1.3776034116744995, "learning_rate": 2e-05, "loss": 0.02474811, "step": 11078 }, { "epoch": 22.158, "grad_norm": 1.5565482378005981, "learning_rate": 2e-05, "loss": 0.04205162, "step": 11079 }, { "epoch": 22.16, "grad_norm": 1.5331981182098389, "learning_rate": 2e-05, "loss": 0.05201468, "step": 11080 }, { "epoch": 22.162, "grad_norm": 1.260038137435913, "learning_rate": 2e-05, "loss": 0.03021825, "step": 11081 }, { "epoch": 22.164, "grad_norm": 1.128806471824646, "learning_rate": 2e-05, "loss": 0.03233521, "step": 11082 }, { "epoch": 22.166, "grad_norm": 1.0538668632507324, "learning_rate": 2e-05, "loss": 0.02831938, "step": 11083 }, { "epoch": 22.168, "grad_norm": 1.506290078163147, "learning_rate": 2e-05, "loss": 0.03654964, "step": 11084 }, { "epoch": 22.17, "grad_norm": 1.4972366094589233, "learning_rate": 2e-05, "loss": 0.04231321, "step": 11085 }, { "epoch": 22.172, "grad_norm": 1.2762951850891113, "learning_rate": 2e-05, "loss": 0.04460282, "step": 11086 }, { "epoch": 22.174, "grad_norm": 1.0688859224319458, "learning_rate": 2e-05, "loss": 0.03068903, "step": 11087 }, { "epoch": 22.176, "grad_norm": 1.194064974784851, "learning_rate": 2e-05, "loss": 0.03391276, "step": 11088 }, { "epoch": 22.178, "grad_norm": 0.7239233255386353, "learning_rate": 2e-05, "loss": 0.01693652, "step": 11089 }, { "epoch": 22.18, "grad_norm": 1.0344641208648682, "learning_rate": 2e-05, "loss": 0.03364387, "step": 11090 }, { "epoch": 22.182, "grad_norm": 1.1779170036315918, "learning_rate": 2e-05, "loss": 0.03767635, "step": 11091 }, { "epoch": 22.184, "grad_norm": 1.9103333950042725, "learning_rate": 2e-05, "loss": 0.04368252, "step": 11092 }, { "epoch": 22.186, "grad_norm": 1.6527727842330933, "learning_rate": 2e-05, "loss": 0.03112076, "step": 11093 }, { "epoch": 22.188, "grad_norm": 0.8650185465812683, "learning_rate": 2e-05, "loss": 0.0230502, "step": 11094 }, { "epoch": 22.19, "grad_norm": 2.3402581214904785, "learning_rate": 2e-05, "loss": 0.05151432, "step": 11095 }, { "epoch": 22.192, "grad_norm": 1.087644100189209, "learning_rate": 2e-05, "loss": 0.02510383, "step": 11096 }, { "epoch": 22.194, "grad_norm": 1.062873125076294, "learning_rate": 2e-05, "loss": 0.02815459, "step": 11097 }, { "epoch": 22.196, "grad_norm": 1.5720903873443604, "learning_rate": 2e-05, "loss": 0.04108073, "step": 11098 }, { "epoch": 22.198, "grad_norm": 1.1922218799591064, "learning_rate": 2e-05, "loss": 0.03966674, "step": 11099 }, { "epoch": 22.2, "grad_norm": 1.7009609937667847, "learning_rate": 2e-05, "loss": 0.03648136, "step": 11100 }, { "epoch": 22.202, "grad_norm": 1.3058127164840698, "learning_rate": 2e-05, "loss": 0.030958, "step": 11101 }, { "epoch": 22.204, "grad_norm": 1.2523670196533203, "learning_rate": 2e-05, "loss": 0.03727802, "step": 11102 }, { "epoch": 22.206, "grad_norm": 2.1313955783843994, "learning_rate": 2e-05, "loss": 0.03327603, "step": 11103 }, { "epoch": 22.208, "grad_norm": 1.0386605262756348, "learning_rate": 2e-05, "loss": 0.03811013, "step": 11104 }, { "epoch": 22.21, "grad_norm": 0.9282265305519104, "learning_rate": 2e-05, "loss": 0.02790239, "step": 11105 }, { "epoch": 22.212, "grad_norm": 1.176169991493225, "learning_rate": 2e-05, "loss": 0.03345915, "step": 11106 }, { "epoch": 22.214, "grad_norm": 1.745169758796692, "learning_rate": 2e-05, "loss": 0.04287583, "step": 11107 }, { "epoch": 22.216, "grad_norm": 1.2296968698501587, "learning_rate": 2e-05, "loss": 0.04217129, "step": 11108 }, { "epoch": 22.218, "grad_norm": 1.1114269495010376, "learning_rate": 2e-05, "loss": 0.04333063, "step": 11109 }, { "epoch": 22.22, "grad_norm": 1.1733880043029785, "learning_rate": 2e-05, "loss": 0.03284467, "step": 11110 }, { "epoch": 22.222, "grad_norm": 1.0831984281539917, "learning_rate": 2e-05, "loss": 0.03438616, "step": 11111 }, { "epoch": 22.224, "grad_norm": 1.352116346359253, "learning_rate": 2e-05, "loss": 0.04775696, "step": 11112 }, { "epoch": 22.226, "grad_norm": 1.0606354475021362, "learning_rate": 2e-05, "loss": 0.02460128, "step": 11113 }, { "epoch": 22.228, "grad_norm": 1.1735827922821045, "learning_rate": 2e-05, "loss": 0.03722972, "step": 11114 }, { "epoch": 22.23, "grad_norm": 0.9459177851676941, "learning_rate": 2e-05, "loss": 0.03487729, "step": 11115 }, { "epoch": 22.232, "grad_norm": 1.0781097412109375, "learning_rate": 2e-05, "loss": 0.02943964, "step": 11116 }, { "epoch": 22.234, "grad_norm": 1.2076375484466553, "learning_rate": 2e-05, "loss": 0.03915054, "step": 11117 }, { "epoch": 22.236, "grad_norm": 0.8902519941329956, "learning_rate": 2e-05, "loss": 0.02953203, "step": 11118 }, { "epoch": 22.238, "grad_norm": 1.423532247543335, "learning_rate": 2e-05, "loss": 0.03786097, "step": 11119 }, { "epoch": 22.24, "grad_norm": 1.068610668182373, "learning_rate": 2e-05, "loss": 0.0284631, "step": 11120 }, { "epoch": 22.242, "grad_norm": 1.3024731874465942, "learning_rate": 2e-05, "loss": 0.0295128, "step": 11121 }, { "epoch": 22.244, "grad_norm": 0.8875964283943176, "learning_rate": 2e-05, "loss": 0.02831073, "step": 11122 }, { "epoch": 22.246, "grad_norm": 1.04351806640625, "learning_rate": 2e-05, "loss": 0.03519507, "step": 11123 }, { "epoch": 22.248, "grad_norm": 0.969316303730011, "learning_rate": 2e-05, "loss": 0.02787546, "step": 11124 }, { "epoch": 22.25, "grad_norm": 1.1152201890945435, "learning_rate": 2e-05, "loss": 0.03151672, "step": 11125 }, { "epoch": 22.252, "grad_norm": 1.1251158714294434, "learning_rate": 2e-05, "loss": 0.03841231, "step": 11126 }, { "epoch": 22.254, "grad_norm": 1.6095986366271973, "learning_rate": 2e-05, "loss": 0.03633893, "step": 11127 }, { "epoch": 22.256, "grad_norm": 1.3410676717758179, "learning_rate": 2e-05, "loss": 0.0304535, "step": 11128 }, { "epoch": 22.258, "grad_norm": 1.5073579549789429, "learning_rate": 2e-05, "loss": 0.04205672, "step": 11129 }, { "epoch": 22.26, "grad_norm": 1.2273343801498413, "learning_rate": 2e-05, "loss": 0.02800684, "step": 11130 }, { "epoch": 22.262, "grad_norm": 1.2454593181610107, "learning_rate": 2e-05, "loss": 0.03668667, "step": 11131 }, { "epoch": 22.264, "grad_norm": 0.9008411169052124, "learning_rate": 2e-05, "loss": 0.02018595, "step": 11132 }, { "epoch": 22.266, "grad_norm": 1.4407562017440796, "learning_rate": 2e-05, "loss": 0.02953599, "step": 11133 }, { "epoch": 22.268, "grad_norm": 1.4390043020248413, "learning_rate": 2e-05, "loss": 0.02695351, "step": 11134 }, { "epoch": 22.27, "grad_norm": 1.291393756866455, "learning_rate": 2e-05, "loss": 0.03049627, "step": 11135 }, { "epoch": 22.272, "grad_norm": 1.229018211364746, "learning_rate": 2e-05, "loss": 0.03298253, "step": 11136 }, { "epoch": 22.274, "grad_norm": 2.3607177734375, "learning_rate": 2e-05, "loss": 0.03460927, "step": 11137 }, { "epoch": 22.276, "grad_norm": 4.348043918609619, "learning_rate": 2e-05, "loss": 0.04996082, "step": 11138 }, { "epoch": 22.278, "grad_norm": 1.7652407884597778, "learning_rate": 2e-05, "loss": 0.02721683, "step": 11139 }, { "epoch": 22.28, "grad_norm": 1.593738317489624, "learning_rate": 2e-05, "loss": 0.03514788, "step": 11140 }, { "epoch": 22.282, "grad_norm": 1.2631458044052124, "learning_rate": 2e-05, "loss": 0.03659892, "step": 11141 }, { "epoch": 22.284, "grad_norm": 1.642995834350586, "learning_rate": 2e-05, "loss": 0.03794821, "step": 11142 }, { "epoch": 22.286, "grad_norm": 1.9356567859649658, "learning_rate": 2e-05, "loss": 0.03757293, "step": 11143 }, { "epoch": 22.288, "grad_norm": 0.9092358946800232, "learning_rate": 2e-05, "loss": 0.02223909, "step": 11144 }, { "epoch": 22.29, "grad_norm": 3.2695510387420654, "learning_rate": 2e-05, "loss": 0.03481711, "step": 11145 }, { "epoch": 22.292, "grad_norm": 0.9674291610717773, "learning_rate": 2e-05, "loss": 0.02906576, "step": 11146 }, { "epoch": 22.294, "grad_norm": 6.927521705627441, "learning_rate": 2e-05, "loss": 0.05944148, "step": 11147 }, { "epoch": 22.296, "grad_norm": 1.8510019779205322, "learning_rate": 2e-05, "loss": 0.04159633, "step": 11148 }, { "epoch": 22.298, "grad_norm": 1.190011739730835, "learning_rate": 2e-05, "loss": 0.03650186, "step": 11149 }, { "epoch": 22.3, "grad_norm": 0.9493358731269836, "learning_rate": 2e-05, "loss": 0.03242558, "step": 11150 }, { "epoch": 22.302, "grad_norm": 3.193427324295044, "learning_rate": 2e-05, "loss": 0.04184601, "step": 11151 }, { "epoch": 22.304, "grad_norm": 1.372183918952942, "learning_rate": 2e-05, "loss": 0.03488796, "step": 11152 }, { "epoch": 22.306, "grad_norm": 2.2507684230804443, "learning_rate": 2e-05, "loss": 0.04868758, "step": 11153 }, { "epoch": 22.308, "grad_norm": 1.2131235599517822, "learning_rate": 2e-05, "loss": 0.02608864, "step": 11154 }, { "epoch": 22.31, "grad_norm": 1.179132103919983, "learning_rate": 2e-05, "loss": 0.03008761, "step": 11155 }, { "epoch": 22.312, "grad_norm": 1.2756638526916504, "learning_rate": 2e-05, "loss": 0.03774351, "step": 11156 }, { "epoch": 22.314, "grad_norm": 1.5968365669250488, "learning_rate": 2e-05, "loss": 0.0491349, "step": 11157 }, { "epoch": 22.316, "grad_norm": 2.220818519592285, "learning_rate": 2e-05, "loss": 0.02767688, "step": 11158 }, { "epoch": 22.318, "grad_norm": 0.9472852349281311, "learning_rate": 2e-05, "loss": 0.02426755, "step": 11159 }, { "epoch": 22.32, "grad_norm": 2.503251552581787, "learning_rate": 2e-05, "loss": 0.0384932, "step": 11160 }, { "epoch": 22.322, "grad_norm": 3.294299602508545, "learning_rate": 2e-05, "loss": 0.06644698, "step": 11161 }, { "epoch": 22.324, "grad_norm": 1.2162548303604126, "learning_rate": 2e-05, "loss": 0.03608144, "step": 11162 }, { "epoch": 22.326, "grad_norm": 1.3543113470077515, "learning_rate": 2e-05, "loss": 0.03112835, "step": 11163 }, { "epoch": 22.328, "grad_norm": 1.1519101858139038, "learning_rate": 2e-05, "loss": 0.02835603, "step": 11164 }, { "epoch": 22.33, "grad_norm": 0.905073344707489, "learning_rate": 2e-05, "loss": 0.03140076, "step": 11165 }, { "epoch": 22.332, "grad_norm": 1.0447617769241333, "learning_rate": 2e-05, "loss": 0.02909181, "step": 11166 }, { "epoch": 22.334, "grad_norm": 1.8674046993255615, "learning_rate": 2e-05, "loss": 0.0350634, "step": 11167 }, { "epoch": 22.336, "grad_norm": 1.658823013305664, "learning_rate": 2e-05, "loss": 0.04215661, "step": 11168 }, { "epoch": 22.338, "grad_norm": 0.9590745568275452, "learning_rate": 2e-05, "loss": 0.03000915, "step": 11169 }, { "epoch": 22.34, "grad_norm": 1.0610401630401611, "learning_rate": 2e-05, "loss": 0.03618003, "step": 11170 }, { "epoch": 22.342, "grad_norm": 1.1848183870315552, "learning_rate": 2e-05, "loss": 0.03476301, "step": 11171 }, { "epoch": 22.344, "grad_norm": 1.2776687145233154, "learning_rate": 2e-05, "loss": 0.03656887, "step": 11172 }, { "epoch": 22.346, "grad_norm": 1.9115201234817505, "learning_rate": 2e-05, "loss": 0.04781888, "step": 11173 }, { "epoch": 22.348, "grad_norm": 2.2884371280670166, "learning_rate": 2e-05, "loss": 0.03745548, "step": 11174 }, { "epoch": 22.35, "grad_norm": 0.9647842049598694, "learning_rate": 2e-05, "loss": 0.02903546, "step": 11175 }, { "epoch": 22.352, "grad_norm": 1.4671611785888672, "learning_rate": 2e-05, "loss": 0.05149737, "step": 11176 }, { "epoch": 22.354, "grad_norm": 1.0586042404174805, "learning_rate": 2e-05, "loss": 0.0410578, "step": 11177 }, { "epoch": 22.356, "grad_norm": 1.2867225408554077, "learning_rate": 2e-05, "loss": 0.04615509, "step": 11178 }, { "epoch": 22.358, "grad_norm": 1.0811021327972412, "learning_rate": 2e-05, "loss": 0.03351441, "step": 11179 }, { "epoch": 22.36, "grad_norm": 2.024754762649536, "learning_rate": 2e-05, "loss": 0.04169374, "step": 11180 }, { "epoch": 22.362, "grad_norm": 1.5441582202911377, "learning_rate": 2e-05, "loss": 0.03294868, "step": 11181 }, { "epoch": 22.364, "grad_norm": 1.9213157892227173, "learning_rate": 2e-05, "loss": 0.04192325, "step": 11182 }, { "epoch": 22.366, "grad_norm": 1.0890470743179321, "learning_rate": 2e-05, "loss": 0.03932363, "step": 11183 }, { "epoch": 22.368, "grad_norm": 1.6594054698944092, "learning_rate": 2e-05, "loss": 0.04021504, "step": 11184 }, { "epoch": 22.37, "grad_norm": 1.811708927154541, "learning_rate": 2e-05, "loss": 0.05453177, "step": 11185 }, { "epoch": 22.372, "grad_norm": 1.0302735567092896, "learning_rate": 2e-05, "loss": 0.03242699, "step": 11186 }, { "epoch": 22.374, "grad_norm": 0.9932673573493958, "learning_rate": 2e-05, "loss": 0.02922393, "step": 11187 }, { "epoch": 22.376, "grad_norm": 1.15402352809906, "learning_rate": 2e-05, "loss": 0.03236395, "step": 11188 }, { "epoch": 22.378, "grad_norm": 0.9961234927177429, "learning_rate": 2e-05, "loss": 0.03148089, "step": 11189 }, { "epoch": 22.38, "grad_norm": 1.1807105541229248, "learning_rate": 2e-05, "loss": 0.03198289, "step": 11190 }, { "epoch": 22.382, "grad_norm": 1.0972753763198853, "learning_rate": 2e-05, "loss": 0.03932333, "step": 11191 }, { "epoch": 22.384, "grad_norm": 1.3407927751541138, "learning_rate": 2e-05, "loss": 0.02748748, "step": 11192 }, { "epoch": 22.386, "grad_norm": 1.1336009502410889, "learning_rate": 2e-05, "loss": 0.03101401, "step": 11193 }, { "epoch": 22.388, "grad_norm": 0.8580100536346436, "learning_rate": 2e-05, "loss": 0.01859001, "step": 11194 }, { "epoch": 22.39, "grad_norm": 1.9567729234695435, "learning_rate": 2e-05, "loss": 0.03903724, "step": 11195 }, { "epoch": 22.392, "grad_norm": 1.1536086797714233, "learning_rate": 2e-05, "loss": 0.03743483, "step": 11196 }, { "epoch": 22.394, "grad_norm": 1.7297052145004272, "learning_rate": 2e-05, "loss": 0.04060826, "step": 11197 }, { "epoch": 22.396, "grad_norm": 1.1482161283493042, "learning_rate": 2e-05, "loss": 0.04031848, "step": 11198 }, { "epoch": 22.398, "grad_norm": 1.4597668647766113, "learning_rate": 2e-05, "loss": 0.04166424, "step": 11199 }, { "epoch": 22.4, "grad_norm": 1.0787426233291626, "learning_rate": 2e-05, "loss": 0.0363807, "step": 11200 }, { "epoch": 22.402, "grad_norm": 2.6459505558013916, "learning_rate": 2e-05, "loss": 0.03180039, "step": 11201 }, { "epoch": 22.404, "grad_norm": 1.28476881980896, "learning_rate": 2e-05, "loss": 0.03239727, "step": 11202 }, { "epoch": 22.406, "grad_norm": 1.4840121269226074, "learning_rate": 2e-05, "loss": 0.02138729, "step": 11203 }, { "epoch": 22.408, "grad_norm": 1.6134271621704102, "learning_rate": 2e-05, "loss": 0.05612726, "step": 11204 }, { "epoch": 22.41, "grad_norm": 1.0505448579788208, "learning_rate": 2e-05, "loss": 0.02743955, "step": 11205 }, { "epoch": 22.412, "grad_norm": 1.2720935344696045, "learning_rate": 2e-05, "loss": 0.04470626, "step": 11206 }, { "epoch": 22.414, "grad_norm": 1.216604232788086, "learning_rate": 2e-05, "loss": 0.03580245, "step": 11207 }, { "epoch": 22.416, "grad_norm": 0.9434407949447632, "learning_rate": 2e-05, "loss": 0.02070105, "step": 11208 }, { "epoch": 22.418, "grad_norm": 1.0345122814178467, "learning_rate": 2e-05, "loss": 0.0290225, "step": 11209 }, { "epoch": 22.42, "grad_norm": 1.173581600189209, "learning_rate": 2e-05, "loss": 0.03839279, "step": 11210 }, { "epoch": 22.422, "grad_norm": 1.084951400756836, "learning_rate": 2e-05, "loss": 0.03221672, "step": 11211 }, { "epoch": 22.424, "grad_norm": 1.0095467567443848, "learning_rate": 2e-05, "loss": 0.02846614, "step": 11212 }, { "epoch": 22.426, "grad_norm": 1.254839539527893, "learning_rate": 2e-05, "loss": 0.03883304, "step": 11213 }, { "epoch": 22.428, "grad_norm": 2.123760461807251, "learning_rate": 2e-05, "loss": 0.03277697, "step": 11214 }, { "epoch": 22.43, "grad_norm": 1.1430293321609497, "learning_rate": 2e-05, "loss": 0.02167347, "step": 11215 }, { "epoch": 22.432, "grad_norm": 1.242515206336975, "learning_rate": 2e-05, "loss": 0.0301038, "step": 11216 }, { "epoch": 22.434, "grad_norm": 1.651579737663269, "learning_rate": 2e-05, "loss": 0.03766736, "step": 11217 }, { "epoch": 22.436, "grad_norm": 0.8291965126991272, "learning_rate": 2e-05, "loss": 0.02428487, "step": 11218 }, { "epoch": 22.438, "grad_norm": 0.8632979393005371, "learning_rate": 2e-05, "loss": 0.0235811, "step": 11219 }, { "epoch": 22.44, "grad_norm": 1.366252064704895, "learning_rate": 2e-05, "loss": 0.04416512, "step": 11220 }, { "epoch": 22.442, "grad_norm": 0.796024739742279, "learning_rate": 2e-05, "loss": 0.0193195, "step": 11221 }, { "epoch": 22.444, "grad_norm": 1.679572582244873, "learning_rate": 2e-05, "loss": 0.04569577, "step": 11222 }, { "epoch": 22.446, "grad_norm": 1.6671518087387085, "learning_rate": 2e-05, "loss": 0.03679072, "step": 11223 }, { "epoch": 22.448, "grad_norm": 1.1129558086395264, "learning_rate": 2e-05, "loss": 0.03054724, "step": 11224 }, { "epoch": 22.45, "grad_norm": 5.476849555969238, "learning_rate": 2e-05, "loss": 0.03754855, "step": 11225 }, { "epoch": 22.452, "grad_norm": 1.0009487867355347, "learning_rate": 2e-05, "loss": 0.03189157, "step": 11226 }, { "epoch": 22.454, "grad_norm": 1.420681118965149, "learning_rate": 2e-05, "loss": 0.0333651, "step": 11227 }, { "epoch": 22.456, "grad_norm": 1.3200653791427612, "learning_rate": 2e-05, "loss": 0.0283935, "step": 11228 }, { "epoch": 22.458, "grad_norm": 2.4781174659729004, "learning_rate": 2e-05, "loss": 0.0337758, "step": 11229 }, { "epoch": 22.46, "grad_norm": 1.2006012201309204, "learning_rate": 2e-05, "loss": 0.04407885, "step": 11230 }, { "epoch": 22.462, "grad_norm": 0.8424386978149414, "learning_rate": 2e-05, "loss": 0.02036972, "step": 11231 }, { "epoch": 22.464, "grad_norm": 1.0086331367492676, "learning_rate": 2e-05, "loss": 0.02813908, "step": 11232 }, { "epoch": 22.466, "grad_norm": 1.6784151792526245, "learning_rate": 2e-05, "loss": 0.05713883, "step": 11233 }, { "epoch": 22.468, "grad_norm": 1.6650753021240234, "learning_rate": 2e-05, "loss": 0.0437688, "step": 11234 }, { "epoch": 22.47, "grad_norm": 1.033752679824829, "learning_rate": 2e-05, "loss": 0.02289294, "step": 11235 }, { "epoch": 22.472, "grad_norm": 1.9619020223617554, "learning_rate": 2e-05, "loss": 0.03555152, "step": 11236 }, { "epoch": 22.474, "grad_norm": 0.9467611908912659, "learning_rate": 2e-05, "loss": 0.02932581, "step": 11237 }, { "epoch": 22.476, "grad_norm": 1.2341922521591187, "learning_rate": 2e-05, "loss": 0.0385951, "step": 11238 }, { "epoch": 22.478, "grad_norm": 1.1940884590148926, "learning_rate": 2e-05, "loss": 0.03371501, "step": 11239 }, { "epoch": 22.48, "grad_norm": 1.9683741331100464, "learning_rate": 2e-05, "loss": 0.02918104, "step": 11240 }, { "epoch": 22.482, "grad_norm": 1.0450471639633179, "learning_rate": 2e-05, "loss": 0.02672218, "step": 11241 }, { "epoch": 22.484, "grad_norm": 2.0168466567993164, "learning_rate": 2e-05, "loss": 0.03515015, "step": 11242 }, { "epoch": 22.486, "grad_norm": 1.5114184617996216, "learning_rate": 2e-05, "loss": 0.04158841, "step": 11243 }, { "epoch": 22.488, "grad_norm": 1.5208250284194946, "learning_rate": 2e-05, "loss": 0.05399328, "step": 11244 }, { "epoch": 22.49, "grad_norm": 1.0174354314804077, "learning_rate": 2e-05, "loss": 0.02212554, "step": 11245 }, { "epoch": 22.492, "grad_norm": 1.1009485721588135, "learning_rate": 2e-05, "loss": 0.03259212, "step": 11246 }, { "epoch": 22.494, "grad_norm": 1.2722481489181519, "learning_rate": 2e-05, "loss": 0.03970824, "step": 11247 }, { "epoch": 22.496, "grad_norm": 1.4732874631881714, "learning_rate": 2e-05, "loss": 0.03239143, "step": 11248 }, { "epoch": 22.498, "grad_norm": 1.8055660724639893, "learning_rate": 2e-05, "loss": 0.04277326, "step": 11249 }, { "epoch": 22.5, "grad_norm": 1.5957167148590088, "learning_rate": 2e-05, "loss": 0.04040106, "step": 11250 }, { "epoch": 22.502, "grad_norm": 1.0191160440444946, "learning_rate": 2e-05, "loss": 0.03819025, "step": 11251 }, { "epoch": 22.504, "grad_norm": 0.8751825094223022, "learning_rate": 2e-05, "loss": 0.02078329, "step": 11252 }, { "epoch": 22.506, "grad_norm": 0.9703699350357056, "learning_rate": 2e-05, "loss": 0.02371472, "step": 11253 }, { "epoch": 22.508, "grad_norm": 1.4609352350234985, "learning_rate": 2e-05, "loss": 0.04331794, "step": 11254 }, { "epoch": 22.51, "grad_norm": 0.7537633180618286, "learning_rate": 2e-05, "loss": 0.02106013, "step": 11255 }, { "epoch": 22.512, "grad_norm": 1.1532191038131714, "learning_rate": 2e-05, "loss": 0.02696463, "step": 11256 }, { "epoch": 22.514, "grad_norm": 1.2470663785934448, "learning_rate": 2e-05, "loss": 0.03676067, "step": 11257 }, { "epoch": 22.516, "grad_norm": 1.034314751625061, "learning_rate": 2e-05, "loss": 0.03010433, "step": 11258 }, { "epoch": 22.518, "grad_norm": 1.0208652019500732, "learning_rate": 2e-05, "loss": 0.0345837, "step": 11259 }, { "epoch": 22.52, "grad_norm": 1.0119693279266357, "learning_rate": 2e-05, "loss": 0.02655143, "step": 11260 }, { "epoch": 22.522, "grad_norm": 1.8333549499511719, "learning_rate": 2e-05, "loss": 0.04552222, "step": 11261 }, { "epoch": 22.524, "grad_norm": 1.5406705141067505, "learning_rate": 2e-05, "loss": 0.04645106, "step": 11262 }, { "epoch": 22.526, "grad_norm": 0.7743955850601196, "learning_rate": 2e-05, "loss": 0.02030542, "step": 11263 }, { "epoch": 22.528, "grad_norm": 1.0403064489364624, "learning_rate": 2e-05, "loss": 0.02845855, "step": 11264 }, { "epoch": 22.53, "grad_norm": 1.3076633214950562, "learning_rate": 2e-05, "loss": 0.04241259, "step": 11265 }, { "epoch": 22.532, "grad_norm": 1.454092025756836, "learning_rate": 2e-05, "loss": 0.04193448, "step": 11266 }, { "epoch": 22.534, "grad_norm": 1.0106709003448486, "learning_rate": 2e-05, "loss": 0.03059579, "step": 11267 }, { "epoch": 22.536, "grad_norm": 1.4397797584533691, "learning_rate": 2e-05, "loss": 0.03595772, "step": 11268 }, { "epoch": 22.538, "grad_norm": 1.0913630723953247, "learning_rate": 2e-05, "loss": 0.02504985, "step": 11269 }, { "epoch": 22.54, "grad_norm": 1.1001238822937012, "learning_rate": 2e-05, "loss": 0.03245225, "step": 11270 }, { "epoch": 22.542, "grad_norm": 1.6088941097259521, "learning_rate": 2e-05, "loss": 0.05301979, "step": 11271 }, { "epoch": 22.544, "grad_norm": 1.3868272304534912, "learning_rate": 2e-05, "loss": 0.04549325, "step": 11272 }, { "epoch": 22.546, "grad_norm": 1.3786357641220093, "learning_rate": 2e-05, "loss": 0.03595918, "step": 11273 }, { "epoch": 22.548000000000002, "grad_norm": 0.9569694995880127, "learning_rate": 2e-05, "loss": 0.03288563, "step": 11274 }, { "epoch": 22.55, "grad_norm": 1.6313834190368652, "learning_rate": 2e-05, "loss": 0.0272957, "step": 11275 }, { "epoch": 22.552, "grad_norm": 1.3030887842178345, "learning_rate": 2e-05, "loss": 0.03881227, "step": 11276 }, { "epoch": 22.554, "grad_norm": 1.4454890489578247, "learning_rate": 2e-05, "loss": 0.03911527, "step": 11277 }, { "epoch": 22.556, "grad_norm": 1.0935059785842896, "learning_rate": 2e-05, "loss": 0.02929432, "step": 11278 }, { "epoch": 22.558, "grad_norm": 1.0467413663864136, "learning_rate": 2e-05, "loss": 0.03879336, "step": 11279 }, { "epoch": 22.56, "grad_norm": 1.3743126392364502, "learning_rate": 2e-05, "loss": 0.03689353, "step": 11280 }, { "epoch": 22.562, "grad_norm": 0.9728202819824219, "learning_rate": 2e-05, "loss": 0.02886998, "step": 11281 }, { "epoch": 22.564, "grad_norm": 1.2006583213806152, "learning_rate": 2e-05, "loss": 0.03440745, "step": 11282 }, { "epoch": 22.566, "grad_norm": 1.1317431926727295, "learning_rate": 2e-05, "loss": 0.03354093, "step": 11283 }, { "epoch": 22.568, "grad_norm": 1.0374213457107544, "learning_rate": 2e-05, "loss": 0.0318547, "step": 11284 }, { "epoch": 22.57, "grad_norm": 1.3933595418930054, "learning_rate": 2e-05, "loss": 0.03949068, "step": 11285 }, { "epoch": 22.572, "grad_norm": 1.9650115966796875, "learning_rate": 2e-05, "loss": 0.0620589, "step": 11286 }, { "epoch": 22.574, "grad_norm": 1.1582067012786865, "learning_rate": 2e-05, "loss": 0.03287086, "step": 11287 }, { "epoch": 22.576, "grad_norm": 1.4262588024139404, "learning_rate": 2e-05, "loss": 0.0474546, "step": 11288 }, { "epoch": 22.578, "grad_norm": 1.8528952598571777, "learning_rate": 2e-05, "loss": 0.03344478, "step": 11289 }, { "epoch": 22.58, "grad_norm": 2.4040780067443848, "learning_rate": 2e-05, "loss": 0.04099388, "step": 11290 }, { "epoch": 22.582, "grad_norm": 1.1463083028793335, "learning_rate": 2e-05, "loss": 0.02880567, "step": 11291 }, { "epoch": 22.584, "grad_norm": 1.1943498849868774, "learning_rate": 2e-05, "loss": 0.03802372, "step": 11292 }, { "epoch": 22.586, "grad_norm": 0.8871561288833618, "learning_rate": 2e-05, "loss": 0.02874197, "step": 11293 }, { "epoch": 22.588, "grad_norm": 1.1465365886688232, "learning_rate": 2e-05, "loss": 0.0340536, "step": 11294 }, { "epoch": 22.59, "grad_norm": 0.9088796973228455, "learning_rate": 2e-05, "loss": 0.0175512, "step": 11295 }, { "epoch": 22.592, "grad_norm": 1.8269764184951782, "learning_rate": 2e-05, "loss": 0.04000066, "step": 11296 }, { "epoch": 22.594, "grad_norm": 0.9784151315689087, "learning_rate": 2e-05, "loss": 0.02507006, "step": 11297 }, { "epoch": 22.596, "grad_norm": 1.2063928842544556, "learning_rate": 2e-05, "loss": 0.04113289, "step": 11298 }, { "epoch": 22.598, "grad_norm": 1.7086107730865479, "learning_rate": 2e-05, "loss": 0.03416719, "step": 11299 }, { "epoch": 22.6, "grad_norm": 0.8789715766906738, "learning_rate": 2e-05, "loss": 0.02553638, "step": 11300 }, { "epoch": 22.602, "grad_norm": 1.5890235900878906, "learning_rate": 2e-05, "loss": 0.03483776, "step": 11301 }, { "epoch": 22.604, "grad_norm": 1.4625310897827148, "learning_rate": 2e-05, "loss": 0.04801542, "step": 11302 }, { "epoch": 22.606, "grad_norm": 1.0777349472045898, "learning_rate": 2e-05, "loss": 0.03591266, "step": 11303 }, { "epoch": 22.608, "grad_norm": 0.9330995082855225, "learning_rate": 2e-05, "loss": 0.02881388, "step": 11304 }, { "epoch": 22.61, "grad_norm": 1.3750163316726685, "learning_rate": 2e-05, "loss": 0.03394581, "step": 11305 }, { "epoch": 22.612, "grad_norm": 1.35263192653656, "learning_rate": 2e-05, "loss": 0.03068662, "step": 11306 }, { "epoch": 22.614, "grad_norm": 1.291716456413269, "learning_rate": 2e-05, "loss": 0.0435909, "step": 11307 }, { "epoch": 22.616, "grad_norm": 1.3781776428222656, "learning_rate": 2e-05, "loss": 0.03991321, "step": 11308 }, { "epoch": 22.618, "grad_norm": 1.09850013256073, "learning_rate": 2e-05, "loss": 0.04068404, "step": 11309 }, { "epoch": 22.62, "grad_norm": 1.2705893516540527, "learning_rate": 2e-05, "loss": 0.04384391, "step": 11310 }, { "epoch": 22.622, "grad_norm": 1.6616660356521606, "learning_rate": 2e-05, "loss": 0.02786833, "step": 11311 }, { "epoch": 22.624, "grad_norm": 1.111756443977356, "learning_rate": 2e-05, "loss": 0.02686176, "step": 11312 }, { "epoch": 22.626, "grad_norm": 1.2425123453140259, "learning_rate": 2e-05, "loss": 0.04210493, "step": 11313 }, { "epoch": 22.628, "grad_norm": 1.0276856422424316, "learning_rate": 2e-05, "loss": 0.03203695, "step": 11314 }, { "epoch": 22.63, "grad_norm": 1.0379737615585327, "learning_rate": 2e-05, "loss": 0.03296281, "step": 11315 }, { "epoch": 22.632, "grad_norm": 0.9909905195236206, "learning_rate": 2e-05, "loss": 0.02666735, "step": 11316 }, { "epoch": 22.634, "grad_norm": 1.0876964330673218, "learning_rate": 2e-05, "loss": 0.03161968, "step": 11317 }, { "epoch": 22.636, "grad_norm": 1.085287094116211, "learning_rate": 2e-05, "loss": 0.02732705, "step": 11318 }, { "epoch": 22.638, "grad_norm": 2.1646599769592285, "learning_rate": 2e-05, "loss": 0.02609478, "step": 11319 }, { "epoch": 22.64, "grad_norm": 0.967041552066803, "learning_rate": 2e-05, "loss": 0.02320741, "step": 11320 }, { "epoch": 22.642, "grad_norm": 1.5888314247131348, "learning_rate": 2e-05, "loss": 0.05063655, "step": 11321 }, { "epoch": 22.644, "grad_norm": 0.9226511120796204, "learning_rate": 2e-05, "loss": 0.03058358, "step": 11322 }, { "epoch": 22.646, "grad_norm": 1.701810598373413, "learning_rate": 2e-05, "loss": 0.02915188, "step": 11323 }, { "epoch": 22.648, "grad_norm": 1.497314453125, "learning_rate": 2e-05, "loss": 0.04008635, "step": 11324 }, { "epoch": 22.65, "grad_norm": 1.3982007503509521, "learning_rate": 2e-05, "loss": 0.026863, "step": 11325 }, { "epoch": 22.652, "grad_norm": 1.4211523532867432, "learning_rate": 2e-05, "loss": 0.04486757, "step": 11326 }, { "epoch": 22.654, "grad_norm": 1.1829328536987305, "learning_rate": 2e-05, "loss": 0.02879838, "step": 11327 }, { "epoch": 22.656, "grad_norm": 1.7771282196044922, "learning_rate": 2e-05, "loss": 0.03814802, "step": 11328 }, { "epoch": 22.658, "grad_norm": 2.1755659580230713, "learning_rate": 2e-05, "loss": 0.03225455, "step": 11329 }, { "epoch": 22.66, "grad_norm": 1.3810690641403198, "learning_rate": 2e-05, "loss": 0.03953212, "step": 11330 }, { "epoch": 22.662, "grad_norm": 1.7033722400665283, "learning_rate": 2e-05, "loss": 0.04444302, "step": 11331 }, { "epoch": 22.664, "grad_norm": 2.3877339363098145, "learning_rate": 2e-05, "loss": 0.04218495, "step": 11332 }, { "epoch": 22.666, "grad_norm": 1.8948878049850464, "learning_rate": 2e-05, "loss": 0.05076663, "step": 11333 }, { "epoch": 22.668, "grad_norm": 1.181063175201416, "learning_rate": 2e-05, "loss": 0.02133227, "step": 11334 }, { "epoch": 22.67, "grad_norm": 2.0228304862976074, "learning_rate": 2e-05, "loss": 0.04120112, "step": 11335 }, { "epoch": 22.672, "grad_norm": 1.0375754833221436, "learning_rate": 2e-05, "loss": 0.03073654, "step": 11336 }, { "epoch": 22.674, "grad_norm": 2.258004665374756, "learning_rate": 2e-05, "loss": 0.05255279, "step": 11337 }, { "epoch": 22.676, "grad_norm": 1.752378225326538, "learning_rate": 2e-05, "loss": 0.05190774, "step": 11338 }, { "epoch": 22.678, "grad_norm": 2.363682746887207, "learning_rate": 2e-05, "loss": 0.04572538, "step": 11339 }, { "epoch": 22.68, "grad_norm": 1.088084101676941, "learning_rate": 2e-05, "loss": 0.03764492, "step": 11340 }, { "epoch": 22.682, "grad_norm": 0.8578435182571411, "learning_rate": 2e-05, "loss": 0.02391421, "step": 11341 }, { "epoch": 22.684, "grad_norm": 1.1827582120895386, "learning_rate": 2e-05, "loss": 0.03264995, "step": 11342 }, { "epoch": 22.686, "grad_norm": 1.3935420513153076, "learning_rate": 2e-05, "loss": 0.03195801, "step": 11343 }, { "epoch": 22.688, "grad_norm": 0.9729677438735962, "learning_rate": 2e-05, "loss": 0.02463444, "step": 11344 }, { "epoch": 22.69, "grad_norm": 3.6322379112243652, "learning_rate": 2e-05, "loss": 0.04378824, "step": 11345 }, { "epoch": 22.692, "grad_norm": 1.002524971961975, "learning_rate": 2e-05, "loss": 0.02371977, "step": 11346 }, { "epoch": 22.694, "grad_norm": 1.1274267435073853, "learning_rate": 2e-05, "loss": 0.02771427, "step": 11347 }, { "epoch": 22.696, "grad_norm": 0.9226782321929932, "learning_rate": 2e-05, "loss": 0.0263548, "step": 11348 }, { "epoch": 22.698, "grad_norm": 1.6948686838150024, "learning_rate": 2e-05, "loss": 0.03164578, "step": 11349 }, { "epoch": 22.7, "grad_norm": 1.1010253429412842, "learning_rate": 2e-05, "loss": 0.03652514, "step": 11350 }, { "epoch": 22.701999999999998, "grad_norm": 1.2103954553604126, "learning_rate": 2e-05, "loss": 0.03659312, "step": 11351 }, { "epoch": 22.704, "grad_norm": 1.1126821041107178, "learning_rate": 2e-05, "loss": 0.02752934, "step": 11352 }, { "epoch": 22.706, "grad_norm": 0.9469572305679321, "learning_rate": 2e-05, "loss": 0.02585907, "step": 11353 }, { "epoch": 22.708, "grad_norm": 1.8642154932022095, "learning_rate": 2e-05, "loss": 0.04325049, "step": 11354 }, { "epoch": 22.71, "grad_norm": 1.1645115613937378, "learning_rate": 2e-05, "loss": 0.04005027, "step": 11355 }, { "epoch": 22.712, "grad_norm": 1.520301342010498, "learning_rate": 2e-05, "loss": 0.0399612, "step": 11356 }, { "epoch": 22.714, "grad_norm": 1.235013723373413, "learning_rate": 2e-05, "loss": 0.03438204, "step": 11357 }, { "epoch": 22.716, "grad_norm": 0.9834000468254089, "learning_rate": 2e-05, "loss": 0.02958825, "step": 11358 }, { "epoch": 22.718, "grad_norm": 0.9529434442520142, "learning_rate": 2e-05, "loss": 0.02942239, "step": 11359 }, { "epoch": 22.72, "grad_norm": 1.0147749185562134, "learning_rate": 2e-05, "loss": 0.02894795, "step": 11360 }, { "epoch": 22.722, "grad_norm": 2.125749111175537, "learning_rate": 2e-05, "loss": 0.02771318, "step": 11361 }, { "epoch": 22.724, "grad_norm": 1.1008570194244385, "learning_rate": 2e-05, "loss": 0.0344195, "step": 11362 }, { "epoch": 22.726, "grad_norm": 2.6095540523529053, "learning_rate": 2e-05, "loss": 0.04947619, "step": 11363 }, { "epoch": 22.728, "grad_norm": 1.228231430053711, "learning_rate": 2e-05, "loss": 0.03951849, "step": 11364 }, { "epoch": 22.73, "grad_norm": 3.1176652908325195, "learning_rate": 2e-05, "loss": 0.03356297, "step": 11365 }, { "epoch": 22.732, "grad_norm": 1.1963483095169067, "learning_rate": 2e-05, "loss": 0.02985599, "step": 11366 }, { "epoch": 22.734, "grad_norm": 1.533469319343567, "learning_rate": 2e-05, "loss": 0.03753369, "step": 11367 }, { "epoch": 22.736, "grad_norm": 0.9125386476516724, "learning_rate": 2e-05, "loss": 0.03140819, "step": 11368 }, { "epoch": 22.738, "grad_norm": 1.361525058746338, "learning_rate": 2e-05, "loss": 0.03976889, "step": 11369 }, { "epoch": 22.74, "grad_norm": 1.0508217811584473, "learning_rate": 2e-05, "loss": 0.03463693, "step": 11370 }, { "epoch": 22.742, "grad_norm": 1.6200356483459473, "learning_rate": 2e-05, "loss": 0.04815769, "step": 11371 }, { "epoch": 22.744, "grad_norm": 1.0508980751037598, "learning_rate": 2e-05, "loss": 0.04034566, "step": 11372 }, { "epoch": 22.746, "grad_norm": 1.024295687675476, "learning_rate": 2e-05, "loss": 0.03280937, "step": 11373 }, { "epoch": 22.748, "grad_norm": 1.413459062576294, "learning_rate": 2e-05, "loss": 0.04194808, "step": 11374 }, { "epoch": 22.75, "grad_norm": 1.0012433528900146, "learning_rate": 2e-05, "loss": 0.02298179, "step": 11375 }, { "epoch": 22.752, "grad_norm": 2.7343661785125732, "learning_rate": 2e-05, "loss": 0.04880745, "step": 11376 }, { "epoch": 22.754, "grad_norm": 1.2125567197799683, "learning_rate": 2e-05, "loss": 0.0418051, "step": 11377 }, { "epoch": 22.756, "grad_norm": 1.162410855293274, "learning_rate": 2e-05, "loss": 0.02900402, "step": 11378 }, { "epoch": 22.758, "grad_norm": 0.9885851740837097, "learning_rate": 2e-05, "loss": 0.02954095, "step": 11379 }, { "epoch": 22.76, "grad_norm": 1.2678016424179077, "learning_rate": 2e-05, "loss": 0.04300134, "step": 11380 }, { "epoch": 22.762, "grad_norm": 1.4912469387054443, "learning_rate": 2e-05, "loss": 0.03353298, "step": 11381 }, { "epoch": 22.764, "grad_norm": 1.0253992080688477, "learning_rate": 2e-05, "loss": 0.02656168, "step": 11382 }, { "epoch": 22.766, "grad_norm": 0.8745793104171753, "learning_rate": 2e-05, "loss": 0.02277788, "step": 11383 }, { "epoch": 22.768, "grad_norm": 1.3656665086746216, "learning_rate": 2e-05, "loss": 0.03005461, "step": 11384 }, { "epoch": 22.77, "grad_norm": 1.9831993579864502, "learning_rate": 2e-05, "loss": 0.04527285, "step": 11385 }, { "epoch": 22.772, "grad_norm": 1.8033963441848755, "learning_rate": 2e-05, "loss": 0.05182678, "step": 11386 }, { "epoch": 22.774, "grad_norm": 1.5675684213638306, "learning_rate": 2e-05, "loss": 0.03885451, "step": 11387 }, { "epoch": 22.776, "grad_norm": 2.476839065551758, "learning_rate": 2e-05, "loss": 0.05386474, "step": 11388 }, { "epoch": 22.778, "grad_norm": 0.9441741108894348, "learning_rate": 2e-05, "loss": 0.02855249, "step": 11389 }, { "epoch": 22.78, "grad_norm": 1.4872889518737793, "learning_rate": 2e-05, "loss": 0.04495686, "step": 11390 }, { "epoch": 22.782, "grad_norm": 2.2526416778564453, "learning_rate": 2e-05, "loss": 0.04384479, "step": 11391 }, { "epoch": 22.784, "grad_norm": 1.2508896589279175, "learning_rate": 2e-05, "loss": 0.03676173, "step": 11392 }, { "epoch": 22.786, "grad_norm": 1.5125991106033325, "learning_rate": 2e-05, "loss": 0.04031794, "step": 11393 }, { "epoch": 22.788, "grad_norm": 0.8906210660934448, "learning_rate": 2e-05, "loss": 0.02660401, "step": 11394 }, { "epoch": 22.79, "grad_norm": 3.060696601867676, "learning_rate": 2e-05, "loss": 0.04931217, "step": 11395 }, { "epoch": 22.792, "grad_norm": 1.2167766094207764, "learning_rate": 2e-05, "loss": 0.0397851, "step": 11396 }, { "epoch": 22.794, "grad_norm": 1.559085726737976, "learning_rate": 2e-05, "loss": 0.04445662, "step": 11397 }, { "epoch": 22.796, "grad_norm": 2.269927978515625, "learning_rate": 2e-05, "loss": 0.05920038, "step": 11398 }, { "epoch": 22.798000000000002, "grad_norm": 1.1354275941848755, "learning_rate": 2e-05, "loss": 0.03149124, "step": 11399 }, { "epoch": 22.8, "grad_norm": 1.1973793506622314, "learning_rate": 2e-05, "loss": 0.03659835, "step": 11400 }, { "epoch": 22.802, "grad_norm": 1.178956151008606, "learning_rate": 2e-05, "loss": 0.03625498, "step": 11401 }, { "epoch": 22.804, "grad_norm": 0.8932549357414246, "learning_rate": 2e-05, "loss": 0.02956768, "step": 11402 }, { "epoch": 22.806, "grad_norm": 1.0559618473052979, "learning_rate": 2e-05, "loss": 0.03430596, "step": 11403 }, { "epoch": 22.808, "grad_norm": 2.337083578109741, "learning_rate": 2e-05, "loss": 0.04307479, "step": 11404 }, { "epoch": 22.81, "grad_norm": 1.271933913230896, "learning_rate": 2e-05, "loss": 0.03103825, "step": 11405 }, { "epoch": 22.812, "grad_norm": 1.2939378023147583, "learning_rate": 2e-05, "loss": 0.04132862, "step": 11406 }, { "epoch": 22.814, "grad_norm": 1.2300240993499756, "learning_rate": 2e-05, "loss": 0.03786105, "step": 11407 }, { "epoch": 22.816, "grad_norm": 1.1687867641448975, "learning_rate": 2e-05, "loss": 0.04594323, "step": 11408 }, { "epoch": 22.818, "grad_norm": 1.2083779573440552, "learning_rate": 2e-05, "loss": 0.04165336, "step": 11409 }, { "epoch": 22.82, "grad_norm": 2.3479809761047363, "learning_rate": 2e-05, "loss": 0.0412714, "step": 11410 }, { "epoch": 22.822, "grad_norm": 1.5195822715759277, "learning_rate": 2e-05, "loss": 0.02944362, "step": 11411 }, { "epoch": 22.824, "grad_norm": 1.2599931955337524, "learning_rate": 2e-05, "loss": 0.039657, "step": 11412 }, { "epoch": 22.826, "grad_norm": 1.5169256925582886, "learning_rate": 2e-05, "loss": 0.0313296, "step": 11413 }, { "epoch": 22.828, "grad_norm": 1.3896204233169556, "learning_rate": 2e-05, "loss": 0.041568, "step": 11414 }, { "epoch": 22.83, "grad_norm": 1.3151553869247437, "learning_rate": 2e-05, "loss": 0.03976612, "step": 11415 }, { "epoch": 22.832, "grad_norm": 1.2248992919921875, "learning_rate": 2e-05, "loss": 0.032742, "step": 11416 }, { "epoch": 22.834, "grad_norm": 1.6058671474456787, "learning_rate": 2e-05, "loss": 0.04475835, "step": 11417 }, { "epoch": 22.836, "grad_norm": 1.131249189376831, "learning_rate": 2e-05, "loss": 0.03879447, "step": 11418 }, { "epoch": 22.838, "grad_norm": 1.2209913730621338, "learning_rate": 2e-05, "loss": 0.03018184, "step": 11419 }, { "epoch": 22.84, "grad_norm": 1.2565336227416992, "learning_rate": 2e-05, "loss": 0.03886155, "step": 11420 }, { "epoch": 22.842, "grad_norm": 1.2150452136993408, "learning_rate": 2e-05, "loss": 0.03672185, "step": 11421 }, { "epoch": 22.844, "grad_norm": 1.4771347045898438, "learning_rate": 2e-05, "loss": 0.02761836, "step": 11422 }, { "epoch": 22.846, "grad_norm": 2.192431926727295, "learning_rate": 2e-05, "loss": 0.0543457, "step": 11423 }, { "epoch": 22.848, "grad_norm": 1.1383287906646729, "learning_rate": 2e-05, "loss": 0.03061396, "step": 11424 }, { "epoch": 22.85, "grad_norm": 2.1792497634887695, "learning_rate": 2e-05, "loss": 0.03490089, "step": 11425 }, { "epoch": 22.852, "grad_norm": 1.2106961011886597, "learning_rate": 2e-05, "loss": 0.03852124, "step": 11426 }, { "epoch": 22.854, "grad_norm": 4.948497772216797, "learning_rate": 2e-05, "loss": 0.02499816, "step": 11427 }, { "epoch": 22.856, "grad_norm": 1.3616306781768799, "learning_rate": 2e-05, "loss": 0.04259917, "step": 11428 }, { "epoch": 22.858, "grad_norm": 1.405735969543457, "learning_rate": 2e-05, "loss": 0.03377235, "step": 11429 }, { "epoch": 22.86, "grad_norm": 1.426137924194336, "learning_rate": 2e-05, "loss": 0.04095915, "step": 11430 }, { "epoch": 22.862, "grad_norm": 1.1575813293457031, "learning_rate": 2e-05, "loss": 0.03657633, "step": 11431 }, { "epoch": 22.864, "grad_norm": 1.048122525215149, "learning_rate": 2e-05, "loss": 0.03505576, "step": 11432 }, { "epoch": 22.866, "grad_norm": 1.589388132095337, "learning_rate": 2e-05, "loss": 0.02945594, "step": 11433 }, { "epoch": 22.868, "grad_norm": 1.7383077144622803, "learning_rate": 2e-05, "loss": 0.03888205, "step": 11434 }, { "epoch": 22.87, "grad_norm": 1.2866278886795044, "learning_rate": 2e-05, "loss": 0.02404101, "step": 11435 }, { "epoch": 22.872, "grad_norm": 1.8006558418273926, "learning_rate": 2e-05, "loss": 0.02283414, "step": 11436 }, { "epoch": 22.874, "grad_norm": 0.7845028638839722, "learning_rate": 2e-05, "loss": 0.01730304, "step": 11437 }, { "epoch": 22.876, "grad_norm": 1.3924261331558228, "learning_rate": 2e-05, "loss": 0.03165732, "step": 11438 }, { "epoch": 22.878, "grad_norm": 1.0454896688461304, "learning_rate": 2e-05, "loss": 0.03067488, "step": 11439 }, { "epoch": 22.88, "grad_norm": 1.8691692352294922, "learning_rate": 2e-05, "loss": 0.03286079, "step": 11440 }, { "epoch": 22.882, "grad_norm": 1.0586732625961304, "learning_rate": 2e-05, "loss": 0.02909458, "step": 11441 }, { "epoch": 22.884, "grad_norm": 1.525641679763794, "learning_rate": 2e-05, "loss": 0.03261862, "step": 11442 }, { "epoch": 22.886, "grad_norm": 1.164318323135376, "learning_rate": 2e-05, "loss": 0.02912915, "step": 11443 }, { "epoch": 22.888, "grad_norm": 1.5031243562698364, "learning_rate": 2e-05, "loss": 0.03876949, "step": 11444 }, { "epoch": 22.89, "grad_norm": 1.0397024154663086, "learning_rate": 2e-05, "loss": 0.02776971, "step": 11445 }, { "epoch": 22.892, "grad_norm": 1.2293657064437866, "learning_rate": 2e-05, "loss": 0.04357108, "step": 11446 }, { "epoch": 22.894, "grad_norm": 1.8432687520980835, "learning_rate": 2e-05, "loss": 0.02928544, "step": 11447 }, { "epoch": 22.896, "grad_norm": 1.107912540435791, "learning_rate": 2e-05, "loss": 0.02876203, "step": 11448 }, { "epoch": 22.898, "grad_norm": 1.5207486152648926, "learning_rate": 2e-05, "loss": 0.04288258, "step": 11449 }, { "epoch": 22.9, "grad_norm": 1.0726830959320068, "learning_rate": 2e-05, "loss": 0.03220513, "step": 11450 }, { "epoch": 22.902, "grad_norm": 1.3904377222061157, "learning_rate": 2e-05, "loss": 0.03931183, "step": 11451 }, { "epoch": 22.904, "grad_norm": 1.601686954498291, "learning_rate": 2e-05, "loss": 0.04329685, "step": 11452 }, { "epoch": 22.906, "grad_norm": 1.7275676727294922, "learning_rate": 2e-05, "loss": 0.03662112, "step": 11453 }, { "epoch": 22.908, "grad_norm": 0.9452254176139832, "learning_rate": 2e-05, "loss": 0.03591129, "step": 11454 }, { "epoch": 22.91, "grad_norm": 0.9925097823143005, "learning_rate": 2e-05, "loss": 0.02755562, "step": 11455 }, { "epoch": 22.912, "grad_norm": 1.9532277584075928, "learning_rate": 2e-05, "loss": 0.03555189, "step": 11456 }, { "epoch": 22.914, "grad_norm": 0.8371837139129639, "learning_rate": 2e-05, "loss": 0.02261299, "step": 11457 }, { "epoch": 22.916, "grad_norm": 2.2544491291046143, "learning_rate": 2e-05, "loss": 0.05361641, "step": 11458 }, { "epoch": 22.918, "grad_norm": 2.455197334289551, "learning_rate": 2e-05, "loss": 0.04926509, "step": 11459 }, { "epoch": 22.92, "grad_norm": 1.0468469858169556, "learning_rate": 2e-05, "loss": 0.03623208, "step": 11460 }, { "epoch": 22.922, "grad_norm": 1.6605795621871948, "learning_rate": 2e-05, "loss": 0.03582647, "step": 11461 }, { "epoch": 22.924, "grad_norm": 1.6512527465820312, "learning_rate": 2e-05, "loss": 0.03864128, "step": 11462 }, { "epoch": 22.926, "grad_norm": 1.015683889389038, "learning_rate": 2e-05, "loss": 0.02952475, "step": 11463 }, { "epoch": 22.928, "grad_norm": 2.490291118621826, "learning_rate": 2e-05, "loss": 0.04171167, "step": 11464 }, { "epoch": 22.93, "grad_norm": 1.092077612876892, "learning_rate": 2e-05, "loss": 0.03160276, "step": 11465 }, { "epoch": 22.932, "grad_norm": 1.0306111574172974, "learning_rate": 2e-05, "loss": 0.02688064, "step": 11466 }, { "epoch": 22.934, "grad_norm": 1.20967435836792, "learning_rate": 2e-05, "loss": 0.03792361, "step": 11467 }, { "epoch": 22.936, "grad_norm": 1.6543128490447998, "learning_rate": 2e-05, "loss": 0.0479852, "step": 11468 }, { "epoch": 22.938, "grad_norm": 1.193781852722168, "learning_rate": 2e-05, "loss": 0.03420633, "step": 11469 }, { "epoch": 22.94, "grad_norm": 1.764358639717102, "learning_rate": 2e-05, "loss": 0.03625751, "step": 11470 }, { "epoch": 22.942, "grad_norm": 1.6757545471191406, "learning_rate": 2e-05, "loss": 0.03107898, "step": 11471 }, { "epoch": 22.944, "grad_norm": 0.9526821970939636, "learning_rate": 2e-05, "loss": 0.03141288, "step": 11472 }, { "epoch": 22.946, "grad_norm": 1.6071827411651611, "learning_rate": 2e-05, "loss": 0.03736177, "step": 11473 }, { "epoch": 22.948, "grad_norm": 1.1540484428405762, "learning_rate": 2e-05, "loss": 0.03194441, "step": 11474 }, { "epoch": 22.95, "grad_norm": 1.1837899684906006, "learning_rate": 2e-05, "loss": 0.0288153, "step": 11475 }, { "epoch": 22.951999999999998, "grad_norm": 1.1613291501998901, "learning_rate": 2e-05, "loss": 0.03203413, "step": 11476 }, { "epoch": 22.954, "grad_norm": 2.0555005073547363, "learning_rate": 2e-05, "loss": 0.04438991, "step": 11477 }, { "epoch": 22.956, "grad_norm": 1.4944390058517456, "learning_rate": 2e-05, "loss": 0.05310993, "step": 11478 }, { "epoch": 22.958, "grad_norm": 1.4034525156021118, "learning_rate": 2e-05, "loss": 0.0385785, "step": 11479 }, { "epoch": 22.96, "grad_norm": 2.2362515926361084, "learning_rate": 2e-05, "loss": 0.03343879, "step": 11480 }, { "epoch": 22.962, "grad_norm": 1.9142746925354004, "learning_rate": 2e-05, "loss": 0.04086325, "step": 11481 }, { "epoch": 22.964, "grad_norm": 3.3769659996032715, "learning_rate": 2e-05, "loss": 0.03347282, "step": 11482 }, { "epoch": 22.966, "grad_norm": 1.3966118097305298, "learning_rate": 2e-05, "loss": 0.02449038, "step": 11483 }, { "epoch": 22.968, "grad_norm": 1.6454405784606934, "learning_rate": 2e-05, "loss": 0.03337567, "step": 11484 }, { "epoch": 22.97, "grad_norm": 1.2182114124298096, "learning_rate": 2e-05, "loss": 0.02583343, "step": 11485 }, { "epoch": 22.972, "grad_norm": 2.4437437057495117, "learning_rate": 2e-05, "loss": 0.04662333, "step": 11486 }, { "epoch": 22.974, "grad_norm": 1.3717036247253418, "learning_rate": 2e-05, "loss": 0.04354615, "step": 11487 }, { "epoch": 22.976, "grad_norm": 1.0817804336547852, "learning_rate": 2e-05, "loss": 0.03474058, "step": 11488 }, { "epoch": 22.978, "grad_norm": 1.8078384399414062, "learning_rate": 2e-05, "loss": 0.04659226, "step": 11489 }, { "epoch": 22.98, "grad_norm": 1.3917157649993896, "learning_rate": 2e-05, "loss": 0.04078533, "step": 11490 }, { "epoch": 22.982, "grad_norm": 0.8220876455307007, "learning_rate": 2e-05, "loss": 0.02258881, "step": 11491 }, { "epoch": 22.984, "grad_norm": 1.2913342714309692, "learning_rate": 2e-05, "loss": 0.04604527, "step": 11492 }, { "epoch": 22.986, "grad_norm": 1.271330714225769, "learning_rate": 2e-05, "loss": 0.03667264, "step": 11493 }, { "epoch": 22.988, "grad_norm": 1.3295248746871948, "learning_rate": 2e-05, "loss": 0.03829803, "step": 11494 }, { "epoch": 22.99, "grad_norm": 1.3785157203674316, "learning_rate": 2e-05, "loss": 0.04370276, "step": 11495 }, { "epoch": 22.992, "grad_norm": 1.7691086530685425, "learning_rate": 2e-05, "loss": 0.04192688, "step": 11496 }, { "epoch": 22.994, "grad_norm": 0.933940052986145, "learning_rate": 2e-05, "loss": 0.01630483, "step": 11497 }, { "epoch": 22.996, "grad_norm": 1.4123674631118774, "learning_rate": 2e-05, "loss": 0.03609384, "step": 11498 }, { "epoch": 22.998, "grad_norm": 1.3186585903167725, "learning_rate": 2e-05, "loss": 0.04399614, "step": 11499 }, { "epoch": 23.0, "grad_norm": 0.9407261610031128, "learning_rate": 2e-05, "loss": 0.02819065, "step": 11500 }, { "epoch": 23.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9780439121756487, "Equal_1": 0.998, "Equal_2": 0.9780439121756487, "Equal_3": 0.9760479041916168, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 1.0, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9919839679358717, "Parallel_3": 0.994, "Perpendicular_1": 0.998, "Perpendicular_2": 0.992, "Perpendicular_3": 0.8386773547094188, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9956666666666667, "PointLiesOnCircle_3": 0.9896666666666667, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9800399201596807 }, "eval_runtime": 319.8554, "eval_samples_per_second": 32.827, "eval_steps_per_second": 0.657, "step": 11500 }, { "epoch": 23.002, "grad_norm": 1.2540278434753418, "learning_rate": 2e-05, "loss": 0.05130012, "step": 11501 }, { "epoch": 23.004, "grad_norm": 1.3764632940292358, "learning_rate": 2e-05, "loss": 0.04294942, "step": 11502 }, { "epoch": 23.006, "grad_norm": 1.6750069856643677, "learning_rate": 2e-05, "loss": 0.05229911, "step": 11503 }, { "epoch": 23.008, "grad_norm": 1.1598528623580933, "learning_rate": 2e-05, "loss": 0.04085714, "step": 11504 }, { "epoch": 23.01, "grad_norm": 1.1853110790252686, "learning_rate": 2e-05, "loss": 0.03996443, "step": 11505 }, { "epoch": 23.012, "grad_norm": 1.4552375078201294, "learning_rate": 2e-05, "loss": 0.05250798, "step": 11506 }, { "epoch": 23.014, "grad_norm": 1.4703574180603027, "learning_rate": 2e-05, "loss": 0.05003911, "step": 11507 }, { "epoch": 23.016, "grad_norm": 1.011425256729126, "learning_rate": 2e-05, "loss": 0.03787163, "step": 11508 }, { "epoch": 23.018, "grad_norm": 0.9965170621871948, "learning_rate": 2e-05, "loss": 0.04001056, "step": 11509 }, { "epoch": 23.02, "grad_norm": 1.442737102508545, "learning_rate": 2e-05, "loss": 0.03976019, "step": 11510 }, { "epoch": 23.022, "grad_norm": 1.067285418510437, "learning_rate": 2e-05, "loss": 0.03311403, "step": 11511 }, { "epoch": 23.024, "grad_norm": 1.3310978412628174, "learning_rate": 2e-05, "loss": 0.05192403, "step": 11512 }, { "epoch": 23.026, "grad_norm": 2.071756601333618, "learning_rate": 2e-05, "loss": 0.04093051, "step": 11513 }, { "epoch": 23.028, "grad_norm": 1.8485080003738403, "learning_rate": 2e-05, "loss": 0.04342106, "step": 11514 }, { "epoch": 23.03, "grad_norm": 1.2285382747650146, "learning_rate": 2e-05, "loss": 0.03801531, "step": 11515 }, { "epoch": 23.032, "grad_norm": 1.2549402713775635, "learning_rate": 2e-05, "loss": 0.04710529, "step": 11516 }, { "epoch": 23.034, "grad_norm": 1.2575291395187378, "learning_rate": 2e-05, "loss": 0.04867338, "step": 11517 }, { "epoch": 23.036, "grad_norm": 1.4146618843078613, "learning_rate": 2e-05, "loss": 0.04894332, "step": 11518 }, { "epoch": 23.038, "grad_norm": 1.2756978273391724, "learning_rate": 2e-05, "loss": 0.0424, "step": 11519 }, { "epoch": 23.04, "grad_norm": 1.3770201206207275, "learning_rate": 2e-05, "loss": 0.05650711, "step": 11520 }, { "epoch": 23.042, "grad_norm": 1.5191999673843384, "learning_rate": 2e-05, "loss": 0.05803034, "step": 11521 }, { "epoch": 23.044, "grad_norm": 1.6032277345657349, "learning_rate": 2e-05, "loss": 0.04611324, "step": 11522 }, { "epoch": 23.046, "grad_norm": 1.0860769748687744, "learning_rate": 2e-05, "loss": 0.03498707, "step": 11523 }, { "epoch": 23.048, "grad_norm": 1.0449998378753662, "learning_rate": 2e-05, "loss": 0.03648846, "step": 11524 }, { "epoch": 23.05, "grad_norm": 2.3625895977020264, "learning_rate": 2e-05, "loss": 0.03183797, "step": 11525 }, { "epoch": 23.052, "grad_norm": 1.3209874629974365, "learning_rate": 2e-05, "loss": 0.0523079, "step": 11526 }, { "epoch": 23.054, "grad_norm": 0.8777402639389038, "learning_rate": 2e-05, "loss": 0.02545576, "step": 11527 }, { "epoch": 23.056, "grad_norm": 1.1120661497116089, "learning_rate": 2e-05, "loss": 0.0336577, "step": 11528 }, { "epoch": 23.058, "grad_norm": 1.3430697917938232, "learning_rate": 2e-05, "loss": 0.03209985, "step": 11529 }, { "epoch": 23.06, "grad_norm": 1.346922755241394, "learning_rate": 2e-05, "loss": 0.02798453, "step": 11530 }, { "epoch": 23.062, "grad_norm": 1.8189756870269775, "learning_rate": 2e-05, "loss": 0.05564386, "step": 11531 }, { "epoch": 23.064, "grad_norm": 3.2581892013549805, "learning_rate": 2e-05, "loss": 0.04301608, "step": 11532 }, { "epoch": 23.066, "grad_norm": 1.3685656785964966, "learning_rate": 2e-05, "loss": 0.03543826, "step": 11533 }, { "epoch": 23.068, "grad_norm": 1.6216603517532349, "learning_rate": 2e-05, "loss": 0.04750851, "step": 11534 }, { "epoch": 23.07, "grad_norm": 2.4956471920013428, "learning_rate": 2e-05, "loss": 0.05645571, "step": 11535 }, { "epoch": 23.072, "grad_norm": 3.800372362136841, "learning_rate": 2e-05, "loss": 0.03642789, "step": 11536 }, { "epoch": 23.074, "grad_norm": 1.135061502456665, "learning_rate": 2e-05, "loss": 0.03842633, "step": 11537 }, { "epoch": 23.076, "grad_norm": 1.3326822519302368, "learning_rate": 2e-05, "loss": 0.05297691, "step": 11538 }, { "epoch": 23.078, "grad_norm": 1.7269304990768433, "learning_rate": 2e-05, "loss": 0.04963746, "step": 11539 }, { "epoch": 23.08, "grad_norm": 1.300743818283081, "learning_rate": 2e-05, "loss": 0.04434649, "step": 11540 }, { "epoch": 23.082, "grad_norm": 2.736264705657959, "learning_rate": 2e-05, "loss": 0.05283449, "step": 11541 }, { "epoch": 23.084, "grad_norm": 1.9172672033309937, "learning_rate": 2e-05, "loss": 0.05265498, "step": 11542 }, { "epoch": 23.086, "grad_norm": 1.1808491945266724, "learning_rate": 2e-05, "loss": 0.0372861, "step": 11543 }, { "epoch": 23.088, "grad_norm": 1.2032355070114136, "learning_rate": 2e-05, "loss": 0.04825965, "step": 11544 }, { "epoch": 23.09, "grad_norm": 1.292203426361084, "learning_rate": 2e-05, "loss": 0.04288067, "step": 11545 }, { "epoch": 23.092, "grad_norm": 1.23992919921875, "learning_rate": 2e-05, "loss": 0.03847829, "step": 11546 }, { "epoch": 23.094, "grad_norm": 1.6928577423095703, "learning_rate": 2e-05, "loss": 0.05047772, "step": 11547 }, { "epoch": 23.096, "grad_norm": 1.2044388055801392, "learning_rate": 2e-05, "loss": 0.04760785, "step": 11548 }, { "epoch": 23.098, "grad_norm": 4.208912372589111, "learning_rate": 2e-05, "loss": 0.04748537, "step": 11549 }, { "epoch": 23.1, "grad_norm": 1.1794403791427612, "learning_rate": 2e-05, "loss": 0.03520359, "step": 11550 }, { "epoch": 23.102, "grad_norm": 2.432137966156006, "learning_rate": 2e-05, "loss": 0.05768427, "step": 11551 }, { "epoch": 23.104, "grad_norm": 1.1416656970977783, "learning_rate": 2e-05, "loss": 0.04104606, "step": 11552 }, { "epoch": 23.106, "grad_norm": 0.929355263710022, "learning_rate": 2e-05, "loss": 0.0206061, "step": 11553 }, { "epoch": 23.108, "grad_norm": 1.0629310607910156, "learning_rate": 2e-05, "loss": 0.02847815, "step": 11554 }, { "epoch": 23.11, "grad_norm": 2.062800168991089, "learning_rate": 2e-05, "loss": 0.043769, "step": 11555 }, { "epoch": 23.112, "grad_norm": 1.8286502361297607, "learning_rate": 2e-05, "loss": 0.05828995, "step": 11556 }, { "epoch": 23.114, "grad_norm": 1.8750455379486084, "learning_rate": 2e-05, "loss": 0.04257144, "step": 11557 }, { "epoch": 23.116, "grad_norm": 1.3888318538665771, "learning_rate": 2e-05, "loss": 0.04705369, "step": 11558 }, { "epoch": 23.118, "grad_norm": 1.7153780460357666, "learning_rate": 2e-05, "loss": 0.0691988, "step": 11559 }, { "epoch": 23.12, "grad_norm": 1.4313924312591553, "learning_rate": 2e-05, "loss": 0.04791881, "step": 11560 }, { "epoch": 23.122, "grad_norm": 1.0603506565093994, "learning_rate": 2e-05, "loss": 0.0366033, "step": 11561 }, { "epoch": 23.124, "grad_norm": 1.405933141708374, "learning_rate": 2e-05, "loss": 0.03930571, "step": 11562 }, { "epoch": 23.126, "grad_norm": 1.920100212097168, "learning_rate": 2e-05, "loss": 0.04094848, "step": 11563 }, { "epoch": 23.128, "grad_norm": 5.326123237609863, "learning_rate": 2e-05, "loss": 0.06398241, "step": 11564 }, { "epoch": 23.13, "grad_norm": 1.9219155311584473, "learning_rate": 2e-05, "loss": 0.02961898, "step": 11565 }, { "epoch": 23.132, "grad_norm": 2.914210081100464, "learning_rate": 2e-05, "loss": 0.04696817, "step": 11566 }, { "epoch": 23.134, "grad_norm": 0.8635774254798889, "learning_rate": 2e-05, "loss": 0.02733534, "step": 11567 }, { "epoch": 23.136, "grad_norm": 1.496962308883667, "learning_rate": 2e-05, "loss": 0.03688932, "step": 11568 }, { "epoch": 23.138, "grad_norm": 2.043313980102539, "learning_rate": 2e-05, "loss": 0.03681432, "step": 11569 }, { "epoch": 23.14, "grad_norm": 1.5973297357559204, "learning_rate": 2e-05, "loss": 0.04360689, "step": 11570 }, { "epoch": 23.142, "grad_norm": 1.3972605466842651, "learning_rate": 2e-05, "loss": 0.04391249, "step": 11571 }, { "epoch": 23.144, "grad_norm": 1.3387508392333984, "learning_rate": 2e-05, "loss": 0.04608027, "step": 11572 }, { "epoch": 23.146, "grad_norm": 2.011808395385742, "learning_rate": 2e-05, "loss": 0.05625965, "step": 11573 }, { "epoch": 23.148, "grad_norm": 1.5013598203659058, "learning_rate": 2e-05, "loss": 0.05085777, "step": 11574 }, { "epoch": 23.15, "grad_norm": 2.186159133911133, "learning_rate": 2e-05, "loss": 0.04711618, "step": 11575 }, { "epoch": 23.152, "grad_norm": 2.2853493690490723, "learning_rate": 2e-05, "loss": 0.0342935, "step": 11576 }, { "epoch": 23.154, "grad_norm": 1.3616294860839844, "learning_rate": 2e-05, "loss": 0.04885361, "step": 11577 }, { "epoch": 23.156, "grad_norm": 1.2394838333129883, "learning_rate": 2e-05, "loss": 0.03285236, "step": 11578 }, { "epoch": 23.158, "grad_norm": 1.339077353477478, "learning_rate": 2e-05, "loss": 0.02606569, "step": 11579 }, { "epoch": 23.16, "grad_norm": 0.8776892423629761, "learning_rate": 2e-05, "loss": 0.02158548, "step": 11580 }, { "epoch": 23.162, "grad_norm": 3.6260955333709717, "learning_rate": 2e-05, "loss": 0.05039684, "step": 11581 }, { "epoch": 23.164, "grad_norm": 1.242028832435608, "learning_rate": 2e-05, "loss": 0.04099006, "step": 11582 }, { "epoch": 23.166, "grad_norm": 1.870340347290039, "learning_rate": 2e-05, "loss": 0.05608212, "step": 11583 }, { "epoch": 23.168, "grad_norm": 1.346118688583374, "learning_rate": 2e-05, "loss": 0.03667539, "step": 11584 }, { "epoch": 23.17, "grad_norm": 1.3202615976333618, "learning_rate": 2e-05, "loss": 0.06091684, "step": 11585 }, { "epoch": 23.172, "grad_norm": 1.252225399017334, "learning_rate": 2e-05, "loss": 0.0603791, "step": 11586 }, { "epoch": 23.174, "grad_norm": 1.584975242614746, "learning_rate": 2e-05, "loss": 0.04847322, "step": 11587 }, { "epoch": 23.176, "grad_norm": 1.3901718854904175, "learning_rate": 2e-05, "loss": 0.03669205, "step": 11588 }, { "epoch": 23.178, "grad_norm": 1.0474369525909424, "learning_rate": 2e-05, "loss": 0.04350015, "step": 11589 }, { "epoch": 23.18, "grad_norm": 1.0090280771255493, "learning_rate": 2e-05, "loss": 0.03717218, "step": 11590 }, { "epoch": 23.182, "grad_norm": 1.004239797592163, "learning_rate": 2e-05, "loss": 0.02906239, "step": 11591 }, { "epoch": 23.184, "grad_norm": 1.3345508575439453, "learning_rate": 2e-05, "loss": 0.03678144, "step": 11592 }, { "epoch": 23.186, "grad_norm": 1.837895393371582, "learning_rate": 2e-05, "loss": 0.03756856, "step": 11593 }, { "epoch": 23.188, "grad_norm": 1.3044272661209106, "learning_rate": 2e-05, "loss": 0.04445122, "step": 11594 }, { "epoch": 23.19, "grad_norm": 1.1923726797103882, "learning_rate": 2e-05, "loss": 0.03589686, "step": 11595 }, { "epoch": 23.192, "grad_norm": 1.0984914302825928, "learning_rate": 2e-05, "loss": 0.04005678, "step": 11596 }, { "epoch": 23.194, "grad_norm": 1.199487566947937, "learning_rate": 2e-05, "loss": 0.03045943, "step": 11597 }, { "epoch": 23.196, "grad_norm": 1.032716989517212, "learning_rate": 2e-05, "loss": 0.03927022, "step": 11598 }, { "epoch": 23.198, "grad_norm": 2.102476119995117, "learning_rate": 2e-05, "loss": 0.06727884, "step": 11599 }, { "epoch": 23.2, "grad_norm": 1.172778844833374, "learning_rate": 2e-05, "loss": 0.05053917, "step": 11600 }, { "epoch": 23.202, "grad_norm": 1.736275315284729, "learning_rate": 2e-05, "loss": 0.03909831, "step": 11601 }, { "epoch": 23.204, "grad_norm": 1.0708051919937134, "learning_rate": 2e-05, "loss": 0.03059747, "step": 11602 }, { "epoch": 23.206, "grad_norm": 1.1244726181030273, "learning_rate": 2e-05, "loss": 0.04023635, "step": 11603 }, { "epoch": 23.208, "grad_norm": 0.9452853202819824, "learning_rate": 2e-05, "loss": 0.03030682, "step": 11604 }, { "epoch": 23.21, "grad_norm": 2.1545941829681396, "learning_rate": 2e-05, "loss": 0.04214806, "step": 11605 }, { "epoch": 23.212, "grad_norm": 1.2645689249038696, "learning_rate": 2e-05, "loss": 0.03720896, "step": 11606 }, { "epoch": 23.214, "grad_norm": 1.2697169780731201, "learning_rate": 2e-05, "loss": 0.04333124, "step": 11607 }, { "epoch": 23.216, "grad_norm": 1.4149627685546875, "learning_rate": 2e-05, "loss": 0.05068656, "step": 11608 }, { "epoch": 23.218, "grad_norm": 2.079878568649292, "learning_rate": 2e-05, "loss": 0.06022862, "step": 11609 }, { "epoch": 23.22, "grad_norm": 1.221092700958252, "learning_rate": 2e-05, "loss": 0.02790497, "step": 11610 }, { "epoch": 23.222, "grad_norm": 0.9675205945968628, "learning_rate": 2e-05, "loss": 0.03455825, "step": 11611 }, { "epoch": 23.224, "grad_norm": 0.8989473581314087, "learning_rate": 2e-05, "loss": 0.02922, "step": 11612 }, { "epoch": 23.226, "grad_norm": 1.088036060333252, "learning_rate": 2e-05, "loss": 0.05001824, "step": 11613 }, { "epoch": 23.228, "grad_norm": 1.4129053354263306, "learning_rate": 2e-05, "loss": 0.04396573, "step": 11614 }, { "epoch": 23.23, "grad_norm": 1.193305253982544, "learning_rate": 2e-05, "loss": 0.03746897, "step": 11615 }, { "epoch": 23.232, "grad_norm": 1.3615189790725708, "learning_rate": 2e-05, "loss": 0.03328097, "step": 11616 }, { "epoch": 23.234, "grad_norm": 3.6548335552215576, "learning_rate": 2e-05, "loss": 0.0415346, "step": 11617 }, { "epoch": 23.236, "grad_norm": 1.218424677848816, "learning_rate": 2e-05, "loss": 0.02998128, "step": 11618 }, { "epoch": 23.238, "grad_norm": 1.3885092735290527, "learning_rate": 2e-05, "loss": 0.04785194, "step": 11619 }, { "epoch": 23.24, "grad_norm": 2.74619722366333, "learning_rate": 2e-05, "loss": 0.04847107, "step": 11620 }, { "epoch": 23.242, "grad_norm": 1.482759952545166, "learning_rate": 2e-05, "loss": 0.04352796, "step": 11621 }, { "epoch": 23.244, "grad_norm": 1.3058183193206787, "learning_rate": 2e-05, "loss": 0.0443982, "step": 11622 }, { "epoch": 23.246, "grad_norm": 1.1419992446899414, "learning_rate": 2e-05, "loss": 0.03190926, "step": 11623 }, { "epoch": 23.248, "grad_norm": 3.0687108039855957, "learning_rate": 2e-05, "loss": 0.04174676, "step": 11624 }, { "epoch": 23.25, "grad_norm": 1.1151496171951294, "learning_rate": 2e-05, "loss": 0.0575142, "step": 11625 }, { "epoch": 23.252, "grad_norm": 1.7354116439819336, "learning_rate": 2e-05, "loss": 0.03637656, "step": 11626 }, { "epoch": 23.254, "grad_norm": 1.7158818244934082, "learning_rate": 2e-05, "loss": 0.04089367, "step": 11627 }, { "epoch": 23.256, "grad_norm": 1.027208685874939, "learning_rate": 2e-05, "loss": 0.04252267, "step": 11628 }, { "epoch": 23.258, "grad_norm": 1.3836479187011719, "learning_rate": 2e-05, "loss": 0.04314929, "step": 11629 }, { "epoch": 23.26, "grad_norm": 1.6362491846084595, "learning_rate": 2e-05, "loss": 0.04100268, "step": 11630 }, { "epoch": 23.262, "grad_norm": 1.1033443212509155, "learning_rate": 2e-05, "loss": 0.04861282, "step": 11631 }, { "epoch": 23.264, "grad_norm": 1.354112148284912, "learning_rate": 2e-05, "loss": 0.03558677, "step": 11632 }, { "epoch": 23.266, "grad_norm": 2.211174726486206, "learning_rate": 2e-05, "loss": 0.05863594, "step": 11633 }, { "epoch": 23.268, "grad_norm": 1.1978131532669067, "learning_rate": 2e-05, "loss": 0.02527563, "step": 11634 }, { "epoch": 23.27, "grad_norm": 1.079159140586853, "learning_rate": 2e-05, "loss": 0.04221547, "step": 11635 }, { "epoch": 23.272, "grad_norm": 2.7079126834869385, "learning_rate": 2e-05, "loss": 0.04663559, "step": 11636 }, { "epoch": 23.274, "grad_norm": 1.3492555618286133, "learning_rate": 2e-05, "loss": 0.05419156, "step": 11637 }, { "epoch": 23.276, "grad_norm": 2.4996025562286377, "learning_rate": 2e-05, "loss": 0.03772485, "step": 11638 }, { "epoch": 23.278, "grad_norm": 2.083409070968628, "learning_rate": 2e-05, "loss": 0.06437519, "step": 11639 }, { "epoch": 23.28, "grad_norm": 1.45590341091156, "learning_rate": 2e-05, "loss": 0.03550784, "step": 11640 }, { "epoch": 23.282, "grad_norm": 1.1977746486663818, "learning_rate": 2e-05, "loss": 0.03832368, "step": 11641 }, { "epoch": 23.284, "grad_norm": 1.926700234413147, "learning_rate": 2e-05, "loss": 0.06392578, "step": 11642 }, { "epoch": 23.286, "grad_norm": 2.1961700916290283, "learning_rate": 2e-05, "loss": 0.03601138, "step": 11643 }, { "epoch": 23.288, "grad_norm": 1.1713889837265015, "learning_rate": 2e-05, "loss": 0.03601219, "step": 11644 }, { "epoch": 23.29, "grad_norm": 1.6894010305404663, "learning_rate": 2e-05, "loss": 0.0445708, "step": 11645 }, { "epoch": 23.292, "grad_norm": 1.3147658109664917, "learning_rate": 2e-05, "loss": 0.0355294, "step": 11646 }, { "epoch": 23.294, "grad_norm": 1.2042319774627686, "learning_rate": 2e-05, "loss": 0.04006883, "step": 11647 }, { "epoch": 23.296, "grad_norm": 1.1680289506912231, "learning_rate": 2e-05, "loss": 0.04503386, "step": 11648 }, { "epoch": 23.298, "grad_norm": 1.135467529296875, "learning_rate": 2e-05, "loss": 0.0402963, "step": 11649 }, { "epoch": 23.3, "grad_norm": 1.505689024925232, "learning_rate": 2e-05, "loss": 0.03736635, "step": 11650 }, { "epoch": 23.302, "grad_norm": 2.4752161502838135, "learning_rate": 2e-05, "loss": 0.04134458, "step": 11651 }, { "epoch": 23.304, "grad_norm": 1.4017146825790405, "learning_rate": 2e-05, "loss": 0.04087723, "step": 11652 }, { "epoch": 23.306, "grad_norm": 1.2549480199813843, "learning_rate": 2e-05, "loss": 0.04560962, "step": 11653 }, { "epoch": 23.308, "grad_norm": 1.496455192565918, "learning_rate": 2e-05, "loss": 0.04883281, "step": 11654 }, { "epoch": 23.31, "grad_norm": 1.4462777376174927, "learning_rate": 2e-05, "loss": 0.04287733, "step": 11655 }, { "epoch": 23.312, "grad_norm": 1.8287996053695679, "learning_rate": 2e-05, "loss": 0.03942668, "step": 11656 }, { "epoch": 23.314, "grad_norm": 1.9385639429092407, "learning_rate": 2e-05, "loss": 0.06809842, "step": 11657 }, { "epoch": 23.316, "grad_norm": 0.842710018157959, "learning_rate": 2e-05, "loss": 0.02134952, "step": 11658 }, { "epoch": 23.318, "grad_norm": 2.0979163646698, "learning_rate": 2e-05, "loss": 0.05558499, "step": 11659 }, { "epoch": 23.32, "grad_norm": 1.2247816324234009, "learning_rate": 2e-05, "loss": 0.04178263, "step": 11660 }, { "epoch": 23.322, "grad_norm": 1.5208022594451904, "learning_rate": 2e-05, "loss": 0.04280531, "step": 11661 }, { "epoch": 23.324, "grad_norm": 2.318500280380249, "learning_rate": 2e-05, "loss": 0.06325538, "step": 11662 }, { "epoch": 23.326, "grad_norm": 1.6465061902999878, "learning_rate": 2e-05, "loss": 0.05929951, "step": 11663 }, { "epoch": 23.328, "grad_norm": 1.3455055952072144, "learning_rate": 2e-05, "loss": 0.03735639, "step": 11664 }, { "epoch": 23.33, "grad_norm": 1.0528393983840942, "learning_rate": 2e-05, "loss": 0.03686676, "step": 11665 }, { "epoch": 23.332, "grad_norm": 2.8287322521209717, "learning_rate": 2e-05, "loss": 0.04164189, "step": 11666 }, { "epoch": 23.334, "grad_norm": 1.6893528699874878, "learning_rate": 2e-05, "loss": 0.04093581, "step": 11667 }, { "epoch": 23.336, "grad_norm": 1.6616655588150024, "learning_rate": 2e-05, "loss": 0.03945962, "step": 11668 }, { "epoch": 23.338, "grad_norm": 1.084054946899414, "learning_rate": 2e-05, "loss": 0.04473053, "step": 11669 }, { "epoch": 23.34, "grad_norm": 1.4993929862976074, "learning_rate": 2e-05, "loss": 0.03447219, "step": 11670 }, { "epoch": 23.342, "grad_norm": 1.050879955291748, "learning_rate": 2e-05, "loss": 0.03411791, "step": 11671 }, { "epoch": 23.344, "grad_norm": 1.2374294996261597, "learning_rate": 2e-05, "loss": 0.0351023, "step": 11672 }, { "epoch": 23.346, "grad_norm": 1.3457894325256348, "learning_rate": 2e-05, "loss": 0.04368747, "step": 11673 }, { "epoch": 23.348, "grad_norm": 0.8786593675613403, "learning_rate": 2e-05, "loss": 0.03530385, "step": 11674 }, { "epoch": 23.35, "grad_norm": 1.326228380203247, "learning_rate": 2e-05, "loss": 0.04968689, "step": 11675 }, { "epoch": 23.352, "grad_norm": 0.9884204864501953, "learning_rate": 2e-05, "loss": 0.03281524, "step": 11676 }, { "epoch": 23.354, "grad_norm": 1.3991303443908691, "learning_rate": 2e-05, "loss": 0.04180757, "step": 11677 }, { "epoch": 23.356, "grad_norm": 6.225818157196045, "learning_rate": 2e-05, "loss": 0.03679702, "step": 11678 }, { "epoch": 23.358, "grad_norm": 0.8489519953727722, "learning_rate": 2e-05, "loss": 0.02504526, "step": 11679 }, { "epoch": 23.36, "grad_norm": 1.34859299659729, "learning_rate": 2e-05, "loss": 0.05301417, "step": 11680 }, { "epoch": 23.362, "grad_norm": 2.322361469268799, "learning_rate": 2e-05, "loss": 0.0461299, "step": 11681 }, { "epoch": 23.364, "grad_norm": 1.9422720670700073, "learning_rate": 2e-05, "loss": 0.04207643, "step": 11682 }, { "epoch": 23.366, "grad_norm": 2.020577907562256, "learning_rate": 2e-05, "loss": 0.04851538, "step": 11683 }, { "epoch": 23.368, "grad_norm": 1.196955919265747, "learning_rate": 2e-05, "loss": 0.04109028, "step": 11684 }, { "epoch": 23.37, "grad_norm": 2.016350507736206, "learning_rate": 2e-05, "loss": 0.04552692, "step": 11685 }, { "epoch": 23.372, "grad_norm": 1.13778555393219, "learning_rate": 2e-05, "loss": 0.02917241, "step": 11686 }, { "epoch": 23.374, "grad_norm": 1.7037264108657837, "learning_rate": 2e-05, "loss": 0.05261172, "step": 11687 }, { "epoch": 23.376, "grad_norm": 6.611384868621826, "learning_rate": 2e-05, "loss": 0.03373696, "step": 11688 }, { "epoch": 23.378, "grad_norm": 1.5197391510009766, "learning_rate": 2e-05, "loss": 0.05965453, "step": 11689 }, { "epoch": 23.38, "grad_norm": 1.1589999198913574, "learning_rate": 2e-05, "loss": 0.02925105, "step": 11690 }, { "epoch": 23.382, "grad_norm": 1.4183608293533325, "learning_rate": 2e-05, "loss": 0.03728583, "step": 11691 }, { "epoch": 23.384, "grad_norm": 1.9492428302764893, "learning_rate": 2e-05, "loss": 0.04366478, "step": 11692 }, { "epoch": 23.386, "grad_norm": 1.5098069906234741, "learning_rate": 2e-05, "loss": 0.04676771, "step": 11693 }, { "epoch": 23.388, "grad_norm": 2.645620107650757, "learning_rate": 2e-05, "loss": 0.03401638, "step": 11694 }, { "epoch": 23.39, "grad_norm": 1.2716730833053589, "learning_rate": 2e-05, "loss": 0.03060681, "step": 11695 }, { "epoch": 23.392, "grad_norm": 1.4287192821502686, "learning_rate": 2e-05, "loss": 0.04606835, "step": 11696 }, { "epoch": 23.394, "grad_norm": 2.0073654651641846, "learning_rate": 2e-05, "loss": 0.03770929, "step": 11697 }, { "epoch": 23.396, "grad_norm": 1.0809135437011719, "learning_rate": 2e-05, "loss": 0.04094427, "step": 11698 }, { "epoch": 23.398, "grad_norm": 1.341443419456482, "learning_rate": 2e-05, "loss": 0.03574315, "step": 11699 }, { "epoch": 23.4, "grad_norm": 1.4604313373565674, "learning_rate": 2e-05, "loss": 0.04919671, "step": 11700 }, { "epoch": 23.402, "grad_norm": 1.32045578956604, "learning_rate": 2e-05, "loss": 0.04904388, "step": 11701 }, { "epoch": 23.404, "grad_norm": 1.5795214176177979, "learning_rate": 2e-05, "loss": 0.04413631, "step": 11702 }, { "epoch": 23.406, "grad_norm": 1.566278338432312, "learning_rate": 2e-05, "loss": 0.04938511, "step": 11703 }, { "epoch": 23.408, "grad_norm": 1.6116101741790771, "learning_rate": 2e-05, "loss": 0.03976731, "step": 11704 }, { "epoch": 23.41, "grad_norm": 1.1294550895690918, "learning_rate": 2e-05, "loss": 0.04623659, "step": 11705 }, { "epoch": 23.412, "grad_norm": 2.9337821006774902, "learning_rate": 2e-05, "loss": 0.05919053, "step": 11706 }, { "epoch": 23.414, "grad_norm": 1.2668352127075195, "learning_rate": 2e-05, "loss": 0.04299584, "step": 11707 }, { "epoch": 23.416, "grad_norm": 1.6017807722091675, "learning_rate": 2e-05, "loss": 0.04490201, "step": 11708 }, { "epoch": 23.418, "grad_norm": 1.9470270872116089, "learning_rate": 2e-05, "loss": 0.04873323, "step": 11709 }, { "epoch": 23.42, "grad_norm": 0.8678619265556335, "learning_rate": 2e-05, "loss": 0.01984765, "step": 11710 }, { "epoch": 23.422, "grad_norm": 1.015408992767334, "learning_rate": 2e-05, "loss": 0.02890258, "step": 11711 }, { "epoch": 23.424, "grad_norm": 1.9212663173675537, "learning_rate": 2e-05, "loss": 0.05686199, "step": 11712 }, { "epoch": 23.426, "grad_norm": 1.5647587776184082, "learning_rate": 2e-05, "loss": 0.04229132, "step": 11713 }, { "epoch": 23.428, "grad_norm": 1.157758116722107, "learning_rate": 2e-05, "loss": 0.05381518, "step": 11714 }, { "epoch": 23.43, "grad_norm": 2.0962116718292236, "learning_rate": 2e-05, "loss": 0.04939672, "step": 11715 }, { "epoch": 23.432, "grad_norm": 1.778320074081421, "learning_rate": 2e-05, "loss": 0.03275887, "step": 11716 }, { "epoch": 23.434, "grad_norm": 1.3979294300079346, "learning_rate": 2e-05, "loss": 0.04589324, "step": 11717 }, { "epoch": 23.436, "grad_norm": 1.1280796527862549, "learning_rate": 2e-05, "loss": 0.04539976, "step": 11718 }, { "epoch": 23.438, "grad_norm": 2.4845614433288574, "learning_rate": 2e-05, "loss": 0.04491439, "step": 11719 }, { "epoch": 23.44, "grad_norm": 1.0096144676208496, "learning_rate": 2e-05, "loss": 0.03461198, "step": 11720 }, { "epoch": 23.442, "grad_norm": 2.4556872844696045, "learning_rate": 2e-05, "loss": 0.05000499, "step": 11721 }, { "epoch": 23.444, "grad_norm": 5.186877727508545, "learning_rate": 2e-05, "loss": 0.05488808, "step": 11722 }, { "epoch": 23.446, "grad_norm": 1.524732232093811, "learning_rate": 2e-05, "loss": 0.03662727, "step": 11723 }, { "epoch": 23.448, "grad_norm": 2.764235734939575, "learning_rate": 2e-05, "loss": 0.04267842, "step": 11724 }, { "epoch": 23.45, "grad_norm": 3.6539905071258545, "learning_rate": 2e-05, "loss": 0.05041397, "step": 11725 }, { "epoch": 23.452, "grad_norm": 2.682654619216919, "learning_rate": 2e-05, "loss": 0.0332579, "step": 11726 }, { "epoch": 23.454, "grad_norm": 1.5499696731567383, "learning_rate": 2e-05, "loss": 0.03875547, "step": 11727 }, { "epoch": 23.456, "grad_norm": 2.318291664123535, "learning_rate": 2e-05, "loss": 0.02854845, "step": 11728 }, { "epoch": 23.458, "grad_norm": 1.7998384237289429, "learning_rate": 2e-05, "loss": 0.05344633, "step": 11729 }, { "epoch": 23.46, "grad_norm": 0.9075918197631836, "learning_rate": 2e-05, "loss": 0.02585726, "step": 11730 }, { "epoch": 23.462, "grad_norm": 1.5107016563415527, "learning_rate": 2e-05, "loss": 0.05805733, "step": 11731 }, { "epoch": 23.464, "grad_norm": 2.0827245712280273, "learning_rate": 2e-05, "loss": 0.04989189, "step": 11732 }, { "epoch": 23.466, "grad_norm": 2.3105974197387695, "learning_rate": 2e-05, "loss": 0.06807564, "step": 11733 }, { "epoch": 23.468, "grad_norm": 1.2695666551589966, "learning_rate": 2e-05, "loss": 0.03619879, "step": 11734 }, { "epoch": 23.47, "grad_norm": 1.2384021282196045, "learning_rate": 2e-05, "loss": 0.02629324, "step": 11735 }, { "epoch": 23.472, "grad_norm": 1.6881636381149292, "learning_rate": 2e-05, "loss": 0.04189314, "step": 11736 }, { "epoch": 23.474, "grad_norm": 2.447591543197632, "learning_rate": 2e-05, "loss": 0.04387872, "step": 11737 }, { "epoch": 23.476, "grad_norm": 0.8882629871368408, "learning_rate": 2e-05, "loss": 0.02441452, "step": 11738 }, { "epoch": 23.478, "grad_norm": 1.4331190586090088, "learning_rate": 2e-05, "loss": 0.03524774, "step": 11739 }, { "epoch": 23.48, "grad_norm": 2.20993709564209, "learning_rate": 2e-05, "loss": 0.05033822, "step": 11740 }, { "epoch": 23.482, "grad_norm": 1.4944759607315063, "learning_rate": 2e-05, "loss": 0.05018061, "step": 11741 }, { "epoch": 23.484, "grad_norm": 2.6251118183135986, "learning_rate": 2e-05, "loss": 0.04994038, "step": 11742 }, { "epoch": 23.486, "grad_norm": 1.345861792564392, "learning_rate": 2e-05, "loss": 0.044821, "step": 11743 }, { "epoch": 23.488, "grad_norm": 1.239540934562683, "learning_rate": 2e-05, "loss": 0.04019895, "step": 11744 }, { "epoch": 23.49, "grad_norm": 1.280549168586731, "learning_rate": 2e-05, "loss": 0.03652037, "step": 11745 }, { "epoch": 23.492, "grad_norm": 1.523679256439209, "learning_rate": 2e-05, "loss": 0.04372611, "step": 11746 }, { "epoch": 23.494, "grad_norm": 1.49849271774292, "learning_rate": 2e-05, "loss": 0.03744507, "step": 11747 }, { "epoch": 23.496, "grad_norm": 1.3029149770736694, "learning_rate": 2e-05, "loss": 0.04071214, "step": 11748 }, { "epoch": 23.498, "grad_norm": 2.212434768676758, "learning_rate": 2e-05, "loss": 0.06702287, "step": 11749 }, { "epoch": 23.5, "grad_norm": 1.800525188446045, "learning_rate": 2e-05, "loss": 0.04466784, "step": 11750 }, { "epoch": 23.502, "grad_norm": 1.8852155208587646, "learning_rate": 2e-05, "loss": 0.06043893, "step": 11751 }, { "epoch": 23.504, "grad_norm": 1.5796104669570923, "learning_rate": 2e-05, "loss": 0.03296822, "step": 11752 }, { "epoch": 23.506, "grad_norm": 1.2062172889709473, "learning_rate": 2e-05, "loss": 0.03532603, "step": 11753 }, { "epoch": 23.508, "grad_norm": 2.296785831451416, "learning_rate": 2e-05, "loss": 0.06512903, "step": 11754 }, { "epoch": 23.51, "grad_norm": 1.458593487739563, "learning_rate": 2e-05, "loss": 0.06009974, "step": 11755 }, { "epoch": 23.512, "grad_norm": 1.1910109519958496, "learning_rate": 2e-05, "loss": 0.03358722, "step": 11756 }, { "epoch": 23.514, "grad_norm": 1.1627408266067505, "learning_rate": 2e-05, "loss": 0.03376558, "step": 11757 }, { "epoch": 23.516, "grad_norm": 2.2633957862854004, "learning_rate": 2e-05, "loss": 0.06547912, "step": 11758 }, { "epoch": 23.518, "grad_norm": 0.9992890357971191, "learning_rate": 2e-05, "loss": 0.02558643, "step": 11759 }, { "epoch": 23.52, "grad_norm": 2.1989448070526123, "learning_rate": 2e-05, "loss": 0.04395504, "step": 11760 }, { "epoch": 23.522, "grad_norm": 2.1435000896453857, "learning_rate": 2e-05, "loss": 0.04401811, "step": 11761 }, { "epoch": 23.524, "grad_norm": 1.109850287437439, "learning_rate": 2e-05, "loss": 0.02487316, "step": 11762 }, { "epoch": 23.526, "grad_norm": 2.0983777046203613, "learning_rate": 2e-05, "loss": 0.04313958, "step": 11763 }, { "epoch": 23.528, "grad_norm": 1.1474552154541016, "learning_rate": 2e-05, "loss": 0.04640581, "step": 11764 }, { "epoch": 23.53, "grad_norm": 2.8319504261016846, "learning_rate": 2e-05, "loss": 0.05273167, "step": 11765 }, { "epoch": 23.532, "grad_norm": 1.2033793926239014, "learning_rate": 2e-05, "loss": 0.0447002, "step": 11766 }, { "epoch": 23.534, "grad_norm": 0.9693114757537842, "learning_rate": 2e-05, "loss": 0.02837565, "step": 11767 }, { "epoch": 23.536, "grad_norm": 0.9325622916221619, "learning_rate": 2e-05, "loss": 0.03289568, "step": 11768 }, { "epoch": 23.538, "grad_norm": 1.8937511444091797, "learning_rate": 2e-05, "loss": 0.05601943, "step": 11769 }, { "epoch": 23.54, "grad_norm": 1.1438976526260376, "learning_rate": 2e-05, "loss": 0.04349836, "step": 11770 }, { "epoch": 23.542, "grad_norm": 1.7353458404541016, "learning_rate": 2e-05, "loss": 0.06614293, "step": 11771 }, { "epoch": 23.544, "grad_norm": 1.3022950887680054, "learning_rate": 2e-05, "loss": 0.03682633, "step": 11772 }, { "epoch": 23.546, "grad_norm": 2.2560153007507324, "learning_rate": 2e-05, "loss": 0.03180346, "step": 11773 }, { "epoch": 23.548000000000002, "grad_norm": 2.171947479248047, "learning_rate": 2e-05, "loss": 0.0739259, "step": 11774 }, { "epoch": 23.55, "grad_norm": 1.3080508708953857, "learning_rate": 2e-05, "loss": 0.0323229, "step": 11775 }, { "epoch": 23.552, "grad_norm": 1.3092063665390015, "learning_rate": 2e-05, "loss": 0.04459471, "step": 11776 }, { "epoch": 23.554, "grad_norm": 1.0528229475021362, "learning_rate": 2e-05, "loss": 0.02914536, "step": 11777 }, { "epoch": 23.556, "grad_norm": 1.155535340309143, "learning_rate": 2e-05, "loss": 0.03187998, "step": 11778 }, { "epoch": 23.558, "grad_norm": 1.5524402856826782, "learning_rate": 2e-05, "loss": 0.04663787, "step": 11779 }, { "epoch": 23.56, "grad_norm": 1.0907926559448242, "learning_rate": 2e-05, "loss": 0.04237938, "step": 11780 }, { "epoch": 23.562, "grad_norm": 1.2452819347381592, "learning_rate": 2e-05, "loss": 0.04205526, "step": 11781 }, { "epoch": 23.564, "grad_norm": 1.1769381761550903, "learning_rate": 2e-05, "loss": 0.03974335, "step": 11782 }, { "epoch": 23.566, "grad_norm": 1.1212342977523804, "learning_rate": 2e-05, "loss": 0.04549305, "step": 11783 }, { "epoch": 23.568, "grad_norm": 1.3117882013320923, "learning_rate": 2e-05, "loss": 0.03518543, "step": 11784 }, { "epoch": 23.57, "grad_norm": 2.5933187007904053, "learning_rate": 2e-05, "loss": 0.05680026, "step": 11785 }, { "epoch": 23.572, "grad_norm": 1.0752009153366089, "learning_rate": 2e-05, "loss": 0.03501371, "step": 11786 }, { "epoch": 23.574, "grad_norm": 1.2819617986679077, "learning_rate": 2e-05, "loss": 0.04427963, "step": 11787 }, { "epoch": 23.576, "grad_norm": 1.0761966705322266, "learning_rate": 2e-05, "loss": 0.03001852, "step": 11788 }, { "epoch": 23.578, "grad_norm": 1.9416499137878418, "learning_rate": 2e-05, "loss": 0.05700012, "step": 11789 }, { "epoch": 23.58, "grad_norm": 2.196974992752075, "learning_rate": 2e-05, "loss": 0.06811506, "step": 11790 }, { "epoch": 23.582, "grad_norm": 1.458816409111023, "learning_rate": 2e-05, "loss": 0.03885439, "step": 11791 }, { "epoch": 23.584, "grad_norm": 1.5310564041137695, "learning_rate": 2e-05, "loss": 0.04649789, "step": 11792 }, { "epoch": 23.586, "grad_norm": 1.3450838327407837, "learning_rate": 2e-05, "loss": 0.05772156, "step": 11793 }, { "epoch": 23.588, "grad_norm": 1.8807865381240845, "learning_rate": 2e-05, "loss": 0.04831642, "step": 11794 }, { "epoch": 23.59, "grad_norm": 1.4770256280899048, "learning_rate": 2e-05, "loss": 0.05228359, "step": 11795 }, { "epoch": 23.592, "grad_norm": 1.5434561967849731, "learning_rate": 2e-05, "loss": 0.04642072, "step": 11796 }, { "epoch": 23.594, "grad_norm": 1.5032665729522705, "learning_rate": 2e-05, "loss": 0.04318558, "step": 11797 }, { "epoch": 23.596, "grad_norm": 1.2868092060089111, "learning_rate": 2e-05, "loss": 0.05032202, "step": 11798 }, { "epoch": 23.598, "grad_norm": 2.0231192111968994, "learning_rate": 2e-05, "loss": 0.0559545, "step": 11799 }, { "epoch": 23.6, "grad_norm": 1.333901047706604, "learning_rate": 2e-05, "loss": 0.04533472, "step": 11800 }, { "epoch": 23.602, "grad_norm": 1.2813880443572998, "learning_rate": 2e-05, "loss": 0.04703806, "step": 11801 }, { "epoch": 23.604, "grad_norm": 2.0209999084472656, "learning_rate": 2e-05, "loss": 0.03075261, "step": 11802 }, { "epoch": 23.606, "grad_norm": 0.8158363103866577, "learning_rate": 2e-05, "loss": 0.02488266, "step": 11803 }, { "epoch": 23.608, "grad_norm": 1.2968827486038208, "learning_rate": 2e-05, "loss": 0.04825204, "step": 11804 }, { "epoch": 23.61, "grad_norm": 1.1694176197052002, "learning_rate": 2e-05, "loss": 0.04675373, "step": 11805 }, { "epoch": 23.612, "grad_norm": 2.4291164875030518, "learning_rate": 2e-05, "loss": 0.03782834, "step": 11806 }, { "epoch": 23.614, "grad_norm": 3.2317073345184326, "learning_rate": 2e-05, "loss": 0.05336016, "step": 11807 }, { "epoch": 23.616, "grad_norm": 2.4341442584991455, "learning_rate": 2e-05, "loss": 0.04534258, "step": 11808 }, { "epoch": 23.618, "grad_norm": 1.3405523300170898, "learning_rate": 2e-05, "loss": 0.03542194, "step": 11809 }, { "epoch": 23.62, "grad_norm": 0.952492356300354, "learning_rate": 2e-05, "loss": 0.02990831, "step": 11810 }, { "epoch": 23.622, "grad_norm": 1.8398486375808716, "learning_rate": 2e-05, "loss": 0.04314672, "step": 11811 }, { "epoch": 23.624, "grad_norm": 1.486546277999878, "learning_rate": 2e-05, "loss": 0.04819717, "step": 11812 }, { "epoch": 23.626, "grad_norm": 1.3383139371871948, "learning_rate": 2e-05, "loss": 0.03825889, "step": 11813 }, { "epoch": 23.628, "grad_norm": 1.57940673828125, "learning_rate": 2e-05, "loss": 0.03851825, "step": 11814 }, { "epoch": 23.63, "grad_norm": 1.3672587871551514, "learning_rate": 2e-05, "loss": 0.04236236, "step": 11815 }, { "epoch": 23.632, "grad_norm": 1.9322770833969116, "learning_rate": 2e-05, "loss": 0.04957227, "step": 11816 }, { "epoch": 23.634, "grad_norm": 2.318655014038086, "learning_rate": 2e-05, "loss": 0.05477686, "step": 11817 }, { "epoch": 23.636, "grad_norm": 1.3054866790771484, "learning_rate": 2e-05, "loss": 0.0492695, "step": 11818 }, { "epoch": 23.638, "grad_norm": 1.7329834699630737, "learning_rate": 2e-05, "loss": 0.03611884, "step": 11819 }, { "epoch": 23.64, "grad_norm": 1.0600239038467407, "learning_rate": 2e-05, "loss": 0.0398752, "step": 11820 }, { "epoch": 23.642, "grad_norm": 1.5419100522994995, "learning_rate": 2e-05, "loss": 0.05457453, "step": 11821 }, { "epoch": 23.644, "grad_norm": 1.1670844554901123, "learning_rate": 2e-05, "loss": 0.04318011, "step": 11822 }, { "epoch": 23.646, "grad_norm": 1.8038170337677002, "learning_rate": 2e-05, "loss": 0.05024581, "step": 11823 }, { "epoch": 23.648, "grad_norm": 1.4596983194351196, "learning_rate": 2e-05, "loss": 0.04731473, "step": 11824 }, { "epoch": 23.65, "grad_norm": 2.0060338973999023, "learning_rate": 2e-05, "loss": 0.03213666, "step": 11825 }, { "epoch": 23.652, "grad_norm": 2.789665699005127, "learning_rate": 2e-05, "loss": 0.05821307, "step": 11826 }, { "epoch": 23.654, "grad_norm": 3.190103054046631, "learning_rate": 2e-05, "loss": 0.04068895, "step": 11827 }, { "epoch": 23.656, "grad_norm": 1.2811437845230103, "learning_rate": 2e-05, "loss": 0.05212625, "step": 11828 }, { "epoch": 23.658, "grad_norm": 1.1203440427780151, "learning_rate": 2e-05, "loss": 0.04808802, "step": 11829 }, { "epoch": 23.66, "grad_norm": 2.3472464084625244, "learning_rate": 2e-05, "loss": 0.0419308, "step": 11830 }, { "epoch": 23.662, "grad_norm": 1.1911481618881226, "learning_rate": 2e-05, "loss": 0.05066958, "step": 11831 }, { "epoch": 23.664, "grad_norm": 1.5024276971817017, "learning_rate": 2e-05, "loss": 0.03530101, "step": 11832 }, { "epoch": 23.666, "grad_norm": 2.032147169113159, "learning_rate": 2e-05, "loss": 0.04991383, "step": 11833 }, { "epoch": 23.668, "grad_norm": 1.2729007005691528, "learning_rate": 2e-05, "loss": 0.04908589, "step": 11834 }, { "epoch": 23.67, "grad_norm": 7.958946228027344, "learning_rate": 2e-05, "loss": 0.06328648, "step": 11835 }, { "epoch": 23.672, "grad_norm": 1.1186507940292358, "learning_rate": 2e-05, "loss": 0.03300332, "step": 11836 }, { "epoch": 23.674, "grad_norm": 1.4105454683303833, "learning_rate": 2e-05, "loss": 0.04340991, "step": 11837 }, { "epoch": 23.676, "grad_norm": 1.504833698272705, "learning_rate": 2e-05, "loss": 0.05525521, "step": 11838 }, { "epoch": 23.678, "grad_norm": 1.2396059036254883, "learning_rate": 2e-05, "loss": 0.04817509, "step": 11839 }, { "epoch": 23.68, "grad_norm": 1.5307303667068481, "learning_rate": 2e-05, "loss": 0.02923251, "step": 11840 }, { "epoch": 23.682, "grad_norm": 1.6105175018310547, "learning_rate": 2e-05, "loss": 0.03141434, "step": 11841 }, { "epoch": 23.684, "grad_norm": 1.293770670890808, "learning_rate": 2e-05, "loss": 0.05276821, "step": 11842 }, { "epoch": 23.686, "grad_norm": 1.8566535711288452, "learning_rate": 2e-05, "loss": 0.04680733, "step": 11843 }, { "epoch": 23.688, "grad_norm": 1.2725869417190552, "learning_rate": 2e-05, "loss": 0.03304363, "step": 11844 }, { "epoch": 23.69, "grad_norm": 1.192263126373291, "learning_rate": 2e-05, "loss": 0.03634157, "step": 11845 }, { "epoch": 23.692, "grad_norm": 3.0075485706329346, "learning_rate": 2e-05, "loss": 0.04553998, "step": 11846 }, { "epoch": 23.694, "grad_norm": 2.9852254390716553, "learning_rate": 2e-05, "loss": 0.06526848, "step": 11847 }, { "epoch": 23.696, "grad_norm": 1.9898120164871216, "learning_rate": 2e-05, "loss": 0.03190294, "step": 11848 }, { "epoch": 23.698, "grad_norm": 1.2109845876693726, "learning_rate": 2e-05, "loss": 0.04095478, "step": 11849 }, { "epoch": 23.7, "grad_norm": 3.1448001861572266, "learning_rate": 2e-05, "loss": 0.0624724, "step": 11850 }, { "epoch": 23.701999999999998, "grad_norm": 1.063796043395996, "learning_rate": 2e-05, "loss": 0.04226255, "step": 11851 }, { "epoch": 23.704, "grad_norm": 1.0157510042190552, "learning_rate": 2e-05, "loss": 0.04364079, "step": 11852 }, { "epoch": 23.706, "grad_norm": 0.9760161638259888, "learning_rate": 2e-05, "loss": 0.02745278, "step": 11853 }, { "epoch": 23.708, "grad_norm": 1.4634634256362915, "learning_rate": 2e-05, "loss": 0.04308833, "step": 11854 }, { "epoch": 23.71, "grad_norm": 1.4211335182189941, "learning_rate": 2e-05, "loss": 0.03839317, "step": 11855 }, { "epoch": 23.712, "grad_norm": 1.0064549446105957, "learning_rate": 2e-05, "loss": 0.03243163, "step": 11856 }, { "epoch": 23.714, "grad_norm": 1.0889908075332642, "learning_rate": 2e-05, "loss": 0.03015575, "step": 11857 }, { "epoch": 23.716, "grad_norm": 1.499775767326355, "learning_rate": 2e-05, "loss": 0.04761718, "step": 11858 }, { "epoch": 23.718, "grad_norm": 1.0862691402435303, "learning_rate": 2e-05, "loss": 0.03753242, "step": 11859 }, { "epoch": 23.72, "grad_norm": 0.9295043349266052, "learning_rate": 2e-05, "loss": 0.02882018, "step": 11860 }, { "epoch": 23.722, "grad_norm": 1.0936144590377808, "learning_rate": 2e-05, "loss": 0.03536011, "step": 11861 }, { "epoch": 23.724, "grad_norm": 0.9443338513374329, "learning_rate": 2e-05, "loss": 0.03331787, "step": 11862 }, { "epoch": 23.726, "grad_norm": 1.0032241344451904, "learning_rate": 2e-05, "loss": 0.02793972, "step": 11863 }, { "epoch": 23.728, "grad_norm": 1.677073359489441, "learning_rate": 2e-05, "loss": 0.04015389, "step": 11864 }, { "epoch": 23.73, "grad_norm": 2.5019454956054688, "learning_rate": 2e-05, "loss": 0.04113224, "step": 11865 }, { "epoch": 23.732, "grad_norm": 1.2569458484649658, "learning_rate": 2e-05, "loss": 0.04976218, "step": 11866 }, { "epoch": 23.734, "grad_norm": 1.3353568315505981, "learning_rate": 2e-05, "loss": 0.0452405, "step": 11867 }, { "epoch": 23.736, "grad_norm": 1.0816160440444946, "learning_rate": 2e-05, "loss": 0.03934583, "step": 11868 }, { "epoch": 23.738, "grad_norm": 2.60617733001709, "learning_rate": 2e-05, "loss": 0.04733625, "step": 11869 }, { "epoch": 23.74, "grad_norm": 1.3264927864074707, "learning_rate": 2e-05, "loss": 0.04373094, "step": 11870 }, { "epoch": 23.742, "grad_norm": 1.8265389204025269, "learning_rate": 2e-05, "loss": 0.04801701, "step": 11871 }, { "epoch": 23.744, "grad_norm": 1.5918618440628052, "learning_rate": 2e-05, "loss": 0.03365224, "step": 11872 }, { "epoch": 23.746, "grad_norm": 1.1717815399169922, "learning_rate": 2e-05, "loss": 0.04041979, "step": 11873 }, { "epoch": 23.748, "grad_norm": 1.1238340139389038, "learning_rate": 2e-05, "loss": 0.03989327, "step": 11874 }, { "epoch": 23.75, "grad_norm": 3.7857089042663574, "learning_rate": 2e-05, "loss": 0.04945645, "step": 11875 }, { "epoch": 23.752, "grad_norm": 3.371209144592285, "learning_rate": 2e-05, "loss": 0.0476009, "step": 11876 }, { "epoch": 23.754, "grad_norm": 1.3550797700881958, "learning_rate": 2e-05, "loss": 0.04116515, "step": 11877 }, { "epoch": 23.756, "grad_norm": 1.1104875802993774, "learning_rate": 2e-05, "loss": 0.03957765, "step": 11878 }, { "epoch": 23.758, "grad_norm": 1.2116479873657227, "learning_rate": 2e-05, "loss": 0.03302076, "step": 11879 }, { "epoch": 23.76, "grad_norm": 1.565588355064392, "learning_rate": 2e-05, "loss": 0.05508092, "step": 11880 }, { "epoch": 23.762, "grad_norm": 6.225358486175537, "learning_rate": 2e-05, "loss": 0.0478447, "step": 11881 }, { "epoch": 23.764, "grad_norm": 1.688665747642517, "learning_rate": 2e-05, "loss": 0.0437132, "step": 11882 }, { "epoch": 23.766, "grad_norm": 1.2847251892089844, "learning_rate": 2e-05, "loss": 0.04984112, "step": 11883 }, { "epoch": 23.768, "grad_norm": 1.4837887287139893, "learning_rate": 2e-05, "loss": 0.03870174, "step": 11884 }, { "epoch": 23.77, "grad_norm": 1.2898259162902832, "learning_rate": 2e-05, "loss": 0.0534673, "step": 11885 }, { "epoch": 23.772, "grad_norm": 1.0326087474822998, "learning_rate": 2e-05, "loss": 0.0354575, "step": 11886 }, { "epoch": 23.774, "grad_norm": 1.8046265840530396, "learning_rate": 2e-05, "loss": 0.0476476, "step": 11887 }, { "epoch": 23.776, "grad_norm": 1.4603756666183472, "learning_rate": 2e-05, "loss": 0.04588185, "step": 11888 }, { "epoch": 23.778, "grad_norm": 1.1678380966186523, "learning_rate": 2e-05, "loss": 0.04390156, "step": 11889 }, { "epoch": 23.78, "grad_norm": 1.5542722940444946, "learning_rate": 2e-05, "loss": 0.04794715, "step": 11890 }, { "epoch": 23.782, "grad_norm": 1.2043498754501343, "learning_rate": 2e-05, "loss": 0.03155842, "step": 11891 }, { "epoch": 23.784, "grad_norm": 1.1357460021972656, "learning_rate": 2e-05, "loss": 0.04152321, "step": 11892 }, { "epoch": 23.786, "grad_norm": 1.1794549226760864, "learning_rate": 2e-05, "loss": 0.04586139, "step": 11893 }, { "epoch": 23.788, "grad_norm": 0.9895995855331421, "learning_rate": 2e-05, "loss": 0.0327672, "step": 11894 }, { "epoch": 23.79, "grad_norm": 1.7362169027328491, "learning_rate": 2e-05, "loss": 0.05948992, "step": 11895 }, { "epoch": 23.792, "grad_norm": 0.9421995282173157, "learning_rate": 2e-05, "loss": 0.02367941, "step": 11896 }, { "epoch": 23.794, "grad_norm": 2.629747152328491, "learning_rate": 2e-05, "loss": 0.04058916, "step": 11897 }, { "epoch": 23.796, "grad_norm": 1.0108635425567627, "learning_rate": 2e-05, "loss": 0.02985597, "step": 11898 }, { "epoch": 23.798000000000002, "grad_norm": 3.040156126022339, "learning_rate": 2e-05, "loss": 0.0340285, "step": 11899 }, { "epoch": 23.8, "grad_norm": 1.1744736433029175, "learning_rate": 2e-05, "loss": 0.04568225, "step": 11900 }, { "epoch": 23.802, "grad_norm": 2.0291764736175537, "learning_rate": 2e-05, "loss": 0.05318277, "step": 11901 }, { "epoch": 23.804, "grad_norm": 1.3046629428863525, "learning_rate": 2e-05, "loss": 0.05274621, "step": 11902 }, { "epoch": 23.806, "grad_norm": 2.1963841915130615, "learning_rate": 2e-05, "loss": 0.04720778, "step": 11903 }, { "epoch": 23.808, "grad_norm": 2.655916213989258, "learning_rate": 2e-05, "loss": 0.0420041, "step": 11904 }, { "epoch": 23.81, "grad_norm": 1.176939845085144, "learning_rate": 2e-05, "loss": 0.04580169, "step": 11905 }, { "epoch": 23.812, "grad_norm": 1.0048329830169678, "learning_rate": 2e-05, "loss": 0.03731166, "step": 11906 }, { "epoch": 23.814, "grad_norm": 1.414959192276001, "learning_rate": 2e-05, "loss": 0.06279035, "step": 11907 }, { "epoch": 23.816, "grad_norm": 1.7334407567977905, "learning_rate": 2e-05, "loss": 0.03839599, "step": 11908 }, { "epoch": 23.818, "grad_norm": 1.2090424299240112, "learning_rate": 2e-05, "loss": 0.03566598, "step": 11909 }, { "epoch": 23.82, "grad_norm": 1.0905237197875977, "learning_rate": 2e-05, "loss": 0.04220146, "step": 11910 }, { "epoch": 23.822, "grad_norm": 2.646314859390259, "learning_rate": 2e-05, "loss": 0.06015548, "step": 11911 }, { "epoch": 23.824, "grad_norm": 1.4104152917861938, "learning_rate": 2e-05, "loss": 0.03940243, "step": 11912 }, { "epoch": 23.826, "grad_norm": 2.7767820358276367, "learning_rate": 2e-05, "loss": 0.04243702, "step": 11913 }, { "epoch": 23.828, "grad_norm": 1.3988125324249268, "learning_rate": 2e-05, "loss": 0.03724855, "step": 11914 }, { "epoch": 23.83, "grad_norm": 1.0905022621154785, "learning_rate": 2e-05, "loss": 0.02765215, "step": 11915 }, { "epoch": 23.832, "grad_norm": 1.3085315227508545, "learning_rate": 2e-05, "loss": 0.02965268, "step": 11916 }, { "epoch": 23.834, "grad_norm": 2.0017828941345215, "learning_rate": 2e-05, "loss": 0.05234018, "step": 11917 }, { "epoch": 23.836, "grad_norm": 1.6834447383880615, "learning_rate": 2e-05, "loss": 0.04721446, "step": 11918 }, { "epoch": 23.838, "grad_norm": 1.4597904682159424, "learning_rate": 2e-05, "loss": 0.05151546, "step": 11919 }, { "epoch": 23.84, "grad_norm": 2.2075035572052, "learning_rate": 2e-05, "loss": 0.04451163, "step": 11920 }, { "epoch": 23.842, "grad_norm": 1.4260821342468262, "learning_rate": 2e-05, "loss": 0.03413969, "step": 11921 }, { "epoch": 23.844, "grad_norm": 1.2742419242858887, "learning_rate": 2e-05, "loss": 0.0354282, "step": 11922 }, { "epoch": 23.846, "grad_norm": 1.3900338411331177, "learning_rate": 2e-05, "loss": 0.03880811, "step": 11923 }, { "epoch": 23.848, "grad_norm": 1.640749216079712, "learning_rate": 2e-05, "loss": 0.03321562, "step": 11924 }, { "epoch": 23.85, "grad_norm": 1.2059239149093628, "learning_rate": 2e-05, "loss": 0.03351336, "step": 11925 }, { "epoch": 23.852, "grad_norm": 1.6385819911956787, "learning_rate": 2e-05, "loss": 0.03568233, "step": 11926 }, { "epoch": 23.854, "grad_norm": 1.6820650100708008, "learning_rate": 2e-05, "loss": 0.04155786, "step": 11927 }, { "epoch": 23.856, "grad_norm": 1.2074123620986938, "learning_rate": 2e-05, "loss": 0.04882422, "step": 11928 }, { "epoch": 23.858, "grad_norm": 1.130673885345459, "learning_rate": 2e-05, "loss": 0.03696358, "step": 11929 }, { "epoch": 23.86, "grad_norm": 1.832159161567688, "learning_rate": 2e-05, "loss": 0.02932361, "step": 11930 }, { "epoch": 23.862, "grad_norm": 1.9174375534057617, "learning_rate": 2e-05, "loss": 0.05652925, "step": 11931 }, { "epoch": 23.864, "grad_norm": 1.8890222311019897, "learning_rate": 2e-05, "loss": 0.03878344, "step": 11932 }, { "epoch": 23.866, "grad_norm": 1.596886157989502, "learning_rate": 2e-05, "loss": 0.047834, "step": 11933 }, { "epoch": 23.868, "grad_norm": 1.2127768993377686, "learning_rate": 2e-05, "loss": 0.04310586, "step": 11934 }, { "epoch": 23.87, "grad_norm": 1.5679130554199219, "learning_rate": 2e-05, "loss": 0.05463172, "step": 11935 }, { "epoch": 23.872, "grad_norm": 2.985722303390503, "learning_rate": 2e-05, "loss": 0.05015947, "step": 11936 }, { "epoch": 23.874, "grad_norm": 1.1504042148590088, "learning_rate": 2e-05, "loss": 0.04618395, "step": 11937 }, { "epoch": 23.876, "grad_norm": 1.0759702920913696, "learning_rate": 2e-05, "loss": 0.03113378, "step": 11938 }, { "epoch": 23.878, "grad_norm": 0.9902017116546631, "learning_rate": 2e-05, "loss": 0.03239877, "step": 11939 }, { "epoch": 23.88, "grad_norm": 2.066293954849243, "learning_rate": 2e-05, "loss": 0.0501477, "step": 11940 }, { "epoch": 23.882, "grad_norm": 1.0674852132797241, "learning_rate": 2e-05, "loss": 0.02988044, "step": 11941 }, { "epoch": 23.884, "grad_norm": 1.0986425876617432, "learning_rate": 2e-05, "loss": 0.03874698, "step": 11942 }, { "epoch": 23.886, "grad_norm": 2.511347770690918, "learning_rate": 2e-05, "loss": 0.04582223, "step": 11943 }, { "epoch": 23.888, "grad_norm": 1.30561101436615, "learning_rate": 2e-05, "loss": 0.04915758, "step": 11944 }, { "epoch": 23.89, "grad_norm": 1.0842196941375732, "learning_rate": 2e-05, "loss": 0.03920197, "step": 11945 }, { "epoch": 23.892, "grad_norm": 2.458435535430908, "learning_rate": 2e-05, "loss": 0.06143365, "step": 11946 }, { "epoch": 23.894, "grad_norm": 1.3508713245391846, "learning_rate": 2e-05, "loss": 0.04859567, "step": 11947 }, { "epoch": 23.896, "grad_norm": 1.2235263586044312, "learning_rate": 2e-05, "loss": 0.04123156, "step": 11948 }, { "epoch": 23.898, "grad_norm": 1.3440524339675903, "learning_rate": 2e-05, "loss": 0.05537596, "step": 11949 }, { "epoch": 23.9, "grad_norm": 1.618463158607483, "learning_rate": 2e-05, "loss": 0.05320452, "step": 11950 }, { "epoch": 23.902, "grad_norm": 1.1047896146774292, "learning_rate": 2e-05, "loss": 0.03042508, "step": 11951 }, { "epoch": 23.904, "grad_norm": 1.4379069805145264, "learning_rate": 2e-05, "loss": 0.03938862, "step": 11952 }, { "epoch": 23.906, "grad_norm": 1.2280552387237549, "learning_rate": 2e-05, "loss": 0.03531722, "step": 11953 }, { "epoch": 23.908, "grad_norm": 1.7554948329925537, "learning_rate": 2e-05, "loss": 0.0457234, "step": 11954 }, { "epoch": 23.91, "grad_norm": 1.4118913412094116, "learning_rate": 2e-05, "loss": 0.04296067, "step": 11955 }, { "epoch": 23.912, "grad_norm": 1.3764146566390991, "learning_rate": 2e-05, "loss": 0.04469619, "step": 11956 }, { "epoch": 23.914, "grad_norm": 1.2633867263793945, "learning_rate": 2e-05, "loss": 0.03698818, "step": 11957 }, { "epoch": 23.916, "grad_norm": 0.9480093717575073, "learning_rate": 2e-05, "loss": 0.02930729, "step": 11958 }, { "epoch": 23.918, "grad_norm": 1.1288198232650757, "learning_rate": 2e-05, "loss": 0.04000931, "step": 11959 }, { "epoch": 23.92, "grad_norm": 1.1547532081604004, "learning_rate": 2e-05, "loss": 0.04611331, "step": 11960 }, { "epoch": 23.922, "grad_norm": 1.3261470794677734, "learning_rate": 2e-05, "loss": 0.05121126, "step": 11961 }, { "epoch": 23.924, "grad_norm": 1.592149019241333, "learning_rate": 2e-05, "loss": 0.03919889, "step": 11962 }, { "epoch": 23.926, "grad_norm": 1.4399209022521973, "learning_rate": 2e-05, "loss": 0.0399067, "step": 11963 }, { "epoch": 23.928, "grad_norm": 2.2152695655822754, "learning_rate": 2e-05, "loss": 0.05575511, "step": 11964 }, { "epoch": 23.93, "grad_norm": 1.7007336616516113, "learning_rate": 2e-05, "loss": 0.04545318, "step": 11965 }, { "epoch": 23.932, "grad_norm": 1.149708867073059, "learning_rate": 2e-05, "loss": 0.04423167, "step": 11966 }, { "epoch": 23.934, "grad_norm": 1.4653066396713257, "learning_rate": 2e-05, "loss": 0.03144412, "step": 11967 }, { "epoch": 23.936, "grad_norm": 1.416612982749939, "learning_rate": 2e-05, "loss": 0.0496158, "step": 11968 }, { "epoch": 23.938, "grad_norm": 3.0709216594696045, "learning_rate": 2e-05, "loss": 0.03679985, "step": 11969 }, { "epoch": 23.94, "grad_norm": 1.1610190868377686, "learning_rate": 2e-05, "loss": 0.02437357, "step": 11970 }, { "epoch": 23.942, "grad_norm": 1.3058899641036987, "learning_rate": 2e-05, "loss": 0.03944077, "step": 11971 }, { "epoch": 23.944, "grad_norm": 1.279998779296875, "learning_rate": 2e-05, "loss": 0.03772661, "step": 11972 }, { "epoch": 23.946, "grad_norm": 3.4212002754211426, "learning_rate": 2e-05, "loss": 0.06490073, "step": 11973 }, { "epoch": 23.948, "grad_norm": 1.8969491720199585, "learning_rate": 2e-05, "loss": 0.05955943, "step": 11974 }, { "epoch": 23.95, "grad_norm": 1.3523305654525757, "learning_rate": 2e-05, "loss": 0.04351056, "step": 11975 }, { "epoch": 23.951999999999998, "grad_norm": 2.597954273223877, "learning_rate": 2e-05, "loss": 0.06529416, "step": 11976 }, { "epoch": 23.954, "grad_norm": 1.3287073373794556, "learning_rate": 2e-05, "loss": 0.02537181, "step": 11977 }, { "epoch": 23.956, "grad_norm": 1.4415262937545776, "learning_rate": 2e-05, "loss": 0.04575014, "step": 11978 }, { "epoch": 23.958, "grad_norm": 2.070765256881714, "learning_rate": 2e-05, "loss": 0.04959387, "step": 11979 }, { "epoch": 23.96, "grad_norm": 1.198920726776123, "learning_rate": 2e-05, "loss": 0.04554899, "step": 11980 }, { "epoch": 23.962, "grad_norm": 1.5052765607833862, "learning_rate": 2e-05, "loss": 0.0560311, "step": 11981 }, { "epoch": 23.964, "grad_norm": 1.083158016204834, "learning_rate": 2e-05, "loss": 0.03436514, "step": 11982 }, { "epoch": 23.966, "grad_norm": 1.2290107011795044, "learning_rate": 2e-05, "loss": 0.02736303, "step": 11983 }, { "epoch": 23.968, "grad_norm": 2.753286361694336, "learning_rate": 2e-05, "loss": 0.05253476, "step": 11984 }, { "epoch": 23.97, "grad_norm": 1.206758975982666, "learning_rate": 2e-05, "loss": 0.02033206, "step": 11985 }, { "epoch": 23.972, "grad_norm": 2.5052928924560547, "learning_rate": 2e-05, "loss": 0.05282067, "step": 11986 }, { "epoch": 23.974, "grad_norm": 1.4807002544403076, "learning_rate": 2e-05, "loss": 0.04118428, "step": 11987 }, { "epoch": 23.976, "grad_norm": 0.930181622505188, "learning_rate": 2e-05, "loss": 0.0332891, "step": 11988 }, { "epoch": 23.978, "grad_norm": 1.6639894247055054, "learning_rate": 2e-05, "loss": 0.04549947, "step": 11989 }, { "epoch": 23.98, "grad_norm": 1.3116426467895508, "learning_rate": 2e-05, "loss": 0.03784752, "step": 11990 }, { "epoch": 23.982, "grad_norm": 0.9380161166191101, "learning_rate": 2e-05, "loss": 0.02113626, "step": 11991 }, { "epoch": 23.984, "grad_norm": 1.9898308515548706, "learning_rate": 2e-05, "loss": 0.05268819, "step": 11992 }, { "epoch": 23.986, "grad_norm": 0.75873202085495, "learning_rate": 2e-05, "loss": 0.01814926, "step": 11993 }, { "epoch": 23.988, "grad_norm": 1.49936842918396, "learning_rate": 2e-05, "loss": 0.03649104, "step": 11994 }, { "epoch": 23.99, "grad_norm": 1.114337682723999, "learning_rate": 2e-05, "loss": 0.04498353, "step": 11995 }, { "epoch": 23.992, "grad_norm": 1.0039514303207397, "learning_rate": 2e-05, "loss": 0.03449545, "step": 11996 }, { "epoch": 23.994, "grad_norm": 0.9935739040374756, "learning_rate": 2e-05, "loss": 0.03035527, "step": 11997 }, { "epoch": 23.996, "grad_norm": 1.7863175868988037, "learning_rate": 2e-05, "loss": 0.044944, "step": 11998 }, { "epoch": 23.998, "grad_norm": 2.193424701690674, "learning_rate": 2e-05, "loss": 0.03365751, "step": 11999 }, { "epoch": 24.0, "grad_norm": 1.198980450630188, "learning_rate": 2e-05, "loss": 0.02690567, "step": 12000 }, { "epoch": 24.0, "eval_performance": { "AngleClassification_1": 0.978, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9700598802395209, "Equal_1": 0.996, "Equal_2": 0.9800399201596807, "Equal_3": 0.9800399201596807, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.992, "Perpendicular_1": 1.0, "Perpendicular_2": 0.988, "Perpendicular_3": 0.8697394789579158, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.9976666666666667, "PointLiesOnCircle_3": 0.986, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9919839679358717, "PointLiesOnLine_3": 0.9940119760479041 }, "eval_runtime": 320.967, "eval_samples_per_second": 32.714, "eval_steps_per_second": 0.654, "step": 12000 }, { "epoch": 24.002, "grad_norm": 1.232427716255188, "learning_rate": 2e-05, "loss": 0.0302996, "step": 12001 }, { "epoch": 24.004, "grad_norm": 1.1565972566604614, "learning_rate": 2e-05, "loss": 0.0323018, "step": 12002 }, { "epoch": 24.006, "grad_norm": 2.3447890281677246, "learning_rate": 2e-05, "loss": 0.04620555, "step": 12003 }, { "epoch": 24.008, "grad_norm": 1.4546306133270264, "learning_rate": 2e-05, "loss": 0.04248685, "step": 12004 }, { "epoch": 24.01, "grad_norm": 2.750286340713501, "learning_rate": 2e-05, "loss": 0.05949475, "step": 12005 }, { "epoch": 24.012, "grad_norm": 2.553252935409546, "learning_rate": 2e-05, "loss": 0.05698572, "step": 12006 }, { "epoch": 24.014, "grad_norm": 1.3687764406204224, "learning_rate": 2e-05, "loss": 0.05525066, "step": 12007 }, { "epoch": 24.016, "grad_norm": 1.277368187904358, "learning_rate": 2e-05, "loss": 0.05651337, "step": 12008 }, { "epoch": 24.018, "grad_norm": 1.1808255910873413, "learning_rate": 2e-05, "loss": 0.04304896, "step": 12009 }, { "epoch": 24.02, "grad_norm": 1.2423754930496216, "learning_rate": 2e-05, "loss": 0.04889252, "step": 12010 }, { "epoch": 24.022, "grad_norm": 2.5050649642944336, "learning_rate": 2e-05, "loss": 0.05278215, "step": 12011 }, { "epoch": 24.024, "grad_norm": 2.068812131881714, "learning_rate": 2e-05, "loss": 0.04776769, "step": 12012 }, { "epoch": 24.026, "grad_norm": 1.2705119848251343, "learning_rate": 2e-05, "loss": 0.06349517, "step": 12013 }, { "epoch": 24.028, "grad_norm": 1.3783538341522217, "learning_rate": 2e-05, "loss": 0.04796961, "step": 12014 }, { "epoch": 24.03, "grad_norm": 1.4065525531768799, "learning_rate": 2e-05, "loss": 0.05514029, "step": 12015 }, { "epoch": 24.032, "grad_norm": 2.559398889541626, "learning_rate": 2e-05, "loss": 0.04297279, "step": 12016 }, { "epoch": 24.034, "grad_norm": 1.3823760747909546, "learning_rate": 2e-05, "loss": 0.05167028, "step": 12017 }, { "epoch": 24.036, "grad_norm": 0.9577856659889221, "learning_rate": 2e-05, "loss": 0.02910194, "step": 12018 }, { "epoch": 24.038, "grad_norm": 1.0815929174423218, "learning_rate": 2e-05, "loss": 0.03755962, "step": 12019 }, { "epoch": 24.04, "grad_norm": 1.190253734588623, "learning_rate": 2e-05, "loss": 0.03743257, "step": 12020 }, { "epoch": 24.042, "grad_norm": 2.5249247550964355, "learning_rate": 2e-05, "loss": 0.05092564, "step": 12021 }, { "epoch": 24.044, "grad_norm": 1.0962779521942139, "learning_rate": 2e-05, "loss": 0.04152154, "step": 12022 }, { "epoch": 24.046, "grad_norm": 1.5275741815567017, "learning_rate": 2e-05, "loss": 0.04184947, "step": 12023 }, { "epoch": 24.048, "grad_norm": 1.0383355617523193, "learning_rate": 2e-05, "loss": 0.03165521, "step": 12024 }, { "epoch": 24.05, "grad_norm": 1.5432151556015015, "learning_rate": 2e-05, "loss": 0.05171941, "step": 12025 }, { "epoch": 24.052, "grad_norm": 1.249215841293335, "learning_rate": 2e-05, "loss": 0.04950016, "step": 12026 }, { "epoch": 24.054, "grad_norm": 1.309938907623291, "learning_rate": 2e-05, "loss": 0.04960479, "step": 12027 }, { "epoch": 24.056, "grad_norm": 0.9974091053009033, "learning_rate": 2e-05, "loss": 0.03317707, "step": 12028 }, { "epoch": 24.058, "grad_norm": 1.5228265523910522, "learning_rate": 2e-05, "loss": 0.04856505, "step": 12029 }, { "epoch": 24.06, "grad_norm": 1.8886607885360718, "learning_rate": 2e-05, "loss": 0.05328999, "step": 12030 }, { "epoch": 24.062, "grad_norm": 1.02639639377594, "learning_rate": 2e-05, "loss": 0.02694909, "step": 12031 }, { "epoch": 24.064, "grad_norm": 1.0701245069503784, "learning_rate": 2e-05, "loss": 0.04255382, "step": 12032 }, { "epoch": 24.066, "grad_norm": 1.3096833229064941, "learning_rate": 2e-05, "loss": 0.04552224, "step": 12033 }, { "epoch": 24.068, "grad_norm": 1.0245039463043213, "learning_rate": 2e-05, "loss": 0.03121547, "step": 12034 }, { "epoch": 24.07, "grad_norm": 2.104153871536255, "learning_rate": 2e-05, "loss": 0.05049267, "step": 12035 }, { "epoch": 24.072, "grad_norm": 0.9794881343841553, "learning_rate": 2e-05, "loss": 0.04013614, "step": 12036 }, { "epoch": 24.074, "grad_norm": 1.3545254468917847, "learning_rate": 2e-05, "loss": 0.04678908, "step": 12037 }, { "epoch": 24.076, "grad_norm": 1.7231847047805786, "learning_rate": 2e-05, "loss": 0.05604774, "step": 12038 }, { "epoch": 24.078, "grad_norm": 1.0859684944152832, "learning_rate": 2e-05, "loss": 0.02874695, "step": 12039 }, { "epoch": 24.08, "grad_norm": 1.4772136211395264, "learning_rate": 2e-05, "loss": 0.04590698, "step": 12040 }, { "epoch": 24.082, "grad_norm": 1.1300466060638428, "learning_rate": 2e-05, "loss": 0.03713751, "step": 12041 }, { "epoch": 24.084, "grad_norm": 1.4452584981918335, "learning_rate": 2e-05, "loss": 0.04535814, "step": 12042 }, { "epoch": 24.086, "grad_norm": 1.703972578048706, "learning_rate": 2e-05, "loss": 0.05336364, "step": 12043 }, { "epoch": 24.088, "grad_norm": 1.5008318424224854, "learning_rate": 2e-05, "loss": 0.0530287, "step": 12044 }, { "epoch": 24.09, "grad_norm": 1.0992947816848755, "learning_rate": 2e-05, "loss": 0.05150533, "step": 12045 }, { "epoch": 24.092, "grad_norm": 1.1373698711395264, "learning_rate": 2e-05, "loss": 0.04210654, "step": 12046 }, { "epoch": 24.094, "grad_norm": 1.3362079858779907, "learning_rate": 2e-05, "loss": 0.04046137, "step": 12047 }, { "epoch": 24.096, "grad_norm": 2.110414743423462, "learning_rate": 2e-05, "loss": 0.05383376, "step": 12048 }, { "epoch": 24.098, "grad_norm": 2.22654128074646, "learning_rate": 2e-05, "loss": 0.04562001, "step": 12049 }, { "epoch": 24.1, "grad_norm": 1.5892013311386108, "learning_rate": 2e-05, "loss": 0.04545607, "step": 12050 }, { "epoch": 24.102, "grad_norm": 1.7397476434707642, "learning_rate": 2e-05, "loss": 0.05527458, "step": 12051 }, { "epoch": 24.104, "grad_norm": 1.3350776433944702, "learning_rate": 2e-05, "loss": 0.04359471, "step": 12052 }, { "epoch": 24.106, "grad_norm": 1.240904450416565, "learning_rate": 2e-05, "loss": 0.04825516, "step": 12053 }, { "epoch": 24.108, "grad_norm": 1.1014329195022583, "learning_rate": 2e-05, "loss": 0.0354472, "step": 12054 }, { "epoch": 24.11, "grad_norm": 1.3946998119354248, "learning_rate": 2e-05, "loss": 0.04494639, "step": 12055 }, { "epoch": 24.112, "grad_norm": 1.7110241651535034, "learning_rate": 2e-05, "loss": 0.05996033, "step": 12056 }, { "epoch": 24.114, "grad_norm": 1.3246296644210815, "learning_rate": 2e-05, "loss": 0.0507584, "step": 12057 }, { "epoch": 24.116, "grad_norm": 1.423532247543335, "learning_rate": 2e-05, "loss": 0.04155703, "step": 12058 }, { "epoch": 24.118, "grad_norm": 1.6252057552337646, "learning_rate": 2e-05, "loss": 0.05566594, "step": 12059 }, { "epoch": 24.12, "grad_norm": 1.978572964668274, "learning_rate": 2e-05, "loss": 0.06226615, "step": 12060 }, { "epoch": 24.122, "grad_norm": 1.053499460220337, "learning_rate": 2e-05, "loss": 0.03881479, "step": 12061 }, { "epoch": 24.124, "grad_norm": 1.3819252252578735, "learning_rate": 2e-05, "loss": 0.05036075, "step": 12062 }, { "epoch": 24.126, "grad_norm": 1.7073538303375244, "learning_rate": 2e-05, "loss": 0.05705138, "step": 12063 }, { "epoch": 24.128, "grad_norm": 1.606673002243042, "learning_rate": 2e-05, "loss": 0.04539119, "step": 12064 }, { "epoch": 24.13, "grad_norm": 1.0602710247039795, "learning_rate": 2e-05, "loss": 0.04797367, "step": 12065 }, { "epoch": 24.132, "grad_norm": 1.3240618705749512, "learning_rate": 2e-05, "loss": 0.05344753, "step": 12066 }, { "epoch": 24.134, "grad_norm": 1.652435541152954, "learning_rate": 2e-05, "loss": 0.03491877, "step": 12067 }, { "epoch": 24.136, "grad_norm": 2.6440341472625732, "learning_rate": 2e-05, "loss": 0.05096871, "step": 12068 }, { "epoch": 24.138, "grad_norm": 1.0971581935882568, "learning_rate": 2e-05, "loss": 0.04188203, "step": 12069 }, { "epoch": 24.14, "grad_norm": 0.9165200591087341, "learning_rate": 2e-05, "loss": 0.03046097, "step": 12070 }, { "epoch": 24.142, "grad_norm": 2.975809097290039, "learning_rate": 2e-05, "loss": 0.05606359, "step": 12071 }, { "epoch": 24.144, "grad_norm": 1.127575397491455, "learning_rate": 2e-05, "loss": 0.0311743, "step": 12072 }, { "epoch": 24.146, "grad_norm": 1.337193489074707, "learning_rate": 2e-05, "loss": 0.03485664, "step": 12073 }, { "epoch": 24.148, "grad_norm": 2.253185510635376, "learning_rate": 2e-05, "loss": 0.04143036, "step": 12074 }, { "epoch": 24.15, "grad_norm": 2.4968769550323486, "learning_rate": 2e-05, "loss": 0.0586257, "step": 12075 }, { "epoch": 24.152, "grad_norm": 1.3548872470855713, "learning_rate": 2e-05, "loss": 0.04275392, "step": 12076 }, { "epoch": 24.154, "grad_norm": 1.05968177318573, "learning_rate": 2e-05, "loss": 0.03807945, "step": 12077 }, { "epoch": 24.156, "grad_norm": 1.2706948518753052, "learning_rate": 2e-05, "loss": 0.05273522, "step": 12078 }, { "epoch": 24.158, "grad_norm": 1.2641117572784424, "learning_rate": 2e-05, "loss": 0.05271117, "step": 12079 }, { "epoch": 24.16, "grad_norm": 1.2767136096954346, "learning_rate": 2e-05, "loss": 0.03717405, "step": 12080 }, { "epoch": 24.162, "grad_norm": 1.8129770755767822, "learning_rate": 2e-05, "loss": 0.05442118, "step": 12081 }, { "epoch": 24.164, "grad_norm": 1.6097712516784668, "learning_rate": 2e-05, "loss": 0.02836596, "step": 12082 }, { "epoch": 24.166, "grad_norm": 1.6396865844726562, "learning_rate": 2e-05, "loss": 0.0487038, "step": 12083 }, { "epoch": 24.168, "grad_norm": 5.180088520050049, "learning_rate": 2e-05, "loss": 0.07057432, "step": 12084 }, { "epoch": 24.17, "grad_norm": 3.231645345687866, "learning_rate": 2e-05, "loss": 0.0679914, "step": 12085 }, { "epoch": 24.172, "grad_norm": 2.2453713417053223, "learning_rate": 2e-05, "loss": 0.0494958, "step": 12086 }, { "epoch": 24.174, "grad_norm": 1.019257664680481, "learning_rate": 2e-05, "loss": 0.03262502, "step": 12087 }, { "epoch": 24.176, "grad_norm": 1.8194106817245483, "learning_rate": 2e-05, "loss": 0.04817811, "step": 12088 }, { "epoch": 24.178, "grad_norm": 1.1825400590896606, "learning_rate": 2e-05, "loss": 0.04405992, "step": 12089 }, { "epoch": 24.18, "grad_norm": 0.9297054409980774, "learning_rate": 2e-05, "loss": 0.03029897, "step": 12090 }, { "epoch": 24.182, "grad_norm": 1.7777191400527954, "learning_rate": 2e-05, "loss": 0.04615546, "step": 12091 }, { "epoch": 24.184, "grad_norm": 3.235630750656128, "learning_rate": 2e-05, "loss": 0.05811172, "step": 12092 }, { "epoch": 24.186, "grad_norm": 1.1768110990524292, "learning_rate": 2e-05, "loss": 0.03787137, "step": 12093 }, { "epoch": 24.188, "grad_norm": 4.949007034301758, "learning_rate": 2e-05, "loss": 0.03465776, "step": 12094 }, { "epoch": 24.19, "grad_norm": 1.4985238313674927, "learning_rate": 2e-05, "loss": 0.04126935, "step": 12095 }, { "epoch": 24.192, "grad_norm": 2.3714089393615723, "learning_rate": 2e-05, "loss": 0.03277493, "step": 12096 }, { "epoch": 24.194, "grad_norm": 1.5087124109268188, "learning_rate": 2e-05, "loss": 0.05334678, "step": 12097 }, { "epoch": 24.196, "grad_norm": 1.2616825103759766, "learning_rate": 2e-05, "loss": 0.04551134, "step": 12098 }, { "epoch": 24.198, "grad_norm": 1.158451795578003, "learning_rate": 2e-05, "loss": 0.04630031, "step": 12099 }, { "epoch": 24.2, "grad_norm": 1.520171880722046, "learning_rate": 2e-05, "loss": 0.03448167, "step": 12100 }, { "epoch": 24.202, "grad_norm": 5.375433921813965, "learning_rate": 2e-05, "loss": 0.05557346, "step": 12101 }, { "epoch": 24.204, "grad_norm": 2.1798291206359863, "learning_rate": 2e-05, "loss": 0.05976859, "step": 12102 }, { "epoch": 24.206, "grad_norm": 1.5132516622543335, "learning_rate": 2e-05, "loss": 0.04973812, "step": 12103 }, { "epoch": 24.208, "grad_norm": 1.4551432132720947, "learning_rate": 2e-05, "loss": 0.03925546, "step": 12104 }, { "epoch": 24.21, "grad_norm": 1.4387096166610718, "learning_rate": 2e-05, "loss": 0.04110787, "step": 12105 }, { "epoch": 24.212, "grad_norm": 0.9817237854003906, "learning_rate": 2e-05, "loss": 0.043275, "step": 12106 }, { "epoch": 24.214, "grad_norm": 0.899992048740387, "learning_rate": 2e-05, "loss": 0.02760095, "step": 12107 }, { "epoch": 24.216, "grad_norm": 1.9981087446212769, "learning_rate": 2e-05, "loss": 0.08154525, "step": 12108 }, { "epoch": 24.218, "grad_norm": 1.6760551929473877, "learning_rate": 2e-05, "loss": 0.05679325, "step": 12109 }, { "epoch": 24.22, "grad_norm": 1.3508455753326416, "learning_rate": 2e-05, "loss": 0.05091976, "step": 12110 }, { "epoch": 24.222, "grad_norm": 1.8912901878356934, "learning_rate": 2e-05, "loss": 0.06227458, "step": 12111 }, { "epoch": 24.224, "grad_norm": 1.3273926973342896, "learning_rate": 2e-05, "loss": 0.05052402, "step": 12112 }, { "epoch": 24.226, "grad_norm": 1.2464828491210938, "learning_rate": 2e-05, "loss": 0.05788217, "step": 12113 }, { "epoch": 24.228, "grad_norm": 2.049316883087158, "learning_rate": 2e-05, "loss": 0.06217295, "step": 12114 }, { "epoch": 24.23, "grad_norm": 1.428179144859314, "learning_rate": 2e-05, "loss": 0.05017955, "step": 12115 }, { "epoch": 24.232, "grad_norm": 1.767197847366333, "learning_rate": 2e-05, "loss": 0.05077636, "step": 12116 }, { "epoch": 24.234, "grad_norm": 1.231481671333313, "learning_rate": 2e-05, "loss": 0.05873109, "step": 12117 }, { "epoch": 24.236, "grad_norm": 1.6732200384140015, "learning_rate": 2e-05, "loss": 0.04653981, "step": 12118 }, { "epoch": 24.238, "grad_norm": 1.0597010850906372, "learning_rate": 2e-05, "loss": 0.03581889, "step": 12119 }, { "epoch": 24.24, "grad_norm": 1.2121529579162598, "learning_rate": 2e-05, "loss": 0.04930365, "step": 12120 }, { "epoch": 24.242, "grad_norm": 1.3998339176177979, "learning_rate": 2e-05, "loss": 0.03907991, "step": 12121 }, { "epoch": 24.244, "grad_norm": 1.2382348775863647, "learning_rate": 2e-05, "loss": 0.05211905, "step": 12122 }, { "epoch": 24.246, "grad_norm": 1.1277509927749634, "learning_rate": 2e-05, "loss": 0.03834852, "step": 12123 }, { "epoch": 24.248, "grad_norm": 1.2743759155273438, "learning_rate": 2e-05, "loss": 0.0476411, "step": 12124 }, { "epoch": 24.25, "grad_norm": 1.2436480522155762, "learning_rate": 2e-05, "loss": 0.0411014, "step": 12125 }, { "epoch": 24.252, "grad_norm": 1.9588987827301025, "learning_rate": 2e-05, "loss": 0.06562616, "step": 12126 }, { "epoch": 24.254, "grad_norm": 1.318472146987915, "learning_rate": 2e-05, "loss": 0.04503114, "step": 12127 }, { "epoch": 24.256, "grad_norm": 1.3602678775787354, "learning_rate": 2e-05, "loss": 0.02978888, "step": 12128 }, { "epoch": 24.258, "grad_norm": 1.2801728248596191, "learning_rate": 2e-05, "loss": 0.03404569, "step": 12129 }, { "epoch": 24.26, "grad_norm": 1.8510991334915161, "learning_rate": 2e-05, "loss": 0.041315, "step": 12130 }, { "epoch": 24.262, "grad_norm": 1.1254820823669434, "learning_rate": 2e-05, "loss": 0.04042017, "step": 12131 }, { "epoch": 24.264, "grad_norm": 1.4146277904510498, "learning_rate": 2e-05, "loss": 0.04251551, "step": 12132 }, { "epoch": 24.266, "grad_norm": 1.4054226875305176, "learning_rate": 2e-05, "loss": 0.03718261, "step": 12133 }, { "epoch": 24.268, "grad_norm": 1.2703016996383667, "learning_rate": 2e-05, "loss": 0.0467224, "step": 12134 }, { "epoch": 24.27, "grad_norm": 1.6173110008239746, "learning_rate": 2e-05, "loss": 0.05684683, "step": 12135 }, { "epoch": 24.272, "grad_norm": 2.035446882247925, "learning_rate": 2e-05, "loss": 0.04400192, "step": 12136 }, { "epoch": 24.274, "grad_norm": 1.105391502380371, "learning_rate": 2e-05, "loss": 0.04434478, "step": 12137 }, { "epoch": 24.276, "grad_norm": 1.2330455780029297, "learning_rate": 2e-05, "loss": 0.05438062, "step": 12138 }, { "epoch": 24.278, "grad_norm": 1.1647205352783203, "learning_rate": 2e-05, "loss": 0.03314302, "step": 12139 }, { "epoch": 24.28, "grad_norm": 1.1791774034500122, "learning_rate": 2e-05, "loss": 0.05466758, "step": 12140 }, { "epoch": 24.282, "grad_norm": 1.4208977222442627, "learning_rate": 2e-05, "loss": 0.03269932, "step": 12141 }, { "epoch": 24.284, "grad_norm": 1.1843621730804443, "learning_rate": 2e-05, "loss": 0.04443068, "step": 12142 }, { "epoch": 24.286, "grad_norm": 1.522578239440918, "learning_rate": 2e-05, "loss": 0.06013615, "step": 12143 }, { "epoch": 24.288, "grad_norm": 1.4927306175231934, "learning_rate": 2e-05, "loss": 0.04813029, "step": 12144 }, { "epoch": 24.29, "grad_norm": 1.2117818593978882, "learning_rate": 2e-05, "loss": 0.03942649, "step": 12145 }, { "epoch": 24.292, "grad_norm": 1.0251680612564087, "learning_rate": 2e-05, "loss": 0.03021972, "step": 12146 }, { "epoch": 24.294, "grad_norm": 1.3790630102157593, "learning_rate": 2e-05, "loss": 0.05024281, "step": 12147 }, { "epoch": 24.296, "grad_norm": 1.5437334775924683, "learning_rate": 2e-05, "loss": 0.03962575, "step": 12148 }, { "epoch": 24.298, "grad_norm": 1.204317569732666, "learning_rate": 2e-05, "loss": 0.03893788, "step": 12149 }, { "epoch": 24.3, "grad_norm": 1.1474257707595825, "learning_rate": 2e-05, "loss": 0.04675932, "step": 12150 }, { "epoch": 24.302, "grad_norm": 0.9916406869888306, "learning_rate": 2e-05, "loss": 0.03100633, "step": 12151 }, { "epoch": 24.304, "grad_norm": 1.9832628965377808, "learning_rate": 2e-05, "loss": 0.05169064, "step": 12152 }, { "epoch": 24.306, "grad_norm": 1.811354398727417, "learning_rate": 2e-05, "loss": 0.05321472, "step": 12153 }, { "epoch": 24.308, "grad_norm": 1.277738094329834, "learning_rate": 2e-05, "loss": 0.04673781, "step": 12154 }, { "epoch": 24.31, "grad_norm": 1.3241530656814575, "learning_rate": 2e-05, "loss": 0.04513844, "step": 12155 }, { "epoch": 24.312, "grad_norm": 2.2016355991363525, "learning_rate": 2e-05, "loss": 0.03816313, "step": 12156 }, { "epoch": 24.314, "grad_norm": 1.972180962562561, "learning_rate": 2e-05, "loss": 0.04108272, "step": 12157 }, { "epoch": 24.316, "grad_norm": 1.1562265157699585, "learning_rate": 2e-05, "loss": 0.04686077, "step": 12158 }, { "epoch": 24.318, "grad_norm": 1.2996026277542114, "learning_rate": 2e-05, "loss": 0.04213938, "step": 12159 }, { "epoch": 24.32, "grad_norm": 2.028224468231201, "learning_rate": 2e-05, "loss": 0.05159696, "step": 12160 }, { "epoch": 24.322, "grad_norm": 1.2881364822387695, "learning_rate": 2e-05, "loss": 0.04292353, "step": 12161 }, { "epoch": 24.324, "grad_norm": 1.4620164632797241, "learning_rate": 2e-05, "loss": 0.04024596, "step": 12162 }, { "epoch": 24.326, "grad_norm": 1.6173001527786255, "learning_rate": 2e-05, "loss": 0.04233917, "step": 12163 }, { "epoch": 24.328, "grad_norm": 1.7425227165222168, "learning_rate": 2e-05, "loss": 0.03686478, "step": 12164 }, { "epoch": 24.33, "grad_norm": 1.112245798110962, "learning_rate": 2e-05, "loss": 0.04378101, "step": 12165 }, { "epoch": 24.332, "grad_norm": 2.1608293056488037, "learning_rate": 2e-05, "loss": 0.04491881, "step": 12166 }, { "epoch": 24.334, "grad_norm": 1.1910284757614136, "learning_rate": 2e-05, "loss": 0.03951531, "step": 12167 }, { "epoch": 24.336, "grad_norm": 1.9994529485702515, "learning_rate": 2e-05, "loss": 0.04223923, "step": 12168 }, { "epoch": 24.338, "grad_norm": 1.0348660945892334, "learning_rate": 2e-05, "loss": 0.03475592, "step": 12169 }, { "epoch": 24.34, "grad_norm": 3.0013294219970703, "learning_rate": 2e-05, "loss": 0.06690484, "step": 12170 }, { "epoch": 24.342, "grad_norm": 1.6031739711761475, "learning_rate": 2e-05, "loss": 0.04361062, "step": 12171 }, { "epoch": 24.344, "grad_norm": 2.190584182739258, "learning_rate": 2e-05, "loss": 0.04059751, "step": 12172 }, { "epoch": 24.346, "grad_norm": 1.5847196578979492, "learning_rate": 2e-05, "loss": 0.03874936, "step": 12173 }, { "epoch": 24.348, "grad_norm": 2.4007716178894043, "learning_rate": 2e-05, "loss": 0.04349686, "step": 12174 }, { "epoch": 24.35, "grad_norm": 3.5807485580444336, "learning_rate": 2e-05, "loss": 0.04612588, "step": 12175 }, { "epoch": 24.352, "grad_norm": 1.5115255117416382, "learning_rate": 2e-05, "loss": 0.05002192, "step": 12176 }, { "epoch": 24.354, "grad_norm": 1.3225903511047363, "learning_rate": 2e-05, "loss": 0.04832003, "step": 12177 }, { "epoch": 24.356, "grad_norm": 1.0971873998641968, "learning_rate": 2e-05, "loss": 0.038155, "step": 12178 }, { "epoch": 24.358, "grad_norm": 0.9766896367073059, "learning_rate": 2e-05, "loss": 0.0364832, "step": 12179 }, { "epoch": 24.36, "grad_norm": 2.5646822452545166, "learning_rate": 2e-05, "loss": 0.04737934, "step": 12180 }, { "epoch": 24.362, "grad_norm": 1.9678560495376587, "learning_rate": 2e-05, "loss": 0.03837503, "step": 12181 }, { "epoch": 24.364, "grad_norm": 1.550510048866272, "learning_rate": 2e-05, "loss": 0.05680725, "step": 12182 }, { "epoch": 24.366, "grad_norm": 1.2245664596557617, "learning_rate": 2e-05, "loss": 0.04190246, "step": 12183 }, { "epoch": 24.368, "grad_norm": 1.0684999227523804, "learning_rate": 2e-05, "loss": 0.04685017, "step": 12184 }, { "epoch": 24.37, "grad_norm": 1.253795862197876, "learning_rate": 2e-05, "loss": 0.05009492, "step": 12185 }, { "epoch": 24.372, "grad_norm": 1.1181055307388306, "learning_rate": 2e-05, "loss": 0.03659803, "step": 12186 }, { "epoch": 24.374, "grad_norm": 2.1159071922302246, "learning_rate": 2e-05, "loss": 0.04460203, "step": 12187 }, { "epoch": 24.376, "grad_norm": 1.5088142156600952, "learning_rate": 2e-05, "loss": 0.05556968, "step": 12188 }, { "epoch": 24.378, "grad_norm": 2.524197578430176, "learning_rate": 2e-05, "loss": 0.05749575, "step": 12189 }, { "epoch": 24.38, "grad_norm": 1.4844391345977783, "learning_rate": 2e-05, "loss": 0.04623716, "step": 12190 }, { "epoch": 24.382, "grad_norm": 1.9306976795196533, "learning_rate": 2e-05, "loss": 0.04222405, "step": 12191 }, { "epoch": 24.384, "grad_norm": 1.4491407871246338, "learning_rate": 2e-05, "loss": 0.03950711, "step": 12192 }, { "epoch": 24.386, "grad_norm": 3.8366308212280273, "learning_rate": 2e-05, "loss": 0.04016727, "step": 12193 }, { "epoch": 24.388, "grad_norm": 2.6945362091064453, "learning_rate": 2e-05, "loss": 0.05264809, "step": 12194 }, { "epoch": 24.39, "grad_norm": 1.028458833694458, "learning_rate": 2e-05, "loss": 0.02927229, "step": 12195 }, { "epoch": 24.392, "grad_norm": 1.1725099086761475, "learning_rate": 2e-05, "loss": 0.04898581, "step": 12196 }, { "epoch": 24.394, "grad_norm": 1.7470937967300415, "learning_rate": 2e-05, "loss": 0.03954092, "step": 12197 }, { "epoch": 24.396, "grad_norm": 2.421860456466675, "learning_rate": 2e-05, "loss": 0.04920001, "step": 12198 }, { "epoch": 24.398, "grad_norm": 1.3461695909500122, "learning_rate": 2e-05, "loss": 0.04510684, "step": 12199 }, { "epoch": 24.4, "grad_norm": 1.7747408151626587, "learning_rate": 2e-05, "loss": 0.05545141, "step": 12200 }, { "epoch": 24.402, "grad_norm": 1.768431544303894, "learning_rate": 2e-05, "loss": 0.03842364, "step": 12201 }, { "epoch": 24.404, "grad_norm": 1.4380724430084229, "learning_rate": 2e-05, "loss": 0.03090051, "step": 12202 }, { "epoch": 24.406, "grad_norm": 1.3252431154251099, "learning_rate": 2e-05, "loss": 0.04328402, "step": 12203 }, { "epoch": 24.408, "grad_norm": 1.5969432592391968, "learning_rate": 2e-05, "loss": 0.04301886, "step": 12204 }, { "epoch": 24.41, "grad_norm": 1.354119062423706, "learning_rate": 2e-05, "loss": 0.03353261, "step": 12205 }, { "epoch": 24.412, "grad_norm": 1.791136622428894, "learning_rate": 2e-05, "loss": 0.06121269, "step": 12206 }, { "epoch": 24.414, "grad_norm": 3.968132495880127, "learning_rate": 2e-05, "loss": 0.04851215, "step": 12207 }, { "epoch": 24.416, "grad_norm": 1.5197120904922485, "learning_rate": 2e-05, "loss": 0.04968302, "step": 12208 }, { "epoch": 24.418, "grad_norm": 2.272183418273926, "learning_rate": 2e-05, "loss": 0.04752434, "step": 12209 }, { "epoch": 24.42, "grad_norm": 2.935992479324341, "learning_rate": 2e-05, "loss": 0.05976545, "step": 12210 }, { "epoch": 24.422, "grad_norm": 1.0861903429031372, "learning_rate": 2e-05, "loss": 0.04118664, "step": 12211 }, { "epoch": 24.424, "grad_norm": 1.050560474395752, "learning_rate": 2e-05, "loss": 0.03079762, "step": 12212 }, { "epoch": 24.426, "grad_norm": 2.5487446784973145, "learning_rate": 2e-05, "loss": 0.04272437, "step": 12213 }, { "epoch": 24.428, "grad_norm": 1.3722039461135864, "learning_rate": 2e-05, "loss": 0.0525198, "step": 12214 }, { "epoch": 24.43, "grad_norm": 1.176689863204956, "learning_rate": 2e-05, "loss": 0.0387351, "step": 12215 }, { "epoch": 24.432, "grad_norm": 1.4874681234359741, "learning_rate": 2e-05, "loss": 0.05370562, "step": 12216 }, { "epoch": 24.434, "grad_norm": 1.266204595565796, "learning_rate": 2e-05, "loss": 0.04619331, "step": 12217 }, { "epoch": 24.436, "grad_norm": 1.2195080518722534, "learning_rate": 2e-05, "loss": 0.03758291, "step": 12218 }, { "epoch": 24.438, "grad_norm": 2.4579455852508545, "learning_rate": 2e-05, "loss": 0.04582806, "step": 12219 }, { "epoch": 24.44, "grad_norm": 1.3026827573776245, "learning_rate": 2e-05, "loss": 0.04520482, "step": 12220 }, { "epoch": 24.442, "grad_norm": 1.2211519479751587, "learning_rate": 2e-05, "loss": 0.0372187, "step": 12221 }, { "epoch": 24.444, "grad_norm": 1.1291041374206543, "learning_rate": 2e-05, "loss": 0.03202847, "step": 12222 }, { "epoch": 24.446, "grad_norm": 1.0536681413650513, "learning_rate": 2e-05, "loss": 0.03556249, "step": 12223 }, { "epoch": 24.448, "grad_norm": 1.5793570280075073, "learning_rate": 2e-05, "loss": 0.04041308, "step": 12224 }, { "epoch": 24.45, "grad_norm": 1.9870901107788086, "learning_rate": 2e-05, "loss": 0.04744115, "step": 12225 }, { "epoch": 24.452, "grad_norm": 1.1192922592163086, "learning_rate": 2e-05, "loss": 0.03771604, "step": 12226 }, { "epoch": 24.454, "grad_norm": 1.5271708965301514, "learning_rate": 2e-05, "loss": 0.04509822, "step": 12227 }, { "epoch": 24.456, "grad_norm": 1.09177565574646, "learning_rate": 2e-05, "loss": 0.03703531, "step": 12228 }, { "epoch": 24.458, "grad_norm": 1.141800880432129, "learning_rate": 2e-05, "loss": 0.03585979, "step": 12229 }, { "epoch": 24.46, "grad_norm": 0.9765757322311401, "learning_rate": 2e-05, "loss": 0.03288185, "step": 12230 }, { "epoch": 24.462, "grad_norm": 1.7331185340881348, "learning_rate": 2e-05, "loss": 0.06162242, "step": 12231 }, { "epoch": 24.464, "grad_norm": 1.0068011283874512, "learning_rate": 2e-05, "loss": 0.03150559, "step": 12232 }, { "epoch": 24.466, "grad_norm": 2.214324951171875, "learning_rate": 2e-05, "loss": 0.05624455, "step": 12233 }, { "epoch": 24.468, "grad_norm": 1.3637458086013794, "learning_rate": 2e-05, "loss": 0.04643622, "step": 12234 }, { "epoch": 24.47, "grad_norm": 1.8071335554122925, "learning_rate": 2e-05, "loss": 0.05237973, "step": 12235 }, { "epoch": 24.472, "grad_norm": 1.4858529567718506, "learning_rate": 2e-05, "loss": 0.04988906, "step": 12236 }, { "epoch": 24.474, "grad_norm": 2.615488052368164, "learning_rate": 2e-05, "loss": 0.05420715, "step": 12237 }, { "epoch": 24.476, "grad_norm": 1.3015847206115723, "learning_rate": 2e-05, "loss": 0.04124864, "step": 12238 }, { "epoch": 24.478, "grad_norm": 1.670394778251648, "learning_rate": 2e-05, "loss": 0.05579087, "step": 12239 }, { "epoch": 24.48, "grad_norm": 0.9430084228515625, "learning_rate": 2e-05, "loss": 0.03753918, "step": 12240 }, { "epoch": 24.482, "grad_norm": 1.3326176404953003, "learning_rate": 2e-05, "loss": 0.05490822, "step": 12241 }, { "epoch": 24.484, "grad_norm": 1.2883130311965942, "learning_rate": 2e-05, "loss": 0.0533482, "step": 12242 }, { "epoch": 24.486, "grad_norm": 1.2082029581069946, "learning_rate": 2e-05, "loss": 0.04799801, "step": 12243 }, { "epoch": 24.488, "grad_norm": 1.6594882011413574, "learning_rate": 2e-05, "loss": 0.04107316, "step": 12244 }, { "epoch": 24.49, "grad_norm": 1.1790004968643188, "learning_rate": 2e-05, "loss": 0.03800457, "step": 12245 }, { "epoch": 24.492, "grad_norm": 1.3230494260787964, "learning_rate": 2e-05, "loss": 0.05228403, "step": 12246 }, { "epoch": 24.494, "grad_norm": 2.5752084255218506, "learning_rate": 2e-05, "loss": 0.04225325, "step": 12247 }, { "epoch": 24.496, "grad_norm": 1.1051968336105347, "learning_rate": 2e-05, "loss": 0.03597275, "step": 12248 }, { "epoch": 24.498, "grad_norm": 1.0105959177017212, "learning_rate": 2e-05, "loss": 0.03709402, "step": 12249 }, { "epoch": 24.5, "grad_norm": 1.417772650718689, "learning_rate": 2e-05, "loss": 0.05351973, "step": 12250 }, { "epoch": 24.502, "grad_norm": 1.1224205493927002, "learning_rate": 2e-05, "loss": 0.03034319, "step": 12251 }, { "epoch": 24.504, "grad_norm": 1.2055881023406982, "learning_rate": 2e-05, "loss": 0.04971508, "step": 12252 }, { "epoch": 24.506, "grad_norm": 2.210259199142456, "learning_rate": 2e-05, "loss": 0.05114106, "step": 12253 }, { "epoch": 24.508, "grad_norm": 1.4939137697219849, "learning_rate": 2e-05, "loss": 0.05452298, "step": 12254 }, { "epoch": 24.51, "grad_norm": 0.9842581748962402, "learning_rate": 2e-05, "loss": 0.03696226, "step": 12255 }, { "epoch": 24.512, "grad_norm": 1.253783941268921, "learning_rate": 2e-05, "loss": 0.03539809, "step": 12256 }, { "epoch": 24.514, "grad_norm": 3.1072165966033936, "learning_rate": 2e-05, "loss": 0.05696538, "step": 12257 }, { "epoch": 24.516, "grad_norm": 1.1666399240493774, "learning_rate": 2e-05, "loss": 0.04510186, "step": 12258 }, { "epoch": 24.518, "grad_norm": 1.8516592979431152, "learning_rate": 2e-05, "loss": 0.06507455, "step": 12259 }, { "epoch": 24.52, "grad_norm": 2.5476531982421875, "learning_rate": 2e-05, "loss": 0.04386877, "step": 12260 }, { "epoch": 24.522, "grad_norm": 1.530977487564087, "learning_rate": 2e-05, "loss": 0.05524298, "step": 12261 }, { "epoch": 24.524, "grad_norm": 0.8967766761779785, "learning_rate": 2e-05, "loss": 0.02332542, "step": 12262 }, { "epoch": 24.526, "grad_norm": 1.7736743688583374, "learning_rate": 2e-05, "loss": 0.05415568, "step": 12263 }, { "epoch": 24.528, "grad_norm": 1.1440763473510742, "learning_rate": 2e-05, "loss": 0.0214408, "step": 12264 }, { "epoch": 24.53, "grad_norm": 1.4621922969818115, "learning_rate": 2e-05, "loss": 0.04975047, "step": 12265 }, { "epoch": 24.532, "grad_norm": 2.4430623054504395, "learning_rate": 2e-05, "loss": 0.0487452, "step": 12266 }, { "epoch": 24.534, "grad_norm": 1.67803955078125, "learning_rate": 2e-05, "loss": 0.046294, "step": 12267 }, { "epoch": 24.536, "grad_norm": 3.9234485626220703, "learning_rate": 2e-05, "loss": 0.05363451, "step": 12268 }, { "epoch": 24.538, "grad_norm": 1.0378974676132202, "learning_rate": 2e-05, "loss": 0.03485204, "step": 12269 }, { "epoch": 24.54, "grad_norm": 2.17879581451416, "learning_rate": 2e-05, "loss": 0.05576217, "step": 12270 }, { "epoch": 24.542, "grad_norm": 1.3864517211914062, "learning_rate": 2e-05, "loss": 0.0414337, "step": 12271 }, { "epoch": 24.544, "grad_norm": 1.147458553314209, "learning_rate": 2e-05, "loss": 0.03207621, "step": 12272 }, { "epoch": 24.546, "grad_norm": 1.8686847686767578, "learning_rate": 2e-05, "loss": 0.06861442, "step": 12273 }, { "epoch": 24.548000000000002, "grad_norm": 1.162184715270996, "learning_rate": 2e-05, "loss": 0.03643592, "step": 12274 }, { "epoch": 24.55, "grad_norm": 2.051259994506836, "learning_rate": 2e-05, "loss": 0.04812557, "step": 12275 }, { "epoch": 24.552, "grad_norm": 3.596480131149292, "learning_rate": 2e-05, "loss": 0.04335639, "step": 12276 }, { "epoch": 24.554, "grad_norm": 1.473477840423584, "learning_rate": 2e-05, "loss": 0.05357858, "step": 12277 }, { "epoch": 24.556, "grad_norm": 1.5587390661239624, "learning_rate": 2e-05, "loss": 0.04699494, "step": 12278 }, { "epoch": 24.558, "grad_norm": 1.1098556518554688, "learning_rate": 2e-05, "loss": 0.04609701, "step": 12279 }, { "epoch": 24.56, "grad_norm": 2.259882688522339, "learning_rate": 2e-05, "loss": 0.05510319, "step": 12280 }, { "epoch": 24.562, "grad_norm": 1.759789228439331, "learning_rate": 2e-05, "loss": 0.04140832, "step": 12281 }, { "epoch": 24.564, "grad_norm": 1.605728268623352, "learning_rate": 2e-05, "loss": 0.04575901, "step": 12282 }, { "epoch": 24.566, "grad_norm": 1.5841501951217651, "learning_rate": 2e-05, "loss": 0.06220622, "step": 12283 }, { "epoch": 24.568, "grad_norm": 1.8580799102783203, "learning_rate": 2e-05, "loss": 0.0317122, "step": 12284 }, { "epoch": 24.57, "grad_norm": 1.2249244451522827, "learning_rate": 2e-05, "loss": 0.04554923, "step": 12285 }, { "epoch": 24.572, "grad_norm": 1.4103236198425293, "learning_rate": 2e-05, "loss": 0.05672751, "step": 12286 }, { "epoch": 24.574, "grad_norm": 1.2100307941436768, "learning_rate": 2e-05, "loss": 0.04418559, "step": 12287 }, { "epoch": 24.576, "grad_norm": 1.7110328674316406, "learning_rate": 2e-05, "loss": 0.05983176, "step": 12288 }, { "epoch": 24.578, "grad_norm": 4.228270053863525, "learning_rate": 2e-05, "loss": 0.05921287, "step": 12289 }, { "epoch": 24.58, "grad_norm": 1.1426968574523926, "learning_rate": 2e-05, "loss": 0.04294404, "step": 12290 }, { "epoch": 24.582, "grad_norm": 2.854487419128418, "learning_rate": 2e-05, "loss": 0.04818879, "step": 12291 }, { "epoch": 24.584, "grad_norm": 1.464268445968628, "learning_rate": 2e-05, "loss": 0.05259855, "step": 12292 }, { "epoch": 24.586, "grad_norm": 1.6875348091125488, "learning_rate": 2e-05, "loss": 0.04386838, "step": 12293 }, { "epoch": 24.588, "grad_norm": 1.284960150718689, "learning_rate": 2e-05, "loss": 0.03831192, "step": 12294 }, { "epoch": 24.59, "grad_norm": 1.7952815294265747, "learning_rate": 2e-05, "loss": 0.05706906, "step": 12295 }, { "epoch": 24.592, "grad_norm": 1.6755213737487793, "learning_rate": 2e-05, "loss": 0.05718254, "step": 12296 }, { "epoch": 24.594, "grad_norm": 1.6490305662155151, "learning_rate": 2e-05, "loss": 0.04554712, "step": 12297 }, { "epoch": 24.596, "grad_norm": 1.4170808792114258, "learning_rate": 2e-05, "loss": 0.04948672, "step": 12298 }, { "epoch": 24.598, "grad_norm": 0.9543237686157227, "learning_rate": 2e-05, "loss": 0.03450447, "step": 12299 }, { "epoch": 24.6, "grad_norm": 1.3354976177215576, "learning_rate": 2e-05, "loss": 0.04121848, "step": 12300 }, { "epoch": 24.602, "grad_norm": 1.322101354598999, "learning_rate": 2e-05, "loss": 0.03823018, "step": 12301 }, { "epoch": 24.604, "grad_norm": 3.014385938644409, "learning_rate": 2e-05, "loss": 0.04420658, "step": 12302 }, { "epoch": 24.606, "grad_norm": 1.7275763750076294, "learning_rate": 2e-05, "loss": 0.05624251, "step": 12303 }, { "epoch": 24.608, "grad_norm": 1.091782569885254, "learning_rate": 2e-05, "loss": 0.0310542, "step": 12304 }, { "epoch": 24.61, "grad_norm": 1.6847692728042603, "learning_rate": 2e-05, "loss": 0.04774918, "step": 12305 }, { "epoch": 24.612, "grad_norm": 1.1882266998291016, "learning_rate": 2e-05, "loss": 0.04614477, "step": 12306 }, { "epoch": 24.614, "grad_norm": 2.3512465953826904, "learning_rate": 2e-05, "loss": 0.07600546, "step": 12307 }, { "epoch": 24.616, "grad_norm": 1.3159834146499634, "learning_rate": 2e-05, "loss": 0.05892436, "step": 12308 }, { "epoch": 24.618, "grad_norm": 1.2765941619873047, "learning_rate": 2e-05, "loss": 0.03476145, "step": 12309 }, { "epoch": 24.62, "grad_norm": 1.3784600496292114, "learning_rate": 2e-05, "loss": 0.03707298, "step": 12310 }, { "epoch": 24.622, "grad_norm": 1.248801350593567, "learning_rate": 2e-05, "loss": 0.04117713, "step": 12311 }, { "epoch": 24.624, "grad_norm": 1.257441759109497, "learning_rate": 2e-05, "loss": 0.03224612, "step": 12312 }, { "epoch": 24.626, "grad_norm": 1.3691023588180542, "learning_rate": 2e-05, "loss": 0.04922335, "step": 12313 }, { "epoch": 24.628, "grad_norm": 1.3157551288604736, "learning_rate": 2e-05, "loss": 0.03670745, "step": 12314 }, { "epoch": 24.63, "grad_norm": 1.2883113622665405, "learning_rate": 2e-05, "loss": 0.03211619, "step": 12315 }, { "epoch": 24.632, "grad_norm": 1.0646746158599854, "learning_rate": 2e-05, "loss": 0.03154656, "step": 12316 }, { "epoch": 24.634, "grad_norm": 2.252239227294922, "learning_rate": 2e-05, "loss": 0.0534423, "step": 12317 }, { "epoch": 24.636, "grad_norm": 1.5512782335281372, "learning_rate": 2e-05, "loss": 0.05046772, "step": 12318 }, { "epoch": 24.638, "grad_norm": 1.1680291891098022, "learning_rate": 2e-05, "loss": 0.04530792, "step": 12319 }, { "epoch": 24.64, "grad_norm": 1.2998766899108887, "learning_rate": 2e-05, "loss": 0.0484796, "step": 12320 }, { "epoch": 24.642, "grad_norm": 2.491387128829956, "learning_rate": 2e-05, "loss": 0.04955789, "step": 12321 }, { "epoch": 24.644, "grad_norm": 2.7813923358917236, "learning_rate": 2e-05, "loss": 0.05465161, "step": 12322 }, { "epoch": 24.646, "grad_norm": 3.847205400466919, "learning_rate": 2e-05, "loss": 0.07493284, "step": 12323 }, { "epoch": 24.648, "grad_norm": 1.364022135734558, "learning_rate": 2e-05, "loss": 0.03194257, "step": 12324 }, { "epoch": 24.65, "grad_norm": 1.3578834533691406, "learning_rate": 2e-05, "loss": 0.05537183, "step": 12325 }, { "epoch": 24.652, "grad_norm": 1.1270604133605957, "learning_rate": 2e-05, "loss": 0.04368369, "step": 12326 }, { "epoch": 24.654, "grad_norm": 2.827782154083252, "learning_rate": 2e-05, "loss": 0.05406433, "step": 12327 }, { "epoch": 24.656, "grad_norm": 1.6303904056549072, "learning_rate": 2e-05, "loss": 0.05844283, "step": 12328 }, { "epoch": 24.658, "grad_norm": 1.267575740814209, "learning_rate": 2e-05, "loss": 0.04553982, "step": 12329 }, { "epoch": 24.66, "grad_norm": 2.1354129314422607, "learning_rate": 2e-05, "loss": 0.05353948, "step": 12330 }, { "epoch": 24.662, "grad_norm": 3.700606346130371, "learning_rate": 2e-05, "loss": 0.03866877, "step": 12331 }, { "epoch": 24.664, "grad_norm": 3.4093546867370605, "learning_rate": 2e-05, "loss": 0.03720096, "step": 12332 }, { "epoch": 24.666, "grad_norm": 1.756603479385376, "learning_rate": 2e-05, "loss": 0.04626716, "step": 12333 }, { "epoch": 24.668, "grad_norm": 2.8935391902923584, "learning_rate": 2e-05, "loss": 0.04296909, "step": 12334 }, { "epoch": 24.67, "grad_norm": 2.631392002105713, "learning_rate": 2e-05, "loss": 0.05087268, "step": 12335 }, { "epoch": 24.672, "grad_norm": 1.9597891569137573, "learning_rate": 2e-05, "loss": 0.04416256, "step": 12336 }, { "epoch": 24.674, "grad_norm": 1.3087323904037476, "learning_rate": 2e-05, "loss": 0.05481458, "step": 12337 }, { "epoch": 24.676, "grad_norm": 172.79251098632812, "learning_rate": 2e-05, "loss": 0.04823511, "step": 12338 }, { "epoch": 24.678, "grad_norm": 1.6188311576843262, "learning_rate": 2e-05, "loss": 0.05058477, "step": 12339 }, { "epoch": 24.68, "grad_norm": 1.4070014953613281, "learning_rate": 2e-05, "loss": 0.05267171, "step": 12340 }, { "epoch": 24.682, "grad_norm": 1.056060552597046, "learning_rate": 2e-05, "loss": 0.04122014, "step": 12341 }, { "epoch": 24.684, "grad_norm": 1.0840667486190796, "learning_rate": 2e-05, "loss": 0.03836139, "step": 12342 }, { "epoch": 24.686, "grad_norm": 1.6224464178085327, "learning_rate": 2e-05, "loss": 0.03195419, "step": 12343 }, { "epoch": 24.688, "grad_norm": 1.4936470985412598, "learning_rate": 2e-05, "loss": 0.07893459, "step": 12344 }, { "epoch": 24.69, "grad_norm": 1.8073782920837402, "learning_rate": 2e-05, "loss": 0.05727108, "step": 12345 }, { "epoch": 24.692, "grad_norm": 1.218421220779419, "learning_rate": 2e-05, "loss": 0.046979, "step": 12346 }, { "epoch": 24.694, "grad_norm": 1.725235939025879, "learning_rate": 2e-05, "loss": 0.03699161, "step": 12347 }, { "epoch": 24.696, "grad_norm": 2.4789557456970215, "learning_rate": 2e-05, "loss": 0.04929878, "step": 12348 }, { "epoch": 24.698, "grad_norm": 3.441601276397705, "learning_rate": 2e-05, "loss": 0.0512137, "step": 12349 }, { "epoch": 24.7, "grad_norm": 1.1820622682571411, "learning_rate": 2e-05, "loss": 0.0380182, "step": 12350 }, { "epoch": 24.701999999999998, "grad_norm": 1.0114235877990723, "learning_rate": 2e-05, "loss": 0.03822229, "step": 12351 }, { "epoch": 24.704, "grad_norm": 1.1000703573226929, "learning_rate": 2e-05, "loss": 0.03765395, "step": 12352 }, { "epoch": 24.706, "grad_norm": 1.2947152853012085, "learning_rate": 2e-05, "loss": 0.03386806, "step": 12353 }, { "epoch": 24.708, "grad_norm": 1.8047212362289429, "learning_rate": 2e-05, "loss": 0.05379967, "step": 12354 }, { "epoch": 24.71, "grad_norm": 2.945786476135254, "learning_rate": 2e-05, "loss": 0.05634, "step": 12355 }, { "epoch": 24.712, "grad_norm": 2.3145387172698975, "learning_rate": 2e-05, "loss": 0.06896135, "step": 12356 }, { "epoch": 24.714, "grad_norm": 1.7058087587356567, "learning_rate": 2e-05, "loss": 0.04002954, "step": 12357 }, { "epoch": 24.716, "grad_norm": 1.0332450866699219, "learning_rate": 2e-05, "loss": 0.04152671, "step": 12358 }, { "epoch": 24.718, "grad_norm": 1.0253223180770874, "learning_rate": 2e-05, "loss": 0.05022538, "step": 12359 }, { "epoch": 24.72, "grad_norm": 1.4460958242416382, "learning_rate": 2e-05, "loss": 0.04726547, "step": 12360 }, { "epoch": 24.722, "grad_norm": 2.94338321685791, "learning_rate": 2e-05, "loss": 0.04982202, "step": 12361 }, { "epoch": 24.724, "grad_norm": 1.2132338285446167, "learning_rate": 2e-05, "loss": 0.0424381, "step": 12362 }, { "epoch": 24.726, "grad_norm": 1.2509801387786865, "learning_rate": 2e-05, "loss": 0.03625492, "step": 12363 }, { "epoch": 24.728, "grad_norm": 1.001670241355896, "learning_rate": 2e-05, "loss": 0.03844631, "step": 12364 }, { "epoch": 24.73, "grad_norm": 1.2094303369522095, "learning_rate": 2e-05, "loss": 0.05520956, "step": 12365 }, { "epoch": 24.732, "grad_norm": 1.0332337617874146, "learning_rate": 2e-05, "loss": 0.03330713, "step": 12366 }, { "epoch": 24.734, "grad_norm": 1.8160288333892822, "learning_rate": 2e-05, "loss": 0.06366383, "step": 12367 }, { "epoch": 24.736, "grad_norm": 1.0962200164794922, "learning_rate": 2e-05, "loss": 0.03358031, "step": 12368 }, { "epoch": 24.738, "grad_norm": 1.2256057262420654, "learning_rate": 2e-05, "loss": 0.04108488, "step": 12369 }, { "epoch": 24.74, "grad_norm": 2.2090930938720703, "learning_rate": 2e-05, "loss": 0.06522129, "step": 12370 }, { "epoch": 24.742, "grad_norm": 1.1580830812454224, "learning_rate": 2e-05, "loss": 0.03839488, "step": 12371 }, { "epoch": 24.744, "grad_norm": 1.1396980285644531, "learning_rate": 2e-05, "loss": 0.04421246, "step": 12372 }, { "epoch": 24.746, "grad_norm": 1.0483407974243164, "learning_rate": 2e-05, "loss": 0.03586071, "step": 12373 }, { "epoch": 24.748, "grad_norm": 1.1575627326965332, "learning_rate": 2e-05, "loss": 0.02652693, "step": 12374 }, { "epoch": 24.75, "grad_norm": 1.2578696012496948, "learning_rate": 2e-05, "loss": 0.04251358, "step": 12375 }, { "epoch": 24.752, "grad_norm": 1.2931718826293945, "learning_rate": 2e-05, "loss": 0.04332025, "step": 12376 }, { "epoch": 24.754, "grad_norm": 1.2202832698822021, "learning_rate": 2e-05, "loss": 0.0368543, "step": 12377 }, { "epoch": 24.756, "grad_norm": 2.3942205905914307, "learning_rate": 2e-05, "loss": 0.05159555, "step": 12378 }, { "epoch": 24.758, "grad_norm": 1.4237806797027588, "learning_rate": 2e-05, "loss": 0.04190379, "step": 12379 }, { "epoch": 24.76, "grad_norm": 2.9148285388946533, "learning_rate": 2e-05, "loss": 0.0515301, "step": 12380 }, { "epoch": 24.762, "grad_norm": 1.355830192565918, "learning_rate": 2e-05, "loss": 0.04525499, "step": 12381 }, { "epoch": 24.764, "grad_norm": 1.7303012609481812, "learning_rate": 2e-05, "loss": 0.05111047, "step": 12382 }, { "epoch": 24.766, "grad_norm": 1.722062110900879, "learning_rate": 2e-05, "loss": 0.05258113, "step": 12383 }, { "epoch": 24.768, "grad_norm": 0.8899957537651062, "learning_rate": 2e-05, "loss": 0.02432646, "step": 12384 }, { "epoch": 24.77, "grad_norm": 1.543811559677124, "learning_rate": 2e-05, "loss": 0.04550616, "step": 12385 }, { "epoch": 24.772, "grad_norm": 1.6278526782989502, "learning_rate": 2e-05, "loss": 0.05577083, "step": 12386 }, { "epoch": 24.774, "grad_norm": 1.1185393333435059, "learning_rate": 2e-05, "loss": 0.0319613, "step": 12387 }, { "epoch": 24.776, "grad_norm": 1.4521170854568481, "learning_rate": 2e-05, "loss": 0.05207948, "step": 12388 }, { "epoch": 24.778, "grad_norm": 1.3518644571304321, "learning_rate": 2e-05, "loss": 0.04421254, "step": 12389 }, { "epoch": 24.78, "grad_norm": 1.6394786834716797, "learning_rate": 2e-05, "loss": 0.05696079, "step": 12390 }, { "epoch": 24.782, "grad_norm": 1.0339347124099731, "learning_rate": 2e-05, "loss": 0.03561187, "step": 12391 }, { "epoch": 24.784, "grad_norm": 2.129795789718628, "learning_rate": 2e-05, "loss": 0.04448505, "step": 12392 }, { "epoch": 24.786, "grad_norm": 1.024999976158142, "learning_rate": 2e-05, "loss": 0.05013253, "step": 12393 }, { "epoch": 24.788, "grad_norm": 1.413780927658081, "learning_rate": 2e-05, "loss": 0.03977506, "step": 12394 }, { "epoch": 24.79, "grad_norm": 1.4434248208999634, "learning_rate": 2e-05, "loss": 0.05382534, "step": 12395 }, { "epoch": 24.792, "grad_norm": 1.4509685039520264, "learning_rate": 2e-05, "loss": 0.04992918, "step": 12396 }, { "epoch": 24.794, "grad_norm": 1.47963547706604, "learning_rate": 2e-05, "loss": 0.0391894, "step": 12397 }, { "epoch": 24.796, "grad_norm": 1.1163206100463867, "learning_rate": 2e-05, "loss": 0.03056799, "step": 12398 }, { "epoch": 24.798000000000002, "grad_norm": 1.147055745124817, "learning_rate": 2e-05, "loss": 0.03964867, "step": 12399 }, { "epoch": 24.8, "grad_norm": 0.9283363819122314, "learning_rate": 2e-05, "loss": 0.02905277, "step": 12400 }, { "epoch": 24.802, "grad_norm": 0.967431902885437, "learning_rate": 2e-05, "loss": 0.03618809, "step": 12401 }, { "epoch": 24.804, "grad_norm": 1.2811535596847534, "learning_rate": 2e-05, "loss": 0.04123492, "step": 12402 }, { "epoch": 24.806, "grad_norm": 0.7825071215629578, "learning_rate": 2e-05, "loss": 0.02419277, "step": 12403 }, { "epoch": 24.808, "grad_norm": 3.262420654296875, "learning_rate": 2e-05, "loss": 0.04532979, "step": 12404 }, { "epoch": 24.81, "grad_norm": 3.3228957653045654, "learning_rate": 2e-05, "loss": 0.04625534, "step": 12405 }, { "epoch": 24.812, "grad_norm": 2.1002676486968994, "learning_rate": 2e-05, "loss": 0.04704674, "step": 12406 }, { "epoch": 24.814, "grad_norm": 2.0813817977905273, "learning_rate": 2e-05, "loss": 0.06468686, "step": 12407 }, { "epoch": 24.816, "grad_norm": 1.4478918313980103, "learning_rate": 2e-05, "loss": 0.04282294, "step": 12408 }, { "epoch": 24.818, "grad_norm": 1.2476028203964233, "learning_rate": 2e-05, "loss": 0.04784694, "step": 12409 }, { "epoch": 24.82, "grad_norm": 1.4642516374588013, "learning_rate": 2e-05, "loss": 0.05254901, "step": 12410 }, { "epoch": 24.822, "grad_norm": 0.8719230890274048, "learning_rate": 2e-05, "loss": 0.0320285, "step": 12411 }, { "epoch": 24.824, "grad_norm": 3.0026919841766357, "learning_rate": 2e-05, "loss": 0.05441289, "step": 12412 }, { "epoch": 24.826, "grad_norm": 3.024451732635498, "learning_rate": 2e-05, "loss": 0.04576012, "step": 12413 }, { "epoch": 24.828, "grad_norm": 1.2607303857803345, "learning_rate": 2e-05, "loss": 0.05559196, "step": 12414 }, { "epoch": 24.83, "grad_norm": 1.9243049621582031, "learning_rate": 2e-05, "loss": 0.04724313, "step": 12415 }, { "epoch": 24.832, "grad_norm": 1.1702837944030762, "learning_rate": 2e-05, "loss": 0.04239753, "step": 12416 }, { "epoch": 24.834, "grad_norm": 1.2738158702850342, "learning_rate": 2e-05, "loss": 0.04667337, "step": 12417 }, { "epoch": 24.836, "grad_norm": 1.4549773931503296, "learning_rate": 2e-05, "loss": 0.03520815, "step": 12418 }, { "epoch": 24.838, "grad_norm": 1.89911687374115, "learning_rate": 2e-05, "loss": 0.05533872, "step": 12419 }, { "epoch": 24.84, "grad_norm": 1.3373122215270996, "learning_rate": 2e-05, "loss": 0.04861745, "step": 12420 }, { "epoch": 24.842, "grad_norm": 1.1704626083374023, "learning_rate": 2e-05, "loss": 0.04655242, "step": 12421 }, { "epoch": 24.844, "grad_norm": 1.7334611415863037, "learning_rate": 2e-05, "loss": 0.04279828, "step": 12422 }, { "epoch": 24.846, "grad_norm": 1.5228956937789917, "learning_rate": 2e-05, "loss": 0.06574546, "step": 12423 }, { "epoch": 24.848, "grad_norm": 1.5824440717697144, "learning_rate": 2e-05, "loss": 0.03519925, "step": 12424 }, { "epoch": 24.85, "grad_norm": 1.4289437532424927, "learning_rate": 2e-05, "loss": 0.03780733, "step": 12425 }, { "epoch": 24.852, "grad_norm": 1.498123288154602, "learning_rate": 2e-05, "loss": 0.03797191, "step": 12426 }, { "epoch": 24.854, "grad_norm": 1.006430745124817, "learning_rate": 2e-05, "loss": 0.03351758, "step": 12427 }, { "epoch": 24.856, "grad_norm": 1.0861419439315796, "learning_rate": 2e-05, "loss": 0.0464669, "step": 12428 }, { "epoch": 24.858, "grad_norm": 1.2548224925994873, "learning_rate": 2e-05, "loss": 0.02788034, "step": 12429 }, { "epoch": 24.86, "grad_norm": 1.3924577236175537, "learning_rate": 2e-05, "loss": 0.02896036, "step": 12430 }, { "epoch": 24.862, "grad_norm": 2.9513254165649414, "learning_rate": 2e-05, "loss": 0.05202112, "step": 12431 }, { "epoch": 24.864, "grad_norm": 1.3894107341766357, "learning_rate": 2e-05, "loss": 0.05267946, "step": 12432 }, { "epoch": 24.866, "grad_norm": 1.1392052173614502, "learning_rate": 2e-05, "loss": 0.04014817, "step": 12433 }, { "epoch": 24.868, "grad_norm": 1.3228307962417603, "learning_rate": 2e-05, "loss": 0.03538731, "step": 12434 }, { "epoch": 24.87, "grad_norm": 1.11819589138031, "learning_rate": 2e-05, "loss": 0.03386147, "step": 12435 }, { "epoch": 24.872, "grad_norm": 1.2249038219451904, "learning_rate": 2e-05, "loss": 0.0489039, "step": 12436 }, { "epoch": 24.874, "grad_norm": 1.4754010438919067, "learning_rate": 2e-05, "loss": 0.04698762, "step": 12437 }, { "epoch": 24.876, "grad_norm": 3.302642583847046, "learning_rate": 2e-05, "loss": 0.05646276, "step": 12438 }, { "epoch": 24.878, "grad_norm": 1.419040560722351, "learning_rate": 2e-05, "loss": 0.04880114, "step": 12439 }, { "epoch": 24.88, "grad_norm": 1.1326826810836792, "learning_rate": 2e-05, "loss": 0.04119021, "step": 12440 }, { "epoch": 24.882, "grad_norm": 1.429632306098938, "learning_rate": 2e-05, "loss": 0.0510744, "step": 12441 }, { "epoch": 24.884, "grad_norm": 2.021416425704956, "learning_rate": 2e-05, "loss": 0.05398323, "step": 12442 }, { "epoch": 24.886, "grad_norm": 1.2925662994384766, "learning_rate": 2e-05, "loss": 0.0487259, "step": 12443 }, { "epoch": 24.888, "grad_norm": 1.3188891410827637, "learning_rate": 2e-05, "loss": 0.04359906, "step": 12444 }, { "epoch": 24.89, "grad_norm": 1.8213289976119995, "learning_rate": 2e-05, "loss": 0.05419963, "step": 12445 }, { "epoch": 24.892, "grad_norm": 1.8820993900299072, "learning_rate": 2e-05, "loss": 0.04406717, "step": 12446 }, { "epoch": 24.894, "grad_norm": 1.1095833778381348, "learning_rate": 2e-05, "loss": 0.04703937, "step": 12447 }, { "epoch": 24.896, "grad_norm": 1.145403265953064, "learning_rate": 2e-05, "loss": 0.04098237, "step": 12448 }, { "epoch": 24.898, "grad_norm": 4.939516067504883, "learning_rate": 2e-05, "loss": 0.04888795, "step": 12449 }, { "epoch": 24.9, "grad_norm": 1.2902284860610962, "learning_rate": 2e-05, "loss": 0.04742273, "step": 12450 }, { "epoch": 24.902, "grad_norm": 1.15613853931427, "learning_rate": 2e-05, "loss": 0.02914495, "step": 12451 }, { "epoch": 24.904, "grad_norm": 2.327373504638672, "learning_rate": 2e-05, "loss": 0.05553031, "step": 12452 }, { "epoch": 24.906, "grad_norm": 1.4978739023208618, "learning_rate": 2e-05, "loss": 0.03483084, "step": 12453 }, { "epoch": 24.908, "grad_norm": 1.7437061071395874, "learning_rate": 2e-05, "loss": 0.05667341, "step": 12454 }, { "epoch": 24.91, "grad_norm": 2.045104742050171, "learning_rate": 2e-05, "loss": 0.0504937, "step": 12455 }, { "epoch": 24.912, "grad_norm": 2.127146005630493, "learning_rate": 2e-05, "loss": 0.04464479, "step": 12456 }, { "epoch": 24.914, "grad_norm": 1.9251153469085693, "learning_rate": 2e-05, "loss": 0.04323905, "step": 12457 }, { "epoch": 24.916, "grad_norm": 1.2294851541519165, "learning_rate": 2e-05, "loss": 0.03710897, "step": 12458 }, { "epoch": 24.918, "grad_norm": 1.0860822200775146, "learning_rate": 2e-05, "loss": 0.04871416, "step": 12459 }, { "epoch": 24.92, "grad_norm": 1.0837758779525757, "learning_rate": 2e-05, "loss": 0.03727125, "step": 12460 }, { "epoch": 24.922, "grad_norm": 1.4478741884231567, "learning_rate": 2e-05, "loss": 0.04571364, "step": 12461 }, { "epoch": 24.924, "grad_norm": 1.5994067192077637, "learning_rate": 2e-05, "loss": 0.04567996, "step": 12462 }, { "epoch": 24.926, "grad_norm": 2.3954379558563232, "learning_rate": 2e-05, "loss": 0.05547336, "step": 12463 }, { "epoch": 24.928, "grad_norm": 2.130188465118408, "learning_rate": 2e-05, "loss": 0.0510396, "step": 12464 }, { "epoch": 24.93, "grad_norm": 19.31425666809082, "learning_rate": 2e-05, "loss": 0.09202991, "step": 12465 }, { "epoch": 24.932, "grad_norm": 1.2676620483398438, "learning_rate": 2e-05, "loss": 0.03964383, "step": 12466 }, { "epoch": 24.934, "grad_norm": 1.5841865539550781, "learning_rate": 2e-05, "loss": 0.03884972, "step": 12467 }, { "epoch": 24.936, "grad_norm": 1.8690932989120483, "learning_rate": 2e-05, "loss": 0.05343563, "step": 12468 }, { "epoch": 24.938, "grad_norm": 1.262275218963623, "learning_rate": 2e-05, "loss": 0.03402144, "step": 12469 }, { "epoch": 24.94, "grad_norm": 1.1100060939788818, "learning_rate": 2e-05, "loss": 0.04514051, "step": 12470 }, { "epoch": 24.942, "grad_norm": 1.4891170263290405, "learning_rate": 2e-05, "loss": 0.04965903, "step": 12471 }, { "epoch": 24.944, "grad_norm": 1.1756887435913086, "learning_rate": 2e-05, "loss": 0.03555219, "step": 12472 }, { "epoch": 24.946, "grad_norm": 1.38705575466156, "learning_rate": 2e-05, "loss": 0.05496265, "step": 12473 }, { "epoch": 24.948, "grad_norm": 1.8380342721939087, "learning_rate": 2e-05, "loss": 0.05392865, "step": 12474 }, { "epoch": 24.95, "grad_norm": 0.9538768529891968, "learning_rate": 2e-05, "loss": 0.03763831, "step": 12475 }, { "epoch": 24.951999999999998, "grad_norm": 1.4392766952514648, "learning_rate": 2e-05, "loss": 0.03219794, "step": 12476 }, { "epoch": 24.954, "grad_norm": 1.1344821453094482, "learning_rate": 2e-05, "loss": 0.04084168, "step": 12477 }, { "epoch": 24.956, "grad_norm": 1.4016164541244507, "learning_rate": 2e-05, "loss": 0.04202775, "step": 12478 }, { "epoch": 24.958, "grad_norm": 1.3038735389709473, "learning_rate": 2e-05, "loss": 0.05586255, "step": 12479 }, { "epoch": 24.96, "grad_norm": 1.2745096683502197, "learning_rate": 2e-05, "loss": 0.04563636, "step": 12480 }, { "epoch": 24.962, "grad_norm": 1.8629027605056763, "learning_rate": 2e-05, "loss": 0.04409952, "step": 12481 }, { "epoch": 24.964, "grad_norm": 1.1554667949676514, "learning_rate": 2e-05, "loss": 0.04890275, "step": 12482 }, { "epoch": 24.966, "grad_norm": 1.329375147819519, "learning_rate": 2e-05, "loss": 0.04882819, "step": 12483 }, { "epoch": 24.968, "grad_norm": 1.0821406841278076, "learning_rate": 2e-05, "loss": 0.03480864, "step": 12484 }, { "epoch": 24.97, "grad_norm": 1.5325900316238403, "learning_rate": 2e-05, "loss": 0.04224129, "step": 12485 }, { "epoch": 24.972, "grad_norm": 1.1319386959075928, "learning_rate": 2e-05, "loss": 0.03097108, "step": 12486 }, { "epoch": 24.974, "grad_norm": 1.6647253036499023, "learning_rate": 2e-05, "loss": 0.05010362, "step": 12487 }, { "epoch": 24.976, "grad_norm": 1.5213617086410522, "learning_rate": 2e-05, "loss": 0.05155212, "step": 12488 }, { "epoch": 24.978, "grad_norm": 1.181519865989685, "learning_rate": 2e-05, "loss": 0.05764193, "step": 12489 }, { "epoch": 24.98, "grad_norm": 1.0516437292099, "learning_rate": 2e-05, "loss": 0.04471372, "step": 12490 }, { "epoch": 24.982, "grad_norm": 1.2531055212020874, "learning_rate": 2e-05, "loss": 0.04816005, "step": 12491 }, { "epoch": 24.984, "grad_norm": 2.094572067260742, "learning_rate": 2e-05, "loss": 0.0658077, "step": 12492 }, { "epoch": 24.986, "grad_norm": 1.2252720594406128, "learning_rate": 2e-05, "loss": 0.04415631, "step": 12493 }, { "epoch": 24.988, "grad_norm": 1.2434792518615723, "learning_rate": 2e-05, "loss": 0.04903311, "step": 12494 }, { "epoch": 24.99, "grad_norm": 0.994875967502594, "learning_rate": 2e-05, "loss": 0.03526482, "step": 12495 }, { "epoch": 24.992, "grad_norm": 1.4973106384277344, "learning_rate": 2e-05, "loss": 0.03749626, "step": 12496 }, { "epoch": 24.994, "grad_norm": 0.9715427160263062, "learning_rate": 2e-05, "loss": 0.0366944, "step": 12497 }, { "epoch": 24.996, "grad_norm": 1.1738646030426025, "learning_rate": 2e-05, "loss": 0.03677404, "step": 12498 }, { "epoch": 24.998, "grad_norm": 3.087301731109619, "learning_rate": 2e-05, "loss": 0.06307375, "step": 12499 }, { "epoch": 25.0, "grad_norm": 1.783982276916504, "learning_rate": 2e-05, "loss": 0.04853023, "step": 12500 }, { "epoch": 25.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9720558882235529, "Equal_1": 0.996, "Equal_2": 0.9840319361277445, "Equal_3": 0.9840319361277445, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.988, "Perpendicular_1": 0.992, "Perpendicular_2": 0.984, "Perpendicular_3": 0.8697394789579158, "PointLiesOnCircle_1": 0.9959919839679359, "PointLiesOnCircle_2": 0.9916666666666667, "PointLiesOnCircle_3": 0.9912000000000001, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9800399201596807 }, "eval_runtime": 320.1532, "eval_samples_per_second": 32.797, "eval_steps_per_second": 0.656, "step": 12500 }, { "epoch": 25.002, "grad_norm": 2.553316354751587, "learning_rate": 2e-05, "loss": 0.04658042, "step": 12501 }, { "epoch": 25.004, "grad_norm": 1.2298393249511719, "learning_rate": 2e-05, "loss": 0.04537143, "step": 12502 }, { "epoch": 25.006, "grad_norm": 1.3164474964141846, "learning_rate": 2e-05, "loss": 0.06407714, "step": 12503 }, { "epoch": 25.008, "grad_norm": 1.3401498794555664, "learning_rate": 2e-05, "loss": 0.04077845, "step": 12504 }, { "epoch": 25.01, "grad_norm": 1.0891218185424805, "learning_rate": 2e-05, "loss": 0.02724287, "step": 12505 }, { "epoch": 25.012, "grad_norm": 1.2074103355407715, "learning_rate": 2e-05, "loss": 0.05306327, "step": 12506 }, { "epoch": 25.014, "grad_norm": 1.1338789463043213, "learning_rate": 2e-05, "loss": 0.04252898, "step": 12507 }, { "epoch": 25.016, "grad_norm": 1.2618156671524048, "learning_rate": 2e-05, "loss": 0.0498926, "step": 12508 }, { "epoch": 25.018, "grad_norm": 1.14939546585083, "learning_rate": 2e-05, "loss": 0.04680283, "step": 12509 }, { "epoch": 25.02, "grad_norm": 1.1815592050552368, "learning_rate": 2e-05, "loss": 0.04917777, "step": 12510 }, { "epoch": 25.022, "grad_norm": 1.619907259941101, "learning_rate": 2e-05, "loss": 0.05033459, "step": 12511 }, { "epoch": 25.024, "grad_norm": 1.0121499300003052, "learning_rate": 2e-05, "loss": 0.03902668, "step": 12512 }, { "epoch": 25.026, "grad_norm": 1.4635061025619507, "learning_rate": 2e-05, "loss": 0.03216758, "step": 12513 }, { "epoch": 25.028, "grad_norm": 1.818674087524414, "learning_rate": 2e-05, "loss": 0.05529295, "step": 12514 }, { "epoch": 25.03, "grad_norm": 1.5501385927200317, "learning_rate": 2e-05, "loss": 0.05194931, "step": 12515 }, { "epoch": 25.032, "grad_norm": 1.735026240348816, "learning_rate": 2e-05, "loss": 0.0563397, "step": 12516 }, { "epoch": 25.034, "grad_norm": 1.9267089366912842, "learning_rate": 2e-05, "loss": 0.05115256, "step": 12517 }, { "epoch": 25.036, "grad_norm": 1.591173768043518, "learning_rate": 2e-05, "loss": 0.04865324, "step": 12518 }, { "epoch": 25.038, "grad_norm": 1.233425259590149, "learning_rate": 2e-05, "loss": 0.04186363, "step": 12519 }, { "epoch": 25.04, "grad_norm": 1.3910762071609497, "learning_rate": 2e-05, "loss": 0.05607095, "step": 12520 }, { "epoch": 25.042, "grad_norm": 1.911817193031311, "learning_rate": 2e-05, "loss": 0.04746141, "step": 12521 }, { "epoch": 25.044, "grad_norm": 1.5743073225021362, "learning_rate": 2e-05, "loss": 0.0310831, "step": 12522 }, { "epoch": 25.046, "grad_norm": 2.0004522800445557, "learning_rate": 2e-05, "loss": 0.05432073, "step": 12523 }, { "epoch": 25.048, "grad_norm": 1.304870367050171, "learning_rate": 2e-05, "loss": 0.05191491, "step": 12524 }, { "epoch": 25.05, "grad_norm": 1.6975371837615967, "learning_rate": 2e-05, "loss": 0.04251885, "step": 12525 }, { "epoch": 25.052, "grad_norm": 1.4703913927078247, "learning_rate": 2e-05, "loss": 0.04141282, "step": 12526 }, { "epoch": 25.054, "grad_norm": 1.6704292297363281, "learning_rate": 2e-05, "loss": 0.05951826, "step": 12527 }, { "epoch": 25.056, "grad_norm": 1.4327316284179688, "learning_rate": 2e-05, "loss": 0.05041571, "step": 12528 }, { "epoch": 25.058, "grad_norm": 2.054896116256714, "learning_rate": 2e-05, "loss": 0.05156599, "step": 12529 }, { "epoch": 25.06, "grad_norm": 1.0578651428222656, "learning_rate": 2e-05, "loss": 0.03640224, "step": 12530 }, { "epoch": 25.062, "grad_norm": 1.2330236434936523, "learning_rate": 2e-05, "loss": 0.03764113, "step": 12531 }, { "epoch": 25.064, "grad_norm": 1.5884158611297607, "learning_rate": 2e-05, "loss": 0.04582855, "step": 12532 }, { "epoch": 25.066, "grad_norm": 1.3398418426513672, "learning_rate": 2e-05, "loss": 0.04462974, "step": 12533 }, { "epoch": 25.068, "grad_norm": 1.4082545042037964, "learning_rate": 2e-05, "loss": 0.03973638, "step": 12534 }, { "epoch": 25.07, "grad_norm": 3.2086148262023926, "learning_rate": 2e-05, "loss": 0.0822015, "step": 12535 }, { "epoch": 25.072, "grad_norm": 1.2738802433013916, "learning_rate": 2e-05, "loss": 0.0383213, "step": 12536 }, { "epoch": 25.074, "grad_norm": 1.359592318534851, "learning_rate": 2e-05, "loss": 0.04718591, "step": 12537 }, { "epoch": 25.076, "grad_norm": 2.490873336791992, "learning_rate": 2e-05, "loss": 0.04099283, "step": 12538 }, { "epoch": 25.078, "grad_norm": 1.3400729894638062, "learning_rate": 2e-05, "loss": 0.05246966, "step": 12539 }, { "epoch": 25.08, "grad_norm": 1.2105579376220703, "learning_rate": 2e-05, "loss": 0.04603817, "step": 12540 }, { "epoch": 25.082, "grad_norm": 1.2426912784576416, "learning_rate": 2e-05, "loss": 0.04203958, "step": 12541 }, { "epoch": 25.084, "grad_norm": 1.232663869857788, "learning_rate": 2e-05, "loss": 0.03772744, "step": 12542 }, { "epoch": 25.086, "grad_norm": 1.6726223230361938, "learning_rate": 2e-05, "loss": 0.04957581, "step": 12543 }, { "epoch": 25.088, "grad_norm": 1.9126125574111938, "learning_rate": 2e-05, "loss": 0.05544265, "step": 12544 }, { "epoch": 25.09, "grad_norm": 1.0108952522277832, "learning_rate": 2e-05, "loss": 0.02635121, "step": 12545 }, { "epoch": 25.092, "grad_norm": 1.3411767482757568, "learning_rate": 2e-05, "loss": 0.03911927, "step": 12546 }, { "epoch": 25.094, "grad_norm": 2.594507932662964, "learning_rate": 2e-05, "loss": 0.05281266, "step": 12547 }, { "epoch": 25.096, "grad_norm": 1.0433748960494995, "learning_rate": 2e-05, "loss": 0.04605139, "step": 12548 }, { "epoch": 25.098, "grad_norm": 1.9768903255462646, "learning_rate": 2e-05, "loss": 0.0549066, "step": 12549 }, { "epoch": 25.1, "grad_norm": 2.1046414375305176, "learning_rate": 2e-05, "loss": 0.05329197, "step": 12550 }, { "epoch": 25.102, "grad_norm": 1.1665661334991455, "learning_rate": 2e-05, "loss": 0.04764657, "step": 12551 }, { "epoch": 25.104, "grad_norm": 2.242833137512207, "learning_rate": 2e-05, "loss": 0.04974656, "step": 12552 }, { "epoch": 25.106, "grad_norm": 1.416796326637268, "learning_rate": 2e-05, "loss": 0.04945759, "step": 12553 }, { "epoch": 25.108, "grad_norm": 1.1873366832733154, "learning_rate": 2e-05, "loss": 0.05103811, "step": 12554 }, { "epoch": 25.11, "grad_norm": 1.926827311515808, "learning_rate": 2e-05, "loss": 0.05267503, "step": 12555 }, { "epoch": 25.112, "grad_norm": 0.9494261741638184, "learning_rate": 2e-05, "loss": 0.03307255, "step": 12556 }, { "epoch": 25.114, "grad_norm": 1.8937228918075562, "learning_rate": 2e-05, "loss": 0.06592927, "step": 12557 }, { "epoch": 25.116, "grad_norm": 2.0769922733306885, "learning_rate": 2e-05, "loss": 0.05289575, "step": 12558 }, { "epoch": 25.118, "grad_norm": 1.1254668235778809, "learning_rate": 2e-05, "loss": 0.03823429, "step": 12559 }, { "epoch": 25.12, "grad_norm": 2.4549336433410645, "learning_rate": 2e-05, "loss": 0.04544316, "step": 12560 }, { "epoch": 25.122, "grad_norm": 1.7756479978561401, "learning_rate": 2e-05, "loss": 0.0518619, "step": 12561 }, { "epoch": 25.124, "grad_norm": 1.4617094993591309, "learning_rate": 2e-05, "loss": 0.05234993, "step": 12562 }, { "epoch": 25.126, "grad_norm": 1.5612094402313232, "learning_rate": 2e-05, "loss": 0.04799661, "step": 12563 }, { "epoch": 25.128, "grad_norm": 1.2895623445510864, "learning_rate": 2e-05, "loss": 0.04298138, "step": 12564 }, { "epoch": 25.13, "grad_norm": 1.085653305053711, "learning_rate": 2e-05, "loss": 0.03644668, "step": 12565 }, { "epoch": 25.132, "grad_norm": 2.4465861320495605, "learning_rate": 2e-05, "loss": 0.04523169, "step": 12566 }, { "epoch": 25.134, "grad_norm": 1.476048231124878, "learning_rate": 2e-05, "loss": 0.02938263, "step": 12567 }, { "epoch": 25.136, "grad_norm": 0.9077489376068115, "learning_rate": 2e-05, "loss": 0.02951773, "step": 12568 }, { "epoch": 25.138, "grad_norm": 1.348293662071228, "learning_rate": 2e-05, "loss": 0.04014814, "step": 12569 }, { "epoch": 25.14, "grad_norm": 1.2255805730819702, "learning_rate": 2e-05, "loss": 0.04978126, "step": 12570 }, { "epoch": 25.142, "grad_norm": 1.4020949602127075, "learning_rate": 2e-05, "loss": 0.05750665, "step": 12571 }, { "epoch": 25.144, "grad_norm": 1.526517629623413, "learning_rate": 2e-05, "loss": 0.04812055, "step": 12572 }, { "epoch": 25.146, "grad_norm": 1.3054735660552979, "learning_rate": 2e-05, "loss": 0.04948972, "step": 12573 }, { "epoch": 25.148, "grad_norm": 0.968127429485321, "learning_rate": 2e-05, "loss": 0.03364644, "step": 12574 }, { "epoch": 25.15, "grad_norm": 2.5585010051727295, "learning_rate": 2e-05, "loss": 0.06127685, "step": 12575 }, { "epoch": 25.152, "grad_norm": 1.267968773841858, "learning_rate": 2e-05, "loss": 0.05282283, "step": 12576 }, { "epoch": 25.154, "grad_norm": 1.3801593780517578, "learning_rate": 2e-05, "loss": 0.0542292, "step": 12577 }, { "epoch": 25.156, "grad_norm": 1.1124210357666016, "learning_rate": 2e-05, "loss": 0.03990389, "step": 12578 }, { "epoch": 25.158, "grad_norm": 1.1204739809036255, "learning_rate": 2e-05, "loss": 0.04882549, "step": 12579 }, { "epoch": 25.16, "grad_norm": 1.5411254167556763, "learning_rate": 2e-05, "loss": 0.03279787, "step": 12580 }, { "epoch": 25.162, "grad_norm": 1.0646896362304688, "learning_rate": 2e-05, "loss": 0.04206897, "step": 12581 }, { "epoch": 25.164, "grad_norm": 1.4947314262390137, "learning_rate": 2e-05, "loss": 0.04689228, "step": 12582 }, { "epoch": 25.166, "grad_norm": 1.3432666063308716, "learning_rate": 2e-05, "loss": 0.03807723, "step": 12583 }, { "epoch": 25.168, "grad_norm": 1.3646643161773682, "learning_rate": 2e-05, "loss": 0.05141376, "step": 12584 }, { "epoch": 25.17, "grad_norm": 2.0790557861328125, "learning_rate": 2e-05, "loss": 0.04800526, "step": 12585 }, { "epoch": 25.172, "grad_norm": 1.5363324880599976, "learning_rate": 2e-05, "loss": 0.03496822, "step": 12586 }, { "epoch": 25.174, "grad_norm": 1.1385252475738525, "learning_rate": 2e-05, "loss": 0.04525664, "step": 12587 }, { "epoch": 25.176, "grad_norm": 1.0282803773880005, "learning_rate": 2e-05, "loss": 0.03413811, "step": 12588 }, { "epoch": 25.178, "grad_norm": 1.563729166984558, "learning_rate": 2e-05, "loss": 0.04510281, "step": 12589 }, { "epoch": 25.18, "grad_norm": 2.2484776973724365, "learning_rate": 2e-05, "loss": 0.05236363, "step": 12590 }, { "epoch": 25.182, "grad_norm": 1.737161636352539, "learning_rate": 2e-05, "loss": 0.04524446, "step": 12591 }, { "epoch": 25.184, "grad_norm": 1.867302656173706, "learning_rate": 2e-05, "loss": 0.06257963, "step": 12592 }, { "epoch": 25.186, "grad_norm": 1.426841139793396, "learning_rate": 2e-05, "loss": 0.04176682, "step": 12593 }, { "epoch": 25.188, "grad_norm": 1.0782243013381958, "learning_rate": 2e-05, "loss": 0.03285508, "step": 12594 }, { "epoch": 25.19, "grad_norm": 3.013762950897217, "learning_rate": 2e-05, "loss": 0.05704542, "step": 12595 }, { "epoch": 25.192, "grad_norm": 1.8257044553756714, "learning_rate": 2e-05, "loss": 0.04196394, "step": 12596 }, { "epoch": 25.194, "grad_norm": 0.9650020599365234, "learning_rate": 2e-05, "loss": 0.03296274, "step": 12597 }, { "epoch": 25.196, "grad_norm": 2.078765630722046, "learning_rate": 2e-05, "loss": 0.04554818, "step": 12598 }, { "epoch": 25.198, "grad_norm": 1.3087246417999268, "learning_rate": 2e-05, "loss": 0.05761782, "step": 12599 }, { "epoch": 25.2, "grad_norm": 1.3766916990280151, "learning_rate": 2e-05, "loss": 0.052761, "step": 12600 }, { "epoch": 25.202, "grad_norm": 1.2516851425170898, "learning_rate": 2e-05, "loss": 0.04158796, "step": 12601 }, { "epoch": 25.204, "grad_norm": 1.0545592308044434, "learning_rate": 2e-05, "loss": 0.03375325, "step": 12602 }, { "epoch": 25.206, "grad_norm": 1.732971429824829, "learning_rate": 2e-05, "loss": 0.05470074, "step": 12603 }, { "epoch": 25.208, "grad_norm": 1.1150429248809814, "learning_rate": 2e-05, "loss": 0.03985225, "step": 12604 }, { "epoch": 25.21, "grad_norm": 1.1678208112716675, "learning_rate": 2e-05, "loss": 0.04104099, "step": 12605 }, { "epoch": 25.212, "grad_norm": 1.085423469543457, "learning_rate": 2e-05, "loss": 0.04031212, "step": 12606 }, { "epoch": 25.214, "grad_norm": 1.413877248764038, "learning_rate": 2e-05, "loss": 0.04183791, "step": 12607 }, { "epoch": 25.216, "grad_norm": 1.270094871520996, "learning_rate": 2e-05, "loss": 0.03035654, "step": 12608 }, { "epoch": 25.218, "grad_norm": 1.045606017112732, "learning_rate": 2e-05, "loss": 0.03988522, "step": 12609 }, { "epoch": 25.22, "grad_norm": 1.19292151927948, "learning_rate": 2e-05, "loss": 0.03305714, "step": 12610 }, { "epoch": 25.222, "grad_norm": 1.377384901046753, "learning_rate": 2e-05, "loss": 0.0412189, "step": 12611 }, { "epoch": 25.224, "grad_norm": 1.903836727142334, "learning_rate": 2e-05, "loss": 0.05559286, "step": 12612 }, { "epoch": 25.226, "grad_norm": 1.2150182723999023, "learning_rate": 2e-05, "loss": 0.04249961, "step": 12613 }, { "epoch": 25.228, "grad_norm": 1.5361416339874268, "learning_rate": 2e-05, "loss": 0.05546279, "step": 12614 }, { "epoch": 25.23, "grad_norm": 1.1674758195877075, "learning_rate": 2e-05, "loss": 0.03651671, "step": 12615 }, { "epoch": 25.232, "grad_norm": 1.5208848714828491, "learning_rate": 2e-05, "loss": 0.06328288, "step": 12616 }, { "epoch": 25.234, "grad_norm": 1.5309934616088867, "learning_rate": 2e-05, "loss": 0.06355253, "step": 12617 }, { "epoch": 25.236, "grad_norm": 1.5100221633911133, "learning_rate": 2e-05, "loss": 0.04278588, "step": 12618 }, { "epoch": 25.238, "grad_norm": 1.191089153289795, "learning_rate": 2e-05, "loss": 0.04086279, "step": 12619 }, { "epoch": 25.24, "grad_norm": 1.7226134538650513, "learning_rate": 2e-05, "loss": 0.04553945, "step": 12620 }, { "epoch": 25.242, "grad_norm": 1.6444710493087769, "learning_rate": 2e-05, "loss": 0.04359776, "step": 12621 }, { "epoch": 25.244, "grad_norm": 1.3565912246704102, "learning_rate": 2e-05, "loss": 0.04153623, "step": 12622 }, { "epoch": 25.246, "grad_norm": 1.295622706413269, "learning_rate": 2e-05, "loss": 0.03719122, "step": 12623 }, { "epoch": 25.248, "grad_norm": 1.560507893562317, "learning_rate": 2e-05, "loss": 0.04558239, "step": 12624 }, { "epoch": 25.25, "grad_norm": 1.1936217546463013, "learning_rate": 2e-05, "loss": 0.04132167, "step": 12625 }, { "epoch": 25.252, "grad_norm": 1.165188193321228, "learning_rate": 2e-05, "loss": 0.04114609, "step": 12626 }, { "epoch": 25.254, "grad_norm": 1.5809382200241089, "learning_rate": 2e-05, "loss": 0.04351167, "step": 12627 }, { "epoch": 25.256, "grad_norm": 1.449571132659912, "learning_rate": 2e-05, "loss": 0.0306358, "step": 12628 }, { "epoch": 25.258, "grad_norm": 1.3308436870574951, "learning_rate": 2e-05, "loss": 0.02577632, "step": 12629 }, { "epoch": 25.26, "grad_norm": 1.2263076305389404, "learning_rate": 2e-05, "loss": 0.03553704, "step": 12630 }, { "epoch": 25.262, "grad_norm": 1.204790711402893, "learning_rate": 2e-05, "loss": 0.04875295, "step": 12631 }, { "epoch": 25.264, "grad_norm": 1.111572504043579, "learning_rate": 2e-05, "loss": 0.04481274, "step": 12632 }, { "epoch": 25.266, "grad_norm": 1.6431896686553955, "learning_rate": 2e-05, "loss": 0.05574667, "step": 12633 }, { "epoch": 25.268, "grad_norm": 1.5125669240951538, "learning_rate": 2e-05, "loss": 0.05090974, "step": 12634 }, { "epoch": 25.27, "grad_norm": 6.2000932693481445, "learning_rate": 2e-05, "loss": 0.04442751, "step": 12635 }, { "epoch": 25.272, "grad_norm": 4.573184967041016, "learning_rate": 2e-05, "loss": 0.06365525, "step": 12636 }, { "epoch": 25.274, "grad_norm": 2.154564142227173, "learning_rate": 2e-05, "loss": 0.03896628, "step": 12637 }, { "epoch": 25.276, "grad_norm": 1.2963447570800781, "learning_rate": 2e-05, "loss": 0.04430124, "step": 12638 }, { "epoch": 25.278, "grad_norm": 1.1166437864303589, "learning_rate": 2e-05, "loss": 0.03765798, "step": 12639 }, { "epoch": 25.28, "grad_norm": 1.1233835220336914, "learning_rate": 2e-05, "loss": 0.03647223, "step": 12640 }, { "epoch": 25.282, "grad_norm": 1.2153040170669556, "learning_rate": 2e-05, "loss": 0.04259994, "step": 12641 }, { "epoch": 25.284, "grad_norm": 1.6373004913330078, "learning_rate": 2e-05, "loss": 0.05044476, "step": 12642 }, { "epoch": 25.286, "grad_norm": 1.4357736110687256, "learning_rate": 2e-05, "loss": 0.04274999, "step": 12643 }, { "epoch": 25.288, "grad_norm": 1.0987167358398438, "learning_rate": 2e-05, "loss": 0.04832689, "step": 12644 }, { "epoch": 25.29, "grad_norm": 1.4695364236831665, "learning_rate": 2e-05, "loss": 0.04650634, "step": 12645 }, { "epoch": 25.292, "grad_norm": 1.8129997253417969, "learning_rate": 2e-05, "loss": 0.03666451, "step": 12646 }, { "epoch": 25.294, "grad_norm": 1.2179253101348877, "learning_rate": 2e-05, "loss": 0.03910381, "step": 12647 }, { "epoch": 25.296, "grad_norm": 1.5008174180984497, "learning_rate": 2e-05, "loss": 0.05692586, "step": 12648 }, { "epoch": 25.298, "grad_norm": 0.9341026544570923, "learning_rate": 2e-05, "loss": 0.03112362, "step": 12649 }, { "epoch": 25.3, "grad_norm": 0.8844679594039917, "learning_rate": 2e-05, "loss": 0.02362539, "step": 12650 }, { "epoch": 25.302, "grad_norm": 1.7629514932632446, "learning_rate": 2e-05, "loss": 0.03517685, "step": 12651 }, { "epoch": 25.304, "grad_norm": 1.303821325302124, "learning_rate": 2e-05, "loss": 0.05173323, "step": 12652 }, { "epoch": 25.306, "grad_norm": 1.4266308546066284, "learning_rate": 2e-05, "loss": 0.05540767, "step": 12653 }, { "epoch": 25.308, "grad_norm": 1.2281945943832397, "learning_rate": 2e-05, "loss": 0.04114369, "step": 12654 }, { "epoch": 25.31, "grad_norm": 1.3700491189956665, "learning_rate": 2e-05, "loss": 0.03522223, "step": 12655 }, { "epoch": 25.312, "grad_norm": 1.426082968711853, "learning_rate": 2e-05, "loss": 0.06648382, "step": 12656 }, { "epoch": 25.314, "grad_norm": 1.3388099670410156, "learning_rate": 2e-05, "loss": 0.05874391, "step": 12657 }, { "epoch": 25.316, "grad_norm": 0.9311839938163757, "learning_rate": 2e-05, "loss": 0.0290327, "step": 12658 }, { "epoch": 25.318, "grad_norm": 1.222132682800293, "learning_rate": 2e-05, "loss": 0.03191571, "step": 12659 }, { "epoch": 25.32, "grad_norm": 1.1202843189239502, "learning_rate": 2e-05, "loss": 0.03595486, "step": 12660 }, { "epoch": 25.322, "grad_norm": 1.4704622030258179, "learning_rate": 2e-05, "loss": 0.04287569, "step": 12661 }, { "epoch": 25.324, "grad_norm": 0.9908611178398132, "learning_rate": 2e-05, "loss": 0.03052938, "step": 12662 }, { "epoch": 25.326, "grad_norm": 1.0778824090957642, "learning_rate": 2e-05, "loss": 0.04454803, "step": 12663 }, { "epoch": 25.328, "grad_norm": 1.4507533311843872, "learning_rate": 2e-05, "loss": 0.04375216, "step": 12664 }, { "epoch": 25.33, "grad_norm": 1.0799167156219482, "learning_rate": 2e-05, "loss": 0.03894107, "step": 12665 }, { "epoch": 25.332, "grad_norm": 1.2972630262374878, "learning_rate": 2e-05, "loss": 0.03249099, "step": 12666 }, { "epoch": 25.334, "grad_norm": 1.2298357486724854, "learning_rate": 2e-05, "loss": 0.04411597, "step": 12667 }, { "epoch": 25.336, "grad_norm": 1.2464599609375, "learning_rate": 2e-05, "loss": 0.03922657, "step": 12668 }, { "epoch": 25.338, "grad_norm": 1.6811728477478027, "learning_rate": 2e-05, "loss": 0.05457025, "step": 12669 }, { "epoch": 25.34, "grad_norm": 1.4204384088516235, "learning_rate": 2e-05, "loss": 0.03159765, "step": 12670 }, { "epoch": 25.342, "grad_norm": 1.5988712310791016, "learning_rate": 2e-05, "loss": 0.04662495, "step": 12671 }, { "epoch": 25.344, "grad_norm": 1.4310500621795654, "learning_rate": 2e-05, "loss": 0.05521808, "step": 12672 }, { "epoch": 25.346, "grad_norm": 1.4796041250228882, "learning_rate": 2e-05, "loss": 0.05029026, "step": 12673 }, { "epoch": 25.348, "grad_norm": 1.120280385017395, "learning_rate": 2e-05, "loss": 0.05406159, "step": 12674 }, { "epoch": 25.35, "grad_norm": 1.5472899675369263, "learning_rate": 2e-05, "loss": 0.04434624, "step": 12675 }, { "epoch": 25.352, "grad_norm": 1.5988304615020752, "learning_rate": 2e-05, "loss": 0.03512241, "step": 12676 }, { "epoch": 25.354, "grad_norm": 2.166592836380005, "learning_rate": 2e-05, "loss": 0.05324619, "step": 12677 }, { "epoch": 25.356, "grad_norm": 1.23325514793396, "learning_rate": 2e-05, "loss": 0.0458478, "step": 12678 }, { "epoch": 25.358, "grad_norm": 1.305748462677002, "learning_rate": 2e-05, "loss": 0.04038317, "step": 12679 }, { "epoch": 25.36, "grad_norm": 1.2116440534591675, "learning_rate": 2e-05, "loss": 0.04997266, "step": 12680 }, { "epoch": 25.362, "grad_norm": 1.0483074188232422, "learning_rate": 2e-05, "loss": 0.03716281, "step": 12681 }, { "epoch": 25.364, "grad_norm": 1.1586565971374512, "learning_rate": 2e-05, "loss": 0.03767997, "step": 12682 }, { "epoch": 25.366, "grad_norm": 1.693536400794983, "learning_rate": 2e-05, "loss": 0.05087979, "step": 12683 }, { "epoch": 25.368, "grad_norm": 1.4441686868667603, "learning_rate": 2e-05, "loss": 0.05642378, "step": 12684 }, { "epoch": 25.37, "grad_norm": 1.18898344039917, "learning_rate": 2e-05, "loss": 0.04010397, "step": 12685 }, { "epoch": 25.372, "grad_norm": 1.593220591545105, "learning_rate": 2e-05, "loss": 0.0599516, "step": 12686 }, { "epoch": 25.374, "grad_norm": 2.1640501022338867, "learning_rate": 2e-05, "loss": 0.04657248, "step": 12687 }, { "epoch": 25.376, "grad_norm": 1.3017292022705078, "learning_rate": 2e-05, "loss": 0.05840718, "step": 12688 }, { "epoch": 25.378, "grad_norm": 1.1755383014678955, "learning_rate": 2e-05, "loss": 0.04789887, "step": 12689 }, { "epoch": 25.38, "grad_norm": 1.9385019540786743, "learning_rate": 2e-05, "loss": 0.05613485, "step": 12690 }, { "epoch": 25.382, "grad_norm": 1.2281224727630615, "learning_rate": 2e-05, "loss": 0.03748925, "step": 12691 }, { "epoch": 25.384, "grad_norm": 1.4565155506134033, "learning_rate": 2e-05, "loss": 0.0405966, "step": 12692 }, { "epoch": 25.386, "grad_norm": 2.12028169631958, "learning_rate": 2e-05, "loss": 0.04600952, "step": 12693 }, { "epoch": 25.388, "grad_norm": 1.0148862600326538, "learning_rate": 2e-05, "loss": 0.02967937, "step": 12694 }, { "epoch": 25.39, "grad_norm": 1.3207978010177612, "learning_rate": 2e-05, "loss": 0.06044834, "step": 12695 }, { "epoch": 25.392, "grad_norm": 1.3644589185714722, "learning_rate": 2e-05, "loss": 0.03912506, "step": 12696 }, { "epoch": 25.394, "grad_norm": 1.5914479494094849, "learning_rate": 2e-05, "loss": 0.0491058, "step": 12697 }, { "epoch": 25.396, "grad_norm": 1.4540284872055054, "learning_rate": 2e-05, "loss": 0.04001951, "step": 12698 }, { "epoch": 25.398, "grad_norm": 1.3614214658737183, "learning_rate": 2e-05, "loss": 0.04715081, "step": 12699 }, { "epoch": 25.4, "grad_norm": 1.1477137804031372, "learning_rate": 2e-05, "loss": 0.0351803, "step": 12700 }, { "epoch": 25.402, "grad_norm": 1.382109522819519, "learning_rate": 2e-05, "loss": 0.04908609, "step": 12701 }, { "epoch": 25.404, "grad_norm": 1.372443675994873, "learning_rate": 2e-05, "loss": 0.06265668, "step": 12702 }, { "epoch": 25.406, "grad_norm": 1.0587457418441772, "learning_rate": 2e-05, "loss": 0.04351508, "step": 12703 }, { "epoch": 25.408, "grad_norm": 1.4110264778137207, "learning_rate": 2e-05, "loss": 0.05135802, "step": 12704 }, { "epoch": 25.41, "grad_norm": 2.1139957904815674, "learning_rate": 2e-05, "loss": 0.03126623, "step": 12705 }, { "epoch": 25.412, "grad_norm": 1.8020521402359009, "learning_rate": 2e-05, "loss": 0.04075658, "step": 12706 }, { "epoch": 25.414, "grad_norm": 1.4906790256500244, "learning_rate": 2e-05, "loss": 0.03323084, "step": 12707 }, { "epoch": 25.416, "grad_norm": 1.2605034112930298, "learning_rate": 2e-05, "loss": 0.04425685, "step": 12708 }, { "epoch": 25.418, "grad_norm": 1.4229034185409546, "learning_rate": 2e-05, "loss": 0.06664809, "step": 12709 }, { "epoch": 25.42, "grad_norm": 3.019768476486206, "learning_rate": 2e-05, "loss": 0.06653355, "step": 12710 }, { "epoch": 25.422, "grad_norm": 1.1232490539550781, "learning_rate": 2e-05, "loss": 0.03858437, "step": 12711 }, { "epoch": 25.424, "grad_norm": 1.0489226579666138, "learning_rate": 2e-05, "loss": 0.03429198, "step": 12712 }, { "epoch": 25.426, "grad_norm": 2.398514747619629, "learning_rate": 2e-05, "loss": 0.07073388, "step": 12713 }, { "epoch": 25.428, "grad_norm": 1.2204501628875732, "learning_rate": 2e-05, "loss": 0.04312594, "step": 12714 }, { "epoch": 25.43, "grad_norm": 0.9238330721855164, "learning_rate": 2e-05, "loss": 0.03127215, "step": 12715 }, { "epoch": 25.432, "grad_norm": 1.1325103044509888, "learning_rate": 2e-05, "loss": 0.04861875, "step": 12716 }, { "epoch": 25.434, "grad_norm": 1.4087941646575928, "learning_rate": 2e-05, "loss": 0.04097962, "step": 12717 }, { "epoch": 25.436, "grad_norm": 2.770249366760254, "learning_rate": 2e-05, "loss": 0.05767106, "step": 12718 }, { "epoch": 25.438, "grad_norm": 1.5272942781448364, "learning_rate": 2e-05, "loss": 0.05232708, "step": 12719 }, { "epoch": 25.44, "grad_norm": 1.8735677003860474, "learning_rate": 2e-05, "loss": 0.04702976, "step": 12720 }, { "epoch": 25.442, "grad_norm": 1.5433956384658813, "learning_rate": 2e-05, "loss": 0.04802687, "step": 12721 }, { "epoch": 25.444, "grad_norm": 1.2358498573303223, "learning_rate": 2e-05, "loss": 0.03744403, "step": 12722 }, { "epoch": 25.446, "grad_norm": 1.9896537065505981, "learning_rate": 2e-05, "loss": 0.0489389, "step": 12723 }, { "epoch": 25.448, "grad_norm": 1.2103878259658813, "learning_rate": 2e-05, "loss": 0.04206013, "step": 12724 }, { "epoch": 25.45, "grad_norm": 1.4859042167663574, "learning_rate": 2e-05, "loss": 0.04817963, "step": 12725 }, { "epoch": 25.452, "grad_norm": 1.0707011222839355, "learning_rate": 2e-05, "loss": 0.02487176, "step": 12726 }, { "epoch": 25.454, "grad_norm": 0.8648288249969482, "learning_rate": 2e-05, "loss": 0.02900369, "step": 12727 }, { "epoch": 25.456, "grad_norm": 1.057659387588501, "learning_rate": 2e-05, "loss": 0.03823693, "step": 12728 }, { "epoch": 25.458, "grad_norm": 1.477248191833496, "learning_rate": 2e-05, "loss": 0.05023545, "step": 12729 }, { "epoch": 25.46, "grad_norm": 1.2823160886764526, "learning_rate": 2e-05, "loss": 0.05811198, "step": 12730 }, { "epoch": 25.462, "grad_norm": 1.3287951946258545, "learning_rate": 2e-05, "loss": 0.05152711, "step": 12731 }, { "epoch": 25.464, "grad_norm": 1.0755188465118408, "learning_rate": 2e-05, "loss": 0.04283263, "step": 12732 }, { "epoch": 25.466, "grad_norm": 1.084681510925293, "learning_rate": 2e-05, "loss": 0.03596375, "step": 12733 }, { "epoch": 25.468, "grad_norm": 1.1877176761627197, "learning_rate": 2e-05, "loss": 0.04653904, "step": 12734 }, { "epoch": 25.47, "grad_norm": 1.7374459505081177, "learning_rate": 2e-05, "loss": 0.03502556, "step": 12735 }, { "epoch": 25.472, "grad_norm": 1.5863116979599, "learning_rate": 2e-05, "loss": 0.03652181, "step": 12736 }, { "epoch": 25.474, "grad_norm": 1.1849809885025024, "learning_rate": 2e-05, "loss": 0.0374795, "step": 12737 }, { "epoch": 25.476, "grad_norm": 1.6925612688064575, "learning_rate": 2e-05, "loss": 0.05677975, "step": 12738 }, { "epoch": 25.478, "grad_norm": 1.0018976926803589, "learning_rate": 2e-05, "loss": 0.035368, "step": 12739 }, { "epoch": 25.48, "grad_norm": 1.0738310813903809, "learning_rate": 2e-05, "loss": 0.04165709, "step": 12740 }, { "epoch": 25.482, "grad_norm": 1.0751115083694458, "learning_rate": 2e-05, "loss": 0.04024215, "step": 12741 }, { "epoch": 25.484, "grad_norm": 1.2548881769180298, "learning_rate": 2e-05, "loss": 0.06063127, "step": 12742 }, { "epoch": 25.486, "grad_norm": 2.8296592235565186, "learning_rate": 2e-05, "loss": 0.04534397, "step": 12743 }, { "epoch": 25.488, "grad_norm": 2.0588326454162598, "learning_rate": 2e-05, "loss": 0.07510805, "step": 12744 }, { "epoch": 25.49, "grad_norm": 2.919177293777466, "learning_rate": 2e-05, "loss": 0.05336414, "step": 12745 }, { "epoch": 25.492, "grad_norm": 1.1437351703643799, "learning_rate": 2e-05, "loss": 0.04661782, "step": 12746 }, { "epoch": 25.494, "grad_norm": 1.2010008096694946, "learning_rate": 2e-05, "loss": 0.04713929, "step": 12747 }, { "epoch": 25.496, "grad_norm": 1.2396228313446045, "learning_rate": 2e-05, "loss": 0.03676428, "step": 12748 }, { "epoch": 25.498, "grad_norm": 1.1913061141967773, "learning_rate": 2e-05, "loss": 0.03432823, "step": 12749 }, { "epoch": 25.5, "grad_norm": 1.0427284240722656, "learning_rate": 2e-05, "loss": 0.04304377, "step": 12750 }, { "epoch": 25.502, "grad_norm": 0.9250820279121399, "learning_rate": 2e-05, "loss": 0.02991773, "step": 12751 }, { "epoch": 25.504, "grad_norm": 1.7029353380203247, "learning_rate": 2e-05, "loss": 0.05027733, "step": 12752 }, { "epoch": 25.506, "grad_norm": 1.2598884105682373, "learning_rate": 2e-05, "loss": 0.03872483, "step": 12753 }, { "epoch": 25.508, "grad_norm": 1.1312626600265503, "learning_rate": 2e-05, "loss": 0.03387032, "step": 12754 }, { "epoch": 25.51, "grad_norm": 1.4454662799835205, "learning_rate": 2e-05, "loss": 0.04541782, "step": 12755 }, { "epoch": 25.512, "grad_norm": 1.2431281805038452, "learning_rate": 2e-05, "loss": 0.04072841, "step": 12756 }, { "epoch": 25.514, "grad_norm": 1.0991272926330566, "learning_rate": 2e-05, "loss": 0.03918246, "step": 12757 }, { "epoch": 25.516, "grad_norm": 1.9460620880126953, "learning_rate": 2e-05, "loss": 0.06519485, "step": 12758 }, { "epoch": 25.518, "grad_norm": 1.238317608833313, "learning_rate": 2e-05, "loss": 0.05222324, "step": 12759 }, { "epoch": 25.52, "grad_norm": 1.1859232187271118, "learning_rate": 2e-05, "loss": 0.03913081, "step": 12760 }, { "epoch": 25.522, "grad_norm": 1.1361500024795532, "learning_rate": 2e-05, "loss": 0.05126172, "step": 12761 }, { "epoch": 25.524, "grad_norm": 1.657873511314392, "learning_rate": 2e-05, "loss": 0.06139502, "step": 12762 }, { "epoch": 25.526, "grad_norm": 0.9964461326599121, "learning_rate": 2e-05, "loss": 0.03220826, "step": 12763 }, { "epoch": 25.528, "grad_norm": 1.958337426185608, "learning_rate": 2e-05, "loss": 0.03383143, "step": 12764 }, { "epoch": 25.53, "grad_norm": 1.3577609062194824, "learning_rate": 2e-05, "loss": 0.04257483, "step": 12765 }, { "epoch": 25.532, "grad_norm": 1.1528269052505493, "learning_rate": 2e-05, "loss": 0.04727619, "step": 12766 }, { "epoch": 25.534, "grad_norm": 1.3304978609085083, "learning_rate": 2e-05, "loss": 0.04373534, "step": 12767 }, { "epoch": 25.536, "grad_norm": 2.185859441757202, "learning_rate": 2e-05, "loss": 0.06034337, "step": 12768 }, { "epoch": 25.538, "grad_norm": 1.6654530763626099, "learning_rate": 2e-05, "loss": 0.05115644, "step": 12769 }, { "epoch": 25.54, "grad_norm": 1.3865916728973389, "learning_rate": 2e-05, "loss": 0.05147178, "step": 12770 }, { "epoch": 25.542, "grad_norm": 1.5472357273101807, "learning_rate": 2e-05, "loss": 0.04129698, "step": 12771 }, { "epoch": 25.544, "grad_norm": 2.8680312633514404, "learning_rate": 2e-05, "loss": 0.05492282, "step": 12772 }, { "epoch": 25.546, "grad_norm": 1.9399982690811157, "learning_rate": 2e-05, "loss": 0.04655978, "step": 12773 }, { "epoch": 25.548000000000002, "grad_norm": 1.1271389722824097, "learning_rate": 2e-05, "loss": 0.04596433, "step": 12774 }, { "epoch": 25.55, "grad_norm": 1.3790403604507446, "learning_rate": 2e-05, "loss": 0.04871649, "step": 12775 }, { "epoch": 25.552, "grad_norm": 1.8597426414489746, "learning_rate": 2e-05, "loss": 0.04528391, "step": 12776 }, { "epoch": 25.554, "grad_norm": 1.6001335382461548, "learning_rate": 2e-05, "loss": 0.04731301, "step": 12777 }, { "epoch": 25.556, "grad_norm": 1.0347784757614136, "learning_rate": 2e-05, "loss": 0.03880426, "step": 12778 }, { "epoch": 25.558, "grad_norm": 1.1842137575149536, "learning_rate": 2e-05, "loss": 0.04206136, "step": 12779 }, { "epoch": 25.56, "grad_norm": 1.1110947132110596, "learning_rate": 2e-05, "loss": 0.02964817, "step": 12780 }, { "epoch": 25.562, "grad_norm": 1.3468905687332153, "learning_rate": 2e-05, "loss": 0.05464962, "step": 12781 }, { "epoch": 25.564, "grad_norm": 1.660844326019287, "learning_rate": 2e-05, "loss": 0.05474025, "step": 12782 }, { "epoch": 25.566, "grad_norm": 0.958869993686676, "learning_rate": 2e-05, "loss": 0.03442763, "step": 12783 }, { "epoch": 25.568, "grad_norm": 1.2681729793548584, "learning_rate": 2e-05, "loss": 0.05398714, "step": 12784 }, { "epoch": 25.57, "grad_norm": 1.6511400938034058, "learning_rate": 2e-05, "loss": 0.04100592, "step": 12785 }, { "epoch": 25.572, "grad_norm": 1.7444071769714355, "learning_rate": 2e-05, "loss": 0.05146802, "step": 12786 }, { "epoch": 25.574, "grad_norm": 1.5246992111206055, "learning_rate": 2e-05, "loss": 0.04002138, "step": 12787 }, { "epoch": 25.576, "grad_norm": 1.2669793367385864, "learning_rate": 2e-05, "loss": 0.03664404, "step": 12788 }, { "epoch": 25.578, "grad_norm": 1.2276067733764648, "learning_rate": 2e-05, "loss": 0.04909092, "step": 12789 }, { "epoch": 25.58, "grad_norm": 1.8915936946868896, "learning_rate": 2e-05, "loss": 0.04786887, "step": 12790 }, { "epoch": 25.582, "grad_norm": 1.526929259300232, "learning_rate": 2e-05, "loss": 0.0330558, "step": 12791 }, { "epoch": 25.584, "grad_norm": 1.7936866283416748, "learning_rate": 2e-05, "loss": 0.04382351, "step": 12792 }, { "epoch": 25.586, "grad_norm": 2.3560688495635986, "learning_rate": 2e-05, "loss": 0.03439542, "step": 12793 }, { "epoch": 25.588, "grad_norm": 1.3485525846481323, "learning_rate": 2e-05, "loss": 0.03818742, "step": 12794 }, { "epoch": 25.59, "grad_norm": 1.1782861948013306, "learning_rate": 2e-05, "loss": 0.03852155, "step": 12795 }, { "epoch": 25.592, "grad_norm": 1.5356895923614502, "learning_rate": 2e-05, "loss": 0.05399141, "step": 12796 }, { "epoch": 25.594, "grad_norm": 1.246084451675415, "learning_rate": 2e-05, "loss": 0.04823676, "step": 12797 }, { "epoch": 25.596, "grad_norm": 1.3950222730636597, "learning_rate": 2e-05, "loss": 0.02780427, "step": 12798 }, { "epoch": 25.598, "grad_norm": 3.066758632659912, "learning_rate": 2e-05, "loss": 0.05022492, "step": 12799 }, { "epoch": 25.6, "grad_norm": 0.9001642465591431, "learning_rate": 2e-05, "loss": 0.02825335, "step": 12800 }, { "epoch": 25.602, "grad_norm": 1.1426812410354614, "learning_rate": 2e-05, "loss": 0.02332602, "step": 12801 }, { "epoch": 25.604, "grad_norm": 1.341883897781372, "learning_rate": 2e-05, "loss": 0.05295313, "step": 12802 }, { "epoch": 25.606, "grad_norm": 1.4535542726516724, "learning_rate": 2e-05, "loss": 0.04531492, "step": 12803 }, { "epoch": 25.608, "grad_norm": 1.222536563873291, "learning_rate": 2e-05, "loss": 0.03721589, "step": 12804 }, { "epoch": 25.61, "grad_norm": 1.7234156131744385, "learning_rate": 2e-05, "loss": 0.05661245, "step": 12805 }, { "epoch": 25.612, "grad_norm": 1.7372621297836304, "learning_rate": 2e-05, "loss": 0.03441635, "step": 12806 }, { "epoch": 25.614, "grad_norm": 1.6260429620742798, "learning_rate": 2e-05, "loss": 0.0488297, "step": 12807 }, { "epoch": 25.616, "grad_norm": 1.0251811742782593, "learning_rate": 2e-05, "loss": 0.03384798, "step": 12808 }, { "epoch": 25.618, "grad_norm": 1.0292855501174927, "learning_rate": 2e-05, "loss": 0.04441176, "step": 12809 }, { "epoch": 25.62, "grad_norm": 0.9246246218681335, "learning_rate": 2e-05, "loss": 0.0311376, "step": 12810 }, { "epoch": 25.622, "grad_norm": 1.1222032308578491, "learning_rate": 2e-05, "loss": 0.03349977, "step": 12811 }, { "epoch": 25.624, "grad_norm": 1.044348955154419, "learning_rate": 2e-05, "loss": 0.03562807, "step": 12812 }, { "epoch": 25.626, "grad_norm": 1.314139723777771, "learning_rate": 2e-05, "loss": 0.048839, "step": 12813 }, { "epoch": 25.628, "grad_norm": 1.1304693222045898, "learning_rate": 2e-05, "loss": 0.05354474, "step": 12814 }, { "epoch": 25.63, "grad_norm": 1.2466613054275513, "learning_rate": 2e-05, "loss": 0.0536761, "step": 12815 }, { "epoch": 25.632, "grad_norm": 1.0759629011154175, "learning_rate": 2e-05, "loss": 0.04039822, "step": 12816 }, { "epoch": 25.634, "grad_norm": 1.232729196548462, "learning_rate": 2e-05, "loss": 0.04778222, "step": 12817 }, { "epoch": 25.636, "grad_norm": 1.3825780153274536, "learning_rate": 2e-05, "loss": 0.0383257, "step": 12818 }, { "epoch": 25.638, "grad_norm": 1.105268120765686, "learning_rate": 2e-05, "loss": 0.03146917, "step": 12819 }, { "epoch": 25.64, "grad_norm": 3.4076695442199707, "learning_rate": 2e-05, "loss": 0.04861237, "step": 12820 }, { "epoch": 25.642, "grad_norm": 1.1343084573745728, "learning_rate": 2e-05, "loss": 0.0362022, "step": 12821 }, { "epoch": 25.644, "grad_norm": 1.5663788318634033, "learning_rate": 2e-05, "loss": 0.04831579, "step": 12822 }, { "epoch": 25.646, "grad_norm": 1.799911618232727, "learning_rate": 2e-05, "loss": 0.05474582, "step": 12823 }, { "epoch": 25.648, "grad_norm": 1.5941003561019897, "learning_rate": 2e-05, "loss": 0.04637301, "step": 12824 }, { "epoch": 25.65, "grad_norm": 1.7795392274856567, "learning_rate": 2e-05, "loss": 0.0298002, "step": 12825 }, { "epoch": 25.652, "grad_norm": 1.5927716493606567, "learning_rate": 2e-05, "loss": 0.0472445, "step": 12826 }, { "epoch": 25.654, "grad_norm": 1.3922079801559448, "learning_rate": 2e-05, "loss": 0.04148681, "step": 12827 }, { "epoch": 25.656, "grad_norm": 1.3033424615859985, "learning_rate": 2e-05, "loss": 0.03751112, "step": 12828 }, { "epoch": 25.658, "grad_norm": 1.3551427125930786, "learning_rate": 2e-05, "loss": 0.04426542, "step": 12829 }, { "epoch": 25.66, "grad_norm": 4.914473056793213, "learning_rate": 2e-05, "loss": 0.05875074, "step": 12830 }, { "epoch": 25.662, "grad_norm": 1.2864055633544922, "learning_rate": 2e-05, "loss": 0.05747083, "step": 12831 }, { "epoch": 25.664, "grad_norm": 1.245685338973999, "learning_rate": 2e-05, "loss": 0.04856413, "step": 12832 }, { "epoch": 25.666, "grad_norm": 1.2700167894363403, "learning_rate": 2e-05, "loss": 0.04627567, "step": 12833 }, { "epoch": 25.668, "grad_norm": 2.3879594802856445, "learning_rate": 2e-05, "loss": 0.04573566, "step": 12834 }, { "epoch": 25.67, "grad_norm": 1.0853418111801147, "learning_rate": 2e-05, "loss": 0.03547914, "step": 12835 }, { "epoch": 25.672, "grad_norm": 2.4202558994293213, "learning_rate": 2e-05, "loss": 0.05616173, "step": 12836 }, { "epoch": 25.674, "grad_norm": 1.213026523590088, "learning_rate": 2e-05, "loss": 0.04792723, "step": 12837 }, { "epoch": 25.676, "grad_norm": 1.4499080181121826, "learning_rate": 2e-05, "loss": 0.04635759, "step": 12838 }, { "epoch": 25.678, "grad_norm": 1.0613125562667847, "learning_rate": 2e-05, "loss": 0.04478652, "step": 12839 }, { "epoch": 25.68, "grad_norm": 0.9953880906105042, "learning_rate": 2e-05, "loss": 0.02956217, "step": 12840 }, { "epoch": 25.682, "grad_norm": 2.4529874324798584, "learning_rate": 2e-05, "loss": 0.04293549, "step": 12841 }, { "epoch": 25.684, "grad_norm": 1.058303952217102, "learning_rate": 2e-05, "loss": 0.03956451, "step": 12842 }, { "epoch": 25.686, "grad_norm": 1.2368803024291992, "learning_rate": 2e-05, "loss": 0.05040938, "step": 12843 }, { "epoch": 25.688, "grad_norm": 1.3737293481826782, "learning_rate": 2e-05, "loss": 0.05072392, "step": 12844 }, { "epoch": 25.69, "grad_norm": 1.0648189783096313, "learning_rate": 2e-05, "loss": 0.03040619, "step": 12845 }, { "epoch": 25.692, "grad_norm": 1.3716273307800293, "learning_rate": 2e-05, "loss": 0.03951315, "step": 12846 }, { "epoch": 25.694, "grad_norm": 1.4560929536819458, "learning_rate": 2e-05, "loss": 0.0416421, "step": 12847 }, { "epoch": 25.696, "grad_norm": 1.9304611682891846, "learning_rate": 2e-05, "loss": 0.0425549, "step": 12848 }, { "epoch": 25.698, "grad_norm": 0.9284382462501526, "learning_rate": 2e-05, "loss": 0.03185133, "step": 12849 }, { "epoch": 25.7, "grad_norm": 1.3706117868423462, "learning_rate": 2e-05, "loss": 0.04668346, "step": 12850 }, { "epoch": 25.701999999999998, "grad_norm": 1.7373970746994019, "learning_rate": 2e-05, "loss": 0.03499283, "step": 12851 }, { "epoch": 25.704, "grad_norm": 2.239835262298584, "learning_rate": 2e-05, "loss": 0.0479632, "step": 12852 }, { "epoch": 25.706, "grad_norm": 1.1869913339614868, "learning_rate": 2e-05, "loss": 0.04254553, "step": 12853 }, { "epoch": 25.708, "grad_norm": 1.1868373155593872, "learning_rate": 2e-05, "loss": 0.03624144, "step": 12854 }, { "epoch": 25.71, "grad_norm": 1.2735049724578857, "learning_rate": 2e-05, "loss": 0.04621403, "step": 12855 }, { "epoch": 25.712, "grad_norm": 2.1693756580352783, "learning_rate": 2e-05, "loss": 0.05169474, "step": 12856 }, { "epoch": 25.714, "grad_norm": 1.0680606365203857, "learning_rate": 2e-05, "loss": 0.03689266, "step": 12857 }, { "epoch": 25.716, "grad_norm": 1.085115671157837, "learning_rate": 2e-05, "loss": 0.04505826, "step": 12858 }, { "epoch": 25.718, "grad_norm": 1.4041411876678467, "learning_rate": 2e-05, "loss": 0.05596658, "step": 12859 }, { "epoch": 25.72, "grad_norm": 1.3453253507614136, "learning_rate": 2e-05, "loss": 0.0477687, "step": 12860 }, { "epoch": 25.722, "grad_norm": 1.0043227672576904, "learning_rate": 2e-05, "loss": 0.03752232, "step": 12861 }, { "epoch": 25.724, "grad_norm": 1.8186486959457397, "learning_rate": 2e-05, "loss": 0.04446795, "step": 12862 }, { "epoch": 25.726, "grad_norm": 1.1549947261810303, "learning_rate": 2e-05, "loss": 0.03548076, "step": 12863 }, { "epoch": 25.728, "grad_norm": 1.8598767518997192, "learning_rate": 2e-05, "loss": 0.05279416, "step": 12864 }, { "epoch": 25.73, "grad_norm": 1.2044051885604858, "learning_rate": 2e-05, "loss": 0.04757049, "step": 12865 }, { "epoch": 25.732, "grad_norm": 1.5500402450561523, "learning_rate": 2e-05, "loss": 0.04289635, "step": 12866 }, { "epoch": 25.734, "grad_norm": 1.505210041999817, "learning_rate": 2e-05, "loss": 0.03871378, "step": 12867 }, { "epoch": 25.736, "grad_norm": 1.5146814584732056, "learning_rate": 2e-05, "loss": 0.06822012, "step": 12868 }, { "epoch": 25.738, "grad_norm": 1.9045121669769287, "learning_rate": 2e-05, "loss": 0.04598961, "step": 12869 }, { "epoch": 25.74, "grad_norm": 1.5291447639465332, "learning_rate": 2e-05, "loss": 0.04768593, "step": 12870 }, { "epoch": 25.742, "grad_norm": 1.6933094263076782, "learning_rate": 2e-05, "loss": 0.03961466, "step": 12871 }, { "epoch": 25.744, "grad_norm": 1.7180720567703247, "learning_rate": 2e-05, "loss": 0.04465307, "step": 12872 }, { "epoch": 25.746, "grad_norm": 1.5562777519226074, "learning_rate": 2e-05, "loss": 0.04760274, "step": 12873 }, { "epoch": 25.748, "grad_norm": 1.8690643310546875, "learning_rate": 2e-05, "loss": 0.0456716, "step": 12874 }, { "epoch": 25.75, "grad_norm": 1.6642539501190186, "learning_rate": 2e-05, "loss": 0.0361044, "step": 12875 }, { "epoch": 25.752, "grad_norm": 1.0987035036087036, "learning_rate": 2e-05, "loss": 0.0358355, "step": 12876 }, { "epoch": 25.754, "grad_norm": 1.276738166809082, "learning_rate": 2e-05, "loss": 0.04325083, "step": 12877 }, { "epoch": 25.756, "grad_norm": 1.4052026271820068, "learning_rate": 2e-05, "loss": 0.05564051, "step": 12878 }, { "epoch": 25.758, "grad_norm": 1.1691828966140747, "learning_rate": 2e-05, "loss": 0.0309645, "step": 12879 }, { "epoch": 25.76, "grad_norm": 1.5935523509979248, "learning_rate": 2e-05, "loss": 0.04389683, "step": 12880 }, { "epoch": 25.762, "grad_norm": 1.4796446561813354, "learning_rate": 2e-05, "loss": 0.05674142, "step": 12881 }, { "epoch": 25.764, "grad_norm": 1.473631501197815, "learning_rate": 2e-05, "loss": 0.05766422, "step": 12882 }, { "epoch": 25.766, "grad_norm": 1.003163456916809, "learning_rate": 2e-05, "loss": 0.03590136, "step": 12883 }, { "epoch": 25.768, "grad_norm": 1.1867930889129639, "learning_rate": 2e-05, "loss": 0.04083221, "step": 12884 }, { "epoch": 25.77, "grad_norm": 1.342621922492981, "learning_rate": 2e-05, "loss": 0.04845904, "step": 12885 }, { "epoch": 25.772, "grad_norm": 1.7419025897979736, "learning_rate": 2e-05, "loss": 0.04856285, "step": 12886 }, { "epoch": 25.774, "grad_norm": 0.9666280150413513, "learning_rate": 2e-05, "loss": 0.03569352, "step": 12887 }, { "epoch": 25.776, "grad_norm": 1.4296828508377075, "learning_rate": 2e-05, "loss": 0.03583002, "step": 12888 }, { "epoch": 25.778, "grad_norm": 1.1190946102142334, "learning_rate": 2e-05, "loss": 0.02983772, "step": 12889 }, { "epoch": 25.78, "grad_norm": 1.2515982389450073, "learning_rate": 2e-05, "loss": 0.04836629, "step": 12890 }, { "epoch": 25.782, "grad_norm": 2.553123712539673, "learning_rate": 2e-05, "loss": 0.05472127, "step": 12891 }, { "epoch": 25.784, "grad_norm": 1.407144546508789, "learning_rate": 2e-05, "loss": 0.04308041, "step": 12892 }, { "epoch": 25.786, "grad_norm": 1.1658834218978882, "learning_rate": 2e-05, "loss": 0.03924808, "step": 12893 }, { "epoch": 25.788, "grad_norm": 1.917145848274231, "learning_rate": 2e-05, "loss": 0.06007392, "step": 12894 }, { "epoch": 25.79, "grad_norm": 2.5494346618652344, "learning_rate": 2e-05, "loss": 0.04623754, "step": 12895 }, { "epoch": 25.792, "grad_norm": 1.3239128589630127, "learning_rate": 2e-05, "loss": 0.04573008, "step": 12896 }, { "epoch": 25.794, "grad_norm": 1.2541325092315674, "learning_rate": 2e-05, "loss": 0.03188618, "step": 12897 }, { "epoch": 25.796, "grad_norm": 1.2667912244796753, "learning_rate": 2e-05, "loss": 0.04757137, "step": 12898 }, { "epoch": 25.798000000000002, "grad_norm": 1.1675081253051758, "learning_rate": 2e-05, "loss": 0.03582035, "step": 12899 }, { "epoch": 25.8, "grad_norm": 1.2599587440490723, "learning_rate": 2e-05, "loss": 0.05033941, "step": 12900 }, { "epoch": 25.802, "grad_norm": 1.4526861906051636, "learning_rate": 2e-05, "loss": 0.02551962, "step": 12901 }, { "epoch": 25.804, "grad_norm": 2.03424334526062, "learning_rate": 2e-05, "loss": 0.04258246, "step": 12902 }, { "epoch": 25.806, "grad_norm": 1.5461796522140503, "learning_rate": 2e-05, "loss": 0.05401737, "step": 12903 }, { "epoch": 25.808, "grad_norm": 1.3652410507202148, "learning_rate": 2e-05, "loss": 0.04861759, "step": 12904 }, { "epoch": 25.81, "grad_norm": 2.5918078422546387, "learning_rate": 2e-05, "loss": 0.04921463, "step": 12905 }, { "epoch": 25.812, "grad_norm": 1.6058486700057983, "learning_rate": 2e-05, "loss": 0.03778245, "step": 12906 }, { "epoch": 25.814, "grad_norm": 1.3766907453536987, "learning_rate": 2e-05, "loss": 0.03459726, "step": 12907 }, { "epoch": 25.816, "grad_norm": 1.255175232887268, "learning_rate": 2e-05, "loss": 0.04270238, "step": 12908 }, { "epoch": 25.818, "grad_norm": 1.1050204038619995, "learning_rate": 2e-05, "loss": 0.03457598, "step": 12909 }, { "epoch": 25.82, "grad_norm": 1.7525054216384888, "learning_rate": 2e-05, "loss": 0.04051763, "step": 12910 }, { "epoch": 25.822, "grad_norm": 3.4903576374053955, "learning_rate": 2e-05, "loss": 0.04965971, "step": 12911 }, { "epoch": 25.824, "grad_norm": 1.1599464416503906, "learning_rate": 2e-05, "loss": 0.03711332, "step": 12912 }, { "epoch": 25.826, "grad_norm": 3.031643867492676, "learning_rate": 2e-05, "loss": 0.05846217, "step": 12913 }, { "epoch": 25.828, "grad_norm": 1.1548397541046143, "learning_rate": 2e-05, "loss": 0.03609941, "step": 12914 }, { "epoch": 25.83, "grad_norm": 6.252331733703613, "learning_rate": 2e-05, "loss": 0.04764732, "step": 12915 }, { "epoch": 25.832, "grad_norm": 1.6452500820159912, "learning_rate": 2e-05, "loss": 0.05718385, "step": 12916 }, { "epoch": 25.834, "grad_norm": 1.1862694025039673, "learning_rate": 2e-05, "loss": 0.06088445, "step": 12917 }, { "epoch": 25.836, "grad_norm": 1.5176570415496826, "learning_rate": 2e-05, "loss": 0.03762556, "step": 12918 }, { "epoch": 25.838, "grad_norm": 1.6177486181259155, "learning_rate": 2e-05, "loss": 0.05010308, "step": 12919 }, { "epoch": 25.84, "grad_norm": 3.6465678215026855, "learning_rate": 2e-05, "loss": 0.04568088, "step": 12920 }, { "epoch": 25.842, "grad_norm": 1.225893497467041, "learning_rate": 2e-05, "loss": 0.04407287, "step": 12921 }, { "epoch": 25.844, "grad_norm": 1.419485092163086, "learning_rate": 2e-05, "loss": 0.05671504, "step": 12922 }, { "epoch": 25.846, "grad_norm": 0.9688834547996521, "learning_rate": 2e-05, "loss": 0.0258717, "step": 12923 }, { "epoch": 25.848, "grad_norm": 1.4948381185531616, "learning_rate": 2e-05, "loss": 0.05070211, "step": 12924 }, { "epoch": 25.85, "grad_norm": 1.48432195186615, "learning_rate": 2e-05, "loss": 0.03078519, "step": 12925 }, { "epoch": 25.852, "grad_norm": 1.1909409761428833, "learning_rate": 2e-05, "loss": 0.04312345, "step": 12926 }, { "epoch": 25.854, "grad_norm": 1.25083327293396, "learning_rate": 2e-05, "loss": 0.04205637, "step": 12927 }, { "epoch": 25.856, "grad_norm": 1.1452685594558716, "learning_rate": 2e-05, "loss": 0.04423122, "step": 12928 }, { "epoch": 25.858, "grad_norm": 1.53263258934021, "learning_rate": 2e-05, "loss": 0.04331882, "step": 12929 }, { "epoch": 25.86, "grad_norm": 1.451874852180481, "learning_rate": 2e-05, "loss": 0.04954249, "step": 12930 }, { "epoch": 25.862, "grad_norm": 1.3785700798034668, "learning_rate": 2e-05, "loss": 0.04673723, "step": 12931 }, { "epoch": 25.864, "grad_norm": 1.375231146812439, "learning_rate": 2e-05, "loss": 0.0459612, "step": 12932 }, { "epoch": 25.866, "grad_norm": 1.2162593603134155, "learning_rate": 2e-05, "loss": 0.04416041, "step": 12933 }, { "epoch": 25.868, "grad_norm": 1.1460282802581787, "learning_rate": 2e-05, "loss": 0.0397279, "step": 12934 }, { "epoch": 25.87, "grad_norm": 1.4855611324310303, "learning_rate": 2e-05, "loss": 0.04811959, "step": 12935 }, { "epoch": 25.872, "grad_norm": 1.6014434099197388, "learning_rate": 2e-05, "loss": 0.05218223, "step": 12936 }, { "epoch": 25.874, "grad_norm": 1.3930706977844238, "learning_rate": 2e-05, "loss": 0.05569389, "step": 12937 }, { "epoch": 25.876, "grad_norm": 1.5376465320587158, "learning_rate": 2e-05, "loss": 0.05116809, "step": 12938 }, { "epoch": 25.878, "grad_norm": 1.3046684265136719, "learning_rate": 2e-05, "loss": 0.05488844, "step": 12939 }, { "epoch": 25.88, "grad_norm": 1.029398798942566, "learning_rate": 2e-05, "loss": 0.03596478, "step": 12940 }, { "epoch": 25.882, "grad_norm": 1.2957504987716675, "learning_rate": 2e-05, "loss": 0.03197503, "step": 12941 }, { "epoch": 25.884, "grad_norm": 1.1034080982208252, "learning_rate": 2e-05, "loss": 0.04575316, "step": 12942 }, { "epoch": 25.886, "grad_norm": 1.4886722564697266, "learning_rate": 2e-05, "loss": 0.04241584, "step": 12943 }, { "epoch": 25.888, "grad_norm": 1.763895869255066, "learning_rate": 2e-05, "loss": 0.04630455, "step": 12944 }, { "epoch": 25.89, "grad_norm": 1.2453800439834595, "learning_rate": 2e-05, "loss": 0.04511211, "step": 12945 }, { "epoch": 25.892, "grad_norm": 1.3455971479415894, "learning_rate": 2e-05, "loss": 0.03952543, "step": 12946 }, { "epoch": 25.894, "grad_norm": 1.4724493026733398, "learning_rate": 2e-05, "loss": 0.06532818, "step": 12947 }, { "epoch": 25.896, "grad_norm": 1.60633385181427, "learning_rate": 2e-05, "loss": 0.06741303, "step": 12948 }, { "epoch": 25.898, "grad_norm": 1.3577247858047485, "learning_rate": 2e-05, "loss": 0.04642791, "step": 12949 }, { "epoch": 25.9, "grad_norm": 0.9978728890419006, "learning_rate": 2e-05, "loss": 0.0259608, "step": 12950 }, { "epoch": 25.902, "grad_norm": 1.5392905473709106, "learning_rate": 2e-05, "loss": 0.03973429, "step": 12951 }, { "epoch": 25.904, "grad_norm": 1.0129889249801636, "learning_rate": 2e-05, "loss": 0.02280146, "step": 12952 }, { "epoch": 25.906, "grad_norm": 2.133425712585449, "learning_rate": 2e-05, "loss": 0.04928362, "step": 12953 }, { "epoch": 25.908, "grad_norm": 1.3658127784729004, "learning_rate": 2e-05, "loss": 0.0481901, "step": 12954 }, { "epoch": 25.91, "grad_norm": 2.754051446914673, "learning_rate": 2e-05, "loss": 0.05194226, "step": 12955 }, { "epoch": 25.912, "grad_norm": 1.568504810333252, "learning_rate": 2e-05, "loss": 0.04438355, "step": 12956 }, { "epoch": 25.914, "grad_norm": 1.2474275827407837, "learning_rate": 2e-05, "loss": 0.0374986, "step": 12957 }, { "epoch": 25.916, "grad_norm": 1.8854233026504517, "learning_rate": 2e-05, "loss": 0.05303825, "step": 12958 }, { "epoch": 25.918, "grad_norm": 1.0603793859481812, "learning_rate": 2e-05, "loss": 0.04348066, "step": 12959 }, { "epoch": 25.92, "grad_norm": 1.2751173973083496, "learning_rate": 2e-05, "loss": 0.04942772, "step": 12960 }, { "epoch": 25.922, "grad_norm": 1.5532885789871216, "learning_rate": 2e-05, "loss": 0.04615701, "step": 12961 }, { "epoch": 25.924, "grad_norm": 1.4249638319015503, "learning_rate": 2e-05, "loss": 0.04614986, "step": 12962 }, { "epoch": 25.926, "grad_norm": 1.7213596105575562, "learning_rate": 2e-05, "loss": 0.04523218, "step": 12963 }, { "epoch": 25.928, "grad_norm": 1.564770221710205, "learning_rate": 2e-05, "loss": 0.06634088, "step": 12964 }, { "epoch": 25.93, "grad_norm": 2.3734843730926514, "learning_rate": 2e-05, "loss": 0.06825857, "step": 12965 }, { "epoch": 25.932, "grad_norm": 0.9273267388343811, "learning_rate": 2e-05, "loss": 0.02456025, "step": 12966 }, { "epoch": 25.934, "grad_norm": 2.232722759246826, "learning_rate": 2e-05, "loss": 0.05355635, "step": 12967 }, { "epoch": 25.936, "grad_norm": 3.6321330070495605, "learning_rate": 2e-05, "loss": 0.05499596, "step": 12968 }, { "epoch": 25.938, "grad_norm": 1.379671335220337, "learning_rate": 2e-05, "loss": 0.05246424, "step": 12969 }, { "epoch": 25.94, "grad_norm": 1.2768114805221558, "learning_rate": 2e-05, "loss": 0.03318752, "step": 12970 }, { "epoch": 25.942, "grad_norm": 1.4348204135894775, "learning_rate": 2e-05, "loss": 0.04715006, "step": 12971 }, { "epoch": 25.944, "grad_norm": 1.4383244514465332, "learning_rate": 2e-05, "loss": 0.05146648, "step": 12972 }, { "epoch": 25.946, "grad_norm": 2.579630136489868, "learning_rate": 2e-05, "loss": 0.05426867, "step": 12973 }, { "epoch": 25.948, "grad_norm": 1.199095606803894, "learning_rate": 2e-05, "loss": 0.04479141, "step": 12974 }, { "epoch": 25.95, "grad_norm": 1.3972845077514648, "learning_rate": 2e-05, "loss": 0.0324418, "step": 12975 }, { "epoch": 25.951999999999998, "grad_norm": 1.0197676420211792, "learning_rate": 2e-05, "loss": 0.03971727, "step": 12976 }, { "epoch": 25.954, "grad_norm": 1.1759026050567627, "learning_rate": 2e-05, "loss": 0.03519825, "step": 12977 }, { "epoch": 25.956, "grad_norm": 1.27419912815094, "learning_rate": 2e-05, "loss": 0.04272161, "step": 12978 }, { "epoch": 25.958, "grad_norm": 1.3605846166610718, "learning_rate": 2e-05, "loss": 0.05668283, "step": 12979 }, { "epoch": 25.96, "grad_norm": 2.3534092903137207, "learning_rate": 2e-05, "loss": 0.04117073, "step": 12980 }, { "epoch": 25.962, "grad_norm": 1.1537200212478638, "learning_rate": 2e-05, "loss": 0.06345319, "step": 12981 }, { "epoch": 25.964, "grad_norm": 2.1213245391845703, "learning_rate": 2e-05, "loss": 0.04125203, "step": 12982 }, { "epoch": 25.966, "grad_norm": 1.309527039527893, "learning_rate": 2e-05, "loss": 0.04417682, "step": 12983 }, { "epoch": 25.968, "grad_norm": 1.206042766571045, "learning_rate": 2e-05, "loss": 0.04790517, "step": 12984 }, { "epoch": 25.97, "grad_norm": 1.2381349802017212, "learning_rate": 2e-05, "loss": 0.03844728, "step": 12985 }, { "epoch": 25.972, "grad_norm": 1.0183463096618652, "learning_rate": 2e-05, "loss": 0.04048822, "step": 12986 }, { "epoch": 25.974, "grad_norm": 1.5077916383743286, "learning_rate": 2e-05, "loss": 0.03975806, "step": 12987 }, { "epoch": 25.976, "grad_norm": 1.284839391708374, "learning_rate": 2e-05, "loss": 0.03966205, "step": 12988 }, { "epoch": 25.978, "grad_norm": 2.6741936206817627, "learning_rate": 2e-05, "loss": 0.06823039, "step": 12989 }, { "epoch": 25.98, "grad_norm": 1.2821533679962158, "learning_rate": 2e-05, "loss": 0.02755376, "step": 12990 }, { "epoch": 25.982, "grad_norm": 1.1977559328079224, "learning_rate": 2e-05, "loss": 0.05067346, "step": 12991 }, { "epoch": 25.984, "grad_norm": 1.267210602760315, "learning_rate": 2e-05, "loss": 0.05051599, "step": 12992 }, { "epoch": 25.986, "grad_norm": 1.6138875484466553, "learning_rate": 2e-05, "loss": 0.05310912, "step": 12993 }, { "epoch": 25.988, "grad_norm": 1.2645289897918701, "learning_rate": 2e-05, "loss": 0.05147942, "step": 12994 }, { "epoch": 25.99, "grad_norm": 1.5932942628860474, "learning_rate": 2e-05, "loss": 0.02298946, "step": 12995 }, { "epoch": 25.992, "grad_norm": 1.1337193250656128, "learning_rate": 2e-05, "loss": 0.02549054, "step": 12996 }, { "epoch": 25.994, "grad_norm": 1.2463297843933105, "learning_rate": 2e-05, "loss": 0.04182723, "step": 12997 }, { "epoch": 25.996, "grad_norm": 2.0521812438964844, "learning_rate": 2e-05, "loss": 0.04027394, "step": 12998 }, { "epoch": 25.998, "grad_norm": 1.0944292545318604, "learning_rate": 2e-05, "loss": 0.05282188, "step": 12999 }, { "epoch": 26.0, "grad_norm": 0.9855789542198181, "learning_rate": 2e-05, "loss": 0.03689377, "step": 13000 }, { "epoch": 26.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9780439121756487, "Equal_1": 0.996, "Equal_2": 0.9840319361277445, "Equal_3": 0.9800399201596807, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.992, "Perpendicular_1": 0.99, "Perpendicular_2": 0.982, "Perpendicular_3": 0.8717434869739479, "PointLiesOnCircle_1": 0.9996659986639947, "PointLiesOnCircle_2": 0.9996666666666667, "PointLiesOnCircle_3": 0.9936, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9700598802395209 }, "eval_runtime": 319.7653, "eval_samples_per_second": 32.837, "eval_steps_per_second": 0.657, "step": 13000 }, { "epoch": 26.002, "grad_norm": 1.2228409051895142, "learning_rate": 2e-05, "loss": 0.03640506, "step": 13001 }, { "epoch": 26.004, "grad_norm": 2.15970778465271, "learning_rate": 2e-05, "loss": 0.05572034, "step": 13002 }, { "epoch": 26.006, "grad_norm": 1.3950461149215698, "learning_rate": 2e-05, "loss": 0.04152773, "step": 13003 }, { "epoch": 26.008, "grad_norm": 1.6164307594299316, "learning_rate": 2e-05, "loss": 0.04839995, "step": 13004 }, { "epoch": 26.01, "grad_norm": 1.7224260568618774, "learning_rate": 2e-05, "loss": 0.04432855, "step": 13005 }, { "epoch": 26.012, "grad_norm": 1.3991963863372803, "learning_rate": 2e-05, "loss": 0.05045583, "step": 13006 }, { "epoch": 26.014, "grad_norm": 1.8230713605880737, "learning_rate": 2e-05, "loss": 0.06015655, "step": 13007 }, { "epoch": 26.016, "grad_norm": 1.7791680097579956, "learning_rate": 2e-05, "loss": 0.0451679, "step": 13008 }, { "epoch": 26.018, "grad_norm": 1.7463334798812866, "learning_rate": 2e-05, "loss": 0.05355278, "step": 13009 }, { "epoch": 26.02, "grad_norm": 1.9259138107299805, "learning_rate": 2e-05, "loss": 0.05147279, "step": 13010 }, { "epoch": 26.022, "grad_norm": 1.498757243156433, "learning_rate": 2e-05, "loss": 0.05272644, "step": 13011 }, { "epoch": 26.024, "grad_norm": 2.2757580280303955, "learning_rate": 2e-05, "loss": 0.0499465, "step": 13012 }, { "epoch": 26.026, "grad_norm": 2.213714122772217, "learning_rate": 2e-05, "loss": 0.04541406, "step": 13013 }, { "epoch": 26.028, "grad_norm": 1.893241286277771, "learning_rate": 2e-05, "loss": 0.05529698, "step": 13014 }, { "epoch": 26.03, "grad_norm": 1.8718706369400024, "learning_rate": 2e-05, "loss": 0.04376329, "step": 13015 }, { "epoch": 26.032, "grad_norm": 1.3967310190200806, "learning_rate": 2e-05, "loss": 0.03513469, "step": 13016 }, { "epoch": 26.034, "grad_norm": 1.8421751260757446, "learning_rate": 2e-05, "loss": 0.04883641, "step": 13017 }, { "epoch": 26.036, "grad_norm": 1.9495335817337036, "learning_rate": 2e-05, "loss": 0.03737961, "step": 13018 }, { "epoch": 26.038, "grad_norm": 1.0064893960952759, "learning_rate": 2e-05, "loss": 0.04025814, "step": 13019 }, { "epoch": 26.04, "grad_norm": 1.1454533338546753, "learning_rate": 2e-05, "loss": 0.05090432, "step": 13020 }, { "epoch": 26.042, "grad_norm": 1.2419127225875854, "learning_rate": 2e-05, "loss": 0.04127611, "step": 13021 }, { "epoch": 26.044, "grad_norm": 1.4765992164611816, "learning_rate": 2e-05, "loss": 0.053638, "step": 13022 }, { "epoch": 26.046, "grad_norm": 1.3810056447982788, "learning_rate": 2e-05, "loss": 0.02833214, "step": 13023 }, { "epoch": 26.048, "grad_norm": 1.1815146207809448, "learning_rate": 2e-05, "loss": 0.0438899, "step": 13024 }, { "epoch": 26.05, "grad_norm": 1.4837486743927002, "learning_rate": 2e-05, "loss": 0.04868677, "step": 13025 }, { "epoch": 26.052, "grad_norm": 1.6083544492721558, "learning_rate": 2e-05, "loss": 0.0421802, "step": 13026 }, { "epoch": 26.054, "grad_norm": 1.2895910739898682, "learning_rate": 2e-05, "loss": 0.04041936, "step": 13027 }, { "epoch": 26.056, "grad_norm": 1.0532166957855225, "learning_rate": 2e-05, "loss": 0.03752539, "step": 13028 }, { "epoch": 26.058, "grad_norm": 1.7177013158798218, "learning_rate": 2e-05, "loss": 0.04090296, "step": 13029 }, { "epoch": 26.06, "grad_norm": 1.4352203607559204, "learning_rate": 2e-05, "loss": 0.06325287, "step": 13030 }, { "epoch": 26.062, "grad_norm": 1.2944906949996948, "learning_rate": 2e-05, "loss": 0.03913593, "step": 13031 }, { "epoch": 26.064, "grad_norm": 1.1036475896835327, "learning_rate": 2e-05, "loss": 0.03017885, "step": 13032 }, { "epoch": 26.066, "grad_norm": 1.4907349348068237, "learning_rate": 2e-05, "loss": 0.06610879, "step": 13033 }, { "epoch": 26.068, "grad_norm": 2.620415210723877, "learning_rate": 2e-05, "loss": 0.06170364, "step": 13034 }, { "epoch": 26.07, "grad_norm": 1.2906463146209717, "learning_rate": 2e-05, "loss": 0.05566068, "step": 13035 }, { "epoch": 26.072, "grad_norm": 1.5933210849761963, "learning_rate": 2e-05, "loss": 0.06452239, "step": 13036 }, { "epoch": 26.074, "grad_norm": 1.2673341035842896, "learning_rate": 2e-05, "loss": 0.04142977, "step": 13037 }, { "epoch": 26.076, "grad_norm": 1.4276865720748901, "learning_rate": 2e-05, "loss": 0.04855426, "step": 13038 }, { "epoch": 26.078, "grad_norm": 1.5951037406921387, "learning_rate": 2e-05, "loss": 0.04814789, "step": 13039 }, { "epoch": 26.08, "grad_norm": 1.3216607570648193, "learning_rate": 2e-05, "loss": 0.04974344, "step": 13040 }, { "epoch": 26.082, "grad_norm": 1.4452524185180664, "learning_rate": 2e-05, "loss": 0.04898542, "step": 13041 }, { "epoch": 26.084, "grad_norm": 1.5695279836654663, "learning_rate": 2e-05, "loss": 0.05378542, "step": 13042 }, { "epoch": 26.086, "grad_norm": 2.6487820148468018, "learning_rate": 2e-05, "loss": 0.05485958, "step": 13043 }, { "epoch": 26.088, "grad_norm": 1.1026345491409302, "learning_rate": 2e-05, "loss": 0.03617822, "step": 13044 }, { "epoch": 26.09, "grad_norm": 2.1571977138519287, "learning_rate": 2e-05, "loss": 0.05426963, "step": 13045 }, { "epoch": 26.092, "grad_norm": 1.3975282907485962, "learning_rate": 2e-05, "loss": 0.05105602, "step": 13046 }, { "epoch": 26.094, "grad_norm": 1.2141252756118774, "learning_rate": 2e-05, "loss": 0.05173802, "step": 13047 }, { "epoch": 26.096, "grad_norm": 2.8250155448913574, "learning_rate": 2e-05, "loss": 0.06396338, "step": 13048 }, { "epoch": 26.098, "grad_norm": 1.162996530532837, "learning_rate": 2e-05, "loss": 0.05161465, "step": 13049 }, { "epoch": 26.1, "grad_norm": 1.0605719089508057, "learning_rate": 2e-05, "loss": 0.03604612, "step": 13050 }, { "epoch": 26.102, "grad_norm": 1.1841809749603271, "learning_rate": 2e-05, "loss": 0.04377447, "step": 13051 }, { "epoch": 26.104, "grad_norm": 1.2295196056365967, "learning_rate": 2e-05, "loss": 0.05042927, "step": 13052 }, { "epoch": 26.106, "grad_norm": 1.04031503200531, "learning_rate": 2e-05, "loss": 0.02950481, "step": 13053 }, { "epoch": 26.108, "grad_norm": 1.689014196395874, "learning_rate": 2e-05, "loss": 0.05845881, "step": 13054 }, { "epoch": 26.11, "grad_norm": 1.123939871788025, "learning_rate": 2e-05, "loss": 0.04595611, "step": 13055 }, { "epoch": 26.112, "grad_norm": 2.1183228492736816, "learning_rate": 2e-05, "loss": 0.03145133, "step": 13056 }, { "epoch": 26.114, "grad_norm": 1.138588786125183, "learning_rate": 2e-05, "loss": 0.04393515, "step": 13057 }, { "epoch": 26.116, "grad_norm": 1.1965045928955078, "learning_rate": 2e-05, "loss": 0.05045221, "step": 13058 }, { "epoch": 26.118, "grad_norm": 1.3790438175201416, "learning_rate": 2e-05, "loss": 0.05804875, "step": 13059 }, { "epoch": 26.12, "grad_norm": 1.0586308240890503, "learning_rate": 2e-05, "loss": 0.03835727, "step": 13060 }, { "epoch": 26.122, "grad_norm": 0.9596161842346191, "learning_rate": 2e-05, "loss": 0.03627136, "step": 13061 }, { "epoch": 26.124, "grad_norm": 1.7609083652496338, "learning_rate": 2e-05, "loss": 0.04719493, "step": 13062 }, { "epoch": 26.126, "grad_norm": 1.9145840406417847, "learning_rate": 2e-05, "loss": 0.05495848, "step": 13063 }, { "epoch": 26.128, "grad_norm": 3.6397500038146973, "learning_rate": 2e-05, "loss": 0.06928319, "step": 13064 }, { "epoch": 26.13, "grad_norm": 1.7477335929870605, "learning_rate": 2e-05, "loss": 0.06259966, "step": 13065 }, { "epoch": 26.132, "grad_norm": 1.4324450492858887, "learning_rate": 2e-05, "loss": 0.05550072, "step": 13066 }, { "epoch": 26.134, "grad_norm": 1.6972732543945312, "learning_rate": 2e-05, "loss": 0.03786982, "step": 13067 }, { "epoch": 26.136, "grad_norm": 2.0623652935028076, "learning_rate": 2e-05, "loss": 0.0455045, "step": 13068 }, { "epoch": 26.138, "grad_norm": 1.056091547012329, "learning_rate": 2e-05, "loss": 0.03710656, "step": 13069 }, { "epoch": 26.14, "grad_norm": 1.2790498733520508, "learning_rate": 2e-05, "loss": 0.0524893, "step": 13070 }, { "epoch": 26.142, "grad_norm": 1.587206244468689, "learning_rate": 2e-05, "loss": 0.04856799, "step": 13071 }, { "epoch": 26.144, "grad_norm": 1.0538597106933594, "learning_rate": 2e-05, "loss": 0.03798814, "step": 13072 }, { "epoch": 26.146, "grad_norm": 1.031635046005249, "learning_rate": 2e-05, "loss": 0.03815435, "step": 13073 }, { "epoch": 26.148, "grad_norm": 1.4059712886810303, "learning_rate": 2e-05, "loss": 0.0483502, "step": 13074 }, { "epoch": 26.15, "grad_norm": 0.9544400572776794, "learning_rate": 2e-05, "loss": 0.02810543, "step": 13075 }, { "epoch": 26.152, "grad_norm": 1.027392029762268, "learning_rate": 2e-05, "loss": 0.03287507, "step": 13076 }, { "epoch": 26.154, "grad_norm": 1.084064245223999, "learning_rate": 2e-05, "loss": 0.03592873, "step": 13077 }, { "epoch": 26.156, "grad_norm": 1.2711600065231323, "learning_rate": 2e-05, "loss": 0.04065077, "step": 13078 }, { "epoch": 26.158, "grad_norm": 1.0948227643966675, "learning_rate": 2e-05, "loss": 0.03384892, "step": 13079 }, { "epoch": 26.16, "grad_norm": 1.3854420185089111, "learning_rate": 2e-05, "loss": 0.04718272, "step": 13080 }, { "epoch": 26.162, "grad_norm": 1.2978075742721558, "learning_rate": 2e-05, "loss": 0.0398887, "step": 13081 }, { "epoch": 26.164, "grad_norm": 1.2315889596939087, "learning_rate": 2e-05, "loss": 0.03722582, "step": 13082 }, { "epoch": 26.166, "grad_norm": 1.9231492280960083, "learning_rate": 2e-05, "loss": 0.05172444, "step": 13083 }, { "epoch": 26.168, "grad_norm": 1.1194469928741455, "learning_rate": 2e-05, "loss": 0.03749369, "step": 13084 }, { "epoch": 26.17, "grad_norm": 1.6649547815322876, "learning_rate": 2e-05, "loss": 0.05333608, "step": 13085 }, { "epoch": 26.172, "grad_norm": 2.011101245880127, "learning_rate": 2e-05, "loss": 0.04135909, "step": 13086 }, { "epoch": 26.174, "grad_norm": 1.6736245155334473, "learning_rate": 2e-05, "loss": 0.043147, "step": 13087 }, { "epoch": 26.176, "grad_norm": 1.3968009948730469, "learning_rate": 2e-05, "loss": 0.04125686, "step": 13088 }, { "epoch": 26.178, "grad_norm": 1.05874764919281, "learning_rate": 2e-05, "loss": 0.03640444, "step": 13089 }, { "epoch": 26.18, "grad_norm": 1.14805006980896, "learning_rate": 2e-05, "loss": 0.0389007, "step": 13090 }, { "epoch": 26.182, "grad_norm": 1.106278419494629, "learning_rate": 2e-05, "loss": 0.04055985, "step": 13091 }, { "epoch": 26.184, "grad_norm": 2.2514469623565674, "learning_rate": 2e-05, "loss": 0.04158842, "step": 13092 }, { "epoch": 26.186, "grad_norm": 1.33916437625885, "learning_rate": 2e-05, "loss": 0.04522097, "step": 13093 }, { "epoch": 26.188, "grad_norm": 2.1648190021514893, "learning_rate": 2e-05, "loss": 0.04654589, "step": 13094 }, { "epoch": 26.19, "grad_norm": 1.1959937810897827, "learning_rate": 2e-05, "loss": 0.04704513, "step": 13095 }, { "epoch": 26.192, "grad_norm": 1.733241081237793, "learning_rate": 2e-05, "loss": 0.05815672, "step": 13096 }, { "epoch": 26.194, "grad_norm": 0.8504005670547485, "learning_rate": 2e-05, "loss": 0.02225319, "step": 13097 }, { "epoch": 26.196, "grad_norm": 2.0521740913391113, "learning_rate": 2e-05, "loss": 0.05633967, "step": 13098 }, { "epoch": 26.198, "grad_norm": 1.087705373764038, "learning_rate": 2e-05, "loss": 0.04992017, "step": 13099 }, { "epoch": 26.2, "grad_norm": 1.121282696723938, "learning_rate": 2e-05, "loss": 0.03436579, "step": 13100 }, { "epoch": 26.202, "grad_norm": 1.6202627420425415, "learning_rate": 2e-05, "loss": 0.04655264, "step": 13101 }, { "epoch": 26.204, "grad_norm": 1.7769203186035156, "learning_rate": 2e-05, "loss": 0.05040466, "step": 13102 }, { "epoch": 26.206, "grad_norm": 1.3449256420135498, "learning_rate": 2e-05, "loss": 0.0527843, "step": 13103 }, { "epoch": 26.208, "grad_norm": 2.069218397140503, "learning_rate": 2e-05, "loss": 0.05726243, "step": 13104 }, { "epoch": 26.21, "grad_norm": 1.4746137857437134, "learning_rate": 2e-05, "loss": 0.06106601, "step": 13105 }, { "epoch": 26.212, "grad_norm": 1.4619065523147583, "learning_rate": 2e-05, "loss": 0.03955628, "step": 13106 }, { "epoch": 26.214, "grad_norm": 0.9846628308296204, "learning_rate": 2e-05, "loss": 0.03640333, "step": 13107 }, { "epoch": 26.216, "grad_norm": 1.4153640270233154, "learning_rate": 2e-05, "loss": 0.04586072, "step": 13108 }, { "epoch": 26.218, "grad_norm": 1.6593786478042603, "learning_rate": 2e-05, "loss": 0.04747347, "step": 13109 }, { "epoch": 26.22, "grad_norm": 1.1394518613815308, "learning_rate": 2e-05, "loss": 0.04001589, "step": 13110 }, { "epoch": 26.222, "grad_norm": 1.1682400703430176, "learning_rate": 2e-05, "loss": 0.05425475, "step": 13111 }, { "epoch": 26.224, "grad_norm": 1.2645248174667358, "learning_rate": 2e-05, "loss": 0.03757692, "step": 13112 }, { "epoch": 26.226, "grad_norm": 1.0900676250457764, "learning_rate": 2e-05, "loss": 0.03077891, "step": 13113 }, { "epoch": 26.228, "grad_norm": 1.570024013519287, "learning_rate": 2e-05, "loss": 0.04349901, "step": 13114 }, { "epoch": 26.23, "grad_norm": 1.2463085651397705, "learning_rate": 2e-05, "loss": 0.05144939, "step": 13115 }, { "epoch": 26.232, "grad_norm": 2.3603127002716064, "learning_rate": 2e-05, "loss": 0.05303069, "step": 13116 }, { "epoch": 26.234, "grad_norm": 1.4877861738204956, "learning_rate": 2e-05, "loss": 0.04663004, "step": 13117 }, { "epoch": 26.236, "grad_norm": 2.002016544342041, "learning_rate": 2e-05, "loss": 0.02264161, "step": 13118 }, { "epoch": 26.238, "grad_norm": 1.2714250087738037, "learning_rate": 2e-05, "loss": 0.05852698, "step": 13119 }, { "epoch": 26.24, "grad_norm": 1.6923631429672241, "learning_rate": 2e-05, "loss": 0.03670363, "step": 13120 }, { "epoch": 26.242, "grad_norm": 2.9363479614257812, "learning_rate": 2e-05, "loss": 0.05860969, "step": 13121 }, { "epoch": 26.244, "grad_norm": 0.9841324090957642, "learning_rate": 2e-05, "loss": 0.03121854, "step": 13122 }, { "epoch": 26.246, "grad_norm": 1.421994686126709, "learning_rate": 2e-05, "loss": 0.04863666, "step": 13123 }, { "epoch": 26.248, "grad_norm": 1.1231731176376343, "learning_rate": 2e-05, "loss": 0.04469264, "step": 13124 }, { "epoch": 26.25, "grad_norm": 1.8403480052947998, "learning_rate": 2e-05, "loss": 0.0488335, "step": 13125 }, { "epoch": 26.252, "grad_norm": 2.583953619003296, "learning_rate": 2e-05, "loss": 0.04319778, "step": 13126 }, { "epoch": 26.254, "grad_norm": 9.854668617248535, "learning_rate": 2e-05, "loss": 0.08909784, "step": 13127 }, { "epoch": 26.256, "grad_norm": 52.158538818359375, "learning_rate": 2e-05, "loss": 0.10964724, "step": 13128 }, { "epoch": 26.258, "grad_norm": 1.0165956020355225, "learning_rate": 2e-05, "loss": 0.0289845, "step": 13129 }, { "epoch": 26.26, "grad_norm": 18.144968032836914, "learning_rate": 2e-05, "loss": 0.25045502, "step": 13130 }, { "epoch": 26.262, "grad_norm": 4.379713535308838, "learning_rate": 2e-05, "loss": 0.03416047, "step": 13131 }, { "epoch": 26.264, "grad_norm": 25.38080596923828, "learning_rate": 2e-05, "loss": 0.12389896, "step": 13132 }, { "epoch": 26.266, "grad_norm": 1.384544014930725, "learning_rate": 2e-05, "loss": 0.04459827, "step": 13133 }, { "epoch": 26.268, "grad_norm": 1.378836750984192, "learning_rate": 2e-05, "loss": 0.04477947, "step": 13134 }, { "epoch": 26.27, "grad_norm": 2.1596052646636963, "learning_rate": 2e-05, "loss": 0.05260035, "step": 13135 }, { "epoch": 26.272, "grad_norm": 1.0437805652618408, "learning_rate": 2e-05, "loss": 0.03832313, "step": 13136 }, { "epoch": 26.274, "grad_norm": 1.2373123168945312, "learning_rate": 2e-05, "loss": 0.04758352, "step": 13137 }, { "epoch": 26.276, "grad_norm": 1.18471097946167, "learning_rate": 2e-05, "loss": 0.0513614, "step": 13138 }, { "epoch": 26.278, "grad_norm": 1.364261507987976, "learning_rate": 2e-05, "loss": 0.03552637, "step": 13139 }, { "epoch": 26.28, "grad_norm": 1.291282296180725, "learning_rate": 2e-05, "loss": 0.04767456, "step": 13140 }, { "epoch": 26.282, "grad_norm": 2.0756161212921143, "learning_rate": 2e-05, "loss": 0.05183939, "step": 13141 }, { "epoch": 26.284, "grad_norm": 0.7976016402244568, "learning_rate": 2e-05, "loss": 0.02524626, "step": 13142 }, { "epoch": 26.286, "grad_norm": 1.0466339588165283, "learning_rate": 2e-05, "loss": 0.05251211, "step": 13143 }, { "epoch": 26.288, "grad_norm": 1.6141102313995361, "learning_rate": 2e-05, "loss": 0.02955076, "step": 13144 }, { "epoch": 26.29, "grad_norm": 1.5093724727630615, "learning_rate": 2e-05, "loss": 0.05642843, "step": 13145 }, { "epoch": 26.292, "grad_norm": 1.431347131729126, "learning_rate": 2e-05, "loss": 0.04590548, "step": 13146 }, { "epoch": 26.294, "grad_norm": 1.239392876625061, "learning_rate": 2e-05, "loss": 0.04617111, "step": 13147 }, { "epoch": 26.296, "grad_norm": 1.1694923639297485, "learning_rate": 2e-05, "loss": 0.03655789, "step": 13148 }, { "epoch": 26.298, "grad_norm": 1.7410540580749512, "learning_rate": 2e-05, "loss": 0.05551188, "step": 13149 }, { "epoch": 26.3, "grad_norm": 1.2234365940093994, "learning_rate": 2e-05, "loss": 0.03679156, "step": 13150 }, { "epoch": 26.302, "grad_norm": 0.9119114875793457, "learning_rate": 2e-05, "loss": 0.02980712, "step": 13151 }, { "epoch": 26.304, "grad_norm": 2.018742561340332, "learning_rate": 2e-05, "loss": 0.05444905, "step": 13152 }, { "epoch": 26.306, "grad_norm": 1.7591094970703125, "learning_rate": 2e-05, "loss": 0.06758714, "step": 13153 }, { "epoch": 26.308, "grad_norm": 1.300772786140442, "learning_rate": 2e-05, "loss": 0.0449839, "step": 13154 }, { "epoch": 26.31, "grad_norm": 1.3861204385757446, "learning_rate": 2e-05, "loss": 0.04811238, "step": 13155 }, { "epoch": 26.312, "grad_norm": 1.2385214567184448, "learning_rate": 2e-05, "loss": 0.03959964, "step": 13156 }, { "epoch": 26.314, "grad_norm": 1.3340177536010742, "learning_rate": 2e-05, "loss": 0.04750549, "step": 13157 }, { "epoch": 26.316, "grad_norm": 2.2569944858551025, "learning_rate": 2e-05, "loss": 0.05605095, "step": 13158 }, { "epoch": 26.318, "grad_norm": 2.1105170249938965, "learning_rate": 2e-05, "loss": 0.05581967, "step": 13159 }, { "epoch": 26.32, "grad_norm": 1.4911856651306152, "learning_rate": 2e-05, "loss": 0.06944424, "step": 13160 }, { "epoch": 26.322, "grad_norm": 0.8688299655914307, "learning_rate": 2e-05, "loss": 0.01928959, "step": 13161 }, { "epoch": 26.324, "grad_norm": 1.162561297416687, "learning_rate": 2e-05, "loss": 0.03705761, "step": 13162 }, { "epoch": 26.326, "grad_norm": 1.117928385734558, "learning_rate": 2e-05, "loss": 0.04116834, "step": 13163 }, { "epoch": 26.328, "grad_norm": 3.4923346042633057, "learning_rate": 2e-05, "loss": 0.04038919, "step": 13164 }, { "epoch": 26.33, "grad_norm": 1.4581316709518433, "learning_rate": 2e-05, "loss": 0.03603051, "step": 13165 }, { "epoch": 26.332, "grad_norm": 1.2061631679534912, "learning_rate": 2e-05, "loss": 0.03987415, "step": 13166 }, { "epoch": 26.334, "grad_norm": 2.4524641036987305, "learning_rate": 2e-05, "loss": 0.06676086, "step": 13167 }, { "epoch": 26.336, "grad_norm": 1.0566414594650269, "learning_rate": 2e-05, "loss": 0.04725492, "step": 13168 }, { "epoch": 26.338, "grad_norm": 0.9778428077697754, "learning_rate": 2e-05, "loss": 0.03469347, "step": 13169 }, { "epoch": 26.34, "grad_norm": 1.4649147987365723, "learning_rate": 2e-05, "loss": 0.04087276, "step": 13170 }, { "epoch": 26.342, "grad_norm": 1.4261488914489746, "learning_rate": 2e-05, "loss": 0.04543655, "step": 13171 }, { "epoch": 26.344, "grad_norm": 1.2056576013565063, "learning_rate": 2e-05, "loss": 0.04065976, "step": 13172 }, { "epoch": 26.346, "grad_norm": 1.3527207374572754, "learning_rate": 2e-05, "loss": 0.05951834, "step": 13173 }, { "epoch": 26.348, "grad_norm": 1.4392766952514648, "learning_rate": 2e-05, "loss": 0.04477938, "step": 13174 }, { "epoch": 26.35, "grad_norm": 1.0581060647964478, "learning_rate": 2e-05, "loss": 0.04285652, "step": 13175 }, { "epoch": 26.352, "grad_norm": 1.3287581205368042, "learning_rate": 2e-05, "loss": 0.05369517, "step": 13176 }, { "epoch": 26.354, "grad_norm": 1.4318236112594604, "learning_rate": 2e-05, "loss": 0.03889168, "step": 13177 }, { "epoch": 26.356, "grad_norm": 4.168168544769287, "learning_rate": 2e-05, "loss": 0.04014147, "step": 13178 }, { "epoch": 26.358, "grad_norm": 2.5664639472961426, "learning_rate": 2e-05, "loss": 0.0509958, "step": 13179 }, { "epoch": 26.36, "grad_norm": 1.6791688203811646, "learning_rate": 2e-05, "loss": 0.0571983, "step": 13180 }, { "epoch": 26.362, "grad_norm": 1.597809076309204, "learning_rate": 2e-05, "loss": 0.04761202, "step": 13181 }, { "epoch": 26.364, "grad_norm": 1.0649417638778687, "learning_rate": 2e-05, "loss": 0.03972602, "step": 13182 }, { "epoch": 26.366, "grad_norm": 0.9360977411270142, "learning_rate": 2e-05, "loss": 0.03613855, "step": 13183 }, { "epoch": 26.368, "grad_norm": 0.9736573696136475, "learning_rate": 2e-05, "loss": 0.03216821, "step": 13184 }, { "epoch": 26.37, "grad_norm": 1.6165627241134644, "learning_rate": 2e-05, "loss": 0.05862963, "step": 13185 }, { "epoch": 26.372, "grad_norm": 1.3186026811599731, "learning_rate": 2e-05, "loss": 0.04542302, "step": 13186 }, { "epoch": 26.374, "grad_norm": 1.6366772651672363, "learning_rate": 2e-05, "loss": 0.04529631, "step": 13187 }, { "epoch": 26.376, "grad_norm": 1.0712040662765503, "learning_rate": 2e-05, "loss": 0.03381637, "step": 13188 }, { "epoch": 26.378, "grad_norm": 1.1308493614196777, "learning_rate": 2e-05, "loss": 0.04229905, "step": 13189 }, { "epoch": 26.38, "grad_norm": 1.3662320375442505, "learning_rate": 2e-05, "loss": 0.05177809, "step": 13190 }, { "epoch": 26.382, "grad_norm": 1.9889822006225586, "learning_rate": 2e-05, "loss": 0.07416862, "step": 13191 }, { "epoch": 26.384, "grad_norm": 1.464272379875183, "learning_rate": 2e-05, "loss": 0.04299095, "step": 13192 }, { "epoch": 26.386, "grad_norm": 1.1341749429702759, "learning_rate": 2e-05, "loss": 0.03406891, "step": 13193 }, { "epoch": 26.388, "grad_norm": 1.3196316957473755, "learning_rate": 2e-05, "loss": 0.05596439, "step": 13194 }, { "epoch": 26.39, "grad_norm": 1.1319140195846558, "learning_rate": 2e-05, "loss": 0.03612085, "step": 13195 }, { "epoch": 26.392, "grad_norm": 0.9723718762397766, "learning_rate": 2e-05, "loss": 0.03945912, "step": 13196 }, { "epoch": 26.394, "grad_norm": 0.8996081352233887, "learning_rate": 2e-05, "loss": 0.03478167, "step": 13197 }, { "epoch": 26.396, "grad_norm": 1.8404150009155273, "learning_rate": 2e-05, "loss": 0.04371741, "step": 13198 }, { "epoch": 26.398, "grad_norm": 2.319556713104248, "learning_rate": 2e-05, "loss": 0.05231041, "step": 13199 }, { "epoch": 26.4, "grad_norm": 2.335930824279785, "learning_rate": 2e-05, "loss": 0.0324939, "step": 13200 }, { "epoch": 26.402, "grad_norm": 1.6622105836868286, "learning_rate": 2e-05, "loss": 0.02796814, "step": 13201 }, { "epoch": 26.404, "grad_norm": 2.356870651245117, "learning_rate": 2e-05, "loss": 0.04440472, "step": 13202 }, { "epoch": 26.406, "grad_norm": 1.4844690561294556, "learning_rate": 2e-05, "loss": 0.05918201, "step": 13203 }, { "epoch": 26.408, "grad_norm": 1.3682526350021362, "learning_rate": 2e-05, "loss": 0.064179, "step": 13204 }, { "epoch": 26.41, "grad_norm": 1.0957056283950806, "learning_rate": 2e-05, "loss": 0.046661, "step": 13205 }, { "epoch": 26.412, "grad_norm": 1.2934621572494507, "learning_rate": 2e-05, "loss": 0.04822002, "step": 13206 }, { "epoch": 26.414, "grad_norm": 1.6465322971343994, "learning_rate": 2e-05, "loss": 0.05369683, "step": 13207 }, { "epoch": 26.416, "grad_norm": 1.0616811513900757, "learning_rate": 2e-05, "loss": 0.04072348, "step": 13208 }, { "epoch": 26.418, "grad_norm": 4.23490047454834, "learning_rate": 2e-05, "loss": 0.05251167, "step": 13209 }, { "epoch": 26.42, "grad_norm": 1.170454978942871, "learning_rate": 2e-05, "loss": 0.03898567, "step": 13210 }, { "epoch": 26.422, "grad_norm": 1.7174979448318481, "learning_rate": 2e-05, "loss": 0.04406247, "step": 13211 }, { "epoch": 26.424, "grad_norm": 1.1600885391235352, "learning_rate": 2e-05, "loss": 0.05435538, "step": 13212 }, { "epoch": 26.426, "grad_norm": 2.2355101108551025, "learning_rate": 2e-05, "loss": 0.05025771, "step": 13213 }, { "epoch": 26.428, "grad_norm": 1.322678804397583, "learning_rate": 2e-05, "loss": 0.04307056, "step": 13214 }, { "epoch": 26.43, "grad_norm": 1.323901891708374, "learning_rate": 2e-05, "loss": 0.04015271, "step": 13215 }, { "epoch": 26.432, "grad_norm": 1.8236982822418213, "learning_rate": 2e-05, "loss": 0.04085752, "step": 13216 }, { "epoch": 26.434, "grad_norm": 1.3549261093139648, "learning_rate": 2e-05, "loss": 0.04854722, "step": 13217 }, { "epoch": 26.436, "grad_norm": 1.7481671571731567, "learning_rate": 2e-05, "loss": 0.04112818, "step": 13218 }, { "epoch": 26.438, "grad_norm": 1.3064723014831543, "learning_rate": 2e-05, "loss": 0.05516483, "step": 13219 }, { "epoch": 26.44, "grad_norm": 3.367008686065674, "learning_rate": 2e-05, "loss": 0.05630918, "step": 13220 }, { "epoch": 26.442, "grad_norm": 1.8773056268692017, "learning_rate": 2e-05, "loss": 0.06338409, "step": 13221 }, { "epoch": 26.444, "grad_norm": 1.168461561203003, "learning_rate": 2e-05, "loss": 0.04579566, "step": 13222 }, { "epoch": 26.446, "grad_norm": 1.4216398000717163, "learning_rate": 2e-05, "loss": 0.03986656, "step": 13223 }, { "epoch": 26.448, "grad_norm": 0.9925347566604614, "learning_rate": 2e-05, "loss": 0.03974314, "step": 13224 }, { "epoch": 26.45, "grad_norm": 1.569463849067688, "learning_rate": 2e-05, "loss": 0.0388345, "step": 13225 }, { "epoch": 26.452, "grad_norm": 1.6125260591506958, "learning_rate": 2e-05, "loss": 0.0357227, "step": 13226 }, { "epoch": 26.454, "grad_norm": 1.2239407300949097, "learning_rate": 2e-05, "loss": 0.04946052, "step": 13227 }, { "epoch": 26.456, "grad_norm": 0.8565706014633179, "learning_rate": 2e-05, "loss": 0.03200175, "step": 13228 }, { "epoch": 26.458, "grad_norm": 1.0081415176391602, "learning_rate": 2e-05, "loss": 0.0341515, "step": 13229 }, { "epoch": 26.46, "grad_norm": 1.0593496561050415, "learning_rate": 2e-05, "loss": 0.03936413, "step": 13230 }, { "epoch": 26.462, "grad_norm": 1.1382449865341187, "learning_rate": 2e-05, "loss": 0.02872925, "step": 13231 }, { "epoch": 26.464, "grad_norm": 1.4055805206298828, "learning_rate": 2e-05, "loss": 0.03323968, "step": 13232 }, { "epoch": 26.466, "grad_norm": 1.23487389087677, "learning_rate": 2e-05, "loss": 0.05444343, "step": 13233 }, { "epoch": 26.468, "grad_norm": 1.100627064704895, "learning_rate": 2e-05, "loss": 0.04639729, "step": 13234 }, { "epoch": 26.47, "grad_norm": 1.6120901107788086, "learning_rate": 2e-05, "loss": 0.05141643, "step": 13235 }, { "epoch": 26.472, "grad_norm": 1.5615583658218384, "learning_rate": 2e-05, "loss": 0.04910383, "step": 13236 }, { "epoch": 26.474, "grad_norm": 1.7524781227111816, "learning_rate": 2e-05, "loss": 0.0517026, "step": 13237 }, { "epoch": 26.476, "grad_norm": 1.4189718961715698, "learning_rate": 2e-05, "loss": 0.03197357, "step": 13238 }, { "epoch": 26.478, "grad_norm": 1.2002609968185425, "learning_rate": 2e-05, "loss": 0.04137202, "step": 13239 }, { "epoch": 26.48, "grad_norm": 1.5820086002349854, "learning_rate": 2e-05, "loss": 0.05161813, "step": 13240 }, { "epoch": 26.482, "grad_norm": 2.00372052192688, "learning_rate": 2e-05, "loss": 0.05241256, "step": 13241 }, { "epoch": 26.484, "grad_norm": 1.582104206085205, "learning_rate": 2e-05, "loss": 0.05075792, "step": 13242 }, { "epoch": 26.486, "grad_norm": 1.3096644878387451, "learning_rate": 2e-05, "loss": 0.04678864, "step": 13243 }, { "epoch": 26.488, "grad_norm": 1.2743966579437256, "learning_rate": 2e-05, "loss": 0.05511482, "step": 13244 }, { "epoch": 26.49, "grad_norm": 0.9095728397369385, "learning_rate": 2e-05, "loss": 0.0362159, "step": 13245 }, { "epoch": 26.492, "grad_norm": 2.057610511779785, "learning_rate": 2e-05, "loss": 0.04670215, "step": 13246 }, { "epoch": 26.494, "grad_norm": 1.7796850204467773, "learning_rate": 2e-05, "loss": 0.0543985, "step": 13247 }, { "epoch": 26.496, "grad_norm": 2.0312533378601074, "learning_rate": 2e-05, "loss": 0.03435779, "step": 13248 }, { "epoch": 26.498, "grad_norm": 0.8784269690513611, "learning_rate": 2e-05, "loss": 0.02505475, "step": 13249 }, { "epoch": 26.5, "grad_norm": 1.3337388038635254, "learning_rate": 2e-05, "loss": 0.03425723, "step": 13250 }, { "epoch": 26.502, "grad_norm": 1.1322087049484253, "learning_rate": 2e-05, "loss": 0.0380166, "step": 13251 }, { "epoch": 26.504, "grad_norm": 1.4790630340576172, "learning_rate": 2e-05, "loss": 0.05530652, "step": 13252 }, { "epoch": 26.506, "grad_norm": 1.203283429145813, "learning_rate": 2e-05, "loss": 0.04369481, "step": 13253 }, { "epoch": 26.508, "grad_norm": 0.9502495527267456, "learning_rate": 2e-05, "loss": 0.03597327, "step": 13254 }, { "epoch": 26.51, "grad_norm": 1.1296265125274658, "learning_rate": 2e-05, "loss": 0.03838349, "step": 13255 }, { "epoch": 26.512, "grad_norm": 1.4536200761795044, "learning_rate": 2e-05, "loss": 0.06651304, "step": 13256 }, { "epoch": 26.514, "grad_norm": 1.022311806678772, "learning_rate": 2e-05, "loss": 0.03070169, "step": 13257 }, { "epoch": 26.516, "grad_norm": 1.7230098247528076, "learning_rate": 2e-05, "loss": 0.05207474, "step": 13258 }, { "epoch": 26.518, "grad_norm": 1.0804290771484375, "learning_rate": 2e-05, "loss": 0.03778263, "step": 13259 }, { "epoch": 26.52, "grad_norm": 0.8944292068481445, "learning_rate": 2e-05, "loss": 0.02704319, "step": 13260 }, { "epoch": 26.522, "grad_norm": 1.2151002883911133, "learning_rate": 2e-05, "loss": 0.03234539, "step": 13261 }, { "epoch": 26.524, "grad_norm": 1.4448111057281494, "learning_rate": 2e-05, "loss": 0.04379795, "step": 13262 }, { "epoch": 26.526, "grad_norm": 3.976080894470215, "learning_rate": 2e-05, "loss": 0.04685978, "step": 13263 }, { "epoch": 26.528, "grad_norm": 1.7629016637802124, "learning_rate": 2e-05, "loss": 0.0644666, "step": 13264 }, { "epoch": 26.53, "grad_norm": 1.4677865505218506, "learning_rate": 2e-05, "loss": 0.0506825, "step": 13265 }, { "epoch": 26.532, "grad_norm": 0.9562329649925232, "learning_rate": 2e-05, "loss": 0.03811152, "step": 13266 }, { "epoch": 26.534, "grad_norm": 2.1469244956970215, "learning_rate": 2e-05, "loss": 0.04386814, "step": 13267 }, { "epoch": 26.536, "grad_norm": 1.172168254852295, "learning_rate": 2e-05, "loss": 0.03443911, "step": 13268 }, { "epoch": 26.538, "grad_norm": 1.5751445293426514, "learning_rate": 2e-05, "loss": 0.05732818, "step": 13269 }, { "epoch": 26.54, "grad_norm": 0.9219292402267456, "learning_rate": 2e-05, "loss": 0.02910371, "step": 13270 }, { "epoch": 26.542, "grad_norm": 1.7685173749923706, "learning_rate": 2e-05, "loss": 0.05187953, "step": 13271 }, { "epoch": 26.544, "grad_norm": 1.689509630203247, "learning_rate": 2e-05, "loss": 0.05508012, "step": 13272 }, { "epoch": 26.546, "grad_norm": 1.1748220920562744, "learning_rate": 2e-05, "loss": 0.03818913, "step": 13273 }, { "epoch": 26.548000000000002, "grad_norm": 2.35014009475708, "learning_rate": 2e-05, "loss": 0.05988345, "step": 13274 }, { "epoch": 26.55, "grad_norm": 0.9859991669654846, "learning_rate": 2e-05, "loss": 0.02704637, "step": 13275 }, { "epoch": 26.552, "grad_norm": 1.1411195993423462, "learning_rate": 2e-05, "loss": 0.03679326, "step": 13276 }, { "epoch": 26.554, "grad_norm": 2.2478442192077637, "learning_rate": 2e-05, "loss": 0.05884918, "step": 13277 }, { "epoch": 26.556, "grad_norm": 1.2798776626586914, "learning_rate": 2e-05, "loss": 0.03827868, "step": 13278 }, { "epoch": 26.558, "grad_norm": 1.3902195692062378, "learning_rate": 2e-05, "loss": 0.04380446, "step": 13279 }, { "epoch": 26.56, "grad_norm": 1.3358991146087646, "learning_rate": 2e-05, "loss": 0.04080527, "step": 13280 }, { "epoch": 26.562, "grad_norm": 1.769484043121338, "learning_rate": 2e-05, "loss": 0.06003635, "step": 13281 }, { "epoch": 26.564, "grad_norm": 1.0746612548828125, "learning_rate": 2e-05, "loss": 0.02794596, "step": 13282 }, { "epoch": 26.566, "grad_norm": 1.3699100017547607, "learning_rate": 2e-05, "loss": 0.03206124, "step": 13283 }, { "epoch": 26.568, "grad_norm": 1.0902633666992188, "learning_rate": 2e-05, "loss": 0.0326319, "step": 13284 }, { "epoch": 26.57, "grad_norm": 1.6095848083496094, "learning_rate": 2e-05, "loss": 0.05489502, "step": 13285 }, { "epoch": 26.572, "grad_norm": 1.2423216104507446, "learning_rate": 2e-05, "loss": 0.03849361, "step": 13286 }, { "epoch": 26.574, "grad_norm": 1.2105095386505127, "learning_rate": 2e-05, "loss": 0.0360062, "step": 13287 }, { "epoch": 26.576, "grad_norm": 1.267501950263977, "learning_rate": 2e-05, "loss": 0.04801185, "step": 13288 }, { "epoch": 26.578, "grad_norm": 2.0049567222595215, "learning_rate": 2e-05, "loss": 0.03750632, "step": 13289 }, { "epoch": 26.58, "grad_norm": 2.5399057865142822, "learning_rate": 2e-05, "loss": 0.0422065, "step": 13290 }, { "epoch": 26.582, "grad_norm": 1.4443379640579224, "learning_rate": 2e-05, "loss": 0.03480319, "step": 13291 }, { "epoch": 26.584, "grad_norm": 1.135719656944275, "learning_rate": 2e-05, "loss": 0.03749589, "step": 13292 }, { "epoch": 26.586, "grad_norm": 1.231929063796997, "learning_rate": 2e-05, "loss": 0.04628847, "step": 13293 }, { "epoch": 26.588, "grad_norm": 1.4225656986236572, "learning_rate": 2e-05, "loss": 0.05674511, "step": 13294 }, { "epoch": 26.59, "grad_norm": 1.0678924322128296, "learning_rate": 2e-05, "loss": 0.03472822, "step": 13295 }, { "epoch": 26.592, "grad_norm": 1.3472518920898438, "learning_rate": 2e-05, "loss": 0.04637545, "step": 13296 }, { "epoch": 26.594, "grad_norm": 1.0952340364456177, "learning_rate": 2e-05, "loss": 0.0380681, "step": 13297 }, { "epoch": 26.596, "grad_norm": 2.9363811016082764, "learning_rate": 2e-05, "loss": 0.05129644, "step": 13298 }, { "epoch": 26.598, "grad_norm": 1.1145724058151245, "learning_rate": 2e-05, "loss": 0.02599895, "step": 13299 }, { "epoch": 26.6, "grad_norm": 0.9582138061523438, "learning_rate": 2e-05, "loss": 0.03655213, "step": 13300 }, { "epoch": 26.602, "grad_norm": 0.9235168099403381, "learning_rate": 2e-05, "loss": 0.03182671, "step": 13301 }, { "epoch": 26.604, "grad_norm": 1.1051247119903564, "learning_rate": 2e-05, "loss": 0.03536615, "step": 13302 }, { "epoch": 26.606, "grad_norm": 1.4593842029571533, "learning_rate": 2e-05, "loss": 0.06312197, "step": 13303 }, { "epoch": 26.608, "grad_norm": 1.04954993724823, "learning_rate": 2e-05, "loss": 0.04040704, "step": 13304 }, { "epoch": 26.61, "grad_norm": 1.3381813764572144, "learning_rate": 2e-05, "loss": 0.04579471, "step": 13305 }, { "epoch": 26.612, "grad_norm": 1.8087036609649658, "learning_rate": 2e-05, "loss": 0.05931551, "step": 13306 }, { "epoch": 26.614, "grad_norm": 1.8034483194351196, "learning_rate": 2e-05, "loss": 0.05207924, "step": 13307 }, { "epoch": 26.616, "grad_norm": 1.1185029745101929, "learning_rate": 2e-05, "loss": 0.03802615, "step": 13308 }, { "epoch": 26.618, "grad_norm": 2.3116369247436523, "learning_rate": 2e-05, "loss": 0.05495331, "step": 13309 }, { "epoch": 26.62, "grad_norm": 1.7399510145187378, "learning_rate": 2e-05, "loss": 0.04346514, "step": 13310 }, { "epoch": 26.622, "grad_norm": 1.2430211305618286, "learning_rate": 2e-05, "loss": 0.03976776, "step": 13311 }, { "epoch": 26.624, "grad_norm": 1.2265691757202148, "learning_rate": 2e-05, "loss": 0.03571905, "step": 13312 }, { "epoch": 26.626, "grad_norm": 3.572852611541748, "learning_rate": 2e-05, "loss": 0.06186254, "step": 13313 }, { "epoch": 26.628, "grad_norm": 1.096924066543579, "learning_rate": 2e-05, "loss": 0.03906501, "step": 13314 }, { "epoch": 26.63, "grad_norm": 2.9369330406188965, "learning_rate": 2e-05, "loss": 0.05661511, "step": 13315 }, { "epoch": 26.632, "grad_norm": 1.467205286026001, "learning_rate": 2e-05, "loss": 0.02951013, "step": 13316 }, { "epoch": 26.634, "grad_norm": 1.185747742652893, "learning_rate": 2e-05, "loss": 0.03714722, "step": 13317 }, { "epoch": 26.636, "grad_norm": 2.264068126678467, "learning_rate": 2e-05, "loss": 0.0546186, "step": 13318 }, { "epoch": 26.638, "grad_norm": 1.3490537405014038, "learning_rate": 2e-05, "loss": 0.03904513, "step": 13319 }, { "epoch": 26.64, "grad_norm": 2.2553672790527344, "learning_rate": 2e-05, "loss": 0.04899627, "step": 13320 }, { "epoch": 26.642, "grad_norm": 1.3507367372512817, "learning_rate": 2e-05, "loss": 0.04580309, "step": 13321 }, { "epoch": 26.644, "grad_norm": 1.3584458827972412, "learning_rate": 2e-05, "loss": 0.04628047, "step": 13322 }, { "epoch": 26.646, "grad_norm": 1.6204533576965332, "learning_rate": 2e-05, "loss": 0.05554104, "step": 13323 }, { "epoch": 26.648, "grad_norm": 2.109013319015503, "learning_rate": 2e-05, "loss": 0.05139774, "step": 13324 }, { "epoch": 26.65, "grad_norm": 1.177474021911621, "learning_rate": 2e-05, "loss": 0.04520677, "step": 13325 }, { "epoch": 26.652, "grad_norm": 1.265397071838379, "learning_rate": 2e-05, "loss": 0.0448864, "step": 13326 }, { "epoch": 26.654, "grad_norm": 0.9993850588798523, "learning_rate": 2e-05, "loss": 0.04312583, "step": 13327 }, { "epoch": 26.656, "grad_norm": 2.175631523132324, "learning_rate": 2e-05, "loss": 0.04794764, "step": 13328 }, { "epoch": 26.658, "grad_norm": 1.602054476737976, "learning_rate": 2e-05, "loss": 0.06128681, "step": 13329 }, { "epoch": 26.66, "grad_norm": 1.4241081476211548, "learning_rate": 2e-05, "loss": 0.04915302, "step": 13330 }, { "epoch": 26.662, "grad_norm": 1.6712620258331299, "learning_rate": 2e-05, "loss": 0.0512425, "step": 13331 }, { "epoch": 26.664, "grad_norm": 2.4270451068878174, "learning_rate": 2e-05, "loss": 0.07092887, "step": 13332 }, { "epoch": 26.666, "grad_norm": 1.007644772529602, "learning_rate": 2e-05, "loss": 0.03094992, "step": 13333 }, { "epoch": 26.668, "grad_norm": 1.4786583185195923, "learning_rate": 2e-05, "loss": 0.05066321, "step": 13334 }, { "epoch": 26.67, "grad_norm": 1.4783406257629395, "learning_rate": 2e-05, "loss": 0.04284883, "step": 13335 }, { "epoch": 26.672, "grad_norm": 2.875722885131836, "learning_rate": 2e-05, "loss": 0.05276472, "step": 13336 }, { "epoch": 26.674, "grad_norm": 1.0497190952301025, "learning_rate": 2e-05, "loss": 0.03627689, "step": 13337 }, { "epoch": 26.676, "grad_norm": 1.1127156019210815, "learning_rate": 2e-05, "loss": 0.03644489, "step": 13338 }, { "epoch": 26.678, "grad_norm": 1.5005398988723755, "learning_rate": 2e-05, "loss": 0.0399859, "step": 13339 }, { "epoch": 26.68, "grad_norm": 1.4886964559555054, "learning_rate": 2e-05, "loss": 0.05113001, "step": 13340 }, { "epoch": 26.682, "grad_norm": 1.1941289901733398, "learning_rate": 2e-05, "loss": 0.04594672, "step": 13341 }, { "epoch": 26.684, "grad_norm": 1.4600493907928467, "learning_rate": 2e-05, "loss": 0.05219441, "step": 13342 }, { "epoch": 26.686, "grad_norm": 1.2766631841659546, "learning_rate": 2e-05, "loss": 0.05881347, "step": 13343 }, { "epoch": 26.688, "grad_norm": 1.2152045965194702, "learning_rate": 2e-05, "loss": 0.02941299, "step": 13344 }, { "epoch": 26.69, "grad_norm": 1.2594579458236694, "learning_rate": 2e-05, "loss": 0.04704573, "step": 13345 }, { "epoch": 26.692, "grad_norm": 1.4651315212249756, "learning_rate": 2e-05, "loss": 0.04764215, "step": 13346 }, { "epoch": 26.694, "grad_norm": 1.5274326801300049, "learning_rate": 2e-05, "loss": 0.06374133, "step": 13347 }, { "epoch": 26.696, "grad_norm": 1.9418085813522339, "learning_rate": 2e-05, "loss": 0.06519874, "step": 13348 }, { "epoch": 26.698, "grad_norm": 1.6129246950149536, "learning_rate": 2e-05, "loss": 0.03720499, "step": 13349 }, { "epoch": 26.7, "grad_norm": 0.7866122126579285, "learning_rate": 2e-05, "loss": 0.02468162, "step": 13350 }, { "epoch": 26.701999999999998, "grad_norm": 1.3163105249404907, "learning_rate": 2e-05, "loss": 0.05013556, "step": 13351 }, { "epoch": 26.704, "grad_norm": 1.4515641927719116, "learning_rate": 2e-05, "loss": 0.04763116, "step": 13352 }, { "epoch": 26.706, "grad_norm": 1.3559627532958984, "learning_rate": 2e-05, "loss": 0.04021575, "step": 13353 }, { "epoch": 26.708, "grad_norm": 2.714416742324829, "learning_rate": 2e-05, "loss": 0.05345739, "step": 13354 }, { "epoch": 26.71, "grad_norm": 2.2046430110931396, "learning_rate": 2e-05, "loss": 0.04232349, "step": 13355 }, { "epoch": 26.712, "grad_norm": 1.0346970558166504, "learning_rate": 2e-05, "loss": 0.03661943, "step": 13356 }, { "epoch": 26.714, "grad_norm": 1.037308692932129, "learning_rate": 2e-05, "loss": 0.0360332, "step": 13357 }, { "epoch": 26.716, "grad_norm": 1.399788498878479, "learning_rate": 2e-05, "loss": 0.04450074, "step": 13358 }, { "epoch": 26.718, "grad_norm": 1.1871050596237183, "learning_rate": 2e-05, "loss": 0.04489849, "step": 13359 }, { "epoch": 26.72, "grad_norm": 1.4322409629821777, "learning_rate": 2e-05, "loss": 0.06084087, "step": 13360 }, { "epoch": 26.722, "grad_norm": 1.2092519998550415, "learning_rate": 2e-05, "loss": 0.04185452, "step": 13361 }, { "epoch": 26.724, "grad_norm": 1.5179123878479004, "learning_rate": 2e-05, "loss": 0.05089298, "step": 13362 }, { "epoch": 26.726, "grad_norm": 1.2479588985443115, "learning_rate": 2e-05, "loss": 0.04334253, "step": 13363 }, { "epoch": 26.728, "grad_norm": 1.2945444583892822, "learning_rate": 2e-05, "loss": 0.03954721, "step": 13364 }, { "epoch": 26.73, "grad_norm": 1.0343196392059326, "learning_rate": 2e-05, "loss": 0.03208121, "step": 13365 }, { "epoch": 26.732, "grad_norm": 1.5581246614456177, "learning_rate": 2e-05, "loss": 0.05093454, "step": 13366 }, { "epoch": 26.734, "grad_norm": 1.2444391250610352, "learning_rate": 2e-05, "loss": 0.04247945, "step": 13367 }, { "epoch": 26.736, "grad_norm": 0.9851118326187134, "learning_rate": 2e-05, "loss": 0.03192877, "step": 13368 }, { "epoch": 26.738, "grad_norm": 1.6827157735824585, "learning_rate": 2e-05, "loss": 0.04289787, "step": 13369 }, { "epoch": 26.74, "grad_norm": 1.6345899105072021, "learning_rate": 2e-05, "loss": 0.05687879, "step": 13370 }, { "epoch": 26.742, "grad_norm": 1.2550289630889893, "learning_rate": 2e-05, "loss": 0.04343875, "step": 13371 }, { "epoch": 26.744, "grad_norm": 1.3297144174575806, "learning_rate": 2e-05, "loss": 0.03608532, "step": 13372 }, { "epoch": 26.746, "grad_norm": 1.9059828519821167, "learning_rate": 2e-05, "loss": 0.06531657, "step": 13373 }, { "epoch": 26.748, "grad_norm": 1.4299732446670532, "learning_rate": 2e-05, "loss": 0.05242453, "step": 13374 }, { "epoch": 26.75, "grad_norm": 1.1574034690856934, "learning_rate": 2e-05, "loss": 0.0402981, "step": 13375 }, { "epoch": 26.752, "grad_norm": 2.168186902999878, "learning_rate": 2e-05, "loss": 0.052399, "step": 13376 }, { "epoch": 26.754, "grad_norm": 1.0010058879852295, "learning_rate": 2e-05, "loss": 0.03435899, "step": 13377 }, { "epoch": 26.756, "grad_norm": 1.2347971200942993, "learning_rate": 2e-05, "loss": 0.04020776, "step": 13378 }, { "epoch": 26.758, "grad_norm": 1.1826847791671753, "learning_rate": 2e-05, "loss": 0.03945916, "step": 13379 }, { "epoch": 26.76, "grad_norm": 1.6033371686935425, "learning_rate": 2e-05, "loss": 0.0463616, "step": 13380 }, { "epoch": 26.762, "grad_norm": 1.1534563302993774, "learning_rate": 2e-05, "loss": 0.04242963, "step": 13381 }, { "epoch": 26.764, "grad_norm": 1.3048988580703735, "learning_rate": 2e-05, "loss": 0.05592864, "step": 13382 }, { "epoch": 26.766, "grad_norm": 1.2142225503921509, "learning_rate": 2e-05, "loss": 0.04550473, "step": 13383 }, { "epoch": 26.768, "grad_norm": 1.946568489074707, "learning_rate": 2e-05, "loss": 0.06550965, "step": 13384 }, { "epoch": 26.77, "grad_norm": 1.2773605585098267, "learning_rate": 2e-05, "loss": 0.06554952, "step": 13385 }, { "epoch": 26.772, "grad_norm": 1.2231202125549316, "learning_rate": 2e-05, "loss": 0.03776004, "step": 13386 }, { "epoch": 26.774, "grad_norm": 1.1051039695739746, "learning_rate": 2e-05, "loss": 0.02784972, "step": 13387 }, { "epoch": 26.776, "grad_norm": 1.1587811708450317, "learning_rate": 2e-05, "loss": 0.03560489, "step": 13388 }, { "epoch": 26.778, "grad_norm": 1.0926035642623901, "learning_rate": 2e-05, "loss": 0.03061144, "step": 13389 }, { "epoch": 26.78, "grad_norm": 0.9534873962402344, "learning_rate": 2e-05, "loss": 0.03757609, "step": 13390 }, { "epoch": 26.782, "grad_norm": 1.3637419939041138, "learning_rate": 2e-05, "loss": 0.04305605, "step": 13391 }, { "epoch": 26.784, "grad_norm": 1.3764069080352783, "learning_rate": 2e-05, "loss": 0.0500427, "step": 13392 }, { "epoch": 26.786, "grad_norm": 1.036009430885315, "learning_rate": 2e-05, "loss": 0.04027718, "step": 13393 }, { "epoch": 26.788, "grad_norm": 1.7560526132583618, "learning_rate": 2e-05, "loss": 0.06162121, "step": 13394 }, { "epoch": 26.79, "grad_norm": 1.822432279586792, "learning_rate": 2e-05, "loss": 0.04765713, "step": 13395 }, { "epoch": 26.792, "grad_norm": 1.488826036453247, "learning_rate": 2e-05, "loss": 0.03322754, "step": 13396 }, { "epoch": 26.794, "grad_norm": 1.4484844207763672, "learning_rate": 2e-05, "loss": 0.04716588, "step": 13397 }, { "epoch": 26.796, "grad_norm": 1.1175645589828491, "learning_rate": 2e-05, "loss": 0.03686436, "step": 13398 }, { "epoch": 26.798000000000002, "grad_norm": 1.6677278280258179, "learning_rate": 2e-05, "loss": 0.03573879, "step": 13399 }, { "epoch": 26.8, "grad_norm": 1.8486576080322266, "learning_rate": 2e-05, "loss": 0.05908503, "step": 13400 }, { "epoch": 26.802, "grad_norm": 2.1419644355773926, "learning_rate": 2e-05, "loss": 0.04133147, "step": 13401 }, { "epoch": 26.804, "grad_norm": 1.5237059593200684, "learning_rate": 2e-05, "loss": 0.04093695, "step": 13402 }, { "epoch": 26.806, "grad_norm": 0.9631497263908386, "learning_rate": 2e-05, "loss": 0.03032106, "step": 13403 }, { "epoch": 26.808, "grad_norm": 1.6903769969940186, "learning_rate": 2e-05, "loss": 0.03851802, "step": 13404 }, { "epoch": 26.81, "grad_norm": 2.2639379501342773, "learning_rate": 2e-05, "loss": 0.0339236, "step": 13405 }, { "epoch": 26.812, "grad_norm": 1.8234732151031494, "learning_rate": 2e-05, "loss": 0.04727671, "step": 13406 }, { "epoch": 26.814, "grad_norm": 1.174176573753357, "learning_rate": 2e-05, "loss": 0.04155207, "step": 13407 }, { "epoch": 26.816, "grad_norm": 1.388486385345459, "learning_rate": 2e-05, "loss": 0.03067862, "step": 13408 }, { "epoch": 26.818, "grad_norm": 1.2704886198043823, "learning_rate": 2e-05, "loss": 0.02589917, "step": 13409 }, { "epoch": 26.82, "grad_norm": 1.151698350906372, "learning_rate": 2e-05, "loss": 0.05481917, "step": 13410 }, { "epoch": 26.822, "grad_norm": 1.2374764680862427, "learning_rate": 2e-05, "loss": 0.04253447, "step": 13411 }, { "epoch": 26.824, "grad_norm": 1.3624358177185059, "learning_rate": 2e-05, "loss": 0.04338268, "step": 13412 }, { "epoch": 26.826, "grad_norm": 1.1399511098861694, "learning_rate": 2e-05, "loss": 0.04491112, "step": 13413 }, { "epoch": 26.828, "grad_norm": 1.8094592094421387, "learning_rate": 2e-05, "loss": 0.04950622, "step": 13414 }, { "epoch": 26.83, "grad_norm": 1.5777703523635864, "learning_rate": 2e-05, "loss": 0.04463634, "step": 13415 }, { "epoch": 26.832, "grad_norm": 1.1766401529312134, "learning_rate": 2e-05, "loss": 0.04658622, "step": 13416 }, { "epoch": 26.834, "grad_norm": 1.3368674516677856, "learning_rate": 2e-05, "loss": 0.04229524, "step": 13417 }, { "epoch": 26.836, "grad_norm": 1.3476362228393555, "learning_rate": 2e-05, "loss": 0.03923023, "step": 13418 }, { "epoch": 26.838, "grad_norm": 1.4750795364379883, "learning_rate": 2e-05, "loss": 0.06064068, "step": 13419 }, { "epoch": 26.84, "grad_norm": 1.1680598258972168, "learning_rate": 2e-05, "loss": 0.04480653, "step": 13420 }, { "epoch": 26.842, "grad_norm": 2.2910873889923096, "learning_rate": 2e-05, "loss": 0.06019218, "step": 13421 }, { "epoch": 26.844, "grad_norm": 1.3044073581695557, "learning_rate": 2e-05, "loss": 0.06455293, "step": 13422 }, { "epoch": 26.846, "grad_norm": 1.560138463973999, "learning_rate": 2e-05, "loss": 0.06546596, "step": 13423 }, { "epoch": 26.848, "grad_norm": 1.6893231868743896, "learning_rate": 2e-05, "loss": 0.0381937, "step": 13424 }, { "epoch": 26.85, "grad_norm": 1.344321846961975, "learning_rate": 2e-05, "loss": 0.05524303, "step": 13425 }, { "epoch": 26.852, "grad_norm": 1.0075470209121704, "learning_rate": 2e-05, "loss": 0.03442932, "step": 13426 }, { "epoch": 26.854, "grad_norm": 1.832734227180481, "learning_rate": 2e-05, "loss": 0.06405198, "step": 13427 }, { "epoch": 26.856, "grad_norm": 0.9961326122283936, "learning_rate": 2e-05, "loss": 0.02738804, "step": 13428 }, { "epoch": 26.858, "grad_norm": 0.9834625124931335, "learning_rate": 2e-05, "loss": 0.03841186, "step": 13429 }, { "epoch": 26.86, "grad_norm": 1.1662216186523438, "learning_rate": 2e-05, "loss": 0.04747712, "step": 13430 }, { "epoch": 26.862, "grad_norm": 1.8224124908447266, "learning_rate": 2e-05, "loss": 0.038079, "step": 13431 }, { "epoch": 26.864, "grad_norm": 1.2939532995224, "learning_rate": 2e-05, "loss": 0.04183429, "step": 13432 }, { "epoch": 26.866, "grad_norm": 1.1002147197723389, "learning_rate": 2e-05, "loss": 0.03624891, "step": 13433 }, { "epoch": 26.868, "grad_norm": 1.4251444339752197, "learning_rate": 2e-05, "loss": 0.05983438, "step": 13434 }, { "epoch": 26.87, "grad_norm": 1.7216922044754028, "learning_rate": 2e-05, "loss": 0.09802464, "step": 13435 }, { "epoch": 26.872, "grad_norm": 1.3873647451400757, "learning_rate": 2e-05, "loss": 0.04079521, "step": 13436 }, { "epoch": 26.874, "grad_norm": 1.0737560987472534, "learning_rate": 2e-05, "loss": 0.04044875, "step": 13437 }, { "epoch": 26.876, "grad_norm": 1.148348331451416, "learning_rate": 2e-05, "loss": 0.04982531, "step": 13438 }, { "epoch": 26.878, "grad_norm": 1.1479979753494263, "learning_rate": 2e-05, "loss": 0.05051372, "step": 13439 }, { "epoch": 26.88, "grad_norm": 1.727099061012268, "learning_rate": 2e-05, "loss": 0.04605647, "step": 13440 }, { "epoch": 26.882, "grad_norm": 1.8774471282958984, "learning_rate": 2e-05, "loss": 0.04778759, "step": 13441 }, { "epoch": 26.884, "grad_norm": 1.4502348899841309, "learning_rate": 2e-05, "loss": 0.03749793, "step": 13442 }, { "epoch": 26.886, "grad_norm": 2.354149103164673, "learning_rate": 2e-05, "loss": 0.05970661, "step": 13443 }, { "epoch": 26.888, "grad_norm": 1.1341004371643066, "learning_rate": 2e-05, "loss": 0.04527812, "step": 13444 }, { "epoch": 26.89, "grad_norm": 1.6915284395217896, "learning_rate": 2e-05, "loss": 0.04350384, "step": 13445 }, { "epoch": 26.892, "grad_norm": 1.5579638481140137, "learning_rate": 2e-05, "loss": 0.0307783, "step": 13446 }, { "epoch": 26.894, "grad_norm": 1.3309636116027832, "learning_rate": 2e-05, "loss": 0.0582713, "step": 13447 }, { "epoch": 26.896, "grad_norm": 1.2728379964828491, "learning_rate": 2e-05, "loss": 0.04255494, "step": 13448 }, { "epoch": 26.898, "grad_norm": 1.1662172079086304, "learning_rate": 2e-05, "loss": 0.03874854, "step": 13449 }, { "epoch": 26.9, "grad_norm": 2.72029447555542, "learning_rate": 2e-05, "loss": 0.0405199, "step": 13450 }, { "epoch": 26.902, "grad_norm": 1.25313401222229, "learning_rate": 2e-05, "loss": 0.04713894, "step": 13451 }, { "epoch": 26.904, "grad_norm": 1.2333053350448608, "learning_rate": 2e-05, "loss": 0.03969055, "step": 13452 }, { "epoch": 26.906, "grad_norm": 1.149762511253357, "learning_rate": 2e-05, "loss": 0.03785865, "step": 13453 }, { "epoch": 26.908, "grad_norm": 0.8945139646530151, "learning_rate": 2e-05, "loss": 0.03088951, "step": 13454 }, { "epoch": 26.91, "grad_norm": 1.6503828763961792, "learning_rate": 2e-05, "loss": 0.03936726, "step": 13455 }, { "epoch": 26.912, "grad_norm": 1.1440142393112183, "learning_rate": 2e-05, "loss": 0.04367042, "step": 13456 }, { "epoch": 26.914, "grad_norm": 1.2169008255004883, "learning_rate": 2e-05, "loss": 0.04358334, "step": 13457 }, { "epoch": 26.916, "grad_norm": 0.9487123489379883, "learning_rate": 2e-05, "loss": 0.02613301, "step": 13458 }, { "epoch": 26.918, "grad_norm": 1.0501741170883179, "learning_rate": 2e-05, "loss": 0.03885425, "step": 13459 }, { "epoch": 26.92, "grad_norm": 1.3245856761932373, "learning_rate": 2e-05, "loss": 0.03801511, "step": 13460 }, { "epoch": 26.922, "grad_norm": 1.1823177337646484, "learning_rate": 2e-05, "loss": 0.04091566, "step": 13461 }, { "epoch": 26.924, "grad_norm": 1.5307384729385376, "learning_rate": 2e-05, "loss": 0.04803543, "step": 13462 }, { "epoch": 26.926, "grad_norm": 1.2377997636795044, "learning_rate": 2e-05, "loss": 0.04242143, "step": 13463 }, { "epoch": 26.928, "grad_norm": 1.0577019453048706, "learning_rate": 2e-05, "loss": 0.04687814, "step": 13464 }, { "epoch": 26.93, "grad_norm": 1.1497957706451416, "learning_rate": 2e-05, "loss": 0.03771542, "step": 13465 }, { "epoch": 26.932, "grad_norm": 1.1796302795410156, "learning_rate": 2e-05, "loss": 0.04946107, "step": 13466 }, { "epoch": 26.934, "grad_norm": 1.092475175857544, "learning_rate": 2e-05, "loss": 0.03278045, "step": 13467 }, { "epoch": 26.936, "grad_norm": 1.85981285572052, "learning_rate": 2e-05, "loss": 0.04644259, "step": 13468 }, { "epoch": 26.938, "grad_norm": 1.8343428373336792, "learning_rate": 2e-05, "loss": 0.05847539, "step": 13469 }, { "epoch": 26.94, "grad_norm": 2.2233643531799316, "learning_rate": 2e-05, "loss": 0.05392168, "step": 13470 }, { "epoch": 26.942, "grad_norm": 2.9804506301879883, "learning_rate": 2e-05, "loss": 0.05396551, "step": 13471 }, { "epoch": 26.944, "grad_norm": 0.8914803862571716, "learning_rate": 2e-05, "loss": 0.02593549, "step": 13472 }, { "epoch": 26.946, "grad_norm": 1.0326448678970337, "learning_rate": 2e-05, "loss": 0.03847109, "step": 13473 }, { "epoch": 26.948, "grad_norm": 1.935031533241272, "learning_rate": 2e-05, "loss": 0.0416937, "step": 13474 }, { "epoch": 26.95, "grad_norm": 1.1312302350997925, "learning_rate": 2e-05, "loss": 0.04824629, "step": 13475 }, { "epoch": 26.951999999999998, "grad_norm": 1.6324800252914429, "learning_rate": 2e-05, "loss": 0.04897019, "step": 13476 }, { "epoch": 26.954, "grad_norm": 1.1452162265777588, "learning_rate": 2e-05, "loss": 0.03952339, "step": 13477 }, { "epoch": 26.956, "grad_norm": 1.284656047821045, "learning_rate": 2e-05, "loss": 0.03469507, "step": 13478 }, { "epoch": 26.958, "grad_norm": 1.433944582939148, "learning_rate": 2e-05, "loss": 0.03940383, "step": 13479 }, { "epoch": 26.96, "grad_norm": 0.9404771327972412, "learning_rate": 2e-05, "loss": 0.03143159, "step": 13480 }, { "epoch": 26.962, "grad_norm": 1.0665420293807983, "learning_rate": 2e-05, "loss": 0.03889358, "step": 13481 }, { "epoch": 26.964, "grad_norm": 1.0098894834518433, "learning_rate": 2e-05, "loss": 0.03096025, "step": 13482 }, { "epoch": 26.966, "grad_norm": 1.2511407136917114, "learning_rate": 2e-05, "loss": 0.05308896, "step": 13483 }, { "epoch": 26.968, "grad_norm": 1.9953383207321167, "learning_rate": 2e-05, "loss": 0.03955588, "step": 13484 }, { "epoch": 26.97, "grad_norm": 1.377296805381775, "learning_rate": 2e-05, "loss": 0.03692595, "step": 13485 }, { "epoch": 26.972, "grad_norm": 1.0224653482437134, "learning_rate": 2e-05, "loss": 0.03503982, "step": 13486 }, { "epoch": 26.974, "grad_norm": 1.0944488048553467, "learning_rate": 2e-05, "loss": 0.03338355, "step": 13487 }, { "epoch": 26.976, "grad_norm": 1.0018296241760254, "learning_rate": 2e-05, "loss": 0.02892867, "step": 13488 }, { "epoch": 26.978, "grad_norm": 1.019363522529602, "learning_rate": 2e-05, "loss": 0.03823832, "step": 13489 }, { "epoch": 26.98, "grad_norm": 1.2241965532302856, "learning_rate": 2e-05, "loss": 0.04636896, "step": 13490 }, { "epoch": 26.982, "grad_norm": 1.5487970113754272, "learning_rate": 2e-05, "loss": 0.05474398, "step": 13491 }, { "epoch": 26.984, "grad_norm": 0.9554542899131775, "learning_rate": 2e-05, "loss": 0.03364764, "step": 13492 }, { "epoch": 26.986, "grad_norm": 2.8217360973358154, "learning_rate": 2e-05, "loss": 0.03697766, "step": 13493 }, { "epoch": 26.988, "grad_norm": 1.6074271202087402, "learning_rate": 2e-05, "loss": 0.04182863, "step": 13494 }, { "epoch": 26.99, "grad_norm": 1.3544893264770508, "learning_rate": 2e-05, "loss": 0.04771724, "step": 13495 }, { "epoch": 26.992, "grad_norm": 0.9438838362693787, "learning_rate": 2e-05, "loss": 0.03837147, "step": 13496 }, { "epoch": 26.994, "grad_norm": 1.067697525024414, "learning_rate": 2e-05, "loss": 0.03082527, "step": 13497 }, { "epoch": 26.996, "grad_norm": 3.1041669845581055, "learning_rate": 2e-05, "loss": 0.05515031, "step": 13498 }, { "epoch": 26.998, "grad_norm": 1.1457984447479248, "learning_rate": 2e-05, "loss": 0.04233011, "step": 13499 }, { "epoch": 27.0, "grad_norm": 1.2711799144744873, "learning_rate": 2e-05, "loss": 0.04493605, "step": 13500 }, { "epoch": 27.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9740518962075848, "Equal_1": 0.998, "Equal_2": 0.9860279441117764, "Equal_3": 0.9920159680638723, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9939879759519038, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.992, "Perpendicular_1": 0.998, "Perpendicular_2": 0.99, "Perpendicular_3": 0.8977955911823647, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.9996666666666667, "PointLiesOnCircle_3": 0.986, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9880239520958084 }, "eval_runtime": 319.7304, "eval_samples_per_second": 32.84, "eval_steps_per_second": 0.657, "step": 13500 }, { "epoch": 27.002, "grad_norm": 1.1527739763259888, "learning_rate": 2e-05, "loss": 0.04508916, "step": 13501 }, { "epoch": 27.004, "grad_norm": 1.562675952911377, "learning_rate": 2e-05, "loss": 0.04754071, "step": 13502 }, { "epoch": 27.006, "grad_norm": 1.1957426071166992, "learning_rate": 2e-05, "loss": 0.03879406, "step": 13503 }, { "epoch": 27.008, "grad_norm": 1.1542894840240479, "learning_rate": 2e-05, "loss": 0.0369472, "step": 13504 }, { "epoch": 27.01, "grad_norm": 0.9654834866523743, "learning_rate": 2e-05, "loss": 0.03338242, "step": 13505 }, { "epoch": 27.012, "grad_norm": 0.94996577501297, "learning_rate": 2e-05, "loss": 0.03290822, "step": 13506 }, { "epoch": 27.014, "grad_norm": 1.7157975435256958, "learning_rate": 2e-05, "loss": 0.0513361, "step": 13507 }, { "epoch": 27.016, "grad_norm": 1.248037576675415, "learning_rate": 2e-05, "loss": 0.03775504, "step": 13508 }, { "epoch": 27.018, "grad_norm": 0.9875592589378357, "learning_rate": 2e-05, "loss": 0.03280458, "step": 13509 }, { "epoch": 27.02, "grad_norm": 1.2043684720993042, "learning_rate": 2e-05, "loss": 0.03919297, "step": 13510 }, { "epoch": 27.022, "grad_norm": 1.1797631978988647, "learning_rate": 2e-05, "loss": 0.03209981, "step": 13511 }, { "epoch": 27.024, "grad_norm": 0.9786536693572998, "learning_rate": 2e-05, "loss": 0.03343519, "step": 13512 }, { "epoch": 27.026, "grad_norm": 1.6166154146194458, "learning_rate": 2e-05, "loss": 0.04526688, "step": 13513 }, { "epoch": 27.028, "grad_norm": 1.9152218103408813, "learning_rate": 2e-05, "loss": 0.04358869, "step": 13514 }, { "epoch": 27.03, "grad_norm": 1.1101301908493042, "learning_rate": 2e-05, "loss": 0.02884656, "step": 13515 }, { "epoch": 27.032, "grad_norm": 1.529909610748291, "learning_rate": 2e-05, "loss": 0.03502784, "step": 13516 }, { "epoch": 27.034, "grad_norm": 0.97792649269104, "learning_rate": 2e-05, "loss": 0.02850207, "step": 13517 }, { "epoch": 27.036, "grad_norm": 2.824786901473999, "learning_rate": 2e-05, "loss": 0.04891523, "step": 13518 }, { "epoch": 27.038, "grad_norm": 1.4314439296722412, "learning_rate": 2e-05, "loss": 0.03873865, "step": 13519 }, { "epoch": 27.04, "grad_norm": 1.3454880714416504, "learning_rate": 2e-05, "loss": 0.050864, "step": 13520 }, { "epoch": 27.042, "grad_norm": 1.2566922903060913, "learning_rate": 2e-05, "loss": 0.05498883, "step": 13521 }, { "epoch": 27.044, "grad_norm": 1.0700196027755737, "learning_rate": 2e-05, "loss": 0.03465045, "step": 13522 }, { "epoch": 27.046, "grad_norm": 1.479763388633728, "learning_rate": 2e-05, "loss": 0.04723434, "step": 13523 }, { "epoch": 27.048, "grad_norm": 1.4551644325256348, "learning_rate": 2e-05, "loss": 0.04056372, "step": 13524 }, { "epoch": 27.05, "grad_norm": 1.3529044389724731, "learning_rate": 2e-05, "loss": 0.06563016, "step": 13525 }, { "epoch": 27.052, "grad_norm": 1.122623085975647, "learning_rate": 2e-05, "loss": 0.04986753, "step": 13526 }, { "epoch": 27.054, "grad_norm": 1.1177496910095215, "learning_rate": 2e-05, "loss": 0.04144656, "step": 13527 }, { "epoch": 27.056, "grad_norm": 1.2102779150009155, "learning_rate": 2e-05, "loss": 0.04852476, "step": 13528 }, { "epoch": 27.058, "grad_norm": 1.0115301609039307, "learning_rate": 2e-05, "loss": 0.03474714, "step": 13529 }, { "epoch": 27.06, "grad_norm": 0.9496335983276367, "learning_rate": 2e-05, "loss": 0.03019882, "step": 13530 }, { "epoch": 27.062, "grad_norm": 1.04624342918396, "learning_rate": 2e-05, "loss": 0.03820858, "step": 13531 }, { "epoch": 27.064, "grad_norm": 1.3676940202713013, "learning_rate": 2e-05, "loss": 0.05121147, "step": 13532 }, { "epoch": 27.066, "grad_norm": 1.2893519401550293, "learning_rate": 2e-05, "loss": 0.04120003, "step": 13533 }, { "epoch": 27.068, "grad_norm": 3.120879650115967, "learning_rate": 2e-05, "loss": 0.05591538, "step": 13534 }, { "epoch": 27.07, "grad_norm": 1.1986747980117798, "learning_rate": 2e-05, "loss": 0.04751917, "step": 13535 }, { "epoch": 27.072, "grad_norm": 1.3522542715072632, "learning_rate": 2e-05, "loss": 0.04812311, "step": 13536 }, { "epoch": 27.074, "grad_norm": 0.9945651292800903, "learning_rate": 2e-05, "loss": 0.04262705, "step": 13537 }, { "epoch": 27.076, "grad_norm": 1.059237003326416, "learning_rate": 2e-05, "loss": 0.04054239, "step": 13538 }, { "epoch": 27.078, "grad_norm": 2.535274028778076, "learning_rate": 2e-05, "loss": 0.0487668, "step": 13539 }, { "epoch": 27.08, "grad_norm": 1.2254159450531006, "learning_rate": 2e-05, "loss": 0.04526926, "step": 13540 }, { "epoch": 27.082, "grad_norm": 1.0619149208068848, "learning_rate": 2e-05, "loss": 0.03487138, "step": 13541 }, { "epoch": 27.084, "grad_norm": 1.4296939373016357, "learning_rate": 2e-05, "loss": 0.05496912, "step": 13542 }, { "epoch": 27.086, "grad_norm": 1.0750129222869873, "learning_rate": 2e-05, "loss": 0.04620029, "step": 13543 }, { "epoch": 27.088, "grad_norm": 1.0724533796310425, "learning_rate": 2e-05, "loss": 0.0384186, "step": 13544 }, { "epoch": 27.09, "grad_norm": 1.2280149459838867, "learning_rate": 2e-05, "loss": 0.04641897, "step": 13545 }, { "epoch": 27.092, "grad_norm": 1.78925359249115, "learning_rate": 2e-05, "loss": 0.0408336, "step": 13546 }, { "epoch": 27.094, "grad_norm": 1.6435742378234863, "learning_rate": 2e-05, "loss": 0.04276124, "step": 13547 }, { "epoch": 27.096, "grad_norm": 1.4462822675704956, "learning_rate": 2e-05, "loss": 0.05522976, "step": 13548 }, { "epoch": 27.098, "grad_norm": 1.1338396072387695, "learning_rate": 2e-05, "loss": 0.03534421, "step": 13549 }, { "epoch": 27.1, "grad_norm": 1.2627896070480347, "learning_rate": 2e-05, "loss": 0.04895552, "step": 13550 }, { "epoch": 27.102, "grad_norm": 1.2807172536849976, "learning_rate": 2e-05, "loss": 0.03973666, "step": 13551 }, { "epoch": 27.104, "grad_norm": 1.1761025190353394, "learning_rate": 2e-05, "loss": 0.04104013, "step": 13552 }, { "epoch": 27.106, "grad_norm": 2.8090035915374756, "learning_rate": 2e-05, "loss": 0.05685973, "step": 13553 }, { "epoch": 27.108, "grad_norm": 1.8607357740402222, "learning_rate": 2e-05, "loss": 0.0470711, "step": 13554 }, { "epoch": 27.11, "grad_norm": 1.0800814628601074, "learning_rate": 2e-05, "loss": 0.03861955, "step": 13555 }, { "epoch": 27.112, "grad_norm": 2.145892858505249, "learning_rate": 2e-05, "loss": 0.05473135, "step": 13556 }, { "epoch": 27.114, "grad_norm": 0.7809432148933411, "learning_rate": 2e-05, "loss": 0.02051185, "step": 13557 }, { "epoch": 27.116, "grad_norm": 1.3810927867889404, "learning_rate": 2e-05, "loss": 0.05468324, "step": 13558 }, { "epoch": 27.118, "grad_norm": 1.2400935888290405, "learning_rate": 2e-05, "loss": 0.03758835, "step": 13559 }, { "epoch": 27.12, "grad_norm": 2.447911262512207, "learning_rate": 2e-05, "loss": 0.07153429, "step": 13560 }, { "epoch": 27.122, "grad_norm": 1.063595175743103, "learning_rate": 2e-05, "loss": 0.04638623, "step": 13561 }, { "epoch": 27.124, "grad_norm": 1.1095471382141113, "learning_rate": 2e-05, "loss": 0.03623333, "step": 13562 }, { "epoch": 27.126, "grad_norm": 2.209012031555176, "learning_rate": 2e-05, "loss": 0.03872228, "step": 13563 }, { "epoch": 27.128, "grad_norm": 1.9592890739440918, "learning_rate": 2e-05, "loss": 0.03570852, "step": 13564 }, { "epoch": 27.13, "grad_norm": 1.3131749629974365, "learning_rate": 2e-05, "loss": 0.04663508, "step": 13565 }, { "epoch": 27.132, "grad_norm": 1.461511492729187, "learning_rate": 2e-05, "loss": 0.04620817, "step": 13566 }, { "epoch": 27.134, "grad_norm": 2.396193504333496, "learning_rate": 2e-05, "loss": 0.05443357, "step": 13567 }, { "epoch": 27.136, "grad_norm": 1.4050869941711426, "learning_rate": 2e-05, "loss": 0.06443222, "step": 13568 }, { "epoch": 27.138, "grad_norm": 2.008094072341919, "learning_rate": 2e-05, "loss": 0.05765811, "step": 13569 }, { "epoch": 27.14, "grad_norm": 2.7625555992126465, "learning_rate": 2e-05, "loss": 0.07556622, "step": 13570 }, { "epoch": 27.142, "grad_norm": 0.9222900867462158, "learning_rate": 2e-05, "loss": 0.02512233, "step": 13571 }, { "epoch": 27.144, "grad_norm": 1.1549010276794434, "learning_rate": 2e-05, "loss": 0.03712163, "step": 13572 }, { "epoch": 27.146, "grad_norm": 2.7713279724121094, "learning_rate": 2e-05, "loss": 0.04769222, "step": 13573 }, { "epoch": 27.148, "grad_norm": 1.5160826444625854, "learning_rate": 2e-05, "loss": 0.04316017, "step": 13574 }, { "epoch": 27.15, "grad_norm": 1.5735561847686768, "learning_rate": 2e-05, "loss": 0.05453978, "step": 13575 }, { "epoch": 27.152, "grad_norm": 1.5878405570983887, "learning_rate": 2e-05, "loss": 0.0406678, "step": 13576 }, { "epoch": 27.154, "grad_norm": 1.9090240001678467, "learning_rate": 2e-05, "loss": 0.04792298, "step": 13577 }, { "epoch": 27.156, "grad_norm": 1.24784255027771, "learning_rate": 2e-05, "loss": 0.04576039, "step": 13578 }, { "epoch": 27.158, "grad_norm": 1.1896952390670776, "learning_rate": 2e-05, "loss": 0.03995226, "step": 13579 }, { "epoch": 27.16, "grad_norm": 1.1981700658798218, "learning_rate": 2e-05, "loss": 0.04131437, "step": 13580 }, { "epoch": 27.162, "grad_norm": 1.583319067955017, "learning_rate": 2e-05, "loss": 0.03563969, "step": 13581 }, { "epoch": 27.164, "grad_norm": 1.8085306882858276, "learning_rate": 2e-05, "loss": 0.04181949, "step": 13582 }, { "epoch": 27.166, "grad_norm": 1.1413896083831787, "learning_rate": 2e-05, "loss": 0.04053574, "step": 13583 }, { "epoch": 27.168, "grad_norm": 1.0028431415557861, "learning_rate": 2e-05, "loss": 0.04165848, "step": 13584 }, { "epoch": 27.17, "grad_norm": 1.02912175655365, "learning_rate": 2e-05, "loss": 0.03777147, "step": 13585 }, { "epoch": 27.172, "grad_norm": 1.0012383460998535, "learning_rate": 2e-05, "loss": 0.03452305, "step": 13586 }, { "epoch": 27.174, "grad_norm": 1.588924765586853, "learning_rate": 2e-05, "loss": 0.06215196, "step": 13587 }, { "epoch": 27.176, "grad_norm": 1.2847086191177368, "learning_rate": 2e-05, "loss": 0.0568499, "step": 13588 }, { "epoch": 27.178, "grad_norm": 0.8857622146606445, "learning_rate": 2e-05, "loss": 0.03210337, "step": 13589 }, { "epoch": 27.18, "grad_norm": 1.2372381687164307, "learning_rate": 2e-05, "loss": 0.03922543, "step": 13590 }, { "epoch": 27.182, "grad_norm": 1.127703070640564, "learning_rate": 2e-05, "loss": 0.04630746, "step": 13591 }, { "epoch": 27.184, "grad_norm": 1.2580335140228271, "learning_rate": 2e-05, "loss": 0.0422937, "step": 13592 }, { "epoch": 27.186, "grad_norm": 1.6288437843322754, "learning_rate": 2e-05, "loss": 0.04852737, "step": 13593 }, { "epoch": 27.188, "grad_norm": 1.931805968284607, "learning_rate": 2e-05, "loss": 0.05083385, "step": 13594 }, { "epoch": 27.19, "grad_norm": 1.3048754930496216, "learning_rate": 2e-05, "loss": 0.05102141, "step": 13595 }, { "epoch": 27.192, "grad_norm": 1.8813409805297852, "learning_rate": 2e-05, "loss": 0.06094917, "step": 13596 }, { "epoch": 27.194, "grad_norm": 1.7660962343215942, "learning_rate": 2e-05, "loss": 0.04968596, "step": 13597 }, { "epoch": 27.196, "grad_norm": 1.2359485626220703, "learning_rate": 2e-05, "loss": 0.05053048, "step": 13598 }, { "epoch": 27.198, "grad_norm": 1.4421732425689697, "learning_rate": 2e-05, "loss": 0.03922078, "step": 13599 }, { "epoch": 27.2, "grad_norm": 1.9196407794952393, "learning_rate": 2e-05, "loss": 0.04884792, "step": 13600 }, { "epoch": 27.202, "grad_norm": 1.4350956678390503, "learning_rate": 2e-05, "loss": 0.05208459, "step": 13601 }, { "epoch": 27.204, "grad_norm": 1.878467321395874, "learning_rate": 2e-05, "loss": 0.06585848, "step": 13602 }, { "epoch": 27.206, "grad_norm": 2.1743922233581543, "learning_rate": 2e-05, "loss": 0.0544947, "step": 13603 }, { "epoch": 27.208, "grad_norm": 1.2076090574264526, "learning_rate": 2e-05, "loss": 0.03394687, "step": 13604 }, { "epoch": 27.21, "grad_norm": 2.1118581295013428, "learning_rate": 2e-05, "loss": 0.06034087, "step": 13605 }, { "epoch": 27.212, "grad_norm": 0.812614381313324, "learning_rate": 2e-05, "loss": 0.03094057, "step": 13606 }, { "epoch": 27.214, "grad_norm": 1.090105652809143, "learning_rate": 2e-05, "loss": 0.05721441, "step": 13607 }, { "epoch": 27.216, "grad_norm": 1.6390467882156372, "learning_rate": 2e-05, "loss": 0.04660071, "step": 13608 }, { "epoch": 27.218, "grad_norm": 2.038808584213257, "learning_rate": 2e-05, "loss": 0.05760818, "step": 13609 }, { "epoch": 27.22, "grad_norm": 1.293735146522522, "learning_rate": 2e-05, "loss": 0.03360115, "step": 13610 }, { "epoch": 27.222, "grad_norm": 1.192157506942749, "learning_rate": 2e-05, "loss": 0.03546806, "step": 13611 }, { "epoch": 27.224, "grad_norm": 1.3775697946548462, "learning_rate": 2e-05, "loss": 0.0483359, "step": 13612 }, { "epoch": 27.226, "grad_norm": 1.3804492950439453, "learning_rate": 2e-05, "loss": 0.05687822, "step": 13613 }, { "epoch": 27.228, "grad_norm": 1.8396795988082886, "learning_rate": 2e-05, "loss": 0.04156945, "step": 13614 }, { "epoch": 27.23, "grad_norm": 1.3716535568237305, "learning_rate": 2e-05, "loss": 0.05421146, "step": 13615 }, { "epoch": 27.232, "grad_norm": 1.2649599313735962, "learning_rate": 2e-05, "loss": 0.04328211, "step": 13616 }, { "epoch": 27.234, "grad_norm": 1.3094875812530518, "learning_rate": 2e-05, "loss": 0.04095453, "step": 13617 }, { "epoch": 27.236, "grad_norm": 1.3521350622177124, "learning_rate": 2e-05, "loss": 0.0435282, "step": 13618 }, { "epoch": 27.238, "grad_norm": 0.901831865310669, "learning_rate": 2e-05, "loss": 0.02451799, "step": 13619 }, { "epoch": 27.24, "grad_norm": 1.344462275505066, "learning_rate": 2e-05, "loss": 0.06785841, "step": 13620 }, { "epoch": 27.242, "grad_norm": 1.497972846031189, "learning_rate": 2e-05, "loss": 0.04425344, "step": 13621 }, { "epoch": 27.244, "grad_norm": 0.9255624413490295, "learning_rate": 2e-05, "loss": 0.02206445, "step": 13622 }, { "epoch": 27.246, "grad_norm": 0.988325834274292, "learning_rate": 2e-05, "loss": 0.02141338, "step": 13623 }, { "epoch": 27.248, "grad_norm": 1.9612500667572021, "learning_rate": 2e-05, "loss": 0.03873473, "step": 13624 }, { "epoch": 27.25, "grad_norm": 2.3645355701446533, "learning_rate": 2e-05, "loss": 0.0586706, "step": 13625 }, { "epoch": 27.252, "grad_norm": 1.6442853212356567, "learning_rate": 2e-05, "loss": 0.04403327, "step": 13626 }, { "epoch": 27.254, "grad_norm": 1.2499282360076904, "learning_rate": 2e-05, "loss": 0.05442981, "step": 13627 }, { "epoch": 27.256, "grad_norm": 1.122299313545227, "learning_rate": 2e-05, "loss": 0.04064956, "step": 13628 }, { "epoch": 27.258, "grad_norm": 1.537174105644226, "learning_rate": 2e-05, "loss": 0.04302373, "step": 13629 }, { "epoch": 27.26, "grad_norm": 1.0611103773117065, "learning_rate": 2e-05, "loss": 0.03115848, "step": 13630 }, { "epoch": 27.262, "grad_norm": 1.2895267009735107, "learning_rate": 2e-05, "loss": 0.03978854, "step": 13631 }, { "epoch": 27.264, "grad_norm": 1.8378465175628662, "learning_rate": 2e-05, "loss": 0.06035455, "step": 13632 }, { "epoch": 27.266, "grad_norm": 0.9278236031532288, "learning_rate": 2e-05, "loss": 0.0275078, "step": 13633 }, { "epoch": 27.268, "grad_norm": 1.2331749200820923, "learning_rate": 2e-05, "loss": 0.03504308, "step": 13634 }, { "epoch": 27.27, "grad_norm": 1.629676103591919, "learning_rate": 2e-05, "loss": 0.0618648, "step": 13635 }, { "epoch": 27.272, "grad_norm": 1.405290961265564, "learning_rate": 2e-05, "loss": 0.0505509, "step": 13636 }, { "epoch": 27.274, "grad_norm": 1.1765658855438232, "learning_rate": 2e-05, "loss": 0.05041191, "step": 13637 }, { "epoch": 27.276, "grad_norm": 1.735039472579956, "learning_rate": 2e-05, "loss": 0.02883138, "step": 13638 }, { "epoch": 27.278, "grad_norm": 0.9482174515724182, "learning_rate": 2e-05, "loss": 0.02577251, "step": 13639 }, { "epoch": 27.28, "grad_norm": 1.1373050212860107, "learning_rate": 2e-05, "loss": 0.04171196, "step": 13640 }, { "epoch": 27.282, "grad_norm": 2.016035795211792, "learning_rate": 2e-05, "loss": 0.0531563, "step": 13641 }, { "epoch": 27.284, "grad_norm": 1.1706429719924927, "learning_rate": 2e-05, "loss": 0.04091615, "step": 13642 }, { "epoch": 27.286, "grad_norm": 0.9347245693206787, "learning_rate": 2e-05, "loss": 0.03005055, "step": 13643 }, { "epoch": 27.288, "grad_norm": 1.119093418121338, "learning_rate": 2e-05, "loss": 0.05457556, "step": 13644 }, { "epoch": 27.29, "grad_norm": 1.3505531549453735, "learning_rate": 2e-05, "loss": 0.03677881, "step": 13645 }, { "epoch": 27.292, "grad_norm": 1.270559549331665, "learning_rate": 2e-05, "loss": 0.03553203, "step": 13646 }, { "epoch": 27.294, "grad_norm": 1.016276240348816, "learning_rate": 2e-05, "loss": 0.02860792, "step": 13647 }, { "epoch": 27.296, "grad_norm": 2.6578431129455566, "learning_rate": 2e-05, "loss": 0.0371431, "step": 13648 }, { "epoch": 27.298, "grad_norm": 1.0674734115600586, "learning_rate": 2e-05, "loss": 0.03743986, "step": 13649 }, { "epoch": 27.3, "grad_norm": 0.987488865852356, "learning_rate": 2e-05, "loss": 0.02790395, "step": 13650 }, { "epoch": 27.302, "grad_norm": 1.2899256944656372, "learning_rate": 2e-05, "loss": 0.05075889, "step": 13651 }, { "epoch": 27.304, "grad_norm": 1.9835212230682373, "learning_rate": 2e-05, "loss": 0.05801808, "step": 13652 }, { "epoch": 27.306, "grad_norm": 1.7673009634017944, "learning_rate": 2e-05, "loss": 0.05642558, "step": 13653 }, { "epoch": 27.308, "grad_norm": 1.229543924331665, "learning_rate": 2e-05, "loss": 0.04363228, "step": 13654 }, { "epoch": 27.31, "grad_norm": 1.8748433589935303, "learning_rate": 2e-05, "loss": 0.04515584, "step": 13655 }, { "epoch": 27.312, "grad_norm": 1.0442911386489868, "learning_rate": 2e-05, "loss": 0.03930074, "step": 13656 }, { "epoch": 27.314, "grad_norm": 1.443469524383545, "learning_rate": 2e-05, "loss": 0.04800137, "step": 13657 }, { "epoch": 27.316, "grad_norm": 1.3523346185684204, "learning_rate": 2e-05, "loss": 0.04009256, "step": 13658 }, { "epoch": 27.318, "grad_norm": 1.8447299003601074, "learning_rate": 2e-05, "loss": 0.0515473, "step": 13659 }, { "epoch": 27.32, "grad_norm": 1.3202327489852905, "learning_rate": 2e-05, "loss": 0.05036566, "step": 13660 }, { "epoch": 27.322, "grad_norm": 1.1027467250823975, "learning_rate": 2e-05, "loss": 0.04486959, "step": 13661 }, { "epoch": 27.324, "grad_norm": 1.2013741731643677, "learning_rate": 2e-05, "loss": 0.04390001, "step": 13662 }, { "epoch": 27.326, "grad_norm": 1.7813587188720703, "learning_rate": 2e-05, "loss": 0.03983445, "step": 13663 }, { "epoch": 27.328, "grad_norm": 2.0535483360290527, "learning_rate": 2e-05, "loss": 0.0440939, "step": 13664 }, { "epoch": 27.33, "grad_norm": 1.0619609355926514, "learning_rate": 2e-05, "loss": 0.03448323, "step": 13665 }, { "epoch": 27.332, "grad_norm": 1.0306423902511597, "learning_rate": 2e-05, "loss": 0.03753592, "step": 13666 }, { "epoch": 27.334, "grad_norm": 1.2383522987365723, "learning_rate": 2e-05, "loss": 0.03938786, "step": 13667 }, { "epoch": 27.336, "grad_norm": 1.9096163511276245, "learning_rate": 2e-05, "loss": 0.07150873, "step": 13668 }, { "epoch": 27.338, "grad_norm": 1.201596975326538, "learning_rate": 2e-05, "loss": 0.03716744, "step": 13669 }, { "epoch": 27.34, "grad_norm": 1.5398945808410645, "learning_rate": 2e-05, "loss": 0.0461993, "step": 13670 }, { "epoch": 27.342, "grad_norm": 1.073887586593628, "learning_rate": 2e-05, "loss": 0.0333119, "step": 13671 }, { "epoch": 27.344, "grad_norm": 1.5942431688308716, "learning_rate": 2e-05, "loss": 0.0529358, "step": 13672 }, { "epoch": 27.346, "grad_norm": 1.545664668083191, "learning_rate": 2e-05, "loss": 0.03542736, "step": 13673 }, { "epoch": 27.348, "grad_norm": 1.5381520986557007, "learning_rate": 2e-05, "loss": 0.03843318, "step": 13674 }, { "epoch": 27.35, "grad_norm": 1.3689863681793213, "learning_rate": 2e-05, "loss": 0.02982055, "step": 13675 }, { "epoch": 27.352, "grad_norm": 1.2728350162506104, "learning_rate": 2e-05, "loss": 0.05316496, "step": 13676 }, { "epoch": 27.354, "grad_norm": 0.9394140839576721, "learning_rate": 2e-05, "loss": 0.02646692, "step": 13677 }, { "epoch": 27.356, "grad_norm": 0.9711006879806519, "learning_rate": 2e-05, "loss": 0.02917571, "step": 13678 }, { "epoch": 27.358, "grad_norm": 1.5109331607818604, "learning_rate": 2e-05, "loss": 0.03243531, "step": 13679 }, { "epoch": 27.36, "grad_norm": 1.113895297050476, "learning_rate": 2e-05, "loss": 0.03930811, "step": 13680 }, { "epoch": 27.362, "grad_norm": 1.5176002979278564, "learning_rate": 2e-05, "loss": 0.06130125, "step": 13681 }, { "epoch": 27.364, "grad_norm": 0.8235756754875183, "learning_rate": 2e-05, "loss": 0.02695323, "step": 13682 }, { "epoch": 27.366, "grad_norm": 1.0173813104629517, "learning_rate": 2e-05, "loss": 0.03705809, "step": 13683 }, { "epoch": 27.368, "grad_norm": 1.7335045337677002, "learning_rate": 2e-05, "loss": 0.04375722, "step": 13684 }, { "epoch": 27.37, "grad_norm": 1.392207145690918, "learning_rate": 2e-05, "loss": 0.03899303, "step": 13685 }, { "epoch": 27.372, "grad_norm": 1.5254584550857544, "learning_rate": 2e-05, "loss": 0.05612728, "step": 13686 }, { "epoch": 27.374, "grad_norm": 1.4681589603424072, "learning_rate": 2e-05, "loss": 0.05056222, "step": 13687 }, { "epoch": 27.376, "grad_norm": 1.21063232421875, "learning_rate": 2e-05, "loss": 0.03048548, "step": 13688 }, { "epoch": 27.378, "grad_norm": 1.43213951587677, "learning_rate": 2e-05, "loss": 0.06458762, "step": 13689 }, { "epoch": 27.38, "grad_norm": 2.742194652557373, "learning_rate": 2e-05, "loss": 0.05234894, "step": 13690 }, { "epoch": 27.382, "grad_norm": 2.0506792068481445, "learning_rate": 2e-05, "loss": 0.05463105, "step": 13691 }, { "epoch": 27.384, "grad_norm": 1.5875942707061768, "learning_rate": 2e-05, "loss": 0.03925296, "step": 13692 }, { "epoch": 27.386, "grad_norm": 1.5243009328842163, "learning_rate": 2e-05, "loss": 0.05017775, "step": 13693 }, { "epoch": 27.388, "grad_norm": 1.4063950777053833, "learning_rate": 2e-05, "loss": 0.05697461, "step": 13694 }, { "epoch": 27.39, "grad_norm": 2.2699193954467773, "learning_rate": 2e-05, "loss": 0.03670984, "step": 13695 }, { "epoch": 27.392, "grad_norm": 2.1173882484436035, "learning_rate": 2e-05, "loss": 0.05186939, "step": 13696 }, { "epoch": 27.394, "grad_norm": 1.2842679023742676, "learning_rate": 2e-05, "loss": 0.06256726, "step": 13697 }, { "epoch": 27.396, "grad_norm": 2.2587108612060547, "learning_rate": 2e-05, "loss": 0.06481649, "step": 13698 }, { "epoch": 27.398, "grad_norm": 1.8947147130966187, "learning_rate": 2e-05, "loss": 0.04356439, "step": 13699 }, { "epoch": 27.4, "grad_norm": 0.9812667369842529, "learning_rate": 2e-05, "loss": 0.02878316, "step": 13700 }, { "epoch": 27.402, "grad_norm": 1.1468307971954346, "learning_rate": 2e-05, "loss": 0.04130252, "step": 13701 }, { "epoch": 27.404, "grad_norm": 1.0459030866622925, "learning_rate": 2e-05, "loss": 0.03767227, "step": 13702 }, { "epoch": 27.406, "grad_norm": 1.448286771774292, "learning_rate": 2e-05, "loss": 0.04471764, "step": 13703 }, { "epoch": 27.408, "grad_norm": 1.1528844833374023, "learning_rate": 2e-05, "loss": 0.02849596, "step": 13704 }, { "epoch": 27.41, "grad_norm": 1.6160606145858765, "learning_rate": 2e-05, "loss": 0.02551612, "step": 13705 }, { "epoch": 27.412, "grad_norm": 2.2915830612182617, "learning_rate": 2e-05, "loss": 0.05899898, "step": 13706 }, { "epoch": 27.414, "grad_norm": 1.510726809501648, "learning_rate": 2e-05, "loss": 0.05045645, "step": 13707 }, { "epoch": 27.416, "grad_norm": 3.0780930519104004, "learning_rate": 2e-05, "loss": 0.06752353, "step": 13708 }, { "epoch": 27.418, "grad_norm": 1.264102816581726, "learning_rate": 2e-05, "loss": 0.04427083, "step": 13709 }, { "epoch": 27.42, "grad_norm": 1.073390007019043, "learning_rate": 2e-05, "loss": 0.04122963, "step": 13710 }, { "epoch": 27.422, "grad_norm": 1.1260555982589722, "learning_rate": 2e-05, "loss": 0.04339466, "step": 13711 }, { "epoch": 27.424, "grad_norm": 1.236524224281311, "learning_rate": 2e-05, "loss": 0.03498268, "step": 13712 }, { "epoch": 27.426, "grad_norm": 1.594119906425476, "learning_rate": 2e-05, "loss": 0.04375029, "step": 13713 }, { "epoch": 27.428, "grad_norm": 1.2735551595687866, "learning_rate": 2e-05, "loss": 0.03774681, "step": 13714 }, { "epoch": 27.43, "grad_norm": 1.059618592262268, "learning_rate": 2e-05, "loss": 0.03130772, "step": 13715 }, { "epoch": 27.432, "grad_norm": 1.5274853706359863, "learning_rate": 2e-05, "loss": 0.05236381, "step": 13716 }, { "epoch": 27.434, "grad_norm": 0.9794763326644897, "learning_rate": 2e-05, "loss": 0.03129626, "step": 13717 }, { "epoch": 27.436, "grad_norm": 1.2296133041381836, "learning_rate": 2e-05, "loss": 0.04068862, "step": 13718 }, { "epoch": 27.438, "grad_norm": 1.5292551517486572, "learning_rate": 2e-05, "loss": 0.04712692, "step": 13719 }, { "epoch": 27.44, "grad_norm": 1.157141089439392, "learning_rate": 2e-05, "loss": 0.05303669, "step": 13720 }, { "epoch": 27.442, "grad_norm": 1.2794381380081177, "learning_rate": 2e-05, "loss": 0.05657168, "step": 13721 }, { "epoch": 27.444, "grad_norm": 1.1883527040481567, "learning_rate": 2e-05, "loss": 0.05889457, "step": 13722 }, { "epoch": 27.446, "grad_norm": 1.260737419128418, "learning_rate": 2e-05, "loss": 0.04855951, "step": 13723 }, { "epoch": 27.448, "grad_norm": 1.1919201612472534, "learning_rate": 2e-05, "loss": 0.02818074, "step": 13724 }, { "epoch": 27.45, "grad_norm": 1.3503472805023193, "learning_rate": 2e-05, "loss": 0.05925683, "step": 13725 }, { "epoch": 27.452, "grad_norm": 1.102662205696106, "learning_rate": 2e-05, "loss": 0.03601834, "step": 13726 }, { "epoch": 27.454, "grad_norm": 0.9547258019447327, "learning_rate": 2e-05, "loss": 0.0299718, "step": 13727 }, { "epoch": 27.456, "grad_norm": 1.2688747644424438, "learning_rate": 2e-05, "loss": 0.04194263, "step": 13728 }, { "epoch": 27.458, "grad_norm": 2.936462640762329, "learning_rate": 2e-05, "loss": 0.0673967, "step": 13729 }, { "epoch": 27.46, "grad_norm": 1.7205671072006226, "learning_rate": 2e-05, "loss": 0.06203447, "step": 13730 }, { "epoch": 27.462, "grad_norm": 1.4902185201644897, "learning_rate": 2e-05, "loss": 0.04702317, "step": 13731 }, { "epoch": 27.464, "grad_norm": 1.2047202587127686, "learning_rate": 2e-05, "loss": 0.04467279, "step": 13732 }, { "epoch": 27.466, "grad_norm": 1.2293957471847534, "learning_rate": 2e-05, "loss": 0.03134837, "step": 13733 }, { "epoch": 27.468, "grad_norm": 1.4293403625488281, "learning_rate": 2e-05, "loss": 0.03927672, "step": 13734 }, { "epoch": 27.47, "grad_norm": 1.2355231046676636, "learning_rate": 2e-05, "loss": 0.03786084, "step": 13735 }, { "epoch": 27.472, "grad_norm": 1.4935061931610107, "learning_rate": 2e-05, "loss": 0.05582435, "step": 13736 }, { "epoch": 27.474, "grad_norm": 1.0883551836013794, "learning_rate": 2e-05, "loss": 0.05210774, "step": 13737 }, { "epoch": 27.476, "grad_norm": 1.4402879476547241, "learning_rate": 2e-05, "loss": 0.0516119, "step": 13738 }, { "epoch": 27.478, "grad_norm": 1.3633337020874023, "learning_rate": 2e-05, "loss": 0.05501125, "step": 13739 }, { "epoch": 27.48, "grad_norm": 1.403083324432373, "learning_rate": 2e-05, "loss": 0.07498935, "step": 13740 }, { "epoch": 27.482, "grad_norm": 1.22272527217865, "learning_rate": 2e-05, "loss": 0.04192954, "step": 13741 }, { "epoch": 27.484, "grad_norm": 0.9646196961402893, "learning_rate": 2e-05, "loss": 0.03863492, "step": 13742 }, { "epoch": 27.486, "grad_norm": 1.4368653297424316, "learning_rate": 2e-05, "loss": 0.05678754, "step": 13743 }, { "epoch": 27.488, "grad_norm": 1.0323776006698608, "learning_rate": 2e-05, "loss": 0.03164582, "step": 13744 }, { "epoch": 27.49, "grad_norm": 1.335216760635376, "learning_rate": 2e-05, "loss": 0.04490231, "step": 13745 }, { "epoch": 27.492, "grad_norm": 1.462903618812561, "learning_rate": 2e-05, "loss": 0.05864259, "step": 13746 }, { "epoch": 27.494, "grad_norm": 1.1962298154830933, "learning_rate": 2e-05, "loss": 0.03484888, "step": 13747 }, { "epoch": 27.496, "grad_norm": 1.8432676792144775, "learning_rate": 2e-05, "loss": 0.04844757, "step": 13748 }, { "epoch": 27.498, "grad_norm": 1.9054256677627563, "learning_rate": 2e-05, "loss": 0.03610344, "step": 13749 }, { "epoch": 27.5, "grad_norm": 1.8628199100494385, "learning_rate": 2e-05, "loss": 0.03124732, "step": 13750 }, { "epoch": 27.502, "grad_norm": 1.300698161125183, "learning_rate": 2e-05, "loss": 0.03852872, "step": 13751 }, { "epoch": 27.504, "grad_norm": 1.4470487833023071, "learning_rate": 2e-05, "loss": 0.03881678, "step": 13752 }, { "epoch": 27.506, "grad_norm": 1.4798719882965088, "learning_rate": 2e-05, "loss": 0.0504625, "step": 13753 }, { "epoch": 27.508, "grad_norm": 0.9879271388053894, "learning_rate": 2e-05, "loss": 0.0390942, "step": 13754 }, { "epoch": 27.51, "grad_norm": 1.9289140701293945, "learning_rate": 2e-05, "loss": 0.04204384, "step": 13755 }, { "epoch": 27.512, "grad_norm": 2.8271074295043945, "learning_rate": 2e-05, "loss": 0.0529681, "step": 13756 }, { "epoch": 27.514, "grad_norm": 4.042783260345459, "learning_rate": 2e-05, "loss": 0.04220014, "step": 13757 }, { "epoch": 27.516, "grad_norm": 1.2601898908615112, "learning_rate": 2e-05, "loss": 0.03472359, "step": 13758 }, { "epoch": 27.518, "grad_norm": 1.2109252214431763, "learning_rate": 2e-05, "loss": 0.04545831, "step": 13759 }, { "epoch": 27.52, "grad_norm": 1.168678641319275, "learning_rate": 2e-05, "loss": 0.03518002, "step": 13760 }, { "epoch": 27.522, "grad_norm": 1.7536158561706543, "learning_rate": 2e-05, "loss": 0.05130105, "step": 13761 }, { "epoch": 27.524, "grad_norm": 1.5836191177368164, "learning_rate": 2e-05, "loss": 0.03332604, "step": 13762 }, { "epoch": 27.526, "grad_norm": 2.656456232070923, "learning_rate": 2e-05, "loss": 0.05432518, "step": 13763 }, { "epoch": 27.528, "grad_norm": 2.7838237285614014, "learning_rate": 2e-05, "loss": 0.03519111, "step": 13764 }, { "epoch": 27.53, "grad_norm": 2.392075777053833, "learning_rate": 2e-05, "loss": 0.04325394, "step": 13765 }, { "epoch": 27.532, "grad_norm": 1.0115920305252075, "learning_rate": 2e-05, "loss": 0.03398691, "step": 13766 }, { "epoch": 27.534, "grad_norm": 1.0626614093780518, "learning_rate": 2e-05, "loss": 0.03680531, "step": 13767 }, { "epoch": 27.536, "grad_norm": 1.1814494132995605, "learning_rate": 2e-05, "loss": 0.0340856, "step": 13768 }, { "epoch": 27.538, "grad_norm": 1.5384715795516968, "learning_rate": 2e-05, "loss": 0.04477349, "step": 13769 }, { "epoch": 27.54, "grad_norm": 1.392616868019104, "learning_rate": 2e-05, "loss": 0.03679165, "step": 13770 }, { "epoch": 27.542, "grad_norm": 2.3772451877593994, "learning_rate": 2e-05, "loss": 0.04677385, "step": 13771 }, { "epoch": 27.544, "grad_norm": 2.6153879165649414, "learning_rate": 2e-05, "loss": 0.05070829, "step": 13772 }, { "epoch": 27.546, "grad_norm": 1.1042912006378174, "learning_rate": 2e-05, "loss": 0.03895772, "step": 13773 }, { "epoch": 27.548000000000002, "grad_norm": 2.243035316467285, "learning_rate": 2e-05, "loss": 0.04948647, "step": 13774 }, { "epoch": 27.55, "grad_norm": 1.1408405303955078, "learning_rate": 2e-05, "loss": 0.03680218, "step": 13775 }, { "epoch": 27.552, "grad_norm": 1.123650074005127, "learning_rate": 2e-05, "loss": 0.04027671, "step": 13776 }, { "epoch": 27.554, "grad_norm": 1.9606479406356812, "learning_rate": 2e-05, "loss": 0.05564614, "step": 13777 }, { "epoch": 27.556, "grad_norm": 1.310119867324829, "learning_rate": 2e-05, "loss": 0.03973632, "step": 13778 }, { "epoch": 27.558, "grad_norm": 1.5927740335464478, "learning_rate": 2e-05, "loss": 0.04547199, "step": 13779 }, { "epoch": 27.56, "grad_norm": 1.2601191997528076, "learning_rate": 2e-05, "loss": 0.05317003, "step": 13780 }, { "epoch": 27.562, "grad_norm": 1.647443175315857, "learning_rate": 2e-05, "loss": 0.0479614, "step": 13781 }, { "epoch": 27.564, "grad_norm": 1.0511468648910522, "learning_rate": 2e-05, "loss": 0.03439099, "step": 13782 }, { "epoch": 27.566, "grad_norm": 1.2322078943252563, "learning_rate": 2e-05, "loss": 0.04574615, "step": 13783 }, { "epoch": 27.568, "grad_norm": 1.0778727531433105, "learning_rate": 2e-05, "loss": 0.03921001, "step": 13784 }, { "epoch": 27.57, "grad_norm": 1.158125877380371, "learning_rate": 2e-05, "loss": 0.0469446, "step": 13785 }, { "epoch": 27.572, "grad_norm": 1.4670352935791016, "learning_rate": 2e-05, "loss": 0.04167302, "step": 13786 }, { "epoch": 27.574, "grad_norm": 2.666126251220703, "learning_rate": 2e-05, "loss": 0.05114643, "step": 13787 }, { "epoch": 27.576, "grad_norm": 1.1133148670196533, "learning_rate": 2e-05, "loss": 0.03147897, "step": 13788 }, { "epoch": 27.578, "grad_norm": 1.703502893447876, "learning_rate": 2e-05, "loss": 0.03852395, "step": 13789 }, { "epoch": 27.58, "grad_norm": 1.3457422256469727, "learning_rate": 2e-05, "loss": 0.0443656, "step": 13790 }, { "epoch": 27.582, "grad_norm": 1.0474896430969238, "learning_rate": 2e-05, "loss": 0.02956739, "step": 13791 }, { "epoch": 27.584, "grad_norm": 1.2296174764633179, "learning_rate": 2e-05, "loss": 0.03405485, "step": 13792 }, { "epoch": 27.586, "grad_norm": 0.9992415308952332, "learning_rate": 2e-05, "loss": 0.02881895, "step": 13793 }, { "epoch": 27.588, "grad_norm": 1.370805263519287, "learning_rate": 2e-05, "loss": 0.0559724, "step": 13794 }, { "epoch": 27.59, "grad_norm": 2.4789819717407227, "learning_rate": 2e-05, "loss": 0.04826446, "step": 13795 }, { "epoch": 27.592, "grad_norm": 2.2076499462127686, "learning_rate": 2e-05, "loss": 0.05645463, "step": 13796 }, { "epoch": 27.594, "grad_norm": 1.5076375007629395, "learning_rate": 2e-05, "loss": 0.04553786, "step": 13797 }, { "epoch": 27.596, "grad_norm": 1.0695239305496216, "learning_rate": 2e-05, "loss": 0.0458618, "step": 13798 }, { "epoch": 27.598, "grad_norm": 1.2451701164245605, "learning_rate": 2e-05, "loss": 0.04357987, "step": 13799 }, { "epoch": 27.6, "grad_norm": 1.1719753742218018, "learning_rate": 2e-05, "loss": 0.05280523, "step": 13800 }, { "epoch": 27.602, "grad_norm": 1.9771161079406738, "learning_rate": 2e-05, "loss": 0.0490317, "step": 13801 }, { "epoch": 27.604, "grad_norm": 1.2304954528808594, "learning_rate": 2e-05, "loss": 0.04574135, "step": 13802 }, { "epoch": 27.606, "grad_norm": 1.237014651298523, "learning_rate": 2e-05, "loss": 0.03605729, "step": 13803 }, { "epoch": 27.608, "grad_norm": 3.3194026947021484, "learning_rate": 2e-05, "loss": 0.05870149, "step": 13804 }, { "epoch": 27.61, "grad_norm": 1.4011256694793701, "learning_rate": 2e-05, "loss": 0.03893548, "step": 13805 }, { "epoch": 27.612, "grad_norm": 1.604568362236023, "learning_rate": 2e-05, "loss": 0.0501047, "step": 13806 }, { "epoch": 27.614, "grad_norm": 1.0760208368301392, "learning_rate": 2e-05, "loss": 0.03730212, "step": 13807 }, { "epoch": 27.616, "grad_norm": 1.54534912109375, "learning_rate": 2e-05, "loss": 0.05426643, "step": 13808 }, { "epoch": 27.618, "grad_norm": 1.1083625555038452, "learning_rate": 2e-05, "loss": 0.03855055, "step": 13809 }, { "epoch": 27.62, "grad_norm": 1.462074875831604, "learning_rate": 2e-05, "loss": 0.04273522, "step": 13810 }, { "epoch": 27.622, "grad_norm": 2.044818878173828, "learning_rate": 2e-05, "loss": 0.06233047, "step": 13811 }, { "epoch": 27.624, "grad_norm": 1.4091565608978271, "learning_rate": 2e-05, "loss": 0.04076818, "step": 13812 }, { "epoch": 27.626, "grad_norm": 1.1645406484603882, "learning_rate": 2e-05, "loss": 0.0466489, "step": 13813 }, { "epoch": 27.628, "grad_norm": 1.4516359567642212, "learning_rate": 2e-05, "loss": 0.04340706, "step": 13814 }, { "epoch": 27.63, "grad_norm": 1.0174524784088135, "learning_rate": 2e-05, "loss": 0.02986944, "step": 13815 }, { "epoch": 27.632, "grad_norm": 1.4557236433029175, "learning_rate": 2e-05, "loss": 0.04444972, "step": 13816 }, { "epoch": 27.634, "grad_norm": 1.2507081031799316, "learning_rate": 2e-05, "loss": 0.0360889, "step": 13817 }, { "epoch": 27.636, "grad_norm": 2.815284013748169, "learning_rate": 2e-05, "loss": 0.04525167, "step": 13818 }, { "epoch": 27.638, "grad_norm": 0.9910860061645508, "learning_rate": 2e-05, "loss": 0.03063339, "step": 13819 }, { "epoch": 27.64, "grad_norm": 2.006213426589966, "learning_rate": 2e-05, "loss": 0.04587614, "step": 13820 }, { "epoch": 27.642, "grad_norm": 1.6014063358306885, "learning_rate": 2e-05, "loss": 0.05192234, "step": 13821 }, { "epoch": 27.644, "grad_norm": 1.4815598726272583, "learning_rate": 2e-05, "loss": 0.05399399, "step": 13822 }, { "epoch": 27.646, "grad_norm": 1.2255171537399292, "learning_rate": 2e-05, "loss": 0.04327129, "step": 13823 }, { "epoch": 27.648, "grad_norm": 1.0979926586151123, "learning_rate": 2e-05, "loss": 0.03967486, "step": 13824 }, { "epoch": 27.65, "grad_norm": 1.2818236351013184, "learning_rate": 2e-05, "loss": 0.05102768, "step": 13825 }, { "epoch": 27.652, "grad_norm": 1.1726670265197754, "learning_rate": 2e-05, "loss": 0.04293338, "step": 13826 }, { "epoch": 27.654, "grad_norm": 1.7466474771499634, "learning_rate": 2e-05, "loss": 0.05429044, "step": 13827 }, { "epoch": 27.656, "grad_norm": 1.266982078552246, "learning_rate": 2e-05, "loss": 0.05010791, "step": 13828 }, { "epoch": 27.658, "grad_norm": 1.0520493984222412, "learning_rate": 2e-05, "loss": 0.0411777, "step": 13829 }, { "epoch": 27.66, "grad_norm": 1.2513072490692139, "learning_rate": 2e-05, "loss": 0.0348096, "step": 13830 }, { "epoch": 27.662, "grad_norm": 0.8475068211555481, "learning_rate": 2e-05, "loss": 0.02205401, "step": 13831 }, { "epoch": 27.664, "grad_norm": 1.2510443925857544, "learning_rate": 2e-05, "loss": 0.04548548, "step": 13832 }, { "epoch": 27.666, "grad_norm": 1.2824064493179321, "learning_rate": 2e-05, "loss": 0.03442299, "step": 13833 }, { "epoch": 27.668, "grad_norm": 1.1054766178131104, "learning_rate": 2e-05, "loss": 0.04257656, "step": 13834 }, { "epoch": 27.67, "grad_norm": 1.853498935699463, "learning_rate": 2e-05, "loss": 0.05419736, "step": 13835 }, { "epoch": 27.672, "grad_norm": 4.41054630279541, "learning_rate": 2e-05, "loss": 0.05251537, "step": 13836 }, { "epoch": 27.674, "grad_norm": 1.2487452030181885, "learning_rate": 2e-05, "loss": 0.04494422, "step": 13837 }, { "epoch": 27.676, "grad_norm": 1.9383426904678345, "learning_rate": 2e-05, "loss": 0.05443368, "step": 13838 }, { "epoch": 27.678, "grad_norm": 1.7748433351516724, "learning_rate": 2e-05, "loss": 0.04427961, "step": 13839 }, { "epoch": 27.68, "grad_norm": 0.8062875866889954, "learning_rate": 2e-05, "loss": 0.02282286, "step": 13840 }, { "epoch": 27.682, "grad_norm": 2.103724479675293, "learning_rate": 2e-05, "loss": 0.05308349, "step": 13841 }, { "epoch": 27.684, "grad_norm": 1.1036003828048706, "learning_rate": 2e-05, "loss": 0.0406925, "step": 13842 }, { "epoch": 27.686, "grad_norm": 1.1960515975952148, "learning_rate": 2e-05, "loss": 0.04112234, "step": 13843 }, { "epoch": 27.688, "grad_norm": 1.1153143644332886, "learning_rate": 2e-05, "loss": 0.02981465, "step": 13844 }, { "epoch": 27.69, "grad_norm": 1.2569992542266846, "learning_rate": 2e-05, "loss": 0.0435634, "step": 13845 }, { "epoch": 27.692, "grad_norm": 1.384782314300537, "learning_rate": 2e-05, "loss": 0.03404148, "step": 13846 }, { "epoch": 27.694, "grad_norm": 1.6214118003845215, "learning_rate": 2e-05, "loss": 0.04686133, "step": 13847 }, { "epoch": 27.696, "grad_norm": 1.5570800304412842, "learning_rate": 2e-05, "loss": 0.05804433, "step": 13848 }, { "epoch": 27.698, "grad_norm": 1.5227315425872803, "learning_rate": 2e-05, "loss": 0.04167802, "step": 13849 }, { "epoch": 27.7, "grad_norm": 1.578632116317749, "learning_rate": 2e-05, "loss": 0.04537342, "step": 13850 }, { "epoch": 27.701999999999998, "grad_norm": 1.34732186794281, "learning_rate": 2e-05, "loss": 0.05119549, "step": 13851 }, { "epoch": 27.704, "grad_norm": 2.011436939239502, "learning_rate": 2e-05, "loss": 0.08780254, "step": 13852 }, { "epoch": 27.706, "grad_norm": 1.0843994617462158, "learning_rate": 2e-05, "loss": 0.03971418, "step": 13853 }, { "epoch": 27.708, "grad_norm": 1.1688135862350464, "learning_rate": 2e-05, "loss": 0.04056138, "step": 13854 }, { "epoch": 27.71, "grad_norm": 0.9493677616119385, "learning_rate": 2e-05, "loss": 0.03187769, "step": 13855 }, { "epoch": 27.712, "grad_norm": 1.0012264251708984, "learning_rate": 2e-05, "loss": 0.04027501, "step": 13856 }, { "epoch": 27.714, "grad_norm": 0.8952470421791077, "learning_rate": 2e-05, "loss": 0.03563171, "step": 13857 }, { "epoch": 27.716, "grad_norm": 1.1415109634399414, "learning_rate": 2e-05, "loss": 0.03367986, "step": 13858 }, { "epoch": 27.718, "grad_norm": 1.085681676864624, "learning_rate": 2e-05, "loss": 0.04318651, "step": 13859 }, { "epoch": 27.72, "grad_norm": 1.0478479862213135, "learning_rate": 2e-05, "loss": 0.03802703, "step": 13860 }, { "epoch": 27.722, "grad_norm": 1.5198477506637573, "learning_rate": 2e-05, "loss": 0.04132574, "step": 13861 }, { "epoch": 27.724, "grad_norm": 1.0840024948120117, "learning_rate": 2e-05, "loss": 0.04059497, "step": 13862 }, { "epoch": 27.726, "grad_norm": 1.027240514755249, "learning_rate": 2e-05, "loss": 0.03974399, "step": 13863 }, { "epoch": 27.728, "grad_norm": 1.053519368171692, "learning_rate": 2e-05, "loss": 0.03394579, "step": 13864 }, { "epoch": 27.73, "grad_norm": 1.8327863216400146, "learning_rate": 2e-05, "loss": 0.03920068, "step": 13865 }, { "epoch": 27.732, "grad_norm": 2.2196757793426514, "learning_rate": 2e-05, "loss": 0.04846443, "step": 13866 }, { "epoch": 27.734, "grad_norm": 1.4028538465499878, "learning_rate": 2e-05, "loss": 0.04624466, "step": 13867 }, { "epoch": 27.736, "grad_norm": 1.165058970451355, "learning_rate": 2e-05, "loss": 0.0364709, "step": 13868 }, { "epoch": 27.738, "grad_norm": 4.092937469482422, "learning_rate": 2e-05, "loss": 0.05190163, "step": 13869 }, { "epoch": 27.74, "grad_norm": 1.328935146331787, "learning_rate": 2e-05, "loss": 0.05347368, "step": 13870 }, { "epoch": 27.742, "grad_norm": 2.0258467197418213, "learning_rate": 2e-05, "loss": 0.03681408, "step": 13871 }, { "epoch": 27.744, "grad_norm": 1.4326660633087158, "learning_rate": 2e-05, "loss": 0.06321809, "step": 13872 }, { "epoch": 27.746, "grad_norm": 1.131027340888977, "learning_rate": 2e-05, "loss": 0.0508743, "step": 13873 }, { "epoch": 27.748, "grad_norm": 1.18997061252594, "learning_rate": 2e-05, "loss": 0.03275201, "step": 13874 }, { "epoch": 27.75, "grad_norm": 2.4015719890594482, "learning_rate": 2e-05, "loss": 0.03576087, "step": 13875 }, { "epoch": 27.752, "grad_norm": 2.24355149269104, "learning_rate": 2e-05, "loss": 0.05075097, "step": 13876 }, { "epoch": 27.754, "grad_norm": 1.1765543222427368, "learning_rate": 2e-05, "loss": 0.04541855, "step": 13877 }, { "epoch": 27.756, "grad_norm": 1.5028316974639893, "learning_rate": 2e-05, "loss": 0.05536481, "step": 13878 }, { "epoch": 27.758, "grad_norm": 1.1628483533859253, "learning_rate": 2e-05, "loss": 0.04130168, "step": 13879 }, { "epoch": 27.76, "grad_norm": 1.752406120300293, "learning_rate": 2e-05, "loss": 0.05474552, "step": 13880 }, { "epoch": 27.762, "grad_norm": 1.0870063304901123, "learning_rate": 2e-05, "loss": 0.04335282, "step": 13881 }, { "epoch": 27.764, "grad_norm": 2.0513229370117188, "learning_rate": 2e-05, "loss": 0.03874574, "step": 13882 }, { "epoch": 27.766, "grad_norm": 1.2223379611968994, "learning_rate": 2e-05, "loss": 0.05046418, "step": 13883 }, { "epoch": 27.768, "grad_norm": 1.059180498123169, "learning_rate": 2e-05, "loss": 0.03355616, "step": 13884 }, { "epoch": 27.77, "grad_norm": 1.3236563205718994, "learning_rate": 2e-05, "loss": 0.0412216, "step": 13885 }, { "epoch": 27.772, "grad_norm": 1.560232162475586, "learning_rate": 2e-05, "loss": 0.03983791, "step": 13886 }, { "epoch": 27.774, "grad_norm": 1.3728487491607666, "learning_rate": 2e-05, "loss": 0.04270254, "step": 13887 }, { "epoch": 27.776, "grad_norm": 1.7650214433670044, "learning_rate": 2e-05, "loss": 0.03499018, "step": 13888 }, { "epoch": 27.778, "grad_norm": 1.0585721731185913, "learning_rate": 2e-05, "loss": 0.03514803, "step": 13889 }, { "epoch": 27.78, "grad_norm": 1.339477300643921, "learning_rate": 2e-05, "loss": 0.05417306, "step": 13890 }, { "epoch": 27.782, "grad_norm": 1.4370014667510986, "learning_rate": 2e-05, "loss": 0.05690622, "step": 13891 }, { "epoch": 27.784, "grad_norm": 1.5644543170928955, "learning_rate": 2e-05, "loss": 0.04291898, "step": 13892 }, { "epoch": 27.786, "grad_norm": 1.0327696800231934, "learning_rate": 2e-05, "loss": 0.03720927, "step": 13893 }, { "epoch": 27.788, "grad_norm": 1.0730547904968262, "learning_rate": 2e-05, "loss": 0.0368498, "step": 13894 }, { "epoch": 27.79, "grad_norm": 1.1899945735931396, "learning_rate": 2e-05, "loss": 0.04839388, "step": 13895 }, { "epoch": 27.792, "grad_norm": 1.9402941465377808, "learning_rate": 2e-05, "loss": 0.05492554, "step": 13896 }, { "epoch": 27.794, "grad_norm": 1.4402939081192017, "learning_rate": 2e-05, "loss": 0.05447104, "step": 13897 }, { "epoch": 27.796, "grad_norm": 2.334462881088257, "learning_rate": 2e-05, "loss": 0.05614328, "step": 13898 }, { "epoch": 27.798000000000002, "grad_norm": 1.5610952377319336, "learning_rate": 2e-05, "loss": 0.05484813, "step": 13899 }, { "epoch": 27.8, "grad_norm": 2.178652763366699, "learning_rate": 2e-05, "loss": 0.06258977, "step": 13900 }, { "epoch": 27.802, "grad_norm": 1.075310468673706, "learning_rate": 2e-05, "loss": 0.03375444, "step": 13901 }, { "epoch": 27.804, "grad_norm": 1.107140064239502, "learning_rate": 2e-05, "loss": 0.03990953, "step": 13902 }, { "epoch": 27.806, "grad_norm": 1.2043249607086182, "learning_rate": 2e-05, "loss": 0.05589031, "step": 13903 }, { "epoch": 27.808, "grad_norm": 3.252065658569336, "learning_rate": 2e-05, "loss": 0.04488713, "step": 13904 }, { "epoch": 27.81, "grad_norm": 1.4825037717819214, "learning_rate": 2e-05, "loss": 0.04118239, "step": 13905 }, { "epoch": 27.812, "grad_norm": 1.1587483882904053, "learning_rate": 2e-05, "loss": 0.03698997, "step": 13906 }, { "epoch": 27.814, "grad_norm": 1.0998228788375854, "learning_rate": 2e-05, "loss": 0.03384409, "step": 13907 }, { "epoch": 27.816, "grad_norm": 1.2784723043441772, "learning_rate": 2e-05, "loss": 0.04363027, "step": 13908 }, { "epoch": 27.818, "grad_norm": 1.6805578470230103, "learning_rate": 2e-05, "loss": 0.05093968, "step": 13909 }, { "epoch": 27.82, "grad_norm": 4.108209609985352, "learning_rate": 2e-05, "loss": 0.04699673, "step": 13910 }, { "epoch": 27.822, "grad_norm": 1.0773557424545288, "learning_rate": 2e-05, "loss": 0.03354843, "step": 13911 }, { "epoch": 27.824, "grad_norm": 2.1529176235198975, "learning_rate": 2e-05, "loss": 0.05770917, "step": 13912 }, { "epoch": 27.826, "grad_norm": 1.2121992111206055, "learning_rate": 2e-05, "loss": 0.04613957, "step": 13913 }, { "epoch": 27.828, "grad_norm": 1.3433780670166016, "learning_rate": 2e-05, "loss": 0.06019507, "step": 13914 }, { "epoch": 27.83, "grad_norm": 1.4100244045257568, "learning_rate": 2e-05, "loss": 0.04316228, "step": 13915 }, { "epoch": 27.832, "grad_norm": 2.21895694732666, "learning_rate": 2e-05, "loss": 0.04332146, "step": 13916 }, { "epoch": 27.834, "grad_norm": 1.132088303565979, "learning_rate": 2e-05, "loss": 0.03310034, "step": 13917 }, { "epoch": 27.836, "grad_norm": 0.9991299510002136, "learning_rate": 2e-05, "loss": 0.03843431, "step": 13918 }, { "epoch": 27.838, "grad_norm": 1.3979414701461792, "learning_rate": 2e-05, "loss": 0.04863296, "step": 13919 }, { "epoch": 27.84, "grad_norm": 1.2485167980194092, "learning_rate": 2e-05, "loss": 0.03016427, "step": 13920 }, { "epoch": 27.842, "grad_norm": 2.4594881534576416, "learning_rate": 2e-05, "loss": 0.0523196, "step": 13921 }, { "epoch": 27.844, "grad_norm": 1.169227957725525, "learning_rate": 2e-05, "loss": 0.04624384, "step": 13922 }, { "epoch": 27.846, "grad_norm": 1.015640377998352, "learning_rate": 2e-05, "loss": 0.03382251, "step": 13923 }, { "epoch": 27.848, "grad_norm": 1.3113818168640137, "learning_rate": 2e-05, "loss": 0.05841094, "step": 13924 }, { "epoch": 27.85, "grad_norm": 1.174110770225525, "learning_rate": 2e-05, "loss": 0.03625581, "step": 13925 }, { "epoch": 27.852, "grad_norm": 1.187264084815979, "learning_rate": 2e-05, "loss": 0.04166123, "step": 13926 }, { "epoch": 27.854, "grad_norm": 1.0158615112304688, "learning_rate": 2e-05, "loss": 0.0311013, "step": 13927 }, { "epoch": 27.856, "grad_norm": 1.158115267753601, "learning_rate": 2e-05, "loss": 0.03714382, "step": 13928 }, { "epoch": 27.858, "grad_norm": 1.1665765047073364, "learning_rate": 2e-05, "loss": 0.04791439, "step": 13929 }, { "epoch": 27.86, "grad_norm": 1.0975072383880615, "learning_rate": 2e-05, "loss": 0.05305796, "step": 13930 }, { "epoch": 27.862, "grad_norm": 1.2020257711410522, "learning_rate": 2e-05, "loss": 0.03673447, "step": 13931 }, { "epoch": 27.864, "grad_norm": 1.7321451902389526, "learning_rate": 2e-05, "loss": 0.03766854, "step": 13932 }, { "epoch": 27.866, "grad_norm": 1.0557488203048706, "learning_rate": 2e-05, "loss": 0.03491737, "step": 13933 }, { "epoch": 27.868, "grad_norm": 1.1876567602157593, "learning_rate": 2e-05, "loss": 0.02960133, "step": 13934 }, { "epoch": 27.87, "grad_norm": 1.1305729150772095, "learning_rate": 2e-05, "loss": 0.02485805, "step": 13935 }, { "epoch": 27.872, "grad_norm": 1.0962995290756226, "learning_rate": 2e-05, "loss": 0.05305071, "step": 13936 }, { "epoch": 27.874, "grad_norm": 1.6864302158355713, "learning_rate": 2e-05, "loss": 0.05625251, "step": 13937 }, { "epoch": 27.876, "grad_norm": 1.1173137426376343, "learning_rate": 2e-05, "loss": 0.04091568, "step": 13938 }, { "epoch": 27.878, "grad_norm": 1.3211218118667603, "learning_rate": 2e-05, "loss": 0.05462737, "step": 13939 }, { "epoch": 27.88, "grad_norm": 2.1169698238372803, "learning_rate": 2e-05, "loss": 0.07160946, "step": 13940 }, { "epoch": 27.882, "grad_norm": 2.254796028137207, "learning_rate": 2e-05, "loss": 0.05478616, "step": 13941 }, { "epoch": 27.884, "grad_norm": 1.0552836656570435, "learning_rate": 2e-05, "loss": 0.0460465, "step": 13942 }, { "epoch": 27.886, "grad_norm": 1.2649129629135132, "learning_rate": 2e-05, "loss": 0.05034371, "step": 13943 }, { "epoch": 27.888, "grad_norm": 1.4517078399658203, "learning_rate": 2e-05, "loss": 0.05885756, "step": 13944 }, { "epoch": 27.89, "grad_norm": 1.3323506116867065, "learning_rate": 2e-05, "loss": 0.03592032, "step": 13945 }, { "epoch": 27.892, "grad_norm": 1.004231333732605, "learning_rate": 2e-05, "loss": 0.03064884, "step": 13946 }, { "epoch": 27.894, "grad_norm": 3.4080281257629395, "learning_rate": 2e-05, "loss": 0.04444969, "step": 13947 }, { "epoch": 27.896, "grad_norm": 1.0597387552261353, "learning_rate": 2e-05, "loss": 0.05533614, "step": 13948 }, { "epoch": 27.898, "grad_norm": 3.37524151802063, "learning_rate": 2e-05, "loss": 0.03681152, "step": 13949 }, { "epoch": 27.9, "grad_norm": 1.2154083251953125, "learning_rate": 2e-05, "loss": 0.04092389, "step": 13950 }, { "epoch": 27.902, "grad_norm": 1.1490893363952637, "learning_rate": 2e-05, "loss": 0.04752996, "step": 13951 }, { "epoch": 27.904, "grad_norm": 2.7081265449523926, "learning_rate": 2e-05, "loss": 0.07245349, "step": 13952 }, { "epoch": 27.906, "grad_norm": 0.9910997748374939, "learning_rate": 2e-05, "loss": 0.03678518, "step": 13953 }, { "epoch": 27.908, "grad_norm": 1.374592900276184, "learning_rate": 2e-05, "loss": 0.05117041, "step": 13954 }, { "epoch": 27.91, "grad_norm": 1.7255548238754272, "learning_rate": 2e-05, "loss": 0.04001204, "step": 13955 }, { "epoch": 27.912, "grad_norm": 1.196602463722229, "learning_rate": 2e-05, "loss": 0.02974688, "step": 13956 }, { "epoch": 27.914, "grad_norm": 1.4762085676193237, "learning_rate": 2e-05, "loss": 0.04770838, "step": 13957 }, { "epoch": 27.916, "grad_norm": 1.137497067451477, "learning_rate": 2e-05, "loss": 0.04189302, "step": 13958 }, { "epoch": 27.918, "grad_norm": 1.2305675745010376, "learning_rate": 2e-05, "loss": 0.03438773, "step": 13959 }, { "epoch": 27.92, "grad_norm": 1.2307065725326538, "learning_rate": 2e-05, "loss": 0.0470115, "step": 13960 }, { "epoch": 27.922, "grad_norm": 1.3657495975494385, "learning_rate": 2e-05, "loss": 0.04345737, "step": 13961 }, { "epoch": 27.924, "grad_norm": 1.3622334003448486, "learning_rate": 2e-05, "loss": 0.04275557, "step": 13962 }, { "epoch": 27.926, "grad_norm": 1.8857320547103882, "learning_rate": 2e-05, "loss": 0.04838826, "step": 13963 }, { "epoch": 27.928, "grad_norm": 1.1448822021484375, "learning_rate": 2e-05, "loss": 0.04292282, "step": 13964 }, { "epoch": 27.93, "grad_norm": 1.3071253299713135, "learning_rate": 2e-05, "loss": 0.04572289, "step": 13965 }, { "epoch": 27.932, "grad_norm": 1.3378077745437622, "learning_rate": 2e-05, "loss": 0.04263856, "step": 13966 }, { "epoch": 27.934, "grad_norm": 1.6963642835617065, "learning_rate": 2e-05, "loss": 0.05341676, "step": 13967 }, { "epoch": 27.936, "grad_norm": 3.9375112056732178, "learning_rate": 2e-05, "loss": 0.05422161, "step": 13968 }, { "epoch": 27.938, "grad_norm": 1.3592469692230225, "learning_rate": 2e-05, "loss": 0.06483057, "step": 13969 }, { "epoch": 27.94, "grad_norm": 1.460677981376648, "learning_rate": 2e-05, "loss": 0.05142547, "step": 13970 }, { "epoch": 27.942, "grad_norm": 1.2273732423782349, "learning_rate": 2e-05, "loss": 0.03599922, "step": 13971 }, { "epoch": 27.944, "grad_norm": 1.2938038110733032, "learning_rate": 2e-05, "loss": 0.04321476, "step": 13972 }, { "epoch": 27.946, "grad_norm": 1.007976770401001, "learning_rate": 2e-05, "loss": 0.04248318, "step": 13973 }, { "epoch": 27.948, "grad_norm": 2.465160846710205, "learning_rate": 2e-05, "loss": 0.04337021, "step": 13974 }, { "epoch": 27.95, "grad_norm": 3.6507928371429443, "learning_rate": 2e-05, "loss": 0.06409168, "step": 13975 }, { "epoch": 27.951999999999998, "grad_norm": 1.0644190311431885, "learning_rate": 2e-05, "loss": 0.04959762, "step": 13976 }, { "epoch": 27.954, "grad_norm": 1.2026413679122925, "learning_rate": 2e-05, "loss": 0.04680315, "step": 13977 }, { "epoch": 27.956, "grad_norm": 0.87662672996521, "learning_rate": 2e-05, "loss": 0.03997586, "step": 13978 }, { "epoch": 27.958, "grad_norm": 1.1587470769882202, "learning_rate": 2e-05, "loss": 0.0403148, "step": 13979 }, { "epoch": 27.96, "grad_norm": 1.5906647443771362, "learning_rate": 2e-05, "loss": 0.05821125, "step": 13980 }, { "epoch": 27.962, "grad_norm": 2.6488704681396484, "learning_rate": 2e-05, "loss": 0.05198992, "step": 13981 }, { "epoch": 27.964, "grad_norm": 1.068997859954834, "learning_rate": 2e-05, "loss": 0.02909309, "step": 13982 }, { "epoch": 27.966, "grad_norm": 1.0853064060211182, "learning_rate": 2e-05, "loss": 0.03459357, "step": 13983 }, { "epoch": 27.968, "grad_norm": 1.227805733680725, "learning_rate": 2e-05, "loss": 0.04584693, "step": 13984 }, { "epoch": 27.97, "grad_norm": 1.665924072265625, "learning_rate": 2e-05, "loss": 0.04428935, "step": 13985 }, { "epoch": 27.972, "grad_norm": 1.8217002153396606, "learning_rate": 2e-05, "loss": 0.04603599, "step": 13986 }, { "epoch": 27.974, "grad_norm": 1.1141926050186157, "learning_rate": 2e-05, "loss": 0.03780417, "step": 13987 }, { "epoch": 27.976, "grad_norm": 1.0562472343444824, "learning_rate": 2e-05, "loss": 0.04129979, "step": 13988 }, { "epoch": 27.978, "grad_norm": 1.1564449071884155, "learning_rate": 2e-05, "loss": 0.05021477, "step": 13989 }, { "epoch": 27.98, "grad_norm": 1.0061001777648926, "learning_rate": 2e-05, "loss": 0.03430205, "step": 13990 }, { "epoch": 27.982, "grad_norm": 3.191288709640503, "learning_rate": 2e-05, "loss": 0.04336466, "step": 13991 }, { "epoch": 27.984, "grad_norm": 1.3708455562591553, "learning_rate": 2e-05, "loss": 0.03357625, "step": 13992 }, { "epoch": 27.986, "grad_norm": 1.410815954208374, "learning_rate": 2e-05, "loss": 0.03761749, "step": 13993 }, { "epoch": 27.988, "grad_norm": 1.0445529222488403, "learning_rate": 2e-05, "loss": 0.03375229, "step": 13994 }, { "epoch": 27.99, "grad_norm": 1.9780248403549194, "learning_rate": 2e-05, "loss": 0.05313461, "step": 13995 }, { "epoch": 27.992, "grad_norm": 1.082139492034912, "learning_rate": 2e-05, "loss": 0.04322237, "step": 13996 }, { "epoch": 27.994, "grad_norm": 1.701316475868225, "learning_rate": 2e-05, "loss": 0.04349414, "step": 13997 }, { "epoch": 27.996, "grad_norm": 1.5152575969696045, "learning_rate": 2e-05, "loss": 0.04271465, "step": 13998 }, { "epoch": 27.998, "grad_norm": 1.3154436349868774, "learning_rate": 2e-05, "loss": 0.04630833, "step": 13999 }, { "epoch": 28.0, "grad_norm": 1.4491382837295532, "learning_rate": 2e-05, "loss": 0.05390036, "step": 14000 }, { "epoch": 28.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9880239520958084, "Equal_1": 0.998, "Equal_2": 0.9820359281437125, "Equal_3": 0.9840319361277445, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9939879759519038, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.994, "Perpendicular_1": 1.0, "Perpendicular_2": 0.988, "Perpendicular_3": 0.8677354709418837, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.9876666666666667, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9860279441117764 }, "eval_runtime": 319.0556, "eval_samples_per_second": 32.91, "eval_steps_per_second": 0.658, "step": 14000 }, { "epoch": 28.002, "grad_norm": 1.4470635652542114, "learning_rate": 2e-05, "loss": 0.03747822, "step": 14001 }, { "epoch": 28.004, "grad_norm": 1.0949195623397827, "learning_rate": 2e-05, "loss": 0.04181771, "step": 14002 }, { "epoch": 28.006, "grad_norm": 1.7039893865585327, "learning_rate": 2e-05, "loss": 0.04626131, "step": 14003 }, { "epoch": 28.008, "grad_norm": 2.289400100708008, "learning_rate": 2e-05, "loss": 0.05722143, "step": 14004 }, { "epoch": 28.01, "grad_norm": 1.7248917818069458, "learning_rate": 2e-05, "loss": 0.04162604, "step": 14005 }, { "epoch": 28.012, "grad_norm": 1.235735297203064, "learning_rate": 2e-05, "loss": 0.05281135, "step": 14006 }, { "epoch": 28.014, "grad_norm": 1.0774388313293457, "learning_rate": 2e-05, "loss": 0.03179125, "step": 14007 }, { "epoch": 28.016, "grad_norm": 1.8190655708312988, "learning_rate": 2e-05, "loss": 0.04784362, "step": 14008 }, { "epoch": 28.018, "grad_norm": 1.644675612449646, "learning_rate": 2e-05, "loss": 0.04483693, "step": 14009 }, { "epoch": 28.02, "grad_norm": 1.27074134349823, "learning_rate": 2e-05, "loss": 0.05767696, "step": 14010 }, { "epoch": 28.022, "grad_norm": 1.3586097955703735, "learning_rate": 2e-05, "loss": 0.03288449, "step": 14011 }, { "epoch": 28.024, "grad_norm": 0.9760823845863342, "learning_rate": 2e-05, "loss": 0.03223011, "step": 14012 }, { "epoch": 28.026, "grad_norm": 1.9947422742843628, "learning_rate": 2e-05, "loss": 0.06774095, "step": 14013 }, { "epoch": 28.028, "grad_norm": 1.3853563070297241, "learning_rate": 2e-05, "loss": 0.03108654, "step": 14014 }, { "epoch": 28.03, "grad_norm": 1.166358470916748, "learning_rate": 2e-05, "loss": 0.04371829, "step": 14015 }, { "epoch": 28.032, "grad_norm": 1.3257215023040771, "learning_rate": 2e-05, "loss": 0.05647626, "step": 14016 }, { "epoch": 28.034, "grad_norm": 1.8312715291976929, "learning_rate": 2e-05, "loss": 0.04174164, "step": 14017 }, { "epoch": 28.036, "grad_norm": 1.1713063716888428, "learning_rate": 2e-05, "loss": 0.04272055, "step": 14018 }, { "epoch": 28.038, "grad_norm": 1.022841453552246, "learning_rate": 2e-05, "loss": 0.04106878, "step": 14019 }, { "epoch": 28.04, "grad_norm": 1.123375654220581, "learning_rate": 2e-05, "loss": 0.04132652, "step": 14020 }, { "epoch": 28.042, "grad_norm": 0.9225612878799438, "learning_rate": 2e-05, "loss": 0.03926476, "step": 14021 }, { "epoch": 28.044, "grad_norm": 1.3327795267105103, "learning_rate": 2e-05, "loss": 0.04138635, "step": 14022 }, { "epoch": 28.046, "grad_norm": 1.0123158693313599, "learning_rate": 2e-05, "loss": 0.03201216, "step": 14023 }, { "epoch": 28.048, "grad_norm": 1.0678902864456177, "learning_rate": 2e-05, "loss": 0.03800803, "step": 14024 }, { "epoch": 28.05, "grad_norm": 1.6009563207626343, "learning_rate": 2e-05, "loss": 0.05565003, "step": 14025 }, { "epoch": 28.052, "grad_norm": 1.7109339237213135, "learning_rate": 2e-05, "loss": 0.03978261, "step": 14026 }, { "epoch": 28.054, "grad_norm": 0.9139453172683716, "learning_rate": 2e-05, "loss": 0.02852735, "step": 14027 }, { "epoch": 28.056, "grad_norm": 1.2070707082748413, "learning_rate": 2e-05, "loss": 0.04159069, "step": 14028 }, { "epoch": 28.058, "grad_norm": 1.1406387090682983, "learning_rate": 2e-05, "loss": 0.03497103, "step": 14029 }, { "epoch": 28.06, "grad_norm": 1.5811642408370972, "learning_rate": 2e-05, "loss": 0.04424217, "step": 14030 }, { "epoch": 28.062, "grad_norm": 2.008183479309082, "learning_rate": 2e-05, "loss": 0.0522799, "step": 14031 }, { "epoch": 28.064, "grad_norm": 1.3083295822143555, "learning_rate": 2e-05, "loss": 0.04245078, "step": 14032 }, { "epoch": 28.066, "grad_norm": 1.156947135925293, "learning_rate": 2e-05, "loss": 0.04440115, "step": 14033 }, { "epoch": 28.068, "grad_norm": 1.2095609903335571, "learning_rate": 2e-05, "loss": 0.0388485, "step": 14034 }, { "epoch": 28.07, "grad_norm": 1.2254390716552734, "learning_rate": 2e-05, "loss": 0.03347994, "step": 14035 }, { "epoch": 28.072, "grad_norm": 1.1406832933425903, "learning_rate": 2e-05, "loss": 0.04151241, "step": 14036 }, { "epoch": 28.074, "grad_norm": 1.8342012166976929, "learning_rate": 2e-05, "loss": 0.0339776, "step": 14037 }, { "epoch": 28.076, "grad_norm": 1.5849714279174805, "learning_rate": 2e-05, "loss": 0.04472189, "step": 14038 }, { "epoch": 28.078, "grad_norm": 1.7151453495025635, "learning_rate": 2e-05, "loss": 0.04204217, "step": 14039 }, { "epoch": 28.08, "grad_norm": 1.2614927291870117, "learning_rate": 2e-05, "loss": 0.04573628, "step": 14040 }, { "epoch": 28.082, "grad_norm": 0.9816139936447144, "learning_rate": 2e-05, "loss": 0.04488077, "step": 14041 }, { "epoch": 28.084, "grad_norm": 1.832466959953308, "learning_rate": 2e-05, "loss": 0.03606717, "step": 14042 }, { "epoch": 28.086, "grad_norm": 1.1039230823516846, "learning_rate": 2e-05, "loss": 0.03455424, "step": 14043 }, { "epoch": 28.088, "grad_norm": 1.3150477409362793, "learning_rate": 2e-05, "loss": 0.03564423, "step": 14044 }, { "epoch": 28.09, "grad_norm": 1.4359805583953857, "learning_rate": 2e-05, "loss": 0.04819302, "step": 14045 }, { "epoch": 28.092, "grad_norm": 1.6399556398391724, "learning_rate": 2e-05, "loss": 0.06461568, "step": 14046 }, { "epoch": 28.094, "grad_norm": 1.261181116104126, "learning_rate": 2e-05, "loss": 0.03727288, "step": 14047 }, { "epoch": 28.096, "grad_norm": 1.3191063404083252, "learning_rate": 2e-05, "loss": 0.05051003, "step": 14048 }, { "epoch": 28.098, "grad_norm": 2.9893617630004883, "learning_rate": 2e-05, "loss": 0.04233899, "step": 14049 }, { "epoch": 28.1, "grad_norm": 1.1125555038452148, "learning_rate": 2e-05, "loss": 0.03035634, "step": 14050 }, { "epoch": 28.102, "grad_norm": 1.1988699436187744, "learning_rate": 2e-05, "loss": 0.0434362, "step": 14051 }, { "epoch": 28.104, "grad_norm": 1.1385446786880493, "learning_rate": 2e-05, "loss": 0.04272396, "step": 14052 }, { "epoch": 28.106, "grad_norm": 1.5576671361923218, "learning_rate": 2e-05, "loss": 0.04542271, "step": 14053 }, { "epoch": 28.108, "grad_norm": 0.9911758303642273, "learning_rate": 2e-05, "loss": 0.02662791, "step": 14054 }, { "epoch": 28.11, "grad_norm": 1.0654523372650146, "learning_rate": 2e-05, "loss": 0.02663458, "step": 14055 }, { "epoch": 28.112, "grad_norm": 2.447866916656494, "learning_rate": 2e-05, "loss": 0.06156901, "step": 14056 }, { "epoch": 28.114, "grad_norm": 1.5394794940948486, "learning_rate": 2e-05, "loss": 0.03734413, "step": 14057 }, { "epoch": 28.116, "grad_norm": 1.2493864297866821, "learning_rate": 2e-05, "loss": 0.04325924, "step": 14058 }, { "epoch": 28.118, "grad_norm": 1.0674172639846802, "learning_rate": 2e-05, "loss": 0.03373833, "step": 14059 }, { "epoch": 28.12, "grad_norm": 1.6935789585113525, "learning_rate": 2e-05, "loss": 0.06536911, "step": 14060 }, { "epoch": 28.122, "grad_norm": 1.346823811531067, "learning_rate": 2e-05, "loss": 0.03599031, "step": 14061 }, { "epoch": 28.124, "grad_norm": 1.2584784030914307, "learning_rate": 2e-05, "loss": 0.04426415, "step": 14062 }, { "epoch": 28.126, "grad_norm": 1.5686087608337402, "learning_rate": 2e-05, "loss": 0.03532685, "step": 14063 }, { "epoch": 28.128, "grad_norm": 1.0472480058670044, "learning_rate": 2e-05, "loss": 0.02852079, "step": 14064 }, { "epoch": 28.13, "grad_norm": 1.4226044416427612, "learning_rate": 2e-05, "loss": 0.05019859, "step": 14065 }, { "epoch": 28.132, "grad_norm": 1.4346106052398682, "learning_rate": 2e-05, "loss": 0.05388649, "step": 14066 }, { "epoch": 28.134, "grad_norm": 2.281315803527832, "learning_rate": 2e-05, "loss": 0.0545974, "step": 14067 }, { "epoch": 28.136, "grad_norm": 1.2159454822540283, "learning_rate": 2e-05, "loss": 0.03459051, "step": 14068 }, { "epoch": 28.138, "grad_norm": 1.2951035499572754, "learning_rate": 2e-05, "loss": 0.0595167, "step": 14069 }, { "epoch": 28.14, "grad_norm": 1.1814157962799072, "learning_rate": 2e-05, "loss": 0.03543475, "step": 14070 }, { "epoch": 28.142, "grad_norm": 1.0047551393508911, "learning_rate": 2e-05, "loss": 0.03344669, "step": 14071 }, { "epoch": 28.144, "grad_norm": 0.9211133718490601, "learning_rate": 2e-05, "loss": 0.03054952, "step": 14072 }, { "epoch": 28.146, "grad_norm": 0.922265887260437, "learning_rate": 2e-05, "loss": 0.03004513, "step": 14073 }, { "epoch": 28.148, "grad_norm": 1.5452007055282593, "learning_rate": 2e-05, "loss": 0.0379502, "step": 14074 }, { "epoch": 28.15, "grad_norm": 1.1936745643615723, "learning_rate": 2e-05, "loss": 0.0383325, "step": 14075 }, { "epoch": 28.152, "grad_norm": 0.9541253447532654, "learning_rate": 2e-05, "loss": 0.03261286, "step": 14076 }, { "epoch": 28.154, "grad_norm": 1.3197306394577026, "learning_rate": 2e-05, "loss": 0.05566575, "step": 14077 }, { "epoch": 28.156, "grad_norm": 1.5139483213424683, "learning_rate": 2e-05, "loss": 0.04211396, "step": 14078 }, { "epoch": 28.158, "grad_norm": 2.144613742828369, "learning_rate": 2e-05, "loss": 0.05478022, "step": 14079 }, { "epoch": 28.16, "grad_norm": 1.2877733707427979, "learning_rate": 2e-05, "loss": 0.04739144, "step": 14080 }, { "epoch": 28.162, "grad_norm": 0.9685249924659729, "learning_rate": 2e-05, "loss": 0.02751518, "step": 14081 }, { "epoch": 28.164, "grad_norm": 1.1108578443527222, "learning_rate": 2e-05, "loss": 0.04293613, "step": 14082 }, { "epoch": 28.166, "grad_norm": 0.9321027994155884, "learning_rate": 2e-05, "loss": 0.03453288, "step": 14083 }, { "epoch": 28.168, "grad_norm": 1.8604676723480225, "learning_rate": 2e-05, "loss": 0.03813556, "step": 14084 }, { "epoch": 28.17, "grad_norm": 1.3579093217849731, "learning_rate": 2e-05, "loss": 0.03601374, "step": 14085 }, { "epoch": 28.172, "grad_norm": 1.3795456886291504, "learning_rate": 2e-05, "loss": 0.05737968, "step": 14086 }, { "epoch": 28.174, "grad_norm": 1.3870776891708374, "learning_rate": 2e-05, "loss": 0.03918634, "step": 14087 }, { "epoch": 28.176, "grad_norm": 1.5775760412216187, "learning_rate": 2e-05, "loss": 0.04204413, "step": 14088 }, { "epoch": 28.178, "grad_norm": 1.3750708103179932, "learning_rate": 2e-05, "loss": 0.03528126, "step": 14089 }, { "epoch": 28.18, "grad_norm": 1.1607511043548584, "learning_rate": 2e-05, "loss": 0.0513554, "step": 14090 }, { "epoch": 28.182, "grad_norm": 1.7518668174743652, "learning_rate": 2e-05, "loss": 0.05958167, "step": 14091 }, { "epoch": 28.184, "grad_norm": 0.8187534213066101, "learning_rate": 2e-05, "loss": 0.02375086, "step": 14092 }, { "epoch": 28.186, "grad_norm": 1.1586307287216187, "learning_rate": 2e-05, "loss": 0.03455898, "step": 14093 }, { "epoch": 28.188, "grad_norm": 2.6694910526275635, "learning_rate": 2e-05, "loss": 0.04185054, "step": 14094 }, { "epoch": 28.19, "grad_norm": 1.0022153854370117, "learning_rate": 2e-05, "loss": 0.03717811, "step": 14095 }, { "epoch": 28.192, "grad_norm": 1.133339762687683, "learning_rate": 2e-05, "loss": 0.02898293, "step": 14096 }, { "epoch": 28.194, "grad_norm": 0.9910487532615662, "learning_rate": 2e-05, "loss": 0.02521181, "step": 14097 }, { "epoch": 28.196, "grad_norm": 1.2302407026290894, "learning_rate": 2e-05, "loss": 0.04486918, "step": 14098 }, { "epoch": 28.198, "grad_norm": 1.398140788078308, "learning_rate": 2e-05, "loss": 0.02154693, "step": 14099 }, { "epoch": 28.2, "grad_norm": 2.0596702098846436, "learning_rate": 2e-05, "loss": 0.03849518, "step": 14100 }, { "epoch": 28.202, "grad_norm": 1.3504750728607178, "learning_rate": 2e-05, "loss": 0.0484315, "step": 14101 }, { "epoch": 28.204, "grad_norm": 1.0688234567642212, "learning_rate": 2e-05, "loss": 0.0351585, "step": 14102 }, { "epoch": 28.206, "grad_norm": 1.4983340501785278, "learning_rate": 2e-05, "loss": 0.04929278, "step": 14103 }, { "epoch": 28.208, "grad_norm": 1.1455981731414795, "learning_rate": 2e-05, "loss": 0.04318264, "step": 14104 }, { "epoch": 28.21, "grad_norm": 1.3201831579208374, "learning_rate": 2e-05, "loss": 0.03962881, "step": 14105 }, { "epoch": 28.212, "grad_norm": 1.7087925672531128, "learning_rate": 2e-05, "loss": 0.05958664, "step": 14106 }, { "epoch": 28.214, "grad_norm": 1.799774169921875, "learning_rate": 2e-05, "loss": 0.04615483, "step": 14107 }, { "epoch": 28.216, "grad_norm": 2.0174641609191895, "learning_rate": 2e-05, "loss": 0.03510554, "step": 14108 }, { "epoch": 28.218, "grad_norm": 1.7066216468811035, "learning_rate": 2e-05, "loss": 0.04433943, "step": 14109 }, { "epoch": 28.22, "grad_norm": 1.1175912618637085, "learning_rate": 2e-05, "loss": 0.04259259, "step": 14110 }, { "epoch": 28.222, "grad_norm": 1.3678927421569824, "learning_rate": 2e-05, "loss": 0.0313809, "step": 14111 }, { "epoch": 28.224, "grad_norm": 1.330997109413147, "learning_rate": 2e-05, "loss": 0.03561566, "step": 14112 }, { "epoch": 28.226, "grad_norm": 1.0542383193969727, "learning_rate": 2e-05, "loss": 0.03285637, "step": 14113 }, { "epoch": 28.228, "grad_norm": 1.6406633853912354, "learning_rate": 2e-05, "loss": 0.03652732, "step": 14114 }, { "epoch": 28.23, "grad_norm": 1.4692353010177612, "learning_rate": 2e-05, "loss": 0.04664152, "step": 14115 }, { "epoch": 28.232, "grad_norm": 3.2253143787384033, "learning_rate": 2e-05, "loss": 0.052161, "step": 14116 }, { "epoch": 28.234, "grad_norm": 1.0887095928192139, "learning_rate": 2e-05, "loss": 0.03422359, "step": 14117 }, { "epoch": 28.236, "grad_norm": 1.4389132261276245, "learning_rate": 2e-05, "loss": 0.0335054, "step": 14118 }, { "epoch": 28.238, "grad_norm": 1.1777735948562622, "learning_rate": 2e-05, "loss": 0.0480193, "step": 14119 }, { "epoch": 28.24, "grad_norm": 1.2068471908569336, "learning_rate": 2e-05, "loss": 0.03690059, "step": 14120 }, { "epoch": 28.242, "grad_norm": 0.9290897846221924, "learning_rate": 2e-05, "loss": 0.03334764, "step": 14121 }, { "epoch": 28.244, "grad_norm": 1.0152655839920044, "learning_rate": 2e-05, "loss": 0.04105952, "step": 14122 }, { "epoch": 28.246, "grad_norm": 1.3126530647277832, "learning_rate": 2e-05, "loss": 0.03845873, "step": 14123 }, { "epoch": 28.248, "grad_norm": 1.0583008527755737, "learning_rate": 2e-05, "loss": 0.02750574, "step": 14124 }, { "epoch": 28.25, "grad_norm": 1.3489996194839478, "learning_rate": 2e-05, "loss": 0.03019518, "step": 14125 }, { "epoch": 28.252, "grad_norm": 1.9256123304367065, "learning_rate": 2e-05, "loss": 0.0618353, "step": 14126 }, { "epoch": 28.254, "grad_norm": 1.2744817733764648, "learning_rate": 2e-05, "loss": 0.05729286, "step": 14127 }, { "epoch": 28.256, "grad_norm": 1.2284598350524902, "learning_rate": 2e-05, "loss": 0.0571045, "step": 14128 }, { "epoch": 28.258, "grad_norm": 1.3543322086334229, "learning_rate": 2e-05, "loss": 0.04329966, "step": 14129 }, { "epoch": 28.26, "grad_norm": 1.3076565265655518, "learning_rate": 2e-05, "loss": 0.04858845, "step": 14130 }, { "epoch": 28.262, "grad_norm": 1.8700453042984009, "learning_rate": 2e-05, "loss": 0.06022802, "step": 14131 }, { "epoch": 28.264, "grad_norm": 1.6746340990066528, "learning_rate": 2e-05, "loss": 0.05681061, "step": 14132 }, { "epoch": 28.266, "grad_norm": 1.4257715940475464, "learning_rate": 2e-05, "loss": 0.04958214, "step": 14133 }, { "epoch": 28.268, "grad_norm": 1.2631217241287231, "learning_rate": 2e-05, "loss": 0.03757473, "step": 14134 }, { "epoch": 28.27, "grad_norm": 1.4865784645080566, "learning_rate": 2e-05, "loss": 0.0356946, "step": 14135 }, { "epoch": 28.272, "grad_norm": 1.2895032167434692, "learning_rate": 2e-05, "loss": 0.03698609, "step": 14136 }, { "epoch": 28.274, "grad_norm": 1.0424330234527588, "learning_rate": 2e-05, "loss": 0.03331292, "step": 14137 }, { "epoch": 28.276, "grad_norm": 0.9681985378265381, "learning_rate": 2e-05, "loss": 0.02686777, "step": 14138 }, { "epoch": 28.278, "grad_norm": 1.1451689004898071, "learning_rate": 2e-05, "loss": 0.04804129, "step": 14139 }, { "epoch": 28.28, "grad_norm": 1.2859004735946655, "learning_rate": 2e-05, "loss": 0.03937485, "step": 14140 }, { "epoch": 28.282, "grad_norm": 1.9779037237167358, "learning_rate": 2e-05, "loss": 0.04771383, "step": 14141 }, { "epoch": 28.284, "grad_norm": 1.6972112655639648, "learning_rate": 2e-05, "loss": 0.04609143, "step": 14142 }, { "epoch": 28.286, "grad_norm": 1.5024956464767456, "learning_rate": 2e-05, "loss": 0.04176302, "step": 14143 }, { "epoch": 28.288, "grad_norm": 2.7473392486572266, "learning_rate": 2e-05, "loss": 0.06857879, "step": 14144 }, { "epoch": 28.29, "grad_norm": 1.476321816444397, "learning_rate": 2e-05, "loss": 0.04816295, "step": 14145 }, { "epoch": 28.292, "grad_norm": 1.9630745649337769, "learning_rate": 2e-05, "loss": 0.04113162, "step": 14146 }, { "epoch": 28.294, "grad_norm": 1.5762404203414917, "learning_rate": 2e-05, "loss": 0.04521403, "step": 14147 }, { "epoch": 28.296, "grad_norm": 1.8791242837905884, "learning_rate": 2e-05, "loss": 0.05574446, "step": 14148 }, { "epoch": 28.298, "grad_norm": 1.3232239484786987, "learning_rate": 2e-05, "loss": 0.04592151, "step": 14149 }, { "epoch": 28.3, "grad_norm": 1.0860222578048706, "learning_rate": 2e-05, "loss": 0.03201285, "step": 14150 }, { "epoch": 28.302, "grad_norm": 1.1450574398040771, "learning_rate": 2e-05, "loss": 0.0431738, "step": 14151 }, { "epoch": 28.304, "grad_norm": 1.2073911428451538, "learning_rate": 2e-05, "loss": 0.04106464, "step": 14152 }, { "epoch": 28.306, "grad_norm": 2.2250092029571533, "learning_rate": 2e-05, "loss": 0.03272331, "step": 14153 }, { "epoch": 28.308, "grad_norm": 1.2592679262161255, "learning_rate": 2e-05, "loss": 0.03767652, "step": 14154 }, { "epoch": 28.31, "grad_norm": 1.481679081916809, "learning_rate": 2e-05, "loss": 0.04322655, "step": 14155 }, { "epoch": 28.312, "grad_norm": 1.8476687669754028, "learning_rate": 2e-05, "loss": 0.02506692, "step": 14156 }, { "epoch": 28.314, "grad_norm": 2.533881664276123, "learning_rate": 2e-05, "loss": 0.05798201, "step": 14157 }, { "epoch": 28.316, "grad_norm": 1.16340172290802, "learning_rate": 2e-05, "loss": 0.04741621, "step": 14158 }, { "epoch": 28.318, "grad_norm": 1.0567313432693481, "learning_rate": 2e-05, "loss": 0.03934213, "step": 14159 }, { "epoch": 28.32, "grad_norm": 1.8486943244934082, "learning_rate": 2e-05, "loss": 0.04713926, "step": 14160 }, { "epoch": 28.322, "grad_norm": 1.489852786064148, "learning_rate": 2e-05, "loss": 0.0400935, "step": 14161 }, { "epoch": 28.324, "grad_norm": 3.173983097076416, "learning_rate": 2e-05, "loss": 0.05666588, "step": 14162 }, { "epoch": 28.326, "grad_norm": 1.2710679769515991, "learning_rate": 2e-05, "loss": 0.04324594, "step": 14163 }, { "epoch": 28.328, "grad_norm": 2.056770086288452, "learning_rate": 2e-05, "loss": 0.0498774, "step": 14164 }, { "epoch": 28.33, "grad_norm": 1.194877028465271, "learning_rate": 2e-05, "loss": 0.033945, "step": 14165 }, { "epoch": 28.332, "grad_norm": 1.5242279767990112, "learning_rate": 2e-05, "loss": 0.04730607, "step": 14166 }, { "epoch": 28.334, "grad_norm": 1.2096294164657593, "learning_rate": 2e-05, "loss": 0.04421799, "step": 14167 }, { "epoch": 28.336, "grad_norm": 2.0177338123321533, "learning_rate": 2e-05, "loss": 0.05057753, "step": 14168 }, { "epoch": 28.338, "grad_norm": 1.4078340530395508, "learning_rate": 2e-05, "loss": 0.04942624, "step": 14169 }, { "epoch": 28.34, "grad_norm": 2.0044713020324707, "learning_rate": 2e-05, "loss": 0.05860762, "step": 14170 }, { "epoch": 28.342, "grad_norm": 1.4696686267852783, "learning_rate": 2e-05, "loss": 0.05207686, "step": 14171 }, { "epoch": 28.344, "grad_norm": 1.4194014072418213, "learning_rate": 2e-05, "loss": 0.03678628, "step": 14172 }, { "epoch": 28.346, "grad_norm": 1.3729615211486816, "learning_rate": 2e-05, "loss": 0.07338053, "step": 14173 }, { "epoch": 28.348, "grad_norm": 1.4968407154083252, "learning_rate": 2e-05, "loss": 0.05206795, "step": 14174 }, { "epoch": 28.35, "grad_norm": 0.9872764348983765, "learning_rate": 2e-05, "loss": 0.03436659, "step": 14175 }, { "epoch": 28.352, "grad_norm": 1.0700467824935913, "learning_rate": 2e-05, "loss": 0.02983767, "step": 14176 }, { "epoch": 28.354, "grad_norm": 1.9751871824264526, "learning_rate": 2e-05, "loss": 0.04803991, "step": 14177 }, { "epoch": 28.356, "grad_norm": 1.7464139461517334, "learning_rate": 2e-05, "loss": 0.03985727, "step": 14178 }, { "epoch": 28.358, "grad_norm": 1.93489670753479, "learning_rate": 2e-05, "loss": 0.03951417, "step": 14179 }, { "epoch": 28.36, "grad_norm": 1.8252264261245728, "learning_rate": 2e-05, "loss": 0.05627768, "step": 14180 }, { "epoch": 28.362, "grad_norm": 1.957201361656189, "learning_rate": 2e-05, "loss": 0.03644243, "step": 14181 }, { "epoch": 28.364, "grad_norm": 1.244791865348816, "learning_rate": 2e-05, "loss": 0.05136299, "step": 14182 }, { "epoch": 28.366, "grad_norm": 1.722070336341858, "learning_rate": 2e-05, "loss": 0.05756142, "step": 14183 }, { "epoch": 28.368, "grad_norm": 1.8835808038711548, "learning_rate": 2e-05, "loss": 0.05007068, "step": 14184 }, { "epoch": 28.37, "grad_norm": 1.2348777055740356, "learning_rate": 2e-05, "loss": 0.03819226, "step": 14185 }, { "epoch": 28.372, "grad_norm": 1.257193684577942, "learning_rate": 2e-05, "loss": 0.05539931, "step": 14186 }, { "epoch": 28.374, "grad_norm": 2.638650894165039, "learning_rate": 2e-05, "loss": 0.03770338, "step": 14187 }, { "epoch": 28.376, "grad_norm": 1.6504069566726685, "learning_rate": 2e-05, "loss": 0.05300485, "step": 14188 }, { "epoch": 28.378, "grad_norm": 1.1879500150680542, "learning_rate": 2e-05, "loss": 0.04724121, "step": 14189 }, { "epoch": 28.38, "grad_norm": 1.1239627599716187, "learning_rate": 2e-05, "loss": 0.04006843, "step": 14190 }, { "epoch": 28.382, "grad_norm": 1.3962266445159912, "learning_rate": 2e-05, "loss": 0.04130918, "step": 14191 }, { "epoch": 28.384, "grad_norm": 1.0262913703918457, "learning_rate": 2e-05, "loss": 0.03978799, "step": 14192 }, { "epoch": 28.386, "grad_norm": 1.3596129417419434, "learning_rate": 2e-05, "loss": 0.06046655, "step": 14193 }, { "epoch": 28.388, "grad_norm": 1.1571170091629028, "learning_rate": 2e-05, "loss": 0.05023224, "step": 14194 }, { "epoch": 28.39, "grad_norm": 1.3431873321533203, "learning_rate": 2e-05, "loss": 0.05262809, "step": 14195 }, { "epoch": 28.392, "grad_norm": 2.0367238521575928, "learning_rate": 2e-05, "loss": 0.04556445, "step": 14196 }, { "epoch": 28.394, "grad_norm": 1.3604267835617065, "learning_rate": 2e-05, "loss": 0.04495424, "step": 14197 }, { "epoch": 28.396, "grad_norm": 1.0701876878738403, "learning_rate": 2e-05, "loss": 0.03906007, "step": 14198 }, { "epoch": 28.398, "grad_norm": 1.1933096647262573, "learning_rate": 2e-05, "loss": 0.05860349, "step": 14199 }, { "epoch": 28.4, "grad_norm": 1.3504351377487183, "learning_rate": 2e-05, "loss": 0.04702557, "step": 14200 }, { "epoch": 28.402, "grad_norm": 1.2822675704956055, "learning_rate": 2e-05, "loss": 0.03257849, "step": 14201 }, { "epoch": 28.404, "grad_norm": 1.6527397632598877, "learning_rate": 2e-05, "loss": 0.05858765, "step": 14202 }, { "epoch": 28.406, "grad_norm": 1.0486624240875244, "learning_rate": 2e-05, "loss": 0.03177363, "step": 14203 }, { "epoch": 28.408, "grad_norm": 1.128495216369629, "learning_rate": 2e-05, "loss": 0.04799465, "step": 14204 }, { "epoch": 28.41, "grad_norm": 1.0047136545181274, "learning_rate": 2e-05, "loss": 0.03385249, "step": 14205 }, { "epoch": 28.412, "grad_norm": 1.4099255800247192, "learning_rate": 2e-05, "loss": 0.0562316, "step": 14206 }, { "epoch": 28.414, "grad_norm": 2.193967580795288, "learning_rate": 2e-05, "loss": 0.07079425, "step": 14207 }, { "epoch": 28.416, "grad_norm": 1.1035521030426025, "learning_rate": 2e-05, "loss": 0.04856515, "step": 14208 }, { "epoch": 28.418, "grad_norm": 1.315811038017273, "learning_rate": 2e-05, "loss": 0.03866019, "step": 14209 }, { "epoch": 28.42, "grad_norm": 1.1560747623443604, "learning_rate": 2e-05, "loss": 0.04573543, "step": 14210 }, { "epoch": 28.422, "grad_norm": 1.3213006258010864, "learning_rate": 2e-05, "loss": 0.04729943, "step": 14211 }, { "epoch": 28.424, "grad_norm": 1.467768907546997, "learning_rate": 2e-05, "loss": 0.0497423, "step": 14212 }, { "epoch": 28.426, "grad_norm": 1.0412020683288574, "learning_rate": 2e-05, "loss": 0.03104374, "step": 14213 }, { "epoch": 28.428, "grad_norm": 1.289223074913025, "learning_rate": 2e-05, "loss": 0.04878268, "step": 14214 }, { "epoch": 28.43, "grad_norm": 1.0868279933929443, "learning_rate": 2e-05, "loss": 0.04009105, "step": 14215 }, { "epoch": 28.432, "grad_norm": 1.506838321685791, "learning_rate": 2e-05, "loss": 0.04779705, "step": 14216 }, { "epoch": 28.434, "grad_norm": 1.3598910570144653, "learning_rate": 2e-05, "loss": 0.0399574, "step": 14217 }, { "epoch": 28.436, "grad_norm": 1.165126085281372, "learning_rate": 2e-05, "loss": 0.03589812, "step": 14218 }, { "epoch": 28.438, "grad_norm": 1.403108835220337, "learning_rate": 2e-05, "loss": 0.03950584, "step": 14219 }, { "epoch": 28.44, "grad_norm": 0.9602160453796387, "learning_rate": 2e-05, "loss": 0.0343546, "step": 14220 }, { "epoch": 28.442, "grad_norm": 1.707650065422058, "learning_rate": 2e-05, "loss": 0.04669677, "step": 14221 }, { "epoch": 28.444, "grad_norm": 1.5494041442871094, "learning_rate": 2e-05, "loss": 0.05189268, "step": 14222 }, { "epoch": 28.446, "grad_norm": 1.4422967433929443, "learning_rate": 2e-05, "loss": 0.04455651, "step": 14223 }, { "epoch": 28.448, "grad_norm": 2.1481945514678955, "learning_rate": 2e-05, "loss": 0.04172315, "step": 14224 }, { "epoch": 28.45, "grad_norm": 1.8955481052398682, "learning_rate": 2e-05, "loss": 0.05096827, "step": 14225 }, { "epoch": 28.452, "grad_norm": 1.4757720232009888, "learning_rate": 2e-05, "loss": 0.0427974, "step": 14226 }, { "epoch": 28.454, "grad_norm": 3.5340352058410645, "learning_rate": 2e-05, "loss": 0.03696814, "step": 14227 }, { "epoch": 28.456, "grad_norm": 1.2981595993041992, "learning_rate": 2e-05, "loss": 0.04960819, "step": 14228 }, { "epoch": 28.458, "grad_norm": 1.3688567876815796, "learning_rate": 2e-05, "loss": 0.04346091, "step": 14229 }, { "epoch": 28.46, "grad_norm": 1.0863968133926392, "learning_rate": 2e-05, "loss": 0.04432026, "step": 14230 }, { "epoch": 28.462, "grad_norm": 1.1809697151184082, "learning_rate": 2e-05, "loss": 0.04062311, "step": 14231 }, { "epoch": 28.464, "grad_norm": 1.087384581565857, "learning_rate": 2e-05, "loss": 0.04801937, "step": 14232 }, { "epoch": 28.466, "grad_norm": 3.0253777503967285, "learning_rate": 2e-05, "loss": 0.06829751, "step": 14233 }, { "epoch": 28.468, "grad_norm": 0.9512907862663269, "learning_rate": 2e-05, "loss": 0.02574901, "step": 14234 }, { "epoch": 28.47, "grad_norm": 1.2561564445495605, "learning_rate": 2e-05, "loss": 0.03712705, "step": 14235 }, { "epoch": 28.472, "grad_norm": 1.114723801612854, "learning_rate": 2e-05, "loss": 0.04053148, "step": 14236 }, { "epoch": 28.474, "grad_norm": 0.9256406426429749, "learning_rate": 2e-05, "loss": 0.0282863, "step": 14237 }, { "epoch": 28.476, "grad_norm": 1.0956227779388428, "learning_rate": 2e-05, "loss": 0.03660049, "step": 14238 }, { "epoch": 28.478, "grad_norm": 1.1929676532745361, "learning_rate": 2e-05, "loss": 0.03722402, "step": 14239 }, { "epoch": 28.48, "grad_norm": 1.0275921821594238, "learning_rate": 2e-05, "loss": 0.03313165, "step": 14240 }, { "epoch": 28.482, "grad_norm": 1.4167778491973877, "learning_rate": 2e-05, "loss": 0.04980793, "step": 14241 }, { "epoch": 28.484, "grad_norm": 2.8569796085357666, "learning_rate": 2e-05, "loss": 0.04864322, "step": 14242 }, { "epoch": 28.486, "grad_norm": 1.0566939115524292, "learning_rate": 2e-05, "loss": 0.03854397, "step": 14243 }, { "epoch": 28.488, "grad_norm": 1.8737938404083252, "learning_rate": 2e-05, "loss": 0.06766672, "step": 14244 }, { "epoch": 28.49, "grad_norm": 1.1267757415771484, "learning_rate": 2e-05, "loss": 0.03949556, "step": 14245 }, { "epoch": 28.492, "grad_norm": 1.5439599752426147, "learning_rate": 2e-05, "loss": 0.03746489, "step": 14246 }, { "epoch": 28.494, "grad_norm": 1.10368812084198, "learning_rate": 2e-05, "loss": 0.04418413, "step": 14247 }, { "epoch": 28.496, "grad_norm": 1.8851401805877686, "learning_rate": 2e-05, "loss": 0.05599113, "step": 14248 }, { "epoch": 28.498, "grad_norm": 1.218051552772522, "learning_rate": 2e-05, "loss": 0.0527468, "step": 14249 }, { "epoch": 28.5, "grad_norm": 2.2494425773620605, "learning_rate": 2e-05, "loss": 0.05196273, "step": 14250 }, { "epoch": 28.502, "grad_norm": 1.1959189176559448, "learning_rate": 2e-05, "loss": 0.04170246, "step": 14251 }, { "epoch": 28.504, "grad_norm": 1.4841954708099365, "learning_rate": 2e-05, "loss": 0.04115407, "step": 14252 }, { "epoch": 28.506, "grad_norm": 1.4579923152923584, "learning_rate": 2e-05, "loss": 0.06711036, "step": 14253 }, { "epoch": 28.508, "grad_norm": 1.3409069776535034, "learning_rate": 2e-05, "loss": 0.04993474, "step": 14254 }, { "epoch": 28.51, "grad_norm": 0.9282925128936768, "learning_rate": 2e-05, "loss": 0.02424309, "step": 14255 }, { "epoch": 28.512, "grad_norm": 1.0717816352844238, "learning_rate": 2e-05, "loss": 0.03973263, "step": 14256 }, { "epoch": 28.514, "grad_norm": 1.3361554145812988, "learning_rate": 2e-05, "loss": 0.03955629, "step": 14257 }, { "epoch": 28.516, "grad_norm": 2.1933910846710205, "learning_rate": 2e-05, "loss": 0.05748781, "step": 14258 }, { "epoch": 28.518, "grad_norm": 1.1817597150802612, "learning_rate": 2e-05, "loss": 0.04961357, "step": 14259 }, { "epoch": 28.52, "grad_norm": 1.298127293586731, "learning_rate": 2e-05, "loss": 0.04155416, "step": 14260 }, { "epoch": 28.522, "grad_norm": 1.060938835144043, "learning_rate": 2e-05, "loss": 0.04268425, "step": 14261 }, { "epoch": 28.524, "grad_norm": 1.8386595249176025, "learning_rate": 2e-05, "loss": 0.04054004, "step": 14262 }, { "epoch": 28.526, "grad_norm": 1.0489224195480347, "learning_rate": 2e-05, "loss": 0.0311989, "step": 14263 }, { "epoch": 28.528, "grad_norm": 1.3312833309173584, "learning_rate": 2e-05, "loss": 0.04081573, "step": 14264 }, { "epoch": 28.53, "grad_norm": 1.7386471033096313, "learning_rate": 2e-05, "loss": 0.05501184, "step": 14265 }, { "epoch": 28.532, "grad_norm": 1.1394075155258179, "learning_rate": 2e-05, "loss": 0.0496092, "step": 14266 }, { "epoch": 28.534, "grad_norm": 1.0800431966781616, "learning_rate": 2e-05, "loss": 0.04388725, "step": 14267 }, { "epoch": 28.536, "grad_norm": 1.365820288658142, "learning_rate": 2e-05, "loss": 0.04674472, "step": 14268 }, { "epoch": 28.538, "grad_norm": 1.6938581466674805, "learning_rate": 2e-05, "loss": 0.03524645, "step": 14269 }, { "epoch": 28.54, "grad_norm": 1.0656059980392456, "learning_rate": 2e-05, "loss": 0.0350618, "step": 14270 }, { "epoch": 28.542, "grad_norm": 1.300699234008789, "learning_rate": 2e-05, "loss": 0.0439024, "step": 14271 }, { "epoch": 28.544, "grad_norm": 1.0407557487487793, "learning_rate": 2e-05, "loss": 0.03287885, "step": 14272 }, { "epoch": 28.546, "grad_norm": 1.203810214996338, "learning_rate": 2e-05, "loss": 0.03666135, "step": 14273 }, { "epoch": 28.548000000000002, "grad_norm": 1.3447908163070679, "learning_rate": 2e-05, "loss": 0.03037151, "step": 14274 }, { "epoch": 28.55, "grad_norm": 1.6330796480178833, "learning_rate": 2e-05, "loss": 0.04389308, "step": 14275 }, { "epoch": 28.552, "grad_norm": 1.453428030014038, "learning_rate": 2e-05, "loss": 0.05481421, "step": 14276 }, { "epoch": 28.554, "grad_norm": 1.3805347681045532, "learning_rate": 2e-05, "loss": 0.05831959, "step": 14277 }, { "epoch": 28.556, "grad_norm": 1.1240483522415161, "learning_rate": 2e-05, "loss": 0.03230035, "step": 14278 }, { "epoch": 28.558, "grad_norm": 1.5682629346847534, "learning_rate": 2e-05, "loss": 0.04137525, "step": 14279 }, { "epoch": 28.56, "grad_norm": 1.067922830581665, "learning_rate": 2e-05, "loss": 0.03095679, "step": 14280 }, { "epoch": 28.562, "grad_norm": 1.3047091960906982, "learning_rate": 2e-05, "loss": 0.040992, "step": 14281 }, { "epoch": 28.564, "grad_norm": 1.1715583801269531, "learning_rate": 2e-05, "loss": 0.04538528, "step": 14282 }, { "epoch": 28.566, "grad_norm": 2.710317373275757, "learning_rate": 2e-05, "loss": 0.04703784, "step": 14283 }, { "epoch": 28.568, "grad_norm": 1.1046556234359741, "learning_rate": 2e-05, "loss": 0.03596055, "step": 14284 }, { "epoch": 28.57, "grad_norm": 1.5328203439712524, "learning_rate": 2e-05, "loss": 0.05959145, "step": 14285 }, { "epoch": 28.572, "grad_norm": 1.1067321300506592, "learning_rate": 2e-05, "loss": 0.03717593, "step": 14286 }, { "epoch": 28.574, "grad_norm": 1.5921152830123901, "learning_rate": 2e-05, "loss": 0.04818038, "step": 14287 }, { "epoch": 28.576, "grad_norm": 1.2906737327575684, "learning_rate": 2e-05, "loss": 0.04694307, "step": 14288 }, { "epoch": 28.578, "grad_norm": 1.4389523267745972, "learning_rate": 2e-05, "loss": 0.06396471, "step": 14289 }, { "epoch": 28.58, "grad_norm": 1.250801920890808, "learning_rate": 2e-05, "loss": 0.03378974, "step": 14290 }, { "epoch": 28.582, "grad_norm": 1.8675917387008667, "learning_rate": 2e-05, "loss": 0.04210771, "step": 14291 }, { "epoch": 28.584, "grad_norm": 1.2386926412582397, "learning_rate": 2e-05, "loss": 0.04732635, "step": 14292 }, { "epoch": 28.586, "grad_norm": 2.474187135696411, "learning_rate": 2e-05, "loss": 0.06141835, "step": 14293 }, { "epoch": 28.588, "grad_norm": 1.7406890392303467, "learning_rate": 2e-05, "loss": 0.0405802, "step": 14294 }, { "epoch": 28.59, "grad_norm": 1.0976345539093018, "learning_rate": 2e-05, "loss": 0.03914689, "step": 14295 }, { "epoch": 28.592, "grad_norm": 1.8601065874099731, "learning_rate": 2e-05, "loss": 0.04376506, "step": 14296 }, { "epoch": 28.594, "grad_norm": 1.8947181701660156, "learning_rate": 2e-05, "loss": 0.05417305, "step": 14297 }, { "epoch": 28.596, "grad_norm": 1.3904882669448853, "learning_rate": 2e-05, "loss": 0.05922655, "step": 14298 }, { "epoch": 28.598, "grad_norm": 1.1092592477798462, "learning_rate": 2e-05, "loss": 0.04238461, "step": 14299 }, { "epoch": 28.6, "grad_norm": 1.1075598001480103, "learning_rate": 2e-05, "loss": 0.03969569, "step": 14300 }, { "epoch": 28.602, "grad_norm": 1.2379860877990723, "learning_rate": 2e-05, "loss": 0.05977491, "step": 14301 }, { "epoch": 28.604, "grad_norm": 2.523383378982544, "learning_rate": 2e-05, "loss": 0.04559822, "step": 14302 }, { "epoch": 28.606, "grad_norm": 1.390464186668396, "learning_rate": 2e-05, "loss": 0.03601509, "step": 14303 }, { "epoch": 28.608, "grad_norm": 1.1424009799957275, "learning_rate": 2e-05, "loss": 0.04128938, "step": 14304 }, { "epoch": 28.61, "grad_norm": 1.5949323177337646, "learning_rate": 2e-05, "loss": 0.03449958, "step": 14305 }, { "epoch": 28.612, "grad_norm": 1.0766521692276, "learning_rate": 2e-05, "loss": 0.03662252, "step": 14306 }, { "epoch": 28.614, "grad_norm": 1.684144377708435, "learning_rate": 2e-05, "loss": 0.03993898, "step": 14307 }, { "epoch": 28.616, "grad_norm": 1.2283536195755005, "learning_rate": 2e-05, "loss": 0.0440466, "step": 14308 }, { "epoch": 28.618, "grad_norm": 1.1252257823944092, "learning_rate": 2e-05, "loss": 0.03805107, "step": 14309 }, { "epoch": 28.62, "grad_norm": 2.2137584686279297, "learning_rate": 2e-05, "loss": 0.04531762, "step": 14310 }, { "epoch": 28.622, "grad_norm": 1.8632186651229858, "learning_rate": 2e-05, "loss": 0.04363943, "step": 14311 }, { "epoch": 28.624, "grad_norm": 2.1854779720306396, "learning_rate": 2e-05, "loss": 0.04152116, "step": 14312 }, { "epoch": 28.626, "grad_norm": 1.199179768562317, "learning_rate": 2e-05, "loss": 0.04882842, "step": 14313 }, { "epoch": 28.628, "grad_norm": 1.9288642406463623, "learning_rate": 2e-05, "loss": 0.06294123, "step": 14314 }, { "epoch": 28.63, "grad_norm": 0.9891992807388306, "learning_rate": 2e-05, "loss": 0.03864823, "step": 14315 }, { "epoch": 28.632, "grad_norm": 1.1384472846984863, "learning_rate": 2e-05, "loss": 0.03697848, "step": 14316 }, { "epoch": 28.634, "grad_norm": 1.3403377532958984, "learning_rate": 2e-05, "loss": 0.0520224, "step": 14317 }, { "epoch": 28.636, "grad_norm": 1.8841863870620728, "learning_rate": 2e-05, "loss": 0.03481198, "step": 14318 }, { "epoch": 28.638, "grad_norm": 2.253284215927124, "learning_rate": 2e-05, "loss": 0.05113673, "step": 14319 }, { "epoch": 28.64, "grad_norm": 1.678731918334961, "learning_rate": 2e-05, "loss": 0.03881251, "step": 14320 }, { "epoch": 28.642, "grad_norm": 1.419081211090088, "learning_rate": 2e-05, "loss": 0.03930322, "step": 14321 }, { "epoch": 28.644, "grad_norm": 2.305083990097046, "learning_rate": 2e-05, "loss": 0.03415557, "step": 14322 }, { "epoch": 28.646, "grad_norm": 1.7453691959381104, "learning_rate": 2e-05, "loss": 0.05402776, "step": 14323 }, { "epoch": 28.648, "grad_norm": 1.6866177320480347, "learning_rate": 2e-05, "loss": 0.05624064, "step": 14324 }, { "epoch": 28.65, "grad_norm": 2.8119149208068848, "learning_rate": 2e-05, "loss": 0.03970397, "step": 14325 }, { "epoch": 28.652, "grad_norm": 1.4768586158752441, "learning_rate": 2e-05, "loss": 0.04976168, "step": 14326 }, { "epoch": 28.654, "grad_norm": 1.2367441654205322, "learning_rate": 2e-05, "loss": 0.04001617, "step": 14327 }, { "epoch": 28.656, "grad_norm": 1.3339909315109253, "learning_rate": 2e-05, "loss": 0.0409479, "step": 14328 }, { "epoch": 28.658, "grad_norm": 1.3718701601028442, "learning_rate": 2e-05, "loss": 0.06367579, "step": 14329 }, { "epoch": 28.66, "grad_norm": 1.323080062866211, "learning_rate": 2e-05, "loss": 0.0345336, "step": 14330 }, { "epoch": 28.662, "grad_norm": 0.998365044593811, "learning_rate": 2e-05, "loss": 0.03076694, "step": 14331 }, { "epoch": 28.664, "grad_norm": 1.3956735134124756, "learning_rate": 2e-05, "loss": 0.04270991, "step": 14332 }, { "epoch": 28.666, "grad_norm": 1.0146839618682861, "learning_rate": 2e-05, "loss": 0.03931056, "step": 14333 }, { "epoch": 28.668, "grad_norm": 1.7695027589797974, "learning_rate": 2e-05, "loss": 0.0385569, "step": 14334 }, { "epoch": 28.67, "grad_norm": 1.3834205865859985, "learning_rate": 2e-05, "loss": 0.05658727, "step": 14335 }, { "epoch": 28.672, "grad_norm": 1.1219671964645386, "learning_rate": 2e-05, "loss": 0.03792579, "step": 14336 }, { "epoch": 28.674, "grad_norm": 1.3171169757843018, "learning_rate": 2e-05, "loss": 0.03911427, "step": 14337 }, { "epoch": 28.676, "grad_norm": 1.2529704570770264, "learning_rate": 2e-05, "loss": 0.05144939, "step": 14338 }, { "epoch": 28.678, "grad_norm": 0.909761905670166, "learning_rate": 2e-05, "loss": 0.02666837, "step": 14339 }, { "epoch": 28.68, "grad_norm": 1.0001680850982666, "learning_rate": 2e-05, "loss": 0.03465949, "step": 14340 }, { "epoch": 28.682, "grad_norm": 1.1715612411499023, "learning_rate": 2e-05, "loss": 0.03337407, "step": 14341 }, { "epoch": 28.684, "grad_norm": 1.56963050365448, "learning_rate": 2e-05, "loss": 0.04966843, "step": 14342 }, { "epoch": 28.686, "grad_norm": 1.6177550554275513, "learning_rate": 2e-05, "loss": 0.06656225, "step": 14343 }, { "epoch": 28.688, "grad_norm": 1.37739896774292, "learning_rate": 2e-05, "loss": 0.04131253, "step": 14344 }, { "epoch": 28.69, "grad_norm": 1.133405327796936, "learning_rate": 2e-05, "loss": 0.04604138, "step": 14345 }, { "epoch": 28.692, "grad_norm": 1.2516900300979614, "learning_rate": 2e-05, "loss": 0.04709205, "step": 14346 }, { "epoch": 28.694, "grad_norm": 1.7897838354110718, "learning_rate": 2e-05, "loss": 0.04589042, "step": 14347 }, { "epoch": 28.696, "grad_norm": 0.9966497421264648, "learning_rate": 2e-05, "loss": 0.0285507, "step": 14348 }, { "epoch": 28.698, "grad_norm": 1.126671314239502, "learning_rate": 2e-05, "loss": 0.04037992, "step": 14349 }, { "epoch": 28.7, "grad_norm": 1.076209306716919, "learning_rate": 2e-05, "loss": 0.04209936, "step": 14350 }, { "epoch": 28.701999999999998, "grad_norm": 1.2333250045776367, "learning_rate": 2e-05, "loss": 0.04853529, "step": 14351 }, { "epoch": 28.704, "grad_norm": 1.4717018604278564, "learning_rate": 2e-05, "loss": 0.04341109, "step": 14352 }, { "epoch": 28.706, "grad_norm": 1.9151333570480347, "learning_rate": 2e-05, "loss": 0.04162496, "step": 14353 }, { "epoch": 28.708, "grad_norm": 1.7289716005325317, "learning_rate": 2e-05, "loss": 0.04978495, "step": 14354 }, { "epoch": 28.71, "grad_norm": 1.104732632637024, "learning_rate": 2e-05, "loss": 0.04172861, "step": 14355 }, { "epoch": 28.712, "grad_norm": 1.1524678468704224, "learning_rate": 2e-05, "loss": 0.03477337, "step": 14356 }, { "epoch": 28.714, "grad_norm": 1.5342556238174438, "learning_rate": 2e-05, "loss": 0.04556607, "step": 14357 }, { "epoch": 28.716, "grad_norm": 1.1712950468063354, "learning_rate": 2e-05, "loss": 0.04763829, "step": 14358 }, { "epoch": 28.718, "grad_norm": 1.091592788696289, "learning_rate": 2e-05, "loss": 0.03833639, "step": 14359 }, { "epoch": 28.72, "grad_norm": 0.793989360332489, "learning_rate": 2e-05, "loss": 0.025282, "step": 14360 }, { "epoch": 28.722, "grad_norm": 1.4406577348709106, "learning_rate": 2e-05, "loss": 0.03637164, "step": 14361 }, { "epoch": 28.724, "grad_norm": 1.957209825515747, "learning_rate": 2e-05, "loss": 0.03893009, "step": 14362 }, { "epoch": 28.726, "grad_norm": 1.1229599714279175, "learning_rate": 2e-05, "loss": 0.03854847, "step": 14363 }, { "epoch": 28.728, "grad_norm": 1.9506926536560059, "learning_rate": 2e-05, "loss": 0.03666751, "step": 14364 }, { "epoch": 28.73, "grad_norm": 1.435692310333252, "learning_rate": 2e-05, "loss": 0.03390378, "step": 14365 }, { "epoch": 28.732, "grad_norm": 0.955740749835968, "learning_rate": 2e-05, "loss": 0.02934472, "step": 14366 }, { "epoch": 28.734, "grad_norm": 1.1466535329818726, "learning_rate": 2e-05, "loss": 0.05017526, "step": 14367 }, { "epoch": 28.736, "grad_norm": 1.218614101409912, "learning_rate": 2e-05, "loss": 0.05039631, "step": 14368 }, { "epoch": 28.738, "grad_norm": 1.093510389328003, "learning_rate": 2e-05, "loss": 0.03176686, "step": 14369 }, { "epoch": 28.74, "grad_norm": 1.144060492515564, "learning_rate": 2e-05, "loss": 0.04076057, "step": 14370 }, { "epoch": 28.742, "grad_norm": 1.5863828659057617, "learning_rate": 2e-05, "loss": 0.04815026, "step": 14371 }, { "epoch": 28.744, "grad_norm": 0.9931338429450989, "learning_rate": 2e-05, "loss": 0.03469016, "step": 14372 }, { "epoch": 28.746, "grad_norm": 1.6943480968475342, "learning_rate": 2e-05, "loss": 0.05469589, "step": 14373 }, { "epoch": 28.748, "grad_norm": 1.2340564727783203, "learning_rate": 2e-05, "loss": 0.04292573, "step": 14374 }, { "epoch": 28.75, "grad_norm": 1.6473150253295898, "learning_rate": 2e-05, "loss": 0.06950572, "step": 14375 }, { "epoch": 28.752, "grad_norm": 1.900185465812683, "learning_rate": 2e-05, "loss": 0.05023978, "step": 14376 }, { "epoch": 28.754, "grad_norm": 1.381726622581482, "learning_rate": 2e-05, "loss": 0.05448072, "step": 14377 }, { "epoch": 28.756, "grad_norm": 1.0618399381637573, "learning_rate": 2e-05, "loss": 0.03553692, "step": 14378 }, { "epoch": 28.758, "grad_norm": 1.158182978630066, "learning_rate": 2e-05, "loss": 0.04157444, "step": 14379 }, { "epoch": 28.76, "grad_norm": 1.0218384265899658, "learning_rate": 2e-05, "loss": 0.03774838, "step": 14380 }, { "epoch": 28.762, "grad_norm": 1.4672837257385254, "learning_rate": 2e-05, "loss": 0.04652436, "step": 14381 }, { "epoch": 28.764, "grad_norm": 2.9982128143310547, "learning_rate": 2e-05, "loss": 0.05198572, "step": 14382 }, { "epoch": 28.766, "grad_norm": 1.5844800472259521, "learning_rate": 2e-05, "loss": 0.04013401, "step": 14383 }, { "epoch": 28.768, "grad_norm": 2.2007713317871094, "learning_rate": 2e-05, "loss": 0.0491824, "step": 14384 }, { "epoch": 28.77, "grad_norm": 1.0713263750076294, "learning_rate": 2e-05, "loss": 0.03637439, "step": 14385 }, { "epoch": 28.772, "grad_norm": 2.0838675498962402, "learning_rate": 2e-05, "loss": 0.04036938, "step": 14386 }, { "epoch": 28.774, "grad_norm": 1.051755428314209, "learning_rate": 2e-05, "loss": 0.03982811, "step": 14387 }, { "epoch": 28.776, "grad_norm": 1.3116508722305298, "learning_rate": 2e-05, "loss": 0.04132722, "step": 14388 }, { "epoch": 28.778, "grad_norm": 1.0713746547698975, "learning_rate": 2e-05, "loss": 0.0432993, "step": 14389 }, { "epoch": 28.78, "grad_norm": 0.9972354173660278, "learning_rate": 2e-05, "loss": 0.04672929, "step": 14390 }, { "epoch": 28.782, "grad_norm": 0.9369496703147888, "learning_rate": 2e-05, "loss": 0.02907495, "step": 14391 }, { "epoch": 28.784, "grad_norm": 1.1444287300109863, "learning_rate": 2e-05, "loss": 0.03748661, "step": 14392 }, { "epoch": 28.786, "grad_norm": 1.1183111667633057, "learning_rate": 2e-05, "loss": 0.02960247, "step": 14393 }, { "epoch": 28.788, "grad_norm": 1.0258089303970337, "learning_rate": 2e-05, "loss": 0.03007736, "step": 14394 }, { "epoch": 28.79, "grad_norm": 0.9938691258430481, "learning_rate": 2e-05, "loss": 0.03597631, "step": 14395 }, { "epoch": 28.792, "grad_norm": 1.1507741212844849, "learning_rate": 2e-05, "loss": 0.04170668, "step": 14396 }, { "epoch": 28.794, "grad_norm": 3.4133851528167725, "learning_rate": 2e-05, "loss": 0.04272288, "step": 14397 }, { "epoch": 28.796, "grad_norm": 1.7486724853515625, "learning_rate": 2e-05, "loss": 0.06500375, "step": 14398 }, { "epoch": 28.798000000000002, "grad_norm": 1.4064818620681763, "learning_rate": 2e-05, "loss": 0.04955719, "step": 14399 }, { "epoch": 28.8, "grad_norm": 1.6619688272476196, "learning_rate": 2e-05, "loss": 0.04951192, "step": 14400 }, { "epoch": 28.802, "grad_norm": 1.1675795316696167, "learning_rate": 2e-05, "loss": 0.037157, "step": 14401 }, { "epoch": 28.804, "grad_norm": 1.6489543914794922, "learning_rate": 2e-05, "loss": 0.04647279, "step": 14402 }, { "epoch": 28.806, "grad_norm": 1.7396496534347534, "learning_rate": 2e-05, "loss": 0.04493985, "step": 14403 }, { "epoch": 28.808, "grad_norm": 1.156243920326233, "learning_rate": 2e-05, "loss": 0.04273091, "step": 14404 }, { "epoch": 28.81, "grad_norm": 1.0005199909210205, "learning_rate": 2e-05, "loss": 0.03280544, "step": 14405 }, { "epoch": 28.812, "grad_norm": 3.4301085472106934, "learning_rate": 2e-05, "loss": 0.03465569, "step": 14406 }, { "epoch": 28.814, "grad_norm": 1.50563383102417, "learning_rate": 2e-05, "loss": 0.07086978, "step": 14407 }, { "epoch": 28.816, "grad_norm": 1.5838433504104614, "learning_rate": 2e-05, "loss": 0.03891544, "step": 14408 }, { "epoch": 28.818, "grad_norm": 1.1893781423568726, "learning_rate": 2e-05, "loss": 0.04945404, "step": 14409 }, { "epoch": 28.82, "grad_norm": 0.8621784448623657, "learning_rate": 2e-05, "loss": 0.03163891, "step": 14410 }, { "epoch": 28.822, "grad_norm": 1.1594724655151367, "learning_rate": 2e-05, "loss": 0.05155197, "step": 14411 }, { "epoch": 28.824, "grad_norm": 1.443181037902832, "learning_rate": 2e-05, "loss": 0.05630941, "step": 14412 }, { "epoch": 28.826, "grad_norm": 1.5907959938049316, "learning_rate": 2e-05, "loss": 0.04138833, "step": 14413 }, { "epoch": 28.828, "grad_norm": 1.6216257810592651, "learning_rate": 2e-05, "loss": 0.04543457, "step": 14414 }, { "epoch": 28.83, "grad_norm": 1.4624403715133667, "learning_rate": 2e-05, "loss": 0.05380312, "step": 14415 }, { "epoch": 28.832, "grad_norm": 1.0963571071624756, "learning_rate": 2e-05, "loss": 0.04702737, "step": 14416 }, { "epoch": 28.834, "grad_norm": 1.7015491724014282, "learning_rate": 2e-05, "loss": 0.03770491, "step": 14417 }, { "epoch": 28.836, "grad_norm": 1.20378839969635, "learning_rate": 2e-05, "loss": 0.04444302, "step": 14418 }, { "epoch": 28.838, "grad_norm": 1.72189199924469, "learning_rate": 2e-05, "loss": 0.05816022, "step": 14419 }, { "epoch": 28.84, "grad_norm": 1.1528189182281494, "learning_rate": 2e-05, "loss": 0.03968474, "step": 14420 }, { "epoch": 28.842, "grad_norm": 1.2541671991348267, "learning_rate": 2e-05, "loss": 0.03849109, "step": 14421 }, { "epoch": 28.844, "grad_norm": 2.8099963665008545, "learning_rate": 2e-05, "loss": 0.05084788, "step": 14422 }, { "epoch": 28.846, "grad_norm": 0.9781857132911682, "learning_rate": 2e-05, "loss": 0.0285262, "step": 14423 }, { "epoch": 28.848, "grad_norm": 1.4903124570846558, "learning_rate": 2e-05, "loss": 0.05180159, "step": 14424 }, { "epoch": 28.85, "grad_norm": 1.7416338920593262, "learning_rate": 2e-05, "loss": 0.03428745, "step": 14425 }, { "epoch": 28.852, "grad_norm": 1.521485686302185, "learning_rate": 2e-05, "loss": 0.04813501, "step": 14426 }, { "epoch": 28.854, "grad_norm": 1.2684760093688965, "learning_rate": 2e-05, "loss": 0.04741008, "step": 14427 }, { "epoch": 28.856, "grad_norm": 1.1217941045761108, "learning_rate": 2e-05, "loss": 0.04172241, "step": 14428 }, { "epoch": 28.858, "grad_norm": 1.3092353343963623, "learning_rate": 2e-05, "loss": 0.04935054, "step": 14429 }, { "epoch": 28.86, "grad_norm": 1.0041587352752686, "learning_rate": 2e-05, "loss": 0.03718605, "step": 14430 }, { "epoch": 28.862, "grad_norm": 2.0443413257598877, "learning_rate": 2e-05, "loss": 0.04975209, "step": 14431 }, { "epoch": 28.864, "grad_norm": 0.8965648412704468, "learning_rate": 2e-05, "loss": 0.03016404, "step": 14432 }, { "epoch": 28.866, "grad_norm": 1.8447829484939575, "learning_rate": 2e-05, "loss": 0.05125307, "step": 14433 }, { "epoch": 28.868, "grad_norm": 1.2474303245544434, "learning_rate": 2e-05, "loss": 0.04687266, "step": 14434 }, { "epoch": 28.87, "grad_norm": 1.745761752128601, "learning_rate": 2e-05, "loss": 0.07663966, "step": 14435 }, { "epoch": 28.872, "grad_norm": 1.3358268737792969, "learning_rate": 2e-05, "loss": 0.04147369, "step": 14436 }, { "epoch": 28.874, "grad_norm": 1.2408902645111084, "learning_rate": 2e-05, "loss": 0.04460159, "step": 14437 }, { "epoch": 28.876, "grad_norm": 1.9124995470046997, "learning_rate": 2e-05, "loss": 0.05355459, "step": 14438 }, { "epoch": 28.878, "grad_norm": 1.334394931793213, "learning_rate": 2e-05, "loss": 0.04897958, "step": 14439 }, { "epoch": 28.88, "grad_norm": 1.8889309167861938, "learning_rate": 2e-05, "loss": 0.04751597, "step": 14440 }, { "epoch": 28.882, "grad_norm": 1.1469106674194336, "learning_rate": 2e-05, "loss": 0.04521649, "step": 14441 }, { "epoch": 28.884, "grad_norm": 1.0258030891418457, "learning_rate": 2e-05, "loss": 0.04085048, "step": 14442 }, { "epoch": 28.886, "grad_norm": 2.1602070331573486, "learning_rate": 2e-05, "loss": 0.05456547, "step": 14443 }, { "epoch": 28.888, "grad_norm": 1.524979591369629, "learning_rate": 2e-05, "loss": 0.04410193, "step": 14444 }, { "epoch": 28.89, "grad_norm": 1.0402649641036987, "learning_rate": 2e-05, "loss": 0.03135321, "step": 14445 }, { "epoch": 28.892, "grad_norm": 1.7973295450210571, "learning_rate": 2e-05, "loss": 0.04395194, "step": 14446 }, { "epoch": 28.894, "grad_norm": 1.0734200477600098, "learning_rate": 2e-05, "loss": 0.0351134, "step": 14447 }, { "epoch": 28.896, "grad_norm": 1.271427035331726, "learning_rate": 2e-05, "loss": 0.0497748, "step": 14448 }, { "epoch": 28.898, "grad_norm": 0.9508517384529114, "learning_rate": 2e-05, "loss": 0.03418116, "step": 14449 }, { "epoch": 28.9, "grad_norm": 2.287792921066284, "learning_rate": 2e-05, "loss": 0.04537106, "step": 14450 }, { "epoch": 28.902, "grad_norm": 2.9237358570098877, "learning_rate": 2e-05, "loss": 0.06133288, "step": 14451 }, { "epoch": 28.904, "grad_norm": 1.5036847591400146, "learning_rate": 2e-05, "loss": 0.04737253, "step": 14452 }, { "epoch": 28.906, "grad_norm": 1.2241439819335938, "learning_rate": 2e-05, "loss": 0.03472542, "step": 14453 }, { "epoch": 28.908, "grad_norm": 1.3582462072372437, "learning_rate": 2e-05, "loss": 0.05366043, "step": 14454 }, { "epoch": 28.91, "grad_norm": 1.204209327697754, "learning_rate": 2e-05, "loss": 0.04971019, "step": 14455 }, { "epoch": 28.912, "grad_norm": 1.7010318040847778, "learning_rate": 2e-05, "loss": 0.05780742, "step": 14456 }, { "epoch": 28.914, "grad_norm": 1.0974135398864746, "learning_rate": 2e-05, "loss": 0.03613953, "step": 14457 }, { "epoch": 28.916, "grad_norm": 1.595192551612854, "learning_rate": 2e-05, "loss": 0.04265611, "step": 14458 }, { "epoch": 28.918, "grad_norm": 1.5996463298797607, "learning_rate": 2e-05, "loss": 0.03614064, "step": 14459 }, { "epoch": 28.92, "grad_norm": 0.9934185743331909, "learning_rate": 2e-05, "loss": 0.03452392, "step": 14460 }, { "epoch": 28.922, "grad_norm": 1.1480003595352173, "learning_rate": 2e-05, "loss": 0.03976715, "step": 14461 }, { "epoch": 28.924, "grad_norm": 0.7937719225883484, "learning_rate": 2e-05, "loss": 0.03106033, "step": 14462 }, { "epoch": 28.926, "grad_norm": 0.9472975134849548, "learning_rate": 2e-05, "loss": 0.03592245, "step": 14463 }, { "epoch": 28.928, "grad_norm": 1.2322276830673218, "learning_rate": 2e-05, "loss": 0.06219757, "step": 14464 }, { "epoch": 28.93, "grad_norm": 1.1841628551483154, "learning_rate": 2e-05, "loss": 0.02757031, "step": 14465 }, { "epoch": 28.932, "grad_norm": 2.2316744327545166, "learning_rate": 2e-05, "loss": 0.05290478, "step": 14466 }, { "epoch": 28.934, "grad_norm": 1.1691445112228394, "learning_rate": 2e-05, "loss": 0.04423434, "step": 14467 }, { "epoch": 28.936, "grad_norm": 1.2703580856323242, "learning_rate": 2e-05, "loss": 0.04534604, "step": 14468 }, { "epoch": 28.938, "grad_norm": 1.8662946224212646, "learning_rate": 2e-05, "loss": 0.0505904, "step": 14469 }, { "epoch": 28.94, "grad_norm": 1.984939455986023, "learning_rate": 2e-05, "loss": 0.04816742, "step": 14470 }, { "epoch": 28.942, "grad_norm": 1.6010085344314575, "learning_rate": 2e-05, "loss": 0.03411138, "step": 14471 }, { "epoch": 28.944, "grad_norm": 1.3114997148513794, "learning_rate": 2e-05, "loss": 0.03839431, "step": 14472 }, { "epoch": 28.946, "grad_norm": 1.9743845462799072, "learning_rate": 2e-05, "loss": 0.06387973, "step": 14473 }, { "epoch": 28.948, "grad_norm": 3.784247398376465, "learning_rate": 2e-05, "loss": 0.05783255, "step": 14474 }, { "epoch": 28.95, "grad_norm": 1.5012797117233276, "learning_rate": 2e-05, "loss": 0.04049198, "step": 14475 }, { "epoch": 28.951999999999998, "grad_norm": 0.9062674641609192, "learning_rate": 2e-05, "loss": 0.0387709, "step": 14476 }, { "epoch": 28.954, "grad_norm": 1.216447353363037, "learning_rate": 2e-05, "loss": 0.03773759, "step": 14477 }, { "epoch": 28.956, "grad_norm": 2.346182346343994, "learning_rate": 2e-05, "loss": 0.04593071, "step": 14478 }, { "epoch": 28.958, "grad_norm": 2.298583984375, "learning_rate": 2e-05, "loss": 0.05919803, "step": 14479 }, { "epoch": 28.96, "grad_norm": 0.9582846164703369, "learning_rate": 2e-05, "loss": 0.02871491, "step": 14480 }, { "epoch": 28.962, "grad_norm": 0.7866615056991577, "learning_rate": 2e-05, "loss": 0.02259299, "step": 14481 }, { "epoch": 28.964, "grad_norm": 1.4925915002822876, "learning_rate": 2e-05, "loss": 0.04916498, "step": 14482 }, { "epoch": 28.966, "grad_norm": 1.4005680084228516, "learning_rate": 2e-05, "loss": 0.05168829, "step": 14483 }, { "epoch": 28.968, "grad_norm": 4.166713237762451, "learning_rate": 2e-05, "loss": 0.04969958, "step": 14484 }, { "epoch": 28.97, "grad_norm": 1.1008797883987427, "learning_rate": 2e-05, "loss": 0.03930159, "step": 14485 }, { "epoch": 28.972, "grad_norm": 1.0620421171188354, "learning_rate": 2e-05, "loss": 0.03718957, "step": 14486 }, { "epoch": 28.974, "grad_norm": 1.1578391790390015, "learning_rate": 2e-05, "loss": 0.04585633, "step": 14487 }, { "epoch": 28.976, "grad_norm": 1.48446524143219, "learning_rate": 2e-05, "loss": 0.0247959, "step": 14488 }, { "epoch": 28.978, "grad_norm": 1.0976674556732178, "learning_rate": 2e-05, "loss": 0.03197834, "step": 14489 }, { "epoch": 28.98, "grad_norm": 1.139172077178955, "learning_rate": 2e-05, "loss": 0.04646052, "step": 14490 }, { "epoch": 28.982, "grad_norm": 1.9785137176513672, "learning_rate": 2e-05, "loss": 0.05109739, "step": 14491 }, { "epoch": 28.984, "grad_norm": 1.3987001180648804, "learning_rate": 2e-05, "loss": 0.0440822, "step": 14492 }, { "epoch": 28.986, "grad_norm": 1.220787763595581, "learning_rate": 2e-05, "loss": 0.03669748, "step": 14493 }, { "epoch": 28.988, "grad_norm": 2.083451986312866, "learning_rate": 2e-05, "loss": 0.05152371, "step": 14494 }, { "epoch": 28.99, "grad_norm": 1.1273852586746216, "learning_rate": 2e-05, "loss": 0.03859162, "step": 14495 }, { "epoch": 28.992, "grad_norm": 1.344372034072876, "learning_rate": 2e-05, "loss": 0.03838318, "step": 14496 }, { "epoch": 28.994, "grad_norm": 1.728652000427246, "learning_rate": 2e-05, "loss": 0.03777511, "step": 14497 }, { "epoch": 28.996, "grad_norm": 3.5054938793182373, "learning_rate": 2e-05, "loss": 0.04964066, "step": 14498 }, { "epoch": 28.998, "grad_norm": 1.501190185546875, "learning_rate": 2e-05, "loss": 0.06090293, "step": 14499 }, { "epoch": 29.0, "grad_norm": 1.227286696434021, "learning_rate": 2e-05, "loss": 0.04943068, "step": 14500 }, { "epoch": 29.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9780439121756487, "Equal_1": 0.998, "Equal_2": 0.9780439121756487, "Equal_3": 0.9880239520958084, "LineComparison_1": 0.998, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9959919839679359, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.994, "Perpendicular_1": 0.998, "Perpendicular_2": 0.984, "Perpendicular_3": 0.8897795591182365, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9936666666666667, "PointLiesOnCircle_3": 0.9876, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9820359281437125 }, "eval_runtime": 320.129, "eval_samples_per_second": 32.799, "eval_steps_per_second": 0.656, "step": 14500 }, { "epoch": 29.002, "grad_norm": 1.3614428043365479, "learning_rate": 2e-05, "loss": 0.04904348, "step": 14501 }, { "epoch": 29.004, "grad_norm": 0.990894079208374, "learning_rate": 2e-05, "loss": 0.03972761, "step": 14502 }, { "epoch": 29.006, "grad_norm": 1.5582256317138672, "learning_rate": 2e-05, "loss": 0.04734577, "step": 14503 }, { "epoch": 29.008, "grad_norm": 1.598989725112915, "learning_rate": 2e-05, "loss": 0.04445969, "step": 14504 }, { "epoch": 29.01, "grad_norm": 1.4012812376022339, "learning_rate": 2e-05, "loss": 0.05144128, "step": 14505 }, { "epoch": 29.012, "grad_norm": 1.633912444114685, "learning_rate": 2e-05, "loss": 0.04580737, "step": 14506 }, { "epoch": 29.014, "grad_norm": 1.3634629249572754, "learning_rate": 2e-05, "loss": 0.03885691, "step": 14507 }, { "epoch": 29.016, "grad_norm": 1.5107965469360352, "learning_rate": 2e-05, "loss": 0.05544426, "step": 14508 }, { "epoch": 29.018, "grad_norm": 1.50559401512146, "learning_rate": 2e-05, "loss": 0.03723505, "step": 14509 }, { "epoch": 29.02, "grad_norm": 1.2116549015045166, "learning_rate": 2e-05, "loss": 0.06099574, "step": 14510 }, { "epoch": 29.022, "grad_norm": 1.4817736148834229, "learning_rate": 2e-05, "loss": 0.04306397, "step": 14511 }, { "epoch": 29.024, "grad_norm": 1.6792289018630981, "learning_rate": 2e-05, "loss": 0.0461472, "step": 14512 }, { "epoch": 29.026, "grad_norm": 1.1383700370788574, "learning_rate": 2e-05, "loss": 0.0400393, "step": 14513 }, { "epoch": 29.028, "grad_norm": 1.6050753593444824, "learning_rate": 2e-05, "loss": 0.0388446, "step": 14514 }, { "epoch": 29.03, "grad_norm": 2.475426435470581, "learning_rate": 2e-05, "loss": 0.04789076, "step": 14515 }, { "epoch": 29.032, "grad_norm": 1.7298914194107056, "learning_rate": 2e-05, "loss": 0.04865304, "step": 14516 }, { "epoch": 29.034, "grad_norm": 1.0967127084732056, "learning_rate": 2e-05, "loss": 0.02864125, "step": 14517 }, { "epoch": 29.036, "grad_norm": 2.11590838432312, "learning_rate": 2e-05, "loss": 0.03580888, "step": 14518 }, { "epoch": 29.038, "grad_norm": 1.135114073753357, "learning_rate": 2e-05, "loss": 0.04573719, "step": 14519 }, { "epoch": 29.04, "grad_norm": 1.1727830171585083, "learning_rate": 2e-05, "loss": 0.04438477, "step": 14520 }, { "epoch": 29.042, "grad_norm": 1.3196028470993042, "learning_rate": 2e-05, "loss": 0.04897299, "step": 14521 }, { "epoch": 29.044, "grad_norm": 1.5784796476364136, "learning_rate": 2e-05, "loss": 0.06415123, "step": 14522 }, { "epoch": 29.046, "grad_norm": 1.6042543649673462, "learning_rate": 2e-05, "loss": 0.03841857, "step": 14523 }, { "epoch": 29.048, "grad_norm": 1.3283536434173584, "learning_rate": 2e-05, "loss": 0.06226228, "step": 14524 }, { "epoch": 29.05, "grad_norm": 2.3464431762695312, "learning_rate": 2e-05, "loss": 0.05047053, "step": 14525 }, { "epoch": 29.052, "grad_norm": 1.370872139930725, "learning_rate": 2e-05, "loss": 0.03550132, "step": 14526 }, { "epoch": 29.054, "grad_norm": 1.3238565921783447, "learning_rate": 2e-05, "loss": 0.0407903, "step": 14527 }, { "epoch": 29.056, "grad_norm": 1.16240656375885, "learning_rate": 2e-05, "loss": 0.03744307, "step": 14528 }, { "epoch": 29.058, "grad_norm": 1.2564165592193604, "learning_rate": 2e-05, "loss": 0.03232381, "step": 14529 }, { "epoch": 29.06, "grad_norm": 1.1828256845474243, "learning_rate": 2e-05, "loss": 0.04936057, "step": 14530 }, { "epoch": 29.062, "grad_norm": 1.9137167930603027, "learning_rate": 2e-05, "loss": 0.05838963, "step": 14531 }, { "epoch": 29.064, "grad_norm": 1.2713327407836914, "learning_rate": 2e-05, "loss": 0.04713174, "step": 14532 }, { "epoch": 29.066, "grad_norm": 1.812070369720459, "learning_rate": 2e-05, "loss": 0.0491786, "step": 14533 }, { "epoch": 29.068, "grad_norm": 1.9103559255599976, "learning_rate": 2e-05, "loss": 0.04707557, "step": 14534 }, { "epoch": 29.07, "grad_norm": 1.211613655090332, "learning_rate": 2e-05, "loss": 0.0457197, "step": 14535 }, { "epoch": 29.072, "grad_norm": 1.562606692314148, "learning_rate": 2e-05, "loss": 0.05934884, "step": 14536 }, { "epoch": 29.074, "grad_norm": 1.6958134174346924, "learning_rate": 2e-05, "loss": 0.04679252, "step": 14537 }, { "epoch": 29.076, "grad_norm": 1.0809810161590576, "learning_rate": 2e-05, "loss": 0.04069981, "step": 14538 }, { "epoch": 29.078, "grad_norm": 1.0606014728546143, "learning_rate": 2e-05, "loss": 0.03034795, "step": 14539 }, { "epoch": 29.08, "grad_norm": 1.3464914560317993, "learning_rate": 2e-05, "loss": 0.03704887, "step": 14540 }, { "epoch": 29.082, "grad_norm": 2.9524738788604736, "learning_rate": 2e-05, "loss": 0.06308039, "step": 14541 }, { "epoch": 29.084, "grad_norm": 1.7321432828903198, "learning_rate": 2e-05, "loss": 0.06781643, "step": 14542 }, { "epoch": 29.086, "grad_norm": 1.9871220588684082, "learning_rate": 2e-05, "loss": 0.0448276, "step": 14543 }, { "epoch": 29.088, "grad_norm": 1.0521864891052246, "learning_rate": 2e-05, "loss": 0.04277543, "step": 14544 }, { "epoch": 29.09, "grad_norm": 1.6437976360321045, "learning_rate": 2e-05, "loss": 0.05797031, "step": 14545 }, { "epoch": 29.092, "grad_norm": 1.883757472038269, "learning_rate": 2e-05, "loss": 0.03330542, "step": 14546 }, { "epoch": 29.094, "grad_norm": 1.5868852138519287, "learning_rate": 2e-05, "loss": 0.05670642, "step": 14547 }, { "epoch": 29.096, "grad_norm": 1.4247984886169434, "learning_rate": 2e-05, "loss": 0.04870617, "step": 14548 }, { "epoch": 29.098, "grad_norm": 1.7102012634277344, "learning_rate": 2e-05, "loss": 0.0426238, "step": 14549 }, { "epoch": 29.1, "grad_norm": 1.3921092748641968, "learning_rate": 2e-05, "loss": 0.06138652, "step": 14550 }, { "epoch": 29.102, "grad_norm": 1.0594866275787354, "learning_rate": 2e-05, "loss": 0.0388628, "step": 14551 }, { "epoch": 29.104, "grad_norm": 1.6638845205307007, "learning_rate": 2e-05, "loss": 0.04031256, "step": 14552 }, { "epoch": 29.106, "grad_norm": 1.3251597881317139, "learning_rate": 2e-05, "loss": 0.0368821, "step": 14553 }, { "epoch": 29.108, "grad_norm": 1.3504347801208496, "learning_rate": 2e-05, "loss": 0.04997741, "step": 14554 }, { "epoch": 29.11, "grad_norm": 1.2452493906021118, "learning_rate": 2e-05, "loss": 0.04492478, "step": 14555 }, { "epoch": 29.112, "grad_norm": 1.7179110050201416, "learning_rate": 2e-05, "loss": 0.06608713, "step": 14556 }, { "epoch": 29.114, "grad_norm": 0.9290695190429688, "learning_rate": 2e-05, "loss": 0.02252526, "step": 14557 }, { "epoch": 29.116, "grad_norm": 1.4510340690612793, "learning_rate": 2e-05, "loss": 0.03936088, "step": 14558 }, { "epoch": 29.118, "grad_norm": 1.056706428527832, "learning_rate": 2e-05, "loss": 0.04137318, "step": 14559 }, { "epoch": 29.12, "grad_norm": 1.5099283456802368, "learning_rate": 2e-05, "loss": 0.05786295, "step": 14560 }, { "epoch": 29.122, "grad_norm": 1.028210997581482, "learning_rate": 2e-05, "loss": 0.03324694, "step": 14561 }, { "epoch": 29.124, "grad_norm": 1.200790524482727, "learning_rate": 2e-05, "loss": 0.0418604, "step": 14562 }, { "epoch": 29.126, "grad_norm": 1.2720760107040405, "learning_rate": 2e-05, "loss": 0.05241677, "step": 14563 }, { "epoch": 29.128, "grad_norm": 1.2256773710250854, "learning_rate": 2e-05, "loss": 0.05769563, "step": 14564 }, { "epoch": 29.13, "grad_norm": 1.125728964805603, "learning_rate": 2e-05, "loss": 0.03989631, "step": 14565 }, { "epoch": 29.132, "grad_norm": 3.5522632598876953, "learning_rate": 2e-05, "loss": 0.04670805, "step": 14566 }, { "epoch": 29.134, "grad_norm": 1.2549570798873901, "learning_rate": 2e-05, "loss": 0.03625032, "step": 14567 }, { "epoch": 29.136, "grad_norm": 1.3453484773635864, "learning_rate": 2e-05, "loss": 0.04612465, "step": 14568 }, { "epoch": 29.138, "grad_norm": 1.3071088790893555, "learning_rate": 2e-05, "loss": 0.04188219, "step": 14569 }, { "epoch": 29.14, "grad_norm": 1.373300313949585, "learning_rate": 2e-05, "loss": 0.05468735, "step": 14570 }, { "epoch": 29.142, "grad_norm": 1.1056849956512451, "learning_rate": 2e-05, "loss": 0.04801034, "step": 14571 }, { "epoch": 29.144, "grad_norm": 2.6124298572540283, "learning_rate": 2e-05, "loss": 0.06286603, "step": 14572 }, { "epoch": 29.146, "grad_norm": 1.090798020362854, "learning_rate": 2e-05, "loss": 0.04604357, "step": 14573 }, { "epoch": 29.148, "grad_norm": 1.4949955940246582, "learning_rate": 2e-05, "loss": 0.05045891, "step": 14574 }, { "epoch": 29.15, "grad_norm": 1.3205533027648926, "learning_rate": 2e-05, "loss": 0.04018179, "step": 14575 }, { "epoch": 29.152, "grad_norm": 1.686370611190796, "learning_rate": 2e-05, "loss": 0.03889347, "step": 14576 }, { "epoch": 29.154, "grad_norm": 0.989204466342926, "learning_rate": 2e-05, "loss": 0.04520692, "step": 14577 }, { "epoch": 29.156, "grad_norm": 1.28013014793396, "learning_rate": 2e-05, "loss": 0.04326824, "step": 14578 }, { "epoch": 29.158, "grad_norm": 1.1764912605285645, "learning_rate": 2e-05, "loss": 0.03642344, "step": 14579 }, { "epoch": 29.16, "grad_norm": 1.8132061958312988, "learning_rate": 2e-05, "loss": 0.05279174, "step": 14580 }, { "epoch": 29.162, "grad_norm": 1.478899359703064, "learning_rate": 2e-05, "loss": 0.05297806, "step": 14581 }, { "epoch": 29.164, "grad_norm": 3.247129201889038, "learning_rate": 2e-05, "loss": 0.05896338, "step": 14582 }, { "epoch": 29.166, "grad_norm": 1.3242168426513672, "learning_rate": 2e-05, "loss": 0.03942503, "step": 14583 }, { "epoch": 29.168, "grad_norm": 1.1935416460037231, "learning_rate": 2e-05, "loss": 0.04639933, "step": 14584 }, { "epoch": 29.17, "grad_norm": 1.0874605178833008, "learning_rate": 2e-05, "loss": 0.04434571, "step": 14585 }, { "epoch": 29.172, "grad_norm": 1.1143393516540527, "learning_rate": 2e-05, "loss": 0.0469871, "step": 14586 }, { "epoch": 29.174, "grad_norm": 1.423553228378296, "learning_rate": 2e-05, "loss": 0.03586338, "step": 14587 }, { "epoch": 29.176, "grad_norm": 1.2574028968811035, "learning_rate": 2e-05, "loss": 0.038145, "step": 14588 }, { "epoch": 29.178, "grad_norm": 1.1540359258651733, "learning_rate": 2e-05, "loss": 0.04100233, "step": 14589 }, { "epoch": 29.18, "grad_norm": 1.3354766368865967, "learning_rate": 2e-05, "loss": 0.05904789, "step": 14590 }, { "epoch": 29.182, "grad_norm": 1.163336992263794, "learning_rate": 2e-05, "loss": 0.04909441, "step": 14591 }, { "epoch": 29.184, "grad_norm": 2.428375482559204, "learning_rate": 2e-05, "loss": 0.04465162, "step": 14592 }, { "epoch": 29.186, "grad_norm": 1.6806772947311401, "learning_rate": 2e-05, "loss": 0.04538292, "step": 14593 }, { "epoch": 29.188, "grad_norm": 1.1672495603561401, "learning_rate": 2e-05, "loss": 0.04777298, "step": 14594 }, { "epoch": 29.19, "grad_norm": 1.0995149612426758, "learning_rate": 2e-05, "loss": 0.03148938, "step": 14595 }, { "epoch": 29.192, "grad_norm": 1.1681532859802246, "learning_rate": 2e-05, "loss": 0.03608632, "step": 14596 }, { "epoch": 29.194, "grad_norm": 1.3820486068725586, "learning_rate": 2e-05, "loss": 0.05060082, "step": 14597 }, { "epoch": 29.196, "grad_norm": 1.0916459560394287, "learning_rate": 2e-05, "loss": 0.03177829, "step": 14598 }, { "epoch": 29.198, "grad_norm": 1.2117968797683716, "learning_rate": 2e-05, "loss": 0.04310794, "step": 14599 }, { "epoch": 29.2, "grad_norm": 1.3815139532089233, "learning_rate": 2e-05, "loss": 0.03904974, "step": 14600 }, { "epoch": 29.202, "grad_norm": 1.455543875694275, "learning_rate": 2e-05, "loss": 0.04168963, "step": 14601 }, { "epoch": 29.204, "grad_norm": 1.2078819274902344, "learning_rate": 2e-05, "loss": 0.05466375, "step": 14602 }, { "epoch": 29.206, "grad_norm": 1.8423599004745483, "learning_rate": 2e-05, "loss": 0.03679687, "step": 14603 }, { "epoch": 29.208, "grad_norm": 1.3103402853012085, "learning_rate": 2e-05, "loss": 0.0345687, "step": 14604 }, { "epoch": 29.21, "grad_norm": 1.3979569673538208, "learning_rate": 2e-05, "loss": 0.04111661, "step": 14605 }, { "epoch": 29.212, "grad_norm": 1.2820682525634766, "learning_rate": 2e-05, "loss": 0.05244845, "step": 14606 }, { "epoch": 29.214, "grad_norm": 1.8376924991607666, "learning_rate": 2e-05, "loss": 0.06727045, "step": 14607 }, { "epoch": 29.216, "grad_norm": 1.4911775588989258, "learning_rate": 2e-05, "loss": 0.03376488, "step": 14608 }, { "epoch": 29.218, "grad_norm": 2.5098586082458496, "learning_rate": 2e-05, "loss": 0.07967003, "step": 14609 }, { "epoch": 29.22, "grad_norm": 1.4665964841842651, "learning_rate": 2e-05, "loss": 0.05623496, "step": 14610 }, { "epoch": 29.222, "grad_norm": 1.1030839681625366, "learning_rate": 2e-05, "loss": 0.04080631, "step": 14611 }, { "epoch": 29.224, "grad_norm": 1.169342041015625, "learning_rate": 2e-05, "loss": 0.0512477, "step": 14612 }, { "epoch": 29.226, "grad_norm": 5.032285690307617, "learning_rate": 2e-05, "loss": 0.04048228, "step": 14613 }, { "epoch": 29.228, "grad_norm": 1.2166979312896729, "learning_rate": 2e-05, "loss": 0.04632211, "step": 14614 }, { "epoch": 29.23, "grad_norm": 1.1534336805343628, "learning_rate": 2e-05, "loss": 0.03913743, "step": 14615 }, { "epoch": 29.232, "grad_norm": 1.1760746240615845, "learning_rate": 2e-05, "loss": 0.04341678, "step": 14616 }, { "epoch": 29.234, "grad_norm": 1.4027602672576904, "learning_rate": 2e-05, "loss": 0.04686597, "step": 14617 }, { "epoch": 29.236, "grad_norm": 3.7930917739868164, "learning_rate": 2e-05, "loss": 0.05493877, "step": 14618 }, { "epoch": 29.238, "grad_norm": 1.149735689163208, "learning_rate": 2e-05, "loss": 0.0425221, "step": 14619 }, { "epoch": 29.24, "grad_norm": 1.6718671321868896, "learning_rate": 2e-05, "loss": 0.05708656, "step": 14620 }, { "epoch": 29.242, "grad_norm": 1.2668569087982178, "learning_rate": 2e-05, "loss": 0.04664732, "step": 14621 }, { "epoch": 29.244, "grad_norm": 1.033226490020752, "learning_rate": 2e-05, "loss": 0.02460607, "step": 14622 }, { "epoch": 29.246, "grad_norm": 1.172166109085083, "learning_rate": 2e-05, "loss": 0.04102369, "step": 14623 }, { "epoch": 29.248, "grad_norm": 1.5639601945877075, "learning_rate": 2e-05, "loss": 0.0346932, "step": 14624 }, { "epoch": 29.25, "grad_norm": 1.2138320207595825, "learning_rate": 2e-05, "loss": 0.05353147, "step": 14625 }, { "epoch": 29.252, "grad_norm": 1.075577974319458, "learning_rate": 2e-05, "loss": 0.04629251, "step": 14626 }, { "epoch": 29.254, "grad_norm": 4.6314473152160645, "learning_rate": 2e-05, "loss": 0.05750819, "step": 14627 }, { "epoch": 29.256, "grad_norm": 1.6789414882659912, "learning_rate": 2e-05, "loss": 0.04619577, "step": 14628 }, { "epoch": 29.258, "grad_norm": 10.09363079071045, "learning_rate": 2e-05, "loss": 0.06505147, "step": 14629 }, { "epoch": 29.26, "grad_norm": 1.3053208589553833, "learning_rate": 2e-05, "loss": 0.03836732, "step": 14630 }, { "epoch": 29.262, "grad_norm": 1.1734479665756226, "learning_rate": 2e-05, "loss": 0.04292649, "step": 14631 }, { "epoch": 29.264, "grad_norm": 1.0191097259521484, "learning_rate": 2e-05, "loss": 0.0362784, "step": 14632 }, { "epoch": 29.266, "grad_norm": 1.4284632205963135, "learning_rate": 2e-05, "loss": 0.04708409, "step": 14633 }, { "epoch": 29.268, "grad_norm": 1.857406735420227, "learning_rate": 2e-05, "loss": 0.04922803, "step": 14634 }, { "epoch": 29.27, "grad_norm": 1.4661109447479248, "learning_rate": 2e-05, "loss": 0.032971, "step": 14635 }, { "epoch": 29.272, "grad_norm": 1.6468689441680908, "learning_rate": 2e-05, "loss": 0.04467516, "step": 14636 }, { "epoch": 29.274, "grad_norm": 1.1976274251937866, "learning_rate": 2e-05, "loss": 0.05661123, "step": 14637 }, { "epoch": 29.276, "grad_norm": 1.0101951360702515, "learning_rate": 2e-05, "loss": 0.04431897, "step": 14638 }, { "epoch": 29.278, "grad_norm": 1.9550938606262207, "learning_rate": 2e-05, "loss": 0.03428712, "step": 14639 }, { "epoch": 29.28, "grad_norm": 1.263190746307373, "learning_rate": 2e-05, "loss": 0.05213517, "step": 14640 }, { "epoch": 29.282, "grad_norm": 1.0882905721664429, "learning_rate": 2e-05, "loss": 0.04379845, "step": 14641 }, { "epoch": 29.284, "grad_norm": 1.6553674936294556, "learning_rate": 2e-05, "loss": 0.04459314, "step": 14642 }, { "epoch": 29.286, "grad_norm": 1.1553642749786377, "learning_rate": 2e-05, "loss": 0.04111927, "step": 14643 }, { "epoch": 29.288, "grad_norm": 1.7052907943725586, "learning_rate": 2e-05, "loss": 0.04706088, "step": 14644 }, { "epoch": 29.29, "grad_norm": 1.7893346548080444, "learning_rate": 2e-05, "loss": 0.04122625, "step": 14645 }, { "epoch": 29.292, "grad_norm": 0.8917121887207031, "learning_rate": 2e-05, "loss": 0.02282002, "step": 14646 }, { "epoch": 29.294, "grad_norm": 1.3831899166107178, "learning_rate": 2e-05, "loss": 0.06464884, "step": 14647 }, { "epoch": 29.296, "grad_norm": 1.0708141326904297, "learning_rate": 2e-05, "loss": 0.03723243, "step": 14648 }, { "epoch": 29.298, "grad_norm": 2.3124451637268066, "learning_rate": 2e-05, "loss": 0.03636015, "step": 14649 }, { "epoch": 29.3, "grad_norm": 1.150187373161316, "learning_rate": 2e-05, "loss": 0.03666411, "step": 14650 }, { "epoch": 29.302, "grad_norm": 1.4192458391189575, "learning_rate": 2e-05, "loss": 0.05449123, "step": 14651 }, { "epoch": 29.304, "grad_norm": 1.0729455947875977, "learning_rate": 2e-05, "loss": 0.04042206, "step": 14652 }, { "epoch": 29.306, "grad_norm": 1.1908464431762695, "learning_rate": 2e-05, "loss": 0.0445194, "step": 14653 }, { "epoch": 29.308, "grad_norm": 1.1397109031677246, "learning_rate": 2e-05, "loss": 0.03711201, "step": 14654 }, { "epoch": 29.31, "grad_norm": 1.309540867805481, "learning_rate": 2e-05, "loss": 0.04084215, "step": 14655 }, { "epoch": 29.312, "grad_norm": 1.0415390729904175, "learning_rate": 2e-05, "loss": 0.03293938, "step": 14656 }, { "epoch": 29.314, "grad_norm": 1.683875560760498, "learning_rate": 2e-05, "loss": 0.04933864, "step": 14657 }, { "epoch": 29.316, "grad_norm": 1.3526222705841064, "learning_rate": 2e-05, "loss": 0.02989796, "step": 14658 }, { "epoch": 29.318, "grad_norm": 1.5054032802581787, "learning_rate": 2e-05, "loss": 0.04449115, "step": 14659 }, { "epoch": 29.32, "grad_norm": 1.2268929481506348, "learning_rate": 2e-05, "loss": 0.0320545, "step": 14660 }, { "epoch": 29.322, "grad_norm": 1.3516592979431152, "learning_rate": 2e-05, "loss": 0.05182027, "step": 14661 }, { "epoch": 29.324, "grad_norm": 1.1587803363800049, "learning_rate": 2e-05, "loss": 0.04185852, "step": 14662 }, { "epoch": 29.326, "grad_norm": 1.048889398574829, "learning_rate": 2e-05, "loss": 0.02531143, "step": 14663 }, { "epoch": 29.328, "grad_norm": 2.0608630180358887, "learning_rate": 2e-05, "loss": 0.04467651, "step": 14664 }, { "epoch": 29.33, "grad_norm": 1.0219707489013672, "learning_rate": 2e-05, "loss": 0.02736703, "step": 14665 }, { "epoch": 29.332, "grad_norm": 1.091489553451538, "learning_rate": 2e-05, "loss": 0.04337668, "step": 14666 }, { "epoch": 29.334, "grad_norm": 1.2775390148162842, "learning_rate": 2e-05, "loss": 0.04817453, "step": 14667 }, { "epoch": 29.336, "grad_norm": 1.760574221611023, "learning_rate": 2e-05, "loss": 0.04524562, "step": 14668 }, { "epoch": 29.338, "grad_norm": 1.5247159004211426, "learning_rate": 2e-05, "loss": 0.04375861, "step": 14669 }, { "epoch": 29.34, "grad_norm": 1.308775544166565, "learning_rate": 2e-05, "loss": 0.04865634, "step": 14670 }, { "epoch": 29.342, "grad_norm": 1.2305173873901367, "learning_rate": 2e-05, "loss": 0.04637342, "step": 14671 }, { "epoch": 29.344, "grad_norm": 1.3184024095535278, "learning_rate": 2e-05, "loss": 0.048801, "step": 14672 }, { "epoch": 29.346, "grad_norm": 1.0286909341812134, "learning_rate": 2e-05, "loss": 0.03713179, "step": 14673 }, { "epoch": 29.348, "grad_norm": 1.2571288347244263, "learning_rate": 2e-05, "loss": 0.03584104, "step": 14674 }, { "epoch": 29.35, "grad_norm": 0.9635476469993591, "learning_rate": 2e-05, "loss": 0.03240035, "step": 14675 }, { "epoch": 29.352, "grad_norm": 0.9458169341087341, "learning_rate": 2e-05, "loss": 0.03441823, "step": 14676 }, { "epoch": 29.354, "grad_norm": 1.2421998977661133, "learning_rate": 2e-05, "loss": 0.02801424, "step": 14677 }, { "epoch": 29.356, "grad_norm": 1.1887518167495728, "learning_rate": 2e-05, "loss": 0.03601447, "step": 14678 }, { "epoch": 29.358, "grad_norm": 1.2935701608657837, "learning_rate": 2e-05, "loss": 0.05169864, "step": 14679 }, { "epoch": 29.36, "grad_norm": 1.5155844688415527, "learning_rate": 2e-05, "loss": 0.05170265, "step": 14680 }, { "epoch": 29.362, "grad_norm": 1.0864388942718506, "learning_rate": 2e-05, "loss": 0.03917462, "step": 14681 }, { "epoch": 29.364, "grad_norm": 3.3979506492614746, "learning_rate": 2e-05, "loss": 0.06590277, "step": 14682 }, { "epoch": 29.366, "grad_norm": 1.263260006904602, "learning_rate": 2e-05, "loss": 0.03736039, "step": 14683 }, { "epoch": 29.368, "grad_norm": 1.6203467845916748, "learning_rate": 2e-05, "loss": 0.03045352, "step": 14684 }, { "epoch": 29.37, "grad_norm": 1.185299038887024, "learning_rate": 2e-05, "loss": 0.0308273, "step": 14685 }, { "epoch": 29.372, "grad_norm": 1.065706491470337, "learning_rate": 2e-05, "loss": 0.04142375, "step": 14686 }, { "epoch": 29.374, "grad_norm": 1.071706771850586, "learning_rate": 2e-05, "loss": 0.02857183, "step": 14687 }, { "epoch": 29.376, "grad_norm": 1.0515819787979126, "learning_rate": 2e-05, "loss": 0.03293607, "step": 14688 }, { "epoch": 29.378, "grad_norm": 1.2863487005233765, "learning_rate": 2e-05, "loss": 0.06486448, "step": 14689 }, { "epoch": 29.38, "grad_norm": 1.3283601999282837, "learning_rate": 2e-05, "loss": 0.0511212, "step": 14690 }, { "epoch": 29.382, "grad_norm": 1.3435486555099487, "learning_rate": 2e-05, "loss": 0.04754411, "step": 14691 }, { "epoch": 29.384, "grad_norm": 2.4031832218170166, "learning_rate": 2e-05, "loss": 0.02932245, "step": 14692 }, { "epoch": 29.386, "grad_norm": 1.0233304500579834, "learning_rate": 2e-05, "loss": 0.02972196, "step": 14693 }, { "epoch": 29.388, "grad_norm": 0.9224061369895935, "learning_rate": 2e-05, "loss": 0.03122431, "step": 14694 }, { "epoch": 29.39, "grad_norm": 3.187535524368286, "learning_rate": 2e-05, "loss": 0.06037506, "step": 14695 }, { "epoch": 29.392, "grad_norm": 2.3195505142211914, "learning_rate": 2e-05, "loss": 0.04076795, "step": 14696 }, { "epoch": 29.394, "grad_norm": 1.0622916221618652, "learning_rate": 2e-05, "loss": 0.02912345, "step": 14697 }, { "epoch": 29.396, "grad_norm": 1.1375831365585327, "learning_rate": 2e-05, "loss": 0.03395087, "step": 14698 }, { "epoch": 29.398, "grad_norm": 4.46896505355835, "learning_rate": 2e-05, "loss": 0.08969009, "step": 14699 }, { "epoch": 29.4, "grad_norm": 1.382957100868225, "learning_rate": 2e-05, "loss": 0.05825138, "step": 14700 }, { "epoch": 29.402, "grad_norm": 3.7034122943878174, "learning_rate": 2e-05, "loss": 0.05984267, "step": 14701 }, { "epoch": 29.404, "grad_norm": 1.288468837738037, "learning_rate": 2e-05, "loss": 0.04220777, "step": 14702 }, { "epoch": 29.406, "grad_norm": 5.697216510772705, "learning_rate": 2e-05, "loss": 0.04966062, "step": 14703 }, { "epoch": 29.408, "grad_norm": 1.3597912788391113, "learning_rate": 2e-05, "loss": 0.06009267, "step": 14704 }, { "epoch": 29.41, "grad_norm": 1.123845100402832, "learning_rate": 2e-05, "loss": 0.03696072, "step": 14705 }, { "epoch": 29.412, "grad_norm": 1.5596554279327393, "learning_rate": 2e-05, "loss": 0.03732103, "step": 14706 }, { "epoch": 29.414, "grad_norm": 1.2256417274475098, "learning_rate": 2e-05, "loss": 0.0455781, "step": 14707 }, { "epoch": 29.416, "grad_norm": 1.2588534355163574, "learning_rate": 2e-05, "loss": 0.04248727, "step": 14708 }, { "epoch": 29.418, "grad_norm": 1.5599313974380493, "learning_rate": 2e-05, "loss": 0.05124461, "step": 14709 }, { "epoch": 29.42, "grad_norm": 1.3829172849655151, "learning_rate": 2e-05, "loss": 0.05284266, "step": 14710 }, { "epoch": 29.422, "grad_norm": 0.9758954644203186, "learning_rate": 2e-05, "loss": 0.03507173, "step": 14711 }, { "epoch": 29.424, "grad_norm": 1.019920825958252, "learning_rate": 2e-05, "loss": 0.03660026, "step": 14712 }, { "epoch": 29.426, "grad_norm": 1.6652642488479614, "learning_rate": 2e-05, "loss": 0.05362531, "step": 14713 }, { "epoch": 29.428, "grad_norm": 3.9348857402801514, "learning_rate": 2e-05, "loss": 0.03445033, "step": 14714 }, { "epoch": 29.43, "grad_norm": 0.9290486574172974, "learning_rate": 2e-05, "loss": 0.03240933, "step": 14715 }, { "epoch": 29.432, "grad_norm": 1.4232569932937622, "learning_rate": 2e-05, "loss": 0.05291854, "step": 14716 }, { "epoch": 29.434, "grad_norm": 1.1019871234893799, "learning_rate": 2e-05, "loss": 0.04499703, "step": 14717 }, { "epoch": 29.436, "grad_norm": 1.0482898950576782, "learning_rate": 2e-05, "loss": 0.02875671, "step": 14718 }, { "epoch": 29.438, "grad_norm": 2.0963454246520996, "learning_rate": 2e-05, "loss": 0.06372029, "step": 14719 }, { "epoch": 29.44, "grad_norm": 1.4427788257598877, "learning_rate": 2e-05, "loss": 0.05994175, "step": 14720 }, { "epoch": 29.442, "grad_norm": 1.1246081590652466, "learning_rate": 2e-05, "loss": 0.05035602, "step": 14721 }, { "epoch": 29.444, "grad_norm": 1.080545425415039, "learning_rate": 2e-05, "loss": 0.03505052, "step": 14722 }, { "epoch": 29.446, "grad_norm": 2.5756614208221436, "learning_rate": 2e-05, "loss": 0.0641964, "step": 14723 }, { "epoch": 29.448, "grad_norm": 0.9659985899925232, "learning_rate": 2e-05, "loss": 0.0326273, "step": 14724 }, { "epoch": 29.45, "grad_norm": 1.0880937576293945, "learning_rate": 2e-05, "loss": 0.03114274, "step": 14725 }, { "epoch": 29.452, "grad_norm": 1.5530476570129395, "learning_rate": 2e-05, "loss": 0.0458639, "step": 14726 }, { "epoch": 29.454, "grad_norm": 1.9776489734649658, "learning_rate": 2e-05, "loss": 0.04284504, "step": 14727 }, { "epoch": 29.456, "grad_norm": 2.1341958045959473, "learning_rate": 2e-05, "loss": 0.06103675, "step": 14728 }, { "epoch": 29.458, "grad_norm": 2.14361834526062, "learning_rate": 2e-05, "loss": 0.04144895, "step": 14729 }, { "epoch": 29.46, "grad_norm": 1.2067841291427612, "learning_rate": 2e-05, "loss": 0.03508757, "step": 14730 }, { "epoch": 29.462, "grad_norm": 1.6224901676177979, "learning_rate": 2e-05, "loss": 0.03946466, "step": 14731 }, { "epoch": 29.464, "grad_norm": 1.643559455871582, "learning_rate": 2e-05, "loss": 0.04336884, "step": 14732 }, { "epoch": 29.466, "grad_norm": 1.0944159030914307, "learning_rate": 2e-05, "loss": 0.05785472, "step": 14733 }, { "epoch": 29.468, "grad_norm": 1.0807979106903076, "learning_rate": 2e-05, "loss": 0.03531633, "step": 14734 }, { "epoch": 29.47, "grad_norm": 2.1468169689178467, "learning_rate": 2e-05, "loss": 0.05086451, "step": 14735 }, { "epoch": 29.472, "grad_norm": 1.5246763229370117, "learning_rate": 2e-05, "loss": 0.05114653, "step": 14736 }, { "epoch": 29.474, "grad_norm": 1.0691903829574585, "learning_rate": 2e-05, "loss": 0.04326432, "step": 14737 }, { "epoch": 29.476, "grad_norm": 1.1668561697006226, "learning_rate": 2e-05, "loss": 0.05376833, "step": 14738 }, { "epoch": 29.478, "grad_norm": 1.191372275352478, "learning_rate": 2e-05, "loss": 0.04350607, "step": 14739 }, { "epoch": 29.48, "grad_norm": 1.1428472995758057, "learning_rate": 2e-05, "loss": 0.03237382, "step": 14740 }, { "epoch": 29.482, "grad_norm": 1.0473662614822388, "learning_rate": 2e-05, "loss": 0.03938694, "step": 14741 }, { "epoch": 29.484, "grad_norm": 0.9359759092330933, "learning_rate": 2e-05, "loss": 0.03335633, "step": 14742 }, { "epoch": 29.486, "grad_norm": 1.2112761735916138, "learning_rate": 2e-05, "loss": 0.03153354, "step": 14743 }, { "epoch": 29.488, "grad_norm": 0.8638390302658081, "learning_rate": 2e-05, "loss": 0.0252349, "step": 14744 }, { "epoch": 29.49, "grad_norm": 0.9504496455192566, "learning_rate": 2e-05, "loss": 0.03327549, "step": 14745 }, { "epoch": 29.492, "grad_norm": 4.146841526031494, "learning_rate": 2e-05, "loss": 0.04628399, "step": 14746 }, { "epoch": 29.494, "grad_norm": 2.1307597160339355, "learning_rate": 2e-05, "loss": 0.06677426, "step": 14747 }, { "epoch": 29.496, "grad_norm": 1.4499460458755493, "learning_rate": 2e-05, "loss": 0.04317267, "step": 14748 }, { "epoch": 29.498, "grad_norm": 1.7764891386032104, "learning_rate": 2e-05, "loss": 0.06008489, "step": 14749 }, { "epoch": 29.5, "grad_norm": 1.4432235956192017, "learning_rate": 2e-05, "loss": 0.06923801, "step": 14750 }, { "epoch": 29.502, "grad_norm": 1.4468677043914795, "learning_rate": 2e-05, "loss": 0.05008958, "step": 14751 }, { "epoch": 29.504, "grad_norm": 1.3012151718139648, "learning_rate": 2e-05, "loss": 0.04738281, "step": 14752 }, { "epoch": 29.506, "grad_norm": 1.1001543998718262, "learning_rate": 2e-05, "loss": 0.04851674, "step": 14753 }, { "epoch": 29.508, "grad_norm": 1.2774382829666138, "learning_rate": 2e-05, "loss": 0.03910155, "step": 14754 }, { "epoch": 29.51, "grad_norm": 1.2547918558120728, "learning_rate": 2e-05, "loss": 0.05241001, "step": 14755 }, { "epoch": 29.512, "grad_norm": 1.3092374801635742, "learning_rate": 2e-05, "loss": 0.04025935, "step": 14756 }, { "epoch": 29.514, "grad_norm": 1.3792351484298706, "learning_rate": 2e-05, "loss": 0.0517666, "step": 14757 }, { "epoch": 29.516, "grad_norm": 0.9093297719955444, "learning_rate": 2e-05, "loss": 0.03267594, "step": 14758 }, { "epoch": 29.518, "grad_norm": 1.0705640316009521, "learning_rate": 2e-05, "loss": 0.04189052, "step": 14759 }, { "epoch": 29.52, "grad_norm": 1.0387095212936401, "learning_rate": 2e-05, "loss": 0.04280265, "step": 14760 }, { "epoch": 29.522, "grad_norm": 1.2232365608215332, "learning_rate": 2e-05, "loss": 0.04506699, "step": 14761 }, { "epoch": 29.524, "grad_norm": 1.1054472923278809, "learning_rate": 2e-05, "loss": 0.04134972, "step": 14762 }, { "epoch": 29.526, "grad_norm": 1.1075634956359863, "learning_rate": 2e-05, "loss": 0.03896327, "step": 14763 }, { "epoch": 29.528, "grad_norm": 1.2247326374053955, "learning_rate": 2e-05, "loss": 0.0459715, "step": 14764 }, { "epoch": 29.53, "grad_norm": 1.2403117418289185, "learning_rate": 2e-05, "loss": 0.03991577, "step": 14765 }, { "epoch": 29.532, "grad_norm": 1.0780881643295288, "learning_rate": 2e-05, "loss": 0.03780124, "step": 14766 }, { "epoch": 29.534, "grad_norm": 1.5894378423690796, "learning_rate": 2e-05, "loss": 0.03415346, "step": 14767 }, { "epoch": 29.536, "grad_norm": 0.9309705495834351, "learning_rate": 2e-05, "loss": 0.02937978, "step": 14768 }, { "epoch": 29.538, "grad_norm": 1.135497808456421, "learning_rate": 2e-05, "loss": 0.03995682, "step": 14769 }, { "epoch": 29.54, "grad_norm": 1.4162516593933105, "learning_rate": 2e-05, "loss": 0.05143316, "step": 14770 }, { "epoch": 29.542, "grad_norm": 1.3331693410873413, "learning_rate": 2e-05, "loss": 0.04119586, "step": 14771 }, { "epoch": 29.544, "grad_norm": 1.2744102478027344, "learning_rate": 2e-05, "loss": 0.06154334, "step": 14772 }, { "epoch": 29.546, "grad_norm": 1.3395334482192993, "learning_rate": 2e-05, "loss": 0.03931785, "step": 14773 }, { "epoch": 29.548000000000002, "grad_norm": 1.2095366716384888, "learning_rate": 2e-05, "loss": 0.04690979, "step": 14774 }, { "epoch": 29.55, "grad_norm": 1.7333354949951172, "learning_rate": 2e-05, "loss": 0.0398626, "step": 14775 }, { "epoch": 29.552, "grad_norm": 1.404498815536499, "learning_rate": 2e-05, "loss": 0.03682255, "step": 14776 }, { "epoch": 29.554, "grad_norm": 1.3003278970718384, "learning_rate": 2e-05, "loss": 0.03933616, "step": 14777 }, { "epoch": 29.556, "grad_norm": 1.013600468635559, "learning_rate": 2e-05, "loss": 0.04233592, "step": 14778 }, { "epoch": 29.558, "grad_norm": 0.9388518333435059, "learning_rate": 2e-05, "loss": 0.03290233, "step": 14779 }, { "epoch": 29.56, "grad_norm": 1.076974868774414, "learning_rate": 2e-05, "loss": 0.03113963, "step": 14780 }, { "epoch": 29.562, "grad_norm": 1.5698806047439575, "learning_rate": 2e-05, "loss": 0.03970818, "step": 14781 }, { "epoch": 29.564, "grad_norm": 1.5527507066726685, "learning_rate": 2e-05, "loss": 0.05457158, "step": 14782 }, { "epoch": 29.566, "grad_norm": 1.105551838874817, "learning_rate": 2e-05, "loss": 0.0364199, "step": 14783 }, { "epoch": 29.568, "grad_norm": 1.2447288036346436, "learning_rate": 2e-05, "loss": 0.05043525, "step": 14784 }, { "epoch": 29.57, "grad_norm": 1.6889266967773438, "learning_rate": 2e-05, "loss": 0.03643173, "step": 14785 }, { "epoch": 29.572, "grad_norm": 1.1132603883743286, "learning_rate": 2e-05, "loss": 0.03821333, "step": 14786 }, { "epoch": 29.574, "grad_norm": 1.628547191619873, "learning_rate": 2e-05, "loss": 0.02059213, "step": 14787 }, { "epoch": 29.576, "grad_norm": 1.4750348329544067, "learning_rate": 2e-05, "loss": 0.05588658, "step": 14788 }, { "epoch": 29.578, "grad_norm": 1.142136812210083, "learning_rate": 2e-05, "loss": 0.04343769, "step": 14789 }, { "epoch": 29.58, "grad_norm": 2.035980463027954, "learning_rate": 2e-05, "loss": 0.04574387, "step": 14790 }, { "epoch": 29.582, "grad_norm": 0.9724331498146057, "learning_rate": 2e-05, "loss": 0.0316183, "step": 14791 }, { "epoch": 29.584, "grad_norm": 1.0555124282836914, "learning_rate": 2e-05, "loss": 0.03854711, "step": 14792 }, { "epoch": 29.586, "grad_norm": 2.9631097316741943, "learning_rate": 2e-05, "loss": 0.07389696, "step": 14793 }, { "epoch": 29.588, "grad_norm": 1.2569674253463745, "learning_rate": 2e-05, "loss": 0.04960248, "step": 14794 }, { "epoch": 29.59, "grad_norm": 0.8959588408470154, "learning_rate": 2e-05, "loss": 0.02651092, "step": 14795 }, { "epoch": 29.592, "grad_norm": 1.4212428331375122, "learning_rate": 2e-05, "loss": 0.05283296, "step": 14796 }, { "epoch": 29.594, "grad_norm": 1.2432961463928223, "learning_rate": 2e-05, "loss": 0.03414245, "step": 14797 }, { "epoch": 29.596, "grad_norm": 1.3017022609710693, "learning_rate": 2e-05, "loss": 0.04572171, "step": 14798 }, { "epoch": 29.598, "grad_norm": 1.7550230026245117, "learning_rate": 2e-05, "loss": 0.04602732, "step": 14799 }, { "epoch": 29.6, "grad_norm": 0.9701183438301086, "learning_rate": 2e-05, "loss": 0.02774266, "step": 14800 }, { "epoch": 29.602, "grad_norm": 1.1324889659881592, "learning_rate": 2e-05, "loss": 0.03797245, "step": 14801 }, { "epoch": 29.604, "grad_norm": 1.1381210088729858, "learning_rate": 2e-05, "loss": 0.043383, "step": 14802 }, { "epoch": 29.606, "grad_norm": 1.3297442197799683, "learning_rate": 2e-05, "loss": 0.04047207, "step": 14803 }, { "epoch": 29.608, "grad_norm": 1.389540433883667, "learning_rate": 2e-05, "loss": 0.04826028, "step": 14804 }, { "epoch": 29.61, "grad_norm": 2.0742807388305664, "learning_rate": 2e-05, "loss": 0.0447638, "step": 14805 }, { "epoch": 29.612, "grad_norm": 3.014904260635376, "learning_rate": 2e-05, "loss": 0.04129062, "step": 14806 }, { "epoch": 29.614, "grad_norm": 1.2650154829025269, "learning_rate": 2e-05, "loss": 0.03646675, "step": 14807 }, { "epoch": 29.616, "grad_norm": 1.6480337381362915, "learning_rate": 2e-05, "loss": 0.03977471, "step": 14808 }, { "epoch": 29.618, "grad_norm": 1.184668779373169, "learning_rate": 2e-05, "loss": 0.05217928, "step": 14809 }, { "epoch": 29.62, "grad_norm": 1.2116751670837402, "learning_rate": 2e-05, "loss": 0.03607161, "step": 14810 }, { "epoch": 29.622, "grad_norm": 1.508227825164795, "learning_rate": 2e-05, "loss": 0.04605871, "step": 14811 }, { "epoch": 29.624, "grad_norm": 1.2774351835250854, "learning_rate": 2e-05, "loss": 0.04796334, "step": 14812 }, { "epoch": 29.626, "grad_norm": 1.0448981523513794, "learning_rate": 2e-05, "loss": 0.03082025, "step": 14813 }, { "epoch": 29.628, "grad_norm": 1.2237423658370972, "learning_rate": 2e-05, "loss": 0.04101267, "step": 14814 }, { "epoch": 29.63, "grad_norm": 1.5655183792114258, "learning_rate": 2e-05, "loss": 0.04582419, "step": 14815 }, { "epoch": 29.632, "grad_norm": 2.1846954822540283, "learning_rate": 2e-05, "loss": 0.05257653, "step": 14816 }, { "epoch": 29.634, "grad_norm": 1.238049030303955, "learning_rate": 2e-05, "loss": 0.03405455, "step": 14817 }, { "epoch": 29.636, "grad_norm": 1.6897296905517578, "learning_rate": 2e-05, "loss": 0.01916685, "step": 14818 }, { "epoch": 29.638, "grad_norm": 0.9575730562210083, "learning_rate": 2e-05, "loss": 0.03765364, "step": 14819 }, { "epoch": 29.64, "grad_norm": 2.1810710430145264, "learning_rate": 2e-05, "loss": 0.05722524, "step": 14820 }, { "epoch": 29.642, "grad_norm": 0.9834027886390686, "learning_rate": 2e-05, "loss": 0.04266867, "step": 14821 }, { "epoch": 29.644, "grad_norm": 1.2944148778915405, "learning_rate": 2e-05, "loss": 0.05544176, "step": 14822 }, { "epoch": 29.646, "grad_norm": 1.9650918245315552, "learning_rate": 2e-05, "loss": 0.02375646, "step": 14823 }, { "epoch": 29.648, "grad_norm": 0.9951708912849426, "learning_rate": 2e-05, "loss": 0.02884598, "step": 14824 }, { "epoch": 29.65, "grad_norm": 1.0523395538330078, "learning_rate": 2e-05, "loss": 0.03761973, "step": 14825 }, { "epoch": 29.652, "grad_norm": 1.0838866233825684, "learning_rate": 2e-05, "loss": 0.03728612, "step": 14826 }, { "epoch": 29.654, "grad_norm": 1.0605547428131104, "learning_rate": 2e-05, "loss": 0.03837313, "step": 14827 }, { "epoch": 29.656, "grad_norm": 1.1938754320144653, "learning_rate": 2e-05, "loss": 0.05729951, "step": 14828 }, { "epoch": 29.658, "grad_norm": 2.144594430923462, "learning_rate": 2e-05, "loss": 0.04445962, "step": 14829 }, { "epoch": 29.66, "grad_norm": 1.8156507015228271, "learning_rate": 2e-05, "loss": 0.03740517, "step": 14830 }, { "epoch": 29.662, "grad_norm": 1.244408130645752, "learning_rate": 2e-05, "loss": 0.04781131, "step": 14831 }, { "epoch": 29.664, "grad_norm": 1.2653872966766357, "learning_rate": 2e-05, "loss": 0.0585079, "step": 14832 }, { "epoch": 29.666, "grad_norm": 1.3794729709625244, "learning_rate": 2e-05, "loss": 0.03325066, "step": 14833 }, { "epoch": 29.668, "grad_norm": 1.5158426761627197, "learning_rate": 2e-05, "loss": 0.04454071, "step": 14834 }, { "epoch": 29.67, "grad_norm": 0.9856611490249634, "learning_rate": 2e-05, "loss": 0.03513188, "step": 14835 }, { "epoch": 29.672, "grad_norm": 1.0112903118133545, "learning_rate": 2e-05, "loss": 0.03400346, "step": 14836 }, { "epoch": 29.674, "grad_norm": 2.110278606414795, "learning_rate": 2e-05, "loss": 0.06449761, "step": 14837 }, { "epoch": 29.676, "grad_norm": 1.4624208211898804, "learning_rate": 2e-05, "loss": 0.05108786, "step": 14838 }, { "epoch": 29.678, "grad_norm": 1.6022717952728271, "learning_rate": 2e-05, "loss": 0.04408568, "step": 14839 }, { "epoch": 29.68, "grad_norm": 1.1528983116149902, "learning_rate": 2e-05, "loss": 0.04578086, "step": 14840 }, { "epoch": 29.682, "grad_norm": 1.0715017318725586, "learning_rate": 2e-05, "loss": 0.04369386, "step": 14841 }, { "epoch": 29.684, "grad_norm": 1.3998444080352783, "learning_rate": 2e-05, "loss": 0.04720485, "step": 14842 }, { "epoch": 29.686, "grad_norm": 1.2437803745269775, "learning_rate": 2e-05, "loss": 0.04152617, "step": 14843 }, { "epoch": 29.688, "grad_norm": 1.2567319869995117, "learning_rate": 2e-05, "loss": 0.03946881, "step": 14844 }, { "epoch": 29.69, "grad_norm": 1.7165799140930176, "learning_rate": 2e-05, "loss": 0.03655791, "step": 14845 }, { "epoch": 29.692, "grad_norm": 0.9617201685905457, "learning_rate": 2e-05, "loss": 0.03272104, "step": 14846 }, { "epoch": 29.694, "grad_norm": 1.2149977684020996, "learning_rate": 2e-05, "loss": 0.04295772, "step": 14847 }, { "epoch": 29.696, "grad_norm": 1.5055172443389893, "learning_rate": 2e-05, "loss": 0.04826812, "step": 14848 }, { "epoch": 29.698, "grad_norm": 1.1299159526824951, "learning_rate": 2e-05, "loss": 0.04282779, "step": 14849 }, { "epoch": 29.7, "grad_norm": 1.5118989944458008, "learning_rate": 2e-05, "loss": 0.04391037, "step": 14850 }, { "epoch": 29.701999999999998, "grad_norm": 2.338747501373291, "learning_rate": 2e-05, "loss": 0.05273699, "step": 14851 }, { "epoch": 29.704, "grad_norm": 1.2019315958023071, "learning_rate": 2e-05, "loss": 0.04617017, "step": 14852 }, { "epoch": 29.706, "grad_norm": 1.618383765220642, "learning_rate": 2e-05, "loss": 0.06011579, "step": 14853 }, { "epoch": 29.708, "grad_norm": 1.8187501430511475, "learning_rate": 2e-05, "loss": 0.03678617, "step": 14854 }, { "epoch": 29.71, "grad_norm": 1.143147587776184, "learning_rate": 2e-05, "loss": 0.04157409, "step": 14855 }, { "epoch": 29.712, "grad_norm": 1.034891128540039, "learning_rate": 2e-05, "loss": 0.04029275, "step": 14856 }, { "epoch": 29.714, "grad_norm": 1.3997042179107666, "learning_rate": 2e-05, "loss": 0.0383551, "step": 14857 }, { "epoch": 29.716, "grad_norm": 1.0622479915618896, "learning_rate": 2e-05, "loss": 0.04367114, "step": 14858 }, { "epoch": 29.718, "grad_norm": 1.824373722076416, "learning_rate": 2e-05, "loss": 0.04785391, "step": 14859 }, { "epoch": 29.72, "grad_norm": 0.980838418006897, "learning_rate": 2e-05, "loss": 0.03948919, "step": 14860 }, { "epoch": 29.722, "grad_norm": 1.3569544553756714, "learning_rate": 2e-05, "loss": 0.05038366, "step": 14861 }, { "epoch": 29.724, "grad_norm": 1.304292917251587, "learning_rate": 2e-05, "loss": 0.03696183, "step": 14862 }, { "epoch": 29.726, "grad_norm": 3.2446513175964355, "learning_rate": 2e-05, "loss": 0.05859087, "step": 14863 }, { "epoch": 29.728, "grad_norm": 1.621229648590088, "learning_rate": 2e-05, "loss": 0.02783653, "step": 14864 }, { "epoch": 29.73, "grad_norm": 1.2278355360031128, "learning_rate": 2e-05, "loss": 0.05444655, "step": 14865 }, { "epoch": 29.732, "grad_norm": 1.2174080610275269, "learning_rate": 2e-05, "loss": 0.05196868, "step": 14866 }, { "epoch": 29.734, "grad_norm": 1.6062108278274536, "learning_rate": 2e-05, "loss": 0.04332883, "step": 14867 }, { "epoch": 29.736, "grad_norm": 1.161248803138733, "learning_rate": 2e-05, "loss": 0.03697128, "step": 14868 }, { "epoch": 29.738, "grad_norm": 1.2859642505645752, "learning_rate": 2e-05, "loss": 0.05505038, "step": 14869 }, { "epoch": 29.74, "grad_norm": 1.4202659130096436, "learning_rate": 2e-05, "loss": 0.0477881, "step": 14870 }, { "epoch": 29.742, "grad_norm": 1.2499854564666748, "learning_rate": 2e-05, "loss": 0.05374869, "step": 14871 }, { "epoch": 29.744, "grad_norm": 0.955565333366394, "learning_rate": 2e-05, "loss": 0.02939542, "step": 14872 }, { "epoch": 29.746, "grad_norm": 1.3554538488388062, "learning_rate": 2e-05, "loss": 0.04614054, "step": 14873 }, { "epoch": 29.748, "grad_norm": 1.089884638786316, "learning_rate": 2e-05, "loss": 0.03633293, "step": 14874 }, { "epoch": 29.75, "grad_norm": 1.1395641565322876, "learning_rate": 2e-05, "loss": 0.04034775, "step": 14875 }, { "epoch": 29.752, "grad_norm": 1.3981385231018066, "learning_rate": 2e-05, "loss": 0.04628996, "step": 14876 }, { "epoch": 29.754, "grad_norm": 1.7084894180297852, "learning_rate": 2e-05, "loss": 0.04334879, "step": 14877 }, { "epoch": 29.756, "grad_norm": 2.442875385284424, "learning_rate": 2e-05, "loss": 0.05451498, "step": 14878 }, { "epoch": 29.758, "grad_norm": 1.4088022708892822, "learning_rate": 2e-05, "loss": 0.05375902, "step": 14879 }, { "epoch": 29.76, "grad_norm": 1.7990258932113647, "learning_rate": 2e-05, "loss": 0.04616355, "step": 14880 }, { "epoch": 29.762, "grad_norm": 2.0218417644500732, "learning_rate": 2e-05, "loss": 0.03352433, "step": 14881 }, { "epoch": 29.764, "grad_norm": 1.8284379243850708, "learning_rate": 2e-05, "loss": 0.04442716, "step": 14882 }, { "epoch": 29.766, "grad_norm": 1.8842039108276367, "learning_rate": 2e-05, "loss": 0.03985734, "step": 14883 }, { "epoch": 29.768, "grad_norm": 1.5910454988479614, "learning_rate": 2e-05, "loss": 0.0417336, "step": 14884 }, { "epoch": 29.77, "grad_norm": 5.6194987297058105, "learning_rate": 2e-05, "loss": 0.06214419, "step": 14885 }, { "epoch": 29.772, "grad_norm": 1.221018671989441, "learning_rate": 2e-05, "loss": 0.05359631, "step": 14886 }, { "epoch": 29.774, "grad_norm": 1.4850738048553467, "learning_rate": 2e-05, "loss": 0.04186449, "step": 14887 }, { "epoch": 29.776, "grad_norm": 2.0336577892303467, "learning_rate": 2e-05, "loss": 0.03588335, "step": 14888 }, { "epoch": 29.778, "grad_norm": 1.0733425617218018, "learning_rate": 2e-05, "loss": 0.04026532, "step": 14889 }, { "epoch": 29.78, "grad_norm": 1.098156452178955, "learning_rate": 2e-05, "loss": 0.04264045, "step": 14890 }, { "epoch": 29.782, "grad_norm": 1.4435909986495972, "learning_rate": 2e-05, "loss": 0.03248915, "step": 14891 }, { "epoch": 29.784, "grad_norm": 3.9169139862060547, "learning_rate": 2e-05, "loss": 0.05105972, "step": 14892 }, { "epoch": 29.786, "grad_norm": 1.969159722328186, "learning_rate": 2e-05, "loss": 0.05551322, "step": 14893 }, { "epoch": 29.788, "grad_norm": 1.2635542154312134, "learning_rate": 2e-05, "loss": 0.04648653, "step": 14894 }, { "epoch": 29.79, "grad_norm": 2.3477652072906494, "learning_rate": 2e-05, "loss": 0.06277344, "step": 14895 }, { "epoch": 29.792, "grad_norm": 1.0149587392807007, "learning_rate": 2e-05, "loss": 0.03186602, "step": 14896 }, { "epoch": 29.794, "grad_norm": 1.1373287439346313, "learning_rate": 2e-05, "loss": 0.03521444, "step": 14897 }, { "epoch": 29.796, "grad_norm": 1.9701231718063354, "learning_rate": 2e-05, "loss": 0.05275909, "step": 14898 }, { "epoch": 29.798000000000002, "grad_norm": 1.1814894676208496, "learning_rate": 2e-05, "loss": 0.0368986, "step": 14899 }, { "epoch": 29.8, "grad_norm": 1.1683019399642944, "learning_rate": 2e-05, "loss": 0.04178146, "step": 14900 }, { "epoch": 29.802, "grad_norm": 1.2779021263122559, "learning_rate": 2e-05, "loss": 0.03463981, "step": 14901 }, { "epoch": 29.804, "grad_norm": 1.0893248319625854, "learning_rate": 2e-05, "loss": 0.03910547, "step": 14902 }, { "epoch": 29.806, "grad_norm": 1.4731847047805786, "learning_rate": 2e-05, "loss": 0.04429988, "step": 14903 }, { "epoch": 29.808, "grad_norm": 1.3464443683624268, "learning_rate": 2e-05, "loss": 0.04595333, "step": 14904 }, { "epoch": 29.81, "grad_norm": 1.2633438110351562, "learning_rate": 2e-05, "loss": 0.03501693, "step": 14905 }, { "epoch": 29.812, "grad_norm": 1.1309336423873901, "learning_rate": 2e-05, "loss": 0.04051533, "step": 14906 }, { "epoch": 29.814, "grad_norm": 1.007876992225647, "learning_rate": 2e-05, "loss": 0.03409252, "step": 14907 }, { "epoch": 29.816, "grad_norm": 1.6379777193069458, "learning_rate": 2e-05, "loss": 0.05974958, "step": 14908 }, { "epoch": 29.818, "grad_norm": 1.3505051136016846, "learning_rate": 2e-05, "loss": 0.05870251, "step": 14909 }, { "epoch": 29.82, "grad_norm": 1.0954749584197998, "learning_rate": 2e-05, "loss": 0.03586072, "step": 14910 }, { "epoch": 29.822, "grad_norm": 0.8500576615333557, "learning_rate": 2e-05, "loss": 0.02894812, "step": 14911 }, { "epoch": 29.824, "grad_norm": 0.9059650897979736, "learning_rate": 2e-05, "loss": 0.03347109, "step": 14912 }, { "epoch": 29.826, "grad_norm": 2.9803061485290527, "learning_rate": 2e-05, "loss": 0.05787177, "step": 14913 }, { "epoch": 29.828, "grad_norm": 1.2712470293045044, "learning_rate": 2e-05, "loss": 0.03648849, "step": 14914 }, { "epoch": 29.83, "grad_norm": 1.3802344799041748, "learning_rate": 2e-05, "loss": 0.04678344, "step": 14915 }, { "epoch": 29.832, "grad_norm": 0.9832471609115601, "learning_rate": 2e-05, "loss": 0.03517216, "step": 14916 }, { "epoch": 29.834, "grad_norm": 1.1615490913391113, "learning_rate": 2e-05, "loss": 0.04364904, "step": 14917 }, { "epoch": 29.836, "grad_norm": 1.3471040725708008, "learning_rate": 2e-05, "loss": 0.0301399, "step": 14918 }, { "epoch": 29.838, "grad_norm": 1.2438772916793823, "learning_rate": 2e-05, "loss": 0.0445204, "step": 14919 }, { "epoch": 29.84, "grad_norm": 1.1942741870880127, "learning_rate": 2e-05, "loss": 0.03932862, "step": 14920 }, { "epoch": 29.842, "grad_norm": 1.0131380558013916, "learning_rate": 2e-05, "loss": 0.04467336, "step": 14921 }, { "epoch": 29.844, "grad_norm": 1.1072560548782349, "learning_rate": 2e-05, "loss": 0.03384002, "step": 14922 }, { "epoch": 29.846, "grad_norm": 1.2888267040252686, "learning_rate": 2e-05, "loss": 0.03786197, "step": 14923 }, { "epoch": 29.848, "grad_norm": 1.1689302921295166, "learning_rate": 2e-05, "loss": 0.05054848, "step": 14924 }, { "epoch": 29.85, "grad_norm": 1.3626478910446167, "learning_rate": 2e-05, "loss": 0.04308386, "step": 14925 }, { "epoch": 29.852, "grad_norm": 1.0472064018249512, "learning_rate": 2e-05, "loss": 0.033345, "step": 14926 }, { "epoch": 29.854, "grad_norm": 1.0115253925323486, "learning_rate": 2e-05, "loss": 0.03919667, "step": 14927 }, { "epoch": 29.856, "grad_norm": 1.8129942417144775, "learning_rate": 2e-05, "loss": 0.04338409, "step": 14928 }, { "epoch": 29.858, "grad_norm": 1.012441873550415, "learning_rate": 2e-05, "loss": 0.03030182, "step": 14929 }, { "epoch": 29.86, "grad_norm": 1.0548601150512695, "learning_rate": 2e-05, "loss": 0.03725137, "step": 14930 }, { "epoch": 29.862, "grad_norm": 1.027254581451416, "learning_rate": 2e-05, "loss": 0.04611764, "step": 14931 }, { "epoch": 29.864, "grad_norm": 1.1189601421356201, "learning_rate": 2e-05, "loss": 0.03380036, "step": 14932 }, { "epoch": 29.866, "grad_norm": 1.33846116065979, "learning_rate": 2e-05, "loss": 0.06069037, "step": 14933 }, { "epoch": 29.868, "grad_norm": 1.096492052078247, "learning_rate": 2e-05, "loss": 0.04014472, "step": 14934 }, { "epoch": 29.87, "grad_norm": 1.174947738647461, "learning_rate": 2e-05, "loss": 0.02727459, "step": 14935 }, { "epoch": 29.872, "grad_norm": 1.087896466255188, "learning_rate": 2e-05, "loss": 0.04823601, "step": 14936 }, { "epoch": 29.874, "grad_norm": 1.0803396701812744, "learning_rate": 2e-05, "loss": 0.04184086, "step": 14937 }, { "epoch": 29.876, "grad_norm": 1.0180484056472778, "learning_rate": 2e-05, "loss": 0.02937475, "step": 14938 }, { "epoch": 29.878, "grad_norm": 1.099775791168213, "learning_rate": 2e-05, "loss": 0.04277577, "step": 14939 }, { "epoch": 29.88, "grad_norm": 1.3840006589889526, "learning_rate": 2e-05, "loss": 0.04482201, "step": 14940 }, { "epoch": 29.882, "grad_norm": 1.119278907775879, "learning_rate": 2e-05, "loss": 0.04266882, "step": 14941 }, { "epoch": 29.884, "grad_norm": 1.1960852146148682, "learning_rate": 2e-05, "loss": 0.03658342, "step": 14942 }, { "epoch": 29.886, "grad_norm": 1.3054516315460205, "learning_rate": 2e-05, "loss": 0.04391373, "step": 14943 }, { "epoch": 29.888, "grad_norm": 2.7474465370178223, "learning_rate": 2e-05, "loss": 0.04351743, "step": 14944 }, { "epoch": 29.89, "grad_norm": 1.2123734951019287, "learning_rate": 2e-05, "loss": 0.03769352, "step": 14945 }, { "epoch": 29.892, "grad_norm": 1.325861930847168, "learning_rate": 2e-05, "loss": 0.05362713, "step": 14946 }, { "epoch": 29.894, "grad_norm": 1.4618078470230103, "learning_rate": 2e-05, "loss": 0.04493023, "step": 14947 }, { "epoch": 29.896, "grad_norm": 1.041840672492981, "learning_rate": 2e-05, "loss": 0.03452935, "step": 14948 }, { "epoch": 29.898, "grad_norm": 1.356163740158081, "learning_rate": 2e-05, "loss": 0.05633589, "step": 14949 }, { "epoch": 29.9, "grad_norm": 1.409123420715332, "learning_rate": 2e-05, "loss": 0.04535218, "step": 14950 }, { "epoch": 29.902, "grad_norm": 1.5181453227996826, "learning_rate": 2e-05, "loss": 0.03477838, "step": 14951 }, { "epoch": 29.904, "grad_norm": 1.1335440874099731, "learning_rate": 2e-05, "loss": 0.05675086, "step": 14952 }, { "epoch": 29.906, "grad_norm": 2.706404209136963, "learning_rate": 2e-05, "loss": 0.04841974, "step": 14953 }, { "epoch": 29.908, "grad_norm": 1.2822026014328003, "learning_rate": 2e-05, "loss": 0.0460251, "step": 14954 }, { "epoch": 29.91, "grad_norm": 1.2433853149414062, "learning_rate": 2e-05, "loss": 0.04511236, "step": 14955 }, { "epoch": 29.912, "grad_norm": 1.2542903423309326, "learning_rate": 2e-05, "loss": 0.04461327, "step": 14956 }, { "epoch": 29.914, "grad_norm": 1.5348360538482666, "learning_rate": 2e-05, "loss": 0.06257273, "step": 14957 }, { "epoch": 29.916, "grad_norm": 1.2867058515548706, "learning_rate": 2e-05, "loss": 0.03613833, "step": 14958 }, { "epoch": 29.918, "grad_norm": 1.8032394647598267, "learning_rate": 2e-05, "loss": 0.06370901, "step": 14959 }, { "epoch": 29.92, "grad_norm": 2.311445951461792, "learning_rate": 2e-05, "loss": 0.03839797, "step": 14960 }, { "epoch": 29.922, "grad_norm": 1.3040590286254883, "learning_rate": 2e-05, "loss": 0.02956476, "step": 14961 }, { "epoch": 29.924, "grad_norm": 1.2339740991592407, "learning_rate": 2e-05, "loss": 0.03441235, "step": 14962 }, { "epoch": 29.926, "grad_norm": 1.8125553131103516, "learning_rate": 2e-05, "loss": 0.04188786, "step": 14963 }, { "epoch": 29.928, "grad_norm": 1.2054352760314941, "learning_rate": 2e-05, "loss": 0.02116534, "step": 14964 }, { "epoch": 29.93, "grad_norm": 1.1890215873718262, "learning_rate": 2e-05, "loss": 0.04022015, "step": 14965 }, { "epoch": 29.932, "grad_norm": 1.709370493888855, "learning_rate": 2e-05, "loss": 0.04972868, "step": 14966 }, { "epoch": 29.934, "grad_norm": 1.2925797700881958, "learning_rate": 2e-05, "loss": 0.0480578, "step": 14967 }, { "epoch": 29.936, "grad_norm": 1.549068808555603, "learning_rate": 2e-05, "loss": 0.05139837, "step": 14968 }, { "epoch": 29.938, "grad_norm": 1.3146717548370361, "learning_rate": 2e-05, "loss": 0.03840823, "step": 14969 }, { "epoch": 29.94, "grad_norm": 1.0093461275100708, "learning_rate": 2e-05, "loss": 0.03652712, "step": 14970 }, { "epoch": 29.942, "grad_norm": 3.679654121398926, "learning_rate": 2e-05, "loss": 0.06581252, "step": 14971 }, { "epoch": 29.944, "grad_norm": 1.36691415309906, "learning_rate": 2e-05, "loss": 0.04719871, "step": 14972 }, { "epoch": 29.946, "grad_norm": 1.2385770082473755, "learning_rate": 2e-05, "loss": 0.04421876, "step": 14973 }, { "epoch": 29.948, "grad_norm": 1.2146538496017456, "learning_rate": 2e-05, "loss": 0.03504267, "step": 14974 }, { "epoch": 29.95, "grad_norm": 1.7719730138778687, "learning_rate": 2e-05, "loss": 0.04804779, "step": 14975 }, { "epoch": 29.951999999999998, "grad_norm": 1.596618890762329, "learning_rate": 2e-05, "loss": 0.06619457, "step": 14976 }, { "epoch": 29.954, "grad_norm": 0.9984351396560669, "learning_rate": 2e-05, "loss": 0.02848834, "step": 14977 }, { "epoch": 29.956, "grad_norm": 1.1433227062225342, "learning_rate": 2e-05, "loss": 0.0418468, "step": 14978 }, { "epoch": 29.958, "grad_norm": 1.499070644378662, "learning_rate": 2e-05, "loss": 0.05216137, "step": 14979 }, { "epoch": 29.96, "grad_norm": 1.0211125612258911, "learning_rate": 2e-05, "loss": 0.03615819, "step": 14980 }, { "epoch": 29.962, "grad_norm": 3.567038059234619, "learning_rate": 2e-05, "loss": 0.05577373, "step": 14981 }, { "epoch": 29.964, "grad_norm": 1.994671106338501, "learning_rate": 2e-05, "loss": 0.04374481, "step": 14982 }, { "epoch": 29.966, "grad_norm": 1.1184746026992798, "learning_rate": 2e-05, "loss": 0.04952614, "step": 14983 }, { "epoch": 29.968, "grad_norm": 1.5312135219573975, "learning_rate": 2e-05, "loss": 0.04638387, "step": 14984 }, { "epoch": 29.97, "grad_norm": 1.1531622409820557, "learning_rate": 2e-05, "loss": 0.04479223, "step": 14985 }, { "epoch": 29.972, "grad_norm": 1.111600637435913, "learning_rate": 2e-05, "loss": 0.03504475, "step": 14986 }, { "epoch": 29.974, "grad_norm": 1.4171292781829834, "learning_rate": 2e-05, "loss": 0.03749974, "step": 14987 }, { "epoch": 29.976, "grad_norm": 0.8947274684906006, "learning_rate": 2e-05, "loss": 0.02938852, "step": 14988 }, { "epoch": 29.978, "grad_norm": 1.0345733165740967, "learning_rate": 2e-05, "loss": 0.04883969, "step": 14989 }, { "epoch": 29.98, "grad_norm": 1.0411769151687622, "learning_rate": 2e-05, "loss": 0.04379828, "step": 14990 }, { "epoch": 29.982, "grad_norm": 1.4568036794662476, "learning_rate": 2e-05, "loss": 0.04191837, "step": 14991 }, { "epoch": 29.984, "grad_norm": 1.2673814296722412, "learning_rate": 2e-05, "loss": 0.04016875, "step": 14992 }, { "epoch": 29.986, "grad_norm": 0.9755784869194031, "learning_rate": 2e-05, "loss": 0.03788521, "step": 14993 }, { "epoch": 29.988, "grad_norm": 1.2146108150482178, "learning_rate": 2e-05, "loss": 0.0330191, "step": 14994 }, { "epoch": 29.99, "grad_norm": 1.6079703569412231, "learning_rate": 2e-05, "loss": 0.05264592, "step": 14995 }, { "epoch": 29.992, "grad_norm": 1.3531326055526733, "learning_rate": 2e-05, "loss": 0.05724774, "step": 14996 }, { "epoch": 29.994, "grad_norm": 1.4556341171264648, "learning_rate": 2e-05, "loss": 0.06128872, "step": 14997 }, { "epoch": 29.996, "grad_norm": 1.7786065340042114, "learning_rate": 2e-05, "loss": 0.06668452, "step": 14998 }, { "epoch": 29.998, "grad_norm": 1.122294306755066, "learning_rate": 2e-05, "loss": 0.04705849, "step": 14999 }, { "epoch": 30.0, "grad_norm": 1.9331656694412231, "learning_rate": 2e-05, "loss": 0.0316266, "step": 15000 }, { "epoch": 30.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9820359281437125, "Equal_1": 0.996, "Equal_2": 0.9820359281437125, "Equal_3": 0.9780439121756487, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9919839679358717, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.994, "Perpendicular_1": 1.0, "Perpendicular_2": 0.988, "Perpendicular_3": 0.8937875751503006, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.9916666666666667, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9900199600798403 }, "eval_runtime": 320.3735, "eval_samples_per_second": 32.774, "eval_steps_per_second": 0.655, "step": 15000 }, { "epoch": 30.002, "grad_norm": 1.4232646226882935, "learning_rate": 2e-05, "loss": 0.04287136, "step": 15001 }, { "epoch": 30.004, "grad_norm": 1.111973524093628, "learning_rate": 2e-05, "loss": 0.04549501, "step": 15002 }, { "epoch": 30.006, "grad_norm": 1.4063668251037598, "learning_rate": 2e-05, "loss": 0.04815795, "step": 15003 }, { "epoch": 30.008, "grad_norm": 1.0374184846878052, "learning_rate": 2e-05, "loss": 0.03240955, "step": 15004 }, { "epoch": 30.01, "grad_norm": 2.511753797531128, "learning_rate": 2e-05, "loss": 0.05327015, "step": 15005 }, { "epoch": 30.012, "grad_norm": 1.5259581804275513, "learning_rate": 2e-05, "loss": 0.04226237, "step": 15006 }, { "epoch": 30.014, "grad_norm": 2.2420709133148193, "learning_rate": 2e-05, "loss": 0.05237993, "step": 15007 }, { "epoch": 30.016, "grad_norm": 1.380776047706604, "learning_rate": 2e-05, "loss": 0.04655606, "step": 15008 }, { "epoch": 30.018, "grad_norm": 1.1870553493499756, "learning_rate": 2e-05, "loss": 0.03608441, "step": 15009 }, { "epoch": 30.02, "grad_norm": 1.869041085243225, "learning_rate": 2e-05, "loss": 0.05452606, "step": 15010 }, { "epoch": 30.022, "grad_norm": 1.5327285528182983, "learning_rate": 2e-05, "loss": 0.07322414, "step": 15011 }, { "epoch": 30.024, "grad_norm": 1.1738957166671753, "learning_rate": 2e-05, "loss": 0.04997585, "step": 15012 }, { "epoch": 30.026, "grad_norm": 1.7185429334640503, "learning_rate": 2e-05, "loss": 0.0466965, "step": 15013 }, { "epoch": 30.028, "grad_norm": 1.3917158842086792, "learning_rate": 2e-05, "loss": 0.04702974, "step": 15014 }, { "epoch": 30.03, "grad_norm": 1.652574896812439, "learning_rate": 2e-05, "loss": 0.03737196, "step": 15015 }, { "epoch": 30.032, "grad_norm": 1.2738018035888672, "learning_rate": 2e-05, "loss": 0.04564106, "step": 15016 }, { "epoch": 30.034, "grad_norm": 1.3657252788543701, "learning_rate": 2e-05, "loss": 0.0442913, "step": 15017 }, { "epoch": 30.036, "grad_norm": 1.1747430562973022, "learning_rate": 2e-05, "loss": 0.02756719, "step": 15018 }, { "epoch": 30.038, "grad_norm": 1.0821881294250488, "learning_rate": 2e-05, "loss": 0.0346719, "step": 15019 }, { "epoch": 30.04, "grad_norm": 1.230070948600769, "learning_rate": 2e-05, "loss": 0.03856944, "step": 15020 }, { "epoch": 30.042, "grad_norm": 1.540521502494812, "learning_rate": 2e-05, "loss": 0.04057005, "step": 15021 }, { "epoch": 30.044, "grad_norm": 2.8572275638580322, "learning_rate": 2e-05, "loss": 0.04592321, "step": 15022 }, { "epoch": 30.046, "grad_norm": 0.957338809967041, "learning_rate": 2e-05, "loss": 0.03693869, "step": 15023 }, { "epoch": 30.048, "grad_norm": 1.0699844360351562, "learning_rate": 2e-05, "loss": 0.03753718, "step": 15024 }, { "epoch": 30.05, "grad_norm": 2.457496404647827, "learning_rate": 2e-05, "loss": 0.04840863, "step": 15025 }, { "epoch": 30.052, "grad_norm": 1.2310270071029663, "learning_rate": 2e-05, "loss": 0.04352465, "step": 15026 }, { "epoch": 30.054, "grad_norm": 1.2693308591842651, "learning_rate": 2e-05, "loss": 0.04948821, "step": 15027 }, { "epoch": 30.056, "grad_norm": 1.4048819541931152, "learning_rate": 2e-05, "loss": 0.0399585, "step": 15028 }, { "epoch": 30.058, "grad_norm": 2.0493316650390625, "learning_rate": 2e-05, "loss": 0.05211222, "step": 15029 }, { "epoch": 30.06, "grad_norm": 3.7002038955688477, "learning_rate": 2e-05, "loss": 0.05875326, "step": 15030 }, { "epoch": 30.062, "grad_norm": 1.7289574146270752, "learning_rate": 2e-05, "loss": 0.04627757, "step": 15031 }, { "epoch": 30.064, "grad_norm": 1.0221132040023804, "learning_rate": 2e-05, "loss": 0.03416388, "step": 15032 }, { "epoch": 30.066, "grad_norm": 1.4639520645141602, "learning_rate": 2e-05, "loss": 0.04067467, "step": 15033 }, { "epoch": 30.068, "grad_norm": 3.5001463890075684, "learning_rate": 2e-05, "loss": 0.04698951, "step": 15034 }, { "epoch": 30.07, "grad_norm": 2.3693861961364746, "learning_rate": 2e-05, "loss": 0.04970219, "step": 15035 }, { "epoch": 30.072, "grad_norm": 1.3238275051116943, "learning_rate": 2e-05, "loss": 0.05959756, "step": 15036 }, { "epoch": 30.074, "grad_norm": 1.5840702056884766, "learning_rate": 2e-05, "loss": 0.04837484, "step": 15037 }, { "epoch": 30.076, "grad_norm": 0.9318821430206299, "learning_rate": 2e-05, "loss": 0.03147685, "step": 15038 }, { "epoch": 30.078, "grad_norm": 1.0989964008331299, "learning_rate": 2e-05, "loss": 0.03799355, "step": 15039 }, { "epoch": 30.08, "grad_norm": 1.4380488395690918, "learning_rate": 2e-05, "loss": 0.04152, "step": 15040 }, { "epoch": 30.082, "grad_norm": 1.2408324480056763, "learning_rate": 2e-05, "loss": 0.04909746, "step": 15041 }, { "epoch": 30.084, "grad_norm": 1.1136013269424438, "learning_rate": 2e-05, "loss": 0.03348158, "step": 15042 }, { "epoch": 30.086, "grad_norm": 1.2548508644104004, "learning_rate": 2e-05, "loss": 0.0451351, "step": 15043 }, { "epoch": 30.088, "grad_norm": 1.0726171731948853, "learning_rate": 2e-05, "loss": 0.04324069, "step": 15044 }, { "epoch": 30.09, "grad_norm": 1.0724010467529297, "learning_rate": 2e-05, "loss": 0.0484529, "step": 15045 }, { "epoch": 30.092, "grad_norm": 1.228132963180542, "learning_rate": 2e-05, "loss": 0.03065848, "step": 15046 }, { "epoch": 30.094, "grad_norm": 1.099954605102539, "learning_rate": 2e-05, "loss": 0.04499891, "step": 15047 }, { "epoch": 30.096, "grad_norm": 1.0415964126586914, "learning_rate": 2e-05, "loss": 0.03373061, "step": 15048 }, { "epoch": 30.098, "grad_norm": 1.237966775894165, "learning_rate": 2e-05, "loss": 0.04612883, "step": 15049 }, { "epoch": 30.1, "grad_norm": 0.8815692067146301, "learning_rate": 2e-05, "loss": 0.02756138, "step": 15050 }, { "epoch": 30.102, "grad_norm": 1.3700236082077026, "learning_rate": 2e-05, "loss": 0.06041703, "step": 15051 }, { "epoch": 30.104, "grad_norm": 1.2398154735565186, "learning_rate": 2e-05, "loss": 0.03576667, "step": 15052 }, { "epoch": 30.106, "grad_norm": 1.1791821718215942, "learning_rate": 2e-05, "loss": 0.05038423, "step": 15053 }, { "epoch": 30.108, "grad_norm": 2.0784451961517334, "learning_rate": 2e-05, "loss": 0.05495892, "step": 15054 }, { "epoch": 30.11, "grad_norm": 2.1504173278808594, "learning_rate": 2e-05, "loss": 0.03890841, "step": 15055 }, { "epoch": 30.112, "grad_norm": 3.112828254699707, "learning_rate": 2e-05, "loss": 0.05653836, "step": 15056 }, { "epoch": 30.114, "grad_norm": 1.1712428331375122, "learning_rate": 2e-05, "loss": 0.05188262, "step": 15057 }, { "epoch": 30.116, "grad_norm": 1.5940415859222412, "learning_rate": 2e-05, "loss": 0.05263701, "step": 15058 }, { "epoch": 30.118, "grad_norm": 1.2304260730743408, "learning_rate": 2e-05, "loss": 0.04616508, "step": 15059 }, { "epoch": 30.12, "grad_norm": 3.7707254886627197, "learning_rate": 2e-05, "loss": 0.05711006, "step": 15060 }, { "epoch": 30.122, "grad_norm": 1.8504637479782104, "learning_rate": 2e-05, "loss": 0.04005916, "step": 15061 }, { "epoch": 30.124, "grad_norm": 1.3818079233169556, "learning_rate": 2e-05, "loss": 0.0316132, "step": 15062 }, { "epoch": 30.126, "grad_norm": 1.582114577293396, "learning_rate": 2e-05, "loss": 0.05220442, "step": 15063 }, { "epoch": 30.128, "grad_norm": 1.2166645526885986, "learning_rate": 2e-05, "loss": 0.03328218, "step": 15064 }, { "epoch": 30.13, "grad_norm": 1.2545546293258667, "learning_rate": 2e-05, "loss": 0.03880144, "step": 15065 }, { "epoch": 30.132, "grad_norm": 1.1685189008712769, "learning_rate": 2e-05, "loss": 0.0422597, "step": 15066 }, { "epoch": 30.134, "grad_norm": 1.2280715703964233, "learning_rate": 2e-05, "loss": 0.03680715, "step": 15067 }, { "epoch": 30.136, "grad_norm": 1.2604179382324219, "learning_rate": 2e-05, "loss": 0.0577843, "step": 15068 }, { "epoch": 30.138, "grad_norm": 1.333001971244812, "learning_rate": 2e-05, "loss": 0.03825287, "step": 15069 }, { "epoch": 30.14, "grad_norm": 1.515370488166809, "learning_rate": 2e-05, "loss": 0.03778506, "step": 15070 }, { "epoch": 30.142, "grad_norm": 1.265008568763733, "learning_rate": 2e-05, "loss": 0.03106946, "step": 15071 }, { "epoch": 30.144, "grad_norm": 2.2077817916870117, "learning_rate": 2e-05, "loss": 0.03389125, "step": 15072 }, { "epoch": 30.146, "grad_norm": 1.4839131832122803, "learning_rate": 2e-05, "loss": 0.0673124, "step": 15073 }, { "epoch": 30.148, "grad_norm": 1.1836014986038208, "learning_rate": 2e-05, "loss": 0.0335351, "step": 15074 }, { "epoch": 30.15, "grad_norm": 1.3027890920639038, "learning_rate": 2e-05, "loss": 0.04611604, "step": 15075 }, { "epoch": 30.152, "grad_norm": 1.2303056716918945, "learning_rate": 2e-05, "loss": 0.04055518, "step": 15076 }, { "epoch": 30.154, "grad_norm": 1.6350833177566528, "learning_rate": 2e-05, "loss": 0.0367165, "step": 15077 }, { "epoch": 30.156, "grad_norm": 0.9786274433135986, "learning_rate": 2e-05, "loss": 0.03549477, "step": 15078 }, { "epoch": 30.158, "grad_norm": 1.5880675315856934, "learning_rate": 2e-05, "loss": 0.04486579, "step": 15079 }, { "epoch": 30.16, "grad_norm": 1.2543953657150269, "learning_rate": 2e-05, "loss": 0.04821232, "step": 15080 }, { "epoch": 30.162, "grad_norm": 2.140390396118164, "learning_rate": 2e-05, "loss": 0.04136558, "step": 15081 }, { "epoch": 30.164, "grad_norm": 1.0265830755233765, "learning_rate": 2e-05, "loss": 0.03865357, "step": 15082 }, { "epoch": 30.166, "grad_norm": 1.3790358304977417, "learning_rate": 2e-05, "loss": 0.06286014, "step": 15083 }, { "epoch": 30.168, "grad_norm": 2.187826633453369, "learning_rate": 2e-05, "loss": 0.06071652, "step": 15084 }, { "epoch": 30.17, "grad_norm": 1.165565848350525, "learning_rate": 2e-05, "loss": 0.04597174, "step": 15085 }, { "epoch": 30.172, "grad_norm": 1.5375369787216187, "learning_rate": 2e-05, "loss": 0.03429234, "step": 15086 }, { "epoch": 30.174, "grad_norm": 2.7333414554595947, "learning_rate": 2e-05, "loss": 0.05888695, "step": 15087 }, { "epoch": 30.176, "grad_norm": 1.0312190055847168, "learning_rate": 2e-05, "loss": 0.0352921, "step": 15088 }, { "epoch": 30.178, "grad_norm": 4.89245080947876, "learning_rate": 2e-05, "loss": 0.0566744, "step": 15089 }, { "epoch": 30.18, "grad_norm": 1.7868283987045288, "learning_rate": 2e-05, "loss": 0.05283628, "step": 15090 }, { "epoch": 30.182, "grad_norm": 2.932136297225952, "learning_rate": 2e-05, "loss": 0.04512861, "step": 15091 }, { "epoch": 30.184, "grad_norm": 1.8231194019317627, "learning_rate": 2e-05, "loss": 0.04028752, "step": 15092 }, { "epoch": 30.186, "grad_norm": 1.5239124298095703, "learning_rate": 2e-05, "loss": 0.03271829, "step": 15093 }, { "epoch": 30.188, "grad_norm": 1.2162010669708252, "learning_rate": 2e-05, "loss": 0.03735817, "step": 15094 }, { "epoch": 30.19, "grad_norm": 1.240540623664856, "learning_rate": 2e-05, "loss": 0.04491368, "step": 15095 }, { "epoch": 30.192, "grad_norm": 1.1648021936416626, "learning_rate": 2e-05, "loss": 0.04308267, "step": 15096 }, { "epoch": 30.194, "grad_norm": 1.0710548162460327, "learning_rate": 2e-05, "loss": 0.04492667, "step": 15097 }, { "epoch": 30.196, "grad_norm": 1.1542491912841797, "learning_rate": 2e-05, "loss": 0.0378947, "step": 15098 }, { "epoch": 30.198, "grad_norm": 1.8039984703063965, "learning_rate": 2e-05, "loss": 0.05451577, "step": 15099 }, { "epoch": 30.2, "grad_norm": 1.4430378675460815, "learning_rate": 2e-05, "loss": 0.05195063, "step": 15100 }, { "epoch": 30.202, "grad_norm": 1.3164018392562866, "learning_rate": 2e-05, "loss": 0.03912523, "step": 15101 }, { "epoch": 30.204, "grad_norm": 1.669808030128479, "learning_rate": 2e-05, "loss": 0.04372003, "step": 15102 }, { "epoch": 30.206, "grad_norm": 1.5180182456970215, "learning_rate": 2e-05, "loss": 0.03866254, "step": 15103 }, { "epoch": 30.208, "grad_norm": 1.6664462089538574, "learning_rate": 2e-05, "loss": 0.0531385, "step": 15104 }, { "epoch": 30.21, "grad_norm": 1.4806171655654907, "learning_rate": 2e-05, "loss": 0.05282668, "step": 15105 }, { "epoch": 30.212, "grad_norm": 1.128859281539917, "learning_rate": 2e-05, "loss": 0.03547608, "step": 15106 }, { "epoch": 30.214, "grad_norm": 1.117397427558899, "learning_rate": 2e-05, "loss": 0.03773435, "step": 15107 }, { "epoch": 30.216, "grad_norm": 1.2309256792068481, "learning_rate": 2e-05, "loss": 0.04536531, "step": 15108 }, { "epoch": 30.218, "grad_norm": 1.3051683902740479, "learning_rate": 2e-05, "loss": 0.04438914, "step": 15109 }, { "epoch": 30.22, "grad_norm": 1.4824970960617065, "learning_rate": 2e-05, "loss": 0.05524845, "step": 15110 }, { "epoch": 30.222, "grad_norm": 1.3169000148773193, "learning_rate": 2e-05, "loss": 0.04102128, "step": 15111 }, { "epoch": 30.224, "grad_norm": 1.2333604097366333, "learning_rate": 2e-05, "loss": 0.06657355, "step": 15112 }, { "epoch": 30.226, "grad_norm": 0.9591508507728577, "learning_rate": 2e-05, "loss": 0.02981432, "step": 15113 }, { "epoch": 30.228, "grad_norm": 1.213059425354004, "learning_rate": 2e-05, "loss": 0.0342233, "step": 15114 }, { "epoch": 30.23, "grad_norm": 1.1020963191986084, "learning_rate": 2e-05, "loss": 0.02755513, "step": 15115 }, { "epoch": 30.232, "grad_norm": 1.0923129320144653, "learning_rate": 2e-05, "loss": 0.04014743, "step": 15116 }, { "epoch": 30.234, "grad_norm": 1.1201938390731812, "learning_rate": 2e-05, "loss": 0.05072346, "step": 15117 }, { "epoch": 30.236, "grad_norm": 3.7093849182128906, "learning_rate": 2e-05, "loss": 0.06261256, "step": 15118 }, { "epoch": 30.238, "grad_norm": 1.1667890548706055, "learning_rate": 2e-05, "loss": 0.04364653, "step": 15119 }, { "epoch": 30.24, "grad_norm": 1.1062977313995361, "learning_rate": 2e-05, "loss": 0.05012048, "step": 15120 }, { "epoch": 30.242, "grad_norm": 1.3190714120864868, "learning_rate": 2e-05, "loss": 0.05197113, "step": 15121 }, { "epoch": 30.244, "grad_norm": 1.458884596824646, "learning_rate": 2e-05, "loss": 0.04996847, "step": 15122 }, { "epoch": 30.246, "grad_norm": 1.366876244544983, "learning_rate": 2e-05, "loss": 0.04006827, "step": 15123 }, { "epoch": 30.248, "grad_norm": 1.0993216037750244, "learning_rate": 2e-05, "loss": 0.03217662, "step": 15124 }, { "epoch": 30.25, "grad_norm": 0.9922396540641785, "learning_rate": 2e-05, "loss": 0.04653015, "step": 15125 }, { "epoch": 30.252, "grad_norm": 1.0631383657455444, "learning_rate": 2e-05, "loss": 0.03681334, "step": 15126 }, { "epoch": 30.254, "grad_norm": 1.0309144258499146, "learning_rate": 2e-05, "loss": 0.04099189, "step": 15127 }, { "epoch": 30.256, "grad_norm": 1.1436161994934082, "learning_rate": 2e-05, "loss": 0.04641513, "step": 15128 }, { "epoch": 30.258, "grad_norm": 1.134342074394226, "learning_rate": 2e-05, "loss": 0.03863002, "step": 15129 }, { "epoch": 30.26, "grad_norm": 1.5008431673049927, "learning_rate": 2e-05, "loss": 0.04123659, "step": 15130 }, { "epoch": 30.262, "grad_norm": 1.044177770614624, "learning_rate": 2e-05, "loss": 0.03907245, "step": 15131 }, { "epoch": 30.264, "grad_norm": 0.9937227964401245, "learning_rate": 2e-05, "loss": 0.03749677, "step": 15132 }, { "epoch": 30.266, "grad_norm": 3.122323989868164, "learning_rate": 2e-05, "loss": 0.05537373, "step": 15133 }, { "epoch": 30.268, "grad_norm": 1.1054846048355103, "learning_rate": 2e-05, "loss": 0.04427914, "step": 15134 }, { "epoch": 30.27, "grad_norm": 1.256409764289856, "learning_rate": 2e-05, "loss": 0.05179529, "step": 15135 }, { "epoch": 30.272, "grad_norm": 1.4733872413635254, "learning_rate": 2e-05, "loss": 0.04531301, "step": 15136 }, { "epoch": 30.274, "grad_norm": 1.2605996131896973, "learning_rate": 2e-05, "loss": 0.0307104, "step": 15137 }, { "epoch": 30.276, "grad_norm": 1.0388493537902832, "learning_rate": 2e-05, "loss": 0.04817986, "step": 15138 }, { "epoch": 30.278, "grad_norm": 1.6238524913787842, "learning_rate": 2e-05, "loss": 0.05085561, "step": 15139 }, { "epoch": 30.28, "grad_norm": 1.096824288368225, "learning_rate": 2e-05, "loss": 0.04942642, "step": 15140 }, { "epoch": 30.282, "grad_norm": 1.2300045490264893, "learning_rate": 2e-05, "loss": 0.0502276, "step": 15141 }, { "epoch": 30.284, "grad_norm": 1.2435781955718994, "learning_rate": 2e-05, "loss": 0.04534131, "step": 15142 }, { "epoch": 30.286, "grad_norm": 1.386216163635254, "learning_rate": 2e-05, "loss": 0.04588864, "step": 15143 }, { "epoch": 30.288, "grad_norm": 1.2593432664871216, "learning_rate": 2e-05, "loss": 0.04650512, "step": 15144 }, { "epoch": 30.29, "grad_norm": 1.6124224662780762, "learning_rate": 2e-05, "loss": 0.04883378, "step": 15145 }, { "epoch": 30.292, "grad_norm": 2.2996997833251953, "learning_rate": 2e-05, "loss": 0.04866761, "step": 15146 }, { "epoch": 30.294, "grad_norm": 1.7783503532409668, "learning_rate": 2e-05, "loss": 0.06645172, "step": 15147 }, { "epoch": 30.296, "grad_norm": 1.2972900867462158, "learning_rate": 2e-05, "loss": 0.05338973, "step": 15148 }, { "epoch": 30.298, "grad_norm": 2.96677827835083, "learning_rate": 2e-05, "loss": 0.05938792, "step": 15149 }, { "epoch": 30.3, "grad_norm": 1.1180599927902222, "learning_rate": 2e-05, "loss": 0.03344629, "step": 15150 }, { "epoch": 30.302, "grad_norm": 1.7869832515716553, "learning_rate": 2e-05, "loss": 0.05831323, "step": 15151 }, { "epoch": 30.304, "grad_norm": 1.225154161453247, "learning_rate": 2e-05, "loss": 0.05661523, "step": 15152 }, { "epoch": 30.306, "grad_norm": 2.486720561981201, "learning_rate": 2e-05, "loss": 0.05136902, "step": 15153 }, { "epoch": 30.308, "grad_norm": 1.1094615459442139, "learning_rate": 2e-05, "loss": 0.04413821, "step": 15154 }, { "epoch": 30.31, "grad_norm": 2.087968349456787, "learning_rate": 2e-05, "loss": 0.05737863, "step": 15155 }, { "epoch": 30.312, "grad_norm": 1.0131031274795532, "learning_rate": 2e-05, "loss": 0.04438207, "step": 15156 }, { "epoch": 30.314, "grad_norm": 1.278663992881775, "learning_rate": 2e-05, "loss": 0.04451298, "step": 15157 }, { "epoch": 30.316, "grad_norm": 1.0880504846572876, "learning_rate": 2e-05, "loss": 0.04140671, "step": 15158 }, { "epoch": 30.318, "grad_norm": 1.6311753988265991, "learning_rate": 2e-05, "loss": 0.04358608, "step": 15159 }, { "epoch": 30.32, "grad_norm": 1.3990247249603271, "learning_rate": 2e-05, "loss": 0.05173497, "step": 15160 }, { "epoch": 30.322, "grad_norm": 1.4861396551132202, "learning_rate": 2e-05, "loss": 0.03629741, "step": 15161 }, { "epoch": 30.324, "grad_norm": 1.2476096153259277, "learning_rate": 2e-05, "loss": 0.03799465, "step": 15162 }, { "epoch": 30.326, "grad_norm": 1.2157728672027588, "learning_rate": 2e-05, "loss": 0.06220795, "step": 15163 }, { "epoch": 30.328, "grad_norm": 1.5057649612426758, "learning_rate": 2e-05, "loss": 0.04527867, "step": 15164 }, { "epoch": 30.33, "grad_norm": 1.7932603359222412, "learning_rate": 2e-05, "loss": 0.05007517, "step": 15165 }, { "epoch": 30.332, "grad_norm": 1.5305033922195435, "learning_rate": 2e-05, "loss": 0.03679098, "step": 15166 }, { "epoch": 30.334, "grad_norm": 1.2285985946655273, "learning_rate": 2e-05, "loss": 0.0510888, "step": 15167 }, { "epoch": 30.336, "grad_norm": 1.0248448848724365, "learning_rate": 2e-05, "loss": 0.04463315, "step": 15168 }, { "epoch": 30.338, "grad_norm": 1.1598585844039917, "learning_rate": 2e-05, "loss": 0.04191558, "step": 15169 }, { "epoch": 30.34, "grad_norm": 1.415334939956665, "learning_rate": 2e-05, "loss": 0.04389124, "step": 15170 }, { "epoch": 30.342, "grad_norm": 1.105470895767212, "learning_rate": 2e-05, "loss": 0.04412735, "step": 15171 }, { "epoch": 30.344, "grad_norm": 2.4653127193450928, "learning_rate": 2e-05, "loss": 0.04922763, "step": 15172 }, { "epoch": 30.346, "grad_norm": 0.9219580292701721, "learning_rate": 2e-05, "loss": 0.02929104, "step": 15173 }, { "epoch": 30.348, "grad_norm": 1.086243987083435, "learning_rate": 2e-05, "loss": 0.04635917, "step": 15174 }, { "epoch": 30.35, "grad_norm": 0.9130674004554749, "learning_rate": 2e-05, "loss": 0.031245, "step": 15175 }, { "epoch": 30.352, "grad_norm": 0.9588483572006226, "learning_rate": 2e-05, "loss": 0.03446349, "step": 15176 }, { "epoch": 30.354, "grad_norm": 1.1762733459472656, "learning_rate": 2e-05, "loss": 0.04649212, "step": 15177 }, { "epoch": 30.356, "grad_norm": 1.3569097518920898, "learning_rate": 2e-05, "loss": 0.05233772, "step": 15178 }, { "epoch": 30.358, "grad_norm": 1.1183359622955322, "learning_rate": 2e-05, "loss": 0.02675498, "step": 15179 }, { "epoch": 30.36, "grad_norm": 1.455556869506836, "learning_rate": 2e-05, "loss": 0.05393392, "step": 15180 }, { "epoch": 30.362, "grad_norm": 1.3345918655395508, "learning_rate": 2e-05, "loss": 0.04711598, "step": 15181 }, { "epoch": 30.364, "grad_norm": 1.4557232856750488, "learning_rate": 2e-05, "loss": 0.05467194, "step": 15182 }, { "epoch": 30.366, "grad_norm": 1.2925028800964355, "learning_rate": 2e-05, "loss": 0.04946, "step": 15183 }, { "epoch": 30.368, "grad_norm": 1.2487906217575073, "learning_rate": 2e-05, "loss": 0.04537457, "step": 15184 }, { "epoch": 30.37, "grad_norm": 1.4377646446228027, "learning_rate": 2e-05, "loss": 0.05969262, "step": 15185 }, { "epoch": 30.372, "grad_norm": 1.0396651029586792, "learning_rate": 2e-05, "loss": 0.03547468, "step": 15186 }, { "epoch": 30.374, "grad_norm": 1.273114562034607, "learning_rate": 2e-05, "loss": 0.04310979, "step": 15187 }, { "epoch": 30.376, "grad_norm": 0.977163553237915, "learning_rate": 2e-05, "loss": 0.03705889, "step": 15188 }, { "epoch": 30.378, "grad_norm": 2.641759157180786, "learning_rate": 2e-05, "loss": 0.05824747, "step": 15189 }, { "epoch": 30.38, "grad_norm": 2.5414299964904785, "learning_rate": 2e-05, "loss": 0.04271651, "step": 15190 }, { "epoch": 30.382, "grad_norm": 1.6944334506988525, "learning_rate": 2e-05, "loss": 0.0411639, "step": 15191 }, { "epoch": 30.384, "grad_norm": 1.1185425519943237, "learning_rate": 2e-05, "loss": 0.03539154, "step": 15192 }, { "epoch": 30.386, "grad_norm": 2.133718252182007, "learning_rate": 2e-05, "loss": 0.03833131, "step": 15193 }, { "epoch": 30.388, "grad_norm": 1.6777894496917725, "learning_rate": 2e-05, "loss": 0.04095278, "step": 15194 }, { "epoch": 30.39, "grad_norm": 2.4663455486297607, "learning_rate": 2e-05, "loss": 0.03233901, "step": 15195 }, { "epoch": 30.392, "grad_norm": 1.5365792512893677, "learning_rate": 2e-05, "loss": 0.05167238, "step": 15196 }, { "epoch": 30.394, "grad_norm": 1.1537460088729858, "learning_rate": 2e-05, "loss": 0.03557686, "step": 15197 }, { "epoch": 30.396, "grad_norm": 0.9411190152168274, "learning_rate": 2e-05, "loss": 0.02952364, "step": 15198 }, { "epoch": 30.398, "grad_norm": 1.2708302736282349, "learning_rate": 2e-05, "loss": 0.05088828, "step": 15199 }, { "epoch": 30.4, "grad_norm": 2.0306994915008545, "learning_rate": 2e-05, "loss": 0.04890837, "step": 15200 }, { "epoch": 30.402, "grad_norm": 1.7756644487380981, "learning_rate": 2e-05, "loss": 0.04039244, "step": 15201 }, { "epoch": 30.404, "grad_norm": 1.4186558723449707, "learning_rate": 2e-05, "loss": 0.05239846, "step": 15202 }, { "epoch": 30.406, "grad_norm": 1.0431525707244873, "learning_rate": 2e-05, "loss": 0.03030356, "step": 15203 }, { "epoch": 30.408, "grad_norm": 2.5661303997039795, "learning_rate": 2e-05, "loss": 0.06191105, "step": 15204 }, { "epoch": 30.41, "grad_norm": 1.5152759552001953, "learning_rate": 2e-05, "loss": 0.0515747, "step": 15205 }, { "epoch": 30.412, "grad_norm": 1.1445180177688599, "learning_rate": 2e-05, "loss": 0.04189182, "step": 15206 }, { "epoch": 30.414, "grad_norm": 1.1220874786376953, "learning_rate": 2e-05, "loss": 0.04300126, "step": 15207 }, { "epoch": 30.416, "grad_norm": 1.1378350257873535, "learning_rate": 2e-05, "loss": 0.03469217, "step": 15208 }, { "epoch": 30.418, "grad_norm": 1.337202548980713, "learning_rate": 2e-05, "loss": 0.03686753, "step": 15209 }, { "epoch": 30.42, "grad_norm": 0.9815614223480225, "learning_rate": 2e-05, "loss": 0.03325251, "step": 15210 }, { "epoch": 30.422, "grad_norm": 0.8998761773109436, "learning_rate": 2e-05, "loss": 0.03149875, "step": 15211 }, { "epoch": 30.424, "grad_norm": 1.0622773170471191, "learning_rate": 2e-05, "loss": 0.04623077, "step": 15212 }, { "epoch": 30.426, "grad_norm": 2.55696177482605, "learning_rate": 2e-05, "loss": 0.05916242, "step": 15213 }, { "epoch": 30.428, "grad_norm": 1.0451836585998535, "learning_rate": 2e-05, "loss": 0.03193265, "step": 15214 }, { "epoch": 30.43, "grad_norm": 1.0104472637176514, "learning_rate": 2e-05, "loss": 0.04023196, "step": 15215 }, { "epoch": 30.432, "grad_norm": 1.5846662521362305, "learning_rate": 2e-05, "loss": 0.04536471, "step": 15216 }, { "epoch": 30.434, "grad_norm": 1.2485827207565308, "learning_rate": 2e-05, "loss": 0.05067682, "step": 15217 }, { "epoch": 30.436, "grad_norm": 2.142920732498169, "learning_rate": 2e-05, "loss": 0.04851421, "step": 15218 }, { "epoch": 30.438, "grad_norm": 2.670254945755005, "learning_rate": 2e-05, "loss": 0.03117582, "step": 15219 }, { "epoch": 30.44, "grad_norm": 2.0113894939422607, "learning_rate": 2e-05, "loss": 0.04463819, "step": 15220 }, { "epoch": 30.442, "grad_norm": 1.347212791442871, "learning_rate": 2e-05, "loss": 0.03855886, "step": 15221 }, { "epoch": 30.444, "grad_norm": 1.3231719732284546, "learning_rate": 2e-05, "loss": 0.04579235, "step": 15222 }, { "epoch": 30.446, "grad_norm": 1.3619619607925415, "learning_rate": 2e-05, "loss": 0.05623931, "step": 15223 }, { "epoch": 30.448, "grad_norm": 1.569140076637268, "learning_rate": 2e-05, "loss": 0.05256468, "step": 15224 }, { "epoch": 30.45, "grad_norm": 1.5590763092041016, "learning_rate": 2e-05, "loss": 0.04745616, "step": 15225 }, { "epoch": 30.452, "grad_norm": 1.8052546977996826, "learning_rate": 2e-05, "loss": 0.03997829, "step": 15226 }, { "epoch": 30.454, "grad_norm": 1.4032646417617798, "learning_rate": 2e-05, "loss": 0.03643226, "step": 15227 }, { "epoch": 30.456, "grad_norm": 1.6574431657791138, "learning_rate": 2e-05, "loss": 0.04806088, "step": 15228 }, { "epoch": 30.458, "grad_norm": 1.1438560485839844, "learning_rate": 2e-05, "loss": 0.04555596, "step": 15229 }, { "epoch": 30.46, "grad_norm": 1.6412994861602783, "learning_rate": 2e-05, "loss": 0.05041576, "step": 15230 }, { "epoch": 30.462, "grad_norm": 1.7027461528778076, "learning_rate": 2e-05, "loss": 0.04613438, "step": 15231 }, { "epoch": 30.464, "grad_norm": 1.8641504049301147, "learning_rate": 2e-05, "loss": 0.05400945, "step": 15232 }, { "epoch": 30.466, "grad_norm": 1.0201693773269653, "learning_rate": 2e-05, "loss": 0.03487154, "step": 15233 }, { "epoch": 30.468, "grad_norm": 3.329221248626709, "learning_rate": 2e-05, "loss": 0.0438869, "step": 15234 }, { "epoch": 30.47, "grad_norm": 2.2483973503112793, "learning_rate": 2e-05, "loss": 0.05888098, "step": 15235 }, { "epoch": 30.472, "grad_norm": 1.1053797006607056, "learning_rate": 2e-05, "loss": 0.02992825, "step": 15236 }, { "epoch": 30.474, "grad_norm": 1.3227661848068237, "learning_rate": 2e-05, "loss": 0.0443338, "step": 15237 }, { "epoch": 30.476, "grad_norm": 1.077270269393921, "learning_rate": 2e-05, "loss": 0.03753895, "step": 15238 }, { "epoch": 30.478, "grad_norm": 1.0654442310333252, "learning_rate": 2e-05, "loss": 0.04142526, "step": 15239 }, { "epoch": 30.48, "grad_norm": 1.0802520513534546, "learning_rate": 2e-05, "loss": 0.04611638, "step": 15240 }, { "epoch": 30.482, "grad_norm": 1.2629036903381348, "learning_rate": 2e-05, "loss": 0.06196934, "step": 15241 }, { "epoch": 30.484, "grad_norm": 1.756333589553833, "learning_rate": 2e-05, "loss": 0.04763553, "step": 15242 }, { "epoch": 30.486, "grad_norm": 1.0675008296966553, "learning_rate": 2e-05, "loss": 0.03430801, "step": 15243 }, { "epoch": 30.488, "grad_norm": 5.384986400604248, "learning_rate": 2e-05, "loss": 0.03727914, "step": 15244 }, { "epoch": 30.49, "grad_norm": 1.504477620124817, "learning_rate": 2e-05, "loss": 0.05895821, "step": 15245 }, { "epoch": 30.492, "grad_norm": 1.3034749031066895, "learning_rate": 2e-05, "loss": 0.04594578, "step": 15246 }, { "epoch": 30.494, "grad_norm": 1.38698148727417, "learning_rate": 2e-05, "loss": 0.05439015, "step": 15247 }, { "epoch": 30.496, "grad_norm": 1.0708712339401245, "learning_rate": 2e-05, "loss": 0.03840674, "step": 15248 }, { "epoch": 30.498, "grad_norm": 1.4558812379837036, "learning_rate": 2e-05, "loss": 0.02542023, "step": 15249 }, { "epoch": 30.5, "grad_norm": 1.7556490898132324, "learning_rate": 2e-05, "loss": 0.0409022, "step": 15250 }, { "epoch": 30.502, "grad_norm": 1.2140228748321533, "learning_rate": 2e-05, "loss": 0.03575923, "step": 15251 }, { "epoch": 30.504, "grad_norm": 0.9344707131385803, "learning_rate": 2e-05, "loss": 0.03167684, "step": 15252 }, { "epoch": 30.506, "grad_norm": 1.3499372005462646, "learning_rate": 2e-05, "loss": 0.03214483, "step": 15253 }, { "epoch": 30.508, "grad_norm": 1.1017028093338013, "learning_rate": 2e-05, "loss": 0.02525232, "step": 15254 }, { "epoch": 30.51, "grad_norm": 1.4885841608047485, "learning_rate": 2e-05, "loss": 0.06516996, "step": 15255 }, { "epoch": 30.512, "grad_norm": 1.4931526184082031, "learning_rate": 2e-05, "loss": 0.06183759, "step": 15256 }, { "epoch": 30.514, "grad_norm": 0.9158805012702942, "learning_rate": 2e-05, "loss": 0.03264806, "step": 15257 }, { "epoch": 30.516, "grad_norm": 1.7295507192611694, "learning_rate": 2e-05, "loss": 0.0437915, "step": 15258 }, { "epoch": 30.518, "grad_norm": 1.2573812007904053, "learning_rate": 2e-05, "loss": 0.04718456, "step": 15259 }, { "epoch": 30.52, "grad_norm": 1.111760139465332, "learning_rate": 2e-05, "loss": 0.03778823, "step": 15260 }, { "epoch": 30.522, "grad_norm": 1.3841087818145752, "learning_rate": 2e-05, "loss": 0.05139244, "step": 15261 }, { "epoch": 30.524, "grad_norm": 1.2217211723327637, "learning_rate": 2e-05, "loss": 0.05407543, "step": 15262 }, { "epoch": 30.526, "grad_norm": 1.9204769134521484, "learning_rate": 2e-05, "loss": 0.05501651, "step": 15263 }, { "epoch": 30.528, "grad_norm": 1.0646864175796509, "learning_rate": 2e-05, "loss": 0.04272152, "step": 15264 }, { "epoch": 30.53, "grad_norm": 1.138542652130127, "learning_rate": 2e-05, "loss": 0.04351484, "step": 15265 }, { "epoch": 30.532, "grad_norm": 1.1147321462631226, "learning_rate": 2e-05, "loss": 0.03230875, "step": 15266 }, { "epoch": 30.534, "grad_norm": 1.3464140892028809, "learning_rate": 2e-05, "loss": 0.05336832, "step": 15267 }, { "epoch": 30.536, "grad_norm": 1.3282221555709839, "learning_rate": 2e-05, "loss": 0.06214736, "step": 15268 }, { "epoch": 30.538, "grad_norm": 1.1814717054367065, "learning_rate": 2e-05, "loss": 0.03814064, "step": 15269 }, { "epoch": 30.54, "grad_norm": 1.167283535003662, "learning_rate": 2e-05, "loss": 0.04938902, "step": 15270 }, { "epoch": 30.542, "grad_norm": 1.5443036556243896, "learning_rate": 2e-05, "loss": 0.04786984, "step": 15271 }, { "epoch": 30.544, "grad_norm": 1.666343331336975, "learning_rate": 2e-05, "loss": 0.05091058, "step": 15272 }, { "epoch": 30.546, "grad_norm": 1.4363032579421997, "learning_rate": 2e-05, "loss": 0.04783217, "step": 15273 }, { "epoch": 30.548000000000002, "grad_norm": 1.348711371421814, "learning_rate": 2e-05, "loss": 0.05052876, "step": 15274 }, { "epoch": 30.55, "grad_norm": 1.2497211694717407, "learning_rate": 2e-05, "loss": 0.03438094, "step": 15275 }, { "epoch": 30.552, "grad_norm": 1.3024879693984985, "learning_rate": 2e-05, "loss": 0.04458308, "step": 15276 }, { "epoch": 30.554, "grad_norm": 1.4879077672958374, "learning_rate": 2e-05, "loss": 0.03748794, "step": 15277 }, { "epoch": 30.556, "grad_norm": 1.565683126449585, "learning_rate": 2e-05, "loss": 0.03531917, "step": 15278 }, { "epoch": 30.558, "grad_norm": 1.226104974746704, "learning_rate": 2e-05, "loss": 0.04298712, "step": 15279 }, { "epoch": 30.56, "grad_norm": 1.5387117862701416, "learning_rate": 2e-05, "loss": 0.04004962, "step": 15280 }, { "epoch": 30.562, "grad_norm": 1.0032356977462769, "learning_rate": 2e-05, "loss": 0.03250517, "step": 15281 }, { "epoch": 30.564, "grad_norm": 1.0234278440475464, "learning_rate": 2e-05, "loss": 0.04238243, "step": 15282 }, { "epoch": 30.566, "grad_norm": 1.151049256324768, "learning_rate": 2e-05, "loss": 0.04576783, "step": 15283 }, { "epoch": 30.568, "grad_norm": 2.4301161766052246, "learning_rate": 2e-05, "loss": 0.04964666, "step": 15284 }, { "epoch": 30.57, "grad_norm": 1.798929214477539, "learning_rate": 2e-05, "loss": 0.04637855, "step": 15285 }, { "epoch": 30.572, "grad_norm": 1.205482006072998, "learning_rate": 2e-05, "loss": 0.04132317, "step": 15286 }, { "epoch": 30.574, "grad_norm": 1.730236530303955, "learning_rate": 2e-05, "loss": 0.04239135, "step": 15287 }, { "epoch": 30.576, "grad_norm": 1.3888801336288452, "learning_rate": 2e-05, "loss": 0.04025697, "step": 15288 }, { "epoch": 30.578, "grad_norm": 1.2676804065704346, "learning_rate": 2e-05, "loss": 0.05485225, "step": 15289 }, { "epoch": 30.58, "grad_norm": 1.2869915962219238, "learning_rate": 2e-05, "loss": 0.04968848, "step": 15290 }, { "epoch": 30.582, "grad_norm": 1.0613614320755005, "learning_rate": 2e-05, "loss": 0.03498927, "step": 15291 }, { "epoch": 30.584, "grad_norm": 1.000826120376587, "learning_rate": 2e-05, "loss": 0.03525396, "step": 15292 }, { "epoch": 30.586, "grad_norm": 1.5731277465820312, "learning_rate": 2e-05, "loss": 0.05101633, "step": 15293 }, { "epoch": 30.588, "grad_norm": 1.1912803649902344, "learning_rate": 2e-05, "loss": 0.04636155, "step": 15294 }, { "epoch": 30.59, "grad_norm": 1.1083427667617798, "learning_rate": 2e-05, "loss": 0.04011728, "step": 15295 }, { "epoch": 30.592, "grad_norm": 0.8691169023513794, "learning_rate": 2e-05, "loss": 0.02712849, "step": 15296 }, { "epoch": 30.594, "grad_norm": 1.5057884454727173, "learning_rate": 2e-05, "loss": 0.05116381, "step": 15297 }, { "epoch": 30.596, "grad_norm": 1.8393473625183105, "learning_rate": 2e-05, "loss": 0.05369818, "step": 15298 }, { "epoch": 30.598, "grad_norm": 1.4146645069122314, "learning_rate": 2e-05, "loss": 0.04586646, "step": 15299 }, { "epoch": 30.6, "grad_norm": 1.0296281576156616, "learning_rate": 2e-05, "loss": 0.02862342, "step": 15300 }, { "epoch": 30.602, "grad_norm": 1.4884545803070068, "learning_rate": 2e-05, "loss": 0.03266924, "step": 15301 }, { "epoch": 30.604, "grad_norm": 1.6892744302749634, "learning_rate": 2e-05, "loss": 0.03972404, "step": 15302 }, { "epoch": 30.606, "grad_norm": 1.5136027336120605, "learning_rate": 2e-05, "loss": 0.02378543, "step": 15303 }, { "epoch": 30.608, "grad_norm": 1.1681137084960938, "learning_rate": 2e-05, "loss": 0.0225147, "step": 15304 }, { "epoch": 30.61, "grad_norm": 1.0413873195648193, "learning_rate": 2e-05, "loss": 0.03786266, "step": 15305 }, { "epoch": 30.612, "grad_norm": 1.3476159572601318, "learning_rate": 2e-05, "loss": 0.0479761, "step": 15306 }, { "epoch": 30.614, "grad_norm": 1.4096819162368774, "learning_rate": 2e-05, "loss": 0.04315577, "step": 15307 }, { "epoch": 30.616, "grad_norm": 1.1657134294509888, "learning_rate": 2e-05, "loss": 0.03629996, "step": 15308 }, { "epoch": 30.618, "grad_norm": 1.0118683576583862, "learning_rate": 2e-05, "loss": 0.03799508, "step": 15309 }, { "epoch": 30.62, "grad_norm": 1.3761227130889893, "learning_rate": 2e-05, "loss": 0.04091384, "step": 15310 }, { "epoch": 30.622, "grad_norm": 1.3315359354019165, "learning_rate": 2e-05, "loss": 0.05451095, "step": 15311 }, { "epoch": 30.624, "grad_norm": 1.0484113693237305, "learning_rate": 2e-05, "loss": 0.04014854, "step": 15312 }, { "epoch": 30.626, "grad_norm": 1.0485554933547974, "learning_rate": 2e-05, "loss": 0.03734627, "step": 15313 }, { "epoch": 30.628, "grad_norm": 1.7621971368789673, "learning_rate": 2e-05, "loss": 0.06587996, "step": 15314 }, { "epoch": 30.63, "grad_norm": 1.1702368259429932, "learning_rate": 2e-05, "loss": 0.03241003, "step": 15315 }, { "epoch": 30.632, "grad_norm": 0.9568459987640381, "learning_rate": 2e-05, "loss": 0.03606553, "step": 15316 }, { "epoch": 30.634, "grad_norm": 1.383159875869751, "learning_rate": 2e-05, "loss": 0.05273655, "step": 15317 }, { "epoch": 30.636, "grad_norm": 1.2365126609802246, "learning_rate": 2e-05, "loss": 0.03494096, "step": 15318 }, { "epoch": 30.638, "grad_norm": 1.046418309211731, "learning_rate": 2e-05, "loss": 0.03035589, "step": 15319 }, { "epoch": 30.64, "grad_norm": 1.7063677310943604, "learning_rate": 2e-05, "loss": 0.04815275, "step": 15320 }, { "epoch": 30.642, "grad_norm": 0.9858196973800659, "learning_rate": 2e-05, "loss": 0.0392507, "step": 15321 }, { "epoch": 30.644, "grad_norm": 2.5675480365753174, "learning_rate": 2e-05, "loss": 0.06889924, "step": 15322 }, { "epoch": 30.646, "grad_norm": 1.2175625562667847, "learning_rate": 2e-05, "loss": 0.04431954, "step": 15323 }, { "epoch": 30.648, "grad_norm": 1.0321712493896484, "learning_rate": 2e-05, "loss": 0.03850232, "step": 15324 }, { "epoch": 30.65, "grad_norm": 1.097364068031311, "learning_rate": 2e-05, "loss": 0.04117165, "step": 15325 }, { "epoch": 30.652, "grad_norm": 1.0748999118804932, "learning_rate": 2e-05, "loss": 0.03478822, "step": 15326 }, { "epoch": 30.654, "grad_norm": 3.8500382900238037, "learning_rate": 2e-05, "loss": 0.03891071, "step": 15327 }, { "epoch": 30.656, "grad_norm": 1.0356502532958984, "learning_rate": 2e-05, "loss": 0.04003483, "step": 15328 }, { "epoch": 30.658, "grad_norm": 1.2226482629776, "learning_rate": 2e-05, "loss": 0.04361072, "step": 15329 }, { "epoch": 30.66, "grad_norm": 3.1957709789276123, "learning_rate": 2e-05, "loss": 0.04747942, "step": 15330 }, { "epoch": 30.662, "grad_norm": 1.8828949928283691, "learning_rate": 2e-05, "loss": 0.05194776, "step": 15331 }, { "epoch": 30.664, "grad_norm": 1.5473778247833252, "learning_rate": 2e-05, "loss": 0.05460526, "step": 15332 }, { "epoch": 30.666, "grad_norm": 1.3898783922195435, "learning_rate": 2e-05, "loss": 0.05540655, "step": 15333 }, { "epoch": 30.668, "grad_norm": 0.9823468923568726, "learning_rate": 2e-05, "loss": 0.02938699, "step": 15334 }, { "epoch": 30.67, "grad_norm": 2.376797676086426, "learning_rate": 2e-05, "loss": 0.03299019, "step": 15335 }, { "epoch": 30.672, "grad_norm": 1.2253479957580566, "learning_rate": 2e-05, "loss": 0.03981312, "step": 15336 }, { "epoch": 30.674, "grad_norm": 1.2062132358551025, "learning_rate": 2e-05, "loss": 0.03478318, "step": 15337 }, { "epoch": 30.676, "grad_norm": 0.9848882555961609, "learning_rate": 2e-05, "loss": 0.03393116, "step": 15338 }, { "epoch": 30.678, "grad_norm": 1.059923768043518, "learning_rate": 2e-05, "loss": 0.03325968, "step": 15339 }, { "epoch": 30.68, "grad_norm": 1.6200644969940186, "learning_rate": 2e-05, "loss": 0.03570171, "step": 15340 }, { "epoch": 30.682, "grad_norm": 1.2216757535934448, "learning_rate": 2e-05, "loss": 0.04476602, "step": 15341 }, { "epoch": 30.684, "grad_norm": 2.0105185508728027, "learning_rate": 2e-05, "loss": 0.04382947, "step": 15342 }, { "epoch": 30.686, "grad_norm": 1.8764110803604126, "learning_rate": 2e-05, "loss": 0.05112007, "step": 15343 }, { "epoch": 30.688, "grad_norm": 1.9776445627212524, "learning_rate": 2e-05, "loss": 0.05051792, "step": 15344 }, { "epoch": 30.69, "grad_norm": 1.2636760473251343, "learning_rate": 2e-05, "loss": 0.03654606, "step": 15345 }, { "epoch": 30.692, "grad_norm": 1.58008873462677, "learning_rate": 2e-05, "loss": 0.05276753, "step": 15346 }, { "epoch": 30.694, "grad_norm": 1.1315622329711914, "learning_rate": 2e-05, "loss": 0.04263277, "step": 15347 }, { "epoch": 30.696, "grad_norm": 0.9804048538208008, "learning_rate": 2e-05, "loss": 0.03775415, "step": 15348 }, { "epoch": 30.698, "grad_norm": 1.381704568862915, "learning_rate": 2e-05, "loss": 0.03425687, "step": 15349 }, { "epoch": 30.7, "grad_norm": 0.9259615540504456, "learning_rate": 2e-05, "loss": 0.02952418, "step": 15350 }, { "epoch": 30.701999999999998, "grad_norm": 1.5775662660598755, "learning_rate": 2e-05, "loss": 0.05005368, "step": 15351 }, { "epoch": 30.704, "grad_norm": 1.3794008493423462, "learning_rate": 2e-05, "loss": 0.0453692, "step": 15352 }, { "epoch": 30.706, "grad_norm": 1.189494013786316, "learning_rate": 2e-05, "loss": 0.04057778, "step": 15353 }, { "epoch": 30.708, "grad_norm": 0.9436183571815491, "learning_rate": 2e-05, "loss": 0.02952637, "step": 15354 }, { "epoch": 30.71, "grad_norm": 1.8463188409805298, "learning_rate": 2e-05, "loss": 0.05099573, "step": 15355 }, { "epoch": 30.712, "grad_norm": 1.0513994693756104, "learning_rate": 2e-05, "loss": 0.03942338, "step": 15356 }, { "epoch": 30.714, "grad_norm": 1.306980013847351, "learning_rate": 2e-05, "loss": 0.04591175, "step": 15357 }, { "epoch": 30.716, "grad_norm": 1.1594239473342896, "learning_rate": 2e-05, "loss": 0.03216893, "step": 15358 }, { "epoch": 30.718, "grad_norm": 1.6709867715835571, "learning_rate": 2e-05, "loss": 0.04788967, "step": 15359 }, { "epoch": 30.72, "grad_norm": 1.4016337394714355, "learning_rate": 2e-05, "loss": 0.04345062, "step": 15360 }, { "epoch": 30.722, "grad_norm": 1.594774603843689, "learning_rate": 2e-05, "loss": 0.04838118, "step": 15361 }, { "epoch": 30.724, "grad_norm": 1.3870517015457153, "learning_rate": 2e-05, "loss": 0.04288274, "step": 15362 }, { "epoch": 30.726, "grad_norm": 1.472120761871338, "learning_rate": 2e-05, "loss": 0.05352866, "step": 15363 }, { "epoch": 30.728, "grad_norm": 1.9960273504257202, "learning_rate": 2e-05, "loss": 0.0549426, "step": 15364 }, { "epoch": 30.73, "grad_norm": 1.4268264770507812, "learning_rate": 2e-05, "loss": 0.04215187, "step": 15365 }, { "epoch": 30.732, "grad_norm": 1.359204649925232, "learning_rate": 2e-05, "loss": 0.03503023, "step": 15366 }, { "epoch": 30.734, "grad_norm": 1.4836820363998413, "learning_rate": 2e-05, "loss": 0.04756457, "step": 15367 }, { "epoch": 30.736, "grad_norm": 1.3566566705703735, "learning_rate": 2e-05, "loss": 0.04132615, "step": 15368 }, { "epoch": 30.738, "grad_norm": 1.0889496803283691, "learning_rate": 2e-05, "loss": 0.03694168, "step": 15369 }, { "epoch": 30.74, "grad_norm": 1.9702776670455933, "learning_rate": 2e-05, "loss": 0.04029308, "step": 15370 }, { "epoch": 30.742, "grad_norm": 1.7063497304916382, "learning_rate": 2e-05, "loss": 0.04850524, "step": 15371 }, { "epoch": 30.744, "grad_norm": 1.1768672466278076, "learning_rate": 2e-05, "loss": 0.04457154, "step": 15372 }, { "epoch": 30.746, "grad_norm": 1.506422758102417, "learning_rate": 2e-05, "loss": 0.05803453, "step": 15373 }, { "epoch": 30.748, "grad_norm": 1.2613164186477661, "learning_rate": 2e-05, "loss": 0.0555273, "step": 15374 }, { "epoch": 30.75, "grad_norm": 1.302585482597351, "learning_rate": 2e-05, "loss": 0.03637126, "step": 15375 }, { "epoch": 30.752, "grad_norm": 1.2151802778244019, "learning_rate": 2e-05, "loss": 0.04023366, "step": 15376 }, { "epoch": 30.754, "grad_norm": 1.4890795946121216, "learning_rate": 2e-05, "loss": 0.04400902, "step": 15377 }, { "epoch": 30.756, "grad_norm": 1.8800874948501587, "learning_rate": 2e-05, "loss": 0.04399022, "step": 15378 }, { "epoch": 30.758, "grad_norm": 1.0342926979064941, "learning_rate": 2e-05, "loss": 0.04493139, "step": 15379 }, { "epoch": 30.76, "grad_norm": 2.302096366882324, "learning_rate": 2e-05, "loss": 0.0552254, "step": 15380 }, { "epoch": 30.762, "grad_norm": 0.9423291087150574, "learning_rate": 2e-05, "loss": 0.03075608, "step": 15381 }, { "epoch": 30.764, "grad_norm": 1.7802855968475342, "learning_rate": 2e-05, "loss": 0.0369587, "step": 15382 }, { "epoch": 30.766, "grad_norm": 1.1834336519241333, "learning_rate": 2e-05, "loss": 0.04712176, "step": 15383 }, { "epoch": 30.768, "grad_norm": 1.2314602136611938, "learning_rate": 2e-05, "loss": 0.04508484, "step": 15384 }, { "epoch": 30.77, "grad_norm": 1.5756516456604004, "learning_rate": 2e-05, "loss": 0.04346953, "step": 15385 }, { "epoch": 30.772, "grad_norm": 3.2680647373199463, "learning_rate": 2e-05, "loss": 0.03386783, "step": 15386 }, { "epoch": 30.774, "grad_norm": 1.0917185544967651, "learning_rate": 2e-05, "loss": 0.04377945, "step": 15387 }, { "epoch": 30.776, "grad_norm": 1.5100263357162476, "learning_rate": 2e-05, "loss": 0.0553541, "step": 15388 }, { "epoch": 30.778, "grad_norm": 1.24834406375885, "learning_rate": 2e-05, "loss": 0.03370894, "step": 15389 }, { "epoch": 30.78, "grad_norm": 1.9447808265686035, "learning_rate": 2e-05, "loss": 0.05794008, "step": 15390 }, { "epoch": 30.782, "grad_norm": 1.1977484226226807, "learning_rate": 2e-05, "loss": 0.04914448, "step": 15391 }, { "epoch": 30.784, "grad_norm": 2.5817272663116455, "learning_rate": 2e-05, "loss": 0.04607561, "step": 15392 }, { "epoch": 30.786, "grad_norm": 0.9399213790893555, "learning_rate": 2e-05, "loss": 0.03578041, "step": 15393 }, { "epoch": 30.788, "grad_norm": 1.2921048402786255, "learning_rate": 2e-05, "loss": 0.03382245, "step": 15394 }, { "epoch": 30.79, "grad_norm": 1.1487122774124146, "learning_rate": 2e-05, "loss": 0.04007035, "step": 15395 }, { "epoch": 30.792, "grad_norm": 1.0508596897125244, "learning_rate": 2e-05, "loss": 0.03421424, "step": 15396 }, { "epoch": 30.794, "grad_norm": 0.864069938659668, "learning_rate": 2e-05, "loss": 0.02603904, "step": 15397 }, { "epoch": 30.796, "grad_norm": 1.4718056917190552, "learning_rate": 2e-05, "loss": 0.038355, "step": 15398 }, { "epoch": 30.798000000000002, "grad_norm": 1.2328131198883057, "learning_rate": 2e-05, "loss": 0.04491291, "step": 15399 }, { "epoch": 30.8, "grad_norm": 1.186107873916626, "learning_rate": 2e-05, "loss": 0.05053133, "step": 15400 }, { "epoch": 30.802, "grad_norm": 1.0685789585113525, "learning_rate": 2e-05, "loss": 0.04427924, "step": 15401 }, { "epoch": 30.804, "grad_norm": 1.1698602437973022, "learning_rate": 2e-05, "loss": 0.03796928, "step": 15402 }, { "epoch": 30.806, "grad_norm": 1.2792614698410034, "learning_rate": 2e-05, "loss": 0.0548584, "step": 15403 }, { "epoch": 30.808, "grad_norm": 1.533371090888977, "learning_rate": 2e-05, "loss": 0.04547575, "step": 15404 }, { "epoch": 30.81, "grad_norm": 1.5394388437271118, "learning_rate": 2e-05, "loss": 0.05188145, "step": 15405 }, { "epoch": 30.812, "grad_norm": 1.0687896013259888, "learning_rate": 2e-05, "loss": 0.04358595, "step": 15406 }, { "epoch": 30.814, "grad_norm": 1.0643762350082397, "learning_rate": 2e-05, "loss": 0.03647963, "step": 15407 }, { "epoch": 30.816, "grad_norm": 1.1489731073379517, "learning_rate": 2e-05, "loss": 0.05470124, "step": 15408 }, { "epoch": 30.818, "grad_norm": 1.8806666135787964, "learning_rate": 2e-05, "loss": 0.05590469, "step": 15409 }, { "epoch": 30.82, "grad_norm": 1.0280253887176514, "learning_rate": 2e-05, "loss": 0.04052454, "step": 15410 }, { "epoch": 30.822, "grad_norm": 1.6329935789108276, "learning_rate": 2e-05, "loss": 0.04793561, "step": 15411 }, { "epoch": 30.824, "grad_norm": 1.4511553049087524, "learning_rate": 2e-05, "loss": 0.03960864, "step": 15412 }, { "epoch": 30.826, "grad_norm": 1.6550666093826294, "learning_rate": 2e-05, "loss": 0.04335929, "step": 15413 }, { "epoch": 30.828, "grad_norm": 1.3845441341400146, "learning_rate": 2e-05, "loss": 0.03868318, "step": 15414 }, { "epoch": 30.83, "grad_norm": 1.1391074657440186, "learning_rate": 2e-05, "loss": 0.0478869, "step": 15415 }, { "epoch": 30.832, "grad_norm": 1.278219223022461, "learning_rate": 2e-05, "loss": 0.04307889, "step": 15416 }, { "epoch": 30.834, "grad_norm": 1.268723726272583, "learning_rate": 2e-05, "loss": 0.03802036, "step": 15417 }, { "epoch": 30.836, "grad_norm": 1.159140706062317, "learning_rate": 2e-05, "loss": 0.03451194, "step": 15418 }, { "epoch": 30.838, "grad_norm": 1.1894630193710327, "learning_rate": 2e-05, "loss": 0.0473679, "step": 15419 }, { "epoch": 30.84, "grad_norm": 1.262392520904541, "learning_rate": 2e-05, "loss": 0.03899877, "step": 15420 }, { "epoch": 30.842, "grad_norm": 1.446376085281372, "learning_rate": 2e-05, "loss": 0.03620021, "step": 15421 }, { "epoch": 30.844, "grad_norm": 1.4437825679779053, "learning_rate": 2e-05, "loss": 0.04326116, "step": 15422 }, { "epoch": 30.846, "grad_norm": 1.1866358518600464, "learning_rate": 2e-05, "loss": 0.03965632, "step": 15423 }, { "epoch": 30.848, "grad_norm": 1.379371166229248, "learning_rate": 2e-05, "loss": 0.04506073, "step": 15424 }, { "epoch": 30.85, "grad_norm": 2.1914074420928955, "learning_rate": 2e-05, "loss": 0.05980007, "step": 15425 }, { "epoch": 30.852, "grad_norm": 1.2148512601852417, "learning_rate": 2e-05, "loss": 0.04434382, "step": 15426 }, { "epoch": 30.854, "grad_norm": 1.0785166025161743, "learning_rate": 2e-05, "loss": 0.03507778, "step": 15427 }, { "epoch": 30.856, "grad_norm": 1.0878171920776367, "learning_rate": 2e-05, "loss": 0.03332923, "step": 15428 }, { "epoch": 30.858, "grad_norm": 1.4415518045425415, "learning_rate": 2e-05, "loss": 0.05356176, "step": 15429 }, { "epoch": 30.86, "grad_norm": 1.4083247184753418, "learning_rate": 2e-05, "loss": 0.05534295, "step": 15430 }, { "epoch": 30.862, "grad_norm": 1.1404030323028564, "learning_rate": 2e-05, "loss": 0.0465, "step": 15431 }, { "epoch": 30.864, "grad_norm": 1.4626610279083252, "learning_rate": 2e-05, "loss": 0.03837757, "step": 15432 }, { "epoch": 30.866, "grad_norm": 1.0799211263656616, "learning_rate": 2e-05, "loss": 0.04735259, "step": 15433 }, { "epoch": 30.868, "grad_norm": 1.3441239595413208, "learning_rate": 2e-05, "loss": 0.0439587, "step": 15434 }, { "epoch": 30.87, "grad_norm": 1.341903567314148, "learning_rate": 2e-05, "loss": 0.04261306, "step": 15435 }, { "epoch": 30.872, "grad_norm": 2.014629602432251, "learning_rate": 2e-05, "loss": 0.04418968, "step": 15436 }, { "epoch": 30.874, "grad_norm": 1.4627952575683594, "learning_rate": 2e-05, "loss": 0.04787583, "step": 15437 }, { "epoch": 30.876, "grad_norm": 1.0740972757339478, "learning_rate": 2e-05, "loss": 0.03408062, "step": 15438 }, { "epoch": 30.878, "grad_norm": 1.458024024963379, "learning_rate": 2e-05, "loss": 0.05833004, "step": 15439 }, { "epoch": 30.88, "grad_norm": 1.0955424308776855, "learning_rate": 2e-05, "loss": 0.04713885, "step": 15440 }, { "epoch": 30.882, "grad_norm": 1.673150658607483, "learning_rate": 2e-05, "loss": 0.05364336, "step": 15441 }, { "epoch": 30.884, "grad_norm": 1.1266365051269531, "learning_rate": 2e-05, "loss": 0.04248474, "step": 15442 }, { "epoch": 30.886, "grad_norm": 1.5739974975585938, "learning_rate": 2e-05, "loss": 0.03701893, "step": 15443 }, { "epoch": 30.888, "grad_norm": 1.4596102237701416, "learning_rate": 2e-05, "loss": 0.047738, "step": 15444 }, { "epoch": 30.89, "grad_norm": 1.0531946420669556, "learning_rate": 2e-05, "loss": 0.04053745, "step": 15445 }, { "epoch": 30.892, "grad_norm": 0.9864213466644287, "learning_rate": 2e-05, "loss": 0.03761032, "step": 15446 }, { "epoch": 30.894, "grad_norm": 2.1464390754699707, "learning_rate": 2e-05, "loss": 0.05027218, "step": 15447 }, { "epoch": 30.896, "grad_norm": 1.2808313369750977, "learning_rate": 2e-05, "loss": 0.04857701, "step": 15448 }, { "epoch": 30.898, "grad_norm": 1.7590348720550537, "learning_rate": 2e-05, "loss": 0.05534477, "step": 15449 }, { "epoch": 30.9, "grad_norm": 1.0882501602172852, "learning_rate": 2e-05, "loss": 0.04137859, "step": 15450 }, { "epoch": 30.902, "grad_norm": 0.946066677570343, "learning_rate": 2e-05, "loss": 0.03223543, "step": 15451 }, { "epoch": 30.904, "grad_norm": 0.9430605173110962, "learning_rate": 2e-05, "loss": 0.0311843, "step": 15452 }, { "epoch": 30.906, "grad_norm": 1.3041696548461914, "learning_rate": 2e-05, "loss": 0.05397451, "step": 15453 }, { "epoch": 30.908, "grad_norm": 1.3684122562408447, "learning_rate": 2e-05, "loss": 0.05539594, "step": 15454 }, { "epoch": 30.91, "grad_norm": 2.258601665496826, "learning_rate": 2e-05, "loss": 0.05372835, "step": 15455 }, { "epoch": 30.912, "grad_norm": 2.0608880519866943, "learning_rate": 2e-05, "loss": 0.03330045, "step": 15456 }, { "epoch": 30.914, "grad_norm": 1.1213666200637817, "learning_rate": 2e-05, "loss": 0.04753671, "step": 15457 }, { "epoch": 30.916, "grad_norm": 0.942790150642395, "learning_rate": 2e-05, "loss": 0.03259289, "step": 15458 }, { "epoch": 30.918, "grad_norm": 1.2493982315063477, "learning_rate": 2e-05, "loss": 0.04098676, "step": 15459 }, { "epoch": 30.92, "grad_norm": 1.1307854652404785, "learning_rate": 2e-05, "loss": 0.04249147, "step": 15460 }, { "epoch": 30.922, "grad_norm": 1.5322867631912231, "learning_rate": 2e-05, "loss": 0.03617536, "step": 15461 }, { "epoch": 30.924, "grad_norm": 1.1813342571258545, "learning_rate": 2e-05, "loss": 0.05639866, "step": 15462 }, { "epoch": 30.926, "grad_norm": 0.8674895763397217, "learning_rate": 2e-05, "loss": 0.03328463, "step": 15463 }, { "epoch": 30.928, "grad_norm": 1.5556342601776123, "learning_rate": 2e-05, "loss": 0.05676714, "step": 15464 }, { "epoch": 30.93, "grad_norm": 1.0045814514160156, "learning_rate": 2e-05, "loss": 0.03216821, "step": 15465 }, { "epoch": 30.932, "grad_norm": 1.0363186597824097, "learning_rate": 2e-05, "loss": 0.04014078, "step": 15466 }, { "epoch": 30.934, "grad_norm": 1.386189341545105, "learning_rate": 2e-05, "loss": 0.05402887, "step": 15467 }, { "epoch": 30.936, "grad_norm": 1.137304663658142, "learning_rate": 2e-05, "loss": 0.03464006, "step": 15468 }, { "epoch": 30.938, "grad_norm": 1.3049153089523315, "learning_rate": 2e-05, "loss": 0.04342077, "step": 15469 }, { "epoch": 30.94, "grad_norm": 1.1227730512619019, "learning_rate": 2e-05, "loss": 0.04238366, "step": 15470 }, { "epoch": 30.942, "grad_norm": 1.2162871360778809, "learning_rate": 2e-05, "loss": 0.06160378, "step": 15471 }, { "epoch": 30.944, "grad_norm": 0.8703532218933105, "learning_rate": 2e-05, "loss": 0.02413613, "step": 15472 }, { "epoch": 30.946, "grad_norm": 1.9870779514312744, "learning_rate": 2e-05, "loss": 0.03381436, "step": 15473 }, { "epoch": 30.948, "grad_norm": 1.9186214208602905, "learning_rate": 2e-05, "loss": 0.05125932, "step": 15474 }, { "epoch": 30.95, "grad_norm": 1.7328497171401978, "learning_rate": 2e-05, "loss": 0.0443011, "step": 15475 }, { "epoch": 30.951999999999998, "grad_norm": 0.9649899005889893, "learning_rate": 2e-05, "loss": 0.03079892, "step": 15476 }, { "epoch": 30.954, "grad_norm": 0.9868589043617249, "learning_rate": 2e-05, "loss": 0.02492999, "step": 15477 }, { "epoch": 30.956, "grad_norm": 1.2447783946990967, "learning_rate": 2e-05, "loss": 0.05273892, "step": 15478 }, { "epoch": 30.958, "grad_norm": 1.2232327461242676, "learning_rate": 2e-05, "loss": 0.04913422, "step": 15479 }, { "epoch": 30.96, "grad_norm": 1.6843918561935425, "learning_rate": 2e-05, "loss": 0.05371039, "step": 15480 }, { "epoch": 30.962, "grad_norm": 0.8725624680519104, "learning_rate": 2e-05, "loss": 0.02640246, "step": 15481 }, { "epoch": 30.964, "grad_norm": 1.2975506782531738, "learning_rate": 2e-05, "loss": 0.05456071, "step": 15482 }, { "epoch": 30.966, "grad_norm": 1.7329095602035522, "learning_rate": 2e-05, "loss": 0.04484017, "step": 15483 }, { "epoch": 30.968, "grad_norm": 1.4026141166687012, "learning_rate": 2e-05, "loss": 0.05021109, "step": 15484 }, { "epoch": 30.97, "grad_norm": 1.046205997467041, "learning_rate": 2e-05, "loss": 0.03913869, "step": 15485 }, { "epoch": 30.972, "grad_norm": 3.3396363258361816, "learning_rate": 2e-05, "loss": 0.04359189, "step": 15486 }, { "epoch": 30.974, "grad_norm": 1.369861125946045, "learning_rate": 2e-05, "loss": 0.04546359, "step": 15487 }, { "epoch": 30.976, "grad_norm": 1.4066284894943237, "learning_rate": 2e-05, "loss": 0.03458145, "step": 15488 }, { "epoch": 30.978, "grad_norm": 1.479104995727539, "learning_rate": 2e-05, "loss": 0.04172707, "step": 15489 }, { "epoch": 30.98, "grad_norm": 1.0808502435684204, "learning_rate": 2e-05, "loss": 0.0374383, "step": 15490 }, { "epoch": 30.982, "grad_norm": 1.153609275817871, "learning_rate": 2e-05, "loss": 0.04683513, "step": 15491 }, { "epoch": 30.984, "grad_norm": 1.3860514163970947, "learning_rate": 2e-05, "loss": 0.04752363, "step": 15492 }, { "epoch": 30.986, "grad_norm": 1.005586862564087, "learning_rate": 2e-05, "loss": 0.03461302, "step": 15493 }, { "epoch": 30.988, "grad_norm": 2.63100266456604, "learning_rate": 2e-05, "loss": 0.05241647, "step": 15494 }, { "epoch": 30.99, "grad_norm": 1.4951649904251099, "learning_rate": 2e-05, "loss": 0.03878006, "step": 15495 }, { "epoch": 30.992, "grad_norm": 1.208921194076538, "learning_rate": 2e-05, "loss": 0.04567692, "step": 15496 }, { "epoch": 30.994, "grad_norm": 1.2812520265579224, "learning_rate": 2e-05, "loss": 0.03168442, "step": 15497 }, { "epoch": 30.996, "grad_norm": 1.3722220659255981, "learning_rate": 2e-05, "loss": 0.04763746, "step": 15498 }, { "epoch": 30.998, "grad_norm": 1.4294661283493042, "learning_rate": 2e-05, "loss": 0.05221847, "step": 15499 }, { "epoch": 31.0, "grad_norm": 1.2685956954956055, "learning_rate": 2e-05, "loss": 0.05318395, "step": 15500 }, { "epoch": 31.0, "eval_performance": { "AngleClassification_1": 0.994, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9780439121756487, "Equal_1": 1.0, "Equal_2": 0.9880239520958084, "Equal_3": 0.9860279441117764, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9919839679358717, "Parallel_3": 0.994, "Perpendicular_1": 0.998, "Perpendicular_2": 0.992, "Perpendicular_3": 0.8957915831663327, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.9976666666666667, "PointLiesOnCircle_3": 0.9936, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9880239520958084 }, "eval_runtime": 319.7306, "eval_samples_per_second": 32.84, "eval_steps_per_second": 0.657, "step": 15500 }, { "epoch": 31.002, "grad_norm": 1.2148770093917847, "learning_rate": 2e-05, "loss": 0.04367323, "step": 15501 }, { "epoch": 31.004, "grad_norm": 1.2555484771728516, "learning_rate": 2e-05, "loss": 0.04165998, "step": 15502 }, { "epoch": 31.006, "grad_norm": 1.7054599523544312, "learning_rate": 2e-05, "loss": 0.04306837, "step": 15503 }, { "epoch": 31.008, "grad_norm": 1.1247222423553467, "learning_rate": 2e-05, "loss": 0.04493858, "step": 15504 }, { "epoch": 31.01, "grad_norm": 1.21216881275177, "learning_rate": 2e-05, "loss": 0.04912875, "step": 15505 }, { "epoch": 31.012, "grad_norm": 1.0440160036087036, "learning_rate": 2e-05, "loss": 0.03388001, "step": 15506 }, { "epoch": 31.014, "grad_norm": 2.1243808269500732, "learning_rate": 2e-05, "loss": 0.04466413, "step": 15507 }, { "epoch": 31.016, "grad_norm": 1.024837851524353, "learning_rate": 2e-05, "loss": 0.03612889, "step": 15508 }, { "epoch": 31.018, "grad_norm": 1.3261427879333496, "learning_rate": 2e-05, "loss": 0.04339204, "step": 15509 }, { "epoch": 31.02, "grad_norm": 1.1927242279052734, "learning_rate": 2e-05, "loss": 0.04285469, "step": 15510 }, { "epoch": 31.022, "grad_norm": 1.6944687366485596, "learning_rate": 2e-05, "loss": 0.06444372, "step": 15511 }, { "epoch": 31.024, "grad_norm": 1.8218998908996582, "learning_rate": 2e-05, "loss": 0.04665653, "step": 15512 }, { "epoch": 31.026, "grad_norm": 0.961631715297699, "learning_rate": 2e-05, "loss": 0.02488809, "step": 15513 }, { "epoch": 31.028, "grad_norm": 1.3068662881851196, "learning_rate": 2e-05, "loss": 0.04824544, "step": 15514 }, { "epoch": 31.03, "grad_norm": 1.7906019687652588, "learning_rate": 2e-05, "loss": 0.03604405, "step": 15515 }, { "epoch": 31.032, "grad_norm": 0.9942117929458618, "learning_rate": 2e-05, "loss": 0.04298211, "step": 15516 }, { "epoch": 31.034, "grad_norm": 1.2944165468215942, "learning_rate": 2e-05, "loss": 0.04983748, "step": 15517 }, { "epoch": 31.036, "grad_norm": 1.3310195207595825, "learning_rate": 2e-05, "loss": 0.04212454, "step": 15518 }, { "epoch": 31.038, "grad_norm": 1.0146511793136597, "learning_rate": 2e-05, "loss": 0.03470632, "step": 15519 }, { "epoch": 31.04, "grad_norm": 1.0683945417404175, "learning_rate": 2e-05, "loss": 0.03555411, "step": 15520 }, { "epoch": 31.042, "grad_norm": 0.9860395789146423, "learning_rate": 2e-05, "loss": 0.03534026, "step": 15521 }, { "epoch": 31.044, "grad_norm": 1.2321219444274902, "learning_rate": 2e-05, "loss": 0.04802042, "step": 15522 }, { "epoch": 31.046, "grad_norm": 0.9068757891654968, "learning_rate": 2e-05, "loss": 0.03739195, "step": 15523 }, { "epoch": 31.048, "grad_norm": 1.0807992219924927, "learning_rate": 2e-05, "loss": 0.03552995, "step": 15524 }, { "epoch": 31.05, "grad_norm": 1.2086913585662842, "learning_rate": 2e-05, "loss": 0.03402312, "step": 15525 }, { "epoch": 31.052, "grad_norm": 1.1114925146102905, "learning_rate": 2e-05, "loss": 0.04508002, "step": 15526 }, { "epoch": 31.054, "grad_norm": 1.053460717201233, "learning_rate": 2e-05, "loss": 0.03332749, "step": 15527 }, { "epoch": 31.056, "grad_norm": 1.5291039943695068, "learning_rate": 2e-05, "loss": 0.07005154, "step": 15528 }, { "epoch": 31.058, "grad_norm": 1.3969502449035645, "learning_rate": 2e-05, "loss": 0.03875972, "step": 15529 }, { "epoch": 31.06, "grad_norm": 1.258709192276001, "learning_rate": 2e-05, "loss": 0.0648275, "step": 15530 }, { "epoch": 31.062, "grad_norm": 2.1577484607696533, "learning_rate": 2e-05, "loss": 0.04727962, "step": 15531 }, { "epoch": 31.064, "grad_norm": 1.453369379043579, "learning_rate": 2e-05, "loss": 0.03450019, "step": 15532 }, { "epoch": 31.066, "grad_norm": 1.3698201179504395, "learning_rate": 2e-05, "loss": 0.0564566, "step": 15533 }, { "epoch": 31.068, "grad_norm": 1.4147790670394897, "learning_rate": 2e-05, "loss": 0.06183496, "step": 15534 }, { "epoch": 31.07, "grad_norm": 0.8963314294815063, "learning_rate": 2e-05, "loss": 0.0314716, "step": 15535 }, { "epoch": 31.072, "grad_norm": 1.7133666276931763, "learning_rate": 2e-05, "loss": 0.03604105, "step": 15536 }, { "epoch": 31.074, "grad_norm": 0.9274123907089233, "learning_rate": 2e-05, "loss": 0.03644059, "step": 15537 }, { "epoch": 31.076, "grad_norm": 0.9932688474655151, "learning_rate": 2e-05, "loss": 0.03320706, "step": 15538 }, { "epoch": 31.078, "grad_norm": 1.139175534248352, "learning_rate": 2e-05, "loss": 0.04096094, "step": 15539 }, { "epoch": 31.08, "grad_norm": 2.054170846939087, "learning_rate": 2e-05, "loss": 0.03290722, "step": 15540 }, { "epoch": 31.082, "grad_norm": 1.0771855115890503, "learning_rate": 2e-05, "loss": 0.04088487, "step": 15541 }, { "epoch": 31.084, "grad_norm": 4.079372406005859, "learning_rate": 2e-05, "loss": 0.06007933, "step": 15542 }, { "epoch": 31.086, "grad_norm": 4.406796932220459, "learning_rate": 2e-05, "loss": 0.0371773, "step": 15543 }, { "epoch": 31.088, "grad_norm": 1.232641339302063, "learning_rate": 2e-05, "loss": 0.04272714, "step": 15544 }, { "epoch": 31.09, "grad_norm": 1.339889407157898, "learning_rate": 2e-05, "loss": 0.03682033, "step": 15545 }, { "epoch": 31.092, "grad_norm": 1.6685773134231567, "learning_rate": 2e-05, "loss": 0.04497613, "step": 15546 }, { "epoch": 31.094, "grad_norm": 1.1808186769485474, "learning_rate": 2e-05, "loss": 0.04455563, "step": 15547 }, { "epoch": 31.096, "grad_norm": 1.0447876453399658, "learning_rate": 2e-05, "loss": 0.03678391, "step": 15548 }, { "epoch": 31.098, "grad_norm": 1.3275498151779175, "learning_rate": 2e-05, "loss": 0.05487559, "step": 15549 }, { "epoch": 31.1, "grad_norm": 1.0034887790679932, "learning_rate": 2e-05, "loss": 0.03993573, "step": 15550 }, { "epoch": 31.102, "grad_norm": 1.2984068393707275, "learning_rate": 2e-05, "loss": 0.03917623, "step": 15551 }, { "epoch": 31.104, "grad_norm": 1.0606168508529663, "learning_rate": 2e-05, "loss": 0.03269617, "step": 15552 }, { "epoch": 31.106, "grad_norm": 1.4763702154159546, "learning_rate": 2e-05, "loss": 0.05329809, "step": 15553 }, { "epoch": 31.108, "grad_norm": 1.3631935119628906, "learning_rate": 2e-05, "loss": 0.03446741, "step": 15554 }, { "epoch": 31.11, "grad_norm": 1.3570765256881714, "learning_rate": 2e-05, "loss": 0.04257222, "step": 15555 }, { "epoch": 31.112, "grad_norm": 1.1822189092636108, "learning_rate": 2e-05, "loss": 0.04207947, "step": 15556 }, { "epoch": 31.114, "grad_norm": 1.150335431098938, "learning_rate": 2e-05, "loss": 0.04551108, "step": 15557 }, { "epoch": 31.116, "grad_norm": 1.3339632749557495, "learning_rate": 2e-05, "loss": 0.05508456, "step": 15558 }, { "epoch": 31.118, "grad_norm": 2.0711183547973633, "learning_rate": 2e-05, "loss": 0.0530576, "step": 15559 }, { "epoch": 31.12, "grad_norm": 1.3315399885177612, "learning_rate": 2e-05, "loss": 0.04907047, "step": 15560 }, { "epoch": 31.122, "grad_norm": 1.204527497291565, "learning_rate": 2e-05, "loss": 0.04079658, "step": 15561 }, { "epoch": 31.124, "grad_norm": 2.65455961227417, "learning_rate": 2e-05, "loss": 0.05661328, "step": 15562 }, { "epoch": 31.126, "grad_norm": 1.3059120178222656, "learning_rate": 2e-05, "loss": 0.03965393, "step": 15563 }, { "epoch": 31.128, "grad_norm": 1.079121708869934, "learning_rate": 2e-05, "loss": 0.04260961, "step": 15564 }, { "epoch": 31.13, "grad_norm": 2.0246469974517822, "learning_rate": 2e-05, "loss": 0.03333722, "step": 15565 }, { "epoch": 31.132, "grad_norm": 1.0588722229003906, "learning_rate": 2e-05, "loss": 0.04730598, "step": 15566 }, { "epoch": 31.134, "grad_norm": 1.0964921712875366, "learning_rate": 2e-05, "loss": 0.0434188, "step": 15567 }, { "epoch": 31.136, "grad_norm": 1.1378031969070435, "learning_rate": 2e-05, "loss": 0.04260813, "step": 15568 }, { "epoch": 31.138, "grad_norm": 3.1608223915100098, "learning_rate": 2e-05, "loss": 0.0434106, "step": 15569 }, { "epoch": 31.14, "grad_norm": 1.2680219411849976, "learning_rate": 2e-05, "loss": 0.04677624, "step": 15570 }, { "epoch": 31.142, "grad_norm": 1.1608775854110718, "learning_rate": 2e-05, "loss": 0.04532572, "step": 15571 }, { "epoch": 31.144, "grad_norm": 1.3475708961486816, "learning_rate": 2e-05, "loss": 0.05460385, "step": 15572 }, { "epoch": 31.146, "grad_norm": 1.1137781143188477, "learning_rate": 2e-05, "loss": 0.04142278, "step": 15573 }, { "epoch": 31.148, "grad_norm": 1.082829475402832, "learning_rate": 2e-05, "loss": 0.04784383, "step": 15574 }, { "epoch": 31.15, "grad_norm": 1.2136229276657104, "learning_rate": 2e-05, "loss": 0.03027528, "step": 15575 }, { "epoch": 31.152, "grad_norm": 1.0264850854873657, "learning_rate": 2e-05, "loss": 0.04279248, "step": 15576 }, { "epoch": 31.154, "grad_norm": 1.5535433292388916, "learning_rate": 2e-05, "loss": 0.05268865, "step": 15577 }, { "epoch": 31.156, "grad_norm": 1.4060524702072144, "learning_rate": 2e-05, "loss": 0.0343322, "step": 15578 }, { "epoch": 31.158, "grad_norm": 0.6968619227409363, "learning_rate": 2e-05, "loss": 0.01679928, "step": 15579 }, { "epoch": 31.16, "grad_norm": 1.15040123462677, "learning_rate": 2e-05, "loss": 0.04167127, "step": 15580 }, { "epoch": 31.162, "grad_norm": 1.524133563041687, "learning_rate": 2e-05, "loss": 0.03878558, "step": 15581 }, { "epoch": 31.164, "grad_norm": 2.028996706008911, "learning_rate": 2e-05, "loss": 0.05743674, "step": 15582 }, { "epoch": 31.166, "grad_norm": 1.5730241537094116, "learning_rate": 2e-05, "loss": 0.04803181, "step": 15583 }, { "epoch": 31.168, "grad_norm": 1.2136539220809937, "learning_rate": 2e-05, "loss": 0.03908694, "step": 15584 }, { "epoch": 31.17, "grad_norm": 2.451467514038086, "learning_rate": 2e-05, "loss": 0.05021393, "step": 15585 }, { "epoch": 31.172, "grad_norm": 0.967592179775238, "learning_rate": 2e-05, "loss": 0.02962617, "step": 15586 }, { "epoch": 31.174, "grad_norm": 1.205581784248352, "learning_rate": 2e-05, "loss": 0.04359256, "step": 15587 }, { "epoch": 31.176, "grad_norm": 1.8534575700759888, "learning_rate": 2e-05, "loss": 0.05359968, "step": 15588 }, { "epoch": 31.178, "grad_norm": 1.4265109300613403, "learning_rate": 2e-05, "loss": 0.0480662, "step": 15589 }, { "epoch": 31.18, "grad_norm": 2.661834716796875, "learning_rate": 2e-05, "loss": 0.05001631, "step": 15590 }, { "epoch": 31.182, "grad_norm": 2.1135103702545166, "learning_rate": 2e-05, "loss": 0.03104893, "step": 15591 }, { "epoch": 31.184, "grad_norm": 1.0201138257980347, "learning_rate": 2e-05, "loss": 0.03780366, "step": 15592 }, { "epoch": 31.186, "grad_norm": 1.1135469675064087, "learning_rate": 2e-05, "loss": 0.03954279, "step": 15593 }, { "epoch": 31.188, "grad_norm": 1.0150232315063477, "learning_rate": 2e-05, "loss": 0.0309415, "step": 15594 }, { "epoch": 31.19, "grad_norm": 1.0710225105285645, "learning_rate": 2e-05, "loss": 0.03865776, "step": 15595 }, { "epoch": 31.192, "grad_norm": 1.226999044418335, "learning_rate": 2e-05, "loss": 0.04560003, "step": 15596 }, { "epoch": 31.194, "grad_norm": 1.3833179473876953, "learning_rate": 2e-05, "loss": 0.05527067, "step": 15597 }, { "epoch": 31.196, "grad_norm": 1.1716560125350952, "learning_rate": 2e-05, "loss": 0.03517221, "step": 15598 }, { "epoch": 31.198, "grad_norm": 1.7883951663970947, "learning_rate": 2e-05, "loss": 0.04532555, "step": 15599 }, { "epoch": 31.2, "grad_norm": 1.13947594165802, "learning_rate": 2e-05, "loss": 0.03804951, "step": 15600 }, { "epoch": 31.202, "grad_norm": 1.1460981369018555, "learning_rate": 2e-05, "loss": 0.04975809, "step": 15601 }, { "epoch": 31.204, "grad_norm": 2.1435766220092773, "learning_rate": 2e-05, "loss": 0.04334519, "step": 15602 }, { "epoch": 31.206, "grad_norm": 2.5278499126434326, "learning_rate": 2e-05, "loss": 0.04724212, "step": 15603 }, { "epoch": 31.208, "grad_norm": 3.0959975719451904, "learning_rate": 2e-05, "loss": 0.04795504, "step": 15604 }, { "epoch": 31.21, "grad_norm": 1.3681577444076538, "learning_rate": 2e-05, "loss": 0.04222552, "step": 15605 }, { "epoch": 31.212, "grad_norm": 2.0547592639923096, "learning_rate": 2e-05, "loss": 0.04761481, "step": 15606 }, { "epoch": 31.214, "grad_norm": 1.327380657196045, "learning_rate": 2e-05, "loss": 0.05030813, "step": 15607 }, { "epoch": 31.216, "grad_norm": 0.9326903223991394, "learning_rate": 2e-05, "loss": 0.02635142, "step": 15608 }, { "epoch": 31.218, "grad_norm": 2.4155571460723877, "learning_rate": 2e-05, "loss": 0.04080499, "step": 15609 }, { "epoch": 31.22, "grad_norm": 0.9260361194610596, "learning_rate": 2e-05, "loss": 0.03045768, "step": 15610 }, { "epoch": 31.222, "grad_norm": 1.0131243467330933, "learning_rate": 2e-05, "loss": 0.03150607, "step": 15611 }, { "epoch": 31.224, "grad_norm": 1.1739399433135986, "learning_rate": 2e-05, "loss": 0.03871925, "step": 15612 }, { "epoch": 31.226, "grad_norm": 1.072050929069519, "learning_rate": 2e-05, "loss": 0.04814139, "step": 15613 }, { "epoch": 31.228, "grad_norm": 2.805246591567993, "learning_rate": 2e-05, "loss": 0.0489997, "step": 15614 }, { "epoch": 31.23, "grad_norm": 2.114262580871582, "learning_rate": 2e-05, "loss": 0.06214041, "step": 15615 }, { "epoch": 31.232, "grad_norm": 1.275655746459961, "learning_rate": 2e-05, "loss": 0.03479639, "step": 15616 }, { "epoch": 31.234, "grad_norm": 1.3052101135253906, "learning_rate": 2e-05, "loss": 0.04745651, "step": 15617 }, { "epoch": 31.236, "grad_norm": 2.008206605911255, "learning_rate": 2e-05, "loss": 0.02935056, "step": 15618 }, { "epoch": 31.238, "grad_norm": 1.4074678421020508, "learning_rate": 2e-05, "loss": 0.05427198, "step": 15619 }, { "epoch": 31.24, "grad_norm": 1.0969489812850952, "learning_rate": 2e-05, "loss": 0.04732572, "step": 15620 }, { "epoch": 31.242, "grad_norm": 1.1318331956863403, "learning_rate": 2e-05, "loss": 0.04238062, "step": 15621 }, { "epoch": 31.244, "grad_norm": 1.1431554555892944, "learning_rate": 2e-05, "loss": 0.05024539, "step": 15622 }, { "epoch": 31.246, "grad_norm": 1.2575552463531494, "learning_rate": 2e-05, "loss": 0.03069566, "step": 15623 }, { "epoch": 31.248, "grad_norm": 1.2473465204238892, "learning_rate": 2e-05, "loss": 0.04475783, "step": 15624 }, { "epoch": 31.25, "grad_norm": 1.2875679731369019, "learning_rate": 2e-05, "loss": 0.03152344, "step": 15625 }, { "epoch": 31.252, "grad_norm": 1.624121069908142, "learning_rate": 2e-05, "loss": 0.06142828, "step": 15626 }, { "epoch": 31.254, "grad_norm": 1.1332637071609497, "learning_rate": 2e-05, "loss": 0.03317603, "step": 15627 }, { "epoch": 31.256, "grad_norm": 1.1676865816116333, "learning_rate": 2e-05, "loss": 0.04019779, "step": 15628 }, { "epoch": 31.258, "grad_norm": 2.188838005065918, "learning_rate": 2e-05, "loss": 0.03890157, "step": 15629 }, { "epoch": 31.26, "grad_norm": 1.019037127494812, "learning_rate": 2e-05, "loss": 0.03228922, "step": 15630 }, { "epoch": 31.262, "grad_norm": 1.123388409614563, "learning_rate": 2e-05, "loss": 0.03372766, "step": 15631 }, { "epoch": 31.264, "grad_norm": 3.509737491607666, "learning_rate": 2e-05, "loss": 0.04830921, "step": 15632 }, { "epoch": 31.266, "grad_norm": 1.3756506443023682, "learning_rate": 2e-05, "loss": 0.0438694, "step": 15633 }, { "epoch": 31.268, "grad_norm": 1.2744717597961426, "learning_rate": 2e-05, "loss": 0.03653997, "step": 15634 }, { "epoch": 31.27, "grad_norm": 1.5209195613861084, "learning_rate": 2e-05, "loss": 0.04338511, "step": 15635 }, { "epoch": 31.272, "grad_norm": 2.2282023429870605, "learning_rate": 2e-05, "loss": 0.04440949, "step": 15636 }, { "epoch": 31.274, "grad_norm": 1.1879875659942627, "learning_rate": 2e-05, "loss": 0.02991764, "step": 15637 }, { "epoch": 31.276, "grad_norm": 0.8613758683204651, "learning_rate": 2e-05, "loss": 0.02830381, "step": 15638 }, { "epoch": 31.278, "grad_norm": 1.0136206150054932, "learning_rate": 2e-05, "loss": 0.03036819, "step": 15639 }, { "epoch": 31.28, "grad_norm": 1.2615082263946533, "learning_rate": 2e-05, "loss": 0.04580786, "step": 15640 }, { "epoch": 31.282, "grad_norm": 1.1112679243087769, "learning_rate": 2e-05, "loss": 0.04442069, "step": 15641 }, { "epoch": 31.284, "grad_norm": 1.0508791208267212, "learning_rate": 2e-05, "loss": 0.04491913, "step": 15642 }, { "epoch": 31.286, "grad_norm": 1.1440688371658325, "learning_rate": 2e-05, "loss": 0.04976808, "step": 15643 }, { "epoch": 31.288, "grad_norm": 1.3392294645309448, "learning_rate": 2e-05, "loss": 0.04487741, "step": 15644 }, { "epoch": 31.29, "grad_norm": 1.3118019104003906, "learning_rate": 2e-05, "loss": 0.04272094, "step": 15645 }, { "epoch": 31.292, "grad_norm": 1.3110008239746094, "learning_rate": 2e-05, "loss": 0.04961982, "step": 15646 }, { "epoch": 31.294, "grad_norm": 1.3569951057434082, "learning_rate": 2e-05, "loss": 0.05010619, "step": 15647 }, { "epoch": 31.296, "grad_norm": 1.1815541982650757, "learning_rate": 2e-05, "loss": 0.03432131, "step": 15648 }, { "epoch": 31.298, "grad_norm": 1.3439204692840576, "learning_rate": 2e-05, "loss": 0.06246966, "step": 15649 }, { "epoch": 31.3, "grad_norm": 1.0219476222991943, "learning_rate": 2e-05, "loss": 0.02712579, "step": 15650 }, { "epoch": 31.302, "grad_norm": 0.9462128281593323, "learning_rate": 2e-05, "loss": 0.03200362, "step": 15651 }, { "epoch": 31.304, "grad_norm": 1.5896122455596924, "learning_rate": 2e-05, "loss": 0.05370201, "step": 15652 }, { "epoch": 31.306, "grad_norm": 2.8889029026031494, "learning_rate": 2e-05, "loss": 0.03914107, "step": 15653 }, { "epoch": 31.308, "grad_norm": 1.5959099531173706, "learning_rate": 2e-05, "loss": 0.05540708, "step": 15654 }, { "epoch": 31.31, "grad_norm": 1.9818872213363647, "learning_rate": 2e-05, "loss": 0.05058894, "step": 15655 }, { "epoch": 31.312, "grad_norm": 1.4036006927490234, "learning_rate": 2e-05, "loss": 0.05569137, "step": 15656 }, { "epoch": 31.314, "grad_norm": 1.2897917032241821, "learning_rate": 2e-05, "loss": 0.04362039, "step": 15657 }, { "epoch": 31.316, "grad_norm": 1.1039726734161377, "learning_rate": 2e-05, "loss": 0.03560334, "step": 15658 }, { "epoch": 31.318, "grad_norm": 1.2764880657196045, "learning_rate": 2e-05, "loss": 0.04904519, "step": 15659 }, { "epoch": 31.32, "grad_norm": 2.2581369876861572, "learning_rate": 2e-05, "loss": 0.06880252, "step": 15660 }, { "epoch": 31.322, "grad_norm": 1.424851655960083, "learning_rate": 2e-05, "loss": 0.03886406, "step": 15661 }, { "epoch": 31.324, "grad_norm": 0.9566469192504883, "learning_rate": 2e-05, "loss": 0.03768406, "step": 15662 }, { "epoch": 31.326, "grad_norm": 1.1624170541763306, "learning_rate": 2e-05, "loss": 0.04413099, "step": 15663 }, { "epoch": 31.328, "grad_norm": 1.2362680435180664, "learning_rate": 2e-05, "loss": 0.03142476, "step": 15664 }, { "epoch": 31.33, "grad_norm": 1.912945032119751, "learning_rate": 2e-05, "loss": 0.05409247, "step": 15665 }, { "epoch": 31.332, "grad_norm": 2.0025062561035156, "learning_rate": 2e-05, "loss": 0.05454734, "step": 15666 }, { "epoch": 31.334, "grad_norm": 1.087705373764038, "learning_rate": 2e-05, "loss": 0.04066493, "step": 15667 }, { "epoch": 31.336, "grad_norm": 1.9127389192581177, "learning_rate": 2e-05, "loss": 0.05669509, "step": 15668 }, { "epoch": 31.338, "grad_norm": 1.421940803527832, "learning_rate": 2e-05, "loss": 0.05013736, "step": 15669 }, { "epoch": 31.34, "grad_norm": 2.1149110794067383, "learning_rate": 2e-05, "loss": 0.05951951, "step": 15670 }, { "epoch": 31.342, "grad_norm": 2.23716402053833, "learning_rate": 2e-05, "loss": 0.04989032, "step": 15671 }, { "epoch": 31.344, "grad_norm": 1.1148024797439575, "learning_rate": 2e-05, "loss": 0.03459584, "step": 15672 }, { "epoch": 31.346, "grad_norm": 1.1864360570907593, "learning_rate": 2e-05, "loss": 0.04163406, "step": 15673 }, { "epoch": 31.348, "grad_norm": 1.1121900081634521, "learning_rate": 2e-05, "loss": 0.04536529, "step": 15674 }, { "epoch": 31.35, "grad_norm": 1.37013840675354, "learning_rate": 2e-05, "loss": 0.03631928, "step": 15675 }, { "epoch": 31.352, "grad_norm": 1.0761029720306396, "learning_rate": 2e-05, "loss": 0.04334534, "step": 15676 }, { "epoch": 31.354, "grad_norm": 1.2131975889205933, "learning_rate": 2e-05, "loss": 0.03891152, "step": 15677 }, { "epoch": 31.356, "grad_norm": 1.1239383220672607, "learning_rate": 2e-05, "loss": 0.03443652, "step": 15678 }, { "epoch": 31.358, "grad_norm": 1.4421390295028687, "learning_rate": 2e-05, "loss": 0.03504834, "step": 15679 }, { "epoch": 31.36, "grad_norm": 1.6474403142929077, "learning_rate": 2e-05, "loss": 0.06349871, "step": 15680 }, { "epoch": 31.362, "grad_norm": 1.2030842304229736, "learning_rate": 2e-05, "loss": 0.03427202, "step": 15681 }, { "epoch": 31.364, "grad_norm": 1.1239585876464844, "learning_rate": 2e-05, "loss": 0.03592511, "step": 15682 }, { "epoch": 31.366, "grad_norm": 1.0693938732147217, "learning_rate": 2e-05, "loss": 0.03550804, "step": 15683 }, { "epoch": 31.368, "grad_norm": 1.2990278005599976, "learning_rate": 2e-05, "loss": 0.04546881, "step": 15684 }, { "epoch": 31.37, "grad_norm": 1.4115841388702393, "learning_rate": 2e-05, "loss": 0.04322497, "step": 15685 }, { "epoch": 31.372, "grad_norm": 1.1592683792114258, "learning_rate": 2e-05, "loss": 0.04757714, "step": 15686 }, { "epoch": 31.374, "grad_norm": 1.0848994255065918, "learning_rate": 2e-05, "loss": 0.04322045, "step": 15687 }, { "epoch": 31.376, "grad_norm": 1.319326639175415, "learning_rate": 2e-05, "loss": 0.04263577, "step": 15688 }, { "epoch": 31.378, "grad_norm": 1.180602788925171, "learning_rate": 2e-05, "loss": 0.04362357, "step": 15689 }, { "epoch": 31.38, "grad_norm": 1.1107655763626099, "learning_rate": 2e-05, "loss": 0.05218142, "step": 15690 }, { "epoch": 31.382, "grad_norm": 1.0870996713638306, "learning_rate": 2e-05, "loss": 0.03446791, "step": 15691 }, { "epoch": 31.384, "grad_norm": 1.0287660360336304, "learning_rate": 2e-05, "loss": 0.03068786, "step": 15692 }, { "epoch": 31.386, "grad_norm": 1.2678953409194946, "learning_rate": 2e-05, "loss": 0.02981563, "step": 15693 }, { "epoch": 31.388, "grad_norm": 1.2370007038116455, "learning_rate": 2e-05, "loss": 0.04931886, "step": 15694 }, { "epoch": 31.39, "grad_norm": 1.0795314311981201, "learning_rate": 2e-05, "loss": 0.03978341, "step": 15695 }, { "epoch": 31.392, "grad_norm": 1.0488916635513306, "learning_rate": 2e-05, "loss": 0.04341751, "step": 15696 }, { "epoch": 31.394, "grad_norm": 1.9883416891098022, "learning_rate": 2e-05, "loss": 0.04387866, "step": 15697 }, { "epoch": 31.396, "grad_norm": 1.0542610883712769, "learning_rate": 2e-05, "loss": 0.04586036, "step": 15698 }, { "epoch": 31.398, "grad_norm": 1.5984288454055786, "learning_rate": 2e-05, "loss": 0.04890535, "step": 15699 }, { "epoch": 31.4, "grad_norm": 1.2893099784851074, "learning_rate": 2e-05, "loss": 0.0567109, "step": 15700 }, { "epoch": 31.402, "grad_norm": 1.1580631732940674, "learning_rate": 2e-05, "loss": 0.04688855, "step": 15701 }, { "epoch": 31.404, "grad_norm": 1.5734524726867676, "learning_rate": 2e-05, "loss": 0.06079507, "step": 15702 }, { "epoch": 31.406, "grad_norm": 1.2681607007980347, "learning_rate": 2e-05, "loss": 0.0585427, "step": 15703 }, { "epoch": 31.408, "grad_norm": 1.4479280710220337, "learning_rate": 2e-05, "loss": 0.0500034, "step": 15704 }, { "epoch": 31.41, "grad_norm": 1.5085829496383667, "learning_rate": 2e-05, "loss": 0.04053114, "step": 15705 }, { "epoch": 31.412, "grad_norm": 1.2420114278793335, "learning_rate": 2e-05, "loss": 0.0312696, "step": 15706 }, { "epoch": 31.414, "grad_norm": 1.4845657348632812, "learning_rate": 2e-05, "loss": 0.04520275, "step": 15707 }, { "epoch": 31.416, "grad_norm": 1.0234241485595703, "learning_rate": 2e-05, "loss": 0.03995465, "step": 15708 }, { "epoch": 31.418, "grad_norm": 0.9769031405448914, "learning_rate": 2e-05, "loss": 0.03126371, "step": 15709 }, { "epoch": 31.42, "grad_norm": 1.2428449392318726, "learning_rate": 2e-05, "loss": 0.04003659, "step": 15710 }, { "epoch": 31.422, "grad_norm": 1.1226387023925781, "learning_rate": 2e-05, "loss": 0.04901841, "step": 15711 }, { "epoch": 31.424, "grad_norm": 0.9852843284606934, "learning_rate": 2e-05, "loss": 0.03026243, "step": 15712 }, { "epoch": 31.426, "grad_norm": 1.0646196603775024, "learning_rate": 2e-05, "loss": 0.03818695, "step": 15713 }, { "epoch": 31.428, "grad_norm": 1.188592553138733, "learning_rate": 2e-05, "loss": 0.03416793, "step": 15714 }, { "epoch": 31.43, "grad_norm": 1.1710172891616821, "learning_rate": 2e-05, "loss": 0.03767353, "step": 15715 }, { "epoch": 31.432, "grad_norm": 1.2032878398895264, "learning_rate": 2e-05, "loss": 0.04838914, "step": 15716 }, { "epoch": 31.434, "grad_norm": 1.038709282875061, "learning_rate": 2e-05, "loss": 0.03334371, "step": 15717 }, { "epoch": 31.436, "grad_norm": 1.0788524150848389, "learning_rate": 2e-05, "loss": 0.03386791, "step": 15718 }, { "epoch": 31.438, "grad_norm": 1.163702368736267, "learning_rate": 2e-05, "loss": 0.0470679, "step": 15719 }, { "epoch": 31.44, "grad_norm": 1.1703226566314697, "learning_rate": 2e-05, "loss": 0.04177292, "step": 15720 }, { "epoch": 31.442, "grad_norm": 2.1952016353607178, "learning_rate": 2e-05, "loss": 0.03891946, "step": 15721 }, { "epoch": 31.444, "grad_norm": 1.4332395792007446, "learning_rate": 2e-05, "loss": 0.05187111, "step": 15722 }, { "epoch": 31.446, "grad_norm": 1.305293083190918, "learning_rate": 2e-05, "loss": 0.04548291, "step": 15723 }, { "epoch": 31.448, "grad_norm": 1.0815423727035522, "learning_rate": 2e-05, "loss": 0.04857568, "step": 15724 }, { "epoch": 31.45, "grad_norm": 1.2314715385437012, "learning_rate": 2e-05, "loss": 0.03415262, "step": 15725 }, { "epoch": 31.452, "grad_norm": 1.8589730262756348, "learning_rate": 2e-05, "loss": 0.04466228, "step": 15726 }, { "epoch": 31.454, "grad_norm": 1.4462237358093262, "learning_rate": 2e-05, "loss": 0.05154398, "step": 15727 }, { "epoch": 31.456, "grad_norm": 1.0429617166519165, "learning_rate": 2e-05, "loss": 0.03432951, "step": 15728 }, { "epoch": 31.458, "grad_norm": 1.2360962629318237, "learning_rate": 2e-05, "loss": 0.04059312, "step": 15729 }, { "epoch": 31.46, "grad_norm": 1.1760252714157104, "learning_rate": 2e-05, "loss": 0.05076623, "step": 15730 }, { "epoch": 31.462, "grad_norm": 1.2189456224441528, "learning_rate": 2e-05, "loss": 0.05206157, "step": 15731 }, { "epoch": 31.464, "grad_norm": 1.028828740119934, "learning_rate": 2e-05, "loss": 0.02622063, "step": 15732 }, { "epoch": 31.466, "grad_norm": 1.3809655904769897, "learning_rate": 2e-05, "loss": 0.04626153, "step": 15733 }, { "epoch": 31.468, "grad_norm": 1.0601438283920288, "learning_rate": 2e-05, "loss": 0.03184626, "step": 15734 }, { "epoch": 31.47, "grad_norm": 1.7758816480636597, "learning_rate": 2e-05, "loss": 0.04071598, "step": 15735 }, { "epoch": 31.472, "grad_norm": 1.2127716541290283, "learning_rate": 2e-05, "loss": 0.0404386, "step": 15736 }, { "epoch": 31.474, "grad_norm": 1.1199473142623901, "learning_rate": 2e-05, "loss": 0.03128079, "step": 15737 }, { "epoch": 31.476, "grad_norm": 0.9239538908004761, "learning_rate": 2e-05, "loss": 0.0214905, "step": 15738 }, { "epoch": 31.478, "grad_norm": 1.0289355516433716, "learning_rate": 2e-05, "loss": 0.03084357, "step": 15739 }, { "epoch": 31.48, "grad_norm": 1.7557429075241089, "learning_rate": 2e-05, "loss": 0.04279177, "step": 15740 }, { "epoch": 31.482, "grad_norm": 2.431920289993286, "learning_rate": 2e-05, "loss": 0.04216561, "step": 15741 }, { "epoch": 31.484, "grad_norm": 1.3500877618789673, "learning_rate": 2e-05, "loss": 0.04621994, "step": 15742 }, { "epoch": 31.486, "grad_norm": 1.0111196041107178, "learning_rate": 2e-05, "loss": 0.02924443, "step": 15743 }, { "epoch": 31.488, "grad_norm": 1.5376261472702026, "learning_rate": 2e-05, "loss": 0.05262484, "step": 15744 }, { "epoch": 31.49, "grad_norm": 1.3182742595672607, "learning_rate": 2e-05, "loss": 0.0479708, "step": 15745 }, { "epoch": 31.492, "grad_norm": 1.2966049909591675, "learning_rate": 2e-05, "loss": 0.03965274, "step": 15746 }, { "epoch": 31.494, "grad_norm": 1.1283825635910034, "learning_rate": 2e-05, "loss": 0.0380425, "step": 15747 }, { "epoch": 31.496, "grad_norm": 1.2638022899627686, "learning_rate": 2e-05, "loss": 0.0417271, "step": 15748 }, { "epoch": 31.498, "grad_norm": 1.2083308696746826, "learning_rate": 2e-05, "loss": 0.04929405, "step": 15749 }, { "epoch": 31.5, "grad_norm": 1.5783114433288574, "learning_rate": 2e-05, "loss": 0.04968805, "step": 15750 }, { "epoch": 31.502, "grad_norm": 2.3429791927337646, "learning_rate": 2e-05, "loss": 0.0579337, "step": 15751 }, { "epoch": 31.504, "grad_norm": 1.2848727703094482, "learning_rate": 2e-05, "loss": 0.03835206, "step": 15752 }, { "epoch": 31.506, "grad_norm": 1.3516781330108643, "learning_rate": 2e-05, "loss": 0.04133689, "step": 15753 }, { "epoch": 31.508, "grad_norm": 1.0922443866729736, "learning_rate": 2e-05, "loss": 0.03277455, "step": 15754 }, { "epoch": 31.51, "grad_norm": 1.2325778007507324, "learning_rate": 2e-05, "loss": 0.04386805, "step": 15755 }, { "epoch": 31.512, "grad_norm": 1.190375566482544, "learning_rate": 2e-05, "loss": 0.04679071, "step": 15756 }, { "epoch": 31.514, "grad_norm": 1.1857788562774658, "learning_rate": 2e-05, "loss": 0.04052466, "step": 15757 }, { "epoch": 31.516, "grad_norm": 1.4863660335540771, "learning_rate": 2e-05, "loss": 0.04734954, "step": 15758 }, { "epoch": 31.518, "grad_norm": 0.9761635661125183, "learning_rate": 2e-05, "loss": 0.04507826, "step": 15759 }, { "epoch": 31.52, "grad_norm": 3.303678274154663, "learning_rate": 2e-05, "loss": 0.04152103, "step": 15760 }, { "epoch": 31.522, "grad_norm": 1.3020141124725342, "learning_rate": 2e-05, "loss": 0.03344776, "step": 15761 }, { "epoch": 31.524, "grad_norm": 1.2744166851043701, "learning_rate": 2e-05, "loss": 0.03905339, "step": 15762 }, { "epoch": 31.526, "grad_norm": 1.3360401391983032, "learning_rate": 2e-05, "loss": 0.05134521, "step": 15763 }, { "epoch": 31.528, "grad_norm": 1.5633838176727295, "learning_rate": 2e-05, "loss": 0.04629926, "step": 15764 }, { "epoch": 31.53, "grad_norm": 1.144903540611267, "learning_rate": 2e-05, "loss": 0.04337474, "step": 15765 }, { "epoch": 31.532, "grad_norm": 1.549691081047058, "learning_rate": 2e-05, "loss": 0.02903229, "step": 15766 }, { "epoch": 31.534, "grad_norm": 1.6548807621002197, "learning_rate": 2e-05, "loss": 0.03670297, "step": 15767 }, { "epoch": 31.536, "grad_norm": 1.8595404624938965, "learning_rate": 2e-05, "loss": 0.04848412, "step": 15768 }, { "epoch": 31.538, "grad_norm": 0.9308550953865051, "learning_rate": 2e-05, "loss": 0.01970943, "step": 15769 }, { "epoch": 31.54, "grad_norm": 1.6548179388046265, "learning_rate": 2e-05, "loss": 0.05121253, "step": 15770 }, { "epoch": 31.542, "grad_norm": 1.1826523542404175, "learning_rate": 2e-05, "loss": 0.03165106, "step": 15771 }, { "epoch": 31.544, "grad_norm": 2.287088394165039, "learning_rate": 2e-05, "loss": 0.05559523, "step": 15772 }, { "epoch": 31.546, "grad_norm": 1.249127984046936, "learning_rate": 2e-05, "loss": 0.05151629, "step": 15773 }, { "epoch": 31.548000000000002, "grad_norm": 2.0954861640930176, "learning_rate": 2e-05, "loss": 0.05453818, "step": 15774 }, { "epoch": 31.55, "grad_norm": 1.1801806688308716, "learning_rate": 2e-05, "loss": 0.04041165, "step": 15775 }, { "epoch": 31.552, "grad_norm": 1.1119970083236694, "learning_rate": 2e-05, "loss": 0.0460862, "step": 15776 }, { "epoch": 31.554, "grad_norm": 2.1345860958099365, "learning_rate": 2e-05, "loss": 0.04225209, "step": 15777 }, { "epoch": 31.556, "grad_norm": 1.1420177221298218, "learning_rate": 2e-05, "loss": 0.03541413, "step": 15778 }, { "epoch": 31.558, "grad_norm": 1.9255207777023315, "learning_rate": 2e-05, "loss": 0.05924919, "step": 15779 }, { "epoch": 31.56, "grad_norm": 3.971301317214966, "learning_rate": 2e-05, "loss": 0.05418454, "step": 15780 }, { "epoch": 31.562, "grad_norm": 1.384949803352356, "learning_rate": 2e-05, "loss": 0.03826708, "step": 15781 }, { "epoch": 31.564, "grad_norm": 1.1261556148529053, "learning_rate": 2e-05, "loss": 0.04444794, "step": 15782 }, { "epoch": 31.566, "grad_norm": 1.696993112564087, "learning_rate": 2e-05, "loss": 0.03568056, "step": 15783 }, { "epoch": 31.568, "grad_norm": 1.7849308252334595, "learning_rate": 2e-05, "loss": 0.03161497, "step": 15784 }, { "epoch": 31.57, "grad_norm": 2.218967914581299, "learning_rate": 2e-05, "loss": 0.04789735, "step": 15785 }, { "epoch": 31.572, "grad_norm": 1.280608892440796, "learning_rate": 2e-05, "loss": 0.04609762, "step": 15786 }, { "epoch": 31.574, "grad_norm": 1.1164113283157349, "learning_rate": 2e-05, "loss": 0.03556387, "step": 15787 }, { "epoch": 31.576, "grad_norm": 1.5488619804382324, "learning_rate": 2e-05, "loss": 0.06319657, "step": 15788 }, { "epoch": 31.578, "grad_norm": 1.0593352317810059, "learning_rate": 2e-05, "loss": 0.0368279, "step": 15789 }, { "epoch": 31.58, "grad_norm": 1.139189600944519, "learning_rate": 2e-05, "loss": 0.03742014, "step": 15790 }, { "epoch": 31.582, "grad_norm": 1.1916528940200806, "learning_rate": 2e-05, "loss": 0.05514635, "step": 15791 }, { "epoch": 31.584, "grad_norm": 2.922518491744995, "learning_rate": 2e-05, "loss": 0.06346483, "step": 15792 }, { "epoch": 31.586, "grad_norm": 1.1857612133026123, "learning_rate": 2e-05, "loss": 0.04470282, "step": 15793 }, { "epoch": 31.588, "grad_norm": 1.5709048509597778, "learning_rate": 2e-05, "loss": 0.05266313, "step": 15794 }, { "epoch": 31.59, "grad_norm": 1.2784346342086792, "learning_rate": 2e-05, "loss": 0.02789311, "step": 15795 }, { "epoch": 31.592, "grad_norm": 1.4792814254760742, "learning_rate": 2e-05, "loss": 0.06371169, "step": 15796 }, { "epoch": 31.594, "grad_norm": 1.1451568603515625, "learning_rate": 2e-05, "loss": 0.03802731, "step": 15797 }, { "epoch": 31.596, "grad_norm": 1.5499827861785889, "learning_rate": 2e-05, "loss": 0.05359296, "step": 15798 }, { "epoch": 31.598, "grad_norm": 1.11916983127594, "learning_rate": 2e-05, "loss": 0.05549221, "step": 15799 }, { "epoch": 31.6, "grad_norm": 1.0967146158218384, "learning_rate": 2e-05, "loss": 0.03953513, "step": 15800 }, { "epoch": 31.602, "grad_norm": 1.4866015911102295, "learning_rate": 2e-05, "loss": 0.0373404, "step": 15801 }, { "epoch": 31.604, "grad_norm": 1.3071479797363281, "learning_rate": 2e-05, "loss": 0.03140751, "step": 15802 }, { "epoch": 31.606, "grad_norm": 1.8923872709274292, "learning_rate": 2e-05, "loss": 0.03232081, "step": 15803 }, { "epoch": 31.608, "grad_norm": 1.0222185850143433, "learning_rate": 2e-05, "loss": 0.03265133, "step": 15804 }, { "epoch": 31.61, "grad_norm": 1.0511828660964966, "learning_rate": 2e-05, "loss": 0.02982992, "step": 15805 }, { "epoch": 31.612, "grad_norm": 1.0626296997070312, "learning_rate": 2e-05, "loss": 0.03582488, "step": 15806 }, { "epoch": 31.614, "grad_norm": 1.239322304725647, "learning_rate": 2e-05, "loss": 0.04029377, "step": 15807 }, { "epoch": 31.616, "grad_norm": 1.2532638311386108, "learning_rate": 2e-05, "loss": 0.04826434, "step": 15808 }, { "epoch": 31.618, "grad_norm": 0.907904326915741, "learning_rate": 2e-05, "loss": 0.032974, "step": 15809 }, { "epoch": 31.62, "grad_norm": 1.3583287000656128, "learning_rate": 2e-05, "loss": 0.05751316, "step": 15810 }, { "epoch": 31.622, "grad_norm": 0.8747350573539734, "learning_rate": 2e-05, "loss": 0.02985252, "step": 15811 }, { "epoch": 31.624, "grad_norm": 1.2014013528823853, "learning_rate": 2e-05, "loss": 0.0308294, "step": 15812 }, { "epoch": 31.626, "grad_norm": 1.563045620918274, "learning_rate": 2e-05, "loss": 0.0466816, "step": 15813 }, { "epoch": 31.628, "grad_norm": 1.5340209007263184, "learning_rate": 2e-05, "loss": 0.04322568, "step": 15814 }, { "epoch": 31.63, "grad_norm": 1.1069697141647339, "learning_rate": 2e-05, "loss": 0.02941058, "step": 15815 }, { "epoch": 31.632, "grad_norm": 1.6199052333831787, "learning_rate": 2e-05, "loss": 0.06167778, "step": 15816 }, { "epoch": 31.634, "grad_norm": 0.9924496412277222, "learning_rate": 2e-05, "loss": 0.02608846, "step": 15817 }, { "epoch": 31.636, "grad_norm": 1.212367296218872, "learning_rate": 2e-05, "loss": 0.05580743, "step": 15818 }, { "epoch": 31.638, "grad_norm": 1.232508897781372, "learning_rate": 2e-05, "loss": 0.04425735, "step": 15819 }, { "epoch": 31.64, "grad_norm": 2.6619043350219727, "learning_rate": 2e-05, "loss": 0.06107927, "step": 15820 }, { "epoch": 31.642, "grad_norm": 1.5662249326705933, "learning_rate": 2e-05, "loss": 0.04231949, "step": 15821 }, { "epoch": 31.644, "grad_norm": 1.066655158996582, "learning_rate": 2e-05, "loss": 0.03843967, "step": 15822 }, { "epoch": 31.646, "grad_norm": 1.9721606969833374, "learning_rate": 2e-05, "loss": 0.05530218, "step": 15823 }, { "epoch": 31.648, "grad_norm": 1.093684196472168, "learning_rate": 2e-05, "loss": 0.04849446, "step": 15824 }, { "epoch": 31.65, "grad_norm": 1.434188961982727, "learning_rate": 2e-05, "loss": 0.05032559, "step": 15825 }, { "epoch": 31.652, "grad_norm": 1.0273107290267944, "learning_rate": 2e-05, "loss": 0.03886499, "step": 15826 }, { "epoch": 31.654, "grad_norm": 1.0134077072143555, "learning_rate": 2e-05, "loss": 0.0312471, "step": 15827 }, { "epoch": 31.656, "grad_norm": 1.69369637966156, "learning_rate": 2e-05, "loss": 0.05736078, "step": 15828 }, { "epoch": 31.658, "grad_norm": 3.8166019916534424, "learning_rate": 2e-05, "loss": 0.05662319, "step": 15829 }, { "epoch": 31.66, "grad_norm": 1.4313768148422241, "learning_rate": 2e-05, "loss": 0.04016527, "step": 15830 }, { "epoch": 31.662, "grad_norm": 1.6341408491134644, "learning_rate": 2e-05, "loss": 0.05576465, "step": 15831 }, { "epoch": 31.664, "grad_norm": 1.6464592218399048, "learning_rate": 2e-05, "loss": 0.05058682, "step": 15832 }, { "epoch": 31.666, "grad_norm": 1.6304293870925903, "learning_rate": 2e-05, "loss": 0.04562239, "step": 15833 }, { "epoch": 31.668, "grad_norm": 1.1134459972381592, "learning_rate": 2e-05, "loss": 0.0356195, "step": 15834 }, { "epoch": 31.67, "grad_norm": 0.9937390685081482, "learning_rate": 2e-05, "loss": 0.02685843, "step": 15835 }, { "epoch": 31.672, "grad_norm": 1.066178321838379, "learning_rate": 2e-05, "loss": 0.03931656, "step": 15836 }, { "epoch": 31.674, "grad_norm": 1.0088249444961548, "learning_rate": 2e-05, "loss": 0.04021916, "step": 15837 }, { "epoch": 31.676, "grad_norm": 1.028534173965454, "learning_rate": 2e-05, "loss": 0.05189967, "step": 15838 }, { "epoch": 31.678, "grad_norm": 4.298219203948975, "learning_rate": 2e-05, "loss": 0.03603112, "step": 15839 }, { "epoch": 31.68, "grad_norm": 1.1019927263259888, "learning_rate": 2e-05, "loss": 0.03563758, "step": 15840 }, { "epoch": 31.682, "grad_norm": 1.016705870628357, "learning_rate": 2e-05, "loss": 0.03160737, "step": 15841 }, { "epoch": 31.684, "grad_norm": 1.2760132551193237, "learning_rate": 2e-05, "loss": 0.0512272, "step": 15842 }, { "epoch": 31.686, "grad_norm": 1.0675709247589111, "learning_rate": 2e-05, "loss": 0.03803336, "step": 15843 }, { "epoch": 31.688, "grad_norm": 1.0932615995407104, "learning_rate": 2e-05, "loss": 0.03163125, "step": 15844 }, { "epoch": 31.69, "grad_norm": 1.4388149976730347, "learning_rate": 2e-05, "loss": 0.0546472, "step": 15845 }, { "epoch": 31.692, "grad_norm": 1.247949242591858, "learning_rate": 2e-05, "loss": 0.04308487, "step": 15846 }, { "epoch": 31.694, "grad_norm": 1.0364240407943726, "learning_rate": 2e-05, "loss": 0.04179894, "step": 15847 }, { "epoch": 31.696, "grad_norm": 1.3593528270721436, "learning_rate": 2e-05, "loss": 0.03463472, "step": 15848 }, { "epoch": 31.698, "grad_norm": 3.9153060913085938, "learning_rate": 2e-05, "loss": 0.0593623, "step": 15849 }, { "epoch": 31.7, "grad_norm": 1.224266529083252, "learning_rate": 2e-05, "loss": 0.06305296, "step": 15850 }, { "epoch": 31.701999999999998, "grad_norm": 1.8977373838424683, "learning_rate": 2e-05, "loss": 0.04459673, "step": 15851 }, { "epoch": 31.704, "grad_norm": 1.1238889694213867, "learning_rate": 2e-05, "loss": 0.03320381, "step": 15852 }, { "epoch": 31.706, "grad_norm": 1.8452239036560059, "learning_rate": 2e-05, "loss": 0.03556684, "step": 15853 }, { "epoch": 31.708, "grad_norm": 1.469277024269104, "learning_rate": 2e-05, "loss": 0.05553747, "step": 15854 }, { "epoch": 31.71, "grad_norm": 1.2231842279434204, "learning_rate": 2e-05, "loss": 0.04826688, "step": 15855 }, { "epoch": 31.712, "grad_norm": 0.8785945177078247, "learning_rate": 2e-05, "loss": 0.03162833, "step": 15856 }, { "epoch": 31.714, "grad_norm": 1.2260061502456665, "learning_rate": 2e-05, "loss": 0.04555026, "step": 15857 }, { "epoch": 31.716, "grad_norm": 1.4658849239349365, "learning_rate": 2e-05, "loss": 0.04575991, "step": 15858 }, { "epoch": 31.718, "grad_norm": 1.1947789192199707, "learning_rate": 2e-05, "loss": 0.03831368, "step": 15859 }, { "epoch": 31.72, "grad_norm": 2.118058443069458, "learning_rate": 2e-05, "loss": 0.05042726, "step": 15860 }, { "epoch": 31.722, "grad_norm": 0.9737758040428162, "learning_rate": 2e-05, "loss": 0.03438409, "step": 15861 }, { "epoch": 31.724, "grad_norm": 1.606362223625183, "learning_rate": 2e-05, "loss": 0.03443389, "step": 15862 }, { "epoch": 31.726, "grad_norm": 1.9236388206481934, "learning_rate": 2e-05, "loss": 0.04065321, "step": 15863 }, { "epoch": 31.728, "grad_norm": 1.1777132749557495, "learning_rate": 2e-05, "loss": 0.04068302, "step": 15864 }, { "epoch": 31.73, "grad_norm": 1.356837272644043, "learning_rate": 2e-05, "loss": 0.04885522, "step": 15865 }, { "epoch": 31.732, "grad_norm": 1.158024787902832, "learning_rate": 2e-05, "loss": 0.03375251, "step": 15866 }, { "epoch": 31.734, "grad_norm": 1.4615778923034668, "learning_rate": 2e-05, "loss": 0.03782218, "step": 15867 }, { "epoch": 31.736, "grad_norm": 1.936156153678894, "learning_rate": 2e-05, "loss": 0.05020786, "step": 15868 }, { "epoch": 31.738, "grad_norm": 1.1056143045425415, "learning_rate": 2e-05, "loss": 0.03084729, "step": 15869 }, { "epoch": 31.74, "grad_norm": 1.153363823890686, "learning_rate": 2e-05, "loss": 0.04279421, "step": 15870 }, { "epoch": 31.742, "grad_norm": 2.0926640033721924, "learning_rate": 2e-05, "loss": 0.05298992, "step": 15871 }, { "epoch": 31.744, "grad_norm": 0.9748860597610474, "learning_rate": 2e-05, "loss": 0.02891666, "step": 15872 }, { "epoch": 31.746, "grad_norm": 2.135936737060547, "learning_rate": 2e-05, "loss": 0.04711745, "step": 15873 }, { "epoch": 31.748, "grad_norm": 1.9730417728424072, "learning_rate": 2e-05, "loss": 0.0426465, "step": 15874 }, { "epoch": 31.75, "grad_norm": 2.211915969848633, "learning_rate": 2e-05, "loss": 0.0576695, "step": 15875 }, { "epoch": 31.752, "grad_norm": 1.4895312786102295, "learning_rate": 2e-05, "loss": 0.03817018, "step": 15876 }, { "epoch": 31.754, "grad_norm": 1.1091728210449219, "learning_rate": 2e-05, "loss": 0.03473945, "step": 15877 }, { "epoch": 31.756, "grad_norm": 1.5757970809936523, "learning_rate": 2e-05, "loss": 0.05376294, "step": 15878 }, { "epoch": 31.758, "grad_norm": 1.4097827672958374, "learning_rate": 2e-05, "loss": 0.0575452, "step": 15879 }, { "epoch": 31.76, "grad_norm": 1.601607084274292, "learning_rate": 2e-05, "loss": 0.03805475, "step": 15880 }, { "epoch": 31.762, "grad_norm": 1.084592580795288, "learning_rate": 2e-05, "loss": 0.03560298, "step": 15881 }, { "epoch": 31.764, "grad_norm": 1.3301918506622314, "learning_rate": 2e-05, "loss": 0.05340309, "step": 15882 }, { "epoch": 31.766, "grad_norm": 1.21677565574646, "learning_rate": 2e-05, "loss": 0.02901489, "step": 15883 }, { "epoch": 31.768, "grad_norm": 1.5509135723114014, "learning_rate": 2e-05, "loss": 0.0422479, "step": 15884 }, { "epoch": 31.77, "grad_norm": 2.1377058029174805, "learning_rate": 2e-05, "loss": 0.03911637, "step": 15885 }, { "epoch": 31.772, "grad_norm": 0.8442297577857971, "learning_rate": 2e-05, "loss": 0.02588265, "step": 15886 }, { "epoch": 31.774, "grad_norm": 1.5694454908370972, "learning_rate": 2e-05, "loss": 0.04644613, "step": 15887 }, { "epoch": 31.776, "grad_norm": 1.1110693216323853, "learning_rate": 2e-05, "loss": 0.03558012, "step": 15888 }, { "epoch": 31.778, "grad_norm": 1.0614808797836304, "learning_rate": 2e-05, "loss": 0.03117304, "step": 15889 }, { "epoch": 31.78, "grad_norm": 1.6479281187057495, "learning_rate": 2e-05, "loss": 0.03382275, "step": 15890 }, { "epoch": 31.782, "grad_norm": 3.055795192718506, "learning_rate": 2e-05, "loss": 0.02290535, "step": 15891 }, { "epoch": 31.784, "grad_norm": 1.0296449661254883, "learning_rate": 2e-05, "loss": 0.04224445, "step": 15892 }, { "epoch": 31.786, "grad_norm": 0.8810853958129883, "learning_rate": 2e-05, "loss": 0.03229618, "step": 15893 }, { "epoch": 31.788, "grad_norm": 2.7027084827423096, "learning_rate": 2e-05, "loss": 0.05507965, "step": 15894 }, { "epoch": 31.79, "grad_norm": 1.3966178894042969, "learning_rate": 2e-05, "loss": 0.03760487, "step": 15895 }, { "epoch": 31.792, "grad_norm": 1.139143705368042, "learning_rate": 2e-05, "loss": 0.04034952, "step": 15896 }, { "epoch": 31.794, "grad_norm": 1.1100138425827026, "learning_rate": 2e-05, "loss": 0.03606164, "step": 15897 }, { "epoch": 31.796, "grad_norm": 1.0225300788879395, "learning_rate": 2e-05, "loss": 0.03408796, "step": 15898 }, { "epoch": 31.798000000000002, "grad_norm": 1.2668366432189941, "learning_rate": 2e-05, "loss": 0.05540526, "step": 15899 }, { "epoch": 31.8, "grad_norm": 1.2877216339111328, "learning_rate": 2e-05, "loss": 0.05548783, "step": 15900 }, { "epoch": 31.802, "grad_norm": 1.4362618923187256, "learning_rate": 2e-05, "loss": 0.0343156, "step": 15901 }, { "epoch": 31.804, "grad_norm": 1.0215818881988525, "learning_rate": 2e-05, "loss": 0.03574908, "step": 15902 }, { "epoch": 31.806, "grad_norm": 1.2409071922302246, "learning_rate": 2e-05, "loss": 0.04538013, "step": 15903 }, { "epoch": 31.808, "grad_norm": 1.2912746667861938, "learning_rate": 2e-05, "loss": 0.0512158, "step": 15904 }, { "epoch": 31.81, "grad_norm": 2.350969076156616, "learning_rate": 2e-05, "loss": 0.0492909, "step": 15905 }, { "epoch": 31.812, "grad_norm": 1.299422264099121, "learning_rate": 2e-05, "loss": 0.05618146, "step": 15906 }, { "epoch": 31.814, "grad_norm": 1.3270517587661743, "learning_rate": 2e-05, "loss": 0.03777631, "step": 15907 }, { "epoch": 31.816, "grad_norm": 2.0297582149505615, "learning_rate": 2e-05, "loss": 0.05447558, "step": 15908 }, { "epoch": 31.818, "grad_norm": 1.204034686088562, "learning_rate": 2e-05, "loss": 0.04712773, "step": 15909 }, { "epoch": 31.82, "grad_norm": 1.8368512392044067, "learning_rate": 2e-05, "loss": 0.03878936, "step": 15910 }, { "epoch": 31.822, "grad_norm": 1.3600157499313354, "learning_rate": 2e-05, "loss": 0.04160419, "step": 15911 }, { "epoch": 31.824, "grad_norm": 0.9613247513771057, "learning_rate": 2e-05, "loss": 0.03210657, "step": 15912 }, { "epoch": 31.826, "grad_norm": 1.2313897609710693, "learning_rate": 2e-05, "loss": 0.0382159, "step": 15913 }, { "epoch": 31.828, "grad_norm": 1.0835821628570557, "learning_rate": 2e-05, "loss": 0.04776543, "step": 15914 }, { "epoch": 31.83, "grad_norm": 1.1532397270202637, "learning_rate": 2e-05, "loss": 0.02791233, "step": 15915 }, { "epoch": 31.832, "grad_norm": 1.6072558164596558, "learning_rate": 2e-05, "loss": 0.04267748, "step": 15916 }, { "epoch": 31.834, "grad_norm": 1.2094991207122803, "learning_rate": 2e-05, "loss": 0.0439063, "step": 15917 }, { "epoch": 31.836, "grad_norm": 1.9390913248062134, "learning_rate": 2e-05, "loss": 0.06620531, "step": 15918 }, { "epoch": 31.838, "grad_norm": 2.274019479751587, "learning_rate": 2e-05, "loss": 0.04259209, "step": 15919 }, { "epoch": 31.84, "grad_norm": 1.9428718090057373, "learning_rate": 2e-05, "loss": 0.04785924, "step": 15920 }, { "epoch": 31.842, "grad_norm": 1.422423243522644, "learning_rate": 2e-05, "loss": 0.04497388, "step": 15921 }, { "epoch": 31.844, "grad_norm": 1.053911805152893, "learning_rate": 2e-05, "loss": 0.03690197, "step": 15922 }, { "epoch": 31.846, "grad_norm": 3.13558030128479, "learning_rate": 2e-05, "loss": 0.05500776, "step": 15923 }, { "epoch": 31.848, "grad_norm": 1.275652527809143, "learning_rate": 2e-05, "loss": 0.04231162, "step": 15924 }, { "epoch": 31.85, "grad_norm": 1.9165621995925903, "learning_rate": 2e-05, "loss": 0.0441243, "step": 15925 }, { "epoch": 31.852, "grad_norm": 2.099443197250366, "learning_rate": 2e-05, "loss": 0.04373053, "step": 15926 }, { "epoch": 31.854, "grad_norm": 1.221465826034546, "learning_rate": 2e-05, "loss": 0.04799281, "step": 15927 }, { "epoch": 31.856, "grad_norm": 1.7947920560836792, "learning_rate": 2e-05, "loss": 0.04117496, "step": 15928 }, { "epoch": 31.858, "grad_norm": 1.142938256263733, "learning_rate": 2e-05, "loss": 0.03635032, "step": 15929 }, { "epoch": 31.86, "grad_norm": 1.0332616567611694, "learning_rate": 2e-05, "loss": 0.04042405, "step": 15930 }, { "epoch": 31.862, "grad_norm": 1.3157538175582886, "learning_rate": 2e-05, "loss": 0.06223164, "step": 15931 }, { "epoch": 31.864, "grad_norm": 1.207903265953064, "learning_rate": 2e-05, "loss": 0.05030291, "step": 15932 }, { "epoch": 31.866, "grad_norm": 1.8377586603164673, "learning_rate": 2e-05, "loss": 0.05019252, "step": 15933 }, { "epoch": 31.868, "grad_norm": 0.9512478709220886, "learning_rate": 2e-05, "loss": 0.03300637, "step": 15934 }, { "epoch": 31.87, "grad_norm": 1.3328098058700562, "learning_rate": 2e-05, "loss": 0.04929174, "step": 15935 }, { "epoch": 31.872, "grad_norm": 1.0890222787857056, "learning_rate": 2e-05, "loss": 0.04026979, "step": 15936 }, { "epoch": 31.874, "grad_norm": 0.9871217012405396, "learning_rate": 2e-05, "loss": 0.03689782, "step": 15937 }, { "epoch": 31.876, "grad_norm": 1.0311745405197144, "learning_rate": 2e-05, "loss": 0.03890628, "step": 15938 }, { "epoch": 31.878, "grad_norm": 1.252621054649353, "learning_rate": 2e-05, "loss": 0.05022133, "step": 15939 }, { "epoch": 31.88, "grad_norm": 0.8780187964439392, "learning_rate": 2e-05, "loss": 0.02962154, "step": 15940 }, { "epoch": 31.882, "grad_norm": 1.3588930368423462, "learning_rate": 2e-05, "loss": 0.04873728, "step": 15941 }, { "epoch": 31.884, "grad_norm": 1.4300963878631592, "learning_rate": 2e-05, "loss": 0.05331673, "step": 15942 }, { "epoch": 31.886, "grad_norm": 1.7596718072891235, "learning_rate": 2e-05, "loss": 0.05031583, "step": 15943 }, { "epoch": 31.888, "grad_norm": 1.7224795818328857, "learning_rate": 2e-05, "loss": 0.05422819, "step": 15944 }, { "epoch": 31.89, "grad_norm": 1.7030835151672363, "learning_rate": 2e-05, "loss": 0.05407435, "step": 15945 }, { "epoch": 31.892, "grad_norm": 0.9744284749031067, "learning_rate": 2e-05, "loss": 0.03288863, "step": 15946 }, { "epoch": 31.894, "grad_norm": 1.9340544939041138, "learning_rate": 2e-05, "loss": 0.04774361, "step": 15947 }, { "epoch": 31.896, "grad_norm": 1.3208523988723755, "learning_rate": 2e-05, "loss": 0.02824499, "step": 15948 }, { "epoch": 31.898, "grad_norm": 1.0506129264831543, "learning_rate": 2e-05, "loss": 0.03759496, "step": 15949 }, { "epoch": 31.9, "grad_norm": 1.000072717666626, "learning_rate": 2e-05, "loss": 0.03729441, "step": 15950 }, { "epoch": 31.902, "grad_norm": 1.8239920139312744, "learning_rate": 2e-05, "loss": 0.0503429, "step": 15951 }, { "epoch": 31.904, "grad_norm": 0.9123238921165466, "learning_rate": 2e-05, "loss": 0.04094257, "step": 15952 }, { "epoch": 31.906, "grad_norm": 2.360563278198242, "learning_rate": 2e-05, "loss": 0.04889352, "step": 15953 }, { "epoch": 31.908, "grad_norm": 1.2828713655471802, "learning_rate": 2e-05, "loss": 0.04768791, "step": 15954 }, { "epoch": 31.91, "grad_norm": 1.2418571710586548, "learning_rate": 2e-05, "loss": 0.04087563, "step": 15955 }, { "epoch": 31.912, "grad_norm": 2.2912075519561768, "learning_rate": 2e-05, "loss": 0.05685142, "step": 15956 }, { "epoch": 31.914, "grad_norm": 0.9731912612915039, "learning_rate": 2e-05, "loss": 0.03105514, "step": 15957 }, { "epoch": 31.916, "grad_norm": 1.03424072265625, "learning_rate": 2e-05, "loss": 0.0377789, "step": 15958 }, { "epoch": 31.918, "grad_norm": 1.2611950635910034, "learning_rate": 2e-05, "loss": 0.04463454, "step": 15959 }, { "epoch": 31.92, "grad_norm": 1.9684666395187378, "learning_rate": 2e-05, "loss": 0.05295739, "step": 15960 }, { "epoch": 31.922, "grad_norm": 1.3407577276229858, "learning_rate": 2e-05, "loss": 0.04236686, "step": 15961 }, { "epoch": 31.924, "grad_norm": 1.2318034172058105, "learning_rate": 2e-05, "loss": 0.03647027, "step": 15962 }, { "epoch": 31.926, "grad_norm": 1.745802402496338, "learning_rate": 2e-05, "loss": 0.04532122, "step": 15963 }, { "epoch": 31.928, "grad_norm": 1.087468147277832, "learning_rate": 2e-05, "loss": 0.04168069, "step": 15964 }, { "epoch": 31.93, "grad_norm": 2.0053272247314453, "learning_rate": 2e-05, "loss": 0.03822828, "step": 15965 }, { "epoch": 31.932, "grad_norm": 1.0389565229415894, "learning_rate": 2e-05, "loss": 0.03328662, "step": 15966 }, { "epoch": 31.934, "grad_norm": 1.394301414489746, "learning_rate": 2e-05, "loss": 0.05171059, "step": 15967 }, { "epoch": 31.936, "grad_norm": 1.4605191946029663, "learning_rate": 2e-05, "loss": 0.0633463, "step": 15968 }, { "epoch": 31.938, "grad_norm": 1.1026382446289062, "learning_rate": 2e-05, "loss": 0.04084253, "step": 15969 }, { "epoch": 31.94, "grad_norm": 1.5898246765136719, "learning_rate": 2e-05, "loss": 0.06907326, "step": 15970 }, { "epoch": 31.942, "grad_norm": 1.0612562894821167, "learning_rate": 2e-05, "loss": 0.03758101, "step": 15971 }, { "epoch": 31.944, "grad_norm": 1.5062930583953857, "learning_rate": 2e-05, "loss": 0.03778771, "step": 15972 }, { "epoch": 31.946, "grad_norm": 1.8301286697387695, "learning_rate": 2e-05, "loss": 0.03591305, "step": 15973 }, { "epoch": 31.948, "grad_norm": 1.2562285661697388, "learning_rate": 2e-05, "loss": 0.03053888, "step": 15974 }, { "epoch": 31.95, "grad_norm": 1.2209889888763428, "learning_rate": 2e-05, "loss": 0.05101029, "step": 15975 }, { "epoch": 31.951999999999998, "grad_norm": 1.7810455560684204, "learning_rate": 2e-05, "loss": 0.04453781, "step": 15976 }, { "epoch": 31.954, "grad_norm": 1.6434965133666992, "learning_rate": 2e-05, "loss": 0.0371579, "step": 15977 }, { "epoch": 31.956, "grad_norm": 1.207597255706787, "learning_rate": 2e-05, "loss": 0.03318027, "step": 15978 }, { "epoch": 31.958, "grad_norm": 1.3328630924224854, "learning_rate": 2e-05, "loss": 0.05412392, "step": 15979 }, { "epoch": 31.96, "grad_norm": 1.0309226512908936, "learning_rate": 2e-05, "loss": 0.03783122, "step": 15980 }, { "epoch": 31.962, "grad_norm": 1.042348861694336, "learning_rate": 2e-05, "loss": 0.03998961, "step": 15981 }, { "epoch": 31.964, "grad_norm": 1.2534490823745728, "learning_rate": 2e-05, "loss": 0.03557429, "step": 15982 }, { "epoch": 31.966, "grad_norm": 2.982203722000122, "learning_rate": 2e-05, "loss": 0.06171292, "step": 15983 }, { "epoch": 31.968, "grad_norm": 1.2755846977233887, "learning_rate": 2e-05, "loss": 0.03844975, "step": 15984 }, { "epoch": 31.97, "grad_norm": 1.0132505893707275, "learning_rate": 2e-05, "loss": 0.03931327, "step": 15985 }, { "epoch": 31.972, "grad_norm": 1.2777570486068726, "learning_rate": 2e-05, "loss": 0.04398004, "step": 15986 }, { "epoch": 31.974, "grad_norm": 1.2927149534225464, "learning_rate": 2e-05, "loss": 0.05497951, "step": 15987 }, { "epoch": 31.976, "grad_norm": 0.8288196921348572, "learning_rate": 2e-05, "loss": 0.02825504, "step": 15988 }, { "epoch": 31.978, "grad_norm": 1.1850969791412354, "learning_rate": 2e-05, "loss": 0.03954066, "step": 15989 }, { "epoch": 31.98, "grad_norm": 0.8509149551391602, "learning_rate": 2e-05, "loss": 0.02893381, "step": 15990 }, { "epoch": 31.982, "grad_norm": 1.2214093208312988, "learning_rate": 2e-05, "loss": 0.04563006, "step": 15991 }, { "epoch": 31.984, "grad_norm": 1.3428093194961548, "learning_rate": 2e-05, "loss": 0.05709552, "step": 15992 }, { "epoch": 31.986, "grad_norm": 1.2619773149490356, "learning_rate": 2e-05, "loss": 0.04833028, "step": 15993 }, { "epoch": 31.988, "grad_norm": 1.4030076265335083, "learning_rate": 2e-05, "loss": 0.04587878, "step": 15994 }, { "epoch": 31.99, "grad_norm": 1.113511085510254, "learning_rate": 2e-05, "loss": 0.03980121, "step": 15995 }, { "epoch": 31.992, "grad_norm": 1.087285041809082, "learning_rate": 2e-05, "loss": 0.04423406, "step": 15996 }, { "epoch": 31.994, "grad_norm": 1.2081905603408813, "learning_rate": 2e-05, "loss": 0.03930626, "step": 15997 }, { "epoch": 31.996, "grad_norm": 0.9925308227539062, "learning_rate": 2e-05, "loss": 0.03966329, "step": 15998 }, { "epoch": 31.998, "grad_norm": 1.5078144073486328, "learning_rate": 2e-05, "loss": 0.06151321, "step": 15999 }, { "epoch": 32.0, "grad_norm": 1.641327142715454, "learning_rate": 2e-05, "loss": 0.05479438, "step": 16000 }, { "epoch": 32.0, "eval_performance": { "AngleClassification_1": 0.992, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9780439121756487, "Equal_1": 0.998, "Equal_2": 0.9780439121756487, "Equal_3": 0.9880239520958084, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9939879759519038, "Parallel_2": 0.9919839679358717, "Parallel_3": 0.99, "Perpendicular_1": 0.996, "Perpendicular_2": 0.982, "Perpendicular_3": 0.8767535070140281, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9956666666666667, "PointLiesOnCircle_3": 0.9876, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9920159680638723 }, "eval_runtime": 320.2305, "eval_samples_per_second": 32.789, "eval_steps_per_second": 0.656, "step": 16000 }, { "epoch": 32.002, "grad_norm": 1.0825588703155518, "learning_rate": 2e-05, "loss": 0.04043002, "step": 16001 }, { "epoch": 32.004, "grad_norm": 1.116982102394104, "learning_rate": 2e-05, "loss": 0.0345992, "step": 16002 }, { "epoch": 32.006, "grad_norm": 1.4669443368911743, "learning_rate": 2e-05, "loss": 0.05243903, "step": 16003 }, { "epoch": 32.008, "grad_norm": 1.504705548286438, "learning_rate": 2e-05, "loss": 0.04028647, "step": 16004 }, { "epoch": 32.01, "grad_norm": 1.2776727676391602, "learning_rate": 2e-05, "loss": 0.03870967, "step": 16005 }, { "epoch": 32.012, "grad_norm": 2.4159090518951416, "learning_rate": 2e-05, "loss": 0.04641645, "step": 16006 }, { "epoch": 32.014, "grad_norm": 1.1506953239440918, "learning_rate": 2e-05, "loss": 0.03702936, "step": 16007 }, { "epoch": 32.016, "grad_norm": 1.2919611930847168, "learning_rate": 2e-05, "loss": 0.05337862, "step": 16008 }, { "epoch": 32.018, "grad_norm": 0.9426947832107544, "learning_rate": 2e-05, "loss": 0.02725463, "step": 16009 }, { "epoch": 32.02, "grad_norm": 1.00312077999115, "learning_rate": 2e-05, "loss": 0.04260667, "step": 16010 }, { "epoch": 32.022, "grad_norm": 1.1375038623809814, "learning_rate": 2e-05, "loss": 0.03781604, "step": 16011 }, { "epoch": 32.024, "grad_norm": 1.3393661975860596, "learning_rate": 2e-05, "loss": 0.04303484, "step": 16012 }, { "epoch": 32.026, "grad_norm": 1.1762498617172241, "learning_rate": 2e-05, "loss": 0.04548337, "step": 16013 }, { "epoch": 32.028, "grad_norm": 1.3804402351379395, "learning_rate": 2e-05, "loss": 0.0544548, "step": 16014 }, { "epoch": 32.03, "grad_norm": 1.5973312854766846, "learning_rate": 2e-05, "loss": 0.0446333, "step": 16015 }, { "epoch": 32.032, "grad_norm": 1.1906533241271973, "learning_rate": 2e-05, "loss": 0.0428127, "step": 16016 }, { "epoch": 32.034, "grad_norm": 1.367288589477539, "learning_rate": 2e-05, "loss": 0.04731005, "step": 16017 }, { "epoch": 32.036, "grad_norm": 1.3971643447875977, "learning_rate": 2e-05, "loss": 0.062555, "step": 16018 }, { "epoch": 32.038, "grad_norm": 1.3580145835876465, "learning_rate": 2e-05, "loss": 0.04744005, "step": 16019 }, { "epoch": 32.04, "grad_norm": 1.9001810550689697, "learning_rate": 2e-05, "loss": 0.03246989, "step": 16020 }, { "epoch": 32.042, "grad_norm": 1.1311832666397095, "learning_rate": 2e-05, "loss": 0.04354143, "step": 16021 }, { "epoch": 32.044, "grad_norm": 1.2986774444580078, "learning_rate": 2e-05, "loss": 0.04035509, "step": 16022 }, { "epoch": 32.046, "grad_norm": 1.015791654586792, "learning_rate": 2e-05, "loss": 0.03449727, "step": 16023 }, { "epoch": 32.048, "grad_norm": 1.2315489053726196, "learning_rate": 2e-05, "loss": 0.04751674, "step": 16024 }, { "epoch": 32.05, "grad_norm": 1.6999454498291016, "learning_rate": 2e-05, "loss": 0.04061841, "step": 16025 }, { "epoch": 32.052, "grad_norm": 1.1011244058609009, "learning_rate": 2e-05, "loss": 0.04402358, "step": 16026 }, { "epoch": 32.054, "grad_norm": 0.9123277068138123, "learning_rate": 2e-05, "loss": 0.03008776, "step": 16027 }, { "epoch": 32.056, "grad_norm": 1.5880179405212402, "learning_rate": 2e-05, "loss": 0.05666232, "step": 16028 }, { "epoch": 32.058, "grad_norm": 1.4386612176895142, "learning_rate": 2e-05, "loss": 0.04437473, "step": 16029 }, { "epoch": 32.06, "grad_norm": 1.1378834247589111, "learning_rate": 2e-05, "loss": 0.04078399, "step": 16030 }, { "epoch": 32.062, "grad_norm": 1.1456459760665894, "learning_rate": 2e-05, "loss": 0.05264393, "step": 16031 }, { "epoch": 32.064, "grad_norm": 1.2216919660568237, "learning_rate": 2e-05, "loss": 0.04822063, "step": 16032 }, { "epoch": 32.066, "grad_norm": 1.7245386838912964, "learning_rate": 2e-05, "loss": 0.05039418, "step": 16033 }, { "epoch": 32.068, "grad_norm": 1.1855204105377197, "learning_rate": 2e-05, "loss": 0.02989644, "step": 16034 }, { "epoch": 32.07, "grad_norm": 1.0187755823135376, "learning_rate": 2e-05, "loss": 0.03046339, "step": 16035 }, { "epoch": 32.072, "grad_norm": 1.0775607824325562, "learning_rate": 2e-05, "loss": 0.05659611, "step": 16036 }, { "epoch": 32.074, "grad_norm": 1.234232783317566, "learning_rate": 2e-05, "loss": 0.0332603, "step": 16037 }, { "epoch": 32.076, "grad_norm": 1.037575364112854, "learning_rate": 2e-05, "loss": 0.03690245, "step": 16038 }, { "epoch": 32.078, "grad_norm": 1.1662631034851074, "learning_rate": 2e-05, "loss": 0.04369701, "step": 16039 }, { "epoch": 32.08, "grad_norm": 1.172985553741455, "learning_rate": 2e-05, "loss": 0.03649969, "step": 16040 }, { "epoch": 32.082, "grad_norm": 1.399276614189148, "learning_rate": 2e-05, "loss": 0.04782386, "step": 16041 }, { "epoch": 32.084, "grad_norm": 1.089415431022644, "learning_rate": 2e-05, "loss": 0.0528977, "step": 16042 }, { "epoch": 32.086, "grad_norm": 1.217584252357483, "learning_rate": 2e-05, "loss": 0.04556941, "step": 16043 }, { "epoch": 32.088, "grad_norm": 1.1931408643722534, "learning_rate": 2e-05, "loss": 0.04738794, "step": 16044 }, { "epoch": 32.09, "grad_norm": 1.0571660995483398, "learning_rate": 2e-05, "loss": 0.03878347, "step": 16045 }, { "epoch": 32.092, "grad_norm": 1.4781181812286377, "learning_rate": 2e-05, "loss": 0.04194687, "step": 16046 }, { "epoch": 32.094, "grad_norm": 4.250499248504639, "learning_rate": 2e-05, "loss": 0.03575663, "step": 16047 }, { "epoch": 32.096, "grad_norm": 2.5705862045288086, "learning_rate": 2e-05, "loss": 0.04239442, "step": 16048 }, { "epoch": 32.098, "grad_norm": 0.9903239607810974, "learning_rate": 2e-05, "loss": 0.04511203, "step": 16049 }, { "epoch": 32.1, "grad_norm": 1.8635953664779663, "learning_rate": 2e-05, "loss": 0.05268551, "step": 16050 }, { "epoch": 32.102, "grad_norm": 2.417490005493164, "learning_rate": 2e-05, "loss": 0.0534266, "step": 16051 }, { "epoch": 32.104, "grad_norm": 1.4200366735458374, "learning_rate": 2e-05, "loss": 0.05390621, "step": 16052 }, { "epoch": 32.106, "grad_norm": 1.796728491783142, "learning_rate": 2e-05, "loss": 0.05886037, "step": 16053 }, { "epoch": 32.108, "grad_norm": 1.967190146446228, "learning_rate": 2e-05, "loss": 0.04227813, "step": 16054 }, { "epoch": 32.11, "grad_norm": 1.7734386920928955, "learning_rate": 2e-05, "loss": 0.03885096, "step": 16055 }, { "epoch": 32.112, "grad_norm": 1.0239137411117554, "learning_rate": 2e-05, "loss": 0.03256997, "step": 16056 }, { "epoch": 32.114, "grad_norm": 1.1545599699020386, "learning_rate": 2e-05, "loss": 0.04887954, "step": 16057 }, { "epoch": 32.116, "grad_norm": 2.557694673538208, "learning_rate": 2e-05, "loss": 0.04597983, "step": 16058 }, { "epoch": 32.118, "grad_norm": 2.367849826812744, "learning_rate": 2e-05, "loss": 0.07052769, "step": 16059 }, { "epoch": 32.12, "grad_norm": 1.2989052534103394, "learning_rate": 2e-05, "loss": 0.05320881, "step": 16060 }, { "epoch": 32.122, "grad_norm": 1.6988813877105713, "learning_rate": 2e-05, "loss": 0.05019864, "step": 16061 }, { "epoch": 32.124, "grad_norm": 1.2246428728103638, "learning_rate": 2e-05, "loss": 0.03273758, "step": 16062 }, { "epoch": 32.126, "grad_norm": 0.8918790221214294, "learning_rate": 2e-05, "loss": 0.02186277, "step": 16063 }, { "epoch": 32.128, "grad_norm": 0.9243330359458923, "learning_rate": 2e-05, "loss": 0.02869822, "step": 16064 }, { "epoch": 32.13, "grad_norm": 0.9982576370239258, "learning_rate": 2e-05, "loss": 0.03740531, "step": 16065 }, { "epoch": 32.132, "grad_norm": 1.0650466680526733, "learning_rate": 2e-05, "loss": 0.03073342, "step": 16066 }, { "epoch": 32.134, "grad_norm": 2.1163761615753174, "learning_rate": 2e-05, "loss": 0.03885228, "step": 16067 }, { "epoch": 32.136, "grad_norm": 0.9434223771095276, "learning_rate": 2e-05, "loss": 0.03199326, "step": 16068 }, { "epoch": 32.138, "grad_norm": 0.8779048919677734, "learning_rate": 2e-05, "loss": 0.02758794, "step": 16069 }, { "epoch": 32.14, "grad_norm": 1.155031442642212, "learning_rate": 2e-05, "loss": 0.04074682, "step": 16070 }, { "epoch": 32.142, "grad_norm": 1.140641450881958, "learning_rate": 2e-05, "loss": 0.04276387, "step": 16071 }, { "epoch": 32.144, "grad_norm": 0.932158887386322, "learning_rate": 2e-05, "loss": 0.02548703, "step": 16072 }, { "epoch": 32.146, "grad_norm": 1.3973239660263062, "learning_rate": 2e-05, "loss": 0.05437903, "step": 16073 }, { "epoch": 32.148, "grad_norm": 1.164240837097168, "learning_rate": 2e-05, "loss": 0.03863488, "step": 16074 }, { "epoch": 32.15, "grad_norm": 1.162251591682434, "learning_rate": 2e-05, "loss": 0.05012304, "step": 16075 }, { "epoch": 32.152, "grad_norm": 1.5069862604141235, "learning_rate": 2e-05, "loss": 0.04389899, "step": 16076 }, { "epoch": 32.154, "grad_norm": 1.4252487421035767, "learning_rate": 2e-05, "loss": 0.04979219, "step": 16077 }, { "epoch": 32.156, "grad_norm": 1.2503894567489624, "learning_rate": 2e-05, "loss": 0.04105882, "step": 16078 }, { "epoch": 32.158, "grad_norm": 1.1806787252426147, "learning_rate": 2e-05, "loss": 0.04621601, "step": 16079 }, { "epoch": 32.16, "grad_norm": 1.0560612678527832, "learning_rate": 2e-05, "loss": 0.03700335, "step": 16080 }, { "epoch": 32.162, "grad_norm": 1.3735400438308716, "learning_rate": 2e-05, "loss": 0.04111753, "step": 16081 }, { "epoch": 32.164, "grad_norm": 1.053409457206726, "learning_rate": 2e-05, "loss": 0.0357594, "step": 16082 }, { "epoch": 32.166, "grad_norm": 2.5766215324401855, "learning_rate": 2e-05, "loss": 0.05252399, "step": 16083 }, { "epoch": 32.168, "grad_norm": 1.9363195896148682, "learning_rate": 2e-05, "loss": 0.05619735, "step": 16084 }, { "epoch": 32.17, "grad_norm": 0.9756081104278564, "learning_rate": 2e-05, "loss": 0.03354892, "step": 16085 }, { "epoch": 32.172, "grad_norm": 0.9970953464508057, "learning_rate": 2e-05, "loss": 0.03117803, "step": 16086 }, { "epoch": 32.174, "grad_norm": 1.3192036151885986, "learning_rate": 2e-05, "loss": 0.04156514, "step": 16087 }, { "epoch": 32.176, "grad_norm": 1.3369141817092896, "learning_rate": 2e-05, "loss": 0.04897435, "step": 16088 }, { "epoch": 32.178, "grad_norm": 1.4819189310073853, "learning_rate": 2e-05, "loss": 0.04706377, "step": 16089 }, { "epoch": 32.18, "grad_norm": 1.1336413621902466, "learning_rate": 2e-05, "loss": 0.03167891, "step": 16090 }, { "epoch": 32.182, "grad_norm": 1.1428834199905396, "learning_rate": 2e-05, "loss": 0.02547898, "step": 16091 }, { "epoch": 32.184, "grad_norm": 1.2145164012908936, "learning_rate": 2e-05, "loss": 0.03935249, "step": 16092 }, { "epoch": 32.186, "grad_norm": 1.4651552438735962, "learning_rate": 2e-05, "loss": 0.03130937, "step": 16093 }, { "epoch": 32.188, "grad_norm": 1.480681300163269, "learning_rate": 2e-05, "loss": 0.03741454, "step": 16094 }, { "epoch": 32.19, "grad_norm": 2.146885633468628, "learning_rate": 2e-05, "loss": 0.05036818, "step": 16095 }, { "epoch": 32.192, "grad_norm": 1.1545674800872803, "learning_rate": 2e-05, "loss": 0.03278602, "step": 16096 }, { "epoch": 32.194, "grad_norm": 0.9393602013587952, "learning_rate": 2e-05, "loss": 0.03584001, "step": 16097 }, { "epoch": 32.196, "grad_norm": 1.2177561521530151, "learning_rate": 2e-05, "loss": 0.03207495, "step": 16098 }, { "epoch": 32.198, "grad_norm": 1.0664947032928467, "learning_rate": 2e-05, "loss": 0.03970977, "step": 16099 }, { "epoch": 32.2, "grad_norm": 1.6877775192260742, "learning_rate": 2e-05, "loss": 0.04805655, "step": 16100 }, { "epoch": 32.202, "grad_norm": 1.9446738958358765, "learning_rate": 2e-05, "loss": 0.06819682, "step": 16101 }, { "epoch": 32.204, "grad_norm": 1.219976782798767, "learning_rate": 2e-05, "loss": 0.04143453, "step": 16102 }, { "epoch": 32.206, "grad_norm": 1.203502893447876, "learning_rate": 2e-05, "loss": 0.03769158, "step": 16103 }, { "epoch": 32.208, "grad_norm": 1.0326348543167114, "learning_rate": 2e-05, "loss": 0.02815155, "step": 16104 }, { "epoch": 32.21, "grad_norm": 0.9759824872016907, "learning_rate": 2e-05, "loss": 0.03706375, "step": 16105 }, { "epoch": 32.212, "grad_norm": 1.9715107679367065, "learning_rate": 2e-05, "loss": 0.0670663, "step": 16106 }, { "epoch": 32.214, "grad_norm": 2.088665723800659, "learning_rate": 2e-05, "loss": 0.04498452, "step": 16107 }, { "epoch": 32.216, "grad_norm": 1.9537535905838013, "learning_rate": 2e-05, "loss": 0.06290226, "step": 16108 }, { "epoch": 32.218, "grad_norm": 1.1504062414169312, "learning_rate": 2e-05, "loss": 0.0422533, "step": 16109 }, { "epoch": 32.22, "grad_norm": 1.2755309343338013, "learning_rate": 2e-05, "loss": 0.05198646, "step": 16110 }, { "epoch": 32.222, "grad_norm": 1.7408345937728882, "learning_rate": 2e-05, "loss": 0.03385472, "step": 16111 }, { "epoch": 32.224, "grad_norm": 1.2028616666793823, "learning_rate": 2e-05, "loss": 0.03718933, "step": 16112 }, { "epoch": 32.226, "grad_norm": 1.2710516452789307, "learning_rate": 2e-05, "loss": 0.04598017, "step": 16113 }, { "epoch": 32.228, "grad_norm": 1.0447970628738403, "learning_rate": 2e-05, "loss": 0.04060001, "step": 16114 }, { "epoch": 32.23, "grad_norm": 1.267838478088379, "learning_rate": 2e-05, "loss": 0.06093229, "step": 16115 }, { "epoch": 32.232, "grad_norm": 1.31565260887146, "learning_rate": 2e-05, "loss": 0.03904974, "step": 16116 }, { "epoch": 32.234, "grad_norm": 1.2240655422210693, "learning_rate": 2e-05, "loss": 0.03763383, "step": 16117 }, { "epoch": 32.236, "grad_norm": 0.9750972390174866, "learning_rate": 2e-05, "loss": 0.03341448, "step": 16118 }, { "epoch": 32.238, "grad_norm": 1.330909013748169, "learning_rate": 2e-05, "loss": 0.04453826, "step": 16119 }, { "epoch": 32.24, "grad_norm": 2.050285816192627, "learning_rate": 2e-05, "loss": 0.06148023, "step": 16120 }, { "epoch": 32.242, "grad_norm": 1.2034765481948853, "learning_rate": 2e-05, "loss": 0.03787356, "step": 16121 }, { "epoch": 32.244, "grad_norm": 2.573892831802368, "learning_rate": 2e-05, "loss": 0.0428374, "step": 16122 }, { "epoch": 32.246, "grad_norm": 1.063158631324768, "learning_rate": 2e-05, "loss": 0.03356419, "step": 16123 }, { "epoch": 32.248, "grad_norm": 1.1658393144607544, "learning_rate": 2e-05, "loss": 0.02797179, "step": 16124 }, { "epoch": 32.25, "grad_norm": 1.3309680223464966, "learning_rate": 2e-05, "loss": 0.06812909, "step": 16125 }, { "epoch": 32.252, "grad_norm": 0.8895295858383179, "learning_rate": 2e-05, "loss": 0.03186877, "step": 16126 }, { "epoch": 32.254, "grad_norm": 1.42412269115448, "learning_rate": 2e-05, "loss": 0.05221164, "step": 16127 }, { "epoch": 32.256, "grad_norm": 1.7970479726791382, "learning_rate": 2e-05, "loss": 0.04089088, "step": 16128 }, { "epoch": 32.258, "grad_norm": 1.4141465425491333, "learning_rate": 2e-05, "loss": 0.03509438, "step": 16129 }, { "epoch": 32.26, "grad_norm": 1.255109190940857, "learning_rate": 2e-05, "loss": 0.03642782, "step": 16130 }, { "epoch": 32.262, "grad_norm": 2.4638772010803223, "learning_rate": 2e-05, "loss": 0.06095183, "step": 16131 }, { "epoch": 32.264, "grad_norm": 1.527658224105835, "learning_rate": 2e-05, "loss": 0.03274827, "step": 16132 }, { "epoch": 32.266, "grad_norm": 1.2536580562591553, "learning_rate": 2e-05, "loss": 0.0438434, "step": 16133 }, { "epoch": 32.268, "grad_norm": 1.317441701889038, "learning_rate": 2e-05, "loss": 0.05264898, "step": 16134 }, { "epoch": 32.27, "grad_norm": 1.0908536911010742, "learning_rate": 2e-05, "loss": 0.04722644, "step": 16135 }, { "epoch": 32.272, "grad_norm": 1.3812543153762817, "learning_rate": 2e-05, "loss": 0.04213402, "step": 16136 }, { "epoch": 32.274, "grad_norm": 1.2621177434921265, "learning_rate": 2e-05, "loss": 0.04359785, "step": 16137 }, { "epoch": 32.276, "grad_norm": 1.1774916648864746, "learning_rate": 2e-05, "loss": 0.03530777, "step": 16138 }, { "epoch": 32.278, "grad_norm": 1.1465319395065308, "learning_rate": 2e-05, "loss": 0.04779294, "step": 16139 }, { "epoch": 32.28, "grad_norm": 1.1407089233398438, "learning_rate": 2e-05, "loss": 0.04244178, "step": 16140 }, { "epoch": 32.282, "grad_norm": 1.1952414512634277, "learning_rate": 2e-05, "loss": 0.03308697, "step": 16141 }, { "epoch": 32.284, "grad_norm": 1.1828652620315552, "learning_rate": 2e-05, "loss": 0.03380813, "step": 16142 }, { "epoch": 32.286, "grad_norm": 2.2525599002838135, "learning_rate": 2e-05, "loss": 0.05679112, "step": 16143 }, { "epoch": 32.288, "grad_norm": 2.428598642349243, "learning_rate": 2e-05, "loss": 0.05677919, "step": 16144 }, { "epoch": 32.29, "grad_norm": 1.4035303592681885, "learning_rate": 2e-05, "loss": 0.0381352, "step": 16145 }, { "epoch": 32.292, "grad_norm": 1.2325247526168823, "learning_rate": 2e-05, "loss": 0.04354863, "step": 16146 }, { "epoch": 32.294, "grad_norm": 1.1236069202423096, "learning_rate": 2e-05, "loss": 0.04897562, "step": 16147 }, { "epoch": 32.296, "grad_norm": 1.730275273323059, "learning_rate": 2e-05, "loss": 0.03983387, "step": 16148 }, { "epoch": 32.298, "grad_norm": 1.732548713684082, "learning_rate": 2e-05, "loss": 0.04608201, "step": 16149 }, { "epoch": 32.3, "grad_norm": 1.9877454042434692, "learning_rate": 2e-05, "loss": 0.03996104, "step": 16150 }, { "epoch": 32.302, "grad_norm": 1.0003015995025635, "learning_rate": 2e-05, "loss": 0.03154821, "step": 16151 }, { "epoch": 32.304, "grad_norm": 1.7395548820495605, "learning_rate": 2e-05, "loss": 0.05859068, "step": 16152 }, { "epoch": 32.306, "grad_norm": 1.7625404596328735, "learning_rate": 2e-05, "loss": 0.04158505, "step": 16153 }, { "epoch": 32.308, "grad_norm": 1.3035045862197876, "learning_rate": 2e-05, "loss": 0.03832395, "step": 16154 }, { "epoch": 32.31, "grad_norm": 1.2695159912109375, "learning_rate": 2e-05, "loss": 0.05645255, "step": 16155 }, { "epoch": 32.312, "grad_norm": 1.68500816822052, "learning_rate": 2e-05, "loss": 0.07209504, "step": 16156 }, { "epoch": 32.314, "grad_norm": 1.1984082460403442, "learning_rate": 2e-05, "loss": 0.03992949, "step": 16157 }, { "epoch": 32.316, "grad_norm": 1.199971079826355, "learning_rate": 2e-05, "loss": 0.04131328, "step": 16158 }, { "epoch": 32.318, "grad_norm": 1.0048577785491943, "learning_rate": 2e-05, "loss": 0.03791015, "step": 16159 }, { "epoch": 32.32, "grad_norm": 1.0711760520935059, "learning_rate": 2e-05, "loss": 0.03038563, "step": 16160 }, { "epoch": 32.322, "grad_norm": 1.5091052055358887, "learning_rate": 2e-05, "loss": 0.05084156, "step": 16161 }, { "epoch": 32.324, "grad_norm": 1.170743703842163, "learning_rate": 2e-05, "loss": 0.04108653, "step": 16162 }, { "epoch": 32.326, "grad_norm": 0.9663565158843994, "learning_rate": 2e-05, "loss": 0.03507331, "step": 16163 }, { "epoch": 32.328, "grad_norm": 1.7884368896484375, "learning_rate": 2e-05, "loss": 0.03116245, "step": 16164 }, { "epoch": 32.33, "grad_norm": 1.2119756937026978, "learning_rate": 2e-05, "loss": 0.03635027, "step": 16165 }, { "epoch": 32.332, "grad_norm": 1.2660174369812012, "learning_rate": 2e-05, "loss": 0.05102555, "step": 16166 }, { "epoch": 32.334, "grad_norm": 1.6132479906082153, "learning_rate": 2e-05, "loss": 0.05014277, "step": 16167 }, { "epoch": 32.336, "grad_norm": 1.722839117050171, "learning_rate": 2e-05, "loss": 0.04384566, "step": 16168 }, { "epoch": 32.338, "grad_norm": 1.3902097940444946, "learning_rate": 2e-05, "loss": 0.03447413, "step": 16169 }, { "epoch": 32.34, "grad_norm": 1.2110004425048828, "learning_rate": 2e-05, "loss": 0.03868734, "step": 16170 }, { "epoch": 32.342, "grad_norm": 1.4563666582107544, "learning_rate": 2e-05, "loss": 0.04213499, "step": 16171 }, { "epoch": 32.344, "grad_norm": 1.213379144668579, "learning_rate": 2e-05, "loss": 0.05638143, "step": 16172 }, { "epoch": 32.346, "grad_norm": 1.09957754611969, "learning_rate": 2e-05, "loss": 0.04616127, "step": 16173 }, { "epoch": 32.348, "grad_norm": 1.0122456550598145, "learning_rate": 2e-05, "loss": 0.0424768, "step": 16174 }, { "epoch": 32.35, "grad_norm": 1.259447455406189, "learning_rate": 2e-05, "loss": 0.04152884, "step": 16175 }, { "epoch": 32.352, "grad_norm": 1.1292533874511719, "learning_rate": 2e-05, "loss": 0.04704421, "step": 16176 }, { "epoch": 32.354, "grad_norm": 1.455267071723938, "learning_rate": 2e-05, "loss": 0.05591056, "step": 16177 }, { "epoch": 32.356, "grad_norm": 1.2393004894256592, "learning_rate": 2e-05, "loss": 0.03246272, "step": 16178 }, { "epoch": 32.358, "grad_norm": 1.4194928407669067, "learning_rate": 2e-05, "loss": 0.04797504, "step": 16179 }, { "epoch": 32.36, "grad_norm": 1.3719854354858398, "learning_rate": 2e-05, "loss": 0.05387818, "step": 16180 }, { "epoch": 32.362, "grad_norm": 1.0265268087387085, "learning_rate": 2e-05, "loss": 0.03407123, "step": 16181 }, { "epoch": 32.364, "grad_norm": 1.1385420560836792, "learning_rate": 2e-05, "loss": 0.04104249, "step": 16182 }, { "epoch": 32.366, "grad_norm": 1.4606928825378418, "learning_rate": 2e-05, "loss": 0.05414707, "step": 16183 }, { "epoch": 32.368, "grad_norm": 1.4488290548324585, "learning_rate": 2e-05, "loss": 0.05977223, "step": 16184 }, { "epoch": 32.37, "grad_norm": 1.556361436843872, "learning_rate": 2e-05, "loss": 0.03753366, "step": 16185 }, { "epoch": 32.372, "grad_norm": 1.0535379648208618, "learning_rate": 2e-05, "loss": 0.038872, "step": 16186 }, { "epoch": 32.374, "grad_norm": 1.3477610349655151, "learning_rate": 2e-05, "loss": 0.04552489, "step": 16187 }, { "epoch": 32.376, "grad_norm": 0.9643598198890686, "learning_rate": 2e-05, "loss": 0.03325019, "step": 16188 }, { "epoch": 32.378, "grad_norm": 2.3846733570098877, "learning_rate": 2e-05, "loss": 0.03706906, "step": 16189 }, { "epoch": 32.38, "grad_norm": 1.3666075468063354, "learning_rate": 2e-05, "loss": 0.05090994, "step": 16190 }, { "epoch": 32.382, "grad_norm": 1.3230539560317993, "learning_rate": 2e-05, "loss": 0.04045279, "step": 16191 }, { "epoch": 32.384, "grad_norm": 1.1574872732162476, "learning_rate": 2e-05, "loss": 0.043694, "step": 16192 }, { "epoch": 32.386, "grad_norm": 1.11777663230896, "learning_rate": 2e-05, "loss": 0.05323738, "step": 16193 }, { "epoch": 32.388, "grad_norm": 1.1913745403289795, "learning_rate": 2e-05, "loss": 0.04276652, "step": 16194 }, { "epoch": 32.39, "grad_norm": 2.0520830154418945, "learning_rate": 2e-05, "loss": 0.05131931, "step": 16195 }, { "epoch": 32.392, "grad_norm": 1.3622318506240845, "learning_rate": 2e-05, "loss": 0.03571715, "step": 16196 }, { "epoch": 32.394, "grad_norm": 1.278665542602539, "learning_rate": 2e-05, "loss": 0.04110078, "step": 16197 }, { "epoch": 32.396, "grad_norm": 1.3019253015518188, "learning_rate": 2e-05, "loss": 0.03151403, "step": 16198 }, { "epoch": 32.398, "grad_norm": 1.0655170679092407, "learning_rate": 2e-05, "loss": 0.03590965, "step": 16199 }, { "epoch": 32.4, "grad_norm": 1.0709350109100342, "learning_rate": 2e-05, "loss": 0.03407409, "step": 16200 }, { "epoch": 32.402, "grad_norm": 1.1004314422607422, "learning_rate": 2e-05, "loss": 0.04275486, "step": 16201 }, { "epoch": 32.404, "grad_norm": 1.122451901435852, "learning_rate": 2e-05, "loss": 0.0433308, "step": 16202 }, { "epoch": 32.406, "grad_norm": 1.286196231842041, "learning_rate": 2e-05, "loss": 0.05405517, "step": 16203 }, { "epoch": 32.408, "grad_norm": 1.3067890405654907, "learning_rate": 2e-05, "loss": 0.04709282, "step": 16204 }, { "epoch": 32.41, "grad_norm": 1.2169573307037354, "learning_rate": 2e-05, "loss": 0.04799424, "step": 16205 }, { "epoch": 32.412, "grad_norm": 1.0351089239120483, "learning_rate": 2e-05, "loss": 0.02892509, "step": 16206 }, { "epoch": 32.414, "grad_norm": 1.1305210590362549, "learning_rate": 2e-05, "loss": 0.03046542, "step": 16207 }, { "epoch": 32.416, "grad_norm": 1.049100637435913, "learning_rate": 2e-05, "loss": 0.03685327, "step": 16208 }, { "epoch": 32.418, "grad_norm": 2.5545873641967773, "learning_rate": 2e-05, "loss": 0.05403619, "step": 16209 }, { "epoch": 32.42, "grad_norm": 1.5375778675079346, "learning_rate": 2e-05, "loss": 0.0551252, "step": 16210 }, { "epoch": 32.422, "grad_norm": 1.355461597442627, "learning_rate": 2e-05, "loss": 0.03641314, "step": 16211 }, { "epoch": 32.424, "grad_norm": 2.1974892616271973, "learning_rate": 2e-05, "loss": 0.0479245, "step": 16212 }, { "epoch": 32.426, "grad_norm": 2.6640677452087402, "learning_rate": 2e-05, "loss": 0.05790597, "step": 16213 }, { "epoch": 32.428, "grad_norm": 1.0314759016036987, "learning_rate": 2e-05, "loss": 0.02948291, "step": 16214 }, { "epoch": 32.43, "grad_norm": 1.1263335943222046, "learning_rate": 2e-05, "loss": 0.03837535, "step": 16215 }, { "epoch": 32.432, "grad_norm": 0.9716264605522156, "learning_rate": 2e-05, "loss": 0.03991567, "step": 16216 }, { "epoch": 32.434, "grad_norm": 1.3745099306106567, "learning_rate": 2e-05, "loss": 0.05104405, "step": 16217 }, { "epoch": 32.436, "grad_norm": 1.1306488513946533, "learning_rate": 2e-05, "loss": 0.04568043, "step": 16218 }, { "epoch": 32.438, "grad_norm": 1.738258719444275, "learning_rate": 2e-05, "loss": 0.06466265, "step": 16219 }, { "epoch": 32.44, "grad_norm": 1.6814087629318237, "learning_rate": 2e-05, "loss": 0.04136957, "step": 16220 }, { "epoch": 32.442, "grad_norm": 1.1837775707244873, "learning_rate": 2e-05, "loss": 0.0486927, "step": 16221 }, { "epoch": 32.444, "grad_norm": 1.257043719291687, "learning_rate": 2e-05, "loss": 0.04791939, "step": 16222 }, { "epoch": 32.446, "grad_norm": 1.8833086490631104, "learning_rate": 2e-05, "loss": 0.03800988, "step": 16223 }, { "epoch": 32.448, "grad_norm": 1.1962319612503052, "learning_rate": 2e-05, "loss": 0.03850331, "step": 16224 }, { "epoch": 32.45, "grad_norm": 1.2660398483276367, "learning_rate": 2e-05, "loss": 0.03511299, "step": 16225 }, { "epoch": 32.452, "grad_norm": 1.2324693202972412, "learning_rate": 2e-05, "loss": 0.04578947, "step": 16226 }, { "epoch": 32.454, "grad_norm": 0.8969568014144897, "learning_rate": 2e-05, "loss": 0.03069787, "step": 16227 }, { "epoch": 32.456, "grad_norm": 1.4285138845443726, "learning_rate": 2e-05, "loss": 0.06139054, "step": 16228 }, { "epoch": 32.458, "grad_norm": 0.9827262163162231, "learning_rate": 2e-05, "loss": 0.03358797, "step": 16229 }, { "epoch": 32.46, "grad_norm": 1.6161268949508667, "learning_rate": 2e-05, "loss": 0.05113133, "step": 16230 }, { "epoch": 32.462, "grad_norm": 1.2342438697814941, "learning_rate": 2e-05, "loss": 0.04767845, "step": 16231 }, { "epoch": 32.464, "grad_norm": 1.4637534618377686, "learning_rate": 2e-05, "loss": 0.05040576, "step": 16232 }, { "epoch": 32.466, "grad_norm": 1.251790165901184, "learning_rate": 2e-05, "loss": 0.04769853, "step": 16233 }, { "epoch": 32.468, "grad_norm": 2.195897102355957, "learning_rate": 2e-05, "loss": 0.04872566, "step": 16234 }, { "epoch": 32.47, "grad_norm": 0.8366017937660217, "learning_rate": 2e-05, "loss": 0.03476872, "step": 16235 }, { "epoch": 32.472, "grad_norm": 3.469074249267578, "learning_rate": 2e-05, "loss": 0.04331683, "step": 16236 }, { "epoch": 32.474, "grad_norm": 2.101489782333374, "learning_rate": 2e-05, "loss": 0.06111233, "step": 16237 }, { "epoch": 32.476, "grad_norm": 1.5878509283065796, "learning_rate": 2e-05, "loss": 0.04456938, "step": 16238 }, { "epoch": 32.478, "grad_norm": 1.6882450580596924, "learning_rate": 2e-05, "loss": 0.06533901, "step": 16239 }, { "epoch": 32.48, "grad_norm": 1.980782389640808, "learning_rate": 2e-05, "loss": 0.02859255, "step": 16240 }, { "epoch": 32.482, "grad_norm": 2.289910316467285, "learning_rate": 2e-05, "loss": 0.04708671, "step": 16241 }, { "epoch": 32.484, "grad_norm": 1.2156428098678589, "learning_rate": 2e-05, "loss": 0.06721392, "step": 16242 }, { "epoch": 32.486, "grad_norm": 2.177699327468872, "learning_rate": 2e-05, "loss": 0.03834368, "step": 16243 }, { "epoch": 32.488, "grad_norm": 1.3921353816986084, "learning_rate": 2e-05, "loss": 0.05225492, "step": 16244 }, { "epoch": 32.49, "grad_norm": 1.0509675741195679, "learning_rate": 2e-05, "loss": 0.03782216, "step": 16245 }, { "epoch": 32.492, "grad_norm": 0.8965580463409424, "learning_rate": 2e-05, "loss": 0.03118903, "step": 16246 }, { "epoch": 32.494, "grad_norm": 1.2393367290496826, "learning_rate": 2e-05, "loss": 0.04286648, "step": 16247 }, { "epoch": 32.496, "grad_norm": 4.540200233459473, "learning_rate": 2e-05, "loss": 0.05019096, "step": 16248 }, { "epoch": 32.498, "grad_norm": 1.4414736032485962, "learning_rate": 2e-05, "loss": 0.03688266, "step": 16249 }, { "epoch": 32.5, "grad_norm": 1.0771220922470093, "learning_rate": 2e-05, "loss": 0.04028372, "step": 16250 }, { "epoch": 32.502, "grad_norm": 1.2341318130493164, "learning_rate": 2e-05, "loss": 0.04482314, "step": 16251 }, { "epoch": 32.504, "grad_norm": 1.4283705949783325, "learning_rate": 2e-05, "loss": 0.04541865, "step": 16252 }, { "epoch": 32.506, "grad_norm": 1.5509240627288818, "learning_rate": 2e-05, "loss": 0.06562129, "step": 16253 }, { "epoch": 32.508, "grad_norm": 1.07283353805542, "learning_rate": 2e-05, "loss": 0.04550766, "step": 16254 }, { "epoch": 32.51, "grad_norm": 1.1795142889022827, "learning_rate": 2e-05, "loss": 0.05442602, "step": 16255 }, { "epoch": 32.512, "grad_norm": 1.1918917894363403, "learning_rate": 2e-05, "loss": 0.04531888, "step": 16256 }, { "epoch": 32.514, "grad_norm": 1.6722873449325562, "learning_rate": 2e-05, "loss": 0.04310684, "step": 16257 }, { "epoch": 32.516, "grad_norm": 1.2727532386779785, "learning_rate": 2e-05, "loss": 0.04338937, "step": 16258 }, { "epoch": 32.518, "grad_norm": 1.411271095275879, "learning_rate": 2e-05, "loss": 0.0337945, "step": 16259 }, { "epoch": 32.52, "grad_norm": 1.1750949621200562, "learning_rate": 2e-05, "loss": 0.03457836, "step": 16260 }, { "epoch": 32.522, "grad_norm": 3.289924144744873, "learning_rate": 2e-05, "loss": 0.04035544, "step": 16261 }, { "epoch": 32.524, "grad_norm": 0.9971786737442017, "learning_rate": 2e-05, "loss": 0.03331866, "step": 16262 }, { "epoch": 32.526, "grad_norm": 1.3523638248443604, "learning_rate": 2e-05, "loss": 0.04086154, "step": 16263 }, { "epoch": 32.528, "grad_norm": 1.1040973663330078, "learning_rate": 2e-05, "loss": 0.0335994, "step": 16264 }, { "epoch": 32.53, "grad_norm": 1.7474960088729858, "learning_rate": 2e-05, "loss": 0.04292202, "step": 16265 }, { "epoch": 32.532, "grad_norm": 1.0948997735977173, "learning_rate": 2e-05, "loss": 0.03585258, "step": 16266 }, { "epoch": 32.534, "grad_norm": 1.512582778930664, "learning_rate": 2e-05, "loss": 0.04289878, "step": 16267 }, { "epoch": 32.536, "grad_norm": 1.9142391681671143, "learning_rate": 2e-05, "loss": 0.04722939, "step": 16268 }, { "epoch": 32.538, "grad_norm": 1.0834648609161377, "learning_rate": 2e-05, "loss": 0.03546905, "step": 16269 }, { "epoch": 32.54, "grad_norm": 1.8602557182312012, "learning_rate": 2e-05, "loss": 0.03079529, "step": 16270 }, { "epoch": 32.542, "grad_norm": 1.2603952884674072, "learning_rate": 2e-05, "loss": 0.04889859, "step": 16271 }, { "epoch": 32.544, "grad_norm": 0.9242783188819885, "learning_rate": 2e-05, "loss": 0.02692599, "step": 16272 }, { "epoch": 32.546, "grad_norm": 1.140777349472046, "learning_rate": 2e-05, "loss": 0.04153073, "step": 16273 }, { "epoch": 32.548, "grad_norm": 1.153734564781189, "learning_rate": 2e-05, "loss": 0.04307986, "step": 16274 }, { "epoch": 32.55, "grad_norm": 1.1327687501907349, "learning_rate": 2e-05, "loss": 0.04815378, "step": 16275 }, { "epoch": 32.552, "grad_norm": 1.2450306415557861, "learning_rate": 2e-05, "loss": 0.0305282, "step": 16276 }, { "epoch": 32.554, "grad_norm": 1.2972357273101807, "learning_rate": 2e-05, "loss": 0.05288756, "step": 16277 }, { "epoch": 32.556, "grad_norm": 1.880336880683899, "learning_rate": 2e-05, "loss": 0.02928368, "step": 16278 }, { "epoch": 32.558, "grad_norm": 1.2293763160705566, "learning_rate": 2e-05, "loss": 0.03244419, "step": 16279 }, { "epoch": 32.56, "grad_norm": 1.0479623079299927, "learning_rate": 2e-05, "loss": 0.03361559, "step": 16280 }, { "epoch": 32.562, "grad_norm": 1.0732969045639038, "learning_rate": 2e-05, "loss": 0.0405861, "step": 16281 }, { "epoch": 32.564, "grad_norm": 1.251217007637024, "learning_rate": 2e-05, "loss": 0.03988477, "step": 16282 }, { "epoch": 32.566, "grad_norm": 1.0673967599868774, "learning_rate": 2e-05, "loss": 0.03976289, "step": 16283 }, { "epoch": 32.568, "grad_norm": 1.1585348844528198, "learning_rate": 2e-05, "loss": 0.0360926, "step": 16284 }, { "epoch": 32.57, "grad_norm": 1.051295518875122, "learning_rate": 2e-05, "loss": 0.03265924, "step": 16285 }, { "epoch": 32.572, "grad_norm": 1.1959424018859863, "learning_rate": 2e-05, "loss": 0.03449826, "step": 16286 }, { "epoch": 32.574, "grad_norm": 2.357743978500366, "learning_rate": 2e-05, "loss": 0.06374232, "step": 16287 }, { "epoch": 32.576, "grad_norm": 1.252048373222351, "learning_rate": 2e-05, "loss": 0.04862808, "step": 16288 }, { "epoch": 32.578, "grad_norm": 1.318522572517395, "learning_rate": 2e-05, "loss": 0.05090618, "step": 16289 }, { "epoch": 32.58, "grad_norm": 1.414203405380249, "learning_rate": 2e-05, "loss": 0.03399177, "step": 16290 }, { "epoch": 32.582, "grad_norm": 1.1485605239868164, "learning_rate": 2e-05, "loss": 0.03333712, "step": 16291 }, { "epoch": 32.584, "grad_norm": 1.231711506843567, "learning_rate": 2e-05, "loss": 0.04081344, "step": 16292 }, { "epoch": 32.586, "grad_norm": 1.4785326719284058, "learning_rate": 2e-05, "loss": 0.04804887, "step": 16293 }, { "epoch": 32.588, "grad_norm": 1.113040566444397, "learning_rate": 2e-05, "loss": 0.04360687, "step": 16294 }, { "epoch": 32.59, "grad_norm": 1.2656840085983276, "learning_rate": 2e-05, "loss": 0.03424615, "step": 16295 }, { "epoch": 32.592, "grad_norm": 2.090235471725464, "learning_rate": 2e-05, "loss": 0.03176118, "step": 16296 }, { "epoch": 32.594, "grad_norm": 1.2419369220733643, "learning_rate": 2e-05, "loss": 0.0560364, "step": 16297 }, { "epoch": 32.596, "grad_norm": 1.8632920980453491, "learning_rate": 2e-05, "loss": 0.04524683, "step": 16298 }, { "epoch": 32.598, "grad_norm": 1.5768665075302124, "learning_rate": 2e-05, "loss": 0.04588313, "step": 16299 }, { "epoch": 32.6, "grad_norm": 2.376133918762207, "learning_rate": 2e-05, "loss": 0.06110063, "step": 16300 }, { "epoch": 32.602, "grad_norm": 1.2238625288009644, "learning_rate": 2e-05, "loss": 0.04093513, "step": 16301 }, { "epoch": 32.604, "grad_norm": 1.8445336818695068, "learning_rate": 2e-05, "loss": 0.03895678, "step": 16302 }, { "epoch": 32.606, "grad_norm": 1.5113000869750977, "learning_rate": 2e-05, "loss": 0.04946197, "step": 16303 }, { "epoch": 32.608, "grad_norm": 1.107370376586914, "learning_rate": 2e-05, "loss": 0.02631177, "step": 16304 }, { "epoch": 32.61, "grad_norm": 1.319182276725769, "learning_rate": 2e-05, "loss": 0.03960687, "step": 16305 }, { "epoch": 32.612, "grad_norm": 1.1253376007080078, "learning_rate": 2e-05, "loss": 0.03078615, "step": 16306 }, { "epoch": 32.614, "grad_norm": 1.0863707065582275, "learning_rate": 2e-05, "loss": 0.03195829, "step": 16307 }, { "epoch": 32.616, "grad_norm": 1.9176464080810547, "learning_rate": 2e-05, "loss": 0.04252298, "step": 16308 }, { "epoch": 32.618, "grad_norm": 1.123355746269226, "learning_rate": 2e-05, "loss": 0.03697478, "step": 16309 }, { "epoch": 32.62, "grad_norm": 3.388070583343506, "learning_rate": 2e-05, "loss": 0.06398418, "step": 16310 }, { "epoch": 32.622, "grad_norm": 1.3932557106018066, "learning_rate": 2e-05, "loss": 0.05024934, "step": 16311 }, { "epoch": 32.624, "grad_norm": 1.2493484020233154, "learning_rate": 2e-05, "loss": 0.04943375, "step": 16312 }, { "epoch": 32.626, "grad_norm": 1.1439610719680786, "learning_rate": 2e-05, "loss": 0.0385169, "step": 16313 }, { "epoch": 32.628, "grad_norm": 2.4775614738464355, "learning_rate": 2e-05, "loss": 0.06101976, "step": 16314 }, { "epoch": 32.63, "grad_norm": 2.373351573944092, "learning_rate": 2e-05, "loss": 0.04415338, "step": 16315 }, { "epoch": 32.632, "grad_norm": 1.7569504976272583, "learning_rate": 2e-05, "loss": 0.04195081, "step": 16316 }, { "epoch": 32.634, "grad_norm": 1.1292805671691895, "learning_rate": 2e-05, "loss": 0.035651, "step": 16317 }, { "epoch": 32.636, "grad_norm": 1.251456618309021, "learning_rate": 2e-05, "loss": 0.04292186, "step": 16318 }, { "epoch": 32.638, "grad_norm": 0.9629459977149963, "learning_rate": 2e-05, "loss": 0.02403329, "step": 16319 }, { "epoch": 32.64, "grad_norm": 1.001502275466919, "learning_rate": 2e-05, "loss": 0.03040902, "step": 16320 }, { "epoch": 32.642, "grad_norm": 1.3388837575912476, "learning_rate": 2e-05, "loss": 0.0465103, "step": 16321 }, { "epoch": 32.644, "grad_norm": 1.0850396156311035, "learning_rate": 2e-05, "loss": 0.03716237, "step": 16322 }, { "epoch": 32.646, "grad_norm": 3.381190776824951, "learning_rate": 2e-05, "loss": 0.06202536, "step": 16323 }, { "epoch": 32.648, "grad_norm": 1.3530367612838745, "learning_rate": 2e-05, "loss": 0.03582821, "step": 16324 }, { "epoch": 32.65, "grad_norm": 2.433734893798828, "learning_rate": 2e-05, "loss": 0.05068055, "step": 16325 }, { "epoch": 32.652, "grad_norm": 1.7328464984893799, "learning_rate": 2e-05, "loss": 0.0590675, "step": 16326 }, { "epoch": 32.654, "grad_norm": 1.1107189655303955, "learning_rate": 2e-05, "loss": 0.05116561, "step": 16327 }, { "epoch": 32.656, "grad_norm": 1.2130955457687378, "learning_rate": 2e-05, "loss": 0.03328083, "step": 16328 }, { "epoch": 32.658, "grad_norm": 1.7198522090911865, "learning_rate": 2e-05, "loss": 0.04785996, "step": 16329 }, { "epoch": 32.66, "grad_norm": 1.5912532806396484, "learning_rate": 2e-05, "loss": 0.04132064, "step": 16330 }, { "epoch": 32.662, "grad_norm": 1.930972933769226, "learning_rate": 2e-05, "loss": 0.03286422, "step": 16331 }, { "epoch": 32.664, "grad_norm": 1.51142156124115, "learning_rate": 2e-05, "loss": 0.05630952, "step": 16332 }, { "epoch": 32.666, "grad_norm": 1.2394816875457764, "learning_rate": 2e-05, "loss": 0.05155517, "step": 16333 }, { "epoch": 32.668, "grad_norm": 2.041574716567993, "learning_rate": 2e-05, "loss": 0.04154414, "step": 16334 }, { "epoch": 32.67, "grad_norm": 3.471482992172241, "learning_rate": 2e-05, "loss": 0.06497285, "step": 16335 }, { "epoch": 32.672, "grad_norm": 1.1447969675064087, "learning_rate": 2e-05, "loss": 0.04535265, "step": 16336 }, { "epoch": 32.674, "grad_norm": 1.678331732749939, "learning_rate": 2e-05, "loss": 0.0535218, "step": 16337 }, { "epoch": 32.676, "grad_norm": 1.378172755241394, "learning_rate": 2e-05, "loss": 0.04844815, "step": 16338 }, { "epoch": 32.678, "grad_norm": 0.9606616497039795, "learning_rate": 2e-05, "loss": 0.03191324, "step": 16339 }, { "epoch": 32.68, "grad_norm": 1.0323848724365234, "learning_rate": 2e-05, "loss": 0.0283333, "step": 16340 }, { "epoch": 32.682, "grad_norm": 1.1071007251739502, "learning_rate": 2e-05, "loss": 0.03747611, "step": 16341 }, { "epoch": 32.684, "grad_norm": 1.0263428688049316, "learning_rate": 2e-05, "loss": 0.03178697, "step": 16342 }, { "epoch": 32.686, "grad_norm": 1.2423036098480225, "learning_rate": 2e-05, "loss": 0.03112799, "step": 16343 }, { "epoch": 32.688, "grad_norm": 1.1887843608856201, "learning_rate": 2e-05, "loss": 0.04574025, "step": 16344 }, { "epoch": 32.69, "grad_norm": 1.2458415031433105, "learning_rate": 2e-05, "loss": 0.04187585, "step": 16345 }, { "epoch": 32.692, "grad_norm": 1.3959715366363525, "learning_rate": 2e-05, "loss": 0.0457012, "step": 16346 }, { "epoch": 32.694, "grad_norm": 1.9177616834640503, "learning_rate": 2e-05, "loss": 0.03737821, "step": 16347 }, { "epoch": 32.696, "grad_norm": 1.9234297275543213, "learning_rate": 2e-05, "loss": 0.05901012, "step": 16348 }, { "epoch": 32.698, "grad_norm": 2.6709837913513184, "learning_rate": 2e-05, "loss": 0.05703682, "step": 16349 }, { "epoch": 32.7, "grad_norm": 1.9380576610565186, "learning_rate": 2e-05, "loss": 0.05842331, "step": 16350 }, { "epoch": 32.702, "grad_norm": 1.0618531703948975, "learning_rate": 2e-05, "loss": 0.03581022, "step": 16351 }, { "epoch": 32.704, "grad_norm": 1.2853940725326538, "learning_rate": 2e-05, "loss": 0.04471534, "step": 16352 }, { "epoch": 32.706, "grad_norm": 1.9387348890304565, "learning_rate": 2e-05, "loss": 0.04033763, "step": 16353 }, { "epoch": 32.708, "grad_norm": 1.651381015777588, "learning_rate": 2e-05, "loss": 0.04997224, "step": 16354 }, { "epoch": 32.71, "grad_norm": 1.1719062328338623, "learning_rate": 2e-05, "loss": 0.04294977, "step": 16355 }, { "epoch": 32.712, "grad_norm": 1.1827785968780518, "learning_rate": 2e-05, "loss": 0.06157464, "step": 16356 }, { "epoch": 32.714, "grad_norm": 2.0270729064941406, "learning_rate": 2e-05, "loss": 0.04811145, "step": 16357 }, { "epoch": 32.716, "grad_norm": 2.081660032272339, "learning_rate": 2e-05, "loss": 0.03992213, "step": 16358 }, { "epoch": 32.718, "grad_norm": 1.2889872789382935, "learning_rate": 2e-05, "loss": 0.05166517, "step": 16359 }, { "epoch": 32.72, "grad_norm": 1.5523995161056519, "learning_rate": 2e-05, "loss": 0.04683542, "step": 16360 }, { "epoch": 32.722, "grad_norm": 1.2523963451385498, "learning_rate": 2e-05, "loss": 0.03490978, "step": 16361 }, { "epoch": 32.724, "grad_norm": 1.1765249967575073, "learning_rate": 2e-05, "loss": 0.04525149, "step": 16362 }, { "epoch": 32.726, "grad_norm": 2.704094409942627, "learning_rate": 2e-05, "loss": 0.05494591, "step": 16363 }, { "epoch": 32.728, "grad_norm": 1.1506249904632568, "learning_rate": 2e-05, "loss": 0.04181469, "step": 16364 }, { "epoch": 32.73, "grad_norm": 1.0797075033187866, "learning_rate": 2e-05, "loss": 0.03218354, "step": 16365 }, { "epoch": 32.732, "grad_norm": 1.3011102676391602, "learning_rate": 2e-05, "loss": 0.03808787, "step": 16366 }, { "epoch": 32.734, "grad_norm": 1.300958514213562, "learning_rate": 2e-05, "loss": 0.0296345, "step": 16367 }, { "epoch": 32.736, "grad_norm": 1.4178552627563477, "learning_rate": 2e-05, "loss": 0.05248306, "step": 16368 }, { "epoch": 32.738, "grad_norm": 1.0817713737487793, "learning_rate": 2e-05, "loss": 0.03825011, "step": 16369 }, { "epoch": 32.74, "grad_norm": 1.0695898532867432, "learning_rate": 2e-05, "loss": 0.04175761, "step": 16370 }, { "epoch": 32.742, "grad_norm": 2.968357801437378, "learning_rate": 2e-05, "loss": 0.05305419, "step": 16371 }, { "epoch": 32.744, "grad_norm": 1.0489188432693481, "learning_rate": 2e-05, "loss": 0.03766435, "step": 16372 }, { "epoch": 32.746, "grad_norm": 0.9474694132804871, "learning_rate": 2e-05, "loss": 0.03348034, "step": 16373 }, { "epoch": 32.748, "grad_norm": 0.9930300116539001, "learning_rate": 2e-05, "loss": 0.03745917, "step": 16374 }, { "epoch": 32.75, "grad_norm": 1.0736403465270996, "learning_rate": 2e-05, "loss": 0.0384927, "step": 16375 }, { "epoch": 32.752, "grad_norm": 1.2080556154251099, "learning_rate": 2e-05, "loss": 0.05006144, "step": 16376 }, { "epoch": 32.754, "grad_norm": 0.9601371884346008, "learning_rate": 2e-05, "loss": 0.03716236, "step": 16377 }, { "epoch": 32.756, "grad_norm": 1.128725528717041, "learning_rate": 2e-05, "loss": 0.04063039, "step": 16378 }, { "epoch": 32.758, "grad_norm": 1.2183740139007568, "learning_rate": 2e-05, "loss": 0.05256324, "step": 16379 }, { "epoch": 32.76, "grad_norm": 3.258612632751465, "learning_rate": 2e-05, "loss": 0.04376873, "step": 16380 }, { "epoch": 32.762, "grad_norm": 1.1058506965637207, "learning_rate": 2e-05, "loss": 0.04281379, "step": 16381 }, { "epoch": 32.764, "grad_norm": 1.1320183277130127, "learning_rate": 2e-05, "loss": 0.04657583, "step": 16382 }, { "epoch": 32.766, "grad_norm": 1.4201374053955078, "learning_rate": 2e-05, "loss": 0.04276565, "step": 16383 }, { "epoch": 32.768, "grad_norm": 1.0543874502182007, "learning_rate": 2e-05, "loss": 0.04018901, "step": 16384 }, { "epoch": 32.77, "grad_norm": 1.4601715803146362, "learning_rate": 2e-05, "loss": 0.0541468, "step": 16385 }, { "epoch": 32.772, "grad_norm": 2.787783145904541, "learning_rate": 2e-05, "loss": 0.0562762, "step": 16386 }, { "epoch": 32.774, "grad_norm": 1.4297536611557007, "learning_rate": 2e-05, "loss": 0.04877428, "step": 16387 }, { "epoch": 32.776, "grad_norm": 1.5178031921386719, "learning_rate": 2e-05, "loss": 0.03471943, "step": 16388 }, { "epoch": 32.778, "grad_norm": 1.089928388595581, "learning_rate": 2e-05, "loss": 0.04103519, "step": 16389 }, { "epoch": 32.78, "grad_norm": 0.9991024136543274, "learning_rate": 2e-05, "loss": 0.03855386, "step": 16390 }, { "epoch": 32.782, "grad_norm": 1.0029306411743164, "learning_rate": 2e-05, "loss": 0.04183157, "step": 16391 }, { "epoch": 32.784, "grad_norm": 1.2355519533157349, "learning_rate": 2e-05, "loss": 0.05071557, "step": 16392 }, { "epoch": 32.786, "grad_norm": 0.9400306940078735, "learning_rate": 2e-05, "loss": 0.03195136, "step": 16393 }, { "epoch": 32.788, "grad_norm": 1.4382003545761108, "learning_rate": 2e-05, "loss": 0.04054421, "step": 16394 }, { "epoch": 32.79, "grad_norm": 1.1275018453598022, "learning_rate": 2e-05, "loss": 0.03660734, "step": 16395 }, { "epoch": 32.792, "grad_norm": 1.1579735279083252, "learning_rate": 2e-05, "loss": 0.03553727, "step": 16396 }, { "epoch": 32.794, "grad_norm": 1.317656397819519, "learning_rate": 2e-05, "loss": 0.03424529, "step": 16397 }, { "epoch": 32.796, "grad_norm": 2.1389896869659424, "learning_rate": 2e-05, "loss": 0.04343967, "step": 16398 }, { "epoch": 32.798, "grad_norm": 1.9410218000411987, "learning_rate": 2e-05, "loss": 0.05163146, "step": 16399 }, { "epoch": 32.8, "grad_norm": 1.146166205406189, "learning_rate": 2e-05, "loss": 0.03072646, "step": 16400 }, { "epoch": 32.802, "grad_norm": 1.333733081817627, "learning_rate": 2e-05, "loss": 0.0516637, "step": 16401 }, { "epoch": 32.804, "grad_norm": 1.0944757461547852, "learning_rate": 2e-05, "loss": 0.03826744, "step": 16402 }, { "epoch": 32.806, "grad_norm": 1.241289734840393, "learning_rate": 2e-05, "loss": 0.04015851, "step": 16403 }, { "epoch": 32.808, "grad_norm": 1.1392414569854736, "learning_rate": 2e-05, "loss": 0.04946699, "step": 16404 }, { "epoch": 32.81, "grad_norm": 0.9604151844978333, "learning_rate": 2e-05, "loss": 0.02486083, "step": 16405 }, { "epoch": 32.812, "grad_norm": 1.4133819341659546, "learning_rate": 2e-05, "loss": 0.03912335, "step": 16406 }, { "epoch": 32.814, "grad_norm": 3.016547441482544, "learning_rate": 2e-05, "loss": 0.05131318, "step": 16407 }, { "epoch": 32.816, "grad_norm": 1.6161396503448486, "learning_rate": 2e-05, "loss": 0.05032074, "step": 16408 }, { "epoch": 32.818, "grad_norm": 1.1903536319732666, "learning_rate": 2e-05, "loss": 0.0486296, "step": 16409 }, { "epoch": 32.82, "grad_norm": 1.2930749654769897, "learning_rate": 2e-05, "loss": 0.04178453, "step": 16410 }, { "epoch": 32.822, "grad_norm": 0.9721649885177612, "learning_rate": 2e-05, "loss": 0.03203877, "step": 16411 }, { "epoch": 32.824, "grad_norm": 1.4896728992462158, "learning_rate": 2e-05, "loss": 0.0417724, "step": 16412 }, { "epoch": 32.826, "grad_norm": 0.9889609217643738, "learning_rate": 2e-05, "loss": 0.03722448, "step": 16413 }, { "epoch": 32.828, "grad_norm": 1.1123636960983276, "learning_rate": 2e-05, "loss": 0.04222369, "step": 16414 }, { "epoch": 32.83, "grad_norm": 1.0436445474624634, "learning_rate": 2e-05, "loss": 0.03794225, "step": 16415 }, { "epoch": 32.832, "grad_norm": 1.930905818939209, "learning_rate": 2e-05, "loss": 0.04170159, "step": 16416 }, { "epoch": 32.834, "grad_norm": 1.0021107196807861, "learning_rate": 2e-05, "loss": 0.03140467, "step": 16417 }, { "epoch": 32.836, "grad_norm": 1.0246645212173462, "learning_rate": 2e-05, "loss": 0.03313623, "step": 16418 }, { "epoch": 32.838, "grad_norm": 1.666858196258545, "learning_rate": 2e-05, "loss": 0.04175393, "step": 16419 }, { "epoch": 32.84, "grad_norm": 1.1152204275131226, "learning_rate": 2e-05, "loss": 0.02505697, "step": 16420 }, { "epoch": 32.842, "grad_norm": 0.992870032787323, "learning_rate": 2e-05, "loss": 0.03373595, "step": 16421 }, { "epoch": 32.844, "grad_norm": 1.0322585105895996, "learning_rate": 2e-05, "loss": 0.03882974, "step": 16422 }, { "epoch": 32.846, "grad_norm": 2.256075620651245, "learning_rate": 2e-05, "loss": 0.05817116, "step": 16423 }, { "epoch": 32.848, "grad_norm": 5.627259254455566, "learning_rate": 2e-05, "loss": 0.06864762, "step": 16424 }, { "epoch": 32.85, "grad_norm": 1.0085660219192505, "learning_rate": 2e-05, "loss": 0.04046705, "step": 16425 }, { "epoch": 32.852, "grad_norm": 1.0974931716918945, "learning_rate": 2e-05, "loss": 0.03791347, "step": 16426 }, { "epoch": 32.854, "grad_norm": 1.2425390481948853, "learning_rate": 2e-05, "loss": 0.03167582, "step": 16427 }, { "epoch": 32.856, "grad_norm": 1.2744793891906738, "learning_rate": 2e-05, "loss": 0.04943582, "step": 16428 }, { "epoch": 32.858, "grad_norm": 0.9825481176376343, "learning_rate": 2e-05, "loss": 0.02305823, "step": 16429 }, { "epoch": 32.86, "grad_norm": 1.1719574928283691, "learning_rate": 2e-05, "loss": 0.04331547, "step": 16430 }, { "epoch": 32.862, "grad_norm": 1.6855008602142334, "learning_rate": 2e-05, "loss": 0.04401325, "step": 16431 }, { "epoch": 32.864, "grad_norm": 1.4206801652908325, "learning_rate": 2e-05, "loss": 0.04856441, "step": 16432 }, { "epoch": 32.866, "grad_norm": 1.7253879308700562, "learning_rate": 2e-05, "loss": 0.03633871, "step": 16433 }, { "epoch": 32.868, "grad_norm": 1.3289132118225098, "learning_rate": 2e-05, "loss": 0.03001435, "step": 16434 }, { "epoch": 32.87, "grad_norm": 1.1799901723861694, "learning_rate": 2e-05, "loss": 0.03081987, "step": 16435 }, { "epoch": 32.872, "grad_norm": 1.9092236757278442, "learning_rate": 2e-05, "loss": 0.03654516, "step": 16436 }, { "epoch": 32.874, "grad_norm": 1.1453834772109985, "learning_rate": 2e-05, "loss": 0.04329348, "step": 16437 }, { "epoch": 32.876, "grad_norm": 1.3431400060653687, "learning_rate": 2e-05, "loss": 0.04679033, "step": 16438 }, { "epoch": 32.878, "grad_norm": 1.5955402851104736, "learning_rate": 2e-05, "loss": 0.07649103, "step": 16439 }, { "epoch": 32.88, "grad_norm": 1.485243797302246, "learning_rate": 2e-05, "loss": 0.03790029, "step": 16440 }, { "epoch": 32.882, "grad_norm": 1.3634883165359497, "learning_rate": 2e-05, "loss": 0.06115822, "step": 16441 }, { "epoch": 32.884, "grad_norm": 1.1459612846374512, "learning_rate": 2e-05, "loss": 0.04496354, "step": 16442 }, { "epoch": 32.886, "grad_norm": 1.2098444700241089, "learning_rate": 2e-05, "loss": 0.03849549, "step": 16443 }, { "epoch": 32.888, "grad_norm": 1.6543594598770142, "learning_rate": 2e-05, "loss": 0.05749209, "step": 16444 }, { "epoch": 32.89, "grad_norm": 1.1916124820709229, "learning_rate": 2e-05, "loss": 0.02707171, "step": 16445 }, { "epoch": 32.892, "grad_norm": 1.328437328338623, "learning_rate": 2e-05, "loss": 0.04334527, "step": 16446 }, { "epoch": 32.894, "grad_norm": 1.472269058227539, "learning_rate": 2e-05, "loss": 0.02811809, "step": 16447 }, { "epoch": 32.896, "grad_norm": 1.1660832166671753, "learning_rate": 2e-05, "loss": 0.04044122, "step": 16448 }, { "epoch": 32.898, "grad_norm": 4.141280174255371, "learning_rate": 2e-05, "loss": 0.04939477, "step": 16449 }, { "epoch": 32.9, "grad_norm": 1.0704823732376099, "learning_rate": 2e-05, "loss": 0.03728681, "step": 16450 }, { "epoch": 32.902, "grad_norm": 1.1379095315933228, "learning_rate": 2e-05, "loss": 0.04447804, "step": 16451 }, { "epoch": 32.904, "grad_norm": 1.5318787097930908, "learning_rate": 2e-05, "loss": 0.03792928, "step": 16452 }, { "epoch": 32.906, "grad_norm": 1.7072391510009766, "learning_rate": 2e-05, "loss": 0.03659452, "step": 16453 }, { "epoch": 32.908, "grad_norm": 1.2457821369171143, "learning_rate": 2e-05, "loss": 0.03751954, "step": 16454 }, { "epoch": 32.91, "grad_norm": 1.167049527168274, "learning_rate": 2e-05, "loss": 0.03698363, "step": 16455 }, { "epoch": 32.912, "grad_norm": 1.0011647939682007, "learning_rate": 2e-05, "loss": 0.03130367, "step": 16456 }, { "epoch": 32.914, "grad_norm": 0.9641328454017639, "learning_rate": 2e-05, "loss": 0.03359494, "step": 16457 }, { "epoch": 32.916, "grad_norm": 1.1731847524642944, "learning_rate": 2e-05, "loss": 0.02458394, "step": 16458 }, { "epoch": 32.918, "grad_norm": 1.1044111251831055, "learning_rate": 2e-05, "loss": 0.04152217, "step": 16459 }, { "epoch": 32.92, "grad_norm": 1.6179026365280151, "learning_rate": 2e-05, "loss": 0.03974091, "step": 16460 }, { "epoch": 32.922, "grad_norm": 1.6373225450515747, "learning_rate": 2e-05, "loss": 0.05506727, "step": 16461 }, { "epoch": 32.924, "grad_norm": 1.1272046566009521, "learning_rate": 2e-05, "loss": 0.03284402, "step": 16462 }, { "epoch": 32.926, "grad_norm": 1.5268757343292236, "learning_rate": 2e-05, "loss": 0.05714603, "step": 16463 }, { "epoch": 32.928, "grad_norm": 1.1075844764709473, "learning_rate": 2e-05, "loss": 0.03451537, "step": 16464 }, { "epoch": 32.93, "grad_norm": 1.25146484375, "learning_rate": 2e-05, "loss": 0.05552392, "step": 16465 }, { "epoch": 32.932, "grad_norm": 1.9479658603668213, "learning_rate": 2e-05, "loss": 0.06264338, "step": 16466 }, { "epoch": 32.934, "grad_norm": 1.2056105136871338, "learning_rate": 2e-05, "loss": 0.05005362, "step": 16467 }, { "epoch": 32.936, "grad_norm": 1.3711949586868286, "learning_rate": 2e-05, "loss": 0.04337479, "step": 16468 }, { "epoch": 32.938, "grad_norm": 1.2532182931900024, "learning_rate": 2e-05, "loss": 0.04488462, "step": 16469 }, { "epoch": 32.94, "grad_norm": 1.350342035293579, "learning_rate": 2e-05, "loss": 0.04863258, "step": 16470 }, { "epoch": 32.942, "grad_norm": 1.5152119398117065, "learning_rate": 2e-05, "loss": 0.03797042, "step": 16471 }, { "epoch": 32.944, "grad_norm": 1.1705118417739868, "learning_rate": 2e-05, "loss": 0.04442852, "step": 16472 }, { "epoch": 32.946, "grad_norm": 1.4079266786575317, "learning_rate": 2e-05, "loss": 0.0416984, "step": 16473 }, { "epoch": 32.948, "grad_norm": 1.1473650932312012, "learning_rate": 2e-05, "loss": 0.0417373, "step": 16474 }, { "epoch": 32.95, "grad_norm": 1.14018976688385, "learning_rate": 2e-05, "loss": 0.03788053, "step": 16475 }, { "epoch": 32.952, "grad_norm": 1.0621168613433838, "learning_rate": 2e-05, "loss": 0.02839708, "step": 16476 }, { "epoch": 32.954, "grad_norm": 1.2420809268951416, "learning_rate": 2e-05, "loss": 0.04673456, "step": 16477 }, { "epoch": 32.956, "grad_norm": 1.4332877397537231, "learning_rate": 2e-05, "loss": 0.05584753, "step": 16478 }, { "epoch": 32.958, "grad_norm": 0.9597718119621277, "learning_rate": 2e-05, "loss": 0.03618815, "step": 16479 }, { "epoch": 32.96, "grad_norm": 1.8332748413085938, "learning_rate": 2e-05, "loss": 0.04130953, "step": 16480 }, { "epoch": 32.962, "grad_norm": 3.7896127700805664, "learning_rate": 2e-05, "loss": 0.048007, "step": 16481 }, { "epoch": 32.964, "grad_norm": 1.2358813285827637, "learning_rate": 2e-05, "loss": 0.04505859, "step": 16482 }, { "epoch": 32.966, "grad_norm": 1.2916022539138794, "learning_rate": 2e-05, "loss": 0.02319424, "step": 16483 }, { "epoch": 32.968, "grad_norm": 1.1144905090332031, "learning_rate": 2e-05, "loss": 0.04152855, "step": 16484 }, { "epoch": 32.97, "grad_norm": 1.2684904336929321, "learning_rate": 2e-05, "loss": 0.04438142, "step": 16485 }, { "epoch": 32.972, "grad_norm": 2.002365827560425, "learning_rate": 2e-05, "loss": 0.06185231, "step": 16486 }, { "epoch": 32.974, "grad_norm": 1.3114463090896606, "learning_rate": 2e-05, "loss": 0.0507057, "step": 16487 }, { "epoch": 32.976, "grad_norm": 0.9986602663993835, "learning_rate": 2e-05, "loss": 0.04370682, "step": 16488 }, { "epoch": 32.978, "grad_norm": 0.9797910451889038, "learning_rate": 2e-05, "loss": 0.03410993, "step": 16489 }, { "epoch": 32.98, "grad_norm": 1.8880815505981445, "learning_rate": 2e-05, "loss": 0.050763, "step": 16490 }, { "epoch": 32.982, "grad_norm": 1.241797685623169, "learning_rate": 2e-05, "loss": 0.05129294, "step": 16491 }, { "epoch": 32.984, "grad_norm": 1.9726879596710205, "learning_rate": 2e-05, "loss": 0.06170304, "step": 16492 }, { "epoch": 32.986, "grad_norm": 1.1492379903793335, "learning_rate": 2e-05, "loss": 0.05158865, "step": 16493 }, { "epoch": 32.988, "grad_norm": 1.4059292078018188, "learning_rate": 2e-05, "loss": 0.04178857, "step": 16494 }, { "epoch": 32.99, "grad_norm": 1.3059656620025635, "learning_rate": 2e-05, "loss": 0.03553785, "step": 16495 }, { "epoch": 32.992, "grad_norm": 1.3720580339431763, "learning_rate": 2e-05, "loss": 0.04478185, "step": 16496 }, { "epoch": 32.994, "grad_norm": 1.007232427597046, "learning_rate": 2e-05, "loss": 0.03115787, "step": 16497 }, { "epoch": 32.996, "grad_norm": 1.1226294040679932, "learning_rate": 2e-05, "loss": 0.05025791, "step": 16498 }, { "epoch": 32.998, "grad_norm": 1.0878342390060425, "learning_rate": 2e-05, "loss": 0.04733447, "step": 16499 }, { "epoch": 33.0, "grad_norm": 1.013754963874817, "learning_rate": 2e-05, "loss": 0.03848787, "step": 16500 }, { "epoch": 33.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9720558882235529, "Equal_1": 1.0, "Equal_2": 0.9800399201596807, "Equal_3": 0.9840319361277445, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9859719438877755, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.992, "Perpendicular_1": 0.996, "Perpendicular_2": 0.988, "Perpendicular_3": 0.9138276553106213, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.9996666666666667, "PointLiesOnCircle_3": 0.992, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9920159680638723 }, "eval_runtime": 320.8236, "eval_samples_per_second": 32.728, "eval_steps_per_second": 0.655, "step": 16500 }, { "epoch": 33.002, "grad_norm": 1.0145152807235718, "learning_rate": 2e-05, "loss": 0.03352314, "step": 16501 }, { "epoch": 33.004, "grad_norm": 2.6528162956237793, "learning_rate": 2e-05, "loss": 0.04622073, "step": 16502 }, { "epoch": 33.006, "grad_norm": 1.5733745098114014, "learning_rate": 2e-05, "loss": 0.04280098, "step": 16503 }, { "epoch": 33.008, "grad_norm": 2.088067054748535, "learning_rate": 2e-05, "loss": 0.04422682, "step": 16504 }, { "epoch": 33.01, "grad_norm": 0.7775627970695496, "learning_rate": 2e-05, "loss": 0.01831872, "step": 16505 }, { "epoch": 33.012, "grad_norm": 0.9788329601287842, "learning_rate": 2e-05, "loss": 0.03603578, "step": 16506 }, { "epoch": 33.014, "grad_norm": 1.1010586023330688, "learning_rate": 2e-05, "loss": 0.03720935, "step": 16507 }, { "epoch": 33.016, "grad_norm": 1.1118940114974976, "learning_rate": 2e-05, "loss": 0.04310315, "step": 16508 }, { "epoch": 33.018, "grad_norm": 1.3882803916931152, "learning_rate": 2e-05, "loss": 0.04679479, "step": 16509 }, { "epoch": 33.02, "grad_norm": 1.1422741413116455, "learning_rate": 2e-05, "loss": 0.03853836, "step": 16510 }, { "epoch": 33.022, "grad_norm": 0.9372391104698181, "learning_rate": 2e-05, "loss": 0.0216747, "step": 16511 }, { "epoch": 33.024, "grad_norm": 1.2110828161239624, "learning_rate": 2e-05, "loss": 0.04518433, "step": 16512 }, { "epoch": 33.026, "grad_norm": 1.14997136592865, "learning_rate": 2e-05, "loss": 0.04907893, "step": 16513 }, { "epoch": 33.028, "grad_norm": 1.4301350116729736, "learning_rate": 2e-05, "loss": 0.05764885, "step": 16514 }, { "epoch": 33.03, "grad_norm": 1.0221556425094604, "learning_rate": 2e-05, "loss": 0.04257384, "step": 16515 }, { "epoch": 33.032, "grad_norm": 1.0143057107925415, "learning_rate": 2e-05, "loss": 0.03372315, "step": 16516 }, { "epoch": 33.034, "grad_norm": 1.1732292175292969, "learning_rate": 2e-05, "loss": 0.04997346, "step": 16517 }, { "epoch": 33.036, "grad_norm": 1.0122156143188477, "learning_rate": 2e-05, "loss": 0.03192022, "step": 16518 }, { "epoch": 33.038, "grad_norm": 1.9777421951293945, "learning_rate": 2e-05, "loss": 0.03694803, "step": 16519 }, { "epoch": 33.04, "grad_norm": 1.1994304656982422, "learning_rate": 2e-05, "loss": 0.05166928, "step": 16520 }, { "epoch": 33.042, "grad_norm": 0.8912333250045776, "learning_rate": 2e-05, "loss": 0.03158681, "step": 16521 }, { "epoch": 33.044, "grad_norm": 1.4108567237854004, "learning_rate": 2e-05, "loss": 0.03951647, "step": 16522 }, { "epoch": 33.046, "grad_norm": 1.5426366329193115, "learning_rate": 2e-05, "loss": 0.05633399, "step": 16523 }, { "epoch": 33.048, "grad_norm": 1.044606328010559, "learning_rate": 2e-05, "loss": 0.03324772, "step": 16524 }, { "epoch": 33.05, "grad_norm": 2.6143667697906494, "learning_rate": 2e-05, "loss": 0.06454179, "step": 16525 }, { "epoch": 33.052, "grad_norm": 1.6238820552825928, "learning_rate": 2e-05, "loss": 0.02863217, "step": 16526 }, { "epoch": 33.054, "grad_norm": 1.1710536479949951, "learning_rate": 2e-05, "loss": 0.05588196, "step": 16527 }, { "epoch": 33.056, "grad_norm": 1.5752817392349243, "learning_rate": 2e-05, "loss": 0.03416557, "step": 16528 }, { "epoch": 33.058, "grad_norm": 1.6088008880615234, "learning_rate": 2e-05, "loss": 0.05284679, "step": 16529 }, { "epoch": 33.06, "grad_norm": 1.1662729978561401, "learning_rate": 2e-05, "loss": 0.03968053, "step": 16530 }, { "epoch": 33.062, "grad_norm": 1.0963259935379028, "learning_rate": 2e-05, "loss": 0.04498053, "step": 16531 }, { "epoch": 33.064, "grad_norm": 0.9817715287208557, "learning_rate": 2e-05, "loss": 0.03069879, "step": 16532 }, { "epoch": 33.066, "grad_norm": 1.3074853420257568, "learning_rate": 2e-05, "loss": 0.06249923, "step": 16533 }, { "epoch": 33.068, "grad_norm": 0.9941977262496948, "learning_rate": 2e-05, "loss": 0.03796024, "step": 16534 }, { "epoch": 33.07, "grad_norm": 1.546793818473816, "learning_rate": 2e-05, "loss": 0.04255424, "step": 16535 }, { "epoch": 33.072, "grad_norm": 1.2092461585998535, "learning_rate": 2e-05, "loss": 0.04925638, "step": 16536 }, { "epoch": 33.074, "grad_norm": 1.3330050706863403, "learning_rate": 2e-05, "loss": 0.04654669, "step": 16537 }, { "epoch": 33.076, "grad_norm": 1.539285659790039, "learning_rate": 2e-05, "loss": 0.04258667, "step": 16538 }, { "epoch": 33.078, "grad_norm": 1.1344162225723267, "learning_rate": 2e-05, "loss": 0.03024994, "step": 16539 }, { "epoch": 33.08, "grad_norm": 1.3139582872390747, "learning_rate": 2e-05, "loss": 0.06406873, "step": 16540 }, { "epoch": 33.082, "grad_norm": 1.342620611190796, "learning_rate": 2e-05, "loss": 0.04298013, "step": 16541 }, { "epoch": 33.084, "grad_norm": 1.1698533296585083, "learning_rate": 2e-05, "loss": 0.0356679, "step": 16542 }, { "epoch": 33.086, "grad_norm": 1.3078049421310425, "learning_rate": 2e-05, "loss": 0.03047599, "step": 16543 }, { "epoch": 33.088, "grad_norm": 1.1613730192184448, "learning_rate": 2e-05, "loss": 0.04012228, "step": 16544 }, { "epoch": 33.09, "grad_norm": 1.5841779708862305, "learning_rate": 2e-05, "loss": 0.05739962, "step": 16545 }, { "epoch": 33.092, "grad_norm": 1.175606608390808, "learning_rate": 2e-05, "loss": 0.0417013, "step": 16546 }, { "epoch": 33.094, "grad_norm": 1.0316969156265259, "learning_rate": 2e-05, "loss": 0.02844536, "step": 16547 }, { "epoch": 33.096, "grad_norm": 1.6557471752166748, "learning_rate": 2e-05, "loss": 0.04946776, "step": 16548 }, { "epoch": 33.098, "grad_norm": 1.2517582178115845, "learning_rate": 2e-05, "loss": 0.03600208, "step": 16549 }, { "epoch": 33.1, "grad_norm": 1.3024152517318726, "learning_rate": 2e-05, "loss": 0.0431541, "step": 16550 }, { "epoch": 33.102, "grad_norm": 1.2932987213134766, "learning_rate": 2e-05, "loss": 0.04457521, "step": 16551 }, { "epoch": 33.104, "grad_norm": 1.2561955451965332, "learning_rate": 2e-05, "loss": 0.05188843, "step": 16552 }, { "epoch": 33.106, "grad_norm": 1.1888597011566162, "learning_rate": 2e-05, "loss": 0.03158218, "step": 16553 }, { "epoch": 33.108, "grad_norm": 1.1802350282669067, "learning_rate": 2e-05, "loss": 0.04162861, "step": 16554 }, { "epoch": 33.11, "grad_norm": 1.2707432508468628, "learning_rate": 2e-05, "loss": 0.0347266, "step": 16555 }, { "epoch": 33.112, "grad_norm": 1.2336151599884033, "learning_rate": 2e-05, "loss": 0.03552887, "step": 16556 }, { "epoch": 33.114, "grad_norm": 1.348934292793274, "learning_rate": 2e-05, "loss": 0.04676005, "step": 16557 }, { "epoch": 33.116, "grad_norm": 0.9855770468711853, "learning_rate": 2e-05, "loss": 0.02963222, "step": 16558 }, { "epoch": 33.118, "grad_norm": 0.9198537468910217, "learning_rate": 2e-05, "loss": 0.03035306, "step": 16559 }, { "epoch": 33.12, "grad_norm": 1.309364914894104, "learning_rate": 2e-05, "loss": 0.05264792, "step": 16560 }, { "epoch": 33.122, "grad_norm": 1.5512858629226685, "learning_rate": 2e-05, "loss": 0.05176829, "step": 16561 }, { "epoch": 33.124, "grad_norm": 1.2333922386169434, "learning_rate": 2e-05, "loss": 0.04524909, "step": 16562 }, { "epoch": 33.126, "grad_norm": 1.252514362335205, "learning_rate": 2e-05, "loss": 0.0393767, "step": 16563 }, { "epoch": 33.128, "grad_norm": 0.95073002576828, "learning_rate": 2e-05, "loss": 0.0251915, "step": 16564 }, { "epoch": 33.13, "grad_norm": 2.1087992191314697, "learning_rate": 2e-05, "loss": 0.04168628, "step": 16565 }, { "epoch": 33.132, "grad_norm": 1.073662519454956, "learning_rate": 2e-05, "loss": 0.03738192, "step": 16566 }, { "epoch": 33.134, "grad_norm": 2.494189977645874, "learning_rate": 2e-05, "loss": 0.03791768, "step": 16567 }, { "epoch": 33.136, "grad_norm": 2.3711354732513428, "learning_rate": 2e-05, "loss": 0.05274618, "step": 16568 }, { "epoch": 33.138, "grad_norm": 1.1939685344696045, "learning_rate": 2e-05, "loss": 0.05177773, "step": 16569 }, { "epoch": 33.14, "grad_norm": 1.0729763507843018, "learning_rate": 2e-05, "loss": 0.04366021, "step": 16570 }, { "epoch": 33.142, "grad_norm": 1.9815735816955566, "learning_rate": 2e-05, "loss": 0.05311869, "step": 16571 }, { "epoch": 33.144, "grad_norm": 2.0230648517608643, "learning_rate": 2e-05, "loss": 0.05889687, "step": 16572 }, { "epoch": 33.146, "grad_norm": 0.9048420190811157, "learning_rate": 2e-05, "loss": 0.02796107, "step": 16573 }, { "epoch": 33.148, "grad_norm": 1.2179429531097412, "learning_rate": 2e-05, "loss": 0.04486234, "step": 16574 }, { "epoch": 33.15, "grad_norm": 1.1371427774429321, "learning_rate": 2e-05, "loss": 0.03180134, "step": 16575 }, { "epoch": 33.152, "grad_norm": 1.2799369096755981, "learning_rate": 2e-05, "loss": 0.04838013, "step": 16576 }, { "epoch": 33.154, "grad_norm": 1.4155060052871704, "learning_rate": 2e-05, "loss": 0.03349139, "step": 16577 }, { "epoch": 33.156, "grad_norm": 1.009975790977478, "learning_rate": 2e-05, "loss": 0.02858535, "step": 16578 }, { "epoch": 33.158, "grad_norm": 1.0067310333251953, "learning_rate": 2e-05, "loss": 0.02488973, "step": 16579 }, { "epoch": 33.16, "grad_norm": 1.6880412101745605, "learning_rate": 2e-05, "loss": 0.04501157, "step": 16580 }, { "epoch": 33.162, "grad_norm": 1.3325822353363037, "learning_rate": 2e-05, "loss": 0.0444175, "step": 16581 }, { "epoch": 33.164, "grad_norm": 5.777720928192139, "learning_rate": 2e-05, "loss": 0.0530759, "step": 16582 }, { "epoch": 33.166, "grad_norm": 2.4529478549957275, "learning_rate": 2e-05, "loss": 0.04354583, "step": 16583 }, { "epoch": 33.168, "grad_norm": 1.3145490884780884, "learning_rate": 2e-05, "loss": 0.03570919, "step": 16584 }, { "epoch": 33.17, "grad_norm": 1.2409156560897827, "learning_rate": 2e-05, "loss": 0.04724094, "step": 16585 }, { "epoch": 33.172, "grad_norm": 0.8115535974502563, "learning_rate": 2e-05, "loss": 0.03618132, "step": 16586 }, { "epoch": 33.174, "grad_norm": 0.9847850799560547, "learning_rate": 2e-05, "loss": 0.03718961, "step": 16587 }, { "epoch": 33.176, "grad_norm": 1.0901992321014404, "learning_rate": 2e-05, "loss": 0.03891124, "step": 16588 }, { "epoch": 33.178, "grad_norm": 1.2606801986694336, "learning_rate": 2e-05, "loss": 0.04655527, "step": 16589 }, { "epoch": 33.18, "grad_norm": 1.6437278985977173, "learning_rate": 2e-05, "loss": 0.04169775, "step": 16590 }, { "epoch": 33.182, "grad_norm": 1.0829734802246094, "learning_rate": 2e-05, "loss": 0.03909619, "step": 16591 }, { "epoch": 33.184, "grad_norm": 1.9588395357131958, "learning_rate": 2e-05, "loss": 0.05959759, "step": 16592 }, { "epoch": 33.186, "grad_norm": 2.825683355331421, "learning_rate": 2e-05, "loss": 0.05063524, "step": 16593 }, { "epoch": 33.188, "grad_norm": 1.0350728034973145, "learning_rate": 2e-05, "loss": 0.03315126, "step": 16594 }, { "epoch": 33.19, "grad_norm": 2.2779905796051025, "learning_rate": 2e-05, "loss": 0.04792159, "step": 16595 }, { "epoch": 33.192, "grad_norm": 1.0446611642837524, "learning_rate": 2e-05, "loss": 0.03796531, "step": 16596 }, { "epoch": 33.194, "grad_norm": 1.1090408563613892, "learning_rate": 2e-05, "loss": 0.03750544, "step": 16597 }, { "epoch": 33.196, "grad_norm": 1.0616990327835083, "learning_rate": 2e-05, "loss": 0.04366959, "step": 16598 }, { "epoch": 33.198, "grad_norm": 1.0580826997756958, "learning_rate": 2e-05, "loss": 0.03785151, "step": 16599 }, { "epoch": 33.2, "grad_norm": 1.757721185684204, "learning_rate": 2e-05, "loss": 0.04076345, "step": 16600 }, { "epoch": 33.202, "grad_norm": 1.2572243213653564, "learning_rate": 2e-05, "loss": 0.04631349, "step": 16601 }, { "epoch": 33.204, "grad_norm": 1.4636191129684448, "learning_rate": 2e-05, "loss": 0.04375532, "step": 16602 }, { "epoch": 33.206, "grad_norm": 1.7812341451644897, "learning_rate": 2e-05, "loss": 0.03871331, "step": 16603 }, { "epoch": 33.208, "grad_norm": 1.4072896242141724, "learning_rate": 2e-05, "loss": 0.05194937, "step": 16604 }, { "epoch": 33.21, "grad_norm": 0.9747751951217651, "learning_rate": 2e-05, "loss": 0.02706435, "step": 16605 }, { "epoch": 33.212, "grad_norm": 1.1808767318725586, "learning_rate": 2e-05, "loss": 0.03670924, "step": 16606 }, { "epoch": 33.214, "grad_norm": 1.2573798894882202, "learning_rate": 2e-05, "loss": 0.03263208, "step": 16607 }, { "epoch": 33.216, "grad_norm": 1.379146695137024, "learning_rate": 2e-05, "loss": 0.04529354, "step": 16608 }, { "epoch": 33.218, "grad_norm": 1.177720308303833, "learning_rate": 2e-05, "loss": 0.03827986, "step": 16609 }, { "epoch": 33.22, "grad_norm": 1.1560696363449097, "learning_rate": 2e-05, "loss": 0.04032604, "step": 16610 }, { "epoch": 33.222, "grad_norm": 1.2086445093154907, "learning_rate": 2e-05, "loss": 0.04973095, "step": 16611 }, { "epoch": 33.224, "grad_norm": 1.0575134754180908, "learning_rate": 2e-05, "loss": 0.03815421, "step": 16612 }, { "epoch": 33.226, "grad_norm": 1.3156135082244873, "learning_rate": 2e-05, "loss": 0.0418507, "step": 16613 }, { "epoch": 33.228, "grad_norm": 1.3325525522232056, "learning_rate": 2e-05, "loss": 0.0365724, "step": 16614 }, { "epoch": 33.23, "grad_norm": 1.4213200807571411, "learning_rate": 2e-05, "loss": 0.04568385, "step": 16615 }, { "epoch": 33.232, "grad_norm": 1.8256237506866455, "learning_rate": 2e-05, "loss": 0.06385624, "step": 16616 }, { "epoch": 33.234, "grad_norm": 2.2751426696777344, "learning_rate": 2e-05, "loss": 0.04987566, "step": 16617 }, { "epoch": 33.236, "grad_norm": 0.9569184184074402, "learning_rate": 2e-05, "loss": 0.02542327, "step": 16618 }, { "epoch": 33.238, "grad_norm": 1.0519928932189941, "learning_rate": 2e-05, "loss": 0.03581095, "step": 16619 }, { "epoch": 33.24, "grad_norm": 1.1077507734298706, "learning_rate": 2e-05, "loss": 0.03992999, "step": 16620 }, { "epoch": 33.242, "grad_norm": 1.6223715543746948, "learning_rate": 2e-05, "loss": 0.02992079, "step": 16621 }, { "epoch": 33.244, "grad_norm": 1.5266954898834229, "learning_rate": 2e-05, "loss": 0.03266663, "step": 16622 }, { "epoch": 33.246, "grad_norm": 1.642143964767456, "learning_rate": 2e-05, "loss": 0.05565649, "step": 16623 }, { "epoch": 33.248, "grad_norm": 2.065688371658325, "learning_rate": 2e-05, "loss": 0.0459539, "step": 16624 }, { "epoch": 33.25, "grad_norm": 1.1380778551101685, "learning_rate": 2e-05, "loss": 0.04274027, "step": 16625 }, { "epoch": 33.252, "grad_norm": 1.23936927318573, "learning_rate": 2e-05, "loss": 0.05491698, "step": 16626 }, { "epoch": 33.254, "grad_norm": 1.0805511474609375, "learning_rate": 2e-05, "loss": 0.04202371, "step": 16627 }, { "epoch": 33.256, "grad_norm": 1.1573930978775024, "learning_rate": 2e-05, "loss": 0.04801682, "step": 16628 }, { "epoch": 33.258, "grad_norm": 1.1957443952560425, "learning_rate": 2e-05, "loss": 0.02971683, "step": 16629 }, { "epoch": 33.26, "grad_norm": 1.1596754789352417, "learning_rate": 2e-05, "loss": 0.03906877, "step": 16630 }, { "epoch": 33.262, "grad_norm": 1.0122718811035156, "learning_rate": 2e-05, "loss": 0.0367621, "step": 16631 }, { "epoch": 33.264, "grad_norm": 1.2783515453338623, "learning_rate": 2e-05, "loss": 0.06680563, "step": 16632 }, { "epoch": 33.266, "grad_norm": 1.4091695547103882, "learning_rate": 2e-05, "loss": 0.05388515, "step": 16633 }, { "epoch": 33.268, "grad_norm": 1.1046702861785889, "learning_rate": 2e-05, "loss": 0.04830401, "step": 16634 }, { "epoch": 33.27, "grad_norm": 1.6029551029205322, "learning_rate": 2e-05, "loss": 0.04491821, "step": 16635 }, { "epoch": 33.272, "grad_norm": 1.1932250261306763, "learning_rate": 2e-05, "loss": 0.04055645, "step": 16636 }, { "epoch": 33.274, "grad_norm": 1.7215577363967896, "learning_rate": 2e-05, "loss": 0.04914788, "step": 16637 }, { "epoch": 33.276, "grad_norm": 0.9664545059204102, "learning_rate": 2e-05, "loss": 0.04409215, "step": 16638 }, { "epoch": 33.278, "grad_norm": 1.0649373531341553, "learning_rate": 2e-05, "loss": 0.04012241, "step": 16639 }, { "epoch": 33.28, "grad_norm": 1.5425022840499878, "learning_rate": 2e-05, "loss": 0.0414533, "step": 16640 }, { "epoch": 33.282, "grad_norm": 1.4415966272354126, "learning_rate": 2e-05, "loss": 0.05605222, "step": 16641 }, { "epoch": 33.284, "grad_norm": 1.9677499532699585, "learning_rate": 2e-05, "loss": 0.05646674, "step": 16642 }, { "epoch": 33.286, "grad_norm": 1.0647591352462769, "learning_rate": 2e-05, "loss": 0.04393759, "step": 16643 }, { "epoch": 33.288, "grad_norm": 0.9724460244178772, "learning_rate": 2e-05, "loss": 0.0201349, "step": 16644 }, { "epoch": 33.29, "grad_norm": 1.3234846591949463, "learning_rate": 2e-05, "loss": 0.04536884, "step": 16645 }, { "epoch": 33.292, "grad_norm": 1.315469741821289, "learning_rate": 2e-05, "loss": 0.03901994, "step": 16646 }, { "epoch": 33.294, "grad_norm": 1.1740449666976929, "learning_rate": 2e-05, "loss": 0.04922675, "step": 16647 }, { "epoch": 33.296, "grad_norm": 2.73299241065979, "learning_rate": 2e-05, "loss": 0.04908387, "step": 16648 }, { "epoch": 33.298, "grad_norm": 1.0189504623413086, "learning_rate": 2e-05, "loss": 0.04484309, "step": 16649 }, { "epoch": 33.3, "grad_norm": 2.3086318969726562, "learning_rate": 2e-05, "loss": 0.06764978, "step": 16650 }, { "epoch": 33.302, "grad_norm": 0.8751531839370728, "learning_rate": 2e-05, "loss": 0.03591763, "step": 16651 }, { "epoch": 33.304, "grad_norm": 1.2427144050598145, "learning_rate": 2e-05, "loss": 0.05099815, "step": 16652 }, { "epoch": 33.306, "grad_norm": 1.8863247632980347, "learning_rate": 2e-05, "loss": 0.0360302, "step": 16653 }, { "epoch": 33.308, "grad_norm": 1.1457570791244507, "learning_rate": 2e-05, "loss": 0.03447718, "step": 16654 }, { "epoch": 33.31, "grad_norm": 1.273047685623169, "learning_rate": 2e-05, "loss": 0.03491887, "step": 16655 }, { "epoch": 33.312, "grad_norm": 1.65647554397583, "learning_rate": 2e-05, "loss": 0.05843513, "step": 16656 }, { "epoch": 33.314, "grad_norm": 1.1409825086593628, "learning_rate": 2e-05, "loss": 0.04555066, "step": 16657 }, { "epoch": 33.316, "grad_norm": 3.0305325984954834, "learning_rate": 2e-05, "loss": 0.05683912, "step": 16658 }, { "epoch": 33.318, "grad_norm": 2.7458958625793457, "learning_rate": 2e-05, "loss": 0.0510499, "step": 16659 }, { "epoch": 33.32, "grad_norm": 2.914576768875122, "learning_rate": 2e-05, "loss": 0.04127868, "step": 16660 }, { "epoch": 33.322, "grad_norm": 1.22411048412323, "learning_rate": 2e-05, "loss": 0.0514855, "step": 16661 }, { "epoch": 33.324, "grad_norm": 1.5475612878799438, "learning_rate": 2e-05, "loss": 0.05710922, "step": 16662 }, { "epoch": 33.326, "grad_norm": 1.0445101261138916, "learning_rate": 2e-05, "loss": 0.0416135, "step": 16663 }, { "epoch": 33.328, "grad_norm": 1.168892502784729, "learning_rate": 2e-05, "loss": 0.02608304, "step": 16664 }, { "epoch": 33.33, "grad_norm": 1.1032596826553345, "learning_rate": 2e-05, "loss": 0.03604908, "step": 16665 }, { "epoch": 33.332, "grad_norm": 1.2727869749069214, "learning_rate": 2e-05, "loss": 0.06578045, "step": 16666 }, { "epoch": 33.334, "grad_norm": 0.9599734544754028, "learning_rate": 2e-05, "loss": 0.03464708, "step": 16667 }, { "epoch": 33.336, "grad_norm": 0.9870487451553345, "learning_rate": 2e-05, "loss": 0.04101057, "step": 16668 }, { "epoch": 33.338, "grad_norm": 1.5237897634506226, "learning_rate": 2e-05, "loss": 0.06414343, "step": 16669 }, { "epoch": 33.34, "grad_norm": 1.0481436252593994, "learning_rate": 2e-05, "loss": 0.03079265, "step": 16670 }, { "epoch": 33.342, "grad_norm": 1.1099966764450073, "learning_rate": 2e-05, "loss": 0.04527031, "step": 16671 }, { "epoch": 33.344, "grad_norm": 1.3195089101791382, "learning_rate": 2e-05, "loss": 0.02745794, "step": 16672 }, { "epoch": 33.346, "grad_norm": 0.8879894018173218, "learning_rate": 2e-05, "loss": 0.03119539, "step": 16673 }, { "epoch": 33.348, "grad_norm": 1.2862367630004883, "learning_rate": 2e-05, "loss": 0.04255934, "step": 16674 }, { "epoch": 33.35, "grad_norm": 1.123695731163025, "learning_rate": 2e-05, "loss": 0.04763044, "step": 16675 }, { "epoch": 33.352, "grad_norm": 1.058069109916687, "learning_rate": 2e-05, "loss": 0.03856095, "step": 16676 }, { "epoch": 33.354, "grad_norm": 1.1873466968536377, "learning_rate": 2e-05, "loss": 0.04556847, "step": 16677 }, { "epoch": 33.356, "grad_norm": 1.5896145105361938, "learning_rate": 2e-05, "loss": 0.06114259, "step": 16678 }, { "epoch": 33.358, "grad_norm": 1.1013940572738647, "learning_rate": 2e-05, "loss": 0.04544736, "step": 16679 }, { "epoch": 33.36, "grad_norm": 1.1660057306289673, "learning_rate": 2e-05, "loss": 0.04236136, "step": 16680 }, { "epoch": 33.362, "grad_norm": 1.0070010423660278, "learning_rate": 2e-05, "loss": 0.03095785, "step": 16681 }, { "epoch": 33.364, "grad_norm": 1.2435543537139893, "learning_rate": 2e-05, "loss": 0.05413376, "step": 16682 }, { "epoch": 33.366, "grad_norm": 1.076140284538269, "learning_rate": 2e-05, "loss": 0.0455126, "step": 16683 }, { "epoch": 33.368, "grad_norm": 0.9917166829109192, "learning_rate": 2e-05, "loss": 0.03302848, "step": 16684 }, { "epoch": 33.37, "grad_norm": 1.7563289403915405, "learning_rate": 2e-05, "loss": 0.03998546, "step": 16685 }, { "epoch": 33.372, "grad_norm": 1.0916879177093506, "learning_rate": 2e-05, "loss": 0.03832256, "step": 16686 }, { "epoch": 33.374, "grad_norm": 1.0437517166137695, "learning_rate": 2e-05, "loss": 0.04042347, "step": 16687 }, { "epoch": 33.376, "grad_norm": 2.85295033454895, "learning_rate": 2e-05, "loss": 0.04512815, "step": 16688 }, { "epoch": 33.378, "grad_norm": 1.3456438779830933, "learning_rate": 2e-05, "loss": 0.05866044, "step": 16689 }, { "epoch": 33.38, "grad_norm": 1.2579126358032227, "learning_rate": 2e-05, "loss": 0.04194574, "step": 16690 }, { "epoch": 33.382, "grad_norm": 1.2284680604934692, "learning_rate": 2e-05, "loss": 0.04293683, "step": 16691 }, { "epoch": 33.384, "grad_norm": 1.0738353729248047, "learning_rate": 2e-05, "loss": 0.03847217, "step": 16692 }, { "epoch": 33.386, "grad_norm": 0.9319986701011658, "learning_rate": 2e-05, "loss": 0.02663419, "step": 16693 }, { "epoch": 33.388, "grad_norm": 2.2574198246002197, "learning_rate": 2e-05, "loss": 0.03809108, "step": 16694 }, { "epoch": 33.39, "grad_norm": 1.3886204957962036, "learning_rate": 2e-05, "loss": 0.04164458, "step": 16695 }, { "epoch": 33.392, "grad_norm": 1.673781156539917, "learning_rate": 2e-05, "loss": 0.04233087, "step": 16696 }, { "epoch": 33.394, "grad_norm": 1.2593107223510742, "learning_rate": 2e-05, "loss": 0.04103605, "step": 16697 }, { "epoch": 33.396, "grad_norm": 2.0994770526885986, "learning_rate": 2e-05, "loss": 0.0581164, "step": 16698 }, { "epoch": 33.398, "grad_norm": 1.1525026559829712, "learning_rate": 2e-05, "loss": 0.0525818, "step": 16699 }, { "epoch": 33.4, "grad_norm": 1.141378402709961, "learning_rate": 2e-05, "loss": 0.05826852, "step": 16700 }, { "epoch": 33.402, "grad_norm": 1.3574016094207764, "learning_rate": 2e-05, "loss": 0.05125176, "step": 16701 }, { "epoch": 33.404, "grad_norm": 1.2207424640655518, "learning_rate": 2e-05, "loss": 0.03801119, "step": 16702 }, { "epoch": 33.406, "grad_norm": 1.1650470495224, "learning_rate": 2e-05, "loss": 0.04888697, "step": 16703 }, { "epoch": 33.408, "grad_norm": 1.325939416885376, "learning_rate": 2e-05, "loss": 0.05456465, "step": 16704 }, { "epoch": 33.41, "grad_norm": 1.076897382736206, "learning_rate": 2e-05, "loss": 0.04185375, "step": 16705 }, { "epoch": 33.412, "grad_norm": 1.0933011770248413, "learning_rate": 2e-05, "loss": 0.03941853, "step": 16706 }, { "epoch": 33.414, "grad_norm": 1.4302270412445068, "learning_rate": 2e-05, "loss": 0.04674981, "step": 16707 }, { "epoch": 33.416, "grad_norm": 5.180481910705566, "learning_rate": 2e-05, "loss": 0.05447872, "step": 16708 }, { "epoch": 33.418, "grad_norm": 0.997200071811676, "learning_rate": 2e-05, "loss": 0.04152042, "step": 16709 }, { "epoch": 33.42, "grad_norm": 1.2832751274108887, "learning_rate": 2e-05, "loss": 0.05069866, "step": 16710 }, { "epoch": 33.422, "grad_norm": 1.0193212032318115, "learning_rate": 2e-05, "loss": 0.0379979, "step": 16711 }, { "epoch": 33.424, "grad_norm": 1.0952942371368408, "learning_rate": 2e-05, "loss": 0.03752263, "step": 16712 }, { "epoch": 33.426, "grad_norm": 1.048114538192749, "learning_rate": 2e-05, "loss": 0.05350174, "step": 16713 }, { "epoch": 33.428, "grad_norm": 1.142867088317871, "learning_rate": 2e-05, "loss": 0.04559736, "step": 16714 }, { "epoch": 33.43, "grad_norm": 1.1532340049743652, "learning_rate": 2e-05, "loss": 0.03369208, "step": 16715 }, { "epoch": 33.432, "grad_norm": 2.2827866077423096, "learning_rate": 2e-05, "loss": 0.04446258, "step": 16716 }, { "epoch": 33.434, "grad_norm": 1.1983774900436401, "learning_rate": 2e-05, "loss": 0.04126913, "step": 16717 }, { "epoch": 33.436, "grad_norm": 1.3406853675842285, "learning_rate": 2e-05, "loss": 0.03477804, "step": 16718 }, { "epoch": 33.438, "grad_norm": 1.4163920879364014, "learning_rate": 2e-05, "loss": 0.05400594, "step": 16719 }, { "epoch": 33.44, "grad_norm": 1.202052116394043, "learning_rate": 2e-05, "loss": 0.04666064, "step": 16720 }, { "epoch": 33.442, "grad_norm": 1.020628809928894, "learning_rate": 2e-05, "loss": 0.03358646, "step": 16721 }, { "epoch": 33.444, "grad_norm": 1.0128796100616455, "learning_rate": 2e-05, "loss": 0.03198342, "step": 16722 }, { "epoch": 33.446, "grad_norm": 1.3667223453521729, "learning_rate": 2e-05, "loss": 0.04862178, "step": 16723 }, { "epoch": 33.448, "grad_norm": 1.0447344779968262, "learning_rate": 2e-05, "loss": 0.0359532, "step": 16724 }, { "epoch": 33.45, "grad_norm": 1.2238010168075562, "learning_rate": 2e-05, "loss": 0.03374971, "step": 16725 }, { "epoch": 33.452, "grad_norm": 1.7347222566604614, "learning_rate": 2e-05, "loss": 0.05317023, "step": 16726 }, { "epoch": 33.454, "grad_norm": 1.2322536706924438, "learning_rate": 2e-05, "loss": 0.05196625, "step": 16727 }, { "epoch": 33.456, "grad_norm": 1.0215041637420654, "learning_rate": 2e-05, "loss": 0.03156345, "step": 16728 }, { "epoch": 33.458, "grad_norm": 1.5477778911590576, "learning_rate": 2e-05, "loss": 0.04214332, "step": 16729 }, { "epoch": 33.46, "grad_norm": 1.1270982027053833, "learning_rate": 2e-05, "loss": 0.04706374, "step": 16730 }, { "epoch": 33.462, "grad_norm": 1.2014801502227783, "learning_rate": 2e-05, "loss": 0.04483427, "step": 16731 }, { "epoch": 33.464, "grad_norm": 1.4638102054595947, "learning_rate": 2e-05, "loss": 0.05931569, "step": 16732 }, { "epoch": 33.466, "grad_norm": 1.084399700164795, "learning_rate": 2e-05, "loss": 0.04061939, "step": 16733 }, { "epoch": 33.468, "grad_norm": 1.254996657371521, "learning_rate": 2e-05, "loss": 0.03688417, "step": 16734 }, { "epoch": 33.47, "grad_norm": 1.2726435661315918, "learning_rate": 2e-05, "loss": 0.04665891, "step": 16735 }, { "epoch": 33.472, "grad_norm": 1.0594907999038696, "learning_rate": 2e-05, "loss": 0.03953811, "step": 16736 }, { "epoch": 33.474, "grad_norm": 1.8973904848098755, "learning_rate": 2e-05, "loss": 0.04582148, "step": 16737 }, { "epoch": 33.476, "grad_norm": 1.1382102966308594, "learning_rate": 2e-05, "loss": 0.03187426, "step": 16738 }, { "epoch": 33.478, "grad_norm": 1.8633939027786255, "learning_rate": 2e-05, "loss": 0.04606871, "step": 16739 }, { "epoch": 33.48, "grad_norm": 1.2930206060409546, "learning_rate": 2e-05, "loss": 0.03444067, "step": 16740 }, { "epoch": 33.482, "grad_norm": 1.696944236755371, "learning_rate": 2e-05, "loss": 0.04227548, "step": 16741 }, { "epoch": 33.484, "grad_norm": 1.115965723991394, "learning_rate": 2e-05, "loss": 0.03508355, "step": 16742 }, { "epoch": 33.486, "grad_norm": 1.2587809562683105, "learning_rate": 2e-05, "loss": 0.03677014, "step": 16743 }, { "epoch": 33.488, "grad_norm": 1.0906723737716675, "learning_rate": 2e-05, "loss": 0.03932015, "step": 16744 }, { "epoch": 33.49, "grad_norm": 1.193812370300293, "learning_rate": 2e-05, "loss": 0.04173867, "step": 16745 }, { "epoch": 33.492, "grad_norm": 1.3376166820526123, "learning_rate": 2e-05, "loss": 0.03034848, "step": 16746 }, { "epoch": 33.494, "grad_norm": 1.1713814735412598, "learning_rate": 2e-05, "loss": 0.04791263, "step": 16747 }, { "epoch": 33.496, "grad_norm": 1.723981261253357, "learning_rate": 2e-05, "loss": 0.04253115, "step": 16748 }, { "epoch": 33.498, "grad_norm": 1.1139684915542603, "learning_rate": 2e-05, "loss": 0.04166551, "step": 16749 }, { "epoch": 33.5, "grad_norm": 1.292219877243042, "learning_rate": 2e-05, "loss": 0.03971953, "step": 16750 }, { "epoch": 33.502, "grad_norm": 1.2598776817321777, "learning_rate": 2e-05, "loss": 0.04661015, "step": 16751 }, { "epoch": 33.504, "grad_norm": 1.248761534690857, "learning_rate": 2e-05, "loss": 0.04709421, "step": 16752 }, { "epoch": 33.506, "grad_norm": 1.290848970413208, "learning_rate": 2e-05, "loss": 0.04546386, "step": 16753 }, { "epoch": 33.508, "grad_norm": 3.039777994155884, "learning_rate": 2e-05, "loss": 0.04197717, "step": 16754 }, { "epoch": 33.51, "grad_norm": 1.0628236532211304, "learning_rate": 2e-05, "loss": 0.03273289, "step": 16755 }, { "epoch": 33.512, "grad_norm": 1.1584326028823853, "learning_rate": 2e-05, "loss": 0.03990288, "step": 16756 }, { "epoch": 33.514, "grad_norm": 1.013412356376648, "learning_rate": 2e-05, "loss": 0.02777958, "step": 16757 }, { "epoch": 33.516, "grad_norm": 2.4592092037200928, "learning_rate": 2e-05, "loss": 0.03798764, "step": 16758 }, { "epoch": 33.518, "grad_norm": 1.4188690185546875, "learning_rate": 2e-05, "loss": 0.04493807, "step": 16759 }, { "epoch": 33.52, "grad_norm": 4.044872760772705, "learning_rate": 2e-05, "loss": 0.05675915, "step": 16760 }, { "epoch": 33.522, "grad_norm": 1.4888228178024292, "learning_rate": 2e-05, "loss": 0.039264, "step": 16761 }, { "epoch": 33.524, "grad_norm": 1.2201298475265503, "learning_rate": 2e-05, "loss": 0.04121239, "step": 16762 }, { "epoch": 33.526, "grad_norm": 1.0518842935562134, "learning_rate": 2e-05, "loss": 0.03466121, "step": 16763 }, { "epoch": 33.528, "grad_norm": 0.8494116067886353, "learning_rate": 2e-05, "loss": 0.02845479, "step": 16764 }, { "epoch": 33.53, "grad_norm": 1.2486729621887207, "learning_rate": 2e-05, "loss": 0.0528804, "step": 16765 }, { "epoch": 33.532, "grad_norm": 1.1569898128509521, "learning_rate": 2e-05, "loss": 0.03768142, "step": 16766 }, { "epoch": 33.534, "grad_norm": 0.9712448120117188, "learning_rate": 2e-05, "loss": 0.03684617, "step": 16767 }, { "epoch": 33.536, "grad_norm": 3.453179121017456, "learning_rate": 2e-05, "loss": 0.05391577, "step": 16768 }, { "epoch": 33.538, "grad_norm": 1.2368735074996948, "learning_rate": 2e-05, "loss": 0.04700865, "step": 16769 }, { "epoch": 33.54, "grad_norm": 1.745194435119629, "learning_rate": 2e-05, "loss": 0.04905911, "step": 16770 }, { "epoch": 33.542, "grad_norm": 1.4005979299545288, "learning_rate": 2e-05, "loss": 0.05808295, "step": 16771 }, { "epoch": 33.544, "grad_norm": 1.6781764030456543, "learning_rate": 2e-05, "loss": 0.03419307, "step": 16772 }, { "epoch": 33.546, "grad_norm": 1.5262959003448486, "learning_rate": 2e-05, "loss": 0.04930171, "step": 16773 }, { "epoch": 33.548, "grad_norm": 1.185598373413086, "learning_rate": 2e-05, "loss": 0.04440915, "step": 16774 }, { "epoch": 33.55, "grad_norm": 1.0013930797576904, "learning_rate": 2e-05, "loss": 0.03913631, "step": 16775 }, { "epoch": 33.552, "grad_norm": 1.5372309684753418, "learning_rate": 2e-05, "loss": 0.04706179, "step": 16776 }, { "epoch": 33.554, "grad_norm": 1.3070640563964844, "learning_rate": 2e-05, "loss": 0.03147201, "step": 16777 }, { "epoch": 33.556, "grad_norm": 1.387190818786621, "learning_rate": 2e-05, "loss": 0.04918057, "step": 16778 }, { "epoch": 33.558, "grad_norm": 2.1223700046539307, "learning_rate": 2e-05, "loss": 0.05167126, "step": 16779 }, { "epoch": 33.56, "grad_norm": 1.3010870218276978, "learning_rate": 2e-05, "loss": 0.04664216, "step": 16780 }, { "epoch": 33.562, "grad_norm": 1.6655634641647339, "learning_rate": 2e-05, "loss": 0.04345709, "step": 16781 }, { "epoch": 33.564, "grad_norm": 2.2487990856170654, "learning_rate": 2e-05, "loss": 0.04758396, "step": 16782 }, { "epoch": 33.566, "grad_norm": 2.5132153034210205, "learning_rate": 2e-05, "loss": 0.05307368, "step": 16783 }, { "epoch": 33.568, "grad_norm": 2.023242235183716, "learning_rate": 2e-05, "loss": 0.03659308, "step": 16784 }, { "epoch": 33.57, "grad_norm": 1.0173568725585938, "learning_rate": 2e-05, "loss": 0.03574786, "step": 16785 }, { "epoch": 33.572, "grad_norm": 1.4924144744873047, "learning_rate": 2e-05, "loss": 0.04335709, "step": 16786 }, { "epoch": 33.574, "grad_norm": 1.2708486318588257, "learning_rate": 2e-05, "loss": 0.04690626, "step": 16787 }, { "epoch": 33.576, "grad_norm": 1.9800394773483276, "learning_rate": 2e-05, "loss": 0.04107783, "step": 16788 }, { "epoch": 33.578, "grad_norm": 2.4189324378967285, "learning_rate": 2e-05, "loss": 0.05079643, "step": 16789 }, { "epoch": 33.58, "grad_norm": 1.4563528299331665, "learning_rate": 2e-05, "loss": 0.04888935, "step": 16790 }, { "epoch": 33.582, "grad_norm": 2.460385799407959, "learning_rate": 2e-05, "loss": 0.04483432, "step": 16791 }, { "epoch": 33.584, "grad_norm": 2.3535897731781006, "learning_rate": 2e-05, "loss": 0.0520683, "step": 16792 }, { "epoch": 33.586, "grad_norm": 1.497182846069336, "learning_rate": 2e-05, "loss": 0.04292556, "step": 16793 }, { "epoch": 33.588, "grad_norm": 2.476083993911743, "learning_rate": 2e-05, "loss": 0.04846964, "step": 16794 }, { "epoch": 33.59, "grad_norm": 1.223767876625061, "learning_rate": 2e-05, "loss": 0.06145135, "step": 16795 }, { "epoch": 33.592, "grad_norm": 2.5422379970550537, "learning_rate": 2e-05, "loss": 0.05470592, "step": 16796 }, { "epoch": 33.594, "grad_norm": 1.644066333770752, "learning_rate": 2e-05, "loss": 0.03853694, "step": 16797 }, { "epoch": 33.596, "grad_norm": 1.5902693271636963, "learning_rate": 2e-05, "loss": 0.04749844, "step": 16798 }, { "epoch": 33.598, "grad_norm": 0.8459369540214539, "learning_rate": 2e-05, "loss": 0.02365252, "step": 16799 }, { "epoch": 33.6, "grad_norm": 0.9091578722000122, "learning_rate": 2e-05, "loss": 0.03125982, "step": 16800 }, { "epoch": 33.602, "grad_norm": 1.5347368717193604, "learning_rate": 2e-05, "loss": 0.04600755, "step": 16801 }, { "epoch": 33.604, "grad_norm": 1.4432686567306519, "learning_rate": 2e-05, "loss": 0.04101708, "step": 16802 }, { "epoch": 33.606, "grad_norm": 1.1293833255767822, "learning_rate": 2e-05, "loss": 0.04084689, "step": 16803 }, { "epoch": 33.608, "grad_norm": 1.2917184829711914, "learning_rate": 2e-05, "loss": 0.04250922, "step": 16804 }, { "epoch": 33.61, "grad_norm": 3.9780828952789307, "learning_rate": 2e-05, "loss": 0.03426572, "step": 16805 }, { "epoch": 33.612, "grad_norm": 2.3805651664733887, "learning_rate": 2e-05, "loss": 0.03994187, "step": 16806 }, { "epoch": 33.614, "grad_norm": 1.2909883260726929, "learning_rate": 2e-05, "loss": 0.0498014, "step": 16807 }, { "epoch": 33.616, "grad_norm": 1.0932729244232178, "learning_rate": 2e-05, "loss": 0.03009418, "step": 16808 }, { "epoch": 33.618, "grad_norm": 1.3504739999771118, "learning_rate": 2e-05, "loss": 0.04348255, "step": 16809 }, { "epoch": 33.62, "grad_norm": 1.2880327701568604, "learning_rate": 2e-05, "loss": 0.05897732, "step": 16810 }, { "epoch": 33.622, "grad_norm": 1.2188899517059326, "learning_rate": 2e-05, "loss": 0.04643267, "step": 16811 }, { "epoch": 33.624, "grad_norm": 1.2222504615783691, "learning_rate": 2e-05, "loss": 0.03556299, "step": 16812 }, { "epoch": 33.626, "grad_norm": 1.506365418434143, "learning_rate": 2e-05, "loss": 0.03758072, "step": 16813 }, { "epoch": 33.628, "grad_norm": 1.1334285736083984, "learning_rate": 2e-05, "loss": 0.03652769, "step": 16814 }, { "epoch": 33.63, "grad_norm": 1.8486438989639282, "learning_rate": 2e-05, "loss": 0.03768915, "step": 16815 }, { "epoch": 33.632, "grad_norm": 2.419917345046997, "learning_rate": 2e-05, "loss": 0.05534883, "step": 16816 }, { "epoch": 33.634, "grad_norm": 1.1120336055755615, "learning_rate": 2e-05, "loss": 0.02930129, "step": 16817 }, { "epoch": 33.636, "grad_norm": 0.9981685280799866, "learning_rate": 2e-05, "loss": 0.03070155, "step": 16818 }, { "epoch": 33.638, "grad_norm": 1.1295273303985596, "learning_rate": 2e-05, "loss": 0.04213031, "step": 16819 }, { "epoch": 33.64, "grad_norm": 3.216989278793335, "learning_rate": 2e-05, "loss": 0.03833716, "step": 16820 }, { "epoch": 33.642, "grad_norm": 2.445704936981201, "learning_rate": 2e-05, "loss": 0.04210251, "step": 16821 }, { "epoch": 33.644, "grad_norm": 1.0248154401779175, "learning_rate": 2e-05, "loss": 0.03077208, "step": 16822 }, { "epoch": 33.646, "grad_norm": 1.1069806814193726, "learning_rate": 2e-05, "loss": 0.04635371, "step": 16823 }, { "epoch": 33.648, "grad_norm": 2.114872455596924, "learning_rate": 2e-05, "loss": 0.06503963, "step": 16824 }, { "epoch": 33.65, "grad_norm": 1.2636666297912598, "learning_rate": 2e-05, "loss": 0.02907618, "step": 16825 }, { "epoch": 33.652, "grad_norm": 1.651583194732666, "learning_rate": 2e-05, "loss": 0.03353261, "step": 16826 }, { "epoch": 33.654, "grad_norm": 1.2803378105163574, "learning_rate": 2e-05, "loss": 0.03870942, "step": 16827 }, { "epoch": 33.656, "grad_norm": 1.2577418088912964, "learning_rate": 2e-05, "loss": 0.04237111, "step": 16828 }, { "epoch": 33.658, "grad_norm": 1.1378142833709717, "learning_rate": 2e-05, "loss": 0.04286456, "step": 16829 }, { "epoch": 33.66, "grad_norm": 2.4869840145111084, "learning_rate": 2e-05, "loss": 0.03604544, "step": 16830 }, { "epoch": 33.662, "grad_norm": 1.4990627765655518, "learning_rate": 2e-05, "loss": 0.03898953, "step": 16831 }, { "epoch": 33.664, "grad_norm": 1.4893847703933716, "learning_rate": 2e-05, "loss": 0.040364, "step": 16832 }, { "epoch": 33.666, "grad_norm": 1.1118310689926147, "learning_rate": 2e-05, "loss": 0.03826702, "step": 16833 }, { "epoch": 33.668, "grad_norm": 1.1041016578674316, "learning_rate": 2e-05, "loss": 0.04368906, "step": 16834 }, { "epoch": 33.67, "grad_norm": 1.1376192569732666, "learning_rate": 2e-05, "loss": 0.04184592, "step": 16835 }, { "epoch": 33.672, "grad_norm": 1.2078814506530762, "learning_rate": 2e-05, "loss": 0.04568657, "step": 16836 }, { "epoch": 33.674, "grad_norm": 1.4587289094924927, "learning_rate": 2e-05, "loss": 0.04338841, "step": 16837 }, { "epoch": 33.676, "grad_norm": 1.011198878288269, "learning_rate": 2e-05, "loss": 0.03405341, "step": 16838 }, { "epoch": 33.678, "grad_norm": 1.286076307296753, "learning_rate": 2e-05, "loss": 0.04624006, "step": 16839 }, { "epoch": 33.68, "grad_norm": 1.1408072710037231, "learning_rate": 2e-05, "loss": 0.04370711, "step": 16840 }, { "epoch": 33.682, "grad_norm": 1.0561197996139526, "learning_rate": 2e-05, "loss": 0.04226065, "step": 16841 }, { "epoch": 33.684, "grad_norm": 1.5302330255508423, "learning_rate": 2e-05, "loss": 0.05760441, "step": 16842 }, { "epoch": 33.686, "grad_norm": 1.3950103521347046, "learning_rate": 2e-05, "loss": 0.06257196, "step": 16843 }, { "epoch": 33.688, "grad_norm": 1.3161698579788208, "learning_rate": 2e-05, "loss": 0.04578729, "step": 16844 }, { "epoch": 33.69, "grad_norm": 1.1795623302459717, "learning_rate": 2e-05, "loss": 0.02204716, "step": 16845 }, { "epoch": 33.692, "grad_norm": 1.0264464616775513, "learning_rate": 2e-05, "loss": 0.04933001, "step": 16846 }, { "epoch": 33.694, "grad_norm": 1.3931221961975098, "learning_rate": 2e-05, "loss": 0.04392222, "step": 16847 }, { "epoch": 33.696, "grad_norm": 0.9317388534545898, "learning_rate": 2e-05, "loss": 0.02685932, "step": 16848 }, { "epoch": 33.698, "grad_norm": 0.9781988263130188, "learning_rate": 2e-05, "loss": 0.03584233, "step": 16849 }, { "epoch": 33.7, "grad_norm": 1.2689793109893799, "learning_rate": 2e-05, "loss": 0.04835085, "step": 16850 }, { "epoch": 33.702, "grad_norm": 1.4062492847442627, "learning_rate": 2e-05, "loss": 0.04714158, "step": 16851 }, { "epoch": 33.704, "grad_norm": 3.2041304111480713, "learning_rate": 2e-05, "loss": 0.05374725, "step": 16852 }, { "epoch": 33.706, "grad_norm": 1.2796287536621094, "learning_rate": 2e-05, "loss": 0.0468824, "step": 16853 }, { "epoch": 33.708, "grad_norm": 2.208021640777588, "learning_rate": 2e-05, "loss": 0.05651149, "step": 16854 }, { "epoch": 33.71, "grad_norm": 1.8455325365066528, "learning_rate": 2e-05, "loss": 0.0509971, "step": 16855 }, { "epoch": 33.712, "grad_norm": 0.9580972790718079, "learning_rate": 2e-05, "loss": 0.02455459, "step": 16856 }, { "epoch": 33.714, "grad_norm": 1.2142010927200317, "learning_rate": 2e-05, "loss": 0.03981315, "step": 16857 }, { "epoch": 33.716, "grad_norm": 1.0216953754425049, "learning_rate": 2e-05, "loss": 0.0417695, "step": 16858 }, { "epoch": 33.718, "grad_norm": 1.1976450681686401, "learning_rate": 2e-05, "loss": 0.02827924, "step": 16859 }, { "epoch": 33.72, "grad_norm": 1.1605479717254639, "learning_rate": 2e-05, "loss": 0.04454171, "step": 16860 }, { "epoch": 33.722, "grad_norm": 1.382978081703186, "learning_rate": 2e-05, "loss": 0.06117807, "step": 16861 }, { "epoch": 33.724, "grad_norm": 0.8501904010772705, "learning_rate": 2e-05, "loss": 0.0254951, "step": 16862 }, { "epoch": 33.726, "grad_norm": 1.1386749744415283, "learning_rate": 2e-05, "loss": 0.04194954, "step": 16863 }, { "epoch": 33.728, "grad_norm": 0.990307092666626, "learning_rate": 2e-05, "loss": 0.03089491, "step": 16864 }, { "epoch": 33.73, "grad_norm": 0.9442136287689209, "learning_rate": 2e-05, "loss": 0.03526897, "step": 16865 }, { "epoch": 33.732, "grad_norm": 1.119691014289856, "learning_rate": 2e-05, "loss": 0.03854529, "step": 16866 }, { "epoch": 33.734, "grad_norm": 1.748223066329956, "learning_rate": 2e-05, "loss": 0.02689804, "step": 16867 }, { "epoch": 33.736, "grad_norm": 1.6101315021514893, "learning_rate": 2e-05, "loss": 0.04781541, "step": 16868 }, { "epoch": 33.738, "grad_norm": 1.0552995204925537, "learning_rate": 2e-05, "loss": 0.03037945, "step": 16869 }, { "epoch": 33.74, "grad_norm": 1.2612851858139038, "learning_rate": 2e-05, "loss": 0.0377595, "step": 16870 }, { "epoch": 33.742, "grad_norm": 0.9552090167999268, "learning_rate": 2e-05, "loss": 0.02962127, "step": 16871 }, { "epoch": 33.744, "grad_norm": 0.9637928009033203, "learning_rate": 2e-05, "loss": 0.04046572, "step": 16872 }, { "epoch": 33.746, "grad_norm": 1.0410312414169312, "learning_rate": 2e-05, "loss": 0.04488287, "step": 16873 }, { "epoch": 33.748, "grad_norm": 1.2417147159576416, "learning_rate": 2e-05, "loss": 0.05209735, "step": 16874 }, { "epoch": 33.75, "grad_norm": 1.3345032930374146, "learning_rate": 2e-05, "loss": 0.06243813, "step": 16875 }, { "epoch": 33.752, "grad_norm": 0.8462825417518616, "learning_rate": 2e-05, "loss": 0.02463733, "step": 16876 }, { "epoch": 33.754, "grad_norm": 1.9590753316879272, "learning_rate": 2e-05, "loss": 0.05277262, "step": 16877 }, { "epoch": 33.756, "grad_norm": 1.141790747642517, "learning_rate": 2e-05, "loss": 0.04721228, "step": 16878 }, { "epoch": 33.758, "grad_norm": 0.9426037669181824, "learning_rate": 2e-05, "loss": 0.03288198, "step": 16879 }, { "epoch": 33.76, "grad_norm": 1.2627002000808716, "learning_rate": 2e-05, "loss": 0.05176467, "step": 16880 }, { "epoch": 33.762, "grad_norm": 1.8264248371124268, "learning_rate": 2e-05, "loss": 0.04593317, "step": 16881 }, { "epoch": 33.764, "grad_norm": 1.3055675029754639, "learning_rate": 2e-05, "loss": 0.06014679, "step": 16882 }, { "epoch": 33.766, "grad_norm": 1.199167013168335, "learning_rate": 2e-05, "loss": 0.05326901, "step": 16883 }, { "epoch": 33.768, "grad_norm": 1.399657964706421, "learning_rate": 2e-05, "loss": 0.03976363, "step": 16884 }, { "epoch": 33.77, "grad_norm": 1.4370189905166626, "learning_rate": 2e-05, "loss": 0.04173758, "step": 16885 }, { "epoch": 33.772, "grad_norm": 1.3217477798461914, "learning_rate": 2e-05, "loss": 0.04312097, "step": 16886 }, { "epoch": 33.774, "grad_norm": 1.0405267477035522, "learning_rate": 2e-05, "loss": 0.02572107, "step": 16887 }, { "epoch": 33.776, "grad_norm": 3.8962230682373047, "learning_rate": 2e-05, "loss": 0.05806974, "step": 16888 }, { "epoch": 33.778, "grad_norm": 1.183304786682129, "learning_rate": 2e-05, "loss": 0.05114609, "step": 16889 }, { "epoch": 33.78, "grad_norm": 1.0144455432891846, "learning_rate": 2e-05, "loss": 0.03402551, "step": 16890 }, { "epoch": 33.782, "grad_norm": 0.912356436252594, "learning_rate": 2e-05, "loss": 0.03162487, "step": 16891 }, { "epoch": 33.784, "grad_norm": 1.5084586143493652, "learning_rate": 2e-05, "loss": 0.04724879, "step": 16892 }, { "epoch": 33.786, "grad_norm": 1.081552267074585, "learning_rate": 2e-05, "loss": 0.03258973, "step": 16893 }, { "epoch": 33.788, "grad_norm": 1.0721511840820312, "learning_rate": 2e-05, "loss": 0.04917396, "step": 16894 }, { "epoch": 33.79, "grad_norm": 1.1885251998901367, "learning_rate": 2e-05, "loss": 0.04815122, "step": 16895 }, { "epoch": 33.792, "grad_norm": 1.4012700319290161, "learning_rate": 2e-05, "loss": 0.04503307, "step": 16896 }, { "epoch": 33.794, "grad_norm": 1.018221378326416, "learning_rate": 2e-05, "loss": 0.03371017, "step": 16897 }, { "epoch": 33.796, "grad_norm": 1.1117615699768066, "learning_rate": 2e-05, "loss": 0.04455762, "step": 16898 }, { "epoch": 33.798, "grad_norm": 2.338944435119629, "learning_rate": 2e-05, "loss": 0.05771154, "step": 16899 }, { "epoch": 33.8, "grad_norm": 1.3427327871322632, "learning_rate": 2e-05, "loss": 0.04563061, "step": 16900 }, { "epoch": 33.802, "grad_norm": 1.598863124847412, "learning_rate": 2e-05, "loss": 0.04085599, "step": 16901 }, { "epoch": 33.804, "grad_norm": 4.519804000854492, "learning_rate": 2e-05, "loss": 0.05506884, "step": 16902 }, { "epoch": 33.806, "grad_norm": 1.7649790048599243, "learning_rate": 2e-05, "loss": 0.04375284, "step": 16903 }, { "epoch": 33.808, "grad_norm": 0.7932525873184204, "learning_rate": 2e-05, "loss": 0.02062046, "step": 16904 }, { "epoch": 33.81, "grad_norm": 1.4744629859924316, "learning_rate": 2e-05, "loss": 0.0412587, "step": 16905 }, { "epoch": 33.812, "grad_norm": 1.8036439418792725, "learning_rate": 2e-05, "loss": 0.04557544, "step": 16906 }, { "epoch": 33.814, "grad_norm": 1.2862471342086792, "learning_rate": 2e-05, "loss": 0.06118719, "step": 16907 }, { "epoch": 33.816, "grad_norm": 1.0730700492858887, "learning_rate": 2e-05, "loss": 0.03417275, "step": 16908 }, { "epoch": 33.818, "grad_norm": 1.6941379308700562, "learning_rate": 2e-05, "loss": 0.06889927, "step": 16909 }, { "epoch": 33.82, "grad_norm": 1.0613124370574951, "learning_rate": 2e-05, "loss": 0.04204726, "step": 16910 }, { "epoch": 33.822, "grad_norm": 1.436385154724121, "learning_rate": 2e-05, "loss": 0.05231769, "step": 16911 }, { "epoch": 33.824, "grad_norm": 1.1055042743682861, "learning_rate": 2e-05, "loss": 0.02975886, "step": 16912 }, { "epoch": 33.826, "grad_norm": 1.3664203882217407, "learning_rate": 2e-05, "loss": 0.0504609, "step": 16913 }, { "epoch": 33.828, "grad_norm": 1.1539993286132812, "learning_rate": 2e-05, "loss": 0.04137579, "step": 16914 }, { "epoch": 33.83, "grad_norm": 1.266783595085144, "learning_rate": 2e-05, "loss": 0.04526065, "step": 16915 }, { "epoch": 33.832, "grad_norm": 1.3044017553329468, "learning_rate": 2e-05, "loss": 0.04141079, "step": 16916 }, { "epoch": 33.834, "grad_norm": 2.1756279468536377, "learning_rate": 2e-05, "loss": 0.04577321, "step": 16917 }, { "epoch": 33.836, "grad_norm": 1.8856961727142334, "learning_rate": 2e-05, "loss": 0.04963861, "step": 16918 }, { "epoch": 33.838, "grad_norm": 1.0676918029785156, "learning_rate": 2e-05, "loss": 0.04658003, "step": 16919 }, { "epoch": 33.84, "grad_norm": 2.096513509750366, "learning_rate": 2e-05, "loss": 0.04904509, "step": 16920 }, { "epoch": 33.842, "grad_norm": 1.2753912210464478, "learning_rate": 2e-05, "loss": 0.05007047, "step": 16921 }, { "epoch": 33.844, "grad_norm": 0.9849660396575928, "learning_rate": 2e-05, "loss": 0.03128029, "step": 16922 }, { "epoch": 33.846, "grad_norm": 1.4278568029403687, "learning_rate": 2e-05, "loss": 0.04545929, "step": 16923 }, { "epoch": 33.848, "grad_norm": 1.4661980867385864, "learning_rate": 2e-05, "loss": 0.05411399, "step": 16924 }, { "epoch": 33.85, "grad_norm": 1.2466408014297485, "learning_rate": 2e-05, "loss": 0.04471597, "step": 16925 }, { "epoch": 33.852, "grad_norm": 0.9380550980567932, "learning_rate": 2e-05, "loss": 0.04102387, "step": 16926 }, { "epoch": 33.854, "grad_norm": 1.1571276187896729, "learning_rate": 2e-05, "loss": 0.05654067, "step": 16927 }, { "epoch": 33.856, "grad_norm": 0.9969253540039062, "learning_rate": 2e-05, "loss": 0.03924453, "step": 16928 }, { "epoch": 33.858, "grad_norm": 1.004270076751709, "learning_rate": 2e-05, "loss": 0.0299517, "step": 16929 }, { "epoch": 33.86, "grad_norm": 1.096602439880371, "learning_rate": 2e-05, "loss": 0.03355722, "step": 16930 }, { "epoch": 33.862, "grad_norm": 1.0416635274887085, "learning_rate": 2e-05, "loss": 0.03369167, "step": 16931 }, { "epoch": 33.864, "grad_norm": 1.327217698097229, "learning_rate": 2e-05, "loss": 0.05232648, "step": 16932 }, { "epoch": 33.866, "grad_norm": 1.9164153337478638, "learning_rate": 2e-05, "loss": 0.05434879, "step": 16933 }, { "epoch": 33.868, "grad_norm": 1.9355528354644775, "learning_rate": 2e-05, "loss": 0.04286207, "step": 16934 }, { "epoch": 33.87, "grad_norm": 2.0409860610961914, "learning_rate": 2e-05, "loss": 0.03842381, "step": 16935 }, { "epoch": 33.872, "grad_norm": 1.4562387466430664, "learning_rate": 2e-05, "loss": 0.03790183, "step": 16936 }, { "epoch": 33.874, "grad_norm": 1.2432488203048706, "learning_rate": 2e-05, "loss": 0.04522638, "step": 16937 }, { "epoch": 33.876, "grad_norm": 1.5313806533813477, "learning_rate": 2e-05, "loss": 0.04211116, "step": 16938 }, { "epoch": 33.878, "grad_norm": 1.330566644668579, "learning_rate": 2e-05, "loss": 0.04907233, "step": 16939 }, { "epoch": 33.88, "grad_norm": 1.2614879608154297, "learning_rate": 2e-05, "loss": 0.06006509, "step": 16940 }, { "epoch": 33.882, "grad_norm": 0.8992356061935425, "learning_rate": 2e-05, "loss": 0.03267233, "step": 16941 }, { "epoch": 33.884, "grad_norm": 1.0046213865280151, "learning_rate": 2e-05, "loss": 0.04911587, "step": 16942 }, { "epoch": 33.886, "grad_norm": 1.811383605003357, "learning_rate": 2e-05, "loss": 0.05342248, "step": 16943 }, { "epoch": 33.888, "grad_norm": 1.098055362701416, "learning_rate": 2e-05, "loss": 0.04004695, "step": 16944 }, { "epoch": 33.89, "grad_norm": 1.5141690969467163, "learning_rate": 2e-05, "loss": 0.05056385, "step": 16945 }, { "epoch": 33.892, "grad_norm": 1.2271699905395508, "learning_rate": 2e-05, "loss": 0.04924796, "step": 16946 }, { "epoch": 33.894, "grad_norm": 1.2581502199172974, "learning_rate": 2e-05, "loss": 0.03863451, "step": 16947 }, { "epoch": 33.896, "grad_norm": 0.9692806005477905, "learning_rate": 2e-05, "loss": 0.04194497, "step": 16948 }, { "epoch": 33.898, "grad_norm": 1.4562419652938843, "learning_rate": 2e-05, "loss": 0.04685233, "step": 16949 }, { "epoch": 33.9, "grad_norm": 1.32866632938385, "learning_rate": 2e-05, "loss": 0.05198713, "step": 16950 }, { "epoch": 33.902, "grad_norm": 1.1685373783111572, "learning_rate": 2e-05, "loss": 0.04833569, "step": 16951 }, { "epoch": 33.904, "grad_norm": 1.1611336469650269, "learning_rate": 2e-05, "loss": 0.04151731, "step": 16952 }, { "epoch": 33.906, "grad_norm": 1.1714519262313843, "learning_rate": 2e-05, "loss": 0.04272041, "step": 16953 }, { "epoch": 33.908, "grad_norm": 1.6427088975906372, "learning_rate": 2e-05, "loss": 0.05739013, "step": 16954 }, { "epoch": 33.91, "grad_norm": 1.11444890499115, "learning_rate": 2e-05, "loss": 0.03927352, "step": 16955 }, { "epoch": 33.912, "grad_norm": 1.4082242250442505, "learning_rate": 2e-05, "loss": 0.06680731, "step": 16956 }, { "epoch": 33.914, "grad_norm": 1.295926809310913, "learning_rate": 2e-05, "loss": 0.04506046, "step": 16957 }, { "epoch": 33.916, "grad_norm": 0.9405683279037476, "learning_rate": 2e-05, "loss": 0.02393782, "step": 16958 }, { "epoch": 33.918, "grad_norm": 1.58418607711792, "learning_rate": 2e-05, "loss": 0.04732141, "step": 16959 }, { "epoch": 33.92, "grad_norm": 1.6658512353897095, "learning_rate": 2e-05, "loss": 0.05234197, "step": 16960 }, { "epoch": 33.922, "grad_norm": 1.5868287086486816, "learning_rate": 2e-05, "loss": 0.05572698, "step": 16961 }, { "epoch": 33.924, "grad_norm": 1.2670631408691406, "learning_rate": 2e-05, "loss": 0.03493927, "step": 16962 }, { "epoch": 33.926, "grad_norm": 1.2648658752441406, "learning_rate": 2e-05, "loss": 0.04391746, "step": 16963 }, { "epoch": 33.928, "grad_norm": 1.5117779970169067, "learning_rate": 2e-05, "loss": 0.04314294, "step": 16964 }, { "epoch": 33.93, "grad_norm": 1.159271001815796, "learning_rate": 2e-05, "loss": 0.03362155, "step": 16965 }, { "epoch": 33.932, "grad_norm": 1.2142740488052368, "learning_rate": 2e-05, "loss": 0.03751262, "step": 16966 }, { "epoch": 33.934, "grad_norm": 1.2300341129302979, "learning_rate": 2e-05, "loss": 0.04216234, "step": 16967 }, { "epoch": 33.936, "grad_norm": 1.0165789127349854, "learning_rate": 2e-05, "loss": 0.03385535, "step": 16968 }, { "epoch": 33.938, "grad_norm": 1.1254189014434814, "learning_rate": 2e-05, "loss": 0.03804931, "step": 16969 }, { "epoch": 33.94, "grad_norm": 1.0231702327728271, "learning_rate": 2e-05, "loss": 0.03211974, "step": 16970 }, { "epoch": 33.942, "grad_norm": 1.1226533651351929, "learning_rate": 2e-05, "loss": 0.04243637, "step": 16971 }, { "epoch": 33.944, "grad_norm": 1.0575497150421143, "learning_rate": 2e-05, "loss": 0.03891562, "step": 16972 }, { "epoch": 33.946, "grad_norm": 1.3300598859786987, "learning_rate": 2e-05, "loss": 0.03813462, "step": 16973 }, { "epoch": 33.948, "grad_norm": 1.1231666803359985, "learning_rate": 2e-05, "loss": 0.04401132, "step": 16974 }, { "epoch": 33.95, "grad_norm": 1.0384533405303955, "learning_rate": 2e-05, "loss": 0.04073372, "step": 16975 }, { "epoch": 33.952, "grad_norm": 0.9705667495727539, "learning_rate": 2e-05, "loss": 0.0333796, "step": 16976 }, { "epoch": 33.954, "grad_norm": 3.3953044414520264, "learning_rate": 2e-05, "loss": 0.04748266, "step": 16977 }, { "epoch": 33.956, "grad_norm": 1.0151921510696411, "learning_rate": 2e-05, "loss": 0.03253239, "step": 16978 }, { "epoch": 33.958, "grad_norm": 1.612838864326477, "learning_rate": 2e-05, "loss": 0.04560025, "step": 16979 }, { "epoch": 33.96, "grad_norm": 1.2825559377670288, "learning_rate": 2e-05, "loss": 0.03292632, "step": 16980 }, { "epoch": 33.962, "grad_norm": 1.1484404802322388, "learning_rate": 2e-05, "loss": 0.03267542, "step": 16981 }, { "epoch": 33.964, "grad_norm": 1.3197839260101318, "learning_rate": 2e-05, "loss": 0.0385856, "step": 16982 }, { "epoch": 33.966, "grad_norm": 1.66212797164917, "learning_rate": 2e-05, "loss": 0.03775371, "step": 16983 }, { "epoch": 33.968, "grad_norm": 2.77333664894104, "learning_rate": 2e-05, "loss": 0.04835136, "step": 16984 }, { "epoch": 33.97, "grad_norm": 1.2204259634017944, "learning_rate": 2e-05, "loss": 0.03903702, "step": 16985 }, { "epoch": 33.972, "grad_norm": 1.9606164693832397, "learning_rate": 2e-05, "loss": 0.03565089, "step": 16986 }, { "epoch": 33.974, "grad_norm": 1.029175043106079, "learning_rate": 2e-05, "loss": 0.03104983, "step": 16987 }, { "epoch": 33.976, "grad_norm": 1.9764819145202637, "learning_rate": 2e-05, "loss": 0.04124163, "step": 16988 }, { "epoch": 33.978, "grad_norm": 1.2919042110443115, "learning_rate": 2e-05, "loss": 0.04482358, "step": 16989 }, { "epoch": 33.98, "grad_norm": 1.8748044967651367, "learning_rate": 2e-05, "loss": 0.05687953, "step": 16990 }, { "epoch": 33.982, "grad_norm": 1.5990813970565796, "learning_rate": 2e-05, "loss": 0.06885971, "step": 16991 }, { "epoch": 33.984, "grad_norm": 1.6569875478744507, "learning_rate": 2e-05, "loss": 0.02224008, "step": 16992 }, { "epoch": 33.986, "grad_norm": 1.5059123039245605, "learning_rate": 2e-05, "loss": 0.03797079, "step": 16993 }, { "epoch": 33.988, "grad_norm": 1.1320881843566895, "learning_rate": 2e-05, "loss": 0.04442799, "step": 16994 }, { "epoch": 33.99, "grad_norm": 1.8858788013458252, "learning_rate": 2e-05, "loss": 0.05333455, "step": 16995 }, { "epoch": 33.992, "grad_norm": 1.0770121812820435, "learning_rate": 2e-05, "loss": 0.04818095, "step": 16996 }, { "epoch": 33.994, "grad_norm": 2.505625009536743, "learning_rate": 2e-05, "loss": 0.04461709, "step": 16997 }, { "epoch": 33.996, "grad_norm": 1.5307080745697021, "learning_rate": 2e-05, "loss": 0.04006788, "step": 16998 }, { "epoch": 33.998, "grad_norm": 1.1944080591201782, "learning_rate": 2e-05, "loss": 0.05010471, "step": 16999 }, { "epoch": 34.0, "grad_norm": 1.0636547803878784, "learning_rate": 2e-05, "loss": 0.04512432, "step": 17000 }, { "epoch": 34.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9820359281437125, "Equal_1": 1.0, "Equal_2": 0.9860279441117764, "Equal_3": 0.9860279441117764, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9919839679358717, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.992, "Perpendicular_1": 0.996, "Perpendicular_2": 0.98, "Perpendicular_3": 0.905811623246493, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.994, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9880239520958084 }, "eval_runtime": 319.634, "eval_samples_per_second": 32.85, "eval_steps_per_second": 0.657, "step": 17000 }, { "epoch": 34.002, "grad_norm": 1.0812188386917114, "learning_rate": 2e-05, "loss": 0.03332932, "step": 17001 }, { "epoch": 34.004, "grad_norm": 1.6815991401672363, "learning_rate": 2e-05, "loss": 0.04507479, "step": 17002 }, { "epoch": 34.006, "grad_norm": 1.3439853191375732, "learning_rate": 2e-05, "loss": 0.04140931, "step": 17003 }, { "epoch": 34.008, "grad_norm": 1.2833200693130493, "learning_rate": 2e-05, "loss": 0.03846888, "step": 17004 }, { "epoch": 34.01, "grad_norm": 1.3974056243896484, "learning_rate": 2e-05, "loss": 0.0499987, "step": 17005 }, { "epoch": 34.012, "grad_norm": 2.1579604148864746, "learning_rate": 2e-05, "loss": 0.04521484, "step": 17006 }, { "epoch": 34.014, "grad_norm": 1.3013967275619507, "learning_rate": 2e-05, "loss": 0.04818133, "step": 17007 }, { "epoch": 34.016, "grad_norm": 1.740728497505188, "learning_rate": 2e-05, "loss": 0.04121596, "step": 17008 }, { "epoch": 34.018, "grad_norm": 0.9428570866584778, "learning_rate": 2e-05, "loss": 0.03057436, "step": 17009 }, { "epoch": 34.02, "grad_norm": 1.1139823198318481, "learning_rate": 2e-05, "loss": 0.04498542, "step": 17010 }, { "epoch": 34.022, "grad_norm": 1.3437737226486206, "learning_rate": 2e-05, "loss": 0.04737519, "step": 17011 }, { "epoch": 34.024, "grad_norm": 1.2312670946121216, "learning_rate": 2e-05, "loss": 0.04723905, "step": 17012 }, { "epoch": 34.026, "grad_norm": 2.406672954559326, "learning_rate": 2e-05, "loss": 0.04161016, "step": 17013 }, { "epoch": 34.028, "grad_norm": 1.13112211227417, "learning_rate": 2e-05, "loss": 0.05677346, "step": 17014 }, { "epoch": 34.03, "grad_norm": 1.2599197626113892, "learning_rate": 2e-05, "loss": 0.05227666, "step": 17015 }, { "epoch": 34.032, "grad_norm": 1.2989979982376099, "learning_rate": 2e-05, "loss": 0.0471041, "step": 17016 }, { "epoch": 34.034, "grad_norm": 1.432118535041809, "learning_rate": 2e-05, "loss": 0.05415585, "step": 17017 }, { "epoch": 34.036, "grad_norm": 1.2104634046554565, "learning_rate": 2e-05, "loss": 0.04434905, "step": 17018 }, { "epoch": 34.038, "grad_norm": 1.2272220849990845, "learning_rate": 2e-05, "loss": 0.04309473, "step": 17019 }, { "epoch": 34.04, "grad_norm": 1.2818056344985962, "learning_rate": 2e-05, "loss": 0.04750851, "step": 17020 }, { "epoch": 34.042, "grad_norm": 1.795697808265686, "learning_rate": 2e-05, "loss": 0.04016794, "step": 17021 }, { "epoch": 34.044, "grad_norm": 1.261646032333374, "learning_rate": 2e-05, "loss": 0.05218561, "step": 17022 }, { "epoch": 34.046, "grad_norm": 1.2014031410217285, "learning_rate": 2e-05, "loss": 0.03123101, "step": 17023 }, { "epoch": 34.048, "grad_norm": 1.3816989660263062, "learning_rate": 2e-05, "loss": 0.05157385, "step": 17024 }, { "epoch": 34.05, "grad_norm": 0.9320183992385864, "learning_rate": 2e-05, "loss": 0.02841931, "step": 17025 }, { "epoch": 34.052, "grad_norm": 1.5845396518707275, "learning_rate": 2e-05, "loss": 0.0396069, "step": 17026 }, { "epoch": 34.054, "grad_norm": 1.7424157857894897, "learning_rate": 2e-05, "loss": 0.05160773, "step": 17027 }, { "epoch": 34.056, "grad_norm": 1.3250648975372314, "learning_rate": 2e-05, "loss": 0.03230084, "step": 17028 }, { "epoch": 34.058, "grad_norm": 1.3948570489883423, "learning_rate": 2e-05, "loss": 0.04338755, "step": 17029 }, { "epoch": 34.06, "grad_norm": 1.9198899269104004, "learning_rate": 2e-05, "loss": 0.03582556, "step": 17030 }, { "epoch": 34.062, "grad_norm": 1.0664349794387817, "learning_rate": 2e-05, "loss": 0.02794036, "step": 17031 }, { "epoch": 34.064, "grad_norm": 1.0144078731536865, "learning_rate": 2e-05, "loss": 0.03366199, "step": 17032 }, { "epoch": 34.066, "grad_norm": 1.4609384536743164, "learning_rate": 2e-05, "loss": 0.0822296, "step": 17033 }, { "epoch": 34.068, "grad_norm": 1.6045973300933838, "learning_rate": 2e-05, "loss": 0.04467433, "step": 17034 }, { "epoch": 34.07, "grad_norm": 2.1055264472961426, "learning_rate": 2e-05, "loss": 0.04265523, "step": 17035 }, { "epoch": 34.072, "grad_norm": 1.1740909814834595, "learning_rate": 2e-05, "loss": 0.04725765, "step": 17036 }, { "epoch": 34.074, "grad_norm": 1.264272928237915, "learning_rate": 2e-05, "loss": 0.04407164, "step": 17037 }, { "epoch": 34.076, "grad_norm": 1.4800697565078735, "learning_rate": 2e-05, "loss": 0.05361645, "step": 17038 }, { "epoch": 34.078, "grad_norm": 0.9612439870834351, "learning_rate": 2e-05, "loss": 0.02787845, "step": 17039 }, { "epoch": 34.08, "grad_norm": 1.2758971452713013, "learning_rate": 2e-05, "loss": 0.03535602, "step": 17040 }, { "epoch": 34.082, "grad_norm": 1.4546493291854858, "learning_rate": 2e-05, "loss": 0.03896, "step": 17041 }, { "epoch": 34.084, "grad_norm": 1.3334996700286865, "learning_rate": 2e-05, "loss": 0.0402444, "step": 17042 }, { "epoch": 34.086, "grad_norm": 0.9739708304405212, "learning_rate": 2e-05, "loss": 0.03635871, "step": 17043 }, { "epoch": 34.088, "grad_norm": 1.5088661909103394, "learning_rate": 2e-05, "loss": 0.03371816, "step": 17044 }, { "epoch": 34.09, "grad_norm": 1.130311131477356, "learning_rate": 2e-05, "loss": 0.03255319, "step": 17045 }, { "epoch": 34.092, "grad_norm": 1.0935840606689453, "learning_rate": 2e-05, "loss": 0.03417723, "step": 17046 }, { "epoch": 34.094, "grad_norm": 1.748842716217041, "learning_rate": 2e-05, "loss": 0.04565499, "step": 17047 }, { "epoch": 34.096, "grad_norm": 0.9785546064376831, "learning_rate": 2e-05, "loss": 0.02791492, "step": 17048 }, { "epoch": 34.098, "grad_norm": 1.1637617349624634, "learning_rate": 2e-05, "loss": 0.0296043, "step": 17049 }, { "epoch": 34.1, "grad_norm": 1.0762447118759155, "learning_rate": 2e-05, "loss": 0.02891765, "step": 17050 }, { "epoch": 34.102, "grad_norm": 0.9392781257629395, "learning_rate": 2e-05, "loss": 0.02610712, "step": 17051 }, { "epoch": 34.104, "grad_norm": 1.363243579864502, "learning_rate": 2e-05, "loss": 0.04912724, "step": 17052 }, { "epoch": 34.106, "grad_norm": 1.1277014017105103, "learning_rate": 2e-05, "loss": 0.0407737, "step": 17053 }, { "epoch": 34.108, "grad_norm": 1.3390322923660278, "learning_rate": 2e-05, "loss": 0.04672408, "step": 17054 }, { "epoch": 34.11, "grad_norm": 1.0482418537139893, "learning_rate": 2e-05, "loss": 0.03395636, "step": 17055 }, { "epoch": 34.112, "grad_norm": 1.1809817552566528, "learning_rate": 2e-05, "loss": 0.04168408, "step": 17056 }, { "epoch": 34.114, "grad_norm": 1.6997599601745605, "learning_rate": 2e-05, "loss": 0.07434915, "step": 17057 }, { "epoch": 34.116, "grad_norm": 1.3499462604522705, "learning_rate": 2e-05, "loss": 0.04132972, "step": 17058 }, { "epoch": 34.118, "grad_norm": 1.1026899814605713, "learning_rate": 2e-05, "loss": 0.04192564, "step": 17059 }, { "epoch": 34.12, "grad_norm": 1.6073766946792603, "learning_rate": 2e-05, "loss": 0.02773944, "step": 17060 }, { "epoch": 34.122, "grad_norm": 1.5061614513397217, "learning_rate": 2e-05, "loss": 0.04761346, "step": 17061 }, { "epoch": 34.124, "grad_norm": 4.682290077209473, "learning_rate": 2e-05, "loss": 0.02792961, "step": 17062 }, { "epoch": 34.126, "grad_norm": 2.404047966003418, "learning_rate": 2e-05, "loss": 0.03837546, "step": 17063 }, { "epoch": 34.128, "grad_norm": 1.1497547626495361, "learning_rate": 2e-05, "loss": 0.05234189, "step": 17064 }, { "epoch": 34.13, "grad_norm": 0.9120892286300659, "learning_rate": 2e-05, "loss": 0.02887437, "step": 17065 }, { "epoch": 34.132, "grad_norm": 1.2333347797393799, "learning_rate": 2e-05, "loss": 0.04958087, "step": 17066 }, { "epoch": 34.134, "grad_norm": 1.1911989450454712, "learning_rate": 2e-05, "loss": 0.03579342, "step": 17067 }, { "epoch": 34.136, "grad_norm": 1.0716452598571777, "learning_rate": 2e-05, "loss": 0.04589354, "step": 17068 }, { "epoch": 34.138, "grad_norm": 1.1978440284729004, "learning_rate": 2e-05, "loss": 0.04309279, "step": 17069 }, { "epoch": 34.14, "grad_norm": 1.454819679260254, "learning_rate": 2e-05, "loss": 0.03111471, "step": 17070 }, { "epoch": 34.142, "grad_norm": 1.3682692050933838, "learning_rate": 2e-05, "loss": 0.0418794, "step": 17071 }, { "epoch": 34.144, "grad_norm": 1.9202009439468384, "learning_rate": 2e-05, "loss": 0.04135188, "step": 17072 }, { "epoch": 34.146, "grad_norm": 1.1273765563964844, "learning_rate": 2e-05, "loss": 0.03442348, "step": 17073 }, { "epoch": 34.148, "grad_norm": 1.3619593381881714, "learning_rate": 2e-05, "loss": 0.04825344, "step": 17074 }, { "epoch": 34.15, "grad_norm": 1.4348422288894653, "learning_rate": 2e-05, "loss": 0.05776092, "step": 17075 }, { "epoch": 34.152, "grad_norm": 0.9510125517845154, "learning_rate": 2e-05, "loss": 0.02918218, "step": 17076 }, { "epoch": 34.154, "grad_norm": 1.260632872581482, "learning_rate": 2e-05, "loss": 0.04130819, "step": 17077 }, { "epoch": 34.156, "grad_norm": 1.5947790145874023, "learning_rate": 2e-05, "loss": 0.05725566, "step": 17078 }, { "epoch": 34.158, "grad_norm": 1.1542794704437256, "learning_rate": 2e-05, "loss": 0.04837495, "step": 17079 }, { "epoch": 34.16, "grad_norm": 1.712775707244873, "learning_rate": 2e-05, "loss": 0.07433301, "step": 17080 }, { "epoch": 34.162, "grad_norm": 1.1782052516937256, "learning_rate": 2e-05, "loss": 0.0498495, "step": 17081 }, { "epoch": 34.164, "grad_norm": 1.077620267868042, "learning_rate": 2e-05, "loss": 0.040213, "step": 17082 }, { "epoch": 34.166, "grad_norm": 1.2808398008346558, "learning_rate": 2e-05, "loss": 0.05944657, "step": 17083 }, { "epoch": 34.168, "grad_norm": 1.1864197254180908, "learning_rate": 2e-05, "loss": 0.05283065, "step": 17084 }, { "epoch": 34.17, "grad_norm": 1.0761622190475464, "learning_rate": 2e-05, "loss": 0.03802584, "step": 17085 }, { "epoch": 34.172, "grad_norm": 1.062628149986267, "learning_rate": 2e-05, "loss": 0.04319198, "step": 17086 }, { "epoch": 34.174, "grad_norm": 1.46955144405365, "learning_rate": 2e-05, "loss": 0.04133756, "step": 17087 }, { "epoch": 34.176, "grad_norm": 0.9547522068023682, "learning_rate": 2e-05, "loss": 0.035832, "step": 17088 }, { "epoch": 34.178, "grad_norm": 4.085507869720459, "learning_rate": 2e-05, "loss": 0.06185192, "step": 17089 }, { "epoch": 34.18, "grad_norm": 2.0465495586395264, "learning_rate": 2e-05, "loss": 0.04510322, "step": 17090 }, { "epoch": 34.182, "grad_norm": 1.657041311264038, "learning_rate": 2e-05, "loss": 0.04181718, "step": 17091 }, { "epoch": 34.184, "grad_norm": 1.1236737966537476, "learning_rate": 2e-05, "loss": 0.0362424, "step": 17092 }, { "epoch": 34.186, "grad_norm": 1.528167724609375, "learning_rate": 2e-05, "loss": 0.05361965, "step": 17093 }, { "epoch": 34.188, "grad_norm": 0.9988680481910706, "learning_rate": 2e-05, "loss": 0.03881805, "step": 17094 }, { "epoch": 34.19, "grad_norm": 2.2689661979675293, "learning_rate": 2e-05, "loss": 0.04616371, "step": 17095 }, { "epoch": 34.192, "grad_norm": 1.069471836090088, "learning_rate": 2e-05, "loss": 0.0355745, "step": 17096 }, { "epoch": 34.194, "grad_norm": 1.2013444900512695, "learning_rate": 2e-05, "loss": 0.04738539, "step": 17097 }, { "epoch": 34.196, "grad_norm": 1.4872270822525024, "learning_rate": 2e-05, "loss": 0.04246665, "step": 17098 }, { "epoch": 34.198, "grad_norm": 1.2898576259613037, "learning_rate": 2e-05, "loss": 0.03277687, "step": 17099 }, { "epoch": 34.2, "grad_norm": 0.9258760213851929, "learning_rate": 2e-05, "loss": 0.02966371, "step": 17100 }, { "epoch": 34.202, "grad_norm": 1.5347909927368164, "learning_rate": 2e-05, "loss": 0.03460214, "step": 17101 }, { "epoch": 34.204, "grad_norm": 1.5886799097061157, "learning_rate": 2e-05, "loss": 0.04721684, "step": 17102 }, { "epoch": 34.206, "grad_norm": 0.8684136271476746, "learning_rate": 2e-05, "loss": 0.03214806, "step": 17103 }, { "epoch": 34.208, "grad_norm": 1.1119003295898438, "learning_rate": 2e-05, "loss": 0.04176969, "step": 17104 }, { "epoch": 34.21, "grad_norm": 1.462074637413025, "learning_rate": 2e-05, "loss": 0.04227774, "step": 17105 }, { "epoch": 34.212, "grad_norm": 1.4271862506866455, "learning_rate": 2e-05, "loss": 0.05438799, "step": 17106 }, { "epoch": 34.214, "grad_norm": 1.3217233419418335, "learning_rate": 2e-05, "loss": 0.05445388, "step": 17107 }, { "epoch": 34.216, "grad_norm": 0.9007492661476135, "learning_rate": 2e-05, "loss": 0.02505394, "step": 17108 }, { "epoch": 34.218, "grad_norm": 1.0942732095718384, "learning_rate": 2e-05, "loss": 0.04544266, "step": 17109 }, { "epoch": 34.22, "grad_norm": 1.4460761547088623, "learning_rate": 2e-05, "loss": 0.04229202, "step": 17110 }, { "epoch": 34.222, "grad_norm": 1.192115306854248, "learning_rate": 2e-05, "loss": 0.05149196, "step": 17111 }, { "epoch": 34.224, "grad_norm": 1.0671484470367432, "learning_rate": 2e-05, "loss": 0.03309963, "step": 17112 }, { "epoch": 34.226, "grad_norm": 1.1515556573867798, "learning_rate": 2e-05, "loss": 0.04172114, "step": 17113 }, { "epoch": 34.228, "grad_norm": 1.0468469858169556, "learning_rate": 2e-05, "loss": 0.03374918, "step": 17114 }, { "epoch": 34.23, "grad_norm": 1.1221612691879272, "learning_rate": 2e-05, "loss": 0.05307683, "step": 17115 }, { "epoch": 34.232, "grad_norm": 1.047747015953064, "learning_rate": 2e-05, "loss": 0.03866299, "step": 17116 }, { "epoch": 34.234, "grad_norm": 0.9740225076675415, "learning_rate": 2e-05, "loss": 0.02800692, "step": 17117 }, { "epoch": 34.236, "grad_norm": 1.0027884244918823, "learning_rate": 2e-05, "loss": 0.01938191, "step": 17118 }, { "epoch": 34.238, "grad_norm": 1.1319332122802734, "learning_rate": 2e-05, "loss": 0.03879067, "step": 17119 }, { "epoch": 34.24, "grad_norm": 1.2376909255981445, "learning_rate": 2e-05, "loss": 0.04197739, "step": 17120 }, { "epoch": 34.242, "grad_norm": 1.7739689350128174, "learning_rate": 2e-05, "loss": 0.0463138, "step": 17121 }, { "epoch": 34.244, "grad_norm": 1.1264393329620361, "learning_rate": 2e-05, "loss": 0.03141479, "step": 17122 }, { "epoch": 34.246, "grad_norm": 1.261618733406067, "learning_rate": 2e-05, "loss": 0.03376903, "step": 17123 }, { "epoch": 34.248, "grad_norm": 1.123366355895996, "learning_rate": 2e-05, "loss": 0.04086906, "step": 17124 }, { "epoch": 34.25, "grad_norm": 2.098238229751587, "learning_rate": 2e-05, "loss": 0.03711101, "step": 17125 }, { "epoch": 34.252, "grad_norm": 1.1595600843429565, "learning_rate": 2e-05, "loss": 0.04579591, "step": 17126 }, { "epoch": 34.254, "grad_norm": 1.2296733856201172, "learning_rate": 2e-05, "loss": 0.05098995, "step": 17127 }, { "epoch": 34.256, "grad_norm": 1.07576322555542, "learning_rate": 2e-05, "loss": 0.04090093, "step": 17128 }, { "epoch": 34.258, "grad_norm": 1.4022080898284912, "learning_rate": 2e-05, "loss": 0.05891519, "step": 17129 }, { "epoch": 34.26, "grad_norm": 1.2884039878845215, "learning_rate": 2e-05, "loss": 0.06018764, "step": 17130 }, { "epoch": 34.262, "grad_norm": 1.513996958732605, "learning_rate": 2e-05, "loss": 0.05563644, "step": 17131 }, { "epoch": 34.264, "grad_norm": 1.0170012712478638, "learning_rate": 2e-05, "loss": 0.0308474, "step": 17132 }, { "epoch": 34.266, "grad_norm": 1.7678961753845215, "learning_rate": 2e-05, "loss": 0.03012414, "step": 17133 }, { "epoch": 34.268, "grad_norm": 1.930487036705017, "learning_rate": 2e-05, "loss": 0.03921436, "step": 17134 }, { "epoch": 34.27, "grad_norm": 1.1381620168685913, "learning_rate": 2e-05, "loss": 0.04433856, "step": 17135 }, { "epoch": 34.272, "grad_norm": 1.3920421600341797, "learning_rate": 2e-05, "loss": 0.04576306, "step": 17136 }, { "epoch": 34.274, "grad_norm": 1.132322907447815, "learning_rate": 2e-05, "loss": 0.04681329, "step": 17137 }, { "epoch": 34.276, "grad_norm": 1.0427021980285645, "learning_rate": 2e-05, "loss": 0.03912016, "step": 17138 }, { "epoch": 34.278, "grad_norm": 1.3596690893173218, "learning_rate": 2e-05, "loss": 0.0395805, "step": 17139 }, { "epoch": 34.28, "grad_norm": 1.1399190425872803, "learning_rate": 2e-05, "loss": 0.03435064, "step": 17140 }, { "epoch": 34.282, "grad_norm": 1.2819249629974365, "learning_rate": 2e-05, "loss": 0.0416284, "step": 17141 }, { "epoch": 34.284, "grad_norm": 1.2883572578430176, "learning_rate": 2e-05, "loss": 0.02817941, "step": 17142 }, { "epoch": 34.286, "grad_norm": 1.449006199836731, "learning_rate": 2e-05, "loss": 0.05150205, "step": 17143 }, { "epoch": 34.288, "grad_norm": 1.3080250024795532, "learning_rate": 2e-05, "loss": 0.03051752, "step": 17144 }, { "epoch": 34.29, "grad_norm": 1.1870465278625488, "learning_rate": 2e-05, "loss": 0.04249543, "step": 17145 }, { "epoch": 34.292, "grad_norm": 1.2269293069839478, "learning_rate": 2e-05, "loss": 0.0272982, "step": 17146 }, { "epoch": 34.294, "grad_norm": 1.2062244415283203, "learning_rate": 2e-05, "loss": 0.04380896, "step": 17147 }, { "epoch": 34.296, "grad_norm": 1.1690454483032227, "learning_rate": 2e-05, "loss": 0.04224586, "step": 17148 }, { "epoch": 34.298, "grad_norm": 1.076251745223999, "learning_rate": 2e-05, "loss": 0.04731111, "step": 17149 }, { "epoch": 34.3, "grad_norm": 0.9125476479530334, "learning_rate": 2e-05, "loss": 0.03293589, "step": 17150 }, { "epoch": 34.302, "grad_norm": 1.1172863245010376, "learning_rate": 2e-05, "loss": 0.04434405, "step": 17151 }, { "epoch": 34.304, "grad_norm": 1.4537452459335327, "learning_rate": 2e-05, "loss": 0.05699692, "step": 17152 }, { "epoch": 34.306, "grad_norm": 1.2859165668487549, "learning_rate": 2e-05, "loss": 0.05222432, "step": 17153 }, { "epoch": 34.308, "grad_norm": 1.1391727924346924, "learning_rate": 2e-05, "loss": 0.04175641, "step": 17154 }, { "epoch": 34.31, "grad_norm": 1.3534083366394043, "learning_rate": 2e-05, "loss": 0.04102936, "step": 17155 }, { "epoch": 34.312, "grad_norm": 2.2053699493408203, "learning_rate": 2e-05, "loss": 0.04537173, "step": 17156 }, { "epoch": 34.314, "grad_norm": 1.1330358982086182, "learning_rate": 2e-05, "loss": 0.05229243, "step": 17157 }, { "epoch": 34.316, "grad_norm": 1.8947052955627441, "learning_rate": 2e-05, "loss": 0.02641325, "step": 17158 }, { "epoch": 34.318, "grad_norm": 1.3450164794921875, "learning_rate": 2e-05, "loss": 0.04416771, "step": 17159 }, { "epoch": 34.32, "grad_norm": 2.277355909347534, "learning_rate": 2e-05, "loss": 0.03164542, "step": 17160 }, { "epoch": 34.322, "grad_norm": 1.2452149391174316, "learning_rate": 2e-05, "loss": 0.03742987, "step": 17161 }, { "epoch": 34.324, "grad_norm": 1.3950529098510742, "learning_rate": 2e-05, "loss": 0.04363611, "step": 17162 }, { "epoch": 34.326, "grad_norm": 1.2250491380691528, "learning_rate": 2e-05, "loss": 0.04496596, "step": 17163 }, { "epoch": 34.328, "grad_norm": 1.1004265546798706, "learning_rate": 2e-05, "loss": 0.04556531, "step": 17164 }, { "epoch": 34.33, "grad_norm": 1.185380458831787, "learning_rate": 2e-05, "loss": 0.0397131, "step": 17165 }, { "epoch": 34.332, "grad_norm": 1.1757383346557617, "learning_rate": 2e-05, "loss": 0.04187442, "step": 17166 }, { "epoch": 34.334, "grad_norm": 1.0646100044250488, "learning_rate": 2e-05, "loss": 0.04621556, "step": 17167 }, { "epoch": 34.336, "grad_norm": 1.1299614906311035, "learning_rate": 2e-05, "loss": 0.05131941, "step": 17168 }, { "epoch": 34.338, "grad_norm": 1.2977094650268555, "learning_rate": 2e-05, "loss": 0.04573498, "step": 17169 }, { "epoch": 34.34, "grad_norm": 0.9921538829803467, "learning_rate": 2e-05, "loss": 0.03972767, "step": 17170 }, { "epoch": 34.342, "grad_norm": 1.0485543012619019, "learning_rate": 2e-05, "loss": 0.03102573, "step": 17171 }, { "epoch": 34.344, "grad_norm": 3.1085667610168457, "learning_rate": 2e-05, "loss": 0.05401374, "step": 17172 }, { "epoch": 34.346, "grad_norm": 1.159866213798523, "learning_rate": 2e-05, "loss": 0.03029288, "step": 17173 }, { "epoch": 34.348, "grad_norm": 1.4898662567138672, "learning_rate": 2e-05, "loss": 0.037357, "step": 17174 }, { "epoch": 34.35, "grad_norm": 1.475274920463562, "learning_rate": 2e-05, "loss": 0.06047164, "step": 17175 }, { "epoch": 34.352, "grad_norm": 1.2670931816101074, "learning_rate": 2e-05, "loss": 0.04098585, "step": 17176 }, { "epoch": 34.354, "grad_norm": 1.1112204790115356, "learning_rate": 2e-05, "loss": 0.04078077, "step": 17177 }, { "epoch": 34.356, "grad_norm": 1.1330214738845825, "learning_rate": 2e-05, "loss": 0.04091879, "step": 17178 }, { "epoch": 34.358, "grad_norm": 1.0863505601882935, "learning_rate": 2e-05, "loss": 0.04044025, "step": 17179 }, { "epoch": 34.36, "grad_norm": 1.4287241697311401, "learning_rate": 2e-05, "loss": 0.05269104, "step": 17180 }, { "epoch": 34.362, "grad_norm": 1.0420600175857544, "learning_rate": 2e-05, "loss": 0.03304556, "step": 17181 }, { "epoch": 34.364, "grad_norm": 1.1292872428894043, "learning_rate": 2e-05, "loss": 0.04075188, "step": 17182 }, { "epoch": 34.366, "grad_norm": 1.257199764251709, "learning_rate": 2e-05, "loss": 0.06179626, "step": 17183 }, { "epoch": 34.368, "grad_norm": 1.5898911952972412, "learning_rate": 2e-05, "loss": 0.06544568, "step": 17184 }, { "epoch": 34.37, "grad_norm": 1.1866689920425415, "learning_rate": 2e-05, "loss": 0.03154187, "step": 17185 }, { "epoch": 34.372, "grad_norm": 1.1588656902313232, "learning_rate": 2e-05, "loss": 0.04455046, "step": 17186 }, { "epoch": 34.374, "grad_norm": 1.8778082132339478, "learning_rate": 2e-05, "loss": 0.05956361, "step": 17187 }, { "epoch": 34.376, "grad_norm": 1.0748698711395264, "learning_rate": 2e-05, "loss": 0.02959104, "step": 17188 }, { "epoch": 34.378, "grad_norm": 1.2204198837280273, "learning_rate": 2e-05, "loss": 0.0491489, "step": 17189 }, { "epoch": 34.38, "grad_norm": 1.1138110160827637, "learning_rate": 2e-05, "loss": 0.04551839, "step": 17190 }, { "epoch": 34.382, "grad_norm": 1.057921051979065, "learning_rate": 2e-05, "loss": 0.04800026, "step": 17191 }, { "epoch": 34.384, "grad_norm": 0.97761470079422, "learning_rate": 2e-05, "loss": 0.03815104, "step": 17192 }, { "epoch": 34.386, "grad_norm": 1.2259156703948975, "learning_rate": 2e-05, "loss": 0.03457344, "step": 17193 }, { "epoch": 34.388, "grad_norm": 1.6335809230804443, "learning_rate": 2e-05, "loss": 0.05803145, "step": 17194 }, { "epoch": 34.39, "grad_norm": 1.134580135345459, "learning_rate": 2e-05, "loss": 0.03817577, "step": 17195 }, { "epoch": 34.392, "grad_norm": 1.2042521238327026, "learning_rate": 2e-05, "loss": 0.04776943, "step": 17196 }, { "epoch": 34.394, "grad_norm": 0.9557961225509644, "learning_rate": 2e-05, "loss": 0.02998234, "step": 17197 }, { "epoch": 34.396, "grad_norm": 1.669502854347229, "learning_rate": 2e-05, "loss": 0.05116883, "step": 17198 }, { "epoch": 34.398, "grad_norm": 1.938193917274475, "learning_rate": 2e-05, "loss": 0.04275124, "step": 17199 }, { "epoch": 34.4, "grad_norm": 0.95672208070755, "learning_rate": 2e-05, "loss": 0.02722719, "step": 17200 }, { "epoch": 34.402, "grad_norm": 1.6504557132720947, "learning_rate": 2e-05, "loss": 0.04932757, "step": 17201 }, { "epoch": 34.404, "grad_norm": 1.3142409324645996, "learning_rate": 2e-05, "loss": 0.05078411, "step": 17202 }, { "epoch": 34.406, "grad_norm": 1.8775172233581543, "learning_rate": 2e-05, "loss": 0.0415501, "step": 17203 }, { "epoch": 34.408, "grad_norm": 1.108086347579956, "learning_rate": 2e-05, "loss": 0.03562623, "step": 17204 }, { "epoch": 34.41, "grad_norm": 1.1936184167861938, "learning_rate": 2e-05, "loss": 0.06731561, "step": 17205 }, { "epoch": 34.412, "grad_norm": 1.4935089349746704, "learning_rate": 2e-05, "loss": 0.04526928, "step": 17206 }, { "epoch": 34.414, "grad_norm": 0.9472814798355103, "learning_rate": 2e-05, "loss": 0.03531752, "step": 17207 }, { "epoch": 34.416, "grad_norm": 1.1096245050430298, "learning_rate": 2e-05, "loss": 0.03912827, "step": 17208 }, { "epoch": 34.418, "grad_norm": 1.2029078006744385, "learning_rate": 2e-05, "loss": 0.05240467, "step": 17209 }, { "epoch": 34.42, "grad_norm": 1.08064603805542, "learning_rate": 2e-05, "loss": 0.04623947, "step": 17210 }, { "epoch": 34.422, "grad_norm": 1.3892536163330078, "learning_rate": 2e-05, "loss": 0.04560886, "step": 17211 }, { "epoch": 34.424, "grad_norm": 1.2040435075759888, "learning_rate": 2e-05, "loss": 0.03983107, "step": 17212 }, { "epoch": 34.426, "grad_norm": 1.0492370128631592, "learning_rate": 2e-05, "loss": 0.03636064, "step": 17213 }, { "epoch": 34.428, "grad_norm": 1.1657490730285645, "learning_rate": 2e-05, "loss": 0.04331504, "step": 17214 }, { "epoch": 34.43, "grad_norm": 1.2659703493118286, "learning_rate": 2e-05, "loss": 0.05003392, "step": 17215 }, { "epoch": 34.432, "grad_norm": 1.3494330644607544, "learning_rate": 2e-05, "loss": 0.02932413, "step": 17216 }, { "epoch": 34.434, "grad_norm": 1.175642490386963, "learning_rate": 2e-05, "loss": 0.04383037, "step": 17217 }, { "epoch": 34.436, "grad_norm": 1.0933257341384888, "learning_rate": 2e-05, "loss": 0.03921406, "step": 17218 }, { "epoch": 34.438, "grad_norm": 1.1617385149002075, "learning_rate": 2e-05, "loss": 0.04727744, "step": 17219 }, { "epoch": 34.44, "grad_norm": 1.1515668630599976, "learning_rate": 2e-05, "loss": 0.03612133, "step": 17220 }, { "epoch": 34.442, "grad_norm": 1.1153593063354492, "learning_rate": 2e-05, "loss": 0.03581006, "step": 17221 }, { "epoch": 34.444, "grad_norm": 1.1779838800430298, "learning_rate": 2e-05, "loss": 0.03837469, "step": 17222 }, { "epoch": 34.446, "grad_norm": 1.257264256477356, "learning_rate": 2e-05, "loss": 0.04428399, "step": 17223 }, { "epoch": 34.448, "grad_norm": 1.1216603517532349, "learning_rate": 2e-05, "loss": 0.03815703, "step": 17224 }, { "epoch": 34.45, "grad_norm": 1.513350248336792, "learning_rate": 2e-05, "loss": 0.05262036, "step": 17225 }, { "epoch": 34.452, "grad_norm": 1.668152093887329, "learning_rate": 2e-05, "loss": 0.04921922, "step": 17226 }, { "epoch": 34.454, "grad_norm": 1.259291648864746, "learning_rate": 2e-05, "loss": 0.0468533, "step": 17227 }, { "epoch": 34.456, "grad_norm": 2.0547330379486084, "learning_rate": 2e-05, "loss": 0.04505831, "step": 17228 }, { "epoch": 34.458, "grad_norm": 1.0175834894180298, "learning_rate": 2e-05, "loss": 0.03889389, "step": 17229 }, { "epoch": 34.46, "grad_norm": 1.205269694328308, "learning_rate": 2e-05, "loss": 0.05405382, "step": 17230 }, { "epoch": 34.462, "grad_norm": 1.1954044103622437, "learning_rate": 2e-05, "loss": 0.06115945, "step": 17231 }, { "epoch": 34.464, "grad_norm": 1.5559508800506592, "learning_rate": 2e-05, "loss": 0.04823997, "step": 17232 }, { "epoch": 34.466, "grad_norm": 1.123988151550293, "learning_rate": 2e-05, "loss": 0.04282939, "step": 17233 }, { "epoch": 34.468, "grad_norm": 1.5351160764694214, "learning_rate": 2e-05, "loss": 0.04434042, "step": 17234 }, { "epoch": 34.47, "grad_norm": 1.1878026723861694, "learning_rate": 2e-05, "loss": 0.04576051, "step": 17235 }, { "epoch": 34.472, "grad_norm": 1.456591248512268, "learning_rate": 2e-05, "loss": 0.04699751, "step": 17236 }, { "epoch": 34.474, "grad_norm": 1.0269306898117065, "learning_rate": 2e-05, "loss": 0.04063164, "step": 17237 }, { "epoch": 34.476, "grad_norm": 1.0819087028503418, "learning_rate": 2e-05, "loss": 0.04128548, "step": 17238 }, { "epoch": 34.478, "grad_norm": 1.8286333084106445, "learning_rate": 2e-05, "loss": 0.04910714, "step": 17239 }, { "epoch": 34.48, "grad_norm": 3.0177881717681885, "learning_rate": 2e-05, "loss": 0.05242274, "step": 17240 }, { "epoch": 34.482, "grad_norm": 1.391753911972046, "learning_rate": 2e-05, "loss": 0.04662409, "step": 17241 }, { "epoch": 34.484, "grad_norm": 1.5044915676116943, "learning_rate": 2e-05, "loss": 0.03636048, "step": 17242 }, { "epoch": 34.486, "grad_norm": 1.38352370262146, "learning_rate": 2e-05, "loss": 0.04130649, "step": 17243 }, { "epoch": 34.488, "grad_norm": 1.027741551399231, "learning_rate": 2e-05, "loss": 0.04034343, "step": 17244 }, { "epoch": 34.49, "grad_norm": 1.7311643362045288, "learning_rate": 2e-05, "loss": 0.03252632, "step": 17245 }, { "epoch": 34.492, "grad_norm": 0.9958137273788452, "learning_rate": 2e-05, "loss": 0.03432687, "step": 17246 }, { "epoch": 34.494, "grad_norm": 0.9350914359092712, "learning_rate": 2e-05, "loss": 0.04081878, "step": 17247 }, { "epoch": 34.496, "grad_norm": 1.0058718919754028, "learning_rate": 2e-05, "loss": 0.03128472, "step": 17248 }, { "epoch": 34.498, "grad_norm": 1.1736693382263184, "learning_rate": 2e-05, "loss": 0.0507132, "step": 17249 }, { "epoch": 34.5, "grad_norm": 2.9509620666503906, "learning_rate": 2e-05, "loss": 0.04513638, "step": 17250 }, { "epoch": 34.502, "grad_norm": 1.1163856983184814, "learning_rate": 2e-05, "loss": 0.02961075, "step": 17251 }, { "epoch": 34.504, "grad_norm": 1.2255582809448242, "learning_rate": 2e-05, "loss": 0.05628613, "step": 17252 }, { "epoch": 34.506, "grad_norm": 1.1547157764434814, "learning_rate": 2e-05, "loss": 0.04185841, "step": 17253 }, { "epoch": 34.508, "grad_norm": 1.0533969402313232, "learning_rate": 2e-05, "loss": 0.02807963, "step": 17254 }, { "epoch": 34.51, "grad_norm": 1.1036581993103027, "learning_rate": 2e-05, "loss": 0.04609573, "step": 17255 }, { "epoch": 34.512, "grad_norm": 1.2565536499023438, "learning_rate": 2e-05, "loss": 0.0547157, "step": 17256 }, { "epoch": 34.514, "grad_norm": 1.1017217636108398, "learning_rate": 2e-05, "loss": 0.05001441, "step": 17257 }, { "epoch": 34.516, "grad_norm": 1.0183624029159546, "learning_rate": 2e-05, "loss": 0.03306765, "step": 17258 }, { "epoch": 34.518, "grad_norm": 1.5634461641311646, "learning_rate": 2e-05, "loss": 0.05358353, "step": 17259 }, { "epoch": 34.52, "grad_norm": 0.8560883402824402, "learning_rate": 2e-05, "loss": 0.02674874, "step": 17260 }, { "epoch": 34.522, "grad_norm": 1.7791016101837158, "learning_rate": 2e-05, "loss": 0.05959473, "step": 17261 }, { "epoch": 34.524, "grad_norm": 1.059211015701294, "learning_rate": 2e-05, "loss": 0.04623821, "step": 17262 }, { "epoch": 34.526, "grad_norm": 2.1442856788635254, "learning_rate": 2e-05, "loss": 0.03975558, "step": 17263 }, { "epoch": 34.528, "grad_norm": 1.1825506687164307, "learning_rate": 2e-05, "loss": 0.04681484, "step": 17264 }, { "epoch": 34.53, "grad_norm": 1.1355949640274048, "learning_rate": 2e-05, "loss": 0.04300448, "step": 17265 }, { "epoch": 34.532, "grad_norm": 1.151884913444519, "learning_rate": 2e-05, "loss": 0.05281693, "step": 17266 }, { "epoch": 34.534, "grad_norm": 1.244160532951355, "learning_rate": 2e-05, "loss": 0.05270495, "step": 17267 }, { "epoch": 34.536, "grad_norm": 1.332995057106018, "learning_rate": 2e-05, "loss": 0.04768827, "step": 17268 }, { "epoch": 34.538, "grad_norm": 1.1446064710617065, "learning_rate": 2e-05, "loss": 0.03133503, "step": 17269 }, { "epoch": 34.54, "grad_norm": 1.1863194704055786, "learning_rate": 2e-05, "loss": 0.05849977, "step": 17270 }, { "epoch": 34.542, "grad_norm": 1.2308465242385864, "learning_rate": 2e-05, "loss": 0.06080481, "step": 17271 }, { "epoch": 34.544, "grad_norm": 2.3571889400482178, "learning_rate": 2e-05, "loss": 0.05764741, "step": 17272 }, { "epoch": 34.546, "grad_norm": 0.713600218296051, "learning_rate": 2e-05, "loss": 0.0190373, "step": 17273 }, { "epoch": 34.548, "grad_norm": 3.5225939750671387, "learning_rate": 2e-05, "loss": 0.05001704, "step": 17274 }, { "epoch": 34.55, "grad_norm": 2.5222723484039307, "learning_rate": 2e-05, "loss": 0.03726556, "step": 17275 }, { "epoch": 34.552, "grad_norm": 1.0584102869033813, "learning_rate": 2e-05, "loss": 0.04141295, "step": 17276 }, { "epoch": 34.554, "grad_norm": 1.0614216327667236, "learning_rate": 2e-05, "loss": 0.04041729, "step": 17277 }, { "epoch": 34.556, "grad_norm": 0.8729689121246338, "learning_rate": 2e-05, "loss": 0.03110387, "step": 17278 }, { "epoch": 34.558, "grad_norm": 1.013295292854309, "learning_rate": 2e-05, "loss": 0.03849518, "step": 17279 }, { "epoch": 34.56, "grad_norm": 1.2050495147705078, "learning_rate": 2e-05, "loss": 0.04337199, "step": 17280 }, { "epoch": 34.562, "grad_norm": 1.1820077896118164, "learning_rate": 2e-05, "loss": 0.04693332, "step": 17281 }, { "epoch": 34.564, "grad_norm": 1.015336036682129, "learning_rate": 2e-05, "loss": 0.0314662, "step": 17282 }, { "epoch": 34.566, "grad_norm": 1.1241815090179443, "learning_rate": 2e-05, "loss": 0.04584487, "step": 17283 }, { "epoch": 34.568, "grad_norm": 1.4243446588516235, "learning_rate": 2e-05, "loss": 0.04570295, "step": 17284 }, { "epoch": 34.57, "grad_norm": 2.7489190101623535, "learning_rate": 2e-05, "loss": 0.04503506, "step": 17285 }, { "epoch": 34.572, "grad_norm": 1.1095118522644043, "learning_rate": 2e-05, "loss": 0.04129759, "step": 17286 }, { "epoch": 34.574, "grad_norm": 1.1222656965255737, "learning_rate": 2e-05, "loss": 0.04352672, "step": 17287 }, { "epoch": 34.576, "grad_norm": 1.050182580947876, "learning_rate": 2e-05, "loss": 0.04432563, "step": 17288 }, { "epoch": 34.578, "grad_norm": 1.1200228929519653, "learning_rate": 2e-05, "loss": 0.03937971, "step": 17289 }, { "epoch": 34.58, "grad_norm": 1.0767937898635864, "learning_rate": 2e-05, "loss": 0.04535668, "step": 17290 }, { "epoch": 34.582, "grad_norm": 1.556679129600525, "learning_rate": 2e-05, "loss": 0.05495925, "step": 17291 }, { "epoch": 34.584, "grad_norm": 0.9800344109535217, "learning_rate": 2e-05, "loss": 0.03302306, "step": 17292 }, { "epoch": 34.586, "grad_norm": 1.1676750183105469, "learning_rate": 2e-05, "loss": 0.04421997, "step": 17293 }, { "epoch": 34.588, "grad_norm": 1.9742448329925537, "learning_rate": 2e-05, "loss": 0.05324573, "step": 17294 }, { "epoch": 34.59, "grad_norm": 1.217177152633667, "learning_rate": 2e-05, "loss": 0.03502582, "step": 17295 }, { "epoch": 34.592, "grad_norm": 1.535559058189392, "learning_rate": 2e-05, "loss": 0.05487555, "step": 17296 }, { "epoch": 34.594, "grad_norm": 1.073294758796692, "learning_rate": 2e-05, "loss": 0.04047548, "step": 17297 }, { "epoch": 34.596, "grad_norm": 1.468457579612732, "learning_rate": 2e-05, "loss": 0.03949284, "step": 17298 }, { "epoch": 34.598, "grad_norm": 1.1987510919570923, "learning_rate": 2e-05, "loss": 0.038359, "step": 17299 }, { "epoch": 34.6, "grad_norm": 1.8533692359924316, "learning_rate": 2e-05, "loss": 0.04139744, "step": 17300 }, { "epoch": 34.602, "grad_norm": 0.9457276463508606, "learning_rate": 2e-05, "loss": 0.03080427, "step": 17301 }, { "epoch": 34.604, "grad_norm": 1.3518832921981812, "learning_rate": 2e-05, "loss": 0.04552808, "step": 17302 }, { "epoch": 34.606, "grad_norm": 1.7731199264526367, "learning_rate": 2e-05, "loss": 0.03314227, "step": 17303 }, { "epoch": 34.608, "grad_norm": 1.6330634355545044, "learning_rate": 2e-05, "loss": 0.04139177, "step": 17304 }, { "epoch": 34.61, "grad_norm": 1.111138939857483, "learning_rate": 2e-05, "loss": 0.03105775, "step": 17305 }, { "epoch": 34.612, "grad_norm": 1.2260620594024658, "learning_rate": 2e-05, "loss": 0.0478155, "step": 17306 }, { "epoch": 34.614, "grad_norm": 1.1791749000549316, "learning_rate": 2e-05, "loss": 0.04237508, "step": 17307 }, { "epoch": 34.616, "grad_norm": 0.9272366166114807, "learning_rate": 2e-05, "loss": 0.03472099, "step": 17308 }, { "epoch": 34.618, "grad_norm": 1.0389100313186646, "learning_rate": 2e-05, "loss": 0.03994408, "step": 17309 }, { "epoch": 34.62, "grad_norm": 1.1838860511779785, "learning_rate": 2e-05, "loss": 0.03435428, "step": 17310 }, { "epoch": 34.622, "grad_norm": 1.240273356437683, "learning_rate": 2e-05, "loss": 0.04923018, "step": 17311 }, { "epoch": 34.624, "grad_norm": 1.0485180616378784, "learning_rate": 2e-05, "loss": 0.02882761, "step": 17312 }, { "epoch": 34.626, "grad_norm": 1.092466950416565, "learning_rate": 2e-05, "loss": 0.02998949, "step": 17313 }, { "epoch": 34.628, "grad_norm": 2.4525039196014404, "learning_rate": 2e-05, "loss": 0.05779274, "step": 17314 }, { "epoch": 34.63, "grad_norm": 1.1219778060913086, "learning_rate": 2e-05, "loss": 0.03857387, "step": 17315 }, { "epoch": 34.632, "grad_norm": 1.2241005897521973, "learning_rate": 2e-05, "loss": 0.04050406, "step": 17316 }, { "epoch": 34.634, "grad_norm": 1.1506431102752686, "learning_rate": 2e-05, "loss": 0.04586707, "step": 17317 }, { "epoch": 34.636, "grad_norm": 1.113982915878296, "learning_rate": 2e-05, "loss": 0.04473947, "step": 17318 }, { "epoch": 34.638, "grad_norm": 1.2651467323303223, "learning_rate": 2e-05, "loss": 0.04464104, "step": 17319 }, { "epoch": 34.64, "grad_norm": 1.0130887031555176, "learning_rate": 2e-05, "loss": 0.03748289, "step": 17320 }, { "epoch": 34.642, "grad_norm": 1.977115273475647, "learning_rate": 2e-05, "loss": 0.0529108, "step": 17321 }, { "epoch": 34.644, "grad_norm": 1.0311118364334106, "learning_rate": 2e-05, "loss": 0.05024704, "step": 17322 }, { "epoch": 34.646, "grad_norm": 1.7532882690429688, "learning_rate": 2e-05, "loss": 0.05759249, "step": 17323 }, { "epoch": 34.648, "grad_norm": 2.038602828979492, "learning_rate": 2e-05, "loss": 0.03318835, "step": 17324 }, { "epoch": 34.65, "grad_norm": 1.232222557067871, "learning_rate": 2e-05, "loss": 0.04337482, "step": 17325 }, { "epoch": 34.652, "grad_norm": 1.3680845499038696, "learning_rate": 2e-05, "loss": 0.02841615, "step": 17326 }, { "epoch": 34.654, "grad_norm": 1.8066096305847168, "learning_rate": 2e-05, "loss": 0.03865731, "step": 17327 }, { "epoch": 34.656, "grad_norm": 1.0195194482803345, "learning_rate": 2e-05, "loss": 0.03887212, "step": 17328 }, { "epoch": 34.658, "grad_norm": 1.2496850490570068, "learning_rate": 2e-05, "loss": 0.04003082, "step": 17329 }, { "epoch": 34.66, "grad_norm": 1.1898289918899536, "learning_rate": 2e-05, "loss": 0.03792207, "step": 17330 }, { "epoch": 34.662, "grad_norm": 2.7598717212677, "learning_rate": 2e-05, "loss": 0.04439123, "step": 17331 }, { "epoch": 34.664, "grad_norm": 1.5496615171432495, "learning_rate": 2e-05, "loss": 0.05002048, "step": 17332 }, { "epoch": 34.666, "grad_norm": 1.36111581325531, "learning_rate": 2e-05, "loss": 0.03431807, "step": 17333 }, { "epoch": 34.668, "grad_norm": 1.1194837093353271, "learning_rate": 2e-05, "loss": 0.04525601, "step": 17334 }, { "epoch": 34.67, "grad_norm": 2.9787824153900146, "learning_rate": 2e-05, "loss": 0.05164247, "step": 17335 }, { "epoch": 34.672, "grad_norm": 1.0257699489593506, "learning_rate": 2e-05, "loss": 0.03524974, "step": 17336 }, { "epoch": 34.674, "grad_norm": 1.438684105873108, "learning_rate": 2e-05, "loss": 0.03349183, "step": 17337 }, { "epoch": 34.676, "grad_norm": 1.544499397277832, "learning_rate": 2e-05, "loss": 0.04704629, "step": 17338 }, { "epoch": 34.678, "grad_norm": 1.3472787141799927, "learning_rate": 2e-05, "loss": 0.04309006, "step": 17339 }, { "epoch": 34.68, "grad_norm": 1.8793655633926392, "learning_rate": 2e-05, "loss": 0.05061891, "step": 17340 }, { "epoch": 34.682, "grad_norm": 1.2611889839172363, "learning_rate": 2e-05, "loss": 0.05567399, "step": 17341 }, { "epoch": 34.684, "grad_norm": 1.5432864427566528, "learning_rate": 2e-05, "loss": 0.03108479, "step": 17342 }, { "epoch": 34.686, "grad_norm": 1.2350441217422485, "learning_rate": 2e-05, "loss": 0.03987078, "step": 17343 }, { "epoch": 34.688, "grad_norm": 1.0708898305892944, "learning_rate": 2e-05, "loss": 0.03587945, "step": 17344 }, { "epoch": 34.69, "grad_norm": 1.1190636157989502, "learning_rate": 2e-05, "loss": 0.04188859, "step": 17345 }, { "epoch": 34.692, "grad_norm": 1.3909366130828857, "learning_rate": 2e-05, "loss": 0.0505821, "step": 17346 }, { "epoch": 34.694, "grad_norm": 1.1136740446090698, "learning_rate": 2e-05, "loss": 0.04561438, "step": 17347 }, { "epoch": 34.696, "grad_norm": 2.012559413909912, "learning_rate": 2e-05, "loss": 0.04886289, "step": 17348 }, { "epoch": 34.698, "grad_norm": 1.2638075351715088, "learning_rate": 2e-05, "loss": 0.04957847, "step": 17349 }, { "epoch": 34.7, "grad_norm": 1.1630687713623047, "learning_rate": 2e-05, "loss": 0.04921006, "step": 17350 }, { "epoch": 34.702, "grad_norm": 1.2960374355316162, "learning_rate": 2e-05, "loss": 0.03544647, "step": 17351 }, { "epoch": 34.704, "grad_norm": 1.0430259704589844, "learning_rate": 2e-05, "loss": 0.04330702, "step": 17352 }, { "epoch": 34.706, "grad_norm": 1.0777888298034668, "learning_rate": 2e-05, "loss": 0.03540637, "step": 17353 }, { "epoch": 34.708, "grad_norm": 1.176332712173462, "learning_rate": 2e-05, "loss": 0.03709304, "step": 17354 }, { "epoch": 34.71, "grad_norm": 1.5132179260253906, "learning_rate": 2e-05, "loss": 0.05928625, "step": 17355 }, { "epoch": 34.712, "grad_norm": 1.0462723970413208, "learning_rate": 2e-05, "loss": 0.04137703, "step": 17356 }, { "epoch": 34.714, "grad_norm": 1.0485808849334717, "learning_rate": 2e-05, "loss": 0.04173344, "step": 17357 }, { "epoch": 34.716, "grad_norm": 0.9149652719497681, "learning_rate": 2e-05, "loss": 0.03566572, "step": 17358 }, { "epoch": 34.718, "grad_norm": 2.1620941162109375, "learning_rate": 2e-05, "loss": 0.05247457, "step": 17359 }, { "epoch": 34.72, "grad_norm": 1.1164774894714355, "learning_rate": 2e-05, "loss": 0.04975937, "step": 17360 }, { "epoch": 34.722, "grad_norm": 1.0158329010009766, "learning_rate": 2e-05, "loss": 0.04216751, "step": 17361 }, { "epoch": 34.724, "grad_norm": 1.7261289358139038, "learning_rate": 2e-05, "loss": 0.06181233, "step": 17362 }, { "epoch": 34.726, "grad_norm": 0.7953335046768188, "learning_rate": 2e-05, "loss": 0.02627176, "step": 17363 }, { "epoch": 34.728, "grad_norm": 1.0371417999267578, "learning_rate": 2e-05, "loss": 0.03395245, "step": 17364 }, { "epoch": 34.73, "grad_norm": 1.1978789567947388, "learning_rate": 2e-05, "loss": 0.03813355, "step": 17365 }, { "epoch": 34.732, "grad_norm": 1.0728747844696045, "learning_rate": 2e-05, "loss": 0.0317129, "step": 17366 }, { "epoch": 34.734, "grad_norm": 1.3377137184143066, "learning_rate": 2e-05, "loss": 0.04749308, "step": 17367 }, { "epoch": 34.736, "grad_norm": 1.5253896713256836, "learning_rate": 2e-05, "loss": 0.04517586, "step": 17368 }, { "epoch": 34.738, "grad_norm": 1.125253677368164, "learning_rate": 2e-05, "loss": 0.04429846, "step": 17369 }, { "epoch": 34.74, "grad_norm": 1.279826283454895, "learning_rate": 2e-05, "loss": 0.04692722, "step": 17370 }, { "epoch": 34.742, "grad_norm": 1.15207040309906, "learning_rate": 2e-05, "loss": 0.03784764, "step": 17371 }, { "epoch": 34.744, "grad_norm": 1.4558398723602295, "learning_rate": 2e-05, "loss": 0.05303413, "step": 17372 }, { "epoch": 34.746, "grad_norm": 2.0725667476654053, "learning_rate": 2e-05, "loss": 0.03780486, "step": 17373 }, { "epoch": 34.748, "grad_norm": 1.101965069770813, "learning_rate": 2e-05, "loss": 0.0325265, "step": 17374 }, { "epoch": 34.75, "grad_norm": 1.2319386005401611, "learning_rate": 2e-05, "loss": 0.05028831, "step": 17375 }, { "epoch": 34.752, "grad_norm": 0.9592969417572021, "learning_rate": 2e-05, "loss": 0.03598889, "step": 17376 }, { "epoch": 34.754, "grad_norm": 1.2435072660446167, "learning_rate": 2e-05, "loss": 0.04904407, "step": 17377 }, { "epoch": 34.756, "grad_norm": 1.0948567390441895, "learning_rate": 2e-05, "loss": 0.03987746, "step": 17378 }, { "epoch": 34.758, "grad_norm": 1.072523593902588, "learning_rate": 2e-05, "loss": 0.04469089, "step": 17379 }, { "epoch": 34.76, "grad_norm": 4.93184757232666, "learning_rate": 2e-05, "loss": 0.05690203, "step": 17380 }, { "epoch": 34.762, "grad_norm": 1.764129638671875, "learning_rate": 2e-05, "loss": 0.06511163, "step": 17381 }, { "epoch": 34.764, "grad_norm": 4.052314758300781, "learning_rate": 2e-05, "loss": 0.05666729, "step": 17382 }, { "epoch": 34.766, "grad_norm": 1.1972640752792358, "learning_rate": 2e-05, "loss": 0.04556664, "step": 17383 }, { "epoch": 34.768, "grad_norm": 1.0919245481491089, "learning_rate": 2e-05, "loss": 0.03124575, "step": 17384 }, { "epoch": 34.77, "grad_norm": 1.072306513786316, "learning_rate": 2e-05, "loss": 0.03128473, "step": 17385 }, { "epoch": 34.772, "grad_norm": 1.295246958732605, "learning_rate": 2e-05, "loss": 0.05659188, "step": 17386 }, { "epoch": 34.774, "grad_norm": 0.5764150619506836, "learning_rate": 2e-05, "loss": 0.01222921, "step": 17387 }, { "epoch": 34.776, "grad_norm": 2.142376661300659, "learning_rate": 2e-05, "loss": 0.05650788, "step": 17388 }, { "epoch": 34.778, "grad_norm": 1.199610710144043, "learning_rate": 2e-05, "loss": 0.03026199, "step": 17389 }, { "epoch": 34.78, "grad_norm": 4.657124996185303, "learning_rate": 2e-05, "loss": 0.05358446, "step": 17390 }, { "epoch": 34.782, "grad_norm": 1.857397198677063, "learning_rate": 2e-05, "loss": 0.04083439, "step": 17391 }, { "epoch": 34.784, "grad_norm": 1.1742832660675049, "learning_rate": 2e-05, "loss": 0.04639963, "step": 17392 }, { "epoch": 34.786, "grad_norm": 1.3055615425109863, "learning_rate": 2e-05, "loss": 0.03700022, "step": 17393 }, { "epoch": 34.788, "grad_norm": 1.1897599697113037, "learning_rate": 2e-05, "loss": 0.03541516, "step": 17394 }, { "epoch": 34.79, "grad_norm": 0.9236932992935181, "learning_rate": 2e-05, "loss": 0.02803895, "step": 17395 }, { "epoch": 34.792, "grad_norm": 1.2352205514907837, "learning_rate": 2e-05, "loss": 0.05203598, "step": 17396 }, { "epoch": 34.794, "grad_norm": 1.137605905532837, "learning_rate": 2e-05, "loss": 0.04387431, "step": 17397 }, { "epoch": 34.796, "grad_norm": 1.4612951278686523, "learning_rate": 2e-05, "loss": 0.03690141, "step": 17398 }, { "epoch": 34.798, "grad_norm": 1.1920640468597412, "learning_rate": 2e-05, "loss": 0.03032911, "step": 17399 }, { "epoch": 34.8, "grad_norm": 1.4039093255996704, "learning_rate": 2e-05, "loss": 0.04082662, "step": 17400 }, { "epoch": 34.802, "grad_norm": 1.3465620279312134, "learning_rate": 2e-05, "loss": 0.04480909, "step": 17401 }, { "epoch": 34.804, "grad_norm": 1.015446424484253, "learning_rate": 2e-05, "loss": 0.04655547, "step": 17402 }, { "epoch": 34.806, "grad_norm": 1.3519433736801147, "learning_rate": 2e-05, "loss": 0.0522088, "step": 17403 }, { "epoch": 34.808, "grad_norm": 1.9576835632324219, "learning_rate": 2e-05, "loss": 0.04513229, "step": 17404 }, { "epoch": 34.81, "grad_norm": 1.3243753910064697, "learning_rate": 2e-05, "loss": 0.0440766, "step": 17405 }, { "epoch": 34.812, "grad_norm": 2.486659526824951, "learning_rate": 2e-05, "loss": 0.04024377, "step": 17406 }, { "epoch": 34.814, "grad_norm": 1.11258065700531, "learning_rate": 2e-05, "loss": 0.03342846, "step": 17407 }, { "epoch": 34.816, "grad_norm": 1.34394109249115, "learning_rate": 2e-05, "loss": 0.0359684, "step": 17408 }, { "epoch": 34.818, "grad_norm": 1.8080177307128906, "learning_rate": 2e-05, "loss": 0.04266899, "step": 17409 }, { "epoch": 34.82, "grad_norm": 0.9578447937965393, "learning_rate": 2e-05, "loss": 0.02569779, "step": 17410 }, { "epoch": 34.822, "grad_norm": 1.6380552053451538, "learning_rate": 2e-05, "loss": 0.04872344, "step": 17411 }, { "epoch": 34.824, "grad_norm": 1.7012923955917358, "learning_rate": 2e-05, "loss": 0.03796242, "step": 17412 }, { "epoch": 34.826, "grad_norm": 1.7097322940826416, "learning_rate": 2e-05, "loss": 0.04473326, "step": 17413 }, { "epoch": 34.828, "grad_norm": 0.8996273279190063, "learning_rate": 2e-05, "loss": 0.02824127, "step": 17414 }, { "epoch": 34.83, "grad_norm": 1.1458615064620972, "learning_rate": 2e-05, "loss": 0.04492954, "step": 17415 }, { "epoch": 34.832, "grad_norm": 1.754016637802124, "learning_rate": 2e-05, "loss": 0.04475306, "step": 17416 }, { "epoch": 34.834, "grad_norm": 1.4149738550186157, "learning_rate": 2e-05, "loss": 0.05934685, "step": 17417 }, { "epoch": 34.836, "grad_norm": 1.1486657857894897, "learning_rate": 2e-05, "loss": 0.03290381, "step": 17418 }, { "epoch": 34.838, "grad_norm": 1.1294008493423462, "learning_rate": 2e-05, "loss": 0.0412181, "step": 17419 }, { "epoch": 34.84, "grad_norm": 1.3856641054153442, "learning_rate": 2e-05, "loss": 0.04381644, "step": 17420 }, { "epoch": 34.842, "grad_norm": 1.5705233812332153, "learning_rate": 2e-05, "loss": 0.0567019, "step": 17421 }, { "epoch": 34.844, "grad_norm": 2.1528782844543457, "learning_rate": 2e-05, "loss": 0.03446703, "step": 17422 }, { "epoch": 34.846, "grad_norm": 3.2811214923858643, "learning_rate": 2e-05, "loss": 0.05094865, "step": 17423 }, { "epoch": 34.848, "grad_norm": 1.390089988708496, "learning_rate": 2e-05, "loss": 0.06304034, "step": 17424 }, { "epoch": 34.85, "grad_norm": 1.1673579216003418, "learning_rate": 2e-05, "loss": 0.03815352, "step": 17425 }, { "epoch": 34.852, "grad_norm": 1.155975103378296, "learning_rate": 2e-05, "loss": 0.04023435, "step": 17426 }, { "epoch": 34.854, "grad_norm": 2.51444673538208, "learning_rate": 2e-05, "loss": 0.03916804, "step": 17427 }, { "epoch": 34.856, "grad_norm": 4.942361354827881, "learning_rate": 2e-05, "loss": 0.06820999, "step": 17428 }, { "epoch": 34.858, "grad_norm": 1.0035266876220703, "learning_rate": 2e-05, "loss": 0.03401071, "step": 17429 }, { "epoch": 34.86, "grad_norm": 1.0705126523971558, "learning_rate": 2e-05, "loss": 0.02997341, "step": 17430 }, { "epoch": 34.862, "grad_norm": 1.0698009729385376, "learning_rate": 2e-05, "loss": 0.03294614, "step": 17431 }, { "epoch": 34.864, "grad_norm": 1.062578558921814, "learning_rate": 2e-05, "loss": 0.04528739, "step": 17432 }, { "epoch": 34.866, "grad_norm": 1.1648660898208618, "learning_rate": 2e-05, "loss": 0.04851899, "step": 17433 }, { "epoch": 34.868, "grad_norm": 1.1196273565292358, "learning_rate": 2e-05, "loss": 0.04238444, "step": 17434 }, { "epoch": 34.87, "grad_norm": 1.043180227279663, "learning_rate": 2e-05, "loss": 0.04557565, "step": 17435 }, { "epoch": 34.872, "grad_norm": 1.079825520515442, "learning_rate": 2e-05, "loss": 0.04296175, "step": 17436 }, { "epoch": 34.874, "grad_norm": 1.0005967617034912, "learning_rate": 2e-05, "loss": 0.02957804, "step": 17437 }, { "epoch": 34.876, "grad_norm": 1.1389814615249634, "learning_rate": 2e-05, "loss": 0.0373849, "step": 17438 }, { "epoch": 34.878, "grad_norm": 1.3328295946121216, "learning_rate": 2e-05, "loss": 0.03476042, "step": 17439 }, { "epoch": 34.88, "grad_norm": 1.4265692234039307, "learning_rate": 2e-05, "loss": 0.05438258, "step": 17440 }, { "epoch": 34.882, "grad_norm": 1.3445887565612793, "learning_rate": 2e-05, "loss": 0.04738908, "step": 17441 }, { "epoch": 34.884, "grad_norm": 1.100516676902771, "learning_rate": 2e-05, "loss": 0.03541394, "step": 17442 }, { "epoch": 34.886, "grad_norm": 1.2599114179611206, "learning_rate": 2e-05, "loss": 0.05563427, "step": 17443 }, { "epoch": 34.888, "grad_norm": 1.3111646175384521, "learning_rate": 2e-05, "loss": 0.04711374, "step": 17444 }, { "epoch": 34.89, "grad_norm": 1.0831414461135864, "learning_rate": 2e-05, "loss": 0.03449087, "step": 17445 }, { "epoch": 34.892, "grad_norm": 1.390146255493164, "learning_rate": 2e-05, "loss": 0.04913954, "step": 17446 }, { "epoch": 34.894, "grad_norm": 1.4582270383834839, "learning_rate": 2e-05, "loss": 0.04977434, "step": 17447 }, { "epoch": 34.896, "grad_norm": 1.040456771850586, "learning_rate": 2e-05, "loss": 0.04064398, "step": 17448 }, { "epoch": 34.898, "grad_norm": 1.125290036201477, "learning_rate": 2e-05, "loss": 0.0368836, "step": 17449 }, { "epoch": 34.9, "grad_norm": 1.6109248399734497, "learning_rate": 2e-05, "loss": 0.05745726, "step": 17450 }, { "epoch": 34.902, "grad_norm": 1.6481939554214478, "learning_rate": 2e-05, "loss": 0.05567736, "step": 17451 }, { "epoch": 34.904, "grad_norm": 1.1878528594970703, "learning_rate": 2e-05, "loss": 0.03877228, "step": 17452 }, { "epoch": 34.906, "grad_norm": 1.0018279552459717, "learning_rate": 2e-05, "loss": 0.03138274, "step": 17453 }, { "epoch": 34.908, "grad_norm": 1.4977102279663086, "learning_rate": 2e-05, "loss": 0.03562863, "step": 17454 }, { "epoch": 34.91, "grad_norm": 1.0117096900939941, "learning_rate": 2e-05, "loss": 0.03509045, "step": 17455 }, { "epoch": 34.912, "grad_norm": 1.3096954822540283, "learning_rate": 2e-05, "loss": 0.04645215, "step": 17456 }, { "epoch": 34.914, "grad_norm": 1.7684214115142822, "learning_rate": 2e-05, "loss": 0.04929952, "step": 17457 }, { "epoch": 34.916, "grad_norm": 1.0269994735717773, "learning_rate": 2e-05, "loss": 0.03455571, "step": 17458 }, { "epoch": 34.918, "grad_norm": 0.9055759310722351, "learning_rate": 2e-05, "loss": 0.03060112, "step": 17459 }, { "epoch": 34.92, "grad_norm": 1.3847931623458862, "learning_rate": 2e-05, "loss": 0.05905629, "step": 17460 }, { "epoch": 34.922, "grad_norm": 0.8554072380065918, "learning_rate": 2e-05, "loss": 0.02603616, "step": 17461 }, { "epoch": 34.924, "grad_norm": 1.3800654411315918, "learning_rate": 2e-05, "loss": 0.04450605, "step": 17462 }, { "epoch": 34.926, "grad_norm": 1.0499212741851807, "learning_rate": 2e-05, "loss": 0.03139212, "step": 17463 }, { "epoch": 34.928, "grad_norm": 1.5442363023757935, "learning_rate": 2e-05, "loss": 0.05026272, "step": 17464 }, { "epoch": 34.93, "grad_norm": 1.0467393398284912, "learning_rate": 2e-05, "loss": 0.03508221, "step": 17465 }, { "epoch": 34.932, "grad_norm": 1.9836903810501099, "learning_rate": 2e-05, "loss": 0.03904892, "step": 17466 }, { "epoch": 34.934, "grad_norm": 1.667801022529602, "learning_rate": 2e-05, "loss": 0.06595255, "step": 17467 }, { "epoch": 34.936, "grad_norm": 1.248084545135498, "learning_rate": 2e-05, "loss": 0.04658974, "step": 17468 }, { "epoch": 34.938, "grad_norm": 1.1718586683273315, "learning_rate": 2e-05, "loss": 0.04607401, "step": 17469 }, { "epoch": 34.94, "grad_norm": 1.562265396118164, "learning_rate": 2e-05, "loss": 0.05189687, "step": 17470 }, { "epoch": 34.942, "grad_norm": 2.5033798217773438, "learning_rate": 2e-05, "loss": 0.04024173, "step": 17471 }, { "epoch": 34.944, "grad_norm": 1.3833789825439453, "learning_rate": 2e-05, "loss": 0.04375815, "step": 17472 }, { "epoch": 34.946, "grad_norm": 1.2791060209274292, "learning_rate": 2e-05, "loss": 0.03404561, "step": 17473 }, { "epoch": 34.948, "grad_norm": 4.338647365570068, "learning_rate": 2e-05, "loss": 0.05873897, "step": 17474 }, { "epoch": 34.95, "grad_norm": 1.4956046342849731, "learning_rate": 2e-05, "loss": 0.04620642, "step": 17475 }, { "epoch": 34.952, "grad_norm": 1.1326979398727417, "learning_rate": 2e-05, "loss": 0.04122002, "step": 17476 }, { "epoch": 34.954, "grad_norm": 1.0940099954605103, "learning_rate": 2e-05, "loss": 0.04148469, "step": 17477 }, { "epoch": 34.956, "grad_norm": 1.1113457679748535, "learning_rate": 2e-05, "loss": 0.04638319, "step": 17478 }, { "epoch": 34.958, "grad_norm": 1.088361144065857, "learning_rate": 2e-05, "loss": 0.04051323, "step": 17479 }, { "epoch": 34.96, "grad_norm": 1.1193358898162842, "learning_rate": 2e-05, "loss": 0.04485134, "step": 17480 }, { "epoch": 34.962, "grad_norm": 1.2051522731781006, "learning_rate": 2e-05, "loss": 0.03799698, "step": 17481 }, { "epoch": 34.964, "grad_norm": 1.1342461109161377, "learning_rate": 2e-05, "loss": 0.03318293, "step": 17482 }, { "epoch": 34.966, "grad_norm": 1.2009114027023315, "learning_rate": 2e-05, "loss": 0.04339875, "step": 17483 }, { "epoch": 34.968, "grad_norm": 1.1767265796661377, "learning_rate": 2e-05, "loss": 0.04513696, "step": 17484 }, { "epoch": 34.97, "grad_norm": 1.503244400024414, "learning_rate": 2e-05, "loss": 0.07590361, "step": 17485 }, { "epoch": 34.972, "grad_norm": 0.9458696246147156, "learning_rate": 2e-05, "loss": 0.03492884, "step": 17486 }, { "epoch": 34.974, "grad_norm": 1.2209609746932983, "learning_rate": 2e-05, "loss": 0.02998404, "step": 17487 }, { "epoch": 34.976, "grad_norm": 1.2666972875595093, "learning_rate": 2e-05, "loss": 0.05165854, "step": 17488 }, { "epoch": 34.978, "grad_norm": 1.2078595161437988, "learning_rate": 2e-05, "loss": 0.04767955, "step": 17489 }, { "epoch": 34.98, "grad_norm": 1.205334186553955, "learning_rate": 2e-05, "loss": 0.05208175, "step": 17490 }, { "epoch": 34.982, "grad_norm": 1.0179728269577026, "learning_rate": 2e-05, "loss": 0.03114939, "step": 17491 }, { "epoch": 34.984, "grad_norm": 1.8329960107803345, "learning_rate": 2e-05, "loss": 0.06110509, "step": 17492 }, { "epoch": 34.986, "grad_norm": 1.045487880706787, "learning_rate": 2e-05, "loss": 0.02562892, "step": 17493 }, { "epoch": 34.988, "grad_norm": 1.160578966140747, "learning_rate": 2e-05, "loss": 0.04529508, "step": 17494 }, { "epoch": 34.99, "grad_norm": 1.1257965564727783, "learning_rate": 2e-05, "loss": 0.04581528, "step": 17495 }, { "epoch": 34.992, "grad_norm": 1.1430251598358154, "learning_rate": 2e-05, "loss": 0.05036756, "step": 17496 }, { "epoch": 34.994, "grad_norm": 1.6295039653778076, "learning_rate": 2e-05, "loss": 0.04688197, "step": 17497 }, { "epoch": 34.996, "grad_norm": 1.4103097915649414, "learning_rate": 2e-05, "loss": 0.05346009, "step": 17498 }, { "epoch": 34.998, "grad_norm": 1.2611072063446045, "learning_rate": 2e-05, "loss": 0.0430942, "step": 17499 }, { "epoch": 35.0, "grad_norm": 2.2868216037750244, "learning_rate": 2e-05, "loss": 0.04770452, "step": 17500 }, { "epoch": 35.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9820359281437125, "Equal_1": 1.0, "Equal_2": 0.9780439121756487, "Equal_3": 0.9880239520958084, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9939879759519038, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.994, "Perpendicular_1": 0.998, "Perpendicular_2": 0.996, "Perpendicular_3": 0.9038076152304609, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.988, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9919839679358717, "PointLiesOnLine_3": 0.9900199600798403 }, "eval_runtime": 319.8873, "eval_samples_per_second": 32.824, "eval_steps_per_second": 0.656, "step": 17500 }, { "epoch": 35.002, "grad_norm": 1.9222464561462402, "learning_rate": 2e-05, "loss": 0.03432529, "step": 17501 }, { "epoch": 35.004, "grad_norm": 1.4019160270690918, "learning_rate": 2e-05, "loss": 0.03604925, "step": 17502 }, { "epoch": 35.006, "grad_norm": 0.9827688336372375, "learning_rate": 2e-05, "loss": 0.02998075, "step": 17503 }, { "epoch": 35.008, "grad_norm": 1.1721317768096924, "learning_rate": 2e-05, "loss": 0.04828912, "step": 17504 }, { "epoch": 35.01, "grad_norm": 1.120781421661377, "learning_rate": 2e-05, "loss": 0.04639394, "step": 17505 }, { "epoch": 35.012, "grad_norm": 1.3322588205337524, "learning_rate": 2e-05, "loss": 0.03849679, "step": 17506 }, { "epoch": 35.014, "grad_norm": 1.1528602838516235, "learning_rate": 2e-05, "loss": 0.04227344, "step": 17507 }, { "epoch": 35.016, "grad_norm": 1.0254783630371094, "learning_rate": 2e-05, "loss": 0.02713573, "step": 17508 }, { "epoch": 35.018, "grad_norm": 2.194639205932617, "learning_rate": 2e-05, "loss": 0.06066288, "step": 17509 }, { "epoch": 35.02, "grad_norm": 1.1950559616088867, "learning_rate": 2e-05, "loss": 0.05654139, "step": 17510 }, { "epoch": 35.022, "grad_norm": 1.4963222742080688, "learning_rate": 2e-05, "loss": 0.03999151, "step": 17511 }, { "epoch": 35.024, "grad_norm": 1.4710837602615356, "learning_rate": 2e-05, "loss": 0.05531586, "step": 17512 }, { "epoch": 35.026, "grad_norm": 1.7270408868789673, "learning_rate": 2e-05, "loss": 0.04574881, "step": 17513 }, { "epoch": 35.028, "grad_norm": 1.1254507303237915, "learning_rate": 2e-05, "loss": 0.04714286, "step": 17514 }, { "epoch": 35.03, "grad_norm": 1.2321765422821045, "learning_rate": 2e-05, "loss": 0.0381449, "step": 17515 }, { "epoch": 35.032, "grad_norm": 1.3832772970199585, "learning_rate": 2e-05, "loss": 0.04403721, "step": 17516 }, { "epoch": 35.034, "grad_norm": 2.5237765312194824, "learning_rate": 2e-05, "loss": 0.0705086, "step": 17517 }, { "epoch": 35.036, "grad_norm": 1.3510403633117676, "learning_rate": 2e-05, "loss": 0.04805104, "step": 17518 }, { "epoch": 35.038, "grad_norm": 1.1089110374450684, "learning_rate": 2e-05, "loss": 0.04748667, "step": 17519 }, { "epoch": 35.04, "grad_norm": 1.0043740272521973, "learning_rate": 2e-05, "loss": 0.03443906, "step": 17520 }, { "epoch": 35.042, "grad_norm": 1.2505918741226196, "learning_rate": 2e-05, "loss": 0.0434991, "step": 17521 }, { "epoch": 35.044, "grad_norm": 1.0381025075912476, "learning_rate": 2e-05, "loss": 0.04269955, "step": 17522 }, { "epoch": 35.046, "grad_norm": 1.2612941265106201, "learning_rate": 2e-05, "loss": 0.04119605, "step": 17523 }, { "epoch": 35.048, "grad_norm": 2.3623406887054443, "learning_rate": 2e-05, "loss": 0.03740188, "step": 17524 }, { "epoch": 35.05, "grad_norm": 1.2907884120941162, "learning_rate": 2e-05, "loss": 0.04235482, "step": 17525 }, { "epoch": 35.052, "grad_norm": 0.83847975730896, "learning_rate": 2e-05, "loss": 0.02778682, "step": 17526 }, { "epoch": 35.054, "grad_norm": 1.2352306842803955, "learning_rate": 2e-05, "loss": 0.04302741, "step": 17527 }, { "epoch": 35.056, "grad_norm": 1.0656414031982422, "learning_rate": 2e-05, "loss": 0.04405284, "step": 17528 }, { "epoch": 35.058, "grad_norm": 2.7260079383850098, "learning_rate": 2e-05, "loss": 0.04868072, "step": 17529 }, { "epoch": 35.06, "grad_norm": 0.9766820073127747, "learning_rate": 2e-05, "loss": 0.03807481, "step": 17530 }, { "epoch": 35.062, "grad_norm": 0.8948190808296204, "learning_rate": 2e-05, "loss": 0.02815506, "step": 17531 }, { "epoch": 35.064, "grad_norm": 2.2773566246032715, "learning_rate": 2e-05, "loss": 0.05238456, "step": 17532 }, { "epoch": 35.066, "grad_norm": 1.1301003694534302, "learning_rate": 2e-05, "loss": 0.04259311, "step": 17533 }, { "epoch": 35.068, "grad_norm": 1.2741830348968506, "learning_rate": 2e-05, "loss": 0.06291425, "step": 17534 }, { "epoch": 35.07, "grad_norm": 1.1298288106918335, "learning_rate": 2e-05, "loss": 0.04579066, "step": 17535 }, { "epoch": 35.072, "grad_norm": 1.111412525177002, "learning_rate": 2e-05, "loss": 0.02930526, "step": 17536 }, { "epoch": 35.074, "grad_norm": 1.3507663011550903, "learning_rate": 2e-05, "loss": 0.06096983, "step": 17537 }, { "epoch": 35.076, "grad_norm": 1.3331928253173828, "learning_rate": 2e-05, "loss": 0.06763585, "step": 17538 }, { "epoch": 35.078, "grad_norm": 1.7155866622924805, "learning_rate": 2e-05, "loss": 0.04312644, "step": 17539 }, { "epoch": 35.08, "grad_norm": 1.359623908996582, "learning_rate": 2e-05, "loss": 0.04029921, "step": 17540 }, { "epoch": 35.082, "grad_norm": 1.0476104021072388, "learning_rate": 2e-05, "loss": 0.04592532, "step": 17541 }, { "epoch": 35.084, "grad_norm": 2.1303536891937256, "learning_rate": 2e-05, "loss": 0.03890026, "step": 17542 }, { "epoch": 35.086, "grad_norm": 1.1645375490188599, "learning_rate": 2e-05, "loss": 0.04217415, "step": 17543 }, { "epoch": 35.088, "grad_norm": 1.1522724628448486, "learning_rate": 2e-05, "loss": 0.05060149, "step": 17544 }, { "epoch": 35.09, "grad_norm": 0.8973413705825806, "learning_rate": 2e-05, "loss": 0.02097614, "step": 17545 }, { "epoch": 35.092, "grad_norm": 1.3803879022598267, "learning_rate": 2e-05, "loss": 0.04214935, "step": 17546 }, { "epoch": 35.094, "grad_norm": 1.174539566040039, "learning_rate": 2e-05, "loss": 0.04683707, "step": 17547 }, { "epoch": 35.096, "grad_norm": 1.838232398033142, "learning_rate": 2e-05, "loss": 0.04326537, "step": 17548 }, { "epoch": 35.098, "grad_norm": 1.5689642429351807, "learning_rate": 2e-05, "loss": 0.04072879, "step": 17549 }, { "epoch": 35.1, "grad_norm": 1.0995928049087524, "learning_rate": 2e-05, "loss": 0.03880709, "step": 17550 }, { "epoch": 35.102, "grad_norm": 0.8378002047538757, "learning_rate": 2e-05, "loss": 0.02281972, "step": 17551 }, { "epoch": 35.104, "grad_norm": 1.1497136354446411, "learning_rate": 2e-05, "loss": 0.04842537, "step": 17552 }, { "epoch": 35.106, "grad_norm": 1.2610912322998047, "learning_rate": 2e-05, "loss": 0.04262506, "step": 17553 }, { "epoch": 35.108, "grad_norm": 0.969350278377533, "learning_rate": 2e-05, "loss": 0.03585976, "step": 17554 }, { "epoch": 35.11, "grad_norm": 1.0584502220153809, "learning_rate": 2e-05, "loss": 0.03664526, "step": 17555 }, { "epoch": 35.112, "grad_norm": 1.3232938051223755, "learning_rate": 2e-05, "loss": 0.05472728, "step": 17556 }, { "epoch": 35.114, "grad_norm": 1.1075985431671143, "learning_rate": 2e-05, "loss": 0.05077582, "step": 17557 }, { "epoch": 35.116, "grad_norm": 1.2517019510269165, "learning_rate": 2e-05, "loss": 0.05858276, "step": 17558 }, { "epoch": 35.118, "grad_norm": 1.2317686080932617, "learning_rate": 2e-05, "loss": 0.04953536, "step": 17559 }, { "epoch": 35.12, "grad_norm": 2.1341912746429443, "learning_rate": 2e-05, "loss": 0.03007166, "step": 17560 }, { "epoch": 35.122, "grad_norm": 2.465643882751465, "learning_rate": 2e-05, "loss": 0.03830947, "step": 17561 }, { "epoch": 35.124, "grad_norm": 1.5876415967941284, "learning_rate": 2e-05, "loss": 0.03763131, "step": 17562 }, { "epoch": 35.126, "grad_norm": 1.1403216123580933, "learning_rate": 2e-05, "loss": 0.04078696, "step": 17563 }, { "epoch": 35.128, "grad_norm": 1.7711175680160522, "learning_rate": 2e-05, "loss": 0.05040234, "step": 17564 }, { "epoch": 35.13, "grad_norm": 1.334191918373108, "learning_rate": 2e-05, "loss": 0.04977368, "step": 17565 }, { "epoch": 35.132, "grad_norm": 1.9622347354888916, "learning_rate": 2e-05, "loss": 0.04543035, "step": 17566 }, { "epoch": 35.134, "grad_norm": 1.4173260927200317, "learning_rate": 2e-05, "loss": 0.05295032, "step": 17567 }, { "epoch": 35.136, "grad_norm": 1.4834239482879639, "learning_rate": 2e-05, "loss": 0.04917359, "step": 17568 }, { "epoch": 35.138, "grad_norm": 1.31399405002594, "learning_rate": 2e-05, "loss": 0.03308884, "step": 17569 }, { "epoch": 35.14, "grad_norm": 1.307672142982483, "learning_rate": 2e-05, "loss": 0.04067755, "step": 17570 }, { "epoch": 35.142, "grad_norm": 1.3411407470703125, "learning_rate": 2e-05, "loss": 0.041333, "step": 17571 }, { "epoch": 35.144, "grad_norm": 5.57957649230957, "learning_rate": 2e-05, "loss": 0.04797975, "step": 17572 }, { "epoch": 35.146, "grad_norm": 1.3772554397583008, "learning_rate": 2e-05, "loss": 0.05607633, "step": 17573 }, { "epoch": 35.148, "grad_norm": 2.2462568283081055, "learning_rate": 2e-05, "loss": 0.06492513, "step": 17574 }, { "epoch": 35.15, "grad_norm": 1.0662626028060913, "learning_rate": 2e-05, "loss": 0.05135501, "step": 17575 }, { "epoch": 35.152, "grad_norm": 1.4305570125579834, "learning_rate": 2e-05, "loss": 0.04058772, "step": 17576 }, { "epoch": 35.154, "grad_norm": 1.031005620956421, "learning_rate": 2e-05, "loss": 0.0381646, "step": 17577 }, { "epoch": 35.156, "grad_norm": 1.168686866760254, "learning_rate": 2e-05, "loss": 0.04385469, "step": 17578 }, { "epoch": 35.158, "grad_norm": 0.9006215333938599, "learning_rate": 2e-05, "loss": 0.03294571, "step": 17579 }, { "epoch": 35.16, "grad_norm": 1.1258612871170044, "learning_rate": 2e-05, "loss": 0.05022587, "step": 17580 }, { "epoch": 35.162, "grad_norm": 1.2660801410675049, "learning_rate": 2e-05, "loss": 0.06481211, "step": 17581 }, { "epoch": 35.164, "grad_norm": 1.8085393905639648, "learning_rate": 2e-05, "loss": 0.03045495, "step": 17582 }, { "epoch": 35.166, "grad_norm": 1.5225801467895508, "learning_rate": 2e-05, "loss": 0.04185986, "step": 17583 }, { "epoch": 35.168, "grad_norm": 1.3285081386566162, "learning_rate": 2e-05, "loss": 0.03883301, "step": 17584 }, { "epoch": 35.17, "grad_norm": 0.9945167899131775, "learning_rate": 2e-05, "loss": 0.03533852, "step": 17585 }, { "epoch": 35.172, "grad_norm": 1.0777450799942017, "learning_rate": 2e-05, "loss": 0.03911363, "step": 17586 }, { "epoch": 35.174, "grad_norm": 0.8671603202819824, "learning_rate": 2e-05, "loss": 0.02011818, "step": 17587 }, { "epoch": 35.176, "grad_norm": 1.2364228963851929, "learning_rate": 2e-05, "loss": 0.04776933, "step": 17588 }, { "epoch": 35.178, "grad_norm": 1.3007594347000122, "learning_rate": 2e-05, "loss": 0.04221889, "step": 17589 }, { "epoch": 35.18, "grad_norm": 1.1037037372589111, "learning_rate": 2e-05, "loss": 0.03446237, "step": 17590 }, { "epoch": 35.182, "grad_norm": 1.2064841985702515, "learning_rate": 2e-05, "loss": 0.04089108, "step": 17591 }, { "epoch": 35.184, "grad_norm": 0.9847924113273621, "learning_rate": 2e-05, "loss": 0.0352126, "step": 17592 }, { "epoch": 35.186, "grad_norm": 1.4962389469146729, "learning_rate": 2e-05, "loss": 0.06021068, "step": 17593 }, { "epoch": 35.188, "grad_norm": 1.4527535438537598, "learning_rate": 2e-05, "loss": 0.0581253, "step": 17594 }, { "epoch": 35.19, "grad_norm": 1.0838546752929688, "learning_rate": 2e-05, "loss": 0.03708592, "step": 17595 }, { "epoch": 35.192, "grad_norm": 1.216435432434082, "learning_rate": 2e-05, "loss": 0.0478345, "step": 17596 }, { "epoch": 35.194, "grad_norm": 3.266355514526367, "learning_rate": 2e-05, "loss": 0.04139251, "step": 17597 }, { "epoch": 35.196, "grad_norm": 2.6940810680389404, "learning_rate": 2e-05, "loss": 0.04337883, "step": 17598 }, { "epoch": 35.198, "grad_norm": 1.02515709400177, "learning_rate": 2e-05, "loss": 0.0304161, "step": 17599 }, { "epoch": 35.2, "grad_norm": 1.0762580633163452, "learning_rate": 2e-05, "loss": 0.04152739, "step": 17600 }, { "epoch": 35.202, "grad_norm": 2.462306499481201, "learning_rate": 2e-05, "loss": 0.03642452, "step": 17601 }, { "epoch": 35.204, "grad_norm": 1.2067070007324219, "learning_rate": 2e-05, "loss": 0.04705513, "step": 17602 }, { "epoch": 35.206, "grad_norm": 1.4548958539962769, "learning_rate": 2e-05, "loss": 0.06413062, "step": 17603 }, { "epoch": 35.208, "grad_norm": 1.3760452270507812, "learning_rate": 2e-05, "loss": 0.05110206, "step": 17604 }, { "epoch": 35.21, "grad_norm": 1.0767890214920044, "learning_rate": 2e-05, "loss": 0.03728911, "step": 17605 }, { "epoch": 35.212, "grad_norm": 1.1798242330551147, "learning_rate": 2e-05, "loss": 0.04494874, "step": 17606 }, { "epoch": 35.214, "grad_norm": 1.1159026622772217, "learning_rate": 2e-05, "loss": 0.04022665, "step": 17607 }, { "epoch": 35.216, "grad_norm": 1.239780306816101, "learning_rate": 2e-05, "loss": 0.04985084, "step": 17608 }, { "epoch": 35.218, "grad_norm": 1.381771445274353, "learning_rate": 2e-05, "loss": 0.02961168, "step": 17609 }, { "epoch": 35.22, "grad_norm": 1.4819316864013672, "learning_rate": 2e-05, "loss": 0.04220682, "step": 17610 }, { "epoch": 35.222, "grad_norm": 1.0734423398971558, "learning_rate": 2e-05, "loss": 0.03761606, "step": 17611 }, { "epoch": 35.224, "grad_norm": 1.9286874532699585, "learning_rate": 2e-05, "loss": 0.04495767, "step": 17612 }, { "epoch": 35.226, "grad_norm": 1.3085813522338867, "learning_rate": 2e-05, "loss": 0.04169934, "step": 17613 }, { "epoch": 35.228, "grad_norm": 1.250126838684082, "learning_rate": 2e-05, "loss": 0.04465639, "step": 17614 }, { "epoch": 35.23, "grad_norm": 1.335493803024292, "learning_rate": 2e-05, "loss": 0.04012146, "step": 17615 }, { "epoch": 35.232, "grad_norm": 1.5321751832962036, "learning_rate": 2e-05, "loss": 0.06640027, "step": 17616 }, { "epoch": 35.234, "grad_norm": 2.0908942222595215, "learning_rate": 2e-05, "loss": 0.05824264, "step": 17617 }, { "epoch": 35.236, "grad_norm": 1.1407338380813599, "learning_rate": 2e-05, "loss": 0.04133571, "step": 17618 }, { "epoch": 35.238, "grad_norm": 1.3628578186035156, "learning_rate": 2e-05, "loss": 0.03877375, "step": 17619 }, { "epoch": 35.24, "grad_norm": 3.9366886615753174, "learning_rate": 2e-05, "loss": 0.0478052, "step": 17620 }, { "epoch": 35.242, "grad_norm": 2.441206216812134, "learning_rate": 2e-05, "loss": 0.03485952, "step": 17621 }, { "epoch": 35.244, "grad_norm": 2.0725955963134766, "learning_rate": 2e-05, "loss": 0.03543587, "step": 17622 }, { "epoch": 35.246, "grad_norm": 1.0343211889266968, "learning_rate": 2e-05, "loss": 0.03453851, "step": 17623 }, { "epoch": 35.248, "grad_norm": 0.9420909881591797, "learning_rate": 2e-05, "loss": 0.02860404, "step": 17624 }, { "epoch": 35.25, "grad_norm": 1.3072288036346436, "learning_rate": 2e-05, "loss": 0.04773654, "step": 17625 }, { "epoch": 35.252, "grad_norm": 1.3463304042816162, "learning_rate": 2e-05, "loss": 0.03698765, "step": 17626 }, { "epoch": 35.254, "grad_norm": 2.281853199005127, "learning_rate": 2e-05, "loss": 0.05275848, "step": 17627 }, { "epoch": 35.256, "grad_norm": 1.1776528358459473, "learning_rate": 2e-05, "loss": 0.02095211, "step": 17628 }, { "epoch": 35.258, "grad_norm": 1.3505492210388184, "learning_rate": 2e-05, "loss": 0.04562983, "step": 17629 }, { "epoch": 35.26, "grad_norm": 1.349131464958191, "learning_rate": 2e-05, "loss": 0.03608799, "step": 17630 }, { "epoch": 35.262, "grad_norm": 1.218159794807434, "learning_rate": 2e-05, "loss": 0.0397279, "step": 17631 }, { "epoch": 35.264, "grad_norm": 2.076681137084961, "learning_rate": 2e-05, "loss": 0.06574233, "step": 17632 }, { "epoch": 35.266, "grad_norm": 1.0747252702713013, "learning_rate": 2e-05, "loss": 0.04180413, "step": 17633 }, { "epoch": 35.268, "grad_norm": 1.5515995025634766, "learning_rate": 2e-05, "loss": 0.05342175, "step": 17634 }, { "epoch": 35.27, "grad_norm": 1.4581546783447266, "learning_rate": 2e-05, "loss": 0.06602959, "step": 17635 }, { "epoch": 35.272, "grad_norm": 3.7796173095703125, "learning_rate": 2e-05, "loss": 0.0564354, "step": 17636 }, { "epoch": 35.274, "grad_norm": 3.0336711406707764, "learning_rate": 2e-05, "loss": 0.05401599, "step": 17637 }, { "epoch": 35.276, "grad_norm": 1.2968264818191528, "learning_rate": 2e-05, "loss": 0.0414682, "step": 17638 }, { "epoch": 35.278, "grad_norm": 2.132004737854004, "learning_rate": 2e-05, "loss": 0.0513651, "step": 17639 }, { "epoch": 35.28, "grad_norm": 2.880918025970459, "learning_rate": 2e-05, "loss": 0.04456554, "step": 17640 }, { "epoch": 35.282, "grad_norm": 1.0972630977630615, "learning_rate": 2e-05, "loss": 0.04144361, "step": 17641 }, { "epoch": 35.284, "grad_norm": 1.0943173170089722, "learning_rate": 2e-05, "loss": 0.04478058, "step": 17642 }, { "epoch": 35.286, "grad_norm": 1.1014378070831299, "learning_rate": 2e-05, "loss": 0.04401636, "step": 17643 }, { "epoch": 35.288, "grad_norm": 1.2285698652267456, "learning_rate": 2e-05, "loss": 0.04485837, "step": 17644 }, { "epoch": 35.29, "grad_norm": 1.2702783346176147, "learning_rate": 2e-05, "loss": 0.0508531, "step": 17645 }, { "epoch": 35.292, "grad_norm": 1.179574728012085, "learning_rate": 2e-05, "loss": 0.03975598, "step": 17646 }, { "epoch": 35.294, "grad_norm": 0.9671401381492615, "learning_rate": 2e-05, "loss": 0.03313099, "step": 17647 }, { "epoch": 35.296, "grad_norm": 1.1078392267227173, "learning_rate": 2e-05, "loss": 0.03759495, "step": 17648 }, { "epoch": 35.298, "grad_norm": 1.5418148040771484, "learning_rate": 2e-05, "loss": 0.0561055, "step": 17649 }, { "epoch": 35.3, "grad_norm": 1.1559892892837524, "learning_rate": 2e-05, "loss": 0.04232976, "step": 17650 }, { "epoch": 35.302, "grad_norm": 1.9095799922943115, "learning_rate": 2e-05, "loss": 0.05348632, "step": 17651 }, { "epoch": 35.304, "grad_norm": 0.9116816520690918, "learning_rate": 2e-05, "loss": 0.03719225, "step": 17652 }, { "epoch": 35.306, "grad_norm": 1.5335626602172852, "learning_rate": 2e-05, "loss": 0.0475245, "step": 17653 }, { "epoch": 35.308, "grad_norm": 1.8686447143554688, "learning_rate": 2e-05, "loss": 0.0369437, "step": 17654 }, { "epoch": 35.31, "grad_norm": 1.4260104894638062, "learning_rate": 2e-05, "loss": 0.0563136, "step": 17655 }, { "epoch": 35.312, "grad_norm": 1.3176708221435547, "learning_rate": 2e-05, "loss": 0.04525885, "step": 17656 }, { "epoch": 35.314, "grad_norm": 1.062448263168335, "learning_rate": 2e-05, "loss": 0.03579092, "step": 17657 }, { "epoch": 35.316, "grad_norm": 0.8730036020278931, "learning_rate": 2e-05, "loss": 0.02629447, "step": 17658 }, { "epoch": 35.318, "grad_norm": 1.2714184522628784, "learning_rate": 2e-05, "loss": 0.03522924, "step": 17659 }, { "epoch": 35.32, "grad_norm": 1.3497344255447388, "learning_rate": 2e-05, "loss": 0.0463984, "step": 17660 }, { "epoch": 35.322, "grad_norm": 2.49761962890625, "learning_rate": 2e-05, "loss": 0.0529076, "step": 17661 }, { "epoch": 35.324, "grad_norm": 1.4788845777511597, "learning_rate": 2e-05, "loss": 0.04073741, "step": 17662 }, { "epoch": 35.326, "grad_norm": 1.1707643270492554, "learning_rate": 2e-05, "loss": 0.03425131, "step": 17663 }, { "epoch": 35.328, "grad_norm": 0.9647262692451477, "learning_rate": 2e-05, "loss": 0.02807866, "step": 17664 }, { "epoch": 35.33, "grad_norm": 1.2062174081802368, "learning_rate": 2e-05, "loss": 0.06155042, "step": 17665 }, { "epoch": 35.332, "grad_norm": 0.9606382250785828, "learning_rate": 2e-05, "loss": 0.03627588, "step": 17666 }, { "epoch": 35.334, "grad_norm": 1.414086937904358, "learning_rate": 2e-05, "loss": 0.04845129, "step": 17667 }, { "epoch": 35.336, "grad_norm": 1.6463829278945923, "learning_rate": 2e-05, "loss": 0.03860418, "step": 17668 }, { "epoch": 35.338, "grad_norm": 1.1206401586532593, "learning_rate": 2e-05, "loss": 0.04083535, "step": 17669 }, { "epoch": 35.34, "grad_norm": 2.738128662109375, "learning_rate": 2e-05, "loss": 0.06861308, "step": 17670 }, { "epoch": 35.342, "grad_norm": 2.6043827533721924, "learning_rate": 2e-05, "loss": 0.05943043, "step": 17671 }, { "epoch": 35.344, "grad_norm": 1.0954211950302124, "learning_rate": 2e-05, "loss": 0.04462023, "step": 17672 }, { "epoch": 35.346, "grad_norm": 1.1352951526641846, "learning_rate": 2e-05, "loss": 0.05026999, "step": 17673 }, { "epoch": 35.348, "grad_norm": 1.3933370113372803, "learning_rate": 2e-05, "loss": 0.03066497, "step": 17674 }, { "epoch": 35.35, "grad_norm": 1.0098553895950317, "learning_rate": 2e-05, "loss": 0.04665726, "step": 17675 }, { "epoch": 35.352, "grad_norm": 1.050323486328125, "learning_rate": 2e-05, "loss": 0.04057711, "step": 17676 }, { "epoch": 35.354, "grad_norm": 1.5547417402267456, "learning_rate": 2e-05, "loss": 0.04004311, "step": 17677 }, { "epoch": 35.356, "grad_norm": 0.8295509219169617, "learning_rate": 2e-05, "loss": 0.02311123, "step": 17678 }, { "epoch": 35.358, "grad_norm": 1.6722297668457031, "learning_rate": 2e-05, "loss": 0.04569897, "step": 17679 }, { "epoch": 35.36, "grad_norm": 1.2508385181427002, "learning_rate": 2e-05, "loss": 0.04461116, "step": 17680 }, { "epoch": 35.362, "grad_norm": 1.20958411693573, "learning_rate": 2e-05, "loss": 0.04479233, "step": 17681 }, { "epoch": 35.364, "grad_norm": 1.1637946367263794, "learning_rate": 2e-05, "loss": 0.04216972, "step": 17682 }, { "epoch": 35.366, "grad_norm": 1.2432118654251099, "learning_rate": 2e-05, "loss": 0.0468252, "step": 17683 }, { "epoch": 35.368, "grad_norm": 1.830465316772461, "learning_rate": 2e-05, "loss": 0.04752547, "step": 17684 }, { "epoch": 35.37, "grad_norm": 1.0570487976074219, "learning_rate": 2e-05, "loss": 0.03741091, "step": 17685 }, { "epoch": 35.372, "grad_norm": 2.6473779678344727, "learning_rate": 2e-05, "loss": 0.04489774, "step": 17686 }, { "epoch": 35.374, "grad_norm": 1.851365327835083, "learning_rate": 2e-05, "loss": 0.04287185, "step": 17687 }, { "epoch": 35.376, "grad_norm": 0.9998928308486938, "learning_rate": 2e-05, "loss": 0.02766431, "step": 17688 }, { "epoch": 35.378, "grad_norm": 0.9591315388679504, "learning_rate": 2e-05, "loss": 0.04038401, "step": 17689 }, { "epoch": 35.38, "grad_norm": 1.2864335775375366, "learning_rate": 2e-05, "loss": 0.03989007, "step": 17690 }, { "epoch": 35.382, "grad_norm": 0.9475865364074707, "learning_rate": 2e-05, "loss": 0.02444749, "step": 17691 }, { "epoch": 35.384, "grad_norm": 1.568764090538025, "learning_rate": 2e-05, "loss": 0.0419982, "step": 17692 }, { "epoch": 35.386, "grad_norm": 1.4672602415084839, "learning_rate": 2e-05, "loss": 0.04082417, "step": 17693 }, { "epoch": 35.388, "grad_norm": 1.2463346719741821, "learning_rate": 2e-05, "loss": 0.04095122, "step": 17694 }, { "epoch": 35.39, "grad_norm": 0.9424423575401306, "learning_rate": 2e-05, "loss": 0.03527466, "step": 17695 }, { "epoch": 35.392, "grad_norm": 1.1378228664398193, "learning_rate": 2e-05, "loss": 0.04225166, "step": 17696 }, { "epoch": 35.394, "grad_norm": 0.9545497298240662, "learning_rate": 2e-05, "loss": 0.0355546, "step": 17697 }, { "epoch": 35.396, "grad_norm": 1.1322404146194458, "learning_rate": 2e-05, "loss": 0.05043054, "step": 17698 }, { "epoch": 35.398, "grad_norm": 1.0658605098724365, "learning_rate": 2e-05, "loss": 0.04704443, "step": 17699 }, { "epoch": 35.4, "grad_norm": 1.517007827758789, "learning_rate": 2e-05, "loss": 0.03810771, "step": 17700 }, { "epoch": 35.402, "grad_norm": 1.1072192192077637, "learning_rate": 2e-05, "loss": 0.04016921, "step": 17701 }, { "epoch": 35.404, "grad_norm": 2.109288215637207, "learning_rate": 2e-05, "loss": 0.04785345, "step": 17702 }, { "epoch": 35.406, "grad_norm": 2.865124464035034, "learning_rate": 2e-05, "loss": 0.04972965, "step": 17703 }, { "epoch": 35.408, "grad_norm": 1.0339571237564087, "learning_rate": 2e-05, "loss": 0.03648417, "step": 17704 }, { "epoch": 35.41, "grad_norm": 1.8117121458053589, "learning_rate": 2e-05, "loss": 0.04509541, "step": 17705 }, { "epoch": 35.412, "grad_norm": 1.2131115198135376, "learning_rate": 2e-05, "loss": 0.04989497, "step": 17706 }, { "epoch": 35.414, "grad_norm": 1.2343026399612427, "learning_rate": 2e-05, "loss": 0.04979831, "step": 17707 }, { "epoch": 35.416, "grad_norm": 1.052886724472046, "learning_rate": 2e-05, "loss": 0.03326161, "step": 17708 }, { "epoch": 35.418, "grad_norm": 1.1077262163162231, "learning_rate": 2e-05, "loss": 0.0355419, "step": 17709 }, { "epoch": 35.42, "grad_norm": 1.4632622003555298, "learning_rate": 2e-05, "loss": 0.047338, "step": 17710 }, { "epoch": 35.422, "grad_norm": 1.1542631387710571, "learning_rate": 2e-05, "loss": 0.04234635, "step": 17711 }, { "epoch": 35.424, "grad_norm": 4.269802570343018, "learning_rate": 2e-05, "loss": 0.03470967, "step": 17712 }, { "epoch": 35.426, "grad_norm": 1.3135454654693604, "learning_rate": 2e-05, "loss": 0.04146645, "step": 17713 }, { "epoch": 35.428, "grad_norm": 1.0634958744049072, "learning_rate": 2e-05, "loss": 0.02393471, "step": 17714 }, { "epoch": 35.43, "grad_norm": 1.000059723854065, "learning_rate": 2e-05, "loss": 0.0369064, "step": 17715 }, { "epoch": 35.432, "grad_norm": 2.9598135948181152, "learning_rate": 2e-05, "loss": 0.04118183, "step": 17716 }, { "epoch": 35.434, "grad_norm": 1.133852481842041, "learning_rate": 2e-05, "loss": 0.04435404, "step": 17717 }, { "epoch": 35.436, "grad_norm": 0.9735626578330994, "learning_rate": 2e-05, "loss": 0.03352756, "step": 17718 }, { "epoch": 35.438, "grad_norm": 1.356118083000183, "learning_rate": 2e-05, "loss": 0.05865297, "step": 17719 }, { "epoch": 35.44, "grad_norm": 0.9971575736999512, "learning_rate": 2e-05, "loss": 0.02424793, "step": 17720 }, { "epoch": 35.442, "grad_norm": 1.2533522844314575, "learning_rate": 2e-05, "loss": 0.03869835, "step": 17721 }, { "epoch": 35.444, "grad_norm": 0.8965350985527039, "learning_rate": 2e-05, "loss": 0.03387743, "step": 17722 }, { "epoch": 35.446, "grad_norm": 1.1389036178588867, "learning_rate": 2e-05, "loss": 0.04247169, "step": 17723 }, { "epoch": 35.448, "grad_norm": 1.369778037071228, "learning_rate": 2e-05, "loss": 0.06438439, "step": 17724 }, { "epoch": 35.45, "grad_norm": 1.1697176694869995, "learning_rate": 2e-05, "loss": 0.04246277, "step": 17725 }, { "epoch": 35.452, "grad_norm": 1.5296801328659058, "learning_rate": 2e-05, "loss": 0.03574028, "step": 17726 }, { "epoch": 35.454, "grad_norm": 1.0308504104614258, "learning_rate": 2e-05, "loss": 0.03716336, "step": 17727 }, { "epoch": 35.456, "grad_norm": 1.0190324783325195, "learning_rate": 2e-05, "loss": 0.03794012, "step": 17728 }, { "epoch": 35.458, "grad_norm": 1.2319554090499878, "learning_rate": 2e-05, "loss": 0.04770632, "step": 17729 }, { "epoch": 35.46, "grad_norm": 1.7517224550247192, "learning_rate": 2e-05, "loss": 0.04705173, "step": 17730 }, { "epoch": 35.462, "grad_norm": 1.5621715784072876, "learning_rate": 2e-05, "loss": 0.04366981, "step": 17731 }, { "epoch": 35.464, "grad_norm": 1.3193645477294922, "learning_rate": 2e-05, "loss": 0.05270861, "step": 17732 }, { "epoch": 35.466, "grad_norm": 0.9536468982696533, "learning_rate": 2e-05, "loss": 0.0323831, "step": 17733 }, { "epoch": 35.468, "grad_norm": 1.2698876857757568, "learning_rate": 2e-05, "loss": 0.04317931, "step": 17734 }, { "epoch": 35.47, "grad_norm": 1.2431633472442627, "learning_rate": 2e-05, "loss": 0.05195896, "step": 17735 }, { "epoch": 35.472, "grad_norm": 1.0701299905776978, "learning_rate": 2e-05, "loss": 0.03547895, "step": 17736 }, { "epoch": 35.474, "grad_norm": 1.2134490013122559, "learning_rate": 2e-05, "loss": 0.05419344, "step": 17737 }, { "epoch": 35.476, "grad_norm": 1.2833257913589478, "learning_rate": 2e-05, "loss": 0.03962324, "step": 17738 }, { "epoch": 35.478, "grad_norm": 1.1532343626022339, "learning_rate": 2e-05, "loss": 0.03333162, "step": 17739 }, { "epoch": 35.48, "grad_norm": 1.1357736587524414, "learning_rate": 2e-05, "loss": 0.03868002, "step": 17740 }, { "epoch": 35.482, "grad_norm": 1.2286957502365112, "learning_rate": 2e-05, "loss": 0.04705796, "step": 17741 }, { "epoch": 35.484, "grad_norm": 1.0505681037902832, "learning_rate": 2e-05, "loss": 0.04804849, "step": 17742 }, { "epoch": 35.486, "grad_norm": 1.327806830406189, "learning_rate": 2e-05, "loss": 0.04823949, "step": 17743 }, { "epoch": 35.488, "grad_norm": 1.412100076675415, "learning_rate": 2e-05, "loss": 0.06005654, "step": 17744 }, { "epoch": 35.49, "grad_norm": 1.1800956726074219, "learning_rate": 2e-05, "loss": 0.04362959, "step": 17745 }, { "epoch": 35.492, "grad_norm": 1.503348708152771, "learning_rate": 2e-05, "loss": 0.03750347, "step": 17746 }, { "epoch": 35.494, "grad_norm": 1.1118701696395874, "learning_rate": 2e-05, "loss": 0.03852141, "step": 17747 }, { "epoch": 35.496, "grad_norm": 1.1212671995162964, "learning_rate": 2e-05, "loss": 0.0386279, "step": 17748 }, { "epoch": 35.498, "grad_norm": 1.625443935394287, "learning_rate": 2e-05, "loss": 0.0379408, "step": 17749 }, { "epoch": 35.5, "grad_norm": 1.2306393384933472, "learning_rate": 2e-05, "loss": 0.04350326, "step": 17750 }, { "epoch": 35.502, "grad_norm": 2.8825604915618896, "learning_rate": 2e-05, "loss": 0.05902184, "step": 17751 }, { "epoch": 35.504, "grad_norm": 0.9984104037284851, "learning_rate": 2e-05, "loss": 0.0298137, "step": 17752 }, { "epoch": 35.506, "grad_norm": 1.1372625827789307, "learning_rate": 2e-05, "loss": 0.04307988, "step": 17753 }, { "epoch": 35.508, "grad_norm": 1.264777660369873, "learning_rate": 2e-05, "loss": 0.05577639, "step": 17754 }, { "epoch": 35.51, "grad_norm": 1.6673481464385986, "learning_rate": 2e-05, "loss": 0.03963385, "step": 17755 }, { "epoch": 35.512, "grad_norm": 1.1486952304840088, "learning_rate": 2e-05, "loss": 0.0351126, "step": 17756 }, { "epoch": 35.514, "grad_norm": 1.608984351158142, "learning_rate": 2e-05, "loss": 0.04021899, "step": 17757 }, { "epoch": 35.516, "grad_norm": 1.772530436515808, "learning_rate": 2e-05, "loss": 0.05870032, "step": 17758 }, { "epoch": 35.518, "grad_norm": 2.1825215816497803, "learning_rate": 2e-05, "loss": 0.03407493, "step": 17759 }, { "epoch": 35.52, "grad_norm": 1.3578035831451416, "learning_rate": 2e-05, "loss": 0.04622669, "step": 17760 }, { "epoch": 35.522, "grad_norm": 0.9831930994987488, "learning_rate": 2e-05, "loss": 0.0285987, "step": 17761 }, { "epoch": 35.524, "grad_norm": 1.3356895446777344, "learning_rate": 2e-05, "loss": 0.04365962, "step": 17762 }, { "epoch": 35.526, "grad_norm": 1.0443134307861328, "learning_rate": 2e-05, "loss": 0.03748023, "step": 17763 }, { "epoch": 35.528, "grad_norm": 1.1682651042938232, "learning_rate": 2e-05, "loss": 0.04070065, "step": 17764 }, { "epoch": 35.53, "grad_norm": 1.5059261322021484, "learning_rate": 2e-05, "loss": 0.03936179, "step": 17765 }, { "epoch": 35.532, "grad_norm": 1.2437570095062256, "learning_rate": 2e-05, "loss": 0.05362586, "step": 17766 }, { "epoch": 35.534, "grad_norm": 1.5657322406768799, "learning_rate": 2e-05, "loss": 0.04938463, "step": 17767 }, { "epoch": 35.536, "grad_norm": 0.9113253355026245, "learning_rate": 2e-05, "loss": 0.02991488, "step": 17768 }, { "epoch": 35.538, "grad_norm": 1.499922752380371, "learning_rate": 2e-05, "loss": 0.04125605, "step": 17769 }, { "epoch": 35.54, "grad_norm": 1.880131721496582, "learning_rate": 2e-05, "loss": 0.04844569, "step": 17770 }, { "epoch": 35.542, "grad_norm": 1.1775963306427002, "learning_rate": 2e-05, "loss": 0.0403573, "step": 17771 }, { "epoch": 35.544, "grad_norm": 1.082275629043579, "learning_rate": 2e-05, "loss": 0.05027766, "step": 17772 }, { "epoch": 35.546, "grad_norm": 1.3758782148361206, "learning_rate": 2e-05, "loss": 0.03608634, "step": 17773 }, { "epoch": 35.548, "grad_norm": 1.6104998588562012, "learning_rate": 2e-05, "loss": 0.05908882, "step": 17774 }, { "epoch": 35.55, "grad_norm": 1.236854076385498, "learning_rate": 2e-05, "loss": 0.0350708, "step": 17775 }, { "epoch": 35.552, "grad_norm": 1.2313014268875122, "learning_rate": 2e-05, "loss": 0.0381471, "step": 17776 }, { "epoch": 35.554, "grad_norm": 1.328316569328308, "learning_rate": 2e-05, "loss": 0.03309543, "step": 17777 }, { "epoch": 35.556, "grad_norm": 0.9129838943481445, "learning_rate": 2e-05, "loss": 0.02543531, "step": 17778 }, { "epoch": 35.558, "grad_norm": 1.7649102210998535, "learning_rate": 2e-05, "loss": 0.06082066, "step": 17779 }, { "epoch": 35.56, "grad_norm": 1.1966280937194824, "learning_rate": 2e-05, "loss": 0.03694362, "step": 17780 }, { "epoch": 35.562, "grad_norm": 1.4627177715301514, "learning_rate": 2e-05, "loss": 0.05721138, "step": 17781 }, { "epoch": 35.564, "grad_norm": 1.157109260559082, "learning_rate": 2e-05, "loss": 0.03740972, "step": 17782 }, { "epoch": 35.566, "grad_norm": 1.4257314205169678, "learning_rate": 2e-05, "loss": 0.03248692, "step": 17783 }, { "epoch": 35.568, "grad_norm": 0.8779945969581604, "learning_rate": 2e-05, "loss": 0.02795019, "step": 17784 }, { "epoch": 35.57, "grad_norm": 1.033923625946045, "learning_rate": 2e-05, "loss": 0.0340129, "step": 17785 }, { "epoch": 35.572, "grad_norm": 1.221801996231079, "learning_rate": 2e-05, "loss": 0.04395582, "step": 17786 }, { "epoch": 35.574, "grad_norm": 1.7551618814468384, "learning_rate": 2e-05, "loss": 0.03690699, "step": 17787 }, { "epoch": 35.576, "grad_norm": 1.3354934453964233, "learning_rate": 2e-05, "loss": 0.05585198, "step": 17788 }, { "epoch": 35.578, "grad_norm": 1.180930495262146, "learning_rate": 2e-05, "loss": 0.04893908, "step": 17789 }, { "epoch": 35.58, "grad_norm": 1.4613933563232422, "learning_rate": 2e-05, "loss": 0.02561338, "step": 17790 }, { "epoch": 35.582, "grad_norm": 1.3072844743728638, "learning_rate": 2e-05, "loss": 0.05350181, "step": 17791 }, { "epoch": 35.584, "grad_norm": 1.5525765419006348, "learning_rate": 2e-05, "loss": 0.05785905, "step": 17792 }, { "epoch": 35.586, "grad_norm": 1.8124005794525146, "learning_rate": 2e-05, "loss": 0.04928161, "step": 17793 }, { "epoch": 35.588, "grad_norm": 1.0615509748458862, "learning_rate": 2e-05, "loss": 0.04574425, "step": 17794 }, { "epoch": 35.59, "grad_norm": 1.153123140335083, "learning_rate": 2e-05, "loss": 0.04651198, "step": 17795 }, { "epoch": 35.592, "grad_norm": 1.0932104587554932, "learning_rate": 2e-05, "loss": 0.03670096, "step": 17796 }, { "epoch": 35.594, "grad_norm": 1.3459787368774414, "learning_rate": 2e-05, "loss": 0.04722689, "step": 17797 }, { "epoch": 35.596, "grad_norm": 1.6134040355682373, "learning_rate": 2e-05, "loss": 0.05787089, "step": 17798 }, { "epoch": 35.598, "grad_norm": 0.8614208698272705, "learning_rate": 2e-05, "loss": 0.02445607, "step": 17799 }, { "epoch": 35.6, "grad_norm": 1.3574453592300415, "learning_rate": 2e-05, "loss": 0.05048421, "step": 17800 }, { "epoch": 35.602, "grad_norm": 1.5967652797698975, "learning_rate": 2e-05, "loss": 0.05926722, "step": 17801 }, { "epoch": 35.604, "grad_norm": 1.3235586881637573, "learning_rate": 2e-05, "loss": 0.04076, "step": 17802 }, { "epoch": 35.606, "grad_norm": 1.4626275300979614, "learning_rate": 2e-05, "loss": 0.03682541, "step": 17803 }, { "epoch": 35.608, "grad_norm": 1.6847271919250488, "learning_rate": 2e-05, "loss": 0.0393777, "step": 17804 }, { "epoch": 35.61, "grad_norm": 2.216646194458008, "learning_rate": 2e-05, "loss": 0.03593418, "step": 17805 }, { "epoch": 35.612, "grad_norm": 1.109135627746582, "learning_rate": 2e-05, "loss": 0.03311303, "step": 17806 }, { "epoch": 35.614, "grad_norm": 1.2525417804718018, "learning_rate": 2e-05, "loss": 0.05682575, "step": 17807 }, { "epoch": 35.616, "grad_norm": 1.1396535634994507, "learning_rate": 2e-05, "loss": 0.0337383, "step": 17808 }, { "epoch": 35.618, "grad_norm": 0.8801586031913757, "learning_rate": 2e-05, "loss": 0.02758865, "step": 17809 }, { "epoch": 35.62, "grad_norm": 1.4734177589416504, "learning_rate": 2e-05, "loss": 0.04279312, "step": 17810 }, { "epoch": 35.622, "grad_norm": 1.2697399854660034, "learning_rate": 2e-05, "loss": 0.03729413, "step": 17811 }, { "epoch": 35.624, "grad_norm": 1.2622113227844238, "learning_rate": 2e-05, "loss": 0.05834858, "step": 17812 }, { "epoch": 35.626, "grad_norm": 1.3223472833633423, "learning_rate": 2e-05, "loss": 0.04107845, "step": 17813 }, { "epoch": 35.628, "grad_norm": 1.3146884441375732, "learning_rate": 2e-05, "loss": 0.05463982, "step": 17814 }, { "epoch": 35.63, "grad_norm": 0.9225746393203735, "learning_rate": 2e-05, "loss": 0.02599794, "step": 17815 }, { "epoch": 35.632, "grad_norm": 1.4230635166168213, "learning_rate": 2e-05, "loss": 0.04019064, "step": 17816 }, { "epoch": 35.634, "grad_norm": 1.6114380359649658, "learning_rate": 2e-05, "loss": 0.05018803, "step": 17817 }, { "epoch": 35.636, "grad_norm": 1.1191251277923584, "learning_rate": 2e-05, "loss": 0.03190347, "step": 17818 }, { "epoch": 35.638, "grad_norm": 2.0187177658081055, "learning_rate": 2e-05, "loss": 0.04886641, "step": 17819 }, { "epoch": 35.64, "grad_norm": 1.0409353971481323, "learning_rate": 2e-05, "loss": 0.03332942, "step": 17820 }, { "epoch": 35.642, "grad_norm": 1.3523070812225342, "learning_rate": 2e-05, "loss": 0.05481984, "step": 17821 }, { "epoch": 35.644, "grad_norm": 0.8652260899543762, "learning_rate": 2e-05, "loss": 0.02689623, "step": 17822 }, { "epoch": 35.646, "grad_norm": 1.0217618942260742, "learning_rate": 2e-05, "loss": 0.03837987, "step": 17823 }, { "epoch": 35.648, "grad_norm": 2.860708713531494, "learning_rate": 2e-05, "loss": 0.0455092, "step": 17824 }, { "epoch": 35.65, "grad_norm": 1.2912145853042603, "learning_rate": 2e-05, "loss": 0.04535623, "step": 17825 }, { "epoch": 35.652, "grad_norm": 1.1695078611373901, "learning_rate": 2e-05, "loss": 0.047709, "step": 17826 }, { "epoch": 35.654, "grad_norm": 1.5502129793167114, "learning_rate": 2e-05, "loss": 0.05648194, "step": 17827 }, { "epoch": 35.656, "grad_norm": 0.9700777530670166, "learning_rate": 2e-05, "loss": 0.02404735, "step": 17828 }, { "epoch": 35.658, "grad_norm": 1.0643705129623413, "learning_rate": 2e-05, "loss": 0.02897286, "step": 17829 }, { "epoch": 35.66, "grad_norm": 1.0671511888504028, "learning_rate": 2e-05, "loss": 0.03070662, "step": 17830 }, { "epoch": 35.662, "grad_norm": 1.3201302289962769, "learning_rate": 2e-05, "loss": 0.04533841, "step": 17831 }, { "epoch": 35.664, "grad_norm": 1.1541919708251953, "learning_rate": 2e-05, "loss": 0.044394, "step": 17832 }, { "epoch": 35.666, "grad_norm": 1.3019088506698608, "learning_rate": 2e-05, "loss": 0.06686329, "step": 17833 }, { "epoch": 35.668, "grad_norm": 1.0324286222457886, "learning_rate": 2e-05, "loss": 0.04079415, "step": 17834 }, { "epoch": 35.67, "grad_norm": 1.0423561334609985, "learning_rate": 2e-05, "loss": 0.03136035, "step": 17835 }, { "epoch": 35.672, "grad_norm": 1.6549469232559204, "learning_rate": 2e-05, "loss": 0.0495429, "step": 17836 }, { "epoch": 35.674, "grad_norm": 3.071321725845337, "learning_rate": 2e-05, "loss": 0.04680189, "step": 17837 }, { "epoch": 35.676, "grad_norm": 1.4656696319580078, "learning_rate": 2e-05, "loss": 0.04357903, "step": 17838 }, { "epoch": 35.678, "grad_norm": 1.0006139278411865, "learning_rate": 2e-05, "loss": 0.02649287, "step": 17839 }, { "epoch": 35.68, "grad_norm": 0.8368288278579712, "learning_rate": 2e-05, "loss": 0.02872172, "step": 17840 }, { "epoch": 35.682, "grad_norm": 1.1067603826522827, "learning_rate": 2e-05, "loss": 0.04076967, "step": 17841 }, { "epoch": 35.684, "grad_norm": 1.4088644981384277, "learning_rate": 2e-05, "loss": 0.04598125, "step": 17842 }, { "epoch": 35.686, "grad_norm": 1.1588618755340576, "learning_rate": 2e-05, "loss": 0.03723881, "step": 17843 }, { "epoch": 35.688, "grad_norm": 1.1325336694717407, "learning_rate": 2e-05, "loss": 0.04327301, "step": 17844 }, { "epoch": 35.69, "grad_norm": 2.3693675994873047, "learning_rate": 2e-05, "loss": 0.05009006, "step": 17845 }, { "epoch": 35.692, "grad_norm": 1.2514508962631226, "learning_rate": 2e-05, "loss": 0.03267618, "step": 17846 }, { "epoch": 35.694, "grad_norm": 1.3177813291549683, "learning_rate": 2e-05, "loss": 0.02680693, "step": 17847 }, { "epoch": 35.696, "grad_norm": 1.2522759437561035, "learning_rate": 2e-05, "loss": 0.04521048, "step": 17848 }, { "epoch": 35.698, "grad_norm": 1.2396931648254395, "learning_rate": 2e-05, "loss": 0.04618759, "step": 17849 }, { "epoch": 35.7, "grad_norm": 1.2118010520935059, "learning_rate": 2e-05, "loss": 0.03588796, "step": 17850 }, { "epoch": 35.702, "grad_norm": 1.1289559602737427, "learning_rate": 2e-05, "loss": 0.04039676, "step": 17851 }, { "epoch": 35.704, "grad_norm": 2.2850546836853027, "learning_rate": 2e-05, "loss": 0.04581477, "step": 17852 }, { "epoch": 35.706, "grad_norm": 1.0945602655410767, "learning_rate": 2e-05, "loss": 0.02884125, "step": 17853 }, { "epoch": 35.708, "grad_norm": 1.218193769454956, "learning_rate": 2e-05, "loss": 0.03631239, "step": 17854 }, { "epoch": 35.71, "grad_norm": 1.4844825267791748, "learning_rate": 2e-05, "loss": 0.02633794, "step": 17855 }, { "epoch": 35.712, "grad_norm": 2.8186700344085693, "learning_rate": 2e-05, "loss": 0.05136289, "step": 17856 }, { "epoch": 35.714, "grad_norm": 1.0413309335708618, "learning_rate": 2e-05, "loss": 0.02841013, "step": 17857 }, { "epoch": 35.716, "grad_norm": 1.4248929023742676, "learning_rate": 2e-05, "loss": 0.04046667, "step": 17858 }, { "epoch": 35.718, "grad_norm": 1.086219310760498, "learning_rate": 2e-05, "loss": 0.03643753, "step": 17859 }, { "epoch": 35.72, "grad_norm": 1.0413047075271606, "learning_rate": 2e-05, "loss": 0.03506773, "step": 17860 }, { "epoch": 35.722, "grad_norm": 1.138818383216858, "learning_rate": 2e-05, "loss": 0.04383482, "step": 17861 }, { "epoch": 35.724, "grad_norm": 1.0784032344818115, "learning_rate": 2e-05, "loss": 0.04035743, "step": 17862 }, { "epoch": 35.726, "grad_norm": 1.1928517818450928, "learning_rate": 2e-05, "loss": 0.04039065, "step": 17863 }, { "epoch": 35.728, "grad_norm": 1.3450567722320557, "learning_rate": 2e-05, "loss": 0.051686, "step": 17864 }, { "epoch": 35.73, "grad_norm": 1.3145865201950073, "learning_rate": 2e-05, "loss": 0.05856508, "step": 17865 }, { "epoch": 35.732, "grad_norm": 1.7338427305221558, "learning_rate": 2e-05, "loss": 0.05495846, "step": 17866 }, { "epoch": 35.734, "grad_norm": 1.2222492694854736, "learning_rate": 2e-05, "loss": 0.04681749, "step": 17867 }, { "epoch": 35.736, "grad_norm": 2.382176399230957, "learning_rate": 2e-05, "loss": 0.0592317, "step": 17868 }, { "epoch": 35.738, "grad_norm": 1.3851975202560425, "learning_rate": 2e-05, "loss": 0.05830447, "step": 17869 }, { "epoch": 35.74, "grad_norm": 1.8277791738510132, "learning_rate": 2e-05, "loss": 0.04027114, "step": 17870 }, { "epoch": 35.742, "grad_norm": 1.3364810943603516, "learning_rate": 2e-05, "loss": 0.04788211, "step": 17871 }, { "epoch": 35.744, "grad_norm": 1.4539287090301514, "learning_rate": 2e-05, "loss": 0.05281156, "step": 17872 }, { "epoch": 35.746, "grad_norm": 2.4535436630249023, "learning_rate": 2e-05, "loss": 0.05732207, "step": 17873 }, { "epoch": 35.748, "grad_norm": 1.3156609535217285, "learning_rate": 2e-05, "loss": 0.04180748, "step": 17874 }, { "epoch": 35.75, "grad_norm": 1.083983063697815, "learning_rate": 2e-05, "loss": 0.02724944, "step": 17875 }, { "epoch": 35.752, "grad_norm": 1.2590404748916626, "learning_rate": 2e-05, "loss": 0.04488721, "step": 17876 }, { "epoch": 35.754, "grad_norm": 1.1767133474349976, "learning_rate": 2e-05, "loss": 0.03508144, "step": 17877 }, { "epoch": 35.756, "grad_norm": 0.9333146810531616, "learning_rate": 2e-05, "loss": 0.03762346, "step": 17878 }, { "epoch": 35.758, "grad_norm": 3.9581522941589355, "learning_rate": 2e-05, "loss": 0.05363543, "step": 17879 }, { "epoch": 35.76, "grad_norm": 1.9999139308929443, "learning_rate": 2e-05, "loss": 0.05396073, "step": 17880 }, { "epoch": 35.762, "grad_norm": 1.12847900390625, "learning_rate": 2e-05, "loss": 0.04188031, "step": 17881 }, { "epoch": 35.764, "grad_norm": 1.221652626991272, "learning_rate": 2e-05, "loss": 0.04514348, "step": 17882 }, { "epoch": 35.766, "grad_norm": 1.2314302921295166, "learning_rate": 2e-05, "loss": 0.04036722, "step": 17883 }, { "epoch": 35.768, "grad_norm": 1.4413890838623047, "learning_rate": 2e-05, "loss": 0.05193814, "step": 17884 }, { "epoch": 35.77, "grad_norm": 1.3951367139816284, "learning_rate": 2e-05, "loss": 0.03265073, "step": 17885 }, { "epoch": 35.772, "grad_norm": 1.0324435234069824, "learning_rate": 2e-05, "loss": 0.03620376, "step": 17886 }, { "epoch": 35.774, "grad_norm": 1.1713546514511108, "learning_rate": 2e-05, "loss": 0.04896335, "step": 17887 }, { "epoch": 35.776, "grad_norm": 0.991287350654602, "learning_rate": 2e-05, "loss": 0.03548204, "step": 17888 }, { "epoch": 35.778, "grad_norm": 2.7516236305236816, "learning_rate": 2e-05, "loss": 0.06211228, "step": 17889 }, { "epoch": 35.78, "grad_norm": 2.7214620113372803, "learning_rate": 2e-05, "loss": 0.05013406, "step": 17890 }, { "epoch": 35.782, "grad_norm": 1.233435034751892, "learning_rate": 2e-05, "loss": 0.04874311, "step": 17891 }, { "epoch": 35.784, "grad_norm": 1.7725192308425903, "learning_rate": 2e-05, "loss": 0.04005573, "step": 17892 }, { "epoch": 35.786, "grad_norm": 1.3703975677490234, "learning_rate": 2e-05, "loss": 0.04984409, "step": 17893 }, { "epoch": 35.788, "grad_norm": 1.3456453084945679, "learning_rate": 2e-05, "loss": 0.04753321, "step": 17894 }, { "epoch": 35.79, "grad_norm": 1.2621350288391113, "learning_rate": 2e-05, "loss": 0.05199986, "step": 17895 }, { "epoch": 35.792, "grad_norm": 1.3152614831924438, "learning_rate": 2e-05, "loss": 0.03647928, "step": 17896 }, { "epoch": 35.794, "grad_norm": 0.9398272633552551, "learning_rate": 2e-05, "loss": 0.02891801, "step": 17897 }, { "epoch": 35.796, "grad_norm": 1.0658025741577148, "learning_rate": 2e-05, "loss": 0.03435162, "step": 17898 }, { "epoch": 35.798, "grad_norm": 1.157807469367981, "learning_rate": 2e-05, "loss": 0.03711173, "step": 17899 }, { "epoch": 35.8, "grad_norm": 2.451808214187622, "learning_rate": 2e-05, "loss": 0.03681424, "step": 17900 }, { "epoch": 35.802, "grad_norm": 1.0727334022521973, "learning_rate": 2e-05, "loss": 0.04209089, "step": 17901 }, { "epoch": 35.804, "grad_norm": 1.314285159111023, "learning_rate": 2e-05, "loss": 0.0380992, "step": 17902 }, { "epoch": 35.806, "grad_norm": 1.1336785554885864, "learning_rate": 2e-05, "loss": 0.03314412, "step": 17903 }, { "epoch": 35.808, "grad_norm": 1.7368667125701904, "learning_rate": 2e-05, "loss": 0.0345618, "step": 17904 }, { "epoch": 35.81, "grad_norm": 2.1174581050872803, "learning_rate": 2e-05, "loss": 0.06510151, "step": 17905 }, { "epoch": 35.812, "grad_norm": 1.6650149822235107, "learning_rate": 2e-05, "loss": 0.04526421, "step": 17906 }, { "epoch": 35.814, "grad_norm": 1.2398277521133423, "learning_rate": 2e-05, "loss": 0.037236, "step": 17907 }, { "epoch": 35.816, "grad_norm": 1.1168713569641113, "learning_rate": 2e-05, "loss": 0.03919902, "step": 17908 }, { "epoch": 35.818, "grad_norm": 1.6926127672195435, "learning_rate": 2e-05, "loss": 0.05567209, "step": 17909 }, { "epoch": 35.82, "grad_norm": 0.9734522104263306, "learning_rate": 2e-05, "loss": 0.0383275, "step": 17910 }, { "epoch": 35.822, "grad_norm": 1.0680774450302124, "learning_rate": 2e-05, "loss": 0.04609864, "step": 17911 }, { "epoch": 35.824, "grad_norm": 1.3214465379714966, "learning_rate": 2e-05, "loss": 0.04425301, "step": 17912 }, { "epoch": 35.826, "grad_norm": 1.187393307685852, "learning_rate": 2e-05, "loss": 0.04773477, "step": 17913 }, { "epoch": 35.828, "grad_norm": 2.846890926361084, "learning_rate": 2e-05, "loss": 0.04056527, "step": 17914 }, { "epoch": 35.83, "grad_norm": 1.2541725635528564, "learning_rate": 2e-05, "loss": 0.04198845, "step": 17915 }, { "epoch": 35.832, "grad_norm": 1.2594901323318481, "learning_rate": 2e-05, "loss": 0.04509019, "step": 17916 }, { "epoch": 35.834, "grad_norm": 1.6290315389633179, "learning_rate": 2e-05, "loss": 0.04840211, "step": 17917 }, { "epoch": 35.836, "grad_norm": 1.9645651578903198, "learning_rate": 2e-05, "loss": 0.04485439, "step": 17918 }, { "epoch": 35.838, "grad_norm": 1.1047340631484985, "learning_rate": 2e-05, "loss": 0.04456278, "step": 17919 }, { "epoch": 35.84, "grad_norm": 1.4236819744110107, "learning_rate": 2e-05, "loss": 0.05413448, "step": 17920 }, { "epoch": 35.842, "grad_norm": 1.3188161849975586, "learning_rate": 2e-05, "loss": 0.05367437, "step": 17921 }, { "epoch": 35.844, "grad_norm": 1.1377904415130615, "learning_rate": 2e-05, "loss": 0.04965197, "step": 17922 }, { "epoch": 35.846, "grad_norm": 2.1205976009368896, "learning_rate": 2e-05, "loss": 0.05599382, "step": 17923 }, { "epoch": 35.848, "grad_norm": 1.2316977977752686, "learning_rate": 2e-05, "loss": 0.04126142, "step": 17924 }, { "epoch": 35.85, "grad_norm": 1.2489413022994995, "learning_rate": 2e-05, "loss": 0.04298563, "step": 17925 }, { "epoch": 35.852, "grad_norm": 1.4723291397094727, "learning_rate": 2e-05, "loss": 0.04364316, "step": 17926 }, { "epoch": 35.854, "grad_norm": 0.9845232963562012, "learning_rate": 2e-05, "loss": 0.03328026, "step": 17927 }, { "epoch": 35.856, "grad_norm": 1.11769700050354, "learning_rate": 2e-05, "loss": 0.03830117, "step": 17928 }, { "epoch": 35.858, "grad_norm": 1.0789967775344849, "learning_rate": 2e-05, "loss": 0.03304796, "step": 17929 }, { "epoch": 35.86, "grad_norm": 1.4628854990005493, "learning_rate": 2e-05, "loss": 0.04162867, "step": 17930 }, { "epoch": 35.862, "grad_norm": 1.1505697965621948, "learning_rate": 2e-05, "loss": 0.04003832, "step": 17931 }, { "epoch": 35.864, "grad_norm": 0.9786707758903503, "learning_rate": 2e-05, "loss": 0.04291322, "step": 17932 }, { "epoch": 35.866, "grad_norm": 1.514482021331787, "learning_rate": 2e-05, "loss": 0.04682028, "step": 17933 }, { "epoch": 35.868, "grad_norm": 1.150888442993164, "learning_rate": 2e-05, "loss": 0.04786021, "step": 17934 }, { "epoch": 35.87, "grad_norm": 1.2002613544464111, "learning_rate": 2e-05, "loss": 0.0451806, "step": 17935 }, { "epoch": 35.872, "grad_norm": 1.3272571563720703, "learning_rate": 2e-05, "loss": 0.05705176, "step": 17936 }, { "epoch": 35.874, "grad_norm": 1.1266093254089355, "learning_rate": 2e-05, "loss": 0.03312952, "step": 17937 }, { "epoch": 35.876, "grad_norm": 1.2619445323944092, "learning_rate": 2e-05, "loss": 0.04542191, "step": 17938 }, { "epoch": 35.878, "grad_norm": 1.2090903520584106, "learning_rate": 2e-05, "loss": 0.04657109, "step": 17939 }, { "epoch": 35.88, "grad_norm": 1.0083225965499878, "learning_rate": 2e-05, "loss": 0.03867437, "step": 17940 }, { "epoch": 35.882, "grad_norm": 1.0939233303070068, "learning_rate": 2e-05, "loss": 0.02842722, "step": 17941 }, { "epoch": 35.884, "grad_norm": 1.0067133903503418, "learning_rate": 2e-05, "loss": 0.03263469, "step": 17942 }, { "epoch": 35.886, "grad_norm": 1.1299545764923096, "learning_rate": 2e-05, "loss": 0.04446633, "step": 17943 }, { "epoch": 35.888, "grad_norm": 1.0273183584213257, "learning_rate": 2e-05, "loss": 0.05045258, "step": 17944 }, { "epoch": 35.89, "grad_norm": 1.154341697692871, "learning_rate": 2e-05, "loss": 0.03366087, "step": 17945 }, { "epoch": 35.892, "grad_norm": 1.1345313787460327, "learning_rate": 2e-05, "loss": 0.03205415, "step": 17946 }, { "epoch": 35.894, "grad_norm": 1.1819157600402832, "learning_rate": 2e-05, "loss": 0.04350836, "step": 17947 }, { "epoch": 35.896, "grad_norm": 1.7022780179977417, "learning_rate": 2e-05, "loss": 0.048735, "step": 17948 }, { "epoch": 35.898, "grad_norm": 0.8520524501800537, "learning_rate": 2e-05, "loss": 0.02894574, "step": 17949 }, { "epoch": 35.9, "grad_norm": 1.0947223901748657, "learning_rate": 2e-05, "loss": 0.03767444, "step": 17950 }, { "epoch": 35.902, "grad_norm": 2.9594931602478027, "learning_rate": 2e-05, "loss": 0.0309197, "step": 17951 }, { "epoch": 35.904, "grad_norm": 1.3062098026275635, "learning_rate": 2e-05, "loss": 0.04968993, "step": 17952 }, { "epoch": 35.906, "grad_norm": 1.0888330936431885, "learning_rate": 2e-05, "loss": 0.04075143, "step": 17953 }, { "epoch": 35.908, "grad_norm": 1.0041322708129883, "learning_rate": 2e-05, "loss": 0.03644698, "step": 17954 }, { "epoch": 35.91, "grad_norm": 1.0307637453079224, "learning_rate": 2e-05, "loss": 0.04232953, "step": 17955 }, { "epoch": 35.912, "grad_norm": 1.2270115613937378, "learning_rate": 2e-05, "loss": 0.04430905, "step": 17956 }, { "epoch": 35.914, "grad_norm": 1.2956207990646362, "learning_rate": 2e-05, "loss": 0.06014199, "step": 17957 }, { "epoch": 35.916, "grad_norm": 1.5076323747634888, "learning_rate": 2e-05, "loss": 0.0593874, "step": 17958 }, { "epoch": 35.918, "grad_norm": 1.1515308618545532, "learning_rate": 2e-05, "loss": 0.04382184, "step": 17959 }, { "epoch": 35.92, "grad_norm": 1.1946406364440918, "learning_rate": 2e-05, "loss": 0.05360639, "step": 17960 }, { "epoch": 35.922, "grad_norm": 1.332234501838684, "learning_rate": 2e-05, "loss": 0.03495939, "step": 17961 }, { "epoch": 35.924, "grad_norm": 1.5882817506790161, "learning_rate": 2e-05, "loss": 0.02272969, "step": 17962 }, { "epoch": 35.926, "grad_norm": 1.1679266691207886, "learning_rate": 2e-05, "loss": 0.05053775, "step": 17963 }, { "epoch": 35.928, "grad_norm": 1.0632550716400146, "learning_rate": 2e-05, "loss": 0.04534806, "step": 17964 }, { "epoch": 35.93, "grad_norm": 1.6096125841140747, "learning_rate": 2e-05, "loss": 0.0572957, "step": 17965 }, { "epoch": 35.932, "grad_norm": 1.2726202011108398, "learning_rate": 2e-05, "loss": 0.04721672, "step": 17966 }, { "epoch": 35.934, "grad_norm": 1.637152075767517, "learning_rate": 2e-05, "loss": 0.03014551, "step": 17967 }, { "epoch": 35.936, "grad_norm": 1.2941720485687256, "learning_rate": 2e-05, "loss": 0.04506875, "step": 17968 }, { "epoch": 35.938, "grad_norm": 1.165555477142334, "learning_rate": 2e-05, "loss": 0.04141459, "step": 17969 }, { "epoch": 35.94, "grad_norm": 1.2198277711868286, "learning_rate": 2e-05, "loss": 0.04705646, "step": 17970 }, { "epoch": 35.942, "grad_norm": 1.2274084091186523, "learning_rate": 2e-05, "loss": 0.04948511, "step": 17971 }, { "epoch": 35.944, "grad_norm": 1.474108338356018, "learning_rate": 2e-05, "loss": 0.04234532, "step": 17972 }, { "epoch": 35.946, "grad_norm": 1.1247254610061646, "learning_rate": 2e-05, "loss": 0.04175204, "step": 17973 }, { "epoch": 35.948, "grad_norm": 1.2691006660461426, "learning_rate": 2e-05, "loss": 0.05016943, "step": 17974 }, { "epoch": 35.95, "grad_norm": 1.319199800491333, "learning_rate": 2e-05, "loss": 0.05450393, "step": 17975 }, { "epoch": 35.952, "grad_norm": 1.6631337404251099, "learning_rate": 2e-05, "loss": 0.05106764, "step": 17976 }, { "epoch": 35.954, "grad_norm": 1.2125028371810913, "learning_rate": 2e-05, "loss": 0.05636699, "step": 17977 }, { "epoch": 35.956, "grad_norm": 1.0986932516098022, "learning_rate": 2e-05, "loss": 0.04804093, "step": 17978 }, { "epoch": 35.958, "grad_norm": 2.018481731414795, "learning_rate": 2e-05, "loss": 0.04954034, "step": 17979 }, { "epoch": 35.96, "grad_norm": 1.1213892698287964, "learning_rate": 2e-05, "loss": 0.03687691, "step": 17980 }, { "epoch": 35.962, "grad_norm": 1.2770687341690063, "learning_rate": 2e-05, "loss": 0.04670438, "step": 17981 }, { "epoch": 35.964, "grad_norm": 1.8598698377609253, "learning_rate": 2e-05, "loss": 0.04108175, "step": 17982 }, { "epoch": 35.966, "grad_norm": 0.9518316388130188, "learning_rate": 2e-05, "loss": 0.03023814, "step": 17983 }, { "epoch": 35.968, "grad_norm": 1.3764249086380005, "learning_rate": 2e-05, "loss": 0.03840397, "step": 17984 }, { "epoch": 35.97, "grad_norm": 0.9351934194564819, "learning_rate": 2e-05, "loss": 0.03416009, "step": 17985 }, { "epoch": 35.972, "grad_norm": 1.1933887004852295, "learning_rate": 2e-05, "loss": 0.03491713, "step": 17986 }, { "epoch": 35.974, "grad_norm": 1.3929616212844849, "learning_rate": 2e-05, "loss": 0.05122756, "step": 17987 }, { "epoch": 35.976, "grad_norm": 1.3909257650375366, "learning_rate": 2e-05, "loss": 0.04380251, "step": 17988 }, { "epoch": 35.978, "grad_norm": 1.3311553001403809, "learning_rate": 2e-05, "loss": 0.04476759, "step": 17989 }, { "epoch": 35.98, "grad_norm": 1.3036085367202759, "learning_rate": 2e-05, "loss": 0.05047145, "step": 17990 }, { "epoch": 35.982, "grad_norm": 1.5605629682540894, "learning_rate": 2e-05, "loss": 0.04389017, "step": 17991 }, { "epoch": 35.984, "grad_norm": 1.2548414468765259, "learning_rate": 2e-05, "loss": 0.03170571, "step": 17992 }, { "epoch": 35.986, "grad_norm": 1.2417798042297363, "learning_rate": 2e-05, "loss": 0.04844061, "step": 17993 }, { "epoch": 35.988, "grad_norm": 1.1286331415176392, "learning_rate": 2e-05, "loss": 0.04077397, "step": 17994 }, { "epoch": 35.99, "grad_norm": 1.581292748451233, "learning_rate": 2e-05, "loss": 0.03007511, "step": 17995 }, { "epoch": 35.992, "grad_norm": 1.491782784461975, "learning_rate": 2e-05, "loss": 0.03442695, "step": 17996 }, { "epoch": 35.994, "grad_norm": 1.134804368019104, "learning_rate": 2e-05, "loss": 0.03684225, "step": 17997 }, { "epoch": 35.996, "grad_norm": 2.2671315670013428, "learning_rate": 2e-05, "loss": 0.04276287, "step": 17998 }, { "epoch": 35.998, "grad_norm": 1.1745227575302124, "learning_rate": 2e-05, "loss": 0.047242, "step": 17999 }, { "epoch": 36.0, "grad_norm": 1.1241718530654907, "learning_rate": 2e-05, "loss": 0.0284595, "step": 18000 }, { "epoch": 36.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9820359281437125, "Equal_1": 0.998, "Equal_2": 0.9800399201596807, "Equal_3": 0.9900199600798403, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9959919839679359, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.994, "Perpendicular_1": 0.994, "Perpendicular_2": 0.994, "Perpendicular_3": 0.8837675350701403, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.996, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9880239520958084 }, "eval_runtime": 319.9643, "eval_samples_per_second": 32.816, "eval_steps_per_second": 0.656, "step": 18000 }, { "epoch": 36.002, "grad_norm": 1.679438829421997, "learning_rate": 2e-05, "loss": 0.06195679, "step": 18001 }, { "epoch": 36.004, "grad_norm": 1.7376255989074707, "learning_rate": 2e-05, "loss": 0.06136629, "step": 18002 }, { "epoch": 36.006, "grad_norm": 1.0917325019836426, "learning_rate": 2e-05, "loss": 0.04106084, "step": 18003 }, { "epoch": 36.008, "grad_norm": 1.2398790121078491, "learning_rate": 2e-05, "loss": 0.05541268, "step": 18004 }, { "epoch": 36.01, "grad_norm": 1.5430097579956055, "learning_rate": 2e-05, "loss": 0.04105628, "step": 18005 }, { "epoch": 36.012, "grad_norm": 1.0548675060272217, "learning_rate": 2e-05, "loss": 0.03720319, "step": 18006 }, { "epoch": 36.014, "grad_norm": 1.1223161220550537, "learning_rate": 2e-05, "loss": 0.03772051, "step": 18007 }, { "epoch": 36.016, "grad_norm": 1.4329164028167725, "learning_rate": 2e-05, "loss": 0.04604128, "step": 18008 }, { "epoch": 36.018, "grad_norm": 2.9451615810394287, "learning_rate": 2e-05, "loss": 0.0591033, "step": 18009 }, { "epoch": 36.02, "grad_norm": 1.7627770900726318, "learning_rate": 2e-05, "loss": 0.05250013, "step": 18010 }, { "epoch": 36.022, "grad_norm": 0.9771580696105957, "learning_rate": 2e-05, "loss": 0.04982607, "step": 18011 }, { "epoch": 36.024, "grad_norm": 1.4142186641693115, "learning_rate": 2e-05, "loss": 0.05801858, "step": 18012 }, { "epoch": 36.026, "grad_norm": 0.9408489465713501, "learning_rate": 2e-05, "loss": 0.0294435, "step": 18013 }, { "epoch": 36.028, "grad_norm": 1.266186237335205, "learning_rate": 2e-05, "loss": 0.04173048, "step": 18014 }, { "epoch": 36.03, "grad_norm": 1.2469063997268677, "learning_rate": 2e-05, "loss": 0.05035538, "step": 18015 }, { "epoch": 36.032, "grad_norm": 0.9710808396339417, "learning_rate": 2e-05, "loss": 0.03061551, "step": 18016 }, { "epoch": 36.034, "grad_norm": 1.1993094682693481, "learning_rate": 2e-05, "loss": 0.0386519, "step": 18017 }, { "epoch": 36.036, "grad_norm": 1.0367350578308105, "learning_rate": 2e-05, "loss": 0.04080002, "step": 18018 }, { "epoch": 36.038, "grad_norm": 1.0484521389007568, "learning_rate": 2e-05, "loss": 0.03632751, "step": 18019 }, { "epoch": 36.04, "grad_norm": 1.096776008605957, "learning_rate": 2e-05, "loss": 0.03241234, "step": 18020 }, { "epoch": 36.042, "grad_norm": 1.2367771863937378, "learning_rate": 2e-05, "loss": 0.04970008, "step": 18021 }, { "epoch": 36.044, "grad_norm": 1.095942735671997, "learning_rate": 2e-05, "loss": 0.03803166, "step": 18022 }, { "epoch": 36.046, "grad_norm": 1.2419805526733398, "learning_rate": 2e-05, "loss": 0.03578827, "step": 18023 }, { "epoch": 36.048, "grad_norm": 0.9755739569664001, "learning_rate": 2e-05, "loss": 0.03225515, "step": 18024 }, { "epoch": 36.05, "grad_norm": 0.9210935235023499, "learning_rate": 2e-05, "loss": 0.03545808, "step": 18025 }, { "epoch": 36.052, "grad_norm": 1.514643669128418, "learning_rate": 2e-05, "loss": 0.04578691, "step": 18026 }, { "epoch": 36.054, "grad_norm": 1.3199200630187988, "learning_rate": 2e-05, "loss": 0.04699472, "step": 18027 }, { "epoch": 36.056, "grad_norm": 1.3400589227676392, "learning_rate": 2e-05, "loss": 0.04195932, "step": 18028 }, { "epoch": 36.058, "grad_norm": 1.3302844762802124, "learning_rate": 2e-05, "loss": 0.05917486, "step": 18029 }, { "epoch": 36.06, "grad_norm": 0.9098374247550964, "learning_rate": 2e-05, "loss": 0.02756627, "step": 18030 }, { "epoch": 36.062, "grad_norm": 0.9550777673721313, "learning_rate": 2e-05, "loss": 0.02522951, "step": 18031 }, { "epoch": 36.064, "grad_norm": 1.3498256206512451, "learning_rate": 2e-05, "loss": 0.03331952, "step": 18032 }, { "epoch": 36.066, "grad_norm": 1.149773359298706, "learning_rate": 2e-05, "loss": 0.04008397, "step": 18033 }, { "epoch": 36.068, "grad_norm": 1.6260327100753784, "learning_rate": 2e-05, "loss": 0.02002457, "step": 18034 }, { "epoch": 36.07, "grad_norm": 1.0886062383651733, "learning_rate": 2e-05, "loss": 0.03689205, "step": 18035 }, { "epoch": 36.072, "grad_norm": 2.1114742755889893, "learning_rate": 2e-05, "loss": 0.04384237, "step": 18036 }, { "epoch": 36.074, "grad_norm": 1.199912428855896, "learning_rate": 2e-05, "loss": 0.03832989, "step": 18037 }, { "epoch": 36.076, "grad_norm": 1.1416548490524292, "learning_rate": 2e-05, "loss": 0.04295663, "step": 18038 }, { "epoch": 36.078, "grad_norm": 5.6245245933532715, "learning_rate": 2e-05, "loss": 0.04268225, "step": 18039 }, { "epoch": 36.08, "grad_norm": 1.2393498420715332, "learning_rate": 2e-05, "loss": 0.03811008, "step": 18040 }, { "epoch": 36.082, "grad_norm": 1.0027004480361938, "learning_rate": 2e-05, "loss": 0.03410269, "step": 18041 }, { "epoch": 36.084, "grad_norm": 1.7057132720947266, "learning_rate": 2e-05, "loss": 0.03965345, "step": 18042 }, { "epoch": 36.086, "grad_norm": 0.9601574540138245, "learning_rate": 2e-05, "loss": 0.02651468, "step": 18043 }, { "epoch": 36.088, "grad_norm": 1.2537764310836792, "learning_rate": 2e-05, "loss": 0.03860223, "step": 18044 }, { "epoch": 36.09, "grad_norm": 1.3177660703659058, "learning_rate": 2e-05, "loss": 0.05462038, "step": 18045 }, { "epoch": 36.092, "grad_norm": 1.2594581842422485, "learning_rate": 2e-05, "loss": 0.04445657, "step": 18046 }, { "epoch": 36.094, "grad_norm": 1.132057547569275, "learning_rate": 2e-05, "loss": 0.05026545, "step": 18047 }, { "epoch": 36.096, "grad_norm": 1.5587725639343262, "learning_rate": 2e-05, "loss": 0.049737, "step": 18048 }, { "epoch": 36.098, "grad_norm": 1.235729694366455, "learning_rate": 2e-05, "loss": 0.04665924, "step": 18049 }, { "epoch": 36.1, "grad_norm": 1.4784846305847168, "learning_rate": 2e-05, "loss": 0.04161521, "step": 18050 }, { "epoch": 36.102, "grad_norm": 1.898224115371704, "learning_rate": 2e-05, "loss": 0.05342315, "step": 18051 }, { "epoch": 36.104, "grad_norm": 1.1921885013580322, "learning_rate": 2e-05, "loss": 0.03857575, "step": 18052 }, { "epoch": 36.106, "grad_norm": 1.1267322301864624, "learning_rate": 2e-05, "loss": 0.04110962, "step": 18053 }, { "epoch": 36.108, "grad_norm": 1.1211832761764526, "learning_rate": 2e-05, "loss": 0.04066541, "step": 18054 }, { "epoch": 36.11, "grad_norm": 1.1396983861923218, "learning_rate": 2e-05, "loss": 0.03554956, "step": 18055 }, { "epoch": 36.112, "grad_norm": 4.26861572265625, "learning_rate": 2e-05, "loss": 0.04480337, "step": 18056 }, { "epoch": 36.114, "grad_norm": 1.3064028024673462, "learning_rate": 2e-05, "loss": 0.05451906, "step": 18057 }, { "epoch": 36.116, "grad_norm": 1.1184580326080322, "learning_rate": 2e-05, "loss": 0.04466115, "step": 18058 }, { "epoch": 36.118, "grad_norm": 2.734436273574829, "learning_rate": 2e-05, "loss": 0.04473677, "step": 18059 }, { "epoch": 36.12, "grad_norm": 1.1926535367965698, "learning_rate": 2e-05, "loss": 0.04510562, "step": 18060 }, { "epoch": 36.122, "grad_norm": 2.227550506591797, "learning_rate": 2e-05, "loss": 0.03720244, "step": 18061 }, { "epoch": 36.124, "grad_norm": 1.2409284114837646, "learning_rate": 2e-05, "loss": 0.04574725, "step": 18062 }, { "epoch": 36.126, "grad_norm": 1.0186352729797363, "learning_rate": 2e-05, "loss": 0.03966548, "step": 18063 }, { "epoch": 36.128, "grad_norm": 1.098799467086792, "learning_rate": 2e-05, "loss": 0.04066562, "step": 18064 }, { "epoch": 36.13, "grad_norm": 0.9615515470504761, "learning_rate": 2e-05, "loss": 0.02897996, "step": 18065 }, { "epoch": 36.132, "grad_norm": 2.371126413345337, "learning_rate": 2e-05, "loss": 0.06829099, "step": 18066 }, { "epoch": 36.134, "grad_norm": 1.4561325311660767, "learning_rate": 2e-05, "loss": 0.04208488, "step": 18067 }, { "epoch": 36.136, "grad_norm": 1.1017025709152222, "learning_rate": 2e-05, "loss": 0.04764595, "step": 18068 }, { "epoch": 36.138, "grad_norm": 1.6677707433700562, "learning_rate": 2e-05, "loss": 0.03898593, "step": 18069 }, { "epoch": 36.14, "grad_norm": 1.2435901165008545, "learning_rate": 2e-05, "loss": 0.03861672, "step": 18070 }, { "epoch": 36.142, "grad_norm": 1.3686686754226685, "learning_rate": 2e-05, "loss": 0.05133841, "step": 18071 }, { "epoch": 36.144, "grad_norm": 1.1071763038635254, "learning_rate": 2e-05, "loss": 0.04213175, "step": 18072 }, { "epoch": 36.146, "grad_norm": 1.1424134969711304, "learning_rate": 2e-05, "loss": 0.04421389, "step": 18073 }, { "epoch": 36.148, "grad_norm": 1.2619765996932983, "learning_rate": 2e-05, "loss": 0.05113876, "step": 18074 }, { "epoch": 36.15, "grad_norm": 2.3322865962982178, "learning_rate": 2e-05, "loss": 0.05380969, "step": 18075 }, { "epoch": 36.152, "grad_norm": 1.5360093116760254, "learning_rate": 2e-05, "loss": 0.03704157, "step": 18076 }, { "epoch": 36.154, "grad_norm": 2.7045676708221436, "learning_rate": 2e-05, "loss": 0.03724046, "step": 18077 }, { "epoch": 36.156, "grad_norm": 1.778171181678772, "learning_rate": 2e-05, "loss": 0.06204282, "step": 18078 }, { "epoch": 36.158, "grad_norm": 2.811089277267456, "learning_rate": 2e-05, "loss": 0.04121366, "step": 18079 }, { "epoch": 36.16, "grad_norm": 1.1129862070083618, "learning_rate": 2e-05, "loss": 0.03516244, "step": 18080 }, { "epoch": 36.162, "grad_norm": 1.1569979190826416, "learning_rate": 2e-05, "loss": 0.04278459, "step": 18081 }, { "epoch": 36.164, "grad_norm": 1.064875841140747, "learning_rate": 2e-05, "loss": 0.03698279, "step": 18082 }, { "epoch": 36.166, "grad_norm": 1.0886690616607666, "learning_rate": 2e-05, "loss": 0.04283421, "step": 18083 }, { "epoch": 36.168, "grad_norm": 3.398261785507202, "learning_rate": 2e-05, "loss": 0.03867529, "step": 18084 }, { "epoch": 36.17, "grad_norm": 1.657079815864563, "learning_rate": 2e-05, "loss": 0.04585808, "step": 18085 }, { "epoch": 36.172, "grad_norm": 1.3320543766021729, "learning_rate": 2e-05, "loss": 0.04661385, "step": 18086 }, { "epoch": 36.174, "grad_norm": 0.8988568782806396, "learning_rate": 2e-05, "loss": 0.02427765, "step": 18087 }, { "epoch": 36.176, "grad_norm": 1.284536361694336, "learning_rate": 2e-05, "loss": 0.0532205, "step": 18088 }, { "epoch": 36.178, "grad_norm": 1.1643673181533813, "learning_rate": 2e-05, "loss": 0.03632015, "step": 18089 }, { "epoch": 36.18, "grad_norm": 1.3021336793899536, "learning_rate": 2e-05, "loss": 0.05177925, "step": 18090 }, { "epoch": 36.182, "grad_norm": 2.0210723876953125, "learning_rate": 2e-05, "loss": 0.04610447, "step": 18091 }, { "epoch": 36.184, "grad_norm": 0.9699888229370117, "learning_rate": 2e-05, "loss": 0.0398217, "step": 18092 }, { "epoch": 36.186, "grad_norm": 1.0085978507995605, "learning_rate": 2e-05, "loss": 0.03745851, "step": 18093 }, { "epoch": 36.188, "grad_norm": 1.3533179759979248, "learning_rate": 2e-05, "loss": 0.05687679, "step": 18094 }, { "epoch": 36.19, "grad_norm": 1.0797785520553589, "learning_rate": 2e-05, "loss": 0.04193022, "step": 18095 }, { "epoch": 36.192, "grad_norm": 1.4561572074890137, "learning_rate": 2e-05, "loss": 0.03306325, "step": 18096 }, { "epoch": 36.194, "grad_norm": 1.2475993633270264, "learning_rate": 2e-05, "loss": 0.03667925, "step": 18097 }, { "epoch": 36.196, "grad_norm": 0.9133176803588867, "learning_rate": 2e-05, "loss": 0.02343579, "step": 18098 }, { "epoch": 36.198, "grad_norm": 1.6272944211959839, "learning_rate": 2e-05, "loss": 0.04554627, "step": 18099 }, { "epoch": 36.2, "grad_norm": 1.5745600461959839, "learning_rate": 2e-05, "loss": 0.04788917, "step": 18100 }, { "epoch": 36.202, "grad_norm": 1.2428606748580933, "learning_rate": 2e-05, "loss": 0.04026973, "step": 18101 }, { "epoch": 36.204, "grad_norm": 1.6128360033035278, "learning_rate": 2e-05, "loss": 0.05024995, "step": 18102 }, { "epoch": 36.206, "grad_norm": 1.0230610370635986, "learning_rate": 2e-05, "loss": 0.03382932, "step": 18103 }, { "epoch": 36.208, "grad_norm": 1.075156331062317, "learning_rate": 2e-05, "loss": 0.04463224, "step": 18104 }, { "epoch": 36.21, "grad_norm": 3.846419095993042, "learning_rate": 2e-05, "loss": 0.05278007, "step": 18105 }, { "epoch": 36.212, "grad_norm": 0.9034450054168701, "learning_rate": 2e-05, "loss": 0.03557969, "step": 18106 }, { "epoch": 36.214, "grad_norm": 0.923078179359436, "learning_rate": 2e-05, "loss": 0.03522691, "step": 18107 }, { "epoch": 36.216, "grad_norm": 1.3479729890823364, "learning_rate": 2e-05, "loss": 0.0439695, "step": 18108 }, { "epoch": 36.218, "grad_norm": 2.1420326232910156, "learning_rate": 2e-05, "loss": 0.06162531, "step": 18109 }, { "epoch": 36.22, "grad_norm": 1.184019684791565, "learning_rate": 2e-05, "loss": 0.04064212, "step": 18110 }, { "epoch": 36.222, "grad_norm": 1.0127112865447998, "learning_rate": 2e-05, "loss": 0.03657559, "step": 18111 }, { "epoch": 36.224, "grad_norm": 0.9742773175239563, "learning_rate": 2e-05, "loss": 0.03809122, "step": 18112 }, { "epoch": 36.226, "grad_norm": 1.1490448713302612, "learning_rate": 2e-05, "loss": 0.04694427, "step": 18113 }, { "epoch": 36.228, "grad_norm": 0.9744361042976379, "learning_rate": 2e-05, "loss": 0.02423439, "step": 18114 }, { "epoch": 36.23, "grad_norm": 1.1082568168640137, "learning_rate": 2e-05, "loss": 0.04402753, "step": 18115 }, { "epoch": 36.232, "grad_norm": 1.1237958669662476, "learning_rate": 2e-05, "loss": 0.03403135, "step": 18116 }, { "epoch": 36.234, "grad_norm": 1.0820505619049072, "learning_rate": 2e-05, "loss": 0.03654093, "step": 18117 }, { "epoch": 36.236, "grad_norm": 1.0331096649169922, "learning_rate": 2e-05, "loss": 0.03929486, "step": 18118 }, { "epoch": 36.238, "grad_norm": 1.2376075983047485, "learning_rate": 2e-05, "loss": 0.03313967, "step": 18119 }, { "epoch": 36.24, "grad_norm": 1.596076250076294, "learning_rate": 2e-05, "loss": 0.04521656, "step": 18120 }, { "epoch": 36.242, "grad_norm": 1.2083992958068848, "learning_rate": 2e-05, "loss": 0.04801769, "step": 18121 }, { "epoch": 36.244, "grad_norm": 1.3693628311157227, "learning_rate": 2e-05, "loss": 0.05652841, "step": 18122 }, { "epoch": 36.246, "grad_norm": 1.8372113704681396, "learning_rate": 2e-05, "loss": 0.03900075, "step": 18123 }, { "epoch": 36.248, "grad_norm": 1.6199109554290771, "learning_rate": 2e-05, "loss": 0.05532604, "step": 18124 }, { "epoch": 36.25, "grad_norm": 1.1799906492233276, "learning_rate": 2e-05, "loss": 0.03721724, "step": 18125 }, { "epoch": 36.252, "grad_norm": 1.0089119672775269, "learning_rate": 2e-05, "loss": 0.03412523, "step": 18126 }, { "epoch": 36.254, "grad_norm": 1.3146682977676392, "learning_rate": 2e-05, "loss": 0.03678686, "step": 18127 }, { "epoch": 36.256, "grad_norm": 1.4293696880340576, "learning_rate": 2e-05, "loss": 0.05230099, "step": 18128 }, { "epoch": 36.258, "grad_norm": 1.1753870248794556, "learning_rate": 2e-05, "loss": 0.04117392, "step": 18129 }, { "epoch": 36.26, "grad_norm": 1.2926392555236816, "learning_rate": 2e-05, "loss": 0.04781886, "step": 18130 }, { "epoch": 36.262, "grad_norm": 1.7987608909606934, "learning_rate": 2e-05, "loss": 0.05109328, "step": 18131 }, { "epoch": 36.264, "grad_norm": 1.0095752477645874, "learning_rate": 2e-05, "loss": 0.04271962, "step": 18132 }, { "epoch": 36.266, "grad_norm": 0.901914656162262, "learning_rate": 2e-05, "loss": 0.02623034, "step": 18133 }, { "epoch": 36.268, "grad_norm": 1.1711057424545288, "learning_rate": 2e-05, "loss": 0.03598663, "step": 18134 }, { "epoch": 36.27, "grad_norm": 0.9837378859519958, "learning_rate": 2e-05, "loss": 0.03688063, "step": 18135 }, { "epoch": 36.272, "grad_norm": 1.2390130758285522, "learning_rate": 2e-05, "loss": 0.05092961, "step": 18136 }, { "epoch": 36.274, "grad_norm": 1.270090103149414, "learning_rate": 2e-05, "loss": 0.0404551, "step": 18137 }, { "epoch": 36.276, "grad_norm": 1.5756562948226929, "learning_rate": 2e-05, "loss": 0.04972642, "step": 18138 }, { "epoch": 36.278, "grad_norm": 0.9940088987350464, "learning_rate": 2e-05, "loss": 0.03693637, "step": 18139 }, { "epoch": 36.28, "grad_norm": 0.9043747782707214, "learning_rate": 2e-05, "loss": 0.02635769, "step": 18140 }, { "epoch": 36.282, "grad_norm": 1.2040432691574097, "learning_rate": 2e-05, "loss": 0.04434269, "step": 18141 }, { "epoch": 36.284, "grad_norm": 1.0949373245239258, "learning_rate": 2e-05, "loss": 0.03474148, "step": 18142 }, { "epoch": 36.286, "grad_norm": 1.09050452709198, "learning_rate": 2e-05, "loss": 0.04845259, "step": 18143 }, { "epoch": 36.288, "grad_norm": 1.3407831192016602, "learning_rate": 2e-05, "loss": 0.04304389, "step": 18144 }, { "epoch": 36.29, "grad_norm": 1.1936886310577393, "learning_rate": 2e-05, "loss": 0.04481979, "step": 18145 }, { "epoch": 36.292, "grad_norm": 2.4183106422424316, "learning_rate": 2e-05, "loss": 0.05331695, "step": 18146 }, { "epoch": 36.294, "grad_norm": 1.1295486688613892, "learning_rate": 2e-05, "loss": 0.03321164, "step": 18147 }, { "epoch": 36.296, "grad_norm": 1.360809087753296, "learning_rate": 2e-05, "loss": 0.03916159, "step": 18148 }, { "epoch": 36.298, "grad_norm": 1.247570276260376, "learning_rate": 2e-05, "loss": 0.0428711, "step": 18149 }, { "epoch": 36.3, "grad_norm": 1.036298394203186, "learning_rate": 2e-05, "loss": 0.04656066, "step": 18150 }, { "epoch": 36.302, "grad_norm": 0.9646769165992737, "learning_rate": 2e-05, "loss": 0.03399408, "step": 18151 }, { "epoch": 36.304, "grad_norm": 1.164666771888733, "learning_rate": 2e-05, "loss": 0.04837671, "step": 18152 }, { "epoch": 36.306, "grad_norm": 1.2248945236206055, "learning_rate": 2e-05, "loss": 0.0307962, "step": 18153 }, { "epoch": 36.308, "grad_norm": 1.1758040189743042, "learning_rate": 2e-05, "loss": 0.0404579, "step": 18154 }, { "epoch": 36.31, "grad_norm": 1.0307793617248535, "learning_rate": 2e-05, "loss": 0.0290431, "step": 18155 }, { "epoch": 36.312, "grad_norm": 0.920478880405426, "learning_rate": 2e-05, "loss": 0.03528172, "step": 18156 }, { "epoch": 36.314, "grad_norm": 1.0334807634353638, "learning_rate": 2e-05, "loss": 0.05046558, "step": 18157 }, { "epoch": 36.316, "grad_norm": 0.9907777309417725, "learning_rate": 2e-05, "loss": 0.03023825, "step": 18158 }, { "epoch": 36.318, "grad_norm": 1.227904200553894, "learning_rate": 2e-05, "loss": 0.04913838, "step": 18159 }, { "epoch": 36.32, "grad_norm": 1.2009419202804565, "learning_rate": 2e-05, "loss": 0.04932972, "step": 18160 }, { "epoch": 36.322, "grad_norm": 0.9665030241012573, "learning_rate": 2e-05, "loss": 0.03171739, "step": 18161 }, { "epoch": 36.324, "grad_norm": 1.2279425859451294, "learning_rate": 2e-05, "loss": 0.0608729, "step": 18162 }, { "epoch": 36.326, "grad_norm": 0.9468924403190613, "learning_rate": 2e-05, "loss": 0.03156189, "step": 18163 }, { "epoch": 36.328, "grad_norm": 1.0261448621749878, "learning_rate": 2e-05, "loss": 0.03610597, "step": 18164 }, { "epoch": 36.33, "grad_norm": 1.276781678199768, "learning_rate": 2e-05, "loss": 0.03549629, "step": 18165 }, { "epoch": 36.332, "grad_norm": 0.9759575724601746, "learning_rate": 2e-05, "loss": 0.03994817, "step": 18166 }, { "epoch": 36.334, "grad_norm": 1.7276750802993774, "learning_rate": 2e-05, "loss": 0.04654774, "step": 18167 }, { "epoch": 36.336, "grad_norm": 0.9664547443389893, "learning_rate": 2e-05, "loss": 0.03177688, "step": 18168 }, { "epoch": 36.338, "grad_norm": 1.9670281410217285, "learning_rate": 2e-05, "loss": 0.04651101, "step": 18169 }, { "epoch": 36.34, "grad_norm": 1.802821159362793, "learning_rate": 2e-05, "loss": 0.04297075, "step": 18170 }, { "epoch": 36.342, "grad_norm": 1.661774754524231, "learning_rate": 2e-05, "loss": 0.06117053, "step": 18171 }, { "epoch": 36.344, "grad_norm": 2.3734986782073975, "learning_rate": 2e-05, "loss": 0.05952629, "step": 18172 }, { "epoch": 36.346, "grad_norm": 1.2025349140167236, "learning_rate": 2e-05, "loss": 0.04102109, "step": 18173 }, { "epoch": 36.348, "grad_norm": 1.0982835292816162, "learning_rate": 2e-05, "loss": 0.04104698, "step": 18174 }, { "epoch": 36.35, "grad_norm": 1.157949686050415, "learning_rate": 2e-05, "loss": 0.03813836, "step": 18175 }, { "epoch": 36.352, "grad_norm": 1.055647611618042, "learning_rate": 2e-05, "loss": 0.03598933, "step": 18176 }, { "epoch": 36.354, "grad_norm": 1.3801038265228271, "learning_rate": 2e-05, "loss": 0.04944832, "step": 18177 }, { "epoch": 36.356, "grad_norm": 1.322588324546814, "learning_rate": 2e-05, "loss": 0.03539138, "step": 18178 }, { "epoch": 36.358, "grad_norm": 1.4064815044403076, "learning_rate": 2e-05, "loss": 0.05578845, "step": 18179 }, { "epoch": 36.36, "grad_norm": 3.2461321353912354, "learning_rate": 2e-05, "loss": 0.04246819, "step": 18180 }, { "epoch": 36.362, "grad_norm": 1.1064374446868896, "learning_rate": 2e-05, "loss": 0.03681324, "step": 18181 }, { "epoch": 36.364, "grad_norm": 1.1665987968444824, "learning_rate": 2e-05, "loss": 0.04058225, "step": 18182 }, { "epoch": 36.366, "grad_norm": 1.029982089996338, "learning_rate": 2e-05, "loss": 0.03287161, "step": 18183 }, { "epoch": 36.368, "grad_norm": 1.1927478313446045, "learning_rate": 2e-05, "loss": 0.05367187, "step": 18184 }, { "epoch": 36.37, "grad_norm": 1.2972198724746704, "learning_rate": 2e-05, "loss": 0.04106817, "step": 18185 }, { "epoch": 36.372, "grad_norm": 1.027411699295044, "learning_rate": 2e-05, "loss": 0.04088916, "step": 18186 }, { "epoch": 36.374, "grad_norm": 1.171830415725708, "learning_rate": 2e-05, "loss": 0.05644122, "step": 18187 }, { "epoch": 36.376, "grad_norm": 1.1561020612716675, "learning_rate": 2e-05, "loss": 0.04188239, "step": 18188 }, { "epoch": 36.378, "grad_norm": 1.0463422536849976, "learning_rate": 2e-05, "loss": 0.04296407, "step": 18189 }, { "epoch": 36.38, "grad_norm": 1.2229862213134766, "learning_rate": 2e-05, "loss": 0.03844842, "step": 18190 }, { "epoch": 36.382, "grad_norm": 0.8768106698989868, "learning_rate": 2e-05, "loss": 0.03120964, "step": 18191 }, { "epoch": 36.384, "grad_norm": 1.1767603158950806, "learning_rate": 2e-05, "loss": 0.03380742, "step": 18192 }, { "epoch": 36.386, "grad_norm": 0.9635298848152161, "learning_rate": 2e-05, "loss": 0.03339863, "step": 18193 }, { "epoch": 36.388, "grad_norm": 0.9768321514129639, "learning_rate": 2e-05, "loss": 0.03579611, "step": 18194 }, { "epoch": 36.39, "grad_norm": 0.8538011312484741, "learning_rate": 2e-05, "loss": 0.02970689, "step": 18195 }, { "epoch": 36.392, "grad_norm": 1.0878775119781494, "learning_rate": 2e-05, "loss": 0.04001588, "step": 18196 }, { "epoch": 36.394, "grad_norm": 1.142221212387085, "learning_rate": 2e-05, "loss": 0.04551936, "step": 18197 }, { "epoch": 36.396, "grad_norm": 1.5530418157577515, "learning_rate": 2e-05, "loss": 0.03043629, "step": 18198 }, { "epoch": 36.398, "grad_norm": 1.637148141860962, "learning_rate": 2e-05, "loss": 0.03448223, "step": 18199 }, { "epoch": 36.4, "grad_norm": 1.4483563899993896, "learning_rate": 2e-05, "loss": 0.05348013, "step": 18200 }, { "epoch": 36.402, "grad_norm": 1.0635143518447876, "learning_rate": 2e-05, "loss": 0.03588112, "step": 18201 }, { "epoch": 36.404, "grad_norm": 2.611689329147339, "learning_rate": 2e-05, "loss": 0.06078885, "step": 18202 }, { "epoch": 36.406, "grad_norm": 1.502625584602356, "learning_rate": 2e-05, "loss": 0.03423886, "step": 18203 }, { "epoch": 36.408, "grad_norm": 1.3298054933547974, "learning_rate": 2e-05, "loss": 0.04151591, "step": 18204 }, { "epoch": 36.41, "grad_norm": 1.27322518825531, "learning_rate": 2e-05, "loss": 0.05073592, "step": 18205 }, { "epoch": 36.412, "grad_norm": 1.035677433013916, "learning_rate": 2e-05, "loss": 0.04113561, "step": 18206 }, { "epoch": 36.414, "grad_norm": 1.7269580364227295, "learning_rate": 2e-05, "loss": 0.04068903, "step": 18207 }, { "epoch": 36.416, "grad_norm": 1.314218521118164, "learning_rate": 2e-05, "loss": 0.04115914, "step": 18208 }, { "epoch": 36.418, "grad_norm": 1.3631287813186646, "learning_rate": 2e-05, "loss": 0.05018599, "step": 18209 }, { "epoch": 36.42, "grad_norm": 1.4252984523773193, "learning_rate": 2e-05, "loss": 0.05225445, "step": 18210 }, { "epoch": 36.422, "grad_norm": 1.0618059635162354, "learning_rate": 2e-05, "loss": 0.03559672, "step": 18211 }, { "epoch": 36.424, "grad_norm": 1.217882752418518, "learning_rate": 2e-05, "loss": 0.0485566, "step": 18212 }, { "epoch": 36.426, "grad_norm": 1.2114055156707764, "learning_rate": 2e-05, "loss": 0.04762184, "step": 18213 }, { "epoch": 36.428, "grad_norm": 2.0187506675720215, "learning_rate": 2e-05, "loss": 0.05630653, "step": 18214 }, { "epoch": 36.43, "grad_norm": 1.9597657918930054, "learning_rate": 2e-05, "loss": 0.06257641, "step": 18215 }, { "epoch": 36.432, "grad_norm": 0.9971281886100769, "learning_rate": 2e-05, "loss": 0.03872079, "step": 18216 }, { "epoch": 36.434, "grad_norm": 1.2572100162506104, "learning_rate": 2e-05, "loss": 0.03614823, "step": 18217 }, { "epoch": 36.436, "grad_norm": 1.1719560623168945, "learning_rate": 2e-05, "loss": 0.03753065, "step": 18218 }, { "epoch": 36.438, "grad_norm": 1.0278385877609253, "learning_rate": 2e-05, "loss": 0.03851878, "step": 18219 }, { "epoch": 36.44, "grad_norm": 1.2253730297088623, "learning_rate": 2e-05, "loss": 0.0503806, "step": 18220 }, { "epoch": 36.442, "grad_norm": 0.88956218957901, "learning_rate": 2e-05, "loss": 0.02707586, "step": 18221 }, { "epoch": 36.444, "grad_norm": 0.8710851669311523, "learning_rate": 2e-05, "loss": 0.0222133, "step": 18222 }, { "epoch": 36.446, "grad_norm": 1.2004845142364502, "learning_rate": 2e-05, "loss": 0.05466457, "step": 18223 }, { "epoch": 36.448, "grad_norm": 1.3014144897460938, "learning_rate": 2e-05, "loss": 0.05169172, "step": 18224 }, { "epoch": 36.45, "grad_norm": 1.0197980403900146, "learning_rate": 2e-05, "loss": 0.03822663, "step": 18225 }, { "epoch": 36.452, "grad_norm": 1.3792431354522705, "learning_rate": 2e-05, "loss": 0.04293468, "step": 18226 }, { "epoch": 36.454, "grad_norm": 1.1744886636734009, "learning_rate": 2e-05, "loss": 0.05211157, "step": 18227 }, { "epoch": 36.456, "grad_norm": 1.598494052886963, "learning_rate": 2e-05, "loss": 0.04538838, "step": 18228 }, { "epoch": 36.458, "grad_norm": 0.8673466444015503, "learning_rate": 2e-05, "loss": 0.0319301, "step": 18229 }, { "epoch": 36.46, "grad_norm": 1.918244481086731, "learning_rate": 2e-05, "loss": 0.05689289, "step": 18230 }, { "epoch": 36.462, "grad_norm": 1.1343375444412231, "learning_rate": 2e-05, "loss": 0.04852476, "step": 18231 }, { "epoch": 36.464, "grad_norm": 1.910357117652893, "learning_rate": 2e-05, "loss": 0.04690136, "step": 18232 }, { "epoch": 36.466, "grad_norm": 1.1695517301559448, "learning_rate": 2e-05, "loss": 0.04719057, "step": 18233 }, { "epoch": 36.468, "grad_norm": 1.595226526260376, "learning_rate": 2e-05, "loss": 0.04680973, "step": 18234 }, { "epoch": 36.47, "grad_norm": 1.0426281690597534, "learning_rate": 2e-05, "loss": 0.03990892, "step": 18235 }, { "epoch": 36.472, "grad_norm": 1.362654209136963, "learning_rate": 2e-05, "loss": 0.03500029, "step": 18236 }, { "epoch": 36.474, "grad_norm": 1.1419274806976318, "learning_rate": 2e-05, "loss": 0.04769664, "step": 18237 }, { "epoch": 36.476, "grad_norm": 1.1063685417175293, "learning_rate": 2e-05, "loss": 0.03118099, "step": 18238 }, { "epoch": 36.478, "grad_norm": 1.0712615251541138, "learning_rate": 2e-05, "loss": 0.0473005, "step": 18239 }, { "epoch": 36.48, "grad_norm": 0.9738886952400208, "learning_rate": 2e-05, "loss": 0.03309727, "step": 18240 }, { "epoch": 36.482, "grad_norm": 1.6552538871765137, "learning_rate": 2e-05, "loss": 0.05177336, "step": 18241 }, { "epoch": 36.484, "grad_norm": 0.9547532200813293, "learning_rate": 2e-05, "loss": 0.03158896, "step": 18242 }, { "epoch": 36.486, "grad_norm": 1.1574902534484863, "learning_rate": 2e-05, "loss": 0.03545589, "step": 18243 }, { "epoch": 36.488, "grad_norm": 1.1547956466674805, "learning_rate": 2e-05, "loss": 0.05037494, "step": 18244 }, { "epoch": 36.49, "grad_norm": 1.5758951902389526, "learning_rate": 2e-05, "loss": 0.03462842, "step": 18245 }, { "epoch": 36.492, "grad_norm": 1.3058934211730957, "learning_rate": 2e-05, "loss": 0.05186077, "step": 18246 }, { "epoch": 36.494, "grad_norm": 2.153010129928589, "learning_rate": 2e-05, "loss": 0.05587346, "step": 18247 }, { "epoch": 36.496, "grad_norm": 1.0306788682937622, "learning_rate": 2e-05, "loss": 0.02917742, "step": 18248 }, { "epoch": 36.498, "grad_norm": 1.1398119926452637, "learning_rate": 2e-05, "loss": 0.0482577, "step": 18249 }, { "epoch": 36.5, "grad_norm": 1.2190545797348022, "learning_rate": 2e-05, "loss": 0.04970558, "step": 18250 }, { "epoch": 36.502, "grad_norm": 1.0682768821716309, "learning_rate": 2e-05, "loss": 0.03955474, "step": 18251 }, { "epoch": 36.504, "grad_norm": 1.061971664428711, "learning_rate": 2e-05, "loss": 0.03687964, "step": 18252 }, { "epoch": 36.506, "grad_norm": 1.0158944129943848, "learning_rate": 2e-05, "loss": 0.03078072, "step": 18253 }, { "epoch": 36.508, "grad_norm": 1.3401365280151367, "learning_rate": 2e-05, "loss": 0.0381853, "step": 18254 }, { "epoch": 36.51, "grad_norm": 1.350685715675354, "learning_rate": 2e-05, "loss": 0.04724355, "step": 18255 }, { "epoch": 36.512, "grad_norm": 2.3168015480041504, "learning_rate": 2e-05, "loss": 0.04308641, "step": 18256 }, { "epoch": 36.514, "grad_norm": 1.6673002243041992, "learning_rate": 2e-05, "loss": 0.04929385, "step": 18257 }, { "epoch": 36.516, "grad_norm": 0.9662206768989563, "learning_rate": 2e-05, "loss": 0.03089986, "step": 18258 }, { "epoch": 36.518, "grad_norm": 1.062555193901062, "learning_rate": 2e-05, "loss": 0.0395766, "step": 18259 }, { "epoch": 36.52, "grad_norm": 1.0343207120895386, "learning_rate": 2e-05, "loss": 0.02805191, "step": 18260 }, { "epoch": 36.522, "grad_norm": 0.9246268272399902, "learning_rate": 2e-05, "loss": 0.02393855, "step": 18261 }, { "epoch": 36.524, "grad_norm": 1.8312151432037354, "learning_rate": 2e-05, "loss": 0.04711398, "step": 18262 }, { "epoch": 36.526, "grad_norm": 1.2133851051330566, "learning_rate": 2e-05, "loss": 0.04653526, "step": 18263 }, { "epoch": 36.528, "grad_norm": 0.9863196015357971, "learning_rate": 2e-05, "loss": 0.02355134, "step": 18264 }, { "epoch": 36.53, "grad_norm": 1.0515233278274536, "learning_rate": 2e-05, "loss": 0.03952796, "step": 18265 }, { "epoch": 36.532, "grad_norm": 0.971024751663208, "learning_rate": 2e-05, "loss": 0.03000312, "step": 18266 }, { "epoch": 36.534, "grad_norm": 1.2390265464782715, "learning_rate": 2e-05, "loss": 0.03372075, "step": 18267 }, { "epoch": 36.536, "grad_norm": 0.8119271993637085, "learning_rate": 2e-05, "loss": 0.0300536, "step": 18268 }, { "epoch": 36.538, "grad_norm": 0.9378778338432312, "learning_rate": 2e-05, "loss": 0.02700995, "step": 18269 }, { "epoch": 36.54, "grad_norm": 2.0789153575897217, "learning_rate": 2e-05, "loss": 0.03948624, "step": 18270 }, { "epoch": 36.542, "grad_norm": 1.76309335231781, "learning_rate": 2e-05, "loss": 0.04286005, "step": 18271 }, { "epoch": 36.544, "grad_norm": 4.2802581787109375, "learning_rate": 2e-05, "loss": 0.03132661, "step": 18272 }, { "epoch": 36.546, "grad_norm": 2.573993444442749, "learning_rate": 2e-05, "loss": 0.04764672, "step": 18273 }, { "epoch": 36.548, "grad_norm": 0.87221759557724, "learning_rate": 2e-05, "loss": 0.02382921, "step": 18274 }, { "epoch": 36.55, "grad_norm": 1.0562106370925903, "learning_rate": 2e-05, "loss": 0.04094611, "step": 18275 }, { "epoch": 36.552, "grad_norm": 1.2717176675796509, "learning_rate": 2e-05, "loss": 0.0410012, "step": 18276 }, { "epoch": 36.554, "grad_norm": 1.2382874488830566, "learning_rate": 2e-05, "loss": 0.03663464, "step": 18277 }, { "epoch": 36.556, "grad_norm": 1.964185118675232, "learning_rate": 2e-05, "loss": 0.04018112, "step": 18278 }, { "epoch": 36.558, "grad_norm": 1.2324621677398682, "learning_rate": 2e-05, "loss": 0.03608364, "step": 18279 }, { "epoch": 36.56, "grad_norm": 1.2070492506027222, "learning_rate": 2e-05, "loss": 0.05842055, "step": 18280 }, { "epoch": 36.562, "grad_norm": 1.1493645906448364, "learning_rate": 2e-05, "loss": 0.05052029, "step": 18281 }, { "epoch": 36.564, "grad_norm": 2.735915422439575, "learning_rate": 2e-05, "loss": 0.04691009, "step": 18282 }, { "epoch": 36.566, "grad_norm": 1.2203706502914429, "learning_rate": 2e-05, "loss": 0.04751462, "step": 18283 }, { "epoch": 36.568, "grad_norm": 1.1800801753997803, "learning_rate": 2e-05, "loss": 0.05025848, "step": 18284 }, { "epoch": 36.57, "grad_norm": 1.0068162679672241, "learning_rate": 2e-05, "loss": 0.03809405, "step": 18285 }, { "epoch": 36.572, "grad_norm": 1.2168195247650146, "learning_rate": 2e-05, "loss": 0.04326154, "step": 18286 }, { "epoch": 36.574, "grad_norm": 1.434417724609375, "learning_rate": 2e-05, "loss": 0.0574247, "step": 18287 }, { "epoch": 36.576, "grad_norm": 1.1110270023345947, "learning_rate": 2e-05, "loss": 0.0267412, "step": 18288 }, { "epoch": 36.578, "grad_norm": 1.2842190265655518, "learning_rate": 2e-05, "loss": 0.04922368, "step": 18289 }, { "epoch": 36.58, "grad_norm": 1.4136754274368286, "learning_rate": 2e-05, "loss": 0.03303508, "step": 18290 }, { "epoch": 36.582, "grad_norm": 1.025667667388916, "learning_rate": 2e-05, "loss": 0.03115687, "step": 18291 }, { "epoch": 36.584, "grad_norm": 1.3057501316070557, "learning_rate": 2e-05, "loss": 0.05253988, "step": 18292 }, { "epoch": 36.586, "grad_norm": 1.0783129930496216, "learning_rate": 2e-05, "loss": 0.03761948, "step": 18293 }, { "epoch": 36.588, "grad_norm": 1.1081454753875732, "learning_rate": 2e-05, "loss": 0.03277981, "step": 18294 }, { "epoch": 36.59, "grad_norm": 1.0502467155456543, "learning_rate": 2e-05, "loss": 0.03914522, "step": 18295 }, { "epoch": 36.592, "grad_norm": 1.080041766166687, "learning_rate": 2e-05, "loss": 0.03669895, "step": 18296 }, { "epoch": 36.594, "grad_norm": 2.066941976547241, "learning_rate": 2e-05, "loss": 0.05867566, "step": 18297 }, { "epoch": 36.596, "grad_norm": 1.1148244142532349, "learning_rate": 2e-05, "loss": 0.04393861, "step": 18298 }, { "epoch": 36.598, "grad_norm": 1.0712250471115112, "learning_rate": 2e-05, "loss": 0.03914363, "step": 18299 }, { "epoch": 36.6, "grad_norm": 1.2081435918807983, "learning_rate": 2e-05, "loss": 0.03771123, "step": 18300 }, { "epoch": 36.602, "grad_norm": 1.143244743347168, "learning_rate": 2e-05, "loss": 0.04224782, "step": 18301 }, { "epoch": 36.604, "grad_norm": 1.2254302501678467, "learning_rate": 2e-05, "loss": 0.04013958, "step": 18302 }, { "epoch": 36.606, "grad_norm": 1.3752824068069458, "learning_rate": 2e-05, "loss": 0.050212, "step": 18303 }, { "epoch": 36.608, "grad_norm": 0.94117671251297, "learning_rate": 2e-05, "loss": 0.03055703, "step": 18304 }, { "epoch": 36.61, "grad_norm": 1.1006180047988892, "learning_rate": 2e-05, "loss": 0.03390218, "step": 18305 }, { "epoch": 36.612, "grad_norm": 1.3862262964248657, "learning_rate": 2e-05, "loss": 0.05241469, "step": 18306 }, { "epoch": 36.614, "grad_norm": 1.8648090362548828, "learning_rate": 2e-05, "loss": 0.04927859, "step": 18307 }, { "epoch": 36.616, "grad_norm": 1.4699233770370483, "learning_rate": 2e-05, "loss": 0.04433728, "step": 18308 }, { "epoch": 36.618, "grad_norm": 1.4989550113677979, "learning_rate": 2e-05, "loss": 0.05942933, "step": 18309 }, { "epoch": 36.62, "grad_norm": 1.1221908330917358, "learning_rate": 2e-05, "loss": 0.047569, "step": 18310 }, { "epoch": 36.622, "grad_norm": 1.540700078010559, "learning_rate": 2e-05, "loss": 0.05225103, "step": 18311 }, { "epoch": 36.624, "grad_norm": 1.2131165266036987, "learning_rate": 2e-05, "loss": 0.04262118, "step": 18312 }, { "epoch": 36.626, "grad_norm": 1.1024413108825684, "learning_rate": 2e-05, "loss": 0.03573479, "step": 18313 }, { "epoch": 36.628, "grad_norm": 1.9609568119049072, "learning_rate": 2e-05, "loss": 0.04972448, "step": 18314 }, { "epoch": 36.63, "grad_norm": 1.236175775527954, "learning_rate": 2e-05, "loss": 0.03609653, "step": 18315 }, { "epoch": 36.632, "grad_norm": 1.202167272567749, "learning_rate": 2e-05, "loss": 0.04706704, "step": 18316 }, { "epoch": 36.634, "grad_norm": 1.1365604400634766, "learning_rate": 2e-05, "loss": 0.02697523, "step": 18317 }, { "epoch": 36.636, "grad_norm": 2.451118230819702, "learning_rate": 2e-05, "loss": 0.04429175, "step": 18318 }, { "epoch": 36.638, "grad_norm": 1.0182795524597168, "learning_rate": 2e-05, "loss": 0.03436865, "step": 18319 }, { "epoch": 36.64, "grad_norm": 1.2875375747680664, "learning_rate": 2e-05, "loss": 0.0550098, "step": 18320 }, { "epoch": 36.642, "grad_norm": 1.559504508972168, "learning_rate": 2e-05, "loss": 0.05564836, "step": 18321 }, { "epoch": 36.644, "grad_norm": 1.1889086961746216, "learning_rate": 2e-05, "loss": 0.03784901, "step": 18322 }, { "epoch": 36.646, "grad_norm": 0.8898120522499084, "learning_rate": 2e-05, "loss": 0.0278656, "step": 18323 }, { "epoch": 36.648, "grad_norm": 1.1782766580581665, "learning_rate": 2e-05, "loss": 0.03811631, "step": 18324 }, { "epoch": 36.65, "grad_norm": 1.2543221712112427, "learning_rate": 2e-05, "loss": 0.03640786, "step": 18325 }, { "epoch": 36.652, "grad_norm": 1.0462356805801392, "learning_rate": 2e-05, "loss": 0.03829445, "step": 18326 }, { "epoch": 36.654, "grad_norm": 1.3017123937606812, "learning_rate": 2e-05, "loss": 0.03575253, "step": 18327 }, { "epoch": 36.656, "grad_norm": 1.342948317527771, "learning_rate": 2e-05, "loss": 0.03757192, "step": 18328 }, { "epoch": 36.658, "grad_norm": 1.066953420639038, "learning_rate": 2e-05, "loss": 0.04221145, "step": 18329 }, { "epoch": 36.66, "grad_norm": 1.3225663900375366, "learning_rate": 2e-05, "loss": 0.05248147, "step": 18330 }, { "epoch": 36.662, "grad_norm": 2.983029842376709, "learning_rate": 2e-05, "loss": 0.03616619, "step": 18331 }, { "epoch": 36.664, "grad_norm": 2.2221550941467285, "learning_rate": 2e-05, "loss": 0.04058126, "step": 18332 }, { "epoch": 36.666, "grad_norm": 1.315987467765808, "learning_rate": 2e-05, "loss": 0.03588897, "step": 18333 }, { "epoch": 36.668, "grad_norm": 0.9215741157531738, "learning_rate": 2e-05, "loss": 0.03925972, "step": 18334 }, { "epoch": 36.67, "grad_norm": 1.2016059160232544, "learning_rate": 2e-05, "loss": 0.05430229, "step": 18335 }, { "epoch": 36.672, "grad_norm": 0.921889066696167, "learning_rate": 2e-05, "loss": 0.03469507, "step": 18336 }, { "epoch": 36.674, "grad_norm": 0.9315686821937561, "learning_rate": 2e-05, "loss": 0.02187958, "step": 18337 }, { "epoch": 36.676, "grad_norm": 0.9967896938323975, "learning_rate": 2e-05, "loss": 0.0331551, "step": 18338 }, { "epoch": 36.678, "grad_norm": 1.1743967533111572, "learning_rate": 2e-05, "loss": 0.04423882, "step": 18339 }, { "epoch": 36.68, "grad_norm": 1.2126519680023193, "learning_rate": 2e-05, "loss": 0.03514692, "step": 18340 }, { "epoch": 36.682, "grad_norm": 1.3199232816696167, "learning_rate": 2e-05, "loss": 0.03419685, "step": 18341 }, { "epoch": 36.684, "grad_norm": 1.7518872022628784, "learning_rate": 2e-05, "loss": 0.05697691, "step": 18342 }, { "epoch": 36.686, "grad_norm": 1.0076147317886353, "learning_rate": 2e-05, "loss": 0.0281866, "step": 18343 }, { "epoch": 36.688, "grad_norm": 1.5172227621078491, "learning_rate": 2e-05, "loss": 0.05067704, "step": 18344 }, { "epoch": 36.69, "grad_norm": 1.6824908256530762, "learning_rate": 2e-05, "loss": 0.04503788, "step": 18345 }, { "epoch": 36.692, "grad_norm": 1.3745967149734497, "learning_rate": 2e-05, "loss": 0.0483752, "step": 18346 }, { "epoch": 36.694, "grad_norm": 1.2463769912719727, "learning_rate": 2e-05, "loss": 0.05347317, "step": 18347 }, { "epoch": 36.696, "grad_norm": 1.0087859630584717, "learning_rate": 2e-05, "loss": 0.03465716, "step": 18348 }, { "epoch": 36.698, "grad_norm": 1.2997995615005493, "learning_rate": 2e-05, "loss": 0.03587429, "step": 18349 }, { "epoch": 36.7, "grad_norm": 1.1634178161621094, "learning_rate": 2e-05, "loss": 0.02643599, "step": 18350 }, { "epoch": 36.702, "grad_norm": 1.2005338668823242, "learning_rate": 2e-05, "loss": 0.04984972, "step": 18351 }, { "epoch": 36.704, "grad_norm": 0.9318526387214661, "learning_rate": 2e-05, "loss": 0.02411564, "step": 18352 }, { "epoch": 36.706, "grad_norm": 1.1339325904846191, "learning_rate": 2e-05, "loss": 0.03448115, "step": 18353 }, { "epoch": 36.708, "grad_norm": 1.1679779291152954, "learning_rate": 2e-05, "loss": 0.03647584, "step": 18354 }, { "epoch": 36.71, "grad_norm": 1.2104750871658325, "learning_rate": 2e-05, "loss": 0.0515867, "step": 18355 }, { "epoch": 36.712, "grad_norm": 2.100127935409546, "learning_rate": 2e-05, "loss": 0.05232673, "step": 18356 }, { "epoch": 36.714, "grad_norm": 1.754447340965271, "learning_rate": 2e-05, "loss": 0.04861075, "step": 18357 }, { "epoch": 36.716, "grad_norm": 1.008870005607605, "learning_rate": 2e-05, "loss": 0.03333037, "step": 18358 }, { "epoch": 36.718, "grad_norm": 1.7049437761306763, "learning_rate": 2e-05, "loss": 0.03618935, "step": 18359 }, { "epoch": 36.72, "grad_norm": 1.8496776819229126, "learning_rate": 2e-05, "loss": 0.05417331, "step": 18360 }, { "epoch": 36.722, "grad_norm": 1.0589567422866821, "learning_rate": 2e-05, "loss": 0.02868169, "step": 18361 }, { "epoch": 36.724, "grad_norm": 0.9607617259025574, "learning_rate": 2e-05, "loss": 0.03263365, "step": 18362 }, { "epoch": 36.726, "grad_norm": 2.0316452980041504, "learning_rate": 2e-05, "loss": 0.04388903, "step": 18363 }, { "epoch": 36.728, "grad_norm": 1.2382222414016724, "learning_rate": 2e-05, "loss": 0.04471454, "step": 18364 }, { "epoch": 36.73, "grad_norm": 1.287028193473816, "learning_rate": 2e-05, "loss": 0.04440287, "step": 18365 }, { "epoch": 36.732, "grad_norm": 1.2623907327651978, "learning_rate": 2e-05, "loss": 0.03233304, "step": 18366 }, { "epoch": 36.734, "grad_norm": 1.9164531230926514, "learning_rate": 2e-05, "loss": 0.04776657, "step": 18367 }, { "epoch": 36.736, "grad_norm": 1.3518916368484497, "learning_rate": 2e-05, "loss": 0.06090958, "step": 18368 }, { "epoch": 36.738, "grad_norm": 1.8850350379943848, "learning_rate": 2e-05, "loss": 0.03893983, "step": 18369 }, { "epoch": 36.74, "grad_norm": 1.0571264028549194, "learning_rate": 2e-05, "loss": 0.03137238, "step": 18370 }, { "epoch": 36.742, "grad_norm": 1.1914355754852295, "learning_rate": 2e-05, "loss": 0.03433579, "step": 18371 }, { "epoch": 36.744, "grad_norm": 0.8919147849082947, "learning_rate": 2e-05, "loss": 0.0218019, "step": 18372 }, { "epoch": 36.746, "grad_norm": 1.1925781965255737, "learning_rate": 2e-05, "loss": 0.05873271, "step": 18373 }, { "epoch": 36.748, "grad_norm": 1.1038137674331665, "learning_rate": 2e-05, "loss": 0.04641431, "step": 18374 }, { "epoch": 36.75, "grad_norm": 0.965733528137207, "learning_rate": 2e-05, "loss": 0.02856233, "step": 18375 }, { "epoch": 36.752, "grad_norm": 1.5396867990493774, "learning_rate": 2e-05, "loss": 0.04703198, "step": 18376 }, { "epoch": 36.754, "grad_norm": 2.1341171264648438, "learning_rate": 2e-05, "loss": 0.04486314, "step": 18377 }, { "epoch": 36.756, "grad_norm": 1.7395333051681519, "learning_rate": 2e-05, "loss": 0.04411499, "step": 18378 }, { "epoch": 36.758, "grad_norm": 1.706061840057373, "learning_rate": 2e-05, "loss": 0.03239989, "step": 18379 }, { "epoch": 36.76, "grad_norm": 1.6504026651382446, "learning_rate": 2e-05, "loss": 0.03501267, "step": 18380 }, { "epoch": 36.762, "grad_norm": 1.3423389196395874, "learning_rate": 2e-05, "loss": 0.05261109, "step": 18381 }, { "epoch": 36.764, "grad_norm": 1.3317161798477173, "learning_rate": 2e-05, "loss": 0.04133341, "step": 18382 }, { "epoch": 36.766, "grad_norm": 1.6479923725128174, "learning_rate": 2e-05, "loss": 0.04605314, "step": 18383 }, { "epoch": 36.768, "grad_norm": 1.1072478294372559, "learning_rate": 2e-05, "loss": 0.03600177, "step": 18384 }, { "epoch": 36.77, "grad_norm": 1.1794970035552979, "learning_rate": 2e-05, "loss": 0.05206569, "step": 18385 }, { "epoch": 36.772, "grad_norm": 1.075430989265442, "learning_rate": 2e-05, "loss": 0.03614403, "step": 18386 }, { "epoch": 36.774, "grad_norm": 1.2344156503677368, "learning_rate": 2e-05, "loss": 0.06035028, "step": 18387 }, { "epoch": 36.776, "grad_norm": 1.0581718683242798, "learning_rate": 2e-05, "loss": 0.0294181, "step": 18388 }, { "epoch": 36.778, "grad_norm": 1.7531042098999023, "learning_rate": 2e-05, "loss": 0.04625558, "step": 18389 }, { "epoch": 36.78, "grad_norm": 1.1771091222763062, "learning_rate": 2e-05, "loss": 0.04027297, "step": 18390 }, { "epoch": 36.782, "grad_norm": 0.9724439978599548, "learning_rate": 2e-05, "loss": 0.03231468, "step": 18391 }, { "epoch": 36.784, "grad_norm": 1.1481809616088867, "learning_rate": 2e-05, "loss": 0.03959997, "step": 18392 }, { "epoch": 36.786, "grad_norm": 1.2926446199417114, "learning_rate": 2e-05, "loss": 0.05425737, "step": 18393 }, { "epoch": 36.788, "grad_norm": 1.4414113759994507, "learning_rate": 2e-05, "loss": 0.04928648, "step": 18394 }, { "epoch": 36.79, "grad_norm": 1.5177888870239258, "learning_rate": 2e-05, "loss": 0.03690218, "step": 18395 }, { "epoch": 36.792, "grad_norm": 1.5356523990631104, "learning_rate": 2e-05, "loss": 0.04718888, "step": 18396 }, { "epoch": 36.794, "grad_norm": 2.2311973571777344, "learning_rate": 2e-05, "loss": 0.03536499, "step": 18397 }, { "epoch": 36.796, "grad_norm": 2.834324836730957, "learning_rate": 2e-05, "loss": 0.04099808, "step": 18398 }, { "epoch": 36.798, "grad_norm": 1.1920080184936523, "learning_rate": 2e-05, "loss": 0.04433641, "step": 18399 }, { "epoch": 36.8, "grad_norm": 1.1526068449020386, "learning_rate": 2e-05, "loss": 0.04516728, "step": 18400 }, { "epoch": 36.802, "grad_norm": 1.2687426805496216, "learning_rate": 2e-05, "loss": 0.03586322, "step": 18401 }, { "epoch": 36.804, "grad_norm": 0.9818292856216431, "learning_rate": 2e-05, "loss": 0.02395935, "step": 18402 }, { "epoch": 36.806, "grad_norm": 2.2147302627563477, "learning_rate": 2e-05, "loss": 0.04670376, "step": 18403 }, { "epoch": 36.808, "grad_norm": 1.6681506633758545, "learning_rate": 2e-05, "loss": 0.05808145, "step": 18404 }, { "epoch": 36.81, "grad_norm": 1.1924699544906616, "learning_rate": 2e-05, "loss": 0.04742082, "step": 18405 }, { "epoch": 36.812, "grad_norm": 2.0133984088897705, "learning_rate": 2e-05, "loss": 0.03901449, "step": 18406 }, { "epoch": 36.814, "grad_norm": 1.1701860427856445, "learning_rate": 2e-05, "loss": 0.02848022, "step": 18407 }, { "epoch": 36.816, "grad_norm": 1.089707374572754, "learning_rate": 2e-05, "loss": 0.04020487, "step": 18408 }, { "epoch": 36.818, "grad_norm": 1.0396581888198853, "learning_rate": 2e-05, "loss": 0.03704511, "step": 18409 }, { "epoch": 36.82, "grad_norm": 1.6695880889892578, "learning_rate": 2e-05, "loss": 0.0519683, "step": 18410 }, { "epoch": 36.822, "grad_norm": 1.1158127784729004, "learning_rate": 2e-05, "loss": 0.04286835, "step": 18411 }, { "epoch": 36.824, "grad_norm": 1.1745147705078125, "learning_rate": 2e-05, "loss": 0.03942792, "step": 18412 }, { "epoch": 36.826, "grad_norm": 0.8886756896972656, "learning_rate": 2e-05, "loss": 0.02771359, "step": 18413 }, { "epoch": 36.828, "grad_norm": 1.2267519235610962, "learning_rate": 2e-05, "loss": 0.0511631, "step": 18414 }, { "epoch": 36.83, "grad_norm": 2.179556369781494, "learning_rate": 2e-05, "loss": 0.06088002, "step": 18415 }, { "epoch": 36.832, "grad_norm": 1.2211542129516602, "learning_rate": 2e-05, "loss": 0.04303684, "step": 18416 }, { "epoch": 36.834, "grad_norm": 2.2510719299316406, "learning_rate": 2e-05, "loss": 0.05186936, "step": 18417 }, { "epoch": 36.836, "grad_norm": 1.2228981256484985, "learning_rate": 2e-05, "loss": 0.04879232, "step": 18418 }, { "epoch": 36.838, "grad_norm": 1.098894715309143, "learning_rate": 2e-05, "loss": 0.03979594, "step": 18419 }, { "epoch": 36.84, "grad_norm": 1.2254807949066162, "learning_rate": 2e-05, "loss": 0.03765118, "step": 18420 }, { "epoch": 36.842, "grad_norm": 1.175650954246521, "learning_rate": 2e-05, "loss": 0.04586347, "step": 18421 }, { "epoch": 36.844, "grad_norm": 1.22563636302948, "learning_rate": 2e-05, "loss": 0.0531856, "step": 18422 }, { "epoch": 36.846, "grad_norm": 1.2885513305664062, "learning_rate": 2e-05, "loss": 0.04552082, "step": 18423 }, { "epoch": 36.848, "grad_norm": 1.1295452117919922, "learning_rate": 2e-05, "loss": 0.0402256, "step": 18424 }, { "epoch": 36.85, "grad_norm": 1.0516687631607056, "learning_rate": 2e-05, "loss": 0.04456215, "step": 18425 }, { "epoch": 36.852, "grad_norm": 1.0733994245529175, "learning_rate": 2e-05, "loss": 0.03550437, "step": 18426 }, { "epoch": 36.854, "grad_norm": 1.1007059812545776, "learning_rate": 2e-05, "loss": 0.04091225, "step": 18427 }, { "epoch": 36.856, "grad_norm": 1.5547980070114136, "learning_rate": 2e-05, "loss": 0.04072155, "step": 18428 }, { "epoch": 36.858, "grad_norm": 1.1082590818405151, "learning_rate": 2e-05, "loss": 0.03597355, "step": 18429 }, { "epoch": 36.86, "grad_norm": 1.299734354019165, "learning_rate": 2e-05, "loss": 0.03989294, "step": 18430 }, { "epoch": 36.862, "grad_norm": 0.9722706079483032, "learning_rate": 2e-05, "loss": 0.03567673, "step": 18431 }, { "epoch": 36.864, "grad_norm": 1.2522664070129395, "learning_rate": 2e-05, "loss": 0.04869508, "step": 18432 }, { "epoch": 36.866, "grad_norm": 1.638421654701233, "learning_rate": 2e-05, "loss": 0.03735339, "step": 18433 }, { "epoch": 36.868, "grad_norm": 1.0237358808517456, "learning_rate": 2e-05, "loss": 0.04364359, "step": 18434 }, { "epoch": 36.87, "grad_norm": 1.2411482334136963, "learning_rate": 2e-05, "loss": 0.03947395, "step": 18435 }, { "epoch": 36.872, "grad_norm": 1.159613847732544, "learning_rate": 2e-05, "loss": 0.04042497, "step": 18436 }, { "epoch": 36.874, "grad_norm": 1.2544336318969727, "learning_rate": 2e-05, "loss": 0.0312546, "step": 18437 }, { "epoch": 36.876, "grad_norm": 1.9354642629623413, "learning_rate": 2e-05, "loss": 0.0527093, "step": 18438 }, { "epoch": 36.878, "grad_norm": 3.310471773147583, "learning_rate": 2e-05, "loss": 0.04944807, "step": 18439 }, { "epoch": 36.88, "grad_norm": 1.1568819284439087, "learning_rate": 2e-05, "loss": 0.0369446, "step": 18440 }, { "epoch": 36.882, "grad_norm": 1.2008459568023682, "learning_rate": 2e-05, "loss": 0.04868155, "step": 18441 }, { "epoch": 36.884, "grad_norm": 1.064301609992981, "learning_rate": 2e-05, "loss": 0.03515521, "step": 18442 }, { "epoch": 36.886, "grad_norm": 1.3205647468566895, "learning_rate": 2e-05, "loss": 0.03227288, "step": 18443 }, { "epoch": 36.888, "grad_norm": 1.5607926845550537, "learning_rate": 2e-05, "loss": 0.04700986, "step": 18444 }, { "epoch": 36.89, "grad_norm": 1.1326090097427368, "learning_rate": 2e-05, "loss": 0.03524204, "step": 18445 }, { "epoch": 36.892, "grad_norm": 0.9644931554794312, "learning_rate": 2e-05, "loss": 0.0342176, "step": 18446 }, { "epoch": 36.894, "grad_norm": 1.2272518873214722, "learning_rate": 2e-05, "loss": 0.03322006, "step": 18447 }, { "epoch": 36.896, "grad_norm": 1.061239242553711, "learning_rate": 2e-05, "loss": 0.03333382, "step": 18448 }, { "epoch": 36.898, "grad_norm": 1.1636090278625488, "learning_rate": 2e-05, "loss": 0.04022244, "step": 18449 }, { "epoch": 36.9, "grad_norm": 1.0282191038131714, "learning_rate": 2e-05, "loss": 0.03084, "step": 18450 }, { "epoch": 36.902, "grad_norm": 2.087050199508667, "learning_rate": 2e-05, "loss": 0.0529213, "step": 18451 }, { "epoch": 36.904, "grad_norm": 1.0408107042312622, "learning_rate": 2e-05, "loss": 0.04363842, "step": 18452 }, { "epoch": 36.906, "grad_norm": 1.284910798072815, "learning_rate": 2e-05, "loss": 0.0489226, "step": 18453 }, { "epoch": 36.908, "grad_norm": 1.6543158292770386, "learning_rate": 2e-05, "loss": 0.06356652, "step": 18454 }, { "epoch": 36.91, "grad_norm": 1.2143512964248657, "learning_rate": 2e-05, "loss": 0.05602705, "step": 18455 }, { "epoch": 36.912, "grad_norm": 1.095168113708496, "learning_rate": 2e-05, "loss": 0.04102691, "step": 18456 }, { "epoch": 36.914, "grad_norm": 1.4219430685043335, "learning_rate": 2e-05, "loss": 0.04681015, "step": 18457 }, { "epoch": 36.916, "grad_norm": 1.1736583709716797, "learning_rate": 2e-05, "loss": 0.04344864, "step": 18458 }, { "epoch": 36.918, "grad_norm": 0.9385769963264465, "learning_rate": 2e-05, "loss": 0.02884827, "step": 18459 }, { "epoch": 36.92, "grad_norm": 1.4176242351531982, "learning_rate": 2e-05, "loss": 0.06416596, "step": 18460 }, { "epoch": 36.922, "grad_norm": 1.288359522819519, "learning_rate": 2e-05, "loss": 0.04654737, "step": 18461 }, { "epoch": 36.924, "grad_norm": 1.1389371156692505, "learning_rate": 2e-05, "loss": 0.04628037, "step": 18462 }, { "epoch": 36.926, "grad_norm": 1.098805546760559, "learning_rate": 2e-05, "loss": 0.03729876, "step": 18463 }, { "epoch": 36.928, "grad_norm": 1.4324229955673218, "learning_rate": 2e-05, "loss": 0.0529637, "step": 18464 }, { "epoch": 36.93, "grad_norm": 1.4143197536468506, "learning_rate": 2e-05, "loss": 0.03819642, "step": 18465 }, { "epoch": 36.932, "grad_norm": 1.4157121181488037, "learning_rate": 2e-05, "loss": 0.04541536, "step": 18466 }, { "epoch": 36.934, "grad_norm": 1.393878698348999, "learning_rate": 2e-05, "loss": 0.04265588, "step": 18467 }, { "epoch": 36.936, "grad_norm": 1.0789780616760254, "learning_rate": 2e-05, "loss": 0.04042588, "step": 18468 }, { "epoch": 36.938, "grad_norm": 1.6265769004821777, "learning_rate": 2e-05, "loss": 0.05098584, "step": 18469 }, { "epoch": 36.94, "grad_norm": 1.1690245866775513, "learning_rate": 2e-05, "loss": 0.04554411, "step": 18470 }, { "epoch": 36.942, "grad_norm": 1.3794010877609253, "learning_rate": 2e-05, "loss": 0.044034, "step": 18471 }, { "epoch": 36.944, "grad_norm": 0.8413645625114441, "learning_rate": 2e-05, "loss": 0.02523428, "step": 18472 }, { "epoch": 36.946, "grad_norm": 0.9403594732284546, "learning_rate": 2e-05, "loss": 0.03138512, "step": 18473 }, { "epoch": 36.948, "grad_norm": 1.1274261474609375, "learning_rate": 2e-05, "loss": 0.0406789, "step": 18474 }, { "epoch": 36.95, "grad_norm": 1.4938697814941406, "learning_rate": 2e-05, "loss": 0.05272559, "step": 18475 }, { "epoch": 36.952, "grad_norm": 1.0316252708435059, "learning_rate": 2e-05, "loss": 0.03562522, "step": 18476 }, { "epoch": 36.954, "grad_norm": 1.3863626718521118, "learning_rate": 2e-05, "loss": 0.03377339, "step": 18477 }, { "epoch": 36.956, "grad_norm": 1.7842824459075928, "learning_rate": 2e-05, "loss": 0.05903577, "step": 18478 }, { "epoch": 36.958, "grad_norm": 1.35276198387146, "learning_rate": 2e-05, "loss": 0.03431714, "step": 18479 }, { "epoch": 36.96, "grad_norm": 1.2699631452560425, "learning_rate": 2e-05, "loss": 0.0535304, "step": 18480 }, { "epoch": 36.962, "grad_norm": 1.2348072528839111, "learning_rate": 2e-05, "loss": 0.04691994, "step": 18481 }, { "epoch": 36.964, "grad_norm": 0.9137774705886841, "learning_rate": 2e-05, "loss": 0.0308315, "step": 18482 }, { "epoch": 36.966, "grad_norm": 1.0427780151367188, "learning_rate": 2e-05, "loss": 0.0346647, "step": 18483 }, { "epoch": 36.968, "grad_norm": 2.189744472503662, "learning_rate": 2e-05, "loss": 0.03301712, "step": 18484 }, { "epoch": 36.97, "grad_norm": 1.0989826917648315, "learning_rate": 2e-05, "loss": 0.02908636, "step": 18485 }, { "epoch": 36.972, "grad_norm": 1.6174060106277466, "learning_rate": 2e-05, "loss": 0.04544824, "step": 18486 }, { "epoch": 36.974, "grad_norm": 1.2668603658676147, "learning_rate": 2e-05, "loss": 0.05053727, "step": 18487 }, { "epoch": 36.976, "grad_norm": 1.0576096773147583, "learning_rate": 2e-05, "loss": 0.03743074, "step": 18488 }, { "epoch": 36.978, "grad_norm": 1.1801321506500244, "learning_rate": 2e-05, "loss": 0.03683414, "step": 18489 }, { "epoch": 36.98, "grad_norm": 1.1503815650939941, "learning_rate": 2e-05, "loss": 0.03822848, "step": 18490 }, { "epoch": 36.982, "grad_norm": 1.3549894094467163, "learning_rate": 2e-05, "loss": 0.05866414, "step": 18491 }, { "epoch": 36.984, "grad_norm": 1.0250877141952515, "learning_rate": 2e-05, "loss": 0.03789775, "step": 18492 }, { "epoch": 36.986, "grad_norm": 1.0828768014907837, "learning_rate": 2e-05, "loss": 0.04611751, "step": 18493 }, { "epoch": 36.988, "grad_norm": 1.385727882385254, "learning_rate": 2e-05, "loss": 0.05194426, "step": 18494 }, { "epoch": 36.99, "grad_norm": 1.17097008228302, "learning_rate": 2e-05, "loss": 0.05635056, "step": 18495 }, { "epoch": 36.992, "grad_norm": 1.1106641292572021, "learning_rate": 2e-05, "loss": 0.04129127, "step": 18496 }, { "epoch": 36.994, "grad_norm": 1.0685235261917114, "learning_rate": 2e-05, "loss": 0.04287387, "step": 18497 }, { "epoch": 36.996, "grad_norm": 1.188862919807434, "learning_rate": 2e-05, "loss": 0.04415998, "step": 18498 }, { "epoch": 36.998, "grad_norm": 0.956224799156189, "learning_rate": 2e-05, "loss": 0.03282576, "step": 18499 }, { "epoch": 37.0, "grad_norm": 1.2222694158554077, "learning_rate": 2e-05, "loss": 0.0414125, "step": 18500 }, { "epoch": 37.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9880239520958084, "Equal_1": 0.994, "Equal_2": 0.9880239520958084, "Equal_3": 0.9900199600798403, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.994, "Perpendicular_1": 0.996, "Perpendicular_2": 0.988, "Perpendicular_3": 0.9018036072144289, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.996, "PointLiesOnLine_1": 1.0, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9880239520958084 }, "eval_runtime": 323.3803, "eval_samples_per_second": 32.47, "eval_steps_per_second": 0.649, "step": 18500 }, { "epoch": 37.002, "grad_norm": 1.2086259126663208, "learning_rate": 2e-05, "loss": 0.05474807, "step": 18501 }, { "epoch": 37.004, "grad_norm": 1.1692421436309814, "learning_rate": 2e-05, "loss": 0.04277514, "step": 18502 }, { "epoch": 37.006, "grad_norm": 1.1070613861083984, "learning_rate": 2e-05, "loss": 0.03776816, "step": 18503 }, { "epoch": 37.008, "grad_norm": 1.263321876525879, "learning_rate": 2e-05, "loss": 0.04286732, "step": 18504 }, { "epoch": 37.01, "grad_norm": 1.0955123901367188, "learning_rate": 2e-05, "loss": 0.02977836, "step": 18505 }, { "epoch": 37.012, "grad_norm": 1.2032184600830078, "learning_rate": 2e-05, "loss": 0.03929453, "step": 18506 }, { "epoch": 37.014, "grad_norm": 1.0266116857528687, "learning_rate": 2e-05, "loss": 0.02832175, "step": 18507 }, { "epoch": 37.016, "grad_norm": 1.862282633781433, "learning_rate": 2e-05, "loss": 0.04781446, "step": 18508 }, { "epoch": 37.018, "grad_norm": 1.4959344863891602, "learning_rate": 2e-05, "loss": 0.05474871, "step": 18509 }, { "epoch": 37.02, "grad_norm": 2.0433266162872314, "learning_rate": 2e-05, "loss": 0.05582491, "step": 18510 }, { "epoch": 37.022, "grad_norm": 1.0982203483581543, "learning_rate": 2e-05, "loss": 0.03969976, "step": 18511 }, { "epoch": 37.024, "grad_norm": 1.8421516418457031, "learning_rate": 2e-05, "loss": 0.05257302, "step": 18512 }, { "epoch": 37.026, "grad_norm": 1.0704379081726074, "learning_rate": 2e-05, "loss": 0.03272648, "step": 18513 }, { "epoch": 37.028, "grad_norm": 1.063231110572815, "learning_rate": 2e-05, "loss": 0.03291452, "step": 18514 }, { "epoch": 37.03, "grad_norm": 1.1486153602600098, "learning_rate": 2e-05, "loss": 0.04854581, "step": 18515 }, { "epoch": 37.032, "grad_norm": 1.0761146545410156, "learning_rate": 2e-05, "loss": 0.04210125, "step": 18516 }, { "epoch": 37.034, "grad_norm": 1.477710247039795, "learning_rate": 2e-05, "loss": 0.05219606, "step": 18517 }, { "epoch": 37.036, "grad_norm": 1.0120596885681152, "learning_rate": 2e-05, "loss": 0.02972842, "step": 18518 }, { "epoch": 37.038, "grad_norm": 1.341340184211731, "learning_rate": 2e-05, "loss": 0.03905867, "step": 18519 }, { "epoch": 37.04, "grad_norm": 1.8006370067596436, "learning_rate": 2e-05, "loss": 0.04028159, "step": 18520 }, { "epoch": 37.042, "grad_norm": 1.06367027759552, "learning_rate": 2e-05, "loss": 0.0451144, "step": 18521 }, { "epoch": 37.044, "grad_norm": 1.3736926317214966, "learning_rate": 2e-05, "loss": 0.05268521, "step": 18522 }, { "epoch": 37.046, "grad_norm": 3.3430514335632324, "learning_rate": 2e-05, "loss": 0.05855541, "step": 18523 }, { "epoch": 37.048, "grad_norm": 1.0005395412445068, "learning_rate": 2e-05, "loss": 0.03178389, "step": 18524 }, { "epoch": 37.05, "grad_norm": 1.4999123811721802, "learning_rate": 2e-05, "loss": 0.03547132, "step": 18525 }, { "epoch": 37.052, "grad_norm": 1.2250981330871582, "learning_rate": 2e-05, "loss": 0.04226706, "step": 18526 }, { "epoch": 37.054, "grad_norm": 1.1662254333496094, "learning_rate": 2e-05, "loss": 0.03948696, "step": 18527 }, { "epoch": 37.056, "grad_norm": 1.1504510641098022, "learning_rate": 2e-05, "loss": 0.04601368, "step": 18528 }, { "epoch": 37.058, "grad_norm": 1.0720382928848267, "learning_rate": 2e-05, "loss": 0.03848777, "step": 18529 }, { "epoch": 37.06, "grad_norm": 1.3089898824691772, "learning_rate": 2e-05, "loss": 0.03624338, "step": 18530 }, { "epoch": 37.062, "grad_norm": 1.2138144969940186, "learning_rate": 2e-05, "loss": 0.0507899, "step": 18531 }, { "epoch": 37.064, "grad_norm": 1.297494888305664, "learning_rate": 2e-05, "loss": 0.04721798, "step": 18532 }, { "epoch": 37.066, "grad_norm": 1.2399834394454956, "learning_rate": 2e-05, "loss": 0.04290456, "step": 18533 }, { "epoch": 37.068, "grad_norm": 1.258151888847351, "learning_rate": 2e-05, "loss": 0.03323229, "step": 18534 }, { "epoch": 37.07, "grad_norm": 2.544591188430786, "learning_rate": 2e-05, "loss": 0.04624221, "step": 18535 }, { "epoch": 37.072, "grad_norm": 1.0994566679000854, "learning_rate": 2e-05, "loss": 0.03203875, "step": 18536 }, { "epoch": 37.074, "grad_norm": 1.5202358961105347, "learning_rate": 2e-05, "loss": 0.04309528, "step": 18537 }, { "epoch": 37.076, "grad_norm": 1.162284255027771, "learning_rate": 2e-05, "loss": 0.04106382, "step": 18538 }, { "epoch": 37.078, "grad_norm": 1.5119906663894653, "learning_rate": 2e-05, "loss": 0.05221222, "step": 18539 }, { "epoch": 37.08, "grad_norm": 1.0148025751113892, "learning_rate": 2e-05, "loss": 0.04121029, "step": 18540 }, { "epoch": 37.082, "grad_norm": 1.2499613761901855, "learning_rate": 2e-05, "loss": 0.05167337, "step": 18541 }, { "epoch": 37.084, "grad_norm": 1.3583709001541138, "learning_rate": 2e-05, "loss": 0.04471046, "step": 18542 }, { "epoch": 37.086, "grad_norm": 2.9629805088043213, "learning_rate": 2e-05, "loss": 0.04842234, "step": 18543 }, { "epoch": 37.088, "grad_norm": 1.327346920967102, "learning_rate": 2e-05, "loss": 0.05041461, "step": 18544 }, { "epoch": 37.09, "grad_norm": 1.7716262340545654, "learning_rate": 2e-05, "loss": 0.03213685, "step": 18545 }, { "epoch": 37.092, "grad_norm": 1.3329129219055176, "learning_rate": 2e-05, "loss": 0.05127243, "step": 18546 }, { "epoch": 37.094, "grad_norm": 1.2340625524520874, "learning_rate": 2e-05, "loss": 0.04882501, "step": 18547 }, { "epoch": 37.096, "grad_norm": 1.5020076036453247, "learning_rate": 2e-05, "loss": 0.05312416, "step": 18548 }, { "epoch": 37.098, "grad_norm": 1.0506970882415771, "learning_rate": 2e-05, "loss": 0.04135638, "step": 18549 }, { "epoch": 37.1, "grad_norm": 0.8881466388702393, "learning_rate": 2e-05, "loss": 0.02312918, "step": 18550 }, { "epoch": 37.102, "grad_norm": 2.499708652496338, "learning_rate": 2e-05, "loss": 0.05453001, "step": 18551 }, { "epoch": 37.104, "grad_norm": 0.9243218302726746, "learning_rate": 2e-05, "loss": 0.02658354, "step": 18552 }, { "epoch": 37.106, "grad_norm": 0.8885984420776367, "learning_rate": 2e-05, "loss": 0.03515668, "step": 18553 }, { "epoch": 37.108, "grad_norm": 1.3641797304153442, "learning_rate": 2e-05, "loss": 0.05175686, "step": 18554 }, { "epoch": 37.11, "grad_norm": 1.5672239065170288, "learning_rate": 2e-05, "loss": 0.05572821, "step": 18555 }, { "epoch": 37.112, "grad_norm": 1.1886248588562012, "learning_rate": 2e-05, "loss": 0.03903022, "step": 18556 }, { "epoch": 37.114, "grad_norm": 2.1297879219055176, "learning_rate": 2e-05, "loss": 0.04060325, "step": 18557 }, { "epoch": 37.116, "grad_norm": 1.0565165281295776, "learning_rate": 2e-05, "loss": 0.03858557, "step": 18558 }, { "epoch": 37.118, "grad_norm": 0.9887270927429199, "learning_rate": 2e-05, "loss": 0.0331764, "step": 18559 }, { "epoch": 37.12, "grad_norm": 1.3902815580368042, "learning_rate": 2e-05, "loss": 0.04316443, "step": 18560 }, { "epoch": 37.122, "grad_norm": 1.7422220706939697, "learning_rate": 2e-05, "loss": 0.05364904, "step": 18561 }, { "epoch": 37.124, "grad_norm": 1.6973824501037598, "learning_rate": 2e-05, "loss": 0.06084717, "step": 18562 }, { "epoch": 37.126, "grad_norm": 1.2839064598083496, "learning_rate": 2e-05, "loss": 0.04210863, "step": 18563 }, { "epoch": 37.128, "grad_norm": 1.687252402305603, "learning_rate": 2e-05, "loss": 0.03391319, "step": 18564 }, { "epoch": 37.13, "grad_norm": 1.2945353984832764, "learning_rate": 2e-05, "loss": 0.04598271, "step": 18565 }, { "epoch": 37.132, "grad_norm": 1.3326228857040405, "learning_rate": 2e-05, "loss": 0.04093219, "step": 18566 }, { "epoch": 37.134, "grad_norm": 2.1192924976348877, "learning_rate": 2e-05, "loss": 0.03747287, "step": 18567 }, { "epoch": 37.136, "grad_norm": 0.9054780006408691, "learning_rate": 2e-05, "loss": 0.02849467, "step": 18568 }, { "epoch": 37.138, "grad_norm": 1.0603820085525513, "learning_rate": 2e-05, "loss": 0.0402334, "step": 18569 }, { "epoch": 37.14, "grad_norm": 1.367003083229065, "learning_rate": 2e-05, "loss": 0.05233489, "step": 18570 }, { "epoch": 37.142, "grad_norm": 0.9761447906494141, "learning_rate": 2e-05, "loss": 0.03226305, "step": 18571 }, { "epoch": 37.144, "grad_norm": 1.0491434335708618, "learning_rate": 2e-05, "loss": 0.03535546, "step": 18572 }, { "epoch": 37.146, "grad_norm": 1.1767827272415161, "learning_rate": 2e-05, "loss": 0.0434572, "step": 18573 }, { "epoch": 37.148, "grad_norm": 1.0024957656860352, "learning_rate": 2e-05, "loss": 0.03991302, "step": 18574 }, { "epoch": 37.15, "grad_norm": 1.4435670375823975, "learning_rate": 2e-05, "loss": 0.04887334, "step": 18575 }, { "epoch": 37.152, "grad_norm": 1.0476168394088745, "learning_rate": 2e-05, "loss": 0.03045881, "step": 18576 }, { "epoch": 37.154, "grad_norm": 3.0771420001983643, "learning_rate": 2e-05, "loss": 0.05882487, "step": 18577 }, { "epoch": 37.156, "grad_norm": 1.1227463483810425, "learning_rate": 2e-05, "loss": 0.04296419, "step": 18578 }, { "epoch": 37.158, "grad_norm": 0.8635389804840088, "learning_rate": 2e-05, "loss": 0.02800985, "step": 18579 }, { "epoch": 37.16, "grad_norm": 1.0923333168029785, "learning_rate": 2e-05, "loss": 0.03294976, "step": 18580 }, { "epoch": 37.162, "grad_norm": 1.2803630828857422, "learning_rate": 2e-05, "loss": 0.05049956, "step": 18581 }, { "epoch": 37.164, "grad_norm": 2.036414384841919, "learning_rate": 2e-05, "loss": 0.03803521, "step": 18582 }, { "epoch": 37.166, "grad_norm": 1.534833312034607, "learning_rate": 2e-05, "loss": 0.03264375, "step": 18583 }, { "epoch": 37.168, "grad_norm": 1.0317829847335815, "learning_rate": 2e-05, "loss": 0.02741832, "step": 18584 }, { "epoch": 37.17, "grad_norm": 1.0632944107055664, "learning_rate": 2e-05, "loss": 0.03481572, "step": 18585 }, { "epoch": 37.172, "grad_norm": 1.263419270515442, "learning_rate": 2e-05, "loss": 0.04441663, "step": 18586 }, { "epoch": 37.174, "grad_norm": 1.0531634092330933, "learning_rate": 2e-05, "loss": 0.03552748, "step": 18587 }, { "epoch": 37.176, "grad_norm": 1.569259524345398, "learning_rate": 2e-05, "loss": 0.05546115, "step": 18588 }, { "epoch": 37.178, "grad_norm": 1.3170031309127808, "learning_rate": 2e-05, "loss": 0.05026477, "step": 18589 }, { "epoch": 37.18, "grad_norm": 1.1640214920043945, "learning_rate": 2e-05, "loss": 0.04955488, "step": 18590 }, { "epoch": 37.182, "grad_norm": 1.6744143962860107, "learning_rate": 2e-05, "loss": 0.03792101, "step": 18591 }, { "epoch": 37.184, "grad_norm": 0.9118578433990479, "learning_rate": 2e-05, "loss": 0.02556996, "step": 18592 }, { "epoch": 37.186, "grad_norm": 1.644417643547058, "learning_rate": 2e-05, "loss": 0.04530199, "step": 18593 }, { "epoch": 37.188, "grad_norm": 1.7604995965957642, "learning_rate": 2e-05, "loss": 0.06252389, "step": 18594 }, { "epoch": 37.19, "grad_norm": 1.0273133516311646, "learning_rate": 2e-05, "loss": 0.04975293, "step": 18595 }, { "epoch": 37.192, "grad_norm": 1.05352783203125, "learning_rate": 2e-05, "loss": 0.04171479, "step": 18596 }, { "epoch": 37.194, "grad_norm": 0.9587264060974121, "learning_rate": 2e-05, "loss": 0.03576777, "step": 18597 }, { "epoch": 37.196, "grad_norm": 1.094084620475769, "learning_rate": 2e-05, "loss": 0.02543401, "step": 18598 }, { "epoch": 37.198, "grad_norm": 1.139223575592041, "learning_rate": 2e-05, "loss": 0.03940034, "step": 18599 }, { "epoch": 37.2, "grad_norm": 1.050518274307251, "learning_rate": 2e-05, "loss": 0.0513064, "step": 18600 }, { "epoch": 37.202, "grad_norm": 1.5931733846664429, "learning_rate": 2e-05, "loss": 0.03497151, "step": 18601 }, { "epoch": 37.204, "grad_norm": 1.2428843975067139, "learning_rate": 2e-05, "loss": 0.04900038, "step": 18602 }, { "epoch": 37.206, "grad_norm": 5.434542179107666, "learning_rate": 2e-05, "loss": 0.05245988, "step": 18603 }, { "epoch": 37.208, "grad_norm": 1.1642683744430542, "learning_rate": 2e-05, "loss": 0.05161486, "step": 18604 }, { "epoch": 37.21, "grad_norm": 1.0116956233978271, "learning_rate": 2e-05, "loss": 0.03130523, "step": 18605 }, { "epoch": 37.212, "grad_norm": 1.7632371187210083, "learning_rate": 2e-05, "loss": 0.04773512, "step": 18606 }, { "epoch": 37.214, "grad_norm": 0.9744578003883362, "learning_rate": 2e-05, "loss": 0.03005046, "step": 18607 }, { "epoch": 37.216, "grad_norm": 1.5116866827011108, "learning_rate": 2e-05, "loss": 0.04617791, "step": 18608 }, { "epoch": 37.218, "grad_norm": 2.665987730026245, "learning_rate": 2e-05, "loss": 0.03156227, "step": 18609 }, { "epoch": 37.22, "grad_norm": 1.0697249174118042, "learning_rate": 2e-05, "loss": 0.03917909, "step": 18610 }, { "epoch": 37.222, "grad_norm": 1.0376712083816528, "learning_rate": 2e-05, "loss": 0.04070775, "step": 18611 }, { "epoch": 37.224, "grad_norm": 1.1940553188323975, "learning_rate": 2e-05, "loss": 0.03836515, "step": 18612 }, { "epoch": 37.226, "grad_norm": 0.9701217412948608, "learning_rate": 2e-05, "loss": 0.03590652, "step": 18613 }, { "epoch": 37.228, "grad_norm": 1.2851488590240479, "learning_rate": 2e-05, "loss": 0.04535402, "step": 18614 }, { "epoch": 37.23, "grad_norm": 2.4411749839782715, "learning_rate": 2e-05, "loss": 0.06194766, "step": 18615 }, { "epoch": 37.232, "grad_norm": 1.3536734580993652, "learning_rate": 2e-05, "loss": 0.04599462, "step": 18616 }, { "epoch": 37.234, "grad_norm": 1.2606672048568726, "learning_rate": 2e-05, "loss": 0.04580709, "step": 18617 }, { "epoch": 37.236, "grad_norm": 1.3303290605545044, "learning_rate": 2e-05, "loss": 0.04706927, "step": 18618 }, { "epoch": 37.238, "grad_norm": 1.4099715948104858, "learning_rate": 2e-05, "loss": 0.02947021, "step": 18619 }, { "epoch": 37.24, "grad_norm": 1.0826631784439087, "learning_rate": 2e-05, "loss": 0.03750202, "step": 18620 }, { "epoch": 37.242, "grad_norm": 1.1417129039764404, "learning_rate": 2e-05, "loss": 0.05023696, "step": 18621 }, { "epoch": 37.244, "grad_norm": 1.0746430158615112, "learning_rate": 2e-05, "loss": 0.04541557, "step": 18622 }, { "epoch": 37.246, "grad_norm": 1.4391956329345703, "learning_rate": 2e-05, "loss": 0.05088585, "step": 18623 }, { "epoch": 37.248, "grad_norm": 1.7742950916290283, "learning_rate": 2e-05, "loss": 0.04011762, "step": 18624 }, { "epoch": 37.25, "grad_norm": 2.4555623531341553, "learning_rate": 2e-05, "loss": 0.04191405, "step": 18625 }, { "epoch": 37.252, "grad_norm": 1.0835415124893188, "learning_rate": 2e-05, "loss": 0.04408552, "step": 18626 }, { "epoch": 37.254, "grad_norm": 1.494162917137146, "learning_rate": 2e-05, "loss": 0.0393875, "step": 18627 }, { "epoch": 37.256, "grad_norm": 0.9933038353919983, "learning_rate": 2e-05, "loss": 0.0362031, "step": 18628 }, { "epoch": 37.258, "grad_norm": 1.1506096124649048, "learning_rate": 2e-05, "loss": 0.0375497, "step": 18629 }, { "epoch": 37.26, "grad_norm": 1.341207504272461, "learning_rate": 2e-05, "loss": 0.04895544, "step": 18630 }, { "epoch": 37.262, "grad_norm": 1.4760468006134033, "learning_rate": 2e-05, "loss": 0.04239687, "step": 18631 }, { "epoch": 37.264, "grad_norm": 1.1099522113800049, "learning_rate": 2e-05, "loss": 0.05547397, "step": 18632 }, { "epoch": 37.266, "grad_norm": 1.1346218585968018, "learning_rate": 2e-05, "loss": 0.04982274, "step": 18633 }, { "epoch": 37.268, "grad_norm": 0.9855700731277466, "learning_rate": 2e-05, "loss": 0.03562504, "step": 18634 }, { "epoch": 37.27, "grad_norm": 1.561249017715454, "learning_rate": 2e-05, "loss": 0.05814129, "step": 18635 }, { "epoch": 37.272, "grad_norm": 1.0120232105255127, "learning_rate": 2e-05, "loss": 0.0441656, "step": 18636 }, { "epoch": 37.274, "grad_norm": 3.5975611209869385, "learning_rate": 2e-05, "loss": 0.03757409, "step": 18637 }, { "epoch": 37.276, "grad_norm": 1.7085171937942505, "learning_rate": 2e-05, "loss": 0.03554228, "step": 18638 }, { "epoch": 37.278, "grad_norm": 1.2262259721755981, "learning_rate": 2e-05, "loss": 0.02165484, "step": 18639 }, { "epoch": 37.28, "grad_norm": 1.2670469284057617, "learning_rate": 2e-05, "loss": 0.03860283, "step": 18640 }, { "epoch": 37.282, "grad_norm": 1.1696661710739136, "learning_rate": 2e-05, "loss": 0.04609601, "step": 18641 }, { "epoch": 37.284, "grad_norm": 2.9907474517822266, "learning_rate": 2e-05, "loss": 0.05883395, "step": 18642 }, { "epoch": 37.286, "grad_norm": 1.062591552734375, "learning_rate": 2e-05, "loss": 0.04270145, "step": 18643 }, { "epoch": 37.288, "grad_norm": 1.022788166999817, "learning_rate": 2e-05, "loss": 0.02788961, "step": 18644 }, { "epoch": 37.29, "grad_norm": 1.3785815238952637, "learning_rate": 2e-05, "loss": 0.04528718, "step": 18645 }, { "epoch": 37.292, "grad_norm": 1.0437957048416138, "learning_rate": 2e-05, "loss": 0.03974991, "step": 18646 }, { "epoch": 37.294, "grad_norm": 1.1609976291656494, "learning_rate": 2e-05, "loss": 0.05013696, "step": 18647 }, { "epoch": 37.296, "grad_norm": 0.8697720170021057, "learning_rate": 2e-05, "loss": 0.03053538, "step": 18648 }, { "epoch": 37.298, "grad_norm": 0.9573113918304443, "learning_rate": 2e-05, "loss": 0.03847982, "step": 18649 }, { "epoch": 37.3, "grad_norm": 1.183646559715271, "learning_rate": 2e-05, "loss": 0.04569777, "step": 18650 }, { "epoch": 37.302, "grad_norm": 0.8802410364151001, "learning_rate": 2e-05, "loss": 0.02939706, "step": 18651 }, { "epoch": 37.304, "grad_norm": 1.2559164762496948, "learning_rate": 2e-05, "loss": 0.0539922, "step": 18652 }, { "epoch": 37.306, "grad_norm": 1.3703943490982056, "learning_rate": 2e-05, "loss": 0.04603429, "step": 18653 }, { "epoch": 37.308, "grad_norm": 1.1752663850784302, "learning_rate": 2e-05, "loss": 0.05147523, "step": 18654 }, { "epoch": 37.31, "grad_norm": 1.039732575416565, "learning_rate": 2e-05, "loss": 0.03672284, "step": 18655 }, { "epoch": 37.312, "grad_norm": 1.0590288639068604, "learning_rate": 2e-05, "loss": 0.04019672, "step": 18656 }, { "epoch": 37.314, "grad_norm": 1.8295365571975708, "learning_rate": 2e-05, "loss": 0.05494248, "step": 18657 }, { "epoch": 37.316, "grad_norm": 1.3013070821762085, "learning_rate": 2e-05, "loss": 0.03197949, "step": 18658 }, { "epoch": 37.318, "grad_norm": 1.4694881439208984, "learning_rate": 2e-05, "loss": 0.02851127, "step": 18659 }, { "epoch": 37.32, "grad_norm": 1.1061211824417114, "learning_rate": 2e-05, "loss": 0.03776077, "step": 18660 }, { "epoch": 37.322, "grad_norm": 0.9431129097938538, "learning_rate": 2e-05, "loss": 0.03077835, "step": 18661 }, { "epoch": 37.324, "grad_norm": 0.9578332901000977, "learning_rate": 2e-05, "loss": 0.02925043, "step": 18662 }, { "epoch": 37.326, "grad_norm": 1.0546810626983643, "learning_rate": 2e-05, "loss": 0.04426049, "step": 18663 }, { "epoch": 37.328, "grad_norm": 1.1375724077224731, "learning_rate": 2e-05, "loss": 0.02645444, "step": 18664 }, { "epoch": 37.33, "grad_norm": 1.3067609071731567, "learning_rate": 2e-05, "loss": 0.04584169, "step": 18665 }, { "epoch": 37.332, "grad_norm": 2.0529701709747314, "learning_rate": 2e-05, "loss": 0.04706659, "step": 18666 }, { "epoch": 37.334, "grad_norm": 1.4616862535476685, "learning_rate": 2e-05, "loss": 0.04822695, "step": 18667 }, { "epoch": 37.336, "grad_norm": 1.1066008806228638, "learning_rate": 2e-05, "loss": 0.03952828, "step": 18668 }, { "epoch": 37.338, "grad_norm": 2.8666927814483643, "learning_rate": 2e-05, "loss": 0.04431106, "step": 18669 }, { "epoch": 37.34, "grad_norm": 1.526808500289917, "learning_rate": 2e-05, "loss": 0.04228971, "step": 18670 }, { "epoch": 37.342, "grad_norm": 1.7227466106414795, "learning_rate": 2e-05, "loss": 0.03985681, "step": 18671 }, { "epoch": 37.344, "grad_norm": 0.960451602935791, "learning_rate": 2e-05, "loss": 0.02552288, "step": 18672 }, { "epoch": 37.346, "grad_norm": 1.1226481199264526, "learning_rate": 2e-05, "loss": 0.04143129, "step": 18673 }, { "epoch": 37.348, "grad_norm": 2.2680718898773193, "learning_rate": 2e-05, "loss": 0.05270278, "step": 18674 }, { "epoch": 37.35, "grad_norm": 0.8541618585586548, "learning_rate": 2e-05, "loss": 0.0224099, "step": 18675 }, { "epoch": 37.352, "grad_norm": 1.2834433317184448, "learning_rate": 2e-05, "loss": 0.05758996, "step": 18676 }, { "epoch": 37.354, "grad_norm": 1.2858184576034546, "learning_rate": 2e-05, "loss": 0.03408124, "step": 18677 }, { "epoch": 37.356, "grad_norm": 1.2244290113449097, "learning_rate": 2e-05, "loss": 0.04213716, "step": 18678 }, { "epoch": 37.358, "grad_norm": 1.783223271369934, "learning_rate": 2e-05, "loss": 0.05025373, "step": 18679 }, { "epoch": 37.36, "grad_norm": 1.0761710405349731, "learning_rate": 2e-05, "loss": 0.04476252, "step": 18680 }, { "epoch": 37.362, "grad_norm": 0.9174004197120667, "learning_rate": 2e-05, "loss": 0.02644821, "step": 18681 }, { "epoch": 37.364, "grad_norm": 1.5571218729019165, "learning_rate": 2e-05, "loss": 0.03530311, "step": 18682 }, { "epoch": 37.366, "grad_norm": 1.3173847198486328, "learning_rate": 2e-05, "loss": 0.0494157, "step": 18683 }, { "epoch": 37.368, "grad_norm": 3.338667869567871, "learning_rate": 2e-05, "loss": 0.07118287, "step": 18684 }, { "epoch": 37.37, "grad_norm": 1.157820701599121, "learning_rate": 2e-05, "loss": 0.0302717, "step": 18685 }, { "epoch": 37.372, "grad_norm": 0.9768419861793518, "learning_rate": 2e-05, "loss": 0.04007539, "step": 18686 }, { "epoch": 37.374, "grad_norm": 1.2794311046600342, "learning_rate": 2e-05, "loss": 0.04950311, "step": 18687 }, { "epoch": 37.376, "grad_norm": 1.141780972480774, "learning_rate": 2e-05, "loss": 0.03149645, "step": 18688 }, { "epoch": 37.378, "grad_norm": 2.4156718254089355, "learning_rate": 2e-05, "loss": 0.04496891, "step": 18689 }, { "epoch": 37.38, "grad_norm": 0.9722008109092712, "learning_rate": 2e-05, "loss": 0.0443184, "step": 18690 }, { "epoch": 37.382, "grad_norm": 1.1352218389511108, "learning_rate": 2e-05, "loss": 0.03867435, "step": 18691 }, { "epoch": 37.384, "grad_norm": 1.627010464668274, "learning_rate": 2e-05, "loss": 0.06134836, "step": 18692 }, { "epoch": 37.386, "grad_norm": 1.1376895904541016, "learning_rate": 2e-05, "loss": 0.03281328, "step": 18693 }, { "epoch": 37.388, "grad_norm": 1.209214210510254, "learning_rate": 2e-05, "loss": 0.04794659, "step": 18694 }, { "epoch": 37.39, "grad_norm": 1.2975291013717651, "learning_rate": 2e-05, "loss": 0.05038759, "step": 18695 }, { "epoch": 37.392, "grad_norm": 1.3444797992706299, "learning_rate": 2e-05, "loss": 0.03968658, "step": 18696 }, { "epoch": 37.394, "grad_norm": 1.1733344793319702, "learning_rate": 2e-05, "loss": 0.04530336, "step": 18697 }, { "epoch": 37.396, "grad_norm": 0.9696661233901978, "learning_rate": 2e-05, "loss": 0.03088088, "step": 18698 }, { "epoch": 37.398, "grad_norm": 1.0545789003372192, "learning_rate": 2e-05, "loss": 0.03587662, "step": 18699 }, { "epoch": 37.4, "grad_norm": 1.031237006187439, "learning_rate": 2e-05, "loss": 0.03672967, "step": 18700 }, { "epoch": 37.402, "grad_norm": 1.3680472373962402, "learning_rate": 2e-05, "loss": 0.05069418, "step": 18701 }, { "epoch": 37.404, "grad_norm": 1.0634808540344238, "learning_rate": 2e-05, "loss": 0.04009442, "step": 18702 }, { "epoch": 37.406, "grad_norm": 0.896915078163147, "learning_rate": 2e-05, "loss": 0.02137747, "step": 18703 }, { "epoch": 37.408, "grad_norm": 1.0462524890899658, "learning_rate": 2e-05, "loss": 0.03831872, "step": 18704 }, { "epoch": 37.41, "grad_norm": 1.36514151096344, "learning_rate": 2e-05, "loss": 0.05347653, "step": 18705 }, { "epoch": 37.412, "grad_norm": 1.608876347541809, "learning_rate": 2e-05, "loss": 0.03230177, "step": 18706 }, { "epoch": 37.414, "grad_norm": 1.5585176944732666, "learning_rate": 2e-05, "loss": 0.04574811, "step": 18707 }, { "epoch": 37.416, "grad_norm": 1.5784344673156738, "learning_rate": 2e-05, "loss": 0.0467354, "step": 18708 }, { "epoch": 37.418, "grad_norm": 1.5583195686340332, "learning_rate": 2e-05, "loss": 0.04075438, "step": 18709 }, { "epoch": 37.42, "grad_norm": 1.0302294492721558, "learning_rate": 2e-05, "loss": 0.04351837, "step": 18710 }, { "epoch": 37.422, "grad_norm": 1.7316805124282837, "learning_rate": 2e-05, "loss": 0.02402731, "step": 18711 }, { "epoch": 37.424, "grad_norm": 1.5626541376113892, "learning_rate": 2e-05, "loss": 0.03232317, "step": 18712 }, { "epoch": 37.426, "grad_norm": 0.9855737686157227, "learning_rate": 2e-05, "loss": 0.03887186, "step": 18713 }, { "epoch": 37.428, "grad_norm": 1.4292187690734863, "learning_rate": 2e-05, "loss": 0.05129439, "step": 18714 }, { "epoch": 37.43, "grad_norm": 1.3205820322036743, "learning_rate": 2e-05, "loss": 0.03848224, "step": 18715 }, { "epoch": 37.432, "grad_norm": 1.474755048751831, "learning_rate": 2e-05, "loss": 0.0503183, "step": 18716 }, { "epoch": 37.434, "grad_norm": 1.0868511199951172, "learning_rate": 2e-05, "loss": 0.02255207, "step": 18717 }, { "epoch": 37.436, "grad_norm": 1.2556840181350708, "learning_rate": 2e-05, "loss": 0.05313279, "step": 18718 }, { "epoch": 37.438, "grad_norm": 1.2861770391464233, "learning_rate": 2e-05, "loss": 0.05083389, "step": 18719 }, { "epoch": 37.44, "grad_norm": 1.3172714710235596, "learning_rate": 2e-05, "loss": 0.0380227, "step": 18720 }, { "epoch": 37.442, "grad_norm": 1.6785846948623657, "learning_rate": 2e-05, "loss": 0.05099306, "step": 18721 }, { "epoch": 37.444, "grad_norm": 1.040389895439148, "learning_rate": 2e-05, "loss": 0.03511147, "step": 18722 }, { "epoch": 37.446, "grad_norm": 1.0567665100097656, "learning_rate": 2e-05, "loss": 0.02976083, "step": 18723 }, { "epoch": 37.448, "grad_norm": 0.9617815613746643, "learning_rate": 2e-05, "loss": 0.03670631, "step": 18724 }, { "epoch": 37.45, "grad_norm": 1.069361925125122, "learning_rate": 2e-05, "loss": 0.03904826, "step": 18725 }, { "epoch": 37.452, "grad_norm": 1.329677939414978, "learning_rate": 2e-05, "loss": 0.05781191, "step": 18726 }, { "epoch": 37.454, "grad_norm": 1.146031379699707, "learning_rate": 2e-05, "loss": 0.05027099, "step": 18727 }, { "epoch": 37.456, "grad_norm": 1.0236581563949585, "learning_rate": 2e-05, "loss": 0.0411206, "step": 18728 }, { "epoch": 37.458, "grad_norm": 1.2816264629364014, "learning_rate": 2e-05, "loss": 0.04896802, "step": 18729 }, { "epoch": 37.46, "grad_norm": 1.1948260068893433, "learning_rate": 2e-05, "loss": 0.03480673, "step": 18730 }, { "epoch": 37.462, "grad_norm": 1.7572070360183716, "learning_rate": 2e-05, "loss": 0.04478199, "step": 18731 }, { "epoch": 37.464, "grad_norm": 1.0101138353347778, "learning_rate": 2e-05, "loss": 0.044527, "step": 18732 }, { "epoch": 37.466, "grad_norm": 1.537125825881958, "learning_rate": 2e-05, "loss": 0.04904353, "step": 18733 }, { "epoch": 37.468, "grad_norm": 2.0057132244110107, "learning_rate": 2e-05, "loss": 0.05284391, "step": 18734 }, { "epoch": 37.47, "grad_norm": 1.139591932296753, "learning_rate": 2e-05, "loss": 0.03955012, "step": 18735 }, { "epoch": 37.472, "grad_norm": 1.3117825984954834, "learning_rate": 2e-05, "loss": 0.03998091, "step": 18736 }, { "epoch": 37.474, "grad_norm": 1.599098563194275, "learning_rate": 2e-05, "loss": 0.0488559, "step": 18737 }, { "epoch": 37.476, "grad_norm": 1.0199345350265503, "learning_rate": 2e-05, "loss": 0.02837726, "step": 18738 }, { "epoch": 37.478, "grad_norm": 1.672568678855896, "learning_rate": 2e-05, "loss": 0.05150389, "step": 18739 }, { "epoch": 37.48, "grad_norm": 0.9359270334243774, "learning_rate": 2e-05, "loss": 0.03139933, "step": 18740 }, { "epoch": 37.482, "grad_norm": 1.5860284566879272, "learning_rate": 2e-05, "loss": 0.04607878, "step": 18741 }, { "epoch": 37.484, "grad_norm": 1.0930761098861694, "learning_rate": 2e-05, "loss": 0.03963002, "step": 18742 }, { "epoch": 37.486, "grad_norm": 1.213152527809143, "learning_rate": 2e-05, "loss": 0.04821583, "step": 18743 }, { "epoch": 37.488, "grad_norm": 0.9678628444671631, "learning_rate": 2e-05, "loss": 0.03491917, "step": 18744 }, { "epoch": 37.49, "grad_norm": 1.1579011678695679, "learning_rate": 2e-05, "loss": 0.05182432, "step": 18745 }, { "epoch": 37.492, "grad_norm": 0.9998135566711426, "learning_rate": 2e-05, "loss": 0.02899381, "step": 18746 }, { "epoch": 37.494, "grad_norm": 1.365634560585022, "learning_rate": 2e-05, "loss": 0.03647054, "step": 18747 }, { "epoch": 37.496, "grad_norm": 2.8775148391723633, "learning_rate": 2e-05, "loss": 0.06468409, "step": 18748 }, { "epoch": 37.498, "grad_norm": 1.3849204778671265, "learning_rate": 2e-05, "loss": 0.05843618, "step": 18749 }, { "epoch": 37.5, "grad_norm": 1.4979232549667358, "learning_rate": 2e-05, "loss": 0.05652013, "step": 18750 }, { "epoch": 37.502, "grad_norm": 1.2144519090652466, "learning_rate": 2e-05, "loss": 0.0389149, "step": 18751 }, { "epoch": 37.504, "grad_norm": 1.0543650388717651, "learning_rate": 2e-05, "loss": 0.0313639, "step": 18752 }, { "epoch": 37.506, "grad_norm": 1.038210391998291, "learning_rate": 2e-05, "loss": 0.04335963, "step": 18753 }, { "epoch": 37.508, "grad_norm": 1.0079690217971802, "learning_rate": 2e-05, "loss": 0.03755116, "step": 18754 }, { "epoch": 37.51, "grad_norm": 1.770876169204712, "learning_rate": 2e-05, "loss": 0.04107982, "step": 18755 }, { "epoch": 37.512, "grad_norm": 1.6218074560165405, "learning_rate": 2e-05, "loss": 0.04625713, "step": 18756 }, { "epoch": 37.514, "grad_norm": 1.1750694513320923, "learning_rate": 2e-05, "loss": 0.03950002, "step": 18757 }, { "epoch": 37.516, "grad_norm": 1.2037353515625, "learning_rate": 2e-05, "loss": 0.04342839, "step": 18758 }, { "epoch": 37.518, "grad_norm": 1.1445868015289307, "learning_rate": 2e-05, "loss": 0.04752634, "step": 18759 }, { "epoch": 37.52, "grad_norm": 1.0486931800842285, "learning_rate": 2e-05, "loss": 0.04497886, "step": 18760 }, { "epoch": 37.522, "grad_norm": 0.9221504330635071, "learning_rate": 2e-05, "loss": 0.0370011, "step": 18761 }, { "epoch": 37.524, "grad_norm": 0.8999439477920532, "learning_rate": 2e-05, "loss": 0.03294341, "step": 18762 }, { "epoch": 37.526, "grad_norm": 1.1722791194915771, "learning_rate": 2e-05, "loss": 0.03934911, "step": 18763 }, { "epoch": 37.528, "grad_norm": 1.2961126565933228, "learning_rate": 2e-05, "loss": 0.04197449, "step": 18764 }, { "epoch": 37.53, "grad_norm": 0.9199429154396057, "learning_rate": 2e-05, "loss": 0.02904882, "step": 18765 }, { "epoch": 37.532, "grad_norm": 0.9045437574386597, "learning_rate": 2e-05, "loss": 0.03960499, "step": 18766 }, { "epoch": 37.534, "grad_norm": 0.9374881982803345, "learning_rate": 2e-05, "loss": 0.03096394, "step": 18767 }, { "epoch": 37.536, "grad_norm": 1.2470446825027466, "learning_rate": 2e-05, "loss": 0.04021191, "step": 18768 }, { "epoch": 37.538, "grad_norm": 1.3754534721374512, "learning_rate": 2e-05, "loss": 0.0446906, "step": 18769 }, { "epoch": 37.54, "grad_norm": 1.608644962310791, "learning_rate": 2e-05, "loss": 0.04486812, "step": 18770 }, { "epoch": 37.542, "grad_norm": 1.4814962148666382, "learning_rate": 2e-05, "loss": 0.03845561, "step": 18771 }, { "epoch": 37.544, "grad_norm": 0.8970831036567688, "learning_rate": 2e-05, "loss": 0.03752285, "step": 18772 }, { "epoch": 37.546, "grad_norm": 1.1656574010849, "learning_rate": 2e-05, "loss": 0.03882036, "step": 18773 }, { "epoch": 37.548, "grad_norm": 0.8688967823982239, "learning_rate": 2e-05, "loss": 0.01826111, "step": 18774 }, { "epoch": 37.55, "grad_norm": 1.0525118112564087, "learning_rate": 2e-05, "loss": 0.04012558, "step": 18775 }, { "epoch": 37.552, "grad_norm": 1.1830229759216309, "learning_rate": 2e-05, "loss": 0.05035875, "step": 18776 }, { "epoch": 37.554, "grad_norm": 1.1931424140930176, "learning_rate": 2e-05, "loss": 0.04494492, "step": 18777 }, { "epoch": 37.556, "grad_norm": 1.3106215000152588, "learning_rate": 2e-05, "loss": 0.04554956, "step": 18778 }, { "epoch": 37.558, "grad_norm": 1.2222228050231934, "learning_rate": 2e-05, "loss": 0.04136209, "step": 18779 }, { "epoch": 37.56, "grad_norm": 2.1237621307373047, "learning_rate": 2e-05, "loss": 0.04145623, "step": 18780 }, { "epoch": 37.562, "grad_norm": 1.1739251613616943, "learning_rate": 2e-05, "loss": 0.04080192, "step": 18781 }, { "epoch": 37.564, "grad_norm": 2.191302537918091, "learning_rate": 2e-05, "loss": 0.06097953, "step": 18782 }, { "epoch": 37.566, "grad_norm": 0.9671980738639832, "learning_rate": 2e-05, "loss": 0.03338189, "step": 18783 }, { "epoch": 37.568, "grad_norm": 1.8375569581985474, "learning_rate": 2e-05, "loss": 0.04796746, "step": 18784 }, { "epoch": 37.57, "grad_norm": 1.0496113300323486, "learning_rate": 2e-05, "loss": 0.02443472, "step": 18785 }, { "epoch": 37.572, "grad_norm": 2.503190755844116, "learning_rate": 2e-05, "loss": 0.05313979, "step": 18786 }, { "epoch": 37.574, "grad_norm": 0.965300977230072, "learning_rate": 2e-05, "loss": 0.03321171, "step": 18787 }, { "epoch": 37.576, "grad_norm": 1.3297483921051025, "learning_rate": 2e-05, "loss": 0.03972304, "step": 18788 }, { "epoch": 37.578, "grad_norm": 1.1176469326019287, "learning_rate": 2e-05, "loss": 0.03872283, "step": 18789 }, { "epoch": 37.58, "grad_norm": 1.2309982776641846, "learning_rate": 2e-05, "loss": 0.05587587, "step": 18790 }, { "epoch": 37.582, "grad_norm": 1.7290147542953491, "learning_rate": 2e-05, "loss": 0.04843904, "step": 18791 }, { "epoch": 37.584, "grad_norm": 1.3155032396316528, "learning_rate": 2e-05, "loss": 0.04053347, "step": 18792 }, { "epoch": 37.586, "grad_norm": 1.361668586730957, "learning_rate": 2e-05, "loss": 0.03224905, "step": 18793 }, { "epoch": 37.588, "grad_norm": 1.1360480785369873, "learning_rate": 2e-05, "loss": 0.03972386, "step": 18794 }, { "epoch": 37.59, "grad_norm": 1.3317919969558716, "learning_rate": 2e-05, "loss": 0.05043165, "step": 18795 }, { "epoch": 37.592, "grad_norm": 1.1488837003707886, "learning_rate": 2e-05, "loss": 0.03248937, "step": 18796 }, { "epoch": 37.594, "grad_norm": 1.4023540019989014, "learning_rate": 2e-05, "loss": 0.03502064, "step": 18797 }, { "epoch": 37.596, "grad_norm": 1.0766652822494507, "learning_rate": 2e-05, "loss": 0.03397478, "step": 18798 }, { "epoch": 37.598, "grad_norm": 1.0824081897735596, "learning_rate": 2e-05, "loss": 0.04138397, "step": 18799 }, { "epoch": 37.6, "grad_norm": 1.0459606647491455, "learning_rate": 2e-05, "loss": 0.04331833, "step": 18800 }, { "epoch": 37.602, "grad_norm": 1.3006701469421387, "learning_rate": 2e-05, "loss": 0.06298004, "step": 18801 }, { "epoch": 37.604, "grad_norm": 0.9166358113288879, "learning_rate": 2e-05, "loss": 0.02415272, "step": 18802 }, { "epoch": 37.606, "grad_norm": 1.091423511505127, "learning_rate": 2e-05, "loss": 0.03883186, "step": 18803 }, { "epoch": 37.608, "grad_norm": 1.242665410041809, "learning_rate": 2e-05, "loss": 0.04210323, "step": 18804 }, { "epoch": 37.61, "grad_norm": 1.4640295505523682, "learning_rate": 2e-05, "loss": 0.05143019, "step": 18805 }, { "epoch": 37.612, "grad_norm": 1.2153027057647705, "learning_rate": 2e-05, "loss": 0.05529271, "step": 18806 }, { "epoch": 37.614, "grad_norm": 2.2144391536712646, "learning_rate": 2e-05, "loss": 0.04179824, "step": 18807 }, { "epoch": 37.616, "grad_norm": 1.0586007833480835, "learning_rate": 2e-05, "loss": 0.0338909, "step": 18808 }, { "epoch": 37.618, "grad_norm": 1.1268335580825806, "learning_rate": 2e-05, "loss": 0.03529718, "step": 18809 }, { "epoch": 37.62, "grad_norm": 1.4889854192733765, "learning_rate": 2e-05, "loss": 0.0585743, "step": 18810 }, { "epoch": 37.622, "grad_norm": 1.6791647672653198, "learning_rate": 2e-05, "loss": 0.05393176, "step": 18811 }, { "epoch": 37.624, "grad_norm": 1.1075037717819214, "learning_rate": 2e-05, "loss": 0.02849288, "step": 18812 }, { "epoch": 37.626, "grad_norm": 1.3440752029418945, "learning_rate": 2e-05, "loss": 0.04735216, "step": 18813 }, { "epoch": 37.628, "grad_norm": 1.2121247053146362, "learning_rate": 2e-05, "loss": 0.03134248, "step": 18814 }, { "epoch": 37.63, "grad_norm": 1.0615363121032715, "learning_rate": 2e-05, "loss": 0.03442275, "step": 18815 }, { "epoch": 37.632, "grad_norm": 1.183545708656311, "learning_rate": 2e-05, "loss": 0.03669387, "step": 18816 }, { "epoch": 37.634, "grad_norm": 0.9101192951202393, "learning_rate": 2e-05, "loss": 0.03575148, "step": 18817 }, { "epoch": 37.636, "grad_norm": 1.5718504190444946, "learning_rate": 2e-05, "loss": 0.05680436, "step": 18818 }, { "epoch": 37.638, "grad_norm": 1.2223116159439087, "learning_rate": 2e-05, "loss": 0.04046562, "step": 18819 }, { "epoch": 37.64, "grad_norm": 1.1684964895248413, "learning_rate": 2e-05, "loss": 0.0525642, "step": 18820 }, { "epoch": 37.642, "grad_norm": 1.0936496257781982, "learning_rate": 2e-05, "loss": 0.04073422, "step": 18821 }, { "epoch": 37.644, "grad_norm": 1.0097864866256714, "learning_rate": 2e-05, "loss": 0.03097866, "step": 18822 }, { "epoch": 37.646, "grad_norm": 1.2851216793060303, "learning_rate": 2e-05, "loss": 0.05580614, "step": 18823 }, { "epoch": 37.648, "grad_norm": 1.9229702949523926, "learning_rate": 2e-05, "loss": 0.03958568, "step": 18824 }, { "epoch": 37.65, "grad_norm": 1.3668385744094849, "learning_rate": 2e-05, "loss": 0.05524768, "step": 18825 }, { "epoch": 37.652, "grad_norm": 1.417112946510315, "learning_rate": 2e-05, "loss": 0.04026988, "step": 18826 }, { "epoch": 37.654, "grad_norm": 1.154919147491455, "learning_rate": 2e-05, "loss": 0.04654554, "step": 18827 }, { "epoch": 37.656, "grad_norm": 0.9886472821235657, "learning_rate": 2e-05, "loss": 0.03133095, "step": 18828 }, { "epoch": 37.658, "grad_norm": 1.2836391925811768, "learning_rate": 2e-05, "loss": 0.03864687, "step": 18829 }, { "epoch": 37.66, "grad_norm": 1.2053261995315552, "learning_rate": 2e-05, "loss": 0.05016092, "step": 18830 }, { "epoch": 37.662, "grad_norm": 1.1073130369186401, "learning_rate": 2e-05, "loss": 0.03775865, "step": 18831 }, { "epoch": 37.664, "grad_norm": 1.2172409296035767, "learning_rate": 2e-05, "loss": 0.05411567, "step": 18832 }, { "epoch": 37.666, "grad_norm": 1.1823009252548218, "learning_rate": 2e-05, "loss": 0.0415944, "step": 18833 }, { "epoch": 37.668, "grad_norm": 1.0593645572662354, "learning_rate": 2e-05, "loss": 0.03887521, "step": 18834 }, { "epoch": 37.67, "grad_norm": 1.0737974643707275, "learning_rate": 2e-05, "loss": 0.03494824, "step": 18835 }, { "epoch": 37.672, "grad_norm": 1.4756815433502197, "learning_rate": 2e-05, "loss": 0.05688382, "step": 18836 }, { "epoch": 37.674, "grad_norm": 1.2232595682144165, "learning_rate": 2e-05, "loss": 0.05065658, "step": 18837 }, { "epoch": 37.676, "grad_norm": 1.5146193504333496, "learning_rate": 2e-05, "loss": 0.04606935, "step": 18838 }, { "epoch": 37.678, "grad_norm": 1.1179404258728027, "learning_rate": 2e-05, "loss": 0.04785991, "step": 18839 }, { "epoch": 37.68, "grad_norm": 1.7704997062683105, "learning_rate": 2e-05, "loss": 0.03499816, "step": 18840 }, { "epoch": 37.682, "grad_norm": 2.1645781993865967, "learning_rate": 2e-05, "loss": 0.06860285, "step": 18841 }, { "epoch": 37.684, "grad_norm": 1.4239698648452759, "learning_rate": 2e-05, "loss": 0.05423044, "step": 18842 }, { "epoch": 37.686, "grad_norm": 1.2061432600021362, "learning_rate": 2e-05, "loss": 0.0422, "step": 18843 }, { "epoch": 37.688, "grad_norm": 1.434126615524292, "learning_rate": 2e-05, "loss": 0.05467582, "step": 18844 }, { "epoch": 37.69, "grad_norm": 1.0668646097183228, "learning_rate": 2e-05, "loss": 0.04183818, "step": 18845 }, { "epoch": 37.692, "grad_norm": 0.9105406999588013, "learning_rate": 2e-05, "loss": 0.0302218, "step": 18846 }, { "epoch": 37.694, "grad_norm": 2.7572875022888184, "learning_rate": 2e-05, "loss": 0.04500583, "step": 18847 }, { "epoch": 37.696, "grad_norm": 2.284607172012329, "learning_rate": 2e-05, "loss": 0.04284734, "step": 18848 }, { "epoch": 37.698, "grad_norm": 2.073329210281372, "learning_rate": 2e-05, "loss": 0.03520484, "step": 18849 }, { "epoch": 37.7, "grad_norm": 3.604762077331543, "learning_rate": 2e-05, "loss": 0.06385957, "step": 18850 }, { "epoch": 37.702, "grad_norm": 1.1945736408233643, "learning_rate": 2e-05, "loss": 0.03985525, "step": 18851 }, { "epoch": 37.704, "grad_norm": 1.2185908555984497, "learning_rate": 2e-05, "loss": 0.03318772, "step": 18852 }, { "epoch": 37.706, "grad_norm": 1.2688206434249878, "learning_rate": 2e-05, "loss": 0.05495746, "step": 18853 }, { "epoch": 37.708, "grad_norm": 1.0381929874420166, "learning_rate": 2e-05, "loss": 0.03600042, "step": 18854 }, { "epoch": 37.71, "grad_norm": 0.952168345451355, "learning_rate": 2e-05, "loss": 0.02752374, "step": 18855 }, { "epoch": 37.712, "grad_norm": 1.2322078943252563, "learning_rate": 2e-05, "loss": 0.04587801, "step": 18856 }, { "epoch": 37.714, "grad_norm": 1.2536684274673462, "learning_rate": 2e-05, "loss": 0.03789248, "step": 18857 }, { "epoch": 37.716, "grad_norm": 2.647052526473999, "learning_rate": 2e-05, "loss": 0.07668023, "step": 18858 }, { "epoch": 37.718, "grad_norm": 2.344609022140503, "learning_rate": 2e-05, "loss": 0.06175537, "step": 18859 }, { "epoch": 37.72, "grad_norm": 1.710271954536438, "learning_rate": 2e-05, "loss": 0.07425263, "step": 18860 }, { "epoch": 37.722, "grad_norm": 0.9385952353477478, "learning_rate": 2e-05, "loss": 0.03988194, "step": 18861 }, { "epoch": 37.724, "grad_norm": 1.0760703086853027, "learning_rate": 2e-05, "loss": 0.03247745, "step": 18862 }, { "epoch": 37.726, "grad_norm": 1.346384882926941, "learning_rate": 2e-05, "loss": 0.0547515, "step": 18863 }, { "epoch": 37.728, "grad_norm": 1.0319280624389648, "learning_rate": 2e-05, "loss": 0.03719486, "step": 18864 }, { "epoch": 37.73, "grad_norm": 1.327825665473938, "learning_rate": 2e-05, "loss": 0.02865359, "step": 18865 }, { "epoch": 37.732, "grad_norm": 1.7671387195587158, "learning_rate": 2e-05, "loss": 0.03048402, "step": 18866 }, { "epoch": 37.734, "grad_norm": 1.0499013662338257, "learning_rate": 2e-05, "loss": 0.03205416, "step": 18867 }, { "epoch": 37.736, "grad_norm": 0.9944764971733093, "learning_rate": 2e-05, "loss": 0.03722709, "step": 18868 }, { "epoch": 37.738, "grad_norm": 1.3089152574539185, "learning_rate": 2e-05, "loss": 0.04756982, "step": 18869 }, { "epoch": 37.74, "grad_norm": 1.5196123123168945, "learning_rate": 2e-05, "loss": 0.06203455, "step": 18870 }, { "epoch": 37.742, "grad_norm": 1.11556077003479, "learning_rate": 2e-05, "loss": 0.03433929, "step": 18871 }, { "epoch": 37.744, "grad_norm": 2.001180410385132, "learning_rate": 2e-05, "loss": 0.046538, "step": 18872 }, { "epoch": 37.746, "grad_norm": 1.2591460943222046, "learning_rate": 2e-05, "loss": 0.05003956, "step": 18873 }, { "epoch": 37.748, "grad_norm": 1.6820541620254517, "learning_rate": 2e-05, "loss": 0.043096, "step": 18874 }, { "epoch": 37.75, "grad_norm": 1.2268869876861572, "learning_rate": 2e-05, "loss": 0.04136671, "step": 18875 }, { "epoch": 37.752, "grad_norm": 1.2533190250396729, "learning_rate": 2e-05, "loss": 0.0366976, "step": 18876 }, { "epoch": 37.754, "grad_norm": 1.4866986274719238, "learning_rate": 2e-05, "loss": 0.03636875, "step": 18877 }, { "epoch": 37.756, "grad_norm": 1.5848642587661743, "learning_rate": 2e-05, "loss": 0.04737707, "step": 18878 }, { "epoch": 37.758, "grad_norm": 1.353268027305603, "learning_rate": 2e-05, "loss": 0.03133782, "step": 18879 }, { "epoch": 37.76, "grad_norm": 0.9636505842208862, "learning_rate": 2e-05, "loss": 0.0301811, "step": 18880 }, { "epoch": 37.762, "grad_norm": 1.2497457265853882, "learning_rate": 2e-05, "loss": 0.05001786, "step": 18881 }, { "epoch": 37.764, "grad_norm": 1.0673328638076782, "learning_rate": 2e-05, "loss": 0.03139454, "step": 18882 }, { "epoch": 37.766, "grad_norm": 1.557485580444336, "learning_rate": 2e-05, "loss": 0.0464134, "step": 18883 }, { "epoch": 37.768, "grad_norm": 0.973440408706665, "learning_rate": 2e-05, "loss": 0.03469043, "step": 18884 }, { "epoch": 37.77, "grad_norm": 1.1748510599136353, "learning_rate": 2e-05, "loss": 0.03730968, "step": 18885 }, { "epoch": 37.772, "grad_norm": 1.0944515466690063, "learning_rate": 2e-05, "loss": 0.03888043, "step": 18886 }, { "epoch": 37.774, "grad_norm": 3.6084587574005127, "learning_rate": 2e-05, "loss": 0.05079032, "step": 18887 }, { "epoch": 37.776, "grad_norm": 1.7033201456069946, "learning_rate": 2e-05, "loss": 0.0442396, "step": 18888 }, { "epoch": 37.778, "grad_norm": 1.3825198411941528, "learning_rate": 2e-05, "loss": 0.05911713, "step": 18889 }, { "epoch": 37.78, "grad_norm": 1.3935472965240479, "learning_rate": 2e-05, "loss": 0.04597136, "step": 18890 }, { "epoch": 37.782, "grad_norm": 1.515001893043518, "learning_rate": 2e-05, "loss": 0.04437057, "step": 18891 }, { "epoch": 37.784, "grad_norm": 1.1268634796142578, "learning_rate": 2e-05, "loss": 0.03411634, "step": 18892 }, { "epoch": 37.786, "grad_norm": 0.9976750016212463, "learning_rate": 2e-05, "loss": 0.03444566, "step": 18893 }, { "epoch": 37.788, "grad_norm": 1.5224945545196533, "learning_rate": 2e-05, "loss": 0.0607723, "step": 18894 }, { "epoch": 37.79, "grad_norm": 1.3955048322677612, "learning_rate": 2e-05, "loss": 0.05213226, "step": 18895 }, { "epoch": 37.792, "grad_norm": 1.1549447774887085, "learning_rate": 2e-05, "loss": 0.03736443, "step": 18896 }, { "epoch": 37.794, "grad_norm": 0.8485814929008484, "learning_rate": 2e-05, "loss": 0.02462177, "step": 18897 }, { "epoch": 37.796, "grad_norm": 1.3781180381774902, "learning_rate": 2e-05, "loss": 0.05236533, "step": 18898 }, { "epoch": 37.798, "grad_norm": 1.4734482765197754, "learning_rate": 2e-05, "loss": 0.05728751, "step": 18899 }, { "epoch": 37.8, "grad_norm": 1.1258692741394043, "learning_rate": 2e-05, "loss": 0.03739987, "step": 18900 }, { "epoch": 37.802, "grad_norm": 1.2827837467193604, "learning_rate": 2e-05, "loss": 0.05210217, "step": 18901 }, { "epoch": 37.804, "grad_norm": 1.1557444334030151, "learning_rate": 2e-05, "loss": 0.0390267, "step": 18902 }, { "epoch": 37.806, "grad_norm": 1.1604247093200684, "learning_rate": 2e-05, "loss": 0.04971418, "step": 18903 }, { "epoch": 37.808, "grad_norm": 1.0621238946914673, "learning_rate": 2e-05, "loss": 0.03640627, "step": 18904 }, { "epoch": 37.81, "grad_norm": 0.9951264262199402, "learning_rate": 2e-05, "loss": 0.03904339, "step": 18905 }, { "epoch": 37.812, "grad_norm": 1.199965476989746, "learning_rate": 2e-05, "loss": 0.04364878, "step": 18906 }, { "epoch": 37.814, "grad_norm": 3.0767629146575928, "learning_rate": 2e-05, "loss": 0.04365045, "step": 18907 }, { "epoch": 37.816, "grad_norm": 0.8647134304046631, "learning_rate": 2e-05, "loss": 0.0334196, "step": 18908 }, { "epoch": 37.818, "grad_norm": 1.1921844482421875, "learning_rate": 2e-05, "loss": 0.03833631, "step": 18909 }, { "epoch": 37.82, "grad_norm": 1.0273867845535278, "learning_rate": 2e-05, "loss": 0.04143481, "step": 18910 }, { "epoch": 37.822, "grad_norm": 1.125104308128357, "learning_rate": 2e-05, "loss": 0.05544262, "step": 18911 }, { "epoch": 37.824, "grad_norm": 1.0411149263381958, "learning_rate": 2e-05, "loss": 0.04289424, "step": 18912 }, { "epoch": 37.826, "grad_norm": 1.2323323488235474, "learning_rate": 2e-05, "loss": 0.05162044, "step": 18913 }, { "epoch": 37.828, "grad_norm": 1.2034226655960083, "learning_rate": 2e-05, "loss": 0.05214075, "step": 18914 }, { "epoch": 37.83, "grad_norm": 1.3405710458755493, "learning_rate": 2e-05, "loss": 0.05168641, "step": 18915 }, { "epoch": 37.832, "grad_norm": 1.2981528043746948, "learning_rate": 2e-05, "loss": 0.04356202, "step": 18916 }, { "epoch": 37.834, "grad_norm": 0.9416662454605103, "learning_rate": 2e-05, "loss": 0.03926639, "step": 18917 }, { "epoch": 37.836, "grad_norm": 1.080146312713623, "learning_rate": 2e-05, "loss": 0.03623664, "step": 18918 }, { "epoch": 37.838, "grad_norm": 1.164093255996704, "learning_rate": 2e-05, "loss": 0.04676047, "step": 18919 }, { "epoch": 37.84, "grad_norm": 1.0014550685882568, "learning_rate": 2e-05, "loss": 0.03417875, "step": 18920 }, { "epoch": 37.842, "grad_norm": 1.2077052593231201, "learning_rate": 2e-05, "loss": 0.03918765, "step": 18921 }, { "epoch": 37.844, "grad_norm": 1.121488094329834, "learning_rate": 2e-05, "loss": 0.04199528, "step": 18922 }, { "epoch": 37.846, "grad_norm": 2.043963670730591, "learning_rate": 2e-05, "loss": 0.06683844, "step": 18923 }, { "epoch": 37.848, "grad_norm": 1.0892078876495361, "learning_rate": 2e-05, "loss": 0.04384585, "step": 18924 }, { "epoch": 37.85, "grad_norm": 1.333020567893982, "learning_rate": 2e-05, "loss": 0.04862942, "step": 18925 }, { "epoch": 37.852, "grad_norm": 1.0511001348495483, "learning_rate": 2e-05, "loss": 0.03976622, "step": 18926 }, { "epoch": 37.854, "grad_norm": 1.2701404094696045, "learning_rate": 2e-05, "loss": 0.04149718, "step": 18927 }, { "epoch": 37.856, "grad_norm": 1.0651763677597046, "learning_rate": 2e-05, "loss": 0.03586752, "step": 18928 }, { "epoch": 37.858, "grad_norm": 1.0089900493621826, "learning_rate": 2e-05, "loss": 0.03525446, "step": 18929 }, { "epoch": 37.86, "grad_norm": 1.1876775026321411, "learning_rate": 2e-05, "loss": 0.04068927, "step": 18930 }, { "epoch": 37.862, "grad_norm": 1.7308181524276733, "learning_rate": 2e-05, "loss": 0.04104917, "step": 18931 }, { "epoch": 37.864, "grad_norm": 1.0084033012390137, "learning_rate": 2e-05, "loss": 0.03396938, "step": 18932 }, { "epoch": 37.866, "grad_norm": 1.4454501867294312, "learning_rate": 2e-05, "loss": 0.05074462, "step": 18933 }, { "epoch": 37.868, "grad_norm": 1.103598952293396, "learning_rate": 2e-05, "loss": 0.04150467, "step": 18934 }, { "epoch": 37.87, "grad_norm": 1.0260006189346313, "learning_rate": 2e-05, "loss": 0.03909385, "step": 18935 }, { "epoch": 37.872, "grad_norm": 1.1484661102294922, "learning_rate": 2e-05, "loss": 0.0446529, "step": 18936 }, { "epoch": 37.874, "grad_norm": 0.9371235966682434, "learning_rate": 2e-05, "loss": 0.03469496, "step": 18937 }, { "epoch": 37.876, "grad_norm": 2.441399097442627, "learning_rate": 2e-05, "loss": 0.04239362, "step": 18938 }, { "epoch": 37.878, "grad_norm": 0.9973140358924866, "learning_rate": 2e-05, "loss": 0.03371353, "step": 18939 }, { "epoch": 37.88, "grad_norm": 1.1159844398498535, "learning_rate": 2e-05, "loss": 0.04418877, "step": 18940 }, { "epoch": 37.882, "grad_norm": 1.0986323356628418, "learning_rate": 2e-05, "loss": 0.0396911, "step": 18941 }, { "epoch": 37.884, "grad_norm": 2.239150285720825, "learning_rate": 2e-05, "loss": 0.05810123, "step": 18942 }, { "epoch": 37.886, "grad_norm": 1.1507476568222046, "learning_rate": 2e-05, "loss": 0.04050256, "step": 18943 }, { "epoch": 37.888, "grad_norm": 1.643411636352539, "learning_rate": 2e-05, "loss": 0.07198939, "step": 18944 }, { "epoch": 37.89, "grad_norm": 1.7802282571792603, "learning_rate": 2e-05, "loss": 0.0361943, "step": 18945 }, { "epoch": 37.892, "grad_norm": 1.0710393190383911, "learning_rate": 2e-05, "loss": 0.0276309, "step": 18946 }, { "epoch": 37.894, "grad_norm": 1.3028510808944702, "learning_rate": 2e-05, "loss": 0.04614664, "step": 18947 }, { "epoch": 37.896, "grad_norm": 1.6930269002914429, "learning_rate": 2e-05, "loss": 0.05176745, "step": 18948 }, { "epoch": 37.898, "grad_norm": 1.3994868993759155, "learning_rate": 2e-05, "loss": 0.04597143, "step": 18949 }, { "epoch": 37.9, "grad_norm": 1.300676703453064, "learning_rate": 2e-05, "loss": 0.0538766, "step": 18950 }, { "epoch": 37.902, "grad_norm": 1.897986888885498, "learning_rate": 2e-05, "loss": 0.03771758, "step": 18951 }, { "epoch": 37.904, "grad_norm": 1.1213583946228027, "learning_rate": 2e-05, "loss": 0.04674072, "step": 18952 }, { "epoch": 37.906, "grad_norm": 1.3618868589401245, "learning_rate": 2e-05, "loss": 0.06035518, "step": 18953 }, { "epoch": 37.908, "grad_norm": 1.2731349468231201, "learning_rate": 2e-05, "loss": 0.05713828, "step": 18954 }, { "epoch": 37.91, "grad_norm": 1.0878880023956299, "learning_rate": 2e-05, "loss": 0.05491067, "step": 18955 }, { "epoch": 37.912, "grad_norm": 1.5778956413269043, "learning_rate": 2e-05, "loss": 0.05252867, "step": 18956 }, { "epoch": 37.914, "grad_norm": 0.9078095555305481, "learning_rate": 2e-05, "loss": 0.03131845, "step": 18957 }, { "epoch": 37.916, "grad_norm": 1.0328174829483032, "learning_rate": 2e-05, "loss": 0.03666805, "step": 18958 }, { "epoch": 37.918, "grad_norm": 1.363809585571289, "learning_rate": 2e-05, "loss": 0.05573125, "step": 18959 }, { "epoch": 37.92, "grad_norm": 1.0942089557647705, "learning_rate": 2e-05, "loss": 0.05161095, "step": 18960 }, { "epoch": 37.922, "grad_norm": 1.2652561664581299, "learning_rate": 2e-05, "loss": 0.05439752, "step": 18961 }, { "epoch": 37.924, "grad_norm": 1.4360697269439697, "learning_rate": 2e-05, "loss": 0.04909812, "step": 18962 }, { "epoch": 37.926, "grad_norm": 1.5713309049606323, "learning_rate": 2e-05, "loss": 0.05065858, "step": 18963 }, { "epoch": 37.928, "grad_norm": 1.3560004234313965, "learning_rate": 2e-05, "loss": 0.05886821, "step": 18964 }, { "epoch": 37.93, "grad_norm": 1.1961268186569214, "learning_rate": 2e-05, "loss": 0.04319284, "step": 18965 }, { "epoch": 37.932, "grad_norm": 1.2720478773117065, "learning_rate": 2e-05, "loss": 0.07183904, "step": 18966 }, { "epoch": 37.934, "grad_norm": 1.26603102684021, "learning_rate": 2e-05, "loss": 0.04299184, "step": 18967 }, { "epoch": 37.936, "grad_norm": 1.0896682739257812, "learning_rate": 2e-05, "loss": 0.04101091, "step": 18968 }, { "epoch": 37.938, "grad_norm": 1.333804726600647, "learning_rate": 2e-05, "loss": 0.03814703, "step": 18969 }, { "epoch": 37.94, "grad_norm": 1.6315276622772217, "learning_rate": 2e-05, "loss": 0.02696334, "step": 18970 }, { "epoch": 37.942, "grad_norm": 1.2752485275268555, "learning_rate": 2e-05, "loss": 0.04623385, "step": 18971 }, { "epoch": 37.944, "grad_norm": 1.2738232612609863, "learning_rate": 2e-05, "loss": 0.05119524, "step": 18972 }, { "epoch": 37.946, "grad_norm": 1.229262351989746, "learning_rate": 2e-05, "loss": 0.03859371, "step": 18973 }, { "epoch": 37.948, "grad_norm": 1.292868971824646, "learning_rate": 2e-05, "loss": 0.03332417, "step": 18974 }, { "epoch": 37.95, "grad_norm": 0.9618489742279053, "learning_rate": 2e-05, "loss": 0.03035795, "step": 18975 }, { "epoch": 37.952, "grad_norm": 0.9348632097244263, "learning_rate": 2e-05, "loss": 0.03629586, "step": 18976 }, { "epoch": 37.954, "grad_norm": 1.0433769226074219, "learning_rate": 2e-05, "loss": 0.04461107, "step": 18977 }, { "epoch": 37.956, "grad_norm": 1.8684062957763672, "learning_rate": 2e-05, "loss": 0.04248746, "step": 18978 }, { "epoch": 37.958, "grad_norm": 1.2349817752838135, "learning_rate": 2e-05, "loss": 0.04595061, "step": 18979 }, { "epoch": 37.96, "grad_norm": 1.1329549551010132, "learning_rate": 2e-05, "loss": 0.05171024, "step": 18980 }, { "epoch": 37.962, "grad_norm": 1.4046157598495483, "learning_rate": 2e-05, "loss": 0.04267633, "step": 18981 }, { "epoch": 37.964, "grad_norm": 1.4222880601882935, "learning_rate": 2e-05, "loss": 0.03729562, "step": 18982 }, { "epoch": 37.966, "grad_norm": 1.1951045989990234, "learning_rate": 2e-05, "loss": 0.04684995, "step": 18983 }, { "epoch": 37.968, "grad_norm": 1.7370728254318237, "learning_rate": 2e-05, "loss": 0.0354794, "step": 18984 }, { "epoch": 37.97, "grad_norm": 1.2649239301681519, "learning_rate": 2e-05, "loss": 0.03856641, "step": 18985 }, { "epoch": 37.972, "grad_norm": 1.2516887187957764, "learning_rate": 2e-05, "loss": 0.03826671, "step": 18986 }, { "epoch": 37.974, "grad_norm": 1.4137239456176758, "learning_rate": 2e-05, "loss": 0.04873248, "step": 18987 }, { "epoch": 37.976, "grad_norm": 1.205417275428772, "learning_rate": 2e-05, "loss": 0.03422677, "step": 18988 }, { "epoch": 37.978, "grad_norm": 1.3862630128860474, "learning_rate": 2e-05, "loss": 0.03816392, "step": 18989 }, { "epoch": 37.98, "grad_norm": 1.0804787874221802, "learning_rate": 2e-05, "loss": 0.04000157, "step": 18990 }, { "epoch": 37.982, "grad_norm": 1.2574692964553833, "learning_rate": 2e-05, "loss": 0.04244877, "step": 18991 }, { "epoch": 37.984, "grad_norm": 0.8911446928977966, "learning_rate": 2e-05, "loss": 0.02988572, "step": 18992 }, { "epoch": 37.986, "grad_norm": 1.157371997833252, "learning_rate": 2e-05, "loss": 0.03278516, "step": 18993 }, { "epoch": 37.988, "grad_norm": 1.2546195983886719, "learning_rate": 2e-05, "loss": 0.04516232, "step": 18994 }, { "epoch": 37.99, "grad_norm": 1.0913975238800049, "learning_rate": 2e-05, "loss": 0.03805106, "step": 18995 }, { "epoch": 37.992, "grad_norm": 1.3969191312789917, "learning_rate": 2e-05, "loss": 0.04859225, "step": 18996 }, { "epoch": 37.994, "grad_norm": 1.4129071235656738, "learning_rate": 2e-05, "loss": 0.03075989, "step": 18997 }, { "epoch": 37.996, "grad_norm": 1.4782291650772095, "learning_rate": 2e-05, "loss": 0.05404112, "step": 18998 }, { "epoch": 37.998, "grad_norm": 2.0971972942352295, "learning_rate": 2e-05, "loss": 0.06696081, "step": 18999 }, { "epoch": 38.0, "grad_norm": 1.0580739974975586, "learning_rate": 2e-05, "loss": 0.03982267, "step": 19000 }, { "epoch": 38.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9900199600798403, "Equal_1": 0.998, "Equal_2": 0.9820359281437125, "Equal_3": 0.9840319361277445, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 1.0, "Parallel_1": 0.9919839679358717, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.992, "Perpendicular_1": 0.998, "Perpendicular_2": 1.0, "Perpendicular_3": 0.8977955911823647, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.9956, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9840319361277445 }, "eval_runtime": 319.6496, "eval_samples_per_second": 32.848, "eval_steps_per_second": 0.657, "step": 19000 }, { "epoch": 38.002, "grad_norm": 1.2653032541275024, "learning_rate": 2e-05, "loss": 0.05122709, "step": 19001 }, { "epoch": 38.004, "grad_norm": 1.1466037034988403, "learning_rate": 2e-05, "loss": 0.03442731, "step": 19002 }, { "epoch": 38.006, "grad_norm": 2.500974178314209, "learning_rate": 2e-05, "loss": 0.05683993, "step": 19003 }, { "epoch": 38.008, "grad_norm": 1.1632379293441772, "learning_rate": 2e-05, "loss": 0.04220579, "step": 19004 }, { "epoch": 38.01, "grad_norm": 1.1766409873962402, "learning_rate": 2e-05, "loss": 0.0437943, "step": 19005 }, { "epoch": 38.012, "grad_norm": 1.2098218202590942, "learning_rate": 2e-05, "loss": 0.04391178, "step": 19006 }, { "epoch": 38.014, "grad_norm": 1.236122965812683, "learning_rate": 2e-05, "loss": 0.05343842, "step": 19007 }, { "epoch": 38.016, "grad_norm": 1.6261658668518066, "learning_rate": 2e-05, "loss": 0.06093828, "step": 19008 }, { "epoch": 38.018, "grad_norm": 1.1033320426940918, "learning_rate": 2e-05, "loss": 0.03286046, "step": 19009 }, { "epoch": 38.02, "grad_norm": 2.3395283222198486, "learning_rate": 2e-05, "loss": 0.04223632, "step": 19010 }, { "epoch": 38.022, "grad_norm": 1.4763344526290894, "learning_rate": 2e-05, "loss": 0.05102969, "step": 19011 }, { "epoch": 38.024, "grad_norm": 1.1282676458358765, "learning_rate": 2e-05, "loss": 0.04249762, "step": 19012 }, { "epoch": 38.026, "grad_norm": 1.2232784032821655, "learning_rate": 2e-05, "loss": 0.03590241, "step": 19013 }, { "epoch": 38.028, "grad_norm": 1.187089204788208, "learning_rate": 2e-05, "loss": 0.05354986, "step": 19014 }, { "epoch": 38.03, "grad_norm": 1.0039637088775635, "learning_rate": 2e-05, "loss": 0.03929551, "step": 19015 }, { "epoch": 38.032, "grad_norm": 1.1649113893508911, "learning_rate": 2e-05, "loss": 0.04089183, "step": 19016 }, { "epoch": 38.034, "grad_norm": 1.1364166736602783, "learning_rate": 2e-05, "loss": 0.0420451, "step": 19017 }, { "epoch": 38.036, "grad_norm": 1.5335687398910522, "learning_rate": 2e-05, "loss": 0.04455702, "step": 19018 }, { "epoch": 38.038, "grad_norm": 1.4168477058410645, "learning_rate": 2e-05, "loss": 0.05238315, "step": 19019 }, { "epoch": 38.04, "grad_norm": 1.2343907356262207, "learning_rate": 2e-05, "loss": 0.04423891, "step": 19020 }, { "epoch": 38.042, "grad_norm": 1.1726269721984863, "learning_rate": 2e-05, "loss": 0.05387194, "step": 19021 }, { "epoch": 38.044, "grad_norm": 2.447615146636963, "learning_rate": 2e-05, "loss": 0.05135684, "step": 19022 }, { "epoch": 38.046, "grad_norm": 1.5104469060897827, "learning_rate": 2e-05, "loss": 0.043165, "step": 19023 }, { "epoch": 38.048, "grad_norm": 1.3275978565216064, "learning_rate": 2e-05, "loss": 0.04400131, "step": 19024 }, { "epoch": 38.05, "grad_norm": 1.2232415676116943, "learning_rate": 2e-05, "loss": 0.04481234, "step": 19025 }, { "epoch": 38.052, "grad_norm": 1.1241319179534912, "learning_rate": 2e-05, "loss": 0.04850707, "step": 19026 }, { "epoch": 38.054, "grad_norm": 1.286199688911438, "learning_rate": 2e-05, "loss": 0.04397567, "step": 19027 }, { "epoch": 38.056, "grad_norm": 1.2442455291748047, "learning_rate": 2e-05, "loss": 0.04638599, "step": 19028 }, { "epoch": 38.058, "grad_norm": 1.691164493560791, "learning_rate": 2e-05, "loss": 0.05108991, "step": 19029 }, { "epoch": 38.06, "grad_norm": 1.2171449661254883, "learning_rate": 2e-05, "loss": 0.04477761, "step": 19030 }, { "epoch": 38.062, "grad_norm": 1.1396671533584595, "learning_rate": 2e-05, "loss": 0.05528203, "step": 19031 }, { "epoch": 38.064, "grad_norm": 1.8548091650009155, "learning_rate": 2e-05, "loss": 0.05561259, "step": 19032 }, { "epoch": 38.066, "grad_norm": 1.0096694231033325, "learning_rate": 2e-05, "loss": 0.03706703, "step": 19033 }, { "epoch": 38.068, "grad_norm": 1.2540130615234375, "learning_rate": 2e-05, "loss": 0.0528646, "step": 19034 }, { "epoch": 38.07, "grad_norm": 1.436498999595642, "learning_rate": 2e-05, "loss": 0.05726215, "step": 19035 }, { "epoch": 38.072, "grad_norm": 1.2053461074829102, "learning_rate": 2e-05, "loss": 0.03862919, "step": 19036 }, { "epoch": 38.074, "grad_norm": 1.2847776412963867, "learning_rate": 2e-05, "loss": 0.04858331, "step": 19037 }, { "epoch": 38.076, "grad_norm": 2.5267117023468018, "learning_rate": 2e-05, "loss": 0.04583256, "step": 19038 }, { "epoch": 38.078, "grad_norm": 1.9496853351593018, "learning_rate": 2e-05, "loss": 0.05861641, "step": 19039 }, { "epoch": 38.08, "grad_norm": 1.204428791999817, "learning_rate": 2e-05, "loss": 0.04567886, "step": 19040 }, { "epoch": 38.082, "grad_norm": 1.1530473232269287, "learning_rate": 2e-05, "loss": 0.03973469, "step": 19041 }, { "epoch": 38.084, "grad_norm": 1.2695164680480957, "learning_rate": 2e-05, "loss": 0.05841737, "step": 19042 }, { "epoch": 38.086, "grad_norm": 1.5974242687225342, "learning_rate": 2e-05, "loss": 0.04161192, "step": 19043 }, { "epoch": 38.088, "grad_norm": 1.1564900875091553, "learning_rate": 2e-05, "loss": 0.05044547, "step": 19044 }, { "epoch": 38.09, "grad_norm": 1.2367781400680542, "learning_rate": 2e-05, "loss": 0.03753296, "step": 19045 }, { "epoch": 38.092, "grad_norm": 1.287509560585022, "learning_rate": 2e-05, "loss": 0.04154661, "step": 19046 }, { "epoch": 38.094, "grad_norm": 1.1446880102157593, "learning_rate": 2e-05, "loss": 0.03765508, "step": 19047 }, { "epoch": 38.096, "grad_norm": 1.177966594696045, "learning_rate": 2e-05, "loss": 0.05387244, "step": 19048 }, { "epoch": 38.098, "grad_norm": 1.4184907674789429, "learning_rate": 2e-05, "loss": 0.03067533, "step": 19049 }, { "epoch": 38.1, "grad_norm": 1.4943373203277588, "learning_rate": 2e-05, "loss": 0.05171797, "step": 19050 }, { "epoch": 38.102, "grad_norm": 1.4353896379470825, "learning_rate": 2e-05, "loss": 0.06505428, "step": 19051 }, { "epoch": 38.104, "grad_norm": 1.2791118621826172, "learning_rate": 2e-05, "loss": 0.0403844, "step": 19052 }, { "epoch": 38.106, "grad_norm": 1.1676243543624878, "learning_rate": 2e-05, "loss": 0.05121243, "step": 19053 }, { "epoch": 38.108, "grad_norm": 1.0785608291625977, "learning_rate": 2e-05, "loss": 0.04726262, "step": 19054 }, { "epoch": 38.11, "grad_norm": 0.9102175235748291, "learning_rate": 2e-05, "loss": 0.02758621, "step": 19055 }, { "epoch": 38.112, "grad_norm": 1.3264470100402832, "learning_rate": 2e-05, "loss": 0.05410461, "step": 19056 }, { "epoch": 38.114, "grad_norm": 1.18822181224823, "learning_rate": 2e-05, "loss": 0.04347523, "step": 19057 }, { "epoch": 38.116, "grad_norm": 1.4534834623336792, "learning_rate": 2e-05, "loss": 0.04533903, "step": 19058 }, { "epoch": 38.118, "grad_norm": 1.193938136100769, "learning_rate": 2e-05, "loss": 0.04152393, "step": 19059 }, { "epoch": 38.12, "grad_norm": 0.9936333298683167, "learning_rate": 2e-05, "loss": 0.02456021, "step": 19060 }, { "epoch": 38.122, "grad_norm": 1.4597140550613403, "learning_rate": 2e-05, "loss": 0.04886616, "step": 19061 }, { "epoch": 38.124, "grad_norm": 1.4201385974884033, "learning_rate": 2e-05, "loss": 0.05720758, "step": 19062 }, { "epoch": 38.126, "grad_norm": 1.197838544845581, "learning_rate": 2e-05, "loss": 0.04132029, "step": 19063 }, { "epoch": 38.128, "grad_norm": 2.9631714820861816, "learning_rate": 2e-05, "loss": 0.05553018, "step": 19064 }, { "epoch": 38.13, "grad_norm": 1.32180655002594, "learning_rate": 2e-05, "loss": 0.03765612, "step": 19065 }, { "epoch": 38.132, "grad_norm": 1.2723172903060913, "learning_rate": 2e-05, "loss": 0.05038822, "step": 19066 }, { "epoch": 38.134, "grad_norm": 1.2453999519348145, "learning_rate": 2e-05, "loss": 0.04746665, "step": 19067 }, { "epoch": 38.136, "grad_norm": 1.2901151180267334, "learning_rate": 2e-05, "loss": 0.05191978, "step": 19068 }, { "epoch": 38.138, "grad_norm": 1.393450379371643, "learning_rate": 2e-05, "loss": 0.04058621, "step": 19069 }, { "epoch": 38.14, "grad_norm": 3.79284405708313, "learning_rate": 2e-05, "loss": 0.089285, "step": 19070 }, { "epoch": 38.142, "grad_norm": 1.231909990310669, "learning_rate": 2e-05, "loss": 0.04528357, "step": 19071 }, { "epoch": 38.144, "grad_norm": 0.9689210057258606, "learning_rate": 2e-05, "loss": 0.03057509, "step": 19072 }, { "epoch": 38.146, "grad_norm": 1.169950246810913, "learning_rate": 2e-05, "loss": 0.04367265, "step": 19073 }, { "epoch": 38.148, "grad_norm": 1.002368688583374, "learning_rate": 2e-05, "loss": 0.0362767, "step": 19074 }, { "epoch": 38.15, "grad_norm": 1.0449949502944946, "learning_rate": 2e-05, "loss": 0.03191894, "step": 19075 }, { "epoch": 38.152, "grad_norm": 1.23591947555542, "learning_rate": 2e-05, "loss": 0.0624435, "step": 19076 }, { "epoch": 38.154, "grad_norm": 1.3809751272201538, "learning_rate": 2e-05, "loss": 0.04096212, "step": 19077 }, { "epoch": 38.156, "grad_norm": 1.3251644372940063, "learning_rate": 2e-05, "loss": 0.04161641, "step": 19078 }, { "epoch": 38.158, "grad_norm": 1.9493850469589233, "learning_rate": 2e-05, "loss": 0.04814357, "step": 19079 }, { "epoch": 38.16, "grad_norm": 1.3206309080123901, "learning_rate": 2e-05, "loss": 0.05091274, "step": 19080 }, { "epoch": 38.162, "grad_norm": 1.2875269651412964, "learning_rate": 2e-05, "loss": 0.04496816, "step": 19081 }, { "epoch": 38.164, "grad_norm": 0.9897270202636719, "learning_rate": 2e-05, "loss": 0.03702825, "step": 19082 }, { "epoch": 38.166, "grad_norm": 1.2053303718566895, "learning_rate": 2e-05, "loss": 0.04418178, "step": 19083 }, { "epoch": 38.168, "grad_norm": 1.2316017150878906, "learning_rate": 2e-05, "loss": 0.05090442, "step": 19084 }, { "epoch": 38.17, "grad_norm": 1.5326013565063477, "learning_rate": 2e-05, "loss": 0.0485841, "step": 19085 }, { "epoch": 38.172, "grad_norm": 1.6336983442306519, "learning_rate": 2e-05, "loss": 0.04477915, "step": 19086 }, { "epoch": 38.174, "grad_norm": 1.2164212465286255, "learning_rate": 2e-05, "loss": 0.0437744, "step": 19087 }, { "epoch": 38.176, "grad_norm": 1.078202724456787, "learning_rate": 2e-05, "loss": 0.04401769, "step": 19088 }, { "epoch": 38.178, "grad_norm": 1.8545653820037842, "learning_rate": 2e-05, "loss": 0.05269055, "step": 19089 }, { "epoch": 38.18, "grad_norm": 1.920038104057312, "learning_rate": 2e-05, "loss": 0.05271509, "step": 19090 }, { "epoch": 38.182, "grad_norm": 1.242497205734253, "learning_rate": 2e-05, "loss": 0.0489504, "step": 19091 }, { "epoch": 38.184, "grad_norm": 1.3824882507324219, "learning_rate": 2e-05, "loss": 0.04757221, "step": 19092 }, { "epoch": 38.186, "grad_norm": 1.007732629776001, "learning_rate": 2e-05, "loss": 0.03562904, "step": 19093 }, { "epoch": 38.188, "grad_norm": 1.1626834869384766, "learning_rate": 2e-05, "loss": 0.04818817, "step": 19094 }, { "epoch": 38.19, "grad_norm": 2.008382558822632, "learning_rate": 2e-05, "loss": 0.05179016, "step": 19095 }, { "epoch": 38.192, "grad_norm": 1.0385682582855225, "learning_rate": 2e-05, "loss": 0.03288983, "step": 19096 }, { "epoch": 38.194, "grad_norm": 1.3801738023757935, "learning_rate": 2e-05, "loss": 0.05888782, "step": 19097 }, { "epoch": 38.196, "grad_norm": 1.1773533821105957, "learning_rate": 2e-05, "loss": 0.04121737, "step": 19098 }, { "epoch": 38.198, "grad_norm": 1.0565184354782104, "learning_rate": 2e-05, "loss": 0.03833776, "step": 19099 }, { "epoch": 38.2, "grad_norm": 1.225994348526001, "learning_rate": 2e-05, "loss": 0.04604607, "step": 19100 }, { "epoch": 38.202, "grad_norm": 1.1828093528747559, "learning_rate": 2e-05, "loss": 0.0390872, "step": 19101 }, { "epoch": 38.204, "grad_norm": 1.1246726512908936, "learning_rate": 2e-05, "loss": 0.04674789, "step": 19102 }, { "epoch": 38.206, "grad_norm": 1.1224784851074219, "learning_rate": 2e-05, "loss": 0.04065286, "step": 19103 }, { "epoch": 38.208, "grad_norm": 1.1293448209762573, "learning_rate": 2e-05, "loss": 0.03387726, "step": 19104 }, { "epoch": 38.21, "grad_norm": 1.6799935102462769, "learning_rate": 2e-05, "loss": 0.04963982, "step": 19105 }, { "epoch": 38.212, "grad_norm": 0.9128696322441101, "learning_rate": 2e-05, "loss": 0.03350312, "step": 19106 }, { "epoch": 38.214, "grad_norm": 1.3261280059814453, "learning_rate": 2e-05, "loss": 0.04866079, "step": 19107 }, { "epoch": 38.216, "grad_norm": 1.1664577722549438, "learning_rate": 2e-05, "loss": 0.03899412, "step": 19108 }, { "epoch": 38.218, "grad_norm": 1.4886510372161865, "learning_rate": 2e-05, "loss": 0.05114747, "step": 19109 }, { "epoch": 38.22, "grad_norm": 1.2345974445343018, "learning_rate": 2e-05, "loss": 0.05016024, "step": 19110 }, { "epoch": 38.222, "grad_norm": 3.129671812057495, "learning_rate": 2e-05, "loss": 0.05178972, "step": 19111 }, { "epoch": 38.224, "grad_norm": 1.3265200853347778, "learning_rate": 2e-05, "loss": 0.04992515, "step": 19112 }, { "epoch": 38.226, "grad_norm": 1.359765887260437, "learning_rate": 2e-05, "loss": 0.05563714, "step": 19113 }, { "epoch": 38.228, "grad_norm": 1.2336148023605347, "learning_rate": 2e-05, "loss": 0.03507271, "step": 19114 }, { "epoch": 38.23, "grad_norm": 1.2638157606124878, "learning_rate": 2e-05, "loss": 0.06374335, "step": 19115 }, { "epoch": 38.232, "grad_norm": 1.4349949359893799, "learning_rate": 2e-05, "loss": 0.05200667, "step": 19116 }, { "epoch": 38.234, "grad_norm": 2.013317584991455, "learning_rate": 2e-05, "loss": 0.03400422, "step": 19117 }, { "epoch": 38.236, "grad_norm": 1.2802153825759888, "learning_rate": 2e-05, "loss": 0.03919921, "step": 19118 }, { "epoch": 38.238, "grad_norm": 0.9960325360298157, "learning_rate": 2e-05, "loss": 0.03037401, "step": 19119 }, { "epoch": 38.24, "grad_norm": 2.0125842094421387, "learning_rate": 2e-05, "loss": 0.05429566, "step": 19120 }, { "epoch": 38.242, "grad_norm": 1.291225552558899, "learning_rate": 2e-05, "loss": 0.05072563, "step": 19121 }, { "epoch": 38.244, "grad_norm": 0.9041070938110352, "learning_rate": 2e-05, "loss": 0.03640107, "step": 19122 }, { "epoch": 38.246, "grad_norm": 2.185776710510254, "learning_rate": 2e-05, "loss": 0.04717995, "step": 19123 }, { "epoch": 38.248, "grad_norm": 1.4118475914001465, "learning_rate": 2e-05, "loss": 0.03875816, "step": 19124 }, { "epoch": 38.25, "grad_norm": 1.0311429500579834, "learning_rate": 2e-05, "loss": 0.05210552, "step": 19125 }, { "epoch": 38.252, "grad_norm": 0.9929075241088867, "learning_rate": 2e-05, "loss": 0.04484344, "step": 19126 }, { "epoch": 38.254, "grad_norm": 1.266503930091858, "learning_rate": 2e-05, "loss": 0.05835935, "step": 19127 }, { "epoch": 38.256, "grad_norm": 1.0839781761169434, "learning_rate": 2e-05, "loss": 0.04155935, "step": 19128 }, { "epoch": 38.258, "grad_norm": 1.6843171119689941, "learning_rate": 2e-05, "loss": 0.05540327, "step": 19129 }, { "epoch": 38.26, "grad_norm": 1.0930052995681763, "learning_rate": 2e-05, "loss": 0.04292402, "step": 19130 }, { "epoch": 38.262, "grad_norm": 1.0712100267410278, "learning_rate": 2e-05, "loss": 0.0455328, "step": 19131 }, { "epoch": 38.264, "grad_norm": 1.5520873069763184, "learning_rate": 2e-05, "loss": 0.05642373, "step": 19132 }, { "epoch": 38.266, "grad_norm": 1.319484829902649, "learning_rate": 2e-05, "loss": 0.05045997, "step": 19133 }, { "epoch": 38.268, "grad_norm": 1.1289286613464355, "learning_rate": 2e-05, "loss": 0.03494524, "step": 19134 }, { "epoch": 38.27, "grad_norm": 2.3391904830932617, "learning_rate": 2e-05, "loss": 0.05442752, "step": 19135 }, { "epoch": 38.272, "grad_norm": 1.3560073375701904, "learning_rate": 2e-05, "loss": 0.0621442, "step": 19136 }, { "epoch": 38.274, "grad_norm": 1.094653844833374, "learning_rate": 2e-05, "loss": 0.04534341, "step": 19137 }, { "epoch": 38.276, "grad_norm": 1.2565248012542725, "learning_rate": 2e-05, "loss": 0.042792, "step": 19138 }, { "epoch": 38.278, "grad_norm": 1.1656410694122314, "learning_rate": 2e-05, "loss": 0.0445144, "step": 19139 }, { "epoch": 38.28, "grad_norm": 1.1632707118988037, "learning_rate": 2e-05, "loss": 0.04515301, "step": 19140 }, { "epoch": 38.282, "grad_norm": 1.305557370185852, "learning_rate": 2e-05, "loss": 0.04854187, "step": 19141 }, { "epoch": 38.284, "grad_norm": 1.1423861980438232, "learning_rate": 2e-05, "loss": 0.04725296, "step": 19142 }, { "epoch": 38.286, "grad_norm": 1.8827954530715942, "learning_rate": 2e-05, "loss": 0.05522726, "step": 19143 }, { "epoch": 38.288, "grad_norm": 1.2071250677108765, "learning_rate": 2e-05, "loss": 0.03631373, "step": 19144 }, { "epoch": 38.29, "grad_norm": 1.5091750621795654, "learning_rate": 2e-05, "loss": 0.04798866, "step": 19145 }, { "epoch": 38.292, "grad_norm": 1.3303464651107788, "learning_rate": 2e-05, "loss": 0.0500569, "step": 19146 }, { "epoch": 38.294, "grad_norm": 1.247398018836975, "learning_rate": 2e-05, "loss": 0.04999819, "step": 19147 }, { "epoch": 38.296, "grad_norm": 1.242234230041504, "learning_rate": 2e-05, "loss": 0.0521092, "step": 19148 }, { "epoch": 38.298, "grad_norm": 1.7514032125473022, "learning_rate": 2e-05, "loss": 0.04611446, "step": 19149 }, { "epoch": 38.3, "grad_norm": 2.0535500049591064, "learning_rate": 2e-05, "loss": 0.04753162, "step": 19150 }, { "epoch": 38.302, "grad_norm": 1.1260801553726196, "learning_rate": 2e-05, "loss": 0.03980887, "step": 19151 }, { "epoch": 38.304, "grad_norm": 1.2702425718307495, "learning_rate": 2e-05, "loss": 0.0628961, "step": 19152 }, { "epoch": 38.306, "grad_norm": 1.7497895956039429, "learning_rate": 2e-05, "loss": 0.04326449, "step": 19153 }, { "epoch": 38.308, "grad_norm": 1.2419395446777344, "learning_rate": 2e-05, "loss": 0.06270743, "step": 19154 }, { "epoch": 38.31, "grad_norm": 2.1101601123809814, "learning_rate": 2e-05, "loss": 0.04829663, "step": 19155 }, { "epoch": 38.312, "grad_norm": 1.3105263710021973, "learning_rate": 2e-05, "loss": 0.05578191, "step": 19156 }, { "epoch": 38.314, "grad_norm": 1.2866804599761963, "learning_rate": 2e-05, "loss": 0.04316871, "step": 19157 }, { "epoch": 38.316, "grad_norm": 1.2355014085769653, "learning_rate": 2e-05, "loss": 0.06234382, "step": 19158 }, { "epoch": 38.318, "grad_norm": 1.1769856214523315, "learning_rate": 2e-05, "loss": 0.02845784, "step": 19159 }, { "epoch": 38.32, "grad_norm": 1.315886378288269, "learning_rate": 2e-05, "loss": 0.05123859, "step": 19160 }, { "epoch": 38.322, "grad_norm": 1.1736537218093872, "learning_rate": 2e-05, "loss": 0.05540813, "step": 19161 }, { "epoch": 38.324, "grad_norm": 1.1168758869171143, "learning_rate": 2e-05, "loss": 0.04026729, "step": 19162 }, { "epoch": 38.326, "grad_norm": 1.267259120941162, "learning_rate": 2e-05, "loss": 0.05570924, "step": 19163 }, { "epoch": 38.328, "grad_norm": 0.9030328989028931, "learning_rate": 2e-05, "loss": 0.03402457, "step": 19164 }, { "epoch": 38.33, "grad_norm": 1.660753846168518, "learning_rate": 2e-05, "loss": 0.06479599, "step": 19165 }, { "epoch": 38.332, "grad_norm": 1.6835894584655762, "learning_rate": 2e-05, "loss": 0.05633393, "step": 19166 }, { "epoch": 38.334, "grad_norm": 1.3103505373001099, "learning_rate": 2e-05, "loss": 0.05098883, "step": 19167 }, { "epoch": 38.336, "grad_norm": 1.1868908405303955, "learning_rate": 2e-05, "loss": 0.04712537, "step": 19168 }, { "epoch": 38.338, "grad_norm": 1.0892060995101929, "learning_rate": 2e-05, "loss": 0.05058462, "step": 19169 }, { "epoch": 38.34, "grad_norm": 1.2633219957351685, "learning_rate": 2e-05, "loss": 0.05579946, "step": 19170 }, { "epoch": 38.342, "grad_norm": 1.1790961027145386, "learning_rate": 2e-05, "loss": 0.04451218, "step": 19171 }, { "epoch": 38.344, "grad_norm": 1.1820404529571533, "learning_rate": 2e-05, "loss": 0.04325217, "step": 19172 }, { "epoch": 38.346, "grad_norm": 1.4797415733337402, "learning_rate": 2e-05, "loss": 0.04494851, "step": 19173 }, { "epoch": 38.348, "grad_norm": 1.6756718158721924, "learning_rate": 2e-05, "loss": 0.04998035, "step": 19174 }, { "epoch": 38.35, "grad_norm": 1.0109578371047974, "learning_rate": 2e-05, "loss": 0.04028856, "step": 19175 }, { "epoch": 38.352, "grad_norm": 1.486900806427002, "learning_rate": 2e-05, "loss": 0.04508706, "step": 19176 }, { "epoch": 38.354, "grad_norm": 1.2009479999542236, "learning_rate": 2e-05, "loss": 0.05440203, "step": 19177 }, { "epoch": 38.356, "grad_norm": 1.3738353252410889, "learning_rate": 2e-05, "loss": 0.04805979, "step": 19178 }, { "epoch": 38.358, "grad_norm": 1.2725608348846436, "learning_rate": 2e-05, "loss": 0.05642783, "step": 19179 }, { "epoch": 38.36, "grad_norm": 1.2467677593231201, "learning_rate": 2e-05, "loss": 0.07003997, "step": 19180 }, { "epoch": 38.362, "grad_norm": 0.9856001734733582, "learning_rate": 2e-05, "loss": 0.0346032, "step": 19181 }, { "epoch": 38.364, "grad_norm": 1.4484895467758179, "learning_rate": 2e-05, "loss": 0.0645525, "step": 19182 }, { "epoch": 38.366, "grad_norm": 3.5123982429504395, "learning_rate": 2e-05, "loss": 0.03936257, "step": 19183 }, { "epoch": 38.368, "grad_norm": 1.4622162580490112, "learning_rate": 2e-05, "loss": 0.03753219, "step": 19184 }, { "epoch": 38.37, "grad_norm": 1.103468656539917, "learning_rate": 2e-05, "loss": 0.04038829, "step": 19185 }, { "epoch": 38.372, "grad_norm": 1.114812970161438, "learning_rate": 2e-05, "loss": 0.04476861, "step": 19186 }, { "epoch": 38.374, "grad_norm": 1.14467453956604, "learning_rate": 2e-05, "loss": 0.03987251, "step": 19187 }, { "epoch": 38.376, "grad_norm": 1.4186773300170898, "learning_rate": 2e-05, "loss": 0.05686907, "step": 19188 }, { "epoch": 38.378, "grad_norm": 2.0654118061065674, "learning_rate": 2e-05, "loss": 0.05757586, "step": 19189 }, { "epoch": 38.38, "grad_norm": 4.5302414894104, "learning_rate": 2e-05, "loss": 0.04920984, "step": 19190 }, { "epoch": 38.382, "grad_norm": 1.2781339883804321, "learning_rate": 2e-05, "loss": 0.04979723, "step": 19191 }, { "epoch": 38.384, "grad_norm": 1.0826224088668823, "learning_rate": 2e-05, "loss": 0.03591222, "step": 19192 }, { "epoch": 38.386, "grad_norm": 1.9421765804290771, "learning_rate": 2e-05, "loss": 0.05677667, "step": 19193 }, { "epoch": 38.388, "grad_norm": 1.4869078397750854, "learning_rate": 2e-05, "loss": 0.05089599, "step": 19194 }, { "epoch": 38.39, "grad_norm": 0.8969262838363647, "learning_rate": 2e-05, "loss": 0.02550054, "step": 19195 }, { "epoch": 38.392, "grad_norm": 2.2101852893829346, "learning_rate": 2e-05, "loss": 0.07438694, "step": 19196 }, { "epoch": 38.394, "grad_norm": 1.3274297714233398, "learning_rate": 2e-05, "loss": 0.03078259, "step": 19197 }, { "epoch": 38.396, "grad_norm": 1.6053683757781982, "learning_rate": 2e-05, "loss": 0.06297421, "step": 19198 }, { "epoch": 38.398, "grad_norm": 1.0665053129196167, "learning_rate": 2e-05, "loss": 0.03194698, "step": 19199 }, { "epoch": 38.4, "grad_norm": 1.062880277633667, "learning_rate": 2e-05, "loss": 0.03726966, "step": 19200 }, { "epoch": 38.402, "grad_norm": 1.6072918176651, "learning_rate": 2e-05, "loss": 0.04004788, "step": 19201 }, { "epoch": 38.404, "grad_norm": 1.026965856552124, "learning_rate": 2e-05, "loss": 0.04006281, "step": 19202 }, { "epoch": 38.406, "grad_norm": 1.131760597229004, "learning_rate": 2e-05, "loss": 0.04987944, "step": 19203 }, { "epoch": 38.408, "grad_norm": 1.1853435039520264, "learning_rate": 2e-05, "loss": 0.04732582, "step": 19204 }, { "epoch": 38.41, "grad_norm": 1.2606431245803833, "learning_rate": 2e-05, "loss": 0.04184794, "step": 19205 }, { "epoch": 38.412, "grad_norm": 1.06600022315979, "learning_rate": 2e-05, "loss": 0.04323639, "step": 19206 }, { "epoch": 38.414, "grad_norm": 1.5144490003585815, "learning_rate": 2e-05, "loss": 0.05575287, "step": 19207 }, { "epoch": 38.416, "grad_norm": 1.3510489463806152, "learning_rate": 2e-05, "loss": 0.05808666, "step": 19208 }, { "epoch": 38.418, "grad_norm": 1.2126901149749756, "learning_rate": 2e-05, "loss": 0.04525844, "step": 19209 }, { "epoch": 38.42, "grad_norm": 1.9178175926208496, "learning_rate": 2e-05, "loss": 0.04605162, "step": 19210 }, { "epoch": 38.422, "grad_norm": 1.0999705791473389, "learning_rate": 2e-05, "loss": 0.03449193, "step": 19211 }, { "epoch": 38.424, "grad_norm": 1.1063787937164307, "learning_rate": 2e-05, "loss": 0.04200126, "step": 19212 }, { "epoch": 38.426, "grad_norm": 1.1517763137817383, "learning_rate": 2e-05, "loss": 0.03924587, "step": 19213 }, { "epoch": 38.428, "grad_norm": 1.4197931289672852, "learning_rate": 2e-05, "loss": 0.06597655, "step": 19214 }, { "epoch": 38.43, "grad_norm": 1.267537236213684, "learning_rate": 2e-05, "loss": 0.04415938, "step": 19215 }, { "epoch": 38.432, "grad_norm": 1.7044909000396729, "learning_rate": 2e-05, "loss": 0.05466124, "step": 19216 }, { "epoch": 38.434, "grad_norm": 1.201332449913025, "learning_rate": 2e-05, "loss": 0.04956157, "step": 19217 }, { "epoch": 38.436, "grad_norm": 1.2084065675735474, "learning_rate": 2e-05, "loss": 0.05828648, "step": 19218 }, { "epoch": 38.438, "grad_norm": 0.9206735491752625, "learning_rate": 2e-05, "loss": 0.0277809, "step": 19219 }, { "epoch": 38.44, "grad_norm": 1.2664018869400024, "learning_rate": 2e-05, "loss": 0.05090235, "step": 19220 }, { "epoch": 38.442, "grad_norm": 1.4958431720733643, "learning_rate": 2e-05, "loss": 0.03930274, "step": 19221 }, { "epoch": 38.444, "grad_norm": 1.0052601099014282, "learning_rate": 2e-05, "loss": 0.03331514, "step": 19222 }, { "epoch": 38.446, "grad_norm": 1.4637385606765747, "learning_rate": 2e-05, "loss": 0.03536751, "step": 19223 }, { "epoch": 38.448, "grad_norm": 2.041100263595581, "learning_rate": 2e-05, "loss": 0.05582372, "step": 19224 }, { "epoch": 38.45, "grad_norm": 2.045063018798828, "learning_rate": 2e-05, "loss": 0.05946861, "step": 19225 }, { "epoch": 38.452, "grad_norm": 1.763791799545288, "learning_rate": 2e-05, "loss": 0.03987541, "step": 19226 }, { "epoch": 38.454, "grad_norm": 1.3544304370880127, "learning_rate": 2e-05, "loss": 0.04928677, "step": 19227 }, { "epoch": 38.456, "grad_norm": 1.0796279907226562, "learning_rate": 2e-05, "loss": 0.04805015, "step": 19228 }, { "epoch": 38.458, "grad_norm": 1.1521120071411133, "learning_rate": 2e-05, "loss": 0.0500932, "step": 19229 }, { "epoch": 38.46, "grad_norm": 1.0432672500610352, "learning_rate": 2e-05, "loss": 0.0380001, "step": 19230 }, { "epoch": 38.462, "grad_norm": 1.323717474937439, "learning_rate": 2e-05, "loss": 0.04665473, "step": 19231 }, { "epoch": 38.464, "grad_norm": 1.5594381093978882, "learning_rate": 2e-05, "loss": 0.0410453, "step": 19232 }, { "epoch": 38.466, "grad_norm": 1.2671560049057007, "learning_rate": 2e-05, "loss": 0.04604392, "step": 19233 }, { "epoch": 38.468, "grad_norm": 2.227236032485962, "learning_rate": 2e-05, "loss": 0.06509386, "step": 19234 }, { "epoch": 38.47, "grad_norm": 1.537432074546814, "learning_rate": 2e-05, "loss": 0.05568233, "step": 19235 }, { "epoch": 38.472, "grad_norm": 1.411525011062622, "learning_rate": 2e-05, "loss": 0.05239115, "step": 19236 }, { "epoch": 38.474, "grad_norm": 1.2611944675445557, "learning_rate": 2e-05, "loss": 0.03326932, "step": 19237 }, { "epoch": 38.476, "grad_norm": 1.2077014446258545, "learning_rate": 2e-05, "loss": 0.05218486, "step": 19238 }, { "epoch": 38.478, "grad_norm": 1.6436618566513062, "learning_rate": 2e-05, "loss": 0.05138172, "step": 19239 }, { "epoch": 38.48, "grad_norm": 1.2838315963745117, "learning_rate": 2e-05, "loss": 0.04669505, "step": 19240 }, { "epoch": 38.482, "grad_norm": 1.3253170251846313, "learning_rate": 2e-05, "loss": 0.05545774, "step": 19241 }, { "epoch": 38.484, "grad_norm": 1.3036000728607178, "learning_rate": 2e-05, "loss": 0.05103177, "step": 19242 }, { "epoch": 38.486, "grad_norm": 1.1623722314834595, "learning_rate": 2e-05, "loss": 0.04469373, "step": 19243 }, { "epoch": 38.488, "grad_norm": 1.3823708295822144, "learning_rate": 2e-05, "loss": 0.05241356, "step": 19244 }, { "epoch": 38.49, "grad_norm": 1.8385578393936157, "learning_rate": 2e-05, "loss": 0.05396533, "step": 19245 }, { "epoch": 38.492, "grad_norm": 1.2699154615402222, "learning_rate": 2e-05, "loss": 0.05115933, "step": 19246 }, { "epoch": 38.494, "grad_norm": 1.23024582862854, "learning_rate": 2e-05, "loss": 0.03833042, "step": 19247 }, { "epoch": 38.496, "grad_norm": 1.3315479755401611, "learning_rate": 2e-05, "loss": 0.0449665, "step": 19248 }, { "epoch": 38.498, "grad_norm": 1.5424014329910278, "learning_rate": 2e-05, "loss": 0.04347795, "step": 19249 }, { "epoch": 38.5, "grad_norm": 1.245294213294983, "learning_rate": 2e-05, "loss": 0.05182352, "step": 19250 }, { "epoch": 38.502, "grad_norm": 2.529345750808716, "learning_rate": 2e-05, "loss": 0.04279209, "step": 19251 }, { "epoch": 38.504, "grad_norm": 1.083033800125122, "learning_rate": 2e-05, "loss": 0.04436203, "step": 19252 }, { "epoch": 38.506, "grad_norm": 1.3808947801589966, "learning_rate": 2e-05, "loss": 0.06541286, "step": 19253 }, { "epoch": 38.508, "grad_norm": 1.0596139430999756, "learning_rate": 2e-05, "loss": 0.03044969, "step": 19254 }, { "epoch": 38.51, "grad_norm": 1.435849666595459, "learning_rate": 2e-05, "loss": 0.06559452, "step": 19255 }, { "epoch": 38.512, "grad_norm": 1.3231841325759888, "learning_rate": 2e-05, "loss": 0.05220275, "step": 19256 }, { "epoch": 38.514, "grad_norm": 1.2284640073776245, "learning_rate": 2e-05, "loss": 0.05421726, "step": 19257 }, { "epoch": 38.516, "grad_norm": 1.0423113107681274, "learning_rate": 2e-05, "loss": 0.0366753, "step": 19258 }, { "epoch": 38.518, "grad_norm": 1.5633624792099, "learning_rate": 2e-05, "loss": 0.05434642, "step": 19259 }, { "epoch": 38.52, "grad_norm": 2.032729148864746, "learning_rate": 2e-05, "loss": 0.04470177, "step": 19260 }, { "epoch": 38.522, "grad_norm": 1.3375166654586792, "learning_rate": 2e-05, "loss": 0.04499324, "step": 19261 }, { "epoch": 38.524, "grad_norm": 1.552940011024475, "learning_rate": 2e-05, "loss": 0.05302621, "step": 19262 }, { "epoch": 38.526, "grad_norm": 1.896203637123108, "learning_rate": 2e-05, "loss": 0.05639236, "step": 19263 }, { "epoch": 38.528, "grad_norm": 1.3673580884933472, "learning_rate": 2e-05, "loss": 0.05501523, "step": 19264 }, { "epoch": 38.53, "grad_norm": 1.3088792562484741, "learning_rate": 2e-05, "loss": 0.04335637, "step": 19265 }, { "epoch": 38.532, "grad_norm": 1.1292967796325684, "learning_rate": 2e-05, "loss": 0.04752572, "step": 19266 }, { "epoch": 38.534, "grad_norm": 1.1001681089401245, "learning_rate": 2e-05, "loss": 0.04105626, "step": 19267 }, { "epoch": 38.536, "grad_norm": 1.1444545984268188, "learning_rate": 2e-05, "loss": 0.03688351, "step": 19268 }, { "epoch": 38.538, "grad_norm": 1.626502275466919, "learning_rate": 2e-05, "loss": 0.0603931, "step": 19269 }, { "epoch": 38.54, "grad_norm": 2.7609822750091553, "learning_rate": 2e-05, "loss": 0.04431499, "step": 19270 }, { "epoch": 38.542, "grad_norm": 1.9874058961868286, "learning_rate": 2e-05, "loss": 0.04629558, "step": 19271 }, { "epoch": 38.544, "grad_norm": 1.3202643394470215, "learning_rate": 2e-05, "loss": 0.0395207, "step": 19272 }, { "epoch": 38.546, "grad_norm": 3.389218807220459, "learning_rate": 2e-05, "loss": 0.03313368, "step": 19273 }, { "epoch": 38.548, "grad_norm": 1.8491859436035156, "learning_rate": 2e-05, "loss": 0.05389631, "step": 19274 }, { "epoch": 38.55, "grad_norm": 1.5536370277404785, "learning_rate": 2e-05, "loss": 0.04745733, "step": 19275 }, { "epoch": 38.552, "grad_norm": 1.2798070907592773, "learning_rate": 2e-05, "loss": 0.04839146, "step": 19276 }, { "epoch": 38.554, "grad_norm": 1.372031807899475, "learning_rate": 2e-05, "loss": 0.04501195, "step": 19277 }, { "epoch": 38.556, "grad_norm": 2.0433499813079834, "learning_rate": 2e-05, "loss": 0.04445722, "step": 19278 }, { "epoch": 38.558, "grad_norm": 2.050704002380371, "learning_rate": 2e-05, "loss": 0.06163099, "step": 19279 }, { "epoch": 38.56, "grad_norm": 1.116773247718811, "learning_rate": 2e-05, "loss": 0.04683295, "step": 19280 }, { "epoch": 38.562, "grad_norm": 1.1566107273101807, "learning_rate": 2e-05, "loss": 0.04381316, "step": 19281 }, { "epoch": 38.564, "grad_norm": 1.1835641860961914, "learning_rate": 2e-05, "loss": 0.05227757, "step": 19282 }, { "epoch": 38.566, "grad_norm": 1.1514893770217896, "learning_rate": 2e-05, "loss": 0.04288551, "step": 19283 }, { "epoch": 38.568, "grad_norm": 0.9600772857666016, "learning_rate": 2e-05, "loss": 0.03298654, "step": 19284 }, { "epoch": 38.57, "grad_norm": 1.2036709785461426, "learning_rate": 2e-05, "loss": 0.050576, "step": 19285 }, { "epoch": 38.572, "grad_norm": 3.4883370399475098, "learning_rate": 2e-05, "loss": 0.05142732, "step": 19286 }, { "epoch": 38.574, "grad_norm": 1.1536023616790771, "learning_rate": 2e-05, "loss": 0.04423029, "step": 19287 }, { "epoch": 38.576, "grad_norm": 1.2535980939865112, "learning_rate": 2e-05, "loss": 0.04615848, "step": 19288 }, { "epoch": 38.578, "grad_norm": 1.3373180627822876, "learning_rate": 2e-05, "loss": 0.0535438, "step": 19289 }, { "epoch": 38.58, "grad_norm": 1.3583929538726807, "learning_rate": 2e-05, "loss": 0.04743036, "step": 19290 }, { "epoch": 38.582, "grad_norm": 1.2381739616394043, "learning_rate": 2e-05, "loss": 0.04024129, "step": 19291 }, { "epoch": 38.584, "grad_norm": 1.3908512592315674, "learning_rate": 2e-05, "loss": 0.03594154, "step": 19292 }, { "epoch": 38.586, "grad_norm": 2.500706911087036, "learning_rate": 2e-05, "loss": 0.0516097, "step": 19293 }, { "epoch": 38.588, "grad_norm": 1.6076672077178955, "learning_rate": 2e-05, "loss": 0.04902358, "step": 19294 }, { "epoch": 38.59, "grad_norm": 1.1396098136901855, "learning_rate": 2e-05, "loss": 0.04243046, "step": 19295 }, { "epoch": 38.592, "grad_norm": 1.1097677946090698, "learning_rate": 2e-05, "loss": 0.04250278, "step": 19296 }, { "epoch": 38.594, "grad_norm": 1.5975714921951294, "learning_rate": 2e-05, "loss": 0.05274806, "step": 19297 }, { "epoch": 38.596, "grad_norm": 1.1447633504867554, "learning_rate": 2e-05, "loss": 0.03151781, "step": 19298 }, { "epoch": 38.598, "grad_norm": 0.938985288143158, "learning_rate": 2e-05, "loss": 0.03843331, "step": 19299 }, { "epoch": 38.6, "grad_norm": 2.130866050720215, "learning_rate": 2e-05, "loss": 0.06860761, "step": 19300 }, { "epoch": 38.602, "grad_norm": 1.0700651407241821, "learning_rate": 2e-05, "loss": 0.03487083, "step": 19301 }, { "epoch": 38.604, "grad_norm": 1.744493007659912, "learning_rate": 2e-05, "loss": 0.06696406, "step": 19302 }, { "epoch": 38.606, "grad_norm": 1.0361813306808472, "learning_rate": 2e-05, "loss": 0.04734627, "step": 19303 }, { "epoch": 38.608, "grad_norm": 2.7028353214263916, "learning_rate": 2e-05, "loss": 0.0526311, "step": 19304 }, { "epoch": 38.61, "grad_norm": 1.9693485498428345, "learning_rate": 2e-05, "loss": 0.04218089, "step": 19305 }, { "epoch": 38.612, "grad_norm": 1.2493609189987183, "learning_rate": 2e-05, "loss": 0.05733298, "step": 19306 }, { "epoch": 38.614, "grad_norm": 1.406848669052124, "learning_rate": 2e-05, "loss": 0.04433504, "step": 19307 }, { "epoch": 38.616, "grad_norm": 1.199169397354126, "learning_rate": 2e-05, "loss": 0.04781777, "step": 19308 }, { "epoch": 38.618, "grad_norm": 1.0775588750839233, "learning_rate": 2e-05, "loss": 0.0368211, "step": 19309 }, { "epoch": 38.62, "grad_norm": 1.0789852142333984, "learning_rate": 2e-05, "loss": 0.05344281, "step": 19310 }, { "epoch": 38.622, "grad_norm": 1.7068625688552856, "learning_rate": 2e-05, "loss": 0.03457305, "step": 19311 }, { "epoch": 38.624, "grad_norm": 1.6094670295715332, "learning_rate": 2e-05, "loss": 0.04848836, "step": 19312 }, { "epoch": 38.626, "grad_norm": 1.0214130878448486, "learning_rate": 2e-05, "loss": 0.03269782, "step": 19313 }, { "epoch": 38.628, "grad_norm": 2.0523335933685303, "learning_rate": 2e-05, "loss": 0.04664942, "step": 19314 }, { "epoch": 38.63, "grad_norm": 1.2456196546554565, "learning_rate": 2e-05, "loss": 0.03976301, "step": 19315 }, { "epoch": 38.632, "grad_norm": 1.076731562614441, "learning_rate": 2e-05, "loss": 0.03696981, "step": 19316 }, { "epoch": 38.634, "grad_norm": 1.4856189489364624, "learning_rate": 2e-05, "loss": 0.05270289, "step": 19317 }, { "epoch": 38.636, "grad_norm": 1.0082216262817383, "learning_rate": 2e-05, "loss": 0.03996618, "step": 19318 }, { "epoch": 38.638, "grad_norm": 1.2000486850738525, "learning_rate": 2e-05, "loss": 0.05204409, "step": 19319 }, { "epoch": 38.64, "grad_norm": 1.0704103708267212, "learning_rate": 2e-05, "loss": 0.04252416, "step": 19320 }, { "epoch": 38.642, "grad_norm": 1.4279651641845703, "learning_rate": 2e-05, "loss": 0.05704008, "step": 19321 }, { "epoch": 38.644, "grad_norm": 1.1782804727554321, "learning_rate": 2e-05, "loss": 0.04499515, "step": 19322 }, { "epoch": 38.646, "grad_norm": 1.00314199924469, "learning_rate": 2e-05, "loss": 0.03433149, "step": 19323 }, { "epoch": 38.648, "grad_norm": 1.2116609811782837, "learning_rate": 2e-05, "loss": 0.04214825, "step": 19324 }, { "epoch": 38.65, "grad_norm": 1.176286220550537, "learning_rate": 2e-05, "loss": 0.04334562, "step": 19325 }, { "epoch": 38.652, "grad_norm": 1.6107664108276367, "learning_rate": 2e-05, "loss": 0.04943176, "step": 19326 }, { "epoch": 38.654, "grad_norm": 1.3699370622634888, "learning_rate": 2e-05, "loss": 0.05509346, "step": 19327 }, { "epoch": 38.656, "grad_norm": 2.1541030406951904, "learning_rate": 2e-05, "loss": 0.0614479, "step": 19328 }, { "epoch": 38.658, "grad_norm": 1.3070214986801147, "learning_rate": 2e-05, "loss": 0.04495452, "step": 19329 }, { "epoch": 38.66, "grad_norm": 1.4150477647781372, "learning_rate": 2e-05, "loss": 0.04712527, "step": 19330 }, { "epoch": 38.662, "grad_norm": 2.080083131790161, "learning_rate": 2e-05, "loss": 0.04044828, "step": 19331 }, { "epoch": 38.664, "grad_norm": 1.225448489189148, "learning_rate": 2e-05, "loss": 0.05349731, "step": 19332 }, { "epoch": 38.666, "grad_norm": 1.4030938148498535, "learning_rate": 2e-05, "loss": 0.04746981, "step": 19333 }, { "epoch": 38.668, "grad_norm": 1.162904143333435, "learning_rate": 2e-05, "loss": 0.05157799, "step": 19334 }, { "epoch": 38.67, "grad_norm": 1.4816622734069824, "learning_rate": 2e-05, "loss": 0.04534583, "step": 19335 }, { "epoch": 38.672, "grad_norm": 3.349114179611206, "learning_rate": 2e-05, "loss": 0.04592524, "step": 19336 }, { "epoch": 38.674, "grad_norm": 1.2360336780548096, "learning_rate": 2e-05, "loss": 0.04279222, "step": 19337 }, { "epoch": 38.676, "grad_norm": 2.137404441833496, "learning_rate": 2e-05, "loss": 0.04810013, "step": 19338 }, { "epoch": 38.678, "grad_norm": 1.7355518341064453, "learning_rate": 2e-05, "loss": 0.0334636, "step": 19339 }, { "epoch": 38.68, "grad_norm": 1.9419608116149902, "learning_rate": 2e-05, "loss": 0.05887587, "step": 19340 }, { "epoch": 38.682, "grad_norm": 1.431899905204773, "learning_rate": 2e-05, "loss": 0.05057277, "step": 19341 }, { "epoch": 38.684, "grad_norm": 1.284608244895935, "learning_rate": 2e-05, "loss": 0.04898845, "step": 19342 }, { "epoch": 38.686, "grad_norm": 1.426267385482788, "learning_rate": 2e-05, "loss": 0.05257843, "step": 19343 }, { "epoch": 38.688, "grad_norm": 1.2423020601272583, "learning_rate": 2e-05, "loss": 0.05175748, "step": 19344 }, { "epoch": 38.69, "grad_norm": 1.3101670742034912, "learning_rate": 2e-05, "loss": 0.04395001, "step": 19345 }, { "epoch": 38.692, "grad_norm": 1.2051000595092773, "learning_rate": 2e-05, "loss": 0.04174311, "step": 19346 }, { "epoch": 38.694, "grad_norm": 1.1244724988937378, "learning_rate": 2e-05, "loss": 0.04402897, "step": 19347 }, { "epoch": 38.696, "grad_norm": 1.2426832914352417, "learning_rate": 2e-05, "loss": 0.04704241, "step": 19348 }, { "epoch": 38.698, "grad_norm": 1.4777288436889648, "learning_rate": 2e-05, "loss": 0.06701204, "step": 19349 }, { "epoch": 38.7, "grad_norm": 2.267500162124634, "learning_rate": 2e-05, "loss": 0.05487394, "step": 19350 }, { "epoch": 38.702, "grad_norm": 1.73554527759552, "learning_rate": 2e-05, "loss": 0.05015863, "step": 19351 }, { "epoch": 38.704, "grad_norm": 2.264845371246338, "learning_rate": 2e-05, "loss": 0.03342777, "step": 19352 }, { "epoch": 38.706, "grad_norm": 1.1378469467163086, "learning_rate": 2e-05, "loss": 0.04409365, "step": 19353 }, { "epoch": 38.708, "grad_norm": 1.3917489051818848, "learning_rate": 2e-05, "loss": 0.04507478, "step": 19354 }, { "epoch": 38.71, "grad_norm": 1.3060683012008667, "learning_rate": 2e-05, "loss": 0.05471382, "step": 19355 }, { "epoch": 38.712, "grad_norm": 1.3260350227355957, "learning_rate": 2e-05, "loss": 0.04912686, "step": 19356 }, { "epoch": 38.714, "grad_norm": 2.3827006816864014, "learning_rate": 2e-05, "loss": 0.05015289, "step": 19357 }, { "epoch": 38.716, "grad_norm": 1.5596778392791748, "learning_rate": 2e-05, "loss": 0.0522569, "step": 19358 }, { "epoch": 38.718, "grad_norm": 2.026498556137085, "learning_rate": 2e-05, "loss": 0.04310253, "step": 19359 }, { "epoch": 38.72, "grad_norm": 1.090510606765747, "learning_rate": 2e-05, "loss": 0.03988282, "step": 19360 }, { "epoch": 38.722, "grad_norm": 1.136027455329895, "learning_rate": 2e-05, "loss": 0.04197805, "step": 19361 }, { "epoch": 38.724, "grad_norm": 1.0448952913284302, "learning_rate": 2e-05, "loss": 0.04863923, "step": 19362 }, { "epoch": 38.726, "grad_norm": 1.3965747356414795, "learning_rate": 2e-05, "loss": 0.05056681, "step": 19363 }, { "epoch": 38.728, "grad_norm": 1.705721378326416, "learning_rate": 2e-05, "loss": 0.05423497, "step": 19364 }, { "epoch": 38.73, "grad_norm": 2.248636245727539, "learning_rate": 2e-05, "loss": 0.06463277, "step": 19365 }, { "epoch": 38.732, "grad_norm": 1.4248642921447754, "learning_rate": 2e-05, "loss": 0.04670746, "step": 19366 }, { "epoch": 38.734, "grad_norm": 1.1761763095855713, "learning_rate": 2e-05, "loss": 0.05271841, "step": 19367 }, { "epoch": 38.736, "grad_norm": 1.2061562538146973, "learning_rate": 2e-05, "loss": 0.04730795, "step": 19368 }, { "epoch": 38.738, "grad_norm": 1.3923081159591675, "learning_rate": 2e-05, "loss": 0.05840427, "step": 19369 }, { "epoch": 38.74, "grad_norm": 1.250772476196289, "learning_rate": 2e-05, "loss": 0.04998203, "step": 19370 }, { "epoch": 38.742, "grad_norm": 1.2058557271957397, "learning_rate": 2e-05, "loss": 0.04659589, "step": 19371 }, { "epoch": 38.744, "grad_norm": 1.7007390260696411, "learning_rate": 2e-05, "loss": 0.05928213, "step": 19372 }, { "epoch": 38.746, "grad_norm": 2.651254415512085, "learning_rate": 2e-05, "loss": 0.05975854, "step": 19373 }, { "epoch": 38.748, "grad_norm": 3.1047842502593994, "learning_rate": 2e-05, "loss": 0.06387729, "step": 19374 }, { "epoch": 38.75, "grad_norm": 5.55999755859375, "learning_rate": 2e-05, "loss": 0.06207875, "step": 19375 }, { "epoch": 38.752, "grad_norm": 1.1087514162063599, "learning_rate": 2e-05, "loss": 0.03848689, "step": 19376 }, { "epoch": 38.754, "grad_norm": 1.1218056678771973, "learning_rate": 2e-05, "loss": 0.045539, "step": 19377 }, { "epoch": 38.756, "grad_norm": 1.1395851373672485, "learning_rate": 2e-05, "loss": 0.05567363, "step": 19378 }, { "epoch": 38.758, "grad_norm": 1.1848268508911133, "learning_rate": 2e-05, "loss": 0.04403425, "step": 19379 }, { "epoch": 38.76, "grad_norm": 1.0772857666015625, "learning_rate": 2e-05, "loss": 0.04982436, "step": 19380 }, { "epoch": 38.762, "grad_norm": 1.0320247411727905, "learning_rate": 2e-05, "loss": 0.03512707, "step": 19381 }, { "epoch": 38.764, "grad_norm": 1.3068004846572876, "learning_rate": 2e-05, "loss": 0.04124657, "step": 19382 }, { "epoch": 38.766, "grad_norm": 1.5414135456085205, "learning_rate": 2e-05, "loss": 0.05403698, "step": 19383 }, { "epoch": 38.768, "grad_norm": 1.0777575969696045, "learning_rate": 2e-05, "loss": 0.05617758, "step": 19384 }, { "epoch": 38.77, "grad_norm": 1.1199369430541992, "learning_rate": 2e-05, "loss": 0.04268428, "step": 19385 }, { "epoch": 38.772, "grad_norm": 1.1600946187973022, "learning_rate": 2e-05, "loss": 0.0405513, "step": 19386 }, { "epoch": 38.774, "grad_norm": 3.984130382537842, "learning_rate": 2e-05, "loss": 0.05990764, "step": 19387 }, { "epoch": 38.776, "grad_norm": 1.1094694137573242, "learning_rate": 2e-05, "loss": 0.03420043, "step": 19388 }, { "epoch": 38.778, "grad_norm": 2.123666286468506, "learning_rate": 2e-05, "loss": 0.05570301, "step": 19389 }, { "epoch": 38.78, "grad_norm": 1.096800446510315, "learning_rate": 2e-05, "loss": 0.04737834, "step": 19390 }, { "epoch": 38.782, "grad_norm": 1.0921696424484253, "learning_rate": 2e-05, "loss": 0.04886656, "step": 19391 }, { "epoch": 38.784, "grad_norm": 1.10837984085083, "learning_rate": 2e-05, "loss": 0.03572984, "step": 19392 }, { "epoch": 38.786, "grad_norm": 0.9516921639442444, "learning_rate": 2e-05, "loss": 0.03601223, "step": 19393 }, { "epoch": 38.788, "grad_norm": 1.0108002424240112, "learning_rate": 2e-05, "loss": 0.04009066, "step": 19394 }, { "epoch": 38.79, "grad_norm": 1.7825038433074951, "learning_rate": 2e-05, "loss": 0.05417824, "step": 19395 }, { "epoch": 38.792, "grad_norm": 1.7519702911376953, "learning_rate": 2e-05, "loss": 0.05254176, "step": 19396 }, { "epoch": 38.794, "grad_norm": 1.3303686380386353, "learning_rate": 2e-05, "loss": 0.05503142, "step": 19397 }, { "epoch": 38.796, "grad_norm": 1.049363136291504, "learning_rate": 2e-05, "loss": 0.0418441, "step": 19398 }, { "epoch": 38.798, "grad_norm": 1.388549566268921, "learning_rate": 2e-05, "loss": 0.04791044, "step": 19399 }, { "epoch": 38.8, "grad_norm": 1.4452887773513794, "learning_rate": 2e-05, "loss": 0.04734738, "step": 19400 }, { "epoch": 38.802, "grad_norm": 1.2454729080200195, "learning_rate": 2e-05, "loss": 0.05771025, "step": 19401 }, { "epoch": 38.804, "grad_norm": 1.1745954751968384, "learning_rate": 2e-05, "loss": 0.04313898, "step": 19402 }, { "epoch": 38.806, "grad_norm": 1.3334699869155884, "learning_rate": 2e-05, "loss": 0.06793031, "step": 19403 }, { "epoch": 38.808, "grad_norm": 1.42832350730896, "learning_rate": 2e-05, "loss": 0.03323733, "step": 19404 }, { "epoch": 38.81, "grad_norm": 1.0687870979309082, "learning_rate": 2e-05, "loss": 0.0360961, "step": 19405 }, { "epoch": 38.812, "grad_norm": 1.741623878479004, "learning_rate": 2e-05, "loss": 0.06625956, "step": 19406 }, { "epoch": 38.814, "grad_norm": 1.6357783079147339, "learning_rate": 2e-05, "loss": 0.05045918, "step": 19407 }, { "epoch": 38.816, "grad_norm": 1.15137779712677, "learning_rate": 2e-05, "loss": 0.03751816, "step": 19408 }, { "epoch": 38.818, "grad_norm": 1.2414789199829102, "learning_rate": 2e-05, "loss": 0.04433636, "step": 19409 }, { "epoch": 38.82, "grad_norm": 1.303000569343567, "learning_rate": 2e-05, "loss": 0.05153903, "step": 19410 }, { "epoch": 38.822, "grad_norm": 2.2832419872283936, "learning_rate": 2e-05, "loss": 0.05765641, "step": 19411 }, { "epoch": 38.824, "grad_norm": 1.1722428798675537, "learning_rate": 2e-05, "loss": 0.04241635, "step": 19412 }, { "epoch": 38.826, "grad_norm": 1.613328218460083, "learning_rate": 2e-05, "loss": 0.03948274, "step": 19413 }, { "epoch": 38.828, "grad_norm": 1.2444097995758057, "learning_rate": 2e-05, "loss": 0.04656627, "step": 19414 }, { "epoch": 38.83, "grad_norm": 1.2532914876937866, "learning_rate": 2e-05, "loss": 0.0406023, "step": 19415 }, { "epoch": 38.832, "grad_norm": 1.4562067985534668, "learning_rate": 2e-05, "loss": 0.06386394, "step": 19416 }, { "epoch": 38.834, "grad_norm": 1.2982302904129028, "learning_rate": 2e-05, "loss": 0.04366529, "step": 19417 }, { "epoch": 38.836, "grad_norm": 1.3933048248291016, "learning_rate": 2e-05, "loss": 0.05839304, "step": 19418 }, { "epoch": 38.838, "grad_norm": 1.2442266941070557, "learning_rate": 2e-05, "loss": 0.04477119, "step": 19419 }, { "epoch": 38.84, "grad_norm": 1.2520731687545776, "learning_rate": 2e-05, "loss": 0.05134234, "step": 19420 }, { "epoch": 38.842, "grad_norm": 1.3517646789550781, "learning_rate": 2e-05, "loss": 0.04959063, "step": 19421 }, { "epoch": 38.844, "grad_norm": 1.1629074811935425, "learning_rate": 2e-05, "loss": 0.04994313, "step": 19422 }, { "epoch": 38.846, "grad_norm": 4.365392684936523, "learning_rate": 2e-05, "loss": 0.0617961, "step": 19423 }, { "epoch": 38.848, "grad_norm": 2.191051959991455, "learning_rate": 2e-05, "loss": 0.04884791, "step": 19424 }, { "epoch": 38.85, "grad_norm": 1.9126558303833008, "learning_rate": 2e-05, "loss": 0.04287531, "step": 19425 }, { "epoch": 38.852, "grad_norm": 1.3821977376937866, "learning_rate": 2e-05, "loss": 0.0512047, "step": 19426 }, { "epoch": 38.854, "grad_norm": 2.440401554107666, "learning_rate": 2e-05, "loss": 0.06993075, "step": 19427 }, { "epoch": 38.856, "grad_norm": 1.3591678142547607, "learning_rate": 2e-05, "loss": 0.05418681, "step": 19428 }, { "epoch": 38.858, "grad_norm": 1.1129034757614136, "learning_rate": 2e-05, "loss": 0.04099159, "step": 19429 }, { "epoch": 38.86, "grad_norm": 0.9613949656486511, "learning_rate": 2e-05, "loss": 0.02941245, "step": 19430 }, { "epoch": 38.862, "grad_norm": 0.8730504512786865, "learning_rate": 2e-05, "loss": 0.02738763, "step": 19431 }, { "epoch": 38.864, "grad_norm": 1.091715931892395, "learning_rate": 2e-05, "loss": 0.03717642, "step": 19432 }, { "epoch": 38.866, "grad_norm": 1.1144849061965942, "learning_rate": 2e-05, "loss": 0.04412191, "step": 19433 }, { "epoch": 38.868, "grad_norm": 1.2194790840148926, "learning_rate": 2e-05, "loss": 0.04663887, "step": 19434 }, { "epoch": 38.87, "grad_norm": 1.3206437826156616, "learning_rate": 2e-05, "loss": 0.05822754, "step": 19435 }, { "epoch": 38.872, "grad_norm": 1.0756211280822754, "learning_rate": 2e-05, "loss": 0.03367351, "step": 19436 }, { "epoch": 38.874, "grad_norm": 1.2603015899658203, "learning_rate": 2e-05, "loss": 0.04884558, "step": 19437 }, { "epoch": 38.876, "grad_norm": 1.3238136768341064, "learning_rate": 2e-05, "loss": 0.0366739, "step": 19438 }, { "epoch": 38.878, "grad_norm": 1.2530521154403687, "learning_rate": 2e-05, "loss": 0.05306315, "step": 19439 }, { "epoch": 38.88, "grad_norm": 1.3359322547912598, "learning_rate": 2e-05, "loss": 0.04838687, "step": 19440 }, { "epoch": 38.882, "grad_norm": 1.201737880706787, "learning_rate": 2e-05, "loss": 0.05184001, "step": 19441 }, { "epoch": 38.884, "grad_norm": 1.2076383829116821, "learning_rate": 2e-05, "loss": 0.04534627, "step": 19442 }, { "epoch": 38.886, "grad_norm": 0.982481062412262, "learning_rate": 2e-05, "loss": 0.03846693, "step": 19443 }, { "epoch": 38.888, "grad_norm": 1.109921932220459, "learning_rate": 2e-05, "loss": 0.04263131, "step": 19444 }, { "epoch": 38.89, "grad_norm": 1.115616798400879, "learning_rate": 2e-05, "loss": 0.05047696, "step": 19445 }, { "epoch": 38.892, "grad_norm": 1.1365337371826172, "learning_rate": 2e-05, "loss": 0.0466363, "step": 19446 }, { "epoch": 38.894, "grad_norm": 1.30055832862854, "learning_rate": 2e-05, "loss": 0.06107081, "step": 19447 }, { "epoch": 38.896, "grad_norm": 1.2148528099060059, "learning_rate": 2e-05, "loss": 0.04618006, "step": 19448 }, { "epoch": 38.898, "grad_norm": 1.095479130744934, "learning_rate": 2e-05, "loss": 0.04486157, "step": 19449 }, { "epoch": 38.9, "grad_norm": 1.2125225067138672, "learning_rate": 2e-05, "loss": 0.04702915, "step": 19450 }, { "epoch": 38.902, "grad_norm": 1.1719056367874146, "learning_rate": 2e-05, "loss": 0.05031497, "step": 19451 }, { "epoch": 38.904, "grad_norm": 1.9016114473342896, "learning_rate": 2e-05, "loss": 0.06579206, "step": 19452 }, { "epoch": 38.906, "grad_norm": 2.084883213043213, "learning_rate": 2e-05, "loss": 0.04503134, "step": 19453 }, { "epoch": 38.908, "grad_norm": 3.5525739192962646, "learning_rate": 2e-05, "loss": 0.06399915, "step": 19454 }, { "epoch": 38.91, "grad_norm": 1.238961935043335, "learning_rate": 2e-05, "loss": 0.05072328, "step": 19455 }, { "epoch": 38.912, "grad_norm": 1.1139464378356934, "learning_rate": 2e-05, "loss": 0.05063874, "step": 19456 }, { "epoch": 38.914, "grad_norm": 1.591186285018921, "learning_rate": 2e-05, "loss": 0.04680803, "step": 19457 }, { "epoch": 38.916, "grad_norm": 1.3666948080062866, "learning_rate": 2e-05, "loss": 0.05156454, "step": 19458 }, { "epoch": 38.918, "grad_norm": 1.5260530710220337, "learning_rate": 2e-05, "loss": 0.07067595, "step": 19459 }, { "epoch": 38.92, "grad_norm": 1.2484623193740845, "learning_rate": 2e-05, "loss": 0.03980666, "step": 19460 }, { "epoch": 38.922, "grad_norm": 1.2105351686477661, "learning_rate": 2e-05, "loss": 0.05022258, "step": 19461 }, { "epoch": 38.924, "grad_norm": 0.9624425172805786, "learning_rate": 2e-05, "loss": 0.02262569, "step": 19462 }, { "epoch": 38.926, "grad_norm": 1.1966172456741333, "learning_rate": 2e-05, "loss": 0.04677077, "step": 19463 }, { "epoch": 38.928, "grad_norm": 2.7761287689208984, "learning_rate": 2e-05, "loss": 0.04095355, "step": 19464 }, { "epoch": 38.93, "grad_norm": 1.3439223766326904, "learning_rate": 2e-05, "loss": 0.05076102, "step": 19465 }, { "epoch": 38.932, "grad_norm": 1.281233310699463, "learning_rate": 2e-05, "loss": 0.05176672, "step": 19466 }, { "epoch": 38.934, "grad_norm": 1.2068181037902832, "learning_rate": 2e-05, "loss": 0.03931053, "step": 19467 }, { "epoch": 38.936, "grad_norm": 2.3821423053741455, "learning_rate": 2e-05, "loss": 0.05391637, "step": 19468 }, { "epoch": 38.938, "grad_norm": 0.8565077781677246, "learning_rate": 2e-05, "loss": 0.02748447, "step": 19469 }, { "epoch": 38.94, "grad_norm": 1.593507170677185, "learning_rate": 2e-05, "loss": 0.05532858, "step": 19470 }, { "epoch": 38.942, "grad_norm": 1.2847896814346313, "learning_rate": 2e-05, "loss": 0.052665, "step": 19471 }, { "epoch": 38.944, "grad_norm": 3.365082263946533, "learning_rate": 2e-05, "loss": 0.04390424, "step": 19472 }, { "epoch": 38.946, "grad_norm": 1.228043556213379, "learning_rate": 2e-05, "loss": 0.03701095, "step": 19473 }, { "epoch": 38.948, "grad_norm": 1.2872579097747803, "learning_rate": 2e-05, "loss": 0.06041116, "step": 19474 }, { "epoch": 38.95, "grad_norm": 5.743537902832031, "learning_rate": 2e-05, "loss": 0.05942594, "step": 19475 }, { "epoch": 38.952, "grad_norm": 1.4819161891937256, "learning_rate": 2e-05, "loss": 0.05315714, "step": 19476 }, { "epoch": 38.954, "grad_norm": 4.263488292694092, "learning_rate": 2e-05, "loss": 0.05684969, "step": 19477 }, { "epoch": 38.956, "grad_norm": 1.293532133102417, "learning_rate": 2e-05, "loss": 0.05474854, "step": 19478 }, { "epoch": 38.958, "grad_norm": 1.5321731567382812, "learning_rate": 2e-05, "loss": 0.0521361, "step": 19479 }, { "epoch": 38.96, "grad_norm": 1.5575649738311768, "learning_rate": 2e-05, "loss": 0.0335691, "step": 19480 }, { "epoch": 38.962, "grad_norm": 1.2537888288497925, "learning_rate": 2e-05, "loss": 0.04636952, "step": 19481 }, { "epoch": 38.964, "grad_norm": 1.7497940063476562, "learning_rate": 2e-05, "loss": 0.06122085, "step": 19482 }, { "epoch": 38.966, "grad_norm": 0.9865143299102783, "learning_rate": 2e-05, "loss": 0.03592449, "step": 19483 }, { "epoch": 38.968, "grad_norm": 1.0101889371871948, "learning_rate": 2e-05, "loss": 0.04233185, "step": 19484 }, { "epoch": 38.97, "grad_norm": 1.1617642641067505, "learning_rate": 2e-05, "loss": 0.04329985, "step": 19485 }, { "epoch": 38.972, "grad_norm": 1.1280710697174072, "learning_rate": 2e-05, "loss": 0.03594364, "step": 19486 }, { "epoch": 38.974, "grad_norm": 1.4211704730987549, "learning_rate": 2e-05, "loss": 0.06367813, "step": 19487 }, { "epoch": 38.976, "grad_norm": 1.0155389308929443, "learning_rate": 2e-05, "loss": 0.04221959, "step": 19488 }, { "epoch": 38.978, "grad_norm": 0.9778318405151367, "learning_rate": 2e-05, "loss": 0.0305156, "step": 19489 }, { "epoch": 38.98, "grad_norm": 1.2039105892181396, "learning_rate": 2e-05, "loss": 0.04294903, "step": 19490 }, { "epoch": 38.982, "grad_norm": 0.9735546112060547, "learning_rate": 2e-05, "loss": 0.03935603, "step": 19491 }, { "epoch": 38.984, "grad_norm": 2.8653101921081543, "learning_rate": 2e-05, "loss": 0.05370308, "step": 19492 }, { "epoch": 38.986, "grad_norm": 1.1377127170562744, "learning_rate": 2e-05, "loss": 0.04588469, "step": 19493 }, { "epoch": 38.988, "grad_norm": 1.1004186868667603, "learning_rate": 2e-05, "loss": 0.05202518, "step": 19494 }, { "epoch": 38.99, "grad_norm": 1.7122163772583008, "learning_rate": 2e-05, "loss": 0.03624396, "step": 19495 }, { "epoch": 38.992, "grad_norm": 2.000995635986328, "learning_rate": 2e-05, "loss": 0.04281428, "step": 19496 }, { "epoch": 38.994, "grad_norm": 1.3111525774002075, "learning_rate": 2e-05, "loss": 0.05325833, "step": 19497 }, { "epoch": 38.996, "grad_norm": 1.5315814018249512, "learning_rate": 2e-05, "loss": 0.06504035, "step": 19498 }, { "epoch": 38.998, "grad_norm": 1.1684447526931763, "learning_rate": 2e-05, "loss": 0.04492722, "step": 19499 }, { "epoch": 39.0, "grad_norm": 1.3859971761703491, "learning_rate": 2e-05, "loss": 0.06541097, "step": 19500 }, { "epoch": 39.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9920159680638723, "Equal_1": 1.0, "Equal_2": 0.9860279441117764, "Equal_3": 0.9900199600798403, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 1.0, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.994, "Perpendicular_1": 0.996, "Perpendicular_2": 0.992, "Perpendicular_3": 0.8847695390781564, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.9976666666666667, "PointLiesOnCircle_3": 0.9972666666666666, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9840319361277445 }, "eval_runtime": 320.8325, "eval_samples_per_second": 32.727, "eval_steps_per_second": 0.655, "step": 19500 }, { "epoch": 39.002, "grad_norm": 6.220664024353027, "learning_rate": 2e-05, "loss": 0.04667213, "step": 19501 }, { "epoch": 39.004, "grad_norm": 1.135877013206482, "learning_rate": 2e-05, "loss": 0.03528913, "step": 19502 }, { "epoch": 39.006, "grad_norm": 1.918533205986023, "learning_rate": 2e-05, "loss": 0.05770627, "step": 19503 }, { "epoch": 39.008, "grad_norm": 1.3888102769851685, "learning_rate": 2e-05, "loss": 0.05147287, "step": 19504 }, { "epoch": 39.01, "grad_norm": 1.2796348333358765, "learning_rate": 2e-05, "loss": 0.04314057, "step": 19505 }, { "epoch": 39.012, "grad_norm": 1.2352646589279175, "learning_rate": 2e-05, "loss": 0.04238914, "step": 19506 }, { "epoch": 39.014, "grad_norm": 1.5327287912368774, "learning_rate": 2e-05, "loss": 0.06135549, "step": 19507 }, { "epoch": 39.016, "grad_norm": 1.9634387493133545, "learning_rate": 2e-05, "loss": 0.04630491, "step": 19508 }, { "epoch": 39.018, "grad_norm": 1.3204143047332764, "learning_rate": 2e-05, "loss": 0.04790439, "step": 19509 }, { "epoch": 39.02, "grad_norm": 1.5740340948104858, "learning_rate": 2e-05, "loss": 0.04070012, "step": 19510 }, { "epoch": 39.022, "grad_norm": 1.3499553203582764, "learning_rate": 2e-05, "loss": 0.05047411, "step": 19511 }, { "epoch": 39.024, "grad_norm": 0.9350709915161133, "learning_rate": 2e-05, "loss": 0.02976116, "step": 19512 }, { "epoch": 39.026, "grad_norm": 1.0683033466339111, "learning_rate": 2e-05, "loss": 0.03930287, "step": 19513 }, { "epoch": 39.028, "grad_norm": 1.1646449565887451, "learning_rate": 2e-05, "loss": 0.04475568, "step": 19514 }, { "epoch": 39.03, "grad_norm": 1.3507591485977173, "learning_rate": 2e-05, "loss": 0.05256594, "step": 19515 }, { "epoch": 39.032, "grad_norm": 1.2592138051986694, "learning_rate": 2e-05, "loss": 0.04029159, "step": 19516 }, { "epoch": 39.034, "grad_norm": 1.0979448556900024, "learning_rate": 2e-05, "loss": 0.05017549, "step": 19517 }, { "epoch": 39.036, "grad_norm": 1.0506963729858398, "learning_rate": 2e-05, "loss": 0.04544584, "step": 19518 }, { "epoch": 39.038, "grad_norm": 1.2208514213562012, "learning_rate": 2e-05, "loss": 0.04804792, "step": 19519 }, { "epoch": 39.04, "grad_norm": 1.086912989616394, "learning_rate": 2e-05, "loss": 0.04290297, "step": 19520 }, { "epoch": 39.042, "grad_norm": 2.351929187774658, "learning_rate": 2e-05, "loss": 0.0460132, "step": 19521 }, { "epoch": 39.044, "grad_norm": 1.1975494623184204, "learning_rate": 2e-05, "loss": 0.04094232, "step": 19522 }, { "epoch": 39.046, "grad_norm": 1.4654606580734253, "learning_rate": 2e-05, "loss": 0.0614688, "step": 19523 }, { "epoch": 39.048, "grad_norm": 1.3602313995361328, "learning_rate": 2e-05, "loss": 0.05025218, "step": 19524 }, { "epoch": 39.05, "grad_norm": 1.1130294799804688, "learning_rate": 2e-05, "loss": 0.03360223, "step": 19525 }, { "epoch": 39.052, "grad_norm": 1.3653548955917358, "learning_rate": 2e-05, "loss": 0.0470277, "step": 19526 }, { "epoch": 39.054, "grad_norm": 1.0190237760543823, "learning_rate": 2e-05, "loss": 0.03658837, "step": 19527 }, { "epoch": 39.056, "grad_norm": 1.2680890560150146, "learning_rate": 2e-05, "loss": 0.04371505, "step": 19528 }, { "epoch": 39.058, "grad_norm": 1.6253620386123657, "learning_rate": 2e-05, "loss": 0.05293868, "step": 19529 }, { "epoch": 39.06, "grad_norm": 1.2560430765151978, "learning_rate": 2e-05, "loss": 0.03810944, "step": 19530 }, { "epoch": 39.062, "grad_norm": 1.2208555936813354, "learning_rate": 2e-05, "loss": 0.04190925, "step": 19531 }, { "epoch": 39.064, "grad_norm": 1.098758578300476, "learning_rate": 2e-05, "loss": 0.03760337, "step": 19532 }, { "epoch": 39.066, "grad_norm": 1.0823376178741455, "learning_rate": 2e-05, "loss": 0.040763, "step": 19533 }, { "epoch": 39.068, "grad_norm": 1.1920530796051025, "learning_rate": 2e-05, "loss": 0.03933562, "step": 19534 }, { "epoch": 39.07, "grad_norm": 1.3417820930480957, "learning_rate": 2e-05, "loss": 0.07266225, "step": 19535 }, { "epoch": 39.072, "grad_norm": 1.156474232673645, "learning_rate": 2e-05, "loss": 0.03301832, "step": 19536 }, { "epoch": 39.074, "grad_norm": 1.7914104461669922, "learning_rate": 2e-05, "loss": 0.05243705, "step": 19537 }, { "epoch": 39.076, "grad_norm": 1.2239590883255005, "learning_rate": 2e-05, "loss": 0.05815011, "step": 19538 }, { "epoch": 39.078, "grad_norm": 1.9638248682022095, "learning_rate": 2e-05, "loss": 0.058245, "step": 19539 }, { "epoch": 39.08, "grad_norm": 1.095078468322754, "learning_rate": 2e-05, "loss": 0.041117, "step": 19540 }, { "epoch": 39.082, "grad_norm": 1.0802770853042603, "learning_rate": 2e-05, "loss": 0.04389591, "step": 19541 }, { "epoch": 39.084, "grad_norm": 2.124570369720459, "learning_rate": 2e-05, "loss": 0.05145385, "step": 19542 }, { "epoch": 39.086, "grad_norm": 1.1902073621749878, "learning_rate": 2e-05, "loss": 0.03708824, "step": 19543 }, { "epoch": 39.088, "grad_norm": 1.1799651384353638, "learning_rate": 2e-05, "loss": 0.03826425, "step": 19544 }, { "epoch": 39.09, "grad_norm": 2.205336570739746, "learning_rate": 2e-05, "loss": 0.0574959, "step": 19545 }, { "epoch": 39.092, "grad_norm": 1.5711607933044434, "learning_rate": 2e-05, "loss": 0.04823216, "step": 19546 }, { "epoch": 39.094, "grad_norm": 1.6747071743011475, "learning_rate": 2e-05, "loss": 0.063738, "step": 19547 }, { "epoch": 39.096, "grad_norm": 1.288379430770874, "learning_rate": 2e-05, "loss": 0.05128692, "step": 19548 }, { "epoch": 39.098, "grad_norm": 1.3630702495574951, "learning_rate": 2e-05, "loss": 0.04941294, "step": 19549 }, { "epoch": 39.1, "grad_norm": 2.2876434326171875, "learning_rate": 2e-05, "loss": 0.05177325, "step": 19550 }, { "epoch": 39.102, "grad_norm": 1.884543538093567, "learning_rate": 2e-05, "loss": 0.0452019, "step": 19551 }, { "epoch": 39.104, "grad_norm": 2.4394567012786865, "learning_rate": 2e-05, "loss": 0.05596255, "step": 19552 }, { "epoch": 39.106, "grad_norm": 1.3302148580551147, "learning_rate": 2e-05, "loss": 0.04958398, "step": 19553 }, { "epoch": 39.108, "grad_norm": 0.9878301620483398, "learning_rate": 2e-05, "loss": 0.03418873, "step": 19554 }, { "epoch": 39.11, "grad_norm": 1.4796167612075806, "learning_rate": 2e-05, "loss": 0.06076609, "step": 19555 }, { "epoch": 39.112, "grad_norm": 0.9968535304069519, "learning_rate": 2e-05, "loss": 0.0371155, "step": 19556 }, { "epoch": 39.114, "grad_norm": 1.0637849569320679, "learning_rate": 2e-05, "loss": 0.03895561, "step": 19557 }, { "epoch": 39.116, "grad_norm": 1.259835958480835, "learning_rate": 2e-05, "loss": 0.05485725, "step": 19558 }, { "epoch": 39.118, "grad_norm": 1.407067894935608, "learning_rate": 2e-05, "loss": 0.03801053, "step": 19559 }, { "epoch": 39.12, "grad_norm": 1.506779432296753, "learning_rate": 2e-05, "loss": 0.0610331, "step": 19560 }, { "epoch": 39.122, "grad_norm": 1.299699306488037, "learning_rate": 2e-05, "loss": 0.06505786, "step": 19561 }, { "epoch": 39.124, "grad_norm": 1.213064193725586, "learning_rate": 2e-05, "loss": 0.04449017, "step": 19562 }, { "epoch": 39.126, "grad_norm": 1.0915803909301758, "learning_rate": 2e-05, "loss": 0.04768021, "step": 19563 }, { "epoch": 39.128, "grad_norm": 1.2898131608963013, "learning_rate": 2e-05, "loss": 0.06006561, "step": 19564 }, { "epoch": 39.13, "grad_norm": 1.134291410446167, "learning_rate": 2e-05, "loss": 0.04017518, "step": 19565 }, { "epoch": 39.132, "grad_norm": 1.839423656463623, "learning_rate": 2e-05, "loss": 0.05884816, "step": 19566 }, { "epoch": 39.134, "grad_norm": 3.196049451828003, "learning_rate": 2e-05, "loss": 0.0544239, "step": 19567 }, { "epoch": 39.136, "grad_norm": 1.3496533632278442, "learning_rate": 2e-05, "loss": 0.06023031, "step": 19568 }, { "epoch": 39.138, "grad_norm": 1.06864595413208, "learning_rate": 2e-05, "loss": 0.04296235, "step": 19569 }, { "epoch": 39.14, "grad_norm": 0.9733324646949768, "learning_rate": 2e-05, "loss": 0.03551897, "step": 19570 }, { "epoch": 39.142, "grad_norm": 1.19362211227417, "learning_rate": 2e-05, "loss": 0.05036366, "step": 19571 }, { "epoch": 39.144, "grad_norm": 1.0025893449783325, "learning_rate": 2e-05, "loss": 0.0287686, "step": 19572 }, { "epoch": 39.146, "grad_norm": 2.9667563438415527, "learning_rate": 2e-05, "loss": 0.0478669, "step": 19573 }, { "epoch": 39.148, "grad_norm": 1.4983551502227783, "learning_rate": 2e-05, "loss": 0.05278377, "step": 19574 }, { "epoch": 39.15, "grad_norm": 1.3283973932266235, "learning_rate": 2e-05, "loss": 0.05365999, "step": 19575 }, { "epoch": 39.152, "grad_norm": 1.4052914381027222, "learning_rate": 2e-05, "loss": 0.04093612, "step": 19576 }, { "epoch": 39.154, "grad_norm": 1.2109755277633667, "learning_rate": 2e-05, "loss": 0.04671524, "step": 19577 }, { "epoch": 39.156, "grad_norm": 1.4497380256652832, "learning_rate": 2e-05, "loss": 0.05801364, "step": 19578 }, { "epoch": 39.158, "grad_norm": 1.0449851751327515, "learning_rate": 2e-05, "loss": 0.03876752, "step": 19579 }, { "epoch": 39.16, "grad_norm": 0.9883303642272949, "learning_rate": 2e-05, "loss": 0.02868701, "step": 19580 }, { "epoch": 39.162, "grad_norm": 1.0830588340759277, "learning_rate": 2e-05, "loss": 0.03940789, "step": 19581 }, { "epoch": 39.164, "grad_norm": 1.4366869926452637, "learning_rate": 2e-05, "loss": 0.07369938, "step": 19582 }, { "epoch": 39.166, "grad_norm": 1.415928840637207, "learning_rate": 2e-05, "loss": 0.04259369, "step": 19583 }, { "epoch": 39.168, "grad_norm": 1.3500007390975952, "learning_rate": 2e-05, "loss": 0.05315146, "step": 19584 }, { "epoch": 39.17, "grad_norm": 1.1341204643249512, "learning_rate": 2e-05, "loss": 0.03991456, "step": 19585 }, { "epoch": 39.172, "grad_norm": 1.41619074344635, "learning_rate": 2e-05, "loss": 0.05198916, "step": 19586 }, { "epoch": 39.174, "grad_norm": 1.457855224609375, "learning_rate": 2e-05, "loss": 0.06273693, "step": 19587 }, { "epoch": 39.176, "grad_norm": 1.1896886825561523, "learning_rate": 2e-05, "loss": 0.05241989, "step": 19588 }, { "epoch": 39.178, "grad_norm": 1.280698299407959, "learning_rate": 2e-05, "loss": 0.04846141, "step": 19589 }, { "epoch": 39.18, "grad_norm": 1.0273659229278564, "learning_rate": 2e-05, "loss": 0.03391914, "step": 19590 }, { "epoch": 39.182, "grad_norm": 1.2925435304641724, "learning_rate": 2e-05, "loss": 0.05286665, "step": 19591 }, { "epoch": 39.184, "grad_norm": 0.9915168881416321, "learning_rate": 2e-05, "loss": 0.04317682, "step": 19592 }, { "epoch": 39.186, "grad_norm": 1.0538667440414429, "learning_rate": 2e-05, "loss": 0.02936137, "step": 19593 }, { "epoch": 39.188, "grad_norm": 1.22129225730896, "learning_rate": 2e-05, "loss": 0.0674229, "step": 19594 }, { "epoch": 39.19, "grad_norm": 1.3039966821670532, "learning_rate": 2e-05, "loss": 0.04973481, "step": 19595 }, { "epoch": 39.192, "grad_norm": 1.0738667249679565, "learning_rate": 2e-05, "loss": 0.03898337, "step": 19596 }, { "epoch": 39.194, "grad_norm": 1.290103793144226, "learning_rate": 2e-05, "loss": 0.04972884, "step": 19597 }, { "epoch": 39.196, "grad_norm": 1.3575490713119507, "learning_rate": 2e-05, "loss": 0.0455936, "step": 19598 }, { "epoch": 39.198, "grad_norm": 1.1250419616699219, "learning_rate": 2e-05, "loss": 0.03418372, "step": 19599 }, { "epoch": 39.2, "grad_norm": 1.4324772357940674, "learning_rate": 2e-05, "loss": 0.04676975, "step": 19600 }, { "epoch": 39.202, "grad_norm": 1.14113187789917, "learning_rate": 2e-05, "loss": 0.036985, "step": 19601 }, { "epoch": 39.204, "grad_norm": 1.313949704170227, "learning_rate": 2e-05, "loss": 0.05859777, "step": 19602 }, { "epoch": 39.206, "grad_norm": 1.142185926437378, "learning_rate": 2e-05, "loss": 0.04276698, "step": 19603 }, { "epoch": 39.208, "grad_norm": 1.3142091035842896, "learning_rate": 2e-05, "loss": 0.05003233, "step": 19604 }, { "epoch": 39.21, "grad_norm": 1.3124581575393677, "learning_rate": 2e-05, "loss": 0.06136658, "step": 19605 }, { "epoch": 39.212, "grad_norm": 1.2220021486282349, "learning_rate": 2e-05, "loss": 0.04311784, "step": 19606 }, { "epoch": 39.214, "grad_norm": 1.3002612590789795, "learning_rate": 2e-05, "loss": 0.0366911, "step": 19607 }, { "epoch": 39.216, "grad_norm": 1.4152746200561523, "learning_rate": 2e-05, "loss": 0.0430445, "step": 19608 }, { "epoch": 39.218, "grad_norm": 1.1827176809310913, "learning_rate": 2e-05, "loss": 0.04736789, "step": 19609 }, { "epoch": 39.22, "grad_norm": 1.1137902736663818, "learning_rate": 2e-05, "loss": 0.04015935, "step": 19610 }, { "epoch": 39.222, "grad_norm": 1.7880975008010864, "learning_rate": 2e-05, "loss": 0.04855542, "step": 19611 }, { "epoch": 39.224, "grad_norm": 1.182569146156311, "learning_rate": 2e-05, "loss": 0.05796768, "step": 19612 }, { "epoch": 39.226, "grad_norm": 2.2326362133026123, "learning_rate": 2e-05, "loss": 0.04780351, "step": 19613 }, { "epoch": 39.228, "grad_norm": 1.2634199857711792, "learning_rate": 2e-05, "loss": 0.04764343, "step": 19614 }, { "epoch": 39.23, "grad_norm": 1.3403682708740234, "learning_rate": 2e-05, "loss": 0.04632024, "step": 19615 }, { "epoch": 39.232, "grad_norm": 1.009716510772705, "learning_rate": 2e-05, "loss": 0.03490862, "step": 19616 }, { "epoch": 39.234, "grad_norm": 1.03059983253479, "learning_rate": 2e-05, "loss": 0.03645796, "step": 19617 }, { "epoch": 39.236, "grad_norm": 1.0769610404968262, "learning_rate": 2e-05, "loss": 0.03788844, "step": 19618 }, { "epoch": 39.238, "grad_norm": 1.1544963121414185, "learning_rate": 2e-05, "loss": 0.04667365, "step": 19619 }, { "epoch": 39.24, "grad_norm": 1.1700681447982788, "learning_rate": 2e-05, "loss": 0.04401879, "step": 19620 }, { "epoch": 39.242, "grad_norm": 1.196091890335083, "learning_rate": 2e-05, "loss": 0.05693919, "step": 19621 }, { "epoch": 39.244, "grad_norm": 2.257040023803711, "learning_rate": 2e-05, "loss": 0.03902316, "step": 19622 }, { "epoch": 39.246, "grad_norm": 1.00798499584198, "learning_rate": 2e-05, "loss": 0.03313605, "step": 19623 }, { "epoch": 39.248, "grad_norm": 1.4052647352218628, "learning_rate": 2e-05, "loss": 0.05165982, "step": 19624 }, { "epoch": 39.25, "grad_norm": 1.2141923904418945, "learning_rate": 2e-05, "loss": 0.03951096, "step": 19625 }, { "epoch": 39.252, "grad_norm": 1.3865104913711548, "learning_rate": 2e-05, "loss": 0.04825272, "step": 19626 }, { "epoch": 39.254, "grad_norm": 1.102805733680725, "learning_rate": 2e-05, "loss": 0.03778962, "step": 19627 }, { "epoch": 39.256, "grad_norm": 0.9773703813552856, "learning_rate": 2e-05, "loss": 0.03974072, "step": 19628 }, { "epoch": 39.258, "grad_norm": 1.361572027206421, "learning_rate": 2e-05, "loss": 0.03857236, "step": 19629 }, { "epoch": 39.26, "grad_norm": 2.139104127883911, "learning_rate": 2e-05, "loss": 0.05093241, "step": 19630 }, { "epoch": 39.262, "grad_norm": 1.442545771598816, "learning_rate": 2e-05, "loss": 0.04413161, "step": 19631 }, { "epoch": 39.264, "grad_norm": 1.0915734767913818, "learning_rate": 2e-05, "loss": 0.03375326, "step": 19632 }, { "epoch": 39.266, "grad_norm": 1.3909891843795776, "learning_rate": 2e-05, "loss": 0.0558477, "step": 19633 }, { "epoch": 39.268, "grad_norm": 1.2892683744430542, "learning_rate": 2e-05, "loss": 0.05482936, "step": 19634 }, { "epoch": 39.27, "grad_norm": 1.4405442476272583, "learning_rate": 2e-05, "loss": 0.05377769, "step": 19635 }, { "epoch": 39.272, "grad_norm": 1.9378975629806519, "learning_rate": 2e-05, "loss": 0.04612908, "step": 19636 }, { "epoch": 39.274, "grad_norm": 0.9840026497840881, "learning_rate": 2e-05, "loss": 0.04712234, "step": 19637 }, { "epoch": 39.276, "grad_norm": 1.2211089134216309, "learning_rate": 2e-05, "loss": 0.05115188, "step": 19638 }, { "epoch": 39.278, "grad_norm": 2.96844220161438, "learning_rate": 2e-05, "loss": 0.04160873, "step": 19639 }, { "epoch": 39.28, "grad_norm": 2.2236135005950928, "learning_rate": 2e-05, "loss": 0.03255095, "step": 19640 }, { "epoch": 39.282, "grad_norm": 1.4627702236175537, "learning_rate": 2e-05, "loss": 0.06116011, "step": 19641 }, { "epoch": 39.284, "grad_norm": 1.137810468673706, "learning_rate": 2e-05, "loss": 0.04151687, "step": 19642 }, { "epoch": 39.286, "grad_norm": 1.4149549007415771, "learning_rate": 2e-05, "loss": 0.0420138, "step": 19643 }, { "epoch": 39.288, "grad_norm": 1.1547538042068481, "learning_rate": 2e-05, "loss": 0.04780672, "step": 19644 }, { "epoch": 39.29, "grad_norm": 1.460732340812683, "learning_rate": 2e-05, "loss": 0.04886123, "step": 19645 }, { "epoch": 39.292, "grad_norm": 1.4473869800567627, "learning_rate": 2e-05, "loss": 0.04399436, "step": 19646 }, { "epoch": 39.294, "grad_norm": 1.1946806907653809, "learning_rate": 2e-05, "loss": 0.04288406, "step": 19647 }, { "epoch": 39.296, "grad_norm": 1.7457919120788574, "learning_rate": 2e-05, "loss": 0.05616899, "step": 19648 }, { "epoch": 39.298, "grad_norm": 1.1243897676467896, "learning_rate": 2e-05, "loss": 0.04388375, "step": 19649 }, { "epoch": 39.3, "grad_norm": 1.2903263568878174, "learning_rate": 2e-05, "loss": 0.0539849, "step": 19650 }, { "epoch": 39.302, "grad_norm": 1.4681276082992554, "learning_rate": 2e-05, "loss": 0.06013412, "step": 19651 }, { "epoch": 39.304, "grad_norm": 1.7446203231811523, "learning_rate": 2e-05, "loss": 0.07473246, "step": 19652 }, { "epoch": 39.306, "grad_norm": 1.5309257507324219, "learning_rate": 2e-05, "loss": 0.06777739, "step": 19653 }, { "epoch": 39.308, "grad_norm": 1.5686246156692505, "learning_rate": 2e-05, "loss": 0.06154291, "step": 19654 }, { "epoch": 39.31, "grad_norm": 1.139773964881897, "learning_rate": 2e-05, "loss": 0.03822503, "step": 19655 }, { "epoch": 39.312, "grad_norm": 2.308309555053711, "learning_rate": 2e-05, "loss": 0.04015625, "step": 19656 }, { "epoch": 39.314, "grad_norm": 1.5338242053985596, "learning_rate": 2e-05, "loss": 0.05909691, "step": 19657 }, { "epoch": 39.316, "grad_norm": 1.2191451787948608, "learning_rate": 2e-05, "loss": 0.04259738, "step": 19658 }, { "epoch": 39.318, "grad_norm": 1.2544686794281006, "learning_rate": 2e-05, "loss": 0.06194799, "step": 19659 }, { "epoch": 39.32, "grad_norm": 1.1385127305984497, "learning_rate": 2e-05, "loss": 0.04202759, "step": 19660 }, { "epoch": 39.322, "grad_norm": 1.508982539176941, "learning_rate": 2e-05, "loss": 0.0428558, "step": 19661 }, { "epoch": 39.324, "grad_norm": 1.4978950023651123, "learning_rate": 2e-05, "loss": 0.06243214, "step": 19662 }, { "epoch": 39.326, "grad_norm": 2.656769037246704, "learning_rate": 2e-05, "loss": 0.06248422, "step": 19663 }, { "epoch": 39.328, "grad_norm": 1.2013676166534424, "learning_rate": 2e-05, "loss": 0.04629922, "step": 19664 }, { "epoch": 39.33, "grad_norm": 1.4212125539779663, "learning_rate": 2e-05, "loss": 0.0516174, "step": 19665 }, { "epoch": 39.332, "grad_norm": 1.4654594659805298, "learning_rate": 2e-05, "loss": 0.04654077, "step": 19666 }, { "epoch": 39.334, "grad_norm": 1.1820266246795654, "learning_rate": 2e-05, "loss": 0.04918681, "step": 19667 }, { "epoch": 39.336, "grad_norm": 1.7852951288223267, "learning_rate": 2e-05, "loss": 0.05676861, "step": 19668 }, { "epoch": 39.338, "grad_norm": 1.3146127462387085, "learning_rate": 2e-05, "loss": 0.06642872, "step": 19669 }, { "epoch": 39.34, "grad_norm": 1.1040120124816895, "learning_rate": 2e-05, "loss": 0.03930391, "step": 19670 }, { "epoch": 39.342, "grad_norm": 1.4802639484405518, "learning_rate": 2e-05, "loss": 0.05159596, "step": 19671 }, { "epoch": 39.344, "grad_norm": 1.3350907564163208, "learning_rate": 2e-05, "loss": 0.05917858, "step": 19672 }, { "epoch": 39.346, "grad_norm": 1.3922500610351562, "learning_rate": 2e-05, "loss": 0.04254092, "step": 19673 }, { "epoch": 39.348, "grad_norm": 1.271043062210083, "learning_rate": 2e-05, "loss": 0.05474164, "step": 19674 }, { "epoch": 39.35, "grad_norm": 1.4507440328598022, "learning_rate": 2e-05, "loss": 0.05754908, "step": 19675 }, { "epoch": 39.352, "grad_norm": 1.2747180461883545, "learning_rate": 2e-05, "loss": 0.06754243, "step": 19676 }, { "epoch": 39.354, "grad_norm": 1.7789802551269531, "learning_rate": 2e-05, "loss": 0.0436502, "step": 19677 }, { "epoch": 39.356, "grad_norm": 2.4285247325897217, "learning_rate": 2e-05, "loss": 0.05950401, "step": 19678 }, { "epoch": 39.358, "grad_norm": 1.8559372425079346, "learning_rate": 2e-05, "loss": 0.04614679, "step": 19679 }, { "epoch": 39.36, "grad_norm": 1.1284407377243042, "learning_rate": 2e-05, "loss": 0.05482705, "step": 19680 }, { "epoch": 39.362, "grad_norm": 1.2401177883148193, "learning_rate": 2e-05, "loss": 0.04206383, "step": 19681 }, { "epoch": 39.364, "grad_norm": 1.6358333826065063, "learning_rate": 2e-05, "loss": 0.04896562, "step": 19682 }, { "epoch": 39.366, "grad_norm": 1.0610060691833496, "learning_rate": 2e-05, "loss": 0.03826354, "step": 19683 }, { "epoch": 39.368, "grad_norm": 1.349070429801941, "learning_rate": 2e-05, "loss": 0.05081733, "step": 19684 }, { "epoch": 39.37, "grad_norm": 1.0122675895690918, "learning_rate": 2e-05, "loss": 0.03560245, "step": 19685 }, { "epoch": 39.372, "grad_norm": 1.1908326148986816, "learning_rate": 2e-05, "loss": 0.04643635, "step": 19686 }, { "epoch": 39.374, "grad_norm": 1.500412106513977, "learning_rate": 2e-05, "loss": 0.04018075, "step": 19687 }, { "epoch": 39.376, "grad_norm": 1.1961153745651245, "learning_rate": 2e-05, "loss": 0.03434346, "step": 19688 }, { "epoch": 39.378, "grad_norm": 1.0913282632827759, "learning_rate": 2e-05, "loss": 0.03738025, "step": 19689 }, { "epoch": 39.38, "grad_norm": 1.1147562265396118, "learning_rate": 2e-05, "loss": 0.04534988, "step": 19690 }, { "epoch": 39.382, "grad_norm": 1.3109427690505981, "learning_rate": 2e-05, "loss": 0.04121095, "step": 19691 }, { "epoch": 39.384, "grad_norm": 1.143779993057251, "learning_rate": 2e-05, "loss": 0.03789821, "step": 19692 }, { "epoch": 39.386, "grad_norm": 1.3124995231628418, "learning_rate": 2e-05, "loss": 0.04957093, "step": 19693 }, { "epoch": 39.388, "grad_norm": 1.287955403327942, "learning_rate": 2e-05, "loss": 0.05975793, "step": 19694 }, { "epoch": 39.39, "grad_norm": 2.0113747119903564, "learning_rate": 2e-05, "loss": 0.0466843, "step": 19695 }, { "epoch": 39.392, "grad_norm": 1.1586730480194092, "learning_rate": 2e-05, "loss": 0.04480777, "step": 19696 }, { "epoch": 39.394, "grad_norm": 1.1337339878082275, "learning_rate": 2e-05, "loss": 0.03835721, "step": 19697 }, { "epoch": 39.396, "grad_norm": 1.8198338747024536, "learning_rate": 2e-05, "loss": 0.04342664, "step": 19698 }, { "epoch": 39.398, "grad_norm": 1.068575382232666, "learning_rate": 2e-05, "loss": 0.02720687, "step": 19699 }, { "epoch": 39.4, "grad_norm": 2.4282875061035156, "learning_rate": 2e-05, "loss": 0.05048774, "step": 19700 }, { "epoch": 39.402, "grad_norm": 1.351075291633606, "learning_rate": 2e-05, "loss": 0.05489894, "step": 19701 }, { "epoch": 39.404, "grad_norm": 1.1262435913085938, "learning_rate": 2e-05, "loss": 0.0376655, "step": 19702 }, { "epoch": 39.406, "grad_norm": 1.3584601879119873, "learning_rate": 2e-05, "loss": 0.05599149, "step": 19703 }, { "epoch": 39.408, "grad_norm": 1.2703129053115845, "learning_rate": 2e-05, "loss": 0.06736299, "step": 19704 }, { "epoch": 39.41, "grad_norm": 1.1085890531539917, "learning_rate": 2e-05, "loss": 0.04166878, "step": 19705 }, { "epoch": 39.412, "grad_norm": 1.3138976097106934, "learning_rate": 2e-05, "loss": 0.05483936, "step": 19706 }, { "epoch": 39.414, "grad_norm": 1.3568849563598633, "learning_rate": 2e-05, "loss": 0.0636967, "step": 19707 }, { "epoch": 39.416, "grad_norm": 1.2971667051315308, "learning_rate": 2e-05, "loss": 0.05253425, "step": 19708 }, { "epoch": 39.418, "grad_norm": 1.1735317707061768, "learning_rate": 2e-05, "loss": 0.03824739, "step": 19709 }, { "epoch": 39.42, "grad_norm": 1.3102675676345825, "learning_rate": 2e-05, "loss": 0.05568937, "step": 19710 }, { "epoch": 39.422, "grad_norm": 1.6656438112258911, "learning_rate": 2e-05, "loss": 0.04870186, "step": 19711 }, { "epoch": 39.424, "grad_norm": 1.870893120765686, "learning_rate": 2e-05, "loss": 0.06180416, "step": 19712 }, { "epoch": 39.426, "grad_norm": 1.1797996759414673, "learning_rate": 2e-05, "loss": 0.04374686, "step": 19713 }, { "epoch": 39.428, "grad_norm": 1.4457714557647705, "learning_rate": 2e-05, "loss": 0.04901951, "step": 19714 }, { "epoch": 39.43, "grad_norm": 1.1880093812942505, "learning_rate": 2e-05, "loss": 0.05779697, "step": 19715 }, { "epoch": 39.432, "grad_norm": 1.140159249305725, "learning_rate": 2e-05, "loss": 0.04950743, "step": 19716 }, { "epoch": 39.434, "grad_norm": 1.0973641872406006, "learning_rate": 2e-05, "loss": 0.03795099, "step": 19717 }, { "epoch": 39.436, "grad_norm": 1.176837682723999, "learning_rate": 2e-05, "loss": 0.04808137, "step": 19718 }, { "epoch": 39.438, "grad_norm": 1.2320091724395752, "learning_rate": 2e-05, "loss": 0.04875737, "step": 19719 }, { "epoch": 39.44, "grad_norm": 1.197391390800476, "learning_rate": 2e-05, "loss": 0.03575107, "step": 19720 }, { "epoch": 39.442, "grad_norm": 1.4280883073806763, "learning_rate": 2e-05, "loss": 0.05063617, "step": 19721 }, { "epoch": 39.444, "grad_norm": 1.6574267148971558, "learning_rate": 2e-05, "loss": 0.04451539, "step": 19722 }, { "epoch": 39.446, "grad_norm": 1.1137486696243286, "learning_rate": 2e-05, "loss": 0.03630831, "step": 19723 }, { "epoch": 39.448, "grad_norm": 1.1743730306625366, "learning_rate": 2e-05, "loss": 0.04013621, "step": 19724 }, { "epoch": 39.45, "grad_norm": 1.239919900894165, "learning_rate": 2e-05, "loss": 0.05531546, "step": 19725 }, { "epoch": 39.452, "grad_norm": 1.6629247665405273, "learning_rate": 2e-05, "loss": 0.06353351, "step": 19726 }, { "epoch": 39.454, "grad_norm": 1.5018807649612427, "learning_rate": 2e-05, "loss": 0.07370369, "step": 19727 }, { "epoch": 39.456, "grad_norm": 1.5102373361587524, "learning_rate": 2e-05, "loss": 0.0483415, "step": 19728 }, { "epoch": 39.458, "grad_norm": 1.106083631515503, "learning_rate": 2e-05, "loss": 0.04570149, "step": 19729 }, { "epoch": 39.46, "grad_norm": 1.376544713973999, "learning_rate": 2e-05, "loss": 0.05271851, "step": 19730 }, { "epoch": 39.462, "grad_norm": 1.1500600576400757, "learning_rate": 2e-05, "loss": 0.05022399, "step": 19731 }, { "epoch": 39.464, "grad_norm": 1.136629343032837, "learning_rate": 2e-05, "loss": 0.04801833, "step": 19732 }, { "epoch": 39.466, "grad_norm": 1.6498074531555176, "learning_rate": 2e-05, "loss": 0.06893224, "step": 19733 }, { "epoch": 39.468, "grad_norm": 1.306951642036438, "learning_rate": 2e-05, "loss": 0.06067065, "step": 19734 }, { "epoch": 39.47, "grad_norm": 1.1913270950317383, "learning_rate": 2e-05, "loss": 0.03944825, "step": 19735 }, { "epoch": 39.472, "grad_norm": 1.2636955976486206, "learning_rate": 2e-05, "loss": 0.04280094, "step": 19736 }, { "epoch": 39.474, "grad_norm": 1.0089428424835205, "learning_rate": 2e-05, "loss": 0.03867158, "step": 19737 }, { "epoch": 39.476, "grad_norm": 1.6654586791992188, "learning_rate": 2e-05, "loss": 0.06643804, "step": 19738 }, { "epoch": 39.478, "grad_norm": 2.2843704223632812, "learning_rate": 2e-05, "loss": 0.05996677, "step": 19739 }, { "epoch": 39.48, "grad_norm": 1.0888022184371948, "learning_rate": 2e-05, "loss": 0.0362496, "step": 19740 }, { "epoch": 39.482, "grad_norm": 1.196286916732788, "learning_rate": 2e-05, "loss": 0.04796268, "step": 19741 }, { "epoch": 39.484, "grad_norm": 1.6246408224105835, "learning_rate": 2e-05, "loss": 0.06164134, "step": 19742 }, { "epoch": 39.486, "grad_norm": 1.4118708372116089, "learning_rate": 2e-05, "loss": 0.06805681, "step": 19743 }, { "epoch": 39.488, "grad_norm": 1.5130550861358643, "learning_rate": 2e-05, "loss": 0.0542859, "step": 19744 }, { "epoch": 39.49, "grad_norm": 0.8534611463546753, "learning_rate": 2e-05, "loss": 0.02287557, "step": 19745 }, { "epoch": 39.492, "grad_norm": 1.087668538093567, "learning_rate": 2e-05, "loss": 0.03380512, "step": 19746 }, { "epoch": 39.494, "grad_norm": 1.2685211896896362, "learning_rate": 2e-05, "loss": 0.05318061, "step": 19747 }, { "epoch": 39.496, "grad_norm": 1.2841920852661133, "learning_rate": 2e-05, "loss": 0.05055482, "step": 19748 }, { "epoch": 39.498, "grad_norm": 1.1762704849243164, "learning_rate": 2e-05, "loss": 0.04554278, "step": 19749 }, { "epoch": 39.5, "grad_norm": 3.7946743965148926, "learning_rate": 2e-05, "loss": 0.03693722, "step": 19750 }, { "epoch": 39.502, "grad_norm": 1.1349517107009888, "learning_rate": 2e-05, "loss": 0.02860119, "step": 19751 }, { "epoch": 39.504, "grad_norm": 2.01347017288208, "learning_rate": 2e-05, "loss": 0.04260724, "step": 19752 }, { "epoch": 39.506, "grad_norm": 1.267555594444275, "learning_rate": 2e-05, "loss": 0.05181779, "step": 19753 }, { "epoch": 39.508, "grad_norm": 1.1315144300460815, "learning_rate": 2e-05, "loss": 0.04263924, "step": 19754 }, { "epoch": 39.51, "grad_norm": 2.0047526359558105, "learning_rate": 2e-05, "loss": 0.04533667, "step": 19755 }, { "epoch": 39.512, "grad_norm": 1.342085361480713, "learning_rate": 2e-05, "loss": 0.04838677, "step": 19756 }, { "epoch": 39.514, "grad_norm": 1.331516146659851, "learning_rate": 2e-05, "loss": 0.06132433, "step": 19757 }, { "epoch": 39.516, "grad_norm": 1.1576191186904907, "learning_rate": 2e-05, "loss": 0.04208809, "step": 19758 }, { "epoch": 39.518, "grad_norm": 0.9372735023498535, "learning_rate": 2e-05, "loss": 0.03229483, "step": 19759 }, { "epoch": 39.52, "grad_norm": 2.456406831741333, "learning_rate": 2e-05, "loss": 0.05844424, "step": 19760 }, { "epoch": 39.522, "grad_norm": 1.2554352283477783, "learning_rate": 2e-05, "loss": 0.05017581, "step": 19761 }, { "epoch": 39.524, "grad_norm": 1.6045249700546265, "learning_rate": 2e-05, "loss": 0.06052469, "step": 19762 }, { "epoch": 39.526, "grad_norm": 1.3831950426101685, "learning_rate": 2e-05, "loss": 0.05542463, "step": 19763 }, { "epoch": 39.528, "grad_norm": 1.6362686157226562, "learning_rate": 2e-05, "loss": 0.05046823, "step": 19764 }, { "epoch": 39.53, "grad_norm": 1.8844678401947021, "learning_rate": 2e-05, "loss": 0.05892744, "step": 19765 }, { "epoch": 39.532, "grad_norm": 1.026892066001892, "learning_rate": 2e-05, "loss": 0.04069612, "step": 19766 }, { "epoch": 39.534, "grad_norm": 1.6162285804748535, "learning_rate": 2e-05, "loss": 0.04750034, "step": 19767 }, { "epoch": 39.536, "grad_norm": 1.3901042938232422, "learning_rate": 2e-05, "loss": 0.05620121, "step": 19768 }, { "epoch": 39.538, "grad_norm": 1.1447288990020752, "learning_rate": 2e-05, "loss": 0.04215189, "step": 19769 }, { "epoch": 39.54, "grad_norm": 2.04343843460083, "learning_rate": 2e-05, "loss": 0.04056927, "step": 19770 }, { "epoch": 39.542, "grad_norm": 1.376480221748352, "learning_rate": 2e-05, "loss": 0.04138695, "step": 19771 }, { "epoch": 39.544, "grad_norm": 1.5023736953735352, "learning_rate": 2e-05, "loss": 0.04907219, "step": 19772 }, { "epoch": 39.546, "grad_norm": 2.4118905067443848, "learning_rate": 2e-05, "loss": 0.05148922, "step": 19773 }, { "epoch": 39.548, "grad_norm": 3.517075300216675, "learning_rate": 2e-05, "loss": 0.05087829, "step": 19774 }, { "epoch": 39.55, "grad_norm": 1.0423048734664917, "learning_rate": 2e-05, "loss": 0.03624145, "step": 19775 }, { "epoch": 39.552, "grad_norm": 1.3357534408569336, "learning_rate": 2e-05, "loss": 0.04405185, "step": 19776 }, { "epoch": 39.554, "grad_norm": 1.2442307472229004, "learning_rate": 2e-05, "loss": 0.04776893, "step": 19777 }, { "epoch": 39.556, "grad_norm": 1.0064873695373535, "learning_rate": 2e-05, "loss": 0.03839774, "step": 19778 }, { "epoch": 39.558, "grad_norm": 1.5154045820236206, "learning_rate": 2e-05, "loss": 0.04444873, "step": 19779 }, { "epoch": 39.56, "grad_norm": 1.2269160747528076, "learning_rate": 2e-05, "loss": 0.04485428, "step": 19780 }, { "epoch": 39.562, "grad_norm": 1.1330827474594116, "learning_rate": 2e-05, "loss": 0.04535697, "step": 19781 }, { "epoch": 39.564, "grad_norm": 1.6141606569290161, "learning_rate": 2e-05, "loss": 0.04330592, "step": 19782 }, { "epoch": 39.566, "grad_norm": 1.2627594470977783, "learning_rate": 2e-05, "loss": 0.03273316, "step": 19783 }, { "epoch": 39.568, "grad_norm": 1.0792535543441772, "learning_rate": 2e-05, "loss": 0.0406695, "step": 19784 }, { "epoch": 39.57, "grad_norm": 1.397845983505249, "learning_rate": 2e-05, "loss": 0.05157507, "step": 19785 }, { "epoch": 39.572, "grad_norm": 2.2438647747039795, "learning_rate": 2e-05, "loss": 0.067011, "step": 19786 }, { "epoch": 39.574, "grad_norm": 1.2847806215286255, "learning_rate": 2e-05, "loss": 0.03849096, "step": 19787 }, { "epoch": 39.576, "grad_norm": 1.204473614692688, "learning_rate": 2e-05, "loss": 0.05438119, "step": 19788 }, { "epoch": 39.578, "grad_norm": 1.9760804176330566, "learning_rate": 2e-05, "loss": 0.05682136, "step": 19789 }, { "epoch": 39.58, "grad_norm": 1.181208610534668, "learning_rate": 2e-05, "loss": 0.0492033, "step": 19790 }, { "epoch": 39.582, "grad_norm": 1.273768424987793, "learning_rate": 2e-05, "loss": 0.04544124, "step": 19791 }, { "epoch": 39.584, "grad_norm": 1.5114247798919678, "learning_rate": 2e-05, "loss": 0.06623958, "step": 19792 }, { "epoch": 39.586, "grad_norm": 1.1232013702392578, "learning_rate": 2e-05, "loss": 0.04789654, "step": 19793 }, { "epoch": 39.588, "grad_norm": 1.1755037307739258, "learning_rate": 2e-05, "loss": 0.050361, "step": 19794 }, { "epoch": 39.59, "grad_norm": 1.6072686910629272, "learning_rate": 2e-05, "loss": 0.06229908, "step": 19795 }, { "epoch": 39.592, "grad_norm": 1.416585087776184, "learning_rate": 2e-05, "loss": 0.04778691, "step": 19796 }, { "epoch": 39.594, "grad_norm": 1.3451114892959595, "learning_rate": 2e-05, "loss": 0.05900617, "step": 19797 }, { "epoch": 39.596, "grad_norm": 1.1347873210906982, "learning_rate": 2e-05, "loss": 0.04485462, "step": 19798 }, { "epoch": 39.598, "grad_norm": 1.158407211303711, "learning_rate": 2e-05, "loss": 0.04194099, "step": 19799 }, { "epoch": 39.6, "grad_norm": 1.222896695137024, "learning_rate": 2e-05, "loss": 0.04154717, "step": 19800 }, { "epoch": 39.602, "grad_norm": 1.1443710327148438, "learning_rate": 2e-05, "loss": 0.04118493, "step": 19801 }, { "epoch": 39.604, "grad_norm": 1.2057150602340698, "learning_rate": 2e-05, "loss": 0.05563217, "step": 19802 }, { "epoch": 39.606, "grad_norm": 1.192069172859192, "learning_rate": 2e-05, "loss": 0.0467116, "step": 19803 }, { "epoch": 39.608, "grad_norm": 1.410652756690979, "learning_rate": 2e-05, "loss": 0.03325736, "step": 19804 }, { "epoch": 39.61, "grad_norm": 1.1169686317443848, "learning_rate": 2e-05, "loss": 0.03927244, "step": 19805 }, { "epoch": 39.612, "grad_norm": 1.2299331426620483, "learning_rate": 2e-05, "loss": 0.04283129, "step": 19806 }, { "epoch": 39.614, "grad_norm": 1.020370602607727, "learning_rate": 2e-05, "loss": 0.03461327, "step": 19807 }, { "epoch": 39.616, "grad_norm": 1.2263249158859253, "learning_rate": 2e-05, "loss": 0.05317787, "step": 19808 }, { "epoch": 39.618, "grad_norm": 2.3587546348571777, "learning_rate": 2e-05, "loss": 0.04611384, "step": 19809 }, { "epoch": 39.62, "grad_norm": 1.2532998323440552, "learning_rate": 2e-05, "loss": 0.04584979, "step": 19810 }, { "epoch": 39.622, "grad_norm": 0.9980117082595825, "learning_rate": 2e-05, "loss": 0.03598379, "step": 19811 }, { "epoch": 39.624, "grad_norm": 1.3296000957489014, "learning_rate": 2e-05, "loss": 0.06023554, "step": 19812 }, { "epoch": 39.626, "grad_norm": 1.3050363063812256, "learning_rate": 2e-05, "loss": 0.03866673, "step": 19813 }, { "epoch": 39.628, "grad_norm": 1.0335310697555542, "learning_rate": 2e-05, "loss": 0.03773557, "step": 19814 }, { "epoch": 39.63, "grad_norm": 1.102943778038025, "learning_rate": 2e-05, "loss": 0.05241087, "step": 19815 }, { "epoch": 39.632, "grad_norm": 2.9029152393341064, "learning_rate": 2e-05, "loss": 0.06153958, "step": 19816 }, { "epoch": 39.634, "grad_norm": 1.207022786140442, "learning_rate": 2e-05, "loss": 0.0496684, "step": 19817 }, { "epoch": 39.636, "grad_norm": 1.147627353668213, "learning_rate": 2e-05, "loss": 0.04096475, "step": 19818 }, { "epoch": 39.638, "grad_norm": 1.1633808612823486, "learning_rate": 2e-05, "loss": 0.0518549, "step": 19819 }, { "epoch": 39.64, "grad_norm": 2.9582087993621826, "learning_rate": 2e-05, "loss": 0.04467195, "step": 19820 }, { "epoch": 39.642, "grad_norm": 1.5623703002929688, "learning_rate": 2e-05, "loss": 0.04819065, "step": 19821 }, { "epoch": 39.644, "grad_norm": 1.2664448022842407, "learning_rate": 2e-05, "loss": 0.04783477, "step": 19822 }, { "epoch": 39.646, "grad_norm": 1.313110589981079, "learning_rate": 2e-05, "loss": 0.03875414, "step": 19823 }, { "epoch": 39.648, "grad_norm": 1.150399923324585, "learning_rate": 2e-05, "loss": 0.05495393, "step": 19824 }, { "epoch": 39.65, "grad_norm": 1.273444414138794, "learning_rate": 2e-05, "loss": 0.05313744, "step": 19825 }, { "epoch": 39.652, "grad_norm": 1.288045048713684, "learning_rate": 2e-05, "loss": 0.05415475, "step": 19826 }, { "epoch": 39.654, "grad_norm": 1.1999372243881226, "learning_rate": 2e-05, "loss": 0.04567619, "step": 19827 }, { "epoch": 39.656, "grad_norm": 1.0808748006820679, "learning_rate": 2e-05, "loss": 0.04751875, "step": 19828 }, { "epoch": 39.658, "grad_norm": 1.122815489768982, "learning_rate": 2e-05, "loss": 0.0411781, "step": 19829 }, { "epoch": 39.66, "grad_norm": 1.6972079277038574, "learning_rate": 2e-05, "loss": 0.05976425, "step": 19830 }, { "epoch": 39.662, "grad_norm": 1.0641003847122192, "learning_rate": 2e-05, "loss": 0.0371467, "step": 19831 }, { "epoch": 39.664, "grad_norm": 1.3201067447662354, "learning_rate": 2e-05, "loss": 0.04569467, "step": 19832 }, { "epoch": 39.666, "grad_norm": 1.5276159048080444, "learning_rate": 2e-05, "loss": 0.04464874, "step": 19833 }, { "epoch": 39.668, "grad_norm": 1.1895759105682373, "learning_rate": 2e-05, "loss": 0.04681417, "step": 19834 }, { "epoch": 39.67, "grad_norm": 1.2756211757659912, "learning_rate": 2e-05, "loss": 0.04865978, "step": 19835 }, { "epoch": 39.672, "grad_norm": 1.315328598022461, "learning_rate": 2e-05, "loss": 0.07096517, "step": 19836 }, { "epoch": 39.674, "grad_norm": 1.2672423124313354, "learning_rate": 2e-05, "loss": 0.04316231, "step": 19837 }, { "epoch": 39.676, "grad_norm": 1.077695369720459, "learning_rate": 2e-05, "loss": 0.03674557, "step": 19838 }, { "epoch": 39.678, "grad_norm": 1.8062963485717773, "learning_rate": 2e-05, "loss": 0.05743162, "step": 19839 }, { "epoch": 39.68, "grad_norm": 1.1730942726135254, "learning_rate": 2e-05, "loss": 0.03555157, "step": 19840 }, { "epoch": 39.682, "grad_norm": 1.0135302543640137, "learning_rate": 2e-05, "loss": 0.02771145, "step": 19841 }, { "epoch": 39.684, "grad_norm": 1.5072895288467407, "learning_rate": 2e-05, "loss": 0.06209271, "step": 19842 }, { "epoch": 39.686, "grad_norm": 1.412246823310852, "learning_rate": 2e-05, "loss": 0.04875721, "step": 19843 }, { "epoch": 39.688, "grad_norm": 1.330792784690857, "learning_rate": 2e-05, "loss": 0.05368386, "step": 19844 }, { "epoch": 39.69, "grad_norm": 1.3758635520935059, "learning_rate": 2e-05, "loss": 0.05050598, "step": 19845 }, { "epoch": 39.692, "grad_norm": 1.2462283372879028, "learning_rate": 2e-05, "loss": 0.0519301, "step": 19846 }, { "epoch": 39.694, "grad_norm": 1.4314063787460327, "learning_rate": 2e-05, "loss": 0.04765672, "step": 19847 }, { "epoch": 39.696, "grad_norm": 1.3259481191635132, "learning_rate": 2e-05, "loss": 0.05249347, "step": 19848 }, { "epoch": 39.698, "grad_norm": 1.4222193956375122, "learning_rate": 2e-05, "loss": 0.0517953, "step": 19849 }, { "epoch": 39.7, "grad_norm": 1.3300631046295166, "learning_rate": 2e-05, "loss": 0.05170576, "step": 19850 }, { "epoch": 39.702, "grad_norm": 1.3391106128692627, "learning_rate": 2e-05, "loss": 0.05839144, "step": 19851 }, { "epoch": 39.704, "grad_norm": 0.9856824278831482, "learning_rate": 2e-05, "loss": 0.03652864, "step": 19852 }, { "epoch": 39.706, "grad_norm": 1.2734506130218506, "learning_rate": 2e-05, "loss": 0.05710208, "step": 19853 }, { "epoch": 39.708, "grad_norm": 1.9725619554519653, "learning_rate": 2e-05, "loss": 0.05481605, "step": 19854 }, { "epoch": 39.71, "grad_norm": 1.1700278520584106, "learning_rate": 2e-05, "loss": 0.05501316, "step": 19855 }, { "epoch": 39.712, "grad_norm": 0.8477627635002136, "learning_rate": 2e-05, "loss": 0.02406826, "step": 19856 }, { "epoch": 39.714, "grad_norm": 1.3238353729248047, "learning_rate": 2e-05, "loss": 0.04450976, "step": 19857 }, { "epoch": 39.716, "grad_norm": 2.597429037094116, "learning_rate": 2e-05, "loss": 0.04625923, "step": 19858 }, { "epoch": 39.718, "grad_norm": 1.2103281021118164, "learning_rate": 2e-05, "loss": 0.05398448, "step": 19859 }, { "epoch": 39.72, "grad_norm": 1.249700665473938, "learning_rate": 2e-05, "loss": 0.05766566, "step": 19860 }, { "epoch": 39.722, "grad_norm": 1.2194814682006836, "learning_rate": 2e-05, "loss": 0.05054, "step": 19861 }, { "epoch": 39.724, "grad_norm": 1.3230112791061401, "learning_rate": 2e-05, "loss": 0.04640412, "step": 19862 }, { "epoch": 39.726, "grad_norm": 1.38727867603302, "learning_rate": 2e-05, "loss": 0.04579895, "step": 19863 }, { "epoch": 39.728, "grad_norm": 1.1224366426467896, "learning_rate": 2e-05, "loss": 0.03637297, "step": 19864 }, { "epoch": 39.73, "grad_norm": 1.8750441074371338, "learning_rate": 2e-05, "loss": 0.06596401, "step": 19865 }, { "epoch": 39.732, "grad_norm": 1.2330036163330078, "learning_rate": 2e-05, "loss": 0.06313409, "step": 19866 }, { "epoch": 39.734, "grad_norm": 1.1567729711532593, "learning_rate": 2e-05, "loss": 0.03799208, "step": 19867 }, { "epoch": 39.736, "grad_norm": 1.8047136068344116, "learning_rate": 2e-05, "loss": 0.05676999, "step": 19868 }, { "epoch": 39.738, "grad_norm": 1.2310786247253418, "learning_rate": 2e-05, "loss": 0.04846741, "step": 19869 }, { "epoch": 39.74, "grad_norm": 1.0939598083496094, "learning_rate": 2e-05, "loss": 0.05183221, "step": 19870 }, { "epoch": 39.742, "grad_norm": 1.2205032110214233, "learning_rate": 2e-05, "loss": 0.05188471, "step": 19871 }, { "epoch": 39.744, "grad_norm": 1.108619213104248, "learning_rate": 2e-05, "loss": 0.05545724, "step": 19872 }, { "epoch": 39.746, "grad_norm": 1.2437492609024048, "learning_rate": 2e-05, "loss": 0.057475, "step": 19873 }, { "epoch": 39.748, "grad_norm": 1.030137300491333, "learning_rate": 2e-05, "loss": 0.03131631, "step": 19874 }, { "epoch": 39.75, "grad_norm": 1.290551781654358, "learning_rate": 2e-05, "loss": 0.05823213, "step": 19875 }, { "epoch": 39.752, "grad_norm": 1.6363590955734253, "learning_rate": 2e-05, "loss": 0.05555134, "step": 19876 }, { "epoch": 39.754, "grad_norm": 2.338663101196289, "learning_rate": 2e-05, "loss": 0.05460815, "step": 19877 }, { "epoch": 39.756, "grad_norm": 1.5691533088684082, "learning_rate": 2e-05, "loss": 0.05019718, "step": 19878 }, { "epoch": 39.758, "grad_norm": 1.2325677871704102, "learning_rate": 2e-05, "loss": 0.04595042, "step": 19879 }, { "epoch": 39.76, "grad_norm": 1.2312854528427124, "learning_rate": 2e-05, "loss": 0.03850013, "step": 19880 }, { "epoch": 39.762, "grad_norm": 1.3569062948226929, "learning_rate": 2e-05, "loss": 0.05348967, "step": 19881 }, { "epoch": 39.764, "grad_norm": 2.510478973388672, "learning_rate": 2e-05, "loss": 0.05543127, "step": 19882 }, { "epoch": 39.766, "grad_norm": 1.1178513765335083, "learning_rate": 2e-05, "loss": 0.05213483, "step": 19883 }, { "epoch": 39.768, "grad_norm": 1.2882338762283325, "learning_rate": 2e-05, "loss": 0.06439535, "step": 19884 }, { "epoch": 39.77, "grad_norm": 1.2819217443466187, "learning_rate": 2e-05, "loss": 0.06040879, "step": 19885 }, { "epoch": 39.772, "grad_norm": 1.1934309005737305, "learning_rate": 2e-05, "loss": 0.04919674, "step": 19886 }, { "epoch": 39.774, "grad_norm": 1.0528241395950317, "learning_rate": 2e-05, "loss": 0.03462794, "step": 19887 }, { "epoch": 39.776, "grad_norm": 1.1237131357192993, "learning_rate": 2e-05, "loss": 0.03184299, "step": 19888 }, { "epoch": 39.778, "grad_norm": 1.4120211601257324, "learning_rate": 2e-05, "loss": 0.05772817, "step": 19889 }, { "epoch": 39.78, "grad_norm": 1.1867109537124634, "learning_rate": 2e-05, "loss": 0.04693531, "step": 19890 }, { "epoch": 39.782, "grad_norm": 1.1656479835510254, "learning_rate": 2e-05, "loss": 0.03705209, "step": 19891 }, { "epoch": 39.784, "grad_norm": 1.0704561471939087, "learning_rate": 2e-05, "loss": 0.04141489, "step": 19892 }, { "epoch": 39.786, "grad_norm": 1.3049782514572144, "learning_rate": 2e-05, "loss": 0.06903137, "step": 19893 }, { "epoch": 39.788, "grad_norm": 1.3069772720336914, "learning_rate": 2e-05, "loss": 0.0366349, "step": 19894 }, { "epoch": 39.79, "grad_norm": 1.3293408155441284, "learning_rate": 2e-05, "loss": 0.05534213, "step": 19895 }, { "epoch": 39.792, "grad_norm": 1.7149981260299683, "learning_rate": 2e-05, "loss": 0.06210932, "step": 19896 }, { "epoch": 39.794, "grad_norm": 1.0320147275924683, "learning_rate": 2e-05, "loss": 0.04126735, "step": 19897 }, { "epoch": 39.796, "grad_norm": 1.9870035648345947, "learning_rate": 2e-05, "loss": 0.05784594, "step": 19898 }, { "epoch": 39.798, "grad_norm": 0.9793746471405029, "learning_rate": 2e-05, "loss": 0.03283355, "step": 19899 }, { "epoch": 39.8, "grad_norm": 1.228532075881958, "learning_rate": 2e-05, "loss": 0.04221442, "step": 19900 }, { "epoch": 39.802, "grad_norm": 1.2652114629745483, "learning_rate": 2e-05, "loss": 0.05898855, "step": 19901 }, { "epoch": 39.804, "grad_norm": 1.1174923181533813, "learning_rate": 2e-05, "loss": 0.04276735, "step": 19902 }, { "epoch": 39.806, "grad_norm": 1.0496095418930054, "learning_rate": 2e-05, "loss": 0.04212025, "step": 19903 }, { "epoch": 39.808, "grad_norm": 1.0706851482391357, "learning_rate": 2e-05, "loss": 0.03774155, "step": 19904 }, { "epoch": 39.81, "grad_norm": 1.1920790672302246, "learning_rate": 2e-05, "loss": 0.04768289, "step": 19905 }, { "epoch": 39.812, "grad_norm": 1.502715826034546, "learning_rate": 2e-05, "loss": 0.04195279, "step": 19906 }, { "epoch": 39.814, "grad_norm": 1.9300915002822876, "learning_rate": 2e-05, "loss": 0.04984208, "step": 19907 }, { "epoch": 39.816, "grad_norm": 2.316269636154175, "learning_rate": 2e-05, "loss": 0.04897273, "step": 19908 }, { "epoch": 39.818, "grad_norm": 1.250138759613037, "learning_rate": 2e-05, "loss": 0.04602588, "step": 19909 }, { "epoch": 39.82, "grad_norm": 1.653391718864441, "learning_rate": 2e-05, "loss": 0.06034857, "step": 19910 }, { "epoch": 39.822, "grad_norm": 1.1049057245254517, "learning_rate": 2e-05, "loss": 0.03826465, "step": 19911 }, { "epoch": 39.824, "grad_norm": 1.1933602094650269, "learning_rate": 2e-05, "loss": 0.03310779, "step": 19912 }, { "epoch": 39.826, "grad_norm": 3.675931930541992, "learning_rate": 2e-05, "loss": 0.05223902, "step": 19913 }, { "epoch": 39.828, "grad_norm": 1.1655473709106445, "learning_rate": 2e-05, "loss": 0.0628321, "step": 19914 }, { "epoch": 39.83, "grad_norm": 1.7604655027389526, "learning_rate": 2e-05, "loss": 0.06078061, "step": 19915 }, { "epoch": 39.832, "grad_norm": 1.100632667541504, "learning_rate": 2e-05, "loss": 0.04568919, "step": 19916 }, { "epoch": 39.834, "grad_norm": 1.2774721384048462, "learning_rate": 2e-05, "loss": 0.05435498, "step": 19917 }, { "epoch": 39.836, "grad_norm": 1.217751145362854, "learning_rate": 2e-05, "loss": 0.05859322, "step": 19918 }, { "epoch": 39.838, "grad_norm": 1.317592978477478, "learning_rate": 2e-05, "loss": 0.05665311, "step": 19919 }, { "epoch": 39.84, "grad_norm": 0.949860692024231, "learning_rate": 2e-05, "loss": 0.02791356, "step": 19920 }, { "epoch": 39.842, "grad_norm": 1.1154917478561401, "learning_rate": 2e-05, "loss": 0.05110167, "step": 19921 }, { "epoch": 39.844, "grad_norm": 1.161690354347229, "learning_rate": 2e-05, "loss": 0.0497525, "step": 19922 }, { "epoch": 39.846, "grad_norm": 1.190811038017273, "learning_rate": 2e-05, "loss": 0.0368493, "step": 19923 }, { "epoch": 39.848, "grad_norm": 1.6861010789871216, "learning_rate": 2e-05, "loss": 0.05476474, "step": 19924 }, { "epoch": 39.85, "grad_norm": 1.9949997663497925, "learning_rate": 2e-05, "loss": 0.0414875, "step": 19925 }, { "epoch": 39.852, "grad_norm": 1.2739412784576416, "learning_rate": 2e-05, "loss": 0.05026732, "step": 19926 }, { "epoch": 39.854, "grad_norm": 1.3758777379989624, "learning_rate": 2e-05, "loss": 0.06855834, "step": 19927 }, { "epoch": 39.856, "grad_norm": 1.577846884727478, "learning_rate": 2e-05, "loss": 0.05498583, "step": 19928 }, { "epoch": 39.858, "grad_norm": 1.269954800605774, "learning_rate": 2e-05, "loss": 0.0476332, "step": 19929 }, { "epoch": 39.86, "grad_norm": 0.9598180055618286, "learning_rate": 2e-05, "loss": 0.04460917, "step": 19930 }, { "epoch": 39.862, "grad_norm": 1.1671171188354492, "learning_rate": 2e-05, "loss": 0.04713274, "step": 19931 }, { "epoch": 39.864, "grad_norm": 1.8716930150985718, "learning_rate": 2e-05, "loss": 0.0466135, "step": 19932 }, { "epoch": 39.866, "grad_norm": 1.3691669702529907, "learning_rate": 2e-05, "loss": 0.04337586, "step": 19933 }, { "epoch": 39.868, "grad_norm": 1.1902244091033936, "learning_rate": 2e-05, "loss": 0.04896775, "step": 19934 }, { "epoch": 39.87, "grad_norm": 1.0358586311340332, "learning_rate": 2e-05, "loss": 0.04044461, "step": 19935 }, { "epoch": 39.872, "grad_norm": 1.4702568054199219, "learning_rate": 2e-05, "loss": 0.06271239, "step": 19936 }, { "epoch": 39.874, "grad_norm": 1.0855350494384766, "learning_rate": 2e-05, "loss": 0.03931961, "step": 19937 }, { "epoch": 39.876, "grad_norm": 1.1319669485092163, "learning_rate": 2e-05, "loss": 0.03855097, "step": 19938 }, { "epoch": 39.878, "grad_norm": 1.354026436805725, "learning_rate": 2e-05, "loss": 0.06375487, "step": 19939 }, { "epoch": 39.88, "grad_norm": 0.9435744285583496, "learning_rate": 2e-05, "loss": 0.03325384, "step": 19940 }, { "epoch": 39.882, "grad_norm": 1.2593286037445068, "learning_rate": 2e-05, "loss": 0.05153989, "step": 19941 }, { "epoch": 39.884, "grad_norm": 0.9804494976997375, "learning_rate": 2e-05, "loss": 0.03355004, "step": 19942 }, { "epoch": 39.886, "grad_norm": 1.18596351146698, "learning_rate": 2e-05, "loss": 0.0458352, "step": 19943 }, { "epoch": 39.888, "grad_norm": 1.5126147270202637, "learning_rate": 2e-05, "loss": 0.05285957, "step": 19944 }, { "epoch": 39.89, "grad_norm": 1.1133012771606445, "learning_rate": 2e-05, "loss": 0.03969733, "step": 19945 }, { "epoch": 39.892, "grad_norm": 2.6804747581481934, "learning_rate": 2e-05, "loss": 0.06336376, "step": 19946 }, { "epoch": 39.894, "grad_norm": 3.003247022628784, "learning_rate": 2e-05, "loss": 0.04598739, "step": 19947 }, { "epoch": 39.896, "grad_norm": 1.1411057710647583, "learning_rate": 2e-05, "loss": 0.03566032, "step": 19948 }, { "epoch": 39.898, "grad_norm": 1.0855693817138672, "learning_rate": 2e-05, "loss": 0.03875684, "step": 19949 }, { "epoch": 39.9, "grad_norm": 1.9709842205047607, "learning_rate": 2e-05, "loss": 0.05917383, "step": 19950 }, { "epoch": 39.902, "grad_norm": 1.1566156148910522, "learning_rate": 2e-05, "loss": 0.05174723, "step": 19951 }, { "epoch": 39.904, "grad_norm": 1.0482864379882812, "learning_rate": 2e-05, "loss": 0.04446269, "step": 19952 }, { "epoch": 39.906, "grad_norm": 1.2975873947143555, "learning_rate": 2e-05, "loss": 0.04888238, "step": 19953 }, { "epoch": 39.908, "grad_norm": 1.3561941385269165, "learning_rate": 2e-05, "loss": 0.05117349, "step": 19954 }, { "epoch": 39.91, "grad_norm": 0.9680836200714111, "learning_rate": 2e-05, "loss": 0.03723207, "step": 19955 }, { "epoch": 39.912, "grad_norm": 1.2413675785064697, "learning_rate": 2e-05, "loss": 0.05808984, "step": 19956 }, { "epoch": 39.914, "grad_norm": 1.634905219078064, "learning_rate": 2e-05, "loss": 0.05397768, "step": 19957 }, { "epoch": 39.916, "grad_norm": 1.1176763772964478, "learning_rate": 2e-05, "loss": 0.04666106, "step": 19958 }, { "epoch": 39.918, "grad_norm": 1.37368643283844, "learning_rate": 2e-05, "loss": 0.04420487, "step": 19959 }, { "epoch": 39.92, "grad_norm": 1.1115273237228394, "learning_rate": 2e-05, "loss": 0.04129547, "step": 19960 }, { "epoch": 39.922, "grad_norm": 1.0844300985336304, "learning_rate": 2e-05, "loss": 0.02857055, "step": 19961 }, { "epoch": 39.924, "grad_norm": 2.0654237270355225, "learning_rate": 2e-05, "loss": 0.05185968, "step": 19962 }, { "epoch": 39.926, "grad_norm": 1.4213045835494995, "learning_rate": 2e-05, "loss": 0.03576629, "step": 19963 }, { "epoch": 39.928, "grad_norm": 1.399935007095337, "learning_rate": 2e-05, "loss": 0.05327077, "step": 19964 }, { "epoch": 39.93, "grad_norm": 1.1581571102142334, "learning_rate": 2e-05, "loss": 0.04553617, "step": 19965 }, { "epoch": 39.932, "grad_norm": 2.8895864486694336, "learning_rate": 2e-05, "loss": 0.0493044, "step": 19966 }, { "epoch": 39.934, "grad_norm": 1.4619674682617188, "learning_rate": 2e-05, "loss": 0.04714632, "step": 19967 }, { "epoch": 39.936, "grad_norm": 1.8203679323196411, "learning_rate": 2e-05, "loss": 0.06075837, "step": 19968 }, { "epoch": 39.938, "grad_norm": 1.141274333000183, "learning_rate": 2e-05, "loss": 0.04157832, "step": 19969 }, { "epoch": 39.94, "grad_norm": 1.180153727531433, "learning_rate": 2e-05, "loss": 0.05030315, "step": 19970 }, { "epoch": 39.942, "grad_norm": 1.2318646907806396, "learning_rate": 2e-05, "loss": 0.04882877, "step": 19971 }, { "epoch": 39.944, "grad_norm": 1.3025565147399902, "learning_rate": 2e-05, "loss": 0.0572556, "step": 19972 }, { "epoch": 39.946, "grad_norm": 1.211354374885559, "learning_rate": 2e-05, "loss": 0.04019732, "step": 19973 }, { "epoch": 39.948, "grad_norm": 1.2427458763122559, "learning_rate": 2e-05, "loss": 0.05008765, "step": 19974 }, { "epoch": 39.95, "grad_norm": 1.2193975448608398, "learning_rate": 2e-05, "loss": 0.05430697, "step": 19975 }, { "epoch": 39.952, "grad_norm": 1.3001223802566528, "learning_rate": 2e-05, "loss": 0.05283924, "step": 19976 }, { "epoch": 39.954, "grad_norm": 1.1963660717010498, "learning_rate": 2e-05, "loss": 0.04262936, "step": 19977 }, { "epoch": 39.956, "grad_norm": 1.0662003755569458, "learning_rate": 2e-05, "loss": 0.02899798, "step": 19978 }, { "epoch": 39.958, "grad_norm": 1.1126896142959595, "learning_rate": 2e-05, "loss": 0.04382278, "step": 19979 }, { "epoch": 39.96, "grad_norm": 1.4259101152420044, "learning_rate": 2e-05, "loss": 0.04453399, "step": 19980 }, { "epoch": 39.962, "grad_norm": 1.2776509523391724, "learning_rate": 2e-05, "loss": 0.05190249, "step": 19981 }, { "epoch": 39.964, "grad_norm": 1.6215944290161133, "learning_rate": 2e-05, "loss": 0.04128952, "step": 19982 }, { "epoch": 39.966, "grad_norm": 1.6409664154052734, "learning_rate": 2e-05, "loss": 0.06073495, "step": 19983 }, { "epoch": 39.968, "grad_norm": 1.4451102018356323, "learning_rate": 2e-05, "loss": 0.04271209, "step": 19984 }, { "epoch": 39.97, "grad_norm": 1.8261207342147827, "learning_rate": 2e-05, "loss": 0.04644296, "step": 19985 }, { "epoch": 39.972, "grad_norm": 1.125677466392517, "learning_rate": 2e-05, "loss": 0.0407895, "step": 19986 }, { "epoch": 39.974, "grad_norm": 1.1863775253295898, "learning_rate": 2e-05, "loss": 0.04969215, "step": 19987 }, { "epoch": 39.976, "grad_norm": 1.3171180486679077, "learning_rate": 2e-05, "loss": 0.04542191, "step": 19988 }, { "epoch": 39.978, "grad_norm": 1.259590983390808, "learning_rate": 2e-05, "loss": 0.04899602, "step": 19989 }, { "epoch": 39.98, "grad_norm": 1.566371202468872, "learning_rate": 2e-05, "loss": 0.05686514, "step": 19990 }, { "epoch": 39.982, "grad_norm": 1.311056137084961, "learning_rate": 2e-05, "loss": 0.05832597, "step": 19991 }, { "epoch": 39.984, "grad_norm": 1.8373768329620361, "learning_rate": 2e-05, "loss": 0.06506597, "step": 19992 }, { "epoch": 39.986, "grad_norm": 1.1901731491088867, "learning_rate": 2e-05, "loss": 0.04623218, "step": 19993 }, { "epoch": 39.988, "grad_norm": 1.0414644479751587, "learning_rate": 2e-05, "loss": 0.034441, "step": 19994 }, { "epoch": 39.99, "grad_norm": 1.188096284866333, "learning_rate": 2e-05, "loss": 0.04996618, "step": 19995 }, { "epoch": 39.992, "grad_norm": 0.9998059868812561, "learning_rate": 2e-05, "loss": 0.03086119, "step": 19996 }, { "epoch": 39.994, "grad_norm": 1.2495959997177124, "learning_rate": 2e-05, "loss": 0.04241527, "step": 19997 }, { "epoch": 39.996, "grad_norm": 1.2383801937103271, "learning_rate": 2e-05, "loss": 0.049159, "step": 19998 }, { "epoch": 39.998, "grad_norm": 3.58790922164917, "learning_rate": 2e-05, "loss": 0.05261368, "step": 19999 }, { "epoch": 40.0, "grad_norm": 2.5835986137390137, "learning_rate": 2e-05, "loss": 0.05580256, "step": 20000 }, { "epoch": 40.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9840319361277445, "Equal_1": 1.0, "Equal_2": 0.9880239520958084, "Equal_3": 0.9940119760479041, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9959919839679359, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.992, "Perpendicular_1": 0.998, "Perpendicular_2": 0.992, "Perpendicular_3": 0.9018036072144289, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.99, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9860279441117764 }, "eval_runtime": 319.7794, "eval_samples_per_second": 32.835, "eval_steps_per_second": 0.657, "step": 20000 }, { "epoch": 40.002, "grad_norm": 1.5618497133255005, "learning_rate": 2e-05, "loss": 0.05388392, "step": 20001 }, { "epoch": 40.004, "grad_norm": 1.2499672174453735, "learning_rate": 2e-05, "loss": 0.04120894, "step": 20002 }, { "epoch": 40.006, "grad_norm": 1.2109254598617554, "learning_rate": 2e-05, "loss": 0.03839901, "step": 20003 }, { "epoch": 40.008, "grad_norm": 1.3738662004470825, "learning_rate": 2e-05, "loss": 0.05607092, "step": 20004 }, { "epoch": 40.01, "grad_norm": 1.273634672164917, "learning_rate": 2e-05, "loss": 0.05029005, "step": 20005 }, { "epoch": 40.012, "grad_norm": 1.2355257272720337, "learning_rate": 2e-05, "loss": 0.04626942, "step": 20006 }, { "epoch": 40.014, "grad_norm": 1.1767611503601074, "learning_rate": 2e-05, "loss": 0.04972693, "step": 20007 }, { "epoch": 40.016, "grad_norm": 1.1207237243652344, "learning_rate": 2e-05, "loss": 0.03946103, "step": 20008 }, { "epoch": 40.018, "grad_norm": 2.209026336669922, "learning_rate": 2e-05, "loss": 0.05307207, "step": 20009 }, { "epoch": 40.02, "grad_norm": 1.8616058826446533, "learning_rate": 2e-05, "loss": 0.05713968, "step": 20010 }, { "epoch": 40.022, "grad_norm": 1.272833228111267, "learning_rate": 2e-05, "loss": 0.05387183, "step": 20011 }, { "epoch": 40.024, "grad_norm": 1.3753788471221924, "learning_rate": 2e-05, "loss": 0.04635529, "step": 20012 }, { "epoch": 40.026, "grad_norm": 1.1166011095046997, "learning_rate": 2e-05, "loss": 0.03984597, "step": 20013 }, { "epoch": 40.028, "grad_norm": 1.2804628610610962, "learning_rate": 2e-05, "loss": 0.04099889, "step": 20014 }, { "epoch": 40.03, "grad_norm": 1.6008166074752808, "learning_rate": 2e-05, "loss": 0.04193804, "step": 20015 }, { "epoch": 40.032, "grad_norm": 1.178210735321045, "learning_rate": 2e-05, "loss": 0.05431996, "step": 20016 }, { "epoch": 40.034, "grad_norm": 1.2867320775985718, "learning_rate": 2e-05, "loss": 0.04413334, "step": 20017 }, { "epoch": 40.036, "grad_norm": 1.4618786573410034, "learning_rate": 2e-05, "loss": 0.04305667, "step": 20018 }, { "epoch": 40.038, "grad_norm": 1.2682616710662842, "learning_rate": 2e-05, "loss": 0.04806349, "step": 20019 }, { "epoch": 40.04, "grad_norm": 1.1537147760391235, "learning_rate": 2e-05, "loss": 0.0417571, "step": 20020 }, { "epoch": 40.042, "grad_norm": 0.8992504477500916, "learning_rate": 2e-05, "loss": 0.03431882, "step": 20021 }, { "epoch": 40.044, "grad_norm": 2.1597468852996826, "learning_rate": 2e-05, "loss": 0.05409172, "step": 20022 }, { "epoch": 40.046, "grad_norm": 1.5585830211639404, "learning_rate": 2e-05, "loss": 0.0792776, "step": 20023 }, { "epoch": 40.048, "grad_norm": 1.5551427602767944, "learning_rate": 2e-05, "loss": 0.05392529, "step": 20024 }, { "epoch": 40.05, "grad_norm": 1.1377781629562378, "learning_rate": 2e-05, "loss": 0.04901553, "step": 20025 }, { "epoch": 40.052, "grad_norm": 1.1749012470245361, "learning_rate": 2e-05, "loss": 0.04288197, "step": 20026 }, { "epoch": 40.054, "grad_norm": 1.2844057083129883, "learning_rate": 2e-05, "loss": 0.05152037, "step": 20027 }, { "epoch": 40.056, "grad_norm": 0.8824015855789185, "learning_rate": 2e-05, "loss": 0.02397311, "step": 20028 }, { "epoch": 40.058, "grad_norm": 1.6838172674179077, "learning_rate": 2e-05, "loss": 0.05278652, "step": 20029 }, { "epoch": 40.06, "grad_norm": 1.0910221338272095, "learning_rate": 2e-05, "loss": 0.03923189, "step": 20030 }, { "epoch": 40.062, "grad_norm": 1.179529070854187, "learning_rate": 2e-05, "loss": 0.04087804, "step": 20031 }, { "epoch": 40.064, "grad_norm": 1.462498664855957, "learning_rate": 2e-05, "loss": 0.05278556, "step": 20032 }, { "epoch": 40.066, "grad_norm": 1.1131311655044556, "learning_rate": 2e-05, "loss": 0.04452292, "step": 20033 }, { "epoch": 40.068, "grad_norm": 1.4193816184997559, "learning_rate": 2e-05, "loss": 0.05834501, "step": 20034 }, { "epoch": 40.07, "grad_norm": 1.2489386796951294, "learning_rate": 2e-05, "loss": 0.04075392, "step": 20035 }, { "epoch": 40.072, "grad_norm": 1.0575319528579712, "learning_rate": 2e-05, "loss": 0.0387546, "step": 20036 }, { "epoch": 40.074, "grad_norm": 1.1875722408294678, "learning_rate": 2e-05, "loss": 0.04389498, "step": 20037 }, { "epoch": 40.076, "grad_norm": 1.1869674921035767, "learning_rate": 2e-05, "loss": 0.02951438, "step": 20038 }, { "epoch": 40.078, "grad_norm": 2.2647807598114014, "learning_rate": 2e-05, "loss": 0.06623721, "step": 20039 }, { "epoch": 40.08, "grad_norm": 1.8764697313308716, "learning_rate": 2e-05, "loss": 0.04736568, "step": 20040 }, { "epoch": 40.082, "grad_norm": 1.6881517171859741, "learning_rate": 2e-05, "loss": 0.05365419, "step": 20041 }, { "epoch": 40.084, "grad_norm": 1.8008352518081665, "learning_rate": 2e-05, "loss": 0.06396797, "step": 20042 }, { "epoch": 40.086, "grad_norm": 1.4374672174453735, "learning_rate": 2e-05, "loss": 0.06762437, "step": 20043 }, { "epoch": 40.088, "grad_norm": 1.4115700721740723, "learning_rate": 2e-05, "loss": 0.04840955, "step": 20044 }, { "epoch": 40.09, "grad_norm": 1.0511454343795776, "learning_rate": 2e-05, "loss": 0.03845333, "step": 20045 }, { "epoch": 40.092, "grad_norm": 1.0274492502212524, "learning_rate": 2e-05, "loss": 0.03665432, "step": 20046 }, { "epoch": 40.094, "grad_norm": 2.0367865562438965, "learning_rate": 2e-05, "loss": 0.04981969, "step": 20047 }, { "epoch": 40.096, "grad_norm": 1.2091890573501587, "learning_rate": 2e-05, "loss": 0.0455159, "step": 20048 }, { "epoch": 40.098, "grad_norm": 1.1141090393066406, "learning_rate": 2e-05, "loss": 0.04782473, "step": 20049 }, { "epoch": 40.1, "grad_norm": 1.4072805643081665, "learning_rate": 2e-05, "loss": 0.05027459, "step": 20050 }, { "epoch": 40.102, "grad_norm": 1.0128663778305054, "learning_rate": 2e-05, "loss": 0.03786948, "step": 20051 }, { "epoch": 40.104, "grad_norm": 1.241088628768921, "learning_rate": 2e-05, "loss": 0.04034789, "step": 20052 }, { "epoch": 40.106, "grad_norm": 2.3295323848724365, "learning_rate": 2e-05, "loss": 0.04255938, "step": 20053 }, { "epoch": 40.108, "grad_norm": 1.2355490922927856, "learning_rate": 2e-05, "loss": 0.04925885, "step": 20054 }, { "epoch": 40.11, "grad_norm": 1.342072606086731, "learning_rate": 2e-05, "loss": 0.05003198, "step": 20055 }, { "epoch": 40.112, "grad_norm": 1.1754595041275024, "learning_rate": 2e-05, "loss": 0.04749743, "step": 20056 }, { "epoch": 40.114, "grad_norm": 1.235671043395996, "learning_rate": 2e-05, "loss": 0.05393402, "step": 20057 }, { "epoch": 40.116, "grad_norm": 1.2485733032226562, "learning_rate": 2e-05, "loss": 0.0627479, "step": 20058 }, { "epoch": 40.118, "grad_norm": 1.5328861474990845, "learning_rate": 2e-05, "loss": 0.05113653, "step": 20059 }, { "epoch": 40.12, "grad_norm": 1.499184489250183, "learning_rate": 2e-05, "loss": 0.05487531, "step": 20060 }, { "epoch": 40.122, "grad_norm": 2.340745449066162, "learning_rate": 2e-05, "loss": 0.05198453, "step": 20061 }, { "epoch": 40.124, "grad_norm": 1.106819748878479, "learning_rate": 2e-05, "loss": 0.0406749, "step": 20062 }, { "epoch": 40.126, "grad_norm": 0.9359315037727356, "learning_rate": 2e-05, "loss": 0.02896912, "step": 20063 }, { "epoch": 40.128, "grad_norm": 1.3963433504104614, "learning_rate": 2e-05, "loss": 0.05835143, "step": 20064 }, { "epoch": 40.13, "grad_norm": 1.1196558475494385, "learning_rate": 2e-05, "loss": 0.04942945, "step": 20065 }, { "epoch": 40.132, "grad_norm": 1.1688649654388428, "learning_rate": 2e-05, "loss": 0.05483418, "step": 20066 }, { "epoch": 40.134, "grad_norm": 1.5023531913757324, "learning_rate": 2e-05, "loss": 0.05200912, "step": 20067 }, { "epoch": 40.136, "grad_norm": 1.1763683557510376, "learning_rate": 2e-05, "loss": 0.04295605, "step": 20068 }, { "epoch": 40.138, "grad_norm": 1.2277802228927612, "learning_rate": 2e-05, "loss": 0.04733496, "step": 20069 }, { "epoch": 40.14, "grad_norm": 1.6241992712020874, "learning_rate": 2e-05, "loss": 0.06077222, "step": 20070 }, { "epoch": 40.142, "grad_norm": 2.3605220317840576, "learning_rate": 2e-05, "loss": 0.05581881, "step": 20071 }, { "epoch": 40.144, "grad_norm": 1.1370664834976196, "learning_rate": 2e-05, "loss": 0.04241094, "step": 20072 }, { "epoch": 40.146, "grad_norm": 1.6637158393859863, "learning_rate": 2e-05, "loss": 0.05778241, "step": 20073 }, { "epoch": 40.148, "grad_norm": 1.4280550479888916, "learning_rate": 2e-05, "loss": 0.05228476, "step": 20074 }, { "epoch": 40.15, "grad_norm": 1.5583044290542603, "learning_rate": 2e-05, "loss": 0.05583321, "step": 20075 }, { "epoch": 40.152, "grad_norm": 1.119242548942566, "learning_rate": 2e-05, "loss": 0.04524466, "step": 20076 }, { "epoch": 40.154, "grad_norm": 1.4672080278396606, "learning_rate": 2e-05, "loss": 0.06838915, "step": 20077 }, { "epoch": 40.156, "grad_norm": 2.8078713417053223, "learning_rate": 2e-05, "loss": 0.06085558, "step": 20078 }, { "epoch": 40.158, "grad_norm": 7.261538028717041, "learning_rate": 2e-05, "loss": 0.04814359, "step": 20079 }, { "epoch": 40.16, "grad_norm": 1.254329800605774, "learning_rate": 2e-05, "loss": 0.04063053, "step": 20080 }, { "epoch": 40.162, "grad_norm": 1.3946176767349243, "learning_rate": 2e-05, "loss": 0.04426568, "step": 20081 }, { "epoch": 40.164, "grad_norm": 1.089093565940857, "learning_rate": 2e-05, "loss": 0.04132479, "step": 20082 }, { "epoch": 40.166, "grad_norm": 1.2767221927642822, "learning_rate": 2e-05, "loss": 0.05414563, "step": 20083 }, { "epoch": 40.168, "grad_norm": 1.287773609161377, "learning_rate": 2e-05, "loss": 0.04473702, "step": 20084 }, { "epoch": 40.17, "grad_norm": 0.8289897441864014, "learning_rate": 2e-05, "loss": 0.03129639, "step": 20085 }, { "epoch": 40.172, "grad_norm": 1.047951579093933, "learning_rate": 2e-05, "loss": 0.03783755, "step": 20086 }, { "epoch": 40.174, "grad_norm": 0.9991368651390076, "learning_rate": 2e-05, "loss": 0.03878902, "step": 20087 }, { "epoch": 40.176, "grad_norm": 1.249922275543213, "learning_rate": 2e-05, "loss": 0.04872551, "step": 20088 }, { "epoch": 40.178, "grad_norm": 1.3479992151260376, "learning_rate": 2e-05, "loss": 0.07139233, "step": 20089 }, { "epoch": 40.18, "grad_norm": 2.5222294330596924, "learning_rate": 2e-05, "loss": 0.05283742, "step": 20090 }, { "epoch": 40.182, "grad_norm": 1.0991319417953491, "learning_rate": 2e-05, "loss": 0.05567234, "step": 20091 }, { "epoch": 40.184, "grad_norm": 1.6871635913848877, "learning_rate": 2e-05, "loss": 0.08690213, "step": 20092 }, { "epoch": 40.186, "grad_norm": 1.2134432792663574, "learning_rate": 2e-05, "loss": 0.04679528, "step": 20093 }, { "epoch": 40.188, "grad_norm": 1.456554889678955, "learning_rate": 2e-05, "loss": 0.05808542, "step": 20094 }, { "epoch": 40.19, "grad_norm": 1.1095123291015625, "learning_rate": 2e-05, "loss": 0.04110887, "step": 20095 }, { "epoch": 40.192, "grad_norm": 0.9948191046714783, "learning_rate": 2e-05, "loss": 0.0418423, "step": 20096 }, { "epoch": 40.194, "grad_norm": 1.4417386054992676, "learning_rate": 2e-05, "loss": 0.06203289, "step": 20097 }, { "epoch": 40.196, "grad_norm": 2.3071508407592773, "learning_rate": 2e-05, "loss": 0.04821993, "step": 20098 }, { "epoch": 40.198, "grad_norm": 1.352673888206482, "learning_rate": 2e-05, "loss": 0.03600384, "step": 20099 }, { "epoch": 40.2, "grad_norm": 1.105892300605774, "learning_rate": 2e-05, "loss": 0.03900053, "step": 20100 }, { "epoch": 40.202, "grad_norm": 1.2061972618103027, "learning_rate": 2e-05, "loss": 0.05580106, "step": 20101 }, { "epoch": 40.204, "grad_norm": 1.2827521562576294, "learning_rate": 2e-05, "loss": 0.04791928, "step": 20102 }, { "epoch": 40.206, "grad_norm": 1.2111438512802124, "learning_rate": 2e-05, "loss": 0.04924633, "step": 20103 }, { "epoch": 40.208, "grad_norm": 1.4191051721572876, "learning_rate": 2e-05, "loss": 0.04869965, "step": 20104 }, { "epoch": 40.21, "grad_norm": 1.1339068412780762, "learning_rate": 2e-05, "loss": 0.05062132, "step": 20105 }, { "epoch": 40.212, "grad_norm": 1.2514359951019287, "learning_rate": 2e-05, "loss": 0.04554267, "step": 20106 }, { "epoch": 40.214, "grad_norm": 1.1411757469177246, "learning_rate": 2e-05, "loss": 0.04453161, "step": 20107 }, { "epoch": 40.216, "grad_norm": 1.28267240524292, "learning_rate": 2e-05, "loss": 0.03762868, "step": 20108 }, { "epoch": 40.218, "grad_norm": 1.2932469844818115, "learning_rate": 2e-05, "loss": 0.05375424, "step": 20109 }, { "epoch": 40.22, "grad_norm": 1.5914359092712402, "learning_rate": 2e-05, "loss": 0.05069669, "step": 20110 }, { "epoch": 40.222, "grad_norm": 1.1139858961105347, "learning_rate": 2e-05, "loss": 0.03766536, "step": 20111 }, { "epoch": 40.224, "grad_norm": 1.3693923950195312, "learning_rate": 2e-05, "loss": 0.04846753, "step": 20112 }, { "epoch": 40.226, "grad_norm": 1.1039000749588013, "learning_rate": 2e-05, "loss": 0.035409, "step": 20113 }, { "epoch": 40.228, "grad_norm": 1.1846332550048828, "learning_rate": 2e-05, "loss": 0.04201246, "step": 20114 }, { "epoch": 40.23, "grad_norm": 2.8767950534820557, "learning_rate": 2e-05, "loss": 0.06352147, "step": 20115 }, { "epoch": 40.232, "grad_norm": 1.286635160446167, "learning_rate": 2e-05, "loss": 0.05608144, "step": 20116 }, { "epoch": 40.234, "grad_norm": 1.0336881875991821, "learning_rate": 2e-05, "loss": 0.03256797, "step": 20117 }, { "epoch": 40.236, "grad_norm": 8.691652297973633, "learning_rate": 2e-05, "loss": 0.0322794, "step": 20118 }, { "epoch": 40.238, "grad_norm": 2.27980375289917, "learning_rate": 2e-05, "loss": 0.03699232, "step": 20119 }, { "epoch": 40.24, "grad_norm": 1.9396880865097046, "learning_rate": 2e-05, "loss": 0.06942445, "step": 20120 }, { "epoch": 40.242, "grad_norm": 1.0819038152694702, "learning_rate": 2e-05, "loss": 0.04012003, "step": 20121 }, { "epoch": 40.244, "grad_norm": 1.3157330751419067, "learning_rate": 2e-05, "loss": 0.05315004, "step": 20122 }, { "epoch": 40.246, "grad_norm": 1.216844081878662, "learning_rate": 2e-05, "loss": 0.05938418, "step": 20123 }, { "epoch": 40.248, "grad_norm": 1.3893853425979614, "learning_rate": 2e-05, "loss": 0.04685809, "step": 20124 }, { "epoch": 40.25, "grad_norm": 1.0636508464813232, "learning_rate": 2e-05, "loss": 0.03838153, "step": 20125 }, { "epoch": 40.252, "grad_norm": 1.0984792709350586, "learning_rate": 2e-05, "loss": 0.05215897, "step": 20126 }, { "epoch": 40.254, "grad_norm": 1.0352123975753784, "learning_rate": 2e-05, "loss": 0.04288472, "step": 20127 }, { "epoch": 40.256, "grad_norm": 1.0150216817855835, "learning_rate": 2e-05, "loss": 0.03558073, "step": 20128 }, { "epoch": 40.258, "grad_norm": 1.1401340961456299, "learning_rate": 2e-05, "loss": 0.03874261, "step": 20129 }, { "epoch": 40.26, "grad_norm": 1.2564568519592285, "learning_rate": 2e-05, "loss": 0.05399378, "step": 20130 }, { "epoch": 40.262, "grad_norm": 1.2055622339248657, "learning_rate": 2e-05, "loss": 0.05442786, "step": 20131 }, { "epoch": 40.264, "grad_norm": 1.2611266374588013, "learning_rate": 2e-05, "loss": 0.04908681, "step": 20132 }, { "epoch": 40.266, "grad_norm": 1.8219330310821533, "learning_rate": 2e-05, "loss": 0.05871871, "step": 20133 }, { "epoch": 40.268, "grad_norm": 1.118198275566101, "learning_rate": 2e-05, "loss": 0.04319108, "step": 20134 }, { "epoch": 40.27, "grad_norm": 1.0127724409103394, "learning_rate": 2e-05, "loss": 0.03908618, "step": 20135 }, { "epoch": 40.272, "grad_norm": 1.1128853559494019, "learning_rate": 2e-05, "loss": 0.04767665, "step": 20136 }, { "epoch": 40.274, "grad_norm": 1.198813557624817, "learning_rate": 2e-05, "loss": 0.04259463, "step": 20137 }, { "epoch": 40.276, "grad_norm": 1.0490918159484863, "learning_rate": 2e-05, "loss": 0.02958579, "step": 20138 }, { "epoch": 40.278, "grad_norm": 1.3297768831253052, "learning_rate": 2e-05, "loss": 0.05636571, "step": 20139 }, { "epoch": 40.28, "grad_norm": 1.2086269855499268, "learning_rate": 2e-05, "loss": 0.04821461, "step": 20140 }, { "epoch": 40.282, "grad_norm": 2.00396728515625, "learning_rate": 2e-05, "loss": 0.03132957, "step": 20141 }, { "epoch": 40.284, "grad_norm": 1.4229713678359985, "learning_rate": 2e-05, "loss": 0.04370468, "step": 20142 }, { "epoch": 40.286, "grad_norm": 1.6150267124176025, "learning_rate": 2e-05, "loss": 0.05704456, "step": 20143 }, { "epoch": 40.288, "grad_norm": 0.9842348098754883, "learning_rate": 2e-05, "loss": 0.02401066, "step": 20144 }, { "epoch": 40.29, "grad_norm": 1.246802806854248, "learning_rate": 2e-05, "loss": 0.03580172, "step": 20145 }, { "epoch": 40.292, "grad_norm": 1.4223207235336304, "learning_rate": 2e-05, "loss": 0.04059513, "step": 20146 }, { "epoch": 40.294, "grad_norm": 1.252277135848999, "learning_rate": 2e-05, "loss": 0.05555695, "step": 20147 }, { "epoch": 40.296, "grad_norm": 1.1984466314315796, "learning_rate": 2e-05, "loss": 0.04715933, "step": 20148 }, { "epoch": 40.298, "grad_norm": 1.1603848934173584, "learning_rate": 2e-05, "loss": 0.04525081, "step": 20149 }, { "epoch": 40.3, "grad_norm": 1.5191564559936523, "learning_rate": 2e-05, "loss": 0.05356734, "step": 20150 }, { "epoch": 40.302, "grad_norm": 2.3878793716430664, "learning_rate": 2e-05, "loss": 0.04827441, "step": 20151 }, { "epoch": 40.304, "grad_norm": 1.3220070600509644, "learning_rate": 2e-05, "loss": 0.05512287, "step": 20152 }, { "epoch": 40.306, "grad_norm": 1.7690134048461914, "learning_rate": 2e-05, "loss": 0.07210412, "step": 20153 }, { "epoch": 40.308, "grad_norm": 3.268732786178589, "learning_rate": 2e-05, "loss": 0.05046576, "step": 20154 }, { "epoch": 40.31, "grad_norm": 1.3176329135894775, "learning_rate": 2e-05, "loss": 0.04881255, "step": 20155 }, { "epoch": 40.312, "grad_norm": 1.2548283338546753, "learning_rate": 2e-05, "loss": 0.04261008, "step": 20156 }, { "epoch": 40.314, "grad_norm": 1.4517444372177124, "learning_rate": 2e-05, "loss": 0.04030664, "step": 20157 }, { "epoch": 40.316, "grad_norm": 1.297654628753662, "learning_rate": 2e-05, "loss": 0.04864812, "step": 20158 }, { "epoch": 40.318, "grad_norm": 1.3605537414550781, "learning_rate": 2e-05, "loss": 0.05398957, "step": 20159 }, { "epoch": 40.32, "grad_norm": 1.0863500833511353, "learning_rate": 2e-05, "loss": 0.04536635, "step": 20160 }, { "epoch": 40.322, "grad_norm": 1.3427700996398926, "learning_rate": 2e-05, "loss": 0.04969367, "step": 20161 }, { "epoch": 40.324, "grad_norm": 1.6353856325149536, "learning_rate": 2e-05, "loss": 0.04320132, "step": 20162 }, { "epoch": 40.326, "grad_norm": 1.2196747064590454, "learning_rate": 2e-05, "loss": 0.03800016, "step": 20163 }, { "epoch": 40.328, "grad_norm": 1.7924127578735352, "learning_rate": 2e-05, "loss": 0.07237592, "step": 20164 }, { "epoch": 40.33, "grad_norm": 1.7914211750030518, "learning_rate": 2e-05, "loss": 0.05794446, "step": 20165 }, { "epoch": 40.332, "grad_norm": 1.2573678493499756, "learning_rate": 2e-05, "loss": 0.04247341, "step": 20166 }, { "epoch": 40.334, "grad_norm": 1.1113066673278809, "learning_rate": 2e-05, "loss": 0.05276919, "step": 20167 }, { "epoch": 40.336, "grad_norm": 1.5416284799575806, "learning_rate": 2e-05, "loss": 0.05891626, "step": 20168 }, { "epoch": 40.338, "grad_norm": 1.1009063720703125, "learning_rate": 2e-05, "loss": 0.0362423, "step": 20169 }, { "epoch": 40.34, "grad_norm": 1.2229732275009155, "learning_rate": 2e-05, "loss": 0.04161092, "step": 20170 }, { "epoch": 40.342, "grad_norm": 1.4811763763427734, "learning_rate": 2e-05, "loss": 0.05466798, "step": 20171 }, { "epoch": 40.344, "grad_norm": 1.1758745908737183, "learning_rate": 2e-05, "loss": 0.04076952, "step": 20172 }, { "epoch": 40.346, "grad_norm": 1.35615873336792, "learning_rate": 2e-05, "loss": 0.0481509, "step": 20173 }, { "epoch": 40.348, "grad_norm": 1.1503114700317383, "learning_rate": 2e-05, "loss": 0.03523769, "step": 20174 }, { "epoch": 40.35, "grad_norm": 1.1574171781539917, "learning_rate": 2e-05, "loss": 0.03359126, "step": 20175 }, { "epoch": 40.352, "grad_norm": 1.2408151626586914, "learning_rate": 2e-05, "loss": 0.05215861, "step": 20176 }, { "epoch": 40.354, "grad_norm": 1.2449312210083008, "learning_rate": 2e-05, "loss": 0.05467416, "step": 20177 }, { "epoch": 40.356, "grad_norm": 0.9955729246139526, "learning_rate": 2e-05, "loss": 0.03369155, "step": 20178 }, { "epoch": 40.358, "grad_norm": 1.5078949928283691, "learning_rate": 2e-05, "loss": 0.0526335, "step": 20179 }, { "epoch": 40.36, "grad_norm": 1.6468613147735596, "learning_rate": 2e-05, "loss": 0.0292394, "step": 20180 }, { "epoch": 40.362, "grad_norm": 1.2313929796218872, "learning_rate": 2e-05, "loss": 0.05515527, "step": 20181 }, { "epoch": 40.364, "grad_norm": 1.1853318214416504, "learning_rate": 2e-05, "loss": 0.04114341, "step": 20182 }, { "epoch": 40.366, "grad_norm": 2.5944883823394775, "learning_rate": 2e-05, "loss": 0.04679396, "step": 20183 }, { "epoch": 40.368, "grad_norm": 1.1291593313217163, "learning_rate": 2e-05, "loss": 0.03800917, "step": 20184 }, { "epoch": 40.37, "grad_norm": 1.2536524534225464, "learning_rate": 2e-05, "loss": 0.03788491, "step": 20185 }, { "epoch": 40.372, "grad_norm": 1.110937237739563, "learning_rate": 2e-05, "loss": 0.03799309, "step": 20186 }, { "epoch": 40.374, "grad_norm": 1.246289610862732, "learning_rate": 2e-05, "loss": 0.05560606, "step": 20187 }, { "epoch": 40.376, "grad_norm": 1.2249597311019897, "learning_rate": 2e-05, "loss": 0.05621621, "step": 20188 }, { "epoch": 40.378, "grad_norm": 1.4832850694656372, "learning_rate": 2e-05, "loss": 0.05166864, "step": 20189 }, { "epoch": 40.38, "grad_norm": 1.8920128345489502, "learning_rate": 2e-05, "loss": 0.06390877, "step": 20190 }, { "epoch": 40.382, "grad_norm": 1.3039014339447021, "learning_rate": 2e-05, "loss": 0.04282786, "step": 20191 }, { "epoch": 40.384, "grad_norm": 1.2546817064285278, "learning_rate": 2e-05, "loss": 0.05530909, "step": 20192 }, { "epoch": 40.386, "grad_norm": 1.0137908458709717, "learning_rate": 2e-05, "loss": 0.03802532, "step": 20193 }, { "epoch": 40.388, "grad_norm": 3.2582902908325195, "learning_rate": 2e-05, "loss": 0.04777801, "step": 20194 }, { "epoch": 40.39, "grad_norm": 1.6945880651474, "learning_rate": 2e-05, "loss": 0.06353603, "step": 20195 }, { "epoch": 40.392, "grad_norm": 1.861193299293518, "learning_rate": 2e-05, "loss": 0.0449325, "step": 20196 }, { "epoch": 40.394, "grad_norm": 1.4629795551300049, "learning_rate": 2e-05, "loss": 0.06012824, "step": 20197 }, { "epoch": 40.396, "grad_norm": 1.067794919013977, "learning_rate": 2e-05, "loss": 0.04181834, "step": 20198 }, { "epoch": 40.398, "grad_norm": 1.1766245365142822, "learning_rate": 2e-05, "loss": 0.04275657, "step": 20199 }, { "epoch": 40.4, "grad_norm": 3.7547171115875244, "learning_rate": 2e-05, "loss": 0.05930864, "step": 20200 }, { "epoch": 40.402, "grad_norm": 1.0909967422485352, "learning_rate": 2e-05, "loss": 0.04474407, "step": 20201 }, { "epoch": 40.404, "grad_norm": 1.1844147443771362, "learning_rate": 2e-05, "loss": 0.04869113, "step": 20202 }, { "epoch": 40.406, "grad_norm": 0.9126821160316467, "learning_rate": 2e-05, "loss": 0.03085716, "step": 20203 }, { "epoch": 40.408, "grad_norm": 1.0406267642974854, "learning_rate": 2e-05, "loss": 0.03293871, "step": 20204 }, { "epoch": 40.41, "grad_norm": 0.9543370008468628, "learning_rate": 2e-05, "loss": 0.03391955, "step": 20205 }, { "epoch": 40.412, "grad_norm": 0.8653976917266846, "learning_rate": 2e-05, "loss": 0.02381743, "step": 20206 }, { "epoch": 40.414, "grad_norm": 1.078957200050354, "learning_rate": 2e-05, "loss": 0.04199693, "step": 20207 }, { "epoch": 40.416, "grad_norm": 4.299551963806152, "learning_rate": 2e-05, "loss": 0.05353047, "step": 20208 }, { "epoch": 40.418, "grad_norm": 1.4246419668197632, "learning_rate": 2e-05, "loss": 0.04532846, "step": 20209 }, { "epoch": 40.42, "grad_norm": 1.2102235555648804, "learning_rate": 2e-05, "loss": 0.05503858, "step": 20210 }, { "epoch": 40.422, "grad_norm": 1.1240041255950928, "learning_rate": 2e-05, "loss": 0.03953891, "step": 20211 }, { "epoch": 40.424, "grad_norm": 1.3952480554580688, "learning_rate": 2e-05, "loss": 0.05917449, "step": 20212 }, { "epoch": 40.426, "grad_norm": 1.2208077907562256, "learning_rate": 2e-05, "loss": 0.03653198, "step": 20213 }, { "epoch": 40.428, "grad_norm": 1.1388483047485352, "learning_rate": 2e-05, "loss": 0.04844316, "step": 20214 }, { "epoch": 40.43, "grad_norm": 1.294251561164856, "learning_rate": 2e-05, "loss": 0.04915586, "step": 20215 }, { "epoch": 40.432, "grad_norm": 2.817385196685791, "learning_rate": 2e-05, "loss": 0.0448599, "step": 20216 }, { "epoch": 40.434, "grad_norm": 1.1939760446548462, "learning_rate": 2e-05, "loss": 0.04789099, "step": 20217 }, { "epoch": 40.436, "grad_norm": 1.2851756811141968, "learning_rate": 2e-05, "loss": 0.05403595, "step": 20218 }, { "epoch": 40.438, "grad_norm": 1.0391663312911987, "learning_rate": 2e-05, "loss": 0.04843492, "step": 20219 }, { "epoch": 40.44, "grad_norm": 1.0590914487838745, "learning_rate": 2e-05, "loss": 0.04758727, "step": 20220 }, { "epoch": 40.442, "grad_norm": 1.6666922569274902, "learning_rate": 2e-05, "loss": 0.05023216, "step": 20221 }, { "epoch": 40.444, "grad_norm": 1.1484979391098022, "learning_rate": 2e-05, "loss": 0.0386183, "step": 20222 }, { "epoch": 40.446, "grad_norm": 2.4382147789001465, "learning_rate": 2e-05, "loss": 0.06324873, "step": 20223 }, { "epoch": 40.448, "grad_norm": 1.1172566413879395, "learning_rate": 2e-05, "loss": 0.04193846, "step": 20224 }, { "epoch": 40.45, "grad_norm": 4.376603603363037, "learning_rate": 2e-05, "loss": 0.05779196, "step": 20225 }, { "epoch": 40.452, "grad_norm": 1.0015493631362915, "learning_rate": 2e-05, "loss": 0.02681638, "step": 20226 }, { "epoch": 40.454, "grad_norm": 1.31887948513031, "learning_rate": 2e-05, "loss": 0.05519604, "step": 20227 }, { "epoch": 40.456, "grad_norm": 1.7096487283706665, "learning_rate": 2e-05, "loss": 0.07397369, "step": 20228 }, { "epoch": 40.458, "grad_norm": 1.3221633434295654, "learning_rate": 2e-05, "loss": 0.05340026, "step": 20229 }, { "epoch": 40.46, "grad_norm": 1.0366623401641846, "learning_rate": 2e-05, "loss": 0.03435313, "step": 20230 }, { "epoch": 40.462, "grad_norm": 1.385132908821106, "learning_rate": 2e-05, "loss": 0.06896439, "step": 20231 }, { "epoch": 40.464, "grad_norm": 2.499610662460327, "learning_rate": 2e-05, "loss": 0.06424156, "step": 20232 }, { "epoch": 40.466, "grad_norm": 1.1677004098892212, "learning_rate": 2e-05, "loss": 0.03992238, "step": 20233 }, { "epoch": 40.468, "grad_norm": 3.2381439208984375, "learning_rate": 2e-05, "loss": 0.05073539, "step": 20234 }, { "epoch": 40.47, "grad_norm": 1.6042966842651367, "learning_rate": 2e-05, "loss": 0.03290394, "step": 20235 }, { "epoch": 40.472, "grad_norm": 1.2547248601913452, "learning_rate": 2e-05, "loss": 0.04911025, "step": 20236 }, { "epoch": 40.474, "grad_norm": 1.3358653783798218, "learning_rate": 2e-05, "loss": 0.0705348, "step": 20237 }, { "epoch": 40.476, "grad_norm": 1.3888798952102661, "learning_rate": 2e-05, "loss": 0.04967166, "step": 20238 }, { "epoch": 40.478, "grad_norm": 1.2543953657150269, "learning_rate": 2e-05, "loss": 0.04666407, "step": 20239 }, { "epoch": 40.48, "grad_norm": 1.0593624114990234, "learning_rate": 2e-05, "loss": 0.03600551, "step": 20240 }, { "epoch": 40.482, "grad_norm": 1.375593900680542, "learning_rate": 2e-05, "loss": 0.06600478, "step": 20241 }, { "epoch": 40.484, "grad_norm": 1.3863525390625, "learning_rate": 2e-05, "loss": 0.03970123, "step": 20242 }, { "epoch": 40.486, "grad_norm": 1.4013750553131104, "learning_rate": 2e-05, "loss": 0.05529185, "step": 20243 }, { "epoch": 40.488, "grad_norm": 1.3088881969451904, "learning_rate": 2e-05, "loss": 0.05219998, "step": 20244 }, { "epoch": 40.49, "grad_norm": 1.4817276000976562, "learning_rate": 2e-05, "loss": 0.05724048, "step": 20245 }, { "epoch": 40.492, "grad_norm": 1.4716103076934814, "learning_rate": 2e-05, "loss": 0.05594869, "step": 20246 }, { "epoch": 40.494, "grad_norm": 1.3419957160949707, "learning_rate": 2e-05, "loss": 0.06096876, "step": 20247 }, { "epoch": 40.496, "grad_norm": 0.9824522137641907, "learning_rate": 2e-05, "loss": 0.03611157, "step": 20248 }, { "epoch": 40.498, "grad_norm": 1.4187341928482056, "learning_rate": 2e-05, "loss": 0.05075159, "step": 20249 }, { "epoch": 40.5, "grad_norm": 1.4452311992645264, "learning_rate": 2e-05, "loss": 0.04892913, "step": 20250 }, { "epoch": 40.502, "grad_norm": 1.3794788122177124, "learning_rate": 2e-05, "loss": 0.04053912, "step": 20251 }, { "epoch": 40.504, "grad_norm": 1.2607942819595337, "learning_rate": 2e-05, "loss": 0.04043246, "step": 20252 }, { "epoch": 40.506, "grad_norm": 1.2887449264526367, "learning_rate": 2e-05, "loss": 0.04719897, "step": 20253 }, { "epoch": 40.508, "grad_norm": 1.3014177083969116, "learning_rate": 2e-05, "loss": 0.05486921, "step": 20254 }, { "epoch": 40.51, "grad_norm": 1.0283253192901611, "learning_rate": 2e-05, "loss": 0.03235307, "step": 20255 }, { "epoch": 40.512, "grad_norm": 2.2812840938568115, "learning_rate": 2e-05, "loss": 0.05784634, "step": 20256 }, { "epoch": 40.514, "grad_norm": 1.0626236200332642, "learning_rate": 2e-05, "loss": 0.0345856, "step": 20257 }, { "epoch": 40.516, "grad_norm": 1.3558917045593262, "learning_rate": 2e-05, "loss": 0.05744244, "step": 20258 }, { "epoch": 40.518, "grad_norm": 1.1966677904129028, "learning_rate": 2e-05, "loss": 0.04859721, "step": 20259 }, { "epoch": 40.52, "grad_norm": 2.496509552001953, "learning_rate": 2e-05, "loss": 0.05492661, "step": 20260 }, { "epoch": 40.522, "grad_norm": 1.3958922624588013, "learning_rate": 2e-05, "loss": 0.04244683, "step": 20261 }, { "epoch": 40.524, "grad_norm": 1.9222729206085205, "learning_rate": 2e-05, "loss": 0.04568821, "step": 20262 }, { "epoch": 40.526, "grad_norm": 1.3235441446304321, "learning_rate": 2e-05, "loss": 0.04356699, "step": 20263 }, { "epoch": 40.528, "grad_norm": 1.4533641338348389, "learning_rate": 2e-05, "loss": 0.052623, "step": 20264 }, { "epoch": 40.53, "grad_norm": 1.1498498916625977, "learning_rate": 2e-05, "loss": 0.04153707, "step": 20265 }, { "epoch": 40.532, "grad_norm": 1.6196757555007935, "learning_rate": 2e-05, "loss": 0.05171479, "step": 20266 }, { "epoch": 40.534, "grad_norm": 1.4504308700561523, "learning_rate": 2e-05, "loss": 0.04189808, "step": 20267 }, { "epoch": 40.536, "grad_norm": 2.833212375640869, "learning_rate": 2e-05, "loss": 0.05008708, "step": 20268 }, { "epoch": 40.538, "grad_norm": 1.1308425664901733, "learning_rate": 2e-05, "loss": 0.03203578, "step": 20269 }, { "epoch": 40.54, "grad_norm": 1.2283337116241455, "learning_rate": 2e-05, "loss": 0.05175584, "step": 20270 }, { "epoch": 40.542, "grad_norm": 1.1894733905792236, "learning_rate": 2e-05, "loss": 0.04817605, "step": 20271 }, { "epoch": 40.544, "grad_norm": 1.2060327529907227, "learning_rate": 2e-05, "loss": 0.04768998, "step": 20272 }, { "epoch": 40.546, "grad_norm": 1.2168883085250854, "learning_rate": 2e-05, "loss": 0.05099332, "step": 20273 }, { "epoch": 40.548, "grad_norm": 1.1243617534637451, "learning_rate": 2e-05, "loss": 0.03701821, "step": 20274 }, { "epoch": 40.55, "grad_norm": 1.3077539205551147, "learning_rate": 2e-05, "loss": 0.05925671, "step": 20275 }, { "epoch": 40.552, "grad_norm": 1.9214268922805786, "learning_rate": 2e-05, "loss": 0.05207685, "step": 20276 }, { "epoch": 40.554, "grad_norm": 1.2245525121688843, "learning_rate": 2e-05, "loss": 0.03042131, "step": 20277 }, { "epoch": 40.556, "grad_norm": 2.7672958374023438, "learning_rate": 2e-05, "loss": 0.06857204, "step": 20278 }, { "epoch": 40.558, "grad_norm": 1.2869014739990234, "learning_rate": 2e-05, "loss": 0.04283034, "step": 20279 }, { "epoch": 40.56, "grad_norm": 1.1815762519836426, "learning_rate": 2e-05, "loss": 0.04052944, "step": 20280 }, { "epoch": 40.562, "grad_norm": 2.087244749069214, "learning_rate": 2e-05, "loss": 0.06331733, "step": 20281 }, { "epoch": 40.564, "grad_norm": 1.3826656341552734, "learning_rate": 2e-05, "loss": 0.04571678, "step": 20282 }, { "epoch": 40.566, "grad_norm": 3.096768617630005, "learning_rate": 2e-05, "loss": 0.05192983, "step": 20283 }, { "epoch": 40.568, "grad_norm": 1.186075210571289, "learning_rate": 2e-05, "loss": 0.05161081, "step": 20284 }, { "epoch": 40.57, "grad_norm": 1.5007069110870361, "learning_rate": 2e-05, "loss": 0.0494061, "step": 20285 }, { "epoch": 40.572, "grad_norm": 1.4686963558197021, "learning_rate": 2e-05, "loss": 0.04155823, "step": 20286 }, { "epoch": 40.574, "grad_norm": 1.156620979309082, "learning_rate": 2e-05, "loss": 0.04370104, "step": 20287 }, { "epoch": 40.576, "grad_norm": 0.9128440022468567, "learning_rate": 2e-05, "loss": 0.03572758, "step": 20288 }, { "epoch": 40.578, "grad_norm": 2.242276668548584, "learning_rate": 2e-05, "loss": 0.0504857, "step": 20289 }, { "epoch": 40.58, "grad_norm": 3.176802396774292, "learning_rate": 2e-05, "loss": 0.05922861, "step": 20290 }, { "epoch": 40.582, "grad_norm": 1.0698144435882568, "learning_rate": 2e-05, "loss": 0.03699166, "step": 20291 }, { "epoch": 40.584, "grad_norm": 1.1629245281219482, "learning_rate": 2e-05, "loss": 0.04336737, "step": 20292 }, { "epoch": 40.586, "grad_norm": 1.35090172290802, "learning_rate": 2e-05, "loss": 0.05229773, "step": 20293 }, { "epoch": 40.588, "grad_norm": 1.5074291229248047, "learning_rate": 2e-05, "loss": 0.03177891, "step": 20294 }, { "epoch": 40.59, "grad_norm": 1.5969630479812622, "learning_rate": 2e-05, "loss": 0.02993333, "step": 20295 }, { "epoch": 40.592, "grad_norm": 1.2182235717773438, "learning_rate": 2e-05, "loss": 0.05838227, "step": 20296 }, { "epoch": 40.594, "grad_norm": 1.0844266414642334, "learning_rate": 2e-05, "loss": 0.04683068, "step": 20297 }, { "epoch": 40.596, "grad_norm": 1.113297939300537, "learning_rate": 2e-05, "loss": 0.04225264, "step": 20298 }, { "epoch": 40.598, "grad_norm": 1.3273909091949463, "learning_rate": 2e-05, "loss": 0.05528468, "step": 20299 }, { "epoch": 40.6, "grad_norm": 1.0608805418014526, "learning_rate": 2e-05, "loss": 0.04179974, "step": 20300 }, { "epoch": 40.602, "grad_norm": 2.0102715492248535, "learning_rate": 2e-05, "loss": 0.05286686, "step": 20301 }, { "epoch": 40.604, "grad_norm": 1.4773969650268555, "learning_rate": 2e-05, "loss": 0.07245906, "step": 20302 }, { "epoch": 40.606, "grad_norm": 1.4324716329574585, "learning_rate": 2e-05, "loss": 0.0385082, "step": 20303 }, { "epoch": 40.608, "grad_norm": 1.1375013589859009, "learning_rate": 2e-05, "loss": 0.04455408, "step": 20304 }, { "epoch": 40.61, "grad_norm": 1.8112099170684814, "learning_rate": 2e-05, "loss": 0.04102049, "step": 20305 }, { "epoch": 40.612, "grad_norm": 1.1016380786895752, "learning_rate": 2e-05, "loss": 0.04536098, "step": 20306 }, { "epoch": 40.614, "grad_norm": 1.2239372730255127, "learning_rate": 2e-05, "loss": 0.03934653, "step": 20307 }, { "epoch": 40.616, "grad_norm": 1.4652773141860962, "learning_rate": 2e-05, "loss": 0.04647703, "step": 20308 }, { "epoch": 40.618, "grad_norm": 1.3046026229858398, "learning_rate": 2e-05, "loss": 0.04674635, "step": 20309 }, { "epoch": 40.62, "grad_norm": 1.244250774383545, "learning_rate": 2e-05, "loss": 0.04536857, "step": 20310 }, { "epoch": 40.622, "grad_norm": 1.1843619346618652, "learning_rate": 2e-05, "loss": 0.05122893, "step": 20311 }, { "epoch": 40.624, "grad_norm": 1.3475395441055298, "learning_rate": 2e-05, "loss": 0.05014464, "step": 20312 }, { "epoch": 40.626, "grad_norm": 3.323194742202759, "learning_rate": 2e-05, "loss": 0.05366961, "step": 20313 }, { "epoch": 40.628, "grad_norm": 1.2654601335525513, "learning_rate": 2e-05, "loss": 0.03832448, "step": 20314 }, { "epoch": 40.63, "grad_norm": 1.2079969644546509, "learning_rate": 2e-05, "loss": 0.03466445, "step": 20315 }, { "epoch": 40.632, "grad_norm": 1.2388030290603638, "learning_rate": 2e-05, "loss": 0.05009665, "step": 20316 }, { "epoch": 40.634, "grad_norm": 1.3457781076431274, "learning_rate": 2e-05, "loss": 0.06065813, "step": 20317 }, { "epoch": 40.636, "grad_norm": 1.2236225605010986, "learning_rate": 2e-05, "loss": 0.05330027, "step": 20318 }, { "epoch": 40.638, "grad_norm": 1.1197084188461304, "learning_rate": 2e-05, "loss": 0.04628935, "step": 20319 }, { "epoch": 40.64, "grad_norm": 2.9297568798065186, "learning_rate": 2e-05, "loss": 0.06352062, "step": 20320 }, { "epoch": 40.642, "grad_norm": 1.181127667427063, "learning_rate": 2e-05, "loss": 0.04264445, "step": 20321 }, { "epoch": 40.644, "grad_norm": 3.967763662338257, "learning_rate": 2e-05, "loss": 0.06111223, "step": 20322 }, { "epoch": 40.646, "grad_norm": 1.2990795373916626, "learning_rate": 2e-05, "loss": 0.03806433, "step": 20323 }, { "epoch": 40.648, "grad_norm": 1.437103271484375, "learning_rate": 2e-05, "loss": 0.05348209, "step": 20324 }, { "epoch": 40.65, "grad_norm": 1.2915222644805908, "learning_rate": 2e-05, "loss": 0.04917194, "step": 20325 }, { "epoch": 40.652, "grad_norm": 1.3285936117172241, "learning_rate": 2e-05, "loss": 0.05632872, "step": 20326 }, { "epoch": 40.654, "grad_norm": 1.1885918378829956, "learning_rate": 2e-05, "loss": 0.04639854, "step": 20327 }, { "epoch": 40.656, "grad_norm": 1.2425380945205688, "learning_rate": 2e-05, "loss": 0.04708571, "step": 20328 }, { "epoch": 40.658, "grad_norm": 1.2747244834899902, "learning_rate": 2e-05, "loss": 0.05863928, "step": 20329 }, { "epoch": 40.66, "grad_norm": 1.2093666791915894, "learning_rate": 2e-05, "loss": 0.04224218, "step": 20330 }, { "epoch": 40.662, "grad_norm": 1.0434030294418335, "learning_rate": 2e-05, "loss": 0.0403709, "step": 20331 }, { "epoch": 40.664, "grad_norm": 1.101930856704712, "learning_rate": 2e-05, "loss": 0.04556001, "step": 20332 }, { "epoch": 40.666, "grad_norm": 1.3830711841583252, "learning_rate": 2e-05, "loss": 0.0546868, "step": 20333 }, { "epoch": 40.668, "grad_norm": 1.0567309856414795, "learning_rate": 2e-05, "loss": 0.04143964, "step": 20334 }, { "epoch": 40.67, "grad_norm": 1.3042546510696411, "learning_rate": 2e-05, "loss": 0.04565054, "step": 20335 }, { "epoch": 40.672, "grad_norm": 1.3436415195465088, "learning_rate": 2e-05, "loss": 0.06098578, "step": 20336 }, { "epoch": 40.674, "grad_norm": 1.1577244997024536, "learning_rate": 2e-05, "loss": 0.04822986, "step": 20337 }, { "epoch": 40.676, "grad_norm": 1.5855461359024048, "learning_rate": 2e-05, "loss": 0.0444434, "step": 20338 }, { "epoch": 40.678, "grad_norm": 1.7393256425857544, "learning_rate": 2e-05, "loss": 0.0486975, "step": 20339 }, { "epoch": 40.68, "grad_norm": 3.428238868713379, "learning_rate": 2e-05, "loss": 0.04472708, "step": 20340 }, { "epoch": 40.682, "grad_norm": 1.6236741542816162, "learning_rate": 2e-05, "loss": 0.05305525, "step": 20341 }, { "epoch": 40.684, "grad_norm": 1.2626410722732544, "learning_rate": 2e-05, "loss": 0.05415434, "step": 20342 }, { "epoch": 40.686, "grad_norm": 1.0027401447296143, "learning_rate": 2e-05, "loss": 0.03447507, "step": 20343 }, { "epoch": 40.688, "grad_norm": 1.2402012348175049, "learning_rate": 2e-05, "loss": 0.04778074, "step": 20344 }, { "epoch": 40.69, "grad_norm": 1.050713300704956, "learning_rate": 2e-05, "loss": 0.03177363, "step": 20345 }, { "epoch": 40.692, "grad_norm": 1.2489185333251953, "learning_rate": 2e-05, "loss": 0.04607525, "step": 20346 }, { "epoch": 40.694, "grad_norm": 1.1416654586791992, "learning_rate": 2e-05, "loss": 0.04840235, "step": 20347 }, { "epoch": 40.696, "grad_norm": 1.1122640371322632, "learning_rate": 2e-05, "loss": 0.03137276, "step": 20348 }, { "epoch": 40.698, "grad_norm": 1.4408119916915894, "learning_rate": 2e-05, "loss": 0.05424093, "step": 20349 }, { "epoch": 40.7, "grad_norm": 0.9952658414840698, "learning_rate": 2e-05, "loss": 0.02818977, "step": 20350 }, { "epoch": 40.702, "grad_norm": 1.1479781866073608, "learning_rate": 2e-05, "loss": 0.03366838, "step": 20351 }, { "epoch": 40.704, "grad_norm": 1.1322118043899536, "learning_rate": 2e-05, "loss": 0.04626714, "step": 20352 }, { "epoch": 40.706, "grad_norm": 1.3414576053619385, "learning_rate": 2e-05, "loss": 0.04931616, "step": 20353 }, { "epoch": 40.708, "grad_norm": 1.433391809463501, "learning_rate": 2e-05, "loss": 0.04131094, "step": 20354 }, { "epoch": 40.71, "grad_norm": 1.145910382270813, "learning_rate": 2e-05, "loss": 0.04541127, "step": 20355 }, { "epoch": 40.712, "grad_norm": 1.17069411277771, "learning_rate": 2e-05, "loss": 0.04991463, "step": 20356 }, { "epoch": 40.714, "grad_norm": 1.102471113204956, "learning_rate": 2e-05, "loss": 0.04218239, "step": 20357 }, { "epoch": 40.716, "grad_norm": 1.172473430633545, "learning_rate": 2e-05, "loss": 0.0438892, "step": 20358 }, { "epoch": 40.718, "grad_norm": 1.8067024946212769, "learning_rate": 2e-05, "loss": 0.05643699, "step": 20359 }, { "epoch": 40.72, "grad_norm": 1.3518229722976685, "learning_rate": 2e-05, "loss": 0.0555013, "step": 20360 }, { "epoch": 40.722, "grad_norm": 1.3783165216445923, "learning_rate": 2e-05, "loss": 0.05945724, "step": 20361 }, { "epoch": 40.724, "grad_norm": 1.1957453489303589, "learning_rate": 2e-05, "loss": 0.05680922, "step": 20362 }, { "epoch": 40.726, "grad_norm": 1.1252014636993408, "learning_rate": 2e-05, "loss": 0.05653615, "step": 20363 }, { "epoch": 40.728, "grad_norm": 1.2244309186935425, "learning_rate": 2e-05, "loss": 0.04203859, "step": 20364 }, { "epoch": 40.73, "grad_norm": 5.841495037078857, "learning_rate": 2e-05, "loss": 0.05102485, "step": 20365 }, { "epoch": 40.732, "grad_norm": 2.0462138652801514, "learning_rate": 2e-05, "loss": 0.05800704, "step": 20366 }, { "epoch": 40.734, "grad_norm": 1.3569488525390625, "learning_rate": 2e-05, "loss": 0.04887813, "step": 20367 }, { "epoch": 40.736, "grad_norm": 0.9353485703468323, "learning_rate": 2e-05, "loss": 0.0317823, "step": 20368 }, { "epoch": 40.738, "grad_norm": 1.1075727939605713, "learning_rate": 2e-05, "loss": 0.04405333, "step": 20369 }, { "epoch": 40.74, "grad_norm": 1.120461344718933, "learning_rate": 2e-05, "loss": 0.04158332, "step": 20370 }, { "epoch": 40.742, "grad_norm": 1.173218846321106, "learning_rate": 2e-05, "loss": 0.03538671, "step": 20371 }, { "epoch": 40.744, "grad_norm": 1.3119045495986938, "learning_rate": 2e-05, "loss": 0.04965501, "step": 20372 }, { "epoch": 40.746, "grad_norm": 1.4258747100830078, "learning_rate": 2e-05, "loss": 0.04786938, "step": 20373 }, { "epoch": 40.748, "grad_norm": 1.2724413871765137, "learning_rate": 2e-05, "loss": 0.04649073, "step": 20374 }, { "epoch": 40.75, "grad_norm": 1.6921082735061646, "learning_rate": 2e-05, "loss": 0.03883822, "step": 20375 }, { "epoch": 40.752, "grad_norm": 3.84258770942688, "learning_rate": 2e-05, "loss": 0.05532685, "step": 20376 }, { "epoch": 40.754, "grad_norm": 1.4394497871398926, "learning_rate": 2e-05, "loss": 0.04756043, "step": 20377 }, { "epoch": 40.756, "grad_norm": 1.1952511072158813, "learning_rate": 2e-05, "loss": 0.05040652, "step": 20378 }, { "epoch": 40.758, "grad_norm": 1.139320969581604, "learning_rate": 2e-05, "loss": 0.0432628, "step": 20379 }, { "epoch": 40.76, "grad_norm": 1.0140758752822876, "learning_rate": 2e-05, "loss": 0.0302791, "step": 20380 }, { "epoch": 40.762, "grad_norm": 1.783044695854187, "learning_rate": 2e-05, "loss": 0.05844506, "step": 20381 }, { "epoch": 40.764, "grad_norm": 1.1988928318023682, "learning_rate": 2e-05, "loss": 0.03283492, "step": 20382 }, { "epoch": 40.766, "grad_norm": 0.9184706807136536, "learning_rate": 2e-05, "loss": 0.03624507, "step": 20383 }, { "epoch": 40.768, "grad_norm": 1.7441927194595337, "learning_rate": 2e-05, "loss": 0.03876618, "step": 20384 }, { "epoch": 40.77, "grad_norm": 1.261252522468567, "learning_rate": 2e-05, "loss": 0.05795658, "step": 20385 }, { "epoch": 40.772, "grad_norm": 1.1532663106918335, "learning_rate": 2e-05, "loss": 0.050796, "step": 20386 }, { "epoch": 40.774, "grad_norm": 1.3474946022033691, "learning_rate": 2e-05, "loss": 0.0455665, "step": 20387 }, { "epoch": 40.776, "grad_norm": 1.175278663635254, "learning_rate": 2e-05, "loss": 0.03921409, "step": 20388 }, { "epoch": 40.778, "grad_norm": 1.2267823219299316, "learning_rate": 2e-05, "loss": 0.05682051, "step": 20389 }, { "epoch": 40.78, "grad_norm": 3.2952206134796143, "learning_rate": 2e-05, "loss": 0.04363419, "step": 20390 }, { "epoch": 40.782, "grad_norm": 1.7365856170654297, "learning_rate": 2e-05, "loss": 0.05543724, "step": 20391 }, { "epoch": 40.784, "grad_norm": 1.2324695587158203, "learning_rate": 2e-05, "loss": 0.05429393, "step": 20392 }, { "epoch": 40.786, "grad_norm": 1.2314908504486084, "learning_rate": 2e-05, "loss": 0.0417459, "step": 20393 }, { "epoch": 40.788, "grad_norm": 1.052890658378601, "learning_rate": 2e-05, "loss": 0.04568375, "step": 20394 }, { "epoch": 40.79, "grad_norm": 1.1527416706085205, "learning_rate": 2e-05, "loss": 0.05163721, "step": 20395 }, { "epoch": 40.792, "grad_norm": 1.255745768547058, "learning_rate": 2e-05, "loss": 0.04441888, "step": 20396 }, { "epoch": 40.794, "grad_norm": 2.4157254695892334, "learning_rate": 2e-05, "loss": 0.05280969, "step": 20397 }, { "epoch": 40.796, "grad_norm": 1.297743797302246, "learning_rate": 2e-05, "loss": 0.03922386, "step": 20398 }, { "epoch": 40.798, "grad_norm": 1.3191263675689697, "learning_rate": 2e-05, "loss": 0.0459812, "step": 20399 }, { "epoch": 40.8, "grad_norm": 1.6138203144073486, "learning_rate": 2e-05, "loss": 0.04809796, "step": 20400 }, { "epoch": 40.802, "grad_norm": 1.0761232376098633, "learning_rate": 2e-05, "loss": 0.04102475, "step": 20401 }, { "epoch": 40.804, "grad_norm": 1.3820741176605225, "learning_rate": 2e-05, "loss": 0.05293287, "step": 20402 }, { "epoch": 40.806, "grad_norm": 0.8618505597114563, "learning_rate": 2e-05, "loss": 0.03630167, "step": 20403 }, { "epoch": 40.808, "grad_norm": 1.2943086624145508, "learning_rate": 2e-05, "loss": 0.04525239, "step": 20404 }, { "epoch": 40.81, "grad_norm": 1.6318711042404175, "learning_rate": 2e-05, "loss": 0.04626258, "step": 20405 }, { "epoch": 40.812, "grad_norm": 1.0166434049606323, "learning_rate": 2e-05, "loss": 0.03998704, "step": 20406 }, { "epoch": 40.814, "grad_norm": 2.893320083618164, "learning_rate": 2e-05, "loss": 0.0571047, "step": 20407 }, { "epoch": 40.816, "grad_norm": 1.0632132291793823, "learning_rate": 2e-05, "loss": 0.03629584, "step": 20408 }, { "epoch": 40.818, "grad_norm": 1.168479084968567, "learning_rate": 2e-05, "loss": 0.03368823, "step": 20409 }, { "epoch": 40.82, "grad_norm": 1.0956116914749146, "learning_rate": 2e-05, "loss": 0.03800235, "step": 20410 }, { "epoch": 40.822, "grad_norm": 1.0127363204956055, "learning_rate": 2e-05, "loss": 0.03506695, "step": 20411 }, { "epoch": 40.824, "grad_norm": 1.6961843967437744, "learning_rate": 2e-05, "loss": 0.04757579, "step": 20412 }, { "epoch": 40.826, "grad_norm": 1.0485764741897583, "learning_rate": 2e-05, "loss": 0.03640955, "step": 20413 }, { "epoch": 40.828, "grad_norm": 1.4049381017684937, "learning_rate": 2e-05, "loss": 0.05531392, "step": 20414 }, { "epoch": 40.83, "grad_norm": 1.3320997953414917, "learning_rate": 2e-05, "loss": 0.04148456, "step": 20415 }, { "epoch": 40.832, "grad_norm": 1.2389609813690186, "learning_rate": 2e-05, "loss": 0.04860774, "step": 20416 }, { "epoch": 40.834, "grad_norm": 0.9642683863639832, "learning_rate": 2e-05, "loss": 0.04272363, "step": 20417 }, { "epoch": 40.836, "grad_norm": 1.2460993528366089, "learning_rate": 2e-05, "loss": 0.05987071, "step": 20418 }, { "epoch": 40.838, "grad_norm": 1.2246595621109009, "learning_rate": 2e-05, "loss": 0.05187958, "step": 20419 }, { "epoch": 40.84, "grad_norm": 1.6453964710235596, "learning_rate": 2e-05, "loss": 0.04696222, "step": 20420 }, { "epoch": 40.842, "grad_norm": 1.2105334997177124, "learning_rate": 2e-05, "loss": 0.04552323, "step": 20421 }, { "epoch": 40.844, "grad_norm": 1.1809000968933105, "learning_rate": 2e-05, "loss": 0.04364845, "step": 20422 }, { "epoch": 40.846, "grad_norm": 2.2245469093322754, "learning_rate": 2e-05, "loss": 0.04627731, "step": 20423 }, { "epoch": 40.848, "grad_norm": 1.180808186531067, "learning_rate": 2e-05, "loss": 0.04993726, "step": 20424 }, { "epoch": 40.85, "grad_norm": 1.7033965587615967, "learning_rate": 2e-05, "loss": 0.06255874, "step": 20425 }, { "epoch": 40.852, "grad_norm": 1.2189267873764038, "learning_rate": 2e-05, "loss": 0.04405816, "step": 20426 }, { "epoch": 40.854, "grad_norm": 3.7718513011932373, "learning_rate": 2e-05, "loss": 0.05504301, "step": 20427 }, { "epoch": 40.856, "grad_norm": 1.1234560012817383, "learning_rate": 2e-05, "loss": 0.04025323, "step": 20428 }, { "epoch": 40.858, "grad_norm": 1.3701295852661133, "learning_rate": 2e-05, "loss": 0.04871433, "step": 20429 }, { "epoch": 40.86, "grad_norm": 1.6501892805099487, "learning_rate": 2e-05, "loss": 0.06243546, "step": 20430 }, { "epoch": 40.862, "grad_norm": 1.19022536277771, "learning_rate": 2e-05, "loss": 0.04894875, "step": 20431 }, { "epoch": 40.864, "grad_norm": 1.0078957080841064, "learning_rate": 2e-05, "loss": 0.03035384, "step": 20432 }, { "epoch": 40.866, "grad_norm": 1.052426815032959, "learning_rate": 2e-05, "loss": 0.0399336, "step": 20433 }, { "epoch": 40.868, "grad_norm": 1.7193706035614014, "learning_rate": 2e-05, "loss": 0.05601721, "step": 20434 }, { "epoch": 40.87, "grad_norm": 1.1827843189239502, "learning_rate": 2e-05, "loss": 0.0472433, "step": 20435 }, { "epoch": 40.872, "grad_norm": 2.2266764640808105, "learning_rate": 2e-05, "loss": 0.05473574, "step": 20436 }, { "epoch": 40.874, "grad_norm": 1.7117822170257568, "learning_rate": 2e-05, "loss": 0.0380583, "step": 20437 }, { "epoch": 40.876, "grad_norm": 1.234930396080017, "learning_rate": 2e-05, "loss": 0.04837093, "step": 20438 }, { "epoch": 40.878, "grad_norm": 1.377922773361206, "learning_rate": 2e-05, "loss": 0.04900629, "step": 20439 }, { "epoch": 40.88, "grad_norm": 1.206617832183838, "learning_rate": 2e-05, "loss": 0.05211646, "step": 20440 }, { "epoch": 40.882, "grad_norm": 1.1824190616607666, "learning_rate": 2e-05, "loss": 0.03713381, "step": 20441 }, { "epoch": 40.884, "grad_norm": 0.9823435544967651, "learning_rate": 2e-05, "loss": 0.03660665, "step": 20442 }, { "epoch": 40.886, "grad_norm": 1.2949517965316772, "learning_rate": 2e-05, "loss": 0.05451689, "step": 20443 }, { "epoch": 40.888, "grad_norm": 1.2883609533309937, "learning_rate": 2e-05, "loss": 0.05717325, "step": 20444 }, { "epoch": 40.89, "grad_norm": 0.9961985945701599, "learning_rate": 2e-05, "loss": 0.03337915, "step": 20445 }, { "epoch": 40.892, "grad_norm": 1.195377230644226, "learning_rate": 2e-05, "loss": 0.0425334, "step": 20446 }, { "epoch": 40.894, "grad_norm": 1.027193546295166, "learning_rate": 2e-05, "loss": 0.02752158, "step": 20447 }, { "epoch": 40.896, "grad_norm": 1.1972559690475464, "learning_rate": 2e-05, "loss": 0.04988524, "step": 20448 }, { "epoch": 40.898, "grad_norm": 1.175160527229309, "learning_rate": 2e-05, "loss": 0.04154052, "step": 20449 }, { "epoch": 40.9, "grad_norm": 1.1122380495071411, "learning_rate": 2e-05, "loss": 0.03493803, "step": 20450 }, { "epoch": 40.902, "grad_norm": 1.1690434217453003, "learning_rate": 2e-05, "loss": 0.04058939, "step": 20451 }, { "epoch": 40.904, "grad_norm": 2.595667839050293, "learning_rate": 2e-05, "loss": 0.08465319, "step": 20452 }, { "epoch": 40.906, "grad_norm": 1.2236500978469849, "learning_rate": 2e-05, "loss": 0.04924143, "step": 20453 }, { "epoch": 40.908, "grad_norm": 2.681835174560547, "learning_rate": 2e-05, "loss": 0.06435752, "step": 20454 }, { "epoch": 40.91, "grad_norm": 1.396338701248169, "learning_rate": 2e-05, "loss": 0.04998089, "step": 20455 }, { "epoch": 40.912, "grad_norm": 1.3858076333999634, "learning_rate": 2e-05, "loss": 0.05152861, "step": 20456 }, { "epoch": 40.914, "grad_norm": 1.376036286354065, "learning_rate": 2e-05, "loss": 0.04543985, "step": 20457 }, { "epoch": 40.916, "grad_norm": 1.57783842086792, "learning_rate": 2e-05, "loss": 0.05842921, "step": 20458 }, { "epoch": 40.918, "grad_norm": 1.5693801641464233, "learning_rate": 2e-05, "loss": 0.0650731, "step": 20459 }, { "epoch": 40.92, "grad_norm": 1.4704762697219849, "learning_rate": 2e-05, "loss": 0.05608998, "step": 20460 }, { "epoch": 40.922, "grad_norm": 1.1098164319992065, "learning_rate": 2e-05, "loss": 0.04608128, "step": 20461 }, { "epoch": 40.924, "grad_norm": 1.1128933429718018, "learning_rate": 2e-05, "loss": 0.03824999, "step": 20462 }, { "epoch": 40.926, "grad_norm": 1.244307518005371, "learning_rate": 2e-05, "loss": 0.06200198, "step": 20463 }, { "epoch": 40.928, "grad_norm": 1.2337552309036255, "learning_rate": 2e-05, "loss": 0.03821502, "step": 20464 }, { "epoch": 40.93, "grad_norm": 1.2475448846817017, "learning_rate": 2e-05, "loss": 0.05957456, "step": 20465 }, { "epoch": 40.932, "grad_norm": 2.0048727989196777, "learning_rate": 2e-05, "loss": 0.0534838, "step": 20466 }, { "epoch": 40.934, "grad_norm": 1.6427809000015259, "learning_rate": 2e-05, "loss": 0.03990686, "step": 20467 }, { "epoch": 40.936, "grad_norm": 1.199326753616333, "learning_rate": 2e-05, "loss": 0.04839782, "step": 20468 }, { "epoch": 40.938, "grad_norm": 1.3400622606277466, "learning_rate": 2e-05, "loss": 0.05205151, "step": 20469 }, { "epoch": 40.94, "grad_norm": 1.2996052503585815, "learning_rate": 2e-05, "loss": 0.06201072, "step": 20470 }, { "epoch": 40.942, "grad_norm": 1.287192463874817, "learning_rate": 2e-05, "loss": 0.05062536, "step": 20471 }, { "epoch": 40.944, "grad_norm": 3.311911106109619, "learning_rate": 2e-05, "loss": 0.05273374, "step": 20472 }, { "epoch": 40.946, "grad_norm": 1.086965799331665, "learning_rate": 2e-05, "loss": 0.02739996, "step": 20473 }, { "epoch": 40.948, "grad_norm": 1.349342942237854, "learning_rate": 2e-05, "loss": 0.05531573, "step": 20474 }, { "epoch": 40.95, "grad_norm": 1.4824485778808594, "learning_rate": 2e-05, "loss": 0.05945705, "step": 20475 }, { "epoch": 40.952, "grad_norm": 1.3184701204299927, "learning_rate": 2e-05, "loss": 0.04314397, "step": 20476 }, { "epoch": 40.954, "grad_norm": 1.385398030281067, "learning_rate": 2e-05, "loss": 0.06099976, "step": 20477 }, { "epoch": 40.956, "grad_norm": 1.1655802726745605, "learning_rate": 2e-05, "loss": 0.04490764, "step": 20478 }, { "epoch": 40.958, "grad_norm": 4.062130928039551, "learning_rate": 2e-05, "loss": 0.05665839, "step": 20479 }, { "epoch": 40.96, "grad_norm": 1.4232219457626343, "learning_rate": 2e-05, "loss": 0.0396259, "step": 20480 }, { "epoch": 40.962, "grad_norm": 1.2747482061386108, "learning_rate": 2e-05, "loss": 0.05678995, "step": 20481 }, { "epoch": 40.964, "grad_norm": 1.5956790447235107, "learning_rate": 2e-05, "loss": 0.04028114, "step": 20482 }, { "epoch": 40.966, "grad_norm": 1.2978450059890747, "learning_rate": 2e-05, "loss": 0.04526225, "step": 20483 }, { "epoch": 40.968, "grad_norm": 1.3045350313186646, "learning_rate": 2e-05, "loss": 0.05053577, "step": 20484 }, { "epoch": 40.97, "grad_norm": 1.1985734701156616, "learning_rate": 2e-05, "loss": 0.04328226, "step": 20485 }, { "epoch": 40.972, "grad_norm": 1.285112977027893, "learning_rate": 2e-05, "loss": 0.04697558, "step": 20486 }, { "epoch": 40.974, "grad_norm": 1.4183571338653564, "learning_rate": 2e-05, "loss": 0.05476032, "step": 20487 }, { "epoch": 40.976, "grad_norm": 1.4861711263656616, "learning_rate": 2e-05, "loss": 0.05896805, "step": 20488 }, { "epoch": 40.978, "grad_norm": 1.6024667024612427, "learning_rate": 2e-05, "loss": 0.04853802, "step": 20489 }, { "epoch": 40.98, "grad_norm": 1.2148326635360718, "learning_rate": 2e-05, "loss": 0.05488897, "step": 20490 }, { "epoch": 40.982, "grad_norm": 1.6598411798477173, "learning_rate": 2e-05, "loss": 0.06064874, "step": 20491 }, { "epoch": 40.984, "grad_norm": 1.2032941579818726, "learning_rate": 2e-05, "loss": 0.04319712, "step": 20492 }, { "epoch": 40.986, "grad_norm": 2.1562469005584717, "learning_rate": 2e-05, "loss": 0.04315053, "step": 20493 }, { "epoch": 40.988, "grad_norm": 1.3768160343170166, "learning_rate": 2e-05, "loss": 0.0549387, "step": 20494 }, { "epoch": 40.99, "grad_norm": 1.250068187713623, "learning_rate": 2e-05, "loss": 0.04305088, "step": 20495 }, { "epoch": 40.992, "grad_norm": 1.2710068225860596, "learning_rate": 2e-05, "loss": 0.04697052, "step": 20496 }, { "epoch": 40.994, "grad_norm": 1.4347896575927734, "learning_rate": 2e-05, "loss": 0.050442, "step": 20497 }, { "epoch": 40.996, "grad_norm": 0.8269062638282776, "learning_rate": 2e-05, "loss": 0.02071571, "step": 20498 }, { "epoch": 40.998, "grad_norm": 4.110866069793701, "learning_rate": 2e-05, "loss": 0.03941682, "step": 20499 }, { "epoch": 41.0, "grad_norm": 1.8041988611221313, "learning_rate": 2e-05, "loss": 0.04850418, "step": 20500 }, { "epoch": 41.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9880239520958084, "Equal_1": 0.998, "Equal_2": 0.9880239520958084, "Equal_3": 0.9900199600798403, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9839679358717435, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.986, "Perpendicular_1": 0.998, "Perpendicular_2": 0.996, "Perpendicular_3": 0.8977955911823647, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.994, "PointLiesOnCircle_3": 0.998, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9820359281437125 }, "eval_runtime": 321.056, "eval_samples_per_second": 32.705, "eval_steps_per_second": 0.654, "step": 20500 }, { "epoch": 41.002, "grad_norm": 1.1957684755325317, "learning_rate": 2e-05, "loss": 0.03748061, "step": 20501 }, { "epoch": 41.004, "grad_norm": 1.1992721557617188, "learning_rate": 2e-05, "loss": 0.05174942, "step": 20502 }, { "epoch": 41.006, "grad_norm": 1.3277416229248047, "learning_rate": 2e-05, "loss": 0.04400465, "step": 20503 }, { "epoch": 41.008, "grad_norm": 1.6822006702423096, "learning_rate": 2e-05, "loss": 0.05937324, "step": 20504 }, { "epoch": 41.01, "grad_norm": 2.2135109901428223, "learning_rate": 2e-05, "loss": 0.06282458, "step": 20505 }, { "epoch": 41.012, "grad_norm": 1.2385971546173096, "learning_rate": 2e-05, "loss": 0.05500904, "step": 20506 }, { "epoch": 41.014, "grad_norm": 1.4644334316253662, "learning_rate": 2e-05, "loss": 0.03757091, "step": 20507 }, { "epoch": 41.016, "grad_norm": 1.2504773139953613, "learning_rate": 2e-05, "loss": 0.04865737, "step": 20508 }, { "epoch": 41.018, "grad_norm": 1.0606939792633057, "learning_rate": 2e-05, "loss": 0.046809, "step": 20509 }, { "epoch": 41.02, "grad_norm": 0.9825658798217773, "learning_rate": 2e-05, "loss": 0.03104939, "step": 20510 }, { "epoch": 41.022, "grad_norm": 0.9851643443107605, "learning_rate": 2e-05, "loss": 0.03083806, "step": 20511 }, { "epoch": 41.024, "grad_norm": 1.2548247575759888, "learning_rate": 2e-05, "loss": 0.05934072, "step": 20512 }, { "epoch": 41.026, "grad_norm": 2.8248443603515625, "learning_rate": 2e-05, "loss": 0.05007793, "step": 20513 }, { "epoch": 41.028, "grad_norm": 1.832990288734436, "learning_rate": 2e-05, "loss": 0.0635509, "step": 20514 }, { "epoch": 41.03, "grad_norm": 1.1892746686935425, "learning_rate": 2e-05, "loss": 0.04696298, "step": 20515 }, { "epoch": 41.032, "grad_norm": 1.8308331966400146, "learning_rate": 2e-05, "loss": 0.04524075, "step": 20516 }, { "epoch": 41.034, "grad_norm": 1.3011291027069092, "learning_rate": 2e-05, "loss": 0.04992253, "step": 20517 }, { "epoch": 41.036, "grad_norm": 1.1970986127853394, "learning_rate": 2e-05, "loss": 0.05594495, "step": 20518 }, { "epoch": 41.038, "grad_norm": 1.1719766855239868, "learning_rate": 2e-05, "loss": 0.04220466, "step": 20519 }, { "epoch": 41.04, "grad_norm": 1.5280590057373047, "learning_rate": 2e-05, "loss": 0.05499358, "step": 20520 }, { "epoch": 41.042, "grad_norm": 1.2882308959960938, "learning_rate": 2e-05, "loss": 0.04562774, "step": 20521 }, { "epoch": 41.044, "grad_norm": 2.0022976398468018, "learning_rate": 2e-05, "loss": 0.04248636, "step": 20522 }, { "epoch": 41.046, "grad_norm": 1.5285600423812866, "learning_rate": 2e-05, "loss": 0.06069652, "step": 20523 }, { "epoch": 41.048, "grad_norm": 1.181755781173706, "learning_rate": 2e-05, "loss": 0.04825043, "step": 20524 }, { "epoch": 41.05, "grad_norm": 1.338629126548767, "learning_rate": 2e-05, "loss": 0.06323239, "step": 20525 }, { "epoch": 41.052, "grad_norm": 1.1997042894363403, "learning_rate": 2e-05, "loss": 0.05054116, "step": 20526 }, { "epoch": 41.054, "grad_norm": 1.3715033531188965, "learning_rate": 2e-05, "loss": 0.04951176, "step": 20527 }, { "epoch": 41.056, "grad_norm": 1.3828208446502686, "learning_rate": 2e-05, "loss": 0.04772475, "step": 20528 }, { "epoch": 41.058, "grad_norm": 1.0656706094741821, "learning_rate": 2e-05, "loss": 0.04354571, "step": 20529 }, { "epoch": 41.06, "grad_norm": 0.9382243156433105, "learning_rate": 2e-05, "loss": 0.03308392, "step": 20530 }, { "epoch": 41.062, "grad_norm": 1.2257845401763916, "learning_rate": 2e-05, "loss": 0.05354673, "step": 20531 }, { "epoch": 41.064, "grad_norm": 1.0932214260101318, "learning_rate": 2e-05, "loss": 0.05004593, "step": 20532 }, { "epoch": 41.066, "grad_norm": 1.3086014986038208, "learning_rate": 2e-05, "loss": 0.05718559, "step": 20533 }, { "epoch": 41.068, "grad_norm": 1.2380306720733643, "learning_rate": 2e-05, "loss": 0.05476848, "step": 20534 }, { "epoch": 41.07, "grad_norm": 1.06734037399292, "learning_rate": 2e-05, "loss": 0.03656657, "step": 20535 }, { "epoch": 41.072, "grad_norm": 1.3546667098999023, "learning_rate": 2e-05, "loss": 0.05762174, "step": 20536 }, { "epoch": 41.074, "grad_norm": 1.0288851261138916, "learning_rate": 2e-05, "loss": 0.0424634, "step": 20537 }, { "epoch": 41.076, "grad_norm": 1.131334900856018, "learning_rate": 2e-05, "loss": 0.05600655, "step": 20538 }, { "epoch": 41.078, "grad_norm": 1.6864545345306396, "learning_rate": 2e-05, "loss": 0.04475303, "step": 20539 }, { "epoch": 41.08, "grad_norm": 1.9584969282150269, "learning_rate": 2e-05, "loss": 0.04874922, "step": 20540 }, { "epoch": 41.082, "grad_norm": 1.2000778913497925, "learning_rate": 2e-05, "loss": 0.04340697, "step": 20541 }, { "epoch": 41.084, "grad_norm": 1.0453670024871826, "learning_rate": 2e-05, "loss": 0.05378638, "step": 20542 }, { "epoch": 41.086, "grad_norm": 1.0404855012893677, "learning_rate": 2e-05, "loss": 0.0372684, "step": 20543 }, { "epoch": 41.088, "grad_norm": 1.7498598098754883, "learning_rate": 2e-05, "loss": 0.04698805, "step": 20544 }, { "epoch": 41.09, "grad_norm": 1.0921574831008911, "learning_rate": 2e-05, "loss": 0.04534142, "step": 20545 }, { "epoch": 41.092, "grad_norm": 1.1065009832382202, "learning_rate": 2e-05, "loss": 0.05014817, "step": 20546 }, { "epoch": 41.094, "grad_norm": 1.1455692052841187, "learning_rate": 2e-05, "loss": 0.06051344, "step": 20547 }, { "epoch": 41.096, "grad_norm": 1.4879392385482788, "learning_rate": 2e-05, "loss": 0.04928049, "step": 20548 }, { "epoch": 41.098, "grad_norm": 1.4718266725540161, "learning_rate": 2e-05, "loss": 0.05406002, "step": 20549 }, { "epoch": 41.1, "grad_norm": 1.2394520044326782, "learning_rate": 2e-05, "loss": 0.04753721, "step": 20550 }, { "epoch": 41.102, "grad_norm": 1.0697596073150635, "learning_rate": 2e-05, "loss": 0.03903735, "step": 20551 }, { "epoch": 41.104, "grad_norm": 1.3443745374679565, "learning_rate": 2e-05, "loss": 0.0416825, "step": 20552 }, { "epoch": 41.106, "grad_norm": 1.0805517435073853, "learning_rate": 2e-05, "loss": 0.02990261, "step": 20553 }, { "epoch": 41.108, "grad_norm": 1.2133495807647705, "learning_rate": 2e-05, "loss": 0.04597092, "step": 20554 }, { "epoch": 41.11, "grad_norm": 2.3126041889190674, "learning_rate": 2e-05, "loss": 0.04624292, "step": 20555 }, { "epoch": 41.112, "grad_norm": 1.1802953481674194, "learning_rate": 2e-05, "loss": 0.05155454, "step": 20556 }, { "epoch": 41.114, "grad_norm": 1.4480260610580444, "learning_rate": 2e-05, "loss": 0.0588602, "step": 20557 }, { "epoch": 41.116, "grad_norm": 1.3759326934814453, "learning_rate": 2e-05, "loss": 0.04992274, "step": 20558 }, { "epoch": 41.118, "grad_norm": 1.1570967435836792, "learning_rate": 2e-05, "loss": 0.04519217, "step": 20559 }, { "epoch": 41.12, "grad_norm": 1.2483819723129272, "learning_rate": 2e-05, "loss": 0.04176389, "step": 20560 }, { "epoch": 41.122, "grad_norm": 1.4002913236618042, "learning_rate": 2e-05, "loss": 0.04440395, "step": 20561 }, { "epoch": 41.124, "grad_norm": 1.4284930229187012, "learning_rate": 2e-05, "loss": 0.05776929, "step": 20562 }, { "epoch": 41.126, "grad_norm": 1.3323142528533936, "learning_rate": 2e-05, "loss": 0.0383847, "step": 20563 }, { "epoch": 41.128, "grad_norm": 1.1827408075332642, "learning_rate": 2e-05, "loss": 0.0447496, "step": 20564 }, { "epoch": 41.13, "grad_norm": 1.2362679243087769, "learning_rate": 2e-05, "loss": 0.04746515, "step": 20565 }, { "epoch": 41.132, "grad_norm": 1.9688647985458374, "learning_rate": 2e-05, "loss": 0.04489559, "step": 20566 }, { "epoch": 41.134, "grad_norm": 1.304279088973999, "learning_rate": 2e-05, "loss": 0.04824959, "step": 20567 }, { "epoch": 41.136, "grad_norm": 1.7030314207077026, "learning_rate": 2e-05, "loss": 0.04330122, "step": 20568 }, { "epoch": 41.138, "grad_norm": 1.1935453414916992, "learning_rate": 2e-05, "loss": 0.05471293, "step": 20569 }, { "epoch": 41.14, "grad_norm": 1.0641679763793945, "learning_rate": 2e-05, "loss": 0.04133367, "step": 20570 }, { "epoch": 41.142, "grad_norm": 0.9642373323440552, "learning_rate": 2e-05, "loss": 0.03811979, "step": 20571 }, { "epoch": 41.144, "grad_norm": 1.7914299964904785, "learning_rate": 2e-05, "loss": 0.04343174, "step": 20572 }, { "epoch": 41.146, "grad_norm": 1.4745582342147827, "learning_rate": 2e-05, "loss": 0.03861541, "step": 20573 }, { "epoch": 41.148, "grad_norm": 0.9670395255088806, "learning_rate": 2e-05, "loss": 0.03732292, "step": 20574 }, { "epoch": 41.15, "grad_norm": 1.2237446308135986, "learning_rate": 2e-05, "loss": 0.04279939, "step": 20575 }, { "epoch": 41.152, "grad_norm": 1.173431634902954, "learning_rate": 2e-05, "loss": 0.05236944, "step": 20576 }, { "epoch": 41.154, "grad_norm": 2.5610194206237793, "learning_rate": 2e-05, "loss": 0.05082991, "step": 20577 }, { "epoch": 41.156, "grad_norm": 1.1480480432510376, "learning_rate": 2e-05, "loss": 0.03853033, "step": 20578 }, { "epoch": 41.158, "grad_norm": 1.0263923406600952, "learning_rate": 2e-05, "loss": 0.0398251, "step": 20579 }, { "epoch": 41.16, "grad_norm": 1.241070032119751, "learning_rate": 2e-05, "loss": 0.04581333, "step": 20580 }, { "epoch": 41.162, "grad_norm": 0.9623987674713135, "learning_rate": 2e-05, "loss": 0.02442548, "step": 20581 }, { "epoch": 41.164, "grad_norm": 1.2360503673553467, "learning_rate": 2e-05, "loss": 0.04588886, "step": 20582 }, { "epoch": 41.166, "grad_norm": 1.4302246570587158, "learning_rate": 2e-05, "loss": 0.0555368, "step": 20583 }, { "epoch": 41.168, "grad_norm": 1.1271694898605347, "learning_rate": 2e-05, "loss": 0.04541831, "step": 20584 }, { "epoch": 41.17, "grad_norm": 1.3913869857788086, "learning_rate": 2e-05, "loss": 0.05388426, "step": 20585 }, { "epoch": 41.172, "grad_norm": 1.4658206701278687, "learning_rate": 2e-05, "loss": 0.05629757, "step": 20586 }, { "epoch": 41.174, "grad_norm": 1.5316065549850464, "learning_rate": 2e-05, "loss": 0.04545237, "step": 20587 }, { "epoch": 41.176, "grad_norm": 1.3376739025115967, "learning_rate": 2e-05, "loss": 0.04239886, "step": 20588 }, { "epoch": 41.178, "grad_norm": 1.168369174003601, "learning_rate": 2e-05, "loss": 0.04730329, "step": 20589 }, { "epoch": 41.18, "grad_norm": 1.132712483406067, "learning_rate": 2e-05, "loss": 0.0453882, "step": 20590 }, { "epoch": 41.182, "grad_norm": 1.2484111785888672, "learning_rate": 2e-05, "loss": 0.05077428, "step": 20591 }, { "epoch": 41.184, "grad_norm": 1.2351433038711548, "learning_rate": 2e-05, "loss": 0.04053054, "step": 20592 }, { "epoch": 41.186, "grad_norm": 1.127816081047058, "learning_rate": 2e-05, "loss": 0.03557635, "step": 20593 }, { "epoch": 41.188, "grad_norm": 1.6981974840164185, "learning_rate": 2e-05, "loss": 0.05980735, "step": 20594 }, { "epoch": 41.19, "grad_norm": 1.2019031047821045, "learning_rate": 2e-05, "loss": 0.04414729, "step": 20595 }, { "epoch": 41.192, "grad_norm": 2.026304244995117, "learning_rate": 2e-05, "loss": 0.06682765, "step": 20596 }, { "epoch": 41.194, "grad_norm": 1.1906565427780151, "learning_rate": 2e-05, "loss": 0.04870375, "step": 20597 }, { "epoch": 41.196, "grad_norm": 1.194569706916809, "learning_rate": 2e-05, "loss": 0.05204153, "step": 20598 }, { "epoch": 41.198, "grad_norm": 1.4061965942382812, "learning_rate": 2e-05, "loss": 0.05822208, "step": 20599 }, { "epoch": 41.2, "grad_norm": 1.2633558511734009, "learning_rate": 2e-05, "loss": 0.0512266, "step": 20600 }, { "epoch": 41.202, "grad_norm": 1.4011318683624268, "learning_rate": 2e-05, "loss": 0.05827274, "step": 20601 }, { "epoch": 41.204, "grad_norm": 1.0192710161209106, "learning_rate": 2e-05, "loss": 0.03732585, "step": 20602 }, { "epoch": 41.206, "grad_norm": 1.1007825136184692, "learning_rate": 2e-05, "loss": 0.04062988, "step": 20603 }, { "epoch": 41.208, "grad_norm": 1.4489682912826538, "learning_rate": 2e-05, "loss": 0.04679845, "step": 20604 }, { "epoch": 41.21, "grad_norm": 1.3930132389068604, "learning_rate": 2e-05, "loss": 0.04038769, "step": 20605 }, { "epoch": 41.212, "grad_norm": 1.1776801347732544, "learning_rate": 2e-05, "loss": 0.05422183, "step": 20606 }, { "epoch": 41.214, "grad_norm": 1.3811092376708984, "learning_rate": 2e-05, "loss": 0.05448051, "step": 20607 }, { "epoch": 41.216, "grad_norm": 1.6993544101715088, "learning_rate": 2e-05, "loss": 0.05991393, "step": 20608 }, { "epoch": 41.218, "grad_norm": 1.1804338693618774, "learning_rate": 2e-05, "loss": 0.0513094, "step": 20609 }, { "epoch": 41.22, "grad_norm": 1.2084537744522095, "learning_rate": 2e-05, "loss": 0.04306343, "step": 20610 }, { "epoch": 41.222, "grad_norm": 2.1294620037078857, "learning_rate": 2e-05, "loss": 0.05107892, "step": 20611 }, { "epoch": 41.224, "grad_norm": 1.1537929773330688, "learning_rate": 2e-05, "loss": 0.04525739, "step": 20612 }, { "epoch": 41.226, "grad_norm": 1.0452954769134521, "learning_rate": 2e-05, "loss": 0.03667298, "step": 20613 }, { "epoch": 41.228, "grad_norm": 1.2010400295257568, "learning_rate": 2e-05, "loss": 0.03450677, "step": 20614 }, { "epoch": 41.23, "grad_norm": 1.1608492136001587, "learning_rate": 2e-05, "loss": 0.0470752, "step": 20615 }, { "epoch": 41.232, "grad_norm": 1.367234706878662, "learning_rate": 2e-05, "loss": 0.04004624, "step": 20616 }, { "epoch": 41.234, "grad_norm": 3.438629388809204, "learning_rate": 2e-05, "loss": 0.05718534, "step": 20617 }, { "epoch": 41.236, "grad_norm": 1.245473861694336, "learning_rate": 2e-05, "loss": 0.04939371, "step": 20618 }, { "epoch": 41.238, "grad_norm": 1.5737378597259521, "learning_rate": 2e-05, "loss": 0.06649335, "step": 20619 }, { "epoch": 41.24, "grad_norm": 2.474226951599121, "learning_rate": 2e-05, "loss": 0.05714843, "step": 20620 }, { "epoch": 41.242, "grad_norm": 1.1201341152191162, "learning_rate": 2e-05, "loss": 0.03632017, "step": 20621 }, { "epoch": 41.244, "grad_norm": 1.3226417303085327, "learning_rate": 2e-05, "loss": 0.05535428, "step": 20622 }, { "epoch": 41.246, "grad_norm": 1.5820775032043457, "learning_rate": 2e-05, "loss": 0.06497006, "step": 20623 }, { "epoch": 41.248, "grad_norm": 1.6483792066574097, "learning_rate": 2e-05, "loss": 0.04980063, "step": 20624 }, { "epoch": 41.25, "grad_norm": 0.8462343215942383, "learning_rate": 2e-05, "loss": 0.0305022, "step": 20625 }, { "epoch": 41.252, "grad_norm": 1.3294532299041748, "learning_rate": 2e-05, "loss": 0.05518986, "step": 20626 }, { "epoch": 41.254, "grad_norm": 1.0134047269821167, "learning_rate": 2e-05, "loss": 0.04463737, "step": 20627 }, { "epoch": 41.256, "grad_norm": 1.3394545316696167, "learning_rate": 2e-05, "loss": 0.06053488, "step": 20628 }, { "epoch": 41.258, "grad_norm": 1.1298450231552124, "learning_rate": 2e-05, "loss": 0.03974798, "step": 20629 }, { "epoch": 41.26, "grad_norm": 1.2260725498199463, "learning_rate": 2e-05, "loss": 0.04857574, "step": 20630 }, { "epoch": 41.262, "grad_norm": 1.2204060554504395, "learning_rate": 2e-05, "loss": 0.04478389, "step": 20631 }, { "epoch": 41.264, "grad_norm": 2.948246479034424, "learning_rate": 2e-05, "loss": 0.05024678, "step": 20632 }, { "epoch": 41.266, "grad_norm": 1.3143848180770874, "learning_rate": 2e-05, "loss": 0.04200232, "step": 20633 }, { "epoch": 41.268, "grad_norm": 1.1079685688018799, "learning_rate": 2e-05, "loss": 0.03887783, "step": 20634 }, { "epoch": 41.27, "grad_norm": 1.3387101888656616, "learning_rate": 2e-05, "loss": 0.04824906, "step": 20635 }, { "epoch": 41.272, "grad_norm": 1.2918139696121216, "learning_rate": 2e-05, "loss": 0.05828239, "step": 20636 }, { "epoch": 41.274, "grad_norm": 1.2324050664901733, "learning_rate": 2e-05, "loss": 0.05050058, "step": 20637 }, { "epoch": 41.276, "grad_norm": 1.08914315700531, "learning_rate": 2e-05, "loss": 0.03811881, "step": 20638 }, { "epoch": 41.278, "grad_norm": 1.5355799198150635, "learning_rate": 2e-05, "loss": 0.06125507, "step": 20639 }, { "epoch": 41.28, "grad_norm": 1.1275166273117065, "learning_rate": 2e-05, "loss": 0.04609001, "step": 20640 }, { "epoch": 41.282, "grad_norm": 1.394644021987915, "learning_rate": 2e-05, "loss": 0.05548574, "step": 20641 }, { "epoch": 41.284, "grad_norm": 1.1803781986236572, "learning_rate": 2e-05, "loss": 0.04673609, "step": 20642 }, { "epoch": 41.286, "grad_norm": 1.088181972503662, "learning_rate": 2e-05, "loss": 0.04835239, "step": 20643 }, { "epoch": 41.288, "grad_norm": 1.2936146259307861, "learning_rate": 2e-05, "loss": 0.0537074, "step": 20644 }, { "epoch": 41.29, "grad_norm": 1.0376815795898438, "learning_rate": 2e-05, "loss": 0.04589043, "step": 20645 }, { "epoch": 41.292, "grad_norm": 1.2658627033233643, "learning_rate": 2e-05, "loss": 0.04795525, "step": 20646 }, { "epoch": 41.294, "grad_norm": 1.5672342777252197, "learning_rate": 2e-05, "loss": 0.06098137, "step": 20647 }, { "epoch": 41.296, "grad_norm": 1.1743220090866089, "learning_rate": 2e-05, "loss": 0.03908478, "step": 20648 }, { "epoch": 41.298, "grad_norm": 1.368518590927124, "learning_rate": 2e-05, "loss": 0.03311605, "step": 20649 }, { "epoch": 41.3, "grad_norm": 0.9655233025550842, "learning_rate": 2e-05, "loss": 0.03225183, "step": 20650 }, { "epoch": 41.302, "grad_norm": 2.152411937713623, "learning_rate": 2e-05, "loss": 0.04487934, "step": 20651 }, { "epoch": 41.304, "grad_norm": 1.6587146520614624, "learning_rate": 2e-05, "loss": 0.05637205, "step": 20652 }, { "epoch": 41.306, "grad_norm": 1.024804949760437, "learning_rate": 2e-05, "loss": 0.03787657, "step": 20653 }, { "epoch": 41.308, "grad_norm": 0.8710086941719055, "learning_rate": 2e-05, "loss": 0.02578922, "step": 20654 }, { "epoch": 41.31, "grad_norm": 1.093598484992981, "learning_rate": 2e-05, "loss": 0.03931287, "step": 20655 }, { "epoch": 41.312, "grad_norm": 1.3158975839614868, "learning_rate": 2e-05, "loss": 0.0523903, "step": 20656 }, { "epoch": 41.314, "grad_norm": 1.2842652797698975, "learning_rate": 2e-05, "loss": 0.0551443, "step": 20657 }, { "epoch": 41.316, "grad_norm": 0.9673425555229187, "learning_rate": 2e-05, "loss": 0.03543061, "step": 20658 }, { "epoch": 41.318, "grad_norm": 1.4158899784088135, "learning_rate": 2e-05, "loss": 0.05106701, "step": 20659 }, { "epoch": 41.32, "grad_norm": 1.7559031248092651, "learning_rate": 2e-05, "loss": 0.03872785, "step": 20660 }, { "epoch": 41.322, "grad_norm": 1.1263608932495117, "learning_rate": 2e-05, "loss": 0.03599942, "step": 20661 }, { "epoch": 41.324, "grad_norm": 0.9912429451942444, "learning_rate": 2e-05, "loss": 0.02866612, "step": 20662 }, { "epoch": 41.326, "grad_norm": 1.0485600233078003, "learning_rate": 2e-05, "loss": 0.03809555, "step": 20663 }, { "epoch": 41.328, "grad_norm": 2.218872308731079, "learning_rate": 2e-05, "loss": 0.05056027, "step": 20664 }, { "epoch": 41.33, "grad_norm": 1.2448780536651611, "learning_rate": 2e-05, "loss": 0.06671609, "step": 20665 }, { "epoch": 41.332, "grad_norm": 1.1735291481018066, "learning_rate": 2e-05, "loss": 0.04490344, "step": 20666 }, { "epoch": 41.334, "grad_norm": 1.6742703914642334, "learning_rate": 2e-05, "loss": 0.06206613, "step": 20667 }, { "epoch": 41.336, "grad_norm": 1.2419872283935547, "learning_rate": 2e-05, "loss": 0.03979301, "step": 20668 }, { "epoch": 41.338, "grad_norm": 2.8356029987335205, "learning_rate": 2e-05, "loss": 0.05524603, "step": 20669 }, { "epoch": 41.34, "grad_norm": 2.10284161567688, "learning_rate": 2e-05, "loss": 0.05853007, "step": 20670 }, { "epoch": 41.342, "grad_norm": 1.0346118211746216, "learning_rate": 2e-05, "loss": 0.0342777, "step": 20671 }, { "epoch": 41.344, "grad_norm": 1.1584864854812622, "learning_rate": 2e-05, "loss": 0.04522749, "step": 20672 }, { "epoch": 41.346, "grad_norm": 1.0336374044418335, "learning_rate": 2e-05, "loss": 0.0341766, "step": 20673 }, { "epoch": 41.348, "grad_norm": 1.2833257913589478, "learning_rate": 2e-05, "loss": 0.04493989, "step": 20674 }, { "epoch": 41.35, "grad_norm": 1.196931004524231, "learning_rate": 2e-05, "loss": 0.03288671, "step": 20675 }, { "epoch": 41.352, "grad_norm": 1.6481664180755615, "learning_rate": 2e-05, "loss": 0.04984744, "step": 20676 }, { "epoch": 41.354, "grad_norm": 1.3201029300689697, "learning_rate": 2e-05, "loss": 0.05488572, "step": 20677 }, { "epoch": 41.356, "grad_norm": 1.2787818908691406, "learning_rate": 2e-05, "loss": 0.0575133, "step": 20678 }, { "epoch": 41.358, "grad_norm": 1.1592620611190796, "learning_rate": 2e-05, "loss": 0.04519965, "step": 20679 }, { "epoch": 41.36, "grad_norm": 1.7062658071517944, "learning_rate": 2e-05, "loss": 0.04929303, "step": 20680 }, { "epoch": 41.362, "grad_norm": 1.4524036645889282, "learning_rate": 2e-05, "loss": 0.05601212, "step": 20681 }, { "epoch": 41.364, "grad_norm": 1.2186477184295654, "learning_rate": 2e-05, "loss": 0.0421271, "step": 20682 }, { "epoch": 41.366, "grad_norm": 1.7632216215133667, "learning_rate": 2e-05, "loss": 0.05948129, "step": 20683 }, { "epoch": 41.368, "grad_norm": 1.5571186542510986, "learning_rate": 2e-05, "loss": 0.04970782, "step": 20684 }, { "epoch": 41.37, "grad_norm": 1.611167311668396, "learning_rate": 2e-05, "loss": 0.04187579, "step": 20685 }, { "epoch": 41.372, "grad_norm": 1.0352783203125, "learning_rate": 2e-05, "loss": 0.02740442, "step": 20686 }, { "epoch": 41.374, "grad_norm": 1.3376938104629517, "learning_rate": 2e-05, "loss": 0.06241329, "step": 20687 }, { "epoch": 41.376, "grad_norm": 1.135635495185852, "learning_rate": 2e-05, "loss": 0.04911846, "step": 20688 }, { "epoch": 41.378, "grad_norm": 1.2492220401763916, "learning_rate": 2e-05, "loss": 0.05272296, "step": 20689 }, { "epoch": 41.38, "grad_norm": 1.3152111768722534, "learning_rate": 2e-05, "loss": 0.05239505, "step": 20690 }, { "epoch": 41.382, "grad_norm": 1.3397350311279297, "learning_rate": 2e-05, "loss": 0.05067999, "step": 20691 }, { "epoch": 41.384, "grad_norm": 1.1833877563476562, "learning_rate": 2e-05, "loss": 0.04897395, "step": 20692 }, { "epoch": 41.386, "grad_norm": 1.2837327718734741, "learning_rate": 2e-05, "loss": 0.05838244, "step": 20693 }, { "epoch": 41.388, "grad_norm": 1.150177240371704, "learning_rate": 2e-05, "loss": 0.04375577, "step": 20694 }, { "epoch": 41.39, "grad_norm": 1.4795578718185425, "learning_rate": 2e-05, "loss": 0.05091795, "step": 20695 }, { "epoch": 41.392, "grad_norm": 1.2051365375518799, "learning_rate": 2e-05, "loss": 0.05093031, "step": 20696 }, { "epoch": 41.394, "grad_norm": 0.9962400197982788, "learning_rate": 2e-05, "loss": 0.04091921, "step": 20697 }, { "epoch": 41.396, "grad_norm": 2.7451603412628174, "learning_rate": 2e-05, "loss": 0.05224776, "step": 20698 }, { "epoch": 41.398, "grad_norm": 1.16928231716156, "learning_rate": 2e-05, "loss": 0.05099126, "step": 20699 }, { "epoch": 41.4, "grad_norm": 1.571977972984314, "learning_rate": 2e-05, "loss": 0.04833336, "step": 20700 }, { "epoch": 41.402, "grad_norm": 1.3276727199554443, "learning_rate": 2e-05, "loss": 0.04746715, "step": 20701 }, { "epoch": 41.404, "grad_norm": 1.438899040222168, "learning_rate": 2e-05, "loss": 0.05122091, "step": 20702 }, { "epoch": 41.406, "grad_norm": 1.1340113878250122, "learning_rate": 2e-05, "loss": 0.04921956, "step": 20703 }, { "epoch": 41.408, "grad_norm": 1.3475137948989868, "learning_rate": 2e-05, "loss": 0.0532689, "step": 20704 }, { "epoch": 41.41, "grad_norm": 3.48881196975708, "learning_rate": 2e-05, "loss": 0.05500069, "step": 20705 }, { "epoch": 41.412, "grad_norm": 1.010732889175415, "learning_rate": 2e-05, "loss": 0.03719215, "step": 20706 }, { "epoch": 41.414, "grad_norm": 1.2824602127075195, "learning_rate": 2e-05, "loss": 0.05517687, "step": 20707 }, { "epoch": 41.416, "grad_norm": 1.1974467039108276, "learning_rate": 2e-05, "loss": 0.03475419, "step": 20708 }, { "epoch": 41.418, "grad_norm": 0.9742454290390015, "learning_rate": 2e-05, "loss": 0.03150909, "step": 20709 }, { "epoch": 41.42, "grad_norm": 4.395928382873535, "learning_rate": 2e-05, "loss": 0.06255428, "step": 20710 }, { "epoch": 41.422, "grad_norm": 2.005077362060547, "learning_rate": 2e-05, "loss": 0.05040678, "step": 20711 }, { "epoch": 41.424, "grad_norm": 1.1284871101379395, "learning_rate": 2e-05, "loss": 0.04268694, "step": 20712 }, { "epoch": 41.426, "grad_norm": 1.1108037233352661, "learning_rate": 2e-05, "loss": 0.05018146, "step": 20713 }, { "epoch": 41.428, "grad_norm": 1.2900370359420776, "learning_rate": 2e-05, "loss": 0.05577334, "step": 20714 }, { "epoch": 41.43, "grad_norm": 1.625792145729065, "learning_rate": 2e-05, "loss": 0.0519428, "step": 20715 }, { "epoch": 41.432, "grad_norm": 1.2618361711502075, "learning_rate": 2e-05, "loss": 0.06108624, "step": 20716 }, { "epoch": 41.434, "grad_norm": 2.6890978813171387, "learning_rate": 2e-05, "loss": 0.05818431, "step": 20717 }, { "epoch": 41.436, "grad_norm": 1.5866303443908691, "learning_rate": 2e-05, "loss": 0.05809556, "step": 20718 }, { "epoch": 41.438, "grad_norm": 3.1994481086730957, "learning_rate": 2e-05, "loss": 0.03941204, "step": 20719 }, { "epoch": 41.44, "grad_norm": 0.9686678051948547, "learning_rate": 2e-05, "loss": 0.03777551, "step": 20720 }, { "epoch": 41.442, "grad_norm": 1.2619547843933105, "learning_rate": 2e-05, "loss": 0.05709205, "step": 20721 }, { "epoch": 41.444, "grad_norm": 1.1282532215118408, "learning_rate": 2e-05, "loss": 0.03835287, "step": 20722 }, { "epoch": 41.446, "grad_norm": 1.2010022401809692, "learning_rate": 2e-05, "loss": 0.04767352, "step": 20723 }, { "epoch": 41.448, "grad_norm": 1.447494387626648, "learning_rate": 2e-05, "loss": 0.05089083, "step": 20724 }, { "epoch": 41.45, "grad_norm": 1.101004719734192, "learning_rate": 2e-05, "loss": 0.0408543, "step": 20725 }, { "epoch": 41.452, "grad_norm": 1.2500224113464355, "learning_rate": 2e-05, "loss": 0.03975309, "step": 20726 }, { "epoch": 41.454, "grad_norm": 1.746552586555481, "learning_rate": 2e-05, "loss": 0.06327257, "step": 20727 }, { "epoch": 41.456, "grad_norm": 1.121049165725708, "learning_rate": 2e-05, "loss": 0.03917, "step": 20728 }, { "epoch": 41.458, "grad_norm": 1.5828957557678223, "learning_rate": 2e-05, "loss": 0.04092988, "step": 20729 }, { "epoch": 41.46, "grad_norm": 1.3952829837799072, "learning_rate": 2e-05, "loss": 0.05349658, "step": 20730 }, { "epoch": 41.462, "grad_norm": 1.1374624967575073, "learning_rate": 2e-05, "loss": 0.04330311, "step": 20731 }, { "epoch": 41.464, "grad_norm": 2.1786279678344727, "learning_rate": 2e-05, "loss": 0.04306266, "step": 20732 }, { "epoch": 41.466, "grad_norm": 1.1938217878341675, "learning_rate": 2e-05, "loss": 0.04537656, "step": 20733 }, { "epoch": 41.468, "grad_norm": 1.673171877861023, "learning_rate": 2e-05, "loss": 0.04567283, "step": 20734 }, { "epoch": 41.47, "grad_norm": 1.8601455688476562, "learning_rate": 2e-05, "loss": 0.04573951, "step": 20735 }, { "epoch": 41.472, "grad_norm": 1.2552533149719238, "learning_rate": 2e-05, "loss": 0.05038974, "step": 20736 }, { "epoch": 41.474, "grad_norm": 1.2557790279388428, "learning_rate": 2e-05, "loss": 0.04997993, "step": 20737 }, { "epoch": 41.476, "grad_norm": 1.156934142112732, "learning_rate": 2e-05, "loss": 0.04020359, "step": 20738 }, { "epoch": 41.478, "grad_norm": 1.0187697410583496, "learning_rate": 2e-05, "loss": 0.0350149, "step": 20739 }, { "epoch": 41.48, "grad_norm": 1.5980960130691528, "learning_rate": 2e-05, "loss": 0.05507297, "step": 20740 }, { "epoch": 41.482, "grad_norm": 1.2604917287826538, "learning_rate": 2e-05, "loss": 0.04280219, "step": 20741 }, { "epoch": 41.484, "grad_norm": 1.2176854610443115, "learning_rate": 2e-05, "loss": 0.04632607, "step": 20742 }, { "epoch": 41.486, "grad_norm": 1.7439926862716675, "learning_rate": 2e-05, "loss": 0.06102863, "step": 20743 }, { "epoch": 41.488, "grad_norm": 1.1306809186935425, "learning_rate": 2e-05, "loss": 0.04056504, "step": 20744 }, { "epoch": 41.49, "grad_norm": 1.6267520189285278, "learning_rate": 2e-05, "loss": 0.07167388, "step": 20745 }, { "epoch": 41.492, "grad_norm": 1.483768343925476, "learning_rate": 2e-05, "loss": 0.03321824, "step": 20746 }, { "epoch": 41.494, "grad_norm": 1.3936223983764648, "learning_rate": 2e-05, "loss": 0.06460038, "step": 20747 }, { "epoch": 41.496, "grad_norm": 1.23568594455719, "learning_rate": 2e-05, "loss": 0.06132279, "step": 20748 }, { "epoch": 41.498, "grad_norm": 1.2954548597335815, "learning_rate": 2e-05, "loss": 0.04977329, "step": 20749 }, { "epoch": 41.5, "grad_norm": 1.190082311630249, "learning_rate": 2e-05, "loss": 0.05261871, "step": 20750 }, { "epoch": 41.502, "grad_norm": 1.1780047416687012, "learning_rate": 2e-05, "loss": 0.03741407, "step": 20751 }, { "epoch": 41.504, "grad_norm": 1.4033395051956177, "learning_rate": 2e-05, "loss": 0.04731531, "step": 20752 }, { "epoch": 41.506, "grad_norm": 0.8769941329956055, "learning_rate": 2e-05, "loss": 0.02578051, "step": 20753 }, { "epoch": 41.508, "grad_norm": 1.502710223197937, "learning_rate": 2e-05, "loss": 0.05851678, "step": 20754 }, { "epoch": 41.51, "grad_norm": 1.0792176723480225, "learning_rate": 2e-05, "loss": 0.03291167, "step": 20755 }, { "epoch": 41.512, "grad_norm": 1.7319297790527344, "learning_rate": 2e-05, "loss": 0.05475036, "step": 20756 }, { "epoch": 41.514, "grad_norm": 1.918562889099121, "learning_rate": 2e-05, "loss": 0.04272918, "step": 20757 }, { "epoch": 41.516, "grad_norm": 2.9401967525482178, "learning_rate": 2e-05, "loss": 0.05610197, "step": 20758 }, { "epoch": 41.518, "grad_norm": 0.8515689373016357, "learning_rate": 2e-05, "loss": 0.02853437, "step": 20759 }, { "epoch": 41.52, "grad_norm": 1.914589285850525, "learning_rate": 2e-05, "loss": 0.05829733, "step": 20760 }, { "epoch": 41.522, "grad_norm": 1.0378468036651611, "learning_rate": 2e-05, "loss": 0.03754158, "step": 20761 }, { "epoch": 41.524, "grad_norm": 1.3927786350250244, "learning_rate": 2e-05, "loss": 0.05459459, "step": 20762 }, { "epoch": 41.526, "grad_norm": 4.084712028503418, "learning_rate": 2e-05, "loss": 0.0557055, "step": 20763 }, { "epoch": 41.528, "grad_norm": 1.2816001176834106, "learning_rate": 2e-05, "loss": 0.05118892, "step": 20764 }, { "epoch": 41.53, "grad_norm": 1.243981122970581, "learning_rate": 2e-05, "loss": 0.053567, "step": 20765 }, { "epoch": 41.532, "grad_norm": 1.179433822631836, "learning_rate": 2e-05, "loss": 0.04334904, "step": 20766 }, { "epoch": 41.534, "grad_norm": 1.8389700651168823, "learning_rate": 2e-05, "loss": 0.03809846, "step": 20767 }, { "epoch": 41.536, "grad_norm": 1.2287765741348267, "learning_rate": 2e-05, "loss": 0.05584181, "step": 20768 }, { "epoch": 41.538, "grad_norm": 1.2944599390029907, "learning_rate": 2e-05, "loss": 0.04906465, "step": 20769 }, { "epoch": 41.54, "grad_norm": 1.9301244020462036, "learning_rate": 2e-05, "loss": 0.04297593, "step": 20770 }, { "epoch": 41.542, "grad_norm": 1.5391250848770142, "learning_rate": 2e-05, "loss": 0.05037332, "step": 20771 }, { "epoch": 41.544, "grad_norm": 1.3913311958312988, "learning_rate": 2e-05, "loss": 0.0544315, "step": 20772 }, { "epoch": 41.546, "grad_norm": 0.9265551567077637, "learning_rate": 2e-05, "loss": 0.0323727, "step": 20773 }, { "epoch": 41.548, "grad_norm": 1.0124329328536987, "learning_rate": 2e-05, "loss": 0.0399297, "step": 20774 }, { "epoch": 41.55, "grad_norm": 1.2719837427139282, "learning_rate": 2e-05, "loss": 0.04783987, "step": 20775 }, { "epoch": 41.552, "grad_norm": 1.9327387809753418, "learning_rate": 2e-05, "loss": 0.04304921, "step": 20776 }, { "epoch": 41.554, "grad_norm": 1.8879715204238892, "learning_rate": 2e-05, "loss": 0.03215152, "step": 20777 }, { "epoch": 41.556, "grad_norm": 1.0443545579910278, "learning_rate": 2e-05, "loss": 0.04257246, "step": 20778 }, { "epoch": 41.558, "grad_norm": 1.7671880722045898, "learning_rate": 2e-05, "loss": 0.04795214, "step": 20779 }, { "epoch": 41.56, "grad_norm": 1.3257173299789429, "learning_rate": 2e-05, "loss": 0.05144795, "step": 20780 }, { "epoch": 41.562, "grad_norm": 1.5164159536361694, "learning_rate": 2e-05, "loss": 0.05290364, "step": 20781 }, { "epoch": 41.564, "grad_norm": 1.2703793048858643, "learning_rate": 2e-05, "loss": 0.05436259, "step": 20782 }, { "epoch": 41.566, "grad_norm": 1.13009774684906, "learning_rate": 2e-05, "loss": 0.03706975, "step": 20783 }, { "epoch": 41.568, "grad_norm": 1.3934050798416138, "learning_rate": 2e-05, "loss": 0.04990171, "step": 20784 }, { "epoch": 41.57, "grad_norm": 1.6412134170532227, "learning_rate": 2e-05, "loss": 0.04792435, "step": 20785 }, { "epoch": 41.572, "grad_norm": 1.3018213510513306, "learning_rate": 2e-05, "loss": 0.06655528, "step": 20786 }, { "epoch": 41.574, "grad_norm": 1.3327934741973877, "learning_rate": 2e-05, "loss": 0.04949384, "step": 20787 }, { "epoch": 41.576, "grad_norm": 1.173862338066101, "learning_rate": 2e-05, "loss": 0.05083285, "step": 20788 }, { "epoch": 41.578, "grad_norm": 1.3000322580337524, "learning_rate": 2e-05, "loss": 0.05392583, "step": 20789 }, { "epoch": 41.58, "grad_norm": 1.50508451461792, "learning_rate": 2e-05, "loss": 0.03634304, "step": 20790 }, { "epoch": 41.582, "grad_norm": 1.342342734336853, "learning_rate": 2e-05, "loss": 0.05079833, "step": 20791 }, { "epoch": 41.584, "grad_norm": 1.1908931732177734, "learning_rate": 2e-05, "loss": 0.0528966, "step": 20792 }, { "epoch": 41.586, "grad_norm": 1.2644261121749878, "learning_rate": 2e-05, "loss": 0.04991703, "step": 20793 }, { "epoch": 41.588, "grad_norm": 1.19503915309906, "learning_rate": 2e-05, "loss": 0.04296641, "step": 20794 }, { "epoch": 41.59, "grad_norm": 1.3065739870071411, "learning_rate": 2e-05, "loss": 0.04288668, "step": 20795 }, { "epoch": 41.592, "grad_norm": 1.5874643325805664, "learning_rate": 2e-05, "loss": 0.06031433, "step": 20796 }, { "epoch": 41.594, "grad_norm": 1.4615437984466553, "learning_rate": 2e-05, "loss": 0.06782658, "step": 20797 }, { "epoch": 41.596, "grad_norm": 1.0287331342697144, "learning_rate": 2e-05, "loss": 0.03726696, "step": 20798 }, { "epoch": 41.598, "grad_norm": 2.5669822692871094, "learning_rate": 2e-05, "loss": 0.05073772, "step": 20799 }, { "epoch": 41.6, "grad_norm": 1.9865378141403198, "learning_rate": 2e-05, "loss": 0.06835793, "step": 20800 }, { "epoch": 41.602, "grad_norm": 1.1996169090270996, "learning_rate": 2e-05, "loss": 0.0441338, "step": 20801 }, { "epoch": 41.604, "grad_norm": 1.1404577493667603, "learning_rate": 2e-05, "loss": 0.05091837, "step": 20802 }, { "epoch": 41.606, "grad_norm": 1.366138219833374, "learning_rate": 2e-05, "loss": 0.05536352, "step": 20803 }, { "epoch": 41.608, "grad_norm": 1.2868354320526123, "learning_rate": 2e-05, "loss": 0.05645128, "step": 20804 }, { "epoch": 41.61, "grad_norm": 1.2163975238800049, "learning_rate": 2e-05, "loss": 0.04356652, "step": 20805 }, { "epoch": 41.612, "grad_norm": 1.5239417552947998, "learning_rate": 2e-05, "loss": 0.05934812, "step": 20806 }, { "epoch": 41.614, "grad_norm": 1.641875982284546, "learning_rate": 2e-05, "loss": 0.0451197, "step": 20807 }, { "epoch": 41.616, "grad_norm": 1.4720548391342163, "learning_rate": 2e-05, "loss": 0.05895507, "step": 20808 }, { "epoch": 41.618, "grad_norm": 1.20379638671875, "learning_rate": 2e-05, "loss": 0.05722809, "step": 20809 }, { "epoch": 41.62, "grad_norm": 1.206185221672058, "learning_rate": 2e-05, "loss": 0.04924655, "step": 20810 }, { "epoch": 41.622, "grad_norm": 1.3422046899795532, "learning_rate": 2e-05, "loss": 0.04705754, "step": 20811 }, { "epoch": 41.624, "grad_norm": 1.251327395439148, "learning_rate": 2e-05, "loss": 0.03359656, "step": 20812 }, { "epoch": 41.626, "grad_norm": 1.0493559837341309, "learning_rate": 2e-05, "loss": 0.03650389, "step": 20813 }, { "epoch": 41.628, "grad_norm": 1.1283539533615112, "learning_rate": 2e-05, "loss": 0.04552049, "step": 20814 }, { "epoch": 41.63, "grad_norm": 1.2148489952087402, "learning_rate": 2e-05, "loss": 0.04333513, "step": 20815 }, { "epoch": 41.632, "grad_norm": 1.1849173307418823, "learning_rate": 2e-05, "loss": 0.04451859, "step": 20816 }, { "epoch": 41.634, "grad_norm": 1.035466194152832, "learning_rate": 2e-05, "loss": 0.03730474, "step": 20817 }, { "epoch": 41.636, "grad_norm": 1.3401261568069458, "learning_rate": 2e-05, "loss": 0.03913242, "step": 20818 }, { "epoch": 41.638, "grad_norm": 1.3163193464279175, "learning_rate": 2e-05, "loss": 0.04887679, "step": 20819 }, { "epoch": 41.64, "grad_norm": 1.2960766553878784, "learning_rate": 2e-05, "loss": 0.05596934, "step": 20820 }, { "epoch": 41.642, "grad_norm": 1.2313705682754517, "learning_rate": 2e-05, "loss": 0.05113539, "step": 20821 }, { "epoch": 41.644, "grad_norm": 1.16353178024292, "learning_rate": 2e-05, "loss": 0.04695116, "step": 20822 }, { "epoch": 41.646, "grad_norm": 1.308196783065796, "learning_rate": 2e-05, "loss": 0.05837334, "step": 20823 }, { "epoch": 41.648, "grad_norm": 1.0429937839508057, "learning_rate": 2e-05, "loss": 0.0384528, "step": 20824 }, { "epoch": 41.65, "grad_norm": 1.335108995437622, "learning_rate": 2e-05, "loss": 0.04670976, "step": 20825 }, { "epoch": 41.652, "grad_norm": 1.6604396104812622, "learning_rate": 2e-05, "loss": 0.04694479, "step": 20826 }, { "epoch": 41.654, "grad_norm": 1.2151720523834229, "learning_rate": 2e-05, "loss": 0.0505834, "step": 20827 }, { "epoch": 41.656, "grad_norm": 1.6010440587997437, "learning_rate": 2e-05, "loss": 0.0715237, "step": 20828 }, { "epoch": 41.658, "grad_norm": 1.2786024808883667, "learning_rate": 2e-05, "loss": 0.06591807, "step": 20829 }, { "epoch": 41.66, "grad_norm": 1.1999880075454712, "learning_rate": 2e-05, "loss": 0.05343919, "step": 20830 }, { "epoch": 41.662, "grad_norm": 1.5012918710708618, "learning_rate": 2e-05, "loss": 0.05417911, "step": 20831 }, { "epoch": 41.664, "grad_norm": 1.03823983669281, "learning_rate": 2e-05, "loss": 0.03913938, "step": 20832 }, { "epoch": 41.666, "grad_norm": 1.4988044500350952, "learning_rate": 2e-05, "loss": 0.05051984, "step": 20833 }, { "epoch": 41.668, "grad_norm": 1.3459997177124023, "learning_rate": 2e-05, "loss": 0.05139535, "step": 20834 }, { "epoch": 41.67, "grad_norm": 1.1768592596054077, "learning_rate": 2e-05, "loss": 0.05571685, "step": 20835 }, { "epoch": 41.672, "grad_norm": 1.1096575260162354, "learning_rate": 2e-05, "loss": 0.04181407, "step": 20836 }, { "epoch": 41.674, "grad_norm": 1.260703444480896, "learning_rate": 2e-05, "loss": 0.05937716, "step": 20837 }, { "epoch": 41.676, "grad_norm": 1.1465245485305786, "learning_rate": 2e-05, "loss": 0.05365495, "step": 20838 }, { "epoch": 41.678, "grad_norm": 1.7328318357467651, "learning_rate": 2e-05, "loss": 0.06675532, "step": 20839 }, { "epoch": 41.68, "grad_norm": 1.3580307960510254, "learning_rate": 2e-05, "loss": 0.06565544, "step": 20840 }, { "epoch": 41.682, "grad_norm": 1.1711437702178955, "learning_rate": 2e-05, "loss": 0.04366954, "step": 20841 }, { "epoch": 41.684, "grad_norm": 1.7624998092651367, "learning_rate": 2e-05, "loss": 0.05125007, "step": 20842 }, { "epoch": 41.686, "grad_norm": 1.7056723833084106, "learning_rate": 2e-05, "loss": 0.06327073, "step": 20843 }, { "epoch": 41.688, "grad_norm": 1.4002678394317627, "learning_rate": 2e-05, "loss": 0.05082846, "step": 20844 }, { "epoch": 41.69, "grad_norm": 1.8663007020950317, "learning_rate": 2e-05, "loss": 0.06116794, "step": 20845 }, { "epoch": 41.692, "grad_norm": 1.311848759651184, "learning_rate": 2e-05, "loss": 0.05300967, "step": 20846 }, { "epoch": 41.694, "grad_norm": 1.2102714776992798, "learning_rate": 2e-05, "loss": 0.05376907, "step": 20847 }, { "epoch": 41.696, "grad_norm": 1.0833280086517334, "learning_rate": 2e-05, "loss": 0.04561132, "step": 20848 }, { "epoch": 41.698, "grad_norm": 1.1252832412719727, "learning_rate": 2e-05, "loss": 0.04921568, "step": 20849 }, { "epoch": 41.7, "grad_norm": 1.2025973796844482, "learning_rate": 2e-05, "loss": 0.04832213, "step": 20850 }, { "epoch": 41.702, "grad_norm": 1.0918816328048706, "learning_rate": 2e-05, "loss": 0.04648707, "step": 20851 }, { "epoch": 41.704, "grad_norm": 1.1413060426712036, "learning_rate": 2e-05, "loss": 0.05401688, "step": 20852 }, { "epoch": 41.706, "grad_norm": 1.722861886024475, "learning_rate": 2e-05, "loss": 0.04710011, "step": 20853 }, { "epoch": 41.708, "grad_norm": 1.183224081993103, "learning_rate": 2e-05, "loss": 0.04919672, "step": 20854 }, { "epoch": 41.71, "grad_norm": 1.0682933330535889, "learning_rate": 2e-05, "loss": 0.04360975, "step": 20855 }, { "epoch": 41.712, "grad_norm": 1.3909543752670288, "learning_rate": 2e-05, "loss": 0.03963952, "step": 20856 }, { "epoch": 41.714, "grad_norm": 1.2624576091766357, "learning_rate": 2e-05, "loss": 0.04159348, "step": 20857 }, { "epoch": 41.716, "grad_norm": 1.2852903604507446, "learning_rate": 2e-05, "loss": 0.06388488, "step": 20858 }, { "epoch": 41.718, "grad_norm": 1.2193105220794678, "learning_rate": 2e-05, "loss": 0.04823635, "step": 20859 }, { "epoch": 41.72, "grad_norm": 1.3638604879379272, "learning_rate": 2e-05, "loss": 0.0435699, "step": 20860 }, { "epoch": 41.722, "grad_norm": 1.2816431522369385, "learning_rate": 2e-05, "loss": 0.05209901, "step": 20861 }, { "epoch": 41.724, "grad_norm": 1.994900107383728, "learning_rate": 2e-05, "loss": 0.0689984, "step": 20862 }, { "epoch": 41.726, "grad_norm": 1.2706905603408813, "learning_rate": 2e-05, "loss": 0.04907268, "step": 20863 }, { "epoch": 41.728, "grad_norm": 1.134853482246399, "learning_rate": 2e-05, "loss": 0.03874713, "step": 20864 }, { "epoch": 41.73, "grad_norm": 1.305557370185852, "learning_rate": 2e-05, "loss": 0.0443382, "step": 20865 }, { "epoch": 41.732, "grad_norm": 1.287753701210022, "learning_rate": 2e-05, "loss": 0.05005742, "step": 20866 }, { "epoch": 41.734, "grad_norm": 1.2980296611785889, "learning_rate": 2e-05, "loss": 0.04606131, "step": 20867 }, { "epoch": 41.736, "grad_norm": 1.003082036972046, "learning_rate": 2e-05, "loss": 0.03791095, "step": 20868 }, { "epoch": 41.738, "grad_norm": 1.2747972011566162, "learning_rate": 2e-05, "loss": 0.04731661, "step": 20869 }, { "epoch": 41.74, "grad_norm": 1.4059251546859741, "learning_rate": 2e-05, "loss": 0.03448366, "step": 20870 }, { "epoch": 41.742, "grad_norm": 2.1521401405334473, "learning_rate": 2e-05, "loss": 0.06867001, "step": 20871 }, { "epoch": 41.744, "grad_norm": 1.4154833555221558, "learning_rate": 2e-05, "loss": 0.03487004, "step": 20872 }, { "epoch": 41.746, "grad_norm": 1.2275240421295166, "learning_rate": 2e-05, "loss": 0.05700513, "step": 20873 }, { "epoch": 41.748, "grad_norm": 2.4994146823883057, "learning_rate": 2e-05, "loss": 0.05144731, "step": 20874 }, { "epoch": 41.75, "grad_norm": 1.8417681455612183, "learning_rate": 2e-05, "loss": 0.05225058, "step": 20875 }, { "epoch": 41.752, "grad_norm": 1.2966296672821045, "learning_rate": 2e-05, "loss": 0.04987405, "step": 20876 }, { "epoch": 41.754, "grad_norm": 1.2915031909942627, "learning_rate": 2e-05, "loss": 0.05634173, "step": 20877 }, { "epoch": 41.756, "grad_norm": 1.4315176010131836, "learning_rate": 2e-05, "loss": 0.05782579, "step": 20878 }, { "epoch": 41.758, "grad_norm": 1.8965885639190674, "learning_rate": 2e-05, "loss": 0.05472075, "step": 20879 }, { "epoch": 41.76, "grad_norm": 1.0366774797439575, "learning_rate": 2e-05, "loss": 0.03605853, "step": 20880 }, { "epoch": 41.762, "grad_norm": 1.6545394659042358, "learning_rate": 2e-05, "loss": 0.05220097, "step": 20881 }, { "epoch": 41.764, "grad_norm": 1.9253038167953491, "learning_rate": 2e-05, "loss": 0.04118697, "step": 20882 }, { "epoch": 41.766, "grad_norm": 1.229835033416748, "learning_rate": 2e-05, "loss": 0.05954335, "step": 20883 }, { "epoch": 41.768, "grad_norm": 2.4672608375549316, "learning_rate": 2e-05, "loss": 0.03520424, "step": 20884 }, { "epoch": 41.77, "grad_norm": 1.317997694015503, "learning_rate": 2e-05, "loss": 0.05394396, "step": 20885 }, { "epoch": 41.772, "grad_norm": 1.1028608083724976, "learning_rate": 2e-05, "loss": 0.05019546, "step": 20886 }, { "epoch": 41.774, "grad_norm": 1.4625998735427856, "learning_rate": 2e-05, "loss": 0.05548112, "step": 20887 }, { "epoch": 41.776, "grad_norm": 1.5702537298202515, "learning_rate": 2e-05, "loss": 0.0497899, "step": 20888 }, { "epoch": 41.778, "grad_norm": 1.258571743965149, "learning_rate": 2e-05, "loss": 0.04912242, "step": 20889 }, { "epoch": 41.78, "grad_norm": 1.0844334363937378, "learning_rate": 2e-05, "loss": 0.04057037, "step": 20890 }, { "epoch": 41.782, "grad_norm": 1.1522717475891113, "learning_rate": 2e-05, "loss": 0.04750651, "step": 20891 }, { "epoch": 41.784, "grad_norm": 1.290791630744934, "learning_rate": 2e-05, "loss": 0.06190044, "step": 20892 }, { "epoch": 41.786, "grad_norm": 1.1737576723098755, "learning_rate": 2e-05, "loss": 0.03324655, "step": 20893 }, { "epoch": 41.788, "grad_norm": 1.2524381875991821, "learning_rate": 2e-05, "loss": 0.03917806, "step": 20894 }, { "epoch": 41.79, "grad_norm": 1.3281619548797607, "learning_rate": 2e-05, "loss": 0.04660825, "step": 20895 }, { "epoch": 41.792, "grad_norm": 1.430392861366272, "learning_rate": 2e-05, "loss": 0.07056463, "step": 20896 }, { "epoch": 41.794, "grad_norm": 1.4818058013916016, "learning_rate": 2e-05, "loss": 0.05624013, "step": 20897 }, { "epoch": 41.796, "grad_norm": 1.2507944107055664, "learning_rate": 2e-05, "loss": 0.05323803, "step": 20898 }, { "epoch": 41.798, "grad_norm": 1.244249701499939, "learning_rate": 2e-05, "loss": 0.04751854, "step": 20899 }, { "epoch": 41.8, "grad_norm": 2.58414363861084, "learning_rate": 2e-05, "loss": 0.05688569, "step": 20900 }, { "epoch": 41.802, "grad_norm": 1.0846785306930542, "learning_rate": 2e-05, "loss": 0.03785557, "step": 20901 }, { "epoch": 41.804, "grad_norm": 1.3131710290908813, "learning_rate": 2e-05, "loss": 0.05168329, "step": 20902 }, { "epoch": 41.806, "grad_norm": 1.262276530265808, "learning_rate": 2e-05, "loss": 0.04058243, "step": 20903 }, { "epoch": 41.808, "grad_norm": 1.134749412536621, "learning_rate": 2e-05, "loss": 0.033862, "step": 20904 }, { "epoch": 41.81, "grad_norm": 1.2034157514572144, "learning_rate": 2e-05, "loss": 0.0472676, "step": 20905 }, { "epoch": 41.812, "grad_norm": 2.1183056831359863, "learning_rate": 2e-05, "loss": 0.04632457, "step": 20906 }, { "epoch": 41.814, "grad_norm": 1.1431421041488647, "learning_rate": 2e-05, "loss": 0.04334393, "step": 20907 }, { "epoch": 41.816, "grad_norm": 1.328840732574463, "learning_rate": 2e-05, "loss": 0.04690224, "step": 20908 }, { "epoch": 41.818, "grad_norm": 0.96977299451828, "learning_rate": 2e-05, "loss": 0.03683947, "step": 20909 }, { "epoch": 41.82, "grad_norm": 1.1125136613845825, "learning_rate": 2e-05, "loss": 0.04391717, "step": 20910 }, { "epoch": 41.822, "grad_norm": 1.0546776056289673, "learning_rate": 2e-05, "loss": 0.0294345, "step": 20911 }, { "epoch": 41.824, "grad_norm": 1.0246164798736572, "learning_rate": 2e-05, "loss": 0.04162443, "step": 20912 }, { "epoch": 41.826, "grad_norm": 1.2705532312393188, "learning_rate": 2e-05, "loss": 0.06132779, "step": 20913 }, { "epoch": 41.828, "grad_norm": 1.0475337505340576, "learning_rate": 2e-05, "loss": 0.03245585, "step": 20914 }, { "epoch": 41.83, "grad_norm": 1.4540488719940186, "learning_rate": 2e-05, "loss": 0.04266465, "step": 20915 }, { "epoch": 41.832, "grad_norm": 1.089884638786316, "learning_rate": 2e-05, "loss": 0.03583428, "step": 20916 }, { "epoch": 41.834, "grad_norm": 1.248226284980774, "learning_rate": 2e-05, "loss": 0.04630677, "step": 20917 }, { "epoch": 41.836, "grad_norm": 1.2985204458236694, "learning_rate": 2e-05, "loss": 0.05467469, "step": 20918 }, { "epoch": 41.838, "grad_norm": 1.6257275342941284, "learning_rate": 2e-05, "loss": 0.05300467, "step": 20919 }, { "epoch": 41.84, "grad_norm": 1.0730141401290894, "learning_rate": 2e-05, "loss": 0.04086423, "step": 20920 }, { "epoch": 41.842, "grad_norm": 1.7589372396469116, "learning_rate": 2e-05, "loss": 0.07659753, "step": 20921 }, { "epoch": 41.844, "grad_norm": 1.650455355644226, "learning_rate": 2e-05, "loss": 0.04704353, "step": 20922 }, { "epoch": 41.846, "grad_norm": 0.9814362525939941, "learning_rate": 2e-05, "loss": 0.04067108, "step": 20923 }, { "epoch": 41.848, "grad_norm": 2.033730983734131, "learning_rate": 2e-05, "loss": 0.03695693, "step": 20924 }, { "epoch": 41.85, "grad_norm": 1.1429461240768433, "learning_rate": 2e-05, "loss": 0.04535512, "step": 20925 }, { "epoch": 41.852, "grad_norm": 1.354186773300171, "learning_rate": 2e-05, "loss": 0.05408484, "step": 20926 }, { "epoch": 41.854, "grad_norm": 2.6549506187438965, "learning_rate": 2e-05, "loss": 0.05873822, "step": 20927 }, { "epoch": 41.856, "grad_norm": 1.1592339277267456, "learning_rate": 2e-05, "loss": 0.04995368, "step": 20928 }, { "epoch": 41.858, "grad_norm": 1.7437788248062134, "learning_rate": 2e-05, "loss": 0.07510689, "step": 20929 }, { "epoch": 41.86, "grad_norm": 1.1084489822387695, "learning_rate": 2e-05, "loss": 0.04345679, "step": 20930 }, { "epoch": 41.862, "grad_norm": 1.3295727968215942, "learning_rate": 2e-05, "loss": 0.04446131, "step": 20931 }, { "epoch": 41.864, "grad_norm": 1.3433420658111572, "learning_rate": 2e-05, "loss": 0.05595826, "step": 20932 }, { "epoch": 41.866, "grad_norm": 1.3462584018707275, "learning_rate": 2e-05, "loss": 0.05649468, "step": 20933 }, { "epoch": 41.868, "grad_norm": 1.4262540340423584, "learning_rate": 2e-05, "loss": 0.04885809, "step": 20934 }, { "epoch": 41.87, "grad_norm": 2.7343051433563232, "learning_rate": 2e-05, "loss": 0.06057374, "step": 20935 }, { "epoch": 41.872, "grad_norm": 1.2475897073745728, "learning_rate": 2e-05, "loss": 0.05510751, "step": 20936 }, { "epoch": 41.874, "grad_norm": 1.5104964971542358, "learning_rate": 2e-05, "loss": 0.05848288, "step": 20937 }, { "epoch": 41.876, "grad_norm": 1.10996675491333, "learning_rate": 2e-05, "loss": 0.04091154, "step": 20938 }, { "epoch": 41.878, "grad_norm": 1.2295801639556885, "learning_rate": 2e-05, "loss": 0.05182572, "step": 20939 }, { "epoch": 41.88, "grad_norm": 1.200059413909912, "learning_rate": 2e-05, "loss": 0.04649248, "step": 20940 }, { "epoch": 41.882, "grad_norm": 1.1000398397445679, "learning_rate": 2e-05, "loss": 0.03840358, "step": 20941 }, { "epoch": 41.884, "grad_norm": 1.3613183498382568, "learning_rate": 2e-05, "loss": 0.05442974, "step": 20942 }, { "epoch": 41.886, "grad_norm": 1.567658543586731, "learning_rate": 2e-05, "loss": 0.05978185, "step": 20943 }, { "epoch": 41.888, "grad_norm": 1.0944557189941406, "learning_rate": 2e-05, "loss": 0.03656168, "step": 20944 }, { "epoch": 41.89, "grad_norm": 1.177577018737793, "learning_rate": 2e-05, "loss": 0.04644332, "step": 20945 }, { "epoch": 41.892, "grad_norm": 1.2059484720230103, "learning_rate": 2e-05, "loss": 0.03849885, "step": 20946 }, { "epoch": 41.894, "grad_norm": 4.2382283210754395, "learning_rate": 2e-05, "loss": 0.05498065, "step": 20947 }, { "epoch": 41.896, "grad_norm": 1.1419352293014526, "learning_rate": 2e-05, "loss": 0.04763758, "step": 20948 }, { "epoch": 41.898, "grad_norm": 2.648472785949707, "learning_rate": 2e-05, "loss": 0.06559248, "step": 20949 }, { "epoch": 41.9, "grad_norm": 1.1834135055541992, "learning_rate": 2e-05, "loss": 0.06024366, "step": 20950 }, { "epoch": 41.902, "grad_norm": 1.1037529706954956, "learning_rate": 2e-05, "loss": 0.05040961, "step": 20951 }, { "epoch": 41.904, "grad_norm": 1.312083125114441, "learning_rate": 2e-05, "loss": 0.05827889, "step": 20952 }, { "epoch": 41.906, "grad_norm": 1.707861065864563, "learning_rate": 2e-05, "loss": 0.05063179, "step": 20953 }, { "epoch": 41.908, "grad_norm": 1.2550243139266968, "learning_rate": 2e-05, "loss": 0.04975577, "step": 20954 }, { "epoch": 41.91, "grad_norm": 1.1055585145950317, "learning_rate": 2e-05, "loss": 0.04447905, "step": 20955 }, { "epoch": 41.912, "grad_norm": 1.2128030061721802, "learning_rate": 2e-05, "loss": 0.04356065, "step": 20956 }, { "epoch": 41.914, "grad_norm": 2.053502321243286, "learning_rate": 2e-05, "loss": 0.04877847, "step": 20957 }, { "epoch": 41.916, "grad_norm": 1.2415821552276611, "learning_rate": 2e-05, "loss": 0.05251724, "step": 20958 }, { "epoch": 41.918, "grad_norm": 1.246356725692749, "learning_rate": 2e-05, "loss": 0.03539124, "step": 20959 }, { "epoch": 41.92, "grad_norm": 3.0229344367980957, "learning_rate": 2e-05, "loss": 0.04473191, "step": 20960 }, { "epoch": 41.922, "grad_norm": 1.5012351274490356, "learning_rate": 2e-05, "loss": 0.04717286, "step": 20961 }, { "epoch": 41.924, "grad_norm": 1.6548337936401367, "learning_rate": 2e-05, "loss": 0.05977109, "step": 20962 }, { "epoch": 41.926, "grad_norm": 1.0772522687911987, "learning_rate": 2e-05, "loss": 0.04247387, "step": 20963 }, { "epoch": 41.928, "grad_norm": 1.259922981262207, "learning_rate": 2e-05, "loss": 0.05617234, "step": 20964 }, { "epoch": 41.93, "grad_norm": 1.1000100374221802, "learning_rate": 2e-05, "loss": 0.04250489, "step": 20965 }, { "epoch": 41.932, "grad_norm": 0.9738196730613708, "learning_rate": 2e-05, "loss": 0.02657899, "step": 20966 }, { "epoch": 41.934, "grad_norm": 1.1028367280960083, "learning_rate": 2e-05, "loss": 0.03186112, "step": 20967 }, { "epoch": 41.936, "grad_norm": 1.2720319032669067, "learning_rate": 2e-05, "loss": 0.04869255, "step": 20968 }, { "epoch": 41.938, "grad_norm": 1.1687405109405518, "learning_rate": 2e-05, "loss": 0.03809325, "step": 20969 }, { "epoch": 41.94, "grad_norm": 0.9612535238265991, "learning_rate": 2e-05, "loss": 0.02938464, "step": 20970 }, { "epoch": 41.942, "grad_norm": 1.5162972211837769, "learning_rate": 2e-05, "loss": 0.06188224, "step": 20971 }, { "epoch": 41.944, "grad_norm": 2.4762864112854004, "learning_rate": 2e-05, "loss": 0.05203952, "step": 20972 }, { "epoch": 41.946, "grad_norm": 1.2529581785202026, "learning_rate": 2e-05, "loss": 0.04696166, "step": 20973 }, { "epoch": 41.948, "grad_norm": 1.1282131671905518, "learning_rate": 2e-05, "loss": 0.03645737, "step": 20974 }, { "epoch": 41.95, "grad_norm": 1.2952510118484497, "learning_rate": 2e-05, "loss": 0.04191312, "step": 20975 }, { "epoch": 41.952, "grad_norm": 1.780375599861145, "learning_rate": 2e-05, "loss": 0.05335905, "step": 20976 }, { "epoch": 41.954, "grad_norm": 1.3107290267944336, "learning_rate": 2e-05, "loss": 0.04631649, "step": 20977 }, { "epoch": 41.956, "grad_norm": 1.3172683715820312, "learning_rate": 2e-05, "loss": 0.05279814, "step": 20978 }, { "epoch": 41.958, "grad_norm": 1.3357253074645996, "learning_rate": 2e-05, "loss": 0.05109545, "step": 20979 }, { "epoch": 41.96, "grad_norm": 1.298035740852356, "learning_rate": 2e-05, "loss": 0.05070015, "step": 20980 }, { "epoch": 41.962, "grad_norm": 1.3245327472686768, "learning_rate": 2e-05, "loss": 0.05043676, "step": 20981 }, { "epoch": 41.964, "grad_norm": 1.8343544006347656, "learning_rate": 2e-05, "loss": 0.05134499, "step": 20982 }, { "epoch": 41.966, "grad_norm": 1.2885112762451172, "learning_rate": 2e-05, "loss": 0.04808827, "step": 20983 }, { "epoch": 41.968, "grad_norm": 1.0060571432113647, "learning_rate": 2e-05, "loss": 0.04182211, "step": 20984 }, { "epoch": 41.97, "grad_norm": 1.8609304428100586, "learning_rate": 2e-05, "loss": 0.05335212, "step": 20985 }, { "epoch": 41.972, "grad_norm": 1.4980871677398682, "learning_rate": 2e-05, "loss": 0.04966426, "step": 20986 }, { "epoch": 41.974, "grad_norm": 1.4951345920562744, "learning_rate": 2e-05, "loss": 0.04603991, "step": 20987 }, { "epoch": 41.976, "grad_norm": 0.9615522623062134, "learning_rate": 2e-05, "loss": 0.02445277, "step": 20988 }, { "epoch": 41.978, "grad_norm": 1.2384010553359985, "learning_rate": 2e-05, "loss": 0.04333053, "step": 20989 }, { "epoch": 41.98, "grad_norm": 1.366593837738037, "learning_rate": 2e-05, "loss": 0.06016916, "step": 20990 }, { "epoch": 41.982, "grad_norm": 1.1202651262283325, "learning_rate": 2e-05, "loss": 0.0522389, "step": 20991 }, { "epoch": 41.984, "grad_norm": 1.1914691925048828, "learning_rate": 2e-05, "loss": 0.04286882, "step": 20992 }, { "epoch": 41.986, "grad_norm": 1.3832005262374878, "learning_rate": 2e-05, "loss": 0.05637228, "step": 20993 }, { "epoch": 41.988, "grad_norm": 1.253294587135315, "learning_rate": 2e-05, "loss": 0.04640395, "step": 20994 }, { "epoch": 41.99, "grad_norm": 2.2488229274749756, "learning_rate": 2e-05, "loss": 0.04534402, "step": 20995 }, { "epoch": 41.992, "grad_norm": 1.515738606452942, "learning_rate": 2e-05, "loss": 0.0565161, "step": 20996 }, { "epoch": 41.994, "grad_norm": 2.047625780105591, "learning_rate": 2e-05, "loss": 0.04121755, "step": 20997 }, { "epoch": 41.996, "grad_norm": 2.0094411373138428, "learning_rate": 2e-05, "loss": 0.06695313, "step": 20998 }, { "epoch": 41.998, "grad_norm": 1.419292688369751, "learning_rate": 2e-05, "loss": 0.04039873, "step": 20999 }, { "epoch": 42.0, "grad_norm": 2.344158887863159, "learning_rate": 2e-05, "loss": 0.047058, "step": 21000 }, { "epoch": 42.0, "eval_performance": { "AngleClassification_1": 0.994, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9820359281437125, "Equal_1": 0.996, "Equal_2": 0.9820359281437125, "Equal_3": 0.9880239520958084, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9959919839679359, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.996, "Perpendicular_1": 1.0, "Perpendicular_2": 0.998, "Perpendicular_3": 0.9038076152304609, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.9956, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9860279441117764 }, "eval_runtime": 320.4536, "eval_samples_per_second": 32.766, "eval_steps_per_second": 0.655, "step": 21000 }, { "epoch": 42.002, "grad_norm": 2.0079424381256104, "learning_rate": 2e-05, "loss": 0.03449755, "step": 21001 }, { "epoch": 42.004, "grad_norm": 1.365389108657837, "learning_rate": 2e-05, "loss": 0.0548946, "step": 21002 }, { "epoch": 42.006, "grad_norm": 2.7778944969177246, "learning_rate": 2e-05, "loss": 0.04609371, "step": 21003 }, { "epoch": 42.008, "grad_norm": 1.0593626499176025, "learning_rate": 2e-05, "loss": 0.0348668, "step": 21004 }, { "epoch": 42.01, "grad_norm": 1.3281819820404053, "learning_rate": 2e-05, "loss": 0.05575937, "step": 21005 }, { "epoch": 42.012, "grad_norm": 1.5199637413024902, "learning_rate": 2e-05, "loss": 0.04298349, "step": 21006 }, { "epoch": 42.014, "grad_norm": 1.4977668523788452, "learning_rate": 2e-05, "loss": 0.03856365, "step": 21007 }, { "epoch": 42.016, "grad_norm": 1.0063074827194214, "learning_rate": 2e-05, "loss": 0.02791759, "step": 21008 }, { "epoch": 42.018, "grad_norm": 1.3319441080093384, "learning_rate": 2e-05, "loss": 0.05699763, "step": 21009 }, { "epoch": 42.02, "grad_norm": 1.0372650623321533, "learning_rate": 2e-05, "loss": 0.04131971, "step": 21010 }, { "epoch": 42.022, "grad_norm": 1.1082048416137695, "learning_rate": 2e-05, "loss": 0.04324374, "step": 21011 }, { "epoch": 42.024, "grad_norm": 1.2265472412109375, "learning_rate": 2e-05, "loss": 0.04790197, "step": 21012 }, { "epoch": 42.026, "grad_norm": 1.5326513051986694, "learning_rate": 2e-05, "loss": 0.05717977, "step": 21013 }, { "epoch": 42.028, "grad_norm": 1.2501510381698608, "learning_rate": 2e-05, "loss": 0.04369808, "step": 21014 }, { "epoch": 42.03, "grad_norm": 0.9996318817138672, "learning_rate": 2e-05, "loss": 0.03566208, "step": 21015 }, { "epoch": 42.032, "grad_norm": 1.2359881401062012, "learning_rate": 2e-05, "loss": 0.05509493, "step": 21016 }, { "epoch": 42.034, "grad_norm": 1.4756462574005127, "learning_rate": 2e-05, "loss": 0.06215252, "step": 21017 }, { "epoch": 42.036, "grad_norm": 1.2540290355682373, "learning_rate": 2e-05, "loss": 0.03848273, "step": 21018 }, { "epoch": 42.038, "grad_norm": 1.3934910297393799, "learning_rate": 2e-05, "loss": 0.0600573, "step": 21019 }, { "epoch": 42.04, "grad_norm": 1.3779077529907227, "learning_rate": 2e-05, "loss": 0.05684218, "step": 21020 }, { "epoch": 42.042, "grad_norm": 1.3131364583969116, "learning_rate": 2e-05, "loss": 0.04408994, "step": 21021 }, { "epoch": 42.044, "grad_norm": 1.862250804901123, "learning_rate": 2e-05, "loss": 0.05622796, "step": 21022 }, { "epoch": 42.046, "grad_norm": 1.1200668811798096, "learning_rate": 2e-05, "loss": 0.03651465, "step": 21023 }, { "epoch": 42.048, "grad_norm": 1.217587947845459, "learning_rate": 2e-05, "loss": 0.03259541, "step": 21024 }, { "epoch": 42.05, "grad_norm": 1.5544425249099731, "learning_rate": 2e-05, "loss": 0.03680165, "step": 21025 }, { "epoch": 42.052, "grad_norm": 1.274043083190918, "learning_rate": 2e-05, "loss": 0.04595189, "step": 21026 }, { "epoch": 42.054, "grad_norm": 2.187490463256836, "learning_rate": 2e-05, "loss": 0.04805189, "step": 21027 }, { "epoch": 42.056, "grad_norm": 1.2666808366775513, "learning_rate": 2e-05, "loss": 0.05547645, "step": 21028 }, { "epoch": 42.058, "grad_norm": 1.4787068367004395, "learning_rate": 2e-05, "loss": 0.06114297, "step": 21029 }, { "epoch": 42.06, "grad_norm": 1.4573709964752197, "learning_rate": 2e-05, "loss": 0.04937818, "step": 21030 }, { "epoch": 42.062, "grad_norm": 1.2699495553970337, "learning_rate": 2e-05, "loss": 0.04790761, "step": 21031 }, { "epoch": 42.064, "grad_norm": 1.1611040830612183, "learning_rate": 2e-05, "loss": 0.03836013, "step": 21032 }, { "epoch": 42.066, "grad_norm": 1.162618637084961, "learning_rate": 2e-05, "loss": 0.04036701, "step": 21033 }, { "epoch": 42.068, "grad_norm": 1.3911733627319336, "learning_rate": 2e-05, "loss": 0.05297327, "step": 21034 }, { "epoch": 42.07, "grad_norm": 1.1646391153335571, "learning_rate": 2e-05, "loss": 0.05322537, "step": 21035 }, { "epoch": 42.072, "grad_norm": 1.2405617237091064, "learning_rate": 2e-05, "loss": 0.03772672, "step": 21036 }, { "epoch": 42.074, "grad_norm": 1.0591578483581543, "learning_rate": 2e-05, "loss": 0.03944032, "step": 21037 }, { "epoch": 42.076, "grad_norm": 1.182476282119751, "learning_rate": 2e-05, "loss": 0.05112866, "step": 21038 }, { "epoch": 42.078, "grad_norm": 1.7845994234085083, "learning_rate": 2e-05, "loss": 0.04743159, "step": 21039 }, { "epoch": 42.08, "grad_norm": 1.1651856899261475, "learning_rate": 2e-05, "loss": 0.04408582, "step": 21040 }, { "epoch": 42.082, "grad_norm": 1.3968032598495483, "learning_rate": 2e-05, "loss": 0.06172002, "step": 21041 }, { "epoch": 42.084, "grad_norm": 1.2377245426177979, "learning_rate": 2e-05, "loss": 0.06204825, "step": 21042 }, { "epoch": 42.086, "grad_norm": 1.1847991943359375, "learning_rate": 2e-05, "loss": 0.03809762, "step": 21043 }, { "epoch": 42.088, "grad_norm": 1.6209152936935425, "learning_rate": 2e-05, "loss": 0.0557748, "step": 21044 }, { "epoch": 42.09, "grad_norm": 1.1455878019332886, "learning_rate": 2e-05, "loss": 0.04782082, "step": 21045 }, { "epoch": 42.092, "grad_norm": 1.1409224271774292, "learning_rate": 2e-05, "loss": 0.04084107, "step": 21046 }, { "epoch": 42.094, "grad_norm": 1.6999880075454712, "learning_rate": 2e-05, "loss": 0.0430681, "step": 21047 }, { "epoch": 42.096, "grad_norm": 1.3535499572753906, "learning_rate": 2e-05, "loss": 0.04580639, "step": 21048 }, { "epoch": 42.098, "grad_norm": 1.6721376180648804, "learning_rate": 2e-05, "loss": 0.04870587, "step": 21049 }, { "epoch": 42.1, "grad_norm": 3.2024240493774414, "learning_rate": 2e-05, "loss": 0.04476042, "step": 21050 }, { "epoch": 42.102, "grad_norm": 0.9818119406700134, "learning_rate": 2e-05, "loss": 0.0436222, "step": 21051 }, { "epoch": 42.104, "grad_norm": 1.2789314985275269, "learning_rate": 2e-05, "loss": 0.04346582, "step": 21052 }, { "epoch": 42.106, "grad_norm": 1.2788130044937134, "learning_rate": 2e-05, "loss": 0.05709602, "step": 21053 }, { "epoch": 42.108, "grad_norm": 2.102720260620117, "learning_rate": 2e-05, "loss": 0.06700805, "step": 21054 }, { "epoch": 42.11, "grad_norm": 1.278295636177063, "learning_rate": 2e-05, "loss": 0.05733266, "step": 21055 }, { "epoch": 42.112, "grad_norm": 1.3988900184631348, "learning_rate": 2e-05, "loss": 0.04855637, "step": 21056 }, { "epoch": 42.114, "grad_norm": 1.2994776964187622, "learning_rate": 2e-05, "loss": 0.04653971, "step": 21057 }, { "epoch": 42.116, "grad_norm": 1.2954206466674805, "learning_rate": 2e-05, "loss": 0.051019, "step": 21058 }, { "epoch": 42.118, "grad_norm": 1.1361409425735474, "learning_rate": 2e-05, "loss": 0.03964051, "step": 21059 }, { "epoch": 42.12, "grad_norm": 0.9857589602470398, "learning_rate": 2e-05, "loss": 0.0420249, "step": 21060 }, { "epoch": 42.122, "grad_norm": 1.19060480594635, "learning_rate": 2e-05, "loss": 0.03876651, "step": 21061 }, { "epoch": 42.124, "grad_norm": 1.0141627788543701, "learning_rate": 2e-05, "loss": 0.04508419, "step": 21062 }, { "epoch": 42.126, "grad_norm": 1.3992326259613037, "learning_rate": 2e-05, "loss": 0.05133965, "step": 21063 }, { "epoch": 42.128, "grad_norm": 1.2171835899353027, "learning_rate": 2e-05, "loss": 0.03061209, "step": 21064 }, { "epoch": 42.13, "grad_norm": 0.9995740652084351, "learning_rate": 2e-05, "loss": 0.045886, "step": 21065 }, { "epoch": 42.132, "grad_norm": 1.2155423164367676, "learning_rate": 2e-05, "loss": 0.05031782, "step": 21066 }, { "epoch": 42.134, "grad_norm": 3.3475124835968018, "learning_rate": 2e-05, "loss": 0.05089858, "step": 21067 }, { "epoch": 42.136, "grad_norm": 1.1842341423034668, "learning_rate": 2e-05, "loss": 0.03898914, "step": 21068 }, { "epoch": 42.138, "grad_norm": 1.3484647274017334, "learning_rate": 2e-05, "loss": 0.05519821, "step": 21069 }, { "epoch": 42.14, "grad_norm": 1.2679609060287476, "learning_rate": 2e-05, "loss": 0.04845376, "step": 21070 }, { "epoch": 42.142, "grad_norm": 1.0729557275772095, "learning_rate": 2e-05, "loss": 0.03485009, "step": 21071 }, { "epoch": 42.144, "grad_norm": 1.442091464996338, "learning_rate": 2e-05, "loss": 0.04761119, "step": 21072 }, { "epoch": 42.146, "grad_norm": 1.2365809679031372, "learning_rate": 2e-05, "loss": 0.04327618, "step": 21073 }, { "epoch": 42.148, "grad_norm": 1.1715463399887085, "learning_rate": 2e-05, "loss": 0.04974968, "step": 21074 }, { "epoch": 42.15, "grad_norm": 1.1144353151321411, "learning_rate": 2e-05, "loss": 0.04348652, "step": 21075 }, { "epoch": 42.152, "grad_norm": 1.4729726314544678, "learning_rate": 2e-05, "loss": 0.05958918, "step": 21076 }, { "epoch": 42.154, "grad_norm": 1.1265980005264282, "learning_rate": 2e-05, "loss": 0.05273468, "step": 21077 }, { "epoch": 42.156, "grad_norm": 1.2327011823654175, "learning_rate": 2e-05, "loss": 0.05835613, "step": 21078 }, { "epoch": 42.158, "grad_norm": 1.3347694873809814, "learning_rate": 2e-05, "loss": 0.03290975, "step": 21079 }, { "epoch": 42.16, "grad_norm": 1.2273168563842773, "learning_rate": 2e-05, "loss": 0.05718467, "step": 21080 }, { "epoch": 42.162, "grad_norm": 1.1186082363128662, "learning_rate": 2e-05, "loss": 0.0496081, "step": 21081 }, { "epoch": 42.164, "grad_norm": 1.4576385021209717, "learning_rate": 2e-05, "loss": 0.0473141, "step": 21082 }, { "epoch": 42.166, "grad_norm": 1.3093568086624146, "learning_rate": 2e-05, "loss": 0.05596102, "step": 21083 }, { "epoch": 42.168, "grad_norm": 1.2706962823867798, "learning_rate": 2e-05, "loss": 0.050145, "step": 21084 }, { "epoch": 42.17, "grad_norm": 1.4902071952819824, "learning_rate": 2e-05, "loss": 0.04793771, "step": 21085 }, { "epoch": 42.172, "grad_norm": 1.174926519393921, "learning_rate": 2e-05, "loss": 0.05029378, "step": 21086 }, { "epoch": 42.174, "grad_norm": 1.2596014738082886, "learning_rate": 2e-05, "loss": 0.03948942, "step": 21087 }, { "epoch": 42.176, "grad_norm": 1.0307563543319702, "learning_rate": 2e-05, "loss": 0.03940136, "step": 21088 }, { "epoch": 42.178, "grad_norm": 1.1724040508270264, "learning_rate": 2e-05, "loss": 0.04482014, "step": 21089 }, { "epoch": 42.18, "grad_norm": 1.143633484840393, "learning_rate": 2e-05, "loss": 0.03999674, "step": 21090 }, { "epoch": 42.182, "grad_norm": 1.217869758605957, "learning_rate": 2e-05, "loss": 0.03607331, "step": 21091 }, { "epoch": 42.184, "grad_norm": 1.5815616846084595, "learning_rate": 2e-05, "loss": 0.03352413, "step": 21092 }, { "epoch": 42.186, "grad_norm": 1.3018903732299805, "learning_rate": 2e-05, "loss": 0.04986539, "step": 21093 }, { "epoch": 42.188, "grad_norm": 0.9588651657104492, "learning_rate": 2e-05, "loss": 0.03948629, "step": 21094 }, { "epoch": 42.19, "grad_norm": 1.2913947105407715, "learning_rate": 2e-05, "loss": 0.04663259, "step": 21095 }, { "epoch": 42.192, "grad_norm": 1.219344139099121, "learning_rate": 2e-05, "loss": 0.0564451, "step": 21096 }, { "epoch": 42.194, "grad_norm": 1.4023454189300537, "learning_rate": 2e-05, "loss": 0.06140105, "step": 21097 }, { "epoch": 42.196, "grad_norm": 1.1653475761413574, "learning_rate": 2e-05, "loss": 0.03363206, "step": 21098 }, { "epoch": 42.198, "grad_norm": 1.1109018325805664, "learning_rate": 2e-05, "loss": 0.0404594, "step": 21099 }, { "epoch": 42.2, "grad_norm": 1.053236722946167, "learning_rate": 2e-05, "loss": 0.02968258, "step": 21100 }, { "epoch": 42.202, "grad_norm": 1.2577223777770996, "learning_rate": 2e-05, "loss": 0.05090709, "step": 21101 }, { "epoch": 42.204, "grad_norm": 1.3133550882339478, "learning_rate": 2e-05, "loss": 0.05599451, "step": 21102 }, { "epoch": 42.206, "grad_norm": 2.2289819717407227, "learning_rate": 2e-05, "loss": 0.04835991, "step": 21103 }, { "epoch": 42.208, "grad_norm": 1.7648617029190063, "learning_rate": 2e-05, "loss": 0.04039288, "step": 21104 }, { "epoch": 42.21, "grad_norm": 1.5205116271972656, "learning_rate": 2e-05, "loss": 0.06169431, "step": 21105 }, { "epoch": 42.212, "grad_norm": 1.573837161064148, "learning_rate": 2e-05, "loss": 0.06739548, "step": 21106 }, { "epoch": 42.214, "grad_norm": 1.3160827159881592, "learning_rate": 2e-05, "loss": 0.04701831, "step": 21107 }, { "epoch": 42.216, "grad_norm": 1.2954484224319458, "learning_rate": 2e-05, "loss": 0.05630885, "step": 21108 }, { "epoch": 42.218, "grad_norm": 1.2000118494033813, "learning_rate": 2e-05, "loss": 0.0458281, "step": 21109 }, { "epoch": 42.22, "grad_norm": 1.564306378364563, "learning_rate": 2e-05, "loss": 0.05043546, "step": 21110 }, { "epoch": 42.222, "grad_norm": 1.0061910152435303, "learning_rate": 2e-05, "loss": 0.03526705, "step": 21111 }, { "epoch": 42.224, "grad_norm": 1.4038654565811157, "learning_rate": 2e-05, "loss": 0.05683837, "step": 21112 }, { "epoch": 42.226, "grad_norm": 1.0469942092895508, "learning_rate": 2e-05, "loss": 0.04538111, "step": 21113 }, { "epoch": 42.228, "grad_norm": 1.2826262712478638, "learning_rate": 2e-05, "loss": 0.04978053, "step": 21114 }, { "epoch": 42.23, "grad_norm": 1.3049055337905884, "learning_rate": 2e-05, "loss": 0.04201498, "step": 21115 }, { "epoch": 42.232, "grad_norm": 1.076093077659607, "learning_rate": 2e-05, "loss": 0.04795041, "step": 21116 }, { "epoch": 42.234, "grad_norm": 1.1158887147903442, "learning_rate": 2e-05, "loss": 0.03639564, "step": 21117 }, { "epoch": 42.236, "grad_norm": 1.251383900642395, "learning_rate": 2e-05, "loss": 0.04058103, "step": 21118 }, { "epoch": 42.238, "grad_norm": 1.0459401607513428, "learning_rate": 2e-05, "loss": 0.04287563, "step": 21119 }, { "epoch": 42.24, "grad_norm": 1.3883095979690552, "learning_rate": 2e-05, "loss": 0.06586547, "step": 21120 }, { "epoch": 42.242, "grad_norm": 1.1663405895233154, "learning_rate": 2e-05, "loss": 0.03624522, "step": 21121 }, { "epoch": 42.244, "grad_norm": 1.8142751455307007, "learning_rate": 2e-05, "loss": 0.0545116, "step": 21122 }, { "epoch": 42.246, "grad_norm": 2.438311815261841, "learning_rate": 2e-05, "loss": 0.05591767, "step": 21123 }, { "epoch": 42.248, "grad_norm": 1.2450895309448242, "learning_rate": 2e-05, "loss": 0.04676469, "step": 21124 }, { "epoch": 42.25, "grad_norm": 1.227967619895935, "learning_rate": 2e-05, "loss": 0.05129366, "step": 21125 }, { "epoch": 42.252, "grad_norm": 1.1938300132751465, "learning_rate": 2e-05, "loss": 0.05615389, "step": 21126 }, { "epoch": 42.254, "grad_norm": 1.1461693048477173, "learning_rate": 2e-05, "loss": 0.04713152, "step": 21127 }, { "epoch": 42.256, "grad_norm": 1.187277913093567, "learning_rate": 2e-05, "loss": 0.04280775, "step": 21128 }, { "epoch": 42.258, "grad_norm": 1.5151171684265137, "learning_rate": 2e-05, "loss": 0.04615886, "step": 21129 }, { "epoch": 42.26, "grad_norm": 1.1889163255691528, "learning_rate": 2e-05, "loss": 0.0394232, "step": 21130 }, { "epoch": 42.262, "grad_norm": 1.3880109786987305, "learning_rate": 2e-05, "loss": 0.06245904, "step": 21131 }, { "epoch": 42.264, "grad_norm": 1.1990442276000977, "learning_rate": 2e-05, "loss": 0.05073933, "step": 21132 }, { "epoch": 42.266, "grad_norm": 1.3353129625320435, "learning_rate": 2e-05, "loss": 0.04800763, "step": 21133 }, { "epoch": 42.268, "grad_norm": 1.1701955795288086, "learning_rate": 2e-05, "loss": 0.05479547, "step": 21134 }, { "epoch": 42.27, "grad_norm": 1.744545578956604, "learning_rate": 2e-05, "loss": 0.04801206, "step": 21135 }, { "epoch": 42.272, "grad_norm": 1.2501215934753418, "learning_rate": 2e-05, "loss": 0.05587947, "step": 21136 }, { "epoch": 42.274, "grad_norm": 1.1730159521102905, "learning_rate": 2e-05, "loss": 0.03659301, "step": 21137 }, { "epoch": 42.276, "grad_norm": 1.2556431293487549, "learning_rate": 2e-05, "loss": 0.06406794, "step": 21138 }, { "epoch": 42.278, "grad_norm": 0.9709025621414185, "learning_rate": 2e-05, "loss": 0.02665532, "step": 21139 }, { "epoch": 42.28, "grad_norm": 1.0300137996673584, "learning_rate": 2e-05, "loss": 0.0415834, "step": 21140 }, { "epoch": 42.282, "grad_norm": 1.334498643875122, "learning_rate": 2e-05, "loss": 0.04262113, "step": 21141 }, { "epoch": 42.284, "grad_norm": 0.9960622191429138, "learning_rate": 2e-05, "loss": 0.03293207, "step": 21142 }, { "epoch": 42.286, "grad_norm": 1.19757878780365, "learning_rate": 2e-05, "loss": 0.05076621, "step": 21143 }, { "epoch": 42.288, "grad_norm": 1.1853511333465576, "learning_rate": 2e-05, "loss": 0.05421963, "step": 21144 }, { "epoch": 42.29, "grad_norm": 1.0343493223190308, "learning_rate": 2e-05, "loss": 0.03314459, "step": 21145 }, { "epoch": 42.292, "grad_norm": 6.21450662612915, "learning_rate": 2e-05, "loss": 0.05752373, "step": 21146 }, { "epoch": 42.294, "grad_norm": 1.1545538902282715, "learning_rate": 2e-05, "loss": 0.04223744, "step": 21147 }, { "epoch": 42.296, "grad_norm": 1.1654874086380005, "learning_rate": 2e-05, "loss": 0.03923909, "step": 21148 }, { "epoch": 42.298, "grad_norm": 1.1227787733078003, "learning_rate": 2e-05, "loss": 0.03867494, "step": 21149 }, { "epoch": 42.3, "grad_norm": 1.422080397605896, "learning_rate": 2e-05, "loss": 0.05246984, "step": 21150 }, { "epoch": 42.302, "grad_norm": 1.0861878395080566, "learning_rate": 2e-05, "loss": 0.03628934, "step": 21151 }, { "epoch": 42.304, "grad_norm": 1.113157868385315, "learning_rate": 2e-05, "loss": 0.03985634, "step": 21152 }, { "epoch": 42.306, "grad_norm": 1.102103352546692, "learning_rate": 2e-05, "loss": 0.04545946, "step": 21153 }, { "epoch": 42.308, "grad_norm": 2.6324360370635986, "learning_rate": 2e-05, "loss": 0.052878, "step": 21154 }, { "epoch": 42.31, "grad_norm": 3.8525304794311523, "learning_rate": 2e-05, "loss": 0.06736265, "step": 21155 }, { "epoch": 42.312, "grad_norm": 1.187397837638855, "learning_rate": 2e-05, "loss": 0.04645222, "step": 21156 }, { "epoch": 42.314, "grad_norm": 0.9652663469314575, "learning_rate": 2e-05, "loss": 0.03505144, "step": 21157 }, { "epoch": 42.316, "grad_norm": 0.9658792018890381, "learning_rate": 2e-05, "loss": 0.0281552, "step": 21158 }, { "epoch": 42.318, "grad_norm": 2.401021718978882, "learning_rate": 2e-05, "loss": 0.05007901, "step": 21159 }, { "epoch": 42.32, "grad_norm": 1.2055039405822754, "learning_rate": 2e-05, "loss": 0.05365395, "step": 21160 }, { "epoch": 42.322, "grad_norm": 3.1883809566497803, "learning_rate": 2e-05, "loss": 0.05920103, "step": 21161 }, { "epoch": 42.324, "grad_norm": 1.5328859090805054, "learning_rate": 2e-05, "loss": 0.04159598, "step": 21162 }, { "epoch": 42.326, "grad_norm": 1.2868179082870483, "learning_rate": 2e-05, "loss": 0.05470748, "step": 21163 }, { "epoch": 42.328, "grad_norm": 1.2141040563583374, "learning_rate": 2e-05, "loss": 0.0402927, "step": 21164 }, { "epoch": 42.33, "grad_norm": 1.7615190744400024, "learning_rate": 2e-05, "loss": 0.03097968, "step": 21165 }, { "epoch": 42.332, "grad_norm": 1.5224100351333618, "learning_rate": 2e-05, "loss": 0.04342334, "step": 21166 }, { "epoch": 42.334, "grad_norm": 1.1339821815490723, "learning_rate": 2e-05, "loss": 0.03738585, "step": 21167 }, { "epoch": 42.336, "grad_norm": 1.207984209060669, "learning_rate": 2e-05, "loss": 0.0441434, "step": 21168 }, { "epoch": 42.338, "grad_norm": 3.2836313247680664, "learning_rate": 2e-05, "loss": 0.03989324, "step": 21169 }, { "epoch": 42.34, "grad_norm": 1.7195615768432617, "learning_rate": 2e-05, "loss": 0.04979802, "step": 21170 }, { "epoch": 42.342, "grad_norm": 1.368320107460022, "learning_rate": 2e-05, "loss": 0.0440874, "step": 21171 }, { "epoch": 42.344, "grad_norm": 1.2935134172439575, "learning_rate": 2e-05, "loss": 0.06269786, "step": 21172 }, { "epoch": 42.346, "grad_norm": 1.5454837083816528, "learning_rate": 2e-05, "loss": 0.05150343, "step": 21173 }, { "epoch": 42.348, "grad_norm": 1.0489261150360107, "learning_rate": 2e-05, "loss": 0.03613845, "step": 21174 }, { "epoch": 42.35, "grad_norm": 2.1777830123901367, "learning_rate": 2e-05, "loss": 0.05178384, "step": 21175 }, { "epoch": 42.352, "grad_norm": 1.3198027610778809, "learning_rate": 2e-05, "loss": 0.04619918, "step": 21176 }, { "epoch": 42.354, "grad_norm": 1.1379380226135254, "learning_rate": 2e-05, "loss": 0.0368214, "step": 21177 }, { "epoch": 42.356, "grad_norm": 2.0367469787597656, "learning_rate": 2e-05, "loss": 0.04297212, "step": 21178 }, { "epoch": 42.358, "grad_norm": 1.455183506011963, "learning_rate": 2e-05, "loss": 0.06377006, "step": 21179 }, { "epoch": 42.36, "grad_norm": 1.1051256656646729, "learning_rate": 2e-05, "loss": 0.03730104, "step": 21180 }, { "epoch": 42.362, "grad_norm": 1.163114070892334, "learning_rate": 2e-05, "loss": 0.04407148, "step": 21181 }, { "epoch": 42.364, "grad_norm": 1.0142223834991455, "learning_rate": 2e-05, "loss": 0.03786562, "step": 21182 }, { "epoch": 42.366, "grad_norm": 1.0811436176300049, "learning_rate": 2e-05, "loss": 0.03777923, "step": 21183 }, { "epoch": 42.368, "grad_norm": 1.1761528253555298, "learning_rate": 2e-05, "loss": 0.04415475, "step": 21184 }, { "epoch": 42.37, "grad_norm": 0.9909497499465942, "learning_rate": 2e-05, "loss": 0.04150863, "step": 21185 }, { "epoch": 42.372, "grad_norm": 1.077889323234558, "learning_rate": 2e-05, "loss": 0.04909585, "step": 21186 }, { "epoch": 42.374, "grad_norm": 1.0327900648117065, "learning_rate": 2e-05, "loss": 0.03843714, "step": 21187 }, { "epoch": 42.376, "grad_norm": 1.582082748413086, "learning_rate": 2e-05, "loss": 0.04728288, "step": 21188 }, { "epoch": 42.378, "grad_norm": 1.148047924041748, "learning_rate": 2e-05, "loss": 0.04307776, "step": 21189 }, { "epoch": 42.38, "grad_norm": 1.0733706951141357, "learning_rate": 2e-05, "loss": 0.04168211, "step": 21190 }, { "epoch": 42.382, "grad_norm": 1.2072162628173828, "learning_rate": 2e-05, "loss": 0.05512146, "step": 21191 }, { "epoch": 42.384, "grad_norm": 1.1533020734786987, "learning_rate": 2e-05, "loss": 0.03808629, "step": 21192 }, { "epoch": 42.386, "grad_norm": 1.307199478149414, "learning_rate": 2e-05, "loss": 0.05116679, "step": 21193 }, { "epoch": 42.388, "grad_norm": 1.3861874341964722, "learning_rate": 2e-05, "loss": 0.06823573, "step": 21194 }, { "epoch": 42.39, "grad_norm": 1.1966004371643066, "learning_rate": 2e-05, "loss": 0.0478618, "step": 21195 }, { "epoch": 42.392, "grad_norm": 1.1855981349945068, "learning_rate": 2e-05, "loss": 0.0414066, "step": 21196 }, { "epoch": 42.394, "grad_norm": 1.1840327978134155, "learning_rate": 2e-05, "loss": 0.05320561, "step": 21197 }, { "epoch": 42.396, "grad_norm": 1.4169137477874756, "learning_rate": 2e-05, "loss": 0.05518955, "step": 21198 }, { "epoch": 42.398, "grad_norm": 1.1781421899795532, "learning_rate": 2e-05, "loss": 0.04477818, "step": 21199 }, { "epoch": 42.4, "grad_norm": 1.092429757118225, "learning_rate": 2e-05, "loss": 0.04341368, "step": 21200 }, { "epoch": 42.402, "grad_norm": 1.3216253519058228, "learning_rate": 2e-05, "loss": 0.05887017, "step": 21201 }, { "epoch": 42.404, "grad_norm": 1.205215573310852, "learning_rate": 2e-05, "loss": 0.04666806, "step": 21202 }, { "epoch": 42.406, "grad_norm": 1.2955050468444824, "learning_rate": 2e-05, "loss": 0.05575846, "step": 21203 }, { "epoch": 42.408, "grad_norm": 2.5864737033843994, "learning_rate": 2e-05, "loss": 0.03791555, "step": 21204 }, { "epoch": 42.41, "grad_norm": 1.188843011856079, "learning_rate": 2e-05, "loss": 0.05555812, "step": 21205 }, { "epoch": 42.412, "grad_norm": 0.9318320751190186, "learning_rate": 2e-05, "loss": 0.02653901, "step": 21206 }, { "epoch": 42.414, "grad_norm": 1.1733531951904297, "learning_rate": 2e-05, "loss": 0.04083906, "step": 21207 }, { "epoch": 42.416, "grad_norm": 1.1449897289276123, "learning_rate": 2e-05, "loss": 0.04881506, "step": 21208 }, { "epoch": 42.418, "grad_norm": 1.965896487236023, "learning_rate": 2e-05, "loss": 0.04963106, "step": 21209 }, { "epoch": 42.42, "grad_norm": 1.0945767164230347, "learning_rate": 2e-05, "loss": 0.04849849, "step": 21210 }, { "epoch": 42.422, "grad_norm": 1.308809757232666, "learning_rate": 2e-05, "loss": 0.06706718, "step": 21211 }, { "epoch": 42.424, "grad_norm": 1.53829026222229, "learning_rate": 2e-05, "loss": 0.06904881, "step": 21212 }, { "epoch": 42.426, "grad_norm": 1.2670159339904785, "learning_rate": 2e-05, "loss": 0.05303366, "step": 21213 }, { "epoch": 42.428, "grad_norm": 1.2741113901138306, "learning_rate": 2e-05, "loss": 0.0492133, "step": 21214 }, { "epoch": 42.43, "grad_norm": 1.2569606304168701, "learning_rate": 2e-05, "loss": 0.06223689, "step": 21215 }, { "epoch": 42.432, "grad_norm": 1.107643485069275, "learning_rate": 2e-05, "loss": 0.04277761, "step": 21216 }, { "epoch": 42.434, "grad_norm": 1.122188925743103, "learning_rate": 2e-05, "loss": 0.04128901, "step": 21217 }, { "epoch": 42.436, "grad_norm": 1.1837259531021118, "learning_rate": 2e-05, "loss": 0.04776137, "step": 21218 }, { "epoch": 42.438, "grad_norm": 3.169210910797119, "learning_rate": 2e-05, "loss": 0.05383972, "step": 21219 }, { "epoch": 42.44, "grad_norm": 0.9989545941352844, "learning_rate": 2e-05, "loss": 0.03308531, "step": 21220 }, { "epoch": 42.442, "grad_norm": 1.1423425674438477, "learning_rate": 2e-05, "loss": 0.04700923, "step": 21221 }, { "epoch": 42.444, "grad_norm": 1.0154725313186646, "learning_rate": 2e-05, "loss": 0.03601583, "step": 21222 }, { "epoch": 42.446, "grad_norm": 1.450246810913086, "learning_rate": 2e-05, "loss": 0.0615345, "step": 21223 }, { "epoch": 42.448, "grad_norm": 1.1445060968399048, "learning_rate": 2e-05, "loss": 0.03595731, "step": 21224 }, { "epoch": 42.45, "grad_norm": 1.6389358043670654, "learning_rate": 2e-05, "loss": 0.0496392, "step": 21225 }, { "epoch": 42.452, "grad_norm": 1.141597867012024, "learning_rate": 2e-05, "loss": 0.03886272, "step": 21226 }, { "epoch": 42.454, "grad_norm": 2.0984458923339844, "learning_rate": 2e-05, "loss": 0.0533991, "step": 21227 }, { "epoch": 42.456, "grad_norm": 1.5619630813598633, "learning_rate": 2e-05, "loss": 0.0451661, "step": 21228 }, { "epoch": 42.458, "grad_norm": 1.1230692863464355, "learning_rate": 2e-05, "loss": 0.02888122, "step": 21229 }, { "epoch": 42.46, "grad_norm": 1.5001888275146484, "learning_rate": 2e-05, "loss": 0.04741661, "step": 21230 }, { "epoch": 42.462, "grad_norm": 1.1775398254394531, "learning_rate": 2e-05, "loss": 0.05408292, "step": 21231 }, { "epoch": 42.464, "grad_norm": 1.1655455827713013, "learning_rate": 2e-05, "loss": 0.04785192, "step": 21232 }, { "epoch": 42.466, "grad_norm": 1.3385460376739502, "learning_rate": 2e-05, "loss": 0.04888346, "step": 21233 }, { "epoch": 42.468, "grad_norm": 1.5374442338943481, "learning_rate": 2e-05, "loss": 0.048363, "step": 21234 }, { "epoch": 42.47, "grad_norm": 1.1293838024139404, "learning_rate": 2e-05, "loss": 0.03480672, "step": 21235 }, { "epoch": 42.472, "grad_norm": 1.4131519794464111, "learning_rate": 2e-05, "loss": 0.059271, "step": 21236 }, { "epoch": 42.474, "grad_norm": 1.091775894165039, "learning_rate": 2e-05, "loss": 0.04225942, "step": 21237 }, { "epoch": 42.476, "grad_norm": 1.5270899534225464, "learning_rate": 2e-05, "loss": 0.05104741, "step": 21238 }, { "epoch": 42.478, "grad_norm": 1.4533936977386475, "learning_rate": 2e-05, "loss": 0.04931673, "step": 21239 }, { "epoch": 42.48, "grad_norm": 1.187991738319397, "learning_rate": 2e-05, "loss": 0.05418856, "step": 21240 }, { "epoch": 42.482, "grad_norm": 1.3054866790771484, "learning_rate": 2e-05, "loss": 0.05543121, "step": 21241 }, { "epoch": 42.484, "grad_norm": 1.1094039678573608, "learning_rate": 2e-05, "loss": 0.03574315, "step": 21242 }, { "epoch": 42.486, "grad_norm": 1.1816613674163818, "learning_rate": 2e-05, "loss": 0.04213757, "step": 21243 }, { "epoch": 42.488, "grad_norm": 1.0031068325042725, "learning_rate": 2e-05, "loss": 0.03389908, "step": 21244 }, { "epoch": 42.49, "grad_norm": 1.4081437587738037, "learning_rate": 2e-05, "loss": 0.05755498, "step": 21245 }, { "epoch": 42.492, "grad_norm": 1.0443217754364014, "learning_rate": 2e-05, "loss": 0.0272135, "step": 21246 }, { "epoch": 42.494, "grad_norm": 2.372973918914795, "learning_rate": 2e-05, "loss": 0.0727305, "step": 21247 }, { "epoch": 42.496, "grad_norm": 1.1258074045181274, "learning_rate": 2e-05, "loss": 0.04463324, "step": 21248 }, { "epoch": 42.498, "grad_norm": 1.0638490915298462, "learning_rate": 2e-05, "loss": 0.04064821, "step": 21249 }, { "epoch": 42.5, "grad_norm": 2.074948787689209, "learning_rate": 2e-05, "loss": 0.04368262, "step": 21250 }, { "epoch": 42.502, "grad_norm": 1.1462494134902954, "learning_rate": 2e-05, "loss": 0.05238108, "step": 21251 }, { "epoch": 42.504, "grad_norm": 1.1089760065078735, "learning_rate": 2e-05, "loss": 0.05240737, "step": 21252 }, { "epoch": 42.506, "grad_norm": 1.010260820388794, "learning_rate": 2e-05, "loss": 0.0344992, "step": 21253 }, { "epoch": 42.508, "grad_norm": 1.2549633979797363, "learning_rate": 2e-05, "loss": 0.05022692, "step": 21254 }, { "epoch": 42.51, "grad_norm": 1.307962417602539, "learning_rate": 2e-05, "loss": 0.05273689, "step": 21255 }, { "epoch": 42.512, "grad_norm": 1.516259789466858, "learning_rate": 2e-05, "loss": 0.03713296, "step": 21256 }, { "epoch": 42.514, "grad_norm": 1.1646299362182617, "learning_rate": 2e-05, "loss": 0.04224031, "step": 21257 }, { "epoch": 42.516, "grad_norm": 1.3015379905700684, "learning_rate": 2e-05, "loss": 0.05681949, "step": 21258 }, { "epoch": 42.518, "grad_norm": 1.2873107194900513, "learning_rate": 2e-05, "loss": 0.06247327, "step": 21259 }, { "epoch": 42.52, "grad_norm": 1.2081667184829712, "learning_rate": 2e-05, "loss": 0.04261228, "step": 21260 }, { "epoch": 42.522, "grad_norm": 2.0166215896606445, "learning_rate": 2e-05, "loss": 0.05025642, "step": 21261 }, { "epoch": 42.524, "grad_norm": 1.4932124614715576, "learning_rate": 2e-05, "loss": 0.04905674, "step": 21262 }, { "epoch": 42.526, "grad_norm": 1.2398021221160889, "learning_rate": 2e-05, "loss": 0.0445546, "step": 21263 }, { "epoch": 42.528, "grad_norm": 7.126203536987305, "learning_rate": 2e-05, "loss": 0.05969048, "step": 21264 }, { "epoch": 42.53, "grad_norm": 1.2210969924926758, "learning_rate": 2e-05, "loss": 0.03673073, "step": 21265 }, { "epoch": 42.532, "grad_norm": 1.105908751487732, "learning_rate": 2e-05, "loss": 0.04691067, "step": 21266 }, { "epoch": 42.534, "grad_norm": 1.393251657485962, "learning_rate": 2e-05, "loss": 0.04946836, "step": 21267 }, { "epoch": 42.536, "grad_norm": 1.284510850906372, "learning_rate": 2e-05, "loss": 0.04197853, "step": 21268 }, { "epoch": 42.538, "grad_norm": 1.0972687005996704, "learning_rate": 2e-05, "loss": 0.03820524, "step": 21269 }, { "epoch": 42.54, "grad_norm": 1.1322238445281982, "learning_rate": 2e-05, "loss": 0.04426937, "step": 21270 }, { "epoch": 42.542, "grad_norm": 2.50138783454895, "learning_rate": 2e-05, "loss": 0.06171612, "step": 21271 }, { "epoch": 42.544, "grad_norm": 1.0176743268966675, "learning_rate": 2e-05, "loss": 0.03656572, "step": 21272 }, { "epoch": 42.546, "grad_norm": 1.3137876987457275, "learning_rate": 2e-05, "loss": 0.05632983, "step": 21273 }, { "epoch": 42.548, "grad_norm": 2.6000397205352783, "learning_rate": 2e-05, "loss": 0.04389685, "step": 21274 }, { "epoch": 42.55, "grad_norm": 2.7255218029022217, "learning_rate": 2e-05, "loss": 0.06559823, "step": 21275 }, { "epoch": 42.552, "grad_norm": 1.1543296575546265, "learning_rate": 2e-05, "loss": 0.05046273, "step": 21276 }, { "epoch": 42.554, "grad_norm": 1.017396092414856, "learning_rate": 2e-05, "loss": 0.03115503, "step": 21277 }, { "epoch": 42.556, "grad_norm": 1.3216530084609985, "learning_rate": 2e-05, "loss": 0.06245044, "step": 21278 }, { "epoch": 42.558, "grad_norm": 12.367131233215332, "learning_rate": 2e-05, "loss": 0.05980074, "step": 21279 }, { "epoch": 42.56, "grad_norm": 1.1518374681472778, "learning_rate": 2e-05, "loss": 0.03418801, "step": 21280 }, { "epoch": 42.562, "grad_norm": 2.031050443649292, "learning_rate": 2e-05, "loss": 0.04848942, "step": 21281 }, { "epoch": 42.564, "grad_norm": 1.0938751697540283, "learning_rate": 2e-05, "loss": 0.0451759, "step": 21282 }, { "epoch": 42.566, "grad_norm": 1.1018202304840088, "learning_rate": 2e-05, "loss": 0.04848454, "step": 21283 }, { "epoch": 42.568, "grad_norm": 1.2324109077453613, "learning_rate": 2e-05, "loss": 0.04794756, "step": 21284 }, { "epoch": 42.57, "grad_norm": 2.7218592166900635, "learning_rate": 2e-05, "loss": 0.0646372, "step": 21285 }, { "epoch": 42.572, "grad_norm": 1.2674838304519653, "learning_rate": 2e-05, "loss": 0.04550756, "step": 21286 }, { "epoch": 42.574, "grad_norm": 1.5812036991119385, "learning_rate": 2e-05, "loss": 0.05703519, "step": 21287 }, { "epoch": 42.576, "grad_norm": 1.1205166578292847, "learning_rate": 2e-05, "loss": 0.036104, "step": 21288 }, { "epoch": 42.578, "grad_norm": 1.0537066459655762, "learning_rate": 2e-05, "loss": 0.03754063, "step": 21289 }, { "epoch": 42.58, "grad_norm": 1.2235515117645264, "learning_rate": 2e-05, "loss": 0.05299861, "step": 21290 }, { "epoch": 42.582, "grad_norm": 1.3739643096923828, "learning_rate": 2e-05, "loss": 0.0461156, "step": 21291 }, { "epoch": 42.584, "grad_norm": 1.5320465564727783, "learning_rate": 2e-05, "loss": 0.04836542, "step": 21292 }, { "epoch": 42.586, "grad_norm": 0.9427246451377869, "learning_rate": 2e-05, "loss": 0.0362893, "step": 21293 }, { "epoch": 42.588, "grad_norm": 1.3789801597595215, "learning_rate": 2e-05, "loss": 0.06239626, "step": 21294 }, { "epoch": 42.59, "grad_norm": 1.258785605430603, "learning_rate": 2e-05, "loss": 0.05440217, "step": 21295 }, { "epoch": 42.592, "grad_norm": 1.2506073713302612, "learning_rate": 2e-05, "loss": 0.04535231, "step": 21296 }, { "epoch": 42.594, "grad_norm": 1.300620436668396, "learning_rate": 2e-05, "loss": 0.0481655, "step": 21297 }, { "epoch": 42.596, "grad_norm": 1.1247934103012085, "learning_rate": 2e-05, "loss": 0.03969868, "step": 21298 }, { "epoch": 42.598, "grad_norm": 1.2077419757843018, "learning_rate": 2e-05, "loss": 0.04599006, "step": 21299 }, { "epoch": 42.6, "grad_norm": 4.433143615722656, "learning_rate": 2e-05, "loss": 0.05563483, "step": 21300 }, { "epoch": 42.602, "grad_norm": 1.223757266998291, "learning_rate": 2e-05, "loss": 0.04834705, "step": 21301 }, { "epoch": 42.604, "grad_norm": 1.001859426498413, "learning_rate": 2e-05, "loss": 0.03891331, "step": 21302 }, { "epoch": 42.606, "grad_norm": 1.4744184017181396, "learning_rate": 2e-05, "loss": 0.04635298, "step": 21303 }, { "epoch": 42.608, "grad_norm": 1.0292778015136719, "learning_rate": 2e-05, "loss": 0.03995837, "step": 21304 }, { "epoch": 42.61, "grad_norm": 1.2195087671279907, "learning_rate": 2e-05, "loss": 0.04556499, "step": 21305 }, { "epoch": 42.612, "grad_norm": 2.625493049621582, "learning_rate": 2e-05, "loss": 0.0414276, "step": 21306 }, { "epoch": 42.614, "grad_norm": 1.3295499086380005, "learning_rate": 2e-05, "loss": 0.05958395, "step": 21307 }, { "epoch": 42.616, "grad_norm": 2.4811103343963623, "learning_rate": 2e-05, "loss": 0.08586189, "step": 21308 }, { "epoch": 42.618, "grad_norm": 0.9455074667930603, "learning_rate": 2e-05, "loss": 0.02827884, "step": 21309 }, { "epoch": 42.62, "grad_norm": 1.2621259689331055, "learning_rate": 2e-05, "loss": 0.04555801, "step": 21310 }, { "epoch": 42.622, "grad_norm": 1.1620298624038696, "learning_rate": 2e-05, "loss": 0.0330307, "step": 21311 }, { "epoch": 42.624, "grad_norm": 1.165947437286377, "learning_rate": 2e-05, "loss": 0.04790859, "step": 21312 }, { "epoch": 42.626, "grad_norm": 2.1621484756469727, "learning_rate": 2e-05, "loss": 0.06904516, "step": 21313 }, { "epoch": 42.628, "grad_norm": 1.2981723546981812, "learning_rate": 2e-05, "loss": 0.04969555, "step": 21314 }, { "epoch": 42.63, "grad_norm": 1.206282377243042, "learning_rate": 2e-05, "loss": 0.05298881, "step": 21315 }, { "epoch": 42.632, "grad_norm": 1.1706401109695435, "learning_rate": 2e-05, "loss": 0.04146463, "step": 21316 }, { "epoch": 42.634, "grad_norm": 1.349630355834961, "learning_rate": 2e-05, "loss": 0.05339145, "step": 21317 }, { "epoch": 42.636, "grad_norm": 1.3318768739700317, "learning_rate": 2e-05, "loss": 0.04014498, "step": 21318 }, { "epoch": 42.638, "grad_norm": 1.787174105644226, "learning_rate": 2e-05, "loss": 0.07187255, "step": 21319 }, { "epoch": 42.64, "grad_norm": 1.0085779428482056, "learning_rate": 2e-05, "loss": 0.03416509, "step": 21320 }, { "epoch": 42.642, "grad_norm": 1.366184115409851, "learning_rate": 2e-05, "loss": 0.05623956, "step": 21321 }, { "epoch": 42.644, "grad_norm": 1.2050182819366455, "learning_rate": 2e-05, "loss": 0.04033398, "step": 21322 }, { "epoch": 42.646, "grad_norm": 1.4368079900741577, "learning_rate": 2e-05, "loss": 0.04438941, "step": 21323 }, { "epoch": 42.648, "grad_norm": 1.231373906135559, "learning_rate": 2e-05, "loss": 0.04198126, "step": 21324 }, { "epoch": 42.65, "grad_norm": 3.3663556575775146, "learning_rate": 2e-05, "loss": 0.05418935, "step": 21325 }, { "epoch": 42.652, "grad_norm": 1.528099536895752, "learning_rate": 2e-05, "loss": 0.06321813, "step": 21326 }, { "epoch": 42.654, "grad_norm": 1.117147445678711, "learning_rate": 2e-05, "loss": 0.0353119, "step": 21327 }, { "epoch": 42.656, "grad_norm": 1.3439724445343018, "learning_rate": 2e-05, "loss": 0.05230685, "step": 21328 }, { "epoch": 42.658, "grad_norm": 1.0160696506500244, "learning_rate": 2e-05, "loss": 0.04300717, "step": 21329 }, { "epoch": 42.66, "grad_norm": 2.5469577312469482, "learning_rate": 2e-05, "loss": 0.04865529, "step": 21330 }, { "epoch": 42.662, "grad_norm": 1.6859334707260132, "learning_rate": 2e-05, "loss": 0.04481193, "step": 21331 }, { "epoch": 42.664, "grad_norm": 1.0845061540603638, "learning_rate": 2e-05, "loss": 0.03940871, "step": 21332 }, { "epoch": 42.666, "grad_norm": 3.1730074882507324, "learning_rate": 2e-05, "loss": 0.06717253, "step": 21333 }, { "epoch": 42.668, "grad_norm": 0.7828595638275146, "learning_rate": 2e-05, "loss": 0.02153579, "step": 21334 }, { "epoch": 42.67, "grad_norm": 1.4767457246780396, "learning_rate": 2e-05, "loss": 0.05914707, "step": 21335 }, { "epoch": 42.672, "grad_norm": 1.4632948637008667, "learning_rate": 2e-05, "loss": 0.0467616, "step": 21336 }, { "epoch": 42.674, "grad_norm": 1.245827078819275, "learning_rate": 2e-05, "loss": 0.04130943, "step": 21337 }, { "epoch": 42.676, "grad_norm": 1.1984535455703735, "learning_rate": 2e-05, "loss": 0.05097179, "step": 21338 }, { "epoch": 42.678, "grad_norm": 1.018325686454773, "learning_rate": 2e-05, "loss": 0.04212132, "step": 21339 }, { "epoch": 42.68, "grad_norm": 2.334608316421509, "learning_rate": 2e-05, "loss": 0.0457168, "step": 21340 }, { "epoch": 42.682, "grad_norm": 1.2345631122589111, "learning_rate": 2e-05, "loss": 0.04911385, "step": 21341 }, { "epoch": 42.684, "grad_norm": 1.1206731796264648, "learning_rate": 2e-05, "loss": 0.0280387, "step": 21342 }, { "epoch": 42.686, "grad_norm": 1.4151371717453003, "learning_rate": 2e-05, "loss": 0.06245557, "step": 21343 }, { "epoch": 42.688, "grad_norm": 1.2239195108413696, "learning_rate": 2e-05, "loss": 0.04748405, "step": 21344 }, { "epoch": 42.69, "grad_norm": 1.0787338018417358, "learning_rate": 2e-05, "loss": 0.03307759, "step": 21345 }, { "epoch": 42.692, "grad_norm": 1.2297214269638062, "learning_rate": 2e-05, "loss": 0.05256373, "step": 21346 }, { "epoch": 42.694, "grad_norm": 1.2633470296859741, "learning_rate": 2e-05, "loss": 0.05449621, "step": 21347 }, { "epoch": 42.696, "grad_norm": 1.178499460220337, "learning_rate": 2e-05, "loss": 0.06108705, "step": 21348 }, { "epoch": 42.698, "grad_norm": 1.030000925064087, "learning_rate": 2e-05, "loss": 0.03802224, "step": 21349 }, { "epoch": 42.7, "grad_norm": 1.2210856676101685, "learning_rate": 2e-05, "loss": 0.04589963, "step": 21350 }, { "epoch": 42.702, "grad_norm": 2.0504300594329834, "learning_rate": 2e-05, "loss": 0.06983016, "step": 21351 }, { "epoch": 42.704, "grad_norm": 1.2079181671142578, "learning_rate": 2e-05, "loss": 0.04810051, "step": 21352 }, { "epoch": 42.706, "grad_norm": 1.1589932441711426, "learning_rate": 2e-05, "loss": 0.04745119, "step": 21353 }, { "epoch": 42.708, "grad_norm": 4.482708930969238, "learning_rate": 2e-05, "loss": 0.0591661, "step": 21354 }, { "epoch": 42.71, "grad_norm": 1.2510643005371094, "learning_rate": 2e-05, "loss": 0.05218077, "step": 21355 }, { "epoch": 42.712, "grad_norm": 1.295291543006897, "learning_rate": 2e-05, "loss": 0.05780672, "step": 21356 }, { "epoch": 42.714, "grad_norm": 1.1533124446868896, "learning_rate": 2e-05, "loss": 0.03984768, "step": 21357 }, { "epoch": 42.716, "grad_norm": 3.040389060974121, "learning_rate": 2e-05, "loss": 0.04302094, "step": 21358 }, { "epoch": 42.718, "grad_norm": 1.2422996759414673, "learning_rate": 2e-05, "loss": 0.03962889, "step": 21359 }, { "epoch": 42.72, "grad_norm": 1.4480630159378052, "learning_rate": 2e-05, "loss": 0.06389065, "step": 21360 }, { "epoch": 42.722, "grad_norm": 1.3611098527908325, "learning_rate": 2e-05, "loss": 0.0595374, "step": 21361 }, { "epoch": 42.724, "grad_norm": 1.2345424890518188, "learning_rate": 2e-05, "loss": 0.03755029, "step": 21362 }, { "epoch": 42.726, "grad_norm": 1.8066527843475342, "learning_rate": 2e-05, "loss": 0.05974579, "step": 21363 }, { "epoch": 42.728, "grad_norm": 1.1717407703399658, "learning_rate": 2e-05, "loss": 0.04459912, "step": 21364 }, { "epoch": 42.73, "grad_norm": 1.143455982208252, "learning_rate": 2e-05, "loss": 0.04196816, "step": 21365 }, { "epoch": 42.732, "grad_norm": 1.2013553380966187, "learning_rate": 2e-05, "loss": 0.03594541, "step": 21366 }, { "epoch": 42.734, "grad_norm": 1.1833301782608032, "learning_rate": 2e-05, "loss": 0.04475646, "step": 21367 }, { "epoch": 42.736, "grad_norm": 1.0608525276184082, "learning_rate": 2e-05, "loss": 0.0312412, "step": 21368 }, { "epoch": 42.738, "grad_norm": 1.3239516019821167, "learning_rate": 2e-05, "loss": 0.05588631, "step": 21369 }, { "epoch": 42.74, "grad_norm": 1.2072561979293823, "learning_rate": 2e-05, "loss": 0.0523868, "step": 21370 }, { "epoch": 42.742, "grad_norm": 2.1657562255859375, "learning_rate": 2e-05, "loss": 0.0605137, "step": 21371 }, { "epoch": 42.744, "grad_norm": 1.158803105354309, "learning_rate": 2e-05, "loss": 0.04379743, "step": 21372 }, { "epoch": 42.746, "grad_norm": 1.2670451402664185, "learning_rate": 2e-05, "loss": 0.04156856, "step": 21373 }, { "epoch": 42.748, "grad_norm": 1.0824308395385742, "learning_rate": 2e-05, "loss": 0.04734112, "step": 21374 }, { "epoch": 42.75, "grad_norm": 1.4957252740859985, "learning_rate": 2e-05, "loss": 0.04622442, "step": 21375 }, { "epoch": 42.752, "grad_norm": 1.1113194227218628, "learning_rate": 2e-05, "loss": 0.03722591, "step": 21376 }, { "epoch": 42.754, "grad_norm": 1.513335108757019, "learning_rate": 2e-05, "loss": 0.05863171, "step": 21377 }, { "epoch": 42.756, "grad_norm": 1.463580846786499, "learning_rate": 2e-05, "loss": 0.0668181, "step": 21378 }, { "epoch": 42.758, "grad_norm": 1.4309571981430054, "learning_rate": 2e-05, "loss": 0.03635763, "step": 21379 }, { "epoch": 42.76, "grad_norm": 1.273354172706604, "learning_rate": 2e-05, "loss": 0.04854331, "step": 21380 }, { "epoch": 42.762, "grad_norm": 1.6273373365402222, "learning_rate": 2e-05, "loss": 0.05022759, "step": 21381 }, { "epoch": 42.764, "grad_norm": 1.2511389255523682, "learning_rate": 2e-05, "loss": 0.06143327, "step": 21382 }, { "epoch": 42.766, "grad_norm": 2.7621679306030273, "learning_rate": 2e-05, "loss": 0.06704356, "step": 21383 }, { "epoch": 42.768, "grad_norm": 1.5374120473861694, "learning_rate": 2e-05, "loss": 0.04694888, "step": 21384 }, { "epoch": 42.77, "grad_norm": 1.2802921533584595, "learning_rate": 2e-05, "loss": 0.05271628, "step": 21385 }, { "epoch": 42.772, "grad_norm": 1.4109792709350586, "learning_rate": 2e-05, "loss": 0.05482592, "step": 21386 }, { "epoch": 42.774, "grad_norm": 1.5210480690002441, "learning_rate": 2e-05, "loss": 0.05803951, "step": 21387 }, { "epoch": 42.776, "grad_norm": 1.1467036008834839, "learning_rate": 2e-05, "loss": 0.0471014, "step": 21388 }, { "epoch": 42.778, "grad_norm": 1.3119735717773438, "learning_rate": 2e-05, "loss": 0.0533366, "step": 21389 }, { "epoch": 42.78, "grad_norm": 1.1641420125961304, "learning_rate": 2e-05, "loss": 0.03897999, "step": 21390 }, { "epoch": 42.782, "grad_norm": 1.1525665521621704, "learning_rate": 2e-05, "loss": 0.04090577, "step": 21391 }, { "epoch": 42.784, "grad_norm": 1.2021316289901733, "learning_rate": 2e-05, "loss": 0.04947101, "step": 21392 }, { "epoch": 42.786, "grad_norm": 1.2240577936172485, "learning_rate": 2e-05, "loss": 0.05535308, "step": 21393 }, { "epoch": 42.788, "grad_norm": 1.2875986099243164, "learning_rate": 2e-05, "loss": 0.03962916, "step": 21394 }, { "epoch": 42.79, "grad_norm": 1.526619553565979, "learning_rate": 2e-05, "loss": 0.03942203, "step": 21395 }, { "epoch": 42.792, "grad_norm": 1.133887529373169, "learning_rate": 2e-05, "loss": 0.04529195, "step": 21396 }, { "epoch": 42.794, "grad_norm": 1.195630669593811, "learning_rate": 2e-05, "loss": 0.04189469, "step": 21397 }, { "epoch": 42.796, "grad_norm": 1.2251441478729248, "learning_rate": 2e-05, "loss": 0.04151393, "step": 21398 }, { "epoch": 42.798, "grad_norm": 1.0438613891601562, "learning_rate": 2e-05, "loss": 0.03437603, "step": 21399 }, { "epoch": 42.8, "grad_norm": 1.001349687576294, "learning_rate": 2e-05, "loss": 0.03562608, "step": 21400 }, { "epoch": 42.802, "grad_norm": 1.156879186630249, "learning_rate": 2e-05, "loss": 0.03687527, "step": 21401 }, { "epoch": 42.804, "grad_norm": 1.0168277025222778, "learning_rate": 2e-05, "loss": 0.04121634, "step": 21402 }, { "epoch": 42.806, "grad_norm": 1.6192265748977661, "learning_rate": 2e-05, "loss": 0.05773157, "step": 21403 }, { "epoch": 42.808, "grad_norm": 1.384158968925476, "learning_rate": 2e-05, "loss": 0.04894597, "step": 21404 }, { "epoch": 42.81, "grad_norm": 1.0740807056427002, "learning_rate": 2e-05, "loss": 0.03360681, "step": 21405 }, { "epoch": 42.812, "grad_norm": 1.36252760887146, "learning_rate": 2e-05, "loss": 0.04959939, "step": 21406 }, { "epoch": 42.814, "grad_norm": 1.1320419311523438, "learning_rate": 2e-05, "loss": 0.03762558, "step": 21407 }, { "epoch": 42.816, "grad_norm": 1.1921322345733643, "learning_rate": 2e-05, "loss": 0.04960704, "step": 21408 }, { "epoch": 42.818, "grad_norm": 1.363591194152832, "learning_rate": 2e-05, "loss": 0.04697926, "step": 21409 }, { "epoch": 42.82, "grad_norm": 1.330572485923767, "learning_rate": 2e-05, "loss": 0.03431287, "step": 21410 }, { "epoch": 42.822, "grad_norm": 1.108099102973938, "learning_rate": 2e-05, "loss": 0.04299867, "step": 21411 }, { "epoch": 42.824, "grad_norm": 1.28284752368927, "learning_rate": 2e-05, "loss": 0.05352758, "step": 21412 }, { "epoch": 42.826, "grad_norm": 1.2229121923446655, "learning_rate": 2e-05, "loss": 0.0591327, "step": 21413 }, { "epoch": 42.828, "grad_norm": 1.152987003326416, "learning_rate": 2e-05, "loss": 0.03737348, "step": 21414 }, { "epoch": 42.83, "grad_norm": 1.1505484580993652, "learning_rate": 2e-05, "loss": 0.04128478, "step": 21415 }, { "epoch": 42.832, "grad_norm": 1.2108104228973389, "learning_rate": 2e-05, "loss": 0.0475995, "step": 21416 }, { "epoch": 42.834, "grad_norm": 1.315833330154419, "learning_rate": 2e-05, "loss": 0.04457718, "step": 21417 }, { "epoch": 42.836, "grad_norm": 1.3256268501281738, "learning_rate": 2e-05, "loss": 0.04045897, "step": 21418 }, { "epoch": 42.838, "grad_norm": 2.0043587684631348, "learning_rate": 2e-05, "loss": 0.05085915, "step": 21419 }, { "epoch": 42.84, "grad_norm": 1.0381031036376953, "learning_rate": 2e-05, "loss": 0.03765831, "step": 21420 }, { "epoch": 42.842, "grad_norm": 1.3118317127227783, "learning_rate": 2e-05, "loss": 0.05129515, "step": 21421 }, { "epoch": 42.844, "grad_norm": 1.2408820390701294, "learning_rate": 2e-05, "loss": 0.05141382, "step": 21422 }, { "epoch": 42.846, "grad_norm": 1.1298905611038208, "learning_rate": 2e-05, "loss": 0.03599843, "step": 21423 }, { "epoch": 42.848, "grad_norm": 1.4461491107940674, "learning_rate": 2e-05, "loss": 0.06431194, "step": 21424 }, { "epoch": 42.85, "grad_norm": 1.4541326761245728, "learning_rate": 2e-05, "loss": 0.0566168, "step": 21425 }, { "epoch": 42.852, "grad_norm": 1.6744414567947388, "learning_rate": 2e-05, "loss": 0.04111119, "step": 21426 }, { "epoch": 42.854, "grad_norm": 1.2035828828811646, "learning_rate": 2e-05, "loss": 0.05423025, "step": 21427 }, { "epoch": 42.856, "grad_norm": 3.0272951126098633, "learning_rate": 2e-05, "loss": 0.05712648, "step": 21428 }, { "epoch": 42.858, "grad_norm": 0.9234744310379028, "learning_rate": 2e-05, "loss": 0.03638177, "step": 21429 }, { "epoch": 42.86, "grad_norm": 1.2166481018066406, "learning_rate": 2e-05, "loss": 0.04876867, "step": 21430 }, { "epoch": 42.862, "grad_norm": 1.454555630683899, "learning_rate": 2e-05, "loss": 0.06422681, "step": 21431 }, { "epoch": 42.864, "grad_norm": 1.2168675661087036, "learning_rate": 2e-05, "loss": 0.05015201, "step": 21432 }, { "epoch": 42.866, "grad_norm": 1.659549355506897, "learning_rate": 2e-05, "loss": 0.03892962, "step": 21433 }, { "epoch": 42.868, "grad_norm": 1.268166422843933, "learning_rate": 2e-05, "loss": 0.04936218, "step": 21434 }, { "epoch": 42.87, "grad_norm": 1.0668729543685913, "learning_rate": 2e-05, "loss": 0.03783253, "step": 21435 }, { "epoch": 42.872, "grad_norm": 1.1927694082260132, "learning_rate": 2e-05, "loss": 0.03492489, "step": 21436 }, { "epoch": 42.874, "grad_norm": 1.2407193183898926, "learning_rate": 2e-05, "loss": 0.04802696, "step": 21437 }, { "epoch": 42.876, "grad_norm": 1.156215786933899, "learning_rate": 2e-05, "loss": 0.0512966, "step": 21438 }, { "epoch": 42.878, "grad_norm": 1.2110116481781006, "learning_rate": 2e-05, "loss": 0.03545962, "step": 21439 }, { "epoch": 42.88, "grad_norm": 1.1005175113677979, "learning_rate": 2e-05, "loss": 0.04559709, "step": 21440 }, { "epoch": 42.882, "grad_norm": 1.329023003578186, "learning_rate": 2e-05, "loss": 0.0600805, "step": 21441 }, { "epoch": 42.884, "grad_norm": 1.2809315919876099, "learning_rate": 2e-05, "loss": 0.03948345, "step": 21442 }, { "epoch": 42.886, "grad_norm": 1.0475729703903198, "learning_rate": 2e-05, "loss": 0.02779887, "step": 21443 }, { "epoch": 42.888, "grad_norm": 1.2448220252990723, "learning_rate": 2e-05, "loss": 0.05767167, "step": 21444 }, { "epoch": 42.89, "grad_norm": 1.1443229913711548, "learning_rate": 2e-05, "loss": 0.05177973, "step": 21445 }, { "epoch": 42.892, "grad_norm": 1.5171314477920532, "learning_rate": 2e-05, "loss": 0.04922928, "step": 21446 }, { "epoch": 42.894, "grad_norm": 1.217365026473999, "learning_rate": 2e-05, "loss": 0.05303325, "step": 21447 }, { "epoch": 42.896, "grad_norm": 1.2820359468460083, "learning_rate": 2e-05, "loss": 0.06319527, "step": 21448 }, { "epoch": 42.898, "grad_norm": 1.3315848112106323, "learning_rate": 2e-05, "loss": 0.05307559, "step": 21449 }, { "epoch": 42.9, "grad_norm": 1.4399700164794922, "learning_rate": 2e-05, "loss": 0.0532106, "step": 21450 }, { "epoch": 42.902, "grad_norm": 1.1903473138809204, "learning_rate": 2e-05, "loss": 0.03630826, "step": 21451 }, { "epoch": 42.904, "grad_norm": 1.3730727434158325, "learning_rate": 2e-05, "loss": 0.04516565, "step": 21452 }, { "epoch": 42.906, "grad_norm": 1.2712746858596802, "learning_rate": 2e-05, "loss": 0.04588057, "step": 21453 }, { "epoch": 42.908, "grad_norm": 1.266530990600586, "learning_rate": 2e-05, "loss": 0.05008287, "step": 21454 }, { "epoch": 42.91, "grad_norm": 1.0627597570419312, "learning_rate": 2e-05, "loss": 0.03308658, "step": 21455 }, { "epoch": 42.912, "grad_norm": 1.8096920251846313, "learning_rate": 2e-05, "loss": 0.04926098, "step": 21456 }, { "epoch": 42.914, "grad_norm": 1.3828647136688232, "learning_rate": 2e-05, "loss": 0.05100198, "step": 21457 }, { "epoch": 42.916, "grad_norm": 1.3538262844085693, "learning_rate": 2e-05, "loss": 0.05305772, "step": 21458 }, { "epoch": 42.918, "grad_norm": 1.327880620956421, "learning_rate": 2e-05, "loss": 0.05849855, "step": 21459 }, { "epoch": 42.92, "grad_norm": 2.1985175609588623, "learning_rate": 2e-05, "loss": 0.04045462, "step": 21460 }, { "epoch": 42.922, "grad_norm": 1.3796802759170532, "learning_rate": 2e-05, "loss": 0.05500578, "step": 21461 }, { "epoch": 42.924, "grad_norm": 1.4442265033721924, "learning_rate": 2e-05, "loss": 0.06637794, "step": 21462 }, { "epoch": 42.926, "grad_norm": 1.2436082363128662, "learning_rate": 2e-05, "loss": 0.05103221, "step": 21463 }, { "epoch": 42.928, "grad_norm": 1.0881454944610596, "learning_rate": 2e-05, "loss": 0.05290727, "step": 21464 }, { "epoch": 42.93, "grad_norm": 1.2928407192230225, "learning_rate": 2e-05, "loss": 0.06411622, "step": 21465 }, { "epoch": 42.932, "grad_norm": 3.3758764266967773, "learning_rate": 2e-05, "loss": 0.05194551, "step": 21466 }, { "epoch": 42.934, "grad_norm": 1.0337151288986206, "learning_rate": 2e-05, "loss": 0.0436775, "step": 21467 }, { "epoch": 42.936, "grad_norm": 1.7715908288955688, "learning_rate": 2e-05, "loss": 0.05398581, "step": 21468 }, { "epoch": 42.938, "grad_norm": 1.2396409511566162, "learning_rate": 2e-05, "loss": 0.06144951, "step": 21469 }, { "epoch": 42.94, "grad_norm": 1.1984061002731323, "learning_rate": 2e-05, "loss": 0.04839371, "step": 21470 }, { "epoch": 42.942, "grad_norm": 1.130316138267517, "learning_rate": 2e-05, "loss": 0.05355977, "step": 21471 }, { "epoch": 42.944, "grad_norm": 1.217844009399414, "learning_rate": 2e-05, "loss": 0.04676696, "step": 21472 }, { "epoch": 42.946, "grad_norm": 1.0561518669128418, "learning_rate": 2e-05, "loss": 0.0412074, "step": 21473 }, { "epoch": 42.948, "grad_norm": 1.0529133081436157, "learning_rate": 2e-05, "loss": 0.03983943, "step": 21474 }, { "epoch": 42.95, "grad_norm": 1.1867733001708984, "learning_rate": 2e-05, "loss": 0.05299602, "step": 21475 }, { "epoch": 42.952, "grad_norm": 1.284942865371704, "learning_rate": 2e-05, "loss": 0.04374795, "step": 21476 }, { "epoch": 42.954, "grad_norm": 1.425246000289917, "learning_rate": 2e-05, "loss": 0.07214919, "step": 21477 }, { "epoch": 42.956, "grad_norm": 1.3944209814071655, "learning_rate": 2e-05, "loss": 0.06095017, "step": 21478 }, { "epoch": 42.958, "grad_norm": 1.2820805311203003, "learning_rate": 2e-05, "loss": 0.039196, "step": 21479 }, { "epoch": 42.96, "grad_norm": 1.0585222244262695, "learning_rate": 2e-05, "loss": 0.04182017, "step": 21480 }, { "epoch": 42.962, "grad_norm": 1.012815237045288, "learning_rate": 2e-05, "loss": 0.03696268, "step": 21481 }, { "epoch": 42.964, "grad_norm": 1.024418592453003, "learning_rate": 2e-05, "loss": 0.03427555, "step": 21482 }, { "epoch": 42.966, "grad_norm": 1.316025972366333, "learning_rate": 2e-05, "loss": 0.06875788, "step": 21483 }, { "epoch": 42.968, "grad_norm": 1.1476069688796997, "learning_rate": 2e-05, "loss": 0.05424814, "step": 21484 }, { "epoch": 42.97, "grad_norm": 1.8176919221878052, "learning_rate": 2e-05, "loss": 0.06173361, "step": 21485 }, { "epoch": 42.972, "grad_norm": 1.1928600072860718, "learning_rate": 2e-05, "loss": 0.05281177, "step": 21486 }, { "epoch": 42.974, "grad_norm": 0.9522770047187805, "learning_rate": 2e-05, "loss": 0.03397568, "step": 21487 }, { "epoch": 42.976, "grad_norm": 1.3486912250518799, "learning_rate": 2e-05, "loss": 0.05722672, "step": 21488 }, { "epoch": 42.978, "grad_norm": 1.3525702953338623, "learning_rate": 2e-05, "loss": 0.05092058, "step": 21489 }, { "epoch": 42.98, "grad_norm": 1.5690195560455322, "learning_rate": 2e-05, "loss": 0.0533328, "step": 21490 }, { "epoch": 42.982, "grad_norm": 1.4370635747909546, "learning_rate": 2e-05, "loss": 0.06423539, "step": 21491 }, { "epoch": 42.984, "grad_norm": 1.0913952589035034, "learning_rate": 2e-05, "loss": 0.03920151, "step": 21492 }, { "epoch": 42.986, "grad_norm": 1.426624059677124, "learning_rate": 2e-05, "loss": 0.07058513, "step": 21493 }, { "epoch": 42.988, "grad_norm": 1.0381181240081787, "learning_rate": 2e-05, "loss": 0.03804436, "step": 21494 }, { "epoch": 42.99, "grad_norm": 0.9893444776535034, "learning_rate": 2e-05, "loss": 0.03246005, "step": 21495 }, { "epoch": 42.992, "grad_norm": 1.4209214448928833, "learning_rate": 2e-05, "loss": 0.05637572, "step": 21496 }, { "epoch": 42.994, "grad_norm": 1.1668082475662231, "learning_rate": 2e-05, "loss": 0.04799629, "step": 21497 }, { "epoch": 42.996, "grad_norm": 1.103164792060852, "learning_rate": 2e-05, "loss": 0.04127581, "step": 21498 }, { "epoch": 42.998, "grad_norm": 2.728940725326538, "learning_rate": 2e-05, "loss": 0.05009279, "step": 21499 }, { "epoch": 43.0, "grad_norm": 1.1477065086364746, "learning_rate": 2e-05, "loss": 0.03524557, "step": 21500 }, { "epoch": 43.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9860279441117764, "Equal_1": 1.0, "Equal_2": 0.9860279441117764, "Equal_3": 0.9920159680638723, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9959919839679359, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.99, "Perpendicular_1": 0.998, "Perpendicular_2": 0.994, "Perpendicular_3": 0.9018036072144289, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.99, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9919839679358717, "PointLiesOnLine_3": 0.9860279441117764 }, "eval_runtime": 319.7331, "eval_samples_per_second": 32.84, "eval_steps_per_second": 0.657, "step": 21500 }, { "epoch": 43.002, "grad_norm": 1.0772887468338013, "learning_rate": 2e-05, "loss": 0.04629757, "step": 21501 }, { "epoch": 43.004, "grad_norm": 1.0623633861541748, "learning_rate": 2e-05, "loss": 0.03715014, "step": 21502 }, { "epoch": 43.006, "grad_norm": 1.1207759380340576, "learning_rate": 2e-05, "loss": 0.05135489, "step": 21503 }, { "epoch": 43.008, "grad_norm": 1.6616121530532837, "learning_rate": 2e-05, "loss": 0.04297728, "step": 21504 }, { "epoch": 43.01, "grad_norm": 1.3650739192962646, "learning_rate": 2e-05, "loss": 0.05856933, "step": 21505 }, { "epoch": 43.012, "grad_norm": 1.8282989263534546, "learning_rate": 2e-05, "loss": 0.04885706, "step": 21506 }, { "epoch": 43.014, "grad_norm": 1.361122727394104, "learning_rate": 2e-05, "loss": 0.05703342, "step": 21507 }, { "epoch": 43.016, "grad_norm": 1.263135313987732, "learning_rate": 2e-05, "loss": 0.04379937, "step": 21508 }, { "epoch": 43.018, "grad_norm": 1.3780148029327393, "learning_rate": 2e-05, "loss": 0.05682152, "step": 21509 }, { "epoch": 43.02, "grad_norm": 1.3219780921936035, "learning_rate": 2e-05, "loss": 0.0547793, "step": 21510 }, { "epoch": 43.022, "grad_norm": 1.2654283046722412, "learning_rate": 2e-05, "loss": 0.047323, "step": 21511 }, { "epoch": 43.024, "grad_norm": 1.272671103477478, "learning_rate": 2e-05, "loss": 0.04931159, "step": 21512 }, { "epoch": 43.026, "grad_norm": 1.1386663913726807, "learning_rate": 2e-05, "loss": 0.03507162, "step": 21513 }, { "epoch": 43.028, "grad_norm": 1.292199730873108, "learning_rate": 2e-05, "loss": 0.04890214, "step": 21514 }, { "epoch": 43.03, "grad_norm": 1.5617351531982422, "learning_rate": 2e-05, "loss": 0.05029353, "step": 21515 }, { "epoch": 43.032, "grad_norm": 2.036496639251709, "learning_rate": 2e-05, "loss": 0.07027332, "step": 21516 }, { "epoch": 43.034, "grad_norm": 0.9926701784133911, "learning_rate": 2e-05, "loss": 0.03244736, "step": 21517 }, { "epoch": 43.036, "grad_norm": 1.0814783573150635, "learning_rate": 2e-05, "loss": 0.04349055, "step": 21518 }, { "epoch": 43.038, "grad_norm": 1.1061608791351318, "learning_rate": 2e-05, "loss": 0.04114666, "step": 21519 }, { "epoch": 43.04, "grad_norm": 1.1178923845291138, "learning_rate": 2e-05, "loss": 0.04589276, "step": 21520 }, { "epoch": 43.042, "grad_norm": 1.3732867240905762, "learning_rate": 2e-05, "loss": 0.0422243, "step": 21521 }, { "epoch": 43.044, "grad_norm": 2.6664786338806152, "learning_rate": 2e-05, "loss": 0.06500043, "step": 21522 }, { "epoch": 43.046, "grad_norm": 1.1350311040878296, "learning_rate": 2e-05, "loss": 0.03851867, "step": 21523 }, { "epoch": 43.048, "grad_norm": 1.0867100954055786, "learning_rate": 2e-05, "loss": 0.04170727, "step": 21524 }, { "epoch": 43.05, "grad_norm": 1.3278635740280151, "learning_rate": 2e-05, "loss": 0.05376638, "step": 21525 }, { "epoch": 43.052, "grad_norm": 1.2718002796173096, "learning_rate": 2e-05, "loss": 0.06238486, "step": 21526 }, { "epoch": 43.054, "grad_norm": 1.2181652784347534, "learning_rate": 2e-05, "loss": 0.05618663, "step": 21527 }, { "epoch": 43.056, "grad_norm": 1.2053866386413574, "learning_rate": 2e-05, "loss": 0.05018714, "step": 21528 }, { "epoch": 43.058, "grad_norm": 1.7344564199447632, "learning_rate": 2e-05, "loss": 0.05830881, "step": 21529 }, { "epoch": 43.06, "grad_norm": 1.2700293064117432, "learning_rate": 2e-05, "loss": 0.05242781, "step": 21530 }, { "epoch": 43.062, "grad_norm": 1.3638324737548828, "learning_rate": 2e-05, "loss": 0.05736677, "step": 21531 }, { "epoch": 43.064, "grad_norm": 1.4970639944076538, "learning_rate": 2e-05, "loss": 0.07066285, "step": 21532 }, { "epoch": 43.066, "grad_norm": 1.4570978879928589, "learning_rate": 2e-05, "loss": 0.05045886, "step": 21533 }, { "epoch": 43.068, "grad_norm": 1.0011194944381714, "learning_rate": 2e-05, "loss": 0.04105179, "step": 21534 }, { "epoch": 43.07, "grad_norm": 1.718483805656433, "learning_rate": 2e-05, "loss": 0.0531066, "step": 21535 }, { "epoch": 43.072, "grad_norm": 1.9208921194076538, "learning_rate": 2e-05, "loss": 0.07216385, "step": 21536 }, { "epoch": 43.074, "grad_norm": 1.464358925819397, "learning_rate": 2e-05, "loss": 0.05128159, "step": 21537 }, { "epoch": 43.076, "grad_norm": 1.617860198020935, "learning_rate": 2e-05, "loss": 0.05280685, "step": 21538 }, { "epoch": 43.078, "grad_norm": 1.2591980695724487, "learning_rate": 2e-05, "loss": 0.03685856, "step": 21539 }, { "epoch": 43.08, "grad_norm": 1.155810832977295, "learning_rate": 2e-05, "loss": 0.04095223, "step": 21540 }, { "epoch": 43.082, "grad_norm": 1.4928919076919556, "learning_rate": 2e-05, "loss": 0.0549767, "step": 21541 }, { "epoch": 43.084, "grad_norm": 1.7492938041687012, "learning_rate": 2e-05, "loss": 0.06689818, "step": 21542 }, { "epoch": 43.086, "grad_norm": 1.3056284189224243, "learning_rate": 2e-05, "loss": 0.05678186, "step": 21543 }, { "epoch": 43.088, "grad_norm": 1.1400055885314941, "learning_rate": 2e-05, "loss": 0.05558588, "step": 21544 }, { "epoch": 43.09, "grad_norm": 1.2211138010025024, "learning_rate": 2e-05, "loss": 0.0515589, "step": 21545 }, { "epoch": 43.092, "grad_norm": 1.1707724332809448, "learning_rate": 2e-05, "loss": 0.03701507, "step": 21546 }, { "epoch": 43.094, "grad_norm": 1.9183954000473022, "learning_rate": 2e-05, "loss": 0.06833592, "step": 21547 }, { "epoch": 43.096, "grad_norm": 1.1282914876937866, "learning_rate": 2e-05, "loss": 0.04579955, "step": 21548 }, { "epoch": 43.098, "grad_norm": 1.173416256904602, "learning_rate": 2e-05, "loss": 0.0542321, "step": 21549 }, { "epoch": 43.1, "grad_norm": 1.2352608442306519, "learning_rate": 2e-05, "loss": 0.05904563, "step": 21550 }, { "epoch": 43.102, "grad_norm": 1.1427820920944214, "learning_rate": 2e-05, "loss": 0.04212008, "step": 21551 }, { "epoch": 43.104, "grad_norm": 1.5755908489227295, "learning_rate": 2e-05, "loss": 0.04956923, "step": 21552 }, { "epoch": 43.106, "grad_norm": 1.5542879104614258, "learning_rate": 2e-05, "loss": 0.06011878, "step": 21553 }, { "epoch": 43.108, "grad_norm": 1.389430284500122, "learning_rate": 2e-05, "loss": 0.04906828, "step": 21554 }, { "epoch": 43.11, "grad_norm": 1.4012058973312378, "learning_rate": 2e-05, "loss": 0.06038637, "step": 21555 }, { "epoch": 43.112, "grad_norm": 3.3362956047058105, "learning_rate": 2e-05, "loss": 0.05289589, "step": 21556 }, { "epoch": 43.114, "grad_norm": 1.240237832069397, "learning_rate": 2e-05, "loss": 0.04178648, "step": 21557 }, { "epoch": 43.116, "grad_norm": 1.0733537673950195, "learning_rate": 2e-05, "loss": 0.0356668, "step": 21558 }, { "epoch": 43.118, "grad_norm": 1.472996711730957, "learning_rate": 2e-05, "loss": 0.04951042, "step": 21559 }, { "epoch": 43.12, "grad_norm": 1.41386878490448, "learning_rate": 2e-05, "loss": 0.05777404, "step": 21560 }, { "epoch": 43.122, "grad_norm": 1.279788851737976, "learning_rate": 2e-05, "loss": 0.04153184, "step": 21561 }, { "epoch": 43.124, "grad_norm": 1.375981092453003, "learning_rate": 2e-05, "loss": 0.05480566, "step": 21562 }, { "epoch": 43.126, "grad_norm": 1.1689554452896118, "learning_rate": 2e-05, "loss": 0.03462553, "step": 21563 }, { "epoch": 43.128, "grad_norm": 1.4414396286010742, "learning_rate": 2e-05, "loss": 0.05469351, "step": 21564 }, { "epoch": 43.13, "grad_norm": 1.1081470251083374, "learning_rate": 2e-05, "loss": 0.0445482, "step": 21565 }, { "epoch": 43.132, "grad_norm": 1.3130306005477905, "learning_rate": 2e-05, "loss": 0.03380114, "step": 21566 }, { "epoch": 43.134, "grad_norm": 1.2124284505844116, "learning_rate": 2e-05, "loss": 0.0559901, "step": 21567 }, { "epoch": 43.136, "grad_norm": 1.755231499671936, "learning_rate": 2e-05, "loss": 0.05166025, "step": 21568 }, { "epoch": 43.138, "grad_norm": 1.3129682540893555, "learning_rate": 2e-05, "loss": 0.05173158, "step": 21569 }, { "epoch": 43.14, "grad_norm": 1.3563148975372314, "learning_rate": 2e-05, "loss": 0.05071842, "step": 21570 }, { "epoch": 43.142, "grad_norm": 1.0146980285644531, "learning_rate": 2e-05, "loss": 0.04583877, "step": 21571 }, { "epoch": 43.144, "grad_norm": 1.3281503915786743, "learning_rate": 2e-05, "loss": 0.04625765, "step": 21572 }, { "epoch": 43.146, "grad_norm": 1.9704128503799438, "learning_rate": 2e-05, "loss": 0.04025682, "step": 21573 }, { "epoch": 43.148, "grad_norm": 1.2888715267181396, "learning_rate": 2e-05, "loss": 0.05024606, "step": 21574 }, { "epoch": 43.15, "grad_norm": 1.2609823942184448, "learning_rate": 2e-05, "loss": 0.04634705, "step": 21575 }, { "epoch": 43.152, "grad_norm": 1.2393063306808472, "learning_rate": 2e-05, "loss": 0.05042982, "step": 21576 }, { "epoch": 43.154, "grad_norm": 1.1345429420471191, "learning_rate": 2e-05, "loss": 0.02828003, "step": 21577 }, { "epoch": 43.156, "grad_norm": 1.1094892024993896, "learning_rate": 2e-05, "loss": 0.04609562, "step": 21578 }, { "epoch": 43.158, "grad_norm": 1.19045889377594, "learning_rate": 2e-05, "loss": 0.05094993, "step": 21579 }, { "epoch": 43.16, "grad_norm": 1.1686400175094604, "learning_rate": 2e-05, "loss": 0.03705244, "step": 21580 }, { "epoch": 43.162, "grad_norm": 1.0274404287338257, "learning_rate": 2e-05, "loss": 0.03788059, "step": 21581 }, { "epoch": 43.164, "grad_norm": 0.9964593052864075, "learning_rate": 2e-05, "loss": 0.03574125, "step": 21582 }, { "epoch": 43.166, "grad_norm": 1.8188575506210327, "learning_rate": 2e-05, "loss": 0.05399974, "step": 21583 }, { "epoch": 43.168, "grad_norm": 2.3859317302703857, "learning_rate": 2e-05, "loss": 0.04840841, "step": 21584 }, { "epoch": 43.17, "grad_norm": 1.4382333755493164, "learning_rate": 2e-05, "loss": 0.05152167, "step": 21585 }, { "epoch": 43.172, "grad_norm": 1.2169090509414673, "learning_rate": 2e-05, "loss": 0.04776974, "step": 21586 }, { "epoch": 43.174, "grad_norm": 1.3083583116531372, "learning_rate": 2e-05, "loss": 0.05877044, "step": 21587 }, { "epoch": 43.176, "grad_norm": 1.264892578125, "learning_rate": 2e-05, "loss": 0.04724294, "step": 21588 }, { "epoch": 43.178, "grad_norm": 1.7333793640136719, "learning_rate": 2e-05, "loss": 0.05662074, "step": 21589 }, { "epoch": 43.18, "grad_norm": 1.1243252754211426, "learning_rate": 2e-05, "loss": 0.0430276, "step": 21590 }, { "epoch": 43.182, "grad_norm": 1.2926504611968994, "learning_rate": 2e-05, "loss": 0.04414042, "step": 21591 }, { "epoch": 43.184, "grad_norm": 1.2931801080703735, "learning_rate": 2e-05, "loss": 0.04339027, "step": 21592 }, { "epoch": 43.186, "grad_norm": 1.1842244863510132, "learning_rate": 2e-05, "loss": 0.05153984, "step": 21593 }, { "epoch": 43.188, "grad_norm": 1.176692008972168, "learning_rate": 2e-05, "loss": 0.04030944, "step": 21594 }, { "epoch": 43.19, "grad_norm": 1.0769304037094116, "learning_rate": 2e-05, "loss": 0.03942397, "step": 21595 }, { "epoch": 43.192, "grad_norm": 1.4324592351913452, "learning_rate": 2e-05, "loss": 0.04200315, "step": 21596 }, { "epoch": 43.194, "grad_norm": 1.7846524715423584, "learning_rate": 2e-05, "loss": 0.05106489, "step": 21597 }, { "epoch": 43.196, "grad_norm": 1.0251342058181763, "learning_rate": 2e-05, "loss": 0.03952106, "step": 21598 }, { "epoch": 43.198, "grad_norm": 1.1627730131149292, "learning_rate": 2e-05, "loss": 0.02958023, "step": 21599 }, { "epoch": 43.2, "grad_norm": 1.2952426671981812, "learning_rate": 2e-05, "loss": 0.04737274, "step": 21600 }, { "epoch": 43.202, "grad_norm": 1.7284358739852905, "learning_rate": 2e-05, "loss": 0.06269697, "step": 21601 }, { "epoch": 43.204, "grad_norm": 1.3142390251159668, "learning_rate": 2e-05, "loss": 0.0550441, "step": 21602 }, { "epoch": 43.206, "grad_norm": 2.0693068504333496, "learning_rate": 2e-05, "loss": 0.05673666, "step": 21603 }, { "epoch": 43.208, "grad_norm": 1.3296486139297485, "learning_rate": 2e-05, "loss": 0.04539488, "step": 21604 }, { "epoch": 43.21, "grad_norm": 1.2718302011489868, "learning_rate": 2e-05, "loss": 0.04932036, "step": 21605 }, { "epoch": 43.212, "grad_norm": 1.0521125793457031, "learning_rate": 2e-05, "loss": 0.04189048, "step": 21606 }, { "epoch": 43.214, "grad_norm": 1.1329081058502197, "learning_rate": 2e-05, "loss": 0.04553405, "step": 21607 }, { "epoch": 43.216, "grad_norm": 1.156334400177002, "learning_rate": 2e-05, "loss": 0.05069153, "step": 21608 }, { "epoch": 43.218, "grad_norm": 1.0390546321868896, "learning_rate": 2e-05, "loss": 0.04528834, "step": 21609 }, { "epoch": 43.22, "grad_norm": 0.9134246110916138, "learning_rate": 2e-05, "loss": 0.02971689, "step": 21610 }, { "epoch": 43.222, "grad_norm": 1.1092662811279297, "learning_rate": 2e-05, "loss": 0.03292485, "step": 21611 }, { "epoch": 43.224, "grad_norm": 2.882272958755493, "learning_rate": 2e-05, "loss": 0.05360059, "step": 21612 }, { "epoch": 43.226, "grad_norm": 1.372489094734192, "learning_rate": 2e-05, "loss": 0.04475276, "step": 21613 }, { "epoch": 43.228, "grad_norm": 1.0251941680908203, "learning_rate": 2e-05, "loss": 0.04171642, "step": 21614 }, { "epoch": 43.23, "grad_norm": 1.3642363548278809, "learning_rate": 2e-05, "loss": 0.05339691, "step": 21615 }, { "epoch": 43.232, "grad_norm": 0.9985992908477783, "learning_rate": 2e-05, "loss": 0.04003511, "step": 21616 }, { "epoch": 43.234, "grad_norm": 1.0361459255218506, "learning_rate": 2e-05, "loss": 0.04068734, "step": 21617 }, { "epoch": 43.236, "grad_norm": 1.290665864944458, "learning_rate": 2e-05, "loss": 0.05313302, "step": 21618 }, { "epoch": 43.238, "grad_norm": 1.1882466077804565, "learning_rate": 2e-05, "loss": 0.04230958, "step": 21619 }, { "epoch": 43.24, "grad_norm": 1.1811344623565674, "learning_rate": 2e-05, "loss": 0.04467118, "step": 21620 }, { "epoch": 43.242, "grad_norm": 1.3368701934814453, "learning_rate": 2e-05, "loss": 0.04908449, "step": 21621 }, { "epoch": 43.244, "grad_norm": 1.071096658706665, "learning_rate": 2e-05, "loss": 0.04370485, "step": 21622 }, { "epoch": 43.246, "grad_norm": 1.1309154033660889, "learning_rate": 2e-05, "loss": 0.04717417, "step": 21623 }, { "epoch": 43.248, "grad_norm": 2.024557590484619, "learning_rate": 2e-05, "loss": 0.04132883, "step": 21624 }, { "epoch": 43.25, "grad_norm": 1.165766954421997, "learning_rate": 2e-05, "loss": 0.0444683, "step": 21625 }, { "epoch": 43.252, "grad_norm": 1.0271003246307373, "learning_rate": 2e-05, "loss": 0.04147922, "step": 21626 }, { "epoch": 43.254, "grad_norm": 1.3019037246704102, "learning_rate": 2e-05, "loss": 0.04356762, "step": 21627 }, { "epoch": 43.256, "grad_norm": 1.1984200477600098, "learning_rate": 2e-05, "loss": 0.04804221, "step": 21628 }, { "epoch": 43.258, "grad_norm": 0.9510525465011597, "learning_rate": 2e-05, "loss": 0.03916862, "step": 21629 }, { "epoch": 43.26, "grad_norm": 1.4483345746994019, "learning_rate": 2e-05, "loss": 0.05215236, "step": 21630 }, { "epoch": 43.262, "grad_norm": 1.4785760641098022, "learning_rate": 2e-05, "loss": 0.05577825, "step": 21631 }, { "epoch": 43.264, "grad_norm": 2.0617661476135254, "learning_rate": 2e-05, "loss": 0.05145205, "step": 21632 }, { "epoch": 43.266, "grad_norm": 1.1790543794631958, "learning_rate": 2e-05, "loss": 0.04180574, "step": 21633 }, { "epoch": 43.268, "grad_norm": 1.2387884855270386, "learning_rate": 2e-05, "loss": 0.04779726, "step": 21634 }, { "epoch": 43.27, "grad_norm": 1.2821975946426392, "learning_rate": 2e-05, "loss": 0.04490368, "step": 21635 }, { "epoch": 43.272, "grad_norm": 1.1714630126953125, "learning_rate": 2e-05, "loss": 0.04618052, "step": 21636 }, { "epoch": 43.274, "grad_norm": 1.2499911785125732, "learning_rate": 2e-05, "loss": 0.04752894, "step": 21637 }, { "epoch": 43.276, "grad_norm": 1.9510254859924316, "learning_rate": 2e-05, "loss": 0.0413634, "step": 21638 }, { "epoch": 43.278, "grad_norm": 0.9319188594818115, "learning_rate": 2e-05, "loss": 0.03260102, "step": 21639 }, { "epoch": 43.28, "grad_norm": 1.2887738943099976, "learning_rate": 2e-05, "loss": 0.05729091, "step": 21640 }, { "epoch": 43.282, "grad_norm": 0.9247579574584961, "learning_rate": 2e-05, "loss": 0.03868435, "step": 21641 }, { "epoch": 43.284, "grad_norm": 1.2112103700637817, "learning_rate": 2e-05, "loss": 0.04973643, "step": 21642 }, { "epoch": 43.286, "grad_norm": 1.4075590372085571, "learning_rate": 2e-05, "loss": 0.0373517, "step": 21643 }, { "epoch": 43.288, "grad_norm": 1.0551819801330566, "learning_rate": 2e-05, "loss": 0.04350845, "step": 21644 }, { "epoch": 43.29, "grad_norm": 1.4103049039840698, "learning_rate": 2e-05, "loss": 0.0423544, "step": 21645 }, { "epoch": 43.292, "grad_norm": 1.1112929582595825, "learning_rate": 2e-05, "loss": 0.03513883, "step": 21646 }, { "epoch": 43.294, "grad_norm": 0.9542357325553894, "learning_rate": 2e-05, "loss": 0.02830679, "step": 21647 }, { "epoch": 43.296, "grad_norm": 1.5825624465942383, "learning_rate": 2e-05, "loss": 0.04779094, "step": 21648 }, { "epoch": 43.298, "grad_norm": 1.1777398586273193, "learning_rate": 2e-05, "loss": 0.0480368, "step": 21649 }, { "epoch": 43.3, "grad_norm": 1.8794219493865967, "learning_rate": 2e-05, "loss": 0.05965583, "step": 21650 }, { "epoch": 43.302, "grad_norm": 1.262937307357788, "learning_rate": 2e-05, "loss": 0.06756496, "step": 21651 }, { "epoch": 43.304, "grad_norm": 2.5823323726654053, "learning_rate": 2e-05, "loss": 0.08038095, "step": 21652 }, { "epoch": 43.306, "grad_norm": 1.1853344440460205, "learning_rate": 2e-05, "loss": 0.03952831, "step": 21653 }, { "epoch": 43.308, "grad_norm": 1.780608892440796, "learning_rate": 2e-05, "loss": 0.0625229, "step": 21654 }, { "epoch": 43.31, "grad_norm": 1.1544749736785889, "learning_rate": 2e-05, "loss": 0.04620733, "step": 21655 }, { "epoch": 43.312, "grad_norm": 1.3556686639785767, "learning_rate": 2e-05, "loss": 0.04416988, "step": 21656 }, { "epoch": 43.314, "grad_norm": 1.0415023565292358, "learning_rate": 2e-05, "loss": 0.04206276, "step": 21657 }, { "epoch": 43.316, "grad_norm": 1.2264755964279175, "learning_rate": 2e-05, "loss": 0.0508853, "step": 21658 }, { "epoch": 43.318, "grad_norm": 1.3368821144104004, "learning_rate": 2e-05, "loss": 0.05324509, "step": 21659 }, { "epoch": 43.32, "grad_norm": 1.3012473583221436, "learning_rate": 2e-05, "loss": 0.05128824, "step": 21660 }, { "epoch": 43.322, "grad_norm": 0.9434711933135986, "learning_rate": 2e-05, "loss": 0.0308494, "step": 21661 }, { "epoch": 43.324, "grad_norm": 1.6305221319198608, "learning_rate": 2e-05, "loss": 0.05408993, "step": 21662 }, { "epoch": 43.326, "grad_norm": 1.6955217123031616, "learning_rate": 2e-05, "loss": 0.07358453, "step": 21663 }, { "epoch": 43.328, "grad_norm": 1.4787384271621704, "learning_rate": 2e-05, "loss": 0.05799458, "step": 21664 }, { "epoch": 43.33, "grad_norm": 1.2819902896881104, "learning_rate": 2e-05, "loss": 0.040464, "step": 21665 }, { "epoch": 43.332, "grad_norm": 1.4899117946624756, "learning_rate": 2e-05, "loss": 0.05940196, "step": 21666 }, { "epoch": 43.334, "grad_norm": 1.644298791885376, "learning_rate": 2e-05, "loss": 0.06026432, "step": 21667 }, { "epoch": 43.336, "grad_norm": 1.2563204765319824, "learning_rate": 2e-05, "loss": 0.05732097, "step": 21668 }, { "epoch": 43.338, "grad_norm": 1.1418761014938354, "learning_rate": 2e-05, "loss": 0.04549374, "step": 21669 }, { "epoch": 43.34, "grad_norm": 1.0015463829040527, "learning_rate": 2e-05, "loss": 0.03093121, "step": 21670 }, { "epoch": 43.342, "grad_norm": 1.0512152910232544, "learning_rate": 2e-05, "loss": 0.04237778, "step": 21671 }, { "epoch": 43.344, "grad_norm": 1.1289246082305908, "learning_rate": 2e-05, "loss": 0.04607947, "step": 21672 }, { "epoch": 43.346, "grad_norm": 1.371304988861084, "learning_rate": 2e-05, "loss": 0.05412474, "step": 21673 }, { "epoch": 43.348, "grad_norm": 1.2538726329803467, "learning_rate": 2e-05, "loss": 0.0535718, "step": 21674 }, { "epoch": 43.35, "grad_norm": 1.3319662809371948, "learning_rate": 2e-05, "loss": 0.04659107, "step": 21675 }, { "epoch": 43.352, "grad_norm": 1.1024374961853027, "learning_rate": 2e-05, "loss": 0.03754035, "step": 21676 }, { "epoch": 43.354, "grad_norm": 1.2166569232940674, "learning_rate": 2e-05, "loss": 0.06684306, "step": 21677 }, { "epoch": 43.356, "grad_norm": 2.1136088371276855, "learning_rate": 2e-05, "loss": 0.04331131, "step": 21678 }, { "epoch": 43.358, "grad_norm": 1.2272017002105713, "learning_rate": 2e-05, "loss": 0.04815719, "step": 21679 }, { "epoch": 43.36, "grad_norm": 1.0744694471359253, "learning_rate": 2e-05, "loss": 0.04111854, "step": 21680 }, { "epoch": 43.362, "grad_norm": 1.242724061012268, "learning_rate": 2e-05, "loss": 0.06325316, "step": 21681 }, { "epoch": 43.364, "grad_norm": 1.985144853591919, "learning_rate": 2e-05, "loss": 0.04359365, "step": 21682 }, { "epoch": 43.366, "grad_norm": 1.1064767837524414, "learning_rate": 2e-05, "loss": 0.05320355, "step": 21683 }, { "epoch": 43.368, "grad_norm": 1.189843773841858, "learning_rate": 2e-05, "loss": 0.04457613, "step": 21684 }, { "epoch": 43.37, "grad_norm": 1.203808069229126, "learning_rate": 2e-05, "loss": 0.05187619, "step": 21685 }, { "epoch": 43.372, "grad_norm": 1.1101083755493164, "learning_rate": 2e-05, "loss": 0.04831094, "step": 21686 }, { "epoch": 43.374, "grad_norm": 1.428242564201355, "learning_rate": 2e-05, "loss": 0.06388721, "step": 21687 }, { "epoch": 43.376, "grad_norm": 1.2503690719604492, "learning_rate": 2e-05, "loss": 0.04847308, "step": 21688 }, { "epoch": 43.378, "grad_norm": 1.1128005981445312, "learning_rate": 2e-05, "loss": 0.03843355, "step": 21689 }, { "epoch": 43.38, "grad_norm": 0.9491602778434753, "learning_rate": 2e-05, "loss": 0.0259631, "step": 21690 }, { "epoch": 43.382, "grad_norm": 1.233364224433899, "learning_rate": 2e-05, "loss": 0.0568186, "step": 21691 }, { "epoch": 43.384, "grad_norm": 0.9893776774406433, "learning_rate": 2e-05, "loss": 0.04174522, "step": 21692 }, { "epoch": 43.386, "grad_norm": 1.187612771987915, "learning_rate": 2e-05, "loss": 0.0390992, "step": 21693 }, { "epoch": 43.388, "grad_norm": 1.3221521377563477, "learning_rate": 2e-05, "loss": 0.05131374, "step": 21694 }, { "epoch": 43.39, "grad_norm": 1.2869195938110352, "learning_rate": 2e-05, "loss": 0.05698796, "step": 21695 }, { "epoch": 43.392, "grad_norm": 1.5506631135940552, "learning_rate": 2e-05, "loss": 0.05239779, "step": 21696 }, { "epoch": 43.394, "grad_norm": 1.1303107738494873, "learning_rate": 2e-05, "loss": 0.0321939, "step": 21697 }, { "epoch": 43.396, "grad_norm": 1.2023539543151855, "learning_rate": 2e-05, "loss": 0.04435757, "step": 21698 }, { "epoch": 43.398, "grad_norm": 1.644532322883606, "learning_rate": 2e-05, "loss": 0.05108094, "step": 21699 }, { "epoch": 43.4, "grad_norm": 0.9367179274559021, "learning_rate": 2e-05, "loss": 0.03235884, "step": 21700 }, { "epoch": 43.402, "grad_norm": 1.0146784782409668, "learning_rate": 2e-05, "loss": 0.03669849, "step": 21701 }, { "epoch": 43.404, "grad_norm": 1.1251667737960815, "learning_rate": 2e-05, "loss": 0.04423268, "step": 21702 }, { "epoch": 43.406, "grad_norm": 1.0853943824768066, "learning_rate": 2e-05, "loss": 0.04164097, "step": 21703 }, { "epoch": 43.408, "grad_norm": 1.3860262632369995, "learning_rate": 2e-05, "loss": 0.06020261, "step": 21704 }, { "epoch": 43.41, "grad_norm": 1.621960163116455, "learning_rate": 2e-05, "loss": 0.05325014, "step": 21705 }, { "epoch": 43.412, "grad_norm": 1.4392348527908325, "learning_rate": 2e-05, "loss": 0.03889326, "step": 21706 }, { "epoch": 43.414, "grad_norm": 1.0264551639556885, "learning_rate": 2e-05, "loss": 0.03898916, "step": 21707 }, { "epoch": 43.416, "grad_norm": 1.2684388160705566, "learning_rate": 2e-05, "loss": 0.05191773, "step": 21708 }, { "epoch": 43.418, "grad_norm": 1.1081558465957642, "learning_rate": 2e-05, "loss": 0.04196195, "step": 21709 }, { "epoch": 43.42, "grad_norm": 1.1390728950500488, "learning_rate": 2e-05, "loss": 0.03881846, "step": 21710 }, { "epoch": 43.422, "grad_norm": 1.3561352491378784, "learning_rate": 2e-05, "loss": 0.05348574, "step": 21711 }, { "epoch": 43.424, "grad_norm": 1.4882853031158447, "learning_rate": 2e-05, "loss": 0.06980874, "step": 21712 }, { "epoch": 43.426, "grad_norm": 1.37758469581604, "learning_rate": 2e-05, "loss": 0.06751367, "step": 21713 }, { "epoch": 43.428, "grad_norm": 1.1444541215896606, "learning_rate": 2e-05, "loss": 0.04994627, "step": 21714 }, { "epoch": 43.43, "grad_norm": 1.2537963390350342, "learning_rate": 2e-05, "loss": 0.05444419, "step": 21715 }, { "epoch": 43.432, "grad_norm": 1.0901340246200562, "learning_rate": 2e-05, "loss": 0.05521618, "step": 21716 }, { "epoch": 43.434, "grad_norm": 1.0437510013580322, "learning_rate": 2e-05, "loss": 0.0424891, "step": 21717 }, { "epoch": 43.436, "grad_norm": 1.073522925376892, "learning_rate": 2e-05, "loss": 0.03688889, "step": 21718 }, { "epoch": 43.438, "grad_norm": 1.129959225654602, "learning_rate": 2e-05, "loss": 0.04021513, "step": 21719 }, { "epoch": 43.44, "grad_norm": 1.2731077671051025, "learning_rate": 2e-05, "loss": 0.0577286, "step": 21720 }, { "epoch": 43.442, "grad_norm": 1.4466447830200195, "learning_rate": 2e-05, "loss": 0.06440642, "step": 21721 }, { "epoch": 43.444, "grad_norm": 1.0096185207366943, "learning_rate": 2e-05, "loss": 0.03558685, "step": 21722 }, { "epoch": 43.446, "grad_norm": 1.1729049682617188, "learning_rate": 2e-05, "loss": 0.05121062, "step": 21723 }, { "epoch": 43.448, "grad_norm": 1.3237818479537964, "learning_rate": 2e-05, "loss": 0.05145048, "step": 21724 }, { "epoch": 43.45, "grad_norm": 1.4835050106048584, "learning_rate": 2e-05, "loss": 0.04858468, "step": 21725 }, { "epoch": 43.452, "grad_norm": 1.2140564918518066, "learning_rate": 2e-05, "loss": 0.04449757, "step": 21726 }, { "epoch": 43.454, "grad_norm": 1.1771492958068848, "learning_rate": 2e-05, "loss": 0.05757561, "step": 21727 }, { "epoch": 43.456, "grad_norm": 1.0299850702285767, "learning_rate": 2e-05, "loss": 0.03556947, "step": 21728 }, { "epoch": 43.458, "grad_norm": 1.5381124019622803, "learning_rate": 2e-05, "loss": 0.06413664, "step": 21729 }, { "epoch": 43.46, "grad_norm": 2.5248165130615234, "learning_rate": 2e-05, "loss": 0.05302243, "step": 21730 }, { "epoch": 43.462, "grad_norm": 1.3111073970794678, "learning_rate": 2e-05, "loss": 0.04211747, "step": 21731 }, { "epoch": 43.464, "grad_norm": 1.2731165885925293, "learning_rate": 2e-05, "loss": 0.05004764, "step": 21732 }, { "epoch": 43.466, "grad_norm": 0.9703748822212219, "learning_rate": 2e-05, "loss": 0.03877125, "step": 21733 }, { "epoch": 43.468, "grad_norm": 1.021268367767334, "learning_rate": 2e-05, "loss": 0.03762333, "step": 21734 }, { "epoch": 43.47, "grad_norm": 1.2705427408218384, "learning_rate": 2e-05, "loss": 0.05988788, "step": 21735 }, { "epoch": 43.472, "grad_norm": 1.2365317344665527, "learning_rate": 2e-05, "loss": 0.05386936, "step": 21736 }, { "epoch": 43.474, "grad_norm": 1.3400436639785767, "learning_rate": 2e-05, "loss": 0.04573278, "step": 21737 }, { "epoch": 43.476, "grad_norm": 1.3688085079193115, "learning_rate": 2e-05, "loss": 0.05334489, "step": 21738 }, { "epoch": 43.478, "grad_norm": 1.086027979850769, "learning_rate": 2e-05, "loss": 0.0416183, "step": 21739 }, { "epoch": 43.48, "grad_norm": 1.3055415153503418, "learning_rate": 2e-05, "loss": 0.03842257, "step": 21740 }, { "epoch": 43.482, "grad_norm": 1.111611247062683, "learning_rate": 2e-05, "loss": 0.0508645, "step": 21741 }, { "epoch": 43.484, "grad_norm": 1.194685935974121, "learning_rate": 2e-05, "loss": 0.03484659, "step": 21742 }, { "epoch": 43.486, "grad_norm": 1.132163405418396, "learning_rate": 2e-05, "loss": 0.03633229, "step": 21743 }, { "epoch": 43.488, "grad_norm": 1.3565982580184937, "learning_rate": 2e-05, "loss": 0.04545782, "step": 21744 }, { "epoch": 43.49, "grad_norm": 1.4622793197631836, "learning_rate": 2e-05, "loss": 0.05259608, "step": 21745 }, { "epoch": 43.492, "grad_norm": 1.0451035499572754, "learning_rate": 2e-05, "loss": 0.04075598, "step": 21746 }, { "epoch": 43.494, "grad_norm": 2.7072818279266357, "learning_rate": 2e-05, "loss": 0.06001053, "step": 21747 }, { "epoch": 43.496, "grad_norm": 1.031333088874817, "learning_rate": 2e-05, "loss": 0.03389198, "step": 21748 }, { "epoch": 43.498, "grad_norm": 0.9913927316665649, "learning_rate": 2e-05, "loss": 0.03781866, "step": 21749 }, { "epoch": 43.5, "grad_norm": 1.303167462348938, "learning_rate": 2e-05, "loss": 0.03760751, "step": 21750 }, { "epoch": 43.502, "grad_norm": 1.3275164365768433, "learning_rate": 2e-05, "loss": 0.04503237, "step": 21751 }, { "epoch": 43.504, "grad_norm": 1.729905366897583, "learning_rate": 2e-05, "loss": 0.04633639, "step": 21752 }, { "epoch": 43.506, "grad_norm": 1.1333032846450806, "learning_rate": 2e-05, "loss": 0.05070849, "step": 21753 }, { "epoch": 43.508, "grad_norm": 1.3890432119369507, "learning_rate": 2e-05, "loss": 0.0517424, "step": 21754 }, { "epoch": 43.51, "grad_norm": 1.8249478340148926, "learning_rate": 2e-05, "loss": 0.03821604, "step": 21755 }, { "epoch": 43.512, "grad_norm": 1.1964874267578125, "learning_rate": 2e-05, "loss": 0.04850964, "step": 21756 }, { "epoch": 43.514, "grad_norm": 1.322390079498291, "learning_rate": 2e-05, "loss": 0.04571023, "step": 21757 }, { "epoch": 43.516, "grad_norm": 1.1808900833129883, "learning_rate": 2e-05, "loss": 0.05330661, "step": 21758 }, { "epoch": 43.518, "grad_norm": 1.255068063735962, "learning_rate": 2e-05, "loss": 0.05100637, "step": 21759 }, { "epoch": 43.52, "grad_norm": 1.2289645671844482, "learning_rate": 2e-05, "loss": 0.04602298, "step": 21760 }, { "epoch": 43.522, "grad_norm": 1.1310776472091675, "learning_rate": 2e-05, "loss": 0.05270489, "step": 21761 }, { "epoch": 43.524, "grad_norm": 1.2839418649673462, "learning_rate": 2e-05, "loss": 0.04927288, "step": 21762 }, { "epoch": 43.526, "grad_norm": 1.0740174055099487, "learning_rate": 2e-05, "loss": 0.03569305, "step": 21763 }, { "epoch": 43.528, "grad_norm": 1.2840776443481445, "learning_rate": 2e-05, "loss": 0.0497875, "step": 21764 }, { "epoch": 43.53, "grad_norm": 1.718205213546753, "learning_rate": 2e-05, "loss": 0.05506313, "step": 21765 }, { "epoch": 43.532, "grad_norm": 1.0014818906784058, "learning_rate": 2e-05, "loss": 0.03927164, "step": 21766 }, { "epoch": 43.534, "grad_norm": 1.1391730308532715, "learning_rate": 2e-05, "loss": 0.03087741, "step": 21767 }, { "epoch": 43.536, "grad_norm": 1.2874438762664795, "learning_rate": 2e-05, "loss": 0.05720982, "step": 21768 }, { "epoch": 43.538, "grad_norm": 1.5402920246124268, "learning_rate": 2e-05, "loss": 0.05315606, "step": 21769 }, { "epoch": 43.54, "grad_norm": 1.420612096786499, "learning_rate": 2e-05, "loss": 0.04444424, "step": 21770 }, { "epoch": 43.542, "grad_norm": 0.9867546558380127, "learning_rate": 2e-05, "loss": 0.03263182, "step": 21771 }, { "epoch": 43.544, "grad_norm": 1.4081987142562866, "learning_rate": 2e-05, "loss": 0.05344656, "step": 21772 }, { "epoch": 43.546, "grad_norm": 1.277359962463379, "learning_rate": 2e-05, "loss": 0.05032256, "step": 21773 }, { "epoch": 43.548, "grad_norm": 1.1443043947219849, "learning_rate": 2e-05, "loss": 0.04011863, "step": 21774 }, { "epoch": 43.55, "grad_norm": 1.1632673740386963, "learning_rate": 2e-05, "loss": 0.03645647, "step": 21775 }, { "epoch": 43.552, "grad_norm": 1.0224082469940186, "learning_rate": 2e-05, "loss": 0.02952418, "step": 21776 }, { "epoch": 43.554, "grad_norm": 1.3022348880767822, "learning_rate": 2e-05, "loss": 0.05518476, "step": 21777 }, { "epoch": 43.556, "grad_norm": 1.1476200819015503, "learning_rate": 2e-05, "loss": 0.03775834, "step": 21778 }, { "epoch": 43.558, "grad_norm": 1.0009914636611938, "learning_rate": 2e-05, "loss": 0.03371383, "step": 21779 }, { "epoch": 43.56, "grad_norm": 1.1276997327804565, "learning_rate": 2e-05, "loss": 0.0438572, "step": 21780 }, { "epoch": 43.562, "grad_norm": 1.9971312284469604, "learning_rate": 2e-05, "loss": 0.05232399, "step": 21781 }, { "epoch": 43.564, "grad_norm": 1.3514554500579834, "learning_rate": 2e-05, "loss": 0.0524235, "step": 21782 }, { "epoch": 43.566, "grad_norm": 1.3473267555236816, "learning_rate": 2e-05, "loss": 0.05278003, "step": 21783 }, { "epoch": 43.568, "grad_norm": 1.1983340978622437, "learning_rate": 2e-05, "loss": 0.05057519, "step": 21784 }, { "epoch": 43.57, "grad_norm": 1.5899887084960938, "learning_rate": 2e-05, "loss": 0.03519575, "step": 21785 }, { "epoch": 43.572, "grad_norm": 1.1169793605804443, "learning_rate": 2e-05, "loss": 0.04725049, "step": 21786 }, { "epoch": 43.574, "grad_norm": 1.2387906312942505, "learning_rate": 2e-05, "loss": 0.03475452, "step": 21787 }, { "epoch": 43.576, "grad_norm": 2.203519821166992, "learning_rate": 2e-05, "loss": 0.04944497, "step": 21788 }, { "epoch": 43.578, "grad_norm": 1.1906425952911377, "learning_rate": 2e-05, "loss": 0.04174117, "step": 21789 }, { "epoch": 43.58, "grad_norm": 1.4967057704925537, "learning_rate": 2e-05, "loss": 0.05349187, "step": 21790 }, { "epoch": 43.582, "grad_norm": 1.9748947620391846, "learning_rate": 2e-05, "loss": 0.03838755, "step": 21791 }, { "epoch": 43.584, "grad_norm": 1.223773717880249, "learning_rate": 2e-05, "loss": 0.03617196, "step": 21792 }, { "epoch": 43.586, "grad_norm": 1.0227309465408325, "learning_rate": 2e-05, "loss": 0.04097284, "step": 21793 }, { "epoch": 43.588, "grad_norm": 1.2495092153549194, "learning_rate": 2e-05, "loss": 0.05023031, "step": 21794 }, { "epoch": 43.59, "grad_norm": 1.443373203277588, "learning_rate": 2e-05, "loss": 0.05545416, "step": 21795 }, { "epoch": 43.592, "grad_norm": 1.3429638147354126, "learning_rate": 2e-05, "loss": 0.04617237, "step": 21796 }, { "epoch": 43.594, "grad_norm": 1.17878258228302, "learning_rate": 2e-05, "loss": 0.04778713, "step": 21797 }, { "epoch": 43.596, "grad_norm": 1.4965510368347168, "learning_rate": 2e-05, "loss": 0.07662305, "step": 21798 }, { "epoch": 43.598, "grad_norm": 1.0231486558914185, "learning_rate": 2e-05, "loss": 0.03974038, "step": 21799 }, { "epoch": 43.6, "grad_norm": 1.3388340473175049, "learning_rate": 2e-05, "loss": 0.05395049, "step": 21800 }, { "epoch": 43.602, "grad_norm": 1.15225088596344, "learning_rate": 2e-05, "loss": 0.05144007, "step": 21801 }, { "epoch": 43.604, "grad_norm": 1.2700670957565308, "learning_rate": 2e-05, "loss": 0.05085467, "step": 21802 }, { "epoch": 43.606, "grad_norm": 1.434041976928711, "learning_rate": 2e-05, "loss": 0.05072808, "step": 21803 }, { "epoch": 43.608, "grad_norm": 1.410075306892395, "learning_rate": 2e-05, "loss": 0.04461277, "step": 21804 }, { "epoch": 43.61, "grad_norm": 1.0994086265563965, "learning_rate": 2e-05, "loss": 0.03965208, "step": 21805 }, { "epoch": 43.612, "grad_norm": 1.2478328943252563, "learning_rate": 2e-05, "loss": 0.05292454, "step": 21806 }, { "epoch": 43.614, "grad_norm": 1.4595047235488892, "learning_rate": 2e-05, "loss": 0.05401939, "step": 21807 }, { "epoch": 43.616, "grad_norm": 1.2957178354263306, "learning_rate": 2e-05, "loss": 0.05286802, "step": 21808 }, { "epoch": 43.618, "grad_norm": 1.0460758209228516, "learning_rate": 2e-05, "loss": 0.03856381, "step": 21809 }, { "epoch": 43.62, "grad_norm": 1.4046540260314941, "learning_rate": 2e-05, "loss": 0.04930651, "step": 21810 }, { "epoch": 43.622, "grad_norm": 0.9765205383300781, "learning_rate": 2e-05, "loss": 0.03233945, "step": 21811 }, { "epoch": 43.624, "grad_norm": 1.4802238941192627, "learning_rate": 2e-05, "loss": 0.05282279, "step": 21812 }, { "epoch": 43.626, "grad_norm": 1.6739566326141357, "learning_rate": 2e-05, "loss": 0.04126877, "step": 21813 }, { "epoch": 43.628, "grad_norm": 1.0595064163208008, "learning_rate": 2e-05, "loss": 0.04565667, "step": 21814 }, { "epoch": 43.63, "grad_norm": 1.4827672243118286, "learning_rate": 2e-05, "loss": 0.03800133, "step": 21815 }, { "epoch": 43.632, "grad_norm": 1.0282008647918701, "learning_rate": 2e-05, "loss": 0.03047697, "step": 21816 }, { "epoch": 43.634, "grad_norm": 2.9951870441436768, "learning_rate": 2e-05, "loss": 0.03744018, "step": 21817 }, { "epoch": 43.636, "grad_norm": 1.112250566482544, "learning_rate": 2e-05, "loss": 0.04401822, "step": 21818 }, { "epoch": 43.638, "grad_norm": 1.140373706817627, "learning_rate": 2e-05, "loss": 0.0461672, "step": 21819 }, { "epoch": 43.64, "grad_norm": 1.1561338901519775, "learning_rate": 2e-05, "loss": 0.0520758, "step": 21820 }, { "epoch": 43.642, "grad_norm": 1.1361647844314575, "learning_rate": 2e-05, "loss": 0.04934377, "step": 21821 }, { "epoch": 43.644, "grad_norm": 0.7384290099143982, "learning_rate": 2e-05, "loss": 0.02234074, "step": 21822 }, { "epoch": 43.646, "grad_norm": 1.0414352416992188, "learning_rate": 2e-05, "loss": 0.03685948, "step": 21823 }, { "epoch": 43.648, "grad_norm": 1.0054208040237427, "learning_rate": 2e-05, "loss": 0.03611515, "step": 21824 }, { "epoch": 43.65, "grad_norm": 1.6434670686721802, "learning_rate": 2e-05, "loss": 0.04983237, "step": 21825 }, { "epoch": 43.652, "grad_norm": 1.1644682884216309, "learning_rate": 2e-05, "loss": 0.04886763, "step": 21826 }, { "epoch": 43.654, "grad_norm": 2.3917458057403564, "learning_rate": 2e-05, "loss": 0.07453853, "step": 21827 }, { "epoch": 43.656, "grad_norm": 1.2234466075897217, "learning_rate": 2e-05, "loss": 0.05833396, "step": 21828 }, { "epoch": 43.658, "grad_norm": 1.1793771982192993, "learning_rate": 2e-05, "loss": 0.04483267, "step": 21829 }, { "epoch": 43.66, "grad_norm": 1.5220239162445068, "learning_rate": 2e-05, "loss": 0.06673879, "step": 21830 }, { "epoch": 43.662, "grad_norm": 1.2569732666015625, "learning_rate": 2e-05, "loss": 0.04199339, "step": 21831 }, { "epoch": 43.664, "grad_norm": 1.400726079940796, "learning_rate": 2e-05, "loss": 0.04842927, "step": 21832 }, { "epoch": 43.666, "grad_norm": 1.587497591972351, "learning_rate": 2e-05, "loss": 0.06091474, "step": 21833 }, { "epoch": 43.668, "grad_norm": 2.467782735824585, "learning_rate": 2e-05, "loss": 0.057216, "step": 21834 }, { "epoch": 43.67, "grad_norm": 1.1979477405548096, "learning_rate": 2e-05, "loss": 0.05099071, "step": 21835 }, { "epoch": 43.672, "grad_norm": 1.6900266408920288, "learning_rate": 2e-05, "loss": 0.04967137, "step": 21836 }, { "epoch": 43.674, "grad_norm": 1.1722626686096191, "learning_rate": 2e-05, "loss": 0.0484486, "step": 21837 }, { "epoch": 43.676, "grad_norm": 1.4999947547912598, "learning_rate": 2e-05, "loss": 0.06908549, "step": 21838 }, { "epoch": 43.678, "grad_norm": 1.2642544507980347, "learning_rate": 2e-05, "loss": 0.04293026, "step": 21839 }, { "epoch": 43.68, "grad_norm": 1.0956248044967651, "learning_rate": 2e-05, "loss": 0.03712745, "step": 21840 }, { "epoch": 43.682, "grad_norm": 2.061314344406128, "learning_rate": 2e-05, "loss": 0.05671034, "step": 21841 }, { "epoch": 43.684, "grad_norm": 1.2066631317138672, "learning_rate": 2e-05, "loss": 0.05251576, "step": 21842 }, { "epoch": 43.686, "grad_norm": 1.1805452108383179, "learning_rate": 2e-05, "loss": 0.05300939, "step": 21843 }, { "epoch": 43.688, "grad_norm": 1.5431995391845703, "learning_rate": 2e-05, "loss": 0.06416997, "step": 21844 }, { "epoch": 43.69, "grad_norm": 0.9500445127487183, "learning_rate": 2e-05, "loss": 0.02867823, "step": 21845 }, { "epoch": 43.692, "grad_norm": 1.0574393272399902, "learning_rate": 2e-05, "loss": 0.03581215, "step": 21846 }, { "epoch": 43.694, "grad_norm": 1.081818699836731, "learning_rate": 2e-05, "loss": 0.03437539, "step": 21847 }, { "epoch": 43.696, "grad_norm": 1.1467571258544922, "learning_rate": 2e-05, "loss": 0.04538688, "step": 21848 }, { "epoch": 43.698, "grad_norm": 1.7992892265319824, "learning_rate": 2e-05, "loss": 0.05461973, "step": 21849 }, { "epoch": 43.7, "grad_norm": 1.0957931280136108, "learning_rate": 2e-05, "loss": 0.03794325, "step": 21850 }, { "epoch": 43.702, "grad_norm": 1.1432968378067017, "learning_rate": 2e-05, "loss": 0.04415257, "step": 21851 }, { "epoch": 43.704, "grad_norm": 1.2000794410705566, "learning_rate": 2e-05, "loss": 0.03692243, "step": 21852 }, { "epoch": 43.706, "grad_norm": 2.0741629600524902, "learning_rate": 2e-05, "loss": 0.05918703, "step": 21853 }, { "epoch": 43.708, "grad_norm": 5.487260341644287, "learning_rate": 2e-05, "loss": 0.07302981, "step": 21854 }, { "epoch": 43.71, "grad_norm": 1.12295401096344, "learning_rate": 2e-05, "loss": 0.03448126, "step": 21855 }, { "epoch": 43.712, "grad_norm": 1.3459410667419434, "learning_rate": 2e-05, "loss": 0.05415928, "step": 21856 }, { "epoch": 43.714, "grad_norm": 2.604322910308838, "learning_rate": 2e-05, "loss": 0.0712513, "step": 21857 }, { "epoch": 43.716, "grad_norm": 1.216117024421692, "learning_rate": 2e-05, "loss": 0.04590901, "step": 21858 }, { "epoch": 43.718, "grad_norm": 1.3675901889801025, "learning_rate": 2e-05, "loss": 0.04854389, "step": 21859 }, { "epoch": 43.72, "grad_norm": 2.20420241355896, "learning_rate": 2e-05, "loss": 0.05691512, "step": 21860 }, { "epoch": 43.722, "grad_norm": 1.0691967010498047, "learning_rate": 2e-05, "loss": 0.04248612, "step": 21861 }, { "epoch": 43.724, "grad_norm": 1.1441617012023926, "learning_rate": 2e-05, "loss": 0.04443025, "step": 21862 }, { "epoch": 43.726, "grad_norm": 1.215785264968872, "learning_rate": 2e-05, "loss": 0.04109699, "step": 21863 }, { "epoch": 43.728, "grad_norm": 1.1096992492675781, "learning_rate": 2e-05, "loss": 0.04342065, "step": 21864 }, { "epoch": 43.73, "grad_norm": 1.2204557657241821, "learning_rate": 2e-05, "loss": 0.04820478, "step": 21865 }, { "epoch": 43.732, "grad_norm": 1.8152563571929932, "learning_rate": 2e-05, "loss": 0.04331843, "step": 21866 }, { "epoch": 43.734, "grad_norm": 1.228575587272644, "learning_rate": 2e-05, "loss": 0.0453392, "step": 21867 }, { "epoch": 43.736, "grad_norm": 1.2545289993286133, "learning_rate": 2e-05, "loss": 0.05211987, "step": 21868 }, { "epoch": 43.738, "grad_norm": 1.061532974243164, "learning_rate": 2e-05, "loss": 0.03736347, "step": 21869 }, { "epoch": 43.74, "grad_norm": 1.4098767042160034, "learning_rate": 2e-05, "loss": 0.04484908, "step": 21870 }, { "epoch": 43.742, "grad_norm": 1.2383657693862915, "learning_rate": 2e-05, "loss": 0.05023118, "step": 21871 }, { "epoch": 43.744, "grad_norm": 1.0549107789993286, "learning_rate": 2e-05, "loss": 0.03575772, "step": 21872 }, { "epoch": 43.746, "grad_norm": 1.266374945640564, "learning_rate": 2e-05, "loss": 0.05691652, "step": 21873 }, { "epoch": 43.748, "grad_norm": 1.2940677404403687, "learning_rate": 2e-05, "loss": 0.05207343, "step": 21874 }, { "epoch": 43.75, "grad_norm": 1.380552887916565, "learning_rate": 2e-05, "loss": 0.03950589, "step": 21875 }, { "epoch": 43.752, "grad_norm": 1.1680132150650024, "learning_rate": 2e-05, "loss": 0.0435213, "step": 21876 }, { "epoch": 43.754, "grad_norm": 2.5544819831848145, "learning_rate": 2e-05, "loss": 0.0586822, "step": 21877 }, { "epoch": 43.756, "grad_norm": 1.1925801038742065, "learning_rate": 2e-05, "loss": 0.0523452, "step": 21878 }, { "epoch": 43.758, "grad_norm": 1.2226167917251587, "learning_rate": 2e-05, "loss": 0.04687249, "step": 21879 }, { "epoch": 43.76, "grad_norm": 1.0646800994873047, "learning_rate": 2e-05, "loss": 0.04498032, "step": 21880 }, { "epoch": 43.762, "grad_norm": 1.3412402868270874, "learning_rate": 2e-05, "loss": 0.05164725, "step": 21881 }, { "epoch": 43.764, "grad_norm": 1.0574461221694946, "learning_rate": 2e-05, "loss": 0.0448842, "step": 21882 }, { "epoch": 43.766, "grad_norm": 1.0488017797470093, "learning_rate": 2e-05, "loss": 0.02794564, "step": 21883 }, { "epoch": 43.768, "grad_norm": 1.2221869230270386, "learning_rate": 2e-05, "loss": 0.05013753, "step": 21884 }, { "epoch": 43.77, "grad_norm": 1.2905614376068115, "learning_rate": 2e-05, "loss": 0.04898647, "step": 21885 }, { "epoch": 43.772, "grad_norm": 1.3449629545211792, "learning_rate": 2e-05, "loss": 0.05343453, "step": 21886 }, { "epoch": 43.774, "grad_norm": 1.0985403060913086, "learning_rate": 2e-05, "loss": 0.04567765, "step": 21887 }, { "epoch": 43.776, "grad_norm": 1.7854421138763428, "learning_rate": 2e-05, "loss": 0.05000356, "step": 21888 }, { "epoch": 43.778, "grad_norm": 0.9867087602615356, "learning_rate": 2e-05, "loss": 0.03435191, "step": 21889 }, { "epoch": 43.78, "grad_norm": 1.1816110610961914, "learning_rate": 2e-05, "loss": 0.04493886, "step": 21890 }, { "epoch": 43.782, "grad_norm": 1.8454886674880981, "learning_rate": 2e-05, "loss": 0.07082605, "step": 21891 }, { "epoch": 43.784, "grad_norm": 1.6479289531707764, "learning_rate": 2e-05, "loss": 0.05434608, "step": 21892 }, { "epoch": 43.786, "grad_norm": 1.2260222434997559, "learning_rate": 2e-05, "loss": 0.0397054, "step": 21893 }, { "epoch": 43.788, "grad_norm": 1.0151551961898804, "learning_rate": 2e-05, "loss": 0.03879777, "step": 21894 }, { "epoch": 43.79, "grad_norm": 1.233112096786499, "learning_rate": 2e-05, "loss": 0.04530869, "step": 21895 }, { "epoch": 43.792, "grad_norm": 1.4653815031051636, "learning_rate": 2e-05, "loss": 0.06113261, "step": 21896 }, { "epoch": 43.794, "grad_norm": 1.832763671875, "learning_rate": 2e-05, "loss": 0.06107483, "step": 21897 }, { "epoch": 43.796, "grad_norm": 1.1519736051559448, "learning_rate": 2e-05, "loss": 0.04647954, "step": 21898 }, { "epoch": 43.798, "grad_norm": 1.4427329301834106, "learning_rate": 2e-05, "loss": 0.0451367, "step": 21899 }, { "epoch": 43.8, "grad_norm": 1.0031224489212036, "learning_rate": 2e-05, "loss": 0.04659247, "step": 21900 }, { "epoch": 43.802, "grad_norm": 1.3513147830963135, "learning_rate": 2e-05, "loss": 0.03858761, "step": 21901 }, { "epoch": 43.804, "grad_norm": 1.4595290422439575, "learning_rate": 2e-05, "loss": 0.04975832, "step": 21902 }, { "epoch": 43.806, "grad_norm": 1.4404231309890747, "learning_rate": 2e-05, "loss": 0.04388293, "step": 21903 }, { "epoch": 43.808, "grad_norm": 1.1193671226501465, "learning_rate": 2e-05, "loss": 0.0433252, "step": 21904 }, { "epoch": 43.81, "grad_norm": 1.7537304162979126, "learning_rate": 2e-05, "loss": 0.05322967, "step": 21905 }, { "epoch": 43.812, "grad_norm": 1.0735844373703003, "learning_rate": 2e-05, "loss": 0.03970418, "step": 21906 }, { "epoch": 43.814, "grad_norm": 1.533126711845398, "learning_rate": 2e-05, "loss": 0.05987874, "step": 21907 }, { "epoch": 43.816, "grad_norm": 1.0501418113708496, "learning_rate": 2e-05, "loss": 0.03239897, "step": 21908 }, { "epoch": 43.818, "grad_norm": 1.326298475265503, "learning_rate": 2e-05, "loss": 0.05782353, "step": 21909 }, { "epoch": 43.82, "grad_norm": 1.2999595403671265, "learning_rate": 2e-05, "loss": 0.04503709, "step": 21910 }, { "epoch": 43.822, "grad_norm": 1.109490156173706, "learning_rate": 2e-05, "loss": 0.0462186, "step": 21911 }, { "epoch": 43.824, "grad_norm": 1.1068756580352783, "learning_rate": 2e-05, "loss": 0.03144252, "step": 21912 }, { "epoch": 43.826, "grad_norm": 1.1656792163848877, "learning_rate": 2e-05, "loss": 0.04489972, "step": 21913 }, { "epoch": 43.828, "grad_norm": 1.3886311054229736, "learning_rate": 2e-05, "loss": 0.04568454, "step": 21914 }, { "epoch": 43.83, "grad_norm": 1.2280189990997314, "learning_rate": 2e-05, "loss": 0.04975533, "step": 21915 }, { "epoch": 43.832, "grad_norm": 1.2794169187545776, "learning_rate": 2e-05, "loss": 0.04802956, "step": 21916 }, { "epoch": 43.834, "grad_norm": 1.431793451309204, "learning_rate": 2e-05, "loss": 0.0503118, "step": 21917 }, { "epoch": 43.836, "grad_norm": 1.1943049430847168, "learning_rate": 2e-05, "loss": 0.05117747, "step": 21918 }, { "epoch": 43.838, "grad_norm": 1.1528602838516235, "learning_rate": 2e-05, "loss": 0.03906956, "step": 21919 }, { "epoch": 43.84, "grad_norm": 1.3231438398361206, "learning_rate": 2e-05, "loss": 0.04882731, "step": 21920 }, { "epoch": 43.842, "grad_norm": 1.6413450241088867, "learning_rate": 2e-05, "loss": 0.04453898, "step": 21921 }, { "epoch": 43.844, "grad_norm": 1.188307285308838, "learning_rate": 2e-05, "loss": 0.04830351, "step": 21922 }, { "epoch": 43.846, "grad_norm": 1.5874241590499878, "learning_rate": 2e-05, "loss": 0.05982009, "step": 21923 }, { "epoch": 43.848, "grad_norm": 0.9717968702316284, "learning_rate": 2e-05, "loss": 0.03369679, "step": 21924 }, { "epoch": 43.85, "grad_norm": 1.2073335647583008, "learning_rate": 2e-05, "loss": 0.03507089, "step": 21925 }, { "epoch": 43.852, "grad_norm": 1.133551001548767, "learning_rate": 2e-05, "loss": 0.04241706, "step": 21926 }, { "epoch": 43.854, "grad_norm": 1.2795426845550537, "learning_rate": 2e-05, "loss": 0.04569482, "step": 21927 }, { "epoch": 43.856, "grad_norm": 1.216707706451416, "learning_rate": 2e-05, "loss": 0.05699688, "step": 21928 }, { "epoch": 43.858, "grad_norm": 1.9533652067184448, "learning_rate": 2e-05, "loss": 0.07641211, "step": 21929 }, { "epoch": 43.86, "grad_norm": 1.0922882556915283, "learning_rate": 2e-05, "loss": 0.02667849, "step": 21930 }, { "epoch": 43.862, "grad_norm": 1.0828759670257568, "learning_rate": 2e-05, "loss": 0.04916801, "step": 21931 }, { "epoch": 43.864, "grad_norm": 1.2230288982391357, "learning_rate": 2e-05, "loss": 0.05529765, "step": 21932 }, { "epoch": 43.866, "grad_norm": 1.1177533864974976, "learning_rate": 2e-05, "loss": 0.03495992, "step": 21933 }, { "epoch": 43.868, "grad_norm": 1.0803815126419067, "learning_rate": 2e-05, "loss": 0.04231985, "step": 21934 }, { "epoch": 43.87, "grad_norm": 1.170892357826233, "learning_rate": 2e-05, "loss": 0.04216111, "step": 21935 }, { "epoch": 43.872, "grad_norm": 1.6169553995132446, "learning_rate": 2e-05, "loss": 0.04968107, "step": 21936 }, { "epoch": 43.874, "grad_norm": 1.2540957927703857, "learning_rate": 2e-05, "loss": 0.05158932, "step": 21937 }, { "epoch": 43.876, "grad_norm": 0.9766530394554138, "learning_rate": 2e-05, "loss": 0.02565751, "step": 21938 }, { "epoch": 43.878, "grad_norm": 1.1864181756973267, "learning_rate": 2e-05, "loss": 0.04187727, "step": 21939 }, { "epoch": 43.88, "grad_norm": 1.157779335975647, "learning_rate": 2e-05, "loss": 0.04422162, "step": 21940 }, { "epoch": 43.882, "grad_norm": 3.2718393802642822, "learning_rate": 2e-05, "loss": 0.03786396, "step": 21941 }, { "epoch": 43.884, "grad_norm": 1.7684119939804077, "learning_rate": 2e-05, "loss": 0.04769549, "step": 21942 }, { "epoch": 43.886, "grad_norm": 1.5492713451385498, "learning_rate": 2e-05, "loss": 0.06170796, "step": 21943 }, { "epoch": 43.888, "grad_norm": 1.0928599834442139, "learning_rate": 2e-05, "loss": 0.03716299, "step": 21944 }, { "epoch": 43.89, "grad_norm": 2.3191888332366943, "learning_rate": 2e-05, "loss": 0.06007298, "step": 21945 }, { "epoch": 43.892, "grad_norm": 1.7292722463607788, "learning_rate": 2e-05, "loss": 0.04911011, "step": 21946 }, { "epoch": 43.894, "grad_norm": 1.0400488376617432, "learning_rate": 2e-05, "loss": 0.03599721, "step": 21947 }, { "epoch": 43.896, "grad_norm": 1.2575953006744385, "learning_rate": 2e-05, "loss": 0.05165, "step": 21948 }, { "epoch": 43.898, "grad_norm": 1.1358708143234253, "learning_rate": 2e-05, "loss": 0.04962153, "step": 21949 }, { "epoch": 43.9, "grad_norm": 1.5051313638687134, "learning_rate": 2e-05, "loss": 0.04658557, "step": 21950 }, { "epoch": 43.902, "grad_norm": 0.9277594685554504, "learning_rate": 2e-05, "loss": 0.03316977, "step": 21951 }, { "epoch": 43.904, "grad_norm": 1.3635467290878296, "learning_rate": 2e-05, "loss": 0.05430625, "step": 21952 }, { "epoch": 43.906, "grad_norm": 1.391068458557129, "learning_rate": 2e-05, "loss": 0.06159748, "step": 21953 }, { "epoch": 43.908, "grad_norm": 1.538338303565979, "learning_rate": 2e-05, "loss": 0.04379988, "step": 21954 }, { "epoch": 43.91, "grad_norm": 3.397902250289917, "learning_rate": 2e-05, "loss": 0.06411252, "step": 21955 }, { "epoch": 43.912, "grad_norm": 1.0770752429962158, "learning_rate": 2e-05, "loss": 0.04424539, "step": 21956 }, { "epoch": 43.914, "grad_norm": 1.3942207098007202, "learning_rate": 2e-05, "loss": 0.04902345, "step": 21957 }, { "epoch": 43.916, "grad_norm": 1.1789013147354126, "learning_rate": 2e-05, "loss": 0.04915366, "step": 21958 }, { "epoch": 43.918, "grad_norm": 1.31106698513031, "learning_rate": 2e-05, "loss": 0.03723363, "step": 21959 }, { "epoch": 43.92, "grad_norm": 1.3015276193618774, "learning_rate": 2e-05, "loss": 0.04564174, "step": 21960 }, { "epoch": 43.922, "grad_norm": 1.144248127937317, "learning_rate": 2e-05, "loss": 0.0414278, "step": 21961 }, { "epoch": 43.924, "grad_norm": 1.1458982229232788, "learning_rate": 2e-05, "loss": 0.04920912, "step": 21962 }, { "epoch": 43.926, "grad_norm": 1.6706799268722534, "learning_rate": 2e-05, "loss": 0.04606791, "step": 21963 }, { "epoch": 43.928, "grad_norm": 1.1837114095687866, "learning_rate": 2e-05, "loss": 0.04852568, "step": 21964 }, { "epoch": 43.93, "grad_norm": 1.614950180053711, "learning_rate": 2e-05, "loss": 0.05900152, "step": 21965 }, { "epoch": 43.932, "grad_norm": 1.168605923652649, "learning_rate": 2e-05, "loss": 0.04590261, "step": 21966 }, { "epoch": 43.934, "grad_norm": 1.91372811794281, "learning_rate": 2e-05, "loss": 0.05605849, "step": 21967 }, { "epoch": 43.936, "grad_norm": 1.2598594427108765, "learning_rate": 2e-05, "loss": 0.05659298, "step": 21968 }, { "epoch": 43.938, "grad_norm": 1.135242223739624, "learning_rate": 2e-05, "loss": 0.03806643, "step": 21969 }, { "epoch": 43.94, "grad_norm": 1.2540684938430786, "learning_rate": 2e-05, "loss": 0.04728854, "step": 21970 }, { "epoch": 43.942, "grad_norm": 1.2394566535949707, "learning_rate": 2e-05, "loss": 0.05657537, "step": 21971 }, { "epoch": 43.944, "grad_norm": 1.1225388050079346, "learning_rate": 2e-05, "loss": 0.05487181, "step": 21972 }, { "epoch": 43.946, "grad_norm": 1.3079673051834106, "learning_rate": 2e-05, "loss": 0.04643994, "step": 21973 }, { "epoch": 43.948, "grad_norm": 1.1278849840164185, "learning_rate": 2e-05, "loss": 0.05071158, "step": 21974 }, { "epoch": 43.95, "grad_norm": 1.311438798904419, "learning_rate": 2e-05, "loss": 0.0565824, "step": 21975 }, { "epoch": 43.952, "grad_norm": 1.0105366706848145, "learning_rate": 2e-05, "loss": 0.04423777, "step": 21976 }, { "epoch": 43.954, "grad_norm": 5.449398040771484, "learning_rate": 2e-05, "loss": 0.06142551, "step": 21977 }, { "epoch": 43.956, "grad_norm": 1.5247281789779663, "learning_rate": 2e-05, "loss": 0.05841498, "step": 21978 }, { "epoch": 43.958, "grad_norm": 1.2645142078399658, "learning_rate": 2e-05, "loss": 0.05982171, "step": 21979 }, { "epoch": 43.96, "grad_norm": 1.3923887014389038, "learning_rate": 2e-05, "loss": 0.05867479, "step": 21980 }, { "epoch": 43.962, "grad_norm": 1.2104207277297974, "learning_rate": 2e-05, "loss": 0.05177038, "step": 21981 }, { "epoch": 43.964, "grad_norm": 1.2273800373077393, "learning_rate": 2e-05, "loss": 0.03514304, "step": 21982 }, { "epoch": 43.966, "grad_norm": 1.2372844219207764, "learning_rate": 2e-05, "loss": 0.05396766, "step": 21983 }, { "epoch": 43.968, "grad_norm": 1.1264572143554688, "learning_rate": 2e-05, "loss": 0.03575965, "step": 21984 }, { "epoch": 43.97, "grad_norm": 1.2060368061065674, "learning_rate": 2e-05, "loss": 0.05158073, "step": 21985 }, { "epoch": 43.972, "grad_norm": 1.7094749212265015, "learning_rate": 2e-05, "loss": 0.06007314, "step": 21986 }, { "epoch": 43.974, "grad_norm": 1.0798571109771729, "learning_rate": 2e-05, "loss": 0.05095109, "step": 21987 }, { "epoch": 43.976, "grad_norm": 1.1857688426971436, "learning_rate": 2e-05, "loss": 0.04817619, "step": 21988 }, { "epoch": 43.978, "grad_norm": 1.956149697303772, "learning_rate": 2e-05, "loss": 0.05255031, "step": 21989 }, { "epoch": 43.98, "grad_norm": 1.298830270767212, "learning_rate": 2e-05, "loss": 0.0518288, "step": 21990 }, { "epoch": 43.982, "grad_norm": 2.401399612426758, "learning_rate": 2e-05, "loss": 0.0452053, "step": 21991 }, { "epoch": 43.984, "grad_norm": 1.1325953006744385, "learning_rate": 2e-05, "loss": 0.039924, "step": 21992 }, { "epoch": 43.986, "grad_norm": 1.7556862831115723, "learning_rate": 2e-05, "loss": 0.052302, "step": 21993 }, { "epoch": 43.988, "grad_norm": 1.3267320394515991, "learning_rate": 2e-05, "loss": 0.05184503, "step": 21994 }, { "epoch": 43.99, "grad_norm": 1.1495281457901, "learning_rate": 2e-05, "loss": 0.04765497, "step": 21995 }, { "epoch": 43.992, "grad_norm": 1.6266371011734009, "learning_rate": 2e-05, "loss": 0.05227334, "step": 21996 }, { "epoch": 43.994, "grad_norm": 0.914303719997406, "learning_rate": 2e-05, "loss": 0.02800883, "step": 21997 }, { "epoch": 43.996, "grad_norm": 1.2601372003555298, "learning_rate": 2e-05, "loss": 0.04211642, "step": 21998 }, { "epoch": 43.998, "grad_norm": 1.1143946647644043, "learning_rate": 2e-05, "loss": 0.0415227, "step": 21999 }, { "epoch": 44.0, "grad_norm": 1.461881160736084, "learning_rate": 2e-05, "loss": 0.06111758, "step": 22000 }, { "epoch": 44.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9860279441117764, "Equal_1": 1.0, "Equal_2": 0.9920159680638723, "Equal_3": 0.9900199600798403, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 1.0, "Perpendicular_2": 0.992, "Perpendicular_3": 0.8937875751503006, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.99, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9840319361277445 }, "eval_runtime": 319.306, "eval_samples_per_second": 32.884, "eval_steps_per_second": 0.658, "step": 22000 }, { "epoch": 44.002, "grad_norm": 1.1951003074645996, "learning_rate": 2e-05, "loss": 0.04387069, "step": 22001 }, { "epoch": 44.004, "grad_norm": 1.2349199056625366, "learning_rate": 2e-05, "loss": 0.04709039, "step": 22002 }, { "epoch": 44.006, "grad_norm": 1.0489506721496582, "learning_rate": 2e-05, "loss": 0.03470477, "step": 22003 }, { "epoch": 44.008, "grad_norm": 1.1739320755004883, "learning_rate": 2e-05, "loss": 0.03277641, "step": 22004 }, { "epoch": 44.01, "grad_norm": 2.5618724822998047, "learning_rate": 2e-05, "loss": 0.0545634, "step": 22005 }, { "epoch": 44.012, "grad_norm": 1.349908471107483, "learning_rate": 2e-05, "loss": 0.04546019, "step": 22006 }, { "epoch": 44.014, "grad_norm": 1.1877539157867432, "learning_rate": 2e-05, "loss": 0.05321749, "step": 22007 }, { "epoch": 44.016, "grad_norm": 1.0178004503250122, "learning_rate": 2e-05, "loss": 0.02961883, "step": 22008 }, { "epoch": 44.018, "grad_norm": 1.0790722370147705, "learning_rate": 2e-05, "loss": 0.04679262, "step": 22009 }, { "epoch": 44.02, "grad_norm": 1.1426124572753906, "learning_rate": 2e-05, "loss": 0.06072292, "step": 22010 }, { "epoch": 44.022, "grad_norm": 1.529797077178955, "learning_rate": 2e-05, "loss": 0.05173718, "step": 22011 }, { "epoch": 44.024, "grad_norm": 1.4179189205169678, "learning_rate": 2e-05, "loss": 0.04669178, "step": 22012 }, { "epoch": 44.026, "grad_norm": 1.4494482278823853, "learning_rate": 2e-05, "loss": 0.04122728, "step": 22013 }, { "epoch": 44.028, "grad_norm": 1.4146836996078491, "learning_rate": 2e-05, "loss": 0.05055712, "step": 22014 }, { "epoch": 44.03, "grad_norm": 1.372752070426941, "learning_rate": 2e-05, "loss": 0.05814591, "step": 22015 }, { "epoch": 44.032, "grad_norm": 1.9753098487854004, "learning_rate": 2e-05, "loss": 0.0562949, "step": 22016 }, { "epoch": 44.034, "grad_norm": 0.9869195222854614, "learning_rate": 2e-05, "loss": 0.04519263, "step": 22017 }, { "epoch": 44.036, "grad_norm": 1.016710638999939, "learning_rate": 2e-05, "loss": 0.03520773, "step": 22018 }, { "epoch": 44.038, "grad_norm": 1.2054697275161743, "learning_rate": 2e-05, "loss": 0.05319132, "step": 22019 }, { "epoch": 44.04, "grad_norm": 1.1351009607315063, "learning_rate": 2e-05, "loss": 0.03978164, "step": 22020 }, { "epoch": 44.042, "grad_norm": 1.0979878902435303, "learning_rate": 2e-05, "loss": 0.03140669, "step": 22021 }, { "epoch": 44.044, "grad_norm": 1.446884036064148, "learning_rate": 2e-05, "loss": 0.05437474, "step": 22022 }, { "epoch": 44.046, "grad_norm": 1.3147832155227661, "learning_rate": 2e-05, "loss": 0.05483336, "step": 22023 }, { "epoch": 44.048, "grad_norm": 1.4436129331588745, "learning_rate": 2e-05, "loss": 0.05707975, "step": 22024 }, { "epoch": 44.05, "grad_norm": 1.2296398878097534, "learning_rate": 2e-05, "loss": 0.05186768, "step": 22025 }, { "epoch": 44.052, "grad_norm": 1.414892554283142, "learning_rate": 2e-05, "loss": 0.04964254, "step": 22026 }, { "epoch": 44.054, "grad_norm": 0.9986278414726257, "learning_rate": 2e-05, "loss": 0.04020878, "step": 22027 }, { "epoch": 44.056, "grad_norm": 1.0219697952270508, "learning_rate": 2e-05, "loss": 0.03773836, "step": 22028 }, { "epoch": 44.058, "grad_norm": 0.9788745045661926, "learning_rate": 2e-05, "loss": 0.03778582, "step": 22029 }, { "epoch": 44.06, "grad_norm": 1.209914207458496, "learning_rate": 2e-05, "loss": 0.03958928, "step": 22030 }, { "epoch": 44.062, "grad_norm": 1.2830939292907715, "learning_rate": 2e-05, "loss": 0.04328164, "step": 22031 }, { "epoch": 44.064, "grad_norm": 1.635453462600708, "learning_rate": 2e-05, "loss": 0.05318267, "step": 22032 }, { "epoch": 44.066, "grad_norm": 0.9193920493125916, "learning_rate": 2e-05, "loss": 0.02856221, "step": 22033 }, { "epoch": 44.068, "grad_norm": 1.4379791021347046, "learning_rate": 2e-05, "loss": 0.05032479, "step": 22034 }, { "epoch": 44.07, "grad_norm": 2.1090707778930664, "learning_rate": 2e-05, "loss": 0.08148749, "step": 22035 }, { "epoch": 44.072, "grad_norm": 1.4268964529037476, "learning_rate": 2e-05, "loss": 0.03891208, "step": 22036 }, { "epoch": 44.074, "grad_norm": 1.3950912952423096, "learning_rate": 2e-05, "loss": 0.03417912, "step": 22037 }, { "epoch": 44.076, "grad_norm": 1.4302080869674683, "learning_rate": 2e-05, "loss": 0.04385927, "step": 22038 }, { "epoch": 44.078, "grad_norm": 1.038596272468567, "learning_rate": 2e-05, "loss": 0.03837661, "step": 22039 }, { "epoch": 44.08, "grad_norm": 2.1667323112487793, "learning_rate": 2e-05, "loss": 0.06082205, "step": 22040 }, { "epoch": 44.082, "grad_norm": 1.3383917808532715, "learning_rate": 2e-05, "loss": 0.05682547, "step": 22041 }, { "epoch": 44.084, "grad_norm": 1.3785979747772217, "learning_rate": 2e-05, "loss": 0.05159343, "step": 22042 }, { "epoch": 44.086, "grad_norm": 1.1491683721542358, "learning_rate": 2e-05, "loss": 0.04531821, "step": 22043 }, { "epoch": 44.088, "grad_norm": 1.113952398300171, "learning_rate": 2e-05, "loss": 0.04688843, "step": 22044 }, { "epoch": 44.09, "grad_norm": 1.2150129079818726, "learning_rate": 2e-05, "loss": 0.03948733, "step": 22045 }, { "epoch": 44.092, "grad_norm": 1.161972999572754, "learning_rate": 2e-05, "loss": 0.03694011, "step": 22046 }, { "epoch": 44.094, "grad_norm": 1.6014569997787476, "learning_rate": 2e-05, "loss": 0.06034542, "step": 22047 }, { "epoch": 44.096, "grad_norm": 1.49050772190094, "learning_rate": 2e-05, "loss": 0.05083546, "step": 22048 }, { "epoch": 44.098, "grad_norm": 1.4574774503707886, "learning_rate": 2e-05, "loss": 0.0556547, "step": 22049 }, { "epoch": 44.1, "grad_norm": 1.13857102394104, "learning_rate": 2e-05, "loss": 0.04332735, "step": 22050 }, { "epoch": 44.102, "grad_norm": 1.2549856901168823, "learning_rate": 2e-05, "loss": 0.05079667, "step": 22051 }, { "epoch": 44.104, "grad_norm": 1.2218992710113525, "learning_rate": 2e-05, "loss": 0.05055429, "step": 22052 }, { "epoch": 44.106, "grad_norm": 1.3079739809036255, "learning_rate": 2e-05, "loss": 0.05005455, "step": 22053 }, { "epoch": 44.108, "grad_norm": 1.148532748222351, "learning_rate": 2e-05, "loss": 0.04688092, "step": 22054 }, { "epoch": 44.11, "grad_norm": 1.397567629814148, "learning_rate": 2e-05, "loss": 0.0634838, "step": 22055 }, { "epoch": 44.112, "grad_norm": 1.166181206703186, "learning_rate": 2e-05, "loss": 0.0324477, "step": 22056 }, { "epoch": 44.114, "grad_norm": 1.310949444770813, "learning_rate": 2e-05, "loss": 0.04780035, "step": 22057 }, { "epoch": 44.116, "grad_norm": 1.1098144054412842, "learning_rate": 2e-05, "loss": 0.03849977, "step": 22058 }, { "epoch": 44.118, "grad_norm": 1.5903892517089844, "learning_rate": 2e-05, "loss": 0.05317472, "step": 22059 }, { "epoch": 44.12, "grad_norm": 1.1845721006393433, "learning_rate": 2e-05, "loss": 0.03562384, "step": 22060 }, { "epoch": 44.122, "grad_norm": 1.2428687810897827, "learning_rate": 2e-05, "loss": 0.05235748, "step": 22061 }, { "epoch": 44.124, "grad_norm": 1.2767870426177979, "learning_rate": 2e-05, "loss": 0.05173537, "step": 22062 }, { "epoch": 44.126, "grad_norm": 1.3087096214294434, "learning_rate": 2e-05, "loss": 0.05469508, "step": 22063 }, { "epoch": 44.128, "grad_norm": 1.2753592729568481, "learning_rate": 2e-05, "loss": 0.05185987, "step": 22064 }, { "epoch": 44.13, "grad_norm": 1.3028455972671509, "learning_rate": 2e-05, "loss": 0.05058619, "step": 22065 }, { "epoch": 44.132, "grad_norm": 1.2384607791900635, "learning_rate": 2e-05, "loss": 0.04439488, "step": 22066 }, { "epoch": 44.134, "grad_norm": 1.494373083114624, "learning_rate": 2e-05, "loss": 0.06326284, "step": 22067 }, { "epoch": 44.136, "grad_norm": 4.574721813201904, "learning_rate": 2e-05, "loss": 0.0534743, "step": 22068 }, { "epoch": 44.138, "grad_norm": 1.5381698608398438, "learning_rate": 2e-05, "loss": 0.06085927, "step": 22069 }, { "epoch": 44.14, "grad_norm": 1.221481442451477, "learning_rate": 2e-05, "loss": 0.04375359, "step": 22070 }, { "epoch": 44.142, "grad_norm": 1.3349063396453857, "learning_rate": 2e-05, "loss": 0.04402313, "step": 22071 }, { "epoch": 44.144, "grad_norm": 1.3375403881072998, "learning_rate": 2e-05, "loss": 0.05106883, "step": 22072 }, { "epoch": 44.146, "grad_norm": 1.1496771574020386, "learning_rate": 2e-05, "loss": 0.04525649, "step": 22073 }, { "epoch": 44.148, "grad_norm": 1.2824701070785522, "learning_rate": 2e-05, "loss": 0.04934424, "step": 22074 }, { "epoch": 44.15, "grad_norm": 1.1325626373291016, "learning_rate": 2e-05, "loss": 0.0284467, "step": 22075 }, { "epoch": 44.152, "grad_norm": 1.3874157667160034, "learning_rate": 2e-05, "loss": 0.04672045, "step": 22076 }, { "epoch": 44.154, "grad_norm": 3.2082982063293457, "learning_rate": 2e-05, "loss": 0.0601771, "step": 22077 }, { "epoch": 44.156, "grad_norm": 2.588113784790039, "learning_rate": 2e-05, "loss": 0.07006887, "step": 22078 }, { "epoch": 44.158, "grad_norm": 1.3412790298461914, "learning_rate": 2e-05, "loss": 0.04630299, "step": 22079 }, { "epoch": 44.16, "grad_norm": 1.2379393577575684, "learning_rate": 2e-05, "loss": 0.04339472, "step": 22080 }, { "epoch": 44.162, "grad_norm": 1.1797010898590088, "learning_rate": 2e-05, "loss": 0.0429916, "step": 22081 }, { "epoch": 44.164, "grad_norm": 3.3057401180267334, "learning_rate": 2e-05, "loss": 0.04493307, "step": 22082 }, { "epoch": 44.166, "grad_norm": 1.1744298934936523, "learning_rate": 2e-05, "loss": 0.03810238, "step": 22083 }, { "epoch": 44.168, "grad_norm": 1.669509768486023, "learning_rate": 2e-05, "loss": 0.06496652, "step": 22084 }, { "epoch": 44.17, "grad_norm": 1.5125724077224731, "learning_rate": 2e-05, "loss": 0.04929143, "step": 22085 }, { "epoch": 44.172, "grad_norm": 1.4067198038101196, "learning_rate": 2e-05, "loss": 0.05213205, "step": 22086 }, { "epoch": 44.174, "grad_norm": 1.189210057258606, "learning_rate": 2e-05, "loss": 0.04036959, "step": 22087 }, { "epoch": 44.176, "grad_norm": 1.154815673828125, "learning_rate": 2e-05, "loss": 0.04020704, "step": 22088 }, { "epoch": 44.178, "grad_norm": 1.233259677886963, "learning_rate": 2e-05, "loss": 0.05813593, "step": 22089 }, { "epoch": 44.18, "grad_norm": 1.2222100496292114, "learning_rate": 2e-05, "loss": 0.04773624, "step": 22090 }, { "epoch": 44.182, "grad_norm": 1.3617769479751587, "learning_rate": 2e-05, "loss": 0.04983572, "step": 22091 }, { "epoch": 44.184, "grad_norm": 1.1668729782104492, "learning_rate": 2e-05, "loss": 0.04586762, "step": 22092 }, { "epoch": 44.186, "grad_norm": 1.3036890029907227, "learning_rate": 2e-05, "loss": 0.05323526, "step": 22093 }, { "epoch": 44.188, "grad_norm": 3.2073822021484375, "learning_rate": 2e-05, "loss": 0.04346972, "step": 22094 }, { "epoch": 44.19, "grad_norm": 1.1499048471450806, "learning_rate": 2e-05, "loss": 0.05191308, "step": 22095 }, { "epoch": 44.192, "grad_norm": 1.2350811958312988, "learning_rate": 2e-05, "loss": 0.04665541, "step": 22096 }, { "epoch": 44.194, "grad_norm": 1.1341971158981323, "learning_rate": 2e-05, "loss": 0.03508368, "step": 22097 }, { "epoch": 44.196, "grad_norm": 1.3536897897720337, "learning_rate": 2e-05, "loss": 0.04077377, "step": 22098 }, { "epoch": 44.198, "grad_norm": 1.0278751850128174, "learning_rate": 2e-05, "loss": 0.03572151, "step": 22099 }, { "epoch": 44.2, "grad_norm": 1.1718095541000366, "learning_rate": 2e-05, "loss": 0.04011806, "step": 22100 }, { "epoch": 44.202, "grad_norm": 1.184627652168274, "learning_rate": 2e-05, "loss": 0.04278344, "step": 22101 }, { "epoch": 44.204, "grad_norm": 1.8265912532806396, "learning_rate": 2e-05, "loss": 0.04747808, "step": 22102 }, { "epoch": 44.206, "grad_norm": 1.1171393394470215, "learning_rate": 2e-05, "loss": 0.04222995, "step": 22103 }, { "epoch": 44.208, "grad_norm": 1.0637423992156982, "learning_rate": 2e-05, "loss": 0.04015394, "step": 22104 }, { "epoch": 44.21, "grad_norm": 1.0722296237945557, "learning_rate": 2e-05, "loss": 0.04177207, "step": 22105 }, { "epoch": 44.212, "grad_norm": 1.208267092704773, "learning_rate": 2e-05, "loss": 0.04094924, "step": 22106 }, { "epoch": 44.214, "grad_norm": 2.877411365509033, "learning_rate": 2e-05, "loss": 0.05537339, "step": 22107 }, { "epoch": 44.216, "grad_norm": 3.210109233856201, "learning_rate": 2e-05, "loss": 0.06678712, "step": 22108 }, { "epoch": 44.218, "grad_norm": 1.6754549741744995, "learning_rate": 2e-05, "loss": 0.05157789, "step": 22109 }, { "epoch": 44.22, "grad_norm": 1.1392402648925781, "learning_rate": 2e-05, "loss": 0.04350193, "step": 22110 }, { "epoch": 44.222, "grad_norm": 1.8385316133499146, "learning_rate": 2e-05, "loss": 0.03925511, "step": 22111 }, { "epoch": 44.224, "grad_norm": 1.453076958656311, "learning_rate": 2e-05, "loss": 0.05029003, "step": 22112 }, { "epoch": 44.226, "grad_norm": 1.1068811416625977, "learning_rate": 2e-05, "loss": 0.05229628, "step": 22113 }, { "epoch": 44.228, "grad_norm": 1.3013540506362915, "learning_rate": 2e-05, "loss": 0.05385014, "step": 22114 }, { "epoch": 44.23, "grad_norm": 1.258337140083313, "learning_rate": 2e-05, "loss": 0.04877231, "step": 22115 }, { "epoch": 44.232, "grad_norm": 1.5271419286727905, "learning_rate": 2e-05, "loss": 0.0496937, "step": 22116 }, { "epoch": 44.234, "grad_norm": 1.1020830869674683, "learning_rate": 2e-05, "loss": 0.0386355, "step": 22117 }, { "epoch": 44.236, "grad_norm": 0.9935838580131531, "learning_rate": 2e-05, "loss": 0.02613112, "step": 22118 }, { "epoch": 44.238, "grad_norm": 1.5669888257980347, "learning_rate": 2e-05, "loss": 0.05612132, "step": 22119 }, { "epoch": 44.24, "grad_norm": 1.1000120639801025, "learning_rate": 2e-05, "loss": 0.041154, "step": 22120 }, { "epoch": 44.242, "grad_norm": 1.1130436658859253, "learning_rate": 2e-05, "loss": 0.03398903, "step": 22121 }, { "epoch": 44.244, "grad_norm": 1.3796424865722656, "learning_rate": 2e-05, "loss": 0.05808487, "step": 22122 }, { "epoch": 44.246, "grad_norm": 1.7627161741256714, "learning_rate": 2e-05, "loss": 0.05050261, "step": 22123 }, { "epoch": 44.248, "grad_norm": 1.2039992809295654, "learning_rate": 2e-05, "loss": 0.0452828, "step": 22124 }, { "epoch": 44.25, "grad_norm": 1.6339401006698608, "learning_rate": 2e-05, "loss": 0.07191573, "step": 22125 }, { "epoch": 44.252, "grad_norm": 1.870229959487915, "learning_rate": 2e-05, "loss": 0.04964413, "step": 22126 }, { "epoch": 44.254, "grad_norm": 1.4508622884750366, "learning_rate": 2e-05, "loss": 0.05981822, "step": 22127 }, { "epoch": 44.256, "grad_norm": 1.3774610757827759, "learning_rate": 2e-05, "loss": 0.04445232, "step": 22128 }, { "epoch": 44.258, "grad_norm": 1.1424156427383423, "learning_rate": 2e-05, "loss": 0.04557105, "step": 22129 }, { "epoch": 44.26, "grad_norm": 1.0394034385681152, "learning_rate": 2e-05, "loss": 0.0336099, "step": 22130 }, { "epoch": 44.262, "grad_norm": 1.2787925004959106, "learning_rate": 2e-05, "loss": 0.04919507, "step": 22131 }, { "epoch": 44.264, "grad_norm": 0.9578052759170532, "learning_rate": 2e-05, "loss": 0.03036173, "step": 22132 }, { "epoch": 44.266, "grad_norm": 1.0824671983718872, "learning_rate": 2e-05, "loss": 0.04263349, "step": 22133 }, { "epoch": 44.268, "grad_norm": 1.273948311805725, "learning_rate": 2e-05, "loss": 0.03732668, "step": 22134 }, { "epoch": 44.27, "grad_norm": 1.1770116090774536, "learning_rate": 2e-05, "loss": 0.04749018, "step": 22135 }, { "epoch": 44.272, "grad_norm": 1.3313310146331787, "learning_rate": 2e-05, "loss": 0.04976882, "step": 22136 }, { "epoch": 44.274, "grad_norm": 2.539590358734131, "learning_rate": 2e-05, "loss": 0.04594205, "step": 22137 }, { "epoch": 44.276, "grad_norm": 1.6731163263320923, "learning_rate": 2e-05, "loss": 0.06411099, "step": 22138 }, { "epoch": 44.278, "grad_norm": 1.0700182914733887, "learning_rate": 2e-05, "loss": 0.03831624, "step": 22139 }, { "epoch": 44.28, "grad_norm": 1.190203309059143, "learning_rate": 2e-05, "loss": 0.04036564, "step": 22140 }, { "epoch": 44.282, "grad_norm": 3.227332830429077, "learning_rate": 2e-05, "loss": 0.06097856, "step": 22141 }, { "epoch": 44.284, "grad_norm": 1.274222493171692, "learning_rate": 2e-05, "loss": 0.04809301, "step": 22142 }, { "epoch": 44.286, "grad_norm": 1.2631959915161133, "learning_rate": 2e-05, "loss": 0.0570578, "step": 22143 }, { "epoch": 44.288, "grad_norm": 1.4169923067092896, "learning_rate": 2e-05, "loss": 0.05213282, "step": 22144 }, { "epoch": 44.29, "grad_norm": 1.2868964672088623, "learning_rate": 2e-05, "loss": 0.05694466, "step": 22145 }, { "epoch": 44.292, "grad_norm": 1.0576223134994507, "learning_rate": 2e-05, "loss": 0.04163115, "step": 22146 }, { "epoch": 44.294, "grad_norm": 2.8826777935028076, "learning_rate": 2e-05, "loss": 0.05488094, "step": 22147 }, { "epoch": 44.296, "grad_norm": 1.1898715496063232, "learning_rate": 2e-05, "loss": 0.05081913, "step": 22148 }, { "epoch": 44.298, "grad_norm": 1.2379268407821655, "learning_rate": 2e-05, "loss": 0.05061274, "step": 22149 }, { "epoch": 44.3, "grad_norm": 1.355218529701233, "learning_rate": 2e-05, "loss": 0.05804717, "step": 22150 }, { "epoch": 44.302, "grad_norm": 1.3628958463668823, "learning_rate": 2e-05, "loss": 0.04630186, "step": 22151 }, { "epoch": 44.304, "grad_norm": 1.064475178718567, "learning_rate": 2e-05, "loss": 0.0363181, "step": 22152 }, { "epoch": 44.306, "grad_norm": 1.3461925983428955, "learning_rate": 2e-05, "loss": 0.03945461, "step": 22153 }, { "epoch": 44.308, "grad_norm": 1.2384178638458252, "learning_rate": 2e-05, "loss": 0.03994835, "step": 22154 }, { "epoch": 44.31, "grad_norm": 1.2434017658233643, "learning_rate": 2e-05, "loss": 0.05772359, "step": 22155 }, { "epoch": 44.312, "grad_norm": 1.719817876815796, "learning_rate": 2e-05, "loss": 0.04721334, "step": 22156 }, { "epoch": 44.314, "grad_norm": 1.0811158418655396, "learning_rate": 2e-05, "loss": 0.05218527, "step": 22157 }, { "epoch": 44.316, "grad_norm": 1.283691644668579, "learning_rate": 2e-05, "loss": 0.05287713, "step": 22158 }, { "epoch": 44.318, "grad_norm": 1.1990177631378174, "learning_rate": 2e-05, "loss": 0.04951636, "step": 22159 }, { "epoch": 44.32, "grad_norm": 2.536614418029785, "learning_rate": 2e-05, "loss": 0.04560195, "step": 22160 }, { "epoch": 44.322, "grad_norm": 1.0507843494415283, "learning_rate": 2e-05, "loss": 0.03920343, "step": 22161 }, { "epoch": 44.324, "grad_norm": 1.4071505069732666, "learning_rate": 2e-05, "loss": 0.05998628, "step": 22162 }, { "epoch": 44.326, "grad_norm": 1.6779695749282837, "learning_rate": 2e-05, "loss": 0.05833928, "step": 22163 }, { "epoch": 44.328, "grad_norm": 1.0807090997695923, "learning_rate": 2e-05, "loss": 0.04173718, "step": 22164 }, { "epoch": 44.33, "grad_norm": 1.7728112936019897, "learning_rate": 2e-05, "loss": 0.05153607, "step": 22165 }, { "epoch": 44.332, "grad_norm": 1.454960584640503, "learning_rate": 2e-05, "loss": 0.05345749, "step": 22166 }, { "epoch": 44.334, "grad_norm": 1.146530032157898, "learning_rate": 2e-05, "loss": 0.05334022, "step": 22167 }, { "epoch": 44.336, "grad_norm": 1.7574084997177124, "learning_rate": 2e-05, "loss": 0.03719595, "step": 22168 }, { "epoch": 44.338, "grad_norm": 1.4816433191299438, "learning_rate": 2e-05, "loss": 0.07536916, "step": 22169 }, { "epoch": 44.34, "grad_norm": 1.3355191946029663, "learning_rate": 2e-05, "loss": 0.04837946, "step": 22170 }, { "epoch": 44.342, "grad_norm": 1.2563939094543457, "learning_rate": 2e-05, "loss": 0.04800646, "step": 22171 }, { "epoch": 44.344, "grad_norm": 1.0994011163711548, "learning_rate": 2e-05, "loss": 0.03690449, "step": 22172 }, { "epoch": 44.346, "grad_norm": 1.6743828058242798, "learning_rate": 2e-05, "loss": 0.04229587, "step": 22173 }, { "epoch": 44.348, "grad_norm": 1.1569823026657104, "learning_rate": 2e-05, "loss": 0.06016696, "step": 22174 }, { "epoch": 44.35, "grad_norm": 1.8598023653030396, "learning_rate": 2e-05, "loss": 0.06339575, "step": 22175 }, { "epoch": 44.352, "grad_norm": 1.2611335515975952, "learning_rate": 2e-05, "loss": 0.05222043, "step": 22176 }, { "epoch": 44.354, "grad_norm": 0.9472651481628418, "learning_rate": 2e-05, "loss": 0.03059671, "step": 22177 }, { "epoch": 44.356, "grad_norm": 1.521095633506775, "learning_rate": 2e-05, "loss": 0.05748924, "step": 22178 }, { "epoch": 44.358, "grad_norm": 1.19306218624115, "learning_rate": 2e-05, "loss": 0.04892451, "step": 22179 }, { "epoch": 44.36, "grad_norm": 1.12234628200531, "learning_rate": 2e-05, "loss": 0.0413163, "step": 22180 }, { "epoch": 44.362, "grad_norm": 1.408831000328064, "learning_rate": 2e-05, "loss": 0.04378711, "step": 22181 }, { "epoch": 44.364, "grad_norm": 1.1348880529403687, "learning_rate": 2e-05, "loss": 0.04664857, "step": 22182 }, { "epoch": 44.366, "grad_norm": 1.254754662513733, "learning_rate": 2e-05, "loss": 0.03291727, "step": 22183 }, { "epoch": 44.368, "grad_norm": 2.050689458847046, "learning_rate": 2e-05, "loss": 0.06108903, "step": 22184 }, { "epoch": 44.37, "grad_norm": 1.1247540712356567, "learning_rate": 2e-05, "loss": 0.04102144, "step": 22185 }, { "epoch": 44.372, "grad_norm": 1.2489968538284302, "learning_rate": 2e-05, "loss": 0.04336529, "step": 22186 }, { "epoch": 44.374, "grad_norm": 1.2883358001708984, "learning_rate": 2e-05, "loss": 0.04382469, "step": 22187 }, { "epoch": 44.376, "grad_norm": 1.5653047561645508, "learning_rate": 2e-05, "loss": 0.05455358, "step": 22188 }, { "epoch": 44.378, "grad_norm": 1.1407490968704224, "learning_rate": 2e-05, "loss": 0.04385475, "step": 22189 }, { "epoch": 44.38, "grad_norm": 0.9423237442970276, "learning_rate": 2e-05, "loss": 0.03583224, "step": 22190 }, { "epoch": 44.382, "grad_norm": 1.5692898035049438, "learning_rate": 2e-05, "loss": 0.06093635, "step": 22191 }, { "epoch": 44.384, "grad_norm": 1.2798019647598267, "learning_rate": 2e-05, "loss": 0.04287341, "step": 22192 }, { "epoch": 44.386, "grad_norm": 2.308946132659912, "learning_rate": 2e-05, "loss": 0.0538884, "step": 22193 }, { "epoch": 44.388, "grad_norm": 1.9377248287200928, "learning_rate": 2e-05, "loss": 0.06390283, "step": 22194 }, { "epoch": 44.39, "grad_norm": 1.157041072845459, "learning_rate": 2e-05, "loss": 0.04512091, "step": 22195 }, { "epoch": 44.392, "grad_norm": 1.2009409666061401, "learning_rate": 2e-05, "loss": 0.04480921, "step": 22196 }, { "epoch": 44.394, "grad_norm": 1.0632206201553345, "learning_rate": 2e-05, "loss": 0.04720378, "step": 22197 }, { "epoch": 44.396, "grad_norm": 2.500654935836792, "learning_rate": 2e-05, "loss": 0.07204752, "step": 22198 }, { "epoch": 44.398, "grad_norm": 2.257668972015381, "learning_rate": 2e-05, "loss": 0.06277246, "step": 22199 }, { "epoch": 44.4, "grad_norm": 1.4456931352615356, "learning_rate": 2e-05, "loss": 0.06364234, "step": 22200 }, { "epoch": 44.402, "grad_norm": 1.1703506708145142, "learning_rate": 2e-05, "loss": 0.04526365, "step": 22201 }, { "epoch": 44.404, "grad_norm": 0.9013957381248474, "learning_rate": 2e-05, "loss": 0.02716382, "step": 22202 }, { "epoch": 44.406, "grad_norm": 1.0054999589920044, "learning_rate": 2e-05, "loss": 0.04023641, "step": 22203 }, { "epoch": 44.408, "grad_norm": 1.7909125089645386, "learning_rate": 2e-05, "loss": 0.05618212, "step": 22204 }, { "epoch": 44.41, "grad_norm": 1.3415671586990356, "learning_rate": 2e-05, "loss": 0.04464972, "step": 22205 }, { "epoch": 44.412, "grad_norm": 1.8720018863677979, "learning_rate": 2e-05, "loss": 0.06667548, "step": 22206 }, { "epoch": 44.414, "grad_norm": 1.0374834537506104, "learning_rate": 2e-05, "loss": 0.03846564, "step": 22207 }, { "epoch": 44.416, "grad_norm": 1.2723255157470703, "learning_rate": 2e-05, "loss": 0.05234695, "step": 22208 }, { "epoch": 44.418, "grad_norm": 1.134573221206665, "learning_rate": 2e-05, "loss": 0.04465498, "step": 22209 }, { "epoch": 44.42, "grad_norm": 1.5191528797149658, "learning_rate": 2e-05, "loss": 0.0526737, "step": 22210 }, { "epoch": 44.422, "grad_norm": 1.506665825843811, "learning_rate": 2e-05, "loss": 0.06554738, "step": 22211 }, { "epoch": 44.424, "grad_norm": 1.1496007442474365, "learning_rate": 2e-05, "loss": 0.05143321, "step": 22212 }, { "epoch": 44.426, "grad_norm": 1.3427084684371948, "learning_rate": 2e-05, "loss": 0.05444918, "step": 22213 }, { "epoch": 44.428, "grad_norm": 1.3482060432434082, "learning_rate": 2e-05, "loss": 0.04946334, "step": 22214 }, { "epoch": 44.43, "grad_norm": 2.4248390197753906, "learning_rate": 2e-05, "loss": 0.06155992, "step": 22215 }, { "epoch": 44.432, "grad_norm": 1.5861310958862305, "learning_rate": 2e-05, "loss": 0.0607713, "step": 22216 }, { "epoch": 44.434, "grad_norm": 1.7854304313659668, "learning_rate": 2e-05, "loss": 0.05506391, "step": 22217 }, { "epoch": 44.436, "grad_norm": 1.5562728643417358, "learning_rate": 2e-05, "loss": 0.05874497, "step": 22218 }, { "epoch": 44.438, "grad_norm": 1.5572162866592407, "learning_rate": 2e-05, "loss": 0.04674251, "step": 22219 }, { "epoch": 44.44, "grad_norm": 2.5241782665252686, "learning_rate": 2e-05, "loss": 0.05468413, "step": 22220 }, { "epoch": 44.442, "grad_norm": 1.4192159175872803, "learning_rate": 2e-05, "loss": 0.05214172, "step": 22221 }, { "epoch": 44.444, "grad_norm": 1.1719669103622437, "learning_rate": 2e-05, "loss": 0.0375664, "step": 22222 }, { "epoch": 44.446, "grad_norm": 1.4639357328414917, "learning_rate": 2e-05, "loss": 0.06297034, "step": 22223 }, { "epoch": 44.448, "grad_norm": 1.3242526054382324, "learning_rate": 2e-05, "loss": 0.05253812, "step": 22224 }, { "epoch": 44.45, "grad_norm": 1.2953046560287476, "learning_rate": 2e-05, "loss": 0.05739931, "step": 22225 }, { "epoch": 44.452, "grad_norm": 2.824225425720215, "learning_rate": 2e-05, "loss": 0.05475694, "step": 22226 }, { "epoch": 44.454, "grad_norm": 2.2453224658966064, "learning_rate": 2e-05, "loss": 0.04598641, "step": 22227 }, { "epoch": 44.456, "grad_norm": 1.2624878883361816, "learning_rate": 2e-05, "loss": 0.05295527, "step": 22228 }, { "epoch": 44.458, "grad_norm": 1.4258654117584229, "learning_rate": 2e-05, "loss": 0.05860892, "step": 22229 }, { "epoch": 44.46, "grad_norm": 1.2828129529953003, "learning_rate": 2e-05, "loss": 0.04799199, "step": 22230 }, { "epoch": 44.462, "grad_norm": 1.1467986106872559, "learning_rate": 2e-05, "loss": 0.05302534, "step": 22231 }, { "epoch": 44.464, "grad_norm": 1.229738473892212, "learning_rate": 2e-05, "loss": 0.04447974, "step": 22232 }, { "epoch": 44.466, "grad_norm": 1.252163052558899, "learning_rate": 2e-05, "loss": 0.04261618, "step": 22233 }, { "epoch": 44.468, "grad_norm": 1.4980531930923462, "learning_rate": 2e-05, "loss": 0.04990222, "step": 22234 }, { "epoch": 44.47, "grad_norm": 0.8647037148475647, "learning_rate": 2e-05, "loss": 0.03477815, "step": 22235 }, { "epoch": 44.472, "grad_norm": 1.2490501403808594, "learning_rate": 2e-05, "loss": 0.05593645, "step": 22236 }, { "epoch": 44.474, "grad_norm": 2.7238969802856445, "learning_rate": 2e-05, "loss": 0.07153691, "step": 22237 }, { "epoch": 44.476, "grad_norm": 1.0918712615966797, "learning_rate": 2e-05, "loss": 0.03975195, "step": 22238 }, { "epoch": 44.478, "grad_norm": 1.2695947885513306, "learning_rate": 2e-05, "loss": 0.0505439, "step": 22239 }, { "epoch": 44.48, "grad_norm": 1.305942177772522, "learning_rate": 2e-05, "loss": 0.05622169, "step": 22240 }, { "epoch": 44.482, "grad_norm": 1.5338377952575684, "learning_rate": 2e-05, "loss": 0.06709421, "step": 22241 }, { "epoch": 44.484, "grad_norm": 1.2748602628707886, "learning_rate": 2e-05, "loss": 0.05205786, "step": 22242 }, { "epoch": 44.486, "grad_norm": 1.1404742002487183, "learning_rate": 2e-05, "loss": 0.04140478, "step": 22243 }, { "epoch": 44.488, "grad_norm": 1.2886909246444702, "learning_rate": 2e-05, "loss": 0.05009973, "step": 22244 }, { "epoch": 44.49, "grad_norm": 1.3291938304901123, "learning_rate": 2e-05, "loss": 0.03881418, "step": 22245 }, { "epoch": 44.492, "grad_norm": 1.743362307548523, "learning_rate": 2e-05, "loss": 0.06562784, "step": 22246 }, { "epoch": 44.494, "grad_norm": 1.1744399070739746, "learning_rate": 2e-05, "loss": 0.03672204, "step": 22247 }, { "epoch": 44.496, "grad_norm": 1.9760868549346924, "learning_rate": 2e-05, "loss": 0.04297481, "step": 22248 }, { "epoch": 44.498, "grad_norm": 1.2893195152282715, "learning_rate": 2e-05, "loss": 0.0419421, "step": 22249 }, { "epoch": 44.5, "grad_norm": 1.3587701320648193, "learning_rate": 2e-05, "loss": 0.03800445, "step": 22250 }, { "epoch": 44.502, "grad_norm": 1.7126094102859497, "learning_rate": 2e-05, "loss": 0.04739122, "step": 22251 }, { "epoch": 44.504, "grad_norm": 1.3062655925750732, "learning_rate": 2e-05, "loss": 0.0504301, "step": 22252 }, { "epoch": 44.506, "grad_norm": 1.2278339862823486, "learning_rate": 2e-05, "loss": 0.05388809, "step": 22253 }, { "epoch": 44.508, "grad_norm": 1.3173941373825073, "learning_rate": 2e-05, "loss": 0.03916169, "step": 22254 }, { "epoch": 44.51, "grad_norm": 1.1290494203567505, "learning_rate": 2e-05, "loss": 0.04224993, "step": 22255 }, { "epoch": 44.512, "grad_norm": 1.289548397064209, "learning_rate": 2e-05, "loss": 0.05193936, "step": 22256 }, { "epoch": 44.514, "grad_norm": 1.2119817733764648, "learning_rate": 2e-05, "loss": 0.04464715, "step": 22257 }, { "epoch": 44.516, "grad_norm": 1.1168314218521118, "learning_rate": 2e-05, "loss": 0.05150532, "step": 22258 }, { "epoch": 44.518, "grad_norm": 1.120448112487793, "learning_rate": 2e-05, "loss": 0.04792247, "step": 22259 }, { "epoch": 44.52, "grad_norm": 1.3556797504425049, "learning_rate": 2e-05, "loss": 0.05941795, "step": 22260 }, { "epoch": 44.522, "grad_norm": 1.177809715270996, "learning_rate": 2e-05, "loss": 0.04000936, "step": 22261 }, { "epoch": 44.524, "grad_norm": 1.567018747329712, "learning_rate": 2e-05, "loss": 0.04125568, "step": 22262 }, { "epoch": 44.526, "grad_norm": 1.170555830001831, "learning_rate": 2e-05, "loss": 0.03684732, "step": 22263 }, { "epoch": 44.528, "grad_norm": 1.2405741214752197, "learning_rate": 2e-05, "loss": 0.04239763, "step": 22264 }, { "epoch": 44.53, "grad_norm": 1.0276159048080444, "learning_rate": 2e-05, "loss": 0.0344376, "step": 22265 }, { "epoch": 44.532, "grad_norm": 1.2948598861694336, "learning_rate": 2e-05, "loss": 0.05724182, "step": 22266 }, { "epoch": 44.534, "grad_norm": 1.2418267726898193, "learning_rate": 2e-05, "loss": 0.05686314, "step": 22267 }, { "epoch": 44.536, "grad_norm": 1.2589337825775146, "learning_rate": 2e-05, "loss": 0.04628392, "step": 22268 }, { "epoch": 44.538, "grad_norm": 1.343007206916809, "learning_rate": 2e-05, "loss": 0.06061236, "step": 22269 }, { "epoch": 44.54, "grad_norm": 1.2776858806610107, "learning_rate": 2e-05, "loss": 0.0425747, "step": 22270 }, { "epoch": 44.542, "grad_norm": 1.1150517463684082, "learning_rate": 2e-05, "loss": 0.03600602, "step": 22271 }, { "epoch": 44.544, "grad_norm": 1.9361085891723633, "learning_rate": 2e-05, "loss": 0.04623732, "step": 22272 }, { "epoch": 44.546, "grad_norm": 1.4865039587020874, "learning_rate": 2e-05, "loss": 0.04633596, "step": 22273 }, { "epoch": 44.548, "grad_norm": 1.1509850025177002, "learning_rate": 2e-05, "loss": 0.03759123, "step": 22274 }, { "epoch": 44.55, "grad_norm": 1.38397216796875, "learning_rate": 2e-05, "loss": 0.05729149, "step": 22275 }, { "epoch": 44.552, "grad_norm": 1.1179465055465698, "learning_rate": 2e-05, "loss": 0.03808307, "step": 22276 }, { "epoch": 44.554, "grad_norm": 2.435750961303711, "learning_rate": 2e-05, "loss": 0.02759157, "step": 22277 }, { "epoch": 44.556, "grad_norm": 1.195075273513794, "learning_rate": 2e-05, "loss": 0.03713936, "step": 22278 }, { "epoch": 44.558, "grad_norm": 1.1682522296905518, "learning_rate": 2e-05, "loss": 0.04102589, "step": 22279 }, { "epoch": 44.56, "grad_norm": 1.4527888298034668, "learning_rate": 2e-05, "loss": 0.06393797, "step": 22280 }, { "epoch": 44.562, "grad_norm": 1.5062603950500488, "learning_rate": 2e-05, "loss": 0.07412426, "step": 22281 }, { "epoch": 44.564, "grad_norm": 1.2519084215164185, "learning_rate": 2e-05, "loss": 0.04865194, "step": 22282 }, { "epoch": 44.566, "grad_norm": 1.912338137626648, "learning_rate": 2e-05, "loss": 0.0646146, "step": 22283 }, { "epoch": 44.568, "grad_norm": 1.1479963064193726, "learning_rate": 2e-05, "loss": 0.04428418, "step": 22284 }, { "epoch": 44.57, "grad_norm": 1.3839733600616455, "learning_rate": 2e-05, "loss": 0.03918359, "step": 22285 }, { "epoch": 44.572, "grad_norm": 1.479257345199585, "learning_rate": 2e-05, "loss": 0.06053314, "step": 22286 }, { "epoch": 44.574, "grad_norm": 2.5502092838287354, "learning_rate": 2e-05, "loss": 0.07071359, "step": 22287 }, { "epoch": 44.576, "grad_norm": 1.1668108701705933, "learning_rate": 2e-05, "loss": 0.02986339, "step": 22288 }, { "epoch": 44.578, "grad_norm": 1.1986876726150513, "learning_rate": 2e-05, "loss": 0.05737267, "step": 22289 }, { "epoch": 44.58, "grad_norm": 1.2797632217407227, "learning_rate": 2e-05, "loss": 0.05133417, "step": 22290 }, { "epoch": 44.582, "grad_norm": 1.1211811304092407, "learning_rate": 2e-05, "loss": 0.04330084, "step": 22291 }, { "epoch": 44.584, "grad_norm": 1.906973123550415, "learning_rate": 2e-05, "loss": 0.05037066, "step": 22292 }, { "epoch": 44.586, "grad_norm": 1.128289818763733, "learning_rate": 2e-05, "loss": 0.03821924, "step": 22293 }, { "epoch": 44.588, "grad_norm": 2.022193670272827, "learning_rate": 2e-05, "loss": 0.06020941, "step": 22294 }, { "epoch": 44.59, "grad_norm": 1.26398503780365, "learning_rate": 2e-05, "loss": 0.04926804, "step": 22295 }, { "epoch": 44.592, "grad_norm": 1.2855454683303833, "learning_rate": 2e-05, "loss": 0.05323242, "step": 22296 }, { "epoch": 44.594, "grad_norm": 1.0625572204589844, "learning_rate": 2e-05, "loss": 0.03464797, "step": 22297 }, { "epoch": 44.596, "grad_norm": 1.904098391532898, "learning_rate": 2e-05, "loss": 0.07098804, "step": 22298 }, { "epoch": 44.598, "grad_norm": 1.219960331916809, "learning_rate": 2e-05, "loss": 0.04161097, "step": 22299 }, { "epoch": 44.6, "grad_norm": 1.6203744411468506, "learning_rate": 2e-05, "loss": 0.0535806, "step": 22300 }, { "epoch": 44.602, "grad_norm": 1.138048529624939, "learning_rate": 2e-05, "loss": 0.04342931, "step": 22301 }, { "epoch": 44.604, "grad_norm": 1.2476096153259277, "learning_rate": 2e-05, "loss": 0.0449223, "step": 22302 }, { "epoch": 44.606, "grad_norm": 2.6473846435546875, "learning_rate": 2e-05, "loss": 0.04433635, "step": 22303 }, { "epoch": 44.608, "grad_norm": 1.2410426139831543, "learning_rate": 2e-05, "loss": 0.05194189, "step": 22304 }, { "epoch": 44.61, "grad_norm": 1.4751123189926147, "learning_rate": 2e-05, "loss": 0.04747447, "step": 22305 }, { "epoch": 44.612, "grad_norm": 1.1794182062149048, "learning_rate": 2e-05, "loss": 0.04860191, "step": 22306 }, { "epoch": 44.614, "grad_norm": 0.9958474636077881, "learning_rate": 2e-05, "loss": 0.03155885, "step": 22307 }, { "epoch": 44.616, "grad_norm": 1.1580699682235718, "learning_rate": 2e-05, "loss": 0.0375239, "step": 22308 }, { "epoch": 44.618, "grad_norm": 1.1620581150054932, "learning_rate": 2e-05, "loss": 0.05345889, "step": 22309 }, { "epoch": 44.62, "grad_norm": 1.1996610164642334, "learning_rate": 2e-05, "loss": 0.05827244, "step": 22310 }, { "epoch": 44.622, "grad_norm": 1.862815260887146, "learning_rate": 2e-05, "loss": 0.04650091, "step": 22311 }, { "epoch": 44.624, "grad_norm": 1.2667579650878906, "learning_rate": 2e-05, "loss": 0.0441907, "step": 22312 }, { "epoch": 44.626, "grad_norm": 1.3493322134017944, "learning_rate": 2e-05, "loss": 0.05513781, "step": 22313 }, { "epoch": 44.628, "grad_norm": 1.2868801355361938, "learning_rate": 2e-05, "loss": 0.05301888, "step": 22314 }, { "epoch": 44.63, "grad_norm": 1.0114258527755737, "learning_rate": 2e-05, "loss": 0.04150274, "step": 22315 }, { "epoch": 44.632, "grad_norm": 1.1847352981567383, "learning_rate": 2e-05, "loss": 0.04998568, "step": 22316 }, { "epoch": 44.634, "grad_norm": 1.4043679237365723, "learning_rate": 2e-05, "loss": 0.04988774, "step": 22317 }, { "epoch": 44.636, "grad_norm": 2.041414260864258, "learning_rate": 2e-05, "loss": 0.05052672, "step": 22318 }, { "epoch": 44.638, "grad_norm": 2.7315549850463867, "learning_rate": 2e-05, "loss": 0.05014642, "step": 22319 }, { "epoch": 44.64, "grad_norm": 1.9132074117660522, "learning_rate": 2e-05, "loss": 0.04718821, "step": 22320 }, { "epoch": 44.642, "grad_norm": 1.4658045768737793, "learning_rate": 2e-05, "loss": 0.04350495, "step": 22321 }, { "epoch": 44.644, "grad_norm": 1.4384865760803223, "learning_rate": 2e-05, "loss": 0.04404975, "step": 22322 }, { "epoch": 44.646, "grad_norm": 1.4497653245925903, "learning_rate": 2e-05, "loss": 0.03452429, "step": 22323 }, { "epoch": 44.648, "grad_norm": 1.3256078958511353, "learning_rate": 2e-05, "loss": 0.040176, "step": 22324 }, { "epoch": 44.65, "grad_norm": 1.1343733072280884, "learning_rate": 2e-05, "loss": 0.03543989, "step": 22325 }, { "epoch": 44.652, "grad_norm": 1.8382833003997803, "learning_rate": 2e-05, "loss": 0.05061209, "step": 22326 }, { "epoch": 44.654, "grad_norm": 1.3723702430725098, "learning_rate": 2e-05, "loss": 0.04715881, "step": 22327 }, { "epoch": 44.656, "grad_norm": 1.6342358589172363, "learning_rate": 2e-05, "loss": 0.05269512, "step": 22328 }, { "epoch": 44.658, "grad_norm": 1.3743045330047607, "learning_rate": 2e-05, "loss": 0.04504273, "step": 22329 }, { "epoch": 44.66, "grad_norm": 3.2658958435058594, "learning_rate": 2e-05, "loss": 0.05143912, "step": 22330 }, { "epoch": 44.662, "grad_norm": 2.2157278060913086, "learning_rate": 2e-05, "loss": 0.05146747, "step": 22331 }, { "epoch": 44.664, "grad_norm": 1.1454540491104126, "learning_rate": 2e-05, "loss": 0.04466172, "step": 22332 }, { "epoch": 44.666, "grad_norm": 1.2237814664840698, "learning_rate": 2e-05, "loss": 0.05431961, "step": 22333 }, { "epoch": 44.668, "grad_norm": 1.3983421325683594, "learning_rate": 2e-05, "loss": 0.03210323, "step": 22334 }, { "epoch": 44.67, "grad_norm": 1.5525994300842285, "learning_rate": 2e-05, "loss": 0.06114915, "step": 22335 }, { "epoch": 44.672, "grad_norm": 1.054101586341858, "learning_rate": 2e-05, "loss": 0.04592917, "step": 22336 }, { "epoch": 44.674, "grad_norm": 1.3390069007873535, "learning_rate": 2e-05, "loss": 0.04546981, "step": 22337 }, { "epoch": 44.676, "grad_norm": 1.3477840423583984, "learning_rate": 2e-05, "loss": 0.05513988, "step": 22338 }, { "epoch": 44.678, "grad_norm": 1.9911638498306274, "learning_rate": 2e-05, "loss": 0.04411037, "step": 22339 }, { "epoch": 44.68, "grad_norm": 1.5710328817367554, "learning_rate": 2e-05, "loss": 0.0609922, "step": 22340 }, { "epoch": 44.682, "grad_norm": 1.218172550201416, "learning_rate": 2e-05, "loss": 0.05370003, "step": 22341 }, { "epoch": 44.684, "grad_norm": 1.1348539590835571, "learning_rate": 2e-05, "loss": 0.0526722, "step": 22342 }, { "epoch": 44.686, "grad_norm": 1.610276460647583, "learning_rate": 2e-05, "loss": 0.070089, "step": 22343 }, { "epoch": 44.688, "grad_norm": 1.2752341032028198, "learning_rate": 2e-05, "loss": 0.04979321, "step": 22344 }, { "epoch": 44.69, "grad_norm": 1.252686619758606, "learning_rate": 2e-05, "loss": 0.04036684, "step": 22345 }, { "epoch": 44.692, "grad_norm": 1.023863673210144, "learning_rate": 2e-05, "loss": 0.02940369, "step": 22346 }, { "epoch": 44.694, "grad_norm": 0.9316999316215515, "learning_rate": 2e-05, "loss": 0.03571773, "step": 22347 }, { "epoch": 44.696, "grad_norm": 1.4448881149291992, "learning_rate": 2e-05, "loss": 0.06954427, "step": 22348 }, { "epoch": 44.698, "grad_norm": 1.6992919445037842, "learning_rate": 2e-05, "loss": 0.04004553, "step": 22349 }, { "epoch": 44.7, "grad_norm": 1.3580870628356934, "learning_rate": 2e-05, "loss": 0.04999983, "step": 22350 }, { "epoch": 44.702, "grad_norm": 1.1199650764465332, "learning_rate": 2e-05, "loss": 0.04584752, "step": 22351 }, { "epoch": 44.704, "grad_norm": 1.46734619140625, "learning_rate": 2e-05, "loss": 0.04816905, "step": 22352 }, { "epoch": 44.706, "grad_norm": 1.2117671966552734, "learning_rate": 2e-05, "loss": 0.03972312, "step": 22353 }, { "epoch": 44.708, "grad_norm": 1.2888193130493164, "learning_rate": 2e-05, "loss": 0.03945049, "step": 22354 }, { "epoch": 44.71, "grad_norm": 1.204562783241272, "learning_rate": 2e-05, "loss": 0.04760649, "step": 22355 }, { "epoch": 44.712, "grad_norm": 1.2271558046340942, "learning_rate": 2e-05, "loss": 0.04851292, "step": 22356 }, { "epoch": 44.714, "grad_norm": 1.2417253255844116, "learning_rate": 2e-05, "loss": 0.05184454, "step": 22357 }, { "epoch": 44.716, "grad_norm": 1.2915658950805664, "learning_rate": 2e-05, "loss": 0.0653154, "step": 22358 }, { "epoch": 44.718, "grad_norm": 1.126898169517517, "learning_rate": 2e-05, "loss": 0.04059651, "step": 22359 }, { "epoch": 44.72, "grad_norm": 1.2646050453186035, "learning_rate": 2e-05, "loss": 0.0460304, "step": 22360 }, { "epoch": 44.722, "grad_norm": 1.2595983743667603, "learning_rate": 2e-05, "loss": 0.0446999, "step": 22361 }, { "epoch": 44.724, "grad_norm": 1.0106483697891235, "learning_rate": 2e-05, "loss": 0.03425622, "step": 22362 }, { "epoch": 44.726, "grad_norm": 1.0983266830444336, "learning_rate": 2e-05, "loss": 0.04440197, "step": 22363 }, { "epoch": 44.728, "grad_norm": 1.0528661012649536, "learning_rate": 2e-05, "loss": 0.03264795, "step": 22364 }, { "epoch": 44.73, "grad_norm": 1.485556721687317, "learning_rate": 2e-05, "loss": 0.04851057, "step": 22365 }, { "epoch": 44.732, "grad_norm": 1.0484004020690918, "learning_rate": 2e-05, "loss": 0.03785881, "step": 22366 }, { "epoch": 44.734, "grad_norm": 1.0544923543930054, "learning_rate": 2e-05, "loss": 0.04710227, "step": 22367 }, { "epoch": 44.736, "grad_norm": 1.6099900007247925, "learning_rate": 2e-05, "loss": 0.0503059, "step": 22368 }, { "epoch": 44.738, "grad_norm": 1.0800563097000122, "learning_rate": 2e-05, "loss": 0.03581601, "step": 22369 }, { "epoch": 44.74, "grad_norm": 1.2417271137237549, "learning_rate": 2e-05, "loss": 0.05445302, "step": 22370 }, { "epoch": 44.742, "grad_norm": 1.3776066303253174, "learning_rate": 2e-05, "loss": 0.05917332, "step": 22371 }, { "epoch": 44.744, "grad_norm": 6.051969051361084, "learning_rate": 2e-05, "loss": 0.03186266, "step": 22372 }, { "epoch": 44.746, "grad_norm": 1.193009614944458, "learning_rate": 2e-05, "loss": 0.04203957, "step": 22373 }, { "epoch": 44.748, "grad_norm": 1.1934806108474731, "learning_rate": 2e-05, "loss": 0.05293836, "step": 22374 }, { "epoch": 44.75, "grad_norm": 1.2843340635299683, "learning_rate": 2e-05, "loss": 0.05309843, "step": 22375 }, { "epoch": 44.752, "grad_norm": 1.2187143564224243, "learning_rate": 2e-05, "loss": 0.05113508, "step": 22376 }, { "epoch": 44.754, "grad_norm": 1.5703728199005127, "learning_rate": 2e-05, "loss": 0.06165824, "step": 22377 }, { "epoch": 44.756, "grad_norm": 1.9462801218032837, "learning_rate": 2e-05, "loss": 0.06039472, "step": 22378 }, { "epoch": 44.758, "grad_norm": 1.2913825511932373, "learning_rate": 2e-05, "loss": 0.04901668, "step": 22379 }, { "epoch": 44.76, "grad_norm": 1.484749436378479, "learning_rate": 2e-05, "loss": 0.05032424, "step": 22380 }, { "epoch": 44.762, "grad_norm": 1.2912980318069458, "learning_rate": 2e-05, "loss": 0.05285494, "step": 22381 }, { "epoch": 44.764, "grad_norm": 1.471806526184082, "learning_rate": 2e-05, "loss": 0.05935736, "step": 22382 }, { "epoch": 44.766, "grad_norm": 1.4022643566131592, "learning_rate": 2e-05, "loss": 0.06408676, "step": 22383 }, { "epoch": 44.768, "grad_norm": 1.3989965915679932, "learning_rate": 2e-05, "loss": 0.0560586, "step": 22384 }, { "epoch": 44.77, "grad_norm": 1.7269079685211182, "learning_rate": 2e-05, "loss": 0.0490585, "step": 22385 }, { "epoch": 44.772, "grad_norm": 2.0063679218292236, "learning_rate": 2e-05, "loss": 0.06107655, "step": 22386 }, { "epoch": 44.774, "grad_norm": 1.0884474515914917, "learning_rate": 2e-05, "loss": 0.04211147, "step": 22387 }, { "epoch": 44.776, "grad_norm": 1.1166996955871582, "learning_rate": 2e-05, "loss": 0.04605059, "step": 22388 }, { "epoch": 44.778, "grad_norm": 1.3494291305541992, "learning_rate": 2e-05, "loss": 0.05825819, "step": 22389 }, { "epoch": 44.78, "grad_norm": 1.4325668811798096, "learning_rate": 2e-05, "loss": 0.0605996, "step": 22390 }, { "epoch": 44.782, "grad_norm": 1.1748969554901123, "learning_rate": 2e-05, "loss": 0.03148333, "step": 22391 }, { "epoch": 44.784, "grad_norm": 1.8170502185821533, "learning_rate": 2e-05, "loss": 0.06581346, "step": 22392 }, { "epoch": 44.786, "grad_norm": 1.3329365253448486, "learning_rate": 2e-05, "loss": 0.05404956, "step": 22393 }, { "epoch": 44.788, "grad_norm": 1.3301454782485962, "learning_rate": 2e-05, "loss": 0.04755008, "step": 22394 }, { "epoch": 44.79, "grad_norm": 1.2659374475479126, "learning_rate": 2e-05, "loss": 0.04017854, "step": 22395 }, { "epoch": 44.792, "grad_norm": 1.4887542724609375, "learning_rate": 2e-05, "loss": 0.04862245, "step": 22396 }, { "epoch": 44.794, "grad_norm": 1.8280566930770874, "learning_rate": 2e-05, "loss": 0.05217675, "step": 22397 }, { "epoch": 44.796, "grad_norm": 1.307554841041565, "learning_rate": 2e-05, "loss": 0.051239, "step": 22398 }, { "epoch": 44.798, "grad_norm": 1.391467571258545, "learning_rate": 2e-05, "loss": 0.041818, "step": 22399 }, { "epoch": 44.8, "grad_norm": 1.2929948568344116, "learning_rate": 2e-05, "loss": 0.03885982, "step": 22400 }, { "epoch": 44.802, "grad_norm": 0.8741863965988159, "learning_rate": 2e-05, "loss": 0.02871921, "step": 22401 }, { "epoch": 44.804, "grad_norm": 1.1634330749511719, "learning_rate": 2e-05, "loss": 0.05327924, "step": 22402 }, { "epoch": 44.806, "grad_norm": 1.2257028818130493, "learning_rate": 2e-05, "loss": 0.04897387, "step": 22403 }, { "epoch": 44.808, "grad_norm": 1.1557226181030273, "learning_rate": 2e-05, "loss": 0.04159442, "step": 22404 }, { "epoch": 44.81, "grad_norm": 2.137556791305542, "learning_rate": 2e-05, "loss": 0.05608717, "step": 22405 }, { "epoch": 44.812, "grad_norm": 1.224327802658081, "learning_rate": 2e-05, "loss": 0.04210414, "step": 22406 }, { "epoch": 44.814, "grad_norm": 1.342980146408081, "learning_rate": 2e-05, "loss": 0.06127755, "step": 22407 }, { "epoch": 44.816, "grad_norm": 1.5353951454162598, "learning_rate": 2e-05, "loss": 0.06302527, "step": 22408 }, { "epoch": 44.818, "grad_norm": 1.3694101572036743, "learning_rate": 2e-05, "loss": 0.04164361, "step": 22409 }, { "epoch": 44.82, "grad_norm": 1.6121137142181396, "learning_rate": 2e-05, "loss": 0.05497941, "step": 22410 }, { "epoch": 44.822, "grad_norm": 1.375234842300415, "learning_rate": 2e-05, "loss": 0.05874093, "step": 22411 }, { "epoch": 44.824, "grad_norm": 1.243156909942627, "learning_rate": 2e-05, "loss": 0.05009591, "step": 22412 }, { "epoch": 44.826, "grad_norm": 1.365031123161316, "learning_rate": 2e-05, "loss": 0.04909578, "step": 22413 }, { "epoch": 44.828, "grad_norm": 0.9832285046577454, "learning_rate": 2e-05, "loss": 0.0287962, "step": 22414 }, { "epoch": 44.83, "grad_norm": 1.187656044960022, "learning_rate": 2e-05, "loss": 0.04923886, "step": 22415 }, { "epoch": 44.832, "grad_norm": 1.2411973476409912, "learning_rate": 2e-05, "loss": 0.05921061, "step": 22416 }, { "epoch": 44.834, "grad_norm": 1.7300316095352173, "learning_rate": 2e-05, "loss": 0.0390442, "step": 22417 }, { "epoch": 44.836, "grad_norm": 1.0223551988601685, "learning_rate": 2e-05, "loss": 0.03893992, "step": 22418 }, { "epoch": 44.838, "grad_norm": 1.9662129878997803, "learning_rate": 2e-05, "loss": 0.03231378, "step": 22419 }, { "epoch": 44.84, "grad_norm": 1.0947517156600952, "learning_rate": 2e-05, "loss": 0.03922854, "step": 22420 }, { "epoch": 44.842, "grad_norm": 1.6704392433166504, "learning_rate": 2e-05, "loss": 0.04813308, "step": 22421 }, { "epoch": 44.844, "grad_norm": 1.2903733253479004, "learning_rate": 2e-05, "loss": 0.04857474, "step": 22422 }, { "epoch": 44.846, "grad_norm": 1.159749984741211, "learning_rate": 2e-05, "loss": 0.04078946, "step": 22423 }, { "epoch": 44.848, "grad_norm": 1.364219307899475, "learning_rate": 2e-05, "loss": 0.04483482, "step": 22424 }, { "epoch": 44.85, "grad_norm": 1.2046773433685303, "learning_rate": 2e-05, "loss": 0.05179583, "step": 22425 }, { "epoch": 44.852, "grad_norm": 1.419978380203247, "learning_rate": 2e-05, "loss": 0.0563489, "step": 22426 }, { "epoch": 44.854, "grad_norm": 1.4151443243026733, "learning_rate": 2e-05, "loss": 0.05637079, "step": 22427 }, { "epoch": 44.856, "grad_norm": 1.2609163522720337, "learning_rate": 2e-05, "loss": 0.05354507, "step": 22428 }, { "epoch": 44.858, "grad_norm": 1.2031396627426147, "learning_rate": 2e-05, "loss": 0.04809187, "step": 22429 }, { "epoch": 44.86, "grad_norm": 1.2825238704681396, "learning_rate": 2e-05, "loss": 0.05121071, "step": 22430 }, { "epoch": 44.862, "grad_norm": 1.0715117454528809, "learning_rate": 2e-05, "loss": 0.03546477, "step": 22431 }, { "epoch": 44.864, "grad_norm": 1.407547116279602, "learning_rate": 2e-05, "loss": 0.05819602, "step": 22432 }, { "epoch": 44.866, "grad_norm": 1.705998182296753, "learning_rate": 2e-05, "loss": 0.05690518, "step": 22433 }, { "epoch": 44.868, "grad_norm": 1.3436830043792725, "learning_rate": 2e-05, "loss": 0.04754122, "step": 22434 }, { "epoch": 44.87, "grad_norm": 1.3858736753463745, "learning_rate": 2e-05, "loss": 0.06503775, "step": 22435 }, { "epoch": 44.872, "grad_norm": 1.100571632385254, "learning_rate": 2e-05, "loss": 0.04838911, "step": 22436 }, { "epoch": 44.874, "grad_norm": 1.277199387550354, "learning_rate": 2e-05, "loss": 0.0342557, "step": 22437 }, { "epoch": 44.876, "grad_norm": 1.04167640209198, "learning_rate": 2e-05, "loss": 0.03733697, "step": 22438 }, { "epoch": 44.878, "grad_norm": 1.1401691436767578, "learning_rate": 2e-05, "loss": 0.04081557, "step": 22439 }, { "epoch": 44.88, "grad_norm": 1.1583184003829956, "learning_rate": 2e-05, "loss": 0.04236028, "step": 22440 }, { "epoch": 44.882, "grad_norm": 1.224388837814331, "learning_rate": 2e-05, "loss": 0.04657396, "step": 22441 }, { "epoch": 44.884, "grad_norm": 1.3904510736465454, "learning_rate": 2e-05, "loss": 0.05583341, "step": 22442 }, { "epoch": 44.886, "grad_norm": 1.3104639053344727, "learning_rate": 2e-05, "loss": 0.04422941, "step": 22443 }, { "epoch": 44.888, "grad_norm": 1.4339663982391357, "learning_rate": 2e-05, "loss": 0.05825556, "step": 22444 }, { "epoch": 44.89, "grad_norm": 2.071310520172119, "learning_rate": 2e-05, "loss": 0.04446717, "step": 22445 }, { "epoch": 44.892, "grad_norm": 1.579842448234558, "learning_rate": 2e-05, "loss": 0.06342889, "step": 22446 }, { "epoch": 44.894, "grad_norm": 1.1288223266601562, "learning_rate": 2e-05, "loss": 0.04180889, "step": 22447 }, { "epoch": 44.896, "grad_norm": 0.9220855832099915, "learning_rate": 2e-05, "loss": 0.03075635, "step": 22448 }, { "epoch": 44.898, "grad_norm": 1.1610661745071411, "learning_rate": 2e-05, "loss": 0.04427385, "step": 22449 }, { "epoch": 44.9, "grad_norm": 1.0175820589065552, "learning_rate": 2e-05, "loss": 0.03336541, "step": 22450 }, { "epoch": 44.902, "grad_norm": 1.2916738986968994, "learning_rate": 2e-05, "loss": 0.04845192, "step": 22451 }, { "epoch": 44.904, "grad_norm": 1.0840328931808472, "learning_rate": 2e-05, "loss": 0.03961993, "step": 22452 }, { "epoch": 44.906, "grad_norm": 1.168332815170288, "learning_rate": 2e-05, "loss": 0.04420353, "step": 22453 }, { "epoch": 44.908, "grad_norm": 1.2330690622329712, "learning_rate": 2e-05, "loss": 0.046922, "step": 22454 }, { "epoch": 44.91, "grad_norm": 1.777421236038208, "learning_rate": 2e-05, "loss": 0.06229702, "step": 22455 }, { "epoch": 44.912, "grad_norm": 1.3001389503479004, "learning_rate": 2e-05, "loss": 0.05773286, "step": 22456 }, { "epoch": 44.914, "grad_norm": 1.3984302282333374, "learning_rate": 2e-05, "loss": 0.06564485, "step": 22457 }, { "epoch": 44.916, "grad_norm": 1.200603723526001, "learning_rate": 2e-05, "loss": 0.04599864, "step": 22458 }, { "epoch": 44.918, "grad_norm": 1.0556275844573975, "learning_rate": 2e-05, "loss": 0.03204244, "step": 22459 }, { "epoch": 44.92, "grad_norm": 1.3520619869232178, "learning_rate": 2e-05, "loss": 0.05546412, "step": 22460 }, { "epoch": 44.922, "grad_norm": 1.3469730615615845, "learning_rate": 2e-05, "loss": 0.04959213, "step": 22461 }, { "epoch": 44.924, "grad_norm": 1.6626015901565552, "learning_rate": 2e-05, "loss": 0.06908607, "step": 22462 }, { "epoch": 44.926, "grad_norm": 1.079907774925232, "learning_rate": 2e-05, "loss": 0.03825868, "step": 22463 }, { "epoch": 44.928, "grad_norm": 1.4360984563827515, "learning_rate": 2e-05, "loss": 0.06819391, "step": 22464 }, { "epoch": 44.93, "grad_norm": 1.118546485900879, "learning_rate": 2e-05, "loss": 0.03266671, "step": 22465 }, { "epoch": 44.932, "grad_norm": 1.7453476190567017, "learning_rate": 2e-05, "loss": 0.06611872, "step": 22466 }, { "epoch": 44.934, "grad_norm": 1.403899073600769, "learning_rate": 2e-05, "loss": 0.05418711, "step": 22467 }, { "epoch": 44.936, "grad_norm": 4.624436378479004, "learning_rate": 2e-05, "loss": 0.07199771, "step": 22468 }, { "epoch": 44.938, "grad_norm": 1.1568007469177246, "learning_rate": 2e-05, "loss": 0.05059115, "step": 22469 }, { "epoch": 44.94, "grad_norm": 1.137229561805725, "learning_rate": 2e-05, "loss": 0.03843639, "step": 22470 }, { "epoch": 44.942, "grad_norm": 1.4798380136489868, "learning_rate": 2e-05, "loss": 0.04668789, "step": 22471 }, { "epoch": 44.944, "grad_norm": 1.986149549484253, "learning_rate": 2e-05, "loss": 0.03478555, "step": 22472 }, { "epoch": 44.946, "grad_norm": 1.0867761373519897, "learning_rate": 2e-05, "loss": 0.03376482, "step": 22473 }, { "epoch": 44.948, "grad_norm": 1.3647146224975586, "learning_rate": 2e-05, "loss": 0.06791535, "step": 22474 }, { "epoch": 44.95, "grad_norm": 1.3892532587051392, "learning_rate": 2e-05, "loss": 0.05850136, "step": 22475 }, { "epoch": 44.952, "grad_norm": 1.2366359233856201, "learning_rate": 2e-05, "loss": 0.04139677, "step": 22476 }, { "epoch": 44.954, "grad_norm": 2.012018918991089, "learning_rate": 2e-05, "loss": 0.05484272, "step": 22477 }, { "epoch": 44.956, "grad_norm": 1.2772232294082642, "learning_rate": 2e-05, "loss": 0.04107056, "step": 22478 }, { "epoch": 44.958, "grad_norm": 1.0646635293960571, "learning_rate": 2e-05, "loss": 0.04107633, "step": 22479 }, { "epoch": 44.96, "grad_norm": 3.412080764770508, "learning_rate": 2e-05, "loss": 0.06652329, "step": 22480 }, { "epoch": 44.962, "grad_norm": 1.39540696144104, "learning_rate": 2e-05, "loss": 0.04058058, "step": 22481 }, { "epoch": 44.964, "grad_norm": 1.372216820716858, "learning_rate": 2e-05, "loss": 0.05258819, "step": 22482 }, { "epoch": 44.966, "grad_norm": 1.1605424880981445, "learning_rate": 2e-05, "loss": 0.05386285, "step": 22483 }, { "epoch": 44.968, "grad_norm": 1.0500538349151611, "learning_rate": 2e-05, "loss": 0.03223278, "step": 22484 }, { "epoch": 44.97, "grad_norm": 1.171493411064148, "learning_rate": 2e-05, "loss": 0.0475014, "step": 22485 }, { "epoch": 44.972, "grad_norm": 1.1889984607696533, "learning_rate": 2e-05, "loss": 0.03335759, "step": 22486 }, { "epoch": 44.974, "grad_norm": 1.488581895828247, "learning_rate": 2e-05, "loss": 0.04842, "step": 22487 }, { "epoch": 44.976, "grad_norm": 1.2926756143569946, "learning_rate": 2e-05, "loss": 0.04585892, "step": 22488 }, { "epoch": 44.978, "grad_norm": 1.0679482221603394, "learning_rate": 2e-05, "loss": 0.03685507, "step": 22489 }, { "epoch": 44.98, "grad_norm": 1.2621574401855469, "learning_rate": 2e-05, "loss": 0.05050556, "step": 22490 }, { "epoch": 44.982, "grad_norm": 1.104034185409546, "learning_rate": 2e-05, "loss": 0.03997359, "step": 22491 }, { "epoch": 44.984, "grad_norm": 1.1348416805267334, "learning_rate": 2e-05, "loss": 0.03981521, "step": 22492 }, { "epoch": 44.986, "grad_norm": 1.5478214025497437, "learning_rate": 2e-05, "loss": 0.06456232, "step": 22493 }, { "epoch": 44.988, "grad_norm": 1.2740987539291382, "learning_rate": 2e-05, "loss": 0.04914555, "step": 22494 }, { "epoch": 44.99, "grad_norm": 1.046753168106079, "learning_rate": 2e-05, "loss": 0.04628645, "step": 22495 }, { "epoch": 44.992, "grad_norm": 2.204502820968628, "learning_rate": 2e-05, "loss": 0.04956776, "step": 22496 }, { "epoch": 44.994, "grad_norm": 1.2961649894714355, "learning_rate": 2e-05, "loss": 0.04877903, "step": 22497 }, { "epoch": 44.996, "grad_norm": 1.6191555261611938, "learning_rate": 2e-05, "loss": 0.05243221, "step": 22498 }, { "epoch": 44.998, "grad_norm": 1.054588794708252, "learning_rate": 2e-05, "loss": 0.04243375, "step": 22499 }, { "epoch": 45.0, "grad_norm": 1.2328218221664429, "learning_rate": 2e-05, "loss": 0.04469647, "step": 22500 }, { "epoch": 45.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9860279441117764, "Equal_1": 1.0, "Equal_2": 0.9860279441117764, "Equal_3": 0.9960079840319361, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9959919839679359, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.996, "Perpendicular_1": 0.998, "Perpendicular_2": 0.992, "Perpendicular_3": 0.8877755511022044, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.99, "PointLiesOnLine_1": 1.0, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9820359281437125 }, "eval_runtime": 321.1082, "eval_samples_per_second": 32.699, "eval_steps_per_second": 0.654, "step": 22500 }, { "epoch": 45.002, "grad_norm": 1.2806766033172607, "learning_rate": 2e-05, "loss": 0.03703031, "step": 22501 }, { "epoch": 45.004, "grad_norm": 1.2303308248519897, "learning_rate": 2e-05, "loss": 0.0578556, "step": 22502 }, { "epoch": 45.006, "grad_norm": 1.2770564556121826, "learning_rate": 2e-05, "loss": 0.0442674, "step": 22503 }, { "epoch": 45.008, "grad_norm": 1.6194312572479248, "learning_rate": 2e-05, "loss": 0.06683954, "step": 22504 }, { "epoch": 45.01, "grad_norm": 1.137593388557434, "learning_rate": 2e-05, "loss": 0.03966604, "step": 22505 }, { "epoch": 45.012, "grad_norm": 3.7040164470672607, "learning_rate": 2e-05, "loss": 0.05335582, "step": 22506 }, { "epoch": 45.014, "grad_norm": 1.019702672958374, "learning_rate": 2e-05, "loss": 0.039069, "step": 22507 }, { "epoch": 45.016, "grad_norm": 1.11601984500885, "learning_rate": 2e-05, "loss": 0.03871304, "step": 22508 }, { "epoch": 45.018, "grad_norm": 1.1694669723510742, "learning_rate": 2e-05, "loss": 0.04809815, "step": 22509 }, { "epoch": 45.02, "grad_norm": 1.0752638578414917, "learning_rate": 2e-05, "loss": 0.03829714, "step": 22510 }, { "epoch": 45.022, "grad_norm": 1.298243522644043, "learning_rate": 2e-05, "loss": 0.03895707, "step": 22511 }, { "epoch": 45.024, "grad_norm": 0.9618425965309143, "learning_rate": 2e-05, "loss": 0.03141353, "step": 22512 }, { "epoch": 45.026, "grad_norm": 1.0746543407440186, "learning_rate": 2e-05, "loss": 0.03593572, "step": 22513 }, { "epoch": 45.028, "grad_norm": 1.0014342069625854, "learning_rate": 2e-05, "loss": 0.0362047, "step": 22514 }, { "epoch": 45.03, "grad_norm": 1.230353593826294, "learning_rate": 2e-05, "loss": 0.04621077, "step": 22515 }, { "epoch": 45.032, "grad_norm": 1.305424690246582, "learning_rate": 2e-05, "loss": 0.06496035, "step": 22516 }, { "epoch": 45.034, "grad_norm": 0.9447730183601379, "learning_rate": 2e-05, "loss": 0.02817334, "step": 22517 }, { "epoch": 45.036, "grad_norm": 1.625741958618164, "learning_rate": 2e-05, "loss": 0.06509583, "step": 22518 }, { "epoch": 45.038, "grad_norm": 1.1020500659942627, "learning_rate": 2e-05, "loss": 0.04046689, "step": 22519 }, { "epoch": 45.04, "grad_norm": 1.0736149549484253, "learning_rate": 2e-05, "loss": 0.04837882, "step": 22520 }, { "epoch": 45.042, "grad_norm": 1.7894287109375, "learning_rate": 2e-05, "loss": 0.04507787, "step": 22521 }, { "epoch": 45.044, "grad_norm": 0.9819255471229553, "learning_rate": 2e-05, "loss": 0.02709861, "step": 22522 }, { "epoch": 45.046, "grad_norm": 1.2546124458312988, "learning_rate": 2e-05, "loss": 0.05152019, "step": 22523 }, { "epoch": 45.048, "grad_norm": 1.2507061958312988, "learning_rate": 2e-05, "loss": 0.0508112, "step": 22524 }, { "epoch": 45.05, "grad_norm": 1.0991570949554443, "learning_rate": 2e-05, "loss": 0.04952764, "step": 22525 }, { "epoch": 45.052, "grad_norm": 0.895489513874054, "learning_rate": 2e-05, "loss": 0.02615194, "step": 22526 }, { "epoch": 45.054, "grad_norm": 1.2041149139404297, "learning_rate": 2e-05, "loss": 0.05550298, "step": 22527 }, { "epoch": 45.056, "grad_norm": 1.2021666765213013, "learning_rate": 2e-05, "loss": 0.0547464, "step": 22528 }, { "epoch": 45.058, "grad_norm": 1.1511567831039429, "learning_rate": 2e-05, "loss": 0.04542843, "step": 22529 }, { "epoch": 45.06, "grad_norm": 1.7193586826324463, "learning_rate": 2e-05, "loss": 0.06118374, "step": 22530 }, { "epoch": 45.062, "grad_norm": 1.1019095182418823, "learning_rate": 2e-05, "loss": 0.04117382, "step": 22531 }, { "epoch": 45.064, "grad_norm": 0.9580153822898865, "learning_rate": 2e-05, "loss": 0.04075727, "step": 22532 }, { "epoch": 45.066, "grad_norm": 1.383394718170166, "learning_rate": 2e-05, "loss": 0.05107236, "step": 22533 }, { "epoch": 45.068, "grad_norm": 1.2198097705841064, "learning_rate": 2e-05, "loss": 0.05111481, "step": 22534 }, { "epoch": 45.07, "grad_norm": 1.1172356605529785, "learning_rate": 2e-05, "loss": 0.04869189, "step": 22535 }, { "epoch": 45.072, "grad_norm": 1.1854000091552734, "learning_rate": 2e-05, "loss": 0.05265629, "step": 22536 }, { "epoch": 45.074, "grad_norm": 1.2538152933120728, "learning_rate": 2e-05, "loss": 0.05185087, "step": 22537 }, { "epoch": 45.076, "grad_norm": 1.1875842809677124, "learning_rate": 2e-05, "loss": 0.05062048, "step": 22538 }, { "epoch": 45.078, "grad_norm": 1.1964948177337646, "learning_rate": 2e-05, "loss": 0.04546596, "step": 22539 }, { "epoch": 45.08, "grad_norm": 0.9874855279922485, "learning_rate": 2e-05, "loss": 0.03447803, "step": 22540 }, { "epoch": 45.082, "grad_norm": 1.2589225769042969, "learning_rate": 2e-05, "loss": 0.052671, "step": 22541 }, { "epoch": 45.084, "grad_norm": 1.2300832271575928, "learning_rate": 2e-05, "loss": 0.05471198, "step": 22542 }, { "epoch": 45.086, "grad_norm": 1.014027714729309, "learning_rate": 2e-05, "loss": 0.03973835, "step": 22543 }, { "epoch": 45.088, "grad_norm": 1.0800725221633911, "learning_rate": 2e-05, "loss": 0.0450437, "step": 22544 }, { "epoch": 45.09, "grad_norm": 1.2067081928253174, "learning_rate": 2e-05, "loss": 0.04083369, "step": 22545 }, { "epoch": 45.092, "grad_norm": 1.1328651905059814, "learning_rate": 2e-05, "loss": 0.05992271, "step": 22546 }, { "epoch": 45.094, "grad_norm": 1.180773138999939, "learning_rate": 2e-05, "loss": 0.04769462, "step": 22547 }, { "epoch": 45.096, "grad_norm": 1.1579021215438843, "learning_rate": 2e-05, "loss": 0.05516424, "step": 22548 }, { "epoch": 45.098, "grad_norm": 1.7966943979263306, "learning_rate": 2e-05, "loss": 0.0489285, "step": 22549 }, { "epoch": 45.1, "grad_norm": 1.2300606966018677, "learning_rate": 2e-05, "loss": 0.05087769, "step": 22550 }, { "epoch": 45.102, "grad_norm": 1.3096156120300293, "learning_rate": 2e-05, "loss": 0.04192181, "step": 22551 }, { "epoch": 45.104, "grad_norm": 1.259109377861023, "learning_rate": 2e-05, "loss": 0.05914876, "step": 22552 }, { "epoch": 45.106, "grad_norm": 0.9652984142303467, "learning_rate": 2e-05, "loss": 0.03940653, "step": 22553 }, { "epoch": 45.108, "grad_norm": 1.1099121570587158, "learning_rate": 2e-05, "loss": 0.05075653, "step": 22554 }, { "epoch": 45.11, "grad_norm": 1.2321460247039795, "learning_rate": 2e-05, "loss": 0.05328397, "step": 22555 }, { "epoch": 45.112, "grad_norm": 1.3149770498275757, "learning_rate": 2e-05, "loss": 0.0476843, "step": 22556 }, { "epoch": 45.114, "grad_norm": 0.9816151261329651, "learning_rate": 2e-05, "loss": 0.02892705, "step": 22557 }, { "epoch": 45.116, "grad_norm": 1.0387154817581177, "learning_rate": 2e-05, "loss": 0.029668, "step": 22558 }, { "epoch": 45.118, "grad_norm": 1.4174152612686157, "learning_rate": 2e-05, "loss": 0.03911356, "step": 22559 }, { "epoch": 45.12, "grad_norm": 1.172924518585205, "learning_rate": 2e-05, "loss": 0.04783778, "step": 22560 }, { "epoch": 45.122, "grad_norm": 1.978395700454712, "learning_rate": 2e-05, "loss": 0.06169991, "step": 22561 }, { "epoch": 45.124, "grad_norm": 1.215388298034668, "learning_rate": 2e-05, "loss": 0.05454685, "step": 22562 }, { "epoch": 45.126, "grad_norm": 0.908128559589386, "learning_rate": 2e-05, "loss": 0.028432, "step": 22563 }, { "epoch": 45.128, "grad_norm": 1.1321818828582764, "learning_rate": 2e-05, "loss": 0.0391438, "step": 22564 }, { "epoch": 45.13, "grad_norm": 1.0603052377700806, "learning_rate": 2e-05, "loss": 0.04576109, "step": 22565 }, { "epoch": 45.132, "grad_norm": 1.1814618110656738, "learning_rate": 2e-05, "loss": 0.04789328, "step": 22566 }, { "epoch": 45.134, "grad_norm": 1.4475892782211304, "learning_rate": 2e-05, "loss": 0.03765525, "step": 22567 }, { "epoch": 45.136, "grad_norm": 1.2016019821166992, "learning_rate": 2e-05, "loss": 0.0461702, "step": 22568 }, { "epoch": 45.138, "grad_norm": 1.2433879375457764, "learning_rate": 2e-05, "loss": 0.04532842, "step": 22569 }, { "epoch": 45.14, "grad_norm": 1.0823651552200317, "learning_rate": 2e-05, "loss": 0.03890754, "step": 22570 }, { "epoch": 45.142, "grad_norm": 1.3277068138122559, "learning_rate": 2e-05, "loss": 0.05912031, "step": 22571 }, { "epoch": 45.144, "grad_norm": 1.2994036674499512, "learning_rate": 2e-05, "loss": 0.06122985, "step": 22572 }, { "epoch": 45.146, "grad_norm": 6.131012916564941, "learning_rate": 2e-05, "loss": 0.06587172, "step": 22573 }, { "epoch": 45.148, "grad_norm": 1.9716764688491821, "learning_rate": 2e-05, "loss": 0.06575805, "step": 22574 }, { "epoch": 45.15, "grad_norm": 1.6159476041793823, "learning_rate": 2e-05, "loss": 0.04265655, "step": 22575 }, { "epoch": 45.152, "grad_norm": 1.4026684761047363, "learning_rate": 2e-05, "loss": 0.04744727, "step": 22576 }, { "epoch": 45.154, "grad_norm": 1.106533169746399, "learning_rate": 2e-05, "loss": 0.04791871, "step": 22577 }, { "epoch": 45.156, "grad_norm": 1.9970992803573608, "learning_rate": 2e-05, "loss": 0.05857123, "step": 22578 }, { "epoch": 45.158, "grad_norm": 2.6338624954223633, "learning_rate": 2e-05, "loss": 0.06465561, "step": 22579 }, { "epoch": 45.16, "grad_norm": 3.545212507247925, "learning_rate": 2e-05, "loss": 0.0638639, "step": 22580 }, { "epoch": 45.162, "grad_norm": 1.2237744331359863, "learning_rate": 2e-05, "loss": 0.05294487, "step": 22581 }, { "epoch": 45.164, "grad_norm": 0.872916042804718, "learning_rate": 2e-05, "loss": 0.02183536, "step": 22582 }, { "epoch": 45.166, "grad_norm": 1.3746517896652222, "learning_rate": 2e-05, "loss": 0.05417369, "step": 22583 }, { "epoch": 45.168, "grad_norm": 1.1517207622528076, "learning_rate": 2e-05, "loss": 0.03980051, "step": 22584 }, { "epoch": 45.17, "grad_norm": 1.2823890447616577, "learning_rate": 2e-05, "loss": 0.05957523, "step": 22585 }, { "epoch": 45.172, "grad_norm": 1.4123610258102417, "learning_rate": 2e-05, "loss": 0.05167025, "step": 22586 }, { "epoch": 45.174, "grad_norm": 1.3554683923721313, "learning_rate": 2e-05, "loss": 0.0463458, "step": 22587 }, { "epoch": 45.176, "grad_norm": 1.6008424758911133, "learning_rate": 2e-05, "loss": 0.05228919, "step": 22588 }, { "epoch": 45.178, "grad_norm": 1.1501260995864868, "learning_rate": 2e-05, "loss": 0.04863862, "step": 22589 }, { "epoch": 45.18, "grad_norm": 0.9484877586364746, "learning_rate": 2e-05, "loss": 0.03127411, "step": 22590 }, { "epoch": 45.182, "grad_norm": 1.6070059537887573, "learning_rate": 2e-05, "loss": 0.05212674, "step": 22591 }, { "epoch": 45.184, "grad_norm": 1.3073357343673706, "learning_rate": 2e-05, "loss": 0.05999123, "step": 22592 }, { "epoch": 45.186, "grad_norm": 1.1207677125930786, "learning_rate": 2e-05, "loss": 0.05001438, "step": 22593 }, { "epoch": 45.188, "grad_norm": 1.6346867084503174, "learning_rate": 2e-05, "loss": 0.04875707, "step": 22594 }, { "epoch": 45.19, "grad_norm": 1.2604049444198608, "learning_rate": 2e-05, "loss": 0.0618588, "step": 22595 }, { "epoch": 45.192, "grad_norm": 1.1987062692642212, "learning_rate": 2e-05, "loss": 0.05671919, "step": 22596 }, { "epoch": 45.194, "grad_norm": 1.0913773775100708, "learning_rate": 2e-05, "loss": 0.05150122, "step": 22597 }, { "epoch": 45.196, "grad_norm": 1.0405243635177612, "learning_rate": 2e-05, "loss": 0.03671101, "step": 22598 }, { "epoch": 45.198, "grad_norm": 1.0459086894989014, "learning_rate": 2e-05, "loss": 0.04587247, "step": 22599 }, { "epoch": 45.2, "grad_norm": 0.8678653836250305, "learning_rate": 2e-05, "loss": 0.03158401, "step": 22600 }, { "epoch": 45.202, "grad_norm": 1.2741189002990723, "learning_rate": 2e-05, "loss": 0.05065588, "step": 22601 }, { "epoch": 45.204, "grad_norm": 2.527108907699585, "learning_rate": 2e-05, "loss": 0.05647211, "step": 22602 }, { "epoch": 45.206, "grad_norm": 1.1058838367462158, "learning_rate": 2e-05, "loss": 0.04199415, "step": 22603 }, { "epoch": 45.208, "grad_norm": 1.1132142543792725, "learning_rate": 2e-05, "loss": 0.06637686, "step": 22604 }, { "epoch": 45.21, "grad_norm": 1.6548913717269897, "learning_rate": 2e-05, "loss": 0.05278435, "step": 22605 }, { "epoch": 45.212, "grad_norm": 1.0226762294769287, "learning_rate": 2e-05, "loss": 0.03409335, "step": 22606 }, { "epoch": 45.214, "grad_norm": 1.163483738899231, "learning_rate": 2e-05, "loss": 0.0428452, "step": 22607 }, { "epoch": 45.216, "grad_norm": 0.9998294115066528, "learning_rate": 2e-05, "loss": 0.03900335, "step": 22608 }, { "epoch": 45.218, "grad_norm": 1.5138134956359863, "learning_rate": 2e-05, "loss": 0.05385974, "step": 22609 }, { "epoch": 45.22, "grad_norm": 1.0882453918457031, "learning_rate": 2e-05, "loss": 0.04675626, "step": 22610 }, { "epoch": 45.222, "grad_norm": 1.041988730430603, "learning_rate": 2e-05, "loss": 0.04659366, "step": 22611 }, { "epoch": 45.224, "grad_norm": 1.9339371919631958, "learning_rate": 2e-05, "loss": 0.05996863, "step": 22612 }, { "epoch": 45.226, "grad_norm": 1.7372865676879883, "learning_rate": 2e-05, "loss": 0.06137735, "step": 22613 }, { "epoch": 45.228, "grad_norm": 1.2295641899108887, "learning_rate": 2e-05, "loss": 0.05497001, "step": 22614 }, { "epoch": 45.23, "grad_norm": 1.282944679260254, "learning_rate": 2e-05, "loss": 0.04878019, "step": 22615 }, { "epoch": 45.232, "grad_norm": 1.0056757926940918, "learning_rate": 2e-05, "loss": 0.03496859, "step": 22616 }, { "epoch": 45.234, "grad_norm": 1.1216771602630615, "learning_rate": 2e-05, "loss": 0.04637222, "step": 22617 }, { "epoch": 45.236, "grad_norm": 1.1101162433624268, "learning_rate": 2e-05, "loss": 0.05418177, "step": 22618 }, { "epoch": 45.238, "grad_norm": 2.1140050888061523, "learning_rate": 2e-05, "loss": 0.05144568, "step": 22619 }, { "epoch": 45.24, "grad_norm": 1.0529059171676636, "learning_rate": 2e-05, "loss": 0.04527819, "step": 22620 }, { "epoch": 45.242, "grad_norm": 1.2318655252456665, "learning_rate": 2e-05, "loss": 0.07417886, "step": 22621 }, { "epoch": 45.244, "grad_norm": 1.1625219583511353, "learning_rate": 2e-05, "loss": 0.04924101, "step": 22622 }, { "epoch": 45.246, "grad_norm": 1.1424137353897095, "learning_rate": 2e-05, "loss": 0.0479248, "step": 22623 }, { "epoch": 45.248, "grad_norm": 0.9770327806472778, "learning_rate": 2e-05, "loss": 0.04924466, "step": 22624 }, { "epoch": 45.25, "grad_norm": 1.0025733709335327, "learning_rate": 2e-05, "loss": 0.04422404, "step": 22625 }, { "epoch": 45.252, "grad_norm": 1.084976315498352, "learning_rate": 2e-05, "loss": 0.04491837, "step": 22626 }, { "epoch": 45.254, "grad_norm": 1.000255823135376, "learning_rate": 2e-05, "loss": 0.04638529, "step": 22627 }, { "epoch": 45.256, "grad_norm": 1.7140803337097168, "learning_rate": 2e-05, "loss": 0.0516253, "step": 22628 }, { "epoch": 45.258, "grad_norm": 1.0502322912216187, "learning_rate": 2e-05, "loss": 0.03990895, "step": 22629 }, { "epoch": 45.26, "grad_norm": 1.12234628200531, "learning_rate": 2e-05, "loss": 0.04589079, "step": 22630 }, { "epoch": 45.262, "grad_norm": 1.0489299297332764, "learning_rate": 2e-05, "loss": 0.03381399, "step": 22631 }, { "epoch": 45.264, "grad_norm": 1.171677589416504, "learning_rate": 2e-05, "loss": 0.05054912, "step": 22632 }, { "epoch": 45.266, "grad_norm": 1.0880128145217896, "learning_rate": 2e-05, "loss": 0.03867623, "step": 22633 }, { "epoch": 45.268, "grad_norm": 2.91977596282959, "learning_rate": 2e-05, "loss": 0.05142038, "step": 22634 }, { "epoch": 45.27, "grad_norm": 1.1017811298370361, "learning_rate": 2e-05, "loss": 0.04421684, "step": 22635 }, { "epoch": 45.272, "grad_norm": 1.2263803482055664, "learning_rate": 2e-05, "loss": 0.0688946, "step": 22636 }, { "epoch": 45.274, "grad_norm": 1.1422224044799805, "learning_rate": 2e-05, "loss": 0.04059188, "step": 22637 }, { "epoch": 45.276, "grad_norm": 1.0902363061904907, "learning_rate": 2e-05, "loss": 0.04885675, "step": 22638 }, { "epoch": 45.278, "grad_norm": 1.2119808197021484, "learning_rate": 2e-05, "loss": 0.04337999, "step": 22639 }, { "epoch": 45.28, "grad_norm": 1.135478138923645, "learning_rate": 2e-05, "loss": 0.04769107, "step": 22640 }, { "epoch": 45.282, "grad_norm": 0.9367907643318176, "learning_rate": 2e-05, "loss": 0.02835327, "step": 22641 }, { "epoch": 45.284, "grad_norm": 1.128351092338562, "learning_rate": 2e-05, "loss": 0.05129791, "step": 22642 }, { "epoch": 45.286, "grad_norm": 1.2856783866882324, "learning_rate": 2e-05, "loss": 0.05416092, "step": 22643 }, { "epoch": 45.288, "grad_norm": 1.115014672279358, "learning_rate": 2e-05, "loss": 0.03043066, "step": 22644 }, { "epoch": 45.29, "grad_norm": 1.2709392309188843, "learning_rate": 2e-05, "loss": 0.04415888, "step": 22645 }, { "epoch": 45.292, "grad_norm": 1.6946849822998047, "learning_rate": 2e-05, "loss": 0.0498039, "step": 22646 }, { "epoch": 45.294, "grad_norm": 3.176492929458618, "learning_rate": 2e-05, "loss": 0.08013065, "step": 22647 }, { "epoch": 45.296, "grad_norm": 1.2074462175369263, "learning_rate": 2e-05, "loss": 0.05039094, "step": 22648 }, { "epoch": 45.298, "grad_norm": 1.2501564025878906, "learning_rate": 2e-05, "loss": 0.05088755, "step": 22649 }, { "epoch": 45.3, "grad_norm": 0.9809415340423584, "learning_rate": 2e-05, "loss": 0.038453, "step": 22650 }, { "epoch": 45.302, "grad_norm": 1.3970571756362915, "learning_rate": 2e-05, "loss": 0.04629774, "step": 22651 }, { "epoch": 45.304, "grad_norm": 1.4258559942245483, "learning_rate": 2e-05, "loss": 0.04505184, "step": 22652 }, { "epoch": 45.306, "grad_norm": 1.1975897550582886, "learning_rate": 2e-05, "loss": 0.04722192, "step": 22653 }, { "epoch": 45.308, "grad_norm": 1.2232824563980103, "learning_rate": 2e-05, "loss": 0.06708793, "step": 22654 }, { "epoch": 45.31, "grad_norm": 0.874752938747406, "learning_rate": 2e-05, "loss": 0.03515249, "step": 22655 }, { "epoch": 45.312, "grad_norm": 1.2584034204483032, "learning_rate": 2e-05, "loss": 0.04658163, "step": 22656 }, { "epoch": 45.314, "grad_norm": 1.2609401941299438, "learning_rate": 2e-05, "loss": 0.05220157, "step": 22657 }, { "epoch": 45.316, "grad_norm": 2.2510557174682617, "learning_rate": 2e-05, "loss": 0.04448275, "step": 22658 }, { "epoch": 45.318, "grad_norm": 1.55043625831604, "learning_rate": 2e-05, "loss": 0.05576836, "step": 22659 }, { "epoch": 45.32, "grad_norm": 1.1508514881134033, "learning_rate": 2e-05, "loss": 0.05758848, "step": 22660 }, { "epoch": 45.322, "grad_norm": 1.7243223190307617, "learning_rate": 2e-05, "loss": 0.0692929, "step": 22661 }, { "epoch": 45.324, "grad_norm": 1.220308780670166, "learning_rate": 2e-05, "loss": 0.04048155, "step": 22662 }, { "epoch": 45.326, "grad_norm": 1.149865984916687, "learning_rate": 2e-05, "loss": 0.04047617, "step": 22663 }, { "epoch": 45.328, "grad_norm": 1.3912262916564941, "learning_rate": 2e-05, "loss": 0.04581297, "step": 22664 }, { "epoch": 45.33, "grad_norm": 1.1557172536849976, "learning_rate": 2e-05, "loss": 0.04854126, "step": 22665 }, { "epoch": 45.332, "grad_norm": 1.5989816188812256, "learning_rate": 2e-05, "loss": 0.05772961, "step": 22666 }, { "epoch": 45.334, "grad_norm": 0.9497352838516235, "learning_rate": 2e-05, "loss": 0.03526705, "step": 22667 }, { "epoch": 45.336, "grad_norm": 1.0269160270690918, "learning_rate": 2e-05, "loss": 0.0354956, "step": 22668 }, { "epoch": 45.338, "grad_norm": 0.9239309430122375, "learning_rate": 2e-05, "loss": 0.03152385, "step": 22669 }, { "epoch": 45.34, "grad_norm": 1.0795482397079468, "learning_rate": 2e-05, "loss": 0.04576925, "step": 22670 }, { "epoch": 45.342, "grad_norm": 1.2527785301208496, "learning_rate": 2e-05, "loss": 0.0519852, "step": 22671 }, { "epoch": 45.344, "grad_norm": 1.2454344034194946, "learning_rate": 2e-05, "loss": 0.05516186, "step": 22672 }, { "epoch": 45.346, "grad_norm": 1.122444748878479, "learning_rate": 2e-05, "loss": 0.05303994, "step": 22673 }, { "epoch": 45.348, "grad_norm": 1.169447660446167, "learning_rate": 2e-05, "loss": 0.05172439, "step": 22674 }, { "epoch": 45.35, "grad_norm": 1.2407636642456055, "learning_rate": 2e-05, "loss": 0.05318564, "step": 22675 }, { "epoch": 45.352, "grad_norm": 1.0374010801315308, "learning_rate": 2e-05, "loss": 0.0399787, "step": 22676 }, { "epoch": 45.354, "grad_norm": 1.6039947271347046, "learning_rate": 2e-05, "loss": 0.04658148, "step": 22677 }, { "epoch": 45.356, "grad_norm": 2.6198055744171143, "learning_rate": 2e-05, "loss": 0.04925442, "step": 22678 }, { "epoch": 45.358, "grad_norm": 1.2547622919082642, "learning_rate": 2e-05, "loss": 0.05453364, "step": 22679 }, { "epoch": 45.36, "grad_norm": 1.2854013442993164, "learning_rate": 2e-05, "loss": 0.05297067, "step": 22680 }, { "epoch": 45.362, "grad_norm": 1.3244527578353882, "learning_rate": 2e-05, "loss": 0.0462161, "step": 22681 }, { "epoch": 45.364, "grad_norm": 1.1322252750396729, "learning_rate": 2e-05, "loss": 0.04703336, "step": 22682 }, { "epoch": 45.366, "grad_norm": 1.50275456905365, "learning_rate": 2e-05, "loss": 0.0487467, "step": 22683 }, { "epoch": 45.368, "grad_norm": 1.0968610048294067, "learning_rate": 2e-05, "loss": 0.03857952, "step": 22684 }, { "epoch": 45.37, "grad_norm": 1.4444911479949951, "learning_rate": 2e-05, "loss": 0.0504374, "step": 22685 }, { "epoch": 45.372, "grad_norm": 1.5214262008666992, "learning_rate": 2e-05, "loss": 0.05310171, "step": 22686 }, { "epoch": 45.374, "grad_norm": 1.0097070932388306, "learning_rate": 2e-05, "loss": 0.04260479, "step": 22687 }, { "epoch": 45.376, "grad_norm": 0.984809160232544, "learning_rate": 2e-05, "loss": 0.03620268, "step": 22688 }, { "epoch": 45.378, "grad_norm": 3.517108201980591, "learning_rate": 2e-05, "loss": 0.05183204, "step": 22689 }, { "epoch": 45.38, "grad_norm": 1.4225305318832397, "learning_rate": 2e-05, "loss": 0.05968772, "step": 22690 }, { "epoch": 45.382, "grad_norm": 1.4418306350708008, "learning_rate": 2e-05, "loss": 0.04406624, "step": 22691 }, { "epoch": 45.384, "grad_norm": 1.0155731439590454, "learning_rate": 2e-05, "loss": 0.04117178, "step": 22692 }, { "epoch": 45.386, "grad_norm": 1.1497960090637207, "learning_rate": 2e-05, "loss": 0.04193247, "step": 22693 }, { "epoch": 45.388, "grad_norm": 1.2186371088027954, "learning_rate": 2e-05, "loss": 0.05743311, "step": 22694 }, { "epoch": 45.39, "grad_norm": 1.2327183485031128, "learning_rate": 2e-05, "loss": 0.04604057, "step": 22695 }, { "epoch": 45.392, "grad_norm": 1.4449176788330078, "learning_rate": 2e-05, "loss": 0.06641807, "step": 22696 }, { "epoch": 45.394, "grad_norm": 1.2744368314743042, "learning_rate": 2e-05, "loss": 0.0488281, "step": 22697 }, { "epoch": 45.396, "grad_norm": 1.1756504774093628, "learning_rate": 2e-05, "loss": 0.04841977, "step": 22698 }, { "epoch": 45.398, "grad_norm": 1.5969294309616089, "learning_rate": 2e-05, "loss": 0.0381519, "step": 22699 }, { "epoch": 45.4, "grad_norm": 1.177388310432434, "learning_rate": 2e-05, "loss": 0.05469193, "step": 22700 }, { "epoch": 45.402, "grad_norm": 1.2466199398040771, "learning_rate": 2e-05, "loss": 0.04355872, "step": 22701 }, { "epoch": 45.404, "grad_norm": 1.1346672773361206, "learning_rate": 2e-05, "loss": 0.0505007, "step": 22702 }, { "epoch": 45.406, "grad_norm": 1.0802158117294312, "learning_rate": 2e-05, "loss": 0.03773396, "step": 22703 }, { "epoch": 45.408, "grad_norm": 1.0188031196594238, "learning_rate": 2e-05, "loss": 0.03674341, "step": 22704 }, { "epoch": 45.41, "grad_norm": 0.9986094236373901, "learning_rate": 2e-05, "loss": 0.04618426, "step": 22705 }, { "epoch": 45.412, "grad_norm": 1.0472339391708374, "learning_rate": 2e-05, "loss": 0.04356561, "step": 22706 }, { "epoch": 45.414, "grad_norm": 1.2004998922348022, "learning_rate": 2e-05, "loss": 0.05084341, "step": 22707 }, { "epoch": 45.416, "grad_norm": 1.2121773958206177, "learning_rate": 2e-05, "loss": 0.04511089, "step": 22708 }, { "epoch": 45.418, "grad_norm": 1.7242828607559204, "learning_rate": 2e-05, "loss": 0.0549062, "step": 22709 }, { "epoch": 45.42, "grad_norm": 1.154462218284607, "learning_rate": 2e-05, "loss": 0.04893161, "step": 22710 }, { "epoch": 45.422, "grad_norm": 1.1248419284820557, "learning_rate": 2e-05, "loss": 0.04025434, "step": 22711 }, { "epoch": 45.424, "grad_norm": 1.2144428491592407, "learning_rate": 2e-05, "loss": 0.04915642, "step": 22712 }, { "epoch": 45.426, "grad_norm": 1.1011919975280762, "learning_rate": 2e-05, "loss": 0.04912124, "step": 22713 }, { "epoch": 45.428, "grad_norm": 1.134437084197998, "learning_rate": 2e-05, "loss": 0.0618168, "step": 22714 }, { "epoch": 45.43, "grad_norm": 1.2620517015457153, "learning_rate": 2e-05, "loss": 0.06262559, "step": 22715 }, { "epoch": 45.432, "grad_norm": 1.0983620882034302, "learning_rate": 2e-05, "loss": 0.04276703, "step": 22716 }, { "epoch": 45.434, "grad_norm": 1.1140186786651611, "learning_rate": 2e-05, "loss": 0.05332418, "step": 22717 }, { "epoch": 45.436, "grad_norm": 1.3023191690444946, "learning_rate": 2e-05, "loss": 0.06473353, "step": 22718 }, { "epoch": 45.438, "grad_norm": 0.8834937810897827, "learning_rate": 2e-05, "loss": 0.04048298, "step": 22719 }, { "epoch": 45.44, "grad_norm": 1.276069164276123, "learning_rate": 2e-05, "loss": 0.0446801, "step": 22720 }, { "epoch": 45.442, "grad_norm": 1.2004058361053467, "learning_rate": 2e-05, "loss": 0.05174657, "step": 22721 }, { "epoch": 45.444, "grad_norm": 1.3467042446136475, "learning_rate": 2e-05, "loss": 0.06729078, "step": 22722 }, { "epoch": 45.446, "grad_norm": 1.200899600982666, "learning_rate": 2e-05, "loss": 0.04949638, "step": 22723 }, { "epoch": 45.448, "grad_norm": 1.2946702241897583, "learning_rate": 2e-05, "loss": 0.05751118, "step": 22724 }, { "epoch": 45.45, "grad_norm": 1.094891905784607, "learning_rate": 2e-05, "loss": 0.04523872, "step": 22725 }, { "epoch": 45.452, "grad_norm": 1.049796462059021, "learning_rate": 2e-05, "loss": 0.05196452, "step": 22726 }, { "epoch": 45.454, "grad_norm": 1.2776124477386475, "learning_rate": 2e-05, "loss": 0.04980614, "step": 22727 }, { "epoch": 45.456, "grad_norm": 1.2622987031936646, "learning_rate": 2e-05, "loss": 0.05976017, "step": 22728 }, { "epoch": 45.458, "grad_norm": 1.2927825450897217, "learning_rate": 2e-05, "loss": 0.05953868, "step": 22729 }, { "epoch": 45.46, "grad_norm": 1.0884168148040771, "learning_rate": 2e-05, "loss": 0.03435563, "step": 22730 }, { "epoch": 45.462, "grad_norm": 1.2159061431884766, "learning_rate": 2e-05, "loss": 0.05816945, "step": 22731 }, { "epoch": 45.464, "grad_norm": 1.9903509616851807, "learning_rate": 2e-05, "loss": 0.05068534, "step": 22732 }, { "epoch": 45.466, "grad_norm": 1.7449239492416382, "learning_rate": 2e-05, "loss": 0.05164981, "step": 22733 }, { "epoch": 45.468, "grad_norm": 0.8933517336845398, "learning_rate": 2e-05, "loss": 0.03138543, "step": 22734 }, { "epoch": 45.47, "grad_norm": 1.1336041688919067, "learning_rate": 2e-05, "loss": 0.04539839, "step": 22735 }, { "epoch": 45.472, "grad_norm": 0.8588364720344543, "learning_rate": 2e-05, "loss": 0.03250713, "step": 22736 }, { "epoch": 45.474, "grad_norm": 0.9573074579238892, "learning_rate": 2e-05, "loss": 0.03433614, "step": 22737 }, { "epoch": 45.476, "grad_norm": 3.1477766036987305, "learning_rate": 2e-05, "loss": 0.07124802, "step": 22738 }, { "epoch": 45.478, "grad_norm": 1.1136095523834229, "learning_rate": 2e-05, "loss": 0.04518589, "step": 22739 }, { "epoch": 45.48, "grad_norm": 1.7395950555801392, "learning_rate": 2e-05, "loss": 0.03977414, "step": 22740 }, { "epoch": 45.482, "grad_norm": 1.2946288585662842, "learning_rate": 2e-05, "loss": 0.04495868, "step": 22741 }, { "epoch": 45.484, "grad_norm": 1.1849554777145386, "learning_rate": 2e-05, "loss": 0.04799411, "step": 22742 }, { "epoch": 45.486, "grad_norm": 1.13692307472229, "learning_rate": 2e-05, "loss": 0.04666179, "step": 22743 }, { "epoch": 45.488, "grad_norm": 1.1994067430496216, "learning_rate": 2e-05, "loss": 0.04230376, "step": 22744 }, { "epoch": 45.49, "grad_norm": 1.5030912160873413, "learning_rate": 2e-05, "loss": 0.05279317, "step": 22745 }, { "epoch": 45.492, "grad_norm": 0.8699735999107361, "learning_rate": 2e-05, "loss": 0.02855832, "step": 22746 }, { "epoch": 45.494, "grad_norm": 1.2766979932785034, "learning_rate": 2e-05, "loss": 0.04953785, "step": 22747 }, { "epoch": 45.496, "grad_norm": 1.0743104219436646, "learning_rate": 2e-05, "loss": 0.0503507, "step": 22748 }, { "epoch": 45.498, "grad_norm": 1.15504789352417, "learning_rate": 2e-05, "loss": 0.04353083, "step": 22749 }, { "epoch": 45.5, "grad_norm": 1.5795592069625854, "learning_rate": 2e-05, "loss": 0.05220584, "step": 22750 }, { "epoch": 45.502, "grad_norm": 1.1085896492004395, "learning_rate": 2e-05, "loss": 0.05066767, "step": 22751 }, { "epoch": 45.504, "grad_norm": 1.2716381549835205, "learning_rate": 2e-05, "loss": 0.06609177, "step": 22752 }, { "epoch": 45.506, "grad_norm": 1.9817065000534058, "learning_rate": 2e-05, "loss": 0.06183188, "step": 22753 }, { "epoch": 45.508, "grad_norm": 1.3558555841445923, "learning_rate": 2e-05, "loss": 0.05611308, "step": 22754 }, { "epoch": 45.51, "grad_norm": 1.2409502267837524, "learning_rate": 2e-05, "loss": 0.03493334, "step": 22755 }, { "epoch": 45.512, "grad_norm": 1.2743219137191772, "learning_rate": 2e-05, "loss": 0.04769837, "step": 22756 }, { "epoch": 45.514, "grad_norm": 0.9491370320320129, "learning_rate": 2e-05, "loss": 0.03029821, "step": 22757 }, { "epoch": 45.516, "grad_norm": 4.352717876434326, "learning_rate": 2e-05, "loss": 0.03928834, "step": 22758 }, { "epoch": 45.518, "grad_norm": 1.3825864791870117, "learning_rate": 2e-05, "loss": 0.05772555, "step": 22759 }, { "epoch": 45.52, "grad_norm": 1.626591682434082, "learning_rate": 2e-05, "loss": 0.05705488, "step": 22760 }, { "epoch": 45.522, "grad_norm": 1.2819281816482544, "learning_rate": 2e-05, "loss": 0.06068337, "step": 22761 }, { "epoch": 45.524, "grad_norm": 2.8456923961639404, "learning_rate": 2e-05, "loss": 0.05577563, "step": 22762 }, { "epoch": 45.526, "grad_norm": 1.33793044090271, "learning_rate": 2e-05, "loss": 0.05010552, "step": 22763 }, { "epoch": 45.528, "grad_norm": 1.059351921081543, "learning_rate": 2e-05, "loss": 0.04426561, "step": 22764 }, { "epoch": 45.53, "grad_norm": 1.1769965887069702, "learning_rate": 2e-05, "loss": 0.03966656, "step": 22765 }, { "epoch": 45.532, "grad_norm": 1.2695751190185547, "learning_rate": 2e-05, "loss": 0.04549221, "step": 22766 }, { "epoch": 45.534, "grad_norm": 1.2788498401641846, "learning_rate": 2e-05, "loss": 0.05216346, "step": 22767 }, { "epoch": 45.536, "grad_norm": 0.9913758039474487, "learning_rate": 2e-05, "loss": 0.03447246, "step": 22768 }, { "epoch": 45.538, "grad_norm": 1.0714240074157715, "learning_rate": 2e-05, "loss": 0.04273647, "step": 22769 }, { "epoch": 45.54, "grad_norm": 1.2439073324203491, "learning_rate": 2e-05, "loss": 0.04950628, "step": 22770 }, { "epoch": 45.542, "grad_norm": 1.4593321084976196, "learning_rate": 2e-05, "loss": 0.03826152, "step": 22771 }, { "epoch": 45.544, "grad_norm": 1.562803864479065, "learning_rate": 2e-05, "loss": 0.05784161, "step": 22772 }, { "epoch": 45.546, "grad_norm": 0.9718835949897766, "learning_rate": 2e-05, "loss": 0.04087242, "step": 22773 }, { "epoch": 45.548, "grad_norm": 1.1946215629577637, "learning_rate": 2e-05, "loss": 0.03955045, "step": 22774 }, { "epoch": 45.55, "grad_norm": 5.945659160614014, "learning_rate": 2e-05, "loss": 0.05163043, "step": 22775 }, { "epoch": 45.552, "grad_norm": 0.9503419995307922, "learning_rate": 2e-05, "loss": 0.03446332, "step": 22776 }, { "epoch": 45.554, "grad_norm": 2.860668897628784, "learning_rate": 2e-05, "loss": 0.06032272, "step": 22777 }, { "epoch": 45.556, "grad_norm": 1.0413198471069336, "learning_rate": 2e-05, "loss": 0.03605777, "step": 22778 }, { "epoch": 45.558, "grad_norm": 1.3972159624099731, "learning_rate": 2e-05, "loss": 0.0614309, "step": 22779 }, { "epoch": 45.56, "grad_norm": 1.1884814500808716, "learning_rate": 2e-05, "loss": 0.04031287, "step": 22780 }, { "epoch": 45.562, "grad_norm": 1.1417438983917236, "learning_rate": 2e-05, "loss": 0.04732093, "step": 22781 }, { "epoch": 45.564, "grad_norm": 1.4334638118743896, "learning_rate": 2e-05, "loss": 0.05984236, "step": 22782 }, { "epoch": 45.566, "grad_norm": 2.67592453956604, "learning_rate": 2e-05, "loss": 0.07209148, "step": 22783 }, { "epoch": 45.568, "grad_norm": 1.1358227729797363, "learning_rate": 2e-05, "loss": 0.04441652, "step": 22784 }, { "epoch": 45.57, "grad_norm": 1.161139726638794, "learning_rate": 2e-05, "loss": 0.05294163, "step": 22785 }, { "epoch": 45.572, "grad_norm": 4.65118932723999, "learning_rate": 2e-05, "loss": 0.03936018, "step": 22786 }, { "epoch": 45.574, "grad_norm": 1.0236941576004028, "learning_rate": 2e-05, "loss": 0.05270987, "step": 22787 }, { "epoch": 45.576, "grad_norm": 1.3920187950134277, "learning_rate": 2e-05, "loss": 0.05220519, "step": 22788 }, { "epoch": 45.578, "grad_norm": 1.4506982564926147, "learning_rate": 2e-05, "loss": 0.05476096, "step": 22789 }, { "epoch": 45.58, "grad_norm": 1.6124314069747925, "learning_rate": 2e-05, "loss": 0.04182716, "step": 22790 }, { "epoch": 45.582, "grad_norm": 1.353986144065857, "learning_rate": 2e-05, "loss": 0.04613897, "step": 22791 }, { "epoch": 45.584, "grad_norm": 1.1616084575653076, "learning_rate": 2e-05, "loss": 0.06366609, "step": 22792 }, { "epoch": 45.586, "grad_norm": 1.0128250122070312, "learning_rate": 2e-05, "loss": 0.03711364, "step": 22793 }, { "epoch": 45.588, "grad_norm": 1.2062525749206543, "learning_rate": 2e-05, "loss": 0.04811207, "step": 22794 }, { "epoch": 45.59, "grad_norm": 1.031899094581604, "learning_rate": 2e-05, "loss": 0.03934745, "step": 22795 }, { "epoch": 45.592, "grad_norm": 1.0343753099441528, "learning_rate": 2e-05, "loss": 0.04823088, "step": 22796 }, { "epoch": 45.594, "grad_norm": 3.7844700813293457, "learning_rate": 2e-05, "loss": 0.05894583, "step": 22797 }, { "epoch": 45.596, "grad_norm": 1.1267212629318237, "learning_rate": 2e-05, "loss": 0.03568856, "step": 22798 }, { "epoch": 45.598, "grad_norm": 1.3324168920516968, "learning_rate": 2e-05, "loss": 0.05654506, "step": 22799 }, { "epoch": 45.6, "grad_norm": 1.0916633605957031, "learning_rate": 2e-05, "loss": 0.04748604, "step": 22800 }, { "epoch": 45.602, "grad_norm": 1.7681325674057007, "learning_rate": 2e-05, "loss": 0.04749766, "step": 22801 }, { "epoch": 45.604, "grad_norm": 1.385416030883789, "learning_rate": 2e-05, "loss": 0.05868257, "step": 22802 }, { "epoch": 45.606, "grad_norm": 2.559743642807007, "learning_rate": 2e-05, "loss": 0.06657992, "step": 22803 }, { "epoch": 45.608, "grad_norm": 1.5238560438156128, "learning_rate": 2e-05, "loss": 0.05722816, "step": 22804 }, { "epoch": 45.61, "grad_norm": 1.215009331703186, "learning_rate": 2e-05, "loss": 0.05467505, "step": 22805 }, { "epoch": 45.612, "grad_norm": 1.3188890218734741, "learning_rate": 2e-05, "loss": 0.07242645, "step": 22806 }, { "epoch": 45.614, "grad_norm": 1.3299347162246704, "learning_rate": 2e-05, "loss": 0.04678376, "step": 22807 }, { "epoch": 45.616, "grad_norm": 1.213125467300415, "learning_rate": 2e-05, "loss": 0.05465173, "step": 22808 }, { "epoch": 45.618, "grad_norm": 0.9832747578620911, "learning_rate": 2e-05, "loss": 0.03593368, "step": 22809 }, { "epoch": 45.62, "grad_norm": 1.4957003593444824, "learning_rate": 2e-05, "loss": 0.04334493, "step": 22810 }, { "epoch": 45.622, "grad_norm": 1.1476776599884033, "learning_rate": 2e-05, "loss": 0.05104943, "step": 22811 }, { "epoch": 45.624, "grad_norm": 1.1928304433822632, "learning_rate": 2e-05, "loss": 0.05153049, "step": 22812 }, { "epoch": 45.626, "grad_norm": 3.3560004234313965, "learning_rate": 2e-05, "loss": 0.04769015, "step": 22813 }, { "epoch": 45.628, "grad_norm": 1.2147530317306519, "learning_rate": 2e-05, "loss": 0.04993775, "step": 22814 }, { "epoch": 45.63, "grad_norm": 1.3720028400421143, "learning_rate": 2e-05, "loss": 0.04633579, "step": 22815 }, { "epoch": 45.632, "grad_norm": 1.1289563179016113, "learning_rate": 2e-05, "loss": 0.0430528, "step": 22816 }, { "epoch": 45.634, "grad_norm": 1.3510793447494507, "learning_rate": 2e-05, "loss": 0.04878941, "step": 22817 }, { "epoch": 45.636, "grad_norm": 1.4363319873809814, "learning_rate": 2e-05, "loss": 0.05465399, "step": 22818 }, { "epoch": 45.638, "grad_norm": 2.460465669631958, "learning_rate": 2e-05, "loss": 0.05995677, "step": 22819 }, { "epoch": 45.64, "grad_norm": 1.1262542009353638, "learning_rate": 2e-05, "loss": 0.03892493, "step": 22820 }, { "epoch": 45.642, "grad_norm": 1.2960659265518188, "learning_rate": 2e-05, "loss": 0.0484963, "step": 22821 }, { "epoch": 45.644, "grad_norm": 1.1812695264816284, "learning_rate": 2e-05, "loss": 0.05082157, "step": 22822 }, { "epoch": 45.646, "grad_norm": 1.172400712966919, "learning_rate": 2e-05, "loss": 0.04413823, "step": 22823 }, { "epoch": 45.648, "grad_norm": 1.1976510286331177, "learning_rate": 2e-05, "loss": 0.04225794, "step": 22824 }, { "epoch": 45.65, "grad_norm": 1.4453816413879395, "learning_rate": 2e-05, "loss": 0.05087319, "step": 22825 }, { "epoch": 45.652, "grad_norm": 0.993677020072937, "learning_rate": 2e-05, "loss": 0.02660798, "step": 22826 }, { "epoch": 45.654, "grad_norm": 1.0433624982833862, "learning_rate": 2e-05, "loss": 0.04253418, "step": 22827 }, { "epoch": 45.656, "grad_norm": 1.026785969734192, "learning_rate": 2e-05, "loss": 0.04078073, "step": 22828 }, { "epoch": 45.658, "grad_norm": 1.125288963317871, "learning_rate": 2e-05, "loss": 0.04690692, "step": 22829 }, { "epoch": 45.66, "grad_norm": 1.1184576749801636, "learning_rate": 2e-05, "loss": 0.02984325, "step": 22830 }, { "epoch": 45.662, "grad_norm": 1.30687415599823, "learning_rate": 2e-05, "loss": 0.05822021, "step": 22831 }, { "epoch": 45.664, "grad_norm": 1.2212743759155273, "learning_rate": 2e-05, "loss": 0.04939518, "step": 22832 }, { "epoch": 45.666, "grad_norm": 1.186759352684021, "learning_rate": 2e-05, "loss": 0.05301164, "step": 22833 }, { "epoch": 45.668, "grad_norm": 1.2905839681625366, "learning_rate": 2e-05, "loss": 0.04207082, "step": 22834 }, { "epoch": 45.67, "grad_norm": 1.1900533437728882, "learning_rate": 2e-05, "loss": 0.05991127, "step": 22835 }, { "epoch": 45.672, "grad_norm": 2.1371090412139893, "learning_rate": 2e-05, "loss": 0.05556452, "step": 22836 }, { "epoch": 45.674, "grad_norm": 1.6555566787719727, "learning_rate": 2e-05, "loss": 0.0666324, "step": 22837 }, { "epoch": 45.676, "grad_norm": 1.2142423391342163, "learning_rate": 2e-05, "loss": 0.04500743, "step": 22838 }, { "epoch": 45.678, "grad_norm": 1.204254150390625, "learning_rate": 2e-05, "loss": 0.05000075, "step": 22839 }, { "epoch": 45.68, "grad_norm": 1.3275655508041382, "learning_rate": 2e-05, "loss": 0.05476985, "step": 22840 }, { "epoch": 45.682, "grad_norm": 1.0724529027938843, "learning_rate": 2e-05, "loss": 0.03965185, "step": 22841 }, { "epoch": 45.684, "grad_norm": 1.363209843635559, "learning_rate": 2e-05, "loss": 0.0393138, "step": 22842 }, { "epoch": 45.686, "grad_norm": 1.3756287097930908, "learning_rate": 2e-05, "loss": 0.0670438, "step": 22843 }, { "epoch": 45.688, "grad_norm": 1.252241849899292, "learning_rate": 2e-05, "loss": 0.05263539, "step": 22844 }, { "epoch": 45.69, "grad_norm": 1.1118369102478027, "learning_rate": 2e-05, "loss": 0.05109958, "step": 22845 }, { "epoch": 45.692, "grad_norm": 1.2781487703323364, "learning_rate": 2e-05, "loss": 0.05006749, "step": 22846 }, { "epoch": 45.694, "grad_norm": 1.309794306755066, "learning_rate": 2e-05, "loss": 0.03993253, "step": 22847 }, { "epoch": 45.696, "grad_norm": 1.3862566947937012, "learning_rate": 2e-05, "loss": 0.03329, "step": 22848 }, { "epoch": 45.698, "grad_norm": 1.0639959573745728, "learning_rate": 2e-05, "loss": 0.03904609, "step": 22849 }, { "epoch": 45.7, "grad_norm": 1.0626970529556274, "learning_rate": 2e-05, "loss": 0.03999314, "step": 22850 }, { "epoch": 45.702, "grad_norm": 1.2711509466171265, "learning_rate": 2e-05, "loss": 0.0375726, "step": 22851 }, { "epoch": 45.704, "grad_norm": 0.9790001511573792, "learning_rate": 2e-05, "loss": 0.03540372, "step": 22852 }, { "epoch": 45.706, "grad_norm": 0.975598931312561, "learning_rate": 2e-05, "loss": 0.03365985, "step": 22853 }, { "epoch": 45.708, "grad_norm": 1.0220794677734375, "learning_rate": 2e-05, "loss": 0.04530801, "step": 22854 }, { "epoch": 45.71, "grad_norm": 1.2147080898284912, "learning_rate": 2e-05, "loss": 0.06349006, "step": 22855 }, { "epoch": 45.712, "grad_norm": 1.1468337774276733, "learning_rate": 2e-05, "loss": 0.04973483, "step": 22856 }, { "epoch": 45.714, "grad_norm": 1.4470347166061401, "learning_rate": 2e-05, "loss": 0.04426311, "step": 22857 }, { "epoch": 45.716, "grad_norm": 1.1811226606369019, "learning_rate": 2e-05, "loss": 0.04253963, "step": 22858 }, { "epoch": 45.718, "grad_norm": 1.0529688596725464, "learning_rate": 2e-05, "loss": 0.04702899, "step": 22859 }, { "epoch": 45.72, "grad_norm": 1.1447519063949585, "learning_rate": 2e-05, "loss": 0.06699675, "step": 22860 }, { "epoch": 45.722, "grad_norm": 1.0626235008239746, "learning_rate": 2e-05, "loss": 0.04488755, "step": 22861 }, { "epoch": 45.724, "grad_norm": 1.3267689943313599, "learning_rate": 2e-05, "loss": 0.05437113, "step": 22862 }, { "epoch": 45.726, "grad_norm": 1.0401638746261597, "learning_rate": 2e-05, "loss": 0.03539321, "step": 22863 }, { "epoch": 45.728, "grad_norm": 1.124693512916565, "learning_rate": 2e-05, "loss": 0.03614509, "step": 22864 }, { "epoch": 45.73, "grad_norm": 1.4313117265701294, "learning_rate": 2e-05, "loss": 0.04222115, "step": 22865 }, { "epoch": 45.732, "grad_norm": 2.3040075302124023, "learning_rate": 2e-05, "loss": 0.03782343, "step": 22866 }, { "epoch": 45.734, "grad_norm": 1.12531316280365, "learning_rate": 2e-05, "loss": 0.05212967, "step": 22867 }, { "epoch": 45.736, "grad_norm": 1.1863642930984497, "learning_rate": 2e-05, "loss": 0.04659764, "step": 22868 }, { "epoch": 45.738, "grad_norm": 3.5154294967651367, "learning_rate": 2e-05, "loss": 0.0452398, "step": 22869 }, { "epoch": 45.74, "grad_norm": 1.2669914960861206, "learning_rate": 2e-05, "loss": 0.04121526, "step": 22870 }, { "epoch": 45.742, "grad_norm": 1.5299526453018188, "learning_rate": 2e-05, "loss": 0.06856737, "step": 22871 }, { "epoch": 45.744, "grad_norm": 1.943005919456482, "learning_rate": 2e-05, "loss": 0.03736391, "step": 22872 }, { "epoch": 45.746, "grad_norm": 1.5813325643539429, "learning_rate": 2e-05, "loss": 0.05658036, "step": 22873 }, { "epoch": 45.748, "grad_norm": 1.1869730949401855, "learning_rate": 2e-05, "loss": 0.04118659, "step": 22874 }, { "epoch": 45.75, "grad_norm": 1.1262238025665283, "learning_rate": 2e-05, "loss": 0.05163476, "step": 22875 }, { "epoch": 45.752, "grad_norm": 1.8035818338394165, "learning_rate": 2e-05, "loss": 0.03897575, "step": 22876 }, { "epoch": 45.754, "grad_norm": 1.2717819213867188, "learning_rate": 2e-05, "loss": 0.06012269, "step": 22877 }, { "epoch": 45.756, "grad_norm": 1.081311583518982, "learning_rate": 2e-05, "loss": 0.05380928, "step": 22878 }, { "epoch": 45.758, "grad_norm": 1.1609777212142944, "learning_rate": 2e-05, "loss": 0.04177097, "step": 22879 }, { "epoch": 45.76, "grad_norm": 2.053823471069336, "learning_rate": 2e-05, "loss": 0.04388625, "step": 22880 }, { "epoch": 45.762, "grad_norm": 1.1730915307998657, "learning_rate": 2e-05, "loss": 0.03498461, "step": 22881 }, { "epoch": 45.764, "grad_norm": 1.1314507722854614, "learning_rate": 2e-05, "loss": 0.04693135, "step": 22882 }, { "epoch": 45.766, "grad_norm": 1.214177131652832, "learning_rate": 2e-05, "loss": 0.05761575, "step": 22883 }, { "epoch": 45.768, "grad_norm": 1.2264306545257568, "learning_rate": 2e-05, "loss": 0.05583285, "step": 22884 }, { "epoch": 45.77, "grad_norm": 1.1142363548278809, "learning_rate": 2e-05, "loss": 0.03938077, "step": 22885 }, { "epoch": 45.772, "grad_norm": 1.2358603477478027, "learning_rate": 2e-05, "loss": 0.05267806, "step": 22886 }, { "epoch": 45.774, "grad_norm": 2.0392813682556152, "learning_rate": 2e-05, "loss": 0.05020905, "step": 22887 }, { "epoch": 45.776, "grad_norm": 1.9601068496704102, "learning_rate": 2e-05, "loss": 0.06155182, "step": 22888 }, { "epoch": 45.778, "grad_norm": 1.2073707580566406, "learning_rate": 2e-05, "loss": 0.04262544, "step": 22889 }, { "epoch": 45.78, "grad_norm": 1.61870276927948, "learning_rate": 2e-05, "loss": 0.04001756, "step": 22890 }, { "epoch": 45.782, "grad_norm": 1.62678062915802, "learning_rate": 2e-05, "loss": 0.04077552, "step": 22891 }, { "epoch": 45.784, "grad_norm": 1.0999619960784912, "learning_rate": 2e-05, "loss": 0.0418282, "step": 22892 }, { "epoch": 45.786, "grad_norm": 3.3379061222076416, "learning_rate": 2e-05, "loss": 0.06497736, "step": 22893 }, { "epoch": 45.788, "grad_norm": 1.6847137212753296, "learning_rate": 2e-05, "loss": 0.05242064, "step": 22894 }, { "epoch": 45.79, "grad_norm": 1.1532042026519775, "learning_rate": 2e-05, "loss": 0.0519854, "step": 22895 }, { "epoch": 45.792, "grad_norm": 1.1070436239242554, "learning_rate": 2e-05, "loss": 0.04739845, "step": 22896 }, { "epoch": 45.794, "grad_norm": 1.0771516561508179, "learning_rate": 2e-05, "loss": 0.04649124, "step": 22897 }, { "epoch": 45.796, "grad_norm": 1.0672521591186523, "learning_rate": 2e-05, "loss": 0.04606741, "step": 22898 }, { "epoch": 45.798, "grad_norm": 1.7498153448104858, "learning_rate": 2e-05, "loss": 0.0525641, "step": 22899 }, { "epoch": 45.8, "grad_norm": 3.885802984237671, "learning_rate": 2e-05, "loss": 0.05591378, "step": 22900 }, { "epoch": 45.802, "grad_norm": 0.9939262866973877, "learning_rate": 2e-05, "loss": 0.0356969, "step": 22901 }, { "epoch": 45.804, "grad_norm": 1.0101487636566162, "learning_rate": 2e-05, "loss": 0.03555669, "step": 22902 }, { "epoch": 45.806, "grad_norm": 1.3214828968048096, "learning_rate": 2e-05, "loss": 0.04506984, "step": 22903 }, { "epoch": 45.808, "grad_norm": 1.2031276226043701, "learning_rate": 2e-05, "loss": 0.04604453, "step": 22904 }, { "epoch": 45.81, "grad_norm": 0.993742048740387, "learning_rate": 2e-05, "loss": 0.04115344, "step": 22905 }, { "epoch": 45.812, "grad_norm": 1.3306998014450073, "learning_rate": 2e-05, "loss": 0.05285704, "step": 22906 }, { "epoch": 45.814, "grad_norm": 1.5680806636810303, "learning_rate": 2e-05, "loss": 0.06785664, "step": 22907 }, { "epoch": 45.816, "grad_norm": 1.360222339630127, "learning_rate": 2e-05, "loss": 0.05301679, "step": 22908 }, { "epoch": 45.818, "grad_norm": 1.7606545686721802, "learning_rate": 2e-05, "loss": 0.05103129, "step": 22909 }, { "epoch": 45.82, "grad_norm": 0.9953403472900391, "learning_rate": 2e-05, "loss": 0.03279923, "step": 22910 }, { "epoch": 45.822, "grad_norm": 2.362177610397339, "learning_rate": 2e-05, "loss": 0.04696232, "step": 22911 }, { "epoch": 45.824, "grad_norm": 1.3651307821273804, "learning_rate": 2e-05, "loss": 0.05074629, "step": 22912 }, { "epoch": 45.826, "grad_norm": 1.4231796264648438, "learning_rate": 2e-05, "loss": 0.04402232, "step": 22913 }, { "epoch": 45.828, "grad_norm": 1.3840693235397339, "learning_rate": 2e-05, "loss": 0.05228634, "step": 22914 }, { "epoch": 45.83, "grad_norm": 1.2382279634475708, "learning_rate": 2e-05, "loss": 0.03767666, "step": 22915 }, { "epoch": 45.832, "grad_norm": 1.1215736865997314, "learning_rate": 2e-05, "loss": 0.03750758, "step": 22916 }, { "epoch": 45.834, "grad_norm": 1.5836503505706787, "learning_rate": 2e-05, "loss": 0.05021936, "step": 22917 }, { "epoch": 45.836, "grad_norm": 1.175191879272461, "learning_rate": 2e-05, "loss": 0.05768834, "step": 22918 }, { "epoch": 45.838, "grad_norm": 2.3862361907958984, "learning_rate": 2e-05, "loss": 0.06385323, "step": 22919 }, { "epoch": 45.84, "grad_norm": 1.1213129758834839, "learning_rate": 2e-05, "loss": 0.0474508, "step": 22920 }, { "epoch": 45.842, "grad_norm": 1.1670286655426025, "learning_rate": 2e-05, "loss": 0.05094395, "step": 22921 }, { "epoch": 45.844, "grad_norm": 1.0269542932510376, "learning_rate": 2e-05, "loss": 0.03310216, "step": 22922 }, { "epoch": 45.846, "grad_norm": 1.220439076423645, "learning_rate": 2e-05, "loss": 0.05242105, "step": 22923 }, { "epoch": 45.848, "grad_norm": 1.1493821144104004, "learning_rate": 2e-05, "loss": 0.05762265, "step": 22924 }, { "epoch": 45.85, "grad_norm": 1.2592790126800537, "learning_rate": 2e-05, "loss": 0.05583587, "step": 22925 }, { "epoch": 45.852, "grad_norm": 1.1037945747375488, "learning_rate": 2e-05, "loss": 0.05163608, "step": 22926 }, { "epoch": 45.854, "grad_norm": 1.039101243019104, "learning_rate": 2e-05, "loss": 0.04163966, "step": 22927 }, { "epoch": 45.856, "grad_norm": 1.087335228919983, "learning_rate": 2e-05, "loss": 0.04357848, "step": 22928 }, { "epoch": 45.858, "grad_norm": 1.1296930313110352, "learning_rate": 2e-05, "loss": 0.04551987, "step": 22929 }, { "epoch": 45.86, "grad_norm": 1.1426345109939575, "learning_rate": 2e-05, "loss": 0.05805442, "step": 22930 }, { "epoch": 45.862, "grad_norm": 2.9095516204833984, "learning_rate": 2e-05, "loss": 0.06481321, "step": 22931 }, { "epoch": 45.864, "grad_norm": 1.4706426858901978, "learning_rate": 2e-05, "loss": 0.06707012, "step": 22932 }, { "epoch": 45.866, "grad_norm": 1.0578680038452148, "learning_rate": 2e-05, "loss": 0.03972709, "step": 22933 }, { "epoch": 45.868, "grad_norm": 1.6905008554458618, "learning_rate": 2e-05, "loss": 0.03994934, "step": 22934 }, { "epoch": 45.87, "grad_norm": 1.2955090999603271, "learning_rate": 2e-05, "loss": 0.06763369, "step": 22935 }, { "epoch": 45.872, "grad_norm": 1.0367980003356934, "learning_rate": 2e-05, "loss": 0.03746669, "step": 22936 }, { "epoch": 45.874, "grad_norm": 1.5050883293151855, "learning_rate": 2e-05, "loss": 0.06505072, "step": 22937 }, { "epoch": 45.876, "grad_norm": 1.0128837823867798, "learning_rate": 2e-05, "loss": 0.03502842, "step": 22938 }, { "epoch": 45.878, "grad_norm": 3.16584849357605, "learning_rate": 2e-05, "loss": 0.05251692, "step": 22939 }, { "epoch": 45.88, "grad_norm": 1.3227653503417969, "learning_rate": 2e-05, "loss": 0.02910842, "step": 22940 }, { "epoch": 45.882, "grad_norm": 1.2259905338287354, "learning_rate": 2e-05, "loss": 0.06085608, "step": 22941 }, { "epoch": 45.884, "grad_norm": 1.1765010356903076, "learning_rate": 2e-05, "loss": 0.04740379, "step": 22942 }, { "epoch": 45.886, "grad_norm": 1.1642879247665405, "learning_rate": 2e-05, "loss": 0.05026664, "step": 22943 }, { "epoch": 45.888, "grad_norm": 1.3020238876342773, "learning_rate": 2e-05, "loss": 0.06652694, "step": 22944 }, { "epoch": 45.89, "grad_norm": 1.2435083389282227, "learning_rate": 2e-05, "loss": 0.05145294, "step": 22945 }, { "epoch": 45.892, "grad_norm": 1.1023855209350586, "learning_rate": 2e-05, "loss": 0.03766239, "step": 22946 }, { "epoch": 45.894, "grad_norm": 0.9923415780067444, "learning_rate": 2e-05, "loss": 0.03449097, "step": 22947 }, { "epoch": 45.896, "grad_norm": 1.2809637784957886, "learning_rate": 2e-05, "loss": 0.0631137, "step": 22948 }, { "epoch": 45.898, "grad_norm": 1.0583648681640625, "learning_rate": 2e-05, "loss": 0.04882384, "step": 22949 }, { "epoch": 45.9, "grad_norm": 1.2955833673477173, "learning_rate": 2e-05, "loss": 0.0419897, "step": 22950 }, { "epoch": 45.902, "grad_norm": 1.0099056959152222, "learning_rate": 2e-05, "loss": 0.03409104, "step": 22951 }, { "epoch": 45.904, "grad_norm": 1.4930360317230225, "learning_rate": 2e-05, "loss": 0.06223853, "step": 22952 }, { "epoch": 45.906, "grad_norm": 1.2988282442092896, "learning_rate": 2e-05, "loss": 0.04434647, "step": 22953 }, { "epoch": 45.908, "grad_norm": 1.2352509498596191, "learning_rate": 2e-05, "loss": 0.04742352, "step": 22954 }, { "epoch": 45.91, "grad_norm": 1.0645052194595337, "learning_rate": 2e-05, "loss": 0.03650692, "step": 22955 }, { "epoch": 45.912, "grad_norm": 1.607302188873291, "learning_rate": 2e-05, "loss": 0.03560692, "step": 22956 }, { "epoch": 45.914, "grad_norm": 0.9931079149246216, "learning_rate": 2e-05, "loss": 0.03896011, "step": 22957 }, { "epoch": 45.916, "grad_norm": 1.081117033958435, "learning_rate": 2e-05, "loss": 0.03914691, "step": 22958 }, { "epoch": 45.918, "grad_norm": 2.213714361190796, "learning_rate": 2e-05, "loss": 0.07680227, "step": 22959 }, { "epoch": 45.92, "grad_norm": 1.328762173652649, "learning_rate": 2e-05, "loss": 0.04840521, "step": 22960 }, { "epoch": 45.922, "grad_norm": 1.999614953994751, "learning_rate": 2e-05, "loss": 0.04826399, "step": 22961 }, { "epoch": 45.924, "grad_norm": 1.5177665948867798, "learning_rate": 2e-05, "loss": 0.04357665, "step": 22962 }, { "epoch": 45.926, "grad_norm": 1.822375774383545, "learning_rate": 2e-05, "loss": 0.05791084, "step": 22963 }, { "epoch": 45.928, "grad_norm": 1.217348575592041, "learning_rate": 2e-05, "loss": 0.03876396, "step": 22964 }, { "epoch": 45.93, "grad_norm": 1.5601190328598022, "learning_rate": 2e-05, "loss": 0.06204118, "step": 22965 }, { "epoch": 45.932, "grad_norm": 2.090487480163574, "learning_rate": 2e-05, "loss": 0.0422308, "step": 22966 }, { "epoch": 45.934, "grad_norm": 1.2206273078918457, "learning_rate": 2e-05, "loss": 0.04056178, "step": 22967 }, { "epoch": 45.936, "grad_norm": 1.1585288047790527, "learning_rate": 2e-05, "loss": 0.03958793, "step": 22968 }, { "epoch": 45.938, "grad_norm": 1.2946537733078003, "learning_rate": 2e-05, "loss": 0.04267664, "step": 22969 }, { "epoch": 45.94, "grad_norm": 1.4737160205841064, "learning_rate": 2e-05, "loss": 0.07163599, "step": 22970 }, { "epoch": 45.942, "grad_norm": 1.1721677780151367, "learning_rate": 2e-05, "loss": 0.04065248, "step": 22971 }, { "epoch": 45.944, "grad_norm": 1.7377208471298218, "learning_rate": 2e-05, "loss": 0.05867632, "step": 22972 }, { "epoch": 45.946, "grad_norm": 1.7599306106567383, "learning_rate": 2e-05, "loss": 0.04502322, "step": 22973 }, { "epoch": 45.948, "grad_norm": 1.2876641750335693, "learning_rate": 2e-05, "loss": 0.0509927, "step": 22974 }, { "epoch": 45.95, "grad_norm": 1.1776494979858398, "learning_rate": 2e-05, "loss": 0.05537301, "step": 22975 }, { "epoch": 45.952, "grad_norm": 1.3018521070480347, "learning_rate": 2e-05, "loss": 0.0640357, "step": 22976 }, { "epoch": 45.954, "grad_norm": 1.2216527462005615, "learning_rate": 2e-05, "loss": 0.0506551, "step": 22977 }, { "epoch": 45.956, "grad_norm": 1.313719630241394, "learning_rate": 2e-05, "loss": 0.05280958, "step": 22978 }, { "epoch": 45.958, "grad_norm": 1.1301958560943604, "learning_rate": 2e-05, "loss": 0.04503809, "step": 22979 }, { "epoch": 45.96, "grad_norm": 1.7027695178985596, "learning_rate": 2e-05, "loss": 0.05137224, "step": 22980 }, { "epoch": 45.962, "grad_norm": 2.217540740966797, "learning_rate": 2e-05, "loss": 0.05492135, "step": 22981 }, { "epoch": 45.964, "grad_norm": 1.163046956062317, "learning_rate": 2e-05, "loss": 0.03381425, "step": 22982 }, { "epoch": 45.966, "grad_norm": 1.2386566400527954, "learning_rate": 2e-05, "loss": 0.04658375, "step": 22983 }, { "epoch": 45.968, "grad_norm": 1.490594506263733, "learning_rate": 2e-05, "loss": 0.06036313, "step": 22984 }, { "epoch": 45.97, "grad_norm": 1.3342573642730713, "learning_rate": 2e-05, "loss": 0.04109773, "step": 22985 }, { "epoch": 45.972, "grad_norm": 2.1110265254974365, "learning_rate": 2e-05, "loss": 0.04837161, "step": 22986 }, { "epoch": 45.974, "grad_norm": 1.1389340162277222, "learning_rate": 2e-05, "loss": 0.04660567, "step": 22987 }, { "epoch": 45.976, "grad_norm": 1.324904441833496, "learning_rate": 2e-05, "loss": 0.05071661, "step": 22988 }, { "epoch": 45.978, "grad_norm": 1.7974005937576294, "learning_rate": 2e-05, "loss": 0.05388502, "step": 22989 }, { "epoch": 45.98, "grad_norm": 1.4797214269638062, "learning_rate": 2e-05, "loss": 0.04529134, "step": 22990 }, { "epoch": 45.982, "grad_norm": 2.093899965286255, "learning_rate": 2e-05, "loss": 0.04731727, "step": 22991 }, { "epoch": 45.984, "grad_norm": 1.1308255195617676, "learning_rate": 2e-05, "loss": 0.04801143, "step": 22992 }, { "epoch": 45.986, "grad_norm": 1.4428658485412598, "learning_rate": 2e-05, "loss": 0.04077057, "step": 22993 }, { "epoch": 45.988, "grad_norm": 1.423751950263977, "learning_rate": 2e-05, "loss": 0.05186245, "step": 22994 }, { "epoch": 45.99, "grad_norm": 1.1405518054962158, "learning_rate": 2e-05, "loss": 0.04192315, "step": 22995 }, { "epoch": 45.992, "grad_norm": 1.020820140838623, "learning_rate": 2e-05, "loss": 0.04345158, "step": 22996 }, { "epoch": 45.994, "grad_norm": 1.4293239116668701, "learning_rate": 2e-05, "loss": 0.04355782, "step": 22997 }, { "epoch": 45.996, "grad_norm": 2.0200562477111816, "learning_rate": 2e-05, "loss": 0.06576998, "step": 22998 }, { "epoch": 45.998, "grad_norm": 1.1396820545196533, "learning_rate": 2e-05, "loss": 0.0453399, "step": 22999 }, { "epoch": 46.0, "grad_norm": 1.615691900253296, "learning_rate": 2e-05, "loss": 0.04784571, "step": 23000 }, { "epoch": 46.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9840319361277445, "Equal_1": 0.998, "Equal_2": 0.9820359281437125, "Equal_3": 0.9920159680638723, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 1.0, "Parallel_1": 0.9959919839679359, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.994, "Perpendicular_1": 0.998, "Perpendicular_2": 0.998, "Perpendicular_3": 0.905811623246493, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.992, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9860279441117764 }, "eval_runtime": 324.2615, "eval_samples_per_second": 32.381, "eval_steps_per_second": 0.648, "step": 23000 }, { "epoch": 46.002, "grad_norm": 1.4302409887313843, "learning_rate": 2e-05, "loss": 0.05861004, "step": 23001 }, { "epoch": 46.004, "grad_norm": 1.39664626121521, "learning_rate": 2e-05, "loss": 0.05934343, "step": 23002 }, { "epoch": 46.006, "grad_norm": 1.1906030178070068, "learning_rate": 2e-05, "loss": 0.03054802, "step": 23003 }, { "epoch": 46.008, "grad_norm": 1.7340916395187378, "learning_rate": 2e-05, "loss": 0.05384779, "step": 23004 }, { "epoch": 46.01, "grad_norm": 1.309027075767517, "learning_rate": 2e-05, "loss": 0.04369318, "step": 23005 }, { "epoch": 46.012, "grad_norm": 1.002219796180725, "learning_rate": 2e-05, "loss": 0.03218366, "step": 23006 }, { "epoch": 46.014, "grad_norm": 1.6894913911819458, "learning_rate": 2e-05, "loss": 0.05528177, "step": 23007 }, { "epoch": 46.016, "grad_norm": 1.18147873878479, "learning_rate": 2e-05, "loss": 0.03384054, "step": 23008 }, { "epoch": 46.018, "grad_norm": 1.331606149673462, "learning_rate": 2e-05, "loss": 0.05830061, "step": 23009 }, { "epoch": 46.02, "grad_norm": 1.5706523656845093, "learning_rate": 2e-05, "loss": 0.05562139, "step": 23010 }, { "epoch": 46.022, "grad_norm": 1.2435215711593628, "learning_rate": 2e-05, "loss": 0.05742179, "step": 23011 }, { "epoch": 46.024, "grad_norm": 1.2738981246948242, "learning_rate": 2e-05, "loss": 0.04086342, "step": 23012 }, { "epoch": 46.026, "grad_norm": 1.4569175243377686, "learning_rate": 2e-05, "loss": 0.04479681, "step": 23013 }, { "epoch": 46.028, "grad_norm": 1.147181749343872, "learning_rate": 2e-05, "loss": 0.04108036, "step": 23014 }, { "epoch": 46.03, "grad_norm": 1.1991140842437744, "learning_rate": 2e-05, "loss": 0.04342316, "step": 23015 }, { "epoch": 46.032, "grad_norm": 1.3934859037399292, "learning_rate": 2e-05, "loss": 0.05477795, "step": 23016 }, { "epoch": 46.034, "grad_norm": 1.2585614919662476, "learning_rate": 2e-05, "loss": 0.04793346, "step": 23017 }, { "epoch": 46.036, "grad_norm": 1.1003919839859009, "learning_rate": 2e-05, "loss": 0.05579524, "step": 23018 }, { "epoch": 46.038, "grad_norm": 1.2833575010299683, "learning_rate": 2e-05, "loss": 0.05675605, "step": 23019 }, { "epoch": 46.04, "grad_norm": 3.453655242919922, "learning_rate": 2e-05, "loss": 0.05313053, "step": 23020 }, { "epoch": 46.042, "grad_norm": 1.191473126411438, "learning_rate": 2e-05, "loss": 0.04780373, "step": 23021 }, { "epoch": 46.044, "grad_norm": 1.29380202293396, "learning_rate": 2e-05, "loss": 0.04844226, "step": 23022 }, { "epoch": 46.046, "grad_norm": 1.3847426176071167, "learning_rate": 2e-05, "loss": 0.05267187, "step": 23023 }, { "epoch": 46.048, "grad_norm": 2.212348222732544, "learning_rate": 2e-05, "loss": 0.0534815, "step": 23024 }, { "epoch": 46.05, "grad_norm": 1.3633710145950317, "learning_rate": 2e-05, "loss": 0.06192131, "step": 23025 }, { "epoch": 46.052, "grad_norm": 1.212673544883728, "learning_rate": 2e-05, "loss": 0.03544668, "step": 23026 }, { "epoch": 46.054, "grad_norm": 1.0772227048873901, "learning_rate": 2e-05, "loss": 0.03711303, "step": 23027 }, { "epoch": 46.056, "grad_norm": 2.53438401222229, "learning_rate": 2e-05, "loss": 0.05098771, "step": 23028 }, { "epoch": 46.058, "grad_norm": 1.814626932144165, "learning_rate": 2e-05, "loss": 0.04822916, "step": 23029 }, { "epoch": 46.06, "grad_norm": 1.6201591491699219, "learning_rate": 2e-05, "loss": 0.0526498, "step": 23030 }, { "epoch": 46.062, "grad_norm": 1.464896559715271, "learning_rate": 2e-05, "loss": 0.05728604, "step": 23031 }, { "epoch": 46.064, "grad_norm": 1.0204657316207886, "learning_rate": 2e-05, "loss": 0.04259542, "step": 23032 }, { "epoch": 46.066, "grad_norm": 1.1545565128326416, "learning_rate": 2e-05, "loss": 0.05285469, "step": 23033 }, { "epoch": 46.068, "grad_norm": 1.0158534049987793, "learning_rate": 2e-05, "loss": 0.03832763, "step": 23034 }, { "epoch": 46.07, "grad_norm": 1.1786905527114868, "learning_rate": 2e-05, "loss": 0.03449775, "step": 23035 }, { "epoch": 46.072, "grad_norm": 1.2744739055633545, "learning_rate": 2e-05, "loss": 0.04379936, "step": 23036 }, { "epoch": 46.074, "grad_norm": 1.868301510810852, "learning_rate": 2e-05, "loss": 0.0644619, "step": 23037 }, { "epoch": 46.076, "grad_norm": 1.1725568771362305, "learning_rate": 2e-05, "loss": 0.04825684, "step": 23038 }, { "epoch": 46.078, "grad_norm": 1.6947202682495117, "learning_rate": 2e-05, "loss": 0.06334668, "step": 23039 }, { "epoch": 46.08, "grad_norm": 1.4566727876663208, "learning_rate": 2e-05, "loss": 0.05566946, "step": 23040 }, { "epoch": 46.082, "grad_norm": 1.2158526182174683, "learning_rate": 2e-05, "loss": 0.03828437, "step": 23041 }, { "epoch": 46.084, "grad_norm": 1.1454522609710693, "learning_rate": 2e-05, "loss": 0.04942612, "step": 23042 }, { "epoch": 46.086, "grad_norm": 1.1992695331573486, "learning_rate": 2e-05, "loss": 0.04454256, "step": 23043 }, { "epoch": 46.088, "grad_norm": 1.242703914642334, "learning_rate": 2e-05, "loss": 0.04919573, "step": 23044 }, { "epoch": 46.09, "grad_norm": 1.1189968585968018, "learning_rate": 2e-05, "loss": 0.04536124, "step": 23045 }, { "epoch": 46.092, "grad_norm": 1.1488397121429443, "learning_rate": 2e-05, "loss": 0.05430396, "step": 23046 }, { "epoch": 46.094, "grad_norm": 1.281959891319275, "learning_rate": 2e-05, "loss": 0.06264284, "step": 23047 }, { "epoch": 46.096, "grad_norm": 1.4672093391418457, "learning_rate": 2e-05, "loss": 0.06311803, "step": 23048 }, { "epoch": 46.098, "grad_norm": 1.124530553817749, "learning_rate": 2e-05, "loss": 0.03884341, "step": 23049 }, { "epoch": 46.1, "grad_norm": 1.1475732326507568, "learning_rate": 2e-05, "loss": 0.05629781, "step": 23050 }, { "epoch": 46.102, "grad_norm": 1.4845279455184937, "learning_rate": 2e-05, "loss": 0.05716734, "step": 23051 }, { "epoch": 46.104, "grad_norm": 0.9782443642616272, "learning_rate": 2e-05, "loss": 0.0321027, "step": 23052 }, { "epoch": 46.106, "grad_norm": 1.0824551582336426, "learning_rate": 2e-05, "loss": 0.04053795, "step": 23053 }, { "epoch": 46.108, "grad_norm": 1.1406644582748413, "learning_rate": 2e-05, "loss": 0.03841585, "step": 23054 }, { "epoch": 46.11, "grad_norm": 1.2297335863113403, "learning_rate": 2e-05, "loss": 0.05340029, "step": 23055 }, { "epoch": 46.112, "grad_norm": 1.1259406805038452, "learning_rate": 2e-05, "loss": 0.05276688, "step": 23056 }, { "epoch": 46.114, "grad_norm": 2.047508478164673, "learning_rate": 2e-05, "loss": 0.05007156, "step": 23057 }, { "epoch": 46.116, "grad_norm": 1.338058352470398, "learning_rate": 2e-05, "loss": 0.04387624, "step": 23058 }, { "epoch": 46.118, "grad_norm": 1.2544599771499634, "learning_rate": 2e-05, "loss": 0.04295979, "step": 23059 }, { "epoch": 46.12, "grad_norm": 1.185205101966858, "learning_rate": 2e-05, "loss": 0.05217368, "step": 23060 }, { "epoch": 46.122, "grad_norm": 1.1495543718338013, "learning_rate": 2e-05, "loss": 0.042289, "step": 23061 }, { "epoch": 46.124, "grad_norm": 1.7538362741470337, "learning_rate": 2e-05, "loss": 0.0680445, "step": 23062 }, { "epoch": 46.126, "grad_norm": 1.0621302127838135, "learning_rate": 2e-05, "loss": 0.04465366, "step": 23063 }, { "epoch": 46.128, "grad_norm": 1.2388436794281006, "learning_rate": 2e-05, "loss": 0.05579409, "step": 23064 }, { "epoch": 46.13, "grad_norm": 1.4218683242797852, "learning_rate": 2e-05, "loss": 0.0443549, "step": 23065 }, { "epoch": 46.132, "grad_norm": 1.1729727983474731, "learning_rate": 2e-05, "loss": 0.05611319, "step": 23066 }, { "epoch": 46.134, "grad_norm": 1.1419835090637207, "learning_rate": 2e-05, "loss": 0.04782356, "step": 23067 }, { "epoch": 46.136, "grad_norm": 1.0463011264801025, "learning_rate": 2e-05, "loss": 0.04206122, "step": 23068 }, { "epoch": 46.138, "grad_norm": 1.1902068853378296, "learning_rate": 2e-05, "loss": 0.0524741, "step": 23069 }, { "epoch": 46.14, "grad_norm": 1.0246272087097168, "learning_rate": 2e-05, "loss": 0.03112867, "step": 23070 }, { "epoch": 46.142, "grad_norm": 1.05487859249115, "learning_rate": 2e-05, "loss": 0.04408978, "step": 23071 }, { "epoch": 46.144, "grad_norm": 1.0214228630065918, "learning_rate": 2e-05, "loss": 0.03995924, "step": 23072 }, { "epoch": 46.146, "grad_norm": 1.0368109941482544, "learning_rate": 2e-05, "loss": 0.04691656, "step": 23073 }, { "epoch": 46.148, "grad_norm": 1.1551309823989868, "learning_rate": 2e-05, "loss": 0.04764878, "step": 23074 }, { "epoch": 46.15, "grad_norm": 1.2423347234725952, "learning_rate": 2e-05, "loss": 0.05095094, "step": 23075 }, { "epoch": 46.152, "grad_norm": 1.4114395380020142, "learning_rate": 2e-05, "loss": 0.05528051, "step": 23076 }, { "epoch": 46.154, "grad_norm": 0.9848634600639343, "learning_rate": 2e-05, "loss": 0.04308025, "step": 23077 }, { "epoch": 46.156, "grad_norm": 1.071808099746704, "learning_rate": 2e-05, "loss": 0.0475583, "step": 23078 }, { "epoch": 46.158, "grad_norm": 1.093819499015808, "learning_rate": 2e-05, "loss": 0.03855433, "step": 23079 }, { "epoch": 46.16, "grad_norm": 1.9816279411315918, "learning_rate": 2e-05, "loss": 0.04996374, "step": 23080 }, { "epoch": 46.162, "grad_norm": 1.2489104270935059, "learning_rate": 2e-05, "loss": 0.05128529, "step": 23081 }, { "epoch": 46.164, "grad_norm": 1.3830829858779907, "learning_rate": 2e-05, "loss": 0.05532495, "step": 23082 }, { "epoch": 46.166, "grad_norm": 1.2296466827392578, "learning_rate": 2e-05, "loss": 0.04965352, "step": 23083 }, { "epoch": 46.168, "grad_norm": 1.0486325025558472, "learning_rate": 2e-05, "loss": 0.03422453, "step": 23084 }, { "epoch": 46.17, "grad_norm": 1.4689314365386963, "learning_rate": 2e-05, "loss": 0.05122601, "step": 23085 }, { "epoch": 46.172, "grad_norm": 1.9588061571121216, "learning_rate": 2e-05, "loss": 0.04775591, "step": 23086 }, { "epoch": 46.174, "grad_norm": 1.2059226036071777, "learning_rate": 2e-05, "loss": 0.04841192, "step": 23087 }, { "epoch": 46.176, "grad_norm": 1.1085312366485596, "learning_rate": 2e-05, "loss": 0.04657616, "step": 23088 }, { "epoch": 46.178, "grad_norm": 2.187915563583374, "learning_rate": 2e-05, "loss": 0.05317451, "step": 23089 }, { "epoch": 46.18, "grad_norm": 1.0216542482376099, "learning_rate": 2e-05, "loss": 0.0392207, "step": 23090 }, { "epoch": 46.182, "grad_norm": 1.3469269275665283, "learning_rate": 2e-05, "loss": 0.05570158, "step": 23091 }, { "epoch": 46.184, "grad_norm": 1.4849958419799805, "learning_rate": 2e-05, "loss": 0.04750857, "step": 23092 }, { "epoch": 46.186, "grad_norm": 1.8725115060806274, "learning_rate": 2e-05, "loss": 0.06257782, "step": 23093 }, { "epoch": 46.188, "grad_norm": 1.5166374444961548, "learning_rate": 2e-05, "loss": 0.04668888, "step": 23094 }, { "epoch": 46.19, "grad_norm": 1.2905293703079224, "learning_rate": 2e-05, "loss": 0.04647534, "step": 23095 }, { "epoch": 46.192, "grad_norm": 1.208664894104004, "learning_rate": 2e-05, "loss": 0.05198023, "step": 23096 }, { "epoch": 46.194, "grad_norm": 1.0935642719268799, "learning_rate": 2e-05, "loss": 0.05911352, "step": 23097 }, { "epoch": 46.196, "grad_norm": 1.104860782623291, "learning_rate": 2e-05, "loss": 0.03793868, "step": 23098 }, { "epoch": 46.198, "grad_norm": 1.06515371799469, "learning_rate": 2e-05, "loss": 0.04790756, "step": 23099 }, { "epoch": 46.2, "grad_norm": 1.6914348602294922, "learning_rate": 2e-05, "loss": 0.04272515, "step": 23100 }, { "epoch": 46.202, "grad_norm": 1.292098879814148, "learning_rate": 2e-05, "loss": 0.04760931, "step": 23101 }, { "epoch": 46.204, "grad_norm": 1.197635531425476, "learning_rate": 2e-05, "loss": 0.05021203, "step": 23102 }, { "epoch": 46.206, "grad_norm": 1.267522931098938, "learning_rate": 2e-05, "loss": 0.06113753, "step": 23103 }, { "epoch": 46.208, "grad_norm": 0.9828411936759949, "learning_rate": 2e-05, "loss": 0.04029978, "step": 23104 }, { "epoch": 46.21, "grad_norm": 1.4888331890106201, "learning_rate": 2e-05, "loss": 0.05187474, "step": 23105 }, { "epoch": 46.212, "grad_norm": 1.055316686630249, "learning_rate": 2e-05, "loss": 0.04340184, "step": 23106 }, { "epoch": 46.214, "grad_norm": 1.2184720039367676, "learning_rate": 2e-05, "loss": 0.0478823, "step": 23107 }, { "epoch": 46.216, "grad_norm": 1.2196934223175049, "learning_rate": 2e-05, "loss": 0.05798684, "step": 23108 }, { "epoch": 46.218, "grad_norm": 1.194522738456726, "learning_rate": 2e-05, "loss": 0.05176704, "step": 23109 }, { "epoch": 46.22, "grad_norm": 1.0654971599578857, "learning_rate": 2e-05, "loss": 0.03646301, "step": 23110 }, { "epoch": 46.222, "grad_norm": 1.3132420778274536, "learning_rate": 2e-05, "loss": 0.03887681, "step": 23111 }, { "epoch": 46.224, "grad_norm": 1.870571255683899, "learning_rate": 2e-05, "loss": 0.0637666, "step": 23112 }, { "epoch": 46.226, "grad_norm": 1.1391980648040771, "learning_rate": 2e-05, "loss": 0.04590259, "step": 23113 }, { "epoch": 46.228, "grad_norm": 1.1913096904754639, "learning_rate": 2e-05, "loss": 0.03562557, "step": 23114 }, { "epoch": 46.23, "grad_norm": 3.3259854316711426, "learning_rate": 2e-05, "loss": 0.05996282, "step": 23115 }, { "epoch": 46.232, "grad_norm": 1.2023781538009644, "learning_rate": 2e-05, "loss": 0.05674819, "step": 23116 }, { "epoch": 46.234, "grad_norm": 1.2463189363479614, "learning_rate": 2e-05, "loss": 0.05823443, "step": 23117 }, { "epoch": 46.236, "grad_norm": 1.352786660194397, "learning_rate": 2e-05, "loss": 0.0599811, "step": 23118 }, { "epoch": 46.238, "grad_norm": 1.1087812185287476, "learning_rate": 2e-05, "loss": 0.05076696, "step": 23119 }, { "epoch": 46.24, "grad_norm": 1.2660101652145386, "learning_rate": 2e-05, "loss": 0.0315996, "step": 23120 }, { "epoch": 46.242, "grad_norm": 1.2009081840515137, "learning_rate": 2e-05, "loss": 0.04985572, "step": 23121 }, { "epoch": 46.244, "grad_norm": 0.9989933371543884, "learning_rate": 2e-05, "loss": 0.04121695, "step": 23122 }, { "epoch": 46.246, "grad_norm": 2.8735687732696533, "learning_rate": 2e-05, "loss": 0.07063636, "step": 23123 }, { "epoch": 46.248, "grad_norm": 1.0923492908477783, "learning_rate": 2e-05, "loss": 0.04320874, "step": 23124 }, { "epoch": 46.25, "grad_norm": 1.0142641067504883, "learning_rate": 2e-05, "loss": 0.03465679, "step": 23125 }, { "epoch": 46.252, "grad_norm": 1.1704773902893066, "learning_rate": 2e-05, "loss": 0.04421233, "step": 23126 }, { "epoch": 46.254, "grad_norm": 1.2731939554214478, "learning_rate": 2e-05, "loss": 0.07911802, "step": 23127 }, { "epoch": 46.256, "grad_norm": 2.1010546684265137, "learning_rate": 2e-05, "loss": 0.04964682, "step": 23128 }, { "epoch": 46.258, "grad_norm": 3.078134536743164, "learning_rate": 2e-05, "loss": 0.0332694, "step": 23129 }, { "epoch": 46.26, "grad_norm": 1.6000622510910034, "learning_rate": 2e-05, "loss": 0.05290427, "step": 23130 }, { "epoch": 46.262, "grad_norm": 2.217181921005249, "learning_rate": 2e-05, "loss": 0.04330307, "step": 23131 }, { "epoch": 46.264, "grad_norm": 1.268926739692688, "learning_rate": 2e-05, "loss": 0.05655915, "step": 23132 }, { "epoch": 46.266, "grad_norm": 5.429689407348633, "learning_rate": 2e-05, "loss": 0.05056974, "step": 23133 }, { "epoch": 46.268, "grad_norm": 4.090033054351807, "learning_rate": 2e-05, "loss": 0.05439257, "step": 23134 }, { "epoch": 46.27, "grad_norm": 4.116130352020264, "learning_rate": 2e-05, "loss": 0.06643784, "step": 23135 }, { "epoch": 46.272, "grad_norm": 1.2675787210464478, "learning_rate": 2e-05, "loss": 0.05370174, "step": 23136 }, { "epoch": 46.274, "grad_norm": 1.1625384092330933, "learning_rate": 2e-05, "loss": 0.04453802, "step": 23137 }, { "epoch": 46.276, "grad_norm": 1.1202532052993774, "learning_rate": 2e-05, "loss": 0.04463059, "step": 23138 }, { "epoch": 46.278, "grad_norm": 1.0193384885787964, "learning_rate": 2e-05, "loss": 0.03918165, "step": 23139 }, { "epoch": 46.28, "grad_norm": 0.9413808584213257, "learning_rate": 2e-05, "loss": 0.02721583, "step": 23140 }, { "epoch": 46.282, "grad_norm": 1.1726583242416382, "learning_rate": 2e-05, "loss": 0.03906541, "step": 23141 }, { "epoch": 46.284, "grad_norm": 1.1463528871536255, "learning_rate": 2e-05, "loss": 0.04882437, "step": 23142 }, { "epoch": 46.286, "grad_norm": 1.02104651927948, "learning_rate": 2e-05, "loss": 0.04882137, "step": 23143 }, { "epoch": 46.288, "grad_norm": 2.9313247203826904, "learning_rate": 2e-05, "loss": 0.04990475, "step": 23144 }, { "epoch": 46.29, "grad_norm": 1.2045042514801025, "learning_rate": 2e-05, "loss": 0.04847421, "step": 23145 }, { "epoch": 46.292, "grad_norm": 1.4009835720062256, "learning_rate": 2e-05, "loss": 0.05024692, "step": 23146 }, { "epoch": 46.294, "grad_norm": 1.1895580291748047, "learning_rate": 2e-05, "loss": 0.04267966, "step": 23147 }, { "epoch": 46.296, "grad_norm": 1.1182502508163452, "learning_rate": 2e-05, "loss": 0.05323965, "step": 23148 }, { "epoch": 46.298, "grad_norm": 1.719334363937378, "learning_rate": 2e-05, "loss": 0.04970006, "step": 23149 }, { "epoch": 46.3, "grad_norm": 2.0725491046905518, "learning_rate": 2e-05, "loss": 0.06950231, "step": 23150 }, { "epoch": 46.302, "grad_norm": 3.3280606269836426, "learning_rate": 2e-05, "loss": 0.066084, "step": 23151 }, { "epoch": 46.304, "grad_norm": 1.1809648275375366, "learning_rate": 2e-05, "loss": 0.04454755, "step": 23152 }, { "epoch": 46.306, "grad_norm": 1.2297167778015137, "learning_rate": 2e-05, "loss": 0.06474146, "step": 23153 }, { "epoch": 46.308, "grad_norm": 1.6546833515167236, "learning_rate": 2e-05, "loss": 0.04665916, "step": 23154 }, { "epoch": 46.31, "grad_norm": 0.9898395538330078, "learning_rate": 2e-05, "loss": 0.03971004, "step": 23155 }, { "epoch": 46.312, "grad_norm": 1.4820255041122437, "learning_rate": 2e-05, "loss": 0.04219128, "step": 23156 }, { "epoch": 46.314, "grad_norm": 1.7158340215682983, "learning_rate": 2e-05, "loss": 0.0606416, "step": 23157 }, { "epoch": 46.316, "grad_norm": 1.0219082832336426, "learning_rate": 2e-05, "loss": 0.04473948, "step": 23158 }, { "epoch": 46.318, "grad_norm": 1.4255625009536743, "learning_rate": 2e-05, "loss": 0.04729429, "step": 23159 }, { "epoch": 46.32, "grad_norm": 1.1542085409164429, "learning_rate": 2e-05, "loss": 0.04332086, "step": 23160 }, { "epoch": 46.322, "grad_norm": 1.1608744859695435, "learning_rate": 2e-05, "loss": 0.05311103, "step": 23161 }, { "epoch": 46.324, "grad_norm": 1.333052396774292, "learning_rate": 2e-05, "loss": 0.05798706, "step": 23162 }, { "epoch": 46.326, "grad_norm": 1.1479754447937012, "learning_rate": 2e-05, "loss": 0.0463843, "step": 23163 }, { "epoch": 46.328, "grad_norm": 1.8615238666534424, "learning_rate": 2e-05, "loss": 0.04511591, "step": 23164 }, { "epoch": 46.33, "grad_norm": 1.2631683349609375, "learning_rate": 2e-05, "loss": 0.05475502, "step": 23165 }, { "epoch": 46.332, "grad_norm": 1.4224528074264526, "learning_rate": 2e-05, "loss": 0.06586679, "step": 23166 }, { "epoch": 46.334, "grad_norm": 1.1078559160232544, "learning_rate": 2e-05, "loss": 0.03966166, "step": 23167 }, { "epoch": 46.336, "grad_norm": 0.9279854893684387, "learning_rate": 2e-05, "loss": 0.03631292, "step": 23168 }, { "epoch": 46.338, "grad_norm": 2.5856709480285645, "learning_rate": 2e-05, "loss": 0.05210607, "step": 23169 }, { "epoch": 46.34, "grad_norm": 1.2316886186599731, "learning_rate": 2e-05, "loss": 0.04556869, "step": 23170 }, { "epoch": 46.342, "grad_norm": 1.1155706644058228, "learning_rate": 2e-05, "loss": 0.05066471, "step": 23171 }, { "epoch": 46.344, "grad_norm": 2.491442918777466, "learning_rate": 2e-05, "loss": 0.05617189, "step": 23172 }, { "epoch": 46.346, "grad_norm": 1.3126240968704224, "learning_rate": 2e-05, "loss": 0.04718146, "step": 23173 }, { "epoch": 46.348, "grad_norm": 1.0606178045272827, "learning_rate": 2e-05, "loss": 0.04762572, "step": 23174 }, { "epoch": 46.35, "grad_norm": 1.0557273626327515, "learning_rate": 2e-05, "loss": 0.03899936, "step": 23175 }, { "epoch": 46.352, "grad_norm": 1.1816285848617554, "learning_rate": 2e-05, "loss": 0.04925738, "step": 23176 }, { "epoch": 46.354, "grad_norm": 1.4543360471725464, "learning_rate": 2e-05, "loss": 0.04988147, "step": 23177 }, { "epoch": 46.356, "grad_norm": 1.1029762029647827, "learning_rate": 2e-05, "loss": 0.04883325, "step": 23178 }, { "epoch": 46.358, "grad_norm": 1.2085076570510864, "learning_rate": 2e-05, "loss": 0.0439341, "step": 23179 }, { "epoch": 46.36, "grad_norm": 0.9505863785743713, "learning_rate": 2e-05, "loss": 0.03014771, "step": 23180 }, { "epoch": 46.362, "grad_norm": 0.9543641805648804, "learning_rate": 2e-05, "loss": 0.03300781, "step": 23181 }, { "epoch": 46.364, "grad_norm": 0.9900519251823425, "learning_rate": 2e-05, "loss": 0.04158117, "step": 23182 }, { "epoch": 46.366, "grad_norm": 2.153928279876709, "learning_rate": 2e-05, "loss": 0.04417425, "step": 23183 }, { "epoch": 46.368, "grad_norm": 2.409182071685791, "learning_rate": 2e-05, "loss": 0.06157666, "step": 23184 }, { "epoch": 46.37, "grad_norm": 1.057466745376587, "learning_rate": 2e-05, "loss": 0.04592596, "step": 23185 }, { "epoch": 46.372, "grad_norm": 1.1165255308151245, "learning_rate": 2e-05, "loss": 0.04613017, "step": 23186 }, { "epoch": 46.374, "grad_norm": 0.9036556482315063, "learning_rate": 2e-05, "loss": 0.03239109, "step": 23187 }, { "epoch": 46.376, "grad_norm": 1.1022392511367798, "learning_rate": 2e-05, "loss": 0.05009172, "step": 23188 }, { "epoch": 46.378, "grad_norm": 1.0640677213668823, "learning_rate": 2e-05, "loss": 0.04262567, "step": 23189 }, { "epoch": 46.38, "grad_norm": 1.11314857006073, "learning_rate": 2e-05, "loss": 0.04713321, "step": 23190 }, { "epoch": 46.382, "grad_norm": 1.3541191816329956, "learning_rate": 2e-05, "loss": 0.04420114, "step": 23191 }, { "epoch": 46.384, "grad_norm": 1.3517026901245117, "learning_rate": 2e-05, "loss": 0.03507008, "step": 23192 }, { "epoch": 46.386, "grad_norm": 1.197635293006897, "learning_rate": 2e-05, "loss": 0.04363513, "step": 23193 }, { "epoch": 46.388, "grad_norm": 1.3382245302200317, "learning_rate": 2e-05, "loss": 0.05595472, "step": 23194 }, { "epoch": 46.39, "grad_norm": 1.074084758758545, "learning_rate": 2e-05, "loss": 0.05243614, "step": 23195 }, { "epoch": 46.392, "grad_norm": 1.0921239852905273, "learning_rate": 2e-05, "loss": 0.03115225, "step": 23196 }, { "epoch": 46.394, "grad_norm": 2.5413079261779785, "learning_rate": 2e-05, "loss": 0.04841086, "step": 23197 }, { "epoch": 46.396, "grad_norm": 1.2901527881622314, "learning_rate": 2e-05, "loss": 0.05885056, "step": 23198 }, { "epoch": 46.398, "grad_norm": 1.157203197479248, "learning_rate": 2e-05, "loss": 0.05642793, "step": 23199 }, { "epoch": 46.4, "grad_norm": 1.1579469442367554, "learning_rate": 2e-05, "loss": 0.04676284, "step": 23200 }, { "epoch": 46.402, "grad_norm": 1.3478295803070068, "learning_rate": 2e-05, "loss": 0.05287962, "step": 23201 }, { "epoch": 46.404, "grad_norm": 0.9474977254867554, "learning_rate": 2e-05, "loss": 0.0353672, "step": 23202 }, { "epoch": 46.406, "grad_norm": 1.401186466217041, "learning_rate": 2e-05, "loss": 0.05350681, "step": 23203 }, { "epoch": 46.408, "grad_norm": 1.6736927032470703, "learning_rate": 2e-05, "loss": 0.05108627, "step": 23204 }, { "epoch": 46.41, "grad_norm": 1.5280506610870361, "learning_rate": 2e-05, "loss": 0.06310539, "step": 23205 }, { "epoch": 46.412, "grad_norm": 1.2769758701324463, "learning_rate": 2e-05, "loss": 0.04531645, "step": 23206 }, { "epoch": 46.414, "grad_norm": 1.0869319438934326, "learning_rate": 2e-05, "loss": 0.04933855, "step": 23207 }, { "epoch": 46.416, "grad_norm": 1.8246020078659058, "learning_rate": 2e-05, "loss": 0.04469419, "step": 23208 }, { "epoch": 46.418, "grad_norm": 1.1753547191619873, "learning_rate": 2e-05, "loss": 0.03421785, "step": 23209 }, { "epoch": 46.42, "grad_norm": 1.1583861112594604, "learning_rate": 2e-05, "loss": 0.05171474, "step": 23210 }, { "epoch": 46.422, "grad_norm": 1.1681585311889648, "learning_rate": 2e-05, "loss": 0.04288517, "step": 23211 }, { "epoch": 46.424, "grad_norm": 1.8628696203231812, "learning_rate": 2e-05, "loss": 0.06078832, "step": 23212 }, { "epoch": 46.426, "grad_norm": 2.069993495941162, "learning_rate": 2e-05, "loss": 0.06477488, "step": 23213 }, { "epoch": 46.428, "grad_norm": 3.0566177368164062, "learning_rate": 2e-05, "loss": 0.05633872, "step": 23214 }, { "epoch": 46.43, "grad_norm": 1.5551815032958984, "learning_rate": 2e-05, "loss": 0.06199846, "step": 23215 }, { "epoch": 46.432, "grad_norm": 1.2828153371810913, "learning_rate": 2e-05, "loss": 0.04329269, "step": 23216 }, { "epoch": 46.434, "grad_norm": 1.1735610961914062, "learning_rate": 2e-05, "loss": 0.05622312, "step": 23217 }, { "epoch": 46.436, "grad_norm": 2.0017809867858887, "learning_rate": 2e-05, "loss": 0.03825869, "step": 23218 }, { "epoch": 46.438, "grad_norm": 1.1812978982925415, "learning_rate": 2e-05, "loss": 0.05794305, "step": 23219 }, { "epoch": 46.44, "grad_norm": 1.3604928255081177, "learning_rate": 2e-05, "loss": 0.05252583, "step": 23220 }, { "epoch": 46.442, "grad_norm": 1.1032557487487793, "learning_rate": 2e-05, "loss": 0.03681751, "step": 23221 }, { "epoch": 46.444, "grad_norm": 1.597011685371399, "learning_rate": 2e-05, "loss": 0.03853673, "step": 23222 }, { "epoch": 46.446, "grad_norm": 1.193235158920288, "learning_rate": 2e-05, "loss": 0.05152307, "step": 23223 }, { "epoch": 46.448, "grad_norm": 1.071548581123352, "learning_rate": 2e-05, "loss": 0.04468784, "step": 23224 }, { "epoch": 46.45, "grad_norm": 1.2598882913589478, "learning_rate": 2e-05, "loss": 0.05761843, "step": 23225 }, { "epoch": 46.452, "grad_norm": 1.1971979141235352, "learning_rate": 2e-05, "loss": 0.05141561, "step": 23226 }, { "epoch": 46.454, "grad_norm": 2.1761207580566406, "learning_rate": 2e-05, "loss": 0.04906508, "step": 23227 }, { "epoch": 46.456, "grad_norm": 1.4585120677947998, "learning_rate": 2e-05, "loss": 0.0423772, "step": 23228 }, { "epoch": 46.458, "grad_norm": 1.1392666101455688, "learning_rate": 2e-05, "loss": 0.04400862, "step": 23229 }, { "epoch": 46.46, "grad_norm": 1.4353058338165283, "learning_rate": 2e-05, "loss": 0.0567156, "step": 23230 }, { "epoch": 46.462, "grad_norm": 1.12968909740448, "learning_rate": 2e-05, "loss": 0.03764338, "step": 23231 }, { "epoch": 46.464, "grad_norm": 1.2651554346084595, "learning_rate": 2e-05, "loss": 0.06105966, "step": 23232 }, { "epoch": 46.466, "grad_norm": 1.1130878925323486, "learning_rate": 2e-05, "loss": 0.03365841, "step": 23233 }, { "epoch": 46.468, "grad_norm": 1.5134097337722778, "learning_rate": 2e-05, "loss": 0.04425268, "step": 23234 }, { "epoch": 46.47, "grad_norm": 1.2247387170791626, "learning_rate": 2e-05, "loss": 0.04427869, "step": 23235 }, { "epoch": 46.472, "grad_norm": 1.1638450622558594, "learning_rate": 2e-05, "loss": 0.04198921, "step": 23236 }, { "epoch": 46.474, "grad_norm": 1.113433599472046, "learning_rate": 2e-05, "loss": 0.03829671, "step": 23237 }, { "epoch": 46.476, "grad_norm": 1.3648087978363037, "learning_rate": 2e-05, "loss": 0.03742781, "step": 23238 }, { "epoch": 46.478, "grad_norm": 1.5586036443710327, "learning_rate": 2e-05, "loss": 0.04454248, "step": 23239 }, { "epoch": 46.48, "grad_norm": 1.5988510847091675, "learning_rate": 2e-05, "loss": 0.04805886, "step": 23240 }, { "epoch": 46.482, "grad_norm": 1.0325082540512085, "learning_rate": 2e-05, "loss": 0.03924419, "step": 23241 }, { "epoch": 46.484, "grad_norm": 1.351863980293274, "learning_rate": 2e-05, "loss": 0.05655452, "step": 23242 }, { "epoch": 46.486, "grad_norm": 1.4276981353759766, "learning_rate": 2e-05, "loss": 0.05009124, "step": 23243 }, { "epoch": 46.488, "grad_norm": 1.474223017692566, "learning_rate": 2e-05, "loss": 0.04846952, "step": 23244 }, { "epoch": 46.49, "grad_norm": 0.9883701205253601, "learning_rate": 2e-05, "loss": 0.03598514, "step": 23245 }, { "epoch": 46.492, "grad_norm": 1.3215599060058594, "learning_rate": 2e-05, "loss": 0.05669083, "step": 23246 }, { "epoch": 46.494, "grad_norm": 1.0561680793762207, "learning_rate": 2e-05, "loss": 0.04358659, "step": 23247 }, { "epoch": 46.496, "grad_norm": 1.151599407196045, "learning_rate": 2e-05, "loss": 0.05015893, "step": 23248 }, { "epoch": 46.498, "grad_norm": 1.4524661302566528, "learning_rate": 2e-05, "loss": 0.04662398, "step": 23249 }, { "epoch": 46.5, "grad_norm": 1.5353271961212158, "learning_rate": 2e-05, "loss": 0.03597783, "step": 23250 }, { "epoch": 46.502, "grad_norm": 2.727353811264038, "learning_rate": 2e-05, "loss": 0.03727568, "step": 23251 }, { "epoch": 46.504, "grad_norm": 1.1945043802261353, "learning_rate": 2e-05, "loss": 0.03968314, "step": 23252 }, { "epoch": 46.506, "grad_norm": 1.0994758605957031, "learning_rate": 2e-05, "loss": 0.05075628, "step": 23253 }, { "epoch": 46.508, "grad_norm": 1.2463784217834473, "learning_rate": 2e-05, "loss": 0.05148967, "step": 23254 }, { "epoch": 46.51, "grad_norm": 1.2693138122558594, "learning_rate": 2e-05, "loss": 0.04996495, "step": 23255 }, { "epoch": 46.512, "grad_norm": 1.0111137628555298, "learning_rate": 2e-05, "loss": 0.04151868, "step": 23256 }, { "epoch": 46.514, "grad_norm": 1.1659035682678223, "learning_rate": 2e-05, "loss": 0.03361614, "step": 23257 }, { "epoch": 46.516, "grad_norm": 1.3636959791183472, "learning_rate": 2e-05, "loss": 0.054345, "step": 23258 }, { "epoch": 46.518, "grad_norm": 1.228248953819275, "learning_rate": 2e-05, "loss": 0.06052267, "step": 23259 }, { "epoch": 46.52, "grad_norm": 1.2913553714752197, "learning_rate": 2e-05, "loss": 0.05008456, "step": 23260 }, { "epoch": 46.522, "grad_norm": 1.2020117044448853, "learning_rate": 2e-05, "loss": 0.05029326, "step": 23261 }, { "epoch": 46.524, "grad_norm": 1.0171692371368408, "learning_rate": 2e-05, "loss": 0.03987342, "step": 23262 }, { "epoch": 46.526, "grad_norm": 1.2745869159698486, "learning_rate": 2e-05, "loss": 0.06460331, "step": 23263 }, { "epoch": 46.528, "grad_norm": 1.0925174951553345, "learning_rate": 2e-05, "loss": 0.05129455, "step": 23264 }, { "epoch": 46.53, "grad_norm": 1.1556943655014038, "learning_rate": 2e-05, "loss": 0.0476981, "step": 23265 }, { "epoch": 46.532, "grad_norm": 1.4467393159866333, "learning_rate": 2e-05, "loss": 0.06611196, "step": 23266 }, { "epoch": 46.534, "grad_norm": 1.8696622848510742, "learning_rate": 2e-05, "loss": 0.04994733, "step": 23267 }, { "epoch": 46.536, "grad_norm": 1.0204404592514038, "learning_rate": 2e-05, "loss": 0.04660401, "step": 23268 }, { "epoch": 46.538, "grad_norm": 1.2084547281265259, "learning_rate": 2e-05, "loss": 0.05732334, "step": 23269 }, { "epoch": 46.54, "grad_norm": 1.333622932434082, "learning_rate": 2e-05, "loss": 0.05636217, "step": 23270 }, { "epoch": 46.542, "grad_norm": 1.1122244596481323, "learning_rate": 2e-05, "loss": 0.04155293, "step": 23271 }, { "epoch": 46.544, "grad_norm": 1.6370881795883179, "learning_rate": 2e-05, "loss": 0.05667079, "step": 23272 }, { "epoch": 46.546, "grad_norm": 1.505200743675232, "learning_rate": 2e-05, "loss": 0.05249736, "step": 23273 }, { "epoch": 46.548, "grad_norm": 1.155645728111267, "learning_rate": 2e-05, "loss": 0.05694194, "step": 23274 }, { "epoch": 46.55, "grad_norm": 1.1647768020629883, "learning_rate": 2e-05, "loss": 0.05435292, "step": 23275 }, { "epoch": 46.552, "grad_norm": 1.0532338619232178, "learning_rate": 2e-05, "loss": 0.04438793, "step": 23276 }, { "epoch": 46.554, "grad_norm": 1.6303856372833252, "learning_rate": 2e-05, "loss": 0.05212019, "step": 23277 }, { "epoch": 46.556, "grad_norm": 1.217422604560852, "learning_rate": 2e-05, "loss": 0.03759392, "step": 23278 }, { "epoch": 46.558, "grad_norm": 1.3669236898422241, "learning_rate": 2e-05, "loss": 0.05771047, "step": 23279 }, { "epoch": 46.56, "grad_norm": 2.1932220458984375, "learning_rate": 2e-05, "loss": 0.04165822, "step": 23280 }, { "epoch": 46.562, "grad_norm": 1.2292122840881348, "learning_rate": 2e-05, "loss": 0.04785195, "step": 23281 }, { "epoch": 46.564, "grad_norm": 1.196970820426941, "learning_rate": 2e-05, "loss": 0.03689167, "step": 23282 }, { "epoch": 46.566, "grad_norm": 1.020137071609497, "learning_rate": 2e-05, "loss": 0.03734909, "step": 23283 }, { "epoch": 46.568, "grad_norm": 2.541544198989868, "learning_rate": 2e-05, "loss": 0.06038415, "step": 23284 }, { "epoch": 46.57, "grad_norm": 1.083556890487671, "learning_rate": 2e-05, "loss": 0.03873578, "step": 23285 }, { "epoch": 46.572, "grad_norm": 1.4096356630325317, "learning_rate": 2e-05, "loss": 0.05036553, "step": 23286 }, { "epoch": 46.574, "grad_norm": 0.9817960858345032, "learning_rate": 2e-05, "loss": 0.03779167, "step": 23287 }, { "epoch": 46.576, "grad_norm": 1.2446906566619873, "learning_rate": 2e-05, "loss": 0.05619381, "step": 23288 }, { "epoch": 46.578, "grad_norm": 1.0761586427688599, "learning_rate": 2e-05, "loss": 0.04057213, "step": 23289 }, { "epoch": 46.58, "grad_norm": 1.518162727355957, "learning_rate": 2e-05, "loss": 0.03429618, "step": 23290 }, { "epoch": 46.582, "grad_norm": 1.1717662811279297, "learning_rate": 2e-05, "loss": 0.0466333, "step": 23291 }, { "epoch": 46.584, "grad_norm": 3.0718834400177, "learning_rate": 2e-05, "loss": 0.04770675, "step": 23292 }, { "epoch": 46.586, "grad_norm": 1.1569914817810059, "learning_rate": 2e-05, "loss": 0.05019266, "step": 23293 }, { "epoch": 46.588, "grad_norm": 1.0586425065994263, "learning_rate": 2e-05, "loss": 0.03773466, "step": 23294 }, { "epoch": 46.59, "grad_norm": 1.4067730903625488, "learning_rate": 2e-05, "loss": 0.06203558, "step": 23295 }, { "epoch": 46.592, "grad_norm": 0.9604422450065613, "learning_rate": 2e-05, "loss": 0.03419396, "step": 23296 }, { "epoch": 46.594, "grad_norm": 1.4648443460464478, "learning_rate": 2e-05, "loss": 0.06012858, "step": 23297 }, { "epoch": 46.596, "grad_norm": 1.02535080909729, "learning_rate": 2e-05, "loss": 0.03938122, "step": 23298 }, { "epoch": 46.598, "grad_norm": 1.0963574647903442, "learning_rate": 2e-05, "loss": 0.0514155, "step": 23299 }, { "epoch": 46.6, "grad_norm": 1.111864447593689, "learning_rate": 2e-05, "loss": 0.03777619, "step": 23300 }, { "epoch": 46.602, "grad_norm": 1.2542434930801392, "learning_rate": 2e-05, "loss": 0.03768213, "step": 23301 }, { "epoch": 46.604, "grad_norm": 0.9642258286476135, "learning_rate": 2e-05, "loss": 0.040147, "step": 23302 }, { "epoch": 46.606, "grad_norm": 1.2112300395965576, "learning_rate": 2e-05, "loss": 0.06443147, "step": 23303 }, { "epoch": 46.608, "grad_norm": 0.9941690564155579, "learning_rate": 2e-05, "loss": 0.04848402, "step": 23304 }, { "epoch": 46.61, "grad_norm": 1.9957858324050903, "learning_rate": 2e-05, "loss": 0.04321967, "step": 23305 }, { "epoch": 46.612, "grad_norm": 0.9449677467346191, "learning_rate": 2e-05, "loss": 0.02798765, "step": 23306 }, { "epoch": 46.614, "grad_norm": 1.068254828453064, "learning_rate": 2e-05, "loss": 0.04512742, "step": 23307 }, { "epoch": 46.616, "grad_norm": 1.142982006072998, "learning_rate": 2e-05, "loss": 0.04882696, "step": 23308 }, { "epoch": 46.618, "grad_norm": 1.2351460456848145, "learning_rate": 2e-05, "loss": 0.05512179, "step": 23309 }, { "epoch": 46.62, "grad_norm": 2.279547691345215, "learning_rate": 2e-05, "loss": 0.03314951, "step": 23310 }, { "epoch": 46.622, "grad_norm": 1.0746487379074097, "learning_rate": 2e-05, "loss": 0.04710182, "step": 23311 }, { "epoch": 46.624, "grad_norm": 1.222518801689148, "learning_rate": 2e-05, "loss": 0.06075151, "step": 23312 }, { "epoch": 46.626, "grad_norm": 0.9651801586151123, "learning_rate": 2e-05, "loss": 0.04442292, "step": 23313 }, { "epoch": 46.628, "grad_norm": 0.9503448605537415, "learning_rate": 2e-05, "loss": 0.03715204, "step": 23314 }, { "epoch": 46.63, "grad_norm": 1.1955111026763916, "learning_rate": 2e-05, "loss": 0.0486602, "step": 23315 }, { "epoch": 46.632, "grad_norm": 1.020430088043213, "learning_rate": 2e-05, "loss": 0.0444079, "step": 23316 }, { "epoch": 46.634, "grad_norm": 1.1504944562911987, "learning_rate": 2e-05, "loss": 0.0579015, "step": 23317 }, { "epoch": 46.636, "grad_norm": 1.0248736143112183, "learning_rate": 2e-05, "loss": 0.03767063, "step": 23318 }, { "epoch": 46.638, "grad_norm": 1.0812240839004517, "learning_rate": 2e-05, "loss": 0.03341661, "step": 23319 }, { "epoch": 46.64, "grad_norm": 1.3688080310821533, "learning_rate": 2e-05, "loss": 0.04959516, "step": 23320 }, { "epoch": 46.642, "grad_norm": 1.0399272441864014, "learning_rate": 2e-05, "loss": 0.04377398, "step": 23321 }, { "epoch": 46.644, "grad_norm": 1.2335994243621826, "learning_rate": 2e-05, "loss": 0.06242413, "step": 23322 }, { "epoch": 46.646, "grad_norm": 1.2382959127426147, "learning_rate": 2e-05, "loss": 0.04701665, "step": 23323 }, { "epoch": 46.648, "grad_norm": 1.318410873413086, "learning_rate": 2e-05, "loss": 0.03696349, "step": 23324 }, { "epoch": 46.65, "grad_norm": 2.3140316009521484, "learning_rate": 2e-05, "loss": 0.05928268, "step": 23325 }, { "epoch": 46.652, "grad_norm": 1.1803500652313232, "learning_rate": 2e-05, "loss": 0.03985074, "step": 23326 }, { "epoch": 46.654, "grad_norm": 1.5823149681091309, "learning_rate": 2e-05, "loss": 0.06254095, "step": 23327 }, { "epoch": 46.656, "grad_norm": 1.4812148809432983, "learning_rate": 2e-05, "loss": 0.05604445, "step": 23328 }, { "epoch": 46.658, "grad_norm": 2.1350300312042236, "learning_rate": 2e-05, "loss": 0.05875448, "step": 23329 }, { "epoch": 46.66, "grad_norm": 1.400207757949829, "learning_rate": 2e-05, "loss": 0.07081041, "step": 23330 }, { "epoch": 46.662, "grad_norm": 1.0994622707366943, "learning_rate": 2e-05, "loss": 0.04271101, "step": 23331 }, { "epoch": 46.664, "grad_norm": 1.0609034299850464, "learning_rate": 2e-05, "loss": 0.04732626, "step": 23332 }, { "epoch": 46.666, "grad_norm": 1.0104788541793823, "learning_rate": 2e-05, "loss": 0.03551992, "step": 23333 }, { "epoch": 46.668, "grad_norm": 1.1824249029159546, "learning_rate": 2e-05, "loss": 0.05614135, "step": 23334 }, { "epoch": 46.67, "grad_norm": 2.1531567573547363, "learning_rate": 2e-05, "loss": 0.04873116, "step": 23335 }, { "epoch": 46.672, "grad_norm": 1.3467340469360352, "learning_rate": 2e-05, "loss": 0.03813969, "step": 23336 }, { "epoch": 46.674, "grad_norm": 1.037055253982544, "learning_rate": 2e-05, "loss": 0.04405836, "step": 23337 }, { "epoch": 46.676, "grad_norm": 1.868750810623169, "learning_rate": 2e-05, "loss": 0.06937762, "step": 23338 }, { "epoch": 46.678, "grad_norm": 1.8962420225143433, "learning_rate": 2e-05, "loss": 0.04944228, "step": 23339 }, { "epoch": 46.68, "grad_norm": 1.170283317565918, "learning_rate": 2e-05, "loss": 0.04979547, "step": 23340 }, { "epoch": 46.682, "grad_norm": 1.2021749019622803, "learning_rate": 2e-05, "loss": 0.05005801, "step": 23341 }, { "epoch": 46.684, "grad_norm": 1.635096549987793, "learning_rate": 2e-05, "loss": 0.05956845, "step": 23342 }, { "epoch": 46.686, "grad_norm": 2.5779504776000977, "learning_rate": 2e-05, "loss": 0.05944379, "step": 23343 }, { "epoch": 46.688, "grad_norm": 1.169403076171875, "learning_rate": 2e-05, "loss": 0.04585174, "step": 23344 }, { "epoch": 46.69, "grad_norm": 1.3411033153533936, "learning_rate": 2e-05, "loss": 0.06493143, "step": 23345 }, { "epoch": 46.692, "grad_norm": 1.2918158769607544, "learning_rate": 2e-05, "loss": 0.04256565, "step": 23346 }, { "epoch": 46.694, "grad_norm": 0.985222578048706, "learning_rate": 2e-05, "loss": 0.0372174, "step": 23347 }, { "epoch": 46.696, "grad_norm": 1.0924230813980103, "learning_rate": 2e-05, "loss": 0.03921843, "step": 23348 }, { "epoch": 46.698, "grad_norm": 1.3069814443588257, "learning_rate": 2e-05, "loss": 0.04222802, "step": 23349 }, { "epoch": 46.7, "grad_norm": 1.1846057176589966, "learning_rate": 2e-05, "loss": 0.04912648, "step": 23350 }, { "epoch": 46.702, "grad_norm": 1.3495811223983765, "learning_rate": 2e-05, "loss": 0.04603204, "step": 23351 }, { "epoch": 46.704, "grad_norm": 1.0049281120300293, "learning_rate": 2e-05, "loss": 0.04690091, "step": 23352 }, { "epoch": 46.706, "grad_norm": 1.2509807348251343, "learning_rate": 2e-05, "loss": 0.05150412, "step": 23353 }, { "epoch": 46.708, "grad_norm": 1.5264276266098022, "learning_rate": 2e-05, "loss": 0.05389606, "step": 23354 }, { "epoch": 46.71, "grad_norm": 1.7153115272521973, "learning_rate": 2e-05, "loss": 0.06340338, "step": 23355 }, { "epoch": 46.712, "grad_norm": 1.010880708694458, "learning_rate": 2e-05, "loss": 0.0329822, "step": 23356 }, { "epoch": 46.714, "grad_norm": 1.157622218132019, "learning_rate": 2e-05, "loss": 0.05921149, "step": 23357 }, { "epoch": 46.716, "grad_norm": 0.9842050075531006, "learning_rate": 2e-05, "loss": 0.03667116, "step": 23358 }, { "epoch": 46.718, "grad_norm": 1.1801451444625854, "learning_rate": 2e-05, "loss": 0.04451838, "step": 23359 }, { "epoch": 46.72, "grad_norm": 1.2267253398895264, "learning_rate": 2e-05, "loss": 0.05214031, "step": 23360 }, { "epoch": 46.722, "grad_norm": 1.019111156463623, "learning_rate": 2e-05, "loss": 0.04116342, "step": 23361 }, { "epoch": 46.724, "grad_norm": 1.3958607912063599, "learning_rate": 2e-05, "loss": 0.05170007, "step": 23362 }, { "epoch": 46.726, "grad_norm": 1.5343711376190186, "learning_rate": 2e-05, "loss": 0.05161713, "step": 23363 }, { "epoch": 46.728, "grad_norm": 0.9997346997261047, "learning_rate": 2e-05, "loss": 0.0342833, "step": 23364 }, { "epoch": 46.73, "grad_norm": 1.421368956565857, "learning_rate": 2e-05, "loss": 0.04781113, "step": 23365 }, { "epoch": 46.732, "grad_norm": 1.6076453924179077, "learning_rate": 2e-05, "loss": 0.04223795, "step": 23366 }, { "epoch": 46.734, "grad_norm": 1.1187045574188232, "learning_rate": 2e-05, "loss": 0.03693566, "step": 23367 }, { "epoch": 46.736, "grad_norm": 1.6310001611709595, "learning_rate": 2e-05, "loss": 0.04587207, "step": 23368 }, { "epoch": 46.738, "grad_norm": 1.0003893375396729, "learning_rate": 2e-05, "loss": 0.02953731, "step": 23369 }, { "epoch": 46.74, "grad_norm": 2.397538900375366, "learning_rate": 2e-05, "loss": 0.04967777, "step": 23370 }, { "epoch": 46.742, "grad_norm": 2.9524292945861816, "learning_rate": 2e-05, "loss": 0.06176184, "step": 23371 }, { "epoch": 46.744, "grad_norm": 1.3411389589309692, "learning_rate": 2e-05, "loss": 0.05731054, "step": 23372 }, { "epoch": 46.746, "grad_norm": 0.9929517507553101, "learning_rate": 2e-05, "loss": 0.04180113, "step": 23373 }, { "epoch": 46.748, "grad_norm": 1.270747184753418, "learning_rate": 2e-05, "loss": 0.05174856, "step": 23374 }, { "epoch": 46.75, "grad_norm": 1.058241605758667, "learning_rate": 2e-05, "loss": 0.04240339, "step": 23375 }, { "epoch": 46.752, "grad_norm": 1.1424481868743896, "learning_rate": 2e-05, "loss": 0.04190686, "step": 23376 }, { "epoch": 46.754, "grad_norm": 1.0503220558166504, "learning_rate": 2e-05, "loss": 0.05099996, "step": 23377 }, { "epoch": 46.756, "grad_norm": 1.255510687828064, "learning_rate": 2e-05, "loss": 0.03783688, "step": 23378 }, { "epoch": 46.758, "grad_norm": 1.1512470245361328, "learning_rate": 2e-05, "loss": 0.04825658, "step": 23379 }, { "epoch": 46.76, "grad_norm": 1.2274504899978638, "learning_rate": 2e-05, "loss": 0.05738919, "step": 23380 }, { "epoch": 46.762, "grad_norm": 2.000230073928833, "learning_rate": 2e-05, "loss": 0.05466451, "step": 23381 }, { "epoch": 46.764, "grad_norm": 1.1941397190093994, "learning_rate": 2e-05, "loss": 0.0490576, "step": 23382 }, { "epoch": 46.766, "grad_norm": 1.1872297525405884, "learning_rate": 2e-05, "loss": 0.04906765, "step": 23383 }, { "epoch": 46.768, "grad_norm": 1.068359136581421, "learning_rate": 2e-05, "loss": 0.04766599, "step": 23384 }, { "epoch": 46.77, "grad_norm": 1.1551222801208496, "learning_rate": 2e-05, "loss": 0.05015551, "step": 23385 }, { "epoch": 46.772, "grad_norm": 1.1826947927474976, "learning_rate": 2e-05, "loss": 0.04717891, "step": 23386 }, { "epoch": 46.774, "grad_norm": 1.3930027484893799, "learning_rate": 2e-05, "loss": 0.04331183, "step": 23387 }, { "epoch": 46.776, "grad_norm": 1.001609206199646, "learning_rate": 2e-05, "loss": 0.03368718, "step": 23388 }, { "epoch": 46.778, "grad_norm": 1.371207356452942, "learning_rate": 2e-05, "loss": 0.05175086, "step": 23389 }, { "epoch": 46.78, "grad_norm": 1.378040075302124, "learning_rate": 2e-05, "loss": 0.04431616, "step": 23390 }, { "epoch": 46.782, "grad_norm": 1.5609763860702515, "learning_rate": 2e-05, "loss": 0.04517556, "step": 23391 }, { "epoch": 46.784, "grad_norm": 1.9839285612106323, "learning_rate": 2e-05, "loss": 0.05456992, "step": 23392 }, { "epoch": 46.786, "grad_norm": 0.9844745993614197, "learning_rate": 2e-05, "loss": 0.0431798, "step": 23393 }, { "epoch": 46.788, "grad_norm": 1.6684527397155762, "learning_rate": 2e-05, "loss": 0.03772101, "step": 23394 }, { "epoch": 46.79, "grad_norm": 1.1648509502410889, "learning_rate": 2e-05, "loss": 0.05023489, "step": 23395 }, { "epoch": 46.792, "grad_norm": 1.1498349905014038, "learning_rate": 2e-05, "loss": 0.05116352, "step": 23396 }, { "epoch": 46.794, "grad_norm": 2.005505084991455, "learning_rate": 2e-05, "loss": 0.04006296, "step": 23397 }, { "epoch": 46.796, "grad_norm": 4.387477874755859, "learning_rate": 2e-05, "loss": 0.05183373, "step": 23398 }, { "epoch": 46.798, "grad_norm": 2.7198569774627686, "learning_rate": 2e-05, "loss": 0.05188341, "step": 23399 }, { "epoch": 46.8, "grad_norm": 1.1940276622772217, "learning_rate": 2e-05, "loss": 0.05574021, "step": 23400 }, { "epoch": 46.802, "grad_norm": 1.0419164896011353, "learning_rate": 2e-05, "loss": 0.04050265, "step": 23401 }, { "epoch": 46.804, "grad_norm": 3.387681484222412, "learning_rate": 2e-05, "loss": 0.04175478, "step": 23402 }, { "epoch": 46.806, "grad_norm": 1.285062551498413, "learning_rate": 2e-05, "loss": 0.04971816, "step": 23403 }, { "epoch": 46.808, "grad_norm": 1.3174958229064941, "learning_rate": 2e-05, "loss": 0.04881707, "step": 23404 }, { "epoch": 46.81, "grad_norm": 3.6777937412261963, "learning_rate": 2e-05, "loss": 0.0468701, "step": 23405 }, { "epoch": 46.812, "grad_norm": 1.1515544652938843, "learning_rate": 2e-05, "loss": 0.04995457, "step": 23406 }, { "epoch": 46.814, "grad_norm": 1.6691585779190063, "learning_rate": 2e-05, "loss": 0.04617448, "step": 23407 }, { "epoch": 46.816, "grad_norm": 1.3046892881393433, "learning_rate": 2e-05, "loss": 0.04137983, "step": 23408 }, { "epoch": 46.818, "grad_norm": 1.3128255605697632, "learning_rate": 2e-05, "loss": 0.05065984, "step": 23409 }, { "epoch": 46.82, "grad_norm": 1.3586230278015137, "learning_rate": 2e-05, "loss": 0.05450898, "step": 23410 }, { "epoch": 46.822, "grad_norm": 1.2502920627593994, "learning_rate": 2e-05, "loss": 0.03689632, "step": 23411 }, { "epoch": 46.824, "grad_norm": 1.1424442529678345, "learning_rate": 2e-05, "loss": 0.03632981, "step": 23412 }, { "epoch": 46.826, "grad_norm": 1.3228057622909546, "learning_rate": 2e-05, "loss": 0.03984486, "step": 23413 }, { "epoch": 46.828, "grad_norm": 1.0505292415618896, "learning_rate": 2e-05, "loss": 0.04190434, "step": 23414 }, { "epoch": 46.83, "grad_norm": 1.3199437856674194, "learning_rate": 2e-05, "loss": 0.0435336, "step": 23415 }, { "epoch": 46.832, "grad_norm": 2.5862584114074707, "learning_rate": 2e-05, "loss": 0.06176166, "step": 23416 }, { "epoch": 46.834, "grad_norm": 1.1140897274017334, "learning_rate": 2e-05, "loss": 0.04548195, "step": 23417 }, { "epoch": 46.836, "grad_norm": 1.4556688070297241, "learning_rate": 2e-05, "loss": 0.04776576, "step": 23418 }, { "epoch": 46.838, "grad_norm": 2.7034659385681152, "learning_rate": 2e-05, "loss": 0.04163261, "step": 23419 }, { "epoch": 46.84, "grad_norm": 1.0860694646835327, "learning_rate": 2e-05, "loss": 0.04266633, "step": 23420 }, { "epoch": 46.842, "grad_norm": 1.2049165964126587, "learning_rate": 2e-05, "loss": 0.05778315, "step": 23421 }, { "epoch": 46.844, "grad_norm": 1.6713166236877441, "learning_rate": 2e-05, "loss": 0.06890272, "step": 23422 }, { "epoch": 46.846, "grad_norm": 3.021561622619629, "learning_rate": 2e-05, "loss": 0.05821927, "step": 23423 }, { "epoch": 46.848, "grad_norm": 1.4241578578948975, "learning_rate": 2e-05, "loss": 0.0569243, "step": 23424 }, { "epoch": 46.85, "grad_norm": 1.1604105234146118, "learning_rate": 2e-05, "loss": 0.0533638, "step": 23425 }, { "epoch": 46.852, "grad_norm": 1.0262799263000488, "learning_rate": 2e-05, "loss": 0.04001687, "step": 23426 }, { "epoch": 46.854, "grad_norm": 1.0680865049362183, "learning_rate": 2e-05, "loss": 0.04203242, "step": 23427 }, { "epoch": 46.856, "grad_norm": 1.7087593078613281, "learning_rate": 2e-05, "loss": 0.04629037, "step": 23428 }, { "epoch": 46.858, "grad_norm": 1.3922479152679443, "learning_rate": 2e-05, "loss": 0.05780581, "step": 23429 }, { "epoch": 46.86, "grad_norm": 1.3703882694244385, "learning_rate": 2e-05, "loss": 0.05306429, "step": 23430 }, { "epoch": 46.862, "grad_norm": 1.1331651210784912, "learning_rate": 2e-05, "loss": 0.05829866, "step": 23431 }, { "epoch": 46.864, "grad_norm": 0.9955253601074219, "learning_rate": 2e-05, "loss": 0.03273296, "step": 23432 }, { "epoch": 46.866, "grad_norm": 0.9291515946388245, "learning_rate": 2e-05, "loss": 0.03978248, "step": 23433 }, { "epoch": 46.868, "grad_norm": 1.4390240907669067, "learning_rate": 2e-05, "loss": 0.06263384, "step": 23434 }, { "epoch": 46.87, "grad_norm": 1.2633771896362305, "learning_rate": 2e-05, "loss": 0.05546718, "step": 23435 }, { "epoch": 46.872, "grad_norm": 0.8656467199325562, "learning_rate": 2e-05, "loss": 0.03080487, "step": 23436 }, { "epoch": 46.874, "grad_norm": 1.1050388813018799, "learning_rate": 2e-05, "loss": 0.05227588, "step": 23437 }, { "epoch": 46.876, "grad_norm": 1.1237446069717407, "learning_rate": 2e-05, "loss": 0.06108324, "step": 23438 }, { "epoch": 46.878, "grad_norm": 1.0278390645980835, "learning_rate": 2e-05, "loss": 0.03754516, "step": 23439 }, { "epoch": 46.88, "grad_norm": 1.2690916061401367, "learning_rate": 2e-05, "loss": 0.05982792, "step": 23440 }, { "epoch": 46.882, "grad_norm": 0.9860858917236328, "learning_rate": 2e-05, "loss": 0.04347444, "step": 23441 }, { "epoch": 46.884, "grad_norm": 1.0543830394744873, "learning_rate": 2e-05, "loss": 0.03453768, "step": 23442 }, { "epoch": 46.886, "grad_norm": 1.214526891708374, "learning_rate": 2e-05, "loss": 0.04959118, "step": 23443 }, { "epoch": 46.888, "grad_norm": 1.9631701707839966, "learning_rate": 2e-05, "loss": 0.05660867, "step": 23444 }, { "epoch": 46.89, "grad_norm": 1.270965337753296, "learning_rate": 2e-05, "loss": 0.05391593, "step": 23445 }, { "epoch": 46.892, "grad_norm": 1.369183897972107, "learning_rate": 2e-05, "loss": 0.05142535, "step": 23446 }, { "epoch": 46.894, "grad_norm": 3.9048008918762207, "learning_rate": 2e-05, "loss": 0.041148, "step": 23447 }, { "epoch": 46.896, "grad_norm": 2.134962320327759, "learning_rate": 2e-05, "loss": 0.03332872, "step": 23448 }, { "epoch": 46.898, "grad_norm": 1.895607352256775, "learning_rate": 2e-05, "loss": 0.0488515, "step": 23449 }, { "epoch": 46.9, "grad_norm": 1.2083396911621094, "learning_rate": 2e-05, "loss": 0.04162563, "step": 23450 }, { "epoch": 46.902, "grad_norm": 1.1905925273895264, "learning_rate": 2e-05, "loss": 0.03953518, "step": 23451 }, { "epoch": 46.904, "grad_norm": 1.0462309122085571, "learning_rate": 2e-05, "loss": 0.04261171, "step": 23452 }, { "epoch": 46.906, "grad_norm": 1.2605634927749634, "learning_rate": 2e-05, "loss": 0.04457783, "step": 23453 }, { "epoch": 46.908, "grad_norm": 1.0726561546325684, "learning_rate": 2e-05, "loss": 0.04535056, "step": 23454 }, { "epoch": 46.91, "grad_norm": 1.0684914588928223, "learning_rate": 2e-05, "loss": 0.04435501, "step": 23455 }, { "epoch": 46.912, "grad_norm": 1.568894386291504, "learning_rate": 2e-05, "loss": 0.05053505, "step": 23456 }, { "epoch": 46.914, "grad_norm": 1.0680261850357056, "learning_rate": 2e-05, "loss": 0.04221952, "step": 23457 }, { "epoch": 46.916, "grad_norm": 1.2090034484863281, "learning_rate": 2e-05, "loss": 0.05732998, "step": 23458 }, { "epoch": 46.918, "grad_norm": 1.0553538799285889, "learning_rate": 2e-05, "loss": 0.03570461, "step": 23459 }, { "epoch": 46.92, "grad_norm": 1.3866254091262817, "learning_rate": 2e-05, "loss": 0.05774738, "step": 23460 }, { "epoch": 46.922, "grad_norm": 1.218993067741394, "learning_rate": 2e-05, "loss": 0.05130921, "step": 23461 }, { "epoch": 46.924, "grad_norm": 1.7904051542282104, "learning_rate": 2e-05, "loss": 0.05767091, "step": 23462 }, { "epoch": 46.926, "grad_norm": 1.2114713191986084, "learning_rate": 2e-05, "loss": 0.05555228, "step": 23463 }, { "epoch": 46.928, "grad_norm": 1.6034760475158691, "learning_rate": 2e-05, "loss": 0.056468, "step": 23464 }, { "epoch": 46.93, "grad_norm": 1.8197824954986572, "learning_rate": 2e-05, "loss": 0.03906733, "step": 23465 }, { "epoch": 46.932, "grad_norm": 2.5520434379577637, "learning_rate": 2e-05, "loss": 0.05988537, "step": 23466 }, { "epoch": 46.934, "grad_norm": 1.1971849203109741, "learning_rate": 2e-05, "loss": 0.05080458, "step": 23467 }, { "epoch": 46.936, "grad_norm": 1.2232027053833008, "learning_rate": 2e-05, "loss": 0.0653313, "step": 23468 }, { "epoch": 46.938, "grad_norm": 1.3607845306396484, "learning_rate": 2e-05, "loss": 0.04467438, "step": 23469 }, { "epoch": 46.94, "grad_norm": 1.129961371421814, "learning_rate": 2e-05, "loss": 0.05106883, "step": 23470 }, { "epoch": 46.942, "grad_norm": 1.3855695724487305, "learning_rate": 2e-05, "loss": 0.0506998, "step": 23471 }, { "epoch": 46.944, "grad_norm": 1.0359809398651123, "learning_rate": 2e-05, "loss": 0.04331663, "step": 23472 }, { "epoch": 46.946, "grad_norm": 0.8526789546012878, "learning_rate": 2e-05, "loss": 0.03156386, "step": 23473 }, { "epoch": 46.948, "grad_norm": 1.0871293544769287, "learning_rate": 2e-05, "loss": 0.04656047, "step": 23474 }, { "epoch": 46.95, "grad_norm": 1.156686782836914, "learning_rate": 2e-05, "loss": 0.06759372, "step": 23475 }, { "epoch": 46.952, "grad_norm": 2.5178990364074707, "learning_rate": 2e-05, "loss": 0.07468967, "step": 23476 }, { "epoch": 46.954, "grad_norm": 1.1853928565979004, "learning_rate": 2e-05, "loss": 0.0469472, "step": 23477 }, { "epoch": 46.956, "grad_norm": 1.038954257965088, "learning_rate": 2e-05, "loss": 0.03782792, "step": 23478 }, { "epoch": 46.958, "grad_norm": 1.2154408693313599, "learning_rate": 2e-05, "loss": 0.05507773, "step": 23479 }, { "epoch": 46.96, "grad_norm": 1.1402373313903809, "learning_rate": 2e-05, "loss": 0.0420175, "step": 23480 }, { "epoch": 46.962, "grad_norm": 2.3009064197540283, "learning_rate": 2e-05, "loss": 0.04467285, "step": 23481 }, { "epoch": 46.964, "grad_norm": 2.1940722465515137, "learning_rate": 2e-05, "loss": 0.05642062, "step": 23482 }, { "epoch": 46.966, "grad_norm": 1.6159992218017578, "learning_rate": 2e-05, "loss": 0.04018041, "step": 23483 }, { "epoch": 46.968, "grad_norm": 1.1516873836517334, "learning_rate": 2e-05, "loss": 0.04684192, "step": 23484 }, { "epoch": 46.97, "grad_norm": 1.2707068920135498, "learning_rate": 2e-05, "loss": 0.04745233, "step": 23485 }, { "epoch": 46.972, "grad_norm": 1.360982060432434, "learning_rate": 2e-05, "loss": 0.04974321, "step": 23486 }, { "epoch": 46.974, "grad_norm": 1.3548970222473145, "learning_rate": 2e-05, "loss": 0.04797807, "step": 23487 }, { "epoch": 46.976, "grad_norm": 1.2003024816513062, "learning_rate": 2e-05, "loss": 0.04765528, "step": 23488 }, { "epoch": 46.978, "grad_norm": 1.032074213027954, "learning_rate": 2e-05, "loss": 0.03950176, "step": 23489 }, { "epoch": 46.98, "grad_norm": 1.1735929250717163, "learning_rate": 2e-05, "loss": 0.05224654, "step": 23490 }, { "epoch": 46.982, "grad_norm": 1.1766208410263062, "learning_rate": 2e-05, "loss": 0.03919265, "step": 23491 }, { "epoch": 46.984, "grad_norm": 1.5051755905151367, "learning_rate": 2e-05, "loss": 0.04944252, "step": 23492 }, { "epoch": 46.986, "grad_norm": 1.4483623504638672, "learning_rate": 2e-05, "loss": 0.04749278, "step": 23493 }, { "epoch": 46.988, "grad_norm": 1.991761565208435, "learning_rate": 2e-05, "loss": 0.07904573, "step": 23494 }, { "epoch": 46.99, "grad_norm": 2.063960552215576, "learning_rate": 2e-05, "loss": 0.06021646, "step": 23495 }, { "epoch": 46.992, "grad_norm": 2.799022912979126, "learning_rate": 2e-05, "loss": 0.04049257, "step": 23496 }, { "epoch": 46.994, "grad_norm": 1.7096704244613647, "learning_rate": 2e-05, "loss": 0.04271127, "step": 23497 }, { "epoch": 46.996, "grad_norm": 0.984707236289978, "learning_rate": 2e-05, "loss": 0.03668294, "step": 23498 }, { "epoch": 46.998, "grad_norm": 1.7703429460525513, "learning_rate": 2e-05, "loss": 0.05086554, "step": 23499 }, { "epoch": 47.0, "grad_norm": 1.1229311227798462, "learning_rate": 2e-05, "loss": 0.04655139, "step": 23500 }, { "epoch": 47.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9820359281437125, "Equal_1": 1.0, "Equal_2": 0.9880239520958084, "Equal_3": 0.9880239520958084, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9939879759519038, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.992, "Perpendicular_1": 0.998, "Perpendicular_2": 0.998, "Perpendicular_3": 0.905811623246493, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9976666666666667, "PointLiesOnCircle_3": 0.9936, "PointLiesOnLine_1": 1.0, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9860279441117764 }, "eval_runtime": 319.7259, "eval_samples_per_second": 32.841, "eval_steps_per_second": 0.657, "step": 23500 }, { "epoch": 47.002, "grad_norm": 1.2124334573745728, "learning_rate": 2e-05, "loss": 0.04620025, "step": 23501 }, { "epoch": 47.004, "grad_norm": 1.2111510038375854, "learning_rate": 2e-05, "loss": 0.03958354, "step": 23502 }, { "epoch": 47.006, "grad_norm": 2.0674355030059814, "learning_rate": 2e-05, "loss": 0.06526448, "step": 23503 }, { "epoch": 47.008, "grad_norm": 1.139857530593872, "learning_rate": 2e-05, "loss": 0.0366558, "step": 23504 }, { "epoch": 47.01, "grad_norm": 1.407399296760559, "learning_rate": 2e-05, "loss": 0.05581947, "step": 23505 }, { "epoch": 47.012, "grad_norm": 1.1468596458435059, "learning_rate": 2e-05, "loss": 0.04368159, "step": 23506 }, { "epoch": 47.014, "grad_norm": 1.4752362966537476, "learning_rate": 2e-05, "loss": 0.03284196, "step": 23507 }, { "epoch": 47.016, "grad_norm": 1.083961009979248, "learning_rate": 2e-05, "loss": 0.04144663, "step": 23508 }, { "epoch": 47.018, "grad_norm": 1.0464342832565308, "learning_rate": 2e-05, "loss": 0.04529237, "step": 23509 }, { "epoch": 47.02, "grad_norm": 1.1925157308578491, "learning_rate": 2e-05, "loss": 0.05837334, "step": 23510 }, { "epoch": 47.022, "grad_norm": 1.135394811630249, "learning_rate": 2e-05, "loss": 0.04342172, "step": 23511 }, { "epoch": 47.024, "grad_norm": 1.2975212335586548, "learning_rate": 2e-05, "loss": 0.05698482, "step": 23512 }, { "epoch": 47.026, "grad_norm": 0.9662984609603882, "learning_rate": 2e-05, "loss": 0.03092372, "step": 23513 }, { "epoch": 47.028, "grad_norm": 1.3168891668319702, "learning_rate": 2e-05, "loss": 0.05116647, "step": 23514 }, { "epoch": 47.03, "grad_norm": 1.0158759355545044, "learning_rate": 2e-05, "loss": 0.04270767, "step": 23515 }, { "epoch": 47.032, "grad_norm": 1.0800869464874268, "learning_rate": 2e-05, "loss": 0.0404634, "step": 23516 }, { "epoch": 47.034, "grad_norm": 1.3503354787826538, "learning_rate": 2e-05, "loss": 0.06739471, "step": 23517 }, { "epoch": 47.036, "grad_norm": 1.1669559478759766, "learning_rate": 2e-05, "loss": 0.04633216, "step": 23518 }, { "epoch": 47.038, "grad_norm": 1.211911678314209, "learning_rate": 2e-05, "loss": 0.05827531, "step": 23519 }, { "epoch": 47.04, "grad_norm": 1.1868705749511719, "learning_rate": 2e-05, "loss": 0.04901566, "step": 23520 }, { "epoch": 47.042, "grad_norm": 0.9578158855438232, "learning_rate": 2e-05, "loss": 0.04152099, "step": 23521 }, { "epoch": 47.044, "grad_norm": 1.1845344305038452, "learning_rate": 2e-05, "loss": 0.04116198, "step": 23522 }, { "epoch": 47.046, "grad_norm": 1.7209062576293945, "learning_rate": 2e-05, "loss": 0.05426127, "step": 23523 }, { "epoch": 47.048, "grad_norm": 1.0993847846984863, "learning_rate": 2e-05, "loss": 0.04611755, "step": 23524 }, { "epoch": 47.05, "grad_norm": 1.5714466571807861, "learning_rate": 2e-05, "loss": 0.06220062, "step": 23525 }, { "epoch": 47.052, "grad_norm": 1.5425214767456055, "learning_rate": 2e-05, "loss": 0.04734546, "step": 23526 }, { "epoch": 47.054, "grad_norm": 1.330129623413086, "learning_rate": 2e-05, "loss": 0.0763645, "step": 23527 }, { "epoch": 47.056, "grad_norm": 1.205364465713501, "learning_rate": 2e-05, "loss": 0.06087791, "step": 23528 }, { "epoch": 47.058, "grad_norm": 1.8291528224945068, "learning_rate": 2e-05, "loss": 0.05230067, "step": 23529 }, { "epoch": 47.06, "grad_norm": 1.2380144596099854, "learning_rate": 2e-05, "loss": 0.05399077, "step": 23530 }, { "epoch": 47.062, "grad_norm": 1.1345731019973755, "learning_rate": 2e-05, "loss": 0.04412289, "step": 23531 }, { "epoch": 47.064, "grad_norm": 1.3171319961547852, "learning_rate": 2e-05, "loss": 0.05258146, "step": 23532 }, { "epoch": 47.066, "grad_norm": 1.1284446716308594, "learning_rate": 2e-05, "loss": 0.04565362, "step": 23533 }, { "epoch": 47.068, "grad_norm": 1.0812745094299316, "learning_rate": 2e-05, "loss": 0.03943041, "step": 23534 }, { "epoch": 47.07, "grad_norm": 1.1932939291000366, "learning_rate": 2e-05, "loss": 0.03978365, "step": 23535 }, { "epoch": 47.072, "grad_norm": 1.5841753482818604, "learning_rate": 2e-05, "loss": 0.04983898, "step": 23536 }, { "epoch": 47.074, "grad_norm": 1.2005212306976318, "learning_rate": 2e-05, "loss": 0.0624929, "step": 23537 }, { "epoch": 47.076, "grad_norm": 1.3606860637664795, "learning_rate": 2e-05, "loss": 0.0616478, "step": 23538 }, { "epoch": 47.078, "grad_norm": 1.0764256715774536, "learning_rate": 2e-05, "loss": 0.04636963, "step": 23539 }, { "epoch": 47.08, "grad_norm": 1.1802319288253784, "learning_rate": 2e-05, "loss": 0.05296212, "step": 23540 }, { "epoch": 47.082, "grad_norm": 1.249846339225769, "learning_rate": 2e-05, "loss": 0.05203488, "step": 23541 }, { "epoch": 47.084, "grad_norm": 1.3313225507736206, "learning_rate": 2e-05, "loss": 0.05261878, "step": 23542 }, { "epoch": 47.086, "grad_norm": 1.1203484535217285, "learning_rate": 2e-05, "loss": 0.04687518, "step": 23543 }, { "epoch": 47.088, "grad_norm": 1.1626969575881958, "learning_rate": 2e-05, "loss": 0.04210647, "step": 23544 }, { "epoch": 47.09, "grad_norm": 1.0756282806396484, "learning_rate": 2e-05, "loss": 0.04296558, "step": 23545 }, { "epoch": 47.092, "grad_norm": 1.29530930519104, "learning_rate": 2e-05, "loss": 0.05416109, "step": 23546 }, { "epoch": 47.094, "grad_norm": 1.05868661403656, "learning_rate": 2e-05, "loss": 0.04070142, "step": 23547 }, { "epoch": 47.096, "grad_norm": 1.0833359956741333, "learning_rate": 2e-05, "loss": 0.0563876, "step": 23548 }, { "epoch": 47.098, "grad_norm": 1.1828356981277466, "learning_rate": 2e-05, "loss": 0.05474529, "step": 23549 }, { "epoch": 47.1, "grad_norm": 1.6338591575622559, "learning_rate": 2e-05, "loss": 0.04851716, "step": 23550 }, { "epoch": 47.102, "grad_norm": 1.1993439197540283, "learning_rate": 2e-05, "loss": 0.05251703, "step": 23551 }, { "epoch": 47.104, "grad_norm": 1.1439919471740723, "learning_rate": 2e-05, "loss": 0.04419599, "step": 23552 }, { "epoch": 47.106, "grad_norm": 1.2255439758300781, "learning_rate": 2e-05, "loss": 0.04737487, "step": 23553 }, { "epoch": 47.108, "grad_norm": 1.085721492767334, "learning_rate": 2e-05, "loss": 0.03265755, "step": 23554 }, { "epoch": 47.11, "grad_norm": 1.1767328977584839, "learning_rate": 2e-05, "loss": 0.05188051, "step": 23555 }, { "epoch": 47.112, "grad_norm": 1.1831814050674438, "learning_rate": 2e-05, "loss": 0.06118751, "step": 23556 }, { "epoch": 47.114, "grad_norm": 1.093849778175354, "learning_rate": 2e-05, "loss": 0.04082596, "step": 23557 }, { "epoch": 47.116, "grad_norm": 1.1168532371520996, "learning_rate": 2e-05, "loss": 0.05243224, "step": 23558 }, { "epoch": 47.118, "grad_norm": 1.548278570175171, "learning_rate": 2e-05, "loss": 0.05186, "step": 23559 }, { "epoch": 47.12, "grad_norm": 1.2307542562484741, "learning_rate": 2e-05, "loss": 0.05896568, "step": 23560 }, { "epoch": 47.122, "grad_norm": 1.2599307298660278, "learning_rate": 2e-05, "loss": 0.04969592, "step": 23561 }, { "epoch": 47.124, "grad_norm": 1.4370466470718384, "learning_rate": 2e-05, "loss": 0.06255047, "step": 23562 }, { "epoch": 47.126, "grad_norm": 1.6804131269454956, "learning_rate": 2e-05, "loss": 0.04512757, "step": 23563 }, { "epoch": 47.128, "grad_norm": 1.4804704189300537, "learning_rate": 2e-05, "loss": 0.04683591, "step": 23564 }, { "epoch": 47.13, "grad_norm": 2.8036322593688965, "learning_rate": 2e-05, "loss": 0.06730855, "step": 23565 }, { "epoch": 47.132, "grad_norm": 0.9568971395492554, "learning_rate": 2e-05, "loss": 0.04147626, "step": 23566 }, { "epoch": 47.134, "grad_norm": 1.3193092346191406, "learning_rate": 2e-05, "loss": 0.05314566, "step": 23567 }, { "epoch": 47.136, "grad_norm": 1.122524380683899, "learning_rate": 2e-05, "loss": 0.04262283, "step": 23568 }, { "epoch": 47.138, "grad_norm": 1.9593148231506348, "learning_rate": 2e-05, "loss": 0.05774532, "step": 23569 }, { "epoch": 47.14, "grad_norm": 1.1386750936508179, "learning_rate": 2e-05, "loss": 0.0475467, "step": 23570 }, { "epoch": 47.142, "grad_norm": 1.329964518547058, "learning_rate": 2e-05, "loss": 0.04642937, "step": 23571 }, { "epoch": 47.144, "grad_norm": 1.109947681427002, "learning_rate": 2e-05, "loss": 0.05008635, "step": 23572 }, { "epoch": 47.146, "grad_norm": 1.2006001472473145, "learning_rate": 2e-05, "loss": 0.05233764, "step": 23573 }, { "epoch": 47.148, "grad_norm": 1.4829745292663574, "learning_rate": 2e-05, "loss": 0.05814692, "step": 23574 }, { "epoch": 47.15, "grad_norm": 1.1940250396728516, "learning_rate": 2e-05, "loss": 0.05612388, "step": 23575 }, { "epoch": 47.152, "grad_norm": 1.3208078145980835, "learning_rate": 2e-05, "loss": 0.04610663, "step": 23576 }, { "epoch": 47.154, "grad_norm": 1.1997742652893066, "learning_rate": 2e-05, "loss": 0.04106606, "step": 23577 }, { "epoch": 47.156, "grad_norm": 1.40494704246521, "learning_rate": 2e-05, "loss": 0.06252164, "step": 23578 }, { "epoch": 47.158, "grad_norm": 0.9245737791061401, "learning_rate": 2e-05, "loss": 0.04033583, "step": 23579 }, { "epoch": 47.16, "grad_norm": 1.717743992805481, "learning_rate": 2e-05, "loss": 0.06210865, "step": 23580 }, { "epoch": 47.162, "grad_norm": 1.206834316253662, "learning_rate": 2e-05, "loss": 0.05354768, "step": 23581 }, { "epoch": 47.164, "grad_norm": 1.2854136228561401, "learning_rate": 2e-05, "loss": 0.05695827, "step": 23582 }, { "epoch": 47.166, "grad_norm": 1.6390953063964844, "learning_rate": 2e-05, "loss": 0.06032401, "step": 23583 }, { "epoch": 47.168, "grad_norm": 1.2514336109161377, "learning_rate": 2e-05, "loss": 0.05170215, "step": 23584 }, { "epoch": 47.17, "grad_norm": 1.274780035018921, "learning_rate": 2e-05, "loss": 0.03985206, "step": 23585 }, { "epoch": 47.172, "grad_norm": 1.108774185180664, "learning_rate": 2e-05, "loss": 0.03357091, "step": 23586 }, { "epoch": 47.174, "grad_norm": 1.0785712003707886, "learning_rate": 2e-05, "loss": 0.03320477, "step": 23587 }, { "epoch": 47.176, "grad_norm": 1.3981683254241943, "learning_rate": 2e-05, "loss": 0.03950857, "step": 23588 }, { "epoch": 47.178, "grad_norm": 1.2349064350128174, "learning_rate": 2e-05, "loss": 0.06858353, "step": 23589 }, { "epoch": 47.18, "grad_norm": 2.030306100845337, "learning_rate": 2e-05, "loss": 0.04219834, "step": 23590 }, { "epoch": 47.182, "grad_norm": 1.5019062757492065, "learning_rate": 2e-05, "loss": 0.05146636, "step": 23591 }, { "epoch": 47.184, "grad_norm": 1.3464275598526, "learning_rate": 2e-05, "loss": 0.05433298, "step": 23592 }, { "epoch": 47.186, "grad_norm": 1.466103196144104, "learning_rate": 2e-05, "loss": 0.06641914, "step": 23593 }, { "epoch": 47.188, "grad_norm": 1.3546744585037231, "learning_rate": 2e-05, "loss": 0.05045114, "step": 23594 }, { "epoch": 47.19, "grad_norm": 1.330115795135498, "learning_rate": 2e-05, "loss": 0.0545896, "step": 23595 }, { "epoch": 47.192, "grad_norm": 1.141314148902893, "learning_rate": 2e-05, "loss": 0.03988175, "step": 23596 }, { "epoch": 47.194, "grad_norm": 1.3933604955673218, "learning_rate": 2e-05, "loss": 0.05277719, "step": 23597 }, { "epoch": 47.196, "grad_norm": 1.3836783170700073, "learning_rate": 2e-05, "loss": 0.05862165, "step": 23598 }, { "epoch": 47.198, "grad_norm": 1.1174200773239136, "learning_rate": 2e-05, "loss": 0.04964589, "step": 23599 }, { "epoch": 47.2, "grad_norm": 1.247449278831482, "learning_rate": 2e-05, "loss": 0.05483104, "step": 23600 }, { "epoch": 47.202, "grad_norm": 1.1020945310592651, "learning_rate": 2e-05, "loss": 0.05271735, "step": 23601 }, { "epoch": 47.204, "grad_norm": 1.5037002563476562, "learning_rate": 2e-05, "loss": 0.06174751, "step": 23602 }, { "epoch": 47.206, "grad_norm": 1.4615405797958374, "learning_rate": 2e-05, "loss": 0.05728906, "step": 23603 }, { "epoch": 47.208, "grad_norm": 1.1932406425476074, "learning_rate": 2e-05, "loss": 0.05166669, "step": 23604 }, { "epoch": 47.21, "grad_norm": 1.133001685142517, "learning_rate": 2e-05, "loss": 0.05504667, "step": 23605 }, { "epoch": 47.212, "grad_norm": 1.0907678604125977, "learning_rate": 2e-05, "loss": 0.04519046, "step": 23606 }, { "epoch": 47.214, "grad_norm": 1.0519095659255981, "learning_rate": 2e-05, "loss": 0.0439795, "step": 23607 }, { "epoch": 47.216, "grad_norm": 1.272680401802063, "learning_rate": 2e-05, "loss": 0.04971275, "step": 23608 }, { "epoch": 47.218, "grad_norm": 1.4395430088043213, "learning_rate": 2e-05, "loss": 0.05114156, "step": 23609 }, { "epoch": 47.22, "grad_norm": 1.1029596328735352, "learning_rate": 2e-05, "loss": 0.04121825, "step": 23610 }, { "epoch": 47.222, "grad_norm": 1.232066035270691, "learning_rate": 2e-05, "loss": 0.04981194, "step": 23611 }, { "epoch": 47.224, "grad_norm": 1.2140462398529053, "learning_rate": 2e-05, "loss": 0.05153929, "step": 23612 }, { "epoch": 47.226, "grad_norm": 1.5724217891693115, "learning_rate": 2e-05, "loss": 0.05892528, "step": 23613 }, { "epoch": 47.228, "grad_norm": 1.089890480041504, "learning_rate": 2e-05, "loss": 0.03622951, "step": 23614 }, { "epoch": 47.23, "grad_norm": 1.137673258781433, "learning_rate": 2e-05, "loss": 0.05067534, "step": 23615 }, { "epoch": 47.232, "grad_norm": 1.6163313388824463, "learning_rate": 2e-05, "loss": 0.07416356, "step": 23616 }, { "epoch": 47.234, "grad_norm": 1.5656497478485107, "learning_rate": 2e-05, "loss": 0.04488578, "step": 23617 }, { "epoch": 47.236, "grad_norm": 2.585951566696167, "learning_rate": 2e-05, "loss": 0.0466084, "step": 23618 }, { "epoch": 47.238, "grad_norm": 1.1935782432556152, "learning_rate": 2e-05, "loss": 0.03517368, "step": 23619 }, { "epoch": 47.24, "grad_norm": 0.9748817682266235, "learning_rate": 2e-05, "loss": 0.03498342, "step": 23620 }, { "epoch": 47.242, "grad_norm": 1.339208960533142, "learning_rate": 2e-05, "loss": 0.04053495, "step": 23621 }, { "epoch": 47.244, "grad_norm": 1.0967516899108887, "learning_rate": 2e-05, "loss": 0.03619142, "step": 23622 }, { "epoch": 47.246, "grad_norm": 1.4304256439208984, "learning_rate": 2e-05, "loss": 0.05116164, "step": 23623 }, { "epoch": 47.248, "grad_norm": 1.2332102060317993, "learning_rate": 2e-05, "loss": 0.05349259, "step": 23624 }, { "epoch": 47.25, "grad_norm": 1.084965467453003, "learning_rate": 2e-05, "loss": 0.04240631, "step": 23625 }, { "epoch": 47.252, "grad_norm": 1.075219750404358, "learning_rate": 2e-05, "loss": 0.04627833, "step": 23626 }, { "epoch": 47.254, "grad_norm": 1.1400893926620483, "learning_rate": 2e-05, "loss": 0.04680464, "step": 23627 }, { "epoch": 47.256, "grad_norm": 1.1270958185195923, "learning_rate": 2e-05, "loss": 0.04278168, "step": 23628 }, { "epoch": 47.258, "grad_norm": 2.2311246395111084, "learning_rate": 2e-05, "loss": 0.05921525, "step": 23629 }, { "epoch": 47.26, "grad_norm": 1.1150248050689697, "learning_rate": 2e-05, "loss": 0.04678026, "step": 23630 }, { "epoch": 47.262, "grad_norm": 1.1364946365356445, "learning_rate": 2e-05, "loss": 0.05590563, "step": 23631 }, { "epoch": 47.264, "grad_norm": 1.2034296989440918, "learning_rate": 2e-05, "loss": 0.05708367, "step": 23632 }, { "epoch": 47.266, "grad_norm": 1.4349358081817627, "learning_rate": 2e-05, "loss": 0.06511645, "step": 23633 }, { "epoch": 47.268, "grad_norm": 1.4762065410614014, "learning_rate": 2e-05, "loss": 0.06263445, "step": 23634 }, { "epoch": 47.27, "grad_norm": 0.9478891491889954, "learning_rate": 2e-05, "loss": 0.03476822, "step": 23635 }, { "epoch": 47.272, "grad_norm": 1.010331153869629, "learning_rate": 2e-05, "loss": 0.02933269, "step": 23636 }, { "epoch": 47.274, "grad_norm": 1.250553011894226, "learning_rate": 2e-05, "loss": 0.04823877, "step": 23637 }, { "epoch": 47.276, "grad_norm": 0.9914244413375854, "learning_rate": 2e-05, "loss": 0.02885609, "step": 23638 }, { "epoch": 47.278, "grad_norm": 1.766867995262146, "learning_rate": 2e-05, "loss": 0.05482132, "step": 23639 }, { "epoch": 47.28, "grad_norm": 1.9935588836669922, "learning_rate": 2e-05, "loss": 0.05211507, "step": 23640 }, { "epoch": 47.282, "grad_norm": 1.3328672647476196, "learning_rate": 2e-05, "loss": 0.05550382, "step": 23641 }, { "epoch": 47.284, "grad_norm": 1.2937812805175781, "learning_rate": 2e-05, "loss": 0.05083098, "step": 23642 }, { "epoch": 47.286, "grad_norm": 1.1793773174285889, "learning_rate": 2e-05, "loss": 0.04735196, "step": 23643 }, { "epoch": 47.288, "grad_norm": 0.9814954400062561, "learning_rate": 2e-05, "loss": 0.03630852, "step": 23644 }, { "epoch": 47.29, "grad_norm": 1.104866862297058, "learning_rate": 2e-05, "loss": 0.04346426, "step": 23645 }, { "epoch": 47.292, "grad_norm": 1.7783113718032837, "learning_rate": 2e-05, "loss": 0.05869078, "step": 23646 }, { "epoch": 47.294, "grad_norm": 1.1085009574890137, "learning_rate": 2e-05, "loss": 0.05062877, "step": 23647 }, { "epoch": 47.296, "grad_norm": 1.106187343597412, "learning_rate": 2e-05, "loss": 0.05129405, "step": 23648 }, { "epoch": 47.298, "grad_norm": 1.0873476266860962, "learning_rate": 2e-05, "loss": 0.03707412, "step": 23649 }, { "epoch": 47.3, "grad_norm": 1.7342414855957031, "learning_rate": 2e-05, "loss": 0.05554866, "step": 23650 }, { "epoch": 47.302, "grad_norm": 0.9549355506896973, "learning_rate": 2e-05, "loss": 0.03941218, "step": 23651 }, { "epoch": 47.304, "grad_norm": 1.0804848670959473, "learning_rate": 2e-05, "loss": 0.04693265, "step": 23652 }, { "epoch": 47.306, "grad_norm": 1.6769800186157227, "learning_rate": 2e-05, "loss": 0.05842284, "step": 23653 }, { "epoch": 47.308, "grad_norm": 1.0862559080123901, "learning_rate": 2e-05, "loss": 0.04118923, "step": 23654 }, { "epoch": 47.31, "grad_norm": 1.7151005268096924, "learning_rate": 2e-05, "loss": 0.07205494, "step": 23655 }, { "epoch": 47.312, "grad_norm": 1.1123970746994019, "learning_rate": 2e-05, "loss": 0.04154256, "step": 23656 }, { "epoch": 47.314, "grad_norm": 1.0019702911376953, "learning_rate": 2e-05, "loss": 0.03406595, "step": 23657 }, { "epoch": 47.316, "grad_norm": 1.2018071413040161, "learning_rate": 2e-05, "loss": 0.04904677, "step": 23658 }, { "epoch": 47.318, "grad_norm": 1.2852320671081543, "learning_rate": 2e-05, "loss": 0.03975239, "step": 23659 }, { "epoch": 47.32, "grad_norm": 1.0827425718307495, "learning_rate": 2e-05, "loss": 0.0492649, "step": 23660 }, { "epoch": 47.322, "grad_norm": 1.0454564094543457, "learning_rate": 2e-05, "loss": 0.03652071, "step": 23661 }, { "epoch": 47.324, "grad_norm": 1.3567827939987183, "learning_rate": 2e-05, "loss": 0.05563585, "step": 23662 }, { "epoch": 47.326, "grad_norm": 2.5465247631073, "learning_rate": 2e-05, "loss": 0.06577208, "step": 23663 }, { "epoch": 47.328, "grad_norm": 1.4325695037841797, "learning_rate": 2e-05, "loss": 0.06627315, "step": 23664 }, { "epoch": 47.33, "grad_norm": 1.2820574045181274, "learning_rate": 2e-05, "loss": 0.05097567, "step": 23665 }, { "epoch": 47.332, "grad_norm": 0.9658817648887634, "learning_rate": 2e-05, "loss": 0.03891777, "step": 23666 }, { "epoch": 47.334, "grad_norm": 0.9408270716667175, "learning_rate": 2e-05, "loss": 0.03226583, "step": 23667 }, { "epoch": 47.336, "grad_norm": 1.1844184398651123, "learning_rate": 2e-05, "loss": 0.04694714, "step": 23668 }, { "epoch": 47.338, "grad_norm": 1.1536945104599, "learning_rate": 2e-05, "loss": 0.04273265, "step": 23669 }, { "epoch": 47.34, "grad_norm": 1.163088083267212, "learning_rate": 2e-05, "loss": 0.03042351, "step": 23670 }, { "epoch": 47.342, "grad_norm": 1.5534110069274902, "learning_rate": 2e-05, "loss": 0.04814804, "step": 23671 }, { "epoch": 47.344, "grad_norm": 1.2012584209442139, "learning_rate": 2e-05, "loss": 0.04714233, "step": 23672 }, { "epoch": 47.346, "grad_norm": 1.1810247898101807, "learning_rate": 2e-05, "loss": 0.03760591, "step": 23673 }, { "epoch": 47.348, "grad_norm": 1.1035114526748657, "learning_rate": 2e-05, "loss": 0.04680648, "step": 23674 }, { "epoch": 47.35, "grad_norm": 0.9210943579673767, "learning_rate": 2e-05, "loss": 0.03328598, "step": 23675 }, { "epoch": 47.352, "grad_norm": 1.0501015186309814, "learning_rate": 2e-05, "loss": 0.03831122, "step": 23676 }, { "epoch": 47.354, "grad_norm": 2.732465982437134, "learning_rate": 2e-05, "loss": 0.05034593, "step": 23677 }, { "epoch": 47.356, "grad_norm": 0.9773300290107727, "learning_rate": 2e-05, "loss": 0.03243728, "step": 23678 }, { "epoch": 47.358, "grad_norm": 1.2418444156646729, "learning_rate": 2e-05, "loss": 0.05027065, "step": 23679 }, { "epoch": 47.36, "grad_norm": 1.1005994081497192, "learning_rate": 2e-05, "loss": 0.04392687, "step": 23680 }, { "epoch": 47.362, "grad_norm": 1.098421573638916, "learning_rate": 2e-05, "loss": 0.05161924, "step": 23681 }, { "epoch": 47.364, "grad_norm": 1.4422335624694824, "learning_rate": 2e-05, "loss": 0.04385792, "step": 23682 }, { "epoch": 47.366, "grad_norm": 1.126664638519287, "learning_rate": 2e-05, "loss": 0.04566282, "step": 23683 }, { "epoch": 47.368, "grad_norm": 1.4247503280639648, "learning_rate": 2e-05, "loss": 0.05417196, "step": 23684 }, { "epoch": 47.37, "grad_norm": 1.1334466934204102, "learning_rate": 2e-05, "loss": 0.04590701, "step": 23685 }, { "epoch": 47.372, "grad_norm": 1.1766184568405151, "learning_rate": 2e-05, "loss": 0.04920424, "step": 23686 }, { "epoch": 47.374, "grad_norm": 1.0475587844848633, "learning_rate": 2e-05, "loss": 0.03809017, "step": 23687 }, { "epoch": 47.376, "grad_norm": 1.0179909467697144, "learning_rate": 2e-05, "loss": 0.0358481, "step": 23688 }, { "epoch": 47.378, "grad_norm": 1.281684398651123, "learning_rate": 2e-05, "loss": 0.05288438, "step": 23689 }, { "epoch": 47.38, "grad_norm": 1.3577895164489746, "learning_rate": 2e-05, "loss": 0.05755025, "step": 23690 }, { "epoch": 47.382, "grad_norm": 1.284982681274414, "learning_rate": 2e-05, "loss": 0.0437559, "step": 23691 }, { "epoch": 47.384, "grad_norm": 0.9902780055999756, "learning_rate": 2e-05, "loss": 0.03365954, "step": 23692 }, { "epoch": 47.386, "grad_norm": 2.0288844108581543, "learning_rate": 2e-05, "loss": 0.03784881, "step": 23693 }, { "epoch": 47.388, "grad_norm": 1.6296567916870117, "learning_rate": 2e-05, "loss": 0.05740002, "step": 23694 }, { "epoch": 47.39, "grad_norm": 1.2057472467422485, "learning_rate": 2e-05, "loss": 0.04737182, "step": 23695 }, { "epoch": 47.392, "grad_norm": 2.2552409172058105, "learning_rate": 2e-05, "loss": 0.05677157, "step": 23696 }, { "epoch": 47.394, "grad_norm": 1.9773105382919312, "learning_rate": 2e-05, "loss": 0.05616665, "step": 23697 }, { "epoch": 47.396, "grad_norm": 1.2296985387802124, "learning_rate": 2e-05, "loss": 0.04554373, "step": 23698 }, { "epoch": 47.398, "grad_norm": 1.108909010887146, "learning_rate": 2e-05, "loss": 0.03959118, "step": 23699 }, { "epoch": 47.4, "grad_norm": 1.2280199527740479, "learning_rate": 2e-05, "loss": 0.0505251, "step": 23700 }, { "epoch": 47.402, "grad_norm": 1.2505056858062744, "learning_rate": 2e-05, "loss": 0.04818206, "step": 23701 }, { "epoch": 47.404, "grad_norm": 1.2215907573699951, "learning_rate": 2e-05, "loss": 0.06155346, "step": 23702 }, { "epoch": 47.406, "grad_norm": 2.548524856567383, "learning_rate": 2e-05, "loss": 0.07346775, "step": 23703 }, { "epoch": 47.408, "grad_norm": 1.2835642099380493, "learning_rate": 2e-05, "loss": 0.03647726, "step": 23704 }, { "epoch": 47.41, "grad_norm": 1.0395970344543457, "learning_rate": 2e-05, "loss": 0.03743298, "step": 23705 }, { "epoch": 47.412, "grad_norm": 1.1787834167480469, "learning_rate": 2e-05, "loss": 0.05008036, "step": 23706 }, { "epoch": 47.414, "grad_norm": 1.2608765363693237, "learning_rate": 2e-05, "loss": 0.05554523, "step": 23707 }, { "epoch": 47.416, "grad_norm": 1.1391633749008179, "learning_rate": 2e-05, "loss": 0.04239395, "step": 23708 }, { "epoch": 47.418, "grad_norm": 1.310951590538025, "learning_rate": 2e-05, "loss": 0.05426461, "step": 23709 }, { "epoch": 47.42, "grad_norm": 2.4088597297668457, "learning_rate": 2e-05, "loss": 0.06128083, "step": 23710 }, { "epoch": 47.422, "grad_norm": 1.4820548295974731, "learning_rate": 2e-05, "loss": 0.07166282, "step": 23711 }, { "epoch": 47.424, "grad_norm": 1.2579067945480347, "learning_rate": 2e-05, "loss": 0.05245124, "step": 23712 }, { "epoch": 47.426, "grad_norm": 1.2548208236694336, "learning_rate": 2e-05, "loss": 0.04808481, "step": 23713 }, { "epoch": 47.428, "grad_norm": 1.0751184225082397, "learning_rate": 2e-05, "loss": 0.0375668, "step": 23714 }, { "epoch": 47.43, "grad_norm": 1.1384459733963013, "learning_rate": 2e-05, "loss": 0.03911222, "step": 23715 }, { "epoch": 47.432, "grad_norm": 1.2228471040725708, "learning_rate": 2e-05, "loss": 0.05863813, "step": 23716 }, { "epoch": 47.434, "grad_norm": 3.312714099884033, "learning_rate": 2e-05, "loss": 0.06117887, "step": 23717 }, { "epoch": 47.436, "grad_norm": 1.1511445045471191, "learning_rate": 2e-05, "loss": 0.04497742, "step": 23718 }, { "epoch": 47.438, "grad_norm": 1.4445806741714478, "learning_rate": 2e-05, "loss": 0.06244376, "step": 23719 }, { "epoch": 47.44, "grad_norm": 1.4664429426193237, "learning_rate": 2e-05, "loss": 0.03415911, "step": 23720 }, { "epoch": 47.442, "grad_norm": 1.0343490839004517, "learning_rate": 2e-05, "loss": 0.03623316, "step": 23721 }, { "epoch": 47.444, "grad_norm": 1.4916282892227173, "learning_rate": 2e-05, "loss": 0.04197778, "step": 23722 }, { "epoch": 47.446, "grad_norm": 1.2317581176757812, "learning_rate": 2e-05, "loss": 0.05793425, "step": 23723 }, { "epoch": 47.448, "grad_norm": 2.5570762157440186, "learning_rate": 2e-05, "loss": 0.04695795, "step": 23724 }, { "epoch": 47.45, "grad_norm": 0.9492204785346985, "learning_rate": 2e-05, "loss": 0.0334493, "step": 23725 }, { "epoch": 47.452, "grad_norm": 1.1067724227905273, "learning_rate": 2e-05, "loss": 0.03886347, "step": 23726 }, { "epoch": 47.454, "grad_norm": 1.2168097496032715, "learning_rate": 2e-05, "loss": 0.04640615, "step": 23727 }, { "epoch": 47.456, "grad_norm": 1.1498087644577026, "learning_rate": 2e-05, "loss": 0.05414462, "step": 23728 }, { "epoch": 47.458, "grad_norm": 1.2377431392669678, "learning_rate": 2e-05, "loss": 0.05543441, "step": 23729 }, { "epoch": 47.46, "grad_norm": 1.3042511940002441, "learning_rate": 2e-05, "loss": 0.05268206, "step": 23730 }, { "epoch": 47.462, "grad_norm": 1.4321894645690918, "learning_rate": 2e-05, "loss": 0.04099277, "step": 23731 }, { "epoch": 47.464, "grad_norm": 1.5580233335494995, "learning_rate": 2e-05, "loss": 0.05169383, "step": 23732 }, { "epoch": 47.466, "grad_norm": 1.2211647033691406, "learning_rate": 2e-05, "loss": 0.03869744, "step": 23733 }, { "epoch": 47.468, "grad_norm": 1.683929681777954, "learning_rate": 2e-05, "loss": 0.06134295, "step": 23734 }, { "epoch": 47.47, "grad_norm": 1.2739115953445435, "learning_rate": 2e-05, "loss": 0.0545798, "step": 23735 }, { "epoch": 47.472, "grad_norm": 1.387938141822815, "learning_rate": 2e-05, "loss": 0.06269515, "step": 23736 }, { "epoch": 47.474, "grad_norm": 1.0513253211975098, "learning_rate": 2e-05, "loss": 0.03459634, "step": 23737 }, { "epoch": 47.476, "grad_norm": 1.5735803842544556, "learning_rate": 2e-05, "loss": 0.05392249, "step": 23738 }, { "epoch": 47.478, "grad_norm": 1.1136903762817383, "learning_rate": 2e-05, "loss": 0.0393229, "step": 23739 }, { "epoch": 47.48, "grad_norm": 1.2298059463500977, "learning_rate": 2e-05, "loss": 0.05348741, "step": 23740 }, { "epoch": 47.482, "grad_norm": 1.1526238918304443, "learning_rate": 2e-05, "loss": 0.04516281, "step": 23741 }, { "epoch": 47.484, "grad_norm": 1.8501273393630981, "learning_rate": 2e-05, "loss": 0.04677322, "step": 23742 }, { "epoch": 47.486, "grad_norm": 1.4598199129104614, "learning_rate": 2e-05, "loss": 0.03834684, "step": 23743 }, { "epoch": 47.488, "grad_norm": 1.539868950843811, "learning_rate": 2e-05, "loss": 0.04157211, "step": 23744 }, { "epoch": 47.49, "grad_norm": 1.016363501548767, "learning_rate": 2e-05, "loss": 0.0401577, "step": 23745 }, { "epoch": 47.492, "grad_norm": 1.0004823207855225, "learning_rate": 2e-05, "loss": 0.04216732, "step": 23746 }, { "epoch": 47.494, "grad_norm": 0.8949176669120789, "learning_rate": 2e-05, "loss": 0.03267929, "step": 23747 }, { "epoch": 47.496, "grad_norm": 1.092294692993164, "learning_rate": 2e-05, "loss": 0.04605146, "step": 23748 }, { "epoch": 47.498, "grad_norm": 1.3999691009521484, "learning_rate": 2e-05, "loss": 0.05625776, "step": 23749 }, { "epoch": 47.5, "grad_norm": 1.136635422706604, "learning_rate": 2e-05, "loss": 0.04005692, "step": 23750 }, { "epoch": 47.502, "grad_norm": 1.1078810691833496, "learning_rate": 2e-05, "loss": 0.04342239, "step": 23751 }, { "epoch": 47.504, "grad_norm": 1.279944658279419, "learning_rate": 2e-05, "loss": 0.04219215, "step": 23752 }, { "epoch": 47.506, "grad_norm": 1.091110110282898, "learning_rate": 2e-05, "loss": 0.03880176, "step": 23753 }, { "epoch": 47.508, "grad_norm": 4.729748249053955, "learning_rate": 2e-05, "loss": 0.05638552, "step": 23754 }, { "epoch": 47.51, "grad_norm": 1.0970360040664673, "learning_rate": 2e-05, "loss": 0.04364794, "step": 23755 }, { "epoch": 47.512, "grad_norm": 1.0389903783798218, "learning_rate": 2e-05, "loss": 0.04169441, "step": 23756 }, { "epoch": 47.514, "grad_norm": 1.5221785306930542, "learning_rate": 2e-05, "loss": 0.06585445, "step": 23757 }, { "epoch": 47.516, "grad_norm": 2.1099374294281006, "learning_rate": 2e-05, "loss": 0.06468124, "step": 23758 }, { "epoch": 47.518, "grad_norm": 1.0862318277359009, "learning_rate": 2e-05, "loss": 0.03993117, "step": 23759 }, { "epoch": 47.52, "grad_norm": 1.1114388704299927, "learning_rate": 2e-05, "loss": 0.04338128, "step": 23760 }, { "epoch": 47.522, "grad_norm": 0.9857814908027649, "learning_rate": 2e-05, "loss": 0.03844403, "step": 23761 }, { "epoch": 47.524, "grad_norm": 1.3540973663330078, "learning_rate": 2e-05, "loss": 0.04602545, "step": 23762 }, { "epoch": 47.526, "grad_norm": 1.3859474658966064, "learning_rate": 2e-05, "loss": 0.06367904, "step": 23763 }, { "epoch": 47.528, "grad_norm": 1.4162659645080566, "learning_rate": 2e-05, "loss": 0.05949718, "step": 23764 }, { "epoch": 47.53, "grad_norm": 1.3201795816421509, "learning_rate": 2e-05, "loss": 0.06164296, "step": 23765 }, { "epoch": 47.532, "grad_norm": 1.2807005643844604, "learning_rate": 2e-05, "loss": 0.05345618, "step": 23766 }, { "epoch": 47.534, "grad_norm": 1.0871586799621582, "learning_rate": 2e-05, "loss": 0.04408365, "step": 23767 }, { "epoch": 47.536, "grad_norm": 1.0492790937423706, "learning_rate": 2e-05, "loss": 0.04874376, "step": 23768 }, { "epoch": 47.538, "grad_norm": 1.1767281293869019, "learning_rate": 2e-05, "loss": 0.04965818, "step": 23769 }, { "epoch": 47.54, "grad_norm": 1.1047857999801636, "learning_rate": 2e-05, "loss": 0.05242363, "step": 23770 }, { "epoch": 47.542, "grad_norm": 1.1232954263687134, "learning_rate": 2e-05, "loss": 0.05646368, "step": 23771 }, { "epoch": 47.544, "grad_norm": 1.3574858903884888, "learning_rate": 2e-05, "loss": 0.04148731, "step": 23772 }, { "epoch": 47.546, "grad_norm": 0.9722660183906555, "learning_rate": 2e-05, "loss": 0.0293147, "step": 23773 }, { "epoch": 47.548, "grad_norm": 1.1585569381713867, "learning_rate": 2e-05, "loss": 0.03967403, "step": 23774 }, { "epoch": 47.55, "grad_norm": 1.473108172416687, "learning_rate": 2e-05, "loss": 0.06600234, "step": 23775 }, { "epoch": 47.552, "grad_norm": 1.2558122873306274, "learning_rate": 2e-05, "loss": 0.04727027, "step": 23776 }, { "epoch": 47.554, "grad_norm": 1.1267999410629272, "learning_rate": 2e-05, "loss": 0.05461453, "step": 23777 }, { "epoch": 47.556, "grad_norm": 1.1623164415359497, "learning_rate": 2e-05, "loss": 0.04348694, "step": 23778 }, { "epoch": 47.558, "grad_norm": 0.9064064621925354, "learning_rate": 2e-05, "loss": 0.03024364, "step": 23779 }, { "epoch": 47.56, "grad_norm": 1.4345759153366089, "learning_rate": 2e-05, "loss": 0.04869916, "step": 23780 }, { "epoch": 47.562, "grad_norm": 1.3848798274993896, "learning_rate": 2e-05, "loss": 0.06412567, "step": 23781 }, { "epoch": 47.564, "grad_norm": 1.2268092632293701, "learning_rate": 2e-05, "loss": 0.0550194, "step": 23782 }, { "epoch": 47.566, "grad_norm": 1.1246010065078735, "learning_rate": 2e-05, "loss": 0.05901127, "step": 23783 }, { "epoch": 47.568, "grad_norm": 1.1794065237045288, "learning_rate": 2e-05, "loss": 0.04719983, "step": 23784 }, { "epoch": 47.57, "grad_norm": 1.2328189611434937, "learning_rate": 2e-05, "loss": 0.03939323, "step": 23785 }, { "epoch": 47.572, "grad_norm": 1.12712824344635, "learning_rate": 2e-05, "loss": 0.04430801, "step": 23786 }, { "epoch": 47.574, "grad_norm": 1.1956403255462646, "learning_rate": 2e-05, "loss": 0.05049133, "step": 23787 }, { "epoch": 47.576, "grad_norm": 1.102604866027832, "learning_rate": 2e-05, "loss": 0.04754536, "step": 23788 }, { "epoch": 47.578, "grad_norm": 1.1247801780700684, "learning_rate": 2e-05, "loss": 0.04530304, "step": 23789 }, { "epoch": 47.58, "grad_norm": 1.1731112003326416, "learning_rate": 2e-05, "loss": 0.04685323, "step": 23790 }, { "epoch": 47.582, "grad_norm": 1.0738763809204102, "learning_rate": 2e-05, "loss": 0.0424612, "step": 23791 }, { "epoch": 47.584, "grad_norm": 0.9249600172042847, "learning_rate": 2e-05, "loss": 0.03432335, "step": 23792 }, { "epoch": 47.586, "grad_norm": 0.9936639666557312, "learning_rate": 2e-05, "loss": 0.03993072, "step": 23793 }, { "epoch": 47.588, "grad_norm": 1.2670681476593018, "learning_rate": 2e-05, "loss": 0.04661375, "step": 23794 }, { "epoch": 47.59, "grad_norm": 1.8145248889923096, "learning_rate": 2e-05, "loss": 0.05395586, "step": 23795 }, { "epoch": 47.592, "grad_norm": 1.2371265888214111, "learning_rate": 2e-05, "loss": 0.06457849, "step": 23796 }, { "epoch": 47.594, "grad_norm": 1.2411563396453857, "learning_rate": 2e-05, "loss": 0.04113891, "step": 23797 }, { "epoch": 47.596, "grad_norm": 1.3027358055114746, "learning_rate": 2e-05, "loss": 0.04472353, "step": 23798 }, { "epoch": 47.598, "grad_norm": 1.9770257472991943, "learning_rate": 2e-05, "loss": 0.06172611, "step": 23799 }, { "epoch": 47.6, "grad_norm": 1.2722480297088623, "learning_rate": 2e-05, "loss": 0.04975735, "step": 23800 }, { "epoch": 47.602, "grad_norm": 0.9771064519882202, "learning_rate": 2e-05, "loss": 0.03003059, "step": 23801 }, { "epoch": 47.604, "grad_norm": 1.6549073457717896, "learning_rate": 2e-05, "loss": 0.04169197, "step": 23802 }, { "epoch": 47.606, "grad_norm": 0.9755164980888367, "learning_rate": 2e-05, "loss": 0.03423417, "step": 23803 }, { "epoch": 47.608, "grad_norm": 1.199762225151062, "learning_rate": 2e-05, "loss": 0.05413834, "step": 23804 }, { "epoch": 47.61, "grad_norm": 1.410494089126587, "learning_rate": 2e-05, "loss": 0.04707893, "step": 23805 }, { "epoch": 47.612, "grad_norm": 1.3130079507827759, "learning_rate": 2e-05, "loss": 0.05722672, "step": 23806 }, { "epoch": 47.614, "grad_norm": 1.174035668373108, "learning_rate": 2e-05, "loss": 0.05756243, "step": 23807 }, { "epoch": 47.616, "grad_norm": 1.7935864925384521, "learning_rate": 2e-05, "loss": 0.07460059, "step": 23808 }, { "epoch": 47.618, "grad_norm": 1.6251633167266846, "learning_rate": 2e-05, "loss": 0.03863, "step": 23809 }, { "epoch": 47.62, "grad_norm": 1.1181731224060059, "learning_rate": 2e-05, "loss": 0.04170064, "step": 23810 }, { "epoch": 47.622, "grad_norm": 1.3144530057907104, "learning_rate": 2e-05, "loss": 0.05499358, "step": 23811 }, { "epoch": 47.624, "grad_norm": 1.4863946437835693, "learning_rate": 2e-05, "loss": 0.07462236, "step": 23812 }, { "epoch": 47.626, "grad_norm": 1.226033329963684, "learning_rate": 2e-05, "loss": 0.05091349, "step": 23813 }, { "epoch": 47.628, "grad_norm": 1.164097785949707, "learning_rate": 2e-05, "loss": 0.0417085, "step": 23814 }, { "epoch": 47.63, "grad_norm": 3.6329402923583984, "learning_rate": 2e-05, "loss": 0.03966795, "step": 23815 }, { "epoch": 47.632, "grad_norm": 1.1707428693771362, "learning_rate": 2e-05, "loss": 0.05277382, "step": 23816 }, { "epoch": 47.634, "grad_norm": 0.8920641541481018, "learning_rate": 2e-05, "loss": 0.02758377, "step": 23817 }, { "epoch": 47.636, "grad_norm": 1.1014950275421143, "learning_rate": 2e-05, "loss": 0.04578391, "step": 23818 }, { "epoch": 47.638, "grad_norm": 1.0088590383529663, "learning_rate": 2e-05, "loss": 0.0345577, "step": 23819 }, { "epoch": 47.64, "grad_norm": 1.6413283348083496, "learning_rate": 2e-05, "loss": 0.04818995, "step": 23820 }, { "epoch": 47.642, "grad_norm": 1.2504075765609741, "learning_rate": 2e-05, "loss": 0.04787406, "step": 23821 }, { "epoch": 47.644, "grad_norm": 2.165092945098877, "learning_rate": 2e-05, "loss": 0.03882634, "step": 23822 }, { "epoch": 47.646, "grad_norm": 0.9500136971473694, "learning_rate": 2e-05, "loss": 0.04030318, "step": 23823 }, { "epoch": 47.648, "grad_norm": 1.0927907228469849, "learning_rate": 2e-05, "loss": 0.03360862, "step": 23824 }, { "epoch": 47.65, "grad_norm": 1.44313645362854, "learning_rate": 2e-05, "loss": 0.05937745, "step": 23825 }, { "epoch": 47.652, "grad_norm": 1.2534681558609009, "learning_rate": 2e-05, "loss": 0.04984522, "step": 23826 }, { "epoch": 47.654, "grad_norm": 2.9447219371795654, "learning_rate": 2e-05, "loss": 0.05307635, "step": 23827 }, { "epoch": 47.656, "grad_norm": 1.3493146896362305, "learning_rate": 2e-05, "loss": 0.04327986, "step": 23828 }, { "epoch": 47.658, "grad_norm": 1.0709292888641357, "learning_rate": 2e-05, "loss": 0.04765247, "step": 23829 }, { "epoch": 47.66, "grad_norm": 1.281948447227478, "learning_rate": 2e-05, "loss": 0.05185589, "step": 23830 }, { "epoch": 47.662, "grad_norm": 1.2605935335159302, "learning_rate": 2e-05, "loss": 0.05030036, "step": 23831 }, { "epoch": 47.664, "grad_norm": 1.413135051727295, "learning_rate": 2e-05, "loss": 0.05803326, "step": 23832 }, { "epoch": 47.666, "grad_norm": 1.2617324590682983, "learning_rate": 2e-05, "loss": 0.04378517, "step": 23833 }, { "epoch": 47.668, "grad_norm": 1.1886942386627197, "learning_rate": 2e-05, "loss": 0.05778721, "step": 23834 }, { "epoch": 47.67, "grad_norm": 3.0332577228546143, "learning_rate": 2e-05, "loss": 0.05872624, "step": 23835 }, { "epoch": 47.672, "grad_norm": 1.4225571155548096, "learning_rate": 2e-05, "loss": 0.03956306, "step": 23836 }, { "epoch": 47.674, "grad_norm": 0.9652089476585388, "learning_rate": 2e-05, "loss": 0.02727574, "step": 23837 }, { "epoch": 47.676, "grad_norm": 1.103406548500061, "learning_rate": 2e-05, "loss": 0.04494239, "step": 23838 }, { "epoch": 47.678, "grad_norm": 1.1904319524765015, "learning_rate": 2e-05, "loss": 0.04636001, "step": 23839 }, { "epoch": 47.68, "grad_norm": 1.2117693424224854, "learning_rate": 2e-05, "loss": 0.04273736, "step": 23840 }, { "epoch": 47.682, "grad_norm": 1.095442771911621, "learning_rate": 2e-05, "loss": 0.04069439, "step": 23841 }, { "epoch": 47.684, "grad_norm": 1.315514326095581, "learning_rate": 2e-05, "loss": 0.05416162, "step": 23842 }, { "epoch": 47.686, "grad_norm": 1.8992160558700562, "learning_rate": 2e-05, "loss": 0.07332712, "step": 23843 }, { "epoch": 47.688, "grad_norm": 1.0993719100952148, "learning_rate": 2e-05, "loss": 0.04374916, "step": 23844 }, { "epoch": 47.69, "grad_norm": 1.2032912969589233, "learning_rate": 2e-05, "loss": 0.04704859, "step": 23845 }, { "epoch": 47.692, "grad_norm": 5.187289237976074, "learning_rate": 2e-05, "loss": 0.04735934, "step": 23846 }, { "epoch": 47.694, "grad_norm": 1.0638306140899658, "learning_rate": 2e-05, "loss": 0.04312051, "step": 23847 }, { "epoch": 47.696, "grad_norm": 1.0537846088409424, "learning_rate": 2e-05, "loss": 0.04088642, "step": 23848 }, { "epoch": 47.698, "grad_norm": 1.1591740846633911, "learning_rate": 2e-05, "loss": 0.04216747, "step": 23849 }, { "epoch": 47.7, "grad_norm": 0.8905053734779358, "learning_rate": 2e-05, "loss": 0.03059562, "step": 23850 }, { "epoch": 47.702, "grad_norm": 0.9576981663703918, "learning_rate": 2e-05, "loss": 0.02699701, "step": 23851 }, { "epoch": 47.704, "grad_norm": 1.3115862607955933, "learning_rate": 2e-05, "loss": 0.04908556, "step": 23852 }, { "epoch": 47.706, "grad_norm": 1.1104012727737427, "learning_rate": 2e-05, "loss": 0.04494857, "step": 23853 }, { "epoch": 47.708, "grad_norm": 1.0533535480499268, "learning_rate": 2e-05, "loss": 0.03632095, "step": 23854 }, { "epoch": 47.71, "grad_norm": 1.0708428621292114, "learning_rate": 2e-05, "loss": 0.04081979, "step": 23855 }, { "epoch": 47.712, "grad_norm": 1.3414500951766968, "learning_rate": 2e-05, "loss": 0.04155166, "step": 23856 }, { "epoch": 47.714, "grad_norm": 1.131571888923645, "learning_rate": 2e-05, "loss": 0.03478384, "step": 23857 }, { "epoch": 47.716, "grad_norm": 1.0607177019119263, "learning_rate": 2e-05, "loss": 0.04390678, "step": 23858 }, { "epoch": 47.718, "grad_norm": 1.1620864868164062, "learning_rate": 2e-05, "loss": 0.03903388, "step": 23859 }, { "epoch": 47.72, "grad_norm": 1.2598408460617065, "learning_rate": 2e-05, "loss": 0.04365044, "step": 23860 }, { "epoch": 47.722, "grad_norm": 1.0379410982131958, "learning_rate": 2e-05, "loss": 0.03826845, "step": 23861 }, { "epoch": 47.724, "grad_norm": 1.4189554452896118, "learning_rate": 2e-05, "loss": 0.05523844, "step": 23862 }, { "epoch": 47.726, "grad_norm": 2.2879295349121094, "learning_rate": 2e-05, "loss": 0.04601286, "step": 23863 }, { "epoch": 47.728, "grad_norm": 1.9662691354751587, "learning_rate": 2e-05, "loss": 0.04415329, "step": 23864 }, { "epoch": 47.73, "grad_norm": 1.136714220046997, "learning_rate": 2e-05, "loss": 0.04704106, "step": 23865 }, { "epoch": 47.732, "grad_norm": 1.8158776760101318, "learning_rate": 2e-05, "loss": 0.04839449, "step": 23866 }, { "epoch": 47.734, "grad_norm": 1.312970519065857, "learning_rate": 2e-05, "loss": 0.05811097, "step": 23867 }, { "epoch": 47.736, "grad_norm": 1.4508172273635864, "learning_rate": 2e-05, "loss": 0.06293134, "step": 23868 }, { "epoch": 47.738, "grad_norm": 1.0996098518371582, "learning_rate": 2e-05, "loss": 0.05198246, "step": 23869 }, { "epoch": 47.74, "grad_norm": 1.2088953256607056, "learning_rate": 2e-05, "loss": 0.05847076, "step": 23870 }, { "epoch": 47.742, "grad_norm": 1.990601897239685, "learning_rate": 2e-05, "loss": 0.07238883, "step": 23871 }, { "epoch": 47.744, "grad_norm": 0.990237832069397, "learning_rate": 2e-05, "loss": 0.03466114, "step": 23872 }, { "epoch": 47.746, "grad_norm": 1.187274694442749, "learning_rate": 2e-05, "loss": 0.05454275, "step": 23873 }, { "epoch": 47.748, "grad_norm": 1.2135357856750488, "learning_rate": 2e-05, "loss": 0.05944178, "step": 23874 }, { "epoch": 47.75, "grad_norm": 1.147931694984436, "learning_rate": 2e-05, "loss": 0.04608715, "step": 23875 }, { "epoch": 47.752, "grad_norm": 1.2686227560043335, "learning_rate": 2e-05, "loss": 0.03793816, "step": 23876 }, { "epoch": 47.754, "grad_norm": 1.2888165712356567, "learning_rate": 2e-05, "loss": 0.05994112, "step": 23877 }, { "epoch": 47.756, "grad_norm": 1.1328716278076172, "learning_rate": 2e-05, "loss": 0.04631865, "step": 23878 }, { "epoch": 47.758, "grad_norm": 1.1973130702972412, "learning_rate": 2e-05, "loss": 0.04879654, "step": 23879 }, { "epoch": 47.76, "grad_norm": 1.3249599933624268, "learning_rate": 2e-05, "loss": 0.0531996, "step": 23880 }, { "epoch": 47.762, "grad_norm": 0.9521530866622925, "learning_rate": 2e-05, "loss": 0.03609097, "step": 23881 }, { "epoch": 47.764, "grad_norm": 1.0063776969909668, "learning_rate": 2e-05, "loss": 0.04234225, "step": 23882 }, { "epoch": 47.766, "grad_norm": 1.1210484504699707, "learning_rate": 2e-05, "loss": 0.04907482, "step": 23883 }, { "epoch": 47.768, "grad_norm": 1.406660795211792, "learning_rate": 2e-05, "loss": 0.05606017, "step": 23884 }, { "epoch": 47.77, "grad_norm": 1.1773229837417603, "learning_rate": 2e-05, "loss": 0.05414952, "step": 23885 }, { "epoch": 47.772, "grad_norm": 2.4284679889678955, "learning_rate": 2e-05, "loss": 0.04393204, "step": 23886 }, { "epoch": 47.774, "grad_norm": 1.0576744079589844, "learning_rate": 2e-05, "loss": 0.03376132, "step": 23887 }, { "epoch": 47.776, "grad_norm": 2.838202714920044, "learning_rate": 2e-05, "loss": 0.06541966, "step": 23888 }, { "epoch": 47.778, "grad_norm": 1.1831326484680176, "learning_rate": 2e-05, "loss": 0.04526076, "step": 23889 }, { "epoch": 47.78, "grad_norm": 1.4545902013778687, "learning_rate": 2e-05, "loss": 0.05634362, "step": 23890 }, { "epoch": 47.782, "grad_norm": 1.2465368509292603, "learning_rate": 2e-05, "loss": 0.04193708, "step": 23891 }, { "epoch": 47.784, "grad_norm": 1.2062345743179321, "learning_rate": 2e-05, "loss": 0.05329098, "step": 23892 }, { "epoch": 47.786, "grad_norm": 1.221840262413025, "learning_rate": 2e-05, "loss": 0.04324333, "step": 23893 }, { "epoch": 47.788, "grad_norm": 1.1237281560897827, "learning_rate": 2e-05, "loss": 0.03751169, "step": 23894 }, { "epoch": 47.79, "grad_norm": 1.1282539367675781, "learning_rate": 2e-05, "loss": 0.04889031, "step": 23895 }, { "epoch": 47.792, "grad_norm": 1.0426477193832397, "learning_rate": 2e-05, "loss": 0.03937999, "step": 23896 }, { "epoch": 47.794, "grad_norm": 2.0234551429748535, "learning_rate": 2e-05, "loss": 0.04139862, "step": 23897 }, { "epoch": 47.796, "grad_norm": 1.0973756313323975, "learning_rate": 2e-05, "loss": 0.04653414, "step": 23898 }, { "epoch": 47.798, "grad_norm": 1.1589323282241821, "learning_rate": 2e-05, "loss": 0.05205517, "step": 23899 }, { "epoch": 47.8, "grad_norm": 1.1792693138122559, "learning_rate": 2e-05, "loss": 0.04807966, "step": 23900 }, { "epoch": 47.802, "grad_norm": 0.9611416459083557, "learning_rate": 2e-05, "loss": 0.03486631, "step": 23901 }, { "epoch": 47.804, "grad_norm": 1.1984083652496338, "learning_rate": 2e-05, "loss": 0.06004932, "step": 23902 }, { "epoch": 47.806, "grad_norm": 1.102168083190918, "learning_rate": 2e-05, "loss": 0.05081422, "step": 23903 }, { "epoch": 47.808, "grad_norm": 1.1081064939498901, "learning_rate": 2e-05, "loss": 0.0446634, "step": 23904 }, { "epoch": 47.81, "grad_norm": 0.9922335147857666, "learning_rate": 2e-05, "loss": 0.04278713, "step": 23905 }, { "epoch": 47.812, "grad_norm": 1.3645964860916138, "learning_rate": 2e-05, "loss": 0.06499512, "step": 23906 }, { "epoch": 47.814, "grad_norm": 1.6501846313476562, "learning_rate": 2e-05, "loss": 0.04862681, "step": 23907 }, { "epoch": 47.816, "grad_norm": 1.0368905067443848, "learning_rate": 2e-05, "loss": 0.0441791, "step": 23908 }, { "epoch": 47.818, "grad_norm": 1.1054444313049316, "learning_rate": 2e-05, "loss": 0.04063675, "step": 23909 }, { "epoch": 47.82, "grad_norm": 1.1246895790100098, "learning_rate": 2e-05, "loss": 0.04597709, "step": 23910 }, { "epoch": 47.822, "grad_norm": 1.0721478462219238, "learning_rate": 2e-05, "loss": 0.04677291, "step": 23911 }, { "epoch": 47.824, "grad_norm": 1.1274585723876953, "learning_rate": 2e-05, "loss": 0.05058762, "step": 23912 }, { "epoch": 47.826, "grad_norm": 1.4998968839645386, "learning_rate": 2e-05, "loss": 0.05139451, "step": 23913 }, { "epoch": 47.828, "grad_norm": 0.9521937370300293, "learning_rate": 2e-05, "loss": 0.03633155, "step": 23914 }, { "epoch": 47.83, "grad_norm": 1.3443994522094727, "learning_rate": 2e-05, "loss": 0.06229488, "step": 23915 }, { "epoch": 47.832, "grad_norm": 1.165913701057434, "learning_rate": 2e-05, "loss": 0.05210435, "step": 23916 }, { "epoch": 47.834, "grad_norm": 1.4400709867477417, "learning_rate": 2e-05, "loss": 0.0268791, "step": 23917 }, { "epoch": 47.836, "grad_norm": 0.978833019733429, "learning_rate": 2e-05, "loss": 0.03885253, "step": 23918 }, { "epoch": 47.838, "grad_norm": 3.3101084232330322, "learning_rate": 2e-05, "loss": 0.06251098, "step": 23919 }, { "epoch": 47.84, "grad_norm": 1.6580345630645752, "learning_rate": 2e-05, "loss": 0.04920085, "step": 23920 }, { "epoch": 47.842, "grad_norm": 1.2298296689987183, "learning_rate": 2e-05, "loss": 0.04933652, "step": 23921 }, { "epoch": 47.844, "grad_norm": 1.209776520729065, "learning_rate": 2e-05, "loss": 0.06445093, "step": 23922 }, { "epoch": 47.846, "grad_norm": 1.1187968254089355, "learning_rate": 2e-05, "loss": 0.04598414, "step": 23923 }, { "epoch": 47.848, "grad_norm": 1.0532989501953125, "learning_rate": 2e-05, "loss": 0.03271957, "step": 23924 }, { "epoch": 47.85, "grad_norm": 1.2785584926605225, "learning_rate": 2e-05, "loss": 0.05796767, "step": 23925 }, { "epoch": 47.852, "grad_norm": 1.1597468852996826, "learning_rate": 2e-05, "loss": 0.04433453, "step": 23926 }, { "epoch": 47.854, "grad_norm": 1.1068360805511475, "learning_rate": 2e-05, "loss": 0.04815186, "step": 23927 }, { "epoch": 47.856, "grad_norm": 1.0562989711761475, "learning_rate": 2e-05, "loss": 0.039212, "step": 23928 }, { "epoch": 47.858, "grad_norm": 1.2464039325714111, "learning_rate": 2e-05, "loss": 0.05434022, "step": 23929 }, { "epoch": 47.86, "grad_norm": 1.9144781827926636, "learning_rate": 2e-05, "loss": 0.0500843, "step": 23930 }, { "epoch": 47.862, "grad_norm": 4.982010364532471, "learning_rate": 2e-05, "loss": 0.05325773, "step": 23931 }, { "epoch": 47.864, "grad_norm": 2.1956326961517334, "learning_rate": 2e-05, "loss": 0.04757648, "step": 23932 }, { "epoch": 47.866, "grad_norm": 1.3570915460586548, "learning_rate": 2e-05, "loss": 0.05788545, "step": 23933 }, { "epoch": 47.868, "grad_norm": 1.1021960973739624, "learning_rate": 2e-05, "loss": 0.04605621, "step": 23934 }, { "epoch": 47.87, "grad_norm": 1.2763595581054688, "learning_rate": 2e-05, "loss": 0.04462332, "step": 23935 }, { "epoch": 47.872, "grad_norm": 1.2677397727966309, "learning_rate": 2e-05, "loss": 0.06458889, "step": 23936 }, { "epoch": 47.874, "grad_norm": 1.571900486946106, "learning_rate": 2e-05, "loss": 0.04743953, "step": 23937 }, { "epoch": 47.876, "grad_norm": 1.2645798921585083, "learning_rate": 2e-05, "loss": 0.04014737, "step": 23938 }, { "epoch": 47.878, "grad_norm": 1.3748975992202759, "learning_rate": 2e-05, "loss": 0.05016341, "step": 23939 }, { "epoch": 47.88, "grad_norm": 1.0344460010528564, "learning_rate": 2e-05, "loss": 0.03785864, "step": 23940 }, { "epoch": 47.882, "grad_norm": 1.15352463722229, "learning_rate": 2e-05, "loss": 0.04408693, "step": 23941 }, { "epoch": 47.884, "grad_norm": 0.9576112627983093, "learning_rate": 2e-05, "loss": 0.0283091, "step": 23942 }, { "epoch": 47.886, "grad_norm": 1.1543420553207397, "learning_rate": 2e-05, "loss": 0.03587131, "step": 23943 }, { "epoch": 47.888, "grad_norm": 1.3030697107315063, "learning_rate": 2e-05, "loss": 0.06125759, "step": 23944 }, { "epoch": 47.89, "grad_norm": 1.159096598625183, "learning_rate": 2e-05, "loss": 0.04972298, "step": 23945 }, { "epoch": 47.892, "grad_norm": 1.3995835781097412, "learning_rate": 2e-05, "loss": 0.03951022, "step": 23946 }, { "epoch": 47.894, "grad_norm": 1.2913930416107178, "learning_rate": 2e-05, "loss": 0.04597832, "step": 23947 }, { "epoch": 47.896, "grad_norm": 1.0816295146942139, "learning_rate": 2e-05, "loss": 0.03365443, "step": 23948 }, { "epoch": 47.898, "grad_norm": 1.2228970527648926, "learning_rate": 2e-05, "loss": 0.06327063, "step": 23949 }, { "epoch": 47.9, "grad_norm": 1.1971608400344849, "learning_rate": 2e-05, "loss": 0.04186807, "step": 23950 }, { "epoch": 47.902, "grad_norm": 1.3792004585266113, "learning_rate": 2e-05, "loss": 0.04573726, "step": 23951 }, { "epoch": 47.904, "grad_norm": 1.3499553203582764, "learning_rate": 2e-05, "loss": 0.04411193, "step": 23952 }, { "epoch": 47.906, "grad_norm": 0.9711533188819885, "learning_rate": 2e-05, "loss": 0.04265768, "step": 23953 }, { "epoch": 47.908, "grad_norm": 1.329298496246338, "learning_rate": 2e-05, "loss": 0.0500191, "step": 23954 }, { "epoch": 47.91, "grad_norm": 1.0052729845046997, "learning_rate": 2e-05, "loss": 0.04355924, "step": 23955 }, { "epoch": 47.912, "grad_norm": 1.1501191854476929, "learning_rate": 2e-05, "loss": 0.03861841, "step": 23956 }, { "epoch": 47.914, "grad_norm": 1.1558641195297241, "learning_rate": 2e-05, "loss": 0.05884036, "step": 23957 }, { "epoch": 47.916, "grad_norm": 1.0510032176971436, "learning_rate": 2e-05, "loss": 0.03164276, "step": 23958 }, { "epoch": 47.918, "grad_norm": 1.3713427782058716, "learning_rate": 2e-05, "loss": 0.05324286, "step": 23959 }, { "epoch": 47.92, "grad_norm": 1.4690266847610474, "learning_rate": 2e-05, "loss": 0.04128034, "step": 23960 }, { "epoch": 47.922, "grad_norm": 2.2947113513946533, "learning_rate": 2e-05, "loss": 0.04311752, "step": 23961 }, { "epoch": 47.924, "grad_norm": 1.1298458576202393, "learning_rate": 2e-05, "loss": 0.04617842, "step": 23962 }, { "epoch": 47.926, "grad_norm": 1.2151092290878296, "learning_rate": 2e-05, "loss": 0.0448565, "step": 23963 }, { "epoch": 47.928, "grad_norm": 1.210721731185913, "learning_rate": 2e-05, "loss": 0.03401937, "step": 23964 }, { "epoch": 47.93, "grad_norm": 1.6079378128051758, "learning_rate": 2e-05, "loss": 0.04983381, "step": 23965 }, { "epoch": 47.932, "grad_norm": 0.9416164755821228, "learning_rate": 2e-05, "loss": 0.0281193, "step": 23966 }, { "epoch": 47.934, "grad_norm": 1.450137972831726, "learning_rate": 2e-05, "loss": 0.05513122, "step": 23967 }, { "epoch": 47.936, "grad_norm": 1.427017092704773, "learning_rate": 2e-05, "loss": 0.06112292, "step": 23968 }, { "epoch": 47.938, "grad_norm": 1.1121290922164917, "learning_rate": 2e-05, "loss": 0.04755298, "step": 23969 }, { "epoch": 47.94, "grad_norm": 1.350212812423706, "learning_rate": 2e-05, "loss": 0.02974812, "step": 23970 }, { "epoch": 47.942, "grad_norm": 1.009774088859558, "learning_rate": 2e-05, "loss": 0.03674398, "step": 23971 }, { "epoch": 47.944, "grad_norm": 0.989896297454834, "learning_rate": 2e-05, "loss": 0.04537395, "step": 23972 }, { "epoch": 47.946, "grad_norm": 1.3564207553863525, "learning_rate": 2e-05, "loss": 0.0425001, "step": 23973 }, { "epoch": 47.948, "grad_norm": 2.2842447757720947, "learning_rate": 2e-05, "loss": 0.04634003, "step": 23974 }, { "epoch": 47.95, "grad_norm": 1.2380462884902954, "learning_rate": 2e-05, "loss": 0.04033655, "step": 23975 }, { "epoch": 47.952, "grad_norm": 1.2136390209197998, "learning_rate": 2e-05, "loss": 0.03208754, "step": 23976 }, { "epoch": 47.954, "grad_norm": 1.2181363105773926, "learning_rate": 2e-05, "loss": 0.05851332, "step": 23977 }, { "epoch": 47.956, "grad_norm": 1.1737416982650757, "learning_rate": 2e-05, "loss": 0.04869546, "step": 23978 }, { "epoch": 47.958, "grad_norm": 1.1523841619491577, "learning_rate": 2e-05, "loss": 0.04287051, "step": 23979 }, { "epoch": 47.96, "grad_norm": 1.3035601377487183, "learning_rate": 2e-05, "loss": 0.05795803, "step": 23980 }, { "epoch": 47.962, "grad_norm": 1.4103642702102661, "learning_rate": 2e-05, "loss": 0.0504769, "step": 23981 }, { "epoch": 47.964, "grad_norm": 1.3686590194702148, "learning_rate": 2e-05, "loss": 0.05023767, "step": 23982 }, { "epoch": 47.966, "grad_norm": 1.1425793170928955, "learning_rate": 2e-05, "loss": 0.03801563, "step": 23983 }, { "epoch": 47.968, "grad_norm": 1.2969728708267212, "learning_rate": 2e-05, "loss": 0.04899958, "step": 23984 }, { "epoch": 47.97, "grad_norm": 1.312829613685608, "learning_rate": 2e-05, "loss": 0.06080573, "step": 23985 }, { "epoch": 47.972, "grad_norm": 1.264377236366272, "learning_rate": 2e-05, "loss": 0.04977002, "step": 23986 }, { "epoch": 47.974, "grad_norm": 2.843947410583496, "learning_rate": 2e-05, "loss": 0.06198847, "step": 23987 }, { "epoch": 47.976, "grad_norm": 1.4334129095077515, "learning_rate": 2e-05, "loss": 0.07072139, "step": 23988 }, { "epoch": 47.978, "grad_norm": 1.3132730722427368, "learning_rate": 2e-05, "loss": 0.06276097, "step": 23989 }, { "epoch": 47.98, "grad_norm": 1.2689872980117798, "learning_rate": 2e-05, "loss": 0.04908171, "step": 23990 }, { "epoch": 47.982, "grad_norm": 1.4487504959106445, "learning_rate": 2e-05, "loss": 0.04362805, "step": 23991 }, { "epoch": 47.984, "grad_norm": 1.1339962482452393, "learning_rate": 2e-05, "loss": 0.05087781, "step": 23992 }, { "epoch": 47.986, "grad_norm": 1.7450958490371704, "learning_rate": 2e-05, "loss": 0.05819881, "step": 23993 }, { "epoch": 47.988, "grad_norm": 1.1396898031234741, "learning_rate": 2e-05, "loss": 0.04882061, "step": 23994 }, { "epoch": 47.99, "grad_norm": 1.337996482849121, "learning_rate": 2e-05, "loss": 0.04781748, "step": 23995 }, { "epoch": 47.992, "grad_norm": 1.5428389310836792, "learning_rate": 2e-05, "loss": 0.0593975, "step": 23996 }, { "epoch": 47.994, "grad_norm": 1.029953956604004, "learning_rate": 2e-05, "loss": 0.03917014, "step": 23997 }, { "epoch": 47.996, "grad_norm": 1.3269636631011963, "learning_rate": 2e-05, "loss": 0.06252966, "step": 23998 }, { "epoch": 47.998, "grad_norm": 1.4018568992614746, "learning_rate": 2e-05, "loss": 0.05506238, "step": 23999 }, { "epoch": 48.0, "grad_norm": 1.3268723487854004, "learning_rate": 2e-05, "loss": 0.04358728, "step": 24000 }, { "epoch": 48.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9840319361277445, "Equal_1": 0.998, "Equal_2": 0.9820359281437125, "Equal_3": 0.9900199600798403, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 1.0, "Parallel_1": 0.9939879759519038, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.994, "Perpendicular_1": 1.0, "Perpendicular_2": 1.0, "Perpendicular_3": 0.9098196392785571, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.9916, "PointLiesOnLine_1": 0.9979959919839679, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9940119760479041 }, "eval_runtime": 320.3246, "eval_samples_per_second": 32.779, "eval_steps_per_second": 0.656, "step": 24000 }, { "epoch": 48.002, "grad_norm": 1.0694226026535034, "learning_rate": 2e-05, "loss": 0.03889792, "step": 24001 }, { "epoch": 48.004, "grad_norm": 1.1457878351211548, "learning_rate": 2e-05, "loss": 0.04411246, "step": 24002 }, { "epoch": 48.006, "grad_norm": 1.3555324077606201, "learning_rate": 2e-05, "loss": 0.05262123, "step": 24003 }, { "epoch": 48.008, "grad_norm": 1.038233995437622, "learning_rate": 2e-05, "loss": 0.04188913, "step": 24004 }, { "epoch": 48.01, "grad_norm": 3.3475518226623535, "learning_rate": 2e-05, "loss": 0.05884375, "step": 24005 }, { "epoch": 48.012, "grad_norm": 1.470353364944458, "learning_rate": 2e-05, "loss": 0.04114807, "step": 24006 }, { "epoch": 48.014, "grad_norm": 1.2043060064315796, "learning_rate": 2e-05, "loss": 0.0573847, "step": 24007 }, { "epoch": 48.016, "grad_norm": 1.1786561012268066, "learning_rate": 2e-05, "loss": 0.04742495, "step": 24008 }, { "epoch": 48.018, "grad_norm": 1.0251394510269165, "learning_rate": 2e-05, "loss": 0.0432762, "step": 24009 }, { "epoch": 48.02, "grad_norm": 1.1770861148834229, "learning_rate": 2e-05, "loss": 0.04839217, "step": 24010 }, { "epoch": 48.022, "grad_norm": 0.9781084060668945, "learning_rate": 2e-05, "loss": 0.04288932, "step": 24011 }, { "epoch": 48.024, "grad_norm": 1.5834667682647705, "learning_rate": 2e-05, "loss": 0.04880485, "step": 24012 }, { "epoch": 48.026, "grad_norm": 2.6981749534606934, "learning_rate": 2e-05, "loss": 0.03831647, "step": 24013 }, { "epoch": 48.028, "grad_norm": 1.1615235805511475, "learning_rate": 2e-05, "loss": 0.04750589, "step": 24014 }, { "epoch": 48.03, "grad_norm": 1.1639854907989502, "learning_rate": 2e-05, "loss": 0.04984055, "step": 24015 }, { "epoch": 48.032, "grad_norm": 1.0798165798187256, "learning_rate": 2e-05, "loss": 0.05147992, "step": 24016 }, { "epoch": 48.034, "grad_norm": 1.0274465084075928, "learning_rate": 2e-05, "loss": 0.04617003, "step": 24017 }, { "epoch": 48.036, "grad_norm": 1.038852334022522, "learning_rate": 2e-05, "loss": 0.0369803, "step": 24018 }, { "epoch": 48.038, "grad_norm": 2.6861026287078857, "learning_rate": 2e-05, "loss": 0.07091722, "step": 24019 }, { "epoch": 48.04, "grad_norm": 1.3858764171600342, "learning_rate": 2e-05, "loss": 0.04682039, "step": 24020 }, { "epoch": 48.042, "grad_norm": 1.1013208627700806, "learning_rate": 2e-05, "loss": 0.04063297, "step": 24021 }, { "epoch": 48.044, "grad_norm": 1.4258663654327393, "learning_rate": 2e-05, "loss": 0.05036592, "step": 24022 }, { "epoch": 48.046, "grad_norm": 1.1322613954544067, "learning_rate": 2e-05, "loss": 0.04924812, "step": 24023 }, { "epoch": 48.048, "grad_norm": 1.7514662742614746, "learning_rate": 2e-05, "loss": 0.06260653, "step": 24024 }, { "epoch": 48.05, "grad_norm": 1.3482943773269653, "learning_rate": 2e-05, "loss": 0.05003114, "step": 24025 }, { "epoch": 48.052, "grad_norm": 1.5502170324325562, "learning_rate": 2e-05, "loss": 0.04645883, "step": 24026 }, { "epoch": 48.054, "grad_norm": 1.1267354488372803, "learning_rate": 2e-05, "loss": 0.04372516, "step": 24027 }, { "epoch": 48.056, "grad_norm": 1.668345332145691, "learning_rate": 2e-05, "loss": 0.07128799, "step": 24028 }, { "epoch": 48.058, "grad_norm": 2.1013059616088867, "learning_rate": 2e-05, "loss": 0.04920814, "step": 24029 }, { "epoch": 48.06, "grad_norm": 1.272568941116333, "learning_rate": 2e-05, "loss": 0.04338931, "step": 24030 }, { "epoch": 48.062, "grad_norm": 1.102447748184204, "learning_rate": 2e-05, "loss": 0.03770321, "step": 24031 }, { "epoch": 48.064, "grad_norm": 1.8323739767074585, "learning_rate": 2e-05, "loss": 0.05331714, "step": 24032 }, { "epoch": 48.066, "grad_norm": 2.065495491027832, "learning_rate": 2e-05, "loss": 0.05580588, "step": 24033 }, { "epoch": 48.068, "grad_norm": 4.448790073394775, "learning_rate": 2e-05, "loss": 0.04371783, "step": 24034 }, { "epoch": 48.07, "grad_norm": 1.122683048248291, "learning_rate": 2e-05, "loss": 0.05487248, "step": 24035 }, { "epoch": 48.072, "grad_norm": 1.2778655290603638, "learning_rate": 2e-05, "loss": 0.0487362, "step": 24036 }, { "epoch": 48.074, "grad_norm": 1.5660276412963867, "learning_rate": 2e-05, "loss": 0.05149665, "step": 24037 }, { "epoch": 48.076, "grad_norm": 1.2964324951171875, "learning_rate": 2e-05, "loss": 0.04056223, "step": 24038 }, { "epoch": 48.078, "grad_norm": 1.1524419784545898, "learning_rate": 2e-05, "loss": 0.05273098, "step": 24039 }, { "epoch": 48.08, "grad_norm": 1.4630237817764282, "learning_rate": 2e-05, "loss": 0.05465612, "step": 24040 }, { "epoch": 48.082, "grad_norm": 1.2101308107376099, "learning_rate": 2e-05, "loss": 0.04670757, "step": 24041 }, { "epoch": 48.084, "grad_norm": 1.1870620250701904, "learning_rate": 2e-05, "loss": 0.04672918, "step": 24042 }, { "epoch": 48.086, "grad_norm": 1.2451860904693604, "learning_rate": 2e-05, "loss": 0.05255879, "step": 24043 }, { "epoch": 48.088, "grad_norm": 1.1772750616073608, "learning_rate": 2e-05, "loss": 0.05022447, "step": 24044 }, { "epoch": 48.09, "grad_norm": 1.4949736595153809, "learning_rate": 2e-05, "loss": 0.05462297, "step": 24045 }, { "epoch": 48.092, "grad_norm": 1.0446524620056152, "learning_rate": 2e-05, "loss": 0.05005912, "step": 24046 }, { "epoch": 48.094, "grad_norm": 1.2837433815002441, "learning_rate": 2e-05, "loss": 0.04919739, "step": 24047 }, { "epoch": 48.096, "grad_norm": 1.2766021490097046, "learning_rate": 2e-05, "loss": 0.05800232, "step": 24048 }, { "epoch": 48.098, "grad_norm": 1.386711835861206, "learning_rate": 2e-05, "loss": 0.05083595, "step": 24049 }, { "epoch": 48.1, "grad_norm": 0.9854950308799744, "learning_rate": 2e-05, "loss": 0.03358131, "step": 24050 }, { "epoch": 48.102, "grad_norm": 1.1516034603118896, "learning_rate": 2e-05, "loss": 0.04865019, "step": 24051 }, { "epoch": 48.104, "grad_norm": 1.855728030204773, "learning_rate": 2e-05, "loss": 0.06086725, "step": 24052 }, { "epoch": 48.106, "grad_norm": 1.3824462890625, "learning_rate": 2e-05, "loss": 0.03999291, "step": 24053 }, { "epoch": 48.108, "grad_norm": 1.1911598443984985, "learning_rate": 2e-05, "loss": 0.04892493, "step": 24054 }, { "epoch": 48.11, "grad_norm": 1.2059804201126099, "learning_rate": 2e-05, "loss": 0.05040662, "step": 24055 }, { "epoch": 48.112, "grad_norm": 1.4151570796966553, "learning_rate": 2e-05, "loss": 0.05632602, "step": 24056 }, { "epoch": 48.114, "grad_norm": 1.101078748703003, "learning_rate": 2e-05, "loss": 0.03802776, "step": 24057 }, { "epoch": 48.116, "grad_norm": 1.3649169206619263, "learning_rate": 2e-05, "loss": 0.06694816, "step": 24058 }, { "epoch": 48.118, "grad_norm": 1.1776071786880493, "learning_rate": 2e-05, "loss": 0.05049934, "step": 24059 }, { "epoch": 48.12, "grad_norm": 1.082025408744812, "learning_rate": 2e-05, "loss": 0.04495796, "step": 24060 }, { "epoch": 48.122, "grad_norm": 1.2222503423690796, "learning_rate": 2e-05, "loss": 0.04708505, "step": 24061 }, { "epoch": 48.124, "grad_norm": 1.5613664388656616, "learning_rate": 2e-05, "loss": 0.05574992, "step": 24062 }, { "epoch": 48.126, "grad_norm": 1.2608956098556519, "learning_rate": 2e-05, "loss": 0.04397441, "step": 24063 }, { "epoch": 48.128, "grad_norm": 0.9204988479614258, "learning_rate": 2e-05, "loss": 0.02955709, "step": 24064 }, { "epoch": 48.13, "grad_norm": 0.984535813331604, "learning_rate": 2e-05, "loss": 0.04042712, "step": 24065 }, { "epoch": 48.132, "grad_norm": 5.602854251861572, "learning_rate": 2e-05, "loss": 0.046597, "step": 24066 }, { "epoch": 48.134, "grad_norm": 1.278770089149475, "learning_rate": 2e-05, "loss": 0.03210206, "step": 24067 }, { "epoch": 48.136, "grad_norm": 0.9865384697914124, "learning_rate": 2e-05, "loss": 0.03966504, "step": 24068 }, { "epoch": 48.138, "grad_norm": 1.1876673698425293, "learning_rate": 2e-05, "loss": 0.0427718, "step": 24069 }, { "epoch": 48.14, "grad_norm": 1.1486650705337524, "learning_rate": 2e-05, "loss": 0.04623136, "step": 24070 }, { "epoch": 48.142, "grad_norm": 1.3332282304763794, "learning_rate": 2e-05, "loss": 0.05006913, "step": 24071 }, { "epoch": 48.144, "grad_norm": 1.2728177309036255, "learning_rate": 2e-05, "loss": 0.05031462, "step": 24072 }, { "epoch": 48.146, "grad_norm": 1.2598438262939453, "learning_rate": 2e-05, "loss": 0.04445124, "step": 24073 }, { "epoch": 48.148, "grad_norm": 1.0530667304992676, "learning_rate": 2e-05, "loss": 0.0403815, "step": 24074 }, { "epoch": 48.15, "grad_norm": 1.0963428020477295, "learning_rate": 2e-05, "loss": 0.0428313, "step": 24075 }, { "epoch": 48.152, "grad_norm": 1.0206382274627686, "learning_rate": 2e-05, "loss": 0.04635064, "step": 24076 }, { "epoch": 48.154, "grad_norm": 1.0546106100082397, "learning_rate": 2e-05, "loss": 0.0390348, "step": 24077 }, { "epoch": 48.156, "grad_norm": 1.1284135580062866, "learning_rate": 2e-05, "loss": 0.03736102, "step": 24078 }, { "epoch": 48.158, "grad_norm": 1.389346957206726, "learning_rate": 2e-05, "loss": 0.04481775, "step": 24079 }, { "epoch": 48.16, "grad_norm": 1.1270530223846436, "learning_rate": 2e-05, "loss": 0.04756751, "step": 24080 }, { "epoch": 48.162, "grad_norm": 1.2751901149749756, "learning_rate": 2e-05, "loss": 0.04584195, "step": 24081 }, { "epoch": 48.164, "grad_norm": 1.0802828073501587, "learning_rate": 2e-05, "loss": 0.06117968, "step": 24082 }, { "epoch": 48.166, "grad_norm": 1.2902870178222656, "learning_rate": 2e-05, "loss": 0.05666249, "step": 24083 }, { "epoch": 48.168, "grad_norm": 1.07093346118927, "learning_rate": 2e-05, "loss": 0.04220451, "step": 24084 }, { "epoch": 48.17, "grad_norm": 0.9996381998062134, "learning_rate": 2e-05, "loss": 0.03157579, "step": 24085 }, { "epoch": 48.172, "grad_norm": 1.4622588157653809, "learning_rate": 2e-05, "loss": 0.04022121, "step": 24086 }, { "epoch": 48.174, "grad_norm": 1.200003981590271, "learning_rate": 2e-05, "loss": 0.03526891, "step": 24087 }, { "epoch": 48.176, "grad_norm": 1.3392802476882935, "learning_rate": 2e-05, "loss": 0.03042413, "step": 24088 }, { "epoch": 48.178, "grad_norm": 1.0749857425689697, "learning_rate": 2e-05, "loss": 0.04232724, "step": 24089 }, { "epoch": 48.18, "grad_norm": 1.1630322933197021, "learning_rate": 2e-05, "loss": 0.04984683, "step": 24090 }, { "epoch": 48.182, "grad_norm": 1.1708325147628784, "learning_rate": 2e-05, "loss": 0.04399581, "step": 24091 }, { "epoch": 48.184, "grad_norm": 2.851407527923584, "learning_rate": 2e-05, "loss": 0.07037769, "step": 24092 }, { "epoch": 48.186, "grad_norm": 1.0833169221878052, "learning_rate": 2e-05, "loss": 0.04655275, "step": 24093 }, { "epoch": 48.188, "grad_norm": 1.372167706489563, "learning_rate": 2e-05, "loss": 0.07025443, "step": 24094 }, { "epoch": 48.19, "grad_norm": 1.2088648080825806, "learning_rate": 2e-05, "loss": 0.04490817, "step": 24095 }, { "epoch": 48.192, "grad_norm": 1.134153962135315, "learning_rate": 2e-05, "loss": 0.05215061, "step": 24096 }, { "epoch": 48.194, "grad_norm": 1.1450132131576538, "learning_rate": 2e-05, "loss": 0.05180396, "step": 24097 }, { "epoch": 48.196, "grad_norm": 1.1347380876541138, "learning_rate": 2e-05, "loss": 0.04732578, "step": 24098 }, { "epoch": 48.198, "grad_norm": 1.1348960399627686, "learning_rate": 2e-05, "loss": 0.05249636, "step": 24099 }, { "epoch": 48.2, "grad_norm": 1.7801083326339722, "learning_rate": 2e-05, "loss": 0.05126803, "step": 24100 }, { "epoch": 48.202, "grad_norm": 1.2715392112731934, "learning_rate": 2e-05, "loss": 0.05465782, "step": 24101 }, { "epoch": 48.204, "grad_norm": 0.9739833474159241, "learning_rate": 2e-05, "loss": 0.03681491, "step": 24102 }, { "epoch": 48.206, "grad_norm": 0.991855800151825, "learning_rate": 2e-05, "loss": 0.03075913, "step": 24103 }, { "epoch": 48.208, "grad_norm": 1.7213369607925415, "learning_rate": 2e-05, "loss": 0.03983373, "step": 24104 }, { "epoch": 48.21, "grad_norm": 1.0067009925842285, "learning_rate": 2e-05, "loss": 0.04427213, "step": 24105 }, { "epoch": 48.212, "grad_norm": 1.8340460062026978, "learning_rate": 2e-05, "loss": 0.04303214, "step": 24106 }, { "epoch": 48.214, "grad_norm": 1.1613703966140747, "learning_rate": 2e-05, "loss": 0.04921772, "step": 24107 }, { "epoch": 48.216, "grad_norm": 2.5453414916992188, "learning_rate": 2e-05, "loss": 0.05552774, "step": 24108 }, { "epoch": 48.218, "grad_norm": 1.020289659500122, "learning_rate": 2e-05, "loss": 0.03909015, "step": 24109 }, { "epoch": 48.22, "grad_norm": 1.1632663011550903, "learning_rate": 2e-05, "loss": 0.05679201, "step": 24110 }, { "epoch": 48.222, "grad_norm": 0.987187385559082, "learning_rate": 2e-05, "loss": 0.03800602, "step": 24111 }, { "epoch": 48.224, "grad_norm": 1.2574355602264404, "learning_rate": 2e-05, "loss": 0.05340779, "step": 24112 }, { "epoch": 48.226, "grad_norm": 1.076492428779602, "learning_rate": 2e-05, "loss": 0.04606473, "step": 24113 }, { "epoch": 48.228, "grad_norm": 1.010301947593689, "learning_rate": 2e-05, "loss": 0.03338667, "step": 24114 }, { "epoch": 48.23, "grad_norm": 1.3499643802642822, "learning_rate": 2e-05, "loss": 0.06098785, "step": 24115 }, { "epoch": 48.232, "grad_norm": 1.1102203130722046, "learning_rate": 2e-05, "loss": 0.04781565, "step": 24116 }, { "epoch": 48.234, "grad_norm": 0.941985011100769, "learning_rate": 2e-05, "loss": 0.03292421, "step": 24117 }, { "epoch": 48.236, "grad_norm": 1.1043226718902588, "learning_rate": 2e-05, "loss": 0.0464811, "step": 24118 }, { "epoch": 48.238, "grad_norm": 0.9736769795417786, "learning_rate": 2e-05, "loss": 0.04281894, "step": 24119 }, { "epoch": 48.24, "grad_norm": 1.1103366613388062, "learning_rate": 2e-05, "loss": 0.05232271, "step": 24120 }, { "epoch": 48.242, "grad_norm": 1.2056519985198975, "learning_rate": 2e-05, "loss": 0.05404781, "step": 24121 }, { "epoch": 48.244, "grad_norm": 1.410598874092102, "learning_rate": 2e-05, "loss": 0.06230663, "step": 24122 }, { "epoch": 48.246, "grad_norm": 1.136397123336792, "learning_rate": 2e-05, "loss": 0.05437019, "step": 24123 }, { "epoch": 48.248, "grad_norm": 1.0891176462173462, "learning_rate": 2e-05, "loss": 0.04057914, "step": 24124 }, { "epoch": 48.25, "grad_norm": 1.1873037815093994, "learning_rate": 2e-05, "loss": 0.04513947, "step": 24125 }, { "epoch": 48.252, "grad_norm": 1.5150892734527588, "learning_rate": 2e-05, "loss": 0.0410989, "step": 24126 }, { "epoch": 48.254, "grad_norm": 2.031759738922119, "learning_rate": 2e-05, "loss": 0.05381062, "step": 24127 }, { "epoch": 48.256, "grad_norm": 1.5702145099639893, "learning_rate": 2e-05, "loss": 0.03662251, "step": 24128 }, { "epoch": 48.258, "grad_norm": 1.2305831909179688, "learning_rate": 2e-05, "loss": 0.06105634, "step": 24129 }, { "epoch": 48.26, "grad_norm": 1.1910518407821655, "learning_rate": 2e-05, "loss": 0.04331035, "step": 24130 }, { "epoch": 48.262, "grad_norm": 1.225128412246704, "learning_rate": 2e-05, "loss": 0.05862859, "step": 24131 }, { "epoch": 48.264, "grad_norm": 0.9506001472473145, "learning_rate": 2e-05, "loss": 0.03767806, "step": 24132 }, { "epoch": 48.266, "grad_norm": 1.2454578876495361, "learning_rate": 2e-05, "loss": 0.05315366, "step": 24133 }, { "epoch": 48.268, "grad_norm": 2.151110887527466, "learning_rate": 2e-05, "loss": 0.04462639, "step": 24134 }, { "epoch": 48.27, "grad_norm": 1.1853653192520142, "learning_rate": 2e-05, "loss": 0.05619586, "step": 24135 }, { "epoch": 48.272, "grad_norm": 0.8391315937042236, "learning_rate": 2e-05, "loss": 0.02844811, "step": 24136 }, { "epoch": 48.274, "grad_norm": 1.102542757987976, "learning_rate": 2e-05, "loss": 0.04351805, "step": 24137 }, { "epoch": 48.276, "grad_norm": 0.9882407784461975, "learning_rate": 2e-05, "loss": 0.0304465, "step": 24138 }, { "epoch": 48.278, "grad_norm": 1.748734951019287, "learning_rate": 2e-05, "loss": 0.06221665, "step": 24139 }, { "epoch": 48.28, "grad_norm": 1.4740245342254639, "learning_rate": 2e-05, "loss": 0.06243878, "step": 24140 }, { "epoch": 48.282, "grad_norm": 1.324210524559021, "learning_rate": 2e-05, "loss": 0.05721395, "step": 24141 }, { "epoch": 48.284, "grad_norm": 1.527857780456543, "learning_rate": 2e-05, "loss": 0.0590317, "step": 24142 }, { "epoch": 48.286, "grad_norm": 1.0875802040100098, "learning_rate": 2e-05, "loss": 0.04194984, "step": 24143 }, { "epoch": 48.288, "grad_norm": 1.2001830339431763, "learning_rate": 2e-05, "loss": 0.05224827, "step": 24144 }, { "epoch": 48.29, "grad_norm": 1.3207565546035767, "learning_rate": 2e-05, "loss": 0.05328258, "step": 24145 }, { "epoch": 48.292, "grad_norm": 1.3341400623321533, "learning_rate": 2e-05, "loss": 0.03919051, "step": 24146 }, { "epoch": 48.294, "grad_norm": 1.3471317291259766, "learning_rate": 2e-05, "loss": 0.0625244, "step": 24147 }, { "epoch": 48.296, "grad_norm": 0.9852702021598816, "learning_rate": 2e-05, "loss": 0.04854707, "step": 24148 }, { "epoch": 48.298, "grad_norm": 0.9104224443435669, "learning_rate": 2e-05, "loss": 0.03098441, "step": 24149 }, { "epoch": 48.3, "grad_norm": 1.0037267208099365, "learning_rate": 2e-05, "loss": 0.03725686, "step": 24150 }, { "epoch": 48.302, "grad_norm": 1.1196410655975342, "learning_rate": 2e-05, "loss": 0.04821742, "step": 24151 }, { "epoch": 48.304, "grad_norm": 1.17573082447052, "learning_rate": 2e-05, "loss": 0.04223769, "step": 24152 }, { "epoch": 48.306, "grad_norm": 1.1599558591842651, "learning_rate": 2e-05, "loss": 0.04793893, "step": 24153 }, { "epoch": 48.308, "grad_norm": 2.1056137084960938, "learning_rate": 2e-05, "loss": 0.04927088, "step": 24154 }, { "epoch": 48.31, "grad_norm": 1.2018673419952393, "learning_rate": 2e-05, "loss": 0.04321744, "step": 24155 }, { "epoch": 48.312, "grad_norm": 1.2046828269958496, "learning_rate": 2e-05, "loss": 0.0523455, "step": 24156 }, { "epoch": 48.314, "grad_norm": 1.2521579265594482, "learning_rate": 2e-05, "loss": 0.04741405, "step": 24157 }, { "epoch": 48.316, "grad_norm": 1.046248435974121, "learning_rate": 2e-05, "loss": 0.03981638, "step": 24158 }, { "epoch": 48.318, "grad_norm": 1.2036545276641846, "learning_rate": 2e-05, "loss": 0.04452758, "step": 24159 }, { "epoch": 48.32, "grad_norm": 1.224697470664978, "learning_rate": 2e-05, "loss": 0.04996022, "step": 24160 }, { "epoch": 48.322, "grad_norm": 1.4698989391326904, "learning_rate": 2e-05, "loss": 0.06074246, "step": 24161 }, { "epoch": 48.324, "grad_norm": 1.205679178237915, "learning_rate": 2e-05, "loss": 0.05434452, "step": 24162 }, { "epoch": 48.326, "grad_norm": 1.2876218557357788, "learning_rate": 2e-05, "loss": 0.04615651, "step": 24163 }, { "epoch": 48.328, "grad_norm": 1.218860149383545, "learning_rate": 2e-05, "loss": 0.05063464, "step": 24164 }, { "epoch": 48.33, "grad_norm": 1.225708246231079, "learning_rate": 2e-05, "loss": 0.04554639, "step": 24165 }, { "epoch": 48.332, "grad_norm": 1.4999538660049438, "learning_rate": 2e-05, "loss": 0.0535819, "step": 24166 }, { "epoch": 48.334, "grad_norm": 1.4471505880355835, "learning_rate": 2e-05, "loss": 0.05369381, "step": 24167 }, { "epoch": 48.336, "grad_norm": 1.0038902759552002, "learning_rate": 2e-05, "loss": 0.03418017, "step": 24168 }, { "epoch": 48.338, "grad_norm": 1.2840253114700317, "learning_rate": 2e-05, "loss": 0.05745913, "step": 24169 }, { "epoch": 48.34, "grad_norm": 1.1269090175628662, "learning_rate": 2e-05, "loss": 0.04826582, "step": 24170 }, { "epoch": 48.342, "grad_norm": 0.9834097623825073, "learning_rate": 2e-05, "loss": 0.0321577, "step": 24171 }, { "epoch": 48.344, "grad_norm": 1.304100513458252, "learning_rate": 2e-05, "loss": 0.05313345, "step": 24172 }, { "epoch": 48.346, "grad_norm": 0.9855166077613831, "learning_rate": 2e-05, "loss": 0.04511321, "step": 24173 }, { "epoch": 48.348, "grad_norm": 1.117134928703308, "learning_rate": 2e-05, "loss": 0.04698523, "step": 24174 }, { "epoch": 48.35, "grad_norm": 1.2151545286178589, "learning_rate": 2e-05, "loss": 0.03640284, "step": 24175 }, { "epoch": 48.352, "grad_norm": 1.690683126449585, "learning_rate": 2e-05, "loss": 0.05967022, "step": 24176 }, { "epoch": 48.354, "grad_norm": 1.0497173070907593, "learning_rate": 2e-05, "loss": 0.03619199, "step": 24177 }, { "epoch": 48.356, "grad_norm": 1.0658822059631348, "learning_rate": 2e-05, "loss": 0.04109097, "step": 24178 }, { "epoch": 48.358, "grad_norm": 1.1863605976104736, "learning_rate": 2e-05, "loss": 0.05850657, "step": 24179 }, { "epoch": 48.36, "grad_norm": 0.910224974155426, "learning_rate": 2e-05, "loss": 0.03389906, "step": 24180 }, { "epoch": 48.362, "grad_norm": 1.1010740995407104, "learning_rate": 2e-05, "loss": 0.04562913, "step": 24181 }, { "epoch": 48.364, "grad_norm": 1.3951750993728638, "learning_rate": 2e-05, "loss": 0.04543884, "step": 24182 }, { "epoch": 48.366, "grad_norm": 1.33402681350708, "learning_rate": 2e-05, "loss": 0.04668194, "step": 24183 }, { "epoch": 48.368, "grad_norm": 1.3181812763214111, "learning_rate": 2e-05, "loss": 0.05848655, "step": 24184 }, { "epoch": 48.37, "grad_norm": 1.1864538192749023, "learning_rate": 2e-05, "loss": 0.04627438, "step": 24185 }, { "epoch": 48.372, "grad_norm": 1.0719053745269775, "learning_rate": 2e-05, "loss": 0.04401678, "step": 24186 }, { "epoch": 48.374, "grad_norm": 1.2365782260894775, "learning_rate": 2e-05, "loss": 0.04953314, "step": 24187 }, { "epoch": 48.376, "grad_norm": 1.362647533416748, "learning_rate": 2e-05, "loss": 0.03675044, "step": 24188 }, { "epoch": 48.378, "grad_norm": 1.1618704795837402, "learning_rate": 2e-05, "loss": 0.05714417, "step": 24189 }, { "epoch": 48.38, "grad_norm": 1.2545180320739746, "learning_rate": 2e-05, "loss": 0.04899006, "step": 24190 }, { "epoch": 48.382, "grad_norm": 1.5738791227340698, "learning_rate": 2e-05, "loss": 0.06693644, "step": 24191 }, { "epoch": 48.384, "grad_norm": 1.2538057565689087, "learning_rate": 2e-05, "loss": 0.04467027, "step": 24192 }, { "epoch": 48.386, "grad_norm": 2.4022083282470703, "learning_rate": 2e-05, "loss": 0.0603236, "step": 24193 }, { "epoch": 48.388, "grad_norm": 1.0010615587234497, "learning_rate": 2e-05, "loss": 0.03820972, "step": 24194 }, { "epoch": 48.39, "grad_norm": 1.7481197118759155, "learning_rate": 2e-05, "loss": 0.06582922, "step": 24195 }, { "epoch": 48.392, "grad_norm": 1.1138973236083984, "learning_rate": 2e-05, "loss": 0.05272717, "step": 24196 }, { "epoch": 48.394, "grad_norm": 1.0726069211959839, "learning_rate": 2e-05, "loss": 0.05133822, "step": 24197 }, { "epoch": 48.396, "grad_norm": 1.7698593139648438, "learning_rate": 2e-05, "loss": 0.05580845, "step": 24198 }, { "epoch": 48.398, "grad_norm": 1.3947736024856567, "learning_rate": 2e-05, "loss": 0.05010434, "step": 24199 }, { "epoch": 48.4, "grad_norm": 1.0563322305679321, "learning_rate": 2e-05, "loss": 0.03886336, "step": 24200 }, { "epoch": 48.402, "grad_norm": 1.6317943334579468, "learning_rate": 2e-05, "loss": 0.06757972, "step": 24201 }, { "epoch": 48.404, "grad_norm": 1.1341196298599243, "learning_rate": 2e-05, "loss": 0.04929399, "step": 24202 }, { "epoch": 48.406, "grad_norm": 1.2951228618621826, "learning_rate": 2e-05, "loss": 0.04734764, "step": 24203 }, { "epoch": 48.408, "grad_norm": 1.2703125476837158, "learning_rate": 2e-05, "loss": 0.0539843, "step": 24204 }, { "epoch": 48.41, "grad_norm": 1.4762182235717773, "learning_rate": 2e-05, "loss": 0.05031619, "step": 24205 }, { "epoch": 48.412, "grad_norm": 1.1297892332077026, "learning_rate": 2e-05, "loss": 0.05098326, "step": 24206 }, { "epoch": 48.414, "grad_norm": 1.776755452156067, "learning_rate": 2e-05, "loss": 0.05509105, "step": 24207 }, { "epoch": 48.416, "grad_norm": 1.4835596084594727, "learning_rate": 2e-05, "loss": 0.05865866, "step": 24208 }, { "epoch": 48.418, "grad_norm": 1.1328264474868774, "learning_rate": 2e-05, "loss": 0.0568237, "step": 24209 }, { "epoch": 48.42, "grad_norm": 1.0949770212173462, "learning_rate": 2e-05, "loss": 0.04025354, "step": 24210 }, { "epoch": 48.422, "grad_norm": 1.2721264362335205, "learning_rate": 2e-05, "loss": 0.05412237, "step": 24211 }, { "epoch": 48.424, "grad_norm": 1.6875686645507812, "learning_rate": 2e-05, "loss": 0.05133815, "step": 24212 }, { "epoch": 48.426, "grad_norm": 1.1843007802963257, "learning_rate": 2e-05, "loss": 0.04539281, "step": 24213 }, { "epoch": 48.428, "grad_norm": 1.1561273336410522, "learning_rate": 2e-05, "loss": 0.05123963, "step": 24214 }, { "epoch": 48.43, "grad_norm": 1.2568432092666626, "learning_rate": 2e-05, "loss": 0.05854414, "step": 24215 }, { "epoch": 48.432, "grad_norm": 1.2234379053115845, "learning_rate": 2e-05, "loss": 0.03763745, "step": 24216 }, { "epoch": 48.434, "grad_norm": 1.1725529432296753, "learning_rate": 2e-05, "loss": 0.05705394, "step": 24217 }, { "epoch": 48.436, "grad_norm": 1.598198652267456, "learning_rate": 2e-05, "loss": 0.05314296, "step": 24218 }, { "epoch": 48.438, "grad_norm": 1.4097914695739746, "learning_rate": 2e-05, "loss": 0.04881332, "step": 24219 }, { "epoch": 48.44, "grad_norm": 1.0265836715698242, "learning_rate": 2e-05, "loss": 0.03001934, "step": 24220 }, { "epoch": 48.442, "grad_norm": 1.0836626291275024, "learning_rate": 2e-05, "loss": 0.04115789, "step": 24221 }, { "epoch": 48.444, "grad_norm": 0.9527786374092102, "learning_rate": 2e-05, "loss": 0.04305189, "step": 24222 }, { "epoch": 48.446, "grad_norm": 2.3282713890075684, "learning_rate": 2e-05, "loss": 0.04773811, "step": 24223 }, { "epoch": 48.448, "grad_norm": 1.238843321800232, "learning_rate": 2e-05, "loss": 0.05873473, "step": 24224 }, { "epoch": 48.45, "grad_norm": 1.050054669380188, "learning_rate": 2e-05, "loss": 0.04225109, "step": 24225 }, { "epoch": 48.452, "grad_norm": 0.9493594765663147, "learning_rate": 2e-05, "loss": 0.03815629, "step": 24226 }, { "epoch": 48.454, "grad_norm": 1.4076915979385376, "learning_rate": 2e-05, "loss": 0.0571268, "step": 24227 }, { "epoch": 48.456, "grad_norm": 1.1200451850891113, "learning_rate": 2e-05, "loss": 0.04536561, "step": 24228 }, { "epoch": 48.458, "grad_norm": 0.9729321002960205, "learning_rate": 2e-05, "loss": 0.04120147, "step": 24229 }, { "epoch": 48.46, "grad_norm": 1.1512271165847778, "learning_rate": 2e-05, "loss": 0.04277606, "step": 24230 }, { "epoch": 48.462, "grad_norm": 1.1026575565338135, "learning_rate": 2e-05, "loss": 0.0351891, "step": 24231 }, { "epoch": 48.464, "grad_norm": 1.0310128927230835, "learning_rate": 2e-05, "loss": 0.03461772, "step": 24232 }, { "epoch": 48.466, "grad_norm": 1.412767767906189, "learning_rate": 2e-05, "loss": 0.04195011, "step": 24233 }, { "epoch": 48.468, "grad_norm": 1.1256314516067505, "learning_rate": 2e-05, "loss": 0.05557293, "step": 24234 }, { "epoch": 48.47, "grad_norm": 1.1693962812423706, "learning_rate": 2e-05, "loss": 0.03926221, "step": 24235 }, { "epoch": 48.472, "grad_norm": 1.1517102718353271, "learning_rate": 2e-05, "loss": 0.05275252, "step": 24236 }, { "epoch": 48.474, "grad_norm": 1.3744908571243286, "learning_rate": 2e-05, "loss": 0.04760543, "step": 24237 }, { "epoch": 48.476, "grad_norm": 1.1861525774002075, "learning_rate": 2e-05, "loss": 0.04502533, "step": 24238 }, { "epoch": 48.478, "grad_norm": 1.4491164684295654, "learning_rate": 2e-05, "loss": 0.05307315, "step": 24239 }, { "epoch": 48.48, "grad_norm": 1.0572928190231323, "learning_rate": 2e-05, "loss": 0.04607842, "step": 24240 }, { "epoch": 48.482, "grad_norm": 1.7253468036651611, "learning_rate": 2e-05, "loss": 0.0420462, "step": 24241 }, { "epoch": 48.484, "grad_norm": 1.1973940134048462, "learning_rate": 2e-05, "loss": 0.06035455, "step": 24242 }, { "epoch": 48.486, "grad_norm": 1.123047947883606, "learning_rate": 2e-05, "loss": 0.04807914, "step": 24243 }, { "epoch": 48.488, "grad_norm": 3.36896014213562, "learning_rate": 2e-05, "loss": 0.05288967, "step": 24244 }, { "epoch": 48.49, "grad_norm": 1.3150179386138916, "learning_rate": 2e-05, "loss": 0.04113879, "step": 24245 }, { "epoch": 48.492, "grad_norm": 0.998184084892273, "learning_rate": 2e-05, "loss": 0.03896046, "step": 24246 }, { "epoch": 48.494, "grad_norm": 2.140963315963745, "learning_rate": 2e-05, "loss": 0.05801499, "step": 24247 }, { "epoch": 48.496, "grad_norm": 0.9742385149002075, "learning_rate": 2e-05, "loss": 0.04312814, "step": 24248 }, { "epoch": 48.498, "grad_norm": 0.9801925420761108, "learning_rate": 2e-05, "loss": 0.04314513, "step": 24249 }, { "epoch": 48.5, "grad_norm": 1.167195200920105, "learning_rate": 2e-05, "loss": 0.04025427, "step": 24250 }, { "epoch": 48.502, "grad_norm": 1.045845627784729, "learning_rate": 2e-05, "loss": 0.03268411, "step": 24251 }, { "epoch": 48.504, "grad_norm": 1.4855949878692627, "learning_rate": 2e-05, "loss": 0.03678501, "step": 24252 }, { "epoch": 48.506, "grad_norm": 1.0382455587387085, "learning_rate": 2e-05, "loss": 0.05245716, "step": 24253 }, { "epoch": 48.508, "grad_norm": 1.0648964643478394, "learning_rate": 2e-05, "loss": 0.0379638, "step": 24254 }, { "epoch": 48.51, "grad_norm": 1.1607509851455688, "learning_rate": 2e-05, "loss": 0.05345634, "step": 24255 }, { "epoch": 48.512, "grad_norm": 1.2925974130630493, "learning_rate": 2e-05, "loss": 0.05352456, "step": 24256 }, { "epoch": 48.514, "grad_norm": 1.1393349170684814, "learning_rate": 2e-05, "loss": 0.04305478, "step": 24257 }, { "epoch": 48.516, "grad_norm": 1.041580319404602, "learning_rate": 2e-05, "loss": 0.05400862, "step": 24258 }, { "epoch": 48.518, "grad_norm": 1.0252822637557983, "learning_rate": 2e-05, "loss": 0.0446092, "step": 24259 }, { "epoch": 48.52, "grad_norm": 1.2504663467407227, "learning_rate": 2e-05, "loss": 0.06324365, "step": 24260 }, { "epoch": 48.522, "grad_norm": 1.246679425239563, "learning_rate": 2e-05, "loss": 0.05985711, "step": 24261 }, { "epoch": 48.524, "grad_norm": 1.0827921628952026, "learning_rate": 2e-05, "loss": 0.04559185, "step": 24262 }, { "epoch": 48.526, "grad_norm": 0.9944784045219421, "learning_rate": 2e-05, "loss": 0.04224956, "step": 24263 }, { "epoch": 48.528, "grad_norm": 1.6731486320495605, "learning_rate": 2e-05, "loss": 0.06704316, "step": 24264 }, { "epoch": 48.53, "grad_norm": 1.1593559980392456, "learning_rate": 2e-05, "loss": 0.04145539, "step": 24265 }, { "epoch": 48.532, "grad_norm": 1.5469125509262085, "learning_rate": 2e-05, "loss": 0.05441985, "step": 24266 }, { "epoch": 48.534, "grad_norm": 1.191585898399353, "learning_rate": 2e-05, "loss": 0.04084273, "step": 24267 }, { "epoch": 48.536, "grad_norm": 2.4968080520629883, "learning_rate": 2e-05, "loss": 0.05761244, "step": 24268 }, { "epoch": 48.538, "grad_norm": 1.4555327892303467, "learning_rate": 2e-05, "loss": 0.05669381, "step": 24269 }, { "epoch": 48.54, "grad_norm": 1.1853517293930054, "learning_rate": 2e-05, "loss": 0.03782331, "step": 24270 }, { "epoch": 48.542, "grad_norm": 1.0957677364349365, "learning_rate": 2e-05, "loss": 0.04011827, "step": 24271 }, { "epoch": 48.544, "grad_norm": 1.120521903038025, "learning_rate": 2e-05, "loss": 0.0431511, "step": 24272 }, { "epoch": 48.546, "grad_norm": 1.3489925861358643, "learning_rate": 2e-05, "loss": 0.04053114, "step": 24273 }, { "epoch": 48.548, "grad_norm": 1.153000831604004, "learning_rate": 2e-05, "loss": 0.05193951, "step": 24274 }, { "epoch": 48.55, "grad_norm": 1.4672309160232544, "learning_rate": 2e-05, "loss": 0.0470688, "step": 24275 }, { "epoch": 48.552, "grad_norm": 1.2560532093048096, "learning_rate": 2e-05, "loss": 0.04077678, "step": 24276 }, { "epoch": 48.554, "grad_norm": 1.1438381671905518, "learning_rate": 2e-05, "loss": 0.04561079, "step": 24277 }, { "epoch": 48.556, "grad_norm": 1.103325605392456, "learning_rate": 2e-05, "loss": 0.04266635, "step": 24278 }, { "epoch": 48.558, "grad_norm": 1.2053292989730835, "learning_rate": 2e-05, "loss": 0.05609243, "step": 24279 }, { "epoch": 48.56, "grad_norm": 1.2866277694702148, "learning_rate": 2e-05, "loss": 0.05822425, "step": 24280 }, { "epoch": 48.562, "grad_norm": 1.3046430349349976, "learning_rate": 2e-05, "loss": 0.05633568, "step": 24281 }, { "epoch": 48.564, "grad_norm": 1.165405035018921, "learning_rate": 2e-05, "loss": 0.02543902, "step": 24282 }, { "epoch": 48.566, "grad_norm": 1.2302770614624023, "learning_rate": 2e-05, "loss": 0.04642507, "step": 24283 }, { "epoch": 48.568, "grad_norm": 1.5563623905181885, "learning_rate": 2e-05, "loss": 0.05143239, "step": 24284 }, { "epoch": 48.57, "grad_norm": 1.0007959604263306, "learning_rate": 2e-05, "loss": 0.03177904, "step": 24285 }, { "epoch": 48.572, "grad_norm": 1.5403399467468262, "learning_rate": 2e-05, "loss": 0.0840653, "step": 24286 }, { "epoch": 48.574, "grad_norm": 2.075601577758789, "learning_rate": 2e-05, "loss": 0.04922469, "step": 24287 }, { "epoch": 48.576, "grad_norm": 1.155706524848938, "learning_rate": 2e-05, "loss": 0.03379347, "step": 24288 }, { "epoch": 48.578, "grad_norm": 2.421046018600464, "learning_rate": 2e-05, "loss": 0.03684851, "step": 24289 }, { "epoch": 48.58, "grad_norm": 1.1234990358352661, "learning_rate": 2e-05, "loss": 0.04452331, "step": 24290 }, { "epoch": 48.582, "grad_norm": 1.228397011756897, "learning_rate": 2e-05, "loss": 0.05986349, "step": 24291 }, { "epoch": 48.584, "grad_norm": 1.2848201990127563, "learning_rate": 2e-05, "loss": 0.05497922, "step": 24292 }, { "epoch": 48.586, "grad_norm": 1.1021811962127686, "learning_rate": 2e-05, "loss": 0.04066538, "step": 24293 }, { "epoch": 48.588, "grad_norm": 1.184058427810669, "learning_rate": 2e-05, "loss": 0.05837499, "step": 24294 }, { "epoch": 48.59, "grad_norm": 1.4225780963897705, "learning_rate": 2e-05, "loss": 0.04438188, "step": 24295 }, { "epoch": 48.592, "grad_norm": 1.1480562686920166, "learning_rate": 2e-05, "loss": 0.0400277, "step": 24296 }, { "epoch": 48.594, "grad_norm": 1.323900580406189, "learning_rate": 2e-05, "loss": 0.05192615, "step": 24297 }, { "epoch": 48.596, "grad_norm": 1.441277265548706, "learning_rate": 2e-05, "loss": 0.03372613, "step": 24298 }, { "epoch": 48.598, "grad_norm": 1.0926908254623413, "learning_rate": 2e-05, "loss": 0.05107879, "step": 24299 }, { "epoch": 48.6, "grad_norm": 1.2348512411117554, "learning_rate": 2e-05, "loss": 0.05852629, "step": 24300 }, { "epoch": 48.602, "grad_norm": 1.182897925376892, "learning_rate": 2e-05, "loss": 0.06499533, "step": 24301 }, { "epoch": 48.604, "grad_norm": 1.1605639457702637, "learning_rate": 2e-05, "loss": 0.05055974, "step": 24302 }, { "epoch": 48.606, "grad_norm": 1.3561877012252808, "learning_rate": 2e-05, "loss": 0.05069784, "step": 24303 }, { "epoch": 48.608, "grad_norm": 1.2737542390823364, "learning_rate": 2e-05, "loss": 0.04829124, "step": 24304 }, { "epoch": 48.61, "grad_norm": 1.2459218502044678, "learning_rate": 2e-05, "loss": 0.057886, "step": 24305 }, { "epoch": 48.612, "grad_norm": 1.154877781867981, "learning_rate": 2e-05, "loss": 0.03690616, "step": 24306 }, { "epoch": 48.614, "grad_norm": 1.1142473220825195, "learning_rate": 2e-05, "loss": 0.04277994, "step": 24307 }, { "epoch": 48.616, "grad_norm": 1.2507994174957275, "learning_rate": 2e-05, "loss": 0.05749039, "step": 24308 }, { "epoch": 48.618, "grad_norm": 1.2597581148147583, "learning_rate": 2e-05, "loss": 0.04754964, "step": 24309 }, { "epoch": 48.62, "grad_norm": 1.1352006196975708, "learning_rate": 2e-05, "loss": 0.04118899, "step": 24310 }, { "epoch": 48.622, "grad_norm": 1.7616618871688843, "learning_rate": 2e-05, "loss": 0.0472442, "step": 24311 }, { "epoch": 48.624, "grad_norm": 1.2920984029769897, "learning_rate": 2e-05, "loss": 0.05620179, "step": 24312 }, { "epoch": 48.626, "grad_norm": 1.0155946016311646, "learning_rate": 2e-05, "loss": 0.03928847, "step": 24313 }, { "epoch": 48.628, "grad_norm": 1.8907221555709839, "learning_rate": 2e-05, "loss": 0.05770548, "step": 24314 }, { "epoch": 48.63, "grad_norm": 1.3525524139404297, "learning_rate": 2e-05, "loss": 0.05569984, "step": 24315 }, { "epoch": 48.632, "grad_norm": 2.0137546062469482, "learning_rate": 2e-05, "loss": 0.04367228, "step": 24316 }, { "epoch": 48.634, "grad_norm": 1.1896785497665405, "learning_rate": 2e-05, "loss": 0.0412126, "step": 24317 }, { "epoch": 48.636, "grad_norm": 1.2872071266174316, "learning_rate": 2e-05, "loss": 0.05311878, "step": 24318 }, { "epoch": 48.638, "grad_norm": 3.229105234146118, "learning_rate": 2e-05, "loss": 0.05653278, "step": 24319 }, { "epoch": 48.64, "grad_norm": 1.131352186203003, "learning_rate": 2e-05, "loss": 0.04109538, "step": 24320 }, { "epoch": 48.642, "grad_norm": 1.0072386264801025, "learning_rate": 2e-05, "loss": 0.04287854, "step": 24321 }, { "epoch": 48.644, "grad_norm": 1.1246684789657593, "learning_rate": 2e-05, "loss": 0.04119401, "step": 24322 }, { "epoch": 48.646, "grad_norm": 0.9696083664894104, "learning_rate": 2e-05, "loss": 0.03462346, "step": 24323 }, { "epoch": 48.648, "grad_norm": 1.8664556741714478, "learning_rate": 2e-05, "loss": 0.0543664, "step": 24324 }, { "epoch": 48.65, "grad_norm": 2.026254653930664, "learning_rate": 2e-05, "loss": 0.0509354, "step": 24325 }, { "epoch": 48.652, "grad_norm": 1.2250194549560547, "learning_rate": 2e-05, "loss": 0.06017506, "step": 24326 }, { "epoch": 48.654, "grad_norm": 1.2674254179000854, "learning_rate": 2e-05, "loss": 0.04169001, "step": 24327 }, { "epoch": 48.656, "grad_norm": 1.2122572660446167, "learning_rate": 2e-05, "loss": 0.05367371, "step": 24328 }, { "epoch": 48.658, "grad_norm": 1.4815667867660522, "learning_rate": 2e-05, "loss": 0.07224436, "step": 24329 }, { "epoch": 48.66, "grad_norm": 1.290479302406311, "learning_rate": 2e-05, "loss": 0.04883266, "step": 24330 }, { "epoch": 48.662, "grad_norm": 1.1489940881729126, "learning_rate": 2e-05, "loss": 0.0474559, "step": 24331 }, { "epoch": 48.664, "grad_norm": 1.302720069885254, "learning_rate": 2e-05, "loss": 0.05755413, "step": 24332 }, { "epoch": 48.666, "grad_norm": 0.9729812741279602, "learning_rate": 2e-05, "loss": 0.0363071, "step": 24333 }, { "epoch": 48.668, "grad_norm": 1.3361284732818604, "learning_rate": 2e-05, "loss": 0.04093036, "step": 24334 }, { "epoch": 48.67, "grad_norm": 1.046409010887146, "learning_rate": 2e-05, "loss": 0.03625423, "step": 24335 }, { "epoch": 48.672, "grad_norm": 1.158137321472168, "learning_rate": 2e-05, "loss": 0.05011062, "step": 24336 }, { "epoch": 48.674, "grad_norm": 1.1454992294311523, "learning_rate": 2e-05, "loss": 0.04545939, "step": 24337 }, { "epoch": 48.676, "grad_norm": 1.2289396524429321, "learning_rate": 2e-05, "loss": 0.06850711, "step": 24338 }, { "epoch": 48.678, "grad_norm": 2.0199902057647705, "learning_rate": 2e-05, "loss": 0.04739082, "step": 24339 }, { "epoch": 48.68, "grad_norm": 1.1302142143249512, "learning_rate": 2e-05, "loss": 0.0431182, "step": 24340 }, { "epoch": 48.682, "grad_norm": 1.3505676984786987, "learning_rate": 2e-05, "loss": 0.04874783, "step": 24341 }, { "epoch": 48.684, "grad_norm": 0.9121742248535156, "learning_rate": 2e-05, "loss": 0.02643291, "step": 24342 }, { "epoch": 48.686, "grad_norm": 1.3847429752349854, "learning_rate": 2e-05, "loss": 0.04750867, "step": 24343 }, { "epoch": 48.688, "grad_norm": 1.1395158767700195, "learning_rate": 2e-05, "loss": 0.04924957, "step": 24344 }, { "epoch": 48.69, "grad_norm": 1.0794190168380737, "learning_rate": 2e-05, "loss": 0.03787911, "step": 24345 }, { "epoch": 48.692, "grad_norm": 1.1486942768096924, "learning_rate": 2e-05, "loss": 0.04261864, "step": 24346 }, { "epoch": 48.694, "grad_norm": 1.1748255491256714, "learning_rate": 2e-05, "loss": 0.04672168, "step": 24347 }, { "epoch": 48.696, "grad_norm": 1.1263988018035889, "learning_rate": 2e-05, "loss": 0.04133555, "step": 24348 }, { "epoch": 48.698, "grad_norm": 1.2288318872451782, "learning_rate": 2e-05, "loss": 0.04745194, "step": 24349 }, { "epoch": 48.7, "grad_norm": 0.9768017530441284, "learning_rate": 2e-05, "loss": 0.04292785, "step": 24350 }, { "epoch": 48.702, "grad_norm": 1.0999313592910767, "learning_rate": 2e-05, "loss": 0.05881019, "step": 24351 }, { "epoch": 48.704, "grad_norm": 1.878710389137268, "learning_rate": 2e-05, "loss": 0.05866309, "step": 24352 }, { "epoch": 48.706, "grad_norm": 1.63356614112854, "learning_rate": 2e-05, "loss": 0.05248346, "step": 24353 }, { "epoch": 48.708, "grad_norm": 1.2071478366851807, "learning_rate": 2e-05, "loss": 0.05778679, "step": 24354 }, { "epoch": 48.71, "grad_norm": 1.188579797744751, "learning_rate": 2e-05, "loss": 0.05696705, "step": 24355 }, { "epoch": 48.712, "grad_norm": 0.9021721482276917, "learning_rate": 2e-05, "loss": 0.02676656, "step": 24356 }, { "epoch": 48.714, "grad_norm": 1.446212887763977, "learning_rate": 2e-05, "loss": 0.04261808, "step": 24357 }, { "epoch": 48.716, "grad_norm": 0.975503146648407, "learning_rate": 2e-05, "loss": 0.03775565, "step": 24358 }, { "epoch": 48.718, "grad_norm": 1.1201655864715576, "learning_rate": 2e-05, "loss": 0.04218309, "step": 24359 }, { "epoch": 48.72, "grad_norm": 1.2044806480407715, "learning_rate": 2e-05, "loss": 0.04980427, "step": 24360 }, { "epoch": 48.722, "grad_norm": 1.0975890159606934, "learning_rate": 2e-05, "loss": 0.04094896, "step": 24361 }, { "epoch": 48.724, "grad_norm": 1.5695173740386963, "learning_rate": 2e-05, "loss": 0.04596317, "step": 24362 }, { "epoch": 48.726, "grad_norm": 1.0056501626968384, "learning_rate": 2e-05, "loss": 0.04648286, "step": 24363 }, { "epoch": 48.728, "grad_norm": 0.9918159246444702, "learning_rate": 2e-05, "loss": 0.03798754, "step": 24364 }, { "epoch": 48.73, "grad_norm": 1.287979245185852, "learning_rate": 2e-05, "loss": 0.05667548, "step": 24365 }, { "epoch": 48.732, "grad_norm": 1.1386852264404297, "learning_rate": 2e-05, "loss": 0.05278626, "step": 24366 }, { "epoch": 48.734, "grad_norm": 0.996176540851593, "learning_rate": 2e-05, "loss": 0.05246001, "step": 24367 }, { "epoch": 48.736, "grad_norm": 2.319737434387207, "learning_rate": 2e-05, "loss": 0.0373604, "step": 24368 }, { "epoch": 48.738, "grad_norm": 1.7545973062515259, "learning_rate": 2e-05, "loss": 0.04798623, "step": 24369 }, { "epoch": 48.74, "grad_norm": 1.3103057146072388, "learning_rate": 2e-05, "loss": 0.05369485, "step": 24370 }, { "epoch": 48.742, "grad_norm": 1.1758966445922852, "learning_rate": 2e-05, "loss": 0.05795531, "step": 24371 }, { "epoch": 48.744, "grad_norm": 0.9001947641372681, "learning_rate": 2e-05, "loss": 0.02734162, "step": 24372 }, { "epoch": 48.746, "grad_norm": 1.0777047872543335, "learning_rate": 2e-05, "loss": 0.03581995, "step": 24373 }, { "epoch": 48.748, "grad_norm": 1.3073005676269531, "learning_rate": 2e-05, "loss": 0.04707327, "step": 24374 }, { "epoch": 48.75, "grad_norm": 0.9895936250686646, "learning_rate": 2e-05, "loss": 0.02827165, "step": 24375 }, { "epoch": 48.752, "grad_norm": 1.1249933242797852, "learning_rate": 2e-05, "loss": 0.04632932, "step": 24376 }, { "epoch": 48.754, "grad_norm": 1.274895191192627, "learning_rate": 2e-05, "loss": 0.04302908, "step": 24377 }, { "epoch": 48.756, "grad_norm": 1.0535324811935425, "learning_rate": 2e-05, "loss": 0.03941915, "step": 24378 }, { "epoch": 48.758, "grad_norm": 1.3476455211639404, "learning_rate": 2e-05, "loss": 0.05376494, "step": 24379 }, { "epoch": 48.76, "grad_norm": 1.2379733324050903, "learning_rate": 2e-05, "loss": 0.05499609, "step": 24380 }, { "epoch": 48.762, "grad_norm": 1.1249278783798218, "learning_rate": 2e-05, "loss": 0.03757162, "step": 24381 }, { "epoch": 48.764, "grad_norm": 2.2356626987457275, "learning_rate": 2e-05, "loss": 0.0576737, "step": 24382 }, { "epoch": 48.766, "grad_norm": 1.1273705959320068, "learning_rate": 2e-05, "loss": 0.05674585, "step": 24383 }, { "epoch": 48.768, "grad_norm": 1.3228777647018433, "learning_rate": 2e-05, "loss": 0.05507819, "step": 24384 }, { "epoch": 48.77, "grad_norm": 1.3240513801574707, "learning_rate": 2e-05, "loss": 0.05233713, "step": 24385 }, { "epoch": 48.772, "grad_norm": 1.2595722675323486, "learning_rate": 2e-05, "loss": 0.05647445, "step": 24386 }, { "epoch": 48.774, "grad_norm": 1.1670889854431152, "learning_rate": 2e-05, "loss": 0.0378773, "step": 24387 }, { "epoch": 48.776, "grad_norm": 0.9020729064941406, "learning_rate": 2e-05, "loss": 0.03322694, "step": 24388 }, { "epoch": 48.778, "grad_norm": 1.0231337547302246, "learning_rate": 2e-05, "loss": 0.03719312, "step": 24389 }, { "epoch": 48.78, "grad_norm": 1.0719740390777588, "learning_rate": 2e-05, "loss": 0.04352896, "step": 24390 }, { "epoch": 48.782, "grad_norm": 1.1282035112380981, "learning_rate": 2e-05, "loss": 0.05233166, "step": 24391 }, { "epoch": 48.784, "grad_norm": 1.2852978706359863, "learning_rate": 2e-05, "loss": 0.05125814, "step": 24392 }, { "epoch": 48.786, "grad_norm": 1.1023353338241577, "learning_rate": 2e-05, "loss": 0.04840891, "step": 24393 }, { "epoch": 48.788, "grad_norm": 1.5600016117095947, "learning_rate": 2e-05, "loss": 0.04811985, "step": 24394 }, { "epoch": 48.79, "grad_norm": 1.1673002243041992, "learning_rate": 2e-05, "loss": 0.04379809, "step": 24395 }, { "epoch": 48.792, "grad_norm": 1.3947582244873047, "learning_rate": 2e-05, "loss": 0.05733298, "step": 24396 }, { "epoch": 48.794, "grad_norm": 1.2544630765914917, "learning_rate": 2e-05, "loss": 0.06692179, "step": 24397 }, { "epoch": 48.796, "grad_norm": 1.9167871475219727, "learning_rate": 2e-05, "loss": 0.06020879, "step": 24398 }, { "epoch": 48.798, "grad_norm": 1.5168108940124512, "learning_rate": 2e-05, "loss": 0.04920478, "step": 24399 }, { "epoch": 48.8, "grad_norm": 1.047117829322815, "learning_rate": 2e-05, "loss": 0.03645068, "step": 24400 }, { "epoch": 48.802, "grad_norm": 1.2618528604507446, "learning_rate": 2e-05, "loss": 0.05060851, "step": 24401 }, { "epoch": 48.804, "grad_norm": 1.0257928371429443, "learning_rate": 2e-05, "loss": 0.04926369, "step": 24402 }, { "epoch": 48.806, "grad_norm": 2.2728285789489746, "learning_rate": 2e-05, "loss": 0.05713367, "step": 24403 }, { "epoch": 48.808, "grad_norm": 1.2449434995651245, "learning_rate": 2e-05, "loss": 0.05508956, "step": 24404 }, { "epoch": 48.81, "grad_norm": 1.1658732891082764, "learning_rate": 2e-05, "loss": 0.05041445, "step": 24405 }, { "epoch": 48.812, "grad_norm": 1.206467866897583, "learning_rate": 2e-05, "loss": 0.06293423, "step": 24406 }, { "epoch": 48.814, "grad_norm": 1.246690034866333, "learning_rate": 2e-05, "loss": 0.05447911, "step": 24407 }, { "epoch": 48.816, "grad_norm": 1.204403042793274, "learning_rate": 2e-05, "loss": 0.05379122, "step": 24408 }, { "epoch": 48.818, "grad_norm": 1.2013062238693237, "learning_rate": 2e-05, "loss": 0.05955196, "step": 24409 }, { "epoch": 48.82, "grad_norm": 1.0979270935058594, "learning_rate": 2e-05, "loss": 0.0413815, "step": 24410 }, { "epoch": 48.822, "grad_norm": 1.224603295326233, "learning_rate": 2e-05, "loss": 0.05617508, "step": 24411 }, { "epoch": 48.824, "grad_norm": 1.262998342514038, "learning_rate": 2e-05, "loss": 0.04537391, "step": 24412 }, { "epoch": 48.826, "grad_norm": 1.1487383842468262, "learning_rate": 2e-05, "loss": 0.03556521, "step": 24413 }, { "epoch": 48.828, "grad_norm": 0.9703657031059265, "learning_rate": 2e-05, "loss": 0.03395029, "step": 24414 }, { "epoch": 48.83, "grad_norm": 1.5587289333343506, "learning_rate": 2e-05, "loss": 0.0705722, "step": 24415 }, { "epoch": 48.832, "grad_norm": 1.0362234115600586, "learning_rate": 2e-05, "loss": 0.04194809, "step": 24416 }, { "epoch": 48.834, "grad_norm": 1.3099713325500488, "learning_rate": 2e-05, "loss": 0.05039264, "step": 24417 }, { "epoch": 48.836, "grad_norm": 2.788674831390381, "learning_rate": 2e-05, "loss": 0.05012584, "step": 24418 }, { "epoch": 48.838, "grad_norm": 1.322467565536499, "learning_rate": 2e-05, "loss": 0.0455, "step": 24419 }, { "epoch": 48.84, "grad_norm": 1.230636477470398, "learning_rate": 2e-05, "loss": 0.03692626, "step": 24420 }, { "epoch": 48.842, "grad_norm": 1.2802784442901611, "learning_rate": 2e-05, "loss": 0.05909276, "step": 24421 }, { "epoch": 48.844, "grad_norm": 1.3420884609222412, "learning_rate": 2e-05, "loss": 0.0529298, "step": 24422 }, { "epoch": 48.846, "grad_norm": 1.0645166635513306, "learning_rate": 2e-05, "loss": 0.04237661, "step": 24423 }, { "epoch": 48.848, "grad_norm": 1.489173173904419, "learning_rate": 2e-05, "loss": 0.04594196, "step": 24424 }, { "epoch": 48.85, "grad_norm": 1.9845540523529053, "learning_rate": 2e-05, "loss": 0.06666305, "step": 24425 }, { "epoch": 48.852, "grad_norm": 1.2646132707595825, "learning_rate": 2e-05, "loss": 0.06317861, "step": 24426 }, { "epoch": 48.854, "grad_norm": 1.1693730354309082, "learning_rate": 2e-05, "loss": 0.03957154, "step": 24427 }, { "epoch": 48.856, "grad_norm": 1.139262080192566, "learning_rate": 2e-05, "loss": 0.03852566, "step": 24428 }, { "epoch": 48.858, "grad_norm": 1.340086579322815, "learning_rate": 2e-05, "loss": 0.06382296, "step": 24429 }, { "epoch": 48.86, "grad_norm": 1.404128909111023, "learning_rate": 2e-05, "loss": 0.06833863, "step": 24430 }, { "epoch": 48.862, "grad_norm": 1.1025450229644775, "learning_rate": 2e-05, "loss": 0.05407901, "step": 24431 }, { "epoch": 48.864, "grad_norm": 1.1320126056671143, "learning_rate": 2e-05, "loss": 0.04277468, "step": 24432 }, { "epoch": 48.866, "grad_norm": 1.192533016204834, "learning_rate": 2e-05, "loss": 0.04574575, "step": 24433 }, { "epoch": 48.868, "grad_norm": 5.2781267166137695, "learning_rate": 2e-05, "loss": 0.06549813, "step": 24434 }, { "epoch": 48.87, "grad_norm": 1.2239826917648315, "learning_rate": 2e-05, "loss": 0.05279471, "step": 24435 }, { "epoch": 48.872, "grad_norm": 1.0941675901412964, "learning_rate": 2e-05, "loss": 0.04686259, "step": 24436 }, { "epoch": 48.874, "grad_norm": 1.5225481986999512, "learning_rate": 2e-05, "loss": 0.05451784, "step": 24437 }, { "epoch": 48.876, "grad_norm": 1.0985329151153564, "learning_rate": 2e-05, "loss": 0.03912208, "step": 24438 }, { "epoch": 48.878, "grad_norm": 0.9841903448104858, "learning_rate": 2e-05, "loss": 0.03415463, "step": 24439 }, { "epoch": 48.88, "grad_norm": 1.519121527671814, "learning_rate": 2e-05, "loss": 0.05647705, "step": 24440 }, { "epoch": 48.882, "grad_norm": 1.738462209701538, "learning_rate": 2e-05, "loss": 0.05705027, "step": 24441 }, { "epoch": 48.884, "grad_norm": 1.2349008321762085, "learning_rate": 2e-05, "loss": 0.06694788, "step": 24442 }, { "epoch": 48.886, "grad_norm": 1.1472513675689697, "learning_rate": 2e-05, "loss": 0.04741376, "step": 24443 }, { "epoch": 48.888, "grad_norm": 1.7409826517105103, "learning_rate": 2e-05, "loss": 0.04635547, "step": 24444 }, { "epoch": 48.89, "grad_norm": 1.507589340209961, "learning_rate": 2e-05, "loss": 0.04607899, "step": 24445 }, { "epoch": 48.892, "grad_norm": 2.211873769760132, "learning_rate": 2e-05, "loss": 0.05145214, "step": 24446 }, { "epoch": 48.894, "grad_norm": 1.286216139793396, "learning_rate": 2e-05, "loss": 0.06177491, "step": 24447 }, { "epoch": 48.896, "grad_norm": 1.2425750494003296, "learning_rate": 2e-05, "loss": 0.05383733, "step": 24448 }, { "epoch": 48.898, "grad_norm": 1.189475655555725, "learning_rate": 2e-05, "loss": 0.05400949, "step": 24449 }, { "epoch": 48.9, "grad_norm": 1.3408055305480957, "learning_rate": 2e-05, "loss": 0.03949302, "step": 24450 }, { "epoch": 48.902, "grad_norm": 2.6601455211639404, "learning_rate": 2e-05, "loss": 0.06074892, "step": 24451 }, { "epoch": 48.904, "grad_norm": 0.9952139854431152, "learning_rate": 2e-05, "loss": 0.04453053, "step": 24452 }, { "epoch": 48.906, "grad_norm": 1.4619004726409912, "learning_rate": 2e-05, "loss": 0.04188272, "step": 24453 }, { "epoch": 48.908, "grad_norm": 1.2014166116714478, "learning_rate": 2e-05, "loss": 0.04575384, "step": 24454 }, { "epoch": 48.91, "grad_norm": 2.021064043045044, "learning_rate": 2e-05, "loss": 0.04514164, "step": 24455 }, { "epoch": 48.912, "grad_norm": 1.2506054639816284, "learning_rate": 2e-05, "loss": 0.0620794, "step": 24456 }, { "epoch": 48.914, "grad_norm": 1.2398749589920044, "learning_rate": 2e-05, "loss": 0.04733563, "step": 24457 }, { "epoch": 48.916, "grad_norm": 1.095046043395996, "learning_rate": 2e-05, "loss": 0.04853748, "step": 24458 }, { "epoch": 48.918, "grad_norm": 2.492252826690674, "learning_rate": 2e-05, "loss": 0.04443378, "step": 24459 }, { "epoch": 48.92, "grad_norm": 1.4789276123046875, "learning_rate": 2e-05, "loss": 0.04718126, "step": 24460 }, { "epoch": 48.922, "grad_norm": 1.7125012874603271, "learning_rate": 2e-05, "loss": 0.03764442, "step": 24461 }, { "epoch": 48.924, "grad_norm": 1.5009963512420654, "learning_rate": 2e-05, "loss": 0.02679938, "step": 24462 }, { "epoch": 48.926, "grad_norm": 1.1632426977157593, "learning_rate": 2e-05, "loss": 0.04027367, "step": 24463 }, { "epoch": 48.928, "grad_norm": 1.0739227533340454, "learning_rate": 2e-05, "loss": 0.04538012, "step": 24464 }, { "epoch": 48.93, "grad_norm": 1.1324819326400757, "learning_rate": 2e-05, "loss": 0.05008095, "step": 24465 }, { "epoch": 48.932, "grad_norm": 1.2299511432647705, "learning_rate": 2e-05, "loss": 0.03868235, "step": 24466 }, { "epoch": 48.934, "grad_norm": 1.457084059715271, "learning_rate": 2e-05, "loss": 0.06458244, "step": 24467 }, { "epoch": 48.936, "grad_norm": 1.4329404830932617, "learning_rate": 2e-05, "loss": 0.06372182, "step": 24468 }, { "epoch": 48.938, "grad_norm": 1.0780876874923706, "learning_rate": 2e-05, "loss": 0.04435993, "step": 24469 }, { "epoch": 48.94, "grad_norm": 1.6297838687896729, "learning_rate": 2e-05, "loss": 0.05016711, "step": 24470 }, { "epoch": 48.942, "grad_norm": 1.1690902709960938, "learning_rate": 2e-05, "loss": 0.06398173, "step": 24471 }, { "epoch": 48.944, "grad_norm": 1.1354949474334717, "learning_rate": 2e-05, "loss": 0.04506849, "step": 24472 }, { "epoch": 48.946, "grad_norm": 1.3150396347045898, "learning_rate": 2e-05, "loss": 0.06466317, "step": 24473 }, { "epoch": 48.948, "grad_norm": 1.104675531387329, "learning_rate": 2e-05, "loss": 0.0407197, "step": 24474 }, { "epoch": 48.95, "grad_norm": 1.0967339277267456, "learning_rate": 2e-05, "loss": 0.04545303, "step": 24475 }, { "epoch": 48.952, "grad_norm": 1.0893007516860962, "learning_rate": 2e-05, "loss": 0.05033952, "step": 24476 }, { "epoch": 48.954, "grad_norm": 1.1199082136154175, "learning_rate": 2e-05, "loss": 0.04963399, "step": 24477 }, { "epoch": 48.956, "grad_norm": 1.1054890155792236, "learning_rate": 2e-05, "loss": 0.04332212, "step": 24478 }, { "epoch": 48.958, "grad_norm": 2.2792885303497314, "learning_rate": 2e-05, "loss": 0.05663423, "step": 24479 }, { "epoch": 48.96, "grad_norm": 1.1706188917160034, "learning_rate": 2e-05, "loss": 0.04888928, "step": 24480 }, { "epoch": 48.962, "grad_norm": 2.348127841949463, "learning_rate": 2e-05, "loss": 0.05282357, "step": 24481 }, { "epoch": 48.964, "grad_norm": 1.2429864406585693, "learning_rate": 2e-05, "loss": 0.05059104, "step": 24482 }, { "epoch": 48.966, "grad_norm": 1.1958144903182983, "learning_rate": 2e-05, "loss": 0.03917575, "step": 24483 }, { "epoch": 48.968, "grad_norm": 1.0921530723571777, "learning_rate": 2e-05, "loss": 0.03745528, "step": 24484 }, { "epoch": 48.97, "grad_norm": 1.7767388820648193, "learning_rate": 2e-05, "loss": 0.06159507, "step": 24485 }, { "epoch": 48.972, "grad_norm": 1.1890246868133545, "learning_rate": 2e-05, "loss": 0.04934237, "step": 24486 }, { "epoch": 48.974, "grad_norm": 1.616060495376587, "learning_rate": 2e-05, "loss": 0.04842779, "step": 24487 }, { "epoch": 48.976, "grad_norm": 2.0892298221588135, "learning_rate": 2e-05, "loss": 0.03829409, "step": 24488 }, { "epoch": 48.978, "grad_norm": 1.2111293077468872, "learning_rate": 2e-05, "loss": 0.05161772, "step": 24489 }, { "epoch": 48.98, "grad_norm": 1.258104920387268, "learning_rate": 2e-05, "loss": 0.04110197, "step": 24490 }, { "epoch": 48.982, "grad_norm": 1.1039538383483887, "learning_rate": 2e-05, "loss": 0.03855299, "step": 24491 }, { "epoch": 48.984, "grad_norm": 1.0499625205993652, "learning_rate": 2e-05, "loss": 0.04614359, "step": 24492 }, { "epoch": 48.986, "grad_norm": 1.2007472515106201, "learning_rate": 2e-05, "loss": 0.03996876, "step": 24493 }, { "epoch": 48.988, "grad_norm": 1.2354191541671753, "learning_rate": 2e-05, "loss": 0.04676054, "step": 24494 }, { "epoch": 48.99, "grad_norm": 1.0763860940933228, "learning_rate": 2e-05, "loss": 0.04218274, "step": 24495 }, { "epoch": 48.992, "grad_norm": 1.1772406101226807, "learning_rate": 2e-05, "loss": 0.06190931, "step": 24496 }, { "epoch": 48.994, "grad_norm": 2.410313844680786, "learning_rate": 2e-05, "loss": 0.05162603, "step": 24497 }, { "epoch": 48.996, "grad_norm": 1.293403148651123, "learning_rate": 2e-05, "loss": 0.04930715, "step": 24498 }, { "epoch": 48.998, "grad_norm": 2.3302156925201416, "learning_rate": 2e-05, "loss": 0.05537525, "step": 24499 }, { "epoch": 49.0, "grad_norm": 1.106102705001831, "learning_rate": 2e-05, "loss": 0.05640403, "step": 24500 }, { "epoch": 49.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9720558882235529, "Equal_1": 1.0, "Equal_2": 0.9800399201596807, "Equal_3": 0.9860279441117764, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 1.0, "Parallel_1": 0.9939879759519038, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.994, "Perpendicular_1": 1.0, "Perpendicular_2": 0.998, "Perpendicular_3": 0.8897795591182365, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.994, "PointLiesOnCircle_3": 0.9956, "PointLiesOnLine_1": 1.0, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9880239520958084 }, "eval_runtime": 320.4713, "eval_samples_per_second": 32.764, "eval_steps_per_second": 0.655, "step": 24500 }, { "epoch": 49.002, "grad_norm": 1.3205698728561401, "learning_rate": 2e-05, "loss": 0.0536387, "step": 24501 }, { "epoch": 49.004, "grad_norm": 1.0756568908691406, "learning_rate": 2e-05, "loss": 0.03509374, "step": 24502 }, { "epoch": 49.006, "grad_norm": 1.1383591890335083, "learning_rate": 2e-05, "loss": 0.05303418, "step": 24503 }, { "epoch": 49.008, "grad_norm": 1.2055492401123047, "learning_rate": 2e-05, "loss": 0.0511542, "step": 24504 }, { "epoch": 49.01, "grad_norm": 1.0886911153793335, "learning_rate": 2e-05, "loss": 0.0437855, "step": 24505 }, { "epoch": 49.012, "grad_norm": 1.0806195735931396, "learning_rate": 2e-05, "loss": 0.04402277, "step": 24506 }, { "epoch": 49.014, "grad_norm": 0.97076815366745, "learning_rate": 2e-05, "loss": 0.04103847, "step": 24507 }, { "epoch": 49.016, "grad_norm": 0.9942542910575867, "learning_rate": 2e-05, "loss": 0.03674922, "step": 24508 }, { "epoch": 49.018, "grad_norm": 1.3719143867492676, "learning_rate": 2e-05, "loss": 0.04641896, "step": 24509 }, { "epoch": 49.02, "grad_norm": 1.4681041240692139, "learning_rate": 2e-05, "loss": 0.05441123, "step": 24510 }, { "epoch": 49.022, "grad_norm": 1.3180499076843262, "learning_rate": 2e-05, "loss": 0.05244955, "step": 24511 }, { "epoch": 49.024, "grad_norm": 2.063997983932495, "learning_rate": 2e-05, "loss": 0.06359348, "step": 24512 }, { "epoch": 49.026, "grad_norm": 1.1440930366516113, "learning_rate": 2e-05, "loss": 0.04446263, "step": 24513 }, { "epoch": 49.028, "grad_norm": 1.2878350019454956, "learning_rate": 2e-05, "loss": 0.03576039, "step": 24514 }, { "epoch": 49.03, "grad_norm": 0.9617430567741394, "learning_rate": 2e-05, "loss": 0.02875559, "step": 24515 }, { "epoch": 49.032, "grad_norm": 1.2200411558151245, "learning_rate": 2e-05, "loss": 0.04824933, "step": 24516 }, { "epoch": 49.034, "grad_norm": 1.4803850650787354, "learning_rate": 2e-05, "loss": 0.05635831, "step": 24517 }, { "epoch": 49.036, "grad_norm": 1.9696398973464966, "learning_rate": 2e-05, "loss": 0.04805397, "step": 24518 }, { "epoch": 49.038, "grad_norm": 1.0635892152786255, "learning_rate": 2e-05, "loss": 0.04648872, "step": 24519 }, { "epoch": 49.04, "grad_norm": 1.0286033153533936, "learning_rate": 2e-05, "loss": 0.03219466, "step": 24520 }, { "epoch": 49.042, "grad_norm": 1.2040108442306519, "learning_rate": 2e-05, "loss": 0.05577746, "step": 24521 }, { "epoch": 49.044, "grad_norm": 1.2264634370803833, "learning_rate": 2e-05, "loss": 0.05810263, "step": 24522 }, { "epoch": 49.046, "grad_norm": 1.1748919486999512, "learning_rate": 2e-05, "loss": 0.0435359, "step": 24523 }, { "epoch": 49.048, "grad_norm": 1.623011589050293, "learning_rate": 2e-05, "loss": 0.06521875, "step": 24524 }, { "epoch": 49.05, "grad_norm": 1.593483805656433, "learning_rate": 2e-05, "loss": 0.04450686, "step": 24525 }, { "epoch": 49.052, "grad_norm": 1.0634883642196655, "learning_rate": 2e-05, "loss": 0.0392899, "step": 24526 }, { "epoch": 49.054, "grad_norm": 0.962153971195221, "learning_rate": 2e-05, "loss": 0.03904355, "step": 24527 }, { "epoch": 49.056, "grad_norm": 1.0543261766433716, "learning_rate": 2e-05, "loss": 0.03974721, "step": 24528 }, { "epoch": 49.058, "grad_norm": 1.3880788087844849, "learning_rate": 2e-05, "loss": 0.04019085, "step": 24529 }, { "epoch": 49.06, "grad_norm": 1.029956340789795, "learning_rate": 2e-05, "loss": 0.04097775, "step": 24530 }, { "epoch": 49.062, "grad_norm": 1.2715673446655273, "learning_rate": 2e-05, "loss": 0.05796398, "step": 24531 }, { "epoch": 49.064, "grad_norm": 1.1110681295394897, "learning_rate": 2e-05, "loss": 0.04320613, "step": 24532 }, { "epoch": 49.066, "grad_norm": 1.2434399127960205, "learning_rate": 2e-05, "loss": 0.05722959, "step": 24533 }, { "epoch": 49.068, "grad_norm": 1.0647788047790527, "learning_rate": 2e-05, "loss": 0.04631557, "step": 24534 }, { "epoch": 49.07, "grad_norm": 1.1238845586776733, "learning_rate": 2e-05, "loss": 0.04434226, "step": 24535 }, { "epoch": 49.072, "grad_norm": 2.4695746898651123, "learning_rate": 2e-05, "loss": 0.05840037, "step": 24536 }, { "epoch": 49.074, "grad_norm": 1.5649980306625366, "learning_rate": 2e-05, "loss": 0.04456908, "step": 24537 }, { "epoch": 49.076, "grad_norm": 1.1340800523757935, "learning_rate": 2e-05, "loss": 0.02922975, "step": 24538 }, { "epoch": 49.078, "grad_norm": 1.6249136924743652, "learning_rate": 2e-05, "loss": 0.05591683, "step": 24539 }, { "epoch": 49.08, "grad_norm": 1.0379282236099243, "learning_rate": 2e-05, "loss": 0.04590633, "step": 24540 }, { "epoch": 49.082, "grad_norm": 1.1572468280792236, "learning_rate": 2e-05, "loss": 0.05310665, "step": 24541 }, { "epoch": 49.084, "grad_norm": 1.3208953142166138, "learning_rate": 2e-05, "loss": 0.04799189, "step": 24542 }, { "epoch": 49.086, "grad_norm": 1.1476277112960815, "learning_rate": 2e-05, "loss": 0.04075008, "step": 24543 }, { "epoch": 49.088, "grad_norm": 1.445860505104065, "learning_rate": 2e-05, "loss": 0.04546492, "step": 24544 }, { "epoch": 49.09, "grad_norm": 1.02455472946167, "learning_rate": 2e-05, "loss": 0.04289842, "step": 24545 }, { "epoch": 49.092, "grad_norm": 1.0884724855422974, "learning_rate": 2e-05, "loss": 0.05313245, "step": 24546 }, { "epoch": 49.094, "grad_norm": 1.2504005432128906, "learning_rate": 2e-05, "loss": 0.05837908, "step": 24547 }, { "epoch": 49.096, "grad_norm": 1.050110101699829, "learning_rate": 2e-05, "loss": 0.04570572, "step": 24548 }, { "epoch": 49.098, "grad_norm": 1.3742461204528809, "learning_rate": 2e-05, "loss": 0.04856823, "step": 24549 }, { "epoch": 49.1, "grad_norm": 1.0349839925765991, "learning_rate": 2e-05, "loss": 0.04231741, "step": 24550 }, { "epoch": 49.102, "grad_norm": 1.7095385789871216, "learning_rate": 2e-05, "loss": 0.05369782, "step": 24551 }, { "epoch": 49.104, "grad_norm": 1.103822946548462, "learning_rate": 2e-05, "loss": 0.04244398, "step": 24552 }, { "epoch": 49.106, "grad_norm": 1.2387681007385254, "learning_rate": 2e-05, "loss": 0.05892359, "step": 24553 }, { "epoch": 49.108, "grad_norm": 1.1725491285324097, "learning_rate": 2e-05, "loss": 0.05246655, "step": 24554 }, { "epoch": 49.11, "grad_norm": 1.0159834623336792, "learning_rate": 2e-05, "loss": 0.04580885, "step": 24555 }, { "epoch": 49.112, "grad_norm": 1.1978039741516113, "learning_rate": 2e-05, "loss": 0.05257396, "step": 24556 }, { "epoch": 49.114, "grad_norm": 1.2739366292953491, "learning_rate": 2e-05, "loss": 0.06457013, "step": 24557 }, { "epoch": 49.116, "grad_norm": 1.2462983131408691, "learning_rate": 2e-05, "loss": 0.05257029, "step": 24558 }, { "epoch": 49.118, "grad_norm": 1.0246502161026, "learning_rate": 2e-05, "loss": 0.03672457, "step": 24559 }, { "epoch": 49.12, "grad_norm": 1.0243715047836304, "learning_rate": 2e-05, "loss": 0.03848898, "step": 24560 }, { "epoch": 49.122, "grad_norm": 1.0890052318572998, "learning_rate": 2e-05, "loss": 0.04123619, "step": 24561 }, { "epoch": 49.124, "grad_norm": 1.0428330898284912, "learning_rate": 2e-05, "loss": 0.04471182, "step": 24562 }, { "epoch": 49.126, "grad_norm": 1.534914255142212, "learning_rate": 2e-05, "loss": 0.03503163, "step": 24563 }, { "epoch": 49.128, "grad_norm": 3.263657808303833, "learning_rate": 2e-05, "loss": 0.06536023, "step": 24564 }, { "epoch": 49.13, "grad_norm": 0.8561310768127441, "learning_rate": 2e-05, "loss": 0.02339423, "step": 24565 }, { "epoch": 49.132, "grad_norm": 1.2668267488479614, "learning_rate": 2e-05, "loss": 0.05148039, "step": 24566 }, { "epoch": 49.134, "grad_norm": 1.4245041608810425, "learning_rate": 2e-05, "loss": 0.04260141, "step": 24567 }, { "epoch": 49.136, "grad_norm": 1.2553406953811646, "learning_rate": 2e-05, "loss": 0.05330418, "step": 24568 }, { "epoch": 49.138, "grad_norm": 1.5819698572158813, "learning_rate": 2e-05, "loss": 0.0418847, "step": 24569 }, { "epoch": 49.14, "grad_norm": 1.157503604888916, "learning_rate": 2e-05, "loss": 0.0456868, "step": 24570 }, { "epoch": 49.142, "grad_norm": 1.1057533025741577, "learning_rate": 2e-05, "loss": 0.04578763, "step": 24571 }, { "epoch": 49.144, "grad_norm": 1.3727549314498901, "learning_rate": 2e-05, "loss": 0.06042334, "step": 24572 }, { "epoch": 49.146, "grad_norm": 1.2736660242080688, "learning_rate": 2e-05, "loss": 0.05840679, "step": 24573 }, { "epoch": 49.148, "grad_norm": 1.6220810413360596, "learning_rate": 2e-05, "loss": 0.04685524, "step": 24574 }, { "epoch": 49.15, "grad_norm": 1.0651113986968994, "learning_rate": 2e-05, "loss": 0.0451685, "step": 24575 }, { "epoch": 49.152, "grad_norm": 1.3879947662353516, "learning_rate": 2e-05, "loss": 0.05266923, "step": 24576 }, { "epoch": 49.154, "grad_norm": 1.164844036102295, "learning_rate": 2e-05, "loss": 0.04246272, "step": 24577 }, { "epoch": 49.156, "grad_norm": 1.1619504690170288, "learning_rate": 2e-05, "loss": 0.04659127, "step": 24578 }, { "epoch": 49.158, "grad_norm": 1.3078287839889526, "learning_rate": 2e-05, "loss": 0.04308105, "step": 24579 }, { "epoch": 49.16, "grad_norm": 1.6431167125701904, "learning_rate": 2e-05, "loss": 0.04275879, "step": 24580 }, { "epoch": 49.162, "grad_norm": 0.956802248954773, "learning_rate": 2e-05, "loss": 0.03798681, "step": 24581 }, { "epoch": 49.164, "grad_norm": 1.2058719396591187, "learning_rate": 2e-05, "loss": 0.04575798, "step": 24582 }, { "epoch": 49.166, "grad_norm": 0.9033730626106262, "learning_rate": 2e-05, "loss": 0.03232187, "step": 24583 }, { "epoch": 49.168, "grad_norm": 1.3325138092041016, "learning_rate": 2e-05, "loss": 0.03885297, "step": 24584 }, { "epoch": 49.17, "grad_norm": 1.1684311628341675, "learning_rate": 2e-05, "loss": 0.04936481, "step": 24585 }, { "epoch": 49.172, "grad_norm": 0.9386561512947083, "learning_rate": 2e-05, "loss": 0.02889916, "step": 24586 }, { "epoch": 49.174, "grad_norm": 1.115565538406372, "learning_rate": 2e-05, "loss": 0.04214649, "step": 24587 }, { "epoch": 49.176, "grad_norm": 1.8959245681762695, "learning_rate": 2e-05, "loss": 0.05413285, "step": 24588 }, { "epoch": 49.178, "grad_norm": 1.2707425355911255, "learning_rate": 2e-05, "loss": 0.04919063, "step": 24589 }, { "epoch": 49.18, "grad_norm": 1.1527551412582397, "learning_rate": 2e-05, "loss": 0.04787771, "step": 24590 }, { "epoch": 49.182, "grad_norm": 1.5040851831436157, "learning_rate": 2e-05, "loss": 0.0654645, "step": 24591 }, { "epoch": 49.184, "grad_norm": 1.2260459661483765, "learning_rate": 2e-05, "loss": 0.04879155, "step": 24592 }, { "epoch": 49.186, "grad_norm": 1.2306815385818481, "learning_rate": 2e-05, "loss": 0.03491394, "step": 24593 }, { "epoch": 49.188, "grad_norm": 1.220257043838501, "learning_rate": 2e-05, "loss": 0.04896056, "step": 24594 }, { "epoch": 49.19, "grad_norm": 1.178658366203308, "learning_rate": 2e-05, "loss": 0.03527681, "step": 24595 }, { "epoch": 49.192, "grad_norm": 1.795593023300171, "learning_rate": 2e-05, "loss": 0.07905695, "step": 24596 }, { "epoch": 49.194, "grad_norm": 1.1190385818481445, "learning_rate": 2e-05, "loss": 0.04868895, "step": 24597 }, { "epoch": 49.196, "grad_norm": 1.0901833772659302, "learning_rate": 2e-05, "loss": 0.04650778, "step": 24598 }, { "epoch": 49.198, "grad_norm": 1.1784275770187378, "learning_rate": 2e-05, "loss": 0.03697899, "step": 24599 }, { "epoch": 49.2, "grad_norm": 0.9963816404342651, "learning_rate": 2e-05, "loss": 0.04271566, "step": 24600 }, { "epoch": 49.202, "grad_norm": 1.3513808250427246, "learning_rate": 2e-05, "loss": 0.03827177, "step": 24601 }, { "epoch": 49.204, "grad_norm": 1.297082781791687, "learning_rate": 2e-05, "loss": 0.05176802, "step": 24602 }, { "epoch": 49.206, "grad_norm": 1.3902502059936523, "learning_rate": 2e-05, "loss": 0.04333564, "step": 24603 }, { "epoch": 49.208, "grad_norm": 1.5116182565689087, "learning_rate": 2e-05, "loss": 0.06067524, "step": 24604 }, { "epoch": 49.21, "grad_norm": 1.116183876991272, "learning_rate": 2e-05, "loss": 0.04102014, "step": 24605 }, { "epoch": 49.212, "grad_norm": 1.3426848649978638, "learning_rate": 2e-05, "loss": 0.06735122, "step": 24606 }, { "epoch": 49.214, "grad_norm": 1.37204110622406, "learning_rate": 2e-05, "loss": 0.04117677, "step": 24607 }, { "epoch": 49.216, "grad_norm": 1.1000261306762695, "learning_rate": 2e-05, "loss": 0.03981378, "step": 24608 }, { "epoch": 49.218, "grad_norm": 0.9247246980667114, "learning_rate": 2e-05, "loss": 0.02853769, "step": 24609 }, { "epoch": 49.22, "grad_norm": 1.3698744773864746, "learning_rate": 2e-05, "loss": 0.05477355, "step": 24610 }, { "epoch": 49.222, "grad_norm": 7.3837504386901855, "learning_rate": 2e-05, "loss": 0.06142946, "step": 24611 }, { "epoch": 49.224, "grad_norm": 1.7267507314682007, "learning_rate": 2e-05, "loss": 0.05547143, "step": 24612 }, { "epoch": 49.226, "grad_norm": 1.1767046451568604, "learning_rate": 2e-05, "loss": 0.05019828, "step": 24613 }, { "epoch": 49.228, "grad_norm": 1.5185580253601074, "learning_rate": 2e-05, "loss": 0.05122478, "step": 24614 }, { "epoch": 49.23, "grad_norm": 1.286940097808838, "learning_rate": 2e-05, "loss": 0.05315614, "step": 24615 }, { "epoch": 49.232, "grad_norm": 1.2825911045074463, "learning_rate": 2e-05, "loss": 0.04709144, "step": 24616 }, { "epoch": 49.234, "grad_norm": 1.1364567279815674, "learning_rate": 2e-05, "loss": 0.06106904, "step": 24617 }, { "epoch": 49.236, "grad_norm": 1.2616055011749268, "learning_rate": 2e-05, "loss": 0.05195578, "step": 24618 }, { "epoch": 49.238, "grad_norm": 1.2682206630706787, "learning_rate": 2e-05, "loss": 0.0589853, "step": 24619 }, { "epoch": 49.24, "grad_norm": 1.1738789081573486, "learning_rate": 2e-05, "loss": 0.04862665, "step": 24620 }, { "epoch": 49.242, "grad_norm": 1.5461301803588867, "learning_rate": 2e-05, "loss": 0.05598284, "step": 24621 }, { "epoch": 49.244, "grad_norm": 1.336150050163269, "learning_rate": 2e-05, "loss": 0.05503397, "step": 24622 }, { "epoch": 49.246, "grad_norm": 1.2071928977966309, "learning_rate": 2e-05, "loss": 0.05489289, "step": 24623 }, { "epoch": 49.248, "grad_norm": 1.186057448387146, "learning_rate": 2e-05, "loss": 0.04869892, "step": 24624 }, { "epoch": 49.25, "grad_norm": 1.1905598640441895, "learning_rate": 2e-05, "loss": 0.04225285, "step": 24625 }, { "epoch": 49.252, "grad_norm": 1.1007153987884521, "learning_rate": 2e-05, "loss": 0.05079801, "step": 24626 }, { "epoch": 49.254, "grad_norm": 1.4382884502410889, "learning_rate": 2e-05, "loss": 0.03811382, "step": 24627 }, { "epoch": 49.256, "grad_norm": 1.2123656272888184, "learning_rate": 2e-05, "loss": 0.05187578, "step": 24628 }, { "epoch": 49.258, "grad_norm": 1.222488284111023, "learning_rate": 2e-05, "loss": 0.05206077, "step": 24629 }, { "epoch": 49.26, "grad_norm": 1.4503012895584106, "learning_rate": 2e-05, "loss": 0.04863203, "step": 24630 }, { "epoch": 49.262, "grad_norm": 1.278603196144104, "learning_rate": 2e-05, "loss": 0.05332947, "step": 24631 }, { "epoch": 49.264, "grad_norm": 1.2204537391662598, "learning_rate": 2e-05, "loss": 0.0511545, "step": 24632 }, { "epoch": 49.266, "grad_norm": 1.0226802825927734, "learning_rate": 2e-05, "loss": 0.0365659, "step": 24633 }, { "epoch": 49.268, "grad_norm": 0.9728658199310303, "learning_rate": 2e-05, "loss": 0.03656091, "step": 24634 }, { "epoch": 49.27, "grad_norm": 1.2572722434997559, "learning_rate": 2e-05, "loss": 0.05211511, "step": 24635 }, { "epoch": 49.272, "grad_norm": 1.153205394744873, "learning_rate": 2e-05, "loss": 0.06159973, "step": 24636 }, { "epoch": 49.274, "grad_norm": 1.1002930402755737, "learning_rate": 2e-05, "loss": 0.04466134, "step": 24637 }, { "epoch": 49.276, "grad_norm": 1.2222155332565308, "learning_rate": 2e-05, "loss": 0.05036249, "step": 24638 }, { "epoch": 49.278, "grad_norm": 3.585354804992676, "learning_rate": 2e-05, "loss": 0.03442348, "step": 24639 }, { "epoch": 49.28, "grad_norm": 1.2959743738174438, "learning_rate": 2e-05, "loss": 0.05108596, "step": 24640 }, { "epoch": 49.282, "grad_norm": 1.3540737628936768, "learning_rate": 2e-05, "loss": 0.05969388, "step": 24641 }, { "epoch": 49.284, "grad_norm": 1.276310920715332, "learning_rate": 2e-05, "loss": 0.05358229, "step": 24642 }, { "epoch": 49.286, "grad_norm": 1.1716383695602417, "learning_rate": 2e-05, "loss": 0.03722314, "step": 24643 }, { "epoch": 49.288, "grad_norm": 1.0765808820724487, "learning_rate": 2e-05, "loss": 0.03878718, "step": 24644 }, { "epoch": 49.29, "grad_norm": 1.1529293060302734, "learning_rate": 2e-05, "loss": 0.05101361, "step": 24645 }, { "epoch": 49.292, "grad_norm": 1.209089994430542, "learning_rate": 2e-05, "loss": 0.03981159, "step": 24646 }, { "epoch": 49.294, "grad_norm": 1.1552914381027222, "learning_rate": 2e-05, "loss": 0.03878843, "step": 24647 }, { "epoch": 49.296, "grad_norm": 3.3828413486480713, "learning_rate": 2e-05, "loss": 0.06451204, "step": 24648 }, { "epoch": 49.298, "grad_norm": 1.4841753244400024, "learning_rate": 2e-05, "loss": 0.04134528, "step": 24649 }, { "epoch": 49.3, "grad_norm": 1.1813883781433105, "learning_rate": 2e-05, "loss": 0.04356892, "step": 24650 }, { "epoch": 49.302, "grad_norm": 1.3516215085983276, "learning_rate": 2e-05, "loss": 0.06003821, "step": 24651 }, { "epoch": 49.304, "grad_norm": 1.0883997678756714, "learning_rate": 2e-05, "loss": 0.04874385, "step": 24652 }, { "epoch": 49.306, "grad_norm": 1.3258439302444458, "learning_rate": 2e-05, "loss": 0.06162676, "step": 24653 }, { "epoch": 49.308, "grad_norm": 0.9306918382644653, "learning_rate": 2e-05, "loss": 0.04051726, "step": 24654 }, { "epoch": 49.31, "grad_norm": 1.3069220781326294, "learning_rate": 2e-05, "loss": 0.06446021, "step": 24655 }, { "epoch": 49.312, "grad_norm": 1.4380950927734375, "learning_rate": 2e-05, "loss": 0.06534431, "step": 24656 }, { "epoch": 49.314, "grad_norm": 1.293108582496643, "learning_rate": 2e-05, "loss": 0.05122873, "step": 24657 }, { "epoch": 49.316, "grad_norm": 1.5020869970321655, "learning_rate": 2e-05, "loss": 0.04365094, "step": 24658 }, { "epoch": 49.318, "grad_norm": 1.0571807622909546, "learning_rate": 2e-05, "loss": 0.04433767, "step": 24659 }, { "epoch": 49.32, "grad_norm": 1.6660947799682617, "learning_rate": 2e-05, "loss": 0.05386006, "step": 24660 }, { "epoch": 49.322, "grad_norm": 1.2687824964523315, "learning_rate": 2e-05, "loss": 0.05124846, "step": 24661 }, { "epoch": 49.324, "grad_norm": 1.3088194131851196, "learning_rate": 2e-05, "loss": 0.05232558, "step": 24662 }, { "epoch": 49.326, "grad_norm": 1.0739314556121826, "learning_rate": 2e-05, "loss": 0.04608125, "step": 24663 }, { "epoch": 49.328, "grad_norm": 1.3387324810028076, "learning_rate": 2e-05, "loss": 0.0492764, "step": 24664 }, { "epoch": 49.33, "grad_norm": 1.2844595909118652, "learning_rate": 2e-05, "loss": 0.04417478, "step": 24665 }, { "epoch": 49.332, "grad_norm": 1.1230336427688599, "learning_rate": 2e-05, "loss": 0.04758064, "step": 24666 }, { "epoch": 49.334, "grad_norm": 5.419571876525879, "learning_rate": 2e-05, "loss": 0.04511326, "step": 24667 }, { "epoch": 49.336, "grad_norm": 1.1522164344787598, "learning_rate": 2e-05, "loss": 0.04697948, "step": 24668 }, { "epoch": 49.338, "grad_norm": 1.43909752368927, "learning_rate": 2e-05, "loss": 0.05124324, "step": 24669 }, { "epoch": 49.34, "grad_norm": 1.290650486946106, "learning_rate": 2e-05, "loss": 0.03891211, "step": 24670 }, { "epoch": 49.342, "grad_norm": 1.0777543783187866, "learning_rate": 2e-05, "loss": 0.03897686, "step": 24671 }, { "epoch": 49.344, "grad_norm": 1.2240030765533447, "learning_rate": 2e-05, "loss": 0.04273064, "step": 24672 }, { "epoch": 49.346, "grad_norm": 1.2230967283248901, "learning_rate": 2e-05, "loss": 0.05533133, "step": 24673 }, { "epoch": 49.348, "grad_norm": 1.7442550659179688, "learning_rate": 2e-05, "loss": 0.04444986, "step": 24674 }, { "epoch": 49.35, "grad_norm": 1.193528413772583, "learning_rate": 2e-05, "loss": 0.02971295, "step": 24675 }, { "epoch": 49.352, "grad_norm": 1.548940896987915, "learning_rate": 2e-05, "loss": 0.05257022, "step": 24676 }, { "epoch": 49.354, "grad_norm": 1.4281851053237915, "learning_rate": 2e-05, "loss": 0.05808748, "step": 24677 }, { "epoch": 49.356, "grad_norm": 1.1668903827667236, "learning_rate": 2e-05, "loss": 0.039745, "step": 24678 }, { "epoch": 49.358, "grad_norm": 2.4779348373413086, "learning_rate": 2e-05, "loss": 0.04287686, "step": 24679 }, { "epoch": 49.36, "grad_norm": 1.0857728719711304, "learning_rate": 2e-05, "loss": 0.03546745, "step": 24680 }, { "epoch": 49.362, "grad_norm": 1.758777141571045, "learning_rate": 2e-05, "loss": 0.03802776, "step": 24681 }, { "epoch": 49.364, "grad_norm": 0.9436732530593872, "learning_rate": 2e-05, "loss": 0.03220242, "step": 24682 }, { "epoch": 49.366, "grad_norm": 1.0815670490264893, "learning_rate": 2e-05, "loss": 0.04280725, "step": 24683 }, { "epoch": 49.368, "grad_norm": 1.114959955215454, "learning_rate": 2e-05, "loss": 0.04519267, "step": 24684 }, { "epoch": 49.37, "grad_norm": 1.2002875804901123, "learning_rate": 2e-05, "loss": 0.04737946, "step": 24685 }, { "epoch": 49.372, "grad_norm": 1.381123661994934, "learning_rate": 2e-05, "loss": 0.0593003, "step": 24686 }, { "epoch": 49.374, "grad_norm": 2.0842089653015137, "learning_rate": 2e-05, "loss": 0.0516322, "step": 24687 }, { "epoch": 49.376, "grad_norm": 1.0045135021209717, "learning_rate": 2e-05, "loss": 0.03573417, "step": 24688 }, { "epoch": 49.378, "grad_norm": 1.3091294765472412, "learning_rate": 2e-05, "loss": 0.04659965, "step": 24689 }, { "epoch": 49.38, "grad_norm": 1.3777577877044678, "learning_rate": 2e-05, "loss": 0.0343422, "step": 24690 }, { "epoch": 49.382, "grad_norm": 1.5019209384918213, "learning_rate": 2e-05, "loss": 0.05598423, "step": 24691 }, { "epoch": 49.384, "grad_norm": 1.168333888053894, "learning_rate": 2e-05, "loss": 0.04606739, "step": 24692 }, { "epoch": 49.386, "grad_norm": 1.3140724897384644, "learning_rate": 2e-05, "loss": 0.05491744, "step": 24693 }, { "epoch": 49.388, "grad_norm": 1.1507341861724854, "learning_rate": 2e-05, "loss": 0.04055898, "step": 24694 }, { "epoch": 49.39, "grad_norm": 1.2338950634002686, "learning_rate": 2e-05, "loss": 0.04385046, "step": 24695 }, { "epoch": 49.392, "grad_norm": 1.2507073879241943, "learning_rate": 2e-05, "loss": 0.06624331, "step": 24696 }, { "epoch": 49.394, "grad_norm": 1.172641634941101, "learning_rate": 2e-05, "loss": 0.05282669, "step": 24697 }, { "epoch": 49.396, "grad_norm": 1.290877103805542, "learning_rate": 2e-05, "loss": 0.0552482, "step": 24698 }, { "epoch": 49.398, "grad_norm": 1.3046849966049194, "learning_rate": 2e-05, "loss": 0.05484886, "step": 24699 }, { "epoch": 49.4, "grad_norm": 1.0632675886154175, "learning_rate": 2e-05, "loss": 0.04052684, "step": 24700 }, { "epoch": 49.402, "grad_norm": 1.0683989524841309, "learning_rate": 2e-05, "loss": 0.04582777, "step": 24701 }, { "epoch": 49.404, "grad_norm": 1.6077747344970703, "learning_rate": 2e-05, "loss": 0.04787633, "step": 24702 }, { "epoch": 49.406, "grad_norm": 1.207194209098816, "learning_rate": 2e-05, "loss": 0.05000635, "step": 24703 }, { "epoch": 49.408, "grad_norm": 1.1203255653381348, "learning_rate": 2e-05, "loss": 0.05599294, "step": 24704 }, { "epoch": 49.41, "grad_norm": 1.4439189434051514, "learning_rate": 2e-05, "loss": 0.04717576, "step": 24705 }, { "epoch": 49.412, "grad_norm": 1.1799260377883911, "learning_rate": 2e-05, "loss": 0.04708055, "step": 24706 }, { "epoch": 49.414, "grad_norm": 1.2697405815124512, "learning_rate": 2e-05, "loss": 0.05073357, "step": 24707 }, { "epoch": 49.416, "grad_norm": 1.1728367805480957, "learning_rate": 2e-05, "loss": 0.05580144, "step": 24708 }, { "epoch": 49.418, "grad_norm": 1.9259074926376343, "learning_rate": 2e-05, "loss": 0.04573488, "step": 24709 }, { "epoch": 49.42, "grad_norm": 1.285468578338623, "learning_rate": 2e-05, "loss": 0.06362525, "step": 24710 }, { "epoch": 49.422, "grad_norm": 1.0827348232269287, "learning_rate": 2e-05, "loss": 0.0446627, "step": 24711 }, { "epoch": 49.424, "grad_norm": 1.354174256324768, "learning_rate": 2e-05, "loss": 0.06585463, "step": 24712 }, { "epoch": 49.426, "grad_norm": 1.2439528703689575, "learning_rate": 2e-05, "loss": 0.04843699, "step": 24713 }, { "epoch": 49.428, "grad_norm": 1.024624228477478, "learning_rate": 2e-05, "loss": 0.04217855, "step": 24714 }, { "epoch": 49.43, "grad_norm": 1.0700979232788086, "learning_rate": 2e-05, "loss": 0.04749613, "step": 24715 }, { "epoch": 49.432, "grad_norm": 1.496504306793213, "learning_rate": 2e-05, "loss": 0.0489388, "step": 24716 }, { "epoch": 49.434, "grad_norm": 1.1693992614746094, "learning_rate": 2e-05, "loss": 0.04739952, "step": 24717 }, { "epoch": 49.436, "grad_norm": 1.1603397130966187, "learning_rate": 2e-05, "loss": 0.04808056, "step": 24718 }, { "epoch": 49.438, "grad_norm": 1.1455094814300537, "learning_rate": 2e-05, "loss": 0.05102035, "step": 24719 }, { "epoch": 49.44, "grad_norm": 1.1179344654083252, "learning_rate": 2e-05, "loss": 0.04601201, "step": 24720 }, { "epoch": 49.442, "grad_norm": 1.0473073720932007, "learning_rate": 2e-05, "loss": 0.04930402, "step": 24721 }, { "epoch": 49.444, "grad_norm": 1.236930251121521, "learning_rate": 2e-05, "loss": 0.04765532, "step": 24722 }, { "epoch": 49.446, "grad_norm": 1.2558659315109253, "learning_rate": 2e-05, "loss": 0.04867148, "step": 24723 }, { "epoch": 49.448, "grad_norm": 1.1845966577529907, "learning_rate": 2e-05, "loss": 0.04930171, "step": 24724 }, { "epoch": 49.45, "grad_norm": 1.112603783607483, "learning_rate": 2e-05, "loss": 0.04704773, "step": 24725 }, { "epoch": 49.452, "grad_norm": 1.188025712966919, "learning_rate": 2e-05, "loss": 0.04383368, "step": 24726 }, { "epoch": 49.454, "grad_norm": 1.1775012016296387, "learning_rate": 2e-05, "loss": 0.04882312, "step": 24727 }, { "epoch": 49.456, "grad_norm": 1.2298845052719116, "learning_rate": 2e-05, "loss": 0.04555701, "step": 24728 }, { "epoch": 49.458, "grad_norm": 1.373660683631897, "learning_rate": 2e-05, "loss": 0.05526736, "step": 24729 }, { "epoch": 49.46, "grad_norm": 1.3296241760253906, "learning_rate": 2e-05, "loss": 0.06211912, "step": 24730 }, { "epoch": 49.462, "grad_norm": 1.1016606092453003, "learning_rate": 2e-05, "loss": 0.05170915, "step": 24731 }, { "epoch": 49.464, "grad_norm": 1.3367371559143066, "learning_rate": 2e-05, "loss": 0.04138402, "step": 24732 }, { "epoch": 49.466, "grad_norm": 3.0442302227020264, "learning_rate": 2e-05, "loss": 0.07401902, "step": 24733 }, { "epoch": 49.468, "grad_norm": 1.304378867149353, "learning_rate": 2e-05, "loss": 0.04841861, "step": 24734 }, { "epoch": 49.47, "grad_norm": 1.211232304573059, "learning_rate": 2e-05, "loss": 0.04923613, "step": 24735 }, { "epoch": 49.472, "grad_norm": 1.0207058191299438, "learning_rate": 2e-05, "loss": 0.03922756, "step": 24736 }, { "epoch": 49.474, "grad_norm": 1.7041534185409546, "learning_rate": 2e-05, "loss": 0.04910336, "step": 24737 }, { "epoch": 49.476, "grad_norm": 1.1709401607513428, "learning_rate": 2e-05, "loss": 0.04106373, "step": 24738 }, { "epoch": 49.478, "grad_norm": 2.144960880279541, "learning_rate": 2e-05, "loss": 0.05597088, "step": 24739 }, { "epoch": 49.48, "grad_norm": 1.052988052368164, "learning_rate": 2e-05, "loss": 0.04293213, "step": 24740 }, { "epoch": 49.482, "grad_norm": 1.4180502891540527, "learning_rate": 2e-05, "loss": 0.04159679, "step": 24741 }, { "epoch": 49.484, "grad_norm": 1.2406322956085205, "learning_rate": 2e-05, "loss": 0.05517141, "step": 24742 }, { "epoch": 49.486, "grad_norm": 1.1686590909957886, "learning_rate": 2e-05, "loss": 0.04570251, "step": 24743 }, { "epoch": 49.488, "grad_norm": 1.2575324773788452, "learning_rate": 2e-05, "loss": 0.03278139, "step": 24744 }, { "epoch": 49.49, "grad_norm": 1.0804585218429565, "learning_rate": 2e-05, "loss": 0.03636895, "step": 24745 }, { "epoch": 49.492, "grad_norm": 1.3195343017578125, "learning_rate": 2e-05, "loss": 0.05228486, "step": 24746 }, { "epoch": 49.494, "grad_norm": 1.1343023777008057, "learning_rate": 2e-05, "loss": 0.04752772, "step": 24747 }, { "epoch": 49.496, "grad_norm": 1.1542292833328247, "learning_rate": 2e-05, "loss": 0.04346114, "step": 24748 }, { "epoch": 49.498, "grad_norm": 1.0267248153686523, "learning_rate": 2e-05, "loss": 0.04148855, "step": 24749 }, { "epoch": 49.5, "grad_norm": 1.197594404220581, "learning_rate": 2e-05, "loss": 0.05338103, "step": 24750 }, { "epoch": 49.502, "grad_norm": 1.3752979040145874, "learning_rate": 2e-05, "loss": 0.05251706, "step": 24751 }, { "epoch": 49.504, "grad_norm": 2.483018636703491, "learning_rate": 2e-05, "loss": 0.05616256, "step": 24752 }, { "epoch": 49.506, "grad_norm": 0.9340147972106934, "learning_rate": 2e-05, "loss": 0.0262505, "step": 24753 }, { "epoch": 49.508, "grad_norm": 1.1258265972137451, "learning_rate": 2e-05, "loss": 0.03799858, "step": 24754 }, { "epoch": 49.51, "grad_norm": 1.6418697834014893, "learning_rate": 2e-05, "loss": 0.06251703, "step": 24755 }, { "epoch": 49.512, "grad_norm": 1.4351956844329834, "learning_rate": 2e-05, "loss": 0.04956281, "step": 24756 }, { "epoch": 49.514, "grad_norm": 1.3971424102783203, "learning_rate": 2e-05, "loss": 0.07184118, "step": 24757 }, { "epoch": 49.516, "grad_norm": 1.242491602897644, "learning_rate": 2e-05, "loss": 0.04963819, "step": 24758 }, { "epoch": 49.518, "grad_norm": 1.8168903589248657, "learning_rate": 2e-05, "loss": 0.05463743, "step": 24759 }, { "epoch": 49.52, "grad_norm": 1.0832699537277222, "learning_rate": 2e-05, "loss": 0.04424177, "step": 24760 }, { "epoch": 49.522, "grad_norm": 0.9689600467681885, "learning_rate": 2e-05, "loss": 0.04252536, "step": 24761 }, { "epoch": 49.524, "grad_norm": 1.111344337463379, "learning_rate": 2e-05, "loss": 0.04760269, "step": 24762 }, { "epoch": 49.526, "grad_norm": 1.120718240737915, "learning_rate": 2e-05, "loss": 0.04254458, "step": 24763 }, { "epoch": 49.528, "grad_norm": 1.1055079698562622, "learning_rate": 2e-05, "loss": 0.04112123, "step": 24764 }, { "epoch": 49.53, "grad_norm": 1.8217605352401733, "learning_rate": 2e-05, "loss": 0.05603817, "step": 24765 }, { "epoch": 49.532, "grad_norm": 1.008670449256897, "learning_rate": 2e-05, "loss": 0.03235183, "step": 24766 }, { "epoch": 49.534, "grad_norm": 1.1165664196014404, "learning_rate": 2e-05, "loss": 0.0455579, "step": 24767 }, { "epoch": 49.536, "grad_norm": 1.0880547761917114, "learning_rate": 2e-05, "loss": 0.04338773, "step": 24768 }, { "epoch": 49.538, "grad_norm": 0.9217278361320496, "learning_rate": 2e-05, "loss": 0.03157655, "step": 24769 }, { "epoch": 49.54, "grad_norm": 1.095906376838684, "learning_rate": 2e-05, "loss": 0.04428188, "step": 24770 }, { "epoch": 49.542, "grad_norm": 1.5616658926010132, "learning_rate": 2e-05, "loss": 0.03784732, "step": 24771 }, { "epoch": 49.544, "grad_norm": 1.0943976640701294, "learning_rate": 2e-05, "loss": 0.04282872, "step": 24772 }, { "epoch": 49.546, "grad_norm": 1.0541517734527588, "learning_rate": 2e-05, "loss": 0.04491699, "step": 24773 }, { "epoch": 49.548, "grad_norm": 3.1224377155303955, "learning_rate": 2e-05, "loss": 0.04894248, "step": 24774 }, { "epoch": 49.55, "grad_norm": 1.100438117980957, "learning_rate": 2e-05, "loss": 0.04886558, "step": 24775 }, { "epoch": 49.552, "grad_norm": 1.7008203268051147, "learning_rate": 2e-05, "loss": 0.03542287, "step": 24776 }, { "epoch": 49.554, "grad_norm": 1.2163397073745728, "learning_rate": 2e-05, "loss": 0.05112882, "step": 24777 }, { "epoch": 49.556, "grad_norm": 1.178572177886963, "learning_rate": 2e-05, "loss": 0.05226102, "step": 24778 }, { "epoch": 49.558, "grad_norm": 1.07505202293396, "learning_rate": 2e-05, "loss": 0.04829258, "step": 24779 }, { "epoch": 49.56, "grad_norm": 1.545498013496399, "learning_rate": 2e-05, "loss": 0.05802028, "step": 24780 }, { "epoch": 49.562, "grad_norm": 1.0516809225082397, "learning_rate": 2e-05, "loss": 0.03485567, "step": 24781 }, { "epoch": 49.564, "grad_norm": 1.0159010887145996, "learning_rate": 2e-05, "loss": 0.04023987, "step": 24782 }, { "epoch": 49.566, "grad_norm": 1.1692757606506348, "learning_rate": 2e-05, "loss": 0.04836097, "step": 24783 }, { "epoch": 49.568, "grad_norm": 1.2164714336395264, "learning_rate": 2e-05, "loss": 0.05162116, "step": 24784 }, { "epoch": 49.57, "grad_norm": 1.0095146894454956, "learning_rate": 2e-05, "loss": 0.04396236, "step": 24785 }, { "epoch": 49.572, "grad_norm": 1.3631678819656372, "learning_rate": 2e-05, "loss": 0.05843265, "step": 24786 }, { "epoch": 49.574, "grad_norm": 1.3029148578643799, "learning_rate": 2e-05, "loss": 0.04724909, "step": 24787 }, { "epoch": 49.576, "grad_norm": 1.3042387962341309, "learning_rate": 2e-05, "loss": 0.04229445, "step": 24788 }, { "epoch": 49.578, "grad_norm": 1.0347017049789429, "learning_rate": 2e-05, "loss": 0.03812275, "step": 24789 }, { "epoch": 49.58, "grad_norm": 1.7310672998428345, "learning_rate": 2e-05, "loss": 0.06088685, "step": 24790 }, { "epoch": 49.582, "grad_norm": 0.9168398976325989, "learning_rate": 2e-05, "loss": 0.02849315, "step": 24791 }, { "epoch": 49.584, "grad_norm": 1.1964490413665771, "learning_rate": 2e-05, "loss": 0.0535724, "step": 24792 }, { "epoch": 49.586, "grad_norm": 1.1546590328216553, "learning_rate": 2e-05, "loss": 0.03581116, "step": 24793 }, { "epoch": 49.588, "grad_norm": 1.0855379104614258, "learning_rate": 2e-05, "loss": 0.05509954, "step": 24794 }, { "epoch": 49.59, "grad_norm": 1.2587318420410156, "learning_rate": 2e-05, "loss": 0.04235309, "step": 24795 }, { "epoch": 49.592, "grad_norm": 1.2183408737182617, "learning_rate": 2e-05, "loss": 0.05718917, "step": 24796 }, { "epoch": 49.594, "grad_norm": 1.1804263591766357, "learning_rate": 2e-05, "loss": 0.04849926, "step": 24797 }, { "epoch": 49.596, "grad_norm": 1.4819769859313965, "learning_rate": 2e-05, "loss": 0.05709495, "step": 24798 }, { "epoch": 49.598, "grad_norm": 1.1413755416870117, "learning_rate": 2e-05, "loss": 0.0554666, "step": 24799 }, { "epoch": 49.6, "grad_norm": 1.1274505853652954, "learning_rate": 2e-05, "loss": 0.04003352, "step": 24800 }, { "epoch": 49.602, "grad_norm": 1.2199150323867798, "learning_rate": 2e-05, "loss": 0.04663592, "step": 24801 }, { "epoch": 49.604, "grad_norm": 1.1105495691299438, "learning_rate": 2e-05, "loss": 0.04194749, "step": 24802 }, { "epoch": 49.606, "grad_norm": 1.1821727752685547, "learning_rate": 2e-05, "loss": 0.05207427, "step": 24803 }, { "epoch": 49.608, "grad_norm": 1.0401041507720947, "learning_rate": 2e-05, "loss": 0.04428156, "step": 24804 }, { "epoch": 49.61, "grad_norm": 1.840406060218811, "learning_rate": 2e-05, "loss": 0.05603269, "step": 24805 }, { "epoch": 49.612, "grad_norm": 1.3533574342727661, "learning_rate": 2e-05, "loss": 0.03703146, "step": 24806 }, { "epoch": 49.614, "grad_norm": 1.0640056133270264, "learning_rate": 2e-05, "loss": 0.03546765, "step": 24807 }, { "epoch": 49.616, "grad_norm": 1.206377625465393, "learning_rate": 2e-05, "loss": 0.05672641, "step": 24808 }, { "epoch": 49.618, "grad_norm": 1.0968784093856812, "learning_rate": 2e-05, "loss": 0.03934178, "step": 24809 }, { "epoch": 49.62, "grad_norm": 1.4784859418869019, "learning_rate": 2e-05, "loss": 0.05061619, "step": 24810 }, { "epoch": 49.622, "grad_norm": 1.2458291053771973, "learning_rate": 2e-05, "loss": 0.04952991, "step": 24811 }, { "epoch": 49.624, "grad_norm": 0.917413055896759, "learning_rate": 2e-05, "loss": 0.03567467, "step": 24812 }, { "epoch": 49.626, "grad_norm": 1.093457818031311, "learning_rate": 2e-05, "loss": 0.04676176, "step": 24813 }, { "epoch": 49.628, "grad_norm": 1.19669771194458, "learning_rate": 2e-05, "loss": 0.05353126, "step": 24814 }, { "epoch": 49.63, "grad_norm": 1.258165955543518, "learning_rate": 2e-05, "loss": 0.05881431, "step": 24815 }, { "epoch": 49.632, "grad_norm": 1.8352237939834595, "learning_rate": 2e-05, "loss": 0.03418227, "step": 24816 }, { "epoch": 49.634, "grad_norm": 1.2149909734725952, "learning_rate": 2e-05, "loss": 0.0405385, "step": 24817 }, { "epoch": 49.636, "grad_norm": 1.1387282609939575, "learning_rate": 2e-05, "loss": 0.04924862, "step": 24818 }, { "epoch": 49.638, "grad_norm": 1.0884685516357422, "learning_rate": 2e-05, "loss": 0.03455062, "step": 24819 }, { "epoch": 49.64, "grad_norm": 1.1216976642608643, "learning_rate": 2e-05, "loss": 0.04718848, "step": 24820 }, { "epoch": 49.642, "grad_norm": 1.1525574922561646, "learning_rate": 2e-05, "loss": 0.0507764, "step": 24821 }, { "epoch": 49.644, "grad_norm": 1.040356993675232, "learning_rate": 2e-05, "loss": 0.03835782, "step": 24822 }, { "epoch": 49.646, "grad_norm": 1.6863218545913696, "learning_rate": 2e-05, "loss": 0.05974036, "step": 24823 }, { "epoch": 49.648, "grad_norm": 1.5963078737258911, "learning_rate": 2e-05, "loss": 0.05577124, "step": 24824 }, { "epoch": 49.65, "grad_norm": 1.9898076057434082, "learning_rate": 2e-05, "loss": 0.04162847, "step": 24825 }, { "epoch": 49.652, "grad_norm": 1.7517437934875488, "learning_rate": 2e-05, "loss": 0.04477061, "step": 24826 }, { "epoch": 49.654, "grad_norm": 1.0563920736312866, "learning_rate": 2e-05, "loss": 0.04491702, "step": 24827 }, { "epoch": 49.656, "grad_norm": 1.1382147073745728, "learning_rate": 2e-05, "loss": 0.04465196, "step": 24828 }, { "epoch": 49.658, "grad_norm": 1.1406420469284058, "learning_rate": 2e-05, "loss": 0.04676701, "step": 24829 }, { "epoch": 49.66, "grad_norm": 0.9442988038063049, "learning_rate": 2e-05, "loss": 0.03124127, "step": 24830 }, { "epoch": 49.662, "grad_norm": 1.63547682762146, "learning_rate": 2e-05, "loss": 0.05501399, "step": 24831 }, { "epoch": 49.664, "grad_norm": 0.9990745782852173, "learning_rate": 2e-05, "loss": 0.03285347, "step": 24832 }, { "epoch": 49.666, "grad_norm": 1.0436017513275146, "learning_rate": 2e-05, "loss": 0.0376052, "step": 24833 }, { "epoch": 49.668, "grad_norm": 1.4094048738479614, "learning_rate": 2e-05, "loss": 0.05660111, "step": 24834 }, { "epoch": 49.67, "grad_norm": 1.181092381477356, "learning_rate": 2e-05, "loss": 0.04279668, "step": 24835 }, { "epoch": 49.672, "grad_norm": 1.2665365934371948, "learning_rate": 2e-05, "loss": 0.05205832, "step": 24836 }, { "epoch": 49.674, "grad_norm": 1.061452031135559, "learning_rate": 2e-05, "loss": 0.03486584, "step": 24837 }, { "epoch": 49.676, "grad_norm": 1.312247395515442, "learning_rate": 2e-05, "loss": 0.05554381, "step": 24838 }, { "epoch": 49.678, "grad_norm": 1.2573567628860474, "learning_rate": 2e-05, "loss": 0.05710577, "step": 24839 }, { "epoch": 49.68, "grad_norm": 1.2652338743209839, "learning_rate": 2e-05, "loss": 0.04693561, "step": 24840 }, { "epoch": 49.682, "grad_norm": 1.076812505722046, "learning_rate": 2e-05, "loss": 0.04891666, "step": 24841 }, { "epoch": 49.684, "grad_norm": 1.0280793905258179, "learning_rate": 2e-05, "loss": 0.03525931, "step": 24842 }, { "epoch": 49.686, "grad_norm": 1.2222983837127686, "learning_rate": 2e-05, "loss": 0.03659509, "step": 24843 }, { "epoch": 49.688, "grad_norm": 1.1637095212936401, "learning_rate": 2e-05, "loss": 0.04860913, "step": 24844 }, { "epoch": 49.69, "grad_norm": 1.232571005821228, "learning_rate": 2e-05, "loss": 0.04756819, "step": 24845 }, { "epoch": 49.692, "grad_norm": 1.7061697244644165, "learning_rate": 2e-05, "loss": 0.05409347, "step": 24846 }, { "epoch": 49.694, "grad_norm": 3.033412218093872, "learning_rate": 2e-05, "loss": 0.05069696, "step": 24847 }, { "epoch": 49.696, "grad_norm": 1.3082060813903809, "learning_rate": 2e-05, "loss": 0.06048907, "step": 24848 }, { "epoch": 49.698, "grad_norm": 1.3506269454956055, "learning_rate": 2e-05, "loss": 0.04115216, "step": 24849 }, { "epoch": 49.7, "grad_norm": 1.5505048036575317, "learning_rate": 2e-05, "loss": 0.04728535, "step": 24850 }, { "epoch": 49.702, "grad_norm": 1.154044270515442, "learning_rate": 2e-05, "loss": 0.03792886, "step": 24851 }, { "epoch": 49.704, "grad_norm": 1.337306261062622, "learning_rate": 2e-05, "loss": 0.0570382, "step": 24852 }, { "epoch": 49.706, "grad_norm": 1.206878900527954, "learning_rate": 2e-05, "loss": 0.05738255, "step": 24853 }, { "epoch": 49.708, "grad_norm": 1.0860217809677124, "learning_rate": 2e-05, "loss": 0.04733476, "step": 24854 }, { "epoch": 49.71, "grad_norm": 1.1992672681808472, "learning_rate": 2e-05, "loss": 0.05342748, "step": 24855 }, { "epoch": 49.712, "grad_norm": 1.2483594417572021, "learning_rate": 2e-05, "loss": 0.03885884, "step": 24856 }, { "epoch": 49.714, "grad_norm": 1.1189866065979004, "learning_rate": 2e-05, "loss": 0.03883381, "step": 24857 }, { "epoch": 49.716, "grad_norm": 3.194491386413574, "learning_rate": 2e-05, "loss": 0.05132719, "step": 24858 }, { "epoch": 49.718, "grad_norm": 2.8470568656921387, "learning_rate": 2e-05, "loss": 0.04896916, "step": 24859 }, { "epoch": 49.72, "grad_norm": 1.2007850408554077, "learning_rate": 2e-05, "loss": 0.04390725, "step": 24860 }, { "epoch": 49.722, "grad_norm": 1.5928409099578857, "learning_rate": 2e-05, "loss": 0.06557049, "step": 24861 }, { "epoch": 49.724, "grad_norm": 1.5397547483444214, "learning_rate": 2e-05, "loss": 0.05117305, "step": 24862 }, { "epoch": 49.726, "grad_norm": 1.1637266874313354, "learning_rate": 2e-05, "loss": 0.0384597, "step": 24863 }, { "epoch": 49.728, "grad_norm": 1.3207765817642212, "learning_rate": 2e-05, "loss": 0.04964681, "step": 24864 }, { "epoch": 49.73, "grad_norm": 1.3087096214294434, "learning_rate": 2e-05, "loss": 0.05499934, "step": 24865 }, { "epoch": 49.732, "grad_norm": 1.8449457883834839, "learning_rate": 2e-05, "loss": 0.053078, "step": 24866 }, { "epoch": 49.734, "grad_norm": 1.1697624921798706, "learning_rate": 2e-05, "loss": 0.04434161, "step": 24867 }, { "epoch": 49.736, "grad_norm": 1.2733197212219238, "learning_rate": 2e-05, "loss": 0.05949679, "step": 24868 }, { "epoch": 49.738, "grad_norm": 1.6024494171142578, "learning_rate": 2e-05, "loss": 0.05078848, "step": 24869 }, { "epoch": 49.74, "grad_norm": 1.3443810939788818, "learning_rate": 2e-05, "loss": 0.04602218, "step": 24870 }, { "epoch": 49.742, "grad_norm": 1.3979158401489258, "learning_rate": 2e-05, "loss": 0.05351568, "step": 24871 }, { "epoch": 49.744, "grad_norm": 1.2029260396957397, "learning_rate": 2e-05, "loss": 0.0432038, "step": 24872 }, { "epoch": 49.746, "grad_norm": 1.2258926630020142, "learning_rate": 2e-05, "loss": 0.05626082, "step": 24873 }, { "epoch": 49.748, "grad_norm": 1.232932686805725, "learning_rate": 2e-05, "loss": 0.05177718, "step": 24874 }, { "epoch": 49.75, "grad_norm": 1.1512670516967773, "learning_rate": 2e-05, "loss": 0.04957498, "step": 24875 }, { "epoch": 49.752, "grad_norm": 1.1977880001068115, "learning_rate": 2e-05, "loss": 0.04884469, "step": 24876 }, { "epoch": 49.754, "grad_norm": 1.363495111465454, "learning_rate": 2e-05, "loss": 0.04705457, "step": 24877 }, { "epoch": 49.756, "grad_norm": 1.2528479099273682, "learning_rate": 2e-05, "loss": 0.04862735, "step": 24878 }, { "epoch": 49.758, "grad_norm": 2.5792670249938965, "learning_rate": 2e-05, "loss": 0.0410705, "step": 24879 }, { "epoch": 49.76, "grad_norm": 2.7361626625061035, "learning_rate": 2e-05, "loss": 0.05311979, "step": 24880 }, { "epoch": 49.762, "grad_norm": 2.464141845703125, "learning_rate": 2e-05, "loss": 0.05702599, "step": 24881 }, { "epoch": 49.764, "grad_norm": 1.099612832069397, "learning_rate": 2e-05, "loss": 0.03931236, "step": 24882 }, { "epoch": 49.766, "grad_norm": 1.0781171321868896, "learning_rate": 2e-05, "loss": 0.04257463, "step": 24883 }, { "epoch": 49.768, "grad_norm": 4.444637775421143, "learning_rate": 2e-05, "loss": 0.05754374, "step": 24884 }, { "epoch": 49.77, "grad_norm": 1.3171765804290771, "learning_rate": 2e-05, "loss": 0.05533032, "step": 24885 }, { "epoch": 49.772, "grad_norm": 1.1796399354934692, "learning_rate": 2e-05, "loss": 0.03754626, "step": 24886 }, { "epoch": 49.774, "grad_norm": 1.154632806777954, "learning_rate": 2e-05, "loss": 0.04902484, "step": 24887 }, { "epoch": 49.776, "grad_norm": 1.1982251405715942, "learning_rate": 2e-05, "loss": 0.04432965, "step": 24888 }, { "epoch": 49.778, "grad_norm": 1.4120286703109741, "learning_rate": 2e-05, "loss": 0.04318996, "step": 24889 }, { "epoch": 49.78, "grad_norm": 1.3181231021881104, "learning_rate": 2e-05, "loss": 0.04140037, "step": 24890 }, { "epoch": 49.782, "grad_norm": 1.1879209280014038, "learning_rate": 2e-05, "loss": 0.04180463, "step": 24891 }, { "epoch": 49.784, "grad_norm": 1.2485787868499756, "learning_rate": 2e-05, "loss": 0.06122434, "step": 24892 }, { "epoch": 49.786, "grad_norm": 1.3617092370986938, "learning_rate": 2e-05, "loss": 0.04306863, "step": 24893 }, { "epoch": 49.788, "grad_norm": 1.1197565793991089, "learning_rate": 2e-05, "loss": 0.04167994, "step": 24894 }, { "epoch": 49.79, "grad_norm": 1.2155925035476685, "learning_rate": 2e-05, "loss": 0.05426242, "step": 24895 }, { "epoch": 49.792, "grad_norm": 1.309443712234497, "learning_rate": 2e-05, "loss": 0.05209014, "step": 24896 }, { "epoch": 49.794, "grad_norm": 2.3679568767547607, "learning_rate": 2e-05, "loss": 0.0490745, "step": 24897 }, { "epoch": 49.796, "grad_norm": 1.1244512796401978, "learning_rate": 2e-05, "loss": 0.04849419, "step": 24898 }, { "epoch": 49.798, "grad_norm": 1.0218008756637573, "learning_rate": 2e-05, "loss": 0.04120092, "step": 24899 }, { "epoch": 49.8, "grad_norm": 1.2458765506744385, "learning_rate": 2e-05, "loss": 0.05435371, "step": 24900 }, { "epoch": 49.802, "grad_norm": 1.1713382005691528, "learning_rate": 2e-05, "loss": 0.04432591, "step": 24901 }, { "epoch": 49.804, "grad_norm": 1.043931007385254, "learning_rate": 2e-05, "loss": 0.03500161, "step": 24902 }, { "epoch": 49.806, "grad_norm": 1.1524564027786255, "learning_rate": 2e-05, "loss": 0.05594937, "step": 24903 }, { "epoch": 49.808, "grad_norm": 1.2194099426269531, "learning_rate": 2e-05, "loss": 0.06552596, "step": 24904 }, { "epoch": 49.81, "grad_norm": 1.5245249271392822, "learning_rate": 2e-05, "loss": 0.04009125, "step": 24905 }, { "epoch": 49.812, "grad_norm": 1.0253095626831055, "learning_rate": 2e-05, "loss": 0.04843939, "step": 24906 }, { "epoch": 49.814, "grad_norm": 1.9079041481018066, "learning_rate": 2e-05, "loss": 0.05018196, "step": 24907 }, { "epoch": 49.816, "grad_norm": 1.1070141792297363, "learning_rate": 2e-05, "loss": 0.04317448, "step": 24908 }, { "epoch": 49.818, "grad_norm": 1.1777336597442627, "learning_rate": 2e-05, "loss": 0.05528118, "step": 24909 }, { "epoch": 49.82, "grad_norm": 1.162487268447876, "learning_rate": 2e-05, "loss": 0.04349917, "step": 24910 }, { "epoch": 49.822, "grad_norm": 1.0818195343017578, "learning_rate": 2e-05, "loss": 0.03660588, "step": 24911 }, { "epoch": 49.824, "grad_norm": 1.178435206413269, "learning_rate": 2e-05, "loss": 0.04270165, "step": 24912 }, { "epoch": 49.826, "grad_norm": 1.0772615671157837, "learning_rate": 2e-05, "loss": 0.0409967, "step": 24913 }, { "epoch": 49.828, "grad_norm": 1.5055285692214966, "learning_rate": 2e-05, "loss": 0.0552909, "step": 24914 }, { "epoch": 49.83, "grad_norm": 1.929326057434082, "learning_rate": 2e-05, "loss": 0.05568858, "step": 24915 }, { "epoch": 49.832, "grad_norm": 1.478667974472046, "learning_rate": 2e-05, "loss": 0.06444258, "step": 24916 }, { "epoch": 49.834, "grad_norm": 2.3012428283691406, "learning_rate": 2e-05, "loss": 0.06190757, "step": 24917 }, { "epoch": 49.836, "grad_norm": 1.908669114112854, "learning_rate": 2e-05, "loss": 0.07899611, "step": 24918 }, { "epoch": 49.838, "grad_norm": 1.1932661533355713, "learning_rate": 2e-05, "loss": 0.05439082, "step": 24919 }, { "epoch": 49.84, "grad_norm": 1.1718835830688477, "learning_rate": 2e-05, "loss": 0.04695781, "step": 24920 }, { "epoch": 49.842, "grad_norm": 1.2461529970169067, "learning_rate": 2e-05, "loss": 0.06060567, "step": 24921 }, { "epoch": 49.844, "grad_norm": 1.0214687585830688, "learning_rate": 2e-05, "loss": 0.03788566, "step": 24922 }, { "epoch": 49.846, "grad_norm": 1.10343599319458, "learning_rate": 2e-05, "loss": 0.05159609, "step": 24923 }, { "epoch": 49.848, "grad_norm": 1.017777919769287, "learning_rate": 2e-05, "loss": 0.0479437, "step": 24924 }, { "epoch": 49.85, "grad_norm": 1.4957668781280518, "learning_rate": 2e-05, "loss": 0.04876685, "step": 24925 }, { "epoch": 49.852, "grad_norm": 1.3842114210128784, "learning_rate": 2e-05, "loss": 0.05079174, "step": 24926 }, { "epoch": 49.854, "grad_norm": 1.3459488153457642, "learning_rate": 2e-05, "loss": 0.05015388, "step": 24927 }, { "epoch": 49.856, "grad_norm": 1.1398237943649292, "learning_rate": 2e-05, "loss": 0.04445317, "step": 24928 }, { "epoch": 49.858, "grad_norm": 1.344321370124817, "learning_rate": 2e-05, "loss": 0.05805134, "step": 24929 }, { "epoch": 49.86, "grad_norm": 1.3307359218597412, "learning_rate": 2e-05, "loss": 0.04095314, "step": 24930 }, { "epoch": 49.862, "grad_norm": 1.3429630994796753, "learning_rate": 2e-05, "loss": 0.04923171, "step": 24931 }, { "epoch": 49.864, "grad_norm": 1.1111401319503784, "learning_rate": 2e-05, "loss": 0.05133491, "step": 24932 }, { "epoch": 49.866, "grad_norm": 1.182901382446289, "learning_rate": 2e-05, "loss": 0.05458375, "step": 24933 }, { "epoch": 49.868, "grad_norm": 0.9391345977783203, "learning_rate": 2e-05, "loss": 0.03023691, "step": 24934 }, { "epoch": 49.87, "grad_norm": 2.566356658935547, "learning_rate": 2e-05, "loss": 0.06601861, "step": 24935 }, { "epoch": 49.872, "grad_norm": 1.2246321439743042, "learning_rate": 2e-05, "loss": 0.05346267, "step": 24936 }, { "epoch": 49.874, "grad_norm": 1.1111116409301758, "learning_rate": 2e-05, "loss": 0.04999265, "step": 24937 }, { "epoch": 49.876, "grad_norm": 1.101694941520691, "learning_rate": 2e-05, "loss": 0.04546491, "step": 24938 }, { "epoch": 49.878, "grad_norm": 1.1672554016113281, "learning_rate": 2e-05, "loss": 0.0529165, "step": 24939 }, { "epoch": 49.88, "grad_norm": 1.219642996788025, "learning_rate": 2e-05, "loss": 0.03548888, "step": 24940 }, { "epoch": 49.882, "grad_norm": 1.246626853942871, "learning_rate": 2e-05, "loss": 0.05844071, "step": 24941 }, { "epoch": 49.884, "grad_norm": 1.2160290479660034, "learning_rate": 2e-05, "loss": 0.05645484, "step": 24942 }, { "epoch": 49.886, "grad_norm": 2.756503105163574, "learning_rate": 2e-05, "loss": 0.05539309, "step": 24943 }, { "epoch": 49.888, "grad_norm": 2.0938100814819336, "learning_rate": 2e-05, "loss": 0.04288248, "step": 24944 }, { "epoch": 49.89, "grad_norm": 1.1077803373336792, "learning_rate": 2e-05, "loss": 0.04419069, "step": 24945 }, { "epoch": 49.892, "grad_norm": 1.1682178974151611, "learning_rate": 2e-05, "loss": 0.04008082, "step": 24946 }, { "epoch": 49.894, "grad_norm": 1.7397586107254028, "learning_rate": 2e-05, "loss": 0.04435687, "step": 24947 }, { "epoch": 49.896, "grad_norm": 0.9368448257446289, "learning_rate": 2e-05, "loss": 0.04006282, "step": 24948 }, { "epoch": 49.898, "grad_norm": 2.1141977310180664, "learning_rate": 2e-05, "loss": 0.04719079, "step": 24949 }, { "epoch": 49.9, "grad_norm": 1.1511861085891724, "learning_rate": 2e-05, "loss": 0.03775914, "step": 24950 }, { "epoch": 49.902, "grad_norm": 1.1330093145370483, "learning_rate": 2e-05, "loss": 0.04169647, "step": 24951 }, { "epoch": 49.904, "grad_norm": 1.1492831707000732, "learning_rate": 2e-05, "loss": 0.04711498, "step": 24952 }, { "epoch": 49.906, "grad_norm": 1.6958730220794678, "learning_rate": 2e-05, "loss": 0.05036974, "step": 24953 }, { "epoch": 49.908, "grad_norm": 1.0818232297897339, "learning_rate": 2e-05, "loss": 0.0415853, "step": 24954 }, { "epoch": 49.91, "grad_norm": 1.1111513376235962, "learning_rate": 2e-05, "loss": 0.04978892, "step": 24955 }, { "epoch": 49.912, "grad_norm": 1.2296136617660522, "learning_rate": 2e-05, "loss": 0.04639769, "step": 24956 }, { "epoch": 49.914, "grad_norm": 1.226378321647644, "learning_rate": 2e-05, "loss": 0.05798329, "step": 24957 }, { "epoch": 49.916, "grad_norm": 1.0596457719802856, "learning_rate": 2e-05, "loss": 0.04839835, "step": 24958 }, { "epoch": 49.918, "grad_norm": 1.3038498163223267, "learning_rate": 2e-05, "loss": 0.05488671, "step": 24959 }, { "epoch": 49.92, "grad_norm": 1.0039633512496948, "learning_rate": 2e-05, "loss": 0.03758329, "step": 24960 }, { "epoch": 49.922, "grad_norm": 1.0808930397033691, "learning_rate": 2e-05, "loss": 0.03469931, "step": 24961 }, { "epoch": 49.924, "grad_norm": 1.2433420419692993, "learning_rate": 2e-05, "loss": 0.04504021, "step": 24962 }, { "epoch": 49.926, "grad_norm": 1.254826545715332, "learning_rate": 2e-05, "loss": 0.05490943, "step": 24963 }, { "epoch": 49.928, "grad_norm": 0.9941253066062927, "learning_rate": 2e-05, "loss": 0.05116001, "step": 24964 }, { "epoch": 49.93, "grad_norm": 1.0954303741455078, "learning_rate": 2e-05, "loss": 0.0452432, "step": 24965 }, { "epoch": 49.932, "grad_norm": 1.0251585245132446, "learning_rate": 2e-05, "loss": 0.04928794, "step": 24966 }, { "epoch": 49.934, "grad_norm": 1.1478753089904785, "learning_rate": 2e-05, "loss": 0.0425126, "step": 24967 }, { "epoch": 49.936, "grad_norm": 1.222156047821045, "learning_rate": 2e-05, "loss": 0.04921298, "step": 24968 }, { "epoch": 49.938, "grad_norm": 1.1650967597961426, "learning_rate": 2e-05, "loss": 0.04439209, "step": 24969 }, { "epoch": 49.94, "grad_norm": 1.2680879831314087, "learning_rate": 2e-05, "loss": 0.05739405, "step": 24970 }, { "epoch": 49.942, "grad_norm": 1.1213183403015137, "learning_rate": 2e-05, "loss": 0.05398349, "step": 24971 }, { "epoch": 49.944, "grad_norm": 1.088508129119873, "learning_rate": 2e-05, "loss": 0.04370025, "step": 24972 }, { "epoch": 49.946, "grad_norm": 1.314470887184143, "learning_rate": 2e-05, "loss": 0.05276281, "step": 24973 }, { "epoch": 49.948, "grad_norm": 1.5439000129699707, "learning_rate": 2e-05, "loss": 0.04817521, "step": 24974 }, { "epoch": 49.95, "grad_norm": 1.5129501819610596, "learning_rate": 2e-05, "loss": 0.05268683, "step": 24975 }, { "epoch": 49.952, "grad_norm": 1.2225157022476196, "learning_rate": 2e-05, "loss": 0.05818931, "step": 24976 }, { "epoch": 49.954, "grad_norm": 2.1078433990478516, "learning_rate": 2e-05, "loss": 0.05812836, "step": 24977 }, { "epoch": 49.956, "grad_norm": 1.2115638256072998, "learning_rate": 2e-05, "loss": 0.04945941, "step": 24978 }, { "epoch": 49.958, "grad_norm": 1.2102251052856445, "learning_rate": 2e-05, "loss": 0.04883428, "step": 24979 }, { "epoch": 49.96, "grad_norm": 1.0502766370773315, "learning_rate": 2e-05, "loss": 0.03550196, "step": 24980 }, { "epoch": 49.962, "grad_norm": 1.2148598432540894, "learning_rate": 2e-05, "loss": 0.048013, "step": 24981 }, { "epoch": 49.964, "grad_norm": 1.189860224723816, "learning_rate": 2e-05, "loss": 0.06181513, "step": 24982 }, { "epoch": 49.966, "grad_norm": 1.195906639099121, "learning_rate": 2e-05, "loss": 0.04937382, "step": 24983 }, { "epoch": 49.968, "grad_norm": 3.3317697048187256, "learning_rate": 2e-05, "loss": 0.05652332, "step": 24984 }, { "epoch": 49.97, "grad_norm": 0.9888531565666199, "learning_rate": 2e-05, "loss": 0.04144951, "step": 24985 }, { "epoch": 49.972, "grad_norm": 1.15919828414917, "learning_rate": 2e-05, "loss": 0.04281298, "step": 24986 }, { "epoch": 49.974, "grad_norm": 1.744892954826355, "learning_rate": 2e-05, "loss": 0.04002345, "step": 24987 }, { "epoch": 49.976, "grad_norm": 1.2224351167678833, "learning_rate": 2e-05, "loss": 0.04044309, "step": 24988 }, { "epoch": 49.978, "grad_norm": 1.1010823249816895, "learning_rate": 2e-05, "loss": 0.04040832, "step": 24989 }, { "epoch": 49.98, "grad_norm": 1.0111849308013916, "learning_rate": 2e-05, "loss": 0.03537663, "step": 24990 }, { "epoch": 49.982, "grad_norm": 1.0763754844665527, "learning_rate": 2e-05, "loss": 0.0413045, "step": 24991 }, { "epoch": 49.984, "grad_norm": 1.0886006355285645, "learning_rate": 2e-05, "loss": 0.05110417, "step": 24992 }, { "epoch": 49.986, "grad_norm": 1.0088766813278198, "learning_rate": 2e-05, "loss": 0.03378244, "step": 24993 }, { "epoch": 49.988, "grad_norm": 1.1878137588500977, "learning_rate": 2e-05, "loss": 0.05160052, "step": 24994 }, { "epoch": 49.99, "grad_norm": 1.188334584236145, "learning_rate": 2e-05, "loss": 0.03924968, "step": 24995 }, { "epoch": 49.992, "grad_norm": 1.1772879362106323, "learning_rate": 2e-05, "loss": 0.05064984, "step": 24996 }, { "epoch": 49.994, "grad_norm": 0.7895515561103821, "learning_rate": 2e-05, "loss": 0.02828157, "step": 24997 }, { "epoch": 49.996, "grad_norm": 0.9504234790802002, "learning_rate": 2e-05, "loss": 0.02684129, "step": 24998 }, { "epoch": 49.998, "grad_norm": 1.4684773683547974, "learning_rate": 2e-05, "loss": 0.05880807, "step": 24999 }, { "epoch": 50.0, "grad_norm": 1.0322144031524658, "learning_rate": 2e-05, "loss": 0.03352503, "step": 25000 }, { "epoch": 50.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9840319361277445, "Equal_1": 1.0, "Equal_2": 0.9840319361277445, "Equal_3": 0.9940119760479041, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 1.0, "Parallel_1": 0.9919839679358717, "Parallel_2": 0.9919839679358717, "Parallel_3": 0.994, "Perpendicular_1": 1.0, "Perpendicular_2": 0.996, "Perpendicular_3": 0.9078156312625251, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.99, "PointLiesOnLine_1": 1.0, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9880239520958084 }, "eval_runtime": 321.2431, "eval_samples_per_second": 32.686, "eval_steps_per_second": 0.654, "step": 25000 }, { "epoch": 50.0, "step": 25000, "total_flos": 9.912786476979978e+17, "train_loss": 0.05224685758773238, "train_runtime": 92925.7559, "train_samples_per_second": 17.218, "train_steps_per_second": 0.269 } ], "logging_steps": 1, "max_steps": 25000, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.912786476979978e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }