{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 100, "global_step": 2630, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.7020912646068926, "learning_rate": 2.531645569620253e-06, "loss": 4.131, "step": 1 }, { "epoch": 0.0, "grad_norm": 4.711257017230532, "learning_rate": 5.063291139240506e-06, "loss": 4.3352, "step": 2 }, { "epoch": 0.01, "grad_norm": 4.204808135014829, "learning_rate": 7.5949367088607605e-06, "loss": 4.2239, "step": 3 }, { "epoch": 0.01, "grad_norm": 4.434566825013616, "learning_rate": 1.0126582278481012e-05, "loss": 4.3129, "step": 4 }, { "epoch": 0.01, "grad_norm": 3.6885819077043704, "learning_rate": 1.2658227848101267e-05, "loss": 4.1114, "step": 5 }, { "epoch": 0.01, "grad_norm": 3.0688481470287092, "learning_rate": 1.5189873417721521e-05, "loss": 3.9076, "step": 6 }, { "epoch": 0.01, "grad_norm": 3.1125018255964707, "learning_rate": 1.7721518987341772e-05, "loss": 4.0988, "step": 7 }, { "epoch": 0.02, "grad_norm": 2.8562898632514764, "learning_rate": 2.0253164556962025e-05, "loss": 3.6883, "step": 8 }, { "epoch": 0.02, "grad_norm": 2.675271603209553, "learning_rate": 2.278481012658228e-05, "loss": 3.5593, "step": 9 }, { "epoch": 0.02, "grad_norm": 2.3421094900998516, "learning_rate": 2.5316455696202533e-05, "loss": 3.3601, "step": 10 }, { "epoch": 0.02, "grad_norm": 2.0715165975113154, "learning_rate": 2.7848101265822786e-05, "loss": 3.1437, "step": 11 }, { "epoch": 0.02, "grad_norm": 1.7317337818341625, "learning_rate": 3.0379746835443042e-05, "loss": 2.9834, "step": 12 }, { "epoch": 0.02, "grad_norm": 1.3528047563991092, "learning_rate": 3.291139240506329e-05, "loss": 2.8964, "step": 13 }, { "epoch": 0.03, "grad_norm": 1.2912860501125218, "learning_rate": 3.5443037974683544e-05, "loss": 2.8228, "step": 14 }, { "epoch": 0.03, "grad_norm": 1.5476770100119945, "learning_rate": 3.79746835443038e-05, "loss": 2.8982, "step": 15 }, { "epoch": 0.03, "grad_norm": 1.8731912110220108, "learning_rate": 4.050632911392405e-05, "loss": 2.6205, "step": 16 }, { "epoch": 0.03, "grad_norm": 1.927460308855779, "learning_rate": 4.3037974683544305e-05, "loss": 2.8007, "step": 17 }, { "epoch": 0.03, "grad_norm": 2.485736011372154, "learning_rate": 4.556962025316456e-05, "loss": 2.6022, "step": 18 }, { "epoch": 0.04, "grad_norm": 1.3829298580284985, "learning_rate": 4.810126582278481e-05, "loss": 2.6883, "step": 19 }, { "epoch": 0.04, "grad_norm": 1.511355611852234, "learning_rate": 5.0632911392405066e-05, "loss": 2.6065, "step": 20 }, { "epoch": 0.04, "grad_norm": 1.3682306812253513, "learning_rate": 5.3164556962025316e-05, "loss": 2.4445, "step": 21 }, { "epoch": 0.04, "grad_norm": 1.0778966755383976, "learning_rate": 5.569620253164557e-05, "loss": 2.4926, "step": 22 }, { "epoch": 0.04, "grad_norm": 0.954389778115518, "learning_rate": 5.822784810126583e-05, "loss": 2.4405, "step": 23 }, { "epoch": 0.05, "grad_norm": 0.8725081430596546, "learning_rate": 6.0759493670886084e-05, "loss": 2.3653, "step": 24 }, { "epoch": 0.05, "grad_norm": 0.846332603595458, "learning_rate": 6.329113924050633e-05, "loss": 2.4129, "step": 25 }, { "epoch": 0.05, "grad_norm": 1.0104753547546788, "learning_rate": 6.582278481012658e-05, "loss": 2.4878, "step": 26 }, { "epoch": 0.05, "grad_norm": 0.793457068361266, "learning_rate": 6.835443037974683e-05, "loss": 2.397, "step": 27 }, { "epoch": 0.05, "grad_norm": 0.7558673214901267, "learning_rate": 7.088607594936709e-05, "loss": 2.3936, "step": 28 }, { "epoch": 0.06, "grad_norm": 0.6929143146296144, "learning_rate": 7.341772151898734e-05, "loss": 2.1862, "step": 29 }, { "epoch": 0.06, "grad_norm": 0.8817924098651356, "learning_rate": 7.59493670886076e-05, "loss": 2.2683, "step": 30 }, { "epoch": 0.06, "grad_norm": 0.8365858066866906, "learning_rate": 7.848101265822784e-05, "loss": 2.2765, "step": 31 }, { "epoch": 0.06, "grad_norm": 0.7636104520858601, "learning_rate": 8.10126582278481e-05, "loss": 2.3457, "step": 32 }, { "epoch": 0.06, "grad_norm": 0.8046227576870789, "learning_rate": 8.354430379746835e-05, "loss": 2.245, "step": 33 }, { "epoch": 0.06, "grad_norm": 0.7444961152372495, "learning_rate": 8.607594936708861e-05, "loss": 2.1621, "step": 34 }, { "epoch": 0.07, "grad_norm": 0.8517870010137579, "learning_rate": 8.860759493670887e-05, "loss": 2.3147, "step": 35 }, { "epoch": 0.07, "grad_norm": 0.7277022148936985, "learning_rate": 9.113924050632912e-05, "loss": 2.1934, "step": 36 }, { "epoch": 0.07, "grad_norm": 0.7921223459103484, "learning_rate": 9.367088607594936e-05, "loss": 2.3078, "step": 37 }, { "epoch": 0.07, "grad_norm": 0.812128937567918, "learning_rate": 9.620253164556962e-05, "loss": 2.0993, "step": 38 }, { "epoch": 0.07, "grad_norm": 0.9107680221281336, "learning_rate": 9.873417721518988e-05, "loss": 2.2832, "step": 39 }, { "epoch": 0.08, "grad_norm": 0.7853226762475329, "learning_rate": 0.00010126582278481013, "loss": 2.1324, "step": 40 }, { "epoch": 0.08, "grad_norm": 0.8130606787909572, "learning_rate": 0.00010379746835443039, "loss": 2.1732, "step": 41 }, { "epoch": 0.08, "grad_norm": 0.7670251505409779, "learning_rate": 0.00010632911392405063, "loss": 2.1297, "step": 42 }, { "epoch": 0.08, "grad_norm": 0.8616023351470287, "learning_rate": 0.00010886075949367089, "loss": 2.1162, "step": 43 }, { "epoch": 0.08, "grad_norm": 0.9935878026746121, "learning_rate": 0.00011139240506329114, "loss": 2.1604, "step": 44 }, { "epoch": 0.09, "grad_norm": 0.886693613306828, "learning_rate": 0.0001139240506329114, "loss": 2.1091, "step": 45 }, { "epoch": 0.09, "grad_norm": 0.8408380978133146, "learning_rate": 0.00011645569620253166, "loss": 2.0425, "step": 46 }, { "epoch": 0.09, "grad_norm": 0.8571487189687654, "learning_rate": 0.0001189873417721519, "loss": 2.0622, "step": 47 }, { "epoch": 0.09, "grad_norm": 0.9973321906766347, "learning_rate": 0.00012151898734177217, "loss": 2.0819, "step": 48 }, { "epoch": 0.09, "grad_norm": 0.9846250035064288, "learning_rate": 0.0001240506329113924, "loss": 2.0929, "step": 49 }, { "epoch": 0.1, "grad_norm": 1.04173939910535, "learning_rate": 0.00012658227848101267, "loss": 2.0688, "step": 50 }, { "epoch": 0.1, "grad_norm": 0.9547280899094779, "learning_rate": 0.00012911392405063292, "loss": 2.0072, "step": 51 }, { "epoch": 0.1, "grad_norm": 0.9946952866867602, "learning_rate": 0.00013164556962025315, "loss": 2.1007, "step": 52 }, { "epoch": 0.1, "grad_norm": 0.9787024522145511, "learning_rate": 0.00013417721518987343, "loss": 1.969, "step": 53 }, { "epoch": 0.1, "grad_norm": 0.9718678580909227, "learning_rate": 0.00013670886075949366, "loss": 2.0974, "step": 54 }, { "epoch": 0.1, "grad_norm": 0.9486314460596359, "learning_rate": 0.00013924050632911395, "loss": 1.8316, "step": 55 }, { "epoch": 0.11, "grad_norm": 0.9995281876322577, "learning_rate": 0.00014177215189873418, "loss": 2.0594, "step": 56 }, { "epoch": 0.11, "grad_norm": 1.0484791315961572, "learning_rate": 0.00014430379746835443, "loss": 2.003, "step": 57 }, { "epoch": 0.11, "grad_norm": 1.0875822215793156, "learning_rate": 0.0001468354430379747, "loss": 1.9331, "step": 58 }, { "epoch": 0.11, "grad_norm": 0.922758435892472, "learning_rate": 0.00014936708860759494, "loss": 1.8499, "step": 59 }, { "epoch": 0.11, "grad_norm": 1.015314492700278, "learning_rate": 0.0001518987341772152, "loss": 1.9175, "step": 60 }, { "epoch": 0.12, "grad_norm": 1.0703309031061434, "learning_rate": 0.00015443037974683546, "loss": 1.9089, "step": 61 }, { "epoch": 0.12, "grad_norm": 1.0958164318572725, "learning_rate": 0.00015696202531645568, "loss": 1.9026, "step": 62 }, { "epoch": 0.12, "grad_norm": 0.9855413144987873, "learning_rate": 0.00015949367088607597, "loss": 1.8312, "step": 63 }, { "epoch": 0.12, "grad_norm": 1.1444556846755751, "learning_rate": 0.0001620253164556962, "loss": 1.9843, "step": 64 }, { "epoch": 0.12, "grad_norm": 0.9790128009286685, "learning_rate": 0.00016455696202531648, "loss": 1.8093, "step": 65 }, { "epoch": 0.13, "grad_norm": 1.0025542117372797, "learning_rate": 0.0001670886075949367, "loss": 1.7911, "step": 66 }, { "epoch": 0.13, "grad_norm": 1.0562288774910464, "learning_rate": 0.00016962025316455696, "loss": 1.9584, "step": 67 }, { "epoch": 0.13, "grad_norm": 0.9969777151281523, "learning_rate": 0.00017215189873417722, "loss": 1.7665, "step": 68 }, { "epoch": 0.13, "grad_norm": 0.9870403456691881, "learning_rate": 0.00017468354430379748, "loss": 1.7838, "step": 69 }, { "epoch": 0.13, "grad_norm": 0.9874240640381884, "learning_rate": 0.00017721518987341773, "loss": 1.7562, "step": 70 }, { "epoch": 0.13, "grad_norm": 1.1228689372459228, "learning_rate": 0.000179746835443038, "loss": 1.6236, "step": 71 }, { "epoch": 0.14, "grad_norm": 1.153320982860836, "learning_rate": 0.00018227848101265824, "loss": 1.7328, "step": 72 }, { "epoch": 0.14, "grad_norm": 1.0998936869559794, "learning_rate": 0.0001848101265822785, "loss": 1.6254, "step": 73 }, { "epoch": 0.14, "grad_norm": 1.1449366286838898, "learning_rate": 0.00018734177215189873, "loss": 1.8079, "step": 74 }, { "epoch": 0.14, "grad_norm": 1.0835142066515404, "learning_rate": 0.00018987341772151899, "loss": 1.7, "step": 75 }, { "epoch": 0.14, "grad_norm": 1.0002494378852271, "learning_rate": 0.00019240506329113924, "loss": 1.6878, "step": 76 }, { "epoch": 0.15, "grad_norm": 0.9667622794417475, "learning_rate": 0.0001949367088607595, "loss": 1.7383, "step": 77 }, { "epoch": 0.15, "grad_norm": 0.9443362501967373, "learning_rate": 0.00019746835443037975, "loss": 1.5258, "step": 78 }, { "epoch": 0.15, "grad_norm": 0.9398820819297641, "learning_rate": 0.0002, "loss": 1.6559, "step": 79 }, { "epoch": 0.15, "grad_norm": 0.9911072971369427, "learning_rate": 0.00019999992416865176, "loss": 1.5496, "step": 80 }, { "epoch": 0.15, "grad_norm": 1.2373753866098949, "learning_rate": 0.00019999969667472203, "loss": 1.7454, "step": 81 }, { "epoch": 0.16, "grad_norm": 1.1387046300594492, "learning_rate": 0.00019999931751855582, "loss": 1.6163, "step": 82 }, { "epoch": 0.16, "grad_norm": 1.118437419786219, "learning_rate": 0.0001999987867007282, "loss": 1.7214, "step": 83 }, { "epoch": 0.16, "grad_norm": 1.0124220953403187, "learning_rate": 0.00019999810422204422, "loss": 1.5843, "step": 84 }, { "epoch": 0.16, "grad_norm": 1.0311468167149382, "learning_rate": 0.0001999972700835389, "loss": 1.6748, "step": 85 }, { "epoch": 0.16, "grad_norm": 1.0413281872154954, "learning_rate": 0.00019999628428647736, "loss": 1.6224, "step": 86 }, { "epoch": 0.17, "grad_norm": 1.0815320804877413, "learning_rate": 0.0001999951468323547, "loss": 1.5469, "step": 87 }, { "epoch": 0.17, "grad_norm": 1.115100039311681, "learning_rate": 0.00019999385772289597, "loss": 1.5185, "step": 88 }, { "epoch": 0.17, "grad_norm": 1.1781802596819995, "learning_rate": 0.00019999241696005632, "loss": 1.5238, "step": 89 }, { "epoch": 0.17, "grad_norm": 1.1631076512702936, "learning_rate": 0.00019999082454602078, "loss": 1.617, "step": 90 }, { "epoch": 0.17, "grad_norm": 0.9937687399639329, "learning_rate": 0.0001999890804832045, "loss": 1.5079, "step": 91 }, { "epoch": 0.17, "grad_norm": 1.067930385983506, "learning_rate": 0.00019998718477425256, "loss": 1.5052, "step": 92 }, { "epoch": 0.18, "grad_norm": 0.9873344482859154, "learning_rate": 0.00019998513742204005, "loss": 1.4452, "step": 93 }, { "epoch": 0.18, "grad_norm": 0.9699373623728758, "learning_rate": 0.000199982938429672, "loss": 1.4277, "step": 94 }, { "epoch": 0.18, "grad_norm": 1.1035885788025745, "learning_rate": 0.00019998058780048352, "loss": 1.4932, "step": 95 }, { "epoch": 0.18, "grad_norm": 1.2697025994460067, "learning_rate": 0.0001999780855380396, "loss": 1.4149, "step": 96 }, { "epoch": 0.18, "grad_norm": 1.4120511478883055, "learning_rate": 0.00019997543164613525, "loss": 1.5412, "step": 97 }, { "epoch": 0.19, "grad_norm": 1.0506838445755373, "learning_rate": 0.00019997262612879543, "loss": 1.4616, "step": 98 }, { "epoch": 0.19, "grad_norm": 0.969359771281005, "learning_rate": 0.0001999696689902751, "loss": 1.5714, "step": 99 }, { "epoch": 0.19, "grad_norm": 0.9775813815856758, "learning_rate": 0.00019996656023505907, "loss": 1.4512, "step": 100 }, { "epoch": 0.19, "eval_blimp_filtered_avg": 0.7225373134328358, "eval_blimp_filtered_std": 0.00486367711361154, "step": 100 }, { "epoch": 0.19, "eval_blimp_supplement_avg": 0.8232758620689655, "eval_blimp_supplement_std": 0.017001976219390976, "step": 100 }, { "epoch": 0.19, "eval_vqa_filtered_avg": 0.53, "eval_vqa_filtered_std": 0.0501613558046592, "step": 100 }, { "epoch": 0.19, "eval_winoground_filtered_avg": 0.68, "eval_winoground_filtered_std": 0.046882617226215034, "step": 100 }, { "epoch": 0.19, "grad_norm": 0.89807479592677, "learning_rate": 0.0001999632998678622, "loss": 1.268, "step": 101 }, { "epoch": 0.19, "grad_norm": 1.011460001506197, "learning_rate": 0.00019995988789362924, "loss": 1.4116, "step": 102 }, { "epoch": 0.2, "grad_norm": 1.228473943871322, "learning_rate": 0.0001999563243175349, "loss": 1.5549, "step": 103 }, { "epoch": 0.2, "grad_norm": 1.233085560192852, "learning_rate": 0.0001999526091449838, "loss": 1.3736, "step": 104 }, { "epoch": 0.2, "grad_norm": 1.175083026784327, "learning_rate": 0.0001999487423816104, "loss": 1.4724, "step": 105 }, { "epoch": 0.2, "grad_norm": 1.0990039379793748, "learning_rate": 0.00019994472403327924, "loss": 1.4784, "step": 106 }, { "epoch": 0.2, "grad_norm": 0.947643330168305, "learning_rate": 0.0001999405541060846, "loss": 1.4023, "step": 107 }, { "epoch": 0.21, "grad_norm": 1.0313955809717408, "learning_rate": 0.0001999362326063507, "loss": 1.4565, "step": 108 }, { "epoch": 0.21, "grad_norm": 1.0085745378952695, "learning_rate": 0.00019993175954063162, "loss": 1.2713, "step": 109 }, { "epoch": 0.21, "grad_norm": 1.0614118043181633, "learning_rate": 0.00019992713491571141, "loss": 1.345, "step": 110 }, { "epoch": 0.21, "grad_norm": 1.1199132272861083, "learning_rate": 0.00019992235873860387, "loss": 1.3219, "step": 111 }, { "epoch": 0.21, "grad_norm": 1.197206215469527, "learning_rate": 0.0001999174310165526, "loss": 1.2873, "step": 112 }, { "epoch": 0.21, "grad_norm": 1.077112620687454, "learning_rate": 0.00019991235175703125, "loss": 1.3069, "step": 113 }, { "epoch": 0.22, "grad_norm": 1.0570419172441128, "learning_rate": 0.0001999071209677431, "loss": 1.1977, "step": 114 }, { "epoch": 0.22, "grad_norm": 1.0682566151341115, "learning_rate": 0.00019990173865662124, "loss": 1.0983, "step": 115 }, { "epoch": 0.22, "grad_norm": 1.074234369979894, "learning_rate": 0.00019989620483182874, "loss": 1.3487, "step": 116 }, { "epoch": 0.22, "grad_norm": 1.1218932476685106, "learning_rate": 0.00019989051950175827, "loss": 1.3241, "step": 117 }, { "epoch": 0.22, "grad_norm": 1.0965614520144202, "learning_rate": 0.0001998846826750324, "loss": 1.2817, "step": 118 }, { "epoch": 0.23, "grad_norm": 1.0172256386341207, "learning_rate": 0.00019987869436050338, "loss": 1.1106, "step": 119 }, { "epoch": 0.23, "grad_norm": 1.029882984915272, "learning_rate": 0.00019987255456725326, "loss": 1.1558, "step": 120 }, { "epoch": 0.23, "grad_norm": 1.2012856134585497, "learning_rate": 0.00019986626330459383, "loss": 1.4219, "step": 121 }, { "epoch": 0.23, "grad_norm": 1.0978353242422414, "learning_rate": 0.0001998598205820666, "loss": 1.2545, "step": 122 }, { "epoch": 0.23, "grad_norm": 1.0903113541616258, "learning_rate": 0.00019985322640944276, "loss": 1.2472, "step": 123 }, { "epoch": 0.24, "grad_norm": 0.9904664739042491, "learning_rate": 0.0001998464807967232, "loss": 1.181, "step": 124 }, { "epoch": 0.24, "grad_norm": 1.0292233153723453, "learning_rate": 0.0001998395837541385, "loss": 1.2542, "step": 125 }, { "epoch": 0.24, "grad_norm": 1.0258782685501524, "learning_rate": 0.00019983253529214892, "loss": 1.2388, "step": 126 }, { "epoch": 0.24, "grad_norm": 1.1198270939045862, "learning_rate": 0.00019982533542144438, "loss": 1.3485, "step": 127 }, { "epoch": 0.24, "grad_norm": 1.073030068145302, "learning_rate": 0.0001998179841529443, "loss": 1.1906, "step": 128 }, { "epoch": 0.25, "grad_norm": 1.1222089659948165, "learning_rate": 0.00019981048149779785, "loss": 1.268, "step": 129 }, { "epoch": 0.25, "grad_norm": 1.083496716300647, "learning_rate": 0.00019980282746738385, "loss": 1.2744, "step": 130 }, { "epoch": 0.25, "grad_norm": 1.0584748504747377, "learning_rate": 0.00019979502207331047, "loss": 1.2033, "step": 131 }, { "epoch": 0.25, "grad_norm": 1.0545330984605028, "learning_rate": 0.0001997870653274157, "loss": 1.193, "step": 132 }, { "epoch": 0.25, "grad_norm": 1.0087526110647596, "learning_rate": 0.00019977895724176685, "loss": 0.9976, "step": 133 }, { "epoch": 0.25, "grad_norm": 1.1094268623730854, "learning_rate": 0.00019977069782866092, "loss": 1.1458, "step": 134 }, { "epoch": 0.26, "grad_norm": 1.1237531426484695, "learning_rate": 0.0001997622871006244, "loss": 1.1239, "step": 135 }, { "epoch": 0.26, "grad_norm": 1.1047963137713694, "learning_rate": 0.00019975372507041313, "loss": 1.0877, "step": 136 }, { "epoch": 0.26, "grad_norm": 1.2609625091214856, "learning_rate": 0.00019974501175101258, "loss": 1.2488, "step": 137 }, { "epoch": 0.26, "grad_norm": 1.0369383266346148, "learning_rate": 0.00019973614715563758, "loss": 1.0444, "step": 138 }, { "epoch": 0.26, "grad_norm": 1.0262020140157868, "learning_rate": 0.00019972713129773242, "loss": 1.0271, "step": 139 }, { "epoch": 0.27, "grad_norm": 0.9564501045560494, "learning_rate": 0.00019971796419097082, "loss": 1.0624, "step": 140 }, { "epoch": 0.27, "grad_norm": 1.0168884027205154, "learning_rate": 0.00019970864584925582, "loss": 1.0464, "step": 141 }, { "epoch": 0.27, "grad_norm": 0.9164401572305155, "learning_rate": 0.0001996991762867199, "loss": 0.8736, "step": 142 }, { "epoch": 0.27, "grad_norm": 1.2189325595970382, "learning_rate": 0.00019968955551772483, "loss": 1.1701, "step": 143 }, { "epoch": 0.27, "grad_norm": 1.1267118747761415, "learning_rate": 0.00019967978355686175, "loss": 0.995, "step": 144 }, { "epoch": 0.28, "grad_norm": 1.255042667528437, "learning_rate": 0.00019966986041895107, "loss": 1.121, "step": 145 }, { "epoch": 0.28, "grad_norm": 1.077114448645021, "learning_rate": 0.00019965978611904248, "loss": 1.1181, "step": 146 }, { "epoch": 0.28, "grad_norm": 0.9830895388621461, "learning_rate": 0.00019964956067241492, "loss": 0.9535, "step": 147 }, { "epoch": 0.28, "grad_norm": 0.9918814334996905, "learning_rate": 0.0001996391840945766, "loss": 0.9696, "step": 148 }, { "epoch": 0.28, "grad_norm": 1.1557017241911947, "learning_rate": 0.00019962865640126495, "loss": 1.0217, "step": 149 }, { "epoch": 0.29, "grad_norm": 1.180831049901838, "learning_rate": 0.0001996179776084465, "loss": 1.0537, "step": 150 }, { "epoch": 0.29, "grad_norm": 1.0944379372099293, "learning_rate": 0.00019960714773231702, "loss": 1.0312, "step": 151 }, { "epoch": 0.29, "grad_norm": 1.1601730132015307, "learning_rate": 0.0001995961667893014, "loss": 1.0508, "step": 152 }, { "epoch": 0.29, "grad_norm": 1.1267679737249436, "learning_rate": 0.0001995850347960536, "loss": 0.9747, "step": 153 }, { "epoch": 0.29, "grad_norm": 1.0111144529254112, "learning_rate": 0.00019957375176945675, "loss": 0.9082, "step": 154 }, { "epoch": 0.29, "grad_norm": 1.0446308913078524, "learning_rate": 0.00019956231772662292, "loss": 0.8823, "step": 155 }, { "epoch": 0.3, "grad_norm": 1.0670924544820553, "learning_rate": 0.00019955073268489336, "loss": 0.9027, "step": 156 }, { "epoch": 0.3, "grad_norm": 1.030946778548119, "learning_rate": 0.00019953899666183824, "loss": 0.8316, "step": 157 }, { "epoch": 0.3, "grad_norm": 1.4038682896189518, "learning_rate": 0.0001995271096752567, "loss": 1.0903, "step": 158 }, { "epoch": 0.3, "grad_norm": 1.247321089045751, "learning_rate": 0.0001995150717431769, "loss": 1.1219, "step": 159 }, { "epoch": 0.3, "grad_norm": 0.9697911796324739, "learning_rate": 0.00019950288288385587, "loss": 0.8121, "step": 160 }, { "epoch": 0.31, "grad_norm": 0.903724913672907, "learning_rate": 0.00019949054311577957, "loss": 0.8737, "step": 161 }, { "epoch": 0.31, "grad_norm": 0.9139348234869528, "learning_rate": 0.0001994780524576628, "loss": 0.8801, "step": 162 }, { "epoch": 0.31, "grad_norm": 1.0063223090813032, "learning_rate": 0.0001994654109284493, "loss": 0.8403, "step": 163 }, { "epoch": 0.31, "grad_norm": 0.946905077949599, "learning_rate": 0.0001994526185473115, "loss": 0.8636, "step": 164 }, { "epoch": 0.31, "grad_norm": 1.1246510656502924, "learning_rate": 0.00019943967533365061, "loss": 1.0096, "step": 165 }, { "epoch": 0.32, "grad_norm": 1.1282170902077655, "learning_rate": 0.0001994265813070968, "loss": 1.0462, "step": 166 }, { "epoch": 0.32, "grad_norm": 1.0594869241471085, "learning_rate": 0.0001994133364875087, "loss": 0.9746, "step": 167 }, { "epoch": 0.32, "grad_norm": 1.1076346975099083, "learning_rate": 0.00019939994089497384, "loss": 0.8982, "step": 168 }, { "epoch": 0.32, "grad_norm": 1.0464266186404214, "learning_rate": 0.00019938639454980826, "loss": 0.9103, "step": 169 }, { "epoch": 0.32, "grad_norm": 1.069423419075571, "learning_rate": 0.0001993726974725568, "loss": 0.8207, "step": 170 }, { "epoch": 0.33, "grad_norm": 1.122828434593887, "learning_rate": 0.00019935884968399277, "loss": 0.877, "step": 171 }, { "epoch": 0.33, "grad_norm": 1.0065961546730098, "learning_rate": 0.0001993448512051181, "loss": 0.8059, "step": 172 }, { "epoch": 0.33, "grad_norm": 1.118480559990779, "learning_rate": 0.00019933070205716328, "loss": 0.8724, "step": 173 }, { "epoch": 0.33, "grad_norm": 1.1206058516317394, "learning_rate": 0.0001993164022615872, "loss": 0.9526, "step": 174 }, { "epoch": 0.33, "grad_norm": 0.9760299687775463, "learning_rate": 0.00019930195184007747, "loss": 0.7557, "step": 175 }, { "epoch": 0.33, "grad_norm": 1.1421841253110052, "learning_rate": 0.00019928735081454986, "loss": 0.9536, "step": 176 }, { "epoch": 0.34, "grad_norm": 1.0036248334596232, "learning_rate": 0.00019927259920714873, "loss": 0.8339, "step": 177 }, { "epoch": 0.34, "grad_norm": 0.9628373956259314, "learning_rate": 0.0001992576970402468, "loss": 0.8222, "step": 178 }, { "epoch": 0.34, "grad_norm": 1.049581357009389, "learning_rate": 0.000199242644336445, "loss": 0.917, "step": 179 }, { "epoch": 0.34, "grad_norm": 0.98035195765884, "learning_rate": 0.00019922744111857278, "loss": 0.7851, "step": 180 }, { "epoch": 0.34, "grad_norm": 1.1889019494970867, "learning_rate": 0.00019921208740968769, "loss": 0.973, "step": 181 }, { "epoch": 0.35, "grad_norm": 1.0078848988171867, "learning_rate": 0.00019919658323307559, "loss": 0.9598, "step": 182 }, { "epoch": 0.35, "grad_norm": 0.9320752631548938, "learning_rate": 0.0001991809286122505, "loss": 0.7788, "step": 183 }, { "epoch": 0.35, "grad_norm": 0.8947298723805523, "learning_rate": 0.00019916512357095467, "loss": 0.699, "step": 184 }, { "epoch": 0.35, "grad_norm": 0.9002902409709923, "learning_rate": 0.00019914916813315844, "loss": 0.7571, "step": 185 }, { "epoch": 0.35, "grad_norm": 1.0448706219520691, "learning_rate": 0.0001991330623230603, "loss": 0.8291, "step": 186 }, { "epoch": 0.36, "grad_norm": 1.0406807796166349, "learning_rate": 0.00019911680616508672, "loss": 0.7903, "step": 187 }, { "epoch": 0.36, "grad_norm": 0.9942615018214022, "learning_rate": 0.00019910039968389223, "loss": 0.7123, "step": 188 }, { "epoch": 0.36, "grad_norm": 1.1060851233211086, "learning_rate": 0.00019908384290435934, "loss": 0.8265, "step": 189 }, { "epoch": 0.36, "grad_norm": 1.0969472198832921, "learning_rate": 0.00019906713585159848, "loss": 0.8125, "step": 190 }, { "epoch": 0.36, "grad_norm": 0.9532264978251747, "learning_rate": 0.00019905027855094808, "loss": 0.7962, "step": 191 }, { "epoch": 0.37, "grad_norm": 1.00203122161559, "learning_rate": 0.00019903327102797433, "loss": 0.7305, "step": 192 }, { "epoch": 0.37, "grad_norm": 1.0132931236525817, "learning_rate": 0.00019901611330847132, "loss": 0.8387, "step": 193 }, { "epoch": 0.37, "grad_norm": 0.9256951562981848, "learning_rate": 0.0001989988054184609, "loss": 0.6441, "step": 194 }, { "epoch": 0.37, "grad_norm": 1.087545927658831, "learning_rate": 0.00019898134738419268, "loss": 0.719, "step": 195 }, { "epoch": 0.37, "grad_norm": 1.1362397665228519, "learning_rate": 0.000198963739232144, "loss": 0.7412, "step": 196 }, { "epoch": 0.37, "grad_norm": 1.1446035006196638, "learning_rate": 0.00019894598098901988, "loss": 0.761, "step": 197 }, { "epoch": 0.38, "grad_norm": 1.1910700684579427, "learning_rate": 0.00019892807268175283, "loss": 0.833, "step": 198 }, { "epoch": 0.38, "grad_norm": 1.0523017598967883, "learning_rate": 0.00019891001433750325, "loss": 0.6709, "step": 199 }, { "epoch": 0.38, "grad_norm": 0.9724220841730856, "learning_rate": 0.00019889180598365878, "loss": 0.6748, "step": 200 }, { "epoch": 0.38, "eval_blimp_filtered_avg": 0.72, "eval_blimp_filtered_std": 0.004936348210662178, "step": 200 }, { "epoch": 0.38, "eval_blimp_supplement_avg": 0.8297413793103449, "eval_blimp_supplement_std": 0.016743873320697964, "step": 200 }, { "epoch": 0.38, "eval_vqa_filtered_avg": 0.51, "eval_vqa_filtered_std": 0.05024183937956911, "step": 200 }, { "epoch": 0.38, "eval_winoground_filtered_avg": 0.65, "eval_winoground_filtered_std": 0.04793724854411019, "step": 200 }, { "epoch": 0.38, "grad_norm": 0.9834105334797844, "learning_rate": 0.00019887344764783475, "loss": 0.808, "step": 201 }, { "epoch": 0.38, "grad_norm": 0.808455665428161, "learning_rate": 0.0001988549393578739, "loss": 0.6152, "step": 202 }, { "epoch": 0.39, "grad_norm": 0.9500494225223745, "learning_rate": 0.0001988362811418464, "loss": 0.7579, "step": 203 }, { "epoch": 0.39, "grad_norm": 1.0082271810758079, "learning_rate": 0.0001988174730280498, "loss": 0.7942, "step": 204 }, { "epoch": 0.39, "grad_norm": 1.2515966711655988, "learning_rate": 0.000198798515045009, "loss": 0.9188, "step": 205 }, { "epoch": 0.39, "grad_norm": 0.9269206960712918, "learning_rate": 0.0001987794072214762, "loss": 0.6609, "step": 206 }, { "epoch": 0.39, "grad_norm": 0.9431254696792429, "learning_rate": 0.0001987601495864308, "loss": 0.6185, "step": 207 }, { "epoch": 0.4, "grad_norm": 0.966884295036907, "learning_rate": 0.0001987407421690795, "loss": 0.6351, "step": 208 }, { "epoch": 0.4, "grad_norm": 1.0681446801221601, "learning_rate": 0.0001987211849988561, "loss": 0.7627, "step": 209 }, { "epoch": 0.4, "grad_norm": 0.9349843643214157, "learning_rate": 0.00019870147810542148, "loss": 0.7122, "step": 210 }, { "epoch": 0.4, "grad_norm": 1.017193208949513, "learning_rate": 0.00019868162151866371, "loss": 0.7659, "step": 211 }, { "epoch": 0.4, "grad_norm": 0.9226654403347498, "learning_rate": 0.0001986616152686978, "loss": 0.7136, "step": 212 }, { "epoch": 0.4, "grad_norm": 0.9191325488872414, "learning_rate": 0.00019864145938586574, "loss": 0.6705, "step": 213 }, { "epoch": 0.41, "grad_norm": 0.8523466149694187, "learning_rate": 0.00019862115390073654, "loss": 0.6372, "step": 214 }, { "epoch": 0.41, "grad_norm": 0.9211701297557979, "learning_rate": 0.00019860069884410604, "loss": 0.6786, "step": 215 }, { "epoch": 0.41, "grad_norm": 1.0142548059638694, "learning_rate": 0.00019858009424699686, "loss": 0.6429, "step": 216 }, { "epoch": 0.41, "grad_norm": 1.0579023067684232, "learning_rate": 0.00019855934014065857, "loss": 0.7151, "step": 217 }, { "epoch": 0.41, "grad_norm": 1.0245966641995732, "learning_rate": 0.00019853843655656737, "loss": 0.6598, "step": 218 }, { "epoch": 0.42, "grad_norm": 1.0294065386860334, "learning_rate": 0.0001985173835264262, "loss": 0.624, "step": 219 }, { "epoch": 0.42, "grad_norm": 1.0190874613795216, "learning_rate": 0.00019849618108216466, "loss": 0.6489, "step": 220 }, { "epoch": 0.42, "grad_norm": 1.1077038616056318, "learning_rate": 0.00019847482925593895, "loss": 0.5914, "step": 221 }, { "epoch": 0.42, "grad_norm": 1.166597017073193, "learning_rate": 0.00019845332808013182, "loss": 0.7083, "step": 222 }, { "epoch": 0.42, "grad_norm": 1.055355555556852, "learning_rate": 0.00019843167758735254, "loss": 0.7718, "step": 223 }, { "epoch": 0.43, "grad_norm": 0.9324034706062544, "learning_rate": 0.00019840987781043684, "loss": 0.6841, "step": 224 }, { "epoch": 0.43, "grad_norm": 0.8520445473531901, "learning_rate": 0.0001983879287824468, "loss": 0.5435, "step": 225 }, { "epoch": 0.43, "grad_norm": 1.0414067587609948, "learning_rate": 0.00019836583053667095, "loss": 0.6633, "step": 226 }, { "epoch": 0.43, "grad_norm": 0.8277957101280178, "learning_rate": 0.0001983435831066241, "loss": 0.595, "step": 227 }, { "epoch": 0.43, "grad_norm": 1.0168498451599652, "learning_rate": 0.00019832118652604727, "loss": 0.697, "step": 228 }, { "epoch": 0.44, "grad_norm": 0.9810394986305114, "learning_rate": 0.00019829864082890772, "loss": 0.6446, "step": 229 }, { "epoch": 0.44, "grad_norm": 1.044510943482436, "learning_rate": 0.0001982759460493989, "loss": 0.7424, "step": 230 }, { "epoch": 0.44, "grad_norm": 0.8492178493661765, "learning_rate": 0.00019825310222194026, "loss": 0.5474, "step": 231 }, { "epoch": 0.44, "grad_norm": 1.036834905506577, "learning_rate": 0.00019823010938117743, "loss": 0.6499, "step": 232 }, { "epoch": 0.44, "grad_norm": 0.9544918529617384, "learning_rate": 0.00019820696756198193, "loss": 0.611, "step": 233 }, { "epoch": 0.44, "grad_norm": 0.8770655556983282, "learning_rate": 0.00019818367679945128, "loss": 0.5385, "step": 234 }, { "epoch": 0.45, "grad_norm": 1.080126750486886, "learning_rate": 0.0001981602371289089, "loss": 0.7073, "step": 235 }, { "epoch": 0.45, "grad_norm": 0.9430729815399004, "learning_rate": 0.00019813664858590397, "loss": 0.6103, "step": 236 }, { "epoch": 0.45, "grad_norm": 0.9438259916402627, "learning_rate": 0.00019811291120621155, "loss": 0.6995, "step": 237 }, { "epoch": 0.45, "grad_norm": 0.8314088128600715, "learning_rate": 0.0001980890250258324, "loss": 0.5108, "step": 238 }, { "epoch": 0.45, "grad_norm": 0.8671116948515513, "learning_rate": 0.0001980649900809929, "loss": 0.5494, "step": 239 }, { "epoch": 0.46, "grad_norm": 0.8989434429274697, "learning_rate": 0.00019804080640814514, "loss": 0.5861, "step": 240 }, { "epoch": 0.46, "grad_norm": 1.0639606105596968, "learning_rate": 0.00019801647404396676, "loss": 0.7602, "step": 241 }, { "epoch": 0.46, "grad_norm": 0.8957440403226736, "learning_rate": 0.0001979919930253608, "loss": 0.6212, "step": 242 }, { "epoch": 0.46, "grad_norm": 1.0856933152126511, "learning_rate": 0.00019796736338945588, "loss": 0.7865, "step": 243 }, { "epoch": 0.46, "grad_norm": 1.0604593263152318, "learning_rate": 0.00019794258517360594, "loss": 0.6085, "step": 244 }, { "epoch": 0.47, "grad_norm": 0.9556791081869946, "learning_rate": 0.0001979176584153903, "loss": 0.5364, "step": 245 }, { "epoch": 0.47, "grad_norm": 1.0558248224856681, "learning_rate": 0.0001978925831526136, "loss": 0.6464, "step": 246 }, { "epoch": 0.47, "grad_norm": 0.9388990275244753, "learning_rate": 0.00019786735942330558, "loss": 0.5694, "step": 247 }, { "epoch": 0.47, "grad_norm": 0.9395983009700571, "learning_rate": 0.0001978419872657213, "loss": 0.6755, "step": 248 }, { "epoch": 0.47, "grad_norm": 0.9682497449702151, "learning_rate": 0.00019781646671834083, "loss": 0.6885, "step": 249 }, { "epoch": 0.48, "grad_norm": 0.9269107389163566, "learning_rate": 0.00019779079781986932, "loss": 0.6178, "step": 250 }, { "epoch": 0.48, "grad_norm": 0.8942595821109374, "learning_rate": 0.0001977649806092369, "loss": 0.5717, "step": 251 }, { "epoch": 0.48, "grad_norm": 0.9520611484509951, "learning_rate": 0.00019773901512559866, "loss": 0.6452, "step": 252 }, { "epoch": 0.48, "grad_norm": 0.9284162719020478, "learning_rate": 0.00019771290140833457, "loss": 0.5992, "step": 253 }, { "epoch": 0.48, "grad_norm": 0.9842878077757757, "learning_rate": 0.00019768663949704934, "loss": 0.6766, "step": 254 }, { "epoch": 0.48, "grad_norm": 0.9508087740845261, "learning_rate": 0.0001976602294315726, "loss": 0.6011, "step": 255 }, { "epoch": 0.49, "grad_norm": 1.3710316261321145, "learning_rate": 0.00019763367125195846, "loss": 0.6119, "step": 256 }, { "epoch": 0.49, "grad_norm": 0.9811415321292337, "learning_rate": 0.00019760696499848581, "loss": 0.6603, "step": 257 }, { "epoch": 0.49, "grad_norm": 1.0626725595948214, "learning_rate": 0.00019758011071165806, "loss": 0.6075, "step": 258 }, { "epoch": 0.49, "grad_norm": 1.0242002853727268, "learning_rate": 0.0001975531084322032, "loss": 0.642, "step": 259 }, { "epoch": 0.49, "grad_norm": 1.0357738536602263, "learning_rate": 0.00019752595820107357, "loss": 0.5994, "step": 260 }, { "epoch": 0.5, "grad_norm": 0.8853728861676884, "learning_rate": 0.00019749866005944596, "loss": 0.554, "step": 261 }, { "epoch": 0.5, "grad_norm": 1.0081714561529906, "learning_rate": 0.0001974712140487214, "loss": 0.6283, "step": 262 }, { "epoch": 0.5, "grad_norm": 0.9473372431904323, "learning_rate": 0.00019744362021052538, "loss": 0.5829, "step": 263 }, { "epoch": 0.5, "grad_norm": 0.9059260532524075, "learning_rate": 0.00019741587858670734, "loss": 0.5321, "step": 264 }, { "epoch": 0.5, "grad_norm": 0.9576206774377005, "learning_rate": 0.00019738798921934106, "loss": 0.5871, "step": 265 }, { "epoch": 0.51, "grad_norm": 0.9576666583850492, "learning_rate": 0.00019735995215072424, "loss": 0.5693, "step": 266 }, { "epoch": 0.51, "grad_norm": 1.014863977023919, "learning_rate": 0.0001973317674233787, "loss": 0.6348, "step": 267 }, { "epoch": 0.51, "grad_norm": 1.00796671939854, "learning_rate": 0.0001973034350800501, "loss": 0.6082, "step": 268 }, { "epoch": 0.51, "grad_norm": 0.8642554578550907, "learning_rate": 0.0001972749551637081, "loss": 0.4529, "step": 269 }, { "epoch": 0.51, "grad_norm": 0.9617733713826381, "learning_rate": 0.0001972463277175461, "loss": 0.5706, "step": 270 }, { "epoch": 0.52, "grad_norm": 0.8894606151025055, "learning_rate": 0.00019721755278498125, "loss": 0.5128, "step": 271 }, { "epoch": 0.52, "grad_norm": 0.9617771760244375, "learning_rate": 0.00019718863040965433, "loss": 0.553, "step": 272 }, { "epoch": 0.52, "grad_norm": 0.848286810107298, "learning_rate": 0.00019715956063542987, "loss": 0.4788, "step": 273 }, { "epoch": 0.52, "grad_norm": 0.9593161616733296, "learning_rate": 0.00019713034350639586, "loss": 0.5785, "step": 274 }, { "epoch": 0.52, "grad_norm": 0.9652136324944444, "learning_rate": 0.0001971009790668638, "loss": 0.5618, "step": 275 }, { "epoch": 0.52, "grad_norm": 1.022922191893183, "learning_rate": 0.0001970714673613685, "loss": 0.6421, "step": 276 }, { "epoch": 0.53, "grad_norm": 1.0267767397971175, "learning_rate": 0.00019704180843466832, "loss": 0.5932, "step": 277 }, { "epoch": 0.53, "grad_norm": 0.8355471409697771, "learning_rate": 0.0001970120023317447, "loss": 0.4922, "step": 278 }, { "epoch": 0.53, "grad_norm": 0.9213229036238134, "learning_rate": 0.00019698204909780244, "loss": 0.5104, "step": 279 }, { "epoch": 0.53, "grad_norm": 0.8689292720477446, "learning_rate": 0.00019695194877826942, "loss": 0.5291, "step": 280 }, { "epoch": 0.53, "grad_norm": 0.9543618979677062, "learning_rate": 0.00019692170141879655, "loss": 0.5951, "step": 281 }, { "epoch": 0.54, "grad_norm": 0.899161615524234, "learning_rate": 0.00019689130706525783, "loss": 0.486, "step": 282 }, { "epoch": 0.54, "grad_norm": 0.8611902034536167, "learning_rate": 0.00019686076576375016, "loss": 0.4548, "step": 283 }, { "epoch": 0.54, "grad_norm": 1.0403019519808119, "learning_rate": 0.00019683007756059325, "loss": 0.6482, "step": 284 }, { "epoch": 0.54, "grad_norm": 1.0289731413525156, "learning_rate": 0.00019679924250232974, "loss": 0.5827, "step": 285 }, { "epoch": 0.54, "grad_norm": 0.907199648133166, "learning_rate": 0.00019676826063572483, "loss": 0.5274, "step": 286 }, { "epoch": 0.55, "grad_norm": 0.826746260463674, "learning_rate": 0.00019673713200776653, "loss": 0.4387, "step": 287 }, { "epoch": 0.55, "grad_norm": 0.8594328452167301, "learning_rate": 0.0001967058566656653, "loss": 0.49, "step": 288 }, { "epoch": 0.55, "grad_norm": 1.0273716683580285, "learning_rate": 0.0001966744346568542, "loss": 0.5685, "step": 289 }, { "epoch": 0.55, "grad_norm": 1.062794897692209, "learning_rate": 0.0001966428660289886, "loss": 0.6148, "step": 290 }, { "epoch": 0.55, "grad_norm": 0.945806271295085, "learning_rate": 0.00019661115082994648, "loss": 0.4252, "step": 291 }, { "epoch": 0.56, "grad_norm": 0.983827320495218, "learning_rate": 0.00019657928910782788, "loss": 0.4855, "step": 292 }, { "epoch": 0.56, "grad_norm": 0.9674357488373523, "learning_rate": 0.00019654728091095516, "loss": 0.5662, "step": 293 }, { "epoch": 0.56, "grad_norm": 1.0937395939512053, "learning_rate": 0.00019651512628787284, "loss": 0.6735, "step": 294 }, { "epoch": 0.56, "grad_norm": 0.8770469757306242, "learning_rate": 0.00019648282528734743, "loss": 0.4596, "step": 295 }, { "epoch": 0.56, "grad_norm": 0.8806348351212576, "learning_rate": 0.00019645037795836757, "loss": 0.5366, "step": 296 }, { "epoch": 0.56, "grad_norm": 0.8647150491364859, "learning_rate": 0.00019641778435014367, "loss": 0.5467, "step": 297 }, { "epoch": 0.57, "grad_norm": 0.9502749679192279, "learning_rate": 0.00019638504451210818, "loss": 0.5595, "step": 298 }, { "epoch": 0.57, "grad_norm": 0.6767934195120455, "learning_rate": 0.00019635215849391513, "loss": 0.3713, "step": 299 }, { "epoch": 0.57, "grad_norm": 0.9969910971733658, "learning_rate": 0.00019631912634544038, "loss": 0.5577, "step": 300 }, { "epoch": 0.57, "eval_blimp_filtered_avg": 0.7292537313432835, "eval_blimp_filtered_std": 0.004896116204659586, "step": 300 }, { "epoch": 0.57, "eval_blimp_supplement_avg": 0.8254310344827587, "eval_blimp_supplement_std": 0.016679339333559105, "step": 300 }, { "epoch": 0.57, "eval_vqa_filtered_avg": 0.53, "eval_vqa_filtered_std": 0.05016135580465919, "step": 300 }, { "epoch": 0.57, "eval_winoground_filtered_avg": 0.66, "eval_winoground_filtered_std": 0.04760952285695237, "step": 300 }, { "epoch": 0.57, "grad_norm": 0.9523724382154625, "learning_rate": 0.0001962859481167814, "loss": 0.5358, "step": 301 }, { "epoch": 0.57, "grad_norm": 1.1596024532022375, "learning_rate": 0.00019625262385825713, "loss": 0.7372, "step": 302 }, { "epoch": 0.58, "grad_norm": 0.8957504656694092, "learning_rate": 0.0001962191536204081, "loss": 0.5251, "step": 303 }, { "epoch": 0.58, "grad_norm": 0.874502822035417, "learning_rate": 0.00019618553745399614, "loss": 0.5142, "step": 304 }, { "epoch": 0.58, "grad_norm": 0.9760114009990478, "learning_rate": 0.0001961517754100044, "loss": 0.5479, "step": 305 }, { "epoch": 0.58, "grad_norm": 0.7572251930365458, "learning_rate": 0.0001961178675396374, "loss": 0.4784, "step": 306 }, { "epoch": 0.58, "grad_norm": 0.9047492975418054, "learning_rate": 0.00019608381389432062, "loss": 0.5062, "step": 307 }, { "epoch": 0.59, "grad_norm": 0.8591870090979306, "learning_rate": 0.00019604961452570084, "loss": 0.4682, "step": 308 }, { "epoch": 0.59, "grad_norm": 0.8257127986878524, "learning_rate": 0.00019601526948564567, "loss": 0.4637, "step": 309 }, { "epoch": 0.59, "grad_norm": 1.0339606960111059, "learning_rate": 0.00019598077882624376, "loss": 0.5984, "step": 310 }, { "epoch": 0.59, "grad_norm": 0.9974187307850673, "learning_rate": 0.00019594614259980456, "loss": 0.4317, "step": 311 }, { "epoch": 0.59, "grad_norm": 0.969156835096392, "learning_rate": 0.0001959113608588583, "loss": 0.5223, "step": 312 }, { "epoch": 0.6, "grad_norm": 0.9561237715171084, "learning_rate": 0.00019587643365615595, "loss": 0.4907, "step": 313 }, { "epoch": 0.6, "grad_norm": 0.856996974257559, "learning_rate": 0.000195841361044669, "loss": 0.4396, "step": 314 }, { "epoch": 0.6, "grad_norm": 0.878556179525401, "learning_rate": 0.00019580614307758952, "loss": 0.4463, "step": 315 }, { "epoch": 0.6, "grad_norm": 0.9871293531237648, "learning_rate": 0.00019577077980833006, "loss": 0.4758, "step": 316 }, { "epoch": 0.6, "grad_norm": 0.7172529380325134, "learning_rate": 0.00019573527129052346, "loss": 0.3742, "step": 317 }, { "epoch": 0.6, "grad_norm": 0.8885706209333719, "learning_rate": 0.00019569961757802298, "loss": 0.4161, "step": 318 }, { "epoch": 0.61, "grad_norm": 1.1929960857421218, "learning_rate": 0.0001956638187249019, "loss": 0.5821, "step": 319 }, { "epoch": 0.61, "grad_norm": 1.171984618833384, "learning_rate": 0.00019562787478545377, "loss": 0.7065, "step": 320 }, { "epoch": 0.61, "grad_norm": 0.9471738270897865, "learning_rate": 0.00019559178581419215, "loss": 0.537, "step": 321 }, { "epoch": 0.61, "grad_norm": 0.7889195077811647, "learning_rate": 0.00019555555186585055, "loss": 0.413, "step": 322 }, { "epoch": 0.61, "grad_norm": 1.0141044572081825, "learning_rate": 0.00019551917299538232, "loss": 0.566, "step": 323 }, { "epoch": 0.62, "grad_norm": 0.8249044014332252, "learning_rate": 0.00019548264925796068, "loss": 0.3715, "step": 324 }, { "epoch": 0.62, "grad_norm": 1.0948778457755304, "learning_rate": 0.00019544598070897847, "loss": 0.5534, "step": 325 }, { "epoch": 0.62, "grad_norm": 0.8299560384683674, "learning_rate": 0.0001954091674040482, "loss": 0.3924, "step": 326 }, { "epoch": 0.62, "grad_norm": 1.0109552761057785, "learning_rate": 0.00019537220939900202, "loss": 0.4172, "step": 327 }, { "epoch": 0.62, "grad_norm": 0.9193146379473909, "learning_rate": 0.00019533510674989127, "loss": 0.4723, "step": 328 }, { "epoch": 0.63, "grad_norm": 0.8464902006456488, "learning_rate": 0.00019529785951298697, "loss": 0.4061, "step": 329 }, { "epoch": 0.63, "grad_norm": 0.8665252742833923, "learning_rate": 0.0001952604677447792, "loss": 0.3516, "step": 330 }, { "epoch": 0.63, "grad_norm": 0.9242785943857765, "learning_rate": 0.00019522293150197738, "loss": 0.3931, "step": 331 }, { "epoch": 0.63, "grad_norm": 1.123640629717831, "learning_rate": 0.00019518525084150995, "loss": 0.487, "step": 332 }, { "epoch": 0.63, "grad_norm": 0.8912757779249998, "learning_rate": 0.00019514742582052446, "loss": 0.4297, "step": 333 }, { "epoch": 0.63, "grad_norm": 0.9599696136982607, "learning_rate": 0.0001951094564963873, "loss": 0.4399, "step": 334 }, { "epoch": 0.64, "grad_norm": 0.7752845138543624, "learning_rate": 0.00019507134292668377, "loss": 0.4005, "step": 335 }, { "epoch": 0.64, "grad_norm": 0.8343015903326114, "learning_rate": 0.000195033085169218, "loss": 0.4976, "step": 336 }, { "epoch": 0.64, "grad_norm": 0.9652826829639269, "learning_rate": 0.00019499468328201269, "loss": 0.523, "step": 337 }, { "epoch": 0.64, "grad_norm": 0.8201404920189374, "learning_rate": 0.00019495613732330917, "loss": 0.4228, "step": 338 }, { "epoch": 0.64, "grad_norm": 0.9056264910288868, "learning_rate": 0.00019491744735156732, "loss": 0.4212, "step": 339 }, { "epoch": 0.65, "grad_norm": 0.9372826096785716, "learning_rate": 0.00019487861342546537, "loss": 0.4938, "step": 340 }, { "epoch": 0.65, "grad_norm": 0.9258551008769413, "learning_rate": 0.0001948396356038999, "loss": 0.4338, "step": 341 }, { "epoch": 0.65, "grad_norm": 0.8352225047795613, "learning_rate": 0.00019480051394598572, "loss": 0.3447, "step": 342 }, { "epoch": 0.65, "grad_norm": 1.0264479537977966, "learning_rate": 0.00019476124851105578, "loss": 0.4632, "step": 343 }, { "epoch": 0.65, "grad_norm": 0.8505576431535298, "learning_rate": 0.00019472183935866118, "loss": 0.3723, "step": 344 }, { "epoch": 0.66, "grad_norm": 0.9354718395920592, "learning_rate": 0.0001946822865485708, "loss": 0.4314, "step": 345 }, { "epoch": 0.66, "grad_norm": 0.7884285228091152, "learning_rate": 0.00019464259014077153, "loss": 0.327, "step": 346 }, { "epoch": 0.66, "grad_norm": 0.8857013416185759, "learning_rate": 0.00019460275019546802, "loss": 0.4415, "step": 347 }, { "epoch": 0.66, "grad_norm": 1.0337778270922653, "learning_rate": 0.00019456276677308262, "loss": 0.5053, "step": 348 }, { "epoch": 0.66, "grad_norm": 0.8426388041297315, "learning_rate": 0.00019452263993425522, "loss": 0.3727, "step": 349 }, { "epoch": 0.67, "grad_norm": 0.9643386093744049, "learning_rate": 0.00019448236973984334, "loss": 0.4228, "step": 350 }, { "epoch": 0.67, "grad_norm": 0.784171556407315, "learning_rate": 0.0001944419562509218, "loss": 0.2981, "step": 351 }, { "epoch": 0.67, "grad_norm": 0.7902823530901618, "learning_rate": 0.00019440139952878275, "loss": 0.3877, "step": 352 }, { "epoch": 0.67, "grad_norm": 0.7613934765127186, "learning_rate": 0.00019436069963493568, "loss": 0.3346, "step": 353 }, { "epoch": 0.67, "grad_norm": 0.8999251758392433, "learning_rate": 0.0001943198566311071, "loss": 0.4133, "step": 354 }, { "epoch": 0.67, "grad_norm": 0.9451952672780423, "learning_rate": 0.0001942788705792406, "loss": 0.4108, "step": 355 }, { "epoch": 0.68, "grad_norm": 0.9304585718649663, "learning_rate": 0.0001942377415414968, "loss": 0.4253, "step": 356 }, { "epoch": 0.68, "grad_norm": 0.8939297989653083, "learning_rate": 0.00019419646958025304, "loss": 0.3837, "step": 357 }, { "epoch": 0.68, "grad_norm": 0.8443810916661918, "learning_rate": 0.00019415505475810352, "loss": 0.4014, "step": 358 }, { "epoch": 0.68, "grad_norm": 0.8541552488529793, "learning_rate": 0.00019411349713785905, "loss": 0.3856, "step": 359 }, { "epoch": 0.68, "grad_norm": 0.8643474725642729, "learning_rate": 0.00019407179678254707, "loss": 0.4447, "step": 360 }, { "epoch": 0.69, "grad_norm": 0.8853502057127087, "learning_rate": 0.00019402995375541145, "loss": 0.3772, "step": 361 }, { "epoch": 0.69, "grad_norm": 1.036433422250706, "learning_rate": 0.00019398796811991243, "loss": 0.5337, "step": 362 }, { "epoch": 0.69, "grad_norm": 0.7798025777608153, "learning_rate": 0.0001939458399397266, "loss": 0.3306, "step": 363 }, { "epoch": 0.69, "grad_norm": 0.9322109536587133, "learning_rate": 0.00019390356927874666, "loss": 0.4802, "step": 364 }, { "epoch": 0.69, "grad_norm": 0.8730127906250732, "learning_rate": 0.00019386115620108148, "loss": 0.3313, "step": 365 }, { "epoch": 0.7, "grad_norm": 0.7986332661105695, "learning_rate": 0.0001938186007710558, "loss": 0.427, "step": 366 }, { "epoch": 0.7, "grad_norm": 0.8844969011205849, "learning_rate": 0.0001937759030532104, "loss": 0.5009, "step": 367 }, { "epoch": 0.7, "grad_norm": 0.9311717423637607, "learning_rate": 0.00019373306311230176, "loss": 0.4125, "step": 368 }, { "epoch": 0.7, "grad_norm": 0.8765188583513654, "learning_rate": 0.0001936900810133021, "loss": 0.3846, "step": 369 }, { "epoch": 0.7, "grad_norm": 0.8550045791563712, "learning_rate": 0.00019364695682139922, "loss": 0.4687, "step": 370 }, { "epoch": 0.71, "grad_norm": 1.0304029410593343, "learning_rate": 0.00019360369060199647, "loss": 0.5129, "step": 371 }, { "epoch": 0.71, "grad_norm": 0.8601397225350441, "learning_rate": 0.00019356028242071248, "loss": 0.3505, "step": 372 }, { "epoch": 0.71, "grad_norm": 0.8191140637338483, "learning_rate": 0.00019351673234338138, "loss": 0.3602, "step": 373 }, { "epoch": 0.71, "grad_norm": 0.9583434030495129, "learning_rate": 0.0001934730404360523, "loss": 0.4882, "step": 374 }, { "epoch": 0.71, "grad_norm": 0.8260925799541268, "learning_rate": 0.0001934292067649896, "loss": 0.4086, "step": 375 }, { "epoch": 0.71, "grad_norm": 0.7651427982511435, "learning_rate": 0.00019338523139667262, "loss": 0.3537, "step": 376 }, { "epoch": 0.72, "grad_norm": 0.7957622298608096, "learning_rate": 0.00019334111439779558, "loss": 0.3456, "step": 377 }, { "epoch": 0.72, "grad_norm": 0.8628900741999042, "learning_rate": 0.00019329685583526748, "loss": 0.3831, "step": 378 }, { "epoch": 0.72, "grad_norm": 0.9236071632116489, "learning_rate": 0.00019325245577621209, "loss": 0.406, "step": 379 }, { "epoch": 0.72, "grad_norm": 0.7994941529277539, "learning_rate": 0.00019320791428796774, "loss": 0.3208, "step": 380 }, { "epoch": 0.72, "grad_norm": 0.953737050468449, "learning_rate": 0.00019316323143808722, "loss": 0.3885, "step": 381 }, { "epoch": 0.73, "grad_norm": 0.972714086728551, "learning_rate": 0.00019311840729433773, "loss": 0.4543, "step": 382 }, { "epoch": 0.73, "grad_norm": 0.9498252306752221, "learning_rate": 0.00019307344192470084, "loss": 0.4536, "step": 383 }, { "epoch": 0.73, "grad_norm": 0.7907197333771339, "learning_rate": 0.0001930283353973722, "loss": 0.3494, "step": 384 }, { "epoch": 0.73, "grad_norm": 0.8432527823659746, "learning_rate": 0.0001929830877807616, "loss": 0.4099, "step": 385 }, { "epoch": 0.73, "grad_norm": 0.7298782314992149, "learning_rate": 0.0001929376991434928, "loss": 0.2966, "step": 386 }, { "epoch": 0.74, "grad_norm": 0.8010869709489054, "learning_rate": 0.00019289216955440338, "loss": 0.3478, "step": 387 }, { "epoch": 0.74, "grad_norm": 0.8315659349052836, "learning_rate": 0.00019284649908254479, "loss": 0.4672, "step": 388 }, { "epoch": 0.74, "grad_norm": 0.8428304568349718, "learning_rate": 0.0001928006877971821, "loss": 0.4168, "step": 389 }, { "epoch": 0.74, "grad_norm": 1.0415549597866942, "learning_rate": 0.00019275473576779395, "loss": 0.5222, "step": 390 }, { "epoch": 0.74, "grad_norm": 0.8765315615011869, "learning_rate": 0.00019270864306407238, "loss": 0.4136, "step": 391 }, { "epoch": 0.75, "grad_norm": 0.8752295378750232, "learning_rate": 0.00019266240975592287, "loss": 0.4304, "step": 392 }, { "epoch": 0.75, "grad_norm": 0.7685858900227794, "learning_rate": 0.00019261603591346407, "loss": 0.3986, "step": 393 }, { "epoch": 0.75, "grad_norm": 0.7937208514458222, "learning_rate": 0.00019256952160702783, "loss": 0.3922, "step": 394 }, { "epoch": 0.75, "grad_norm": 0.8335338329014955, "learning_rate": 0.000192522866907159, "loss": 0.4103, "step": 395 }, { "epoch": 0.75, "grad_norm": 0.916854882603191, "learning_rate": 0.00019247607188461533, "loss": 0.4377, "step": 396 }, { "epoch": 0.75, "grad_norm": 0.8729801271359788, "learning_rate": 0.00019242913661036742, "loss": 0.3584, "step": 397 }, { "epoch": 0.76, "grad_norm": 1.0032121956987208, "learning_rate": 0.0001923820611555986, "loss": 0.4644, "step": 398 }, { "epoch": 0.76, "grad_norm": 0.8204760660771041, "learning_rate": 0.00019233484559170474, "loss": 0.419, "step": 399 }, { "epoch": 0.76, "grad_norm": 0.7533330515920971, "learning_rate": 0.00019228748999029424, "loss": 0.3485, "step": 400 }, { "epoch": 0.76, "eval_blimp_filtered_avg": 0.7294029850746269, "eval_blimp_filtered_std": 0.004894576361142103, "step": 400 }, { "epoch": 0.76, "eval_blimp_supplement_avg": 0.8146551724137931, "eval_blimp_supplement_std": 0.01703137971434146, "step": 400 }, { "epoch": 0.76, "eval_vqa_filtered_avg": 0.49, "eval_vqa_filtered_std": 0.05024183937956912, "step": 400 }, { "epoch": 0.76, "eval_winoground_filtered_avg": 0.63, "eval_winoground_filtered_std": 0.048523658709391, "step": 400 }, { "epoch": 0.76, "grad_norm": 0.8231222082354804, "learning_rate": 0.00019223999442318792, "loss": 0.3709, "step": 401 }, { "epoch": 0.76, "grad_norm": 0.7517103882633678, "learning_rate": 0.00019219235896241878, "loss": 0.4009, "step": 402 }, { "epoch": 0.77, "grad_norm": 0.7537822631218105, "learning_rate": 0.00019214458368023213, "loss": 0.3135, "step": 403 }, { "epoch": 0.77, "grad_norm": 0.8491002671181301, "learning_rate": 0.00019209666864908513, "loss": 0.3615, "step": 404 }, { "epoch": 0.77, "grad_norm": 0.9524671354370461, "learning_rate": 0.00019204861394164712, "loss": 0.4254, "step": 405 }, { "epoch": 0.77, "grad_norm": 0.9727494317531464, "learning_rate": 0.0001920004196307991, "loss": 0.42, "step": 406 }, { "epoch": 0.77, "grad_norm": 0.9417175477394744, "learning_rate": 0.0001919520857896339, "loss": 0.3945, "step": 407 }, { "epoch": 0.78, "grad_norm": 0.8042105242162203, "learning_rate": 0.0001919036124914559, "loss": 0.3218, "step": 408 }, { "epoch": 0.78, "grad_norm": 0.878103893157393, "learning_rate": 0.000191854999809781, "loss": 0.404, "step": 409 }, { "epoch": 0.78, "grad_norm": 0.8675957071276235, "learning_rate": 0.00019180624781833653, "loss": 0.4425, "step": 410 }, { "epoch": 0.78, "grad_norm": 0.8398153942171104, "learning_rate": 0.0001917573565910611, "loss": 0.4392, "step": 411 }, { "epoch": 0.78, "grad_norm": 0.8164251440897234, "learning_rate": 0.0001917083262021044, "loss": 0.3698, "step": 412 }, { "epoch": 0.79, "grad_norm": 0.819277562993247, "learning_rate": 0.00019165915672582728, "loss": 0.3399, "step": 413 }, { "epoch": 0.79, "grad_norm": 0.7996339118370486, "learning_rate": 0.0001916098482368015, "loss": 0.3453, "step": 414 }, { "epoch": 0.79, "grad_norm": 0.6936221463278008, "learning_rate": 0.00019156040080980962, "loss": 0.2801, "step": 415 }, { "epoch": 0.79, "grad_norm": 0.7885645590569011, "learning_rate": 0.00019151081451984495, "loss": 0.3586, "step": 416 }, { "epoch": 0.79, "grad_norm": 0.8148386893083517, "learning_rate": 0.00019146108944211138, "loss": 0.3912, "step": 417 }, { "epoch": 0.79, "grad_norm": 0.9136095073255519, "learning_rate": 0.00019141122565202335, "loss": 0.3324, "step": 418 }, { "epoch": 0.8, "grad_norm": 0.981292046185523, "learning_rate": 0.00019136122322520555, "loss": 0.4756, "step": 419 }, { "epoch": 0.8, "grad_norm": 0.9595566234177066, "learning_rate": 0.0001913110822374931, "loss": 0.3539, "step": 420 }, { "epoch": 0.8, "grad_norm": 0.8058308328622027, "learning_rate": 0.0001912608027649311, "loss": 0.2976, "step": 421 }, { "epoch": 0.8, "grad_norm": 0.7071647386912141, "learning_rate": 0.00019121038488377481, "loss": 0.2459, "step": 422 }, { "epoch": 0.8, "grad_norm": 0.8633809109964284, "learning_rate": 0.0001911598286704893, "loss": 0.3639, "step": 423 }, { "epoch": 0.81, "grad_norm": 0.7249829487528143, "learning_rate": 0.00019110913420174953, "loss": 0.2811, "step": 424 }, { "epoch": 0.81, "grad_norm": 1.079166049828545, "learning_rate": 0.00019105830155444006, "loss": 0.4888, "step": 425 }, { "epoch": 0.81, "grad_norm": 0.8940271965295993, "learning_rate": 0.00019100733080565507, "loss": 0.3883, "step": 426 }, { "epoch": 0.81, "grad_norm": 1.0674883572080187, "learning_rate": 0.00019095622203269818, "loss": 0.4651, "step": 427 }, { "epoch": 0.81, "grad_norm": 0.7698181144620465, "learning_rate": 0.00019090497531308232, "loss": 0.3614, "step": 428 }, { "epoch": 0.82, "grad_norm": 0.7249551829839687, "learning_rate": 0.00019085359072452965, "loss": 0.3328, "step": 429 }, { "epoch": 0.82, "grad_norm": 0.7333155511647317, "learning_rate": 0.00019080206834497143, "loss": 0.3185, "step": 430 }, { "epoch": 0.82, "grad_norm": 0.8685064192757026, "learning_rate": 0.0001907504082525479, "loss": 0.3908, "step": 431 }, { "epoch": 0.82, "grad_norm": 0.9214637444603775, "learning_rate": 0.00019069861052560812, "loss": 0.3534, "step": 432 }, { "epoch": 0.82, "grad_norm": 0.8589720382301342, "learning_rate": 0.00019064667524270994, "loss": 0.3474, "step": 433 }, { "epoch": 0.83, "grad_norm": 0.9838771556990461, "learning_rate": 0.00019059460248261977, "loss": 0.4869, "step": 434 }, { "epoch": 0.83, "grad_norm": 0.751104133504552, "learning_rate": 0.00019054239232431264, "loss": 0.2767, "step": 435 }, { "epoch": 0.83, "grad_norm": 0.7372825001651253, "learning_rate": 0.00019049004484697184, "loss": 0.3003, "step": 436 }, { "epoch": 0.83, "grad_norm": 0.7855632324187257, "learning_rate": 0.00019043756012998895, "loss": 0.2888, "step": 437 }, { "epoch": 0.83, "grad_norm": 0.9507135792362003, "learning_rate": 0.0001903849382529637, "loss": 0.4293, "step": 438 }, { "epoch": 0.83, "grad_norm": 0.712659839965583, "learning_rate": 0.00019033217929570391, "loss": 0.3119, "step": 439 }, { "epoch": 0.84, "grad_norm": 0.7878566954812354, "learning_rate": 0.00019027928333822515, "loss": 0.3668, "step": 440 }, { "epoch": 0.84, "grad_norm": 0.6010996094629223, "learning_rate": 0.00019022625046075095, "loss": 0.2624, "step": 441 }, { "epoch": 0.84, "grad_norm": 0.7482905155342765, "learning_rate": 0.00019017308074371233, "loss": 0.3223, "step": 442 }, { "epoch": 0.84, "grad_norm": 0.7891853478624358, "learning_rate": 0.00019011977426774797, "loss": 0.3361, "step": 443 }, { "epoch": 0.84, "grad_norm": 0.9927766728824884, "learning_rate": 0.00019006633111370386, "loss": 0.3462, "step": 444 }, { "epoch": 0.85, "grad_norm": 0.7738801089990177, "learning_rate": 0.00019001275136263333, "loss": 0.3148, "step": 445 }, { "epoch": 0.85, "grad_norm": 0.9433913279300008, "learning_rate": 0.00018995903509579694, "loss": 0.4532, "step": 446 }, { "epoch": 0.85, "grad_norm": 1.014602206710638, "learning_rate": 0.00018990518239466215, "loss": 0.4457, "step": 447 }, { "epoch": 0.85, "grad_norm": 0.7828637785674653, "learning_rate": 0.00018985119334090348, "loss": 0.3057, "step": 448 }, { "epoch": 0.85, "grad_norm": 0.8617361102591325, "learning_rate": 0.00018979706801640212, "loss": 0.3863, "step": 449 }, { "epoch": 0.86, "grad_norm": 0.7857022032100281, "learning_rate": 0.00018974280650324606, "loss": 0.3578, "step": 450 }, { "epoch": 0.86, "grad_norm": 0.720765282412796, "learning_rate": 0.00018968840888372972, "loss": 0.2558, "step": 451 }, { "epoch": 0.86, "grad_norm": 0.9022114041839514, "learning_rate": 0.00018963387524035405, "loss": 0.3526, "step": 452 }, { "epoch": 0.86, "grad_norm": 0.9501105608708412, "learning_rate": 0.0001895792056558262, "loss": 0.3477, "step": 453 }, { "epoch": 0.86, "grad_norm": 0.9002797979465998, "learning_rate": 0.00018952440021305958, "loss": 0.4137, "step": 454 }, { "epoch": 0.87, "grad_norm": 0.7600807671247659, "learning_rate": 0.00018946945899517355, "loss": 0.3113, "step": 455 }, { "epoch": 0.87, "grad_norm": 0.8802707826847642, "learning_rate": 0.00018941438208549348, "loss": 0.327, "step": 456 }, { "epoch": 0.87, "grad_norm": 0.8187586443070746, "learning_rate": 0.00018935916956755049, "loss": 0.348, "step": 457 }, { "epoch": 0.87, "grad_norm": 0.8862118053296703, "learning_rate": 0.00018930382152508135, "loss": 0.3192, "step": 458 }, { "epoch": 0.87, "grad_norm": 0.8688603508348431, "learning_rate": 0.0001892483380420284, "loss": 0.3846, "step": 459 }, { "epoch": 0.87, "grad_norm": 0.839176129748898, "learning_rate": 0.00018919271920253946, "loss": 0.3654, "step": 460 }, { "epoch": 0.88, "grad_norm": 0.8056955576829697, "learning_rate": 0.00018913696509096744, "loss": 0.3641, "step": 461 }, { "epoch": 0.88, "grad_norm": 0.791755740101033, "learning_rate": 0.00018908107579187062, "loss": 0.3249, "step": 462 }, { "epoch": 0.88, "grad_norm": 0.7327839937087249, "learning_rate": 0.00018902505139001217, "loss": 0.3141, "step": 463 }, { "epoch": 0.88, "grad_norm": 0.6114510772274955, "learning_rate": 0.00018896889197036028, "loss": 0.2739, "step": 464 }, { "epoch": 0.88, "grad_norm": 0.8230140065469466, "learning_rate": 0.0001889125976180877, "loss": 0.2872, "step": 465 }, { "epoch": 0.89, "grad_norm": 0.7974918649963386, "learning_rate": 0.00018885616841857213, "loss": 0.3281, "step": 466 }, { "epoch": 0.89, "grad_norm": 1.0287993767432624, "learning_rate": 0.00018879960445739545, "loss": 0.3955, "step": 467 }, { "epoch": 0.89, "grad_norm": 0.8253204559022008, "learning_rate": 0.00018874290582034426, "loss": 0.3583, "step": 468 }, { "epoch": 0.89, "grad_norm": 0.8721280798079037, "learning_rate": 0.00018868607259340912, "loss": 0.3398, "step": 469 }, { "epoch": 0.89, "grad_norm": 0.9881138304924735, "learning_rate": 0.00018862910486278485, "loss": 0.4233, "step": 470 }, { "epoch": 0.9, "grad_norm": 0.8465065749267051, "learning_rate": 0.0001885720027148703, "loss": 0.2813, "step": 471 }, { "epoch": 0.9, "grad_norm": 0.7366414765735947, "learning_rate": 0.00018851476623626804, "loss": 0.2362, "step": 472 }, { "epoch": 0.9, "grad_norm": 0.7613269681521384, "learning_rate": 0.00018845739551378454, "loss": 0.2927, "step": 473 }, { "epoch": 0.9, "grad_norm": 0.8962271890877377, "learning_rate": 0.00018839989063442974, "loss": 0.3305, "step": 474 }, { "epoch": 0.9, "grad_norm": 0.7429224101513932, "learning_rate": 0.00018834225168541714, "loss": 0.267, "step": 475 }, { "epoch": 0.9, "grad_norm": 0.7332998128751836, "learning_rate": 0.0001882844787541635, "loss": 0.2611, "step": 476 }, { "epoch": 0.91, "grad_norm": 0.8056080640867231, "learning_rate": 0.00018822657192828877, "loss": 0.3379, "step": 477 }, { "epoch": 0.91, "grad_norm": 0.6958487507712043, "learning_rate": 0.00018816853129561601, "loss": 0.2965, "step": 478 }, { "epoch": 0.91, "grad_norm": 0.7486686725101088, "learning_rate": 0.00018811035694417126, "loss": 0.2827, "step": 479 }, { "epoch": 0.91, "grad_norm": 0.8489225897101828, "learning_rate": 0.00018805204896218326, "loss": 0.3048, "step": 480 }, { "epoch": 0.91, "grad_norm": 0.9239566775695283, "learning_rate": 0.00018799360743808346, "loss": 0.3458, "step": 481 }, { "epoch": 0.92, "grad_norm": 0.9417388428899055, "learning_rate": 0.00018793503246050593, "loss": 0.3506, "step": 482 }, { "epoch": 0.92, "grad_norm": 0.8897296810525606, "learning_rate": 0.00018787632411828697, "loss": 0.2606, "step": 483 }, { "epoch": 0.92, "grad_norm": 0.8526505863066965, "learning_rate": 0.0001878174825004653, "loss": 0.2573, "step": 484 }, { "epoch": 0.92, "grad_norm": 0.81797333559419, "learning_rate": 0.00018775850769628164, "loss": 0.2729, "step": 485 }, { "epoch": 0.92, "grad_norm": 0.9107012166969273, "learning_rate": 0.00018769939979517883, "loss": 0.3203, "step": 486 }, { "epoch": 0.93, "grad_norm": 0.8973657411870988, "learning_rate": 0.0001876401588868015, "loss": 0.305, "step": 487 }, { "epoch": 0.93, "grad_norm": 0.9105631256208211, "learning_rate": 0.00018758078506099594, "loss": 0.2837, "step": 488 }, { "epoch": 0.93, "grad_norm": 0.9153767386819269, "learning_rate": 0.00018752127840781016, "loss": 0.355, "step": 489 }, { "epoch": 0.93, "grad_norm": 0.7690258817066227, "learning_rate": 0.00018746163901749356, "loss": 0.2683, "step": 490 }, { "epoch": 0.93, "grad_norm": 0.8889835881593598, "learning_rate": 0.00018740186698049682, "loss": 0.3187, "step": 491 }, { "epoch": 0.94, "grad_norm": 0.8707162215414088, "learning_rate": 0.00018734196238747185, "loss": 0.3057, "step": 492 }, { "epoch": 0.94, "grad_norm": 0.8378544650381443, "learning_rate": 0.0001872819253292715, "loss": 0.3285, "step": 493 }, { "epoch": 0.94, "grad_norm": 0.6866732968669836, "learning_rate": 0.0001872217558969497, "loss": 0.2649, "step": 494 }, { "epoch": 0.94, "grad_norm": 0.7404145314492061, "learning_rate": 0.00018716145418176092, "loss": 0.2665, "step": 495 }, { "epoch": 0.94, "grad_norm": 0.7424795106056246, "learning_rate": 0.00018710102027516047, "loss": 0.307, "step": 496 }, { "epoch": 0.94, "grad_norm": 0.7555120550419504, "learning_rate": 0.00018704045426880397, "loss": 0.242, "step": 497 }, { "epoch": 0.95, "grad_norm": 0.7412457866679383, "learning_rate": 0.0001869797562545475, "loss": 0.2463, "step": 498 }, { "epoch": 0.95, "grad_norm": 1.0061249989413361, "learning_rate": 0.0001869189263244473, "loss": 0.3577, "step": 499 }, { "epoch": 0.95, "grad_norm": 0.7919444214454764, "learning_rate": 0.00018685796457075963, "loss": 0.2545, "step": 500 }, { "epoch": 0.95, "eval_blimp_filtered_avg": 0.7246268656716418, "eval_blimp_filtered_std": 0.004912656216129938, "step": 500 }, { "epoch": 0.95, "eval_blimp_supplement_avg": 0.7931034482758621, "eval_blimp_supplement_std": 0.017927568607443472, "step": 500 }, { "epoch": 0.95, "eval_vqa_filtered_avg": 0.49, "eval_vqa_filtered_std": 0.05024183937956912, "step": 500 }, { "epoch": 0.95, "eval_winoground_filtered_avg": 0.65, "eval_winoground_filtered_std": 0.04793724854411019, "step": 500 }, { "epoch": 0.95, "grad_norm": 0.9047024215683449, "learning_rate": 0.00018679687108594082, "loss": 0.2181, "step": 501 }, { "epoch": 0.95, "grad_norm": 0.945045751426577, "learning_rate": 0.0001867356459626468, "loss": 0.3297, "step": 502 }, { "epoch": 0.96, "grad_norm": 0.7708941556671836, "learning_rate": 0.00018667428929373335, "loss": 0.2802, "step": 503 }, { "epoch": 0.96, "grad_norm": 0.7864576290257109, "learning_rate": 0.00018661280117225555, "loss": 0.2363, "step": 504 }, { "epoch": 0.96, "grad_norm": 0.8807410411303845, "learning_rate": 0.00018655118169146797, "loss": 0.3465, "step": 505 }, { "epoch": 0.96, "grad_norm": 0.8177576437866109, "learning_rate": 0.00018648943094482442, "loss": 0.3139, "step": 506 }, { "epoch": 0.96, "grad_norm": 0.7291615503092084, "learning_rate": 0.0001864275490259777, "loss": 0.2722, "step": 507 }, { "epoch": 0.97, "grad_norm": 0.876001876601026, "learning_rate": 0.00018636553602877963, "loss": 0.3268, "step": 508 }, { "epoch": 0.97, "grad_norm": 0.7435709315376433, "learning_rate": 0.00018630339204728076, "loss": 0.3153, "step": 509 }, { "epoch": 0.97, "grad_norm": 0.7496362728750922, "learning_rate": 0.00018624111717573035, "loss": 0.2486, "step": 510 }, { "epoch": 0.97, "grad_norm": 0.699719666300313, "learning_rate": 0.00018617871150857616, "loss": 0.2023, "step": 511 }, { "epoch": 0.97, "grad_norm": 0.6835547928928958, "learning_rate": 0.00018611617514046426, "loss": 0.2638, "step": 512 }, { "epoch": 0.98, "grad_norm": 0.9864560221119604, "learning_rate": 0.00018605350816623903, "loss": 0.407, "step": 513 }, { "epoch": 0.98, "grad_norm": 0.9479030208862049, "learning_rate": 0.0001859907106809429, "loss": 0.3656, "step": 514 }, { "epoch": 0.98, "grad_norm": 0.7105931176285886, "learning_rate": 0.0001859277827798162, "loss": 0.2721, "step": 515 }, { "epoch": 0.98, "grad_norm": 0.5946932016498732, "learning_rate": 0.00018586472455829714, "loss": 0.2256, "step": 516 }, { "epoch": 0.98, "grad_norm": 0.7006489244919021, "learning_rate": 0.00018580153611202143, "loss": 0.2209, "step": 517 }, { "epoch": 0.98, "grad_norm": 0.8638550022122704, "learning_rate": 0.00018573821753682242, "loss": 0.3789, "step": 518 }, { "epoch": 0.99, "grad_norm": 0.7696877027674492, "learning_rate": 0.0001856747689287308, "loss": 0.2524, "step": 519 }, { "epoch": 0.99, "grad_norm": 0.735148803035053, "learning_rate": 0.0001856111903839744, "loss": 0.2498, "step": 520 }, { "epoch": 0.99, "grad_norm": 0.7092404082977668, "learning_rate": 0.00018554748199897813, "loss": 0.1818, "step": 521 }, { "epoch": 0.99, "grad_norm": 0.6986274260768622, "learning_rate": 0.0001854836438703639, "loss": 0.2443, "step": 522 }, { "epoch": 0.99, "grad_norm": 0.8162573044603693, "learning_rate": 0.00018541967609495032, "loss": 0.2802, "step": 523 }, { "epoch": 1.0, "grad_norm": 0.9104619939280256, "learning_rate": 0.00018535557876975264, "loss": 0.3385, "step": 524 }, { "epoch": 1.0, "grad_norm": 0.8429995487287484, "learning_rate": 0.00018529135199198257, "loss": 0.3274, "step": 525 }, { "epoch": 1.0, "grad_norm": 0.68050368758629, "learning_rate": 0.00018522699585904822, "loss": 0.2277, "step": 526 }, { "epoch": 1.0, "grad_norm": 0.5549516256327602, "learning_rate": 0.0001851625104685538, "loss": 0.1786, "step": 527 }, { "epoch": 1.0, "grad_norm": 0.7010438305488472, "learning_rate": 0.00018509789591829957, "loss": 0.2392, "step": 528 }, { "epoch": 1.01, "grad_norm": 0.8108489518311869, "learning_rate": 0.00018503315230628176, "loss": 0.2187, "step": 529 }, { "epoch": 1.01, "grad_norm": 0.6633969449874358, "learning_rate": 0.00018496827973069223, "loss": 0.1957, "step": 530 }, { "epoch": 1.01, "grad_norm": 0.5981280489163264, "learning_rate": 0.00018490327828991852, "loss": 0.2028, "step": 531 }, { "epoch": 1.01, "grad_norm": 0.5193440542325414, "learning_rate": 0.0001848381480825435, "loss": 0.1578, "step": 532 }, { "epoch": 1.01, "grad_norm": 0.7018267767314952, "learning_rate": 0.0001847728892073455, "loss": 0.154, "step": 533 }, { "epoch": 1.02, "grad_norm": 0.7207634258492159, "learning_rate": 0.00018470750176329781, "loss": 0.1911, "step": 534 }, { "epoch": 1.02, "grad_norm": 0.8742288398180306, "learning_rate": 0.0001846419858495688, "loss": 0.1793, "step": 535 }, { "epoch": 1.02, "grad_norm": 0.8130965751347197, "learning_rate": 0.00018457634156552168, "loss": 0.1928, "step": 536 }, { "epoch": 1.02, "grad_norm": 0.6749065543181691, "learning_rate": 0.00018451056901071434, "loss": 0.1377, "step": 537 }, { "epoch": 1.02, "grad_norm": 0.8444171754053439, "learning_rate": 0.00018444466828489923, "loss": 0.1405, "step": 538 }, { "epoch": 1.02, "grad_norm": 0.7067221925471963, "learning_rate": 0.00018437863948802315, "loss": 0.1409, "step": 539 }, { "epoch": 1.03, "grad_norm": 0.7860935704345432, "learning_rate": 0.00018431248272022716, "loss": 0.1842, "step": 540 }, { "epoch": 1.03, "grad_norm": 0.6603847667707058, "learning_rate": 0.0001842461980818464, "loss": 0.1739, "step": 541 }, { "epoch": 1.03, "grad_norm": 0.6894212939553926, "learning_rate": 0.00018417978567340996, "loss": 0.1697, "step": 542 }, { "epoch": 1.03, "grad_norm": 0.7201701072784407, "learning_rate": 0.0001841132455956406, "loss": 0.1811, "step": 543 }, { "epoch": 1.03, "grad_norm": 0.8733877698046052, "learning_rate": 0.0001840465779494549, "loss": 0.2357, "step": 544 }, { "epoch": 1.04, "grad_norm": 0.7829823954809882, "learning_rate": 0.00018397978283596274, "loss": 0.1621, "step": 545 }, { "epoch": 1.04, "grad_norm": 0.6816321161120285, "learning_rate": 0.0001839128603564675, "loss": 0.174, "step": 546 }, { "epoch": 1.04, "grad_norm": 0.932289749254496, "learning_rate": 0.00018384581061246547, "loss": 0.2239, "step": 547 }, { "epoch": 1.04, "grad_norm": 0.7370846714040882, "learning_rate": 0.00018377863370564617, "loss": 0.206, "step": 548 }, { "epoch": 1.04, "grad_norm": 0.7751222247963414, "learning_rate": 0.00018371132973789192, "loss": 0.1989, "step": 549 }, { "epoch": 1.05, "grad_norm": 0.6646740482106583, "learning_rate": 0.0001836438988112777, "loss": 0.1843, "step": 550 }, { "epoch": 1.05, "grad_norm": 0.8160625426882251, "learning_rate": 0.00018357634102807112, "loss": 0.2295, "step": 551 }, { "epoch": 1.05, "grad_norm": 0.7709914014974445, "learning_rate": 0.0001835086564907321, "loss": 0.1527, "step": 552 }, { "epoch": 1.05, "grad_norm": 0.6576540831558555, "learning_rate": 0.00018344084530191282, "loss": 0.1685, "step": 553 }, { "epoch": 1.05, "grad_norm": 0.7404511688778156, "learning_rate": 0.00018337290756445757, "loss": 0.1695, "step": 554 }, { "epoch": 1.06, "grad_norm": 0.6562771631997727, "learning_rate": 0.0001833048433814026, "loss": 0.1301, "step": 555 }, { "epoch": 1.06, "grad_norm": 0.8423127212808739, "learning_rate": 0.00018323665285597583, "loss": 0.2216, "step": 556 }, { "epoch": 1.06, "grad_norm": 0.7533482652362475, "learning_rate": 0.00018316833609159682, "loss": 0.1802, "step": 557 }, { "epoch": 1.06, "grad_norm": 0.6832228076177387, "learning_rate": 0.00018309989319187673, "loss": 0.1767, "step": 558 }, { "epoch": 1.06, "grad_norm": 0.8553554619120222, "learning_rate": 0.00018303132426061782, "loss": 0.1997, "step": 559 }, { "epoch": 1.06, "grad_norm": 0.68297241613415, "learning_rate": 0.0001829626294018136, "loss": 0.1779, "step": 560 }, { "epoch": 1.07, "grad_norm": 0.7079636600509892, "learning_rate": 0.0001828938087196485, "loss": 0.1747, "step": 561 }, { "epoch": 1.07, "grad_norm": 0.5962882906471558, "learning_rate": 0.00018282486231849794, "loss": 0.1454, "step": 562 }, { "epoch": 1.07, "grad_norm": 0.6784431496569886, "learning_rate": 0.00018275579030292778, "loss": 0.1605, "step": 563 }, { "epoch": 1.07, "grad_norm": 0.8997481596223621, "learning_rate": 0.00018268659277769457, "loss": 0.1761, "step": 564 }, { "epoch": 1.07, "grad_norm": 0.5267405203952372, "learning_rate": 0.00018261726984774508, "loss": 0.1202, "step": 565 }, { "epoch": 1.08, "grad_norm": 0.7868423383529788, "learning_rate": 0.0001825478216182164, "loss": 0.2038, "step": 566 }, { "epoch": 1.08, "grad_norm": 0.712509802101007, "learning_rate": 0.00018247824819443554, "loss": 0.1792, "step": 567 }, { "epoch": 1.08, "grad_norm": 0.5857691755995014, "learning_rate": 0.00018240854968191945, "loss": 0.1112, "step": 568 }, { "epoch": 1.08, "grad_norm": 0.8529165088539624, "learning_rate": 0.00018233872618637475, "loss": 0.1952, "step": 569 }, { "epoch": 1.08, "grad_norm": 0.7710700157025805, "learning_rate": 0.0001822687778136977, "loss": 0.1735, "step": 570 }, { "epoch": 1.09, "grad_norm": 0.8490794167720118, "learning_rate": 0.0001821987046699738, "loss": 0.222, "step": 571 }, { "epoch": 1.09, "grad_norm": 0.7898967912885708, "learning_rate": 0.00018212850686147793, "loss": 0.1824, "step": 572 }, { "epoch": 1.09, "grad_norm": 0.6685765138692875, "learning_rate": 0.00018205818449467398, "loss": 0.1775, "step": 573 }, { "epoch": 1.09, "grad_norm": 0.7333054041954968, "learning_rate": 0.0001819877376762147, "loss": 0.1741, "step": 574 }, { "epoch": 1.09, "grad_norm": 1.0068035221891851, "learning_rate": 0.00018191716651294167, "loss": 0.2715, "step": 575 }, { "epoch": 1.1, "grad_norm": 0.5961800412169528, "learning_rate": 0.00018184647111188506, "loss": 0.126, "step": 576 }, { "epoch": 1.1, "grad_norm": 0.8282139615296287, "learning_rate": 0.00018177565158026334, "loss": 0.2252, "step": 577 }, { "epoch": 1.1, "grad_norm": 0.786210197002174, "learning_rate": 0.00018170470802548336, "loss": 0.2158, "step": 578 }, { "epoch": 1.1, "grad_norm": 0.7137667324770188, "learning_rate": 0.00018163364055514002, "loss": 0.1796, "step": 579 }, { "epoch": 1.1, "grad_norm": 0.5913274775045431, "learning_rate": 0.00018156244927701616, "loss": 0.1592, "step": 580 }, { "epoch": 1.1, "grad_norm": 0.6298059933497385, "learning_rate": 0.00018149113429908242, "loss": 0.1636, "step": 581 }, { "epoch": 1.11, "grad_norm": 0.6920173750884634, "learning_rate": 0.000181419695729497, "loss": 0.1843, "step": 582 }, { "epoch": 1.11, "grad_norm": 0.7311778207893501, "learning_rate": 0.00018134813367660555, "loss": 0.1872, "step": 583 }, { "epoch": 1.11, "grad_norm": 0.8065477612639352, "learning_rate": 0.00018127644824894102, "loss": 0.203, "step": 584 }, { "epoch": 1.11, "grad_norm": 0.6865343646333009, "learning_rate": 0.00018120463955522345, "loss": 0.1849, "step": 585 }, { "epoch": 1.11, "grad_norm": 0.7175193051236145, "learning_rate": 0.00018113270770435985, "loss": 0.1854, "step": 586 }, { "epoch": 1.12, "grad_norm": 0.8079208793282562, "learning_rate": 0.000181060652805444, "loss": 0.2076, "step": 587 }, { "epoch": 1.12, "grad_norm": 0.7205804575107025, "learning_rate": 0.00018098847496775635, "loss": 0.1808, "step": 588 }, { "epoch": 1.12, "grad_norm": 0.6863626186137156, "learning_rate": 0.0001809161743007637, "loss": 0.1881, "step": 589 }, { "epoch": 1.12, "grad_norm": 0.74734839000031, "learning_rate": 0.0001808437509141192, "loss": 0.142, "step": 590 }, { "epoch": 1.12, "grad_norm": 0.6557729330865883, "learning_rate": 0.00018077120491766208, "loss": 0.1994, "step": 591 }, { "epoch": 1.13, "grad_norm": 0.6780195764743244, "learning_rate": 0.00018069853642141762, "loss": 0.1526, "step": 592 }, { "epoch": 1.13, "grad_norm": 1.6180744869168515, "learning_rate": 0.0001806257455355968, "loss": 0.2083, "step": 593 }, { "epoch": 1.13, "grad_norm": 0.7995678856368128, "learning_rate": 0.00018055283237059622, "loss": 0.2091, "step": 594 }, { "epoch": 1.13, "grad_norm": 0.6960205132974582, "learning_rate": 0.00018047979703699797, "loss": 0.1563, "step": 595 }, { "epoch": 1.13, "grad_norm": 0.7737093353282247, "learning_rate": 0.0001804066396455694, "loss": 0.1543, "step": 596 }, { "epoch": 1.13, "grad_norm": 0.8588160375883382, "learning_rate": 0.000180333360307263, "loss": 0.2144, "step": 597 }, { "epoch": 1.14, "grad_norm": 0.8262989518227583, "learning_rate": 0.00018025995913321615, "loss": 0.2253, "step": 598 }, { "epoch": 1.14, "grad_norm": 0.5904834769570555, "learning_rate": 0.00018018643623475105, "loss": 0.1156, "step": 599 }, { "epoch": 1.14, "grad_norm": 0.5724974357598809, "learning_rate": 0.00018011279172337456, "loss": 0.101, "step": 600 }, { "epoch": 1.14, "eval_blimp_filtered_avg": 0.7116417910447761, "eval_blimp_filtered_std": 0.0049767975476872435, "step": 600 }, { "epoch": 1.14, "eval_blimp_supplement_avg": 0.8168103448275862, "eval_blimp_supplement_std": 0.016572021101987022, "step": 600 }, { "epoch": 1.14, "eval_vqa_filtered_avg": 0.52, "eval_vqa_filtered_std": 0.05021167315686779, "step": 600 }, { "epoch": 1.14, "eval_winoground_filtered_avg": 0.65, "eval_winoground_filtered_std": 0.047937248544110196, "step": 600 }, { "epoch": 1.14, "grad_norm": 0.6072337671429158, "learning_rate": 0.00018003902571077794, "loss": 0.1393, "step": 601 }, { "epoch": 1.14, "grad_norm": 0.5760813527811247, "learning_rate": 0.00017996513830883664, "loss": 0.1281, "step": 602 }, { "epoch": 1.15, "grad_norm": 0.6312651429252463, "learning_rate": 0.00017989112962961033, "loss": 0.1228, "step": 603 }, { "epoch": 1.15, "grad_norm": 0.772712341565609, "learning_rate": 0.00017981699978534256, "loss": 0.1641, "step": 604 }, { "epoch": 1.15, "grad_norm": 0.827402803364175, "learning_rate": 0.00017974274888846065, "loss": 0.2291, "step": 605 }, { "epoch": 1.15, "grad_norm": 0.7333007808660956, "learning_rate": 0.0001796683770515755, "loss": 0.1449, "step": 606 }, { "epoch": 1.15, "grad_norm": 0.8670722093848082, "learning_rate": 0.00017959388438748151, "loss": 0.1881, "step": 607 }, { "epoch": 1.16, "grad_norm": 0.7171063968028948, "learning_rate": 0.00017951927100915617, "loss": 0.169, "step": 608 }, { "epoch": 1.16, "grad_norm": 0.6882672139484538, "learning_rate": 0.00017944453702976022, "loss": 0.1657, "step": 609 }, { "epoch": 1.16, "grad_norm": 0.7955966213434372, "learning_rate": 0.00017936968256263719, "loss": 0.1911, "step": 610 }, { "epoch": 1.16, "grad_norm": 0.7209134484578615, "learning_rate": 0.00017929470772131336, "loss": 0.1451, "step": 611 }, { "epoch": 1.16, "grad_norm": 0.7367617038507538, "learning_rate": 0.00017921961261949763, "loss": 0.1659, "step": 612 }, { "epoch": 1.17, "grad_norm": 0.6317232917268062, "learning_rate": 0.00017914439737108128, "loss": 0.1508, "step": 613 }, { "epoch": 1.17, "grad_norm": 0.8018650192338301, "learning_rate": 0.0001790690620901377, "loss": 0.2371, "step": 614 }, { "epoch": 1.17, "grad_norm": 0.7956390055973336, "learning_rate": 0.00017899360689092248, "loss": 0.1536, "step": 615 }, { "epoch": 1.17, "grad_norm": 0.7036749431484245, "learning_rate": 0.000178918031887873, "loss": 0.1633, "step": 616 }, { "epoch": 1.17, "grad_norm": 0.6517817414955687, "learning_rate": 0.00017884233719560832, "loss": 0.1703, "step": 617 }, { "epoch": 1.17, "grad_norm": 0.7389186829134701, "learning_rate": 0.0001787665229289291, "loss": 0.1834, "step": 618 }, { "epoch": 1.18, "grad_norm": 0.7508811717442017, "learning_rate": 0.00017869058920281727, "loss": 0.1748, "step": 619 }, { "epoch": 1.18, "grad_norm": 0.6469588882603385, "learning_rate": 0.00017861453613243593, "loss": 0.1382, "step": 620 }, { "epoch": 1.18, "grad_norm": 0.6194359209470729, "learning_rate": 0.0001785383638331293, "loss": 0.1624, "step": 621 }, { "epoch": 1.18, "grad_norm": 0.6839401947778359, "learning_rate": 0.00017846207242042228, "loss": 0.1745, "step": 622 }, { "epoch": 1.18, "grad_norm": 0.7314190058203299, "learning_rate": 0.00017838566201002046, "loss": 0.1915, "step": 623 }, { "epoch": 1.19, "grad_norm": 0.6173653618772352, "learning_rate": 0.00017830913271781005, "loss": 0.1572, "step": 624 }, { "epoch": 1.19, "grad_norm": 0.5945650322249489, "learning_rate": 0.00017823248465985732, "loss": 0.1697, "step": 625 }, { "epoch": 1.19, "grad_norm": 0.570180906323657, "learning_rate": 0.0001781557179524088, "loss": 0.1495, "step": 626 }, { "epoch": 1.19, "grad_norm": 0.5811848891544852, "learning_rate": 0.00017807883271189098, "loss": 0.129, "step": 627 }, { "epoch": 1.19, "grad_norm": 0.6055393781495153, "learning_rate": 0.0001780018290549101, "loss": 0.1445, "step": 628 }, { "epoch": 1.2, "grad_norm": 0.6584653743213179, "learning_rate": 0.00017792470709825193, "loss": 0.1539, "step": 629 }, { "epoch": 1.2, "grad_norm": 0.7008780845051042, "learning_rate": 0.00017784746695888174, "loss": 0.144, "step": 630 }, { "epoch": 1.2, "grad_norm": 0.7400826849297436, "learning_rate": 0.00017777010875394403, "loss": 0.1561, "step": 631 }, { "epoch": 1.2, "grad_norm": 0.6573288023382563, "learning_rate": 0.00017769263260076232, "loss": 0.1483, "step": 632 }, { "epoch": 1.2, "grad_norm": 0.6418977886872678, "learning_rate": 0.000177615038616839, "loss": 0.1559, "step": 633 }, { "epoch": 1.21, "grad_norm": 0.6873239208406079, "learning_rate": 0.00017753732691985525, "loss": 0.1515, "step": 634 }, { "epoch": 1.21, "grad_norm": 0.6914292158945461, "learning_rate": 0.00017745949762767072, "loss": 0.1536, "step": 635 }, { "epoch": 1.21, "grad_norm": 0.6690154172724786, "learning_rate": 0.00017738155085832337, "loss": 0.146, "step": 636 }, { "epoch": 1.21, "grad_norm": 0.6392854184175377, "learning_rate": 0.0001773034867300294, "loss": 0.1431, "step": 637 }, { "epoch": 1.21, "grad_norm": 0.7305760748912644, "learning_rate": 0.000177225305361183, "loss": 0.1853, "step": 638 }, { "epoch": 1.21, "grad_norm": 0.7559423971083366, "learning_rate": 0.00017714700687035607, "loss": 0.2006, "step": 639 }, { "epoch": 1.22, "grad_norm": 0.6853070295068343, "learning_rate": 0.00017706859137629825, "loss": 0.1554, "step": 640 }, { "epoch": 1.22, "grad_norm": 0.7369879734394859, "learning_rate": 0.0001769900589979366, "loss": 0.2116, "step": 641 }, { "epoch": 1.22, "grad_norm": 0.7789928014062807, "learning_rate": 0.0001769114098543755, "loss": 0.2045, "step": 642 }, { "epoch": 1.22, "grad_norm": 0.8138610813280952, "learning_rate": 0.00017683264406489625, "loss": 0.2091, "step": 643 }, { "epoch": 1.22, "grad_norm": 0.6719977990162087, "learning_rate": 0.00017675376174895724, "loss": 0.1687, "step": 644 }, { "epoch": 1.23, "grad_norm": 0.669520099952342, "learning_rate": 0.00017667476302619354, "loss": 0.1591, "step": 645 }, { "epoch": 1.23, "grad_norm": 0.5224287164733763, "learning_rate": 0.00017659564801641664, "loss": 0.1238, "step": 646 }, { "epoch": 1.23, "grad_norm": 0.6084034320934578, "learning_rate": 0.00017651641683961457, "loss": 0.1588, "step": 647 }, { "epoch": 1.23, "grad_norm": 0.6653867612857268, "learning_rate": 0.00017643706961595148, "loss": 0.1958, "step": 648 }, { "epoch": 1.23, "grad_norm": 0.793192817553755, "learning_rate": 0.00017635760646576748, "loss": 0.2052, "step": 649 }, { "epoch": 1.24, "grad_norm": 0.6707405243308575, "learning_rate": 0.00017627802750957853, "loss": 0.1591, "step": 650 }, { "epoch": 1.24, "grad_norm": 0.6944323867273439, "learning_rate": 0.00017619833286807625, "loss": 0.1782, "step": 651 }, { "epoch": 1.24, "grad_norm": 0.7172230728861968, "learning_rate": 0.00017611852266212762, "loss": 0.1481, "step": 652 }, { "epoch": 1.24, "grad_norm": 0.5370227314209999, "learning_rate": 0.00017603859701277502, "loss": 0.114, "step": 653 }, { "epoch": 1.24, "grad_norm": 0.7504757489576531, "learning_rate": 0.0001759585560412358, "loss": 0.1913, "step": 654 }, { "epoch": 1.25, "grad_norm": 0.8409877337326641, "learning_rate": 0.00017587839986890228, "loss": 0.1618, "step": 655 }, { "epoch": 1.25, "grad_norm": 0.5853720105621518, "learning_rate": 0.00017579812861734143, "loss": 0.133, "step": 656 }, { "epoch": 1.25, "grad_norm": 1.0486350545840877, "learning_rate": 0.00017571774240829487, "loss": 0.2681, "step": 657 }, { "epoch": 1.25, "grad_norm": 0.7091395853169653, "learning_rate": 0.00017563724136367842, "loss": 0.1851, "step": 658 }, { "epoch": 1.25, "grad_norm": 0.6981627519864624, "learning_rate": 0.0001755566256055822, "loss": 0.1987, "step": 659 }, { "epoch": 1.25, "grad_norm": 0.6323995397466856, "learning_rate": 0.00017547589525627018, "loss": 0.1349, "step": 660 }, { "epoch": 1.26, "grad_norm": 0.7294587661978843, "learning_rate": 0.0001753950504381802, "loss": 0.1593, "step": 661 }, { "epoch": 1.26, "grad_norm": 0.5690867556550939, "learning_rate": 0.00017531409127392373, "loss": 0.1396, "step": 662 }, { "epoch": 1.26, "grad_norm": 0.6185883063002445, "learning_rate": 0.00017523301788628556, "loss": 0.175, "step": 663 }, { "epoch": 1.26, "grad_norm": 0.5775134222578471, "learning_rate": 0.00017515183039822383, "loss": 0.1503, "step": 664 }, { "epoch": 1.26, "grad_norm": 0.6198479134679262, "learning_rate": 0.00017507052893286966, "loss": 0.1303, "step": 665 }, { "epoch": 1.27, "grad_norm": 0.751370448170556, "learning_rate": 0.00017498911361352702, "loss": 0.2293, "step": 666 }, { "epoch": 1.27, "grad_norm": 0.5593964378295713, "learning_rate": 0.0001749075845636726, "loss": 0.1042, "step": 667 }, { "epoch": 1.27, "grad_norm": 0.7480469239097289, "learning_rate": 0.00017482594190695557, "loss": 0.188, "step": 668 }, { "epoch": 1.27, "grad_norm": 0.5547242160925578, "learning_rate": 0.00017474418576719734, "loss": 0.1222, "step": 669 }, { "epoch": 1.27, "grad_norm": 0.6431121333630164, "learning_rate": 0.0001746623162683915, "loss": 0.1313, "step": 670 }, { "epoch": 1.28, "grad_norm": 0.6741730303451717, "learning_rate": 0.00017458033353470354, "loss": 0.1414, "step": 671 }, { "epoch": 1.28, "grad_norm": 0.8188519323365596, "learning_rate": 0.00017449823769047072, "loss": 0.1591, "step": 672 }, { "epoch": 1.28, "grad_norm": 0.7753965207735892, "learning_rate": 0.0001744160288602018, "loss": 0.1715, "step": 673 }, { "epoch": 1.28, "grad_norm": 0.7790894324704803, "learning_rate": 0.0001743337071685768, "loss": 0.144, "step": 674 }, { "epoch": 1.28, "grad_norm": 0.7597374034722125, "learning_rate": 0.00017425127274044714, "loss": 0.1877, "step": 675 }, { "epoch": 1.29, "grad_norm": 0.6178955957838258, "learning_rate": 0.00017416872570083508, "loss": 0.1243, "step": 676 }, { "epoch": 1.29, "grad_norm": 0.6145969194808465, "learning_rate": 0.00017408606617493367, "loss": 0.1471, "step": 677 }, { "epoch": 1.29, "grad_norm": 0.5501674878414058, "learning_rate": 0.00017400329428810655, "loss": 0.1044, "step": 678 }, { "epoch": 1.29, "grad_norm": 0.648025314336819, "learning_rate": 0.00017392041016588781, "loss": 0.1542, "step": 679 }, { "epoch": 1.29, "grad_norm": 0.7135644760775234, "learning_rate": 0.00017383741393398177, "loss": 0.1704, "step": 680 }, { "epoch": 1.29, "grad_norm": 0.6925234228037228, "learning_rate": 0.00017375430571826277, "loss": 0.186, "step": 681 }, { "epoch": 1.3, "grad_norm": 0.6495489723048841, "learning_rate": 0.0001736710856447749, "loss": 0.1556, "step": 682 }, { "epoch": 1.3, "grad_norm": 0.597593927735184, "learning_rate": 0.00017358775383973201, "loss": 0.1473, "step": 683 }, { "epoch": 1.3, "grad_norm": 0.5958655144398192, "learning_rate": 0.00017350431042951735, "loss": 0.1291, "step": 684 }, { "epoch": 1.3, "grad_norm": 0.581561580306994, "learning_rate": 0.00017342075554068343, "loss": 0.1492, "step": 685 }, { "epoch": 1.3, "grad_norm": 0.6293388416556095, "learning_rate": 0.00017333708929995192, "loss": 0.1737, "step": 686 }, { "epoch": 1.31, "grad_norm": 0.5967562059240008, "learning_rate": 0.00017325331183421324, "loss": 0.1497, "step": 687 }, { "epoch": 1.31, "grad_norm": 0.6521657636136647, "learning_rate": 0.00017316942327052652, "loss": 0.1353, "step": 688 }, { "epoch": 1.31, "grad_norm": 0.7345496367194667, "learning_rate": 0.00017308542373611948, "loss": 0.1428, "step": 689 }, { "epoch": 1.31, "grad_norm": 0.6482156528734515, "learning_rate": 0.00017300131335838806, "loss": 0.155, "step": 690 }, { "epoch": 1.31, "grad_norm": 0.6774039332851115, "learning_rate": 0.00017291709226489635, "loss": 0.1813, "step": 691 }, { "epoch": 1.32, "grad_norm": 0.6582826079887422, "learning_rate": 0.00017283276058337624, "loss": 0.17, "step": 692 }, { "epoch": 1.32, "grad_norm": 0.5957273685269454, "learning_rate": 0.00017274831844172757, "loss": 0.1303, "step": 693 }, { "epoch": 1.32, "grad_norm": 0.5518840435461293, "learning_rate": 0.00017266376596801743, "loss": 0.1233, "step": 694 }, { "epoch": 1.32, "grad_norm": 0.6810742952023181, "learning_rate": 0.0001725791032904805, "loss": 0.1453, "step": 695 }, { "epoch": 1.32, "grad_norm": 0.6298751460255544, "learning_rate": 0.0001724943305375184, "loss": 0.1259, "step": 696 }, { "epoch": 1.33, "grad_norm": 0.5498400688744579, "learning_rate": 0.0001724094478376998, "loss": 0.1336, "step": 697 }, { "epoch": 1.33, "grad_norm": 0.6808449145911093, "learning_rate": 0.00017232445531976012, "loss": 0.1495, "step": 698 }, { "epoch": 1.33, "grad_norm": 0.7113660327882848, "learning_rate": 0.00017223935311260125, "loss": 0.1484, "step": 699 }, { "epoch": 1.33, "grad_norm": 0.8381487312510373, "learning_rate": 0.00017215414134529153, "loss": 0.1942, "step": 700 }, { "epoch": 1.33, "eval_blimp_filtered_avg": 0.721044776119403, "eval_blimp_filtered_std": 0.004941974589093023, "step": 700 }, { "epoch": 1.33, "eval_blimp_supplement_avg": 0.7952586206896551, "eval_blimp_supplement_std": 0.01766072877873868, "step": 700 }, { "epoch": 1.33, "eval_vqa_filtered_avg": 0.53, "eval_vqa_filtered_std": 0.0501613558046592, "step": 700 }, { "epoch": 1.33, "eval_winoground_filtered_avg": 0.67, "eval_winoground_filtered_std": 0.04725815626252607, "step": 700 }, { "epoch": 1.33, "grad_norm": 0.6535647762444462, "learning_rate": 0.0001720688201470654, "loss": 0.1336, "step": 701 }, { "epoch": 1.33, "grad_norm": 0.6592659154564937, "learning_rate": 0.00017198338964732334, "loss": 0.1304, "step": 702 }, { "epoch": 1.34, "grad_norm": 0.8932419062442624, "learning_rate": 0.00017189784997563147, "loss": 0.183, "step": 703 }, { "epoch": 1.34, "grad_norm": 0.7691052097946697, "learning_rate": 0.00017181220126172164, "loss": 0.1989, "step": 704 }, { "epoch": 1.34, "grad_norm": 0.6817488605732711, "learning_rate": 0.00017172644363549092, "loss": 0.1428, "step": 705 }, { "epoch": 1.34, "grad_norm": 0.6922135717438749, "learning_rate": 0.00017164057722700174, "loss": 0.1603, "step": 706 }, { "epoch": 1.34, "grad_norm": 0.5501834367535323, "learning_rate": 0.00017155460216648131, "loss": 0.114, "step": 707 }, { "epoch": 1.35, "grad_norm": 0.48257382191063675, "learning_rate": 0.0001714685185843218, "loss": 0.1247, "step": 708 }, { "epoch": 1.35, "grad_norm": 0.5688998093697579, "learning_rate": 0.00017138232661107982, "loss": 0.1552, "step": 709 }, { "epoch": 1.35, "grad_norm": 0.6164161467146562, "learning_rate": 0.0001712960263774765, "loss": 0.1561, "step": 710 }, { "epoch": 1.35, "grad_norm": 0.9782417002377043, "learning_rate": 0.0001712096180143971, "loss": 0.2064, "step": 711 }, { "epoch": 1.35, "grad_norm": 0.6248139849831165, "learning_rate": 0.00017112310165289082, "loss": 0.1785, "step": 712 }, { "epoch": 1.36, "grad_norm": 0.5828319802456879, "learning_rate": 0.0001710364774241708, "loss": 0.1446, "step": 713 }, { "epoch": 1.36, "grad_norm": 0.5274316018078327, "learning_rate": 0.00017094974545961357, "loss": 0.149, "step": 714 }, { "epoch": 1.36, "grad_norm": 0.6080346341587625, "learning_rate": 0.00017086290589075925, "loss": 0.1472, "step": 715 }, { "epoch": 1.36, "grad_norm": 0.7703865298339624, "learning_rate": 0.000170775958849311, "loss": 0.2076, "step": 716 }, { "epoch": 1.36, "grad_norm": 0.7546333086547935, "learning_rate": 0.00017068890446713512, "loss": 0.1594, "step": 717 }, { "epoch": 1.37, "grad_norm": 0.6806863412430921, "learning_rate": 0.0001706017428762606, "loss": 0.1315, "step": 718 }, { "epoch": 1.37, "grad_norm": 0.7664282680251971, "learning_rate": 0.00017051447420887906, "loss": 0.1636, "step": 719 }, { "epoch": 1.37, "grad_norm": 0.6711100886673493, "learning_rate": 0.00017042709859734455, "loss": 0.1564, "step": 720 }, { "epoch": 1.37, "grad_norm": 0.8612347907788099, "learning_rate": 0.0001703396161741732, "loss": 0.1355, "step": 721 }, { "epoch": 1.37, "grad_norm": 0.5321314201095316, "learning_rate": 0.00017025202707204325, "loss": 0.1303, "step": 722 }, { "epoch": 1.37, "grad_norm": 0.826967674318077, "learning_rate": 0.00017016433142379473, "loss": 0.1651, "step": 723 }, { "epoch": 1.38, "grad_norm": 0.6015428617781767, "learning_rate": 0.0001700765293624292, "loss": 0.1525, "step": 724 }, { "epoch": 1.38, "grad_norm": 0.6493672296134518, "learning_rate": 0.0001699886210211096, "loss": 0.142, "step": 725 }, { "epoch": 1.38, "grad_norm": 0.6230608258121246, "learning_rate": 0.00016990060653316013, "loss": 0.1597, "step": 726 }, { "epoch": 1.38, "grad_norm": 0.6776787511474927, "learning_rate": 0.00016981248603206592, "loss": 0.1206, "step": 727 }, { "epoch": 1.38, "grad_norm": 0.6223438507529719, "learning_rate": 0.00016972425965147293, "loss": 0.1338, "step": 728 }, { "epoch": 1.39, "grad_norm": 0.6967978395013763, "learning_rate": 0.00016963592752518763, "loss": 0.1596, "step": 729 }, { "epoch": 1.39, "grad_norm": 0.6122169114736753, "learning_rate": 0.0001695474897871769, "loss": 0.1531, "step": 730 }, { "epoch": 1.39, "grad_norm": 0.8354850445539928, "learning_rate": 0.00016945894657156784, "loss": 0.1416, "step": 731 }, { "epoch": 1.39, "grad_norm": 0.6248643708836904, "learning_rate": 0.00016937029801264742, "loss": 0.1347, "step": 732 }, { "epoch": 1.39, "grad_norm": 0.7900242063479574, "learning_rate": 0.0001692815442448625, "loss": 0.1776, "step": 733 }, { "epoch": 1.4, "grad_norm": 0.6570560646409306, "learning_rate": 0.00016919268540281936, "loss": 0.1564, "step": 734 }, { "epoch": 1.4, "grad_norm": 0.5580787619664727, "learning_rate": 0.00016910372162128382, "loss": 0.0967, "step": 735 }, { "epoch": 1.4, "grad_norm": 0.6697470679588717, "learning_rate": 0.00016901465303518064, "loss": 0.1255, "step": 736 }, { "epoch": 1.4, "grad_norm": 0.8337973279319283, "learning_rate": 0.0001689254797795937, "loss": 0.1916, "step": 737 }, { "epoch": 1.4, "grad_norm": 0.7503277966013299, "learning_rate": 0.00016883620198976558, "loss": 0.1759, "step": 738 }, { "epoch": 1.4, "grad_norm": 0.7427133008094162, "learning_rate": 0.00016874681980109734, "loss": 0.1789, "step": 739 }, { "epoch": 1.41, "grad_norm": 0.7444474281330717, "learning_rate": 0.00016865733334914845, "loss": 0.1704, "step": 740 }, { "epoch": 1.41, "grad_norm": 0.6554556919500664, "learning_rate": 0.00016856774276963646, "loss": 0.168, "step": 741 }, { "epoch": 1.41, "grad_norm": 0.686418619542461, "learning_rate": 0.00016847804819843684, "loss": 0.1766, "step": 742 }, { "epoch": 1.41, "grad_norm": 0.555191524616281, "learning_rate": 0.00016838824977158284, "loss": 0.1349, "step": 743 }, { "epoch": 1.41, "grad_norm": 0.688587528475618, "learning_rate": 0.00016829834762526513, "loss": 0.1919, "step": 744 }, { "epoch": 1.42, "grad_norm": 0.6101212129187827, "learning_rate": 0.00016820834189583175, "loss": 0.1703, "step": 745 }, { "epoch": 1.42, "grad_norm": 0.5724345299376605, "learning_rate": 0.00016811823271978784, "loss": 0.1394, "step": 746 }, { "epoch": 1.42, "grad_norm": 0.7265677036948114, "learning_rate": 0.00016802802023379538, "loss": 0.1593, "step": 747 }, { "epoch": 1.42, "grad_norm": 0.6509488956293229, "learning_rate": 0.00016793770457467302, "loss": 0.1291, "step": 748 }, { "epoch": 1.42, "grad_norm": 0.7802769896487252, "learning_rate": 0.00016784728587939602, "loss": 0.1978, "step": 749 }, { "epoch": 1.43, "grad_norm": 0.7049230597478269, "learning_rate": 0.0001677567642850957, "loss": 0.1928, "step": 750 }, { "epoch": 1.43, "grad_norm": 0.6901356706135111, "learning_rate": 0.00016766613992905965, "loss": 0.1461, "step": 751 }, { "epoch": 1.43, "grad_norm": 0.5770222511184242, "learning_rate": 0.00016757541294873117, "loss": 0.1093, "step": 752 }, { "epoch": 1.43, "grad_norm": 0.7561016364161574, "learning_rate": 0.00016748458348170924, "loss": 0.1547, "step": 753 }, { "epoch": 1.43, "grad_norm": 0.6945747490923173, "learning_rate": 0.00016739365166574827, "loss": 0.1906, "step": 754 }, { "epoch": 1.44, "grad_norm": 0.7221913273834322, "learning_rate": 0.0001673026176387579, "loss": 0.1452, "step": 755 }, { "epoch": 1.44, "grad_norm": 0.5631107633289717, "learning_rate": 0.00016721148153880285, "loss": 0.1379, "step": 756 }, { "epoch": 1.44, "grad_norm": 0.7221127019488849, "learning_rate": 0.00016712024350410253, "loss": 0.1738, "step": 757 }, { "epoch": 1.44, "grad_norm": 0.6899243314785456, "learning_rate": 0.00016702890367303102, "loss": 0.1464, "step": 758 }, { "epoch": 1.44, "grad_norm": 0.6990507736294583, "learning_rate": 0.00016693746218411677, "loss": 0.1606, "step": 759 }, { "epoch": 1.44, "grad_norm": 0.8728344004394792, "learning_rate": 0.0001668459191760424, "loss": 0.2399, "step": 760 }, { "epoch": 1.45, "grad_norm": 0.5933646419442087, "learning_rate": 0.00016675427478764448, "loss": 0.1512, "step": 761 }, { "epoch": 1.45, "grad_norm": 0.5813001124102621, "learning_rate": 0.00016666252915791346, "loss": 0.1593, "step": 762 }, { "epoch": 1.45, "grad_norm": 0.6569121392530403, "learning_rate": 0.00016657068242599313, "loss": 0.1746, "step": 763 }, { "epoch": 1.45, "grad_norm": 0.587750920145892, "learning_rate": 0.00016647873473118075, "loss": 0.1078, "step": 764 }, { "epoch": 1.45, "grad_norm": 0.6406963155293464, "learning_rate": 0.00016638668621292668, "loss": 0.1431, "step": 765 }, { "epoch": 1.46, "grad_norm": 0.6514858826674613, "learning_rate": 0.0001662945370108342, "loss": 0.1657, "step": 766 }, { "epoch": 1.46, "grad_norm": 0.6000346033912863, "learning_rate": 0.00016620228726465922, "loss": 0.1401, "step": 767 }, { "epoch": 1.46, "grad_norm": 0.5590604692454315, "learning_rate": 0.00016610993711431028, "loss": 0.1133, "step": 768 }, { "epoch": 1.46, "grad_norm": 0.6871757460294788, "learning_rate": 0.00016601748669984806, "loss": 0.1554, "step": 769 }, { "epoch": 1.46, "grad_norm": 0.7311303719206016, "learning_rate": 0.00016592493616148535, "loss": 0.1753, "step": 770 }, { "epoch": 1.47, "grad_norm": 0.5410255457060548, "learning_rate": 0.00016583228563958678, "loss": 0.0925, "step": 771 }, { "epoch": 1.47, "grad_norm": 0.8309845197985601, "learning_rate": 0.00016573953527466864, "loss": 0.2083, "step": 772 }, { "epoch": 1.47, "grad_norm": 0.5782432188712097, "learning_rate": 0.00016564668520739867, "loss": 0.1255, "step": 773 }, { "epoch": 1.47, "grad_norm": 0.6029089169364341, "learning_rate": 0.00016555373557859573, "loss": 0.1178, "step": 774 }, { "epoch": 1.47, "grad_norm": 0.5745630715025581, "learning_rate": 0.00016546068652922976, "loss": 0.131, "step": 775 }, { "epoch": 1.48, "grad_norm": 0.6673301780602401, "learning_rate": 0.0001653675382004215, "loss": 0.1808, "step": 776 }, { "epoch": 1.48, "grad_norm": 0.5350078880995204, "learning_rate": 0.0001652742907334421, "loss": 0.1251, "step": 777 }, { "epoch": 1.48, "grad_norm": 0.6749099393878708, "learning_rate": 0.0001651809442697133, "loss": 0.1912, "step": 778 }, { "epoch": 1.48, "grad_norm": 0.6737556699144797, "learning_rate": 0.0001650874989508068, "loss": 0.1421, "step": 779 }, { "epoch": 1.48, "grad_norm": 0.7622906292095792, "learning_rate": 0.0001649939549184443, "loss": 0.1758, "step": 780 }, { "epoch": 1.48, "grad_norm": 0.49807299728548804, "learning_rate": 0.00016490031231449726, "loss": 0.1036, "step": 781 }, { "epoch": 1.49, "grad_norm": 0.6441718475928477, "learning_rate": 0.0001648065712809865, "loss": 0.1214, "step": 782 }, { "epoch": 1.49, "grad_norm": 0.6244331010569716, "learning_rate": 0.00016471273196008224, "loss": 0.1686, "step": 783 }, { "epoch": 1.49, "grad_norm": 0.6519250681347502, "learning_rate": 0.0001646187944941037, "loss": 0.1311, "step": 784 }, { "epoch": 1.49, "grad_norm": 0.6897593863790831, "learning_rate": 0.00016452475902551902, "loss": 0.163, "step": 785 }, { "epoch": 1.49, "grad_norm": 0.783612559161525, "learning_rate": 0.00016443062569694483, "loss": 0.196, "step": 786 }, { "epoch": 1.5, "grad_norm": 0.7913943669895579, "learning_rate": 0.00016433639465114638, "loss": 0.1761, "step": 787 }, { "epoch": 1.5, "grad_norm": 0.6616078026175262, "learning_rate": 0.000164242066031037, "loss": 0.109, "step": 788 }, { "epoch": 1.5, "grad_norm": 0.6091343608177869, "learning_rate": 0.00016414763997967793, "loss": 0.1262, "step": 789 }, { "epoch": 1.5, "grad_norm": 0.5767964122920965, "learning_rate": 0.00016405311664027838, "loss": 0.1077, "step": 790 }, { "epoch": 1.5, "grad_norm": 0.6750538076486876, "learning_rate": 0.00016395849615619495, "loss": 0.1606, "step": 791 }, { "epoch": 1.51, "grad_norm": 0.6840425704324167, "learning_rate": 0.00016386377867093157, "loss": 0.1289, "step": 792 }, { "epoch": 1.51, "grad_norm": 0.6915042967058971, "learning_rate": 0.00016376896432813943, "loss": 0.1468, "step": 793 }, { "epoch": 1.51, "grad_norm": 0.6764079007115708, "learning_rate": 0.0001636740532716164, "loss": 0.1525, "step": 794 }, { "epoch": 1.51, "grad_norm": 0.780478410526655, "learning_rate": 0.00016357904564530726, "loss": 0.1672, "step": 795 }, { "epoch": 1.51, "grad_norm": 0.7776244355601664, "learning_rate": 0.00016348394159330308, "loss": 0.2223, "step": 796 }, { "epoch": 1.52, "grad_norm": 0.7284002385340363, "learning_rate": 0.0001633887412598412, "loss": 0.1593, "step": 797 }, { "epoch": 1.52, "grad_norm": 0.6249673040787976, "learning_rate": 0.0001632934447893051, "loss": 0.1336, "step": 798 }, { "epoch": 1.52, "grad_norm": 0.742257845277557, "learning_rate": 0.0001631980523262239, "loss": 0.193, "step": 799 }, { "epoch": 1.52, "grad_norm": 0.5979756999178074, "learning_rate": 0.00016310256401527243, "loss": 0.1115, "step": 800 }, { "epoch": 1.52, "eval_blimp_filtered_avg": 0.7192537313432836, "eval_blimp_filtered_std": 0.004986035073690131, "step": 800 }, { "epoch": 1.52, "eval_blimp_supplement_avg": 0.8232758620689655, "eval_blimp_supplement_std": 0.016697275813971665, "step": 800 }, { "epoch": 1.52, "eval_vqa_filtered_avg": 0.53, "eval_vqa_filtered_std": 0.0501613558046592, "step": 800 }, { "epoch": 1.52, "eval_winoground_filtered_avg": 0.65, "eval_winoground_filtered_std": 0.047937248544110196, "step": 800 }, { "epoch": 1.52, "grad_norm": 0.7231612954290718, "learning_rate": 0.0001630069800012708, "loss": 0.1841, "step": 801 }, { "epoch": 1.52, "grad_norm": 0.666669810585418, "learning_rate": 0.0001629113004291843, "loss": 0.1624, "step": 802 }, { "epoch": 1.53, "grad_norm": 0.6074220544123056, "learning_rate": 0.0001628155254441232, "loss": 0.1159, "step": 803 }, { "epoch": 1.53, "grad_norm": 0.6257900838581575, "learning_rate": 0.0001627196551913424, "loss": 0.1369, "step": 804 }, { "epoch": 1.53, "grad_norm": 0.5998151789231743, "learning_rate": 0.0001626236898162413, "loss": 0.1288, "step": 805 }, { "epoch": 1.53, "grad_norm": 0.7010718419697726, "learning_rate": 0.00016252762946436357, "loss": 0.1459, "step": 806 }, { "epoch": 1.53, "grad_norm": 0.7700969032578737, "learning_rate": 0.00016243147428139694, "loss": 0.156, "step": 807 }, { "epoch": 1.54, "grad_norm": 0.7599824231308213, "learning_rate": 0.00016233522441317296, "loss": 0.1763, "step": 808 }, { "epoch": 1.54, "grad_norm": 0.5872130840919129, "learning_rate": 0.00016223888000566677, "loss": 0.1151, "step": 809 }, { "epoch": 1.54, "grad_norm": 0.7374310053035389, "learning_rate": 0.0001621424412049969, "loss": 0.1407, "step": 810 }, { "epoch": 1.54, "grad_norm": 0.7711844157191171, "learning_rate": 0.00016204590815742503, "loss": 0.1884, "step": 811 }, { "epoch": 1.54, "grad_norm": 0.8367967566458452, "learning_rate": 0.00016194928100935575, "loss": 0.1689, "step": 812 }, { "epoch": 1.55, "grad_norm": 0.6488243188186983, "learning_rate": 0.00016185255990733649, "loss": 0.1403, "step": 813 }, { "epoch": 1.55, "grad_norm": 0.6353420427423099, "learning_rate": 0.00016175574499805698, "loss": 0.1046, "step": 814 }, { "epoch": 1.55, "grad_norm": 0.6529876334786414, "learning_rate": 0.00016165883642834937, "loss": 0.1304, "step": 815 }, { "epoch": 1.55, "grad_norm": 0.5968720472838218, "learning_rate": 0.0001615618343451878, "loss": 0.1202, "step": 816 }, { "epoch": 1.55, "grad_norm": 0.6515236870995509, "learning_rate": 0.00016146473889568828, "loss": 0.1175, "step": 817 }, { "epoch": 1.56, "grad_norm": 0.9694270930903521, "learning_rate": 0.00016136755022710836, "loss": 0.1303, "step": 818 }, { "epoch": 1.56, "grad_norm": 0.6679062692296702, "learning_rate": 0.00016127026848684696, "loss": 0.2053, "step": 819 }, { "epoch": 1.56, "grad_norm": 0.7221830649774356, "learning_rate": 0.00016117289382244424, "loss": 0.1702, "step": 820 }, { "epoch": 1.56, "grad_norm": 0.6034235241283965, "learning_rate": 0.00016107542638158122, "loss": 0.1234, "step": 821 }, { "epoch": 1.56, "grad_norm": 0.5915870528991883, "learning_rate": 0.00016097786631207966, "loss": 0.1325, "step": 822 }, { "epoch": 1.56, "grad_norm": 0.7738823263412343, "learning_rate": 0.00016088021376190175, "loss": 0.1869, "step": 823 }, { "epoch": 1.57, "grad_norm": 0.8122271876628837, "learning_rate": 0.00016078246887915007, "loss": 0.1697, "step": 824 }, { "epoch": 1.57, "grad_norm": 0.5155311408491958, "learning_rate": 0.00016068463181206707, "loss": 0.1184, "step": 825 }, { "epoch": 1.57, "grad_norm": 0.7037673787078604, "learning_rate": 0.00016058670270903514, "loss": 0.1619, "step": 826 }, { "epoch": 1.57, "grad_norm": 0.6629483154004843, "learning_rate": 0.00016048868171857612, "loss": 0.1198, "step": 827 }, { "epoch": 1.57, "grad_norm": 0.7502758563751385, "learning_rate": 0.00016039056898935132, "loss": 0.1502, "step": 828 }, { "epoch": 1.58, "grad_norm": 0.7712552164889611, "learning_rate": 0.0001602923646701612, "loss": 0.1603, "step": 829 }, { "epoch": 1.58, "grad_norm": 0.7388052168667906, "learning_rate": 0.00016019406890994503, "loss": 0.132, "step": 830 }, { "epoch": 1.58, "grad_norm": 0.5858900662091469, "learning_rate": 0.00016009568185778084, "loss": 0.1134, "step": 831 }, { "epoch": 1.58, "grad_norm": 0.7460402284301239, "learning_rate": 0.00015999720366288503, "loss": 0.1821, "step": 832 }, { "epoch": 1.58, "grad_norm": 0.6092649357339386, "learning_rate": 0.00015989863447461234, "loss": 0.135, "step": 833 }, { "epoch": 1.59, "grad_norm": 0.5922013221167839, "learning_rate": 0.00015979997444245543, "loss": 0.1364, "step": 834 }, { "epoch": 1.59, "grad_norm": 0.5818810492015078, "learning_rate": 0.00015970122371604476, "loss": 0.134, "step": 835 }, { "epoch": 1.59, "grad_norm": 0.6092218058999829, "learning_rate": 0.00015960238244514842, "loss": 0.1285, "step": 836 }, { "epoch": 1.59, "grad_norm": 0.6444969494489426, "learning_rate": 0.00015950345077967167, "loss": 0.1274, "step": 837 }, { "epoch": 1.59, "grad_norm": 0.7632847395032976, "learning_rate": 0.00015940442886965694, "loss": 0.1593, "step": 838 }, { "epoch": 1.6, "grad_norm": 0.8366421268691201, "learning_rate": 0.00015930531686528357, "loss": 0.2012, "step": 839 }, { "epoch": 1.6, "grad_norm": 0.7406785434565065, "learning_rate": 0.00015920611491686745, "loss": 0.1658, "step": 840 }, { "epoch": 1.6, "grad_norm": 0.6992498652778883, "learning_rate": 0.00015910682317486096, "loss": 0.184, "step": 841 }, { "epoch": 1.6, "grad_norm": 0.550264344595364, "learning_rate": 0.00015900744178985266, "loss": 0.1475, "step": 842 }, { "epoch": 1.6, "grad_norm": 0.5948124177865503, "learning_rate": 0.00015890797091256697, "loss": 0.1412, "step": 843 }, { "epoch": 1.6, "grad_norm": 0.5665554175583916, "learning_rate": 0.00015880841069386417, "loss": 0.1468, "step": 844 }, { "epoch": 1.61, "grad_norm": 0.4729292446367662, "learning_rate": 0.00015870876128473994, "loss": 0.0996, "step": 845 }, { "epoch": 1.61, "grad_norm": 0.5733133837571472, "learning_rate": 0.0001586090228363253, "loss": 0.1136, "step": 846 }, { "epoch": 1.61, "grad_norm": 0.6908827286924109, "learning_rate": 0.0001585091954998862, "loss": 0.1612, "step": 847 }, { "epoch": 1.61, "grad_norm": 0.6372232388496126, "learning_rate": 0.0001584092794268235, "loss": 0.136, "step": 848 }, { "epoch": 1.61, "grad_norm": 0.5788672830637405, "learning_rate": 0.00015830927476867266, "loss": 0.1052, "step": 849 }, { "epoch": 1.62, "grad_norm": 0.6298037606981483, "learning_rate": 0.00015820918167710338, "loss": 0.1205, "step": 850 }, { "epoch": 1.62, "grad_norm": 0.6897460029072285, "learning_rate": 0.00015810900030391954, "loss": 0.1503, "step": 851 }, { "epoch": 1.62, "grad_norm": 0.5682563653846615, "learning_rate": 0.00015800873080105895, "loss": 0.0977, "step": 852 }, { "epoch": 1.62, "grad_norm": 0.5386184045228795, "learning_rate": 0.000157908373320593, "loss": 0.1051, "step": 853 }, { "epoch": 1.62, "grad_norm": 0.5745746303331557, "learning_rate": 0.0001578079280147266, "loss": 0.1054, "step": 854 }, { "epoch": 1.63, "grad_norm": 0.7613931451376398, "learning_rate": 0.0001577073950357978, "loss": 0.1355, "step": 855 }, { "epoch": 1.63, "grad_norm": 0.7402732434418905, "learning_rate": 0.00015760677453627756, "loss": 0.1634, "step": 856 }, { "epoch": 1.63, "grad_norm": 0.5743678454359308, "learning_rate": 0.00015750606666876978, "loss": 0.124, "step": 857 }, { "epoch": 1.63, "grad_norm": 0.7875563806171093, "learning_rate": 0.00015740527158601055, "loss": 0.1708, "step": 858 }, { "epoch": 1.63, "grad_norm": 0.5383835073241827, "learning_rate": 0.00015730438944086856, "loss": 0.1174, "step": 859 }, { "epoch": 1.63, "grad_norm": 0.6036500881515476, "learning_rate": 0.0001572034203863443, "loss": 0.1281, "step": 860 }, { "epoch": 1.64, "grad_norm": 0.5875326549467926, "learning_rate": 0.00015710236457557023, "loss": 0.1299, "step": 861 }, { "epoch": 1.64, "grad_norm": 0.5093038385842131, "learning_rate": 0.00015700122216181028, "loss": 0.1203, "step": 862 }, { "epoch": 1.64, "grad_norm": 0.6343967428594269, "learning_rate": 0.00015689999329845974, "loss": 0.1585, "step": 863 }, { "epoch": 1.64, "grad_norm": 0.7362749852012354, "learning_rate": 0.00015679867813904504, "loss": 0.1906, "step": 864 }, { "epoch": 1.64, "grad_norm": 0.5434403825293117, "learning_rate": 0.00015669727683722355, "loss": 0.1082, "step": 865 }, { "epoch": 1.65, "grad_norm": 0.6057236219012212, "learning_rate": 0.00015659578954678314, "loss": 0.1326, "step": 866 }, { "epoch": 1.65, "grad_norm": 0.6544209598061177, "learning_rate": 0.00015649421642164221, "loss": 0.139, "step": 867 }, { "epoch": 1.65, "grad_norm": 0.6782392402325479, "learning_rate": 0.0001563925576158493, "loss": 0.1299, "step": 868 }, { "epoch": 1.65, "grad_norm": 0.567055306970883, "learning_rate": 0.00015629081328358285, "loss": 0.1024, "step": 869 }, { "epoch": 1.65, "grad_norm": 0.508674396919858, "learning_rate": 0.00015618898357915115, "loss": 0.0959, "step": 870 }, { "epoch": 1.66, "grad_norm": 0.6212696514822578, "learning_rate": 0.00015608706865699178, "loss": 0.1253, "step": 871 }, { "epoch": 1.66, "grad_norm": 0.6550149129472909, "learning_rate": 0.0001559850686716717, "loss": 0.124, "step": 872 }, { "epoch": 1.66, "grad_norm": 0.7006254273978296, "learning_rate": 0.00015588298377788688, "loss": 0.1427, "step": 873 }, { "epoch": 1.66, "grad_norm": 0.6858895747820053, "learning_rate": 0.00015578081413046191, "loss": 0.1363, "step": 874 }, { "epoch": 1.66, "grad_norm": 0.644291624773719, "learning_rate": 0.0001556785598843502, "loss": 0.1679, "step": 875 }, { "epoch": 1.67, "grad_norm": 0.6705477555574304, "learning_rate": 0.00015557622119463313, "loss": 0.1356, "step": 876 }, { "epoch": 1.67, "grad_norm": 0.6505933095134234, "learning_rate": 0.00015547379821652038, "loss": 0.1404, "step": 877 }, { "epoch": 1.67, "grad_norm": 0.715762851224345, "learning_rate": 0.00015537129110534945, "loss": 0.1851, "step": 878 }, { "epoch": 1.67, "grad_norm": 0.5721932834777848, "learning_rate": 0.00015526870001658532, "loss": 0.1097, "step": 879 }, { "epoch": 1.67, "grad_norm": 0.6520586824741472, "learning_rate": 0.00015516602510582043, "loss": 0.1457, "step": 880 }, { "epoch": 1.67, "grad_norm": 0.7089733294877258, "learning_rate": 0.00015506326652877433, "loss": 0.1544, "step": 881 }, { "epoch": 1.68, "grad_norm": 0.46307181224383903, "learning_rate": 0.00015496042444129338, "loss": 0.0922, "step": 882 }, { "epoch": 1.68, "grad_norm": 0.5810374952365608, "learning_rate": 0.0001548574989993508, "loss": 0.1371, "step": 883 }, { "epoch": 1.68, "grad_norm": 0.4682525823079267, "learning_rate": 0.00015475449035904596, "loss": 0.1088, "step": 884 }, { "epoch": 1.68, "grad_norm": 0.5551239030241639, "learning_rate": 0.0001546513986766046, "loss": 0.097, "step": 885 }, { "epoch": 1.68, "grad_norm": 0.6704034691510374, "learning_rate": 0.00015454822410837832, "loss": 0.1286, "step": 886 }, { "epoch": 1.69, "grad_norm": 0.646267201472606, "learning_rate": 0.0001544449668108445, "loss": 0.15, "step": 887 }, { "epoch": 1.69, "grad_norm": 0.6617113377048783, "learning_rate": 0.0001543416269406059, "loss": 0.1489, "step": 888 }, { "epoch": 1.69, "grad_norm": 0.6664383282887847, "learning_rate": 0.0001542382046543905, "loss": 0.135, "step": 889 }, { "epoch": 1.69, "grad_norm": 0.7213237545895587, "learning_rate": 0.00015413470010905146, "loss": 0.1672, "step": 890 }, { "epoch": 1.69, "grad_norm": 0.5504441046316134, "learning_rate": 0.00015403111346156648, "loss": 0.13, "step": 891 }, { "epoch": 1.7, "grad_norm": 0.749591527908574, "learning_rate": 0.00015392744486903787, "loss": 0.1404, "step": 892 }, { "epoch": 1.7, "grad_norm": 0.6166687583012981, "learning_rate": 0.00015382369448869226, "loss": 0.1415, "step": 893 }, { "epoch": 1.7, "grad_norm": 0.7376529338524976, "learning_rate": 0.00015371986247788018, "loss": 0.1836, "step": 894 }, { "epoch": 1.7, "grad_norm": 0.5026544343159087, "learning_rate": 0.00015361594899407615, "loss": 0.1123, "step": 895 }, { "epoch": 1.7, "grad_norm": 0.5147237033159537, "learning_rate": 0.0001535119541948781, "loss": 0.111, "step": 896 }, { "epoch": 1.71, "grad_norm": 0.5528511220337659, "learning_rate": 0.00015340787823800737, "loss": 0.0943, "step": 897 }, { "epoch": 1.71, "grad_norm": 0.6780745616918618, "learning_rate": 0.0001533037212813084, "loss": 0.1508, "step": 898 }, { "epoch": 1.71, "grad_norm": 0.6427802488919993, "learning_rate": 0.00015319948348274835, "loss": 0.1325, "step": 899 }, { "epoch": 1.71, "grad_norm": 0.5336409340940563, "learning_rate": 0.00015309516500041718, "loss": 0.1093, "step": 900 }, { "epoch": 1.71, "eval_blimp_filtered_avg": 0.7191044776119403, "eval_blimp_filtered_std": 0.004946275471452051, "step": 900 }, { "epoch": 1.71, "eval_blimp_supplement_avg": 0.8038793103448276, "eval_blimp_supplement_std": 0.017495116139186168, "step": 900 }, { "epoch": 1.71, "eval_vqa_filtered_avg": 0.54, "eval_vqa_filtered_std": 0.05009082659620333, "step": 900 }, { "epoch": 1.71, "eval_winoground_filtered_avg": 0.63, "eval_winoground_filtered_std": 0.04852365870939099, "step": 900 }, { "epoch": 1.71, "grad_norm": 0.6396036187663479, "learning_rate": 0.00015299076599252701, "loss": 0.1155, "step": 901 }, { "epoch": 1.71, "grad_norm": 0.660905554412118, "learning_rate": 0.00015288628661741229, "loss": 0.1222, "step": 902 }, { "epoch": 1.72, "grad_norm": 0.4977498679569131, "learning_rate": 0.00015278172703352916, "loss": 0.0999, "step": 903 }, { "epoch": 1.72, "grad_norm": 0.6686044894597927, "learning_rate": 0.00015267708739945558, "loss": 0.1351, "step": 904 }, { "epoch": 1.72, "grad_norm": 0.6483088081638239, "learning_rate": 0.00015257236787389084, "loss": 0.1417, "step": 905 }, { "epoch": 1.72, "grad_norm": 0.6356993457079873, "learning_rate": 0.00015246756861565536, "loss": 0.1131, "step": 906 }, { "epoch": 1.72, "grad_norm": 0.6862335280859668, "learning_rate": 0.00015236268978369052, "loss": 0.1559, "step": 907 }, { "epoch": 1.73, "grad_norm": 0.671322234780189, "learning_rate": 0.0001522577315370584, "loss": 0.1456, "step": 908 }, { "epoch": 1.73, "grad_norm": 0.6005992171608379, "learning_rate": 0.0001521526940349415, "loss": 0.0972, "step": 909 }, { "epoch": 1.73, "grad_norm": 0.6685806706922095, "learning_rate": 0.00015204757743664252, "loss": 0.1094, "step": 910 }, { "epoch": 1.73, "grad_norm": 0.7520666699760424, "learning_rate": 0.00015194238190158416, "loss": 0.148, "step": 911 }, { "epoch": 1.73, "grad_norm": 0.656015963325358, "learning_rate": 0.0001518371075893088, "loss": 0.1358, "step": 912 }, { "epoch": 1.74, "grad_norm": 0.6387578049044235, "learning_rate": 0.00015173175465947827, "loss": 0.1438, "step": 913 }, { "epoch": 1.74, "grad_norm": 0.7069517520786764, "learning_rate": 0.00015162632327187368, "loss": 0.1421, "step": 914 }, { "epoch": 1.74, "grad_norm": 0.4875760911225769, "learning_rate": 0.0001515208135863951, "loss": 0.0846, "step": 915 }, { "epoch": 1.74, "grad_norm": 0.6013314974226278, "learning_rate": 0.00015141522576306136, "loss": 0.1109, "step": 916 }, { "epoch": 1.74, "grad_norm": 0.6323288865005231, "learning_rate": 0.0001513095599620099, "loss": 0.145, "step": 917 }, { "epoch": 1.75, "grad_norm": 0.7091725826977056, "learning_rate": 0.00015120381634349617, "loss": 0.1803, "step": 918 }, { "epoch": 1.75, "grad_norm": 0.5523107065310319, "learning_rate": 0.0001510979950678939, "loss": 0.1105, "step": 919 }, { "epoch": 1.75, "grad_norm": 0.5815527633790858, "learning_rate": 0.00015099209629569442, "loss": 0.1297, "step": 920 }, { "epoch": 1.75, "grad_norm": 0.45403393194901637, "learning_rate": 0.0001508861201875067, "loss": 0.086, "step": 921 }, { "epoch": 1.75, "grad_norm": 0.5832244881512828, "learning_rate": 0.000150780066904057, "loss": 0.088, "step": 922 }, { "epoch": 1.75, "grad_norm": 0.648259004615104, "learning_rate": 0.00015067393660618853, "loss": 0.1276, "step": 923 }, { "epoch": 1.76, "grad_norm": 0.6220056196226617, "learning_rate": 0.0001505677294548614, "loss": 0.1242, "step": 924 }, { "epoch": 1.76, "grad_norm": 0.6711639619741492, "learning_rate": 0.0001504614456111522, "loss": 0.1577, "step": 925 }, { "epoch": 1.76, "grad_norm": 0.5719298525951193, "learning_rate": 0.0001503550852362539, "loss": 0.1214, "step": 926 }, { "epoch": 1.76, "grad_norm": 0.6959951437661268, "learning_rate": 0.00015024864849147554, "loss": 0.1312, "step": 927 }, { "epoch": 1.76, "grad_norm": 0.6827170543568457, "learning_rate": 0.00015014213553824187, "loss": 0.1472, "step": 928 }, { "epoch": 1.77, "grad_norm": 0.48803569347721965, "learning_rate": 0.00015003554653809342, "loss": 0.0802, "step": 929 }, { "epoch": 1.77, "grad_norm": 0.718118598569043, "learning_rate": 0.00014992888165268583, "loss": 0.1688, "step": 930 }, { "epoch": 1.77, "grad_norm": 0.5631893455982298, "learning_rate": 0.00014982214104379, "loss": 0.1193, "step": 931 }, { "epoch": 1.77, "grad_norm": 0.6880291222414352, "learning_rate": 0.00014971532487329165, "loss": 0.1208, "step": 932 }, { "epoch": 1.77, "grad_norm": 0.5342703581746533, "learning_rate": 0.000149608433303191, "loss": 0.0998, "step": 933 }, { "epoch": 1.78, "grad_norm": 0.5420593599389201, "learning_rate": 0.00014950146649560274, "loss": 0.0973, "step": 934 }, { "epoch": 1.78, "grad_norm": 0.534229660207934, "learning_rate": 0.00014939442461275556, "loss": 0.0994, "step": 935 }, { "epoch": 1.78, "grad_norm": 0.4396039028879332, "learning_rate": 0.00014928730781699212, "loss": 0.0996, "step": 936 }, { "epoch": 1.78, "grad_norm": 0.5633956616231355, "learning_rate": 0.0001491801162707686, "loss": 0.1352, "step": 937 }, { "epoch": 1.78, "grad_norm": 0.6252006812339965, "learning_rate": 0.0001490728501366546, "loss": 0.1466, "step": 938 }, { "epoch": 1.79, "grad_norm": 0.513885982736411, "learning_rate": 0.00014896550957733284, "loss": 0.1074, "step": 939 }, { "epoch": 1.79, "grad_norm": 0.7735696089993059, "learning_rate": 0.0001488580947555989, "loss": 0.1792, "step": 940 }, { "epoch": 1.79, "grad_norm": 0.5818581757360678, "learning_rate": 0.00014875060583436101, "loss": 0.1212, "step": 941 }, { "epoch": 1.79, "grad_norm": 0.6330168247149923, "learning_rate": 0.00014864304297663975, "loss": 0.1057, "step": 942 }, { "epoch": 1.79, "grad_norm": 0.5931554332164831, "learning_rate": 0.00014853540634556783, "loss": 0.1143, "step": 943 }, { "epoch": 1.79, "grad_norm": 0.6189344626783686, "learning_rate": 0.00014842769610438992, "loss": 0.1393, "step": 944 }, { "epoch": 1.8, "grad_norm": 0.6437219511072853, "learning_rate": 0.00014831991241646226, "loss": 0.0946, "step": 945 }, { "epoch": 1.8, "grad_norm": 1.0061995783528255, "learning_rate": 0.00014821205544525244, "loss": 0.1079, "step": 946 }, { "epoch": 1.8, "grad_norm": 0.7164030555689023, "learning_rate": 0.00014810412535433935, "loss": 0.1556, "step": 947 }, { "epoch": 1.8, "grad_norm": 0.49757379357095527, "learning_rate": 0.00014799612230741258, "loss": 0.1135, "step": 948 }, { "epoch": 1.8, "grad_norm": 0.6783480330724511, "learning_rate": 0.0001478880464682725, "loss": 0.1494, "step": 949 }, { "epoch": 1.81, "grad_norm": 0.8257204558786394, "learning_rate": 0.0001477798980008299, "loss": 0.1049, "step": 950 }, { "epoch": 1.81, "grad_norm": 0.6973226133510904, "learning_rate": 0.00014767167706910555, "loss": 0.1842, "step": 951 }, { "epoch": 1.81, "grad_norm": 0.6737983387235739, "learning_rate": 0.00014756338383723033, "loss": 0.1306, "step": 952 }, { "epoch": 1.81, "grad_norm": 0.860945360924049, "learning_rate": 0.00014745501846944462, "loss": 0.1553, "step": 953 }, { "epoch": 1.81, "grad_norm": 0.5577858633973706, "learning_rate": 0.0001473465811300983, "loss": 0.1115, "step": 954 }, { "epoch": 1.82, "grad_norm": 0.6600242248355296, "learning_rate": 0.00014723807198365033, "loss": 0.1419, "step": 955 }, { "epoch": 1.82, "grad_norm": 0.6715876151439384, "learning_rate": 0.0001471294911946686, "loss": 0.1529, "step": 956 }, { "epoch": 1.82, "grad_norm": 0.5126785067996659, "learning_rate": 0.0001470208389278297, "loss": 0.0992, "step": 957 }, { "epoch": 1.82, "grad_norm": 0.7076391608212873, "learning_rate": 0.00014691211534791857, "loss": 0.1416, "step": 958 }, { "epoch": 1.82, "grad_norm": 0.5395459459292579, "learning_rate": 0.0001468033206198283, "loss": 0.1008, "step": 959 }, { "epoch": 1.83, "grad_norm": 0.7305476347654316, "learning_rate": 0.00014669445490855996, "loss": 0.1507, "step": 960 }, { "epoch": 1.83, "grad_norm": 0.4977148553366427, "learning_rate": 0.00014658551837922214, "loss": 0.1075, "step": 961 }, { "epoch": 1.83, "grad_norm": 0.6837837309013377, "learning_rate": 0.00014647651119703098, "loss": 0.1324, "step": 962 }, { "epoch": 1.83, "grad_norm": 0.6765315261494734, "learning_rate": 0.00014636743352730976, "loss": 0.1399, "step": 963 }, { "epoch": 1.83, "grad_norm": 0.5563055866407175, "learning_rate": 0.00014625828553548853, "loss": 0.0935, "step": 964 }, { "epoch": 1.83, "grad_norm": 0.561808468728103, "learning_rate": 0.00014614906738710408, "loss": 0.0868, "step": 965 }, { "epoch": 1.84, "grad_norm": 0.5073522972551656, "learning_rate": 0.00014603977924779963, "loss": 0.1089, "step": 966 }, { "epoch": 1.84, "grad_norm": 0.5548656841918778, "learning_rate": 0.00014593042128332453, "loss": 0.1139, "step": 967 }, { "epoch": 1.84, "grad_norm": 0.49130440653575425, "learning_rate": 0.00014582099365953398, "loss": 0.1049, "step": 968 }, { "epoch": 1.84, "grad_norm": 0.6856207722512034, "learning_rate": 0.0001457114965423889, "loss": 0.1216, "step": 969 }, { "epoch": 1.84, "grad_norm": 0.5203542906127011, "learning_rate": 0.00014560193009795555, "loss": 0.1079, "step": 970 }, { "epoch": 1.85, "grad_norm": 0.6205223825187324, "learning_rate": 0.0001454922944924054, "loss": 0.1127, "step": 971 }, { "epoch": 1.85, "grad_norm": 0.7146283427566428, "learning_rate": 0.00014538258989201466, "loss": 0.1311, "step": 972 }, { "epoch": 1.85, "grad_norm": 0.6629500370974946, "learning_rate": 0.00014527281646316438, "loss": 0.1447, "step": 973 }, { "epoch": 1.85, "grad_norm": 0.5157945675965905, "learning_rate": 0.00014516297437233987, "loss": 0.0917, "step": 974 }, { "epoch": 1.85, "grad_norm": 0.4897831443896959, "learning_rate": 0.00014505306378613062, "loss": 0.093, "step": 975 }, { "epoch": 1.86, "grad_norm": 0.6429474305738813, "learning_rate": 0.00014494308487123, "loss": 0.1469, "step": 976 }, { "epoch": 1.86, "grad_norm": 0.51172401671792, "learning_rate": 0.0001448330377944349, "loss": 0.0877, "step": 977 }, { "epoch": 1.86, "grad_norm": 0.5310993662468206, "learning_rate": 0.00014472292272264584, "loss": 0.0966, "step": 978 }, { "epoch": 1.86, "grad_norm": 0.5144043581733186, "learning_rate": 0.00014461273982286618, "loss": 0.1221, "step": 979 }, { "epoch": 1.86, "grad_norm": 0.6144994193768769, "learning_rate": 0.00014450248926220236, "loss": 0.1137, "step": 980 }, { "epoch": 1.87, "grad_norm": 0.694559009582273, "learning_rate": 0.0001443921712078633, "loss": 0.152, "step": 981 }, { "epoch": 1.87, "grad_norm": 0.6237296450680663, "learning_rate": 0.00014428178582716035, "loss": 0.1337, "step": 982 }, { "epoch": 1.87, "grad_norm": 0.614692100406052, "learning_rate": 0.00014417133328750693, "loss": 0.1264, "step": 983 }, { "epoch": 1.87, "grad_norm": 0.5961583320475107, "learning_rate": 0.0001440608137564184, "loss": 0.1266, "step": 984 }, { "epoch": 1.87, "grad_norm": 0.6281667070527972, "learning_rate": 0.00014395022740151163, "loss": 0.1303, "step": 985 }, { "epoch": 1.87, "grad_norm": 0.5183173975658547, "learning_rate": 0.00014383957439050485, "loss": 0.112, "step": 986 }, { "epoch": 1.88, "grad_norm": 0.5170857242125807, "learning_rate": 0.00014372885489121744, "loss": 0.0987, "step": 987 }, { "epoch": 1.88, "grad_norm": 0.7509525711096852, "learning_rate": 0.00014361806907156957, "loss": 0.1612, "step": 988 }, { "epoch": 1.88, "grad_norm": 0.6749940939261515, "learning_rate": 0.00014350721709958196, "loss": 0.1242, "step": 989 }, { "epoch": 1.88, "grad_norm": 0.6980235830253116, "learning_rate": 0.00014339629914337571, "loss": 0.1808, "step": 990 }, { "epoch": 1.88, "grad_norm": 0.5277119377727666, "learning_rate": 0.00014328531537117204, "loss": 0.1066, "step": 991 }, { "epoch": 1.89, "grad_norm": 0.4811968246377426, "learning_rate": 0.0001431742659512919, "loss": 0.0952, "step": 992 }, { "epoch": 1.89, "grad_norm": 0.6246455448800744, "learning_rate": 0.00014306315105215578, "loss": 0.1364, "step": 993 }, { "epoch": 1.89, "grad_norm": 0.6695568756551928, "learning_rate": 0.0001429519708422836, "loss": 0.1691, "step": 994 }, { "epoch": 1.89, "grad_norm": 0.5135811234770673, "learning_rate": 0.00014284072549029423, "loss": 0.1008, "step": 995 }, { "epoch": 1.89, "grad_norm": 0.6134236652457754, "learning_rate": 0.0001427294151649054, "loss": 0.124, "step": 996 }, { "epoch": 1.9, "grad_norm": 0.6385133871792419, "learning_rate": 0.00014261804003493333, "loss": 0.1434, "step": 997 }, { "epoch": 1.9, "grad_norm": 0.534916310085437, "learning_rate": 0.00014250660026929256, "loss": 0.1066, "step": 998 }, { "epoch": 1.9, "grad_norm": 0.578246251487676, "learning_rate": 0.00014239509603699562, "loss": 0.1115, "step": 999 }, { "epoch": 1.9, "grad_norm": 0.8131274078358276, "learning_rate": 0.00014228352750715286, "loss": 0.1552, "step": 1000 }, { "epoch": 1.9, "eval_blimp_filtered_avg": 0.72, "eval_blimp_filtered_std": 0.004964982879314705, "step": 1000 }, { "epoch": 1.9, "eval_blimp_supplement_avg": 0.8189655172413793, "eval_blimp_supplement_std": 0.01685958607740786, "step": 1000 }, { "epoch": 1.9, "eval_vqa_filtered_avg": 0.52, "eval_vqa_filtered_std": 0.05021167315686779, "step": 1000 }, { "epoch": 1.9, "eval_winoground_filtered_avg": 0.65, "eval_winoground_filtered_std": 0.0479372485441102, "step": 1000 }, { "epoch": 1.9, "grad_norm": 0.6049460549218416, "learning_rate": 0.0001421718948489721, "loss": 0.0992, "step": 1001 }, { "epoch": 1.9, "grad_norm": 0.6129692103395931, "learning_rate": 0.00014206019823175843, "loss": 0.1555, "step": 1002 }, { "epoch": 1.91, "grad_norm": 0.6433526969624807, "learning_rate": 0.00014194843782491402, "loss": 0.1327, "step": 1003 }, { "epoch": 1.91, "grad_norm": 0.7292876630311822, "learning_rate": 0.00014183661379793764, "loss": 0.1664, "step": 1004 }, { "epoch": 1.91, "grad_norm": 0.4792455885361662, "learning_rate": 0.00014172472632042465, "loss": 0.0828, "step": 1005 }, { "epoch": 1.91, "grad_norm": 0.662385231200864, "learning_rate": 0.0001416127755620666, "loss": 0.1236, "step": 1006 }, { "epoch": 1.91, "grad_norm": 0.5287479945668606, "learning_rate": 0.00014150076169265106, "loss": 0.0869, "step": 1007 }, { "epoch": 1.92, "grad_norm": 0.6124106378254337, "learning_rate": 0.00014138868488206127, "loss": 0.1106, "step": 1008 }, { "epoch": 1.92, "grad_norm": 0.7283110931108968, "learning_rate": 0.00014127654530027596, "loss": 0.1275, "step": 1009 }, { "epoch": 1.92, "grad_norm": 0.5240335479136654, "learning_rate": 0.00014116434311736904, "loss": 0.0828, "step": 1010 }, { "epoch": 1.92, "grad_norm": 0.6969772664319349, "learning_rate": 0.00014105207850350932, "loss": 0.1097, "step": 1011 }, { "epoch": 1.92, "grad_norm": 0.6537627316186466, "learning_rate": 0.00014093975162896038, "loss": 0.1027, "step": 1012 }, { "epoch": 1.93, "grad_norm": 0.6094186959080358, "learning_rate": 0.0001408273626640802, "loss": 0.116, "step": 1013 }, { "epoch": 1.93, "grad_norm": 0.5946465166675295, "learning_rate": 0.0001407149117793209, "loss": 0.1144, "step": 1014 }, { "epoch": 1.93, "grad_norm": 0.6662402660776539, "learning_rate": 0.0001406023991452285, "loss": 0.1445, "step": 1015 }, { "epoch": 1.93, "grad_norm": 0.5310035453201003, "learning_rate": 0.00014048982493244268, "loss": 0.1022, "step": 1016 }, { "epoch": 1.93, "grad_norm": 0.5232706132812045, "learning_rate": 0.00014037718931169662, "loss": 0.1023, "step": 1017 }, { "epoch": 1.94, "grad_norm": 0.6821627291676288, "learning_rate": 0.00014026449245381647, "loss": 0.142, "step": 1018 }, { "epoch": 1.94, "grad_norm": 0.6764507689739919, "learning_rate": 0.0001401517345297213, "loss": 0.1573, "step": 1019 }, { "epoch": 1.94, "grad_norm": 0.4332529015036505, "learning_rate": 0.0001400389157104229, "loss": 0.0786, "step": 1020 }, { "epoch": 1.94, "grad_norm": 0.5591954002968514, "learning_rate": 0.00013992603616702525, "loss": 0.0946, "step": 1021 }, { "epoch": 1.94, "grad_norm": 0.5539273165435759, "learning_rate": 0.0001398130960707246, "loss": 0.1175, "step": 1022 }, { "epoch": 1.94, "grad_norm": 0.49838082891119917, "learning_rate": 0.00013970009559280882, "loss": 0.0933, "step": 1023 }, { "epoch": 1.95, "grad_norm": 0.4787638095887521, "learning_rate": 0.00013958703490465758, "loss": 0.0822, "step": 1024 }, { "epoch": 1.95, "grad_norm": 0.6248592585507684, "learning_rate": 0.00013947391417774176, "loss": 0.1103, "step": 1025 }, { "epoch": 1.95, "grad_norm": 0.5415849157520246, "learning_rate": 0.00013936073358362328, "loss": 0.1004, "step": 1026 }, { "epoch": 1.95, "grad_norm": 0.47209859802428455, "learning_rate": 0.00013924749329395487, "loss": 0.0891, "step": 1027 }, { "epoch": 1.95, "grad_norm": 0.618261233735571, "learning_rate": 0.00013913419348047983, "loss": 0.1191, "step": 1028 }, { "epoch": 1.96, "grad_norm": 0.4500346192526262, "learning_rate": 0.0001390208343150317, "loss": 0.0877, "step": 1029 }, { "epoch": 1.96, "grad_norm": 0.5158952885265543, "learning_rate": 0.00013890741596953406, "loss": 0.0844, "step": 1030 }, { "epoch": 1.96, "grad_norm": 0.5598011284217639, "learning_rate": 0.00013879393861600023, "loss": 0.0702, "step": 1031 }, { "epoch": 1.96, "grad_norm": 0.6337815408029293, "learning_rate": 0.000138680402426533, "loss": 0.1498, "step": 1032 }, { "epoch": 1.96, "grad_norm": 0.6727113469376889, "learning_rate": 0.0001385668075733245, "loss": 0.091, "step": 1033 }, { "epoch": 1.97, "grad_norm": 0.6208189932402287, "learning_rate": 0.00013845315422865561, "loss": 0.0964, "step": 1034 }, { "epoch": 1.97, "grad_norm": 0.3982982510872747, "learning_rate": 0.00013833944256489615, "loss": 0.0631, "step": 1035 }, { "epoch": 1.97, "grad_norm": 0.6087207898223468, "learning_rate": 0.00013822567275450427, "loss": 0.0995, "step": 1036 }, { "epoch": 1.97, "grad_norm": 0.6139176180798773, "learning_rate": 0.00013811184497002635, "loss": 0.1194, "step": 1037 }, { "epoch": 1.97, "grad_norm": 0.5975968122645353, "learning_rate": 0.0001379979593840967, "loss": 0.1113, "step": 1038 }, { "epoch": 1.98, "grad_norm": 0.5730959419728413, "learning_rate": 0.00013788401616943716, "loss": 0.1231, "step": 1039 }, { "epoch": 1.98, "grad_norm": 0.43215476706358097, "learning_rate": 0.0001377700154988572, "loss": 0.0773, "step": 1040 }, { "epoch": 1.98, "grad_norm": 0.6732584615698285, "learning_rate": 0.00013765595754525325, "loss": 0.1336, "step": 1041 }, { "epoch": 1.98, "grad_norm": 0.7249311466074623, "learning_rate": 0.00013754184248160868, "loss": 0.1439, "step": 1042 }, { "epoch": 1.98, "grad_norm": 0.5535952740506467, "learning_rate": 0.00013742767048099353, "loss": 0.0921, "step": 1043 }, { "epoch": 1.98, "grad_norm": 0.48590595436753564, "learning_rate": 0.0001373134417165641, "loss": 0.0855, "step": 1044 }, { "epoch": 1.99, "grad_norm": 0.5458867636882345, "learning_rate": 0.00013719915636156276, "loss": 0.116, "step": 1045 }, { "epoch": 1.99, "grad_norm": 0.6701109905050361, "learning_rate": 0.00013708481458931784, "loss": 0.1346, "step": 1046 }, { "epoch": 1.99, "grad_norm": 0.7152120972588478, "learning_rate": 0.0001369704165732431, "loss": 0.0939, "step": 1047 }, { "epoch": 1.99, "grad_norm": 0.4916019974907788, "learning_rate": 0.00013685596248683772, "loss": 0.0808, "step": 1048 }, { "epoch": 1.99, "grad_norm": 0.5969473082214369, "learning_rate": 0.0001367414525036858, "loss": 0.1193, "step": 1049 }, { "epoch": 2.0, "grad_norm": 0.6724075722268807, "learning_rate": 0.00013662688679745626, "loss": 0.1331, "step": 1050 }, { "epoch": 2.0, "grad_norm": 0.6730546075640864, "learning_rate": 0.00013651226554190258, "loss": 0.1426, "step": 1051 }, { "epoch": 2.0, "grad_norm": 0.5913648875040054, "learning_rate": 0.0001363975889108624, "loss": 0.0944, "step": 1052 }, { "epoch": 2.0, "grad_norm": 0.2569008434564097, "learning_rate": 0.00013628285707825745, "loss": 0.0317, "step": 1053 }, { "epoch": 2.0, "grad_norm": 0.3756917736291013, "learning_rate": 0.00013616807021809305, "loss": 0.0451, "step": 1054 }, { "epoch": 2.01, "grad_norm": 0.3146616146946178, "learning_rate": 0.00013605322850445813, "loss": 0.0395, "step": 1055 }, { "epoch": 2.01, "grad_norm": 0.33965542861644565, "learning_rate": 0.0001359383321115247, "loss": 0.0379, "step": 1056 }, { "epoch": 2.01, "grad_norm": 0.5269479710220547, "learning_rate": 0.00013582338121354768, "loss": 0.0693, "step": 1057 }, { "epoch": 2.01, "grad_norm": 0.5388617174307568, "learning_rate": 0.00013570837598486475, "loss": 0.0711, "step": 1058 }, { "epoch": 2.01, "grad_norm": 0.5041191642059525, "learning_rate": 0.0001355933165998959, "loss": 0.0388, "step": 1059 }, { "epoch": 2.02, "grad_norm": 0.3572559690437862, "learning_rate": 0.00013547820323314336, "loss": 0.0286, "step": 1060 }, { "epoch": 2.02, "grad_norm": 0.402166709341025, "learning_rate": 0.0001353630360591911, "loss": 0.048, "step": 1061 }, { "epoch": 2.02, "grad_norm": 0.4441157842069636, "learning_rate": 0.0001352478152527048, "loss": 0.0377, "step": 1062 }, { "epoch": 2.02, "grad_norm": 0.4366689464813995, "learning_rate": 0.00013513254098843143, "loss": 0.0457, "step": 1063 }, { "epoch": 2.02, "grad_norm": 0.7551676027030814, "learning_rate": 0.00013501721344119907, "loss": 0.0666, "step": 1064 }, { "epoch": 2.02, "grad_norm": 0.4069892532969601, "learning_rate": 0.00013490183278591652, "loss": 0.0384, "step": 1065 }, { "epoch": 2.03, "grad_norm": 0.5067093586703465, "learning_rate": 0.0001347863991975733, "loss": 0.0493, "step": 1066 }, { "epoch": 2.03, "grad_norm": 0.7812234749028085, "learning_rate": 0.00013467091285123903, "loss": 0.0456, "step": 1067 }, { "epoch": 2.03, "grad_norm": 0.3483744367570125, "learning_rate": 0.00013455537392206339, "loss": 0.0313, "step": 1068 }, { "epoch": 2.03, "grad_norm": 0.3953811867007035, "learning_rate": 0.0001344397825852759, "loss": 0.0384, "step": 1069 }, { "epoch": 2.03, "grad_norm": 0.47590560195955134, "learning_rate": 0.00013432413901618548, "loss": 0.0489, "step": 1070 }, { "epoch": 2.04, "grad_norm": 0.5169996856189782, "learning_rate": 0.00013420844339018028, "loss": 0.0515, "step": 1071 }, { "epoch": 2.04, "grad_norm": 0.5467238178235401, "learning_rate": 0.0001340926958827274, "loss": 0.0487, "step": 1072 }, { "epoch": 2.04, "grad_norm": 0.5738788928718197, "learning_rate": 0.00013397689666937266, "loss": 0.0371, "step": 1073 }, { "epoch": 2.04, "grad_norm": 0.47325453900967235, "learning_rate": 0.00013386104592574022, "loss": 0.035, "step": 1074 }, { "epoch": 2.04, "grad_norm": 0.4723504682890789, "learning_rate": 0.00013374514382753246, "loss": 0.0544, "step": 1075 }, { "epoch": 2.05, "grad_norm": 0.6138324235452328, "learning_rate": 0.00013362919055052966, "loss": 0.0762, "step": 1076 }, { "epoch": 2.05, "grad_norm": 0.424211201025921, "learning_rate": 0.00013351318627058964, "loss": 0.0495, "step": 1077 }, { "epoch": 2.05, "grad_norm": 0.4222790060021048, "learning_rate": 0.00013339713116364768, "loss": 0.0426, "step": 1078 }, { "epoch": 2.05, "grad_norm": 0.4719436053491697, "learning_rate": 0.000133281025405716, "loss": 0.051, "step": 1079 }, { "epoch": 2.05, "grad_norm": 0.3560604139520418, "learning_rate": 0.00013316486917288377, "loss": 0.0496, "step": 1080 }, { "epoch": 2.06, "grad_norm": 0.432503269741199, "learning_rate": 0.00013304866264131669, "loss": 0.0553, "step": 1081 }, { "epoch": 2.06, "grad_norm": 0.34497343000109104, "learning_rate": 0.00013293240598725666, "loss": 0.0254, "step": 1082 }, { "epoch": 2.06, "grad_norm": 0.7305541125053528, "learning_rate": 0.00013281609938702173, "loss": 0.0551, "step": 1083 }, { "epoch": 2.06, "grad_norm": 0.412067896315099, "learning_rate": 0.00013269974301700557, "loss": 0.0434, "step": 1084 }, { "epoch": 2.06, "grad_norm": 0.36107130273152804, "learning_rate": 0.0001325833370536774, "loss": 0.045, "step": 1085 }, { "epoch": 2.06, "grad_norm": 0.522370085859856, "learning_rate": 0.00013246688167358164, "loss": 0.0448, "step": 1086 }, { "epoch": 2.07, "grad_norm": 0.4400508089800668, "learning_rate": 0.00013235037705333765, "loss": 0.0486, "step": 1087 }, { "epoch": 2.07, "grad_norm": 0.4707713104781209, "learning_rate": 0.00013223382336963952, "loss": 0.0381, "step": 1088 }, { "epoch": 2.07, "grad_norm": 0.4484233192069746, "learning_rate": 0.00013211722079925568, "loss": 0.0521, "step": 1089 }, { "epoch": 2.07, "grad_norm": 0.31714508408341024, "learning_rate": 0.00013200056951902876, "loss": 0.0417, "step": 1090 }, { "epoch": 2.07, "grad_norm": 0.3035109770480951, "learning_rate": 0.00013188386970587517, "loss": 0.0312, "step": 1091 }, { "epoch": 2.08, "grad_norm": 0.3744873024203581, "learning_rate": 0.00013176712153678509, "loss": 0.03, "step": 1092 }, { "epoch": 2.08, "grad_norm": 0.5855426474166568, "learning_rate": 0.00013165032518882184, "loss": 0.0657, "step": 1093 }, { "epoch": 2.08, "grad_norm": 0.43545501187610847, "learning_rate": 0.000131533480839122, "loss": 0.0324, "step": 1094 }, { "epoch": 2.08, "grad_norm": 0.327078925190862, "learning_rate": 0.00013141658866489477, "loss": 0.0352, "step": 1095 }, { "epoch": 2.08, "grad_norm": 0.3686904573971941, "learning_rate": 0.00013129964884342206, "loss": 0.028, "step": 1096 }, { "epoch": 2.09, "grad_norm": 0.5716681078597073, "learning_rate": 0.0001311826615520579, "loss": 0.0671, "step": 1097 }, { "epoch": 2.09, "grad_norm": 0.4948573340166021, "learning_rate": 0.0001310656269682284, "loss": 0.0582, "step": 1098 }, { "epoch": 2.09, "grad_norm": 0.3763958489841063, "learning_rate": 0.00013094854526943134, "loss": 0.0411, "step": 1099 }, { "epoch": 2.09, "grad_norm": 0.40587486050114185, "learning_rate": 0.00013083141663323603, "loss": 0.0386, "step": 1100 }, { "epoch": 2.09, "eval_blimp_filtered_avg": 0.7094029850746268, "eval_blimp_filtered_std": 0.005025636834389819, "step": 1100 }, { "epoch": 2.09, "eval_blimp_supplement_avg": 0.8017241379310345, "eval_blimp_supplement_std": 0.017725994288774204, "step": 1100 }, { "epoch": 2.09, "eval_vqa_filtered_avg": 0.51, "eval_vqa_filtered_std": 0.05024183937956912, "step": 1100 }, { "epoch": 2.09, "eval_winoground_filtered_avg": 0.61, "eval_winoground_filtered_std": 0.04902071300001975, "step": 1100 }, { "epoch": 2.09, "grad_norm": 0.46949964685620826, "learning_rate": 0.0001307142412372829, "loss": 0.0368, "step": 1101 }, { "epoch": 2.1, "grad_norm": 0.48455075599751435, "learning_rate": 0.00013059701925928328, "loss": 0.0423, "step": 1102 }, { "epoch": 2.1, "grad_norm": 0.5997623154996554, "learning_rate": 0.00013047975087701917, "loss": 0.0607, "step": 1103 }, { "epoch": 2.1, "grad_norm": 0.3716678364800413, "learning_rate": 0.000130362436268343, "loss": 0.0314, "step": 1104 }, { "epoch": 2.1, "grad_norm": 0.3892223997141819, "learning_rate": 0.0001302450756111773, "loss": 0.0284, "step": 1105 }, { "epoch": 2.1, "grad_norm": 0.507263222208999, "learning_rate": 0.00013012766908351438, "loss": 0.0457, "step": 1106 }, { "epoch": 2.1, "grad_norm": 0.40180755154073383, "learning_rate": 0.00013001021686341615, "loss": 0.0437, "step": 1107 }, { "epoch": 2.11, "grad_norm": 0.37465338531830633, "learning_rate": 0.00012989271912901374, "loss": 0.043, "step": 1108 }, { "epoch": 2.11, "grad_norm": 0.468612972770224, "learning_rate": 0.00012977517605850745, "loss": 0.03, "step": 1109 }, { "epoch": 2.11, "grad_norm": 0.4107226650616942, "learning_rate": 0.00012965758783016633, "loss": 0.035, "step": 1110 }, { "epoch": 2.11, "grad_norm": 0.3135837890163377, "learning_rate": 0.00012953995462232771, "loss": 0.0328, "step": 1111 }, { "epoch": 2.11, "grad_norm": 0.532204135510291, "learning_rate": 0.00012942227661339744, "loss": 0.0369, "step": 1112 }, { "epoch": 2.12, "grad_norm": 0.38182747397261896, "learning_rate": 0.00012930455398184904, "loss": 0.0296, "step": 1113 }, { "epoch": 2.12, "grad_norm": 0.4254003143554997, "learning_rate": 0.00012918678690622388, "loss": 0.0368, "step": 1114 }, { "epoch": 2.12, "grad_norm": 0.46440557700614427, "learning_rate": 0.0001290689755651307, "loss": 0.0497, "step": 1115 }, { "epoch": 2.12, "grad_norm": 0.4294572881474271, "learning_rate": 0.00012895112013724532, "loss": 0.0428, "step": 1116 }, { "epoch": 2.12, "grad_norm": 0.4252031998428546, "learning_rate": 0.00012883322080131047, "loss": 0.036, "step": 1117 }, { "epoch": 2.13, "grad_norm": 0.4051565282953917, "learning_rate": 0.00012871527773613547, "loss": 0.0537, "step": 1118 }, { "epoch": 2.13, "grad_norm": 0.4294633349331862, "learning_rate": 0.00012859729112059596, "loss": 0.0422, "step": 1119 }, { "epoch": 2.13, "grad_norm": 0.3912538915184366, "learning_rate": 0.0001284792611336336, "loss": 0.0409, "step": 1120 }, { "epoch": 2.13, "grad_norm": 0.420338337451183, "learning_rate": 0.00012836118795425585, "loss": 0.0455, "step": 1121 }, { "epoch": 2.13, "grad_norm": 0.48926767118164194, "learning_rate": 0.0001282430717615357, "loss": 0.0428, "step": 1122 }, { "epoch": 2.13, "grad_norm": 0.4436304094160602, "learning_rate": 0.00012812491273461136, "loss": 0.0375, "step": 1123 }, { "epoch": 2.14, "grad_norm": 0.4442306180939482, "learning_rate": 0.00012800671105268598, "loss": 0.0452, "step": 1124 }, { "epoch": 2.14, "grad_norm": 0.39134488465993666, "learning_rate": 0.0001278884668950274, "loss": 0.0428, "step": 1125 }, { "epoch": 2.14, "grad_norm": 0.3849210489845949, "learning_rate": 0.00012777018044096792, "loss": 0.0384, "step": 1126 }, { "epoch": 2.14, "grad_norm": 0.32954320405842347, "learning_rate": 0.00012765185186990396, "loss": 0.0252, "step": 1127 }, { "epoch": 2.14, "grad_norm": 0.3875118332106187, "learning_rate": 0.00012753348136129583, "loss": 0.0487, "step": 1128 }, { "epoch": 2.15, "grad_norm": 0.3818080211910118, "learning_rate": 0.00012741506909466743, "loss": 0.0349, "step": 1129 }, { "epoch": 2.15, "grad_norm": 0.4826749157978257, "learning_rate": 0.00012729661524960598, "loss": 0.0488, "step": 1130 }, { "epoch": 2.15, "grad_norm": 0.2714887367657404, "learning_rate": 0.00012717812000576182, "loss": 0.0253, "step": 1131 }, { "epoch": 2.15, "grad_norm": 0.4050491963611946, "learning_rate": 0.00012705958354284797, "loss": 0.0435, "step": 1132 }, { "epoch": 2.15, "grad_norm": 0.32710369664362476, "learning_rate": 0.0001269410060406401, "loss": 0.0218, "step": 1133 }, { "epoch": 2.16, "grad_norm": 0.3575098425935964, "learning_rate": 0.00012682238767897596, "loss": 0.026, "step": 1134 }, { "epoch": 2.16, "grad_norm": 0.39100411733432433, "learning_rate": 0.00012670372863775545, "loss": 0.0377, "step": 1135 }, { "epoch": 2.16, "grad_norm": 0.5472311257793697, "learning_rate": 0.00012658502909694, "loss": 0.0478, "step": 1136 }, { "epoch": 2.16, "grad_norm": 0.49160757720334675, "learning_rate": 0.00012646628923655253, "loss": 0.0386, "step": 1137 }, { "epoch": 2.16, "grad_norm": 0.27119874174099495, "learning_rate": 0.00012634750923667717, "loss": 0.0307, "step": 1138 }, { "epoch": 2.17, "grad_norm": 0.34392707408310796, "learning_rate": 0.00012622868927745882, "loss": 0.0322, "step": 1139 }, { "epoch": 2.17, "grad_norm": 0.3578798901676809, "learning_rate": 0.00012610982953910308, "loss": 0.0297, "step": 1140 }, { "epoch": 2.17, "grad_norm": 0.3491581863195109, "learning_rate": 0.00012599093020187582, "loss": 0.0251, "step": 1141 }, { "epoch": 2.17, "grad_norm": 0.41562099124601415, "learning_rate": 0.00012587199144610292, "loss": 0.0305, "step": 1142 }, { "epoch": 2.17, "grad_norm": 0.49578357374913345, "learning_rate": 0.00012575301345217022, "loss": 0.0412, "step": 1143 }, { "epoch": 2.17, "grad_norm": 0.2756711054242124, "learning_rate": 0.00012563399640052288, "loss": 0.0191, "step": 1144 }, { "epoch": 2.18, "grad_norm": 0.35925458690722906, "learning_rate": 0.00012551494047166533, "loss": 0.0314, "step": 1145 }, { "epoch": 2.18, "grad_norm": 0.43038506604263066, "learning_rate": 0.00012539584584616109, "loss": 0.0314, "step": 1146 }, { "epoch": 2.18, "grad_norm": 0.6149437848984874, "learning_rate": 0.00012527671270463225, "loss": 0.0429, "step": 1147 }, { "epoch": 2.18, "grad_norm": 0.3498763556696733, "learning_rate": 0.00012515754122775931, "loss": 0.0327, "step": 1148 }, { "epoch": 2.18, "grad_norm": 0.3788192493590721, "learning_rate": 0.000125038331596281, "loss": 0.04, "step": 1149 }, { "epoch": 2.19, "grad_norm": 0.531347094099742, "learning_rate": 0.0001249190839909938, "loss": 0.0463, "step": 1150 }, { "epoch": 2.19, "grad_norm": 0.44701069460309784, "learning_rate": 0.0001247997985927519, "loss": 0.0462, "step": 1151 }, { "epoch": 2.19, "grad_norm": 0.5213636448569404, "learning_rate": 0.00012468047558246675, "loss": 0.0386, "step": 1152 }, { "epoch": 2.19, "grad_norm": 0.4446863510500287, "learning_rate": 0.00012456111514110677, "loss": 0.047, "step": 1153 }, { "epoch": 2.19, "grad_norm": 0.3400598266936459, "learning_rate": 0.00012444171744969732, "loss": 0.0262, "step": 1154 }, { "epoch": 2.2, "grad_norm": 0.3471200439757837, "learning_rate": 0.0001243222826893201, "loss": 0.0328, "step": 1155 }, { "epoch": 2.2, "grad_norm": 0.5207867439629942, "learning_rate": 0.0001242028110411131, "loss": 0.0447, "step": 1156 }, { "epoch": 2.2, "grad_norm": 0.41197571393433186, "learning_rate": 0.00012408330268627027, "loss": 0.0384, "step": 1157 }, { "epoch": 2.2, "grad_norm": 0.4941593887673083, "learning_rate": 0.00012396375780604116, "loss": 0.059, "step": 1158 }, { "epoch": 2.2, "grad_norm": 0.48922080787535993, "learning_rate": 0.0001238441765817308, "loss": 0.0491, "step": 1159 }, { "epoch": 2.21, "grad_norm": 0.3043623174307196, "learning_rate": 0.00012372455919469925, "loss": 0.03, "step": 1160 }, { "epoch": 2.21, "grad_norm": 0.4860080914202065, "learning_rate": 0.0001236049058263615, "loss": 0.0519, "step": 1161 }, { "epoch": 2.21, "grad_norm": 0.49683878210727017, "learning_rate": 0.00012348521665818708, "loss": 0.0499, "step": 1162 }, { "epoch": 2.21, "grad_norm": 0.4264904933232595, "learning_rate": 0.00012336549187169982, "loss": 0.0367, "step": 1163 }, { "epoch": 2.21, "grad_norm": 0.3502437231614882, "learning_rate": 0.0001232457316484775, "loss": 0.0264, "step": 1164 }, { "epoch": 2.21, "grad_norm": 0.4428874415587124, "learning_rate": 0.00012312593617015176, "loss": 0.0528, "step": 1165 }, { "epoch": 2.22, "grad_norm": 0.45858305035047414, "learning_rate": 0.00012300610561840762, "loss": 0.0352, "step": 1166 }, { "epoch": 2.22, "grad_norm": 0.5156146878589504, "learning_rate": 0.00012288624017498336, "loss": 0.0616, "step": 1167 }, { "epoch": 2.22, "grad_norm": 0.2966113548199596, "learning_rate": 0.0001227663400216701, "loss": 0.0274, "step": 1168 }, { "epoch": 2.22, "grad_norm": 0.5765531597689414, "learning_rate": 0.0001226464053403117, "loss": 0.0522, "step": 1169 }, { "epoch": 2.22, "grad_norm": 0.4740177569982564, "learning_rate": 0.0001225264363128043, "loss": 0.0516, "step": 1170 }, { "epoch": 2.23, "grad_norm": 0.5076756208601756, "learning_rate": 0.00012240643312109615, "loss": 0.0421, "step": 1171 }, { "epoch": 2.23, "grad_norm": 0.47450987977141956, "learning_rate": 0.00012228639594718735, "loss": 0.0376, "step": 1172 }, { "epoch": 2.23, "grad_norm": 0.37775299991079087, "learning_rate": 0.00012216632497312948, "loss": 0.043, "step": 1173 }, { "epoch": 2.23, "grad_norm": 0.3032606054773591, "learning_rate": 0.00012204622038102547, "loss": 0.0244, "step": 1174 }, { "epoch": 2.23, "grad_norm": 0.41593603720779515, "learning_rate": 0.00012192608235302914, "loss": 0.0271, "step": 1175 }, { "epoch": 2.24, "grad_norm": 0.48616356522987864, "learning_rate": 0.00012180591107134507, "loss": 0.0511, "step": 1176 }, { "epoch": 2.24, "grad_norm": 0.33342763976896517, "learning_rate": 0.0001216857067182283, "loss": 0.0335, "step": 1177 }, { "epoch": 2.24, "grad_norm": 0.3820811099681301, "learning_rate": 0.00012156546947598393, "loss": 0.0451, "step": 1178 }, { "epoch": 2.24, "grad_norm": 0.3608056418747065, "learning_rate": 0.00012144519952696707, "loss": 0.0329, "step": 1179 }, { "epoch": 2.24, "grad_norm": 0.3310375937897457, "learning_rate": 0.00012132489705358234, "loss": 0.0231, "step": 1180 }, { "epoch": 2.25, "grad_norm": 0.2739588899219208, "learning_rate": 0.00012120456223828371, "loss": 0.0287, "step": 1181 }, { "epoch": 2.25, "grad_norm": 0.368161677564393, "learning_rate": 0.00012108419526357421, "loss": 0.0383, "step": 1182 }, { "epoch": 2.25, "grad_norm": 0.3397825800068154, "learning_rate": 0.00012096379631200563, "loss": 0.0408, "step": 1183 }, { "epoch": 2.25, "grad_norm": 0.45078794373381137, "learning_rate": 0.00012084336556617826, "loss": 0.032, "step": 1184 }, { "epoch": 2.25, "grad_norm": 0.5551416999232486, "learning_rate": 0.00012072290320874067, "loss": 0.0467, "step": 1185 }, { "epoch": 2.25, "grad_norm": 0.4333124420452397, "learning_rate": 0.00012060240942238927, "loss": 0.0466, "step": 1186 }, { "epoch": 2.26, "grad_norm": 0.40261962336794965, "learning_rate": 0.00012048188438986821, "loss": 0.0375, "step": 1187 }, { "epoch": 2.26, "grad_norm": 0.34982893392062914, "learning_rate": 0.00012036132829396895, "loss": 0.0382, "step": 1188 }, { "epoch": 2.26, "grad_norm": 0.37750691841870915, "learning_rate": 0.00012024074131753018, "loss": 0.0309, "step": 1189 }, { "epoch": 2.26, "grad_norm": 0.32955810993987106, "learning_rate": 0.00012012012364343735, "loss": 0.0298, "step": 1190 }, { "epoch": 2.26, "grad_norm": 0.4350162477004107, "learning_rate": 0.00011999947545462242, "loss": 0.0318, "step": 1191 }, { "epoch": 2.27, "grad_norm": 0.49527937896308655, "learning_rate": 0.00011987879693406379, "loss": 0.0368, "step": 1192 }, { "epoch": 2.27, "grad_norm": 0.3970128519776545, "learning_rate": 0.00011975808826478567, "loss": 0.0333, "step": 1193 }, { "epoch": 2.27, "grad_norm": 0.48302407742718406, "learning_rate": 0.00011963734962985811, "loss": 0.0487, "step": 1194 }, { "epoch": 2.27, "grad_norm": 0.4885388080436485, "learning_rate": 0.0001195165812123966, "loss": 0.0372, "step": 1195 }, { "epoch": 2.27, "grad_norm": 0.3270333714375525, "learning_rate": 0.00011939578319556173, "loss": 0.0235, "step": 1196 }, { "epoch": 2.28, "grad_norm": 0.29054914807156107, "learning_rate": 0.00011927495576255907, "loss": 0.0217, "step": 1197 }, { "epoch": 2.28, "grad_norm": 0.36107262821775626, "learning_rate": 0.00011915409909663878, "loss": 0.0339, "step": 1198 }, { "epoch": 2.28, "grad_norm": 0.3491300672920857, "learning_rate": 0.00011903321338109527, "loss": 0.0326, "step": 1199 }, { "epoch": 2.28, "grad_norm": 0.3569252068469629, "learning_rate": 0.00011891229879926715, "loss": 0.0345, "step": 1200 }, { "epoch": 2.28, "eval_blimp_filtered_avg": 0.7146268656716418, "eval_blimp_filtered_std": 0.0050090655893130775, "step": 1200 }, { "epoch": 2.28, "eval_blimp_supplement_avg": 0.8297413793103449, "eval_blimp_supplement_std": 0.016667723090251323, "step": 1200 }, { "epoch": 2.28, "eval_vqa_filtered_avg": 0.55, "eval_vqa_filtered_std": 0.05, "step": 1200 }, { "epoch": 2.28, "eval_winoground_filtered_avg": 0.61, "eval_winoground_filtered_std": 0.04902071300001975, "step": 1200 }, { "epoch": 2.28, "grad_norm": 0.4499518128427458, "learning_rate": 0.00011879135553453666, "loss": 0.0446, "step": 1201 }, { "epoch": 2.29, "grad_norm": 0.2587934853423328, "learning_rate": 0.00011867038377032968, "loss": 0.0159, "step": 1202 }, { "epoch": 2.29, "grad_norm": 0.4265362644145419, "learning_rate": 0.00011854938369011524, "loss": 0.0408, "step": 1203 }, { "epoch": 2.29, "grad_norm": 0.39881658292188565, "learning_rate": 0.00011842835547740532, "loss": 0.0371, "step": 1204 }, { "epoch": 2.29, "grad_norm": 0.23201834461619725, "learning_rate": 0.00011830729931575455, "loss": 0.0153, "step": 1205 }, { "epoch": 2.29, "grad_norm": 0.383873492446786, "learning_rate": 0.00011818621538875998, "loss": 0.026, "step": 1206 }, { "epoch": 2.29, "grad_norm": 0.34211497444190253, "learning_rate": 0.00011806510388006074, "loss": 0.0259, "step": 1207 }, { "epoch": 2.3, "grad_norm": 0.33846161704573596, "learning_rate": 0.00011794396497333787, "loss": 0.0281, "step": 1208 }, { "epoch": 2.3, "grad_norm": 0.33371372504799635, "learning_rate": 0.00011782279885231385, "loss": 0.019, "step": 1209 }, { "epoch": 2.3, "grad_norm": 0.4726326321422764, "learning_rate": 0.00011770160570075248, "loss": 0.0247, "step": 1210 }, { "epoch": 2.3, "grad_norm": 0.39288257765592993, "learning_rate": 0.0001175803857024586, "loss": 0.0385, "step": 1211 }, { "epoch": 2.3, "grad_norm": 0.46290305440635493, "learning_rate": 0.00011745913904127769, "loss": 0.0351, "step": 1212 }, { "epoch": 2.31, "grad_norm": 0.36417694150143853, "learning_rate": 0.00011733786590109577, "loss": 0.0255, "step": 1213 }, { "epoch": 2.31, "grad_norm": 0.31749641468830037, "learning_rate": 0.00011721656646583885, "loss": 0.0242, "step": 1214 }, { "epoch": 2.31, "grad_norm": 0.4167630504748403, "learning_rate": 0.00011709524091947304, "loss": 0.0224, "step": 1215 }, { "epoch": 2.31, "grad_norm": 0.3196664793717022, "learning_rate": 0.00011697388944600385, "loss": 0.0343, "step": 1216 }, { "epoch": 2.31, "grad_norm": 0.23001926174561355, "learning_rate": 0.00011685251222947621, "loss": 0.0186, "step": 1217 }, { "epoch": 2.32, "grad_norm": 0.42372530838437683, "learning_rate": 0.00011673110945397414, "loss": 0.0227, "step": 1218 }, { "epoch": 2.32, "grad_norm": 0.383070712973622, "learning_rate": 0.00011660968130362029, "loss": 0.0306, "step": 1219 }, { "epoch": 2.32, "grad_norm": 0.32479904681246463, "learning_rate": 0.0001164882279625759, "loss": 0.0213, "step": 1220 }, { "epoch": 2.32, "grad_norm": 0.3957453101889674, "learning_rate": 0.0001163667496150404, "loss": 0.0414, "step": 1221 }, { "epoch": 2.32, "grad_norm": 0.34430594794724173, "learning_rate": 0.00011624524644525108, "loss": 0.0366, "step": 1222 }, { "epoch": 2.33, "grad_norm": 0.5148387849399129, "learning_rate": 0.00011612371863748295, "loss": 0.0343, "step": 1223 }, { "epoch": 2.33, "grad_norm": 0.34149783079129464, "learning_rate": 0.00011600216637604835, "loss": 0.0343, "step": 1224 }, { "epoch": 2.33, "grad_norm": 0.44714035185730155, "learning_rate": 0.00011588058984529673, "loss": 0.0351, "step": 1225 }, { "epoch": 2.33, "grad_norm": 0.425828796909314, "learning_rate": 0.00011575898922961435, "loss": 0.0285, "step": 1226 }, { "epoch": 2.33, "grad_norm": 0.41588514697320916, "learning_rate": 0.00011563736471342395, "loss": 0.0394, "step": 1227 }, { "epoch": 2.33, "grad_norm": 0.47844811591245856, "learning_rate": 0.00011551571648118456, "loss": 0.031, "step": 1228 }, { "epoch": 2.34, "grad_norm": 0.27722364324768506, "learning_rate": 0.0001153940447173912, "loss": 0.0267, "step": 1229 }, { "epoch": 2.34, "grad_norm": 0.3683510748397264, "learning_rate": 0.00011527234960657449, "loss": 0.0293, "step": 1230 }, { "epoch": 2.34, "grad_norm": 0.38101338013295954, "learning_rate": 0.00011515063133330057, "loss": 0.032, "step": 1231 }, { "epoch": 2.34, "grad_norm": 0.47935254044806175, "learning_rate": 0.00011502889008217063, "loss": 0.0476, "step": 1232 }, { "epoch": 2.34, "grad_norm": 0.3588840807450633, "learning_rate": 0.00011490712603782073, "loss": 0.0303, "step": 1233 }, { "epoch": 2.35, "grad_norm": 0.35111378363503076, "learning_rate": 0.00011478533938492153, "loss": 0.022, "step": 1234 }, { "epoch": 2.35, "grad_norm": 0.4596992867544448, "learning_rate": 0.00011466353030817791, "loss": 0.0381, "step": 1235 }, { "epoch": 2.35, "grad_norm": 0.3616763781988242, "learning_rate": 0.00011454169899232885, "loss": 0.0265, "step": 1236 }, { "epoch": 2.35, "grad_norm": 0.5073009232662775, "learning_rate": 0.00011441984562214693, "loss": 0.043, "step": 1237 }, { "epoch": 2.35, "grad_norm": 0.4874016916025411, "learning_rate": 0.00011429797038243836, "loss": 0.0385, "step": 1238 }, { "epoch": 2.36, "grad_norm": 0.3812545722181971, "learning_rate": 0.00011417607345804238, "loss": 0.0341, "step": 1239 }, { "epoch": 2.36, "grad_norm": 0.4851265990460394, "learning_rate": 0.00011405415503383107, "loss": 0.037, "step": 1240 }, { "epoch": 2.36, "grad_norm": 0.4118471808267898, "learning_rate": 0.0001139322152947093, "loss": 0.0339, "step": 1241 }, { "epoch": 2.36, "grad_norm": 0.32602197572052743, "learning_rate": 0.00011381025442561415, "loss": 0.0245, "step": 1242 }, { "epoch": 2.36, "grad_norm": 0.4987215656455969, "learning_rate": 0.00011368827261151473, "loss": 0.0559, "step": 1243 }, { "epoch": 2.37, "grad_norm": 0.44021599547588836, "learning_rate": 0.00011356627003741198, "loss": 0.04, "step": 1244 }, { "epoch": 2.37, "grad_norm": 0.3513641703074855, "learning_rate": 0.00011344424688833823, "loss": 0.0376, "step": 1245 }, { "epoch": 2.37, "grad_norm": 0.46447853439268383, "learning_rate": 0.00011332220334935715, "loss": 0.037, "step": 1246 }, { "epoch": 2.37, "grad_norm": 0.4044930064261446, "learning_rate": 0.00011320013960556326, "loss": 0.0418, "step": 1247 }, { "epoch": 2.37, "grad_norm": 0.3563193294160725, "learning_rate": 0.00011307805584208167, "loss": 0.0337, "step": 1248 }, { "epoch": 2.37, "grad_norm": 0.3108300332805482, "learning_rate": 0.00011295595224406796, "loss": 0.0251, "step": 1249 }, { "epoch": 2.38, "grad_norm": 0.41920117850904765, "learning_rate": 0.00011283382899670774, "loss": 0.035, "step": 1250 }, { "epoch": 2.38, "grad_norm": 0.4411990830353891, "learning_rate": 0.00011271168628521636, "loss": 0.0343, "step": 1251 }, { "epoch": 2.38, "grad_norm": 0.4370180681562133, "learning_rate": 0.00011258952429483882, "loss": 0.0494, "step": 1252 }, { "epoch": 2.38, "grad_norm": 0.2538058868120381, "learning_rate": 0.00011246734321084925, "loss": 0.021, "step": 1253 }, { "epoch": 2.38, "grad_norm": 0.3611265531926242, "learning_rate": 0.00011234514321855078, "loss": 0.0396, "step": 1254 }, { "epoch": 2.39, "grad_norm": 0.46603577742199576, "learning_rate": 0.00011222292450327523, "loss": 0.0397, "step": 1255 }, { "epoch": 2.39, "grad_norm": 0.4906838969540557, "learning_rate": 0.00011210068725038277, "loss": 0.0414, "step": 1256 }, { "epoch": 2.39, "grad_norm": 0.4063032548307775, "learning_rate": 0.00011197843164526173, "loss": 0.0419, "step": 1257 }, { "epoch": 2.39, "grad_norm": 0.4534044749360507, "learning_rate": 0.00011185615787332826, "loss": 0.0349, "step": 1258 }, { "epoch": 2.39, "grad_norm": 0.36700971933483045, "learning_rate": 0.00011173386612002605, "loss": 0.0279, "step": 1259 }, { "epoch": 2.4, "grad_norm": 0.35071672100078005, "learning_rate": 0.00011161155657082611, "loss": 0.034, "step": 1260 }, { "epoch": 2.4, "grad_norm": 0.3677275017378925, "learning_rate": 0.00011148922941122637, "loss": 0.0247, "step": 1261 }, { "epoch": 2.4, "grad_norm": 0.5476683264168639, "learning_rate": 0.0001113668848267515, "loss": 0.0512, "step": 1262 }, { "epoch": 2.4, "grad_norm": 0.41316279070376744, "learning_rate": 0.00011124452300295256, "loss": 0.0457, "step": 1263 }, { "epoch": 2.4, "grad_norm": 0.37585584549327916, "learning_rate": 0.00011112214412540685, "loss": 0.0296, "step": 1264 }, { "epoch": 2.4, "grad_norm": 0.34107796152241554, "learning_rate": 0.00011099974837971745, "loss": 0.0286, "step": 1265 }, { "epoch": 2.41, "grad_norm": 0.35738889209842356, "learning_rate": 0.00011087733595151306, "loss": 0.0261, "step": 1266 }, { "epoch": 2.41, "grad_norm": 0.43187684784639013, "learning_rate": 0.00011075490702644765, "loss": 0.0299, "step": 1267 }, { "epoch": 2.41, "grad_norm": 0.41630122671214553, "learning_rate": 0.00011063246179020022, "loss": 0.0404, "step": 1268 }, { "epoch": 2.41, "grad_norm": 0.3123885559458753, "learning_rate": 0.00011051000042847453, "loss": 0.0322, "step": 1269 }, { "epoch": 2.41, "grad_norm": 0.42724206390329467, "learning_rate": 0.00011038752312699883, "loss": 0.0432, "step": 1270 }, { "epoch": 2.42, "grad_norm": 0.3268879710231284, "learning_rate": 0.00011026503007152542, "loss": 0.0251, "step": 1271 }, { "epoch": 2.42, "grad_norm": 0.335694998935913, "learning_rate": 0.00011014252144783061, "loss": 0.0263, "step": 1272 }, { "epoch": 2.42, "grad_norm": 0.3004434626260821, "learning_rate": 0.00011001999744171431, "loss": 0.0281, "step": 1273 }, { "epoch": 2.42, "grad_norm": 0.3583533980035141, "learning_rate": 0.00010989745823899968, "loss": 0.0289, "step": 1274 }, { "epoch": 2.42, "grad_norm": 0.23720009597790934, "learning_rate": 0.000109774904025533, "loss": 0.0169, "step": 1275 }, { "epoch": 2.43, "grad_norm": 0.4508970062883064, "learning_rate": 0.0001096523349871833, "loss": 0.0314, "step": 1276 }, { "epoch": 2.43, "grad_norm": 0.3839348660581242, "learning_rate": 0.00010952975130984209, "loss": 0.0285, "step": 1277 }, { "epoch": 2.43, "grad_norm": 0.41336432734399003, "learning_rate": 0.00010940715317942308, "loss": 0.0374, "step": 1278 }, { "epoch": 2.43, "grad_norm": 0.3806761532689941, "learning_rate": 0.00010928454078186186, "loss": 0.0266, "step": 1279 }, { "epoch": 2.43, "grad_norm": 0.36447604654757154, "learning_rate": 0.00010916191430311576, "loss": 0.0258, "step": 1280 }, { "epoch": 2.44, "grad_norm": 0.3566842682178245, "learning_rate": 0.00010903927392916335, "loss": 0.0206, "step": 1281 }, { "epoch": 2.44, "grad_norm": 0.3177852477892238, "learning_rate": 0.00010891661984600437, "loss": 0.0241, "step": 1282 }, { "epoch": 2.44, "grad_norm": 0.38530438290863867, "learning_rate": 0.00010879395223965932, "loss": 0.0271, "step": 1283 }, { "epoch": 2.44, "grad_norm": 0.4478175424882381, "learning_rate": 0.00010867127129616917, "loss": 0.0454, "step": 1284 }, { "epoch": 2.44, "grad_norm": 0.33051333167184704, "learning_rate": 0.00010854857720159515, "loss": 0.0308, "step": 1285 }, { "epoch": 2.44, "grad_norm": 0.42479812638265846, "learning_rate": 0.0001084258701420184, "loss": 0.0327, "step": 1286 }, { "epoch": 2.45, "grad_norm": 0.395977590854192, "learning_rate": 0.00010830315030353979, "loss": 0.0347, "step": 1287 }, { "epoch": 2.45, "grad_norm": 0.4116343209941424, "learning_rate": 0.00010818041787227957, "loss": 0.0345, "step": 1288 }, { "epoch": 2.45, "grad_norm": 0.35984247469950636, "learning_rate": 0.00010805767303437702, "loss": 0.0423, "step": 1289 }, { "epoch": 2.45, "grad_norm": 0.4230373875954332, "learning_rate": 0.00010793491597599026, "loss": 0.032, "step": 1290 }, { "epoch": 2.45, "grad_norm": 0.41471153331767924, "learning_rate": 0.00010781214688329598, "loss": 0.0302, "step": 1291 }, { "epoch": 2.46, "grad_norm": 0.5895913455957583, "learning_rate": 0.00010768936594248904, "loss": 0.0448, "step": 1292 }, { "epoch": 2.46, "grad_norm": 0.33587338576654735, "learning_rate": 0.00010756657333978242, "loss": 0.037, "step": 1293 }, { "epoch": 2.46, "grad_norm": 0.3679261327641876, "learning_rate": 0.0001074437692614066, "loss": 0.0375, "step": 1294 }, { "epoch": 2.46, "grad_norm": 0.4744331560491348, "learning_rate": 0.0001073209538936096, "loss": 0.0441, "step": 1295 }, { "epoch": 2.46, "grad_norm": 0.32248235884446397, "learning_rate": 0.00010719812742265656, "loss": 0.0258, "step": 1296 }, { "epoch": 2.47, "grad_norm": 0.30693126970401974, "learning_rate": 0.00010707529003482932, "loss": 0.0277, "step": 1297 }, { "epoch": 2.47, "grad_norm": 0.33039762934355243, "learning_rate": 0.00010695244191642648, "loss": 0.0282, "step": 1298 }, { "epoch": 2.47, "grad_norm": 0.31989532965851364, "learning_rate": 0.00010682958325376271, "loss": 0.0239, "step": 1299 }, { "epoch": 2.47, "grad_norm": 0.40121396143758714, "learning_rate": 0.00010670671423316884, "loss": 0.0433, "step": 1300 }, { "epoch": 2.47, "eval_blimp_filtered_avg": 0.7165671641791045, "eval_blimp_filtered_std": 0.0049867119692282086, "step": 1300 }, { "epoch": 2.47, "eval_blimp_supplement_avg": 0.8232758620689655, "eval_blimp_supplement_std": 0.016842867895142236, "step": 1300 }, { "epoch": 2.47, "eval_vqa_filtered_avg": 0.56, "eval_vqa_filtered_std": 0.049888765156985884, "step": 1300 }, { "epoch": 2.47, "eval_winoground_filtered_avg": 0.63, "eval_winoground_filtered_std": 0.048523658709391, "step": 1300 }, { "epoch": 2.47, "grad_norm": 0.21527932609027264, "learning_rate": 0.00010658383504099134, "loss": 0.0147, "step": 1301 }, { "epoch": 2.48, "grad_norm": 0.3428369896835573, "learning_rate": 0.00010646094586359203, "loss": 0.0361, "step": 1302 }, { "epoch": 2.48, "grad_norm": 0.2857981636238273, "learning_rate": 0.00010633804688734806, "loss": 0.0239, "step": 1303 }, { "epoch": 2.48, "grad_norm": 0.42134477750099747, "learning_rate": 0.00010621513829865124, "loss": 0.0408, "step": 1304 }, { "epoch": 2.48, "grad_norm": 0.40667069624522056, "learning_rate": 0.00010609222028390808, "loss": 0.0369, "step": 1305 }, { "epoch": 2.48, "grad_norm": 0.39576317410583967, "learning_rate": 0.00010596929302953937, "loss": 0.0305, "step": 1306 }, { "epoch": 2.48, "grad_norm": 0.2679111342942966, "learning_rate": 0.0001058463567219799, "loss": 0.0193, "step": 1307 }, { "epoch": 2.49, "grad_norm": 0.3954701876320804, "learning_rate": 0.00010572341154767817, "loss": 0.039, "step": 1308 }, { "epoch": 2.49, "grad_norm": 0.4154410580497173, "learning_rate": 0.00010560045769309617, "loss": 0.0399, "step": 1309 }, { "epoch": 2.49, "grad_norm": 0.621373119583937, "learning_rate": 0.00010547749534470898, "loss": 0.0424, "step": 1310 }, { "epoch": 2.49, "grad_norm": 0.5245710668012695, "learning_rate": 0.00010535452468900471, "loss": 0.0628, "step": 1311 }, { "epoch": 2.49, "grad_norm": 0.3734449894316944, "learning_rate": 0.00010523154591248387, "loss": 0.0394, "step": 1312 }, { "epoch": 2.5, "grad_norm": 0.3424598907594405, "learning_rate": 0.00010510855920165944, "loss": 0.0342, "step": 1313 }, { "epoch": 2.5, "grad_norm": 0.3501461138120323, "learning_rate": 0.00010498556474305638, "loss": 0.0215, "step": 1314 }, { "epoch": 2.5, "grad_norm": 0.33739428004094746, "learning_rate": 0.00010486256272321137, "loss": 0.0235, "step": 1315 }, { "epoch": 2.5, "grad_norm": 0.3135821413964043, "learning_rate": 0.00010473955332867265, "loss": 0.0281, "step": 1316 }, { "epoch": 2.5, "grad_norm": 0.24634077990341816, "learning_rate": 0.00010461653674599951, "loss": 0.0156, "step": 1317 }, { "epoch": 2.51, "grad_norm": 0.37426327417650795, "learning_rate": 0.0001044935131617623, "loss": 0.0334, "step": 1318 }, { "epoch": 2.51, "grad_norm": 0.3608927554578012, "learning_rate": 0.00010437048276254185, "loss": 0.0402, "step": 1319 }, { "epoch": 2.51, "grad_norm": 0.35519968970220295, "learning_rate": 0.00010424744573492937, "loss": 0.0304, "step": 1320 }, { "epoch": 2.51, "grad_norm": 0.2548453515677027, "learning_rate": 0.00010412440226552618, "loss": 0.0202, "step": 1321 }, { "epoch": 2.51, "grad_norm": 0.5559590645043953, "learning_rate": 0.00010400135254094328, "loss": 0.0418, "step": 1322 }, { "epoch": 2.52, "grad_norm": 0.32725348859680575, "learning_rate": 0.00010387829674780123, "loss": 0.0293, "step": 1323 }, { "epoch": 2.52, "grad_norm": 0.3560004459100193, "learning_rate": 0.00010375523507272975, "loss": 0.0247, "step": 1324 }, { "epoch": 2.52, "grad_norm": 0.2559049145949141, "learning_rate": 0.0001036321677023675, "loss": 0.0242, "step": 1325 }, { "epoch": 2.52, "grad_norm": 0.5751923067778225, "learning_rate": 0.00010350909482336176, "loss": 0.0409, "step": 1326 }, { "epoch": 2.52, "grad_norm": 0.3464402216126489, "learning_rate": 0.00010338601662236823, "loss": 0.0316, "step": 1327 }, { "epoch": 2.52, "grad_norm": 0.3813750171506301, "learning_rate": 0.00010326293328605052, "loss": 0.0348, "step": 1328 }, { "epoch": 2.53, "grad_norm": 0.36671520196349777, "learning_rate": 0.00010313984500108025, "loss": 0.0295, "step": 1329 }, { "epoch": 2.53, "grad_norm": 0.3527750265848055, "learning_rate": 0.0001030167519541364, "loss": 0.0219, "step": 1330 }, { "epoch": 2.53, "grad_norm": 0.3780782355643077, "learning_rate": 0.00010289365433190514, "loss": 0.0334, "step": 1331 }, { "epoch": 2.53, "grad_norm": 0.3961401334000163, "learning_rate": 0.00010277055232107975, "loss": 0.0437, "step": 1332 }, { "epoch": 2.53, "grad_norm": 0.35127614654335637, "learning_rate": 0.00010264744610835995, "loss": 0.0296, "step": 1333 }, { "epoch": 2.54, "grad_norm": 0.36460646037163175, "learning_rate": 0.00010252433588045203, "loss": 0.0269, "step": 1334 }, { "epoch": 2.54, "grad_norm": 0.3429669827018095, "learning_rate": 0.00010240122182406824, "loss": 0.0203, "step": 1335 }, { "epoch": 2.54, "grad_norm": 0.2488373821786636, "learning_rate": 0.00010227810412592667, "loss": 0.0149, "step": 1336 }, { "epoch": 2.54, "grad_norm": 0.43089627443055606, "learning_rate": 0.00010215498297275095, "loss": 0.0404, "step": 1337 }, { "epoch": 2.54, "grad_norm": 0.27627200592915613, "learning_rate": 0.00010203185855126995, "loss": 0.016, "step": 1338 }, { "epoch": 2.55, "grad_norm": 0.41502277967854867, "learning_rate": 0.00010190873104821747, "loss": 0.0323, "step": 1339 }, { "epoch": 2.55, "grad_norm": 0.329949860704148, "learning_rate": 0.00010178560065033202, "loss": 0.0242, "step": 1340 }, { "epoch": 2.55, "grad_norm": 0.3238517075670822, "learning_rate": 0.0001016624675443565, "loss": 0.0225, "step": 1341 }, { "epoch": 2.55, "grad_norm": 0.45040966591231596, "learning_rate": 0.00010153933191703789, "loss": 0.0427, "step": 1342 }, { "epoch": 2.55, "grad_norm": 0.4711448861275173, "learning_rate": 0.00010141619395512694, "loss": 0.0348, "step": 1343 }, { "epoch": 2.56, "grad_norm": 0.3505607980560381, "learning_rate": 0.00010129305384537803, "loss": 0.031, "step": 1344 }, { "epoch": 2.56, "grad_norm": 0.4643992668012489, "learning_rate": 0.00010116991177454884, "loss": 0.0505, "step": 1345 }, { "epoch": 2.56, "grad_norm": 0.4101460858833117, "learning_rate": 0.00010104676792939991, "loss": 0.0293, "step": 1346 }, { "epoch": 2.56, "grad_norm": 0.31851655848194144, "learning_rate": 0.00010092362249669449, "loss": 0.0238, "step": 1347 }, { "epoch": 2.56, "grad_norm": 0.2879967777647897, "learning_rate": 0.00010080047566319828, "loss": 0.0349, "step": 1348 }, { "epoch": 2.56, "grad_norm": 0.3606150364546391, "learning_rate": 0.00010067732761567909, "loss": 0.0306, "step": 1349 }, { "epoch": 2.57, "grad_norm": 0.49687561777014166, "learning_rate": 0.00010055417854090661, "loss": 0.0466, "step": 1350 }, { "epoch": 2.57, "grad_norm": 0.4928455511924334, "learning_rate": 0.00010043102862565197, "loss": 0.0423, "step": 1351 }, { "epoch": 2.57, "grad_norm": 0.2568770910634051, "learning_rate": 0.00010030787805668772, "loss": 0.0144, "step": 1352 }, { "epoch": 2.57, "grad_norm": 0.3413295008848333, "learning_rate": 0.00010018472702078731, "loss": 0.0196, "step": 1353 }, { "epoch": 2.57, "grad_norm": 0.3719013769648537, "learning_rate": 0.0001000615757047249, "loss": 0.0339, "step": 1354 }, { "epoch": 2.58, "grad_norm": 0.35005055438749044, "learning_rate": 9.993842429527511e-05, "loss": 0.0283, "step": 1355 }, { "epoch": 2.58, "grad_norm": 0.3445421357485359, "learning_rate": 9.981527297921271e-05, "loss": 0.0229, "step": 1356 }, { "epoch": 2.58, "grad_norm": 0.33568454362935823, "learning_rate": 9.96921219433123e-05, "loss": 0.0303, "step": 1357 }, { "epoch": 2.58, "grad_norm": 0.3164635502282193, "learning_rate": 9.956897137434803e-05, "loss": 0.031, "step": 1358 }, { "epoch": 2.58, "grad_norm": 0.34099465230196313, "learning_rate": 9.944582145909342e-05, "loss": 0.0325, "step": 1359 }, { "epoch": 2.59, "grad_norm": 0.3964358930885032, "learning_rate": 9.932267238432092e-05, "loss": 0.0308, "step": 1360 }, { "epoch": 2.59, "grad_norm": 0.3481605558412622, "learning_rate": 9.919952433680176e-05, "loss": 0.0332, "step": 1361 }, { "epoch": 2.59, "grad_norm": 0.38789832753657627, "learning_rate": 9.907637750330552e-05, "loss": 0.0298, "step": 1362 }, { "epoch": 2.59, "grad_norm": 0.4878289660807816, "learning_rate": 9.895323207060012e-05, "loss": 0.0429, "step": 1363 }, { "epoch": 2.59, "grad_norm": 0.37077570892917433, "learning_rate": 9.883008822545118e-05, "loss": 0.0376, "step": 1364 }, { "epoch": 2.6, "grad_norm": 0.3412347451329468, "learning_rate": 9.870694615462196e-05, "loss": 0.0188, "step": 1365 }, { "epoch": 2.6, "grad_norm": 0.33825899510246443, "learning_rate": 9.85838060448731e-05, "loss": 0.0294, "step": 1366 }, { "epoch": 2.6, "grad_norm": 0.47227070717207315, "learning_rate": 9.846066808296216e-05, "loss": 0.0408, "step": 1367 }, { "epoch": 2.6, "grad_norm": 0.3451499649936329, "learning_rate": 9.83375324556435e-05, "loss": 0.0294, "step": 1368 }, { "epoch": 2.6, "grad_norm": 0.4585586864655057, "learning_rate": 9.821439934966799e-05, "loss": 0.0361, "step": 1369 }, { "epoch": 2.6, "grad_norm": 0.43927993364906615, "learning_rate": 9.809126895178255e-05, "loss": 0.0343, "step": 1370 }, { "epoch": 2.61, "grad_norm": 0.4443091023264192, "learning_rate": 9.796814144873006e-05, "loss": 0.0451, "step": 1371 }, { "epoch": 2.61, "grad_norm": 0.37900181805226696, "learning_rate": 9.784501702724906e-05, "loss": 0.031, "step": 1372 }, { "epoch": 2.61, "grad_norm": 0.4613671801513967, "learning_rate": 9.772189587407337e-05, "loss": 0.038, "step": 1373 }, { "epoch": 2.61, "grad_norm": 0.2974197531255102, "learning_rate": 9.759877817593181e-05, "loss": 0.0252, "step": 1374 }, { "epoch": 2.61, "grad_norm": 0.3313741548840074, "learning_rate": 9.7475664119548e-05, "loss": 0.0281, "step": 1375 }, { "epoch": 2.62, "grad_norm": 0.3168610710205634, "learning_rate": 9.735255389164007e-05, "loss": 0.0281, "step": 1376 }, { "epoch": 2.62, "grad_norm": 0.40267794755821834, "learning_rate": 9.72294476789203e-05, "loss": 0.0321, "step": 1377 }, { "epoch": 2.62, "grad_norm": 0.3334997878539434, "learning_rate": 9.710634566809484e-05, "loss": 0.0229, "step": 1378 }, { "epoch": 2.62, "grad_norm": 0.33070122194744656, "learning_rate": 9.698324804586362e-05, "loss": 0.0263, "step": 1379 }, { "epoch": 2.62, "grad_norm": 0.32426019879713563, "learning_rate": 9.686015499891976e-05, "loss": 0.0236, "step": 1380 }, { "epoch": 2.63, "grad_norm": 0.3425898682823762, "learning_rate": 9.673706671394947e-05, "loss": 0.0268, "step": 1381 }, { "epoch": 2.63, "grad_norm": 0.3885917952548531, "learning_rate": 9.661398337763181e-05, "loss": 0.0302, "step": 1382 }, { "epoch": 2.63, "grad_norm": 0.3148771670536703, "learning_rate": 9.649090517663825e-05, "loss": 0.0255, "step": 1383 }, { "epoch": 2.63, "grad_norm": 0.3763602091454849, "learning_rate": 9.63678322976325e-05, "loss": 0.03, "step": 1384 }, { "epoch": 2.63, "grad_norm": 0.3827865099170777, "learning_rate": 9.624476492727026e-05, "loss": 0.0295, "step": 1385 }, { "epoch": 2.63, "grad_norm": 0.24858990332197078, "learning_rate": 9.61217032521988e-05, "loss": 0.0152, "step": 1386 }, { "epoch": 2.64, "grad_norm": 0.3627054210227893, "learning_rate": 9.599864745905676e-05, "loss": 0.0247, "step": 1387 }, { "epoch": 2.64, "grad_norm": 0.3619195067440461, "learning_rate": 9.587559773447386e-05, "loss": 0.0366, "step": 1388 }, { "epoch": 2.64, "grad_norm": 0.3288003593742663, "learning_rate": 9.575255426507066e-05, "loss": 0.037, "step": 1389 }, { "epoch": 2.64, "grad_norm": 0.4390547282959035, "learning_rate": 9.56295172374582e-05, "loss": 0.035, "step": 1390 }, { "epoch": 2.64, "grad_norm": 0.40011090503249586, "learning_rate": 9.550648683823774e-05, "loss": 0.0279, "step": 1391 }, { "epoch": 2.65, "grad_norm": 0.4295368571667212, "learning_rate": 9.53834632540005e-05, "loss": 0.0296, "step": 1392 }, { "epoch": 2.65, "grad_norm": 0.3746059861527143, "learning_rate": 9.52604466713274e-05, "loss": 0.0251, "step": 1393 }, { "epoch": 2.65, "grad_norm": 0.3947164560866061, "learning_rate": 9.513743727678862e-05, "loss": 0.0507, "step": 1394 }, { "epoch": 2.65, "grad_norm": 0.29591257641119395, "learning_rate": 9.501443525694364e-05, "loss": 0.0279, "step": 1395 }, { "epoch": 2.65, "grad_norm": 0.37730886797785385, "learning_rate": 9.489144079834057e-05, "loss": 0.0257, "step": 1396 }, { "epoch": 2.66, "grad_norm": 0.4886357139441725, "learning_rate": 9.476845408751614e-05, "loss": 0.0311, "step": 1397 }, { "epoch": 2.66, "grad_norm": 0.26047893215028, "learning_rate": 9.464547531099531e-05, "loss": 0.0209, "step": 1398 }, { "epoch": 2.66, "grad_norm": 0.4630681171150204, "learning_rate": 9.452250465529103e-05, "loss": 0.0273, "step": 1399 }, { "epoch": 2.66, "grad_norm": 0.49098021941278874, "learning_rate": 9.439954230690387e-05, "loss": 0.0306, "step": 1400 }, { "epoch": 2.66, "eval_blimp_filtered_avg": 0.7165671641791045, "eval_blimp_filtered_std": 0.004970893723595079, "step": 1400 }, { "epoch": 2.66, "eval_blimp_supplement_avg": 0.8146551724137931, "eval_blimp_supplement_std": 0.017056431761521194, "step": 1400 }, { "epoch": 2.66, "eval_vqa_filtered_avg": 0.55, "eval_vqa_filtered_std": 0.05, "step": 1400 }, { "epoch": 2.66, "eval_winoground_filtered_avg": 0.6, "eval_winoground_filtered_std": 0.04923659639173309, "step": 1400 }, { "epoch": 2.66, "grad_norm": 0.34855790140328624, "learning_rate": 9.427658845232183e-05, "loss": 0.0294, "step": 1401 }, { "epoch": 2.67, "grad_norm": 0.3795959885142413, "learning_rate": 9.41536432780201e-05, "loss": 0.0376, "step": 1402 }, { "epoch": 2.67, "grad_norm": 0.271292372410208, "learning_rate": 9.403070697046064e-05, "loss": 0.0248, "step": 1403 }, { "epoch": 2.67, "grad_norm": 0.37361300595219055, "learning_rate": 9.390777971609192e-05, "loss": 0.0298, "step": 1404 }, { "epoch": 2.67, "grad_norm": 0.35927501437869697, "learning_rate": 9.37848617013488e-05, "loss": 0.0256, "step": 1405 }, { "epoch": 2.67, "grad_norm": 0.4483802922690773, "learning_rate": 9.366195311265199e-05, "loss": 0.0247, "step": 1406 }, { "epoch": 2.67, "grad_norm": 0.2888754304457457, "learning_rate": 9.353905413640795e-05, "loss": 0.0196, "step": 1407 }, { "epoch": 2.68, "grad_norm": 0.4637802080929335, "learning_rate": 9.34161649590087e-05, "loss": 0.0319, "step": 1408 }, { "epoch": 2.68, "grad_norm": 0.37201421324790496, "learning_rate": 9.329328576683117e-05, "loss": 0.0249, "step": 1409 }, { "epoch": 2.68, "grad_norm": 0.34588050501167417, "learning_rate": 9.317041674623731e-05, "loss": 0.0264, "step": 1410 }, { "epoch": 2.68, "grad_norm": 0.34463939108897235, "learning_rate": 9.304755808357355e-05, "loss": 0.0226, "step": 1411 }, { "epoch": 2.68, "grad_norm": 0.3609871257572005, "learning_rate": 9.292470996517069e-05, "loss": 0.0339, "step": 1412 }, { "epoch": 2.69, "grad_norm": 0.3744148406857398, "learning_rate": 9.280187257734349e-05, "loss": 0.0285, "step": 1413 }, { "epoch": 2.69, "grad_norm": 0.2589504611664149, "learning_rate": 9.26790461063904e-05, "loss": 0.0158, "step": 1414 }, { "epoch": 2.69, "grad_norm": 0.31899005792340096, "learning_rate": 9.255623073859343e-05, "loss": 0.0255, "step": 1415 }, { "epoch": 2.69, "grad_norm": 0.3149354819811578, "learning_rate": 9.243342666021764e-05, "loss": 0.0223, "step": 1416 }, { "epoch": 2.69, "grad_norm": 0.37426136216792594, "learning_rate": 9.231063405751095e-05, "loss": 0.026, "step": 1417 }, { "epoch": 2.7, "grad_norm": 0.7765084702275945, "learning_rate": 9.218785311670406e-05, "loss": 0.0632, "step": 1418 }, { "epoch": 2.7, "grad_norm": 0.2687780484508228, "learning_rate": 9.206508402400978e-05, "loss": 0.014, "step": 1419 }, { "epoch": 2.7, "grad_norm": 0.3742571062261221, "learning_rate": 9.194232696562299e-05, "loss": 0.026, "step": 1420 }, { "epoch": 2.7, "grad_norm": 0.30311627429649346, "learning_rate": 9.181958212772045e-05, "loss": 0.0131, "step": 1421 }, { "epoch": 2.7, "grad_norm": 0.4640372452778985, "learning_rate": 9.169684969646022e-05, "loss": 0.0193, "step": 1422 }, { "epoch": 2.71, "grad_norm": 0.3369663542866382, "learning_rate": 9.157412985798164e-05, "loss": 0.0189, "step": 1423 }, { "epoch": 2.71, "grad_norm": 0.3359678945550697, "learning_rate": 9.145142279840489e-05, "loss": 0.0197, "step": 1424 }, { "epoch": 2.71, "grad_norm": 0.3876354470283143, "learning_rate": 9.132872870383086e-05, "loss": 0.0373, "step": 1425 }, { "epoch": 2.71, "grad_norm": 0.3434801149171747, "learning_rate": 9.12060477603407e-05, "loss": 0.0268, "step": 1426 }, { "epoch": 2.71, "grad_norm": 0.4581890755589651, "learning_rate": 9.108338015399563e-05, "loss": 0.0205, "step": 1427 }, { "epoch": 2.71, "grad_norm": 0.30995142890903965, "learning_rate": 9.096072607083667e-05, "loss": 0.0275, "step": 1428 }, { "epoch": 2.72, "grad_norm": 0.35949886776614665, "learning_rate": 9.083808569688428e-05, "loss": 0.0207, "step": 1429 }, { "epoch": 2.72, "grad_norm": 0.42743262024302614, "learning_rate": 9.071545921813814e-05, "loss": 0.024, "step": 1430 }, { "epoch": 2.72, "grad_norm": 0.4480519337609637, "learning_rate": 9.059284682057695e-05, "loss": 0.0306, "step": 1431 }, { "epoch": 2.72, "grad_norm": 0.49872327944848366, "learning_rate": 9.047024869015794e-05, "loss": 0.0366, "step": 1432 }, { "epoch": 2.72, "grad_norm": 0.3211990767738049, "learning_rate": 9.034766501281671e-05, "loss": 0.0232, "step": 1433 }, { "epoch": 2.73, "grad_norm": 0.3364600960570337, "learning_rate": 9.0225095974467e-05, "loss": 0.0252, "step": 1434 }, { "epoch": 2.73, "grad_norm": 0.22171868195059205, "learning_rate": 9.010254176100034e-05, "loss": 0.0102, "step": 1435 }, { "epoch": 2.73, "grad_norm": 0.4738198036034313, "learning_rate": 8.998000255828573e-05, "loss": 0.0429, "step": 1436 }, { "epoch": 2.73, "grad_norm": 0.3078090648198985, "learning_rate": 8.98574785521694e-05, "loss": 0.0237, "step": 1437 }, { "epoch": 2.73, "grad_norm": 0.44122502393827967, "learning_rate": 8.97349699284746e-05, "loss": 0.03, "step": 1438 }, { "epoch": 2.74, "grad_norm": 0.4392746265341573, "learning_rate": 8.961247687300122e-05, "loss": 0.042, "step": 1439 }, { "epoch": 2.74, "grad_norm": 0.2660736458192222, "learning_rate": 8.948999957152547e-05, "loss": 0.0215, "step": 1440 }, { "epoch": 2.74, "grad_norm": 0.4077742665448871, "learning_rate": 8.936753820979981e-05, "loss": 0.0409, "step": 1441 }, { "epoch": 2.74, "grad_norm": 0.39844806722745824, "learning_rate": 8.924509297355239e-05, "loss": 0.0407, "step": 1442 }, { "epoch": 2.74, "grad_norm": 0.40990088517118745, "learning_rate": 8.912266404848697e-05, "loss": 0.0329, "step": 1443 }, { "epoch": 2.75, "grad_norm": 0.39734488851347005, "learning_rate": 8.900025162028257e-05, "loss": 0.0333, "step": 1444 }, { "epoch": 2.75, "grad_norm": 0.411509373931655, "learning_rate": 8.887785587459319e-05, "loss": 0.0366, "step": 1445 }, { "epoch": 2.75, "grad_norm": 0.31985153799973354, "learning_rate": 8.875547699704742e-05, "loss": 0.0282, "step": 1446 }, { "epoch": 2.75, "grad_norm": 0.39772673194779784, "learning_rate": 8.863311517324852e-05, "loss": 0.027, "step": 1447 }, { "epoch": 2.75, "grad_norm": 0.34807225776585804, "learning_rate": 8.851077058877364e-05, "loss": 0.0323, "step": 1448 }, { "epoch": 2.75, "grad_norm": 0.32599805045537333, "learning_rate": 8.83884434291739e-05, "loss": 0.0336, "step": 1449 }, { "epoch": 2.76, "grad_norm": 0.4328676971088165, "learning_rate": 8.826613387997393e-05, "loss": 0.0412, "step": 1450 }, { "epoch": 2.76, "grad_norm": 0.2590774750681525, "learning_rate": 8.814384212667175e-05, "loss": 0.0211, "step": 1451 }, { "epoch": 2.76, "grad_norm": 0.3229540508122981, "learning_rate": 8.80215683547383e-05, "loss": 0.0339, "step": 1452 }, { "epoch": 2.76, "grad_norm": 0.3387409169769227, "learning_rate": 8.789931274961724e-05, "loss": 0.0284, "step": 1453 }, { "epoch": 2.76, "grad_norm": 0.3470793967023417, "learning_rate": 8.77770754967248e-05, "loss": 0.0362, "step": 1454 }, { "epoch": 2.77, "grad_norm": 0.3023370856472519, "learning_rate": 8.765485678144925e-05, "loss": 0.0214, "step": 1455 }, { "epoch": 2.77, "grad_norm": 0.2724592360620246, "learning_rate": 8.753265678915076e-05, "loss": 0.0238, "step": 1456 }, { "epoch": 2.77, "grad_norm": 0.41180396150980564, "learning_rate": 8.74104757051612e-05, "loss": 0.0326, "step": 1457 }, { "epoch": 2.77, "grad_norm": 0.3437571227687915, "learning_rate": 8.728831371478365e-05, "loss": 0.0299, "step": 1458 }, { "epoch": 2.77, "grad_norm": 0.35012801433319957, "learning_rate": 8.716617100329231e-05, "loss": 0.039, "step": 1459 }, { "epoch": 2.78, "grad_norm": 0.3720267662350085, "learning_rate": 8.704404775593205e-05, "loss": 0.0383, "step": 1460 }, { "epoch": 2.78, "grad_norm": 0.32849029416027736, "learning_rate": 8.692194415791834e-05, "loss": 0.0341, "step": 1461 }, { "epoch": 2.78, "grad_norm": 0.32977957664510765, "learning_rate": 8.679986039443679e-05, "loss": 0.025, "step": 1462 }, { "epoch": 2.78, "grad_norm": 0.31921580401486155, "learning_rate": 8.667779665064284e-05, "loss": 0.0263, "step": 1463 }, { "epoch": 2.78, "grad_norm": 0.3496921695434258, "learning_rate": 8.655575311166178e-05, "loss": 0.0267, "step": 1464 }, { "epoch": 2.79, "grad_norm": 0.2893016213958925, "learning_rate": 8.643372996258807e-05, "loss": 0.0266, "step": 1465 }, { "epoch": 2.79, "grad_norm": 0.2827720858272798, "learning_rate": 8.63117273884853e-05, "loss": 0.023, "step": 1466 }, { "epoch": 2.79, "grad_norm": 0.2507539582089694, "learning_rate": 8.618974557438588e-05, "loss": 0.0144, "step": 1467 }, { "epoch": 2.79, "grad_norm": 0.3253918681052989, "learning_rate": 8.606778470529072e-05, "loss": 0.0267, "step": 1468 }, { "epoch": 2.79, "grad_norm": 0.2853197260718476, "learning_rate": 8.594584496616892e-05, "loss": 0.0229, "step": 1469 }, { "epoch": 2.79, "grad_norm": 0.39623830990676, "learning_rate": 8.582392654195765e-05, "loss": 0.0352, "step": 1470 }, { "epoch": 2.8, "grad_norm": 0.38221404535662346, "learning_rate": 8.570202961756166e-05, "loss": 0.0333, "step": 1471 }, { "epoch": 2.8, "grad_norm": 0.27800590133941455, "learning_rate": 8.558015437785307e-05, "loss": 0.02, "step": 1472 }, { "epoch": 2.8, "grad_norm": 0.25189126531108946, "learning_rate": 8.545830100767119e-05, "loss": 0.0184, "step": 1473 }, { "epoch": 2.8, "grad_norm": 0.27328082944415183, "learning_rate": 8.533646969182212e-05, "loss": 0.0239, "step": 1474 }, { "epoch": 2.8, "grad_norm": 0.34479787138946755, "learning_rate": 8.521466061507851e-05, "loss": 0.0262, "step": 1475 }, { "epoch": 2.81, "grad_norm": 0.3973291099415102, "learning_rate": 8.509287396217927e-05, "loss": 0.0315, "step": 1476 }, { "epoch": 2.81, "grad_norm": 0.29970714562799544, "learning_rate": 8.497110991782938e-05, "loss": 0.0196, "step": 1477 }, { "epoch": 2.81, "grad_norm": 0.324793403562103, "learning_rate": 8.484936866669945e-05, "loss": 0.0249, "step": 1478 }, { "epoch": 2.81, "grad_norm": 0.4492721349522451, "learning_rate": 8.472765039342551e-05, "loss": 0.0338, "step": 1479 }, { "epoch": 2.81, "grad_norm": 0.4324794495514251, "learning_rate": 8.460595528260883e-05, "loss": 0.0305, "step": 1480 }, { "epoch": 2.82, "grad_norm": 0.34771478365084374, "learning_rate": 8.448428351881545e-05, "loss": 0.0259, "step": 1481 }, { "epoch": 2.82, "grad_norm": 0.35810455453104506, "learning_rate": 8.436263528657605e-05, "loss": 0.0173, "step": 1482 }, { "epoch": 2.82, "grad_norm": 0.24683984352526817, "learning_rate": 8.424101077038568e-05, "loss": 0.0179, "step": 1483 }, { "epoch": 2.82, "grad_norm": 0.2494248165756095, "learning_rate": 8.411941015470329e-05, "loss": 0.0163, "step": 1484 }, { "epoch": 2.82, "grad_norm": 0.20029425662267691, "learning_rate": 8.399783362395169e-05, "loss": 0.0107, "step": 1485 }, { "epoch": 2.83, "grad_norm": 0.4803391382237165, "learning_rate": 8.387628136251707e-05, "loss": 0.0275, "step": 1486 }, { "epoch": 2.83, "grad_norm": 0.4850245593271423, "learning_rate": 8.375475355474895e-05, "loss": 0.0447, "step": 1487 }, { "epoch": 2.83, "grad_norm": 0.2805216875314029, "learning_rate": 8.363325038495965e-05, "loss": 0.0167, "step": 1488 }, { "epoch": 2.83, "grad_norm": 0.45643882757789234, "learning_rate": 8.351177203742412e-05, "loss": 0.045, "step": 1489 }, { "epoch": 2.83, "grad_norm": 0.4156796039404557, "learning_rate": 8.339031869637974e-05, "loss": 0.0425, "step": 1490 }, { "epoch": 2.83, "grad_norm": 0.27485734131777034, "learning_rate": 8.32688905460259e-05, "loss": 0.017, "step": 1491 }, { "epoch": 2.84, "grad_norm": 0.34804499919091014, "learning_rate": 8.314748777052377e-05, "loss": 0.0206, "step": 1492 }, { "epoch": 2.84, "grad_norm": 0.39779210164889356, "learning_rate": 8.302611055399616e-05, "loss": 0.0298, "step": 1493 }, { "epoch": 2.84, "grad_norm": 0.40987474599936125, "learning_rate": 8.2904759080527e-05, "loss": 0.0268, "step": 1494 }, { "epoch": 2.84, "grad_norm": 0.3103291372300987, "learning_rate": 8.278343353416114e-05, "loss": 0.0221, "step": 1495 }, { "epoch": 2.84, "grad_norm": 0.26220780689731965, "learning_rate": 8.266213409890427e-05, "loss": 0.0168, "step": 1496 }, { "epoch": 2.85, "grad_norm": 0.3624146132343466, "learning_rate": 8.254086095872232e-05, "loss": 0.0256, "step": 1497 }, { "epoch": 2.85, "grad_norm": 0.35049289687640045, "learning_rate": 8.241961429754144e-05, "loss": 0.0338, "step": 1498 }, { "epoch": 2.85, "grad_norm": 0.3082615240489079, "learning_rate": 8.229839429924753e-05, "loss": 0.0196, "step": 1499 }, { "epoch": 2.85, "grad_norm": 0.38923104552680143, "learning_rate": 8.217720114768618e-05, "loss": 0.0301, "step": 1500 }, { "epoch": 2.85, "eval_blimp_filtered_avg": 0.7107462686567164, "eval_blimp_filtered_std": 0.005021538359160847, "step": 1500 }, { "epoch": 2.85, "eval_blimp_supplement_avg": 0.8038793103448276, "eval_blimp_supplement_std": 0.01719761873380847, "step": 1500 }, { "epoch": 2.85, "eval_vqa_filtered_avg": 0.56, "eval_vqa_filtered_std": 0.049888765156985884, "step": 1500 }, { "epoch": 2.85, "eval_winoground_filtered_avg": 0.63, "eval_winoground_filtered_std": 0.048523658709391, "step": 1500 }, { "epoch": 2.85, "grad_norm": 0.3976131609655864, "learning_rate": 8.205603502666216e-05, "loss": 0.0325, "step": 1501 }, { "epoch": 2.86, "grad_norm": 0.4737112081716313, "learning_rate": 8.193489611993926e-05, "loss": 0.0276, "step": 1502 }, { "epoch": 2.86, "grad_norm": 0.3936238202189691, "learning_rate": 8.181378461124006e-05, "loss": 0.0251, "step": 1503 }, { "epoch": 2.86, "grad_norm": 0.4116835318577408, "learning_rate": 8.169270068424549e-05, "loss": 0.0257, "step": 1504 }, { "epoch": 2.86, "grad_norm": 0.4262720065326714, "learning_rate": 8.157164452259469e-05, "loss": 0.0286, "step": 1505 }, { "epoch": 2.86, "grad_norm": 0.42914036891724067, "learning_rate": 8.145061630988479e-05, "loss": 0.038, "step": 1506 }, { "epoch": 2.87, "grad_norm": 0.3298878730408135, "learning_rate": 8.132961622967035e-05, "loss": 0.0247, "step": 1507 }, { "epoch": 2.87, "grad_norm": 0.25384112321753693, "learning_rate": 8.120864446546338e-05, "loss": 0.0142, "step": 1508 }, { "epoch": 2.87, "grad_norm": 0.3187058790444291, "learning_rate": 8.108770120073289e-05, "loss": 0.0186, "step": 1509 }, { "epoch": 2.87, "grad_norm": 0.2724308297439597, "learning_rate": 8.096678661890475e-05, "loss": 0.0193, "step": 1510 }, { "epoch": 2.87, "grad_norm": 0.25323058836797524, "learning_rate": 8.084590090336127e-05, "loss": 0.0167, "step": 1511 }, { "epoch": 2.87, "grad_norm": 0.30492471775426294, "learning_rate": 8.072504423744094e-05, "loss": 0.0276, "step": 1512 }, { "epoch": 2.88, "grad_norm": 0.24799458582049735, "learning_rate": 8.060421680443831e-05, "loss": 0.0122, "step": 1513 }, { "epoch": 2.88, "grad_norm": 0.3643541587763288, "learning_rate": 8.048341878760345e-05, "loss": 0.024, "step": 1514 }, { "epoch": 2.88, "grad_norm": 0.31288393267070713, "learning_rate": 8.036265037014188e-05, "loss": 0.0207, "step": 1515 }, { "epoch": 2.88, "grad_norm": 0.34606780018711464, "learning_rate": 8.024191173521435e-05, "loss": 0.0233, "step": 1516 }, { "epoch": 2.88, "grad_norm": 0.44695302408126714, "learning_rate": 8.012120306593623e-05, "loss": 0.0464, "step": 1517 }, { "epoch": 2.89, "grad_norm": 0.2211182830843226, "learning_rate": 8.000052454537756e-05, "loss": 0.0142, "step": 1518 }, { "epoch": 2.89, "grad_norm": 0.33513677378525336, "learning_rate": 7.987987635656267e-05, "loss": 0.0289, "step": 1519 }, { "epoch": 2.89, "grad_norm": 0.42709227456791454, "learning_rate": 7.975925868246985e-05, "loss": 0.0281, "step": 1520 }, { "epoch": 2.89, "grad_norm": 0.26296139389235823, "learning_rate": 7.963867170603109e-05, "loss": 0.013, "step": 1521 }, { "epoch": 2.89, "grad_norm": 0.29032367547446636, "learning_rate": 7.951811561013181e-05, "loss": 0.0184, "step": 1522 }, { "epoch": 2.9, "grad_norm": 0.3942242930204967, "learning_rate": 7.939759057761075e-05, "loss": 0.0262, "step": 1523 }, { "epoch": 2.9, "grad_norm": 0.3839823568755629, "learning_rate": 7.927709679125935e-05, "loss": 0.0275, "step": 1524 }, { "epoch": 2.9, "grad_norm": 0.33459762677194826, "learning_rate": 7.915663443382173e-05, "loss": 0.0185, "step": 1525 }, { "epoch": 2.9, "grad_norm": 0.3743502023038977, "learning_rate": 7.903620368799439e-05, "loss": 0.0205, "step": 1526 }, { "epoch": 2.9, "grad_norm": 0.26737600322588945, "learning_rate": 7.891580473642582e-05, "loss": 0.0201, "step": 1527 }, { "epoch": 2.9, "grad_norm": 0.3125204502588891, "learning_rate": 7.87954377617163e-05, "loss": 0.0251, "step": 1528 }, { "epoch": 2.91, "grad_norm": 0.4143160519961925, "learning_rate": 7.86751029464177e-05, "loss": 0.0317, "step": 1529 }, { "epoch": 2.91, "grad_norm": 0.48675159484047675, "learning_rate": 7.855480047303296e-05, "loss": 0.0362, "step": 1530 }, { "epoch": 2.91, "grad_norm": 0.218971788877109, "learning_rate": 7.843453052401606e-05, "loss": 0.0122, "step": 1531 }, { "epoch": 2.91, "grad_norm": 0.3476461447763632, "learning_rate": 7.831429328177172e-05, "loss": 0.0241, "step": 1532 }, { "epoch": 2.91, "grad_norm": 0.3238664575632263, "learning_rate": 7.819408892865496e-05, "loss": 0.0296, "step": 1533 }, { "epoch": 2.92, "grad_norm": 0.32565319746921123, "learning_rate": 7.80739176469709e-05, "loss": 0.022, "step": 1534 }, { "epoch": 2.92, "grad_norm": 0.3766087702595169, "learning_rate": 7.795377961897457e-05, "loss": 0.0299, "step": 1535 }, { "epoch": 2.92, "grad_norm": 0.3397108165338899, "learning_rate": 7.783367502687055e-05, "loss": 0.0292, "step": 1536 }, { "epoch": 2.92, "grad_norm": 0.43235893152453564, "learning_rate": 7.77136040528127e-05, "loss": 0.0387, "step": 1537 }, { "epoch": 2.92, "grad_norm": 0.26981082801348333, "learning_rate": 7.759356687890387e-05, "loss": 0.0183, "step": 1538 }, { "epoch": 2.93, "grad_norm": 0.33662155088013107, "learning_rate": 7.747356368719572e-05, "loss": 0.0238, "step": 1539 }, { "epoch": 2.93, "grad_norm": 0.22166236684150192, "learning_rate": 7.735359465968833e-05, "loss": 0.0196, "step": 1540 }, { "epoch": 2.93, "grad_norm": 0.38533774970830864, "learning_rate": 7.723365997832989e-05, "loss": 0.0298, "step": 1541 }, { "epoch": 2.93, "grad_norm": 0.42198005843826125, "learning_rate": 7.711375982501666e-05, "loss": 0.0297, "step": 1542 }, { "epoch": 2.93, "grad_norm": 0.3516645438984776, "learning_rate": 7.69938943815924e-05, "loss": 0.023, "step": 1543 }, { "epoch": 2.94, "grad_norm": 0.4323656767823725, "learning_rate": 7.687406382984824e-05, "loss": 0.0368, "step": 1544 }, { "epoch": 2.94, "grad_norm": 0.4691798122848509, "learning_rate": 7.675426835152251e-05, "loss": 0.0277, "step": 1545 }, { "epoch": 2.94, "grad_norm": 0.38911907738391777, "learning_rate": 7.663450812830022e-05, "loss": 0.0325, "step": 1546 }, { "epoch": 2.94, "grad_norm": 0.3940729860939947, "learning_rate": 7.651478334181294e-05, "loss": 0.032, "step": 1547 }, { "epoch": 2.94, "grad_norm": 0.3376505934214612, "learning_rate": 7.639509417363851e-05, "loss": 0.0283, "step": 1548 }, { "epoch": 2.94, "grad_norm": 0.33223724408779914, "learning_rate": 7.627544080530077e-05, "loss": 0.0311, "step": 1549 }, { "epoch": 2.95, "grad_norm": 0.4283696603252754, "learning_rate": 7.615582341826924e-05, "loss": 0.0321, "step": 1550 }, { "epoch": 2.95, "grad_norm": 0.30005551347485493, "learning_rate": 7.603624219395886e-05, "loss": 0.0253, "step": 1551 }, { "epoch": 2.95, "grad_norm": 0.3137479308944608, "learning_rate": 7.591669731372977e-05, "loss": 0.0246, "step": 1552 }, { "epoch": 2.95, "grad_norm": 0.26246349849687683, "learning_rate": 7.579718895888693e-05, "loss": 0.0146, "step": 1553 }, { "epoch": 2.95, "grad_norm": 0.42453426487327994, "learning_rate": 7.56777173106799e-05, "loss": 0.0314, "step": 1554 }, { "epoch": 2.96, "grad_norm": 0.38192094589351694, "learning_rate": 7.555828255030269e-05, "loss": 0.0242, "step": 1555 }, { "epoch": 2.96, "grad_norm": 0.3273709013900648, "learning_rate": 7.543888485889325e-05, "loss": 0.0243, "step": 1556 }, { "epoch": 2.96, "grad_norm": 0.37422428612608344, "learning_rate": 7.53195244175333e-05, "loss": 0.0241, "step": 1557 }, { "epoch": 2.96, "grad_norm": 0.267453906565526, "learning_rate": 7.520020140724812e-05, "loss": 0.0229, "step": 1558 }, { "epoch": 2.96, "grad_norm": 0.26807112672221634, "learning_rate": 7.508091600900622e-05, "loss": 0.0158, "step": 1559 }, { "epoch": 2.97, "grad_norm": 0.3639153522896899, "learning_rate": 7.496166840371905e-05, "loss": 0.0278, "step": 1560 }, { "epoch": 2.97, "grad_norm": 0.2748639182545751, "learning_rate": 7.48424587722407e-05, "loss": 0.0223, "step": 1561 }, { "epoch": 2.97, "grad_norm": 0.47999195989424775, "learning_rate": 7.472328729536778e-05, "loss": 0.0205, "step": 1562 }, { "epoch": 2.97, "grad_norm": 0.3652674907570494, "learning_rate": 7.460415415383892e-05, "loss": 0.0271, "step": 1563 }, { "epoch": 2.97, "grad_norm": 0.3071261517641582, "learning_rate": 7.448505952833467e-05, "loss": 0.0215, "step": 1564 }, { "epoch": 2.98, "grad_norm": 0.40810413975001997, "learning_rate": 7.436600359947716e-05, "loss": 0.0226, "step": 1565 }, { "epoch": 2.98, "grad_norm": 0.28792445247281073, "learning_rate": 7.424698654782982e-05, "loss": 0.0195, "step": 1566 }, { "epoch": 2.98, "grad_norm": 0.45425452361704743, "learning_rate": 7.412800855389706e-05, "loss": 0.0278, "step": 1567 }, { "epoch": 2.98, "grad_norm": 0.39486931553194377, "learning_rate": 7.400906979812419e-05, "loss": 0.0325, "step": 1568 }, { "epoch": 2.98, "grad_norm": 0.2884280906728479, "learning_rate": 7.389017046089693e-05, "loss": 0.0199, "step": 1569 }, { "epoch": 2.98, "grad_norm": 0.29085739553049594, "learning_rate": 7.37713107225412e-05, "loss": 0.0234, "step": 1570 }, { "epoch": 2.99, "grad_norm": 0.5575732538777772, "learning_rate": 7.365249076332286e-05, "loss": 0.0334, "step": 1571 }, { "epoch": 2.99, "grad_norm": 0.27928275859820023, "learning_rate": 7.35337107634475e-05, "loss": 0.0146, "step": 1572 }, { "epoch": 2.99, "grad_norm": 0.4502818239833082, "learning_rate": 7.341497090306007e-05, "loss": 0.0339, "step": 1573 }, { "epoch": 2.99, "grad_norm": 0.3546890485761428, "learning_rate": 7.329627136224459e-05, "loss": 0.0207, "step": 1574 }, { "epoch": 2.99, "grad_norm": 0.34519659364178895, "learning_rate": 7.317761232102407e-05, "loss": 0.0187, "step": 1575 }, { "epoch": 3.0, "grad_norm": 0.42623405721043006, "learning_rate": 7.305899395935996e-05, "loss": 0.0398, "step": 1576 }, { "epoch": 3.0, "grad_norm": 0.46529165216919016, "learning_rate": 7.294041645715202e-05, "loss": 0.0357, "step": 1577 }, { "epoch": 3.0, "grad_norm": 0.3668261213828814, "learning_rate": 7.282187999423819e-05, "loss": 0.0192, "step": 1578 }, { "epoch": 3.0, "grad_norm": 0.09908776159443472, "learning_rate": 7.270338475039403e-05, "loss": 0.0046, "step": 1579 }, { "epoch": 3.0, "grad_norm": 0.1775702525021454, "learning_rate": 7.258493090533258e-05, "loss": 0.0089, "step": 1580 }, { "epoch": 3.01, "grad_norm": 0.19702818397947422, "learning_rate": 7.246651863870419e-05, "loss": 0.0097, "step": 1581 }, { "epoch": 3.01, "grad_norm": 0.30158664093109644, "learning_rate": 7.234814813009607e-05, "loss": 0.0108, "step": 1582 }, { "epoch": 3.01, "grad_norm": 0.22792911049458375, "learning_rate": 7.222981955903212e-05, "loss": 0.025, "step": 1583 }, { "epoch": 3.01, "grad_norm": 0.18021323568350237, "learning_rate": 7.21115331049726e-05, "loss": 0.01, "step": 1584 }, { "epoch": 3.01, "grad_norm": 0.06137729440500412, "learning_rate": 7.199328894731405e-05, "loss": 0.0032, "step": 1585 }, { "epoch": 3.02, "grad_norm": 0.18438219089810437, "learning_rate": 7.187508726538868e-05, "loss": 0.0079, "step": 1586 }, { "epoch": 3.02, "grad_norm": 0.22725798130413852, "learning_rate": 7.17569282384643e-05, "loss": 0.0128, "step": 1587 }, { "epoch": 3.02, "grad_norm": 0.2374060011706904, "learning_rate": 7.163881204574416e-05, "loss": 0.0161, "step": 1588 }, { "epoch": 3.02, "grad_norm": 0.1572347436320327, "learning_rate": 7.152073886636644e-05, "loss": 0.0088, "step": 1589 }, { "epoch": 3.02, "grad_norm": 0.1524770779360217, "learning_rate": 7.140270887940406e-05, "loss": 0.007, "step": 1590 }, { "epoch": 3.02, "grad_norm": 0.1644354118818157, "learning_rate": 7.128472226386455e-05, "loss": 0.0123, "step": 1591 }, { "epoch": 3.03, "grad_norm": 0.06219594707923605, "learning_rate": 7.116677919868954e-05, "loss": 0.0027, "step": 1592 }, { "epoch": 3.03, "grad_norm": 0.169394947396397, "learning_rate": 7.10488798627547e-05, "loss": 0.0049, "step": 1593 }, { "epoch": 3.03, "grad_norm": 0.14589542161363886, "learning_rate": 7.093102443486932e-05, "loss": 0.0042, "step": 1594 }, { "epoch": 3.03, "grad_norm": 0.19086256750623434, "learning_rate": 7.081321309377615e-05, "loss": 0.0078, "step": 1595 }, { "epoch": 3.03, "grad_norm": 0.21703039195493715, "learning_rate": 7.069544601815099e-05, "loss": 0.0128, "step": 1596 }, { "epoch": 3.04, "grad_norm": 0.3783537251722694, "learning_rate": 7.05777233866026e-05, "loss": 0.015, "step": 1597 }, { "epoch": 3.04, "grad_norm": 0.2769949943597018, "learning_rate": 7.04600453776723e-05, "loss": 0.0106, "step": 1598 }, { "epoch": 3.04, "grad_norm": 0.1870354283769606, "learning_rate": 7.034241216983373e-05, "loss": 0.0096, "step": 1599 }, { "epoch": 3.04, "grad_norm": 0.13411004036315335, "learning_rate": 7.022482394149252e-05, "loss": 0.0043, "step": 1600 }, { "epoch": 3.04, "eval_blimp_filtered_avg": 0.7122388059701492, "eval_blimp_filtered_std": 0.005002997452153943, "step": 1600 }, { "epoch": 3.04, "eval_blimp_supplement_avg": 0.8038793103448276, "eval_blimp_supplement_std": 0.017197618733808467, "step": 1600 }, { "epoch": 3.04, "eval_vqa_filtered_avg": 0.58, "eval_vqa_filtered_std": 0.049604496374885836, "step": 1600 }, { "epoch": 3.04, "eval_winoground_filtered_avg": 0.62, "eval_winoground_filtered_std": 0.04878317312145633, "step": 1600 }, { "epoch": 3.04, "grad_norm": 0.2723950675138623, "learning_rate": 7.010728087098627e-05, "loss": 0.0077, "step": 1601 }, { "epoch": 3.05, "grad_norm": 0.30423511787500884, "learning_rate": 6.998978313658391e-05, "loss": 0.018, "step": 1602 }, { "epoch": 3.05, "grad_norm": 0.19803293881110526, "learning_rate": 6.987233091648563e-05, "loss": 0.0075, "step": 1603 }, { "epoch": 3.05, "grad_norm": 0.2336626903860384, "learning_rate": 6.97549243888227e-05, "loss": 0.0097, "step": 1604 }, { "epoch": 3.05, "grad_norm": 0.1520817956324536, "learning_rate": 6.9637563731657e-05, "loss": 0.0085, "step": 1605 }, { "epoch": 3.05, "grad_norm": 0.3009701114855925, "learning_rate": 6.952024912298087e-05, "loss": 0.0151, "step": 1606 }, { "epoch": 3.06, "grad_norm": 0.34527872943547033, "learning_rate": 6.940298074071674e-05, "loss": 0.009, "step": 1607 }, { "epoch": 3.06, "grad_norm": 0.27801090639660214, "learning_rate": 6.928575876271714e-05, "loss": 0.0133, "step": 1608 }, { "epoch": 3.06, "grad_norm": 0.31825549642911855, "learning_rate": 6.916858336676399e-05, "loss": 0.0174, "step": 1609 }, { "epoch": 3.06, "grad_norm": 0.1566029672057963, "learning_rate": 6.905145473056866e-05, "loss": 0.0079, "step": 1610 }, { "epoch": 3.06, "grad_norm": 0.1932883386831182, "learning_rate": 6.893437303177162e-05, "loss": 0.0083, "step": 1611 }, { "epoch": 3.06, "grad_norm": 0.24714713996001283, "learning_rate": 6.881733844794213e-05, "loss": 0.011, "step": 1612 }, { "epoch": 3.07, "grad_norm": 0.19055730116058128, "learning_rate": 6.870035115657795e-05, "loss": 0.0102, "step": 1613 }, { "epoch": 3.07, "grad_norm": 0.17210997860056712, "learning_rate": 6.858341133510524e-05, "loss": 0.012, "step": 1614 }, { "epoch": 3.07, "grad_norm": 0.2019042581973744, "learning_rate": 6.846651916087806e-05, "loss": 0.0108, "step": 1615 }, { "epoch": 3.07, "grad_norm": 0.20246119065431806, "learning_rate": 6.834967481117817e-05, "loss": 0.0079, "step": 1616 }, { "epoch": 3.07, "grad_norm": 0.22167830488619875, "learning_rate": 6.823287846321495e-05, "loss": 0.0126, "step": 1617 }, { "epoch": 3.08, "grad_norm": 0.10807549801450059, "learning_rate": 6.811613029412485e-05, "loss": 0.0034, "step": 1618 }, { "epoch": 3.08, "grad_norm": 0.1850363286886787, "learning_rate": 6.799943048097129e-05, "loss": 0.0081, "step": 1619 }, { "epoch": 3.08, "grad_norm": 0.17800744366883223, "learning_rate": 6.788277920074433e-05, "loss": 0.0066, "step": 1620 }, { "epoch": 3.08, "grad_norm": 0.19428529755207224, "learning_rate": 6.77661766303605e-05, "loss": 0.0085, "step": 1621 }, { "epoch": 3.08, "grad_norm": 0.23179481736742502, "learning_rate": 6.764962294666237e-05, "loss": 0.0073, "step": 1622 }, { "epoch": 3.09, "grad_norm": 0.22185786261407323, "learning_rate": 6.753311832641837e-05, "loss": 0.0086, "step": 1623 }, { "epoch": 3.09, "grad_norm": 0.18419688888895805, "learning_rate": 6.741666294632263e-05, "loss": 0.0133, "step": 1624 }, { "epoch": 3.09, "grad_norm": 0.24114228949755903, "learning_rate": 6.730025698299446e-05, "loss": 0.0082, "step": 1625 }, { "epoch": 3.09, "grad_norm": 0.26408790972597246, "learning_rate": 6.718390061297829e-05, "loss": 0.0131, "step": 1626 }, { "epoch": 3.09, "grad_norm": 0.11032016141648922, "learning_rate": 6.706759401274334e-05, "loss": 0.0032, "step": 1627 }, { "epoch": 3.1, "grad_norm": 0.2388391031913144, "learning_rate": 6.695133735868335e-05, "loss": 0.0133, "step": 1628 }, { "epoch": 3.1, "grad_norm": 0.200299456935639, "learning_rate": 6.683513082711622e-05, "loss": 0.0102, "step": 1629 }, { "epoch": 3.1, "grad_norm": 0.15263926849800605, "learning_rate": 6.671897459428403e-05, "loss": 0.0062, "step": 1630 }, { "epoch": 3.1, "grad_norm": 0.13715046185046964, "learning_rate": 6.660286883635236e-05, "loss": 0.0091, "step": 1631 }, { "epoch": 3.1, "grad_norm": 0.13736622948904872, "learning_rate": 6.648681372941038e-05, "loss": 0.0069, "step": 1632 }, { "epoch": 3.1, "grad_norm": 0.656476172512363, "learning_rate": 6.637080944947036e-05, "loss": 0.0289, "step": 1633 }, { "epoch": 3.11, "grad_norm": 0.12805888461795084, "learning_rate": 6.625485617246756e-05, "loss": 0.0042, "step": 1634 }, { "epoch": 3.11, "grad_norm": 0.37979728513879185, "learning_rate": 6.613895407425982e-05, "loss": 0.0136, "step": 1635 }, { "epoch": 3.11, "grad_norm": 0.1600332424073113, "learning_rate": 6.602310333062735e-05, "loss": 0.0053, "step": 1636 }, { "epoch": 3.11, "grad_norm": 0.19892068699936605, "learning_rate": 6.590730411727259e-05, "loss": 0.006, "step": 1637 }, { "epoch": 3.11, "grad_norm": 0.28991295132524836, "learning_rate": 6.579155660981973e-05, "loss": 0.0085, "step": 1638 }, { "epoch": 3.12, "grad_norm": 0.222039356313551, "learning_rate": 6.567586098381451e-05, "loss": 0.0052, "step": 1639 }, { "epoch": 3.12, "grad_norm": 0.18400959133111777, "learning_rate": 6.55602174147241e-05, "loss": 0.0071, "step": 1640 }, { "epoch": 3.12, "grad_norm": 0.15979964593310922, "learning_rate": 6.544462607793662e-05, "loss": 0.0061, "step": 1641 }, { "epoch": 3.12, "grad_norm": 0.16060493258736797, "learning_rate": 6.532908714876098e-05, "loss": 0.0051, "step": 1642 }, { "epoch": 3.12, "grad_norm": 0.2692242819237397, "learning_rate": 6.521360080242672e-05, "loss": 0.0079, "step": 1643 }, { "epoch": 3.13, "grad_norm": 0.1418969679977067, "learning_rate": 6.509816721408349e-05, "loss": 0.0073, "step": 1644 }, { "epoch": 3.13, "grad_norm": 0.20137360856400904, "learning_rate": 6.498278655880098e-05, "loss": 0.0034, "step": 1645 }, { "epoch": 3.13, "grad_norm": 0.2525680683141097, "learning_rate": 6.486745901156857e-05, "loss": 0.0139, "step": 1646 }, { "epoch": 3.13, "grad_norm": 0.26543965917369516, "learning_rate": 6.475218474729521e-05, "loss": 0.0114, "step": 1647 }, { "epoch": 3.13, "grad_norm": 0.10460041083811998, "learning_rate": 6.463696394080892e-05, "loss": 0.0035, "step": 1648 }, { "epoch": 3.13, "grad_norm": 0.1972656552744006, "learning_rate": 6.452179676685666e-05, "loss": 0.0116, "step": 1649 }, { "epoch": 3.14, "grad_norm": 0.15945952528645216, "learning_rate": 6.440668340010412e-05, "loss": 0.0068, "step": 1650 }, { "epoch": 3.14, "grad_norm": 0.22160304921653876, "learning_rate": 6.42916240151353e-05, "loss": 0.0143, "step": 1651 }, { "epoch": 3.14, "grad_norm": 0.27899681735543214, "learning_rate": 6.417661878645234e-05, "loss": 0.0112, "step": 1652 }, { "epoch": 3.14, "grad_norm": 0.05745610168674489, "learning_rate": 6.406166788847531e-05, "loss": 0.0022, "step": 1653 }, { "epoch": 3.14, "grad_norm": 0.18388648049219605, "learning_rate": 6.394677149554188e-05, "loss": 0.0132, "step": 1654 }, { "epoch": 3.15, "grad_norm": 0.11126809298021803, "learning_rate": 6.383192978190695e-05, "loss": 0.0039, "step": 1655 }, { "epoch": 3.15, "grad_norm": 0.1751342536792735, "learning_rate": 6.371714292174257e-05, "loss": 0.0084, "step": 1656 }, { "epoch": 3.15, "grad_norm": 0.07198144727069393, "learning_rate": 6.360241108913763e-05, "loss": 0.0023, "step": 1657 }, { "epoch": 3.15, "grad_norm": 0.140241766468108, "learning_rate": 6.348773445809747e-05, "loss": 0.0034, "step": 1658 }, { "epoch": 3.15, "grad_norm": 0.12202938273975636, "learning_rate": 6.337311320254375e-05, "loss": 0.0048, "step": 1659 }, { "epoch": 3.16, "grad_norm": 0.14757237346153076, "learning_rate": 6.325854749631423e-05, "loss": 0.0068, "step": 1660 }, { "epoch": 3.16, "grad_norm": 0.19543365756217967, "learning_rate": 6.314403751316231e-05, "loss": 0.0066, "step": 1661 }, { "epoch": 3.16, "grad_norm": 0.192436232347563, "learning_rate": 6.30295834267569e-05, "loss": 0.0056, "step": 1662 }, { "epoch": 3.16, "grad_norm": 0.13874383298628637, "learning_rate": 6.291518541068217e-05, "loss": 0.0061, "step": 1663 }, { "epoch": 3.16, "grad_norm": 0.2076914810289057, "learning_rate": 6.280084363843726e-05, "loss": 0.0079, "step": 1664 }, { "epoch": 3.17, "grad_norm": 0.22620035478130315, "learning_rate": 6.268655828343591e-05, "loss": 0.0097, "step": 1665 }, { "epoch": 3.17, "grad_norm": 0.08866362277641801, "learning_rate": 6.257232951900649e-05, "loss": 0.0032, "step": 1666 }, { "epoch": 3.17, "grad_norm": 0.2118964076078099, "learning_rate": 6.245815751839133e-05, "loss": 0.0092, "step": 1667 }, { "epoch": 3.17, "grad_norm": 0.17893422113388063, "learning_rate": 6.23440424547468e-05, "loss": 0.0064, "step": 1668 }, { "epoch": 3.17, "grad_norm": 0.20858379727355614, "learning_rate": 6.222998450114283e-05, "loss": 0.0061, "step": 1669 }, { "epoch": 3.17, "grad_norm": 0.23299482493061568, "learning_rate": 6.211598383056287e-05, "loss": 0.0154, "step": 1670 }, { "epoch": 3.18, "grad_norm": 0.3087353990355211, "learning_rate": 6.200204061590336e-05, "loss": 0.0064, "step": 1671 }, { "epoch": 3.18, "grad_norm": 0.19529801292514398, "learning_rate": 6.188815502997367e-05, "loss": 0.0107, "step": 1672 }, { "epoch": 3.18, "grad_norm": 0.10439004027654841, "learning_rate": 6.177432724549574e-05, "loss": 0.004, "step": 1673 }, { "epoch": 3.18, "grad_norm": 0.22520522379527905, "learning_rate": 6.166055743510388e-05, "loss": 0.0035, "step": 1674 }, { "epoch": 3.18, "grad_norm": 0.2069014465880773, "learning_rate": 6.15468457713444e-05, "loss": 0.0087, "step": 1675 }, { "epoch": 3.19, "grad_norm": 0.14752713934386646, "learning_rate": 6.143319242667554e-05, "loss": 0.005, "step": 1676 }, { "epoch": 3.19, "grad_norm": 0.11614933474778306, "learning_rate": 6.131959757346699e-05, "loss": 0.0029, "step": 1677 }, { "epoch": 3.19, "grad_norm": 0.17368892997860894, "learning_rate": 6.120606138399977e-05, "loss": 0.0106, "step": 1678 }, { "epoch": 3.19, "grad_norm": 0.18244832621751822, "learning_rate": 6.109258403046593e-05, "loss": 0.0056, "step": 1679 }, { "epoch": 3.19, "grad_norm": 0.17473306842458772, "learning_rate": 6.097916568496831e-05, "loss": 0.01, "step": 1680 }, { "epoch": 3.2, "grad_norm": 0.09562015610195132, "learning_rate": 6.086580651952021e-05, "loss": 0.0036, "step": 1681 }, { "epoch": 3.2, "grad_norm": 0.09485094520662278, "learning_rate": 6.0752506706045134e-05, "loss": 0.0036, "step": 1682 }, { "epoch": 3.2, "grad_norm": 0.09233529809924032, "learning_rate": 6.063926641637674e-05, "loss": 0.0035, "step": 1683 }, { "epoch": 3.2, "grad_norm": 0.13781718545140342, "learning_rate": 6.052608582225827e-05, "loss": 0.0043, "step": 1684 }, { "epoch": 3.2, "grad_norm": 0.20933633169807805, "learning_rate": 6.0412965095342425e-05, "loss": 0.0042, "step": 1685 }, { "epoch": 3.21, "grad_norm": 0.22018039999892794, "learning_rate": 6.02999044071912e-05, "loss": 0.0094, "step": 1686 }, { "epoch": 3.21, "grad_norm": 0.40488924895178346, "learning_rate": 6.018690392927546e-05, "loss": 0.0086, "step": 1687 }, { "epoch": 3.21, "grad_norm": 0.06298135047469916, "learning_rate": 6.0073963832974735e-05, "loss": 0.0022, "step": 1688 }, { "epoch": 3.21, "grad_norm": 0.25708965050178895, "learning_rate": 5.996108428957713e-05, "loss": 0.0076, "step": 1689 }, { "epoch": 3.21, "grad_norm": 0.20523542116623134, "learning_rate": 5.984826547027871e-05, "loss": 0.0114, "step": 1690 }, { "epoch": 3.21, "grad_norm": 0.23950216485325834, "learning_rate": 5.973550754618353e-05, "loss": 0.006, "step": 1691 }, { "epoch": 3.22, "grad_norm": 0.22144811042832438, "learning_rate": 5.96228106883034e-05, "loss": 0.018, "step": 1692 }, { "epoch": 3.22, "grad_norm": 0.23024287511806021, "learning_rate": 5.951017506755732e-05, "loss": 0.0118, "step": 1693 }, { "epoch": 3.22, "grad_norm": 0.23649647655857645, "learning_rate": 5.939760085477155e-05, "loss": 0.0084, "step": 1694 }, { "epoch": 3.22, "grad_norm": 0.18105008811187184, "learning_rate": 5.928508822067914e-05, "loss": 0.0077, "step": 1695 }, { "epoch": 3.22, "grad_norm": 0.18023497999324814, "learning_rate": 5.9172637335919834e-05, "loss": 0.006, "step": 1696 }, { "epoch": 3.23, "grad_norm": 0.1721597761552031, "learning_rate": 5.906024837103965e-05, "loss": 0.0082, "step": 1697 }, { "epoch": 3.23, "grad_norm": 0.21511508109952715, "learning_rate": 5.894792149649069e-05, "loss": 0.0054, "step": 1698 }, { "epoch": 3.23, "grad_norm": 0.25839577886508164, "learning_rate": 5.883565688263099e-05, "loss": 0.0066, "step": 1699 }, { "epoch": 3.23, "grad_norm": 0.10031897407111227, "learning_rate": 5.872345469972405e-05, "loss": 0.0037, "step": 1700 }, { "epoch": 3.23, "eval_blimp_filtered_avg": 0.7135820895522388, "eval_blimp_filtered_std": 0.004985222677724364, "step": 1700 }, { "epoch": 3.23, "eval_blimp_supplement_avg": 0.8081896551724138, "eval_blimp_supplement_std": 0.01717302898329157, "step": 1700 }, { "epoch": 3.23, "eval_vqa_filtered_avg": 0.56, "eval_vqa_filtered_std": 0.04988876515698589, "step": 1700 }, { "epoch": 3.23, "eval_winoground_filtered_avg": 0.62, "eval_winoground_filtered_std": 0.04878317312145633, "step": 1700 }, { "epoch": 3.23, "grad_norm": 0.20520166059846773, "learning_rate": 5.861131511793871e-05, "loss": 0.0079, "step": 1701 }, { "epoch": 3.24, "grad_norm": 0.1792361725075278, "learning_rate": 5.849923830734895e-05, "loss": 0.0065, "step": 1702 }, { "epoch": 3.24, "grad_norm": 0.2152349307180451, "learning_rate": 5.8387224437933416e-05, "loss": 0.0194, "step": 1703 }, { "epoch": 3.24, "grad_norm": 0.13449490572022552, "learning_rate": 5.827527367957536e-05, "loss": 0.0043, "step": 1704 }, { "epoch": 3.24, "grad_norm": 0.12023281320078483, "learning_rate": 5.816338620206239e-05, "loss": 0.0027, "step": 1705 }, { "epoch": 3.24, "grad_norm": 0.15605001754059103, "learning_rate": 5.805156217508601e-05, "loss": 0.0076, "step": 1706 }, { "epoch": 3.25, "grad_norm": 0.08999290829103396, "learning_rate": 5.793980176824158e-05, "loss": 0.0032, "step": 1707 }, { "epoch": 3.25, "grad_norm": 0.1398917778961148, "learning_rate": 5.78281051510279e-05, "loss": 0.0062, "step": 1708 }, { "epoch": 3.25, "grad_norm": 0.20561772304166465, "learning_rate": 5.771647249284715e-05, "loss": 0.0055, "step": 1709 }, { "epoch": 3.25, "grad_norm": 0.16199466612324698, "learning_rate": 5.760490396300443e-05, "loss": 0.0049, "step": 1710 }, { "epoch": 3.25, "grad_norm": 0.16960405809181006, "learning_rate": 5.7493399730707464e-05, "loss": 0.0107, "step": 1711 }, { "epoch": 3.25, "grad_norm": 0.261397323544806, "learning_rate": 5.73819599650667e-05, "loss": 0.0048, "step": 1712 }, { "epoch": 3.26, "grad_norm": 0.21493749409576024, "learning_rate": 5.727058483509463e-05, "loss": 0.0073, "step": 1713 }, { "epoch": 3.26, "grad_norm": 0.2167189964328902, "learning_rate": 5.715927450970577e-05, "loss": 0.0058, "step": 1714 }, { "epoch": 3.26, "grad_norm": 0.18079771728637525, "learning_rate": 5.704802915771642e-05, "loss": 0.0061, "step": 1715 }, { "epoch": 3.26, "grad_norm": 0.061356500213057746, "learning_rate": 5.6936848947844245e-05, "loss": 0.0022, "step": 1716 }, { "epoch": 3.26, "grad_norm": 0.23895208707416427, "learning_rate": 5.6825734048708155e-05, "loss": 0.0089, "step": 1717 }, { "epoch": 3.27, "grad_norm": 0.1460641177475997, "learning_rate": 5.671468462882796e-05, "loss": 0.0067, "step": 1718 }, { "epoch": 3.27, "grad_norm": 0.18447087506628765, "learning_rate": 5.6603700856624276e-05, "loss": 0.006, "step": 1719 }, { "epoch": 3.27, "grad_norm": 0.49641187039043877, "learning_rate": 5.649278290041806e-05, "loss": 0.0079, "step": 1720 }, { "epoch": 3.27, "grad_norm": 0.10697406064531279, "learning_rate": 5.6381930928430474e-05, "loss": 0.0047, "step": 1721 }, { "epoch": 3.27, "grad_norm": 0.06088870389827189, "learning_rate": 5.627114510878257e-05, "loss": 0.0031, "step": 1722 }, { "epoch": 3.28, "grad_norm": 0.15315176835365849, "learning_rate": 5.616042560949517e-05, "loss": 0.0064, "step": 1723 }, { "epoch": 3.28, "grad_norm": 0.21844419967785006, "learning_rate": 5.6049772598488385e-05, "loss": 0.0043, "step": 1724 }, { "epoch": 3.28, "grad_norm": 0.11723759222038571, "learning_rate": 5.5939186243581607e-05, "loss": 0.0045, "step": 1725 }, { "epoch": 3.28, "grad_norm": 0.204000504170908, "learning_rate": 5.58286667124931e-05, "loss": 0.0104, "step": 1726 }, { "epoch": 3.28, "grad_norm": 0.20235060946864336, "learning_rate": 5.5718214172839664e-05, "loss": 0.007, "step": 1727 }, { "epoch": 3.29, "grad_norm": 0.21475953749263604, "learning_rate": 5.560782879213673e-05, "loss": 0.0063, "step": 1728 }, { "epoch": 3.29, "grad_norm": 0.19039096521066015, "learning_rate": 5.549751073779768e-05, "loss": 0.0141, "step": 1729 }, { "epoch": 3.29, "grad_norm": 0.1774198672641829, "learning_rate": 5.538726017713385e-05, "loss": 0.009, "step": 1730 }, { "epoch": 3.29, "grad_norm": 0.22508541467196794, "learning_rate": 5.527707727735416e-05, "loss": 0.0055, "step": 1731 }, { "epoch": 3.29, "grad_norm": 0.1881357883619823, "learning_rate": 5.5166962205565076e-05, "loss": 0.0097, "step": 1732 }, { "epoch": 3.29, "grad_norm": 0.19419500941527232, "learning_rate": 5.505691512877007e-05, "loss": 0.0075, "step": 1733 }, { "epoch": 3.3, "grad_norm": 0.13321306878939898, "learning_rate": 5.4946936213869394e-05, "loss": 0.0032, "step": 1734 }, { "epoch": 3.3, "grad_norm": 0.17624977675076212, "learning_rate": 5.4837025627660154e-05, "loss": 0.0045, "step": 1735 }, { "epoch": 3.3, "grad_norm": 0.12713818907738952, "learning_rate": 5.4727183536835645e-05, "loss": 0.0047, "step": 1736 }, { "epoch": 3.3, "grad_norm": 0.2633844589463349, "learning_rate": 5.4617410107985334e-05, "loss": 0.0065, "step": 1737 }, { "epoch": 3.3, "grad_norm": 0.1608692289102195, "learning_rate": 5.450770550759463e-05, "loss": 0.0036, "step": 1738 }, { "epoch": 3.31, "grad_norm": 0.17902257258867613, "learning_rate": 5.4398069902044456e-05, "loss": 0.0046, "step": 1739 }, { "epoch": 3.31, "grad_norm": 0.13036424565255653, "learning_rate": 5.428850345761107e-05, "loss": 0.0037, "step": 1740 }, { "epoch": 3.31, "grad_norm": 0.2645244397151156, "learning_rate": 5.4179006340466e-05, "loss": 0.0097, "step": 1741 }, { "epoch": 3.31, "grad_norm": 0.1815002098829256, "learning_rate": 5.4069578716675486e-05, "loss": 0.0034, "step": 1742 }, { "epoch": 3.31, "grad_norm": 0.142846534289561, "learning_rate": 5.396022075220037e-05, "loss": 0.0053, "step": 1743 }, { "epoch": 3.32, "grad_norm": 0.13196624436086613, "learning_rate": 5.385093261289594e-05, "loss": 0.0041, "step": 1744 }, { "epoch": 3.32, "grad_norm": 0.14068017474204914, "learning_rate": 5.3741714464511526e-05, "loss": 0.0045, "step": 1745 }, { "epoch": 3.32, "grad_norm": 0.1454692936838625, "learning_rate": 5.363256647269028e-05, "loss": 0.0039, "step": 1746 }, { "epoch": 3.32, "grad_norm": 0.09554762366794076, "learning_rate": 5.3523488802969e-05, "loss": 0.0023, "step": 1747 }, { "epoch": 3.32, "grad_norm": 0.22218276327401063, "learning_rate": 5.3414481620777867e-05, "loss": 0.0101, "step": 1748 }, { "epoch": 3.33, "grad_norm": 0.27393300716804586, "learning_rate": 5.33055450914401e-05, "loss": 0.0059, "step": 1749 }, { "epoch": 3.33, "grad_norm": 0.14513998474967443, "learning_rate": 5.31966793801717e-05, "loss": 0.0049, "step": 1750 }, { "epoch": 3.33, "grad_norm": 0.1739959711892374, "learning_rate": 5.308788465208146e-05, "loss": 0.0049, "step": 1751 }, { "epoch": 3.33, "grad_norm": 0.14191147901774587, "learning_rate": 5.297916107217033e-05, "loss": 0.004, "step": 1752 }, { "epoch": 3.33, "grad_norm": 0.18907117821047975, "learning_rate": 5.287050880533138e-05, "loss": 0.0039, "step": 1753 }, { "epoch": 3.33, "grad_norm": 0.2173803899449214, "learning_rate": 5.276192801634967e-05, "loss": 0.0053, "step": 1754 }, { "epoch": 3.34, "grad_norm": 0.12161032985928287, "learning_rate": 5.2653418869901714e-05, "loss": 0.0022, "step": 1755 }, { "epoch": 3.34, "grad_norm": 0.25525229096109575, "learning_rate": 5.2544981530555425e-05, "loss": 0.0074, "step": 1756 }, { "epoch": 3.34, "grad_norm": 0.12495700591253037, "learning_rate": 5.243661616276969e-05, "loss": 0.0042, "step": 1757 }, { "epoch": 3.34, "grad_norm": 0.2298167600447919, "learning_rate": 5.2328322930894466e-05, "loss": 0.0062, "step": 1758 }, { "epoch": 3.34, "grad_norm": 0.22393131101409508, "learning_rate": 5.2220101999170156e-05, "loss": 0.0046, "step": 1759 }, { "epoch": 3.35, "grad_norm": 0.05954898963777507, "learning_rate": 5.2111953531727484e-05, "loss": 0.0015, "step": 1760 }, { "epoch": 3.35, "grad_norm": 0.2500301001983586, "learning_rate": 5.2003877692587435e-05, "loss": 0.003, "step": 1761 }, { "epoch": 3.35, "grad_norm": 0.14814341687465535, "learning_rate": 5.189587464566069e-05, "loss": 0.0061, "step": 1762 }, { "epoch": 3.35, "grad_norm": 0.16286058462153816, "learning_rate": 5.178794455474754e-05, "loss": 0.0033, "step": 1763 }, { "epoch": 3.35, "grad_norm": 0.10033188161683179, "learning_rate": 5.168008758353775e-05, "loss": 0.0022, "step": 1764 }, { "epoch": 3.36, "grad_norm": 0.2523731009829415, "learning_rate": 5.1572303895610086e-05, "loss": 0.0068, "step": 1765 }, { "epoch": 3.36, "grad_norm": 0.09174766443470675, "learning_rate": 5.146459365443217e-05, "loss": 0.003, "step": 1766 }, { "epoch": 3.36, "grad_norm": 0.13599966624250887, "learning_rate": 5.1356957023360287e-05, "loss": 0.0052, "step": 1767 }, { "epoch": 3.36, "grad_norm": 0.178257364632924, "learning_rate": 5.124939416563903e-05, "loss": 0.0075, "step": 1768 }, { "epoch": 3.36, "grad_norm": 0.06725724818673247, "learning_rate": 5.1141905244401144e-05, "loss": 0.0019, "step": 1769 }, { "epoch": 3.37, "grad_norm": 0.1007817666251486, "learning_rate": 5.103449042266717e-05, "loss": 0.0026, "step": 1770 }, { "epoch": 3.37, "grad_norm": 0.184886718705301, "learning_rate": 5.0927149863345416e-05, "loss": 0.0026, "step": 1771 }, { "epoch": 3.37, "grad_norm": 0.17855660999370337, "learning_rate": 5.081988372923143e-05, "loss": 0.0036, "step": 1772 }, { "epoch": 3.37, "grad_norm": 0.130547426518295, "learning_rate": 5.071269218300789e-05, "loss": 0.0043, "step": 1773 }, { "epoch": 3.37, "grad_norm": 0.17561290891529632, "learning_rate": 5.060557538724444e-05, "loss": 0.0164, "step": 1774 }, { "epoch": 3.37, "grad_norm": 0.1599714748418811, "learning_rate": 5.0498533504397286e-05, "loss": 0.007, "step": 1775 }, { "epoch": 3.38, "grad_norm": 0.21867733408808793, "learning_rate": 5.039156669680898e-05, "loss": 0.0047, "step": 1776 }, { "epoch": 3.38, "grad_norm": 0.12602730923239938, "learning_rate": 5.028467512670834e-05, "loss": 0.0034, "step": 1777 }, { "epoch": 3.38, "grad_norm": 0.12221322968573925, "learning_rate": 5.017785895620999e-05, "loss": 0.0078, "step": 1778 }, { "epoch": 3.38, "grad_norm": 0.052061335797215384, "learning_rate": 5.007111834731422e-05, "loss": 0.002, "step": 1779 }, { "epoch": 3.38, "grad_norm": 0.06412628104525493, "learning_rate": 4.9964453461906626e-05, "loss": 0.0019, "step": 1780 }, { "epoch": 3.39, "grad_norm": 0.14548272610118818, "learning_rate": 4.985786446175815e-05, "loss": 0.0048, "step": 1781 }, { "epoch": 3.39, "grad_norm": 0.14424640934220478, "learning_rate": 4.975135150852452e-05, "loss": 0.0053, "step": 1782 }, { "epoch": 3.39, "grad_norm": 0.17025192195030572, "learning_rate": 4.964491476374611e-05, "loss": 0.0047, "step": 1783 }, { "epoch": 3.39, "grad_norm": 0.2900571564127192, "learning_rate": 4.953855438884782e-05, "loss": 0.0108, "step": 1784 }, { "epoch": 3.39, "grad_norm": 0.25040999064018377, "learning_rate": 4.943227054513864e-05, "loss": 0.0123, "step": 1785 }, { "epoch": 3.4, "grad_norm": 0.2353466512980329, "learning_rate": 4.932606339381146e-05, "loss": 0.008, "step": 1786 }, { "epoch": 3.4, "grad_norm": 0.25791591057410157, "learning_rate": 4.9219933095943005e-05, "loss": 0.007, "step": 1787 }, { "epoch": 3.4, "grad_norm": 0.20155837267756818, "learning_rate": 4.911387981249329e-05, "loss": 0.0099, "step": 1788 }, { "epoch": 3.4, "grad_norm": 0.043861195573343816, "learning_rate": 4.90079037043056e-05, "loss": 0.0013, "step": 1789 }, { "epoch": 3.4, "grad_norm": 0.19558989880690153, "learning_rate": 4.890200493210615e-05, "loss": 0.0074, "step": 1790 }, { "epoch": 3.4, "grad_norm": 0.16924379115993018, "learning_rate": 4.879618365650387e-05, "loss": 0.0042, "step": 1791 }, { "epoch": 3.41, "grad_norm": 0.2901210776587374, "learning_rate": 4.869044003799017e-05, "loss": 0.0104, "step": 1792 }, { "epoch": 3.41, "grad_norm": 0.16984952871978176, "learning_rate": 4.858477423693862e-05, "loss": 0.0044, "step": 1793 }, { "epoch": 3.41, "grad_norm": 0.3159111663839413, "learning_rate": 4.8479186413604924e-05, "loss": 0.013, "step": 1794 }, { "epoch": 3.41, "grad_norm": 0.17987328463591265, "learning_rate": 4.837367672812636e-05, "loss": 0.0024, "step": 1795 }, { "epoch": 3.41, "grad_norm": 0.11947429591317629, "learning_rate": 4.826824534052174e-05, "loss": 0.0042, "step": 1796 }, { "epoch": 3.42, "grad_norm": 0.14422523400445814, "learning_rate": 4.816289241069122e-05, "loss": 0.005, "step": 1797 }, { "epoch": 3.42, "grad_norm": 0.23739911408824768, "learning_rate": 4.8057618098415845e-05, "loss": 0.007, "step": 1798 }, { "epoch": 3.42, "grad_norm": 0.09273088437229945, "learning_rate": 4.7952422563357444e-05, "loss": 0.0028, "step": 1799 }, { "epoch": 3.42, "grad_norm": 0.06594897992506532, "learning_rate": 4.7847305965058497e-05, "loss": 0.0029, "step": 1800 }, { "epoch": 3.42, "eval_blimp_filtered_avg": 0.713134328358209, "eval_blimp_filtered_std": 0.00498533551841397, "step": 1800 }, { "epoch": 3.42, "eval_blimp_supplement_avg": 0.8081896551724138, "eval_blimp_supplement_std": 0.01717302898329157, "step": 1800 }, { "epoch": 3.42, "eval_vqa_filtered_avg": 0.56, "eval_vqa_filtered_std": 0.049888765156985884, "step": 1800 }, { "epoch": 3.42, "eval_winoground_filtered_avg": 0.62, "eval_winoground_filtered_std": 0.04878317312145633, "step": 1800 }, { "epoch": 3.42, "grad_norm": 0.06561530364498148, "learning_rate": 4.7742268462941606e-05, "loss": 0.0017, "step": 1801 }, { "epoch": 3.43, "grad_norm": 0.1617203757997895, "learning_rate": 4.763731021630949e-05, "loss": 0.0117, "step": 1802 }, { "epoch": 3.43, "grad_norm": 0.10036091624320939, "learning_rate": 4.7532431384344666e-05, "loss": 0.0018, "step": 1803 }, { "epoch": 3.43, "grad_norm": 0.11030294564765286, "learning_rate": 4.7427632126109186e-05, "loss": 0.0055, "step": 1804 }, { "epoch": 3.43, "grad_norm": 0.1830650765087559, "learning_rate": 4.7322912600544435e-05, "loss": 0.006, "step": 1805 }, { "epoch": 3.43, "grad_norm": 0.3101549901612274, "learning_rate": 4.721827296647083e-05, "loss": 0.0138, "step": 1806 }, { "epoch": 3.44, "grad_norm": 0.16540700241323528, "learning_rate": 4.7113713382587745e-05, "loss": 0.003, "step": 1807 }, { "epoch": 3.44, "grad_norm": 0.28707392708716223, "learning_rate": 4.7009234007473016e-05, "loss": 0.01, "step": 1808 }, { "epoch": 3.44, "grad_norm": 0.1053312119308134, "learning_rate": 4.6904834999582834e-05, "loss": 0.0042, "step": 1809 }, { "epoch": 3.44, "grad_norm": 0.24562998660417207, "learning_rate": 4.6800516517251644e-05, "loss": 0.0079, "step": 1810 }, { "epoch": 3.44, "grad_norm": 0.15046701286005718, "learning_rate": 4.6696278718691635e-05, "loss": 0.0056, "step": 1811 }, { "epoch": 3.44, "grad_norm": 0.18020212751092216, "learning_rate": 4.659212176199264e-05, "loss": 0.0094, "step": 1812 }, { "epoch": 3.45, "grad_norm": 0.09140968577746518, "learning_rate": 4.6488045805121936e-05, "loss": 0.0032, "step": 1813 }, { "epoch": 3.45, "grad_norm": 0.20092322263833567, "learning_rate": 4.63840510059239e-05, "loss": 0.0062, "step": 1814 }, { "epoch": 3.45, "grad_norm": 0.0851971919420257, "learning_rate": 4.628013752211987e-05, "loss": 0.0027, "step": 1815 }, { "epoch": 3.45, "grad_norm": 0.091348454913649, "learning_rate": 4.617630551130778e-05, "loss": 0.0021, "step": 1816 }, { "epoch": 3.45, "grad_norm": 0.1763665969855068, "learning_rate": 4.607255513096215e-05, "loss": 0.0049, "step": 1817 }, { "epoch": 3.46, "grad_norm": 0.27464525999451567, "learning_rate": 4.596888653843354e-05, "loss": 0.008, "step": 1818 }, { "epoch": 3.46, "grad_norm": 0.129322067762118, "learning_rate": 4.586529989094853e-05, "loss": 0.0064, "step": 1819 }, { "epoch": 3.46, "grad_norm": 0.14708686590539963, "learning_rate": 4.576179534560948e-05, "loss": 0.0052, "step": 1820 }, { "epoch": 3.46, "grad_norm": 0.18053860887874643, "learning_rate": 4.565837305939414e-05, "loss": 0.0069, "step": 1821 }, { "epoch": 3.46, "grad_norm": 0.40534507015518995, "learning_rate": 4.5555033189155505e-05, "loss": 0.003, "step": 1822 }, { "epoch": 3.47, "grad_norm": 0.12178369931499572, "learning_rate": 4.545177589162167e-05, "loss": 0.0025, "step": 1823 }, { "epoch": 3.47, "grad_norm": 0.30904100910329946, "learning_rate": 4.5348601323395415e-05, "loss": 0.0117, "step": 1824 }, { "epoch": 3.47, "grad_norm": 0.12584424865521657, "learning_rate": 4.5245509640954057e-05, "loss": 0.0052, "step": 1825 }, { "epoch": 3.47, "grad_norm": 0.34889979811708477, "learning_rate": 4.514250100064924e-05, "loss": 0.0038, "step": 1826 }, { "epoch": 3.47, "grad_norm": 0.23600091432503922, "learning_rate": 4.5039575558706625e-05, "loss": 0.0079, "step": 1827 }, { "epoch": 3.48, "grad_norm": 0.18911011730523458, "learning_rate": 4.493673347122572e-05, "loss": 0.0052, "step": 1828 }, { "epoch": 3.48, "grad_norm": 0.11931900239653219, "learning_rate": 4.483397489417959e-05, "loss": 0.003, "step": 1829 }, { "epoch": 3.48, "grad_norm": 0.2474234145187063, "learning_rate": 4.47312999834147e-05, "loss": 0.0108, "step": 1830 }, { "epoch": 3.48, "grad_norm": 0.12520984474508043, "learning_rate": 4.462870889465058e-05, "loss": 0.0042, "step": 1831 }, { "epoch": 3.48, "grad_norm": 0.15603911212732596, "learning_rate": 4.45262017834796e-05, "loss": 0.0044, "step": 1832 }, { "epoch": 3.48, "grad_norm": 0.19324129783040236, "learning_rate": 4.442377880536689e-05, "loss": 0.0078, "step": 1833 }, { "epoch": 3.49, "grad_norm": 0.17316412353214808, "learning_rate": 4.4321440115649835e-05, "loss": 0.0032, "step": 1834 }, { "epoch": 3.49, "grad_norm": 0.11312710129354221, "learning_rate": 4.421918586953808e-05, "loss": 0.0037, "step": 1835 }, { "epoch": 3.49, "grad_norm": 0.28419720432619716, "learning_rate": 4.411701622211316e-05, "loss": 0.0068, "step": 1836 }, { "epoch": 3.49, "grad_norm": 0.2354873159827851, "learning_rate": 4.401493132832832e-05, "loss": 0.0062, "step": 1837 }, { "epoch": 3.49, "grad_norm": 0.18044029715185292, "learning_rate": 4.391293134300824e-05, "loss": 0.0055, "step": 1838 }, { "epoch": 3.5, "grad_norm": 0.10160451291273094, "learning_rate": 4.3811016420848884e-05, "loss": 0.0028, "step": 1839 }, { "epoch": 3.5, "grad_norm": 0.21771151022346857, "learning_rate": 4.370918671641716e-05, "loss": 0.0062, "step": 1840 }, { "epoch": 3.5, "grad_norm": 0.2175688055140635, "learning_rate": 4.360744238415075e-05, "loss": 0.007, "step": 1841 }, { "epoch": 3.5, "grad_norm": 0.24331651769751125, "learning_rate": 4.350578357835781e-05, "loss": 0.0081, "step": 1842 }, { "epoch": 3.5, "grad_norm": 0.09732818067662535, "learning_rate": 4.340421045321688e-05, "loss": 0.003, "step": 1843 }, { "epoch": 3.51, "grad_norm": 0.17199608525434515, "learning_rate": 4.330272316277648e-05, "loss": 0.0069, "step": 1844 }, { "epoch": 3.51, "grad_norm": 0.06544777221013504, "learning_rate": 4.3201321860954943e-05, "loss": 0.0018, "step": 1845 }, { "epoch": 3.51, "grad_norm": 0.1119569545472664, "learning_rate": 4.3100006701540274e-05, "loss": 0.0052, "step": 1846 }, { "epoch": 3.51, "grad_norm": 0.15584371914550924, "learning_rate": 4.299877783818975e-05, "loss": 0.0062, "step": 1847 }, { "epoch": 3.51, "grad_norm": 0.1463921566586945, "learning_rate": 4.2897635424429795e-05, "loss": 0.0043, "step": 1848 }, { "epoch": 3.52, "grad_norm": 0.13310404419079103, "learning_rate": 4.279657961365572e-05, "loss": 0.0057, "step": 1849 }, { "epoch": 3.52, "grad_norm": 0.23405135579070535, "learning_rate": 4.269561055913148e-05, "loss": 0.0113, "step": 1850 }, { "epoch": 3.52, "grad_norm": 0.18568207902154524, "learning_rate": 4.259472841398945e-05, "loss": 0.0114, "step": 1851 }, { "epoch": 3.52, "grad_norm": 0.1474270106126823, "learning_rate": 4.249393333123026e-05, "loss": 0.006, "step": 1852 }, { "epoch": 3.52, "grad_norm": 0.05300527121692373, "learning_rate": 4.239322546372244e-05, "loss": 0.0015, "step": 1853 }, { "epoch": 3.52, "grad_norm": 0.1755799906209208, "learning_rate": 4.229260496420224e-05, "loss": 0.0051, "step": 1854 }, { "epoch": 3.53, "grad_norm": 0.15931523278587123, "learning_rate": 4.219207198527339e-05, "loss": 0.0037, "step": 1855 }, { "epoch": 3.53, "grad_norm": 0.08319968399694082, "learning_rate": 4.2091626679407004e-05, "loss": 0.0028, "step": 1856 }, { "epoch": 3.53, "grad_norm": 0.09126187695274073, "learning_rate": 4.1991269198941084e-05, "loss": 0.0035, "step": 1857 }, { "epoch": 3.53, "grad_norm": 0.11791121170012347, "learning_rate": 4.189099969608049e-05, "loss": 0.0041, "step": 1858 }, { "epoch": 3.53, "grad_norm": 0.2555602269145006, "learning_rate": 4.179081832289667e-05, "loss": 0.0153, "step": 1859 }, { "epoch": 3.54, "grad_norm": 0.3434327931493038, "learning_rate": 4.16907252313274e-05, "loss": 0.0141, "step": 1860 }, { "epoch": 3.54, "grad_norm": 0.22772262024785017, "learning_rate": 4.15907205731765e-05, "loss": 0.0082, "step": 1861 }, { "epoch": 3.54, "grad_norm": 0.25094452644092596, "learning_rate": 4.149080450011382e-05, "loss": 0.0117, "step": 1862 }, { "epoch": 3.54, "grad_norm": 0.3969345314647962, "learning_rate": 4.139097716367474e-05, "loss": 0.0056, "step": 1863 }, { "epoch": 3.54, "grad_norm": 0.16994552963305587, "learning_rate": 4.129123871526007e-05, "loss": 0.0055, "step": 1864 }, { "epoch": 3.55, "grad_norm": 0.0836556809574058, "learning_rate": 4.1191589306135824e-05, "loss": 0.0024, "step": 1865 }, { "epoch": 3.55, "grad_norm": 0.08427141767818795, "learning_rate": 4.109202908743303e-05, "loss": 0.0024, "step": 1866 }, { "epoch": 3.55, "grad_norm": 0.18287992228651564, "learning_rate": 4.0992558210147366e-05, "loss": 0.0043, "step": 1867 }, { "epoch": 3.55, "grad_norm": 0.25176020387418824, "learning_rate": 4.089317682513902e-05, "loss": 0.0044, "step": 1868 }, { "epoch": 3.55, "grad_norm": 0.1816629539767448, "learning_rate": 4.079388508313255e-05, "loss": 0.0055, "step": 1869 }, { "epoch": 3.56, "grad_norm": 0.07170772444840139, "learning_rate": 4.069468313471646e-05, "loss": 0.0023, "step": 1870 }, { "epoch": 3.56, "grad_norm": 0.1502392878538748, "learning_rate": 4.059557113034308e-05, "loss": 0.0036, "step": 1871 }, { "epoch": 3.56, "grad_norm": 0.163055645417052, "learning_rate": 4.0496549220328376e-05, "loss": 0.0034, "step": 1872 }, { "epoch": 3.56, "grad_norm": 0.16001552273401443, "learning_rate": 4.0397617554851616e-05, "loss": 0.0044, "step": 1873 }, { "epoch": 3.56, "grad_norm": 0.16684239346926555, "learning_rate": 4.029877628395522e-05, "loss": 0.0033, "step": 1874 }, { "epoch": 3.56, "grad_norm": 0.1936217246129422, "learning_rate": 4.020002555754459e-05, "loss": 0.0062, "step": 1875 }, { "epoch": 3.57, "grad_norm": 0.09828303376697355, "learning_rate": 4.010136552538769e-05, "loss": 0.0039, "step": 1876 }, { "epoch": 3.57, "grad_norm": 0.14914325738708725, "learning_rate": 4.000279633711501e-05, "loss": 0.0028, "step": 1877 }, { "epoch": 3.57, "grad_norm": 0.20322848142212466, "learning_rate": 3.990431814221919e-05, "loss": 0.0069, "step": 1878 }, { "epoch": 3.57, "grad_norm": 0.22258175174732805, "learning_rate": 3.980593109005498e-05, "loss": 0.009, "step": 1879 }, { "epoch": 3.57, "grad_norm": 0.16699668996369763, "learning_rate": 3.970763532983882e-05, "loss": 0.0074, "step": 1880 }, { "epoch": 3.58, "grad_norm": 0.10680721873457874, "learning_rate": 3.960943101064869e-05, "loss": 0.0029, "step": 1881 }, { "epoch": 3.58, "grad_norm": 0.08365276306999468, "learning_rate": 3.9511318281423923e-05, "loss": 0.0022, "step": 1882 }, { "epoch": 3.58, "grad_norm": 0.15816557415128818, "learning_rate": 3.941329729096492e-05, "loss": 0.0046, "step": 1883 }, { "epoch": 3.58, "grad_norm": 0.333397267547051, "learning_rate": 3.9315368187932934e-05, "loss": 0.0103, "step": 1884 }, { "epoch": 3.58, "grad_norm": 0.1540267350863146, "learning_rate": 3.921753112084995e-05, "loss": 0.0026, "step": 1885 }, { "epoch": 3.59, "grad_norm": 0.1269413871268906, "learning_rate": 3.911978623809826e-05, "loss": 0.003, "step": 1886 }, { "epoch": 3.59, "grad_norm": 0.12174872466023548, "learning_rate": 3.9022133687920346e-05, "loss": 0.005, "step": 1887 }, { "epoch": 3.59, "grad_norm": 0.11011911305738481, "learning_rate": 3.892457361841879e-05, "loss": 0.0046, "step": 1888 }, { "epoch": 3.59, "grad_norm": 0.16982951175555652, "learning_rate": 3.882710617755578e-05, "loss": 0.0055, "step": 1889 }, { "epoch": 3.59, "grad_norm": 0.10510479824835871, "learning_rate": 3.8729731513153065e-05, "loss": 0.0065, "step": 1890 }, { "epoch": 3.6, "grad_norm": 0.3126450471343747, "learning_rate": 3.863244977289165e-05, "loss": 0.0044, "step": 1891 }, { "epoch": 3.6, "grad_norm": 0.21065605418745886, "learning_rate": 3.8535261104311725e-05, "loss": 0.0072, "step": 1892 }, { "epoch": 3.6, "grad_norm": 0.20316106856027857, "learning_rate": 3.8438165654812194e-05, "loss": 0.011, "step": 1893 }, { "epoch": 3.6, "grad_norm": 0.18101813713874018, "learning_rate": 3.834116357165064e-05, "loss": 0.0085, "step": 1894 }, { "epoch": 3.6, "grad_norm": 0.08031375611459984, "learning_rate": 3.824425500194305e-05, "loss": 0.0035, "step": 1895 }, { "epoch": 3.6, "grad_norm": 0.3153749825229324, "learning_rate": 3.814744009266356e-05, "loss": 0.0102, "step": 1896 }, { "epoch": 3.61, "grad_norm": 0.14891033484351676, "learning_rate": 3.805071899064424e-05, "loss": 0.0055, "step": 1897 }, { "epoch": 3.61, "grad_norm": 0.13084769823538112, "learning_rate": 3.7954091842575004e-05, "loss": 0.0028, "step": 1898 }, { "epoch": 3.61, "grad_norm": 0.09906699013242604, "learning_rate": 3.785755879500312e-05, "loss": 0.002, "step": 1899 }, { "epoch": 3.61, "grad_norm": 0.12158452718962741, "learning_rate": 3.7761119994333215e-05, "loss": 0.0044, "step": 1900 }, { "epoch": 3.61, "eval_blimp_filtered_avg": 0.7107462686567164, "eval_blimp_filtered_std": 0.005005965015993623, "step": 1900 }, { "epoch": 3.61, "eval_blimp_supplement_avg": 0.8038793103448276, "eval_blimp_supplement_std": 0.01726573984749586, "step": 1900 }, { "epoch": 3.61, "eval_vqa_filtered_avg": 0.56, "eval_vqa_filtered_std": 0.049888765156985884, "step": 1900 }, { "epoch": 3.61, "eval_winoground_filtered_avg": 0.64, "eval_winoground_filtered_std": 0.048241815132442176, "step": 1900 }, { "epoch": 3.61, "grad_norm": 0.19281313172074194, "learning_rate": 3.766477558682704e-05, "loss": 0.0085, "step": 1901 }, { "epoch": 3.62, "grad_norm": 0.22532967427553033, "learning_rate": 3.756852571860307e-05, "loss": 0.0189, "step": 1902 }, { "epoch": 3.62, "grad_norm": 0.06631853404511011, "learning_rate": 3.7472370535636445e-05, "loss": 0.0019, "step": 1903 }, { "epoch": 3.62, "grad_norm": 0.11724975707504783, "learning_rate": 3.737631018375872e-05, "loss": 0.0043, "step": 1904 }, { "epoch": 3.62, "grad_norm": 0.220788268546307, "learning_rate": 3.728034480865763e-05, "loss": 0.0042, "step": 1905 }, { "epoch": 3.62, "grad_norm": 0.090446747085492, "learning_rate": 3.718447455587682e-05, "loss": 0.0021, "step": 1906 }, { "epoch": 3.63, "grad_norm": 0.21828882265807786, "learning_rate": 3.7088699570815686e-05, "loss": 0.0065, "step": 1907 }, { "epoch": 3.63, "grad_norm": 0.0971059205945317, "learning_rate": 3.699301999872922e-05, "loss": 0.0052, "step": 1908 }, { "epoch": 3.63, "grad_norm": 0.10798070415280281, "learning_rate": 3.6897435984727605e-05, "loss": 0.0032, "step": 1909 }, { "epoch": 3.63, "grad_norm": 0.11951932143174697, "learning_rate": 3.680194767377609e-05, "loss": 0.0025, "step": 1910 }, { "epoch": 3.63, "grad_norm": 0.03798531565799055, "learning_rate": 3.6706555210694914e-05, "loss": 0.002, "step": 1911 }, { "epoch": 3.63, "grad_norm": 0.11980527201357412, "learning_rate": 3.661125874015881e-05, "loss": 0.0028, "step": 1912 }, { "epoch": 3.64, "grad_norm": 0.10556457330210402, "learning_rate": 3.651605840669695e-05, "loss": 0.0025, "step": 1913 }, { "epoch": 3.64, "grad_norm": 0.08409222582969861, "learning_rate": 3.642095435469274e-05, "loss": 0.0021, "step": 1914 }, { "epoch": 3.64, "grad_norm": 0.27207553173721655, "learning_rate": 3.6325946728383584e-05, "loss": 0.0063, "step": 1915 }, { "epoch": 3.64, "grad_norm": 0.07126665798750997, "learning_rate": 3.62310356718606e-05, "loss": 0.0022, "step": 1916 }, { "epoch": 3.64, "grad_norm": 0.17067313786705038, "learning_rate": 3.613622132906843e-05, "loss": 0.0068, "step": 1917 }, { "epoch": 3.65, "grad_norm": 0.25497100030374553, "learning_rate": 3.604150384380508e-05, "loss": 0.004, "step": 1918 }, { "epoch": 3.65, "grad_norm": 0.08296956274753983, "learning_rate": 3.594688335972164e-05, "loss": 0.002, "step": 1919 }, { "epoch": 3.65, "grad_norm": 0.3728036434446719, "learning_rate": 3.585236002032205e-05, "loss": 0.0038, "step": 1920 }, { "epoch": 3.65, "grad_norm": 0.0919735183461791, "learning_rate": 3.575793396896303e-05, "loss": 0.0028, "step": 1921 }, { "epoch": 3.65, "grad_norm": 0.03098004272725508, "learning_rate": 3.5663605348853625e-05, "loss": 0.0011, "step": 1922 }, { "epoch": 3.66, "grad_norm": 0.2435486618821207, "learning_rate": 3.556937430305515e-05, "loss": 0.0032, "step": 1923 }, { "epoch": 3.66, "grad_norm": 0.16238758678729023, "learning_rate": 3.5475240974481006e-05, "loss": 0.0054, "step": 1924 }, { "epoch": 3.66, "grad_norm": 0.20120943195053595, "learning_rate": 3.538120550589631e-05, "loss": 0.0059, "step": 1925 }, { "epoch": 3.66, "grad_norm": 0.0838212190234571, "learning_rate": 3.5287268039917785e-05, "loss": 0.0045, "step": 1926 }, { "epoch": 3.66, "grad_norm": 0.057407059532016615, "learning_rate": 3.5193428719013523e-05, "loss": 0.0016, "step": 1927 }, { "epoch": 3.67, "grad_norm": 0.1457555571491291, "learning_rate": 3.509968768550278e-05, "loss": 0.0068, "step": 1928 }, { "epoch": 3.67, "grad_norm": 0.14299743557967626, "learning_rate": 3.500604508155571e-05, "loss": 0.0032, "step": 1929 }, { "epoch": 3.67, "grad_norm": 0.04464025158684323, "learning_rate": 3.491250104919321e-05, "loss": 0.0015, "step": 1930 }, { "epoch": 3.67, "grad_norm": 0.1256637158666533, "learning_rate": 3.481905573028673e-05, "loss": 0.0051, "step": 1931 }, { "epoch": 3.67, "grad_norm": 0.15028709266612308, "learning_rate": 3.4725709266557924e-05, "loss": 0.0031, "step": 1932 }, { "epoch": 3.67, "grad_norm": 0.13580691306122022, "learning_rate": 3.4632461799578534e-05, "loss": 0.0055, "step": 1933 }, { "epoch": 3.68, "grad_norm": 0.18727855450777772, "learning_rate": 3.453931347077024e-05, "loss": 0.0053, "step": 1934 }, { "epoch": 3.68, "grad_norm": 0.1922587318279276, "learning_rate": 3.444626442140428e-05, "loss": 0.0033, "step": 1935 }, { "epoch": 3.68, "grad_norm": 0.16557295906716402, "learning_rate": 3.435331479260133e-05, "loss": 0.005, "step": 1936 }, { "epoch": 3.68, "grad_norm": 0.16681073767250382, "learning_rate": 3.4260464725331345e-05, "loss": 0.0088, "step": 1937 }, { "epoch": 3.68, "grad_norm": 0.20027349607684253, "learning_rate": 3.416771436041323e-05, "loss": 0.006, "step": 1938 }, { "epoch": 3.69, "grad_norm": 0.17580078381421513, "learning_rate": 3.4075063838514676e-05, "loss": 0.0064, "step": 1939 }, { "epoch": 3.69, "grad_norm": 0.19948423031639978, "learning_rate": 3.3982513300151964e-05, "loss": 0.005, "step": 1940 }, { "epoch": 3.69, "grad_norm": 0.09175817900803064, "learning_rate": 3.389006288568974e-05, "loss": 0.0032, "step": 1941 }, { "epoch": 3.69, "grad_norm": 0.19243846565840744, "learning_rate": 3.3797712735340794e-05, "loss": 0.0052, "step": 1942 }, { "epoch": 3.69, "grad_norm": 0.12691357042780635, "learning_rate": 3.370546298916583e-05, "loss": 0.0055, "step": 1943 }, { "epoch": 3.7, "grad_norm": 0.22380266533018373, "learning_rate": 3.361331378707334e-05, "loss": 0.0065, "step": 1944 }, { "epoch": 3.7, "grad_norm": 0.18496485603222843, "learning_rate": 3.3521265268819294e-05, "loss": 0.0048, "step": 1945 }, { "epoch": 3.7, "grad_norm": 0.5330202066069344, "learning_rate": 3.342931757400689e-05, "loss": 0.0095, "step": 1946 }, { "epoch": 3.7, "grad_norm": 0.21141736464998725, "learning_rate": 3.333747084208657e-05, "loss": 0.0085, "step": 1947 }, { "epoch": 3.7, "grad_norm": 0.3260101877954071, "learning_rate": 3.324572521235552e-05, "loss": 0.0122, "step": 1948 }, { "epoch": 3.71, "grad_norm": 0.16092010434520088, "learning_rate": 3.3154080823957635e-05, "loss": 0.0034, "step": 1949 }, { "epoch": 3.71, "grad_norm": 0.17614488311080514, "learning_rate": 3.306253781588327e-05, "loss": 0.0043, "step": 1950 }, { "epoch": 3.71, "grad_norm": 0.031112377141338594, "learning_rate": 3.297109632696902e-05, "loss": 0.001, "step": 1951 }, { "epoch": 3.71, "grad_norm": 0.21717668506829108, "learning_rate": 3.287975649589751e-05, "loss": 0.0022, "step": 1952 }, { "epoch": 3.71, "grad_norm": 0.051020112320890536, "learning_rate": 3.2788518461197157e-05, "loss": 0.0011, "step": 1953 }, { "epoch": 3.71, "grad_norm": 0.1103882304855566, "learning_rate": 3.2697382361242104e-05, "loss": 0.0031, "step": 1954 }, { "epoch": 3.72, "grad_norm": 0.18371208371580738, "learning_rate": 3.2606348334251755e-05, "loss": 0.0043, "step": 1955 }, { "epoch": 3.72, "grad_norm": 0.12302603737370847, "learning_rate": 3.2515416518290774e-05, "loss": 0.0082, "step": 1956 }, { "epoch": 3.72, "grad_norm": 0.28298712755155053, "learning_rate": 3.2424587051268837e-05, "loss": 0.0076, "step": 1957 }, { "epoch": 3.72, "grad_norm": 0.2058328080589075, "learning_rate": 3.233386007094036e-05, "loss": 0.0038, "step": 1958 }, { "epoch": 3.72, "grad_norm": 0.20152947281540468, "learning_rate": 3.224323571490428e-05, "loss": 0.0058, "step": 1959 }, { "epoch": 3.73, "grad_norm": 0.04277464729351368, "learning_rate": 3.2152714120603986e-05, "loss": 0.0017, "step": 1960 }, { "epoch": 3.73, "grad_norm": 0.21751629293011684, "learning_rate": 3.206229542532697e-05, "loss": 0.0034, "step": 1961 }, { "epoch": 3.73, "grad_norm": 0.18058567097161793, "learning_rate": 3.197197976620469e-05, "loss": 0.0063, "step": 1962 }, { "epoch": 3.73, "grad_norm": 0.1395213424712423, "learning_rate": 3.188176728021218e-05, "loss": 0.0046, "step": 1963 }, { "epoch": 3.73, "grad_norm": 0.04882399830216606, "learning_rate": 3.179165810416826e-05, "loss": 0.0016, "step": 1964 }, { "epoch": 3.74, "grad_norm": 0.176186064856677, "learning_rate": 3.17016523747349e-05, "loss": 0.0082, "step": 1965 }, { "epoch": 3.74, "grad_norm": 0.0897411923661201, "learning_rate": 3.161175022841717e-05, "loss": 0.0017, "step": 1966 }, { "epoch": 3.74, "grad_norm": 0.3137960682933037, "learning_rate": 3.152195180156317e-05, "loss": 0.0093, "step": 1967 }, { "epoch": 3.74, "grad_norm": 0.21699178935317506, "learning_rate": 3.143225723036357e-05, "loss": 0.0036, "step": 1968 }, { "epoch": 3.74, "grad_norm": 0.1057255038988309, "learning_rate": 3.134266665085154e-05, "loss": 0.0032, "step": 1969 }, { "epoch": 3.75, "grad_norm": 0.26739253114563377, "learning_rate": 3.1253180198902655e-05, "loss": 0.0029, "step": 1970 }, { "epoch": 3.75, "grad_norm": 0.09887369181119855, "learning_rate": 3.1163798010234424e-05, "loss": 0.0027, "step": 1971 }, { "epoch": 3.75, "grad_norm": 0.1540048808050214, "learning_rate": 3.10745202204063e-05, "loss": 0.0043, "step": 1972 }, { "epoch": 3.75, "grad_norm": 0.2203352631787339, "learning_rate": 3.098534696481937e-05, "loss": 0.0088, "step": 1973 }, { "epoch": 3.75, "grad_norm": 0.06703849359135595, "learning_rate": 3.089627837871623e-05, "loss": 0.0017, "step": 1974 }, { "epoch": 3.75, "grad_norm": 0.08037419655833167, "learning_rate": 3.080731459718067e-05, "loss": 0.0031, "step": 1975 }, { "epoch": 3.76, "grad_norm": 0.08791132776517463, "learning_rate": 3.0718455755137534e-05, "loss": 0.0033, "step": 1976 }, { "epoch": 3.76, "grad_norm": 0.24657062968854843, "learning_rate": 3.06297019873526e-05, "loss": 0.0038, "step": 1977 }, { "epoch": 3.76, "grad_norm": 0.14273542153867397, "learning_rate": 3.054105342843221e-05, "loss": 0.003, "step": 1978 }, { "epoch": 3.76, "grad_norm": 0.19684642668597407, "learning_rate": 3.0452510212823104e-05, "loss": 0.004, "step": 1979 }, { "epoch": 3.76, "grad_norm": 0.13053739197148675, "learning_rate": 3.03640724748124e-05, "loss": 0.005, "step": 1980 }, { "epoch": 3.77, "grad_norm": 0.1655571800218019, "learning_rate": 3.0275740348527093e-05, "loss": 0.0025, "step": 1981 }, { "epoch": 3.77, "grad_norm": 0.10120321913643077, "learning_rate": 3.0187513967934067e-05, "loss": 0.0092, "step": 1982 }, { "epoch": 3.77, "grad_norm": 0.12240086097916901, "learning_rate": 3.0099393466839864e-05, "loss": 0.004, "step": 1983 }, { "epoch": 3.77, "grad_norm": 0.09716475922881314, "learning_rate": 3.0011378978890416e-05, "loss": 0.0015, "step": 1984 }, { "epoch": 3.77, "grad_norm": 0.17888898241383958, "learning_rate": 2.9923470637570827e-05, "loss": 0.0053, "step": 1985 }, { "epoch": 3.78, "grad_norm": 0.09484840209023661, "learning_rate": 2.9835668576205288e-05, "loss": 0.0037, "step": 1986 }, { "epoch": 3.78, "grad_norm": 0.1912630292736913, "learning_rate": 2.9747972927956768e-05, "loss": 0.0055, "step": 1987 }, { "epoch": 3.78, "grad_norm": 0.11504125564625761, "learning_rate": 2.9660383825826842e-05, "loss": 0.0033, "step": 1988 }, { "epoch": 3.78, "grad_norm": 0.18800930238431912, "learning_rate": 2.9572901402655484e-05, "loss": 0.0121, "step": 1989 }, { "epoch": 3.78, "grad_norm": 0.11817734618325683, "learning_rate": 2.948552579112095e-05, "loss": 0.0029, "step": 1990 }, { "epoch": 3.79, "grad_norm": 0.047550712075836755, "learning_rate": 2.9398257123739413e-05, "loss": 0.0013, "step": 1991 }, { "epoch": 3.79, "grad_norm": 0.10710469679026098, "learning_rate": 2.9311095532864874e-05, "loss": 0.0045, "step": 1992 }, { "epoch": 3.79, "grad_norm": 0.04282912420930876, "learning_rate": 2.9224041150688997e-05, "loss": 0.0011, "step": 1993 }, { "epoch": 3.79, "grad_norm": 0.10840122905676781, "learning_rate": 2.913709410924078e-05, "loss": 0.0028, "step": 1994 }, { "epoch": 3.79, "grad_norm": 0.14083452136410807, "learning_rate": 2.9050254540386457e-05, "loss": 0.0031, "step": 1995 }, { "epoch": 3.79, "grad_norm": 0.22666164634012337, "learning_rate": 2.896352257582925e-05, "loss": 0.0044, "step": 1996 }, { "epoch": 3.8, "grad_norm": 0.09397350879621791, "learning_rate": 2.8876898347109195e-05, "loss": 0.0028, "step": 1997 }, { "epoch": 3.8, "grad_norm": 0.12422692744292312, "learning_rate": 2.8790381985602922e-05, "loss": 0.0039, "step": 1998 }, { "epoch": 3.8, "grad_norm": 0.05315612514690125, "learning_rate": 2.8703973622523505e-05, "loss": 0.0014, "step": 1999 }, { "epoch": 3.8, "grad_norm": 0.14299049878820608, "learning_rate": 2.8617673388920197e-05, "loss": 0.0064, "step": 2000 }, { "epoch": 3.8, "eval_blimp_filtered_avg": 0.7105970149253731, "eval_blimp_filtered_std": 0.005006481911562259, "step": 2000 }, { "epoch": 3.8, "eval_blimp_supplement_avg": 0.8125, "eval_blimp_supplement_std": 0.017057808126567407, "step": 2000 }, { "epoch": 3.8, "eval_vqa_filtered_avg": 0.57, "eval_vqa_filtered_std": 0.04975698519562428, "step": 2000 }, { "epoch": 3.8, "eval_winoground_filtered_avg": 0.63, "eval_winoground_filtered_std": 0.048523658709391, "step": 2000 }, { "epoch": 3.8, "grad_norm": 0.13221786425502333, "learning_rate": 2.853148141567824e-05, "loss": 0.002, "step": 2001 }, { "epoch": 3.81, "grad_norm": 0.2660377022805947, "learning_rate": 2.8445397833518695e-05, "loss": 0.0119, "step": 2002 }, { "epoch": 3.81, "grad_norm": 0.10384777583728098, "learning_rate": 2.8359422772998278e-05, "loss": 0.0037, "step": 2003 }, { "epoch": 3.81, "grad_norm": 0.1778424856311883, "learning_rate": 2.8273556364509078e-05, "loss": 0.0034, "step": 2004 }, { "epoch": 3.81, "grad_norm": 0.054816935386158505, "learning_rate": 2.8187798738278358e-05, "loss": 0.0013, "step": 2005 }, { "epoch": 3.81, "grad_norm": 0.07834945813118446, "learning_rate": 2.8102150024368525e-05, "loss": 0.0021, "step": 2006 }, { "epoch": 3.82, "grad_norm": 0.03943533603386439, "learning_rate": 2.8016610352676675e-05, "loss": 0.001, "step": 2007 }, { "epoch": 3.82, "grad_norm": 0.20151132057251744, "learning_rate": 2.7931179852934596e-05, "loss": 0.0059, "step": 2008 }, { "epoch": 3.82, "grad_norm": 0.11276731014833763, "learning_rate": 2.784585865470849e-05, "loss": 0.004, "step": 2009 }, { "epoch": 3.82, "grad_norm": 0.1140472214620237, "learning_rate": 2.776064688739878e-05, "loss": 0.0021, "step": 2010 }, { "epoch": 3.82, "grad_norm": 0.13184729372595377, "learning_rate": 2.767554468023992e-05, "loss": 0.0042, "step": 2011 }, { "epoch": 3.83, "grad_norm": 0.06798371963830036, "learning_rate": 2.7590552162300198e-05, "loss": 0.0014, "step": 2012 }, { "epoch": 3.83, "grad_norm": 0.11142260288973364, "learning_rate": 2.750566946248162e-05, "loss": 0.0028, "step": 2013 }, { "epoch": 3.83, "grad_norm": 0.11585717388484869, "learning_rate": 2.742089670951954e-05, "loss": 0.004, "step": 2014 }, { "epoch": 3.83, "grad_norm": 0.2669041138746359, "learning_rate": 2.7336234031982565e-05, "loss": 0.006, "step": 2015 }, { "epoch": 3.83, "grad_norm": 0.19279356680884094, "learning_rate": 2.7251681558272456e-05, "loss": 0.0084, "step": 2016 }, { "epoch": 3.83, "grad_norm": 0.1338139815248205, "learning_rate": 2.716723941662377e-05, "loss": 0.0042, "step": 2017 }, { "epoch": 3.84, "grad_norm": 0.08466515015135673, "learning_rate": 2.7082907735103703e-05, "loss": 0.002, "step": 2018 }, { "epoch": 3.84, "grad_norm": 0.327840366636767, "learning_rate": 2.6998686641611968e-05, "loss": 0.0108, "step": 2019 }, { "epoch": 3.84, "grad_norm": 0.052555215020957946, "learning_rate": 2.6914576263880554e-05, "loss": 0.0012, "step": 2020 }, { "epoch": 3.84, "grad_norm": 0.1441191304525962, "learning_rate": 2.683057672947349e-05, "loss": 0.0048, "step": 2021 }, { "epoch": 3.84, "grad_norm": 0.1980229766794034, "learning_rate": 2.6746688165786804e-05, "loss": 0.0036, "step": 2022 }, { "epoch": 3.85, "grad_norm": 0.1333029130681713, "learning_rate": 2.6662910700048115e-05, "loss": 0.0024, "step": 2023 }, { "epoch": 3.85, "grad_norm": 0.2395641976942636, "learning_rate": 2.6579244459316587e-05, "loss": 0.0035, "step": 2024 }, { "epoch": 3.85, "grad_norm": 0.10562063004312244, "learning_rate": 2.649568957048266e-05, "loss": 0.0038, "step": 2025 }, { "epoch": 3.85, "grad_norm": 0.1771068573804324, "learning_rate": 2.6412246160268016e-05, "loss": 0.0041, "step": 2026 }, { "epoch": 3.85, "grad_norm": 0.06302461113280906, "learning_rate": 2.6328914355225133e-05, "loss": 0.0017, "step": 2027 }, { "epoch": 3.86, "grad_norm": 0.11541574108773603, "learning_rate": 2.624569428173723e-05, "loss": 0.0032, "step": 2028 }, { "epoch": 3.86, "grad_norm": 0.11688965767774528, "learning_rate": 2.6162586066018213e-05, "loss": 0.0036, "step": 2029 }, { "epoch": 3.86, "grad_norm": 0.24142715665388345, "learning_rate": 2.607958983411217e-05, "loss": 0.008, "step": 2030 }, { "epoch": 3.86, "grad_norm": 0.11765185033383793, "learning_rate": 2.5996705711893453e-05, "loss": 0.0036, "step": 2031 }, { "epoch": 3.86, "grad_norm": 0.181901264173488, "learning_rate": 2.591393382506635e-05, "loss": 0.0053, "step": 2032 }, { "epoch": 3.87, "grad_norm": 0.170820146123471, "learning_rate": 2.5831274299164932e-05, "loss": 0.0034, "step": 2033 }, { "epoch": 3.87, "grad_norm": 0.22363067488054775, "learning_rate": 2.574872725955284e-05, "loss": 0.0051, "step": 2034 }, { "epoch": 3.87, "grad_norm": 0.09937724864490816, "learning_rate": 2.56662928314232e-05, "loss": 0.0018, "step": 2035 }, { "epoch": 3.87, "grad_norm": 0.14520547756072028, "learning_rate": 2.5583971139798246e-05, "loss": 0.0034, "step": 2036 }, { "epoch": 3.87, "grad_norm": 0.09728415140187784, "learning_rate": 2.5501762309529298e-05, "loss": 0.003, "step": 2037 }, { "epoch": 3.87, "grad_norm": 0.05618543507886018, "learning_rate": 2.541966646529643e-05, "loss": 0.0013, "step": 2038 }, { "epoch": 3.88, "grad_norm": 0.13162508249952493, "learning_rate": 2.5337683731608496e-05, "loss": 0.0032, "step": 2039 }, { "epoch": 3.88, "grad_norm": 0.09408987846820814, "learning_rate": 2.5255814232802677e-05, "loss": 0.002, "step": 2040 }, { "epoch": 3.88, "grad_norm": 0.2685637012780772, "learning_rate": 2.517405809304446e-05, "loss": 0.0051, "step": 2041 }, { "epoch": 3.88, "grad_norm": 0.1619834052102421, "learning_rate": 2.5092415436327432e-05, "loss": 0.014, "step": 2042 }, { "epoch": 3.88, "grad_norm": 0.24371438900050688, "learning_rate": 2.501088638647302e-05, "loss": 0.0027, "step": 2043 }, { "epoch": 3.89, "grad_norm": 0.24209363199566278, "learning_rate": 2.4929471067130363e-05, "loss": 0.0048, "step": 2044 }, { "epoch": 3.89, "grad_norm": 0.1765807876208265, "learning_rate": 2.4848169601776183e-05, "loss": 0.0061, "step": 2045 }, { "epoch": 3.89, "grad_norm": 0.23846217713826065, "learning_rate": 2.4766982113714454e-05, "loss": 0.005, "step": 2046 }, { "epoch": 3.89, "grad_norm": 0.08604195836262088, "learning_rate": 2.468590872607628e-05, "loss": 0.0024, "step": 2047 }, { "epoch": 3.89, "grad_norm": 0.13616021802980335, "learning_rate": 2.4604949561819803e-05, "loss": 0.004, "step": 2048 }, { "epoch": 3.9, "grad_norm": 0.17661093245619958, "learning_rate": 2.4524104743729838e-05, "loss": 0.0057, "step": 2049 }, { "epoch": 3.9, "grad_norm": 0.07534710370840456, "learning_rate": 2.444337439441783e-05, "loss": 0.0019, "step": 2050 }, { "epoch": 3.9, "grad_norm": 0.0716288224739238, "learning_rate": 2.436275863632156e-05, "loss": 0.0021, "step": 2051 }, { "epoch": 3.9, "grad_norm": 0.0886146320174713, "learning_rate": 2.4282257591705127e-05, "loss": 0.0025, "step": 2052 }, { "epoch": 3.9, "grad_norm": 0.08162868283541941, "learning_rate": 2.420187138265856e-05, "loss": 0.0022, "step": 2053 }, { "epoch": 3.9, "grad_norm": 0.067441959646627, "learning_rate": 2.412160013109773e-05, "loss": 0.0015, "step": 2054 }, { "epoch": 3.91, "grad_norm": 0.15242125110592467, "learning_rate": 2.404144395876422e-05, "loss": 0.0024, "step": 2055 }, { "epoch": 3.91, "grad_norm": 0.07350382394243764, "learning_rate": 2.396140298722501e-05, "loss": 0.0016, "step": 2056 }, { "epoch": 3.91, "grad_norm": 0.15799316452195813, "learning_rate": 2.388147733787237e-05, "loss": 0.0032, "step": 2057 }, { "epoch": 3.91, "grad_norm": 0.17680666277824064, "learning_rate": 2.3801667131923778e-05, "loss": 0.0114, "step": 2058 }, { "epoch": 3.91, "grad_norm": 0.0900670412966967, "learning_rate": 2.3721972490421486e-05, "loss": 0.0025, "step": 2059 }, { "epoch": 3.92, "grad_norm": 0.06123383456579665, "learning_rate": 2.3642393534232543e-05, "loss": 0.0016, "step": 2060 }, { "epoch": 3.92, "grad_norm": 0.07948687745069533, "learning_rate": 2.3562930384048533e-05, "loss": 0.002, "step": 2061 }, { "epoch": 3.92, "grad_norm": 0.1435467012587238, "learning_rate": 2.3483583160385435e-05, "loss": 0.0023, "step": 2062 }, { "epoch": 3.92, "grad_norm": 0.22461017451347118, "learning_rate": 2.3404351983583385e-05, "loss": 0.0066, "step": 2063 }, { "epoch": 3.92, "grad_norm": 0.10099899421639032, "learning_rate": 2.332523697380652e-05, "loss": 0.0076, "step": 2064 }, { "epoch": 3.93, "grad_norm": 0.06366664745365132, "learning_rate": 2.3246238251042783e-05, "loss": 0.0016, "step": 2065 }, { "epoch": 3.93, "grad_norm": 0.3223204922955491, "learning_rate": 2.3167355935103775e-05, "loss": 0.0087, "step": 2066 }, { "epoch": 3.93, "grad_norm": 0.055948894672222124, "learning_rate": 2.3088590145624524e-05, "loss": 0.0019, "step": 2067 }, { "epoch": 3.93, "grad_norm": 0.14145474075245545, "learning_rate": 2.3009941002063386e-05, "loss": 0.0029, "step": 2068 }, { "epoch": 3.93, "grad_norm": 0.19586486212408502, "learning_rate": 2.2931408623701768e-05, "loss": 0.0039, "step": 2069 }, { "epoch": 3.94, "grad_norm": 0.16705294248123112, "learning_rate": 2.2852993129643953e-05, "loss": 0.0058, "step": 2070 }, { "epoch": 3.94, "grad_norm": 0.2222300114207974, "learning_rate": 2.277469463881704e-05, "loss": 0.0044, "step": 2071 }, { "epoch": 3.94, "grad_norm": 0.1331004576071273, "learning_rate": 2.269651326997062e-05, "loss": 0.0044, "step": 2072 }, { "epoch": 3.94, "grad_norm": 0.08297189097341143, "learning_rate": 2.2618449141676666e-05, "loss": 0.0038, "step": 2073 }, { "epoch": 3.94, "grad_norm": 0.09859230988855601, "learning_rate": 2.2540502372329298e-05, "loss": 0.0025, "step": 2074 }, { "epoch": 3.94, "grad_norm": 0.16134141131808927, "learning_rate": 2.2462673080144747e-05, "loss": 0.0034, "step": 2075 }, { "epoch": 3.95, "grad_norm": 0.05107112548593406, "learning_rate": 2.238496138316101e-05, "loss": 0.0014, "step": 2076 }, { "epoch": 3.95, "grad_norm": 0.21262704990669276, "learning_rate": 2.230736739923771e-05, "loss": 0.0052, "step": 2077 }, { "epoch": 3.95, "grad_norm": 0.10046233115787302, "learning_rate": 2.222989124605599e-05, "loss": 0.003, "step": 2078 }, { "epoch": 3.95, "grad_norm": 0.0691312314873374, "learning_rate": 2.2152533041118275e-05, "loss": 0.0022, "step": 2079 }, { "epoch": 3.95, "grad_norm": 0.07719799444422605, "learning_rate": 2.207529290174808e-05, "loss": 0.0026, "step": 2080 }, { "epoch": 3.96, "grad_norm": 0.228900056572133, "learning_rate": 2.1998170945089923e-05, "loss": 0.0031, "step": 2081 }, { "epoch": 3.96, "grad_norm": 0.08374329136139678, "learning_rate": 2.1921167288109034e-05, "loss": 0.0017, "step": 2082 }, { "epoch": 3.96, "grad_norm": 0.27239880897937585, "learning_rate": 2.18442820475912e-05, "loss": 0.0036, "step": 2083 }, { "epoch": 3.96, "grad_norm": 0.2741632468973706, "learning_rate": 2.1767515340142708e-05, "loss": 0.002, "step": 2084 }, { "epoch": 3.96, "grad_norm": 0.07863544598661765, "learning_rate": 2.1690867282189974e-05, "loss": 0.0023, "step": 2085 }, { "epoch": 3.97, "grad_norm": 0.12304204838834815, "learning_rate": 2.1614337989979527e-05, "loss": 0.0035, "step": 2086 }, { "epoch": 3.97, "grad_norm": 0.27251786711394954, "learning_rate": 2.1537927579577754e-05, "loss": 0.0053, "step": 2087 }, { "epoch": 3.97, "grad_norm": 0.10195226022602293, "learning_rate": 2.1461636166870735e-05, "loss": 0.0033, "step": 2088 }, { "epoch": 3.97, "grad_norm": 0.17889928715110698, "learning_rate": 2.1385463867564093e-05, "loss": 0.0054, "step": 2089 }, { "epoch": 3.97, "grad_norm": 0.07488206845543043, "learning_rate": 2.1309410797182748e-05, "loss": 0.0016, "step": 2090 }, { "epoch": 3.98, "grad_norm": 0.13675058583763772, "learning_rate": 2.123347707107092e-05, "loss": 0.0034, "step": 2091 }, { "epoch": 3.98, "grad_norm": 0.18982408373708987, "learning_rate": 2.1157662804391688e-05, "loss": 0.0033, "step": 2092 }, { "epoch": 3.98, "grad_norm": 0.12906197969271738, "learning_rate": 2.1081968112127004e-05, "loss": 0.0022, "step": 2093 }, { "epoch": 3.98, "grad_norm": 0.17014883349963036, "learning_rate": 2.1006393109077525e-05, "loss": 0.0044, "step": 2094 }, { "epoch": 3.98, "grad_norm": 0.13975808037566378, "learning_rate": 2.0930937909862315e-05, "loss": 0.0062, "step": 2095 }, { "epoch": 3.98, "grad_norm": 0.21009805253392408, "learning_rate": 2.0855602628918747e-05, "loss": 0.005, "step": 2096 }, { "epoch": 3.99, "grad_norm": 0.11389410853347025, "learning_rate": 2.0780387380502375e-05, "loss": 0.0062, "step": 2097 }, { "epoch": 3.99, "grad_norm": 0.13821889800742213, "learning_rate": 2.070529227868665e-05, "loss": 0.004, "step": 2098 }, { "epoch": 3.99, "grad_norm": 0.040610235186443015, "learning_rate": 2.0630317437362834e-05, "loss": 0.001, "step": 2099 }, { "epoch": 3.99, "grad_norm": 0.3099558929435358, "learning_rate": 2.05554629702398e-05, "loss": 0.0094, "step": 2100 }, { "epoch": 3.99, "eval_blimp_filtered_avg": 0.7107462686567164, "eval_blimp_filtered_std": 0.005012837611842662, "step": 2100 }, { "epoch": 3.99, "eval_blimp_supplement_avg": 0.8060344827586207, "eval_blimp_supplement_std": 0.017248054239992837, "step": 2100 }, { "epoch": 3.99, "eval_vqa_filtered_avg": 0.57, "eval_vqa_filtered_std": 0.04975698519562428, "step": 2100 }, { "epoch": 3.99, "eval_winoground_filtered_avg": 0.63, "eval_winoground_filtered_std": 0.048523658709391, "step": 2100 }, { "epoch": 3.99, "grad_norm": 0.2703101646878085, "learning_rate": 2.0480728990843833e-05, "loss": 0.0066, "step": 2101 }, { "epoch": 4.0, "grad_norm": 0.14584426359564218, "learning_rate": 2.040611561251853e-05, "loss": 0.0099, "step": 2102 }, { "epoch": 4.0, "grad_norm": 0.2405910759986856, "learning_rate": 2.033162294842449e-05, "loss": 0.0112, "step": 2103 }, { "epoch": 4.0, "grad_norm": 0.027826264726547242, "learning_rate": 2.0257251111539365e-05, "loss": 0.0009, "step": 2104 }, { "epoch": 4.0, "grad_norm": 0.12641289765456418, "learning_rate": 2.0183000214657478e-05, "loss": 0.0031, "step": 2105 }, { "epoch": 4.0, "grad_norm": 0.020216992666688006, "learning_rate": 2.0108870370389687e-05, "loss": 0.0008, "step": 2106 }, { "epoch": 4.01, "grad_norm": 0.02261041872760547, "learning_rate": 2.0034861691163374e-05, "loss": 0.0008, "step": 2107 }, { "epoch": 4.01, "grad_norm": 0.02839203291176099, "learning_rate": 1.996097428922209e-05, "loss": 0.0009, "step": 2108 }, { "epoch": 4.01, "grad_norm": 0.0582095373371363, "learning_rate": 1.9887208276625446e-05, "loss": 0.0007, "step": 2109 }, { "epoch": 4.01, "grad_norm": 0.03656119595701867, "learning_rate": 1.9813563765248953e-05, "loss": 0.0011, "step": 2110 }, { "epoch": 4.01, "grad_norm": 0.022521465388367756, "learning_rate": 1.9740040866783892e-05, "loss": 0.0009, "step": 2111 }, { "epoch": 4.02, "grad_norm": 0.03666822567404891, "learning_rate": 1.9666639692737055e-05, "loss": 0.0011, "step": 2112 }, { "epoch": 4.02, "grad_norm": 0.04029139813314486, "learning_rate": 1.959336035443061e-05, "loss": 0.0009, "step": 2113 }, { "epoch": 4.02, "grad_norm": 0.04430097600614133, "learning_rate": 1.9520202963002043e-05, "loss": 0.0022, "step": 2114 }, { "epoch": 4.02, "grad_norm": 0.1632013888360851, "learning_rate": 1.9447167629403805e-05, "loss": 0.0026, "step": 2115 }, { "epoch": 4.02, "grad_norm": 0.054182188220267935, "learning_rate": 1.9374254464403207e-05, "loss": 0.0011, "step": 2116 }, { "epoch": 4.02, "grad_norm": 0.08484679805313386, "learning_rate": 1.930146357858238e-05, "loss": 0.0027, "step": 2117 }, { "epoch": 4.03, "grad_norm": 0.13997839183434996, "learning_rate": 1.9228795082337926e-05, "loss": 0.0034, "step": 2118 }, { "epoch": 4.03, "grad_norm": 0.02561355823906125, "learning_rate": 1.9156249085880818e-05, "loss": 0.0008, "step": 2119 }, { "epoch": 4.03, "grad_norm": 0.03361904626627986, "learning_rate": 1.9083825699236323e-05, "loss": 0.0014, "step": 2120 }, { "epoch": 4.03, "grad_norm": 0.05789756629804056, "learning_rate": 1.901152503224366e-05, "loss": 0.0014, "step": 2121 }, { "epoch": 4.03, "grad_norm": 0.04551082959478288, "learning_rate": 1.893934719455599e-05, "loss": 0.0006, "step": 2122 }, { "epoch": 4.04, "grad_norm": 0.0250980626588901, "learning_rate": 1.886729229564016e-05, "loss": 0.0013, "step": 2123 }, { "epoch": 4.04, "grad_norm": 0.08448506293916248, "learning_rate": 1.8795360444776577e-05, "loss": 0.0025, "step": 2124 }, { "epoch": 4.04, "grad_norm": 0.028151289003708363, "learning_rate": 1.872355175105902e-05, "loss": 0.001, "step": 2125 }, { "epoch": 4.04, "grad_norm": 0.017409005398781645, "learning_rate": 1.8651866323394473e-05, "loss": 0.0007, "step": 2126 }, { "epoch": 4.04, "grad_norm": 0.00998135422532678, "learning_rate": 1.8580304270503024e-05, "loss": 0.0004, "step": 2127 }, { "epoch": 4.05, "grad_norm": 0.03159723254070562, "learning_rate": 1.8508865700917598e-05, "loss": 0.0007, "step": 2128 }, { "epoch": 4.05, "grad_norm": 0.024271895067512907, "learning_rate": 1.843755072298383e-05, "loss": 0.0007, "step": 2129 }, { "epoch": 4.05, "grad_norm": 0.034391872200718314, "learning_rate": 1.836635944485999e-05, "loss": 0.001, "step": 2130 }, { "epoch": 4.05, "grad_norm": 0.02713660983256368, "learning_rate": 1.829529197451666e-05, "loss": 0.001, "step": 2131 }, { "epoch": 4.05, "grad_norm": 0.039010931382664864, "learning_rate": 1.8224348419736693e-05, "loss": 0.0009, "step": 2132 }, { "epoch": 4.06, "grad_norm": 0.0236378724616161, "learning_rate": 1.815352888811498e-05, "loss": 0.0009, "step": 2133 }, { "epoch": 4.06, "grad_norm": 0.03979736363157825, "learning_rate": 1.8082833487058337e-05, "loss": 0.0009, "step": 2134 }, { "epoch": 4.06, "grad_norm": 0.3412062574176322, "learning_rate": 1.8012262323785323e-05, "loss": 0.0046, "step": 2135 }, { "epoch": 4.06, "grad_norm": 0.05746913459454494, "learning_rate": 1.7941815505326042e-05, "loss": 0.0031, "step": 2136 }, { "epoch": 4.06, "grad_norm": 0.06584462013769363, "learning_rate": 1.787149313852209e-05, "loss": 0.0024, "step": 2137 }, { "epoch": 4.06, "grad_norm": 0.10593237347661362, "learning_rate": 1.7801295330026223e-05, "loss": 0.0031, "step": 2138 }, { "epoch": 4.07, "grad_norm": 0.04542746334710252, "learning_rate": 1.7731222186302323e-05, "loss": 0.0014, "step": 2139 }, { "epoch": 4.07, "grad_norm": 0.1041673901902467, "learning_rate": 1.7661273813625256e-05, "loss": 0.0029, "step": 2140 }, { "epoch": 4.07, "grad_norm": 0.09196500481290533, "learning_rate": 1.7591450318080573e-05, "loss": 0.0015, "step": 2141 }, { "epoch": 4.07, "grad_norm": 0.010686776975271577, "learning_rate": 1.752175180556447e-05, "loss": 0.0005, "step": 2142 }, { "epoch": 4.07, "grad_norm": 0.02041008806581037, "learning_rate": 1.7452178381783613e-05, "loss": 0.0006, "step": 2143 }, { "epoch": 4.08, "grad_norm": 0.07559523840415752, "learning_rate": 1.7382730152254922e-05, "loss": 0.0029, "step": 2144 }, { "epoch": 4.08, "grad_norm": 0.03650943807464675, "learning_rate": 1.7313407222305456e-05, "loss": 0.0011, "step": 2145 }, { "epoch": 4.08, "grad_norm": 0.10815721694600308, "learning_rate": 1.7244209697072233e-05, "loss": 0.0014, "step": 2146 }, { "epoch": 4.08, "grad_norm": 0.024307087470174137, "learning_rate": 1.7175137681502085e-05, "loss": 0.0009, "step": 2147 }, { "epoch": 4.08, "grad_norm": 0.03478432264086617, "learning_rate": 1.71061912803515e-05, "loss": 0.0008, "step": 2148 }, { "epoch": 4.09, "grad_norm": 0.0711556888740473, "learning_rate": 1.703737059818643e-05, "loss": 0.0011, "step": 2149 }, { "epoch": 4.09, "grad_norm": 0.09023813502132623, "learning_rate": 1.6968675739382213e-05, "loss": 0.0014, "step": 2150 }, { "epoch": 4.09, "grad_norm": 0.018299833895395884, "learning_rate": 1.6900106808123307e-05, "loss": 0.0008, "step": 2151 }, { "epoch": 4.09, "grad_norm": 0.019014728480897945, "learning_rate": 1.6831663908403173e-05, "loss": 0.0005, "step": 2152 }, { "epoch": 4.09, "grad_norm": 0.035848106636017264, "learning_rate": 1.6763347144024198e-05, "loss": 0.0012, "step": 2153 }, { "epoch": 4.1, "grad_norm": 0.04934356603696483, "learning_rate": 1.669515661859743e-05, "loss": 0.0006, "step": 2154 }, { "epoch": 4.1, "grad_norm": 0.01742374710219539, "learning_rate": 1.6627092435542448e-05, "loss": 0.0006, "step": 2155 }, { "epoch": 4.1, "grad_norm": 0.06755597470378497, "learning_rate": 1.655915469808722e-05, "loss": 0.001, "step": 2156 }, { "epoch": 4.1, "grad_norm": 0.043706422068466366, "learning_rate": 1.6491343509267955e-05, "loss": 0.0017, "step": 2157 }, { "epoch": 4.1, "grad_norm": 0.042261824303225844, "learning_rate": 1.6423658971928923e-05, "loss": 0.0014, "step": 2158 }, { "epoch": 4.1, "grad_norm": 0.031508116540596855, "learning_rate": 1.6356101188722305e-05, "loss": 0.001, "step": 2159 }, { "epoch": 4.11, "grad_norm": 0.0542761855716672, "learning_rate": 1.6288670262108108e-05, "loss": 0.0012, "step": 2160 }, { "epoch": 4.11, "grad_norm": 0.005268449943186476, "learning_rate": 1.6221366294353857e-05, "loss": 0.0003, "step": 2161 }, { "epoch": 4.11, "grad_norm": 0.01821103350861815, "learning_rate": 1.6154189387534546e-05, "loss": 0.0006, "step": 2162 }, { "epoch": 4.11, "grad_norm": 0.03069365229537274, "learning_rate": 1.6087139643532533e-05, "loss": 0.0007, "step": 2163 }, { "epoch": 4.11, "grad_norm": 0.059439817908067544, "learning_rate": 1.6020217164037244e-05, "loss": 0.0018, "step": 2164 }, { "epoch": 4.12, "grad_norm": 0.01204095424100321, "learning_rate": 1.5953422050545098e-05, "loss": 0.0004, "step": 2165 }, { "epoch": 4.12, "grad_norm": 0.048935210558161385, "learning_rate": 1.5886754404359393e-05, "loss": 0.0009, "step": 2166 }, { "epoch": 4.12, "grad_norm": 0.05581307034706589, "learning_rate": 1.5820214326590066e-05, "loss": 0.0007, "step": 2167 }, { "epoch": 4.12, "grad_norm": 0.16323245707788916, "learning_rate": 1.575380191815361e-05, "loss": 0.0033, "step": 2168 }, { "epoch": 4.12, "grad_norm": 0.01434743635222807, "learning_rate": 1.5687517279772846e-05, "loss": 0.0006, "step": 2169 }, { "epoch": 4.13, "grad_norm": 0.02885433903772247, "learning_rate": 1.5621360511976857e-05, "loss": 0.0009, "step": 2170 }, { "epoch": 4.13, "grad_norm": 0.03197180268119516, "learning_rate": 1.5555331715100784e-05, "loss": 0.0005, "step": 2171 }, { "epoch": 4.13, "grad_norm": 0.04238754643747755, "learning_rate": 1.548943098928566e-05, "loss": 0.0009, "step": 2172 }, { "epoch": 4.13, "grad_norm": 0.05993482187442768, "learning_rate": 1.5423658434478338e-05, "loss": 0.0016, "step": 2173 }, { "epoch": 4.13, "grad_norm": 0.030837239576284328, "learning_rate": 1.535801415043123e-05, "loss": 0.0011, "step": 2174 }, { "epoch": 4.13, "grad_norm": 0.02301510829768239, "learning_rate": 1.5292498236702213e-05, "loss": 0.0007, "step": 2175 }, { "epoch": 4.14, "grad_norm": 0.03633628857337952, "learning_rate": 1.522711079265452e-05, "loss": 0.001, "step": 2176 }, { "epoch": 4.14, "grad_norm": 0.02467291281558659, "learning_rate": 1.5161851917456494e-05, "loss": 0.001, "step": 2177 }, { "epoch": 4.14, "grad_norm": 0.11638744895491956, "learning_rate": 1.5096721710081507e-05, "loss": 0.0018, "step": 2178 }, { "epoch": 4.14, "grad_norm": 0.028642741301740222, "learning_rate": 1.5031720269307792e-05, "loss": 0.0007, "step": 2179 }, { "epoch": 4.14, "grad_norm": 0.03780352398194929, "learning_rate": 1.4966847693718279e-05, "loss": 0.0009, "step": 2180 }, { "epoch": 4.15, "grad_norm": 0.1796541784542282, "learning_rate": 1.4902104081700441e-05, "loss": 0.0025, "step": 2181 }, { "epoch": 4.15, "grad_norm": 0.1868047208620871, "learning_rate": 1.4837489531446237e-05, "loss": 0.0022, "step": 2182 }, { "epoch": 4.15, "grad_norm": 0.4007672830879667, "learning_rate": 1.4773004140951807e-05, "loss": 0.0044, "step": 2183 }, { "epoch": 4.15, "grad_norm": 0.022167126631072384, "learning_rate": 1.4708648008017444e-05, "loss": 0.0008, "step": 2184 }, { "epoch": 4.15, "grad_norm": 0.08333022940524783, "learning_rate": 1.4644421230247374e-05, "loss": 0.0032, "step": 2185 }, { "epoch": 4.16, "grad_norm": 0.041903388699391995, "learning_rate": 1.4580323905049686e-05, "loss": 0.0009, "step": 2186 }, { "epoch": 4.16, "grad_norm": 0.02540920941064506, "learning_rate": 1.451635612963611e-05, "loss": 0.0009, "step": 2187 }, { "epoch": 4.16, "grad_norm": 0.010028674723647294, "learning_rate": 1.4452518001021864e-05, "loss": 0.0004, "step": 2188 }, { "epoch": 4.16, "grad_norm": 0.12915832262561266, "learning_rate": 1.4388809616025622e-05, "loss": 0.0022, "step": 2189 }, { "epoch": 4.16, "grad_norm": 0.025920346047791757, "learning_rate": 1.4325231071269218e-05, "loss": 0.0008, "step": 2190 }, { "epoch": 4.17, "grad_norm": 0.02292175422092031, "learning_rate": 1.4261782463177587e-05, "loss": 0.0006, "step": 2191 }, { "epoch": 4.17, "grad_norm": 0.10373020930743873, "learning_rate": 1.4198463887978596e-05, "loss": 0.0037, "step": 2192 }, { "epoch": 4.17, "grad_norm": 0.02475583174173045, "learning_rate": 1.41352754417029e-05, "loss": 0.0008, "step": 2193 }, { "epoch": 4.17, "grad_norm": 0.010136513290601555, "learning_rate": 1.4072217220183793e-05, "loss": 0.0004, "step": 2194 }, { "epoch": 4.17, "grad_norm": 0.033407724064127536, "learning_rate": 1.4009289319057118e-05, "loss": 0.0006, "step": 2195 }, { "epoch": 4.17, "grad_norm": 0.01956183863715537, "learning_rate": 1.3946491833760988e-05, "loss": 0.0006, "step": 2196 }, { "epoch": 4.18, "grad_norm": 0.017611462212393427, "learning_rate": 1.3883824859535765e-05, "loss": 0.0006, "step": 2197 }, { "epoch": 4.18, "grad_norm": 0.011941603398477036, "learning_rate": 1.3821288491423867e-05, "loss": 0.0005, "step": 2198 }, { "epoch": 4.18, "grad_norm": 0.025113705202182326, "learning_rate": 1.3758882824269659e-05, "loss": 0.0006, "step": 2199 }, { "epoch": 4.18, "grad_norm": 0.06845660984592719, "learning_rate": 1.3696607952719253e-05, "loss": 0.0012, "step": 2200 }, { "epoch": 4.18, "eval_blimp_filtered_avg": 0.7113432835820895, "eval_blimp_filtered_std": 0.005007223451132713, "step": 2200 }, { "epoch": 4.18, "eval_blimp_supplement_avg": 0.8103448275862069, "eval_blimp_supplement_std": 0.0170726829875934, "step": 2200 }, { "epoch": 4.18, "eval_vqa_filtered_avg": 0.56, "eval_vqa_filtered_std": 0.049888765156985884, "step": 2200 }, { "epoch": 4.18, "eval_winoground_filtered_avg": 0.63, "eval_winoground_filtered_std": 0.048523658709391, "step": 2200 }, { "epoch": 4.18, "grad_norm": 0.023211075279813215, "learning_rate": 1.3634463971220402e-05, "loss": 0.0005, "step": 2201 }, { "epoch": 4.19, "grad_norm": 0.021743351771981466, "learning_rate": 1.3572450974022321e-05, "loss": 0.0009, "step": 2202 }, { "epoch": 4.19, "grad_norm": 0.016197899875574382, "learning_rate": 1.3510569055175603e-05, "loss": 0.0005, "step": 2203 }, { "epoch": 4.19, "grad_norm": 0.03175858761485727, "learning_rate": 1.3448818308532029e-05, "loss": 0.0009, "step": 2204 }, { "epoch": 4.19, "grad_norm": 0.044805559420463416, "learning_rate": 1.3387198827744473e-05, "loss": 0.0009, "step": 2205 }, { "epoch": 4.19, "grad_norm": 0.10279043886756811, "learning_rate": 1.3325710706266692e-05, "loss": 0.0014, "step": 2206 }, { "epoch": 4.2, "grad_norm": 0.02467847749963854, "learning_rate": 1.326435403735321e-05, "loss": 0.0007, "step": 2207 }, { "epoch": 4.2, "grad_norm": 0.010984586508518232, "learning_rate": 1.3203128914059194e-05, "loss": 0.0004, "step": 2208 }, { "epoch": 4.2, "grad_norm": 0.02722337015465194, "learning_rate": 1.3142035429240373e-05, "loss": 0.0011, "step": 2209 }, { "epoch": 4.2, "grad_norm": 0.15902041589098054, "learning_rate": 1.3081073675552735e-05, "loss": 0.0006, "step": 2210 }, { "epoch": 4.2, "grad_norm": 0.06442983169782598, "learning_rate": 1.3020243745452498e-05, "loss": 0.0014, "step": 2211 }, { "epoch": 4.21, "grad_norm": 0.045487256363298076, "learning_rate": 1.2959545731196032e-05, "loss": 0.0015, "step": 2212 }, { "epoch": 4.21, "grad_norm": 0.02670170080512995, "learning_rate": 1.2898979724839545e-05, "loss": 0.0006, "step": 2213 }, { "epoch": 4.21, "grad_norm": 0.016159138212590386, "learning_rate": 1.283854581823909e-05, "loss": 0.0005, "step": 2214 }, { "epoch": 4.21, "grad_norm": 0.08080786755340247, "learning_rate": 1.2778244103050341e-05, "loss": 0.0031, "step": 2215 }, { "epoch": 4.21, "grad_norm": 0.07231434741758229, "learning_rate": 1.271807467072852e-05, "loss": 0.0029, "step": 2216 }, { "epoch": 4.21, "grad_norm": 0.046246360157070066, "learning_rate": 1.2658037612528184e-05, "loss": 0.001, "step": 2217 }, { "epoch": 4.22, "grad_norm": 0.014579787572196998, "learning_rate": 1.2598133019503189e-05, "loss": 0.0004, "step": 2218 }, { "epoch": 4.22, "grad_norm": 0.023023522987247573, "learning_rate": 1.2538360982506459e-05, "loss": 0.0008, "step": 2219 }, { "epoch": 4.22, "grad_norm": 0.041980533042016197, "learning_rate": 1.2478721592189846e-05, "loss": 0.0009, "step": 2220 }, { "epoch": 4.22, "grad_norm": 0.06749635580897663, "learning_rate": 1.2419214939004065e-05, "loss": 0.0032, "step": 2221 }, { "epoch": 4.22, "grad_norm": 0.06044225164599959, "learning_rate": 1.2359841113198534e-05, "loss": 0.0009, "step": 2222 }, { "epoch": 4.23, "grad_norm": 0.017926206597966787, "learning_rate": 1.2300600204821178e-05, "loss": 0.0007, "step": 2223 }, { "epoch": 4.23, "grad_norm": 0.3746163684573489, "learning_rate": 1.2241492303718372e-05, "loss": 0.0031, "step": 2224 }, { "epoch": 4.23, "grad_norm": 0.06469539663921119, "learning_rate": 1.2182517499534728e-05, "loss": 0.0014, "step": 2225 }, { "epoch": 4.23, "grad_norm": 0.07699499224776057, "learning_rate": 1.2123675881713048e-05, "loss": 0.0017, "step": 2226 }, { "epoch": 4.23, "grad_norm": 0.02087287442704627, "learning_rate": 1.2064967539494087e-05, "loss": 0.0005, "step": 2227 }, { "epoch": 4.24, "grad_norm": 0.025692218683222783, "learning_rate": 1.2006392561916535e-05, "loss": 0.0005, "step": 2228 }, { "epoch": 4.24, "grad_norm": 0.015168296168479426, "learning_rate": 1.1947951037816762e-05, "loss": 0.0005, "step": 2229 }, { "epoch": 4.24, "grad_norm": 0.02748066566757391, "learning_rate": 1.1889643055828758e-05, "loss": 0.001, "step": 2230 }, { "epoch": 4.24, "grad_norm": 0.017543296419526445, "learning_rate": 1.1831468704383997e-05, "loss": 0.0006, "step": 2231 }, { "epoch": 4.24, "grad_norm": 0.04038040410045484, "learning_rate": 1.1773428071711256e-05, "loss": 0.0009, "step": 2232 }, { "epoch": 4.25, "grad_norm": 0.13565157862186356, "learning_rate": 1.1715521245836524e-05, "loss": 0.0009, "step": 2233 }, { "epoch": 4.25, "grad_norm": 0.01676188380404595, "learning_rate": 1.1657748314582851e-05, "loss": 0.0005, "step": 2234 }, { "epoch": 4.25, "grad_norm": 0.09308468707616276, "learning_rate": 1.1600109365570234e-05, "loss": 0.0016, "step": 2235 }, { "epoch": 4.25, "grad_norm": 0.010363078306186676, "learning_rate": 1.1542604486215458e-05, "loss": 0.0004, "step": 2236 }, { "epoch": 4.25, "grad_norm": 0.21410616437679444, "learning_rate": 1.1485233763731961e-05, "loss": 0.0013, "step": 2237 }, { "epoch": 4.25, "grad_norm": 0.03253620788138145, "learning_rate": 1.1427997285129743e-05, "loss": 0.0007, "step": 2238 }, { "epoch": 4.26, "grad_norm": 0.027655233516539555, "learning_rate": 1.1370895137215176e-05, "loss": 0.0008, "step": 2239 }, { "epoch": 4.26, "grad_norm": 0.01722467147336098, "learning_rate": 1.1313927406590908e-05, "loss": 0.0005, "step": 2240 }, { "epoch": 4.26, "grad_norm": 0.01968236138781447, "learning_rate": 1.1257094179655769e-05, "loss": 0.0006, "step": 2241 }, { "epoch": 4.26, "grad_norm": 0.045684068046885275, "learning_rate": 1.1200395542604547e-05, "loss": 0.0012, "step": 2242 }, { "epoch": 4.26, "grad_norm": 0.04426575686826035, "learning_rate": 1.1143831581427889e-05, "loss": 0.0013, "step": 2243 }, { "epoch": 4.27, "grad_norm": 0.011502032969880317, "learning_rate": 1.1087402381912293e-05, "loss": 0.0004, "step": 2244 }, { "epoch": 4.27, "grad_norm": 0.012917952856439157, "learning_rate": 1.1031108029639758e-05, "loss": 0.0005, "step": 2245 }, { "epoch": 4.27, "grad_norm": 0.05735180073299798, "learning_rate": 1.0974948609987823e-05, "loss": 0.0014, "step": 2246 }, { "epoch": 4.27, "grad_norm": 0.03476269567588729, "learning_rate": 1.0918924208129389e-05, "loss": 0.0007, "step": 2247 }, { "epoch": 4.27, "grad_norm": 0.044019130151903355, "learning_rate": 1.0863034909032566e-05, "loss": 0.0008, "step": 2248 }, { "epoch": 4.28, "grad_norm": 0.026438502748404636, "learning_rate": 1.0807280797460572e-05, "loss": 0.0007, "step": 2249 }, { "epoch": 4.28, "grad_norm": 0.05459544015728906, "learning_rate": 1.0751661957971582e-05, "loss": 0.0021, "step": 2250 }, { "epoch": 4.28, "grad_norm": 0.01829272874365218, "learning_rate": 1.069617847491866e-05, "loss": 0.0007, "step": 2251 }, { "epoch": 4.28, "grad_norm": 0.04489059864173249, "learning_rate": 1.0640830432449534e-05, "loss": 0.001, "step": 2252 }, { "epoch": 4.28, "grad_norm": 0.02424797898093633, "learning_rate": 1.0585617914506529e-05, "loss": 0.0004, "step": 2253 }, { "epoch": 4.29, "grad_norm": 0.03023289943477459, "learning_rate": 1.0530541004826455e-05, "loss": 0.0008, "step": 2254 }, { "epoch": 4.29, "grad_norm": 0.05577576045849193, "learning_rate": 1.0475599786940438e-05, "loss": 0.0011, "step": 2255 }, { "epoch": 4.29, "grad_norm": 0.023567108974518344, "learning_rate": 1.0420794344173812e-05, "loss": 0.0007, "step": 2256 }, { "epoch": 4.29, "grad_norm": 0.0061855596756405316, "learning_rate": 1.0366124759645957e-05, "loss": 0.0003, "step": 2257 }, { "epoch": 4.29, "grad_norm": 0.11735199444729304, "learning_rate": 1.031159111627028e-05, "loss": 0.0019, "step": 2258 }, { "epoch": 4.29, "grad_norm": 0.16992243329570939, "learning_rate": 1.025719349675396e-05, "loss": 0.004, "step": 2259 }, { "epoch": 4.3, "grad_norm": 0.11783092589962654, "learning_rate": 1.0202931983597896e-05, "loss": 0.001, "step": 2260 }, { "epoch": 4.3, "grad_norm": 0.051425100774424755, "learning_rate": 1.0148806659096555e-05, "loss": 0.0011, "step": 2261 }, { "epoch": 4.3, "grad_norm": 0.03212252580554443, "learning_rate": 1.009481760533787e-05, "loss": 0.0009, "step": 2262 }, { "epoch": 4.3, "grad_norm": 0.05909699825523043, "learning_rate": 1.0040964904203076e-05, "loss": 0.0017, "step": 2263 }, { "epoch": 4.3, "grad_norm": 0.021683654283113356, "learning_rate": 9.987248637366664e-06, "loss": 0.0008, "step": 2264 }, { "epoch": 4.31, "grad_norm": 0.027646669112229606, "learning_rate": 9.933668886296155e-06, "loss": 0.0005, "step": 2265 }, { "epoch": 4.31, "grad_norm": 0.015563462159981327, "learning_rate": 9.880225732252035e-06, "loss": 0.0004, "step": 2266 }, { "epoch": 4.31, "grad_norm": 0.046722108674008264, "learning_rate": 9.82691925628766e-06, "loss": 0.0011, "step": 2267 }, { "epoch": 4.31, "grad_norm": 0.027101823304647422, "learning_rate": 9.773749539249055e-06, "loss": 0.0006, "step": 2268 }, { "epoch": 4.31, "grad_norm": 0.05534943978982573, "learning_rate": 9.720716661774843e-06, "loss": 0.002, "step": 2269 }, { "epoch": 4.32, "grad_norm": 0.009690000393852425, "learning_rate": 9.667820704296116e-06, "loss": 0.0004, "step": 2270 }, { "epoch": 4.32, "grad_norm": 0.04698176262346315, "learning_rate": 9.615061747036314e-06, "loss": 0.0005, "step": 2271 }, { "epoch": 4.32, "grad_norm": 0.023757508417077196, "learning_rate": 9.562439870011098e-06, "loss": 0.0005, "step": 2272 }, { "epoch": 4.32, "grad_norm": 0.032016316484417925, "learning_rate": 9.509955153028193e-06, "loss": 0.0009, "step": 2273 }, { "epoch": 4.32, "grad_norm": 0.05200286089748206, "learning_rate": 9.457607675687374e-06, "loss": 0.0013, "step": 2274 }, { "epoch": 4.33, "grad_norm": 0.07514387447650506, "learning_rate": 9.405397517380233e-06, "loss": 0.001, "step": 2275 }, { "epoch": 4.33, "grad_norm": 0.013716600658865477, "learning_rate": 9.353324757290082e-06, "loss": 0.0004, "step": 2276 }, { "epoch": 4.33, "grad_norm": 0.021430973291896718, "learning_rate": 9.301389474391897e-06, "loss": 0.0006, "step": 2277 }, { "epoch": 4.33, "grad_norm": 0.019042121301878056, "learning_rate": 9.249591747452124e-06, "loss": 0.0005, "step": 2278 }, { "epoch": 4.33, "grad_norm": 0.07460660899321149, "learning_rate": 9.197931655028558e-06, "loss": 0.001, "step": 2279 }, { "epoch": 4.33, "grad_norm": 0.012326423732101247, "learning_rate": 9.146409275470346e-06, "loss": 0.0004, "step": 2280 }, { "epoch": 4.34, "grad_norm": 0.015177406902266478, "learning_rate": 9.095024686917687e-06, "loss": 0.0004, "step": 2281 }, { "epoch": 4.34, "grad_norm": 0.009765117329358846, "learning_rate": 9.043777967301836e-06, "loss": 0.0004, "step": 2282 }, { "epoch": 4.34, "grad_norm": 0.056348636809888414, "learning_rate": 8.99266919434495e-06, "loss": 0.001, "step": 2283 }, { "epoch": 4.34, "grad_norm": 0.017156826426080473, "learning_rate": 8.941698445559965e-06, "loss": 0.0007, "step": 2284 }, { "epoch": 4.34, "grad_norm": 0.022317465634687367, "learning_rate": 8.890865798250503e-06, "loss": 0.0005, "step": 2285 }, { "epoch": 4.35, "grad_norm": 0.058173080898459725, "learning_rate": 8.840171329510705e-06, "loss": 0.001, "step": 2286 }, { "epoch": 4.35, "grad_norm": 0.02308522600062027, "learning_rate": 8.789615116225213e-06, "loss": 0.0007, "step": 2287 }, { "epoch": 4.35, "grad_norm": 0.03144886308695512, "learning_rate": 8.739197235068918e-06, "loss": 0.0009, "step": 2288 }, { "epoch": 4.35, "grad_norm": 0.03065071956137382, "learning_rate": 8.68891776250691e-06, "loss": 0.0008, "step": 2289 }, { "epoch": 4.35, "grad_norm": 0.013498540600616275, "learning_rate": 8.638776774794454e-06, "loss": 0.0005, "step": 2290 }, { "epoch": 4.36, "grad_norm": 0.02267759437385857, "learning_rate": 8.58877434797668e-06, "loss": 0.0007, "step": 2291 }, { "epoch": 4.36, "grad_norm": 0.020068140010749476, "learning_rate": 8.538910557888635e-06, "loss": 0.0005, "step": 2292 }, { "epoch": 4.36, "grad_norm": 0.019758579432176977, "learning_rate": 8.489185480155082e-06, "loss": 0.0005, "step": 2293 }, { "epoch": 4.36, "grad_norm": 0.0067021153192553945, "learning_rate": 8.439599190190417e-06, "loss": 0.0003, "step": 2294 }, { "epoch": 4.36, "grad_norm": 0.06291586245509134, "learning_rate": 8.390151763198528e-06, "loss": 0.001, "step": 2295 }, { "epoch": 4.37, "grad_norm": 0.035544667453152615, "learning_rate": 8.340843274172728e-06, "loss": 0.0008, "step": 2296 }, { "epoch": 4.37, "grad_norm": 0.056240380246989095, "learning_rate": 8.291673797895616e-06, "loss": 0.0016, "step": 2297 }, { "epoch": 4.37, "grad_norm": 0.14709299344430404, "learning_rate": 8.242643408938922e-06, "loss": 0.003, "step": 2298 }, { "epoch": 4.37, "grad_norm": 0.032492592558725844, "learning_rate": 8.193752181663461e-06, "loss": 0.0008, "step": 2299 }, { "epoch": 4.37, "grad_norm": 0.059510887904614375, "learning_rate": 8.145000190219e-06, "loss": 0.0012, "step": 2300 }, { "epoch": 4.37, "eval_blimp_filtered_avg": 0.7105970149253731, "eval_blimp_filtered_std": 0.005019454245014997, "step": 2300 }, { "epoch": 4.37, "eval_blimp_supplement_avg": 0.8103448275862069, "eval_blimp_supplement_std": 0.017078440291583386, "step": 2300 }, { "epoch": 4.37, "eval_vqa_filtered_avg": 0.56, "eval_vqa_filtered_std": 0.049888765156985884, "step": 2300 }, { "epoch": 4.37, "eval_winoground_filtered_avg": 0.63, "eval_winoground_filtered_std": 0.048523658709391, "step": 2300 }, { "epoch": 4.37, "grad_norm": 0.026751576593003444, "learning_rate": 8.096387508544123e-06, "loss": 0.0006, "step": 2301 }, { "epoch": 4.38, "grad_norm": 0.03355490009043517, "learning_rate": 8.047914210366104e-06, "loss": 0.0007, "step": 2302 }, { "epoch": 4.38, "grad_norm": 0.12047766532822138, "learning_rate": 7.999580369200898e-06, "loss": 0.0014, "step": 2303 }, { "epoch": 4.38, "grad_norm": 0.22488611818838974, "learning_rate": 7.951386058352894e-06, "loss": 0.0038, "step": 2304 }, { "epoch": 4.38, "grad_norm": 0.020484803729220128, "learning_rate": 7.903331350914867e-06, "loss": 0.0006, "step": 2305 }, { "epoch": 4.38, "grad_norm": 0.031146828915640922, "learning_rate": 7.855416319767905e-06, "loss": 0.0006, "step": 2306 }, { "epoch": 4.39, "grad_norm": 0.059892263305618046, "learning_rate": 7.807641037581226e-06, "loss": 0.0012, "step": 2307 }, { "epoch": 4.39, "grad_norm": 0.017479959286144084, "learning_rate": 7.760005576812112e-06, "loss": 0.0006, "step": 2308 }, { "epoch": 4.39, "grad_norm": 0.02295839382988013, "learning_rate": 7.712510009705764e-06, "loss": 0.0007, "step": 2309 }, { "epoch": 4.39, "grad_norm": 0.016874918899971038, "learning_rate": 7.665154408295283e-06, "loss": 0.0005, "step": 2310 }, { "epoch": 4.39, "grad_norm": 0.04314407547570439, "learning_rate": 7.617938844401429e-06, "loss": 0.001, "step": 2311 }, { "epoch": 4.4, "grad_norm": 0.0856714094055771, "learning_rate": 7.570863389632588e-06, "loss": 0.001, "step": 2312 }, { "epoch": 4.4, "grad_norm": 0.009092261693978806, "learning_rate": 7.523928115384682e-06, "loss": 0.0003, "step": 2313 }, { "epoch": 4.4, "grad_norm": 0.03340653111451234, "learning_rate": 7.477133092841027e-06, "loss": 0.0007, "step": 2314 }, { "epoch": 4.4, "grad_norm": 0.034133939861753664, "learning_rate": 7.430478392972184e-06, "loss": 0.0006, "step": 2315 }, { "epoch": 4.4, "grad_norm": 0.015429907243243002, "learning_rate": 7.3839640865359435e-06, "loss": 0.0005, "step": 2316 }, { "epoch": 4.4, "grad_norm": 0.02707599493958444, "learning_rate": 7.3375902440771635e-06, "loss": 0.0006, "step": 2317 }, { "epoch": 4.41, "grad_norm": 0.06732210370913361, "learning_rate": 7.291356935927651e-06, "loss": 0.001, "step": 2318 }, { "epoch": 4.41, "grad_norm": 0.01737054068856081, "learning_rate": 7.245264232206073e-06, "loss": 0.0006, "step": 2319 }, { "epoch": 4.41, "grad_norm": 0.013471569351705863, "learning_rate": 7.199312202817909e-06, "loss": 0.0005, "step": 2320 }, { "epoch": 4.41, "grad_norm": 0.17169435209357026, "learning_rate": 7.153500917455225e-06, "loss": 0.0032, "step": 2321 }, { "epoch": 4.41, "grad_norm": 0.02569821472252798, "learning_rate": 7.107830445596631e-06, "loss": 0.0008, "step": 2322 }, { "epoch": 4.42, "grad_norm": 0.028585291702777623, "learning_rate": 7.062300856507231e-06, "loss": 0.0008, "step": 2323 }, { "epoch": 4.42, "grad_norm": 0.013606631577130563, "learning_rate": 7.0169122192384144e-06, "loss": 0.0004, "step": 2324 }, { "epoch": 4.42, "grad_norm": 0.024782482591790132, "learning_rate": 6.971664602627792e-06, "loss": 0.0007, "step": 2325 }, { "epoch": 4.42, "grad_norm": 0.09410860782147326, "learning_rate": 6.9265580752991495e-06, "loss": 0.0041, "step": 2326 }, { "epoch": 4.42, "grad_norm": 0.12385762362065939, "learning_rate": 6.881592705662265e-06, "loss": 0.003, "step": 2327 }, { "epoch": 4.43, "grad_norm": 0.024585772557670586, "learning_rate": 6.836768561912798e-06, "loss": 0.0006, "step": 2328 }, { "epoch": 4.43, "grad_norm": 0.0646493070640954, "learning_rate": 6.792085712032281e-06, "loss": 0.0008, "step": 2329 }, { "epoch": 4.43, "grad_norm": 0.07101098245826719, "learning_rate": 6.747544223787916e-06, "loss": 0.0007, "step": 2330 }, { "epoch": 4.43, "grad_norm": 0.026334898796943024, "learning_rate": 6.7031441647325335e-06, "loss": 0.0004, "step": 2331 }, { "epoch": 4.43, "grad_norm": 0.03997174981043806, "learning_rate": 6.658885602204446e-06, "loss": 0.0012, "step": 2332 }, { "epoch": 4.44, "grad_norm": 0.012754892703396503, "learning_rate": 6.614768603327393e-06, "loss": 0.0004, "step": 2333 }, { "epoch": 4.44, "grad_norm": 0.1550587335125986, "learning_rate": 6.570793235010408e-06, "loss": 0.0041, "step": 2334 }, { "epoch": 4.44, "grad_norm": 0.02902134237674221, "learning_rate": 6.52695956394771e-06, "loss": 0.0007, "step": 2335 }, { "epoch": 4.44, "grad_norm": 0.03690222250494539, "learning_rate": 6.483267656618641e-06, "loss": 0.0011, "step": 2336 }, { "epoch": 4.44, "grad_norm": 0.05471156890852082, "learning_rate": 6.439717579287518e-06, "loss": 0.0014, "step": 2337 }, { "epoch": 4.44, "grad_norm": 0.03453721323449001, "learning_rate": 6.396309398003564e-06, "loss": 0.0012, "step": 2338 }, { "epoch": 4.45, "grad_norm": 0.21357605957497988, "learning_rate": 6.3530431786007926e-06, "loss": 0.0041, "step": 2339 }, { "epoch": 4.45, "grad_norm": 0.007420639769343339, "learning_rate": 6.309918986697916e-06, "loss": 0.0003, "step": 2340 }, { "epoch": 4.45, "grad_norm": 0.04389259121910479, "learning_rate": 6.26693688769825e-06, "loss": 0.0005, "step": 2341 }, { "epoch": 4.45, "grad_norm": 0.0962052648249097, "learning_rate": 6.224096946789615e-06, "loss": 0.0023, "step": 2342 }, { "epoch": 4.45, "grad_norm": 0.014639990789130697, "learning_rate": 6.181399228944218e-06, "loss": 0.0004, "step": 2343 }, { "epoch": 4.46, "grad_norm": 0.03637920566710083, "learning_rate": 6.138843798918558e-06, "loss": 0.0006, "step": 2344 }, { "epoch": 4.46, "grad_norm": 0.04228135037210124, "learning_rate": 6.096430721253343e-06, "loss": 0.0011, "step": 2345 }, { "epoch": 4.46, "grad_norm": 0.03587273244980655, "learning_rate": 6.054160060273406e-06, "loss": 0.0009, "step": 2346 }, { "epoch": 4.46, "grad_norm": 0.02341599825906793, "learning_rate": 6.012031880087576e-06, "loss": 0.0006, "step": 2347 }, { "epoch": 4.46, "grad_norm": 0.03284541391083857, "learning_rate": 5.970046244588557e-06, "loss": 0.0009, "step": 2348 }, { "epoch": 4.47, "grad_norm": 0.049321557230435686, "learning_rate": 5.928203217452944e-06, "loss": 0.0004, "step": 2349 }, { "epoch": 4.47, "grad_norm": 0.01831657623868921, "learning_rate": 5.886502862140952e-06, "loss": 0.0006, "step": 2350 }, { "epoch": 4.47, "grad_norm": 0.0790216221960292, "learning_rate": 5.844945241896505e-06, "loss": 0.001, "step": 2351 }, { "epoch": 4.47, "grad_norm": 0.028551676422318543, "learning_rate": 5.803530419746972e-06, "loss": 0.0008, "step": 2352 }, { "epoch": 4.47, "grad_norm": 0.012348920026924684, "learning_rate": 5.762258458503223e-06, "loss": 0.0004, "step": 2353 }, { "epoch": 4.48, "grad_norm": 0.058336081988530866, "learning_rate": 5.721129420759386e-06, "loss": 0.0014, "step": 2354 }, { "epoch": 4.48, "grad_norm": 0.02784386556666216, "learning_rate": 5.68014336889291e-06, "loss": 0.0009, "step": 2355 }, { "epoch": 4.48, "grad_norm": 0.009617324065452341, "learning_rate": 5.639300365064337e-06, "loss": 0.0003, "step": 2356 }, { "epoch": 4.48, "grad_norm": 0.08794322634382945, "learning_rate": 5.598600471217253e-06, "loss": 0.0032, "step": 2357 }, { "epoch": 4.48, "grad_norm": 0.10575686980697799, "learning_rate": 5.558043749078213e-06, "loss": 0.0044, "step": 2358 }, { "epoch": 4.48, "grad_norm": 0.027517337110599036, "learning_rate": 5.517630260156659e-06, "loss": 0.0008, "step": 2359 }, { "epoch": 4.49, "grad_norm": 0.034306344129642945, "learning_rate": 5.477360065744764e-06, "loss": 0.0015, "step": 2360 }, { "epoch": 4.49, "grad_norm": 0.010395196390792226, "learning_rate": 5.437233226917393e-06, "loss": 0.0003, "step": 2361 }, { "epoch": 4.49, "grad_norm": 0.031779334376125984, "learning_rate": 5.39724980453199e-06, "loss": 0.0008, "step": 2362 }, { "epoch": 4.49, "grad_norm": 0.05400015884704923, "learning_rate": 5.3574098592284906e-06, "loss": 0.0013, "step": 2363 }, { "epoch": 4.49, "grad_norm": 0.019814306302162712, "learning_rate": 5.317713451429218e-06, "loss": 0.0004, "step": 2364 }, { "epoch": 4.5, "grad_norm": 0.09407480797269273, "learning_rate": 5.2781606413388475e-06, "loss": 0.0008, "step": 2365 }, { "epoch": 4.5, "grad_norm": 0.01602363078536408, "learning_rate": 5.2387514889442045e-06, "loss": 0.0004, "step": 2366 }, { "epoch": 4.5, "grad_norm": 0.009243200129207317, "learning_rate": 5.199486054014291e-06, "loss": 0.0003, "step": 2367 }, { "epoch": 4.5, "grad_norm": 0.01777300996342488, "learning_rate": 5.160364396100115e-06, "loss": 0.0005, "step": 2368 }, { "epoch": 4.5, "grad_norm": 0.09829905173937997, "learning_rate": 5.121386574534648e-06, "loss": 0.0013, "step": 2369 }, { "epoch": 4.51, "grad_norm": 0.016538698528417082, "learning_rate": 5.082552648432693e-06, "loss": 0.0005, "step": 2370 }, { "epoch": 4.51, "grad_norm": 0.0937277485817206, "learning_rate": 5.043862676690825e-06, "loss": 0.0022, "step": 2371 }, { "epoch": 4.51, "grad_norm": 0.01721930405850343, "learning_rate": 5.005316717987329e-06, "loss": 0.0005, "step": 2372 }, { "epoch": 4.51, "grad_norm": 0.03625908662810159, "learning_rate": 4.96691483078201e-06, "loss": 0.0005, "step": 2373 }, { "epoch": 4.51, "grad_norm": 0.029826499160870608, "learning_rate": 4.928657073316234e-06, "loss": 0.0006, "step": 2374 }, { "epoch": 4.52, "grad_norm": 0.1633453619620533, "learning_rate": 4.890543503612733e-06, "loss": 0.0059, "step": 2375 }, { "epoch": 4.52, "grad_norm": 0.08189650024230923, "learning_rate": 4.852574179475566e-06, "loss": 0.0031, "step": 2376 }, { "epoch": 4.52, "grad_norm": 0.05477383525701842, "learning_rate": 4.8147491584900395e-06, "loss": 0.0005, "step": 2377 }, { "epoch": 4.52, "grad_norm": 0.10709664682525366, "learning_rate": 4.777068498022619e-06, "loss": 0.0007, "step": 2378 }, { "epoch": 4.52, "grad_norm": 0.029311574398358013, "learning_rate": 4.739532255220791e-06, "loss": 0.0007, "step": 2379 }, { "epoch": 4.52, "grad_norm": 0.0264332685198945, "learning_rate": 4.70214048701304e-06, "loss": 0.0006, "step": 2380 }, { "epoch": 4.53, "grad_norm": 0.02734604752030089, "learning_rate": 4.664893250108715e-06, "loss": 0.0006, "step": 2381 }, { "epoch": 4.53, "grad_norm": 0.10202724415922525, "learning_rate": 4.627790600998005e-06, "loss": 0.0014, "step": 2382 }, { "epoch": 4.53, "grad_norm": 0.010696733415847261, "learning_rate": 4.590832595951777e-06, "loss": 0.0003, "step": 2383 }, { "epoch": 4.53, "grad_norm": 0.01345908170586467, "learning_rate": 4.554019291021538e-06, "loss": 0.0004, "step": 2384 }, { "epoch": 4.53, "grad_norm": 0.026916324061976752, "learning_rate": 4.517350742039339e-06, "loss": 0.0007, "step": 2385 }, { "epoch": 4.54, "grad_norm": 0.04733761199802435, "learning_rate": 4.480827004617682e-06, "loss": 0.001, "step": 2386 }, { "epoch": 4.54, "grad_norm": 0.0085528530557041, "learning_rate": 4.4444481341494595e-06, "loss": 0.0003, "step": 2387 }, { "epoch": 4.54, "grad_norm": 0.06991959448470207, "learning_rate": 4.408214185807846e-06, "loss": 0.0008, "step": 2388 }, { "epoch": 4.54, "grad_norm": 0.044100731947637854, "learning_rate": 4.37212521454623e-06, "loss": 0.0006, "step": 2389 }, { "epoch": 4.54, "grad_norm": 0.13730703932743335, "learning_rate": 4.336181275098106e-06, "loss": 0.0025, "step": 2390 }, { "epoch": 4.55, "grad_norm": 0.11203174743956908, "learning_rate": 4.300382421977034e-06, "loss": 0.0029, "step": 2391 }, { "epoch": 4.55, "grad_norm": 0.10989208914941753, "learning_rate": 4.264728709476529e-06, "loss": 0.0033, "step": 2392 }, { "epoch": 4.55, "grad_norm": 0.01536066069225573, "learning_rate": 4.229220191669947e-06, "loss": 0.0006, "step": 2393 }, { "epoch": 4.55, "grad_norm": 0.03606634656630337, "learning_rate": 4.193856922410466e-06, "loss": 0.0007, "step": 2394 }, { "epoch": 4.55, "grad_norm": 0.06270051014705572, "learning_rate": 4.158638955331007e-06, "loss": 0.0008, "step": 2395 }, { "epoch": 4.56, "grad_norm": 0.031684472376423196, "learning_rate": 4.123566343844054e-06, "loss": 0.0007, "step": 2396 }, { "epoch": 4.56, "grad_norm": 0.008311884257506995, "learning_rate": 4.088639141141692e-06, "loss": 0.0003, "step": 2397 }, { "epoch": 4.56, "grad_norm": 0.016265496249226328, "learning_rate": 4.0538574001954485e-06, "loss": 0.0005, "step": 2398 }, { "epoch": 4.56, "grad_norm": 0.017666464097766885, "learning_rate": 4.01922117375626e-06, "loss": 0.0005, "step": 2399 }, { "epoch": 4.56, "grad_norm": 0.025244060985909905, "learning_rate": 3.98473051435434e-06, "loss": 0.0009, "step": 2400 }, { "epoch": 4.56, "eval_blimp_filtered_avg": 0.7113432835820895, "eval_blimp_filtered_std": 0.0050169656206006516, "step": 2400 }, { "epoch": 4.56, "eval_blimp_supplement_avg": 0.8125, "eval_blimp_supplement_std": 0.017057808126567407, "step": 2400 }, { "epoch": 4.56, "eval_vqa_filtered_avg": 0.57, "eval_vqa_filtered_std": 0.04975698519562427, "step": 2400 }, { "epoch": 4.56, "eval_winoground_filtered_avg": 0.64, "eval_winoground_filtered_std": 0.048241815132442176, "step": 2400 }, { "epoch": 4.56, "grad_norm": 0.04599108259755368, "learning_rate": 3.95038547429919e-06, "loss": 0.0018, "step": 2401 }, { "epoch": 4.57, "grad_norm": 0.028441406632249706, "learning_rate": 3.916186105679387e-06, "loss": 0.0013, "step": 2402 }, { "epoch": 4.57, "grad_norm": 0.18984073329878887, "learning_rate": 3.882132460362631e-06, "loss": 0.0025, "step": 2403 }, { "epoch": 4.57, "grad_norm": 0.032466836182799656, "learning_rate": 3.8482245899956085e-06, "loss": 0.0008, "step": 2404 }, { "epoch": 4.57, "grad_norm": 0.21937623976596987, "learning_rate": 3.814462546003894e-06, "loss": 0.0048, "step": 2405 }, { "epoch": 4.57, "grad_norm": 0.027755573848289913, "learning_rate": 3.780846379591929e-06, "loss": 0.0006, "step": 2406 }, { "epoch": 4.58, "grad_norm": 0.02185677546595959, "learning_rate": 3.7473761417428865e-06, "loss": 0.0005, "step": 2407 }, { "epoch": 4.58, "grad_norm": 0.03532133971072564, "learning_rate": 3.714051883218628e-06, "loss": 0.001, "step": 2408 }, { "epoch": 4.58, "grad_norm": 0.052059107963985475, "learning_rate": 3.6808736545596356e-06, "loss": 0.0013, "step": 2409 }, { "epoch": 4.58, "grad_norm": 0.019587898155203867, "learning_rate": 3.6478415060848812e-06, "loss": 0.0005, "step": 2410 }, { "epoch": 4.58, "grad_norm": 0.04686878448551844, "learning_rate": 3.6149554878918466e-06, "loss": 0.001, "step": 2411 }, { "epoch": 4.59, "grad_norm": 0.05051236209837026, "learning_rate": 3.582215649856335e-06, "loss": 0.001, "step": 2412 }, { "epoch": 4.59, "grad_norm": 0.0077058898779751705, "learning_rate": 3.549622041632461e-06, "loss": 0.0003, "step": 2413 }, { "epoch": 4.59, "grad_norm": 0.17071365765770594, "learning_rate": 3.5171747126525823e-06, "loss": 0.0013, "step": 2414 }, { "epoch": 4.59, "grad_norm": 0.08566458382899354, "learning_rate": 3.4848737121271903e-06, "loss": 0.0027, "step": 2415 }, { "epoch": 4.59, "grad_norm": 0.021279486442264257, "learning_rate": 3.4527190890448535e-06, "loss": 0.0007, "step": 2416 }, { "epoch": 4.6, "grad_norm": 0.08034899217031141, "learning_rate": 3.4207108921721296e-06, "loss": 0.0009, "step": 2417 }, { "epoch": 4.6, "grad_norm": 0.01509017358207429, "learning_rate": 3.388849170053532e-06, "loss": 0.0004, "step": 2418 }, { "epoch": 4.6, "grad_norm": 0.025694851175607183, "learning_rate": 3.357133971011395e-06, "loss": 0.0008, "step": 2419 }, { "epoch": 4.6, "grad_norm": 0.03585094608930215, "learning_rate": 3.3255653431458533e-06, "loss": 0.0008, "step": 2420 }, { "epoch": 4.6, "grad_norm": 0.03669830636563905, "learning_rate": 3.2941433343347205e-06, "loss": 0.0006, "step": 2421 }, { "epoch": 4.6, "grad_norm": 0.07152495062394419, "learning_rate": 3.2628679922334872e-06, "loss": 0.0019, "step": 2422 }, { "epoch": 4.61, "grad_norm": 0.19930380168278272, "learning_rate": 3.231739364275155e-06, "loss": 0.0012, "step": 2423 }, { "epoch": 4.61, "grad_norm": 0.05893247493902563, "learning_rate": 3.200757497670259e-06, "loss": 0.0012, "step": 2424 }, { "epoch": 4.61, "grad_norm": 0.025976418033222665, "learning_rate": 3.169922439406736e-06, "loss": 0.0006, "step": 2425 }, { "epoch": 4.61, "grad_norm": 0.019008599540742643, "learning_rate": 3.139234236249844e-06, "loss": 0.0007, "step": 2426 }, { "epoch": 4.61, "grad_norm": 0.0644693712036983, "learning_rate": 3.1086929347421635e-06, "loss": 0.0015, "step": 2427 }, { "epoch": 4.62, "grad_norm": 0.013417184975925407, "learning_rate": 3.0782985812034536e-06, "loss": 0.0005, "step": 2428 }, { "epoch": 4.62, "grad_norm": 0.09606988648963026, "learning_rate": 3.048051221730597e-06, "loss": 0.0023, "step": 2429 }, { "epoch": 4.62, "grad_norm": 0.01540367422738716, "learning_rate": 3.017950902197575e-06, "loss": 0.0004, "step": 2430 }, { "epoch": 4.62, "grad_norm": 0.03396755171696882, "learning_rate": 2.9879976682553157e-06, "loss": 0.0005, "step": 2431 }, { "epoch": 4.62, "grad_norm": 0.014120160130365437, "learning_rate": 2.958191565331725e-06, "loss": 0.0004, "step": 2432 }, { "epoch": 4.63, "grad_norm": 0.024542281548193705, "learning_rate": 2.9285326386315203e-06, "loss": 0.0011, "step": 2433 }, { "epoch": 4.63, "grad_norm": 0.0276721370829225, "learning_rate": 2.899020933136254e-06, "loss": 0.0006, "step": 2434 }, { "epoch": 4.63, "grad_norm": 0.06045306835871971, "learning_rate": 2.869656493604156e-06, "loss": 0.0008, "step": 2435 }, { "epoch": 4.63, "grad_norm": 0.038334954913440314, "learning_rate": 2.8404393645701243e-06, "loss": 0.0013, "step": 2436 }, { "epoch": 4.63, "grad_norm": 0.051888904963733704, "learning_rate": 2.8113695903456804e-06, "loss": 0.0007, "step": 2437 }, { "epoch": 4.63, "grad_norm": 0.049976266092226086, "learning_rate": 2.7824472150187907e-06, "loss": 0.0013, "step": 2438 }, { "epoch": 4.64, "grad_norm": 0.027071265219881933, "learning_rate": 2.753672282453912e-06, "loss": 0.0007, "step": 2439 }, { "epoch": 4.64, "grad_norm": 0.05366876143830362, "learning_rate": 2.7250448362919013e-06, "loss": 0.0014, "step": 2440 }, { "epoch": 4.64, "grad_norm": 0.05068048062607784, "learning_rate": 2.6965649199499064e-06, "loss": 0.0014, "step": 2441 }, { "epoch": 4.64, "grad_norm": 0.019382236715078598, "learning_rate": 2.6682325766213323e-06, "loss": 0.0006, "step": 2442 }, { "epoch": 4.64, "grad_norm": 0.02488462364409607, "learning_rate": 2.640047849275784e-06, "loss": 0.0005, "step": 2443 }, { "epoch": 4.65, "grad_norm": 0.02176805294130523, "learning_rate": 2.612010780658969e-06, "loss": 0.0004, "step": 2444 }, { "epoch": 4.65, "grad_norm": 0.026357061803990302, "learning_rate": 2.5841214132926728e-06, "loss": 0.0007, "step": 2445 }, { "epoch": 4.65, "grad_norm": 0.012138481259041808, "learning_rate": 2.5563797894746388e-06, "loss": 0.0004, "step": 2446 }, { "epoch": 4.65, "grad_norm": 0.04640745205879017, "learning_rate": 2.5287859512785894e-06, "loss": 0.0014, "step": 2447 }, { "epoch": 4.65, "grad_norm": 0.020982451519934702, "learning_rate": 2.5013399405540706e-06, "loss": 0.0006, "step": 2448 }, { "epoch": 4.66, "grad_norm": 0.0381215183005262, "learning_rate": 2.4740417989264408e-06, "loss": 0.001, "step": 2449 }, { "epoch": 4.66, "grad_norm": 0.025738543201325674, "learning_rate": 2.446891567796805e-06, "loss": 0.0006, "step": 2450 }, { "epoch": 4.66, "grad_norm": 0.13000448649034232, "learning_rate": 2.4198892883419256e-06, "loss": 0.0025, "step": 2451 }, { "epoch": 4.66, "grad_norm": 0.019105448548729462, "learning_rate": 2.39303500151421e-06, "loss": 0.0004, "step": 2452 }, { "epoch": 4.66, "grad_norm": 0.12527770474583733, "learning_rate": 2.3663287480415688e-06, "loss": 0.0047, "step": 2453 }, { "epoch": 4.67, "grad_norm": 0.06334427956111124, "learning_rate": 2.3397705684274353e-06, "loss": 0.0007, "step": 2454 }, { "epoch": 4.67, "grad_norm": 0.021950097494288417, "learning_rate": 2.3133605029506567e-06, "loss": 0.0004, "step": 2455 }, { "epoch": 4.67, "grad_norm": 0.18468854989678415, "learning_rate": 2.2870985916654487e-06, "loss": 0.0016, "step": 2456 }, { "epoch": 4.67, "grad_norm": 0.07045376732474712, "learning_rate": 2.2609848744013505e-06, "loss": 0.0019, "step": 2457 }, { "epoch": 4.67, "grad_norm": 0.030157693350919428, "learning_rate": 2.2350193907631157e-06, "loss": 0.0008, "step": 2458 }, { "epoch": 4.67, "grad_norm": 0.03403061485730726, "learning_rate": 2.2092021801306983e-06, "loss": 0.0007, "step": 2459 }, { "epoch": 4.68, "grad_norm": 0.008492783726407152, "learning_rate": 2.183533281659178e-06, "loss": 0.0003, "step": 2460 }, { "epoch": 4.68, "grad_norm": 0.10164541038133999, "learning_rate": 2.158012734278703e-06, "loss": 0.0052, "step": 2461 }, { "epoch": 4.68, "grad_norm": 0.02874170193943148, "learning_rate": 2.132640576694411e-06, "loss": 0.0008, "step": 2462 }, { "epoch": 4.68, "grad_norm": 0.015038451522168567, "learning_rate": 2.107416847386423e-06, "loss": 0.0004, "step": 2463 }, { "epoch": 4.68, "grad_norm": 0.008887645372581498, "learning_rate": 2.0823415846097037e-06, "loss": 0.0003, "step": 2464 }, { "epoch": 4.69, "grad_norm": 0.02263259366105849, "learning_rate": 2.057414826394077e-06, "loss": 0.0007, "step": 2465 }, { "epoch": 4.69, "grad_norm": 0.08224076508576018, "learning_rate": 2.0326366105441584e-06, "loss": 0.0012, "step": 2466 }, { "epoch": 4.69, "grad_norm": 0.031378108365815914, "learning_rate": 2.0080069746392314e-06, "loss": 0.0011, "step": 2467 }, { "epoch": 4.69, "grad_norm": 0.011865615273719352, "learning_rate": 1.983525956033272e-06, "loss": 0.0004, "step": 2468 }, { "epoch": 4.69, "grad_norm": 0.03533078426000643, "learning_rate": 1.9591935918548464e-06, "loss": 0.0007, "step": 2469 }, { "epoch": 4.7, "grad_norm": 0.02841422719002293, "learning_rate": 1.9350099190071026e-06, "loss": 0.0006, "step": 2470 }, { "epoch": 4.7, "grad_norm": 0.03139512942044064, "learning_rate": 1.9109749741676232e-06, "loss": 0.0011, "step": 2471 }, { "epoch": 4.7, "grad_norm": 0.010222091630202932, "learning_rate": 1.8870887937884606e-06, "loss": 0.0003, "step": 2472 }, { "epoch": 4.7, "grad_norm": 0.02682658144608497, "learning_rate": 1.8633514140960484e-06, "loss": 0.0008, "step": 2473 }, { "epoch": 4.7, "grad_norm": 0.033018276311175335, "learning_rate": 1.839762871091133e-06, "loss": 0.0007, "step": 2474 }, { "epoch": 4.71, "grad_norm": 0.011943880287337219, "learning_rate": 1.8163232005487418e-06, "loss": 0.0004, "step": 2475 }, { "epoch": 4.71, "grad_norm": 0.035723500714461835, "learning_rate": 1.793032438018083e-06, "loss": 0.001, "step": 2476 }, { "epoch": 4.71, "grad_norm": 0.12499858958472201, "learning_rate": 1.7698906188225895e-06, "loss": 0.001, "step": 2477 }, { "epoch": 4.71, "grad_norm": 0.08480431472297088, "learning_rate": 1.7468977780597528e-06, "loss": 0.001, "step": 2478 }, { "epoch": 4.71, "grad_norm": 0.12481871749678175, "learning_rate": 1.7240539506011234e-06, "loss": 0.0037, "step": 2479 }, { "epoch": 4.71, "grad_norm": 0.07155823447136185, "learning_rate": 1.7013591710922761e-06, "loss": 0.0013, "step": 2480 }, { "epoch": 4.72, "grad_norm": 0.03255557485678407, "learning_rate": 1.678813473952745e-06, "loss": 0.0009, "step": 2481 }, { "epoch": 4.72, "grad_norm": 0.02417901278088356, "learning_rate": 1.6564168933759006e-06, "loss": 0.0007, "step": 2482 }, { "epoch": 4.72, "grad_norm": 0.02618491965146442, "learning_rate": 1.6341694633290495e-06, "loss": 0.0005, "step": 2483 }, { "epoch": 4.72, "grad_norm": 0.02544919814051457, "learning_rate": 1.6120712175532128e-06, "loss": 0.0006, "step": 2484 }, { "epoch": 4.72, "grad_norm": 0.208572062337169, "learning_rate": 1.5901221895631814e-06, "loss": 0.0009, "step": 2485 }, { "epoch": 4.73, "grad_norm": 0.01852086278238246, "learning_rate": 1.5683224126474604e-06, "loss": 0.0005, "step": 2486 }, { "epoch": 4.73, "grad_norm": 0.04473212569735427, "learning_rate": 1.546671919868181e-06, "loss": 0.0008, "step": 2487 }, { "epoch": 4.73, "grad_norm": 0.025311397316563917, "learning_rate": 1.5251707440610552e-06, "loss": 0.0007, "step": 2488 }, { "epoch": 4.73, "grad_norm": 0.053052338737681524, "learning_rate": 1.5038189178353424e-06, "loss": 0.0006, "step": 2489 }, { "epoch": 4.73, "grad_norm": 0.1522976847123016, "learning_rate": 1.4826164735738057e-06, "loss": 0.003, "step": 2490 }, { "epoch": 4.74, "grad_norm": 0.19110512935690702, "learning_rate": 1.4615634434326453e-06, "loss": 0.0011, "step": 2491 }, { "epoch": 4.74, "grad_norm": 0.02623297132584474, "learning_rate": 1.4406598593414311e-06, "loss": 0.0005, "step": 2492 }, { "epoch": 4.74, "grad_norm": 0.12177539090259627, "learning_rate": 1.4199057530031367e-06, "loss": 0.0017, "step": 2493 }, { "epoch": 4.74, "grad_norm": 0.008512864553601488, "learning_rate": 1.399301155893995e-06, "loss": 0.0003, "step": 2494 }, { "epoch": 4.74, "grad_norm": 0.017853489419719298, "learning_rate": 1.3788460992634644e-06, "loss": 0.0006, "step": 2495 }, { "epoch": 4.75, "grad_norm": 0.027192185390911266, "learning_rate": 1.3585406141342517e-06, "loss": 0.0006, "step": 2496 }, { "epoch": 4.75, "grad_norm": 0.0432360416513147, "learning_rate": 1.3383847313022224e-06, "loss": 0.0009, "step": 2497 }, { "epoch": 4.75, "grad_norm": 0.00862694006090508, "learning_rate": 1.318378481336302e-06, "loss": 0.0003, "step": 2498 }, { "epoch": 4.75, "grad_norm": 0.07675516116769943, "learning_rate": 1.2985218945785304e-06, "loss": 0.0011, "step": 2499 }, { "epoch": 4.75, "grad_norm": 0.04531809055721268, "learning_rate": 1.2788150011439294e-06, "loss": 0.0017, "step": 2500 }, { "epoch": 4.75, "eval_blimp_filtered_avg": 0.7104477611940299, "eval_blimp_filtered_std": 0.005018535167114561, "step": 2500 }, { "epoch": 4.75, "eval_blimp_supplement_avg": 0.8125, "eval_blimp_supplement_std": 0.017057808126567407, "step": 2500 }, { "epoch": 4.75, "eval_vqa_filtered_avg": 0.56, "eval_vqa_filtered_std": 0.049888765156985884, "step": 2500 }, { "epoch": 4.75, "eval_winoground_filtered_avg": 0.64, "eval_winoground_filtered_std": 0.048241815132442176, "step": 2500 }, { "epoch": 4.75, "grad_norm": 0.012896031353797119, "learning_rate": 1.2592578309205017e-06, "loss": 0.0003, "step": 2501 }, { "epoch": 4.76, "grad_norm": 0.02487490155714827, "learning_rate": 1.2398504135691991e-06, "loss": 0.0005, "step": 2502 }, { "epoch": 4.76, "grad_norm": 0.2533765922243423, "learning_rate": 1.2205927785238213e-06, "loss": 0.0011, "step": 2503 }, { "epoch": 4.76, "grad_norm": 0.022775178835564975, "learning_rate": 1.2014849549910056e-06, "loss": 0.0006, "step": 2504 }, { "epoch": 4.76, "grad_norm": 0.01480860747918389, "learning_rate": 1.1825269719502041e-06, "loss": 0.0004, "step": 2505 }, { "epoch": 4.76, "grad_norm": 0.01413390971547087, "learning_rate": 1.1637188581536175e-06, "loss": 0.0004, "step": 2506 }, { "epoch": 4.77, "grad_norm": 0.2437562093510463, "learning_rate": 1.1450606421261167e-06, "loss": 0.0043, "step": 2507 }, { "epoch": 4.77, "grad_norm": 0.01900279921987444, "learning_rate": 1.1265523521652666e-06, "loss": 0.0006, "step": 2508 }, { "epoch": 4.77, "grad_norm": 0.005686360309626745, "learning_rate": 1.1081940163412352e-06, "loss": 0.0002, "step": 2509 }, { "epoch": 4.77, "grad_norm": 0.03421447328367397, "learning_rate": 1.089985662496773e-06, "loss": 0.0005, "step": 2510 }, { "epoch": 4.77, "grad_norm": 0.028654991496887553, "learning_rate": 1.0719273182471569e-06, "loss": 0.0007, "step": 2511 }, { "epoch": 4.78, "grad_norm": 0.04238713560117334, "learning_rate": 1.0540190109801562e-06, "loss": 0.0009, "step": 2512 }, { "epoch": 4.78, "grad_norm": 0.01935491201188404, "learning_rate": 1.036260767856001e-06, "loss": 0.0005, "step": 2513 }, { "epoch": 4.78, "grad_norm": 0.02044561341157439, "learning_rate": 1.0186526158073251e-06, "loss": 0.0005, "step": 2514 }, { "epoch": 4.78, "grad_norm": 0.009677637139046047, "learning_rate": 1.0011945815391e-06, "loss": 0.0003, "step": 2515 }, { "epoch": 4.78, "grad_norm": 0.03602700511118458, "learning_rate": 9.838866915286903e-07, "loss": 0.0008, "step": 2516 }, { "epoch": 4.79, "grad_norm": 0.13757265991303527, "learning_rate": 9.667289720256766e-07, "loss": 0.0022, "step": 2517 }, { "epoch": 4.79, "grad_norm": 0.02095171966698804, "learning_rate": 9.497214490519213e-07, "loss": 0.0005, "step": 2518 }, { "epoch": 4.79, "grad_norm": 0.016274277131482676, "learning_rate": 9.328641484015244e-07, "loss": 0.0005, "step": 2519 }, { "epoch": 4.79, "grad_norm": 0.057047314259145505, "learning_rate": 9.161570956406907e-07, "loss": 0.0019, "step": 2520 }, { "epoch": 4.79, "grad_norm": 0.009533827061984626, "learning_rate": 8.99600316107796e-07, "loss": 0.0003, "step": 2521 }, { "epoch": 4.79, "grad_norm": 0.011249772288376057, "learning_rate": 8.831938349132984e-07, "loss": 0.0004, "step": 2522 }, { "epoch": 4.8, "grad_norm": 0.09522764242362229, "learning_rate": 8.669376769397053e-07, "loss": 0.0016, "step": 2523 }, { "epoch": 4.8, "grad_norm": 0.022805003259363996, "learning_rate": 8.508318668415505e-07, "loss": 0.0006, "step": 2524 }, { "epoch": 4.8, "grad_norm": 0.06221227664076278, "learning_rate": 8.348764290453392e-07, "loss": 0.0011, "step": 2525 }, { "epoch": 4.8, "grad_norm": 0.17614049610562432, "learning_rate": 8.19071387749526e-07, "loss": 0.0033, "step": 2526 }, { "epoch": 4.8, "grad_norm": 0.029785967836139914, "learning_rate": 8.034167669244475e-07, "loss": 0.0006, "step": 2527 }, { "epoch": 4.81, "grad_norm": 0.05273826790690243, "learning_rate": 7.87912590312323e-07, "loss": 0.0014, "step": 2528 }, { "epoch": 4.81, "grad_norm": 0.04573201979236683, "learning_rate": 7.725588814272211e-07, "loss": 0.0007, "step": 2529 }, { "epoch": 4.81, "grad_norm": 0.06074119586780906, "learning_rate": 7.573556635549928e-07, "loss": 0.0023, "step": 2530 }, { "epoch": 4.81, "grad_norm": 0.034542244896624115, "learning_rate": 7.423029597532161e-07, "loss": 0.0009, "step": 2531 }, { "epoch": 4.81, "grad_norm": 0.035012883572341016, "learning_rate": 7.274007928512627e-07, "loss": 0.0005, "step": 2532 }, { "epoch": 4.82, "grad_norm": 0.03649191340656196, "learning_rate": 7.126491854501427e-07, "loss": 0.0005, "step": 2533 }, { "epoch": 4.82, "grad_norm": 0.05142521090703685, "learning_rate": 6.980481599225486e-07, "loss": 0.0012, "step": 2534 }, { "epoch": 4.82, "grad_norm": 0.0959010624445109, "learning_rate": 6.83597738412789e-07, "loss": 0.002, "step": 2535 }, { "epoch": 4.82, "grad_norm": 0.3086639652434374, "learning_rate": 6.692979428367663e-07, "loss": 0.0015, "step": 2536 }, { "epoch": 4.82, "grad_norm": 0.01377809774562358, "learning_rate": 6.551487948819212e-07, "loss": 0.0004, "step": 2537 }, { "epoch": 4.83, "grad_norm": 0.22839552019604156, "learning_rate": 6.411503160072441e-07, "loss": 0.0015, "step": 2538 }, { "epoch": 4.83, "grad_norm": 0.017270039801469118, "learning_rate": 6.273025274431965e-07, "loss": 0.0005, "step": 2539 }, { "epoch": 4.83, "grad_norm": 0.03461038500636875, "learning_rate": 6.136054501917232e-07, "loss": 0.0013, "step": 2540 }, { "epoch": 4.83, "grad_norm": 0.05795936832685819, "learning_rate": 6.000591050261739e-07, "loss": 0.0012, "step": 2541 }, { "epoch": 4.83, "grad_norm": 0.06716059553702539, "learning_rate": 5.866635124913034e-07, "loss": 0.0014, "step": 2542 }, { "epoch": 4.83, "grad_norm": 0.06677219349453664, "learning_rate": 5.734186929032159e-07, "loss": 0.004, "step": 2543 }, { "epoch": 4.84, "grad_norm": 0.043576476507538806, "learning_rate": 5.603246663493766e-07, "loss": 0.0013, "step": 2544 }, { "epoch": 4.84, "grad_norm": 0.034497397282779334, "learning_rate": 5.473814526885335e-07, "loss": 0.0007, "step": 2545 }, { "epoch": 4.84, "grad_norm": 0.019384261558981583, "learning_rate": 5.345890715507173e-07, "loss": 0.0005, "step": 2546 }, { "epoch": 4.84, "grad_norm": 0.017937902675520534, "learning_rate": 5.219475423371867e-07, "loss": 0.0004, "step": 2547 }, { "epoch": 4.84, "grad_norm": 0.02884126916452058, "learning_rate": 5.094568842204383e-07, "loss": 0.0008, "step": 2548 }, { "epoch": 4.85, "grad_norm": 0.01818291145004686, "learning_rate": 4.971171161441302e-07, "loss": 0.0005, "step": 2549 }, { "epoch": 4.85, "grad_norm": 0.05281011812989244, "learning_rate": 4.849282568231028e-07, "loss": 0.0008, "step": 2550 }, { "epoch": 4.85, "grad_norm": 0.026516639879742555, "learning_rate": 4.7289032474329143e-07, "loss": 0.0008, "step": 2551 }, { "epoch": 4.85, "grad_norm": 0.010652355946898293, "learning_rate": 4.610033381617695e-07, "loss": 0.0003, "step": 2552 }, { "epoch": 4.85, "grad_norm": 0.015843924282158695, "learning_rate": 4.4926731510663843e-07, "loss": 0.0005, "step": 2553 }, { "epoch": 4.86, "grad_norm": 0.02448330584386274, "learning_rate": 4.3768227337707135e-07, "loss": 0.0007, "step": 2554 }, { "epoch": 4.86, "grad_norm": 0.0730136712300552, "learning_rate": 4.2624823054328024e-07, "loss": 0.0019, "step": 2555 }, { "epoch": 4.86, "grad_norm": 0.05238270086901457, "learning_rate": 4.149652039464047e-07, "loss": 0.0008, "step": 2556 }, { "epoch": 4.86, "grad_norm": 0.11707348502219697, "learning_rate": 4.038332106986231e-07, "loss": 0.0024, "step": 2557 }, { "epoch": 4.86, "grad_norm": 0.05400309393358307, "learning_rate": 3.92852267682986e-07, "loss": 0.0044, "step": 2558 }, { "epoch": 4.87, "grad_norm": 0.024601598366834024, "learning_rate": 3.820223915535048e-07, "loss": 0.0006, "step": 2559 }, { "epoch": 4.87, "grad_norm": 0.04550258508970989, "learning_rate": 3.713435987350522e-07, "loss": 0.0009, "step": 2560 }, { "epoch": 4.87, "grad_norm": 0.1104642450157822, "learning_rate": 3.608159054233951e-07, "loss": 0.0014, "step": 2561 }, { "epoch": 4.87, "grad_norm": 0.026591476694135592, "learning_rate": 3.504393275850948e-07, "loss": 0.0008, "step": 2562 }, { "epoch": 4.87, "grad_norm": 0.033276706923240654, "learning_rate": 3.402138809575517e-07, "loss": 0.0004, "step": 2563 }, { "epoch": 4.87, "grad_norm": 0.12286977799714043, "learning_rate": 3.301395810489494e-07, "loss": 0.0015, "step": 2564 }, { "epoch": 4.88, "grad_norm": 0.09736148711219433, "learning_rate": 3.202164431382659e-07, "loss": 0.0014, "step": 2565 }, { "epoch": 4.88, "grad_norm": 0.01932105521773662, "learning_rate": 3.10444482275174e-07, "loss": 0.0005, "step": 2566 }, { "epoch": 4.88, "grad_norm": 0.02021861125693131, "learning_rate": 3.0082371328010727e-07, "loss": 0.0006, "step": 2567 }, { "epoch": 4.88, "grad_norm": 0.016807818598983913, "learning_rate": 2.91354150744183e-07, "loss": 0.0006, "step": 2568 }, { "epoch": 4.88, "grad_norm": 0.013321983780121267, "learning_rate": 2.820358090291908e-07, "loss": 0.0004, "step": 2569 }, { "epoch": 4.89, "grad_norm": 0.17218061188535344, "learning_rate": 2.7286870226758135e-07, "loss": 0.0024, "step": 2570 }, { "epoch": 4.89, "grad_norm": 0.05702000909493229, "learning_rate": 2.638528443624333e-07, "loss": 0.0008, "step": 2571 }, { "epoch": 4.89, "grad_norm": 0.03831504374990623, "learning_rate": 2.5498824898744224e-07, "loss": 0.0008, "step": 2572 }, { "epoch": 4.89, "grad_norm": 0.019372196223314663, "learning_rate": 2.4627492958688714e-07, "loss": 0.001, "step": 2573 }, { "epoch": 4.89, "grad_norm": 0.020318353419216346, "learning_rate": 2.3771289937563056e-07, "loss": 0.0005, "step": 2574 }, { "epoch": 4.9, "grad_norm": 0.02631837161540417, "learning_rate": 2.2930217133907418e-07, "loss": 0.0008, "step": 2575 }, { "epoch": 4.9, "grad_norm": 0.031847107405479844, "learning_rate": 2.2104275823315868e-07, "loss": 0.0008, "step": 2576 }, { "epoch": 4.9, "grad_norm": 0.07631466620386895, "learning_rate": 2.1293467258433065e-07, "loss": 0.0008, "step": 2577 }, { "epoch": 4.9, "grad_norm": 0.018665840470923428, "learning_rate": 2.0497792668953132e-07, "loss": 0.0006, "step": 2578 }, { "epoch": 4.9, "grad_norm": 0.04635465281578875, "learning_rate": 1.9717253261617442e-07, "loss": 0.001, "step": 2579 }, { "epoch": 4.9, "grad_norm": 0.09804259868798113, "learning_rate": 1.8951850220213508e-07, "loss": 0.0017, "step": 2580 }, { "epoch": 4.91, "grad_norm": 0.029726375875381777, "learning_rate": 1.820158470557165e-07, "loss": 0.0007, "step": 2581 }, { "epoch": 4.91, "grad_norm": 0.008491336304010949, "learning_rate": 1.7466457855565e-07, "loss": 0.0003, "step": 2582 }, { "epoch": 4.91, "grad_norm": 0.0396147689917411, "learning_rate": 1.674647078510727e-07, "loss": 0.0021, "step": 2583 }, { "epoch": 4.91, "grad_norm": 0.04814982890800119, "learning_rate": 1.604162458614944e-07, "loss": 0.0013, "step": 2584 }, { "epoch": 4.91, "grad_norm": 0.02040241045019675, "learning_rate": 1.5351920327680847e-07, "loss": 0.0007, "step": 2585 }, { "epoch": 4.92, "grad_norm": 0.04774447392669528, "learning_rate": 1.467735905572476e-07, "loss": 0.0012, "step": 2586 }, { "epoch": 4.92, "grad_norm": 0.08776275681611595, "learning_rate": 1.4017941793340593e-07, "loss": 0.0013, "step": 2587 }, { "epoch": 4.92, "grad_norm": 0.012410773101058281, "learning_rate": 1.337366954061725e-07, "loss": 0.0004, "step": 2588 }, { "epoch": 4.92, "grad_norm": 0.01439838530621655, "learning_rate": 1.2744543274675334e-07, "loss": 0.0005, "step": 2589 }, { "epoch": 4.92, "grad_norm": 0.025898075882835738, "learning_rate": 1.213056394966494e-07, "loss": 0.0007, "step": 2590 }, { "epoch": 4.93, "grad_norm": 0.05099228230340174, "learning_rate": 1.1531732496763425e-07, "loss": 0.0015, "step": 2591 }, { "epoch": 4.93, "grad_norm": 0.04196502644261051, "learning_rate": 1.09480498241743e-07, "loss": 0.0004, "step": 2592 }, { "epoch": 4.93, "grad_norm": 0.010980852204008412, "learning_rate": 1.0379516817128343e-07, "loss": 0.0004, "step": 2593 }, { "epoch": 4.93, "grad_norm": 0.02267258999487461, "learning_rate": 9.826134337875826e-08, "loss": 0.0007, "step": 2594 }, { "epoch": 4.93, "grad_norm": 0.020718336748258355, "learning_rate": 9.287903225693173e-08, "loss": 0.0007, "step": 2595 }, { "epoch": 4.94, "grad_norm": 0.2281688247832748, "learning_rate": 8.764824296875196e-08, "loss": 0.004, "step": 2596 }, { "epoch": 4.94, "grad_norm": 0.03412845296169451, "learning_rate": 8.256898344737307e-08, "loss": 0.0007, "step": 2597 }, { "epoch": 4.94, "grad_norm": 0.042437379654414055, "learning_rate": 7.764126139615524e-08, "loss": 0.0011, "step": 2598 }, { "epoch": 4.94, "grad_norm": 0.01509671421837453, "learning_rate": 7.286508428858696e-08, "loss": 0.0005, "step": 2599 }, { "epoch": 4.94, "grad_norm": 0.06764501519367483, "learning_rate": 6.824045936836276e-08, "loss": 0.0011, "step": 2600 }, { "epoch": 4.94, "eval_blimp_filtered_avg": 0.7101492537313433, "eval_blimp_filtered_std": 0.005018759347975435, "step": 2600 }, { "epoch": 4.94, "eval_blimp_supplement_avg": 0.8125, "eval_blimp_supplement_std": 0.016980559879776237, "step": 2600 }, { "epoch": 4.94, "eval_vqa_filtered_avg": 0.56, "eval_vqa_filtered_std": 0.049888765156985884, "step": 2600 }, { "epoch": 4.94, "eval_winoground_filtered_avg": 0.65, "eval_winoground_filtered_std": 0.04793724854411019, "step": 2600 }, { "epoch": 4.94, "grad_norm": 0.013018267259711499, "learning_rate": 6.376739364932772e-08, "loss": 0.0004, "step": 2601 }, { "epoch": 4.95, "grad_norm": 0.0334205390508429, "learning_rate": 5.944589391542188e-08, "loss": 0.0006, "step": 2602 }, { "epoch": 4.95, "grad_norm": 0.05987211661355866, "learning_rate": 5.527596672078028e-08, "loss": 0.0008, "step": 2603 }, { "epoch": 4.95, "grad_norm": 0.120353594428597, "learning_rate": 5.125761838959964e-08, "loss": 0.0014, "step": 2604 }, { "epoch": 4.95, "grad_norm": 0.09992963261873887, "learning_rate": 4.73908550162272e-08, "loss": 0.0013, "step": 2605 }, { "epoch": 4.95, "grad_norm": 0.038537193742353704, "learning_rate": 4.367568246510523e-08, "loss": 0.0013, "step": 2606 }, { "epoch": 4.96, "grad_norm": 0.02322641827749232, "learning_rate": 4.01121063707599e-08, "loss": 0.0004, "step": 2607 }, { "epoch": 4.96, "grad_norm": 0.1270597235887708, "learning_rate": 3.6700132137812426e-08, "loss": 0.0023, "step": 2608 }, { "epoch": 4.96, "grad_norm": 0.06376014895578985, "learning_rate": 3.3439764940934594e-08, "loss": 0.0036, "step": 2609 }, { "epoch": 4.96, "grad_norm": 0.03686280283896642, "learning_rate": 3.033100972491543e-08, "loss": 0.001, "step": 2610 }, { "epoch": 4.96, "grad_norm": 0.007476811282146952, "learning_rate": 2.7373871204561252e-08, "loss": 0.0003, "step": 2611 }, { "epoch": 4.97, "grad_norm": 0.00972296150248139, "learning_rate": 2.4568353864751204e-08, "loss": 0.0003, "step": 2612 }, { "epoch": 4.97, "grad_norm": 0.14542682093952902, "learning_rate": 2.191446196040392e-08, "loss": 0.0035, "step": 2613 }, { "epoch": 4.97, "grad_norm": 0.06290806699766371, "learning_rate": 1.941219951648865e-08, "loss": 0.0009, "step": 2614 }, { "epoch": 4.97, "grad_norm": 0.0131718784232126, "learning_rate": 1.706157032800304e-08, "loss": 0.0005, "step": 2615 }, { "epoch": 4.97, "grad_norm": 0.011382439596556946, "learning_rate": 1.4862577959973145e-08, "loss": 0.0004, "step": 2616 }, { "epoch": 4.98, "grad_norm": 0.02885523090718644, "learning_rate": 1.281522574745342e-08, "loss": 0.0005, "step": 2617 }, { "epoch": 4.98, "grad_norm": 0.023207892314343326, "learning_rate": 1.0919516795515617e-08, "loss": 0.0007, "step": 2618 }, { "epoch": 4.98, "grad_norm": 0.127179723689329, "learning_rate": 9.17545397922659e-09, "loss": 0.001, "step": 2619 }, { "epoch": 4.98, "grad_norm": 0.024531112676178513, "learning_rate": 7.583039943703795e-09, "loss": 0.0006, "step": 2620 }, { "epoch": 4.98, "grad_norm": 0.08925343406175566, "learning_rate": 6.142277104026484e-09, "loss": 0.0018, "step": 2621 }, { "epoch": 4.98, "grad_norm": 0.02194610487270605, "learning_rate": 4.853167645302303e-09, "loss": 0.0005, "step": 2622 }, { "epoch": 4.99, "grad_norm": 0.015935450945960765, "learning_rate": 3.715713522622899e-09, "loss": 0.0004, "step": 2623 }, { "epoch": 4.99, "grad_norm": 0.018652762435898033, "learning_rate": 2.729916461097215e-09, "loss": 0.0005, "step": 2624 }, { "epoch": 4.99, "grad_norm": 0.025814596144350454, "learning_rate": 1.8957779557959855e-09, "loss": 0.0007, "step": 2625 }, { "epoch": 4.99, "grad_norm": 0.013141250703923461, "learning_rate": 1.2132992717961422e-09, "loss": 0.0005, "step": 2626 }, { "epoch": 4.99, "grad_norm": 0.11212192006038513, "learning_rate": 6.824814441808159e-10, "loss": 0.0023, "step": 2627 }, { "epoch": 5.0, "grad_norm": 0.05269515202357764, "learning_rate": 3.033252779838236e-10, "loss": 0.0013, "step": 2628 }, { "epoch": 5.0, "grad_norm": 0.045446613278717574, "learning_rate": 7.583134824518112e-11, "loss": 0.0012, "step": 2629 }, { "epoch": 5.0, "grad_norm": 0.006239283584941159, "learning_rate": 0.0, "loss": 0.0002, "step": 2630 }, { "epoch": 5.0, "step": 2630, "total_flos": 59706594754560.0, "train_loss": 0.21589338771112268, "train_runtime": 24742.8256, "train_samples_per_second": 13.596, "train_steps_per_second": 0.106 } ], "logging_steps": 1.0, "max_steps": 2630, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 59706594754560.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }