{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 100, "global_step": 2630, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.9178037643432617, "learning_rate": 7.59493670886076e-07, "loss": 0.1337, "step": 1 }, { "epoch": 0.0, "grad_norm": 5.145163059234619, "learning_rate": 1.518987341772152e-06, "loss": 0.1635, "step": 2 }, { "epoch": 0.01, "grad_norm": 5.679388999938965, "learning_rate": 2.278481012658228e-06, "loss": 0.1945, "step": 3 }, { "epoch": 0.01, "grad_norm": 5.537349700927734, "learning_rate": 3.037974683544304e-06, "loss": 0.1835, "step": 4 }, { "epoch": 0.01, "grad_norm": 5.0309576988220215, "learning_rate": 3.7974683544303802e-06, "loss": 0.1519, "step": 5 }, { "epoch": 0.01, "grad_norm": 4.202467441558838, "learning_rate": 4.556962025316456e-06, "loss": 0.1758, "step": 6 }, { "epoch": 0.01, "grad_norm": 5.201393127441406, "learning_rate": 5.3164556962025316e-06, "loss": 0.175, "step": 7 }, { "epoch": 0.02, "grad_norm": 5.629143238067627, "learning_rate": 6.075949367088608e-06, "loss": 0.121, "step": 8 }, { "epoch": 0.02, "grad_norm": 4.6880693435668945, "learning_rate": 6.835443037974683e-06, "loss": 0.1623, "step": 9 }, { "epoch": 0.02, "grad_norm": 5.9317450523376465, "learning_rate": 7.5949367088607605e-06, "loss": 0.1889, "step": 10 }, { "epoch": 0.02, "grad_norm": 7.021185874938965, "learning_rate": 8.354430379746835e-06, "loss": 0.1548, "step": 11 }, { "epoch": 0.02, "grad_norm": 6.778924465179443, "learning_rate": 9.113924050632912e-06, "loss": 0.1778, "step": 12 }, { "epoch": 0.02, "grad_norm": 8.034538269042969, "learning_rate": 9.873417721518988e-06, "loss": 0.1859, "step": 13 }, { "epoch": 0.03, "grad_norm": 7.433892726898193, "learning_rate": 1.0632911392405063e-05, "loss": 0.194, "step": 14 }, { "epoch": 0.03, "grad_norm": 8.679495811462402, "learning_rate": 1.139240506329114e-05, "loss": 0.3057, "step": 15 }, { "epoch": 0.03, "grad_norm": 6.622086524963379, "learning_rate": 1.2151898734177216e-05, "loss": 0.1531, "step": 16 }, { "epoch": 0.03, "grad_norm": 9.60741138458252, "learning_rate": 1.2911392405063291e-05, "loss": 0.2835, "step": 17 }, { "epoch": 0.03, "grad_norm": 10.686683654785156, "learning_rate": 1.3670886075949367e-05, "loss": 0.3471, "step": 18 }, { "epoch": 0.04, "grad_norm": 10.399796485900879, "learning_rate": 1.4430379746835444e-05, "loss": 0.339, "step": 19 }, { "epoch": 0.04, "grad_norm": 11.086629867553711, "learning_rate": 1.5189873417721521e-05, "loss": 0.3284, "step": 20 }, { "epoch": 0.04, "grad_norm": 10.86033821105957, "learning_rate": 1.5949367088607595e-05, "loss": 0.3484, "step": 21 }, { "epoch": 0.04, "grad_norm": 10.078252792358398, "learning_rate": 1.670886075949367e-05, "loss": 0.4086, "step": 22 }, { "epoch": 0.04, "grad_norm": 10.870636940002441, "learning_rate": 1.7468354430379746e-05, "loss": 0.4558, "step": 23 }, { "epoch": 0.05, "grad_norm": 10.51819133758545, "learning_rate": 1.8227848101265824e-05, "loss": 0.4707, "step": 24 }, { "epoch": 0.05, "grad_norm": 9.369209289550781, "learning_rate": 1.89873417721519e-05, "loss": 0.5728, "step": 25 }, { "epoch": 0.05, "grad_norm": 9.470343589782715, "learning_rate": 1.9746835443037975e-05, "loss": 0.5723, "step": 26 }, { "epoch": 0.05, "grad_norm": 10.251852989196777, "learning_rate": 2.050632911392405e-05, "loss": 0.6438, "step": 27 }, { "epoch": 0.05, "grad_norm": 9.325061798095703, "learning_rate": 2.1265822784810126e-05, "loss": 0.6188, "step": 28 }, { "epoch": 0.06, "grad_norm": 9.562792778015137, "learning_rate": 2.2025316455696205e-05, "loss": 0.6782, "step": 29 }, { "epoch": 0.06, "grad_norm": 8.499227523803711, "learning_rate": 2.278481012658228e-05, "loss": 0.589, "step": 30 }, { "epoch": 0.06, "grad_norm": 9.434649467468262, "learning_rate": 2.3544303797468353e-05, "loss": 0.681, "step": 31 }, { "epoch": 0.06, "grad_norm": 9.950018882751465, "learning_rate": 2.430379746835443e-05, "loss": 0.7878, "step": 32 }, { "epoch": 0.06, "grad_norm": 9.224929809570312, "learning_rate": 2.5063291139240507e-05, "loss": 0.7453, "step": 33 }, { "epoch": 0.06, "grad_norm": 8.095212936401367, "learning_rate": 2.5822784810126582e-05, "loss": 0.7134, "step": 34 }, { "epoch": 0.07, "grad_norm": 8.467231750488281, "learning_rate": 2.6582278481012658e-05, "loss": 0.7862, "step": 35 }, { "epoch": 0.07, "grad_norm": 8.939255714416504, "learning_rate": 2.7341772151898733e-05, "loss": 0.8078, "step": 36 }, { "epoch": 0.07, "grad_norm": 8.694756507873535, "learning_rate": 2.8101265822784812e-05, "loss": 0.8551, "step": 37 }, { "epoch": 0.07, "grad_norm": 8.83495807647705, "learning_rate": 2.8860759493670888e-05, "loss": 0.8021, "step": 38 }, { "epoch": 0.07, "grad_norm": 7.58929443359375, "learning_rate": 2.9620253164556963e-05, "loss": 0.7758, "step": 39 }, { "epoch": 0.08, "grad_norm": 7.393100738525391, "learning_rate": 3.0379746835443042e-05, "loss": 0.813, "step": 40 }, { "epoch": 0.08, "grad_norm": 7.671378135681152, "learning_rate": 3.1139240506329114e-05, "loss": 0.7975, "step": 41 }, { "epoch": 0.08, "grad_norm": 7.613136291503906, "learning_rate": 3.189873417721519e-05, "loss": 0.8874, "step": 42 }, { "epoch": 0.08, "grad_norm": 7.509779453277588, "learning_rate": 3.2658227848101265e-05, "loss": 0.7297, "step": 43 }, { "epoch": 0.08, "grad_norm": 7.137118816375732, "learning_rate": 3.341772151898734e-05, "loss": 0.7928, "step": 44 }, { "epoch": 0.09, "grad_norm": 7.485304832458496, "learning_rate": 3.417721518987342e-05, "loss": 0.7672, "step": 45 }, { "epoch": 0.09, "grad_norm": 7.349469184875488, "learning_rate": 3.493670886075949e-05, "loss": 0.673, "step": 46 }, { "epoch": 0.09, "grad_norm": 7.599184989929199, "learning_rate": 3.569620253164557e-05, "loss": 0.791, "step": 47 }, { "epoch": 0.09, "grad_norm": 7.672878265380859, "learning_rate": 3.645569620253165e-05, "loss": 0.8976, "step": 48 }, { "epoch": 0.09, "grad_norm": 7.521357536315918, "learning_rate": 3.721518987341772e-05, "loss": 0.7654, "step": 49 }, { "epoch": 0.1, "grad_norm": 6.770138740539551, "learning_rate": 3.79746835443038e-05, "loss": 0.8296, "step": 50 }, { "epoch": 0.1, "grad_norm": 7.303180694580078, "learning_rate": 3.8734177215189875e-05, "loss": 0.912, "step": 51 }, { "epoch": 0.1, "grad_norm": 6.6409382820129395, "learning_rate": 3.949367088607595e-05, "loss": 0.8534, "step": 52 }, { "epoch": 0.1, "grad_norm": 6.812019348144531, "learning_rate": 4.0253164556962026e-05, "loss": 0.8732, "step": 53 }, { "epoch": 0.1, "grad_norm": 6.574493408203125, "learning_rate": 4.10126582278481e-05, "loss": 0.9303, "step": 54 }, { "epoch": 0.1, "grad_norm": 6.0937275886535645, "learning_rate": 4.1772151898734184e-05, "loss": 0.6772, "step": 55 }, { "epoch": 0.11, "grad_norm": 6.544432640075684, "learning_rate": 4.253164556962025e-05, "loss": 0.8691, "step": 56 }, { "epoch": 0.11, "grad_norm": 6.647430419921875, "learning_rate": 4.329113924050633e-05, "loss": 0.8648, "step": 57 }, { "epoch": 0.11, "grad_norm": 7.131146430969238, "learning_rate": 4.405063291139241e-05, "loss": 0.9166, "step": 58 }, { "epoch": 0.11, "grad_norm": 6.380138874053955, "learning_rate": 4.481012658227848e-05, "loss": 0.6997, "step": 59 }, { "epoch": 0.11, "grad_norm": 6.713687419891357, "learning_rate": 4.556962025316456e-05, "loss": 0.8989, "step": 60 }, { "epoch": 0.12, "grad_norm": 5.137933254241943, "learning_rate": 4.6329113924050637e-05, "loss": 0.7503, "step": 61 }, { "epoch": 0.12, "grad_norm": 5.792801856994629, "learning_rate": 4.7088607594936705e-05, "loss": 0.9457, "step": 62 }, { "epoch": 0.12, "grad_norm": 6.09445858001709, "learning_rate": 4.784810126582279e-05, "loss": 0.7407, "step": 63 }, { "epoch": 0.12, "grad_norm": 6.43452262878418, "learning_rate": 4.860759493670886e-05, "loss": 0.935, "step": 64 }, { "epoch": 0.12, "grad_norm": 5.942362308502197, "learning_rate": 4.936708860759494e-05, "loss": 0.7772, "step": 65 }, { "epoch": 0.13, "grad_norm": 5.4012298583984375, "learning_rate": 5.0126582278481014e-05, "loss": 0.7525, "step": 66 }, { "epoch": 0.13, "grad_norm": 6.424576759338379, "learning_rate": 5.088607594936709e-05, "loss": 0.9226, "step": 67 }, { "epoch": 0.13, "grad_norm": 5.242638111114502, "learning_rate": 5.1645569620253165e-05, "loss": 0.6777, "step": 68 }, { "epoch": 0.13, "grad_norm": 5.477936744689941, "learning_rate": 5.240506329113924e-05, "loss": 0.8089, "step": 69 }, { "epoch": 0.13, "grad_norm": 5.14202356338501, "learning_rate": 5.3164556962025316e-05, "loss": 0.7228, "step": 70 }, { "epoch": 0.13, "grad_norm": 5.5987653732299805, "learning_rate": 5.39240506329114e-05, "loss": 0.7745, "step": 71 }, { "epoch": 0.14, "grad_norm": 5.638814926147461, "learning_rate": 5.4683544303797467e-05, "loss": 0.8268, "step": 72 }, { "epoch": 0.14, "grad_norm": 5.210118770599365, "learning_rate": 5.544303797468355e-05, "loss": 0.7129, "step": 73 }, { "epoch": 0.14, "grad_norm": 5.078627586364746, "learning_rate": 5.6202531645569624e-05, "loss": 0.8765, "step": 74 }, { "epoch": 0.14, "grad_norm": 5.073202610015869, "learning_rate": 5.696202531645569e-05, "loss": 0.8022, "step": 75 }, { "epoch": 0.14, "grad_norm": 5.502076625823975, "learning_rate": 5.7721518987341775e-05, "loss": 0.8125, "step": 76 }, { "epoch": 0.15, "grad_norm": 4.979417324066162, "learning_rate": 5.848101265822785e-05, "loss": 0.8057, "step": 77 }, { "epoch": 0.15, "grad_norm": 15.108443260192871, "learning_rate": 5.9240506329113926e-05, "loss": 0.7012, "step": 78 }, { "epoch": 0.15, "grad_norm": 5.183372974395752, "learning_rate": 6e-05, "loss": 0.8966, "step": 79 }, { "epoch": 0.15, "grad_norm": 4.979413032531738, "learning_rate": 5.999997725059553e-05, "loss": 0.7637, "step": 80 }, { "epoch": 0.15, "grad_norm": 5.306042194366455, "learning_rate": 5.9999909002416605e-05, "loss": 0.9028, "step": 81 }, { "epoch": 0.16, "grad_norm": 4.906080722808838, "learning_rate": 5.999979525556675e-05, "loss": 0.7755, "step": 82 }, { "epoch": 0.16, "grad_norm": 5.158020496368408, "learning_rate": 5.999963601021846e-05, "loss": 0.9171, "step": 83 }, { "epoch": 0.16, "grad_norm": 4.6192307472229, "learning_rate": 5.999943126661326e-05, "loss": 0.7796, "step": 84 }, { "epoch": 0.16, "grad_norm": 5.2372612953186035, "learning_rate": 5.9999181025061666e-05, "loss": 0.933, "step": 85 }, { "epoch": 0.16, "grad_norm": 5.088154315948486, "learning_rate": 5.999888528594321e-05, "loss": 0.7788, "step": 86 }, { "epoch": 0.17, "grad_norm": 4.975386142730713, "learning_rate": 5.9998544049706406e-05, "loss": 0.856, "step": 87 }, { "epoch": 0.17, "grad_norm": 4.655994415283203, "learning_rate": 5.999815731686879e-05, "loss": 0.7073, "step": 88 }, { "epoch": 0.17, "grad_norm": 5.228769302368164, "learning_rate": 5.999772508801689e-05, "loss": 0.7698, "step": 89 }, { "epoch": 0.17, "grad_norm": 4.9669389724731445, "learning_rate": 5.999724736380623e-05, "loss": 0.8543, "step": 90 }, { "epoch": 0.17, "grad_norm": 4.38006591796875, "learning_rate": 5.999672414496134e-05, "loss": 0.6732, "step": 91 }, { "epoch": 0.17, "grad_norm": 29.22960662841797, "learning_rate": 5.999615543227577e-05, "loss": 0.7667, "step": 92 }, { "epoch": 0.18, "grad_norm": 5.180074214935303, "learning_rate": 5.999554122661201e-05, "loss": 0.6817, "step": 93 }, { "epoch": 0.18, "grad_norm": 5.02724027633667, "learning_rate": 5.9994881528901594e-05, "loss": 0.7842, "step": 94 }, { "epoch": 0.18, "grad_norm": 5.431690692901611, "learning_rate": 5.999417634014506e-05, "loss": 0.7464, "step": 95 }, { "epoch": 0.18, "grad_norm": 6.079984664916992, "learning_rate": 5.999342566141188e-05, "loss": 0.7342, "step": 96 }, { "epoch": 0.18, "grad_norm": 4.984606742858887, "learning_rate": 5.9992629493840576e-05, "loss": 0.8233, "step": 97 }, { "epoch": 0.19, "grad_norm": 5.192000389099121, "learning_rate": 5.999178783863863e-05, "loss": 0.7302, "step": 98 }, { "epoch": 0.19, "grad_norm": 5.316534519195557, "learning_rate": 5.9990900697082524e-05, "loss": 0.7964, "step": 99 }, { "epoch": 0.19, "grad_norm": 4.617951393127441, "learning_rate": 5.9989968070517715e-05, "loss": 0.6914, "step": 100 }, { "epoch": 0.19, "eval_blimp_filtered_avg": 0.7338805970149254, "eval_blimp_filtered_std": 0.0048823323060754684, "step": 100 }, { "epoch": 0.19, "eval_blimp_supplement_avg": 0.7995689655172413, "eval_blimp_supplement_std": 0.017413320190118042, "step": 100 }, { "epoch": 0.19, "eval_vqa_filtered_avg": 0.35, "eval_vqa_filtered_std": 0.047937248544110196, "step": 100 }, { "epoch": 0.19, "eval_winoground_filtered_avg": 0.52, "eval_winoground_filtered_std": 0.05021167315686779, "step": 100 }, { "epoch": 0.19, "grad_norm": 3.9274191856384277, "learning_rate": 5.998898996035866e-05, "loss": 0.572, "step": 101 }, { "epoch": 0.19, "grad_norm": 4.757721900939941, "learning_rate": 5.9987966368088775e-05, "loss": 0.7208, "step": 102 }, { "epoch": 0.2, "grad_norm": 5.277878284454346, "learning_rate": 5.998689729526047e-05, "loss": 0.7923, "step": 103 }, { "epoch": 0.2, "grad_norm": 4.434948921203613, "learning_rate": 5.998578274349514e-05, "loss": 0.7017, "step": 104 }, { "epoch": 0.2, "grad_norm": 4.075104236602783, "learning_rate": 5.998462271448312e-05, "loss": 0.7192, "step": 105 }, { "epoch": 0.2, "grad_norm": 3.971759557723999, "learning_rate": 5.998341720998377e-05, "loss": 0.728, "step": 106 }, { "epoch": 0.2, "grad_norm": 3.6067185401916504, "learning_rate": 5.998216623182537e-05, "loss": 0.6644, "step": 107 }, { "epoch": 0.21, "grad_norm": 4.2261528968811035, "learning_rate": 5.99808697819052e-05, "loss": 0.8316, "step": 108 }, { "epoch": 0.21, "grad_norm": 4.665741443634033, "learning_rate": 5.997952786218949e-05, "loss": 0.6785, "step": 109 }, { "epoch": 0.21, "grad_norm": 4.038613796234131, "learning_rate": 5.9978140474713426e-05, "loss": 0.6905, "step": 110 }, { "epoch": 0.21, "grad_norm": 4.172923564910889, "learning_rate": 5.997670762158116e-05, "loss": 0.6963, "step": 111 }, { "epoch": 0.21, "grad_norm": 3.687509536743164, "learning_rate": 5.9975229304965784e-05, "loss": 0.5782, "step": 112 }, { "epoch": 0.21, "grad_norm": 3.9388458728790283, "learning_rate": 5.9973705527109374e-05, "loss": 0.6037, "step": 113 }, { "epoch": 0.22, "grad_norm": 3.780076026916504, "learning_rate": 5.997213629032293e-05, "loss": 0.5709, "step": 114 }, { "epoch": 0.22, "grad_norm": 4.839271068572998, "learning_rate": 5.997052159698638e-05, "loss": 0.5531, "step": 115 }, { "epoch": 0.22, "grad_norm": 5.035843372344971, "learning_rate": 5.996886144954862e-05, "loss": 0.8543, "step": 116 }, { "epoch": 0.22, "grad_norm": 3.7868270874023438, "learning_rate": 5.996715585052748e-05, "loss": 0.6511, "step": 117 }, { "epoch": 0.22, "grad_norm": 3.7187459468841553, "learning_rate": 5.996540480250971e-05, "loss": 0.5694, "step": 118 }, { "epoch": 0.23, "grad_norm": 3.5235018730163574, "learning_rate": 5.996360830815101e-05, "loss": 0.5512, "step": 119 }, { "epoch": 0.23, "grad_norm": 4.018977165222168, "learning_rate": 5.996176637017597e-05, "loss": 0.5428, "step": 120 }, { "epoch": 0.23, "grad_norm": 4.850902080535889, "learning_rate": 5.995987899137815e-05, "loss": 0.7791, "step": 121 }, { "epoch": 0.23, "grad_norm": 4.253856182098389, "learning_rate": 5.995794617461998e-05, "loss": 0.5913, "step": 122 }, { "epoch": 0.23, "grad_norm": 3.9796884059906006, "learning_rate": 5.995596792283283e-05, "loss": 0.6376, "step": 123 }, { "epoch": 0.24, "grad_norm": 3.6007590293884277, "learning_rate": 5.9953944239016966e-05, "loss": 0.5743, "step": 124 }, { "epoch": 0.24, "grad_norm": 4.195765495300293, "learning_rate": 5.9951875126241556e-05, "loss": 0.6949, "step": 125 }, { "epoch": 0.24, "grad_norm": 4.4413275718688965, "learning_rate": 5.994976058764468e-05, "loss": 0.6985, "step": 126 }, { "epoch": 0.24, "grad_norm": 4.659218788146973, "learning_rate": 5.994760062643331e-05, "loss": 0.7086, "step": 127 }, { "epoch": 0.24, "grad_norm": 4.4821319580078125, "learning_rate": 5.994539524588329e-05, "loss": 0.6839, "step": 128 }, { "epoch": 0.25, "grad_norm": 3.9956719875335693, "learning_rate": 5.9943144449339356e-05, "loss": 0.6439, "step": 129 }, { "epoch": 0.25, "grad_norm": 4.131137371063232, "learning_rate": 5.994084824021515e-05, "loss": 0.7482, "step": 130 }, { "epoch": 0.25, "grad_norm": 3.4933621883392334, "learning_rate": 5.993850662199314e-05, "loss": 0.5903, "step": 131 }, { "epoch": 0.25, "grad_norm": 4.5322585105896, "learning_rate": 5.9936119598224706e-05, "loss": 0.6306, "step": 132 }, { "epoch": 0.25, "grad_norm": 3.0102627277374268, "learning_rate": 5.993368717253006e-05, "loss": 0.4891, "step": 133 }, { "epoch": 0.25, "grad_norm": 4.039618968963623, "learning_rate": 5.993120934859828e-05, "loss": 0.5104, "step": 134 }, { "epoch": 0.26, "grad_norm": 3.7283830642700195, "learning_rate": 5.992868613018731e-05, "loss": 0.5507, "step": 135 }, { "epoch": 0.26, "grad_norm": 3.314037561416626, "learning_rate": 5.992611752112394e-05, "loss": 0.4333, "step": 136 }, { "epoch": 0.26, "grad_norm": 3.8569116592407227, "learning_rate": 5.992350352530377e-05, "loss": 0.6142, "step": 137 }, { "epoch": 0.26, "grad_norm": 3.989668607711792, "learning_rate": 5.992084414669127e-05, "loss": 0.4735, "step": 138 }, { "epoch": 0.26, "grad_norm": 3.7991912364959717, "learning_rate": 5.9918139389319726e-05, "loss": 0.5126, "step": 139 }, { "epoch": 0.27, "grad_norm": 3.448589324951172, "learning_rate": 5.991538925729125e-05, "loss": 0.4504, "step": 140 }, { "epoch": 0.27, "grad_norm": 3.7634055614471436, "learning_rate": 5.991259375477674e-05, "loss": 0.5294, "step": 141 }, { "epoch": 0.27, "grad_norm": 3.474403142929077, "learning_rate": 5.990975288601597e-05, "loss": 0.4134, "step": 142 }, { "epoch": 0.27, "grad_norm": 3.542149066925049, "learning_rate": 5.990686665531745e-05, "loss": 0.6313, "step": 143 }, { "epoch": 0.27, "grad_norm": 7.515610694885254, "learning_rate": 5.9903935067058524e-05, "loss": 0.5194, "step": 144 }, { "epoch": 0.28, "grad_norm": 3.6602540016174316, "learning_rate": 5.990095812568532e-05, "loss": 0.5121, "step": 145 }, { "epoch": 0.28, "grad_norm": 3.622753858566284, "learning_rate": 5.989793583571274e-05, "loss": 0.5868, "step": 146 }, { "epoch": 0.28, "grad_norm": 3.281049966812134, "learning_rate": 5.9894868201724475e-05, "loss": 0.4085, "step": 147 }, { "epoch": 0.28, "grad_norm": 3.3639843463897705, "learning_rate": 5.989175522837298e-05, "loss": 0.4384, "step": 148 }, { "epoch": 0.28, "grad_norm": 3.805417537689209, "learning_rate": 5.9888596920379485e-05, "loss": 0.507, "step": 149 }, { "epoch": 0.29, "grad_norm": 3.676046848297119, "learning_rate": 5.988539328253395e-05, "loss": 0.5969, "step": 150 }, { "epoch": 0.29, "grad_norm": 3.669481039047241, "learning_rate": 5.9882144319695104e-05, "loss": 0.5173, "step": 151 }, { "epoch": 0.29, "grad_norm": 4.32971715927124, "learning_rate": 5.987885003679042e-05, "loss": 0.5519, "step": 152 }, { "epoch": 0.29, "grad_norm": 3.2745518684387207, "learning_rate": 5.987551043881608e-05, "loss": 0.4358, "step": 153 }, { "epoch": 0.29, "grad_norm": 3.2405261993408203, "learning_rate": 5.987212553083702e-05, "loss": 0.4208, "step": 154 }, { "epoch": 0.29, "grad_norm": 3.2085394859313965, "learning_rate": 5.986869531798688e-05, "loss": 0.5158, "step": 155 }, { "epoch": 0.3, "grad_norm": 3.2327260971069336, "learning_rate": 5.986521980546801e-05, "loss": 0.4864, "step": 156 }, { "epoch": 0.3, "grad_norm": 2.937777280807495, "learning_rate": 5.986169899855147e-05, "loss": 0.3904, "step": 157 }, { "epoch": 0.3, "grad_norm": 3.773653268814087, "learning_rate": 5.9858132902577014e-05, "loss": 0.5072, "step": 158 }, { "epoch": 0.3, "grad_norm": 3.7459592819213867, "learning_rate": 5.9854521522953065e-05, "loss": 0.6356, "step": 159 }, { "epoch": 0.3, "grad_norm": 3.1582250595092773, "learning_rate": 5.9850864865156764e-05, "loss": 0.4183, "step": 160 }, { "epoch": 0.31, "grad_norm": 2.863201141357422, "learning_rate": 5.984716293473387e-05, "loss": 0.3847, "step": 161 }, { "epoch": 0.31, "grad_norm": 3.0690877437591553, "learning_rate": 5.9843415737298844e-05, "loss": 0.4273, "step": 162 }, { "epoch": 0.31, "grad_norm": 3.2450554370880127, "learning_rate": 5.983962327853479e-05, "loss": 0.412, "step": 163 }, { "epoch": 0.31, "grad_norm": 3.047470808029175, "learning_rate": 5.983578556419344e-05, "loss": 0.4135, "step": 164 }, { "epoch": 0.31, "grad_norm": 3.358280897140503, "learning_rate": 5.983190260009519e-05, "loss": 0.5194, "step": 165 }, { "epoch": 0.32, "grad_norm": 3.7504189014434814, "learning_rate": 5.982797439212904e-05, "loss": 0.544, "step": 166 }, { "epoch": 0.32, "grad_norm": 3.3471579551696777, "learning_rate": 5.982400094625261e-05, "loss": 0.4946, "step": 167 }, { "epoch": 0.32, "grad_norm": 3.6451363563537598, "learning_rate": 5.981998226849215e-05, "loss": 0.4891, "step": 168 }, { "epoch": 0.32, "grad_norm": 2.7766494750976562, "learning_rate": 5.981591836494248e-05, "loss": 0.4231, "step": 169 }, { "epoch": 0.32, "grad_norm": 2.7406065464019775, "learning_rate": 5.981180924176704e-05, "loss": 0.4199, "step": 170 }, { "epoch": 0.33, "grad_norm": 3.185415029525757, "learning_rate": 5.9807654905197826e-05, "loss": 0.4184, "step": 171 }, { "epoch": 0.33, "grad_norm": 3.0489799976348877, "learning_rate": 5.980345536153543e-05, "loss": 0.3797, "step": 172 }, { "epoch": 0.33, "grad_norm": 2.854858636856079, "learning_rate": 5.979921061714898e-05, "loss": 0.4255, "step": 173 }, { "epoch": 0.33, "grad_norm": 3.6051957607269287, "learning_rate": 5.979492067847616e-05, "loss": 0.5252, "step": 174 }, { "epoch": 0.33, "grad_norm": 3.1513173580169678, "learning_rate": 5.979058555202324e-05, "loss": 0.3913, "step": 175 }, { "epoch": 0.33, "grad_norm": 3.3339765071868896, "learning_rate": 5.9786205244364954e-05, "loss": 0.5661, "step": 176 }, { "epoch": 0.34, "grad_norm": 2.8010852336883545, "learning_rate": 5.978177976214462e-05, "loss": 0.3712, "step": 177 }, { "epoch": 0.34, "grad_norm": 3.020876884460449, "learning_rate": 5.977730911207404e-05, "loss": 0.4717, "step": 178 }, { "epoch": 0.34, "grad_norm": 3.3257739543914795, "learning_rate": 5.9772793300933505e-05, "loss": 0.5192, "step": 179 }, { "epoch": 0.34, "grad_norm": 3.1674604415893555, "learning_rate": 5.976823233557184e-05, "loss": 0.4258, "step": 180 }, { "epoch": 0.34, "grad_norm": 3.238715648651123, "learning_rate": 5.9763626222906304e-05, "loss": 0.5655, "step": 181 }, { "epoch": 0.35, "grad_norm": 2.7789483070373535, "learning_rate": 5.975897496992267e-05, "loss": 0.4999, "step": 182 }, { "epoch": 0.35, "grad_norm": 2.551887273788452, "learning_rate": 5.975427858367514e-05, "loss": 0.4167, "step": 183 }, { "epoch": 0.35, "grad_norm": 2.5836236476898193, "learning_rate": 5.97495370712864e-05, "loss": 0.3751, "step": 184 }, { "epoch": 0.35, "grad_norm": 2.7166545391082764, "learning_rate": 5.974475043994753e-05, "loss": 0.3539, "step": 185 }, { "epoch": 0.35, "grad_norm": 2.656646966934204, "learning_rate": 5.973991869691809e-05, "loss": 0.3618, "step": 186 }, { "epoch": 0.36, "grad_norm": 2.761584997177124, "learning_rate": 5.973504184952602e-05, "loss": 0.4565, "step": 187 }, { "epoch": 0.36, "grad_norm": 2.6381356716156006, "learning_rate": 5.973011990516767e-05, "loss": 0.3768, "step": 188 }, { "epoch": 0.36, "grad_norm": 3.151721239089966, "learning_rate": 5.97251528713078e-05, "loss": 0.4709, "step": 189 }, { "epoch": 0.36, "grad_norm": 3.438836097717285, "learning_rate": 5.972014075547954e-05, "loss": 0.4529, "step": 190 }, { "epoch": 0.36, "grad_norm": 3.455315113067627, "learning_rate": 5.9715083565284426e-05, "loss": 0.3863, "step": 191 }, { "epoch": 0.37, "grad_norm": 3.0959794521331787, "learning_rate": 5.97099813083923e-05, "loss": 0.3882, "step": 192 }, { "epoch": 0.37, "grad_norm": 2.9149835109710693, "learning_rate": 5.970483399254139e-05, "loss": 0.435, "step": 193 }, { "epoch": 0.37, "grad_norm": 2.6301016807556152, "learning_rate": 5.9699641625538276e-05, "loss": 0.3061, "step": 194 }, { "epoch": 0.37, "grad_norm": 2.6692023277282715, "learning_rate": 5.9694404215257804e-05, "loss": 0.2991, "step": 195 }, { "epoch": 0.37, "grad_norm": 2.9471986293792725, "learning_rate": 5.96891217696432e-05, "loss": 0.315, "step": 196 }, { "epoch": 0.37, "grad_norm": 2.8195290565490723, "learning_rate": 5.968379429670596e-05, "loss": 0.4036, "step": 197 }, { "epoch": 0.38, "grad_norm": 2.799516439437866, "learning_rate": 5.967842180452585e-05, "loss": 0.4454, "step": 198 }, { "epoch": 0.38, "grad_norm": 2.6410439014434814, "learning_rate": 5.9673004301250975e-05, "loss": 0.344, "step": 199 }, { "epoch": 0.38, "grad_norm": 2.590595245361328, "learning_rate": 5.9667541795097633e-05, "loss": 0.3271, "step": 200 }, { "epoch": 0.38, "eval_blimp_filtered_avg": 0.731044776119403, "eval_blimp_filtered_std": 0.004917623386579932, "step": 200 }, { "epoch": 0.38, "eval_blimp_supplement_avg": 0.7995689655172413, "eval_blimp_supplement_std": 0.01689887816511576, "step": 200 }, { "epoch": 0.38, "eval_vqa_filtered_avg": 0.46, "eval_vqa_filtered_std": 0.05009082659620333, "step": 200 }, { "epoch": 0.38, "eval_winoground_filtered_avg": 0.52, "eval_winoground_filtered_std": 0.05021167315686779, "step": 200 }, { "epoch": 0.38, "grad_norm": 3.9680376052856445, "learning_rate": 5.9662034294350426e-05, "loss": 0.3957, "step": 201 }, { "epoch": 0.38, "grad_norm": 2.454033136367798, "learning_rate": 5.965648180736217e-05, "loss": 0.2839, "step": 202 }, { "epoch": 0.39, "grad_norm": 3.035494089126587, "learning_rate": 5.965088434255392e-05, "loss": 0.3998, "step": 203 }, { "epoch": 0.39, "grad_norm": 3.030374050140381, "learning_rate": 5.964524190841494e-05, "loss": 0.4197, "step": 204 }, { "epoch": 0.39, "grad_norm": 3.925255537033081, "learning_rate": 5.96395545135027e-05, "loss": 0.5301, "step": 205 }, { "epoch": 0.39, "grad_norm": 2.6362340450286865, "learning_rate": 5.963382216644285e-05, "loss": 0.3563, "step": 206 }, { "epoch": 0.39, "grad_norm": 2.416459798812866, "learning_rate": 5.962804487592924e-05, "loss": 0.2899, "step": 207 }, { "epoch": 0.4, "grad_norm": 2.7058846950531006, "learning_rate": 5.9622222650723846e-05, "loss": 0.3185, "step": 208 }, { "epoch": 0.4, "grad_norm": 2.7494921684265137, "learning_rate": 5.9616355499656825e-05, "loss": 0.3873, "step": 209 }, { "epoch": 0.4, "grad_norm": 2.6906375885009766, "learning_rate": 5.961044343162644e-05, "loss": 0.3798, "step": 210 }, { "epoch": 0.4, "grad_norm": 2.9552249908447266, "learning_rate": 5.960448645559912e-05, "loss": 0.4164, "step": 211 }, { "epoch": 0.4, "grad_norm": 2.3196799755096436, "learning_rate": 5.959848458060934e-05, "loss": 0.3611, "step": 212 }, { "epoch": 0.4, "grad_norm": 2.686737537384033, "learning_rate": 5.959243781575972e-05, "loss": 0.3239, "step": 213 }, { "epoch": 0.41, "grad_norm": 2.414632797241211, "learning_rate": 5.958634617022096e-05, "loss": 0.3565, "step": 214 }, { "epoch": 0.41, "grad_norm": 2.5314457416534424, "learning_rate": 5.958020965323181e-05, "loss": 0.3827, "step": 215 }, { "epoch": 0.41, "grad_norm": 3.778815746307373, "learning_rate": 5.9574028274099054e-05, "loss": 0.3652, "step": 216 }, { "epoch": 0.41, "grad_norm": 2.678086042404175, "learning_rate": 5.956780204219757e-05, "loss": 0.3256, "step": 217 }, { "epoch": 0.41, "grad_norm": 2.4087207317352295, "learning_rate": 5.956153096697021e-05, "loss": 0.3424, "step": 218 }, { "epoch": 0.42, "grad_norm": 2.48030948638916, "learning_rate": 5.9555215057927854e-05, "loss": 0.3063, "step": 219 }, { "epoch": 0.42, "grad_norm": 2.2783730030059814, "learning_rate": 5.9548854324649395e-05, "loss": 0.2769, "step": 220 }, { "epoch": 0.42, "grad_norm": 2.516817808151245, "learning_rate": 5.954244877678168e-05, "loss": 0.278, "step": 221 }, { "epoch": 0.42, "grad_norm": 3.013007640838623, "learning_rate": 5.953599842403954e-05, "loss": 0.3598, "step": 222 }, { "epoch": 0.42, "grad_norm": 2.973921775817871, "learning_rate": 5.952950327620576e-05, "loss": 0.3996, "step": 223 }, { "epoch": 0.43, "grad_norm": 2.809828281402588, "learning_rate": 5.9522963343131046e-05, "loss": 0.369, "step": 224 }, { "epoch": 0.43, "grad_norm": 2.286276340484619, "learning_rate": 5.9516378634734044e-05, "loss": 0.2596, "step": 225 }, { "epoch": 0.43, "grad_norm": 2.8966543674468994, "learning_rate": 5.950974916100128e-05, "loss": 0.3823, "step": 226 }, { "epoch": 0.43, "grad_norm": 2.701875686645508, "learning_rate": 5.9503074931987226e-05, "loss": 0.2708, "step": 227 }, { "epoch": 0.43, "grad_norm": 3.137319326400757, "learning_rate": 5.949635595781418e-05, "loss": 0.3664, "step": 228 }, { "epoch": 0.44, "grad_norm": 2.6834545135498047, "learning_rate": 5.948959224867232e-05, "loss": 0.3149, "step": 229 }, { "epoch": 0.44, "grad_norm": 3.0918145179748535, "learning_rate": 5.948278381481967e-05, "loss": 0.399, "step": 230 }, { "epoch": 0.44, "grad_norm": 2.5498368740081787, "learning_rate": 5.947593066658208e-05, "loss": 0.2744, "step": 231 }, { "epoch": 0.44, "grad_norm": 2.92252779006958, "learning_rate": 5.946903281435323e-05, "loss": 0.3307, "step": 232 }, { "epoch": 0.44, "grad_norm": 2.336634635925293, "learning_rate": 5.946209026859458e-05, "loss": 0.3582, "step": 233 }, { "epoch": 0.44, "grad_norm": 2.3818435668945312, "learning_rate": 5.9455103039835384e-05, "loss": 0.2562, "step": 234 }, { "epoch": 0.45, "grad_norm": 2.566678524017334, "learning_rate": 5.944807113867266e-05, "loss": 0.3115, "step": 235 }, { "epoch": 0.45, "grad_norm": 2.4223618507385254, "learning_rate": 5.944099457577119e-05, "loss": 0.2975, "step": 236 }, { "epoch": 0.45, "grad_norm": 3.084547519683838, "learning_rate": 5.943387336186346e-05, "loss": 0.3545, "step": 237 }, { "epoch": 0.45, "grad_norm": 2.2014191150665283, "learning_rate": 5.9426707507749716e-05, "loss": 0.313, "step": 238 }, { "epoch": 0.45, "grad_norm": 2.550644874572754, "learning_rate": 5.941949702429787e-05, "loss": 0.2996, "step": 239 }, { "epoch": 0.46, "grad_norm": 2.4011244773864746, "learning_rate": 5.9412241922443544e-05, "loss": 0.2734, "step": 240 }, { "epoch": 0.46, "grad_norm": 3.4184110164642334, "learning_rate": 5.940494221319002e-05, "loss": 0.4222, "step": 241 }, { "epoch": 0.46, "grad_norm": 2.4557957649230957, "learning_rate": 5.939759790760823e-05, "loss": 0.3637, "step": 242 }, { "epoch": 0.46, "grad_norm": 3.1393237113952637, "learning_rate": 5.939020901683676e-05, "loss": 0.4706, "step": 243 }, { "epoch": 0.46, "grad_norm": 3.145115375518799, "learning_rate": 5.938277555208178e-05, "loss": 0.2879, "step": 244 }, { "epoch": 0.47, "grad_norm": 2.6259799003601074, "learning_rate": 5.9375297524617085e-05, "loss": 0.2713, "step": 245 }, { "epoch": 0.47, "grad_norm": 2.680312395095825, "learning_rate": 5.936777494578407e-05, "loss": 0.2962, "step": 246 }, { "epoch": 0.47, "grad_norm": 2.5393292903900146, "learning_rate": 5.9360207826991674e-05, "loss": 0.3186, "step": 247 }, { "epoch": 0.47, "grad_norm": 2.968337059020996, "learning_rate": 5.935259617971639e-05, "loss": 0.4779, "step": 248 }, { "epoch": 0.47, "grad_norm": 2.607654094696045, "learning_rate": 5.9344940015502246e-05, "loss": 0.3812, "step": 249 }, { "epoch": 0.48, "grad_norm": 2.696956157684326, "learning_rate": 5.9337239345960794e-05, "loss": 0.3269, "step": 250 }, { "epoch": 0.48, "grad_norm": 2.592890501022339, "learning_rate": 5.932949418277107e-05, "loss": 0.2842, "step": 251 }, { "epoch": 0.48, "grad_norm": 3.0158824920654297, "learning_rate": 5.93217045376796e-05, "loss": 0.3226, "step": 252 }, { "epoch": 0.48, "grad_norm": 2.5884647369384766, "learning_rate": 5.9313870422500365e-05, "loss": 0.3052, "step": 253 }, { "epoch": 0.48, "grad_norm": 3.3384313583374023, "learning_rate": 5.9305991849114805e-05, "loss": 0.4197, "step": 254 }, { "epoch": 0.48, "grad_norm": 2.298654079437256, "learning_rate": 5.929806882947177e-05, "loss": 0.3389, "step": 255 }, { "epoch": 0.49, "grad_norm": 2.5178072452545166, "learning_rate": 5.929010137558754e-05, "loss": 0.336, "step": 256 }, { "epoch": 0.49, "grad_norm": 2.6319165229797363, "learning_rate": 5.928208949954574e-05, "loss": 0.3262, "step": 257 }, { "epoch": 0.49, "grad_norm": 2.577749490737915, "learning_rate": 5.927403321349742e-05, "loss": 0.3105, "step": 258 }, { "epoch": 0.49, "grad_norm": 2.849790334701538, "learning_rate": 5.926593252966096e-05, "loss": 0.347, "step": 259 }, { "epoch": 0.49, "grad_norm": 2.79257869720459, "learning_rate": 5.9257787460322066e-05, "loss": 0.3554, "step": 260 }, { "epoch": 0.5, "grad_norm": 2.3352463245391846, "learning_rate": 5.924959801783378e-05, "loss": 0.2683, "step": 261 }, { "epoch": 0.5, "grad_norm": 2.469876527786255, "learning_rate": 5.9241364214616424e-05, "loss": 0.3163, "step": 262 }, { "epoch": 0.5, "grad_norm": 2.2705085277557373, "learning_rate": 5.923308606315761e-05, "loss": 0.3234, "step": 263 }, { "epoch": 0.5, "grad_norm": 2.3504743576049805, "learning_rate": 5.92247635760122e-05, "loss": 0.3031, "step": 264 }, { "epoch": 0.5, "grad_norm": 2.54025936126709, "learning_rate": 5.921639676580231e-05, "loss": 0.3004, "step": 265 }, { "epoch": 0.51, "grad_norm": 2.2669663429260254, "learning_rate": 5.920798564521727e-05, "loss": 0.2681, "step": 266 }, { "epoch": 0.51, "grad_norm": 2.4458699226379395, "learning_rate": 5.919953022701361e-05, "loss": 0.3563, "step": 267 }, { "epoch": 0.51, "grad_norm": 2.774376392364502, "learning_rate": 5.919103052401503e-05, "loss": 0.3908, "step": 268 }, { "epoch": 0.51, "grad_norm": 2.2772321701049805, "learning_rate": 5.918248654911243e-05, "loss": 0.2056, "step": 269 }, { "epoch": 0.51, "grad_norm": 2.2852046489715576, "learning_rate": 5.917389831526383e-05, "loss": 0.3119, "step": 270 }, { "epoch": 0.52, "grad_norm": 2.3815088272094727, "learning_rate": 5.916526583549437e-05, "loss": 0.2836, "step": 271 }, { "epoch": 0.52, "grad_norm": 2.580552577972412, "learning_rate": 5.9156589122896294e-05, "loss": 0.302, "step": 272 }, { "epoch": 0.52, "grad_norm": 2.09328293800354, "learning_rate": 5.9147868190628955e-05, "loss": 0.2621, "step": 273 }, { "epoch": 0.52, "grad_norm": 2.5290675163269043, "learning_rate": 5.9139103051918755e-05, "loss": 0.3156, "step": 274 }, { "epoch": 0.52, "grad_norm": 2.701312780380249, "learning_rate": 5.913029372005913e-05, "loss": 0.2819, "step": 275 }, { "epoch": 0.52, "grad_norm": 3.000481367111206, "learning_rate": 5.912144020841055e-05, "loss": 0.3533, "step": 276 }, { "epoch": 0.53, "grad_norm": 3.1176326274871826, "learning_rate": 5.9112542530400493e-05, "loss": 0.3465, "step": 277 }, { "epoch": 0.53, "grad_norm": 2.0078957080841064, "learning_rate": 5.910360069952341e-05, "loss": 0.2698, "step": 278 }, { "epoch": 0.53, "grad_norm": 2.5833702087402344, "learning_rate": 5.9094614729340735e-05, "loss": 0.2669, "step": 279 }, { "epoch": 0.53, "grad_norm": 2.3653202056884766, "learning_rate": 5.908558463348082e-05, "loss": 0.328, "step": 280 }, { "epoch": 0.53, "grad_norm": 2.568941831588745, "learning_rate": 5.9076510425638965e-05, "loss": 0.331, "step": 281 }, { "epoch": 0.54, "grad_norm": 2.5518531799316406, "learning_rate": 5.906739211957734e-05, "loss": 0.3192, "step": 282 }, { "epoch": 0.54, "grad_norm": 2.541548490524292, "learning_rate": 5.905822972912505e-05, "loss": 0.2225, "step": 283 }, { "epoch": 0.54, "grad_norm": 2.839200258255005, "learning_rate": 5.9049023268177976e-05, "loss": 0.3639, "step": 284 }, { "epoch": 0.54, "grad_norm": 2.433194637298584, "learning_rate": 5.903977275069892e-05, "loss": 0.3161, "step": 285 }, { "epoch": 0.54, "grad_norm": 2.416057586669922, "learning_rate": 5.9030478190717454e-05, "loss": 0.2833, "step": 286 }, { "epoch": 0.55, "grad_norm": 2.2822322845458984, "learning_rate": 5.902113960232996e-05, "loss": 0.2692, "step": 287 }, { "epoch": 0.55, "grad_norm": 2.958235740661621, "learning_rate": 5.901175699969959e-05, "loss": 0.28, "step": 288 }, { "epoch": 0.55, "grad_norm": 2.285823345184326, "learning_rate": 5.900233039705625e-05, "loss": 0.2803, "step": 289 }, { "epoch": 0.55, "grad_norm": 2.873676300048828, "learning_rate": 5.899285980869658e-05, "loss": 0.3863, "step": 290 }, { "epoch": 0.55, "grad_norm": 2.751312732696533, "learning_rate": 5.8983345248983944e-05, "loss": 0.2085, "step": 291 }, { "epoch": 0.56, "grad_norm": 2.911459445953369, "learning_rate": 5.897378673234836e-05, "loss": 0.2624, "step": 292 }, { "epoch": 0.56, "grad_norm": 2.68094539642334, "learning_rate": 5.8964184273286544e-05, "loss": 0.3321, "step": 293 }, { "epoch": 0.56, "grad_norm": 3.0350465774536133, "learning_rate": 5.8954537886361846e-05, "loss": 0.4113, "step": 294 }, { "epoch": 0.56, "grad_norm": 2.3299427032470703, "learning_rate": 5.894484758620423e-05, "loss": 0.2482, "step": 295 }, { "epoch": 0.56, "grad_norm": 2.6114351749420166, "learning_rate": 5.893511338751027e-05, "loss": 0.2951, "step": 296 }, { "epoch": 0.56, "grad_norm": 2.0772764682769775, "learning_rate": 5.8925335305043106e-05, "loss": 0.2783, "step": 297 }, { "epoch": 0.57, "grad_norm": 2.269282341003418, "learning_rate": 5.891551335363245e-05, "loss": 0.2875, "step": 298 }, { "epoch": 0.57, "grad_norm": 1.8769372701644897, "learning_rate": 5.890564754817454e-05, "loss": 0.1977, "step": 299 }, { "epoch": 0.57, "grad_norm": 2.716355323791504, "learning_rate": 5.8895737903632116e-05, "loss": 0.3503, "step": 300 }, { "epoch": 0.57, "eval_blimp_filtered_avg": 0.7346268656716418, "eval_blimp_filtered_std": 0.004871651250570951, "step": 300 }, { "epoch": 0.57, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.017417364352440096, "step": 300 }, { "epoch": 0.57, "eval_vqa_filtered_avg": 0.39, "eval_vqa_filtered_std": 0.04902071300001975, "step": 300 }, { "epoch": 0.57, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 300 }, { "epoch": 0.57, "grad_norm": 2.6866073608398438, "learning_rate": 5.888578443503442e-05, "loss": 0.3214, "step": 301 }, { "epoch": 0.57, "grad_norm": 3.0890543460845947, "learning_rate": 5.887578715747714e-05, "loss": 0.4515, "step": 302 }, { "epoch": 0.58, "grad_norm": 2.4582107067108154, "learning_rate": 5.886574608612242e-05, "loss": 0.314, "step": 303 }, { "epoch": 0.58, "grad_norm": 2.6271920204162598, "learning_rate": 5.885566123619884e-05, "loss": 0.2973, "step": 304 }, { "epoch": 0.58, "grad_norm": 2.8287744522094727, "learning_rate": 5.884553262300132e-05, "loss": 0.326, "step": 305 }, { "epoch": 0.58, "grad_norm": 2.321165084838867, "learning_rate": 5.8835360261891216e-05, "loss": 0.2887, "step": 306 }, { "epoch": 0.58, "grad_norm": 2.334892749786377, "learning_rate": 5.8825144168296184e-05, "loss": 0.2835, "step": 307 }, { "epoch": 0.59, "grad_norm": 2.216693878173828, "learning_rate": 5.881488435771025e-05, "loss": 0.2612, "step": 308 }, { "epoch": 0.59, "grad_norm": 2.3379125595092773, "learning_rate": 5.88045808456937e-05, "loss": 0.2896, "step": 309 }, { "epoch": 0.59, "grad_norm": 2.648791551589966, "learning_rate": 5.879423364787313e-05, "loss": 0.3704, "step": 310 }, { "epoch": 0.59, "grad_norm": 2.3418290615081787, "learning_rate": 5.8783842779941364e-05, "loss": 0.2317, "step": 311 }, { "epoch": 0.59, "grad_norm": 2.2646195888519287, "learning_rate": 5.877340825765749e-05, "loss": 0.2465, "step": 312 }, { "epoch": 0.6, "grad_norm": 2.2615280151367188, "learning_rate": 5.876293009684679e-05, "loss": 0.2822, "step": 313 }, { "epoch": 0.6, "grad_norm": 2.1160836219787598, "learning_rate": 5.87524083134007e-05, "loss": 0.229, "step": 314 }, { "epoch": 0.6, "grad_norm": 2.533101797103882, "learning_rate": 5.874184292327686e-05, "loss": 0.2193, "step": 315 }, { "epoch": 0.6, "grad_norm": 2.3035876750946045, "learning_rate": 5.873123394249902e-05, "loss": 0.2269, "step": 316 }, { "epoch": 0.6, "grad_norm": 1.9207470417022705, "learning_rate": 5.872058138715704e-05, "loss": 0.2005, "step": 317 }, { "epoch": 0.6, "grad_norm": 2.1250712871551514, "learning_rate": 5.870988527340689e-05, "loss": 0.2169, "step": 318 }, { "epoch": 0.61, "grad_norm": 3.5033559799194336, "learning_rate": 5.8699145617470564e-05, "loss": 0.2705, "step": 319 }, { "epoch": 0.61, "grad_norm": 3.0903007984161377, "learning_rate": 5.868836243563613e-05, "loss": 0.4404, "step": 320 }, { "epoch": 0.61, "grad_norm": 2.3742923736572266, "learning_rate": 5.8677535744257647e-05, "loss": 0.3174, "step": 321 }, { "epoch": 0.61, "grad_norm": 2.314197301864624, "learning_rate": 5.866666555975517e-05, "loss": 0.1987, "step": 322 }, { "epoch": 0.61, "grad_norm": 2.790220022201538, "learning_rate": 5.86557518986147e-05, "loss": 0.3305, "step": 323 }, { "epoch": 0.62, "grad_norm": 3.4918172359466553, "learning_rate": 5.86447947773882e-05, "loss": 0.1878, "step": 324 }, { "epoch": 0.62, "grad_norm": 2.381162643432617, "learning_rate": 5.863379421269354e-05, "loss": 0.3021, "step": 325 }, { "epoch": 0.62, "grad_norm": 2.250220775604248, "learning_rate": 5.8622750221214466e-05, "loss": 0.2487, "step": 326 }, { "epoch": 0.62, "grad_norm": 2.025141477584839, "learning_rate": 5.86116628197006e-05, "loss": 0.2204, "step": 327 }, { "epoch": 0.62, "grad_norm": 2.0021190643310547, "learning_rate": 5.8600532024967384e-05, "loss": 0.2459, "step": 328 }, { "epoch": 0.63, "grad_norm": 2.1671030521392822, "learning_rate": 5.858935785389609e-05, "loss": 0.2312, "step": 329 }, { "epoch": 0.63, "grad_norm": 2.122903823852539, "learning_rate": 5.857814032343376e-05, "loss": 0.181, "step": 330 }, { "epoch": 0.63, "grad_norm": 2.5282070636749268, "learning_rate": 5.8566879450593216e-05, "loss": 0.2506, "step": 331 }, { "epoch": 0.63, "grad_norm": 3.409966230392456, "learning_rate": 5.855557525245298e-05, "loss": 0.3036, "step": 332 }, { "epoch": 0.63, "grad_norm": 2.314934253692627, "learning_rate": 5.8544227746157335e-05, "loss": 0.2869, "step": 333 }, { "epoch": 0.63, "grad_norm": 2.412508487701416, "learning_rate": 5.8532836948916186e-05, "loss": 0.2452, "step": 334 }, { "epoch": 0.64, "grad_norm": 2.045656681060791, "learning_rate": 5.852140287800513e-05, "loss": 0.1905, "step": 335 }, { "epoch": 0.64, "grad_norm": 2.349597215652466, "learning_rate": 5.85099255507654e-05, "loss": 0.2676, "step": 336 }, { "epoch": 0.64, "grad_norm": 2.7947564125061035, "learning_rate": 5.84984049846038e-05, "loss": 0.3452, "step": 337 }, { "epoch": 0.64, "grad_norm": 3.7544479370117188, "learning_rate": 5.848684119699275e-05, "loss": 0.2349, "step": 338 }, { "epoch": 0.64, "grad_norm": 2.1787116527557373, "learning_rate": 5.8475234205470195e-05, "loss": 0.2369, "step": 339 }, { "epoch": 0.65, "grad_norm": 2.7091166973114014, "learning_rate": 5.846358402763962e-05, "loss": 0.3069, "step": 340 }, { "epoch": 0.65, "grad_norm": 2.310443162918091, "learning_rate": 5.845189068116997e-05, "loss": 0.2778, "step": 341 }, { "epoch": 0.65, "grad_norm": 2.2098917961120605, "learning_rate": 5.844015418379572e-05, "loss": 0.2092, "step": 342 }, { "epoch": 0.65, "grad_norm": 3.0821828842163086, "learning_rate": 5.842837455331674e-05, "loss": 0.2735, "step": 343 }, { "epoch": 0.65, "grad_norm": 2.3731069564819336, "learning_rate": 5.841655180759835e-05, "loss": 0.2468, "step": 344 }, { "epoch": 0.66, "grad_norm": 2.5292489528656006, "learning_rate": 5.8404685964571236e-05, "loss": 0.2496, "step": 345 }, { "epoch": 0.66, "grad_norm": 2.3637948036193848, "learning_rate": 5.8392777042231455e-05, "loss": 0.1987, "step": 346 }, { "epoch": 0.66, "grad_norm": 3.157881021499634, "learning_rate": 5.8380825058640406e-05, "loss": 0.2782, "step": 347 }, { "epoch": 0.66, "grad_norm": 2.382798910140991, "learning_rate": 5.8368830031924785e-05, "loss": 0.2476, "step": 348 }, { "epoch": 0.66, "grad_norm": 2.4701547622680664, "learning_rate": 5.835679198027657e-05, "loss": 0.2102, "step": 349 }, { "epoch": 0.67, "grad_norm": 3.2264156341552734, "learning_rate": 5.8344710921953e-05, "loss": 0.254, "step": 350 }, { "epoch": 0.67, "grad_norm": 1.9908560514450073, "learning_rate": 5.8332586875276535e-05, "loss": 0.1755, "step": 351 }, { "epoch": 0.67, "grad_norm": 1.982800006866455, "learning_rate": 5.832041985863483e-05, "loss": 0.2252, "step": 352 }, { "epoch": 0.67, "grad_norm": 2.2395284175872803, "learning_rate": 5.8308209890480706e-05, "loss": 0.188, "step": 353 }, { "epoch": 0.67, "grad_norm": 2.446495771408081, "learning_rate": 5.829595698933213e-05, "loss": 0.2262, "step": 354 }, { "epoch": 0.67, "grad_norm": 2.6848526000976562, "learning_rate": 5.828366117377218e-05, "loss": 0.25, "step": 355 }, { "epoch": 0.68, "grad_norm": 2.533198595046997, "learning_rate": 5.8271322462449034e-05, "loss": 0.2322, "step": 356 }, { "epoch": 0.68, "grad_norm": 2.207051992416382, "learning_rate": 5.825894087407591e-05, "loss": 0.1848, "step": 357 }, { "epoch": 0.68, "grad_norm": 2.5082716941833496, "learning_rate": 5.8246516427431056e-05, "loss": 0.2662, "step": 358 }, { "epoch": 0.68, "grad_norm": 2.294053554534912, "learning_rate": 5.823404914135771e-05, "loss": 0.2522, "step": 359 }, { "epoch": 0.68, "grad_norm": 2.2550570964813232, "learning_rate": 5.822153903476412e-05, "loss": 0.2295, "step": 360 }, { "epoch": 0.69, "grad_norm": 2.4013185501098633, "learning_rate": 5.820898612662343e-05, "loss": 0.2289, "step": 361 }, { "epoch": 0.69, "grad_norm": 2.5732388496398926, "learning_rate": 5.819639043597372e-05, "loss": 0.4076, "step": 362 }, { "epoch": 0.69, "grad_norm": 2.2926485538482666, "learning_rate": 5.818375198191798e-05, "loss": 0.1752, "step": 363 }, { "epoch": 0.69, "grad_norm": 2.1141748428344727, "learning_rate": 5.8171070783624e-05, "loss": 0.2587, "step": 364 }, { "epoch": 0.69, "grad_norm": 2.037566900253296, "learning_rate": 5.815834686032444e-05, "loss": 0.1962, "step": 365 }, { "epoch": 0.7, "grad_norm": 2.057671546936035, "learning_rate": 5.814558023131674e-05, "loss": 0.2595, "step": 366 }, { "epoch": 0.7, "grad_norm": 2.374681234359741, "learning_rate": 5.813277091596311e-05, "loss": 0.3135, "step": 367 }, { "epoch": 0.7, "grad_norm": 1.9933072328567505, "learning_rate": 5.811991893369053e-05, "loss": 0.2364, "step": 368 }, { "epoch": 0.7, "grad_norm": 2.073105573654175, "learning_rate": 5.810702430399063e-05, "loss": 0.2448, "step": 369 }, { "epoch": 0.7, "grad_norm": 1.941116213798523, "learning_rate": 5.8094087046419765e-05, "loss": 0.2559, "step": 370 }, { "epoch": 0.71, "grad_norm": 2.3984506130218506, "learning_rate": 5.808110718059894e-05, "loss": 0.2626, "step": 371 }, { "epoch": 0.71, "grad_norm": 2.0268592834472656, "learning_rate": 5.806808472621374e-05, "loss": 0.1982, "step": 372 }, { "epoch": 0.71, "grad_norm": 2.3408658504486084, "learning_rate": 5.8055019703014406e-05, "loss": 0.2352, "step": 373 }, { "epoch": 0.71, "grad_norm": 2.5285656452178955, "learning_rate": 5.804191213081569e-05, "loss": 0.2998, "step": 374 }, { "epoch": 0.71, "grad_norm": 2.6744384765625, "learning_rate": 5.8028762029496884e-05, "loss": 0.2263, "step": 375 }, { "epoch": 0.71, "grad_norm": 2.1365628242492676, "learning_rate": 5.8015569419001784e-05, "loss": 0.1837, "step": 376 }, { "epoch": 0.72, "grad_norm": 2.0920653343200684, "learning_rate": 5.800233431933867e-05, "loss": 0.1933, "step": 377 }, { "epoch": 0.72, "grad_norm": 2.260333299636841, "learning_rate": 5.7989056750580245e-05, "loss": 0.2166, "step": 378 }, { "epoch": 0.72, "grad_norm": 2.259202718734741, "learning_rate": 5.7975736732863626e-05, "loss": 0.2681, "step": 379 }, { "epoch": 0.72, "grad_norm": 1.9396440982818604, "learning_rate": 5.796237428639032e-05, "loss": 0.1705, "step": 380 }, { "epoch": 0.72, "grad_norm": 2.007056713104248, "learning_rate": 5.794896943142616e-05, "loss": 0.224, "step": 381 }, { "epoch": 0.73, "grad_norm": 2.566392183303833, "learning_rate": 5.793552218830132e-05, "loss": 0.2911, "step": 382 }, { "epoch": 0.73, "grad_norm": 2.247673273086548, "learning_rate": 5.792203257741025e-05, "loss": 0.2378, "step": 383 }, { "epoch": 0.73, "grad_norm": 1.989481806755066, "learning_rate": 5.790850061921166e-05, "loss": 0.208, "step": 384 }, { "epoch": 0.73, "grad_norm": 2.261098623275757, "learning_rate": 5.789492633422848e-05, "loss": 0.1941, "step": 385 }, { "epoch": 0.73, "grad_norm": 1.7599819898605347, "learning_rate": 5.788130974304783e-05, "loss": 0.1669, "step": 386 }, { "epoch": 0.74, "grad_norm": 1.8933569192886353, "learning_rate": 5.786765086632102e-05, "loss": 0.1861, "step": 387 }, { "epoch": 0.74, "grad_norm": 2.36226749420166, "learning_rate": 5.7853949724763436e-05, "loss": 0.3365, "step": 388 }, { "epoch": 0.74, "grad_norm": 2.2125847339630127, "learning_rate": 5.784020633915463e-05, "loss": 0.2517, "step": 389 }, { "epoch": 0.74, "grad_norm": 3.5447051525115967, "learning_rate": 5.782642073033818e-05, "loss": 0.4189, "step": 390 }, { "epoch": 0.74, "grad_norm": 2.0892772674560547, "learning_rate": 5.781259291922171e-05, "loss": 0.2581, "step": 391 }, { "epoch": 0.75, "grad_norm": 2.4254229068756104, "learning_rate": 5.779872292677686e-05, "loss": 0.2563, "step": 392 }, { "epoch": 0.75, "grad_norm": 1.8640512228012085, "learning_rate": 5.7784810774039216e-05, "loss": 0.2156, "step": 393 }, { "epoch": 0.75, "grad_norm": 2.004009485244751, "learning_rate": 5.777085648210835e-05, "loss": 0.2406, "step": 394 }, { "epoch": 0.75, "grad_norm": 1.9399547576904297, "learning_rate": 5.77568600721477e-05, "loss": 0.2319, "step": 395 }, { "epoch": 0.75, "grad_norm": 2.259922504425049, "learning_rate": 5.77428215653846e-05, "loss": 0.2175, "step": 396 }, { "epoch": 0.75, "grad_norm": 2.060833215713501, "learning_rate": 5.772874098311023e-05, "loss": 0.2372, "step": 397 }, { "epoch": 0.76, "grad_norm": 2.223633050918579, "learning_rate": 5.771461834667958e-05, "loss": 0.292, "step": 398 }, { "epoch": 0.76, "grad_norm": 2.289308547973633, "learning_rate": 5.770045367751142e-05, "loss": 0.2953, "step": 399 }, { "epoch": 0.76, "grad_norm": 2.1454524993896484, "learning_rate": 5.768624699708827e-05, "loss": 0.2373, "step": 400 }, { "epoch": 0.76, "eval_blimp_filtered_avg": 0.7302985074626865, "eval_blimp_filtered_std": 0.004918378323290225, "step": 400 }, { "epoch": 0.76, "eval_blimp_supplement_avg": 0.7672413793103449, "eval_blimp_supplement_std": 0.018094413033767805, "step": 400 }, { "epoch": 0.76, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.04852365870939099, "step": 400 }, { "epoch": 0.76, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 400 }, { "epoch": 0.76, "grad_norm": 2.074946403503418, "learning_rate": 5.767199832695637e-05, "loss": 0.1981, "step": 401 }, { "epoch": 0.76, "grad_norm": 2.2893378734588623, "learning_rate": 5.7657707688725635e-05, "loss": 0.287, "step": 402 }, { "epoch": 0.77, "grad_norm": 1.9099466800689697, "learning_rate": 5.7643375104069635e-05, "loss": 0.1461, "step": 403 }, { "epoch": 0.77, "grad_norm": 2.600417137145996, "learning_rate": 5.7629000594725536e-05, "loss": 0.2304, "step": 404 }, { "epoch": 0.77, "grad_norm": 2.0551822185516357, "learning_rate": 5.7614584182494135e-05, "loss": 0.2437, "step": 405 }, { "epoch": 0.77, "grad_norm": 2.7099804878234863, "learning_rate": 5.760012588923973e-05, "loss": 0.2522, "step": 406 }, { "epoch": 0.77, "grad_norm": 2.3622803688049316, "learning_rate": 5.7585625736890165e-05, "loss": 0.2735, "step": 407 }, { "epoch": 0.78, "grad_norm": 1.9058256149291992, "learning_rate": 5.757108374743677e-05, "loss": 0.1988, "step": 408 }, { "epoch": 0.78, "grad_norm": 2.392219066619873, "learning_rate": 5.75564999429343e-05, "loss": 0.2954, "step": 409 }, { "epoch": 0.78, "grad_norm": 4.494578838348389, "learning_rate": 5.754187434550096e-05, "loss": 0.2682, "step": 410 }, { "epoch": 0.78, "grad_norm": 2.50071382522583, "learning_rate": 5.752720697731833e-05, "loss": 0.2781, "step": 411 }, { "epoch": 0.78, "grad_norm": 2.297830581665039, "learning_rate": 5.7512497860631316e-05, "loss": 0.202, "step": 412 }, { "epoch": 0.79, "grad_norm": 1.7907462120056152, "learning_rate": 5.7497747017748185e-05, "loss": 0.2114, "step": 413 }, { "epoch": 0.79, "grad_norm": 1.9351602792739868, "learning_rate": 5.748295447104044e-05, "loss": 0.1877, "step": 414 }, { "epoch": 0.79, "grad_norm": 1.5212252140045166, "learning_rate": 5.746812024294288e-05, "loss": 0.166, "step": 415 }, { "epoch": 0.79, "grad_norm": 1.8124661445617676, "learning_rate": 5.745324435595348e-05, "loss": 0.1875, "step": 416 }, { "epoch": 0.79, "grad_norm": 3.0356814861297607, "learning_rate": 5.7438326832633414e-05, "loss": 0.2453, "step": 417 }, { "epoch": 0.79, "grad_norm": 1.746372103691101, "learning_rate": 5.7423367695607e-05, "loss": 0.151, "step": 418 }, { "epoch": 0.8, "grad_norm": 2.6006665229797363, "learning_rate": 5.740836696756167e-05, "loss": 0.314, "step": 419 }, { "epoch": 0.8, "grad_norm": 2.1404876708984375, "learning_rate": 5.7393324671247926e-05, "loss": 0.2113, "step": 420 }, { "epoch": 0.8, "grad_norm": 1.8521326780319214, "learning_rate": 5.737824082947933e-05, "loss": 0.1398, "step": 421 }, { "epoch": 0.8, "grad_norm": 1.5915924310684204, "learning_rate": 5.736311546513244e-05, "loss": 0.1429, "step": 422 }, { "epoch": 0.8, "grad_norm": 2.596851348876953, "learning_rate": 5.734794860114679e-05, "loss": 0.1919, "step": 423 }, { "epoch": 0.81, "grad_norm": 2.1375324726104736, "learning_rate": 5.7332740260524855e-05, "loss": 0.1893, "step": 424 }, { "epoch": 0.81, "grad_norm": 2.623591899871826, "learning_rate": 5.731749046633201e-05, "loss": 0.3138, "step": 425 }, { "epoch": 0.81, "grad_norm": 2.046555995941162, "learning_rate": 5.730219924169652e-05, "loss": 0.208, "step": 426 }, { "epoch": 0.81, "grad_norm": 2.938157081604004, "learning_rate": 5.728686660980945e-05, "loss": 0.2727, "step": 427 }, { "epoch": 0.81, "grad_norm": 2.5000252723693848, "learning_rate": 5.727149259392469e-05, "loss": 0.2259, "step": 428 }, { "epoch": 0.82, "grad_norm": 2.1363184452056885, "learning_rate": 5.725607721735889e-05, "loss": 0.2339, "step": 429 }, { "epoch": 0.82, "grad_norm": 2.2473514080047607, "learning_rate": 5.724062050349143e-05, "loss": 0.2253, "step": 430 }, { "epoch": 0.82, "grad_norm": 1.8311669826507568, "learning_rate": 5.722512247576436e-05, "loss": 0.2071, "step": 431 }, { "epoch": 0.82, "grad_norm": 2.601623058319092, "learning_rate": 5.720958315768243e-05, "loss": 0.2266, "step": 432 }, { "epoch": 0.82, "grad_norm": 1.8435903787612915, "learning_rate": 5.7194002572812983e-05, "loss": 0.217, "step": 433 }, { "epoch": 0.83, "grad_norm": 2.0863888263702393, "learning_rate": 5.717838074478593e-05, "loss": 0.2788, "step": 434 }, { "epoch": 0.83, "grad_norm": 1.8889342546463013, "learning_rate": 5.716271769729379e-05, "loss": 0.1996, "step": 435 }, { "epoch": 0.83, "grad_norm": 1.7615338563919067, "learning_rate": 5.714701345409155e-05, "loss": 0.1764, "step": 436 }, { "epoch": 0.83, "grad_norm": 1.7294743061065674, "learning_rate": 5.7131268038996684e-05, "loss": 0.1684, "step": 437 }, { "epoch": 0.83, "grad_norm": 2.4683783054351807, "learning_rate": 5.711548147588911e-05, "loss": 0.2815, "step": 438 }, { "epoch": 0.83, "grad_norm": 1.8670258522033691, "learning_rate": 5.709965378871117e-05, "loss": 0.1977, "step": 439 }, { "epoch": 0.84, "grad_norm": 2.521878719329834, "learning_rate": 5.7083785001467545e-05, "loss": 0.2593, "step": 440 }, { "epoch": 0.84, "grad_norm": 1.7472074031829834, "learning_rate": 5.706787513822528e-05, "loss": 0.1484, "step": 441 }, { "epoch": 0.84, "grad_norm": 1.760799765586853, "learning_rate": 5.7051924223113704e-05, "loss": 0.1585, "step": 442 }, { "epoch": 0.84, "grad_norm": 1.8064112663269043, "learning_rate": 5.703593228032439e-05, "loss": 0.2296, "step": 443 }, { "epoch": 0.84, "grad_norm": 2.113285541534424, "learning_rate": 5.701989933411116e-05, "loss": 0.187, "step": 444 }, { "epoch": 0.85, "grad_norm": 1.8230639696121216, "learning_rate": 5.700382540879e-05, "loss": 0.2004, "step": 445 }, { "epoch": 0.85, "grad_norm": 2.1724791526794434, "learning_rate": 5.698771052873908e-05, "loss": 0.3057, "step": 446 }, { "epoch": 0.85, "grad_norm": 2.459582805633545, "learning_rate": 5.6971554718398644e-05, "loss": 0.2817, "step": 447 }, { "epoch": 0.85, "grad_norm": 2.0014374256134033, "learning_rate": 5.6955358002271036e-05, "loss": 0.1791, "step": 448 }, { "epoch": 0.85, "grad_norm": 1.9328356981277466, "learning_rate": 5.693912040492063e-05, "loss": 0.2357, "step": 449 }, { "epoch": 0.86, "grad_norm": 2.579789876937866, "learning_rate": 5.692284195097381e-05, "loss": 0.2625, "step": 450 }, { "epoch": 0.86, "grad_norm": 1.713832139968872, "learning_rate": 5.6906522665118915e-05, "loss": 0.1562, "step": 451 }, { "epoch": 0.86, "grad_norm": 2.1924309730529785, "learning_rate": 5.6890162572106214e-05, "loss": 0.2234, "step": 452 }, { "epoch": 0.86, "grad_norm": 1.7906739711761475, "learning_rate": 5.687376169674786e-05, "loss": 0.1827, "step": 453 }, { "epoch": 0.86, "grad_norm": 2.2000465393066406, "learning_rate": 5.685732006391787e-05, "loss": 0.251, "step": 454 }, { "epoch": 0.87, "grad_norm": 1.7570834159851074, "learning_rate": 5.6840837698552064e-05, "loss": 0.1609, "step": 455 }, { "epoch": 0.87, "grad_norm": 2.079606533050537, "learning_rate": 5.682431462564805e-05, "loss": 0.1847, "step": 456 }, { "epoch": 0.87, "grad_norm": 1.682856798171997, "learning_rate": 5.680775087026514e-05, "loss": 0.2068, "step": 457 }, { "epoch": 0.87, "grad_norm": 1.7952682971954346, "learning_rate": 5.6791146457524404e-05, "loss": 0.1679, "step": 458 }, { "epoch": 0.87, "grad_norm": 2.5911026000976562, "learning_rate": 5.6774501412608525e-05, "loss": 0.217, "step": 459 }, { "epoch": 0.87, "grad_norm": 2.127225160598755, "learning_rate": 5.675781576076183e-05, "loss": 0.2347, "step": 460 }, { "epoch": 0.88, "grad_norm": 2.322711944580078, "learning_rate": 5.6741089527290235e-05, "loss": 0.265, "step": 461 }, { "epoch": 0.88, "grad_norm": 2.152456521987915, "learning_rate": 5.6724322737561185e-05, "loss": 0.1912, "step": 462 }, { "epoch": 0.88, "grad_norm": 1.6904296875, "learning_rate": 5.670751541700366e-05, "loss": 0.212, "step": 463 }, { "epoch": 0.88, "grad_norm": 1.838593602180481, "learning_rate": 5.669066759110808e-05, "loss": 0.153, "step": 464 }, { "epoch": 0.88, "grad_norm": 1.6022109985351562, "learning_rate": 5.6673779285426314e-05, "loss": 0.1259, "step": 465 }, { "epoch": 0.89, "grad_norm": 1.7915856838226318, "learning_rate": 5.665685052557164e-05, "loss": 0.211, "step": 466 }, { "epoch": 0.89, "grad_norm": 2.2177846431732178, "learning_rate": 5.663988133721864e-05, "loss": 0.2215, "step": 467 }, { "epoch": 0.89, "grad_norm": 2.0839827060699463, "learning_rate": 5.6622871746103276e-05, "loss": 0.2375, "step": 468 }, { "epoch": 0.89, "grad_norm": 2.1613292694091797, "learning_rate": 5.660582177802273e-05, "loss": 0.2271, "step": 469 }, { "epoch": 0.89, "grad_norm": 2.8779914379119873, "learning_rate": 5.658873145883545e-05, "loss": 0.287, "step": 470 }, { "epoch": 0.9, "grad_norm": 1.7030457258224487, "learning_rate": 5.657160081446108e-05, "loss": 0.1504, "step": 471 }, { "epoch": 0.9, "grad_norm": 1.422960877418518, "learning_rate": 5.6554429870880416e-05, "loss": 0.1094, "step": 472 }, { "epoch": 0.9, "grad_norm": 1.9078720808029175, "learning_rate": 5.653721865413536e-05, "loss": 0.1754, "step": 473 }, { "epoch": 0.9, "grad_norm": 1.9932233095169067, "learning_rate": 5.6519967190328924e-05, "loss": 0.2215, "step": 474 }, { "epoch": 0.9, "grad_norm": 1.7894288301467896, "learning_rate": 5.6502675505625146e-05, "loss": 0.1784, "step": 475 }, { "epoch": 0.9, "grad_norm": 1.860581874847412, "learning_rate": 5.648534362624905e-05, "loss": 0.1693, "step": 476 }, { "epoch": 0.91, "grad_norm": 1.8172698020935059, "learning_rate": 5.646797157848663e-05, "loss": 0.2426, "step": 477 }, { "epoch": 0.91, "grad_norm": 2.1779303550720215, "learning_rate": 5.6450559388684804e-05, "loss": 0.2393, "step": 478 }, { "epoch": 0.91, "grad_norm": 1.8775416612625122, "learning_rate": 5.643310708325137e-05, "loss": 0.1609, "step": 479 }, { "epoch": 0.91, "grad_norm": 1.8157578706741333, "learning_rate": 5.641561468865498e-05, "loss": 0.1655, "step": 480 }, { "epoch": 0.91, "grad_norm": 2.970018148422241, "learning_rate": 5.639808223142504e-05, "loss": 0.2094, "step": 481 }, { "epoch": 0.92, "grad_norm": 2.1869263648986816, "learning_rate": 5.638050973815177e-05, "loss": 0.2297, "step": 482 }, { "epoch": 0.92, "grad_norm": 1.9261630773544312, "learning_rate": 5.636289723548609e-05, "loss": 0.1537, "step": 483 }, { "epoch": 0.92, "grad_norm": 1.759760856628418, "learning_rate": 5.6345244750139586e-05, "loss": 0.154, "step": 484 }, { "epoch": 0.92, "grad_norm": 2.253368616104126, "learning_rate": 5.632755230888449e-05, "loss": 0.1744, "step": 485 }, { "epoch": 0.92, "grad_norm": 2.2024550437927246, "learning_rate": 5.630981993855365e-05, "loss": 0.195, "step": 486 }, { "epoch": 0.93, "grad_norm": 2.036249876022339, "learning_rate": 5.629204766604044e-05, "loss": 0.1832, "step": 487 }, { "epoch": 0.93, "grad_norm": 2.1150636672973633, "learning_rate": 5.627423551829878e-05, "loss": 0.1893, "step": 488 }, { "epoch": 0.93, "grad_norm": 2.219698190689087, "learning_rate": 5.625638352234305e-05, "loss": 0.222, "step": 489 }, { "epoch": 0.93, "grad_norm": 1.8558318614959717, "learning_rate": 5.623849170524807e-05, "loss": 0.1682, "step": 490 }, { "epoch": 0.93, "grad_norm": 2.1072654724121094, "learning_rate": 5.6220560094149046e-05, "loss": 0.1961, "step": 491 }, { "epoch": 0.94, "grad_norm": 1.9951170682907104, "learning_rate": 5.620258871624155e-05, "loss": 0.1806, "step": 492 }, { "epoch": 0.94, "grad_norm": 2.148515224456787, "learning_rate": 5.6184577598781456e-05, "loss": 0.2525, "step": 493 }, { "epoch": 0.94, "grad_norm": 1.805219054222107, "learning_rate": 5.6166526769084906e-05, "loss": 0.1665, "step": 494 }, { "epoch": 0.94, "grad_norm": 1.7017015218734741, "learning_rate": 5.6148436254528276e-05, "loss": 0.1631, "step": 495 }, { "epoch": 0.94, "grad_norm": 1.8937941789627075, "learning_rate": 5.6130306082548135e-05, "loss": 0.1688, "step": 496 }, { "epoch": 0.94, "grad_norm": 1.8434207439422607, "learning_rate": 5.611213628064119e-05, "loss": 0.1785, "step": 497 }, { "epoch": 0.95, "grad_norm": 2.041842460632324, "learning_rate": 5.609392687636425e-05, "loss": 0.1693, "step": 498 }, { "epoch": 0.95, "grad_norm": 2.3552134037017822, "learning_rate": 5.607567789733419e-05, "loss": 0.2504, "step": 499 }, { "epoch": 0.95, "grad_norm": 1.6137069463729858, "learning_rate": 5.605738937122789e-05, "loss": 0.1501, "step": 500 }, { "epoch": 0.95, "eval_blimp_filtered_avg": 0.74, "eval_blimp_filtered_std": 0.004837164532202057, "step": 500 }, { "epoch": 0.95, "eval_blimp_supplement_avg": 0.771551724137931, "eval_blimp_supplement_std": 0.01774849530328597, "step": 500 }, { "epoch": 0.95, "eval_vqa_filtered_avg": 0.3, "eval_vqa_filtered_std": 0.046056618647183814, "step": 500 }, { "epoch": 0.95, "eval_winoground_filtered_avg": 0.51, "eval_winoground_filtered_std": 0.05024183937956912, "step": 500 }, { "epoch": 0.95, "grad_norm": 1.9342821836471558, "learning_rate": 5.6039061325782245e-05, "loss": 0.1463, "step": 501 }, { "epoch": 0.95, "grad_norm": 1.9728329181671143, "learning_rate": 5.602069378879404e-05, "loss": 0.2016, "step": 502 }, { "epoch": 0.96, "grad_norm": 1.8331334590911865, "learning_rate": 5.600228678812e-05, "loss": 0.1761, "step": 503 }, { "epoch": 0.96, "grad_norm": 1.7379391193389893, "learning_rate": 5.598384035167666e-05, "loss": 0.124, "step": 504 }, { "epoch": 0.96, "grad_norm": 2.067453384399414, "learning_rate": 5.596535450744039e-05, "loss": 0.2213, "step": 505 }, { "epoch": 0.96, "grad_norm": 2.0867607593536377, "learning_rate": 5.594682928344732e-05, "loss": 0.2126, "step": 506 }, { "epoch": 0.96, "grad_norm": 2.196709394454956, "learning_rate": 5.592826470779331e-05, "loss": 0.1806, "step": 507 }, { "epoch": 0.97, "grad_norm": 2.1728084087371826, "learning_rate": 5.590966080863388e-05, "loss": 0.1979, "step": 508 }, { "epoch": 0.97, "grad_norm": 2.213994026184082, "learning_rate": 5.5891017614184224e-05, "loss": 0.2067, "step": 509 }, { "epoch": 0.97, "grad_norm": 1.5774598121643066, "learning_rate": 5.58723351527191e-05, "loss": 0.143, "step": 510 }, { "epoch": 0.97, "grad_norm": 1.4751396179199219, "learning_rate": 5.585361345257285e-05, "loss": 0.1008, "step": 511 }, { "epoch": 0.97, "grad_norm": 2.0189709663391113, "learning_rate": 5.5834852542139276e-05, "loss": 0.1472, "step": 512 }, { "epoch": 0.98, "grad_norm": 2.099942445755005, "learning_rate": 5.5816052449871705e-05, "loss": 0.2902, "step": 513 }, { "epoch": 0.98, "grad_norm": 2.1842572689056396, "learning_rate": 5.579721320428286e-05, "loss": 0.2589, "step": 514 }, { "epoch": 0.98, "grad_norm": 1.521382451057434, "learning_rate": 5.5778334833944866e-05, "loss": 0.1752, "step": 515 }, { "epoch": 0.98, "grad_norm": 1.5111007690429688, "learning_rate": 5.575941736748914e-05, "loss": 0.1382, "step": 516 }, { "epoch": 0.98, "grad_norm": 1.672255277633667, "learning_rate": 5.574046083360643e-05, "loss": 0.1477, "step": 517 }, { "epoch": 0.98, "grad_norm": 2.1890604496002197, "learning_rate": 5.572146526104673e-05, "loss": 0.2498, "step": 518 }, { "epoch": 0.99, "grad_norm": 1.515865683555603, "learning_rate": 5.570243067861924e-05, "loss": 0.1513, "step": 519 }, { "epoch": 0.99, "grad_norm": 1.8684024810791016, "learning_rate": 5.568335711519231e-05, "loss": 0.186, "step": 520 }, { "epoch": 0.99, "grad_norm": 1.405471682548523, "learning_rate": 5.566424459969344e-05, "loss": 0.1215, "step": 521 }, { "epoch": 0.99, "grad_norm": 1.333060622215271, "learning_rate": 5.564509316110917e-05, "loss": 0.1581, "step": 522 }, { "epoch": 0.99, "grad_norm": 1.8413891792297363, "learning_rate": 5.56259028284851e-05, "loss": 0.1689, "step": 523 }, { "epoch": 1.0, "grad_norm": 2.06471848487854, "learning_rate": 5.5606673630925796e-05, "loss": 0.2121, "step": 524 }, { "epoch": 1.0, "grad_norm": 1.850932002067566, "learning_rate": 5.558740559759477e-05, "loss": 0.1886, "step": 525 }, { "epoch": 1.0, "grad_norm": 1.542215347290039, "learning_rate": 5.5568098757714463e-05, "loss": 0.1447, "step": 526 }, { "epoch": 1.0, "grad_norm": 1.4103710651397705, "learning_rate": 5.554875314056614e-05, "loss": 0.0813, "step": 527 }, { "epoch": 1.0, "grad_norm": 1.9373829364776611, "learning_rate": 5.552936877548987e-05, "loss": 0.1501, "step": 528 }, { "epoch": 1.01, "grad_norm": 1.8618676662445068, "learning_rate": 5.550994569188452e-05, "loss": 0.1102, "step": 529 }, { "epoch": 1.01, "grad_norm": 1.8673502206802368, "learning_rate": 5.549048391920767e-05, "loss": 0.1145, "step": 530 }, { "epoch": 1.01, "grad_norm": 1.4425359964370728, "learning_rate": 5.547098348697555e-05, "loss": 0.1164, "step": 531 }, { "epoch": 1.01, "grad_norm": 1.3321317434310913, "learning_rate": 5.545144442476305e-05, "loss": 0.0783, "step": 532 }, { "epoch": 1.01, "grad_norm": 1.9611432552337646, "learning_rate": 5.543186676220364e-05, "loss": 0.1043, "step": 533 }, { "epoch": 1.02, "grad_norm": 1.4478418827056885, "learning_rate": 5.541225052898934e-05, "loss": 0.1018, "step": 534 }, { "epoch": 1.02, "grad_norm": 1.5156428813934326, "learning_rate": 5.539259575487064e-05, "loss": 0.0927, "step": 535 }, { "epoch": 1.02, "grad_norm": 1.7324143648147583, "learning_rate": 5.5372902469656505e-05, "loss": 0.1011, "step": 536 }, { "epoch": 1.02, "grad_norm": 1.5002551078796387, "learning_rate": 5.5353170703214303e-05, "loss": 0.0731, "step": 537 }, { "epoch": 1.02, "grad_norm": 1.4146058559417725, "learning_rate": 5.533340048546977e-05, "loss": 0.0655, "step": 538 }, { "epoch": 1.02, "grad_norm": 1.902039885520935, "learning_rate": 5.531359184640695e-05, "loss": 0.1024, "step": 539 }, { "epoch": 1.03, "grad_norm": 1.6905338764190674, "learning_rate": 5.529374481606815e-05, "loss": 0.1002, "step": 540 }, { "epoch": 1.03, "grad_norm": 1.6884539127349854, "learning_rate": 5.527385942455392e-05, "loss": 0.103, "step": 541 }, { "epoch": 1.03, "grad_norm": 1.8852143287658691, "learning_rate": 5.525393570202298e-05, "loss": 0.1151, "step": 542 }, { "epoch": 1.03, "grad_norm": 2.0434534549713135, "learning_rate": 5.523397367869218e-05, "loss": 0.0907, "step": 543 }, { "epoch": 1.03, "grad_norm": 1.8832985162734985, "learning_rate": 5.5213973384836466e-05, "loss": 0.1057, "step": 544 }, { "epoch": 1.04, "grad_norm": 1.445332407951355, "learning_rate": 5.519393485078883e-05, "loss": 0.0753, "step": 545 }, { "epoch": 1.04, "grad_norm": 1.5464884042739868, "learning_rate": 5.517385810694024e-05, "loss": 0.0892, "step": 546 }, { "epoch": 1.04, "grad_norm": 1.8375697135925293, "learning_rate": 5.515374318373964e-05, "loss": 0.1228, "step": 547 }, { "epoch": 1.04, "grad_norm": 1.9110418558120728, "learning_rate": 5.513359011169385e-05, "loss": 0.0978, "step": 548 }, { "epoch": 1.04, "grad_norm": 1.7806639671325684, "learning_rate": 5.511339892136757e-05, "loss": 0.1114, "step": 549 }, { "epoch": 1.05, "grad_norm": 1.989496111869812, "learning_rate": 5.509316964338331e-05, "loss": 0.1108, "step": 550 }, { "epoch": 1.05, "grad_norm": 1.9261369705200195, "learning_rate": 5.507290230842133e-05, "loss": 0.1278, "step": 551 }, { "epoch": 1.05, "grad_norm": 1.4255013465881348, "learning_rate": 5.505259694721962e-05, "loss": 0.0807, "step": 552 }, { "epoch": 1.05, "grad_norm": 1.2872581481933594, "learning_rate": 5.5032253590573844e-05, "loss": 0.0823, "step": 553 }, { "epoch": 1.05, "grad_norm": 1.6312404870986938, "learning_rate": 5.501187226933727e-05, "loss": 0.0871, "step": 554 }, { "epoch": 1.06, "grad_norm": 1.536910891532898, "learning_rate": 5.499145301442078e-05, "loss": 0.0949, "step": 555 }, { "epoch": 1.06, "grad_norm": 2.194495677947998, "learning_rate": 5.4970995856792744e-05, "loss": 0.1387, "step": 556 }, { "epoch": 1.06, "grad_norm": 1.4832044839859009, "learning_rate": 5.4950500827479044e-05, "loss": 0.1183, "step": 557 }, { "epoch": 1.06, "grad_norm": 1.6146732568740845, "learning_rate": 5.4929967957563016e-05, "loss": 0.1195, "step": 558 }, { "epoch": 1.06, "grad_norm": 1.7307969331741333, "learning_rate": 5.4909397278185344e-05, "loss": 0.0988, "step": 559 }, { "epoch": 1.06, "grad_norm": 1.8573774099349976, "learning_rate": 5.4888788820544075e-05, "loss": 0.0986, "step": 560 }, { "epoch": 1.07, "grad_norm": 2.3260960578918457, "learning_rate": 5.4868142615894555e-05, "loss": 0.1067, "step": 561 }, { "epoch": 1.07, "grad_norm": 28.097003936767578, "learning_rate": 5.4847458695549376e-05, "loss": 0.1137, "step": 562 }, { "epoch": 1.07, "grad_norm": 1.4165924787521362, "learning_rate": 5.482673709087833e-05, "loss": 0.0971, "step": 563 }, { "epoch": 1.07, "grad_norm": 2.3212599754333496, "learning_rate": 5.480597783330837e-05, "loss": 0.1351, "step": 564 }, { "epoch": 1.07, "grad_norm": 2.3222923278808594, "learning_rate": 5.4785180954323525e-05, "loss": 0.1506, "step": 565 }, { "epoch": 1.08, "grad_norm": 3.7861616611480713, "learning_rate": 5.476434648546492e-05, "loss": 0.1966, "step": 566 }, { "epoch": 1.08, "grad_norm": 4.011321067810059, "learning_rate": 5.474347445833066e-05, "loss": 0.1805, "step": 567 }, { "epoch": 1.08, "grad_norm": 2.593517303466797, "learning_rate": 5.472256490457583e-05, "loss": 0.1167, "step": 568 }, { "epoch": 1.08, "grad_norm": 2.8687984943389893, "learning_rate": 5.4701617855912426e-05, "loss": 0.1619, "step": 569 }, { "epoch": 1.08, "grad_norm": 2.169426918029785, "learning_rate": 5.4680633344109306e-05, "loss": 0.1073, "step": 570 }, { "epoch": 1.09, "grad_norm": 2.2280919551849365, "learning_rate": 5.465961140099214e-05, "loss": 0.1339, "step": 571 }, { "epoch": 1.09, "grad_norm": 2.0876410007476807, "learning_rate": 5.463855205844338e-05, "loss": 0.1262, "step": 572 }, { "epoch": 1.09, "grad_norm": 2.082850694656372, "learning_rate": 5.4617455348402195e-05, "loss": 0.1317, "step": 573 }, { "epoch": 1.09, "grad_norm": 2.1971609592437744, "learning_rate": 5.459632130286441e-05, "loss": 0.128, "step": 574 }, { "epoch": 1.09, "grad_norm": 2.9942030906677246, "learning_rate": 5.45751499538825e-05, "loss": 0.1843, "step": 575 }, { "epoch": 1.1, "grad_norm": 2.5910158157348633, "learning_rate": 5.455394133356552e-05, "loss": 0.1622, "step": 576 }, { "epoch": 1.1, "grad_norm": 1.8187247514724731, "learning_rate": 5.4532695474078994e-05, "loss": 0.1191, "step": 577 }, { "epoch": 1.1, "grad_norm": 2.9920620918273926, "learning_rate": 5.451141240764501e-05, "loss": 0.1466, "step": 578 }, { "epoch": 1.1, "grad_norm": 2.230437994003296, "learning_rate": 5.4490092166542005e-05, "loss": 0.1175, "step": 579 }, { "epoch": 1.1, "grad_norm": 2.5035059452056885, "learning_rate": 5.446873478310485e-05, "loss": 0.131, "step": 580 }, { "epoch": 1.1, "grad_norm": 2.372528314590454, "learning_rate": 5.4447340289724725e-05, "loss": 0.0836, "step": 581 }, { "epoch": 1.11, "grad_norm": 1.9527522325515747, "learning_rate": 5.44259087188491e-05, "loss": 0.1042, "step": 582 }, { "epoch": 1.11, "grad_norm": 2.597435235977173, "learning_rate": 5.440444010298166e-05, "loss": 0.1678, "step": 583 }, { "epoch": 1.11, "grad_norm": 3.3734793663024902, "learning_rate": 5.43829344746823e-05, "loss": 0.1382, "step": 584 }, { "epoch": 1.11, "grad_norm": 1.9677966833114624, "learning_rate": 5.436139186656703e-05, "loss": 0.1167, "step": 585 }, { "epoch": 1.11, "grad_norm": 1.8038456439971924, "learning_rate": 5.4339812311307956e-05, "loss": 0.1255, "step": 586 }, { "epoch": 1.12, "grad_norm": 2.1445164680480957, "learning_rate": 5.4318195841633204e-05, "loss": 0.1661, "step": 587 }, { "epoch": 1.12, "grad_norm": 2.1004483699798584, "learning_rate": 5.42965424903269e-05, "loss": 0.107, "step": 588 }, { "epoch": 1.12, "grad_norm": 2.3648619651794434, "learning_rate": 5.4274852290229104e-05, "loss": 0.1381, "step": 589 }, { "epoch": 1.12, "grad_norm": 2.4993481636047363, "learning_rate": 5.425312527423575e-05, "loss": 0.1302, "step": 590 }, { "epoch": 1.12, "grad_norm": 2.0718445777893066, "learning_rate": 5.4231361475298624e-05, "loss": 0.1261, "step": 591 }, { "epoch": 1.13, "grad_norm": 1.9651837348937988, "learning_rate": 5.4209560926425285e-05, "loss": 0.0982, "step": 592 }, { "epoch": 1.13, "grad_norm": 1.959956169128418, "learning_rate": 5.4187723660679044e-05, "loss": 0.1433, "step": 593 }, { "epoch": 1.13, "grad_norm": 2.347327947616577, "learning_rate": 5.416584971117887e-05, "loss": 0.1584, "step": 594 }, { "epoch": 1.13, "grad_norm": 1.8283354043960571, "learning_rate": 5.414393911109939e-05, "loss": 0.0992, "step": 595 }, { "epoch": 1.13, "grad_norm": 1.8695316314697266, "learning_rate": 5.412199189367082e-05, "loss": 0.1087, "step": 596 }, { "epoch": 1.13, "grad_norm": 2.7929341793060303, "learning_rate": 5.41000080921789e-05, "loss": 0.1306, "step": 597 }, { "epoch": 1.14, "grad_norm": 2.571697473526001, "learning_rate": 5.407798773996484e-05, "loss": 0.161, "step": 598 }, { "epoch": 1.14, "grad_norm": 2.0943222045898438, "learning_rate": 5.405593087042532e-05, "loss": 0.1388, "step": 599 }, { "epoch": 1.14, "grad_norm": 2.8328568935394287, "learning_rate": 5.403383751701237e-05, "loss": 0.1193, "step": 600 }, { "epoch": 1.14, "eval_blimp_filtered_avg": 0.7092537313432836, "eval_blimp_filtered_std": 0.0050051558548941475, "step": 600 }, { "epoch": 1.14, "eval_blimp_supplement_avg": 0.7758620689655172, "eval_blimp_supplement_std": 0.01768488932556595, "step": 600 }, { "epoch": 1.14, "eval_vqa_filtered_avg": 0.43, "eval_vqa_filtered_std": 0.04975698519562428, "step": 600 }, { "epoch": 1.14, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 600 }, { "epoch": 1.14, "grad_norm": 1.7905281782150269, "learning_rate": 5.4011707713233376e-05, "loss": 0.1027, "step": 601 }, { "epoch": 1.14, "grad_norm": 1.5792362689971924, "learning_rate": 5.398954149265099e-05, "loss": 0.1021, "step": 602 }, { "epoch": 1.15, "grad_norm": 2.008040428161621, "learning_rate": 5.3967338888883095e-05, "loss": 0.0949, "step": 603 }, { "epoch": 1.15, "grad_norm": 1.670125961303711, "learning_rate": 5.394509993560276e-05, "loss": 0.0933, "step": 604 }, { "epoch": 1.15, "grad_norm": 2.3257761001586914, "learning_rate": 5.392282466653819e-05, "loss": 0.1525, "step": 605 }, { "epoch": 1.15, "grad_norm": 1.8176473379135132, "learning_rate": 5.390051311547265e-05, "loss": 0.0946, "step": 606 }, { "epoch": 1.15, "grad_norm": 2.578733205795288, "learning_rate": 5.387816531624445e-05, "loss": 0.1239, "step": 607 }, { "epoch": 1.16, "grad_norm": 2.1935880184173584, "learning_rate": 5.385578130274685e-05, "loss": 0.1262, "step": 608 }, { "epoch": 1.16, "grad_norm": 1.5130038261413574, "learning_rate": 5.383336110892806e-05, "loss": 0.0882, "step": 609 }, { "epoch": 1.16, "grad_norm": 1.814089298248291, "learning_rate": 5.381090476879115e-05, "loss": 0.1125, "step": 610 }, { "epoch": 1.16, "grad_norm": 1.4655040502548218, "learning_rate": 5.378841231639401e-05, "loss": 0.1073, "step": 611 }, { "epoch": 1.16, "grad_norm": 1.9137489795684814, "learning_rate": 5.3765883785849284e-05, "loss": 0.1027, "step": 612 }, { "epoch": 1.17, "grad_norm": 2.3522536754608154, "learning_rate": 5.3743319211324376e-05, "loss": 0.1288, "step": 613 }, { "epoch": 1.17, "grad_norm": 2.242788553237915, "learning_rate": 5.3720718627041304e-05, "loss": 0.1269, "step": 614 }, { "epoch": 1.17, "grad_norm": 1.8491194248199463, "learning_rate": 5.369808206727674e-05, "loss": 0.1109, "step": 615 }, { "epoch": 1.17, "grad_norm": 1.9652773141860962, "learning_rate": 5.36754095663619e-05, "loss": 0.1303, "step": 616 }, { "epoch": 1.17, "grad_norm": 1.9412562847137451, "learning_rate": 5.365270115868249e-05, "loss": 0.1167, "step": 617 }, { "epoch": 1.17, "grad_norm": 2.4108376502990723, "learning_rate": 5.362995687867873e-05, "loss": 0.129, "step": 618 }, { "epoch": 1.18, "grad_norm": 1.794213056564331, "learning_rate": 5.360717676084518e-05, "loss": 0.0929, "step": 619 }, { "epoch": 1.18, "grad_norm": 2.0564124584198, "learning_rate": 5.3584360839730777e-05, "loss": 0.1232, "step": 620 }, { "epoch": 1.18, "grad_norm": 1.78448486328125, "learning_rate": 5.356150914993879e-05, "loss": 0.1056, "step": 621 }, { "epoch": 1.18, "grad_norm": 1.9061312675476074, "learning_rate": 5.3538621726126685e-05, "loss": 0.1531, "step": 622 }, { "epoch": 1.18, "grad_norm": 2.1962196826934814, "learning_rate": 5.351569860300614e-05, "loss": 0.1462, "step": 623 }, { "epoch": 1.19, "grad_norm": 2.0914902687072754, "learning_rate": 5.3492739815343016e-05, "loss": 0.1186, "step": 624 }, { "epoch": 1.19, "grad_norm": 1.9525226354599, "learning_rate": 5.346974539795719e-05, "loss": 0.1055, "step": 625 }, { "epoch": 1.19, "grad_norm": 1.7897628545761108, "learning_rate": 5.3446715385722635e-05, "loss": 0.1115, "step": 626 }, { "epoch": 1.19, "grad_norm": 1.6046199798583984, "learning_rate": 5.3423649813567294e-05, "loss": 0.1142, "step": 627 }, { "epoch": 1.19, "grad_norm": 1.7880631685256958, "learning_rate": 5.3400548716473026e-05, "loss": 0.1097, "step": 628 }, { "epoch": 1.2, "grad_norm": 2.1043283939361572, "learning_rate": 5.337741212947558e-05, "loss": 0.0963, "step": 629 }, { "epoch": 1.2, "grad_norm": 2.30271053314209, "learning_rate": 5.335424008766452e-05, "loss": 0.1099, "step": 630 }, { "epoch": 1.2, "grad_norm": 2.0826668739318848, "learning_rate": 5.33310326261832e-05, "loss": 0.1243, "step": 631 }, { "epoch": 1.2, "grad_norm": 2.260484218597412, "learning_rate": 5.330778978022869e-05, "loss": 0.1252, "step": 632 }, { "epoch": 1.2, "grad_norm": 2.509763717651367, "learning_rate": 5.3284511585051706e-05, "loss": 0.1228, "step": 633 }, { "epoch": 1.21, "grad_norm": 1.8258944749832153, "learning_rate": 5.3261198075956574e-05, "loss": 0.0907, "step": 634 }, { "epoch": 1.21, "grad_norm": 1.7467939853668213, "learning_rate": 5.323784928830121e-05, "loss": 0.0891, "step": 635 }, { "epoch": 1.21, "grad_norm": 1.4804024696350098, "learning_rate": 5.3214465257497014e-05, "loss": 0.0908, "step": 636 }, { "epoch": 1.21, "grad_norm": 1.5605990886688232, "learning_rate": 5.3191046019008814e-05, "loss": 0.0959, "step": 637 }, { "epoch": 1.21, "grad_norm": 2.58066987991333, "learning_rate": 5.316759160835489e-05, "loss": 0.1359, "step": 638 }, { "epoch": 1.21, "grad_norm": 1.825940728187561, "learning_rate": 5.314410206110682e-05, "loss": 0.1108, "step": 639 }, { "epoch": 1.22, "grad_norm": 3.4770517349243164, "learning_rate": 5.3120577412889474e-05, "loss": 0.1026, "step": 640 }, { "epoch": 1.22, "grad_norm": 2.331094264984131, "learning_rate": 5.3097017699380986e-05, "loss": 0.1455, "step": 641 }, { "epoch": 1.22, "grad_norm": 2.1107215881347656, "learning_rate": 5.307342295631265e-05, "loss": 0.1603, "step": 642 }, { "epoch": 1.22, "grad_norm": 2.1056315898895264, "learning_rate": 5.3049793219468875e-05, "loss": 0.1105, "step": 643 }, { "epoch": 1.22, "grad_norm": 1.805467128753662, "learning_rate": 5.3026128524687167e-05, "loss": 0.0981, "step": 644 }, { "epoch": 1.23, "grad_norm": 1.9870015382766724, "learning_rate": 5.3002428907858054e-05, "loss": 0.1237, "step": 645 }, { "epoch": 1.23, "grad_norm": 1.6658166646957397, "learning_rate": 5.297869440492499e-05, "loss": 0.0988, "step": 646 }, { "epoch": 1.23, "grad_norm": 1.5618807077407837, "learning_rate": 5.295492505188437e-05, "loss": 0.1156, "step": 647 }, { "epoch": 1.23, "grad_norm": 1.8002574443817139, "learning_rate": 5.293112088478544e-05, "loss": 0.1427, "step": 648 }, { "epoch": 1.23, "grad_norm": 2.080690860748291, "learning_rate": 5.290728193973024e-05, "loss": 0.153, "step": 649 }, { "epoch": 1.24, "grad_norm": 1.3384283781051636, "learning_rate": 5.288340825287356e-05, "loss": 0.0827, "step": 650 }, { "epoch": 1.24, "grad_norm": 1.8391361236572266, "learning_rate": 5.285949986042287e-05, "loss": 0.1262, "step": 651 }, { "epoch": 1.24, "grad_norm": 1.7678861618041992, "learning_rate": 5.283555679863829e-05, "loss": 0.0959, "step": 652 }, { "epoch": 1.24, "grad_norm": 1.4670836925506592, "learning_rate": 5.28115791038325e-05, "loss": 0.0871, "step": 653 }, { "epoch": 1.24, "grad_norm": 1.3235465288162231, "learning_rate": 5.278756681237074e-05, "loss": 0.0886, "step": 654 }, { "epoch": 1.25, "grad_norm": 1.5532070398330688, "learning_rate": 5.276351996067069e-05, "loss": 0.1019, "step": 655 }, { "epoch": 1.25, "grad_norm": 1.6364673376083374, "learning_rate": 5.273943858520243e-05, "loss": 0.0954, "step": 656 }, { "epoch": 1.25, "grad_norm": 2.1719634532928467, "learning_rate": 5.271532272248846e-05, "loss": 0.1501, "step": 657 }, { "epoch": 1.25, "grad_norm": 1.7861624956130981, "learning_rate": 5.269117240910353e-05, "loss": 0.0984, "step": 658 }, { "epoch": 1.25, "grad_norm": 1.595908284187317, "learning_rate": 5.2666987681674654e-05, "loss": 0.0962, "step": 659 }, { "epoch": 1.25, "grad_norm": 1.6407750844955444, "learning_rate": 5.264276857688105e-05, "loss": 0.0906, "step": 660 }, { "epoch": 1.26, "grad_norm": 2.0432002544403076, "learning_rate": 5.2618515131454064e-05, "loss": 0.11, "step": 661 }, { "epoch": 1.26, "grad_norm": 1.97548246383667, "learning_rate": 5.259422738217712e-05, "loss": 0.1312, "step": 662 }, { "epoch": 1.26, "grad_norm": 1.9954290390014648, "learning_rate": 5.2569905365885666e-05, "loss": 0.1002, "step": 663 }, { "epoch": 1.26, "grad_norm": 1.8987418413162231, "learning_rate": 5.254554911946715e-05, "loss": 0.1048, "step": 664 }, { "epoch": 1.26, "grad_norm": 1.4435830116271973, "learning_rate": 5.25211586798609e-05, "loss": 0.0728, "step": 665 }, { "epoch": 1.27, "grad_norm": 1.6864045858383179, "learning_rate": 5.24967340840581e-05, "loss": 0.1095, "step": 666 }, { "epoch": 1.27, "grad_norm": 1.4274249076843262, "learning_rate": 5.247227536910178e-05, "loss": 0.0544, "step": 667 }, { "epoch": 1.27, "grad_norm": 1.7726070880889893, "learning_rate": 5.244778257208667e-05, "loss": 0.1157, "step": 668 }, { "epoch": 1.27, "grad_norm": 1.3930350542068481, "learning_rate": 5.24232557301592e-05, "loss": 0.0775, "step": 669 }, { "epoch": 1.27, "grad_norm": 1.5162173509597778, "learning_rate": 5.2398694880517454e-05, "loss": 0.0993, "step": 670 }, { "epoch": 1.28, "grad_norm": 1.639870285987854, "learning_rate": 5.2374100060411065e-05, "loss": 0.089, "step": 671 }, { "epoch": 1.28, "grad_norm": 1.6351251602172852, "learning_rate": 5.234947130714122e-05, "loss": 0.0994, "step": 672 }, { "epoch": 1.28, "grad_norm": 2.0320165157318115, "learning_rate": 5.232480865806053e-05, "loss": 0.1313, "step": 673 }, { "epoch": 1.28, "grad_norm": 1.5547624826431274, "learning_rate": 5.230011215057304e-05, "loss": 0.0811, "step": 674 }, { "epoch": 1.28, "grad_norm": 2.0797629356384277, "learning_rate": 5.227538182213414e-05, "loss": 0.135, "step": 675 }, { "epoch": 1.29, "grad_norm": 1.2062526941299438, "learning_rate": 5.225061771025053e-05, "loss": 0.0577, "step": 676 }, { "epoch": 1.29, "grad_norm": 1.6713844537734985, "learning_rate": 5.2225819852480104e-05, "loss": 0.107, "step": 677 }, { "epoch": 1.29, "grad_norm": 1.7761340141296387, "learning_rate": 5.220098828643197e-05, "loss": 0.0721, "step": 678 }, { "epoch": 1.29, "grad_norm": 2.0071258544921875, "learning_rate": 5.217612304976635e-05, "loss": 0.0874, "step": 679 }, { "epoch": 1.29, "grad_norm": 1.4159749746322632, "learning_rate": 5.215122418019453e-05, "loss": 0.0779, "step": 680 }, { "epoch": 1.29, "grad_norm": 1.7390931844711304, "learning_rate": 5.212629171547883e-05, "loss": 0.1062, "step": 681 }, { "epoch": 1.3, "grad_norm": 1.5629298686981201, "learning_rate": 5.210132569343247e-05, "loss": 0.088, "step": 682 }, { "epoch": 1.3, "grad_norm": 1.41744863986969, "learning_rate": 5.20763261519196e-05, "loss": 0.0864, "step": 683 }, { "epoch": 1.3, "grad_norm": 1.5766433477401733, "learning_rate": 5.205129312885521e-05, "loss": 0.0763, "step": 684 }, { "epoch": 1.3, "grad_norm": 1.875252366065979, "learning_rate": 5.202622666220503e-05, "loss": 0.1033, "step": 685 }, { "epoch": 1.3, "grad_norm": 1.6621183156967163, "learning_rate": 5.200112678998557e-05, "loss": 0.09, "step": 686 }, { "epoch": 1.31, "grad_norm": 1.521379828453064, "learning_rate": 5.197599355026397e-05, "loss": 0.0941, "step": 687 }, { "epoch": 1.31, "grad_norm": 1.5478641986846924, "learning_rate": 5.195082698115795e-05, "loss": 0.0849, "step": 688 }, { "epoch": 1.31, "grad_norm": 1.7076359987258911, "learning_rate": 5.192562712083584e-05, "loss": 0.0911, "step": 689 }, { "epoch": 1.31, "grad_norm": 1.4439295530319214, "learning_rate": 5.1900394007516413e-05, "loss": 0.0887, "step": 690 }, { "epoch": 1.31, "grad_norm": 1.64594304561615, "learning_rate": 5.18751276794689e-05, "loss": 0.1103, "step": 691 }, { "epoch": 1.32, "grad_norm": 1.5656001567840576, "learning_rate": 5.184982817501287e-05, "loss": 0.1044, "step": 692 }, { "epoch": 1.32, "grad_norm": 1.8841419219970703, "learning_rate": 5.182449553251827e-05, "loss": 0.1005, "step": 693 }, { "epoch": 1.32, "grad_norm": 1.9682115316390991, "learning_rate": 5.1799129790405226e-05, "loss": 0.0755, "step": 694 }, { "epoch": 1.32, "grad_norm": 1.9997506141662598, "learning_rate": 5.177373098714414e-05, "loss": 0.109, "step": 695 }, { "epoch": 1.32, "grad_norm": 1.2103787660598755, "learning_rate": 5.174829916125552e-05, "loss": 0.071, "step": 696 }, { "epoch": 1.33, "grad_norm": 1.585961103439331, "learning_rate": 5.172283435130994e-05, "loss": 0.0922, "step": 697 }, { "epoch": 1.33, "grad_norm": 1.4379081726074219, "learning_rate": 5.1697336595928033e-05, "loss": 0.0876, "step": 698 }, { "epoch": 1.33, "grad_norm": 1.583418846130371, "learning_rate": 5.167180593378037e-05, "loss": 0.0825, "step": 699 }, { "epoch": 1.33, "grad_norm": 1.6073997020721436, "learning_rate": 5.164624240358746e-05, "loss": 0.103, "step": 700 }, { "epoch": 1.33, "eval_blimp_filtered_avg": 0.7365671641791045, "eval_blimp_filtered_std": 0.004864972354874181, "step": 700 }, { "epoch": 1.33, "eval_blimp_supplement_avg": 0.7629310344827587, "eval_blimp_supplement_std": 0.018123906270834964, "step": 700 }, { "epoch": 1.33, "eval_vqa_filtered_avg": 0.24, "eval_vqa_filtered_std": 0.04292346959909283, "step": 700 }, { "epoch": 1.33, "eval_winoground_filtered_avg": 0.48, "eval_winoground_filtered_std": 0.05021167315686779, "step": 700 }, { "epoch": 1.33, "grad_norm": 1.6832022666931152, "learning_rate": 5.162064604411962e-05, "loss": 0.0805, "step": 701 }, { "epoch": 1.33, "grad_norm": 1.3079286813735962, "learning_rate": 5.1595016894197e-05, "loss": 0.0776, "step": 702 }, { "epoch": 1.34, "grad_norm": 1.756169080734253, "learning_rate": 5.156935499268944e-05, "loss": 0.1169, "step": 703 }, { "epoch": 1.34, "grad_norm": 1.4796730279922485, "learning_rate": 5.154366037851649e-05, "loss": 0.0887, "step": 704 }, { "epoch": 1.34, "grad_norm": 1.2750813961029053, "learning_rate": 5.151793309064728e-05, "loss": 0.076, "step": 705 }, { "epoch": 1.34, "grad_norm": 1.3818494081497192, "learning_rate": 5.149217316810052e-05, "loss": 0.107, "step": 706 }, { "epoch": 1.34, "grad_norm": 1.0844285488128662, "learning_rate": 5.146638064994439e-05, "loss": 0.0697, "step": 707 }, { "epoch": 1.35, "grad_norm": 1.2025790214538574, "learning_rate": 5.144055557529654e-05, "loss": 0.0722, "step": 708 }, { "epoch": 1.35, "grad_norm": 1.5269311666488647, "learning_rate": 5.141469798332395e-05, "loss": 0.0949, "step": 709 }, { "epoch": 1.35, "grad_norm": 1.157436728477478, "learning_rate": 5.138880791324295e-05, "loss": 0.0594, "step": 710 }, { "epoch": 1.35, "grad_norm": 1.7213796377182007, "learning_rate": 5.136288540431913e-05, "loss": 0.1046, "step": 711 }, { "epoch": 1.35, "grad_norm": 1.8684593439102173, "learning_rate": 5.1336930495867245e-05, "loss": 0.1016, "step": 712 }, { "epoch": 1.36, "grad_norm": 1.5526412725448608, "learning_rate": 5.1310943227251236e-05, "loss": 0.081, "step": 713 }, { "epoch": 1.36, "grad_norm": 1.8524771928787231, "learning_rate": 5.128492363788407e-05, "loss": 0.1044, "step": 714 }, { "epoch": 1.36, "grad_norm": 1.3574373722076416, "learning_rate": 5.125887176722777e-05, "loss": 0.0773, "step": 715 }, { "epoch": 1.36, "grad_norm": 1.6668269634246826, "learning_rate": 5.1232787654793304e-05, "loss": 0.1088, "step": 716 }, { "epoch": 1.36, "grad_norm": 1.5448557138442993, "learning_rate": 5.120667134014053e-05, "loss": 0.089, "step": 717 }, { "epoch": 1.37, "grad_norm": 1.539372205734253, "learning_rate": 5.118052286287818e-05, "loss": 0.074, "step": 718 }, { "epoch": 1.37, "grad_norm": 1.8250555992126465, "learning_rate": 5.115434226266372e-05, "loss": 0.1026, "step": 719 }, { "epoch": 1.37, "grad_norm": 1.4867888689041138, "learning_rate": 5.112812957920336e-05, "loss": 0.066, "step": 720 }, { "epoch": 1.37, "grad_norm": 1.7609546184539795, "learning_rate": 5.110188485225195e-05, "loss": 0.1038, "step": 721 }, { "epoch": 1.37, "grad_norm": 1.41838538646698, "learning_rate": 5.107560812161298e-05, "loss": 0.0727, "step": 722 }, { "epoch": 1.37, "grad_norm": 1.3984074592590332, "learning_rate": 5.104929942713842e-05, "loss": 0.0818, "step": 723 }, { "epoch": 1.38, "grad_norm": 1.476593255996704, "learning_rate": 5.1022958808728756e-05, "loss": 0.0769, "step": 724 }, { "epoch": 1.38, "grad_norm": 1.4742274284362793, "learning_rate": 5.0996586306332874e-05, "loss": 0.0768, "step": 725 }, { "epoch": 1.38, "grad_norm": 1.5887644290924072, "learning_rate": 5.097018195994804e-05, "loss": 0.0871, "step": 726 }, { "epoch": 1.38, "grad_norm": 1.6411266326904297, "learning_rate": 5.0943745809619774e-05, "loss": 0.0878, "step": 727 }, { "epoch": 1.38, "grad_norm": 1.959336519241333, "learning_rate": 5.091727789544188e-05, "loss": 0.0957, "step": 728 }, { "epoch": 1.39, "grad_norm": 1.68968665599823, "learning_rate": 5.089077825755628e-05, "loss": 0.0629, "step": 729 }, { "epoch": 1.39, "grad_norm": 1.5767394304275513, "learning_rate": 5.0864246936153064e-05, "loss": 0.0942, "step": 730 }, { "epoch": 1.39, "grad_norm": 2.517864942550659, "learning_rate": 5.0837683971470344e-05, "loss": 0.1123, "step": 731 }, { "epoch": 1.39, "grad_norm": 1.3760803937911987, "learning_rate": 5.081108940379423e-05, "loss": 0.0611, "step": 732 }, { "epoch": 1.39, "grad_norm": 1.7282682657241821, "learning_rate": 5.0784463273458746e-05, "loss": 0.1179, "step": 733 }, { "epoch": 1.4, "grad_norm": 2.201974391937256, "learning_rate": 5.0757805620845807e-05, "loss": 0.1206, "step": 734 }, { "epoch": 1.4, "grad_norm": 2.4067790508270264, "learning_rate": 5.073111648638514e-05, "loss": 0.0972, "step": 735 }, { "epoch": 1.4, "grad_norm": 1.404708743095398, "learning_rate": 5.070439591055419e-05, "loss": 0.0902, "step": 736 }, { "epoch": 1.4, "grad_norm": 2.1635916233062744, "learning_rate": 5.067764393387811e-05, "loss": 0.0981, "step": 737 }, { "epoch": 1.4, "grad_norm": 1.9200741052627563, "learning_rate": 5.065086059692967e-05, "loss": 0.106, "step": 738 }, { "epoch": 1.4, "grad_norm": 1.6392035484313965, "learning_rate": 5.06240459403292e-05, "loss": 0.0972, "step": 739 }, { "epoch": 1.41, "grad_norm": 1.7555145025253296, "learning_rate": 5.0597200004744534e-05, "loss": 0.1125, "step": 740 }, { "epoch": 1.41, "grad_norm": 1.4915844202041626, "learning_rate": 5.057032283089094e-05, "loss": 0.1128, "step": 741 }, { "epoch": 1.41, "grad_norm": 1.6414216756820679, "learning_rate": 5.0543414459531054e-05, "loss": 0.0952, "step": 742 }, { "epoch": 1.41, "grad_norm": 1.7593255043029785, "learning_rate": 5.0516474931474846e-05, "loss": 0.0749, "step": 743 }, { "epoch": 1.41, "grad_norm": 1.5891826152801514, "learning_rate": 5.048950428757954e-05, "loss": 0.105, "step": 744 }, { "epoch": 1.42, "grad_norm": 1.6083284616470337, "learning_rate": 5.046250256874953e-05, "loss": 0.0915, "step": 745 }, { "epoch": 1.42, "grad_norm": 1.8360236883163452, "learning_rate": 5.043546981593635e-05, "loss": 0.0863, "step": 746 }, { "epoch": 1.42, "grad_norm": 1.539463996887207, "learning_rate": 5.040840607013861e-05, "loss": 0.0845, "step": 747 }, { "epoch": 1.42, "grad_norm": 1.5645968914031982, "learning_rate": 5.0381311372401906e-05, "loss": 0.0801, "step": 748 }, { "epoch": 1.42, "grad_norm": 1.7578240633010864, "learning_rate": 5.03541857638188e-05, "loss": 0.1387, "step": 749 }, { "epoch": 1.43, "grad_norm": 2.0956554412841797, "learning_rate": 5.0327029285528715e-05, "loss": 0.1289, "step": 750 }, { "epoch": 1.43, "grad_norm": 2.0128889083862305, "learning_rate": 5.0299841978717895e-05, "loss": 0.0962, "step": 751 }, { "epoch": 1.43, "grad_norm": 1.3259589672088623, "learning_rate": 5.027262388461934e-05, "loss": 0.0624, "step": 752 }, { "epoch": 1.43, "grad_norm": 1.7419687509536743, "learning_rate": 5.024537504451277e-05, "loss": 0.1037, "step": 753 }, { "epoch": 1.43, "grad_norm": 1.6418821811676025, "learning_rate": 5.021809549972448e-05, "loss": 0.0999, "step": 754 }, { "epoch": 1.44, "grad_norm": 1.7461858987808228, "learning_rate": 5.019078529162737e-05, "loss": 0.0997, "step": 755 }, { "epoch": 1.44, "grad_norm": 1.5762851238250732, "learning_rate": 5.016344446164085e-05, "loss": 0.0741, "step": 756 }, { "epoch": 1.44, "grad_norm": 1.6880779266357422, "learning_rate": 5.013607305123076e-05, "loss": 0.0819, "step": 757 }, { "epoch": 1.44, "grad_norm": 1.4713430404663086, "learning_rate": 5.01086711019093e-05, "loss": 0.0881, "step": 758 }, { "epoch": 1.44, "grad_norm": 1.7281020879745483, "learning_rate": 5.008123865523503e-05, "loss": 0.1064, "step": 759 }, { "epoch": 1.44, "grad_norm": 1.9122358560562134, "learning_rate": 5.005377575281272e-05, "loss": 0.1393, "step": 760 }, { "epoch": 1.45, "grad_norm": 1.5014458894729614, "learning_rate": 5.0026282436293345e-05, "loss": 0.0849, "step": 761 }, { "epoch": 1.45, "grad_norm": 1.4942837953567505, "learning_rate": 4.999875874737403e-05, "loss": 0.0755, "step": 762 }, { "epoch": 1.45, "grad_norm": 1.99698805809021, "learning_rate": 4.997120472779793e-05, "loss": 0.1242, "step": 763 }, { "epoch": 1.45, "grad_norm": 1.380458116531372, "learning_rate": 4.9943620419354224e-05, "loss": 0.0558, "step": 764 }, { "epoch": 1.45, "grad_norm": 1.6242510080337524, "learning_rate": 4.9916005863878e-05, "loss": 0.0953, "step": 765 }, { "epoch": 1.46, "grad_norm": 2.112661361694336, "learning_rate": 4.9888361103250255e-05, "loss": 0.1106, "step": 766 }, { "epoch": 1.46, "grad_norm": 1.7358219623565674, "learning_rate": 4.986068617939777e-05, "loss": 0.1179, "step": 767 }, { "epoch": 1.46, "grad_norm": 2.099543571472168, "learning_rate": 4.9832981134293086e-05, "loss": 0.0833, "step": 768 }, { "epoch": 1.46, "grad_norm": 1.4832333326339722, "learning_rate": 4.980524600995442e-05, "loss": 0.0904, "step": 769 }, { "epoch": 1.46, "grad_norm": 1.6206539869308472, "learning_rate": 4.97774808484456e-05, "loss": 0.0925, "step": 770 }, { "epoch": 1.47, "grad_norm": 1.3815038204193115, "learning_rate": 4.974968569187603e-05, "loss": 0.0787, "step": 771 }, { "epoch": 1.47, "grad_norm": 1.6432677507400513, "learning_rate": 4.9721860582400596e-05, "loss": 0.108, "step": 772 }, { "epoch": 1.47, "grad_norm": 1.6057548522949219, "learning_rate": 4.96940055622196e-05, "loss": 0.094, "step": 773 }, { "epoch": 1.47, "grad_norm": 1.3608471155166626, "learning_rate": 4.9666120673578716e-05, "loss": 0.0655, "step": 774 }, { "epoch": 1.47, "grad_norm": 1.484653353691101, "learning_rate": 4.963820595876893e-05, "loss": 0.0799, "step": 775 }, { "epoch": 1.48, "grad_norm": 1.6824122667312622, "learning_rate": 4.9610261460126445e-05, "loss": 0.0867, "step": 776 }, { "epoch": 1.48, "grad_norm": 1.5186853408813477, "learning_rate": 4.958228722003263e-05, "loss": 0.0928, "step": 777 }, { "epoch": 1.48, "grad_norm": 1.8374680280685425, "learning_rate": 4.9554283280913985e-05, "loss": 0.1347, "step": 778 }, { "epoch": 1.48, "grad_norm": 1.4191824197769165, "learning_rate": 4.952624968524204e-05, "loss": 0.0658, "step": 779 }, { "epoch": 1.48, "grad_norm": 2.0143301486968994, "learning_rate": 4.949818647553329e-05, "loss": 0.1149, "step": 780 }, { "epoch": 1.48, "grad_norm": 1.3839784860610962, "learning_rate": 4.9470093694349174e-05, "loss": 0.058, "step": 781 }, { "epoch": 1.49, "grad_norm": 1.548513412475586, "learning_rate": 4.9441971384295954e-05, "loss": 0.078, "step": 782 }, { "epoch": 1.49, "grad_norm": 1.6983556747436523, "learning_rate": 4.941381958802467e-05, "loss": 0.1108, "step": 783 }, { "epoch": 1.49, "grad_norm": 1.610230565071106, "learning_rate": 4.938563834823111e-05, "loss": 0.0804, "step": 784 }, { "epoch": 1.49, "grad_norm": 1.5688830614089966, "learning_rate": 4.93574277076557e-05, "loss": 0.1306, "step": 785 }, { "epoch": 1.49, "grad_norm": 2.638524055480957, "learning_rate": 4.932918770908345e-05, "loss": 0.131, "step": 786 }, { "epoch": 1.5, "grad_norm": 1.68451726436615, "learning_rate": 4.930091839534392e-05, "loss": 0.1012, "step": 787 }, { "epoch": 1.5, "grad_norm": 1.9430699348449707, "learning_rate": 4.92726198093111e-05, "loss": 0.0933, "step": 788 }, { "epoch": 1.5, "grad_norm": 1.5076407194137573, "learning_rate": 4.9244291993903386e-05, "loss": 0.0869, "step": 789 }, { "epoch": 1.5, "grad_norm": 1.3327629566192627, "learning_rate": 4.9215934992083515e-05, "loss": 0.072, "step": 790 }, { "epoch": 1.5, "grad_norm": 1.4817475080490112, "learning_rate": 4.918754884685848e-05, "loss": 0.0977, "step": 791 }, { "epoch": 1.51, "grad_norm": 1.4313697814941406, "learning_rate": 4.915913360127947e-05, "loss": 0.0888, "step": 792 }, { "epoch": 1.51, "grad_norm": 1.5232633352279663, "learning_rate": 4.9130689298441824e-05, "loss": 0.1153, "step": 793 }, { "epoch": 1.51, "grad_norm": 1.48499596118927, "learning_rate": 4.9102215981484926e-05, "loss": 0.0886, "step": 794 }, { "epoch": 1.51, "grad_norm": 1.2479290962219238, "learning_rate": 4.9073713693592176e-05, "loss": 0.1016, "step": 795 }, { "epoch": 1.51, "grad_norm": 1.5575228929519653, "learning_rate": 4.904518247799092e-05, "loss": 0.1023, "step": 796 }, { "epoch": 1.52, "grad_norm": 1.3390549421310425, "learning_rate": 4.9016622377952363e-05, "loss": 0.0934, "step": 797 }, { "epoch": 1.52, "grad_norm": 1.2640340328216553, "learning_rate": 4.898803343679152e-05, "loss": 0.0782, "step": 798 }, { "epoch": 1.52, "grad_norm": 1.5619608163833618, "learning_rate": 4.8959415697867164e-05, "loss": 0.1051, "step": 799 }, { "epoch": 1.52, "grad_norm": 1.4732738733291626, "learning_rate": 4.893076920458173e-05, "loss": 0.0768, "step": 800 }, { "epoch": 1.52, "eval_blimp_filtered_avg": 0.7344776119402985, "eval_blimp_filtered_std": 0.004906377121498967, "step": 800 }, { "epoch": 1.52, "eval_blimp_supplement_avg": 0.7693965517241379, "eval_blimp_supplement_std": 0.01783953158573839, "step": 800 }, { "epoch": 1.52, "eval_vqa_filtered_avg": 0.31, "eval_vqa_filtered_std": 0.04648231987117316, "step": 800 }, { "epoch": 1.52, "eval_winoground_filtered_avg": 0.44, "eval_winoground_filtered_std": 0.04988876515698589, "step": 800 }, { "epoch": 1.52, "grad_norm": 1.584505319595337, "learning_rate": 4.890209400038124e-05, "loss": 0.0876, "step": 801 }, { "epoch": 1.52, "grad_norm": 1.5282456874847412, "learning_rate": 4.8873390128755294e-05, "loss": 0.0933, "step": 802 }, { "epoch": 1.53, "grad_norm": 1.3660120964050293, "learning_rate": 4.8844657633236965e-05, "loss": 0.0613, "step": 803 }, { "epoch": 1.53, "grad_norm": 1.5211470127105713, "learning_rate": 4.881589655740272e-05, "loss": 0.0946, "step": 804 }, { "epoch": 1.53, "grad_norm": 1.6008862257003784, "learning_rate": 4.878710694487239e-05, "loss": 0.0887, "step": 805 }, { "epoch": 1.53, "grad_norm": 1.680538535118103, "learning_rate": 4.875828883930907e-05, "loss": 0.0782, "step": 806 }, { "epoch": 1.53, "grad_norm": 1.8873873949050903, "learning_rate": 4.8729442284419084e-05, "loss": 0.0915, "step": 807 }, { "epoch": 1.54, "grad_norm": 1.7563488483428955, "learning_rate": 4.870056732395189e-05, "loss": 0.0925, "step": 808 }, { "epoch": 1.54, "grad_norm": 1.5891441106796265, "learning_rate": 4.867166400170003e-05, "loss": 0.0761, "step": 809 }, { "epoch": 1.54, "grad_norm": 1.729095697402954, "learning_rate": 4.8642732361499074e-05, "loss": 0.0948, "step": 810 }, { "epoch": 1.54, "grad_norm": 1.9891425371170044, "learning_rate": 4.86137724472275e-05, "loss": 0.0867, "step": 811 }, { "epoch": 1.54, "grad_norm": 1.350780963897705, "learning_rate": 4.858478430280673e-05, "loss": 0.0907, "step": 812 }, { "epoch": 1.55, "grad_norm": 1.7412500381469727, "learning_rate": 4.855576797220094e-05, "loss": 0.082, "step": 813 }, { "epoch": 1.55, "grad_norm": 1.3755289316177368, "learning_rate": 4.85267234994171e-05, "loss": 0.0601, "step": 814 }, { "epoch": 1.55, "grad_norm": 1.3738691806793213, "learning_rate": 4.8497650928504816e-05, "loss": 0.0748, "step": 815 }, { "epoch": 1.55, "grad_norm": 1.2963359355926514, "learning_rate": 4.846855030355634e-05, "loss": 0.0726, "step": 816 }, { "epoch": 1.55, "grad_norm": 1.4942185878753662, "learning_rate": 4.8439421668706484e-05, "loss": 0.0813, "step": 817 }, { "epoch": 1.56, "grad_norm": 1.248502254486084, "learning_rate": 4.84102650681325e-05, "loss": 0.0562, "step": 818 }, { "epoch": 1.56, "grad_norm": 1.4156216382980347, "learning_rate": 4.838108054605409e-05, "loss": 0.0938, "step": 819 }, { "epoch": 1.56, "grad_norm": 1.9230626821517944, "learning_rate": 4.8351868146733265e-05, "loss": 0.1326, "step": 820 }, { "epoch": 1.56, "grad_norm": 1.5948419570922852, "learning_rate": 4.8322627914474365e-05, "loss": 0.0708, "step": 821 }, { "epoch": 1.56, "grad_norm": 1.563085675239563, "learning_rate": 4.8293359893623895e-05, "loss": 0.0615, "step": 822 }, { "epoch": 1.56, "grad_norm": 1.6486804485321045, "learning_rate": 4.826406412857053e-05, "loss": 0.0812, "step": 823 }, { "epoch": 1.57, "grad_norm": 1.5839260816574097, "learning_rate": 4.823474066374502e-05, "loss": 0.0846, "step": 824 }, { "epoch": 1.57, "grad_norm": 1.3706841468811035, "learning_rate": 4.820538954362012e-05, "loss": 0.0814, "step": 825 }, { "epoch": 1.57, "grad_norm": 1.6664705276489258, "learning_rate": 4.817601081271054e-05, "loss": 0.1154, "step": 826 }, { "epoch": 1.57, "grad_norm": 1.7063605785369873, "learning_rate": 4.8146604515572836e-05, "loss": 0.0732, "step": 827 }, { "epoch": 1.57, "grad_norm": 1.551948070526123, "learning_rate": 4.81171706968054e-05, "loss": 0.0921, "step": 828 }, { "epoch": 1.58, "grad_norm": 1.5162036418914795, "learning_rate": 4.808770940104836e-05, "loss": 0.093, "step": 829 }, { "epoch": 1.58, "grad_norm": 1.3207262754440308, "learning_rate": 4.805822067298351e-05, "loss": 0.0704, "step": 830 }, { "epoch": 1.58, "grad_norm": 1.3487106561660767, "learning_rate": 4.802870455733425e-05, "loss": 0.0605, "step": 831 }, { "epoch": 1.58, "grad_norm": 1.5071426630020142, "learning_rate": 4.79991610988655e-05, "loss": 0.0922, "step": 832 }, { "epoch": 1.58, "grad_norm": 1.3361766338348389, "learning_rate": 4.7969590342383695e-05, "loss": 0.0924, "step": 833 }, { "epoch": 1.59, "grad_norm": 1.6002601385116577, "learning_rate": 4.793999233273663e-05, "loss": 0.0973, "step": 834 }, { "epoch": 1.59, "grad_norm": 1.493301272392273, "learning_rate": 4.791036711481343e-05, "loss": 0.0967, "step": 835 }, { "epoch": 1.59, "grad_norm": 1.2087712287902832, "learning_rate": 4.788071473354453e-05, "loss": 0.0648, "step": 836 }, { "epoch": 1.59, "grad_norm": 1.1971074342727661, "learning_rate": 4.7851035233901496e-05, "loss": 0.0635, "step": 837 }, { "epoch": 1.59, "grad_norm": 1.5427327156066895, "learning_rate": 4.782132866089708e-05, "loss": 0.1047, "step": 838 }, { "epoch": 1.6, "grad_norm": 1.5916732549667358, "learning_rate": 4.7791595059585066e-05, "loss": 0.1212, "step": 839 }, { "epoch": 1.6, "grad_norm": 1.5895280838012695, "learning_rate": 4.776183447506024e-05, "loss": 0.0956, "step": 840 }, { "epoch": 1.6, "grad_norm": 1.6642342805862427, "learning_rate": 4.773204695245829e-05, "loss": 0.1021, "step": 841 }, { "epoch": 1.6, "grad_norm": 1.4982519149780273, "learning_rate": 4.7702232536955794e-05, "loss": 0.0818, "step": 842 }, { "epoch": 1.6, "grad_norm": 1.4745231866836548, "learning_rate": 4.767239127377009e-05, "loss": 0.0636, "step": 843 }, { "epoch": 1.6, "grad_norm": 1.7280640602111816, "learning_rate": 4.7642523208159254e-05, "loss": 0.0955, "step": 844 }, { "epoch": 1.61, "grad_norm": 5.109907627105713, "learning_rate": 4.761262838542198e-05, "loss": 0.0637, "step": 845 }, { "epoch": 1.61, "grad_norm": 1.3097459077835083, "learning_rate": 4.758270685089758e-05, "loss": 0.0635, "step": 846 }, { "epoch": 1.61, "grad_norm": 1.591741681098938, "learning_rate": 4.755275864996586e-05, "loss": 0.1145, "step": 847 }, { "epoch": 1.61, "grad_norm": 2.012320041656494, "learning_rate": 4.7522783828047055e-05, "loss": 0.0929, "step": 848 }, { "epoch": 1.61, "grad_norm": 1.1567516326904297, "learning_rate": 4.7492782430601794e-05, "loss": 0.0555, "step": 849 }, { "epoch": 1.62, "grad_norm": 1.3476217985153198, "learning_rate": 4.746275450313101e-05, "loss": 0.0748, "step": 850 }, { "epoch": 1.62, "grad_norm": 2.2989659309387207, "learning_rate": 4.743270009117586e-05, "loss": 0.0946, "step": 851 }, { "epoch": 1.62, "grad_norm": 1.2919695377349854, "learning_rate": 4.740261924031768e-05, "loss": 0.0473, "step": 852 }, { "epoch": 1.62, "grad_norm": 1.6320407390594482, "learning_rate": 4.73725119961779e-05, "loss": 0.0865, "step": 853 }, { "epoch": 1.62, "grad_norm": 1.5531123876571655, "learning_rate": 4.734237840441798e-05, "loss": 0.0625, "step": 854 }, { "epoch": 1.63, "grad_norm": 1.7860746383666992, "learning_rate": 4.731221851073934e-05, "loss": 0.0715, "step": 855 }, { "epoch": 1.63, "grad_norm": 1.6702278852462769, "learning_rate": 4.728203236088327e-05, "loss": 0.067, "step": 856 }, { "epoch": 1.63, "grad_norm": 1.443503499031067, "learning_rate": 4.725182000063093e-05, "loss": 0.0725, "step": 857 }, { "epoch": 1.63, "grad_norm": 1.782613754272461, "learning_rate": 4.7221581475803164e-05, "loss": 0.074, "step": 858 }, { "epoch": 1.63, "grad_norm": 1.230510950088501, "learning_rate": 4.7191316832260564e-05, "loss": 0.0645, "step": 859 }, { "epoch": 1.63, "grad_norm": 1.808100700378418, "learning_rate": 4.716102611590329e-05, "loss": 0.0932, "step": 860 }, { "epoch": 1.64, "grad_norm": 1.6835540533065796, "learning_rate": 4.713070937267106e-05, "loss": 0.0828, "step": 861 }, { "epoch": 1.64, "grad_norm": 1.482191562652588, "learning_rate": 4.710036664854308e-05, "loss": 0.0849, "step": 862 }, { "epoch": 1.64, "grad_norm": 1.644769549369812, "learning_rate": 4.706999798953792e-05, "loss": 0.0978, "step": 863 }, { "epoch": 1.64, "grad_norm": 2.1240599155426025, "learning_rate": 4.703960344171352e-05, "loss": 0.1277, "step": 864 }, { "epoch": 1.64, "grad_norm": 1.4567464590072632, "learning_rate": 4.700918305116706e-05, "loss": 0.0836, "step": 865 }, { "epoch": 1.65, "grad_norm": 1.1690260171890259, "learning_rate": 4.697873686403494e-05, "loss": 0.0626, "step": 866 }, { "epoch": 1.65, "grad_norm": 1.5146856307983398, "learning_rate": 4.694826492649266e-05, "loss": 0.079, "step": 867 }, { "epoch": 1.65, "grad_norm": 1.8328827619552612, "learning_rate": 4.691776728475478e-05, "loss": 0.0635, "step": 868 }, { "epoch": 1.65, "grad_norm": 1.4944828748703003, "learning_rate": 4.6887243985074854e-05, "loss": 0.0754, "step": 869 }, { "epoch": 1.65, "grad_norm": 1.3059903383255005, "learning_rate": 4.6856695073745346e-05, "loss": 0.0631, "step": 870 }, { "epoch": 1.66, "grad_norm": 1.8021363019943237, "learning_rate": 4.6826120597097536e-05, "loss": 0.0821, "step": 871 }, { "epoch": 1.66, "grad_norm": 1.612770676612854, "learning_rate": 4.679552060150151e-05, "loss": 0.0791, "step": 872 }, { "epoch": 1.66, "grad_norm": 1.709902048110962, "learning_rate": 4.6764895133366066e-05, "loss": 0.0983, "step": 873 }, { "epoch": 1.66, "grad_norm": 1.883644700050354, "learning_rate": 4.6734244239138574e-05, "loss": 0.1174, "step": 874 }, { "epoch": 1.66, "grad_norm": 1.4830905199050903, "learning_rate": 4.670356796530505e-05, "loss": 0.1168, "step": 875 }, { "epoch": 1.67, "grad_norm": 1.542386531829834, "learning_rate": 4.667286635838994e-05, "loss": 0.0945, "step": 876 }, { "epoch": 1.67, "grad_norm": 1.894826889038086, "learning_rate": 4.664213946495611e-05, "loss": 0.1143, "step": 877 }, { "epoch": 1.67, "grad_norm": 1.8013970851898193, "learning_rate": 4.661138733160483e-05, "loss": 0.1031, "step": 878 }, { "epoch": 1.67, "grad_norm": 1.1459193229675293, "learning_rate": 4.65806100049756e-05, "loss": 0.0504, "step": 879 }, { "epoch": 1.67, "grad_norm": 1.5899440050125122, "learning_rate": 4.654980753174613e-05, "loss": 0.0835, "step": 880 }, { "epoch": 1.67, "grad_norm": 1.371309518814087, "learning_rate": 4.65189799586323e-05, "loss": 0.0897, "step": 881 }, { "epoch": 1.68, "grad_norm": 1.0178749561309814, "learning_rate": 4.6488127332388014e-05, "loss": 0.0457, "step": 882 }, { "epoch": 1.68, "grad_norm": 1.62483549118042, "learning_rate": 4.645724969980524e-05, "loss": 0.0974, "step": 883 }, { "epoch": 1.68, "grad_norm": 1.2416328191757202, "learning_rate": 4.6426347107713787e-05, "loss": 0.0736, "step": 884 }, { "epoch": 1.68, "grad_norm": 1.2442591190338135, "learning_rate": 4.6395419602981374e-05, "loss": 0.0787, "step": 885 }, { "epoch": 1.68, "grad_norm": 1.3987239599227905, "learning_rate": 4.6364467232513494e-05, "loss": 0.089, "step": 886 }, { "epoch": 1.69, "grad_norm": 1.644081711769104, "learning_rate": 4.633349004325334e-05, "loss": 0.0841, "step": 887 }, { "epoch": 1.69, "grad_norm": 1.2702934741973877, "learning_rate": 4.6302488082181766e-05, "loss": 0.0801, "step": 888 }, { "epoch": 1.69, "grad_norm": 1.5228726863861084, "learning_rate": 4.6271461396317154e-05, "loss": 0.0988, "step": 889 }, { "epoch": 1.69, "grad_norm": 1.894463300704956, "learning_rate": 4.624041003271544e-05, "loss": 0.1097, "step": 890 }, { "epoch": 1.69, "grad_norm": 1.5197497606277466, "learning_rate": 4.6209334038469944e-05, "loss": 0.0695, "step": 891 }, { "epoch": 1.7, "grad_norm": 1.4954235553741455, "learning_rate": 4.617823346071136e-05, "loss": 0.0904, "step": 892 }, { "epoch": 1.7, "grad_norm": 1.3489166498184204, "learning_rate": 4.614710834660768e-05, "loss": 0.0834, "step": 893 }, { "epoch": 1.7, "grad_norm": 2.0391480922698975, "learning_rate": 4.611595874336405e-05, "loss": 0.1278, "step": 894 }, { "epoch": 1.7, "grad_norm": 1.396215558052063, "learning_rate": 4.6084784698222844e-05, "loss": 0.0851, "step": 895 }, { "epoch": 1.7, "grad_norm": 1.204738736152649, "learning_rate": 4.605358625846343e-05, "loss": 0.0609, "step": 896 }, { "epoch": 1.71, "grad_norm": 1.1848770380020142, "learning_rate": 4.602236347140221e-05, "loss": 0.0426, "step": 897 }, { "epoch": 1.71, "grad_norm": 1.3790240287780762, "learning_rate": 4.5991116384392514e-05, "loss": 0.0841, "step": 898 }, { "epoch": 1.71, "grad_norm": 1.513181209564209, "learning_rate": 4.5959845044824506e-05, "loss": 0.0896, "step": 899 }, { "epoch": 1.71, "grad_norm": 1.1747398376464844, "learning_rate": 4.592854950012515e-05, "loss": 0.0623, "step": 900 }, { "epoch": 1.71, "eval_blimp_filtered_avg": 0.7388059701492538, "eval_blimp_filtered_std": 0.004885258017482677, "step": 900 }, { "epoch": 1.71, "eval_blimp_supplement_avg": 0.7629310344827587, "eval_blimp_supplement_std": 0.017961502620966185, "step": 900 }, { "epoch": 1.71, "eval_vqa_filtered_avg": 0.35, "eval_vqa_filtered_std": 0.04793724854411019, "step": 900 }, { "epoch": 1.71, "eval_winoground_filtered_avg": 0.46, "eval_winoground_filtered_std": 0.05009082659620333, "step": 900 }, { "epoch": 1.71, "grad_norm": 1.2659538984298706, "learning_rate": 4.5897229797758104e-05, "loss": 0.0626, "step": 901 }, { "epoch": 1.71, "grad_norm": 1.2470483779907227, "learning_rate": 4.5865885985223686e-05, "loss": 0.0743, "step": 902 }, { "epoch": 1.72, "grad_norm": 1.2330950498580933, "learning_rate": 4.583451811005875e-05, "loss": 0.0628, "step": 903 }, { "epoch": 1.72, "grad_norm": 1.589770793914795, "learning_rate": 4.5803126219836674e-05, "loss": 0.0839, "step": 904 }, { "epoch": 1.72, "grad_norm": 1.70102059841156, "learning_rate": 4.5771710362167254e-05, "loss": 0.0887, "step": 905 }, { "epoch": 1.72, "grad_norm": 1.5882220268249512, "learning_rate": 4.574027058469661e-05, "loss": 0.0707, "step": 906 }, { "epoch": 1.72, "grad_norm": 1.5911141633987427, "learning_rate": 4.570880693510716e-05, "loss": 0.1188, "step": 907 }, { "epoch": 1.73, "grad_norm": 1.5583292245864868, "learning_rate": 4.567731946111752e-05, "loss": 0.0888, "step": 908 }, { "epoch": 1.73, "grad_norm": 1.4930033683776855, "learning_rate": 4.564580821048245e-05, "loss": 0.0668, "step": 909 }, { "epoch": 1.73, "grad_norm": 1.1644017696380615, "learning_rate": 4.561427323099276e-05, "loss": 0.0792, "step": 910 }, { "epoch": 1.73, "grad_norm": 1.3101482391357422, "learning_rate": 4.5582714570475245e-05, "loss": 0.0695, "step": 911 }, { "epoch": 1.73, "grad_norm": 1.6129924058914185, "learning_rate": 4.555113227679264e-05, "loss": 0.0852, "step": 912 }, { "epoch": 1.74, "grad_norm": 1.4384307861328125, "learning_rate": 4.5519526397843475e-05, "loss": 0.07, "step": 913 }, { "epoch": 1.74, "grad_norm": 1.5406631231307983, "learning_rate": 4.54878969815621e-05, "loss": 0.0817, "step": 914 }, { "epoch": 1.74, "grad_norm": 1.0334581136703491, "learning_rate": 4.545624407591853e-05, "loss": 0.0543, "step": 915 }, { "epoch": 1.74, "grad_norm": 1.067602515220642, "learning_rate": 4.542456772891841e-05, "loss": 0.0482, "step": 916 }, { "epoch": 1.74, "grad_norm": 1.7109581232070923, "learning_rate": 4.5392867988602965e-05, "loss": 0.0764, "step": 917 }, { "epoch": 1.75, "grad_norm": 1.501543641090393, "learning_rate": 4.536114490304885e-05, "loss": 0.0925, "step": 918 }, { "epoch": 1.75, "grad_norm": 1.294136881828308, "learning_rate": 4.5329398520368174e-05, "loss": 0.0594, "step": 919 }, { "epoch": 1.75, "grad_norm": 1.688539981842041, "learning_rate": 4.5297628888708325e-05, "loss": 0.098, "step": 920 }, { "epoch": 1.75, "grad_norm": 1.4879752397537231, "learning_rate": 4.526583605625201e-05, "loss": 0.0733, "step": 921 }, { "epoch": 1.75, "grad_norm": 1.0149370431900024, "learning_rate": 4.5234020071217096e-05, "loss": 0.0427, "step": 922 }, { "epoch": 1.75, "grad_norm": 1.7456779479980469, "learning_rate": 4.520218098185656e-05, "loss": 0.0939, "step": 923 }, { "epoch": 1.76, "grad_norm": 1.1811097860336304, "learning_rate": 4.517031883645842e-05, "loss": 0.0735, "step": 924 }, { "epoch": 1.76, "grad_norm": 1.8038142919540405, "learning_rate": 4.513843368334566e-05, "loss": 0.1399, "step": 925 }, { "epoch": 1.76, "grad_norm": 1.2405563592910767, "learning_rate": 4.510652557087617e-05, "loss": 0.0609, "step": 926 }, { "epoch": 1.76, "grad_norm": 1.7716548442840576, "learning_rate": 4.507459454744266e-05, "loss": 0.108, "step": 927 }, { "epoch": 1.76, "grad_norm": 1.7465747594833374, "learning_rate": 4.504264066147256e-05, "loss": 0.0862, "step": 928 }, { "epoch": 1.77, "grad_norm": 1.3045947551727295, "learning_rate": 4.5010663961428024e-05, "loss": 0.06, "step": 929 }, { "epoch": 1.77, "grad_norm": 1.3510265350341797, "learning_rate": 4.4978664495805753e-05, "loss": 0.0896, "step": 930 }, { "epoch": 1.77, "grad_norm": 1.3224555253982544, "learning_rate": 4.4946642313137004e-05, "loss": 0.0593, "step": 931 }, { "epoch": 1.77, "grad_norm": 1.2466806173324585, "learning_rate": 4.4914597461987494e-05, "loss": 0.063, "step": 932 }, { "epoch": 1.77, "grad_norm": 1.4589390754699707, "learning_rate": 4.4882529990957305e-05, "loss": 0.0837, "step": 933 }, { "epoch": 1.78, "grad_norm": 1.4167959690093994, "learning_rate": 4.4850439948680824e-05, "loss": 0.0579, "step": 934 }, { "epoch": 1.78, "grad_norm": 1.6318268775939941, "learning_rate": 4.481832738382667e-05, "loss": 0.0822, "step": 935 }, { "epoch": 1.78, "grad_norm": 1.1142100095748901, "learning_rate": 4.4786192345097635e-05, "loss": 0.0657, "step": 936 }, { "epoch": 1.78, "grad_norm": 1.292106032371521, "learning_rate": 4.475403488123058e-05, "loss": 0.0583, "step": 937 }, { "epoch": 1.78, "grad_norm": 1.240082859992981, "learning_rate": 4.472185504099638e-05, "loss": 0.0741, "step": 938 }, { "epoch": 1.79, "grad_norm": 1.2142534255981445, "learning_rate": 4.468965287319985e-05, "loss": 0.0543, "step": 939 }, { "epoch": 1.79, "grad_norm": 1.7420969009399414, "learning_rate": 4.4657428426679674e-05, "loss": 0.0843, "step": 940 }, { "epoch": 1.79, "grad_norm": 1.1750537157058716, "learning_rate": 4.4625181750308306e-05, "loss": 0.0769, "step": 941 }, { "epoch": 1.79, "grad_norm": 1.1668052673339844, "learning_rate": 4.459291289299193e-05, "loss": 0.0574, "step": 942 }, { "epoch": 1.79, "grad_norm": 1.5769915580749512, "learning_rate": 4.456062190367035e-05, "loss": 0.0729, "step": 943 }, { "epoch": 1.79, "grad_norm": 1.3525842428207397, "learning_rate": 4.452830883131697e-05, "loss": 0.0976, "step": 944 }, { "epoch": 1.8, "grad_norm": 1.16890549659729, "learning_rate": 4.449597372493868e-05, "loss": 0.0435, "step": 945 }, { "epoch": 1.8, "grad_norm": 1.4228549003601074, "learning_rate": 4.4463616633575735e-05, "loss": 0.0665, "step": 946 }, { "epoch": 1.8, "grad_norm": 1.5848950147628784, "learning_rate": 4.44312376063018e-05, "loss": 0.0951, "step": 947 }, { "epoch": 1.8, "grad_norm": 1.2078777551651, "learning_rate": 4.439883669222377e-05, "loss": 0.0635, "step": 948 }, { "epoch": 1.8, "grad_norm": 1.183646321296692, "learning_rate": 4.436641394048175e-05, "loss": 0.0651, "step": 949 }, { "epoch": 1.81, "grad_norm": 1.3018157482147217, "learning_rate": 4.433396940024897e-05, "loss": 0.069, "step": 950 }, { "epoch": 1.81, "grad_norm": 1.4921379089355469, "learning_rate": 4.430150312073167e-05, "loss": 0.1089, "step": 951 }, { "epoch": 1.81, "grad_norm": 1.326827883720398, "learning_rate": 4.42690151511691e-05, "loss": 0.0595, "step": 952 }, { "epoch": 1.81, "grad_norm": 1.4922373294830322, "learning_rate": 4.4236505540833386e-05, "loss": 0.0843, "step": 953 }, { "epoch": 1.81, "grad_norm": 1.0689603090286255, "learning_rate": 4.4203974339029485e-05, "loss": 0.0751, "step": 954 }, { "epoch": 1.82, "grad_norm": 1.2698966264724731, "learning_rate": 4.417142159509509e-05, "loss": 0.0784, "step": 955 }, { "epoch": 1.82, "grad_norm": 1.572508692741394, "learning_rate": 4.413884735840058e-05, "loss": 0.0989, "step": 956 }, { "epoch": 1.82, "grad_norm": 1.1410672664642334, "learning_rate": 4.4106251678348905e-05, "loss": 0.0664, "step": 957 }, { "epoch": 1.82, "grad_norm": 1.5577747821807861, "learning_rate": 4.407363460437557e-05, "loss": 0.0931, "step": 958 }, { "epoch": 1.82, "grad_norm": 1.1328744888305664, "learning_rate": 4.4040996185948495e-05, "loss": 0.0518, "step": 959 }, { "epoch": 1.83, "grad_norm": 1.5649348497390747, "learning_rate": 4.4008336472567984e-05, "loss": 0.0983, "step": 960 }, { "epoch": 1.83, "grad_norm": 1.2352359294891357, "learning_rate": 4.3975655513766645e-05, "loss": 0.0801, "step": 961 }, { "epoch": 1.83, "grad_norm": 1.3915117979049683, "learning_rate": 4.3942953359109295e-05, "loss": 0.081, "step": 962 }, { "epoch": 1.83, "grad_norm": 1.5125086307525635, "learning_rate": 4.3910230058192926e-05, "loss": 0.0827, "step": 963 }, { "epoch": 1.83, "grad_norm": 1.158286452293396, "learning_rate": 4.387748566064656e-05, "loss": 0.0529, "step": 964 }, { "epoch": 1.83, "grad_norm": 0.8862749338150024, "learning_rate": 4.384472021613122e-05, "loss": 0.0573, "step": 965 }, { "epoch": 1.84, "grad_norm": 1.2909945249557495, "learning_rate": 4.381193377433989e-05, "loss": 0.06, "step": 966 }, { "epoch": 1.84, "grad_norm": 1.2697809934616089, "learning_rate": 4.377912638499736e-05, "loss": 0.083, "step": 967 }, { "epoch": 1.84, "grad_norm": 1.4459086656570435, "learning_rate": 4.3746298097860196e-05, "loss": 0.072, "step": 968 }, { "epoch": 1.84, "grad_norm": 1.225091814994812, "learning_rate": 4.3713448962716674e-05, "loss": 0.0782, "step": 969 }, { "epoch": 1.84, "grad_norm": 1.5829181671142578, "learning_rate": 4.368057902938666e-05, "loss": 0.0911, "step": 970 }, { "epoch": 1.85, "grad_norm": 1.6562423706054688, "learning_rate": 4.3647688347721614e-05, "loss": 0.0958, "step": 971 }, { "epoch": 1.85, "grad_norm": 1.4291036128997803, "learning_rate": 4.3614776967604397e-05, "loss": 0.0915, "step": 972 }, { "epoch": 1.85, "grad_norm": 1.2845791578292847, "learning_rate": 4.358184493894932e-05, "loss": 0.0711, "step": 973 }, { "epoch": 1.85, "grad_norm": 1.1175628900527954, "learning_rate": 4.354889231170196e-05, "loss": 0.0509, "step": 974 }, { "epoch": 1.85, "grad_norm": 0.921883225440979, "learning_rate": 4.351591913583918e-05, "loss": 0.0522, "step": 975 }, { "epoch": 1.86, "grad_norm": 1.1847916841506958, "learning_rate": 4.348292546136899e-05, "loss": 0.0739, "step": 976 }, { "epoch": 1.86, "grad_norm": 1.5273025035858154, "learning_rate": 4.3449911338330474e-05, "loss": 0.0834, "step": 977 }, { "epoch": 1.86, "grad_norm": 1.216287612915039, "learning_rate": 4.3416876816793744e-05, "loss": 0.0564, "step": 978 }, { "epoch": 1.86, "grad_norm": 1.15571928024292, "learning_rate": 4.338382194685985e-05, "loss": 0.0719, "step": 979 }, { "epoch": 1.86, "grad_norm": 1.5973104238510132, "learning_rate": 4.33507467786607e-05, "loss": 0.0721, "step": 980 }, { "epoch": 1.87, "grad_norm": 1.4570748805999756, "learning_rate": 4.331765136235899e-05, "loss": 0.0845, "step": 981 }, { "epoch": 1.87, "grad_norm": 1.1695295572280884, "learning_rate": 4.32845357481481e-05, "loss": 0.0664, "step": 982 }, { "epoch": 1.87, "grad_norm": 1.192367434501648, "learning_rate": 4.325139998625208e-05, "loss": 0.0676, "step": 983 }, { "epoch": 1.87, "grad_norm": 1.4806044101715088, "learning_rate": 4.321824412692552e-05, "loss": 0.081, "step": 984 }, { "epoch": 1.87, "grad_norm": 1.3497810363769531, "learning_rate": 4.3185068220453484e-05, "loss": 0.0805, "step": 985 }, { "epoch": 1.87, "grad_norm": 1.3560150861740112, "learning_rate": 4.3151872317151456e-05, "loss": 0.0631, "step": 986 }, { "epoch": 1.88, "grad_norm": 1.2363392114639282, "learning_rate": 4.311865646736524e-05, "loss": 0.0646, "step": 987 }, { "epoch": 1.88, "grad_norm": 1.4370615482330322, "learning_rate": 4.3085420721470864e-05, "loss": 0.0803, "step": 988 }, { "epoch": 1.88, "grad_norm": 1.142534852027893, "learning_rate": 4.305216512987459e-05, "loss": 0.0553, "step": 989 }, { "epoch": 1.88, "grad_norm": 1.8768911361694336, "learning_rate": 4.301888974301271e-05, "loss": 0.1029, "step": 990 }, { "epoch": 1.88, "grad_norm": 1.4808437824249268, "learning_rate": 4.2985594611351614e-05, "loss": 0.0803, "step": 991 }, { "epoch": 1.89, "grad_norm": 1.492554783821106, "learning_rate": 4.2952279785387564e-05, "loss": 0.0518, "step": 992 }, { "epoch": 1.89, "grad_norm": 1.5033847093582153, "learning_rate": 4.291894531564673e-05, "loss": 0.0759, "step": 993 }, { "epoch": 1.89, "grad_norm": 1.6785972118377686, "learning_rate": 4.288559125268508e-05, "loss": 0.101, "step": 994 }, { "epoch": 1.89, "grad_norm": 0.9530592560768127, "learning_rate": 4.285221764708827e-05, "loss": 0.0488, "step": 995 }, { "epoch": 1.89, "grad_norm": 1.3272395133972168, "learning_rate": 4.2818824549471624e-05, "loss": 0.0658, "step": 996 }, { "epoch": 1.9, "grad_norm": 1.487097144126892, "learning_rate": 4.278541201048e-05, "loss": 0.0749, "step": 997 }, { "epoch": 1.9, "grad_norm": 1.1615517139434814, "learning_rate": 4.275198008078777e-05, "loss": 0.0535, "step": 998 }, { "epoch": 1.9, "grad_norm": 1.7249425649642944, "learning_rate": 4.271852881109869e-05, "loss": 0.098, "step": 999 }, { "epoch": 1.9, "grad_norm": 1.3330974578857422, "learning_rate": 4.268505825214586e-05, "loss": 0.0762, "step": 1000 }, { "epoch": 1.9, "eval_blimp_filtered_avg": 0.7373134328358208, "eval_blimp_filtered_std": 0.004893449900158107, "step": 1000 }, { "epoch": 1.9, "eval_blimp_supplement_avg": 0.7607758620689655, "eval_blimp_supplement_std": 0.018414567782766414, "step": 1000 }, { "epoch": 1.9, "eval_vqa_filtered_avg": 0.4, "eval_vqa_filtered_std": 0.04923659639173309, "step": 1000 }, { "epoch": 1.9, "eval_winoground_filtered_avg": 0.44, "eval_winoground_filtered_std": 0.04988876515698589, "step": 1000 }, { "epoch": 1.9, "grad_norm": 1.4711142778396606, "learning_rate": 4.2651568454691625e-05, "loss": 0.0723, "step": 1001 }, { "epoch": 1.9, "grad_norm": 1.484944462776184, "learning_rate": 4.261805946952753e-05, "loss": 0.1037, "step": 1002 }, { "epoch": 1.91, "grad_norm": 1.43315851688385, "learning_rate": 4.258453134747421e-05, "loss": 0.0866, "step": 1003 }, { "epoch": 1.91, "grad_norm": 1.4623805284500122, "learning_rate": 4.2550984139381293e-05, "loss": 0.0863, "step": 1004 }, { "epoch": 1.91, "grad_norm": 1.3799711465835571, "learning_rate": 4.251741789612739e-05, "loss": 0.061, "step": 1005 }, { "epoch": 1.91, "grad_norm": 1.4027959108352661, "learning_rate": 4.248383266861998e-05, "loss": 0.0702, "step": 1006 }, { "epoch": 1.91, "grad_norm": 1.0753225088119507, "learning_rate": 4.245022850779532e-05, "loss": 0.0577, "step": 1007 }, { "epoch": 1.92, "grad_norm": 1.8280802965164185, "learning_rate": 4.241660546461838e-05, "loss": 0.0824, "step": 1008 }, { "epoch": 1.92, "grad_norm": 1.5547451972961426, "learning_rate": 4.238296359008279e-05, "loss": 0.0765, "step": 1009 }, { "epoch": 1.92, "grad_norm": 1.698030710220337, "learning_rate": 4.234930293521071e-05, "loss": 0.0663, "step": 1010 }, { "epoch": 1.92, "grad_norm": 1.2120630741119385, "learning_rate": 4.2315623551052796e-05, "loss": 0.0715, "step": 1011 }, { "epoch": 1.92, "grad_norm": 2.1559317111968994, "learning_rate": 4.228192548868811e-05, "loss": 0.0828, "step": 1012 }, { "epoch": 1.93, "grad_norm": 1.3094383478164673, "learning_rate": 4.224820879922406e-05, "loss": 0.0632, "step": 1013 }, { "epoch": 1.93, "grad_norm": 1.204358458518982, "learning_rate": 4.221447353379626e-05, "loss": 0.0648, "step": 1014 }, { "epoch": 1.93, "grad_norm": 2.017810106277466, "learning_rate": 4.218071974356855e-05, "loss": 0.0911, "step": 1015 }, { "epoch": 1.93, "grad_norm": 1.3073359727859497, "learning_rate": 4.214694747973281e-05, "loss": 0.0834, "step": 1016 }, { "epoch": 1.93, "grad_norm": 1.3970775604248047, "learning_rate": 4.211315679350899e-05, "loss": 0.0713, "step": 1017 }, { "epoch": 1.94, "grad_norm": 1.3864994049072266, "learning_rate": 4.2079347736144934e-05, "loss": 0.0809, "step": 1018 }, { "epoch": 1.94, "grad_norm": 1.348341464996338, "learning_rate": 4.204552035891639e-05, "loss": 0.09, "step": 1019 }, { "epoch": 1.94, "grad_norm": 1.2095041275024414, "learning_rate": 4.201167471312687e-05, "loss": 0.0531, "step": 1020 }, { "epoch": 1.94, "grad_norm": 1.2746816873550415, "learning_rate": 4.197781085010758e-05, "loss": 0.0697, "step": 1021 }, { "epoch": 1.94, "grad_norm": 1.3956737518310547, "learning_rate": 4.1943928821217375e-05, "loss": 0.077, "step": 1022 }, { "epoch": 1.94, "grad_norm": 1.4578558206558228, "learning_rate": 4.191002867784265e-05, "loss": 0.0842, "step": 1023 }, { "epoch": 1.95, "grad_norm": 0.9007298350334167, "learning_rate": 4.187611047139727e-05, "loss": 0.0371, "step": 1024 }, { "epoch": 1.95, "grad_norm": 1.8008947372436523, "learning_rate": 4.184217425332253e-05, "loss": 0.0701, "step": 1025 }, { "epoch": 1.95, "grad_norm": 1.5437817573547363, "learning_rate": 4.180822007508698e-05, "loss": 0.0703, "step": 1026 }, { "epoch": 1.95, "grad_norm": 1.172078251838684, "learning_rate": 4.177424798818646e-05, "loss": 0.051, "step": 1027 }, { "epoch": 1.95, "grad_norm": 1.2286282777786255, "learning_rate": 4.1740258044143945e-05, "loss": 0.0672, "step": 1028 }, { "epoch": 1.96, "grad_norm": 1.0238698720932007, "learning_rate": 4.170625029450951e-05, "loss": 0.0406, "step": 1029 }, { "epoch": 1.96, "grad_norm": 1.090419054031372, "learning_rate": 4.167222479086022e-05, "loss": 0.0518, "step": 1030 }, { "epoch": 1.96, "grad_norm": 1.1259323358535767, "learning_rate": 4.163818158480007e-05, "loss": 0.0396, "step": 1031 }, { "epoch": 1.96, "grad_norm": 1.3050808906555176, "learning_rate": 4.160412072795991e-05, "loss": 0.0976, "step": 1032 }, { "epoch": 1.96, "grad_norm": 1.4500107765197754, "learning_rate": 4.157004227199735e-05, "loss": 0.0664, "step": 1033 }, { "epoch": 1.97, "grad_norm": 2.0856730937957764, "learning_rate": 4.1535946268596684e-05, "loss": 0.0715, "step": 1034 }, { "epoch": 1.97, "grad_norm": 1.1378005743026733, "learning_rate": 4.150183276946884e-05, "loss": 0.0461, "step": 1035 }, { "epoch": 1.97, "grad_norm": 1.370568871498108, "learning_rate": 4.146770182635128e-05, "loss": 0.0665, "step": 1036 }, { "epoch": 1.97, "grad_norm": 1.4813791513442993, "learning_rate": 4.143355349100791e-05, "loss": 0.0894, "step": 1037 }, { "epoch": 1.97, "grad_norm": 1.3825584650039673, "learning_rate": 4.1399387815229e-05, "loss": 0.0727, "step": 1038 }, { "epoch": 1.98, "grad_norm": 1.2598187923431396, "learning_rate": 4.1365204850831144e-05, "loss": 0.0616, "step": 1039 }, { "epoch": 1.98, "grad_norm": 1.1747922897338867, "learning_rate": 4.133100464965716e-05, "loss": 0.0488, "step": 1040 }, { "epoch": 1.98, "grad_norm": 1.2766090631484985, "learning_rate": 4.129678726357597e-05, "loss": 0.0825, "step": 1041 }, { "epoch": 1.98, "grad_norm": 1.4454387426376343, "learning_rate": 4.12625527444826e-05, "loss": 0.0935, "step": 1042 }, { "epoch": 1.98, "grad_norm": 0.8908864855766296, "learning_rate": 4.122830114429806e-05, "loss": 0.0366, "step": 1043 }, { "epoch": 1.98, "grad_norm": 1.0782757997512817, "learning_rate": 4.1194032514969225e-05, "loss": 0.0635, "step": 1044 }, { "epoch": 1.99, "grad_norm": 1.3421026468276978, "learning_rate": 4.1159746908468834e-05, "loss": 0.0687, "step": 1045 }, { "epoch": 1.99, "grad_norm": 1.729850172996521, "learning_rate": 4.112544437679535e-05, "loss": 0.0807, "step": 1046 }, { "epoch": 1.99, "grad_norm": 1.2272552251815796, "learning_rate": 4.109112497197293e-05, "loss": 0.0683, "step": 1047 }, { "epoch": 1.99, "grad_norm": 0.8792155385017395, "learning_rate": 4.1056788746051316e-05, "loss": 0.0374, "step": 1048 }, { "epoch": 1.99, "grad_norm": 1.6992003917694092, "learning_rate": 4.102243575110574e-05, "loss": 0.0827, "step": 1049 }, { "epoch": 2.0, "grad_norm": 1.9711517095565796, "learning_rate": 4.098806603923688e-05, "loss": 0.1174, "step": 1050 }, { "epoch": 2.0, "grad_norm": 1.5974726676940918, "learning_rate": 4.095367966257077e-05, "loss": 0.0801, "step": 1051 }, { "epoch": 2.0, "grad_norm": 1.1956522464752197, "learning_rate": 4.091927667325872e-05, "loss": 0.0497, "step": 1052 }, { "epoch": 2.0, "grad_norm": 0.7160119414329529, "learning_rate": 4.088485712347723e-05, "loss": 0.0246, "step": 1053 }, { "epoch": 2.0, "grad_norm": 0.8052932620048523, "learning_rate": 4.085042106542792e-05, "loss": 0.0228, "step": 1054 }, { "epoch": 2.01, "grad_norm": 1.7292722463607788, "learning_rate": 4.081596855133744e-05, "loss": 0.0656, "step": 1055 }, { "epoch": 2.01, "grad_norm": 0.8976931571960449, "learning_rate": 4.0781499633457404e-05, "loss": 0.0311, "step": 1056 }, { "epoch": 2.01, "grad_norm": 1.0544778108596802, "learning_rate": 4.07470143640643e-05, "loss": 0.0383, "step": 1057 }, { "epoch": 2.01, "grad_norm": 0.796032190322876, "learning_rate": 4.071251279545942e-05, "loss": 0.0378, "step": 1058 }, { "epoch": 2.01, "grad_norm": 1.060745358467102, "learning_rate": 4.067799497996877e-05, "loss": 0.0328, "step": 1059 }, { "epoch": 2.02, "grad_norm": 1.2713921070098877, "learning_rate": 4.064346096994301e-05, "loss": 0.0275, "step": 1060 }, { "epoch": 2.02, "grad_norm": 0.9388766884803772, "learning_rate": 4.060891081775733e-05, "loss": 0.0406, "step": 1061 }, { "epoch": 2.02, "grad_norm": 1.017356276512146, "learning_rate": 4.057434457581144e-05, "loss": 0.0377, "step": 1062 }, { "epoch": 2.02, "grad_norm": 1.371375322341919, "learning_rate": 4.053976229652943e-05, "loss": 0.0485, "step": 1063 }, { "epoch": 2.02, "grad_norm": 1.2231448888778687, "learning_rate": 4.0505164032359716e-05, "loss": 0.0414, "step": 1064 }, { "epoch": 2.02, "grad_norm": 0.8701097369194031, "learning_rate": 4.047054983577496e-05, "loss": 0.0235, "step": 1065 }, { "epoch": 2.03, "grad_norm": 1.145439624786377, "learning_rate": 4.043591975927199e-05, "loss": 0.034, "step": 1066 }, { "epoch": 2.03, "grad_norm": 1.6429330110549927, "learning_rate": 4.040127385537171e-05, "loss": 0.0469, "step": 1067 }, { "epoch": 2.03, "grad_norm": 1.1188381910324097, "learning_rate": 4.0366612176619014e-05, "loss": 0.0359, "step": 1068 }, { "epoch": 2.03, "grad_norm": 1.262399673461914, "learning_rate": 4.0331934775582776e-05, "loss": 0.0232, "step": 1069 }, { "epoch": 2.03, "grad_norm": 0.9754251837730408, "learning_rate": 4.0297241704855645e-05, "loss": 0.0314, "step": 1070 }, { "epoch": 2.04, "grad_norm": 1.1487596035003662, "learning_rate": 4.0262533017054087e-05, "loss": 0.0418, "step": 1071 }, { "epoch": 2.04, "grad_norm": 1.0032458305358887, "learning_rate": 4.0227808764818225e-05, "loss": 0.0379, "step": 1072 }, { "epoch": 2.04, "grad_norm": 0.9035434722900391, "learning_rate": 4.01930690008118e-05, "loss": 0.0396, "step": 1073 }, { "epoch": 2.04, "grad_norm": 0.9303309917449951, "learning_rate": 4.0158313777722064e-05, "loss": 0.0182, "step": 1074 }, { "epoch": 2.04, "grad_norm": 1.213032841682434, "learning_rate": 4.012354314825974e-05, "loss": 0.0431, "step": 1075 }, { "epoch": 2.05, "grad_norm": 1.3096210956573486, "learning_rate": 4.00887571651589e-05, "loss": 0.0541, "step": 1076 }, { "epoch": 2.05, "grad_norm": 1.0566493272781372, "learning_rate": 4.005395588117689e-05, "loss": 0.0299, "step": 1077 }, { "epoch": 2.05, "grad_norm": 1.1422333717346191, "learning_rate": 4.00191393490943e-05, "loss": 0.0278, "step": 1078 }, { "epoch": 2.05, "grad_norm": 0.9793741703033447, "learning_rate": 3.99843076217148e-05, "loss": 0.029, "step": 1079 }, { "epoch": 2.05, "grad_norm": 1.0706398487091064, "learning_rate": 3.9949460751865135e-05, "loss": 0.04, "step": 1080 }, { "epoch": 2.06, "grad_norm": 0.8914555907249451, "learning_rate": 3.9914598792395006e-05, "loss": 0.0445, "step": 1081 }, { "epoch": 2.06, "grad_norm": 1.1445811986923218, "learning_rate": 3.9879721796177e-05, "loss": 0.0455, "step": 1082 }, { "epoch": 2.06, "grad_norm": 1.0356100797653198, "learning_rate": 3.9844829816106515e-05, "loss": 0.03, "step": 1083 }, { "epoch": 2.06, "grad_norm": 0.9359371662139893, "learning_rate": 3.980992290510168e-05, "loss": 0.0352, "step": 1084 }, { "epoch": 2.06, "grad_norm": 1.5905426740646362, "learning_rate": 3.977500111610322e-05, "loss": 0.038, "step": 1085 }, { "epoch": 2.06, "grad_norm": 1.0888453722000122, "learning_rate": 3.974006450207449e-05, "loss": 0.0237, "step": 1086 }, { "epoch": 2.07, "grad_norm": 0.7957691550254822, "learning_rate": 3.97051131160013e-05, "loss": 0.0329, "step": 1087 }, { "epoch": 2.07, "grad_norm": 0.53327476978302, "learning_rate": 3.9670147010891854e-05, "loss": 0.0156, "step": 1088 }, { "epoch": 2.07, "grad_norm": 0.9060192704200745, "learning_rate": 3.96351662397767e-05, "loss": 0.0266, "step": 1089 }, { "epoch": 2.07, "grad_norm": 1.04215669631958, "learning_rate": 3.960017085570862e-05, "loss": 0.0372, "step": 1090 }, { "epoch": 2.07, "grad_norm": 0.9917066097259521, "learning_rate": 3.956516091176255e-05, "loss": 0.0256, "step": 1091 }, { "epoch": 2.08, "grad_norm": 0.8185844421386719, "learning_rate": 3.953013646103552e-05, "loss": 0.024, "step": 1092 }, { "epoch": 2.08, "grad_norm": 1.2610061168670654, "learning_rate": 3.949509755664655e-05, "loss": 0.0393, "step": 1093 }, { "epoch": 2.08, "grad_norm": 1.5944629907608032, "learning_rate": 3.946004425173659e-05, "loss": 0.0416, "step": 1094 }, { "epoch": 2.08, "grad_norm": 0.7313938140869141, "learning_rate": 3.942497659946843e-05, "loss": 0.0242, "step": 1095 }, { "epoch": 2.08, "grad_norm": 1.0167222023010254, "learning_rate": 3.9389894653026614e-05, "loss": 0.0269, "step": 1096 }, { "epoch": 2.09, "grad_norm": 1.4261940717697144, "learning_rate": 3.9354798465617365e-05, "loss": 0.0521, "step": 1097 }, { "epoch": 2.09, "grad_norm": 0.8648484945297241, "learning_rate": 3.9319688090468516e-05, "loss": 0.0348, "step": 1098 }, { "epoch": 2.09, "grad_norm": 1.057897925376892, "learning_rate": 3.92845635808294e-05, "loss": 0.0409, "step": 1099 }, { "epoch": 2.09, "grad_norm": 0.9395806789398193, "learning_rate": 3.924942498997081e-05, "loss": 0.05, "step": 1100 }, { "epoch": 2.09, "eval_blimp_filtered_avg": 0.7373134328358208, "eval_blimp_filtered_std": 0.004887836719226768, "step": 1100 }, { "epoch": 2.09, "eval_blimp_supplement_avg": 0.7780172413793104, "eval_blimp_supplement_std": 0.017850138682385417, "step": 1100 }, { "epoch": 2.09, "eval_vqa_filtered_avg": 0.41, "eval_vqa_filtered_std": 0.049431107042371025, "step": 1100 }, { "epoch": 2.09, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 1100 }, { "epoch": 2.09, "grad_norm": 1.0324339866638184, "learning_rate": 3.9214272371184866e-05, "loss": 0.0397, "step": 1101 }, { "epoch": 2.1, "grad_norm": 0.973565399646759, "learning_rate": 3.917910577778498e-05, "loss": 0.0281, "step": 1102 }, { "epoch": 2.1, "grad_norm": 0.9592449069023132, "learning_rate": 3.914392526310575e-05, "loss": 0.0307, "step": 1103 }, { "epoch": 2.1, "grad_norm": 0.8664752244949341, "learning_rate": 3.91087308805029e-05, "loss": 0.0255, "step": 1104 }, { "epoch": 2.1, "grad_norm": 1.4591771364212036, "learning_rate": 3.907352268335319e-05, "loss": 0.0303, "step": 1105 }, { "epoch": 2.1, "grad_norm": 0.9747409820556641, "learning_rate": 3.9038300725054316e-05, "loss": 0.0483, "step": 1106 }, { "epoch": 2.1, "grad_norm": 0.9744357466697693, "learning_rate": 3.9003065059024844e-05, "loss": 0.0203, "step": 1107 }, { "epoch": 2.11, "grad_norm": 1.3293272256851196, "learning_rate": 3.8967815738704126e-05, "loss": 0.034, "step": 1108 }, { "epoch": 2.11, "grad_norm": 1.0484956502914429, "learning_rate": 3.893255281755224e-05, "loss": 0.0377, "step": 1109 }, { "epoch": 2.11, "grad_norm": 0.6398167014122009, "learning_rate": 3.8897276349049896e-05, "loss": 0.0142, "step": 1110 }, { "epoch": 2.11, "grad_norm": 0.638053297996521, "learning_rate": 3.886198638669831e-05, "loss": 0.0156, "step": 1111 }, { "epoch": 2.11, "grad_norm": 0.8265632390975952, "learning_rate": 3.882668298401923e-05, "loss": 0.0331, "step": 1112 }, { "epoch": 2.12, "grad_norm": 0.9578907489776611, "learning_rate": 3.879136619455471e-05, "loss": 0.0238, "step": 1113 }, { "epoch": 2.12, "grad_norm": 1.1970043182373047, "learning_rate": 3.8756036071867166e-05, "loss": 0.0224, "step": 1114 }, { "epoch": 2.12, "grad_norm": 0.8913975954055786, "learning_rate": 3.872069266953921e-05, "loss": 0.0322, "step": 1115 }, { "epoch": 2.12, "grad_norm": 1.1116799116134644, "learning_rate": 3.868533604117359e-05, "loss": 0.028, "step": 1116 }, { "epoch": 2.12, "grad_norm": 0.9005084037780762, "learning_rate": 3.864996624039314e-05, "loss": 0.0172, "step": 1117 }, { "epoch": 2.13, "grad_norm": 0.8184544444084167, "learning_rate": 3.861458332084064e-05, "loss": 0.0207, "step": 1118 }, { "epoch": 2.13, "grad_norm": 1.1632020473480225, "learning_rate": 3.857918733617879e-05, "loss": 0.035, "step": 1119 }, { "epoch": 2.13, "grad_norm": 1.2648346424102783, "learning_rate": 3.854377834009008e-05, "loss": 0.0385, "step": 1120 }, { "epoch": 2.13, "grad_norm": 0.9768345952033997, "learning_rate": 3.850835638627675e-05, "loss": 0.0392, "step": 1121 }, { "epoch": 2.13, "grad_norm": 0.9851695895195007, "learning_rate": 3.8472921528460704e-05, "loss": 0.0284, "step": 1122 }, { "epoch": 2.13, "grad_norm": 0.8710018396377563, "learning_rate": 3.8437473820383406e-05, "loss": 0.0278, "step": 1123 }, { "epoch": 2.14, "grad_norm": 0.5897279977798462, "learning_rate": 3.840201331580579e-05, "loss": 0.0194, "step": 1124 }, { "epoch": 2.14, "grad_norm": 1.0394585132598877, "learning_rate": 3.8366540068508223e-05, "loss": 0.0185, "step": 1125 }, { "epoch": 2.14, "grad_norm": 0.9071407914161682, "learning_rate": 3.8331054132290375e-05, "loss": 0.028, "step": 1126 }, { "epoch": 2.14, "grad_norm": 0.9899227619171143, "learning_rate": 3.829555556097119e-05, "loss": 0.0169, "step": 1127 }, { "epoch": 2.14, "grad_norm": 1.2670469284057617, "learning_rate": 3.826004440838875e-05, "loss": 0.035, "step": 1128 }, { "epoch": 2.15, "grad_norm": 0.8780686855316162, "learning_rate": 3.8224520728400224e-05, "loss": 0.0213, "step": 1129 }, { "epoch": 2.15, "grad_norm": 1.7404210567474365, "learning_rate": 3.8188984574881796e-05, "loss": 0.0399, "step": 1130 }, { "epoch": 2.15, "grad_norm": 0.9448434114456177, "learning_rate": 3.815343600172854e-05, "loss": 0.0238, "step": 1131 }, { "epoch": 2.15, "grad_norm": 1.466584324836731, "learning_rate": 3.811787506285439e-05, "loss": 0.0396, "step": 1132 }, { "epoch": 2.15, "grad_norm": 0.6913077235221863, "learning_rate": 3.808230181219203e-05, "loss": 0.0165, "step": 1133 }, { "epoch": 2.16, "grad_norm": 0.8944645524024963, "learning_rate": 3.804671630369279e-05, "loss": 0.0244, "step": 1134 }, { "epoch": 2.16, "grad_norm": 1.0638607740402222, "learning_rate": 3.801111859132663e-05, "loss": 0.031, "step": 1135 }, { "epoch": 2.16, "grad_norm": 0.8543553948402405, "learning_rate": 3.7975508729081994e-05, "loss": 0.0187, "step": 1136 }, { "epoch": 2.16, "grad_norm": 1.4747676849365234, "learning_rate": 3.793988677096576e-05, "loss": 0.0411, "step": 1137 }, { "epoch": 2.16, "grad_norm": 0.8469879031181335, "learning_rate": 3.790425277100315e-05, "loss": 0.0319, "step": 1138 }, { "epoch": 2.17, "grad_norm": 1.0670251846313477, "learning_rate": 3.786860678323764e-05, "loss": 0.0519, "step": 1139 }, { "epoch": 2.17, "grad_norm": 0.7798013091087341, "learning_rate": 3.783294886173093e-05, "loss": 0.0209, "step": 1140 }, { "epoch": 2.17, "grad_norm": 0.9237111806869507, "learning_rate": 3.779727906056275e-05, "loss": 0.0299, "step": 1141 }, { "epoch": 2.17, "grad_norm": 1.0328737497329712, "learning_rate": 3.776159743383088e-05, "loss": 0.0283, "step": 1142 }, { "epoch": 2.17, "grad_norm": 1.1980313062667847, "learning_rate": 3.772590403565107e-05, "loss": 0.0414, "step": 1143 }, { "epoch": 2.17, "grad_norm": 1.1724169254302979, "learning_rate": 3.769019892015686e-05, "loss": 0.0426, "step": 1144 }, { "epoch": 2.18, "grad_norm": 0.8504079580307007, "learning_rate": 3.765448214149959e-05, "loss": 0.0324, "step": 1145 }, { "epoch": 2.18, "grad_norm": 1.0094321966171265, "learning_rate": 3.761875375384832e-05, "loss": 0.029, "step": 1146 }, { "epoch": 2.18, "grad_norm": 0.962568998336792, "learning_rate": 3.758301381138967e-05, "loss": 0.0347, "step": 1147 }, { "epoch": 2.18, "grad_norm": 0.8885593414306641, "learning_rate": 3.754726236832779e-05, "loss": 0.0297, "step": 1148 }, { "epoch": 2.18, "grad_norm": 0.9040636420249939, "learning_rate": 3.75114994788843e-05, "loss": 0.0277, "step": 1149 }, { "epoch": 2.19, "grad_norm": 1.2410639524459839, "learning_rate": 3.747572519729814e-05, "loss": 0.041, "step": 1150 }, { "epoch": 2.19, "grad_norm": 1.0731176137924194, "learning_rate": 3.743993957782557e-05, "loss": 0.0255, "step": 1151 }, { "epoch": 2.19, "grad_norm": 1.0069273710250854, "learning_rate": 3.740414267474002e-05, "loss": 0.0332, "step": 1152 }, { "epoch": 2.19, "grad_norm": 0.8850776553153992, "learning_rate": 3.7368334542332034e-05, "loss": 0.0238, "step": 1153 }, { "epoch": 2.19, "grad_norm": 0.9408190846443176, "learning_rate": 3.7332515234909195e-05, "loss": 0.0344, "step": 1154 }, { "epoch": 2.2, "grad_norm": 0.8336172103881836, "learning_rate": 3.7296684806796024e-05, "loss": 0.0303, "step": 1155 }, { "epoch": 2.2, "grad_norm": 0.9822571873664856, "learning_rate": 3.726084331233393e-05, "loss": 0.0318, "step": 1156 }, { "epoch": 2.2, "grad_norm": 0.8975710272789001, "learning_rate": 3.722499080588108e-05, "loss": 0.0328, "step": 1157 }, { "epoch": 2.2, "grad_norm": 0.7209893465042114, "learning_rate": 3.718912734181235e-05, "loss": 0.0252, "step": 1158 }, { "epoch": 2.2, "grad_norm": 0.8768108487129211, "learning_rate": 3.715325297451924e-05, "loss": 0.0258, "step": 1159 }, { "epoch": 2.21, "grad_norm": 0.6897008419036865, "learning_rate": 3.7117367758409775e-05, "loss": 0.0178, "step": 1160 }, { "epoch": 2.21, "grad_norm": 0.8130915760993958, "learning_rate": 3.7081471747908444e-05, "loss": 0.0289, "step": 1161 }, { "epoch": 2.21, "grad_norm": 1.0224395990371704, "learning_rate": 3.7045564997456124e-05, "loss": 0.0293, "step": 1162 }, { "epoch": 2.21, "grad_norm": 0.7682933807373047, "learning_rate": 3.7009647561509946e-05, "loss": 0.0223, "step": 1163 }, { "epoch": 2.21, "grad_norm": 0.6841908097267151, "learning_rate": 3.697371949454325e-05, "loss": 0.0221, "step": 1164 }, { "epoch": 2.21, "grad_norm": 0.8919627070426941, "learning_rate": 3.693778085104553e-05, "loss": 0.0181, "step": 1165 }, { "epoch": 2.22, "grad_norm": 0.8586483597755432, "learning_rate": 3.690183168552228e-05, "loss": 0.0295, "step": 1166 }, { "epoch": 2.22, "grad_norm": 1.0093531608581543, "learning_rate": 3.686587205249501e-05, "loss": 0.0183, "step": 1167 }, { "epoch": 2.22, "grad_norm": 0.7242430448532104, "learning_rate": 3.682990200650103e-05, "loss": 0.0191, "step": 1168 }, { "epoch": 2.22, "grad_norm": 0.859990656375885, "learning_rate": 3.6793921602093514e-05, "loss": 0.0279, "step": 1169 }, { "epoch": 2.22, "grad_norm": 1.198469877243042, "learning_rate": 3.675793089384129e-05, "loss": 0.0323, "step": 1170 }, { "epoch": 2.23, "grad_norm": 0.9630205631256104, "learning_rate": 3.672192993632884e-05, "loss": 0.0316, "step": 1171 }, { "epoch": 2.23, "grad_norm": 0.9857786893844604, "learning_rate": 3.66859187841562e-05, "loss": 0.0396, "step": 1172 }, { "epoch": 2.23, "grad_norm": 1.0439516305923462, "learning_rate": 3.664989749193884e-05, "loss": 0.0372, "step": 1173 }, { "epoch": 2.23, "grad_norm": 1.0566637516021729, "learning_rate": 3.661386611430764e-05, "loss": 0.0237, "step": 1174 }, { "epoch": 2.23, "grad_norm": 0.5969395041465759, "learning_rate": 3.657782470590874e-05, "loss": 0.0127, "step": 1175 }, { "epoch": 2.24, "grad_norm": 0.787785530090332, "learning_rate": 3.654177332140352e-05, "loss": 0.0124, "step": 1176 }, { "epoch": 2.24, "grad_norm": 0.7811552882194519, "learning_rate": 3.6505712015468487e-05, "loss": 0.0199, "step": 1177 }, { "epoch": 2.24, "grad_norm": 1.09591543674469, "learning_rate": 3.646964084279518e-05, "loss": 0.0289, "step": 1178 }, { "epoch": 2.24, "grad_norm": 1.7446881532669067, "learning_rate": 3.643355985809012e-05, "loss": 0.0314, "step": 1179 }, { "epoch": 2.24, "grad_norm": 1.385205864906311, "learning_rate": 3.6397469116074705e-05, "loss": 0.0206, "step": 1180 }, { "epoch": 2.25, "grad_norm": 0.930779218673706, "learning_rate": 3.636136867148511e-05, "loss": 0.0251, "step": 1181 }, { "epoch": 2.25, "grad_norm": 1.157547116279602, "learning_rate": 3.6325258579072266e-05, "loss": 0.0552, "step": 1182 }, { "epoch": 2.25, "grad_norm": 0.8737353086471558, "learning_rate": 3.628913889360169e-05, "loss": 0.0234, "step": 1183 }, { "epoch": 2.25, "grad_norm": 0.9815208315849304, "learning_rate": 3.625300966985347e-05, "loss": 0.0247, "step": 1184 }, { "epoch": 2.25, "grad_norm": 1.2596527338027954, "learning_rate": 3.62168709626222e-05, "loss": 0.0342, "step": 1185 }, { "epoch": 2.25, "grad_norm": 1.0906219482421875, "learning_rate": 3.618072282671678e-05, "loss": 0.0271, "step": 1186 }, { "epoch": 2.26, "grad_norm": 1.1445506811141968, "learning_rate": 3.614456531696046e-05, "loss": 0.045, "step": 1187 }, { "epoch": 2.26, "grad_norm": 0.8012164831161499, "learning_rate": 3.610839848819069e-05, "loss": 0.0284, "step": 1188 }, { "epoch": 2.26, "grad_norm": 1.108413577079773, "learning_rate": 3.607222239525905e-05, "loss": 0.0257, "step": 1189 }, { "epoch": 2.26, "grad_norm": 1.6038694381713867, "learning_rate": 3.60360370930312e-05, "loss": 0.0352, "step": 1190 }, { "epoch": 2.26, "grad_norm": 1.0407356023788452, "learning_rate": 3.5999842636386724e-05, "loss": 0.0283, "step": 1191 }, { "epoch": 2.27, "grad_norm": 0.7519016265869141, "learning_rate": 3.596363908021913e-05, "loss": 0.0216, "step": 1192 }, { "epoch": 2.27, "grad_norm": 1.090643048286438, "learning_rate": 3.59274264794357e-05, "loss": 0.0417, "step": 1193 }, { "epoch": 2.27, "grad_norm": 0.8029544353485107, "learning_rate": 3.589120488895743e-05, "loss": 0.0187, "step": 1194 }, { "epoch": 2.27, "grad_norm": 1.444327473640442, "learning_rate": 3.585497436371898e-05, "loss": 0.0361, "step": 1195 }, { "epoch": 2.27, "grad_norm": 0.8517007827758789, "learning_rate": 3.581873495866852e-05, "loss": 0.0229, "step": 1196 }, { "epoch": 2.28, "grad_norm": 0.7948609590530396, "learning_rate": 3.578248672876772e-05, "loss": 0.0186, "step": 1197 }, { "epoch": 2.28, "grad_norm": 1.017580509185791, "learning_rate": 3.574622972899163e-05, "loss": 0.0305, "step": 1198 }, { "epoch": 2.28, "grad_norm": 1.0789713859558105, "learning_rate": 3.570996401432858e-05, "loss": 0.0344, "step": 1199 }, { "epoch": 2.28, "grad_norm": 0.8819797039031982, "learning_rate": 3.567368963978015e-05, "loss": 0.0225, "step": 1200 }, { "epoch": 2.28, "eval_blimp_filtered_avg": 0.735820895522388, "eval_blimp_filtered_std": 0.004887790682911497, "step": 1200 }, { "epoch": 2.28, "eval_blimp_supplement_avg": 0.7780172413793104, "eval_blimp_supplement_std": 0.017923639564343903, "step": 1200 }, { "epoch": 2.28, "eval_vqa_filtered_avg": 0.38, "eval_vqa_filtered_std": 0.048783173121456316, "step": 1200 }, { "epoch": 2.28, "eval_winoground_filtered_avg": 0.51, "eval_winoground_filtered_std": 0.05024183937956912, "step": 1200 }, { "epoch": 2.28, "grad_norm": 1.0546010732650757, "learning_rate": 3.5637406660360996e-05, "loss": 0.0388, "step": 1201 }, { "epoch": 2.29, "grad_norm": 0.48209214210510254, "learning_rate": 3.56011151310989e-05, "loss": 0.0122, "step": 1202 }, { "epoch": 2.29, "grad_norm": 0.9037948250770569, "learning_rate": 3.556481510703457e-05, "loss": 0.0281, "step": 1203 }, { "epoch": 2.29, "grad_norm": 1.2600152492523193, "learning_rate": 3.5528506643221594e-05, "loss": 0.0327, "step": 1204 }, { "epoch": 2.29, "grad_norm": 0.9247565269470215, "learning_rate": 3.549218979472636e-05, "loss": 0.0205, "step": 1205 }, { "epoch": 2.29, "grad_norm": 0.8958795070648193, "learning_rate": 3.5455864616627994e-05, "loss": 0.0217, "step": 1206 }, { "epoch": 2.29, "grad_norm": 1.2813855409622192, "learning_rate": 3.541953116401822e-05, "loss": 0.0347, "step": 1207 }, { "epoch": 2.3, "grad_norm": 0.878525972366333, "learning_rate": 3.538318949200136e-05, "loss": 0.0214, "step": 1208 }, { "epoch": 2.3, "grad_norm": 0.9167356491088867, "learning_rate": 3.534683965569415e-05, "loss": 0.0205, "step": 1209 }, { "epoch": 2.3, "grad_norm": 1.149776577949524, "learning_rate": 3.5310481710225744e-05, "loss": 0.0328, "step": 1210 }, { "epoch": 2.3, "grad_norm": 0.5970554351806641, "learning_rate": 3.5274115710737576e-05, "loss": 0.0186, "step": 1211 }, { "epoch": 2.3, "grad_norm": 1.1585978269577026, "learning_rate": 3.5237741712383306e-05, "loss": 0.0162, "step": 1212 }, { "epoch": 2.31, "grad_norm": 0.7382116317749023, "learning_rate": 3.5201359770328726e-05, "loss": 0.0243, "step": 1213 }, { "epoch": 2.31, "grad_norm": 0.9483512043952942, "learning_rate": 3.5164969939751655e-05, "loss": 0.0267, "step": 1214 }, { "epoch": 2.31, "grad_norm": 1.2445852756500244, "learning_rate": 3.512857227584191e-05, "loss": 0.0266, "step": 1215 }, { "epoch": 2.31, "grad_norm": 0.5459380745887756, "learning_rate": 3.509216683380115e-05, "loss": 0.0164, "step": 1216 }, { "epoch": 2.31, "grad_norm": 0.538429856300354, "learning_rate": 3.5055753668842865e-05, "loss": 0.0125, "step": 1217 }, { "epoch": 2.32, "grad_norm": 0.7816592454910278, "learning_rate": 3.501933283619224e-05, "loss": 0.0148, "step": 1218 }, { "epoch": 2.32, "grad_norm": 0.7872945070266724, "learning_rate": 3.498290439108609e-05, "loss": 0.0174, "step": 1219 }, { "epoch": 2.32, "grad_norm": 0.846574068069458, "learning_rate": 3.494646838877277e-05, "loss": 0.0164, "step": 1220 }, { "epoch": 2.32, "grad_norm": 1.2166684865951538, "learning_rate": 3.491002488451212e-05, "loss": 0.0292, "step": 1221 }, { "epoch": 2.32, "grad_norm": 0.6567668318748474, "learning_rate": 3.4873573933575324e-05, "loss": 0.0129, "step": 1222 }, { "epoch": 2.33, "grad_norm": 1.200331449508667, "learning_rate": 3.483711559124488e-05, "loss": 0.0415, "step": 1223 }, { "epoch": 2.33, "grad_norm": 0.9385453462600708, "learning_rate": 3.48006499128145e-05, "loss": 0.0281, "step": 1224 }, { "epoch": 2.33, "grad_norm": 0.8724737763404846, "learning_rate": 3.476417695358902e-05, "loss": 0.0269, "step": 1225 }, { "epoch": 2.33, "grad_norm": 0.9694152474403381, "learning_rate": 3.4727696768884304e-05, "loss": 0.0265, "step": 1226 }, { "epoch": 2.33, "grad_norm": 0.7688245177268982, "learning_rate": 3.4691209414027186e-05, "loss": 0.023, "step": 1227 }, { "epoch": 2.33, "grad_norm": 0.7955913543701172, "learning_rate": 3.465471494435536e-05, "loss": 0.017, "step": 1228 }, { "epoch": 2.34, "grad_norm": 0.9916734099388123, "learning_rate": 3.4618213415217356e-05, "loss": 0.0478, "step": 1229 }, { "epoch": 2.34, "grad_norm": 0.8537799119949341, "learning_rate": 3.4581704881972344e-05, "loss": 0.0193, "step": 1230 }, { "epoch": 2.34, "grad_norm": 0.8964945673942566, "learning_rate": 3.454518939999017e-05, "loss": 0.0194, "step": 1231 }, { "epoch": 2.34, "grad_norm": 1.4639755487442017, "learning_rate": 3.450866702465119e-05, "loss": 0.0411, "step": 1232 }, { "epoch": 2.34, "grad_norm": 0.8643292784690857, "learning_rate": 3.4472137811346215e-05, "loss": 0.0199, "step": 1233 }, { "epoch": 2.35, "grad_norm": 1.1320050954818726, "learning_rate": 3.443560181547646e-05, "loss": 0.0304, "step": 1234 }, { "epoch": 2.35, "grad_norm": 0.993150532245636, "learning_rate": 3.439905909245337e-05, "loss": 0.0226, "step": 1235 }, { "epoch": 2.35, "grad_norm": 1.0723025798797607, "learning_rate": 3.436250969769865e-05, "loss": 0.0321, "step": 1236 }, { "epoch": 2.35, "grad_norm": 1.1654707193374634, "learning_rate": 3.432595368664408e-05, "loss": 0.0337, "step": 1237 }, { "epoch": 2.35, "grad_norm": 0.6940431594848633, "learning_rate": 3.4289391114731507e-05, "loss": 0.035, "step": 1238 }, { "epoch": 2.36, "grad_norm": 1.0504443645477295, "learning_rate": 3.425282203741271e-05, "loss": 0.0352, "step": 1239 }, { "epoch": 2.36, "grad_norm": 0.726099967956543, "learning_rate": 3.421624651014932e-05, "loss": 0.0202, "step": 1240 }, { "epoch": 2.36, "grad_norm": 1.4504814147949219, "learning_rate": 3.417966458841279e-05, "loss": 0.0391, "step": 1241 }, { "epoch": 2.36, "grad_norm": 0.9218035936355591, "learning_rate": 3.4143076327684246e-05, "loss": 0.0176, "step": 1242 }, { "epoch": 2.36, "grad_norm": 1.1375185251235962, "learning_rate": 3.410648178345442e-05, "loss": 0.0442, "step": 1243 }, { "epoch": 2.37, "grad_norm": 1.5283360481262207, "learning_rate": 3.406988101122359e-05, "loss": 0.0421, "step": 1244 }, { "epoch": 2.37, "grad_norm": 0.9782218933105469, "learning_rate": 3.403327406650147e-05, "loss": 0.031, "step": 1245 }, { "epoch": 2.37, "grad_norm": 1.2577868700027466, "learning_rate": 3.399666100480714e-05, "loss": 0.0406, "step": 1246 }, { "epoch": 2.37, "grad_norm": 0.8968157172203064, "learning_rate": 3.396004188166898e-05, "loss": 0.0208, "step": 1247 }, { "epoch": 2.37, "grad_norm": 0.8263989090919495, "learning_rate": 3.3923416752624496e-05, "loss": 0.0306, "step": 1248 }, { "epoch": 2.37, "grad_norm": 0.8170139789581299, "learning_rate": 3.388678567322039e-05, "loss": 0.0166, "step": 1249 }, { "epoch": 2.38, "grad_norm": 0.6975342631340027, "learning_rate": 3.385014869901232e-05, "loss": 0.0152, "step": 1250 }, { "epoch": 2.38, "grad_norm": 0.7280627489089966, "learning_rate": 3.381350588556491e-05, "loss": 0.0237, "step": 1251 }, { "epoch": 2.38, "grad_norm": 0.9785979390144348, "learning_rate": 3.377685728845164e-05, "loss": 0.0328, "step": 1252 }, { "epoch": 2.38, "grad_norm": 0.895891547203064, "learning_rate": 3.374020296325477e-05, "loss": 0.0297, "step": 1253 }, { "epoch": 2.38, "grad_norm": 1.2668248414993286, "learning_rate": 3.3703542965565235e-05, "loss": 0.0221, "step": 1254 }, { "epoch": 2.39, "grad_norm": 0.7150057554244995, "learning_rate": 3.366687735098257e-05, "loss": 0.0197, "step": 1255 }, { "epoch": 2.39, "grad_norm": 0.7753677368164062, "learning_rate": 3.363020617511483e-05, "loss": 0.0179, "step": 1256 }, { "epoch": 2.39, "grad_norm": 1.2130573987960815, "learning_rate": 3.359352949357852e-05, "loss": 0.0408, "step": 1257 }, { "epoch": 2.39, "grad_norm": 0.912406861782074, "learning_rate": 3.3556847361998475e-05, "loss": 0.0216, "step": 1258 }, { "epoch": 2.39, "grad_norm": 1.0149822235107422, "learning_rate": 3.3520159836007817e-05, "loss": 0.0367, "step": 1259 }, { "epoch": 2.4, "grad_norm": 0.5798947811126709, "learning_rate": 3.3483466971247834e-05, "loss": 0.0214, "step": 1260 }, { "epoch": 2.4, "grad_norm": 0.7553463578224182, "learning_rate": 3.344676882336791e-05, "loss": 0.0168, "step": 1261 }, { "epoch": 2.4, "grad_norm": 0.9607523083686829, "learning_rate": 3.341006544802545e-05, "loss": 0.0257, "step": 1262 }, { "epoch": 2.4, "grad_norm": 1.199916958808899, "learning_rate": 3.3373356900885765e-05, "loss": 0.0528, "step": 1263 }, { "epoch": 2.4, "grad_norm": 0.7416831851005554, "learning_rate": 3.3336643237622055e-05, "loss": 0.0177, "step": 1264 }, { "epoch": 2.4, "grad_norm": 0.750126838684082, "learning_rate": 3.329992451391523e-05, "loss": 0.0362, "step": 1265 }, { "epoch": 2.41, "grad_norm": 0.9131319522857666, "learning_rate": 3.326320078545392e-05, "loss": 0.032, "step": 1266 }, { "epoch": 2.41, "grad_norm": 0.7278329730033875, "learning_rate": 3.322647210793429e-05, "loss": 0.0191, "step": 1267 }, { "epoch": 2.41, "grad_norm": 0.9052397012710571, "learning_rate": 3.3189738537060064e-05, "loss": 0.0264, "step": 1268 }, { "epoch": 2.41, "grad_norm": 0.7002453804016113, "learning_rate": 3.315300012854236e-05, "loss": 0.0205, "step": 1269 }, { "epoch": 2.41, "grad_norm": 0.9374354481697083, "learning_rate": 3.311625693809965e-05, "loss": 0.0322, "step": 1270 }, { "epoch": 2.42, "grad_norm": 0.6562173962593079, "learning_rate": 3.307950902145763e-05, "loss": 0.0176, "step": 1271 }, { "epoch": 2.42, "grad_norm": 1.0235211849212646, "learning_rate": 3.3042756434349186e-05, "loss": 0.0282, "step": 1272 }, { "epoch": 2.42, "grad_norm": 1.3698484897613525, "learning_rate": 3.30059992325143e-05, "loss": 0.0325, "step": 1273 }, { "epoch": 2.42, "grad_norm": 1.1551504135131836, "learning_rate": 3.2969237471699906e-05, "loss": 0.0359, "step": 1274 }, { "epoch": 2.42, "grad_norm": 1.2397526502609253, "learning_rate": 3.29324712076599e-05, "loss": 0.0297, "step": 1275 }, { "epoch": 2.43, "grad_norm": 0.8171043992042542, "learning_rate": 3.289570049615499e-05, "loss": 0.0415, "step": 1276 }, { "epoch": 2.43, "grad_norm": 1.1505523920059204, "learning_rate": 3.2858925392952624e-05, "loss": 0.0286, "step": 1277 }, { "epoch": 2.43, "grad_norm": 1.3288356065750122, "learning_rate": 3.2822145953826924e-05, "loss": 0.051, "step": 1278 }, { "epoch": 2.43, "grad_norm": 0.6441298127174377, "learning_rate": 3.278536223455856e-05, "loss": 0.0262, "step": 1279 }, { "epoch": 2.43, "grad_norm": 0.5151641368865967, "learning_rate": 3.2748574290934725e-05, "loss": 0.0125, "step": 1280 }, { "epoch": 2.44, "grad_norm": 0.8240029215812683, "learning_rate": 3.271178217874901e-05, "loss": 0.0173, "step": 1281 }, { "epoch": 2.44, "grad_norm": 0.7596692442893982, "learning_rate": 3.267498595380131e-05, "loss": 0.0263, "step": 1282 }, { "epoch": 2.44, "grad_norm": 0.6713970899581909, "learning_rate": 3.2638185671897794e-05, "loss": 0.0276, "step": 1283 }, { "epoch": 2.44, "grad_norm": 0.9288908243179321, "learning_rate": 3.2601381388850746e-05, "loss": 0.0333, "step": 1284 }, { "epoch": 2.44, "grad_norm": 0.6760089993476868, "learning_rate": 3.2564573160478546e-05, "loss": 0.023, "step": 1285 }, { "epoch": 2.44, "grad_norm": 0.9844262599945068, "learning_rate": 3.252776104260552e-05, "loss": 0.0285, "step": 1286 }, { "epoch": 2.45, "grad_norm": 0.9611965417861938, "learning_rate": 3.249094509106194e-05, "loss": 0.0222, "step": 1287 }, { "epoch": 2.45, "grad_norm": 0.7170396447181702, "learning_rate": 3.245412536168387e-05, "loss": 0.0208, "step": 1288 }, { "epoch": 2.45, "grad_norm": 1.4808533191680908, "learning_rate": 3.2417301910313107e-05, "loss": 0.0421, "step": 1289 }, { "epoch": 2.45, "grad_norm": 0.9701176285743713, "learning_rate": 3.238047479279708e-05, "loss": 0.0249, "step": 1290 }, { "epoch": 2.45, "grad_norm": 1.49601411819458, "learning_rate": 3.234364406498879e-05, "loss": 0.0336, "step": 1291 }, { "epoch": 2.46, "grad_norm": 0.5164822936058044, "learning_rate": 3.230680978274671e-05, "loss": 0.0149, "step": 1292 }, { "epoch": 2.46, "grad_norm": 1.1006197929382324, "learning_rate": 3.226997200193472e-05, "loss": 0.0222, "step": 1293 }, { "epoch": 2.46, "grad_norm": 0.6277118921279907, "learning_rate": 3.223313077842198e-05, "loss": 0.0232, "step": 1294 }, { "epoch": 2.46, "grad_norm": 1.1834864616394043, "learning_rate": 3.2196286168082884e-05, "loss": 0.031, "step": 1295 }, { "epoch": 2.46, "grad_norm": 1.2606849670410156, "learning_rate": 3.215943822679697e-05, "loss": 0.0277, "step": 1296 }, { "epoch": 2.47, "grad_norm": 1.075881838798523, "learning_rate": 3.2122587010448796e-05, "loss": 0.0374, "step": 1297 }, { "epoch": 2.47, "grad_norm": 0.6080549955368042, "learning_rate": 3.208573257492795e-05, "loss": 0.0239, "step": 1298 }, { "epoch": 2.47, "grad_norm": 0.5829216241836548, "learning_rate": 3.204887497612881e-05, "loss": 0.0197, "step": 1299 }, { "epoch": 2.47, "grad_norm": 0.5608998537063599, "learning_rate": 3.201201426995066e-05, "loss": 0.0198, "step": 1300 }, { "epoch": 2.47, "eval_blimp_filtered_avg": 0.7326865671641791, "eval_blimp_filtered_std": 0.004916685273434163, "step": 1300 }, { "epoch": 2.47, "eval_blimp_supplement_avg": 0.7737068965517241, "eval_blimp_supplement_std": 0.01788167605447029, "step": 1300 }, { "epoch": 2.47, "eval_vqa_filtered_avg": 0.34, "eval_vqa_filtered_std": 0.04760952285695236, "step": 1300 }, { "epoch": 2.47, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 1300 }, { "epoch": 2.47, "grad_norm": 0.8052921295166016, "learning_rate": 3.19751505122974e-05, "loss": 0.0222, "step": 1301 }, { "epoch": 2.48, "grad_norm": 1.3892381191253662, "learning_rate": 3.193828375907761e-05, "loss": 0.0305, "step": 1302 }, { "epoch": 2.48, "grad_norm": 0.7554770708084106, "learning_rate": 3.190141406620442e-05, "loss": 0.0294, "step": 1303 }, { "epoch": 2.48, "grad_norm": 1.0050979852676392, "learning_rate": 3.186454148959537e-05, "loss": 0.032, "step": 1304 }, { "epoch": 2.48, "grad_norm": 0.7120445370674133, "learning_rate": 3.182766608517242e-05, "loss": 0.0256, "step": 1305 }, { "epoch": 2.48, "grad_norm": 0.8618227243423462, "learning_rate": 3.179078790886181e-05, "loss": 0.0295, "step": 1306 }, { "epoch": 2.48, "grad_norm": 0.8017216324806213, "learning_rate": 3.175390701659397e-05, "loss": 0.0187, "step": 1307 }, { "epoch": 2.49, "grad_norm": 0.8312126994132996, "learning_rate": 3.171702346430345e-05, "loss": 0.0282, "step": 1308 }, { "epoch": 2.49, "grad_norm": 1.2441346645355225, "learning_rate": 3.168013730792885e-05, "loss": 0.0422, "step": 1309 }, { "epoch": 2.49, "grad_norm": 0.8543381690979004, "learning_rate": 3.164324860341269e-05, "loss": 0.031, "step": 1310 }, { "epoch": 2.49, "grad_norm": 1.0195410251617432, "learning_rate": 3.160635740670141e-05, "loss": 0.0321, "step": 1311 }, { "epoch": 2.49, "grad_norm": 0.6801515221595764, "learning_rate": 3.1569463773745165e-05, "loss": 0.0177, "step": 1312 }, { "epoch": 2.5, "grad_norm": 0.9141638875007629, "learning_rate": 3.1532567760497825e-05, "loss": 0.0219, "step": 1313 }, { "epoch": 2.5, "grad_norm": 0.7351475358009338, "learning_rate": 3.1495669422916916e-05, "loss": 0.0194, "step": 1314 }, { "epoch": 2.5, "grad_norm": 1.1820323467254639, "learning_rate": 3.145876881696341e-05, "loss": 0.0235, "step": 1315 }, { "epoch": 2.5, "grad_norm": 0.9780482649803162, "learning_rate": 3.142186599860179e-05, "loss": 0.0219, "step": 1316 }, { "epoch": 2.5, "grad_norm": 1.1507515907287598, "learning_rate": 3.1384961023799856e-05, "loss": 0.027, "step": 1317 }, { "epoch": 2.51, "grad_norm": 0.732064425945282, "learning_rate": 3.134805394852869e-05, "loss": 0.0122, "step": 1318 }, { "epoch": 2.51, "grad_norm": 1.0467997789382935, "learning_rate": 3.1311144828762554e-05, "loss": 0.0352, "step": 1319 }, { "epoch": 2.51, "grad_norm": 0.674696147441864, "learning_rate": 3.127423372047881e-05, "loss": 0.0193, "step": 1320 }, { "epoch": 2.51, "grad_norm": 0.9180742502212524, "learning_rate": 3.1237320679657856e-05, "loss": 0.0302, "step": 1321 }, { "epoch": 2.51, "grad_norm": 1.1329432725906372, "learning_rate": 3.1200405762282985e-05, "loss": 0.0214, "step": 1322 }, { "epoch": 2.52, "grad_norm": 0.9363932013511658, "learning_rate": 3.116348902434037e-05, "loss": 0.0208, "step": 1323 }, { "epoch": 2.52, "grad_norm": 0.7593256831169128, "learning_rate": 3.112657052181892e-05, "loss": 0.0294, "step": 1324 }, { "epoch": 2.52, "grad_norm": 0.8442426323890686, "learning_rate": 3.108965031071025e-05, "loss": 0.0261, "step": 1325 }, { "epoch": 2.52, "grad_norm": 0.8864880204200745, "learning_rate": 3.1052728447008524e-05, "loss": 0.0365, "step": 1326 }, { "epoch": 2.52, "grad_norm": 1.0457208156585693, "learning_rate": 3.101580498671046e-05, "loss": 0.0305, "step": 1327 }, { "epoch": 2.52, "grad_norm": 0.83686763048172, "learning_rate": 3.0978879985815154e-05, "loss": 0.0355, "step": 1328 }, { "epoch": 2.53, "grad_norm": 0.9200435876846313, "learning_rate": 3.0941953500324074e-05, "loss": 0.023, "step": 1329 }, { "epoch": 2.53, "grad_norm": 0.6402056813240051, "learning_rate": 3.090502558624092e-05, "loss": 0.0177, "step": 1330 }, { "epoch": 2.53, "grad_norm": 0.7330701351165771, "learning_rate": 3.0868096299571544e-05, "loss": 0.0142, "step": 1331 }, { "epoch": 2.53, "grad_norm": 0.9574253559112549, "learning_rate": 3.083116569632393e-05, "loss": 0.0232, "step": 1332 }, { "epoch": 2.53, "grad_norm": 1.4417943954467773, "learning_rate": 3.079423383250798e-05, "loss": 0.0335, "step": 1333 }, { "epoch": 2.54, "grad_norm": 1.0514860153198242, "learning_rate": 3.0757300764135606e-05, "loss": 0.0268, "step": 1334 }, { "epoch": 2.54, "grad_norm": 0.7393744587898254, "learning_rate": 3.072036654722047e-05, "loss": 0.0108, "step": 1335 }, { "epoch": 2.54, "grad_norm": 0.4334273040294647, "learning_rate": 3.0683431237778e-05, "loss": 0.01, "step": 1336 }, { "epoch": 2.54, "grad_norm": 0.9760047197341919, "learning_rate": 3.0646494891825285e-05, "loss": 0.028, "step": 1337 }, { "epoch": 2.54, "grad_norm": 1.0864204168319702, "learning_rate": 3.060955756538099e-05, "loss": 0.0166, "step": 1338 }, { "epoch": 2.55, "grad_norm": 1.2224558591842651, "learning_rate": 3.057261931446524e-05, "loss": 0.0449, "step": 1339 }, { "epoch": 2.55, "grad_norm": 0.9224399924278259, "learning_rate": 3.0535680195099604e-05, "loss": 0.0213, "step": 1340 }, { "epoch": 2.55, "grad_norm": 0.9087544679641724, "learning_rate": 3.0498740263306954e-05, "loss": 0.0226, "step": 1341 }, { "epoch": 2.55, "grad_norm": 1.428886890411377, "learning_rate": 3.0461799575111364e-05, "loss": 0.0278, "step": 1342 }, { "epoch": 2.55, "grad_norm": 0.9300993084907532, "learning_rate": 3.042485818653808e-05, "loss": 0.0344, "step": 1343 }, { "epoch": 2.56, "grad_norm": 1.2383787631988525, "learning_rate": 3.038791615361341e-05, "loss": 0.037, "step": 1344 }, { "epoch": 2.56, "grad_norm": 1.0835143327713013, "learning_rate": 3.0350973532364653e-05, "loss": 0.0253, "step": 1345 }, { "epoch": 2.56, "grad_norm": 0.9245375394821167, "learning_rate": 3.031403037881997e-05, "loss": 0.0202, "step": 1346 }, { "epoch": 2.56, "grad_norm": 0.6015616655349731, "learning_rate": 3.0277086749008345e-05, "loss": 0.0169, "step": 1347 }, { "epoch": 2.56, "grad_norm": 0.910750150680542, "learning_rate": 3.0240142698959483e-05, "loss": 0.033, "step": 1348 }, { "epoch": 2.56, "grad_norm": 0.9448145627975464, "learning_rate": 3.020319828470373e-05, "loss": 0.0415, "step": 1349 }, { "epoch": 2.57, "grad_norm": 0.7794601321220398, "learning_rate": 3.016625356227198e-05, "loss": 0.0213, "step": 1350 }, { "epoch": 2.57, "grad_norm": 0.9962546825408936, "learning_rate": 3.012930858769559e-05, "loss": 0.0222, "step": 1351 }, { "epoch": 2.57, "grad_norm": 0.7120707035064697, "learning_rate": 3.0092363417006313e-05, "loss": 0.0148, "step": 1352 }, { "epoch": 2.57, "grad_norm": 0.913685142993927, "learning_rate": 3.0055418106236193e-05, "loss": 0.0229, "step": 1353 }, { "epoch": 2.57, "grad_norm": 1.0789960622787476, "learning_rate": 3.0018472711417467e-05, "loss": 0.0419, "step": 1354 }, { "epoch": 2.58, "grad_norm": 0.5032746195793152, "learning_rate": 2.998152728858253e-05, "loss": 0.0135, "step": 1355 }, { "epoch": 2.58, "grad_norm": 1.0093202590942383, "learning_rate": 2.9944581893763815e-05, "loss": 0.0447, "step": 1356 }, { "epoch": 2.58, "grad_norm": 1.1455811262130737, "learning_rate": 2.990763658299369e-05, "loss": 0.0201, "step": 1357 }, { "epoch": 2.58, "grad_norm": 0.8035153150558472, "learning_rate": 2.9870691412304407e-05, "loss": 0.0231, "step": 1358 }, { "epoch": 2.58, "grad_norm": 0.7908992171287537, "learning_rate": 2.9833746437728027e-05, "loss": 0.0297, "step": 1359 }, { "epoch": 2.59, "grad_norm": 1.0182263851165771, "learning_rate": 2.9796801715296276e-05, "loss": 0.0282, "step": 1360 }, { "epoch": 2.59, "grad_norm": 0.7409573197364807, "learning_rate": 2.975985730104053e-05, "loss": 0.0282, "step": 1361 }, { "epoch": 2.59, "grad_norm": 0.8745615482330322, "learning_rate": 2.9722913250991656e-05, "loss": 0.0221, "step": 1362 }, { "epoch": 2.59, "grad_norm": 1.1195486783981323, "learning_rate": 2.9685969621180037e-05, "loss": 0.0399, "step": 1363 }, { "epoch": 2.59, "grad_norm": 0.8848345279693604, "learning_rate": 2.964902646763535e-05, "loss": 0.0315, "step": 1364 }, { "epoch": 2.6, "grad_norm": 0.8234761357307434, "learning_rate": 2.9612083846386587e-05, "loss": 0.0133, "step": 1365 }, { "epoch": 2.6, "grad_norm": 0.8237802982330322, "learning_rate": 2.9575141813461928e-05, "loss": 0.0215, "step": 1366 }, { "epoch": 2.6, "grad_norm": 0.6651610136032104, "learning_rate": 2.953820042488865e-05, "loss": 0.0206, "step": 1367 }, { "epoch": 2.6, "grad_norm": 0.5577760338783264, "learning_rate": 2.950125973669305e-05, "loss": 0.0182, "step": 1368 }, { "epoch": 2.6, "grad_norm": 0.8579639196395874, "learning_rate": 2.9464319804900394e-05, "loss": 0.0372, "step": 1369 }, { "epoch": 2.6, "grad_norm": 1.356151819229126, "learning_rate": 2.9427380685534766e-05, "loss": 0.0442, "step": 1370 }, { "epoch": 2.61, "grad_norm": 1.054283618927002, "learning_rate": 2.9390442434619016e-05, "loss": 0.0438, "step": 1371 }, { "epoch": 2.61, "grad_norm": 0.7560109496116638, "learning_rate": 2.935350510817472e-05, "loss": 0.0276, "step": 1372 }, { "epoch": 2.61, "grad_norm": 0.8271297812461853, "learning_rate": 2.931656876222201e-05, "loss": 0.0258, "step": 1373 }, { "epoch": 2.61, "grad_norm": 0.6031360626220703, "learning_rate": 2.927963345277954e-05, "loss": 0.0139, "step": 1374 }, { "epoch": 2.61, "grad_norm": 0.8246234059333801, "learning_rate": 2.92426992358644e-05, "loss": 0.0292, "step": 1375 }, { "epoch": 2.62, "grad_norm": 0.9296931028366089, "learning_rate": 2.920576616749202e-05, "loss": 0.0208, "step": 1376 }, { "epoch": 2.62, "grad_norm": 0.707583487033844, "learning_rate": 2.916883430367609e-05, "loss": 0.0276, "step": 1377 }, { "epoch": 2.62, "grad_norm": 0.8194584250450134, "learning_rate": 2.9131903700428454e-05, "loss": 0.0219, "step": 1378 }, { "epoch": 2.62, "grad_norm": 1.2159682512283325, "learning_rate": 2.909497441375909e-05, "loss": 0.0368, "step": 1379 }, { "epoch": 2.62, "grad_norm": 0.870678186416626, "learning_rate": 2.9058046499675928e-05, "loss": 0.0234, "step": 1380 }, { "epoch": 2.63, "grad_norm": 0.6772706508636475, "learning_rate": 2.902112001418484e-05, "loss": 0.0251, "step": 1381 }, { "epoch": 2.63, "grad_norm": 0.7661545276641846, "learning_rate": 2.8984195013289543e-05, "loss": 0.0181, "step": 1382 }, { "epoch": 2.63, "grad_norm": 0.7262756824493408, "learning_rate": 2.8947271552991477e-05, "loss": 0.0304, "step": 1383 }, { "epoch": 2.63, "grad_norm": 1.1328446865081787, "learning_rate": 2.8910349689289748e-05, "loss": 0.0393, "step": 1384 }, { "epoch": 2.63, "grad_norm": 0.8276193737983704, "learning_rate": 2.8873429478181076e-05, "loss": 0.0161, "step": 1385 }, { "epoch": 2.63, "grad_norm": 1.094473123550415, "learning_rate": 2.883651097565964e-05, "loss": 0.0239, "step": 1386 }, { "epoch": 2.64, "grad_norm": 0.6505980491638184, "learning_rate": 2.8799594237717027e-05, "loss": 0.0157, "step": 1387 }, { "epoch": 2.64, "grad_norm": 0.6054595112800598, "learning_rate": 2.8762679320342156e-05, "loss": 0.0176, "step": 1388 }, { "epoch": 2.64, "grad_norm": 0.8456588387489319, "learning_rate": 2.8725766279521197e-05, "loss": 0.0161, "step": 1389 }, { "epoch": 2.64, "grad_norm": 0.7880589962005615, "learning_rate": 2.868885517123746e-05, "loss": 0.017, "step": 1390 }, { "epoch": 2.64, "grad_norm": 1.0319850444793701, "learning_rate": 2.8651946051471317e-05, "loss": 0.0391, "step": 1391 }, { "epoch": 2.65, "grad_norm": 1.1339360475540161, "learning_rate": 2.861503897620015e-05, "loss": 0.043, "step": 1392 }, { "epoch": 2.65, "grad_norm": 0.6851608157157898, "learning_rate": 2.8578134001398217e-05, "loss": 0.0143, "step": 1393 }, { "epoch": 2.65, "grad_norm": 0.754137396812439, "learning_rate": 2.8541231183036585e-05, "loss": 0.0272, "step": 1394 }, { "epoch": 2.65, "grad_norm": 0.6524330377578735, "learning_rate": 2.8504330577083092e-05, "loss": 0.0251, "step": 1395 }, { "epoch": 2.65, "grad_norm": 0.6524056196212769, "learning_rate": 2.8467432239502173e-05, "loss": 0.0176, "step": 1396 }, { "epoch": 2.66, "grad_norm": 1.0624055862426758, "learning_rate": 2.843053622625484e-05, "loss": 0.0272, "step": 1397 }, { "epoch": 2.66, "grad_norm": 0.4871482253074646, "learning_rate": 2.8393642593298594e-05, "loss": 0.0155, "step": 1398 }, { "epoch": 2.66, "grad_norm": 0.5850666165351868, "learning_rate": 2.8356751396587306e-05, "loss": 0.0161, "step": 1399 }, { "epoch": 2.66, "grad_norm": 0.8983573913574219, "learning_rate": 2.831986269207116e-05, "loss": 0.0185, "step": 1400 }, { "epoch": 2.66, "eval_blimp_filtered_avg": 0.7340298507462687, "eval_blimp_filtered_std": 0.004881802265301223, "step": 1400 }, { "epoch": 2.66, "eval_blimp_supplement_avg": 0.7672413793103449, "eval_blimp_supplement_std": 0.01812803473256185, "step": 1400 }, { "epoch": 2.66, "eval_vqa_filtered_avg": 0.31, "eval_vqa_filtered_std": 0.04648231987117316, "step": 1400 }, { "epoch": 2.66, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 1400 }, { "epoch": 2.66, "grad_norm": 1.1601905822753906, "learning_rate": 2.8282976535696546e-05, "loss": 0.0241, "step": 1401 }, { "epoch": 2.67, "grad_norm": 0.792147696018219, "learning_rate": 2.8246092983406033e-05, "loss": 0.0225, "step": 1402 }, { "epoch": 2.67, "grad_norm": 0.8272244930267334, "learning_rate": 2.8209212091138194e-05, "loss": 0.025, "step": 1403 }, { "epoch": 2.67, "grad_norm": 0.8053359985351562, "learning_rate": 2.817233391482758e-05, "loss": 0.0195, "step": 1404 }, { "epoch": 2.67, "grad_norm": 0.9786686301231384, "learning_rate": 2.8135458510404636e-05, "loss": 0.0312, "step": 1405 }, { "epoch": 2.67, "grad_norm": 0.7075037956237793, "learning_rate": 2.8098585933795597e-05, "loss": 0.0145, "step": 1406 }, { "epoch": 2.67, "grad_norm": 0.7536090612411499, "learning_rate": 2.8061716240922387e-05, "loss": 0.0167, "step": 1407 }, { "epoch": 2.68, "grad_norm": 0.7621767520904541, "learning_rate": 2.8024849487702605e-05, "loss": 0.0227, "step": 1408 }, { "epoch": 2.68, "grad_norm": 0.6289641261100769, "learning_rate": 2.798798573004935e-05, "loss": 0.0178, "step": 1409 }, { "epoch": 2.68, "grad_norm": 0.5681379437446594, "learning_rate": 2.7951125023871193e-05, "loss": 0.0172, "step": 1410 }, { "epoch": 2.68, "grad_norm": 0.5485211610794067, "learning_rate": 2.7914267425072064e-05, "loss": 0.0166, "step": 1411 }, { "epoch": 2.68, "grad_norm": 0.6899523138999939, "learning_rate": 2.7877412989551206e-05, "loss": 0.0233, "step": 1412 }, { "epoch": 2.69, "grad_norm": 1.0330462455749512, "learning_rate": 2.7840561773203044e-05, "loss": 0.0295, "step": 1413 }, { "epoch": 2.69, "grad_norm": 0.7368313074111938, "learning_rate": 2.7803713831917117e-05, "loss": 0.0162, "step": 1414 }, { "epoch": 2.69, "grad_norm": 0.7766881585121155, "learning_rate": 2.7766869221578027e-05, "loss": 0.0209, "step": 1415 }, { "epoch": 2.69, "grad_norm": 1.2105177640914917, "learning_rate": 2.773002799806529e-05, "loss": 0.0298, "step": 1416 }, { "epoch": 2.69, "grad_norm": 1.023190975189209, "learning_rate": 2.7693190217253286e-05, "loss": 0.0315, "step": 1417 }, { "epoch": 2.7, "grad_norm": 1.1438359022140503, "learning_rate": 2.7656355935011216e-05, "loss": 0.0233, "step": 1418 }, { "epoch": 2.7, "grad_norm": 1.0560482740402222, "learning_rate": 2.761952520720293e-05, "loss": 0.0252, "step": 1419 }, { "epoch": 2.7, "grad_norm": 0.9607229232788086, "learning_rate": 2.75826980896869e-05, "loss": 0.0119, "step": 1420 }, { "epoch": 2.7, "grad_norm": 0.7243250012397766, "learning_rate": 2.7545874638316134e-05, "loss": 0.0121, "step": 1421 }, { "epoch": 2.7, "grad_norm": 1.479274868965149, "learning_rate": 2.7509054908938064e-05, "loss": 0.0189, "step": 1422 }, { "epoch": 2.71, "grad_norm": 0.737623929977417, "learning_rate": 2.747223895739449e-05, "loss": 0.0092, "step": 1423 }, { "epoch": 2.71, "grad_norm": 0.6566274166107178, "learning_rate": 2.7435426839521465e-05, "loss": 0.0145, "step": 1424 }, { "epoch": 2.71, "grad_norm": 1.0043070316314697, "learning_rate": 2.7398618611149255e-05, "loss": 0.0235, "step": 1425 }, { "epoch": 2.71, "grad_norm": 0.8989971280097961, "learning_rate": 2.736181432810221e-05, "loss": 0.0227, "step": 1426 }, { "epoch": 2.71, "grad_norm": 0.6066439747810364, "learning_rate": 2.7325014046198686e-05, "loss": 0.0187, "step": 1427 }, { "epoch": 2.71, "grad_norm": 0.9143069386482239, "learning_rate": 2.7288217821251e-05, "loss": 0.0267, "step": 1428 }, { "epoch": 2.72, "grad_norm": 1.0348525047302246, "learning_rate": 2.7251425709065283e-05, "loss": 0.0168, "step": 1429 }, { "epoch": 2.72, "grad_norm": 0.6741753220558167, "learning_rate": 2.721463776544144e-05, "loss": 0.0159, "step": 1430 }, { "epoch": 2.72, "grad_norm": 0.9597513675689697, "learning_rate": 2.7177854046173084e-05, "loss": 0.0216, "step": 1431 }, { "epoch": 2.72, "grad_norm": 1.432040810585022, "learning_rate": 2.714107460704738e-05, "loss": 0.066, "step": 1432 }, { "epoch": 2.72, "grad_norm": 0.8823462724685669, "learning_rate": 2.710429950384501e-05, "loss": 0.0152, "step": 1433 }, { "epoch": 2.73, "grad_norm": 0.7270981669425964, "learning_rate": 2.7067528792340102e-05, "loss": 0.0169, "step": 1434 }, { "epoch": 2.73, "grad_norm": 0.4958323538303375, "learning_rate": 2.7030762528300102e-05, "loss": 0.0112, "step": 1435 }, { "epoch": 2.73, "grad_norm": 1.08528733253479, "learning_rate": 2.6994000767485718e-05, "loss": 0.0288, "step": 1436 }, { "epoch": 2.73, "grad_norm": 0.6881751418113708, "learning_rate": 2.695724356565082e-05, "loss": 0.017, "step": 1437 }, { "epoch": 2.73, "grad_norm": 0.5704440474510193, "learning_rate": 2.692049097854238e-05, "loss": 0.0166, "step": 1438 }, { "epoch": 2.74, "grad_norm": 1.011928677558899, "learning_rate": 2.6883743061900364e-05, "loss": 0.0349, "step": 1439 }, { "epoch": 2.74, "grad_norm": 0.9444913268089294, "learning_rate": 2.6846999871457636e-05, "loss": 0.019, "step": 1440 }, { "epoch": 2.74, "grad_norm": 0.9532956480979919, "learning_rate": 2.681026146293994e-05, "loss": 0.0244, "step": 1441 }, { "epoch": 2.74, "grad_norm": 1.4311964511871338, "learning_rate": 2.6773527892065716e-05, "loss": 0.0446, "step": 1442 }, { "epoch": 2.74, "grad_norm": 0.7681885957717896, "learning_rate": 2.6736799214546092e-05, "loss": 0.0159, "step": 1443 }, { "epoch": 2.75, "grad_norm": 0.9015945792198181, "learning_rate": 2.670007548608477e-05, "loss": 0.0261, "step": 1444 }, { "epoch": 2.75, "grad_norm": 0.9776804447174072, "learning_rate": 2.6663356762377956e-05, "loss": 0.0173, "step": 1445 }, { "epoch": 2.75, "grad_norm": 0.965928316116333, "learning_rate": 2.6626643099114227e-05, "loss": 0.0264, "step": 1446 }, { "epoch": 2.75, "grad_norm": 0.9755499958992004, "learning_rate": 2.6589934551974554e-05, "loss": 0.0192, "step": 1447 }, { "epoch": 2.75, "grad_norm": 0.9365895390510559, "learning_rate": 2.6553231176632092e-05, "loss": 0.0246, "step": 1448 }, { "epoch": 2.75, "grad_norm": 0.5492715835571289, "learning_rate": 2.6516533028752167e-05, "loss": 0.0171, "step": 1449 }, { "epoch": 2.76, "grad_norm": 0.6731768846511841, "learning_rate": 2.6479840163992178e-05, "loss": 0.0236, "step": 1450 }, { "epoch": 2.76, "grad_norm": 0.9657402634620667, "learning_rate": 2.6443152638001523e-05, "loss": 0.0328, "step": 1451 }, { "epoch": 2.76, "grad_norm": 0.9317682981491089, "learning_rate": 2.640647050642149e-05, "loss": 0.039, "step": 1452 }, { "epoch": 2.76, "grad_norm": 0.8600403666496277, "learning_rate": 2.636979382488517e-05, "loss": 0.0224, "step": 1453 }, { "epoch": 2.76, "grad_norm": 0.8357488512992859, "learning_rate": 2.6333122649017438e-05, "loss": 0.0234, "step": 1454 }, { "epoch": 2.77, "grad_norm": 0.7980093359947205, "learning_rate": 2.6296457034434773e-05, "loss": 0.0191, "step": 1455 }, { "epoch": 2.77, "grad_norm": 0.9591374397277832, "learning_rate": 2.625979703674523e-05, "loss": 0.0234, "step": 1456 }, { "epoch": 2.77, "grad_norm": 0.8387436270713806, "learning_rate": 2.6223142711548358e-05, "loss": 0.0234, "step": 1457 }, { "epoch": 2.77, "grad_norm": 0.7560609579086304, "learning_rate": 2.6186494114435094e-05, "loss": 0.02, "step": 1458 }, { "epoch": 2.77, "grad_norm": 0.6520232558250427, "learning_rate": 2.614985130098769e-05, "loss": 0.0196, "step": 1459 }, { "epoch": 2.78, "grad_norm": 1.2818493843078613, "learning_rate": 2.6113214326779613e-05, "loss": 0.0419, "step": 1460 }, { "epoch": 2.78, "grad_norm": 0.8640978336334229, "learning_rate": 2.6076583247375502e-05, "loss": 0.0199, "step": 1461 }, { "epoch": 2.78, "grad_norm": 0.8062564134597778, "learning_rate": 2.6039958118331034e-05, "loss": 0.0167, "step": 1462 }, { "epoch": 2.78, "grad_norm": 0.5497221946716309, "learning_rate": 2.600333899519285e-05, "loss": 0.0119, "step": 1463 }, { "epoch": 2.78, "grad_norm": 0.7077271342277527, "learning_rate": 2.5966725933498533e-05, "loss": 0.0193, "step": 1464 }, { "epoch": 2.79, "grad_norm": 0.6604231595993042, "learning_rate": 2.593011898877642e-05, "loss": 0.0128, "step": 1465 }, { "epoch": 2.79, "grad_norm": 0.5315125584602356, "learning_rate": 2.5893518216545587e-05, "loss": 0.0123, "step": 1466 }, { "epoch": 2.79, "grad_norm": 0.6900595426559448, "learning_rate": 2.5856923672315765e-05, "loss": 0.0202, "step": 1467 }, { "epoch": 2.79, "grad_norm": 0.7459152340888977, "learning_rate": 2.5820335411587217e-05, "loss": 0.0265, "step": 1468 }, { "epoch": 2.79, "grad_norm": 1.6864856481552124, "learning_rate": 2.5783753489850677e-05, "loss": 0.0236, "step": 1469 }, { "epoch": 2.79, "grad_norm": 0.5979843735694885, "learning_rate": 2.5747177962587296e-05, "loss": 0.0174, "step": 1470 }, { "epoch": 2.8, "grad_norm": 0.8606807589530945, "learning_rate": 2.5710608885268495e-05, "loss": 0.0195, "step": 1471 }, { "epoch": 2.8, "grad_norm": 0.6362294554710388, "learning_rate": 2.567404631335592e-05, "loss": 0.0271, "step": 1472 }, { "epoch": 2.8, "grad_norm": 0.6874938011169434, "learning_rate": 2.5637490302301355e-05, "loss": 0.0134, "step": 1473 }, { "epoch": 2.8, "grad_norm": 0.4187270402908325, "learning_rate": 2.5600940907546635e-05, "loss": 0.0126, "step": 1474 }, { "epoch": 2.8, "grad_norm": 0.7785152792930603, "learning_rate": 2.5564398184523553e-05, "loss": 0.0274, "step": 1475 }, { "epoch": 2.81, "grad_norm": 0.8068028688430786, "learning_rate": 2.552786218865378e-05, "loss": 0.0222, "step": 1476 }, { "epoch": 2.81, "grad_norm": 0.7898040413856506, "learning_rate": 2.5491332975348815e-05, "loss": 0.0158, "step": 1477 }, { "epoch": 2.81, "grad_norm": 0.5764217972755432, "learning_rate": 2.5454810600009835e-05, "loss": 0.0107, "step": 1478 }, { "epoch": 2.81, "grad_norm": 0.7511358261108398, "learning_rate": 2.541829511802765e-05, "loss": 0.0118, "step": 1479 }, { "epoch": 2.81, "grad_norm": 1.126897931098938, "learning_rate": 2.538178658478265e-05, "loss": 0.0212, "step": 1480 }, { "epoch": 2.82, "grad_norm": 0.7243058085441589, "learning_rate": 2.5345285055644635e-05, "loss": 0.0155, "step": 1481 }, { "epoch": 2.82, "grad_norm": 0.5659047961235046, "learning_rate": 2.5308790585972812e-05, "loss": 0.0089, "step": 1482 }, { "epoch": 2.82, "grad_norm": 0.5990458130836487, "learning_rate": 2.52723032311157e-05, "loss": 0.0127, "step": 1483 }, { "epoch": 2.82, "grad_norm": 0.415659636259079, "learning_rate": 2.5235823046410988e-05, "loss": 0.0081, "step": 1484 }, { "epoch": 2.82, "grad_norm": 0.6466391086578369, "learning_rate": 2.519935008718551e-05, "loss": 0.0122, "step": 1485 }, { "epoch": 2.83, "grad_norm": 0.7019263505935669, "learning_rate": 2.516288440875512e-05, "loss": 0.0184, "step": 1486 }, { "epoch": 2.83, "grad_norm": 0.9744683504104614, "learning_rate": 2.5126426066424685e-05, "loss": 0.0302, "step": 1487 }, { "epoch": 2.83, "grad_norm": 1.0501154661178589, "learning_rate": 2.5089975115487894e-05, "loss": 0.0273, "step": 1488 }, { "epoch": 2.83, "grad_norm": 1.1197574138641357, "learning_rate": 2.5053531611227236e-05, "loss": 0.0305, "step": 1489 }, { "epoch": 2.83, "grad_norm": 0.515388548374176, "learning_rate": 2.501709560891392e-05, "loss": 0.0168, "step": 1490 }, { "epoch": 2.83, "grad_norm": 0.8243635296821594, "learning_rate": 2.498066716380777e-05, "loss": 0.019, "step": 1491 }, { "epoch": 2.84, "grad_norm": 0.8300518989562988, "learning_rate": 2.494424633115713e-05, "loss": 0.013, "step": 1492 }, { "epoch": 2.84, "grad_norm": 0.9307559728622437, "learning_rate": 2.4907833166198848e-05, "loss": 0.0243, "step": 1493 }, { "epoch": 2.84, "grad_norm": 0.8946309089660645, "learning_rate": 2.4871427724158097e-05, "loss": 0.017, "step": 1494 }, { "epoch": 2.84, "grad_norm": 0.8541837334632874, "learning_rate": 2.483503006024834e-05, "loss": 0.0274, "step": 1495 }, { "epoch": 2.84, "grad_norm": 0.5598162412643433, "learning_rate": 2.479864022967128e-05, "loss": 0.0116, "step": 1496 }, { "epoch": 2.85, "grad_norm": 1.0122627019882202, "learning_rate": 2.4762258287616695e-05, "loss": 0.0206, "step": 1497 }, { "epoch": 2.85, "grad_norm": 0.7835296392440796, "learning_rate": 2.4725884289262432e-05, "loss": 0.0328, "step": 1498 }, { "epoch": 2.85, "grad_norm": 0.6845372915267944, "learning_rate": 2.4689518289774258e-05, "loss": 0.0118, "step": 1499 }, { "epoch": 2.85, "grad_norm": 0.9361919164657593, "learning_rate": 2.4653160344305852e-05, "loss": 0.0204, "step": 1500 }, { "epoch": 2.85, "eval_blimp_filtered_avg": 0.7379104477611941, "eval_blimp_filtered_std": 0.004868648036771746, "step": 1500 }, { "epoch": 2.85, "eval_blimp_supplement_avg": 0.7672413793103449, "eval_blimp_supplement_std": 0.018109975891891693, "step": 1500 }, { "epoch": 2.85, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.04852365870939099, "step": 1500 }, { "epoch": 2.85, "eval_winoground_filtered_avg": 0.52, "eval_winoground_filtered_std": 0.05021167315686779, "step": 1500 }, { "epoch": 2.85, "grad_norm": 0.8769916296005249, "learning_rate": 2.4616810507998647e-05, "loss": 0.0161, "step": 1501 }, { "epoch": 2.86, "grad_norm": 0.7938847541809082, "learning_rate": 2.4580468835981778e-05, "loss": 0.027, "step": 1502 }, { "epoch": 2.86, "grad_norm": 1.2593802213668823, "learning_rate": 2.4544135383372015e-05, "loss": 0.0277, "step": 1503 }, { "epoch": 2.86, "grad_norm": 0.9966208338737488, "learning_rate": 2.4507810205273647e-05, "loss": 0.0245, "step": 1504 }, { "epoch": 2.86, "grad_norm": 0.6259188652038574, "learning_rate": 2.4471493356778407e-05, "loss": 0.0195, "step": 1505 }, { "epoch": 2.86, "grad_norm": 1.107204794883728, "learning_rate": 2.4435184892965436e-05, "loss": 0.0307, "step": 1506 }, { "epoch": 2.87, "grad_norm": 1.1803444623947144, "learning_rate": 2.4398884868901102e-05, "loss": 0.0326, "step": 1507 }, { "epoch": 2.87, "grad_norm": 0.7771293520927429, "learning_rate": 2.4362593339639013e-05, "loss": 0.0168, "step": 1508 }, { "epoch": 2.87, "grad_norm": 0.5898905992507935, "learning_rate": 2.4326310360219865e-05, "loss": 0.0092, "step": 1509 }, { "epoch": 2.87, "grad_norm": 0.6910146474838257, "learning_rate": 2.4290035985671426e-05, "loss": 0.0147, "step": 1510 }, { "epoch": 2.87, "grad_norm": 0.8199265003204346, "learning_rate": 2.425377027100838e-05, "loss": 0.0189, "step": 1511 }, { "epoch": 2.87, "grad_norm": 0.7745525240898132, "learning_rate": 2.421751327123228e-05, "loss": 0.0198, "step": 1512 }, { "epoch": 2.88, "grad_norm": 0.8126581311225891, "learning_rate": 2.418126504133149e-05, "loss": 0.0117, "step": 1513 }, { "epoch": 2.88, "grad_norm": 0.9579986333847046, "learning_rate": 2.4145025636281035e-05, "loss": 0.0137, "step": 1514 }, { "epoch": 2.88, "grad_norm": 0.9612418413162231, "learning_rate": 2.4108795111042565e-05, "loss": 0.0274, "step": 1515 }, { "epoch": 2.88, "grad_norm": 0.8980733156204224, "learning_rate": 2.4072573520564305e-05, "loss": 0.0273, "step": 1516 }, { "epoch": 2.88, "grad_norm": 0.9648678302764893, "learning_rate": 2.403636091978087e-05, "loss": 0.0151, "step": 1517 }, { "epoch": 2.89, "grad_norm": 0.8918325901031494, "learning_rate": 2.400015736361327e-05, "loss": 0.0168, "step": 1518 }, { "epoch": 2.89, "grad_norm": 1.250093936920166, "learning_rate": 2.39639629069688e-05, "loss": 0.044, "step": 1519 }, { "epoch": 2.89, "grad_norm": 0.8976491689682007, "learning_rate": 2.3927777604740953e-05, "loss": 0.0148, "step": 1520 }, { "epoch": 2.89, "grad_norm": 2.0842761993408203, "learning_rate": 2.3891601511809324e-05, "loss": 0.0233, "step": 1521 }, { "epoch": 2.89, "grad_norm": 0.9132564663887024, "learning_rate": 2.3855434683039544e-05, "loss": 0.0181, "step": 1522 }, { "epoch": 2.9, "grad_norm": 0.8275769948959351, "learning_rate": 2.3819277173283223e-05, "loss": 0.0187, "step": 1523 }, { "epoch": 2.9, "grad_norm": 0.4849015176296234, "learning_rate": 2.3783129037377804e-05, "loss": 0.0082, "step": 1524 }, { "epoch": 2.9, "grad_norm": 0.3785208761692047, "learning_rate": 2.3746990330146518e-05, "loss": 0.0073, "step": 1525 }, { "epoch": 2.9, "grad_norm": 0.5020454525947571, "learning_rate": 2.3710861106398316e-05, "loss": 0.0071, "step": 1526 }, { "epoch": 2.9, "grad_norm": 0.8598502278327942, "learning_rate": 2.3674741420927745e-05, "loss": 0.0221, "step": 1527 }, { "epoch": 2.9, "grad_norm": 0.7911075353622437, "learning_rate": 2.3638631328514887e-05, "loss": 0.0162, "step": 1528 }, { "epoch": 2.91, "grad_norm": 0.9278848767280579, "learning_rate": 2.3602530883925306e-05, "loss": 0.0329, "step": 1529 }, { "epoch": 2.91, "grad_norm": 1.0214879512786865, "learning_rate": 2.356644014190989e-05, "loss": 0.0361, "step": 1530 }, { "epoch": 2.91, "grad_norm": 0.6860681772232056, "learning_rate": 2.3530359157204818e-05, "loss": 0.0162, "step": 1531 }, { "epoch": 2.91, "grad_norm": 0.7466261982917786, "learning_rate": 2.3494287984531515e-05, "loss": 0.0177, "step": 1532 }, { "epoch": 2.91, "grad_norm": 1.276952862739563, "learning_rate": 2.3458226678596485e-05, "loss": 0.0317, "step": 1533 }, { "epoch": 2.92, "grad_norm": 0.42949149012565613, "learning_rate": 2.3422175294091268e-05, "loss": 0.0077, "step": 1534 }, { "epoch": 2.92, "grad_norm": 0.815201461315155, "learning_rate": 2.3386133885692368e-05, "loss": 0.0222, "step": 1535 }, { "epoch": 2.92, "grad_norm": 0.7794772386550903, "learning_rate": 2.3350102508061167e-05, "loss": 0.0187, "step": 1536 }, { "epoch": 2.92, "grad_norm": 2.040034770965576, "learning_rate": 2.331408121584381e-05, "loss": 0.0343, "step": 1537 }, { "epoch": 2.92, "grad_norm": 0.6018684506416321, "learning_rate": 2.327807006367116e-05, "loss": 0.0191, "step": 1538 }, { "epoch": 2.93, "grad_norm": 0.7681739926338196, "learning_rate": 2.3242069106158718e-05, "loss": 0.0237, "step": 1539 }, { "epoch": 2.93, "grad_norm": 0.6892005205154419, "learning_rate": 2.3206078397906495e-05, "loss": 0.014, "step": 1540 }, { "epoch": 2.93, "grad_norm": 1.0807920694351196, "learning_rate": 2.317009799349897e-05, "loss": 0.0289, "step": 1541 }, { "epoch": 2.93, "grad_norm": 0.6178621649742126, "learning_rate": 2.3134127947504997e-05, "loss": 0.0111, "step": 1542 }, { "epoch": 2.93, "grad_norm": 0.702030599117279, "learning_rate": 2.309816831447772e-05, "loss": 0.0213, "step": 1543 }, { "epoch": 2.94, "grad_norm": 1.1256015300750732, "learning_rate": 2.3062219148954474e-05, "loss": 0.0333, "step": 1544 }, { "epoch": 2.94, "grad_norm": 1.0941768884658813, "learning_rate": 2.3026280505456752e-05, "loss": 0.0318, "step": 1545 }, { "epoch": 2.94, "grad_norm": 0.7426414489746094, "learning_rate": 2.2990352438490066e-05, "loss": 0.0131, "step": 1546 }, { "epoch": 2.94, "grad_norm": 0.9098531007766724, "learning_rate": 2.2954435002543885e-05, "loss": 0.0365, "step": 1547 }, { "epoch": 2.94, "grad_norm": 0.787928581237793, "learning_rate": 2.291852825209155e-05, "loss": 0.0232, "step": 1548 }, { "epoch": 2.94, "grad_norm": 1.1215766668319702, "learning_rate": 2.2882632241590233e-05, "loss": 0.0223, "step": 1549 }, { "epoch": 2.95, "grad_norm": 0.8206033110618591, "learning_rate": 2.284674702548077e-05, "loss": 0.0274, "step": 1550 }, { "epoch": 2.95, "grad_norm": 0.668441653251648, "learning_rate": 2.281087265818766e-05, "loss": 0.0194, "step": 1551 }, { "epoch": 2.95, "grad_norm": 0.7935766577720642, "learning_rate": 2.277500919411893e-05, "loss": 0.0246, "step": 1552 }, { "epoch": 2.95, "grad_norm": 0.4250183701515198, "learning_rate": 2.273915668766608e-05, "loss": 0.0086, "step": 1553 }, { "epoch": 2.95, "grad_norm": 0.651317298412323, "learning_rate": 2.270331519320397e-05, "loss": 0.0155, "step": 1554 }, { "epoch": 2.96, "grad_norm": 0.8129798769950867, "learning_rate": 2.266748476509081e-05, "loss": 0.0327, "step": 1555 }, { "epoch": 2.96, "grad_norm": 0.8599788546562195, "learning_rate": 2.263166545766797e-05, "loss": 0.0213, "step": 1556 }, { "epoch": 2.96, "grad_norm": 0.5675526857376099, "learning_rate": 2.2595857325259992e-05, "loss": 0.0168, "step": 1557 }, { "epoch": 2.96, "grad_norm": 1.0046356916427612, "learning_rate": 2.2560060422174433e-05, "loss": 0.0262, "step": 1558 }, { "epoch": 2.96, "grad_norm": 0.6994456052780151, "learning_rate": 2.2524274802701865e-05, "loss": 0.016, "step": 1559 }, { "epoch": 2.97, "grad_norm": 0.6365400552749634, "learning_rate": 2.2488500521115716e-05, "loss": 0.0146, "step": 1560 }, { "epoch": 2.97, "grad_norm": 0.900261640548706, "learning_rate": 2.245273763167221e-05, "loss": 0.0236, "step": 1561 }, { "epoch": 2.97, "grad_norm": 0.7196252346038818, "learning_rate": 2.241698618861033e-05, "loss": 0.0148, "step": 1562 }, { "epoch": 2.97, "grad_norm": 1.1120961904525757, "learning_rate": 2.2381246246151675e-05, "loss": 0.0279, "step": 1563 }, { "epoch": 2.97, "grad_norm": 0.7412280440330505, "learning_rate": 2.23455178585004e-05, "loss": 0.0263, "step": 1564 }, { "epoch": 2.98, "grad_norm": 1.0816118717193604, "learning_rate": 2.230980107984315e-05, "loss": 0.0197, "step": 1565 }, { "epoch": 2.98, "grad_norm": 0.7041802406311035, "learning_rate": 2.2274095964348945e-05, "loss": 0.0138, "step": 1566 }, { "epoch": 2.98, "grad_norm": 0.7118067741394043, "learning_rate": 2.2238402566169117e-05, "loss": 0.0186, "step": 1567 }, { "epoch": 2.98, "grad_norm": 1.053608775138855, "learning_rate": 2.220272093943726e-05, "loss": 0.0289, "step": 1568 }, { "epoch": 2.98, "grad_norm": 0.7637268304824829, "learning_rate": 2.216705113826908e-05, "loss": 0.019, "step": 1569 }, { "epoch": 2.98, "grad_norm": 0.9110425710678101, "learning_rate": 2.213139321676236e-05, "loss": 0.0202, "step": 1570 }, { "epoch": 2.99, "grad_norm": 0.5671938061714172, "learning_rate": 2.2095747228996856e-05, "loss": 0.0187, "step": 1571 }, { "epoch": 2.99, "grad_norm": 0.6871151924133301, "learning_rate": 2.206011322903425e-05, "loss": 0.013, "step": 1572 }, { "epoch": 2.99, "grad_norm": 1.0366058349609375, "learning_rate": 2.202449127091802e-05, "loss": 0.0267, "step": 1573 }, { "epoch": 2.99, "grad_norm": 1.1244114637374878, "learning_rate": 2.1988881408673377e-05, "loss": 0.0195, "step": 1574 }, { "epoch": 2.99, "grad_norm": 0.6455907821655273, "learning_rate": 2.195328369630722e-05, "loss": 0.0145, "step": 1575 }, { "epoch": 3.0, "grad_norm": 0.8673487901687622, "learning_rate": 2.1917698187807987e-05, "loss": 0.0192, "step": 1576 }, { "epoch": 3.0, "grad_norm": 1.4222618341445923, "learning_rate": 2.188212493714561e-05, "loss": 0.0158, "step": 1577 }, { "epoch": 3.0, "grad_norm": 0.983835756778717, "learning_rate": 2.184656399827146e-05, "loss": 0.0166, "step": 1578 }, { "epoch": 3.0, "grad_norm": 0.4228699505329132, "learning_rate": 2.181101542511821e-05, "loss": 0.0069, "step": 1579 }, { "epoch": 3.0, "grad_norm": 0.41458526253700256, "learning_rate": 2.1775479271599774e-05, "loss": 0.0078, "step": 1580 }, { "epoch": 3.01, "grad_norm": 0.4302844703197479, "learning_rate": 2.173995559161126e-05, "loss": 0.0074, "step": 1581 }, { "epoch": 3.01, "grad_norm": 0.675521194934845, "learning_rate": 2.170444443902882e-05, "loss": 0.0141, "step": 1582 }, { "epoch": 3.01, "grad_norm": 1.2319819927215576, "learning_rate": 2.1668945867709636e-05, "loss": 0.0176, "step": 1583 }, { "epoch": 3.01, "grad_norm": 0.6111138463020325, "learning_rate": 2.163345993149178e-05, "loss": 0.0095, "step": 1584 }, { "epoch": 3.01, "grad_norm": 0.42409682273864746, "learning_rate": 2.1597986684194215e-05, "loss": 0.0049, "step": 1585 }, { "epoch": 3.02, "grad_norm": 0.71693354845047, "learning_rate": 2.1562526179616603e-05, "loss": 0.0103, "step": 1586 }, { "epoch": 3.02, "grad_norm": 0.673190712928772, "learning_rate": 2.152707847153929e-05, "loss": 0.0195, "step": 1587 }, { "epoch": 3.02, "grad_norm": 0.6518903374671936, "learning_rate": 2.1491643613723248e-05, "loss": 0.0114, "step": 1588 }, { "epoch": 3.02, "grad_norm": 0.7918753623962402, "learning_rate": 2.1456221659909932e-05, "loss": 0.0101, "step": 1589 }, { "epoch": 3.02, "grad_norm": 0.7618105411529541, "learning_rate": 2.1420812663821218e-05, "loss": 0.013, "step": 1590 }, { "epoch": 3.02, "grad_norm": 0.41970619559288025, "learning_rate": 2.1385416679159363e-05, "loss": 0.008, "step": 1591 }, { "epoch": 3.03, "grad_norm": 0.38451847434043884, "learning_rate": 2.1350033759606863e-05, "loss": 0.0036, "step": 1592 }, { "epoch": 3.03, "grad_norm": 0.8014621138572693, "learning_rate": 2.131466395882641e-05, "loss": 0.016, "step": 1593 }, { "epoch": 3.03, "grad_norm": 0.599155843257904, "learning_rate": 2.1279307330460794e-05, "loss": 0.0068, "step": 1594 }, { "epoch": 3.03, "grad_norm": 0.4048203229904175, "learning_rate": 2.1243963928132843e-05, "loss": 0.0099, "step": 1595 }, { "epoch": 3.03, "grad_norm": 0.8897512555122375, "learning_rate": 2.1208633805445296e-05, "loss": 0.0104, "step": 1596 }, { "epoch": 3.04, "grad_norm": 0.6163508892059326, "learning_rate": 2.117331701598078e-05, "loss": 0.013, "step": 1597 }, { "epoch": 3.04, "grad_norm": 0.9449763298034668, "learning_rate": 2.113801361330169e-05, "loss": 0.0086, "step": 1598 }, { "epoch": 3.04, "grad_norm": 0.7328210473060608, "learning_rate": 2.110272365095012e-05, "loss": 0.0153, "step": 1599 }, { "epoch": 3.04, "grad_norm": 0.44620901346206665, "learning_rate": 2.1067447182447756e-05, "loss": 0.0085, "step": 1600 }, { "epoch": 3.04, "eval_blimp_filtered_avg": 0.7356716417910448, "eval_blimp_filtered_std": 0.004863630848615429, "step": 1600 }, { "epoch": 3.04, "eval_blimp_supplement_avg": 0.771551724137931, "eval_blimp_supplement_std": 0.017896030119414992, "step": 1600 }, { "epoch": 3.04, "eval_vqa_filtered_avg": 0.32, "eval_vqa_filtered_std": 0.046882617226215034, "step": 1600 }, { "epoch": 3.04, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 1600 }, { "epoch": 3.04, "grad_norm": 0.498424768447876, "learning_rate": 2.103218426129588e-05, "loss": 0.0115, "step": 1601 }, { "epoch": 3.05, "grad_norm": 0.7964808940887451, "learning_rate": 2.099693494097517e-05, "loss": 0.0158, "step": 1602 }, { "epoch": 3.05, "grad_norm": 0.5348392724990845, "learning_rate": 2.096169927494569e-05, "loss": 0.0098, "step": 1603 }, { "epoch": 3.05, "grad_norm": 0.9597808718681335, "learning_rate": 2.092647731664681e-05, "loss": 0.0096, "step": 1604 }, { "epoch": 3.05, "grad_norm": 0.7487015724182129, "learning_rate": 2.0891269119497097e-05, "loss": 0.0175, "step": 1605 }, { "epoch": 3.05, "grad_norm": 0.5483823418617249, "learning_rate": 2.085607473689426e-05, "loss": 0.0082, "step": 1606 }, { "epoch": 3.06, "grad_norm": 0.33164533972740173, "learning_rate": 2.0820894222215024e-05, "loss": 0.0049, "step": 1607 }, { "epoch": 3.06, "grad_norm": 0.4578284025192261, "learning_rate": 2.078572762881514e-05, "loss": 0.0121, "step": 1608 }, { "epoch": 3.06, "grad_norm": 0.4054017663002014, "learning_rate": 2.0750575010029198e-05, "loss": 0.0059, "step": 1609 }, { "epoch": 3.06, "grad_norm": 0.6199090480804443, "learning_rate": 2.0715436419170595e-05, "loss": 0.0131, "step": 1610 }, { "epoch": 3.06, "grad_norm": 0.6165754199028015, "learning_rate": 2.0680311909531486e-05, "loss": 0.0072, "step": 1611 }, { "epoch": 3.06, "grad_norm": 0.6854336261749268, "learning_rate": 2.064520153438264e-05, "loss": 0.0159, "step": 1612 }, { "epoch": 3.07, "grad_norm": 0.5769352316856384, "learning_rate": 2.0610105346973387e-05, "loss": 0.0088, "step": 1613 }, { "epoch": 3.07, "grad_norm": 0.41764429211616516, "learning_rate": 2.0575023400531572e-05, "loss": 0.0085, "step": 1614 }, { "epoch": 3.07, "grad_norm": 0.40481942892074585, "learning_rate": 2.0539955748263414e-05, "loss": 0.0075, "step": 1615 }, { "epoch": 3.07, "grad_norm": 0.5887228846549988, "learning_rate": 2.0504902443353453e-05, "loss": 0.009, "step": 1616 }, { "epoch": 3.07, "grad_norm": 0.452993780374527, "learning_rate": 2.0469863538964487e-05, "loss": 0.0057, "step": 1617 }, { "epoch": 3.08, "grad_norm": 0.3764880299568176, "learning_rate": 2.0434839088237455e-05, "loss": 0.0041, "step": 1618 }, { "epoch": 3.08, "grad_norm": 1.1180106401443481, "learning_rate": 2.0399829144291385e-05, "loss": 0.0217, "step": 1619 }, { "epoch": 3.08, "grad_norm": 0.44227492809295654, "learning_rate": 2.0364833760223297e-05, "loss": 0.0053, "step": 1620 }, { "epoch": 3.08, "grad_norm": 0.5597434639930725, "learning_rate": 2.032985298910815e-05, "loss": 0.0118, "step": 1621 }, { "epoch": 3.08, "grad_norm": 0.6109591126441956, "learning_rate": 2.029488688399871e-05, "loss": 0.0153, "step": 1622 }, { "epoch": 3.09, "grad_norm": 0.4646584689617157, "learning_rate": 2.0259935497925514e-05, "loss": 0.0069, "step": 1623 }, { "epoch": 3.09, "grad_norm": 0.5460329651832581, "learning_rate": 2.0224998883896788e-05, "loss": 0.0121, "step": 1624 }, { "epoch": 3.09, "grad_norm": 0.23464956879615784, "learning_rate": 2.019007709489834e-05, "loss": 0.0035, "step": 1625 }, { "epoch": 3.09, "grad_norm": 0.3066728413105011, "learning_rate": 2.0155170183893486e-05, "loss": 0.0053, "step": 1626 }, { "epoch": 3.09, "grad_norm": 1.2554696798324585, "learning_rate": 2.0120278203823003e-05, "loss": 0.0086, "step": 1627 }, { "epoch": 3.1, "grad_norm": 1.014670491218567, "learning_rate": 2.0085401207605006e-05, "loss": 0.0145, "step": 1628 }, { "epoch": 3.1, "grad_norm": 0.6873512268066406, "learning_rate": 2.0050539248134867e-05, "loss": 0.0119, "step": 1629 }, { "epoch": 3.1, "grad_norm": 1.14180326461792, "learning_rate": 2.0015692378285206e-05, "loss": 0.0089, "step": 1630 }, { "epoch": 3.1, "grad_norm": 0.8630284667015076, "learning_rate": 1.998086065090571e-05, "loss": 0.0135, "step": 1631 }, { "epoch": 3.1, "grad_norm": 0.7126181125640869, "learning_rate": 1.9946044118823112e-05, "loss": 0.0106, "step": 1632 }, { "epoch": 3.1, "grad_norm": 0.6965020895004272, "learning_rate": 1.991124283484111e-05, "loss": 0.0111, "step": 1633 }, { "epoch": 3.11, "grad_norm": 0.14631886780261993, "learning_rate": 1.9876456851740267e-05, "loss": 0.0023, "step": 1634 }, { "epoch": 3.11, "grad_norm": 0.7207170128822327, "learning_rate": 1.9841686222277944e-05, "loss": 0.0083, "step": 1635 }, { "epoch": 3.11, "grad_norm": 0.59142005443573, "learning_rate": 1.9806930999188207e-05, "loss": 0.0099, "step": 1636 }, { "epoch": 3.11, "grad_norm": 1.0965369939804077, "learning_rate": 1.9772191235181777e-05, "loss": 0.0113, "step": 1637 }, { "epoch": 3.11, "grad_norm": 0.7539475560188293, "learning_rate": 1.973746698294592e-05, "loss": 0.0091, "step": 1638 }, { "epoch": 3.12, "grad_norm": 0.4140196740627289, "learning_rate": 1.9702758295144354e-05, "loss": 0.009, "step": 1639 }, { "epoch": 3.12, "grad_norm": 0.3400113582611084, "learning_rate": 1.966806522441723e-05, "loss": 0.0037, "step": 1640 }, { "epoch": 3.12, "grad_norm": 0.7773045897483826, "learning_rate": 1.9633387823380987e-05, "loss": 0.0108, "step": 1641 }, { "epoch": 3.12, "grad_norm": 0.5960767269134521, "learning_rate": 1.9598726144628294e-05, "loss": 0.0094, "step": 1642 }, { "epoch": 3.12, "grad_norm": 0.959697961807251, "learning_rate": 1.9564080240728017e-05, "loss": 0.0124, "step": 1643 }, { "epoch": 3.13, "grad_norm": 0.6202993988990784, "learning_rate": 1.9529450164225045e-05, "loss": 0.0107, "step": 1644 }, { "epoch": 3.13, "grad_norm": 0.4382043182849884, "learning_rate": 1.9494835967640292e-05, "loss": 0.0045, "step": 1645 }, { "epoch": 3.13, "grad_norm": 0.5104768872261047, "learning_rate": 1.9460237703470568e-05, "loss": 0.0073, "step": 1646 }, { "epoch": 3.13, "grad_norm": 1.0403046607971191, "learning_rate": 1.9425655424188563e-05, "loss": 0.009, "step": 1647 }, { "epoch": 3.13, "grad_norm": 0.5179813504219055, "learning_rate": 1.9391089182242677e-05, "loss": 0.0142, "step": 1648 }, { "epoch": 3.13, "grad_norm": 0.961421012878418, "learning_rate": 1.9356539030056998e-05, "loss": 0.0152, "step": 1649 }, { "epoch": 3.14, "grad_norm": 0.504166841506958, "learning_rate": 1.9322005020031234e-05, "loss": 0.0076, "step": 1650 }, { "epoch": 3.14, "grad_norm": 0.36413660645484924, "learning_rate": 1.928748720454059e-05, "loss": 0.0039, "step": 1651 }, { "epoch": 3.14, "grad_norm": 0.6749222874641418, "learning_rate": 1.92529856359357e-05, "loss": 0.0165, "step": 1652 }, { "epoch": 3.14, "grad_norm": 0.42491668462753296, "learning_rate": 1.9218500366542594e-05, "loss": 0.0041, "step": 1653 }, { "epoch": 3.14, "grad_norm": 0.5363407731056213, "learning_rate": 1.9184031448662564e-05, "loss": 0.0107, "step": 1654 }, { "epoch": 3.15, "grad_norm": 0.4969552755355835, "learning_rate": 1.9149578934572085e-05, "loss": 0.0083, "step": 1655 }, { "epoch": 3.15, "grad_norm": 0.40218767523765564, "learning_rate": 1.911514287652277e-05, "loss": 0.0115, "step": 1656 }, { "epoch": 3.15, "grad_norm": 0.42659255862236023, "learning_rate": 1.9080723326741287e-05, "loss": 0.0071, "step": 1657 }, { "epoch": 3.15, "grad_norm": 1.1300135850906372, "learning_rate": 1.904632033742924e-05, "loss": 0.0177, "step": 1658 }, { "epoch": 3.15, "grad_norm": 0.3743501901626587, "learning_rate": 1.9011933960763126e-05, "loss": 0.0054, "step": 1659 }, { "epoch": 3.16, "grad_norm": 0.6971900463104248, "learning_rate": 1.897756424889427e-05, "loss": 0.0158, "step": 1660 }, { "epoch": 3.16, "grad_norm": 0.7266177535057068, "learning_rate": 1.8943211253948692e-05, "loss": 0.0094, "step": 1661 }, { "epoch": 3.16, "grad_norm": 0.948196291923523, "learning_rate": 1.8908875028027066e-05, "loss": 0.0078, "step": 1662 }, { "epoch": 3.16, "grad_norm": 0.5921658277511597, "learning_rate": 1.887455562320465e-05, "loss": 0.0058, "step": 1663 }, { "epoch": 3.16, "grad_norm": 0.5703031420707703, "learning_rate": 1.8840253091531178e-05, "loss": 0.0124, "step": 1664 }, { "epoch": 3.17, "grad_norm": 0.6423876285552979, "learning_rate": 1.8805967485030773e-05, "loss": 0.0146, "step": 1665 }, { "epoch": 3.17, "grad_norm": 0.6701260209083557, "learning_rate": 1.8771698855701946e-05, "loss": 0.0113, "step": 1666 }, { "epoch": 3.17, "grad_norm": 0.627770721912384, "learning_rate": 1.8737447255517402e-05, "loss": 0.0069, "step": 1667 }, { "epoch": 3.17, "grad_norm": 0.3450928032398224, "learning_rate": 1.8703212736424038e-05, "loss": 0.0074, "step": 1668 }, { "epoch": 3.17, "grad_norm": 0.5324114561080933, "learning_rate": 1.8668995350342846e-05, "loss": 0.011, "step": 1669 }, { "epoch": 3.17, "grad_norm": 0.4200190007686615, "learning_rate": 1.863479514916886e-05, "loss": 0.0088, "step": 1670 }, { "epoch": 3.18, "grad_norm": 0.9725277423858643, "learning_rate": 1.860061218477101e-05, "loss": 0.0083, "step": 1671 }, { "epoch": 3.18, "grad_norm": 0.5657826066017151, "learning_rate": 1.85664465089921e-05, "loss": 0.008, "step": 1672 }, { "epoch": 3.18, "grad_norm": 0.340916246175766, "learning_rate": 1.8532298173648724e-05, "loss": 0.006, "step": 1673 }, { "epoch": 3.18, "grad_norm": 0.7623887658119202, "learning_rate": 1.8498167230531165e-05, "loss": 0.0191, "step": 1674 }, { "epoch": 3.18, "grad_norm": 0.6761280298233032, "learning_rate": 1.8464053731403317e-05, "loss": 0.0147, "step": 1675 }, { "epoch": 3.19, "grad_norm": 0.3998967111110687, "learning_rate": 1.842995772800266e-05, "loss": 0.0057, "step": 1676 }, { "epoch": 3.19, "grad_norm": 0.4015098214149475, "learning_rate": 1.8395879272040098e-05, "loss": 0.0039, "step": 1677 }, { "epoch": 3.19, "grad_norm": 0.580696702003479, "learning_rate": 1.836181841519993e-05, "loss": 0.0088, "step": 1678 }, { "epoch": 3.19, "grad_norm": 0.43923866748809814, "learning_rate": 1.832777520913978e-05, "loss": 0.0054, "step": 1679 }, { "epoch": 3.19, "grad_norm": 0.714422881603241, "learning_rate": 1.8293749705490494e-05, "loss": 0.0111, "step": 1680 }, { "epoch": 3.2, "grad_norm": 0.4906494617462158, "learning_rate": 1.825974195585606e-05, "loss": 0.0065, "step": 1681 }, { "epoch": 3.2, "grad_norm": 0.4951362907886505, "learning_rate": 1.8225752011813538e-05, "loss": 0.0164, "step": 1682 }, { "epoch": 3.2, "grad_norm": 0.2716967463493347, "learning_rate": 1.8191779924913022e-05, "loss": 0.0038, "step": 1683 }, { "epoch": 3.2, "grad_norm": 0.2104797661304474, "learning_rate": 1.8157825746677482e-05, "loss": 0.0023, "step": 1684 }, { "epoch": 3.2, "grad_norm": 0.6418616771697998, "learning_rate": 1.8123889528602726e-05, "loss": 0.0068, "step": 1685 }, { "epoch": 3.21, "grad_norm": 0.5393822193145752, "learning_rate": 1.808997132215736e-05, "loss": 0.0084, "step": 1686 }, { "epoch": 3.21, "grad_norm": 0.9136606454849243, "learning_rate": 1.805607117878264e-05, "loss": 0.0117, "step": 1687 }, { "epoch": 3.21, "grad_norm": 0.7960745692253113, "learning_rate": 1.802218914989242e-05, "loss": 0.0132, "step": 1688 }, { "epoch": 3.21, "grad_norm": 0.26014140248298645, "learning_rate": 1.798832528687314e-05, "loss": 0.0042, "step": 1689 }, { "epoch": 3.21, "grad_norm": 0.5577003359794617, "learning_rate": 1.7954479641083613e-05, "loss": 0.0066, "step": 1690 }, { "epoch": 3.21, "grad_norm": 0.33937495946884155, "learning_rate": 1.792065226385506e-05, "loss": 0.0057, "step": 1691 }, { "epoch": 3.22, "grad_norm": 0.7793117761611938, "learning_rate": 1.788684320649102e-05, "loss": 0.0125, "step": 1692 }, { "epoch": 3.22, "grad_norm": 0.45964959263801575, "learning_rate": 1.7853052520267195e-05, "loss": 0.006, "step": 1693 }, { "epoch": 3.22, "grad_norm": 0.4690602123737335, "learning_rate": 1.7819280256431465e-05, "loss": 0.0067, "step": 1694 }, { "epoch": 3.22, "grad_norm": 0.9708989262580872, "learning_rate": 1.778552646620374e-05, "loss": 0.0134, "step": 1695 }, { "epoch": 3.22, "grad_norm": 0.3689495623111725, "learning_rate": 1.775179120077595e-05, "loss": 0.0046, "step": 1696 }, { "epoch": 3.23, "grad_norm": 1.1244350671768188, "learning_rate": 1.7718074511311894e-05, "loss": 0.0198, "step": 1697 }, { "epoch": 3.23, "grad_norm": 0.8593351244926453, "learning_rate": 1.7684376448947205e-05, "loss": 0.0142, "step": 1698 }, { "epoch": 3.23, "grad_norm": 0.6909305453300476, "learning_rate": 1.7650697064789295e-05, "loss": 0.0084, "step": 1699 }, { "epoch": 3.23, "grad_norm": 0.5702932476997375, "learning_rate": 1.7617036409917213e-05, "loss": 0.0098, "step": 1700 }, { "epoch": 3.23, "eval_blimp_filtered_avg": 0.7349253731343284, "eval_blimp_filtered_std": 0.004882217084715261, "step": 1700 }, { "epoch": 3.23, "eval_blimp_supplement_avg": 0.7672413793103449, "eval_blimp_supplement_std": 0.018021527243313875, "step": 1700 }, { "epoch": 3.23, "eval_vqa_filtered_avg": 0.36, "eval_vqa_filtered_std": 0.048241815132442176, "step": 1700 }, { "epoch": 3.23, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 1700 }, { "epoch": 3.23, "grad_norm": 0.3672007918357849, "learning_rate": 1.7583394535381613e-05, "loss": 0.0043, "step": 1701 }, { "epoch": 3.24, "grad_norm": 0.37652409076690674, "learning_rate": 1.7549771492204683e-05, "loss": 0.0042, "step": 1702 }, { "epoch": 3.24, "grad_norm": 0.5915730595588684, "learning_rate": 1.7516167331380024e-05, "loss": 0.0122, "step": 1703 }, { "epoch": 3.24, "grad_norm": 0.4291975796222687, "learning_rate": 1.7482582103872608e-05, "loss": 0.0036, "step": 1704 }, { "epoch": 3.24, "grad_norm": 0.24895597994327545, "learning_rate": 1.7449015860618715e-05, "loss": 0.0043, "step": 1705 }, { "epoch": 3.24, "grad_norm": 0.5016794800758362, "learning_rate": 1.7415468652525802e-05, "loss": 0.0075, "step": 1706 }, { "epoch": 3.25, "grad_norm": 0.3567659258842468, "learning_rate": 1.7381940530472476e-05, "loss": 0.0075, "step": 1707 }, { "epoch": 3.25, "grad_norm": 0.30997323989868164, "learning_rate": 1.734843154530837e-05, "loss": 0.0056, "step": 1708 }, { "epoch": 3.25, "grad_norm": 1.0080888271331787, "learning_rate": 1.7314941747854145e-05, "loss": 0.0152, "step": 1709 }, { "epoch": 3.25, "grad_norm": 0.35624292492866516, "learning_rate": 1.728147118890133e-05, "loss": 0.0048, "step": 1710 }, { "epoch": 3.25, "grad_norm": 0.6068875789642334, "learning_rate": 1.724801991921224e-05, "loss": 0.0067, "step": 1711 }, { "epoch": 3.25, "grad_norm": 0.6067308783531189, "learning_rate": 1.721458798952001e-05, "loss": 0.0064, "step": 1712 }, { "epoch": 3.26, "grad_norm": 0.33551183342933655, "learning_rate": 1.718117545052839e-05, "loss": 0.0036, "step": 1713 }, { "epoch": 3.26, "grad_norm": 0.38168156147003174, "learning_rate": 1.714778235291173e-05, "loss": 0.0046, "step": 1714 }, { "epoch": 3.26, "grad_norm": 0.3481459617614746, "learning_rate": 1.7114408747314925e-05, "loss": 0.0053, "step": 1715 }, { "epoch": 3.26, "grad_norm": 0.4899192452430725, "learning_rate": 1.7081054684353272e-05, "loss": 0.0044, "step": 1716 }, { "epoch": 3.26, "grad_norm": 0.44372472167015076, "learning_rate": 1.7047720214612447e-05, "loss": 0.0042, "step": 1717 }, { "epoch": 3.27, "grad_norm": 0.34694576263427734, "learning_rate": 1.7014405388648387e-05, "loss": 0.0044, "step": 1718 }, { "epoch": 3.27, "grad_norm": 0.7049763202667236, "learning_rate": 1.6981110256987282e-05, "loss": 0.0075, "step": 1719 }, { "epoch": 3.27, "grad_norm": 0.2011437565088272, "learning_rate": 1.6947834870125417e-05, "loss": 0.0029, "step": 1720 }, { "epoch": 3.27, "grad_norm": 0.5964885354042053, "learning_rate": 1.691457927852914e-05, "loss": 0.0058, "step": 1721 }, { "epoch": 3.27, "grad_norm": 0.3933841586112976, "learning_rate": 1.6881343532634773e-05, "loss": 0.0041, "step": 1722 }, { "epoch": 3.28, "grad_norm": 0.3797585964202881, "learning_rate": 1.6848127682848552e-05, "loss": 0.0036, "step": 1723 }, { "epoch": 3.28, "grad_norm": 0.5695186257362366, "learning_rate": 1.6814931779546514e-05, "loss": 0.0048, "step": 1724 }, { "epoch": 3.28, "grad_norm": 0.36833855509757996, "learning_rate": 1.6781755873074483e-05, "loss": 0.004, "step": 1725 }, { "epoch": 3.28, "grad_norm": 0.7106064558029175, "learning_rate": 1.674860001374793e-05, "loss": 0.0129, "step": 1726 }, { "epoch": 3.28, "grad_norm": 0.303149551153183, "learning_rate": 1.67154642518519e-05, "loss": 0.0084, "step": 1727 }, { "epoch": 3.29, "grad_norm": 0.7336820960044861, "learning_rate": 1.6682348637641018e-05, "loss": 0.0101, "step": 1728 }, { "epoch": 3.29, "grad_norm": 0.4565402567386627, "learning_rate": 1.6649253221339302e-05, "loss": 0.0134, "step": 1729 }, { "epoch": 3.29, "grad_norm": 0.2802707552909851, "learning_rate": 1.6616178053140155e-05, "loss": 0.0044, "step": 1730 }, { "epoch": 3.29, "grad_norm": 0.5425388813018799, "learning_rate": 1.6583123183206247e-05, "loss": 0.0059, "step": 1731 }, { "epoch": 3.29, "grad_norm": 0.28855809569358826, "learning_rate": 1.6550088661669524e-05, "loss": 0.011, "step": 1732 }, { "epoch": 3.29, "grad_norm": 0.6018909811973572, "learning_rate": 1.651707453863102e-05, "loss": 0.0103, "step": 1733 }, { "epoch": 3.3, "grad_norm": 0.4441666603088379, "learning_rate": 1.6484080864160816e-05, "loss": 0.0072, "step": 1734 }, { "epoch": 3.3, "grad_norm": 0.340992271900177, "learning_rate": 1.6451107688298046e-05, "loss": 0.0045, "step": 1735 }, { "epoch": 3.3, "grad_norm": 1.105954647064209, "learning_rate": 1.6418155061050694e-05, "loss": 0.0114, "step": 1736 }, { "epoch": 3.3, "grad_norm": 0.5751716494560242, "learning_rate": 1.6385223032395598e-05, "loss": 0.0067, "step": 1737 }, { "epoch": 3.3, "grad_norm": 0.18725290894508362, "learning_rate": 1.6352311652278388e-05, "loss": 0.0021, "step": 1738 }, { "epoch": 3.31, "grad_norm": 0.39952540397644043, "learning_rate": 1.6319420970613336e-05, "loss": 0.0075, "step": 1739 }, { "epoch": 3.31, "grad_norm": 0.40847453474998474, "learning_rate": 1.628655103728332e-05, "loss": 0.0044, "step": 1740 }, { "epoch": 3.31, "grad_norm": 0.964826226234436, "learning_rate": 1.62537019021398e-05, "loss": 0.0158, "step": 1741 }, { "epoch": 3.31, "grad_norm": 0.3726364076137543, "learning_rate": 1.6220873615002646e-05, "loss": 0.0036, "step": 1742 }, { "epoch": 3.31, "grad_norm": 0.47202497720718384, "learning_rate": 1.6188066225660113e-05, "loss": 0.0058, "step": 1743 }, { "epoch": 3.32, "grad_norm": 0.4112474322319031, "learning_rate": 1.6155279783868782e-05, "loss": 0.0043, "step": 1744 }, { "epoch": 3.32, "grad_norm": 0.4355575442314148, "learning_rate": 1.612251433935346e-05, "loss": 0.0052, "step": 1745 }, { "epoch": 3.32, "grad_norm": 0.8189808130264282, "learning_rate": 1.6089769941807086e-05, "loss": 0.0047, "step": 1746 }, { "epoch": 3.32, "grad_norm": 0.5853738784790039, "learning_rate": 1.60570466408907e-05, "loss": 0.0032, "step": 1747 }, { "epoch": 3.32, "grad_norm": 0.162607342004776, "learning_rate": 1.602434448623336e-05, "loss": 0.0015, "step": 1748 }, { "epoch": 3.33, "grad_norm": 0.626732587814331, "learning_rate": 1.599166352743203e-05, "loss": 0.01, "step": 1749 }, { "epoch": 3.33, "grad_norm": 0.46365225315093994, "learning_rate": 1.595900381405151e-05, "loss": 0.006, "step": 1750 }, { "epoch": 3.33, "grad_norm": 0.8126720190048218, "learning_rate": 1.5926365395624436e-05, "loss": 0.0117, "step": 1751 }, { "epoch": 3.33, "grad_norm": 1.3622221946716309, "learning_rate": 1.5893748321651097e-05, "loss": 0.0258, "step": 1752 }, { "epoch": 3.33, "grad_norm": 0.5719367861747742, "learning_rate": 1.5861152641599413e-05, "loss": 0.0067, "step": 1753 }, { "epoch": 3.33, "grad_norm": 0.3044682741165161, "learning_rate": 1.5828578404904903e-05, "loss": 0.008, "step": 1754 }, { "epoch": 3.34, "grad_norm": 0.45612961053848267, "learning_rate": 1.5796025660970513e-05, "loss": 0.0053, "step": 1755 }, { "epoch": 3.34, "grad_norm": 0.6850053668022156, "learning_rate": 1.5763494459166626e-05, "loss": 0.004, "step": 1756 }, { "epoch": 3.34, "grad_norm": 0.7067946195602417, "learning_rate": 1.573098484883091e-05, "loss": 0.0035, "step": 1757 }, { "epoch": 3.34, "grad_norm": 0.9244266748428345, "learning_rate": 1.569849687926834e-05, "loss": 0.0131, "step": 1758 }, { "epoch": 3.34, "grad_norm": 0.20684728026390076, "learning_rate": 1.5666030599751047e-05, "loss": 0.0029, "step": 1759 }, { "epoch": 3.35, "grad_norm": 0.4999117851257324, "learning_rate": 1.5633586059518244e-05, "loss": 0.0098, "step": 1760 }, { "epoch": 3.35, "grad_norm": 0.5740731954574585, "learning_rate": 1.560116330777623e-05, "loss": 0.0101, "step": 1761 }, { "epoch": 3.35, "grad_norm": 0.3280794322490692, "learning_rate": 1.556876239369821e-05, "loss": 0.0042, "step": 1762 }, { "epoch": 3.35, "grad_norm": 0.25032198429107666, "learning_rate": 1.553638336642426e-05, "loss": 0.0025, "step": 1763 }, { "epoch": 3.35, "grad_norm": 0.39408278465270996, "learning_rate": 1.5504026275061326e-05, "loss": 0.0033, "step": 1764 }, { "epoch": 3.36, "grad_norm": 0.8217020630836487, "learning_rate": 1.5471691168683025e-05, "loss": 0.0096, "step": 1765 }, { "epoch": 3.36, "grad_norm": 0.28382396697998047, "learning_rate": 1.5439378096329652e-05, "loss": 0.0033, "step": 1766 }, { "epoch": 3.36, "grad_norm": 0.9812242388725281, "learning_rate": 1.5407087107008086e-05, "loss": 0.0123, "step": 1767 }, { "epoch": 3.36, "grad_norm": 0.6693195104598999, "learning_rate": 1.537481824969171e-05, "loss": 0.0067, "step": 1768 }, { "epoch": 3.36, "grad_norm": 0.6880369186401367, "learning_rate": 1.5342571573320344e-05, "loss": 0.0048, "step": 1769 }, { "epoch": 3.37, "grad_norm": 0.8025116324424744, "learning_rate": 1.531034712680015e-05, "loss": 0.0166, "step": 1770 }, { "epoch": 3.37, "grad_norm": 0.7586734890937805, "learning_rate": 1.5278144959003624e-05, "loss": 0.0059, "step": 1771 }, { "epoch": 3.37, "grad_norm": 0.5267674922943115, "learning_rate": 1.5245965118769429e-05, "loss": 0.0056, "step": 1772 }, { "epoch": 3.37, "grad_norm": 0.8072908520698547, "learning_rate": 1.5213807654902367e-05, "loss": 0.0407, "step": 1773 }, { "epoch": 3.37, "grad_norm": 0.3827669322490692, "learning_rate": 1.5181672616173332e-05, "loss": 0.0122, "step": 1774 }, { "epoch": 3.37, "grad_norm": 0.34025195240974426, "learning_rate": 1.5149560051319184e-05, "loss": 0.0047, "step": 1775 }, { "epoch": 3.38, "grad_norm": 0.8436722755432129, "learning_rate": 1.5117470009042693e-05, "loss": 0.0037, "step": 1776 }, { "epoch": 3.38, "grad_norm": 0.15361633896827698, "learning_rate": 1.50854025380125e-05, "loss": 0.0014, "step": 1777 }, { "epoch": 3.38, "grad_norm": 0.8732007741928101, "learning_rate": 1.5053357686862996e-05, "loss": 0.0084, "step": 1778 }, { "epoch": 3.38, "grad_norm": 0.8618711233139038, "learning_rate": 1.5021335504194265e-05, "loss": 0.0135, "step": 1779 }, { "epoch": 3.38, "grad_norm": 0.1544099599123001, "learning_rate": 1.4989336038571987e-05, "loss": 0.0019, "step": 1780 }, { "epoch": 3.39, "grad_norm": 0.7078830599784851, "learning_rate": 1.4957359338527446e-05, "loss": 0.0117, "step": 1781 }, { "epoch": 3.39, "grad_norm": 0.16900455951690674, "learning_rate": 1.4925405452557357e-05, "loss": 0.0022, "step": 1782 }, { "epoch": 3.39, "grad_norm": 0.35784637928009033, "learning_rate": 1.4893474429123833e-05, "loss": 0.0046, "step": 1783 }, { "epoch": 3.39, "grad_norm": 0.499473512172699, "learning_rate": 1.4861566316654347e-05, "loss": 0.0056, "step": 1784 }, { "epoch": 3.39, "grad_norm": 0.6228310465812683, "learning_rate": 1.482968116354159e-05, "loss": 0.0091, "step": 1785 }, { "epoch": 3.4, "grad_norm": 0.7582800984382629, "learning_rate": 1.4797819018143438e-05, "loss": 0.005, "step": 1786 }, { "epoch": 3.4, "grad_norm": 0.7926908135414124, "learning_rate": 1.47659799287829e-05, "loss": 0.0148, "step": 1787 }, { "epoch": 3.4, "grad_norm": 0.6741030812263489, "learning_rate": 1.4734163943747986e-05, "loss": 0.0084, "step": 1788 }, { "epoch": 3.4, "grad_norm": 0.40426379442214966, "learning_rate": 1.470237111129168e-05, "loss": 0.0058, "step": 1789 }, { "epoch": 3.4, "grad_norm": 0.23570197820663452, "learning_rate": 1.4670601479631845e-05, "loss": 0.0029, "step": 1790 }, { "epoch": 3.4, "grad_norm": 0.878614068031311, "learning_rate": 1.463885509695116e-05, "loss": 0.0092, "step": 1791 }, { "epoch": 3.41, "grad_norm": 0.5676522850990295, "learning_rate": 1.460713201139705e-05, "loss": 0.0122, "step": 1792 }, { "epoch": 3.41, "grad_norm": 0.3009334206581116, "learning_rate": 1.4575432271081587e-05, "loss": 0.0026, "step": 1793 }, { "epoch": 3.41, "grad_norm": 0.6190624833106995, "learning_rate": 1.4543755924081478e-05, "loss": 0.0045, "step": 1794 }, { "epoch": 3.41, "grad_norm": 0.8536520600318909, "learning_rate": 1.4512103018437908e-05, "loss": 0.0091, "step": 1795 }, { "epoch": 3.41, "grad_norm": 0.5566865801811218, "learning_rate": 1.4480473602156523e-05, "loss": 0.0065, "step": 1796 }, { "epoch": 3.42, "grad_norm": 0.25256940722465515, "learning_rate": 1.4448867723207365e-05, "loss": 0.0035, "step": 1797 }, { "epoch": 3.42, "grad_norm": 0.6214127540588379, "learning_rate": 1.4417285429524753e-05, "loss": 0.01, "step": 1798 }, { "epoch": 3.42, "grad_norm": 0.19216740131378174, "learning_rate": 1.4385726769007233e-05, "loss": 0.0024, "step": 1799 }, { "epoch": 3.42, "grad_norm": 0.36631953716278076, "learning_rate": 1.435419178951755e-05, "loss": 0.0032, "step": 1800 }, { "epoch": 3.42, "eval_blimp_filtered_avg": 0.736268656716418, "eval_blimp_filtered_std": 0.0048661285289556665, "step": 1800 }, { "epoch": 3.42, "eval_blimp_supplement_avg": 0.7693965517241379, "eval_blimp_supplement_std": 0.0178764236895297, "step": 1800 }, { "epoch": 3.42, "eval_vqa_filtered_avg": 0.32, "eval_vqa_filtered_std": 0.046882617226215034, "step": 1800 }, { "epoch": 3.42, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 1800 }, { "epoch": 3.42, "grad_norm": 0.5840461850166321, "learning_rate": 1.4322680538882483e-05, "loss": 0.0069, "step": 1801 }, { "epoch": 3.43, "grad_norm": 0.5017431378364563, "learning_rate": 1.4291193064892848e-05, "loss": 0.0113, "step": 1802 }, { "epoch": 3.43, "grad_norm": 0.7115788459777832, "learning_rate": 1.42597294153034e-05, "loss": 0.0029, "step": 1803 }, { "epoch": 3.43, "grad_norm": 0.5457814335823059, "learning_rate": 1.4228289637832757e-05, "loss": 0.0094, "step": 1804 }, { "epoch": 3.43, "grad_norm": 0.35616686940193176, "learning_rate": 1.419687378016333e-05, "loss": 0.0049, "step": 1805 }, { "epoch": 3.43, "grad_norm": 0.5619931817054749, "learning_rate": 1.416548188994125e-05, "loss": 0.0066, "step": 1806 }, { "epoch": 3.44, "grad_norm": 0.3764835000038147, "learning_rate": 1.4134114014776323e-05, "loss": 0.0049, "step": 1807 }, { "epoch": 3.44, "grad_norm": 0.5983908176422119, "learning_rate": 1.4102770202241904e-05, "loss": 0.0052, "step": 1808 }, { "epoch": 3.44, "grad_norm": 0.6124565005302429, "learning_rate": 1.4071450499874851e-05, "loss": 0.0166, "step": 1809 }, { "epoch": 3.44, "grad_norm": 0.7653805613517761, "learning_rate": 1.4040154955175494e-05, "loss": 0.0084, "step": 1810 }, { "epoch": 3.44, "grad_norm": 0.7539626359939575, "learning_rate": 1.4008883615607489e-05, "loss": 0.0098, "step": 1811 }, { "epoch": 3.44, "grad_norm": 0.4694896936416626, "learning_rate": 1.3977636528597794e-05, "loss": 0.0052, "step": 1812 }, { "epoch": 3.45, "grad_norm": 0.6175418496131897, "learning_rate": 1.394641374153658e-05, "loss": 0.0062, "step": 1813 }, { "epoch": 3.45, "grad_norm": 0.5546243190765381, "learning_rate": 1.391521530177717e-05, "loss": 0.0088, "step": 1814 }, { "epoch": 3.45, "grad_norm": 0.7222805619239807, "learning_rate": 1.388404125663596e-05, "loss": 0.0075, "step": 1815 }, { "epoch": 3.45, "grad_norm": 0.33851948380470276, "learning_rate": 1.3852891653392335e-05, "loss": 0.0037, "step": 1816 }, { "epoch": 3.45, "grad_norm": 0.5231871604919434, "learning_rate": 1.3821766539288644e-05, "loss": 0.0094, "step": 1817 }, { "epoch": 3.46, "grad_norm": 0.5797617435455322, "learning_rate": 1.3790665961530063e-05, "loss": 0.0079, "step": 1818 }, { "epoch": 3.46, "grad_norm": 0.4603036046028137, "learning_rate": 1.3759589967284559e-05, "loss": 0.0053, "step": 1819 }, { "epoch": 3.46, "grad_norm": 0.5350714325904846, "learning_rate": 1.3728538603682844e-05, "loss": 0.0215, "step": 1820 }, { "epoch": 3.46, "grad_norm": 0.2464483231306076, "learning_rate": 1.3697511917818243e-05, "loss": 0.0043, "step": 1821 }, { "epoch": 3.46, "grad_norm": 0.32760384678840637, "learning_rate": 1.3666509956746652e-05, "loss": 0.0029, "step": 1822 }, { "epoch": 3.47, "grad_norm": 0.4090158939361572, "learning_rate": 1.3635532767486502e-05, "loss": 0.0034, "step": 1823 }, { "epoch": 3.47, "grad_norm": 0.5918548107147217, "learning_rate": 1.3604580397018624e-05, "loss": 0.0059, "step": 1824 }, { "epoch": 3.47, "grad_norm": 0.5030539035797119, "learning_rate": 1.3573652892286215e-05, "loss": 0.003, "step": 1825 }, { "epoch": 3.47, "grad_norm": 0.13065429031848907, "learning_rate": 1.3542750300194772e-05, "loss": 0.0022, "step": 1826 }, { "epoch": 3.47, "grad_norm": 0.5641190409660339, "learning_rate": 1.3511872667611987e-05, "loss": 0.0058, "step": 1827 }, { "epoch": 3.48, "grad_norm": 0.45961880683898926, "learning_rate": 1.3481020041367717e-05, "loss": 0.0069, "step": 1828 }, { "epoch": 3.48, "grad_norm": 0.24739032983779907, "learning_rate": 1.3450192468253876e-05, "loss": 0.0033, "step": 1829 }, { "epoch": 3.48, "grad_norm": 0.3610837459564209, "learning_rate": 1.341938999502441e-05, "loss": 0.0034, "step": 1830 }, { "epoch": 3.48, "grad_norm": 0.3666897416114807, "learning_rate": 1.3388612668395175e-05, "loss": 0.004, "step": 1831 }, { "epoch": 3.48, "grad_norm": 0.29246801137924194, "learning_rate": 1.335786053504388e-05, "loss": 0.0061, "step": 1832 }, { "epoch": 3.48, "grad_norm": 0.5108804702758789, "learning_rate": 1.3327133641610066e-05, "loss": 0.0104, "step": 1833 }, { "epoch": 3.49, "grad_norm": 0.7471219897270203, "learning_rate": 1.329643203469495e-05, "loss": 0.0158, "step": 1834 }, { "epoch": 3.49, "grad_norm": 0.8334513902664185, "learning_rate": 1.3265755760861424e-05, "loss": 0.0104, "step": 1835 }, { "epoch": 3.49, "grad_norm": 0.3491614758968353, "learning_rate": 1.3235104866633948e-05, "loss": 0.0083, "step": 1836 }, { "epoch": 3.49, "grad_norm": 0.4338594079017639, "learning_rate": 1.3204479398498496e-05, "loss": 0.0034, "step": 1837 }, { "epoch": 3.49, "grad_norm": 0.6310893297195435, "learning_rate": 1.317387940290247e-05, "loss": 0.006, "step": 1838 }, { "epoch": 3.5, "grad_norm": 0.7705931663513184, "learning_rate": 1.3143304926254664e-05, "loss": 0.0059, "step": 1839 }, { "epoch": 3.5, "grad_norm": 0.3221486210823059, "learning_rate": 1.3112756014925148e-05, "loss": 0.0032, "step": 1840 }, { "epoch": 3.5, "grad_norm": 0.6792155504226685, "learning_rate": 1.3082232715245226e-05, "loss": 0.0096, "step": 1841 }, { "epoch": 3.5, "grad_norm": 0.4572995901107788, "learning_rate": 1.3051735073507342e-05, "loss": 0.0042, "step": 1842 }, { "epoch": 3.5, "grad_norm": 0.3886929452419281, "learning_rate": 1.3021263135965062e-05, "loss": 0.0036, "step": 1843 }, { "epoch": 3.51, "grad_norm": 0.9852690100669861, "learning_rate": 1.2990816948832945e-05, "loss": 0.0226, "step": 1844 }, { "epoch": 3.51, "grad_norm": 0.28153640031814575, "learning_rate": 1.2960396558286484e-05, "loss": 0.003, "step": 1845 }, { "epoch": 3.51, "grad_norm": 0.30118662118911743, "learning_rate": 1.2930002010462082e-05, "loss": 0.0037, "step": 1846 }, { "epoch": 3.51, "grad_norm": 0.898674488067627, "learning_rate": 1.2899633351456926e-05, "loss": 0.0077, "step": 1847 }, { "epoch": 3.51, "grad_norm": 0.7886599898338318, "learning_rate": 1.2869290627328938e-05, "loss": 0.0109, "step": 1848 }, { "epoch": 3.52, "grad_norm": 0.5740482211112976, "learning_rate": 1.2838973884096715e-05, "loss": 0.0047, "step": 1849 }, { "epoch": 3.52, "grad_norm": 0.33247944712638855, "learning_rate": 1.2808683167739444e-05, "loss": 0.007, "step": 1850 }, { "epoch": 3.52, "grad_norm": 0.5990211963653564, "learning_rate": 1.2778418524196835e-05, "loss": 0.0059, "step": 1851 }, { "epoch": 3.52, "grad_norm": 0.9455280900001526, "learning_rate": 1.2748179999369079e-05, "loss": 0.0142, "step": 1852 }, { "epoch": 3.52, "grad_norm": 0.22986450791358948, "learning_rate": 1.2717967639116732e-05, "loss": 0.0017, "step": 1853 }, { "epoch": 3.52, "grad_norm": 0.5632677674293518, "learning_rate": 1.2687781489260673e-05, "loss": 0.0055, "step": 1854 }, { "epoch": 3.53, "grad_norm": 0.46911388635635376, "learning_rate": 1.2657621595582018e-05, "loss": 0.0053, "step": 1855 }, { "epoch": 3.53, "grad_norm": 0.5447803139686584, "learning_rate": 1.26274880038221e-05, "loss": 0.0065, "step": 1856 }, { "epoch": 3.53, "grad_norm": 0.3561791479587555, "learning_rate": 1.2597380759682325e-05, "loss": 0.006, "step": 1857 }, { "epoch": 3.53, "grad_norm": 1.268258810043335, "learning_rate": 1.2567299908824146e-05, "loss": 0.0119, "step": 1858 }, { "epoch": 3.53, "grad_norm": 0.7749211192131042, "learning_rate": 1.2537245496869002e-05, "loss": 0.0186, "step": 1859 }, { "epoch": 3.54, "grad_norm": 0.3879917562007904, "learning_rate": 1.250721756939822e-05, "loss": 0.0059, "step": 1860 }, { "epoch": 3.54, "grad_norm": 0.6990310549736023, "learning_rate": 1.247721617195295e-05, "loss": 0.007, "step": 1861 }, { "epoch": 3.54, "grad_norm": 0.952443540096283, "learning_rate": 1.2447241350034147e-05, "loss": 0.0057, "step": 1862 }, { "epoch": 3.54, "grad_norm": 0.17345838248729706, "learning_rate": 1.2417293149102422e-05, "loss": 0.0022, "step": 1863 }, { "epoch": 3.54, "grad_norm": 0.8210578560829163, "learning_rate": 1.2387371614578021e-05, "loss": 0.0079, "step": 1864 }, { "epoch": 3.55, "grad_norm": 0.20211558043956757, "learning_rate": 1.2357476791840747e-05, "loss": 0.0034, "step": 1865 }, { "epoch": 3.55, "grad_norm": 0.36619478464126587, "learning_rate": 1.2327608726229907e-05, "loss": 0.0038, "step": 1866 }, { "epoch": 3.55, "grad_norm": 0.5522010922431946, "learning_rate": 1.229776746304421e-05, "loss": 0.0122, "step": 1867 }, { "epoch": 3.55, "grad_norm": 0.41797181963920593, "learning_rate": 1.2267953047541706e-05, "loss": 0.0114, "step": 1868 }, { "epoch": 3.55, "grad_norm": 0.8998970985412598, "learning_rate": 1.2238165524939767e-05, "loss": 0.0172, "step": 1869 }, { "epoch": 3.56, "grad_norm": 0.5817302465438843, "learning_rate": 1.2208404940414937e-05, "loss": 0.0042, "step": 1870 }, { "epoch": 3.56, "grad_norm": 0.7900393605232239, "learning_rate": 1.2178671339102923e-05, "loss": 0.0084, "step": 1871 }, { "epoch": 3.56, "grad_norm": 0.37246838212013245, "learning_rate": 1.2148964766098512e-05, "loss": 0.0062, "step": 1872 }, { "epoch": 3.56, "grad_norm": 0.4499676525592804, "learning_rate": 1.2119285266455486e-05, "loss": 0.0055, "step": 1873 }, { "epoch": 3.56, "grad_norm": 0.38272762298583984, "learning_rate": 1.2089632885186565e-05, "loss": 0.0038, "step": 1874 }, { "epoch": 3.56, "grad_norm": 0.2821221351623535, "learning_rate": 1.2060007667263374e-05, "loss": 0.0059, "step": 1875 }, { "epoch": 3.57, "grad_norm": 0.5012558698654175, "learning_rate": 1.2030409657616308e-05, "loss": 0.0139, "step": 1876 }, { "epoch": 3.57, "grad_norm": 0.2907921373844147, "learning_rate": 1.2000838901134503e-05, "loss": 0.0036, "step": 1877 }, { "epoch": 3.57, "grad_norm": 0.999122679233551, "learning_rate": 1.1971295442665755e-05, "loss": 0.0152, "step": 1878 }, { "epoch": 3.57, "grad_norm": 0.40374046564102173, "learning_rate": 1.1941779327016494e-05, "loss": 0.0047, "step": 1879 }, { "epoch": 3.57, "grad_norm": 0.600990891456604, "learning_rate": 1.1912290598951646e-05, "loss": 0.0049, "step": 1880 }, { "epoch": 3.58, "grad_norm": 0.5232167840003967, "learning_rate": 1.1882829303194607e-05, "loss": 0.0052, "step": 1881 }, { "epoch": 3.58, "grad_norm": 0.8558958768844604, "learning_rate": 1.1853395484427177e-05, "loss": 0.0099, "step": 1882 }, { "epoch": 3.58, "grad_norm": 0.4265744090080261, "learning_rate": 1.1823989187289477e-05, "loss": 0.0039, "step": 1883 }, { "epoch": 3.58, "grad_norm": 0.5951396226882935, "learning_rate": 1.179461045637988e-05, "loss": 0.0086, "step": 1884 }, { "epoch": 3.58, "grad_norm": 0.6064973473548889, "learning_rate": 1.1765259336254984e-05, "loss": 0.0056, "step": 1885 }, { "epoch": 3.59, "grad_norm": 0.32437869906425476, "learning_rate": 1.1735935871429476e-05, "loss": 0.0045, "step": 1886 }, { "epoch": 3.59, "grad_norm": 0.8104749321937561, "learning_rate": 1.1706640106376105e-05, "loss": 0.0162, "step": 1887 }, { "epoch": 3.59, "grad_norm": 0.25032442808151245, "learning_rate": 1.1677372085525638e-05, "loss": 0.0032, "step": 1888 }, { "epoch": 3.59, "grad_norm": 0.6559709906578064, "learning_rate": 1.1648131853266735e-05, "loss": 0.0072, "step": 1889 }, { "epoch": 3.59, "grad_norm": 0.7392078638076782, "learning_rate": 1.161891945394592e-05, "loss": 0.0104, "step": 1890 }, { "epoch": 3.6, "grad_norm": 0.8716597557067871, "learning_rate": 1.1589734931867496e-05, "loss": 0.0058, "step": 1891 }, { "epoch": 3.6, "grad_norm": 0.6604300141334534, "learning_rate": 1.1560578331293517e-05, "loss": 0.0078, "step": 1892 }, { "epoch": 3.6, "grad_norm": 0.3246626853942871, "learning_rate": 1.1531449696443659e-05, "loss": 0.0023, "step": 1893 }, { "epoch": 3.6, "grad_norm": 0.5962731838226318, "learning_rate": 1.1502349071495192e-05, "loss": 0.0093, "step": 1894 }, { "epoch": 3.6, "grad_norm": 0.6020123362541199, "learning_rate": 1.1473276500582916e-05, "loss": 0.0075, "step": 1895 }, { "epoch": 3.6, "grad_norm": 0.5529095530509949, "learning_rate": 1.1444232027799066e-05, "loss": 0.0101, "step": 1896 }, { "epoch": 3.61, "grad_norm": 0.6240059733390808, "learning_rate": 1.1415215697193273e-05, "loss": 0.0101, "step": 1897 }, { "epoch": 3.61, "grad_norm": 0.41967126727104187, "learning_rate": 1.13862275527725e-05, "loss": 0.0031, "step": 1898 }, { "epoch": 3.61, "grad_norm": 0.45616474747657776, "learning_rate": 1.1357267638500937e-05, "loss": 0.0036, "step": 1899 }, { "epoch": 3.61, "grad_norm": 0.5288574695587158, "learning_rate": 1.1328335998299964e-05, "loss": 0.0049, "step": 1900 }, { "epoch": 3.61, "eval_blimp_filtered_avg": 0.7371641791044776, "eval_blimp_filtered_std": 0.004872875106609372, "step": 1900 }, { "epoch": 3.61, "eval_blimp_supplement_avg": 0.7672413793103449, "eval_blimp_supplement_std": 0.01786985606239687, "step": 1900 }, { "epoch": 3.61, "eval_vqa_filtered_avg": 0.33, "eval_vqa_filtered_std": 0.04725815626252604, "step": 1900 }, { "epoch": 3.61, "eval_winoground_filtered_avg": 0.52, "eval_winoground_filtered_std": 0.05021167315686779, "step": 1900 }, { "epoch": 3.61, "grad_norm": 0.6306433081626892, "learning_rate": 1.1299432676048112e-05, "loss": 0.0045, "step": 1901 }, { "epoch": 3.62, "grad_norm": 0.9045761823654175, "learning_rate": 1.1270557715580919e-05, "loss": 0.0466, "step": 1902 }, { "epoch": 3.62, "grad_norm": 1.2392215728759766, "learning_rate": 1.1241711160690933e-05, "loss": 0.0062, "step": 1903 }, { "epoch": 3.62, "grad_norm": 0.3800784647464752, "learning_rate": 1.1212893055127616e-05, "loss": 0.0073, "step": 1904 }, { "epoch": 3.62, "grad_norm": 0.5995686054229736, "learning_rate": 1.1184103442597288e-05, "loss": 0.0054, "step": 1905 }, { "epoch": 3.62, "grad_norm": 0.4459642171859741, "learning_rate": 1.1155342366763047e-05, "loss": 0.0098, "step": 1906 }, { "epoch": 3.63, "grad_norm": 0.3807677626609802, "learning_rate": 1.1126609871244706e-05, "loss": 0.0024, "step": 1907 }, { "epoch": 3.63, "grad_norm": 0.28635501861572266, "learning_rate": 1.1097905999618765e-05, "loss": 0.0044, "step": 1908 }, { "epoch": 3.63, "grad_norm": 0.1614091545343399, "learning_rate": 1.106923079541828e-05, "loss": 0.0016, "step": 1909 }, { "epoch": 3.63, "grad_norm": 0.6299512982368469, "learning_rate": 1.1040584302132827e-05, "loss": 0.0075, "step": 1910 }, { "epoch": 3.63, "grad_norm": 0.46857622265815735, "learning_rate": 1.1011966563208474e-05, "loss": 0.0053, "step": 1911 }, { "epoch": 3.63, "grad_norm": 0.5456902384757996, "learning_rate": 1.0983377622047641e-05, "loss": 0.003, "step": 1912 }, { "epoch": 3.64, "grad_norm": 0.40421047806739807, "learning_rate": 1.0954817522009086e-05, "loss": 0.0039, "step": 1913 }, { "epoch": 3.64, "grad_norm": 0.3474641740322113, "learning_rate": 1.0926286306407822e-05, "loss": 0.0063, "step": 1914 }, { "epoch": 3.64, "grad_norm": 0.5922101736068726, "learning_rate": 1.0897784018515075e-05, "loss": 0.0066, "step": 1915 }, { "epoch": 3.64, "grad_norm": 0.4230577349662781, "learning_rate": 1.0869310701558181e-05, "loss": 0.0037, "step": 1916 }, { "epoch": 3.64, "grad_norm": 0.3523775339126587, "learning_rate": 1.084086639872053e-05, "loss": 0.0038, "step": 1917 }, { "epoch": 3.65, "grad_norm": 0.4636978507041931, "learning_rate": 1.0812451153141524e-05, "loss": 0.0046, "step": 1918 }, { "epoch": 3.65, "grad_norm": 0.20073391497135162, "learning_rate": 1.0784065007916492e-05, "loss": 0.0026, "step": 1919 }, { "epoch": 3.65, "grad_norm": 0.40262582898139954, "learning_rate": 1.0755708006096616e-05, "loss": 0.0039, "step": 1920 }, { "epoch": 3.65, "grad_norm": 0.40842974185943604, "learning_rate": 1.072738019068891e-05, "loss": 0.0042, "step": 1921 }, { "epoch": 3.65, "grad_norm": 0.2489616423845291, "learning_rate": 1.0699081604656088e-05, "loss": 0.0025, "step": 1922 }, { "epoch": 3.66, "grad_norm": 0.6092913150787354, "learning_rate": 1.0670812290916545e-05, "loss": 0.0063, "step": 1923 }, { "epoch": 3.66, "grad_norm": 0.2509463131427765, "learning_rate": 1.0642572292344301e-05, "loss": 0.0028, "step": 1924 }, { "epoch": 3.66, "grad_norm": 0.4505819082260132, "learning_rate": 1.0614361651768893e-05, "loss": 0.0051, "step": 1925 }, { "epoch": 3.66, "grad_norm": 0.23108340799808502, "learning_rate": 1.0586180411975335e-05, "loss": 0.0029, "step": 1926 }, { "epoch": 3.66, "grad_norm": 0.399082213640213, "learning_rate": 1.0558028615704056e-05, "loss": 0.0054, "step": 1927 }, { "epoch": 3.67, "grad_norm": 0.7197695374488831, "learning_rate": 1.0529906305650833e-05, "loss": 0.0091, "step": 1928 }, { "epoch": 3.67, "grad_norm": 0.3435472846031189, "learning_rate": 1.0501813524466715e-05, "loss": 0.0109, "step": 1929 }, { "epoch": 3.67, "grad_norm": 0.3064359128475189, "learning_rate": 1.0473750314757964e-05, "loss": 0.0027, "step": 1930 }, { "epoch": 3.67, "grad_norm": 0.5802640914916992, "learning_rate": 1.044571671908602e-05, "loss": 0.0122, "step": 1931 }, { "epoch": 3.67, "grad_norm": 0.4225955605506897, "learning_rate": 1.0417712779967377e-05, "loss": 0.0054, "step": 1932 }, { "epoch": 3.67, "grad_norm": 0.5601993799209595, "learning_rate": 1.038973853987356e-05, "loss": 0.0079, "step": 1933 }, { "epoch": 3.68, "grad_norm": 0.35390400886535645, "learning_rate": 1.0361794041231072e-05, "loss": 0.0054, "step": 1934 }, { "epoch": 3.68, "grad_norm": 0.15066559612751007, "learning_rate": 1.0333879326421285e-05, "loss": 0.0012, "step": 1935 }, { "epoch": 3.68, "grad_norm": 0.6991298794746399, "learning_rate": 1.03059944377804e-05, "loss": 0.0064, "step": 1936 }, { "epoch": 3.68, "grad_norm": 0.5412514805793762, "learning_rate": 1.0278139417599402e-05, "loss": 0.0074, "step": 1937 }, { "epoch": 3.68, "grad_norm": 0.35950642824172974, "learning_rate": 1.0250314308123968e-05, "loss": 0.0025, "step": 1938 }, { "epoch": 3.69, "grad_norm": 0.2601001262664795, "learning_rate": 1.0222519151554402e-05, "loss": 0.0037, "step": 1939 }, { "epoch": 3.69, "grad_norm": 0.5593259930610657, "learning_rate": 1.019475399004559e-05, "loss": 0.0061, "step": 1940 }, { "epoch": 3.69, "grad_norm": 0.18690624833106995, "learning_rate": 1.0167018865706922e-05, "loss": 0.003, "step": 1941 }, { "epoch": 3.69, "grad_norm": 0.22292889654636383, "learning_rate": 1.0139313820602238e-05, "loss": 0.0034, "step": 1942 }, { "epoch": 3.69, "grad_norm": 0.6586833596229553, "learning_rate": 1.0111638896749748e-05, "loss": 0.006, "step": 1943 }, { "epoch": 3.7, "grad_norm": 0.719986081123352, "learning_rate": 1.0083994136122002e-05, "loss": 0.0131, "step": 1944 }, { "epoch": 3.7, "grad_norm": 0.6659079790115356, "learning_rate": 1.0056379580645786e-05, "loss": 0.0069, "step": 1945 }, { "epoch": 3.7, "grad_norm": 0.43169254064559937, "learning_rate": 1.0028795272202067e-05, "loss": 0.0053, "step": 1946 }, { "epoch": 3.7, "grad_norm": 0.33976855874061584, "learning_rate": 1.0001241252625971e-05, "loss": 0.0036, "step": 1947 }, { "epoch": 3.7, "grad_norm": 0.5299177169799805, "learning_rate": 9.973717563706654e-06, "loss": 0.0101, "step": 1948 }, { "epoch": 3.71, "grad_norm": 0.8007693886756897, "learning_rate": 9.94622424718729e-06, "loss": 0.0105, "step": 1949 }, { "epoch": 3.71, "grad_norm": 0.3417668640613556, "learning_rate": 9.91876134476498e-06, "loss": 0.0053, "step": 1950 }, { "epoch": 3.71, "grad_norm": 0.649199366569519, "learning_rate": 9.891328898090705e-06, "loss": 0.0101, "step": 1951 }, { "epoch": 3.71, "grad_norm": 0.34273287653923035, "learning_rate": 9.863926948769251e-06, "loss": 0.0037, "step": 1952 }, { "epoch": 3.71, "grad_norm": 0.26835981011390686, "learning_rate": 9.836555538359147e-06, "loss": 0.0039, "step": 1953 }, { "epoch": 3.71, "grad_norm": 0.4131266176700592, "learning_rate": 9.80921470837263e-06, "loss": 0.0053, "step": 1954 }, { "epoch": 3.72, "grad_norm": 0.18732571601867676, "learning_rate": 9.781904500275527e-06, "loss": 0.003, "step": 1955 }, { "epoch": 3.72, "grad_norm": 0.4812428057193756, "learning_rate": 9.754624955487233e-06, "loss": 0.0072, "step": 1956 }, { "epoch": 3.72, "grad_norm": 0.8043769598007202, "learning_rate": 9.727376115380652e-06, "loss": 0.0079, "step": 1957 }, { "epoch": 3.72, "grad_norm": 0.33528441190719604, "learning_rate": 9.700158021282107e-06, "loss": 0.0035, "step": 1958 }, { "epoch": 3.72, "grad_norm": 0.2522065043449402, "learning_rate": 9.672970714471281e-06, "loss": 0.002, "step": 1959 }, { "epoch": 3.73, "grad_norm": 0.568202018737793, "learning_rate": 9.645814236181197e-06, "loss": 0.0057, "step": 1960 }, { "epoch": 3.73, "grad_norm": 0.5697447657585144, "learning_rate": 9.618688627598092e-06, "loss": 0.0036, "step": 1961 }, { "epoch": 3.73, "grad_norm": 1.0610249042510986, "learning_rate": 9.591593929861405e-06, "loss": 0.0124, "step": 1962 }, { "epoch": 3.73, "grad_norm": 0.6091943383216858, "learning_rate": 9.564530184063654e-06, "loss": 0.0068, "step": 1963 }, { "epoch": 3.73, "grad_norm": 0.33729487657546997, "learning_rate": 9.537497431250479e-06, "loss": 0.0028, "step": 1964 }, { "epoch": 3.74, "grad_norm": 0.28482404351234436, "learning_rate": 9.51049571242047e-06, "loss": 0.0053, "step": 1965 }, { "epoch": 3.74, "grad_norm": 0.2527482807636261, "learning_rate": 9.48352506852515e-06, "loss": 0.0023, "step": 1966 }, { "epoch": 3.74, "grad_norm": 0.23866161704063416, "learning_rate": 9.45658554046895e-06, "loss": 0.0034, "step": 1967 }, { "epoch": 3.74, "grad_norm": 1.2749905586242676, "learning_rate": 9.429677169109072e-06, "loss": 0.0041, "step": 1968 }, { "epoch": 3.74, "grad_norm": 0.8587445020675659, "learning_rate": 9.402799995255461e-06, "loss": 0.0099, "step": 1969 }, { "epoch": 3.75, "grad_norm": 0.44589969515800476, "learning_rate": 9.375954059670797e-06, "loss": 0.0044, "step": 1970 }, { "epoch": 3.75, "grad_norm": 0.4014206528663635, "learning_rate": 9.349139403070326e-06, "loss": 0.0045, "step": 1971 }, { "epoch": 3.75, "grad_norm": 0.2687934637069702, "learning_rate": 9.32235606612189e-06, "loss": 0.003, "step": 1972 }, { "epoch": 3.75, "grad_norm": 0.8691784143447876, "learning_rate": 9.295604089445812e-06, "loss": 0.008, "step": 1973 }, { "epoch": 3.75, "grad_norm": 0.7646000981330872, "learning_rate": 9.268883513614867e-06, "loss": 0.0051, "step": 1974 }, { "epoch": 3.75, "grad_norm": 0.15929022431373596, "learning_rate": 9.2421943791542e-06, "loss": 0.0033, "step": 1975 }, { "epoch": 3.76, "grad_norm": 0.7193689942359924, "learning_rate": 9.215536726541259e-06, "loss": 0.0089, "step": 1976 }, { "epoch": 3.76, "grad_norm": 0.7729222774505615, "learning_rate": 9.18891059620578e-06, "loss": 0.0087, "step": 1977 }, { "epoch": 3.76, "grad_norm": 0.9050508141517639, "learning_rate": 9.162316028529663e-06, "loss": 0.0075, "step": 1978 }, { "epoch": 3.76, "grad_norm": 0.5967780351638794, "learning_rate": 9.13575306384693e-06, "loss": 0.0056, "step": 1979 }, { "epoch": 3.76, "grad_norm": 0.3386865258216858, "learning_rate": 9.10922174244372e-06, "loss": 0.006, "step": 1980 }, { "epoch": 3.77, "grad_norm": 0.33524060249328613, "learning_rate": 9.082722104558127e-06, "loss": 0.0045, "step": 1981 }, { "epoch": 3.77, "grad_norm": 0.5824353098869324, "learning_rate": 9.05625419038022e-06, "loss": 0.0187, "step": 1982 }, { "epoch": 3.77, "grad_norm": 0.5269536375999451, "learning_rate": 9.029818040051958e-06, "loss": 0.005, "step": 1983 }, { "epoch": 3.77, "grad_norm": 0.927579402923584, "learning_rate": 9.003413693667124e-06, "loss": 0.0084, "step": 1984 }, { "epoch": 3.77, "grad_norm": 0.3056764602661133, "learning_rate": 8.977041191271249e-06, "loss": 0.0025, "step": 1985 }, { "epoch": 3.78, "grad_norm": 0.6074756383895874, "learning_rate": 8.950700572861586e-06, "loss": 0.0075, "step": 1986 }, { "epoch": 3.78, "grad_norm": 0.7862502336502075, "learning_rate": 8.92439187838703e-06, "loss": 0.0091, "step": 1987 }, { "epoch": 3.78, "grad_norm": 0.5209690928459167, "learning_rate": 8.898115147748053e-06, "loss": 0.0061, "step": 1988 }, { "epoch": 3.78, "grad_norm": 0.3744378387928009, "learning_rate": 8.871870420796644e-06, "loss": 0.0084, "step": 1989 }, { "epoch": 3.78, "grad_norm": 0.34643474221229553, "learning_rate": 8.845657737336285e-06, "loss": 0.0037, "step": 1990 }, { "epoch": 3.79, "grad_norm": 0.2529134452342987, "learning_rate": 8.819477137121825e-06, "loss": 0.0025, "step": 1991 }, { "epoch": 3.79, "grad_norm": 0.3004602789878845, "learning_rate": 8.793328659859462e-06, "loss": 0.0046, "step": 1992 }, { "epoch": 3.79, "grad_norm": 0.11179956793785095, "learning_rate": 8.767212345206698e-06, "loss": 0.0011, "step": 1993 }, { "epoch": 3.79, "grad_norm": 0.4369969069957733, "learning_rate": 8.741128232772235e-06, "loss": 0.0059, "step": 1994 }, { "epoch": 3.79, "grad_norm": 0.9249531030654907, "learning_rate": 8.715076362115936e-06, "loss": 0.0109, "step": 1995 }, { "epoch": 3.79, "grad_norm": 0.3022116422653198, "learning_rate": 8.689056772748775e-06, "loss": 0.0032, "step": 1996 }, { "epoch": 3.8, "grad_norm": 0.49259495735168457, "learning_rate": 8.663069504132758e-06, "loss": 0.0088, "step": 1997 }, { "epoch": 3.8, "grad_norm": 0.7160173654556274, "learning_rate": 8.637114595680876e-06, "loss": 0.007, "step": 1998 }, { "epoch": 3.8, "grad_norm": 0.5215254426002502, "learning_rate": 8.611192086757051e-06, "loss": 0.0044, "step": 1999 }, { "epoch": 3.8, "grad_norm": 0.3305625021457672, "learning_rate": 8.585302016676059e-06, "loss": 0.0038, "step": 2000 }, { "epoch": 3.8, "eval_blimp_filtered_avg": 0.7370149253731343, "eval_blimp_filtered_std": 0.004870126773467291, "step": 2000 }, { "epoch": 3.8, "eval_blimp_supplement_avg": 0.7693965517241379, "eval_blimp_supplement_std": 0.017941967737557248, "step": 2000 }, { "epoch": 3.8, "eval_vqa_filtered_avg": 0.32, "eval_vqa_filtered_std": 0.046882617226215034, "step": 2000 }, { "epoch": 3.8, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 2000 }, { "epoch": 3.8, "grad_norm": 0.4931644797325134, "learning_rate": 8.559444424703472e-06, "loss": 0.0054, "step": 2001 }, { "epoch": 3.81, "grad_norm": 0.545304000377655, "learning_rate": 8.533619350055608e-06, "loss": 0.0127, "step": 2002 }, { "epoch": 3.81, "grad_norm": 0.4190770089626312, "learning_rate": 8.507826831899483e-06, "loss": 0.0065, "step": 2003 }, { "epoch": 3.81, "grad_norm": 0.2555009722709656, "learning_rate": 8.482066909352724e-06, "loss": 0.0022, "step": 2004 }, { "epoch": 3.81, "grad_norm": 0.4623473882675171, "learning_rate": 8.456339621483507e-06, "loss": 0.0058, "step": 2005 }, { "epoch": 3.81, "grad_norm": 0.4198661148548126, "learning_rate": 8.430645007310557e-06, "loss": 0.0051, "step": 2006 }, { "epoch": 3.82, "grad_norm": 0.377596914768219, "learning_rate": 8.404983105803002e-06, "loss": 0.0033, "step": 2007 }, { "epoch": 3.82, "grad_norm": 0.3927759528160095, "learning_rate": 8.379353955880379e-06, "loss": 0.0035, "step": 2008 }, { "epoch": 3.82, "grad_norm": 0.23971648514270782, "learning_rate": 8.353757596412546e-06, "loss": 0.0033, "step": 2009 }, { "epoch": 3.82, "grad_norm": 0.1616954803466797, "learning_rate": 8.328194066219633e-06, "loss": 0.0013, "step": 2010 }, { "epoch": 3.82, "grad_norm": 1.3864433765411377, "learning_rate": 8.302663404071975e-06, "loss": 0.0086, "step": 2011 }, { "epoch": 3.83, "grad_norm": 0.4333480894565582, "learning_rate": 8.277165648690059e-06, "loss": 0.0047, "step": 2012 }, { "epoch": 3.83, "grad_norm": 0.13646261394023895, "learning_rate": 8.251700838744485e-06, "loss": 0.001, "step": 2013 }, { "epoch": 3.83, "grad_norm": 0.5439692139625549, "learning_rate": 8.226269012855861e-06, "loss": 0.0061, "step": 2014 }, { "epoch": 3.83, "grad_norm": 0.5790982246398926, "learning_rate": 8.20087020959477e-06, "loss": 0.0122, "step": 2015 }, { "epoch": 3.83, "grad_norm": 0.4731241762638092, "learning_rate": 8.175504467481737e-06, "loss": 0.0069, "step": 2016 }, { "epoch": 3.83, "grad_norm": 0.5954114198684692, "learning_rate": 8.15017182498713e-06, "loss": 0.0032, "step": 2017 }, { "epoch": 3.84, "grad_norm": 0.11621417850255966, "learning_rate": 8.12487232053111e-06, "loss": 0.0011, "step": 2018 }, { "epoch": 3.84, "grad_norm": 0.6876360177993774, "learning_rate": 8.099605992483591e-06, "loss": 0.0089, "step": 2019 }, { "epoch": 3.84, "grad_norm": 0.8178340792655945, "learning_rate": 8.074372879164167e-06, "loss": 0.0094, "step": 2020 }, { "epoch": 3.84, "grad_norm": 0.28852492570877075, "learning_rate": 8.049173018842047e-06, "loss": 0.0029, "step": 2021 }, { "epoch": 3.84, "grad_norm": 0.44345101714134216, "learning_rate": 8.024006449736041e-06, "loss": 0.0043, "step": 2022 }, { "epoch": 3.85, "grad_norm": 0.4702736437320709, "learning_rate": 7.998873210014435e-06, "loss": 0.0097, "step": 2023 }, { "epoch": 3.85, "grad_norm": 0.8170405030250549, "learning_rate": 7.973773337794977e-06, "loss": 0.01, "step": 2024 }, { "epoch": 3.85, "grad_norm": 0.26128533482551575, "learning_rate": 7.9487068711448e-06, "loss": 0.0055, "step": 2025 }, { "epoch": 3.85, "grad_norm": 0.5175860524177551, "learning_rate": 7.923673848080405e-06, "loss": 0.0103, "step": 2026 }, { "epoch": 3.85, "grad_norm": 0.38476333022117615, "learning_rate": 7.89867430656754e-06, "loss": 0.0042, "step": 2027 }, { "epoch": 3.86, "grad_norm": 0.5077754259109497, "learning_rate": 7.87370828452117e-06, "loss": 0.003, "step": 2028 }, { "epoch": 3.86, "grad_norm": 0.400964617729187, "learning_rate": 7.848775819805465e-06, "loss": 0.0038, "step": 2029 }, { "epoch": 3.86, "grad_norm": 0.6568931937217712, "learning_rate": 7.823876950233652e-06, "loss": 0.0115, "step": 2030 }, { "epoch": 3.86, "grad_norm": 0.6056835055351257, "learning_rate": 7.799011713568035e-06, "loss": 0.0064, "step": 2031 }, { "epoch": 3.86, "grad_norm": 0.2792681157588959, "learning_rate": 7.774180147519905e-06, "loss": 0.0029, "step": 2032 }, { "epoch": 3.87, "grad_norm": 0.43281757831573486, "learning_rate": 7.74938228974948e-06, "loss": 0.0055, "step": 2033 }, { "epoch": 3.87, "grad_norm": 0.43225181102752686, "learning_rate": 7.724618177865851e-06, "loss": 0.0052, "step": 2034 }, { "epoch": 3.87, "grad_norm": 0.3549021780490875, "learning_rate": 7.69988784942696e-06, "loss": 0.0034, "step": 2035 }, { "epoch": 3.87, "grad_norm": 0.22384007275104523, "learning_rate": 7.675191341939474e-06, "loss": 0.003, "step": 2036 }, { "epoch": 3.87, "grad_norm": 0.6981735229492188, "learning_rate": 7.650528692858789e-06, "loss": 0.0077, "step": 2037 }, { "epoch": 3.87, "grad_norm": 0.48524975776672363, "learning_rate": 7.625899939588929e-06, "loss": 0.0026, "step": 2038 }, { "epoch": 3.88, "grad_norm": 1.8354507684707642, "learning_rate": 7.601305119482549e-06, "loss": 0.0046, "step": 2039 }, { "epoch": 3.88, "grad_norm": 0.3950873613357544, "learning_rate": 7.576744269840803e-06, "loss": 0.0033, "step": 2040 }, { "epoch": 3.88, "grad_norm": 0.6199808120727539, "learning_rate": 7.552217427913338e-06, "loss": 0.0029, "step": 2041 }, { "epoch": 3.88, "grad_norm": 0.5876902341842651, "learning_rate": 7.52772463089823e-06, "loss": 0.0133, "step": 2042 }, { "epoch": 3.88, "grad_norm": 0.24647833406925201, "learning_rate": 7.503265915941906e-06, "loss": 0.0031, "step": 2043 }, { "epoch": 3.89, "grad_norm": 0.273151159286499, "learning_rate": 7.4788413201391085e-06, "loss": 0.0054, "step": 2044 }, { "epoch": 3.89, "grad_norm": 0.853195071220398, "learning_rate": 7.454450880532855e-06, "loss": 0.0111, "step": 2045 }, { "epoch": 3.89, "grad_norm": 0.5560944080352783, "learning_rate": 7.430094634114336e-06, "loss": 0.0059, "step": 2046 }, { "epoch": 3.89, "grad_norm": 0.24572812020778656, "learning_rate": 7.405772617822884e-06, "loss": 0.0031, "step": 2047 }, { "epoch": 3.89, "grad_norm": 0.31085413694381714, "learning_rate": 7.38148486854594e-06, "loss": 0.0031, "step": 2048 }, { "epoch": 3.9, "grad_norm": 0.20510323345661163, "learning_rate": 7.357231423118951e-06, "loss": 0.0035, "step": 2049 }, { "epoch": 3.9, "grad_norm": 0.2305988073348999, "learning_rate": 7.333012318325349e-06, "loss": 0.0015, "step": 2050 }, { "epoch": 3.9, "grad_norm": 0.3253186345100403, "learning_rate": 7.308827590896467e-06, "loss": 0.0032, "step": 2051 }, { "epoch": 3.9, "grad_norm": 0.48094770312309265, "learning_rate": 7.284677277511538e-06, "loss": 0.0041, "step": 2052 }, { "epoch": 3.9, "grad_norm": 0.3493376076221466, "learning_rate": 7.2605614147975675e-06, "loss": 0.0036, "step": 2053 }, { "epoch": 3.9, "grad_norm": 0.153300479054451, "learning_rate": 7.23648003932932e-06, "loss": 0.0014, "step": 2054 }, { "epoch": 3.91, "grad_norm": 0.8644400835037231, "learning_rate": 7.212433187629266e-06, "loss": 0.0054, "step": 2055 }, { "epoch": 3.91, "grad_norm": 0.6497452855110168, "learning_rate": 7.188420896167503e-06, "loss": 0.0102, "step": 2056 }, { "epoch": 3.91, "grad_norm": 0.4875055253505707, "learning_rate": 7.164443201361711e-06, "loss": 0.0043, "step": 2057 }, { "epoch": 3.91, "grad_norm": 0.9820030927658081, "learning_rate": 7.140500139577133e-06, "loss": 0.0311, "step": 2058 }, { "epoch": 3.91, "grad_norm": 0.2610166072845459, "learning_rate": 7.1165917471264455e-06, "loss": 0.0017, "step": 2059 }, { "epoch": 3.92, "grad_norm": 0.2836827039718628, "learning_rate": 7.092718060269763e-06, "loss": 0.0028, "step": 2060 }, { "epoch": 3.92, "grad_norm": 0.4017001688480377, "learning_rate": 7.068879115214559e-06, "loss": 0.0058, "step": 2061 }, { "epoch": 3.92, "grad_norm": 0.14462187886238098, "learning_rate": 7.045074948115631e-06, "loss": 0.0015, "step": 2062 }, { "epoch": 3.92, "grad_norm": 0.43315547704696655, "learning_rate": 7.021305595075015e-06, "loss": 0.0019, "step": 2063 }, { "epoch": 3.92, "grad_norm": 0.4018650949001312, "learning_rate": 6.997571092141955e-06, "loss": 0.0044, "step": 2064 }, { "epoch": 3.93, "grad_norm": 0.4230715036392212, "learning_rate": 6.973871475312835e-06, "loss": 0.0018, "step": 2065 }, { "epoch": 3.93, "grad_norm": 0.6133526563644409, "learning_rate": 6.9502067805311324e-06, "loss": 0.0038, "step": 2066 }, { "epoch": 3.93, "grad_norm": 0.8343930244445801, "learning_rate": 6.926577043687357e-06, "loss": 0.0097, "step": 2067 }, { "epoch": 3.93, "grad_norm": 0.6734695434570312, "learning_rate": 6.9029823006190155e-06, "loss": 0.0027, "step": 2068 }, { "epoch": 3.93, "grad_norm": 0.5046018958091736, "learning_rate": 6.87942258711053e-06, "loss": 0.0034, "step": 2069 }, { "epoch": 3.94, "grad_norm": 0.25391271710395813, "learning_rate": 6.855897938893185e-06, "loss": 0.0027, "step": 2070 }, { "epoch": 3.94, "grad_norm": 0.44689083099365234, "learning_rate": 6.832408391645112e-06, "loss": 0.0029, "step": 2071 }, { "epoch": 3.94, "grad_norm": 0.27414780855178833, "learning_rate": 6.808953980991186e-06, "loss": 0.0027, "step": 2072 }, { "epoch": 3.94, "grad_norm": 0.3180437684059143, "learning_rate": 6.785534742503e-06, "loss": 0.0034, "step": 2073 }, { "epoch": 3.94, "grad_norm": 0.6726911067962646, "learning_rate": 6.762150711698789e-06, "loss": 0.0086, "step": 2074 }, { "epoch": 3.94, "grad_norm": 0.2855995297431946, "learning_rate": 6.738801924043424e-06, "loss": 0.004, "step": 2075 }, { "epoch": 3.95, "grad_norm": 0.547330379486084, "learning_rate": 6.715488414948302e-06, "loss": 0.0092, "step": 2076 }, { "epoch": 3.95, "grad_norm": 0.4335971474647522, "learning_rate": 6.692210219771313e-06, "loss": 0.0051, "step": 2077 }, { "epoch": 3.95, "grad_norm": 0.3217659294605255, "learning_rate": 6.6689673738167966e-06, "loss": 0.002, "step": 2078 }, { "epoch": 3.95, "grad_norm": 0.14226658642292023, "learning_rate": 6.645759912335482e-06, "loss": 0.0022, "step": 2079 }, { "epoch": 3.95, "grad_norm": 0.647911012172699, "learning_rate": 6.622587870524424e-06, "loss": 0.0072, "step": 2080 }, { "epoch": 3.96, "grad_norm": 0.36883682012557983, "learning_rate": 6.599451283526977e-06, "loss": 0.0077, "step": 2081 }, { "epoch": 3.96, "grad_norm": 0.7094461917877197, "learning_rate": 6.57635018643271e-06, "loss": 0.0083, "step": 2082 }, { "epoch": 3.96, "grad_norm": 0.5455428957939148, "learning_rate": 6.553284614277361e-06, "loss": 0.0281, "step": 2083 }, { "epoch": 3.96, "grad_norm": 0.5036842226982117, "learning_rate": 6.530254602042812e-06, "loss": 0.0034, "step": 2084 }, { "epoch": 3.96, "grad_norm": 0.1985722929239273, "learning_rate": 6.5072601846569926e-06, "loss": 0.0022, "step": 2085 }, { "epoch": 3.97, "grad_norm": 0.6863254904747009, "learning_rate": 6.484301396993858e-06, "loss": 0.0068, "step": 2086 }, { "epoch": 3.97, "grad_norm": 0.21521367132663727, "learning_rate": 6.4613782738733265e-06, "loss": 0.0018, "step": 2087 }, { "epoch": 3.97, "grad_norm": 0.26540061831474304, "learning_rate": 6.438490850061221e-06, "loss": 0.0041, "step": 2088 }, { "epoch": 3.97, "grad_norm": 0.6362552046775818, "learning_rate": 6.4156391602692275e-06, "loss": 0.0026, "step": 2089 }, { "epoch": 3.97, "grad_norm": 0.08968577533960342, "learning_rate": 6.392823239154825e-06, "loss": 0.0012, "step": 2090 }, { "epoch": 3.98, "grad_norm": 1.0800411701202393, "learning_rate": 6.370043121321275e-06, "loss": 0.0058, "step": 2091 }, { "epoch": 3.98, "grad_norm": 0.3055567443370819, "learning_rate": 6.347298841317506e-06, "loss": 0.003, "step": 2092 }, { "epoch": 3.98, "grad_norm": 0.369182825088501, "learning_rate": 6.324590433638101e-06, "loss": 0.0038, "step": 2093 }, { "epoch": 3.98, "grad_norm": 0.29505467414855957, "learning_rate": 6.301917932723258e-06, "loss": 0.0023, "step": 2094 }, { "epoch": 3.98, "grad_norm": 0.7598143815994263, "learning_rate": 6.279281372958694e-06, "loss": 0.0077, "step": 2095 }, { "epoch": 3.98, "grad_norm": 0.25651392340660095, "learning_rate": 6.256680788675624e-06, "loss": 0.0028, "step": 2096 }, { "epoch": 3.99, "grad_norm": 0.20776863396167755, "learning_rate": 6.2341162141507125e-06, "loss": 0.0015, "step": 2097 }, { "epoch": 3.99, "grad_norm": 0.25192689895629883, "learning_rate": 6.211587683605995e-06, "loss": 0.0046, "step": 2098 }, { "epoch": 3.99, "grad_norm": 0.3831007182598114, "learning_rate": 6.18909523120885e-06, "loss": 0.0017, "step": 2099 }, { "epoch": 3.99, "grad_norm": 0.27303415536880493, "learning_rate": 6.16663889107194e-06, "loss": 0.007, "step": 2100 }, { "epoch": 3.99, "eval_blimp_filtered_avg": 0.7365671641791045, "eval_blimp_filtered_std": 0.004862381527322526, "step": 2100 }, { "epoch": 3.99, "eval_blimp_supplement_avg": 0.7672413793103449, "eval_blimp_supplement_std": 0.017938041848358677, "step": 2100 }, { "epoch": 3.99, "eval_vqa_filtered_avg": 0.32, "eval_vqa_filtered_std": 0.046882617226215034, "step": 2100 }, { "epoch": 3.99, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 2100 }, { "epoch": 3.99, "grad_norm": 0.5813764929771423, "learning_rate": 6.14421869725315e-06, "loss": 0.0069, "step": 2101 }, { "epoch": 4.0, "grad_norm": 0.48742151260375977, "learning_rate": 6.121834683755559e-06, "loss": 0.0107, "step": 2102 }, { "epoch": 4.0, "grad_norm": 0.46283599734306335, "learning_rate": 6.099486884527347e-06, "loss": 0.0042, "step": 2103 }, { "epoch": 4.0, "grad_norm": 0.2261534035205841, "learning_rate": 6.077175333461809e-06, "loss": 0.0022, "step": 2104 }, { "epoch": 4.0, "grad_norm": 0.3273404538631439, "learning_rate": 6.054900064397243e-06, "loss": 0.0027, "step": 2105 }, { "epoch": 4.0, "grad_norm": 0.2814160883426666, "learning_rate": 6.032661111116906e-06, "loss": 0.002, "step": 2106 }, { "epoch": 4.01, "grad_norm": 0.10023230314254761, "learning_rate": 6.010458507349013e-06, "loss": 0.0012, "step": 2107 }, { "epoch": 4.01, "grad_norm": 0.05479172244668007, "learning_rate": 5.988292286766627e-06, "loss": 0.0007, "step": 2108 }, { "epoch": 4.01, "grad_norm": 0.4346805214881897, "learning_rate": 5.9661624829876334e-06, "loss": 0.0013, "step": 2109 }, { "epoch": 4.01, "grad_norm": 0.2092072069644928, "learning_rate": 5.944069129574686e-06, "loss": 0.0011, "step": 2110 }, { "epoch": 4.01, "grad_norm": 0.502153217792511, "learning_rate": 5.922012260035168e-06, "loss": 0.0023, "step": 2111 }, { "epoch": 4.02, "grad_norm": 0.13216935098171234, "learning_rate": 5.899991907821116e-06, "loss": 0.0013, "step": 2112 }, { "epoch": 4.02, "grad_norm": 0.5114156603813171, "learning_rate": 5.878008106329183e-06, "loss": 0.0032, "step": 2113 }, { "epoch": 4.02, "grad_norm": 0.22112639248371124, "learning_rate": 5.856060888900613e-06, "loss": 0.0027, "step": 2114 }, { "epoch": 4.02, "grad_norm": 0.562226414680481, "learning_rate": 5.834150288821142e-06, "loss": 0.0051, "step": 2115 }, { "epoch": 4.02, "grad_norm": 0.1428949236869812, "learning_rate": 5.812276339320962e-06, "loss": 0.0018, "step": 2116 }, { "epoch": 4.02, "grad_norm": 0.31600385904312134, "learning_rate": 5.790439073574714e-06, "loss": 0.0041, "step": 2117 }, { "epoch": 4.03, "grad_norm": 0.13254402577877045, "learning_rate": 5.768638524701378e-06, "loss": 0.0017, "step": 2118 }, { "epoch": 4.03, "grad_norm": 0.15879802405834198, "learning_rate": 5.746874725764245e-06, "loss": 0.0012, "step": 2119 }, { "epoch": 4.03, "grad_norm": 0.4133429229259491, "learning_rate": 5.725147709770897e-06, "loss": 0.0026, "step": 2120 }, { "epoch": 4.03, "grad_norm": 0.7016065120697021, "learning_rate": 5.703457509673098e-06, "loss": 0.003, "step": 2121 }, { "epoch": 4.03, "grad_norm": 0.16142359375953674, "learning_rate": 5.681804158366797e-06, "loss": 0.0016, "step": 2122 }, { "epoch": 4.04, "grad_norm": 0.34820201992988586, "learning_rate": 5.660187688692048e-06, "loss": 0.0042, "step": 2123 }, { "epoch": 4.04, "grad_norm": 0.3191019892692566, "learning_rate": 5.638608133432973e-06, "loss": 0.0039, "step": 2124 }, { "epoch": 4.04, "grad_norm": 0.08248015493154526, "learning_rate": 5.617065525317706e-06, "loss": 0.0012, "step": 2125 }, { "epoch": 4.04, "grad_norm": 0.27349621057510376, "learning_rate": 5.595559897018342e-06, "loss": 0.0015, "step": 2126 }, { "epoch": 4.04, "grad_norm": 0.11731717735528946, "learning_rate": 5.574091281150907e-06, "loss": 0.0007, "step": 2127 }, { "epoch": 4.05, "grad_norm": 0.1377369463443756, "learning_rate": 5.55265971027528e-06, "loss": 0.001, "step": 2128 }, { "epoch": 4.05, "grad_norm": 0.4597553610801697, "learning_rate": 5.531265216895149e-06, "loss": 0.0021, "step": 2129 }, { "epoch": 4.05, "grad_norm": 0.5730646252632141, "learning_rate": 5.509907833457998e-06, "loss": 0.0022, "step": 2130 }, { "epoch": 4.05, "grad_norm": 0.399208128452301, "learning_rate": 5.488587592354998e-06, "loss": 0.0041, "step": 2131 }, { "epoch": 4.05, "grad_norm": 0.3266209363937378, "learning_rate": 5.467304525921007e-06, "loss": 0.0013, "step": 2132 }, { "epoch": 4.06, "grad_norm": 0.21762153506278992, "learning_rate": 5.446058666434493e-06, "loss": 0.0023, "step": 2133 }, { "epoch": 4.06, "grad_norm": 0.13951054215431213, "learning_rate": 5.424850046117501e-06, "loss": 0.0016, "step": 2134 }, { "epoch": 4.06, "grad_norm": 0.4463764429092407, "learning_rate": 5.403678697135597e-06, "loss": 0.0022, "step": 2135 }, { "epoch": 4.06, "grad_norm": 0.5207061171531677, "learning_rate": 5.382544651597813e-06, "loss": 0.0057, "step": 2136 }, { "epoch": 4.06, "grad_norm": 0.2233709990978241, "learning_rate": 5.361447941556626e-06, "loss": 0.0015, "step": 2137 }, { "epoch": 4.06, "grad_norm": 0.34373053908348083, "learning_rate": 5.340388599007867e-06, "loss": 0.0028, "step": 2138 }, { "epoch": 4.07, "grad_norm": 0.13423624634742737, "learning_rate": 5.319366655890696e-06, "loss": 0.0011, "step": 2139 }, { "epoch": 4.07, "grad_norm": 0.6274204850196838, "learning_rate": 5.298382144087577e-06, "loss": 0.0033, "step": 2140 }, { "epoch": 4.07, "grad_norm": 0.29670166969299316, "learning_rate": 5.277435095424172e-06, "loss": 0.0014, "step": 2141 }, { "epoch": 4.07, "grad_norm": 0.15110144019126892, "learning_rate": 5.2565255416693405e-06, "loss": 0.0009, "step": 2142 }, { "epoch": 4.07, "grad_norm": 0.34043174982070923, "learning_rate": 5.235653514535084e-06, "loss": 0.0032, "step": 2143 }, { "epoch": 4.08, "grad_norm": 0.29326191544532776, "learning_rate": 5.214819045676477e-06, "loss": 0.0036, "step": 2144 }, { "epoch": 4.08, "grad_norm": 0.1864091157913208, "learning_rate": 5.194022166691636e-06, "loss": 0.002, "step": 2145 }, { "epoch": 4.08, "grad_norm": 0.2404012233018875, "learning_rate": 5.17326290912167e-06, "loss": 0.0012, "step": 2146 }, { "epoch": 4.08, "grad_norm": 0.40066882967948914, "learning_rate": 5.152541304450625e-06, "loss": 0.0036, "step": 2147 }, { "epoch": 4.08, "grad_norm": 0.8251341581344604, "learning_rate": 5.131857384105451e-06, "loss": 0.004, "step": 2148 }, { "epoch": 4.09, "grad_norm": 0.4735771119594574, "learning_rate": 5.111211179455929e-06, "loss": 0.0052, "step": 2149 }, { "epoch": 4.09, "grad_norm": 0.19052261114120483, "learning_rate": 5.090602721814664e-06, "loss": 0.0016, "step": 2150 }, { "epoch": 4.09, "grad_norm": 0.5713977813720703, "learning_rate": 5.070032042436992e-06, "loss": 0.0038, "step": 2151 }, { "epoch": 4.09, "grad_norm": 0.39053210616111755, "learning_rate": 5.049499172520952e-06, "loss": 0.0052, "step": 2152 }, { "epoch": 4.09, "grad_norm": 0.13532043993473053, "learning_rate": 5.02900414320726e-06, "loss": 0.0023, "step": 2153 }, { "epoch": 4.1, "grad_norm": 0.16055667400360107, "learning_rate": 5.008546985579229e-06, "loss": 0.0012, "step": 2154 }, { "epoch": 4.1, "grad_norm": 0.3031613528728485, "learning_rate": 4.988127730662734e-06, "loss": 0.0034, "step": 2155 }, { "epoch": 4.1, "grad_norm": 0.13530214130878448, "learning_rate": 4.967746409426166e-06, "loss": 0.0008, "step": 2156 }, { "epoch": 4.1, "grad_norm": 0.07158363610506058, "learning_rate": 4.947403052780386e-06, "loss": 0.001, "step": 2157 }, { "epoch": 4.1, "grad_norm": 0.18955844640731812, "learning_rate": 4.927097691578677e-06, "loss": 0.0022, "step": 2158 }, { "epoch": 4.1, "grad_norm": 0.06351499259471893, "learning_rate": 4.906830356616691e-06, "loss": 0.0007, "step": 2159 }, { "epoch": 4.11, "grad_norm": 0.4450909495353699, "learning_rate": 4.886601078632432e-06, "loss": 0.0029, "step": 2160 }, { "epoch": 4.11, "grad_norm": 0.07548544555902481, "learning_rate": 4.866409888306157e-06, "loss": 0.0006, "step": 2161 }, { "epoch": 4.11, "grad_norm": 0.41776594519615173, "learning_rate": 4.846256816260364e-06, "loss": 0.003, "step": 2162 }, { "epoch": 4.11, "grad_norm": 0.5184533596038818, "learning_rate": 4.82614189305976e-06, "loss": 0.0042, "step": 2163 }, { "epoch": 4.11, "grad_norm": 0.17446200549602509, "learning_rate": 4.806065149211173e-06, "loss": 0.0027, "step": 2164 }, { "epoch": 4.12, "grad_norm": 0.0962333083152771, "learning_rate": 4.786026615163529e-06, "loss": 0.0009, "step": 2165 }, { "epoch": 4.12, "grad_norm": 0.6901455521583557, "learning_rate": 4.766026321307818e-06, "loss": 0.0026, "step": 2166 }, { "epoch": 4.12, "grad_norm": 0.0798833891749382, "learning_rate": 4.74606429797702e-06, "loss": 0.0006, "step": 2167 }, { "epoch": 4.12, "grad_norm": 0.219170942902565, "learning_rate": 4.726140575446083e-06, "loss": 0.0015, "step": 2168 }, { "epoch": 4.12, "grad_norm": 0.3361653983592987, "learning_rate": 4.706255183931854e-06, "loss": 0.0025, "step": 2169 }, { "epoch": 4.13, "grad_norm": 0.29465046525001526, "learning_rate": 4.686408153593057e-06, "loss": 0.0017, "step": 2170 }, { "epoch": 4.13, "grad_norm": 0.26954135298728943, "learning_rate": 4.666599514530235e-06, "loss": 0.0011, "step": 2171 }, { "epoch": 4.13, "grad_norm": 0.21110129356384277, "learning_rate": 4.646829296785697e-06, "loss": 0.0015, "step": 2172 }, { "epoch": 4.13, "grad_norm": 0.4584662616252899, "learning_rate": 4.627097530343502e-06, "loss": 0.0026, "step": 2173 }, { "epoch": 4.13, "grad_norm": 0.23288874328136444, "learning_rate": 4.607404245129369e-06, "loss": 0.0024, "step": 2174 }, { "epoch": 4.13, "grad_norm": 0.11634723842144012, "learning_rate": 4.587749471010664e-06, "loss": 0.0015, "step": 2175 }, { "epoch": 4.14, "grad_norm": 0.4100732207298279, "learning_rate": 4.5681332377963555e-06, "loss": 0.0044, "step": 2176 }, { "epoch": 4.14, "grad_norm": 0.4768621027469635, "learning_rate": 4.5485555752369476e-06, "loss": 0.003, "step": 2177 }, { "epoch": 4.14, "grad_norm": 0.4060652256011963, "learning_rate": 4.529016513024452e-06, "loss": 0.0044, "step": 2178 }, { "epoch": 4.14, "grad_norm": 0.3495858311653137, "learning_rate": 4.509516080792337e-06, "loss": 0.0016, "step": 2179 }, { "epoch": 4.14, "grad_norm": 0.2775787115097046, "learning_rate": 4.490054308115483e-06, "loss": 0.0027, "step": 2180 }, { "epoch": 4.15, "grad_norm": 0.12351595610380173, "learning_rate": 4.470631224510132e-06, "loss": 0.0019, "step": 2181 }, { "epoch": 4.15, "grad_norm": 0.0896507278084755, "learning_rate": 4.45124685943387e-06, "loss": 0.0011, "step": 2182 }, { "epoch": 4.15, "grad_norm": 0.2336849421262741, "learning_rate": 4.431901242285542e-06, "loss": 0.0019, "step": 2183 }, { "epoch": 4.15, "grad_norm": 0.21560931205749512, "learning_rate": 4.412594402405233e-06, "loss": 0.0022, "step": 2184 }, { "epoch": 4.15, "grad_norm": 0.2174382358789444, "learning_rate": 4.393326369074212e-06, "loss": 0.0018, "step": 2185 }, { "epoch": 4.16, "grad_norm": 0.5686240792274475, "learning_rate": 4.374097171514906e-06, "loss": 0.0047, "step": 2186 }, { "epoch": 4.16, "grad_norm": 0.33834362030029297, "learning_rate": 4.354906838890833e-06, "loss": 0.0028, "step": 2187 }, { "epoch": 4.16, "grad_norm": 0.7946546077728271, "learning_rate": 4.335755400306559e-06, "loss": 0.007, "step": 2188 }, { "epoch": 4.16, "grad_norm": 0.5668672919273376, "learning_rate": 4.316642884807687e-06, "loss": 0.0034, "step": 2189 }, { "epoch": 4.16, "grad_norm": 0.3763357400894165, "learning_rate": 4.297569321380765e-06, "loss": 0.004, "step": 2190 }, { "epoch": 4.17, "grad_norm": 0.5797121524810791, "learning_rate": 4.278534738953276e-06, "loss": 0.0035, "step": 2191 }, { "epoch": 4.17, "grad_norm": 0.2249254286289215, "learning_rate": 4.259539166393579e-06, "loss": 0.0034, "step": 2192 }, { "epoch": 4.17, "grad_norm": 0.29160207509994507, "learning_rate": 4.2405826325108695e-06, "loss": 0.0031, "step": 2193 }, { "epoch": 4.17, "grad_norm": 0.09755288809537888, "learning_rate": 4.221665166055137e-06, "loss": 0.0011, "step": 2194 }, { "epoch": 4.17, "grad_norm": 0.16026374697685242, "learning_rate": 4.202786795717135e-06, "loss": 0.0011, "step": 2195 }, { "epoch": 4.17, "grad_norm": 0.04046407341957092, "learning_rate": 4.1839475501282965e-06, "loss": 0.0006, "step": 2196 }, { "epoch": 4.18, "grad_norm": 0.6314577460289001, "learning_rate": 4.165147457860729e-06, "loss": 0.0045, "step": 2197 }, { "epoch": 4.18, "grad_norm": 0.2245558798313141, "learning_rate": 4.14638654742716e-06, "loss": 0.0009, "step": 2198 }, { "epoch": 4.18, "grad_norm": 0.1802760660648346, "learning_rate": 4.127664847280898e-06, "loss": 0.0013, "step": 2199 }, { "epoch": 4.18, "grad_norm": 0.10451976209878922, "learning_rate": 4.108982385815776e-06, "loss": 0.0009, "step": 2200 }, { "epoch": 4.18, "eval_blimp_filtered_avg": 0.7350746268656716, "eval_blimp_filtered_std": 0.004878228749552333, "step": 2200 }, { "epoch": 4.18, "eval_blimp_supplement_avg": 0.7693965517241379, "eval_blimp_supplement_std": 0.0178764236895297, "step": 2200 }, { "epoch": 4.18, "eval_vqa_filtered_avg": 0.32, "eval_vqa_filtered_std": 0.046882617226215034, "step": 2200 }, { "epoch": 4.18, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 2200 }, { "epoch": 4.18, "grad_norm": 0.07488784939050674, "learning_rate": 4.090339191366121e-06, "loss": 0.0008, "step": 2201 }, { "epoch": 4.19, "grad_norm": 0.131003275513649, "learning_rate": 4.071735292206696e-06, "loss": 0.0013, "step": 2202 }, { "epoch": 4.19, "grad_norm": 0.1794111579656601, "learning_rate": 4.053170716552681e-06, "loss": 0.0009, "step": 2203 }, { "epoch": 4.19, "grad_norm": 0.1275775134563446, "learning_rate": 4.034645492559609e-06, "loss": 0.0011, "step": 2204 }, { "epoch": 4.19, "grad_norm": 0.16457538306713104, "learning_rate": 4.016159648323342e-06, "loss": 0.0018, "step": 2205 }, { "epoch": 4.19, "grad_norm": 0.06506211310625076, "learning_rate": 3.9977132118800075e-06, "loss": 0.0005, "step": 2206 }, { "epoch": 4.2, "grad_norm": 0.04938840493559837, "learning_rate": 3.979306211205963e-06, "loss": 0.0006, "step": 2207 }, { "epoch": 4.2, "grad_norm": 0.3126935064792633, "learning_rate": 3.960938674217758e-06, "loss": 0.0019, "step": 2208 }, { "epoch": 4.2, "grad_norm": 0.12894806265830994, "learning_rate": 3.942610628772112e-06, "loss": 0.0007, "step": 2209 }, { "epoch": 4.2, "grad_norm": 0.10940194129943848, "learning_rate": 3.92432210266582e-06, "loss": 0.0009, "step": 2210 }, { "epoch": 4.2, "grad_norm": 2.056753635406494, "learning_rate": 3.90607312363575e-06, "loss": 0.0247, "step": 2211 }, { "epoch": 4.21, "grad_norm": 0.16192509233951569, "learning_rate": 3.88786371935881e-06, "loss": 0.0018, "step": 2212 }, { "epoch": 4.21, "grad_norm": 0.3906192183494568, "learning_rate": 3.869693917451863e-06, "loss": 0.0028, "step": 2213 }, { "epoch": 4.21, "grad_norm": 0.1911083161830902, "learning_rate": 3.851563745471727e-06, "loss": 0.0018, "step": 2214 }, { "epoch": 4.21, "grad_norm": 0.3106186091899872, "learning_rate": 3.8334732309151025e-06, "loss": 0.0054, "step": 2215 }, { "epoch": 4.21, "grad_norm": 0.2753145098686218, "learning_rate": 3.815422401218556e-06, "loss": 0.0037, "step": 2216 }, { "epoch": 4.21, "grad_norm": 0.33287522196769714, "learning_rate": 3.797411283758455e-06, "loss": 0.0048, "step": 2217 }, { "epoch": 4.22, "grad_norm": 0.37221452593803406, "learning_rate": 3.7794399058509564e-06, "loss": 0.0026, "step": 2218 }, { "epoch": 4.22, "grad_norm": 0.10136180371046066, "learning_rate": 3.7615082947519375e-06, "loss": 0.0013, "step": 2219 }, { "epoch": 4.22, "grad_norm": 0.11359865218400955, "learning_rate": 3.7436164776569537e-06, "loss": 0.0009, "step": 2220 }, { "epoch": 4.22, "grad_norm": 0.43286362290382385, "learning_rate": 3.7257644817012194e-06, "loss": 0.0033, "step": 2221 }, { "epoch": 4.22, "grad_norm": 0.12314853817224503, "learning_rate": 3.70795233395956e-06, "loss": 0.0009, "step": 2222 }, { "epoch": 4.23, "grad_norm": 0.15389223396778107, "learning_rate": 3.690180061446353e-06, "loss": 0.0009, "step": 2223 }, { "epoch": 4.23, "grad_norm": 0.12354395538568497, "learning_rate": 3.6724476911155112e-06, "loss": 0.001, "step": 2224 }, { "epoch": 4.23, "grad_norm": 0.22780930995941162, "learning_rate": 3.6547552498604185e-06, "loss": 0.0016, "step": 2225 }, { "epoch": 4.23, "grad_norm": 0.7230266332626343, "learning_rate": 3.6371027645139145e-06, "loss": 0.0024, "step": 2226 }, { "epoch": 4.23, "grad_norm": 0.2772524952888489, "learning_rate": 3.6194902618482264e-06, "loss": 0.0021, "step": 2227 }, { "epoch": 4.24, "grad_norm": 0.2248557209968567, "learning_rate": 3.6019177685749603e-06, "loss": 0.0024, "step": 2228 }, { "epoch": 4.24, "grad_norm": 0.13710734248161316, "learning_rate": 3.584385311345029e-06, "loss": 0.001, "step": 2229 }, { "epoch": 4.24, "grad_norm": 0.14277781546115875, "learning_rate": 3.5668929167486274e-06, "loss": 0.0017, "step": 2230 }, { "epoch": 4.24, "grad_norm": 0.20011688768863678, "learning_rate": 3.549440611315199e-06, "loss": 0.0012, "step": 2231 }, { "epoch": 4.24, "grad_norm": 0.40033504366874695, "learning_rate": 3.532028421513377e-06, "loss": 0.0019, "step": 2232 }, { "epoch": 4.25, "grad_norm": 0.2618328630924225, "learning_rate": 3.5146563737509574e-06, "loss": 0.002, "step": 2233 }, { "epoch": 4.25, "grad_norm": 0.07014365494251251, "learning_rate": 3.497324494374855e-06, "loss": 0.0005, "step": 2234 }, { "epoch": 4.25, "grad_norm": 0.7007044553756714, "learning_rate": 3.48003280967107e-06, "loss": 0.0046, "step": 2235 }, { "epoch": 4.25, "grad_norm": 0.3133013844490051, "learning_rate": 3.4627813458646374e-06, "loss": 0.0018, "step": 2236 }, { "epoch": 4.25, "grad_norm": 0.20324303209781647, "learning_rate": 3.4455701291195884e-06, "loss": 0.0018, "step": 2237 }, { "epoch": 4.25, "grad_norm": 0.15622930228710175, "learning_rate": 3.428399185538923e-06, "loss": 0.0012, "step": 2238 }, { "epoch": 4.26, "grad_norm": 0.07019805908203125, "learning_rate": 3.4112685411645526e-06, "loss": 0.0006, "step": 2239 }, { "epoch": 4.26, "grad_norm": 0.06915505230426788, "learning_rate": 3.394178221977272e-06, "loss": 0.0006, "step": 2240 }, { "epoch": 4.26, "grad_norm": 0.1401248574256897, "learning_rate": 3.3771282538967306e-06, "loss": 0.0014, "step": 2241 }, { "epoch": 4.26, "grad_norm": 0.23029322922229767, "learning_rate": 3.360118662781364e-06, "loss": 0.0015, "step": 2242 }, { "epoch": 4.26, "grad_norm": 0.09618336707353592, "learning_rate": 3.3431494744283665e-06, "loss": 0.0012, "step": 2243 }, { "epoch": 4.27, "grad_norm": 0.1664588451385498, "learning_rate": 3.3262207145736878e-06, "loss": 0.0014, "step": 2244 }, { "epoch": 4.27, "grad_norm": 0.33928313851356506, "learning_rate": 3.3093324088919274e-06, "loss": 0.0021, "step": 2245 }, { "epoch": 4.27, "grad_norm": 0.6191930770874023, "learning_rate": 3.2924845829963466e-06, "loss": 0.005, "step": 2246 }, { "epoch": 4.27, "grad_norm": 0.08277377486228943, "learning_rate": 3.2756772624388166e-06, "loss": 0.0007, "step": 2247 }, { "epoch": 4.27, "grad_norm": 0.43681275844573975, "learning_rate": 3.2589104727097697e-06, "loss": 0.0019, "step": 2248 }, { "epoch": 4.28, "grad_norm": 0.25761955976486206, "learning_rate": 3.242184239238172e-06, "loss": 0.0013, "step": 2249 }, { "epoch": 4.28, "grad_norm": 0.2568921446800232, "learning_rate": 3.2254985873914744e-06, "loss": 0.0022, "step": 2250 }, { "epoch": 4.28, "grad_norm": 0.09449932724237442, "learning_rate": 3.208853542475598e-06, "loss": 0.001, "step": 2251 }, { "epoch": 4.28, "grad_norm": 1.1501524448394775, "learning_rate": 3.19224912973486e-06, "loss": 0.0032, "step": 2252 }, { "epoch": 4.28, "grad_norm": 0.22815896570682526, "learning_rate": 3.1756853743519587e-06, "loss": 0.0007, "step": 2253 }, { "epoch": 4.29, "grad_norm": 0.3710043430328369, "learning_rate": 3.1591623014479365e-06, "loss": 0.0026, "step": 2254 }, { "epoch": 4.29, "grad_norm": 0.3456280529499054, "learning_rate": 3.1426799360821313e-06, "loss": 0.0013, "step": 2255 }, { "epoch": 4.29, "grad_norm": 0.5340463519096375, "learning_rate": 3.1262383032521434e-06, "loss": 0.0028, "step": 2256 }, { "epoch": 4.29, "grad_norm": 0.03505891561508179, "learning_rate": 3.109837427893787e-06, "loss": 0.0003, "step": 2257 }, { "epoch": 4.29, "grad_norm": 0.1148778423666954, "learning_rate": 3.093477334881084e-06, "loss": 0.0034, "step": 2258 }, { "epoch": 4.29, "grad_norm": 0.5770148634910583, "learning_rate": 3.0771580490261874e-06, "loss": 0.0029, "step": 2259 }, { "epoch": 4.3, "grad_norm": 0.23489557206630707, "learning_rate": 3.0608795950793687e-06, "loss": 0.0033, "step": 2260 }, { "epoch": 4.3, "grad_norm": 0.3386395275592804, "learning_rate": 3.0446419977289665e-06, "loss": 0.002, "step": 2261 }, { "epoch": 4.3, "grad_norm": 0.1545574963092804, "learning_rate": 3.0284452816013607e-06, "loss": 0.001, "step": 2262 }, { "epoch": 4.3, "grad_norm": 0.5714330673217773, "learning_rate": 3.012289471260923e-06, "loss": 0.0047, "step": 2263 }, { "epoch": 4.3, "grad_norm": 0.09166965633630753, "learning_rate": 2.996174591209999e-06, "loss": 0.0009, "step": 2264 }, { "epoch": 4.31, "grad_norm": 0.15859735012054443, "learning_rate": 2.980100665888846e-06, "loss": 0.0008, "step": 2265 }, { "epoch": 4.31, "grad_norm": 0.09787857532501221, "learning_rate": 2.9640677196756104e-06, "loss": 0.001, "step": 2266 }, { "epoch": 4.31, "grad_norm": 0.25462406873703003, "learning_rate": 2.9480757768862976e-06, "loss": 0.0021, "step": 2267 }, { "epoch": 4.31, "grad_norm": 0.09256923943758011, "learning_rate": 2.9321248617747164e-06, "loss": 0.0011, "step": 2268 }, { "epoch": 4.31, "grad_norm": 0.2532057762145996, "learning_rate": 2.9162149985324527e-06, "loss": 0.0033, "step": 2269 }, { "epoch": 4.32, "grad_norm": 0.505912184715271, "learning_rate": 2.9003462112888344e-06, "loss": 0.0027, "step": 2270 }, { "epoch": 4.32, "grad_norm": 0.3633926212787628, "learning_rate": 2.884518524110894e-06, "loss": 0.0028, "step": 2271 }, { "epoch": 4.32, "grad_norm": 0.057133954018354416, "learning_rate": 2.8687319610033292e-06, "loss": 0.0006, "step": 2272 }, { "epoch": 4.32, "grad_norm": 0.2013535052537918, "learning_rate": 2.852986545908458e-06, "loss": 0.0012, "step": 2273 }, { "epoch": 4.32, "grad_norm": 0.21683816611766815, "learning_rate": 2.8372823027062123e-06, "loss": 0.0019, "step": 2274 }, { "epoch": 4.33, "grad_norm": 0.25325772166252136, "learning_rate": 2.8216192552140697e-06, "loss": 0.0026, "step": 2275 }, { "epoch": 4.33, "grad_norm": 0.1974702775478363, "learning_rate": 2.805997427187025e-06, "loss": 0.0009, "step": 2276 }, { "epoch": 4.33, "grad_norm": 0.14971096813678741, "learning_rate": 2.790416842317569e-06, "loss": 0.0009, "step": 2277 }, { "epoch": 4.33, "grad_norm": 0.4378780424594879, "learning_rate": 2.774877524235637e-06, "loss": 0.0026, "step": 2278 }, { "epoch": 4.33, "grad_norm": 0.20723019540309906, "learning_rate": 2.7593794965085674e-06, "loss": 0.0011, "step": 2279 }, { "epoch": 4.33, "grad_norm": 0.1812250018119812, "learning_rate": 2.743922782641104e-06, "loss": 0.0009, "step": 2280 }, { "epoch": 4.34, "grad_norm": 0.1105412095785141, "learning_rate": 2.7285074060753057e-06, "loss": 0.0005, "step": 2281 }, { "epoch": 4.34, "grad_norm": 0.166802316904068, "learning_rate": 2.7131333901905508e-06, "loss": 0.0006, "step": 2282 }, { "epoch": 4.34, "grad_norm": 0.21133564412593842, "learning_rate": 2.6978007583034845e-06, "loss": 0.0012, "step": 2283 }, { "epoch": 4.34, "grad_norm": 0.1005329042673111, "learning_rate": 2.6825095336679896e-06, "loss": 0.0011, "step": 2284 }, { "epoch": 4.34, "grad_norm": 0.017541740089654922, "learning_rate": 2.6672597394751508e-06, "loss": 0.0002, "step": 2285 }, { "epoch": 4.35, "grad_norm": 0.24828095734119415, "learning_rate": 2.6520513988532113e-06, "loss": 0.0009, "step": 2286 }, { "epoch": 4.35, "grad_norm": 0.2944730818271637, "learning_rate": 2.636884534867564e-06, "loss": 0.0025, "step": 2287 }, { "epoch": 4.35, "grad_norm": 0.4525836110115051, "learning_rate": 2.6217591705206758e-06, "loss": 0.0041, "step": 2288 }, { "epoch": 4.35, "grad_norm": 0.2006378173828125, "learning_rate": 2.6066753287520728e-06, "loss": 0.0017, "step": 2289 }, { "epoch": 4.35, "grad_norm": 0.6250471472740173, "learning_rate": 2.5916330324383364e-06, "loss": 0.0018, "step": 2290 }, { "epoch": 4.36, "grad_norm": 0.30300450325012207, "learning_rate": 2.576632304393004e-06, "loss": 0.0029, "step": 2291 }, { "epoch": 4.36, "grad_norm": 0.0917062982916832, "learning_rate": 2.5616731673665905e-06, "loss": 0.001, "step": 2292 }, { "epoch": 4.36, "grad_norm": 0.052613791078329086, "learning_rate": 2.5467556440465245e-06, "loss": 0.0008, "step": 2293 }, { "epoch": 4.36, "grad_norm": 0.04219425097107887, "learning_rate": 2.531879757057125e-06, "loss": 0.0004, "step": 2294 }, { "epoch": 4.36, "grad_norm": 0.11975749582052231, "learning_rate": 2.5170455289595585e-06, "loss": 0.0005, "step": 2295 }, { "epoch": 4.37, "grad_norm": 0.36920997500419617, "learning_rate": 2.502252982251818e-06, "loss": 0.0026, "step": 2296 }, { "epoch": 4.37, "grad_norm": 0.21274226903915405, "learning_rate": 2.4875021393686848e-06, "loss": 0.0019, "step": 2297 }, { "epoch": 4.37, "grad_norm": 0.11421352624893188, "learning_rate": 2.472793022681676e-06, "loss": 0.0015, "step": 2298 }, { "epoch": 4.37, "grad_norm": 0.2699141800403595, "learning_rate": 2.4581256544990383e-06, "loss": 0.0012, "step": 2299 }, { "epoch": 4.37, "grad_norm": 0.23244938254356384, "learning_rate": 2.4435000570656998e-06, "loss": 0.0018, "step": 2300 }, { "epoch": 4.37, "eval_blimp_filtered_avg": 0.735820895522388, "eval_blimp_filtered_std": 0.004876544831833769, "step": 2300 }, { "epoch": 4.37, "eval_blimp_supplement_avg": 0.771551724137931, "eval_blimp_supplement_std": 0.01785408390039854, "step": 2300 }, { "epoch": 4.37, "eval_vqa_filtered_avg": 0.31, "eval_vqa_filtered_std": 0.04648231987117316, "step": 2300 }, { "epoch": 4.37, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 2300 }, { "epoch": 4.37, "grad_norm": 0.3278847336769104, "learning_rate": 2.428916252563237e-06, "loss": 0.0031, "step": 2301 }, { "epoch": 4.38, "grad_norm": 0.20433160662651062, "learning_rate": 2.414374263109831e-06, "loss": 0.002, "step": 2302 }, { "epoch": 4.38, "grad_norm": 0.07027294486761093, "learning_rate": 2.3998741107602697e-06, "loss": 0.0008, "step": 2303 }, { "epoch": 4.38, "grad_norm": 0.489435076713562, "learning_rate": 2.385415817505868e-06, "loss": 0.0031, "step": 2304 }, { "epoch": 4.38, "grad_norm": 0.5638420581817627, "learning_rate": 2.37099940527446e-06, "loss": 0.002, "step": 2305 }, { "epoch": 4.38, "grad_norm": 0.3394109010696411, "learning_rate": 2.3566248959303715e-06, "loss": 0.0029, "step": 2306 }, { "epoch": 4.39, "grad_norm": 0.2899934947490692, "learning_rate": 2.3422923112743677e-06, "loss": 0.0021, "step": 2307 }, { "epoch": 4.39, "grad_norm": 0.4708399474620819, "learning_rate": 2.3280016730436337e-06, "loss": 0.0018, "step": 2308 }, { "epoch": 4.39, "grad_norm": 0.08708974719047546, "learning_rate": 2.3137530029117295e-06, "loss": 0.0008, "step": 2309 }, { "epoch": 4.39, "grad_norm": 0.7561993598937988, "learning_rate": 2.299546322488585e-06, "loss": 0.007, "step": 2310 }, { "epoch": 4.39, "grad_norm": 0.3716883361339569, "learning_rate": 2.2853816533204288e-06, "loss": 0.0022, "step": 2311 }, { "epoch": 4.4, "grad_norm": 0.17910340428352356, "learning_rate": 2.271259016889776e-06, "loss": 0.0014, "step": 2312 }, { "epoch": 4.4, "grad_norm": 0.14461778104305267, "learning_rate": 2.2571784346154047e-06, "loss": 0.0015, "step": 2313 }, { "epoch": 4.4, "grad_norm": 0.14999932050704956, "learning_rate": 2.243139927852308e-06, "loss": 0.0012, "step": 2314 }, { "epoch": 4.4, "grad_norm": 0.4657723009586334, "learning_rate": 2.2291435178916552e-06, "loss": 0.0043, "step": 2315 }, { "epoch": 4.4, "grad_norm": 0.2393365502357483, "learning_rate": 2.2151892259607833e-06, "loss": 0.0009, "step": 2316 }, { "epoch": 4.4, "grad_norm": 0.5149463415145874, "learning_rate": 2.2012770732231493e-06, "loss": 0.0136, "step": 2317 }, { "epoch": 4.41, "grad_norm": 0.6978269815444946, "learning_rate": 2.1874070807782952e-06, "loss": 0.0049, "step": 2318 }, { "epoch": 4.41, "grad_norm": 0.1278958022594452, "learning_rate": 2.173579269661822e-06, "loss": 0.0008, "step": 2319 }, { "epoch": 4.41, "grad_norm": 0.0966443344950676, "learning_rate": 2.1597936608453727e-06, "loss": 0.0007, "step": 2320 }, { "epoch": 4.41, "grad_norm": 0.10803236812353134, "learning_rate": 2.1460502752365672e-06, "loss": 0.0012, "step": 2321 }, { "epoch": 4.41, "grad_norm": 0.2597060799598694, "learning_rate": 2.132349133678989e-06, "loss": 0.0022, "step": 2322 }, { "epoch": 4.42, "grad_norm": 0.22782136499881744, "learning_rate": 2.118690256952169e-06, "loss": 0.0019, "step": 2323 }, { "epoch": 4.42, "grad_norm": 0.13722477853298187, "learning_rate": 2.1050736657715242e-06, "loss": 0.001, "step": 2324 }, { "epoch": 4.42, "grad_norm": 0.3209693133831024, "learning_rate": 2.0914993807883375e-06, "loss": 0.0024, "step": 2325 }, { "epoch": 4.42, "grad_norm": 0.4220280945301056, "learning_rate": 2.077967422589745e-06, "loss": 0.0032, "step": 2326 }, { "epoch": 4.42, "grad_norm": 0.26442980766296387, "learning_rate": 2.0644778116986798e-06, "loss": 0.0015, "step": 2327 }, { "epoch": 4.43, "grad_norm": 0.2073400616645813, "learning_rate": 2.0510305685738397e-06, "loss": 0.0011, "step": 2328 }, { "epoch": 4.43, "grad_norm": 0.08138926327228546, "learning_rate": 2.0376257136096843e-06, "loss": 0.0007, "step": 2329 }, { "epoch": 4.43, "grad_norm": 0.2356102168560028, "learning_rate": 2.024263267136375e-06, "loss": 0.0021, "step": 2330 }, { "epoch": 4.43, "grad_norm": 0.28146153688430786, "learning_rate": 2.01094324941976e-06, "loss": 0.0011, "step": 2331 }, { "epoch": 4.43, "grad_norm": 0.1627344787120819, "learning_rate": 1.9976656806613336e-06, "loss": 0.0025, "step": 2332 }, { "epoch": 4.44, "grad_norm": 0.09014760702848434, "learning_rate": 1.9844305809982177e-06, "loss": 0.001, "step": 2333 }, { "epoch": 4.44, "grad_norm": 0.7490527033805847, "learning_rate": 1.9712379705031225e-06, "loss": 0.0058, "step": 2334 }, { "epoch": 4.44, "grad_norm": 0.35208097100257874, "learning_rate": 1.958087869184313e-06, "loss": 0.0027, "step": 2335 }, { "epoch": 4.44, "grad_norm": 0.37949320673942566, "learning_rate": 1.9449802969855924e-06, "loss": 0.0019, "step": 2336 }, { "epoch": 4.44, "grad_norm": 0.23057514429092407, "learning_rate": 1.9319152737862557e-06, "loss": 0.0034, "step": 2337 }, { "epoch": 4.44, "grad_norm": 0.3224025070667267, "learning_rate": 1.918892819401069e-06, "loss": 0.0038, "step": 2338 }, { "epoch": 4.45, "grad_norm": 0.1443626433610916, "learning_rate": 1.9059129535802378e-06, "loss": 0.0014, "step": 2339 }, { "epoch": 4.45, "grad_norm": 0.05364913493394852, "learning_rate": 1.8929756960093748e-06, "loss": 0.0004, "step": 2340 }, { "epoch": 4.45, "grad_norm": 0.05624647065997124, "learning_rate": 1.8800810663094746e-06, "loss": 0.0005, "step": 2341 }, { "epoch": 4.45, "grad_norm": 0.6527302861213684, "learning_rate": 1.8672290840368844e-06, "loss": 0.0064, "step": 2342 }, { "epoch": 4.45, "grad_norm": 0.08485785871744156, "learning_rate": 1.8544197686832654e-06, "loss": 0.0008, "step": 2343 }, { "epoch": 4.46, "grad_norm": 0.06775938719511032, "learning_rate": 1.8416531396755677e-06, "loss": 0.0006, "step": 2344 }, { "epoch": 4.46, "grad_norm": 0.19240416586399078, "learning_rate": 1.8289292163760029e-06, "loss": 0.0018, "step": 2345 }, { "epoch": 4.46, "grad_norm": 0.19794131815433502, "learning_rate": 1.8162480180820218e-06, "loss": 0.0025, "step": 2346 }, { "epoch": 4.46, "grad_norm": 0.31101974844932556, "learning_rate": 1.8036095640262728e-06, "loss": 0.0014, "step": 2347 }, { "epoch": 4.46, "grad_norm": 0.24028553068637848, "learning_rate": 1.7910138733765668e-06, "loss": 0.002, "step": 2348 }, { "epoch": 4.47, "grad_norm": 0.7509512901306152, "learning_rate": 1.778460965235883e-06, "loss": 0.0027, "step": 2349 }, { "epoch": 4.47, "grad_norm": 0.0803212970495224, "learning_rate": 1.7659508586422858e-06, "loss": 0.0009, "step": 2350 }, { "epoch": 4.47, "grad_norm": 0.5486602783203125, "learning_rate": 1.7534835725689513e-06, "loss": 0.003, "step": 2351 }, { "epoch": 4.47, "grad_norm": 0.2206975668668747, "learning_rate": 1.7410591259240916e-06, "loss": 0.0014, "step": 2352 }, { "epoch": 4.47, "grad_norm": 0.29476413130760193, "learning_rate": 1.7286775375509668e-06, "loss": 0.0014, "step": 2353 }, { "epoch": 4.48, "grad_norm": 0.23067010939121246, "learning_rate": 1.7163388262278157e-06, "loss": 0.0028, "step": 2354 }, { "epoch": 4.48, "grad_norm": 0.1426241099834442, "learning_rate": 1.7040430106678728e-06, "loss": 0.0017, "step": 2355 }, { "epoch": 4.48, "grad_norm": 0.18827100098133087, "learning_rate": 1.6917901095193011e-06, "loss": 0.001, "step": 2356 }, { "epoch": 4.48, "grad_norm": 0.4491666257381439, "learning_rate": 1.6795801413651756e-06, "loss": 0.0043, "step": 2357 }, { "epoch": 4.48, "grad_norm": 0.7452276349067688, "learning_rate": 1.6674131247234636e-06, "loss": 0.0069, "step": 2358 }, { "epoch": 4.48, "grad_norm": 0.8492374420166016, "learning_rate": 1.6552890780469977e-06, "loss": 0.0059, "step": 2359 }, { "epoch": 4.49, "grad_norm": 0.1093207523226738, "learning_rate": 1.643208019723429e-06, "loss": 0.001, "step": 2360 }, { "epoch": 4.49, "grad_norm": 0.12802806496620178, "learning_rate": 1.6311699680752179e-06, "loss": 0.0007, "step": 2361 }, { "epoch": 4.49, "grad_norm": 0.14585329592227936, "learning_rate": 1.619174941359597e-06, "loss": 0.001, "step": 2362 }, { "epoch": 4.49, "grad_norm": 0.2742142677307129, "learning_rate": 1.607222957768547e-06, "loss": 0.0021, "step": 2363 }, { "epoch": 4.49, "grad_norm": 0.026210980489850044, "learning_rate": 1.5953140354287655e-06, "loss": 0.0003, "step": 2364 }, { "epoch": 4.5, "grad_norm": 0.12615147233009338, "learning_rate": 1.5834481924016542e-06, "loss": 0.0017, "step": 2365 }, { "epoch": 4.5, "grad_norm": 0.19228467345237732, "learning_rate": 1.5716254466832613e-06, "loss": 0.0009, "step": 2366 }, { "epoch": 4.5, "grad_norm": 0.3419423997402191, "learning_rate": 1.5598458162042872e-06, "loss": 0.0015, "step": 2367 }, { "epoch": 4.5, "grad_norm": 0.22453303635120392, "learning_rate": 1.5481093188300344e-06, "loss": 0.0016, "step": 2368 }, { "epoch": 4.5, "grad_norm": 0.24328398704528809, "learning_rate": 1.5364159723603942e-06, "loss": 0.0013, "step": 2369 }, { "epoch": 4.51, "grad_norm": 0.4312683343887329, "learning_rate": 1.5247657945298077e-06, "loss": 0.0055, "step": 2370 }, { "epoch": 4.51, "grad_norm": 0.47260892391204834, "learning_rate": 1.5131588030072474e-06, "loss": 0.0039, "step": 2371 }, { "epoch": 4.51, "grad_norm": 0.3590221405029297, "learning_rate": 1.5015950153961988e-06, "loss": 0.0017, "step": 2372 }, { "epoch": 4.51, "grad_norm": 0.5437275171279907, "learning_rate": 1.490074449234603e-06, "loss": 0.0046, "step": 2373 }, { "epoch": 4.51, "grad_norm": 0.13215738534927368, "learning_rate": 1.4785971219948702e-06, "loss": 0.0011, "step": 2374 }, { "epoch": 4.52, "grad_norm": 0.30088549852371216, "learning_rate": 1.4671630510838196e-06, "loss": 0.0012, "step": 2375 }, { "epoch": 4.52, "grad_norm": 0.4471828043460846, "learning_rate": 1.4557722538426698e-06, "loss": 0.0043, "step": 2376 }, { "epoch": 4.52, "grad_norm": 0.06588437408208847, "learning_rate": 1.4444247475470117e-06, "loss": 0.0006, "step": 2377 }, { "epoch": 4.52, "grad_norm": 0.3304627239704132, "learning_rate": 1.4331205494067857e-06, "loss": 0.0011, "step": 2378 }, { "epoch": 4.52, "grad_norm": 0.4730941653251648, "learning_rate": 1.4218596765662372e-06, "loss": 0.0022, "step": 2379 }, { "epoch": 4.52, "grad_norm": 0.2745803892612457, "learning_rate": 1.410642146103912e-06, "loss": 0.0016, "step": 2380 }, { "epoch": 4.53, "grad_norm": 0.3122328221797943, "learning_rate": 1.3994679750326145e-06, "loss": 0.0034, "step": 2381 }, { "epoch": 4.53, "grad_norm": 0.2697027325630188, "learning_rate": 1.3883371802994017e-06, "loss": 0.0013, "step": 2382 }, { "epoch": 4.53, "grad_norm": 0.7577111124992371, "learning_rate": 1.3772497787855331e-06, "loss": 0.0025, "step": 2383 }, { "epoch": 4.53, "grad_norm": 0.14535383880138397, "learning_rate": 1.3662057873064615e-06, "loss": 0.0011, "step": 2384 }, { "epoch": 4.53, "grad_norm": 0.17643114924430847, "learning_rate": 1.3552052226118018e-06, "loss": 0.0009, "step": 2385 }, { "epoch": 4.54, "grad_norm": 0.2357095628976822, "learning_rate": 1.3442481013853048e-06, "loss": 0.001, "step": 2386 }, { "epoch": 4.54, "grad_norm": 0.26036277413368225, "learning_rate": 1.3333344402448378e-06, "loss": 0.001, "step": 2387 }, { "epoch": 4.54, "grad_norm": 0.728064239025116, "learning_rate": 1.322464255742354e-06, "loss": 0.0038, "step": 2388 }, { "epoch": 4.54, "grad_norm": 0.14768853783607483, "learning_rate": 1.311637564363869e-06, "loss": 0.0009, "step": 2389 }, { "epoch": 4.54, "grad_norm": 0.1166086196899414, "learning_rate": 1.3008543825294316e-06, "loss": 0.0014, "step": 2390 }, { "epoch": 4.55, "grad_norm": 0.2811184227466583, "learning_rate": 1.29011472659311e-06, "loss": 0.0035, "step": 2391 }, { "epoch": 4.55, "grad_norm": 0.20858044922351837, "learning_rate": 1.2794186128429587e-06, "loss": 0.0072, "step": 2392 }, { "epoch": 4.55, "grad_norm": 0.16962888836860657, "learning_rate": 1.268766057500984e-06, "loss": 0.001, "step": 2393 }, { "epoch": 4.55, "grad_norm": 0.1812812238931656, "learning_rate": 1.2581570767231398e-06, "loss": 0.001, "step": 2394 }, { "epoch": 4.55, "grad_norm": 0.2835127115249634, "learning_rate": 1.247591686599302e-06, "loss": 0.0033, "step": 2395 }, { "epoch": 4.56, "grad_norm": 0.13727015256881714, "learning_rate": 1.2370699031532162e-06, "loss": 0.0017, "step": 2396 }, { "epoch": 4.56, "grad_norm": 0.13082189857959747, "learning_rate": 1.2265917423425077e-06, "loss": 0.0005, "step": 2397 }, { "epoch": 4.56, "grad_norm": 0.1884649395942688, "learning_rate": 1.2161572200586345e-06, "loss": 0.0014, "step": 2398 }, { "epoch": 4.56, "grad_norm": 0.29612240195274353, "learning_rate": 1.205766352126878e-06, "loss": 0.0008, "step": 2399 }, { "epoch": 4.56, "grad_norm": 0.49802684783935547, "learning_rate": 1.195419154306302e-06, "loss": 0.0019, "step": 2400 }, { "epoch": 4.56, "eval_blimp_filtered_avg": 0.7353731343283582, "eval_blimp_filtered_std": 0.004875229587591827, "step": 2400 }, { "epoch": 4.56, "eval_blimp_supplement_avg": 0.7737068965517241, "eval_blimp_supplement_std": 0.01779217497937174, "step": 2400 }, { "epoch": 4.56, "eval_vqa_filtered_avg": 0.32, "eval_vqa_filtered_std": 0.046882617226215034, "step": 2400 }, { "epoch": 4.56, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 2400 }, { "epoch": 4.56, "grad_norm": 0.16176338493824005, "learning_rate": 1.185115642289757e-06, "loss": 0.0016, "step": 2401 }, { "epoch": 4.57, "grad_norm": 0.328130841255188, "learning_rate": 1.1748558317038161e-06, "loss": 0.0018, "step": 2402 }, { "epoch": 4.57, "grad_norm": 0.24625727534294128, "learning_rate": 1.1646397381087893e-06, "loss": 0.001, "step": 2403 }, { "epoch": 4.57, "grad_norm": 0.24211180210113525, "learning_rate": 1.1544673769986825e-06, "loss": 0.0015, "step": 2404 }, { "epoch": 4.57, "grad_norm": 0.4728427529335022, "learning_rate": 1.1443387638011681e-06, "loss": 0.0021, "step": 2405 }, { "epoch": 4.57, "grad_norm": 0.08558090776205063, "learning_rate": 1.1342539138775787e-06, "loss": 0.0007, "step": 2406 }, { "epoch": 4.58, "grad_norm": 0.202053964138031, "learning_rate": 1.124212842522866e-06, "loss": 0.0008, "step": 2407 }, { "epoch": 4.58, "grad_norm": 0.3982681334018707, "learning_rate": 1.1142155649655883e-06, "loss": 0.0017, "step": 2408 }, { "epoch": 4.58, "grad_norm": 0.3338080942630768, "learning_rate": 1.1042620963678907e-06, "loss": 0.0032, "step": 2409 }, { "epoch": 4.58, "grad_norm": 0.13139772415161133, "learning_rate": 1.0943524518254644e-06, "loss": 0.0015, "step": 2410 }, { "epoch": 4.58, "grad_norm": 0.0735810324549675, "learning_rate": 1.084486646367554e-06, "loss": 0.0009, "step": 2411 }, { "epoch": 4.59, "grad_norm": 0.06594954431056976, "learning_rate": 1.0746646949569006e-06, "loss": 0.0007, "step": 2412 }, { "epoch": 4.59, "grad_norm": 0.30022093653678894, "learning_rate": 1.0648866124897382e-06, "loss": 0.0017, "step": 2413 }, { "epoch": 4.59, "grad_norm": 0.06074086204171181, "learning_rate": 1.0551524137957747e-06, "loss": 0.0004, "step": 2414 }, { "epoch": 4.59, "grad_norm": 0.2615915834903717, "learning_rate": 1.045462113638157e-06, "loss": 0.002, "step": 2415 }, { "epoch": 4.59, "grad_norm": 0.1418762058019638, "learning_rate": 1.035815726713456e-06, "loss": 0.0014, "step": 2416 }, { "epoch": 4.6, "grad_norm": 0.4479876160621643, "learning_rate": 1.026213267651639e-06, "loss": 0.0023, "step": 2417 }, { "epoch": 4.6, "grad_norm": 0.2596869170665741, "learning_rate": 1.0166547510160595e-06, "loss": 0.002, "step": 2418 }, { "epoch": 4.6, "grad_norm": 0.5878065228462219, "learning_rate": 1.0071401913034185e-06, "loss": 0.0018, "step": 2419 }, { "epoch": 4.6, "grad_norm": 0.17570051550865173, "learning_rate": 9.97669602943756e-07, "loss": 0.0009, "step": 2420 }, { "epoch": 4.6, "grad_norm": 0.20731766521930695, "learning_rate": 9.882430003004162e-07, "loss": 0.0017, "step": 2421 }, { "epoch": 4.6, "grad_norm": 0.4420585632324219, "learning_rate": 9.788603976700461e-07, "loss": 0.0033, "step": 2422 }, { "epoch": 4.61, "grad_norm": 0.6178790926933289, "learning_rate": 9.695218092825465e-07, "loss": 0.0031, "step": 2423 }, { "epoch": 4.61, "grad_norm": 0.20594890415668488, "learning_rate": 9.602272493010779e-07, "loss": 0.0022, "step": 2424 }, { "epoch": 4.61, "grad_norm": 0.12181002646684647, "learning_rate": 9.509767318220209e-07, "loss": 0.001, "step": 2425 }, { "epoch": 4.61, "grad_norm": 0.2186240404844284, "learning_rate": 9.417702708749531e-07, "loss": 0.0012, "step": 2426 }, { "epoch": 4.61, "grad_norm": 0.4868548810482025, "learning_rate": 9.32607880422649e-07, "loss": 0.0026, "step": 2427 }, { "epoch": 4.62, "grad_norm": 0.14514607191085815, "learning_rate": 9.23489574361036e-07, "loss": 0.0011, "step": 2428 }, { "epoch": 4.62, "grad_norm": 0.09579657018184662, "learning_rate": 9.14415366519179e-07, "loss": 0.0011, "step": 2429 }, { "epoch": 4.62, "grad_norm": 0.1572553664445877, "learning_rate": 9.053852706592725e-07, "loss": 0.001, "step": 2430 }, { "epoch": 4.62, "grad_norm": 0.3067115247249603, "learning_rate": 8.963993004765946e-07, "loss": 0.0014, "step": 2431 }, { "epoch": 4.62, "grad_norm": 0.9155110120773315, "learning_rate": 8.874574695995175e-07, "loss": 0.0041, "step": 2432 }, { "epoch": 4.63, "grad_norm": 0.6904696226119995, "learning_rate": 8.785597915894561e-07, "loss": 0.0047, "step": 2433 }, { "epoch": 4.63, "grad_norm": 0.45636120438575745, "learning_rate": 8.697062799408762e-07, "loss": 0.002, "step": 2434 }, { "epoch": 4.63, "grad_norm": 0.18764224648475647, "learning_rate": 8.608969480812468e-07, "loss": 0.0009, "step": 2435 }, { "epoch": 4.63, "grad_norm": 0.5872719883918762, "learning_rate": 8.521318093710373e-07, "loss": 0.0033, "step": 2436 }, { "epoch": 4.63, "grad_norm": 0.34531182050704956, "learning_rate": 8.434108771037042e-07, "loss": 0.0007, "step": 2437 }, { "epoch": 4.63, "grad_norm": 0.22775159776210785, "learning_rate": 8.347341645056372e-07, "loss": 0.0012, "step": 2438 }, { "epoch": 4.64, "grad_norm": 0.49454137682914734, "learning_rate": 8.261016847361736e-07, "loss": 0.0026, "step": 2439 }, { "epoch": 4.64, "grad_norm": 0.20178662240505219, "learning_rate": 8.175134508875703e-07, "loss": 0.0023, "step": 2440 }, { "epoch": 4.64, "grad_norm": 0.1483611911535263, "learning_rate": 8.08969475984972e-07, "loss": 0.0015, "step": 2441 }, { "epoch": 4.64, "grad_norm": 0.6181549429893494, "learning_rate": 8.004697729863997e-07, "loss": 0.0033, "step": 2442 }, { "epoch": 4.64, "grad_norm": 0.28629112243652344, "learning_rate": 7.920143547827352e-07, "loss": 0.0015, "step": 2443 }, { "epoch": 4.65, "grad_norm": 0.4971417486667633, "learning_rate": 7.836032341976907e-07, "loss": 0.0029, "step": 2444 }, { "epoch": 4.65, "grad_norm": 0.10344850271940231, "learning_rate": 7.752364239878018e-07, "loss": 0.0009, "step": 2445 }, { "epoch": 4.65, "grad_norm": 0.44091737270355225, "learning_rate": 7.669139368423916e-07, "loss": 0.0023, "step": 2446 }, { "epoch": 4.65, "grad_norm": 0.2271566390991211, "learning_rate": 7.586357853835768e-07, "loss": 0.0022, "step": 2447 }, { "epoch": 4.65, "grad_norm": 0.3080141544342041, "learning_rate": 7.504019821662211e-07, "loss": 0.0012, "step": 2448 }, { "epoch": 4.66, "grad_norm": 0.20020586252212524, "learning_rate": 7.422125396779323e-07, "loss": 0.002, "step": 2449 }, { "epoch": 4.66, "grad_norm": 0.30571305751800537, "learning_rate": 7.340674703390415e-07, "loss": 0.002, "step": 2450 }, { "epoch": 4.66, "grad_norm": 0.21792994439601898, "learning_rate": 7.259667865025776e-07, "loss": 0.0015, "step": 2451 }, { "epoch": 4.66, "grad_norm": 0.31306275725364685, "learning_rate": 7.179105004542629e-07, "loss": 0.0018, "step": 2452 }, { "epoch": 4.66, "grad_norm": 0.15107539296150208, "learning_rate": 7.098986244124706e-07, "loss": 0.0018, "step": 2453 }, { "epoch": 4.67, "grad_norm": 0.1174221783876419, "learning_rate": 7.019311705282305e-07, "loss": 0.0008, "step": 2454 }, { "epoch": 4.67, "grad_norm": 0.16680720448493958, "learning_rate": 6.940081508851969e-07, "loss": 0.0012, "step": 2455 }, { "epoch": 4.67, "grad_norm": 0.1376991868019104, "learning_rate": 6.861295774996346e-07, "loss": 0.0004, "step": 2456 }, { "epoch": 4.67, "grad_norm": 0.23229040205478668, "learning_rate": 6.782954623204052e-07, "loss": 0.0031, "step": 2457 }, { "epoch": 4.67, "grad_norm": 0.6541776061058044, "learning_rate": 6.705058172289347e-07, "loss": 0.003, "step": 2458 }, { "epoch": 4.67, "grad_norm": 0.27930083870887756, "learning_rate": 6.627606540392095e-07, "loss": 0.0025, "step": 2459 }, { "epoch": 4.68, "grad_norm": 0.04029128700494766, "learning_rate": 6.550599844977534e-07, "loss": 0.0003, "step": 2460 }, { "epoch": 4.68, "grad_norm": 0.2456011325120926, "learning_rate": 6.474038202836108e-07, "loss": 0.0032, "step": 2461 }, { "epoch": 4.68, "grad_norm": 0.28489720821380615, "learning_rate": 6.397921730083234e-07, "loss": 0.0017, "step": 2462 }, { "epoch": 4.68, "grad_norm": 0.11701435595750809, "learning_rate": 6.322250542159269e-07, "loss": 0.001, "step": 2463 }, { "epoch": 4.68, "grad_norm": 2.7472336292266846, "learning_rate": 6.247024753829111e-07, "loss": 0.0062, "step": 2464 }, { "epoch": 4.69, "grad_norm": 0.2147785723209381, "learning_rate": 6.172244479182232e-07, "loss": 0.0019, "step": 2465 }, { "epoch": 4.69, "grad_norm": 0.26832616329193115, "learning_rate": 6.097909831632476e-07, "loss": 0.0038, "step": 2466 }, { "epoch": 4.69, "grad_norm": 0.5398795008659363, "learning_rate": 6.024020923917695e-07, "loss": 0.0017, "step": 2467 }, { "epoch": 4.69, "grad_norm": 1.2443073987960815, "learning_rate": 5.950577868099816e-07, "loss": 0.003, "step": 2468 }, { "epoch": 4.69, "grad_norm": 0.1274510771036148, "learning_rate": 5.877580775564539e-07, "loss": 0.0013, "step": 2469 }, { "epoch": 4.7, "grad_norm": 0.12470702081918716, "learning_rate": 5.805029757021307e-07, "loss": 0.0007, "step": 2470 }, { "epoch": 4.7, "grad_norm": 0.1052505150437355, "learning_rate": 5.732924922502869e-07, "loss": 0.001, "step": 2471 }, { "epoch": 4.7, "grad_norm": 0.6303703188896179, "learning_rate": 5.661266381365383e-07, "loss": 0.0034, "step": 2472 }, { "epoch": 4.7, "grad_norm": 0.09370459616184235, "learning_rate": 5.590054242288145e-07, "loss": 0.0009, "step": 2473 }, { "epoch": 4.7, "grad_norm": 0.6264445781707764, "learning_rate": 5.519288613273398e-07, "loss": 0.0019, "step": 2474 }, { "epoch": 4.71, "grad_norm": 0.12388049066066742, "learning_rate": 5.448969601646225e-07, "loss": 0.0013, "step": 2475 }, { "epoch": 4.71, "grad_norm": 0.23256835341453552, "learning_rate": 5.379097314054249e-07, "loss": 0.0022, "step": 2476 }, { "epoch": 4.71, "grad_norm": 0.2536225914955139, "learning_rate": 5.309671856467768e-07, "loss": 0.0016, "step": 2477 }, { "epoch": 4.71, "grad_norm": 0.12463518232107162, "learning_rate": 5.240693334179259e-07, "loss": 0.0009, "step": 2478 }, { "epoch": 4.71, "grad_norm": 0.10569529235363007, "learning_rate": 5.17216185180337e-07, "loss": 0.001, "step": 2479 }, { "epoch": 4.71, "grad_norm": 0.5335217118263245, "learning_rate": 5.104077513276828e-07, "loss": 0.0038, "step": 2480 }, { "epoch": 4.72, "grad_norm": 0.402052104473114, "learning_rate": 5.036440421858235e-07, "loss": 0.0031, "step": 2481 }, { "epoch": 4.72, "grad_norm": 0.17072813212871552, "learning_rate": 4.969250680127701e-07, "loss": 0.0014, "step": 2482 }, { "epoch": 4.72, "grad_norm": 0.4702839255332947, "learning_rate": 4.902508389987148e-07, "loss": 0.0016, "step": 2483 }, { "epoch": 4.72, "grad_norm": 0.12013585865497589, "learning_rate": 4.836213652659638e-07, "loss": 0.0009, "step": 2484 }, { "epoch": 4.72, "grad_norm": 0.4298049211502075, "learning_rate": 4.770366568689544e-07, "loss": 0.0017, "step": 2485 }, { "epoch": 4.73, "grad_norm": 0.2844957113265991, "learning_rate": 4.7049672379423813e-07, "loss": 0.0025, "step": 2486 }, { "epoch": 4.73, "grad_norm": 0.5046013593673706, "learning_rate": 4.6400157596045433e-07, "loss": 0.0033, "step": 2487 }, { "epoch": 4.73, "grad_norm": 0.45614200830459595, "learning_rate": 4.5755122321831656e-07, "loss": 0.0044, "step": 2488 }, { "epoch": 4.73, "grad_norm": 0.15651580691337585, "learning_rate": 4.511456753506027e-07, "loss": 0.0016, "step": 2489 }, { "epoch": 4.73, "grad_norm": 0.3952268660068512, "learning_rate": 4.447849420721417e-07, "loss": 0.0032, "step": 2490 }, { "epoch": 4.74, "grad_norm": 0.4787686765193939, "learning_rate": 4.3846903302979356e-07, "loss": 0.0018, "step": 2491 }, { "epoch": 4.74, "grad_norm": 0.020277615636587143, "learning_rate": 4.321979578024293e-07, "loss": 0.0003, "step": 2492 }, { "epoch": 4.74, "grad_norm": 0.25518131256103516, "learning_rate": 4.2597172590094104e-07, "loss": 0.0026, "step": 2493 }, { "epoch": 4.74, "grad_norm": 0.15369807183742523, "learning_rate": 4.197903467681985e-07, "loss": 0.0008, "step": 2494 }, { "epoch": 4.74, "grad_norm": 0.45417654514312744, "learning_rate": 4.136538297790393e-07, "loss": 0.0016, "step": 2495 }, { "epoch": 4.75, "grad_norm": 0.16063813865184784, "learning_rate": 4.0756218424027547e-07, "loss": 0.0008, "step": 2496 }, { "epoch": 4.75, "grad_norm": 0.4586852192878723, "learning_rate": 4.015154193906667e-07, "loss": 0.0032, "step": 2497 }, { "epoch": 4.75, "grad_norm": 0.0900648683309555, "learning_rate": 3.955135444008906e-07, "loss": 0.0004, "step": 2498 }, { "epoch": 4.75, "grad_norm": 0.2141711413860321, "learning_rate": 3.8955656837355915e-07, "loss": 0.0023, "step": 2499 }, { "epoch": 4.75, "grad_norm": 0.09921552985906601, "learning_rate": 3.8364450034317876e-07, "loss": 0.001, "step": 2500 }, { "epoch": 4.75, "eval_blimp_filtered_avg": 0.7361194029850746, "eval_blimp_filtered_std": 0.004873636974928129, "step": 2500 }, { "epoch": 4.75, "eval_blimp_supplement_avg": 0.7780172413793104, "eval_blimp_supplement_std": 0.01772872571346667, "step": 2500 }, { "epoch": 4.75, "eval_vqa_filtered_avg": 0.29, "eval_vqa_filtered_std": 0.04560480215720684, "step": 2500 }, { "epoch": 4.75, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 2500 }, { "epoch": 4.75, "grad_norm": 0.12592703104019165, "learning_rate": 3.777773492761505e-07, "loss": 0.0005, "step": 2501 }, { "epoch": 4.76, "grad_norm": 0.17080189287662506, "learning_rate": 3.719551240707597e-07, "loss": 0.0014, "step": 2502 }, { "epoch": 4.76, "grad_norm": 0.1809116154909134, "learning_rate": 3.661778335571464e-07, "loss": 0.0019, "step": 2503 }, { "epoch": 4.76, "grad_norm": 0.5533795356750488, "learning_rate": 3.604454864973017e-07, "loss": 0.0017, "step": 2504 }, { "epoch": 4.76, "grad_norm": 0.126326784491539, "learning_rate": 3.547580915850612e-07, "loss": 0.0012, "step": 2505 }, { "epoch": 4.76, "grad_norm": 0.1688750684261322, "learning_rate": 3.491156574460852e-07, "loss": 0.0015, "step": 2506 }, { "epoch": 4.77, "grad_norm": 0.40464380383491516, "learning_rate": 3.4351819263783503e-07, "loss": 0.0038, "step": 2507 }, { "epoch": 4.77, "grad_norm": 0.09627801924943924, "learning_rate": 3.3796570564957997e-07, "loss": 0.0014, "step": 2508 }, { "epoch": 4.77, "grad_norm": 0.19420894980430603, "learning_rate": 3.3245820490237056e-07, "loss": 0.0028, "step": 2509 }, { "epoch": 4.77, "grad_norm": 0.13505572080612183, "learning_rate": 3.269956987490319e-07, "loss": 0.0011, "step": 2510 }, { "epoch": 4.77, "grad_norm": 0.6302899122238159, "learning_rate": 3.2157819547414704e-07, "loss": 0.0026, "step": 2511 }, { "epoch": 4.78, "grad_norm": 0.4387391209602356, "learning_rate": 3.162057032940469e-07, "loss": 0.0022, "step": 2512 }, { "epoch": 4.78, "grad_norm": 1.1944607496261597, "learning_rate": 3.108782303568003e-07, "loss": 0.0047, "step": 2513 }, { "epoch": 4.78, "grad_norm": 0.29947394132614136, "learning_rate": 3.055957847421975e-07, "loss": 0.0021, "step": 2514 }, { "epoch": 4.78, "grad_norm": 0.567141592502594, "learning_rate": 3.0035837446173e-07, "loss": 0.0027, "step": 2515 }, { "epoch": 4.78, "grad_norm": 0.8341576457023621, "learning_rate": 2.951660074586071e-07, "loss": 0.0039, "step": 2516 }, { "epoch": 4.79, "grad_norm": 0.3014719784259796, "learning_rate": 2.90018691607703e-07, "loss": 0.0023, "step": 2517 }, { "epoch": 4.79, "grad_norm": 0.17060449719429016, "learning_rate": 2.8491643471557637e-07, "loss": 0.0007, "step": 2518 }, { "epoch": 4.79, "grad_norm": 0.08191616088151932, "learning_rate": 2.798592445204573e-07, "loss": 0.0007, "step": 2519 }, { "epoch": 4.79, "grad_norm": 0.16501758992671967, "learning_rate": 2.748471286922072e-07, "loss": 0.0022, "step": 2520 }, { "epoch": 4.79, "grad_norm": 0.11205416172742844, "learning_rate": 2.698800948323388e-07, "loss": 0.0006, "step": 2521 }, { "epoch": 4.79, "grad_norm": 0.1397981494665146, "learning_rate": 2.6495815047398955e-07, "loss": 0.0005, "step": 2522 }, { "epoch": 4.8, "grad_norm": 0.13031837344169617, "learning_rate": 2.600813030819116e-07, "loss": 0.0007, "step": 2523 }, { "epoch": 4.8, "grad_norm": 0.060526613146066666, "learning_rate": 2.552495600524651e-07, "loss": 0.0005, "step": 2524 }, { "epoch": 4.8, "grad_norm": 0.8468664288520813, "learning_rate": 2.5046292871360177e-07, "loss": 0.0061, "step": 2525 }, { "epoch": 4.8, "grad_norm": 0.37743860483169556, "learning_rate": 2.457214163248578e-07, "loss": 0.0022, "step": 2526 }, { "epoch": 4.8, "grad_norm": 0.3832987844944, "learning_rate": 2.410250300773342e-07, "loss": 0.0025, "step": 2527 }, { "epoch": 4.81, "grad_norm": 0.11870352923870087, "learning_rate": 2.3637377709369689e-07, "loss": 0.0012, "step": 2528 }, { "epoch": 4.81, "grad_norm": 0.08517323434352875, "learning_rate": 2.3176766442816633e-07, "loss": 0.0005, "step": 2529 }, { "epoch": 4.81, "grad_norm": 0.5082300305366516, "learning_rate": 2.2720669906649783e-07, "loss": 0.004, "step": 2530 }, { "epoch": 4.81, "grad_norm": 0.8537079095840454, "learning_rate": 2.226908879259648e-07, "loss": 0.003, "step": 2531 }, { "epoch": 4.81, "grad_norm": 0.12528948485851288, "learning_rate": 2.182202378553788e-07, "loss": 0.0006, "step": 2532 }, { "epoch": 4.82, "grad_norm": 0.2934698760509491, "learning_rate": 2.137947556350428e-07, "loss": 0.0022, "step": 2533 }, { "epoch": 4.82, "grad_norm": 0.2707921266555786, "learning_rate": 2.0941444797676457e-07, "loss": 0.0013, "step": 2534 }, { "epoch": 4.82, "grad_norm": 0.580873966217041, "learning_rate": 2.0507932152383668e-07, "loss": 0.0052, "step": 2535 }, { "epoch": 4.82, "grad_norm": 0.3178699314594269, "learning_rate": 2.0078938285102988e-07, "loss": 0.0037, "step": 2536 }, { "epoch": 4.82, "grad_norm": 0.1255538910627365, "learning_rate": 1.9654463846457637e-07, "loss": 0.0011, "step": 2537 }, { "epoch": 4.83, "grad_norm": 0.43579888343811035, "learning_rate": 1.9234509480217323e-07, "loss": 0.0019, "step": 2538 }, { "epoch": 4.83, "grad_norm": 0.5966338515281677, "learning_rate": 1.8819075823295896e-07, "loss": 0.0034, "step": 2539 }, { "epoch": 4.83, "grad_norm": 0.18749940395355225, "learning_rate": 1.8408163505751697e-07, "loss": 0.0014, "step": 2540 }, { "epoch": 4.83, "grad_norm": 0.2378704696893692, "learning_rate": 1.8001773150785218e-07, "loss": 0.0009, "step": 2541 }, { "epoch": 4.83, "grad_norm": 0.16289666295051575, "learning_rate": 1.75999053747391e-07, "loss": 0.0018, "step": 2542 }, { "epoch": 4.83, "grad_norm": 0.48491355776786804, "learning_rate": 1.7202560787096476e-07, "loss": 0.013, "step": 2543 }, { "epoch": 4.84, "grad_norm": 0.08821028470993042, "learning_rate": 1.68097399904813e-07, "loss": 0.0013, "step": 2544 }, { "epoch": 4.84, "grad_norm": 0.14902262389659882, "learning_rate": 1.6421443580656003e-07, "loss": 0.0013, "step": 2545 }, { "epoch": 4.84, "grad_norm": 0.08079373836517334, "learning_rate": 1.603767214652152e-07, "loss": 0.0005, "step": 2546 }, { "epoch": 4.84, "grad_norm": 0.06826672703027725, "learning_rate": 1.56584262701156e-07, "loss": 0.0006, "step": 2547 }, { "epoch": 4.84, "grad_norm": 0.42661720514297485, "learning_rate": 1.528370652661315e-07, "loss": 0.0043, "step": 2548 }, { "epoch": 4.85, "grad_norm": 0.09662548452615738, "learning_rate": 1.4913513484323904e-07, "loss": 0.0007, "step": 2549 }, { "epoch": 4.85, "grad_norm": 0.08601194620132446, "learning_rate": 1.4547847704693085e-07, "loss": 0.0006, "step": 2550 }, { "epoch": 4.85, "grad_norm": 0.22964641451835632, "learning_rate": 1.418670974229874e-07, "loss": 0.0022, "step": 2551 }, { "epoch": 4.85, "grad_norm": 0.08517295122146606, "learning_rate": 1.3830100144853086e-07, "loss": 0.0006, "step": 2552 }, { "epoch": 4.85, "grad_norm": 0.2734367549419403, "learning_rate": 1.3478019453199152e-07, "loss": 0.0014, "step": 2553 }, { "epoch": 4.86, "grad_norm": 0.18821953237056732, "learning_rate": 1.313046820131214e-07, "loss": 0.0014, "step": 2554 }, { "epoch": 4.86, "grad_norm": 0.22060233354568481, "learning_rate": 1.2787446916298407e-07, "loss": 0.0018, "step": 2555 }, { "epoch": 4.86, "grad_norm": 0.37115103006362915, "learning_rate": 1.2448956118392142e-07, "loss": 0.004, "step": 2556 }, { "epoch": 4.86, "grad_norm": 0.43767398595809937, "learning_rate": 1.2114996320958694e-07, "loss": 0.0053, "step": 2557 }, { "epoch": 4.86, "grad_norm": 0.43892624974250793, "learning_rate": 1.1785568030489579e-07, "loss": 0.0034, "step": 2558 }, { "epoch": 4.87, "grad_norm": 0.3595508933067322, "learning_rate": 1.1460671746605144e-07, "loss": 0.0011, "step": 2559 }, { "epoch": 4.87, "grad_norm": 0.19588567316532135, "learning_rate": 1.1140307962051565e-07, "loss": 0.0015, "step": 2560 }, { "epoch": 4.87, "grad_norm": 0.1196543276309967, "learning_rate": 1.0824477162701851e-07, "loss": 0.0014, "step": 2561 }, { "epoch": 4.87, "grad_norm": 0.4718242883682251, "learning_rate": 1.0513179827552844e-07, "loss": 0.0044, "step": 2562 }, { "epoch": 4.87, "grad_norm": 0.1582292765378952, "learning_rate": 1.020641642872655e-07, "loss": 0.0007, "step": 2563 }, { "epoch": 4.87, "grad_norm": 0.32436901330947876, "learning_rate": 9.904187431468481e-08, "loss": 0.0029, "step": 2564 }, { "epoch": 4.88, "grad_norm": 0.3937620222568512, "learning_rate": 9.606493294147978e-08, "loss": 0.003, "step": 2565 }, { "epoch": 4.88, "grad_norm": 0.2221381962299347, "learning_rate": 9.313334468255219e-08, "loss": 0.001, "step": 2566 }, { "epoch": 4.88, "grad_norm": 0.42876583337783813, "learning_rate": 9.024711398403218e-08, "loss": 0.0018, "step": 2567 }, { "epoch": 4.88, "grad_norm": 0.21051064133644104, "learning_rate": 8.74062452232549e-08, "loss": 0.0009, "step": 2568 }, { "epoch": 4.88, "grad_norm": 0.359129935503006, "learning_rate": 8.461074270875723e-08, "loss": 0.0025, "step": 2569 }, { "epoch": 4.89, "grad_norm": 0.27563896775245667, "learning_rate": 8.186061068027439e-08, "loss": 0.003, "step": 2570 }, { "epoch": 4.89, "grad_norm": 0.07437489926815033, "learning_rate": 7.915585330872999e-08, "loss": 0.0007, "step": 2571 }, { "epoch": 4.89, "grad_norm": 0.24903130531311035, "learning_rate": 7.649647469623267e-08, "loss": 0.001, "step": 2572 }, { "epoch": 4.89, "grad_norm": 0.6150416731834412, "learning_rate": 7.388247887606614e-08, "loss": 0.0027, "step": 2573 }, { "epoch": 4.89, "grad_norm": 0.12331343442201614, "learning_rate": 7.131386981268918e-08, "loss": 0.0009, "step": 2574 }, { "epoch": 4.9, "grad_norm": 0.12440402060747147, "learning_rate": 6.879065140172225e-08, "loss": 0.001, "step": 2575 }, { "epoch": 4.9, "grad_norm": 0.2896176874637604, "learning_rate": 6.63128274699476e-08, "loss": 0.0021, "step": 2576 }, { "epoch": 4.9, "grad_norm": 0.4548191428184509, "learning_rate": 6.38804017752992e-08, "loss": 0.0033, "step": 2577 }, { "epoch": 4.9, "grad_norm": 0.24217510223388672, "learning_rate": 6.14933780068594e-08, "loss": 0.0017, "step": 2578 }, { "epoch": 4.9, "grad_norm": 0.20585671067237854, "learning_rate": 5.915175978485232e-08, "loss": 0.0018, "step": 2579 }, { "epoch": 4.9, "grad_norm": 0.5276055932044983, "learning_rate": 5.685555066064052e-08, "loss": 0.004, "step": 2580 }, { "epoch": 4.91, "grad_norm": 0.3069263994693756, "learning_rate": 5.460475411671495e-08, "loss": 0.0041, "step": 2581 }, { "epoch": 4.91, "grad_norm": 0.05879399925470352, "learning_rate": 5.2399373566694994e-08, "loss": 0.0006, "step": 2582 }, { "epoch": 4.91, "grad_norm": 0.4693526029586792, "learning_rate": 5.0239412355321814e-08, "loss": 0.008, "step": 2583 }, { "epoch": 4.91, "grad_norm": 0.22923944890499115, "learning_rate": 4.812487375844832e-08, "loss": 0.0022, "step": 2584 }, { "epoch": 4.91, "grad_norm": 0.17988573014736176, "learning_rate": 4.605576098304254e-08, "loss": 0.001, "step": 2585 }, { "epoch": 4.92, "grad_norm": 0.34996870160102844, "learning_rate": 4.4032077167174275e-08, "loss": 0.0022, "step": 2586 }, { "epoch": 4.92, "grad_norm": 0.4732515811920166, "learning_rate": 4.205382538002178e-08, "loss": 0.0049, "step": 2587 }, { "epoch": 4.92, "grad_norm": 0.08113665878772736, "learning_rate": 4.0121008621851754e-08, "loss": 0.0005, "step": 2588 }, { "epoch": 4.92, "grad_norm": 0.28771209716796875, "learning_rate": 3.8233629824026005e-08, "loss": 0.0018, "step": 2589 }, { "epoch": 4.92, "grad_norm": 0.2508082687854767, "learning_rate": 3.6391691848994824e-08, "loss": 0.0033, "step": 2590 }, { "epoch": 4.93, "grad_norm": 0.6334829330444336, "learning_rate": 3.459519749029028e-08, "loss": 0.0028, "step": 2591 }, { "epoch": 4.93, "grad_norm": 0.15751120448112488, "learning_rate": 3.28441494725229e-08, "loss": 0.0015, "step": 2592 }, { "epoch": 4.93, "grad_norm": 0.2826721966266632, "learning_rate": 3.113855045138503e-08, "loss": 0.0014, "step": 2593 }, { "epoch": 4.93, "grad_norm": 0.1072096973657608, "learning_rate": 2.9478403013627477e-08, "loss": 0.0011, "step": 2594 }, { "epoch": 4.93, "grad_norm": 0.08694400638341904, "learning_rate": 2.786370967707952e-08, "loss": 0.0012, "step": 2595 }, { "epoch": 4.94, "grad_norm": 0.22402212023735046, "learning_rate": 2.6294472890625586e-08, "loss": 0.0025, "step": 2596 }, { "epoch": 4.94, "grad_norm": 0.1562776267528534, "learning_rate": 2.477069503421192e-08, "loss": 0.0009, "step": 2597 }, { "epoch": 4.94, "grad_norm": 0.2831669747829437, "learning_rate": 2.3292378418846572e-08, "loss": 0.0015, "step": 2598 }, { "epoch": 4.94, "grad_norm": 0.31335747241973877, "learning_rate": 2.1859525286576087e-08, "loss": 0.0028, "step": 2599 }, { "epoch": 4.94, "grad_norm": 0.3888968825340271, "learning_rate": 2.0472137810508827e-08, "loss": 0.0032, "step": 2600 }, { "epoch": 4.94, "eval_blimp_filtered_avg": 0.7359701492537314, "eval_blimp_filtered_std": 0.004877398397033817, "step": 2600 }, { "epoch": 4.94, "eval_blimp_supplement_avg": 0.7758620689655172, "eval_blimp_supplement_std": 0.01779349443318937, "step": 2600 }, { "epoch": 4.94, "eval_vqa_filtered_avg": 0.3, "eval_vqa_filtered_std": 0.046056618647183814, "step": 2600 }, { "epoch": 4.94, "eval_winoground_filtered_avg": 0.51, "eval_winoground_filtered_std": 0.05024183937956912, "step": 2600 }, { "epoch": 4.94, "grad_norm": 0.053542472422122955, "learning_rate": 1.9130218094798313e-08, "loss": 0.0003, "step": 2601 }, { "epoch": 4.95, "grad_norm": 0.14890460669994354, "learning_rate": 1.7833768174626564e-08, "loss": 0.0006, "step": 2602 }, { "epoch": 4.95, "grad_norm": 0.2754262387752533, "learning_rate": 1.6582790016234084e-08, "loss": 0.0028, "step": 2603 }, { "epoch": 4.95, "grad_norm": 0.22954076528549194, "learning_rate": 1.537728551687989e-08, "loss": 0.0013, "step": 2604 }, { "epoch": 4.95, "grad_norm": 0.030557632446289062, "learning_rate": 1.421725650486816e-08, "loss": 0.0002, "step": 2605 }, { "epoch": 4.95, "grad_norm": 0.4670690596103668, "learning_rate": 1.3102704739531569e-08, "loss": 0.0051, "step": 2606 }, { "epoch": 4.96, "grad_norm": 0.4513750970363617, "learning_rate": 1.2033631911227972e-08, "loss": 0.002, "step": 2607 }, { "epoch": 4.96, "grad_norm": 0.47704145312309265, "learning_rate": 1.1010039641343728e-08, "loss": 0.0013, "step": 2608 }, { "epoch": 4.96, "grad_norm": 0.6101816296577454, "learning_rate": 1.0031929482280378e-08, "loss": 0.002, "step": 2609 }, { "epoch": 4.96, "grad_norm": 0.18245604634284973, "learning_rate": 9.099302917474627e-09, "loss": 0.0008, "step": 2610 }, { "epoch": 4.96, "grad_norm": 0.28127577900886536, "learning_rate": 8.212161361368375e-09, "loss": 0.0011, "step": 2611 }, { "epoch": 4.97, "grad_norm": 0.20208145678043365, "learning_rate": 7.370506159425361e-09, "loss": 0.0009, "step": 2612 }, { "epoch": 4.97, "grad_norm": 0.2964656352996826, "learning_rate": 6.574338588121176e-09, "loss": 0.0025, "step": 2613 }, { "epoch": 4.97, "grad_norm": 0.3100679814815521, "learning_rate": 5.823659854946594e-09, "loss": 0.0027, "step": 2614 }, { "epoch": 4.97, "grad_norm": 0.1320505440235138, "learning_rate": 5.118471098400912e-09, "loss": 0.0014, "step": 2615 }, { "epoch": 4.97, "grad_norm": 0.5624819397926331, "learning_rate": 4.458773387991943e-09, "loss": 0.0059, "step": 2616 }, { "epoch": 4.98, "grad_norm": 0.2583712637424469, "learning_rate": 3.844567724236026e-09, "loss": 0.0011, "step": 2617 }, { "epoch": 4.98, "grad_norm": 0.17332583665847778, "learning_rate": 3.2758550386546848e-09, "loss": 0.0009, "step": 2618 }, { "epoch": 4.98, "grad_norm": 0.17415505647659302, "learning_rate": 2.7526361937679767e-09, "loss": 0.0015, "step": 2619 }, { "epoch": 4.98, "grad_norm": 0.06746897101402283, "learning_rate": 2.274911983111139e-09, "loss": 0.0006, "step": 2620 }, { "epoch": 4.98, "grad_norm": 0.2487659603357315, "learning_rate": 1.842683131207945e-09, "loss": 0.0024, "step": 2621 }, { "epoch": 4.98, "grad_norm": 0.2689647078514099, "learning_rate": 1.455950293590691e-09, "loss": 0.0014, "step": 2622 }, { "epoch": 4.99, "grad_norm": 0.3559643626213074, "learning_rate": 1.1147140567868697e-09, "loss": 0.0016, "step": 2623 }, { "epoch": 4.99, "grad_norm": 0.06829884648323059, "learning_rate": 8.189749383291644e-10, "loss": 0.0007, "step": 2624 }, { "epoch": 4.99, "grad_norm": 0.4270227253437042, "learning_rate": 5.687333867387956e-10, "loss": 0.0018, "step": 2625 }, { "epoch": 4.99, "grad_norm": 0.17640671133995056, "learning_rate": 3.6398978153884266e-10, "loss": 0.0015, "step": 2626 }, { "epoch": 4.99, "grad_norm": 0.4409826397895813, "learning_rate": 2.0474443325424474e-10, "loss": 0.004, "step": 2627 }, { "epoch": 5.0, "grad_norm": 0.14133916795253754, "learning_rate": 9.099758339514708e-11, "loss": 0.0018, "step": 2628 }, { "epoch": 5.0, "grad_norm": 0.1439923793077469, "learning_rate": 2.2749404473554336e-11, "loss": 0.0018, "step": 2629 }, { "epoch": 5.0, "grad_norm": 0.07924532890319824, "learning_rate": 0.0, "loss": 0.0004, "step": 2630 }, { "epoch": 5.0, "step": 2630, "total_flos": 6.633565184222822e+17, "train_loss": 0.1017905679114425, "train_runtime": 6453.1979, "train_samples_per_second": 52.129, "train_steps_per_second": 0.408 } ], "logging_steps": 1.0, "max_steps": 2630, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1000, "total_flos": 6.633565184222822e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }