ICM-Qwen2-VL-7B / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 3000000,
"global_step": 2254,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0044365572315882874,
"grad_norm": 34.20283403950735,
"learning_rate": 4.4247787610619474e-07,
"loss": 1.6635,
"step": 10
},
{
"epoch": 0.008873114463176575,
"grad_norm": 12.575384142861347,
"learning_rate": 8.849557522123895e-07,
"loss": 1.5066,
"step": 20
},
{
"epoch": 0.013309671694764862,
"grad_norm": 5.263263199846598,
"learning_rate": 1.3274336283185843e-06,
"loss": 1.2307,
"step": 30
},
{
"epoch": 0.01774622892635315,
"grad_norm": 4.982907507940082,
"learning_rate": 1.769911504424779e-06,
"loss": 1.0518,
"step": 40
},
{
"epoch": 0.022182786157941437,
"grad_norm": 3.6874648219458925,
"learning_rate": 2.212389380530974e-06,
"loss": 0.9991,
"step": 50
},
{
"epoch": 0.026619343389529725,
"grad_norm": 4.125478287085026,
"learning_rate": 2.6548672566371687e-06,
"loss": 0.9461,
"step": 60
},
{
"epoch": 0.031055900621118012,
"grad_norm": 3.7527731246847797,
"learning_rate": 3.097345132743363e-06,
"loss": 0.9212,
"step": 70
},
{
"epoch": 0.0354924578527063,
"grad_norm": 4.250041476792804,
"learning_rate": 3.539823008849558e-06,
"loss": 0.9086,
"step": 80
},
{
"epoch": 0.03992901508429459,
"grad_norm": 4.884005781902834,
"learning_rate": 3.982300884955752e-06,
"loss": 0.9103,
"step": 90
},
{
"epoch": 0.044365572315882874,
"grad_norm": 3.363574198575473,
"learning_rate": 4.424778761061948e-06,
"loss": 0.9075,
"step": 100
},
{
"epoch": 0.048802129547471165,
"grad_norm": 4.481974877802558,
"learning_rate": 4.867256637168142e-06,
"loss": 0.8666,
"step": 110
},
{
"epoch": 0.05323868677905945,
"grad_norm": 4.507301002428585,
"learning_rate": 5.309734513274337e-06,
"loss": 0.8747,
"step": 120
},
{
"epoch": 0.05767524401064774,
"grad_norm": 4.360704218511151,
"learning_rate": 5.752212389380532e-06,
"loss": 0.8662,
"step": 130
},
{
"epoch": 0.062111801242236024,
"grad_norm": 3.693651598803813,
"learning_rate": 6.194690265486726e-06,
"loss": 0.8903,
"step": 140
},
{
"epoch": 0.06654835847382432,
"grad_norm": 4.347036121522918,
"learning_rate": 6.6371681415929215e-06,
"loss": 0.8777,
"step": 150
},
{
"epoch": 0.0709849157054126,
"grad_norm": 4.180008806520496,
"learning_rate": 7.079646017699116e-06,
"loss": 0.8825,
"step": 160
},
{
"epoch": 0.07542147293700088,
"grad_norm": 3.5743895818219964,
"learning_rate": 7.5221238938053095e-06,
"loss": 0.8432,
"step": 170
},
{
"epoch": 0.07985803016858918,
"grad_norm": 3.178231587914838,
"learning_rate": 7.964601769911505e-06,
"loss": 0.8815,
"step": 180
},
{
"epoch": 0.08429458740017746,
"grad_norm": 3.9776430514151713,
"learning_rate": 8.4070796460177e-06,
"loss": 0.842,
"step": 190
},
{
"epoch": 0.08873114463176575,
"grad_norm": 3.525176682554,
"learning_rate": 8.849557522123895e-06,
"loss": 0.8823,
"step": 200
},
{
"epoch": 0.09316770186335403,
"grad_norm": 3.6839303051450547,
"learning_rate": 9.29203539823009e-06,
"loss": 0.8879,
"step": 210
},
{
"epoch": 0.09760425909494233,
"grad_norm": 3.707960198706698,
"learning_rate": 9.734513274336284e-06,
"loss": 0.8592,
"step": 220
},
{
"epoch": 0.10204081632653061,
"grad_norm": 3.987885333720805,
"learning_rate": 9.999904010783725e-06,
"loss": 0.9191,
"step": 230
},
{
"epoch": 0.1064773735581189,
"grad_norm": 4.161618022645433,
"learning_rate": 9.998824174426658e-06,
"loss": 0.876,
"step": 240
},
{
"epoch": 0.11091393078970718,
"grad_norm": 3.936885714208245,
"learning_rate": 9.99654477518325e-06,
"loss": 0.8448,
"step": 250
},
{
"epoch": 0.11535048802129548,
"grad_norm": 3.381832157299745,
"learning_rate": 9.993066360038679e-06,
"loss": 0.8693,
"step": 260
},
{
"epoch": 0.11978704525288376,
"grad_norm": 3.4534900024735595,
"learning_rate": 9.98838976370478e-06,
"loss": 0.8907,
"step": 270
},
{
"epoch": 0.12422360248447205,
"grad_norm": 3.286109931988662,
"learning_rate": 9.982516108419746e-06,
"loss": 0.859,
"step": 280
},
{
"epoch": 0.12866015971606035,
"grad_norm": 3.8608367415146367,
"learning_rate": 9.975446803678818e-06,
"loss": 0.8497,
"step": 290
},
{
"epoch": 0.13309671694764863,
"grad_norm": 3.3536728069117747,
"learning_rate": 9.967183545896055e-06,
"loss": 0.8744,
"step": 300
},
{
"epoch": 0.13753327417923691,
"grad_norm": 2.9217222323855654,
"learning_rate": 9.95772831799724e-06,
"loss": 0.8552,
"step": 310
},
{
"epoch": 0.1419698314108252,
"grad_norm": 3.166995337332963,
"learning_rate": 9.94708338894405e-06,
"loss": 0.8643,
"step": 320
},
{
"epoch": 0.14640638864241348,
"grad_norm": 3.5968311661201553,
"learning_rate": 9.935251313189564e-06,
"loss": 0.8861,
"step": 330
},
{
"epoch": 0.15084294587400177,
"grad_norm": 3.1984772961959127,
"learning_rate": 9.922234930065286e-06,
"loss": 0.8529,
"step": 340
},
{
"epoch": 0.15527950310559005,
"grad_norm": 2.994176553345875,
"learning_rate": 9.908037363099782e-06,
"loss": 0.8777,
"step": 350
},
{
"epoch": 0.15971606033717836,
"grad_norm": 2.8847550451762944,
"learning_rate": 9.892662019269136e-06,
"loss": 0.841,
"step": 360
},
{
"epoch": 0.16415261756876665,
"grad_norm": 3.4925529215345326,
"learning_rate": 9.876112588179378e-06,
"loss": 0.8581,
"step": 370
},
{
"epoch": 0.16858917480035493,
"grad_norm": 3.0392967138153875,
"learning_rate": 9.858393041181096e-06,
"loss": 0.8702,
"step": 380
},
{
"epoch": 0.1730257320319432,
"grad_norm": 3.3458625683350536,
"learning_rate": 9.839507630416436e-06,
"loss": 0.8584,
"step": 390
},
{
"epoch": 0.1774622892635315,
"grad_norm": 2.4613591773979904,
"learning_rate": 9.819460887798714e-06,
"loss": 0.865,
"step": 400
},
{
"epoch": 0.18189884649511978,
"grad_norm": 2.9097688328420803,
"learning_rate": 9.7982576239249e-06,
"loss": 0.8241,
"step": 410
},
{
"epoch": 0.18633540372670807,
"grad_norm": 2.7123773430443294,
"learning_rate": 9.775902926921228e-06,
"loss": 0.8497,
"step": 420
},
{
"epoch": 0.19077196095829635,
"grad_norm": 3.15278672904484,
"learning_rate": 9.7524021612222e-06,
"loss": 0.8382,
"step": 430
},
{
"epoch": 0.19520851818988466,
"grad_norm": 3.140823772708262,
"learning_rate": 9.727760966283285e-06,
"loss": 0.8762,
"step": 440
},
{
"epoch": 0.19964507542147295,
"grad_norm": 3.0291324290256916,
"learning_rate": 9.701985255227624e-06,
"loss": 0.8603,
"step": 450
},
{
"epoch": 0.20408163265306123,
"grad_norm": 3.244917065658077,
"learning_rate": 9.675081213427076e-06,
"loss": 0.8615,
"step": 460
},
{
"epoch": 0.2085181898846495,
"grad_norm": 2.955073572383622,
"learning_rate": 9.647055297017901e-06,
"loss": 0.8563,
"step": 470
},
{
"epoch": 0.2129547471162378,
"grad_norm": 2.9411304836471936,
"learning_rate": 9.617914231351511e-06,
"loss": 0.8516,
"step": 480
},
{
"epoch": 0.21739130434782608,
"grad_norm": 3.2750932461773092,
"learning_rate": 9.587665009380565e-06,
"loss": 0.8692,
"step": 490
},
{
"epoch": 0.22182786157941436,
"grad_norm": 3.504372310689358,
"learning_rate": 9.556314889980906e-06,
"loss": 0.8804,
"step": 500
},
{
"epoch": 0.22626441881100265,
"grad_norm": 2.7396306271254836,
"learning_rate": 9.523871396209633e-06,
"loss": 0.8404,
"step": 510
},
{
"epoch": 0.23070097604259096,
"grad_norm": 3.2247600207290357,
"learning_rate": 9.49034231349982e-06,
"loss": 0.869,
"step": 520
},
{
"epoch": 0.23513753327417924,
"grad_norm": 2.8471166448572243,
"learning_rate": 9.455735687792233e-06,
"loss": 0.8304,
"step": 530
},
{
"epoch": 0.23957409050576753,
"grad_norm": 3.257232995701795,
"learning_rate": 9.420059823604573e-06,
"loss": 0.884,
"step": 540
},
{
"epoch": 0.2440106477373558,
"grad_norm": 3.244453444955561,
"learning_rate": 9.383323282038632e-06,
"loss": 0.8379,
"step": 550
},
{
"epoch": 0.2484472049689441,
"grad_norm": 2.864674646057881,
"learning_rate": 9.345534878725908e-06,
"loss": 0.8443,
"step": 560
},
{
"epoch": 0.2528837622005324,
"grad_norm": 3.1532607318799997,
"learning_rate": 9.30670368171212e-06,
"loss": 0.8446,
"step": 570
},
{
"epoch": 0.2573203194321207,
"grad_norm": 3.0985282276684605,
"learning_rate": 9.266839009281154e-06,
"loss": 0.8463,
"step": 580
},
{
"epoch": 0.26175687666370895,
"grad_norm": 3.2388655001812947,
"learning_rate": 9.225950427718974e-06,
"loss": 0.8666,
"step": 590
},
{
"epoch": 0.26619343389529726,
"grad_norm": 3.6155166491443573,
"learning_rate": 9.184047749018002e-06,
"loss": 0.8382,
"step": 600
},
{
"epoch": 0.2706299911268855,
"grad_norm": 2.4403938414282074,
"learning_rate": 9.141141028522544e-06,
"loss": 0.8188,
"step": 610
},
{
"epoch": 0.27506654835847383,
"grad_norm": 2.4856078708290115,
"learning_rate": 9.097240562515825e-06,
"loss": 0.8465,
"step": 620
},
{
"epoch": 0.2795031055900621,
"grad_norm": 2.920160366149781,
"learning_rate": 9.052356885749191e-06,
"loss": 0.8513,
"step": 630
},
{
"epoch": 0.2839396628216504,
"grad_norm": 3.215108369697536,
"learning_rate": 9.006500768914106e-06,
"loss": 0.81,
"step": 640
},
{
"epoch": 0.2883762200532387,
"grad_norm": 3.3125489624622357,
"learning_rate": 8.959683216057512e-06,
"loss": 0.8273,
"step": 650
},
{
"epoch": 0.29281277728482696,
"grad_norm": 2.913737564979685,
"learning_rate": 8.911915461941198e-06,
"loss": 0.836,
"step": 660
},
{
"epoch": 0.2972493345164153,
"grad_norm": 3.42492673444743,
"learning_rate": 8.86320896934581e-06,
"loss": 0.8254,
"step": 670
},
{
"epoch": 0.30168589174800353,
"grad_norm": 3.1014982685234633,
"learning_rate": 8.81357542632014e-06,
"loss": 0.838,
"step": 680
},
{
"epoch": 0.30612244897959184,
"grad_norm": 2.8652652554853693,
"learning_rate": 8.763026743376349e-06,
"loss": 0.8242,
"step": 690
},
{
"epoch": 0.3105590062111801,
"grad_norm": 3.117617428014748,
"learning_rate": 8.711575050631823e-06,
"loss": 0.7902,
"step": 700
},
{
"epoch": 0.3149955634427684,
"grad_norm": 2.8066840010172256,
"learning_rate": 8.659232694898307e-06,
"loss": 0.8398,
"step": 710
},
{
"epoch": 0.3194321206743567,
"grad_norm": 3.1802863661171648,
"learning_rate": 8.606012236719073e-06,
"loss": 0.8487,
"step": 720
},
{
"epoch": 0.323868677905945,
"grad_norm": 2.5919669683832645,
"learning_rate": 8.551926447354759e-06,
"loss": 0.8182,
"step": 730
},
{
"epoch": 0.3283052351375333,
"grad_norm": 2.1927751988515887,
"learning_rate": 8.496988305718672e-06,
"loss": 0.8357,
"step": 740
},
{
"epoch": 0.33274179236912155,
"grad_norm": 2.9601792065674766,
"learning_rate": 8.44121099526225e-06,
"loss": 0.8458,
"step": 750
},
{
"epoch": 0.33717834960070986,
"grad_norm": 2.9727625766130052,
"learning_rate": 8.384607900811442e-06,
"loss": 0.8271,
"step": 760
},
{
"epoch": 0.3416149068322981,
"grad_norm": 3.0013212096413393,
"learning_rate": 8.327192605354766e-06,
"loss": 0.7945,
"step": 770
},
{
"epoch": 0.3460514640638864,
"grad_norm": 2.7912070631337715,
"learning_rate": 8.268978886783807e-06,
"loss": 0.8222,
"step": 780
},
{
"epoch": 0.35048802129547474,
"grad_norm": 2.83451175997105,
"learning_rate": 8.209980714586955e-06,
"loss": 0.8321,
"step": 790
},
{
"epoch": 0.354924578527063,
"grad_norm": 2.9155174928570027,
"learning_rate": 8.150212246497165e-06,
"loss": 0.816,
"step": 800
},
{
"epoch": 0.3593611357586513,
"grad_norm": 3.4205616322803905,
"learning_rate": 8.089687825094524e-06,
"loss": 0.7996,
"step": 810
},
{
"epoch": 0.36379769299023956,
"grad_norm": 3.1050076903443298,
"learning_rate": 8.0284219743645e-06,
"loss": 0.8067,
"step": 820
},
{
"epoch": 0.3682342502218279,
"grad_norm": 2.912400492688149,
"learning_rate": 7.96642939621261e-06,
"loss": 0.8137,
"step": 830
},
{
"epoch": 0.37267080745341613,
"grad_norm": 3.2683909275955094,
"learning_rate": 7.903724966936442e-06,
"loss": 0.8507,
"step": 840
},
{
"epoch": 0.37710736468500444,
"grad_norm": 2.8879169834094385,
"learning_rate": 7.84032373365578e-06,
"loss": 0.8198,
"step": 850
},
{
"epoch": 0.3815439219165927,
"grad_norm": 3.1376295436995663,
"learning_rate": 7.776240910701788e-06,
"loss": 0.8259,
"step": 860
},
{
"epoch": 0.385980479148181,
"grad_norm": 2.7251763162480427,
"learning_rate": 7.71149187596602e-06,
"loss": 0.7873,
"step": 870
},
{
"epoch": 0.3904170363797693,
"grad_norm": 2.856774279255724,
"learning_rate": 7.646092167210217e-06,
"loss": 0.7761,
"step": 880
},
{
"epoch": 0.3948535936113576,
"grad_norm": 2.9462908501967187,
"learning_rate": 7.580057478337717e-06,
"loss": 0.7899,
"step": 890
},
{
"epoch": 0.3992901508429459,
"grad_norm": 2.5526957889688866,
"learning_rate": 7.5134036556274085e-06,
"loss": 0.8128,
"step": 900
},
{
"epoch": 0.40372670807453415,
"grad_norm": 2.8313232961928976,
"learning_rate": 7.446146693931111e-06,
"loss": 0.815,
"step": 910
},
{
"epoch": 0.40816326530612246,
"grad_norm": 2.560445546914752,
"learning_rate": 7.378302732835317e-06,
"loss": 0.8283,
"step": 920
},
{
"epoch": 0.4125998225377107,
"grad_norm": 3.1172570475169774,
"learning_rate": 7.3098880527881755e-06,
"loss": 0.7941,
"step": 930
},
{
"epoch": 0.417036379769299,
"grad_norm": 3.480216407476258,
"learning_rate": 7.2409190711927015e-06,
"loss": 0.7987,
"step": 940
},
{
"epoch": 0.42147293700088734,
"grad_norm": 3.2148941428286677,
"learning_rate": 7.171412338467101e-06,
"loss": 0.7864,
"step": 950
},
{
"epoch": 0.4259094942324756,
"grad_norm": 3.026908707292069,
"learning_rate": 7.1013845340731865e-06,
"loss": 0.8066,
"step": 960
},
{
"epoch": 0.4303460514640639,
"grad_norm": 3.13804883356682,
"learning_rate": 7.030852462513827e-06,
"loss": 0.7869,
"step": 970
},
{
"epoch": 0.43478260869565216,
"grad_norm": 3.0995750225017336,
"learning_rate": 6.959833049300376e-06,
"loss": 0.794,
"step": 980
},
{
"epoch": 0.4392191659272405,
"grad_norm": 2.452271023256726,
"learning_rate": 6.888343336891088e-06,
"loss": 0.8038,
"step": 990
},
{
"epoch": 0.44365572315882873,
"grad_norm": 2.945520429899043,
"learning_rate": 6.816400480601445e-06,
"loss": 0.8273,
"step": 1000
},
{
"epoch": 0.44809228039041704,
"grad_norm": 2.7654772587664347,
"learning_rate": 6.744021744487422e-06,
"loss": 0.8232,
"step": 1010
},
{
"epoch": 0.4525288376220053,
"grad_norm": 2.574659419159971,
"learning_rate": 6.671224497202637e-06,
"loss": 0.7968,
"step": 1020
},
{
"epoch": 0.4569653948535936,
"grad_norm": 2.6941300017602723,
"learning_rate": 6.598026207830428e-06,
"loss": 0.7774,
"step": 1030
},
{
"epoch": 0.4614019520851819,
"grad_norm": 2.3049982572840797,
"learning_rate": 6.524444441691796e-06,
"loss": 0.8201,
"step": 1040
},
{
"epoch": 0.4658385093167702,
"grad_norm": 2.8950877335554352,
"learning_rate": 6.4504968561302905e-06,
"loss": 0.7908,
"step": 1050
},
{
"epoch": 0.4702750665483585,
"grad_norm": 2.721699004440637,
"learning_rate": 6.376201196274778e-06,
"loss": 0.8126,
"step": 1060
},
{
"epoch": 0.47471162377994675,
"grad_norm": 2.3821339398566046,
"learning_rate": 6.301575290781174e-06,
"loss": 0.7828,
"step": 1070
},
{
"epoch": 0.47914818101153506,
"grad_norm": 2.895185018848345,
"learning_rate": 6.226637047554113e-06,
"loss": 0.8102,
"step": 1080
},
{
"epoch": 0.4835847382431233,
"grad_norm": 2.5424898208946183,
"learning_rate": 6.1514044494496e-06,
"loss": 0.8043,
"step": 1090
},
{
"epoch": 0.4880212954747116,
"grad_norm": 2.5740094881555025,
"learning_rate": 6.075895549959694e-06,
"loss": 0.8092,
"step": 1100
},
{
"epoch": 0.49245785270629994,
"grad_norm": 3.0317460337993998,
"learning_rate": 6.000128468880223e-06,
"loss": 0.769,
"step": 1110
},
{
"epoch": 0.4968944099378882,
"grad_norm": 2.4350512141767795,
"learning_rate": 5.924121387962594e-06,
"loss": 0.781,
"step": 1120
},
{
"epoch": 0.5013309671694764,
"grad_norm": 2.4910751483001157,
"learning_rate": 5.847892546550738e-06,
"loss": 0.8354,
"step": 1130
},
{
"epoch": 0.5057675244010648,
"grad_norm": 2.4987734030771476,
"learning_rate": 5.771460237204231e-06,
"loss": 0.7981,
"step": 1140
},
{
"epoch": 0.5102040816326531,
"grad_norm": 2.78796449183458,
"learning_rate": 5.694842801308651e-06,
"loss": 0.7749,
"step": 1150
},
{
"epoch": 0.5146406388642414,
"grad_norm": 2.636798263235474,
"learning_rate": 5.618058624674207e-06,
"loss": 0.7738,
"step": 1160
},
{
"epoch": 0.5190771960958296,
"grad_norm": 2.4250601704506556,
"learning_rate": 5.541126133123721e-06,
"loss": 0.8487,
"step": 1170
},
{
"epoch": 0.5235137533274179,
"grad_norm": 3.0366629210786398,
"learning_rate": 5.464063788070996e-06,
"loss": 0.7813,
"step": 1180
},
{
"epoch": 0.5279503105590062,
"grad_norm": 2.9568222566071674,
"learning_rate": 5.386890082090652e-06,
"loss": 0.7945,
"step": 1190
},
{
"epoch": 0.5323868677905945,
"grad_norm": 2.9732986690318812,
"learning_rate": 5.309623534480481e-06,
"loss": 0.805,
"step": 1200
},
{
"epoch": 0.5368234250221828,
"grad_norm": 2.449200180183246,
"learning_rate": 5.232282686817392e-06,
"loss": 0.8011,
"step": 1210
},
{
"epoch": 0.541259982253771,
"grad_norm": 2.656104751785374,
"learning_rate": 5.154886098507995e-06,
"loss": 0.7727,
"step": 1220
},
{
"epoch": 0.5456965394853593,
"grad_norm": 2.9067760420236737,
"learning_rate": 5.077452342334939e-06,
"loss": 0.7905,
"step": 1230
},
{
"epoch": 0.5501330967169477,
"grad_norm": 2.527266541526558,
"learning_rate": 5e-06,
"loss": 0.7957,
"step": 1240
},
{
"epoch": 0.554569653948536,
"grad_norm": 3.260917632804995,
"learning_rate": 4.922547657665062e-06,
"loss": 0.769,
"step": 1250
},
{
"epoch": 0.5590062111801242,
"grad_norm": 2.9675971347845373,
"learning_rate": 4.845113901492005e-06,
"loss": 0.755,
"step": 1260
},
{
"epoch": 0.5634427684117125,
"grad_norm": 2.8643410190667047,
"learning_rate": 4.767717313182611e-06,
"loss": 0.7949,
"step": 1270
},
{
"epoch": 0.5678793256433008,
"grad_norm": 2.2606470678665485,
"learning_rate": 4.69037646551952e-06,
"loss": 0.7711,
"step": 1280
},
{
"epoch": 0.5723158828748891,
"grad_norm": 2.654813528917904,
"learning_rate": 4.613109917909349e-06,
"loss": 0.7714,
"step": 1290
},
{
"epoch": 0.5767524401064774,
"grad_norm": 2.741384900081702,
"learning_rate": 4.535936211929005e-06,
"loss": 0.7891,
"step": 1300
},
{
"epoch": 0.5811889973380656,
"grad_norm": 2.5799332532291004,
"learning_rate": 4.458873866876282e-06,
"loss": 0.7865,
"step": 1310
},
{
"epoch": 0.5856255545696539,
"grad_norm": 2.3867677434001044,
"learning_rate": 4.3819413753257945e-06,
"loss": 0.7656,
"step": 1320
},
{
"epoch": 0.5900621118012422,
"grad_norm": 2.6115344238359217,
"learning_rate": 4.305157198691351e-06,
"loss": 0.7934,
"step": 1330
},
{
"epoch": 0.5944986690328306,
"grad_norm": 3.0185681719969293,
"learning_rate": 4.228539762795769e-06,
"loss": 0.7669,
"step": 1340
},
{
"epoch": 0.5989352262644189,
"grad_norm": 2.628488411245796,
"learning_rate": 4.152107453449263e-06,
"loss": 0.7445,
"step": 1350
},
{
"epoch": 0.6033717834960071,
"grad_norm": 2.480295060210172,
"learning_rate": 4.075878612037408e-06,
"loss": 0.7765,
"step": 1360
},
{
"epoch": 0.6078083407275954,
"grad_norm": 3.1433976467269416,
"learning_rate": 3.999871531119779e-06,
"loss": 0.7817,
"step": 1370
},
{
"epoch": 0.6122448979591837,
"grad_norm": 2.2970916095173335,
"learning_rate": 3.924104450040308e-06,
"loss": 0.7862,
"step": 1380
},
{
"epoch": 0.616681455190772,
"grad_norm": 2.64995989465673,
"learning_rate": 3.848595550550401e-06,
"loss": 0.7494,
"step": 1390
},
{
"epoch": 0.6211180124223602,
"grad_norm": 2.4179592820902647,
"learning_rate": 3.773362952445889e-06,
"loss": 0.7389,
"step": 1400
},
{
"epoch": 0.6255545696539485,
"grad_norm": 2.7575087976553023,
"learning_rate": 3.6984247092188265e-06,
"loss": 0.7748,
"step": 1410
},
{
"epoch": 0.6299911268855368,
"grad_norm": 2.5672434080502753,
"learning_rate": 3.623798803725223e-06,
"loss": 0.7694,
"step": 1420
},
{
"epoch": 0.6344276841171251,
"grad_norm": 2.370793395981591,
"learning_rate": 3.5495031438697103e-06,
"loss": 0.7638,
"step": 1430
},
{
"epoch": 0.6388642413487134,
"grad_norm": 2.894475317512521,
"learning_rate": 3.475555558308206e-06,
"loss": 0.8022,
"step": 1440
},
{
"epoch": 0.6433007985803016,
"grad_norm": 2.495989068116402,
"learning_rate": 3.401973792169574e-06,
"loss": 0.7626,
"step": 1450
},
{
"epoch": 0.64773735581189,
"grad_norm": 2.3680512452024676,
"learning_rate": 3.3287755027973634e-06,
"loss": 0.7762,
"step": 1460
},
{
"epoch": 0.6521739130434783,
"grad_norm": 2.865587957987949,
"learning_rate": 3.2559782555125793e-06,
"loss": 0.7756,
"step": 1470
},
{
"epoch": 0.6566104702750666,
"grad_norm": 2.5614023023762273,
"learning_rate": 3.1835995193985548e-06,
"loss": 0.7917,
"step": 1480
},
{
"epoch": 0.6610470275066548,
"grad_norm": 2.470759417948881,
"learning_rate": 3.111656663108914e-06,
"loss": 0.766,
"step": 1490
},
{
"epoch": 0.6654835847382431,
"grad_norm": 2.6142907884168958,
"learning_rate": 3.040166950699626e-06,
"loss": 0.7798,
"step": 1500
},
{
"epoch": 0.6699201419698314,
"grad_norm": 2.5564545984732208,
"learning_rate": 2.969147537486175e-06,
"loss": 0.7961,
"step": 1510
},
{
"epoch": 0.6743566992014197,
"grad_norm": 2.771325911908224,
"learning_rate": 2.898615465926814e-06,
"loss": 0.7621,
"step": 1520
},
{
"epoch": 0.678793256433008,
"grad_norm": 2.6108513029462337,
"learning_rate": 2.828587661532901e-06,
"loss": 0.7543,
"step": 1530
},
{
"epoch": 0.6832298136645962,
"grad_norm": 2.421876420317442,
"learning_rate": 2.7590809288073e-06,
"loss": 0.7801,
"step": 1540
},
{
"epoch": 0.6876663708961845,
"grad_norm": 2.2222956329711607,
"learning_rate": 2.6901119472118253e-06,
"loss": 0.7959,
"step": 1550
},
{
"epoch": 0.6921029281277729,
"grad_norm": 2.5970672573393294,
"learning_rate": 2.6216972671646846e-06,
"loss": 0.778,
"step": 1560
},
{
"epoch": 0.6965394853593612,
"grad_norm": 2.7539817190713,
"learning_rate": 2.553853306068888e-06,
"loss": 0.7742,
"step": 1570
},
{
"epoch": 0.7009760425909495,
"grad_norm": 2.7061208568232935,
"learning_rate": 2.4865963443725945e-06,
"loss": 0.7607,
"step": 1580
},
{
"epoch": 0.7054125998225377,
"grad_norm": 2.4787094896326,
"learning_rate": 2.419942521662285e-06,
"loss": 0.7273,
"step": 1590
},
{
"epoch": 0.709849157054126,
"grad_norm": 2.680134848840613,
"learning_rate": 2.3539078327897846e-06,
"loss": 0.7493,
"step": 1600
},
{
"epoch": 0.7142857142857143,
"grad_norm": 2.378836521235688,
"learning_rate": 2.2885081240339813e-06,
"loss": 0.7303,
"step": 1610
},
{
"epoch": 0.7187222715173026,
"grad_norm": 2.419540388712507,
"learning_rate": 2.223759089298214e-06,
"loss": 0.7849,
"step": 1620
},
{
"epoch": 0.7231588287488908,
"grad_norm": 2.7437113821072225,
"learning_rate": 2.159676266344222e-06,
"loss": 0.7491,
"step": 1630
},
{
"epoch": 0.7275953859804791,
"grad_norm": 2.800814230902216,
"learning_rate": 2.096275033063561e-06,
"loss": 0.735,
"step": 1640
},
{
"epoch": 0.7320319432120674,
"grad_norm": 2.5835441454701873,
"learning_rate": 2.033570603787391e-06,
"loss": 0.7848,
"step": 1650
},
{
"epoch": 0.7364685004436557,
"grad_norm": 2.3927048837823848,
"learning_rate": 1.9715780256355014e-06,
"loss": 0.7785,
"step": 1660
},
{
"epoch": 0.7409050576752441,
"grad_norm": 2.451375198464792,
"learning_rate": 1.910312174905477e-06,
"loss": 0.7737,
"step": 1670
},
{
"epoch": 0.7453416149068323,
"grad_norm": 2.3689843619182165,
"learning_rate": 1.849787753502838e-06,
"loss": 0.7604,
"step": 1680
},
{
"epoch": 0.7497781721384206,
"grad_norm": 2.3270088409766103,
"learning_rate": 1.7900192854130465e-06,
"loss": 0.7681,
"step": 1690
},
{
"epoch": 0.7542147293700089,
"grad_norm": 2.4484436031124086,
"learning_rate": 1.7310211132161936e-06,
"loss": 0.7107,
"step": 1700
},
{
"epoch": 0.7586512866015972,
"grad_norm": 2.8269432995587653,
"learning_rate": 1.672807394645236e-06,
"loss": 0.7169,
"step": 1710
},
{
"epoch": 0.7630878438331854,
"grad_norm": 2.4522790771322596,
"learning_rate": 1.6153920991885591e-06,
"loss": 0.7465,
"step": 1720
},
{
"epoch": 0.7675244010647737,
"grad_norm": 2.86172957277401,
"learning_rate": 1.5587890047377512e-06,
"loss": 0.7431,
"step": 1730
},
{
"epoch": 0.771960958296362,
"grad_norm": 2.3353346131787838,
"learning_rate": 1.50301169428133e-06,
"loss": 0.788,
"step": 1740
},
{
"epoch": 0.7763975155279503,
"grad_norm": 2.2356374657269957,
"learning_rate": 1.4480735526452427e-06,
"loss": 0.7722,
"step": 1750
},
{
"epoch": 0.7808340727595386,
"grad_norm": 2.7719896017269257,
"learning_rate": 1.3939877632809279e-06,
"loss": 0.7281,
"step": 1760
},
{
"epoch": 0.7852706299911268,
"grad_norm": 2.0933629026791696,
"learning_rate": 1.340767305101694e-06,
"loss": 0.7493,
"step": 1770
},
{
"epoch": 0.7897071872227152,
"grad_norm": 2.5031162243937595,
"learning_rate": 1.28842494936818e-06,
"loss": 0.7484,
"step": 1780
},
{
"epoch": 0.7941437444543035,
"grad_norm": 2.7578714943858844,
"learning_rate": 1.2369732566236508e-06,
"loss": 0.7525,
"step": 1790
},
{
"epoch": 0.7985803016858918,
"grad_norm": 2.5586093386128406,
"learning_rate": 1.1864245736798618e-06,
"loss": 0.7577,
"step": 1800
},
{
"epoch": 0.80301685891748,
"grad_norm": 2.8639096532826263,
"learning_rate": 1.1367910306541918e-06,
"loss": 0.764,
"step": 1810
},
{
"epoch": 0.8074534161490683,
"grad_norm": 2.8829135266213175,
"learning_rate": 1.088084538058804e-06,
"loss": 0.7613,
"step": 1820
},
{
"epoch": 0.8118899733806566,
"grad_norm": 2.1713022267735202,
"learning_rate": 1.0403167839424883e-06,
"loss": 0.7369,
"step": 1830
},
{
"epoch": 0.8163265306122449,
"grad_norm": 1.895710946494562,
"learning_rate": 9.934992310858944e-07,
"loss": 0.7435,
"step": 1840
},
{
"epoch": 0.8207630878438332,
"grad_norm": 2.2819940109062906,
"learning_rate": 9.476431142508097e-07,
"loss": 0.7413,
"step": 1850
},
{
"epoch": 0.8251996450754214,
"grad_norm": 2.27074516610647,
"learning_rate": 9.027594374841764e-07,
"loss": 0.742,
"step": 1860
},
{
"epoch": 0.8296362023070097,
"grad_norm": 2.9923428531081844,
"learning_rate": 8.58858971477457e-07,
"loss": 0.7529,
"step": 1870
},
{
"epoch": 0.834072759538598,
"grad_norm": 2.297772259893077,
"learning_rate": 8.159522509819995e-07,
"loss": 0.7703,
"step": 1880
},
{
"epoch": 0.8385093167701864,
"grad_norm": 2.7125609059674733,
"learning_rate": 7.740495722810271e-07,
"loss": 0.7242,
"step": 1890
},
{
"epoch": 0.8429458740017747,
"grad_norm": 2.2166873258070523,
"learning_rate": 7.33160990718847e-07,
"loss": 0.7325,
"step": 1900
},
{
"epoch": 0.8473824312333629,
"grad_norm": 3.181933616809904,
"learning_rate": 6.932963182878821e-07,
"loss": 0.7461,
"step": 1910
},
{
"epoch": 0.8518189884649512,
"grad_norm": 2.457506471010056,
"learning_rate": 6.544651212740915e-07,
"loss": 0.7374,
"step": 1920
},
{
"epoch": 0.8562555456965395,
"grad_norm": 2.427607748961866,
"learning_rate": 6.166767179613691e-07,
"loss": 0.7727,
"step": 1930
},
{
"epoch": 0.8606921029281278,
"grad_norm": 3.432067630928332,
"learning_rate": 5.799401763954287e-07,
"loss": 0.7531,
"step": 1940
},
{
"epoch": 0.865128660159716,
"grad_norm": 2.557627658159976,
"learning_rate": 5.442643122077673e-07,
"loss": 0.745,
"step": 1950
},
{
"epoch": 0.8695652173913043,
"grad_norm": 2.5202626872451845,
"learning_rate": 5.096576865001802e-07,
"loss": 0.7603,
"step": 1960
},
{
"epoch": 0.8740017746228926,
"grad_norm": 2.656418699241345,
"learning_rate": 4.7612860379036674e-07,
"loss": 0.7804,
"step": 1970
},
{
"epoch": 0.878438331854481,
"grad_norm": 2.7085851646031665,
"learning_rate": 4.436851100190953e-07,
"loss": 0.7398,
"step": 1980
},
{
"epoch": 0.8828748890860693,
"grad_norm": 2.293307857396717,
"learning_rate": 4.123349906194357e-07,
"loss": 0.7538,
"step": 1990
},
{
"epoch": 0.8873114463176575,
"grad_norm": 2.886861570546567,
"learning_rate": 3.820857686484908e-07,
"loss": 0.7291,
"step": 2000
},
{
"epoch": 0.8917480035492458,
"grad_norm": 2.455668193013591,
"learning_rate": 3.5294470298209817e-07,
"loss": 0.7799,
"step": 2010
},
{
"epoch": 0.8961845607808341,
"grad_norm": 2.708948960335714,
"learning_rate": 3.2491878657292643e-07,
"loss": 0.7492,
"step": 2020
},
{
"epoch": 0.9006211180124224,
"grad_norm": 2.5597091034095194,
"learning_rate": 2.980147447723775e-07,
"loss": 0.7392,
"step": 2030
},
{
"epoch": 0.9050576752440106,
"grad_norm": 2.463909703586001,
"learning_rate": 2.72239033716718e-07,
"loss": 0.7255,
"step": 2040
},
{
"epoch": 0.9094942324755989,
"grad_norm": 2.714509240372469,
"learning_rate": 2.475978387778e-07,
"loss": 0.7412,
"step": 2050
},
{
"epoch": 0.9139307897071872,
"grad_norm": 3.0070268485752103,
"learning_rate": 2.2409707307877226e-07,
"loss": 0.756,
"step": 2060
},
{
"epoch": 0.9183673469387755,
"grad_norm": 2.6554320065677093,
"learning_rate": 2.0174237607510138e-07,
"loss": 0.7385,
"step": 2070
},
{
"epoch": 0.9228039041703638,
"grad_norm": 2.19585204977499,
"learning_rate": 1.805391122012884e-07,
"loss": 0.7528,
"step": 2080
},
{
"epoch": 0.927240461401952,
"grad_norm": 2.246232816291186,
"learning_rate": 1.6049236958356475e-07,
"loss": 0.7765,
"step": 2090
},
{
"epoch": 0.9316770186335404,
"grad_norm": 2.781850208748413,
"learning_rate": 1.416069588189045e-07,
"loss": 0.7558,
"step": 2100
},
{
"epoch": 0.9361135758651287,
"grad_norm": 3.2985753891715945,
"learning_rate": 1.2388741182062348e-07,
"loss": 0.7419,
"step": 2110
},
{
"epoch": 0.940550133096717,
"grad_norm": 3.112687270289018,
"learning_rate": 1.0733798073086498e-07,
"loss": 0.7531,
"step": 2120
},
{
"epoch": 0.9449866903283053,
"grad_norm": 2.5616717115112784,
"learning_rate": 9.1962636900218e-08,
"loss": 0.7605,
"step": 2130
},
{
"epoch": 0.9494232475598935,
"grad_norm": 2.672849952243094,
"learning_rate": 7.776506993471323e-08,
"loss": 0.7408,
"step": 2140
},
{
"epoch": 0.9538598047914818,
"grad_norm": 2.9266883416397724,
"learning_rate": 6.474868681043578e-08,
"loss": 0.7539,
"step": 2150
},
{
"epoch": 0.9582963620230701,
"grad_norm": 2.593151981582472,
"learning_rate": 5.291661105595147e-08,
"loss": 0.7456,
"step": 2160
},
{
"epoch": 0.9627329192546584,
"grad_norm": 2.7307372540316868,
"learning_rate": 4.227168200276077e-08,
"loss": 0.7348,
"step": 2170
},
{
"epoch": 0.9671694764862466,
"grad_norm": 2.40736535660914,
"learning_rate": 3.2816454103945514e-08,
"loss": 0.7683,
"step": 2180
},
{
"epoch": 0.9716060337178349,
"grad_norm": 2.6188698813112774,
"learning_rate": 2.455319632118147e-08,
"loss": 0.7436,
"step": 2190
},
{
"epoch": 0.9760425909494232,
"grad_norm": 2.5274565265510525,
"learning_rate": 1.7483891580253877e-08,
"loss": 0.7647,
"step": 2200
},
{
"epoch": 0.9804791481810116,
"grad_norm": 2.144358539554466,
"learning_rate": 1.161023629522029e-08,
"loss": 0.7297,
"step": 2210
},
{
"epoch": 0.9849157054125999,
"grad_norm": 2.4523679132502316,
"learning_rate": 6.933639961322347e-09,
"loss": 0.7642,
"step": 2220
},
{
"epoch": 0.9893522626441881,
"grad_norm": 2.9622315500466687,
"learning_rate": 3.4552248167507576e-09,
"loss": 0.7555,
"step": 2230
},
{
"epoch": 0.9937888198757764,
"grad_norm": 2.652778627419083,
"learning_rate": 1.1758255733423928e-09,
"loss": 0.7527,
"step": 2240
},
{
"epoch": 0.9982253771073647,
"grad_norm": 2.1355065406932594,
"learning_rate": 9.598921627607116e-11,
"loss": 0.7696,
"step": 2250
},
{
"epoch": 1.0,
"step": 2254,
"total_flos": 3809428622016512.0,
"train_loss": 0.811508896611069,
"train_runtime": 11825.8532,
"train_samples_per_second": 24.397,
"train_steps_per_second": 0.191
}
],
"logging_steps": 10,
"max_steps": 2254,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3809428622016512.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}