zephyr-7b-dpo-lora / trainer_state.json
yihang7's picture
Model save
48fa12f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9993414204074695,
"eval_steps": 100,
"global_step": 1470,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.4013605442176867e-09,
"logits/chosen": -2.8035497665405273,
"logits/rejected": -2.7962629795074463,
"logps/chosen": -211.36532592773438,
"logps/rejected": -294.74530029296875,
"loss": 0.693,
"rewards/accuracies": 0.2265625,
"rewards/chosen": 0.0010320872534066439,
"rewards/margins": 0.0005493065109476447,
"rewards/rejected": 0.0004827805096283555,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 3.4013605442176873e-08,
"logits/chosen": -2.7791833877563477,
"logits/rejected": -2.804030418395996,
"logps/chosen": -240.9124298095703,
"logps/rejected": -369.5000305175781,
"loss": 0.6926,
"rewards/accuracies": 0.4696180522441864,
"rewards/chosen": 0.00038262151065282524,
"rewards/margins": 0.0016919042682275176,
"rewards/rejected": -0.00130928261205554,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 6.802721088435375e-08,
"logits/chosen": -2.7648768424987793,
"logits/rejected": -2.78273606300354,
"logps/chosen": -245.15121459960938,
"logps/rejected": -350.14898681640625,
"loss": 0.6932,
"rewards/accuracies": 0.5023437738418579,
"rewards/chosen": 0.0015446910401806235,
"rewards/margins": 0.0005673653213307261,
"rewards/rejected": 0.000977325951680541,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 1.0204081632653061e-07,
"logits/chosen": -2.8178772926330566,
"logits/rejected": -2.786083221435547,
"logps/chosen": -240.51516723632812,
"logps/rejected": -352.41339111328125,
"loss": 0.6929,
"rewards/accuracies": 0.508593738079071,
"rewards/chosen": 0.0015831931959837675,
"rewards/margins": 0.001221821061335504,
"rewards/rejected": 0.00036137248389422894,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 1.360544217687075e-07,
"logits/chosen": -2.803492307662964,
"logits/rejected": -2.7716286182403564,
"logps/chosen": -235.7887725830078,
"logps/rejected": -359.8059997558594,
"loss": 0.6912,
"rewards/accuracies": 0.5234375,
"rewards/chosen": 0.0033938586711883545,
"rewards/margins": 0.004743899218738079,
"rewards/rejected": -0.001350040198303759,
"step": 40
},
{
"epoch": 0.03,
"learning_rate": 1.7006802721088434e-07,
"logits/chosen": -2.8103866577148438,
"logits/rejected": -2.803828716278076,
"logps/chosen": -245.4801483154297,
"logps/rejected": -306.09783935546875,
"loss": 0.6899,
"rewards/accuracies": 0.5640624761581421,
"rewards/chosen": 0.00749587407335639,
"rewards/margins": 0.007244518492370844,
"rewards/rejected": 0.00025135590112768114,
"step": 50
},
{
"epoch": 0.04,
"learning_rate": 2.0408163265306121e-07,
"logits/chosen": -2.7881524562835693,
"logits/rejected": -2.808814525604248,
"logps/chosen": -269.1226501464844,
"logps/rejected": -339.7763977050781,
"loss": 0.6874,
"rewards/accuracies": 0.6015625,
"rewards/chosen": 0.011595133692026138,
"rewards/margins": 0.012225830927491188,
"rewards/rejected": -0.000630697060842067,
"step": 60
},
{
"epoch": 0.05,
"learning_rate": 2.3809523809523806e-07,
"logits/chosen": -2.7982544898986816,
"logits/rejected": -2.765774726867676,
"logps/chosen": -258.89117431640625,
"logps/rejected": -372.06451416015625,
"loss": 0.6864,
"rewards/accuracies": 0.6148437261581421,
"rewards/chosen": 0.013396549038589,
"rewards/margins": 0.014446373097598553,
"rewards/rejected": -0.0010498259216547012,
"step": 70
},
{
"epoch": 0.05,
"learning_rate": 2.72108843537415e-07,
"logits/chosen": -2.8109402656555176,
"logits/rejected": -2.7843804359436035,
"logps/chosen": -244.15817260742188,
"logps/rejected": -369.6734313964844,
"loss": 0.6821,
"rewards/accuracies": 0.6742187738418579,
"rewards/chosen": 0.02298940345644951,
"rewards/margins": 0.023194540292024612,
"rewards/rejected": -0.0002051351184491068,
"step": 80
},
{
"epoch": 0.06,
"learning_rate": 3.0612244897959183e-07,
"logits/chosen": -2.8090157508850098,
"logits/rejected": -2.7707672119140625,
"logps/chosen": -222.1091766357422,
"logps/rejected": -365.6192321777344,
"loss": 0.6766,
"rewards/accuracies": 0.7242187261581421,
"rewards/chosen": 0.030939970165491104,
"rewards/margins": 0.03436826914548874,
"rewards/rejected": -0.0034283031709492207,
"step": 90
},
{
"epoch": 0.07,
"learning_rate": 3.401360544217687e-07,
"logits/chosen": -2.7735049724578857,
"logits/rejected": -2.7935452461242676,
"logps/chosen": -251.73049926757812,
"logps/rejected": -388.00115966796875,
"loss": 0.6728,
"rewards/accuracies": 0.735156238079071,
"rewards/chosen": 0.03729977086186409,
"rewards/margins": 0.04232599213719368,
"rewards/rejected": -0.0050262222066521645,
"step": 100
},
{
"epoch": 0.07,
"learning_rate": 3.741496598639456e-07,
"logits/chosen": -2.797628164291382,
"logits/rejected": -2.784834384918213,
"logps/chosen": -255.72265625,
"logps/rejected": -349.15985107421875,
"loss": 0.6651,
"rewards/accuracies": 0.788281261920929,
"rewards/chosen": 0.05175922438502312,
"rewards/margins": 0.05847715586423874,
"rewards/rejected": -0.00671793520450592,
"step": 110
},
{
"epoch": 0.08,
"learning_rate": 4.0816326530612243e-07,
"logits/chosen": -2.7973737716674805,
"logits/rejected": -2.7825686931610107,
"logps/chosen": -252.3303985595703,
"logps/rejected": -348.4207458496094,
"loss": 0.6604,
"rewards/accuracies": 0.813281238079071,
"rewards/chosen": 0.06004839017987251,
"rewards/margins": 0.06873828917741776,
"rewards/rejected": -0.008689895272254944,
"step": 120
},
{
"epoch": 0.09,
"learning_rate": 4.421768707482993e-07,
"logits/chosen": -2.7856903076171875,
"logits/rejected": -2.8103625774383545,
"logps/chosen": -248.4453125,
"logps/rejected": -316.520263671875,
"loss": 0.6528,
"rewards/accuracies": 0.8179687261581421,
"rewards/chosen": 0.07609430700540543,
"rewards/margins": 0.08578468859195709,
"rewards/rejected": -0.00969038438051939,
"step": 130
},
{
"epoch": 0.1,
"learning_rate": 4.761904761904761e-07,
"logits/chosen": -2.7964794635772705,
"logits/rejected": -2.8038413524627686,
"logps/chosen": -251.0780029296875,
"logps/rejected": -380.4024353027344,
"loss": 0.6409,
"rewards/accuracies": 0.842968761920929,
"rewards/chosen": 0.10089793056249619,
"rewards/margins": 0.11140058934688568,
"rewards/rejected": -0.010502668097615242,
"step": 140
},
{
"epoch": 0.1,
"learning_rate": 4.988662131519274e-07,
"logits/chosen": -2.7733452320098877,
"logits/rejected": -2.799926280975342,
"logps/chosen": -259.34686279296875,
"logps/rejected": -335.1527404785156,
"loss": 0.6297,
"rewards/accuracies": 0.8539062738418579,
"rewards/chosen": 0.12008102238178253,
"rewards/margins": 0.13700444996356964,
"rewards/rejected": -0.016923416405916214,
"step": 150
},
{
"epoch": 0.11,
"learning_rate": 4.950869236583522e-07,
"logits/chosen": -2.774165153503418,
"logits/rejected": -2.7881526947021484,
"logps/chosen": -245.5338134765625,
"logps/rejected": -338.31597900390625,
"loss": 0.6201,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 0.14051470160484314,
"rewards/margins": 0.1599283218383789,
"rewards/rejected": -0.019413620233535767,
"step": 160
},
{
"epoch": 0.12,
"learning_rate": 4.91307634164777e-07,
"logits/chosen": -2.811603546142578,
"logits/rejected": -2.8174936771392822,
"logps/chosen": -260.7558898925781,
"logps/rejected": -356.88153076171875,
"loss": 0.6041,
"rewards/accuracies": 0.875,
"rewards/chosen": 0.1749168038368225,
"rewards/margins": 0.19711166620254517,
"rewards/rejected": -0.022194867953658104,
"step": 170
},
{
"epoch": 0.12,
"learning_rate": 4.875283446712018e-07,
"logits/chosen": -2.7915146350860596,
"logits/rejected": -2.7889480590820312,
"logps/chosen": -264.36138916015625,
"logps/rejected": -353.7435607910156,
"loss": 0.5926,
"rewards/accuracies": 0.883593738079071,
"rewards/chosen": 0.19911792874336243,
"rewards/margins": 0.22633683681488037,
"rewards/rejected": -0.02721891924738884,
"step": 180
},
{
"epoch": 0.13,
"learning_rate": 4.837490551776266e-07,
"logits/chosen": -2.7990036010742188,
"logits/rejected": -2.7916808128356934,
"logps/chosen": -257.4069519042969,
"logps/rejected": -372.6297302246094,
"loss": 0.5799,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 0.22631244361400604,
"rewards/margins": 0.2581940293312073,
"rewards/rejected": -0.031881578266620636,
"step": 190
},
{
"epoch": 0.14,
"learning_rate": 4.799697656840514e-07,
"logits/chosen": -2.7753312587738037,
"logits/rejected": -2.7730696201324463,
"logps/chosen": -259.2568054199219,
"logps/rejected": -390.26995849609375,
"loss": 0.564,
"rewards/accuracies": 0.889843761920929,
"rewards/chosen": 0.25861743092536926,
"rewards/margins": 0.30055442452430725,
"rewards/rejected": -0.0419369637966156,
"step": 200
},
{
"epoch": 0.14,
"learning_rate": 4.761904761904761e-07,
"logits/chosen": -2.7830989360809326,
"logits/rejected": -2.7885472774505615,
"logps/chosen": -229.49685668945312,
"logps/rejected": -346.35784912109375,
"loss": 0.5551,
"rewards/accuracies": 0.91015625,
"rewards/chosen": 0.28561651706695557,
"rewards/margins": 0.32180091738700867,
"rewards/rejected": -0.03618443384766579,
"step": 210
},
{
"epoch": 0.15,
"learning_rate": 4.7241118669690096e-07,
"logits/chosen": -2.7914628982543945,
"logits/rejected": -2.7812819480895996,
"logps/chosen": -277.1968078613281,
"logps/rejected": -334.34124755859375,
"loss": 0.5473,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 0.30997538566589355,
"rewards/margins": 0.3486320972442627,
"rewards/rejected": -0.038656704127788544,
"step": 220
},
{
"epoch": 0.16,
"learning_rate": 4.6863189720332574e-07,
"logits/chosen": -2.7915186882019043,
"logits/rejected": -2.7635109424591064,
"logps/chosen": -230.6345672607422,
"logps/rejected": -366.45855712890625,
"loss": 0.5283,
"rewards/accuracies": 0.901562511920929,
"rewards/chosen": 0.3480406403541565,
"rewards/margins": 0.3980127274990082,
"rewards/rejected": -0.04997207969427109,
"step": 230
},
{
"epoch": 0.16,
"learning_rate": 4.648526077097505e-07,
"logits/chosen": -2.8176677227020264,
"logits/rejected": -2.8094589710235596,
"logps/chosen": -255.73318481445312,
"logps/rejected": -356.473876953125,
"loss": 0.5141,
"rewards/accuracies": 0.905468761920929,
"rewards/chosen": 0.38035809993743896,
"rewards/margins": 0.4426742494106293,
"rewards/rejected": -0.062316179275512695,
"step": 240
},
{
"epoch": 0.17,
"learning_rate": 4.6107331821617536e-07,
"logits/chosen": -2.778831958770752,
"logits/rejected": -2.7532734870910645,
"logps/chosen": -260.0787658691406,
"logps/rejected": -382.69403076171875,
"loss": 0.5037,
"rewards/accuracies": 0.9078124761581421,
"rewards/chosen": 0.4094300866127014,
"rewards/margins": 0.4735100269317627,
"rewards/rejected": -0.06407993286848068,
"step": 250
},
{
"epoch": 0.18,
"learning_rate": 4.5729402872260014e-07,
"logits/chosen": -2.7875959873199463,
"logits/rejected": -2.789522647857666,
"logps/chosen": -245.36215209960938,
"logps/rejected": -398.8630676269531,
"loss": 0.4946,
"rewards/accuracies": 0.897656261920929,
"rewards/chosen": 0.43164581060409546,
"rewards/margins": 0.506696879863739,
"rewards/rejected": -0.07505108416080475,
"step": 260
},
{
"epoch": 0.18,
"learning_rate": 4.535147392290249e-07,
"logits/chosen": -2.7784600257873535,
"logits/rejected": -2.743320941925049,
"logps/chosen": -240.0518035888672,
"logps/rejected": -373.5130920410156,
"loss": 0.4891,
"rewards/accuracies": 0.89453125,
"rewards/chosen": 0.45601949095726013,
"rewards/margins": 0.5297552347183228,
"rewards/rejected": -0.07373576611280441,
"step": 270
},
{
"epoch": 0.19,
"learning_rate": 4.497354497354497e-07,
"logits/chosen": -2.777036190032959,
"logits/rejected": -2.7678191661834717,
"logps/chosen": -264.9656677246094,
"logps/rejected": -373.12042236328125,
"loss": 0.4766,
"rewards/accuracies": 0.9156249761581421,
"rewards/chosen": 0.47401291131973267,
"rewards/margins": 0.5673891305923462,
"rewards/rejected": -0.09337621927261353,
"step": 280
},
{
"epoch": 0.2,
"learning_rate": 4.459561602418745e-07,
"logits/chosen": -2.7813751697540283,
"logits/rejected": -2.7827224731445312,
"logps/chosen": -239.7397918701172,
"logps/rejected": -392.6272888183594,
"loss": 0.4603,
"rewards/accuracies": 0.9117187261581421,
"rewards/chosen": 0.5112585425376892,
"rewards/margins": 0.6238077878952026,
"rewards/rejected": -0.11254926025867462,
"step": 290
},
{
"epoch": 0.2,
"learning_rate": 4.421768707482993e-07,
"logits/chosen": -2.784381628036499,
"logits/rejected": -2.7823455333709717,
"logps/chosen": -247.23696899414062,
"logps/rejected": -340.01971435546875,
"loss": 0.4569,
"rewards/accuracies": 0.909375011920929,
"rewards/chosen": 0.5431731939315796,
"rewards/margins": 0.6343038082122803,
"rewards/rejected": -0.09113059937953949,
"step": 300
},
{
"epoch": 0.21,
"learning_rate": 4.383975812547241e-07,
"logits/chosen": -2.7919013500213623,
"logits/rejected": -2.7927372455596924,
"logps/chosen": -244.9982147216797,
"logps/rejected": -345.5526428222656,
"loss": 0.4422,
"rewards/accuracies": 0.922656238079071,
"rewards/chosen": 0.5760600566864014,
"rewards/margins": 0.6899352669715881,
"rewards/rejected": -0.11387525498867035,
"step": 310
},
{
"epoch": 0.22,
"learning_rate": 4.346182917611489e-07,
"logits/chosen": -2.786698341369629,
"logits/rejected": -2.7934978008270264,
"logps/chosen": -255.37142944335938,
"logps/rejected": -399.12957763671875,
"loss": 0.4344,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 0.5843140482902527,
"rewards/margins": 0.7244275808334351,
"rewards/rejected": -0.1401134431362152,
"step": 320
},
{
"epoch": 0.22,
"learning_rate": 4.308390022675737e-07,
"logits/chosen": -2.7745113372802734,
"logits/rejected": -2.7805206775665283,
"logps/chosen": -252.92514038085938,
"logps/rejected": -392.51165771484375,
"loss": 0.4332,
"rewards/accuracies": 0.9046875238418579,
"rewards/chosen": 0.5970828533172607,
"rewards/margins": 0.7323796153068542,
"rewards/rejected": -0.1352967619895935,
"step": 330
},
{
"epoch": 0.23,
"learning_rate": 4.270597127739985e-07,
"logits/chosen": -2.783926486968994,
"logits/rejected": -2.7875866889953613,
"logps/chosen": -250.8353729248047,
"logps/rejected": -335.03265380859375,
"loss": 0.4175,
"rewards/accuracies": 0.9164062738418579,
"rewards/chosen": 0.6492675542831421,
"rewards/margins": 0.7875067591667175,
"rewards/rejected": -0.13823917508125305,
"step": 340
},
{
"epoch": 0.24,
"learning_rate": 4.2328042328042324e-07,
"logits/chosen": -2.7828190326690674,
"logits/rejected": -2.772052764892578,
"logps/chosen": -236.33706665039062,
"logps/rejected": -370.28399658203125,
"loss": 0.4152,
"rewards/accuracies": 0.9242187738418579,
"rewards/chosen": 0.662378191947937,
"rewards/margins": 0.7986767888069153,
"rewards/rejected": -0.13629861176013947,
"step": 350
},
{
"epoch": 0.24,
"learning_rate": 4.19501133786848e-07,
"logits/chosen": -2.780648946762085,
"logits/rejected": -2.771820545196533,
"logps/chosen": -228.22445678710938,
"logps/rejected": -390.63751220703125,
"loss": 0.4051,
"rewards/accuracies": 0.9140625,
"rewards/chosen": 0.6962443590164185,
"rewards/margins": 0.8446155786514282,
"rewards/rejected": -0.1483711302280426,
"step": 360
},
{
"epoch": 0.25,
"learning_rate": 4.1572184429327286e-07,
"logits/chosen": -2.8088645935058594,
"logits/rejected": -2.7826154232025146,
"logps/chosen": -255.2318572998047,
"logps/rejected": -344.69183349609375,
"loss": 0.3908,
"rewards/accuracies": 0.9203125238418579,
"rewards/chosen": 0.7306076288223267,
"rewards/margins": 0.900040328502655,
"rewards/rejected": -0.16943258047103882,
"step": 370
},
{
"epoch": 0.26,
"learning_rate": 4.1194255479969764e-07,
"logits/chosen": -2.7837393283843994,
"logits/rejected": -2.754739284515381,
"logps/chosen": -252.39779663085938,
"logps/rejected": -347.7734069824219,
"loss": 0.4019,
"rewards/accuracies": 0.907031238079071,
"rewards/chosen": 0.7146260738372803,
"rewards/margins": 0.8642898797988892,
"rewards/rejected": -0.14966385066509247,
"step": 380
},
{
"epoch": 0.27,
"learning_rate": 4.0816326530612243e-07,
"logits/chosen": -2.793994426727295,
"logits/rejected": -2.789456605911255,
"logps/chosen": -250.083984375,
"logps/rejected": -345.2536315917969,
"loss": 0.3843,
"rewards/accuracies": 0.9203125238418579,
"rewards/chosen": 0.760775089263916,
"rewards/margins": 0.9284068942070007,
"rewards/rejected": -0.1676318198442459,
"step": 390
},
{
"epoch": 0.27,
"learning_rate": 4.0438397581254726e-07,
"logits/chosen": -2.7863235473632812,
"logits/rejected": -2.7660741806030273,
"logps/chosen": -243.2860565185547,
"logps/rejected": -375.15283203125,
"loss": 0.3736,
"rewards/accuracies": 0.9195312261581421,
"rewards/chosen": 0.7728086113929749,
"rewards/margins": 0.9798704385757446,
"rewards/rejected": -0.20706184208393097,
"step": 400
},
{
"epoch": 0.28,
"learning_rate": 4.0060468631897205e-07,
"logits/chosen": -2.7740797996520996,
"logits/rejected": -2.787078857421875,
"logps/chosen": -231.3814239501953,
"logps/rejected": -373.4275817871094,
"loss": 0.3779,
"rewards/accuracies": 0.9140625,
"rewards/chosen": 0.786165177822113,
"rewards/margins": 0.9645744562149048,
"rewards/rejected": -0.1784091293811798,
"step": 410
},
{
"epoch": 0.29,
"learning_rate": 3.968253968253968e-07,
"logits/chosen": -2.7854466438293457,
"logits/rejected": -2.782599449157715,
"logps/chosen": -234.27853393554688,
"logps/rejected": -341.40106201171875,
"loss": 0.3758,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 0.8017100095748901,
"rewards/margins": 0.9820283651351929,
"rewards/rejected": -0.1803184449672699,
"step": 420
},
{
"epoch": 0.29,
"learning_rate": 3.930461073318216e-07,
"logits/chosen": -2.7634427547454834,
"logits/rejected": -2.7768495082855225,
"logps/chosen": -230.73318481445312,
"logps/rejected": -427.71917724609375,
"loss": 0.3665,
"rewards/accuracies": 0.91796875,
"rewards/chosen": 0.8091424703598022,
"rewards/margins": 1.025179386138916,
"rewards/rejected": -0.2160368263721466,
"step": 430
},
{
"epoch": 0.3,
"learning_rate": 3.892668178382464e-07,
"logits/chosen": -2.774629592895508,
"logits/rejected": -2.7814247608184814,
"logps/chosen": -253.4683074951172,
"logps/rejected": -386.40216064453125,
"loss": 0.3495,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 0.8851088285446167,
"rewards/margins": 1.123652696609497,
"rewards/rejected": -0.23854386806488037,
"step": 440
},
{
"epoch": 0.31,
"learning_rate": 3.854875283446712e-07,
"logits/chosen": -2.766551971435547,
"logits/rejected": -2.7709641456604004,
"logps/chosen": -271.8524475097656,
"logps/rejected": -379.4809265136719,
"loss": 0.3575,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 0.8432048559188843,
"rewards/margins": 1.0976295471191406,
"rewards/rejected": -0.254424512386322,
"step": 450
},
{
"epoch": 0.31,
"learning_rate": 3.8170823885109596e-07,
"logits/chosen": -2.8009865283966064,
"logits/rejected": -2.7705283164978027,
"logps/chosen": -241.07632446289062,
"logps/rejected": -366.87127685546875,
"loss": 0.3459,
"rewards/accuracies": 0.9281250238418579,
"rewards/chosen": 0.9000816345214844,
"rewards/margins": 1.1292930841445923,
"rewards/rejected": -0.22921133041381836,
"step": 460
},
{
"epoch": 0.32,
"learning_rate": 3.779289493575208e-07,
"logits/chosen": -2.7855477333068848,
"logits/rejected": -2.771469831466675,
"logps/chosen": -248.2216033935547,
"logps/rejected": -379.58709716796875,
"loss": 0.3488,
"rewards/accuracies": 0.913281261920929,
"rewards/chosen": 0.8979974985122681,
"rewards/margins": 1.1383633613586426,
"rewards/rejected": -0.2403658926486969,
"step": 470
},
{
"epoch": 0.33,
"learning_rate": 3.741496598639456e-07,
"logits/chosen": -2.783979892730713,
"logits/rejected": -2.787400722503662,
"logps/chosen": -234.78939819335938,
"logps/rejected": -391.0784912109375,
"loss": 0.3396,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 0.8895782232284546,
"rewards/margins": 1.1713939905166626,
"rewards/rejected": -0.281815767288208,
"step": 480
},
{
"epoch": 0.33,
"learning_rate": 3.703703703703703e-07,
"logits/chosen": -2.7796401977539062,
"logits/rejected": -2.78939151763916,
"logps/chosen": -255.79556274414062,
"logps/rejected": -376.7617492675781,
"loss": 0.3496,
"rewards/accuracies": 0.91796875,
"rewards/chosen": 0.8880151510238647,
"rewards/margins": 1.1511998176574707,
"rewards/rejected": -0.26318463683128357,
"step": 490
},
{
"epoch": 0.34,
"learning_rate": 3.6659108087679515e-07,
"logits/chosen": -2.784447193145752,
"logits/rejected": -2.7811279296875,
"logps/chosen": -240.26943969726562,
"logps/rejected": -373.43585205078125,
"loss": 0.3317,
"rewards/accuracies": 0.9242187738418579,
"rewards/chosen": 0.9207477569580078,
"rewards/margins": 1.2141565084457397,
"rewards/rejected": -0.2934088110923767,
"step": 500
},
{
"epoch": 0.35,
"learning_rate": 3.6281179138321993e-07,
"logits/chosen": -2.7936480045318604,
"logits/rejected": -2.7741034030914307,
"logps/chosen": -253.25625610351562,
"logps/rejected": -388.1740417480469,
"loss": 0.3307,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 0.9261225461959839,
"rewards/margins": 1.2367761135101318,
"rewards/rejected": -0.310653418302536,
"step": 510
},
{
"epoch": 0.35,
"learning_rate": 3.590325018896447e-07,
"logits/chosen": -2.764971971511841,
"logits/rejected": -2.779900074005127,
"logps/chosen": -277.50433349609375,
"logps/rejected": -390.9405822753906,
"loss": 0.3301,
"rewards/accuracies": 0.9203125238418579,
"rewards/chosen": 0.9403823614120483,
"rewards/margins": 1.250135898590088,
"rewards/rejected": -0.3097533881664276,
"step": 520
},
{
"epoch": 0.36,
"learning_rate": 3.5525321239606955e-07,
"logits/chosen": -2.7859063148498535,
"logits/rejected": -2.7852673530578613,
"logps/chosen": -240.83847045898438,
"logps/rejected": -329.5592346191406,
"loss": 0.3185,
"rewards/accuracies": 0.9296875,
"rewards/chosen": 0.998257040977478,
"rewards/margins": 1.3062750101089478,
"rewards/rejected": -0.30801790952682495,
"step": 530
},
{
"epoch": 0.37,
"learning_rate": 3.5147392290249433e-07,
"logits/chosen": -2.7856059074401855,
"logits/rejected": -2.7904558181762695,
"logps/chosen": -256.13116455078125,
"logps/rejected": -359.0440673828125,
"loss": 0.3201,
"rewards/accuracies": 0.917187511920929,
"rewards/chosen": 0.9812418222427368,
"rewards/margins": 1.2980186939239502,
"rewards/rejected": -0.316776841878891,
"step": 540
},
{
"epoch": 0.37,
"learning_rate": 3.4769463340891906e-07,
"logits/chosen": -2.7746291160583496,
"logits/rejected": -2.8083655834198,
"logps/chosen": -243.3596649169922,
"logps/rejected": -381.6620788574219,
"loss": 0.321,
"rewards/accuracies": 0.921093761920929,
"rewards/chosen": 0.9785689115524292,
"rewards/margins": 1.3102028369903564,
"rewards/rejected": -0.33163395524024963,
"step": 550
},
{
"epoch": 0.38,
"learning_rate": 3.439153439153439e-07,
"logits/chosen": -2.788200616836548,
"logits/rejected": -2.806088924407959,
"logps/chosen": -243.46371459960938,
"logps/rejected": -353.0728454589844,
"loss": 0.3037,
"rewards/accuracies": 0.9281250238418579,
"rewards/chosen": 1.0423057079315186,
"rewards/margins": 1.40134596824646,
"rewards/rejected": -0.35903996229171753,
"step": 560
},
{
"epoch": 0.39,
"learning_rate": 3.401360544217687e-07,
"logits/chosen": -2.8205642700195312,
"logits/rejected": -2.75651216506958,
"logps/chosen": -225.49380493164062,
"logps/rejected": -383.3102111816406,
"loss": 0.2961,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 1.0518951416015625,
"rewards/margins": 1.416092872619629,
"rewards/rejected": -0.3641977310180664,
"step": 570
},
{
"epoch": 0.39,
"learning_rate": 3.3635676492819346e-07,
"logits/chosen": -2.778111696243286,
"logits/rejected": -2.8062729835510254,
"logps/chosen": -241.8183135986328,
"logps/rejected": -360.12677001953125,
"loss": 0.3026,
"rewards/accuracies": 0.928906261920929,
"rewards/chosen": 1.0336360931396484,
"rewards/margins": 1.3975627422332764,
"rewards/rejected": -0.3639264702796936,
"step": 580
},
{
"epoch": 0.4,
"learning_rate": 3.325774754346183e-07,
"logits/chosen": -2.7760305404663086,
"logits/rejected": -2.7639145851135254,
"logps/chosen": -263.2132568359375,
"logps/rejected": -326.3753356933594,
"loss": 0.3079,
"rewards/accuracies": 0.917187511920929,
"rewards/chosen": 1.0236365795135498,
"rewards/margins": 1.3882102966308594,
"rewards/rejected": -0.36457380652427673,
"step": 590
},
{
"epoch": 0.41,
"learning_rate": 3.287981859410431e-07,
"logits/chosen": -2.8040480613708496,
"logits/rejected": -2.781839370727539,
"logps/chosen": -232.688720703125,
"logps/rejected": -341.75372314453125,
"loss": 0.294,
"rewards/accuracies": 0.92578125,
"rewards/chosen": 1.0763448476791382,
"rewards/margins": 1.459729790687561,
"rewards/rejected": -0.38338491320610046,
"step": 600
},
{
"epoch": 0.41,
"learning_rate": 3.2501889644746787e-07,
"logits/chosen": -2.797874927520752,
"logits/rejected": -2.748481512069702,
"logps/chosen": -232.8326873779297,
"logps/rejected": -369.7907409667969,
"loss": 0.2837,
"rewards/accuracies": 0.938281238079071,
"rewards/chosen": 1.101806640625,
"rewards/margins": 1.498957633972168,
"rewards/rejected": -0.39715105295181274,
"step": 610
},
{
"epoch": 0.42,
"learning_rate": 3.2123960695389265e-07,
"logits/chosen": -2.780925989151001,
"logits/rejected": -2.735792636871338,
"logps/chosen": -222.20596313476562,
"logps/rejected": -380.5815124511719,
"loss": 0.2935,
"rewards/accuracies": 0.921093761920929,
"rewards/chosen": 1.0859084129333496,
"rewards/margins": 1.4819860458374023,
"rewards/rejected": -0.39607763290405273,
"step": 620
},
{
"epoch": 0.43,
"learning_rate": 3.1746031746031743e-07,
"logits/chosen": -2.7768056392669678,
"logits/rejected": -2.764166831970215,
"logps/chosen": -236.9914093017578,
"logps/rejected": -345.6325378417969,
"loss": 0.2895,
"rewards/accuracies": 0.932812511920929,
"rewards/chosen": 1.1014459133148193,
"rewards/margins": 1.5069670677185059,
"rewards/rejected": -0.40552106499671936,
"step": 630
},
{
"epoch": 0.44,
"learning_rate": 3.136810279667422e-07,
"logits/chosen": -2.7987258434295654,
"logits/rejected": -2.8054118156433105,
"logps/chosen": -235.97109985351562,
"logps/rejected": -330.56439208984375,
"loss": 0.2775,
"rewards/accuracies": 0.93359375,
"rewards/chosen": 1.1580729484558105,
"rewards/margins": 1.5699806213378906,
"rewards/rejected": -0.4119076728820801,
"step": 640
},
{
"epoch": 0.44,
"learning_rate": 3.0990173847316705e-07,
"logits/chosen": -2.7858521938323975,
"logits/rejected": -2.779346466064453,
"logps/chosen": -257.5158386230469,
"logps/rejected": -322.25103759765625,
"loss": 0.287,
"rewards/accuracies": 0.9195312261581421,
"rewards/chosen": 1.1325995922088623,
"rewards/margins": 1.5360453128814697,
"rewards/rejected": -0.40344563126564026,
"step": 650
},
{
"epoch": 0.45,
"learning_rate": 3.0612244897959183e-07,
"logits/chosen": -2.7976508140563965,
"logits/rejected": -2.8010151386260986,
"logps/chosen": -219.1446533203125,
"logps/rejected": -315.2838439941406,
"loss": 0.2703,
"rewards/accuracies": 0.9453125,
"rewards/chosen": 1.1511547565460205,
"rewards/margins": 1.5933144092559814,
"rewards/rejected": -0.44215965270996094,
"step": 660
},
{
"epoch": 0.46,
"learning_rate": 3.023431594860166e-07,
"logits/chosen": -2.767582416534424,
"logits/rejected": -2.8024327754974365,
"logps/chosen": -237.21578979492188,
"logps/rejected": -314.68377685546875,
"loss": 0.2637,
"rewards/accuracies": 0.9359375238418579,
"rewards/chosen": 1.1508355140686035,
"rewards/margins": 1.6350256204605103,
"rewards/rejected": -0.48419007658958435,
"step": 670
},
{
"epoch": 0.46,
"learning_rate": 2.9856386999244145e-07,
"logits/chosen": -2.7926082611083984,
"logits/rejected": -2.780251979827881,
"logps/chosen": -244.810302734375,
"logps/rejected": -347.9936828613281,
"loss": 0.2784,
"rewards/accuracies": 0.930468738079071,
"rewards/chosen": 1.1081712245941162,
"rewards/margins": 1.5819367170333862,
"rewards/rejected": -0.47376567125320435,
"step": 680
},
{
"epoch": 0.47,
"learning_rate": 2.947845804988662e-07,
"logits/chosen": -2.771953821182251,
"logits/rejected": -2.768907070159912,
"logps/chosen": -248.50332641601562,
"logps/rejected": -360.6126403808594,
"loss": 0.2897,
"rewards/accuracies": 0.921875,
"rewards/chosen": 1.0936378240585327,
"rewards/margins": 1.5781736373901367,
"rewards/rejected": -0.4845358729362488,
"step": 690
},
{
"epoch": 0.48,
"learning_rate": 2.9100529100529097e-07,
"logits/chosen": -2.7748546600341797,
"logits/rejected": -2.7857470512390137,
"logps/chosen": -227.1557159423828,
"logps/rejected": -390.3030700683594,
"loss": 0.2597,
"rewards/accuracies": 0.9320312738418579,
"rewards/chosen": 1.1781264543533325,
"rewards/margins": 1.7109047174453735,
"rewards/rejected": -0.5327781438827515,
"step": 700
},
{
"epoch": 0.48,
"learning_rate": 2.872260015117158e-07,
"logits/chosen": -2.77628231048584,
"logits/rejected": -2.7869679927825928,
"logps/chosen": -245.57839965820312,
"logps/rejected": -326.86212158203125,
"loss": 0.2613,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 1.1419804096221924,
"rewards/margins": 1.6727325916290283,
"rewards/rejected": -0.5307522416114807,
"step": 710
},
{
"epoch": 0.49,
"learning_rate": 2.834467120181406e-07,
"logits/chosen": -2.7608537673950195,
"logits/rejected": -2.7646660804748535,
"logps/chosen": -241.5836944580078,
"logps/rejected": -379.62860107421875,
"loss": 0.2738,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 1.1373337507247925,
"rewards/margins": 1.6593284606933594,
"rewards/rejected": -0.5219947099685669,
"step": 720
},
{
"epoch": 0.5,
"learning_rate": 2.7966742252456537e-07,
"logits/chosen": -2.777465343475342,
"logits/rejected": -2.801975965499878,
"logps/chosen": -227.2059326171875,
"logps/rejected": -369.7891540527344,
"loss": 0.2554,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 1.209241271018982,
"rewards/margins": 1.7282158136367798,
"rewards/rejected": -0.5189744234085083,
"step": 730
},
{
"epoch": 0.5,
"learning_rate": 2.758881330309902e-07,
"logits/chosen": -2.7639384269714355,
"logits/rejected": -2.7558932304382324,
"logps/chosen": -255.972412109375,
"logps/rejected": -410.17431640625,
"loss": 0.2766,
"rewards/accuracies": 0.9156249761581421,
"rewards/chosen": 1.1485779285430908,
"rewards/margins": 1.672486662864685,
"rewards/rejected": -0.5239086151123047,
"step": 740
},
{
"epoch": 0.51,
"learning_rate": 2.72108843537415e-07,
"logits/chosen": -2.7429962158203125,
"logits/rejected": -2.7603325843811035,
"logps/chosen": -248.05697631835938,
"logps/rejected": -382.65863037109375,
"loss": 0.2692,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 1.1628259420394897,
"rewards/margins": 1.7002170085906982,
"rewards/rejected": -0.5373910665512085,
"step": 750
},
{
"epoch": 0.52,
"learning_rate": 2.683295540438397e-07,
"logits/chosen": -2.7732365131378174,
"logits/rejected": -2.7899222373962402,
"logps/chosen": -230.82577514648438,
"logps/rejected": -356.39349365234375,
"loss": 0.262,
"rewards/accuracies": 0.93359375,
"rewards/chosen": 1.1516262292861938,
"rewards/margins": 1.7132419347763062,
"rewards/rejected": -0.5616158843040466,
"step": 760
},
{
"epoch": 0.52,
"learning_rate": 2.645502645502645e-07,
"logits/chosen": -2.764669895172119,
"logits/rejected": -2.7641212940216064,
"logps/chosen": -246.3456573486328,
"logps/rejected": -370.99896240234375,
"loss": 0.2701,
"rewards/accuracies": 0.9164062738418579,
"rewards/chosen": 1.191197395324707,
"rewards/margins": 1.7232650518417358,
"rewards/rejected": -0.5320678949356079,
"step": 770
},
{
"epoch": 0.53,
"learning_rate": 2.6077097505668934e-07,
"logits/chosen": -2.7817633152008057,
"logits/rejected": -2.7922616004943848,
"logps/chosen": -256.2757873535156,
"logps/rejected": -356.1881408691406,
"loss": 0.2571,
"rewards/accuracies": 0.9359375238418579,
"rewards/chosen": 1.2059863805770874,
"rewards/margins": 1.7701711654663086,
"rewards/rejected": -0.5641847848892212,
"step": 780
},
{
"epoch": 0.54,
"learning_rate": 2.569916855631141e-07,
"logits/chosen": -2.7949161529541016,
"logits/rejected": -2.800379514694214,
"logps/chosen": -219.2698516845703,
"logps/rejected": -384.794189453125,
"loss": 0.2512,
"rewards/accuracies": 0.9296875,
"rewards/chosen": 1.2469325065612793,
"rewards/margins": 1.8279892206192017,
"rewards/rejected": -0.5810565948486328,
"step": 790
},
{
"epoch": 0.54,
"learning_rate": 2.532123960695389e-07,
"logits/chosen": -2.7864224910736084,
"logits/rejected": -2.8051304817199707,
"logps/chosen": -243.42105102539062,
"logps/rejected": -376.7647399902344,
"loss": 0.2455,
"rewards/accuracies": 0.9398437738418579,
"rewards/chosen": 1.25649094581604,
"rewards/margins": 1.8448721170425415,
"rewards/rejected": -0.5883811712265015,
"step": 800
},
{
"epoch": 0.55,
"learning_rate": 2.494331065759637e-07,
"logits/chosen": -2.7794528007507324,
"logits/rejected": -2.787205457687378,
"logps/chosen": -239.23776245117188,
"logps/rejected": -348.8122863769531,
"loss": 0.2407,
"rewards/accuracies": 0.940625011920929,
"rewards/chosen": 1.279539942741394,
"rewards/margins": 1.9069591760635376,
"rewards/rejected": -0.627419114112854,
"step": 810
},
{
"epoch": 0.56,
"learning_rate": 2.456538170823885e-07,
"logits/chosen": -2.7908012866973877,
"logits/rejected": -2.775237798690796,
"logps/chosen": -237.18807983398438,
"logps/rejected": -347.73028564453125,
"loss": 0.2346,
"rewards/accuracies": 0.938281238079071,
"rewards/chosen": 1.2818529605865479,
"rewards/margins": 1.891405701637268,
"rewards/rejected": -0.6095527410507202,
"step": 820
},
{
"epoch": 0.56,
"learning_rate": 2.418745275888133e-07,
"logits/chosen": -2.788677453994751,
"logits/rejected": -2.759464740753174,
"logps/chosen": -244.3543243408203,
"logps/rejected": -384.2773742675781,
"loss": 0.249,
"rewards/accuracies": 0.9273437261581421,
"rewards/chosen": 1.2608978748321533,
"rewards/margins": 1.8487341403961182,
"rewards/rejected": -0.5878363251686096,
"step": 830
},
{
"epoch": 0.57,
"learning_rate": 2.3809523809523806e-07,
"logits/chosen": -2.7865688800811768,
"logits/rejected": -2.744267463684082,
"logps/chosen": -225.56716918945312,
"logps/rejected": -373.64788818359375,
"loss": 0.2401,
"rewards/accuracies": 0.940625011920929,
"rewards/chosen": 1.2590898275375366,
"rewards/margins": 1.872513771057129,
"rewards/rejected": -0.6134239435195923,
"step": 840
},
{
"epoch": 0.58,
"learning_rate": 2.3431594860166287e-07,
"logits/chosen": -2.763679027557373,
"logits/rejected": -2.7585010528564453,
"logps/chosen": -234.14706420898438,
"logps/rejected": -332.43975830078125,
"loss": 0.2506,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 1.2429834604263306,
"rewards/margins": 1.8476206064224243,
"rewards/rejected": -0.6046372056007385,
"step": 850
},
{
"epoch": 0.58,
"learning_rate": 2.3053665910808768e-07,
"logits/chosen": -2.7579002380371094,
"logits/rejected": -2.7620043754577637,
"logps/chosen": -236.3244171142578,
"logps/rejected": -339.3128356933594,
"loss": 0.2543,
"rewards/accuracies": 0.936718761920929,
"rewards/chosen": 1.218972086906433,
"rewards/margins": 1.8291162252426147,
"rewards/rejected": -0.6101440191268921,
"step": 860
},
{
"epoch": 0.59,
"learning_rate": 2.2675736961451246e-07,
"logits/chosen": -2.7839019298553467,
"logits/rejected": -2.7369167804718018,
"logps/chosen": -219.27053833007812,
"logps/rejected": -405.5704650878906,
"loss": 0.2458,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.2801100015640259,
"rewards/margins": 1.8934139013290405,
"rewards/rejected": -0.6133038997650146,
"step": 870
},
{
"epoch": 0.6,
"learning_rate": 2.2297808012093725e-07,
"logits/chosen": -2.782578945159912,
"logits/rejected": -2.7683374881744385,
"logps/chosen": -245.6527099609375,
"logps/rejected": -378.6884765625,
"loss": 0.2384,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 1.321287751197815,
"rewards/margins": 1.9386436939239502,
"rewards/rejected": -0.6173557043075562,
"step": 880
},
{
"epoch": 0.61,
"learning_rate": 2.1919879062736206e-07,
"logits/chosen": -2.7775015830993652,
"logits/rejected": -2.752042293548584,
"logps/chosen": -229.3787078857422,
"logps/rejected": -356.0593566894531,
"loss": 0.2423,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 1.2723052501678467,
"rewards/margins": 1.9301214218139648,
"rewards/rejected": -0.6578160524368286,
"step": 890
},
{
"epoch": 0.61,
"learning_rate": 2.1541950113378684e-07,
"logits/chosen": -2.768510580062866,
"logits/rejected": -2.7404208183288574,
"logps/chosen": -265.3998107910156,
"logps/rejected": -373.4928283691406,
"loss": 0.2467,
"rewards/accuracies": 0.9320312738418579,
"rewards/chosen": 1.264615774154663,
"rewards/margins": 1.920330286026001,
"rewards/rejected": -0.6557145714759827,
"step": 900
},
{
"epoch": 0.62,
"learning_rate": 2.1164021164021162e-07,
"logits/chosen": -2.7891170978546143,
"logits/rejected": -2.7741641998291016,
"logps/chosen": -220.24307250976562,
"logps/rejected": -358.5487976074219,
"loss": 0.2284,
"rewards/accuracies": 0.94140625,
"rewards/chosen": 1.304023027420044,
"rewards/margins": 1.9829524755477905,
"rewards/rejected": -0.6789294481277466,
"step": 910
},
{
"epoch": 0.63,
"learning_rate": 2.0786092214663643e-07,
"logits/chosen": -2.7575926780700684,
"logits/rejected": -2.7642369270324707,
"logps/chosen": -234.12026977539062,
"logps/rejected": -384.3020935058594,
"loss": 0.2373,
"rewards/accuracies": 0.94140625,
"rewards/chosen": 1.2832618951797485,
"rewards/margins": 1.9688091278076172,
"rewards/rejected": -0.6855469942092896,
"step": 920
},
{
"epoch": 0.63,
"learning_rate": 2.0408163265306121e-07,
"logits/chosen": -2.766233444213867,
"logits/rejected": -2.7951343059539795,
"logps/chosen": -244.18026733398438,
"logps/rejected": -320.21771240234375,
"loss": 0.2259,
"rewards/accuracies": 0.9398437738418579,
"rewards/chosen": 1.312534213066101,
"rewards/margins": 2.0482983589172363,
"rewards/rejected": -0.7357643246650696,
"step": 930
},
{
"epoch": 0.64,
"learning_rate": 2.0030234315948602e-07,
"logits/chosen": -2.768256664276123,
"logits/rejected": -2.7545723915100098,
"logps/chosen": -248.816650390625,
"logps/rejected": -401.00958251953125,
"loss": 0.234,
"rewards/accuracies": 0.9320312738418579,
"rewards/chosen": 1.3067686557769775,
"rewards/margins": 2.003986358642578,
"rewards/rejected": -0.6972178816795349,
"step": 940
},
{
"epoch": 0.65,
"learning_rate": 1.965230536659108e-07,
"logits/chosen": -2.7718937397003174,
"logits/rejected": -2.7864131927490234,
"logps/chosen": -245.76220703125,
"logps/rejected": -350.4901428222656,
"loss": 0.2342,
"rewards/accuracies": 0.938281238079071,
"rewards/chosen": 1.3374592065811157,
"rewards/margins": 2.001889228820801,
"rewards/rejected": -0.6644300222396851,
"step": 950
},
{
"epoch": 0.65,
"learning_rate": 1.927437641723356e-07,
"logits/chosen": -2.7670133113861084,
"logits/rejected": -2.76993465423584,
"logps/chosen": -227.41748046875,
"logps/rejected": -354.6375427246094,
"loss": 0.2386,
"rewards/accuracies": 0.92578125,
"rewards/chosen": 1.3317902088165283,
"rewards/margins": 1.9966375827789307,
"rewards/rejected": -0.6648473739624023,
"step": 960
},
{
"epoch": 0.66,
"learning_rate": 1.889644746787604e-07,
"logits/chosen": -2.7860965728759766,
"logits/rejected": -2.776639699935913,
"logps/chosen": -257.2185363769531,
"logps/rejected": -302.48846435546875,
"loss": 0.2278,
"rewards/accuracies": 0.9390624761581421,
"rewards/chosen": 1.352912187576294,
"rewards/margins": 2.036379814147949,
"rewards/rejected": -0.6834677457809448,
"step": 970
},
{
"epoch": 0.67,
"learning_rate": 1.8518518518518516e-07,
"logits/chosen": -2.7641091346740723,
"logits/rejected": -2.7789313793182373,
"logps/chosen": -256.19476318359375,
"logps/rejected": -390.69549560546875,
"loss": 0.2325,
"rewards/accuracies": 0.942187488079071,
"rewards/chosen": 1.3011709451675415,
"rewards/margins": 2.039425849914551,
"rewards/rejected": -0.738254964351654,
"step": 980
},
{
"epoch": 0.67,
"learning_rate": 1.8140589569160996e-07,
"logits/chosen": -2.795642614364624,
"logits/rejected": -2.7746355533599854,
"logps/chosen": -234.4689483642578,
"logps/rejected": -395.40618896484375,
"loss": 0.223,
"rewards/accuracies": 0.9390624761581421,
"rewards/chosen": 1.3416879177093506,
"rewards/margins": 2.1014368534088135,
"rewards/rejected": -0.7597488164901733,
"step": 990
},
{
"epoch": 0.68,
"learning_rate": 1.7762660619803477e-07,
"logits/chosen": -2.7756259441375732,
"logits/rejected": -2.741664409637451,
"logps/chosen": -242.3101348876953,
"logps/rejected": -363.46160888671875,
"loss": 0.2285,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 1.3446866273880005,
"rewards/margins": 2.070406436920166,
"rewards/rejected": -0.7257199287414551,
"step": 1000
},
{
"epoch": 0.69,
"learning_rate": 1.7384731670445953e-07,
"logits/chosen": -2.7595419883728027,
"logits/rejected": -2.7858798503875732,
"logps/chosen": -259.9520568847656,
"logps/rejected": -358.3509216308594,
"loss": 0.2273,
"rewards/accuracies": 0.9359375238418579,
"rewards/chosen": 1.3033568859100342,
"rewards/margins": 2.0887067317962646,
"rewards/rejected": -0.78534996509552,
"step": 1010
},
{
"epoch": 0.69,
"learning_rate": 1.7006802721088434e-07,
"logits/chosen": -2.768449068069458,
"logits/rejected": -2.7718656063079834,
"logps/chosen": -238.11740112304688,
"logps/rejected": -354.0820007324219,
"loss": 0.236,
"rewards/accuracies": 0.9351562261581421,
"rewards/chosen": 1.3048899173736572,
"rewards/margins": 2.033565044403076,
"rewards/rejected": -0.7286752462387085,
"step": 1020
},
{
"epoch": 0.7,
"learning_rate": 1.6628873771730915e-07,
"logits/chosen": -2.7650275230407715,
"logits/rejected": -2.7476916313171387,
"logps/chosen": -245.41885375976562,
"logps/rejected": -343.54437255859375,
"loss": 0.2357,
"rewards/accuracies": 0.928906261920929,
"rewards/chosen": 1.316489815711975,
"rewards/margins": 2.062798023223877,
"rewards/rejected": -0.7463082671165466,
"step": 1030
},
{
"epoch": 0.71,
"learning_rate": 1.6250944822373393e-07,
"logits/chosen": -2.7734358310699463,
"logits/rejected": -2.7748751640319824,
"logps/chosen": -237.48538208007812,
"logps/rejected": -389.1809997558594,
"loss": 0.2308,
"rewards/accuracies": 0.930468738079071,
"rewards/chosen": 1.2910696268081665,
"rewards/margins": 2.079051971435547,
"rewards/rejected": -0.7879821062088013,
"step": 1040
},
{
"epoch": 0.71,
"learning_rate": 1.5873015873015872e-07,
"logits/chosen": -2.7670979499816895,
"logits/rejected": -2.769535779953003,
"logps/chosen": -214.1968536376953,
"logps/rejected": -331.4734802246094,
"loss": 0.224,
"rewards/accuracies": 0.9390624761581421,
"rewards/chosen": 1.377071738243103,
"rewards/margins": 2.1104674339294434,
"rewards/rejected": -0.7333956956863403,
"step": 1050
},
{
"epoch": 0.72,
"learning_rate": 1.5495086923658353e-07,
"logits/chosen": -2.789698839187622,
"logits/rejected": -2.7418367862701416,
"logps/chosen": -233.3469696044922,
"logps/rejected": -372.7334289550781,
"loss": 0.2046,
"rewards/accuracies": 0.948437511920929,
"rewards/chosen": 1.4205210208892822,
"rewards/margins": 2.2147347927093506,
"rewards/rejected": -0.7942138910293579,
"step": 1060
},
{
"epoch": 0.73,
"learning_rate": 1.511715797430083e-07,
"logits/chosen": -2.7732410430908203,
"logits/rejected": -2.7837493419647217,
"logps/chosen": -240.2108917236328,
"logps/rejected": -340.86712646484375,
"loss": 0.2229,
"rewards/accuracies": 0.932812511920929,
"rewards/chosen": 1.365039348602295,
"rewards/margins": 2.149728298187256,
"rewards/rejected": -0.7846890091896057,
"step": 1070
},
{
"epoch": 0.73,
"learning_rate": 1.473922902494331e-07,
"logits/chosen": -2.762357711791992,
"logits/rejected": -2.7503538131713867,
"logps/chosen": -245.73129272460938,
"logps/rejected": -367.2342529296875,
"loss": 0.2247,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 1.3420137166976929,
"rewards/margins": 2.1435036659240723,
"rewards/rejected": -0.8014899492263794,
"step": 1080
},
{
"epoch": 0.74,
"learning_rate": 1.436130007558579e-07,
"logits/chosen": -2.786447048187256,
"logits/rejected": -2.7433903217315674,
"logps/chosen": -259.77923583984375,
"logps/rejected": -384.2717590332031,
"loss": 0.2176,
"rewards/accuracies": 0.938281238079071,
"rewards/chosen": 1.4291341304779053,
"rewards/margins": 2.1485352516174316,
"rewards/rejected": -0.7194010019302368,
"step": 1090
},
{
"epoch": 0.75,
"learning_rate": 1.3983371126228268e-07,
"logits/chosen": -2.766045570373535,
"logits/rejected": -2.783592700958252,
"logps/chosen": -258.433349609375,
"logps/rejected": -356.44293212890625,
"loss": 0.2166,
"rewards/accuracies": 0.9359375238418579,
"rewards/chosen": 1.3983967304229736,
"rewards/margins": 2.2020390033721924,
"rewards/rejected": -0.8036419153213501,
"step": 1100
},
{
"epoch": 0.75,
"learning_rate": 1.360544217687075e-07,
"logits/chosen": -2.784245491027832,
"logits/rejected": -2.7566187381744385,
"logps/chosen": -251.7339324951172,
"logps/rejected": -356.1120300292969,
"loss": 0.2042,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 1.394803762435913,
"rewards/margins": 2.219846487045288,
"rewards/rejected": -0.8250430822372437,
"step": 1110
},
{
"epoch": 0.76,
"learning_rate": 1.3227513227513225e-07,
"logits/chosen": -2.768209218978882,
"logits/rejected": -2.7927510738372803,
"logps/chosen": -250.1661376953125,
"logps/rejected": -341.25396728515625,
"loss": 0.2216,
"rewards/accuracies": 0.936718761920929,
"rewards/chosen": 1.3784762620925903,
"rewards/margins": 2.1875884532928467,
"rewards/rejected": -0.8091121912002563,
"step": 1120
},
{
"epoch": 0.77,
"learning_rate": 1.2849584278155706e-07,
"logits/chosen": -2.755992889404297,
"logits/rejected": -2.7883083820343018,
"logps/chosen": -258.3106384277344,
"logps/rejected": -338.23822021484375,
"loss": 0.2233,
"rewards/accuracies": 0.9359375238418579,
"rewards/chosen": 1.371977686882019,
"rewards/margins": 2.1515755653381348,
"rewards/rejected": -0.7795979380607605,
"step": 1130
},
{
"epoch": 0.77,
"learning_rate": 1.2471655328798184e-07,
"logits/chosen": -2.765443801879883,
"logits/rejected": -2.773919105529785,
"logps/chosen": -228.51766967773438,
"logps/rejected": -353.0353698730469,
"loss": 0.2184,
"rewards/accuracies": 0.9359375238418579,
"rewards/chosen": 1.3785618543624878,
"rewards/margins": 2.173300266265869,
"rewards/rejected": -0.7947384119033813,
"step": 1140
},
{
"epoch": 0.78,
"learning_rate": 1.2093726379440665e-07,
"logits/chosen": -2.7806408405303955,
"logits/rejected": -2.756528854370117,
"logps/chosen": -227.71621704101562,
"logps/rejected": -391.3194580078125,
"loss": 0.2113,
"rewards/accuracies": 0.9476562738418579,
"rewards/chosen": 1.3853504657745361,
"rewards/margins": 2.21071195602417,
"rewards/rejected": -0.8253618478775024,
"step": 1150
},
{
"epoch": 0.79,
"learning_rate": 1.1715797430083144e-07,
"logits/chosen": -2.7610268592834473,
"logits/rejected": -2.7615675926208496,
"logps/chosen": -269.79010009765625,
"logps/rejected": -378.21209716796875,
"loss": 0.2102,
"rewards/accuracies": 0.9398437738418579,
"rewards/chosen": 1.3920191526412964,
"rewards/margins": 2.2798304557800293,
"rewards/rejected": -0.887811541557312,
"step": 1160
},
{
"epoch": 0.8,
"learning_rate": 1.1337868480725623e-07,
"logits/chosen": -2.776198625564575,
"logits/rejected": -2.768550395965576,
"logps/chosen": -246.81887817382812,
"logps/rejected": -365.49249267578125,
"loss": 0.2134,
"rewards/accuracies": 0.93359375,
"rewards/chosen": 1.4072265625,
"rewards/margins": 2.2190985679626465,
"rewards/rejected": -0.8118720054626465,
"step": 1170
},
{
"epoch": 0.8,
"learning_rate": 1.0959939531368103e-07,
"logits/chosen": -2.782680034637451,
"logits/rejected": -2.7389519214630127,
"logps/chosen": -239.7198944091797,
"logps/rejected": -363.89215087890625,
"loss": 0.2147,
"rewards/accuracies": 0.942187488079071,
"rewards/chosen": 1.3731368780136108,
"rewards/margins": 2.227461814880371,
"rewards/rejected": -0.8543251156806946,
"step": 1180
},
{
"epoch": 0.81,
"learning_rate": 1.0582010582010581e-07,
"logits/chosen": -2.7652835845947266,
"logits/rejected": -2.74135160446167,
"logps/chosen": -223.03579711914062,
"logps/rejected": -407.8848571777344,
"loss": 0.2255,
"rewards/accuracies": 0.930468738079071,
"rewards/chosen": 1.3337465524673462,
"rewards/margins": 2.1734132766723633,
"rewards/rejected": -0.8396667242050171,
"step": 1190
},
{
"epoch": 0.82,
"learning_rate": 1.0204081632653061e-07,
"logits/chosen": -2.785404920578003,
"logits/rejected": -2.768556594848633,
"logps/chosen": -217.6273651123047,
"logps/rejected": -390.0624694824219,
"loss": 0.2004,
"rewards/accuracies": 0.9515625238418579,
"rewards/chosen": 1.452343225479126,
"rewards/margins": 2.3132705688476562,
"rewards/rejected": -0.8609271049499512,
"step": 1200
},
{
"epoch": 0.82,
"learning_rate": 9.82615268329554e-08,
"logits/chosen": -2.7641220092773438,
"logits/rejected": -2.7403550148010254,
"logps/chosen": -254.1678466796875,
"logps/rejected": -381.2986145019531,
"loss": 0.2165,
"rewards/accuracies": 0.938281238079071,
"rewards/chosen": 1.3716920614242554,
"rewards/margins": 2.208040952682495,
"rewards/rejected": -0.8363490104675293,
"step": 1210
},
{
"epoch": 0.83,
"learning_rate": 9.44822373393802e-08,
"logits/chosen": -2.7834503650665283,
"logits/rejected": -2.7499313354492188,
"logps/chosen": -220.555908203125,
"logps/rejected": -353.34454345703125,
"loss": 0.2073,
"rewards/accuracies": 0.936718761920929,
"rewards/chosen": 1.4545724391937256,
"rewards/margins": 2.2807674407958984,
"rewards/rejected": -0.826195240020752,
"step": 1220
},
{
"epoch": 0.84,
"learning_rate": 9.070294784580498e-08,
"logits/chosen": -2.7742843627929688,
"logits/rejected": -2.7704269886016846,
"logps/chosen": -240.16586303710938,
"logps/rejected": -341.08270263671875,
"loss": 0.2097,
"rewards/accuracies": 0.9398437738418579,
"rewards/chosen": 1.3970229625701904,
"rewards/margins": 2.205933094024658,
"rewards/rejected": -0.8089098930358887,
"step": 1230
},
{
"epoch": 0.84,
"learning_rate": 8.692365835222977e-08,
"logits/chosen": -2.7731618881225586,
"logits/rejected": -2.7807064056396484,
"logps/chosen": -246.8760223388672,
"logps/rejected": -394.09661865234375,
"loss": 0.1942,
"rewards/accuracies": 0.94921875,
"rewards/chosen": 1.4174280166625977,
"rewards/margins": 2.335336446762085,
"rewards/rejected": -0.9179089665412903,
"step": 1240
},
{
"epoch": 0.85,
"learning_rate": 8.314436885865457e-08,
"logits/chosen": -2.7794883251190186,
"logits/rejected": -2.7599997520446777,
"logps/chosen": -234.8397979736328,
"logps/rejected": -354.03411865234375,
"loss": 0.2101,
"rewards/accuracies": 0.9359375238418579,
"rewards/chosen": 1.3885688781738281,
"rewards/margins": 2.2632603645324707,
"rewards/rejected": -0.8746916651725769,
"step": 1250
},
{
"epoch": 0.86,
"learning_rate": 7.936507936507936e-08,
"logits/chosen": -2.7606375217437744,
"logits/rejected": -2.7535159587860107,
"logps/chosen": -225.62606811523438,
"logps/rejected": -382.0788269042969,
"loss": 0.2247,
"rewards/accuracies": 0.9375,
"rewards/chosen": 1.3528351783752441,
"rewards/margins": 2.18499755859375,
"rewards/rejected": -0.8321624994277954,
"step": 1260
},
{
"epoch": 0.86,
"learning_rate": 7.558578987150415e-08,
"logits/chosen": -2.7874550819396973,
"logits/rejected": -2.7440848350524902,
"logps/chosen": -216.8153533935547,
"logps/rejected": -372.12982177734375,
"loss": 0.2204,
"rewards/accuracies": 0.946093738079071,
"rewards/chosen": 1.3856043815612793,
"rewards/margins": 2.1913902759552,
"rewards/rejected": -0.8057858347892761,
"step": 1270
},
{
"epoch": 0.87,
"learning_rate": 7.180650037792895e-08,
"logits/chosen": -2.7706284523010254,
"logits/rejected": -2.7321717739105225,
"logps/chosen": -249.1674041748047,
"logps/rejected": -390.70855712890625,
"loss": 0.2218,
"rewards/accuracies": 0.932812511920929,
"rewards/chosen": 1.3266818523406982,
"rewards/margins": 2.204909086227417,
"rewards/rejected": -0.8782272338867188,
"step": 1280
},
{
"epoch": 0.88,
"learning_rate": 6.802721088435375e-08,
"logits/chosen": -2.771331310272217,
"logits/rejected": -2.7345921993255615,
"logps/chosen": -244.32217407226562,
"logps/rejected": -395.6925964355469,
"loss": 0.2148,
"rewards/accuracies": 0.93359375,
"rewards/chosen": 1.379319190979004,
"rewards/margins": 2.2045130729675293,
"rewards/rejected": -0.8251941800117493,
"step": 1290
},
{
"epoch": 0.88,
"learning_rate": 6.424792139077853e-08,
"logits/chosen": -2.781578779220581,
"logits/rejected": -2.7588868141174316,
"logps/chosen": -234.79800415039062,
"logps/rejected": -359.72332763671875,
"loss": 0.2047,
"rewards/accuracies": 0.9476562738418579,
"rewards/chosen": 1.4176688194274902,
"rewards/margins": 2.3017234802246094,
"rewards/rejected": -0.8840547800064087,
"step": 1300
},
{
"epoch": 0.89,
"learning_rate": 6.046863189720333e-08,
"logits/chosen": -2.770113706588745,
"logits/rejected": -2.73785662651062,
"logps/chosen": -244.0814971923828,
"logps/rejected": -370.0007019042969,
"loss": 0.2086,
"rewards/accuracies": 0.9351562261581421,
"rewards/chosen": 1.4492876529693604,
"rewards/margins": 2.2907984256744385,
"rewards/rejected": -0.8415109515190125,
"step": 1310
},
{
"epoch": 0.9,
"learning_rate": 5.6689342403628116e-08,
"logits/chosen": -2.753613233566284,
"logits/rejected": -2.7601518630981445,
"logps/chosen": -250.3900604248047,
"logps/rejected": -360.0616455078125,
"loss": 0.2099,
"rewards/accuracies": 0.9359375238418579,
"rewards/chosen": 1.4489208459854126,
"rewards/margins": 2.3246617317199707,
"rewards/rejected": -0.8757408261299133,
"step": 1320
},
{
"epoch": 0.9,
"learning_rate": 5.2910052910052905e-08,
"logits/chosen": -2.7654261589050293,
"logits/rejected": -2.7347397804260254,
"logps/chosen": -232.9058837890625,
"logps/rejected": -352.3494567871094,
"loss": 0.2101,
"rewards/accuracies": 0.9398437738418579,
"rewards/chosen": 1.4914627075195312,
"rewards/margins": 2.3455305099487305,
"rewards/rejected": -0.8540679216384888,
"step": 1330
},
{
"epoch": 0.91,
"learning_rate": 4.91307634164777e-08,
"logits/chosen": -2.7729830741882324,
"logits/rejected": -2.7506096363067627,
"logps/chosen": -237.5419158935547,
"logps/rejected": -361.7286071777344,
"loss": 0.2271,
"rewards/accuracies": 0.936718761920929,
"rewards/chosen": 1.3387925624847412,
"rewards/margins": 2.1680846214294434,
"rewards/rejected": -0.8292919397354126,
"step": 1340
},
{
"epoch": 0.92,
"learning_rate": 4.535147392290249e-08,
"logits/chosen": -2.758366584777832,
"logits/rejected": -2.747448444366455,
"logps/chosen": -262.02313232421875,
"logps/rejected": -371.6409912109375,
"loss": 0.2117,
"rewards/accuracies": 0.93359375,
"rewards/chosen": 1.4255142211914062,
"rewards/margins": 2.2661709785461426,
"rewards/rejected": -0.8406568765640259,
"step": 1350
},
{
"epoch": 0.92,
"learning_rate": 4.157218442932729e-08,
"logits/chosen": -2.7460246086120605,
"logits/rejected": -2.7499794960021973,
"logps/chosen": -242.7806396484375,
"logps/rejected": -374.79736328125,
"loss": 0.2305,
"rewards/accuracies": 0.9242187738418579,
"rewards/chosen": 1.3290668725967407,
"rewards/margins": 2.187917470932007,
"rewards/rejected": -0.8588504791259766,
"step": 1360
},
{
"epoch": 0.93,
"learning_rate": 3.779289493575208e-08,
"logits/chosen": -2.7681326866149902,
"logits/rejected": -2.7562155723571777,
"logps/chosen": -220.0043487548828,
"logps/rejected": -369.31268310546875,
"loss": 0.2015,
"rewards/accuracies": 0.944531261920929,
"rewards/chosen": 1.4147917032241821,
"rewards/margins": 2.347784996032715,
"rewards/rejected": -0.9329932332038879,
"step": 1370
},
{
"epoch": 0.94,
"learning_rate": 3.4013605442176873e-08,
"logits/chosen": -2.7685980796813965,
"logits/rejected": -2.761018753051758,
"logps/chosen": -244.3848114013672,
"logps/rejected": -352.2154235839844,
"loss": 0.2147,
"rewards/accuracies": 0.9398437738418579,
"rewards/chosen": 1.3917274475097656,
"rewards/margins": 2.2305819988250732,
"rewards/rejected": -0.8388546109199524,
"step": 1380
},
{
"epoch": 0.94,
"learning_rate": 3.023431594860166e-08,
"logits/chosen": -2.7724173069000244,
"logits/rejected": -2.773851156234741,
"logps/chosen": -251.663330078125,
"logps/rejected": -341.803466796875,
"loss": 0.1992,
"rewards/accuracies": 0.94921875,
"rewards/chosen": 1.4376652240753174,
"rewards/margins": 2.324432134628296,
"rewards/rejected": -0.886766791343689,
"step": 1390
},
{
"epoch": 0.95,
"learning_rate": 2.6455026455026453e-08,
"logits/chosen": -2.758798122406006,
"logits/rejected": -2.763350009918213,
"logps/chosen": -238.17745971679688,
"logps/rejected": -398.58135986328125,
"loss": 0.2004,
"rewards/accuracies": 0.9476562738418579,
"rewards/chosen": 1.4174001216888428,
"rewards/margins": 2.3445682525634766,
"rewards/rejected": -0.9271681904792786,
"step": 1400
},
{
"epoch": 0.96,
"learning_rate": 2.2675736961451246e-08,
"logits/chosen": -2.7801098823547363,
"logits/rejected": -2.7490382194519043,
"logps/chosen": -242.81613159179688,
"logps/rejected": -361.264892578125,
"loss": 0.2077,
"rewards/accuracies": 0.94140625,
"rewards/chosen": 1.4166629314422607,
"rewards/margins": 2.316483736038208,
"rewards/rejected": -0.8998208045959473,
"step": 1410
},
{
"epoch": 0.97,
"learning_rate": 1.889644746787604e-08,
"logits/chosen": -2.75722336769104,
"logits/rejected": -2.7228329181671143,
"logps/chosen": -251.5331268310547,
"logps/rejected": -375.8110046386719,
"loss": 0.2226,
"rewards/accuracies": 0.940625011920929,
"rewards/chosen": 1.4032243490219116,
"rewards/margins": 2.2185873985290527,
"rewards/rejected": -0.8153629302978516,
"step": 1420
},
{
"epoch": 0.97,
"learning_rate": 1.511715797430083e-08,
"logits/chosen": -2.7710134983062744,
"logits/rejected": -2.787081241607666,
"logps/chosen": -241.9620361328125,
"logps/rejected": -356.4383544921875,
"loss": 0.2074,
"rewards/accuracies": 0.938281238079071,
"rewards/chosen": 1.4058793783187866,
"rewards/margins": 2.3387274742126465,
"rewards/rejected": -0.9328481554985046,
"step": 1430
},
{
"epoch": 0.98,
"learning_rate": 1.1337868480725623e-08,
"logits/chosen": -2.788255214691162,
"logits/rejected": -2.790001392364502,
"logps/chosen": -249.0662078857422,
"logps/rejected": -375.603759765625,
"loss": 0.1976,
"rewards/accuracies": 0.9429687261581421,
"rewards/chosen": 1.4689807891845703,
"rewards/margins": 2.375899076461792,
"rewards/rejected": -0.9069182276725769,
"step": 1440
},
{
"epoch": 0.99,
"learning_rate": 7.558578987150416e-09,
"logits/chosen": -2.762585401535034,
"logits/rejected": -2.7085330486297607,
"logps/chosen": -238.41751098632812,
"logps/rejected": -380.84942626953125,
"loss": 0.2228,
"rewards/accuracies": 0.936718761920929,
"rewards/chosen": 1.4105838537216187,
"rewards/margins": 2.214503288269043,
"rewards/rejected": -0.8039194345474243,
"step": 1450
},
{
"epoch": 0.99,
"learning_rate": 3.779289493575208e-09,
"logits/chosen": -2.7654013633728027,
"logits/rejected": -2.7555670738220215,
"logps/chosen": -237.80899047851562,
"logps/rejected": -345.7412109375,
"loss": 0.2026,
"rewards/accuracies": 0.9515625238418579,
"rewards/chosen": 1.418304443359375,
"rewards/margins": 2.327260971069336,
"rewards/rejected": -0.9089563488960266,
"step": 1460
},
{
"epoch": 1.0,
"learning_rate": 0.0,
"logits/chosen": -2.75099515914917,
"logits/rejected": -2.7724432945251465,
"logps/chosen": -255.767578125,
"logps/rejected": -360.5829772949219,
"loss": 0.2019,
"rewards/accuracies": 0.946093738079071,
"rewards/chosen": 1.4355896711349487,
"rewards/margins": 2.3439955711364746,
"rewards/rejected": -0.9084057807922363,
"step": 1470
},
{
"epoch": 1.0,
"eval_logits/chosen": -2.617767333984375,
"eval_logits/rejected": -2.721874952316284,
"eval_logps/chosen": -238.54788208007812,
"eval_logps/rejected": -388.59033203125,
"eval_loss": 0.20815864205360413,
"eval_rewards/accuracies": 0.9413930773735046,
"eval_rewards/chosen": 1.3856867551803589,
"eval_rewards/margins": 2.292266845703125,
"eval_rewards/rejected": -0.9065799117088318,
"eval_runtime": 2798.4996,
"eval_samples_per_second": 3.395,
"eval_steps_per_second": 0.425,
"step": 1470
},
{
"epoch": 1.0,
"step": 1470,
"total_flos": 0.0,
"train_loss": 0.33413780781687524,
"train_runtime": 91396.7242,
"train_samples_per_second": 2.06,
"train_steps_per_second": 0.016
}
],
"logging_steps": 10,
"max_steps": 1470,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}