{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 436, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.022935779816513763, "grad_norm": 9.13641688324014, "learning_rate": 1.1363636363636363e-07, "logits/chosen": -2.6193928718566895, "logits/rejected": -2.552712917327881, "logps/chosen": -265.43743896484375, "logps/rejected": -236.1606903076172, "loss": 0.6931, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": 8.363189408555627e-05, "rewards/margins": 0.0003909034130629152, "rewards/rejected": -0.00030727163539268076, "step": 10 }, { "epoch": 0.045871559633027525, "grad_norm": 8.694428459805497, "learning_rate": 2.2727272727272726e-07, "logits/chosen": -2.6578612327575684, "logits/rejected": -2.575941324234009, "logps/chosen": -298.83441162109375, "logps/rejected": -274.2864685058594, "loss": 0.6921, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.0004915857571177185, "rewards/margins": 0.0012736506760120392, "rewards/rejected": -0.0017652364913374186, "step": 20 }, { "epoch": 0.06880733944954129, "grad_norm": 8.216933133006783, "learning_rate": 3.4090909090909085e-07, "logits/chosen": -2.675276279449463, "logits/rejected": -2.601776599884033, "logps/chosen": -290.555908203125, "logps/rejected": -234.41598510742188, "loss": 0.6878, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.007768664509057999, "rewards/margins": 0.0121098468080163, "rewards/rejected": -0.0043411822989583015, "step": 30 }, { "epoch": 0.09174311926605505, "grad_norm": 8.711915321379939, "learning_rate": 4.545454545454545e-07, "logits/chosen": -2.660461902618408, "logits/rejected": -2.6112020015716553, "logps/chosen": -281.26617431640625, "logps/rejected": -267.74810791015625, "loss": 0.6757, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.039030514657497406, "rewards/margins": 0.03858140856027603, "rewards/rejected": 0.0004491090658120811, "step": 40 }, { "epoch": 0.11467889908256881, "grad_norm": 9.886855722455959, "learning_rate": 4.997110275491701e-07, "logits/chosen": -2.6204681396484375, "logits/rejected": -2.6130080223083496, "logps/chosen": -294.60052490234375, "logps/rejected": -304.6225280761719, "loss": 0.6583, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.016506755724549294, "rewards/margins": 0.07089035958051682, "rewards/rejected": -0.05438361316919327, "step": 50 }, { "epoch": 0.13761467889908258, "grad_norm": 11.187614568198285, "learning_rate": 4.979475034558115e-07, "logits/chosen": -2.575127124786377, "logits/rejected": -2.5189614295959473, "logps/chosen": -294.8846130371094, "logps/rejected": -274.777587890625, "loss": 0.6358, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05399893596768379, "rewards/margins": 0.11821160465478897, "rewards/rejected": -0.17221052944660187, "step": 60 }, { "epoch": 0.16055045871559634, "grad_norm": 12.12486219819406, "learning_rate": 4.945923025551788e-07, "logits/chosen": -2.4791793823242188, "logits/rejected": -2.43456768989563, "logps/chosen": -341.71612548828125, "logps/rejected": -300.43731689453125, "loss": 0.6121, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1414516121149063, "rewards/margins": 0.24884521961212158, "rewards/rejected": -0.3902968466281891, "step": 70 }, { "epoch": 0.1834862385321101, "grad_norm": 12.125843765946055, "learning_rate": 4.896669632591651e-07, "logits/chosen": -2.398482084274292, "logits/rejected": -2.2879607677459717, "logps/chosen": -305.4925537109375, "logps/rejected": -298.49224853515625, "loss": 0.6049, "rewards/accuracies": 0.71875, "rewards/chosen": -0.1964271366596222, "rewards/margins": 0.2619345188140869, "rewards/rejected": -0.4583616256713867, "step": 80 }, { "epoch": 0.20642201834862386, "grad_norm": 14.181798116852052, "learning_rate": 4.832031033425662e-07, "logits/chosen": -2.314242124557495, "logits/rejected": -2.2462363243103027, "logps/chosen": -307.9869079589844, "logps/rejected": -303.9811706542969, "loss": 0.5963, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.20393487811088562, "rewards/margins": 0.3678644895553589, "rewards/rejected": -0.5717993974685669, "step": 90 }, { "epoch": 0.22935779816513763, "grad_norm": 21.310278276106065, "learning_rate": 4.752422169756047e-07, "logits/chosen": -1.5569207668304443, "logits/rejected": -1.490770936012268, "logps/chosen": -306.3553161621094, "logps/rejected": -343.92852783203125, "loss": 0.5718, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3393058478832245, "rewards/margins": 0.3806948661804199, "rewards/rejected": -0.7200007438659668, "step": 100 }, { "epoch": 0.22935779816513763, "eval_logits/chosen": -1.366591215133667, "eval_logits/rejected": -1.1398286819458008, "eval_logps/chosen": -323.2030944824219, "eval_logps/rejected": -340.7015075683594, "eval_loss": 0.5524524450302124, "eval_rewards/accuracies": 0.693965494632721, "eval_rewards/chosen": -0.3811298906803131, "eval_rewards/margins": 0.5606611967086792, "eval_rewards/rejected": -0.9417910575866699, "eval_runtime": 95.9209, "eval_samples_per_second": 18.953, "eval_steps_per_second": 0.302, "step": 100 }, { "epoch": 0.25229357798165136, "grad_norm": 22.6242102385037, "learning_rate": 4.658354083558188e-07, "logits/chosen": -1.3675466775894165, "logits/rejected": -1.107060432434082, "logps/chosen": -325.51593017578125, "logps/rejected": -339.6572265625, "loss": 0.5522, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.6032779216766357, "rewards/margins": 0.45596203207969666, "rewards/rejected": -1.0592399835586548, "step": 110 }, { "epoch": 0.27522935779816515, "grad_norm": 26.734564490123315, "learning_rate": 4.550430636492389e-07, "logits/chosen": -1.174917459487915, "logits/rejected": -1.0646188259124756, "logps/chosen": -337.2206726074219, "logps/rejected": -367.90960693359375, "loss": 0.5593, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6140136122703552, "rewards/margins": 0.5504432320594788, "rewards/rejected": -1.1644567251205444, "step": 120 }, { "epoch": 0.2981651376146789, "grad_norm": 25.5727518504957, "learning_rate": 4.429344633468004e-07, "logits/chosen": -0.7371357083320618, "logits/rejected": -0.4288739562034607, "logps/chosen": -328.1370544433594, "logps/rejected": -368.2788391113281, "loss": 0.5539, "rewards/accuracies": 0.71875, "rewards/chosen": -0.7195693850517273, "rewards/margins": 0.5370928645133972, "rewards/rejected": -1.256662130355835, "step": 130 }, { "epoch": 0.3211009174311927, "grad_norm": 27.718146660570127, "learning_rate": 4.2958733752443187e-07, "logits/chosen": -0.9468735456466675, "logits/rejected": -0.6918075680732727, "logps/chosen": -324.7828063964844, "logps/rejected": -322.2585144042969, "loss": 0.5515, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.48597660660743713, "rewards/margins": 0.4936434328556061, "rewards/rejected": -0.9796198606491089, "step": 140 }, { "epoch": 0.3440366972477064, "grad_norm": 27.968029844080455, "learning_rate": 4.150873668617898e-07, "logits/chosen": -0.30871185660362244, "logits/rejected": 0.24544291198253632, "logps/chosen": -354.21392822265625, "logps/rejected": -385.76666259765625, "loss": 0.5389, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.8767034411430359, "rewards/margins": 0.6490874290466309, "rewards/rejected": -1.5257909297943115, "step": 150 }, { "epoch": 0.3669724770642202, "grad_norm": 28.129228197799993, "learning_rate": 3.9952763262280397e-07, "logits/chosen": -0.545559287071228, "logits/rejected": -0.1624789535999298, "logps/chosen": -366.7821350097656, "logps/rejected": -417.90545654296875, "loss": 0.5313, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7023102045059204, "rewards/margins": 0.6170581579208374, "rewards/rejected": -1.3193682432174683, "step": 160 }, { "epoch": 0.38990825688073394, "grad_norm": 22.14745919356143, "learning_rate": 3.8300801912883414e-07, "logits/chosen": 0.30408772826194763, "logits/rejected": 0.8390473127365112, "logps/chosen": -362.6156311035156, "logps/rejected": -431.7569274902344, "loss": 0.5007, "rewards/accuracies": 0.71875, "rewards/chosen": -0.9101476669311523, "rewards/margins": 0.7067325711250305, "rewards/rejected": -1.616880178451538, "step": 170 }, { "epoch": 0.41284403669724773, "grad_norm": 21.278511576979266, "learning_rate": 3.6563457256020884e-07, "logits/chosen": 0.29116564989089966, "logits/rejected": 1.1298694610595703, "logps/chosen": -386.0079650878906, "logps/rejected": -382.1528625488281, "loss": 0.5349, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8265001177787781, "rewards/margins": 0.6100791692733765, "rewards/rejected": -1.4365794658660889, "step": 180 }, { "epoch": 0.43577981651376146, "grad_norm": 27.634146578624538, "learning_rate": 3.475188202022617e-07, "logits/chosen": 0.34451404213905334, "logits/rejected": 0.971908688545227, "logps/chosen": -325.24951171875, "logps/rejected": -408.52886962890625, "loss": 0.5132, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.6713501811027527, "rewards/margins": 0.7704941034317017, "rewards/rejected": -1.4418442249298096, "step": 190 }, { "epoch": 0.45871559633027525, "grad_norm": 23.689525185525035, "learning_rate": 3.287770545059052e-07, "logits/chosen": 0.3892287611961365, "logits/rejected": 1.2381625175476074, "logps/chosen": -350.74658203125, "logps/rejected": -392.03265380859375, "loss": 0.5281, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.7246497869491577, "rewards/margins": 0.7571171522140503, "rewards/rejected": -1.4817668199539185, "step": 200 }, { "epoch": 0.45871559633027525, "eval_logits/chosen": -0.12046875804662704, "eval_logits/rejected": 1.0060540437698364, "eval_logps/chosen": -347.0378112792969, "eval_logps/rejected": -388.8913269042969, "eval_loss": 0.5102300047874451, "eval_rewards/accuracies": 0.7456896305084229, "eval_rewards/chosen": -0.6194772124290466, "eval_rewards/margins": 0.804211437702179, "eval_rewards/rejected": -1.4236886501312256, "eval_runtime": 96.4808, "eval_samples_per_second": 18.843, "eval_steps_per_second": 0.301, "step": 200 }, { "epoch": 0.481651376146789, "grad_norm": 29.46326810590551, "learning_rate": 3.0952958655864954e-07, "logits/chosen": 0.42962178587913513, "logits/rejected": 1.0683366060256958, "logps/chosen": -348.16741943359375, "logps/rejected": -402.72344970703125, "loss": 0.5081, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.7486302256584167, "rewards/margins": 0.7351824045181274, "rewards/rejected": -1.4838125705718994, "step": 210 }, { "epoch": 0.5045871559633027, "grad_norm": 25.818080936093143, "learning_rate": 2.898999737583448e-07, "logits/chosen": 1.011344075202942, "logits/rejected": 2.023766279220581, "logps/chosen": -427.4793395996094, "logps/rejected": -478.3731994628906, "loss": 0.4988, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0479902029037476, "rewards/margins": 0.8860396146774292, "rewards/rejected": -1.9340295791625977, "step": 220 }, { "epoch": 0.5275229357798165, "grad_norm": 22.47549460336781, "learning_rate": 2.7001422664752333e-07, "logits/chosen": 0.21876761317253113, "logits/rejected": 0.814139723777771, "logps/chosen": -328.03802490234375, "logps/rejected": -390.96160888671875, "loss": 0.5217, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.600945234298706, "rewards/margins": 0.6524718403816223, "rewards/rejected": -1.2534170150756836, "step": 230 }, { "epoch": 0.5504587155963303, "grad_norm": 21.171293378773015, "learning_rate": 2.5e-07, "logits/chosen": -0.24384799599647522, "logits/rejected": 0.6541222333908081, "logps/chosen": -352.31732177734375, "logps/rejected": -387.68975830078125, "loss": 0.5355, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6478549838066101, "rewards/margins": 0.6155428886413574, "rewards/rejected": -1.2633978128433228, "step": 240 }, { "epoch": 0.573394495412844, "grad_norm": 20.854041628569117, "learning_rate": 2.2998577335247667e-07, "logits/chosen": 0.6312379837036133, "logits/rejected": 1.7261543273925781, "logps/chosen": -404.70721435546875, "logps/rejected": -441.50775146484375, "loss": 0.5067, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.0091339349746704, "rewards/margins": 0.801504909992218, "rewards/rejected": -1.8106390237808228, "step": 250 }, { "epoch": 0.5963302752293578, "grad_norm": 22.834810900622486, "learning_rate": 2.1010002624165524e-07, "logits/chosen": 1.0223064422607422, "logits/rejected": 1.8627128601074219, "logps/chosen": -392.29034423828125, "logps/rejected": -494.05767822265625, "loss": 0.5001, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.215757966041565, "rewards/margins": 0.9808815121650696, "rewards/rejected": -2.1966395378112793, "step": 260 }, { "epoch": 0.6192660550458715, "grad_norm": 24.01182646236212, "learning_rate": 1.9047041344135043e-07, "logits/chosen": 0.8764435052871704, "logits/rejected": 1.520572304725647, "logps/chosen": -372.30413818359375, "logps/rejected": -447.957763671875, "loss": 0.4994, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0624181032180786, "rewards/margins": 0.8986889123916626, "rewards/rejected": -1.9611070156097412, "step": 270 }, { "epoch": 0.6422018348623854, "grad_norm": 27.05306764931255, "learning_rate": 1.7122294549409482e-07, "logits/chosen": 0.5640047788619995, "logits/rejected": 1.584166169166565, "logps/chosen": -370.31890869140625, "logps/rejected": -454.9625549316406, "loss": 0.5238, "rewards/accuracies": 0.78125, "rewards/chosen": -0.9732006192207336, "rewards/margins": 0.9384667277336121, "rewards/rejected": -1.9116674661636353, "step": 280 }, { "epoch": 0.6651376146788991, "grad_norm": 26.88772523041706, "learning_rate": 1.524811797977383e-07, "logits/chosen": 0.8203527331352234, "logits/rejected": 1.9352061748504639, "logps/chosen": -392.50665283203125, "logps/rejected": -447.2642517089844, "loss": 0.5027, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.0477584600448608, "rewards/margins": 0.8857167363166809, "rewards/rejected": -1.9334752559661865, "step": 290 }, { "epoch": 0.6880733944954128, "grad_norm": 23.287905403576715, "learning_rate": 1.3436542743979125e-07, "logits/chosen": 0.5250275731086731, "logits/rejected": 1.2394458055496216, "logps/chosen": -408.4005432128906, "logps/rejected": -430.564453125, "loss": 0.5213, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.0245119333267212, "rewards/margins": 0.6925337314605713, "rewards/rejected": -1.717045545578003, "step": 300 }, { "epoch": 0.6880733944954128, "eval_logits/chosen": 0.3984619081020355, "eval_logits/rejected": 1.7280592918395996, "eval_logps/chosen": -379.9385070800781, "eval_logps/rejected": -434.2354431152344, "eval_loss": 0.4939006567001343, "eval_rewards/accuracies": 0.7543103694915771, "eval_rewards/chosen": -0.9484842419624329, "eval_rewards/margins": 0.9286458492279053, "eval_rewards/rejected": -1.877130150794983, "eval_runtime": 93.6728, "eval_samples_per_second": 19.408, "eval_steps_per_second": 0.31, "step": 300 }, { "epoch": 0.7110091743119266, "grad_norm": 27.450541462100524, "learning_rate": 1.1699198087116588e-07, "logits/chosen": 0.4360331594944, "logits/rejected": 1.4486221075057983, "logps/chosen": -380.5530700683594, "logps/rejected": -445.94012451171875, "loss": 0.5239, "rewards/accuracies": 0.75, "rewards/chosen": -1.0258790254592896, "rewards/margins": 0.7690252065658569, "rewards/rejected": -1.794904351234436, "step": 310 }, { "epoch": 0.7339449541284404, "grad_norm": 28.27778056239839, "learning_rate": 1.00472367377196e-07, "logits/chosen": 0.6628949642181396, "logits/rejected": 2.0110418796539307, "logps/chosen": -364.7244873046875, "logps/rejected": -434.63568115234375, "loss": 0.5013, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.9049250483512878, "rewards/margins": 1.0965297222137451, "rewards/rejected": -2.0014548301696777, "step": 320 }, { "epoch": 0.7568807339449541, "grad_norm": 29.667411994858202, "learning_rate": 8.49126331382102e-08, "logits/chosen": 0.9564372897148132, "logits/rejected": 1.7951595783233643, "logps/chosen": -377.6117858886719, "logps/rejected": -421.7931213378906, "loss": 0.5021, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.030896544456482, "rewards/margins": 0.7160336375236511, "rewards/rejected": -1.7469301223754883, "step": 330 }, { "epoch": 0.7798165137614679, "grad_norm": 30.895742020543867, "learning_rate": 7.041266247556812e-08, "logits/chosen": 0.8280539512634277, "logits/rejected": 1.6557743549346924, "logps/chosen": -388.16351318359375, "logps/rejected": -436.76531982421875, "loss": 0.4897, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.001708984375, "rewards/margins": 0.7661042809486389, "rewards/rejected": -1.7678134441375732, "step": 340 }, { "epoch": 0.8027522935779816, "grad_norm": 27.764567632511632, "learning_rate": 5.706553665319955e-08, "logits/chosen": 0.9885643124580383, "logits/rejected": 2.0599663257598877, "logps/chosen": -376.7607727050781, "logps/rejected": -424.14910888671875, "loss": 0.5125, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0226970911026, "rewards/margins": 0.8421795964241028, "rewards/rejected": -1.8648765087127686, "step": 350 }, { "epoch": 0.8256880733944955, "grad_norm": 25.930811449352582, "learning_rate": 4.4956936350761005e-08, "logits/chosen": 0.9005087614059448, "logits/rejected": 1.6520808935165405, "logps/chosen": -343.540771484375, "logps/rejected": -431.3505859375, "loss": 0.4866, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.912207305431366, "rewards/margins": 0.8159275054931641, "rewards/rejected": -1.7281348705291748, "step": 360 }, { "epoch": 0.8486238532110092, "grad_norm": 26.612138820793557, "learning_rate": 3.416459164418123e-08, "logits/chosen": 0.6678069829940796, "logits/rejected": 1.7626101970672607, "logps/chosen": -394.8943176269531, "logps/rejected": -445.3042907714844, "loss": 0.498, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9530981183052063, "rewards/margins": 0.8741987943649292, "rewards/rejected": -1.8272969722747803, "step": 370 }, { "epoch": 0.8715596330275229, "grad_norm": 33.570889750971375, "learning_rate": 2.475778302439524e-08, "logits/chosen": 0.7491241693496704, "logits/rejected": 2.0040550231933594, "logps/chosen": -383.720703125, "logps/rejected": -444.7874450683594, "loss": 0.5038, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8734270930290222, "rewards/margins": 0.9978850483894348, "rewards/rejected": -1.871312141418457, "step": 380 }, { "epoch": 0.8944954128440367, "grad_norm": 27.00226950820523, "learning_rate": 1.6796896657433805e-08, "logits/chosen": 1.115740180015564, "logits/rejected": 2.061350107192993, "logps/chosen": -346.07574462890625, "logps/rejected": -409.1957092285156, "loss": 0.5106, "rewards/accuracies": 0.75, "rewards/chosen": -0.9411827325820923, "rewards/margins": 0.8577120900154114, "rewards/rejected": -1.7988946437835693, "step": 390 }, { "epoch": 0.9174311926605505, "grad_norm": 29.536262595417668, "learning_rate": 1.0333036740834855e-08, "logits/chosen": 1.2691802978515625, "logits/rejected": 2.047137975692749, "logps/chosen": -316.4430236816406, "logps/rejected": -404.92962646484375, "loss": 0.5037, "rewards/accuracies": 0.75, "rewards/chosen": -0.91883385181427, "rewards/margins": 0.8383063077926636, "rewards/rejected": -1.7571399211883545, "step": 400 }, { "epoch": 0.9174311926605505, "eval_logits/chosen": 0.9259076118469238, "eval_logits/rejected": 2.242485761642456, "eval_logps/chosen": -379.86492919921875, "eval_logps/rejected": -438.6234436035156, "eval_loss": 0.48977744579315186, "eval_rewards/accuracies": 0.7543103694915771, "eval_rewards/chosen": -0.9477482438087463, "eval_rewards/margins": 0.9732612371444702, "eval_rewards/rejected": -1.9210097789764404, "eval_runtime": 94.7635, "eval_samples_per_second": 19.185, "eval_steps_per_second": 0.306, "step": 400 }, { "epoch": 0.9403669724770642, "grad_norm": 34.10288961478374, "learning_rate": 5.4076974448211685e-09, "logits/chosen": 1.2957347631454468, "logits/rejected": 2.590200901031494, "logps/chosen": -376.28472900390625, "logps/rejected": -439.2782287597656, "loss": 0.4987, "rewards/accuracies": 0.75, "rewards/chosen": -1.053328275680542, "rewards/margins": 0.9589872360229492, "rewards/rejected": -2.012315273284912, "step": 410 }, { "epoch": 0.963302752293578, "grad_norm": 33.702853354259936, "learning_rate": 2.052496544188487e-09, "logits/chosen": 1.3565906286239624, "logits/rejected": 2.3881638050079346, "logps/chosen": -364.3350830078125, "logps/rejected": -450.5853576660156, "loss": 0.4968, "rewards/accuracies": 0.75, "rewards/chosen": -1.0489609241485596, "rewards/margins": 0.9981186985969543, "rewards/rejected": -2.047079563140869, "step": 420 }, { "epoch": 0.9862385321100917, "grad_norm": 31.154782672945057, "learning_rate": 2.889724508297886e-10, "logits/chosen": 0.9770921468734741, "logits/rejected": 2.4192652702331543, "logps/chosen": -402.85589599609375, "logps/rejected": -433.6226501464844, "loss": 0.4895, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9968045949935913, "rewards/margins": 0.8990565538406372, "rewards/rejected": -1.895861268043518, "step": 430 }, { "epoch": 1.0, "step": 436, "total_flos": 0.0, "train_loss": 0.5438270043889317, "train_runtime": 11297.0501, "train_samples_per_second": 4.936, "train_steps_per_second": 0.039 } ], "logging_steps": 10, "max_steps": 436, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }