|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 436, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022935779816513763, |
|
"grad_norm": 9.13641688324014, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -2.6193928718566895, |
|
"logits/rejected": -2.552712917327881, |
|
"logps/chosen": -265.43743896484375, |
|
"logps/rejected": -236.1606903076172, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": 8.363189408555627e-05, |
|
"rewards/margins": 0.0003909034130629152, |
|
"rewards/rejected": -0.00030727163539268076, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045871559633027525, |
|
"grad_norm": 8.694428459805497, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -2.6578612327575684, |
|
"logits/rejected": -2.575941324234009, |
|
"logps/chosen": -298.83441162109375, |
|
"logps/rejected": -274.2864685058594, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0004915857571177185, |
|
"rewards/margins": 0.0012736506760120392, |
|
"rewards/rejected": -0.0017652364913374186, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06880733944954129, |
|
"grad_norm": 8.216933133006783, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -2.675276279449463, |
|
"logits/rejected": -2.601776599884033, |
|
"logps/chosen": -290.555908203125, |
|
"logps/rejected": -234.41598510742188, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.007768664509057999, |
|
"rewards/margins": 0.0121098468080163, |
|
"rewards/rejected": -0.0043411822989583015, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09174311926605505, |
|
"grad_norm": 8.711915321379939, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.660461902618408, |
|
"logits/rejected": -2.6112020015716553, |
|
"logps/chosen": -281.26617431640625, |
|
"logps/rejected": -267.74810791015625, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.039030514657497406, |
|
"rewards/margins": 0.03858140856027603, |
|
"rewards/rejected": 0.0004491090658120811, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"grad_norm": 9.886855722455959, |
|
"learning_rate": 4.997110275491701e-07, |
|
"logits/chosen": -2.6204681396484375, |
|
"logits/rejected": -2.6130080223083496, |
|
"logps/chosen": -294.60052490234375, |
|
"logps/rejected": -304.6225280761719, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.016506755724549294, |
|
"rewards/margins": 0.07089035958051682, |
|
"rewards/rejected": -0.05438361316919327, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13761467889908258, |
|
"grad_norm": 11.187614568198285, |
|
"learning_rate": 4.979475034558115e-07, |
|
"logits/chosen": -2.575127124786377, |
|
"logits/rejected": -2.5189614295959473, |
|
"logps/chosen": -294.8846130371094, |
|
"logps/rejected": -274.777587890625, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.05399893596768379, |
|
"rewards/margins": 0.11821160465478897, |
|
"rewards/rejected": -0.17221052944660187, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16055045871559634, |
|
"grad_norm": 12.12486219819406, |
|
"learning_rate": 4.945923025551788e-07, |
|
"logits/chosen": -2.4791793823242188, |
|
"logits/rejected": -2.43456768989563, |
|
"logps/chosen": -341.71612548828125, |
|
"logps/rejected": -300.43731689453125, |
|
"loss": 0.6121, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1414516121149063, |
|
"rewards/margins": 0.24884521961212158, |
|
"rewards/rejected": -0.3902968466281891, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1834862385321101, |
|
"grad_norm": 12.125843765946055, |
|
"learning_rate": 4.896669632591651e-07, |
|
"logits/chosen": -2.398482084274292, |
|
"logits/rejected": -2.2879607677459717, |
|
"logps/chosen": -305.4925537109375, |
|
"logps/rejected": -298.49224853515625, |
|
"loss": 0.6049, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1964271366596222, |
|
"rewards/margins": 0.2619345188140869, |
|
"rewards/rejected": -0.4583616256713867, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.20642201834862386, |
|
"grad_norm": 14.181798116852052, |
|
"learning_rate": 4.832031033425662e-07, |
|
"logits/chosen": -2.314242124557495, |
|
"logits/rejected": -2.2462363243103027, |
|
"logps/chosen": -307.9869079589844, |
|
"logps/rejected": -303.9811706542969, |
|
"loss": 0.5963, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.20393487811088562, |
|
"rewards/margins": 0.3678644895553589, |
|
"rewards/rejected": -0.5717993974685669, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"grad_norm": 21.310278276106065, |
|
"learning_rate": 4.752422169756047e-07, |
|
"logits/chosen": -1.5569207668304443, |
|
"logits/rejected": -1.490770936012268, |
|
"logps/chosen": -306.3553161621094, |
|
"logps/rejected": -343.92852783203125, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3393058478832245, |
|
"rewards/margins": 0.3806948661804199, |
|
"rewards/rejected": -0.7200007438659668, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"eval_logits/chosen": -1.366591215133667, |
|
"eval_logits/rejected": -1.1398286819458008, |
|
"eval_logps/chosen": -323.2030944824219, |
|
"eval_logps/rejected": -340.7015075683594, |
|
"eval_loss": 0.5524524450302124, |
|
"eval_rewards/accuracies": 0.693965494632721, |
|
"eval_rewards/chosen": -0.3811298906803131, |
|
"eval_rewards/margins": 0.5606611967086792, |
|
"eval_rewards/rejected": -0.9417910575866699, |
|
"eval_runtime": 95.9209, |
|
"eval_samples_per_second": 18.953, |
|
"eval_steps_per_second": 0.302, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25229357798165136, |
|
"grad_norm": 22.6242102385037, |
|
"learning_rate": 4.658354083558188e-07, |
|
"logits/chosen": -1.3675466775894165, |
|
"logits/rejected": -1.107060432434082, |
|
"logps/chosen": -325.51593017578125, |
|
"logps/rejected": -339.6572265625, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6032779216766357, |
|
"rewards/margins": 0.45596203207969666, |
|
"rewards/rejected": -1.0592399835586548, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27522935779816515, |
|
"grad_norm": 26.734564490123315, |
|
"learning_rate": 4.550430636492389e-07, |
|
"logits/chosen": -1.174917459487915, |
|
"logits/rejected": -1.0646188259124756, |
|
"logps/chosen": -337.2206726074219, |
|
"logps/rejected": -367.90960693359375, |
|
"loss": 0.5593, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6140136122703552, |
|
"rewards/margins": 0.5504432320594788, |
|
"rewards/rejected": -1.1644567251205444, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2981651376146789, |
|
"grad_norm": 25.5727518504957, |
|
"learning_rate": 4.429344633468004e-07, |
|
"logits/chosen": -0.7371357083320618, |
|
"logits/rejected": -0.4288739562034607, |
|
"logps/chosen": -328.1370544433594, |
|
"logps/rejected": -368.2788391113281, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7195693850517273, |
|
"rewards/margins": 0.5370928645133972, |
|
"rewards/rejected": -1.256662130355835, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3211009174311927, |
|
"grad_norm": 27.718146660570127, |
|
"learning_rate": 4.2958733752443187e-07, |
|
"logits/chosen": -0.9468735456466675, |
|
"logits/rejected": -0.6918075680732727, |
|
"logps/chosen": -324.7828063964844, |
|
"logps/rejected": -322.2585144042969, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.48597660660743713, |
|
"rewards/margins": 0.4936434328556061, |
|
"rewards/rejected": -0.9796198606491089, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"grad_norm": 27.968029844080455, |
|
"learning_rate": 4.150873668617898e-07, |
|
"logits/chosen": -0.30871185660362244, |
|
"logits/rejected": 0.24544291198253632, |
|
"logps/chosen": -354.21392822265625, |
|
"logps/rejected": -385.76666259765625, |
|
"loss": 0.5389, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8767034411430359, |
|
"rewards/margins": 0.6490874290466309, |
|
"rewards/rejected": -1.5257909297943115, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 28.129228197799993, |
|
"learning_rate": 3.9952763262280397e-07, |
|
"logits/chosen": -0.545559287071228, |
|
"logits/rejected": -0.1624789535999298, |
|
"logps/chosen": -366.7821350097656, |
|
"logps/rejected": -417.90545654296875, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7023102045059204, |
|
"rewards/margins": 0.6170581579208374, |
|
"rewards/rejected": -1.3193682432174683, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38990825688073394, |
|
"grad_norm": 22.14745919356143, |
|
"learning_rate": 3.8300801912883414e-07, |
|
"logits/chosen": 0.30408772826194763, |
|
"logits/rejected": 0.8390473127365112, |
|
"logps/chosen": -362.6156311035156, |
|
"logps/rejected": -431.7569274902344, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9101476669311523, |
|
"rewards/margins": 0.7067325711250305, |
|
"rewards/rejected": -1.616880178451538, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41284403669724773, |
|
"grad_norm": 21.278511576979266, |
|
"learning_rate": 3.6563457256020884e-07, |
|
"logits/chosen": 0.29116564989089966, |
|
"logits/rejected": 1.1298694610595703, |
|
"logps/chosen": -386.0079650878906, |
|
"logps/rejected": -382.1528625488281, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8265001177787781, |
|
"rewards/margins": 0.6100791692733765, |
|
"rewards/rejected": -1.4365794658660889, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.43577981651376146, |
|
"grad_norm": 27.634146578624538, |
|
"learning_rate": 3.475188202022617e-07, |
|
"logits/chosen": 0.34451404213905334, |
|
"logits/rejected": 0.971908688545227, |
|
"logps/chosen": -325.24951171875, |
|
"logps/rejected": -408.52886962890625, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.6713501811027527, |
|
"rewards/margins": 0.7704941034317017, |
|
"rewards/rejected": -1.4418442249298096, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"grad_norm": 23.689525185525035, |
|
"learning_rate": 3.287770545059052e-07, |
|
"logits/chosen": 0.3892287611961365, |
|
"logits/rejected": 1.2381625175476074, |
|
"logps/chosen": -350.74658203125, |
|
"logps/rejected": -392.03265380859375, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7246497869491577, |
|
"rewards/margins": 0.7571171522140503, |
|
"rewards/rejected": -1.4817668199539185, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"eval_logits/chosen": -0.12046875804662704, |
|
"eval_logits/rejected": 1.0060540437698364, |
|
"eval_logps/chosen": -347.0378112792969, |
|
"eval_logps/rejected": -388.8913269042969, |
|
"eval_loss": 0.5102300047874451, |
|
"eval_rewards/accuracies": 0.7456896305084229, |
|
"eval_rewards/chosen": -0.6194772124290466, |
|
"eval_rewards/margins": 0.804211437702179, |
|
"eval_rewards/rejected": -1.4236886501312256, |
|
"eval_runtime": 96.4808, |
|
"eval_samples_per_second": 18.843, |
|
"eval_steps_per_second": 0.301, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.481651376146789, |
|
"grad_norm": 29.46326810590551, |
|
"learning_rate": 3.0952958655864954e-07, |
|
"logits/chosen": 0.42962178587913513, |
|
"logits/rejected": 1.0683366060256958, |
|
"logps/chosen": -348.16741943359375, |
|
"logps/rejected": -402.72344970703125, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.7486302256584167, |
|
"rewards/margins": 0.7351824045181274, |
|
"rewards/rejected": -1.4838125705718994, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5045871559633027, |
|
"grad_norm": 25.818080936093143, |
|
"learning_rate": 2.898999737583448e-07, |
|
"logits/chosen": 1.011344075202942, |
|
"logits/rejected": 2.023766279220581, |
|
"logps/chosen": -427.4793395996094, |
|
"logps/rejected": -478.3731994628906, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.0479902029037476, |
|
"rewards/margins": 0.8860396146774292, |
|
"rewards/rejected": -1.9340295791625977, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5275229357798165, |
|
"grad_norm": 22.47549460336781, |
|
"learning_rate": 2.7001422664752333e-07, |
|
"logits/chosen": 0.21876761317253113, |
|
"logits/rejected": 0.814139723777771, |
|
"logps/chosen": -328.03802490234375, |
|
"logps/rejected": -390.96160888671875, |
|
"loss": 0.5217, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.600945234298706, |
|
"rewards/margins": 0.6524718403816223, |
|
"rewards/rejected": -1.2534170150756836, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5504587155963303, |
|
"grad_norm": 21.171293378773015, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -0.24384799599647522, |
|
"logits/rejected": 0.6541222333908081, |
|
"logps/chosen": -352.31732177734375, |
|
"logps/rejected": -387.68975830078125, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6478549838066101, |
|
"rewards/margins": 0.6155428886413574, |
|
"rewards/rejected": -1.2633978128433228, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"grad_norm": 20.854041628569117, |
|
"learning_rate": 2.2998577335247667e-07, |
|
"logits/chosen": 0.6312379837036133, |
|
"logits/rejected": 1.7261543273925781, |
|
"logps/chosen": -404.70721435546875, |
|
"logps/rejected": -441.50775146484375, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.0091339349746704, |
|
"rewards/margins": 0.801504909992218, |
|
"rewards/rejected": -1.8106390237808228, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5963302752293578, |
|
"grad_norm": 22.834810900622486, |
|
"learning_rate": 2.1010002624165524e-07, |
|
"logits/chosen": 1.0223064422607422, |
|
"logits/rejected": 1.8627128601074219, |
|
"logps/chosen": -392.29034423828125, |
|
"logps/rejected": -494.05767822265625, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.215757966041565, |
|
"rewards/margins": 0.9808815121650696, |
|
"rewards/rejected": -2.1966395378112793, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6192660550458715, |
|
"grad_norm": 24.01182646236212, |
|
"learning_rate": 1.9047041344135043e-07, |
|
"logits/chosen": 0.8764435052871704, |
|
"logits/rejected": 1.520572304725647, |
|
"logps/chosen": -372.30413818359375, |
|
"logps/rejected": -447.957763671875, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0624181032180786, |
|
"rewards/margins": 0.8986889123916626, |
|
"rewards/rejected": -1.9611070156097412, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6422018348623854, |
|
"grad_norm": 27.05306764931255, |
|
"learning_rate": 1.7122294549409482e-07, |
|
"logits/chosen": 0.5640047788619995, |
|
"logits/rejected": 1.584166169166565, |
|
"logps/chosen": -370.31890869140625, |
|
"logps/rejected": -454.9625549316406, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.9732006192207336, |
|
"rewards/margins": 0.9384667277336121, |
|
"rewards/rejected": -1.9116674661636353, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6651376146788991, |
|
"grad_norm": 26.88772523041706, |
|
"learning_rate": 1.524811797977383e-07, |
|
"logits/chosen": 0.8203527331352234, |
|
"logits/rejected": 1.9352061748504639, |
|
"logps/chosen": -392.50665283203125, |
|
"logps/rejected": -447.2642517089844, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.0477584600448608, |
|
"rewards/margins": 0.8857167363166809, |
|
"rewards/rejected": -1.9334752559661865, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"grad_norm": 23.287905403576715, |
|
"learning_rate": 1.3436542743979125e-07, |
|
"logits/chosen": 0.5250275731086731, |
|
"logits/rejected": 1.2394458055496216, |
|
"logps/chosen": -408.4005432128906, |
|
"logps/rejected": -430.564453125, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.0245119333267212, |
|
"rewards/margins": 0.6925337314605713, |
|
"rewards/rejected": -1.717045545578003, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"eval_logits/chosen": 0.3984619081020355, |
|
"eval_logits/rejected": 1.7280592918395996, |
|
"eval_logps/chosen": -379.9385070800781, |
|
"eval_logps/rejected": -434.2354431152344, |
|
"eval_loss": 0.4939006567001343, |
|
"eval_rewards/accuracies": 0.7543103694915771, |
|
"eval_rewards/chosen": -0.9484842419624329, |
|
"eval_rewards/margins": 0.9286458492279053, |
|
"eval_rewards/rejected": -1.877130150794983, |
|
"eval_runtime": 93.6728, |
|
"eval_samples_per_second": 19.408, |
|
"eval_steps_per_second": 0.31, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7110091743119266, |
|
"grad_norm": 27.450541462100524, |
|
"learning_rate": 1.1699198087116588e-07, |
|
"logits/chosen": 0.4360331594944, |
|
"logits/rejected": 1.4486221075057983, |
|
"logps/chosen": -380.5530700683594, |
|
"logps/rejected": -445.94012451171875, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0258790254592896, |
|
"rewards/margins": 0.7690252065658569, |
|
"rewards/rejected": -1.794904351234436, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 28.27778056239839, |
|
"learning_rate": 1.00472367377196e-07, |
|
"logits/chosen": 0.6628949642181396, |
|
"logits/rejected": 2.0110418796539307, |
|
"logps/chosen": -364.7244873046875, |
|
"logps/rejected": -434.63568115234375, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.9049250483512878, |
|
"rewards/margins": 1.0965297222137451, |
|
"rewards/rejected": -2.0014548301696777, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7568807339449541, |
|
"grad_norm": 29.667411994858202, |
|
"learning_rate": 8.49126331382102e-08, |
|
"logits/chosen": 0.9564372897148132, |
|
"logits/rejected": 1.7951595783233643, |
|
"logps/chosen": -377.6117858886719, |
|
"logps/rejected": -421.7931213378906, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.030896544456482, |
|
"rewards/margins": 0.7160336375236511, |
|
"rewards/rejected": -1.7469301223754883, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7798165137614679, |
|
"grad_norm": 30.895742020543867, |
|
"learning_rate": 7.041266247556812e-08, |
|
"logits/chosen": 0.8280539512634277, |
|
"logits/rejected": 1.6557743549346924, |
|
"logps/chosen": -388.16351318359375, |
|
"logps/rejected": -436.76531982421875, |
|
"loss": 0.4897, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.001708984375, |
|
"rewards/margins": 0.7661042809486389, |
|
"rewards/rejected": -1.7678134441375732, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"grad_norm": 27.764567632511632, |
|
"learning_rate": 5.706553665319955e-08, |
|
"logits/chosen": 0.9885643124580383, |
|
"logits/rejected": 2.0599663257598877, |
|
"logps/chosen": -376.7607727050781, |
|
"logps/rejected": -424.14910888671875, |
|
"loss": 0.5125, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0226970911026, |
|
"rewards/margins": 0.8421795964241028, |
|
"rewards/rejected": -1.8648765087127686, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8256880733944955, |
|
"grad_norm": 25.930811449352582, |
|
"learning_rate": 4.4956936350761005e-08, |
|
"logits/chosen": 0.9005087614059448, |
|
"logits/rejected": 1.6520808935165405, |
|
"logps/chosen": -343.540771484375, |
|
"logps/rejected": -431.3505859375, |
|
"loss": 0.4866, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.912207305431366, |
|
"rewards/margins": 0.8159275054931641, |
|
"rewards/rejected": -1.7281348705291748, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8486238532110092, |
|
"grad_norm": 26.612138820793557, |
|
"learning_rate": 3.416459164418123e-08, |
|
"logits/chosen": 0.6678069829940796, |
|
"logits/rejected": 1.7626101970672607, |
|
"logps/chosen": -394.8943176269531, |
|
"logps/rejected": -445.3042907714844, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9530981183052063, |
|
"rewards/margins": 0.8741987943649292, |
|
"rewards/rejected": -1.8272969722747803, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8715596330275229, |
|
"grad_norm": 33.570889750971375, |
|
"learning_rate": 2.475778302439524e-08, |
|
"logits/chosen": 0.7491241693496704, |
|
"logits/rejected": 2.0040550231933594, |
|
"logps/chosen": -383.720703125, |
|
"logps/rejected": -444.7874450683594, |
|
"loss": 0.5038, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8734270930290222, |
|
"rewards/margins": 0.9978850483894348, |
|
"rewards/rejected": -1.871312141418457, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8944954128440367, |
|
"grad_norm": 27.00226950820523, |
|
"learning_rate": 1.6796896657433805e-08, |
|
"logits/chosen": 1.115740180015564, |
|
"logits/rejected": 2.061350107192993, |
|
"logps/chosen": -346.07574462890625, |
|
"logps/rejected": -409.1957092285156, |
|
"loss": 0.5106, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9411827325820923, |
|
"rewards/margins": 0.8577120900154114, |
|
"rewards/rejected": -1.7988946437835693, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"grad_norm": 29.536262595417668, |
|
"learning_rate": 1.0333036740834855e-08, |
|
"logits/chosen": 1.2691802978515625, |
|
"logits/rejected": 2.047137975692749, |
|
"logps/chosen": -316.4430236816406, |
|
"logps/rejected": -404.92962646484375, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.91883385181427, |
|
"rewards/margins": 0.8383063077926636, |
|
"rewards/rejected": -1.7571399211883545, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"eval_logits/chosen": 0.9259076118469238, |
|
"eval_logits/rejected": 2.242485761642456, |
|
"eval_logps/chosen": -379.86492919921875, |
|
"eval_logps/rejected": -438.6234436035156, |
|
"eval_loss": 0.48977744579315186, |
|
"eval_rewards/accuracies": 0.7543103694915771, |
|
"eval_rewards/chosen": -0.9477482438087463, |
|
"eval_rewards/margins": 0.9732612371444702, |
|
"eval_rewards/rejected": -1.9210097789764404, |
|
"eval_runtime": 94.7635, |
|
"eval_samples_per_second": 19.185, |
|
"eval_steps_per_second": 0.306, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9403669724770642, |
|
"grad_norm": 34.10288961478374, |
|
"learning_rate": 5.4076974448211685e-09, |
|
"logits/chosen": 1.2957347631454468, |
|
"logits/rejected": 2.590200901031494, |
|
"logps/chosen": -376.28472900390625, |
|
"logps/rejected": -439.2782287597656, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.053328275680542, |
|
"rewards/margins": 0.9589872360229492, |
|
"rewards/rejected": -2.012315273284912, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.963302752293578, |
|
"grad_norm": 33.702853354259936, |
|
"learning_rate": 2.052496544188487e-09, |
|
"logits/chosen": 1.3565906286239624, |
|
"logits/rejected": 2.3881638050079346, |
|
"logps/chosen": -364.3350830078125, |
|
"logps/rejected": -450.5853576660156, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0489609241485596, |
|
"rewards/margins": 0.9981186985969543, |
|
"rewards/rejected": -2.047079563140869, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9862385321100917, |
|
"grad_norm": 31.154782672945057, |
|
"learning_rate": 2.889724508297886e-10, |
|
"logits/chosen": 0.9770921468734741, |
|
"logits/rejected": 2.4192652702331543, |
|
"logps/chosen": -402.85589599609375, |
|
"logps/rejected": -433.6226501464844, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9968045949935913, |
|
"rewards/margins": 0.8990565538406372, |
|
"rewards/rejected": -1.895861268043518, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 436, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5438270043889317, |
|
"train_runtime": 11297.0501, |
|
"train_samples_per_second": 4.936, |
|
"train_steps_per_second": 0.039 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 436, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|