|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9987515605493134, |
|
"eval_steps": 2000, |
|
"global_step": 600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 4.760079834365155, |
|
"learning_rate": 8.333333333333334e-08, |
|
"logits/chosen": -1.1630980968475342, |
|
"logits/rejected": -1.0201224088668823, |
|
"logps/chosen": -350.4145812988281, |
|
"logps/rejected": -220.30422973632812, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.6284772366587372, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -1.0121445655822754, |
|
"logits/rejected": -0.9547010064125061, |
|
"logps/chosen": -320.40802001953125, |
|
"logps/rejected": -186.48419189453125, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.4982638955116272, |
|
"rewards/chosen": 0.0024558762088418007, |
|
"rewards/margins": 0.0015082670142874122, |
|
"rewards/rejected": 0.0009476091363467276, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.24809146375121, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"logits/chosen": -0.9381664395332336, |
|
"logits/rejected": -0.8647511601448059, |
|
"logps/chosen": -328.6592102050781, |
|
"logps/rejected": -212.4827880859375, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.7953125238418579, |
|
"rewards/chosen": 0.034552641212940216, |
|
"rewards/margins": 0.022216904908418655, |
|
"rewards/rejected": 0.012335737235844135, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.9797438421104276, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -1.020849585533142, |
|
"logits/rejected": -0.9427526593208313, |
|
"logps/chosen": -329.3730163574219, |
|
"logps/rejected": -215.0742645263672, |
|
"loss": 0.6458, |
|
"rewards/accuracies": 0.854687511920929, |
|
"rewards/chosen": 0.1433950513601303, |
|
"rewards/margins": 0.10005545616149902, |
|
"rewards/rejected": 0.04333961382508278, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.971987358375742, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -1.1010403633117676, |
|
"logits/rejected": -1.0402967929840088, |
|
"logps/chosen": -310.91778564453125, |
|
"logps/rejected": -205.1094512939453, |
|
"loss": 0.5801, |
|
"rewards/accuracies": 0.885937511920929, |
|
"rewards/chosen": 0.3175004720687866, |
|
"rewards/margins": 0.25547632575035095, |
|
"rewards/rejected": 0.06202414631843567, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.284422715303357, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -1.176831841468811, |
|
"logits/rejected": -1.1168550252914429, |
|
"logps/chosen": -289.9400634765625, |
|
"logps/rejected": -193.466552734375, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.8765624761581421, |
|
"rewards/chosen": 0.4821470379829407, |
|
"rewards/margins": 0.42469802498817444, |
|
"rewards/rejected": 0.057449012994766235, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.693731441358458, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -1.1333694458007812, |
|
"logits/rejected": -1.0512011051177979, |
|
"logps/chosen": -268.7071838378906, |
|
"logps/rejected": -216.21939086914062, |
|
"loss": 0.4625, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": 0.570508599281311, |
|
"rewards/margins": 0.6040245890617371, |
|
"rewards/rejected": -0.03351598605513573, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.982413579425004, |
|
"learning_rate": 4.995770395678171e-06, |
|
"logits/chosen": -0.9235696792602539, |
|
"logits/rejected": -0.8171814680099487, |
|
"logps/chosen": -288.454833984375, |
|
"logps/rejected": -243.9117431640625, |
|
"loss": 0.3919, |
|
"rewards/accuracies": 0.9359375238418579, |
|
"rewards/chosen": 0.4965239465236664, |
|
"rewards/margins": 0.8616136312484741, |
|
"rewards/rejected": -0.36508968472480774, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 3.0591291304636767, |
|
"learning_rate": 4.983095894354858e-06, |
|
"logits/chosen": -0.5841406583786011, |
|
"logits/rejected": -0.5758659243583679, |
|
"logps/chosen": -282.39080810546875, |
|
"logps/rejected": -267.4037170410156, |
|
"loss": 0.3708, |
|
"rewards/accuracies": 0.9234374761581421, |
|
"rewards/chosen": 0.3620058298110962, |
|
"rewards/margins": 1.001431941986084, |
|
"rewards/rejected": -0.639426052570343, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.7852348655427495, |
|
"learning_rate": 4.962019382530521e-06, |
|
"logits/chosen": -0.3057808578014374, |
|
"logits/rejected": -0.08402713388204575, |
|
"logps/chosen": -304.71258544921875, |
|
"logps/rejected": -306.744873046875, |
|
"loss": 0.3332, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": 0.34849730134010315, |
|
"rewards/margins": 1.234140157699585, |
|
"rewards/rejected": -0.8856428861618042, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.904931302273915, |
|
"learning_rate": 4.93261217644956e-06, |
|
"logits/chosen": -0.3401206433773041, |
|
"logits/rejected": -0.006557087413966656, |
|
"logps/chosen": -306.9206848144531, |
|
"logps/rejected": -352.503173828125, |
|
"loss": 0.284, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.13179777562618256, |
|
"rewards/margins": 1.5720919370651245, |
|
"rewards/rejected": -1.4402940273284912, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 4.332999329515302, |
|
"learning_rate": 4.894973780788722e-06, |
|
"logits/chosen": -0.3165335953235626, |
|
"logits/rejected": 0.0790834054350853, |
|
"logps/chosen": -350.21405029296875, |
|
"logps/rejected": -433.90643310546875, |
|
"loss": 0.2218, |
|
"rewards/accuracies": 0.9609375, |
|
"rewards/chosen": -0.08845386654138565, |
|
"rewards/margins": 2.1888351440429688, |
|
"rewards/rejected": -2.2772889137268066, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.8287946526526464, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -0.34387272596359253, |
|
"logits/rejected": -0.02132757380604744, |
|
"logps/chosen": -342.6011047363281, |
|
"logps/rejected": -551.7183837890625, |
|
"loss": 0.1787, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/chosen": -0.15126076340675354, |
|
"rewards/margins": 3.2273590564727783, |
|
"rewards/rejected": -3.378619432449341, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.7220399456471203, |
|
"learning_rate": 4.7955402672006855e-06, |
|
"logits/chosen": -0.4739972949028015, |
|
"logits/rejected": -0.22087886929512024, |
|
"logps/chosen": -355.5228271484375, |
|
"logps/rejected": -637.9622192382812, |
|
"loss": 0.1651, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.20811741054058075, |
|
"rewards/margins": 4.130118370056152, |
|
"rewards/rejected": -4.338235378265381, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.216070201202239, |
|
"learning_rate": 4.734081600808531e-06, |
|
"logits/chosen": -0.5762065649032593, |
|
"logits/rejected": -0.37428033351898193, |
|
"logps/chosen": -342.99853515625, |
|
"logps/rejected": -746.65234375, |
|
"loss": 0.1198, |
|
"rewards/accuracies": 0.973437488079071, |
|
"rewards/chosen": -0.11533623933792114, |
|
"rewards/margins": 5.1436309814453125, |
|
"rewards/rejected": -5.258967399597168, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 4.452614644496966, |
|
"learning_rate": 4.665063509461098e-06, |
|
"logits/chosen": -0.43740949034690857, |
|
"logits/rejected": -0.276010662317276, |
|
"logps/chosen": -353.4092712402344, |
|
"logps/rejected": -838.3703002929688, |
|
"loss": 0.145, |
|
"rewards/accuracies": 0.9671875238418579, |
|
"rewards/chosen": -0.23360753059387207, |
|
"rewards/margins": 5.936570167541504, |
|
"rewards/rejected": -6.170177459716797, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 6.909783574695525, |
|
"learning_rate": 4.588719528532342e-06, |
|
"logits/chosen": 0.02835695818066597, |
|
"logits/rejected": 0.3119501769542694, |
|
"logps/chosen": -353.52203369140625, |
|
"logps/rejected": -654.4629516601562, |
|
"loss": 0.1368, |
|
"rewards/accuracies": 0.973437488079071, |
|
"rewards/chosen": -0.25593429803848267, |
|
"rewards/margins": 4.225825786590576, |
|
"rewards/rejected": -4.481760501861572, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.7107339593657505, |
|
"learning_rate": 4.50530798188761e-06, |
|
"logits/chosen": -0.15743690729141235, |
|
"logits/rejected": 0.23819151520729065, |
|
"logps/chosen": -333.4959716796875, |
|
"logps/rejected": -659.2352294921875, |
|
"loss": 0.1267, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -0.00615291204303503, |
|
"rewards/margins": 4.596193790435791, |
|
"rewards/rejected": -4.602346420288086, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.623547042771, |
|
"learning_rate": 4.415111107797445e-06, |
|
"logits/chosen": -0.31918713450431824, |
|
"logits/rejected": 0.03602874279022217, |
|
"logps/chosen": -330.2435302734375, |
|
"logps/rejected": -757.0585327148438, |
|
"loss": 0.1056, |
|
"rewards/accuracies": 0.979687511920929, |
|
"rewards/chosen": 0.007477378938347101, |
|
"rewards/margins": 5.585347652435303, |
|
"rewards/rejected": -5.577870845794678, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.1551725469519507, |
|
"learning_rate": 4.318434103932622e-06, |
|
"logits/chosen": -0.3329532742500305, |
|
"logits/rejected": -0.041298139840364456, |
|
"logps/chosen": -382.2855529785156, |
|
"logps/rejected": -988.2845458984375, |
|
"loss": 0.1055, |
|
"rewards/accuracies": 0.9703124761581421, |
|
"rewards/chosen": -0.40997394919395447, |
|
"rewards/margins": 7.356125831604004, |
|
"rewards/rejected": -7.766099452972412, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.5178815949434505, |
|
"learning_rate": 4.215604094671835e-06, |
|
"logits/chosen": -0.4541547894477844, |
|
"logits/rejected": -0.1613123118877411, |
|
"logps/chosen": -347.7196350097656, |
|
"logps/rejected": -880.2561645507812, |
|
"loss": 0.1013, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -0.13055315613746643, |
|
"rewards/margins": 6.6189093589782715, |
|
"rewards/rejected": -6.749462127685547, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.0381257493358285, |
|
"learning_rate": 4.106969024216348e-06, |
|
"logits/chosen": -0.5205026865005493, |
|
"logits/rejected": -0.25759488344192505, |
|
"logps/chosen": -393.9586486816406, |
|
"logps/rejected": -1019.0203247070312, |
|
"loss": 0.0999, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -0.4994255006313324, |
|
"rewards/margins": 7.512589931488037, |
|
"rewards/rejected": -8.012015342712402, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 6.3674783354384115, |
|
"learning_rate": 3.992896479256966e-06, |
|
"logits/chosen": -0.6750475168228149, |
|
"logits/rejected": -0.3842785954475403, |
|
"logps/chosen": -336.7120666503906, |
|
"logps/rejected": -858.0791015625, |
|
"loss": 0.1031, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.017676908522844315, |
|
"rewards/margins": 6.612088680267334, |
|
"rewards/rejected": -6.594411373138428, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.419780694811269, |
|
"learning_rate": 3.8737724451770155e-06, |
|
"logits/chosen": -0.6326015591621399, |
|
"logits/rejected": -0.40925782918930054, |
|
"logps/chosen": -375.2301025390625, |
|
"logps/rejected": -1089.742919921875, |
|
"loss": 0.0811, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.42722994089126587, |
|
"rewards/margins": 8.390230178833008, |
|
"rewards/rejected": -8.817461013793945, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 17.310687719433773, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -0.5065186023712158, |
|
"logits/rejected": -0.2878126800060272, |
|
"logps/chosen": -386.92266845703125, |
|
"logps/rejected": -1173.471435546875, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 0.9921875, |
|
"rewards/chosen": -0.5754821300506592, |
|
"rewards/margins": 9.043745040893555, |
|
"rewards/rejected": -9.619227409362793, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.566552488550366, |
|
"learning_rate": 3.621997950501156e-06, |
|
"logits/chosen": -0.23604285717010498, |
|
"logits/rejected": 0.03810877352952957, |
|
"logps/chosen": -380.695068359375, |
|
"logps/rejected": -1131.797607421875, |
|
"loss": 0.0778, |
|
"rewards/accuracies": 0.9859374761581421, |
|
"rewards/chosen": -0.5175285935401917, |
|
"rewards/margins": 8.581127166748047, |
|
"rewards/rejected": -9.098657608032227, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.2662266467083074, |
|
"learning_rate": 3.4901994150978926e-06, |
|
"logits/chosen": -0.14488890767097473, |
|
"logits/rejected": 0.2611751854419708, |
|
"logps/chosen": -305.7989196777344, |
|
"logps/rejected": -813.2493896484375, |
|
"loss": 0.1017, |
|
"rewards/accuracies": 0.9765625, |
|
"rewards/chosen": 0.1130492091178894, |
|
"rewards/margins": 6.16524600982666, |
|
"rewards/rejected": -6.052196502685547, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.023161637845827, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": -0.14377684891223907, |
|
"logits/rejected": 0.22304537892341614, |
|
"logps/chosen": -372.13165283203125, |
|
"logps/rejected": -896.8603515625, |
|
"loss": 0.0815, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.10399510711431503, |
|
"rewards/margins": 6.930342197418213, |
|
"rewards/rejected": -7.034337520599365, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.8073955958066197, |
|
"learning_rate": 3.217008081777726e-06, |
|
"logits/chosen": -0.10699782520532608, |
|
"logits/rejected": 0.14858277142047882, |
|
"logps/chosen": -438.87823486328125, |
|
"logps/rejected": -1203.0933837890625, |
|
"loss": 0.0873, |
|
"rewards/accuracies": 0.9703124761581421, |
|
"rewards/chosen": -1.0280810594558716, |
|
"rewards/margins": 8.752517700195312, |
|
"rewards/rejected": -9.780599594116211, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 4.347308476957998, |
|
"learning_rate": 3.0765396768561005e-06, |
|
"logits/chosen": -0.4454914927482605, |
|
"logits/rejected": -0.15811693668365479, |
|
"logps/chosen": -349.51531982421875, |
|
"logps/rejected": -1020.2342529296875, |
|
"loss": 0.0657, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.13168838620185852, |
|
"rewards/margins": 7.97658634185791, |
|
"rewards/rejected": -8.108274459838867, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 4.923260234351252, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits/chosen": -0.46575579047203064, |
|
"logits/rejected": -0.18623068928718567, |
|
"logps/chosen": -382.58807373046875, |
|
"logps/rejected": -1112.0509033203125, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 0.979687511920929, |
|
"rewards/chosen": -0.5270034670829773, |
|
"rewards/margins": 8.469846725463867, |
|
"rewards/rejected": -8.996851921081543, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 4.035586599115476, |
|
"learning_rate": 2.7902322853130758e-06, |
|
"logits/chosen": -0.47277918457984924, |
|
"logits/rejected": -0.22883549332618713, |
|
"logps/chosen": -396.8818054199219, |
|
"logps/rejected": -1298.666748046875, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -0.6961787343025208, |
|
"rewards/margins": 10.189523696899414, |
|
"rewards/rejected": -10.885702133178711, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.7894983493848236, |
|
"learning_rate": 2.6453620722761897e-06, |
|
"logits/chosen": -0.5364641547203064, |
|
"logits/rejected": -0.28673312067985535, |
|
"logps/chosen": -379.64337158203125, |
|
"logps/rejected": -1212.439697265625, |
|
"loss": 0.0705, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.35370904207229614, |
|
"rewards/margins": 9.660139083862305, |
|
"rewards/rejected": -10.013849258422852, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.3096114791299955, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -0.5816466212272644, |
|
"logits/rejected": -0.35151442885398865, |
|
"logps/chosen": -379.9997863769531, |
|
"logps/rejected": -1187.98876953125, |
|
"loss": 0.0533, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.41831880807876587, |
|
"rewards/margins": 9.45728874206543, |
|
"rewards/rejected": -9.875606536865234, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.165697815332725, |
|
"learning_rate": 2.3546379277238107e-06, |
|
"logits/chosen": -0.47588858008384705, |
|
"logits/rejected": -0.26532530784606934, |
|
"logps/chosen": -454.8421325683594, |
|
"logps/rejected": -1527.9405517578125, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.2038371562957764, |
|
"rewards/margins": 11.936580657958984, |
|
"rewards/rejected": -13.140419006347656, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.150182775021995, |
|
"learning_rate": 2.2097677146869242e-06, |
|
"logits/chosen": -0.5888150930404663, |
|
"logits/rejected": -0.2789239287376404, |
|
"logps/chosen": -349.5444030761719, |
|
"logps/rejected": -1127.449951171875, |
|
"loss": 0.0722, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.23732244968414307, |
|
"rewards/margins": 8.962444305419922, |
|
"rewards/rejected": -9.199767112731934, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.370472414334315, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -0.4766604006290436, |
|
"logits/rejected": -0.22312171757221222, |
|
"logps/chosen": -409.70135498046875, |
|
"logps/rejected": -1264.747314453125, |
|
"loss": 0.0719, |
|
"rewards/accuracies": 0.979687511920929, |
|
"rewards/chosen": -0.5473332405090332, |
|
"rewards/margins": 9.985517501831055, |
|
"rewards/rejected": -10.532853126525879, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 4.485106240623341, |
|
"learning_rate": 1.9234603231439e-06, |
|
"logits/chosen": -0.40441417694091797, |
|
"logits/rejected": -0.13194730877876282, |
|
"logps/chosen": -391.0859069824219, |
|
"logps/rejected": -1242.066162109375, |
|
"loss": 0.0632, |
|
"rewards/accuracies": 0.9859374761581421, |
|
"rewards/chosen": -0.6057429909706116, |
|
"rewards/margins": 9.785425186157227, |
|
"rewards/rejected": -10.391169548034668, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.3547912002584528, |
|
"learning_rate": 1.7829919182222752e-06, |
|
"logits/chosen": -0.3805684447288513, |
|
"logits/rejected": -0.11748667806386948, |
|
"logps/chosen": -420.901611328125, |
|
"logps/rejected": -1328.623291015625, |
|
"loss": 0.0609, |
|
"rewards/accuracies": 0.9859374761581421, |
|
"rewards/chosen": -0.752387523651123, |
|
"rewards/margins": 10.4163236618042, |
|
"rewards/rejected": -11.16871166229248, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 7.41839867309329, |
|
"learning_rate": 1.6449496416858285e-06, |
|
"logits/chosen": -0.439796507358551, |
|
"logits/rejected": -0.18884414434432983, |
|
"logps/chosen": -361.68634033203125, |
|
"logps/rejected": -1187.6492919921875, |
|
"loss": 0.093, |
|
"rewards/accuracies": 0.9859374761581421, |
|
"rewards/chosen": -0.2755175232887268, |
|
"rewards/margins": 9.546114921569824, |
|
"rewards/rejected": -9.82163143157959, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.9341085433225174, |
|
"learning_rate": 1.509800584902108e-06, |
|
"logits/chosen": -0.4470156133174896, |
|
"logits/rejected": -0.15826158225536346, |
|
"logps/chosen": -325.4810791015625, |
|
"logps/rejected": -963.2032470703125, |
|
"loss": 0.0697, |
|
"rewards/accuracies": 0.9921875, |
|
"rewards/chosen": 0.05155152827501297, |
|
"rewards/margins": 7.570789337158203, |
|
"rewards/rejected": -7.519238471984863, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.2887818701903795, |
|
"learning_rate": 1.3780020494988447e-06, |
|
"logits/chosen": -0.38168513774871826, |
|
"logits/rejected": -0.13108135759830475, |
|
"logps/chosen": -338.5289001464844, |
|
"logps/rejected": -994.54345703125, |
|
"loss": 0.0767, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.13217389583587646, |
|
"rewards/margins": 7.651003360748291, |
|
"rewards/rejected": -7.783177375793457, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 10.334424423093157, |
|
"learning_rate": 1.2500000000000007e-06, |
|
"logits/chosen": -0.32895341515541077, |
|
"logits/rejected": -0.11540427058935165, |
|
"logps/chosen": -408.0703125, |
|
"logps/rejected": -1260.2664794921875, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.7070173025131226, |
|
"rewards/margins": 9.677043914794922, |
|
"rewards/rejected": -10.384060859680176, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 3.446631906756751, |
|
"learning_rate": 1.1262275548229852e-06, |
|
"logits/chosen": -0.45178350806236267, |
|
"logits/rejected": -0.1636919528245926, |
|
"logps/chosen": -414.2293395996094, |
|
"logps/rejected": -1283.5029296875, |
|
"loss": 0.0678, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.8513700366020203, |
|
"rewards/margins": 9.903493881225586, |
|
"rewards/rejected": -10.754863739013672, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 3.1936523282167912, |
|
"learning_rate": 1.0071035207430352e-06, |
|
"logits/chosen": -0.44514569640159607, |
|
"logits/rejected": -0.20628270506858826, |
|
"logps/chosen": -388.30224609375, |
|
"logps/rejected": -1252.75146484375, |
|
"loss": 0.048, |
|
"rewards/accuracies": 0.9921875, |
|
"rewards/chosen": -0.6398779153823853, |
|
"rewards/margins": 9.881658554077148, |
|
"rewards/rejected": -10.521535873413086, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.7973345573120514, |
|
"learning_rate": 8.930309757836517e-07, |
|
"logits/chosen": -0.5846482515335083, |
|
"logits/rejected": -0.3184075355529785, |
|
"logps/chosen": -380.7689514160156, |
|
"logps/rejected": -1142.24365234375, |
|
"loss": 0.0603, |
|
"rewards/accuracies": 0.995312511920929, |
|
"rewards/chosen": -0.4020051956176758, |
|
"rewards/margins": 8.967304229736328, |
|
"rewards/rejected": -9.36931037902832, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.7683111661015913, |
|
"learning_rate": 7.843959053281663e-07, |
|
"logits/chosen": -0.4719081521034241, |
|
"logits/rejected": -0.248517706990242, |
|
"logps/chosen": -379.7674560546875, |
|
"logps/rejected": -1191.5079345703125, |
|
"loss": 0.0567, |
|
"rewards/accuracies": 0.9859374761581421, |
|
"rewards/chosen": -0.3425825238227844, |
|
"rewards/margins": 9.473920822143555, |
|
"rewards/rejected": -9.816503524780273, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.619770880026216, |
|
"learning_rate": 6.815658960673782e-07, |
|
"logits/chosen": -0.5005173683166504, |
|
"logits/rejected": -0.22156552970409393, |
|
"logps/chosen": -389.5422668457031, |
|
"logps/rejected": -1232.5107421875, |
|
"loss": 0.0507, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.6200243830680847, |
|
"rewards/margins": 9.514973640441895, |
|
"rewards/rejected": -10.134997367858887, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.206190169965742, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -0.46861904859542847, |
|
"logits/rejected": -0.18907694518566132, |
|
"logps/chosen": -388.5583801269531, |
|
"logps/rejected": -1196.972900390625, |
|
"loss": 0.0648, |
|
"rewards/accuracies": 0.9921875, |
|
"rewards/chosen": -0.40207749605178833, |
|
"rewards/margins": 9.50928783416748, |
|
"rewards/rejected": -9.911364555358887, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 5.22635215684865, |
|
"learning_rate": 4.946920181123904e-07, |
|
"logits/chosen": -0.49871087074279785, |
|
"logits/rejected": -0.23706772923469543, |
|
"logps/chosen": -378.83209228515625, |
|
"logps/rejected": -1230.994140625, |
|
"loss": 0.0594, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.36057573556900024, |
|
"rewards/margins": 9.88486099243164, |
|
"rewards/rejected": -10.245436668395996, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 4.399177165103731, |
|
"learning_rate": 4.1128047146765936e-07, |
|
"logits/chosen": -0.4957138001918793, |
|
"logits/rejected": -0.22013764083385468, |
|
"logps/chosen": -358.6220703125, |
|
"logps/rejected": -1129.539794921875, |
|
"loss": 0.0699, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -0.2314276248216629, |
|
"rewards/margins": 9.046243667602539, |
|
"rewards/rejected": -9.277670860290527, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.4578322691260586, |
|
"learning_rate": 3.3493649053890325e-07, |
|
"logits/chosen": -0.3916376233100891, |
|
"logits/rejected": -0.13147909939289093, |
|
"logps/chosen": -376.29583740234375, |
|
"logps/rejected": -1182.824462890625, |
|
"loss": 0.0487, |
|
"rewards/accuracies": 0.995312511920929, |
|
"rewards/chosen": -0.43332648277282715, |
|
"rewards/margins": 9.195769309997559, |
|
"rewards/rejected": -9.629096031188965, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 2.130490613601597, |
|
"learning_rate": 2.6591839919146963e-07, |
|
"logits/chosen": -0.36289340257644653, |
|
"logits/rejected": -0.13988874852657318, |
|
"logps/chosen": -386.994873046875, |
|
"logps/rejected": -1234.4404296875, |
|
"loss": 0.0544, |
|
"rewards/accuracies": 0.9859374761581421, |
|
"rewards/chosen": -0.5290244221687317, |
|
"rewards/margins": 9.592443466186523, |
|
"rewards/rejected": -10.121468544006348, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 2.5265891239165703, |
|
"learning_rate": 2.044597327993153e-07, |
|
"logits/chosen": -0.44066888093948364, |
|
"logits/rejected": -0.1672184318304062, |
|
"logps/chosen": -404.02349853515625, |
|
"logps/rejected": -1213.29638671875, |
|
"loss": 0.0571, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -0.50049889087677, |
|
"rewards/margins": 9.578344345092773, |
|
"rewards/rejected": -10.07884407043457, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.3929764183361966, |
|
"learning_rate": 1.507684480352292e-07, |
|
"logits/chosen": -0.4457983374595642, |
|
"logits/rejected": -0.20653533935546875, |
|
"logps/chosen": -387.2595520019531, |
|
"logps/rejected": -1257.850341796875, |
|
"loss": 0.0582, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -0.5553634762763977, |
|
"rewards/margins": 9.88083267211914, |
|
"rewards/rejected": -10.436195373535156, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.8197547536284149, |
|
"learning_rate": 1.0502621921127776e-07, |
|
"logits/chosen": -0.4422330856323242, |
|
"logits/rejected": -0.1755351722240448, |
|
"logps/chosen": -414.64715576171875, |
|
"logps/rejected": -1265.412353515625, |
|
"loss": 0.052, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.6048385500907898, |
|
"rewards/margins": 9.867313385009766, |
|
"rewards/rejected": -10.472152709960938, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 3.101887331185102, |
|
"learning_rate": 6.738782355044048e-08, |
|
"logits/chosen": -0.4752727448940277, |
|
"logits/rejected": -0.20606884360313416, |
|
"logps/chosen": -390.14984130859375, |
|
"logps/rejected": -1334.9105224609375, |
|
"loss": 0.0647, |
|
"rewards/accuracies": 0.979687511920929, |
|
"rewards/chosen": -0.6308324933052063, |
|
"rewards/margins": 10.629243850708008, |
|
"rewards/rejected": -11.260076522827148, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 3.441898096294271, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": -0.480734646320343, |
|
"logits/rejected": -0.21511860191822052, |
|
"logps/chosen": -387.4568786621094, |
|
"logps/rejected": -1270.74853515625, |
|
"loss": 0.0527, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.7548898458480835, |
|
"rewards/margins": 9.84511947631836, |
|
"rewards/rejected": -10.600008964538574, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 2.2741744504193826, |
|
"learning_rate": 1.6904105645142443e-08, |
|
"logits/chosen": -0.44097834825515747, |
|
"logits/rejected": -0.2139013707637787, |
|
"logps/chosen": -391.39453125, |
|
"logps/rejected": -1291.123779296875, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -0.5810464024543762, |
|
"rewards/margins": 10.310724258422852, |
|
"rewards/rejected": -10.891772270202637, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 2.909048473319547, |
|
"learning_rate": 4.229604321829561e-09, |
|
"logits/chosen": -0.48271116614341736, |
|
"logits/rejected": -0.22559651732444763, |
|
"logps/chosen": -406.5685119628906, |
|
"logps/rejected": -1254.4326171875, |
|
"loss": 0.0513, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.6645079851150513, |
|
"rewards/margins": 9.709487915039062, |
|
"rewards/rejected": -10.373995780944824, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.7152944727872392, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.49268728494644165, |
|
"logits/rejected": -0.23753933608531952, |
|
"logps/chosen": -411.12884521484375, |
|
"logps/rejected": -1246.8544921875, |
|
"loss": 0.0566, |
|
"rewards/accuracies": 0.9921875, |
|
"rewards/chosen": -0.7164795994758606, |
|
"rewards/margins": 9.6405668258667, |
|
"rewards/rejected": -10.357046127319336, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 600, |
|
"total_flos": 0.0, |
|
"train_loss": 0.150745850255092, |
|
"train_runtime": 48820.8334, |
|
"train_samples_per_second": 0.788, |
|
"train_steps_per_second": 0.012 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|