|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 50, |
|
"global_step": 352, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.028409090909090908, |
|
"grad_norm": 90.01349453953418, |
|
"learning_rate": 1.3888888888888888e-07, |
|
"logits/chosen": -2.8592569828033447, |
|
"logits/rejected": -2.642957925796509, |
|
"logps/chosen": -390.56573486328125, |
|
"logps/rejected": -607.8802490234375, |
|
"loss": 1.3762, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.001940618036314845, |
|
"rewards/margins": 0.013666175305843353, |
|
"rewards/rejected": -0.011725558899343014, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.056818181818181816, |
|
"grad_norm": 38.16702665689489, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"logits/chosen": -2.842255115509033, |
|
"logits/rejected": -2.694774627685547, |
|
"logps/chosen": -328.5555419921875, |
|
"logps/rejected": -774.9117431640625, |
|
"loss": 1.0262, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.08589581400156021, |
|
"rewards/margins": 0.660616934299469, |
|
"rewards/rejected": -0.5747210383415222, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08522727272727272, |
|
"grad_norm": 9.474354712422207, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.857499837875366, |
|
"logits/rejected": -2.675124406814575, |
|
"logps/chosen": -295.38397216796875, |
|
"logps/rejected": -1112.8095703125, |
|
"loss": 0.3585, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.36898040771484375, |
|
"rewards/margins": 4.717532157897949, |
|
"rewards/rejected": -4.3485517501831055, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11363636363636363, |
|
"grad_norm": 2.844664039016391, |
|
"learning_rate": 4.998023493068254e-07, |
|
"logits/chosen": -2.8356635570526123, |
|
"logits/rejected": -2.645285129547119, |
|
"logps/chosen": -314.98876953125, |
|
"logps/rejected": -2130.987060546875, |
|
"loss": 0.083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3176669478416443, |
|
"rewards/margins": 14.54858112335205, |
|
"rewards/rejected": -14.230911254882812, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14204545454545456, |
|
"grad_norm": 0.18581946577481065, |
|
"learning_rate": 4.975823666181255e-07, |
|
"logits/chosen": -2.8282175064086914, |
|
"logits/rejected": -2.554896831512451, |
|
"logps/chosen": -441.171875, |
|
"logps/rejected": -4335.5703125, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.785366415977478, |
|
"rewards/margins": 36.03093338012695, |
|
"rewards/rejected": -36.8162956237793, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14204545454545456, |
|
"eval_logits/chosen": -2.965341806411743, |
|
"eval_logits/rejected": -2.5409843921661377, |
|
"eval_logps/chosen": -459.0153503417969, |
|
"eval_logps/rejected": -4976.9013671875, |
|
"eval_loss": 0.009123104624450207, |
|
"eval_rewards/accuracies": 0.9959677457809448, |
|
"eval_rewards/chosen": -0.9203360676765442, |
|
"eval_rewards/margins": 42.44078063964844, |
|
"eval_rewards/rejected": -43.361122131347656, |
|
"eval_runtime": 195.4208, |
|
"eval_samples_per_second": 19.983, |
|
"eval_steps_per_second": 0.317, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17045454545454544, |
|
"grad_norm": 1.8496563592278583, |
|
"learning_rate": 4.929173350101024e-07, |
|
"logits/chosen": -3.018855571746826, |
|
"logits/rejected": -2.489605188369751, |
|
"logps/chosen": -478.2201232910156, |
|
"logps/rejected": -5343.02783203125, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.1832506656646729, |
|
"rewards/margins": 45.8184814453125, |
|
"rewards/rejected": -47.00173568725586, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.19886363636363635, |
|
"grad_norm": 0.42838497413063575, |
|
"learning_rate": 4.858533249305336e-07, |
|
"logits/chosen": -2.8961918354034424, |
|
"logits/rejected": -1.8984692096710205, |
|
"logps/chosen": -537.8681640625, |
|
"logps/rejected": -5729.29150390625, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.587074875831604, |
|
"rewards/margins": 49.57848358154297, |
|
"rewards/rejected": -51.165557861328125, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 23.183269598091908, |
|
"learning_rate": 4.764600984163808e-07, |
|
"logits/chosen": -3.015650510787964, |
|
"logits/rejected": -1.4822012186050415, |
|
"logps/chosen": -574.1959838867188, |
|
"logps/rejected": -7196.5283203125, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6787515878677368, |
|
"rewards/margins": 63.5491943359375, |
|
"rewards/rejected": -65.22795104980469, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2556818181818182, |
|
"grad_norm": 0.2590854602849931, |
|
"learning_rate": 4.6483042014491527e-07, |
|
"logits/chosen": -2.880042314529419, |
|
"logits/rejected": -1.109546422958374, |
|
"logps/chosen": -574.42529296875, |
|
"logps/rejected": -6483.9189453125, |
|
"loss": 0.0194, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.123119831085205, |
|
"rewards/margins": 56.34221267700195, |
|
"rewards/rejected": -58.4653434753418, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2840909090909091, |
|
"grad_norm": 0.2975727339596996, |
|
"learning_rate": 4.510791413176912e-07, |
|
"logits/chosen": -2.7033655643463135, |
|
"logits/rejected": -0.46150803565979004, |
|
"logps/chosen": -574.6309814453125, |
|
"logps/rejected": -7050.77099609375, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0516088008880615, |
|
"rewards/margins": 61.398292541503906, |
|
"rewards/rejected": -63.44989776611328, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2840909090909091, |
|
"eval_logits/chosen": -2.5256500244140625, |
|
"eval_logits/rejected": 0.035765551030635834, |
|
"eval_logps/chosen": -558.7725830078125, |
|
"eval_logps/rejected": -6818.2158203125, |
|
"eval_loss": 0.0016336780972778797, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -1.9179083108901978, |
|
"eval_rewards/margins": 59.856361389160156, |
|
"eval_rewards/rejected": -61.77427673339844, |
|
"eval_runtime": 192.2507, |
|
"eval_samples_per_second": 20.312, |
|
"eval_steps_per_second": 0.322, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 0.04984017585029643, |
|
"learning_rate": 4.353420654246546e-07, |
|
"logits/chosen": -2.2970995903015137, |
|
"logits/rejected": 0.2104659080505371, |
|
"logps/chosen": -568.5081787109375, |
|
"logps/rejected": -6664.22412109375, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.930869460105896, |
|
"rewards/margins": 58.71869659423828, |
|
"rewards/rejected": -60.649559020996094, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3409090909090909, |
|
"grad_norm": 0.03797940797804543, |
|
"learning_rate": 4.177746070897592e-07, |
|
"logits/chosen": -1.7653045654296875, |
|
"logits/rejected": 1.1280748844146729, |
|
"logps/chosen": -600.4093017578125, |
|
"logps/rejected": -7361.03662109375, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.188924789428711, |
|
"rewards/margins": 65.04915618896484, |
|
"rewards/rejected": -67.23808288574219, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3693181818181818, |
|
"grad_norm": 0.47736957873502217, |
|
"learning_rate": 3.9855025724292763e-07, |
|
"logits/chosen": -2.10666823387146, |
|
"logits/rejected": 1.7087266445159912, |
|
"logps/chosen": -594.9124755859375, |
|
"logps/rejected": -7779.0634765625, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2446084022521973, |
|
"rewards/margins": 69.14064025878906, |
|
"rewards/rejected": -71.38525390625, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3977272727272727, |
|
"grad_norm": 1.020605288874105, |
|
"learning_rate": 3.7785886977585555e-07, |
|
"logits/chosen": -2.2910099029541016, |
|
"logits/rejected": 1.9655479192733765, |
|
"logps/chosen": -569.71533203125, |
|
"logps/rejected": -8695.2900390625, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0367894172668457, |
|
"rewards/margins": 78.4962158203125, |
|
"rewards/rejected": -80.53299713134766, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.42613636363636365, |
|
"grad_norm": 0.5904857854916888, |
|
"learning_rate": 3.5590478660213206e-07, |
|
"logits/chosen": -2.2953405380249023, |
|
"logits/rejected": 1.9216325283050537, |
|
"logps/chosen": -595.8374633789062, |
|
"logps/rejected": -8074.73974609375, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.1777424812316895, |
|
"rewards/margins": 71.44985961914062, |
|
"rewards/rejected": -73.62760925292969, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42613636363636365, |
|
"eval_logits/chosen": -2.4309346675872803, |
|
"eval_logits/rejected": 1.8614341020584106, |
|
"eval_logps/chosen": -558.0396728515625, |
|
"eval_logps/rejected": -7546.6005859375, |
|
"eval_loss": 0.001163232373073697, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -1.9105795621871948, |
|
"eval_rewards/margins": 67.14754486083984, |
|
"eval_rewards/rejected": -69.05812072753906, |
|
"eval_runtime": 193.8607, |
|
"eval_samples_per_second": 20.143, |
|
"eval_steps_per_second": 0.32, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 5.391869442805235, |
|
"learning_rate": 3.3290481963801696e-07, |
|
"logits/chosen": -2.435987949371338, |
|
"logits/rejected": 2.048819065093994, |
|
"logps/chosen": -532.9867553710938, |
|
"logps/rejected": -7208.0419921875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.628488540649414, |
|
"rewards/margins": 64.28703308105469, |
|
"rewards/rejected": -65.91552734375, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.48295454545454547, |
|
"grad_norm": 0.031936510635288025, |
|
"learning_rate": 3.0908610963322626e-07, |
|
"logits/chosen": -2.173886775970459, |
|
"logits/rejected": 2.0431816577911377, |
|
"logps/chosen": -577.22314453125, |
|
"logps/rejected": -7725.6796875, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.896048903465271, |
|
"rewards/margins": 68.07765197753906, |
|
"rewards/rejected": -69.97369384765625, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5113636363636364, |
|
"grad_norm": 0.018094668159337083, |
|
"learning_rate": 2.846838829972671e-07, |
|
"logits/chosen": -1.9897123575210571, |
|
"logits/rejected": 2.31811785697937, |
|
"logps/chosen": -567.0270385742188, |
|
"logps/rejected": -7191.4697265625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.061227798461914, |
|
"rewards/margins": 63.72364044189453, |
|
"rewards/rejected": -65.78486633300781, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5397727272727273, |
|
"grad_norm": 0.3615128328570267, |
|
"learning_rate": 2.5993912877423147e-07, |
|
"logits/chosen": -1.5569745302200317, |
|
"logits/rejected": 1.8910541534423828, |
|
"logps/chosen": -519.6112670898438, |
|
"logps/rejected": -8060.75390625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7399555444717407, |
|
"rewards/margins": 71.95148468017578, |
|
"rewards/rejected": -73.69144439697266, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5681818181818182, |
|
"grad_norm": 1.460001191569848, |
|
"learning_rate": 2.3509621870754504e-07, |
|
"logits/chosen": -1.1832640171051025, |
|
"logits/rejected": 2.8146424293518066, |
|
"logps/chosen": -540.25, |
|
"logps/rejected": -7328.6376953125, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.751201868057251, |
|
"rewards/margins": 65.22674560546875, |
|
"rewards/rejected": -66.97795104980469, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5681818181818182, |
|
"eval_logits/chosen": -1.0991721153259277, |
|
"eval_logits/rejected": 2.4905478954315186, |
|
"eval_logps/chosen": -560.5505981445312, |
|
"eval_logps/rejected": -7817.95947265625, |
|
"eval_loss": 0.0009270149166695774, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -1.9356889724731445, |
|
"eval_rewards/margins": 69.83601379394531, |
|
"eval_rewards/rejected": -71.7717056274414, |
|
"eval_runtime": 195.666, |
|
"eval_samples_per_second": 19.957, |
|
"eval_steps_per_second": 0.317, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5965909090909091, |
|
"grad_norm": 0.6173080616307016, |
|
"learning_rate": 2.1040049389819624e-07, |
|
"logits/chosen": -1.0591375827789307, |
|
"logits/rejected": 2.6843762397766113, |
|
"logps/chosen": -585.5335083007812, |
|
"logps/rejected": -7571.32666015625, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9489023685455322, |
|
"rewards/margins": 67.44532775878906, |
|
"rewards/rejected": -69.39422607421875, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 3.3870532652701733, |
|
"learning_rate": 1.8609584188988133e-07, |
|
"logits/chosen": -0.8726997375488281, |
|
"logits/rejected": 2.3560879230499268, |
|
"logps/chosen": -635.2283935546875, |
|
"logps/rejected": -6522.76708984375, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.706278085708618, |
|
"rewards/margins": 56.02503204345703, |
|
"rewards/rejected": -58.73130416870117, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6534090909090909, |
|
"grad_norm": 84.74544834753664, |
|
"learning_rate": 1.624222881090439e-07, |
|
"logits/chosen": -1.068495512008667, |
|
"logits/rejected": 2.1958107948303223, |
|
"logps/chosen": -655.9327392578125, |
|
"logps/rejected": -7632.9365234375, |
|
"loss": 0.0224, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.6126813888549805, |
|
"rewards/margins": 66.86088562011719, |
|
"rewards/rejected": -69.47355651855469, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 0.655326329468716, |
|
"learning_rate": 1.3961362544602212e-07, |
|
"logits/chosen": -0.8508334159851074, |
|
"logits/rejected": 2.6375107765197754, |
|
"logps/chosen": -604.7547607421875, |
|
"logps/rejected": -6603.0595703125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.287423849105835, |
|
"rewards/margins": 57.80244827270508, |
|
"rewards/rejected": -60.08986282348633, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7102272727272727, |
|
"grad_norm": 0.8566182395838629, |
|
"learning_rate": 1.1789510538684522e-07, |
|
"logits/chosen": -0.9583337903022766, |
|
"logits/rejected": 2.481625556945801, |
|
"logps/chosen": -564.140869140625, |
|
"logps/rejected": -8373.404296875, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.12107253074646, |
|
"rewards/margins": 74.31715393066406, |
|
"rewards/rejected": -76.43821716308594, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7102272727272727, |
|
"eval_logits/chosen": -0.9364973902702332, |
|
"eval_logits/rejected": 2.6537041664123535, |
|
"eval_logps/chosen": -579.6390991210938, |
|
"eval_logps/rejected": -7301.04736328125, |
|
"eval_loss": 0.0008862165850587189, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -2.126574754714966, |
|
"eval_rewards/margins": 64.47601318359375, |
|
"eval_rewards/rejected": -66.60258483886719, |
|
"eval_runtime": 193.2705, |
|
"eval_samples_per_second": 20.205, |
|
"eval_steps_per_second": 0.321, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7386363636363636, |
|
"grad_norm": 0.3669759262567033, |
|
"learning_rate": 9.748121349736891e-08, |
|
"logits/chosen": -0.815521240234375, |
|
"logits/rejected": 2.5474696159362793, |
|
"logps/chosen": -614.1104736328125, |
|
"logps/rejected": -7755.7451171875, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.302823305130005, |
|
"rewards/margins": 68.47334289550781, |
|
"rewards/rejected": -70.77616882324219, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7670454545454546, |
|
"grad_norm": 0.14376906522497312, |
|
"learning_rate": 7.857355122839673e-08, |
|
"logits/chosen": -0.8737711906433105, |
|
"logits/rejected": 2.419881582260132, |
|
"logps/chosen": -592.2354125976562, |
|
"logps/rejected": -7475.37109375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.113858461380005, |
|
"rewards/margins": 65.95025634765625, |
|
"rewards/rejected": -68.0641098022461, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7954545454545454, |
|
"grad_norm": 0.17275320368872452, |
|
"learning_rate": 6.135884496044244e-08, |
|
"logits/chosen": -0.8250367045402527, |
|
"logits/rejected": 2.680527687072754, |
|
"logps/chosen": -574.1306762695312, |
|
"logps/rejected": -7469.40087890625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9495769739151, |
|
"rewards/margins": 66.2955322265625, |
|
"rewards/rejected": -68.24510955810547, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8238636363636364, |
|
"grad_norm": 0.036939229346128824, |
|
"learning_rate": 4.600710195020982e-08, |
|
"logits/chosen": -0.6854007244110107, |
|
"logits/rejected": 2.711479902267456, |
|
"logps/chosen": -595.4219970703125, |
|
"logps/rejected": -7571.99853515625, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0627870559692383, |
|
"rewards/margins": 67.24461364746094, |
|
"rewards/rejected": -69.30741119384766, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.8522727272727273, |
|
"grad_norm": 1.0005598289334474, |
|
"learning_rate": 3.2669931390104374e-08, |
|
"logits/chosen": -0.7712670564651489, |
|
"logits/rejected": 2.537261724472046, |
|
"logps/chosen": -546.4788818359375, |
|
"logps/rejected": -8221.009765625, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9022438526153564, |
|
"rewards/margins": 74.09959411621094, |
|
"rewards/rejected": -76.00184631347656, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8522727272727273, |
|
"eval_logits/chosen": -0.6079820394515991, |
|
"eval_logits/rejected": 2.6897590160369873, |
|
"eval_logps/chosen": -575.6599731445312, |
|
"eval_logps/rejected": -7868.724609375, |
|
"eval_loss": 0.0006528676021844149, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -2.086782693862915, |
|
"eval_rewards/margins": 70.19257354736328, |
|
"eval_rewards/rejected": -72.27935791015625, |
|
"eval_runtime": 193.3061, |
|
"eval_samples_per_second": 20.201, |
|
"eval_steps_per_second": 0.321, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8806818181818182, |
|
"grad_norm": 0.0660494270632579, |
|
"learning_rate": 2.147904716149135e-08, |
|
"logits/chosen": -0.5389717817306519, |
|
"logits/rejected": 2.602092981338501, |
|
"logps/chosen": -555.8554077148438, |
|
"logps/rejected": -7988.98974609375, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9378883838653564, |
|
"rewards/margins": 70.9469985961914, |
|
"rewards/rejected": -72.8848876953125, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 0.0488754281750263, |
|
"learning_rate": 1.254496706805433e-08, |
|
"logits/chosen": -0.7335542440414429, |
|
"logits/rejected": 2.5863664150238037, |
|
"logps/chosen": -590.0091552734375, |
|
"logps/rejected": -8039.3125, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0733609199523926, |
|
"rewards/margins": 71.79994201660156, |
|
"rewards/rejected": -73.87330627441406, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 0.2519430705585286, |
|
"learning_rate": 5.955921395237318e-09, |
|
"logits/chosen": -0.6468337178230286, |
|
"logits/rejected": 2.546152353286743, |
|
"logps/chosen": -548.1500244140625, |
|
"logps/rejected": -8072.88037109375, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.928818941116333, |
|
"rewards/margins": 72.32246398925781, |
|
"rewards/rejected": -74.25126647949219, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.9659090909090909, |
|
"grad_norm": 0.8725515919272617, |
|
"learning_rate": 1.7769815745066474e-09, |
|
"logits/chosen": -0.8968937993049622, |
|
"logits/rejected": 2.3741860389709473, |
|
"logps/chosen": -561.4879760742188, |
|
"logps/rejected": -7435.28125, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9250719547271729, |
|
"rewards/margins": 66.34008026123047, |
|
"rewards/rejected": -68.26514434814453, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9943181818181818, |
|
"grad_norm": 3.1933280173345713, |
|
"learning_rate": 4.9417557483610875e-11, |
|
"logits/chosen": -0.6227324604988098, |
|
"logits/rejected": 2.5037405490875244, |
|
"logps/chosen": -589.6453247070312, |
|
"logps/rejected": -7948.50927734375, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.153775215148926, |
|
"rewards/margins": 70.8557357788086, |
|
"rewards/rejected": -73.00951385498047, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9943181818181818, |
|
"eval_logits/chosen": -0.5257502198219299, |
|
"eval_logits/rejected": 2.7151966094970703, |
|
"eval_logps/chosen": -577.6112670898438, |
|
"eval_logps/rejected": -7961.02392578125, |
|
"eval_loss": 0.000620901002548635, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -2.106295585632324, |
|
"eval_rewards/margins": 71.09603881835938, |
|
"eval_rewards/rejected": -73.20234680175781, |
|
"eval_runtime": 193.4431, |
|
"eval_samples_per_second": 20.187, |
|
"eval_steps_per_second": 0.321, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 352, |
|
"total_flos": 0.0, |
|
"train_loss": 0.08502711300074721, |
|
"train_runtime": 9968.7204, |
|
"train_samples_per_second": 4.514, |
|
"train_steps_per_second": 0.035 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 352, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|