{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.968,
  "eval_steps": 100,
  "global_step": 248,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 2.0000000000000002e-07,
      "logits/chosen": 0.4729592502117157,
      "logits/rejected": 0.38554269075393677,
      "logps/chosen": -213.08737182617188,
      "logps/rejected": -203.01974487304688,
      "loss": 0.0016,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.16,
      "learning_rate": 2.0000000000000003e-06,
      "logits/chosen": 0.15001463890075684,
      "logits/rejected": 0.20593884587287903,
      "logps/chosen": -161.55821228027344,
      "logps/rejected": -135.95449829101562,
      "loss": 0.0015,
      "rewards/accuracies": 0.3819444477558136,
      "rewards/chosen": 3.554481372702867e-05,
      "rewards/margins": 0.0017159796552732587,
      "rewards/rejected": -0.001680435030721128,
      "step": 10
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.000000000000001e-06,
      "logits/chosen": 0.09242797642946243,
      "logits/rejected": 0.04898233711719513,
      "logps/chosen": -181.7728271484375,
      "logps/rejected": -151.2571563720703,
      "loss": 0.0017,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.0012837719405069947,
      "rewards/margins": -0.0005185201880522072,
      "rewards/rejected": -0.0007652518688701093,
      "step": 20
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.993800445762451e-06,
      "logits/chosen": 0.1974470317363739,
      "logits/rejected": 0.24875693023204803,
      "logps/chosen": -175.55596923828125,
      "logps/rejected": -143.77584838867188,
      "loss": 0.0019,
      "rewards/accuracies": 0.39375001192092896,
      "rewards/chosen": -0.003100383561104536,
      "rewards/margins": -0.0019143905956298113,
      "rewards/rejected": -0.0011859927326440811,
      "step": 30
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.944388344834205e-06,
      "logits/chosen": 0.1500740945339203,
      "logits/rejected": 0.2011214792728424,
      "logps/chosen": -181.77581787109375,
      "logps/rejected": -164.48788452148438,
      "loss": 0.0016,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.0005139185814186931,
      "rewards/margins": 0.0008601135341450572,
      "rewards/rejected": -0.0013740319991484284,
      "step": 40
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.8465431931347904e-06,
      "logits/chosen": 0.08030920475721359,
      "logits/rejected": 0.15738067030906677,
      "logps/chosen": -169.9337158203125,
      "logps/rejected": -144.90228271484375,
      "loss": 0.0017,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.0008664874476380646,
      "rewards/margins": -0.0005486059235408902,
      "rewards/rejected": -0.00031788164051249623,
      "step": 50
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.702203692102539e-06,
      "logits/chosen": 0.1897524893283844,
      "logits/rejected": 0.17828692495822906,
      "logps/chosen": -173.4358367919922,
      "logps/rejected": -145.06625366210938,
      "loss": 0.0016,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": -0.0021182517521083355,
      "rewards/margins": 0.0011023099068552256,
      "rewards/rejected": -0.0032205611933022738,
      "step": 60
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.514229781074239e-06,
      "logits/chosen": 0.2607804834842682,
      "logits/rejected": 0.23584774136543274,
      "logps/chosen": -188.595947265625,
      "logps/rejected": -160.49171447753906,
      "loss": 0.0018,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.0020389086566865444,
      "rewards/margins": -0.001390365301631391,
      "rewards/rejected": -0.0006485433550551534,
      "step": 70
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.286345970517195e-06,
      "logits/chosen": 0.17552152276039124,
      "logits/rejected": 0.24562516808509827,
      "logps/chosen": -165.12591552734375,
      "logps/rejected": -137.75808715820312,
      "loss": 0.0016,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": -0.00046771267079748213,
      "rewards/margins": 0.0003761005064006895,
      "rewards/rejected": -0.0008438131771981716,
      "step": 80
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.023067544670082e-06,
      "logits/chosen": 0.16295495629310608,
      "logits/rejected": 0.1336701214313507,
      "logps/chosen": -176.12887573242188,
      "logps/rejected": -145.37673950195312,
      "loss": 0.0016,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": -0.0007470982382073998,
      "rewards/margins": 0.0005909097963012755,
      "rewards/rejected": -0.001338008209131658,
      "step": 90
    },
    {
      "epoch": 1.6,
      "learning_rate": 3.7296110958116845e-06,
      "logits/chosen": 0.18624618649482727,
      "logits/rejected": 0.15213565528392792,
      "logps/chosen": -177.76405334472656,
      "logps/rejected": -152.70240783691406,
      "loss": 0.0016,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": 0.000859270163346082,
      "rewards/margins": 0.0012418597470968962,
      "rewards/rejected": -0.0003825899329967797,
      "step": 100
    },
    {
      "epoch": 1.6,
      "eval_logits/chosen": -0.004967109765857458,
      "eval_logits/rejected": 0.09325645118951797,
      "eval_logps/chosen": -306.3399353027344,
      "eval_logps/rejected": -278.6739501953125,
      "eval_loss": 0.0021029466297477484,
      "eval_rewards/accuracies": 0.4909999966621399,
      "eval_rewards/chosen": 0.0005270715337246656,
      "eval_rewards/margins": 0.00038576460792683065,
      "eval_rewards/rejected": 0.00014130691124591976,
      "eval_runtime": 412.0589,
      "eval_samples_per_second": 4.854,
      "eval_steps_per_second": 1.213,
      "step": 100
    },
    {
      "epoch": 1.76,
      "learning_rate": 3.4117911628292944e-06,
      "logits/chosen": 0.18669767677783966,
      "logits/rejected": 0.193131685256958,
      "logps/chosen": -186.2015380859375,
      "logps/rejected": -160.41812133789062,
      "loss": 0.0017,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.0022148210555315018,
      "rewards/margins": -0.0006464887410402298,
      "rewards/rejected": -0.0015683325473219156,
      "step": 110
    },
    {
      "epoch": 1.92,
      "learning_rate": 3.075905022087675e-06,
      "logits/chosen": 0.07856817543506622,
      "logits/rejected": 0.13352298736572266,
      "logps/chosen": -174.9341278076172,
      "logps/rejected": -154.233642578125,
      "loss": 0.0017,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": 0.0001242739672306925,
      "rewards/margins": 0.0005846145795658231,
      "rewards/rejected": -0.00046034049591980875,
      "step": 120
    },
    {
      "epoch": 2.08,
      "learning_rate": 2.728607913349464e-06,
      "logits/chosen": 0.19492605328559875,
      "logits/rejected": 0.16430191695690155,
      "logps/chosen": -174.35968017578125,
      "logps/rejected": -150.935302734375,
      "loss": 0.0016,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": -0.0011604964965954423,
      "rewards/margins": 0.0007132277823984623,
      "rewards/rejected": -0.0018737241625785828,
      "step": 130
    },
    {
      "epoch": 2.24,
      "learning_rate": 2.376781173017589e-06,
      "logits/chosen": 0.2628365159034729,
      "logits/rejected": 0.19500017166137695,
      "logps/chosen": -187.58023071289062,
      "logps/rejected": -166.26370239257812,
      "loss": 0.0017,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.0010837314184755087,
      "rewards/margins": -0.00033805653220042586,
      "rewards/rejected": -0.0007456748280674219,
      "step": 140
    },
    {
      "epoch": 2.4,
      "learning_rate": 2.0273958875043877e-06,
      "logits/chosen": 0.14645084738731384,
      "logits/rejected": 0.1681375354528427,
      "logps/chosen": -157.08926391601562,
      "logps/rejected": -131.46810913085938,
      "loss": 0.0016,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": -0.002456915332004428,
      "rewards/margins": 0.0014492868212983012,
      "rewards/rejected": -0.0039062027353793383,
      "step": 150
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.6873747682962393e-06,
      "logits/chosen": 0.16395077109336853,
      "logits/rejected": 0.14529384672641754,
      "logps/chosen": -188.40296936035156,
      "logps/rejected": -158.95834350585938,
      "loss": 0.0016,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": 0.00022082138457335532,
      "rewards/margins": 0.000981360673904419,
      "rewards/rejected": -0.0007605393184348941,
      "step": 160
    },
    {
      "epoch": 2.72,
      "learning_rate": 1.363454985517803e-06,
      "logits/chosen": 0.21595752239227295,
      "logits/rejected": 0.17811095714569092,
      "logps/chosen": -178.89500427246094,
      "logps/rejected": -150.37576293945312,
      "loss": 0.0016,
      "rewards/accuracies": 0.4437499940395355,
      "rewards/chosen": 0.0017019693041220307,
      "rewards/margins": 0.0014590112259611487,
      "rewards/rejected": 0.00024295765615534037,
      "step": 170
    },
    {
      "epoch": 2.88,
      "learning_rate": 1.062054677808238e-06,
      "logits/chosen": 0.11008661985397339,
      "logits/rejected": 0.1656235158443451,
      "logps/chosen": -166.90554809570312,
      "logps/rejected": -149.75042724609375,
      "loss": 0.0017,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": -0.0004684976884163916,
      "rewards/margins": 0.0005998688284307718,
      "rewards/rejected": -0.0010683666914701462,
      "step": 180
    },
    {
      "epoch": 3.04,
      "learning_rate": 7.891457834794711e-07,
      "logits/chosen": 0.2820424437522888,
      "logits/rejected": 0.16322749853134155,
      "logps/chosen": -180.0753173828125,
      "logps/rejected": -143.658203125,
      "loss": 0.0016,
      "rewards/accuracies": 0.4312500059604645,
      "rewards/chosen": 1.5354249853771762e-06,
      "rewards/margins": 0.0018281619995832443,
      "rewards/rejected": -0.0018266268307343125,
      "step": 190
    },
    {
      "epoch": 3.2,
      "learning_rate": 5.501357126768117e-07,
      "logits/chosen": 0.15994948148727417,
      "logits/rejected": 0.14330127835273743,
      "logps/chosen": -183.79759216308594,
      "logps/rejected": -155.71310424804688,
      "loss": 0.0017,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.001569538377225399,
      "rewards/margins": 0.0009444955503568053,
      "rewards/rejected": 0.000625042652245611,
      "step": 200
    },
    {
      "epoch": 3.2,
      "eval_logits/chosen": -0.009462343528866768,
      "eval_logits/rejected": 0.08856771141290665,
      "eval_logps/chosen": -306.4609375,
      "eval_logps/rejected": -278.74566650390625,
      "eval_loss": 0.0021845391020178795,
      "eval_rewards/accuracies": 0.4950000047683716,
      "eval_rewards/chosen": -0.0006826075841672719,
      "eval_rewards/margins": -0.00010681045387173072,
      "eval_rewards/rejected": -0.0005757971666753292,
      "eval_runtime": 412.1683,
      "eval_samples_per_second": 4.852,
      "eval_steps_per_second": 1.213,
      "step": 200
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.4976020508682345e-07,
      "logits/chosen": 0.1456301361322403,
      "logits/rejected": 0.2356918305158615,
      "logps/chosen": -183.83010864257812,
      "logps/rejected": -155.9720458984375,
      "loss": 0.0015,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": 0.0012806833256036043,
      "rewards/margins": 0.0026433460880070925,
      "rewards/rejected": -0.0013626629952341318,
      "step": 210
    },
    {
      "epoch": 3.52,
      "learning_rate": 1.9198949610721273e-07,
      "logits/chosen": 0.11089984327554703,
      "logits/rejected": 0.18088462948799133,
      "logps/chosen": -183.67636108398438,
      "logps/rejected": -146.19009399414062,
      "loss": 0.0015,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.0020343190990388393,
      "rewards/margins": 0.0024411864578723907,
      "rewards/rejected": -0.00040686698048375547,
      "step": 220
    },
    {
      "epoch": 3.68,
      "learning_rate": 7.994965069994143e-08,
      "logits/chosen": 0.15200337767601013,
      "logits/rejected": 0.0883503332734108,
      "logps/chosen": -152.91159057617188,
      "logps/rejected": -137.67068481445312,
      "loss": 0.0017,
      "rewards/accuracies": 0.4437499940395355,
      "rewards/chosen": -0.0002739583433140069,
      "rewards/margins": 0.00024719498469494283,
      "rewards/rejected": -0.0005211535608395934,
      "step": 230
    },
    {
      "epoch": 3.84,
      "learning_rate": 1.5860623616664183e-08,
      "logits/chosen": 0.21818551421165466,
      "logits/rejected": 0.26517254114151,
      "logps/chosen": -185.5134735107422,
      "logps/rejected": -156.71102905273438,
      "loss": 0.0018,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.0007516987388953567,
      "rewards/margins": -0.001160649349913001,
      "rewards/rejected": 0.0004089508147444576,
      "step": 240
    },
    {
      "epoch": 3.97,
      "step": 248,
      "total_flos": 0.0,
      "train_loss": 0.0016499216942447088,
      "train_runtime": 2658.6406,
      "train_samples_per_second": 1.505,
      "train_steps_per_second": 0.093
    }
  ],
  "logging_steps": 10,
  "max_steps": 248,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}