|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984301412872841, |
|
"eval_steps": 500, |
|
"global_step": 159, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006279434850863423, |
|
"grad_norm": 49.46455363166422, |
|
"learning_rate": 6.25e-09, |
|
"logits/chosen": 0.7729572653770447, |
|
"logits/rejected": 0.9946910738945007, |
|
"logps/chosen": -236.32302856445312, |
|
"logps/pi_response": -130.86985778808594, |
|
"logps/ref_response": -130.86985778808594, |
|
"logps/rejected": -603.046630859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 39.43803954404861, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": 0.586125373840332, |
|
"logits/rejected": 0.9812673926353455, |
|
"logps/chosen": -321.5859680175781, |
|
"logps/pi_response": -114.55306243896484, |
|
"logps/ref_response": -114.55144500732422, |
|
"logps/rejected": -546.476318359375, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4097222089767456, |
|
"rewards/chosen": -0.002124057849869132, |
|
"rewards/margins": -0.0025205437559634447, |
|
"rewards/rejected": 0.0003964858187828213, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 35.21511776692514, |
|
"learning_rate": 9.980706626858606e-08, |
|
"logits/chosen": 0.71666020154953, |
|
"logits/rejected": 1.056302785873413, |
|
"logps/chosen": -268.4955139160156, |
|
"logps/pi_response": -120.29206848144531, |
|
"logps/ref_response": -120.35611724853516, |
|
"logps/rejected": -554.8519897460938, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.0067259399220347404, |
|
"rewards/margins": 0.013305353932082653, |
|
"rewards/rejected": -0.020031295716762543, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 29.26357792211296, |
|
"learning_rate": 9.765362502737097e-08, |
|
"logits/chosen": 0.7042983174324036, |
|
"logits/rejected": 1.0865741968154907, |
|
"logps/chosen": -327.14837646484375, |
|
"logps/pi_response": -118.11744689941406, |
|
"logps/ref_response": -118.01436614990234, |
|
"logps/rejected": -522.3547973632812, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.05294172838330269, |
|
"rewards/margins": 0.0403742715716362, |
|
"rewards/rejected": -0.09331601113080978, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 29.237988897280104, |
|
"learning_rate": 9.320944188084241e-08, |
|
"logits/chosen": 0.7036560773849487, |
|
"logits/rejected": 1.0425139665603638, |
|
"logps/chosen": -314.95477294921875, |
|
"logps/pi_response": -113.6474609375, |
|
"logps/ref_response": -112.6651840209961, |
|
"logps/rejected": -582.9769897460938, |
|
"loss": 0.6238, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.14075346291065216, |
|
"rewards/margins": 0.1760648787021637, |
|
"rewards/rejected": -0.31681832671165466, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 28.58811527839098, |
|
"learning_rate": 8.668815171119019e-08, |
|
"logits/chosen": 0.7629910707473755, |
|
"logits/rejected": 1.156697154045105, |
|
"logps/chosen": -294.5866394042969, |
|
"logps/pi_response": -118.6247329711914, |
|
"logps/ref_response": -117.3294906616211, |
|
"logps/rejected": -556.7156372070312, |
|
"loss": 0.5895, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.15429341793060303, |
|
"rewards/margins": 0.2478162795305252, |
|
"rewards/rejected": -0.40210968255996704, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 19.524336970843382, |
|
"learning_rate": 7.840323733655779e-08, |
|
"logits/chosen": 0.7207380533218384, |
|
"logits/rejected": 1.0942904949188232, |
|
"logps/chosen": -313.4345703125, |
|
"logps/pi_response": -113.78782653808594, |
|
"logps/ref_response": -110.54020690917969, |
|
"logps/rejected": -610.8480224609375, |
|
"loss": 0.5704, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.30726358294487, |
|
"rewards/margins": 0.4200159013271332, |
|
"rewards/rejected": -0.7272794842720032, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 19.57511396533311, |
|
"learning_rate": 6.87529601804781e-08, |
|
"logits/chosen": 0.9113609194755554, |
|
"logits/rejected": 1.2666047811508179, |
|
"logps/chosen": -296.62127685546875, |
|
"logps/pi_response": -117.84214782714844, |
|
"logps/ref_response": -111.63108825683594, |
|
"logps/rejected": -629.7382202148438, |
|
"loss": 0.5413, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.41433459520339966, |
|
"rewards/margins": 0.5893218517303467, |
|
"rewards/rejected": -1.0036565065383911, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 23.55600516973884, |
|
"learning_rate": 5.8201215576551086e-08, |
|
"logits/chosen": 0.7490819692611694, |
|
"logits/rejected": 1.2997609376907349, |
|
"logps/chosen": -338.2764587402344, |
|
"logps/pi_response": -127.49137878417969, |
|
"logps/ref_response": -119.87553405761719, |
|
"logps/rejected": -695.3623657226562, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.4643521308898926, |
|
"rewards/margins": 0.773566484451294, |
|
"rewards/rejected": -1.2379186153411865, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 21.827438682416545, |
|
"learning_rate": 4.725523300678362e-08, |
|
"logits/chosen": 0.931191086769104, |
|
"logits/rejected": 1.334916353225708, |
|
"logps/chosen": -402.0022888183594, |
|
"logps/pi_response": -120.39073181152344, |
|
"logps/ref_response": -110.82243347167969, |
|
"logps/rejected": -706.8969116210938, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6253440380096436, |
|
"rewards/margins": 0.7057360410690308, |
|
"rewards/rejected": -1.3310800790786743, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 15.954130788308259, |
|
"learning_rate": 3.644119323817915e-08, |
|
"logits/chosen": 0.9159143567085266, |
|
"logits/rejected": 1.2248215675354004, |
|
"logps/chosen": -411.3912658691406, |
|
"logps/pi_response": -127.1515884399414, |
|
"logps/ref_response": -119.00660705566406, |
|
"logps/rejected": -621.9088745117188, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6569873690605164, |
|
"rewards/margins": 0.44270920753479004, |
|
"rewards/rejected": -1.0996966361999512, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 18.598007666864472, |
|
"learning_rate": 2.6278934458271994e-08, |
|
"logits/chosen": 0.8910084962844849, |
|
"logits/rejected": 1.170478105545044, |
|
"logps/chosen": -335.10687255859375, |
|
"logps/pi_response": -121.1594009399414, |
|
"logps/ref_response": -113.50992584228516, |
|
"logps/rejected": -652.3834228515625, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.4804770052433014, |
|
"rewards/margins": 0.6785348057746887, |
|
"rewards/rejected": -1.159011960029602, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 19.38573962620474, |
|
"learning_rate": 1.725696330273575e-08, |
|
"logits/chosen": 0.8399345278739929, |
|
"logits/rejected": 1.1444717645645142, |
|
"logps/chosen": -328.6500244140625, |
|
"logps/pi_response": -122.32352447509766, |
|
"logps/ref_response": -115.1312026977539, |
|
"logps/rejected": -664.1232299804688, |
|
"loss": 0.5217, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.43035322427749634, |
|
"rewards/margins": 0.7509421706199646, |
|
"rewards/rejected": -1.181295394897461, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 17.23175686548954, |
|
"learning_rate": 9.808972011828054e-09, |
|
"logits/chosen": 0.7406191229820251, |
|
"logits/rejected": 1.2706494331359863, |
|
"logps/chosen": -332.7467956542969, |
|
"logps/pi_response": -127.5960922241211, |
|
"logps/ref_response": -121.97221374511719, |
|
"logps/rejected": -654.1246948242188, |
|
"loss": 0.5466, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.4437400698661804, |
|
"rewards/margins": 0.6695939898490906, |
|
"rewards/rejected": -1.1133341789245605, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 17.704873543452532, |
|
"learning_rate": 4.2929905518041705e-09, |
|
"logits/chosen": 0.8205103874206543, |
|
"logits/rejected": 1.2819491624832153, |
|
"logps/chosen": -346.9167175292969, |
|
"logps/pi_response": -123.3003158569336, |
|
"logps/ref_response": -116.151611328125, |
|
"logps/rejected": -589.7174072265625, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4823363423347473, |
|
"rewards/margins": 0.5467069745063782, |
|
"rewards/rejected": -1.029043436050415, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 20.711700427076533, |
|
"learning_rate": 9.741758728888216e-10, |
|
"logits/chosen": 0.6927149891853333, |
|
"logits/rejected": 1.12680983543396, |
|
"logps/chosen": -402.5865783691406, |
|
"logps/pi_response": -140.15695190429688, |
|
"logps/ref_response": -133.6136016845703, |
|
"logps/rejected": -667.4969482421875, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.5095483064651489, |
|
"rewards/margins": 0.6510818600654602, |
|
"rewards/rejected": -1.160630226135254, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9984301412872841, |
|
"step": 159, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5759439048527172, |
|
"train_runtime": 4356.4071, |
|
"train_samples_per_second": 4.678, |
|
"train_steps_per_second": 0.036 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 159, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|