|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 155, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.125e-07, |
|
"logits/chosen": -2.8946032524108887, |
|
"logits/rejected": -2.8606104850769043, |
|
"logps/chosen": -149.65138244628906, |
|
"logps/rejected": -147.93389892578125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -2.7723653316497803, |
|
"logits/rejected": -2.7413721084594727, |
|
"logps/chosen": -127.34146881103516, |
|
"logps/rejected": -128.1923065185547, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.0019323664018884301, |
|
"rewards/margins": 0.0003104716306552291, |
|
"rewards/rejected": 0.001621894771233201, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989790503518888e-06, |
|
"logits/chosen": -2.78098464012146, |
|
"logits/rejected": -2.7467544078826904, |
|
"logps/chosen": -123.1203384399414, |
|
"logps/rejected": -123.0350341796875, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.028792519122362137, |
|
"rewards/margins": 0.0023718727752566338, |
|
"rewards/rejected": 0.026420647278428078, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8758891663695165e-06, |
|
"logits/chosen": -2.7641711235046387, |
|
"logits/rejected": -2.7216341495513916, |
|
"logps/chosen": -122.10086822509766, |
|
"logps/rejected": -121.503662109375, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": 0.07782017439603806, |
|
"rewards/margins": 0.005080958362668753, |
|
"rewards/rejected": 0.07273921370506287, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.64113778721764e-06, |
|
"logits/chosen": -2.7458434104919434, |
|
"logits/rejected": -2.717912197113037, |
|
"logps/chosen": -109.0793228149414, |
|
"logps/rejected": -111.13133239746094, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0947461873292923, |
|
"rewards/margins": 0.010546943172812462, |
|
"rewards/rejected": 0.08419923484325409, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.297477038394368e-06, |
|
"logits/chosen": -2.7164647579193115, |
|
"logits/rejected": -2.6716389656066895, |
|
"logps/chosen": -124.75947570800781, |
|
"logps/rejected": -124.14982604980469, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.07808887958526611, |
|
"rewards/margins": 0.012092621996998787, |
|
"rewards/rejected": 0.06599625200033188, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.862387287468095e-06, |
|
"logits/chosen": -2.7368290424346924, |
|
"logits/rejected": -2.7067949771881104, |
|
"logps/chosen": -122.44651794433594, |
|
"logps/rejected": -123.14102935791016, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.04377375915646553, |
|
"rewards/margins": 0.0176816638559103, |
|
"rewards/rejected": 0.02609209157526493, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.357999455114148e-06, |
|
"logits/chosen": -2.6553173065185547, |
|
"logits/rejected": -2.6178958415985107, |
|
"logps/chosen": -131.94534301757812, |
|
"logps/rejected": -128.55323791503906, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": 0.01449825894087553, |
|
"rewards/margins": 0.04684123024344444, |
|
"rewards/rejected": -0.03234297037124634, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.8099693217402807e-06, |
|
"logits/chosen": -2.680434226989746, |
|
"logits/rejected": -2.652169704437256, |
|
"logps/chosen": -135.7716522216797, |
|
"logps/rejected": -137.4171142578125, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.02753743901848793, |
|
"rewards/margins": 0.03442533686757088, |
|
"rewards/rejected": -0.06196277216076851, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.24617254146973e-06, |
|
"logits/chosen": -2.683811664581299, |
|
"logits/rejected": -2.6760880947113037, |
|
"logps/chosen": -109.57649230957031, |
|
"logps/rejected": -118.2197265625, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.08844764530658722, |
|
"rewards/margins": 0.03314244747161865, |
|
"rewards/rejected": 0.055305201560258865, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6952867418370707e-06, |
|
"logits/chosen": -2.662198543548584, |
|
"logits/rejected": -2.6310229301452637, |
|
"logps/chosen": -123.90461730957031, |
|
"logps/rejected": -129.31741333007812, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.07869021594524384, |
|
"rewards/margins": 0.050858981907367706, |
|
"rewards/rejected": 0.027831237763166428, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -2.662461042404175, |
|
"eval_logits/rejected": -2.5754306316375732, |
|
"eval_logps/chosen": -289.1737365722656, |
|
"eval_logps/rejected": -269.4944763183594, |
|
"eval_loss": 0.6652680039405823, |
|
"eval_rewards/accuracies": 0.6399999856948853, |
|
"eval_rewards/chosen": -0.02158256620168686, |
|
"eval_rewards/margins": 0.060695480555295944, |
|
"eval_rewards/rejected": -0.0822780430316925, |
|
"eval_runtime": 382.0664, |
|
"eval_samples_per_second": 5.235, |
|
"eval_steps_per_second": 0.654, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1853328309581139e-06, |
|
"logits/chosen": -2.6986496448516846, |
|
"logits/rejected": -2.6445116996765137, |
|
"logps/chosen": -126.5402603149414, |
|
"logps/rejected": -126.73878479003906, |
|
"loss": 0.6724, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": 0.008479948155581951, |
|
"rewards/margins": 0.04984438419342041, |
|
"rewards/rejected": -0.04136443883180618, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.422497088545436e-07, |
|
"logits/chosen": -2.6892261505126953, |
|
"logits/rejected": -2.61507511138916, |
|
"logps/chosen": -121.74040222167969, |
|
"logps/rejected": -122.19820404052734, |
|
"loss": 0.6739, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -0.008245681412518024, |
|
"rewards/margins": 0.041609544306993484, |
|
"rewards/rejected": -0.049855224788188934, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.8857488050544903e-07, |
|
"logits/chosen": -2.6735100746154785, |
|
"logits/rejected": -2.6348578929901123, |
|
"logps/chosen": -125.81143951416016, |
|
"logps/rejected": -133.61953735351562, |
|
"loss": 0.6736, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.015428361482918262, |
|
"rewards/margins": 0.057859934866428375, |
|
"rewards/rejected": -0.04243157058954239, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4229808148697732e-07, |
|
"logits/chosen": -2.6265532970428467, |
|
"logits/rejected": -2.633122205734253, |
|
"logps/chosen": -131.5351104736328, |
|
"logps/rejected": -136.5831756591797, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": 0.021531684324145317, |
|
"rewards/margins": 0.03703152388334274, |
|
"rewards/rejected": -0.015499832108616829, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5946226744029402e-08, |
|
"logits/chosen": -2.687284231185913, |
|
"logits/rejected": -2.641932249069214, |
|
"logps/chosen": -122.74555969238281, |
|
"logps/rejected": -129.6962127685547, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.03600780665874481, |
|
"rewards/margins": 0.05919266864657402, |
|
"rewards/rejected": -0.023184867575764656, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 155, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6804818130308582, |
|
"train_runtime": 6451.0629, |
|
"train_samples_per_second": 3.075, |
|
"train_steps_per_second": 0.024 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 155, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|