|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.99581589958159, |
|
"eval_steps": 500, |
|
"global_step": 119, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -2.6852197647094727, |
|
"logits/rejected": -2.6903719902038574, |
|
"logps/chosen": -263.7275390625, |
|
"logps/rejected": -230.14215087890625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -2.7360000610351562, |
|
"logits/rejected": -2.735159158706665, |
|
"logps/chosen": -277.9869079589844, |
|
"logps/rejected": -253.91012573242188, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.4791666567325592, |
|
"rewards/chosen": 0.004290024284273386, |
|
"rewards/margins": 0.002072603441774845, |
|
"rewards/rejected": 0.0022174210753291845, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.862705056474794e-07, |
|
"logits/chosen": -2.764007568359375, |
|
"logits/rejected": -2.7425270080566406, |
|
"logps/chosen": -286.0954284667969, |
|
"logps/rejected": -274.6139221191406, |
|
"loss": 0.6689, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.009724309667944908, |
|
"rewards/margins": 0.05151768773794174, |
|
"rewards/rejected": -0.041793376207351685, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.317841607379106e-07, |
|
"logits/chosen": -2.739412784576416, |
|
"logits/rejected": -2.731487512588501, |
|
"logps/chosen": -286.3190002441406, |
|
"logps/rejected": -291.80010986328125, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.25924447178840637, |
|
"rewards/margins": 0.24311673641204834, |
|
"rewards/rejected": -0.5023611783981323, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.403425107745314e-07, |
|
"logits/chosen": -2.7835636138916016, |
|
"logits/rejected": -2.7717299461364746, |
|
"logps/chosen": -322.2847900390625, |
|
"logps/rejected": -343.01287841796875, |
|
"loss": 0.6088, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4827890396118164, |
|
"rewards/margins": 0.33479487895965576, |
|
"rewards/rejected": -0.8175839185714722, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.197718133561508e-07, |
|
"logits/chosen": -2.7819108963012695, |
|
"logits/rejected": -2.7709670066833496, |
|
"logps/chosen": -355.53387451171875, |
|
"logps/rejected": -347.73553466796875, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5724222660064697, |
|
"rewards/margins": 0.429451048374176, |
|
"rewards/rejected": -1.001873254776001, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.803914069597341e-07, |
|
"logits/chosen": -2.65649676322937, |
|
"logits/rejected": -2.6217668056488037, |
|
"logps/chosen": -364.63458251953125, |
|
"logps/rejected": -355.6944580078125, |
|
"loss": 0.5917, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7049036026000977, |
|
"rewards/margins": 0.4729110598564148, |
|
"rewards/rejected": -1.1778147220611572, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.3413050507959067e-07, |
|
"logits/chosen": -2.5201010704040527, |
|
"logits/rejected": -2.4627785682678223, |
|
"logps/chosen": -344.98284912109375, |
|
"logps/rejected": -325.82452392578125, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.628750741481781, |
|
"rewards/margins": 0.43215298652648926, |
|
"rewards/rejected": -1.060903787612915, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.935072052698059e-07, |
|
"logits/chosen": -2.376207113265991, |
|
"logits/rejected": -2.3495357036590576, |
|
"logps/chosen": -290.0788269042969, |
|
"logps/rejected": -357.1901550292969, |
|
"loss": 0.5714, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6383817791938782, |
|
"rewards/margins": 0.5417054891586304, |
|
"rewards/rejected": -1.1800873279571533, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.7055709710194448e-07, |
|
"logits/chosen": -2.415030002593994, |
|
"logits/rejected": -2.389885187149048, |
|
"logps/chosen": -351.54156494140625, |
|
"logps/rejected": -358.383544921875, |
|
"loss": 0.5425, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.820050835609436, |
|
"rewards/margins": 0.46901077032089233, |
|
"rewards/rejected": -1.289061427116394, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.580316675034254e-08, |
|
"logits/chosen": -2.3294150829315186, |
|
"logits/rejected": -2.289402484893799, |
|
"logps/chosen": -357.5911560058594, |
|
"logps/rejected": -436.22357177734375, |
|
"loss": 0.5717, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0840901136398315, |
|
"rewards/margins": 0.5104671120643616, |
|
"rewards/rejected": -1.5945571660995483, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.7355161444279343e-08, |
|
"logits/chosen": -2.406877040863037, |
|
"logits/rejected": -2.3612313270568848, |
|
"logps/chosen": -392.439697265625, |
|
"logps/rejected": -412.04833984375, |
|
"loss": 0.5647, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9446671605110168, |
|
"rewards/margins": 0.5704779624938965, |
|
"rewards/rejected": -1.5151450634002686, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 119, |
|
"total_flos": 0.0, |
|
"train_loss": 0.596198278314927, |
|
"train_runtime": 1970.3201, |
|
"train_samples_per_second": 7.757, |
|
"train_steps_per_second": 0.06 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 119, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|