|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 96, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010416666666666666, |
|
"grad_norm": 178.77286563875307, |
|
"learning_rate": 1.7600461536419332e-08, |
|
"logits/chosen": -2.590585231781006, |
|
"logits/rejected": -2.5664222240448, |
|
"logps/chosen": -80.29847717285156, |
|
"logps/rejected": -53.10200881958008, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.10416666666666667, |
|
"grad_norm": 160.97877202778918, |
|
"learning_rate": 1.760046153641933e-07, |
|
"logits/chosen": -2.555966377258301, |
|
"logits/rejected": -2.5380208492279053, |
|
"logps/chosen": -87.92900848388672, |
|
"logps/rejected": -81.04818725585938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.2013888955116272, |
|
"rewards/chosen": -0.0037568057887256145, |
|
"rewards/margins": -0.0011305783409625292, |
|
"rewards/rejected": -0.0026262274477630854, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 135.4750421201187, |
|
"learning_rate": 1.671668693916127e-07, |
|
"logits/chosen": -2.6140284538269043, |
|
"logits/rejected": -2.5652213096618652, |
|
"logps/chosen": -103.2249755859375, |
|
"logps/rejected": -89.80636596679688, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": 0.0341915600001812, |
|
"rewards/margins": 0.01302550919353962, |
|
"rewards/rejected": 0.021166052669286728, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 183.23637457075452, |
|
"learning_rate": 1.5832912341903208e-07, |
|
"logits/chosen": -2.518775701522827, |
|
"logits/rejected": -2.5330777168273926, |
|
"logps/chosen": -67.13838958740234, |
|
"logps/rejected": -74.79261779785156, |
|
"loss": 0.6768, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": 0.08110855519771576, |
|
"rewards/margins": 0.044944558292627335, |
|
"rewards/rejected": 0.036163996905088425, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 117.61800068052908, |
|
"learning_rate": 1.4949137744645144e-07, |
|
"logits/chosen": -2.5774178504943848, |
|
"logits/rejected": -2.5653040409088135, |
|
"logps/chosen": -71.62496948242188, |
|
"logps/rejected": -70.44197082519531, |
|
"loss": 0.6712, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": 0.14154085516929626, |
|
"rewards/margins": 0.06791118532419205, |
|
"rewards/rejected": 0.07362965494394302, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5208333333333334, |
|
"grad_norm": 134.94145476491178, |
|
"learning_rate": 1.4065363147387083e-07, |
|
"logits/chosen": -2.491271495819092, |
|
"logits/rejected": -2.503357410430908, |
|
"logps/chosen": -50.327152252197266, |
|
"logps/rejected": -58.57765579223633, |
|
"loss": 0.6631, |
|
"rewards/accuracies": 0.23125000298023224, |
|
"rewards/chosen": 0.15576013922691345, |
|
"rewards/margins": 0.0658084824681282, |
|
"rewards/rejected": 0.08995168656110764, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 132.63437591048415, |
|
"learning_rate": 1.3181588550129022e-07, |
|
"logits/chosen": -2.5763726234436035, |
|
"logits/rejected": -2.557838201522827, |
|
"logps/chosen": -78.12178039550781, |
|
"logps/rejected": -78.89440155029297, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": 0.25709980726242065, |
|
"rewards/margins": 0.1368531435728073, |
|
"rewards/rejected": 0.12024664878845215, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7291666666666666, |
|
"grad_norm": 131.450312242274, |
|
"learning_rate": 1.229781395287096e-07, |
|
"logits/chosen": -2.5564064979553223, |
|
"logits/rejected": -2.5472121238708496, |
|
"logps/chosen": -94.9344253540039, |
|
"logps/rejected": -84.7915267944336, |
|
"loss": 0.6471, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": 0.3827608525753021, |
|
"rewards/margins": 0.20402593910694122, |
|
"rewards/rejected": 0.1787349283695221, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 204.1138249065331, |
|
"learning_rate": 1.1414039355612898e-07, |
|
"logits/chosen": -2.5576930046081543, |
|
"logits/rejected": -2.5041146278381348, |
|
"logps/chosen": -84.25480651855469, |
|
"logps/rejected": -78.98388671875, |
|
"loss": 0.6283, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.37731510400772095, |
|
"rewards/margins": 0.2691493034362793, |
|
"rewards/rejected": 0.10816575586795807, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 173.37555247860905, |
|
"learning_rate": 1.0530264758354837e-07, |
|
"logits/chosen": -2.4902758598327637, |
|
"logits/rejected": -2.484860420227051, |
|
"logps/chosen": -53.16009521484375, |
|
"logps/rejected": -63.27381134033203, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": 0.2224617451429367, |
|
"rewards/margins": 0.16971835494041443, |
|
"rewards/rejected": 0.052743397653102875, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 96, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6600211213032404, |
|
"train_runtime": 962.6608, |
|
"train_samples_per_second": 6.35, |
|
"train_steps_per_second": 0.1 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 96, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|