|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9874476987447699, |
|
"eval_steps": 500, |
|
"global_step": 59, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016736401673640166, |
|
"grad_norm": 6.740061107347406, |
|
"learning_rate": 1.6666666666666664e-08, |
|
"logits/chosen": -2.7879996299743652, |
|
"logits/rejected": -2.789036750793457, |
|
"logps/chosen": -155.33309936523438, |
|
"logps/pi_response": -163.74407958984375, |
|
"logps/ref_response": -163.74407958984375, |
|
"logps/rejected": -160.54603576660156, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16736401673640167, |
|
"grad_norm": 7.101331538874614, |
|
"learning_rate": 9.860114570402053e-08, |
|
"logits/chosen": -2.732710599899292, |
|
"logits/rejected": -2.7246806621551514, |
|
"logps/chosen": -148.40945434570312, |
|
"logps/pi_response": -149.57220458984375, |
|
"logps/ref_response": -149.60110473632812, |
|
"logps/rejected": -148.08311462402344, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4513888955116272, |
|
"rewards/chosen": 0.00027455115923658013, |
|
"rewards/margins": -0.0001711228978820145, |
|
"rewards/rejected": 0.0004456740280147642, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.33472803347280333, |
|
"grad_norm": 7.337554336815785, |
|
"learning_rate": 8.374915007591053e-08, |
|
"logits/chosen": -2.6503920555114746, |
|
"logits/rejected": -2.64430570602417, |
|
"logps/chosen": -160.72381591796875, |
|
"logps/pi_response": -162.48867797851562, |
|
"logps/ref_response": -162.63189697265625, |
|
"logps/rejected": -161.467041015625, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0015181514900177717, |
|
"rewards/margins": -0.00011065408762078732, |
|
"rewards/rejected": 0.0016288056503981352, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.502092050209205, |
|
"grad_norm": 6.797623155516534, |
|
"learning_rate": 5.738232820012406e-08, |
|
"logits/chosen": -2.6548681259155273, |
|
"logits/rejected": -2.6514317989349365, |
|
"logps/chosen": -148.1983642578125, |
|
"logps/pi_response": -147.19541931152344, |
|
"logps/ref_response": -147.06637573242188, |
|
"logps/rejected": -147.02438354492188, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4781250059604645, |
|
"rewards/chosen": -0.0014096560189500451, |
|
"rewards/margins": -0.000304248504107818, |
|
"rewards/rejected": -0.0011054074857383966, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6694560669456067, |
|
"grad_norm": 6.526352701379774, |
|
"learning_rate": 2.8496739886173992e-08, |
|
"logits/chosen": -2.7033307552337646, |
|
"logits/rejected": -2.697089433670044, |
|
"logps/chosen": -158.65147399902344, |
|
"logps/pi_response": -156.37876892089844, |
|
"logps/ref_response": -156.102783203125, |
|
"logps/rejected": -155.2120361328125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.002376242307946086, |
|
"rewards/margins": 2.0093168132007122e-05, |
|
"rewards/rejected": -0.0023963353596627712, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"grad_norm": 6.8095971653212, |
|
"learning_rate": 6.947819411632222e-09, |
|
"logits/chosen": -2.687405586242676, |
|
"logits/rejected": -2.6960949897766113, |
|
"logps/chosen": -156.6856689453125, |
|
"logps/pi_response": -156.02719116210938, |
|
"logps/ref_response": -155.70803833007812, |
|
"logps/rejected": -160.33645629882812, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4781250059604645, |
|
"rewards/chosen": -0.002912611234933138, |
|
"rewards/margins": 0.0002370561269344762, |
|
"rewards/rejected": -0.0031496677547693253, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.9874476987447699, |
|
"step": 59, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6932335586871131, |
|
"train_runtime": 2785.4094, |
|
"train_samples_per_second": 5.487, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 59, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|