|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 96, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-08, |
|
"logits/chosen": -2.848330020904541, |
|
"logits/rejected": -2.834825038909912, |
|
"logps/chosen": -287.69622802734375, |
|
"logps/rejected": -255.33815002441406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.8012733459472656, |
|
"logits/rejected": -2.753434658050537, |
|
"logps/chosen": -244.99864196777344, |
|
"logps/rejected": -200.799560546875, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5069444179534912, |
|
"rewards/chosen": 0.0004034777812194079, |
|
"rewards/margins": 0.0008078858954831958, |
|
"rewards/rejected": -0.00040440805605612695, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.83504027183137e-07, |
|
"logits/chosen": -2.7793192863464355, |
|
"logits/rejected": -2.7794222831726074, |
|
"logps/chosen": -267.78729248046875, |
|
"logps/rejected": -250.0585174560547, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.021286360919475555, |
|
"rewards/margins": 0.027004068717360497, |
|
"rewards/rejected": -0.00571770966053009, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3619304568594546e-07, |
|
"logits/chosen": -2.7962687015533447, |
|
"logits/rejected": -2.7896554470062256, |
|
"logps/chosen": -278.6320495605469, |
|
"logps/rejected": -262.569580078125, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.0014000388327986002, |
|
"rewards/margins": 0.0615326389670372, |
|
"rewards/rejected": -0.060132600367069244, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.643105808261596e-07, |
|
"logits/chosen": -2.8134655952453613, |
|
"logits/rejected": -2.801830768585205, |
|
"logps/chosen": -290.64794921875, |
|
"logps/rejected": -259.28704833984375, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.10460182279348373, |
|
"rewards/margins": 0.1050100103020668, |
|
"rewards/rejected": -0.20961184799671173, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7734280209446865e-07, |
|
"logits/chosen": -2.7795636653900146, |
|
"logits/rejected": -2.7554478645324707, |
|
"logps/chosen": -303.3760986328125, |
|
"logps/rejected": -283.14141845703125, |
|
"loss": 0.6247, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.15783920884132385, |
|
"rewards/margins": 0.1601353883743286, |
|
"rewards/rejected": -0.3179745674133301, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8676665440207977e-07, |
|
"logits/chosen": -2.773420810699463, |
|
"logits/rejected": -2.792323589324951, |
|
"logps/chosen": -319.9610900878906, |
|
"logps/rejected": -301.3622741699219, |
|
"loss": 0.6152, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.33203256130218506, |
|
"rewards/margins": 0.21394209563732147, |
|
"rewards/rejected": -0.5459746718406677, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0453527111051183e-07, |
|
"logits/chosen": -2.770045518875122, |
|
"logits/rejected": -2.764028549194336, |
|
"logps/chosen": -324.46160888671875, |
|
"logps/rejected": -320.1623840332031, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.2613491415977478, |
|
"rewards/margins": 0.3139723539352417, |
|
"rewards/rejected": -0.5753214955329895, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1500545527530544e-08, |
|
"logits/chosen": -2.8060431480407715, |
|
"logits/rejected": -2.773552417755127, |
|
"logps/chosen": -326.8184509277344, |
|
"logps/rejected": -345.7678527832031, |
|
"loss": 0.589, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.369906485080719, |
|
"rewards/margins": 0.32024139165878296, |
|
"rewards/rejected": -0.690147876739502, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.981030611018234e-09, |
|
"logits/chosen": -2.753633499145508, |
|
"logits/rejected": -2.697878360748291, |
|
"logps/chosen": -304.6568603515625, |
|
"logps/rejected": -340.0152893066406, |
|
"loss": 0.6002, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.35058873891830444, |
|
"rewards/margins": 0.34887048602104187, |
|
"rewards/rejected": -0.6994592547416687, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 96, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6312613611419996, |
|
"train_runtime": 1566.6754, |
|
"train_samples_per_second": 7.804, |
|
"train_steps_per_second": 0.061 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 96, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|