|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 100, |
|
"global_step": 78, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.25e-07, |
|
"logits/chosen": -2.7010061740875244, |
|
"logits/rejected": -2.6382064819335938, |
|
"logps/chosen": -391.27899169921875, |
|
"logps/rejected": -409.36163330078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989935734988098e-06, |
|
"logits/chosen": -2.668689250946045, |
|
"logits/rejected": -2.6425745487213135, |
|
"logps/chosen": -330.8897705078125, |
|
"logps/rejected": -337.9491271972656, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4913194477558136, |
|
"rewards/chosen": -0.0030369642190635204, |
|
"rewards/margins": 0.0014728567330166698, |
|
"rewards/rejected": -0.004509821534156799, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.646121984004666e-06, |
|
"logits/chosen": -2.662149429321289, |
|
"logits/rejected": -2.6085782051086426, |
|
"logps/chosen": -327.1595458984375, |
|
"logps/rejected": -321.137939453125, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.635937511920929, |
|
"rewards/chosen": -0.009846845641732216, |
|
"rewards/margins": 0.014805642887949944, |
|
"rewards/rejected": -0.02465249039232731, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8772424536302565e-06, |
|
"logits/chosen": -2.605135440826416, |
|
"logits/rejected": -2.577131748199463, |
|
"logps/chosen": -320.46856689453125, |
|
"logps/rejected": -349.97857666015625, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.053877659142017365, |
|
"rewards/margins": 0.03185194730758667, |
|
"rewards/rejected": -0.08572960644960403, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.835583164544139e-06, |
|
"logits/chosen": -2.6149070262908936, |
|
"logits/rejected": -2.5788040161132812, |
|
"logps/chosen": -337.58331298828125, |
|
"logps/rejected": -333.2330017089844, |
|
"loss": 0.6754, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -0.07810668647289276, |
|
"rewards/margins": 0.03418826684355736, |
|
"rewards/rejected": -0.11229495704174042, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -2.6245689392089844, |
|
"logits/rejected": -2.574202537536621, |
|
"logps/chosen": -352.3199462890625, |
|
"logps/rejected": -360.7944030761719, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.629687488079071, |
|
"rewards/chosen": -0.05965545028448105, |
|
"rewards/margins": 0.05930715054273605, |
|
"rewards/rejected": -0.1189626082777977, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.723433775328385e-07, |
|
"logits/chosen": -2.583293914794922, |
|
"logits/rejected": -2.5575852394104004, |
|
"logps/chosen": -340.70123291015625, |
|
"logps/rejected": -365.50408935546875, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.651562511920929, |
|
"rewards/chosen": -0.1051960214972496, |
|
"rewards/margins": 0.07506568729877472, |
|
"rewards/rejected": -0.18026170134544373, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.59412823400657e-07, |
|
"logits/chosen": -2.5673248767852783, |
|
"logits/rejected": -2.5238747596740723, |
|
"logps/chosen": -322.8841857910156, |
|
"logps/rejected": -349.5296936035156, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.09875378012657166, |
|
"rewards/margins": 0.07134519517421722, |
|
"rewards/rejected": -0.17009896039962769, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 78, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6737474944346991, |
|
"train_runtime": 6751.8687, |
|
"train_samples_per_second": 2.962, |
|
"train_steps_per_second": 0.012 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 78, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|