{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9921671018276762,
  "eval_steps": 100,
  "global_step": 95,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 5.000000000000001e-07,
      "logits/chosen": 0.8826487064361572,
      "logits/rejected": 0.921362042427063,
      "logps/chosen": -36.58121871948242,
      "logps/rejected": -54.902320861816406,
      "loss": 2500.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.1,
      "learning_rate": 5e-06,
      "logits/chosen": 0.8914464116096497,
      "logits/rejected": 0.8742997050285339,
      "logps/chosen": -87.8135986328125,
      "logps/rejected": -96.38023376464844,
      "loss": 2502.7986,
      "rewards/accuracies": 0.2152777761220932,
      "rewards/chosen": -0.00011578062549233437,
      "rewards/margins": -2.257831147289835e-05,
      "rewards/rejected": -9.320233948528767e-05,
      "step": 10
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.83118057351089e-06,
      "logits/chosen": 0.7811827659606934,
      "logits/rejected": 0.8477146029472351,
      "logps/chosen": -91.7614517211914,
      "logps/rejected": -85.11201477050781,
      "loss": 2501.6135,
      "rewards/accuracies": 0.25,
      "rewards/chosen": -3.986436786362901e-05,
      "rewards/margins": -0.0002826174022629857,
      "rewards/rejected": 0.00024275311443489045,
      "step": 20
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.3475222930516484e-06,
      "logits/chosen": 0.8648529052734375,
      "logits/rejected": 0.8481088876724243,
      "logps/chosen": -85.29056549072266,
      "logps/rejected": -78.01969146728516,
      "loss": 2497.0559,
      "rewards/accuracies": 0.2750000059604645,
      "rewards/chosen": 0.0002869653981178999,
      "rewards/margins": 0.00010402966290712357,
      "rewards/rejected": 0.00018293573521077633,
      "step": 30
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.6143458894413463e-06,
      "logits/chosen": 0.7654550075531006,
      "logits/rejected": 0.8423868417739868,
      "logps/chosen": -122.76900482177734,
      "logps/rejected": -108.79447174072266,
      "loss": 2500.084,
      "rewards/accuracies": 0.28125,
      "rewards/chosen": -0.00036479695700109005,
      "rewards/margins": -0.0001517109922133386,
      "rewards/rejected": -0.0002130859502358362,
      "step": 40
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.730670898658255e-06,
      "logits/chosen": 0.8405305743217468,
      "logits/rejected": 0.869471549987793,
      "logps/chosen": -65.78740692138672,
      "logps/rejected": -71.66648864746094,
      "loss": 2500.4,
      "rewards/accuracies": 0.21875,
      "rewards/chosen": -0.00032314873533323407,
      "rewards/margins": -0.00028933738940395415,
      "rewards/rejected": -3.381132773938589e-05,
      "step": 50
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8158425248197931e-06,
      "logits/chosen": 0.8164768218994141,
      "logits/rejected": 0.8862431645393372,
      "logps/chosen": -117.6532974243164,
      "logps/rejected": -110.94734191894531,
      "loss": 2499.3979,
      "rewards/accuracies": 0.26875001192092896,
      "rewards/chosen": -0.0002028214803431183,
      "rewards/margins": -4.244589217705652e-05,
      "rewards/rejected": -0.0001603755954420194,
      "step": 60
    },
    {
      "epoch": 0.73,
      "learning_rate": 9.934134090518593e-07,
      "logits/chosen": 0.8050645589828491,
      "logits/rejected": 0.8623224496841431,
      "logps/chosen": -82.68321990966797,
      "logps/rejected": -83.93122863769531,
      "loss": 2500.1014,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.00020013593893963844,
      "rewards/margins": 3.215682227164507e-05,
      "rewards/rejected": -0.00023229271755553782,
      "step": 70
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.7445716067596506e-07,
      "logits/chosen": 0.76527339220047,
      "logits/rejected": 0.8293226957321167,
      "logps/chosen": -105.8263931274414,
      "logps/rejected": -104.97891998291016,
      "loss": 2496.7385,
      "rewards/accuracies": 0.2750000059604645,
      "rewards/chosen": -0.00012336719373706728,
      "rewards/margins": -3.703986237724166e-07,
      "rewards/rejected": -0.00012299678928684443,
      "step": 80
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.256725079024554e-08,
      "logits/chosen": 0.8350859880447388,
      "logits/rejected": 0.855826199054718,
      "logps/chosen": -77.66065979003906,
      "logps/rejected": -90.00727081298828,
      "loss": 2495.8914,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": 0.00030836346559226513,
      "rewards/margins": 0.0007544533582404256,
      "rewards/rejected": -0.00044608983444049954,
      "step": 90
    },
    {
      "epoch": 0.99,
      "step": 95,
      "total_flos": 0.0,
      "train_loss": 2499.3611225328946,
      "train_runtime": 1147.2033,
      "train_samples_per_second": 5.329,
      "train_steps_per_second": 0.083
    }
  ],
  "logging_steps": 10,
  "max_steps": 95,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}