phi3m0128-cds-0.8-kendall-onof-neg_if-corr-max-2-simpo-max1500-default
/
checkpoint-50
/trainer_state.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 0.042973785990545764, | |
"eval_steps": 50, | |
"global_step": 50, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.008594757198109154, | |
"grad_norm": 0.05934199318289757, | |
"learning_rate": 4.999451708687114e-06, | |
"logits/chosen": 14.762972831726074, | |
"logits/rejected": 15.199728012084961, | |
"logps/chosen": -0.3259914815425873, | |
"logps/rejected": -0.34297481179237366, | |
"loss": 0.9377, | |
"rewards/accuracies": 0.4000000059604645, | |
"rewards/chosen": -0.4889872074127197, | |
"rewards/margins": 0.02547495998442173, | |
"rewards/rejected": -0.5144621729850769, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.017189514396218308, | |
"grad_norm": 0.06342790275812149, | |
"learning_rate": 4.997807075247147e-06, | |
"logits/chosen": 14.351249694824219, | |
"logits/rejected": 15.068448066711426, | |
"logps/chosen": -0.2809392511844635, | |
"logps/rejected": -0.3711296617984772, | |
"loss": 0.9352, | |
"rewards/accuracies": 0.574999988079071, | |
"rewards/chosen": -0.42140883207321167, | |
"rewards/margins": 0.1352856159210205, | |
"rewards/rejected": -0.5566944479942322, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.02578427159432746, | |
"grad_norm": 0.053961098194122314, | |
"learning_rate": 4.9950668210706795e-06, | |
"logits/chosen": 14.636960983276367, | |
"logits/rejected": 15.265243530273438, | |
"logps/chosen": -0.2820780873298645, | |
"logps/rejected": -0.34024301171302795, | |
"loss": 0.9351, | |
"rewards/accuracies": 0.4749999940395355, | |
"rewards/chosen": -0.42311716079711914, | |
"rewards/margins": 0.08724743127822876, | |
"rewards/rejected": -0.5103646516799927, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.034379028792436615, | |
"grad_norm": 0.13506193459033966, | |
"learning_rate": 4.9912321481237616e-06, | |
"logits/chosen": 14.4556884765625, | |
"logits/rejected": 15.048967361450195, | |
"logps/chosen": -0.2897028625011444, | |
"logps/rejected": -0.34129124879837036, | |
"loss": 0.922, | |
"rewards/accuracies": 0.44999998807907104, | |
"rewards/chosen": -0.43455424904823303, | |
"rewards/margins": 0.07738252729177475, | |
"rewards/rejected": -0.5119368433952332, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.042973785990545764, | |
"grad_norm": 0.05230574309825897, | |
"learning_rate": 4.986304738420684e-06, | |
"logits/chosen": 14.628789901733398, | |
"logits/rejected": 15.307828903198242, | |
"logps/chosen": -0.28786614537239075, | |
"logps/rejected": -0.3513876795768738, | |
"loss": 0.9201, | |
"rewards/accuracies": 0.5375000238418579, | |
"rewards/chosen": -0.4317992329597473, | |
"rewards/margins": 0.09528233855962753, | |
"rewards/rejected": -0.5270815491676331, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.042973785990545764, | |
"eval_logits/chosen": 14.234943389892578, | |
"eval_logits/rejected": 15.258601188659668, | |
"eval_logps/chosen": -0.2844341993331909, | |
"eval_logps/rejected": -0.3695394694805145, | |
"eval_loss": 0.9226060509681702, | |
"eval_rewards/accuracies": 0.5157894492149353, | |
"eval_rewards/chosen": -0.42665132880210876, | |
"eval_rewards/margins": 0.1276579648256302, | |
"eval_rewards/rejected": -0.5543092489242554, | |
"eval_runtime": 25.9356, | |
"eval_samples_per_second": 29.033, | |
"eval_steps_per_second": 3.663, | |
"step": 50 | |
} | |
], | |
"logging_steps": 10, | |
"max_steps": 1500, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 2, | |
"save_steps": 50, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 1.1369677154418688e+17, | |
"train_batch_size": 1, | |
"trial_name": null, | |
"trial_params": null | |
} | |