ironrock's picture
Training in progress, step 100, checkpoint
ce6687b verified
{
"best_metric": 0.039932698011398315,
"best_model_checkpoint": "./Zephyr/27-03-24-Weni-WeniGPT-QA-Zephyr-7B-4.0.0-KTO_WeniGPT Experiment using KTO trainer with no collator-2_max_steps-786_batch_16_2024-03-27_ppid_9/checkpoint-100",
"epoch": 0.37753657385559225,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 2.2820355892181396,
"kl": 0.7391773462295532,
"learning_rate": 0.0001666666666666667,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 785.5091,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 20
},
{
"epoch": 0.15,
"grad_norm": NaN,
"kl": 1.364301085472107,
"learning_rate": 0.00019606299212598428,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 124.9389,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 40
},
{
"epoch": 0.19,
"eval_kl": 3.282975673675537,
"eval_logps/chosen": -132.00738525390625,
"eval_logps/rejected": -237.69239807128906,
"eval_loss": 0.09803511947393417,
"eval_rewards/chosen": 4.3711838722229,
"eval_rewards/margins": 8.833364486694336,
"eval_rewards/rejected": -4.462180137634277,
"eval_runtime": 474.8108,
"eval_samples_per_second": 1.07,
"eval_steps_per_second": 0.535,
"step": 50
},
{
"epoch": 0.23,
"grad_norm": 1.1073336601257324,
"kl": 0.8054396510124207,
"learning_rate": 0.00019107611548556432,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 91.2134,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 60
},
{
"epoch": 0.3,
"grad_norm": 0.4274377226829529,
"kl": 0.38704636693000793,
"learning_rate": 0.00018582677165354333,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 26.9569,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 80
},
{
"epoch": 0.38,
"grad_norm": 0.5270018577575684,
"kl": 0.47709473967552185,
"learning_rate": 0.0001805774278215223,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 10.8269,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 100
},
{
"epoch": 0.38,
"eval_kl": 0.762260377407074,
"eval_logps/chosen": -135.4526824951172,
"eval_logps/rejected": -541.3764038085938,
"eval_loss": 0.039932698011398315,
"eval_rewards/chosen": 4.026655197143555,
"eval_rewards/margins": 38.857234954833984,
"eval_rewards/rejected": -34.83057403564453,
"eval_runtime": 474.2877,
"eval_samples_per_second": 1.071,
"eval_steps_per_second": 0.536,
"step": 100
}
],
"logging_steps": 20,
"max_steps": 786,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}