|
{ |
|
"best_metric": 0.12640328705310822, |
|
"best_model_checkpoint": "./Mistral/16-03-24-Weni-ZeroShot-3.4.16-Mistral-7b-DPO-1.0.0_ZeroShot DPO Training a improved dataset and with learning rate 40x smaller than SFT-2_max_steps-144_batch_32_2024-03-16_ppid_9/checkpoint-100", |
|
"epoch": 2.0408163265306123, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 26.23642921447754, |
|
"learning_rate": 2e-05, |
|
"logits/chosen": -1.1930553913116455, |
|
"logits/rejected": -1.1628875732421875, |
|
"logps/chosen": -21.34621810913086, |
|
"logps/rejected": -19.047561645507812, |
|
"loss": 0.5766, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.26292288303375244, |
|
"rewards/margins": 0.6182515025138855, |
|
"rewards/rejected": -0.35532867908477783, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 17.913877487182617, |
|
"learning_rate": 1.689922480620155e-05, |
|
"logits/chosen": -1.13822603225708, |
|
"logits/rejected": -1.0975478887557983, |
|
"logps/chosen": -19.194286346435547, |
|
"logps/rejected": -20.591718673706055, |
|
"loss": 0.3335, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 1.3181432485580444, |
|
"rewards/margins": 3.1427412033081055, |
|
"rewards/rejected": -1.824597716331482, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_logits/chosen": -1.100258231163025, |
|
"eval_logits/rejected": -1.0552455186843872, |
|
"eval_logps/chosen": -20.249515533447266, |
|
"eval_logps/rejected": -23.880615234375, |
|
"eval_loss": 0.1760517805814743, |
|
"eval_rewards/accuracies": 0.9147727489471436, |
|
"eval_rewards/chosen": 0.4012405574321747, |
|
"eval_rewards/margins": 4.824998378753662, |
|
"eval_rewards/rejected": -4.423757553100586, |
|
"eval_runtime": 82.5821, |
|
"eval_samples_per_second": 2.107, |
|
"eval_steps_per_second": 0.266, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 13.158234596252441, |
|
"learning_rate": 1.3798449612403102e-05, |
|
"logits/chosen": -1.141601324081421, |
|
"logits/rejected": -1.1046007871627808, |
|
"logps/chosen": -19.948759078979492, |
|
"logps/rejected": -22.05379867553711, |
|
"loss": 0.2502, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.9041095972061157, |
|
"rewards/margins": 3.8345859050750732, |
|
"rewards/rejected": -2.930476665496826, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 4.365915775299072, |
|
"learning_rate": 1.0697674418604651e-05, |
|
"logits/chosen": -1.1466914415359497, |
|
"logits/rejected": -1.1094379425048828, |
|
"logps/chosen": -19.571247100830078, |
|
"logps/rejected": -21.93134880065918, |
|
"loss": 0.1835, |
|
"rewards/accuracies": 0.926562488079071, |
|
"rewards/chosen": 1.109068751335144, |
|
"rewards/margins": 3.9569854736328125, |
|
"rewards/rejected": -2.847917079925537, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 7.621004104614258, |
|
"learning_rate": 7.596899224806202e-06, |
|
"logits/chosen": -1.1677355766296387, |
|
"logits/rejected": -1.133095622062683, |
|
"logps/chosen": -20.18477439880371, |
|
"logps/rejected": -22.280181884765625, |
|
"loss": 0.1615, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/chosen": 1.1184756755828857, |
|
"rewards/margins": 4.2123236656188965, |
|
"rewards/rejected": -3.0938479900360107, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_logits/chosen": -1.1095269918441772, |
|
"eval_logits/rejected": -1.0665117502212524, |
|
"eval_logps/chosen": -19.742412567138672, |
|
"eval_logps/rejected": -23.139616012573242, |
|
"eval_loss": 0.12640328705310822, |
|
"eval_rewards/accuracies": 0.9318181872367859, |
|
"eval_rewards/chosen": 0.8069248199462891, |
|
"eval_rewards/margins": 4.637885570526123, |
|
"eval_rewards/rejected": -3.830960750579834, |
|
"eval_runtime": 82.6113, |
|
"eval_samples_per_second": 2.106, |
|
"eval_steps_per_second": 0.266, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 144, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|