ironrock's picture
Upload folder using huggingface_hub
47420f0 verified
{
"best_metric": 0.12640328705310822,
"best_model_checkpoint": "./Mistral/16-03-24-Weni-ZeroShot-3.4.16-Mistral-7b-DPO-1.0.0_ZeroShot DPO Training a improved dataset and with learning rate 40x smaller than SFT-2_max_steps-144_batch_32_2024-03-16_ppid_9/checkpoint-100",
"epoch": 2.0408163265306123,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.41,
"grad_norm": 26.23642921447754,
"learning_rate": 2e-05,
"logits/chosen": -1.1930553913116455,
"logits/rejected": -1.1628875732421875,
"logps/chosen": -21.34621810913086,
"logps/rejected": -19.047561645507812,
"loss": 0.5766,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.26292288303375244,
"rewards/margins": 0.6182515025138855,
"rewards/rejected": -0.35532867908477783,
"step": 20
},
{
"epoch": 0.82,
"grad_norm": 17.913877487182617,
"learning_rate": 1.689922480620155e-05,
"logits/chosen": -1.13822603225708,
"logits/rejected": -1.0975478887557983,
"logps/chosen": -19.194286346435547,
"logps/rejected": -20.591718673706055,
"loss": 0.3335,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 1.3181432485580444,
"rewards/margins": 3.1427412033081055,
"rewards/rejected": -1.824597716331482,
"step": 40
},
{
"epoch": 1.02,
"eval_logits/chosen": -1.100258231163025,
"eval_logits/rejected": -1.0552455186843872,
"eval_logps/chosen": -20.249515533447266,
"eval_logps/rejected": -23.880615234375,
"eval_loss": 0.1760517805814743,
"eval_rewards/accuracies": 0.9147727489471436,
"eval_rewards/chosen": 0.4012405574321747,
"eval_rewards/margins": 4.824998378753662,
"eval_rewards/rejected": -4.423757553100586,
"eval_runtime": 82.5821,
"eval_samples_per_second": 2.107,
"eval_steps_per_second": 0.266,
"step": 50
},
{
"epoch": 1.22,
"grad_norm": 13.158234596252441,
"learning_rate": 1.3798449612403102e-05,
"logits/chosen": -1.141601324081421,
"logits/rejected": -1.1046007871627808,
"logps/chosen": -19.948759078979492,
"logps/rejected": -22.05379867553711,
"loss": 0.2502,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 0.9041095972061157,
"rewards/margins": 3.8345859050750732,
"rewards/rejected": -2.930476665496826,
"step": 60
},
{
"epoch": 1.63,
"grad_norm": 4.365915775299072,
"learning_rate": 1.0697674418604651e-05,
"logits/chosen": -1.1466914415359497,
"logits/rejected": -1.1094379425048828,
"logps/chosen": -19.571247100830078,
"logps/rejected": -21.93134880065918,
"loss": 0.1835,
"rewards/accuracies": 0.926562488079071,
"rewards/chosen": 1.109068751335144,
"rewards/margins": 3.9569854736328125,
"rewards/rejected": -2.847917079925537,
"step": 80
},
{
"epoch": 2.04,
"grad_norm": 7.621004104614258,
"learning_rate": 7.596899224806202e-06,
"logits/chosen": -1.1677355766296387,
"logits/rejected": -1.133095622062683,
"logps/chosen": -20.18477439880371,
"logps/rejected": -22.280181884765625,
"loss": 0.1615,
"rewards/accuracies": 0.9156249761581421,
"rewards/chosen": 1.1184756755828857,
"rewards/margins": 4.2123236656188965,
"rewards/rejected": -3.0938479900360107,
"step": 100
},
{
"epoch": 2.04,
"eval_logits/chosen": -1.1095269918441772,
"eval_logits/rejected": -1.0665117502212524,
"eval_logps/chosen": -19.742412567138672,
"eval_logps/rejected": -23.139616012573242,
"eval_loss": 0.12640328705310822,
"eval_rewards/accuracies": 0.9318181872367859,
"eval_rewards/chosen": 0.8069248199462891,
"eval_rewards/margins": 4.637885570526123,
"eval_rewards/rejected": -3.830960750579834,
"eval_runtime": 82.6113,
"eval_samples_per_second": 2.106,
"eval_steps_per_second": 0.266,
"step": 100
}
],
"logging_steps": 20,
"max_steps": 144,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}