{
"best_metric": 0.3698837161064148,
"best_model_checkpoint": "./mixstral/05-04-24-Weni-WeniGPT-Agents-Mixstral-Instruct-2.0.1-KTO_KTO with Agents 1.2.0 dataset and Mixstral model, with tokenization zephyr chat template-3_max_steps-145_batch_16_2024-04-05_ppid_9/checkpoint-100",
"epoch": 0.684931506849315,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"grad_norm": 4.052237033843994,
"kl": 10.023146629333496,
"learning_rate": 0.0001785714285714286,
"logps/chosen": -232.72720336914062,
"logps/rejected": -241.88575744628906,
"loss": 0.4316,
"rewards/chosen": 1.2224007844924927,
"rewards/margins": 1.0828871726989746,
"rewards/rejected": 0.279308944940567,
"step": 20
},
{
"epoch": 0.27,
"grad_norm": 2.734194278717041,
"kl": 10.954643249511719,
"learning_rate": 0.00015000000000000001,
"logps/chosen": -228.13992309570312,
"logps/rejected": -270.3298034667969,
"loss": 0.4077,
"rewards/chosen": 1.687286615371704,
"rewards/margins": 1.9949144124984741,
"rewards/rejected": -0.2780403196811676,
"step": 40
},
{
"epoch": 0.34,
"eval_kl": 0.0,
"eval_logps/chosen": -315.1575927734375,
"eval_logps/rejected": -328.9866027832031,
"eval_loss": 0.466037392616272,
"eval_rewards/chosen": -7.166321754455566,
"eval_rewards/margins": 2.8236684799194336,
"eval_rewards/rejected": -10.133459091186523,
"eval_runtime": 353.626,
"eval_samples_per_second": 0.848,
"eval_steps_per_second": 0.212,
"step": 50
},
{
"epoch": 0.41,
"grad_norm": 1.9602097272872925,
"kl": 1.5916956663131714,
"learning_rate": 0.00012142857142857143,
"logps/chosen": -268.18023681640625,
"logps/rejected": -304.6438903808594,
"loss": 0.4028,
"rewards/chosen": -2.1896169185638428,
"rewards/margins": 4.930688858032227,
"rewards/rejected": -6.980587482452393,
"step": 60
},
{
"epoch": 0.55,
"grad_norm": 1.9362258911132812,
"kl": 3.6249618530273438,
"learning_rate": 9.285714285714286e-05,
"logps/chosen": -263.2818603515625,
"logps/rejected": -308.0414733886719,
"loss": 0.3873,
"rewards/chosen": -0.1703629493713379,
"rewards/margins": 5.487633228302002,
"rewards/rejected": -5.359496116638184,
"step": 80
},
{
"epoch": 0.68,
"grad_norm": 2.322431802749634,
"kl": 0.9517351388931274,
"learning_rate": 6.428571428571429e-05,
"logps/chosen": -240.21408081054688,
"logps/rejected": -329.3759765625,
"loss": 0.3351,
"rewards/chosen": 0.027771174907684326,
"rewards/margins": 7.602840423583984,
"rewards/rejected": -7.7202911376953125,
"step": 100
},
{
"epoch": 0.68,
"eval_kl": 0.48766180872917175,
"eval_logps/chosen": -261.78826904296875,
"eval_logps/rejected": -295.7093505859375,
"eval_loss": 0.3698837161064148,
"eval_rewards/chosen": -1.8293884992599487,
"eval_rewards/margins": 4.808248043060303,
"eval_rewards/rejected": -6.805734634399414,
"eval_runtime": 352.0516,
"eval_samples_per_second": 0.852,
"eval_steps_per_second": 0.213,
"step": 100
}
],
"logging_steps": 20,
"max_steps": 145,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}