{ "best_metric": 0.3698837161064148, "best_model_checkpoint": "./mixstral/05-04-24-Weni-WeniGPT-Agents-Mixstral-Instruct-2.0.1-KTO_KTO with Agents 1.2.0 dataset and Mixstral model, with tokenization zephyr chat template-3_max_steps-145_batch_16_2024-04-05_ppid_9/checkpoint-100", "epoch": 0.684931506849315, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "grad_norm": 4.052237033843994, "kl": 10.023146629333496, "learning_rate": 0.0001785714285714286, "logps/chosen": -232.72720336914062, "logps/rejected": -241.88575744628906, "loss": 0.4316, "rewards/chosen": 1.2224007844924927, "rewards/margins": 1.0828871726989746, "rewards/rejected": 0.279308944940567, "step": 20 }, { "epoch": 0.27, "grad_norm": 2.734194278717041, "kl": 10.954643249511719, "learning_rate": 0.00015000000000000001, "logps/chosen": -228.13992309570312, "logps/rejected": -270.3298034667969, "loss": 0.4077, "rewards/chosen": 1.687286615371704, "rewards/margins": 1.9949144124984741, "rewards/rejected": -0.2780403196811676, "step": 40 }, { "epoch": 0.34, "eval_kl": 0.0, "eval_logps/chosen": -315.1575927734375, "eval_logps/rejected": -328.9866027832031, "eval_loss": 0.466037392616272, "eval_rewards/chosen": -7.166321754455566, "eval_rewards/margins": 2.8236684799194336, "eval_rewards/rejected": -10.133459091186523, "eval_runtime": 353.626, "eval_samples_per_second": 0.848, "eval_steps_per_second": 0.212, "step": 50 }, { "epoch": 0.41, "grad_norm": 1.9602097272872925, "kl": 1.5916956663131714, "learning_rate": 0.00012142857142857143, "logps/chosen": -268.18023681640625, "logps/rejected": -304.6438903808594, "loss": 0.4028, "rewards/chosen": -2.1896169185638428, "rewards/margins": 4.930688858032227, "rewards/rejected": -6.980587482452393, "step": 60 }, { "epoch": 0.55, "grad_norm": 1.9362258911132812, "kl": 3.6249618530273438, "learning_rate": 9.285714285714286e-05, "logps/chosen": -263.2818603515625, "logps/rejected": -308.0414733886719, "loss": 0.3873, "rewards/chosen": -0.1703629493713379, "rewards/margins": 5.487633228302002, "rewards/rejected": -5.359496116638184, "step": 80 }, { "epoch": 0.68, "grad_norm": 2.322431802749634, "kl": 0.9517351388931274, "learning_rate": 6.428571428571429e-05, "logps/chosen": -240.21408081054688, "logps/rejected": -329.3759765625, "loss": 0.3351, "rewards/chosen": 0.027771174907684326, "rewards/margins": 7.602840423583984, "rewards/rejected": -7.7202911376953125, "step": 100 }, { "epoch": 0.68, "eval_kl": 0.48766180872917175, "eval_logps/chosen": -261.78826904296875, "eval_logps/rejected": -295.7093505859375, "eval_loss": 0.3698837161064148, "eval_rewards/chosen": -1.8293884992599487, "eval_rewards/margins": 4.808248043060303, "eval_rewards/rejected": -6.805734634399414, "eval_runtime": 352.0516, "eval_samples_per_second": 0.852, "eval_steps_per_second": 0.213, "step": 100 } ], "logging_steps": 20, "max_steps": 145, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }