{ "best_metric": 0.3066587448120117, "best_model_checkpoint": "./zephyr/10-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.32-KTO_Experiment with a new tokenizer configuration for chat template of zephyr-2_max_steps-141_batch_16_2024-04-10_ppid_9/checkpoint-100", "epoch": 1.9138755980861244, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.38, "grad_norm": 5.00665283203125, "kl": 10.353445053100586, "learning_rate": 0.00018088235294117647, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.7197, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 20 }, { "epoch": 0.77, "grad_norm": 2.9841599464416504, "kl": 39.412322998046875, "learning_rate": 0.00015147058823529412, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.3729, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 40 }, { "epoch": 0.96, "eval_kl": 55.016841888427734, "eval_logps/chosen": -286.4376220703125, "eval_logps/rejected": -240.9407958984375, "eval_loss": 0.43116259574890137, "eval_rewards/chosen": 5.732795238494873, "eval_rewards/margins": 1.4973056316375732, "eval_rewards/rejected": 4.235489845275879, "eval_runtime": 99.6825, "eval_samples_per_second": 2.187, "eval_steps_per_second": 0.552, "step": 50 }, { "epoch": 1.15, "grad_norm": 3.5945215225219727, "kl": 34.871253967285156, "learning_rate": 0.00012205882352941178, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.3635, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 60 }, { "epoch": 1.53, "grad_norm": 0.8525181412696838, "kl": 10.300385475158691, "learning_rate": 9.264705882352942e-05, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.2092, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 80 }, { "epoch": 1.91, "grad_norm": 2.9749908447265625, "kl": 10.553510665893555, "learning_rate": 6.323529411764705e-05, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.096, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 100 }, { "epoch": 1.91, "eval_kl": 12.353157997131348, "eval_logps/chosen": -305.37127685546875, "eval_logps/rejected": -291.5103759765625, "eval_loss": 0.3066587448120117, "eval_rewards/chosen": 3.839434862136841, "eval_rewards/margins": 4.660902500152588, "eval_rewards/rejected": -0.8214675784111023, "eval_runtime": 99.8047, "eval_samples_per_second": 2.184, "eval_steps_per_second": 0.551, "step": 100 } ], "logging_steps": 20, "max_steps": 141, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }