{ "best_metric": 0.012590945698320866, "best_model_checkpoint": "./Zephyr/28-03-24-Weni-WeniGPT-QA-Zephyr-7B-4.0.2-KTO_WeniGPT Experiment using KTO trainer with no collator, Zephyr model and random system prompt.-2_max_steps-786_batch_32_2024-03-28_ppid_9/checkpoint-200", "epoch": 2.283539486203616, "eval_steps": 50, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "grad_norm": 3.973484516143799, "kl": 0.07447954267263412, "learning_rate": 0.00015833333333333332, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 1.3654, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 20 }, { "epoch": 0.3, "grad_norm": 2.1150100231170654, "kl": 0.24954533576965332, "learning_rate": 0.00019606299212598428, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.1911, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 40 }, { "epoch": 0.38, "eval_kl": 0.0, "eval_logps/chosen": -157.132080078125, "eval_logps/rejected": -431.1063232421875, "eval_loss": 0.019795970991253853, "eval_rewards/chosen": 5.025023460388184, "eval_rewards/margins": 28.0367374420166, "eval_rewards/rejected": -23.0117130279541, "eval_runtime": 209.568, "eval_samples_per_second": 2.252, "eval_steps_per_second": 0.563, "step": 50 }, { "epoch": 0.46, "grad_norm": 0.8193832039833069, "kl": 0.0, "learning_rate": 0.00019081364829396326, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.1212, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 60 }, { "epoch": 0.61, "grad_norm": 1.1438875198364258, "kl": 0.0, "learning_rate": 0.00018556430446194227, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0613, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 80 }, { "epoch": 0.76, "grad_norm": 1.5549229383468628, "kl": 0.30086809396743774, "learning_rate": 0.00018031496062992125, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0525, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 100 }, { "epoch": 0.76, "eval_kl": 0.0, "eval_logps/chosen": -151.0825958251953, "eval_logps/rejected": -502.1325378417969, "eval_loss": 0.015516282990574837, "eval_rewards/chosen": 5.629973411560059, "eval_rewards/margins": 35.744300842285156, "eval_rewards/rejected": -30.11433219909668, "eval_runtime": 209.5342, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.563, "step": 100 }, { "epoch": 0.91, "grad_norm": 0.16322359442710876, "kl": 0.0, "learning_rate": 0.0001750656167979003, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.037, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 120 }, { "epoch": 1.07, "grad_norm": 0.8122725486755371, "kl": 0.0, "learning_rate": 0.00016981627296587927, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0684, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 140 }, { "epoch": 1.14, "eval_kl": 0.0, "eval_logps/chosen": -145.42347717285156, "eval_logps/rejected": -530.3358154296875, "eval_loss": 0.010358058847486973, "eval_rewards/chosen": 6.19588565826416, "eval_rewards/margins": 39.130550384521484, "eval_rewards/rejected": -32.934661865234375, "eval_runtime": 209.5283, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.563, "step": 150 }, { "epoch": 1.22, "grad_norm": 0.11464398354291916, "kl": 0.0, "learning_rate": 0.00016456692913385828, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0167, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 160 }, { "epoch": 1.37, "grad_norm": 5.260560989379883, "kl": 0.0, "learning_rate": 0.00015931758530183726, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.131, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 180 }, { "epoch": 1.52, "grad_norm": 0.15409517288208008, "kl": 0.0, "learning_rate": 0.0001540682414698163, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0253, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 200 }, { "epoch": 1.52, "eval_kl": 0.0, "eval_logps/chosen": -147.8525390625, "eval_logps/rejected": -489.1932678222656, "eval_loss": 0.012590945698320866, "eval_rewards/chosen": 5.952979564666748, "eval_rewards/margins": 34.77338790893555, "eval_rewards/rejected": -28.82040786743164, "eval_runtime": 209.5044, "eval_samples_per_second": 2.253, "eval_steps_per_second": 0.563, "step": 200 }, { "epoch": 1.67, "grad_norm": 0.025852208957076073, "kl": 0.0, "learning_rate": 0.00014908136482939634, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0101, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 220 }, { "epoch": 1.83, "grad_norm": 0.013888155110180378, "kl": 0.1387307345867157, "learning_rate": 0.00014383202099737535, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0177, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 240 }, { "epoch": 1.9, "eval_kl": 0.0, "eval_logps/chosen": -142.92942810058594, "eval_logps/rejected": -740.499267578125, "eval_loss": 0.01631937175989151, "eval_rewards/chosen": 6.445290565490723, "eval_rewards/margins": 60.39630126953125, "eval_rewards/rejected": -53.951011657714844, "eval_runtime": 209.4415, "eval_samples_per_second": 2.254, "eval_steps_per_second": 0.563, "step": 250 }, { "epoch": 1.98, "grad_norm": 0.02793472446501255, "kl": 0.0, "learning_rate": 0.00013858267716535433, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0112, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 260 }, { "epoch": 2.13, "grad_norm": 0.0313153937458992, "kl": 0.0, "learning_rate": 0.00013333333333333334, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0067, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 280 }, { "epoch": 2.28, "grad_norm": 0.030554356053471565, "kl": 0.0, "learning_rate": 0.00012808398950131235, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.0126, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 300 }, { "epoch": 2.28, "eval_kl": 0.0, "eval_logps/chosen": -145.38067626953125, "eval_logps/rejected": -851.4735717773438, "eval_loss": 0.020638220012187958, "eval_rewards/chosen": 6.200164794921875, "eval_rewards/margins": 71.24861145019531, "eval_rewards/rejected": -65.04844665527344, "eval_runtime": 209.5591, "eval_samples_per_second": 2.252, "eval_steps_per_second": 0.563, "step": 300 } ], "logging_steps": 20, "max_steps": 786, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }