{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.032, "eval_steps": 100, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio": 0.0, "completion_length": 246.8125, "epoch": 0.0010666666666666667, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1e-06, "loss": 0.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward": 0.0, "rewards/format_reward": 0.0, "step": 1 }, { "clip_ratio": 0.0, "completion_length": 228.984375, "epoch": 0.005333333333333333, "grad_norm": 0.0002897010708693415, "kl": 0.0002434550688121817, "learning_rate": 2.959567305869736e-06, "loss": -0.0114, "reward": 0.0625, "reward_std": 0.0883883461356163, "rewards/accuracy_reward": 0.0625, "rewards/format_reward": 0.0, "step": 5 }, { "clip_ratio": 0.0, "completion_length": 234.275, "epoch": 0.010666666666666666, "grad_norm": 0.012982388027012348, "kl": 0.00034311858398723417, "learning_rate": 2.529362456803101e-06, "loss": -0.0046, "reward": 0.0625, "reward_std": 0.01767766922712326, "rewards/accuracy_reward": 0.0625, "rewards/format_reward": 0.0, "step": 10 }, { "clip_ratio": 0.0, "completion_length": 239.0, "epoch": 0.016, "grad_norm": 0.00560641847550869, "kl": 0.000386229114519665, "learning_rate": 1.7604722665003958e-06, "loss": 0.0031, "reward": 0.1375, "reward_std": 0.12374368458986282, "rewards/accuracy_reward": 0.1375, "rewards/format_reward": 0.0, "step": 15 }, { "clip_ratio": 0.0, "completion_length": 244.1125, "epoch": 0.021333333333333333, "grad_norm": 0.0002242429181933403, "kl": 0.00041347902733832596, "learning_rate": 9.058803509412648e-07, "loss": 0.0022, "reward": 0.05, "reward_std": 0.03535533845424652, "rewards/accuracy_reward": 0.05, "rewards/format_reward": 0.0, "step": 20 }, { "clip_ratio": 0.0, "completion_length": 242.75, "epoch": 0.02666666666666667, "grad_norm": 0.016108760610222816, "kl": 0.0003348791698954301, "learning_rate": 2.467682828805956e-07, "loss": 0.0156, "reward": 0.1375, "reward_std": 0.0883883461356163, "rewards/accuracy_reward": 0.1375, "rewards/format_reward": 0.0, "step": 25 }, { "clip_ratio": 0.0, "completion_length": 234.625, "epoch": 0.032, "grad_norm": 0.0001786644133972004, "kl": 0.0004210583254462108, "learning_rate": 0.0, "loss": -0.0019, "reward": 0.0625, "reward_std": 0.05303300768136978, "rewards/accuracy_reward": 0.0625, "rewards/format_reward": 0.0, "step": 30 }, { "epoch": 0.032, "step": 30, "total_flos": 0.0, "train_loss": 0.0008721697144210338, "train_runtime": 2002.6709, "train_samples_per_second": 0.24, "train_steps_per_second": 0.015 } ], "logging_steps": 5, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }