{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 189, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015873015873015872, "grad_norm": 2.956210802841563, "learning_rate": 2.6315789473684208e-08, "logits/chosen": -0.84765625, "logits/rejected": -0.84765625, "logps/chosen": -14.875, "logps/rejected": -14.875, "loss": 0.6914, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.15873015873015872, "grad_norm": 0.0026515697903897284, "learning_rate": 2.631578947368421e-07, "logits/chosen": -0.8828125, "logits/rejected": -0.88671875, "logps/chosen": -15.9375, "logps/rejected": -15.8125, "loss": 0.6914, "rewards/accuracies": 0.02777777798473835, "rewards/chosen": 0.0008697509765625, "rewards/margins": -0.00043487548828125, "rewards/rejected": 0.00130462646484375, "step": 10 }, { "epoch": 0.31746031746031744, "grad_norm": 2.201768075236467, "learning_rate": 4.970588235294118e-07, "logits/chosen": -0.890625, "logits/rejected": -0.88671875, "logps/chosen": -15.625, "logps/rejected": -15.5625, "loss": 0.6894, "rewards/accuracies": 0.07500000298023224, "rewards/chosen": 0.0228271484375, "rewards/margins": 0.00860595703125, "rewards/rejected": 0.01422119140625, "step": 20 }, { "epoch": 0.47619047619047616, "grad_norm": 2.7819271307964892, "learning_rate": 4.676470588235294e-07, "logits/chosen": -0.890625, "logits/rejected": -0.875, "logps/chosen": -14.9375, "logps/rejected": -14.75, "loss": 0.689, "rewards/accuracies": 0.0625, "rewards/chosen": 0.0830078125, "rewards/margins": 0.0111083984375, "rewards/rejected": 0.07177734375, "step": 30 }, { "epoch": 0.6349206349206349, "grad_norm": 2.468434193598462, "learning_rate": 4.38235294117647e-07, "logits/chosen": -0.8984375, "logits/rejected": -0.91015625, "logps/chosen": -14.375, "logps/rejected": -14.8125, "loss": 0.6849, "rewards/accuracies": 0.11249999701976776, "rewards/chosen": 0.1552734375, "rewards/margins": 0.038330078125, "rewards/rejected": 0.11669921875, "step": 40 }, { "epoch": 0.7936507936507936, "grad_norm": 3.217396320056085, "learning_rate": 4.0882352941176465e-07, "logits/chosen": -0.87109375, "logits/rejected": -0.8828125, "logps/chosen": -13.125, "logps/rejected": -12.625, "loss": 0.6828, "rewards/accuracies": 0.03750000149011612, "rewards/chosen": 0.267578125, "rewards/margins": 0.01129150390625, "rewards/rejected": 0.255859375, "step": 50 }, { "epoch": 0.9523809523809523, "grad_norm": 3.5576289077982315, "learning_rate": 3.7941176470588235e-07, "logits/chosen": -0.85546875, "logits/rejected": -0.8671875, "logps/chosen": -12.125, "logps/rejected": -12.625, "loss": 0.6772, "rewards/accuracies": 0.02500000037252903, "rewards/chosen": 0.3359375, "rewards/margins": 0.0196533203125, "rewards/rejected": 0.31640625, "step": 60 }, { "epoch": 1.0, "eval_logits/chosen": -0.875, "eval_logits/rejected": -0.87109375, "eval_logps/chosen": -10.25, "eval_logps/rejected": -10.625, "eval_loss": 0.6818749904632568, "eval_rewards/accuracies": 0.0357142873108387, "eval_rewards/chosen": 0.458984375, "eval_rewards/margins": -0.0004177093505859375, "eval_rewards/rejected": 0.4609375, "eval_runtime": 12.3547, "eval_samples_per_second": 16.188, "eval_steps_per_second": 0.567, "step": 63 }, { "epoch": 1.1111111111111112, "grad_norm": 0.6480653257577673, "learning_rate": 3.5e-07, "logits/chosen": -0.83984375, "logits/rejected": -0.8515625, "logps/chosen": -9.8125, "logps/rejected": -11.0, "loss": 0.6654, "rewards/accuracies": 0.0625, "rewards/chosen": 0.52734375, "rewards/margins": 0.05859375, "rewards/rejected": 0.466796875, "step": 70 }, { "epoch": 1.2698412698412698, "grad_norm": 1.8542768080035792, "learning_rate": 3.205882352941177e-07, "logits/chosen": -0.828125, "logits/rejected": -0.83203125, "logps/chosen": -10.1875, "logps/rejected": -11.0, "loss": 0.6656, "rewards/accuracies": 0.03750000149011612, "rewards/chosen": 0.49609375, "rewards/margins": 0.05419921875, "rewards/rejected": 0.44140625, "step": 80 }, { "epoch": 1.4285714285714286, "grad_norm": 1.4880802722309492, "learning_rate": 2.911764705882353e-07, "logits/chosen": -0.83984375, "logits/rejected": -0.84765625, "logps/chosen": -10.6875, "logps/rejected": -11.5, "loss": 0.6672, "rewards/accuracies": 0.10000000149011612, "rewards/chosen": 0.474609375, "rewards/margins": 0.10400390625, "rewards/rejected": 0.37109375, "step": 90 }, { "epoch": 1.5873015873015874, "grad_norm": 0.9541857190755878, "learning_rate": 2.6176470588235295e-07, "logits/chosen": -0.84765625, "logits/rejected": -0.84765625, "logps/chosen": -12.3125, "logps/rejected": -13.875, "loss": 0.667, "rewards/accuracies": 0.08749999850988388, "rewards/chosen": 0.34765625, "rewards/margins": 0.1240234375, "rewards/rejected": 0.224609375, "step": 100 }, { "epoch": 1.746031746031746, "grad_norm": 1.3739568831557003, "learning_rate": 2.323529411764706e-07, "logits/chosen": -0.875, "logits/rejected": -0.8671875, "logps/chosen": -12.0, "logps/rejected": -13.375, "loss": 0.6519, "rewards/accuracies": 0.10000000149011612, "rewards/chosen": 0.419921875, "rewards/margins": 0.1884765625, "rewards/rejected": 0.23046875, "step": 110 }, { "epoch": 1.9047619047619047, "grad_norm": 1.2013802625805676, "learning_rate": 2.0294117647058823e-07, "logits/chosen": -0.8359375, "logits/rejected": -0.84375, "logps/chosen": -12.25, "logps/rejected": -13.4375, "loss": 0.6564, "rewards/accuracies": 0.07500000298023224, "rewards/chosen": 0.36328125, "rewards/margins": 0.10498046875, "rewards/rejected": 0.2578125, "step": 120 }, { "epoch": 2.0, "eval_logits/chosen": -0.85546875, "eval_logits/rejected": -0.85546875, "eval_logps/chosen": -11.9375, "eval_logps/rejected": -12.5, "eval_loss": 0.6790624856948853, "eval_rewards/accuracies": 0.0357142873108387, "eval_rewards/chosen": 0.2890625, "eval_rewards/margins": 0.01324462890625, "eval_rewards/rejected": 0.275390625, "eval_runtime": 14.8138, "eval_samples_per_second": 13.501, "eval_steps_per_second": 0.473, "step": 126 }, { "epoch": 2.0634920634920633, "grad_norm": 1.6810359268654005, "learning_rate": 1.7352941176470587e-07, "logits/chosen": -0.8359375, "logits/rejected": -0.8359375, "logps/chosen": -13.0, "logps/rejected": -15.0, "loss": 0.6466, "rewards/accuracies": 0.08749999850988388, "rewards/chosen": 0.255859375, "rewards/margins": 0.1630859375, "rewards/rejected": 0.09228515625, "step": 130 }, { "epoch": 2.2222222222222223, "grad_norm": 1.6049829222017113, "learning_rate": 1.441176470588235e-07, "logits/chosen": -0.83984375, "logits/rejected": -0.83984375, "logps/chosen": -13.1875, "logps/rejected": -13.0, "loss": 0.6546, "rewards/accuracies": 0.08749999850988388, "rewards/chosen": 0.30859375, "rewards/margins": 0.1064453125, "rewards/rejected": 0.203125, "step": 140 }, { "epoch": 2.380952380952381, "grad_norm": 0.8745967010917558, "learning_rate": 1.1470588235294116e-07, "logits/chosen": -0.85546875, "logits/rejected": -0.84375, "logps/chosen": -12.1875, "logps/rejected": -13.1875, "loss": 0.647, "rewards/accuracies": 0.08749999850988388, "rewards/chosen": 0.28125, "rewards/margins": 0.099609375, "rewards/rejected": 0.1806640625, "step": 150 }, { "epoch": 2.5396825396825395, "grad_norm": 2.5161816747376817, "learning_rate": 8.529411764705883e-08, "logits/chosen": -0.87109375, "logits/rejected": -0.87890625, "logps/chosen": -13.625, "logps/rejected": -15.3125, "loss": 0.6455, "rewards/accuracies": 0.08749999850988388, "rewards/chosen": 0.2734375, "rewards/margins": 0.1630859375, "rewards/rejected": 0.11083984375, "step": 160 }, { "epoch": 2.6984126984126986, "grad_norm": 1.779552803700363, "learning_rate": 5.5882352941176474e-08, "logits/chosen": -0.8203125, "logits/rejected": -0.83203125, "logps/chosen": -12.1875, "logps/rejected": -13.75, "loss": 0.6576, "rewards/accuracies": 0.05000000074505806, "rewards/chosen": 0.2890625, "rewards/margins": 0.11328125, "rewards/rejected": 0.17578125, "step": 170 }, { "epoch": 2.857142857142857, "grad_norm": 1.0681708171714699, "learning_rate": 2.6470588235294116e-08, "logits/chosen": -0.80859375, "logits/rejected": -0.796875, "logps/chosen": -13.0625, "logps/rejected": -14.375, "loss": 0.6566, "rewards/accuracies": 0.07500000298023224, "rewards/chosen": 0.2294921875, "rewards/margins": 0.09619140625, "rewards/rejected": 0.1328125, "step": 180 }, { "epoch": 3.0, "eval_logits/chosen": -0.85546875, "eval_logits/rejected": -0.8515625, "eval_logps/chosen": -12.375, "eval_logps/rejected": -12.9375, "eval_loss": 0.6785937547683716, "eval_rewards/accuracies": 0.0357142873108387, "eval_rewards/chosen": 0.2470703125, "eval_rewards/margins": 0.016845703125, "eval_rewards/rejected": 0.23046875, "eval_runtime": 14.6047, "eval_samples_per_second": 13.694, "eval_steps_per_second": 0.479, "step": 189 }, { "epoch": 3.0, "step": 189, "total_flos": 0.0, "train_loss": 0.6665452422288359, "train_runtime": 1664.0362, "train_samples_per_second": 3.606, "train_steps_per_second": 0.114 } ], "logging_steps": 10, "max_steps": 189, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }