{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 1000, "global_step": 125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 0.4128397206827493, "learning_rate": 3.846153846153847e-07, "logits/chosen": -1.73323655128479, "logits/rejected": -1.963712453842163, "logps/chosen": -64.71795654296875, "logps/rejected": -92.56527709960938, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.4, "grad_norm": 0.35034784903451766, "learning_rate": 3.846153846153847e-06, "logits/chosen": -1.7505415678024292, "logits/rejected": -1.915618658065796, "logps/chosen": -63.550048828125, "logps/rejected": -88.24057006835938, "loss": 0.6913, "rewards/accuracies": 0.5555555820465088, "rewards/chosen": -0.0015819871332496405, "rewards/margins": 0.0029085720889270306, "rewards/rejected": -0.004490559455007315, "step": 10 }, { "epoch": 0.8, "grad_norm": 0.3741999171763754, "learning_rate": 4.9519632010080765e-06, "logits/chosen": -1.7421376705169678, "logits/rejected": -1.9737510681152344, "logps/chosen": -64.17735290527344, "logps/rejected": -103.7110366821289, "loss": 0.6532, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.03122868202626705, "rewards/margins": 0.09635920822620392, "rewards/rejected": -0.12758789956569672, "step": 20 }, { "epoch": 1.2, "grad_norm": 0.21399782118300334, "learning_rate": 4.721114089947181e-06, "logits/chosen": -1.7182533740997314, "logits/rejected": -1.9536464214324951, "logps/chosen": -72.61327362060547, "logps/rejected": -138.14871215820312, "loss": 0.5782, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.10964199155569077, "rewards/margins": 0.36830809712409973, "rewards/rejected": -0.4779500961303711, "step": 30 }, { "epoch": 1.6, "grad_norm": 0.20675592706765678, "learning_rate": 4.316650805085068e-06, "logits/chosen": -1.6047031879425049, "logits/rejected": -1.9040508270263672, "logps/chosen": -70.1319580078125, "logps/rejected": -172.32015991210938, "loss": 0.5474, "rewards/accuracies": 0.8125, "rewards/chosen": -0.12685254216194153, "rewards/margins": 0.6549729108810425, "rewards/rejected": -0.7818254828453064, "step": 40 }, { "epoch": 2.0, "grad_norm": 0.32081236636733307, "learning_rate": 3.770188363116324e-06, "logits/chosen": -1.6701993942260742, "logits/rejected": -1.8626493215560913, "logps/chosen": -84.17461395263672, "logps/rejected": -167.61410522460938, "loss": 0.5244, "rewards/accuracies": 0.71875, "rewards/chosen": -0.21969938278198242, "rewards/margins": 0.575901985168457, "rewards/rejected": -0.7956013679504395, "step": 50 }, { "epoch": 2.4, "grad_norm": 0.38597827976301535, "learning_rate": 3.1244411954180677e-06, "logits/chosen": -1.5896217823028564, "logits/rejected": -1.8747373819351196, "logps/chosen": -67.0400161743164, "logps/rejected": -191.59365844726562, "loss": 0.4726, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.08698524534702301, "rewards/margins": 0.882093071937561, "rewards/rejected": -0.9690783619880676, "step": 60 }, { "epoch": 2.8, "grad_norm": 0.4702645263366345, "learning_rate": 2.429884359310328e-06, "logits/chosen": -1.572919249534607, "logits/rejected": -1.7691805362701416, "logps/chosen": -75.44322204589844, "logps/rejected": -183.8511962890625, "loss": 0.4811, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12143620103597641, "rewards/margins": 0.8210474252700806, "rewards/rejected": -0.9424835443496704, "step": 70 }, { "epoch": 3.2, "grad_norm": 0.35995082384074073, "learning_rate": 1.7408081372259633e-06, "logits/chosen": -1.540151834487915, "logits/rejected": -1.7591164112091064, "logps/chosen": -64.67158508300781, "logps/rejected": -208.8355712890625, "loss": 0.3772, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.042745210230350494, "rewards/margins": 1.1398416757583618, "rewards/rejected": -1.182586908340454, "step": 80 }, { "epoch": 3.6, "grad_norm": 0.3349159272932947, "learning_rate": 1.1110744174509952e-06, "logits/chosen": -1.5150226354599, "logits/rejected": -1.7239547967910767, "logps/chosen": -65.3616714477539, "logps/rejected": -226.32937622070312, "loss": 0.3725, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.033232785761356354, "rewards/margins": 1.3398171663284302, "rewards/rejected": -1.3730499744415283, "step": 90 }, { "epoch": 4.0, "grad_norm": 0.3777138278726997, "learning_rate": 5.899065604459814e-07, "logits/chosen": -1.5114853382110596, "logits/rejected": -1.7169097661972046, "logps/chosen": -61.861358642578125, "logps/rejected": -252.6329345703125, "loss": 0.3478, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": 0.0013225942384451628, "rewards/margins": 1.6115810871124268, "rewards/rejected": -1.6102584600448608, "step": 100 }, { "epoch": 4.4, "grad_norm": 0.31092930022093734, "learning_rate": 2.1804183734670277e-07, "logits/chosen": -1.5211278200149536, "logits/rejected": -1.7239795923233032, "logps/chosen": -63.83014678955078, "logps/rejected": -241.92819213867188, "loss": 0.3522, "rewards/accuracies": 0.71875, "rewards/chosen": -0.014286426827311516, "rewards/margins": 1.5209068059921265, "rewards/rejected": -1.5351933240890503, "step": 110 }, { "epoch": 4.8, "grad_norm": 0.336920496075524, "learning_rate": 2.454718665888589e-08, "logits/chosen": -1.4646048545837402, "logits/rejected": -1.6882946491241455, "logps/chosen": -61.223609924316406, "logps/rejected": -262.6458435058594, "loss": 0.3387, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.0028733056969940662, "rewards/margins": 1.7208553552627563, "rewards/rejected": -1.7237287759780884, "step": 120 }, { "epoch": 5.0, "step": 125, "total_flos": 0.0, "train_loss": 0.46972908115386963, "train_runtime": 2062.102, "train_samples_per_second": 3.88, "train_steps_per_second": 0.061 } ], "logging_steps": 10, "max_steps": 125, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }