{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.998324958123953, "eval_steps": 100, "global_step": 149, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.3333333333333335e-07, "logits/chosen": -2.55656099319458, "logits/rejected": -2.55704402923584, "logps/chosen": -162.36532592773438, "logps/rejected": -172.43312072753906, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -2.666642189025879, "logits/rejected": -2.6329586505889893, "logps/chosen": -185.80641174316406, "logps/rejected": -184.18959045410156, "loss": 0.6928, "rewards/accuracies": 0.4548611044883728, "rewards/chosen": -0.001971459249034524, "rewards/margins": 0.0016002749325707555, "rewards/rejected": -0.003571733832359314, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.982842942906386e-06, "logits/chosen": -2.721466541290283, "logits/rejected": -2.67402982711792, "logps/chosen": -201.33470153808594, "logps/rejected": -194.63198852539062, "loss": 0.6949, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.10967914760112762, "rewards/margins": 0.0006104880012571812, "rewards/rejected": -0.11028961837291718, "step": 20 }, { "epoch": 0.2, "learning_rate": 4.846996204000967e-06, "logits/chosen": -2.625549077987671, "logits/rejected": -2.607099771499634, "logps/chosen": -197.00973510742188, "logps/rejected": -195.684326171875, "loss": 0.6939, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -0.08795476704835892, "rewards/margins": -0.002985857194289565, "rewards/rejected": -0.08496890217065811, "step": 30 }, { "epoch": 0.27, "learning_rate": 4.582735470385229e-06, "logits/chosen": -2.633084535598755, "logits/rejected": -2.634359836578369, "logps/chosen": -185.71023559570312, "logps/rejected": -185.03509521484375, "loss": 0.6921, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.04532230272889137, "rewards/margins": 0.0011206632480025291, "rewards/rejected": -0.046442966908216476, "step": 40 }, { "epoch": 0.34, "learning_rate": 4.204519553876095e-06, "logits/chosen": -2.6109485626220703, "logits/rejected": -2.6062893867492676, "logps/chosen": -171.55734252929688, "logps/rejected": -173.1212158203125, "loss": 0.6912, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.014199512079358101, "rewards/margins": 0.0028473488055169582, "rewards/rejected": -0.017046859487891197, "step": 50 }, { "epoch": 0.4, "learning_rate": 3.7330422317447686e-06, "logits/chosen": -2.6457674503326416, "logits/rejected": -2.6292178630828857, "logps/chosen": -188.72109985351562, "logps/rejected": -184.30081176757812, "loss": 0.6905, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.00934157706797123, "rewards/margins": 0.002996337367221713, "rewards/rejected": -0.012337915599346161, "step": 60 }, { "epoch": 0.47, "learning_rate": 3.1941000034687516e-06, "logits/chosen": -2.6249117851257324, "logits/rejected": -2.611786365509033, "logps/chosen": -170.21836853027344, "logps/rejected": -177.2921142578125, "loss": 0.6905, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.03585924953222275, "rewards/margins": 0.0028343182057142258, "rewards/rejected": -0.03869356960058212, "step": 70 }, { "epoch": 0.54, "learning_rate": 2.6171806561748503e-06, "logits/chosen": -2.5946967601776123, "logits/rejected": -2.571646213531494, "logps/chosen": -184.07911682128906, "logps/rejected": -189.4521942138672, "loss": 0.6896, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.019358564168214798, "rewards/margins": 0.011903460137546062, "rewards/rejected": -0.031262025237083435, "step": 80 }, { "epoch": 0.6, "learning_rate": 2.0338498642707977e-06, "logits/chosen": -2.629333972930908, "logits/rejected": -2.6070916652679443, "logps/chosen": -183.12803649902344, "logps/rejected": -187.22447204589844, "loss": 0.6902, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.0804746001958847, "rewards/margins": 0.01135367900133133, "rewards/rejected": -0.09182827174663544, "step": 90 }, { "epoch": 0.67, "learning_rate": 1.4760240991587338e-06, "logits/chosen": -2.5863940715789795, "logits/rejected": -2.5969200134277344, "logps/chosen": -172.47142028808594, "logps/rejected": -180.51638793945312, "loss": 0.6889, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.05926589295268059, "rewards/margins": 0.011102231219410896, "rewards/rejected": -0.07036812603473663, "step": 100 }, { "epoch": 0.67, "eval_logits/chosen": -2.5383808612823486, "eval_logits/rejected": -2.4414656162261963, "eval_logps/chosen": -307.9212646484375, "eval_logps/rejected": -301.2935485839844, "eval_loss": 0.6852481365203857, "eval_rewards/accuracies": 0.5659999847412109, "eval_rewards/chosen": -0.0951852798461914, "eval_rewards/margins": 0.019401030614972115, "eval_rewards/rejected": -0.11458631604909897, "eval_runtime": 382.181, "eval_samples_per_second": 5.233, "eval_steps_per_second": 0.654, "step": 100 }, { "epoch": 0.74, "learning_rate": 9.742243453755202e-07, "logits/chosen": -2.612968683242798, "logits/rejected": -2.6128811836242676, "logps/chosen": -176.701416015625, "logps/rejected": -186.86849975585938, "loss": 0.6882, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.027405280619859695, "rewards/margins": 0.016830626875162125, "rewards/rejected": -0.04423590749502182, "step": 110 }, { "epoch": 0.8, "learning_rate": 5.559061696656199e-07, "logits/chosen": -2.585603713989258, "logits/rejected": -2.5767314434051514, "logps/chosen": -184.33139038085938, "logps/rejected": -189.44131469726562, "loss": 0.686, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.03763353452086449, "rewards/margins": 0.025313779711723328, "rewards/rejected": -0.06294731795787811, "step": 120 }, { "epoch": 0.87, "learning_rate": 2.4395751190352924e-07, "logits/chosen": -2.5703272819519043, "logits/rejected": -2.5677199363708496, "logps/chosen": -184.06341552734375, "logps/rejected": -188.58779907226562, "loss": 0.6884, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.06450501084327698, "rewards/margins": 0.007478479295969009, "rewards/rejected": -0.07198350131511688, "step": 130 }, { "epoch": 0.94, "learning_rate": 5.544639001763719e-08, "logits/chosen": -2.6024391651153564, "logits/rejected": -2.5870203971862793, "logps/chosen": -199.04031372070312, "logps/rejected": -197.60330200195312, "loss": 0.686, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.06172681599855423, "rewards/margins": 0.017358267679810524, "rewards/rejected": -0.0790850818157196, "step": 140 }, { "epoch": 1.0, "step": 149, "total_flos": 0.0, "train_loss": 0.6901595840518107, "train_runtime": 6945.5042, "train_samples_per_second": 2.75, "train_steps_per_second": 0.021 } ], "logging_steps": 10, "max_steps": 149, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }