{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9968652037617555, "eval_steps": 500, "global_step": 159, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.125e-08, "logits/chosen": -2.689429759979248, "logits/rejected": -2.571552276611328, "logps/chosen": -143.16458129882812, "logps/rejected": -203.93856811523438, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.708827257156372, "logits/rejected": -2.686070680618286, "logps/chosen": -237.31149291992188, "logps/rejected": -247.18511962890625, "loss": 0.6899, "rewards/accuracies": 0.4375, "rewards/chosen": -0.01768648810684681, "rewards/margins": -0.0019140999065712094, "rewards/rejected": -0.015772389248013496, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.990353313429303e-07, "logits/chosen": -2.6622233390808105, "logits/rejected": -2.6616415977478027, "logps/chosen": -279.48223876953125, "logps/rejected": -328.67034912109375, "loss": 0.6672, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.3544561564922333, "rewards/margins": 0.08781943470239639, "rewards/rejected": -0.4422755837440491, "step": 20 }, { "epoch": 0.19, "learning_rate": 4.882681251368548e-07, "logits/chosen": -2.530141830444336, "logits/rejected": -2.4791650772094727, "logps/chosen": -242.94580078125, "logps/rejected": -304.83319091796875, "loss": 0.6578, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.23640844225883484, "rewards/margins": 0.2698945105075836, "rewards/rejected": -0.5063029527664185, "step": 30 }, { "epoch": 0.25, "learning_rate": 4.6604720940421207e-07, "logits/chosen": -2.3857040405273438, "logits/rejected": -2.350623846054077, "logps/chosen": -262.49359130859375, "logps/rejected": -348.61285400390625, "loss": 0.6375, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.4203917384147644, "rewards/margins": 0.2930702269077301, "rewards/rejected": -0.7134619951248169, "step": 40 }, { "epoch": 0.31, "learning_rate": 4.3344075855595097e-07, "logits/chosen": -2.2722487449645996, "logits/rejected": -2.2434167861938477, "logps/chosen": -265.3743591308594, "logps/rejected": -326.30474853515625, "loss": 0.6153, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.47392672300338745, "rewards/margins": 0.19795864820480347, "rewards/rejected": -0.6718853712081909, "step": 50 }, { "epoch": 0.38, "learning_rate": 3.920161866827889e-07, "logits/chosen": -2.1601130962371826, "logits/rejected": -2.137726306915283, "logps/chosen": -290.22393798828125, "logps/rejected": -320.2867736816406, "loss": 0.6209, "rewards/accuracies": 0.625, "rewards/chosen": -0.5836684703826904, "rewards/margins": 0.20530056953430176, "rewards/rejected": -0.7889690399169922, "step": 60 }, { "epoch": 0.44, "learning_rate": 3.4376480090239047e-07, "logits/chosen": -2.1158602237701416, "logits/rejected": -2.0663974285125732, "logps/chosen": -261.1648864746094, "logps/rejected": -325.1148681640625, "loss": 0.629, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.49344220757484436, "rewards/margins": 0.26634153723716736, "rewards/rejected": -0.7597836852073669, "step": 70 }, { "epoch": 0.5, "learning_rate": 2.910060778827554e-07, "logits/chosen": -2.0561118125915527, "logits/rejected": -2.0465025901794434, "logps/chosen": -259.88214111328125, "logps/rejected": -330.5330505371094, "loss": 0.5935, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.6631507873535156, "rewards/margins": 0.2730056643486023, "rewards/rejected": -0.9361563920974731, "step": 80 }, { "epoch": 0.56, "learning_rate": 2.3627616503391812e-07, "logits/chosen": -2.163104295730591, "logits/rejected": -2.1310901641845703, "logps/chosen": -251.44955444335938, "logps/rejected": -355.19989013671875, "loss": 0.6082, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.5009629726409912, "rewards/margins": 0.4423709511756897, "rewards/rejected": -0.9433339834213257, "step": 90 }, { "epoch": 0.63, "learning_rate": 1.8220596619089573e-07, "logits/chosen": -2.061826229095459, "logits/rejected": -2.0707743167877197, "logps/chosen": -275.9271545410156, "logps/rejected": -368.3133850097656, "loss": 0.6033, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5269793272018433, "rewards/margins": 0.3954086899757385, "rewards/rejected": -0.9223880767822266, "step": 100 }, { "epoch": 0.69, "learning_rate": 1.3139467229135998e-07, "logits/chosen": -2.105185031890869, "logits/rejected": -2.079219341278076, "logps/chosen": -246.35910034179688, "logps/rejected": -346.08642578125, "loss": 0.594, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5222919583320618, "rewards/margins": 0.4267689287662506, "rewards/rejected": -0.9490607976913452, "step": 110 }, { "epoch": 0.75, "learning_rate": 8.628481651367875e-08, "logits/chosen": -2.107372760772705, "logits/rejected": -2.0185680389404297, "logps/chosen": -271.4783935546875, "logps/rejected": -370.3096923828125, "loss": 0.5906, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5639175176620483, "rewards/margins": 0.45396023988723755, "rewards/rejected": -1.0178776979446411, "step": 120 }, { "epoch": 0.82, "learning_rate": 4.904486005914027e-08, "logits/chosen": -2.119300365447998, "logits/rejected": -2.106173515319824, "logps/chosen": -282.1510009765625, "logps/rejected": -376.592041015625, "loss": 0.5895, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.6762362718582153, "rewards/margins": 0.3490751385688782, "rewards/rejected": -1.0253114700317383, "step": 130 }, { "epoch": 0.88, "learning_rate": 2.1464952759020856e-08, "logits/chosen": -2.127058267593384, "logits/rejected": -2.0316877365112305, "logps/chosen": -290.1690979003906, "logps/rejected": -389.51116943359375, "loss": 0.5877, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6831713318824768, "rewards/margins": 0.4633623957633972, "rewards/rejected": -1.146533727645874, "step": 140 }, { "epoch": 0.94, "learning_rate": 4.8708793644441086e-09, "logits/chosen": -2.0759072303771973, "logits/rejected": -2.021393299102783, "logps/chosen": -278.86285400390625, "logps/rejected": -323.4399108886719, "loss": 0.5954, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6692460775375366, "rewards/margins": 0.32712554931640625, "rewards/rejected": -0.9963716268539429, "step": 150 }, { "epoch": 1.0, "step": 159, "total_flos": 0.0, "train_loss": 0.6192928140268386, "train_runtime": 2654.4881, "train_samples_per_second": 7.677, "train_steps_per_second": 0.06 } ], "logging_steps": 10, "max_steps": 159, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }