|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 100, |
|
"global_step": 195, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.5000000000000004e-07, |
|
"logits/chosen": -2.3828954696655273, |
|
"logits/rejected": -2.2103500366210938, |
|
"logps/chosen": -351.30865478515625, |
|
"logps/rejected": -310.087646484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -2.15350341796875, |
|
"logits/rejected": -2.057192087173462, |
|
"logps/chosen": -291.661865234375, |
|
"logps/rejected": -299.000244140625, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.5208333134651184, |
|
"rewards/chosen": -0.00531815318390727, |
|
"rewards/margins": 0.006059659644961357, |
|
"rewards/rejected": -0.01137781422585249, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -1.7294094562530518, |
|
"logits/rejected": -1.6358362436294556, |
|
"logps/chosen": -349.6874084472656, |
|
"logps/rejected": -371.9268798828125, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.4694043695926666, |
|
"rewards/margins": 0.10101622343063354, |
|
"rewards/rejected": -0.5704206228256226, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.959823971496575e-06, |
|
"logits/chosen": -1.3581154346466064, |
|
"logits/rejected": -1.2683781385421753, |
|
"logps/chosen": -328.9931945800781, |
|
"logps/rejected": -367.46417236328125, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.3038724958896637, |
|
"rewards/margins": 0.21112871170043945, |
|
"rewards/rejected": -0.5150011777877808, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8405871765993435e-06, |
|
"logits/chosen": -1.0669946670532227, |
|
"logits/rejected": -0.6835187673568726, |
|
"logps/chosen": -354.5089416503906, |
|
"logps/rejected": -383.76422119140625, |
|
"loss": 0.5903, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.4168413579463959, |
|
"rewards/margins": 0.37733370065689087, |
|
"rewards/rejected": -0.7941750884056091, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.646121984004666e-06, |
|
"logits/chosen": -0.7538890838623047, |
|
"logits/rejected": -0.34662288427352905, |
|
"logps/chosen": -361.6393737792969, |
|
"logps/rejected": -380.0272521972656, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.479973167181015, |
|
"rewards/margins": 0.41160327196121216, |
|
"rewards/rejected": -0.8915762901306152, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.382678665009028e-06, |
|
"logits/chosen": -0.7476059198379517, |
|
"logits/rejected": -0.4476490020751953, |
|
"logps/chosen": -321.3587951660156, |
|
"logps/rejected": -371.570556640625, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3474940359592438, |
|
"rewards/margins": 0.410334974527359, |
|
"rewards/rejected": -0.7578290104866028, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.058724504646834e-06, |
|
"logits/chosen": -0.5304551124572754, |
|
"logits/rejected": 0.0068548740819096565, |
|
"logps/chosen": -372.43316650390625, |
|
"logps/rejected": -403.68011474609375, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6598731875419617, |
|
"rewards/margins": 0.44455790519714355, |
|
"rewards/rejected": -1.10443115234375, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.684671656182497e-06, |
|
"logits/chosen": -0.7597763538360596, |
|
"logits/rejected": -0.30586355924606323, |
|
"logps/chosen": -384.52679443359375, |
|
"logps/rejected": -407.8342590332031, |
|
"loss": 0.5901, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.5050551891326904, |
|
"rewards/margins": 0.4280626177787781, |
|
"rewards/rejected": -0.9331178665161133, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.272542485937369e-06, |
|
"logits/chosen": -0.6929324865341187, |
|
"logits/rejected": -0.2577061057090759, |
|
"logps/chosen": -342.2850036621094, |
|
"logps/rejected": -383.3541259765625, |
|
"loss": 0.5777, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5730774998664856, |
|
"rewards/margins": 0.38491758704185486, |
|
"rewards/rejected": -0.9579952359199524, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.835583164544139e-06, |
|
"logits/chosen": -0.8866588473320007, |
|
"logits/rejected": -0.39027491211891174, |
|
"logps/chosen": -326.8385925292969, |
|
"logps/rejected": -374.0113525390625, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.4010140001773834, |
|
"rewards/margins": 0.5182436108589172, |
|
"rewards/rejected": -0.9192575216293335, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_logits/chosen": -0.6312460899353027, |
|
"eval_logits/rejected": -0.05466047301888466, |
|
"eval_logps/chosen": -351.8179016113281, |
|
"eval_logps/rejected": -391.2296447753906, |
|
"eval_loss": 0.5503215789794922, |
|
"eval_rewards/accuracies": 0.7139999866485596, |
|
"eval_rewards/chosen": -0.6370265483856201, |
|
"eval_rewards/margins": 0.559273362159729, |
|
"eval_rewards/rejected": -1.1962999105453491, |
|
"eval_runtime": 396.9424, |
|
"eval_samples_per_second": 5.039, |
|
"eval_steps_per_second": 0.63, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3878379241237136e-06, |
|
"logits/chosen": -0.5016804933547974, |
|
"logits/rejected": -0.17540986835956573, |
|
"logps/chosen": -381.13250732421875, |
|
"logps/rejected": -445.72344970703125, |
|
"loss": 0.568, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.8623906970024109, |
|
"rewards/margins": 0.4544965624809265, |
|
"rewards/rejected": -1.3168871402740479, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9436976651092143e-06, |
|
"logits/chosen": -0.5300595164299011, |
|
"logits/rejected": -0.16820363700389862, |
|
"logps/chosen": -343.6223449707031, |
|
"logps/rejected": -390.7423400878906, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.5444029569625854, |
|
"rewards/margins": 0.469682514667511, |
|
"rewards/rejected": -1.0140855312347412, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5174374208651913e-06, |
|
"logits/chosen": -0.7855179309844971, |
|
"logits/rejected": -0.3058822751045227, |
|
"logps/chosen": -345.6529235839844, |
|
"logps/rejected": -394.87310791015625, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.4353795647621155, |
|
"rewards/margins": 0.5461079478263855, |
|
"rewards/rejected": -0.9814874529838562, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.122757546369744e-06, |
|
"logits/chosen": -0.5981294512748718, |
|
"logits/rejected": 0.1230069175362587, |
|
"logps/chosen": -365.549560546875, |
|
"logps/rejected": -422.6253356933594, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6209978461265564, |
|
"rewards/margins": 0.6843111515045166, |
|
"rewards/rejected": -1.3053090572357178, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.723433775328385e-07, |
|
"logits/chosen": -0.31050771474838257, |
|
"logits/rejected": 0.17936445772647858, |
|
"logps/chosen": -382.371826171875, |
|
"logps/rejected": -423.1304626464844, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7858445048332214, |
|
"rewards/margins": 0.49087247252464294, |
|
"rewards/rejected": -1.276716947555542, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.774575140626317e-07, |
|
"logits/chosen": -0.3516121506690979, |
|
"logits/rejected": 0.05455173924565315, |
|
"logps/chosen": -383.17694091796875, |
|
"logps/rejected": -414.36322021484375, |
|
"loss": 0.5914, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.7312983870506287, |
|
"rewards/margins": 0.4293293058872223, |
|
"rewards/rejected": -1.160627841949463, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4757783024395244e-07, |
|
"logits/chosen": -0.3427812159061432, |
|
"logits/rejected": 0.1810428947210312, |
|
"logps/chosen": -328.9323425292969, |
|
"logps/rejected": -408.287353515625, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.5952991247177124, |
|
"rewards/margins": 0.7224096059799194, |
|
"rewards/rejected": -1.3177087306976318, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.00928482603669e-08, |
|
"logits/chosen": -0.4558378756046295, |
|
"logits/rejected": 0.023540988564491272, |
|
"logps/chosen": -353.60015869140625, |
|
"logps/rejected": -400.02459716796875, |
|
"loss": 0.5798, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.6851487159729004, |
|
"rewards/margins": 0.5590900778770447, |
|
"rewards/rejected": -1.2442388534545898, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.006426501190233e-08, |
|
"logits/chosen": -0.32187455892562866, |
|
"logits/rejected": 0.002347037196159363, |
|
"logps/chosen": -363.7867126464844, |
|
"logps/rejected": -415.94952392578125, |
|
"loss": 0.5752, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.6913945078849792, |
|
"rewards/margins": 0.4907089173793793, |
|
"rewards/rejected": -1.1821033954620361, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 195, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5863106256876236, |
|
"train_runtime": 9239.1596, |
|
"train_samples_per_second": 2.706, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 195, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|