|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9945, |
|
"eval_steps": 500, |
|
"global_step": 153, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 17.99154727967293, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -2.152977705001831, |
|
"logits/rejected": -2.3121213912963867, |
|
"logps/chosen": -254.60496520996094, |
|
"logps/rejected": -224.12643432617188, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 15.888543656019479, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.4332120418548584, |
|
"logits/rejected": -2.349087953567505, |
|
"logps/chosen": -218.75491333007812, |
|
"logps/rejected": -224.596923828125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.43589743971824646, |
|
"rewards/chosen": -0.0051579843275249004, |
|
"rewards/margins": 0.0010460736230015755, |
|
"rewards/rejected": -0.006204057950526476, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 18.00599863061672, |
|
"learning_rate": 4.989490450759331e-07, |
|
"logits/chosen": -2.44968318939209, |
|
"logits/rejected": -2.3714213371276855, |
|
"logps/chosen": -227.60235595703125, |
|
"logps/rejected": -245.01002502441406, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.2174525260925293, |
|
"rewards/margins": 0.025719773024320602, |
|
"rewards/rejected": -0.243172287940979, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 17.361907335413566, |
|
"learning_rate": 4.872270441827174e-07, |
|
"logits/chosen": -2.3445615768432617, |
|
"logits/rejected": -2.285738706588745, |
|
"logps/chosen": -264.4424743652344, |
|
"logps/rejected": -267.0367431640625, |
|
"loss": 0.6952, |
|
"rewards/accuracies": 0.4923076927661896, |
|
"rewards/chosen": -0.6213539838790894, |
|
"rewards/margins": 0.022094279527664185, |
|
"rewards/rejected": -0.6434482336044312, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 17.40493821515386, |
|
"learning_rate": 4.6308512113530063e-07, |
|
"logits/chosen": -2.5398178100585938, |
|
"logits/rejected": -2.436666488647461, |
|
"logps/chosen": -252.19931030273438, |
|
"logps/rejected": -264.8144226074219, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5730769038200378, |
|
"rewards/chosen": -0.43709850311279297, |
|
"rewards/margins": 0.043844155967235565, |
|
"rewards/rejected": -0.4809426963329315, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 14.207926284792292, |
|
"learning_rate": 4.277872161641681e-07, |
|
"logits/chosen": -2.579521894454956, |
|
"logits/rejected": -2.5611038208007812, |
|
"logps/chosen": -251.29273986816406, |
|
"logps/rejected": -248.70640563964844, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.5269230604171753, |
|
"rewards/chosen": -0.18656322360038757, |
|
"rewards/margins": 0.01753987930715084, |
|
"rewards/rejected": -0.20410311222076416, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 16.71668002097283, |
|
"learning_rate": 3.8318133624280046e-07, |
|
"logits/chosen": -2.5447630882263184, |
|
"logits/rejected": -2.5085957050323486, |
|
"logps/chosen": -254.7252960205078, |
|
"logps/rejected": -250.2134552001953, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.4769230782985687, |
|
"rewards/chosen": -0.20926551520824432, |
|
"rewards/margins": 0.024503052234649658, |
|
"rewards/rejected": -0.23376856744289398, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 15.128093177880084, |
|
"learning_rate": 3.316028034595861e-07, |
|
"logits/chosen": -2.4117591381073, |
|
"logits/rejected": -2.3856163024902344, |
|
"logps/chosen": -240.95860290527344, |
|
"logps/rejected": -251.49269104003906, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.5692307949066162, |
|
"rewards/chosen": -0.2835744321346283, |
|
"rewards/margins": 0.03665730357170105, |
|
"rewards/rejected": -0.32023176550865173, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 15.984185802661708, |
|
"learning_rate": 2.7575199021178855e-07, |
|
"logits/chosen": -2.502197742462158, |
|
"logits/rejected": -2.4183943271636963, |
|
"logps/chosen": -259.74285888671875, |
|
"logps/rejected": -259.47991943359375, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.557692289352417, |
|
"rewards/chosen": -0.22889916598796844, |
|
"rewards/margins": 0.03880741447210312, |
|
"rewards/rejected": -0.26770660281181335, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 16.944533463136676, |
|
"learning_rate": 2.1855294234408068e-07, |
|
"logits/chosen": -2.4329493045806885, |
|
"logits/rejected": -2.377147674560547, |
|
"logps/chosen": -238.77903747558594, |
|
"logps/rejected": -275.1826171875, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.5692307949066162, |
|
"rewards/chosen": -0.33898892998695374, |
|
"rewards/margins": 0.07301792502403259, |
|
"rewards/rejected": -0.41200685501098633, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 20.171571457821667, |
|
"learning_rate": 1.6300029195778453e-07, |
|
"logits/chosen": -2.404921770095825, |
|
"logits/rejected": -2.3588438034057617, |
|
"logps/chosen": -249.01068115234375, |
|
"logps/rejected": -261.3023376464844, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.5807692408561707, |
|
"rewards/chosen": -0.362224817276001, |
|
"rewards/margins": 0.0861731767654419, |
|
"rewards/rejected": -0.4483979642391205, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 22.208608685833642, |
|
"learning_rate": 1.1200247470632392e-07, |
|
"logits/chosen": -2.384084939956665, |
|
"logits/rejected": -2.4500632286071777, |
|
"logps/chosen": -260.6557312011719, |
|
"logps/rejected": -259.0586853027344, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.4653846025466919, |
|
"rewards/chosen": -0.40450161695480347, |
|
"rewards/margins": 0.021371768787503242, |
|
"rewards/rejected": -0.42587342858314514, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 21.198358455810045, |
|
"learning_rate": 6.822945986946385e-08, |
|
"logits/chosen": -2.3326570987701416, |
|
"logits/rejected": -2.2644848823547363, |
|
"logps/chosen": -269.0829772949219, |
|
"logps/rejected": -277.46099853515625, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.5692307949066162, |
|
"rewards/chosen": -0.45251235365867615, |
|
"rewards/margins": 0.05605296790599823, |
|
"rewards/rejected": -0.508565366268158, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 24.487908321616903, |
|
"learning_rate": 3.397296523427806e-08, |
|
"logits/chosen": -2.3290350437164307, |
|
"logits/rejected": -2.368476629257202, |
|
"logps/chosen": -264.3924255371094, |
|
"logps/rejected": -283.0903625488281, |
|
"loss": 0.6724, |
|
"rewards/accuracies": 0.5346153974533081, |
|
"rewards/chosen": -0.5441482663154602, |
|
"rewards/margins": 0.08576709777116776, |
|
"rewards/rejected": -0.6299152970314026, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 24.363492054663, |
|
"learning_rate": 1.1026475173977978e-08, |
|
"logits/chosen": -2.415417194366455, |
|
"logits/rejected": -2.2751691341400146, |
|
"logps/chosen": -280.5184326171875, |
|
"logps/rejected": -270.88372802734375, |
|
"loss": 0.683, |
|
"rewards/accuracies": 0.5807692408561707, |
|
"rewards/chosen": -0.5419987440109253, |
|
"rewards/margins": 0.07570147514343262, |
|
"rewards/rejected": -0.6177002787590027, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 24.646600457502558, |
|
"learning_rate": 5.913435276374834e-10, |
|
"logits/chosen": -2.4706640243530273, |
|
"logits/rejected": -2.419950485229492, |
|
"logps/chosen": -264.6922302246094, |
|
"logps/rejected": -289.4066467285156, |
|
"loss": 0.6716, |
|
"rewards/accuracies": 0.6038461327552795, |
|
"rewards/chosen": -0.4906882047653198, |
|
"rewards/margins": 0.13459746539592743, |
|
"rewards/rejected": -0.6252856850624084, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 153, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6840333373718013, |
|
"train_runtime": 39783.2449, |
|
"train_samples_per_second": 0.503, |
|
"train_steps_per_second": 0.004 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 153, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|