|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 100,
  "global_step": 195,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 2.5000000000000004e-07,
      "logits/chosen": -2.5323238372802734,
      "logits/rejected": -2.550581216812134,
      "logps/chosen": -251.1321258544922,
      "logps/rejected": -304.1657409667969,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.5e-06,
      "logits/chosen": -2.6382791996002197,
      "logits/rejected": -2.5627737045288086,
      "logps/chosen": -306.50714111328125,
      "logps/rejected": -308.0683898925781,
      "loss": 0.6928,
      "rewards/accuracies": 0.4618055522441864,
      "rewards/chosen": -0.00980505533516407,
      "rewards/margins": 0.002812173217535019,
      "rewards/rejected": -0.012617227621376514,
      "step": 10
    },
    {
      "epoch": 0.1,
      "learning_rate": 5e-06,
      "logits/chosen": -2.5880370140075684,
      "logits/rejected": -2.574676275253296,
      "logps/chosen": -298.3855285644531,
      "logps/rejected": -308.91644287109375,
      "loss": 0.6875,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.03349592164158821,
      "rewards/margins": 0.012716387398540974,
      "rewards/rejected": -0.04621230810880661,
      "step": 20
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.959823971496575e-06,
      "logits/chosen": -2.5550596714019775,
      "logits/rejected": -2.451047420501709,
      "logps/chosen": -319.48651123046875,
      "logps/rejected": -308.0125732421875,
      "loss": 0.6752,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.06745009124279022,
      "rewards/margins": 0.05002344399690628,
      "rewards/rejected": -0.1174735426902771,
      "step": 30
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.8405871765993435e-06,
      "logits/chosen": -2.552633285522461,
      "logits/rejected": -2.4666683673858643,
      "logps/chosen": -318.9139099121094,
      "logps/rejected": -328.02813720703125,
      "loss": 0.6639,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": -0.11392641067504883,
      "rewards/margins": 0.05299054831266403,
      "rewards/rejected": -0.16691696643829346,
      "step": 40
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.646121984004666e-06,
      "logits/chosen": -2.5199167728424072,
      "logits/rejected": -2.494981527328491,
      "logps/chosen": -313.51763916015625,
      "logps/rejected": -337.84942626953125,
      "loss": 0.6479,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.15254025161266327,
      "rewards/margins": 0.10167612135410309,
      "rewards/rejected": -0.25421637296676636,
      "step": 50
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.382678665009028e-06,
      "logits/chosen": -2.4956717491149902,
      "logits/rejected": -2.420666456222534,
      "logps/chosen": -326.60302734375,
      "logps/rejected": -344.81622314453125,
      "loss": 0.6472,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.2229386270046234,
      "rewards/margins": 0.10586366802453995,
      "rewards/rejected": -0.32880228757858276,
      "step": 60
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.058724504646834e-06,
      "logits/chosen": -2.4367499351501465,
      "logits/rejected": -2.3880956172943115,
      "logps/chosen": -304.6087951660156,
      "logps/rejected": -337.37860107421875,
      "loss": 0.6375,
      "rewards/accuracies": 0.640625,
      "rewards/chosen": -0.25156134366989136,
      "rewards/margins": 0.13926830887794495,
      "rewards/rejected": -0.3908296823501587,
      "step": 70
    },
    {
      "epoch": 0.41,
      "learning_rate": 3.684671656182497e-06,
      "logits/chosen": -2.4801056385040283,
      "logits/rejected": -2.366367816925049,
      "logps/chosen": -307.1095275878906,
      "logps/rejected": -320.8377990722656,
      "loss": 0.6347,
      "rewards/accuracies": 0.6343749761581421,
      "rewards/chosen": -0.2161990851163864,
      "rewards/margins": 0.17663030326366425,
      "rewards/rejected": -0.39282941818237305,
      "step": 80
    },
    {
      "epoch": 0.46,
      "learning_rate": 3.272542485937369e-06,
      "logits/chosen": -2.3947010040283203,
      "logits/rejected": -2.342723846435547,
      "logps/chosen": -295.4327392578125,
      "logps/rejected": -327.5935363769531,
      "loss": 0.629,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.2965458929538727,
      "rewards/margins": 0.1947285681962967,
      "rewards/rejected": -0.4912744462490082,
      "step": 90
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.835583164544139e-06,
      "logits/chosen": -2.314499855041504,
      "logits/rejected": -2.2232449054718018,
      "logps/chosen": -327.8091735839844,
      "logps/rejected": -362.3641052246094,
      "loss": 0.6232,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.2903655767440796,
      "rewards/margins": 0.23061306774616241,
      "rewards/rejected": -0.5209786295890808,
      "step": 100
    },
    {
      "epoch": 0.51,
      "eval_logits/chosen": -2.2760729789733887,
      "eval_logits/rejected": -2.1507985591888428,
      "eval_logps/chosen": -339.0613708496094,
      "eval_logps/rejected": -350.98443603515625,
      "eval_loss": 0.6181190609931946,
      "eval_rewards/accuracies": 0.6679999828338623,
      "eval_rewards/chosen": -0.4065861999988556,
      "eval_rewards/margins": 0.20490887761116028,
      "eval_rewards/rejected": -0.6114951372146606,
      "eval_runtime": 384.0631,
      "eval_samples_per_second": 5.207,
      "eval_steps_per_second": 0.651,
      "step": 100
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.3878379241237136e-06,
      "logits/chosen": -2.16359281539917,
      "logits/rejected": -2.0681121349334717,
      "logps/chosen": -342.3879089355469,
      "logps/rejected": -355.98919677734375,
      "loss": 0.6164,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.4416044354438782,
      "rewards/margins": 0.23517628014087677,
      "rewards/rejected": -0.6767807602882385,
      "step": 110
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.9436976651092143e-06,
      "logits/chosen": -2.197049856185913,
      "logits/rejected": -2.077195644378662,
      "logps/chosen": -353.0827941894531,
      "logps/rejected": -376.5859375,
      "loss": 0.6133,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.4651293158531189,
      "rewards/margins": 0.2698659300804138,
      "rewards/rejected": -0.7349953651428223,
      "step": 120
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.5174374208651913e-06,
      "logits/chosen": -2.047089099884033,
      "logits/rejected": -1.901155710220337,
      "logps/chosen": -341.2831115722656,
      "logps/rejected": -376.09326171875,
      "loss": 0.5841,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.4085904657840729,
      "rewards/margins": 0.3427460491657257,
      "rewards/rejected": -0.7513364553451538,
      "step": 130
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.122757546369744e-06,
      "logits/chosen": -1.988149642944336,
      "logits/rejected": -1.7608541250228882,
      "logps/chosen": -388.6386413574219,
      "logps/rejected": -387.81829833984375,
      "loss": 0.5888,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.5027592182159424,
      "rewards/margins": 0.26467037200927734,
      "rewards/rejected": -0.7674296498298645,
      "step": 140
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.723433775328385e-07,
      "logits/chosen": -1.8329941034317017,
      "logits/rejected": -1.6359748840332031,
      "logps/chosen": -354.4386291503906,
      "logps/rejected": -402.38970947265625,
      "loss": 0.5783,
      "rewards/accuracies": 0.7093750238418579,
      "rewards/chosen": -0.4927349090576172,
      "rewards/margins": 0.3869919180870056,
      "rewards/rejected": -0.879726767539978,
      "step": 150
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.774575140626317e-07,
      "logits/chosen": -1.7938659191131592,
      "logits/rejected": -1.6463531255722046,
      "logps/chosen": -351.1708068847656,
      "logps/rejected": -407.2122497558594,
      "loss": 0.5802,
      "rewards/accuracies": 0.6968749761581421,
      "rewards/chosen": -0.5660358667373657,
      "rewards/margins": 0.35360515117645264,
      "rewards/rejected": -0.9196408987045288,
      "step": 160
    },
    {
      "epoch": 0.87,
      "learning_rate": 2.4757783024395244e-07,
      "logits/chosen": -1.7095705270767212,
      "logits/rejected": -1.6499723196029663,
      "logps/chosen": -335.1717224121094,
      "logps/rejected": -388.1880798339844,
      "loss": 0.593,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.5228649377822876,
      "rewards/margins": 0.3808245062828064,
      "rewards/rejected": -0.9036895036697388,
      "step": 170
    },
    {
      "epoch": 0.92,
      "learning_rate": 9.00928482603669e-08,
      "logits/chosen": -1.8501991033554077,
      "logits/rejected": -1.6261276006698608,
      "logps/chosen": -354.8654479980469,
      "logps/rejected": -375.2087097167969,
      "loss": 0.5925,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.491665780544281,
      "rewards/margins": 0.341984361410141,
      "rewards/rejected": -0.8336501121520996,
      "step": 180
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.006426501190233e-08,
      "logits/chosen": -1.7825686931610107,
      "logits/rejected": -1.5483803749084473,
      "logps/chosen": -352.39453125,
      "logps/rejected": -381.07086181640625,
      "loss": 0.5833,
      "rewards/accuracies": 0.690625011920929,
      "rewards/chosen": -0.5239015817642212,
      "rewards/margins": 0.3186902701854706,
      "rewards/rejected": -0.8425917625427246,
      "step": 190
    },
    {
      "epoch": 1.0,
      "step": 195,
      "total_flos": 0.0,
      "train_loss": 0.6237345188091963,
      "train_runtime": 8932.105,
      "train_samples_per_second": 2.799,
      "train_steps_per_second": 0.022
    }
  ],
  "logging_steps": 10,
  "max_steps": 195,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}