nash_dpo_rank4_iter_2 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 100,
"global_step": 195,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.5000000000000004e-07,
"logits/chosen": -2.5323238372802734,
"logits/rejected": -2.550581216812134,
"logps/chosen": -251.1321258544922,
"logps/rejected": -304.1657409667969,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.05,
"learning_rate": 2.5e-06,
"logits/chosen": -2.6382791996002197,
"logits/rejected": -2.5627737045288086,
"logps/chosen": -306.50714111328125,
"logps/rejected": -308.0683898925781,
"loss": 0.6928,
"rewards/accuracies": 0.4618055522441864,
"rewards/chosen": -0.00980505533516407,
"rewards/margins": 0.002812173217535019,
"rewards/rejected": -0.012617227621376514,
"step": 10
},
{
"epoch": 0.1,
"learning_rate": 5e-06,
"logits/chosen": -2.5880370140075684,
"logits/rejected": -2.574676275253296,
"logps/chosen": -298.3855285644531,
"logps/rejected": -308.91644287109375,
"loss": 0.6875,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.03349592164158821,
"rewards/margins": 0.012716387398540974,
"rewards/rejected": -0.04621230810880661,
"step": 20
},
{
"epoch": 0.15,
"learning_rate": 4.959823971496575e-06,
"logits/chosen": -2.5550596714019775,
"logits/rejected": -2.451047420501709,
"logps/chosen": -319.48651123046875,
"logps/rejected": -308.0125732421875,
"loss": 0.6752,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.06745009124279022,
"rewards/margins": 0.05002344399690628,
"rewards/rejected": -0.1174735426902771,
"step": 30
},
{
"epoch": 0.2,
"learning_rate": 4.8405871765993435e-06,
"logits/chosen": -2.552633285522461,
"logits/rejected": -2.4666683673858643,
"logps/chosen": -318.9139099121094,
"logps/rejected": -328.02813720703125,
"loss": 0.6639,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.11392641067504883,
"rewards/margins": 0.05299054831266403,
"rewards/rejected": -0.16691696643829346,
"step": 40
},
{
"epoch": 0.26,
"learning_rate": 4.646121984004666e-06,
"logits/chosen": -2.5199167728424072,
"logits/rejected": -2.494981527328491,
"logps/chosen": -313.51763916015625,
"logps/rejected": -337.84942626953125,
"loss": 0.6479,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.15254025161266327,
"rewards/margins": 0.10167612135410309,
"rewards/rejected": -0.25421637296676636,
"step": 50
},
{
"epoch": 0.31,
"learning_rate": 4.382678665009028e-06,
"logits/chosen": -2.4956717491149902,
"logits/rejected": -2.420666456222534,
"logps/chosen": -326.60302734375,
"logps/rejected": -344.81622314453125,
"loss": 0.6472,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.2229386270046234,
"rewards/margins": 0.10586366802453995,
"rewards/rejected": -0.32880228757858276,
"step": 60
},
{
"epoch": 0.36,
"learning_rate": 4.058724504646834e-06,
"logits/chosen": -2.4367499351501465,
"logits/rejected": -2.3880956172943115,
"logps/chosen": -304.6087951660156,
"logps/rejected": -337.37860107421875,
"loss": 0.6375,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.25156134366989136,
"rewards/margins": 0.13926830887794495,
"rewards/rejected": -0.3908296823501587,
"step": 70
},
{
"epoch": 0.41,
"learning_rate": 3.684671656182497e-06,
"logits/chosen": -2.4801056385040283,
"logits/rejected": -2.366367816925049,
"logps/chosen": -307.1095275878906,
"logps/rejected": -320.8377990722656,
"loss": 0.6347,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": -0.2161990851163864,
"rewards/margins": 0.17663030326366425,
"rewards/rejected": -0.39282941818237305,
"step": 80
},
{
"epoch": 0.46,
"learning_rate": 3.272542485937369e-06,
"logits/chosen": -2.3947010040283203,
"logits/rejected": -2.342723846435547,
"logps/chosen": -295.4327392578125,
"logps/rejected": -327.5935363769531,
"loss": 0.629,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.2965458929538727,
"rewards/margins": 0.1947285681962967,
"rewards/rejected": -0.4912744462490082,
"step": 90
},
{
"epoch": 0.51,
"learning_rate": 2.835583164544139e-06,
"logits/chosen": -2.314499855041504,
"logits/rejected": -2.2232449054718018,
"logps/chosen": -327.8091735839844,
"logps/rejected": -362.3641052246094,
"loss": 0.6232,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.2903655767440796,
"rewards/margins": 0.23061306774616241,
"rewards/rejected": -0.5209786295890808,
"step": 100
},
{
"epoch": 0.51,
"eval_logits/chosen": -2.2760729789733887,
"eval_logits/rejected": -2.1507985591888428,
"eval_logps/chosen": -339.0613708496094,
"eval_logps/rejected": -350.98443603515625,
"eval_loss": 0.6181190609931946,
"eval_rewards/accuracies": 0.6679999828338623,
"eval_rewards/chosen": -0.4065861999988556,
"eval_rewards/margins": 0.20490887761116028,
"eval_rewards/rejected": -0.6114951372146606,
"eval_runtime": 384.0631,
"eval_samples_per_second": 5.207,
"eval_steps_per_second": 0.651,
"step": 100
},
{
"epoch": 0.56,
"learning_rate": 2.3878379241237136e-06,
"logits/chosen": -2.16359281539917,
"logits/rejected": -2.0681121349334717,
"logps/chosen": -342.3879089355469,
"logps/rejected": -355.98919677734375,
"loss": 0.6164,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.4416044354438782,
"rewards/margins": 0.23517628014087677,
"rewards/rejected": -0.6767807602882385,
"step": 110
},
{
"epoch": 0.61,
"learning_rate": 1.9436976651092143e-06,
"logits/chosen": -2.197049856185913,
"logits/rejected": -2.077195644378662,
"logps/chosen": -353.0827941894531,
"logps/rejected": -376.5859375,
"loss": 0.6133,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.4651293158531189,
"rewards/margins": 0.2698659300804138,
"rewards/rejected": -0.7349953651428223,
"step": 120
},
{
"epoch": 0.67,
"learning_rate": 1.5174374208651913e-06,
"logits/chosen": -2.047089099884033,
"logits/rejected": -1.901155710220337,
"logps/chosen": -341.2831115722656,
"logps/rejected": -376.09326171875,
"loss": 0.5841,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.4085904657840729,
"rewards/margins": 0.3427460491657257,
"rewards/rejected": -0.7513364553451538,
"step": 130
},
{
"epoch": 0.72,
"learning_rate": 1.122757546369744e-06,
"logits/chosen": -1.988149642944336,
"logits/rejected": -1.7608541250228882,
"logps/chosen": -388.6386413574219,
"logps/rejected": -387.81829833984375,
"loss": 0.5888,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.5027592182159424,
"rewards/margins": 0.26467037200927734,
"rewards/rejected": -0.7674296498298645,
"step": 140
},
{
"epoch": 0.77,
"learning_rate": 7.723433775328385e-07,
"logits/chosen": -1.8329941034317017,
"logits/rejected": -1.6359748840332031,
"logps/chosen": -354.4386291503906,
"logps/rejected": -402.38970947265625,
"loss": 0.5783,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.4927349090576172,
"rewards/margins": 0.3869919180870056,
"rewards/rejected": -0.879726767539978,
"step": 150
},
{
"epoch": 0.82,
"learning_rate": 4.774575140626317e-07,
"logits/chosen": -1.7938659191131592,
"logits/rejected": -1.6463531255722046,
"logps/chosen": -351.1708068847656,
"logps/rejected": -407.2122497558594,
"loss": 0.5802,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.5660358667373657,
"rewards/margins": 0.35360515117645264,
"rewards/rejected": -0.9196408987045288,
"step": 160
},
{
"epoch": 0.87,
"learning_rate": 2.4757783024395244e-07,
"logits/chosen": -1.7095705270767212,
"logits/rejected": -1.6499723196029663,
"logps/chosen": -335.1717224121094,
"logps/rejected": -388.1880798339844,
"loss": 0.593,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.5228649377822876,
"rewards/margins": 0.3808245062828064,
"rewards/rejected": -0.9036895036697388,
"step": 170
},
{
"epoch": 0.92,
"learning_rate": 9.00928482603669e-08,
"logits/chosen": -1.8501991033554077,
"logits/rejected": -1.6261276006698608,
"logps/chosen": -354.8654479980469,
"logps/rejected": -375.2087097167969,
"loss": 0.5925,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.491665780544281,
"rewards/margins": 0.341984361410141,
"rewards/rejected": -0.8336501121520996,
"step": 180
},
{
"epoch": 0.97,
"learning_rate": 1.006426501190233e-08,
"logits/chosen": -1.7825686931610107,
"logits/rejected": -1.5483803749084473,
"logps/chosen": -352.39453125,
"logps/rejected": -381.07086181640625,
"loss": 0.5833,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.5239015817642212,
"rewards/margins": 0.3186902701854706,
"rewards/rejected": -0.8425917625427246,
"step": 190
},
{
"epoch": 1.0,
"step": 195,
"total_flos": 0.0,
"train_loss": 0.6237345188091963,
"train_runtime": 8932.105,
"train_samples_per_second": 2.799,
"train_steps_per_second": 0.022
}
],
"logging_steps": 10,
"max_steps": 195,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
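
A minimal sketch for inspecting the curves logged above: it assumes the file is saved locally as trainer_state.json and that matplotlib is installed, and it only relies on keys present in the log ("log_history", "step", "loss", "rewards/margins"). Evaluation and end-of-training entries are filtered out because they use "eval_loss" / "train_loss" instead of "loss".

import json

import matplotlib.pyplot as plt

# Load the trainer state written by the Trainer (path is an assumption).
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step training logs; eval and summary entries lack "learning_rate".
train_logs = [e for e in state["log_history"] if "loss" in e and "learning_rate" in e]

steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
margins = [e["rewards/margins"] for e in train_logs]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(steps, losses)
ax1.set_xlabel("step")
ax1.set_ylabel("DPO loss")
ax2.plot(steps, margins)
ax2.set_xlabel("step")
ax2.set_ylabel("rewards/margins")
fig.tight_layout()
plt.show()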