|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 100, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.125e-07, |
|
"logits/chosen": -2.89351749420166, |
|
"logits/rejected": -2.7752203941345215, |
|
"logps/chosen": -345.7324523925781, |
|
"logps/rejected": -319.42047119140625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -2.8028833866119385, |
|
"logits/rejected": -2.7471988201141357, |
|
"logps/chosen": -255.036865234375, |
|
"logps/rejected": -252.82679748535156, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": 0.0003066221543122083, |
|
"rewards/margins": 0.0006635435856878757, |
|
"rewards/rejected": -0.0003569214604794979, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989935734988098e-06, |
|
"logits/chosen": -2.7710824012756348, |
|
"logits/rejected": -2.7166454792022705, |
|
"logps/chosen": -277.1798400878906, |
|
"logps/rejected": -256.997802734375, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.012738336808979511, |
|
"rewards/margins": 0.008854442276060581, |
|
"rewards/rejected": 0.00388389453291893, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"logits/chosen": -2.7218027114868164, |
|
"logits/rejected": -2.658277750015259, |
|
"logps/chosen": -274.3503112792969, |
|
"logps/rejected": -246.41128540039062, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.03419749438762665, |
|
"rewards/margins": 0.027278240770101547, |
|
"rewards/rejected": 0.006919251289218664, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.646121984004666e-06, |
|
"logits/chosen": -2.7631821632385254, |
|
"logits/rejected": -2.6550638675689697, |
|
"logps/chosen": -268.45220947265625, |
|
"logps/rejected": -251.58743286132812, |
|
"loss": 0.6693, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": 0.03486743941903114, |
|
"rewards/margins": 0.05773182958364487, |
|
"rewards/rejected": -0.022864393889904022, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3069871595684795e-06, |
|
"logits/chosen": -2.676445960998535, |
|
"logits/rejected": -2.62298321723938, |
|
"logps/chosen": -274.287841796875, |
|
"logps/rejected": -281.6821594238281, |
|
"loss": 0.6605, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.003023784141987562, |
|
"rewards/margins": 0.07856948673725128, |
|
"rewards/rejected": -0.08159326761960983, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8772424536302565e-06, |
|
"logits/chosen": -2.687849521636963, |
|
"logits/rejected": -2.6154909133911133, |
|
"logps/chosen": -287.0888366699219, |
|
"logps/rejected": -274.48822021484375, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.03279999643564224, |
|
"rewards/margins": 0.11418493092060089, |
|
"rewards/rejected": -0.14698493480682373, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3784370602033572e-06, |
|
"logits/chosen": -2.6265475749969482, |
|
"logits/rejected": -2.570312976837158, |
|
"logps/chosen": -304.10125732421875, |
|
"logps/rejected": -292.9505310058594, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.033603884279727936, |
|
"rewards/margins": 0.12847770750522614, |
|
"rewards/rejected": -0.16208159923553467, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.835583164544139e-06, |
|
"logits/chosen": -2.6333694458007812, |
|
"logits/rejected": -2.5370612144470215, |
|
"logps/chosen": -302.35736083984375, |
|
"logps/rejected": -276.11175537109375, |
|
"loss": 0.634, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.04339645802974701, |
|
"rewards/margins": 0.14248773455619812, |
|
"rewards/rejected": -0.18588420748710632, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2759017277414165e-06, |
|
"logits/chosen": -2.584850311279297, |
|
"logits/rejected": -2.5956850051879883, |
|
"logps/chosen": -292.6184997558594, |
|
"logps/rejected": -311.091796875, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09801232814788818, |
|
"rewards/margins": 0.17943526804447174, |
|
"rewards/rejected": -0.27744758129119873, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -2.6319050788879395, |
|
"logits/rejected": -2.5922951698303223, |
|
"logps/chosen": -290.19976806640625, |
|
"logps/rejected": -273.79815673828125, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.1279720962047577, |
|
"rewards/margins": 0.16270975768566132, |
|
"rewards/rejected": -0.2906818985939026, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -2.6261305809020996, |
|
"eval_logits/rejected": -2.541619062423706, |
|
"eval_logps/chosen": -295.3544006347656, |
|
"eval_logps/rejected": -286.08172607421875, |
|
"eval_loss": 0.6284892559051514, |
|
"eval_rewards/accuracies": 0.699999988079071, |
|
"eval_rewards/chosen": -0.11314628273248672, |
|
"eval_rewards/margins": 0.1725120097398758, |
|
"eval_rewards/rejected": -0.2856582701206207, |
|
"eval_runtime": 384.1378, |
|
"eval_samples_per_second": 5.206, |
|
"eval_steps_per_second": 0.651, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.217751806485235e-06, |
|
"logits/chosen": -2.6338038444519043, |
|
"logits/rejected": -2.5288445949554443, |
|
"logps/chosen": -289.0359802246094, |
|
"logps/rejected": -275.2894287109375, |
|
"loss": 0.6177, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.06765095144510269, |
|
"rewards/margins": 0.20255950093269348, |
|
"rewards/rejected": -0.2702104449272156, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.723433775328385e-07, |
|
"logits/chosen": -2.552917003631592, |
|
"logits/rejected": -2.550574779510498, |
|
"logps/chosen": -274.2412109375, |
|
"logps/rejected": -288.6865539550781, |
|
"loss": 0.6251, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.10379727929830551, |
|
"rewards/margins": 0.1611437350511551, |
|
"rewards/rejected": -0.2649410367012024, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1356686569674344e-07, |
|
"logits/chosen": -2.611356735229492, |
|
"logits/rejected": -2.5477137565612793, |
|
"logps/chosen": -291.66290283203125, |
|
"logps/rejected": -301.75537109375, |
|
"loss": 0.6233, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.11176357418298721, |
|
"rewards/margins": 0.20609335601329803, |
|
"rewards/rejected": -0.31785690784454346, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.59412823400657e-07, |
|
"logits/chosen": -2.6064445972442627, |
|
"logits/rejected": -2.479429244995117, |
|
"logps/chosen": -313.6204833984375, |
|
"logps/rejected": -287.5814514160156, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.09765791893005371, |
|
"rewards/margins": 0.21785131096839905, |
|
"rewards/rejected": -0.31550922989845276, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.262559558016325e-08, |
|
"logits/chosen": -2.58495831489563, |
|
"logits/rejected": -2.4780099391937256, |
|
"logps/chosen": -300.6700134277344, |
|
"logps/rejected": -281.14935302734375, |
|
"loss": 0.6193, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.11531106382608414, |
|
"rewards/margins": 0.22770515084266663, |
|
"rewards/rejected": -0.34301620721817017, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 156, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6446946973984058, |
|
"train_runtime": 7192.2445, |
|
"train_samples_per_second": 2.781, |
|
"train_steps_per_second": 0.022 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|