|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9992429977289932,
  "eval_steps": 100,
  "global_step": 165,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 2.9411764705882356e-07,
      "logits/chosen": -2.62508487701416,
      "logits/rejected": -2.638840436935425,
      "logps/chosen": -313.21063232421875,
      "logps/rejected": -286.36663818359375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 2.9411764705882355e-06,
      "logits/chosen": -2.7004079818725586,
      "logits/rejected": -2.6217572689056396,
      "logps/chosen": -292.9493408203125,
      "logps/rejected": -278.7856140136719,
      "loss": 0.6926,
      "rewards/accuracies": 0.5069444179534912,
      "rewards/chosen": 0.0015960136661306024,
      "rewards/margins": 0.0010866459924727678,
      "rewards/rejected": 0.0005093678482808173,
      "step": 10
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.994932636402032e-06,
      "logits/chosen": -2.690582752227783,
      "logits/rejected": -2.671006917953491,
      "logps/chosen": -273.6416931152344,
      "logps/rejected": -290.06622314453125,
      "loss": 0.6854,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.04393266513943672,
      "rewards/margins": 0.014766323380172253,
      "rewards/rejected": 0.029166344553232193,
      "step": 20
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.905416503522124e-06,
      "logits/chosen": -2.6617255210876465,
      "logits/rejected": -2.585472345352173,
      "logps/chosen": -288.24456787109375,
      "logps/rejected": -275.30908203125,
      "loss": 0.6639,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.08098463714122772,
      "rewards/margins": 0.06556984782218933,
      "rewards/rejected": 0.015414801426231861,
      "step": 30
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.707922373336524e-06,
      "logits/chosen": -2.5689034461975098,
      "logits/rejected": -2.5172557830810547,
      "logps/chosen": -297.8088684082031,
      "logps/rejected": -299.01019287109375,
      "loss": 0.6496,
      "rewards/accuracies": 0.659375011920929,
      "rewards/chosen": -0.03176301717758179,
      "rewards/margins": 0.09769946336746216,
      "rewards/rejected": -0.12946248054504395,
      "step": 40
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.411315662967732e-06,
      "logits/chosen": -2.543713331222534,
      "logits/rejected": -2.471020221710205,
      "logps/chosen": -278.70068359375,
      "logps/rejected": -281.05767822265625,
      "loss": 0.645,
      "rewards/accuracies": 0.640625,
      "rewards/chosen": 0.058358293026685715,
      "rewards/margins": 0.13286305963993073,
      "rewards/rejected": -0.07450475543737411,
      "step": 50
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.028910905897229e-06,
      "logits/chosen": -2.5148937702178955,
      "logits/rejected": -2.403398036956787,
      "logps/chosen": -313.97503662109375,
      "logps/rejected": -300.5794677734375,
      "loss": 0.6317,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.1467473804950714,
      "rewards/margins": 0.1627379208803177,
      "rewards/rejected": -0.3094852566719055,
      "step": 60
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.577874068920446e-06,
      "logits/chosen": -2.4615416526794434,
      "logits/rejected": -2.3834948539733887,
      "logps/chosen": -288.96875,
      "logps/rejected": -298.4138488769531,
      "loss": 0.6272,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.0141445966437459,
      "rewards/margins": 0.17550477385520935,
      "rewards/rejected": -0.16136017441749573,
      "step": 70
    },
    {
      "epoch": 0.48,
      "learning_rate": 3.0784519801008546e-06,
      "logits/chosen": -2.386679172515869,
      "logits/rejected": -2.308007001876831,
      "logps/chosen": -303.1172180175781,
      "logps/rejected": -317.23577880859375,
      "loss": 0.6276,
      "rewards/accuracies": 0.6656249761581421,
      "rewards/chosen": -0.03143765777349472,
      "rewards/margins": 0.20620755851268768,
      "rewards/rejected": -0.237645223736763,
      "step": 80
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.553063458334059e-06,
      "logits/chosen": -2.4485552310943604,
      "logits/rejected": -2.3585047721862793,
      "logps/chosen": -294.64202880859375,
      "logps/rejected": -314.85906982421875,
      "loss": 0.6264,
      "rewards/accuracies": 0.6656249761581421,
      "rewards/chosen": -0.18389078974723816,
      "rewards/margins": 0.19124503433704376,
      "rewards/rejected": -0.3751358091831207,
      "step": 90
    },
    {
      "epoch": 0.61,
      "learning_rate": 2.025292943281429e-06,
      "logits/chosen": -2.4612982273101807,
      "logits/rejected": -2.3962552547454834,
      "logps/chosen": -300.9443359375,
      "logps/rejected": -299.62554931640625,
      "loss": 0.6237,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.03618014603853226,
      "rewards/margins": 0.2104295939207077,
      "rewards/rejected": -0.24660976231098175,
      "step": 100
    },
    {
      "epoch": 0.61,
      "eval_logits/chosen": -2.440356492996216,
      "eval_logits/rejected": -2.3331212997436523,
      "eval_logps/chosen": -314.80450439453125,
      "eval_logps/rejected": -316.8028259277344,
      "eval_loss": 0.6047022938728333,
      "eval_rewards/accuracies": 0.6980000138282776,
      "eval_rewards/chosen": -0.14116904139518738,
      "eval_rewards/margins": 0.23404958844184875,
      "eval_rewards/rejected": -0.37521862983703613,
      "eval_runtime": 384.1798,
      "eval_samples_per_second": 5.206,
      "eval_steps_per_second": 0.651,
      "step": 100
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.5188318011445907e-06,
      "logits/chosen": -2.4451894760131836,
      "logits/rejected": -2.3738484382629395,
      "logps/chosen": -297.38006591796875,
      "logps/rejected": -310.2391662597656,
      "loss": 0.6156,
      "rewards/accuracies": 0.659375011920929,
      "rewards/chosen": -0.10568475723266602,
      "rewards/margins": 0.23291108012199402,
      "rewards/rejected": -0.33859583735466003,
      "step": 110
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.0564148305586296e-06,
      "logits/chosen": -2.5074470043182373,
      "logits/rejected": -2.3541178703308105,
      "logps/chosen": -313.4942932128906,
      "logps/rejected": -304.71240234375,
      "loss": 0.6023,
      "rewards/accuracies": 0.671875,
      "rewards/chosen": -0.05554385855793953,
      "rewards/margins": 0.2575618028640747,
      "rewards/rejected": -0.31310564279556274,
      "step": 120
    },
    {
      "epoch": 0.79,
      "learning_rate": 6.587997083462197e-07,
      "logits/chosen": -2.472149133682251,
      "logits/rejected": -2.410820960998535,
      "logps/chosen": -306.9402770996094,
      "logps/rejected": -340.479736328125,
      "loss": 0.6055,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.10773856937885284,
      "rewards/margins": 0.24569562077522278,
      "rewards/rejected": -0.35343414545059204,
      "step": 130
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.438351873250492e-07,
      "logits/chosen": -2.4470582008361816,
      "logits/rejected": -2.354292392730713,
      "logps/chosen": -300.5553283691406,
      "logps/rejected": -334.2596435546875,
      "loss": 0.6132,
      "rewards/accuracies": 0.684374988079071,
      "rewards/chosen": -0.13135434687137604,
      "rewards/margins": 0.2554669976234436,
      "rewards/rejected": -0.38682132959365845,
      "step": 140
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.2565987432367032e-07,
      "logits/chosen": -2.466301679611206,
      "logits/rejected": -2.3889572620391846,
      "logps/chosen": -304.37078857421875,
      "logps/rejected": -318.76507568359375,
      "loss": 0.6197,
      "rewards/accuracies": 0.671875,
      "rewards/chosen": -0.13493719696998596,
      "rewards/margins": 0.24466891586780548,
      "rewards/rejected": -0.379606157541275,
      "step": 150
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.4067554877743861e-08,
      "logits/chosen": -2.437718152999878,
      "logits/rejected": -2.32914662361145,
      "logps/chosen": -297.7317810058594,
      "logps/rejected": -310.4150085449219,
      "loss": 0.5989,
      "rewards/accuracies": 0.690625011920929,
      "rewards/chosen": -0.10253523290157318,
      "rewards/margins": 0.2659505009651184,
      "rewards/rejected": -0.3684857487678528,
      "step": 160
    },
    {
      "epoch": 1.0,
      "step": 165,
      "total_flos": 0.0,
      "train_loss": 0.6320372126319191,
      "train_runtime": 7509.7506,
      "train_samples_per_second": 2.814,
      "train_steps_per_second": 0.022
    }
  ],
  "logging_steps": 10,
  "max_steps": 165,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}