|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998324958123953, |
|
"eval_steps": 100, |
|
"global_step": 149, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.3333333333333335e-07, |
|
"logits/chosen": -2.55656099319458, |
|
"logits/rejected": -2.55704402923584, |
|
"logps/chosen": -162.36532592773438, |
|
"logps/rejected": -172.43312072753906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -2.666642189025879, |
|
"logits/rejected": -2.6329586505889893, |
|
"logps/chosen": -185.80641174316406, |
|
"logps/rejected": -184.18959045410156, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4548611044883728, |
|
"rewards/chosen": -0.001971459249034524, |
|
"rewards/margins": 0.0016002749325707555, |
|
"rewards/rejected": -0.003571733832359314, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.982842942906386e-06, |
|
"logits/chosen": -2.721466541290283, |
|
"logits/rejected": -2.67402982711792, |
|
"logps/chosen": -201.33470153808594, |
|
"logps/rejected": -194.63198852539062, |
|
"loss": 0.6949, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -0.10967914760112762, |
|
"rewards/margins": 0.0006104880012571812, |
|
"rewards/rejected": -0.11028961837291718, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.846996204000967e-06, |
|
"logits/chosen": -2.625549077987671, |
|
"logits/rejected": -2.607099771499634, |
|
"logps/chosen": -197.00973510742188, |
|
"logps/rejected": -195.684326171875, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": -0.08795476704835892, |
|
"rewards/margins": -0.002985857194289565, |
|
"rewards/rejected": -0.08496890217065811, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.582735470385229e-06, |
|
"logits/chosen": -2.633084535598755, |
|
"logits/rejected": -2.634359836578369, |
|
"logps/chosen": -185.71023559570312, |
|
"logps/rejected": -185.03509521484375, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -0.04532230272889137, |
|
"rewards/margins": 0.0011206632480025291, |
|
"rewards/rejected": -0.046442966908216476, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.204519553876095e-06, |
|
"logits/chosen": -2.6109485626220703, |
|
"logits/rejected": -2.6062893867492676, |
|
"logps/chosen": -171.55734252929688, |
|
"logps/rejected": -173.1212158203125, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.014199512079358101, |
|
"rewards/margins": 0.0028473488055169582, |
|
"rewards/rejected": -0.017046859487891197, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7330422317447686e-06, |
|
"logits/chosen": -2.6457674503326416, |
|
"logits/rejected": -2.6292178630828857, |
|
"logps/chosen": -188.72109985351562, |
|
"logps/rejected": -184.30081176757812, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.00934157706797123, |
|
"rewards/margins": 0.002996337367221713, |
|
"rewards/rejected": -0.012337915599346161, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1941000034687516e-06, |
|
"logits/chosen": -2.6249117851257324, |
|
"logits/rejected": -2.611786365509033, |
|
"logps/chosen": -170.21836853027344, |
|
"logps/rejected": -177.2921142578125, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -0.03585924953222275, |
|
"rewards/margins": 0.0028343182057142258, |
|
"rewards/rejected": -0.03869356960058212, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6171806561748503e-06, |
|
"logits/chosen": -2.5946967601776123, |
|
"logits/rejected": -2.571646213531494, |
|
"logps/chosen": -184.07911682128906, |
|
"logps/rejected": -189.4521942138672, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.019358564168214798, |
|
"rewards/margins": 0.011903460137546062, |
|
"rewards/rejected": -0.031262025237083435, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0338498642707977e-06, |
|
"logits/chosen": -2.629333972930908, |
|
"logits/rejected": -2.6070916652679443, |
|
"logps/chosen": -183.12803649902344, |
|
"logps/rejected": -187.22447204589844, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -0.0804746001958847, |
|
"rewards/margins": 0.01135367900133133, |
|
"rewards/rejected": -0.09182827174663544, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4760240991587338e-06, |
|
"logits/chosen": -2.5863940715789795, |
|
"logits/rejected": -2.5969200134277344, |
|
"logps/chosen": -172.47142028808594, |
|
"logps/rejected": -180.51638793945312, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -0.05926589295268059, |
|
"rewards/margins": 0.011102231219410896, |
|
"rewards/rejected": -0.07036812603473663, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_logits/chosen": -2.5383808612823486, |
|
"eval_logits/rejected": -2.4414656162261963, |
|
"eval_logps/chosen": -307.9212646484375, |
|
"eval_logps/rejected": -301.2935485839844, |
|
"eval_loss": 0.6852481365203857, |
|
"eval_rewards/accuracies": 0.5659999847412109, |
|
"eval_rewards/chosen": -0.0951852798461914, |
|
"eval_rewards/margins": 0.019401030614972115, |
|
"eval_rewards/rejected": -0.11458631604909897, |
|
"eval_runtime": 382.181, |
|
"eval_samples_per_second": 5.233, |
|
"eval_steps_per_second": 0.654, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.742243453755202e-07, |
|
"logits/chosen": -2.612968683242798, |
|
"logits/rejected": -2.6128811836242676, |
|
"logps/chosen": -176.701416015625, |
|
"logps/rejected": -186.86849975585938, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": -0.027405280619859695, |
|
"rewards/margins": 0.016830626875162125, |
|
"rewards/rejected": -0.04423590749502182, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.559061696656199e-07, |
|
"logits/chosen": -2.585603713989258, |
|
"logits/rejected": -2.5767314434051514, |
|
"logps/chosen": -184.33139038085938, |
|
"logps/rejected": -189.44131469726562, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.03763353452086449, |
|
"rewards/margins": 0.025313779711723328, |
|
"rewards/rejected": -0.06294731795787811, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4395751190352924e-07, |
|
"logits/chosen": -2.5703272819519043, |
|
"logits/rejected": -2.5677199363708496, |
|
"logps/chosen": -184.06341552734375, |
|
"logps/rejected": -188.58779907226562, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.06450501084327698, |
|
"rewards/margins": 0.007478479295969009, |
|
"rewards/rejected": -0.07198350131511688, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.544639001763719e-08, |
|
"logits/chosen": -2.6024391651153564, |
|
"logits/rejected": -2.5870203971862793, |
|
"logps/chosen": -199.04031372070312, |
|
"logps/rejected": -197.60330200195312, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -0.06172681599855423, |
|
"rewards/margins": 0.017358267679810524, |
|
"rewards/rejected": -0.0790850818157196, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 149, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6901595840518107, |
|
"train_runtime": 6945.5042, |
|
"train_samples_per_second": 2.75, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 149, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|