|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9991836734693877, |
|
"eval_steps": 100, |
|
"global_step": 153, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.125e-07, |
|
"logits/chosen": -2.8246347904205322, |
|
"logits/rejected": -2.7856249809265137, |
|
"logps/chosen": -238.8000030517578, |
|
"logps/rejected": -252.79095458984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -2.786532402038574, |
|
"logits/rejected": -2.722121238708496, |
|
"logps/chosen": -234.37245178222656, |
|
"logps/rejected": -252.76654052734375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.3854166567325592, |
|
"rewards/chosen": 0.00019371736561879516, |
|
"rewards/margins": -0.0003117284504696727, |
|
"rewards/rejected": 0.0005054458160884678, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989490450759331e-06, |
|
"logits/chosen": -2.761892318725586, |
|
"logits/rejected": -2.7504990100860596, |
|
"logps/chosen": -254.9222869873047, |
|
"logps/rejected": -267.8934326171875, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.008798873052001, |
|
"rewards/margins": 0.0012144726933911443, |
|
"rewards/rejected": 0.007584400475025177, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.872270441827174e-06, |
|
"logits/chosen": -2.7136857509613037, |
|
"logits/rejected": -2.7073874473571777, |
|
"logps/chosen": -241.10507202148438, |
|
"logps/rejected": -277.07415771484375, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": 0.036142051219940186, |
|
"rewards/margins": 0.003209862159565091, |
|
"rewards/rejected": 0.032932184636592865, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.630851211353007e-06, |
|
"logits/chosen": -2.719532012939453, |
|
"logits/rejected": -2.6920456886291504, |
|
"logps/chosen": -238.81729125976562, |
|
"logps/rejected": -257.0670471191406, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.05626382678747177, |
|
"rewards/margins": 0.008078296668827534, |
|
"rewards/rejected": 0.048185527324676514, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.277872161641682e-06, |
|
"logits/chosen": -2.707075357437134, |
|
"logits/rejected": -2.682654619216919, |
|
"logps/chosen": -214.5452117919922, |
|
"logps/rejected": -231.02877807617188, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.045605290681123734, |
|
"rewards/margins": 0.007303851656615734, |
|
"rewards/rejected": 0.038301438093185425, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.831813362428005e-06, |
|
"logits/chosen": -2.7438042163848877, |
|
"logits/rejected": -2.724104166030884, |
|
"logps/chosen": -239.53213500976562, |
|
"logps/rejected": -274.93902587890625, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.010097012855112553, |
|
"rewards/margins": 0.017007894814014435, |
|
"rewards/rejected": -0.006910882890224457, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3160280345958614e-06, |
|
"logits/chosen": -2.7218570709228516, |
|
"logits/rejected": -2.7025914192199707, |
|
"logps/chosen": -247.56350708007812, |
|
"logps/rejected": -262.78961181640625, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.01939699612557888, |
|
"rewards/margins": 0.020038722082972527, |
|
"rewards/rejected": -0.0006417257827706635, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.757519902117886e-06, |
|
"logits/chosen": -2.650240898132324, |
|
"logits/rejected": -2.6454017162323, |
|
"logps/chosen": -229.64468383789062, |
|
"logps/rejected": -238.07791137695312, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -0.007218700833618641, |
|
"rewards/margins": 0.023447707295417786, |
|
"rewards/rejected": -0.030666405335068703, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.185529423440807e-06, |
|
"logits/chosen": -2.719679117202759, |
|
"logits/rejected": -2.6773438453674316, |
|
"logps/chosen": -231.2781524658203, |
|
"logps/rejected": -246.88418579101562, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.028139105066657066, |
|
"rewards/margins": 0.027326997369527817, |
|
"rewards/rejected": -0.05546610429883003, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6300029195778454e-06, |
|
"logits/chosen": -2.651785373687744, |
|
"logits/rejected": -2.6500556468963623, |
|
"logps/chosen": -232.26229858398438, |
|
"logps/rejected": -263.81927490234375, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.013562011532485485, |
|
"rewards/margins": 0.03466662019491196, |
|
"rewards/rejected": -0.048228632658720016, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -2.7459018230438232, |
|
"eval_logits/rejected": -2.6667211055755615, |
|
"eval_logps/chosen": -288.20477294921875, |
|
"eval_logps/rejected": -266.5933532714844, |
|
"eval_loss": 0.6749266386032104, |
|
"eval_rewards/accuracies": 0.6079999804496765, |
|
"eval_rewards/chosen": -0.04165023937821388, |
|
"eval_rewards/margins": 0.04912487417459488, |
|
"eval_rewards/rejected": -0.09077510982751846, |
|
"eval_runtime": 384.036, |
|
"eval_samples_per_second": 5.208, |
|
"eval_steps_per_second": 0.651, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1200247470632394e-06, |
|
"logits/chosen": -2.6394271850585938, |
|
"logits/rejected": -2.623563766479492, |
|
"logps/chosen": -241.2955780029297, |
|
"logps/rejected": -255.7236785888672, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.05771399661898613, |
|
"rewards/margins": 0.021994810551404953, |
|
"rewards/rejected": -0.07970880717039108, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.822945986946386e-07, |
|
"logits/chosen": -2.7274205684661865, |
|
"logits/rejected": -2.681755542755127, |
|
"logps/chosen": -255.23196411132812, |
|
"logps/rejected": -279.19464111328125, |
|
"loss": 0.6754, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.05603557080030441, |
|
"rewards/margins": 0.03728459030389786, |
|
"rewards/rejected": -0.09332015365362167, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.397296523427807e-07, |
|
"logits/chosen": -2.654305934906006, |
|
"logits/rejected": -2.646434783935547, |
|
"logps/chosen": -262.47210693359375, |
|
"logps/rejected": -293.465087890625, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.05943986773490906, |
|
"rewards/margins": 0.05442862585186958, |
|
"rewards/rejected": -0.11386849731206894, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.102647517397798e-07, |
|
"logits/chosen": -2.711536407470703, |
|
"logits/rejected": -2.693706512451172, |
|
"logps/chosen": -233.05447387695312, |
|
"logps/rejected": -257.82183837890625, |
|
"loss": 0.6737, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.06369104981422424, |
|
"rewards/margins": 0.037472162395715714, |
|
"rewards/rejected": -0.10116322338581085, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.9134352763748345e-09, |
|
"logits/chosen": -2.659524440765381, |
|
"logits/rejected": -2.654564380645752, |
|
"logps/chosen": -237.49960327148438, |
|
"logps/rejected": -260.6737365722656, |
|
"loss": 0.6724, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.05076870322227478, |
|
"rewards/margins": 0.045312874019145966, |
|
"rewards/rejected": -0.09608156979084015, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 153, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6821690501730426, |
|
"train_runtime": 6321.1061, |
|
"train_samples_per_second": 3.101, |
|
"train_steps_per_second": 0.024 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 153, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|