|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 1000, |
|
"global_step": 125, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.490319312063127, |
|
"learning_rate": 7.692307692307694e-07, |
|
"logits/chosen": -1.73323655128479, |
|
"logits/rejected": -1.963712453842163, |
|
"logps/chosen": -64.71795654296875, |
|
"logps/rejected": -92.56527709960938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.4753255673765837, |
|
"learning_rate": 7.692307692307694e-06, |
|
"logits/chosen": -1.6273242235183716, |
|
"logits/rejected": -1.7998943328857422, |
|
"logps/chosen": -78.1596450805664, |
|
"logps/rejected": -136.78704833984375, |
|
"loss": 0.5986, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.14767809212207794, |
|
"rewards/margins": 0.3422772288322449, |
|
"rewards/rejected": -0.48995527625083923, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.4922013042570934, |
|
"learning_rate": 9.903926402016153e-06, |
|
"logits/chosen": -1.132021188735962, |
|
"logits/rejected": -1.4362722635269165, |
|
"logps/chosen": -60.54664993286133, |
|
"logps/rejected": -335.38629150390625, |
|
"loss": 0.3069, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 0.005078454967588186, |
|
"rewards/margins": 2.449418783187866, |
|
"rewards/rejected": -2.444340467453003, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.08650022357203001, |
|
"learning_rate": 9.442228179894362e-06, |
|
"logits/chosen": -1.4244499206542969, |
|
"logits/rejected": -1.7115017175674438, |
|
"logps/chosen": -159.7740020751953, |
|
"logps/rejected": -776.4279174804688, |
|
"loss": 0.0327, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.981249213218689, |
|
"rewards/margins": 5.87949275970459, |
|
"rewards/rejected": -6.86074161529541, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.6071478933413408, |
|
"learning_rate": 8.633301610170136e-06, |
|
"logits/chosen": -1.8324272632598877, |
|
"logits/rejected": -2.274733781814575, |
|
"logps/chosen": -173.323486328125, |
|
"logps/rejected": -1064.7208251953125, |
|
"loss": 0.017, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.1587677001953125, |
|
"rewards/margins": 8.547063827514648, |
|
"rewards/rejected": -9.705831527709961, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 10.37379661402861, |
|
"learning_rate": 7.540376726232648e-06, |
|
"logits/chosen": -2.072046995162964, |
|
"logits/rejected": -2.5416159629821777, |
|
"logps/chosen": -201.16822814941406, |
|
"logps/rejected": -1270.976806640625, |
|
"loss": 0.0143, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.3896355628967285, |
|
"rewards/margins": 10.439592361450195, |
|
"rewards/rejected": -11.829228401184082, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.08724694925694036, |
|
"learning_rate": 6.248882390836135e-06, |
|
"logits/chosen": -1.5749847888946533, |
|
"logits/rejected": -2.108443021774292, |
|
"logps/chosen": -206.0604248046875, |
|
"logps/rejected": -1066.5167236328125, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.477189302444458, |
|
"rewards/margins": 8.241119384765625, |
|
"rewards/rejected": -9.718308448791504, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 1.5412553598400243, |
|
"learning_rate": 4.859768718620656e-06, |
|
"logits/chosen": -1.5107686519622803, |
|
"logits/rejected": -2.1837661266326904, |
|
"logps/chosen": -248.97695922851562, |
|
"logps/rejected": -1404.155517578125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8567733764648438, |
|
"rewards/margins": 11.288753509521484, |
|
"rewards/rejected": -13.145527839660645, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.008991447421855448, |
|
"learning_rate": 3.4816162744519266e-06, |
|
"logits/chosen": -1.091412901878357, |
|
"logits/rejected": -1.767606496810913, |
|
"logps/chosen": -267.06280517578125, |
|
"logps/rejected": -1556.9952392578125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.066657066345215, |
|
"rewards/margins": 12.597526550292969, |
|
"rewards/rejected": -14.6641845703125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 0.07467711604243485, |
|
"learning_rate": 2.2221488349019903e-06, |
|
"logits/chosen": -1.1434813737869263, |
|
"logits/rejected": -1.8074337244033813, |
|
"logps/chosen": -272.1551513671875, |
|
"logps/rejected": -1601.199951171875, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.1011674404144287, |
|
"rewards/margins": 13.020586013793945, |
|
"rewards/rejected": -15.121752738952637, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.002962899425955675, |
|
"learning_rate": 1.1798131208919628e-06, |
|
"logits/chosen": -1.2595702409744263, |
|
"logits/rejected": -1.8675899505615234, |
|
"logps/chosen": -248.87960815429688, |
|
"logps/rejected": -1562.575927734375, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.868859887123108, |
|
"rewards/margins": 12.840827941894531, |
|
"rewards/rejected": -14.709688186645508, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.0049241554457892705, |
|
"learning_rate": 4.3608367469340553e-07, |
|
"logits/chosen": -1.464623212814331, |
|
"logits/rejected": -2.089043378829956, |
|
"logps/chosen": -271.04827880859375, |
|
"logps/rejected": -1640.91796875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.086467742919922, |
|
"rewards/margins": 13.438623428344727, |
|
"rewards/rejected": -15.525090217590332, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 0.016229074824409925, |
|
"learning_rate": 4.909437331777178e-08, |
|
"logits/chosen": -1.3727686405181885, |
|
"logits/rejected": -2.019606351852417, |
|
"logps/chosen": -249.13632202148438, |
|
"logps/rejected": -1610.74951171875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8820005655288696, |
|
"rewards/margins": 13.32276439666748, |
|
"rewards/rejected": -15.204765319824219, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 125, |
|
"total_flos": 0.0, |
|
"train_loss": 0.07995766662200912, |
|
"train_runtime": 9770.964, |
|
"train_samples_per_second": 0.819, |
|
"train_steps_per_second": 0.013 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 125, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|