|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 1000, |
|
"global_step": 125, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4128397206827493, |
|
"learning_rate": 3.846153846153847e-07, |
|
"logits/chosen": -1.73323655128479, |
|
"logits/rejected": -1.963712453842163, |
|
"logps/chosen": -64.71795654296875, |
|
"logps/rejected": -92.56527709960938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.35034784903451766, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.7505415678024292, |
|
"logits/rejected": -1.915618658065796, |
|
"logps/chosen": -63.550048828125, |
|
"logps/rejected": -88.24057006835938, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": -0.0015819871332496405, |
|
"rewards/margins": 0.0029085720889270306, |
|
"rewards/rejected": -0.004490559455007315, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.3741999171763754, |
|
"learning_rate": 4.9519632010080765e-06, |
|
"logits/chosen": -1.7421376705169678, |
|
"logits/rejected": -1.9737510681152344, |
|
"logps/chosen": -64.17735290527344, |
|
"logps/rejected": -103.7110366821289, |
|
"loss": 0.6532, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.03122868202626705, |
|
"rewards/margins": 0.09635920822620392, |
|
"rewards/rejected": -0.12758789956569672, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.21399782118300334, |
|
"learning_rate": 4.721114089947181e-06, |
|
"logits/chosen": -1.7182533740997314, |
|
"logits/rejected": -1.9536464214324951, |
|
"logps/chosen": -72.61327362060547, |
|
"logps/rejected": -138.14871215820312, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.10964199155569077, |
|
"rewards/margins": 0.36830809712409973, |
|
"rewards/rejected": -0.4779500961303711, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.20675592706765678, |
|
"learning_rate": 4.316650805085068e-06, |
|
"logits/chosen": -1.6047031879425049, |
|
"logits/rejected": -1.9040508270263672, |
|
"logps/chosen": -70.1319580078125, |
|
"logps/rejected": -172.32015991210938, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.12685254216194153, |
|
"rewards/margins": 0.6549729108810425, |
|
"rewards/rejected": -0.7818254828453064, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.32081236636733307, |
|
"learning_rate": 3.770188363116324e-06, |
|
"logits/chosen": -1.6701993942260742, |
|
"logits/rejected": -1.8626493215560913, |
|
"logps/chosen": -84.17461395263672, |
|
"logps/rejected": -167.61410522460938, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.21969938278198242, |
|
"rewards/margins": 0.575901985168457, |
|
"rewards/rejected": -0.7956013679504395, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.38597827976301535, |
|
"learning_rate": 3.1244411954180677e-06, |
|
"logits/chosen": -1.5896217823028564, |
|
"logits/rejected": -1.8747373819351196, |
|
"logps/chosen": -67.0400161743164, |
|
"logps/rejected": -191.59365844726562, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.08698524534702301, |
|
"rewards/margins": 0.882093071937561, |
|
"rewards/rejected": -0.9690783619880676, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.4702645263366345, |
|
"learning_rate": 2.429884359310328e-06, |
|
"logits/chosen": -1.572919249534607, |
|
"logits/rejected": -1.7691805362701416, |
|
"logps/chosen": -75.44322204589844, |
|
"logps/rejected": -183.8511962890625, |
|
"loss": 0.4811, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12143620103597641, |
|
"rewards/margins": 0.8210474252700806, |
|
"rewards/rejected": -0.9424835443496704, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.35995082384074073, |
|
"learning_rate": 1.7408081372259633e-06, |
|
"logits/chosen": -1.540151834487915, |
|
"logits/rejected": -1.7591164112091064, |
|
"logps/chosen": -64.67158508300781, |
|
"logps/rejected": -208.8355712890625, |
|
"loss": 0.3772, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.042745210230350494, |
|
"rewards/margins": 1.1398416757583618, |
|
"rewards/rejected": -1.182586908340454, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 0.3349159272932947, |
|
"learning_rate": 1.1110744174509952e-06, |
|
"logits/chosen": -1.5150226354599, |
|
"logits/rejected": -1.7239547967910767, |
|
"logps/chosen": -65.3616714477539, |
|
"logps/rejected": -226.32937622070312, |
|
"loss": 0.3725, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.033232785761356354, |
|
"rewards/margins": 1.3398171663284302, |
|
"rewards/rejected": -1.3730499744415283, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.3777138278726997, |
|
"learning_rate": 5.899065604459814e-07, |
|
"logits/chosen": -1.5114853382110596, |
|
"logits/rejected": -1.7169097661972046, |
|
"logps/chosen": -61.861358642578125, |
|
"logps/rejected": -252.6329345703125, |
|
"loss": 0.3478, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.0013225942384451628, |
|
"rewards/margins": 1.6115810871124268, |
|
"rewards/rejected": -1.6102584600448608, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.31092930022093734, |
|
"learning_rate": 2.1804183734670277e-07, |
|
"logits/chosen": -1.5211278200149536, |
|
"logits/rejected": -1.7239795923233032, |
|
"logps/chosen": -63.83014678955078, |
|
"logps/rejected": -241.92819213867188, |
|
"loss": 0.3522, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.014286426827311516, |
|
"rewards/margins": 1.5209068059921265, |
|
"rewards/rejected": -1.5351933240890503, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 0.336920496075524, |
|
"learning_rate": 2.454718665888589e-08, |
|
"logits/chosen": -1.4646048545837402, |
|
"logits/rejected": -1.6882946491241455, |
|
"logps/chosen": -61.223609924316406, |
|
"logps/rejected": -262.6458435058594, |
|
"loss": 0.3387, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.0028733056969940662, |
|
"rewards/margins": 1.7208553552627563, |
|
"rewards/rejected": -1.7237287759780884, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 125, |
|
"total_flos": 0.0, |
|
"train_loss": 0.46972908115386963, |
|
"train_runtime": 2062.102, |
|
"train_samples_per_second": 3.88, |
|
"train_steps_per_second": 0.061 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 125, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|