Qin Liu
Model save
3006891 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 1000,
"global_step": 125,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"grad_norm": 0.4128397206827493,
"learning_rate": 3.846153846153847e-07,
"logits/chosen": -1.73323655128479,
"logits/rejected": -1.963712453842163,
"logps/chosen": -64.71795654296875,
"logps/rejected": -92.56527709960938,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.4,
"grad_norm": 0.35034784903451766,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": -1.7505415678024292,
"logits/rejected": -1.915618658065796,
"logps/chosen": -63.550048828125,
"logps/rejected": -88.24057006835938,
"loss": 0.6913,
"rewards/accuracies": 0.5555555820465088,
"rewards/chosen": -0.0015819871332496405,
"rewards/margins": 0.0029085720889270306,
"rewards/rejected": -0.004490559455007315,
"step": 10
},
{
"epoch": 0.8,
"grad_norm": 0.3741999171763754,
"learning_rate": 4.9519632010080765e-06,
"logits/chosen": -1.7421376705169678,
"logits/rejected": -1.9737510681152344,
"logps/chosen": -64.17735290527344,
"logps/rejected": -103.7110366821289,
"loss": 0.6532,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.03122868202626705,
"rewards/margins": 0.09635920822620392,
"rewards/rejected": -0.12758789956569672,
"step": 20
},
{
"epoch": 1.2,
"grad_norm": 0.21399782118300334,
"learning_rate": 4.721114089947181e-06,
"logits/chosen": -1.7182533740997314,
"logits/rejected": -1.9536464214324951,
"logps/chosen": -72.61327362060547,
"logps/rejected": -138.14871215820312,
"loss": 0.5782,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.10964199155569077,
"rewards/margins": 0.36830809712409973,
"rewards/rejected": -0.4779500961303711,
"step": 30
},
{
"epoch": 1.6,
"grad_norm": 0.20675592706765678,
"learning_rate": 4.316650805085068e-06,
"logits/chosen": -1.6047031879425049,
"logits/rejected": -1.9040508270263672,
"logps/chosen": -70.1319580078125,
"logps/rejected": -172.32015991210938,
"loss": 0.5474,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.12685254216194153,
"rewards/margins": 0.6549729108810425,
"rewards/rejected": -0.7818254828453064,
"step": 40
},
{
"epoch": 2.0,
"grad_norm": 0.32081236636733307,
"learning_rate": 3.770188363116324e-06,
"logits/chosen": -1.6701993942260742,
"logits/rejected": -1.8626493215560913,
"logps/chosen": -84.17461395263672,
"logps/rejected": -167.61410522460938,
"loss": 0.5244,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.21969938278198242,
"rewards/margins": 0.575901985168457,
"rewards/rejected": -0.7956013679504395,
"step": 50
},
{
"epoch": 2.4,
"grad_norm": 0.38597827976301535,
"learning_rate": 3.1244411954180677e-06,
"logits/chosen": -1.5896217823028564,
"logits/rejected": -1.8747373819351196,
"logps/chosen": -67.0400161743164,
"logps/rejected": -191.59365844726562,
"loss": 0.4726,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": -0.08698524534702301,
"rewards/margins": 0.882093071937561,
"rewards/rejected": -0.9690783619880676,
"step": 60
},
{
"epoch": 2.8,
"grad_norm": 0.4702645263366345,
"learning_rate": 2.429884359310328e-06,
"logits/chosen": -1.572919249534607,
"logits/rejected": -1.7691805362701416,
"logps/chosen": -75.44322204589844,
"logps/rejected": -183.8511962890625,
"loss": 0.4811,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.12143620103597641,
"rewards/margins": 0.8210474252700806,
"rewards/rejected": -0.9424835443496704,
"step": 70
},
{
"epoch": 3.2,
"grad_norm": 0.35995082384074073,
"learning_rate": 1.7408081372259633e-06,
"logits/chosen": -1.540151834487915,
"logits/rejected": -1.7591164112091064,
"logps/chosen": -64.67158508300781,
"logps/rejected": -208.8355712890625,
"loss": 0.3772,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.042745210230350494,
"rewards/margins": 1.1398416757583618,
"rewards/rejected": -1.182586908340454,
"step": 80
},
{
"epoch": 3.6,
"grad_norm": 0.3349159272932947,
"learning_rate": 1.1110744174509952e-06,
"logits/chosen": -1.5150226354599,
"logits/rejected": -1.7239547967910767,
"logps/chosen": -65.3616714477539,
"logps/rejected": -226.32937622070312,
"loss": 0.3725,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.033232785761356354,
"rewards/margins": 1.3398171663284302,
"rewards/rejected": -1.3730499744415283,
"step": 90
},
{
"epoch": 4.0,
"grad_norm": 0.3777138278726997,
"learning_rate": 5.899065604459814e-07,
"logits/chosen": -1.5114853382110596,
"logits/rejected": -1.7169097661972046,
"logps/chosen": -61.861358642578125,
"logps/rejected": -252.6329345703125,
"loss": 0.3478,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 0.0013225942384451628,
"rewards/margins": 1.6115810871124268,
"rewards/rejected": -1.6102584600448608,
"step": 100
},
{
"epoch": 4.4,
"grad_norm": 0.31092930022093734,
"learning_rate": 2.1804183734670277e-07,
"logits/chosen": -1.5211278200149536,
"logits/rejected": -1.7239795923233032,
"logps/chosen": -63.83014678955078,
"logps/rejected": -241.92819213867188,
"loss": 0.3522,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.014286426827311516,
"rewards/margins": 1.5209068059921265,
"rewards/rejected": -1.5351933240890503,
"step": 110
},
{
"epoch": 4.8,
"grad_norm": 0.336920496075524,
"learning_rate": 2.454718665888589e-08,
"logits/chosen": -1.4646048545837402,
"logits/rejected": -1.6882946491241455,
"logps/chosen": -61.223609924316406,
"logps/rejected": -262.6458435058594,
"loss": 0.3387,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.0028733056969940662,
"rewards/margins": 1.7208553552627563,
"rewards/rejected": -1.7237287759780884,
"step": 120
},
{
"epoch": 5.0,
"step": 125,
"total_flos": 0.0,
"train_loss": 0.46972908115386963,
"train_runtime": 2062.102,
"train_samples_per_second": 3.88,
"train_steps_per_second": 0.061
}
],
"logging_steps": 10,
"max_steps": 125,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}