Minbyul's picture
Model save
8d9775b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 122,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 36.349710133839,
"learning_rate": 3.846153846153846e-08,
"logits/chosen": -3.5315005779266357,
"logits/rejected": -3.440955638885498,
"logps/chosen": -912.1570434570312,
"logps/rejected": -1378.036376953125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.08,
"grad_norm": 41.12013014177843,
"learning_rate": 3.8461538461538463e-07,
"logits/chosen": -3.55020809173584,
"logits/rejected": -3.4724764823913574,
"logps/chosen": -894.586181640625,
"logps/rejected": -1449.21484375,
"loss": 0.6888,
"rewards/accuracies": 0.5069444179534912,
"rewards/chosen": 0.0002599477011244744,
"rewards/margins": 0.008236742578446865,
"rewards/rejected": -0.007976794615387917,
"step": 10
},
{
"epoch": 0.16,
"grad_norm": 29.704748896674143,
"learning_rate": 4.949291683053768e-07,
"logits/chosen": -3.5908989906311035,
"logits/rejected": -3.5577595233917236,
"logps/chosen": -912.923828125,
"logps/rejected": -1338.394775390625,
"loss": 0.5791,
"rewards/accuracies": 0.90625,
"rewards/chosen": 0.04868435114622116,
"rewards/margins": 0.28125035762786865,
"rewards/rejected": -0.2325659692287445,
"step": 20
},
{
"epoch": 0.25,
"grad_norm": 26.273281316637295,
"learning_rate": 4.70586371748506e-07,
"logits/chosen": -3.810521364212036,
"logits/rejected": -3.7334792613983154,
"logps/chosen": -955.4530029296875,
"logps/rejected": -1488.5167236328125,
"loss": 0.366,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.09059515595436096,
"rewards/margins": 1.322347640991211,
"rewards/rejected": -1.412942886352539,
"step": 30
},
{
"epoch": 0.33,
"grad_norm": 43.496160834466956,
"learning_rate": 4.280458575653296e-07,
"logits/chosen": -3.996204376220703,
"logits/rejected": -3.956129789352417,
"logps/chosen": -989.1363525390625,
"logps/rejected": -1658.587158203125,
"loss": 0.3256,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.5707166194915771,
"rewards/margins": 2.6343164443969727,
"rewards/rejected": -3.20503306388855,
"step": 40
},
{
"epoch": 0.41,
"grad_norm": 44.2304132089698,
"learning_rate": 3.7081709127108767e-07,
"logits/chosen": -4.008645057678223,
"logits/rejected": -3.9912617206573486,
"logps/chosen": -1022.1027221679688,
"logps/rejected": -1825.446533203125,
"loss": 0.1878,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.7217316031455994,
"rewards/margins": 3.8372483253479004,
"rewards/rejected": -4.5589799880981445,
"step": 50
},
{
"epoch": 0.49,
"grad_norm": 17.524621217634877,
"learning_rate": 3.0362127536287636e-07,
"logits/chosen": -4.027331352233887,
"logits/rejected": -4.025083065032959,
"logps/chosen": -1039.8206787109375,
"logps/rejected": -1963.096435546875,
"loss": 0.1549,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.7357276082038879,
"rewards/margins": 4.72170877456665,
"rewards/rejected": -5.457436561584473,
"step": 60
},
{
"epoch": 0.57,
"grad_norm": 14.62238967073387,
"learning_rate": 2.3200186419770823e-07,
"logits/chosen": -3.992643356323242,
"logits/rejected": -3.9795494079589844,
"logps/chosen": -954.447265625,
"logps/rejected": -1917.7783203125,
"loss": 0.153,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.6473932266235352,
"rewards/margins": 4.5014448165893555,
"rewards/rejected": -5.148838043212891,
"step": 70
},
{
"epoch": 0.66,
"grad_norm": 26.239628326203597,
"learning_rate": 1.6186724554503237e-07,
"logits/chosen": -3.955888032913208,
"logits/rejected": -3.937206745147705,
"logps/chosen": -976.7513427734375,
"logps/rejected": -1973.913818359375,
"loss": 0.1099,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.7037805318832397,
"rewards/margins": 5.219768524169922,
"rewards/rejected": -5.923549175262451,
"step": 80
},
{
"epoch": 0.74,
"grad_norm": 11.522776918137943,
"learning_rate": 9.900331622138063e-08,
"logits/chosen": -3.967766523361206,
"logits/rejected": -3.948270797729492,
"logps/chosen": -1030.423583984375,
"logps/rejected": -2060.10205078125,
"loss": 0.1077,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.9134801626205444,
"rewards/margins": 5.445040225982666,
"rewards/rejected": -6.358519554138184,
"step": 90
},
{
"epoch": 0.82,
"grad_norm": 17.331411142935814,
"learning_rate": 4.859616286322094e-08,
"logits/chosen": -3.9533779621124268,
"logits/rejected": -3.9539833068847656,
"logps/chosen": -1035.493896484375,
"logps/rejected": -1998.699951171875,
"loss": 0.1251,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.9830253720283508,
"rewards/margins": 5.410158634185791,
"rewards/rejected": -6.393183708190918,
"step": 100
},
{
"epoch": 0.82,
"eval_logits/chosen": -4.367298126220703,
"eval_logits/rejected": -3.9096977710723877,
"eval_logps/chosen": -250.05014038085938,
"eval_logps/rejected": -632.3324584960938,
"eval_loss": 0.46643248200416565,
"eval_rewards/accuracies": 0.875,
"eval_rewards/chosen": -0.30725225806236267,
"eval_rewards/margins": 0.5298991203308105,
"eval_rewards/rejected": -0.8371513485908508,
"eval_runtime": 3.458,
"eval_samples_per_second": 3.47,
"eval_steps_per_second": 0.289,
"step": 100
},
{
"epoch": 0.9,
"grad_norm": 16.867635583735968,
"learning_rate": 1.4804225250339281e-08,
"logits/chosen": -3.932652235031128,
"logits/rejected": -3.946476459503174,
"logps/chosen": -941.6383056640625,
"logps/rejected": -2050.407470703125,
"loss": 0.1244,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.7304863333702087,
"rewards/margins": 6.1875152587890625,
"rewards/rejected": -6.918001651763916,
"step": 110
},
{
"epoch": 0.98,
"grad_norm": 33.91302977437583,
"learning_rate": 4.152374292708538e-10,
"logits/chosen": -3.9486804008483887,
"logits/rejected": -3.9186534881591797,
"logps/chosen": -952.8955078125,
"logps/rejected": -1983.6126708984375,
"loss": 0.1104,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.8376309275627136,
"rewards/margins": 5.359804153442383,
"rewards/rejected": -6.197434902191162,
"step": 120
},
{
"epoch": 1.0,
"step": 122,
"total_flos": 0.0,
"train_loss": 0.2501322243545876,
"train_runtime": 1891.0061,
"train_samples_per_second": 4.125,
"train_steps_per_second": 0.065
}
],
"logging_steps": 10,
"max_steps": 122,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}