|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 122, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 36.349710133839, |
|
"learning_rate": 3.846153846153846e-08, |
|
"logits/chosen": -3.5315005779266357, |
|
"logits/rejected": -3.440955638885498, |
|
"logps/chosen": -912.1570434570312, |
|
"logps/rejected": -1378.036376953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 41.12013014177843, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -3.55020809173584, |
|
"logits/rejected": -3.4724764823913574, |
|
"logps/chosen": -894.586181640625, |
|
"logps/rejected": -1449.21484375, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.5069444179534912, |
|
"rewards/chosen": 0.0002599477011244744, |
|
"rewards/margins": 0.008236742578446865, |
|
"rewards/rejected": -0.007976794615387917, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 29.704748896674143, |
|
"learning_rate": 4.949291683053768e-07, |
|
"logits/chosen": -3.5908989906311035, |
|
"logits/rejected": -3.5577595233917236, |
|
"logps/chosen": -912.923828125, |
|
"logps/rejected": -1338.394775390625, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.04868435114622116, |
|
"rewards/margins": 0.28125035762786865, |
|
"rewards/rejected": -0.2325659692287445, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 26.273281316637295, |
|
"learning_rate": 4.70586371748506e-07, |
|
"logits/chosen": -3.810521364212036, |
|
"logits/rejected": -3.7334792613983154, |
|
"logps/chosen": -955.4530029296875, |
|
"logps/rejected": -1488.5167236328125, |
|
"loss": 0.366, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.09059515595436096, |
|
"rewards/margins": 1.322347640991211, |
|
"rewards/rejected": -1.412942886352539, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 43.496160834466956, |
|
"learning_rate": 4.280458575653296e-07, |
|
"logits/chosen": -3.996204376220703, |
|
"logits/rejected": -3.956129789352417, |
|
"logps/chosen": -989.1363525390625, |
|
"logps/rejected": -1658.587158203125, |
|
"loss": 0.3256, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.5707166194915771, |
|
"rewards/margins": 2.6343164443969727, |
|
"rewards/rejected": -3.20503306388855, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 44.2304132089698, |
|
"learning_rate": 3.7081709127108767e-07, |
|
"logits/chosen": -4.008645057678223, |
|
"logits/rejected": -3.9912617206573486, |
|
"logps/chosen": -1022.1027221679688, |
|
"logps/rejected": -1825.446533203125, |
|
"loss": 0.1878, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.7217316031455994, |
|
"rewards/margins": 3.8372483253479004, |
|
"rewards/rejected": -4.5589799880981445, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 17.524621217634877, |
|
"learning_rate": 3.0362127536287636e-07, |
|
"logits/chosen": -4.027331352233887, |
|
"logits/rejected": -4.025083065032959, |
|
"logps/chosen": -1039.8206787109375, |
|
"logps/rejected": -1963.096435546875, |
|
"loss": 0.1549, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.7357276082038879, |
|
"rewards/margins": 4.72170877456665, |
|
"rewards/rejected": -5.457436561584473, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 14.62238967073387, |
|
"learning_rate": 2.3200186419770823e-07, |
|
"logits/chosen": -3.992643356323242, |
|
"logits/rejected": -3.9795494079589844, |
|
"logps/chosen": -954.447265625, |
|
"logps/rejected": -1917.7783203125, |
|
"loss": 0.153, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.6473932266235352, |
|
"rewards/margins": 4.5014448165893555, |
|
"rewards/rejected": -5.148838043212891, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 26.239628326203597, |
|
"learning_rate": 1.6186724554503237e-07, |
|
"logits/chosen": -3.955888032913208, |
|
"logits/rejected": -3.937206745147705, |
|
"logps/chosen": -976.7513427734375, |
|
"logps/rejected": -1973.913818359375, |
|
"loss": 0.1099, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.7037805318832397, |
|
"rewards/margins": 5.219768524169922, |
|
"rewards/rejected": -5.923549175262451, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 11.522776918137943, |
|
"learning_rate": 9.900331622138063e-08, |
|
"logits/chosen": -3.967766523361206, |
|
"logits/rejected": -3.948270797729492, |
|
"logps/chosen": -1030.423583984375, |
|
"logps/rejected": -2060.10205078125, |
|
"loss": 0.1077, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.9134801626205444, |
|
"rewards/margins": 5.445040225982666, |
|
"rewards/rejected": -6.358519554138184, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 17.331411142935814, |
|
"learning_rate": 4.859616286322094e-08, |
|
"logits/chosen": -3.9533779621124268, |
|
"logits/rejected": -3.9539833068847656, |
|
"logps/chosen": -1035.493896484375, |
|
"logps/rejected": -1998.699951171875, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.9830253720283508, |
|
"rewards/margins": 5.410158634185791, |
|
"rewards/rejected": -6.393183708190918, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_logits/chosen": -4.367298126220703, |
|
"eval_logits/rejected": -3.9096977710723877, |
|
"eval_logps/chosen": -250.05014038085938, |
|
"eval_logps/rejected": -632.3324584960938, |
|
"eval_loss": 0.46643248200416565, |
|
"eval_rewards/accuracies": 0.875, |
|
"eval_rewards/chosen": -0.30725225806236267, |
|
"eval_rewards/margins": 0.5298991203308105, |
|
"eval_rewards/rejected": -0.8371513485908508, |
|
"eval_runtime": 3.458, |
|
"eval_samples_per_second": 3.47, |
|
"eval_steps_per_second": 0.289, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 16.867635583735968, |
|
"learning_rate": 1.4804225250339281e-08, |
|
"logits/chosen": -3.932652235031128, |
|
"logits/rejected": -3.946476459503174, |
|
"logps/chosen": -941.6383056640625, |
|
"logps/rejected": -2050.407470703125, |
|
"loss": 0.1244, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.7304863333702087, |
|
"rewards/margins": 6.1875152587890625, |
|
"rewards/rejected": -6.918001651763916, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 33.91302977437583, |
|
"learning_rate": 4.152374292708538e-10, |
|
"logits/chosen": -3.9486804008483887, |
|
"logits/rejected": -3.9186534881591797, |
|
"logps/chosen": -952.8955078125, |
|
"logps/rejected": -1983.6126708984375, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.8376309275627136, |
|
"rewards/margins": 5.359804153442383, |
|
"rewards/rejected": -6.197434902191162, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 122, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2501322243545876, |
|
"train_runtime": 1891.0061, |
|
"train_samples_per_second": 4.125, |
|
"train_steps_per_second": 0.065 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 122, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|