Minbyul's picture
Model save
cde4a6a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 106,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 32.01167376803002,
"learning_rate": 9.09090909090909e-09,
"logits/chosen": -3.1918134689331055,
"logits/rejected": -2.114994764328003,
"logps/chosen": -43.23323059082031,
"logps/rejected": -487.12335205078125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.09,
"grad_norm": 26.482209257499743,
"learning_rate": 9.09090909090909e-08,
"logits/chosen": -2.4537441730499268,
"logits/rejected": -2.188873291015625,
"logps/chosen": -104.89955139160156,
"logps/rejected": -340.5350341796875,
"loss": 0.6929,
"rewards/accuracies": 0.5069444179534912,
"rewards/chosen": -0.00020233324903529137,
"rewards/margins": 0.0006850131321698427,
"rewards/rejected": -0.0008873462211340666,
"step": 10
},
{
"epoch": 0.19,
"grad_norm": 27.340373748893274,
"learning_rate": 9.780178907671787e-08,
"logits/chosen": -2.5941455364227295,
"logits/rejected": -2.2474465370178223,
"logps/chosen": -112.54396057128906,
"logps/rejected": -382.1304626464844,
"loss": 0.6893,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.001845196820795536,
"rewards/margins": 0.008222021162509918,
"rewards/rejected": -0.006376823876053095,
"step": 20
},
{
"epoch": 0.28,
"grad_norm": 29.186381338310213,
"learning_rate": 9.045084971874737e-08,
"logits/chosen": -2.686591625213623,
"logits/rejected": -2.179025173187256,
"logps/chosen": -103.99493408203125,
"logps/rejected": -395.27813720703125,
"loss": 0.6802,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.005808202549815178,
"rewards/margins": 0.02655060589313507,
"rewards/rejected": -0.020742401480674744,
"step": 30
},
{
"epoch": 0.38,
"grad_norm": 25.724411216221245,
"learning_rate": 7.871643313414717e-08,
"logits/chosen": -2.667210578918457,
"logits/rejected": -2.4061439037323,
"logps/chosen": -100.63652038574219,
"logps/rejected": -365.4666748046875,
"loss": 0.6685,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 0.008249158971011639,
"rewards/margins": 0.0507800467312336,
"rewards/rejected": -0.042530883103609085,
"step": 40
},
{
"epoch": 0.47,
"grad_norm": 27.99539110543597,
"learning_rate": 6.387014543809223e-08,
"logits/chosen": -2.500103235244751,
"logits/rejected": -2.2492752075195312,
"logps/chosen": -121.36189270019531,
"logps/rejected": -427.84686279296875,
"loss": 0.6553,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.013524128124117851,
"rewards/margins": 0.08421863615512848,
"rewards/rejected": -0.07069449126720428,
"step": 50
},
{
"epoch": 0.57,
"grad_norm": 29.277621202155327,
"learning_rate": 4.7520812266338875e-08,
"logits/chosen": -2.5562338829040527,
"logits/rejected": -2.3804821968078613,
"logps/chosen": -104.37593078613281,
"logps/rejected": -411.9266662597656,
"loss": 0.6401,
"rewards/accuracies": 0.96875,
"rewards/chosen": 0.014270206913352013,
"rewards/margins": 0.11923271417617798,
"rewards/rejected": -0.10496251285076141,
"step": 60
},
{
"epoch": 0.66,
"grad_norm": 28.168581664040957,
"learning_rate": 3.1440137554088955e-08,
"logits/chosen": -2.517213821411133,
"logits/rejected": -2.3926634788513184,
"logps/chosen": -96.15052795410156,
"logps/rejected": -427.611083984375,
"loss": 0.6226,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 0.019482703879475594,
"rewards/margins": 0.1615636646747589,
"rewards/rejected": -0.14208097755908966,
"step": 70
},
{
"epoch": 0.75,
"grad_norm": 27.520854136320587,
"learning_rate": 1.7370711923791564e-08,
"logits/chosen": -2.592007875442505,
"logits/rejected": -2.249636173248291,
"logps/chosen": -104.5450668334961,
"logps/rejected": -369.75421142578125,
"loss": 0.6127,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 0.017579929903149605,
"rewards/margins": 0.14949214458465576,
"rewards/rejected": -0.1319122165441513,
"step": 80
},
{
"epoch": 0.85,
"grad_norm": 27.070318418613407,
"learning_rate": 6.837175952121305e-09,
"logits/chosen": -2.4734714031219482,
"logits/rejected": -2.3791213035583496,
"logps/chosen": -112.87300109863281,
"logps/rejected": -397.2712707519531,
"loss": 0.6142,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 0.019381331279873848,
"rewards/margins": 0.17430000007152557,
"rewards/rejected": -0.15491867065429688,
"step": 90
},
{
"epoch": 0.94,
"grad_norm": 25.619627780442052,
"learning_rate": 9.81001706259532e-10,
"logits/chosen": -2.4850308895111084,
"logits/rejected": -2.2666268348693848,
"logps/chosen": -106.1055908203125,
"logps/rejected": -518.9412841796875,
"loss": 0.6061,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.015003817155957222,
"rewards/margins": 0.22597499191761017,
"rewards/rejected": -0.2109711617231369,
"step": 100
},
{
"epoch": 1.0,
"step": 106,
"total_flos": 0.0,
"train_loss": 0.6464094665815245,
"train_runtime": 1236.0981,
"train_samples_per_second": 5.464,
"train_steps_per_second": 0.086
}
],
"logging_steps": 10,
"max_steps": 106,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}