|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 106, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 32.01167376803002, |
|
"learning_rate": 9.09090909090909e-09, |
|
"logits/chosen": -3.1918134689331055, |
|
"logits/rejected": -2.114994764328003, |
|
"logps/chosen": -43.23323059082031, |
|
"logps/rejected": -487.12335205078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 26.482209257499743, |
|
"learning_rate": 9.09090909090909e-08, |
|
"logits/chosen": -2.4537441730499268, |
|
"logits/rejected": -2.188873291015625, |
|
"logps/chosen": -104.89955139160156, |
|
"logps/rejected": -340.5350341796875, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5069444179534912, |
|
"rewards/chosen": -0.00020233324903529137, |
|
"rewards/margins": 0.0006850131321698427, |
|
"rewards/rejected": -0.0008873462211340666, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 27.340373748893274, |
|
"learning_rate": 9.780178907671787e-08, |
|
"logits/chosen": -2.5941455364227295, |
|
"logits/rejected": -2.2474465370178223, |
|
"logps/chosen": -112.54396057128906, |
|
"logps/rejected": -382.1304626464844, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.001845196820795536, |
|
"rewards/margins": 0.008222021162509918, |
|
"rewards/rejected": -0.006376823876053095, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 29.186381338310213, |
|
"learning_rate": 9.045084971874737e-08, |
|
"logits/chosen": -2.686591625213623, |
|
"logits/rejected": -2.179025173187256, |
|
"logps/chosen": -103.99493408203125, |
|
"logps/rejected": -395.27813720703125, |
|
"loss": 0.6802, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.005808202549815178, |
|
"rewards/margins": 0.02655060589313507, |
|
"rewards/rejected": -0.020742401480674744, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 25.724411216221245, |
|
"learning_rate": 7.871643313414717e-08, |
|
"logits/chosen": -2.667210578918457, |
|
"logits/rejected": -2.4061439037323, |
|
"logps/chosen": -100.63652038574219, |
|
"logps/rejected": -365.4666748046875, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.008249158971011639, |
|
"rewards/margins": 0.0507800467312336, |
|
"rewards/rejected": -0.042530883103609085, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 27.99539110543597, |
|
"learning_rate": 6.387014543809223e-08, |
|
"logits/chosen": -2.500103235244751, |
|
"logits/rejected": -2.2492752075195312, |
|
"logps/chosen": -121.36189270019531, |
|
"logps/rejected": -427.84686279296875, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.013524128124117851, |
|
"rewards/margins": 0.08421863615512848, |
|
"rewards/rejected": -0.07069449126720428, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 29.277621202155327, |
|
"learning_rate": 4.7520812266338875e-08, |
|
"logits/chosen": -2.5562338829040527, |
|
"logits/rejected": -2.3804821968078613, |
|
"logps/chosen": -104.37593078613281, |
|
"logps/rejected": -411.9266662597656, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.014270206913352013, |
|
"rewards/margins": 0.11923271417617798, |
|
"rewards/rejected": -0.10496251285076141, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 28.168581664040957, |
|
"learning_rate": 3.1440137554088955e-08, |
|
"logits/chosen": -2.517213821411133, |
|
"logits/rejected": -2.3926634788513184, |
|
"logps/chosen": -96.15052795410156, |
|
"logps/rejected": -427.611083984375, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 0.019482703879475594, |
|
"rewards/margins": 0.1615636646747589, |
|
"rewards/rejected": -0.14208097755908966, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 27.520854136320587, |
|
"learning_rate": 1.7370711923791564e-08, |
|
"logits/chosen": -2.592007875442505, |
|
"logits/rejected": -2.249636173248291, |
|
"logps/chosen": -104.5450668334961, |
|
"logps/rejected": -369.75421142578125, |
|
"loss": 0.6127, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.017579929903149605, |
|
"rewards/margins": 0.14949214458465576, |
|
"rewards/rejected": -0.1319122165441513, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 27.070318418613407, |
|
"learning_rate": 6.837175952121305e-09, |
|
"logits/chosen": -2.4734714031219482, |
|
"logits/rejected": -2.3791213035583496, |
|
"logps/chosen": -112.87300109863281, |
|
"logps/rejected": -397.2712707519531, |
|
"loss": 0.6142, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.019381331279873848, |
|
"rewards/margins": 0.17430000007152557, |
|
"rewards/rejected": -0.15491867065429688, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 25.619627780442052, |
|
"learning_rate": 9.81001706259532e-10, |
|
"logits/chosen": -2.4850308895111084, |
|
"logits/rejected": -2.2666268348693848, |
|
"logps/chosen": -106.1055908203125, |
|
"logps/rejected": -518.9412841796875, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.015003817155957222, |
|
"rewards/margins": 0.22597499191761017, |
|
"rewards/rejected": -0.2109711617231369, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 106, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6464094665815245, |
|
"train_runtime": 1236.0981, |
|
"train_samples_per_second": 5.464, |
|
"train_steps_per_second": 0.086 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 106, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|