{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.992,
  "eval_steps": 500,
  "global_step": 31,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.032,
      "grad_norm": 8.94253801855711,
      "learning_rate": 1.25e-07,
      "logits/chosen": -2.838801383972168,
      "logits/rejected": -2.8086488246917725,
      "logps/chosen": -257.9438171386719,
      "logps/pi_response": -93.52229309082031,
      "logps/ref_response": -93.52229309082031,
      "logps/rejected": -216.83636474609375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.32,
      "grad_norm": 7.275195524114833,
      "learning_rate": 4.415111107797445e-07,
      "logits/chosen": -2.8140869140625,
      "logits/rejected": -2.8060314655303955,
      "logps/chosen": -246.06365966796875,
      "logps/pi_response": -74.96839141845703,
      "logps/ref_response": -74.20439147949219,
      "logps/rejected": -178.8218536376953,
      "loss": 0.6863,
      "rewards/accuracies": 0.5972222089767456,
      "rewards/chosen": 0.016777267679572105,
      "rewards/margins": 0.01401255652308464,
      "rewards/rejected": 0.0027647078968584538,
      "step": 10
    },
    {
      "epoch": 0.64,
      "grad_norm": 6.738990682860812,
      "learning_rate": 1.782991918222275e-07,
      "logits/chosen": -2.693695545196533,
      "logits/rejected": -2.6687941551208496,
      "logps/chosen": -225.8972625732422,
      "logps/pi_response": -78.36871337890625,
      "logps/ref_response": -68.63687896728516,
      "logps/rejected": -179.7605743408203,
      "loss": 0.6544,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.040796808898448944,
      "rewards/margins": 0.08692635595798492,
      "rewards/rejected": -0.04612954705953598,
      "step": 20
    },
    {
      "epoch": 0.96,
      "grad_norm": 6.623989399704757,
      "learning_rate": 1.690410564514244e-09,
      "logits/chosen": -2.707422971725464,
      "logits/rejected": -2.6639552116394043,
      "logps/chosen": -243.7229461669922,
      "logps/pi_response": -95.37244415283203,
      "logps/ref_response": -70.27430725097656,
      "logps/rejected": -177.5935821533203,
      "loss": 0.6407,
      "rewards/accuracies": 0.715624988079071,
      "rewards/chosen": -0.027013743296265602,
      "rewards/margins": 0.13096943497657776,
      "rewards/rejected": -0.15798316895961761,
      "step": 30
    },
    {
      "epoch": 0.992,
      "step": 31,
      "total_flos": 0.0,
      "train_loss": 0.6603157808703761,
      "train_runtime": 1409.3993,
      "train_samples_per_second": 5.676,
      "train_steps_per_second": 0.022
    }
  ],
  "logging_steps": 10,
  "max_steps": 31,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}