|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 125, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 477.2496337890625, |
|
"learning_rate": 7.692307692307694e-07, |
|
"logits/chosen": 2.15625, |
|
"logits/rejected": 1.4375, |
|
"logps/chosen": -146.0, |
|
"logps/rejected": -131.0, |
|
"loss": 2.7656, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.534887996967882e-05, |
|
"learning_rate": 7.692307692307694e-06, |
|
"logits/chosen": 1.8359375, |
|
"logits/rejected": 1.1328125, |
|
"logps/chosen": -125.0, |
|
"logps/rejected": -187.0, |
|
"loss": 0.9163, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 1.46875, |
|
"rewards/margins": 8.3125, |
|
"rewards/rejected": -6.84375, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 6.47921126473161e-12, |
|
"learning_rate": 9.903926402016153e-06, |
|
"logits/chosen": 0.3828125, |
|
"logits/rejected": 0.357421875, |
|
"logps/chosen": -160.0, |
|
"logps/rejected": -540.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.921875, |
|
"rewards/margins": 39.25, |
|
"rewards/rejected": -42.25, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 5.994430991895002e-15, |
|
"learning_rate": 9.442228179894362e-06, |
|
"logits/chosen": -0.060791015625, |
|
"logits/rejected": -0.28515625, |
|
"logps/chosen": -264.0, |
|
"logps/rejected": -728.0, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.9375, |
|
"rewards/margins": 47.25, |
|
"rewards/rejected": -61.25, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 5.643975006320093e-14, |
|
"learning_rate": 8.633301610170136e-06, |
|
"logits/chosen": 1.0703125, |
|
"logits/rejected": 0.59375, |
|
"logps/chosen": -164.0, |
|
"logps/rejected": -740.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.609375, |
|
"rewards/margins": 59.25, |
|
"rewards/rejected": -62.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.0163520159099946e-11, |
|
"learning_rate": 7.540376726232648e-06, |
|
"logits/chosen": 1.234375, |
|
"logits/rejected": 0.6640625, |
|
"logps/chosen": -130.0, |
|
"logps/rejected": -636.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.015625, |
|
"rewards/margins": 52.0, |
|
"rewards/rejected": -53.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 4.786671276360566e-15, |
|
"learning_rate": 6.248882390836135e-06, |
|
"logits/chosen": 1.359375, |
|
"logits/rejected": 0.78515625, |
|
"logps/chosen": -146.0, |
|
"logps/rejected": -660.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.71875, |
|
"rewards/margins": 52.5, |
|
"rewards/rejected": -54.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.9845672956549099e-13, |
|
"learning_rate": 4.859768718620656e-06, |
|
"logits/chosen": 1.359375, |
|
"logits/rejected": 0.83984375, |
|
"logps/chosen": -150.0, |
|
"logps/rejected": -640.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.58203125, |
|
"rewards/margins": 52.0, |
|
"rewards/rejected": -52.5, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 3.1451468355701492e-15, |
|
"learning_rate": 3.4816162744519266e-06, |
|
"logits/chosen": 1.3125, |
|
"logits/rejected": 0.73828125, |
|
"logps/chosen": -149.0, |
|
"logps/rejected": -688.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.97265625, |
|
"rewards/margins": 55.75, |
|
"rewards/rejected": -56.75, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.8802392639811344e-13, |
|
"learning_rate": 2.2221488349019903e-06, |
|
"logits/chosen": 1.3359375, |
|
"logits/rejected": 0.79296875, |
|
"logps/chosen": -144.0, |
|
"logps/rejected": -680.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.796875, |
|
"rewards/margins": 54.5, |
|
"rewards/rejected": -56.25, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.8757966674675117e-07, |
|
"learning_rate": 1.1798131208919628e-06, |
|
"logits/chosen": 1.359375, |
|
"logits/rejected": 0.76171875, |
|
"logps/chosen": -150.0, |
|
"logps/rejected": -632.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4453125, |
|
"rewards/margins": 49.75, |
|
"rewards/rejected": -51.25, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.703149137021777e-14, |
|
"learning_rate": 4.3608367469340553e-07, |
|
"logits/chosen": 1.34375, |
|
"logits/rejected": 0.7734375, |
|
"logps/chosen": -153.0, |
|
"logps/rejected": -628.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.078125, |
|
"rewards/margins": 50.0, |
|
"rewards/rejected": -52.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 7.835807033238558e-16, |
|
"learning_rate": 4.909437331777178e-08, |
|
"logits/chosen": 1.3515625, |
|
"logits/rejected": 0.7734375, |
|
"logps/chosen": -144.0, |
|
"logps/rejected": -676.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.203125, |
|
"rewards/margins": 54.0, |
|
"rewards/rejected": -55.25, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 125, |
|
"total_flos": 0.0, |
|
"train_loss": 0.08870122274137147, |
|
"train_runtime": 1811.5821, |
|
"train_samples_per_second": 1.104, |
|
"train_steps_per_second": 0.069 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 125, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|