|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 104, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.009615384615384616, |
|
"grad_norm": 28.194065437278258, |
|
"learning_rate": 1.5625e-08, |
|
"logits/chosen": -1.609375, |
|
"logits/rejected": -1.6484375, |
|
"logps/chosen": -152.0, |
|
"logps/rejected": -127.5, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.09615384615384616, |
|
"grad_norm": 26.916436077863562, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -1.6015625, |
|
"logits/rejected": -1.484375, |
|
"logps/chosen": -149.0, |
|
"logps/rejected": -128.0, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.3194444477558136, |
|
"rewards/chosen": -0.0045166015625, |
|
"rewards/margins": 0.01080322265625, |
|
"rewards/rejected": -0.0152587890625, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"grad_norm": 20.740344035629946, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.59375, |
|
"logits/rejected": -1.4765625, |
|
"logps/chosen": -160.0, |
|
"logps/rejected": -133.0, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.03662109375, |
|
"rewards/margins": 0.2734375, |
|
"rewards/rejected": -0.2373046875, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.28846153846153844, |
|
"grad_norm": 6.570831756440352, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -1.5703125, |
|
"logits/rejected": -1.484375, |
|
"logps/chosen": -142.0, |
|
"logps/rejected": -142.0, |
|
"loss": 0.254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0072021484375, |
|
"rewards/margins": 1.390625, |
|
"rewards/rejected": -1.3828125, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 0.3430324547403658, |
|
"learning_rate": 4.857142857142857e-07, |
|
"logits/chosen": -1.59375, |
|
"logits/rejected": -1.546875, |
|
"logps/chosen": -169.0, |
|
"logps/rejected": -180.0, |
|
"loss": 0.0296, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.341796875, |
|
"rewards/margins": 4.8125, |
|
"rewards/rejected": -5.15625, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4807692307692308, |
|
"grad_norm": 1.0998802903857687, |
|
"learning_rate": 4.6785714285714283e-07, |
|
"logits/chosen": -1.6484375, |
|
"logits/rejected": -1.5234375, |
|
"logps/chosen": -165.0, |
|
"logps/rejected": -200.0, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7265625, |
|
"rewards/margins": 6.625, |
|
"rewards/rejected": -7.34375, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5769230769230769, |
|
"grad_norm": 2.398247571338227, |
|
"learning_rate": 4.5e-07, |
|
"logits/chosen": -1.625, |
|
"logits/rejected": -1.5703125, |
|
"logps/chosen": -173.0, |
|
"logps/rejected": -213.0, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9609375, |
|
"rewards/margins": 7.4375, |
|
"rewards/rejected": -8.375, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6730769230769231, |
|
"grad_norm": 3.272238721827958, |
|
"learning_rate": 4.3214285714285713e-07, |
|
"logits/chosen": -1.640625, |
|
"logits/rejected": -1.5625, |
|
"logps/chosen": -169.0, |
|
"logps/rejected": -219.0, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.796875, |
|
"rewards/margins": 8.125, |
|
"rewards/rejected": -8.875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 0.03234130751760337, |
|
"learning_rate": 4.142857142857143e-07, |
|
"logits/chosen": -1.65625, |
|
"logits/rejected": -1.609375, |
|
"logps/chosen": -151.0, |
|
"logps/rejected": -229.0, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.90625, |
|
"rewards/margins": 9.0, |
|
"rewards/rejected": -9.875, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8653846153846154, |
|
"grad_norm": 2.22694231928167, |
|
"learning_rate": 3.9642857142857137e-07, |
|
"logits/chosen": -1.640625, |
|
"logits/rejected": -1.65625, |
|
"logps/chosen": -154.0, |
|
"logps/rejected": -229.0, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0390625, |
|
"rewards/margins": 8.75, |
|
"rewards/rejected": -9.8125, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 0.04456450837390773, |
|
"learning_rate": 3.785714285714285e-07, |
|
"logits/chosen": -1.671875, |
|
"logits/rejected": -1.6328125, |
|
"logps/chosen": -174.0, |
|
"logps/rejected": -232.0, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.87109375, |
|
"rewards/margins": 9.0625, |
|
"rewards/rejected": -9.9375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -1.4765625, |
|
"eval_logits/rejected": -1.4140625, |
|
"eval_logps/chosen": -119.0, |
|
"eval_logps/rejected": -235.0, |
|
"eval_loss": 0.02374856546521187, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.83203125, |
|
"eval_rewards/margins": 9.8125, |
|
"eval_rewards/rejected": -10.625, |
|
"eval_runtime": 5.2091, |
|
"eval_samples_per_second": 13.054, |
|
"eval_steps_per_second": 0.576, |
|
"step": 104 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|