|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.992, |
|
"eval_steps": 200, |
|
"global_step": 62, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 198.7469829190425, |
|
"learning_rate": 7.142857142857142e-08, |
|
"logits/generated": -2.8518388271331787, |
|
"logits/real": -2.9862747192382812, |
|
"logps/generated": -241.12493896484375, |
|
"logps/real": -248.8436279296875, |
|
"loss": 0.9204, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 191.54263414525988, |
|
"learning_rate": 4.727272727272727e-07, |
|
"logits/generated": -3.05552339553833, |
|
"logits/real": -3.0378544330596924, |
|
"logps/generated": -243.76849365234375, |
|
"logps/real": -230.84719848632812, |
|
"loss": 0.8961, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/generated": 0.18684834241867065, |
|
"rewards/margins": 0.20998667180538177, |
|
"rewards/real": 0.396835058927536, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 139.26955693169643, |
|
"learning_rate": 3.818181818181818e-07, |
|
"logits/generated": -2.9985575675964355, |
|
"logits/real": -2.984205484390259, |
|
"logps/generated": -250.49853515625, |
|
"logps/real": -217.1739044189453, |
|
"loss": 0.8668, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/generated": -1.4985097646713257, |
|
"rewards/margins": 0.8482303619384766, |
|
"rewards/real": -0.6502794623374939, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 117.51802509366077, |
|
"learning_rate": 2.909090909090909e-07, |
|
"logits/generated": -2.9175362586975098, |
|
"logits/real": -2.964449644088745, |
|
"logps/generated": -262.0832824707031, |
|
"logps/real": -228.2645721435547, |
|
"loss": 0.7592, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -1.5061546564102173, |
|
"rewards/margins": 1.8053114414215088, |
|
"rewards/real": 0.29915687441825867, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 176.64698900823225, |
|
"learning_rate": 2e-07, |
|
"logits/generated": -2.9176225662231445, |
|
"logits/real": -2.9303154945373535, |
|
"logps/generated": -260.50018310546875, |
|
"logps/real": -230.49288940429688, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/generated": -2.2268033027648926, |
|
"rewards/margins": 2.359907627105713, |
|
"rewards/real": 0.13310480117797852, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 183.16237322543932, |
|
"learning_rate": 1.0909090909090908e-07, |
|
"logits/generated": -2.88456654548645, |
|
"logits/real": -2.944023609161377, |
|
"logps/generated": -260.68035888671875, |
|
"logps/real": -228.1358184814453, |
|
"loss": 0.604, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": -1.7078344821929932, |
|
"rewards/margins": 1.941307783126831, |
|
"rewards/real": 0.23347334563732147, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 95.0210404383474, |
|
"learning_rate": 1.818181818181818e-08, |
|
"logits/generated": -2.917457342147827, |
|
"logits/real": -2.9479122161865234, |
|
"logps/generated": -249.3968048095703, |
|
"logps/real": -225.28262329101562, |
|
"loss": 0.6543, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/generated": -1.3457591533660889, |
|
"rewards/margins": 1.6494863033294678, |
|
"rewards/real": 0.30372729897499084, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"step": 62, |
|
"total_flos": 0.0, |
|
"train_loss": 0.7342928696063257, |
|
"train_runtime": 746.6525, |
|
"train_samples_per_second": 2.677, |
|
"train_steps_per_second": 0.083 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 62, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|