|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.992, |
|
"eval_steps": 200, |
|
"global_step": 62, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 346.3062021602437, |
|
"learning_rate": 7.142857142857142e-08, |
|
"logits/generated": -3.196486711502075, |
|
"logits/real": -2.3527207374572754, |
|
"logps/generated": -231.1735382080078, |
|
"logps/real": -99.6369400024414, |
|
"loss": 0.7803, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.4299127951132924, |
|
"learning_rate": 4.727272727272727e-07, |
|
"logits/generated": -3.081599235534668, |
|
"logits/real": -2.270655870437622, |
|
"logps/generated": -280.790771484375, |
|
"logps/real": -82.0130844116211, |
|
"loss": 0.2552, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/generated": -4.4418439865112305, |
|
"rewards/margins": 5.652818202972412, |
|
"rewards/real": 1.2109735012054443, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.9060775786875579, |
|
"learning_rate": 3.818181818181818e-07, |
|
"logits/generated": -3.208840847015381, |
|
"logits/real": -2.1548514366149902, |
|
"logps/generated": -326.5088195800781, |
|
"logps/real": -72.36115264892578, |
|
"loss": 0.0712, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.6439790725708, |
|
"rewards/margins": 12.68847370147705, |
|
"rewards/real": 3.044494867324829, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.9746129145559742, |
|
"learning_rate": 2.909090909090909e-07, |
|
"logits/generated": -3.225053071975708, |
|
"logits/real": -2.0585813522338867, |
|
"logps/generated": -353.51434326171875, |
|
"logps/real": -57.8585090637207, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.199175834655762, |
|
"rewards/margins": 14.336624145507812, |
|
"rewards/real": 3.137446165084839, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.114683085793999, |
|
"learning_rate": 2e-07, |
|
"logits/generated": -3.1570346355438232, |
|
"logits/real": -2.1495680809020996, |
|
"logps/generated": -325.97515869140625, |
|
"logps/real": -78.28927612304688, |
|
"loss": 0.075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.375910758972168, |
|
"rewards/margins": 13.806567192077637, |
|
"rewards/real": 3.4306564331054688, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.7978164295236632, |
|
"learning_rate": 1.0909090909090908e-07, |
|
"logits/generated": -3.2370517253875732, |
|
"logits/real": -2.0745859146118164, |
|
"logps/generated": -342.4796447753906, |
|
"logps/real": -65.09725189208984, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.8725004196167, |
|
"rewards/margins": 14.377031326293945, |
|
"rewards/real": 3.5045323371887207, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.7374328187221965, |
|
"learning_rate": 1.818181818181818e-08, |
|
"logits/generated": -3.1993775367736816, |
|
"logits/real": -2.025631904602051, |
|
"logps/generated": -341.24053955078125, |
|
"logps/real": -61.965667724609375, |
|
"loss": 0.0622, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.041067123413086, |
|
"rewards/margins": 14.699417114257812, |
|
"rewards/real": 3.6583499908447266, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"step": 62, |
|
"total_flos": 0.0, |
|
"train_loss": 0.10689828472752724, |
|
"train_runtime": 796.2858, |
|
"train_samples_per_second": 2.51, |
|
"train_steps_per_second": 0.078 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 62, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|