Mistral-7B-v0.3-spin-2k-hhrlhf / trainer_state.json
AmberYifan's picture
Model save
dce475a verified
raw
history blame
4.39 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.992,
"eval_steps": 200,
"global_step": 62,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016,
"grad_norm": 346.3062021602437,
"learning_rate": 7.142857142857142e-08,
"logits/generated": -3.196486711502075,
"logits/real": -2.3527207374572754,
"logps/generated": -231.1735382080078,
"logps/real": -99.6369400024414,
"loss": 0.7803,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.16,
"grad_norm": 2.4299127951132924,
"learning_rate": 4.727272727272727e-07,
"logits/generated": -3.081599235534668,
"logits/real": -2.270655870437622,
"logps/generated": -280.790771484375,
"logps/real": -82.0130844116211,
"loss": 0.2552,
"rewards/accuracies": 0.8888888955116272,
"rewards/generated": -4.4418439865112305,
"rewards/margins": 5.652818202972412,
"rewards/real": 1.2109735012054443,
"step": 10
},
{
"epoch": 0.32,
"grad_norm": 0.9060775786875579,
"learning_rate": 3.818181818181818e-07,
"logits/generated": -3.208840847015381,
"logits/real": -2.1548514366149902,
"logps/generated": -326.5088195800781,
"logps/real": -72.36115264892578,
"loss": 0.0712,
"rewards/accuracies": 1.0,
"rewards/generated": -9.6439790725708,
"rewards/margins": 12.68847370147705,
"rewards/real": 3.044494867324829,
"step": 20
},
{
"epoch": 0.48,
"grad_norm": 0.9746129145559742,
"learning_rate": 2.909090909090909e-07,
"logits/generated": -3.225053071975708,
"logits/real": -2.0585813522338867,
"logps/generated": -353.51434326171875,
"logps/real": -57.8585090637207,
"loss": 0.0641,
"rewards/accuracies": 1.0,
"rewards/generated": -11.199175834655762,
"rewards/margins": 14.336624145507812,
"rewards/real": 3.137446165084839,
"step": 30
},
{
"epoch": 0.64,
"grad_norm": 1.114683085793999,
"learning_rate": 2e-07,
"logits/generated": -3.1570346355438232,
"logits/real": -2.1495680809020996,
"logps/generated": -325.97515869140625,
"logps/real": -78.28927612304688,
"loss": 0.075,
"rewards/accuracies": 1.0,
"rewards/generated": -10.375910758972168,
"rewards/margins": 13.806567192077637,
"rewards/real": 3.4306564331054688,
"step": 40
},
{
"epoch": 0.8,
"grad_norm": 0.7978164295236632,
"learning_rate": 1.0909090909090908e-07,
"logits/generated": -3.2370517253875732,
"logits/real": -2.0745859146118164,
"logps/generated": -342.4796447753906,
"logps/real": -65.09725189208984,
"loss": 0.0641,
"rewards/accuracies": 1.0,
"rewards/generated": -10.8725004196167,
"rewards/margins": 14.377031326293945,
"rewards/real": 3.5045323371887207,
"step": 50
},
{
"epoch": 0.96,
"grad_norm": 0.7374328187221965,
"learning_rate": 1.818181818181818e-08,
"logits/generated": -3.1993775367736816,
"logits/real": -2.025631904602051,
"logps/generated": -341.24053955078125,
"logps/real": -61.965667724609375,
"loss": 0.0622,
"rewards/accuracies": 1.0,
"rewards/generated": -11.041067123413086,
"rewards/margins": 14.699417114257812,
"rewards/real": 3.6583499908447266,
"step": 60
},
{
"epoch": 0.992,
"step": 62,
"total_flos": 0.0,
"train_loss": 0.10689828472752724,
"train_runtime": 796.2858,
"train_samples_per_second": 2.51,
"train_steps_per_second": 0.078
}
],
"logging_steps": 10,
"max_steps": 62,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}