{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.992,
  "eval_steps": 100,
  "global_step": 62,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "grad_norm": 7.120798164408008,
      "learning_rate": 7.142857142857142e-08,
      "logits/chosen": -1.457259178161621,
      "logits/rejected": -1.0621511936187744,
      "logps/chosen": -272.0050354003906,
      "logps/rejected": -816.85107421875,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.16,
      "grad_norm": 6.902950407858967,
      "learning_rate": 4.963384589619232e-07,
      "logits/chosen": -1.310307502746582,
      "logits/rejected": -1.0846761465072632,
      "logps/chosen": -535.0698852539062,
      "logps/rejected": -898.6776733398438,
      "loss": 0.6917,
      "rewards/accuracies": 0.4444444477558136,
      "rewards/chosen": -0.001652209903113544,
      "rewards/margins": 0.001886376878246665,
      "rewards/rejected": -0.003538586664944887,
      "step": 10
    },
    {
      "epoch": 0.32,
      "grad_norm": 6.3100220055883565,
      "learning_rate": 4.341852844691012e-07,
      "logits/chosen": -1.3299431800842285,
      "logits/rejected": -1.0885355472564697,
      "logps/chosen": -514.69140625,
      "logps/rejected": -881.6802978515625,
      "loss": 0.6634,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.011395025067031384,
      "rewards/margins": 0.07305588573217392,
      "rewards/rejected": -0.08445090055465698,
      "step": 20
    },
    {
      "epoch": 0.48,
      "grad_norm": 7.330007892766956,
      "learning_rate": 3.135545835483718e-07,
      "logits/chosen": -1.2920414209365845,
      "logits/rejected": -1.104931116104126,
      "logps/chosen": -543.4114990234375,
      "logps/rejected": -906.7586059570312,
      "loss": 0.593,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -0.043302834033966064,
      "rewards/margins": 0.253420889377594,
      "rewards/rejected": -0.29672369360923767,
      "step": 30
    },
    {
      "epoch": 0.64,
      "grad_norm": 7.8988027878682825,
      "learning_rate": 1.7274575140626315e-07,
      "logits/chosen": -1.2756078243255615,
      "logits/rejected": -1.09669029712677,
      "logps/chosen": -593.3638305664062,
      "logps/rejected": -903.4348754882812,
      "loss": 0.5274,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -0.08866459131240845,
      "rewards/margins": 0.4585743844509125,
      "rewards/rejected": -0.5472390651702881,
      "step": 40
    },
    {
      "epoch": 0.8,
      "grad_norm": 7.092952620264841,
      "learning_rate": 5.6464597340229375e-08,
      "logits/chosen": -1.1793540716171265,
      "logits/rejected": -1.0933005809783936,
      "logps/chosen": -541.88671875,
      "logps/rejected": -1034.5299072265625,
      "loss": 0.4638,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": -0.13559284806251526,
      "rewards/margins": 0.9057224988937378,
      "rewards/rejected": -1.0413153171539307,
      "step": 50
    },
    {
      "epoch": 0.96,
      "grad_norm": 7.683839890605678,
      "learning_rate": 1.6295661628624447e-09,
      "logits/chosen": -1.1111315488815308,
      "logits/rejected": -1.098163366317749,
      "logps/chosen": -511.08526611328125,
      "logps/rejected": -985.9429931640625,
      "loss": 0.4288,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.20404699444770813,
      "rewards/margins": 0.9808656573295593,
      "rewards/rejected": -1.1849125623703003,
      "step": 60
    },
    {
      "epoch": 0.99,
      "step": 62,
      "total_flos": 0.0,
      "train_loss": 0.5556549103029312,
      "train_runtime": 1091.242,
      "train_samples_per_second": 3.642,
      "train_steps_per_second": 0.057
    }
  ],
  "logging_steps": 10,
  "max_steps": 62,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}