|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.992, |
|
"eval_steps": 200, |
|
"global_step": 62, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 228.67711231286552, |
|
"learning_rate": 7.142857142857142e-08, |
|
"logits/generated": -3.182648181915283, |
|
"logits/real": -3.236078977584839, |
|
"logps/generated": -231.1735382080078, |
|
"logps/real": -216.56446838378906, |
|
"loss": 0.914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 104.82372723323877, |
|
"learning_rate": 4.727272727272727e-07, |
|
"logits/generated": -3.0453360080718994, |
|
"logits/real": -3.0438425540924072, |
|
"logps/generated": -241.46511840820312, |
|
"logps/real": -229.99560546875, |
|
"loss": 0.842, |
|
"rewards/accuracies": 0.5694444179534912, |
|
"rewards/generated": -0.5092797875404358, |
|
"rewards/margins": 0.3939414620399475, |
|
"rewards/real": -0.11533831059932709, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 160.6471422793089, |
|
"learning_rate": 3.818181818181818e-07, |
|
"logits/generated": -3.0548062324523926, |
|
"logits/real": -2.970714569091797, |
|
"logps/generated": -253.4170379638672, |
|
"logps/real": -231.45254516601562, |
|
"loss": 0.7195, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": -2.3348288536071777, |
|
"rewards/margins": 1.3843599557876587, |
|
"rewards/real": -0.9504690170288086, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 146.55739161612283, |
|
"learning_rate": 2.909090909090909e-07, |
|
"logits/generated": -3.0757832527160645, |
|
"logits/real": -3.142075300216675, |
|
"logps/generated": -258.6484680175781, |
|
"logps/real": -223.58065795898438, |
|
"loss": 0.5971, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/generated": -1.7125927209854126, |
|
"rewards/margins": 1.9861854314804077, |
|
"rewards/real": 0.2735927999019623, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 102.71447615342244, |
|
"learning_rate": 2e-07, |
|
"logits/generated": -2.938872814178467, |
|
"logits/real": -2.982527017593384, |
|
"logps/generated": -247.6828155517578, |
|
"logps/real": -228.6986541748047, |
|
"loss": 0.5605, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -2.5453975200653076, |
|
"rewards/margins": 1.9871200323104858, |
|
"rewards/real": -0.558277428150177, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 128.82542081084267, |
|
"learning_rate": 1.0909090909090908e-07, |
|
"logits/generated": -2.9596309661865234, |
|
"logits/real": -3.0371811389923096, |
|
"logps/generated": -258.96600341796875, |
|
"logps/real": -224.0094757080078, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -2.521024465560913, |
|
"rewards/margins": 2.7490367889404297, |
|
"rewards/real": 0.22801223397254944, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 140.9790674306219, |
|
"learning_rate": 1.818181818181818e-08, |
|
"logits/generated": -2.8980393409729004, |
|
"logits/real": -3.0217809677124023, |
|
"logps/generated": -256.95941162109375, |
|
"logps/real": -225.6009063720703, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/generated": -2.6129565238952637, |
|
"rewards/margins": 2.7330925464630127, |
|
"rewards/real": 0.12013578414916992, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"step": 62, |
|
"total_flos": 0.0, |
|
"train_loss": 0.648834865900778, |
|
"train_runtime": 793.7409, |
|
"train_samples_per_second": 2.518, |
|
"train_steps_per_second": 0.078 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 62, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|