{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.992, "eval_steps": 200, "global_step": 62, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016, "grad_norm": 228.67711231286552, "learning_rate": 7.142857142857142e-08, "logits/generated": -3.182648181915283, "logits/real": -3.236078977584839, "logps/generated": -231.1735382080078, "logps/real": -216.56446838378906, "loss": 0.914, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.16, "grad_norm": 104.82372723323877, "learning_rate": 4.727272727272727e-07, "logits/generated": -3.0453360080718994, "logits/real": -3.0438425540924072, "logps/generated": -241.46511840820312, "logps/real": -229.99560546875, "loss": 0.842, "rewards/accuracies": 0.5694444179534912, "rewards/generated": -0.5092797875404358, "rewards/margins": 0.3939414620399475, "rewards/real": -0.11533831059932709, "step": 10 }, { "epoch": 0.32, "grad_norm": 160.6471422793089, "learning_rate": 3.818181818181818e-07, "logits/generated": -3.0548062324523926, "logits/real": -2.970714569091797, "logps/generated": -253.4170379638672, "logps/real": -231.45254516601562, "loss": 0.7195, "rewards/accuracies": 0.800000011920929, "rewards/generated": -2.3348288536071777, "rewards/margins": 1.3843599557876587, "rewards/real": -0.9504690170288086, "step": 20 }, { "epoch": 0.48, "grad_norm": 146.55739161612283, "learning_rate": 2.909090909090909e-07, "logits/generated": -3.0757832527160645, "logits/real": -3.142075300216675, "logps/generated": -258.6484680175781, "logps/real": -223.58065795898438, "loss": 0.5971, "rewards/accuracies": 0.8500000238418579, "rewards/generated": -1.7125927209854126, "rewards/margins": 1.9861854314804077, "rewards/real": 0.2735927999019623, "step": 30 }, { "epoch": 0.64, "grad_norm": 102.71447615342244, "learning_rate": 2e-07, "logits/generated": -2.938872814178467, "logits/real": -2.982527017593384, "logps/generated": -247.6828155517578, "logps/real": -228.6986541748047, "loss": 0.5605, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -2.5453975200653076, "rewards/margins": 1.9871200323104858, "rewards/real": -0.558277428150177, "step": 40 }, { "epoch": 0.8, "grad_norm": 128.82542081084267, "learning_rate": 1.0909090909090908e-07, "logits/generated": -2.9596309661865234, "logits/real": -3.0371811389923096, "logps/generated": -258.96600341796875, "logps/real": -224.0094757080078, "loss": 0.6154, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -2.521024465560913, "rewards/margins": 2.7490367889404297, "rewards/real": 0.22801223397254944, "step": 50 }, { "epoch": 0.96, "grad_norm": 140.9790674306219, "learning_rate": 1.818181818181818e-08, "logits/generated": -2.8980393409729004, "logits/real": -3.0217809677124023, "logps/generated": -256.95941162109375, "logps/real": -225.6009063720703, "loss": 0.491, "rewards/accuracies": 0.7875000238418579, "rewards/generated": -2.6129565238952637, "rewards/margins": 2.7330925464630127, "rewards/real": 0.12013578414916992, "step": 60 }, { "epoch": 0.992, "step": 62, "total_flos": 0.0, "train_loss": 0.648834865900778, "train_runtime": 793.7409, "train_samples_per_second": 2.518, "train_steps_per_second": 0.078 } ], "logging_steps": 10, "max_steps": 62, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }