{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.08206811653672548, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008206811653672548, "grad_norm": 0.06318386644124985, "learning_rate": 4.999451708687114e-06, "logits/chosen": -2.1367907524108887, "logits/rejected": -2.4948182106018066, "logps/chosen": -0.291498601436615, "logps/rejected": -0.3196522295475006, "loss": 7.5728, "rewards/accuracies": 0.5, "rewards/chosen": -0.4372479021549225, "rewards/margins": 0.04223042353987694, "rewards/rejected": -0.47947829961776733, "step": 10 }, { "epoch": 0.016413623307345096, "grad_norm": 0.07310314476490021, "learning_rate": 4.997807075247147e-06, "logits/chosen": -2.1456007957458496, "logits/rejected": -2.4455342292785645, "logps/chosen": -0.26213544607162476, "logps/rejected": -0.32332050800323486, "loss": 7.5298, "rewards/accuracies": 0.5625, "rewards/chosen": -0.3932031989097595, "rewards/margins": 0.09177760779857635, "rewards/rejected": -0.4849807620048523, "step": 20 }, { "epoch": 0.024620434961017644, "grad_norm": 0.05936102196574211, "learning_rate": 4.9950668210706795e-06, "logits/chosen": -2.0765950679779053, "logits/rejected": -2.485799789428711, "logps/chosen": -0.26631081104278564, "logps/rejected": -0.32647624611854553, "loss": 7.5208, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.39946624636650085, "rewards/margins": 0.09024813771247864, "rewards/rejected": -0.4897143840789795, "step": 30 }, { "epoch": 0.03282724661469019, "grad_norm": 0.08499134331941605, "learning_rate": 4.9912321481237616e-06, "logits/chosen": -2.0753884315490723, "logits/rejected": -2.441580295562744, "logps/chosen": -0.2749950885772705, "logps/rejected": -0.30180150270462036, "loss": 7.4229, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.41249266266822815, "rewards/margins": 0.04020959883928299, "rewards/rejected": -0.45270222425460815, "step": 40 }, { "epoch": 0.04103405826836274, "grad_norm": 0.07681389898061752, "learning_rate": 4.986304738420684e-06, "logits/chosen": -2.145660877227783, "logits/rejected": -2.465946912765503, "logps/chosen": -0.24909739196300507, "logps/rejected": -0.2796121835708618, "loss": 7.4811, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.373646080493927, "rewards/margins": 0.045772187411785126, "rewards/rejected": -0.41941824555397034, "step": 50 }, { "epoch": 0.04103405826836274, "eval_logits/chosen": -2.012000799179077, "eval_logits/rejected": -2.5381252765655518, "eval_logps/chosen": -0.24157460033893585, "eval_logps/rejected": -0.2957758605480194, "eval_loss": 0.9317650198936462, "eval_rewards/accuracies": 0.5252525210380554, "eval_rewards/chosen": -0.3623619079589844, "eval_rewards/margins": 0.08130191266536713, "eval_rewards/rejected": -0.4436637759208679, "eval_runtime": 26.0809, "eval_samples_per_second": 30.214, "eval_steps_per_second": 3.796, "step": 50 }, { "epoch": 0.04924086992203529, "grad_norm": 0.06638535112142563, "learning_rate": 4.980286753286196e-06, "logits/chosen": -2.145846128463745, "logits/rejected": -2.4077115058898926, "logps/chosen": -0.22265203297138214, "logps/rejected": -0.30774614214897156, "loss": 7.4605, "rewards/accuracies": 0.5625, "rewards/chosen": -0.3339780271053314, "rewards/margins": 0.1276412308216095, "rewards/rejected": -0.4616192877292633, "step": 60 }, { "epoch": 0.057447681575707836, "grad_norm": 0.057281140238046646, "learning_rate": 4.973180832407471e-06, "logits/chosen": -2.0021350383758545, "logits/rejected": -2.4299912452697754, "logps/chosen": -0.23488977551460266, "logps/rejected": -0.33270469307899475, "loss": 7.4257, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3523346781730652, "rewards/margins": 0.14672236144542694, "rewards/rejected": -0.4990570545196533, "step": 70 }, { "epoch": 0.06565449322938038, "grad_norm": 0.07725922018289566, "learning_rate": 4.964990092676263e-06, "logits/chosen": -2.117995023727417, "logits/rejected": -2.359265089035034, "logps/chosen": -0.21598832309246063, "logps/rejected": -0.300583153963089, "loss": 7.4384, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.32398244738578796, "rewards/margins": 0.12689228355884552, "rewards/rejected": -0.4508747458457947, "step": 80 }, { "epoch": 0.07386130488305294, "grad_norm": 0.0598183274269104, "learning_rate": 4.9557181268217225e-06, "logits/chosen": -2.282627582550049, "logits/rejected": -2.441333532333374, "logps/chosen": -0.23655852675437927, "logps/rejected": -0.3246815800666809, "loss": 7.4584, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.3548378050327301, "rewards/margins": 0.13218457996845245, "rewards/rejected": -0.48702239990234375, "step": 90 }, { "epoch": 0.08206811653672548, "grad_norm": 0.058213479816913605, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -2.1114468574523926, "logits/rejected": -2.5035691261291504, "logps/chosen": -0.23073866963386536, "logps/rejected": -0.29445192217826843, "loss": 7.4116, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.34610801935195923, "rewards/margins": 0.09556989371776581, "rewards/rejected": -0.44167789816856384, "step": 100 }, { "epoch": 0.08206811653672548, "eval_logits/chosen": -2.0183491706848145, "eval_logits/rejected": -2.5400593280792236, "eval_logps/chosen": -0.20393377542495728, "eval_logps/rejected": -0.2818409502506256, "eval_loss": 0.9129964113235474, "eval_rewards/accuracies": 0.5656565427780151, "eval_rewards/chosen": -0.3059006631374359, "eval_rewards/margins": 0.11686072498559952, "eval_rewards/rejected": -0.4227614104747772, "eval_runtime": 26.0825, "eval_samples_per_second": 30.212, "eval_steps_per_second": 3.796, "step": 100 } ], "logging_steps": 10, "max_steps": 1500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.722958225759273e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }