{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.8235294117647056, "eval_steps": 500, "global_step": 24, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.47058823529411764, "grad_norm": 2.172867774963379, "learning_rate": 0.00019047619047619048, "logits/chosen": 0.8107720017433167, "logits/rejected": 0.8326998353004456, "logps/chosen": -179.76402282714844, "logps/rejected": -192.94955444335938, "loss": 0.6833, "rewards/accuracies": 0.390625, "rewards/chosen": 0.007948305457830429, "rewards/margins": 0.020603589713573456, "rewards/rejected": -0.012655285187065601, "step": 4 }, { "epoch": 0.9411764705882353, "grad_norm": 2.0105249881744385, "learning_rate": 0.00015238095238095237, "logits/chosen": 0.8822616338729858, "logits/rejected": 0.8548164367675781, "logps/chosen": -188.2321319580078, "logps/rejected": -180.1151123046875, "loss": 0.6218, "rewards/accuracies": 0.78125, "rewards/chosen": 0.049046244472265244, "rewards/margins": 0.16583411395549774, "rewards/rejected": -0.116787850856781, "step": 8 }, { "epoch": 1.4117647058823528, "grad_norm": 1.7585980892181396, "learning_rate": 0.00011428571428571428, "logits/chosen": 0.7230716347694397, "logits/rejected": 0.8427420854568481, "logps/chosen": -167.85791015625, "logps/rejected": -209.00662231445312, "loss": 0.4884, "rewards/accuracies": 0.859375, "rewards/chosen": 0.09655643254518509, "rewards/margins": 0.5457525253295898, "rewards/rejected": -0.44919610023498535, "step": 12 }, { "epoch": 1.8823529411764706, "grad_norm": 2.200401782989502, "learning_rate": 7.619047619047618e-05, "logits/chosen": 0.8320086002349854, "logits/rejected": 0.7734792232513428, "logps/chosen": -190.69061279296875, "logps/rejected": -176.31326293945312, "loss": 0.4096, "rewards/accuracies": 0.890625, "rewards/chosen": 0.26952454447746277, "rewards/margins": 0.9362796545028687, "rewards/rejected": -0.6667550802230835, "step": 16 }, { "epoch": 2.3529411764705883, "grad_norm": 2.271818161010742, "learning_rate": 3.809523809523809e-05, "logits/chosen": 0.8026203513145447, "logits/rejected": 0.8115992546081543, "logps/chosen": -177.355712890625, "logps/rejected": -186.3753662109375, "loss": 0.3558, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3177458345890045, "rewards/margins": 1.1793969869613647, "rewards/rejected": -0.8616511225700378, "step": 20 }, { "epoch": 2.8235294117647056, "grad_norm": 2.257908821105957, "learning_rate": 0.0, "logits/chosen": 0.7548601031303406, "logits/rejected": 0.7559518814086914, "logps/chosen": -172.9368133544922, "logps/rejected": -193.7064208984375, "loss": 0.3295, "rewards/accuracies": 0.8984375, "rewards/chosen": 0.3441559076309204, "rewards/margins": 1.3037660121917725, "rewards/rejected": -0.9596100449562073, "step": 24 } ], "logging_steps": 4, "max_steps": 24, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }