|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9996190476190476, |
|
"eval_steps": 100, |
|
"global_step": 451, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022164502164502164, |
|
"grad_norm": 2.910351514816284, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"loss": 1.5193, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04432900432900433, |
|
"grad_norm": 2.435637950897217, |
|
"learning_rate": 4.997674683630404e-06, |
|
"loss": 1.5122, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0664935064935065, |
|
"grad_norm": 2.5778419971466064, |
|
"learning_rate": 4.983480078908139e-06, |
|
"loss": 1.5007, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08865800865800866, |
|
"grad_norm": 3.0028574466705322, |
|
"learning_rate": 4.956455951420386e-06, |
|
"loss": 1.5252, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11082251082251082, |
|
"grad_norm": 2.561404228210449, |
|
"learning_rate": 4.916741906337227e-06, |
|
"loss": 1.4841, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.132987012987013, |
|
"grad_norm": 2.3713862895965576, |
|
"learning_rate": 4.864543104251587e-06, |
|
"loss": 1.4968, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15515151515151515, |
|
"grad_norm": 2.773104429244995, |
|
"learning_rate": 4.800129201330785e-06, |
|
"loss": 1.5239, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17731601731601732, |
|
"grad_norm": 2.6305017471313477, |
|
"learning_rate": 4.723832956287465e-06, |
|
"loss": 1.5138, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19948051948051948, |
|
"grad_norm": 2.8621950149536133, |
|
"learning_rate": 4.636048511366222e-06, |
|
"loss": 1.4801, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22164502164502164, |
|
"grad_norm": 2.79162335395813, |
|
"learning_rate": 4.537229356226238e-06, |
|
"loss": 1.509, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22164502164502164, |
|
"eval_dpo_accuracy": 0.4307142857142857, |
|
"eval_reward_accuracy": 0.8981428571428571, |
|
"eval_runtime": 77.5047, |
|
"eval_samples_per_second": 90.317, |
|
"eval_steps_per_second": 11.29, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2438095238095238, |
|
"grad_norm": 2.651031494140625, |
|
"learning_rate": 4.427885985238417e-06, |
|
"loss": 1.4879, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.265974025974026, |
|
"grad_norm": 2.7951595783233643, |
|
"learning_rate": 4.308583260299282e-06, |
|
"loss": 1.5208, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.28813852813852814, |
|
"grad_norm": 2.712519407272339, |
|
"learning_rate": 4.179937492785189e-06, |
|
"loss": 1.5048, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3103030303030303, |
|
"grad_norm": 2.770768642425537, |
|
"learning_rate": 4.0426132597213485e-06, |
|
"loss": 1.5152, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.33246753246753247, |
|
"grad_norm": 2.7763257026672363, |
|
"learning_rate": 3.897319970613119e-06, |
|
"loss": 1.5328, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.35463203463203463, |
|
"grad_norm": 2.4849092960357666, |
|
"learning_rate": 3.7448082026751594e-06, |
|
"loss": 1.5193, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3767965367965368, |
|
"grad_norm": 2.8126847743988037, |
|
"learning_rate": 3.5858658233904335e-06, |
|
"loss": 1.4932, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.39896103896103896, |
|
"grad_norm": 2.7220044136047363, |
|
"learning_rate": 3.4213139204297306e-06, |
|
"loss": 1.5076, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4211255411255411, |
|
"grad_norm": 2.623095750808716, |
|
"learning_rate": 3.2520025599574878e-06, |
|
"loss": 1.5336, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4432900432900433, |
|
"grad_norm": 2.739753484725952, |
|
"learning_rate": 3.078806395236313e-06, |
|
"loss": 1.5066, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4432900432900433, |
|
"eval_dpo_accuracy": 0.43028571428571427, |
|
"eval_reward_accuracy": 0.8991428571428571, |
|
"eval_runtime": 77.4491, |
|
"eval_samples_per_second": 90.382, |
|
"eval_steps_per_second": 11.298, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.46545454545454545, |
|
"grad_norm": 2.728768825531006, |
|
"learning_rate": 2.9026201482158826e-06, |
|
"loss": 1.4946, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4876190476190476, |
|
"grad_norm": 2.8276684284210205, |
|
"learning_rate": 2.724353987448107e-06, |
|
"loss": 1.4973, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5097835497835498, |
|
"grad_norm": 2.889288902282715, |
|
"learning_rate": 2.5449288262059803e-06, |
|
"loss": 1.4954, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.531948051948052, |
|
"grad_norm": 2.7917494773864746, |
|
"learning_rate": 2.365271565095797e-06, |
|
"loss": 1.5146, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5541125541125541, |
|
"grad_norm": 2.705782890319824, |
|
"learning_rate": 2.186310303739121e-06, |
|
"loss": 1.522, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5762770562770563, |
|
"grad_norm": 2.7684695720672607, |
|
"learning_rate": 2.008969546260717e-06, |
|
"loss": 1.4967, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5984415584415584, |
|
"grad_norm": 2.9542436599731445, |
|
"learning_rate": 1.8341654253506098e-06, |
|
"loss": 1.4849, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6206060606060606, |
|
"grad_norm": 2.868518590927124, |
|
"learning_rate": 1.6628009695725348e-06, |
|
"loss": 1.5097, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6427705627705628, |
|
"grad_norm": 2.629920244216919, |
|
"learning_rate": 1.495761438367577e-06, |
|
"loss": 1.4932, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6649350649350649, |
|
"grad_norm": 2.820434331893921, |
|
"learning_rate": 1.3339097488521364e-06, |
|
"loss": 1.5209, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6649350649350649, |
|
"eval_dpo_accuracy": 0.4298571428571429, |
|
"eval_reward_accuracy": 0.8994285714285715, |
|
"eval_runtime": 77.4739, |
|
"eval_samples_per_second": 90.353, |
|
"eval_steps_per_second": 11.294, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6870995670995671, |
|
"grad_norm": 2.696439743041992, |
|
"learning_rate": 1.1780820180351262e-06, |
|
"loss": 1.4872, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7092640692640693, |
|
"grad_norm": 2.8368752002716064, |
|
"learning_rate": 1.029083243483064e-06, |
|
"loss": 1.5035, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7314285714285714, |
|
"grad_norm": 2.790517568588257, |
|
"learning_rate": 8.876831447465015e-07, |
|
"loss": 1.4983, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7535930735930736, |
|
"grad_norm": 2.788957118988037, |
|
"learning_rate": 7.546121870307743e-07, |
|
"loss": 1.51, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7757575757575758, |
|
"grad_norm": 2.6294381618499756, |
|
"learning_rate": 6.305578076525481e-07, |
|
"loss": 1.4995, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7979220779220779, |
|
"grad_norm": 2.827662467956543, |
|
"learning_rate": 5.161608647761013e-07, |
|
"loss": 1.4912, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8200865800865801, |
|
"grad_norm": 2.721107006072998, |
|
"learning_rate": 4.1201232677496846e-07, |
|
"loss": 1.5299, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8422510822510823, |
|
"grad_norm": 2.850194215774536, |
|
"learning_rate": 3.186502193215221e-07, |
|
"loss": 1.517, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8644155844155844, |
|
"grad_norm": 2.7145802974700928, |
|
"learning_rate": 2.3655684597566563e-07, |
|
"loss": 1.4859, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8865800865800866, |
|
"grad_norm": 2.9116082191467285, |
|
"learning_rate": 1.6615629663092492e-07, |
|
"loss": 1.4921, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8865800865800866, |
|
"eval_dpo_accuracy": 0.43, |
|
"eval_reward_accuracy": 0.899, |
|
"eval_runtime": 77.552, |
|
"eval_samples_per_second": 90.262, |
|
"eval_steps_per_second": 11.283, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9087445887445887, |
|
"grad_norm": 2.624758243560791, |
|
"learning_rate": 1.078122566891654e-07, |
|
"loss": 1.5212, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9309090909090909, |
|
"grad_norm": 2.613771677017212, |
|
"learning_rate": 6.182612828162154e-08, |
|
"loss": 1.4931, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9530735930735931, |
|
"grad_norm": 2.632521390914917, |
|
"learning_rate": 2.843547324191259e-08, |
|
"loss": 1.512, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9752380952380952, |
|
"grad_norm": 2.524284601211548, |
|
"learning_rate": 7.81278587455303e-09, |
|
"loss": 1.4976, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9974025974025974, |
|
"grad_norm": 2.6475911140441895, |
|
"learning_rate": 6.460185877465997e-11, |
|
"loss": 1.5115, |
|
"step": 450 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 451, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 3, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|