|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 500, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 18.85951805804989, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -2.2437264919281006, |
|
"logits/rejected": -2.1319897174835205, |
|
"logps/chosen": -136.11781311035156, |
|
"logps/pi_response": -276.34149169921875, |
|
"logps/ref_response": -276.34149169921875, |
|
"logps/rejected": -134.32876586914062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 15.166671167458636, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.3832309246063232, |
|
"logits/rejected": -2.3636457920074463, |
|
"logps/chosen": -155.76785278320312, |
|
"logps/pi_response": -274.42333984375, |
|
"logps/ref_response": -272.425048828125, |
|
"logps/rejected": -158.64793395996094, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4270833432674408, |
|
"rewards/chosen": -0.00683738524094224, |
|
"rewards/margins": -0.00022508477559313178, |
|
"rewards/rejected": -0.006612300407141447, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 15.006144425101914, |
|
"learning_rate": 4.989935734988097e-07, |
|
"logits/chosen": -2.3142848014831543, |
|
"logits/rejected": -2.337123394012451, |
|
"logps/chosen": -169.91624450683594, |
|
"logps/pi_response": -305.30267333984375, |
|
"logps/ref_response": -275.4255065917969, |
|
"logps/rejected": -177.8936767578125, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.154428631067276, |
|
"rewards/margins": 0.013679690659046173, |
|
"rewards/rejected": -0.16810832917690277, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 25.3042204309977, |
|
"learning_rate": 4.877641290737883e-07, |
|
"logits/chosen": -2.33975887298584, |
|
"logits/rejected": -2.3151369094848633, |
|
"logps/chosen": -189.64102172851562, |
|
"logps/pi_response": -321.45294189453125, |
|
"logps/ref_response": -261.0726013183594, |
|
"logps/rejected": -194.24017333984375, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -0.33687421679496765, |
|
"rewards/margins": 0.005444393027573824, |
|
"rewards/rejected": -0.3423186242580414, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 14.197120755569808, |
|
"learning_rate": 4.646121984004665e-07, |
|
"logits/chosen": -2.4677834510803223, |
|
"logits/rejected": -2.4844822883605957, |
|
"logps/chosen": -176.27413940429688, |
|
"logps/pi_response": -294.74114990234375, |
|
"logps/ref_response": -256.48724365234375, |
|
"logps/rejected": -179.17926025390625, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.2434801161289215, |
|
"rewards/margins": -0.00023287050134968013, |
|
"rewards/rejected": -0.24324722588062286, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 15.243074464797877, |
|
"learning_rate": 4.3069871595684787e-07, |
|
"logits/chosen": -2.445664882659912, |
|
"logits/rejected": -2.4546258449554443, |
|
"logps/chosen": -189.74288940429688, |
|
"logps/pi_response": -307.9224548339844, |
|
"logps/ref_response": -267.48931884765625, |
|
"logps/rejected": -200.6833038330078, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.2953701615333557, |
|
"rewards/margins": 0.01801210641860962, |
|
"rewards/rejected": -0.3133822977542877, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 16.31963304577925, |
|
"learning_rate": 3.877242453630256e-07, |
|
"logits/chosen": -2.477487087249756, |
|
"logits/rejected": -2.4785385131835938, |
|
"logps/chosen": -185.6737518310547, |
|
"logps/pi_response": -291.3546447753906, |
|
"logps/ref_response": -254.33984375, |
|
"logps/rejected": -188.54415893554688, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.26406130194664, |
|
"rewards/margins": 0.008015439845621586, |
|
"rewards/rejected": -0.2720767557621002, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 16.071293511713314, |
|
"learning_rate": 3.378437060203357e-07, |
|
"logits/chosen": -2.359812021255493, |
|
"logits/rejected": -2.355583429336548, |
|
"logps/chosen": -209.6289520263672, |
|
"logps/pi_response": -323.3484802246094, |
|
"logps/ref_response": -260.3892517089844, |
|
"logps/rejected": -210.88949584960938, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.5319920778274536, |
|
"rewards/margins": 0.012827059254050255, |
|
"rewards/rejected": -0.5448191165924072, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 16.843259909148866, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"logits/chosen": -2.4202404022216797, |
|
"logits/rejected": -2.362644910812378, |
|
"logps/chosen": -196.7368927001953, |
|
"logps/pi_response": -324.1815490722656, |
|
"logps/ref_response": -271.9532470703125, |
|
"logps/rejected": -200.54006958007812, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.402200847864151, |
|
"rewards/margins": 0.04481234401464462, |
|
"rewards/rejected": -0.4470131993293762, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 20.3973840448787, |
|
"learning_rate": 2.2759017277414164e-07, |
|
"logits/chosen": -2.416743278503418, |
|
"logits/rejected": -2.436403751373291, |
|
"logps/chosen": -202.49813842773438, |
|
"logps/pi_response": -311.49896240234375, |
|
"logps/ref_response": -254.697509765625, |
|
"logps/rejected": -198.43460083007812, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.3861594796180725, |
|
"rewards/margins": 0.029975295066833496, |
|
"rewards/rejected": -0.4161347448825836, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 19.031326931010696, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"logits/chosen": -2.3818562030792236, |
|
"logits/rejected": -2.393977403640747, |
|
"logps/chosen": -200.36917114257812, |
|
"logps/pi_response": -338.3829040527344, |
|
"logps/ref_response": -261.9479064941406, |
|
"logps/rejected": -211.229736328125, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.5124812722206116, |
|
"rewards/margins": 0.037171028554439545, |
|
"rewards/rejected": -0.5496522188186646, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 15.411774484824894, |
|
"learning_rate": 1.2177518064852348e-07, |
|
"logits/chosen": -2.4000236988067627, |
|
"logits/rejected": -2.2974681854248047, |
|
"logps/chosen": -204.61512756347656, |
|
"logps/pi_response": -330.70831298828125, |
|
"logps/ref_response": -250.0836639404297, |
|
"logps/rejected": -208.4372100830078, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.5626230239868164, |
|
"rewards/margins": 0.039384625852108, |
|
"rewards/rejected": -0.6020076274871826, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 15.901638390226227, |
|
"learning_rate": 7.723433775328384e-08, |
|
"logits/chosen": -2.2554521560668945, |
|
"logits/rejected": -2.3232483863830566, |
|
"logps/chosen": -223.6305694580078, |
|
"logps/pi_response": -362.30291748046875, |
|
"logps/ref_response": -276.22747802734375, |
|
"logps/rejected": -226.0797119140625, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -0.6587863564491272, |
|
"rewards/margins": 0.01464476902037859, |
|
"rewards/rejected": -0.6734310984611511, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 15.415238088814565, |
|
"learning_rate": 4.1356686569674335e-08, |
|
"logits/chosen": -2.2723705768585205, |
|
"logits/rejected": -2.223027229309082, |
|
"logps/chosen": -215.4403839111328, |
|
"logps/pi_response": -351.7759094238281, |
|
"logps/ref_response": -266.7939453125, |
|
"logps/rejected": -221.5651397705078, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.6349462270736694, |
|
"rewards/margins": 0.02343112602829933, |
|
"rewards/rejected": -0.6583773493766785, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 17.415730450012774, |
|
"learning_rate": 1.5941282340065697e-08, |
|
"logits/chosen": -2.319291591644287, |
|
"logits/rejected": -2.372563600540161, |
|
"logps/chosen": -211.4011688232422, |
|
"logps/pi_response": -335.5188903808594, |
|
"logps/ref_response": -254.0056915283203, |
|
"logps/rejected": -212.92123413085938, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -0.5842172503471375, |
|
"rewards/margins": 0.024492263793945312, |
|
"rewards/rejected": -0.6087095141410828, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 16.173628365681967, |
|
"learning_rate": 2.2625595580163247e-09, |
|
"logits/chosen": -2.2677788734436035, |
|
"logits/rejected": -2.2873096466064453, |
|
"logps/chosen": -209.3054656982422, |
|
"logps/pi_response": -347.63079833984375, |
|
"logps/ref_response": -265.3609313964844, |
|
"logps/rejected": -218.402587890625, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.5842490196228027, |
|
"rewards/margins": 0.023630866780877113, |
|
"rewards/rejected": -0.6078798770904541, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 156, |
|
"total_flos": 0.0, |
|
"train_loss": 0.688222443828216, |
|
"train_runtime": 31827.1935, |
|
"train_samples_per_second": 0.628, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|