{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9965977492802931,
  "eval_steps": 100,
  "global_step": 238,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 2.0833333333333333e-07,
      "logits/chosen": -2.852463483810425,
      "logits/rejected": -2.8067848682403564,
      "logps/chosen": -307.2070617675781,
      "logps/rejected": -292.9268493652344,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.0833333333333334e-06,
      "logits/chosen": -2.7673232555389404,
      "logits/rejected": -2.714327096939087,
      "logps/chosen": -278.9463806152344,
      "logps/rejected": -269.96435546875,
      "loss": 0.6929,
      "rewards/accuracies": 0.5086805820465088,
      "rewards/chosen": 0.0003373799263499677,
      "rewards/margins": 0.0005398019566200674,
      "rewards/rejected": -0.0002024220593739301,
      "step": 10
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.166666666666667e-06,
      "logits/chosen": -2.789074659347534,
      "logits/rejected": -2.739534378051758,
      "logps/chosen": -288.80340576171875,
      "logps/rejected": -252.3999481201172,
      "loss": 0.6892,
      "rewards/accuracies": 0.6546875238418579,
      "rewards/chosen": 0.003567395731806755,
      "rewards/margins": 0.00781105924397707,
      "rewards/rejected": -0.004243663977831602,
      "step": 20
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.99030821197584e-06,
      "logits/chosen": -2.7394039630889893,
      "logits/rejected": -2.6750001907348633,
      "logps/chosen": -277.71734619140625,
      "logps/rejected": -260.70751953125,
      "loss": 0.6798,
      "rewards/accuracies": 0.6953125,
      "rewards/chosen": 0.015827985480427742,
      "rewards/margins": 0.02965703047811985,
      "rewards/rejected": -0.013829047791659832,
      "step": 30
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.931352528237398e-06,
      "logits/chosen": -2.73751163482666,
      "logits/rejected": -2.684286594390869,
      "logps/chosen": -285.98248291015625,
      "logps/rejected": -277.09478759765625,
      "loss": 0.6678,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": 0.02657519280910492,
      "rewards/margins": 0.0554216094315052,
      "rewards/rejected": -0.028846416622400284,
      "step": 40
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.820092227512736e-06,
      "logits/chosen": -2.729034900665283,
      "logits/rejected": -2.6729769706726074,
      "logps/chosen": -275.21514892578125,
      "logps/rejected": -280.52362060546875,
      "loss": 0.6521,
      "rewards/accuracies": 0.714062511920929,
      "rewards/chosen": -0.01229151152074337,
      "rewards/margins": 0.09265317022800446,
      "rewards/rejected": -0.10494468361139297,
      "step": 50
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.658920803689553e-06,
      "logits/chosen": -2.6756510734558105,
      "logits/rejected": -2.621499538421631,
      "logps/chosen": -280.72991943359375,
      "logps/rejected": -274.62103271484375,
      "loss": 0.6387,
      "rewards/accuracies": 0.692187488079071,
      "rewards/chosen": -0.04894893616437912,
      "rewards/margins": 0.1295410692691803,
      "rewards/rejected": -0.17849001288414001,
      "step": 60
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.451305466682615e-06,
      "logits/chosen": -2.6422786712646484,
      "logits/rejected": -2.585517644882202,
      "logps/chosen": -285.8612365722656,
      "logps/rejected": -291.7132873535156,
      "loss": 0.6281,
      "rewards/accuracies": 0.7015625238418579,
      "rewards/chosen": -0.10629353672266006,
      "rewards/margins": 0.18995100259780884,
      "rewards/rejected": -0.2962445318698883,
      "step": 70
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.2017125538726574e-06,
      "logits/chosen": -2.660236120223999,
      "logits/rejected": -2.5804734230041504,
      "logps/chosen": -292.82135009765625,
      "logps/rejected": -288.45306396484375,
      "loss": 0.605,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.13801410794258118,
      "rewards/margins": 0.2476145476102829,
      "rewards/rejected": -0.3856286406517029,
      "step": 80
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.915511447755793e-06,
      "logits/chosen": -2.6195216178894043,
      "logits/rejected": -2.5578157901763916,
      "logps/chosen": -293.1012878417969,
      "logps/rejected": -303.3466796875,
      "loss": 0.6143,
      "rewards/accuracies": 0.6734374761581421,
      "rewards/chosen": -0.19188150763511658,
      "rewards/margins": 0.25040262937545776,
      "rewards/rejected": -0.44228416681289673,
      "step": 90
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.5988590667807542e-06,
      "logits/chosen": -2.614781618118286,
      "logits/rejected": -2.5338780879974365,
      "logps/chosen": -307.21429443359375,
      "logps/rejected": -317.61480712890625,
      "loss": 0.5916,
      "rewards/accuracies": 0.734375,
      "rewards/chosen": -0.21577294170856476,
      "rewards/margins": 0.3157234787940979,
      "rewards/rejected": -0.5314964056015015,
      "step": 100
    },
    {
      "epoch": 0.42,
      "eval_logits/chosen": -2.6026527881622314,
      "eval_logits/rejected": -2.5165884494781494,
      "eval_logps/chosen": -309.4196472167969,
      "eval_logps/rejected": -308.91461181640625,
      "eval_loss": 0.6025363802909851,
      "eval_rewards/accuracies": 0.6940000057220459,
      "eval_rewards/chosen": -0.25379911065101624,
      "eval_rewards/margins": 0.26018837094306946,
      "eval_rewards/rejected": -0.5139874815940857,
      "eval_runtime": 384.168,
      "eval_samples_per_second": 5.206,
      "eval_steps_per_second": 0.651,
      "step": 100
    },
    {
      "epoch": 0.46,
      "learning_rate": 3.2585674142717483e-06,
      "logits/chosen": -2.6301052570343018,
      "logits/rejected": -2.5181527137756348,
      "logps/chosen": -304.8443603515625,
      "logps/rejected": -305.76165771484375,
      "loss": 0.6052,
      "rewards/accuracies": 0.6953125,
      "rewards/chosen": -0.2113591432571411,
      "rewards/margins": 0.2960760295391083,
      "rewards/rejected": -0.507435142993927,
      "step": 110
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.901957034798671e-06,
      "logits/chosen": -2.571101665496826,
      "logits/rejected": -2.4931883811950684,
      "logps/chosen": -299.7984313964844,
      "logps/rejected": -306.89056396484375,
      "loss": 0.6,
      "rewards/accuracies": 0.690625011920929,
      "rewards/chosen": -0.281256765127182,
      "rewards/margins": 0.25873565673828125,
      "rewards/rejected": -0.5399924516677856,
      "step": 120
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.536699530523292e-06,
      "logits/chosen": -2.564572811126709,
      "logits/rejected": -2.4486539363861084,
      "logps/chosen": -308.9500732421875,
      "logps/rejected": -306.9311828613281,
      "loss": 0.582,
      "rewards/accuracies": 0.7421875,
      "rewards/chosen": -0.2418045997619629,
      "rewards/margins": 0.36538395285606384,
      "rewards/rejected": -0.6071885228157043,
      "step": 130
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.1706525253979533e-06,
      "logits/chosen": -2.544403553009033,
      "logits/rejected": -2.417354106903076,
      "logps/chosen": -327.3999328613281,
      "logps/rejected": -330.9579162597656,
      "loss": 0.5807,
      "rewards/accuracies": 0.7109375,
      "rewards/chosen": -0.29655271768569946,
      "rewards/margins": 0.36625009775161743,
      "rewards/rejected": -0.6628028750419617,
      "step": 140
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.811690627559351e-06,
      "logits/chosen": -2.4691693782806396,
      "logits/rejected": -2.3947396278381348,
      "logps/chosen": -303.43988037109375,
      "logps/rejected": -326.5160217285156,
      "loss": 0.5886,
      "rewards/accuracies": 0.7046874761581421,
      "rewards/chosen": -0.25864046812057495,
      "rewards/margins": 0.34002798795700073,
      "rewards/rejected": -0.5986684560775757,
      "step": 150
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.4675360263490296e-06,
      "logits/chosen": -2.4801511764526367,
      "logits/rejected": -2.3981406688690186,
      "logps/chosen": -303.1156005859375,
      "logps/rejected": -314.0051574707031,
      "loss": 0.577,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.25856736302375793,
      "rewards/margins": 0.3619126081466675,
      "rewards/rejected": -0.620479941368103,
      "step": 160
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.1455923682523476e-06,
      "logits/chosen": -2.462503671646118,
      "logits/rejected": -2.3446600437164307,
      "logps/chosen": -325.72021484375,
      "logps/rejected": -326.17205810546875,
      "loss": 0.5832,
      "rewards/accuracies": 0.7015625238418579,
      "rewards/chosen": -0.3383588194847107,
      "rewards/margins": 0.36354926228523254,
      "rewards/rejected": -0.7019080519676208,
      "step": 170
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.527854855097226e-07,
      "logits/chosen": -2.4440627098083496,
      "logits/rejected": -2.290944814682007,
      "logps/chosen": -310.25933837890625,
      "logps/rejected": -306.5115966796875,
      "loss": 0.5763,
      "rewards/accuracies": 0.7203124761581421,
      "rewards/chosen": -0.32932430505752563,
      "rewards/margins": 0.3963031768798828,
      "rewards/rejected": -0.7256274223327637,
      "step": 180
    },
    {
      "epoch": 0.8,
      "learning_rate": 5.954144037354645e-07,
      "logits/chosen": -2.4063987731933594,
      "logits/rejected": -2.3209547996520996,
      "logps/chosen": -316.28802490234375,
      "logps/rejected": -324.861083984375,
      "loss": 0.5673,
      "rewards/accuracies": 0.7421875,
      "rewards/chosen": -0.3298575282096863,
      "rewards/margins": 0.40440693497657776,
      "rewards/rejected": -0.7342644929885864,
      "step": 190
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.7901583375171277e-07,
      "logits/chosen": -2.37199068069458,
      "logits/rejected": -2.2828264236450195,
      "logps/chosen": -322.8636169433594,
      "logps/rejected": -347.17742919921875,
      "loss": 0.5667,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.3475632071495056,
      "rewards/margins": 0.3942633271217346,
      "rewards/rejected": -0.7418265342712402,
      "step": 200
    },
    {
      "epoch": 0.84,
      "eval_logits/chosen": -2.372931718826294,
      "eval_logits/rejected": -2.2597038745880127,
      "eval_logps/chosen": -323.44244384765625,
      "eval_logps/rejected": -332.209228515625,
      "eval_loss": 0.5787754058837891,
      "eval_rewards/accuracies": 0.7200000286102295,
      "eval_rewards/chosen": -0.39402660727500916,
      "eval_rewards/margins": 0.35290706157684326,
      "eval_rewards/rejected": -0.74693363904953,
      "eval_runtime": 384.1102,
      "eval_samples_per_second": 5.207,
      "eval_steps_per_second": 0.651,
      "step": 200
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.0824506276503898e-07,
      "logits/chosen": -2.346809148788452,
      "logits/rejected": -2.259718656539917,
      "logps/chosen": -330.906005859375,
      "logps/rejected": -344.9519348144531,
      "loss": 0.5802,
      "rewards/accuracies": 0.667187511920929,
      "rewards/chosen": -0.37789514660835266,
      "rewards/margins": 0.33273065090179443,
      "rewards/rejected": -0.7106258273124695,
      "step": 210
    },
    {
      "epoch": 0.92,
      "learning_rate": 8.677580722139673e-08,
      "logits/chosen": -2.3068695068359375,
      "logits/rejected": -2.2727885246276855,
      "logps/chosen": -312.369140625,
      "logps/rejected": -334.8897399902344,
      "loss": 0.5761,
      "rewards/accuracies": 0.7046874761581421,
      "rewards/chosen": -0.40535077452659607,
      "rewards/margins": 0.3713846206665039,
      "rewards/rejected": -0.7767353653907776,
      "step": 220
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.7221181760899153e-08,
      "logits/chosen": -2.347468852996826,
      "logits/rejected": -2.2807364463806152,
      "logps/chosen": -321.6934814453125,
      "logps/rejected": -344.81976318359375,
      "loss": 0.5749,
      "rewards/accuracies": 0.698437511920929,
      "rewards/chosen": -0.3814999759197235,
      "rewards/margins": 0.3564358651638031,
      "rewards/rejected": -0.7379359006881714,
      "step": 230
    },
    {
      "epoch": 1.0,
      "step": 238,
      "total_flos": 0.0,
      "train_loss": 0.6080600586758942,
      "train_runtime": 20828.337,
      "train_samples_per_second": 2.935,
      "train_steps_per_second": 0.011
    }
  ],
  "logging_steps": 10,
  "max_steps": 238,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}