{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9945,
  "eval_steps": 500,
  "global_step": 153,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "eta": 0.0010000000474974513,
      "grad_norm": 18.06976070111927,
      "learning_rate": 3.125e-08,
      "logits/chosen": -2.1194543838500977,
      "logits/rejected": -2.2610020637512207,
      "logps/chosen": -254.6973419189453,
      "logps/pi_response": -318.5512390136719,
      "logps/ref_response": -318.5512390136719,
      "logps/rejected": -224.19918823242188,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.07,
      "eta": 0.0010000000474974513,
      "grad_norm": 16.01968550655723,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.385725975036621,
      "logits/rejected": -2.3076765537261963,
      "logps/chosen": -218.1858673095703,
      "logps/pi_response": -268.26239013671875,
      "logps/ref_response": -266.3502197265625,
      "logps/rejected": -224.54605102539062,
      "loss": 0.6927,
      "rewards/accuracies": 0.44871795177459717,
      "rewards/chosen": -0.006226478144526482,
      "rewards/margins": 0.0014922022819519043,
      "rewards/rejected": -0.007718680426478386,
      "step": 10
    },
    {
      "epoch": 0.13,
      "eta": 0.0010000000474974513,
      "grad_norm": 16.02312687332099,
      "learning_rate": 4.989490450759331e-07,
      "logits/chosen": -2.397501230239868,
      "logits/rejected": -2.3145182132720947,
      "logps/chosen": -228.54135131835938,
      "logps/pi_response": -300.3511047363281,
      "logps/ref_response": -260.5257873535156,
      "logps/rejected": -246.08518981933594,
      "loss": 0.6874,
      "rewards/accuracies": 0.5230769515037537,
      "rewards/chosen": -0.22692929208278656,
      "rewards/margins": 0.027445880696177483,
      "rewards/rejected": -0.254375159740448,
      "step": 20
    },
    {
      "epoch": 0.2,
      "eta": 0.0010000000474974513,
      "grad_norm": 18.496071851642174,
      "learning_rate": 4.872270441827174e-07,
      "logits/chosen": -2.2651801109313965,
      "logits/rejected": -2.206923007965088,
      "logps/chosen": -264.01336669921875,
      "logps/pi_response": -342.78717041015625,
      "logps/ref_response": -260.0426940917969,
      "logps/rejected": -266.82000732421875,
      "loss": 0.696,
      "rewards/accuracies": 0.4730769097805023,
      "rewards/chosen": -0.6173264980316162,
      "rewards/margins": 0.023560278117656708,
      "rewards/rejected": -0.6408867835998535,
      "step": 30
    },
    {
      "epoch": 0.26,
      "eta": 0.0010000000474974513,
      "grad_norm": 17.411127458960003,
      "learning_rate": 4.6308512113530063e-07,
      "logits/chosen": -2.4821219444274902,
      "logits/rejected": -2.384908437728882,
      "logps/chosen": -243.66514587402344,
      "logps/pi_response": -308.2298583984375,
      "logps/ref_response": -255.49522399902344,
      "logps/rejected": -256.4287109375,
      "loss": 0.6903,
      "rewards/accuracies": 0.5538461804389954,
      "rewards/chosen": -0.3519066572189331,
      "rewards/margins": 0.04566844180226326,
      "rewards/rejected": -0.3975750505924225,
      "step": 40
    },
    {
      "epoch": 0.33,
      "eta": 0.0010000000474974513,
      "grad_norm": 14.517352316678341,
      "learning_rate": 4.277872161641681e-07,
      "logits/chosen": -2.545145273208618,
      "logits/rejected": -2.5242159366607666,
      "logps/chosen": -247.29806518554688,
      "logps/pi_response": -293.09326171875,
      "logps/ref_response": -275.4074401855469,
      "logps/rejected": -244.6110076904297,
      "loss": 0.6891,
      "rewards/accuracies": 0.557692289352417,
      "rewards/chosen": -0.1450691968202591,
      "rewards/margins": 0.017832614481449127,
      "rewards/rejected": -0.16290180385112762,
      "step": 50
    },
    {
      "epoch": 0.39,
      "eta": 0.0010000000474974513,
      "grad_norm": 17.626546770334993,
      "learning_rate": 3.8318133624280046e-07,
      "logits/chosen": -2.511488676071167,
      "logits/rejected": -2.480903148651123,
      "logps/chosen": -257.10791015625,
      "logps/pi_response": -302.22210693359375,
      "logps/ref_response": -270.9198303222656,
      "logps/rejected": -252.54173278808594,
      "loss": 0.6912,
      "rewards/accuracies": 0.48846152424812317,
      "rewards/chosen": -0.23286302387714386,
      "rewards/margins": 0.02372700721025467,
      "rewards/rejected": -0.2565900385379791,
      "step": 60
    },
    {
      "epoch": 0.46,
      "eta": 0.0010000000474974513,
      "grad_norm": 15.787453453048895,
      "learning_rate": 3.316028034595861e-07,
      "logits/chosen": -2.3820903301239014,
      "logits/rejected": -2.3555104732513428,
      "logps/chosen": -250.5824432373047,
      "logps/pi_response": -322.5419921875,
      "logps/ref_response": -270.929931640625,
      "logps/rejected": -260.3626403808594,
      "loss": 0.695,
      "rewards/accuracies": 0.557692289352417,
      "rewards/chosen": -0.37925985455513,
      "rewards/margins": 0.030029216781258583,
      "rewards/rejected": -0.40928906202316284,
      "step": 70
    },
    {
      "epoch": 0.52,
      "eta": 0.0010000000474974513,
      "grad_norm": 14.876878099596093,
      "learning_rate": 2.7575199021178855e-07,
      "logits/chosen": -2.463655948638916,
      "logits/rejected": -2.3770523071289062,
      "logps/chosen": -270.34197998046875,
      "logps/pi_response": -317.0186767578125,
      "logps/ref_response": -278.0060119628906,
      "logps/rejected": -269.89398193359375,
      "loss": 0.682,
      "rewards/accuracies": 0.5538461804389954,
      "rewards/chosen": -0.3352661728858948,
      "rewards/margins": 0.037235379219055176,
      "rewards/rejected": -0.37250155210494995,
      "step": 80
    },
    {
      "epoch": 0.58,
      "eta": 0.0010000000474974513,
      "grad_norm": 18.475251229694116,
      "learning_rate": 2.1855294234408068e-07,
      "logits/chosen": -2.37835693359375,
      "logits/rejected": -2.3291523456573486,
      "logps/chosen": -244.36837768554688,
      "logps/pi_response": -331.8450012207031,
      "logps/ref_response": -267.61846923828125,
      "logps/rejected": -280.1258544921875,
      "loss": 0.6786,
      "rewards/accuracies": 0.5692307949066162,
      "rewards/chosen": -0.39472243189811707,
      "rewards/margins": 0.06756081432104111,
      "rewards/rejected": -0.46228325366973877,
      "step": 90
    },
    {
      "epoch": 0.65,
      "eta": 0.0010000000474974513,
      "grad_norm": 21.467183952290178,
      "learning_rate": 1.6300029195778453e-07,
      "logits/chosen": -2.353746175765991,
      "logits/rejected": -2.2994236946105957,
      "logps/chosen": -259.2685546875,
      "logps/pi_response": -356.6775817871094,
      "logps/ref_response": -270.3107604980469,
      "logps/rejected": -271.2638854980469,
      "loss": 0.6687,
      "rewards/accuracies": 0.5769230723381042,
      "rewards/chosen": -0.4701143503189087,
      "rewards/margins": 0.08206918090581894,
      "rewards/rejected": -0.5521835088729858,
      "step": 100
    },
    {
      "epoch": 0.71,
      "eta": 0.0010000000474974513,
      "grad_norm": 22.52617676998604,
      "learning_rate": 1.1200247470632392e-07,
      "logits/chosen": -2.292710542678833,
      "logits/rejected": -2.357100486755371,
      "logps/chosen": -274.7545471191406,
      "logps/pi_response": -387.603515625,
      "logps/ref_response": -285.7787780761719,
      "logps/rejected": -272.741943359375,
      "loss": 0.6847,
      "rewards/accuracies": 0.4923076927661896,
      "rewards/chosen": -0.5492157936096191,
      "rewards/margins": 0.018456529825925827,
      "rewards/rejected": -0.5676723718643188,
      "step": 110
    },
    {
      "epoch": 0.78,
      "eta": 0.0010000000474974513,
      "grad_norm": 23.08131985031319,
      "learning_rate": 6.822945986946385e-08,
      "logits/chosen": -2.2610812187194824,
      "logits/rejected": -2.1860787868499756,
      "logps/chosen": -282.33782958984375,
      "logps/pi_response": -372.6535949707031,
      "logps/ref_response": -265.4132080078125,
      "logps/rejected": -290.5935363769531,
      "loss": 0.6808,
      "rewards/accuracies": 0.5461538434028625,
      "rewards/chosen": -0.5871608257293701,
      "rewards/margins": 0.05175128951668739,
      "rewards/rejected": -0.6389120817184448,
      "step": 120
    },
    {
      "epoch": 0.84,
      "eta": 0.0010000000474974513,
      "grad_norm": 22.961865819374246,
      "learning_rate": 3.397296523427806e-08,
      "logits/chosen": -2.2791786193847656,
      "logits/rejected": -2.31884503364563,
      "logps/chosen": -277.06085205078125,
      "logps/pi_response": -365.2495422363281,
      "logps/ref_response": -260.3804626464844,
      "logps/rejected": -295.7681884765625,
      "loss": 0.672,
      "rewards/accuracies": 0.5653846263885498,
      "rewards/chosen": -0.6707223057746887,
      "rewards/margins": 0.08594530820846558,
      "rewards/rejected": -0.7566676139831543,
      "step": 130
    },
    {
      "epoch": 0.91,
      "eta": 0.0010000000474974513,
      "grad_norm": 22.470744853394315,
      "learning_rate": 1.1026475173977978e-08,
      "logits/chosen": -2.3486456871032715,
      "logits/rejected": -2.201983690261841,
      "logps/chosen": -294.6012268066406,
      "logps/pi_response": -374.5638732910156,
      "logps/ref_response": -269.6314697265625,
      "logps/rejected": -284.3193359375,
      "loss": 0.6824,
      "rewards/accuracies": 0.5653846263885498,
      "rewards/chosen": -0.683001697063446,
      "rewards/margins": 0.06894499808549881,
      "rewards/rejected": -0.751946747303009,
      "step": 140
    },
    {
      "epoch": 0.97,
      "eta": 0.0010000000474974513,
      "grad_norm": 26.50699233707175,
      "learning_rate": 5.913435276374834e-10,
      "logits/chosen": -2.3918538093566895,
      "logits/rejected": -2.34391450881958,
      "logps/chosen": -279.7143859863281,
      "logps/pi_response": -387.8644104003906,
      "logps/ref_response": -273.84423828125,
      "logps/rejected": -304.1023254394531,
      "loss": 0.6713,
      "rewards/accuracies": 0.6153846383094788,
      "rewards/chosen": -0.6463515758514404,
      "rewards/margins": 0.1319323182106018,
      "rewards/rejected": -0.7782838940620422,
      "step": 150
    },
    {
      "epoch": 0.99,
      "step": 153,
      "total_flos": 0.0,
      "train_loss": 0.6840069897813734,
      "train_runtime": 41065.6381,
      "train_samples_per_second": 0.487,
      "train_steps_per_second": 0.004
    }
  ],
  "logging_steps": 10,
  "max_steps": 153,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}