|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.968, |
|
"eval_steps": 100, |
|
"global_step": 248, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"logits/chosen": 0.7459792494773865, |
|
"logits/rejected": 0.8918710947036743, |
|
"logps/chosen": -76.09617614746094, |
|
"logps/rejected": -62.01979064941406, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": 0.6576791405677795, |
|
"logits/rejected": 0.7277867197990417, |
|
"logps/chosen": -80.96837615966797, |
|
"logps/rejected": -67.04137420654297, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.0003823735751211643, |
|
"rewards/margins": 0.000583351356908679, |
|
"rewards/rejected": -0.00020097770902793854, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": 0.6884250044822693, |
|
"logits/rejected": 0.7408124804496765, |
|
"logps/chosen": -90.96418762207031, |
|
"logps/rejected": -76.06710815429688, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -5.052227152191335e-06, |
|
"rewards/margins": -0.0004625328874681145, |
|
"rewards/rejected": 0.0004574806080199778, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.993800445762451e-06, |
|
"logits/chosen": 0.753118634223938, |
|
"logits/rejected": 0.815566897392273, |
|
"logps/chosen": -75.56269836425781, |
|
"logps/rejected": -62.7692756652832, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0003265980340074748, |
|
"rewards/margins": 0.00027744597173295915, |
|
"rewards/rejected": 4.915207318845205e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.944388344834205e-06, |
|
"logits/chosen": 0.6737275719642639, |
|
"logits/rejected": 0.7539814710617065, |
|
"logps/chosen": -99.6846694946289, |
|
"logps/rejected": -84.5612564086914, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.0005740473279729486, |
|
"rewards/margins": 0.00023269152734428644, |
|
"rewards/rejected": 0.00034135582973249257, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.8465431931347904e-06, |
|
"logits/chosen": 0.6654059290885925, |
|
"logits/rejected": 0.7606115341186523, |
|
"logps/chosen": -92.47711181640625, |
|
"logps/rejected": -79.58221435546875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0007205186993815005, |
|
"rewards/margins": 0.0004779071605298668, |
|
"rewards/rejected": 0.00024261146609205753, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.702203692102539e-06, |
|
"logits/chosen": 0.5616599321365356, |
|
"logits/rejected": 0.6836065649986267, |
|
"logps/chosen": -78.09901428222656, |
|
"logps/rejected": -59.116851806640625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0014349878765642643, |
|
"rewards/margins": 0.0005719334003515542, |
|
"rewards/rejected": 0.000863054592628032, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.514229781074239e-06, |
|
"logits/chosen": 0.6593050360679626, |
|
"logits/rejected": 0.7536520957946777, |
|
"logps/chosen": -76.82511901855469, |
|
"logps/rejected": -66.14640808105469, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 0.0013633746420964599, |
|
"rewards/margins": 1.1641532356165829e-11, |
|
"rewards/rejected": 0.001363374525681138, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.286345970517195e-06, |
|
"logits/chosen": 0.7199975252151489, |
|
"logits/rejected": 0.7695997953414917, |
|
"logps/chosen": -95.65029907226562, |
|
"logps/rejected": -79.29694366455078, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.002262389287352562, |
|
"rewards/margins": 0.00043650128645822406, |
|
"rewards/rejected": 0.0018258880591019988, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.023067544670082e-06, |
|
"logits/chosen": 0.6014922261238098, |
|
"logits/rejected": 0.7544792890548706, |
|
"logps/chosen": -85.02667999267578, |
|
"logps/rejected": -71.08529663085938, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0019673267379403114, |
|
"rewards/margins": 0.0005049472092650831, |
|
"rewards/rejected": 0.0014623795868828893, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.7296110958116845e-06, |
|
"logits/chosen": 0.6225594282150269, |
|
"logits/rejected": 0.6867057085037231, |
|
"logps/chosen": -84.804931640625, |
|
"logps/rejected": -70.72709655761719, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": 0.001966786338016391, |
|
"rewards/margins": 0.00019253513892181218, |
|
"rewards/rejected": 0.0017742514610290527, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_logits/chosen": 0.8210488557815552, |
|
"eval_logits/rejected": 0.8785973787307739, |
|
"eval_logps/chosen": -256.8260803222656, |
|
"eval_logps/rejected": -233.76206970214844, |
|
"eval_loss": 0.0011570560745894909, |
|
"eval_rewards/accuracies": 0.5049999952316284, |
|
"eval_rewards/chosen": -0.00211901543661952, |
|
"eval_rewards/margins": 0.00011873205221490934, |
|
"eval_rewards/rejected": -0.0022377476561814547, |
|
"eval_runtime": 840.5697, |
|
"eval_samples_per_second": 2.379, |
|
"eval_steps_per_second": 0.595, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.4117911628292944e-06, |
|
"logits/chosen": 0.6016367673873901, |
|
"logits/rejected": 0.6993056535720825, |
|
"logps/chosen": -87.06333923339844, |
|
"logps/rejected": -73.28208923339844, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0032501216046512127, |
|
"rewards/margins": 0.0010317874839529395, |
|
"rewards/rejected": 0.0022183340042829514, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.075905022087675e-06, |
|
"logits/chosen": 0.6844900250434875, |
|
"logits/rejected": 0.7173089981079102, |
|
"logps/chosen": -87.12005615234375, |
|
"logps/rejected": -70.78279876708984, |
|
"loss": 0.001, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.003416379215195775, |
|
"rewards/margins": 0.0013540134532377124, |
|
"rewards/rejected": 0.0020623658783733845, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.728607913349464e-06, |
|
"logits/chosen": 0.6243213415145874, |
|
"logits/rejected": 0.6759988069534302, |
|
"logps/chosen": -84.78315734863281, |
|
"logps/rejected": -69.31483459472656, |
|
"loss": 0.001, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.003974142484366894, |
|
"rewards/margins": 0.00143245083745569, |
|
"rewards/rejected": 0.0025416917633265257, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.376781173017589e-06, |
|
"logits/chosen": 0.6302226185798645, |
|
"logits/rejected": 0.6924747824668884, |
|
"logps/chosen": -71.3794174194336, |
|
"logps/rejected": -64.13245391845703, |
|
"loss": 0.001, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.003757910802960396, |
|
"rewards/margins": 0.001318571506999433, |
|
"rewards/rejected": 0.0024393394123762846, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.0273958875043877e-06, |
|
"logits/chosen": 0.6476806402206421, |
|
"logits/rejected": 0.7262701988220215, |
|
"logps/chosen": -93.0534896850586, |
|
"logps/rejected": -75.62432861328125, |
|
"loss": 0.001, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.004165604244917631, |
|
"rewards/margins": 0.0014166636392474174, |
|
"rewards/rejected": 0.0027489408385008574, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.6873747682962393e-06, |
|
"logits/chosen": 0.6091146469116211, |
|
"logits/rejected": 0.6964036226272583, |
|
"logps/chosen": -101.43008422851562, |
|
"logps/rejected": -83.43622589111328, |
|
"loss": 0.001, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.005559985991567373, |
|
"rewards/margins": 0.001756802899762988, |
|
"rewards/rejected": 0.0038031828589737415, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.363454985517803e-06, |
|
"logits/chosen": 0.6119644045829773, |
|
"logits/rejected": 0.6556802988052368, |
|
"logps/chosen": -70.76570129394531, |
|
"logps/rejected": -60.847434997558594, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.004071240313351154, |
|
"rewards/margins": 0.0009927384089678526, |
|
"rewards/rejected": 0.0030785012058913708, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.062054677808238e-06, |
|
"logits/chosen": 0.6605072021484375, |
|
"logits/rejected": 0.7174011468887329, |
|
"logps/chosen": -83.5826416015625, |
|
"logps/rejected": -65.7984619140625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0050515844486653805, |
|
"rewards/margins": 0.0010326830670237541, |
|
"rewards/rejected": 0.004018902312964201, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 7.891457834794711e-07, |
|
"logits/chosen": 0.6303926706314087, |
|
"logits/rejected": 0.7238384485244751, |
|
"logps/chosen": -81.99962615966797, |
|
"logps/rejected": -67.91422271728516, |
|
"loss": 0.001, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.005765286739915609, |
|
"rewards/margins": 0.001600590767338872, |
|
"rewards/rejected": 0.004164696671068668, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 5.501357126768117e-07, |
|
"logits/chosen": 0.6904739141464233, |
|
"logits/rejected": 0.7569997310638428, |
|
"logps/chosen": -91.30878448486328, |
|
"logps/rejected": -78.43734741210938, |
|
"loss": 0.001, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.006007182411849499, |
|
"rewards/margins": 0.0020701445173472166, |
|
"rewards/rejected": 0.0039370376616716385, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_logits/chosen": 0.8083356618881226, |
|
"eval_logits/rejected": 0.8659967184066772, |
|
"eval_logps/chosen": -257.021484375, |
|
"eval_logps/rejected": -233.9424285888672, |
|
"eval_loss": 0.0011878832010552287, |
|
"eval_rewards/accuracies": 0.5009999871253967, |
|
"eval_rewards/chosen": -0.004072962328791618, |
|
"eval_rewards/margins": -3.138141255476512e-05, |
|
"eval_rewards/rejected": -0.004041580483317375, |
|
"eval_runtime": 749.8692, |
|
"eval_samples_per_second": 2.667, |
|
"eval_steps_per_second": 0.667, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.4976020508682345e-07, |
|
"logits/chosen": 0.5687087178230286, |
|
"logits/rejected": 0.6581428647041321, |
|
"logps/chosen": -82.01387023925781, |
|
"logps/rejected": -71.76741790771484, |
|
"loss": 0.001, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.005611591041088104, |
|
"rewards/margins": 0.001832630136050284, |
|
"rewards/rejected": 0.003778961021453142, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.9198949610721273e-07, |
|
"logits/chosen": 0.6064807176589966, |
|
"logits/rejected": 0.6868988275527954, |
|
"logps/chosen": -88.08094787597656, |
|
"logps/rejected": -67.92098236083984, |
|
"loss": 0.001, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.005776416976004839, |
|
"rewards/margins": 0.0024658578913658857, |
|
"rewards/rejected": 0.003310559317469597, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 7.994965069994143e-08, |
|
"logits/chosen": 0.6825326681137085, |
|
"logits/rejected": 0.7606233954429626, |
|
"logps/chosen": -90.36261749267578, |
|
"logps/rejected": -74.85285949707031, |
|
"loss": 0.001, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.005841919686645269, |
|
"rewards/margins": 0.0014079047832638025, |
|
"rewards/rejected": 0.004434015601873398, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 1.5860623616664183e-08, |
|
"logits/chosen": 0.6195131540298462, |
|
"logits/rejected": 0.6464000940322876, |
|
"logps/chosen": -86.05440521240234, |
|
"logps/rejected": -73.87252044677734, |
|
"loss": 0.001, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.006215591914951801, |
|
"rewards/margins": 0.0018872864311560988, |
|
"rewards/rejected": 0.004328305833041668, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"step": 248, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0010566485528421077, |
|
"train_runtime": 4635.3882, |
|
"train_samples_per_second": 0.863, |
|
"train_steps_per_second": 0.054 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 248, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|