NicholasCorrado's picture
Model save
3c44280 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9983492901947838,
"eval_steps": 1000,
"global_step": 378,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002641135688345989,
"grad_norm": 0.9479714304489147,
"learning_rate": 1.3157894736842104e-08,
"logits/chosen": -2.2716729640960693,
"logits/rejected": -2.241565704345703,
"logps/chosen": -156.80194091796875,
"logps/rejected": -147.06320190429688,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.02641135688345989,
"grad_norm": 0.9657211466886476,
"learning_rate": 1.3157894736842104e-07,
"logits/chosen": -2.2696707248687744,
"logits/rejected": -2.2592086791992188,
"logps/chosen": -173.62896728515625,
"logps/rejected": -168.29458618164062,
"loss": 0.6931,
"rewards/accuracies": 0.4470486044883728,
"rewards/chosen": 0.00015925339539535344,
"rewards/margins": 9.125900396611542e-05,
"rewards/rejected": 6.799438415328041e-05,
"step": 10
},
{
"epoch": 0.05282271376691978,
"grad_norm": 0.952148373000707,
"learning_rate": 2.631578947368421e-07,
"logits/chosen": -2.2733893394470215,
"logits/rejected": -2.2671706676483154,
"logps/chosen": -169.05018615722656,
"logps/rejected": -169.22433471679688,
"loss": 0.6931,
"rewards/accuracies": 0.522656261920929,
"rewards/chosen": -9.141029295278713e-05,
"rewards/margins": 0.0001715569815132767,
"rewards/rejected": -0.00026296728174202144,
"step": 20
},
{
"epoch": 0.07923407065037966,
"grad_norm": 0.9659022104815798,
"learning_rate": 3.9473684210526315e-07,
"logits/chosen": -2.2659945487976074,
"logits/rejected": -2.2476842403411865,
"logps/chosen": -178.9219512939453,
"logps/rejected": -169.46163940429688,
"loss": 0.693,
"rewards/accuracies": 0.5289062261581421,
"rewards/chosen": -0.0015892453957349062,
"rewards/margins": 0.0002000469685299322,
"rewards/rejected": -0.0017892923206090927,
"step": 30
},
{
"epoch": 0.10564542753383956,
"grad_norm": 0.9273301854247932,
"learning_rate": 4.999573126145131e-07,
"logits/chosen": -2.2755210399627686,
"logits/rejected": -2.2551403045654297,
"logps/chosen": -179.63047790527344,
"logps/rejected": -163.6714630126953,
"loss": 0.6926,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.0064210728742182255,
"rewards/margins": 0.0012297846842557192,
"rewards/rejected": -0.007650857325643301,
"step": 40
},
{
"epoch": 0.13205678441729943,
"grad_norm": 0.9799377076485188,
"learning_rate": 4.984647842238184e-07,
"logits/chosen": -2.28324294090271,
"logits/rejected": -2.2799413204193115,
"logps/chosen": -168.7937469482422,
"logps/rejected": -170.1733856201172,
"loss": 0.6917,
"rewards/accuracies": 0.5640624761581421,
"rewards/chosen": -0.0193162951618433,
"rewards/margins": 0.0024972439277917147,
"rewards/rejected": -0.021813539788126945,
"step": 50
},
{
"epoch": 0.1584681413007593,
"grad_norm": 0.9599459222723423,
"learning_rate": 4.948524419003415e-07,
"logits/chosen": -2.2814371585845947,
"logits/rejected": -2.273639440536499,
"logps/chosen": -173.1953582763672,
"logps/rejected": -171.6744384765625,
"loss": 0.6911,
"rewards/accuracies": 0.547656238079071,
"rewards/chosen": -0.03955007344484329,
"rewards/margins": 0.00362972030416131,
"rewards/rejected": -0.043179791420698166,
"step": 60
},
{
"epoch": 0.18487949818421923,
"grad_norm": 0.9816456439800273,
"learning_rate": 4.891511048751102e-07,
"logits/chosen": -2.3176677227020264,
"logits/rejected": -2.3122127056121826,
"logps/chosen": -177.39488220214844,
"logps/rejected": -167.91244506835938,
"loss": 0.6896,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.06663568317890167,
"rewards/margins": 0.008323188871145248,
"rewards/rejected": -0.07495887577533722,
"step": 70
},
{
"epoch": 0.2112908550676791,
"grad_norm": 1.0368469143747399,
"learning_rate": 4.81409414945389e-07,
"logits/chosen": -2.342482328414917,
"logits/rejected": -2.3203022480010986,
"logps/chosen": -195.66636657714844,
"logps/rejected": -187.387939453125,
"loss": 0.6878,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.08949045091867447,
"rewards/margins": 0.01014741975814104,
"rewards/rejected": -0.09963786602020264,
"step": 80
},
{
"epoch": 0.237702211951139,
"grad_norm": 1.0442549418705969,
"learning_rate": 4.7169342148001546e-07,
"logits/chosen": -2.3382809162139893,
"logits/rejected": -2.3266994953155518,
"logps/chosen": -195.09518432617188,
"logps/rejected": -182.37796020507812,
"loss": 0.6874,
"rewards/accuracies": 0.573437511920929,
"rewards/chosen": -0.11916174739599228,
"rewards/margins": 0.014569459483027458,
"rewards/rejected": -0.13373121619224548,
"step": 90
},
{
"epoch": 0.26411356883459886,
"grad_norm": 1.064035782351039,
"learning_rate": 4.6008601790947314e-07,
"logits/chosen": -2.321099042892456,
"logits/rejected": -2.319929838180542,
"logps/chosen": -187.78164672851562,
"logps/rejected": -180.61904907226562,
"loss": 0.6849,
"rewards/accuracies": 0.5726562738418579,
"rewards/chosen": -0.13006748259067535,
"rewards/margins": 0.01908385381102562,
"rewards/rejected": -0.14915132522583008,
"step": 100
},
{
"epoch": 0.29052492571805877,
"grad_norm": 1.2007773205984222,
"learning_rate": 4.466862345083708e-07,
"logits/chosen": -2.321152925491333,
"logits/rejected": -2.3174567222595215,
"logps/chosen": -189.49288940429688,
"logps/rejected": -185.0203094482422,
"loss": 0.6838,
"rewards/accuracies": 0.5640624761581421,
"rewards/chosen": -0.15054509043693542,
"rewards/margins": 0.022373218089342117,
"rewards/rejected": -0.17291830480098724,
"step": 110
},
{
"epoch": 0.3169362826015186,
"grad_norm": 1.4026602647859983,
"learning_rate": 4.3160839350405605e-07,
"logits/chosen": -2.310743808746338,
"logits/rejected": -2.309847831726074,
"logps/chosen": -188.81431579589844,
"logps/rejected": -188.99505615234375,
"loss": 0.6824,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.16854415833950043,
"rewards/margins": 0.026360681280493736,
"rewards/rejected": -0.19490481913089752,
"step": 120
},
{
"epoch": 0.34334763948497854,
"grad_norm": 1.2567707162408688,
"learning_rate": 4.149811337196807e-07,
"logits/chosen": -2.3110547065734863,
"logits/rejected": -2.302724599838257,
"logps/chosen": -201.25418090820312,
"logps/rejected": -193.19810485839844,
"loss": 0.6801,
"rewards/accuracies": 0.5703125,
"rewards/chosen": -0.20925810933113098,
"rewards/margins": 0.028272386640310287,
"rewards/rejected": -0.23753049969673157,
"step": 130
},
{
"epoch": 0.36975899636843845,
"grad_norm": 1.3171163187862418,
"learning_rate": 3.9694631307311825e-07,
"logits/chosen": -2.310455560684204,
"logits/rejected": -2.3021938800811768,
"logps/chosen": -201.97586059570312,
"logps/rejected": -196.26681518554688,
"loss": 0.6814,
"rewards/accuracies": 0.5679687261581421,
"rewards/chosen": -0.267736554145813,
"rewards/margins": 0.03569976985454559,
"rewards/rejected": -0.3034363389015198,
"step": 140
},
{
"epoch": 0.3961703532518983,
"grad_norm": 1.362073777172477,
"learning_rate": 3.776577982952267e-07,
"logits/chosen": -2.2907283306121826,
"logits/rejected": -2.2906551361083984,
"logps/chosen": -201.7469482421875,
"logps/rejected": -202.30398559570312,
"loss": 0.6805,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.3111319839954376,
"rewards/margins": 0.03633497655391693,
"rewards/rejected": -0.34746694564819336,
"step": 150
},
{
"epoch": 0.4225817101353582,
"grad_norm": 2.093940167532845,
"learning_rate": 3.572801521931522e-07,
"logits/chosen": -2.309051752090454,
"logits/rejected": -2.302381992340088,
"logps/chosen": -203.4032745361328,
"logps/rejected": -199.6255340576172,
"loss": 0.6787,
"rewards/accuracies": 0.577343761920929,
"rewards/chosen": -0.34017449617385864,
"rewards/margins": 0.04339217022061348,
"rewards/rejected": -0.38356661796569824,
"step": 160
},
{
"epoch": 0.4489930670188181,
"grad_norm": 1.9348127254360297,
"learning_rate": 3.35987229658482e-07,
"logits/chosen": -2.3065199851989746,
"logits/rejected": -2.2967591285705566,
"logps/chosen": -206.21694946289062,
"logps/rejected": -201.9674835205078,
"loss": 0.6763,
"rewards/accuracies": 0.5726562738418579,
"rewards/chosen": -0.3637041449546814,
"rewards/margins": 0.04687776044011116,
"rewards/rejected": -0.41058191657066345,
"step": 170
},
{
"epoch": 0.475404423902278,
"grad_norm": 1.6138340474053996,
"learning_rate": 3.139606943986089e-07,
"logits/chosen": -2.295161247253418,
"logits/rejected": -2.2992234230041504,
"logps/chosen": -216.4457550048828,
"logps/rejected": -214.0793914794922,
"loss": 0.6756,
"rewards/accuracies": 0.5601562261581421,
"rewards/chosen": -0.4113912582397461,
"rewards/margins": 0.0445592924952507,
"rewards/rejected": -0.4559505581855774,
"step": 180
},
{
"epoch": 0.5018157807857379,
"grad_norm": 1.8694312571116225,
"learning_rate": 2.913884690460325e-07,
"logits/chosen": -2.332610607147217,
"logits/rejected": -2.3261446952819824,
"logps/chosen": -223.5069122314453,
"logps/rejected": -212.2234344482422,
"loss": 0.6734,
"rewards/accuracies": 0.5757812261581421,
"rewards/chosen": -0.45355916023254395,
"rewards/margins": 0.04530250281095505,
"rewards/rejected": -0.4988616406917572,
"step": 190
},
{
"epoch": 0.5282271376691977,
"grad_norm": 1.6606210676628292,
"learning_rate": 2.684631318687185e-07,
"logits/chosen": -2.3509981632232666,
"logits/rejected": -2.3406052589416504,
"logps/chosen": -226.03836059570312,
"logps/rejected": -223.758544921875,
"loss": 0.678,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": -0.45515409111976624,
"rewards/margins": 0.0432661809027195,
"rewards/rejected": -0.4984202980995178,
"step": 200
},
{
"epoch": 0.5546384945526577,
"grad_norm": 1.7021845659792814,
"learning_rate": 2.4538027376021755e-07,
"logits/chosen": -2.3477015495300293,
"logits/rejected": -2.338550090789795,
"logps/chosen": -220.02816772460938,
"logps/rejected": -216.43405151367188,
"loss": 0.6703,
"rewards/accuracies": 0.5804687738418579,
"rewards/chosen": -0.44584885239601135,
"rewards/margins": 0.060957133769989014,
"rewards/rejected": -0.5068060159683228,
"step": 210
},
{
"epoch": 0.5810498514361175,
"grad_norm": 1.8757866922329256,
"learning_rate": 2.2233682952712483e-07,
"logits/chosen": -2.33669376373291,
"logits/rejected": -2.332843780517578,
"logps/chosen": -223.82211303710938,
"logps/rejected": -217.61489868164062,
"loss": 0.6712,
"rewards/accuracies": 0.586718738079071,
"rewards/chosen": -0.4329432547092438,
"rewards/margins": 0.06651361286640167,
"rewards/rejected": -0.49945688247680664,
"step": 220
},
{
"epoch": 0.6074612083195774,
"grad_norm": 2.334294772609141,
"learning_rate": 1.995293977107475e-07,
"logits/chosen": -2.3312466144561768,
"logits/rejected": -2.326164722442627,
"logps/chosen": -219.48025512695312,
"logps/rejected": -222.7107391357422,
"loss": 0.671,
"rewards/accuracies": 0.5757812261581421,
"rewards/chosen": -0.4052560329437256,
"rewards/margins": 0.058724187314510345,
"rewards/rejected": -0.4639802575111389,
"step": 230
},
{
"epoch": 0.6338725652030373,
"grad_norm": 1.8694677367469326,
"learning_rate": 1.7715256327766884e-07,
"logits/chosen": -2.3552398681640625,
"logits/rejected": -2.340351104736328,
"logps/chosen": -226.2518310546875,
"logps/rejected": -220.1430206298828,
"loss": 0.6717,
"rewards/accuracies": 0.563281238079071,
"rewards/chosen": -0.43269434571266174,
"rewards/margins": 0.05727803707122803,
"rewards/rejected": -0.48997241258621216,
"step": 240
},
{
"epoch": 0.6602839220864972,
"grad_norm": 2.0002930172446627,
"learning_rate": 1.5539723748942242e-07,
"logits/chosen": -2.35202956199646,
"logits/rejected": -2.3460869789123535,
"logps/chosen": -219.1632537841797,
"logps/rejected": -222.41403198242188,
"loss": 0.6715,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": -0.4845377504825592,
"rewards/margins": 0.07130294293165207,
"rewards/rejected": -0.5558406710624695,
"step": 250
},
{
"epoch": 0.6866952789699571,
"grad_norm": 2.0423010510210973,
"learning_rate": 1.3444902911492174e-07,
"logits/chosen": -2.3457489013671875,
"logits/rejected": -2.3403260707855225,
"logps/chosen": -229.6208038330078,
"logps/rejected": -227.60733032226562,
"loss": 0.6732,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.5344967842102051,
"rewards/margins": 0.058640915900468826,
"rewards/rejected": -0.5931377410888672,
"step": 260
},
{
"epoch": 0.7131066358534169,
"grad_norm": 1.810992406616959,
"learning_rate": 1.1448666088188763e-07,
"logits/chosen": -2.341614007949829,
"logits/rejected": -2.337841510772705,
"logps/chosen": -222.2272186279297,
"logps/rejected": -223.3246307373047,
"loss": 0.6716,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.5301269292831421,
"rewards/margins": 0.058558739721775055,
"rewards/rejected": -0.5886856317520142,
"step": 270
},
{
"epoch": 0.7395179927368769,
"grad_norm": 2.254614754539,
"learning_rate": 9.56804446775518e-08,
"logits/chosen": -2.372023582458496,
"logits/rejected": -2.3646726608276367,
"logps/chosen": -224.5756378173828,
"logps/rejected": -218.9865264892578,
"loss": 0.6733,
"rewards/accuracies": 0.567187488079071,
"rewards/chosen": -0.5263770818710327,
"rewards/margins": 0.061575133353471756,
"rewards/rejected": -0.5879522562026978,
"step": 280
},
{
"epoch": 0.7659293496203368,
"grad_norm": 2.086058658480454,
"learning_rate": 7.819082850768432e-08,
"logits/chosen": -2.3455469608306885,
"logits/rejected": -2.3464303016662598,
"logps/chosen": -224.189208984375,
"logps/rejected": -223.93930053710938,
"loss": 0.6693,
"rewards/accuracies": 0.5796874761581421,
"rewards/chosen": -0.499728262424469,
"rewards/margins": 0.06420499831438065,
"rewards/rejected": -0.563933253288269,
"step": 290
},
{
"epoch": 0.7923407065037966,
"grad_norm": 2.4783165290141382,
"learning_rate": 6.216702761078166e-08,
"logits/chosen": -2.3515076637268066,
"logits/rejected": -2.349012851715088,
"logps/chosen": -224.17892456054688,
"logps/rejected": -218.49209594726562,
"loss": 0.6696,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.5156034827232361,
"rewards/margins": 0.06623221933841705,
"rewards/rejected": -0.5818357467651367,
"step": 300
},
{
"epoch": 0.8187520633872565,
"grad_norm": 3.5523303199324854,
"learning_rate": 4.774575140626316e-08,
"logits/chosen": -2.342184543609619,
"logits/rejected": -2.3424227237701416,
"logps/chosen": -228.24533081054688,
"logps/rejected": -227.98974609375,
"loss": 0.6715,
"rewards/accuracies": 0.582812488079071,
"rewards/chosen": -0.5175895094871521,
"rewards/margins": 0.06086786836385727,
"rewards/rejected": -0.5784574151039124,
"step": 310
},
{
"epoch": 0.8451634202707164,
"grad_norm": 2.029707974108137,
"learning_rate": 3.5050037137906885e-08,
"logits/chosen": -2.3485968112945557,
"logits/rejected": -2.348895788192749,
"logps/chosen": -220.9480438232422,
"logps/rejected": -220.4337615966797,
"loss": 0.6734,
"rewards/accuracies": 0.586718738079071,
"rewards/chosen": -0.5326020121574402,
"rewards/margins": 0.06538228690624237,
"rewards/rejected": -0.5979843139648438,
"step": 320
},
{
"epoch": 0.8715747771541763,
"grad_norm": 1.9445736642879892,
"learning_rate": 2.4188200163467786e-08,
"logits/chosen": -2.3561959266662598,
"logits/rejected": -2.3535940647125244,
"logps/chosen": -223.76058959960938,
"logps/rejected": -222.8463592529297,
"loss": 0.671,
"rewards/accuracies": 0.5835937261581421,
"rewards/chosen": -0.5285231471061707,
"rewards/margins": 0.06788322329521179,
"rewards/rejected": -0.5964063405990601,
"step": 330
},
{
"epoch": 0.8979861340376362,
"grad_norm": 2.29874813836561,
"learning_rate": 1.5252909846235894e-08,
"logits/chosen": -2.358121395111084,
"logits/rejected": -2.3539624214172363,
"logps/chosen": -228.19039916992188,
"logps/rejected": -224.6275634765625,
"loss": 0.6684,
"rewards/accuracies": 0.58984375,
"rewards/chosen": -0.5251844525337219,
"rewards/margins": 0.06964431703090668,
"rewards/rejected": -0.5948287844657898,
"step": 340
},
{
"epoch": 0.924397490921096,
"grad_norm": 1.8422698404022058,
"learning_rate": 8.320398932703144e-09,
"logits/chosen": -2.3642985820770264,
"logits/rejected": -2.3563034534454346,
"logps/chosen": -229.8364715576172,
"logps/rejected": -225.88497924804688,
"loss": 0.669,
"rewards/accuracies": 0.577343761920929,
"rewards/chosen": -0.5176088213920593,
"rewards/margins": 0.06735256314277649,
"rewards/rejected": -0.5849614143371582,
"step": 350
},
{
"epoch": 0.950808847804556,
"grad_norm": 1.9065023664446652,
"learning_rate": 3.4498131616493565e-09,
"logits/chosen": -2.3470730781555176,
"logits/rejected": -2.347114324569702,
"logps/chosen": -225.67822265625,
"logps/rejected": -217.3546600341797,
"loss": 0.6727,
"rewards/accuracies": 0.5859375,
"rewards/chosen": -0.5201060771942139,
"rewards/margins": 0.0647495836019516,
"rewards/rejected": -0.5848556756973267,
"step": 360
},
{
"epoch": 0.9772202046880158,
"grad_norm": 2.1089219617743082,
"learning_rate": 6.827066535529947e-10,
"logits/chosen": -2.3542428016662598,
"logits/rejected": -2.351058006286621,
"logps/chosen": -226.4674835205078,
"logps/rejected": -225.4059295654297,
"loss": 0.6697,
"rewards/accuracies": 0.5609375238418579,
"rewards/chosen": -0.5389624834060669,
"rewards/margins": 0.05195971205830574,
"rewards/rejected": -0.5909221768379211,
"step": 370
},
{
"epoch": 0.9983492901947838,
"step": 378,
"total_flos": 0.0,
"train_loss": 0.6782766637347993,
"train_runtime": 9304.817,
"train_samples_per_second": 20.828,
"train_steps_per_second": 0.041
}
],
"logging_steps": 10,
"max_steps": 378,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}