{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9983492901947838,
  "eval_steps": 1000,
  "global_step": 378,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002641135688345989,
      "grad_norm": 0.9479714304489147,
      "learning_rate": 1.3157894736842104e-08,
      "logits/chosen": -2.2716729640960693,
      "logits/rejected": -2.241565704345703,
      "logps/chosen": -156.80194091796875,
      "logps/rejected": -147.06320190429688,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.02641135688345989,
      "grad_norm": 0.9657211466886476,
      "learning_rate": 1.3157894736842104e-07,
      "logits/chosen": -2.2696707248687744,
      "logits/rejected": -2.2592086791992188,
      "logps/chosen": -173.62896728515625,
      "logps/rejected": -168.29458618164062,
      "loss": 0.6931,
      "rewards/accuracies": 0.4470486044883728,
      "rewards/chosen": 0.00015925339539535344,
      "rewards/margins": 9.125900396611542e-05,
      "rewards/rejected": 6.799438415328041e-05,
      "step": 10
    },
    {
      "epoch": 0.05282271376691978,
      "grad_norm": 0.952148373000707,
      "learning_rate": 2.631578947368421e-07,
      "logits/chosen": -2.2733893394470215,
      "logits/rejected": -2.2671706676483154,
      "logps/chosen": -169.05018615722656,
      "logps/rejected": -169.22433471679688,
      "loss": 0.6931,
      "rewards/accuracies": 0.522656261920929,
      "rewards/chosen": -9.141029295278713e-05,
      "rewards/margins": 0.0001715569815132767,
      "rewards/rejected": -0.00026296728174202144,
      "step": 20
    },
    {
      "epoch": 0.07923407065037966,
      "grad_norm": 0.9659022104815798,
      "learning_rate": 3.9473684210526315e-07,
      "logits/chosen": -2.2659945487976074,
      "logits/rejected": -2.2476842403411865,
      "logps/chosen": -178.9219512939453,
      "logps/rejected": -169.46163940429688,
      "loss": 0.693,
      "rewards/accuracies": 0.5289062261581421,
      "rewards/chosen": -0.0015892453957349062,
      "rewards/margins": 0.0002000469685299322,
      "rewards/rejected": -0.0017892923206090927,
      "step": 30
    },
    {
      "epoch": 0.10564542753383956,
      "grad_norm": 0.9273301854247932,
      "learning_rate": 4.999573126145131e-07,
      "logits/chosen": -2.2755210399627686,
      "logits/rejected": -2.2551403045654297,
      "logps/chosen": -179.63047790527344,
      "logps/rejected": -163.6714630126953,
      "loss": 0.6926,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.0064210728742182255,
      "rewards/margins": 0.0012297846842557192,
      "rewards/rejected": -0.007650857325643301,
      "step": 40
    },
    {
      "epoch": 0.13205678441729943,
      "grad_norm": 0.9799377076485188,
      "learning_rate": 4.984647842238184e-07,
      "logits/chosen": -2.28324294090271,
      "logits/rejected": -2.2799413204193115,
      "logps/chosen": -168.7937469482422,
      "logps/rejected": -170.1733856201172,
      "loss": 0.6917,
      "rewards/accuracies": 0.5640624761581421,
      "rewards/chosen": -0.0193162951618433,
      "rewards/margins": 0.0024972439277917147,
      "rewards/rejected": -0.021813539788126945,
      "step": 50
    },
    {
      "epoch": 0.1584681413007593,
      "grad_norm": 0.9599459222723423,
      "learning_rate": 4.948524419003415e-07,
      "logits/chosen": -2.2814371585845947,
      "logits/rejected": -2.273639440536499,
      "logps/chosen": -173.1953582763672,
      "logps/rejected": -171.6744384765625,
      "loss": 0.6911,
      "rewards/accuracies": 0.547656238079071,
      "rewards/chosen": -0.03955007344484329,
      "rewards/margins": 0.00362972030416131,
      "rewards/rejected": -0.043179791420698166,
      "step": 60
    },
    {
      "epoch": 0.18487949818421923,
      "grad_norm": 0.9816456439800273,
      "learning_rate": 4.891511048751102e-07,
      "logits/chosen": -2.3176677227020264,
      "logits/rejected": -2.3122127056121826,
      "logps/chosen": -177.39488220214844,
      "logps/rejected": -167.91244506835938,
      "loss": 0.6896,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.06663568317890167,
      "rewards/margins": 0.008323188871145248,
      "rewards/rejected": -0.07495887577533722,
      "step": 70
    },
    {
      "epoch": 0.2112908550676791,
      "grad_norm": 1.0368469143747399,
      "learning_rate": 4.81409414945389e-07,
      "logits/chosen": -2.342482328414917,
      "logits/rejected": -2.3203022480010986,
      "logps/chosen": -195.66636657714844,
      "logps/rejected": -187.387939453125,
      "loss": 0.6878,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.08949045091867447,
      "rewards/margins": 0.01014741975814104,
      "rewards/rejected": -0.09963786602020264,
      "step": 80
    },
    {
      "epoch": 0.237702211951139,
      "grad_norm": 1.0442549418705969,
      "learning_rate": 4.7169342148001546e-07,
      "logits/chosen": -2.3382809162139893,
      "logits/rejected": -2.3266994953155518,
      "logps/chosen": -195.09518432617188,
      "logps/rejected": -182.37796020507812,
      "loss": 0.6874,
      "rewards/accuracies": 0.573437511920929,
      "rewards/chosen": -0.11916174739599228,
      "rewards/margins": 0.014569459483027458,
      "rewards/rejected": -0.13373121619224548,
      "step": 90
    },
    {
      "epoch": 0.26411356883459886,
      "grad_norm": 1.064035782351039,
      "learning_rate": 4.6008601790947314e-07,
      "logits/chosen": -2.321099042892456,
      "logits/rejected": -2.319929838180542,
      "logps/chosen": -187.78164672851562,
      "logps/rejected": -180.61904907226562,
      "loss": 0.6849,
      "rewards/accuracies": 0.5726562738418579,
      "rewards/chosen": -0.13006748259067535,
      "rewards/margins": 0.01908385381102562,
      "rewards/rejected": -0.14915132522583008,
      "step": 100
    },
    {
      "epoch": 0.29052492571805877,
      "grad_norm": 1.2007773205984222,
      "learning_rate": 4.466862345083708e-07,
      "logits/chosen": -2.321152925491333,
      "logits/rejected": -2.3174567222595215,
      "logps/chosen": -189.49288940429688,
      "logps/rejected": -185.0203094482422,
      "loss": 0.6838,
      "rewards/accuracies": 0.5640624761581421,
      "rewards/chosen": -0.15054509043693542,
      "rewards/margins": 0.022373218089342117,
      "rewards/rejected": -0.17291830480098724,
      "step": 110
    },
    {
      "epoch": 0.3169362826015186,
      "grad_norm": 1.4026602647859983,
      "learning_rate": 4.3160839350405605e-07,
      "logits/chosen": -2.310743808746338,
      "logits/rejected": -2.309847831726074,
      "logps/chosen": -188.81431579589844,
      "logps/rejected": -188.99505615234375,
      "loss": 0.6824,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": -0.16854415833950043,
      "rewards/margins": 0.026360681280493736,
      "rewards/rejected": -0.19490481913089752,
      "step": 120
    },
    {
      "epoch": 0.34334763948497854,
      "grad_norm": 1.2567707162408688,
      "learning_rate": 4.149811337196807e-07,
      "logits/chosen": -2.3110547065734863,
      "logits/rejected": -2.302724599838257,
      "logps/chosen": -201.25418090820312,
      "logps/rejected": -193.19810485839844,
      "loss": 0.6801,
      "rewards/accuracies": 0.5703125,
      "rewards/chosen": -0.20925810933113098,
      "rewards/margins": 0.028272386640310287,
      "rewards/rejected": -0.23753049969673157,
      "step": 130
    },
    {
      "epoch": 0.36975899636843845,
      "grad_norm": 1.3171163187862418,
      "learning_rate": 3.9694631307311825e-07,
      "logits/chosen": -2.310455560684204,
      "logits/rejected": -2.3021938800811768,
      "logps/chosen": -201.97586059570312,
      "logps/rejected": -196.26681518554688,
      "loss": 0.6814,
      "rewards/accuracies": 0.5679687261581421,
      "rewards/chosen": -0.267736554145813,
      "rewards/margins": 0.03569976985454559,
      "rewards/rejected": -0.3034363389015198,
      "step": 140
    },
    {
      "epoch": 0.3961703532518983,
      "grad_norm": 1.362073777172477,
      "learning_rate": 3.776577982952267e-07,
      "logits/chosen": -2.2907283306121826,
      "logits/rejected": -2.2906551361083984,
      "logps/chosen": -201.7469482421875,
      "logps/rejected": -202.30398559570312,
      "loss": 0.6805,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.3111319839954376,
      "rewards/margins": 0.03633497655391693,
      "rewards/rejected": -0.34746694564819336,
      "step": 150
    },
    {
      "epoch": 0.4225817101353582,
      "grad_norm": 2.093940167532845,
      "learning_rate": 3.572801521931522e-07,
      "logits/chosen": -2.309051752090454,
      "logits/rejected": -2.302381992340088,
      "logps/chosen": -203.4032745361328,
      "logps/rejected": -199.6255340576172,
      "loss": 0.6787,
      "rewards/accuracies": 0.577343761920929,
      "rewards/chosen": -0.34017449617385864,
      "rewards/margins": 0.04339217022061348,
      "rewards/rejected": -0.38356661796569824,
      "step": 160
    },
    {
      "epoch": 0.4489930670188181,
      "grad_norm": 1.9348127254360297,
      "learning_rate": 3.35987229658482e-07,
      "logits/chosen": -2.3065199851989746,
      "logits/rejected": -2.2967591285705566,
      "logps/chosen": -206.21694946289062,
      "logps/rejected": -201.9674835205078,
      "loss": 0.6763,
      "rewards/accuracies": 0.5726562738418579,
      "rewards/chosen": -0.3637041449546814,
      "rewards/margins": 0.04687776044011116,
      "rewards/rejected": -0.41058191657066345,
      "step": 170
    },
    {
      "epoch": 0.475404423902278,
      "grad_norm": 1.6138340474053996,
      "learning_rate": 3.139606943986089e-07,
      "logits/chosen": -2.295161247253418,
      "logits/rejected": -2.2992234230041504,
      "logps/chosen": -216.4457550048828,
      "logps/rejected": -214.0793914794922,
      "loss": 0.6756,
      "rewards/accuracies": 0.5601562261581421,
      "rewards/chosen": -0.4113912582397461,
      "rewards/margins": 0.0445592924952507,
      "rewards/rejected": -0.4559505581855774,
      "step": 180
    },
    {
      "epoch": 0.5018157807857379,
      "grad_norm": 1.8694312571116225,
      "learning_rate": 2.913884690460325e-07,
      "logits/chosen": -2.332610607147217,
      "logits/rejected": -2.3261446952819824,
      "logps/chosen": -223.5069122314453,
      "logps/rejected": -212.2234344482422,
      "loss": 0.6734,
      "rewards/accuracies": 0.5757812261581421,
      "rewards/chosen": -0.45355916023254395,
      "rewards/margins": 0.04530250281095505,
      "rewards/rejected": -0.4988616406917572,
      "step": 190
    },
    {
      "epoch": 0.5282271376691977,
      "grad_norm": 1.6606210676628292,
      "learning_rate": 2.684631318687185e-07,
      "logits/chosen": -2.3509981632232666,
      "logits/rejected": -2.3406052589416504,
      "logps/chosen": -226.03836059570312,
      "logps/rejected": -223.758544921875,
      "loss": 0.678,
      "rewards/accuracies": 0.565625011920929,
      "rewards/chosen": -0.45515409111976624,
      "rewards/margins": 0.0432661809027195,
      "rewards/rejected": -0.4984202980995178,
      "step": 200
    },
    {
      "epoch": 0.5546384945526577,
      "grad_norm": 1.7021845659792814,
      "learning_rate": 2.4538027376021755e-07,
      "logits/chosen": -2.3477015495300293,
      "logits/rejected": -2.338550090789795,
      "logps/chosen": -220.02816772460938,
      "logps/rejected": -216.43405151367188,
      "loss": 0.6703,
      "rewards/accuracies": 0.5804687738418579,
      "rewards/chosen": -0.44584885239601135,
      "rewards/margins": 0.060957133769989014,
      "rewards/rejected": -0.5068060159683228,
      "step": 210
    },
    {
      "epoch": 0.5810498514361175,
      "grad_norm": 1.8757866922329256,
      "learning_rate": 2.2233682952712483e-07,
      "logits/chosen": -2.33669376373291,
      "logits/rejected": -2.332843780517578,
      "logps/chosen": -223.82211303710938,
      "logps/rejected": -217.61489868164062,
      "loss": 0.6712,
      "rewards/accuracies": 0.586718738079071,
      "rewards/chosen": -0.4329432547092438,
      "rewards/margins": 0.06651361286640167,
      "rewards/rejected": -0.49945688247680664,
      "step": 220
    },
    {
      "epoch": 0.6074612083195774,
      "grad_norm": 2.334294772609141,
      "learning_rate": 1.995293977107475e-07,
      "logits/chosen": -2.3312466144561768,
      "logits/rejected": -2.326164722442627,
      "logps/chosen": -219.48025512695312,
      "logps/rejected": -222.7107391357422,
      "loss": 0.671,
      "rewards/accuracies": 0.5757812261581421,
      "rewards/chosen": -0.4052560329437256,
      "rewards/margins": 0.058724187314510345,
      "rewards/rejected": -0.4639802575111389,
      "step": 230
    },
    {
      "epoch": 0.6338725652030373,
      "grad_norm": 1.8694677367469326,
      "learning_rate": 1.7715256327766884e-07,
      "logits/chosen": -2.3552398681640625,
      "logits/rejected": -2.340351104736328,
      "logps/chosen": -226.2518310546875,
      "logps/rejected": -220.1430206298828,
      "loss": 0.6717,
      "rewards/accuracies": 0.563281238079071,
      "rewards/chosen": -0.43269434571266174,
      "rewards/margins": 0.05727803707122803,
      "rewards/rejected": -0.48997241258621216,
      "step": 240
    },
    {
      "epoch": 0.6602839220864972,
      "grad_norm": 2.0002930172446627,
      "learning_rate": 1.5539723748942242e-07,
      "logits/chosen": -2.35202956199646,
      "logits/rejected": -2.3460869789123535,
      "logps/chosen": -219.1632537841797,
      "logps/rejected": -222.41403198242188,
      "loss": 0.6715,
      "rewards/accuracies": 0.6031249761581421,
      "rewards/chosen": -0.4845377504825592,
      "rewards/margins": 0.07130294293165207,
      "rewards/rejected": -0.5558406710624695,
      "step": 250
    },
    {
      "epoch": 0.6866952789699571,
      "grad_norm": 2.0423010510210973,
      "learning_rate": 1.3444902911492174e-07,
      "logits/chosen": -2.3457489013671875,
      "logits/rejected": -2.3403260707855225,
      "logps/chosen": -229.6208038330078,
      "logps/rejected": -227.60733032226562,
      "loss": 0.6732,
      "rewards/accuracies": 0.590624988079071,
      "rewards/chosen": -0.5344967842102051,
      "rewards/margins": 0.058640915900468826,
      "rewards/rejected": -0.5931377410888672,
      "step": 260
    },
    {
      "epoch": 0.7131066358534169,
      "grad_norm": 1.810992406616959,
      "learning_rate": 1.1448666088188763e-07,
      "logits/chosen": -2.341614007949829,
      "logits/rejected": -2.337841510772705,
      "logps/chosen": -222.2272186279297,
      "logps/rejected": -223.3246307373047,
      "loss": 0.6716,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.5301269292831421,
      "rewards/margins": 0.058558739721775055,
      "rewards/rejected": -0.5886856317520142,
      "step": 270
    },
    {
      "epoch": 0.7395179927368769,
      "grad_norm": 2.254614754539,
      "learning_rate": 9.56804446775518e-08,
      "logits/chosen": -2.372023582458496,
      "logits/rejected": -2.3646726608276367,
      "logps/chosen": -224.5756378173828,
      "logps/rejected": -218.9865264892578,
      "loss": 0.6733,
      "rewards/accuracies": 0.567187488079071,
      "rewards/chosen": -0.5263770818710327,
      "rewards/margins": 0.061575133353471756,
      "rewards/rejected": -0.5879522562026978,
      "step": 280
    },
    {
      "epoch": 0.7659293496203368,
      "grad_norm": 2.086058658480454,
      "learning_rate": 7.819082850768432e-08,
      "logits/chosen": -2.3455469608306885,
      "logits/rejected": -2.3464303016662598,
      "logps/chosen": -224.189208984375,
      "logps/rejected": -223.93930053710938,
      "loss": 0.6693,
      "rewards/accuracies": 0.5796874761581421,
      "rewards/chosen": -0.499728262424469,
      "rewards/margins": 0.06420499831438065,
      "rewards/rejected": -0.563933253288269,
      "step": 290
    },
    {
      "epoch": 0.7923407065037966,
      "grad_norm": 2.4783165290141382,
      "learning_rate": 6.216702761078166e-08,
      "logits/chosen": -2.3515076637268066,
      "logits/rejected": -2.349012851715088,
      "logps/chosen": -224.17892456054688,
      "logps/rejected": -218.49209594726562,
      "loss": 0.6696,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": -0.5156034827232361,
      "rewards/margins": 0.06623221933841705,
      "rewards/rejected": -0.5818357467651367,
      "step": 300
    },
    {
      "epoch": 0.8187520633872565,
      "grad_norm": 3.5523303199324854,
      "learning_rate": 4.774575140626316e-08,
      "logits/chosen": -2.342184543609619,
      "logits/rejected": -2.3424227237701416,
      "logps/chosen": -228.24533081054688,
      "logps/rejected": -227.98974609375,
      "loss": 0.6715,
      "rewards/accuracies": 0.582812488079071,
      "rewards/chosen": -0.5175895094871521,
      "rewards/margins": 0.06086786836385727,
      "rewards/rejected": -0.5784574151039124,
      "step": 310
    },
    {
      "epoch": 0.8451634202707164,
      "grad_norm": 2.029707974108137,
      "learning_rate": 3.5050037137906885e-08,
      "logits/chosen": -2.3485968112945557,
      "logits/rejected": -2.348895788192749,
      "logps/chosen": -220.9480438232422,
      "logps/rejected": -220.4337615966797,
      "loss": 0.6734,
      "rewards/accuracies": 0.586718738079071,
      "rewards/chosen": -0.5326020121574402,
      "rewards/margins": 0.06538228690624237,
      "rewards/rejected": -0.5979843139648438,
      "step": 320
    },
    {
      "epoch": 0.8715747771541763,
      "grad_norm": 1.9445736642879892,
      "learning_rate": 2.4188200163467786e-08,
      "logits/chosen": -2.3561959266662598,
      "logits/rejected": -2.3535940647125244,
      "logps/chosen": -223.76058959960938,
      "logps/rejected": -222.8463592529297,
      "loss": 0.671,
      "rewards/accuracies": 0.5835937261581421,
      "rewards/chosen": -0.5285231471061707,
      "rewards/margins": 0.06788322329521179,
      "rewards/rejected": -0.5964063405990601,
      "step": 330
    },
    {
      "epoch": 0.8979861340376362,
      "grad_norm": 2.29874813836561,
      "learning_rate": 1.5252909846235894e-08,
      "logits/chosen": -2.358121395111084,
      "logits/rejected": -2.3539624214172363,
      "logps/chosen": -228.19039916992188,
      "logps/rejected": -224.6275634765625,
      "loss": 0.6684,
      "rewards/accuracies": 0.58984375,
      "rewards/chosen": -0.5251844525337219,
      "rewards/margins": 0.06964431703090668,
      "rewards/rejected": -0.5948287844657898,
      "step": 340
    },
    {
      "epoch": 0.924397490921096,
      "grad_norm": 1.8422698404022058,
      "learning_rate": 8.320398932703144e-09,
      "logits/chosen": -2.3642985820770264,
      "logits/rejected": -2.3563034534454346,
      "logps/chosen": -229.8364715576172,
      "logps/rejected": -225.88497924804688,
      "loss": 0.669,
      "rewards/accuracies": 0.577343761920929,
      "rewards/chosen": -0.5176088213920593,
      "rewards/margins": 0.06735256314277649,
      "rewards/rejected": -0.5849614143371582,
      "step": 350
    },
    {
      "epoch": 0.950808847804556,
      "grad_norm": 1.9065023664446652,
      "learning_rate": 3.4498131616493565e-09,
      "logits/chosen": -2.3470730781555176,
      "logits/rejected": -2.347114324569702,
      "logps/chosen": -225.67822265625,
      "logps/rejected": -217.3546600341797,
      "loss": 0.6727,
      "rewards/accuracies": 0.5859375,
      "rewards/chosen": -0.5201060771942139,
      "rewards/margins": 0.0647495836019516,
      "rewards/rejected": -0.5848556756973267,
      "step": 360
    },
    {
      "epoch": 0.9772202046880158,
      "grad_norm": 2.1089219617743082,
      "learning_rate": 6.827066535529947e-10,
      "logits/chosen": -2.3542428016662598,
      "logits/rejected": -2.351058006286621,
      "logps/chosen": -226.4674835205078,
      "logps/rejected": -225.4059295654297,
      "loss": 0.6697,
      "rewards/accuracies": 0.5609375238418579,
      "rewards/chosen": -0.5389624834060669,
      "rewards/margins": 0.05195971205830574,
      "rewards/rejected": -0.5909221768379211,
      "step": 370
    },
    {
      "epoch": 0.9983492901947838,
      "step": 378,
      "total_flos": 0.0,
      "train_loss": 0.6782766637347993,
      "train_runtime": 9304.817,
      "train_samples_per_second": 20.828,
      "train_steps_per_second": 0.041
    }
  ],
  "logging_steps": 10,
  "max_steps": 378,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}