{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.998691442030882,
  "eval_steps": 500,
  "global_step": 477,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010468463752944255,
      "grad_norm": 34.35367990587695,
      "learning_rate": 6.25e-08,
      "logits/chosen": -0.49797338247299194,
      "logits/rejected": -0.5135231018066406,
      "logps/chosen": -1.1745355129241943,
      "logps/rejected": -1.3596293926239014,
      "loss": 2.1735,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -1.1745355129241943,
      "rewards/margins": 0.18509384989738464,
      "rewards/rejected": -1.3596293926239014,
      "step": 5
    },
    {
      "epoch": 0.02093692750588851,
      "grad_norm": 16.378216146989434,
      "learning_rate": 1.25e-07,
      "logits/chosen": -0.521752655506134,
      "logits/rejected": -0.4988512396812439,
      "logps/chosen": -1.1591465473175049,
      "logps/rejected": -1.2624419927597046,
      "loss": 2.1407,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -1.1591465473175049,
      "rewards/margins": 0.10329560935497284,
      "rewards/rejected": -1.2624419927597046,
      "step": 10
    },
    {
      "epoch": 0.031405391258832765,
      "grad_norm": 22.732280640563598,
      "learning_rate": 1.875e-07,
      "logits/chosen": -0.46235981583595276,
      "logits/rejected": -0.4507545530796051,
      "logps/chosen": -1.1068508625030518,
      "logps/rejected": -1.361823558807373,
      "loss": 2.1077,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -1.1068508625030518,
      "rewards/margins": 0.2549726366996765,
      "rewards/rejected": -1.361823558807373,
      "step": 15
    },
    {
      "epoch": 0.04187385501177702,
      "grad_norm": 22.78289950681047,
      "learning_rate": 2.5e-07,
      "logits/chosen": -0.4406924247741699,
      "logits/rejected": -0.4528113007545471,
      "logps/chosen": -1.161055564880371,
      "logps/rejected": -1.2642455101013184,
      "loss": 2.167,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -1.161055564880371,
      "rewards/margins": 0.10318990051746368,
      "rewards/rejected": -1.2642455101013184,
      "step": 20
    },
    {
      "epoch": 0.05234231876472128,
      "grad_norm": 13.313947854898018,
      "learning_rate": 3.125e-07,
      "logits/chosen": -0.5021263360977173,
      "logits/rejected": -0.47814303636550903,
      "logps/chosen": -1.1769291162490845,
      "logps/rejected": -1.2403558492660522,
      "loss": 2.1418,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -1.1769291162490845,
      "rewards/margins": 0.06342674791812897,
      "rewards/rejected": -1.2403558492660522,
      "step": 25
    },
    {
      "epoch": 0.06281078251766553,
      "grad_norm": 21.17299953179056,
      "learning_rate": 3.75e-07,
      "logits/chosen": -0.4899294972419739,
      "logits/rejected": -0.49411076307296753,
      "logps/chosen": -1.1576581001281738,
      "logps/rejected": -1.280582070350647,
      "loss": 2.1692,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -1.1576581001281738,
      "rewards/margins": 0.12292404472827911,
      "rewards/rejected": -1.280582070350647,
      "step": 30
    },
    {
      "epoch": 0.07327924627060979,
      "grad_norm": 21.15597636264796,
      "learning_rate": 4.3749999999999994e-07,
      "logits/chosen": -0.4856337904930115,
      "logits/rejected": -0.4433709979057312,
      "logps/chosen": -1.1427704095840454,
      "logps/rejected": -1.2787848711013794,
      "loss": 2.134,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -1.1427704095840454,
      "rewards/margins": 0.13601449131965637,
      "rewards/rejected": -1.2787848711013794,
      "step": 35
    },
    {
      "epoch": 0.08374771002355404,
      "grad_norm": 29.038714028264685,
      "learning_rate": 5e-07,
      "logits/chosen": -0.4945921301841736,
      "logits/rejected": -0.4987305998802185,
      "logps/chosen": -1.0738334655761719,
      "logps/rejected": -1.39645516872406,
      "loss": 2.0884,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -1.0738334655761719,
      "rewards/margins": 0.3226216435432434,
      "rewards/rejected": -1.39645516872406,
      "step": 40
    },
    {
      "epoch": 0.0942161737764983,
      "grad_norm": 30.530804134708777,
      "learning_rate": 5.625e-07,
      "logits/chosen": -0.45864447951316833,
      "logits/rejected": -0.4690025746822357,
      "logps/chosen": -1.090343952178955,
      "logps/rejected": -1.317134976387024,
      "loss": 2.1074,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -1.090343952178955,
      "rewards/margins": 0.22679109871387482,
      "rewards/rejected": -1.317134976387024,
      "step": 45
    },
    {
      "epoch": 0.10468463752944256,
      "grad_norm": 21.789634951246953,
      "learning_rate": 5.999678242522831e-07,
      "logits/chosen": -0.4777728021144867,
      "logits/rejected": -0.49264296889305115,
      "logps/chosen": -1.1642675399780273,
      "logps/rejected": -1.4595439434051514,
      "loss": 2.1327,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -1.1642675399780273,
      "rewards/margins": 0.2952764332294464,
      "rewards/rejected": -1.4595439434051514,
      "step": 50
    },
    {
      "epoch": 0.11515310128238682,
      "grad_norm": 175.8483724549221,
      "learning_rate": 5.996059263493219e-07,
      "logits/chosen": -0.4417606294155121,
      "logits/rejected": -0.4309404492378235,
      "logps/chosen": -1.1178853511810303,
      "logps/rejected": -1.343202829360962,
      "loss": 2.106,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -1.1178853511810303,
      "rewards/margins": 0.22531744837760925,
      "rewards/rejected": -1.343202829360962,
      "step": 55
    },
    {
      "epoch": 0.12562156503533106,
      "grad_norm": 18.6141832401793,
      "learning_rate": 5.988423976115163e-07,
      "logits/chosen": -0.48267728090286255,
      "logits/rejected": -0.47969865798950195,
      "logps/chosen": -1.2083253860473633,
      "logps/rejected": -1.33084237575531,
      "loss": 2.0877,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -1.2083253860473633,
      "rewards/margins": 0.12251707166433334,
      "rewards/rejected": -1.33084237575531,
      "step": 60
    },
    {
      "epoch": 0.1360900287882753,
      "grad_norm": 32.36335168763983,
      "learning_rate": 5.976782615723061e-07,
      "logits/chosen": -0.44251489639282227,
      "logits/rejected": -0.40677833557128906,
      "logps/chosen": -1.102402925491333,
      "logps/rejected": -1.6289558410644531,
      "loss": 2.0703,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -1.102402925491333,
      "rewards/margins": 0.5265528559684753,
      "rewards/rejected": -1.6289558410644531,
      "step": 65
    },
    {
      "epoch": 0.14655849254121958,
      "grad_norm": 48.08034129839621,
      "learning_rate": 5.961150787913738e-07,
      "logits/chosen": -0.3722413182258606,
      "logits/rejected": -0.3657040297985077,
      "logps/chosen": -1.1620112657546997,
      "logps/rejected": -1.426941990852356,
      "loss": 2.0511,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -1.1620112657546997,
      "rewards/margins": 0.2649305760860443,
      "rewards/rejected": -1.426941990852356,
      "step": 70
    },
    {
      "epoch": 0.15702695629416383,
      "grad_norm": 59.84416296380324,
      "learning_rate": 5.941549447626671e-07,
      "logits/chosen": -0.37373992800712585,
      "logits/rejected": -0.3564635217189789,
      "logps/chosen": -1.1539630889892578,
      "logps/rejected": -1.505507230758667,
      "loss": 2.0661,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -1.1539630889892578,
      "rewards/margins": 0.3515442907810211,
      "rewards/rejected": -1.505507230758667,
      "step": 75
    },
    {
      "epoch": 0.16749542004710807,
      "grad_norm": 25.67817680738418,
      "learning_rate": 5.918004871053251e-07,
      "logits/chosen": -0.4051768183708191,
      "logits/rejected": -0.38645023107528687,
      "logps/chosen": -1.1455223560333252,
      "logps/rejected": -1.5474026203155518,
      "loss": 2.0988,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -1.1455223560333252,
      "rewards/margins": 0.40188026428222656,
      "rewards/rejected": -1.5474026203155518,
      "step": 80
    },
    {
      "epoch": 0.17796388380005235,
      "grad_norm": 34.17749794765947,
      "learning_rate": 5.890548620412763e-07,
      "logits/chosen": -0.45642200112342834,
      "logits/rejected": -0.42394012212753296,
      "logps/chosen": -1.1208285093307495,
      "logps/rejected": -1.4425103664398193,
      "loss": 2.0996,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -1.1208285093307495,
      "rewards/margins": 0.3216818571090698,
      "rewards/rejected": -1.4425103664398193,
      "step": 85
    },
    {
      "epoch": 0.1884323475529966,
      "grad_norm": 10.063019778918575,
      "learning_rate": 5.859217501642258e-07,
      "logits/chosen": -0.4428345561027527,
      "logits/rejected": -0.4144333004951477,
      "logps/chosen": -1.1374626159667969,
      "logps/rejected": -1.4302767515182495,
      "loss": 2.0461,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -1.1374626159667969,
      "rewards/margins": 0.2928140461444855,
      "rewards/rejected": -1.4302767515182495,
      "step": 90
    },
    {
      "epoch": 0.19890081130594087,
      "grad_norm": 20.74891634135048,
      "learning_rate": 5.824053515057091e-07,
      "logits/chosen": -0.479747474193573,
      "logits/rejected": -0.39375734329223633,
      "logps/chosen": -1.159339189529419,
      "logps/rejected": -1.4628812074661255,
      "loss": 2.0797,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -1.159339189529419,
      "rewards/margins": 0.30354195833206177,
      "rewards/rejected": -1.4628812074661255,
      "step": 95
    },
    {
      "epoch": 0.2093692750588851,
      "grad_norm": 33.44699335449945,
      "learning_rate": 5.785103799048218e-07,
      "logits/chosen": -0.4181644022464752,
      "logits/rejected": -0.38857489824295044,
      "logps/chosen": -1.1860034465789795,
      "logps/rejected": -1.7242858409881592,
      "loss": 2.0744,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -1.1860034465789795,
      "rewards/margins": 0.5382825136184692,
      "rewards/rejected": -1.7242858409881592,
      "step": 100
    },
    {
      "epoch": 0.21983773881182936,
      "grad_norm": 59.5883485681765,
      "learning_rate": 5.742420566891749e-07,
      "logits/chosen": -0.4103716015815735,
      "logits/rejected": -0.3968586027622223,
      "logps/chosen": -1.076053261756897,
      "logps/rejected": -1.4917399883270264,
      "loss": 2.0443,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -1.076053261756897,
      "rewards/margins": 0.4156867563724518,
      "rewards/rejected": -1.4917399883270264,
      "step": 105
    },
    {
      "epoch": 0.23030620256477363,
      "grad_norm": 80.31752384431189,
      "learning_rate": 5.696061036755478e-07,
      "logits/chosen": -0.4146521985530853,
      "logits/rejected": -0.38292473554611206,
      "logps/chosen": -1.0837781429290771,
      "logps/rejected": -1.5503333806991577,
      "loss": 2.0282,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -1.0837781429290771,
      "rewards/margins": 0.4665554165840149,
      "rewards/rejected": -1.5503333806991577,
      "step": 110
    },
    {
      "epoch": 0.24077466631771788,
      "grad_norm": 98.56244903302768,
      "learning_rate": 5.64608735499618e-07,
      "logits/chosen": -0.28679025173187256,
      "logits/rejected": -0.2377845048904419,
      "logps/chosen": -1.111647367477417,
      "logps/rejected": -1.687975287437439,
      "loss": 2.0185,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -1.111647367477417,
      "rewards/margins": 0.576327919960022,
      "rewards/rejected": -1.687975287437439,
      "step": 115
    },
    {
      "epoch": 0.2512431300706621,
      "grad_norm": 20.27186258736798,
      "learning_rate": 5.592566512850545e-07,
      "logits/chosen": -0.27318406105041504,
      "logits/rejected": -0.2487379014492035,
      "logps/chosen": -1.2702689170837402,
      "logps/rejected": -1.556516408920288,
      "loss": 2.0975,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -1.2702689170837402,
      "rewards/margins": 0.2862473428249359,
      "rewards/rejected": -1.556516408920288,
      "step": 120
    },
    {
      "epoch": 0.26171159382360637,
      "grad_norm": 30.996972753737214,
      "learning_rate": 5.535570256631384e-07,
      "logits/chosen": -0.32224705815315247,
      "logits/rejected": -0.2949572503566742,
      "logps/chosen": -1.0966984033584595,
      "logps/rejected": -1.425978660583496,
      "loss": 2.0513,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -1.0966984033584595,
      "rewards/margins": 0.3292803168296814,
      "rewards/rejected": -1.425978660583496,
      "step": 125
    },
    {
      "epoch": 0.2721800575765506,
      "grad_norm": 106.95100145667757,
      "learning_rate": 5.475174991549528e-07,
      "logits/chosen": -0.2838582396507263,
      "logits/rejected": -0.27573472261428833,
      "logps/chosen": -1.065918207168579,
      "logps/rejected": -1.2746469974517822,
      "loss": 2.0692,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -1.065918207168579,
      "rewards/margins": 0.2087288349866867,
      "rewards/rejected": -1.2746469974517822,
      "step": 130
    },
    {
      "epoch": 0.2826485213294949,
      "grad_norm": 18.450255655703543,
      "learning_rate": 5.411461679290317e-07,
      "logits/chosen": -0.33663275837898254,
      "logits/rejected": -0.24262118339538574,
      "logps/chosen": -1.1360746622085571,
      "logps/rejected": -1.793914556503296,
      "loss": 2.0112,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -1.1360746622085571,
      "rewards/margins": 0.6578398942947388,
      "rewards/rejected": -1.793914556503296,
      "step": 135
    },
    {
      "epoch": 0.29311698508243916,
      "grad_norm": 20.28586481649801,
      "learning_rate": 5.34451572948201e-07,
      "logits/chosen": -0.25463438034057617,
      "logits/rejected": -0.18874910473823547,
      "logps/chosen": -1.2043225765228271,
      "logps/rejected": -1.7759593725204468,
      "loss": 1.9956,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -1.2043225765228271,
      "rewards/margins": 0.5716367959976196,
      "rewards/rejected": -1.7759593725204468,
      "step": 140
    },
    {
      "epoch": 0.3035854488353834,
      "grad_norm": 16.12998962894494,
      "learning_rate": 5.274426885201582e-07,
      "logits/chosen": -0.3120715320110321,
      "logits/rejected": -0.28251615166664124,
      "logps/chosen": -1.1517010927200317,
      "logps/rejected": -1.5577958822250366,
      "loss": 2.04,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -1.1517010927200317,
      "rewards/margins": 0.4060949683189392,
      "rewards/rejected": -1.5577958822250366,
      "step": 145
    },
    {
      "epoch": 0.31405391258832765,
      "grad_norm": 18.374398699221082,
      "learning_rate": 5.201289102671411e-07,
      "logits/chosen": -0.29038459062576294,
      "logits/rejected": -0.25689178705215454,
      "logps/chosen": -1.0535448789596558,
      "logps/rejected": -1.4643501043319702,
      "loss": 2.0025,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -1.0535448789596558,
      "rewards/margins": 0.41080522537231445,
      "rewards/rejected": -1.4643501043319702,
      "step": 150
    },
    {
      "epoch": 0.3245223763412719,
      "grad_norm": 21.42424041369703,
      "learning_rate": 5.12520042530811e-07,
      "logits/chosen": -0.33090248703956604,
      "logits/rejected": -0.26949256658554077,
      "logps/chosen": -1.1385178565979004,
      "logps/rejected": -1.5117579698562622,
      "loss": 2.0025,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -1.1385178565979004,
      "rewards/margins": 0.3732401728630066,
      "rewards/rejected": -1.5117579698562622,
      "step": 155
    },
    {
      "epoch": 0.33499084009421615,
      "grad_norm": 17.090434114730737,
      "learning_rate": 5.046262852292346e-07,
      "logits/chosen": -0.2471882402896881,
      "logits/rejected": -0.1910603940486908,
      "logps/chosen": -1.179958701133728,
      "logps/rejected": -1.6289422512054443,
      "loss": 2.0344,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -1.179958701133728,
      "rewards/margins": 0.4489835202693939,
      "rewards/rejected": -1.6289422512054443,
      "step": 160
    },
    {
      "epoch": 0.34545930384716045,
      "grad_norm": 13.121179286172973,
      "learning_rate": 4.964582201835856e-07,
      "logits/chosen": -0.2789207696914673,
      "logits/rejected": -0.2165801227092743,
      "logps/chosen": -1.1143217086791992,
      "logps/rejected": -1.6779086589813232,
      "loss": 2.012,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -1.1143217086791992,
      "rewards/margins": 0.5635868906974792,
      "rewards/rejected": -1.6779086589813232,
      "step": 165
    },
    {
      "epoch": 0.3559277676001047,
      "grad_norm": 13.822111204791623,
      "learning_rate": 4.880267969328908e-07,
      "logits/chosen": -0.26305219531059265,
      "logits/rejected": -0.16298075020313263,
      "logps/chosen": -1.2098379135131836,
      "logps/rejected": -1.6500240564346313,
      "loss": 2.0205,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -1.2098379135131836,
      "rewards/margins": 0.44018617272377014,
      "rewards/rejected": -1.6500240564346313,
      "step": 170
    },
    {
      "epoch": 0.36639623135304894,
      "grad_norm": 18.589343338653535,
      "learning_rate": 4.793433180558423e-07,
      "logits/chosen": -0.26549848914146423,
      "logits/rejected": -0.13549579679965973,
      "logps/chosen": -1.1815907955169678,
      "logps/rejected": -1.6547002792358398,
      "loss": 2.0137,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -1.1815907955169678,
      "rewards/margins": 0.4731093943119049,
      "rewards/rejected": -1.6547002792358398,
      "step": 175
    },
    {
      "epoch": 0.3768646951059932,
      "grad_norm": 24.192793785270847,
      "learning_rate": 4.704194240193467e-07,
      "logits/chosen": -0.20712879300117493,
      "logits/rejected": -0.14872625470161438,
      "logps/chosen": -1.2047398090362549,
      "logps/rejected": -1.6949182748794556,
      "loss": 2.0473,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -1.2047398090362549,
      "rewards/margins": 0.49017828702926636,
      "rewards/rejected": -1.6949182748794556,
      "step": 180
    },
    {
      "epoch": 0.38733315885893743,
      "grad_norm": 21.72801779202531,
      "learning_rate": 4.6126707757412686e-07,
      "logits/chosen": -0.19427147507667542,
      "logits/rejected": -0.08655323088169098,
      "logps/chosen": -1.2102793455123901,
      "logps/rejected": -1.956017255783081,
      "loss": 1.9488,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -1.2102793455123901,
      "rewards/margins": 0.745737612247467,
      "rewards/rejected": -1.956017255783081,
      "step": 185
    },
    {
      "epoch": 0.39780162261188173,
      "grad_norm": 52.99611536180731,
      "learning_rate": 4.5189854771829086e-07,
      "logits/chosen": -0.27349403500556946,
      "logits/rejected": -0.19519653916358948,
      "logps/chosen": -1.2331929206848145,
      "logps/rejected": -1.693256139755249,
      "loss": 2.0896,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -1.2331929206848145,
      "rewards/margins": 0.4600633680820465,
      "rewards/rejected": -1.693256139755249,
      "step": 190
    },
    {
      "epoch": 0.408270086364826,
      "grad_norm": 23.313047834378704,
      "learning_rate": 4.4232639325036807e-07,
      "logits/chosen": -0.2565682530403137,
      "logits/rejected": -0.20012107491493225,
      "logps/chosen": -1.2263553142547607,
      "logps/rejected": -1.6081535816192627,
      "loss": 2.0274,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -1.2263553142547607,
      "rewards/margins": 0.38179832696914673,
      "rewards/rejected": -1.6081535816192627,
      "step": 195
    },
    {
      "epoch": 0.4187385501177702,
      "grad_norm": 28.77315959004056,
      "learning_rate": 4.32563445933859e-07,
      "logits/chosen": -0.2804745137691498,
      "logits/rejected": -0.2464865893125534,
      "logps/chosen": -1.2270160913467407,
      "logps/rejected": -1.6264985799789429,
      "loss": 2.046,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -1.2270160913467407,
      "rewards/margins": 0.3994825482368469,
      "rewards/rejected": -1.6264985799789429,
      "step": 200
    },
    {
      "epoch": 0.42920701387071447,
      "grad_norm": 24.629126590195398,
      "learning_rate": 4.226227932958664e-07,
      "logits/chosen": -0.21598652005195618,
      "logits/rejected": -0.16539430618286133,
      "logps/chosen": -1.0299084186553955,
      "logps/rejected": -1.6446430683135986,
      "loss": 1.9694,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -1.0299084186553955,
      "rewards/margins": 0.6147347688674927,
      "rewards/rejected": -1.6446430683135986,
      "step": 205
    },
    {
      "epoch": 0.4396754776236587,
      "grad_norm": 18.99935065324488,
      "learning_rate": 4.1251776108286854e-07,
      "logits/chosen": -0.24660630524158478,
      "logits/rejected": -0.19936877489089966,
      "logps/chosen": -1.2271109819412231,
      "logps/rejected": -1.5406509637832642,
      "loss": 2.0403,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -1.2271109819412231,
      "rewards/margins": 0.31353995203971863,
      "rewards/rejected": -1.5406509637832642,
      "step": 210
    },
    {
      "epoch": 0.45014394137660296,
      "grad_norm": 28.14089359211311,
      "learning_rate": 4.022618953971514e-07,
      "logits/chosen": -0.28811579942703247,
      "logits/rejected": -0.23570355772972107,
      "logps/chosen": -1.1424353122711182,
      "logps/rejected": -1.6885287761688232,
      "loss": 2.0049,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -1.1424353122711182,
      "rewards/margins": 0.5460935831069946,
      "rewards/rejected": -1.6885287761688232,
      "step": 215
    },
    {
      "epoch": 0.46061240512954726,
      "grad_norm": 32.89974992330637,
      "learning_rate": 3.918689445378477e-07,
      "logits/chosen": -0.30860984325408936,
      "logits/rejected": -0.18307650089263916,
      "logps/chosen": -1.2112575769424438,
      "logps/rejected": -1.7577863931655884,
      "loss": 2.0035,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -1.2112575769424438,
      "rewards/margins": 0.5465287566184998,
      "rewards/rejected": -1.7577863931655884,
      "step": 220
    },
    {
      "epoch": 0.4710808688824915,
      "grad_norm": 20.40577743783828,
      "learning_rate": 3.813528405709251e-07,
      "logits/chosen": -0.29336491227149963,
      "logits/rejected": -0.19095419347286224,
      "logps/chosen": -1.1172130107879639,
      "logps/rejected": -1.7578521966934204,
      "loss": 1.949,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -1.1172130107879639,
      "rewards/margins": 0.6406393051147461,
      "rewards/rejected": -1.7578521966934204,
      "step": 225
    },
    {
      "epoch": 0.48154933263543576,
      "grad_norm": 22.351884239960583,
      "learning_rate": 3.707276806528282e-07,
      "logits/chosen": -0.3431912362575531,
      "logits/rejected": -0.21362292766571045,
      "logps/chosen": -1.175959587097168,
      "logps/rejected": -1.914841890335083,
      "loss": 1.9597,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.175959587097168,
      "rewards/margins": 0.7388821840286255,
      "rewards/rejected": -1.914841890335083,
      "step": 230
    },
    {
      "epoch": 0.49201779638838,
      "grad_norm": 32.41861896385678,
      "learning_rate": 3.6000770813281334e-07,
      "logits/chosen": -0.281482458114624,
      "logits/rejected": -0.2136625498533249,
      "logps/chosen": -1.1437963247299194,
      "logps/rejected": -1.6638180017471313,
      "loss": 1.9987,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.1437963247299194,
      "rewards/margins": 0.5200216770172119,
      "rewards/rejected": -1.6638180017471313,
      "step": 235
    },
    {
      "epoch": 0.5024862601413242,
      "grad_norm": 25.062414755869742,
      "learning_rate": 3.4920729345930654e-07,
      "logits/chosen": -0.31132057309150696,
      "logits/rejected": -0.2606331408023834,
      "logps/chosen": -1.1404446363449097,
      "logps/rejected": -1.7029993534088135,
      "loss": 2.0234,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -1.1404446363449097,
      "rewards/margins": 0.5625545978546143,
      "rewards/rejected": -1.7029993534088135,
      "step": 240
    },
    {
      "epoch": 0.5129547238942685,
      "grad_norm": 23.400197287310235,
      "learning_rate": 3.383409149158814e-07,
      "logits/chosen": -0.34879469871520996,
      "logits/rejected": -0.27785512804985046,
      "logps/chosen": -1.2463206052780151,
      "logps/rejected": -1.6453937292099,
      "loss": 2.0244,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -1.2463206052780151,
      "rewards/margins": 0.39907318353652954,
      "rewards/rejected": -1.6453937292099,
      "step": 245
    },
    {
      "epoch": 0.5234231876472127,
      "grad_norm": 30.06789343033768,
      "learning_rate": 3.2742313921268035e-07,
      "logits/chosen": -0.2991330623626709,
      "logits/rejected": -0.24600133299827576,
      "logps/chosen": -1.1448876857757568,
      "logps/rejected": -1.7555782794952393,
      "loss": 2.0331,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -1.1448876857757568,
      "rewards/margins": 0.6106906533241272,
      "rewards/rejected": -1.7555782794952393,
      "step": 250
    },
    {
      "epoch": 0.533891651400157,
      "grad_norm": 26.26439332360777,
      "learning_rate": 3.1646860195929825e-07,
      "logits/chosen": -0.26393812894821167,
      "logits/rejected": -0.15189418196678162,
      "logps/chosen": -1.2170337438583374,
      "logps/rejected": -1.7952607870101929,
      "loss": 1.9685,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -1.2170337438583374,
      "rewards/margins": 0.5782270431518555,
      "rewards/rejected": -1.7952607870101929,
      "step": 255
    },
    {
      "epoch": 0.5443601151531012,
      "grad_norm": 30.83419650992516,
      "learning_rate": 3.054919880453032e-07,
      "logits/chosen": -0.24465498328208923,
      "logits/rejected": -0.1657981127500534,
      "logps/chosen": -1.1195638179779053,
      "logps/rejected": -1.8283071517944336,
      "loss": 2.0058,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -1.1195638179779053,
      "rewards/margins": 0.7087433934211731,
      "rewards/rejected": -1.8283071517944336,
      "step": 260
    },
    {
      "epoch": 0.5548285789060455,
      "grad_norm": 26.538461186209517,
      "learning_rate": 2.9450801195469686e-07,
      "logits/chosen": -0.2653834819793701,
      "logits/rejected": -0.21019785106182098,
      "logps/chosen": -1.2122917175292969,
      "logps/rejected": -1.5932838916778564,
      "loss": 1.9747,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -1.2122917175292969,
      "rewards/margins": 0.38099202513694763,
      "rewards/rejected": -1.5932838916778564,
      "step": 265
    },
    {
      "epoch": 0.5652970426589898,
      "grad_norm": 30.571225220792357,
      "learning_rate": 2.835313980407017e-07,
      "logits/chosen": -0.2485169917345047,
      "logits/rejected": -0.1672702431678772,
      "logps/chosen": -1.2699711322784424,
      "logps/rejected": -1.6770769357681274,
      "loss": 2.0095,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -1.2699711322784424,
      "rewards/margins": 0.40710583329200745,
      "rewards/rejected": -1.6770769357681274,
      "step": 270
    },
    {
      "epoch": 0.575765506411934,
      "grad_norm": 15.142492975343366,
      "learning_rate": 2.7257686078731973e-07,
      "logits/chosen": -0.2676723599433899,
      "logits/rejected": -0.14488348364830017,
      "logps/chosen": -1.1701332330703735,
      "logps/rejected": -1.882002830505371,
      "loss": 1.9973,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -1.1701332330703735,
      "rewards/margins": 0.7118695974349976,
      "rewards/rejected": -1.882002830505371,
      "step": 275
    },
    {
      "epoch": 0.5862339701648783,
      "grad_norm": 17.861104389604897,
      "learning_rate": 2.6165908508411857e-07,
      "logits/chosen": -0.27734139561653137,
      "logits/rejected": -0.1873548924922943,
      "logps/chosen": -1.096847653388977,
      "logps/rejected": -1.556873083114624,
      "loss": 1.979,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.096847653388977,
      "rewards/margins": 0.46002545952796936,
      "rewards/rejected": -1.556873083114624,
      "step": 280
    },
    {
      "epoch": 0.5967024339178225,
      "grad_norm": 29.049210332966997,
      "learning_rate": 2.5079270654069354e-07,
      "logits/chosen": -0.22700035572052002,
      "logits/rejected": -0.20169806480407715,
      "logps/chosen": -1.1871209144592285,
      "logps/rejected": -1.729832410812378,
      "loss": 1.9858,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -1.1871209144592285,
      "rewards/margins": 0.5427114367485046,
      "rewards/rejected": -1.729832410812378,
      "step": 285
    },
    {
      "epoch": 0.6071708976707668,
      "grad_norm": 19.177779527300032,
      "learning_rate": 2.399922918671867e-07,
      "logits/chosen": -0.27257853746414185,
      "logits/rejected": -0.20175373554229736,
      "logps/chosen": -1.1817193031311035,
      "logps/rejected": -1.81912362575531,
      "loss": 1.9728,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -1.1817193031311035,
      "rewards/margins": 0.6374045610427856,
      "rewards/rejected": -1.81912362575531,
      "step": 290
    },
    {
      "epoch": 0.6176393614237111,
      "grad_norm": 39.27420791348495,
      "learning_rate": 2.2927231934717176e-07,
      "logits/chosen": -0.2650902271270752,
      "logits/rejected": -0.204110786318779,
      "logps/chosen": -1.1773895025253296,
      "logps/rejected": -1.8753960132598877,
      "loss": 2.0113,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -1.1773895025253296,
      "rewards/margins": 0.6980065107345581,
      "rewards/rejected": -1.8753960132598877,
      "step": 295
    },
    {
      "epoch": 0.6281078251766553,
      "grad_norm": 19.055812699519027,
      "learning_rate": 2.1864715942907487e-07,
      "logits/chosen": -0.31268611550331116,
      "logits/rejected": -0.26396140456199646,
      "logps/chosen": -1.2095041275024414,
      "logps/rejected": -1.6991554498672485,
      "loss": 1.9925,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -1.2095041275024414,
      "rewards/margins": 0.4896513819694519,
      "rewards/rejected": -1.6991554498672485,
      "step": 300
    },
    {
      "epoch": 0.6385762889295996,
      "grad_norm": 24.000715730668365,
      "learning_rate": 2.081310554621522e-07,
      "logits/chosen": -0.2226269692182541,
      "logits/rejected": -0.17259590327739716,
      "logps/chosen": -1.2206140756607056,
      "logps/rejected": -1.8647050857543945,
      "loss": 1.9507,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -1.2206140756607056,
      "rewards/margins": 0.6440912485122681,
      "rewards/rejected": -1.8647050857543945,
      "step": 305
    },
    {
      "epoch": 0.6490447526825438,
      "grad_norm": 24.680587241234605,
      "learning_rate": 1.9773810460284862e-07,
      "logits/chosen": -0.21720829606056213,
      "logits/rejected": -0.22596517205238342,
      "logps/chosen": -1.121246576309204,
      "logps/rejected": -1.6572654247283936,
      "loss": 1.9548,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.121246576309204,
      "rewards/margins": 0.536018967628479,
      "rewards/rejected": -1.6572654247283936,
      "step": 310
    },
    {
      "epoch": 0.6595132164354881,
      "grad_norm": 191.6202253290827,
      "learning_rate": 1.874822389171314e-07,
      "logits/chosen": -0.23975515365600586,
      "logits/rejected": -0.13572129607200623,
      "logps/chosen": -1.1023863554000854,
      "logps/rejected": -1.9153015613555908,
      "loss": 1.9671,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.1023863554000854,
      "rewards/margins": 0.8129149675369263,
      "rewards/rejected": -1.9153015613555908,
      "step": 315
    },
    {
      "epoch": 0.6699816801884323,
      "grad_norm": 29.251518787297723,
      "learning_rate": 1.7737720670413356e-07,
      "logits/chosen": -0.19940456748008728,
      "logits/rejected": -0.15788142383098602,
      "logps/chosen": -1.2180979251861572,
      "logps/rejected": -1.7693755626678467,
      "loss": 1.9257,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -1.2180979251861572,
      "rewards/margins": 0.5512775182723999,
      "rewards/rejected": -1.7693755626678467,
      "step": 320
    },
    {
      "epoch": 0.6804501439413766,
      "grad_norm": 38.573296633637,
      "learning_rate": 1.6743655406614095e-07,
      "logits/chosen": -0.2212747037410736,
      "logits/rejected": -0.13944557309150696,
      "logps/chosen": -1.165976881980896,
      "logps/rejected": -1.8154420852661133,
      "loss": 1.9466,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -1.165976881980896,
      "rewards/margins": 0.6494652032852173,
      "rewards/rejected": -1.8154420852661133,
      "step": 325
    },
    {
      "epoch": 0.6909186076943209,
      "grad_norm": 29.264456688369886,
      "learning_rate": 1.5767360674963198e-07,
      "logits/chosen": -0.21240024268627167,
      "logits/rejected": -0.13640090823173523,
      "logps/chosen": -1.1329911947250366,
      "logps/rejected": -1.5841938257217407,
      "loss": 1.9788,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -1.1329911947250366,
      "rewards/margins": 0.45120254158973694,
      "rewards/rejected": -1.5841938257217407,
      "step": 330
    },
    {
      "epoch": 0.7013870714472651,
      "grad_norm": 21.84656926034516,
      "learning_rate": 1.4810145228170922e-07,
      "logits/chosen": -0.2895652651786804,
      "logits/rejected": -0.20374973118305206,
      "logps/chosen": -1.0948199033737183,
      "logps/rejected": -1.5535070896148682,
      "loss": 1.9921,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -1.0948199033737183,
      "rewards/margins": 0.4586872160434723,
      "rewards/rejected": -1.5535070896148682,
      "step": 335
    },
    {
      "epoch": 0.7118555352002094,
      "grad_norm": 60.16510624521895,
      "learning_rate": 1.3873292242587306e-07,
      "logits/chosen": -0.2519373893737793,
      "logits/rejected": -0.1798809915781021,
      "logps/chosen": -1.3221559524536133,
      "logps/rejected": -1.7844518423080444,
      "loss": 2.0219,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -1.3221559524536133,
      "rewards/margins": 0.4622960686683655,
      "rewards/rejected": -1.7844518423080444,
      "step": 340
    },
    {
      "epoch": 0.7223239989531536,
      "grad_norm": 31.10556234832626,
      "learning_rate": 1.295805759806533e-07,
      "logits/chosen": -0.28459057211875916,
      "logits/rejected": -0.1859065145254135,
      "logps/chosen": -1.1906864643096924,
      "logps/rejected": -1.7581002712249756,
      "loss": 1.9961,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -1.1906864643096924,
      "rewards/margins": 0.567413866519928,
      "rewards/rejected": -1.7581002712249756,
      "step": 345
    },
    {
      "epoch": 0.7327924627060979,
      "grad_norm": 21.145366603131734,
      "learning_rate": 1.2065668194415777e-07,
      "logits/chosen": -0.19020649790763855,
      "logits/rejected": -0.15534143149852753,
      "logps/chosen": -1.1794466972351074,
      "logps/rejected": -1.6421940326690674,
      "loss": 2.0254,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -1.1794466972351074,
      "rewards/margins": 0.4627472758293152,
      "rewards/rejected": -1.6421940326690674,
      "step": 350
    },
    {
      "epoch": 0.7432609264590422,
      "grad_norm": 29.29098473580897,
      "learning_rate": 1.1197320306710923e-07,
      "logits/chosen": -0.19632667303085327,
      "logits/rejected": -0.12513799965381622,
      "logps/chosen": -1.0603824853897095,
      "logps/rejected": -1.7353988885879517,
      "loss": 1.9288,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.0603824853897095,
      "rewards/margins": 0.6750164031982422,
      "rewards/rejected": -1.7353988885879517,
      "step": 355
    },
    {
      "epoch": 0.7537293902119864,
      "grad_norm": 24.609476201697003,
      "learning_rate": 1.035417798164145e-07,
      "logits/chosen": -0.2547205984592438,
      "logits/rejected": -0.17141126096248627,
      "logps/chosen": -1.0777822732925415,
      "logps/rejected": -1.6378345489501953,
      "loss": 1.9002,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -1.0777822732925415,
      "rewards/margins": 0.5600521564483643,
      "rewards/rejected": -1.6378345489501953,
      "step": 360
    },
    {
      "epoch": 0.7641978539649307,
      "grad_norm": 44.888955546131726,
      "learning_rate": 9.537371477076535e-08,
      "logits/chosen": -0.24833258986473083,
      "logits/rejected": -0.14280755817890167,
      "logps/chosen": -1.2661911249160767,
      "logps/rejected": -1.8641719818115234,
      "loss": 2.0054,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -1.2661911249160767,
      "rewards/margins": 0.5979806184768677,
      "rewards/rejected": -1.8641719818115234,
      "step": 365
    },
    {
      "epoch": 0.7746663177178749,
      "grad_norm": 29.759713813955717,
      "learning_rate": 8.747995746918898e-08,
      "logits/chosen": -0.18603017926216125,
      "logits/rejected": -0.11623908579349518,
      "logps/chosen": -1.2385270595550537,
      "logps/rejected": -1.8711185455322266,
      "loss": 1.9635,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -1.2385270595550537,
      "rewards/margins": 0.6325916051864624,
      "rewards/rejected": -1.8711185455322266,
      "step": 370
    },
    {
      "epoch": 0.7851347814708192,
      "grad_norm": 18.386697317487712,
      "learning_rate": 7.987108973285888e-08,
      "logits/chosen": -0.2141023874282837,
      "logits/rejected": -0.24412047863006592,
      "logps/chosen": -1.2150145769119263,
      "logps/rejected": -1.7247231006622314,
      "loss": 1.9899,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -1.2150145769119263,
      "rewards/margins": 0.5097082853317261,
      "rewards/rejected": -1.7247231006622314,
      "step": 375
    },
    {
      "epoch": 0.7956032452237635,
      "grad_norm": 24.333148504874714,
      "learning_rate": 7.255731147984174e-08,
      "logits/chosen": -0.24650990962982178,
      "logits/rejected": -0.18783050775527954,
      "logps/chosen": -1.2390451431274414,
      "logps/rejected": -1.6897704601287842,
      "loss": 1.9425,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -1.2390451431274414,
      "rewards/margins": 0.45072537660598755,
      "rewards/rejected": -1.6897704601287842,
      "step": 380
    },
    {
      "epoch": 0.8060717089767077,
      "grad_norm": 22.373563868926805,
      "learning_rate": 6.554842705179898e-08,
      "logits/chosen": -0.2532094120979309,
      "logits/rejected": -0.2002202570438385,
      "logps/chosen": -1.157409429550171,
      "logps/rejected": -1.7103229761123657,
      "loss": 1.9743,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -1.157409429550171,
      "rewards/margins": 0.55291348695755,
      "rewards/rejected": -1.7103229761123657,
      "step": 385
    },
    {
      "epoch": 0.816540172729652,
      "grad_norm": 27.22781403012628,
      "learning_rate": 5.885383207096832e-08,
      "logits/chosen": -0.25118163228034973,
      "logits/rejected": -0.17533348500728607,
      "logps/chosen": -1.158809781074524,
      "logps/rejected": -1.7991712093353271,
      "loss": 1.9567,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -1.158809781074524,
      "rewards/margins": 0.6403613090515137,
      "rewards/rejected": -1.7991712093353271,
      "step": 390
    },
    {
      "epoch": 0.8270086364825961,
      "grad_norm": 41.14703828256896,
      "learning_rate": 5.2482500845047165e-08,
      "logits/chosen": -0.2559366524219513,
      "logits/rejected": -0.13099372386932373,
      "logps/chosen": -1.1643617153167725,
      "logps/rejected": -1.7700135707855225,
      "loss": 1.9559,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -1.1643617153167725,
      "rewards/margins": 0.60565185546875,
      "rewards/rejected": -1.7700135707855225,
      "step": 395
    },
    {
      "epoch": 0.8374771002355405,
      "grad_norm": 24.3693128346703,
      "learning_rate": 4.644297433686162e-08,
      "logits/chosen": -0.19337531924247742,
      "logits/rejected": -0.14422497153282166,
      "logps/chosen": -1.0990240573883057,
      "logps/rejected": -1.7358309030532837,
      "loss": 1.9382,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -1.0990240573883057,
      "rewards/margins": 0.6368069648742676,
      "rewards/rejected": -1.7358309030532837,
      "step": 400
    },
    {
      "epoch": 0.8479455639884846,
      "grad_norm": 27.686221570305076,
      "learning_rate": 4.074334871494558e-08,
      "logits/chosen": -0.27724021673202515,
      "logits/rejected": -0.21434080600738525,
      "logps/chosen": -1.2541942596435547,
      "logps/rejected": -1.889288306236267,
      "loss": 1.981,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -1.2541942596435547,
      "rewards/margins": 0.6350940465927124,
      "rewards/rejected": -1.889288306236267,
      "step": 405
    },
    {
      "epoch": 0.8584140277414289,
      "grad_norm": 27.73558369495071,
      "learning_rate": 3.5391264500382e-08,
      "logits/chosen": -0.22901353240013123,
      "logits/rejected": -0.1688699871301651,
      "logps/chosen": -1.0957512855529785,
      "logps/rejected": -1.5964380502700806,
      "loss": 1.9786,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -1.0957512855529785,
      "rewards/margins": 0.500686526298523,
      "rewards/rejected": -1.5964380502700806,
      "step": 410
    },
    {
      "epoch": 0.8688824914943732,
      "grad_norm": 25.24757762967279,
      "learning_rate": 3.0393896324452226e-08,
      "logits/chosen": -0.23829559981822968,
      "logits/rejected": -0.14140795171260834,
      "logps/chosen": -1.1398870944976807,
      "logps/rejected": -1.7605117559432983,
      "loss": 1.9365,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -1.1398870944976807,
      "rewards/margins": 0.6206245422363281,
      "rewards/rejected": -1.7605117559432983,
      "step": 415
    },
    {
      "epoch": 0.8793509552473174,
      "grad_norm": 24.11409806888563,
      "learning_rate": 2.5757943310825026e-08,
      "logits/chosen": -0.21624989807605743,
      "logits/rejected": -0.15116100013256073,
      "logps/chosen": -1.1420520544052124,
      "logps/rejected": -1.7557262182235718,
      "loss": 1.9847,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -1.1420520544052124,
      "rewards/margins": 0.6136741042137146,
      "rewards/rejected": -1.7557262182235718,
      "step": 420
    },
    {
      "epoch": 0.8898194190002617,
      "grad_norm": 31.071203059945226,
      "learning_rate": 2.148962009517823e-08,
      "logits/chosen": -0.1776510775089264,
      "logits/rejected": -0.11379513889551163,
      "logps/chosen": -1.2811336517333984,
      "logps/rejected": -1.7914276123046875,
      "loss": 1.9474,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -1.2811336517333984,
      "rewards/margins": 0.5102939605712891,
      "rewards/rejected": -1.7914276123046875,
      "step": 425
    },
    {
      "epoch": 0.9002878827532059,
      "grad_norm": 22.593650984666684,
      "learning_rate": 1.759464849429082e-08,
      "logits/chosen": -0.2011154592037201,
      "logits/rejected": -0.14906981587409973,
      "logps/chosen": -1.2140544652938843,
      "logps/rejected": -1.8150146007537842,
      "loss": 1.9428,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -1.2140544652938843,
      "rewards/margins": 0.6009601354598999,
      "rewards/rejected": -1.8150146007537842,
      "step": 430
    },
    {
      "epoch": 0.9107563465061502,
      "grad_norm": 20.886785791055413,
      "learning_rate": 1.4078249835774169e-08,
      "logits/chosen": -0.2394082248210907,
      "logits/rejected": -0.1875392496585846,
      "logps/chosen": -1.15377676486969,
      "logps/rejected": -1.962689995765686,
      "loss": 1.931,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -1.15377676486969,
      "rewards/margins": 0.8089130520820618,
      "rewards/rejected": -1.962689995765686,
      "step": 435
    },
    {
      "epoch": 0.9212248102590945,
      "grad_norm": 24.47138715404228,
      "learning_rate": 1.0945137958723705e-08,
      "logits/chosen": -0.0983675867319107,
      "logits/rejected": -0.08297935873270035,
      "logps/chosen": -1.2065281867980957,
      "logps/rejected": -1.7344862222671509,
      "loss": 2.0038,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -1.2065281867980957,
      "rewards/margins": 0.5279580354690552,
      "rewards/rejected": -1.7344862222671509,
      "step": 440
    },
    {
      "epoch": 0.9316932740120387,
      "grad_norm": 24.937539236981156,
      "learning_rate": 8.19951289467482e-09,
      "logits/chosen": -0.2168574333190918,
      "logits/rejected": -0.1608891487121582,
      "logps/chosen": -1.1793944835662842,
      "logps/rejected": -1.7278966903686523,
      "loss": 2.0001,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -1.1793944835662842,
      "rewards/margins": 0.5485021471977234,
      "rewards/rejected": -1.7278966903686523,
      "step": 445
    },
    {
      "epoch": 0.942161737764983,
      "grad_norm": 25.949713098879222,
      "learning_rate": 5.84505523733293e-09,
      "logits/chosen": -0.13714662194252014,
      "logits/rejected": -0.09888915717601776,
      "logps/chosen": -1.2185251712799072,
      "logps/rejected": -1.736971139907837,
      "loss": 1.9405,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.2185251712799072,
      "rewards/margins": 0.5184457302093506,
      "rewards/rejected": -1.736971139907837,
      "step": 450
    },
    {
      "epoch": 0.9526302015179272,
      "grad_norm": 48.46134541581324,
      "learning_rate": 3.8849212086261466e-09,
      "logits/chosen": -0.17583271861076355,
      "logits/rejected": -0.12143261730670929,
      "logps/chosen": -1.3378379344940186,
      "logps/rejected": -1.6454353332519531,
      "loss": 1.997,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -1.3378379344940186,
      "rewards/margins": 0.3075973391532898,
      "rewards/rejected": -1.6454353332519531,
      "step": 455
    },
    {
      "epoch": 0.9630986652708715,
      "grad_norm": 17.18309794816949,
      "learning_rate": 2.3217384276938756e-09,
      "logits/chosen": -0.15463075041770935,
      "logits/rejected": -0.10197849571704865,
      "logps/chosen": -1.0989463329315186,
      "logps/rejected": -1.7458966970443726,
      "loss": 1.932,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -1.0989463329315186,
      "rewards/margins": 0.6469505429267883,
      "rewards/rejected": -1.7458966970443726,
      "step": 460
    },
    {
      "epoch": 0.9735671290238157,
      "grad_norm": 23.454717675856944,
      "learning_rate": 1.1576023884836472e-09,
      "logits/chosen": -0.25512656569480896,
      "logits/rejected": -0.15672564506530762,
      "logps/chosen": -1.2135329246520996,
      "logps/rejected": -1.7796437740325928,
      "loss": 1.9646,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -1.2135329246520996,
      "rewards/margins": 0.5661108493804932,
      "rewards/rejected": -1.7796437740325928,
      "step": 465
    },
    {
      "epoch": 0.98403559277676,
      "grad_norm": 38.4586695917519,
      "learning_rate": 3.940736506780395e-10,
      "logits/chosen": -0.2280760258436203,
      "logits/rejected": -0.14886632561683655,
      "logps/chosen": -1.1674778461456299,
      "logps/rejected": -1.6563600301742554,
      "loss": 2.0158,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -1.1674778461456299,
      "rewards/margins": 0.4888822138309479,
      "rewards/rejected": -1.6563600301742554,
      "step": 470
    },
    {
      "epoch": 0.9945040565297043,
      "grad_norm": 20.371210573575656,
      "learning_rate": 3.2175747716822744e-11,
      "logits/chosen": -0.268063485622406,
      "logits/rejected": -0.13501767814159393,
      "logps/chosen": -1.2155284881591797,
      "logps/rejected": -1.7273550033569336,
      "loss": 1.9722,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -1.2155284881591797,
      "rewards/margins": 0.5118265151977539,
      "rewards/rejected": -1.7273550033569336,
      "step": 475
    },
    {
      "epoch": 0.998691442030882,
      "step": 477,
      "total_flos": 0.0,
      "train_loss": 2.0149041866606385,
      "train_runtime": 17499.3556,
      "train_samples_per_second": 3.494,
      "train_steps_per_second": 0.027
    }
  ],
  "logging_steps": 5,
  "max_steps": 477,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}