{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.998691442030882, "eval_steps": 500, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010468463752944255, "grad_norm": 34.35367990587695, "learning_rate": 6.25e-08, "logits/chosen": -0.49797338247299194, "logits/rejected": -0.5135231018066406, "logps/chosen": -1.1745355129241943, "logps/rejected": -1.3596293926239014, "loss": 2.1735, "rewards/accuracies": 0.5625, "rewards/chosen": -1.1745355129241943, "rewards/margins": 0.18509384989738464, "rewards/rejected": -1.3596293926239014, "step": 5 }, { "epoch": 0.02093692750588851, "grad_norm": 16.378216146989434, "learning_rate": 1.25e-07, "logits/chosen": -0.521752655506134, "logits/rejected": -0.4988512396812439, "logps/chosen": -1.1591465473175049, "logps/rejected": -1.2624419927597046, "loss": 2.1407, "rewards/accuracies": 0.5, "rewards/chosen": -1.1591465473175049, "rewards/margins": 0.10329560935497284, "rewards/rejected": -1.2624419927597046, "step": 10 }, { "epoch": 0.031405391258832765, "grad_norm": 22.732280640563598, "learning_rate": 1.875e-07, "logits/chosen": -0.46235981583595276, "logits/rejected": -0.4507545530796051, "logps/chosen": -1.1068508625030518, "logps/rejected": -1.361823558807373, "loss": 2.1077, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1068508625030518, "rewards/margins": 0.2549726366996765, "rewards/rejected": -1.361823558807373, "step": 15 }, { "epoch": 0.04187385501177702, "grad_norm": 22.78289950681047, "learning_rate": 2.5e-07, "logits/chosen": -0.4406924247741699, "logits/rejected": -0.4528113007545471, "logps/chosen": -1.161055564880371, "logps/rejected": -1.2642455101013184, "loss": 2.167, "rewards/accuracies": 0.5625, "rewards/chosen": -1.161055564880371, "rewards/margins": 0.10318990051746368, "rewards/rejected": -1.2642455101013184, "step": 20 }, { "epoch": 0.05234231876472128, "grad_norm": 13.313947854898018, "learning_rate": 3.125e-07, "logits/chosen": -0.5021263360977173, "logits/rejected": -0.47814303636550903, "logps/chosen": -1.1769291162490845, "logps/rejected": -1.2403558492660522, "loss": 2.1418, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.1769291162490845, "rewards/margins": 0.06342674791812897, "rewards/rejected": -1.2403558492660522, "step": 25 }, { "epoch": 0.06281078251766553, "grad_norm": 21.17299953179056, "learning_rate": 3.75e-07, "logits/chosen": -0.4899294972419739, "logits/rejected": -0.49411076307296753, "logps/chosen": -1.1576581001281738, "logps/rejected": -1.280582070350647, "loss": 2.1692, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.1576581001281738, "rewards/margins": 0.12292404472827911, "rewards/rejected": -1.280582070350647, "step": 30 }, { "epoch": 0.07327924627060979, "grad_norm": 21.15597636264796, "learning_rate": 4.3749999999999994e-07, "logits/chosen": -0.4856337904930115, "logits/rejected": -0.4433709979057312, "logps/chosen": -1.1427704095840454, "logps/rejected": -1.2787848711013794, "loss": 2.134, "rewards/accuracies": 0.5625, "rewards/chosen": -1.1427704095840454, "rewards/margins": 0.13601449131965637, "rewards/rejected": -1.2787848711013794, "step": 35 }, { "epoch": 0.08374771002355404, "grad_norm": 29.038714028264685, "learning_rate": 5e-07, "logits/chosen": -0.4945921301841736, "logits/rejected": -0.4987305998802185, "logps/chosen": -1.0738334655761719, "logps/rejected": -1.39645516872406, "loss": 2.0884, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0738334655761719, "rewards/margins": 0.3226216435432434, "rewards/rejected": -1.39645516872406, "step": 40 }, { "epoch": 0.0942161737764983, "grad_norm": 30.530804134708777, "learning_rate": 5.625e-07, "logits/chosen": -0.45864447951316833, "logits/rejected": -0.4690025746822357, "logps/chosen": -1.090343952178955, "logps/rejected": -1.317134976387024, "loss": 2.1074, "rewards/accuracies": 0.59375, "rewards/chosen": -1.090343952178955, "rewards/margins": 0.22679109871387482, "rewards/rejected": -1.317134976387024, "step": 45 }, { "epoch": 0.10468463752944256, "grad_norm": 21.789634951246953, "learning_rate": 5.999678242522831e-07, "logits/chosen": -0.4777728021144867, "logits/rejected": -0.49264296889305115, "logps/chosen": -1.1642675399780273, "logps/rejected": -1.4595439434051514, "loss": 2.1327, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.1642675399780273, "rewards/margins": 0.2952764332294464, "rewards/rejected": -1.4595439434051514, "step": 50 }, { "epoch": 0.11515310128238682, "grad_norm": 175.8483724549221, "learning_rate": 5.996059263493219e-07, "logits/chosen": -0.4417606294155121, "logits/rejected": -0.4309404492378235, "logps/chosen": -1.1178853511810303, "logps/rejected": -1.343202829360962, "loss": 2.106, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -1.1178853511810303, "rewards/margins": 0.22531744837760925, "rewards/rejected": -1.343202829360962, "step": 55 }, { "epoch": 0.12562156503533106, "grad_norm": 18.6141832401793, "learning_rate": 5.988423976115163e-07, "logits/chosen": -0.48267728090286255, "logits/rejected": -0.47969865798950195, "logps/chosen": -1.2083253860473633, "logps/rejected": -1.33084237575531, "loss": 2.0877, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -1.2083253860473633, "rewards/margins": 0.12251707166433334, "rewards/rejected": -1.33084237575531, "step": 60 }, { "epoch": 0.1360900287882753, "grad_norm": 32.36335168763983, "learning_rate": 5.976782615723061e-07, "logits/chosen": -0.44251489639282227, "logits/rejected": -0.40677833557128906, "logps/chosen": -1.102402925491333, "logps/rejected": -1.6289558410644531, "loss": 2.0703, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.102402925491333, "rewards/margins": 0.5265528559684753, "rewards/rejected": -1.6289558410644531, "step": 65 }, { "epoch": 0.14655849254121958, "grad_norm": 48.08034129839621, "learning_rate": 5.961150787913738e-07, "logits/chosen": -0.3722413182258606, "logits/rejected": -0.3657040297985077, "logps/chosen": -1.1620112657546997, "logps/rejected": -1.426941990852356, "loss": 2.0511, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -1.1620112657546997, "rewards/margins": 0.2649305760860443, "rewards/rejected": -1.426941990852356, "step": 70 }, { "epoch": 0.15702695629416383, "grad_norm": 59.84416296380324, "learning_rate": 5.941549447626671e-07, "logits/chosen": -0.37373992800712585, "logits/rejected": -0.3564635217189789, "logps/chosen": -1.1539630889892578, "logps/rejected": -1.505507230758667, "loss": 2.0661, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.1539630889892578, "rewards/margins": 0.3515442907810211, "rewards/rejected": -1.505507230758667, "step": 75 }, { "epoch": 0.16749542004710807, "grad_norm": 25.67817680738418, "learning_rate": 5.918004871053251e-07, "logits/chosen": -0.4051768183708191, "logits/rejected": -0.38645023107528687, "logps/chosen": -1.1455223560333252, "logps/rejected": -1.5474026203155518, "loss": 2.0988, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.1455223560333252, "rewards/margins": 0.40188026428222656, "rewards/rejected": -1.5474026203155518, "step": 80 }, { "epoch": 0.17796388380005235, "grad_norm": 34.17749794765947, "learning_rate": 5.890548620412763e-07, "logits/chosen": -0.45642200112342834, "logits/rejected": -0.42394012212753296, "logps/chosen": -1.1208285093307495, "logps/rejected": -1.4425103664398193, "loss": 2.0996, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.1208285093307495, "rewards/margins": 0.3216818571090698, "rewards/rejected": -1.4425103664398193, "step": 85 }, { "epoch": 0.1884323475529966, "grad_norm": 10.063019778918575, "learning_rate": 5.859217501642258e-07, "logits/chosen": -0.4428345561027527, "logits/rejected": -0.4144333004951477, "logps/chosen": -1.1374626159667969, "logps/rejected": -1.4302767515182495, "loss": 2.0461, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.1374626159667969, "rewards/margins": 0.2928140461444855, "rewards/rejected": -1.4302767515182495, "step": 90 }, { "epoch": 0.19890081130594087, "grad_norm": 20.74891634135048, "learning_rate": 5.824053515057091e-07, "logits/chosen": -0.479747474193573, "logits/rejected": -0.39375734329223633, "logps/chosen": -1.159339189529419, "logps/rejected": -1.4628812074661255, "loss": 2.0797, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -1.159339189529419, "rewards/margins": 0.30354195833206177, "rewards/rejected": -1.4628812074661255, "step": 95 }, { "epoch": 0.2093692750588851, "grad_norm": 33.44699335449945, "learning_rate": 5.785103799048218e-07, "logits/chosen": -0.4181644022464752, "logits/rejected": -0.38857489824295044, "logps/chosen": -1.1860034465789795, "logps/rejected": -1.7242858409881592, "loss": 2.0744, "rewards/accuracies": 0.53125, "rewards/chosen": -1.1860034465789795, "rewards/margins": 0.5382825136184692, "rewards/rejected": -1.7242858409881592, "step": 100 }, { "epoch": 0.21983773881182936, "grad_norm": 59.5883485681765, "learning_rate": 5.742420566891749e-07, "logits/chosen": -0.4103716015815735, "logits/rejected": -0.3968586027622223, "logps/chosen": -1.076053261756897, "logps/rejected": -1.4917399883270264, "loss": 2.0443, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.076053261756897, "rewards/margins": 0.4156867563724518, "rewards/rejected": -1.4917399883270264, "step": 105 }, { "epoch": 0.23030620256477363, "grad_norm": 80.31752384431189, "learning_rate": 5.696061036755478e-07, "logits/chosen": -0.4146521985530853, "logits/rejected": -0.38292473554611206, "logps/chosen": -1.0837781429290771, "logps/rejected": -1.5503333806991577, "loss": 2.0282, "rewards/accuracies": 0.625, "rewards/chosen": -1.0837781429290771, "rewards/margins": 0.4665554165840149, "rewards/rejected": -1.5503333806991577, "step": 110 }, { "epoch": 0.24077466631771788, "grad_norm": 98.56244903302768, "learning_rate": 5.64608735499618e-07, "logits/chosen": -0.28679025173187256, "logits/rejected": -0.2377845048904419, "logps/chosen": -1.111647367477417, "logps/rejected": -1.687975287437439, "loss": 2.0185, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.111647367477417, "rewards/margins": 0.576327919960022, "rewards/rejected": -1.687975287437439, "step": 115 }, { "epoch": 0.2512431300706621, "grad_norm": 20.27186258736798, "learning_rate": 5.592566512850545e-07, "logits/chosen": -0.27318406105041504, "logits/rejected": -0.2487379014492035, "logps/chosen": -1.2702689170837402, "logps/rejected": -1.556516408920288, "loss": 2.0975, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.2702689170837402, "rewards/margins": 0.2862473428249359, "rewards/rejected": -1.556516408920288, "step": 120 }, { "epoch": 0.26171159382360637, "grad_norm": 30.996972753737214, "learning_rate": 5.535570256631384e-07, "logits/chosen": -0.32224705815315247, "logits/rejected": -0.2949572503566742, "logps/chosen": -1.0966984033584595, "logps/rejected": -1.425978660583496, "loss": 2.0513, "rewards/accuracies": 0.625, "rewards/chosen": -1.0966984033584595, "rewards/margins": 0.3292803168296814, "rewards/rejected": -1.425978660583496, "step": 125 }, { "epoch": 0.2721800575765506, "grad_norm": 106.95100145667757, "learning_rate": 5.475174991549528e-07, "logits/chosen": -0.2838582396507263, "logits/rejected": -0.27573472261428833, "logps/chosen": -1.065918207168579, "logps/rejected": -1.2746469974517822, "loss": 2.0692, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.065918207168579, "rewards/margins": 0.2087288349866867, "rewards/rejected": -1.2746469974517822, "step": 130 }, { "epoch": 0.2826485213294949, "grad_norm": 18.450255655703543, "learning_rate": 5.411461679290317e-07, "logits/chosen": -0.33663275837898254, "logits/rejected": -0.24262118339538574, "logps/chosen": -1.1360746622085571, "logps/rejected": -1.793914556503296, "loss": 2.0112, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.1360746622085571, "rewards/margins": 0.6578398942947388, "rewards/rejected": -1.793914556503296, "step": 135 }, { "epoch": 0.29311698508243916, "grad_norm": 20.28586481649801, "learning_rate": 5.34451572948201e-07, "logits/chosen": -0.25463438034057617, "logits/rejected": -0.18874910473823547, "logps/chosen": -1.2043225765228271, "logps/rejected": -1.7759593725204468, "loss": 1.9956, "rewards/accuracies": 0.65625, "rewards/chosen": -1.2043225765228271, "rewards/margins": 0.5716367959976196, "rewards/rejected": -1.7759593725204468, "step": 140 }, { "epoch": 0.3035854488353834, "grad_norm": 16.12998962894494, "learning_rate": 5.274426885201582e-07, "logits/chosen": -0.3120715320110321, "logits/rejected": -0.28251615166664124, "logps/chosen": -1.1517010927200317, "logps/rejected": -1.5577958822250366, "loss": 2.04, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.1517010927200317, "rewards/margins": 0.4060949683189392, "rewards/rejected": -1.5577958822250366, "step": 145 }, { "epoch": 0.31405391258832765, "grad_norm": 18.374398699221082, "learning_rate": 5.201289102671411e-07, "logits/chosen": -0.29038459062576294, "logits/rejected": -0.25689178705215454, "logps/chosen": -1.0535448789596558, "logps/rejected": -1.4643501043319702, "loss": 2.0025, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.0535448789596558, "rewards/margins": 0.41080522537231445, "rewards/rejected": -1.4643501043319702, "step": 150 }, { "epoch": 0.3245223763412719, "grad_norm": 21.42424041369703, "learning_rate": 5.12520042530811e-07, "logits/chosen": -0.33090248703956604, "logits/rejected": -0.26949256658554077, "logps/chosen": -1.1385178565979004, "logps/rejected": -1.5117579698562622, "loss": 2.0025, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -1.1385178565979004, "rewards/margins": 0.3732401728630066, "rewards/rejected": -1.5117579698562622, "step": 155 }, { "epoch": 0.33499084009421615, "grad_norm": 17.090434114730737, "learning_rate": 5.046262852292346e-07, "logits/chosen": -0.2471882402896881, "logits/rejected": -0.1910603940486908, "logps/chosen": -1.179958701133728, "logps/rejected": -1.6289422512054443, "loss": 2.0344, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.179958701133728, "rewards/margins": 0.4489835202693939, "rewards/rejected": -1.6289422512054443, "step": 160 }, { "epoch": 0.34545930384716045, "grad_norm": 13.121179286172973, "learning_rate": 4.964582201835856e-07, "logits/chosen": -0.2789207696914673, "logits/rejected": -0.2165801227092743, "logps/chosen": -1.1143217086791992, "logps/rejected": -1.6779086589813232, "loss": 2.012, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.1143217086791992, "rewards/margins": 0.5635868906974792, "rewards/rejected": -1.6779086589813232, "step": 165 }, { "epoch": 0.3559277676001047, "grad_norm": 13.822111204791623, "learning_rate": 4.880267969328908e-07, "logits/chosen": -0.26305219531059265, "logits/rejected": -0.16298075020313263, "logps/chosen": -1.2098379135131836, "logps/rejected": -1.6500240564346313, "loss": 2.0205, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.2098379135131836, "rewards/margins": 0.44018617272377014, "rewards/rejected": -1.6500240564346313, "step": 170 }, { "epoch": 0.36639623135304894, "grad_norm": 18.589343338653535, "learning_rate": 4.793433180558423e-07, "logits/chosen": -0.26549848914146423, "logits/rejected": -0.13549579679965973, "logps/chosen": -1.1815907955169678, "logps/rejected": -1.6547002792358398, "loss": 2.0137, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -1.1815907955169678, "rewards/margins": 0.4731093943119049, "rewards/rejected": -1.6547002792358398, "step": 175 }, { "epoch": 0.3768646951059932, "grad_norm": 24.192793785270847, "learning_rate": 4.704194240193467e-07, "logits/chosen": -0.20712879300117493, "logits/rejected": -0.14872625470161438, "logps/chosen": -1.2047398090362549, "logps/rejected": -1.6949182748794556, "loss": 2.0473, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2047398090362549, "rewards/margins": 0.49017828702926636, "rewards/rejected": -1.6949182748794556, "step": 180 }, { "epoch": 0.38733315885893743, "grad_norm": 21.72801779202531, "learning_rate": 4.6126707757412686e-07, "logits/chosen": -0.19427147507667542, "logits/rejected": -0.08655323088169098, "logps/chosen": -1.2102793455123901, "logps/rejected": -1.956017255783081, "loss": 1.9488, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.2102793455123901, "rewards/margins": 0.745737612247467, "rewards/rejected": -1.956017255783081, "step": 185 }, { "epoch": 0.39780162261188173, "grad_norm": 52.99611536180731, "learning_rate": 4.5189854771829086e-07, "logits/chosen": -0.27349403500556946, "logits/rejected": -0.19519653916358948, "logps/chosen": -1.2331929206848145, "logps/rejected": -1.693256139755249, "loss": 2.0896, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.2331929206848145, "rewards/margins": 0.4600633680820465, "rewards/rejected": -1.693256139755249, "step": 190 }, { "epoch": 0.408270086364826, "grad_norm": 23.313047834378704, "learning_rate": 4.4232639325036807e-07, "logits/chosen": -0.2565682530403137, "logits/rejected": -0.20012107491493225, "logps/chosen": -1.2263553142547607, "logps/rejected": -1.6081535816192627, "loss": 2.0274, "rewards/accuracies": 0.65625, "rewards/chosen": -1.2263553142547607, "rewards/margins": 0.38179832696914673, "rewards/rejected": -1.6081535816192627, "step": 195 }, { "epoch": 0.4187385501177702, "grad_norm": 28.77315959004056, "learning_rate": 4.32563445933859e-07, "logits/chosen": -0.2804745137691498, "logits/rejected": -0.2464865893125534, "logps/chosen": -1.2270160913467407, "logps/rejected": -1.6264985799789429, "loss": 2.046, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.2270160913467407, "rewards/margins": 0.3994825482368469, "rewards/rejected": -1.6264985799789429, "step": 200 }, { "epoch": 0.42920701387071447, "grad_norm": 24.629126590195398, "learning_rate": 4.226227932958664e-07, "logits/chosen": -0.21598652005195618, "logits/rejected": -0.16539430618286133, "logps/chosen": -1.0299084186553955, "logps/rejected": -1.6446430683135986, "loss": 1.9694, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.0299084186553955, "rewards/margins": 0.6147347688674927, "rewards/rejected": -1.6446430683135986, "step": 205 }, { "epoch": 0.4396754776236587, "grad_norm": 18.99935065324488, "learning_rate": 4.1251776108286854e-07, "logits/chosen": -0.24660630524158478, "logits/rejected": -0.19936877489089966, "logps/chosen": -1.2271109819412231, "logps/rejected": -1.5406509637832642, "loss": 2.0403, "rewards/accuracies": 0.59375, "rewards/chosen": -1.2271109819412231, "rewards/margins": 0.31353995203971863, "rewards/rejected": -1.5406509637832642, "step": 210 }, { "epoch": 0.45014394137660296, "grad_norm": 28.14089359211311, "learning_rate": 4.022618953971514e-07, "logits/chosen": -0.28811579942703247, "logits/rejected": -0.23570355772972107, "logps/chosen": -1.1424353122711182, "logps/rejected": -1.6885287761688232, "loss": 2.0049, "rewards/accuracies": 0.75, "rewards/chosen": -1.1424353122711182, "rewards/margins": 0.5460935831069946, "rewards/rejected": -1.6885287761688232, "step": 215 }, { "epoch": 0.46061240512954726, "grad_norm": 32.89974992330637, "learning_rate": 3.918689445378477e-07, "logits/chosen": -0.30860984325408936, "logits/rejected": -0.18307650089263916, "logps/chosen": -1.2112575769424438, "logps/rejected": -1.7577863931655884, "loss": 2.0035, "rewards/accuracies": 0.59375, "rewards/chosen": -1.2112575769424438, "rewards/margins": 0.5465287566184998, "rewards/rejected": -1.7577863931655884, "step": 220 }, { "epoch": 0.4710808688824915, "grad_norm": 20.40577743783828, "learning_rate": 3.813528405709251e-07, "logits/chosen": -0.29336491227149963, "logits/rejected": -0.19095419347286224, "logps/chosen": -1.1172130107879639, "logps/rejected": -1.7578521966934204, "loss": 1.949, "rewards/accuracies": 0.65625, "rewards/chosen": -1.1172130107879639, "rewards/margins": 0.6406393051147461, "rewards/rejected": -1.7578521966934204, "step": 225 }, { "epoch": 0.48154933263543576, "grad_norm": 22.351884239960583, "learning_rate": 3.707276806528282e-07, "logits/chosen": -0.3431912362575531, "logits/rejected": -0.21362292766571045, "logps/chosen": -1.175959587097168, "logps/rejected": -1.914841890335083, "loss": 1.9597, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.175959587097168, "rewards/margins": 0.7388821840286255, "rewards/rejected": -1.914841890335083, "step": 230 }, { "epoch": 0.49201779638838, "grad_norm": 32.41861896385678, "learning_rate": 3.6000770813281334e-07, "logits/chosen": -0.281482458114624, "logits/rejected": -0.2136625498533249, "logps/chosen": -1.1437963247299194, "logps/rejected": -1.6638180017471313, "loss": 1.9987, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1437963247299194, "rewards/margins": 0.5200216770172119, "rewards/rejected": -1.6638180017471313, "step": 235 }, { "epoch": 0.5024862601413242, "grad_norm": 25.062414755869742, "learning_rate": 3.4920729345930654e-07, "logits/chosen": -0.31132057309150696, "logits/rejected": -0.2606331408023834, "logps/chosen": -1.1404446363449097, "logps/rejected": -1.7029993534088135, "loss": 2.0234, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.1404446363449097, "rewards/margins": 0.5625545978546143, "rewards/rejected": -1.7029993534088135, "step": 240 }, { "epoch": 0.5129547238942685, "grad_norm": 23.400197287310235, "learning_rate": 3.383409149158814e-07, "logits/chosen": -0.34879469871520996, "logits/rejected": -0.27785512804985046, "logps/chosen": -1.2463206052780151, "logps/rejected": -1.6453937292099, "loss": 2.0244, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.2463206052780151, "rewards/margins": 0.39907318353652954, "rewards/rejected": -1.6453937292099, "step": 245 }, { "epoch": 0.5234231876472127, "grad_norm": 30.06789343033768, "learning_rate": 3.2742313921268035e-07, "logits/chosen": -0.2991330623626709, "logits/rejected": -0.24600133299827576, "logps/chosen": -1.1448876857757568, "logps/rejected": -1.7555782794952393, "loss": 2.0331, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1448876857757568, "rewards/margins": 0.6106906533241272, "rewards/rejected": -1.7555782794952393, "step": 250 }, { "epoch": 0.533891651400157, "grad_norm": 26.26439332360777, "learning_rate": 3.1646860195929825e-07, "logits/chosen": -0.26393812894821167, "logits/rejected": -0.15189418196678162, "logps/chosen": -1.2170337438583374, "logps/rejected": -1.7952607870101929, "loss": 1.9685, "rewards/accuracies": 0.65625, "rewards/chosen": -1.2170337438583374, "rewards/margins": 0.5782270431518555, "rewards/rejected": -1.7952607870101929, "step": 255 }, { "epoch": 0.5443601151531012, "grad_norm": 30.83419650992516, "learning_rate": 3.054919880453032e-07, "logits/chosen": -0.24465498328208923, "logits/rejected": -0.1657981127500534, "logps/chosen": -1.1195638179779053, "logps/rejected": -1.8283071517944336, "loss": 2.0058, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.1195638179779053, "rewards/margins": 0.7087433934211731, "rewards/rejected": -1.8283071517944336, "step": 260 }, { "epoch": 0.5548285789060455, "grad_norm": 26.538461186209517, "learning_rate": 2.9450801195469686e-07, "logits/chosen": -0.2653834819793701, "logits/rejected": -0.21019785106182098, "logps/chosen": -1.2122917175292969, "logps/rejected": -1.5932838916778564, "loss": 1.9747, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.2122917175292969, "rewards/margins": 0.38099202513694763, "rewards/rejected": -1.5932838916778564, "step": 265 }, { "epoch": 0.5652970426589898, "grad_norm": 30.571225220792357, "learning_rate": 2.835313980407017e-07, "logits/chosen": -0.2485169917345047, "logits/rejected": -0.1672702431678772, "logps/chosen": -1.2699711322784424, "logps/rejected": -1.6770769357681274, "loss": 2.0095, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2699711322784424, "rewards/margins": 0.40710583329200745, "rewards/rejected": -1.6770769357681274, "step": 270 }, { "epoch": 0.575765506411934, "grad_norm": 15.142492975343366, "learning_rate": 2.7257686078731973e-07, "logits/chosen": -0.2676723599433899, "logits/rejected": -0.14488348364830017, "logps/chosen": -1.1701332330703735, "logps/rejected": -1.882002830505371, "loss": 1.9973, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.1701332330703735, "rewards/margins": 0.7118695974349976, "rewards/rejected": -1.882002830505371, "step": 275 }, { "epoch": 0.5862339701648783, "grad_norm": 17.861104389604897, "learning_rate": 2.6165908508411857e-07, "logits/chosen": -0.27734139561653137, "logits/rejected": -0.1873548924922943, "logps/chosen": -1.096847653388977, "logps/rejected": -1.556873083114624, "loss": 1.979, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.096847653388977, "rewards/margins": 0.46002545952796936, "rewards/rejected": -1.556873083114624, "step": 280 }, { "epoch": 0.5967024339178225, "grad_norm": 29.049210332966997, "learning_rate": 2.5079270654069354e-07, "logits/chosen": -0.22700035572052002, "logits/rejected": -0.20169806480407715, "logps/chosen": -1.1871209144592285, "logps/rejected": -1.729832410812378, "loss": 1.9858, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.1871209144592285, "rewards/margins": 0.5427114367485046, "rewards/rejected": -1.729832410812378, "step": 285 }, { "epoch": 0.6071708976707668, "grad_norm": 19.177779527300032, "learning_rate": 2.399922918671867e-07, "logits/chosen": -0.27257853746414185, "logits/rejected": -0.20175373554229736, "logps/chosen": -1.1817193031311035, "logps/rejected": -1.81912362575531, "loss": 1.9728, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1817193031311035, "rewards/margins": 0.6374045610427856, "rewards/rejected": -1.81912362575531, "step": 290 }, { "epoch": 0.6176393614237111, "grad_norm": 39.27420791348495, "learning_rate": 2.2927231934717176e-07, "logits/chosen": -0.2650902271270752, "logits/rejected": -0.204110786318779, "logps/chosen": -1.1773895025253296, "logps/rejected": -1.8753960132598877, "loss": 2.0113, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.1773895025253296, "rewards/margins": 0.6980065107345581, "rewards/rejected": -1.8753960132598877, "step": 295 }, { "epoch": 0.6281078251766553, "grad_norm": 19.055812699519027, "learning_rate": 2.1864715942907487e-07, "logits/chosen": -0.31268611550331116, "logits/rejected": -0.26396140456199646, "logps/chosen": -1.2095041275024414, "logps/rejected": -1.6991554498672485, "loss": 1.9925, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2095041275024414, "rewards/margins": 0.4896513819694519, "rewards/rejected": -1.6991554498672485, "step": 300 }, { "epoch": 0.6385762889295996, "grad_norm": 24.000715730668365, "learning_rate": 2.081310554621522e-07, "logits/chosen": -0.2226269692182541, "logits/rejected": -0.17259590327739716, "logps/chosen": -1.2206140756607056, "logps/rejected": -1.8647050857543945, "loss": 1.9507, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2206140756607056, "rewards/margins": 0.6440912485122681, "rewards/rejected": -1.8647050857543945, "step": 305 }, { "epoch": 0.6490447526825438, "grad_norm": 24.680587241234605, "learning_rate": 1.9773810460284862e-07, "logits/chosen": -0.21720829606056213, "logits/rejected": -0.22596517205238342, "logps/chosen": -1.121246576309204, "logps/rejected": -1.6572654247283936, "loss": 1.9548, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.121246576309204, "rewards/margins": 0.536018967628479, "rewards/rejected": -1.6572654247283936, "step": 310 }, { "epoch": 0.6595132164354881, "grad_norm": 191.6202253290827, "learning_rate": 1.874822389171314e-07, "logits/chosen": -0.23975515365600586, "logits/rejected": -0.13572129607200623, "logps/chosen": -1.1023863554000854, "logps/rejected": -1.9153015613555908, "loss": 1.9671, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1023863554000854, "rewards/margins": 0.8129149675369263, "rewards/rejected": -1.9153015613555908, "step": 315 }, { "epoch": 0.6699816801884323, "grad_norm": 29.251518787297723, "learning_rate": 1.7737720670413356e-07, "logits/chosen": -0.19940456748008728, "logits/rejected": -0.15788142383098602, "logps/chosen": -1.2180979251861572, "logps/rejected": -1.7693755626678467, "loss": 1.9257, "rewards/accuracies": 0.625, "rewards/chosen": -1.2180979251861572, "rewards/margins": 0.5512775182723999, "rewards/rejected": -1.7693755626678467, "step": 320 }, { "epoch": 0.6804501439413766, "grad_norm": 38.573296633637, "learning_rate": 1.6743655406614095e-07, "logits/chosen": -0.2212747037410736, "logits/rejected": -0.13944557309150696, "logps/chosen": -1.165976881980896, "logps/rejected": -1.8154420852661133, "loss": 1.9466, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.165976881980896, "rewards/margins": 0.6494652032852173, "rewards/rejected": -1.8154420852661133, "step": 325 }, { "epoch": 0.6909186076943209, "grad_norm": 29.264456688369886, "learning_rate": 1.5767360674963198e-07, "logits/chosen": -0.21240024268627167, "logits/rejected": -0.13640090823173523, "logps/chosen": -1.1329911947250366, "logps/rejected": -1.5841938257217407, "loss": 1.9788, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.1329911947250366, "rewards/margins": 0.45120254158973694, "rewards/rejected": -1.5841938257217407, "step": 330 }, { "epoch": 0.7013870714472651, "grad_norm": 21.84656926034516, "learning_rate": 1.4810145228170922e-07, "logits/chosen": -0.2895652651786804, "logits/rejected": -0.20374973118305206, "logps/chosen": -1.0948199033737183, "logps/rejected": -1.5535070896148682, "loss": 1.9921, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0948199033737183, "rewards/margins": 0.4586872160434723, "rewards/rejected": -1.5535070896148682, "step": 335 }, { "epoch": 0.7118555352002094, "grad_norm": 60.16510624521895, "learning_rate": 1.3873292242587306e-07, "logits/chosen": -0.2519373893737793, "logits/rejected": -0.1798809915781021, "logps/chosen": -1.3221559524536133, "logps/rejected": -1.7844518423080444, "loss": 2.0219, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3221559524536133, "rewards/margins": 0.4622960686683655, "rewards/rejected": -1.7844518423080444, "step": 340 }, { "epoch": 0.7223239989531536, "grad_norm": 31.10556234832626, "learning_rate": 1.295805759806533e-07, "logits/chosen": -0.28459057211875916, "logits/rejected": -0.1859065145254135, "logps/chosen": -1.1906864643096924, "logps/rejected": -1.7581002712249756, "loss": 1.9961, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.1906864643096924, "rewards/margins": 0.567413866519928, "rewards/rejected": -1.7581002712249756, "step": 345 }, { "epoch": 0.7327924627060979, "grad_norm": 21.145366603131734, "learning_rate": 1.2065668194415777e-07, "logits/chosen": -0.19020649790763855, "logits/rejected": -0.15534143149852753, "logps/chosen": -1.1794466972351074, "logps/rejected": -1.6421940326690674, "loss": 2.0254, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.1794466972351074, "rewards/margins": 0.4627472758293152, "rewards/rejected": -1.6421940326690674, "step": 350 }, { "epoch": 0.7432609264590422, "grad_norm": 29.29098473580897, "learning_rate": 1.1197320306710923e-07, "logits/chosen": -0.19632667303085327, "logits/rejected": -0.12513799965381622, "logps/chosen": -1.0603824853897095, "logps/rejected": -1.7353988885879517, "loss": 1.9288, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.0603824853897095, "rewards/margins": 0.6750164031982422, "rewards/rejected": -1.7353988885879517, "step": 355 }, { "epoch": 0.7537293902119864, "grad_norm": 24.609476201697003, "learning_rate": 1.035417798164145e-07, "logits/chosen": -0.2547205984592438, "logits/rejected": -0.17141126096248627, "logps/chosen": -1.0777822732925415, "logps/rejected": -1.6378345489501953, "loss": 1.9002, "rewards/accuracies": 0.65625, "rewards/chosen": -1.0777822732925415, "rewards/margins": 0.5600521564483643, "rewards/rejected": -1.6378345489501953, "step": 360 }, { "epoch": 0.7641978539649307, "grad_norm": 44.888955546131726, "learning_rate": 9.537371477076535e-08, "logits/chosen": -0.24833258986473083, "logits/rejected": -0.14280755817890167, "logps/chosen": -1.2661911249160767, "logps/rejected": -1.8641719818115234, "loss": 2.0054, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.2661911249160767, "rewards/margins": 0.5979806184768677, "rewards/rejected": -1.8641719818115234, "step": 365 }, { "epoch": 0.7746663177178749, "grad_norm": 29.759713813955717, "learning_rate": 8.747995746918898e-08, "logits/chosen": -0.18603017926216125, "logits/rejected": -0.11623908579349518, "logps/chosen": -1.2385270595550537, "logps/rejected": -1.8711185455322266, "loss": 1.9635, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.2385270595550537, "rewards/margins": 0.6325916051864624, "rewards/rejected": -1.8711185455322266, "step": 370 }, { "epoch": 0.7851347814708192, "grad_norm": 18.386697317487712, "learning_rate": 7.987108973285888e-08, "logits/chosen": -0.2141023874282837, "logits/rejected": -0.24412047863006592, "logps/chosen": -1.2150145769119263, "logps/rejected": -1.7247231006622314, "loss": 1.9899, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.2150145769119263, "rewards/margins": 0.5097082853317261, "rewards/rejected": -1.7247231006622314, "step": 375 }, { "epoch": 0.7956032452237635, "grad_norm": 24.333148504874714, "learning_rate": 7.255731147984174e-08, "logits/chosen": -0.24650990962982178, "logits/rejected": -0.18783050775527954, "logps/chosen": -1.2390451431274414, "logps/rejected": -1.6897704601287842, "loss": 1.9425, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.2390451431274414, "rewards/margins": 0.45072537660598755, "rewards/rejected": -1.6897704601287842, "step": 380 }, { "epoch": 0.8060717089767077, "grad_norm": 22.373563868926805, "learning_rate": 6.554842705179898e-08, "logits/chosen": -0.2532094120979309, "logits/rejected": -0.2002202570438385, "logps/chosen": -1.157409429550171, "logps/rejected": -1.7103229761123657, "loss": 1.9743, "rewards/accuracies": 0.59375, "rewards/chosen": -1.157409429550171, "rewards/margins": 0.55291348695755, "rewards/rejected": -1.7103229761123657, "step": 385 }, { "epoch": 0.816540172729652, "grad_norm": 27.22781403012628, "learning_rate": 5.885383207096832e-08, "logits/chosen": -0.25118163228034973, "logits/rejected": -0.17533348500728607, "logps/chosen": -1.158809781074524, "logps/rejected": -1.7991712093353271, "loss": 1.9567, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.158809781074524, "rewards/margins": 0.6403613090515137, "rewards/rejected": -1.7991712093353271, "step": 390 }, { "epoch": 0.8270086364825961, "grad_norm": 41.14703828256896, "learning_rate": 5.2482500845047165e-08, "logits/chosen": -0.2559366524219513, "logits/rejected": -0.13099372386932373, "logps/chosen": -1.1643617153167725, "logps/rejected": -1.7700135707855225, "loss": 1.9559, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.1643617153167725, "rewards/margins": 0.60565185546875, "rewards/rejected": -1.7700135707855225, "step": 395 }, { "epoch": 0.8374771002355405, "grad_norm": 24.3693128346703, "learning_rate": 4.644297433686162e-08, "logits/chosen": -0.19337531924247742, "logits/rejected": -0.14422497153282166, "logps/chosen": -1.0990240573883057, "logps/rejected": -1.7358309030532837, "loss": 1.9382, "rewards/accuracies": 0.65625, "rewards/chosen": -1.0990240573883057, "rewards/margins": 0.6368069648742676, "rewards/rejected": -1.7358309030532837, "step": 400 }, { "epoch": 0.8479455639884846, "grad_norm": 27.686221570305076, "learning_rate": 4.074334871494558e-08, "logits/chosen": -0.27724021673202515, "logits/rejected": -0.21434080600738525, "logps/chosen": -1.2541942596435547, "logps/rejected": -1.889288306236267, "loss": 1.981, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2541942596435547, "rewards/margins": 0.6350940465927124, "rewards/rejected": -1.889288306236267, "step": 405 }, { "epoch": 0.8584140277414289, "grad_norm": 27.73558369495071, "learning_rate": 3.5391264500382e-08, "logits/chosen": -0.22901353240013123, "logits/rejected": -0.1688699871301651, "logps/chosen": -1.0957512855529785, "logps/rejected": -1.5964380502700806, "loss": 1.9786, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.0957512855529785, "rewards/margins": 0.500686526298523, "rewards/rejected": -1.5964380502700806, "step": 410 }, { "epoch": 0.8688824914943732, "grad_norm": 25.24757762967279, "learning_rate": 3.0393896324452226e-08, "logits/chosen": -0.23829559981822968, "logits/rejected": -0.14140795171260834, "logps/chosen": -1.1398870944976807, "logps/rejected": -1.7605117559432983, "loss": 1.9365, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1398870944976807, "rewards/margins": 0.6206245422363281, "rewards/rejected": -1.7605117559432983, "step": 415 }, { "epoch": 0.8793509552473174, "grad_norm": 24.11409806888563, "learning_rate": 2.5757943310825026e-08, "logits/chosen": -0.21624989807605743, "logits/rejected": -0.15116100013256073, "logps/chosen": -1.1420520544052124, "logps/rejected": -1.7557262182235718, "loss": 1.9847, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.1420520544052124, "rewards/margins": 0.6136741042137146, "rewards/rejected": -1.7557262182235718, "step": 420 }, { "epoch": 0.8898194190002617, "grad_norm": 31.071203059945226, "learning_rate": 2.148962009517823e-08, "logits/chosen": -0.1776510775089264, "logits/rejected": -0.11379513889551163, "logps/chosen": -1.2811336517333984, "logps/rejected": -1.7914276123046875, "loss": 1.9474, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.2811336517333984, "rewards/margins": 0.5102939605712891, "rewards/rejected": -1.7914276123046875, "step": 425 }, { "epoch": 0.9002878827532059, "grad_norm": 22.593650984666684, "learning_rate": 1.759464849429082e-08, "logits/chosen": -0.2011154592037201, "logits/rejected": -0.14906981587409973, "logps/chosen": -1.2140544652938843, "logps/rejected": -1.8150146007537842, "loss": 1.9428, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.2140544652938843, "rewards/margins": 0.6009601354598999, "rewards/rejected": -1.8150146007537842, "step": 430 }, { "epoch": 0.9107563465061502, "grad_norm": 20.886785791055413, "learning_rate": 1.4078249835774169e-08, "logits/chosen": -0.2394082248210907, "logits/rejected": -0.1875392496585846, "logps/chosen": -1.15377676486969, "logps/rejected": -1.962689995765686, "loss": 1.931, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.15377676486969, "rewards/margins": 0.8089130520820618, "rewards/rejected": -1.962689995765686, "step": 435 }, { "epoch": 0.9212248102590945, "grad_norm": 24.47138715404228, "learning_rate": 1.0945137958723705e-08, "logits/chosen": -0.0983675867319107, "logits/rejected": -0.08297935873270035, "logps/chosen": -1.2065281867980957, "logps/rejected": -1.7344862222671509, "loss": 2.0038, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.2065281867980957, "rewards/margins": 0.5279580354690552, "rewards/rejected": -1.7344862222671509, "step": 440 }, { "epoch": 0.9316932740120387, "grad_norm": 24.937539236981156, "learning_rate": 8.19951289467482e-09, "logits/chosen": -0.2168574333190918, "logits/rejected": -0.1608891487121582, "logps/chosen": -1.1793944835662842, "logps/rejected": -1.7278966903686523, "loss": 2.0001, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.1793944835662842, "rewards/margins": 0.5485021471977234, "rewards/rejected": -1.7278966903686523, "step": 445 }, { "epoch": 0.942161737764983, "grad_norm": 25.949713098879222, "learning_rate": 5.84505523733293e-09, "logits/chosen": -0.13714662194252014, "logits/rejected": -0.09888915717601776, "logps/chosen": -1.2185251712799072, "logps/rejected": -1.736971139907837, "loss": 1.9405, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2185251712799072, "rewards/margins": 0.5184457302093506, "rewards/rejected": -1.736971139907837, "step": 450 }, { "epoch": 0.9526302015179272, "grad_norm": 48.46134541581324, "learning_rate": 3.8849212086261466e-09, "logits/chosen": -0.17583271861076355, "logits/rejected": -0.12143261730670929, "logps/chosen": -1.3378379344940186, "logps/rejected": -1.6454353332519531, "loss": 1.997, "rewards/accuracies": 0.625, "rewards/chosen": -1.3378379344940186, "rewards/margins": 0.3075973391532898, "rewards/rejected": -1.6454353332519531, "step": 455 }, { "epoch": 0.9630986652708715, "grad_norm": 17.18309794816949, "learning_rate": 2.3217384276938756e-09, "logits/chosen": -0.15463075041770935, "logits/rejected": -0.10197849571704865, "logps/chosen": -1.0989463329315186, "logps/rejected": -1.7458966970443726, "loss": 1.932, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.0989463329315186, "rewards/margins": 0.6469505429267883, "rewards/rejected": -1.7458966970443726, "step": 460 }, { "epoch": 0.9735671290238157, "grad_norm": 23.454717675856944, "learning_rate": 1.1576023884836472e-09, "logits/chosen": -0.25512656569480896, "logits/rejected": -0.15672564506530762, "logps/chosen": -1.2135329246520996, "logps/rejected": -1.7796437740325928, "loss": 1.9646, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.2135329246520996, "rewards/margins": 0.5661108493804932, "rewards/rejected": -1.7796437740325928, "step": 465 }, { "epoch": 0.98403559277676, "grad_norm": 38.4586695917519, "learning_rate": 3.940736506780395e-10, "logits/chosen": -0.2280760258436203, "logits/rejected": -0.14886632561683655, "logps/chosen": -1.1674778461456299, "logps/rejected": -1.6563600301742554, "loss": 2.0158, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1674778461456299, "rewards/margins": 0.4888822138309479, "rewards/rejected": -1.6563600301742554, "step": 470 }, { "epoch": 0.9945040565297043, "grad_norm": 20.371210573575656, "learning_rate": 3.2175747716822744e-11, "logits/chosen": -0.268063485622406, "logits/rejected": -0.13501767814159393, "logps/chosen": -1.2155284881591797, "logps/rejected": -1.7273550033569336, "loss": 1.9722, "rewards/accuracies": 0.59375, "rewards/chosen": -1.2155284881591797, "rewards/margins": 0.5118265151977539, "rewards/rejected": -1.7273550033569336, "step": 475 }, { "epoch": 0.998691442030882, "step": 477, "total_flos": 0.0, "train_loss": 2.0149041866606385, "train_runtime": 17499.3556, "train_samples_per_second": 3.494, "train_steps_per_second": 0.027 } ], "logging_steps": 5, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }