diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6596 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 100, + "global_step": 4689, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.066098081023454e-09, + "logits/generated": -3.009117841720581, + "logits/real": -3.035973310470581, + "logps/generated": -135.85076904296875, + "logps/real": -392.24298095703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 1.0660980810234541e-08, + "logits/generated": -3.0000903606414795, + "logits/real": -3.033531427383423, + "logps/generated": -123.82107543945312, + "logps/real": -288.15521240234375, + "loss": 0.6915, + "rewards/accuracies": 0.4027777910232544, + "rewards/generated": -0.00034361134748905897, + "rewards/margins": 0.0022192317992448807, + "rewards/real": 0.0018756203353404999, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 2.1321961620469082e-08, + "logits/generated": -2.994983673095703, + "logits/real": -3.063678026199341, + "logps/generated": -100.84471130371094, + "logps/real": -199.7611541748047, + "loss": 0.6709, + "rewards/accuracies": 0.8125, + "rewards/generated": -0.03251934424042702, + "rewards/margins": 0.0452335849404335, + "rewards/real": 0.01271424163132906, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 3.1982942430703625e-08, + "logits/generated": -2.9915778636932373, + "logits/real": -3.0394511222839355, + "logps/generated": -110.52748107910156, + "logps/real": -247.39794921875, + "loss": 0.5963, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -0.15009155869483948, + "rewards/margins": 0.23985818028450012, + "rewards/real": 0.08976660668849945, + "step": 30 + }, + { + "epoch": 0.03, + "learning_rate": 4.2643923240938164e-08, + "logits/generated": -2.9606637954711914, + "logits/real": -3.029853343963623, + "logps/generated": -108.22274017333984, + "logps/real": -235.333740234375, + "loss": 0.4724, + "rewards/accuracies": 0.9375, + "rewards/generated": -0.39079219102859497, + "rewards/margins": 0.5763204097747803, + "rewards/real": 0.18552818894386292, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 5.3304904051172704e-08, + "logits/generated": -2.973432779312134, + "logits/real": -3.049670457839966, + "logps/generated": -111.00482177734375, + "logps/real": -233.86471557617188, + "loss": 0.386, + "rewards/accuracies": 0.9375, + "rewards/generated": -0.5961061120033264, + "rewards/margins": 0.8909885287284851, + "rewards/real": 0.2948824167251587, + "step": 50 + }, + { + "epoch": 0.04, + "learning_rate": 6.396588486140725e-08, + "logits/generated": -2.8845043182373047, + "logits/real": -3.0369975566864014, + "logps/generated": -117.0033187866211, + "logps/real": -240.541259765625, + "loss": 0.2917, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -0.9591042399406433, + "rewards/margins": 1.4262845516204834, + "rewards/real": 0.4671803414821625, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 7.462686567164178e-08, + "logits/generated": -2.90877103805542, + "logits/real": -3.014878273010254, + "logps/generated": -116.1484146118164, + "logps/real": -237.03872680664062, + "loss": 0.2178, + "rewards/accuracies": 1.0, + "rewards/generated": -1.303546667098999, + "rewards/margins": 1.947913408279419, + "rewards/real": 0.6443666815757751, + "step": 70 + }, + { + "epoch": 0.05, + "learning_rate": 8.528784648187633e-08, + "logits/generated": -2.89373517036438, + "logits/real": -3.015916347503662, + "logps/generated": -122.8466796875, + "logps/real": -240.26565551757812, + "loss": 0.2029, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -1.497593641281128, + "rewards/margins": 2.123328447341919, + "rewards/real": 0.6257346868515015, + "step": 80 + }, + { + "epoch": 0.06, + "learning_rate": 9.594882729211087e-08, + "logits/generated": -2.8569562435150146, + "logits/real": -3.0172367095947266, + "logps/generated": -127.4751205444336, + "logps/real": -212.5527801513672, + "loss": 0.1732, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -1.9681780338287354, + "rewards/margins": 2.7209601402282715, + "rewards/real": 0.7527822256088257, + "step": 90 + }, + { + "epoch": 0.06, + "learning_rate": 1.0660980810234541e-07, + "logits/generated": -2.8705780506134033, + "logits/real": -2.9818317890167236, + "logps/generated": -121.19868469238281, + "logps/real": -237.79733276367188, + "loss": 0.1566, + "rewards/accuracies": 1.0, + "rewards/generated": -1.7731469869613647, + "rewards/margins": 2.562190294265747, + "rewards/real": 0.7890429496765137, + "step": 100 + }, + { + "epoch": 0.07, + "learning_rate": 1.1727078891257995e-07, + "logits/generated": -2.848050832748413, + "logits/real": -2.976525068283081, + "logps/generated": -132.4097137451172, + "logps/real": -262.47882080078125, + "loss": 0.1559, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -2.003335475921631, + "rewards/margins": 2.8780131340026855, + "rewards/real": 0.874677836894989, + "step": 110 + }, + { + "epoch": 0.08, + "learning_rate": 1.279317697228145e-07, + "logits/generated": -2.82133150100708, + "logits/real": -2.963655710220337, + "logps/generated": -137.30862426757812, + "logps/real": -256.87567138671875, + "loss": 0.1414, + "rewards/accuracies": 1.0, + "rewards/generated": -2.5247628688812256, + "rewards/margins": 3.375014066696167, + "rewards/real": 0.8502515554428101, + "step": 120 + }, + { + "epoch": 0.08, + "learning_rate": 1.3859275053304903e-07, + "logits/generated": -2.808093309402466, + "logits/real": -2.9520535469055176, + "logps/generated": -125.69123840332031, + "logps/real": -206.429931640625, + "loss": 0.1105, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -2.6616690158843994, + "rewards/margins": 3.4152088165283203, + "rewards/real": 0.7535400390625, + "step": 130 + }, + { + "epoch": 0.09, + "learning_rate": 1.4925373134328355e-07, + "logits/generated": -2.779547691345215, + "logits/real": -2.948424816131592, + "logps/generated": -141.017822265625, + "logps/real": -251.13095092773438, + "loss": 0.1126, + "rewards/accuracies": 1.0, + "rewards/generated": -3.277395248413086, + "rewards/margins": 4.223907470703125, + "rewards/real": 0.9465125203132629, + "step": 140 + }, + { + "epoch": 0.1, + "learning_rate": 1.5991471215351813e-07, + "logits/generated": -2.783216714859009, + "logits/real": -2.95442533493042, + "logps/generated": -149.49667358398438, + "logps/real": -203.93063354492188, + "loss": 0.0934, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -3.4615890979766846, + "rewards/margins": 4.269316673278809, + "rewards/real": 0.8077276349067688, + "step": 150 + }, + { + "epoch": 0.1, + "learning_rate": 1.7057569296375266e-07, + "logits/generated": -2.7575511932373047, + "logits/real": -2.9458460807800293, + "logps/generated": -144.13388061523438, + "logps/real": -260.1390686035156, + "loss": 0.087, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -3.196132183074951, + "rewards/margins": 4.332037448883057, + "rewards/real": 1.1359055042266846, + "step": 160 + }, + { + "epoch": 0.11, + "learning_rate": 1.8123667377398718e-07, + "logits/generated": -2.7579033374786377, + "logits/real": -2.943512439727783, + "logps/generated": -153.94261169433594, + "logps/real": -213.04147338867188, + "loss": 0.0943, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -3.8576176166534424, + "rewards/margins": 4.797067642211914, + "rewards/real": 0.9394500851631165, + "step": 170 + }, + { + "epoch": 0.12, + "learning_rate": 1.9189765458422174e-07, + "logits/generated": -2.7721304893493652, + "logits/real": -2.879235029220581, + "logps/generated": -147.11483764648438, + "logps/real": -197.543212890625, + "loss": 0.0739, + "rewards/accuracies": 1.0, + "rewards/generated": -4.2951579093933105, + "rewards/margins": 5.02254581451416, + "rewards/real": 0.7273877859115601, + "step": 180 + }, + { + "epoch": 0.12, + "learning_rate": 2.025586353944563e-07, + "logits/generated": -2.7546629905700684, + "logits/real": -2.9070351123809814, + "logps/generated": -143.64361572265625, + "logps/real": -201.7648468017578, + "loss": 0.0929, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -4.264950752258301, + "rewards/margins": 5.123804092407227, + "rewards/real": 0.8588531613349915, + "step": 190 + }, + { + "epoch": 0.13, + "learning_rate": 2.1321961620469082e-07, + "logits/generated": -2.704319477081299, + "logits/real": -2.8970680236816406, + "logps/generated": -142.53402709960938, + "logps/real": -203.28872680664062, + "loss": 0.0702, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -4.170575141906738, + "rewards/margins": 5.0223588943481445, + "rewards/real": 0.8517836332321167, + "step": 200 + }, + { + "epoch": 0.13, + "learning_rate": 2.2388059701492537e-07, + "logits/generated": -2.714703321456909, + "logits/real": -2.902454137802124, + "logps/generated": -155.59530639648438, + "logps/real": -206.8930206298828, + "loss": 0.0655, + "rewards/accuracies": 1.0, + "rewards/generated": -5.102675437927246, + "rewards/margins": 5.6945576667785645, + "rewards/real": 0.5918816328048706, + "step": 210 + }, + { + "epoch": 0.14, + "learning_rate": 2.345415778251599e-07, + "logits/generated": -2.7051188945770264, + "logits/real": -2.8684916496276855, + "logps/generated": -158.3416290283203, + "logps/real": -230.9740447998047, + "loss": 0.0747, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -5.212487697601318, + "rewards/margins": 6.16409158706665, + "rewards/real": 0.9516040682792664, + "step": 220 + }, + { + "epoch": 0.15, + "learning_rate": 2.452025586353944e-07, + "logits/generated": -2.7530570030212402, + "logits/real": -2.8788464069366455, + "logps/generated": -161.42819213867188, + "logps/real": -214.85202026367188, + "loss": 0.0543, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -5.426655292510986, + "rewards/margins": 5.798591136932373, + "rewards/real": 0.37193647027015686, + "step": 230 + }, + { + "epoch": 0.15, + "learning_rate": 2.55863539445629e-07, + "logits/generated": -2.6958775520324707, + "logits/real": -2.8694121837615967, + "logps/generated": -161.2700958251953, + "logps/real": -178.91891479492188, + "loss": 0.0453, + "rewards/accuracies": 1.0, + "rewards/generated": -5.468385696411133, + "rewards/margins": 5.783989906311035, + "rewards/real": 0.315604031085968, + "step": 240 + }, + { + "epoch": 0.16, + "learning_rate": 2.665245202558635e-07, + "logits/generated": -2.712916851043701, + "logits/real": -2.84846568107605, + "logps/generated": -161.986328125, + "logps/real": -215.94033813476562, + "loss": 0.0668, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -6.006771087646484, + "rewards/margins": 6.370474815368652, + "rewards/real": 0.363704115152359, + "step": 250 + }, + { + "epoch": 0.17, + "learning_rate": 2.7718550106609805e-07, + "logits/generated": -2.6760001182556152, + "logits/real": -2.8535640239715576, + "logps/generated": -159.93551635742188, + "logps/real": -228.80801391601562, + "loss": 0.0695, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -5.786929130554199, + "rewards/margins": 5.9885358810424805, + "rewards/real": 0.20160651206970215, + "step": 260 + }, + { + "epoch": 0.17, + "learning_rate": 2.878464818763326e-07, + "logits/generated": -2.61970591545105, + "logits/real": -2.8495993614196777, + "logps/generated": -177.95175170898438, + "logps/real": -246.32052612304688, + "loss": 0.0479, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -6.833677768707275, + "rewards/margins": 7.163332462310791, + "rewards/real": 0.32965537905693054, + "step": 270 + }, + { + "epoch": 0.18, + "learning_rate": 2.985074626865671e-07, + "logits/generated": -2.6257996559143066, + "logits/real": -2.8162739276885986, + "logps/generated": -172.50180053710938, + "logps/real": -239.02352905273438, + "loss": 0.0459, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -7.3265485763549805, + "rewards/margins": 7.394839286804199, + "rewards/real": 0.06829099357128143, + "step": 280 + }, + { + "epoch": 0.19, + "learning_rate": 3.0916844349680174e-07, + "logits/generated": -2.6942734718322754, + "logits/real": -2.8194546699523926, + "logps/generated": -199.2237548828125, + "logps/real": -242.6060791015625, + "loss": 0.0424, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -8.099980354309082, + "rewards/margins": 7.564375877380371, + "rewards/real": -0.5356050729751587, + "step": 290 + }, + { + "epoch": 0.19, + "learning_rate": 3.1982942430703626e-07, + "logits/generated": -2.6435790061950684, + "logits/real": -2.797001838684082, + "logps/generated": -198.12205505371094, + "logps/real": -245.3691864013672, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/generated": -8.79419231414795, + "rewards/margins": 8.475809097290039, + "rewards/real": -0.3183833062648773, + "step": 300 + }, + { + "epoch": 0.2, + "learning_rate": 3.304904051172708e-07, + "logits/generated": -2.620265483856201, + "logits/real": -2.802964925765991, + "logps/generated": -201.2637939453125, + "logps/real": -251.5539093017578, + "loss": 0.0469, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -8.044418334960938, + "rewards/margins": 8.085213661193848, + "rewards/real": 0.04079418629407883, + "step": 310 + }, + { + "epoch": 0.2, + "learning_rate": 3.411513859275053e-07, + "logits/generated": -2.5675623416900635, + "logits/real": -2.8209726810455322, + "logps/generated": -212.57968139648438, + "logps/real": -236.8415985107422, + "loss": 0.0355, + "rewards/accuracies": 1.0, + "rewards/generated": -10.560102462768555, + "rewards/margins": 9.677938461303711, + "rewards/real": -0.8821651339530945, + "step": 320 + }, + { + "epoch": 0.21, + "learning_rate": 3.5181236673773984e-07, + "logits/generated": -2.532376766204834, + "logits/real": -2.7409865856170654, + "logps/generated": -189.15724182128906, + "logps/real": -281.72003173828125, + "loss": 0.0543, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -8.800742149353027, + "rewards/margins": 8.003158569335938, + "rewards/real": -0.7975834608078003, + "step": 330 + }, + { + "epoch": 0.22, + "learning_rate": 3.6247334754797437e-07, + "logits/generated": -2.565183401107788, + "logits/real": -2.7490944862365723, + "logps/generated": -187.1248321533203, + "logps/real": -226.8002166748047, + "loss": 0.0525, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -8.799155235290527, + "rewards/margins": 7.27255916595459, + "rewards/real": -1.526595950126648, + "step": 340 + }, + { + "epoch": 0.22, + "learning_rate": 3.7313432835820895e-07, + "logits/generated": -2.5572450160980225, + "logits/real": -2.779174327850342, + "logps/generated": -200.363525390625, + "logps/real": -253.7314453125, + "loss": 0.0535, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -9.488149642944336, + "rewards/margins": 8.699429512023926, + "rewards/real": -0.7887213230133057, + "step": 350 + }, + { + "epoch": 0.23, + "learning_rate": 3.8379530916844347e-07, + "logits/generated": -2.5597970485687256, + "logits/real": -2.734602928161621, + "logps/generated": -207.70529174804688, + "logps/real": -247.5392608642578, + "loss": 0.0492, + "rewards/accuracies": 1.0, + "rewards/generated": -9.628881454467773, + "rewards/margins": 8.560700416564941, + "rewards/real": -1.0681811571121216, + "step": 360 + }, + { + "epoch": 0.24, + "learning_rate": 3.9445628997867805e-07, + "logits/generated": -2.5730834007263184, + "logits/real": -2.7402100563049316, + "logps/generated": -206.32864379882812, + "logps/real": -205.1163330078125, + "loss": 0.0677, + "rewards/accuracies": 0.9375, + "rewards/generated": -9.762407302856445, + "rewards/margins": 8.460587501525879, + "rewards/real": -1.3018196821212769, + "step": 370 + }, + { + "epoch": 0.24, + "learning_rate": 4.051172707889126e-07, + "logits/generated": -2.5265259742736816, + "logits/real": -2.725435972213745, + "logps/generated": -212.57608032226562, + "logps/real": -223.85104370117188, + "loss": 0.0367, + "rewards/accuracies": 1.0, + "rewards/generated": -10.966038703918457, + "rewards/margins": 10.222475051879883, + "rewards/real": -0.7435646653175354, + "step": 380 + }, + { + "epoch": 0.25, + "learning_rate": 4.157782515991471e-07, + "logits/generated": -2.44181227684021, + "logits/real": -2.7299582958221436, + "logps/generated": -221.58920288085938, + "logps/real": -276.7346496582031, + "loss": 0.0388, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -11.691991806030273, + "rewards/margins": 11.166677474975586, + "rewards/real": -0.5253145098686218, + "step": 390 + }, + { + "epoch": 0.26, + "learning_rate": 4.2643923240938163e-07, + "logits/generated": -2.507145404815674, + "logits/real": -2.689694881439209, + "logps/generated": -230.5036163330078, + "logps/real": -235.1736602783203, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/generated": -11.178986549377441, + "rewards/margins": 9.90050220489502, + "rewards/real": -1.2784844636917114, + "step": 400 + }, + { + "epoch": 0.26, + "learning_rate": 4.371002132196162e-07, + "logits/generated": -2.502819061279297, + "logits/real": -2.6965878009796143, + "logps/generated": -234.39993286132812, + "logps/real": -237.62744140625, + "loss": 0.0323, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -12.532038688659668, + "rewards/margins": 10.712557792663574, + "rewards/real": -1.819482445716858, + "step": 410 + }, + { + "epoch": 0.27, + "learning_rate": 4.4776119402985074e-07, + "logits/generated": -2.4594552516937256, + "logits/real": -2.703640937805176, + "logps/generated": -215.9478302001953, + "logps/real": -272.41522216796875, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/generated": -11.244471549987793, + "rewards/margins": 10.069063186645508, + "rewards/real": -1.1754099130630493, + "step": 420 + }, + { + "epoch": 0.28, + "learning_rate": 4.5842217484008526e-07, + "logits/generated": -2.4474716186523438, + "logits/real": -2.6229748725891113, + "logps/generated": -228.8519744873047, + "logps/real": -245.61215209960938, + "loss": 0.0219, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -12.294897079467773, + "rewards/margins": 10.74439811706543, + "rewards/real": -1.5504984855651855, + "step": 430 + }, + { + "epoch": 0.28, + "learning_rate": 4.690831556503198e-07, + "logits/generated": -2.515381336212158, + "logits/real": -2.632841110229492, + "logps/generated": -221.27975463867188, + "logps/real": -260.13641357421875, + "loss": 0.0353, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -11.185568809509277, + "rewards/margins": 9.645071983337402, + "rewards/real": -1.54049813747406, + "step": 440 + }, + { + "epoch": 0.29, + "learning_rate": 4.797441364605543e-07, + "logits/generated": -2.4365837574005127, + "logits/real": -2.6345643997192383, + "logps/generated": -214.77029418945312, + "logps/real": -274.2986755371094, + "loss": 0.0363, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -11.434799194335938, + "rewards/margins": 10.464271545410156, + "rewards/real": -0.9705268740653992, + "step": 450 + }, + { + "epoch": 0.29, + "learning_rate": 4.904051172707888e-07, + "logits/generated": -2.4966938495635986, + "logits/real": -2.577971935272217, + "logps/generated": -224.62466430664062, + "logps/real": -238.035400390625, + "loss": 0.0624, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -12.316131591796875, + "rewards/margins": 10.328946113586426, + "rewards/real": -1.987186074256897, + "step": 460 + }, + { + "epoch": 0.3, + "learning_rate": 4.998815165876776e-07, + "logits/generated": -2.5350544452667236, + "logits/real": -2.603980302810669, + "logps/generated": -231.7646484375, + "logps/real": -275.49993896484375, + "loss": 0.0361, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -12.520462989807129, + "rewards/margins": 10.306425094604492, + "rewards/real": -2.214038372039795, + "step": 470 + }, + { + "epoch": 0.31, + "learning_rate": 4.98696682464455e-07, + "logits/generated": -2.4045310020446777, + "logits/real": -2.608654499053955, + "logps/generated": -233.0770263671875, + "logps/real": -309.36322021484375, + "loss": 0.0336, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -12.922886848449707, + "rewards/margins": 10.215009689331055, + "rewards/real": -2.707876682281494, + "step": 480 + }, + { + "epoch": 0.31, + "learning_rate": 4.975118483412322e-07, + "logits/generated": -2.4344067573547363, + "logits/real": -2.6011695861816406, + "logps/generated": -244.77059936523438, + "logps/real": -258.57525634765625, + "loss": 0.0324, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -14.29552173614502, + "rewards/margins": 10.700136184692383, + "rewards/real": -3.595385789871216, + "step": 490 + }, + { + "epoch": 0.32, + "learning_rate": 4.963270142180094e-07, + "logits/generated": -2.4870104789733887, + "logits/real": -2.630181074142456, + "logps/generated": -239.10421752929688, + "logps/real": -282.9891662597656, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/generated": -13.501965522766113, + "rewards/margins": 10.855205535888672, + "rewards/real": -2.6467597484588623, + "step": 500 + }, + { + "epoch": 0.33, + "learning_rate": 4.951421800947867e-07, + "logits/generated": -2.450176954269409, + "logits/real": -2.5793440341949463, + "logps/generated": -230.175048828125, + "logps/real": -299.5987243652344, + "loss": 0.0386, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -12.763737678527832, + "rewards/margins": 10.467870712280273, + "rewards/real": -2.295866012573242, + "step": 510 + }, + { + "epoch": 0.33, + "learning_rate": 4.93957345971564e-07, + "logits/generated": -2.473119020462036, + "logits/real": -2.5927822589874268, + "logps/generated": -239.88040161132812, + "logps/real": -218.50906372070312, + "loss": 0.0299, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -13.901123046875, + "rewards/margins": 10.96774673461914, + "rewards/real": -2.9333770275115967, + "step": 520 + }, + { + "epoch": 0.34, + "learning_rate": 4.927725118483413e-07, + "logits/generated": -2.455239772796631, + "logits/real": -2.552358388900757, + "logps/generated": -256.57354736328125, + "logps/real": -221.6819610595703, + "loss": 0.0191, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -14.927894592285156, + "rewards/margins": 11.572967529296875, + "rewards/real": -3.3549275398254395, + "step": 530 + }, + { + "epoch": 0.35, + "learning_rate": 4.915876777251184e-07, + "logits/generated": -2.435835599899292, + "logits/real": -2.5121445655822754, + "logps/generated": -263.62420654296875, + "logps/real": -271.3969421386719, + "loss": 0.029, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -15.717508316040039, + "rewards/margins": 12.2947998046875, + "rewards/real": -3.422708034515381, + "step": 540 + }, + { + "epoch": 0.35, + "learning_rate": 4.904028436018957e-07, + "logits/generated": -2.4802205562591553, + "logits/real": -2.5491814613342285, + "logps/generated": -241.066650390625, + "logps/real": -222.16323852539062, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/generated": -13.833559036254883, + "rewards/margins": 10.92573356628418, + "rewards/real": -2.907824993133545, + "step": 550 + }, + { + "epoch": 0.36, + "learning_rate": 4.892180094786729e-07, + "logits/generated": -2.429196357727051, + "logits/real": -2.5694682598114014, + "logps/generated": -249.312255859375, + "logps/real": -228.79592895507812, + "loss": 0.0602, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -14.980000495910645, + "rewards/margins": 11.490592002868652, + "rewards/real": -3.4894092082977295, + "step": 560 + }, + { + "epoch": 0.36, + "learning_rate": 4.880331753554502e-07, + "logits/generated": -2.584810972213745, + "logits/real": -2.7082631587982178, + "logps/generated": -221.3469696044922, + "logps/real": -252.34561157226562, + "loss": 0.0312, + "rewards/accuracies": 1.0, + "rewards/generated": -12.08858871459961, + "rewards/margins": 11.336016654968262, + "rewards/real": -0.7525719404220581, + "step": 570 + }, + { + "epoch": 0.37, + "learning_rate": 4.868483412322275e-07, + "logits/generated": -2.567894458770752, + "logits/real": -2.661423921585083, + "logps/generated": -239.905517578125, + "logps/real": -263.59918212890625, + "loss": 0.0388, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -12.732629776000977, + "rewards/margins": 11.67176628112793, + "rewards/real": -1.0608632564544678, + "step": 580 + }, + { + "epoch": 0.38, + "learning_rate": 4.856635071090047e-07, + "logits/generated": -2.523099422454834, + "logits/real": -2.6167566776275635, + "logps/generated": -228.9334259033203, + "logps/real": -236.6662139892578, + "loss": 0.0337, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -11.951663970947266, + "rewards/margins": 11.008401870727539, + "rewards/real": -0.9432622194290161, + "step": 590 + }, + { + "epoch": 0.38, + "learning_rate": 4.84478672985782e-07, + "logits/generated": -2.4970996379852295, + "logits/real": -2.646541118621826, + "logps/generated": -250.40011596679688, + "logps/real": -237.4553680419922, + "loss": 0.0465, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -13.550500869750977, + "rewards/margins": 11.562530517578125, + "rewards/real": -1.9879701137542725, + "step": 600 + }, + { + "epoch": 0.39, + "learning_rate": 4.832938388625591e-07, + "logits/generated": -2.4692635536193848, + "logits/real": -2.5598652362823486, + "logps/generated": -243.6393585205078, + "logps/real": -216.361083984375, + "loss": 0.0315, + "rewards/accuracies": 1.0, + "rewards/generated": -14.87501049041748, + "rewards/margins": 12.408052444458008, + "rewards/real": -2.466959238052368, + "step": 610 + }, + { + "epoch": 0.4, + "learning_rate": 4.821090047393365e-07, + "logits/generated": -2.459730625152588, + "logits/real": -2.5611660480499268, + "logps/generated": -250.59909057617188, + "logps/real": -267.4476623535156, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/generated": -14.998074531555176, + "rewards/margins": 11.819680213928223, + "rewards/real": -3.1783957481384277, + "step": 620 + }, + { + "epoch": 0.4, + "learning_rate": 4.809241706161137e-07, + "logits/generated": -2.427530288696289, + "logits/real": -2.5768373012542725, + "logps/generated": -273.51495361328125, + "logps/real": -301.49310302734375, + "loss": 0.0314, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -15.543191909790039, + "rewards/margins": 12.9131441116333, + "rewards/real": -2.6300482749938965, + "step": 630 + }, + { + "epoch": 0.41, + "learning_rate": 4.79739336492891e-07, + "logits/generated": -2.455411672592163, + "logits/real": -2.5930774211883545, + "logps/generated": -288.97998046875, + "logps/real": -279.2014465332031, + "loss": 0.0389, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -17.09014320373535, + "rewards/margins": 14.309709548950195, + "rewards/real": -2.7804324626922607, + "step": 640 + }, + { + "epoch": 0.42, + "learning_rate": 4.785545023696682e-07, + "logits/generated": -2.3936052322387695, + "logits/real": -2.58086895942688, + "logps/generated": -263.72552490234375, + "logps/real": -258.2544250488281, + "loss": 0.0273, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -16.33547592163086, + "rewards/margins": 12.990063667297363, + "rewards/real": -3.3454136848449707, + "step": 650 + }, + { + "epoch": 0.42, + "learning_rate": 4.773696682464455e-07, + "logits/generated": -2.4560112953186035, + "logits/real": -2.57716703414917, + "logps/generated": -265.07843017578125, + "logps/real": -228.6409912109375, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/generated": -15.906695365905762, + "rewards/margins": 12.9684476852417, + "rewards/real": -2.9382481575012207, + "step": 660 + }, + { + "epoch": 0.43, + "learning_rate": 4.7618483412322273e-07, + "logits/generated": -2.3757667541503906, + "logits/real": -2.5697007179260254, + "logps/generated": -273.296142578125, + "logps/real": -280.37835693359375, + "loss": 0.022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.565967559814453, + "rewards/margins": 14.934137344360352, + "rewards/real": -2.6318306922912598, + "step": 670 + }, + { + "epoch": 0.44, + "learning_rate": 4.7499999999999995e-07, + "logits/generated": -2.3877642154693604, + "logits/real": -2.4737722873687744, + "logps/generated": -270.6985168457031, + "logps/real": -216.703369140625, + "loss": 0.0445, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -16.873882293701172, + "rewards/margins": 13.297744750976562, + "rewards/real": -3.576136827468872, + "step": 680 + }, + { + "epoch": 0.44, + "learning_rate": 4.738151658767772e-07, + "logits/generated": -2.357905149459839, + "logits/real": -2.5049002170562744, + "logps/generated": -250.86215209960938, + "logps/real": -245.96664428710938, + "loss": 0.0277, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -15.14183521270752, + "rewards/margins": 13.497465133666992, + "rewards/real": -1.6443710327148438, + "step": 690 + }, + { + "epoch": 0.45, + "learning_rate": 4.726303317535545e-07, + "logits/generated": -2.3780131340026855, + "logits/real": -2.5022144317626953, + "logps/generated": -256.3135070800781, + "logps/real": -263.6549377441406, + "loss": 0.0301, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -15.227795600891113, + "rewards/margins": 13.609758377075195, + "rewards/real": -1.618038535118103, + "step": 700 + }, + { + "epoch": 0.45, + "learning_rate": 4.7144549763033177e-07, + "logits/generated": -2.4752538204193115, + "logits/real": -2.518859386444092, + "logps/generated": -264.70703125, + "logps/real": -244.23629760742188, + "loss": 0.0386, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -15.996221542358398, + "rewards/margins": 13.332077026367188, + "rewards/real": -2.664144992828369, + "step": 710 + }, + { + "epoch": 0.46, + "learning_rate": 4.70260663507109e-07, + "logits/generated": -2.468376874923706, + "logits/real": -2.4994096755981445, + "logps/generated": -264.22625732421875, + "logps/real": -266.2322998046875, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/generated": -15.571908950805664, + "rewards/margins": 13.056289672851562, + "rewards/real": -2.5156185626983643, + "step": 720 + }, + { + "epoch": 0.47, + "learning_rate": 4.690758293838862e-07, + "logits/generated": -2.4043707847595215, + "logits/real": -2.4540791511535645, + "logps/generated": -286.14727783203125, + "logps/real": -247.5970001220703, + "loss": 0.0284, + "rewards/accuracies": 1.0, + "rewards/generated": -17.904537200927734, + "rewards/margins": 14.721110343933105, + "rewards/real": -3.1834263801574707, + "step": 730 + }, + { + "epoch": 0.47, + "learning_rate": 4.678909952606635e-07, + "logits/generated": -2.445075035095215, + "logits/real": -2.420685291290283, + "logps/generated": -282.981689453125, + "logps/real": -268.9815673828125, + "loss": 0.0255, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -16.945833206176758, + "rewards/margins": 14.0308198928833, + "rewards/real": -2.915013074874878, + "step": 740 + }, + { + "epoch": 0.48, + "learning_rate": 4.667061611374407e-07, + "logits/generated": -2.4320383071899414, + "logits/real": -2.505674362182617, + "logps/generated": -259.02423095703125, + "logps/real": -264.7741394042969, + "loss": 0.0232, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -15.274149894714355, + "rewards/margins": 12.89158821105957, + "rewards/real": -2.382561683654785, + "step": 750 + }, + { + "epoch": 0.49, + "learning_rate": 4.65521327014218e-07, + "logits/generated": -2.450739860534668, + "logits/real": -2.5071914196014404, + "logps/generated": -255.146484375, + "logps/real": -270.22210693359375, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/generated": -15.33598804473877, + "rewards/margins": 13.024540901184082, + "rewards/real": -2.3114476203918457, + "step": 760 + }, + { + "epoch": 0.49, + "learning_rate": 4.6433649289099525e-07, + "logits/generated": -2.4163241386413574, + "logits/real": -2.4942569732666016, + "logps/generated": -252.5504913330078, + "logps/real": -287.49334716796875, + "loss": 0.0382, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -14.969491958618164, + "rewards/margins": 12.219428062438965, + "rewards/real": -2.750063896179199, + "step": 770 + }, + { + "epoch": 0.5, + "learning_rate": 4.631516587677725e-07, + "logits/generated": -2.455021858215332, + "logits/real": -2.4540677070617676, + "logps/generated": -276.6857604980469, + "logps/real": -255.8550262451172, + "loss": 0.0156, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.444377899169922, + "rewards/margins": 14.266420364379883, + "rewards/real": -3.177957773208618, + "step": 780 + }, + { + "epoch": 0.51, + "learning_rate": 4.6196682464454974e-07, + "logits/generated": -2.4093470573425293, + "logits/real": -2.470271348953247, + "logps/generated": -253.94662475585938, + "logps/real": -264.2749938964844, + "loss": 0.0347, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -15.776174545288086, + "rewards/margins": 13.000862121582031, + "rewards/real": -2.775312900543213, + "step": 790 + }, + { + "epoch": 0.51, + "learning_rate": 4.60781990521327e-07, + "logits/generated": -2.470078229904175, + "logits/real": -2.5724174976348877, + "logps/generated": -267.72296142578125, + "logps/real": -302.47650146484375, + "loss": 0.0769, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -15.174127578735352, + "rewards/margins": 11.917966842651367, + "rewards/real": -3.256159543991089, + "step": 800 + }, + { + "epoch": 0.52, + "learning_rate": 4.5959715639810423e-07, + "logits/generated": -2.540130138397217, + "logits/real": -2.6211256980895996, + "logps/generated": -233.63998413085938, + "logps/real": -288.25494384765625, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/generated": -12.888757705688477, + "rewards/margins": 10.385416030883789, + "rewards/real": -2.503340005874634, + "step": 810 + }, + { + "epoch": 0.52, + "learning_rate": 4.5841232227488145e-07, + "logits/generated": -2.538295269012451, + "logits/real": -2.5893173217773438, + "logps/generated": -249.40536499023438, + "logps/real": -262.49261474609375, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/generated": -14.566215515136719, + "rewards/margins": 11.135366439819336, + "rewards/real": -3.4308483600616455, + "step": 820 + }, + { + "epoch": 0.53, + "learning_rate": 4.5722748815165873e-07, + "logits/generated": -2.488826274871826, + "logits/real": -2.6115565299987793, + "logps/generated": -265.40972900390625, + "logps/real": -316.6865539550781, + "loss": 0.0239, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -15.062512397766113, + "rewards/margins": 11.956504821777344, + "rewards/real": -3.1060070991516113, + "step": 830 + }, + { + "epoch": 0.54, + "learning_rate": 4.56042654028436e-07, + "logits/generated": -2.3590731620788574, + "logits/real": -2.495044469833374, + "logps/generated": -269.42681884765625, + "logps/real": -309.6129455566406, + "loss": 0.0397, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -15.904958724975586, + "rewards/margins": 12.251577377319336, + "rewards/real": -3.6533825397491455, + "step": 840 + }, + { + "epoch": 0.54, + "learning_rate": 4.5485781990521327e-07, + "logits/generated": -2.339799642562866, + "logits/real": -2.4667954444885254, + "logps/generated": -293.05267333984375, + "logps/real": -278.82855224609375, + "loss": 0.0349, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -18.821535110473633, + "rewards/margins": 14.51972770690918, + "rewards/real": -4.301807403564453, + "step": 850 + }, + { + "epoch": 0.55, + "learning_rate": 4.536729857819905e-07, + "logits/generated": -2.430203437805176, + "logits/real": -2.537501573562622, + "logps/generated": -272.36602783203125, + "logps/real": -316.49383544921875, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/generated": -15.430384635925293, + "rewards/margins": 12.318005561828613, + "rewards/real": -3.1123790740966797, + "step": 860 + }, + { + "epoch": 0.56, + "learning_rate": 4.5248815165876776e-07, + "logits/generated": -2.2888057231903076, + "logits/real": -2.4335665702819824, + "logps/generated": -282.5766906738281, + "logps/real": -279.062255859375, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/generated": -17.430299758911133, + "rewards/margins": 13.197275161743164, + "rewards/real": -4.233025550842285, + "step": 870 + }, + { + "epoch": 0.56, + "learning_rate": 4.5130331753554504e-07, + "logits/generated": -2.3086037635803223, + "logits/real": -2.4613966941833496, + "logps/generated": -259.74395751953125, + "logps/real": -214.3597869873047, + "loss": 0.041, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -16.724639892578125, + "rewards/margins": 12.922261238098145, + "rewards/real": -3.8023808002471924, + "step": 880 + }, + { + "epoch": 0.57, + "learning_rate": 4.5011848341232226e-07, + "logits/generated": -2.339136838912964, + "logits/real": -2.4579660892486572, + "logps/generated": -270.0120544433594, + "logps/real": -232.5855712890625, + "loss": 0.022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -16.816797256469727, + "rewards/margins": 13.385887145996094, + "rewards/real": -3.4309089183807373, + "step": 890 + }, + { + "epoch": 0.58, + "learning_rate": 4.489336492890995e-07, + "logits/generated": -2.2905521392822266, + "logits/real": -2.361793279647827, + "logps/generated": -292.40521240234375, + "logps/real": -229.45394897460938, + "loss": 0.0253, + "rewards/accuracies": 1.0, + "rewards/generated": -18.920324325561523, + "rewards/margins": 14.62226390838623, + "rewards/real": -4.298060417175293, + "step": 900 + }, + { + "epoch": 0.58, + "learning_rate": 4.4774881516587675e-07, + "logits/generated": -2.34869122505188, + "logits/real": -2.428506374359131, + "logps/generated": -275.98199462890625, + "logps/real": -236.7742462158203, + "loss": 0.0458, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -16.499164581298828, + "rewards/margins": 11.50661563873291, + "rewards/real": -4.992548942565918, + "step": 910 + }, + { + "epoch": 0.59, + "learning_rate": 4.46563981042654e-07, + "logits/generated": -2.2736713886260986, + "logits/real": -2.445061206817627, + "logps/generated": -305.16497802734375, + "logps/real": -278.02374267578125, + "loss": 0.0338, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -20.268054962158203, + "rewards/margins": 13.73902416229248, + "rewards/real": -6.529031276702881, + "step": 920 + }, + { + "epoch": 0.6, + "learning_rate": 4.4537914691943124e-07, + "logits/generated": -2.2922310829162598, + "logits/real": -2.4935543537139893, + "logps/generated": -277.3129577636719, + "logps/real": -288.2353515625, + "loss": 0.0433, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -17.90709686279297, + "rewards/margins": 11.381673812866211, + "rewards/real": -6.525424003601074, + "step": 930 + }, + { + "epoch": 0.6, + "learning_rate": 4.441943127962085e-07, + "logits/generated": -2.3361358642578125, + "logits/real": -2.5857903957366943, + "logps/generated": -313.67254638671875, + "logps/real": -327.71337890625, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/generated": -19.346548080444336, + "rewards/margins": 13.977258682250977, + "rewards/real": -5.369288444519043, + "step": 940 + }, + { + "epoch": 0.61, + "learning_rate": 4.430094786729858e-07, + "logits/generated": -2.338799476623535, + "logits/real": -2.565807819366455, + "logps/generated": -290.45855712890625, + "logps/real": -251.903076171875, + "loss": 0.0231, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.45851707458496, + "rewards/margins": 14.581155776977539, + "rewards/real": -4.877361297607422, + "step": 950 + }, + { + "epoch": 0.61, + "learning_rate": 4.4182464454976306e-07, + "logits/generated": -2.3942151069641113, + "logits/real": -2.580857992172241, + "logps/generated": -293.68487548828125, + "logps/real": -248.95877075195312, + "loss": 0.0311, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.443933486938477, + "rewards/margins": 14.078336715698242, + "rewards/real": -5.365598201751709, + "step": 960 + }, + { + "epoch": 0.62, + "learning_rate": 4.4063981042654023e-07, + "logits/generated": -2.3892123699188232, + "logits/real": -2.667109727859497, + "logps/generated": -273.72802734375, + "logps/real": -326.7926025390625, + "loss": 0.0217, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -16.955928802490234, + "rewards/margins": 13.641815185546875, + "rewards/real": -3.3141121864318848, + "step": 970 + }, + { + "epoch": 0.63, + "learning_rate": 4.394549763033175e-07, + "logits/generated": -2.3923146724700928, + "logits/real": -2.559901714324951, + "logps/generated": -286.25128173828125, + "logps/real": -302.48834228515625, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/generated": -18.069164276123047, + "rewards/margins": 13.043965339660645, + "rewards/real": -5.025198936462402, + "step": 980 + }, + { + "epoch": 0.63, + "learning_rate": 4.382701421800948e-07, + "logits/generated": -2.410012722015381, + "logits/real": -2.62170147895813, + "logps/generated": -307.0192565917969, + "logps/real": -260.86248779296875, + "loss": 0.0173, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.955039978027344, + "rewards/margins": 15.444422721862793, + "rewards/real": -4.510618686676025, + "step": 990 + }, + { + "epoch": 0.64, + "learning_rate": 4.37085308056872e-07, + "logits/generated": -2.387606143951416, + "logits/real": -2.554452419281006, + "logps/generated": -283.7722473144531, + "logps/real": -292.525390625, + "loss": 0.054, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -17.216604232788086, + "rewards/margins": 12.737438201904297, + "rewards/real": -4.4791669845581055, + "step": 1000 + }, + { + "epoch": 0.65, + "learning_rate": 4.3590047393364927e-07, + "logits/generated": -2.4104082584381104, + "logits/real": -2.5261144638061523, + "logps/generated": -282.57440185546875, + "logps/real": -258.09112548828125, + "loss": 0.0461, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -17.301794052124023, + "rewards/margins": 12.839556694030762, + "rewards/real": -4.462237358093262, + "step": 1010 + }, + { + "epoch": 0.65, + "learning_rate": 4.3471563981042654e-07, + "logits/generated": -2.3187150955200195, + "logits/real": -2.5287089347839355, + "logps/generated": -286.00872802734375, + "logps/real": -295.44097900390625, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/generated": -18.19363021850586, + "rewards/margins": 14.453539848327637, + "rewards/real": -3.7400927543640137, + "step": 1020 + }, + { + "epoch": 0.66, + "learning_rate": 4.335308056872038e-07, + "logits/generated": -2.332850694656372, + "logits/real": -2.53454327583313, + "logps/generated": -293.3934631347656, + "logps/real": -295.30621337890625, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/generated": -18.05362892150879, + "rewards/margins": 13.677447319030762, + "rewards/real": -4.376180171966553, + "step": 1030 + }, + { + "epoch": 0.67, + "learning_rate": 4.32345971563981e-07, + "logits/generated": -2.2965195178985596, + "logits/real": -2.4986045360565186, + "logps/generated": -274.46112060546875, + "logps/real": -285.53778076171875, + "loss": 0.0384, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.356571197509766, + "rewards/margins": 12.720812797546387, + "rewards/real": -4.635758399963379, + "step": 1040 + }, + { + "epoch": 0.67, + "learning_rate": 4.3116113744075825e-07, + "logits/generated": -2.300463914871216, + "logits/real": -2.478001117706299, + "logps/generated": -303.2227783203125, + "logps/real": -287.7460021972656, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/generated": -19.46930694580078, + "rewards/margins": 14.814462661743164, + "rewards/real": -4.654845237731934, + "step": 1050 + }, + { + "epoch": 0.68, + "learning_rate": 4.299763033175355e-07, + "logits/generated": -2.3426880836486816, + "logits/real": -2.5291225910186768, + "logps/generated": -289.17120361328125, + "logps/real": -266.672119140625, + "loss": 0.0375, + "rewards/accuracies": 0.949999988079071, + "rewards/generated": -18.12887191772461, + "rewards/margins": 13.657282829284668, + "rewards/real": -4.471587657928467, + "step": 1060 + }, + { + "epoch": 0.68, + "learning_rate": 4.2879146919431274e-07, + "logits/generated": -2.3856160640716553, + "logits/real": -2.5741703510284424, + "logps/generated": -296.04132080078125, + "logps/real": -313.6421813964844, + "loss": 0.0326, + "rewards/accuracies": 1.0, + "rewards/generated": -18.098491668701172, + "rewards/margins": 14.236276626586914, + "rewards/real": -3.862215042114258, + "step": 1070 + }, + { + "epoch": 0.69, + "learning_rate": 4.2760663507109e-07, + "logits/generated": -2.3564302921295166, + "logits/real": -2.53045916557312, + "logps/generated": -310.1300964355469, + "logps/real": -310.7464599609375, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/generated": -19.677143096923828, + "rewards/margins": 15.148625373840332, + "rewards/real": -4.528520584106445, + "step": 1080 + }, + { + "epoch": 0.7, + "learning_rate": 4.264218009478673e-07, + "logits/generated": -2.399268388748169, + "logits/real": -2.4992451667785645, + "logps/generated": -290.4671936035156, + "logps/real": -260.6993713378906, + "loss": 0.0119, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.11050033569336, + "rewards/margins": 14.06385326385498, + "rewards/real": -5.0466485023498535, + "step": 1090 + }, + { + "epoch": 0.7, + "learning_rate": 4.2523696682464456e-07, + "logits/generated": -2.289309501647949, + "logits/real": -2.5127501487731934, + "logps/generated": -306.51641845703125, + "logps/real": -326.2106018066406, + "loss": 0.0409, + "rewards/accuracies": 1.0, + "rewards/generated": -19.816822052001953, + "rewards/margins": 14.96058177947998, + "rewards/real": -4.856239318847656, + "step": 1100 + }, + { + "epoch": 0.71, + "learning_rate": 4.240521327014218e-07, + "logits/generated": -2.3970062732696533, + "logits/real": -2.5096230506896973, + "logps/generated": -272.3966064453125, + "logps/real": -278.3008728027344, + "loss": 0.036, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -16.609127044677734, + "rewards/margins": 12.158090591430664, + "rewards/real": -4.451037406921387, + "step": 1110 + }, + { + "epoch": 0.72, + "learning_rate": 4.22867298578199e-07, + "logits/generated": -2.376469135284424, + "logits/real": -2.57863187789917, + "logps/generated": -291.18463134765625, + "logps/real": -284.26727294921875, + "loss": 0.0263, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -18.04172134399414, + "rewards/margins": 13.597633361816406, + "rewards/real": -4.444087028503418, + "step": 1120 + }, + { + "epoch": 0.72, + "learning_rate": 4.216824644549763e-07, + "logits/generated": -2.399825096130371, + "logits/real": -2.5337142944335938, + "logps/generated": -279.63177490234375, + "logps/real": -290.47589111328125, + "loss": 0.0298, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.438886642456055, + "rewards/margins": 12.631416320800781, + "rewards/real": -4.807469844818115, + "step": 1130 + }, + { + "epoch": 0.73, + "learning_rate": 4.2049763033175355e-07, + "logits/generated": -2.3421072959899902, + "logits/real": -2.4822893142700195, + "logps/generated": -308.11932373046875, + "logps/real": -330.6546936035156, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/generated": -18.39755630493164, + "rewards/margins": 13.66209888458252, + "rewards/real": -4.735455513000488, + "step": 1140 + }, + { + "epoch": 0.74, + "learning_rate": 4.1931279620853077e-07, + "logits/generated": -2.342663288116455, + "logits/real": -2.5248234272003174, + "logps/generated": -309.02117919921875, + "logps/real": -296.07037353515625, + "loss": 0.0355, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -20.545312881469727, + "rewards/margins": 15.586636543273926, + "rewards/real": -4.958677768707275, + "step": 1150 + }, + { + "epoch": 0.74, + "learning_rate": 4.1812796208530804e-07, + "logits/generated": -2.337934970855713, + "logits/real": -2.4495997428894043, + "logps/generated": -301.7442626953125, + "logps/real": -266.92401123046875, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/generated": -19.53786849975586, + "rewards/margins": 13.502288818359375, + "rewards/real": -6.03557825088501, + "step": 1160 + }, + { + "epoch": 0.75, + "learning_rate": 4.169431279620853e-07, + "logits/generated": -2.2867414951324463, + "logits/real": -2.378726005554199, + "logps/generated": -292.9967346191406, + "logps/real": -269.1019287109375, + "loss": 0.032, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.068912506103516, + "rewards/margins": 13.344259262084961, + "rewards/real": -5.7246527671813965, + "step": 1170 + }, + { + "epoch": 0.75, + "learning_rate": 4.1575829383886253e-07, + "logits/generated": -2.275810718536377, + "logits/real": -2.4016122817993164, + "logps/generated": -312.2940979003906, + "logps/real": -276.72027587890625, + "loss": 0.0155, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.791011810302734, + "rewards/margins": 15.633366584777832, + "rewards/real": -5.157645225524902, + "step": 1180 + }, + { + "epoch": 0.76, + "learning_rate": 4.145734597156398e-07, + "logits/generated": -2.264380931854248, + "logits/real": -2.3693957328796387, + "logps/generated": -299.9407653808594, + "logps/real": -277.1905517578125, + "loss": 0.0168, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.110647201538086, + "rewards/margins": 14.023755073547363, + "rewards/real": -6.086895942687988, + "step": 1190 + }, + { + "epoch": 0.77, + "learning_rate": 4.1338862559241703e-07, + "logits/generated": -2.276496410369873, + "logits/real": -2.4009838104248047, + "logps/generated": -344.5177307128906, + "logps/real": -310.5501708984375, + "loss": 0.0237, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -23.132648468017578, + "rewards/margins": 16.120880126953125, + "rewards/real": -7.011769771575928, + "step": 1200 + }, + { + "epoch": 0.77, + "learning_rate": 4.122037914691943e-07, + "logits/generated": -2.283618211746216, + "logits/real": -2.4024503231048584, + "logps/generated": -287.85919189453125, + "logps/real": -278.244140625, + "loss": 0.0222, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -18.882511138916016, + "rewards/margins": 13.156415939331055, + "rewards/real": -5.726097106933594, + "step": 1210 + }, + { + "epoch": 0.78, + "learning_rate": 4.110189573459715e-07, + "logits/generated": -2.295532703399658, + "logits/real": -2.3860344886779785, + "logps/generated": -302.39312744140625, + "logps/real": -255.3650665283203, + "loss": 0.0232, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -20.025859832763672, + "rewards/margins": 14.425395011901855, + "rewards/real": -5.600464820861816, + "step": 1220 + }, + { + "epoch": 0.79, + "learning_rate": 4.098341232227488e-07, + "logits/generated": -2.3488059043884277, + "logits/real": -2.474379062652588, + "logps/generated": -315.18756103515625, + "logps/real": -282.739990234375, + "loss": 0.1158, + "rewards/accuracies": 1.0, + "rewards/generated": -19.212299346923828, + "rewards/margins": 14.532743453979492, + "rewards/real": -4.679556369781494, + "step": 1230 + }, + { + "epoch": 0.79, + "learning_rate": 4.0864928909952607e-07, + "logits/generated": -2.3911032676696777, + "logits/real": -2.5036733150482178, + "logps/generated": -285.2294921875, + "logps/real": -312.811279296875, + "loss": 0.0261, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.23556900024414, + "rewards/margins": 11.865394592285156, + "rewards/real": -5.370173931121826, + "step": 1240 + }, + { + "epoch": 0.8, + "learning_rate": 4.074644549763033e-07, + "logits/generated": -2.255256414413452, + "logits/real": -2.4088189601898193, + "logps/generated": -281.2724914550781, + "logps/real": -258.9282531738281, + "loss": 0.0448, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -18.890933990478516, + "rewards/margins": 12.679600715637207, + "rewards/real": -6.211331844329834, + "step": 1250 + }, + { + "epoch": 0.81, + "learning_rate": 4.0627962085308056e-07, + "logits/generated": -2.271714448928833, + "logits/real": -2.46032977104187, + "logps/generated": -288.73077392578125, + "logps/real": -320.3868713378906, + "loss": 0.0413, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -18.242067337036133, + "rewards/margins": 13.210156440734863, + "rewards/real": -5.0319108963012695, + "step": 1260 + }, + { + "epoch": 0.81, + "learning_rate": 4.0509478672985783e-07, + "logits/generated": -2.35395884513855, + "logits/real": -2.448251724243164, + "logps/generated": -300.24700927734375, + "logps/real": -272.71044921875, + "loss": 0.0568, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -18.208026885986328, + "rewards/margins": 12.991012573242188, + "rewards/real": -5.21701717376709, + "step": 1270 + }, + { + "epoch": 0.82, + "learning_rate": 4.0390995260663505e-07, + "logits/generated": -2.374265193939209, + "logits/real": -2.5217158794403076, + "logps/generated": -290.9504699707031, + "logps/real": -316.7360534667969, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/generated": -18.148794174194336, + "rewards/margins": 13.157681465148926, + "rewards/real": -4.99111270904541, + "step": 1280 + }, + { + "epoch": 0.83, + "learning_rate": 4.0272511848341227e-07, + "logits/generated": -2.316471576690674, + "logits/real": -2.527329206466675, + "logps/generated": -287.37139892578125, + "logps/real": -346.442626953125, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/generated": -17.83603286743164, + "rewards/margins": 12.730929374694824, + "rewards/real": -5.105101585388184, + "step": 1290 + }, + { + "epoch": 0.83, + "learning_rate": 4.0154028436018954e-07, + "logits/generated": -2.3851349353790283, + "logits/real": -2.5525565147399902, + "logps/generated": -287.569091796875, + "logps/real": -334.72625732421875, + "loss": 0.0247, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.36513900756836, + "rewards/margins": 12.514284133911133, + "rewards/real": -4.850854396820068, + "step": 1300 + }, + { + "epoch": 0.84, + "learning_rate": 4.003554502369668e-07, + "logits/generated": -2.350069522857666, + "logits/real": -2.5130248069763184, + "logps/generated": -290.7829895019531, + "logps/real": -285.68975830078125, + "loss": 0.0513, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -18.050655364990234, + "rewards/margins": 12.557219505310059, + "rewards/real": -5.493437767028809, + "step": 1310 + }, + { + "epoch": 0.84, + "learning_rate": 3.991706161137441e-07, + "logits/generated": -2.3853585720062256, + "logits/real": -2.5433051586151123, + "logps/generated": -280.49371337890625, + "logps/real": -309.3614807128906, + "loss": 0.0172, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.24584197998047, + "rewards/margins": 11.380583763122559, + "rewards/real": -5.865257740020752, + "step": 1320 + }, + { + "epoch": 0.85, + "learning_rate": 3.979857819905213e-07, + "logits/generated": -2.2883365154266357, + "logits/real": -2.509340763092041, + "logps/generated": -281.46185302734375, + "logps/real": -317.1385192871094, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/generated": -17.596721649169922, + "rewards/margins": 12.168859481811523, + "rewards/real": -5.427859783172607, + "step": 1330 + }, + { + "epoch": 0.86, + "learning_rate": 3.968009478672986e-07, + "logits/generated": -2.276294469833374, + "logits/real": -2.529151201248169, + "logps/generated": -275.62384033203125, + "logps/real": -320.17840576171875, + "loss": 0.0174, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.126468658447266, + "rewards/margins": 12.034772872924805, + "rewards/real": -5.091695785522461, + "step": 1340 + }, + { + "epoch": 0.86, + "learning_rate": 3.9561611374407585e-07, + "logits/generated": -2.397404432296753, + "logits/real": -2.5299489498138428, + "logps/generated": -279.8531799316406, + "logps/real": -248.511474609375, + "loss": 0.0293, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.51804542541504, + "rewards/margins": 12.492974281311035, + "rewards/real": -5.025073051452637, + "step": 1350 + }, + { + "epoch": 0.87, + "learning_rate": 3.94431279620853e-07, + "logits/generated": -2.3150904178619385, + "logits/real": -2.5204906463623047, + "logps/generated": -291.2157897949219, + "logps/real": -299.140380859375, + "loss": 0.0278, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -18.304561614990234, + "rewards/margins": 13.8828763961792, + "rewards/real": -4.421683311462402, + "step": 1360 + }, + { + "epoch": 0.88, + "learning_rate": 3.932464454976303e-07, + "logits/generated": -2.308262825012207, + "logits/real": -2.466447591781616, + "logps/generated": -288.3290100097656, + "logps/real": -261.1494140625, + "loss": 0.0338, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -18.667638778686523, + "rewards/margins": 13.086454391479492, + "rewards/real": -5.581185340881348, + "step": 1370 + }, + { + "epoch": 0.88, + "learning_rate": 3.9206161137440757e-07, + "logits/generated": -2.327383041381836, + "logits/real": -2.471449851989746, + "logps/generated": -295.0485534667969, + "logps/real": -267.4327087402344, + "loss": 0.0226, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -18.104694366455078, + "rewards/margins": 12.788865089416504, + "rewards/real": -5.315830707550049, + "step": 1380 + }, + { + "epoch": 0.89, + "learning_rate": 3.9087677725118484e-07, + "logits/generated": -2.2615890502929688, + "logits/real": -2.4707770347595215, + "logps/generated": -306.2514343261719, + "logps/real": -302.1688232421875, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/generated": -19.19952964782715, + "rewards/margins": 14.145184516906738, + "rewards/real": -5.054343223571777, + "step": 1390 + }, + { + "epoch": 0.9, + "learning_rate": 3.8969194312796206e-07, + "logits/generated": -2.276962995529175, + "logits/real": -2.43565034866333, + "logps/generated": -311.103515625, + "logps/real": -271.02191162109375, + "loss": 0.0308, + "rewards/accuracies": 1.0, + "rewards/generated": -20.81112289428711, + "rewards/margins": 14.516395568847656, + "rewards/real": -6.2947282791137695, + "step": 1400 + }, + { + "epoch": 0.9, + "learning_rate": 3.8850710900473933e-07, + "logits/generated": -2.2672057151794434, + "logits/real": -2.4533634185791016, + "logps/generated": -306.622802734375, + "logps/real": -320.2618408203125, + "loss": 0.0143, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.772937774658203, + "rewards/margins": 14.028053283691406, + "rewards/real": -5.744885444641113, + "step": 1410 + }, + { + "epoch": 0.91, + "learning_rate": 3.873222748815166e-07, + "logits/generated": -2.372107744216919, + "logits/real": -2.481254816055298, + "logps/generated": -281.7245788574219, + "logps/real": -283.98748779296875, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/generated": -17.529558181762695, + "rewards/margins": 11.68727970123291, + "rewards/real": -5.842276096343994, + "step": 1420 + }, + { + "epoch": 0.91, + "learning_rate": 3.8613744075829377e-07, + "logits/generated": -2.2598013877868652, + "logits/real": -2.47208833694458, + "logps/generated": -285.4142150878906, + "logps/real": -285.35162353515625, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/generated": -18.84803581237793, + "rewards/margins": 12.526016235351562, + "rewards/real": -6.322018623352051, + "step": 1430 + }, + { + "epoch": 0.92, + "learning_rate": 3.8495260663507104e-07, + "logits/generated": -2.3580639362335205, + "logits/real": -2.4859871864318848, + "logps/generated": -307.646240234375, + "logps/real": -323.43414306640625, + "loss": 0.0195, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.400405883789062, + "rewards/margins": 13.40168285369873, + "rewards/real": -5.998722553253174, + "step": 1440 + }, + { + "epoch": 0.93, + "learning_rate": 3.837677725118483e-07, + "logits/generated": -2.2192952632904053, + "logits/real": -2.428776979446411, + "logps/generated": -305.4685974121094, + "logps/real": -296.8509216308594, + "loss": 0.0205, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.33831024169922, + "rewards/margins": 13.525497436523438, + "rewards/real": -5.812812805175781, + "step": 1450 + }, + { + "epoch": 0.93, + "learning_rate": 3.825829383886256e-07, + "logits/generated": -2.2133755683898926, + "logits/real": -2.446166753768921, + "logps/generated": -308.5284729003906, + "logps/real": -311.6910095214844, + "loss": 0.0166, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.147266387939453, + "rewards/margins": 14.619921684265137, + "rewards/real": -5.52734375, + "step": 1460 + }, + { + "epoch": 0.94, + "learning_rate": 3.813981042654028e-07, + "logits/generated": -2.251068592071533, + "logits/real": -2.3732194900512695, + "logps/generated": -301.8031311035156, + "logps/real": -263.5353698730469, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/generated": -20.869558334350586, + "rewards/margins": 13.65925407409668, + "rewards/real": -7.210305213928223, + "step": 1470 + }, + { + "epoch": 0.95, + "learning_rate": 3.802132701421801e-07, + "logits/generated": -2.2254586219787598, + "logits/real": -2.400791645050049, + "logps/generated": -323.98382568359375, + "logps/real": -320.86419677734375, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/generated": -21.44746971130371, + "rewards/margins": 15.346704483032227, + "rewards/real": -6.100764751434326, + "step": 1480 + }, + { + "epoch": 0.95, + "learning_rate": 3.7902843601895736e-07, + "logits/generated": -2.26737117767334, + "logits/real": -2.390167713165283, + "logps/generated": -312.045166015625, + "logps/real": -311.6873474121094, + "loss": 0.0164, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.39379119873047, + "rewards/margins": 14.455484390258789, + "rewards/real": -5.938305854797363, + "step": 1490 + }, + { + "epoch": 0.96, + "learning_rate": 3.778436018957346e-07, + "logits/generated": -2.294706344604492, + "logits/real": -2.4145607948303223, + "logps/generated": -298.1540222167969, + "logps/real": -306.29913330078125, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/generated": -19.42539405822754, + "rewards/margins": 14.029606819152832, + "rewards/real": -5.395786762237549, + "step": 1500 + }, + { + "epoch": 0.97, + "learning_rate": 3.766587677725118e-07, + "logits/generated": -2.3276476860046387, + "logits/real": -2.3730132579803467, + "logps/generated": -314.5223083496094, + "logps/real": -288.64404296875, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/generated": -20.662466049194336, + "rewards/margins": 14.94616985321045, + "rewards/real": -5.716297626495361, + "step": 1510 + }, + { + "epoch": 0.97, + "learning_rate": 3.7547393364928907e-07, + "logits/generated": -2.2427303791046143, + "logits/real": -2.3749637603759766, + "logps/generated": -315.2216796875, + "logps/real": -287.2800598144531, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/generated": -20.293941497802734, + "rewards/margins": 14.373272895812988, + "rewards/real": -5.920670509338379, + "step": 1520 + }, + { + "epoch": 0.98, + "learning_rate": 3.7428909952606634e-07, + "logits/generated": -2.350670337677002, + "logits/real": -2.3875861167907715, + "logps/generated": -317.49090576171875, + "logps/real": -294.496337890625, + "loss": 0.0287, + "rewards/accuracies": 0.9624999761581421, + "rewards/generated": -20.363561630249023, + "rewards/margins": 13.375410079956055, + "rewards/real": -6.988152503967285, + "step": 1530 + }, + { + "epoch": 0.99, + "learning_rate": 3.7310426540284356e-07, + "logits/generated": -2.2628579139709473, + "logits/real": -2.4144299030303955, + "logps/generated": -286.41558837890625, + "logps/real": -317.0753479003906, + "loss": 0.0267, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -18.326913833618164, + "rewards/margins": 13.711787223815918, + "rewards/real": -4.6151275634765625, + "step": 1540 + }, + { + "epoch": 0.99, + "learning_rate": 3.7191943127962083e-07, + "logits/generated": -2.2808165550231934, + "logits/real": -2.3928608894348145, + "logps/generated": -290.4697265625, + "logps/real": -281.3714904785156, + "loss": 0.0368, + "rewards/accuracies": 1.0, + "rewards/generated": -18.250659942626953, + "rewards/margins": 12.909965515136719, + "rewards/real": -5.340696811676025, + "step": 1550 + }, + { + "epoch": 1.0, + "learning_rate": 3.707345971563981e-07, + "logits/generated": -2.2403323650360107, + "logits/real": -2.375622272491455, + "logps/generated": -299.19879150390625, + "logps/real": -264.7336730957031, + "loss": 0.0178, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.26840591430664, + "rewards/margins": 14.170907974243164, + "rewards/real": -5.097498416900635, + "step": 1560 + }, + { + "epoch": 1.0, + "learning_rate": 3.695497630331754e-07, + "logits/generated": -2.2569198608398438, + "logits/real": -2.3728420734405518, + "logps/generated": -313.026123046875, + "logps/real": -280.13641357421875, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/generated": -19.841373443603516, + "rewards/margins": 15.321266174316406, + "rewards/real": -4.520107269287109, + "step": 1570 + }, + { + "epoch": 1.01, + "learning_rate": 3.683649289099526e-07, + "logits/generated": -2.2165145874023438, + "logits/real": -2.3671815395355225, + "logps/generated": -299.0692138671875, + "logps/real": -294.7959899902344, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/generated": -19.413881301879883, + "rewards/margins": 15.00433349609375, + "rewards/real": -4.409549713134766, + "step": 1580 + }, + { + "epoch": 1.02, + "learning_rate": 3.671800947867298e-07, + "logits/generated": -2.136179208755493, + "logits/real": -2.3207507133483887, + "logps/generated": -303.35137939453125, + "logps/real": -287.4952392578125, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/generated": -20.405773162841797, + "rewards/margins": 15.668429374694824, + "rewards/real": -4.73734188079834, + "step": 1590 + }, + { + "epoch": 1.02, + "learning_rate": 3.659952606635071e-07, + "logits/generated": -2.13120698928833, + "logits/real": -2.320891857147217, + "logps/generated": -306.2853698730469, + "logps/real": -283.64691162109375, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/generated": -19.937503814697266, + "rewards/margins": 15.81896686553955, + "rewards/real": -4.11853551864624, + "step": 1600 + }, + { + "epoch": 1.03, + "learning_rate": 3.648104265402843e-07, + "logits/generated": -2.1870360374450684, + "logits/real": -2.368260622024536, + "logps/generated": -300.4111328125, + "logps/real": -296.3978576660156, + "loss": 0.0066, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.767839431762695, + "rewards/margins": 14.85081958770752, + "rewards/real": -4.917020797729492, + "step": 1610 + }, + { + "epoch": 1.04, + "learning_rate": 3.636255924170616e-07, + "logits/generated": -2.1743369102478027, + "logits/real": -2.3282134532928467, + "logps/generated": -321.19610595703125, + "logps/real": -281.8503723144531, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/generated": -21.900375366210938, + "rewards/margins": 16.69029426574707, + "rewards/real": -5.210080623626709, + "step": 1620 + }, + { + "epoch": 1.04, + "learning_rate": 3.6244075829383886e-07, + "logits/generated": -2.220449447631836, + "logits/real": -2.344844341278076, + "logps/generated": -303.0509948730469, + "logps/real": -268.18267822265625, + "loss": 0.0111, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.523670196533203, + "rewards/margins": 14.576342582702637, + "rewards/real": -4.947329044342041, + "step": 1630 + }, + { + "epoch": 1.05, + "learning_rate": 3.6125592417061613e-07, + "logits/generated": -2.132232904434204, + "logits/real": -2.3197970390319824, + "logps/generated": -314.4263916015625, + "logps/real": -292.0470275878906, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/generated": -20.414146423339844, + "rewards/margins": 16.534496307373047, + "rewards/real": -3.879650592803955, + "step": 1640 + }, + { + "epoch": 1.06, + "learning_rate": 3.6007109004739335e-07, + "logits/generated": -2.2374231815338135, + "logits/real": -2.352515697479248, + "logps/generated": -316.6592102050781, + "logps/real": -296.956787109375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/generated": -20.78524398803711, + "rewards/margins": 16.528079986572266, + "rewards/real": -4.257164001464844, + "step": 1650 + }, + { + "epoch": 1.06, + "learning_rate": 3.588862559241706e-07, + "logits/generated": -2.1662914752960205, + "logits/real": -2.328010082244873, + "logps/generated": -304.8622741699219, + "logps/real": -291.980712890625, + "loss": 0.0142, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.998083114624023, + "rewards/margins": 15.650113105773926, + "rewards/real": -4.347971439361572, + "step": 1660 + }, + { + "epoch": 1.07, + "learning_rate": 3.5770142180094784e-07, + "logits/generated": -2.306222438812256, + "logits/real": -2.3590731620788574, + "logps/generated": -312.7652282714844, + "logps/real": -287.0503234863281, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/generated": -20.211284637451172, + "rewards/margins": 15.283581733703613, + "rewards/real": -4.927702903747559, + "step": 1670 + }, + { + "epoch": 1.07, + "learning_rate": 3.5651658767772506e-07, + "logits/generated": -2.223875045776367, + "logits/real": -2.365973472595215, + "logps/generated": -300.7914733886719, + "logps/real": -289.1277770996094, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/generated": -19.49875259399414, + "rewards/margins": 15.722567558288574, + "rewards/real": -3.7761855125427246, + "step": 1680 + }, + { + "epoch": 1.08, + "learning_rate": 3.5533175355450234e-07, + "logits/generated": -2.167297840118408, + "logits/real": -2.2996826171875, + "logps/generated": -310.1976013183594, + "logps/real": -246.65658569335938, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/generated": -21.07327651977539, + "rewards/margins": 16.01577377319336, + "rewards/real": -5.057503700256348, + "step": 1690 + }, + { + "epoch": 1.09, + "learning_rate": 3.541469194312796e-07, + "logits/generated": -2.1909658908843994, + "logits/real": -2.3059241771698, + "logps/generated": -312.3503112792969, + "logps/real": -264.6876220703125, + "loss": 0.0061, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.395238876342773, + "rewards/margins": 15.531048774719238, + "rewards/real": -4.864190578460693, + "step": 1700 + }, + { + "epoch": 1.09, + "learning_rate": 3.529620853080569e-07, + "logits/generated": -2.2165889739990234, + "logits/real": -2.3362364768981934, + "logps/generated": -315.99798583984375, + "logps/real": -290.00384521484375, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/generated": -20.464693069458008, + "rewards/margins": 16.096210479736328, + "rewards/real": -4.3684821128845215, + "step": 1710 + }, + { + "epoch": 1.1, + "learning_rate": 3.517772511848341e-07, + "logits/generated": -2.1913318634033203, + "logits/real": -2.2931675910949707, + "logps/generated": -293.7994689941406, + "logps/real": -273.39923095703125, + "loss": 0.0174, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.379470825195312, + "rewards/margins": 14.768470764160156, + "rewards/real": -4.61099910736084, + "step": 1720 + }, + { + "epoch": 1.11, + "learning_rate": 3.505924170616114e-07, + "logits/generated": -2.169776201248169, + "logits/real": -2.252234935760498, + "logps/generated": -335.75946044921875, + "logps/real": -265.8716735839844, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/generated": -23.035526275634766, + "rewards/margins": 17.36758041381836, + "rewards/real": -5.667943477630615, + "step": 1730 + }, + { + "epoch": 1.11, + "learning_rate": 3.4940758293838865e-07, + "logits/generated": -2.240286111831665, + "logits/real": -2.3108315467834473, + "logps/generated": -301.48870849609375, + "logps/real": -264.96429443359375, + "loss": 0.0108, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.83957862854004, + "rewards/margins": 14.789782524108887, + "rewards/real": -5.049793720245361, + "step": 1740 + }, + { + "epoch": 1.12, + "learning_rate": 3.482227488151658e-07, + "logits/generated": -2.24094295501709, + "logits/real": -2.2868783473968506, + "logps/generated": -319.02813720703125, + "logps/real": -294.453369140625, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/generated": -21.49105453491211, + "rewards/margins": 16.97692108154297, + "rewards/real": -4.51413106918335, + "step": 1750 + }, + { + "epoch": 1.13, + "learning_rate": 3.470379146919431e-07, + "logits/generated": -2.2272396087646484, + "logits/real": -2.369088888168335, + "logps/generated": -295.22247314453125, + "logps/real": -313.66143798828125, + "loss": 0.0102, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -18.529632568359375, + "rewards/margins": 14.83338737487793, + "rewards/real": -3.69624662399292, + "step": 1760 + }, + { + "epoch": 1.13, + "learning_rate": 3.4585308056872036e-07, + "logits/generated": -2.240562677383423, + "logits/real": -2.347003698348999, + "logps/generated": -324.1022644042969, + "logps/real": -263.2792053222656, + "loss": 0.0035, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.202388763427734, + "rewards/margins": 15.57550048828125, + "rewards/real": -4.626888751983643, + "step": 1770 + }, + { + "epoch": 1.14, + "learning_rate": 3.4466824644549763e-07, + "logits/generated": -2.2323672771453857, + "logits/real": -2.3367342948913574, + "logps/generated": -296.6525573730469, + "logps/real": -260.25048828125, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/generated": -19.346256256103516, + "rewards/margins": 14.571496963500977, + "rewards/real": -4.7747626304626465, + "step": 1780 + }, + { + "epoch": 1.15, + "learning_rate": 3.4348341232227485e-07, + "logits/generated": -2.1286113262176514, + "logits/real": -2.3490428924560547, + "logps/generated": -309.1128234863281, + "logps/real": -294.2756042480469, + "loss": 0.0134, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.084026336669922, + "rewards/margins": 15.66749095916748, + "rewards/real": -4.416535377502441, + "step": 1790 + }, + { + "epoch": 1.15, + "learning_rate": 3.422985781990521e-07, + "logits/generated": -2.293304443359375, + "logits/real": -2.436685800552368, + "logps/generated": -314.29083251953125, + "logps/real": -287.0198669433594, + "loss": 0.0138, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.650161743164062, + "rewards/margins": 15.928117752075195, + "rewards/real": -4.722043991088867, + "step": 1800 + }, + { + "epoch": 1.16, + "learning_rate": 3.411137440758294e-07, + "logits/generated": -2.3594422340393066, + "logits/real": -2.5309062004089355, + "logps/generated": -283.93548583984375, + "logps/real": -290.99139404296875, + "loss": 0.0074, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.6586856842041, + "rewards/margins": 13.922680854797363, + "rewards/real": -3.7360050678253174, + "step": 1810 + }, + { + "epoch": 1.16, + "learning_rate": 3.3992890995260667e-07, + "logits/generated": -2.2194488048553467, + "logits/real": -2.469252824783325, + "logps/generated": -310.2274169921875, + "logps/real": -270.26385498046875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/generated": -20.600818634033203, + "rewards/margins": 15.282350540161133, + "rewards/real": -5.318469524383545, + "step": 1820 + }, + { + "epoch": 1.17, + "learning_rate": 3.3874407582938384e-07, + "logits/generated": -2.2475979328155518, + "logits/real": -2.450146436691284, + "logps/generated": -330.9019775390625, + "logps/real": -325.27789306640625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/generated": -21.632858276367188, + "rewards/margins": 16.27450942993164, + "rewards/real": -5.358347415924072, + "step": 1830 + }, + { + "epoch": 1.18, + "learning_rate": 3.375592417061611e-07, + "logits/generated": -2.2602717876434326, + "logits/real": -2.5164554119110107, + "logps/generated": -302.0518798828125, + "logps/real": -302.34881591796875, + "loss": 0.0114, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -19.150955200195312, + "rewards/margins": 15.48846435546875, + "rewards/real": -3.6624884605407715, + "step": 1840 + }, + { + "epoch": 1.18, + "learning_rate": 3.363744075829384e-07, + "logits/generated": -2.328455924987793, + "logits/real": -2.5169782638549805, + "logps/generated": -301.71575927734375, + "logps/real": -282.5423583984375, + "loss": 0.0094, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -19.608911514282227, + "rewards/margins": 15.527392387390137, + "rewards/real": -4.081518173217773, + "step": 1850 + }, + { + "epoch": 1.19, + "learning_rate": 3.351895734597156e-07, + "logits/generated": -2.3264639377593994, + "logits/real": -2.420229196548462, + "logps/generated": -290.0008544921875, + "logps/real": -266.5218505859375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/generated": -19.041057586669922, + "rewards/margins": 14.385503768920898, + "rewards/real": -4.655551910400391, + "step": 1860 + }, + { + "epoch": 1.2, + "learning_rate": 3.340047393364929e-07, + "logits/generated": -2.231644630432129, + "logits/real": -2.3841605186462402, + "logps/generated": -315.7859802246094, + "logps/real": -264.2816467285156, + "loss": 0.0096, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -21.186296463012695, + "rewards/margins": 16.85459327697754, + "rewards/real": -4.33170223236084, + "step": 1870 + }, + { + "epoch": 1.2, + "learning_rate": 3.3281990521327015e-07, + "logits/generated": -2.28442120552063, + "logits/real": -2.4339089393615723, + "logps/generated": -305.69390869140625, + "logps/real": -332.723388671875, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/generated": -20.054880142211914, + "rewards/margins": 14.866659164428711, + "rewards/real": -5.18821907043457, + "step": 1880 + }, + { + "epoch": 1.21, + "learning_rate": 3.316350710900474e-07, + "logits/generated": -2.1956193447113037, + "logits/real": -2.391106605529785, + "logps/generated": -304.96148681640625, + "logps/real": -269.6791687011719, + "loss": 0.0043, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.582901000976562, + "rewards/margins": 16.172176361083984, + "rewards/real": -4.410725116729736, + "step": 1890 + }, + { + "epoch": 1.22, + "learning_rate": 3.304502369668246e-07, + "logits/generated": -2.1891417503356934, + "logits/real": -2.4029784202575684, + "logps/generated": -323.70172119140625, + "logps/real": -313.7615966796875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/generated": -21.427139282226562, + "rewards/margins": 16.72661590576172, + "rewards/real": -4.700521945953369, + "step": 1900 + }, + { + "epoch": 1.22, + "learning_rate": 3.2926540284360186e-07, + "logits/generated": -2.2477095127105713, + "logits/real": -2.407269239425659, + "logps/generated": -326.52703857421875, + "logps/real": -317.97808837890625, + "loss": 0.0081, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -21.259418487548828, + "rewards/margins": 17.468002319335938, + "rewards/real": -3.791417360305786, + "step": 1910 + }, + { + "epoch": 1.23, + "learning_rate": 3.2808056872037913e-07, + "logits/generated": -2.2605175971984863, + "logits/real": -2.4191315174102783, + "logps/generated": -315.5021667480469, + "logps/real": -311.63360595703125, + "loss": 0.0162, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.560211181640625, + "rewards/margins": 15.904121398925781, + "rewards/real": -4.6560869216918945, + "step": 1920 + }, + { + "epoch": 1.23, + "learning_rate": 3.2689573459715635e-07, + "logits/generated": -2.133112668991089, + "logits/real": -2.3952369689941406, + "logps/generated": -330.35308837890625, + "logps/real": -308.2007141113281, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -21.54788589477539, + "rewards/margins": 16.650882720947266, + "rewards/real": -4.897005558013916, + "step": 1930 + }, + { + "epoch": 1.24, + "learning_rate": 3.2571090047393363e-07, + "logits/generated": -2.223424196243286, + "logits/real": -2.3665499687194824, + "logps/generated": -338.943115234375, + "logps/real": -278.33984375, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/generated": -22.474721908569336, + "rewards/margins": 16.395320892333984, + "rewards/real": -6.079402446746826, + "step": 1940 + }, + { + "epoch": 1.25, + "learning_rate": 3.245260663507109e-07, + "logits/generated": -2.159966468811035, + "logits/real": -2.3640992641448975, + "logps/generated": -356.1936950683594, + "logps/real": -290.65447998046875, + "loss": 0.0075, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -24.283023834228516, + "rewards/margins": 18.25802230834961, + "rewards/real": -6.0250043869018555, + "step": 1950 + }, + { + "epoch": 1.25, + "learning_rate": 3.2334123222748817e-07, + "logits/generated": -2.139967918395996, + "logits/real": -2.2997608184814453, + "logps/generated": -333.8506774902344, + "logps/real": -259.5197448730469, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/generated": -23.21651840209961, + "rewards/margins": 17.41876792907715, + "rewards/real": -5.797752857208252, + "step": 1960 + }, + { + "epoch": 1.26, + "learning_rate": 3.221563981042654e-07, + "logits/generated": -2.023646116256714, + "logits/real": -2.324492931365967, + "logps/generated": -348.760986328125, + "logps/real": -314.42315673828125, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/generated": -24.4593448638916, + "rewards/margins": 18.718902587890625, + "rewards/real": -5.740442276000977, + "step": 1970 + }, + { + "epoch": 1.27, + "learning_rate": 3.209715639810426e-07, + "logits/generated": -2.134831666946411, + "logits/real": -2.299773693084717, + "logps/generated": -347.0093688964844, + "logps/real": -295.0484619140625, + "loss": 0.0048, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -23.788387298583984, + "rewards/margins": 17.44914436340332, + "rewards/real": -6.3392462730407715, + "step": 1980 + }, + { + "epoch": 1.27, + "learning_rate": 3.197867298578199e-07, + "logits/generated": -2.0692691802978516, + "logits/real": -2.304626941680908, + "logps/generated": -326.07366943359375, + "logps/real": -276.44549560546875, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/generated": -22.178640365600586, + "rewards/margins": 17.356496810913086, + "rewards/real": -4.822144508361816, + "step": 1990 + }, + { + "epoch": 1.28, + "learning_rate": 3.186018957345971e-07, + "logits/generated": -2.078138589859009, + "logits/real": -2.3022000789642334, + "logps/generated": -337.51348876953125, + "logps/real": -269.2457275390625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/generated": -22.881973266601562, + "rewards/margins": 17.953664779663086, + "rewards/real": -4.928309917449951, + "step": 2000 + }, + { + "epoch": 1.29, + "learning_rate": 3.174170616113744e-07, + "logits/generated": -2.1155338287353516, + "logits/real": -2.3254213333129883, + "logps/generated": -311.26519775390625, + "logps/real": -275.30755615234375, + "loss": 0.0037, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -21.179630279541016, + "rewards/margins": 15.785786628723145, + "rewards/real": -5.3938446044921875, + "step": 2010 + }, + { + "epoch": 1.29, + "learning_rate": 3.1623222748815165e-07, + "logits/generated": -2.102132797241211, + "logits/real": -2.2949185371398926, + "logps/generated": -329.10809326171875, + "logps/real": -266.596435546875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/generated": -22.795299530029297, + "rewards/margins": 17.099586486816406, + "rewards/real": -5.695716857910156, + "step": 2020 + }, + { + "epoch": 1.3, + "learning_rate": 3.150473933649289e-07, + "logits/generated": -2.1224253177642822, + "logits/real": -2.2488582134246826, + "logps/generated": -374.454833984375, + "logps/real": -301.51336669921875, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/generated": -26.284189224243164, + "rewards/margins": 20.073284149169922, + "rewards/real": -6.210905075073242, + "step": 2030 + }, + { + "epoch": 1.31, + "learning_rate": 3.1386255924170614e-07, + "logits/generated": -1.984297752380371, + "logits/real": -2.2786407470703125, + "logps/generated": -347.9783020019531, + "logps/real": -304.0118713378906, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/generated": -23.826396942138672, + "rewards/margins": 18.487462997436523, + "rewards/real": -5.338932037353516, + "step": 2040 + }, + { + "epoch": 1.31, + "learning_rate": 3.126777251184834e-07, + "logits/generated": -2.0077693462371826, + "logits/real": -2.2613766193389893, + "logps/generated": -348.8348388671875, + "logps/real": -292.101806640625, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/generated": -24.404682159423828, + "rewards/margins": 19.08761978149414, + "rewards/real": -5.317059516906738, + "step": 2050 + }, + { + "epoch": 1.32, + "learning_rate": 3.1149289099526064e-07, + "logits/generated": -2.02852201461792, + "logits/real": -2.29878306388855, + "logps/generated": -344.41717529296875, + "logps/real": -307.5559387207031, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/generated": -22.812238693237305, + "rewards/margins": 18.065067291259766, + "rewards/real": -4.747171878814697, + "step": 2060 + }, + { + "epoch": 1.32, + "learning_rate": 3.103080568720379e-07, + "logits/generated": -2.0517935752868652, + "logits/real": -2.2768099308013916, + "logps/generated": -323.74639892578125, + "logps/real": -269.71893310546875, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/generated": -22.311723709106445, + "rewards/margins": 17.752567291259766, + "rewards/real": -4.559154033660889, + "step": 2070 + }, + { + "epoch": 1.33, + "learning_rate": 3.0912322274881513e-07, + "logits/generated": -1.9986320734024048, + "logits/real": -2.286355972290039, + "logps/generated": -342.7187805175781, + "logps/real": -335.91143798828125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/generated": -23.347644805908203, + "rewards/margins": 18.038827896118164, + "rewards/real": -5.308821201324463, + "step": 2080 + }, + { + "epoch": 1.34, + "learning_rate": 3.079383886255924e-07, + "logits/generated": -2.086455821990967, + "logits/real": -2.2344307899475098, + "logps/generated": -373.5683288574219, + "logps/real": -286.02728271484375, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/generated": -26.34897232055664, + "rewards/margins": 21.176847457885742, + "rewards/real": -5.172126770019531, + "step": 2090 + }, + { + "epoch": 1.34, + "learning_rate": 3.067535545023697e-07, + "logits/generated": -2.0571374893188477, + "logits/real": -2.2799127101898193, + "logps/generated": -333.6913146972656, + "logps/real": -269.0575866699219, + "loss": 0.0039, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -22.993635177612305, + "rewards/margins": 17.178089141845703, + "rewards/real": -5.815545082092285, + "step": 2100 + }, + { + "epoch": 1.35, + "learning_rate": 3.055687203791469e-07, + "logits/generated": -2.0613174438476562, + "logits/real": -2.289515256881714, + "logps/generated": -354.09716796875, + "logps/real": -322.1733093261719, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/generated": -24.417530059814453, + "rewards/margins": 18.71674346923828, + "rewards/real": -5.700786113739014, + "step": 2110 + }, + { + "epoch": 1.36, + "learning_rate": 3.0438388625592417e-07, + "logits/generated": -2.080763101577759, + "logits/real": -2.314450740814209, + "logps/generated": -338.1839599609375, + "logps/real": -306.02886962890625, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/generated": -22.660348892211914, + "rewards/margins": 16.645235061645508, + "rewards/real": -6.01511287689209, + "step": 2120 + }, + { + "epoch": 1.36, + "learning_rate": 3.0319905213270144e-07, + "logits/generated": -2.092369794845581, + "logits/real": -2.2747347354888916, + "logps/generated": -386.8224792480469, + "logps/real": -286.4530334472656, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/generated": -27.519500732421875, + "rewards/margins": 21.448610305786133, + "rewards/real": -6.070888996124268, + "step": 2130 + }, + { + "epoch": 1.37, + "learning_rate": 3.0201421800947866e-07, + "logits/generated": -2.027413845062256, + "logits/real": -2.234687328338623, + "logps/generated": -377.666015625, + "logps/real": -314.5318908691406, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/generated": -26.735797882080078, + "rewards/margins": 20.168197631835938, + "rewards/real": -6.567601680755615, + "step": 2140 + }, + { + "epoch": 1.38, + "learning_rate": 3.008293838862559e-07, + "logits/generated": -2.048600912094116, + "logits/real": -2.174879550933838, + "logps/generated": -389.923828125, + "logps/real": -308.6571960449219, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/generated": -27.314075469970703, + "rewards/margins": 19.176753997802734, + "rewards/real": -8.137316703796387, + "step": 2150 + }, + { + "epoch": 1.38, + "learning_rate": 2.9964454976303315e-07, + "logits/generated": -2.0499486923217773, + "logits/real": -2.1856703758239746, + "logps/generated": -381.08001708984375, + "logps/real": -319.88995361328125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/generated": -26.891727447509766, + "rewards/margins": 19.29157257080078, + "rewards/real": -7.60015344619751, + "step": 2160 + }, + { + "epoch": 1.39, + "learning_rate": 2.984597156398104e-07, + "logits/generated": -1.9547226428985596, + "logits/real": -2.22841215133667, + "logps/generated": -346.93646240234375, + "logps/real": -305.8375244140625, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/generated": -24.429630279541016, + "rewards/margins": 17.81489372253418, + "rewards/real": -6.6147356033325195, + "step": 2170 + }, + { + "epoch": 1.39, + "learning_rate": 2.9727488151658765e-07, + "logits/generated": -1.9613704681396484, + "logits/real": -2.265411853790283, + "logps/generated": -342.83575439453125, + "logps/real": -336.58807373046875, + "loss": 0.0099, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -23.710336685180664, + "rewards/margins": 18.872779846191406, + "rewards/real": -4.837557792663574, + "step": 2180 + }, + { + "epoch": 1.4, + "learning_rate": 2.960900473933649e-07, + "logits/generated": -1.9236023426055908, + "logits/real": -2.200582504272461, + "logps/generated": -348.8911437988281, + "logps/real": -265.9325256347656, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/generated": -24.7271671295166, + "rewards/margins": 18.50712013244629, + "rewards/real": -6.220047950744629, + "step": 2190 + }, + { + "epoch": 1.41, + "learning_rate": 2.949052132701422e-07, + "logits/generated": -1.9877769947052002, + "logits/real": -2.205859899520874, + "logps/generated": -351.3087463378906, + "logps/real": -295.75042724609375, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/generated": -24.641620635986328, + "rewards/margins": 18.743465423583984, + "rewards/real": -5.89815616607666, + "step": 2200 + }, + { + "epoch": 1.41, + "learning_rate": 2.9372037914691946e-07, + "logits/generated": -1.9616267681121826, + "logits/real": -2.109819173812866, + "logps/generated": -381.1598205566406, + "logps/real": -299.94085693359375, + "loss": 0.0114, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -27.39129066467285, + "rewards/margins": 20.818603515625, + "rewards/real": -6.572684288024902, + "step": 2210 + }, + { + "epoch": 1.42, + "learning_rate": 2.9253554502369663e-07, + "logits/generated": -1.94171941280365, + "logits/real": -2.114952564239502, + "logps/generated": -389.1252136230469, + "logps/real": -274.3523864746094, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/generated": -28.433679580688477, + "rewards/margins": 21.05464744567871, + "rewards/real": -7.379031181335449, + "step": 2220 + }, + { + "epoch": 1.43, + "learning_rate": 2.913507109004739e-07, + "logits/generated": -2.000523090362549, + "logits/real": -2.2058169841766357, + "logps/generated": -385.126953125, + "logps/real": -307.17401123046875, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/generated": -27.793697357177734, + "rewards/margins": 20.554990768432617, + "rewards/real": -7.238706111907959, + "step": 2230 + }, + { + "epoch": 1.43, + "learning_rate": 2.901658767772512e-07, + "logits/generated": -2.1331028938293457, + "logits/real": -2.261775255203247, + "logps/generated": -308.9983215332031, + "logps/real": -257.3055114746094, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/generated": -20.865665435791016, + "rewards/margins": 15.690821647644043, + "rewards/real": -5.174844264984131, + "step": 2240 + }, + { + "epoch": 1.44, + "learning_rate": 2.889810426540284e-07, + "logits/generated": -2.1631789207458496, + "logits/real": -2.312990665435791, + "logps/generated": -308.62359619140625, + "logps/real": -290.2818298339844, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/generated": -20.118532180786133, + "rewards/margins": 15.512298583984375, + "rewards/real": -4.606230735778809, + "step": 2250 + }, + { + "epoch": 1.45, + "learning_rate": 2.8779620853080567e-07, + "logits/generated": -2.306006669998169, + "logits/real": -2.395618438720703, + "logps/generated": -288.3228454589844, + "logps/real": -268.11212158203125, + "loss": 0.0263, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -17.302173614501953, + "rewards/margins": 14.114949226379395, + "rewards/real": -3.1872246265411377, + "step": 2260 + }, + { + "epoch": 1.45, + "learning_rate": 2.8661137440758294e-07, + "logits/generated": -2.1571130752563477, + "logits/real": -2.408663511276245, + "logps/generated": -290.388671875, + "logps/real": -312.73199462890625, + "loss": 0.0048, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -18.008201599121094, + "rewards/margins": 15.325651168823242, + "rewards/real": -2.682548761367798, + "step": 2270 + }, + { + "epoch": 1.46, + "learning_rate": 2.854265402843602e-07, + "logits/generated": -2.1750426292419434, + "logits/real": -2.3797481060028076, + "logps/generated": -287.82562255859375, + "logps/real": -281.77349853515625, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/generated": -18.010038375854492, + "rewards/margins": 14.384750366210938, + "rewards/real": -3.625290632247925, + "step": 2280 + }, + { + "epoch": 1.47, + "learning_rate": 2.842417061611374e-07, + "logits/generated": -2.150526523590088, + "logits/real": -2.3643288612365723, + "logps/generated": -306.185546875, + "logps/real": -303.93426513671875, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/generated": -19.840198516845703, + "rewards/margins": 16.938533782958984, + "rewards/real": -2.901662826538086, + "step": 2290 + }, + { + "epoch": 1.47, + "learning_rate": 2.8305687203791465e-07, + "logits/generated": -2.22916841506958, + "logits/real": -2.382570743560791, + "logps/generated": -298.77288818359375, + "logps/real": -287.53533935546875, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/generated": -18.984249114990234, + "rewards/margins": 14.85334587097168, + "rewards/real": -4.130903244018555, + "step": 2300 + }, + { + "epoch": 1.48, + "learning_rate": 2.8187203791469193e-07, + "logits/generated": -2.1493663787841797, + "logits/real": -2.3570504188537598, + "logps/generated": -296.1643371582031, + "logps/real": -278.86376953125, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/generated": -18.78934097290039, + "rewards/margins": 15.320466995239258, + "rewards/real": -3.4688727855682373, + "step": 2310 + }, + { + "epoch": 1.48, + "learning_rate": 2.806872037914692e-07, + "logits/generated": -2.055908679962158, + "logits/real": -2.341315984725952, + "logps/generated": -322.4883728027344, + "logps/real": -305.29534912109375, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/generated": -20.715917587280273, + "rewards/margins": 16.561243057250977, + "rewards/real": -4.154674053192139, + "step": 2320 + }, + { + "epoch": 1.49, + "learning_rate": 2.795023696682464e-07, + "logits/generated": -2.1408910751342773, + "logits/real": -2.2490763664245605, + "logps/generated": -323.20245361328125, + "logps/real": -231.9637908935547, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/generated": -22.67896842956543, + "rewards/margins": 16.87398910522461, + "rewards/real": -5.804980278015137, + "step": 2330 + }, + { + "epoch": 1.5, + "learning_rate": 2.783175355450237e-07, + "logits/generated": -2.1436820030212402, + "logits/real": -2.2497756481170654, + "logps/generated": -315.02166748046875, + "logps/real": -274.26824951171875, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/generated": -21.036762237548828, + "rewards/margins": 16.998281478881836, + "rewards/real": -4.038480758666992, + "step": 2340 + }, + { + "epoch": 1.5, + "learning_rate": 2.7713270142180097e-07, + "logits/generated": -2.0521388053894043, + "logits/real": -2.257903575897217, + "logps/generated": -323.6043395996094, + "logps/real": -284.04071044921875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/generated": -21.63604164123535, + "rewards/margins": 16.72218132019043, + "rewards/real": -4.913861274719238, + "step": 2350 + }, + { + "epoch": 1.51, + "learning_rate": 2.759478672985782e-07, + "logits/generated": -2.0604171752929688, + "logits/real": -2.2787575721740723, + "logps/generated": -322.8190612792969, + "logps/real": -262.122802734375, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/generated": -22.032209396362305, + "rewards/margins": 16.762222290039062, + "rewards/real": -5.269987106323242, + "step": 2360 + }, + { + "epoch": 1.52, + "learning_rate": 2.747630331753554e-07, + "logits/generated": -2.072727680206299, + "logits/real": -2.246783494949341, + "logps/generated": -317.24407958984375, + "logps/real": -293.20184326171875, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/generated": -20.969411849975586, + "rewards/margins": 15.59190845489502, + "rewards/real": -5.377503395080566, + "step": 2370 + }, + { + "epoch": 1.52, + "learning_rate": 2.735781990521327e-07, + "logits/generated": -2.0133070945739746, + "logits/real": -2.256195306777954, + "logps/generated": -333.7389831542969, + "logps/real": -275.0140686035156, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/generated": -23.798381805419922, + "rewards/margins": 17.914201736450195, + "rewards/real": -5.88417911529541, + "step": 2380 + }, + { + "epoch": 1.53, + "learning_rate": 2.7239336492890995e-07, + "logits/generated": -2.0671088695526123, + "logits/real": -2.2632241249084473, + "logps/generated": -326.7292175292969, + "logps/real": -265.8837890625, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/generated": -22.720916748046875, + "rewards/margins": 16.91278839111328, + "rewards/real": -5.80812931060791, + "step": 2390 + }, + { + "epoch": 1.54, + "learning_rate": 2.7120853080568717e-07, + "logits/generated": -2.159471273422241, + "logits/real": -2.258662700653076, + "logps/generated": -320.0276184082031, + "logps/real": -258.6063232421875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/generated": -20.94855308532715, + "rewards/margins": 15.71070384979248, + "rewards/real": -5.237849712371826, + "step": 2400 + }, + { + "epoch": 1.54, + "learning_rate": 2.7002369668246444e-07, + "logits/generated": -2.0678138732910156, + "logits/real": -2.2282309532165527, + "logps/generated": -323.46160888671875, + "logps/real": -293.63043212890625, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/generated": -21.792064666748047, + "rewards/margins": 16.06268310546875, + "rewards/real": -5.729379653930664, + "step": 2410 + }, + { + "epoch": 1.55, + "learning_rate": 2.688388625592417e-07, + "logits/generated": -2.0311505794525146, + "logits/real": -2.227372169494629, + "logps/generated": -331.828857421875, + "logps/real": -310.86114501953125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/generated": -22.839397430419922, + "rewards/margins": 16.84860610961914, + "rewards/real": -5.990791320800781, + "step": 2420 + }, + { + "epoch": 1.55, + "learning_rate": 2.6765402843601894e-07, + "logits/generated": -2.0514676570892334, + "logits/real": -2.1547234058380127, + "logps/generated": -336.21075439453125, + "logps/real": -264.7240295410156, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/generated": -23.538625717163086, + "rewards/margins": 16.194805145263672, + "rewards/real": -7.3438215255737305, + "step": 2430 + }, + { + "epoch": 1.56, + "learning_rate": 2.664691943127962e-07, + "logits/generated": -2.0216307640075684, + "logits/real": -2.211293935775757, + "logps/generated": -358.1768798828125, + "logps/real": -316.6245422363281, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/generated": -24.672779083251953, + "rewards/margins": 19.19417381286621, + "rewards/real": -5.478603363037109, + "step": 2440 + }, + { + "epoch": 1.57, + "learning_rate": 2.6528436018957343e-07, + "logits/generated": -2.041341543197632, + "logits/real": -2.10255765914917, + "logps/generated": -344.2445373535156, + "logps/real": -247.41921997070312, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/generated": -24.607763290405273, + "rewards/margins": 17.24384307861328, + "rewards/real": -7.36392068862915, + "step": 2450 + }, + { + "epoch": 1.57, + "learning_rate": 2.640995260663507e-07, + "logits/generated": -2.0063443183898926, + "logits/real": -2.1921615600585938, + "logps/generated": -352.2701721191406, + "logps/real": -318.3006896972656, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/generated": -24.384395599365234, + "rewards/margins": 18.296855926513672, + "rewards/real": -6.0875396728515625, + "step": 2460 + }, + { + "epoch": 1.58, + "learning_rate": 2.629146919431279e-07, + "logits/generated": -2.015026092529297, + "logits/real": -2.216576099395752, + "logps/generated": -364.0120849609375, + "logps/real": -310.111083984375, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/generated": -26.099964141845703, + "rewards/margins": 19.072391510009766, + "rewards/real": -7.0275726318359375, + "step": 2470 + }, + { + "epoch": 1.59, + "learning_rate": 2.617298578199052e-07, + "logits/generated": -2.0228731632232666, + "logits/real": -2.152249813079834, + "logps/generated": -367.5391845703125, + "logps/real": -313.62969970703125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/generated": -25.40250015258789, + "rewards/margins": 18.70431900024414, + "rewards/real": -6.698182582855225, + "step": 2480 + }, + { + "epoch": 1.59, + "learning_rate": 2.6054502369668247e-07, + "logits/generated": -2.0362861156463623, + "logits/real": -2.1551527976989746, + "logps/generated": -319.7622375488281, + "logps/real": -263.19439697265625, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/generated": -21.64724349975586, + "rewards/margins": 17.379783630371094, + "rewards/real": -4.267460823059082, + "step": 2490 + }, + { + "epoch": 1.6, + "learning_rate": 2.5936018957345974e-07, + "logits/generated": -1.9363447427749634, + "logits/real": -2.1170012950897217, + "logps/generated": -337.524658203125, + "logps/real": -263.98779296875, + "loss": 0.0045, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -23.839879989624023, + "rewards/margins": 19.158344268798828, + "rewards/real": -4.681534767150879, + "step": 2500 + }, + { + "epoch": 1.61, + "learning_rate": 2.5817535545023696e-07, + "logits/generated": -1.991236686706543, + "logits/real": -2.1649136543273926, + "logps/generated": -323.94805908203125, + "logps/real": -315.3453674316406, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/generated": -21.72148323059082, + "rewards/margins": 16.818408966064453, + "rewards/real": -4.903075218200684, + "step": 2510 + }, + { + "epoch": 1.61, + "learning_rate": 2.5699052132701423e-07, + "logits/generated": -2.0180463790893555, + "logits/real": -2.0788466930389404, + "logps/generated": -311.48992919921875, + "logps/real": -245.39083862304688, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/generated": -21.404287338256836, + "rewards/margins": 16.352306365966797, + "rewards/real": -5.051980972290039, + "step": 2520 + }, + { + "epoch": 1.62, + "learning_rate": 2.5580568720379145e-07, + "logits/generated": -2.0104432106018066, + "logits/real": -2.135164737701416, + "logps/generated": -333.90020751953125, + "logps/real": -293.7203674316406, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/generated": -23.03586196899414, + "rewards/margins": 18.168453216552734, + "rewards/real": -4.867411136627197, + "step": 2530 + }, + { + "epoch": 1.63, + "learning_rate": 2.5462085308056867e-07, + "logits/generated": -2.0322206020355225, + "logits/real": -2.1355350017547607, + "logps/generated": -348.94915771484375, + "logps/real": -311.4462585449219, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/generated": -23.815948486328125, + "rewards/margins": 18.45013427734375, + "rewards/real": -5.365814208984375, + "step": 2540 + }, + { + "epoch": 1.63, + "learning_rate": 2.5343601895734595e-07, + "logits/generated": -2.015996217727661, + "logits/real": -2.027782440185547, + "logps/generated": -354.2010192871094, + "logps/real": -256.85198974609375, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/generated": -24.54047966003418, + "rewards/margins": 19.149431228637695, + "rewards/real": -5.391049385070801, + "step": 2550 + }, + { + "epoch": 1.64, + "learning_rate": 2.522511848341232e-07, + "logits/generated": -1.9637119770050049, + "logits/real": -2.039952278137207, + "logps/generated": -347.145263671875, + "logps/real": -247.9694366455078, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/generated": -24.598966598510742, + "rewards/margins": 18.92831802368164, + "rewards/real": -5.670650005340576, + "step": 2560 + }, + { + "epoch": 1.64, + "learning_rate": 2.510663507109005e-07, + "logits/generated": -1.9824374914169312, + "logits/real": -2.1313223838806152, + "logps/generated": -318.1536865234375, + "logps/real": -312.9999084472656, + "loss": 0.0114, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -21.444355010986328, + "rewards/margins": 16.674604415893555, + "rewards/real": -4.769750595092773, + "step": 2570 + }, + { + "epoch": 1.65, + "learning_rate": 2.498815165876777e-07, + "logits/generated": -2.0012238025665283, + "logits/real": -2.179154872894287, + "logps/generated": -295.0409851074219, + "logps/real": -306.1881103515625, + "loss": 0.0101, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -18.802841186523438, + "rewards/margins": 15.604291915893555, + "rewards/real": -3.1985487937927246, + "step": 2580 + }, + { + "epoch": 1.66, + "learning_rate": 2.48696682464455e-07, + "logits/generated": -1.9817421436309814, + "logits/real": -2.15266489982605, + "logps/generated": -314.5645751953125, + "logps/real": -322.54107666015625, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/generated": -20.512224197387695, + "rewards/margins": 17.625017166137695, + "rewards/real": -2.887207269668579, + "step": 2590 + }, + { + "epoch": 1.66, + "learning_rate": 2.475118483412322e-07, + "logits/generated": -1.8838014602661133, + "logits/real": -2.065337896347046, + "logps/generated": -348.7995300292969, + "logps/real": -263.181640625, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/generated": -24.87447166442871, + "rewards/margins": 21.053264617919922, + "rewards/real": -3.821207046508789, + "step": 2600 + }, + { + "epoch": 1.67, + "learning_rate": 2.463270142180095e-07, + "logits/generated": -1.9176208972930908, + "logits/real": -2.047250747680664, + "logps/generated": -333.9322204589844, + "logps/real": -290.37420654296875, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/generated": -22.631237030029297, + "rewards/margins": 18.093896865844727, + "rewards/real": -4.537338733673096, + "step": 2610 + }, + { + "epoch": 1.68, + "learning_rate": 2.451421800947867e-07, + "logits/generated": -1.869368314743042, + "logits/real": -2.067248821258545, + "logps/generated": -328.75384521484375, + "logps/real": -284.6513671875, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/generated": -21.998384475708008, + "rewards/margins": 18.069828033447266, + "rewards/real": -3.928557872772217, + "step": 2620 + }, + { + "epoch": 1.68, + "learning_rate": 2.4395734597156397e-07, + "logits/generated": -1.9721105098724365, + "logits/real": -1.9859319925308228, + "logps/generated": -322.07733154296875, + "logps/real": -214.5552520751953, + "loss": 0.0086, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -22.10195541381836, + "rewards/margins": 18.13933753967285, + "rewards/real": -3.962615489959717, + "step": 2630 + }, + { + "epoch": 1.69, + "learning_rate": 2.4277251184834124e-07, + "logits/generated": -1.9935197830200195, + "logits/real": -2.0847668647766113, + "logps/generated": -336.79388427734375, + "logps/real": -272.2264404296875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/generated": -22.606184005737305, + "rewards/margins": 18.493946075439453, + "rewards/real": -4.112237453460693, + "step": 2640 + }, + { + "epoch": 1.7, + "learning_rate": 2.4158767772511846e-07, + "logits/generated": -1.9344911575317383, + "logits/real": -2.0520946979522705, + "logps/generated": -325.8318786621094, + "logps/real": -301.53857421875, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/generated": -21.865243911743164, + "rewards/margins": 17.928768157958984, + "rewards/real": -3.936476230621338, + "step": 2650 + }, + { + "epoch": 1.7, + "learning_rate": 2.4040284360189573e-07, + "logits/generated": -1.8800468444824219, + "logits/real": -2.0344691276550293, + "logps/generated": -332.1402282714844, + "logps/real": -301.62042236328125, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/generated": -22.578327178955078, + "rewards/margins": 18.19384765625, + "rewards/real": -4.384476661682129, + "step": 2660 + }, + { + "epoch": 1.71, + "learning_rate": 2.39218009478673e-07, + "logits/generated": -1.901908278465271, + "logits/real": -2.0253829956054688, + "logps/generated": -334.27960205078125, + "logps/real": -262.5256652832031, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/generated": -24.076019287109375, + "rewards/margins": 19.42727279663086, + "rewards/real": -4.64874267578125, + "step": 2670 + }, + { + "epoch": 1.71, + "learning_rate": 2.3803317535545023e-07, + "logits/generated": -1.9247627258300781, + "logits/real": -2.078843593597412, + "logps/generated": -337.6062927246094, + "logps/real": -317.027099609375, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/generated": -22.987529754638672, + "rewards/margins": 18.43692398071289, + "rewards/real": -4.550606727600098, + "step": 2680 + }, + { + "epoch": 1.72, + "learning_rate": 2.3684834123222747e-07, + "logits/generated": -1.9171488285064697, + "logits/real": -2.0078930854797363, + "logps/generated": -360.25799560546875, + "logps/real": -293.09429931640625, + "loss": 0.0072, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -24.852455139160156, + "rewards/margins": 19.05927848815918, + "rewards/real": -5.793177127838135, + "step": 2690 + }, + { + "epoch": 1.73, + "learning_rate": 2.3566350710900475e-07, + "logits/generated": -1.9496829509735107, + "logits/real": -2.0645487308502197, + "logps/generated": -333.0656433105469, + "logps/real": -307.91094970703125, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/generated": -23.068565368652344, + "rewards/margins": 16.719371795654297, + "rewards/real": -6.3491926193237305, + "step": 2700 + }, + { + "epoch": 1.73, + "learning_rate": 2.3447867298578197e-07, + "logits/generated": -1.86553156375885, + "logits/real": -2.009887933731079, + "logps/generated": -384.51861572265625, + "logps/real": -254.32211303710938, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/generated": -27.905101776123047, + "rewards/margins": 22.367870330810547, + "rewards/real": -5.537230014801025, + "step": 2710 + }, + { + "epoch": 1.74, + "learning_rate": 2.3329383886255924e-07, + "logits/generated": -1.8696056604385376, + "logits/real": -2.008697509765625, + "logps/generated": -332.2572937011719, + "logps/real": -264.5096130371094, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/generated": -23.768789291381836, + "rewards/margins": 18.116247177124023, + "rewards/real": -5.652542591094971, + "step": 2720 + }, + { + "epoch": 1.75, + "learning_rate": 2.3210900473933649e-07, + "logits/generated": -1.9646952152252197, + "logits/real": -2.0399162769317627, + "logps/generated": -362.4119873046875, + "logps/real": -262.9007568359375, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/generated": -24.729812622070312, + "rewards/margins": 19.58879852294922, + "rewards/real": -5.1410112380981445, + "step": 2730 + }, + { + "epoch": 1.75, + "learning_rate": 2.3092417061611373e-07, + "logits/generated": -1.8820825815200806, + "logits/real": -2.106609344482422, + "logps/generated": -345.9579162597656, + "logps/real": -277.6873779296875, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/generated": -24.09268569946289, + "rewards/margins": 19.987350463867188, + "rewards/real": -4.1053361892700195, + "step": 2740 + }, + { + "epoch": 1.76, + "learning_rate": 2.2973933649289098e-07, + "logits/generated": -1.8273859024047852, + "logits/real": -2.048422336578369, + "logps/generated": -351.7268981933594, + "logps/real": -271.568115234375, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/generated": -24.743534088134766, + "rewards/margins": 20.314531326293945, + "rewards/real": -4.429001808166504, + "step": 2750 + }, + { + "epoch": 1.77, + "learning_rate": 2.2855450236966822e-07, + "logits/generated": -1.8855018615722656, + "logits/real": -2.0111851692199707, + "logps/generated": -360.0576171875, + "logps/real": -229.41171264648438, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/generated": -26.014429092407227, + "rewards/margins": 22.26042938232422, + "rewards/real": -3.753999710083008, + "step": 2760 + }, + { + "epoch": 1.77, + "learning_rate": 2.273696682464455e-07, + "logits/generated": -1.9568793773651123, + "logits/real": -2.0069072246551514, + "logps/generated": -325.590576171875, + "logps/real": -239.29653930664062, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/generated": -22.475872039794922, + "rewards/margins": 18.081890106201172, + "rewards/real": -4.393985748291016, + "step": 2770 + }, + { + "epoch": 1.78, + "learning_rate": 2.2618483412322272e-07, + "logits/generated": -1.8930606842041016, + "logits/real": -2.063122272491455, + "logps/generated": -332.5451965332031, + "logps/real": -307.4909362792969, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/generated": -22.505624771118164, + "rewards/margins": 18.05331039428711, + "rewards/real": -4.452314853668213, + "step": 2780 + }, + { + "epoch": 1.79, + "learning_rate": 2.25e-07, + "logits/generated": -1.8584035634994507, + "logits/real": -2.032623291015625, + "logps/generated": -357.2879333496094, + "logps/real": -272.9072265625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/generated": -25.67281723022461, + "rewards/margins": 22.030052185058594, + "rewards/real": -3.642765760421753, + "step": 2790 + }, + { + "epoch": 1.79, + "learning_rate": 2.2381516587677724e-07, + "logits/generated": -1.986790418624878, + "logits/real": -2.0681300163269043, + "logps/generated": -320.0332946777344, + "logps/real": -247.047119140625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/generated": -21.96512794494629, + "rewards/margins": 17.276538848876953, + "rewards/real": -4.688588619232178, + "step": 2800 + }, + { + "epoch": 1.8, + "learning_rate": 2.226303317535545e-07, + "logits/generated": -1.8750879764556885, + "logits/real": -2.0300040245056152, + "logps/generated": -353.9804992675781, + "logps/real": -279.1330261230469, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/generated": -24.2004337310791, + "rewards/margins": 19.79877471923828, + "rewards/real": -4.4016571044921875, + "step": 2810 + }, + { + "epoch": 1.8, + "learning_rate": 2.2144549763033173e-07, + "logits/generated": -1.9594194889068604, + "logits/real": -2.0362954139709473, + "logps/generated": -349.21844482421875, + "logps/real": -291.8232727050781, + "loss": 0.0067, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -23.7236385345459, + "rewards/margins": 18.65010643005371, + "rewards/real": -5.073533058166504, + "step": 2820 + }, + { + "epoch": 1.81, + "learning_rate": 2.20260663507109e-07, + "logits/generated": -1.9532238245010376, + "logits/real": -2.059518814086914, + "logps/generated": -358.15362548828125, + "logps/real": -295.75555419921875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/generated": -24.933134078979492, + "rewards/margins": 19.796754837036133, + "rewards/real": -5.136380195617676, + "step": 2830 + }, + { + "epoch": 1.82, + "learning_rate": 2.1907582938388625e-07, + "logits/generated": -1.911240816116333, + "logits/real": -2.035658121109009, + "logps/generated": -355.701171875, + "logps/real": -262.9356689453125, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/generated": -25.080434799194336, + "rewards/margins": 20.165552139282227, + "rewards/real": -4.914883136749268, + "step": 2840 + }, + { + "epoch": 1.82, + "learning_rate": 2.178909952606635e-07, + "logits/generated": -1.9019253253936768, + "logits/real": -1.9727287292480469, + "logps/generated": -339.832763671875, + "logps/real": -244.0312957763672, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/generated": -24.016708374023438, + "rewards/margins": 19.056392669677734, + "rewards/real": -4.9603142738342285, + "step": 2850 + }, + { + "epoch": 1.83, + "learning_rate": 2.1670616113744074e-07, + "logits/generated": -1.8738031387329102, + "logits/real": -1.9713420867919922, + "logps/generated": -358.6424865722656, + "logps/real": -236.83349609375, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/generated": -25.90484046936035, + "rewards/margins": 20.887779235839844, + "rewards/real": -5.017061710357666, + "step": 2860 + }, + { + "epoch": 1.84, + "learning_rate": 2.15521327014218e-07, + "logits/generated": -1.9246352910995483, + "logits/real": -2.0558464527130127, + "logps/generated": -349.736083984375, + "logps/real": -273.07879638671875, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/generated": -24.154857635498047, + "rewards/margins": 19.935977935791016, + "rewards/real": -4.218877792358398, + "step": 2870 + }, + { + "epoch": 1.84, + "learning_rate": 2.1433649289099526e-07, + "logits/generated": -1.8406873941421509, + "logits/real": -2.04058575630188, + "logps/generated": -361.99090576171875, + "logps/real": -329.4713439941406, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/generated": -25.09902572631836, + "rewards/margins": 20.601699829101562, + "rewards/real": -4.4973249435424805, + "step": 2880 + }, + { + "epoch": 1.85, + "learning_rate": 2.131516587677725e-07, + "logits/generated": -1.9347474575042725, + "logits/real": -2.107963800430298, + "logps/generated": -338.20220947265625, + "logps/real": -339.88177490234375, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/generated": -22.654552459716797, + "rewards/margins": 17.851367950439453, + "rewards/real": -4.80318546295166, + "step": 2890 + }, + { + "epoch": 1.86, + "learning_rate": 2.1196682464454975e-07, + "logits/generated": -1.9360589981079102, + "logits/real": -2.094698190689087, + "logps/generated": -345.03936767578125, + "logps/real": -347.36328125, + "loss": 0.005, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -23.069780349731445, + "rewards/margins": 18.0810489654541, + "rewards/real": -4.988730430603027, + "step": 2900 + }, + { + "epoch": 1.86, + "learning_rate": 2.10781990521327e-07, + "logits/generated": -1.8973820209503174, + "logits/real": -2.0721583366394043, + "logps/generated": -354.6455993652344, + "logps/real": -321.29388427734375, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/generated": -24.49210548400879, + "rewards/margins": 20.05221939086914, + "rewards/real": -4.439886569976807, + "step": 2910 + }, + { + "epoch": 1.87, + "learning_rate": 2.0959715639810427e-07, + "logits/generated": -1.8905102014541626, + "logits/real": -2.0525612831115723, + "logps/generated": -350.294189453125, + "logps/real": -321.59576416015625, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/generated": -24.51646614074707, + "rewards/margins": 18.570537567138672, + "rewards/real": -5.945926189422607, + "step": 2920 + }, + { + "epoch": 1.87, + "learning_rate": 2.0841232227488152e-07, + "logits/generated": -1.8868262767791748, + "logits/real": -2.0915586948394775, + "logps/generated": -346.82244873046875, + "logps/real": -324.75360107421875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/generated": -23.300960540771484, + "rewards/margins": 18.860183715820312, + "rewards/real": -4.440775394439697, + "step": 2930 + }, + { + "epoch": 1.88, + "learning_rate": 2.0722748815165874e-07, + "logits/generated": -2.009647846221924, + "logits/real": -2.112830400466919, + "logps/generated": -314.67822265625, + "logps/real": -273.21246337890625, + "loss": 0.0047, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -21.59378433227539, + "rewards/margins": 16.634010314941406, + "rewards/real": -4.959776878356934, + "step": 2940 + }, + { + "epoch": 1.89, + "learning_rate": 2.06042654028436e-07, + "logits/generated": -1.8931806087493896, + "logits/real": -2.008685350418091, + "logps/generated": -350.7391662597656, + "logps/real": -279.31610107421875, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/generated": -24.823436737060547, + "rewards/margins": 19.396289825439453, + "rewards/real": -5.427145957946777, + "step": 2950 + }, + { + "epoch": 1.89, + "learning_rate": 2.0485781990521326e-07, + "logits/generated": -1.8286612033843994, + "logits/real": -2.0201098918914795, + "logps/generated": -330.6366271972656, + "logps/real": -285.68145751953125, + "loss": 0.0077, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -21.9193172454834, + "rewards/margins": 17.24250602722168, + "rewards/real": -4.676810264587402, + "step": 2960 + }, + { + "epoch": 1.9, + "learning_rate": 2.0367298578199053e-07, + "logits/generated": -1.962898850440979, + "logits/real": -2.057426929473877, + "logps/generated": -323.2264404296875, + "logps/real": -326.7906799316406, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/generated": -21.479419708251953, + "rewards/margins": 17.808374404907227, + "rewards/real": -3.6710457801818848, + "step": 2970 + }, + { + "epoch": 1.91, + "learning_rate": 2.0248815165876775e-07, + "logits/generated": -1.92549729347229, + "logits/real": -2.0787177085876465, + "logps/generated": -340.48388671875, + "logps/real": -327.29638671875, + "loss": 0.029, + "rewards/accuracies": 1.0, + "rewards/generated": -22.590269088745117, + "rewards/margins": 18.792264938354492, + "rewards/real": -3.798003673553467, + "step": 2980 + }, + { + "epoch": 1.91, + "learning_rate": 2.0130331753554502e-07, + "logits/generated": -2.082226276397705, + "logits/real": -2.060159683227539, + "logps/generated": -309.9555358886719, + "logps/real": -248.53634643554688, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/generated": -20.31454849243164, + "rewards/margins": 16.472434997558594, + "rewards/real": -3.842111587524414, + "step": 2990 + }, + { + "epoch": 1.92, + "learning_rate": 2.0011848341232227e-07, + "logits/generated": -2.0672497749328613, + "logits/real": -2.071643829345703, + "logps/generated": -317.7587890625, + "logps/real": -244.15213012695312, + "loss": 0.0031, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -20.6685848236084, + "rewards/margins": 16.24555778503418, + "rewards/real": -4.423028945922852, + "step": 3000 + }, + { + "epoch": 1.93, + "learning_rate": 1.9893364928909952e-07, + "logits/generated": -2.0025432109832764, + "logits/real": -2.051884412765503, + "logps/generated": -323.74127197265625, + "logps/real": -253.74807739257812, + "loss": 0.0121, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -21.344167709350586, + "rewards/margins": 17.927459716796875, + "rewards/real": -3.41670560836792, + "step": 3010 + }, + { + "epoch": 1.93, + "learning_rate": 1.9774881516587676e-07, + "logits/generated": -2.0415196418762207, + "logits/real": -1.9388816356658936, + "logps/generated": -339.0467224121094, + "logps/real": -225.0322265625, + "loss": 0.007, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -23.312593460083008, + "rewards/margins": 18.541973114013672, + "rewards/real": -4.770620346069336, + "step": 3020 + }, + { + "epoch": 1.94, + "learning_rate": 1.96563981042654e-07, + "logits/generated": -2.0513784885406494, + "logits/real": -2.1174235343933105, + "logps/generated": -330.4163818359375, + "logps/real": -336.45703125, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/generated": -21.52487564086914, + "rewards/margins": 18.490808486938477, + "rewards/real": -3.034066915512085, + "step": 3030 + }, + { + "epoch": 1.94, + "learning_rate": 1.9537914691943128e-07, + "logits/generated": -1.978915810585022, + "logits/real": -2.065412759780884, + "logps/generated": -305.82666015625, + "logps/real": -273.7213439941406, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/generated": -20.667461395263672, + "rewards/margins": 16.536968231201172, + "rewards/real": -4.130496025085449, + "step": 3040 + }, + { + "epoch": 1.95, + "learning_rate": 1.9419431279620853e-07, + "logits/generated": -1.9168899059295654, + "logits/real": -1.9833358526229858, + "logps/generated": -328.87860107421875, + "logps/real": -255.411865234375, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/generated": -22.704120635986328, + "rewards/margins": 18.399288177490234, + "rewards/real": -4.30483341217041, + "step": 3050 + }, + { + "epoch": 1.96, + "learning_rate": 1.9300947867298577e-07, + "logits/generated": -2.037984609603882, + "logits/real": -2.1186954975128174, + "logps/generated": -327.5215148925781, + "logps/real": -289.11798095703125, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -21.641704559326172, + "rewards/margins": 17.83352279663086, + "rewards/real": -3.8081812858581543, + "step": 3060 + }, + { + "epoch": 1.96, + "learning_rate": 1.9182464454976302e-07, + "logits/generated": -1.9606168270111084, + "logits/real": -2.066399574279785, + "logps/generated": -335.49310302734375, + "logps/real": -285.63916015625, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/generated": -22.89196014404297, + "rewards/margins": 18.424591064453125, + "rewards/real": -4.467370510101318, + "step": 3070 + }, + { + "epoch": 1.97, + "learning_rate": 1.906398104265403e-07, + "logits/generated": -1.9231208562850952, + "logits/real": -1.9928699731826782, + "logps/generated": -349.9872131347656, + "logps/real": -250.4529571533203, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/generated": -24.382661819458008, + "rewards/margins": 19.963834762573242, + "rewards/real": -4.418826580047607, + "step": 3080 + }, + { + "epoch": 1.98, + "learning_rate": 1.8945497630331754e-07, + "logits/generated": -2.0129315853118896, + "logits/real": -2.0563480854034424, + "logps/generated": -344.85418701171875, + "logps/real": -267.9552917480469, + "loss": 0.5252, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -23.324771881103516, + "rewards/margins": 19.245107650756836, + "rewards/real": -4.079663276672363, + "step": 3090 + }, + { + "epoch": 1.98, + "learning_rate": 1.8827014218009476e-07, + "logits/generated": -2.067603349685669, + "logits/real": -2.162640333175659, + "logps/generated": -311.3556823730469, + "logps/real": -290.5445556640625, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/generated": -21.031274795532227, + "rewards/margins": 17.459707260131836, + "rewards/real": -3.571566343307495, + "step": 3100 + }, + { + "epoch": 1.99, + "learning_rate": 1.8708530805687203e-07, + "logits/generated": -2.022533416748047, + "logits/real": -2.082559585571289, + "logps/generated": -357.4684143066406, + "logps/real": -272.97613525390625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/generated": -25.341123580932617, + "rewards/margins": 21.033334732055664, + "rewards/real": -4.3077898025512695, + "step": 3110 + }, + { + "epoch": 2.0, + "learning_rate": 1.8590047393364928e-07, + "logits/generated": -2.0808520317077637, + "logits/real": -2.121340036392212, + "logps/generated": -372.4068603515625, + "logps/real": -298.7972717285156, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/generated": -25.399755477905273, + "rewards/margins": 20.64004135131836, + "rewards/real": -4.759713172912598, + "step": 3120 + }, + { + "epoch": 2.0, + "learning_rate": 1.8471563981042655e-07, + "logits/generated": -2.081714630126953, + "logits/real": -2.120727300643921, + "logps/generated": -337.6578674316406, + "logps/real": -305.0014953613281, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/generated": -22.75674057006836, + "rewards/margins": 18.288433074951172, + "rewards/real": -4.468310356140137, + "step": 3130 + }, + { + "epoch": 2.01, + "learning_rate": 1.8353080568720377e-07, + "logits/generated": -2.0968246459960938, + "logits/real": -2.1070868968963623, + "logps/generated": -362.4174499511719, + "logps/real": -297.3021545410156, + "loss": 0.0024, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -25.106538772583008, + "rewards/margins": 20.243976593017578, + "rewards/real": -4.862562656402588, + "step": 3140 + }, + { + "epoch": 2.02, + "learning_rate": 1.8234597156398104e-07, + "logits/generated": -2.073312997817993, + "logits/real": -2.031247615814209, + "logps/generated": -362.1297302246094, + "logps/real": -264.1739807128906, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/generated": -25.403884887695312, + "rewards/margins": 19.92275619506836, + "rewards/real": -5.481128692626953, + "step": 3150 + }, + { + "epoch": 2.02, + "learning_rate": 1.811611374407583e-07, + "logits/generated": -2.0962705612182617, + "logits/real": -2.139554262161255, + "logps/generated": -340.9764404296875, + "logps/real": -297.6211853027344, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/generated": -23.11123275756836, + "rewards/margins": 17.79618263244629, + "rewards/real": -5.315046787261963, + "step": 3160 + }, + { + "epoch": 2.03, + "learning_rate": 1.7997630331753554e-07, + "logits/generated": -2.072605609893799, + "logits/real": -2.092684268951416, + "logps/generated": -334.9135437011719, + "logps/real": -268.64984130859375, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/generated": -24.23314666748047, + "rewards/margins": 18.820934295654297, + "rewards/real": -5.412210464477539, + "step": 3170 + }, + { + "epoch": 2.03, + "learning_rate": 1.7879146919431278e-07, + "logits/generated": -2.103445529937744, + "logits/real": -2.1847872734069824, + "logps/generated": -348.1864929199219, + "logps/real": -362.13543701171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -23.36882972717285, + "rewards/margins": 19.663820266723633, + "rewards/real": -3.7050089836120605, + "step": 3180 + }, + { + "epoch": 2.04, + "learning_rate": 1.7760663507109003e-07, + "logits/generated": -2.0266237258911133, + "logits/real": -2.1335673332214355, + "logps/generated": -337.2787170410156, + "logps/real": -351.09234619140625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -22.30938148498535, + "rewards/margins": 18.489295959472656, + "rewards/real": -3.820082426071167, + "step": 3190 + }, + { + "epoch": 2.05, + "learning_rate": 1.764218009478673e-07, + "logits/generated": -2.089790105819702, + "logits/real": -2.045483112335205, + "logps/generated": -348.64691162109375, + "logps/real": -275.2323303222656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -24.00542640686035, + "rewards/margins": 19.243324279785156, + "rewards/real": -4.762101173400879, + "step": 3200 + }, + { + "epoch": 2.05, + "learning_rate": 1.7523696682464452e-07, + "logits/generated": -2.064568519592285, + "logits/real": -2.1267263889312744, + "logps/generated": -356.4788513183594, + "logps/real": -309.7297668457031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -24.821239471435547, + "rewards/margins": 20.07695198059082, + "rewards/real": -4.744289875030518, + "step": 3210 + }, + { + "epoch": 2.06, + "learning_rate": 1.740521327014218e-07, + "logits/generated": -2.0550034046173096, + "logits/real": -2.1228978633880615, + "logps/generated": -342.8502197265625, + "logps/real": -311.9898681640625, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/generated": -23.458721160888672, + "rewards/margins": 19.106483459472656, + "rewards/real": -4.352238178253174, + "step": 3220 + }, + { + "epoch": 2.07, + "learning_rate": 1.7286729857819904e-07, + "logits/generated": -2.0442018508911133, + "logits/real": -2.037679672241211, + "logps/generated": -368.05096435546875, + "logps/real": -234.1935272216797, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/generated": -25.96514892578125, + "rewards/margins": 21.14073371887207, + "rewards/real": -4.824419975280762, + "step": 3230 + }, + { + "epoch": 2.07, + "learning_rate": 1.7168246445497631e-07, + "logits/generated": -2.0806503295898438, + "logits/real": -2.138878583908081, + "logps/generated": -350.0105285644531, + "logps/real": -273.70556640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -24.906692504882812, + "rewards/margins": 19.990764617919922, + "rewards/real": -4.915929317474365, + "step": 3240 + }, + { + "epoch": 2.08, + "learning_rate": 1.7049763033175353e-07, + "logits/generated": -2.0372068881988525, + "logits/real": -2.050089120864868, + "logps/generated": -350.42095947265625, + "logps/real": -272.7009582519531, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -24.547452926635742, + "rewards/margins": 19.302318572998047, + "rewards/real": -5.2451372146606445, + "step": 3250 + }, + { + "epoch": 2.09, + "learning_rate": 1.693127962085308e-07, + "logits/generated": -2.1300880908966064, + "logits/real": -2.2184221744537354, + "logps/generated": -359.78472900390625, + "logps/real": -349.1603698730469, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/generated": -23.26993751525879, + "rewards/margins": 19.06733512878418, + "rewards/real": -4.202603340148926, + "step": 3260 + }, + { + "epoch": 2.09, + "learning_rate": 1.6812796208530805e-07, + "logits/generated": -2.011422872543335, + "logits/real": -2.0868258476257324, + "logps/generated": -361.9312744140625, + "logps/real": -315.9602966308594, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -24.96506690979004, + "rewards/margins": 20.63692283630371, + "rewards/real": -4.328146457672119, + "step": 3270 + }, + { + "epoch": 2.1, + "learning_rate": 1.669431279620853e-07, + "logits/generated": -2.047356128692627, + "logits/real": -2.114980697631836, + "logps/generated": -342.90252685546875, + "logps/real": -288.1419372558594, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/generated": -22.95301628112793, + "rewards/margins": 17.99421501159668, + "rewards/real": -4.95880126953125, + "step": 3280 + }, + { + "epoch": 2.1, + "learning_rate": 1.6575829383886255e-07, + "logits/generated": -2.0588772296905518, + "logits/real": -2.0936694145202637, + "logps/generated": -365.0455017089844, + "logps/real": -304.8596496582031, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -25.46891212463379, + "rewards/margins": 19.947111129760742, + "rewards/real": -5.521799564361572, + "step": 3290 + }, + { + "epoch": 2.11, + "learning_rate": 1.645734597156398e-07, + "logits/generated": -2.035792827606201, + "logits/real": -2.089658498764038, + "logps/generated": -363.96466064453125, + "logps/real": -310.9200134277344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -24.831939697265625, + "rewards/margins": 20.766572952270508, + "rewards/real": -4.065365791320801, + "step": 3300 + }, + { + "epoch": 2.12, + "learning_rate": 1.6338862559241706e-07, + "logits/generated": -2.032679319381714, + "logits/real": -2.05472731590271, + "logps/generated": -352.28948974609375, + "logps/real": -283.97161865234375, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -24.409162521362305, + "rewards/margins": 19.225811004638672, + "rewards/real": -5.183350563049316, + "step": 3310 + }, + { + "epoch": 2.12, + "learning_rate": 1.622037914691943e-07, + "logits/generated": -2.0000383853912354, + "logits/real": -2.0034682750701904, + "logps/generated": -356.1659240722656, + "logps/real": -244.78005981445312, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/generated": -25.83150863647461, + "rewards/margins": 20.468902587890625, + "rewards/real": -5.362606525421143, + "step": 3320 + }, + { + "epoch": 2.13, + "learning_rate": 1.6101895734597156e-07, + "logits/generated": -2.0317182540893555, + "logits/real": -2.0870862007141113, + "logps/generated": -363.60394287109375, + "logps/real": -261.621826171875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/generated": -25.53971290588379, + "rewards/margins": 20.445377349853516, + "rewards/real": -5.094333171844482, + "step": 3330 + }, + { + "epoch": 2.14, + "learning_rate": 1.598341232227488e-07, + "logits/generated": -2.082185745239258, + "logits/real": -2.1197800636291504, + "logps/generated": -357.8829345703125, + "logps/real": -276.6432189941406, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -24.641422271728516, + "rewards/margins": 19.46223258972168, + "rewards/real": -5.1791863441467285, + "step": 3340 + }, + { + "epoch": 2.14, + "learning_rate": 1.5864928909952605e-07, + "logits/generated": -2.0468974113464355, + "logits/real": -2.0383057594299316, + "logps/generated": -383.3557434082031, + "logps/real": -272.8359680175781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -27.14200782775879, + "rewards/margins": 21.54371452331543, + "rewards/real": -5.598288536071777, + "step": 3350 + }, + { + "epoch": 2.15, + "learning_rate": 1.5746445497630332e-07, + "logits/generated": -1.9730154275894165, + "logits/real": -2.0217251777648926, + "logps/generated": -347.0853576660156, + "logps/real": -291.0683898925781, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -25.273738861083984, + "rewards/margins": 19.66875457763672, + "rewards/real": -5.604984283447266, + "step": 3360 + }, + { + "epoch": 2.16, + "learning_rate": 1.5627962085308054e-07, + "logits/generated": -2.016352415084839, + "logits/real": -2.0556960105895996, + "logps/generated": -358.643310546875, + "logps/real": -292.9967041015625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -25.528522491455078, + "rewards/margins": 20.312397003173828, + "rewards/real": -5.216123104095459, + "step": 3370 + }, + { + "epoch": 2.16, + "learning_rate": 1.5509478672985782e-07, + "logits/generated": -2.0128049850463867, + "logits/real": -2.0695934295654297, + "logps/generated": -361.88629150390625, + "logps/real": -289.5730895996094, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -25.175642013549805, + "rewards/margins": 20.242481231689453, + "rewards/real": -4.933161735534668, + "step": 3380 + }, + { + "epoch": 2.17, + "learning_rate": 1.5390995260663506e-07, + "logits/generated": -2.0474679470062256, + "logits/real": -2.0958378314971924, + "logps/generated": -371.52984619140625, + "logps/real": -277.81854248046875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/generated": -26.174081802368164, + "rewards/margins": 20.208127975463867, + "rewards/real": -5.9659528732299805, + "step": 3390 + }, + { + "epoch": 2.18, + "learning_rate": 1.5272511848341233e-07, + "logits/generated": -1.981287956237793, + "logits/real": -1.9887183904647827, + "logps/generated": -371.8958740234375, + "logps/real": -270.61627197265625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -27.01461410522461, + "rewards/margins": 20.439838409423828, + "rewards/real": -6.574775695800781, + "step": 3400 + }, + { + "epoch": 2.18, + "learning_rate": 1.5154028436018955e-07, + "logits/generated": -2.091592788696289, + "logits/real": -2.0786731243133545, + "logps/generated": -378.8370056152344, + "logps/real": -287.45281982421875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -26.378387451171875, + "rewards/margins": 21.110021591186523, + "rewards/real": -5.268365859985352, + "step": 3410 + }, + { + "epoch": 2.19, + "learning_rate": 1.5035545023696683e-07, + "logits/generated": -2.031449794769287, + "logits/real": -2.08339262008667, + "logps/generated": -368.6145935058594, + "logps/real": -299.513427734375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/generated": -25.480379104614258, + "rewards/margins": 20.795780181884766, + "rewards/real": -4.684597969055176, + "step": 3420 + }, + { + "epoch": 2.19, + "learning_rate": 1.4917061611374407e-07, + "logits/generated": -2.0079009532928467, + "logits/real": -2.053812026977539, + "logps/generated": -364.58734130859375, + "logps/real": -280.71612548828125, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/generated": -25.69207763671875, + "rewards/margins": 20.46217918395996, + "rewards/real": -5.229896545410156, + "step": 3430 + }, + { + "epoch": 2.2, + "learning_rate": 1.4798578199052132e-07, + "logits/generated": -2.052233934402466, + "logits/real": -2.1017303466796875, + "logps/generated": -376.9831237792969, + "logps/real": -309.8131408691406, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -25.689865112304688, + "rewards/margins": 19.916255950927734, + "rewards/real": -5.7736077308654785, + "step": 3440 + }, + { + "epoch": 2.21, + "learning_rate": 1.4680094786729857e-07, + "logits/generated": -2.0397682189941406, + "logits/real": -2.0936062335968018, + "logps/generated": -353.9073791503906, + "logps/real": -299.5115966796875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/generated": -24.632925033569336, + "rewards/margins": 19.708118438720703, + "rewards/real": -4.924810886383057, + "step": 3450 + }, + { + "epoch": 2.21, + "learning_rate": 1.456161137440758e-07, + "logits/generated": -1.9885776042938232, + "logits/real": -2.034236192703247, + "logps/generated": -360.8766784667969, + "logps/real": -280.8948059082031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -26.076366424560547, + "rewards/margins": 20.19205665588379, + "rewards/real": -5.884313106536865, + "step": 3460 + }, + { + "epoch": 2.22, + "learning_rate": 1.4443127962085309e-07, + "logits/generated": -2.0221621990203857, + "logits/real": -2.0913443565368652, + "logps/generated": -377.02734375, + "logps/real": -307.8632507324219, + "loss": 0.0089, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -26.230060577392578, + "rewards/margins": 20.80460548400879, + "rewards/real": -5.425457000732422, + "step": 3470 + }, + { + "epoch": 2.23, + "learning_rate": 1.4324644549763033e-07, + "logits/generated": -1.970720648765564, + "logits/real": -2.0222678184509277, + "logps/generated": -364.70477294921875, + "logps/real": -270.23492431640625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/generated": -25.826274871826172, + "rewards/margins": 19.925310134887695, + "rewards/real": -5.900964736938477, + "step": 3480 + }, + { + "epoch": 2.23, + "learning_rate": 1.4206161137440758e-07, + "logits/generated": -1.985548734664917, + "logits/real": -2.059246301651001, + "logps/generated": -399.61077880859375, + "logps/real": -299.520263671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -28.505584716796875, + "rewards/margins": 22.971120834350586, + "rewards/real": -5.534466743469238, + "step": 3490 + }, + { + "epoch": 2.24, + "learning_rate": 1.4087677725118482e-07, + "logits/generated": -2.0236446857452393, + "logits/real": -2.041933536529541, + "logps/generated": -376.0531921386719, + "logps/real": -258.92315673828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -26.45395851135254, + "rewards/margins": 21.224918365478516, + "rewards/real": -5.229036808013916, + "step": 3500 + }, + { + "epoch": 2.25, + "learning_rate": 1.396919431279621e-07, + "logits/generated": -1.960320234298706, + "logits/real": -2.0525546073913574, + "logps/generated": -351.20184326171875, + "logps/real": -317.5999755859375, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -24.332490921020508, + "rewards/margins": 19.579879760742188, + "rewards/real": -4.752608299255371, + "step": 3510 + }, + { + "epoch": 2.25, + "learning_rate": 1.3850710900473934e-07, + "logits/generated": -1.9520498514175415, + "logits/real": -2.0305888652801514, + "logps/generated": -374.1609191894531, + "logps/real": -324.40753173828125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/generated": -27.1029052734375, + "rewards/margins": 20.919063568115234, + "rewards/real": -6.183840751647949, + "step": 3520 + }, + { + "epoch": 2.26, + "learning_rate": 1.3732227488151656e-07, + "logits/generated": -1.9751561880111694, + "logits/real": -1.9569429159164429, + "logps/generated": -413.66143798828125, + "logps/real": -266.12664794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -31.187402725219727, + "rewards/margins": 24.67436408996582, + "rewards/real": -6.513040065765381, + "step": 3530 + }, + { + "epoch": 2.26, + "learning_rate": 1.3613744075829384e-07, + "logits/generated": -2.030137538909912, + "logits/real": -2.0903306007385254, + "logps/generated": -369.189453125, + "logps/real": -314.4388122558594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -25.56549072265625, + "rewards/margins": 19.738645553588867, + "rewards/real": -5.826841831207275, + "step": 3540 + }, + { + "epoch": 2.27, + "learning_rate": 1.3495260663507108e-07, + "logits/generated": -1.9893041849136353, + "logits/real": -2.0538480281829834, + "logps/generated": -384.92987060546875, + "logps/real": -279.54071044921875, + "loss": 0.0045, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -28.083690643310547, + "rewards/margins": 22.457271575927734, + "rewards/real": -5.626420021057129, + "step": 3550 + }, + { + "epoch": 2.28, + "learning_rate": 1.3376777251184836e-07, + "logits/generated": -1.9623091220855713, + "logits/real": -2.057955265045166, + "logps/generated": -360.659912109375, + "logps/real": -299.2305603027344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -25.131025314331055, + "rewards/margins": 20.248781204223633, + "rewards/real": -4.882245063781738, + "step": 3560 + }, + { + "epoch": 2.28, + "learning_rate": 1.3258293838862558e-07, + "logits/generated": -1.9596290588378906, + "logits/real": -2.014758348464966, + "logps/generated": -369.7837829589844, + "logps/real": -292.29656982421875, + "loss": 0.0024, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -25.983783721923828, + "rewards/margins": 20.629314422607422, + "rewards/real": -5.354469299316406, + "step": 3570 + }, + { + "epoch": 2.29, + "learning_rate": 1.3139810426540285e-07, + "logits/generated": -1.9941390752792358, + "logits/real": -2.0187554359436035, + "logps/generated": -402.2820739746094, + "logps/real": -298.3996887207031, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -29.100866317749023, + "rewards/margins": 23.757795333862305, + "rewards/real": -5.3430681228637695, + "step": 3580 + }, + { + "epoch": 2.3, + "learning_rate": 1.302132701421801e-07, + "logits/generated": -1.9865878820419312, + "logits/real": -1.99555242061615, + "logps/generated": -370.57489013671875, + "logps/real": -270.86138916015625, + "loss": 0.0066, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -26.78008460998535, + "rewards/margins": 20.997785568237305, + "rewards/real": -5.782297611236572, + "step": 3590 + }, + { + "epoch": 2.3, + "learning_rate": 1.2902843601895734e-07, + "logits/generated": -1.9569337368011475, + "logits/real": -2.038245439529419, + "logps/generated": -385.96734619140625, + "logps/real": -300.8575744628906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -26.998550415039062, + "rewards/margins": 21.897846221923828, + "rewards/real": -5.100704669952393, + "step": 3600 + }, + { + "epoch": 2.31, + "learning_rate": 1.278436018957346e-07, + "logits/generated": -1.9720693826675415, + "logits/real": -2.037219762802124, + "logps/generated": -357.646240234375, + "logps/real": -312.5133056640625, + "loss": 0.0071, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -25.564912796020508, + "rewards/margins": 20.172155380249023, + "rewards/real": -5.392756462097168, + "step": 3610 + }, + { + "epoch": 2.32, + "learning_rate": 1.2665876777251183e-07, + "logits/generated": -1.9715849161148071, + "logits/real": -2.0456924438476562, + "logps/generated": -389.7020263671875, + "logps/real": -263.62359619140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -27.894466400146484, + "rewards/margins": 21.80486297607422, + "rewards/real": -6.089602470397949, + "step": 3620 + }, + { + "epoch": 2.32, + "learning_rate": 1.254739336492891e-07, + "logits/generated": -1.9623218774795532, + "logits/real": -1.9732621908187866, + "logps/generated": -381.63861083984375, + "logps/real": -290.93035888671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -27.41843032836914, + "rewards/margins": 21.812026977539062, + "rewards/real": -5.6064043045043945, + "step": 3630 + }, + { + "epoch": 2.33, + "learning_rate": 1.2428909952606635e-07, + "logits/generated": -1.9537547826766968, + "logits/real": -2.0077974796295166, + "logps/generated": -398.73822021484375, + "logps/real": -308.29779052734375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -29.166988372802734, + "rewards/margins": 23.160018920898438, + "rewards/real": -6.0069708824157715, + "step": 3640 + }, + { + "epoch": 2.34, + "learning_rate": 1.231042654028436e-07, + "logits/generated": -1.9947917461395264, + "logits/real": -1.9984674453735352, + "logps/generated": -376.10589599609375, + "logps/real": -315.67144775390625, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/generated": -27.812427520751953, + "rewards/margins": 21.052278518676758, + "rewards/real": -6.760148525238037, + "step": 3650 + }, + { + "epoch": 2.34, + "learning_rate": 1.2191943127962085e-07, + "logits/generated": -2.017561197280884, + "logits/real": -1.9898500442504883, + "logps/generated": -381.71728515625, + "logps/real": -296.3028564453125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -25.982290267944336, + "rewards/margins": 19.864425659179688, + "rewards/real": -6.117864608764648, + "step": 3660 + }, + { + "epoch": 2.35, + "learning_rate": 1.207345971563981e-07, + "logits/generated": -1.9187358617782593, + "logits/real": -1.9610626697540283, + "logps/generated": -382.46905517578125, + "logps/real": -290.3877258300781, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/generated": -27.577932357788086, + "rewards/margins": 21.187341690063477, + "rewards/real": -6.390590667724609, + "step": 3670 + }, + { + "epoch": 2.35, + "learning_rate": 1.1954976303317534e-07, + "logits/generated": -1.9850883483886719, + "logits/real": -2.047105073928833, + "logps/generated": -375.0716552734375, + "logps/real": -340.9063415527344, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -26.463632583618164, + "rewards/margins": 21.219701766967773, + "rewards/real": -5.243931293487549, + "step": 3680 + }, + { + "epoch": 2.36, + "learning_rate": 1.183649289099526e-07, + "logits/generated": -1.8932054042816162, + "logits/real": -1.8967291116714478, + "logps/generated": -359.06365966796875, + "logps/real": -283.9896240234375, + "loss": 0.0033, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -25.62455177307129, + "rewards/margins": 19.897136688232422, + "rewards/real": -5.727414131164551, + "step": 3690 + }, + { + "epoch": 2.37, + "learning_rate": 1.1718009478672986e-07, + "logits/generated": -1.8854577541351318, + "logits/real": -1.8945916891098022, + "logps/generated": -380.1351318359375, + "logps/real": -284.0939025878906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -28.0650634765625, + "rewards/margins": 23.04536247253418, + "rewards/real": -5.019701957702637, + "step": 3700 + }, + { + "epoch": 2.37, + "learning_rate": 1.159952606635071e-07, + "logits/generated": -1.9407052993774414, + "logits/real": -1.9002673625946045, + "logps/generated": -395.69915771484375, + "logps/real": -285.1513977050781, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/generated": -28.094594955444336, + "rewards/margins": 22.06725311279297, + "rewards/real": -6.027345657348633, + "step": 3710 + }, + { + "epoch": 2.38, + "learning_rate": 1.1481042654028436e-07, + "logits/generated": -1.8787548542022705, + "logits/real": -1.8866329193115234, + "logps/generated": -390.35162353515625, + "logps/real": -278.56182861328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -28.97882080078125, + "rewards/margins": 22.92947769165039, + "rewards/real": -6.049341678619385, + "step": 3720 + }, + { + "epoch": 2.39, + "learning_rate": 1.136255924170616e-07, + "logits/generated": -1.9647785425186157, + "logits/real": -1.893686294555664, + "logps/generated": -373.9754943847656, + "logps/real": -302.5462646484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -26.09256362915039, + "rewards/margins": 20.720409393310547, + "rewards/real": -5.372152805328369, + "step": 3730 + }, + { + "epoch": 2.39, + "learning_rate": 1.1244075829383886e-07, + "logits/generated": -1.9494024515151978, + "logits/real": -1.8903011083602905, + "logps/generated": -388.49896240234375, + "logps/real": -317.46038818359375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -27.872272491455078, + "rewards/margins": 22.287893295288086, + "rewards/real": -5.584378719329834, + "step": 3740 + }, + { + "epoch": 2.4, + "learning_rate": 1.112559241706161e-07, + "logits/generated": -1.9262495040893555, + "logits/real": -1.961554765701294, + "logps/generated": -386.28167724609375, + "logps/real": -334.42181396484375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -26.91708755493164, + "rewards/margins": 21.78619384765625, + "rewards/real": -5.130893707275391, + "step": 3750 + }, + { + "epoch": 2.41, + "learning_rate": 1.1007109004739336e-07, + "logits/generated": -1.9308741092681885, + "logits/real": -1.8776057958602905, + "logps/generated": -372.0367431640625, + "logps/real": -292.9879150390625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -26.954111099243164, + "rewards/margins": 21.254783630371094, + "rewards/real": -5.699324607849121, + "step": 3760 + }, + { + "epoch": 2.41, + "learning_rate": 1.0888625592417061e-07, + "logits/generated": -1.9344863891601562, + "logits/real": -1.8149305582046509, + "logps/generated": -409.9720153808594, + "logps/real": -289.69500732421875, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/generated": -29.697546005249023, + "rewards/margins": 23.537311553955078, + "rewards/real": -6.1602349281311035, + "step": 3770 + }, + { + "epoch": 2.42, + "learning_rate": 1.0770142180094787e-07, + "logits/generated": -1.9099664688110352, + "logits/real": -1.847806692123413, + "logps/generated": -381.0286560058594, + "logps/real": -284.8314208984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -27.63125228881836, + "rewards/margins": 21.55826187133789, + "rewards/real": -6.072990417480469, + "step": 3780 + }, + { + "epoch": 2.42, + "learning_rate": 1.0651658767772511e-07, + "logits/generated": -1.916006326675415, + "logits/real": -1.90207040309906, + "logps/generated": -396.6690368652344, + "logps/real": -309.1549377441406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -29.109012603759766, + "rewards/margins": 22.90389633178711, + "rewards/real": -6.205114364624023, + "step": 3790 + }, + { + "epoch": 2.43, + "learning_rate": 1.0533175355450237e-07, + "logits/generated": -1.8688459396362305, + "logits/real": -1.777832269668579, + "logps/generated": -398.3630065917969, + "logps/real": -297.55621337890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -29.57208251953125, + "rewards/margins": 23.178564071655273, + "rewards/real": -6.393516540527344, + "step": 3800 + }, + { + "epoch": 2.44, + "learning_rate": 1.0414691943127962e-07, + "logits/generated": -1.9176315069198608, + "logits/real": -1.8100831508636475, + "logps/generated": -409.38543701171875, + "logps/real": -302.27642822265625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -30.042591094970703, + "rewards/margins": 24.186843872070312, + "rewards/real": -5.8557515144348145, + "step": 3810 + }, + { + "epoch": 2.44, + "learning_rate": 1.0296208530805687e-07, + "logits/generated": -1.8687940835952759, + "logits/real": -1.8604339361190796, + "logps/generated": -380.5201416015625, + "logps/real": -306.03607177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -27.1380672454834, + "rewards/margins": 21.264789581298828, + "rewards/real": -5.87327766418457, + "step": 3820 + }, + { + "epoch": 2.45, + "learning_rate": 1.0177725118483411e-07, + "logits/generated": -1.9053184986114502, + "logits/real": -1.9573678970336914, + "logps/generated": -377.49066162109375, + "logps/real": -322.23114013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -26.4570369720459, + "rewards/margins": 20.188465118408203, + "rewards/real": -6.26857328414917, + "step": 3830 + }, + { + "epoch": 2.46, + "learning_rate": 1.0059241706161137e-07, + "logits/generated": -1.8795225620269775, + "logits/real": -1.8401075601577759, + "logps/generated": -415.0228576660156, + "logps/real": -294.7200927734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -31.20663070678711, + "rewards/margins": 25.226472854614258, + "rewards/real": -5.980162143707275, + "step": 3840 + }, + { + "epoch": 2.46, + "learning_rate": 9.940758293838862e-08, + "logits/generated": -1.7763783931732178, + "logits/real": -1.7465555667877197, + "logps/generated": -414.9600524902344, + "logps/real": -258.68865966796875, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -30.951168060302734, + "rewards/margins": 24.258214950561523, + "rewards/real": -6.692956447601318, + "step": 3850 + }, + { + "epoch": 2.47, + "learning_rate": 9.822274881516588e-08, + "logits/generated": -1.8327720165252686, + "logits/real": -1.8621619939804077, + "logps/generated": -400.9129943847656, + "logps/real": -288.2205505371094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -29.8226318359375, + "rewards/margins": 23.871675491333008, + "rewards/real": -5.950957298278809, + "step": 3860 + }, + { + "epoch": 2.48, + "learning_rate": 9.703791469194312e-08, + "logits/generated": -1.8595101833343506, + "logits/real": -1.9030145406723022, + "logps/generated": -375.13726806640625, + "logps/real": -311.8053283691406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -27.23568344116211, + "rewards/margins": 19.974584579467773, + "rewards/real": -7.261102199554443, + "step": 3870 + }, + { + "epoch": 2.48, + "learning_rate": 9.585308056872038e-08, + "logits/generated": -1.896691918373108, + "logits/real": -1.899420976638794, + "logps/generated": -395.8522644042969, + "logps/real": -334.49566650390625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -28.630496978759766, + "rewards/margins": 23.07138442993164, + "rewards/real": -5.559113025665283, + "step": 3880 + }, + { + "epoch": 2.49, + "learning_rate": 9.466824644549763e-08, + "logits/generated": -1.9497692584991455, + "logits/real": -1.9165337085723877, + "logps/generated": -410.4734802246094, + "logps/real": -317.7174987792969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -29.726776123046875, + "rewards/margins": 22.598018646240234, + "rewards/real": -7.128758907318115, + "step": 3890 + }, + { + "epoch": 2.5, + "learning_rate": 9.348341232227488e-08, + "logits/generated": -1.9241125583648682, + "logits/real": -1.9029643535614014, + "logps/generated": -407.60406494140625, + "logps/real": -347.81939697265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -29.964529037475586, + "rewards/margins": 23.82811164855957, + "rewards/real": -6.136418342590332, + "step": 3900 + }, + { + "epoch": 2.5, + "learning_rate": 9.229857819905212e-08, + "logits/generated": -1.8210303783416748, + "logits/real": -1.811648964881897, + "logps/generated": -425.33349609375, + "logps/real": -271.4889221191406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -32.074005126953125, + "rewards/margins": 24.543872833251953, + "rewards/real": -7.5301337242126465, + "step": 3910 + }, + { + "epoch": 2.51, + "learning_rate": 9.111374407582938e-08, + "logits/generated": -1.8708751201629639, + "logits/real": -1.8068602085113525, + "logps/generated": -458.919921875, + "logps/real": -279.9232482910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -33.913780212402344, + "rewards/margins": 26.395395278930664, + "rewards/real": -7.518378257751465, + "step": 3920 + }, + { + "epoch": 2.51, + "learning_rate": 8.992890995260663e-08, + "logits/generated": -1.8618663549423218, + "logits/real": -1.8846619129180908, + "logps/generated": -391.3426208496094, + "logps/real": -329.83099365234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -28.733356475830078, + "rewards/margins": 21.27267074584961, + "rewards/real": -7.46068811416626, + "step": 3930 + }, + { + "epoch": 2.52, + "learning_rate": 8.874407582938389e-08, + "logits/generated": -1.8135093450546265, + "logits/real": -1.8050518035888672, + "logps/generated": -409.73284912109375, + "logps/real": -296.38873291015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -30.615734100341797, + "rewards/margins": 23.74664306640625, + "rewards/real": -6.8690900802612305, + "step": 3940 + }, + { + "epoch": 2.53, + "learning_rate": 8.755924170616114e-08, + "logits/generated": -1.868835210800171, + "logits/real": -1.8635787963867188, + "logps/generated": -397.95159912109375, + "logps/real": -321.065185546875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/generated": -28.5548038482666, + "rewards/margins": 21.278282165527344, + "rewards/real": -7.276516914367676, + "step": 3950 + }, + { + "epoch": 2.53, + "learning_rate": 8.63744075829384e-08, + "logits/generated": -1.8097864389419556, + "logits/real": -1.7577613592147827, + "logps/generated": -443.16552734375, + "logps/real": -305.9537353515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -33.76448059082031, + "rewards/margins": 25.336135864257812, + "rewards/real": -8.428342819213867, + "step": 3960 + }, + { + "epoch": 2.54, + "learning_rate": 8.518957345971564e-08, + "logits/generated": -1.8915891647338867, + "logits/real": -1.8530080318450928, + "logps/generated": -429.5049743652344, + "logps/real": -289.32135009765625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -32.58146286010742, + "rewards/margins": 25.115055084228516, + "rewards/real": -7.466407775878906, + "step": 3970 + }, + { + "epoch": 2.55, + "learning_rate": 8.40047393364929e-08, + "logits/generated": -1.9106069803237915, + "logits/real": -1.8629109859466553, + "logps/generated": -409.6499938964844, + "logps/real": -333.20416259765625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -29.9986629486084, + "rewards/margins": 23.40935516357422, + "rewards/real": -6.5893120765686035, + "step": 3980 + }, + { + "epoch": 2.55, + "learning_rate": 8.281990521327013e-08, + "logits/generated": -1.8551766872406006, + "logits/real": -1.7436447143554688, + "logps/generated": -455.6358337402344, + "logps/real": -287.45489501953125, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -34.54535675048828, + "rewards/margins": 26.85367774963379, + "rewards/real": -7.691675662994385, + "step": 3990 + }, + { + "epoch": 2.56, + "learning_rate": 8.163507109004738e-08, + "logits/generated": -1.8847310543060303, + "logits/real": -1.7997153997421265, + "logps/generated": -436.38079833984375, + "logps/real": -352.53582763671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -32.270843505859375, + "rewards/margins": 25.198665618896484, + "rewards/real": -7.072180271148682, + "step": 4000 + }, + { + "epoch": 2.57, + "learning_rate": 8.045023696682464e-08, + "logits/generated": -1.7876107692718506, + "logits/real": -1.8028312921524048, + "logps/generated": -394.3335876464844, + "logps/real": -319.1776428222656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -29.803787231445312, + "rewards/margins": 21.604991912841797, + "rewards/real": -8.198799133300781, + "step": 4010 + }, + { + "epoch": 2.57, + "learning_rate": 7.926540284360189e-08, + "logits/generated": -1.8726141452789307, + "logits/real": -1.8162380456924438, + "logps/generated": -417.1497497558594, + "logps/real": -305.0618896484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -31.330730438232422, + "rewards/margins": 23.804101943969727, + "rewards/real": -7.526628017425537, + "step": 4020 + }, + { + "epoch": 2.58, + "learning_rate": 7.808056872037915e-08, + "logits/generated": -1.7631124258041382, + "logits/real": -1.7794008255004883, + "logps/generated": -448.10430908203125, + "logps/real": -308.09625244140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/generated": -33.852027893066406, + "rewards/margins": 26.34817886352539, + "rewards/real": -7.503846168518066, + "step": 4030 + }, + { + "epoch": 2.58, + "learning_rate": 7.689573459715639e-08, + "logits/generated": -1.805232048034668, + "logits/real": -1.7675590515136719, + "logps/generated": -404.6713562011719, + "logps/real": -301.56793212890625, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -30.712646484375, + "rewards/margins": 22.97926139831543, + "rewards/real": -7.733384609222412, + "step": 4040 + }, + { + "epoch": 2.59, + "learning_rate": 7.571090047393365e-08, + "logits/generated": -1.7601591348648071, + "logits/real": -1.749284029006958, + "logps/generated": -433.79034423828125, + "logps/real": -329.8743591308594, + "loss": 0.0024, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -33.34691619873047, + "rewards/margins": 24.43657112121582, + "rewards/real": -8.910343170166016, + "step": 4050 + }, + { + "epoch": 2.6, + "learning_rate": 7.45260663507109e-08, + "logits/generated": -1.7626146078109741, + "logits/real": -1.7438932657241821, + "logps/generated": -427.18743896484375, + "logps/real": -290.01751708984375, + "loss": 0.0046, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -31.555919647216797, + "rewards/margins": 23.180891036987305, + "rewards/real": -8.375027656555176, + "step": 4060 + }, + { + "epoch": 2.6, + "learning_rate": 7.334123222748814e-08, + "logits/generated": -1.8292105197906494, + "logits/real": -1.7760603427886963, + "logps/generated": -439.42193603515625, + "logps/real": -292.0125427246094, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -32.864219665527344, + "rewards/margins": 24.91449737548828, + "rewards/real": -7.949720859527588, + "step": 4070 + }, + { + "epoch": 2.61, + "learning_rate": 7.215639810426539e-08, + "logits/generated": -1.7653682231903076, + "logits/real": -1.6930913925170898, + "logps/generated": -437.002685546875, + "logps/real": -267.6156311035156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -33.63715362548828, + "rewards/margins": 25.4591064453125, + "rewards/real": -8.178048133850098, + "step": 4080 + }, + { + "epoch": 2.62, + "learning_rate": 7.097156398104265e-08, + "logits/generated": -1.7562963962554932, + "logits/real": -1.7341216802597046, + "logps/generated": -434.3614807128906, + "logps/real": -289.385498046875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -32.92387771606445, + "rewards/margins": 24.789287567138672, + "rewards/real": -8.134592056274414, + "step": 4090 + }, + { + "epoch": 2.62, + "learning_rate": 6.97867298578199e-08, + "logits/generated": -1.7824033498764038, + "logits/real": -1.8202848434448242, + "logps/generated": -422.67523193359375, + "logps/real": -367.13006591796875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -31.36875343322754, + "rewards/margins": 24.126026153564453, + "rewards/real": -7.242722988128662, + "step": 4100 + }, + { + "epoch": 2.63, + "learning_rate": 6.860189573459716e-08, + "logits/generated": -1.7355695962905884, + "logits/real": -1.7290878295898438, + "logps/generated": -451.7454528808594, + "logps/real": -326.6995544433594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -33.70394515991211, + "rewards/margins": 25.241628646850586, + "rewards/real": -8.462319374084473, + "step": 4110 + }, + { + "epoch": 2.64, + "learning_rate": 6.74170616113744e-08, + "logits/generated": -1.7768300771713257, + "logits/real": -1.7098900079727173, + "logps/generated": -452.030029296875, + "logps/real": -327.0111083984375, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/generated": -34.611488342285156, + "rewards/margins": 26.06864356994629, + "rewards/real": -8.542844772338867, + "step": 4120 + }, + { + "epoch": 2.64, + "learning_rate": 6.623222748815166e-08, + "logits/generated": -1.715787649154663, + "logits/real": -1.6770191192626953, + "logps/generated": -441.8128356933594, + "logps/real": -296.57110595703125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/generated": -34.08544158935547, + "rewards/margins": 26.49606704711914, + "rewards/real": -7.5893754959106445, + "step": 4130 + }, + { + "epoch": 2.65, + "learning_rate": 6.504739336492891e-08, + "logits/generated": -1.7766027450561523, + "logits/real": -1.6856123208999634, + "logps/generated": -450.623779296875, + "logps/real": -295.4322204589844, + "loss": 0.0051, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -34.94755172729492, + "rewards/margins": 26.268753051757812, + "rewards/real": -8.678799629211426, + "step": 4140 + }, + { + "epoch": 2.66, + "learning_rate": 6.386255924170615e-08, + "logits/generated": -1.7369333505630493, + "logits/real": -1.6449072360992432, + "logps/generated": -429.2544860839844, + "logps/real": -283.56549072265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -33.380615234375, + "rewards/margins": 24.91189956665039, + "rewards/real": -8.468714714050293, + "step": 4150 + }, + { + "epoch": 2.66, + "learning_rate": 6.26777251184834e-08, + "logits/generated": -1.6794822216033936, + "logits/real": -1.662726640701294, + "logps/generated": -423.33184814453125, + "logps/real": -332.1803894042969, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/generated": -31.899770736694336, + "rewards/margins": 23.491817474365234, + "rewards/real": -8.407957077026367, + "step": 4160 + }, + { + "epoch": 2.67, + "learning_rate": 6.149289099526066e-08, + "logits/generated": -1.7565444707870483, + "logits/real": -1.7407537698745728, + "logps/generated": -402.0395202636719, + "logps/real": -327.73248291015625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -29.680978775024414, + "rewards/margins": 20.341283798217773, + "rewards/real": -9.339695930480957, + "step": 4170 + }, + { + "epoch": 2.67, + "learning_rate": 6.030805687203791e-08, + "logits/generated": -1.6737483739852905, + "logits/real": -1.7306264638900757, + "logps/generated": -436.11260986328125, + "logps/real": -338.9502868652344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -32.65018844604492, + "rewards/margins": 25.60652732849121, + "rewards/real": -7.043665409088135, + "step": 4180 + }, + { + "epoch": 2.68, + "learning_rate": 5.912322274881516e-08, + "logits/generated": -1.6893961429595947, + "logits/real": -1.7002031803131104, + "logps/generated": -438.9224548339844, + "logps/real": -327.76141357421875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/generated": -33.08007049560547, + "rewards/margins": 24.394916534423828, + "rewards/real": -8.685155868530273, + "step": 4190 + }, + { + "epoch": 2.69, + "learning_rate": 5.793838862559241e-08, + "logits/generated": -1.7550160884857178, + "logits/real": -1.7153337001800537, + "logps/generated": -444.51824951171875, + "logps/real": -278.1734619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -33.22731399536133, + "rewards/margins": 24.608617782592773, + "rewards/real": -8.618697166442871, + "step": 4200 + }, + { + "epoch": 2.69, + "learning_rate": 5.6753554502369666e-08, + "logits/generated": -1.7421271800994873, + "logits/real": -1.6843255758285522, + "logps/generated": -428.5711975097656, + "logps/real": -331.9335632324219, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -32.22383499145508, + "rewards/margins": 24.138261795043945, + "rewards/real": -8.085573196411133, + "step": 4210 + }, + { + "epoch": 2.7, + "learning_rate": 5.556872037914691e-08, + "logits/generated": -1.7008402347564697, + "logits/real": -1.634892225265503, + "logps/generated": -433.476806640625, + "logps/real": -287.6949768066406, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -32.910457611083984, + "rewards/margins": 24.70734977722168, + "rewards/real": -8.203106880187988, + "step": 4220 + }, + { + "epoch": 2.71, + "learning_rate": 5.4383886255924165e-08, + "logits/generated": -1.7923284769058228, + "logits/real": -1.7142051458358765, + "logps/generated": -453.1385803222656, + "logps/real": -300.06781005859375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -34.70769119262695, + "rewards/margins": 25.626541137695312, + "rewards/real": -9.081144332885742, + "step": 4230 + }, + { + "epoch": 2.71, + "learning_rate": 5.319905213270142e-08, + "logits/generated": -1.7816097736358643, + "logits/real": -1.9599437713623047, + "logps/generated": -453.1598205566406, + "logps/real": -349.44171142578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -34.06262969970703, + "rewards/margins": 25.226978302001953, + "rewards/real": -8.835651397705078, + "step": 4240 + }, + { + "epoch": 2.72, + "learning_rate": 5.201421800947867e-08, + "logits/generated": -1.7193883657455444, + "logits/real": -1.9186795949935913, + "logps/generated": -463.2687072753906, + "logps/real": -305.72314453125, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/generated": -35.47248458862305, + "rewards/margins": 25.576107025146484, + "rewards/real": -9.896378517150879, + "step": 4250 + }, + { + "epoch": 2.73, + "learning_rate": 5.082938388625592e-08, + "logits/generated": -1.802926778793335, + "logits/real": -1.9906041622161865, + "logps/generated": -439.27899169921875, + "logps/real": -342.9306945800781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -33.147518157958984, + "rewards/margins": 23.982521057128906, + "rewards/real": -9.164995193481445, + "step": 4260 + }, + { + "epoch": 2.73, + "learning_rate": 4.964454976303317e-08, + "logits/generated": -1.8892700672149658, + "logits/real": -2.009641647338867, + "logps/generated": -437.8634338378906, + "logps/real": -341.58135986328125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -33.01290512084961, + "rewards/margins": 23.391399383544922, + "rewards/real": -9.621504783630371, + "step": 4270 + }, + { + "epoch": 2.74, + "learning_rate": 4.845971563981042e-08, + "logits/generated": -1.7956806421279907, + "logits/real": -1.9637285470962524, + "logps/generated": -460.10601806640625, + "logps/real": -356.51959228515625, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/generated": -34.52501678466797, + "rewards/margins": 25.541818618774414, + "rewards/real": -8.983198165893555, + "step": 4280 + }, + { + "epoch": 2.74, + "learning_rate": 4.7274881516587676e-08, + "logits/generated": -1.7769763469696045, + "logits/real": -1.9866300821304321, + "logps/generated": -436.766357421875, + "logps/real": -326.61431884765625, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -32.676849365234375, + "rewards/margins": 24.444482803344727, + "rewards/real": -8.232365608215332, + "step": 4290 + }, + { + "epoch": 2.75, + "learning_rate": 4.609004739336492e-08, + "logits/generated": -1.8134574890136719, + "logits/real": -1.982287049293518, + "logps/generated": -467.0006408691406, + "logps/real": -319.8429260253906, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -35.988407135009766, + "rewards/margins": 27.567617416381836, + "rewards/real": -8.420794486999512, + "step": 4300 + }, + { + "epoch": 2.76, + "learning_rate": 4.4905213270142176e-08, + "logits/generated": -1.6407321691513062, + "logits/real": -1.7915595769882202, + "logps/generated": -438.76019287109375, + "logps/real": -317.3435974121094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -33.72128677368164, + "rewards/margins": 24.755414962768555, + "rewards/real": -8.965871810913086, + "step": 4310 + }, + { + "epoch": 2.76, + "learning_rate": 4.372037914691943e-08, + "logits/generated": -1.745700478553772, + "logits/real": -1.9136505126953125, + "logps/generated": -435.6446228027344, + "logps/real": -311.99542236328125, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/generated": -33.17008590698242, + "rewards/margins": 24.030542373657227, + "rewards/real": -9.139547348022461, + "step": 4320 + }, + { + "epoch": 2.77, + "learning_rate": 4.253554502369668e-08, + "logits/generated": -1.6967980861663818, + "logits/real": -1.8322668075561523, + "logps/generated": -457.9407653808594, + "logps/real": -311.10418701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/generated": -35.436546325683594, + "rewards/margins": 27.187633514404297, + "rewards/real": -8.24891185760498, + "step": 4330 + }, + { + "epoch": 2.78, + "learning_rate": 4.135071090047393e-08, + "logits/generated": -1.7738192081451416, + "logits/real": -2.0109634399414062, + "logps/generated": -443.57568359375, + "logps/real": -337.6534729003906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -33.9232177734375, + "rewards/margins": 23.900415420532227, + "rewards/real": -10.022802352905273, + "step": 4340 + }, + { + "epoch": 2.78, + "learning_rate": 4.016587677725118e-08, + "logits/generated": -1.744932770729065, + "logits/real": -1.8545424938201904, + "logps/generated": -459.23614501953125, + "logps/real": -283.38372802734375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -35.45733642578125, + "rewards/margins": 24.5408878326416, + "rewards/real": -10.9164457321167, + "step": 4350 + }, + { + "epoch": 2.79, + "learning_rate": 3.8981042654028434e-08, + "logits/generated": -1.6494481563568115, + "logits/real": -1.8807508945465088, + "logps/generated": -449.21856689453125, + "logps/real": -347.99090576171875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/generated": -34.048919677734375, + "rewards/margins": 25.075237274169922, + "rewards/real": -8.97368049621582, + "step": 4360 + }, + { + "epoch": 2.8, + "learning_rate": 3.779620853080569e-08, + "logits/generated": -1.7925529479980469, + "logits/real": -1.838467001914978, + "logps/generated": -463.3714294433594, + "logps/real": -335.3411865234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -34.72087478637695, + "rewards/margins": 26.26070213317871, + "rewards/real": -8.46017074584961, + "step": 4370 + }, + { + "epoch": 2.8, + "learning_rate": 3.661137440758294e-08, + "logits/generated": -1.6993271112442017, + "logits/real": -1.7849382162094116, + "logps/generated": -451.7796936035156, + "logps/real": -284.07977294921875, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/generated": -35.063255310058594, + "rewards/margins": 26.02164077758789, + "rewards/real": -9.041617393493652, + "step": 4380 + }, + { + "epoch": 2.81, + "learning_rate": 3.5426540284360186e-08, + "logits/generated": -1.5902955532073975, + "logits/real": -1.7994792461395264, + "logps/generated": -469.3233947753906, + "logps/real": -322.81182861328125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/generated": -36.78888702392578, + "rewards/margins": 27.999755859375, + "rewards/real": -8.789128303527832, + "step": 4390 + }, + { + "epoch": 2.82, + "learning_rate": 3.424170616113744e-08, + "logits/generated": -1.7078588008880615, + "logits/real": -1.9263912439346313, + "logps/generated": -462.34368896484375, + "logps/real": -322.7901916503906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/generated": -35.061485290527344, + "rewards/margins": 26.207286834716797, + "rewards/real": -8.854198455810547, + "step": 4400 + }, + { + "epoch": 2.82, + "learning_rate": 3.305687203791469e-08, + "logits/generated": -1.7351045608520508, + "logits/real": -1.7719390392303467, + "logps/generated": -442.97735595703125, + "logps/real": -315.83087158203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -33.271141052246094, + "rewards/margins": 24.884248733520508, + "rewards/real": -8.386889457702637, + "step": 4410 + }, + { + "epoch": 2.83, + "learning_rate": 3.1872037914691945e-08, + "logits/generated": -1.6406385898590088, + "logits/real": -1.8298654556274414, + "logps/generated": -455.2843322753906, + "logps/real": -318.89337158203125, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -35.38450241088867, + "rewards/margins": 26.2564640045166, + "rewards/real": -9.12803840637207, + "step": 4420 + }, + { + "epoch": 2.83, + "learning_rate": 3.068720379146919e-08, + "logits/generated": -1.7697868347167969, + "logits/real": -1.822705864906311, + "logps/generated": -438.582275390625, + "logps/real": -304.01617431640625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -33.55647277832031, + "rewards/margins": 23.950986862182617, + "rewards/real": -9.605484008789062, + "step": 4430 + }, + { + "epoch": 2.84, + "learning_rate": 2.9502369668246444e-08, + "logits/generated": -1.6771583557128906, + "logits/real": -1.854857087135315, + "logps/generated": -430.46282958984375, + "logps/real": -316.00958251953125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/generated": -32.684688568115234, + "rewards/margins": 23.534297943115234, + "rewards/real": -9.150388717651367, + "step": 4440 + }, + { + "epoch": 2.85, + "learning_rate": 2.8317535545023697e-08, + "logits/generated": -1.7022714614868164, + "logits/real": -1.8785192966461182, + "logps/generated": -466.81396484375, + "logps/real": -324.5412292480469, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/generated": -36.14876174926758, + "rewards/margins": 27.45867919921875, + "rewards/real": -8.690082550048828, + "step": 4450 + }, + { + "epoch": 2.85, + "learning_rate": 2.7132701421800947e-08, + "logits/generated": -1.694084882736206, + "logits/real": -1.75972580909729, + "logps/generated": -468.11016845703125, + "logps/real": -322.52679443359375, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/generated": -36.64365768432617, + "rewards/margins": 27.119192123413086, + "rewards/real": -9.524468421936035, + "step": 4460 + }, + { + "epoch": 2.86, + "learning_rate": 2.59478672985782e-08, + "logits/generated": -1.61457097530365, + "logits/real": -1.8486804962158203, + "logps/generated": -432.85284423828125, + "logps/real": -327.989990234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -33.129432678222656, + "rewards/margins": 24.179370880126953, + "rewards/real": -8.950057983398438, + "step": 4470 + }, + { + "epoch": 2.87, + "learning_rate": 2.476303317535545e-08, + "logits/generated": -1.6624841690063477, + "logits/real": -1.8223241567611694, + "logps/generated": -456.5455017089844, + "logps/real": -319.22509765625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/generated": -35.40416717529297, + "rewards/margins": 26.277795791625977, + "rewards/real": -9.12637710571289, + "step": 4480 + }, + { + "epoch": 2.87, + "learning_rate": 2.3578199052132702e-08, + "logits/generated": -1.7079921960830688, + "logits/real": -1.829472541809082, + "logps/generated": -408.7738342285156, + "logps/real": -334.1302795410156, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/generated": -30.858179092407227, + "rewards/margins": 22.388179779052734, + "rewards/real": -8.470001220703125, + "step": 4490 + }, + { + "epoch": 2.88, + "learning_rate": 2.239336492890995e-08, + "logits/generated": -1.759307861328125, + "logits/real": -1.8515949249267578, + "logps/generated": -441.416259765625, + "logps/real": -318.9807434082031, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/generated": -33.875450134277344, + "rewards/margins": 24.575389862060547, + "rewards/real": -9.300056457519531, + "step": 4500 + }, + { + "epoch": 2.89, + "learning_rate": 2.1208530805687202e-08, + "logits/generated": -1.6304250955581665, + "logits/real": -1.8564281463623047, + "logps/generated": -444.32659912109375, + "logps/real": -306.439697265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -33.86289978027344, + "rewards/margins": 25.232988357543945, + "rewards/real": -8.62990951538086, + "step": 4510 + }, + { + "epoch": 2.89, + "learning_rate": 2.002369668246445e-08, + "logits/generated": -1.7856998443603516, + "logits/real": -1.8842220306396484, + "logps/generated": -467.9981384277344, + "logps/real": -315.4876403808594, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -35.54204559326172, + "rewards/margins": 26.91533851623535, + "rewards/real": -8.626705169677734, + "step": 4520 + }, + { + "epoch": 2.9, + "learning_rate": 1.8838862559241704e-08, + "logits/generated": -1.7326618432998657, + "logits/real": -1.9145400524139404, + "logps/generated": -440.7589416503906, + "logps/real": -306.02862548828125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/generated": -33.05302429199219, + "rewards/margins": 24.692596435546875, + "rewards/real": -8.360427856445312, + "step": 4530 + }, + { + "epoch": 2.9, + "learning_rate": 1.7654028436018954e-08, + "logits/generated": -1.7552152872085571, + "logits/real": -1.817386269569397, + "logps/generated": -461.41070556640625, + "logps/real": -327.1177978515625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -34.42705535888672, + "rewards/margins": 25.052804946899414, + "rewards/real": -9.374256134033203, + "step": 4540 + }, + { + "epoch": 2.91, + "learning_rate": 1.6469194312796207e-08, + "logits/generated": -1.7163751125335693, + "logits/real": -1.8879632949829102, + "logps/generated": -425.6315002441406, + "logps/real": -320.1476135253906, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -31.34682273864746, + "rewards/margins": 22.973106384277344, + "rewards/real": -8.373712539672852, + "step": 4550 + }, + { + "epoch": 2.92, + "learning_rate": 1.528436018957346e-08, + "logits/generated": -1.7140891551971436, + "logits/real": -1.8986284732818604, + "logps/generated": -440.7607421875, + "logps/real": -326.2283935546875, + "loss": 0.0047, + "rewards/accuracies": 0.9750000238418579, + "rewards/generated": -32.59244918823242, + "rewards/margins": 24.93307876586914, + "rewards/real": -7.659371852874756, + "step": 4560 + }, + { + "epoch": 2.92, + "learning_rate": 1.409952606635071e-08, + "logits/generated": -1.7714250087738037, + "logits/real": -1.9002504348754883, + "logps/generated": -453.63543701171875, + "logps/real": -300.76885986328125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -35.10737228393555, + "rewards/margins": 27.279804229736328, + "rewards/real": -7.827570915222168, + "step": 4570 + }, + { + "epoch": 2.93, + "learning_rate": 1.2914691943127961e-08, + "logits/generated": -1.6990067958831787, + "logits/real": -1.9161514043807983, + "logps/generated": -414.04302978515625, + "logps/real": -300.9434814453125, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -31.5983829498291, + "rewards/margins": 22.450963973999023, + "rewards/real": -9.147419929504395, + "step": 4580 + }, + { + "epoch": 2.94, + "learning_rate": 1.1729857819905212e-08, + "logits/generated": -1.7278430461883545, + "logits/real": -1.7927424907684326, + "logps/generated": -472.21429443359375, + "logps/real": -341.255126953125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/generated": -35.60918426513672, + "rewards/margins": 27.14546775817871, + "rewards/real": -8.463715553283691, + "step": 4590 + }, + { + "epoch": 2.94, + "learning_rate": 1.0545023696682464e-08, + "logits/generated": -1.6661436557769775, + "logits/real": -1.824730634689331, + "logps/generated": -441.8641052246094, + "logps/real": -310.7605895996094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -33.483917236328125, + "rewards/margins": 25.23209571838379, + "rewards/real": -8.251824378967285, + "step": 4600 + }, + { + "epoch": 2.95, + "learning_rate": 9.360189573459715e-09, + "logits/generated": -1.776623010635376, + "logits/real": -1.8051350116729736, + "logps/generated": -457.2853088378906, + "logps/real": -318.8353271484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -35.16924285888672, + "rewards/margins": 26.07401466369629, + "rewards/real": -9.09522533416748, + "step": 4610 + }, + { + "epoch": 2.96, + "learning_rate": 8.175355450236966e-09, + "logits/generated": -1.771087646484375, + "logits/real": -1.8877332210540771, + "logps/generated": -436.79986572265625, + "logps/real": -330.20916748046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -32.55842590332031, + "rewards/margins": 24.00469970703125, + "rewards/real": -8.553728103637695, + "step": 4620 + }, + { + "epoch": 2.96, + "learning_rate": 6.990521327014218e-09, + "logits/generated": -1.7417593002319336, + "logits/real": -1.8479112386703491, + "logps/generated": -403.1101989746094, + "logps/real": -297.042724609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/generated": -30.246688842773438, + "rewards/margins": 22.63168716430664, + "rewards/real": -7.615001678466797, + "step": 4630 + }, + { + "epoch": 2.97, + "learning_rate": 5.805687203791469e-09, + "logits/generated": -1.7405914068222046, + "logits/real": -1.8920139074325562, + "logps/generated": -443.20306396484375, + "logps/real": -322.84783935546875, + "loss": 0.0024, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -33.4544792175293, + "rewards/margins": 24.869140625, + "rewards/real": -8.585339546203613, + "step": 4640 + }, + { + "epoch": 2.98, + "learning_rate": 4.62085308056872e-09, + "logits/generated": -1.7274150848388672, + "logits/real": -1.89272141456604, + "logps/generated": -461.22607421875, + "logps/real": -324.2146301269531, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/generated": -36.303890228271484, + "rewards/margins": 27.63534927368164, + "rewards/real": -8.668540954589844, + "step": 4650 + }, + { + "epoch": 2.98, + "learning_rate": 3.4360189573459714e-09, + "logits/generated": -1.7416044473648071, + "logits/real": -1.9509055614471436, + "logps/generated": -453.283935546875, + "logps/real": -366.43060302734375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/generated": -33.6645393371582, + "rewards/margins": 25.695453643798828, + "rewards/real": -7.96908712387085, + "step": 4660 + }, + { + "epoch": 2.99, + "learning_rate": 2.2511848341232227e-09, + "logits/generated": -1.8054618835449219, + "logits/real": -1.8594818115234375, + "logps/generated": -428.821044921875, + "logps/real": -289.93951416015625, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/generated": -32.35812759399414, + "rewards/margins": 23.261104583740234, + "rewards/real": -9.097023010253906, + "step": 4670 + }, + { + "epoch": 2.99, + "learning_rate": 1.0663507109004738e-09, + "logits/generated": -1.6955547332763672, + "logits/real": -1.981899619102478, + "logps/generated": -423.219482421875, + "logps/real": -349.48504638671875, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/generated": -31.58346176147461, + "rewards/margins": 23.281259536743164, + "rewards/real": -8.302202224731445, + "step": 4680 + }, + { + "epoch": 3.0, + "step": 4689, + "total_flos": 0.0, + "train_loss": 0.023988249511193074, + "train_runtime": 36939.4965, + "train_samples_per_second": 4.061, + "train_steps_per_second": 0.127 + } + ], + "logging_steps": 10, + "max_steps": 4689, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}